Skip to content

Web page to Text

Use attr: 'text' when your consumer needs readable plain text instead of Markdown structure or HTML markup. Microlink extracts the page content and returns it as a string.

The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting 'https://example.com' URL with 'data' & 'meta' API parameters:

CLI Microlink API example

# Quote the URL so the shell does not treat '&' as the background operator.
microlink "https://example.com&data.text.attr=text"

cURL Microlink API example

# Send the options as query parameters of a GET request to the Microlink API.
curl --get "https://api.microlink.io" \
  --data "url=https://example.com" \
  --data "data.text.attr=text" \
  --data "meta=false"

JavaScript Microlink API example

import mql from '@microlink/mql'

// Request the page content as plain text under `data.text`; `meta` is disabled.
const options = {
  data: { text: { attr: 'text' } },
  meta: false
}

const { data } = await mql('https://example.com', options)

Python Microlink API example

import requests

# Microlink API endpoint; the target page and options travel as query parameters.
API_ENDPOINT = "https://api.microlink.io/"

params = {
    "url": "https://example.com",
    "data.text.attr": "text",  # return the page content as plain text
    "meta": "false",
}

response = requests.get(API_ENDPOINT, params=params)

# Print the decoded JSON payload.
print(response.json())

Ruby Microlink API example

require 'uri'
require 'net/http'

base_url = "https://api.microlink.io/"

# Dotted parameter names are not valid bare symbol keys (`data.text.attr:` is a
# syntax error), so the query parameters use string keys.
params = {
  "url" => "https://example.com",
  "data.text.attr" => "text",
  "meta" => "false"
}

uri = URI(base_url)
uri.query = URI.encode_www_form(params)

http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

puts response.body

PHP Microlink API example

<?php

// Microlink API endpoint and the query parameters sent with the request.
$endpoint = "https://api.microlink.io/";

$queryParams = [
    "url" => "https://example.com",
    "data.text.attr" => "text",
    "meta" => "false"
];

$requestUrl = $endpoint . '?' . http_build_query($queryParams);

// Configure a GET request whose body is returned by curl_exec().
$handle = curl_init();
curl_setopt_array($handle, [
    CURLOPT_URL => $requestUrl,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "GET"
]);

$body = curl_exec($handle);
$error = curl_error($handle);
curl_close($handle);

// Print the response body, or the cURL error message if the request failed.
if (!$error) {
    echo $body;
} else {
    echo "cURL Error #: " . $error;
}

Golang Microlink API example

package main

import (
    "fmt"
    "io"
    "net/http"
    "net/url"
)

// main sends a GET request to the Microlink API for https://example.com and
// prints the raw response body.
func main() {
    endpoint, err := url.Parse("https://api.microlink.io")
    if err != nil {
        panic(err)
    }

    // Add the API parameters to the endpoint's query string.
    params := endpoint.Query()
    params.Set("url", "https://example.com")
    params.Set("data.text.attr", "text")
    params.Set("meta", "false")
    endpoint.RawQuery = params.Encode()

    request, err := http.NewRequest(http.MethodGet, endpoint.String(), nil)
    if err != nil {
        panic(err)
    }

    response, err := (&http.Client{}).Do(request)
    if err != nil {
        panic(err)
    }
    defer response.Body.Close()

    payload, err := io.ReadAll(response.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(payload))
}
Read the converted text from data.text.

Return text directly

Add embed: 'text' when the API URL itself should return plain text:

The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting 'https://example.com' URL with 'data', 'meta' & 'embed' API parameters:

CLI Microlink API example

# Quote the URL so the shell does not treat '&' as the background operator.
microlink "https://example.com&data.text.attr=text&embed=text"

cURL Microlink API example

# Send the options as query parameters; embed=text makes the body plain text.
curl --get "https://api.microlink.io" \
  --data "url=https://example.com" \
  --data "data.text.attr=text" \
  --data "meta=false" \
  --data "embed=text"

JavaScript Microlink API example

import mql from '@microlink/mql'

// Request the page content as plain text and embed the `text` field directly.
const options = {
  data: { text: { attr: 'text' } },
  meta: false,
  embed: 'text'
}

const { data } = await mql('https://example.com', options)

Python Microlink API example

import requests

url = "https://api.microlink.io/"

querystring = {
    "url": "https://example.com",
    "data.text.attr": "text",
    "meta": "false",
    "embed": "text"
}

response = requests.get(url, params=querystring)

# With embed=text the response body is text/plain rather than JSON, so
# response.json() would raise a decode error; print the raw text instead.
print(response.text)

Ruby Microlink API example

require 'uri'
require 'net/http'

base_url = "https://api.microlink.io/"

# Dotted parameter names are not valid bare symbol keys (`data.text.attr:` is a
# syntax error), so the query parameters use string keys.
params = {
  "url" => "https://example.com",
  "data.text.attr" => "text",
  "meta" => "false",
  "embed" => "text"
}

uri = URI(base_url)
uri.query = URI.encode_www_form(params)

http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

puts response.body

PHP Microlink API example

<?php

// Microlink API endpoint and the query parameters sent with the request.
$endpoint = "https://api.microlink.io/";

$queryParams = [
    "url" => "https://example.com",
    "data.text.attr" => "text",
    "meta" => "false",
    "embed" => "text"
];

$requestUrl = $endpoint . '?' . http_build_query($queryParams);

// Configure a GET request whose body is returned by curl_exec().
$handle = curl_init();
curl_setopt_array($handle, [
    CURLOPT_URL => $requestUrl,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "GET"
]);

$body = curl_exec($handle);
$error = curl_error($handle);
curl_close($handle);

// Print the response body, or the cURL error message if the request failed.
if (!$error) {
    echo $body;
} else {
    echo "cURL Error #: " . $error;
}

Golang Microlink API example

package main

import (
    "fmt"
    "io"
    "net/http"
    "net/url"
)

// main sends a GET request to the Microlink API for https://example.com with
// embed=text and prints the raw response body.
func main() {
    endpoint, err := url.Parse("https://api.microlink.io")
    if err != nil {
        panic(err)
    }

    // Add the API parameters to the endpoint's query string.
    params := endpoint.Query()
    params.Set("url", "https://example.com")
    params.Set("data.text.attr", "text")
    params.Set("meta", "false")
    params.Set("embed", "text")
    endpoint.RawQuery = params.Encode()

    request, err := http.NewRequest(http.MethodGet, endpoint.String(), nil)
    if err != nil {
        panic(err)
    }

    response, err := (&http.Client{}).Do(request)
    if err != nil {
        panic(err)
    }
    defer response.Body.Close()

    payload, err := io.ReadAll(response.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(payload))
}
The response body is plain text and the content type is text/plain.
The same request as a raw URL:
https://api.microlink.io?url=https://example.com&data.text.attr=text&meta=false&embed=text

Scope text extraction

Use a selector when the page has navigation, footers, or other text that should not be included:

The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting 'https://microlink.io/docs/api/getting-started/overview' URL with 'data', 'meta' & 'embed' API parameters:

CLI Microlink API example

# Quote the URL so the shell does not treat '&' as the background operator.
microlink "https://microlink.io/docs/api/getting-started/overview&data.text.selector=main&data.text.attr=text&embed=text"

cURL Microlink API example

# Send the options as query parameters; the selector limits extraction to <main>.
curl --get "https://api.microlink.io" \
  --data "url=https://microlink.io/docs/api/getting-started/overview" \
  --data "data.text.selector=main" \
  --data "data.text.attr=text" \
  --data "meta=false" \
  --data "embed=text"

JavaScript Microlink API example

import mql from '@microlink/mql'

const targetUrl = 'https://microlink.io/docs/api/getting-started/overview'

// Extract plain text from the `main` selector only and embed the `text` field.
const options = {
  data: { text: { selector: 'main', attr: 'text' } },
  meta: false,
  embed: 'text'
}

const { data } = await mql(targetUrl, options)

Python Microlink API example

import requests

url = "https://api.microlink.io/"

querystring = {
    "url": "https://microlink.io/docs/api/getting-started/overview",
    "data.text.selector": "main",
    "data.text.attr": "text",
    "meta": "false",
    "embed": "text"
}

response = requests.get(url, params=querystring)

# With embed=text the response body is plain text rather than JSON, so
# response.json() would raise a decode error; print the raw text instead.
print(response.text)

Ruby Microlink API example

require 'uri'
require 'net/http'

base_url = "https://api.microlink.io/"

# Dotted parameter names are not valid bare symbol keys (`data.text.attr:` is a
# syntax error), so the query parameters use string keys.
params = {
  "url" => "https://microlink.io/docs/api/getting-started/overview",
  "data.text.selector" => "main",
  "data.text.attr" => "text",
  "meta" => "false",
  "embed" => "text"
}

uri = URI(base_url)
uri.query = URI.encode_www_form(params)

http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

puts response.body

PHP Microlink API example

<?php

// Microlink API endpoint and the query parameters sent with the request.
$endpoint = "https://api.microlink.io/";

$queryParams = [
    "url" => "https://microlink.io/docs/api/getting-started/overview",
    "data.text.selector" => "main",
    "data.text.attr" => "text",
    "meta" => "false",
    "embed" => "text"
];

$requestUrl = $endpoint . '?' . http_build_query($queryParams);

// Configure a GET request whose body is returned by curl_exec().
$handle = curl_init();
curl_setopt_array($handle, [
    CURLOPT_URL => $requestUrl,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "GET"
]);

$body = curl_exec($handle);
$error = curl_error($handle);
curl_close($handle);

// Print the response body, or the cURL error message if the request failed.
if (!$error) {
    echo $body;
} else {
    echo "cURL Error #: " . $error;
}

Golang Microlink API example

package main

import (
    "fmt"
    "io"
    "net/http"
    "net/url"
)

// main sends a GET request to the Microlink API for the docs overview page,
// scoped to the "main" selector, and prints the raw response body.
func main() {
    endpoint, err := url.Parse("https://api.microlink.io")
    if err != nil {
        panic(err)
    }

    // Add the API parameters to the endpoint's query string.
    params := endpoint.Query()
    params.Set("url", "https://microlink.io/docs/api/getting-started/overview")
    params.Set("data.text.selector", "main")
    params.Set("data.text.attr", "text")
    params.Set("meta", "false")
    params.Set("embed", "text")
    endpoint.RawQuery = params.Encode()

    request, err := http.NewRequest(http.MethodGet, endpoint.String(), nil)
    if err != nil {
        panic(err)
    }

    response, err := (&http.Client{}).Do(request)
    if err != nil {
        panic(err)
    }
    defer response.Body.Close()

    payload, err := io.ReadAll(response.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(payload))
}
Use main, article, or a page-specific selector to keep only the useful text.

Choose text, markdown, or HTML

| Need | Use |
| --- | --- |
| The smallest readable body | attr: 'text' |
| Headings, links, lists, and code blocks | attr: 'markdown' |
| Markup for downstream parsing or rendering | attr: 'html' |

Next step

Use Web page to Markdown when you need document structure, or Web page to HTML when you need markup.