Skip to content

Web page to HTML

Use attr: 'html' when your consumer needs markup instead of Markdown or plain text. Microlink can fetch the source document, optionally render it in a browser, and expose the resulting HTML through a data field.

The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting 'https://example.com' URL with 'data' & 'meta' API parameters:

CLI Microlink API example

microlink https://example.com&data.html.attr=html

cURL Microlink API example

curl -G "https://api.microlink.io" \
  -d "url=https://example.com" \
  -d "data.html.attr=html" \
  -d "meta=false"

JavaScript Microlink API example

import mql from '@microlink/mql'

const { data } = await mql('https://example.com', {
  data: {
    html: {
      attr: "html"
    }
  },
  meta: false
})

Python Microlink API example

import requests

url = "https://api.microlink.io/"

querystring = {
    "url": "https://example.com",
    "data.html.attr": "html",
    "meta": "false"
}

response = requests.get(url, params=querystring)

print(response.json())

Ruby Microlink API example

require 'uri'
require 'net/http'

base_url = "https://api.microlink.io/"

params = {
  url: "https://example.com",
  data.html.attr: "html",
  meta: "false"
}

uri = URI(base_url)
uri.query = URI.encode_www_form(params)

http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

puts response.body

PHP Microlink API example

<?php

$baseUrl = "https://api.microlink.io/";

$params = [
    "url" => "https://example.com",
    "data.html.attr" => "html",
    "meta" => "false"
];

$query = http_build_query($params);
$url = $baseUrl . '?' . $query;

$curl = curl_init();

curl_setopt_array($curl, [
    CURLOPT_URL => $url,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "GET"
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
    echo "cURL Error #: " . $err;
} else {
    echo $response;
}

Golang Microlink API example

package main

import (
    "fmt"
    "net/http"
    "net/url"
    "io"
)

func main() {
    baseURL := "https://api.microlink.io"

    u, err := url.Parse(baseURL)
    if err != nil {
        panic(err)
    }
    q := u.Query()
    q.Set("url", "https://example.com")
    q.Set("data.html.attr", "html")
    q.Set("meta", "false")
    u.RawQuery = q.Encode()

    req, err := http.NewRequest("GET", u.String(), nil)
    if err != nil {
        panic(err)
    }

    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(body))
}
Read the HTML document from data.html.

Return HTML directly

Add embed: 'html' when the API URL itself should return HTML:

The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting 'https://example.com' URL with 'data', 'meta' & 'embed' API parameters:

CLI Microlink API example

microlink https://example.com&data.html.attr=html&embed=html

cURL Microlink API example

curl -G "https://api.microlink.io" \
  -d "url=https://example.com" \
  -d "data.html.attr=html" \
  -d "meta=false" \
  -d "embed=html"

JavaScript Microlink API example

import mql from '@microlink/mql'

const { data } = await mql('https://example.com', {
  data: {
    html: {
      attr: "html"
    }
  },
  meta: false,
  embed: "html"
})

Python Microlink API example

import requests

url = "https://api.microlink.io/"

querystring = {
    "url": "https://example.com",
    "data.html.attr": "html",
    "meta": "false",
    "embed": "html"
}

response = requests.get(url, params=querystring)

print(response.json())

Ruby Microlink API example

require 'uri'
require 'net/http'

base_url = "https://api.microlink.io/"

params = {
  url: "https://example.com",
  data.html.attr: "html",
  meta: "false",
  embed: "html"
}

uri = URI(base_url)
uri.query = URI.encode_www_form(params)

http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

puts response.body

PHP Microlink API example

<?php

$baseUrl = "https://api.microlink.io/";

$params = [
    "url" => "https://example.com",
    "data.html.attr" => "html",
    "meta" => "false",
    "embed" => "html"
];

$query = http_build_query($params);
$url = $baseUrl . '?' . $query;

$curl = curl_init();

curl_setopt_array($curl, [
    CURLOPT_URL => $url,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "GET"
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
    echo "cURL Error #: " . $err;
} else {
    echo $response;
}

Golang Microlink API example

package main

import (
    "fmt"
    "net/http"
    "net/url"
    "io"
)

func main() {
    baseURL := "https://api.microlink.io"

    u, err := url.Parse(baseURL)
    if err != nil {
        panic(err)
    }
    q := u.Query()
    q.Set("url", "https://example.com")
    q.Set("data.html.attr", "html")
    q.Set("meta", "false")
    q.Set("embed", "html")
    u.RawQuery = q.Encode()

    req, err := http.NewRequest("GET", u.String(), nil)
    if err != nil {
        panic(err)
    }

    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(body))
}
The response body is HTML and the content type is text/html.
The same request as a raw URL:
https://api.microlink.io?url=https://example.com&data.html.attr=html&meta=false&embed=html

Extract a fragment instead of the full document

Use a selector when you only need one part of the page:

The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting 'https://microlink.io/docs/api/getting-started/overview' URL with 'data', 'meta' & 'embed' API parameters:

CLI Microlink API example

microlink https://microlink.io/docs/api/getting-started/overview&data.html.selector=main&data.html.attr=html&embed=html

cURL Microlink API example

curl -G "https://api.microlink.io" \
  -d "url=https://microlink.io/docs/api/getting-started/overview" \
  -d "data.html.selector=main" \
  -d "data.html.attr=html" \
  -d "meta=false" \
  -d "embed=html"

JavaScript Microlink API example

import mql from '@microlink/mql'

const { data } = await mql('https://microlink.io/docs/api/getting-started/overview', {
  data: {
    html: {
      selector: "main",
      attr: "html"
    }
  },
  meta: false,
  embed: "html"
})

Python Microlink API example

import requests

url = "https://api.microlink.io/"

querystring = {
    "url": "https://microlink.io/docs/api/getting-started/overview",
    "data.html.selector": "main",
    "data.html.attr": "html",
    "meta": "false",
    "embed": "html"
}

response = requests.get(url, params=querystring)

print(response.json())

Ruby Microlink API example

require 'uri'
require 'net/http'

base_url = "https://api.microlink.io/"

params = {
  url: "https://microlink.io/docs/api/getting-started/overview",
  data.html.selector: "main",
  data.html.attr: "html",
  meta: "false",
  embed: "html"
}

uri = URI(base_url)
uri.query = URI.encode_www_form(params)

http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

puts response.body

PHP Microlink API example

<?php

$baseUrl = "https://api.microlink.io/";

$params = [
    "url" => "https://microlink.io/docs/api/getting-started/overview",
    "data.html.selector" => "main",
    "data.html.attr" => "html",
    "meta" => "false",
    "embed" => "html"
];

$query = http_build_query($params);
$url = $baseUrl . '?' . $query;

$curl = curl_init();

curl_setopt_array($curl, [
    CURLOPT_URL => $url,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "GET"
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
    echo "cURL Error #: " . $err;
} else {
    echo $response;
}

Golang Microlink API example

package main

import (
    "fmt"
    "net/http"
    "net/url"
    "io"
)

func main() {
    baseURL := "https://api.microlink.io"

    u, err := url.Parse(baseURL)
    if err != nil {
        panic(err)
    }
    q := u.Query()
    q.Set("url", "https://microlink.io/docs/api/getting-started/overview")
    q.Set("data.html.selector", "main")
    q.Set("data.html.attr", "html")
    q.Set("meta", "false")
    q.Set("embed", "html")
    u.RawQuery = q.Encode()

    req, err := http.NewRequest("GET", u.String(), nil)
    if err != nil {
        panic(err)
    }

    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(body))
}
With a selector, attr: 'html' returns the matched element HTML for the selected page region.

Render JavaScript pages

When the initial HTML is empty because the page is client-rendered, turn on browser rendering and wait for the content wrapper:

The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting 'https://microlink.io' URL with 'data', 'prerender', 'waitForSelector', 'meta' & 'embed' API parameters:

CLI Microlink API example

microlink https://microlink.io&data.html.selector=main&data.html.attr=html&prerender&waitForSelector=main&embed=html

cURL Microlink API example

curl -G "https://api.microlink.io" \
  -d "url=https://microlink.io" \
  -d "data.html.selector=main" \
  -d "data.html.attr=html" \
  -d "prerender=true" \
  -d "waitForSelector=main" \
  -d "meta=false" \
  -d "embed=html"

JavaScript Microlink API example

import mql from '@microlink/mql'

const { data } = await mql('https://microlink.io', {
  data: {
    html: {
      selector: "main",
      attr: "html"
    }
  },
  prerender: true,
  waitForSelector: "main",
  meta: false,
  embed: "html"
})

Python Microlink API example

import requests

url = "https://api.microlink.io/"

querystring = {
    "url": "https://microlink.io",
    "data.html.selector": "main",
    "data.html.attr": "html",
    "prerender": "true",
    "waitForSelector": "main",
    "meta": "false",
    "embed": "html"
}

response = requests.get(url, params=querystring)

print(response.json())

Ruby Microlink API example

require 'uri'
require 'net/http'

base_url = "https://api.microlink.io/"

params = {
  url: "https://microlink.io",
  data.html.selector: "main",
  data.html.attr: "html",
  prerender: "true",
  waitForSelector: "main",
  meta: "false",
  embed: "html"
}

uri = URI(base_url)
uri.query = URI.encode_www_form(params)

http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

puts response.body

PHP Microlink API example

<?php

$baseUrl = "https://api.microlink.io/";

$params = [
    "url" => "https://microlink.io",
    "data.html.selector" => "main",
    "data.html.attr" => "html",
    "prerender" => "true",
    "waitForSelector" => "main",
    "meta" => "false",
    "embed" => "html"
];

$query = http_build_query($params);
$url = $baseUrl . '?' . $query;

$curl = curl_init();

curl_setopt_array($curl, [
    CURLOPT_URL => $url,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "GET"
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
    echo "cURL Error #: " . $err;
} else {
    echo $response;
}

Golang Microlink API example

package main

import (
    "fmt"
    "net/http"
    "net/url"
    "io"
)

func main() {
    baseURL := "https://api.microlink.io"

    u, err := url.Parse(baseURL)
    if err != nil {
        panic(err)
    }
    q := u.Query()
    q.Set("url", "https://microlink.io")
    q.Set("data.html.selector", "main")
    q.Set("data.html.attr", "html")
    q.Set("prerender", "true")
    q.Set("waitForSelector", "main")
    q.Set("meta", "false")
    q.Set("embed", "html")
    u.RawQuery = q.Encode()

    req, err := http.NewRequest("GET", u.String(), nil)
    if err != nil {
        panic(err)
    }

    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(body))
}
Use prerender only when the page needs browser execution before HTML extraction.

Next step

Use Data extraction: Page preparation for waits, clicks, scrolling, browser settings, and CSS cleanup. Use Web page to Text when the consumer only needs readable text, or PDF file to HTML when the source URL points directly to a PDF file.