Skip to content

Defining rules

Good extraction starts with good rule design. The goal is not to scrape everything blindly, but to declare the smallest set of fields your application actually needs.

Start with a single field

The simplest rule targets one element and extracts one value:

The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting 'https://example.com' URL with 'data' & 'meta' API parameters:

CLI Microlink API example

microlink https://example.com&data.title.selector=h1&data.title.attr=text

cURL Microlink API example

curl -G "https://api.microlink.io" \
  -d "url=https://example.com" \
  -d "data.title.selector=h1" \
  -d "data.title.attr=text" \
  -d "meta=false"

JavaScript Microlink API example

import mql from '@microlink/mql'

const { data } = await mql('https://example.com', {
  data: {
    title: {
      selector: "h1",
      attr: "text"
    }
  },
  meta: false
})

Python Microlink API example

import requests

url = "https://api.microlink.io/"

querystring = {
    "url": "https://example.com",
    "data.title.selector": "h1",
    "data.title.attr": "text",
    "meta": "false"
}

response = requests.get(url, params=querystring)

print(response.json())

Ruby Microlink API example

require 'uri'
require 'net/http'

base_url = "https://api.microlink.io/"

params = {
  url: "https://example.com",
  data.title.selector: "h1",
  data.title.attr: "text",
  meta: "false"
}

uri = URI(base_url)
uri.query = URI.encode_www_form(params)

http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

puts response.body

PHP Microlink API example

<?php

$baseUrl = "https://api.microlink.io/";

$params = [
    "url" => "https://example.com",
    "data.title.selector" => "h1",
    "data.title.attr" => "text",
    "meta" => "false"
];

$query = http_build_query($params);
$url = $baseUrl . '?' . $query;

$curl = curl_init();

curl_setopt_array($curl, [
    CURLOPT_URL => $url,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "GET"
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
    echo "cURL Error #: " . $err;
} else {
    echo $response;
}

Golang Microlink API example

package main

import (
    "fmt"
    "net/http"
    "net/url"
    "io"
)

func main() {
    baseURL := "https://api.microlink.io"

    u, err := url.Parse(baseURL)
    if err != nil {
        panic(err)
    }
    q := u.Query()
    q.Set("url", "https://example.com")
    q.Set("data.title.selector", "h1")
    q.Set("data.title.attr", "text")
    q.Set("meta", "false")
    u.RawQuery = q.Encode()

    req, err := http.NewRequest("GET", u.String(), nil)
    if err != nil {
        panic(err)
    }

    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(body))
}
Use selector for one element and attr for the value you want from it.
This is the best first step when you are learning a new page structure. Once the first field works, add the rest one by one.

Extract a collection

If the page contains repeated elements, switch to selectorAll:

The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting 'https://news.ycombinator.com' URL with 'data' & 'meta' API parameters:

CLI Microlink API example

microlink https://news.ycombinator.com&data.titles.selectorAll='.titleline > a'&data.titles.attr=text

cURL Microlink API example

curl -G "https://api.microlink.io" \
  -d "url=https://news.ycombinator.com" \
  -d "data.titles.selectorAll=.titleline%20%3E%20a" \
  -d "data.titles.attr=text" \
  -d "meta=false"

JavaScript Microlink API example

import mql from '@microlink/mql'

const { data } = await mql('https://news.ycombinator.com', {
  data: {
    titles: {
      selectorAll: ".titleline > a",
      attr: "text"
    }
  },
  meta: false
})

Python Microlink API example

import requests

url = "https://api.microlink.io/"

querystring = {
    "url": "https://news.ycombinator.com",
    "data.titles.selectorAll": ".titleline > a",
    "data.titles.attr": "text",
    "meta": "false"
}

response = requests.get(url, params=querystring)

print(response.json())

Ruby Microlink API example

require 'uri'
require 'net/http'

base_url = "https://api.microlink.io/"

params = {
  url: "https://news.ycombinator.com",
  data.titles.selectorAll: ".titleline > a",
  data.titles.attr: "text",
  meta: "false"
}

uri = URI(base_url)
uri.query = URI.encode_www_form(params)

http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

puts response.body

PHP Microlink API example

<?php

$baseUrl = "https://api.microlink.io/";

$params = [
    "url" => "https://news.ycombinator.com",
    "data.titles.selectorAll" => ".titleline > a",
    "data.titles.attr" => "text",
    "meta" => "false"
];

$query = http_build_query($params);
$url = $baseUrl . '?' . $query;

$curl = curl_init();

curl_setopt_array($curl, [
    CURLOPT_URL => $url,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "GET"
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
    echo "cURL Error #: " . $err;
} else {
    echo $response;
}

Golang Microlink API example

package main

import (
    "fmt"
    "net/http"
    "net/url"
    "io"
)

func main() {
    baseURL := "https://api.microlink.io"

    u, err := url.Parse(baseURL)
    if err != nil {
        panic(err)
    }
    q := u.Query()
    q.Set("url", "https://news.ycombinator.com")
    q.Set("data.titles.selectorAll", ".titleline > a")
    q.Set("data.titles.attr", "text")
    q.Set("meta", "false")
    u.RawQuery = q.Encode()

    req, err := http.NewRequest("GET", u.String(), nil)
    if err != nil {
        panic(err)
    }

    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(body))
}
Use selectorAll when you need an array instead of only the first match.
This pattern is ideal for headlines, product cards, search results, table rows, and navigation links.

Return structured objects

You can group related values into one field by using nested attr rules:

The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting 'https://news.ycombinator.com' URL with 'data' & 'meta' API parameters:

CLI Microlink API example

microlink https://news.ycombinator.com&data.stories.selectorAll=.athing&data.stories.attr.title.selector='.titleline > a'&data.stories.attr.title.attr=text&data.stories.attr.href.selector='.titleline > a'&data.stories.attr.href.attr=href&data.stories.attr.href.type=url

cURL Microlink API example

curl -G "https://api.microlink.io" \
  -d "url=https://news.ycombinator.com" \
  -d "data.stories.selectorAll=.athing" \
  -d "data.stories.attr.title.selector=.titleline%20%3E%20a" \
  -d "data.stories.attr.title.attr=text" \
  -d "data.stories.attr.href.selector=.titleline%20%3E%20a" \
  -d "data.stories.attr.href.attr=href" \
  -d "data.stories.attr.href.type=url" \
  -d "meta=false"

JavaScript Microlink API example

import mql from '@microlink/mql'

const { data } = await mql('https://news.ycombinator.com', {
  data: {
    stories: {
      selectorAll: ".athing",
      attr: {
        title: {
          selector: ".titleline > a",
          attr: "text"
        },
        href: {
          selector: ".titleline > a",
          attr: "href",
          type: "url"
        }
      }
    }
  },
  meta: false
})

Python Microlink API example

import requests

url = "https://api.microlink.io/"

querystring = {
    "url": "https://news.ycombinator.com",
    "data.stories.selectorAll": ".athing",
    "data.stories.attr.title.selector": ".titleline > a",
    "data.stories.attr.title.attr": "text",
    "data.stories.attr.href.selector": ".titleline > a",
    "data.stories.attr.href.attr": "href",
    "data.stories.attr.href.type": "url",
    "meta": "false"
}

response = requests.get(url, params=querystring)

print(response.json())

Ruby Microlink API example

require 'uri'
require 'net/http'

base_url = "https://api.microlink.io/"

params = {
  url: "https://news.ycombinator.com",
  data.stories.selectorAll: ".athing",
  data.stories.attr.title.selector: ".titleline > a",
  data.stories.attr.title.attr: "text",
  data.stories.attr.href.selector: ".titleline > a",
  data.stories.attr.href.attr: "href",
  data.stories.attr.href.type: "url",
  meta: "false"
}

uri = URI(base_url)
uri.query = URI.encode_www_form(params)

http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

puts response.body

PHP Microlink API example

<?php

$baseUrl = "https://api.microlink.io/";

$params = [
    "url" => "https://news.ycombinator.com",
    "data.stories.selectorAll" => ".athing",
    "data.stories.attr.title.selector" => ".titleline > a",
    "data.stories.attr.title.attr" => "text",
    "data.stories.attr.href.selector" => ".titleline > a",
    "data.stories.attr.href.attr" => "href",
    "data.stories.attr.href.type" => "url",
    "meta" => "false"
];

$query = http_build_query($params);
$url = $baseUrl . '?' . $query;

$curl = curl_init();

curl_setopt_array($curl, [
    CURLOPT_URL => $url,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "GET"
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
    echo "cURL Error #: " . $err;
} else {
    echo $response;
}

Golang Microlink API example

package main

import (
    "fmt"
    "net/http"
    "net/url"
    "io"
)

func main() {
    baseURL := "https://api.microlink.io"

    u, err := url.Parse(baseURL)
    if err != nil {
        panic(err)
    }
    q := u.Query()
    q.Set("url", "https://news.ycombinator.com")
    q.Set("data.stories.selectorAll", ".athing")
    q.Set("data.stories.attr.title.selector", ".titleline > a")
    q.Set("data.stories.attr.title.attr", "text")
    q.Set("data.stories.attr.href.selector", ".titleline > a")
    q.Set("data.stories.attr.href.attr", "href")
    q.Set("data.stories.attr.href.type", "url")
    q.Set("meta", "false")
    u.RawQuery = q.Encode()

    req, err := http.NewRequest("GET", u.String(), nil)
    if err != nil {
        panic(err)
    }

    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(body))
}
Nested rules let each matched item become a structured object instead of a single string.
Use this when a card, row, or result item contains multiple related fields that should stay together.

Use fallback rules

When the page structure is not consistent, provide more than one rule in priority order:

The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting 'https://example.com' URL with 'data' & 'meta' API parameters:

CLI Microlink API example

microlink https://example.com&data.title='[object Object],[object Object],[object Object]'

cURL Microlink API example

curl -G "https://api.microlink.io" \
  -d "url=https://example.com" \
  -d "data.title=%5Bobject%20Object%5D%2C%5Bobject%20Object%5D%2C%5Bobject%20Object%5D" \
  -d "meta=false"

JavaScript Microlink API example

import mql from '@microlink/mql'

const { data } = await mql('https://example.com', {
  data: {
    title: [
      {
        selector: 'meta[property="og:title"]',
        attr: "content"
      },
      {
        selector: "title",
        attr: "text"
      },
      {
        selector: "h1",
        attr: "text"
      }
    ]
  },
  meta: false
})

Python Microlink API example

import requests

url = "https://api.microlink.io/"

querystring = {
    "url": "https://example.com",
    "data.title": "[object Object],[object Object],[object Object]",
    "meta": "false"
}

response = requests.get(url, params=querystring)

print(response.json())

Ruby Microlink API example

require 'uri'
require 'net/http'

base_url = "https://api.microlink.io/"

params = {
  url: "https://example.com",
  data.title: "[object Object],[object Object],[object Object]",
  meta: "false"
}

uri = URI(base_url)
uri.query = URI.encode_www_form(params)

http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

puts response.body

PHP Microlink API example

<?php

$baseUrl = "https://api.microlink.io/";

$params = [
    "url" => "https://example.com",
    "data.title" => "[object Object],[object Object],[object Object]",
    "meta" => "false"
];

$query = http_build_query($params);
$url = $baseUrl . '?' . $query;

$curl = curl_init();

curl_setopt_array($curl, [
    CURLOPT_URL => $url,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "GET"
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
    echo "cURL Error #: " . $err;
} else {
    echo $response;
}

Golang Microlink API example

package main

import (
    "fmt"
    "net/http"
    "net/url"
    "io"
)

func main() {
    baseURL := "https://api.microlink.io"

    u, err := url.Parse(baseURL)
    if err != nil {
        panic(err)
    }
    q := u.Query()
    q.Set("url", "https://example.com")
    q.Set("data.title", "[object Object],[object Object],[object Object]")
    q.Set("meta", "false")
    u.RawQuery = q.Encode()

    req, err := http.NewRequest("GET", u.String(), nil)
    if err != nil {
        panic(err)
    }

    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(body))
}
Microlink tries each rule in order and keeps the first one that returns a valid value.
Fallbacks are especially useful across publisher templates, ecommerce catalogs, or documentation sites where the markup changes from page to page.

Pick the right output form

attr and type solve different problems:
PropertyUse it for
attrChoosing the source value such as text, html, markdown, href, src, or content
typeValidating or normalizing the result as url, number, date, image, and more
For example, a link field usually needs both:

The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting 'https://example.com' URL with 'data' & 'meta' API parameters:

CLI Microlink API example

microlink https://example.com&data.link.selector=a&data.link.attr=href&data.link.type=url

cURL Microlink API example

curl -G "https://api.microlink.io" \
  -d "url=https://example.com" \
  -d "data.link.selector=a" \
  -d "data.link.attr=href" \
  -d "data.link.type=url" \
  -d "meta=false"

JavaScript Microlink API example

import mql from '@microlink/mql'

const { data } = await mql('https://example.com', {
  data: {
    link: {
      selector: "a",
      attr: "href",
      type: "url"
    }
  },
  meta: false
})

Python Microlink API example

import requests

url = "https://api.microlink.io/"

querystring = {
    "url": "https://example.com",
    "data.link.selector": "a",
    "data.link.attr": "href",
    "data.link.type": "url",
    "meta": "false"
}

response = requests.get(url, params=querystring)

print(response.json())

Ruby Microlink API example

require 'uri'
require 'net/http'

base_url = "https://api.microlink.io/"

params = {
  url: "https://example.com",
  data.link.selector: "a",
  data.link.attr: "href",
  data.link.type: "url",
  meta: "false"
}

uri = URI(base_url)
uri.query = URI.encode_www_form(params)

http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

puts response.body

PHP Microlink API example

<?php

$baseUrl = "https://api.microlink.io/";

$params = [
    "url" => "https://example.com",
    "data.link.selector" => "a",
    "data.link.attr" => "href",
    "data.link.type" => "url",
    "meta" => "false"
];

$query = http_build_query($params);
$url = $baseUrl . '?' . $query;

$curl = curl_init();

curl_setopt_array($curl, [
    CURLOPT_URL => $url,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "GET"
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
    echo "cURL Error #: " . $err;
} else {
    echo $response;
}

Golang Microlink API example

package main

import (
    "fmt"
    "net/http"
    "net/url"
    "io"
)

func main() {
    baseURL := "https://api.microlink.io"

    u, err := url.Parse(baseURL)
    if err != nil {
        panic(err)
    }
    q := u.Query()
    q.Set("url", "https://example.com")
    q.Set("data.link.selector", "a")
    q.Set("data.link.attr", "href")
    q.Set("data.link.type", "url")
    q.Set("meta", "false")
    u.RawQuery = q.Encode()

    req, err := http.NewRequest("GET", u.String(), nil)
    if err != nil {
        panic(err)
    }

    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(body))
}
Think of attr as “where to read from” and type as “what shape this value should have”.
If you specifically want HTML-to-Markdown serialization, see the Markdown guide.

Use evaluate for custom values

If selectors are still not enough, evaluate can compute a field directly in the browser context:

The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting 'https://example.com' URL with 'data' & 'meta' API parameters:

CLI Microlink API example

microlink https://example.com&data.summary.evaluate='`${document.querySelector('"'"'h1'"'"')?.textContent.trim()} — ${document.querySelector('"'"'p'"'"')?.textContent.trim()}`'&data.summary.type=string

cURL Microlink API example

curl -G "https://api.microlink.io" \
  -d "url=https://example.com" \
  -d "data.summary.evaluate=%60%24%7Bdocument.querySelector('h1')%3F.textContent.trim()%7D%20%E2%80%94%20%24%7Bdocument.querySelector('p')%3F.textContent.trim()%7D%60" \
  -d "data.summary.type=string" \
  -d "meta=false"

JavaScript Microlink API example

import mql from '@microlink/mql'

const { data } = await mql('https://example.com', {
  data: {
    summary: {
      evaluate: "`${document.querySelector('h1')?.textContent.trim()} — ${document.querySelector('p')?.textContent.trim()}`",
      type: "string"
    }
  },
  meta: false
})

Python Microlink API example

import requests

url = "https://api.microlink.io/"

querystring = {
    "url": "https://example.com",
    "data.summary.evaluate": '''`${document.querySelector('h1')?.textContent.trim()} — ${document.querySelector('p')?.textContent.trim()}`''',
    "data.summary.type": "string",
    "meta": "false"
}

response = requests.get(url, params=querystring)

print(response.json())

Ruby Microlink API example

require 'uri'
require 'net/http'

base_url = "https://api.microlink.io/"

params = {
  url: "https://example.com",
  data.summary.evaluate: "`${document.querySelector('h1')?.textContent.trim()} — ${document.querySelector('p')?.textContent.trim()}`",
  data.summary.type: "string",
  meta: "false"
}

uri = URI(base_url)
uri.query = URI.encode_www_form(params)

http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

puts response.body

PHP Microlink API example

<?php

$baseUrl = "https://api.microlink.io/";

$params = [
    "url" => "https://example.com",
    "data.summary.evaluate" => "`${document.querySelector('h1')?.textContent.trim()} — ${document.querySelector('p')?.textContent.trim()}`",
    "data.summary.type" => "string",
    "meta" => "false"
];

$query = http_build_query($params);
$url = $baseUrl . '?' . $query;

$curl = curl_init();

curl_setopt_array($curl, [
    CURLOPT_URL => $url,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "GET"
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
    echo "cURL Error #: " . $err;
} else {
    echo $response;
}

Golang Microlink API example

package main

import (
    "fmt"
    "net/http"
    "net/url"
    "io"
)

func main() {
    baseURL := "https://api.microlink.io"

    u, err := url.Parse(baseURL)
    if err != nil {
        panic(err)
    }
    data.summary.evaluateParam := ``${document.querySelector('h1')?.textContent.trim()} — ${document.querySelector('p')?.textContent.trim()}``

    q := u.Query()
    q.Set("url", "https://example.com")
    q.Set("data.summary.evaluate", data.summary.evaluateParam)
    q.Set("data.summary.type", "string")
    q.Set("meta", "false")
    u.RawQuery = q.Encode()

    req, err := http.NewRequest("GET", u.String(), nil)
    if err != nil {
        panic(err)
    }

    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(body))
}
Use evaluate for values that are awkward to express with selectors alone, but keep it as a last resort.
Start with plain selectors and fallbacks. Add evaluate only when the page needs custom logic.

Rule references

For the full rule surface, see the MQL reference pages for selector, selectorAll, attr, type, evaluate, nested rules, and fallback rules.

Next step

Learn how to render the right page state before extraction in page preparation.