Data extraction
Custom data extraction with Microlink API starts with two things: the target `url` and a `data` object. Each key inside `data` becomes a field in the response, and each rule tells Microlink how to obtain that field from the page.
The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting the 'https://news.ycombinator.com' URL with the 'data' & 'meta' API parameters:
CLI Microlink API example
microlink https://news.ycombinator.com&data.story.selector=.athing&data.story.attr.title.selector='.titleline > a'&data.story.attr.title.attr=text&data.story.attr.href.selector='.titleline > a'&data.story.attr.href.attr=href&data.story.attr.href.type=url
cURL Microlink API example
curl -G "https://api.microlink.io" \
-d "url=https://news.ycombinator.com" \
-d "data.story.selector=.athing" \
-d "data.story.attr.title.selector=.titleline%20%3E%20a" \
-d "data.story.attr.title.attr=text" \
-d "data.story.attr.href.selector=.titleline%20%3E%20a" \
-d "data.story.attr.href.attr=href" \
-d "data.story.attr.href.type=url" \
-d "meta=false"
JavaScript Microlink API example
import mql from '@microlink/mql'
const { data } = await mql('https://news.ycombinator.com', {
data: {
story: {
selector: ".athing",
attr: {
title: {
selector: ".titleline > a",
attr: "text"
},
href: {
selector: ".titleline > a",
attr: "href",
type: "url"
}
}
}
},
meta: false
})
Python Microlink API example
import requests
url = "https://api.microlink.io/"
querystring = {
"url": "https://news.ycombinator.com",
"data.story.selector": ".athing",
"data.story.attr.title.selector": ".titleline > a",
"data.story.attr.title.attr": "text",
"data.story.attr.href.selector": ".titleline > a",
"data.story.attr.href.attr": "href",
"data.story.attr.href.type": "url",
"meta": "false"
}
response = requests.get(url, params=querystring)
print(response.json())
Ruby Microlink API example
require 'uri'
require 'net/http'
base_url = "https://api.microlink.io/"
params = {
url: "https://news.ycombinator.com",
data.story.selector: ".athing",
data.story.attr.title.selector: ".titleline > a",
data.story.attr.title.attr: "text",
data.story.attr.href.selector: ".titleline > a",
data.story.attr.href.attr: "href",
data.story.attr.href.type: "url",
meta: "false"
}
uri = URI(base_url)
uri.query = URI.encode_www_form(params)
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
request = Net::HTTP::Get.new(uri)
response = http.request(request)
puts response.body
PHP Microlink API example
<?php
$baseUrl = "https://api.microlink.io/";
$params = [
"url" => "https://news.ycombinator.com",
"data.story.selector" => ".athing",
"data.story.attr.title.selector" => ".titleline > a",
"data.story.attr.title.attr" => "text",
"data.story.attr.href.selector" => ".titleline > a",
"data.story.attr.href.attr" => "href",
"data.story.attr.href.type" => "url",
"meta" => "false"
];
$query = http_build_query($params);
$url = $baseUrl . '?' . $query;
$curl = curl_init();
curl_setopt_array($curl, [
CURLOPT_URL => $url,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_ENCODING => "",
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 30,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => "GET"
]);
$response = curl_exec($curl);
$err = curl_error($curl);
curl_close($curl);
if ($err) {
echo "cURL Error #: " . $err;
} else {
echo $response;
}
Golang Microlink API example
package main
import (
"fmt"
"net/http"
"net/url"
"io"
)
func main() {
baseURL := "https://api.microlink.io"
u, err := url.Parse(baseURL)
if err != nil {
panic(err)
}
q := u.Query()
q.Set("url", "https://news.ycombinator.com")
q.Set("data.story.selector", ".athing")
q.Set("data.story.attr.title.selector", ".titleline > a")
q.Set("data.story.attr.title.attr", "text")
q.Set("data.story.attr.href.selector", ".titleline > a")
q.Set("data.story.attr.href.attr", "href")
q.Set("data.story.attr.href.type", "url")
q.Set("meta", "false")
u.RawQuery = q.Encode()
req, err := http.NewRequest("GET", u.String(), nil)
if err != nil {
panic(err)
}
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
panic(err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
panic(err)
}
fmt.Println(string(body))
}
import mql from '@microlink/mql'
const { data } = await mql('https://news.ycombinator.com', {
data: {
story: {
selector: ".athing",
attr: {
title: {
selector: ".titleline > a",
attr: "text"
},
href: {
selector: ".titleline > a",
attr: "href",
type: "url"
}
}
}
},
meta: false
})
Run the request and inspect `data.story`. The field name comes from your key, and the nested object comes from the rule shape you declared.
MQL installation
To run the JavaScript examples with MQL, install `@microlink/mql`:
npm install @microlink/mql --save
It works in Node.js, Edge runtimes, and the browser. See the MQL installation guide for the environment-specific setup.
If you are using another language, you do not need to install MQL to follow this guide. You can use the terminal examples or call the API directly from any HTTP client.
How data extraction works
The `data` object is your output schema. A field can be:
- A single value extracted with `selector` and `attr`.
- A list extracted with `selectorAll`.
- A structured object built with nested `attr` rules.
- A fallback array of rules tried in priority order.
- A computed value produced with `evaluate`.
{
url: 'https://news.ycombinator.com',
data: {
stories: {
selectorAll: '.athing',
attr: {
title: { selector: '.titleline > a', attr: 'text' },
href: { selector: '.titleline > a', attr: 'href', type: 'url' }
}
}
},
meta: false
}
This guide uses the `data` object form consistently because it keeps the response shape and the extraction logic in one place.
The response
The response contains the fields you declared under `data`:
{
"status": "success",
"data": {
"story": {
"title": "Launch HN: Example",
"href": "https://example.com"
}
}
}
If `meta` stays enabled, your custom fields live alongside normalized metadata such as `title`, `description`, `image`, and `url`.
Choose an extraction pattern
| Need | Best pattern | Why |
|---|---|---|
| One field from one element | selector + attr | Smallest and easiest rule to maintain |
| A repeated list of values | selectorAll | Returns an array instead of only the first match |
| A structured object or list of objects | Nested attr rules | Keeps related fields grouped together |
| Serialized page content | attr: 'text', attr: 'html', or attr: 'markdown' | Useful for indexing, content pipelines, and exports |
| A computed or awkward value | evaluate or fallback rules | Helps when one selector is not enough |
If your main goal is HTML-to-Markdown conversion, jump to the dedicated Markdown guide. This guide focuses on the broader `data` utility.
Skip metadata for faster responses
By default, the API also extracts normalized metadata. If you only need your custom fields, disable it:
The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting 'https://example.com' URL with 'data' & 'meta' API parameters:
CLI Microlink API example
microlink https://example.com&data.title.selector=h1&data.title.attr=text
cURL Microlink API example
curl -G "https://api.microlink.io" \
-d "url=https://example.com" \
-d "data.title.selector=h1" \
-d "data.title.attr=text" \
-d "meta=false"
JavaScript Microlink API example
import mql from '@microlink/mql'
const { data } = await mql('https://example.com', {
data: {
title: {
selector: "h1",
attr: "text"
}
},
meta: false
})
Python Microlink API example
import requests
url = "https://api.microlink.io/"
querystring = {
"url": "https://example.com",
"data.title.selector": "h1",
"data.title.attr": "text",
"meta": "false"
}
response = requests.get(url, params=querystring)
print(response.json())
Ruby Microlink API example
require 'uri'
require 'net/http'
base_url = "https://api.microlink.io/"
params = {
url: "https://example.com",
data.title.selector: "h1",
data.title.attr: "text",
meta: "false"
}
uri = URI(base_url)
uri.query = URI.encode_www_form(params)
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
request = Net::HTTP::Get.new(uri)
response = http.request(request)
puts response.body
PHP Microlink API example
<?php
$baseUrl = "https://api.microlink.io/";
$params = [
"url" => "https://example.com",
"data.title.selector" => "h1",
"data.title.attr" => "text",
"meta" => "false"
];
$query = http_build_query($params);
$url = $baseUrl . '?' . $query;
$curl = curl_init();
curl_setopt_array($curl, [
CURLOPT_URL => $url,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_ENCODING => "",
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 30,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => "GET"
]);
$response = curl_exec($curl);
$err = curl_error($curl);
curl_close($curl);
if ($err) {
echo "cURL Error #: " . $err;
} else {
echo $response;
}
Golang Microlink API example
package main
import (
"fmt"
"net/http"
"net/url"
"io"
)
func main() {
baseURL := "https://api.microlink.io"
u, err := url.Parse(baseURL)
if err != nil {
panic(err)
}
q := u.Query()
q.Set("url", "https://example.com")
q.Set("data.title.selector", "h1")
q.Set("data.title.attr", "text")
q.Set("meta", "false")
u.RawQuery = q.Encode()
req, err := http.NewRequest("GET", u.String(), nil)
if err != nil {
panic(err)
}
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
panic(err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
panic(err)
}
fmt.Println(string(body))
}
import mql from '@microlink/mql'
const { data } = await mql('https://example.com', {
data: {
title: {
selector: "h1",
attr: "text"
}
},
meta: false
})
Setting `meta: false` skips the extra metadata pass and is usually the biggest speedup for data-only requests.
If you still need a few metadata fields, `meta` also accepts an object for selective extraction. See the meta reference.
Using the raw URL
You can call the API directly from your browser address bar or any HTTP client:
https://api.microlink.io?url=https://example.com&data.title.selector=h1&data.title.attr=text&meta=false
That returns JSON. To make the API URL return a single extracted field directly instead, use `embed=title`, as described in Delivery and response shaping.
Free tier and API key
The Microlink API works without an API key. You get 50 free requests per day, which is enough to test the full data extraction flow and most of the examples in this guide.
For production usage, you'll usually want a PRO plan. It unlocks features such as configurable TTL, stale-while-revalidate caching, custom headers, and proxy.
To authenticate, pass your API key as the `x-api-key` header:
The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting the 'https://example.com' URL with the 'data', 'meta' & 'apiKey' API parameters:
CLI Microlink API example
microlink https://example.com&data.title.selector=h1&data.title.attr=text --api-key YOUR_API_TOKEN
cURL Microlink API example
curl -G "https://api.microlink.io" \
-H "x-api-key: YOUR_API_TOKEN" \
-d "url=https://example.com" \
-d "data.title.selector=h1" \
-d "data.title.attr=text" \
-d "meta=false"
JavaScript Microlink API example
import mql from '@microlink/mql'
const { data } = await mql('https://example.com', {
data: {
title: {
selector: "h1",
attr: "text"
}
},
meta: false,
apiKey: "YOUR_API_TOKEN"
})
Python Microlink API example
import requests
url = "https://api.microlink.io/"
querystring = {
"url": "https://example.com",
"data.title.selector": "h1",
"data.title.attr": "text",
"meta": "false"
}
headers = {
"x-api-key": "YOUR_API_TOKEN"
}
response = requests.get(url, params=querystring, headers=headers)
print(response.json())
Ruby Microlink API example
require 'uri'
require 'net/http'
base_url = "https://api.microlink.io/"
params = {
url: "https://example.com",
data.title.selector: "h1",
data.title.attr: "text",
meta: "false"
}
uri = URI(base_url)
uri.query = URI.encode_www_form(params)
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
request = Net::HTTP::Get.new(uri)
request['x-api-key'] = "YOUR_API_TOKEN"
response = http.request(request)
puts response.body
PHP Microlink API example
<?php
$baseUrl = "https://api.microlink.io/";
$params = [
"url" => "https://example.com",
"data.title.selector" => "h1",
"data.title.attr" => "text",
"meta" => "false"
];
$query = http_build_query($params);
$url = $baseUrl . '?' . $query;
$curl = curl_init();
curl_setopt_array($curl, [
CURLOPT_URL => $url,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_ENCODING => "",
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 30,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => "GET",
CURLOPT_HTTPHEADER => [
"x-api-key: YOUR_API_TOKEN"
]
]);
$response = curl_exec($curl);
$err = curl_error($curl);
curl_close($curl);
if ($err) {
echo "cURL Error #: " . $err;
} else {
echo $response;
}
Golang Microlink API example
package main
import (
"fmt"
"net/http"
"net/url"
"io"
)
func main() {
baseURL := "https://api.microlink.io"
u, err := url.Parse(baseURL)
if err != nil {
panic(err)
}
q := u.Query()
q.Set("url", "https://example.com")
q.Set("data.title.selector", "h1")
q.Set("data.title.attr", "text")
q.Set("meta", "false")
u.RawQuery = q.Encode()
req, err := http.NewRequest("GET", u.String(), nil)
if err != nil {
panic(err)
}
req.Header.Set("x-api-key", "YOUR_API_TOKEN")
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
panic(err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
panic(err)
}
fmt.Println(string(body))
}
import mql from '@microlink/mql'
const { data } = await mql('https://example.com', {
data: {
title: {
selector: "h1",
attr: "text"
}
},
meta: false,
apiKey: "YOUR_API_TOKEN"
})
You can enter your API key in any interactive example by clicking the key icon in the terminal toolbar.
Throughout this guide, features that require a PRO plan are marked inline.
See the authentication and rate limit docs for details.
What's next
Pick the next step based on the result you want:
- Defining rules — model single fields, collections, nested objects, fallbacks, and computed values.
- Page preparation — render the right page state, wait for dynamic content, and mutate the DOM before extraction.
- Delivery and response shaping — choose between full JSON, filtered payloads, and direct field responses.
- Caching and performance — tune freshness, cache behavior, and extraction speed.
- Private pages — extract data from logged-in or header-dependent pages safely.
- Troubleshooting — fix empty fields, wrong selectors, timeouts, and blocked sites.