Convert a PDF URL to Markdown
PDF URLs use the same Markdown extraction rule as web pages. Set the PDF as the
url, then request a Markdown field with attr: 'markdown'.
The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting the 'https://cdn.microlink.io/file-examples/sample.pdf' URL with the 'data' & 'meta' API parameters:
CLI Microlink API example
microlink https://cdn.microlink.io/file-examples/sample.pdf&data.markdown.attr=markdown
cURL Microlink API example
curl -G "https://api.microlink.io" \
-d "url=https://cdn.microlink.io/file-examples/sample.pdf" \
-d "data.markdown.attr=markdown" \
-d "meta=false"
JavaScript Microlink API example
import mql from '@microlink/mql'
const { data } = await mql('https://cdn.microlink.io/file-examples/sample.pdf', {
data: {
markdown: {
attr: "markdown"
}
},
meta: false
})
Python Microlink API example
import requests
url = "https://api.microlink.io/"
querystring = {
"url": "https://cdn.microlink.io/file-examples/sample.pdf",
"data.markdown.attr": "markdown",
"meta": "false"
}
response = requests.get(url, params=querystring)
print(response.json())
Ruby Microlink API example
require 'uri'
require 'net/http'
base_url = "https://api.microlink.io/"
# Query parameters for the Microlink API request.
# "data.markdown.attr" contains dots, so it has to be written as a quoted
# symbol key; a bare `data.markdown.attr:` is a Ruby syntax error.
params = {
  url: "https://cdn.microlink.io/file-examples/sample.pdf",
  "data.markdown.attr": "markdown",
  meta: "false"
}
uri = URI(base_url)
uri.query = URI.encode_www_form(params)
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
request = Net::HTTP::Get.new(uri)
response = http.request(request)
puts response.body
PHP Microlink API example
<?php
$baseUrl = "https://api.microlink.io/";
$params = [
"url" => "https://cdn.microlink.io/file-examples/sample.pdf",
"data.markdown.attr" => "markdown",
"meta" => "false"
];
$query = http_build_query($params);
$url = $baseUrl . '?' . $query;
$curl = curl_init();
curl_setopt_array($curl, [
CURLOPT_URL => $url,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_ENCODING => "",
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 30,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => "GET"
]);
$response = curl_exec($curl);
$err = curl_error($curl);
curl_close($curl);
if ($err) {
echo "cURL Error #: " . $err;
} else {
echo $response;
}
Golang Microlink API example
package main
import (
"fmt"
"net/http"
"net/url"
"io"
)
func main() {
baseURL := "https://api.microlink.io"
u, err := url.Parse(baseURL)
if err != nil {
panic(err)
}
q := u.Query()
q.Set("url", "https://cdn.microlink.io/file-examples/sample.pdf")
q.Set("data.markdown.attr", "markdown")
q.Set("meta", "false")
u.RawQuery = q.Encode()
req, err := http.NewRequest("GET", u.String(), nil)
if err != nil {
panic(err)
}
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
panic(err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
panic(err)
}
fmt.Println(string(body))
}
import mql from '@microlink/mql'
const { data } = await mql('https://cdn.microlink.io/file-examples/sample.pdf', {
data: {
markdown: {
attr: "markdown"
}
},
meta: false
})
Read the extracted document text from data.markdown.
Return markdown directly
Add embed: 'markdown' when the API URL should behave like a Markdown file:
The following examples show how to use the Microlink API with CLI, cURL, JavaScript, Python, Ruby, PHP & Golang, targeting the 'https://cdn.microlink.io/file-examples/sample.pdf' URL with the 'data', 'meta' & 'embed' API parameters:
CLI Microlink API example
microlink https://cdn.microlink.io/file-examples/sample.pdf&data.markdown.attr=markdown&embed=markdown
cURL Microlink API example
curl -G "https://api.microlink.io" \
-d "url=https://cdn.microlink.io/file-examples/sample.pdf" \
-d "data.markdown.attr=markdown" \
-d "meta=false" \
-d "embed=markdown"
JavaScript Microlink API example
import mql from '@microlink/mql'
const { data } = await mql('https://cdn.microlink.io/file-examples/sample.pdf', {
data: {
markdown: {
attr: "markdown"
}
},
meta: false,
embed: "markdown"
})
Python Microlink API example
import requests
url = "https://api.microlink.io/"
querystring = {
"url": "https://cdn.microlink.io/file-examples/sample.pdf",
"data.markdown.attr": "markdown",
"meta": "false",
"embed": "markdown"
}
response = requests.get(url, params=querystring)
print(response.text)  # embed=markdown returns the raw Markdown body, not JSON
Ruby Microlink API example
require 'uri'
require 'net/http'
base_url = "https://api.microlink.io/"
# Query parameters for the Microlink API request; `embed: "markdown"` is sent
# alongside the extraction rule in this example.
# "data.markdown.attr" contains dots, so it has to be written as a quoted
# symbol key; a bare `data.markdown.attr:` is a Ruby syntax error.
params = {
  url: "https://cdn.microlink.io/file-examples/sample.pdf",
  "data.markdown.attr": "markdown",
  meta: "false",
  embed: "markdown"
}
uri = URI(base_url)
uri.query = URI.encode_www_form(params)
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
request = Net::HTTP::Get.new(uri)
response = http.request(request)
puts response.body
PHP Microlink API example
<?php
$baseUrl = "https://api.microlink.io/";
$params = [
"url" => "https://cdn.microlink.io/file-examples/sample.pdf",
"data.markdown.attr" => "markdown",
"meta" => "false",
"embed" => "markdown"
];
$query = http_build_query($params);
$url = $baseUrl . '?' . $query;
$curl = curl_init();
curl_setopt_array($curl, [
CURLOPT_URL => $url,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_ENCODING => "",
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 30,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => "GET"
]);
$response = curl_exec($curl);
$err = curl_error($curl);
curl_close($curl);
if ($err) {
echo "cURL Error #: " . $err;
} else {
echo $response;
}
Golang Microlink API example
package main
import (
"fmt"
"net/http"
"net/url"
"io"
)
func main() {
baseURL := "https://api.microlink.io"
u, err := url.Parse(baseURL)
if err != nil {
panic(err)
}
q := u.Query()
q.Set("url", "https://cdn.microlink.io/file-examples/sample.pdf")
q.Set("data.markdown.attr", "markdown")
q.Set("meta", "false")
q.Set("embed", "markdown")
u.RawQuery = q.Encode()
req, err := http.NewRequest("GET", u.String(), nil)
if err != nil {
panic(err)
}
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
panic(err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
panic(err)
}
fmt.Println(string(body))
}
import mql from '@microlink/mql'
const { data } = await mql('https://cdn.microlink.io/file-examples/sample.pdf', {
data: {
markdown: {
attr: "markdown"
}
},
meta: false,
embed: "markdown"
})
The response body is Markdown, so a worker, crawler, or LLM pipeline can consume it without unpacking JSON.
The same request as a raw URL:
https://api.microlink.io?url=https://cdn.microlink.io/file-examples/sample.pdf&data.markdown.attr=markdown&meta=false&embed=markdown
Keep JSON when you need document metadata
Leave embed out when your application needs the normal response envelope:
{
"status": "success",
"data": {
"title": "sample.pdf",
"url": "https://cdn.microlink.io/file-examples/sample.pdf",
"markdown": "# Instructions for Adding Your Logo..."
}
}
Set meta: false for the smallest payload. Keep metadata enabled when the title, publisher, image, or URL fields are useful to your indexer.
Know the PDF limit
This works best for PDFs with an embedded text layer. If the PDF is only scanned images, the extracted Markdown can be sparse because there is little document text to serialize.
Next step
Use Convert a PDF URL to HTML when you need markup instead. Use Convert any URL to Markdown for web pages.