Skip to content

Convert a PDF URL to Markdown

PDF URLs use the same Markdown extraction rule as web pages. Set the PDF as url, then request a Markdown field with attr: 'markdown'.

The following examples show how to call the Microlink API from the CLI, cURL, JavaScript, Python, Ruby, PHP, and Golang, targeting the URL 'https://cdn.microlink.io/file-examples/sample.pdf' with the 'data' and 'meta' API parameters:

CLI Microlink API example

# Quote the argument: an unquoted "&" makes the shell background the command
# and drop the data.markdown.attr parameter.
microlink 'https://cdn.microlink.io/file-examples/sample.pdf&data.markdown.attr=markdown'

cURL Microlink API example

# --get appends every --data pair to the URL as a query string parameter.
curl --get "https://api.microlink.io" \
  --data "url=https://cdn.microlink.io/file-examples/sample.pdf" \
  --data "data.markdown.attr=markdown" \
  --data "meta=false"

JavaScript Microlink API example

import mql from '@microlink/mql'

// The PDF to convert; PDF URLs use the same Markdown extraction rule as web pages.
const pdfUrl = 'https://cdn.microlink.io/file-examples/sample.pdf'

// Request a single `markdown` data field and disable metadata.
const options = {
  data: { markdown: { attr: "markdown" } },
  meta: false
}

// The extracted document text is read from `data.markdown`.
const { data } = await mql(pdfUrl, options)

Python Microlink API example

import requests

# Query parameters: the PDF to convert, the Markdown extraction rule,
# and meta=false to skip metadata.
params = {
    "url": "https://cdn.microlink.io/file-examples/sample.pdf",
    "data.markdown.attr": "markdown",
    "meta": "false",
}

# Send the GET request to the Microlink API and print the decoded JSON body.
reply = requests.get("https://api.microlink.io/", params=params)
print(reply.json())

Ruby Microlink API example

require 'uri'
require 'net/http'

# Microlink API endpoint.
base_url = "https://api.microlink.io/"

# Query parameters. The dotted key must be a quoted symbol: a bare
# `data.markdown.attr:` is not a valid hash key and fails to parse.
params = {
  url: "https://cdn.microlink.io/file-examples/sample.pdf",
  "data.markdown.attr": "markdown",
  meta: "false"
}

# Encode the parameters into the query string of the request URI.
uri = URI(base_url)
uri.query = URI.encode_www_form(params)

# The endpoint is HTTPS, so TLS has to be enabled on the connection.
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

# Print the raw response body.
puts response.body

PHP Microlink API example

<?php

// Microlink API endpoint plus the encoded query string for this request.
$endpoint = "https://api.microlink.io/";
$requestUrl = $endpoint . '?' . http_build_query([
    "url" => "https://cdn.microlink.io/file-examples/sample.pdf",
    "data.markdown.attr" => "markdown",
    "meta" => "false"
]);

$handle = curl_init();

// Configure an HTTP/1.1 GET that returns the body as a string.
curl_setopt($handle, CURLOPT_URL, $requestUrl);
curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
curl_setopt($handle, CURLOPT_ENCODING, "");
curl_setopt($handle, CURLOPT_MAXREDIRS, 10);
curl_setopt($handle, CURLOPT_TIMEOUT, 30);
curl_setopt($handle, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($handle, CURLOPT_CUSTOMREQUEST, "GET");

$body = curl_exec($handle);
$failure = curl_error($handle);

curl_close($handle);

// Print the transport error if there was one, otherwise the response body.
echo $failure ? ("cURL Error #: " . $failure) : $body;

Golang Microlink API example

package main

import (
    "fmt"
    "net/http"
    "net/url"
    "io"
)

// main requests the Markdown extraction of the sample PDF from the Microlink
// API and prints the raw response body to stdout.
func main() {
    endpoint, err := url.Parse("https://api.microlink.io")
    if err != nil {
        panic(err)
    }

    // Build the query string: target PDF, Markdown extraction rule, no metadata.
    params := endpoint.Query()
    params.Set("url", "https://cdn.microlink.io/file-examples/sample.pdf")
    params.Set("data.markdown.attr", "markdown")
    params.Set("meta", "false")
    endpoint.RawQuery = params.Encode()

    // http.Get issues the same GET request through the default client.
    res, err := http.Get(endpoint.String())
    if err != nil {
        panic(err)
    }
    defer res.Body.Close()

    payload, err := io.ReadAll(res.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(payload))
}
Read the extracted document text from data.markdown.

Return markdown directly

Add embed: 'markdown' when the API URL should behave like a Markdown file:

The following examples show how to call the Microlink API from the CLI, cURL, JavaScript, Python, Ruby, PHP, and Golang, targeting the URL 'https://cdn.microlink.io/file-examples/sample.pdf' with the 'data', 'meta', and 'embed' API parameters:

CLI Microlink API example

# Quote the argument: an unquoted "&" makes the shell background the command
# and drop the data.markdown.attr and embed parameters.
microlink 'https://cdn.microlink.io/file-examples/sample.pdf&data.markdown.attr=markdown&embed=markdown'

cURL Microlink API example

# --get appends every --data pair to the URL as a query string parameter.
# embed=markdown makes the response body the Markdown itself.
curl --get "https://api.microlink.io" \
  --data "url=https://cdn.microlink.io/file-examples/sample.pdf" \
  --data "data.markdown.attr=markdown" \
  --data "meta=false" \
  --data "embed=markdown"

JavaScript Microlink API example

import mql from '@microlink/mql'

// The PDF to convert; PDF URLs use the same Markdown extraction rule as web pages.
const pdfUrl = 'https://cdn.microlink.io/file-examples/sample.pdf'

// Request a single `markdown` data field, disable metadata, and embed the
// Markdown field as the response.
const options = {
  data: { markdown: { attr: "markdown" } },
  meta: false,
  embed: "markdown"
}

// NOTE(review): with `embed` the API body is raw Markdown rather than the JSON
// envelope — confirm how mql exposes it before relying on `data`.
const { data } = await mql(pdfUrl, options)

Python Microlink API example

import requests

# Microlink API endpoint.
url = "https://api.microlink.io/"

# Query parameters: the PDF to convert, the Markdown extraction rule,
# meta=false to skip metadata, and embed=markdown to return the Markdown
# field directly as the response body.
querystring = {
    "url": "https://cdn.microlink.io/file-examples/sample.pdf",
    "data.markdown.attr": "markdown",
    "meta": "false",
    "embed": "markdown"
}

response = requests.get(url, params=querystring)

# With embed=markdown the body is raw Markdown, not the JSON envelope, so
# decoding it with response.json() would fail; print the text instead.
print(response.text)

Ruby Microlink API example

require 'uri'
require 'net/http'

# Microlink API endpoint.
base_url = "https://api.microlink.io/"

# Query parameters. The dotted key must be a quoted symbol: a bare
# `data.markdown.attr:` is not a valid hash key and fails to parse.
params = {
  url: "https://cdn.microlink.io/file-examples/sample.pdf",
  "data.markdown.attr": "markdown",
  meta: "false",
  embed: "markdown"
}

# Encode the parameters into the query string of the request URI.
uri = URI(base_url)
uri.query = URI.encode_www_form(params)

# The endpoint is HTTPS, so TLS has to be enabled on the connection.
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Get.new(uri)
response = http.request(request)

# Print the raw response body.
puts response.body

PHP Microlink API example

<?php

// Microlink API endpoint plus the encoded query string for this request.
$endpoint = "https://api.microlink.io/";
$requestUrl = $endpoint . '?' . http_build_query([
    "url" => "https://cdn.microlink.io/file-examples/sample.pdf",
    "data.markdown.attr" => "markdown",
    "meta" => "false",
    "embed" => "markdown"
]);

$handle = curl_init();

// Configure an HTTP/1.1 GET that returns the body as a string.
curl_setopt($handle, CURLOPT_URL, $requestUrl);
curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
curl_setopt($handle, CURLOPT_ENCODING, "");
curl_setopt($handle, CURLOPT_MAXREDIRS, 10);
curl_setopt($handle, CURLOPT_TIMEOUT, 30);
curl_setopt($handle, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($handle, CURLOPT_CUSTOMREQUEST, "GET");

$body = curl_exec($handle);
$failure = curl_error($handle);

curl_close($handle);

// Print the transport error if there was one, otherwise the response body.
echo $failure ? ("cURL Error #: " . $failure) : $body;

Golang Microlink API example

package main

import (
    "fmt"
    "net/http"
    "net/url"
    "io"
)

// main requests the sample PDF from the Microlink API with embed=markdown and
// prints the raw response body to stdout.
func main() {
    endpoint, err := url.Parse("https://api.microlink.io")
    if err != nil {
        panic(err)
    }

    // Build the query string: target PDF, Markdown extraction rule, no
    // metadata, and the Markdown field embedded as the response.
    params := endpoint.Query()
    params.Set("url", "https://cdn.microlink.io/file-examples/sample.pdf")
    params.Set("data.markdown.attr", "markdown")
    params.Set("meta", "false")
    params.Set("embed", "markdown")
    endpoint.RawQuery = params.Encode()

    // http.Get issues the same GET request through the default client.
    res, err := http.Get(endpoint.String())
    if err != nil {
        panic(err)
    }
    defer res.Body.Close()

    payload, err := io.ReadAll(res.Body)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(payload))
}
The response body is Markdown, so a worker, crawler, or LLM pipeline can consume it without unpacking JSON.
The same request as a raw URL:
https://api.microlink.io?url=https://cdn.microlink.io/file-examples/sample.pdf&data.markdown.attr=markdown&meta=false&embed=markdown

Keep JSON when you need document metadata

Leave embed out when your application needs the normal response envelope:
{
  "status": "success",
  "data": {
    "title": "sample.pdf",
    "url": "https://cdn.microlink.io/file-examples/sample.pdf",
    "markdown": "# Instructions for Adding Your Logo..."
  }
}
Set meta: false for the smallest payload. Keep metadata enabled when the title, publisher, image, or URL fields are useful to your indexer.

Know the PDF limit

This works best for PDFs with an embedded text layer. If the PDF is only scanned images, the extracted Markdown can be sparse because there is little document text to serialize.

Next step

Use Convert a PDF URL to HTML when you need markup instead. Use Convert any URL to Markdown for web pages.