The Ultimate Web Scraping API

Page2API is a powerful and delightful API that makes web scraping easy and fun.

(1000 free API calls. No credit card required)

page2api git:( main ) export API_KEY=YOUR_API_KEY
page2api git:( main ) curl -v -XPOST -H "Content-type: application/json" -d '{

  "api_key": "'"$API_KEY"'",
  "url": "https://www.amazon.com/s?k=luminox+watches",
  "real_browser": true,
  "parse": {
    "watches": [
      {
        "_parent": "[data-component-type='s-search-result']",
        "title": "h2 >> text",
        "link": ".a-link-normal >> href",
        "price": ".a-offscreen >> text",
        "stars": ".a-icon-alt >> text"
      }
    ]
  }
}' 'https://www.page2api.com/api/v1/scrape' | python -mjson.tool # this will format the response

...

"result": {
  "watches": [
    {
      "title": "Mens Wrist Watch Leatherback Sea Turtle Giant 44 mm Black",
      "link": "https://www.amazon.com/Luminox-Leather-back-Turtle-Giant-0337/dp/B07D58LGG9",
      "price": "$189.00",
      "stars": "4.6 out of 5 stars"
    },
    {
      "title": "Men's Navy Seal Pacific Diver 3120 Series Silver Stainless Steel",
      "link": "https://www.amazon.com/Luminox-Pacific-Silver-Stainless-Oyster/dp/B089GYKDHD",
      "price": "$399.00",
      "stars": "4.7 out of 5 stars"
    }, ...
  ]
}

...

page2api git:( main ) export API_KEY=YOUR_API_KEY
page2api git:( main ) curl -v -XPOST -H "Content-type: application/json" -d '{

  "api_key": "'"$API_KEY"'",
  "url": "https://www.indiehackers.com",
  "real_browser": true,
  "scenario": [
    { "click": "a.title-bar__search-link" },
    { "fill_in": ["input.search-page__field", "Page2API"] },
    { "wait_for": ".search-page__results" },
    { "execute": "parse" }
  ],
  "parse": {
    "posts": [
      {
        "_parent": ".search-result",
        "title": ".result__title >> text",
        "link": ".result__text-link >> href",
        "meta": ".result__metadata >> text"
      }
    ]
  }
}' 'https://www.page2api.com/api/v1/scrape' | python -mjson.tool # this will format the response

...

"result": {
  "posts": [
    {
      "title": "I just launched a powerful and fun-to-use Web Scraping API! 🚀🌔",
      "link": "https://www.indiehackers.com/post/7defc5b83c",
      "meta": "16 upvotes · 26 replies"
    }, ...
  ]
}

...

page2api git:( main ) export API_KEY=YOUR_API_KEY
page2api git:( main ) curl -v -XPOST -H "Content-type: application/json" -d '{

  "api_key": "'"$API_KEY"'",
  "url": "https://free-proxy-list.net",
  "real_browser": true,
  "parse": {
    "table": "table"
  },
  "scenario" : [
    { "execute_js": "document.getElementsByClassName(\"dropdown-toggle\")[1].click()" },
    { "execute_js": "document.querySelectorAll(\".dropdown-menu\")[1].querySelectorAll(\"a\")[5].click()" },
    { "wait": 0.1 },
    { "execute": "parse" }
  ]
}' 'https://www.page2api.com/api/v1/scrape' | python -mjson.tool # this will format the response

...

"result": {
  "table": [
    {
      "Anonymity": "elite proxy",
      "Code": "SG",
      "Country": "Singapore",
      "Google": "no",
      "Https": "yes",
      "IP Address": "194.233.69.90",
      "Last Checked": "24 minutes ago",
      "Port": "443"
    },
    {
      "Anonymity": "elite proxy",
      "Code": "US",
      "Country": "United States",
      "Google": "no",
      "Https": "yes",
      "IP Address": "185.249.219.249",
      "Last Checked": "24 minutes ago",
      "Port": "55443"
    }, ...
  ]
}

...

page2api git:( main ) export API_KEY=YOUR_API_KEY
page2api git:( main ) curl -v -XPOST -H "Content-type: application/json" -d '{

  "api_key": "'"$API_KEY"'",
  "url": "https://news.ycombinator.com",
  "real_browser": true,
  "merge_loops": true,
  "parse": {
    "posts": [
      {
        "_parent": ".athing",
        "url": ".storylink >> href",
        "rank": ".rank >> text",
        "site": ".sitestr >> text",
        "title": ".storylink >> text"
      }
    ]
  },
  "scenario" : [
    {
      "loop" : [
        { "wait_for": ".storylink" },
        { "execute": "parse" },
        { "execute_js": "document.querySelector(\".morelink\").click()" },
        { "wait": 0.1 }
      ],
      "iterations": 3
    }
  ]
}' 'https://www.page2api.com/api/v1/scrape' | python -mjson.tool # this will format the response

...

"result": {
  "posts": [
    {
      "rank": "1.",
      "site": "example.com",
      "title": "Example Domain",
      "url": "https://www.example.com"
    },
    {
      "rank": "2.",
      "site": "page2api.com",
      "title": "The Ultimate Web Scraping API",
      "url": "https://www.page2api.com"
    }, ...
  ]
}

...

Intuitive and powerful API

Our main goal was to reimagine the way the data is collected from the scraped web pages. We designed an API that offers the most versatile way to pick the needed elements from the HTML and create a comprehensive data structure.

Asynchronous scraping

Our API offers the possibility to launch long-running scraping sessions in the background and receive the obtained data via a webhook (callback URL). This feature eliminates the need to keep the API request open while the data is being scraped.

JavaScript rendering

We can use a real browser for scraping and handle all the JavaScript that runs on the page, thus offering the possibility to manage all kinds of websites, whether it's a simple one or a Single Page Application.

Scheduled scraping

Page2API can launch scheduled scraping sessions at the interval you need. Each session automatically scrapes the data for you and sends it to your application via the callback URL.

Custom browser scenarios

This API represents a reimagined way to interact with the browser and the page that is being scraped. With a custom scenario, you can build a set of instructions that will wait for specific elements, execute JavaScript, handle pagination, and much more.

Fast and reliable proxies

Our API has access to tens of millions of proxies. This enables you to collect the data you need without worrying about getting blocked. For hard-to-scrape websites, we offer the possibility to use Premium (Residential) Proxies, located in 138 countries around the world.

Live Demo

{"url":"https://www.page2api.com","parse":{"title":"h1 \u003e\u003e text","docs_url":"#docs-url \u003e\u003e href","contact":"//a[contains(@href,'mailto')] \u003e\u003e text","features":[{"_parent":".feature-container","title":"h2 \u003e\u003e text","description":".feature-description \u003e\u003e text"}],"pricing":".pricing-table"}}
Below are some ready-to-use examples.
If you want to scrape a different webpage, you need to build your selectors according to the page structure.

  /*
    The scraped data will appear here.
    You can also update the payload and test on different pages with various selectors.
  */


Simple and flexible pricing

Pages to scrape

40.0k

Price

$ 10
Request type Cost
Rotating Proxy without real browser
0.00025$/page
Rotating Proxy with real browser
0.002$/page
Premium Proxy without real browser
0.0025$/page
Premium Proxy with real browser
0.02$/page
Currently, there is only a pay-as-you-go option available.
The above prices do not include VAT.

Contact us

We are happy to answer your questions.

Ready to give it a try?

Make Web Scraping a process you will enjoy.
(1000 free API calls. No credit card required)