How to bypass bot detection
Websites employ different levels of anti-bot mechanisms depending on the sensitivity of their data and their budget. If your automation is being blocked, try the steps below.
The BrowserQL API
You can use our query language, BrowserQL, which is designed to bypass sophisticated bot detection mechanisms effectively. This API allows you to specify a target URL and return the data you care about: the HTML content, a .png screenshot, or an unblocked browser session to use with Playwright or Puppeteer.
A simple cURL request to the API specifying your target website will return all the data needed to scrape it after it is done bypassing the bot detection:
# POST a BrowserQL mutation that loads the target URL (waiting for network idle)
# and then asks for a reconnect endpoint for the same browser session.
curl -X POST \
  'https://production-sfo.browserless.io/chrome/bql?token=YOUR-TOKEN-HERE' \
  -H 'Content-Type: application/json' \
  -d '{
"query": "mutation Reconnect($url: String!) { goto(url: $url, waitUntil: networkIdle) { status } reconnect(timeout: 30000) { browserWSEndpoint } }",
"variables": { "url": "https://example.com/" }
}'
You can use the content or screenshot directly, or use the returned endpoint to run further actions with a library:
- Puppeteer
- Playwright
import puppeteer from "puppeteer-core";

const TOKEN = 'YOUR_API_TOKEN_HERE';
const url = "https://www.browserless.io/";

/**
 * Sends the BrowserQL "Reconnect" mutation: load `url`, wait for network idle,
 * then request a WebSocket endpoint for the same browser session.
 *
 * @param {string} url - Page to load before reconnecting.
 * @returns {Promise<object>} Parsed JSON body of the BrowserQL response.
 * @throws {Error} When the HTTP response status is not OK.
 */
const unblock = async (url) => {
  const opts = {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      "query": "mutation Reconnect($url: String!) { goto(url: $url, waitUntil: networkIdle) { status } reconnect(timeout: 30000) { browserWSEndpoint } }",
      "variables": { url }
    }),
  };

  const response = await fetch(
    `https://production-sfo.browserless.io/chromium/bql?token=${TOKEN}`,
    opts,
  );
  // Fail with the server's message instead of a confusing JSON/property error later.
  if (!response.ok) {
    throw new Error(`BrowserQL request failed (${response.status}): ${await response.text()}`);
  }
  return await response.json();
};

// Reconnect
const { data, errors } = await unblock(url);
// A GraphQL-style reply can carry `errors` with no usable `data`; stop early in that case.
if (!data?.reconnect?.browserWSEndpoint) {
  throw new Error(`BrowserQL returned no reconnect endpoint: ${JSON.stringify(errors)}`);
}
const browser = await puppeteer.connect({
  browserWSEndpoint: data.reconnect.browserWSEndpoint + `?token=${TOKEN}`,
});
const pages = await browser.pages();
// The page URL may differ from the requested one (e.g. after a redirect),
// so fall back to the first open page rather than calling .screenshot on undefined.
const page = pages.find((p) => p.url() === url) ?? pages[0];
await page.screenshot({ path: `screenshot-${Date.now()}.png` });
await browser.close();
- Javascript
- Python
- Java
- C#
import { chromium } from "playwright-core";

const TOKEN = 'YOUR_API_TOKEN_HERE';
const url = "https://www.browserless.io/";

/**
 * Sends the BrowserQL "Reconnect" mutation: load `url`, wait for network idle,
 * then request a WebSocket endpoint for the same browser session.
 *
 * @param {string} url - Page to load before reconnecting.
 * @returns {Promise<object>} Parsed JSON body of the BrowserQL response.
 * @throws {Error} When the HTTP response status is not OK.
 */
const unblock = async (url) => {
  const opts = {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      "query": "mutation Reconnect($url: String!) { goto(url: $url, waitUntil: networkIdle) { status } reconnect(timeout: 30000) { browserWSEndpoint } }",
      "variables": { url }
    }),
  };

  const response = await fetch(
    `https://production-sfo.browserless.io/chromium/bql?token=${TOKEN}`,
    opts,
  );
  // Fail with the server's message instead of a confusing JSON/property error later.
  if (!response.ok) {
    throw new Error(`BrowserQL request failed (${response.status}): ${await response.text()}`);
  }
  return await response.json();
};

const { data, errors } = await unblock(url);
// A GraphQL-style reply can carry `errors` with no usable `data`; stop early in that case.
if (!data?.reconnect?.browserWSEndpoint) {
  throw new Error(`BrowserQL returned no reconnect endpoint: ${JSON.stringify(errors)}`);
}
const browser = await chromium.connectOverCDP(data.reconnect.browserWSEndpoint + `?token=${TOKEN}`);
const pages = browser.contexts()[0].pages();
// The page URL may differ from the requested one (e.g. after a redirect),
// so fall back to the first open page rather than calling .screenshot on undefined.
const page = pages.find((p) => p.url() === url) ?? pages[0];
await page.screenshot({ path: `screenshot-${Date.now()}.png` });
await browser.close();
import asyncio
import requests
from playwright.async_api import async_playwright
TOKEN = "YOUR_API_TOKEN_HERE"
url = "https://www.browserless.io/"
def unblock(url):
    """Run the BrowserQL "Reconnect" mutation for ``url`` and return the parsed JSON reply.

    ``raise_for_status`` is called on the response before the body is decoded.
    """
    query = (
        "mutation Reconnect($url: String!) {\n"
        "goto(url: $url, waitUntil: networkIdle) { status }\n"
        "reconnect(timeout: 30000) { browserWSEndpoint }\n"
        "}"
    )
    endpoint = f"https://production-sfo.browserless.io/chromium/bql?token={TOKEN}"
    reply = requests.post(
        endpoint,
        json={"query": query, "variables": {"url": url}},
        headers={"Content-Type": "application/json"},
    )
    reply.raise_for_status()
    return reply.json()
async def main():
    """Unblock ``url`` through BrowserQL, attach Playwright to the session and save a screenshot."""
    data = unblock(url)["data"]
    async with async_playwright() as p:
        browser = await p.chromium.connect_over_cdp(
            data["reconnect"]["browserWSEndpoint"] + f"?token={TOKEN}"
        )
        context = browser.contexts[0]
        # Prefer the page whose URL matches exactly; it can differ (e.g. after a
        # redirect), so fall back to the first open page instead of using None.
        page = next((pg for pg in context.pages if pg.url == url), None)
        if page is None:
            if not context.pages:
                raise RuntimeError("No open page found in the reconnected browser session")
            page = context.pages[0]
        await page.screenshot(path="screenshot.png")
        await browser.close()
asyncio.run(main())
import com.microsoft.playwright.*;
import java.net.http.*;
import java.net.URI;
import java.net.http.HttpClient;
import java.util.Map;
import com.google.gson.Gson;
public class PlaywrightExample {
private static final String TOKEN = "YOUR_API_TOKEN_HERE";
private static final String URL = "https://www.browserless.io/";
public static void main(String[] args) throws Exception {
HttpClient client = HttpClient.newHttpClient();
Map<String, Object> payload = Map.of(
"query", """
mutation Reconnect($url: String!) {
goto(url: $url, waitUntil: networkIdle) { status }
reconnect(timeout: 30000) { browserWSEndpoint }
}
""",
"variables", Map.of("url", URL)
);
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create("https://production-sfo.browserless.io/chromium/bql?token=" + TOKEN))
.header("Content-Type", "application/json")
.POST(HttpRequest.BodyPublishers.ofString(new Gson().toJson(payload)))
.build();
System.out.println("Unblocking " + URL);
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
if (response.statusCode() != 200) {
throw new Exception("Error: " + response.body());
}
String browserWSEndpoint = new Gson().fromJson(response.body(), Map.class)
.get("data").get("reconnect").get("browserWSEndpoint").toString();
System.out.println("Connecting to Playwright...");
try (Playwright playwright = Playwright.create()) {
Browser browser = playwright.chromium().connectOverCDP(browserWSEndpoint);
Page page = browser.contexts().get(0).pages().stream()
.filter(p -> p.url().equals(URL))
.findFirst()
.orElseThrow(() -> new Exception("Page not found"));
page.screenshot(new Page.ScreenshotOptions().setPath("screenshot.png"));
}
System.out.println("Done!");
}
}
using System;
using System.Net.Http;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Microsoft.Playwright;
/// <summary>
/// Unblocks a page through the BrowserQL API, then attaches Playwright to the
/// returned browser session over CDP and saves a screenshot.
/// </summary>
class Program
{
    private const string TOKEN = "YOUR_API_TOKEN_HERE";
    private const string URL = "https://www.browserless.io/";

    static async Task Main(string[] args)
    {
        // Dispose the HTTP client when Main exits.
        using var httpClient = new HttpClient();
        var payload = new
        {
            query = @"
mutation Reconnect($url: String!) {
goto(url: $url, waitUntil: networkIdle) { status }
reconnect(timeout: 30000) { browserWSEndpoint }
}",
            variables = new { url = URL }
        };
        var requestContent = new StringContent(JsonSerializer.Serialize(payload), System.Text.Encoding.UTF8, "application/json");
        var response = await httpClient.PostAsync($"https://production-sfo.browserless.io/chromium/bql?token={TOKEN}", requestContent);
        if (!response.IsSuccessStatusCode)
        {
            Console.WriteLine($"Error: {await response.Content.ReadAsStringAsync()}");
            return;
        }

        var jsonResponse = JsonSerializer.Deserialize<JsonElement>(await response.Content.ReadAsStringAsync());
        var browserWSEndpoint = jsonResponse.GetProperty("data").GetProperty("reconnect").GetProperty("browserWSEndpoint").GetString();

        // IPlaywright is disposable; release it when Main exits.
        using var playwright = await Playwright.CreateAsync();
        var browser = await playwright.Chromium.ConnectOverCDPAsync(browserWSEndpoint + $"?token={TOKEN}");
        var context = browser.Contexts[0];
        var page = context.Pages[0];
        Console.WriteLine("Taking screenshot...");
        await page.ScreenshotAsync(new PageScreenshotOptions { Path = "screenshot.png" });
        await browser.CloseAsync();
        Console.WriteLine("Done!");
    }
}
Additional strategies
If none of these do the trick, get in touch with us at support@browserless.io.
We have more tricks up our sleeves we can show you, such as captcha solving and changing viewport sizes, especially using our enterprise features.
Try out the Stealth routes
The stealth routes below are only for paid cloud-unit or Enterprise plans.
We have native support for things like puppeteer-stealth, but also offer our own stealth routes that encompass more stealthy behaviors. We use a route path semantic for this, and today this only supports libraries that work over the Chrome DevTools Protocol.
- Puppeteer
- Playwright
// Stealth route for Chromium.
const chromiumStealthEndpoint =
  "wss://production-sfo.browserless.io/chromium/stealth?token=YOUR_API_TOKEN_HERE";
await puppeteer.connect({ browserWSEndpoint: chromiumStealthEndpoint });

// Stealth route for Chrome.
const chromeStealthEndpoint =
  "wss://production-sfo.browserless.io/chrome/stealth?token=YOUR_API_TOKEN_HERE";
await puppeteer.connect({ browserWSEndpoint: chromeStealthEndpoint });
- Javascript
- Python
- Java
- C#
// Stealth route endpoints, one per browser.
const chromiumStealthEndpoint =
  "wss://production-sfo.browserless.io/chromium/stealth?token=YOUR_API_TOKEN_HERE";
const chromeStealthEndpoint =
  "wss://production-sfo.browserless.io/chrome/stealth?token=YOUR_API_TOKEN_HERE";

// Chromium
const chromiumBrowser = await playwright.chromium.connectOverCDP(chromiumStealthEndpoint);

// Chrome
const chromeBrowser = await playwright.chromium.connectOverCDP(chromeStealthEndpoint);
import asyncio
from playwright.async_api import async_playwright

# Stealth route endpoints, one per browser.
CHROMIUM_URL = "wss://production-sfo.browserless.io/chromium/stealth?token=YOUR_API_TOKEN_HERE"
CHROME_URL = "wss://production-sfo.browserless.io/chrome/stealth?token=YOUR_API_TOKEN_HERE"


async def main():
    """Connect over CDP to the Chromium stealth route, then to the Chrome one."""
    async with async_playwright() as playwright:
        # Chromium
        chromium_browser = await playwright.chromium.connect_over_cdp(CHROMIUM_URL)

        # Chrome
        chrome_browser = await playwright.chromium.connect_over_cdp(CHROME_URL)


asyncio.run(main())
import com.microsoft.playwright.*;
/** Connects Playwright over CDP to the Chromium and Chrome stealth routes. */
public class PlaywrightConnectExample {
    private static final String CHROMIUM_URL =
        "wss://production-sfo.browserless.io/chromium/stealth?token=YOUR_API_TOKEN_HERE";
    private static final String CHROME_URL =
        "wss://production-sfo.browserless.io/chrome/stealth?token=YOUR_API_TOKEN_HERE";

    public static void main(String[] args) {
        try (Playwright playwright = Playwright.create()) {
            // Chromium
            Browser chromiumBrowser = playwright.chromium().connectOverCDP(CHROMIUM_URL);

            // Chrome
            Browser chromeBrowser = playwright.chromium().connectOverCDP(CHROME_URL);
        }
    }
}
using System;
using System.Threading.Tasks;
using Microsoft.Playwright;
/// <summary>Connects Playwright over CDP to the Chromium and Chrome stealth routes.</summary>
class Program
{
    static async Task Main(string[] args)
    {
        var chromiumEndpoint = "wss://production-sfo.browserless.io/chromium/stealth?token=YOUR_API_TOKEN_HERE";
        var chromeEndpoint = "wss://production-sfo.browserless.io/chrome/stealth?token=YOUR_API_TOKEN_HERE";

        var pw = await Playwright.CreateAsync();

        // Chromium
        var chromiumBrowser = await pw.Chromium.ConnectOverCDPAsync(chromiumEndpoint);

        // Chrome
        var chromeBrowser = await pw.Chromium.ConnectOverCDPAsync(chromeEndpoint);
    }
}
These routes incorporate many of the anti-detection mechanisms below, which you're free to try as well.
Launch args to bypass bot detection
Use the headless arg
Most bot detectors will check your user-agent, which by default explicitly claims you're running headless Chrome. This is a dead giveaway. It can be changed by setting a specific user-agent, but we highly recommend you use the &headless=false flag instead, which changes your user-agent to a more credible one.
- Puppeteer
- Playwright
import puppeteer from "puppeteer-core";

// Launch options are serialized to JSON and sent in the `launch` query parameter.
const launchArgs = JSON.stringify({ headless: false });
const browserWSEndpoint = `wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&launch=${launchArgs}`;

const browser = await puppeteer.connect({ browserWSEndpoint });
//...
- Javascript
- Python
- Java
- C#
import { chromium } from "playwright-core";

// Launch options are serialized to JSON and sent in the `launch` query parameter.
const launchArgs = JSON.stringify({ headless: false });
const wsEndpoint = `wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&launch=${launchArgs}`;

const browser = await chromium.connectOverCDP(wsEndpoint);
// ...
import asyncio
from playwright.async_api import async_playwright

# JSON launch options, sent in the `launch` query parameter of the endpoint.
LAUNCH_ARGS = '{"headless": false}'
WS_ENDPOINT = f"wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&launch={LAUNCH_ARGS}"


async def main():
    """Connect to the endpoint over CDP with the launch options above."""
    async with async_playwright() as playwright:
        browser = await playwright.chromium.connect_over_cdp(WS_ENDPOINT)
        # ...


asyncio.run(main())
import com.microsoft.playwright.*;
/** Connects Playwright over CDP, passing launch options in the {@code launch} query parameter. */
public class PlaywrightConnectExample {
    // JSON launch options appended to the endpoint below.
    private static final String LAUNCH_ARGS = "{\"headless\": false}";
    private static final String WS_ENDPOINT =
        "wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&launch=" + LAUNCH_ARGS;

    public static void main(String[] args) {
        try (Playwright playwright = Playwright.create()) {
            Browser browser = playwright.chromium().connectOverCDP(WS_ENDPOINT);
            // ...
        }
    }
}
using System;
using System.Threading.Tasks;
using Microsoft.Playwright;
/// <summary>Connects Playwright over CDP, passing launch options in the <c>launch</c> query parameter.</summary>
class Program
{
    static async Task Main(string[] args)
    {
        // JSON launch options, interpolated into the endpoint below.
        var launchArgs = "{\"headless\": false}";
        var endpoint = $"wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&launch={launchArgs}";

        var pw = await Playwright.CreateAsync();
        var browser = await pw.Chromium.ConnectOverCDPAsync(endpoint);
        // ...
    }
}
Use the stealth arg
The stealth flag implements Puppeteer's puppeteer-extra-plugin-stealth plugin which applies various techniques to make detection of headless puppeteer harder. This flag may backfire and be easily detected by some sites, so consider avoiding it as well.
- Puppeteer
- Playwright
import puppeteer from "puppeteer-core";

// Launch options are serialized to JSON and sent in the `launch` query parameter.
const launchArgs = JSON.stringify({ stealth: true });
const browserWSEndpoint = `wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&launch=${launchArgs}`;

const browser = await puppeteer.connect({ browserWSEndpoint });
//...
- Javascript
- Python
- Java
- C#
import { chromium } from "playwright-core";

// Launch options are serialized to JSON and sent in the `launch` query parameter.
const launchArgs = JSON.stringify({ stealth: true });
const wsEndpoint = `wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&launch=${launchArgs}`;

const browser = await chromium.connectOverCDP(wsEndpoint);
// ...
import asyncio
from playwright.async_api import async_playwright

# JSON launch options, sent in the `launch` query parameter of the endpoint.
LAUNCH_ARGS = '{"stealth": true}'
WS_ENDPOINT = f"wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&launch={LAUNCH_ARGS}"


async def main():
    """Connect to the endpoint over CDP with the launch options above."""
    async with async_playwright() as playwright:
        browser = await playwright.chromium.connect_over_cdp(WS_ENDPOINT)
        # ...


asyncio.run(main())
import com.microsoft.playwright.*;
/** Connects Playwright over CDP, passing launch options in the {@code launch} query parameter. */
public class PlaywrightConnectExample {
    // JSON launch options appended to the endpoint below.
    private static final String LAUNCH_ARGS = "{\"stealth\": true}";
    private static final String WS_ENDPOINT =
        "wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&launch=" + LAUNCH_ARGS;

    public static void main(String[] args) {
        try (Playwright playwright = Playwright.create()) {
            Browser browser = playwright.chromium().connectOverCDP(WS_ENDPOINT);
            // ...
        }
    }
}
using System;
using System.Threading.Tasks;
using Microsoft.Playwright;
/// <summary>Connects Playwright over CDP, passing launch options in the <c>launch</c> query parameter.</summary>
class Program
{
    static async Task Main(string[] args)
    {
        // JSON launch options, interpolated into the endpoint below.
        var launchArgs = "{\"stealth\": true}";
        var endpoint = $"wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&launch={launchArgs}";

        var pw = await Playwright.CreateAsync();
        var browser = await pw.Chromium.ConnectOverCDPAsync(endpoint);
        // ...
    }
}
Use a proxy
Finally, the hardest sites to crack check your IP address. Two types of blocks can occur: those based on the type of IP address, and those based on the frequency of requests (rate limits).
- Sites checking the type of IP address will detect your data-center IP addresses when using Browserless. To overcome this, using a proxy with residential IP addresses will be the best option.
- Sites that work the first few times and then stop working are probably rate-limiting; in that case the block is not caused by the type of IP address. For these cases, you don't necessarily need a residential proxy, and rotating data-center IP addresses should be enough.
Browserless offers a residential proxy API that you can easily incorporate into your scripts.
- Puppeteer
- Playwright
import puppeteer from "puppeteer-core";

// puppeteer.connect needs a WebSocket (wss://) browser endpoint; the proxy
// options are passed as query parameters on that endpoint.
const browserWSEndpoint = "wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&proxy=residential&proxyCountry=us&proxySticky";
const browser = await puppeteer.connect({ browserWSEndpoint });
//...
- Javascript
- Python
- Java
- C#
import { chromium } from "playwright-core";

// Connect to the WebSocket (wss://) browser endpoint; the proxy options are
// passed as query parameters on that endpoint.
const browserWSEndpoint = "wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&proxy=residential&proxyCountry=us&proxySticky";
const browser = await chromium.connectOverCDP(browserWSEndpoint);
// ...
import asyncio
from playwright.async_api import async_playwright

# WebSocket (wss://) browser endpoint; the proxy options are passed as query parameters.
WS_ENDPOINT = "wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&proxy=residential&proxyCountry=us&proxySticky"


async def main():
    """Connect to the proxied browser endpoint over CDP."""
    async with async_playwright() as p:
        browser = await p.chromium.connect_over_cdp(WS_ENDPOINT)
        # ...


asyncio.run(main())
import com.microsoft.playwright.*;
/** Connects Playwright over CDP to a browser session that uses the residential proxy. */
public class PlaywrightConnectExample {
    public static void main(String[] args) {
        // WebSocket (wss://) browser endpoint; the proxy options are passed as query parameters.
        String WS_ENDPOINT = "wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&proxy=residential&proxyCountry=us&proxySticky";
        try (Playwright playwright = Playwright.create()) {
            Browser browser = playwright.chromium().connectOverCDP(WS_ENDPOINT);
            // ...
        }
    }
}
using System;
using System.Threading.Tasks;
using Microsoft.Playwright;
/// <summary>Connects Playwright over CDP to a browser session that uses the residential proxy.</summary>
class Program
{
    static async Task Main(string[] args)
    {
        // WebSocket (wss://) browser endpoint; the proxy options are passed as query parameters.
        string wsEndpoint = "wss://production-sfo.browserless.io/?token=YOUR_API_TOKEN_HERE&proxy=residential&proxyCountry=us&proxySticky";
        var playwright = await Playwright.CreateAsync();
        var browser = await playwright.Chromium.ConnectOverCDPAsync(wsEndpoint);
        // ...
    }
}
Utilizing proxies remains a crucial strategy in bypassing bot detection. Depending on the site's mechanism, you might need a proxy with residential IP addresses or a data-center IP that rotates. For more information about these topics, please check our documentation on our built-in proxy and third-party proxy.