Scrape IMDb movie ratings
Extract movie titles, release years, and ratings from IMDb's Top 250 chart.
- A Browserless API token from your account dashboard
Steps
IMDb's Top 250 page is rendered with JavaScript, and IMDb is owned by Amazon, which applies bot-detection measures. The examples below extract the top-rated movies and route requests through stealth mode with a residential proxy.
IMDb updates its markup periodically. If the selectors stop returning results, inspect the live page with browser DevTools to find the current class names and update the selectors accordingly.
- AI Agent
- REST API
- Frameworks
- BQL
Use the Browserless MCP server to scrape movie ratings from IMDb from any MCP-compatible AI agent (Claude Desktop, Cursor, Windsurf, ChatGPT, etc.).
1. Connect the MCP server
Send this prompt to your AI agent to install the Browserless MCP server:
Go to https://github.com/browserless/browserless-mcp/blob/main/install.md
and follow the instructions to install the Browserless MCP server
for my client.
2. Scrape IMDb
Use browserless_smartscraper. It handles IMDb's dynamic content and bot protection automatically.
Use the browserless_smartscraper tool to scrape the top 250 movies
from https://www.imdb.com/chart/top/
and return the results as markdown
Send the BQL mutation over HTTP to the stealth endpoint. No browser library or BQL IDE required.
- cURL
- JavaScript
- Python
- Java
- C#
1. Send the request
# POST the BQL mutation as JSON to the stealth BQL endpoint. The URL's query
# string carries the API token and the residential-proxy options.
# The whole mutation is sent as one JSON string, so its inner quotes are escaped as \".
curl -X POST \
"https://production-sfo.browserless.io/stealth/bql?token=YOUR_API_TOKEN_HERE&proxy=residential&proxyCountry=us" \
-H "Content-Type: application/json" \
-d '{
"query": "mutation ScrapeIMDb { goto(url: \"https://www.imdb.com/chart/top/\", waitUntil: networkIdle) { status } waitForSelector(selector: \".ipc-metadata-list-summary-item\", timeout: 15000) { time } movies: mapSelector(selector: \".ipc-metadata-list-summary-item\") { title: mapSelector(selector: \".ipc-title__text\") { innerText } metadata: mapSelector(selector: \".cli-title-metadata span\") { innerText } rating: mapSelector(selector: \".ipc-rating-star--imdb\") { ratingLabel: attribute(name: \"aria-label\") { value } } } }",
"variables": {}
}'
2. Check the output
{
"data": {
"goto": { "status": 200 },
"waitForSelector": { "time": 2340 },
"movies": [
{
"title": [{ "innerText": "1. The Shawshank Redemption" }],
"metadata": [{ "innerText": "1994" }, { "innerText": "2h 22m" }],
"rating": [{ "ratingLabel": { "value": "IMDb rating: 9.3" } }]
},
{
"title": [{ "innerText": "2. The Godfather" }],
"metadata": [{ "innerText": "1972" }, { "innerText": "2h 55m" }],
"rating": [{ "ratingLabel": { "value": "IMDb rating: 9.2" } }]
}
]
}
}
1. Send the request
// BQL mutation: load the Top 250 chart, wait for the list items to render,
// then map each item to its title, metadata spans, and rating aria-label.
const query = `mutation ScrapeIMDb {
goto(url: "https://www.imdb.com/chart/top/", waitUntil: networkIdle) {
status
}
waitForSelector(selector: ".ipc-metadata-list-summary-item", timeout: 15000) {
time
}
movies: mapSelector(selector: ".ipc-metadata-list-summary-item") {
title: mapSelector(selector: ".ipc-title__text") { innerText }
metadata: mapSelector(selector: ".cli-title-metadata span") { innerText }
rating: mapSelector(selector: ".ipc-rating-star--imdb") {
ratingLabel: attribute(name: "aria-label") { value }
}
}
}`;

// Stealth BQL endpoint; the query string carries the token and proxy options.
const response = await fetch(
  'https://production-sfo.browserless.io/stealth/bql?token=YOUR_API_TOKEN_HERE&proxy=residential&proxyCountry=us',
  {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, variables: {} }),
  }
);

// Fail loudly on any non-2xx response instead of trying to read a result from it.
if (!response.ok) {
  throw new Error(`Browserless request failed: ${response.status} ${response.statusText}`);
}

const { data, errors } = await response.json();

// GraphQL reports query failures in an `errors` array, and `data` may then be
// null, so check it before dereferencing `data.movies`.
if (errors?.length) {
  throw new Error(`BQL returned errors: ${JSON.stringify(errors)}`);
}

console.log(JSON.stringify(data?.movies ?? [], null, 2));
2. Check the output
[
{
"title": [{ "innerText": "1. The Shawshank Redemption" }],
"metadata": [{ "innerText": "1994" }, { "innerText": "2h 22m" }],
"rating": [{ "ratingLabel": { "value": "IMDb rating: 9.3" } }]
},
{
"title": [{ "innerText": "2. The Godfather" }],
"metadata": [{ "innerText": "1972" }, { "innerText": "2h 55m" }],
"rating": [{ "ratingLabel": { "value": "IMDb rating: 9.2" } }]
}
]
1. Install dependencies
pip install requests
2. Send the request
import requests

# BQL mutation: load the Top 250 chart, wait for the list items to render,
# then map each item to its title, metadata spans, and rating aria-label.
query = """
mutation ScrapeIMDb {
goto(url: "https://www.imdb.com/chart/top/", waitUntil: networkIdle) {
status
}
waitForSelector(selector: ".ipc-metadata-list-summary-item", timeout: 15000) {
time
}
movies: mapSelector(selector: ".ipc-metadata-list-summary-item") {
title: mapSelector(selector: ".ipc-title__text") { innerText }
metadata: mapSelector(selector: ".cli-title-metadata span") { innerText }
rating: mapSelector(selector: ".ipc-rating-star--imdb") {
ratingLabel: attribute(name: "aria-label") { value }
}
}
}
"""

# Stealth BQL endpoint; token and proxy options travel as query parameters.
response = requests.post(
    'https://production-sfo.browserless.io/stealth/bql',
    params={
        'token': 'YOUR_API_TOKEN_HERE',
        'proxy': 'residential',
        'proxyCountry': 'us',
    },
    json={'query': query, 'variables': {}},
)
# Surface HTTP-level failures here rather than as a KeyError further down.
response.raise_for_status()

payload = response.json()
# GraphQL reports query failures under "errors"; "data" may then be null.
if payload.get('errors'):
    raise RuntimeError(f"BQL returned errors: {payload['errors']}")

data = payload.get('data') or {}
for movie in data.get('movies') or []:
    # Each field is a list of matches; fall back to 'N/A' when a selector matched nothing.
    title = movie['title'][0]['innerText'] if movie['title'] else 'N/A'
    year = movie['metadata'][0]['innerText'] if movie['metadata'] else 'N/A'
    rating = movie['rating'][0]['ratingLabel']['value'] if movie['rating'] else 'N/A'
    print(f'{title} ({year}) - {rating}')
3. Check the output
1. The Shawshank Redemption (1994) - IMDb rating: 9.3
2. The Godfather (1972) - IMDb rating: 9.2
1. Send the request
import java.net.URI;
import java.net.http.*;
// Stealth BQL endpoint; the query string carries the token and residential-proxy options.
String token = "YOUR_API_TOKEN_HERE";
String endpoint = "https://production-sfo.browserless.io/stealth/bql?token=" + token
+ "&proxy=residential&proxyCountry=us";
// The mutation is built as a single line. Each \\\" in the Java source yields a
// backslash followed by a quote in the string, so the quotes are already
// JSON-escaped when the query is spliced into the payload below.
String query = "mutation ScrapeIMDb {"
+ " goto(url: \\\"https://www.imdb.com/chart/top/\\\", waitUntil: networkIdle) { status }"
+ " waitForSelector(selector: \\\".ipc-metadata-list-summary-item\\\", timeout: 15000) { time }"
+ " movies: mapSelector(selector: \\\".ipc-metadata-list-summary-item\\\") {"
+ " title: mapSelector(selector: \\\".ipc-title__text\\\") { innerText }"
+ " metadata: mapSelector(selector: \\\".cli-title-metadata span\\\") { innerText }"
+ " rating: mapSelector(selector: \\\".ipc-rating-star--imdb\\\") { ratingLabel: attribute(name: \\\"aria-label\\\") { value } }"
+ " }"
+ " }";
// Hand-assembled JSON body; it is valid only because the query's quotes are pre-escaped above.
String payload = "{\"query\": \"" + query + "\", \"variables\": {}}";
HttpClient client = HttpClient.newHttpClient();
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(endpoint))
.header("Content-Type", "application/json")
.POST(HttpRequest.BodyPublishers.ofString(payload))
.build();
// Send the request, read the response body as a String, and print the raw JSON.
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
System.out.println(response.body());
2. Check the output
{
"data": {
"goto": { "status": 200 },
"waitForSelector": { "time": 2340 },
"movies": [
{
"title": [{ "innerText": "1. The Shawshank Redemption" }],
"metadata": [{ "innerText": "1994" }, { "innerText": "2h 22m" }],
"rating": [{ "ratingLabel": { "value": "IMDb rating: 9.3" } }]
}
]
}
}
1. Send the request
using System.Net.Http;
using System.Text;
using System.Text.Json;

// Stealth BQL endpoint; the query string carries the token and residential-proxy options.
string token = "YOUR_API_TOKEN_HERE";
string endpoint = $"https://production-sfo.browserless.io/stealth/bql?token={token}&proxy=residential&proxyCountry=us";

// BQL mutation as a verbatim string; "" is the escape for a literal double quote.
string bqlQuery = @"mutation ScrapeIMDb {
goto(url: ""https://www.imdb.com/chart/top/"", waitUntil: networkIdle) { status }
waitForSelector(selector: "".ipc-metadata-list-summary-item"", timeout: 15000) { time }
movies: mapSelector(selector: "".ipc-metadata-list-summary-item"") {
title: mapSelector(selector: "".ipc-title__text"") { innerText }
metadata: mapSelector(selector: "".cli-title-metadata span"") { innerText }
rating: mapSelector(selector: "".ipc-rating-star--imdb"") {
ratingLabel: attribute(name: ""aria-label"") { value }
}
}
}";

// Serialize the GraphQL request body: { "query": ..., "variables": {} }.
string requestJson = JsonSerializer.Serialize(new
{
    query = bqlQuery,
    variables = new { },
});

// The client is disposed when it goes out of scope.
using var httpClient = new HttpClient();
using var requestContent = new StringContent(requestJson, Encoding.UTF8, "application/json");

// POST the mutation and print the raw JSON response.
HttpResponseMessage reply = await httpClient.PostAsync(endpoint, requestContent);
Console.WriteLine(await reply.Content.ReadAsStringAsync());
2. Check the output
{
"data": {
"goto": { "status": 200 },
"waitForSelector": { "time": 2340 },
"movies": [
{
"title": [{ "innerText": "1. The Shawshank Redemption" }],
"metadata": [{ "innerText": "1994" }, { "innerText": "2h 22m" }],
"rating": [{ "ratingLabel": { "value": "IMDb rating: 9.3" } }]
}
]
}
}
Connect through stealth mode and a residential proxy so IMDb sees traffic from a real browser, then extract movie data from the rendered chart.
- Puppeteer
- Playwright
1. Install dependencies
npm install puppeteer-core
2. Connect and scrape
import puppeteer from 'puppeteer-core';

// Stealth WebSocket endpoint; the query string carries the token and proxy options.
const wsEndpoint =
  'wss://production-sfo.browserless.io/stealth?token=YOUR_API_TOKEN_HERE&proxy=residential&proxyCountry=us';
const itemSelector = '.ipc-metadata-list-summary-item';

const browser = await puppeteer.connect({ browserWSEndpoint: wsEndpoint });

try {
  const page = await browser.newPage();
  await page.goto('https://www.imdb.com/chart/top/', { waitUntil: 'networkidle2' });

  // Wait until at least one chart row has rendered before reading the list.
  await page.waitForSelector(itemSelector);

  // $$eval runs the callback inside the page over every element matching the selector.
  const movies = await page.$$eval(itemSelector, (rows) => {
    // Trimmed text of the first descendant matching `selector`, or '' when absent.
    const textOf = (row, selector) => row.querySelector(selector)?.innerText?.trim() ?? '';

    return rows.map((row) => {
      const ratingEl = row.querySelector('.ipc-rating-star--imdb');
      return {
        title: textOf(row, '.ipc-title__text'),
        year: textOf(row, '.cli-title-metadata span'),
        rating: ratingEl?.getAttribute('aria-label') ?? '',
      };
    });
  });

  // Print only the first 10 entries to keep the output short.
  const firstTen = movies.slice(0, 10);
  console.log(JSON.stringify(firstTen, null, 2));
} finally {
  // Always release the remote browser session, even if scraping fails.
  await browser.close();
}
3. Check the output
Save the script as scrape-imdb.mjs and run it with node scrape-imdb.mjs. It prints the first 10 movies; each object has title, year, and rating fields.
[
{
"title": "1. The Shawshank Redemption",
"year": "1994",
"rating": "IMDb rating: 9.3"
},
{
"title": "2. The Godfather",
"year": "1972",
"rating": "IMDb rating: 9.2"
}
]
1. Install dependencies
npm install playwright-core
2. Connect and scrape
import { chromium } from 'playwright-core';

// Connect over CDP to the /stealth route, the same route the other examples on
// this page use; the query string carries the token and residential-proxy options.
const browser = await chromium.connectOverCDP(
  'wss://production-sfo.browserless.io/stealth?token=YOUR_API_TOKEN_HERE&proxy=residential&proxyCountry=us'
);

try {
  // Open the page in the first context the connected browser exposes.
  const context = browser.contexts()[0];
  const page = await context.newPage();

  await page.goto('https://www.imdb.com/chart/top/', {
    waitUntil: 'networkidle',
  });

  // Wait until at least one chart row has rendered before reading the list.
  await page.waitForSelector('.ipc-metadata-list-summary-item');

  // Runs inside the page: map every chart row to a plain { title, year, rating } object.
  const movies = await page.evaluate(() =>
    Array.from(document.querySelectorAll('.ipc-metadata-list-summary-item')).map((item) => ({
      title: item.querySelector('.ipc-title__text')?.innerText?.trim() ?? '',
      year: item.querySelector('.cli-title-metadata span')?.innerText?.trim() ?? '',
      rating: item.querySelector('.ipc-rating-star--imdb')?.getAttribute('aria-label') ?? '',
    }))
  );

  // Print only the first 10 entries to keep the output short.
  console.log(JSON.stringify(movies.slice(0, 10), null, 2));
} finally {
  // Always release the remote browser session, even if scraping fails.
  await browser.close();
}
3. Check the output
Save the script as scrape-imdb.mjs and run it with node scrape-imdb.mjs. It prints the first 10 movies; each object has title, year, and rating fields.
[
{
"title": "1. The Shawshank Redemption",
"year": "1994",
"rating": "IMDb rating: 9.3"
},
{
"title": "2. The Godfather",
"year": "1972",
"rating": "IMDb rating: 9.2"
}
]
1. Write the mutation
Navigate to IMDb's Top 250 chart, wait for the list to render, then extract movie titles, years, and ratings. We use /stealth/bql because IMDb (Amazon-owned) applies bot detection that blocks standard headless browsers.
# Scrape IMDb's Top 250 chart: navigate, wait for the list, then extract each row.
mutation ScrapeIMDb {
# Load the chart page and report the HTTP status of the navigation.
goto(url: "https://www.imdb.com/chart/top/", waitUntil: networkIdle) {
status
}
# Wait (up to 15000 ms) for the first chart row to appear.
waitForSelector(selector: ".ipc-metadata-list-summary-item", timeout: 15000) {
time
}
# One entry per chart row; the nested selectors are aliased to the output keys.
movies: mapSelector(selector: ".ipc-metadata-list-summary-item") {
title: mapSelector(selector: ".ipc-title__text") { innerText }
metadata: mapSelector(selector: ".cli-title-metadata span") { innerText }
# The rating is read from the element's aria-label attribute.
rating: mapSelector(selector: ".ipc-rating-star--imdb") {
ratingLabel: attribute(name: "aria-label") { value }
}
}
}
2. Run it
Paste into the BQL IDE and click Run.
3. Check the output
{
"data": {
"goto": { "status": 200 },
"waitForSelector": { "time": 2340 },
"movies": [
{
"title": [{ "innerText": "1. The Shawshank Redemption" }],
"metadata": [{ "innerText": "1994" }, { "innerText": "2h 22m" }],
"rating": [{ "ratingLabel": { "value": "IMDb rating: 9.3" } }]
},
{
"title": [{ "innerText": "2. The Godfather" }],
"metadata": [{ "innerText": "1972" }, { "innerText": "2h 55m" }],
"rating": [{ "ratingLabel": { "value": "IMDb rating: 9.2" } }]
}
]
}
}
Next steps
- Scrape GitHub Trending Repos -- scrape trending repositories from GitHub
- Scrape Amazon Product Listings -- scrape another Amazon-owned property with stealth mode
- Scrape Structured Data -- extract data from other sites using the
/scrape endpoint