Scrape GitHub trending repos
Extract repository names, descriptions, languages, and star counts from GitHub's trending page.
- Prerequisite: a Browserless API token from your account dashboard
Steps
GitHub's trending page is server-rendered and doesn't require stealth mode or residential proxies. A standard Browserless session handles it without issues. The examples below extract today's trending repositories.
- AI Agent
- REST API
- Frameworks
- BQL
Use the Browserless MCP server to scrape GitHub's trending repositories from any MCP-compatible AI agent (Claude Desktop, Cursor, Windsurf, ChatGPT, etc.).
1. Connect the MCP server
Send this prompt to your AI agent to install the Browserless MCP server:
Go to https://github.com/browserless/browserless-mcp/blob/main/install.md
and follow the instructions to install the Browserless MCP server
for my client.
2. Scrape GitHub trending
Use browserless_smartscraper. GitHub's trending page is straightforward to scrape.
Use the browserless_smartscraper tool to scrape the trending repositories
from https://github.com/trending
and return the results as markdown
Send the BQL mutation over HTTP. GitHub's trending page doesn't need stealth mode, so we use the standard /chromium/bql endpoint.
- cURL
- JavaScript
- Python
- Java
- C#
1. Send the request
# POST the BQL mutation as a JSON body to the standard /chromium/bql endpoint.
# Replace YOUR_API_TOKEN_HERE with your Browserless API token.
# The GraphQL query is embedded in a JSON string, so each of its double quotes
# is backslash-escaped; the whole body is single-quoted for the shell.
curl -X POST \
"https://production-sfo.browserless.io/chromium/bql?token=YOUR_API_TOKEN_HERE" \
-H "Content-Type: application/json" \
-d '{
"query": "mutation ScrapeGitHubTrending { goto(url: \"https://github.com/trending\", waitUntil: networkIdle) { status } waitForSelector(selector: \"article.Box-row\", timeout: 15000) { time } repos: mapSelector(selector: \"article.Box-row\") { name: mapSelector(selector: \"h2 a\") { innerText } description: mapSelector(selector: \"p\") { innerText } language: mapSelector(selector: \"[itemprop=programmingLanguage]\") { innerText } stars: mapSelector(selector: \"a[href*=stargazers]\") { innerText } forks: mapSelector(selector: \"a[href*=forks]\") { innerText } todayStars: mapSelector(selector: \"span.d-inline-block.float-sm-right\") { innerText } } }",
"variables": {}
}'
2. Check the output
{
"data": {
"goto": { "status": 200 },
"waitForSelector": { "time": 1245 },
"repos": [
{
"name": [{ "innerText": "vercel / next.js" }],
"description": [{ "innerText": "The React Framework" }],
"language": [{ "innerText": "JavaScript" }],
"stars": [{ "innerText": "128,456" }],
"forks": [{ "innerText": "27,891" }],
"todayStars": [{ "innerText": "245 stars today" }]
},
{
"name": [{ "innerText": "denoland / deno" }],
"description": [{ "innerText": "A modern runtime for JavaScript and TypeScript" }],
"language": [{ "innerText": "Rust" }],
"stars": [{ "innerText": "98,234" }],
"forks": [{ "innerText": "5,412" }],
"todayStars": [{ "innerText": "189 stars today" }]
}
]
}
}
1. Send the request
// BQL mutation: open the trending page, wait for the repository rows to
// render, then map every row to the fields we want to extract.
const query = `mutation ScrapeGitHubTrending {
  goto(url: "https://github.com/trending", waitUntil: networkIdle) {
    status
  }
  waitForSelector(selector: "article.Box-row", timeout: 15000) {
    time
  }
  repos: mapSelector(selector: "article.Box-row") {
    name: mapSelector(selector: "h2 a") { innerText }
    description: mapSelector(selector: "p") { innerText }
    language: mapSelector(selector: "[itemprop=programmingLanguage]") { innerText }
    stars: mapSelector(selector: "a[href*=stargazers]") { innerText }
    forks: mapSelector(selector: "a[href*=forks]") { innerText }
    todayStars: mapSelector(selector: "span.d-inline-block.float-sm-right") { innerText }
  }
}`;

const response = await fetch(
  'https://production-sfo.browserless.io/chromium/bql?token=YOUR_API_TOKEN_HERE',
  {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ query, variables: {} }),
  }
);

// fetch() only rejects on network failures, so HTTP errors (for example an
// invalid token) have to be surfaced explicitly.
if (!response.ok) {
  throw new Error(
    `Browserless request failed: ${response.status} ${response.statusText}`
  );
}

const { data, errors } = await response.json();

// A failed query is reported through `errors` and may leave `data` empty;
// fail with a readable message instead of a TypeError on `data.repos`.
if (!data?.repos) {
  const detail =
    errors?.map(({ message }) => message).join('; ') ?? 'no data returned';
  throw new Error(`BQL query failed: ${detail}`);
}

console.log(JSON.stringify(data.repos, null, 2));
2. Check the output
[
{
"name": [{ "innerText": "vercel / next.js" }],
"description": [{ "innerText": "The React Framework" }],
"language": [{ "innerText": "JavaScript" }],
"stars": [{ "innerText": "128,456" }],
"forks": [{ "innerText": "27,891" }],
"todayStars": [{ "innerText": "245 stars today" }]
},
{
"name": [{ "innerText": "denoland / deno" }],
"description": [{ "innerText": "A modern runtime for JavaScript and TypeScript" }],
"language": [{ "innerText": "Rust" }],
"stars": [{ "innerText": "98,234" }],
"forks": [{ "innerText": "5,412" }],
"todayStars": [{ "innerText": "189 stars today" }]
}
]
1. Install dependencies
pip install requests
2. Send the request
import requests

# BQL mutation: open the trending page, wait for the repository rows to
# render, then map every row to the fields we want to extract.
query = """
mutation ScrapeGitHubTrending {
  goto(url: "https://github.com/trending", waitUntil: networkIdle) {
    status
  }
  waitForSelector(selector: "article.Box-row", timeout: 15000) {
    time
  }
  repos: mapSelector(selector: "article.Box-row") {
    name: mapSelector(selector: "h2 a") { innerText }
    description: mapSelector(selector: "p") { innerText }
    language: mapSelector(selector: "[itemprop=programmingLanguage]") { innerText }
    stars: mapSelector(selector: "a[href*=stargazers]") { innerText }
    forks: mapSelector(selector: "a[href*=forks]") { innerText }
    todayStars: mapSelector(selector: "span.d-inline-block.float-sm-right") { innerText }
  }
}
"""


def first_text(matches, default=''):
    """Return the stripped innerText of the first match, or `default` if the selector matched nothing."""
    return matches[0]['innerText'].strip() if matches else default


response = requests.post(
    'https://production-sfo.browserless.io/chromium/bql',
    params={'token': 'YOUR_API_TOKEN_HERE'},
    json={'query': query, 'variables': {}},
    # requests waits indefinitely by default; bound the call so a stuck
    # session cannot hang the script.
    timeout=60,
)
# Surface HTTP errors (for example an invalid token) before parsing the body.
response.raise_for_status()

payload = response.json()
data = payload.get('data')
# A failed query is reported through "errors" and may leave "data" empty.
if not data or data.get('repos') is None:
    raise RuntimeError(f"BQL query failed: {payload.get('errors')}")

for repo in data['repos']:
    # Collapse any runs of whitespace inside the "owner / repo" link text.
    name = ' '.join(first_text(repo['name']).split())
    desc = first_text(repo['description'])
    lang = first_text(repo['language'], 'N/A')
    stars = first_text(repo['stars'], '0')
    print(f'{name} ({lang}) - {stars} stars - {desc}')
3. Check the output
vercel / next.js (JavaScript) - 128,456 stars - The React Framework
denoland / deno (Rust) - 98,234 stars - A modern runtime for JavaScript and TypeScript
1. Send the request
// Sends the ScrapeGitHubTrending BQL mutation to the /chromium/bql endpoint
// and prints the raw JSON response body.
import java.net.URI;
import java.net.http.*;
// Replace the placeholder with your Browserless API token; it is passed as a
// query-string parameter on the endpoint URL.
String token = "YOUR_API_TOKEN_HERE";
String endpoint = "https://production-sfo.browserless.io/chromium/bql?token=" + token;
// The query is spliced into a hand-built JSON string below, so every GraphQL
// double quote is written as \\\" in Java source: at runtime that is the two
// characters \" -- an escaped quote inside the JSON string value.
// This variant requests name, description, language, and stars for each row.
String query = "mutation ScrapeGitHubTrending {"
+ " goto(url: \\\"https://github.com/trending\\\", waitUntil: networkIdle) { status }"
+ " waitForSelector(selector: \\\"article.Box-row\\\", timeout: 15000) { time }"
+ " repos: mapSelector(selector: \\\"article.Box-row\\\") {"
+ " name: mapSelector(selector: \\\"h2 a\\\") { innerText }"
+ " description: mapSelector(selector: \\\"p\\\") { innerText }"
+ " language: mapSelector(selector: \\\"[itemprop=programmingLanguage]\\\") { innerText }"
+ " stars: mapSelector(selector: \\\"a[href*=stargazers]\\\") { innerText }"
+ " }"
+ " }";
// No JSON library is used: the request envelope is assembled by concatenation,
// which is why the query had to be pre-escaped above.
String payload = "{\"query\": \"" + query + "\", \"variables\": {}}";
HttpClient client = HttpClient.newHttpClient();
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(endpoint))
.header("Content-Type", "application/json")
.POST(HttpRequest.BodyPublishers.ofString(payload))
.build();
// The response body is read as a String and printed without parsing.
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
System.out.println(response.body());
2. Check the output
{
"data": {
"goto": { "status": 200 },
"waitForSelector": { "time": 1245 },
"repos": [
{
"name": [{ "innerText": "vercel / next.js" }],
"description": [{ "innerText": "The React Framework" }],
"language": [{ "innerText": "JavaScript" }],
"stars": [{ "innerText": "128,456" }]
}
]
}
}
1. Send the request
using System.Net.Http;
using System.Text;
using System.Text.Json;

// Replace the placeholder with your Browserless API token; it is sent as a
// query-string parameter on the endpoint URL.
string token = "YOUR_API_TOKEN_HERE";
string endpoint = $"https://production-sfo.browserless.io/chromium/bql?token={token}";

// BQL mutation requesting name, description, language, stars, and forks for
// each trending row. The lines are flush-left because a verbatim string keeps
// leading whitespace as part of its value.
string bqlQuery = @"mutation ScrapeGitHubTrending {
goto(url: ""https://github.com/trending"", waitUntil: networkIdle) { status }
waitForSelector(selector: ""article.Box-row"", timeout: 15000) { time }
repos: mapSelector(selector: ""article.Box-row"") {
name: mapSelector(selector: ""h2 a"") { innerText }
description: mapSelector(selector: ""p"") { innerText }
language: mapSelector(selector: ""[itemprop=programmingLanguage]"") { innerText }
stars: mapSelector(selector: ""a[href*=stargazers]"") { innerText }
forks: mapSelector(selector: ""a[href*=forks]"") { innerText }
}
}";

// GraphQL request envelope: { query, variables }. The serializer handles all
// JSON escaping of the query text.
string requestJson = JsonSerializer.Serialize(new
{
    query = bqlQuery,
    variables = new { },
});

using var httpClient = new HttpClient();
var requestContent = new StringContent(requestJson, Encoding.UTF8, "application/json");

// Post the mutation and print the raw JSON response body.
HttpResponseMessage response = await httpClient.PostAsync(endpoint, requestContent);
Console.WriteLine(await response.Content.ReadAsStringAsync());
2. Check the output
{
"data": {
"goto": { "status": 200 },
"waitForSelector": { "time": 1245 },
"repos": [
{
"name": [{ "innerText": "vercel / next.js" }],
"description": [{ "innerText": "The React Framework" }],
"language": [{ "innerText": "JavaScript" }],
"stars": [{ "innerText": "128,456" }],
"forks": [{ "innerText": "27,891" }]
}
]
}
}
Connect a headless browser and extract trending repository data from the rendered page. GitHub doesn't need stealth mode or proxies.
- Puppeteer
- Playwright
1. Install dependencies
npm install puppeteer-core
2. Connect and scrape
import puppeteer from 'puppeteer-core';

/**
 * Collects one record per trending repository row.
 * It is handed to page.evaluate, so it must be self-contained: it can only
 * use the page's own DOM, not variables from this script.
 */
function collectTrendingRepos() {
  const readText = (row, selector) => row.querySelector(selector)?.innerText;

  return Array.from(document.querySelectorAll('article.Box-row'), (row) => ({
    // The "owner / repo" link text can span several lines; flatten it.
    name: readText(row, 'h2 a')?.replace(/\s+/g, ' ').trim() ?? '',
    description: readText(row, 'p')?.trim() ?? '',
    language: readText(row, '[itemprop="programmingLanguage"]')?.trim() ?? '',
    stars: readText(row, 'a[href*="stargazers"]')?.trim() ?? '',
  }));
}

// Replace the placeholder with your Browserless API token.
const browserWSEndpoint =
  'wss://production-sfo.browserless.io?token=YOUR_API_TOKEN_HERE';

const browser = await puppeteer.connect({ browserWSEndpoint });

try {
  const page = await browser.newPage();
  await page.goto('https://github.com/trending', { waitUntil: 'networkidle2' });
  await page.waitForSelector('article.Box-row');

  const repos = await page.evaluate(collectTrendingRepos);
  console.log(JSON.stringify(repos, null, 2));
} finally {
  // Always release the remote browser, even when scraping fails.
  await browser.close();
}
3. Check the output
Run with node scrape-github-trending.mjs. Each object has name, description, language, and stars fields.
[
{
"name": "vercel / next.js",
"description": "The React Framework",
"language": "JavaScript",
"stars": "128,456"
}
]
1. Install dependencies
npm install playwright-core
2. Connect and scrape
import { chromium } from 'playwright-core';

/**
 * Collects one record per trending repository row.
 * It is handed to page.evaluate, so it must be self-contained: it can only
 * use the page's own DOM, not variables from this script.
 */
function collectTrendingRepos() {
  const readText = (row, selector) => row.querySelector(selector)?.innerText;

  return Array.from(document.querySelectorAll('article.Box-row'), (row) => ({
    // The "owner / repo" link text can span several lines; flatten it.
    name: readText(row, 'h2 a')?.replace(/\s+/g, ' ').trim() ?? '',
    description: readText(row, 'p')?.trim() ?? '',
    language: readText(row, '[itemprop="programmingLanguage"]')?.trim() ?? '',
    stars: readText(row, 'a[href*="stargazers"]')?.trim() ?? '',
  }));
}

// Replace the placeholder with your Browserless API token.
const wsEndpoint = 'wss://production-sfo.browserless.io?token=YOUR_API_TOKEN_HERE';

const browser = await chromium.connectOverCDP(wsEndpoint);

try {
  // Use the first context already present on the connected browser.
  const [context] = browser.contexts();
  const page = await context.newPage();
  await page.goto('https://github.com/trending', { waitUntil: 'networkidle' });
  await page.waitForSelector('article.Box-row');

  const repos = await page.evaluate(collectTrendingRepos);
  console.log(JSON.stringify(repos, null, 2));
} finally {
  // Always release the remote browser, even when scraping fails.
  await browser.close();
}
3. Check the output
Run with node scrape-github-trending.mjs. Each object has name, description, language, and stars fields.
[
{
"name": "vercel / next.js",
"description": "The React Framework",
"language": "JavaScript",
"stars": "128,456"
}
]
1. Write the mutation
Navigate to GitHub's trending page and extract repo names, descriptions, languages, and star counts. GitHub doesn't need stealth mode, so this works on the standard /chromium/bql endpoint.
# Loads GitHub's trending page and returns one entry per repository row.
mutation ScrapeGitHubTrending {
# Navigate to the page; the response exposes the HTTP status.
goto(url: "https://github.com/trending", waitUntil: networkIdle) {
status
}
# Make sure at least one repository row is present before extracting
# (timeout is presumably in milliseconds -- confirm against the BQL reference).
waitForSelector(selector: "article.Box-row", timeout: 15000) {
time
}
# One result per row; each nested mapSelector returns a list of matches,
# so every field in the output is an array of { innerText } objects.
repos: mapSelector(selector: "article.Box-row") {
name: mapSelector(selector: "h2 a") { innerText }
description: mapSelector(selector: "p") { innerText }
language: mapSelector(selector: "[itemprop=programmingLanguage]") { innerText }
stars: mapSelector(selector: "a[href*=stargazers]") { innerText }
forks: mapSelector(selector: "a[href*=forks]") { innerText }
todayStars: mapSelector(selector: "span.d-inline-block.float-sm-right") { innerText }
}
}
2. Run it
Paste into the BQL IDE and click Run.
3. Check the output
{
"data": {
"goto": { "status": 200 },
"waitForSelector": { "time": 1245 },
"repos": [
{
"name": [{ "innerText": "vercel / next.js" }],
"description": [{ "innerText": "The React Framework" }],
"language": [{ "innerText": "JavaScript" }],
"stars": [{ "innerText": "128,456" }],
"forks": [{ "innerText": "27,891" }],
"todayStars": [{ "innerText": "245 stars today" }]
},
{
"name": [{ "innerText": "denoland / deno" }],
"description": [{ "innerText": "A modern runtime for JavaScript and TypeScript" }],
"language": [{ "innerText": "Rust" }],
"stars": [{ "innerText": "98,234" }],
"forks": [{ "innerText": "5,412" }],
"todayStars": [{ "innerText": "189 stars today" }]
}
]
}
}
Next steps
- Scrape IMDb Movie Ratings -- extract movie data from another popular site
- Scrape Structured Data -- use the /scrape endpoint for simpler extraction
- Automate Google Search -- pull search results with the /stealth/bql endpoint