added requests to scraper

This commit is contained in:
2025-12-11 19:03:07 +01:00
parent 6476911c63
commit 79f1733aef

View File

@@ -64,18 +64,26 @@ async function scrape(url, handler, options = {}) {
} = options; } = options;
if (!browser) await initHeadless(); if (!browser) await initHeadless();
const page = await context.newPage(); const page = await context.newPage();
let collectedRequests = [];
await page.route("**/*", (route) => { await page.route("**/*", (route) => {
const req = route.request(); const req = route.request();
const url = req.url().toLowerCase(); const rUrl = req.url().toLowerCase();
const type = req.resourceType(); const type = req.resourceType();
if (
type === "font" || collectedRequests.push({
type === "media" || url: req.url(),
type === "manifest" method: req.method(),
) return route.abort(); resourceType: type
if (BLOCK_LIST.some(k => url.includes(k))) return route.abort(); });
if (type === "font" || type === "media" || type === "manifest")
return route.abort();
if (BLOCK_LIST.some(k => rUrl.includes(k)))
return route.abort();
if (!loadImages && ( if (!loadImages && (
type === "image" || url.match(/\.(jpg|jpeg|png|gif|webp|svg)$/) type === "image" || rUrl.match(/\.(jpg|jpeg|png|gif|webp|svg)$/)
)) return route.abort(); )) return route.abort();
route.continue(); route.continue();
}); });
@@ -93,8 +101,10 @@ async function scrape(url, handler, options = {}) {
} }
const result = await handler(page); const result = await handler(page);
await page.close(); await page.close();
return { result , "": ""};
return { result, requests: collectedRequests };
} }
async function closeScraper() { async function closeScraper() {
if (context) await context.close(); if (context) await context.close();
if (browser) await browser.close(); if (browser) await browser.close();