Added collected requests to the scraper's result
This commit is contained in:
@@ -64,18 +64,26 @@ async function scrape(url, handler, options = {}) {
|
||||
} = options;
|
||||
if (!browser) await initHeadless();
|
||||
const page = await context.newPage();
|
||||
let collectedRequests = [];
|
||||
await page.route("**/*", (route) => {
|
||||
const req = route.request();
|
||||
const url = req.url().toLowerCase();
|
||||
const rUrl = req.url().toLowerCase();
|
||||
const type = req.resourceType();
|
||||
if (
|
||||
type === "font" ||
|
||||
type === "media" ||
|
||||
type === "manifest"
|
||||
) return route.abort();
|
||||
if (BLOCK_LIST.some(k => url.includes(k))) return route.abort();
|
||||
|
||||
collectedRequests.push({
|
||||
url: req.url(),
|
||||
method: req.method(),
|
||||
resourceType: type
|
||||
});
|
||||
|
||||
if (type === "font" || type === "media" || type === "manifest")
|
||||
return route.abort();
|
||||
|
||||
if (BLOCK_LIST.some(k => rUrl.includes(k)))
|
||||
return route.abort();
|
||||
|
||||
if (!loadImages && (
|
||||
type === "image" || url.match(/\.(jpg|jpeg|png|gif|webp|svg)$/)
|
||||
type === "image" || rUrl.match(/\.(jpg|jpeg|png|gif|webp|svg)$/)
|
||||
)) return route.abort();
|
||||
route.continue();
|
||||
});
|
||||
@@ -93,8 +101,10 @@ async function scrape(url, handler, options = {}) {
|
||||
}
|
||||
const result = await handler(page);
|
||||
await page.close();
|
||||
return { result , "": ""};
|
||||
|
||||
return { result, requests: collectedRequests };
|
||||
}
|
||||
|
||||
async function closeScraper() {
|
||||
if (context) await context.close();
|
||||
if (browser) await browser.close();
|
||||
|
||||
Reference in New Issue
Block a user