added requests to scraper
This commit is contained in:
@@ -64,18 +64,26 @@ async function scrape(url, handler, options = {}) {
|
|||||||
} = options;
|
} = options;
|
||||||
if (!browser) await initHeadless();
|
if (!browser) await initHeadless();
|
||||||
const page = await context.newPage();
|
const page = await context.newPage();
|
||||||
|
let collectedRequests = [];
|
||||||
await page.route("**/*", (route) => {
|
await page.route("**/*", (route) => {
|
||||||
const req = route.request();
|
const req = route.request();
|
||||||
const url = req.url().toLowerCase();
|
const rUrl = req.url().toLowerCase();
|
||||||
const type = req.resourceType();
|
const type = req.resourceType();
|
||||||
if (
|
|
||||||
type === "font" ||
|
collectedRequests.push({
|
||||||
type === "media" ||
|
url: req.url(),
|
||||||
type === "manifest"
|
method: req.method(),
|
||||||
) return route.abort();
|
resourceType: type
|
||||||
if (BLOCK_LIST.some(k => url.includes(k))) return route.abort();
|
});
|
||||||
|
|
||||||
|
if (type === "font" || type === "media" || type === "manifest")
|
||||||
|
return route.abort();
|
||||||
|
|
||||||
|
if (BLOCK_LIST.some(k => rUrl.includes(k)))
|
||||||
|
return route.abort();
|
||||||
|
|
||||||
if (!loadImages && (
|
if (!loadImages && (
|
||||||
type === "image" || url.match(/\.(jpg|jpeg|png|gif|webp|svg)$/)
|
type === "image" || rUrl.match(/\.(jpg|jpeg|png|gif|webp|svg)$/)
|
||||||
)) return route.abort();
|
)) return route.abort();
|
||||||
route.continue();
|
route.continue();
|
||||||
});
|
});
|
||||||
@@ -93,8 +101,10 @@ async function scrape(url, handler, options = {}) {
|
|||||||
}
|
}
|
||||||
const result = await handler(page);
|
const result = await handler(page);
|
||||||
await page.close();
|
await page.close();
|
||||||
return { result , "": ""};
|
|
||||||
|
return { result, requests: collectedRequests };
|
||||||
}
|
}
|
||||||
|
|
||||||
async function closeScraper() {
|
async function closeScraper() {
|
||||||
if (context) await context.close();
|
if (context) await context.close();
|
||||||
if (browser) await browser.close();
|
if (browser) await browser.close();
|
||||||
|
|||||||
Reference in New Issue
Block a user