enhanced anime backend
This commit is contained in:
@@ -5,9 +5,14 @@ let context;
|
||||
|
||||
const BLOCK_LIST = [
|
||||
"google-analytics", "doubleclick", "facebook", "twitter",
|
||||
"adsystem", "analytics", "tracker", "pixel", "quantserve", "newrelic"
|
||||
"adsystem", "analytics", "tracker", "pixel", "quantserve", "newrelic",
|
||||
"hotjar", "yandex", "ads", "widgets", "gravatar", "fonts.googleapis",
|
||||
"map", "cdn.ampproject.org", "googletagmanager"
|
||||
|
||||
];
|
||||
|
||||
const ALLOWED_SCRIPTS = [];
|
||||
|
||||
async function initHeadless() {
|
||||
if (browser) return;
|
||||
|
||||
@@ -25,7 +30,12 @@ async function initHeadless() {
|
||||
"--mute-audio",
|
||||
"--no-first-run",
|
||||
"--no-zygote",
|
||||
"--single-process"
|
||||
"--single-process",
|
||||
|
||||
"--disable-software-rasterizer",
|
||||
"--disable-client-side-phishing-detection",
|
||||
"--no-default-browser-check",
|
||||
"--no-experiments"
|
||||
]
|
||||
});
|
||||
|
||||
@@ -35,7 +45,6 @@ async function initHeadless() {
|
||||
});
|
||||
}
|
||||
|
||||
// ✅ Scroll infinito
|
||||
async function turboScroll(page) {
|
||||
await page.evaluate(() => {
|
||||
return new Promise((resolve) => {
|
||||
@@ -47,6 +56,7 @@ async function turboScroll(page) {
|
||||
if (h === last) {
|
||||
same++;
|
||||
if (same >= 5) {
|
||||
|
||||
clearInterval(timer);
|
||||
resolve();
|
||||
}
|
||||
@@ -55,11 +65,11 @@ async function turboScroll(page) {
|
||||
last = h;
|
||||
}
|
||||
}, 20);
|
||||
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// ✅ Scrape principal
|
||||
async function scrape(url, handler, options = {}) {
|
||||
const {
|
||||
waitUntil = "domcontentloaded",
|
||||
@@ -67,56 +77,76 @@ async function scrape(url, handler, options = {}) {
|
||||
timeout = 10000,
|
||||
scrollToBottom = false,
|
||||
renderWaitTime = 0,
|
||||
loadImages = true
|
||||
loadImages = true,
|
||||
blockScripts = true
|
||||
|
||||
} = options;
|
||||
|
||||
if (!browser) await init();
|
||||
if (!browser) await initHeadless();
|
||||
|
||||
const page = await context.newPage();
|
||||
|
||||
// 🔒 Bloqueo de recursos
|
||||
await page.route("**/*", (route) => {
|
||||
const req = route.request();
|
||||
const url = req.url().toLowerCase();
|
||||
const resUrl = req.url().toLowerCase();
|
||||
const type = req.resourceType();
|
||||
|
||||
if (
|
||||
type === "font" ||
|
||||
type === "stylesheet" ||
|
||||
type === "media" ||
|
||||
type === "manifest"
|
||||
) return route.abort();
|
||||
type === "manifest" ||
|
||||
type === "other" ||
|
||||
|
||||
if (BLOCK_LIST.some(k => url.includes(k))) return route.abort();
|
||||
(blockScripts && type === "script" && !ALLOWED_SCRIPTS.some(k => resUrl.includes(k)))
|
||||
) {
|
||||
|
||||
return route.abort("blockedbyclient", { timeout: 100 });
|
||||
}
|
||||
|
||||
if (BLOCK_LIST.some(k => resUrl.includes(k))) {
|
||||
return route.abort("blockedbyclient", { timeout: 100 });
|
||||
}
|
||||
|
||||
if (!loadImages && (
|
||||
type === "image" || url.match(/\.(jpg|jpeg|png|gif|webp|svg)$/)
|
||||
)) return route.abort();
|
||||
type === "image" || resUrl.match(/\.(jpg|jpeg|png|gif|webp|svg)$/)
|
||||
)) {
|
||||
return route.abort("blockedbyclient", { timeout: 100 });
|
||||
}
|
||||
|
||||
route.continue();
|
||||
});
|
||||
|
||||
await page.goto(url, { waitUntil, timeout });
|
||||
try {
|
||||
await page.goto(url, { waitUntil, timeout });
|
||||
|
||||
if (waitSelector) {
|
||||
try {
|
||||
await page.waitForSelector(waitSelector, { timeout });
|
||||
} catch {}
|
||||
if (waitSelector) {
|
||||
try {
|
||||
await page.waitForSelector(waitSelector, { timeout });
|
||||
} catch (e) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if (scrollToBottom) {
|
||||
await turboScroll(page);
|
||||
}
|
||||
|
||||
if (renderWaitTime > 0) {
|
||||
|
||||
await new Promise(r => setTimeout(r, renderWaitTime));
|
||||
}
|
||||
|
||||
return await handler(page);
|
||||
|
||||
} catch (error) {
|
||||
console.error(`Error durante el scraping de ${url}:`, error);
|
||||
return null;
|
||||
|
||||
} finally {
|
||||
|
||||
await page.close();
|
||||
}
|
||||
|
||||
if (scrollToBottom) {
|
||||
await turboScroll(page);
|
||||
}
|
||||
|
||||
if (renderWaitTime > 0) {
|
||||
await new Promise(r => setTimeout(r, renderWaitTime));
|
||||
}
|
||||
|
||||
const result = await handler(page);
|
||||
|
||||
await page.close();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
async function closeScraper() {
|
||||
|
||||
Reference in New Issue
Block a user