enhanced anime backend
This commit is contained in:
@@ -6,7 +6,7 @@ import {Anime, AnimeRequest, SearchRequest, WatchStreamRequest} from '../types';
|
|||||||
export async function getAnime(req: AnimeRequest, reply: FastifyReply) {
|
export async function getAnime(req: AnimeRequest, reply: FastifyReply) {
|
||||||
try {
|
try {
|
||||||
const { id } = req.params;
|
const { id } = req.params;
|
||||||
const source = req.query.ext || 'anilist';
|
const source = req.query.source;
|
||||||
|
|
||||||
let anime: Anime | { error: string };
|
let anime: Anime | { error: string };
|
||||||
if (source === 'anilist') {
|
if (source === 'anilist') {
|
||||||
@@ -25,12 +25,12 @@ export async function getAnime(req: AnimeRequest, reply: FastifyReply) {
|
|||||||
export async function getAnimeEpisodes(req: AnimeRequest, reply: FastifyReply) {
|
export async function getAnimeEpisodes(req: AnimeRequest, reply: FastifyReply) {
|
||||||
try {
|
try {
|
||||||
const { id } = req.params;
|
const { id } = req.params;
|
||||||
const extensionName = req.query.ext || 'anilist';
|
const source = req.query.source || 'anilist';
|
||||||
const ext = getExtension(extensionName);
|
const ext = getExtension(source);
|
||||||
|
|
||||||
return await animeService.searchEpisodesInExtension(
|
return await animeService.searchEpisodesInExtension(
|
||||||
ext,
|
ext,
|
||||||
extensionName,
|
source,
|
||||||
id
|
id
|
||||||
);
|
);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@@ -87,29 +87,16 @@ export async function searchInExtension(req: any, reply: FastifyReply) {
|
|||||||
|
|
||||||
export async function getWatchStream(req: WatchStreamRequest, reply: FastifyReply) {
|
export async function getWatchStream(req: WatchStreamRequest, reply: FastifyReply) {
|
||||||
try {
|
try {
|
||||||
const { animeId, episode, server, category, ext } = req.query;
|
const { animeId, episode, server, category, ext, source } = req.query;
|
||||||
|
|
||||||
const extension = getExtension(ext);
|
const extension = getExtension(ext);
|
||||||
if (!extension) return { error: "Extension not found" };
|
if (!extension) return { error: "Extension not found" };
|
||||||
|
|
||||||
let anime: Anime | { error: string };
|
|
||||||
if (!isNaN(Number(animeId))) {
|
|
||||||
anime = await animeService.getAnimeById(animeId);
|
|
||||||
if ('error' in anime) return { error: "Anime metadata not found" };
|
|
||||||
} else {
|
|
||||||
const results = await animeService.searchAnimeInExtension(
|
|
||||||
extension,
|
|
||||||
ext,
|
|
||||||
animeId.replace(/--/g, '\u0000').replace(/-/g, ' ').replace(new RegExp('\u0000', 'g'), '-').trim()
|
|
||||||
);
|
|
||||||
anime = results[0];
|
|
||||||
if (!anime) return { error: "Anime not found in extension search" };
|
|
||||||
}
|
|
||||||
|
|
||||||
return await animeService.getStreamData(
|
return await animeService.getStreamData(
|
||||||
extension,
|
extension,
|
||||||
anime,
|
|
||||||
episode,
|
episode,
|
||||||
|
animeId,
|
||||||
|
source,
|
||||||
server,
|
server,
|
||||||
category
|
category
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -142,8 +142,7 @@ export async function searchEpisodesInExtension(ext: Extension | null, name: str
|
|||||||
const title = await getExtensionTitle(name, query);
|
const title = await getExtensionTitle(name, query);
|
||||||
let mediaId: string;
|
let mediaId: string;
|
||||||
|
|
||||||
if (title) {
|
if (!title) {
|
||||||
|
|
||||||
const matches = await ext.search({
|
const matches = await ext.search({
|
||||||
query,
|
query,
|
||||||
media: {
|
media: {
|
||||||
@@ -161,7 +160,6 @@ export async function searchEpisodesInExtension(ext: Extension | null, name: str
|
|||||||
mediaId = res.id;
|
mediaId = res.id;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
mediaId = query;
|
mediaId = query;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -179,7 +177,6 @@ export async function searchEpisodesInExtension(ext: Extension | null, name: str
|
|||||||
await setCache(cacheKey, result, CACHE_TTL_MS);
|
await setCache(cacheKey, result, CACHE_TTL_MS);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error(`Extension search failed for ${name}:`, e);
|
console.error(`Extension search failed for ${name}:`, e);
|
||||||
}
|
}
|
||||||
@@ -188,10 +185,10 @@ export async function searchEpisodesInExtension(ext: Extension | null, name: str
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function getStreamData(extension: Extension, animeData: Anime, episode: string, server?: string, category?: string): Promise<StreamData> {
|
export async function getStreamData(extension: Extension, episode: string, id: string, source: string, server?: string, category?: string): Promise<StreamData> {
|
||||||
const providerName = extension.constructor.name;
|
const providerName = extension.constructor.name;
|
||||||
|
|
||||||
const cacheKey = `anime:stream:${providerName}:${animeData.id}:${episode}:${server || 'default'}:${category || 'sub'}`;
|
const cacheKey = `anime:stream:${providerName}:${id}:${episode}:${server || 'default'}:${category || 'sub'}`;
|
||||||
|
|
||||||
const cached = await getCache(cacheKey);
|
const cached = await getCache(cacheKey);
|
||||||
|
|
||||||
@@ -210,28 +207,18 @@ export async function getStreamData(extension: Extension, animeData: Anime, epis
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const searchOptions = {
|
if (!extension.findEpisodes || !extension.findEpisodeServer) {
|
||||||
query: animeData.title.english || animeData.title.romaji,
|
|
||||||
dub: category === 'dub',
|
|
||||||
media: {
|
|
||||||
romajiTitle: animeData.title.romaji,
|
|
||||||
englishTitle: animeData.title.english || "",
|
|
||||||
startDate: animeData.startDate || { year: 0, month: 0, day: 0 }
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if (!extension.search || !extension.findEpisodes || !extension.findEpisodeServer) {
|
|
||||||
throw new Error("Extension doesn't support required methods");
|
throw new Error("Extension doesn't support required methods");
|
||||||
}
|
}
|
||||||
|
let episodes;
|
||||||
|
|
||||||
const searchResults = await extension.search(searchOptions);
|
if (source === "anilist"){
|
||||||
|
const anime: any = await getAnimeById(id)
|
||||||
if (!searchResults || searchResults.length === 0) {
|
episodes = await searchEpisodesInExtension(extension, extension.constructor.name, anime.title.romaji);
|
||||||
throw new Error("Anime not found on provider");
|
}
|
||||||
|
else{
|
||||||
|
episodes = await extension.findEpisodes(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
const bestMatch = searchResults[0];
|
|
||||||
const episodes = await extension.findEpisodes(bestMatch.id);
|
|
||||||
const targetEp = episodes.find(e => e.number === parseInt(episode));
|
const targetEp = episodes.find(e => e.number === parseInt(episode));
|
||||||
|
|
||||||
if (!targetEp) {
|
if (!targetEp) {
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import { FastifyReply } from 'fastify';
|
import {FastifyReply} from 'fastify';
|
||||||
import { proxyRequest, processM3U8Content, streamToReadable } from './proxy.service';
|
import {processM3U8Content, proxyRequest, streamToReadable} from './proxy.service';
|
||||||
import { ProxyRequest } from '../types';
|
import {ProxyRequest} from '../types';
|
||||||
|
|
||||||
export async function handleProxy(req: ProxyRequest, reply: FastifyReply) {
|
export async function handleProxy(req: ProxyRequest, reply: FastifyReply) {
|
||||||
const { url, referer, origin, userAgent } = req.query;
|
const { url, referer, origin, userAgent } = req.query;
|
||||||
@@ -10,7 +10,7 @@ export async function handleProxy(req: ProxyRequest, reply: FastifyReply) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const { response, contentType, isM3U8 } = await proxyRequest(url, {
|
const { response, contentType, isM3U8, contentLength } = await proxyRequest(url, {
|
||||||
referer,
|
referer,
|
||||||
origin,
|
origin,
|
||||||
userAgent
|
userAgent
|
||||||
@@ -18,28 +18,43 @@ export async function handleProxy(req: ProxyRequest, reply: FastifyReply) {
|
|||||||
|
|
||||||
reply.header('Access-Control-Allow-Origin', '*');
|
reply.header('Access-Control-Allow-Origin', '*');
|
||||||
reply.header('Access-Control-Allow-Methods', 'GET, OPTIONS');
|
reply.header('Access-Control-Allow-Methods', 'GET, OPTIONS');
|
||||||
|
reply.header('Access-Control-Allow-Headers', 'Content-Type, Range');
|
||||||
|
reply.header('Access-Control-Expose-Headers', 'Content-Length, Content-Range, Accept-Ranges');
|
||||||
|
|
||||||
if (contentType) {
|
if (contentType) {
|
||||||
reply.header('Content-Type', contentType);
|
reply.header('Content-Type', contentType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (contentLength) {
|
||||||
|
reply.header('Content-Length', contentLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (contentType?.startsWith('image/') || contentType?.startsWith('video/')) {
|
||||||
|
reply.header('Cache-Control', 'public, max-age=31536000, immutable');
|
||||||
|
}
|
||||||
|
|
||||||
|
reply.header('Accept-Ranges', 'bytes');
|
||||||
|
|
||||||
if (isM3U8) {
|
if (isM3U8) {
|
||||||
const text = await response.text();
|
const text = await response.text();
|
||||||
const baseUrl = new URL(response.url);
|
const baseUrl = new URL(response.url);
|
||||||
|
|
||||||
const processed = processM3U8Content(text, baseUrl, {
|
const processedContent = processM3U8Content(text, baseUrl, {
|
||||||
referer,
|
referer,
|
||||||
origin,
|
origin,
|
||||||
userAgent
|
userAgent
|
||||||
});
|
});
|
||||||
|
|
||||||
return processed;
|
return reply.send(processedContent);
|
||||||
}
|
}
|
||||||
|
|
||||||
return reply.send(streamToReadable(response.body!));
|
return reply.send(streamToReadable(response.body!));
|
||||||
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
req.server.log.error(err);
|
req.server.log.error(err);
|
||||||
return reply.code(500).send({ error: "Internal Server Error" });
|
|
||||||
|
if (!reply.sent) {
|
||||||
|
return reply.code(500).send({ error: "Internal Server Error" });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -10,39 +10,79 @@ interface ProxyResponse {
|
|||||||
response: Response;
|
response: Response;
|
||||||
contentType: string | null;
|
contentType: string | null;
|
||||||
isM3U8: boolean;
|
isM3U8: boolean;
|
||||||
|
contentLength: string | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function proxyRequest(url: string, { referer, origin, userAgent }: ProxyHeaders): Promise<ProxyResponse> {
|
export async function proxyRequest(url: string, { referer, origin, userAgent }: ProxyHeaders): Promise<ProxyResponse> {
|
||||||
const headers: Record<string, string> = {
|
const headers: Record<string, string> = {
|
||||||
'User-Agent': userAgent || "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
'User-Agent': userAgent || "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||||
'Accept': '*/*',
|
'Accept': '*/*',
|
||||||
'Accept-Language': 'en-US,en;q=0.9'
|
'Accept-Language': 'en-US,en;q=0.9',
|
||||||
|
'Accept-Encoding': 'identity',
|
||||||
|
|
||||||
|
'Connection': 'keep-alive'
|
||||||
};
|
};
|
||||||
|
|
||||||
if (referer) headers['Referer'] = referer;
|
if (referer) headers['Referer'] = referer;
|
||||||
if (origin) headers['Origin'] = origin;
|
if (origin) headers['Origin'] = origin;
|
||||||
|
|
||||||
const response = await fetch(url, { headers, redirect: 'follow' });
|
let lastError: Error | null = null;
|
||||||
|
const maxRetries = 2;
|
||||||
|
|
||||||
if (!response.ok) {
|
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
||||||
throw new Error(`Proxy Error: ${response.statusText}`);
|
try {
|
||||||
|
|
||||||
|
const controller = new AbortController();
|
||||||
|
const timeoutId = setTimeout(() => controller.abort(), 60000);
|
||||||
|
|
||||||
|
const response = await fetch(url, {
|
||||||
|
headers,
|
||||||
|
redirect: 'follow',
|
||||||
|
signal: controller.signal
|
||||||
|
});
|
||||||
|
|
||||||
|
clearTimeout(timeoutId);
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
|
||||||
|
if (response.status === 404 || response.status === 403) {
|
||||||
|
throw new Error(`Proxy Error: ${response.status} ${response.statusText}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (attempt < maxRetries - 1) {
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 500));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new Error(`Proxy Error: ${response.status} ${response.statusText}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const contentType = response.headers.get('content-type');
|
||||||
|
const contentLength = response.headers.get('content-length');
|
||||||
|
const isM3U8 = (contentType && contentType.includes('mpegurl')) || url.includes('.m3u8');
|
||||||
|
|
||||||
|
return {
|
||||||
|
response,
|
||||||
|
contentType,
|
||||||
|
isM3U8,
|
||||||
|
contentLength
|
||||||
|
};
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
lastError = error as Error;
|
||||||
|
|
||||||
|
if (attempt === maxRetries - 1) {
|
||||||
|
throw lastError;
|
||||||
|
}
|
||||||
|
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 500));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const contentType = response.headers.get('content-type');
|
throw lastError || new Error('Unknown error in proxyRequest');
|
||||||
const isM3U8 = (contentType && contentType.includes('mpegurl')) || url.includes('.m3u8');
|
|
||||||
|
|
||||||
return {
|
|
||||||
response,
|
|
||||||
contentType,
|
|
||||||
isM3U8
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function processM3U8Content(
|
export function processM3U8Content(text: string, baseUrl: URL, { referer, origin, userAgent }: ProxyHeaders): string {
|
||||||
text: string,
|
|
||||||
baseUrl: URL,
|
|
||||||
{ referer, origin, userAgent }: ProxyHeaders
|
|
||||||
): string {
|
|
||||||
return text.replace(/^(?!#)(?!\s*$).+/gm, (line) => {
|
return text.replace(/^(?!#)(?!\s*$).+/gm, (line) => {
|
||||||
line = line.trim();
|
line = line.trim();
|
||||||
let absoluteUrl: string;
|
let absoluteUrl: string;
|
||||||
@@ -64,5 +104,35 @@ export function processM3U8Content(
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function streamToReadable(webStream: ReadableStream): Readable {
|
export function streamToReadable(webStream: ReadableStream): Readable {
|
||||||
return Readable.fromWeb(webStream as any);
|
const reader = webStream.getReader();
|
||||||
|
let readTimeout: NodeJS.Timeout;
|
||||||
|
|
||||||
|
return new Readable({
|
||||||
|
async read() {
|
||||||
|
try {
|
||||||
|
|
||||||
|
const timeoutPromise = new Promise((_, reject) => {
|
||||||
|
readTimeout = setTimeout(() => reject(new Error('Stream read timeout')), 10000);
|
||||||
|
});
|
||||||
|
|
||||||
|
const readPromise = reader.read();
|
||||||
|
const { done, value } = await Promise.race([readPromise, timeoutPromise]) as any;
|
||||||
|
|
||||||
|
clearTimeout(readTimeout);
|
||||||
|
|
||||||
|
if (done) {
|
||||||
|
this.push(null);
|
||||||
|
} else {
|
||||||
|
this.push(Buffer.from(value));
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
clearTimeout(readTimeout);
|
||||||
|
this.destroy(error as Error);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
destroy(error, callback) {
|
||||||
|
clearTimeout(readTimeout);
|
||||||
|
reader.cancel().then(() => callback(error)).catch(callback);
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
@@ -138,7 +138,7 @@ export interface AnimeParams {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export interface AnimeQuery {
|
export interface AnimeQuery {
|
||||||
ext?: string;
|
source?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface SearchQuery {
|
export interface SearchQuery {
|
||||||
@@ -150,6 +150,7 @@ export interface ExtensionNameParams {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export interface WatchStreamQuery {
|
export interface WatchStreamQuery {
|
||||||
|
source: string;
|
||||||
animeId: string;
|
animeId: string;
|
||||||
episode: string;
|
episode: string;
|
||||||
server?: string;
|
server?: string;
|
||||||
|
|||||||
@@ -26,8 +26,8 @@ async function loadAnime() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const fetchUrl = extensionName
|
const fetchUrl = extensionName
|
||||||
? `/api/anime/${animeId}?ext=${extensionName}`
|
? `/api/anime/${animeId}?source=${extensionName}`
|
||||||
: `/api/anime/${animeId}`;
|
: `/api/anime/${animeId}?source=anilist`;
|
||||||
const res = await fetch(fetchUrl);
|
const res = await fetch(fetchUrl);
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
|
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ document.addEventListener('click', (e) => {
|
|||||||
|
|
||||||
async function fetchSearh(query) {
|
async function fetchSearh(query) {
|
||||||
try {
|
try {
|
||||||
let apiUrl = `/api/search?q=${encodeURIComponent(query.slice(0, 30))}`;
|
let apiUrl = `/api/search?q=${encodeURIComponent(query)}`;
|
||||||
let extensionName = null;
|
let extensionName = null;
|
||||||
let finalQuery = query;
|
let finalQuery = query;
|
||||||
|
|
||||||
@@ -47,7 +47,7 @@ async function fetchSearh(query) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
apiUrl = `/api/search/${extensionName}?q=${encodeURIComponent(finalQuery.slice(0, 30))}`;
|
apiUrl = `/api/search/${extensionName}?q=${encodeURIComponent(finalQuery)}`;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ document.getElementById('episode-label').innerText = `Episode ${currentEpisode}`
|
|||||||
|
|
||||||
async function loadMetadata() {
|
async function loadMetadata() {
|
||||||
try {
|
try {
|
||||||
const extQuery = extName ? `?ext=${extName}` : "";
|
const extQuery = extName ? `?source=${extName}` : "?source=anilist";
|
||||||
const res = await fetch(`/api/anime/${animeId}${extQuery}`);
|
const res = await fetch(`/api/anime/${animeId}${extQuery}`);
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
|
|
||||||
@@ -108,7 +108,7 @@ async function loadMetadata() {
|
|||||||
|
|
||||||
async function loadExtensionEpisodes() {
|
async function loadExtensionEpisodes() {
|
||||||
try {
|
try {
|
||||||
const extQuery = extName ? `?ext=${extName}` : "";
|
const extQuery = extName ? `?source=${extName}` : "?source=anilist";
|
||||||
const res = await fetch(`/api/anime/${animeId}/episodes${extQuery}`);
|
const res = await fetch(`/api/anime/${animeId}/episodes${extQuery}`);
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
|
|
||||||
@@ -271,7 +271,11 @@ async function loadStream() {
|
|||||||
setLoading(`Loading stream (${audioMode})...`);
|
setLoading(`Loading stream (${audioMode})...`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const url = `/api/watch/stream?animeId=${animeId}&episode=${currentEpisode}&server=${server}&category=${audioMode}&ext=${currentExtension}`;
|
let sourc = "&source=anilist";
|
||||||
|
if (extName){
|
||||||
|
sourc = `&source=${extName}`;
|
||||||
|
}
|
||||||
|
const url = `/api/watch/stream?animeId=${animeId}&episode=${currentEpisode}&server=${server}&category=${audioMode}&ext=${currentExtension}${sourc}`;
|
||||||
const res = await fetch(url);
|
const res = await fetch(url);
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
|
|
||||||
|
|||||||
@@ -482,13 +482,12 @@ function updateURL(newChapter) {
|
|||||||
|
|
||||||
document.getElementById('back-btn').addEventListener('click', () => {
|
document.getElementById('back-btn').addEventListener('click', () => {
|
||||||
const parts = window.location.pathname.split('/');
|
const parts = window.location.pathname.split('/');
|
||||||
const provider = parts[2];
|
|
||||||
const mangaId = parts[4];
|
const mangaId = parts[4];
|
||||||
|
|
||||||
const urlParams = new URLSearchParams(window.location.search);
|
const urlParams = new URLSearchParams(window.location.search);
|
||||||
let source = urlParams.get('source');
|
let source = urlParams.get('source');
|
||||||
|
|
||||||
if (!source) {
|
if (source === 'anilist') {
|
||||||
window.location.href = `/book/${mangaId}`;
|
window.location.href = `/book/${mangaId}`;
|
||||||
} else {
|
} else {
|
||||||
window.location.href = `/book/${source}/${mangaId}`;
|
window.location.href = `/book/${source}/${mangaId}`;
|
||||||
|
|||||||
@@ -5,9 +5,14 @@ let context;
|
|||||||
|
|
||||||
const BLOCK_LIST = [
|
const BLOCK_LIST = [
|
||||||
"google-analytics", "doubleclick", "facebook", "twitter",
|
"google-analytics", "doubleclick", "facebook", "twitter",
|
||||||
"adsystem", "analytics", "tracker", "pixel", "quantserve", "newrelic"
|
"adsystem", "analytics", "tracker", "pixel", "quantserve", "newrelic",
|
||||||
|
"hotjar", "yandex", "ads", "widgets", "gravatar", "fonts.googleapis",
|
||||||
|
"map", "cdn.ampproject.org", "googletagmanager"
|
||||||
|
|
||||||
];
|
];
|
||||||
|
|
||||||
|
const ALLOWED_SCRIPTS = [];
|
||||||
|
|
||||||
async function initHeadless() {
|
async function initHeadless() {
|
||||||
if (browser) return;
|
if (browser) return;
|
||||||
|
|
||||||
@@ -25,7 +30,12 @@ async function initHeadless() {
|
|||||||
"--mute-audio",
|
"--mute-audio",
|
||||||
"--no-first-run",
|
"--no-first-run",
|
||||||
"--no-zygote",
|
"--no-zygote",
|
||||||
"--single-process"
|
"--single-process",
|
||||||
|
|
||||||
|
"--disable-software-rasterizer",
|
||||||
|
"--disable-client-side-phishing-detection",
|
||||||
|
"--no-default-browser-check",
|
||||||
|
"--no-experiments"
|
||||||
]
|
]
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -35,7 +45,6 @@ async function initHeadless() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// ✅ Scroll infinito
|
|
||||||
async function turboScroll(page) {
|
async function turboScroll(page) {
|
||||||
await page.evaluate(() => {
|
await page.evaluate(() => {
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
@@ -47,6 +56,7 @@ async function turboScroll(page) {
|
|||||||
if (h === last) {
|
if (h === last) {
|
||||||
same++;
|
same++;
|
||||||
if (same >= 5) {
|
if (same >= 5) {
|
||||||
|
|
||||||
clearInterval(timer);
|
clearInterval(timer);
|
||||||
resolve();
|
resolve();
|
||||||
}
|
}
|
||||||
@@ -55,11 +65,11 @@ async function turboScroll(page) {
|
|||||||
last = h;
|
last = h;
|
||||||
}
|
}
|
||||||
}, 20);
|
}, 20);
|
||||||
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// ✅ Scrape principal
|
|
||||||
async function scrape(url, handler, options = {}) {
|
async function scrape(url, handler, options = {}) {
|
||||||
const {
|
const {
|
||||||
waitUntil = "domcontentloaded",
|
waitUntil = "domcontentloaded",
|
||||||
@@ -67,56 +77,76 @@ async function scrape(url, handler, options = {}) {
|
|||||||
timeout = 10000,
|
timeout = 10000,
|
||||||
scrollToBottom = false,
|
scrollToBottom = false,
|
||||||
renderWaitTime = 0,
|
renderWaitTime = 0,
|
||||||
loadImages = true
|
loadImages = true,
|
||||||
|
blockScripts = true
|
||||||
|
|
||||||
} = options;
|
} = options;
|
||||||
|
|
||||||
if (!browser) await init();
|
if (!browser) await initHeadless();
|
||||||
|
|
||||||
const page = await context.newPage();
|
const page = await context.newPage();
|
||||||
|
|
||||||
// 🔒 Bloqueo de recursos
|
|
||||||
await page.route("**/*", (route) => {
|
await page.route("**/*", (route) => {
|
||||||
const req = route.request();
|
const req = route.request();
|
||||||
const url = req.url().toLowerCase();
|
const resUrl = req.url().toLowerCase();
|
||||||
const type = req.resourceType();
|
const type = req.resourceType();
|
||||||
|
|
||||||
if (
|
if (
|
||||||
type === "font" ||
|
type === "font" ||
|
||||||
type === "stylesheet" ||
|
type === "stylesheet" ||
|
||||||
type === "media" ||
|
type === "media" ||
|
||||||
type === "manifest"
|
type === "manifest" ||
|
||||||
) return route.abort();
|
type === "other" ||
|
||||||
|
|
||||||
if (BLOCK_LIST.some(k => url.includes(k))) return route.abort();
|
(blockScripts && type === "script" && !ALLOWED_SCRIPTS.some(k => resUrl.includes(k)))
|
||||||
|
) {
|
||||||
|
|
||||||
|
return route.abort("blockedbyclient", { timeout: 100 });
|
||||||
|
}
|
||||||
|
|
||||||
|
if (BLOCK_LIST.some(k => resUrl.includes(k))) {
|
||||||
|
return route.abort("blockedbyclient", { timeout: 100 });
|
||||||
|
}
|
||||||
|
|
||||||
if (!loadImages && (
|
if (!loadImages && (
|
||||||
type === "image" || url.match(/\.(jpg|jpeg|png|gif|webp|svg)$/)
|
type === "image" || resUrl.match(/\.(jpg|jpeg|png|gif|webp|svg)$/)
|
||||||
)) return route.abort();
|
)) {
|
||||||
|
return route.abort("blockedbyclient", { timeout: 100 });
|
||||||
|
}
|
||||||
|
|
||||||
route.continue();
|
route.continue();
|
||||||
});
|
});
|
||||||
|
|
||||||
await page.goto(url, { waitUntil, timeout });
|
try {
|
||||||
|
await page.goto(url, { waitUntil, timeout });
|
||||||
|
|
||||||
if (waitSelector) {
|
if (waitSelector) {
|
||||||
try {
|
try {
|
||||||
await page.waitForSelector(waitSelector, { timeout });
|
await page.waitForSelector(waitSelector, { timeout });
|
||||||
} catch {}
|
} catch (e) {
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (scrollToBottom) {
|
||||||
|
await turboScroll(page);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (renderWaitTime > 0) {
|
||||||
|
|
||||||
|
await new Promise(r => setTimeout(r, renderWaitTime));
|
||||||
|
}
|
||||||
|
|
||||||
|
return await handler(page);
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Error durante el scraping de ${url}:`, error);
|
||||||
|
return null;
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
|
||||||
|
await page.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scrollToBottom) {
|
|
||||||
await turboScroll(page);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (renderWaitTime > 0) {
|
|
||||||
await new Promise(r => setTimeout(r, renderWaitTime));
|
|
||||||
}
|
|
||||||
|
|
||||||
const result = await handler(page);
|
|
||||||
|
|
||||||
await page.close();
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function closeScraper() {
|
async function closeScraper() {
|
||||||
|
|||||||
Reference in New Issue
Block a user