diff --git a/package-lock.json b/package-lock.json index e158195..f7f27dc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,6 +13,7 @@ "bindings": "^1.5.0", "fastify": "^5.6.2", "node-addon-api": "^8.5.0", + "playwright-chromium": "1.57.0", "sqlite3": "^5.1.7" }, "devDependencies": { @@ -399,7 +400,6 @@ "integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "undici-types": "~7.16.0" } @@ -2097,7 +2097,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -2142,6 +2141,34 @@ "integrity": "sha512-e906FRY0+tV27iq4juKzSYPbUj2do2X2JX4EzSca1631EB2QJQUqGbDuERal7LCtOpxl6x3+nvo9NPZcmjkiFA==", "license": "MIT" }, + "node_modules/playwright-chromium": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/playwright-chromium/-/playwright-chromium-1.57.0.tgz", + "integrity": "sha512-GCVVTbmIDrZuBxWYoQ70rehRXMb3Q7ccENe63a+rGTWwypeVAgh/DD5o5QQ898oer5pdIv3vGINUlEkHtOZQEw==", + "hasInstallScript": true, + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.57.0" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/playwright-core": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.57.0.tgz", + "integrity": "sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ==", + "license": "Apache-2.0", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/prebuild-install": { "version": "7.1.3", "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", @@ -3261,7 +3288,6 @@ "integrity": 
// server.js — top-level dependencies.
// This file is CommonJS: every dependency here is loaded with require().
const fastify = require('fastify')({ logger: true });
const path = require('path');
const { spawn } = require('child_process');
const fs = require('fs');

// The headless helper publishes its API through `module.exports`, so it is
// loaded with require() like its neighbours. An ES `import` declaration must
// not be mixed into this file: depending on how the file is loaded, either the
// `import` statement or the surrounding `require` calls stop working.
const { initHeadless } = require('./src/shared/headless');
const { initDatabase } = require('./src/shared/database');
const { loadExtensions } = require('./src/shared/extensions');
req.params; + const { ext } = req.query; const content = await booksService.getChapterContent( bookId, chapter, - provider + provider, + ext ); return reply.send(content); diff --git a/src/api/books/books.service.ts b/src/api/books/books.service.ts index a058866..26bafe8 100644 --- a/src/api/books/books.service.ts +++ b/src/api/books/books.service.ts @@ -126,6 +126,7 @@ export async function getBookInfoExtension(ext: Extension | null, id: string): P if (ext.type === 'book-board' && ext.getMetadata) { try { + const info = await ext.getMetadata(id); if (info) { @@ -198,8 +199,8 @@ async function fetchBookMetadata(id: string): Promise { } } -async function searchChaptersInExtension(ext: Extension, name: string, searchTitle: string, search: boolean): Promise { - const cacheKey = `chapters:${name}:${searchTitle}`; +async function searchChaptersInExtension(ext: Extension, name: string, searchTitle: string, search: boolean, origin: string): Promise { + const cacheKey = `chapters:${name}:${origin}:${search ? 
"search" : "id"}:${searchTitle}`; const cached = await getCache(cacheKey); if (cached) { @@ -244,7 +245,7 @@ async function searchChaptersInExtension(ext: Extension, name: string, searchTit const chaps = await ext.findChapters!(mediaId); - if (!chaps?.length) { + if (!chaps?.length){ return []; } @@ -267,11 +268,11 @@ async function searchChaptersInExtension(ext: Extension, name: string, searchTit } } -export async function getChaptersForBook(id: string): Promise<{ chapters: ChapterWithProvider[] }> { +export async function getChaptersForBook(id: string, ext: Boolean): Promise<{ chapters: ChapterWithProvider[] }> { let bookData: Book | null = null; let searchTitle: string = ""; - if (!isNaN(Number(id))) { + if (!ext) { const result = await getBookById(id); if (!result || "error" in result) return { chapters: [] } bookData = result; @@ -292,13 +293,15 @@ export async function getChaptersForBook(id: string): Promise<{ chapters: Chapte } const allChapters: any[] = []; + let exts = "anilist"; + if (ext) exts = "ext"; for (const [name, ext] of bookExtensions) { if (name == extension) { - const chapters = await searchChaptersInExtension(ext, name, id, false); + const chapters = await searchChaptersInExtension(ext, name, id, false, exts); allChapters.push(...chapters); } else { - const chapters = await searchChaptersInExtension(ext, name, searchTitle, true); + const chapters = await searchChaptersInExtension(ext, name, searchTitle, true, exts); allChapters.push(...chapters); } } @@ -308,15 +311,17 @@ export async function getChaptersForBook(id: string): Promise<{ chapters: Chapte }; } -export async function getChapterContent(bookId: string, chapterIndex: string, providerName: string): Promise { +export async function getChapterContent(bookId: string, chapterIndex: string, providerName: string, name: string): Promise { const extensions = getAllExtensions(); const ext = extensions.get(providerName); if (!ext) { throw new Error("Provider not found"); } + let exts = 
"anilist"; + if (name) exts = "ext"; - const contentCacheKey = `content:${providerName}:${bookId}:${chapterIndex}`; + const contentCacheKey = `content:${providerName}:${exts}:${bookId}:${chapterIndex}`; const cachedContent = await getCache(contentCacheKey); if (cachedContent) { @@ -335,7 +340,7 @@ export async function getChapterContent(bookId: string, chapterIndex: string, pr } } - const chapterList = await getChaptersForBook(bookId); + const chapterList = await getChaptersForBook(bookId, Boolean(name)); if (!chapterList?.chapters || chapterList.chapters.length === 0) { throw new Error("Chapters not found"); diff --git a/src/scripts/books/book.js b/src/scripts/books/book.js index 260c321..7e89c62 100644 --- a/src/scripts/books/book.js +++ b/src/scripts/books/book.js @@ -130,7 +130,7 @@ async function loadChapters(idForFetch) { try { const fetchUrl = extensionName - ? `/api/book/${idForFetch}/chapters` + ? `/api/book/${idForFetch}/chapters?ext=${extensionName}` : `/api/book/${idForFetch}/chapters`; const res = await fetch(fetchUrl); @@ -174,7 +174,7 @@ function populateProviderFilter() { if (providers.length > 0) { select.style.display = 'inline-block'; - select.innerHTML = ''; + select.innerHTML = ''; providers.forEach(prov => { const opt = document.createElement('option'); @@ -276,7 +276,9 @@ function updatePagination() { function openReader(bookId, chapterId, provider) { const c = encodeURIComponent(chapterId); const p = encodeURIComponent(provider); - window.location.href = `/read/${p}/${c}/${bookId}`; + let extension = ""; + if (extensionName) extension = "?" 
+ extensionName; + window.location.href = `/read/${p}/${c}/${bookId}${extension}`; } init(); \ No newline at end of file diff --git a/src/scripts/books/reader.js b/src/scripts/books/reader.js index de4da9c..358d167 100644 --- a/src/scripts/books/reader.js +++ b/src/scripts/books/reader.js @@ -10,6 +10,8 @@ const nextBtn = document.getElementById('next-chapter'); const lnSettings = document.getElementById('ln-settings'); const mangaSettings = document.getElementById('manga-settings'); +const hasQuery = window.location.search.length > 0; + const config = { ln: { fontSize: 18, @@ -125,7 +127,9 @@ async function loadChapter() { `; try { - const res = await fetch(`/api/book/${bookId}/${chapter}/${provider}`); + let ext = "" + if(hasQuery) ext = "?ext=yes" + const res = await fetch(`/api/book/${bookId}/${chapter}/${provider}${ext}`); const data = await res.json(); if (data.title) { diff --git a/src/shared/extensions.js b/src/shared/extensions.js index 774bce7..407b29e 100644 --- a/src/shared/extensions.js +++ b/src/shared/extensions.js @@ -2,6 +2,7 @@ const fs = require('fs'); const path = require('path'); const os = require('os'); const { queryAll, run } = require('./database'); +const { scrape } = require("./headless"); const extensions = new Map(); @@ -37,6 +38,7 @@ async function loadExtensions() { const name = instance.constructor.name; extensions.set(name, instance); + instance.scrape = scrape; console.log(`📦 Loaded Extension: ${name}`); } } catch (e) { diff --git a/src/shared/headless.js b/src/shared/headless.js new file mode 100644 index 0000000..5e0487c --- /dev/null +++ b/src/shared/headless.js @@ -0,0 +1,133 @@ +const { chromium } = require("playwright-chromium"); + +let browser; +let context; + +const BLOCK_LIST = [ + "google-analytics", "doubleclick", "facebook", "twitter", + "adsystem", "analytics", "tracker", "pixel", "quantserve", "newrelic" +]; + +async function initHeadless() { + if (browser) return; + + browser = await chromium.launch({ + headless: 
true, + args: [ + "--no-sandbox", + "--disable-setuid-sandbox", + "--disable-dev-shm-usage", + "--disable-gpu", + "--disable-extensions", + "--disable-background-networking", + "--disable-sync", + "--disable-translate", + "--mute-audio", + "--no-first-run", + "--no-zygote", + "--single-process" + ] + }); + + context = await browser.newContext({ + userAgent: + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/122.0.0.0 Safari/537.36" + }); +} + +// ✅ Scroll infinito +async function turboScroll(page) { + await page.evaluate(() => { + return new Promise((resolve) => { + let last = 0; + let same = 0; + const timer = setInterval(() => { + const h = document.body.scrollHeight; + window.scrollTo(0, h); + if (h === last) { + same++; + if (same >= 5) { + clearInterval(timer); + resolve(); + } + } else { + same = 0; + last = h; + } + }, 20); + }); + }); +} + +// ✅ Scrape principal +async function scrape(url, handler, options = {}) { + const { + waitUntil = "domcontentloaded", + waitSelector = null, + timeout = 10000, + scrollToBottom = false, + renderWaitTime = 0, + loadImages = true + } = options; + + if (!browser) await init(); + + const page = await context.newPage(); + + // 🔒 Bloqueo de recursos + await page.route("**/*", (route) => { + const req = route.request(); + const url = req.url().toLowerCase(); + const type = req.resourceType(); + + if ( + type === "font" || + type === "stylesheet" || + type === "media" || + type === "manifest" + ) return route.abort(); + + if (BLOCK_LIST.some(k => url.includes(k))) return route.abort(); + + if (!loadImages && ( + type === "image" || url.match(/\.(jpg|jpeg|png|gif|webp|svg)$/) + )) return route.abort(); + + route.continue(); + }); + + await page.goto(url, { waitUntil, timeout }); + + if (waitSelector) { + try { + await page.waitForSelector(waitSelector, { timeout }); + } catch {} + } + + if (scrollToBottom) { + await turboScroll(page); + } + + if (renderWaitTime > 0) { + await new Promise(r => setTimeout(r, 
renderWaitTime)); + } + + const result = await handler(page); + + await page.close(); + + return result; +} + +async function closeScraper() { + if (context) await context.close(); + if (browser) await browser.close(); + context = null; + browser = null; +} + +module.exports = { + initHeadless, + scrape, + closeScraper +}; \ No newline at end of file