support for headless browser & changes on book api

This commit is contained in:
2025-12-03 20:40:53 +01:00
parent 8e20743e8b
commit 920ce19cc2
9 changed files with 201 additions and 22 deletions

32
package-lock.json generated
View File

@@ -13,6 +13,7 @@
"bindings": "^1.5.0", "bindings": "^1.5.0",
"fastify": "^5.6.2", "fastify": "^5.6.2",
"node-addon-api": "^8.5.0", "node-addon-api": "^8.5.0",
"playwright-chromium": "1.57.0",
"sqlite3": "^5.1.7" "sqlite3": "^5.1.7"
}, },
"devDependencies": { "devDependencies": {
@@ -399,7 +400,6 @@
"integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==", "integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"undici-types": "~7.16.0" "undici-types": "~7.16.0"
} }
@@ -2097,7 +2097,6 @@
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"engines": { "engines": {
"node": ">=12" "node": ">=12"
}, },
@@ -2142,6 +2141,34 @@
"integrity": "sha512-e906FRY0+tV27iq4juKzSYPbUj2do2X2JX4EzSca1631EB2QJQUqGbDuERal7LCtOpxl6x3+nvo9NPZcmjkiFA==", "integrity": "sha512-e906FRY0+tV27iq4juKzSYPbUj2do2X2JX4EzSca1631EB2QJQUqGbDuERal7LCtOpxl6x3+nvo9NPZcmjkiFA==",
"license": "MIT" "license": "MIT"
}, },
"node_modules/playwright-chromium": {
"version": "1.57.0",
"resolved": "https://registry.npmjs.org/playwright-chromium/-/playwright-chromium-1.57.0.tgz",
"integrity": "sha512-GCVVTbmIDrZuBxWYoQ70rehRXMb3Q7ccENe63a+rGTWwypeVAgh/DD5o5QQ898oer5pdIv3vGINUlEkHtOZQEw==",
"hasInstallScript": true,
"license": "Apache-2.0",
"dependencies": {
"playwright-core": "1.57.0"
},
"bin": {
"playwright": "cli.js"
},
"engines": {
"node": ">=18"
}
},
"node_modules/playwright-core": {
"version": "1.57.0",
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.57.0.tgz",
"integrity": "sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ==",
"license": "Apache-2.0",
"bin": {
"playwright-core": "cli.js"
},
"engines": {
"node": ">=18"
}
},
"node_modules/prebuild-install": { "node_modules/prebuild-install": {
"version": "7.1.3", "version": "7.1.3",
"resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz",
@@ -3261,7 +3288,6 @@
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
"dev": true, "dev": true,
"license": "Apache-2.0", "license": "Apache-2.0",
"peer": true,
"bin": { "bin": {
"tsc": "bin/tsc", "tsc": "bin/tsc",
"tsserver": "bin/tsserver" "tsserver": "bin/tsserver"

View File

@@ -16,7 +16,8 @@
"bindings": "^1.5.0", "bindings": "^1.5.0",
"fastify": "^5.6.2", "fastify": "^5.6.2",
"node-addon-api": "^8.5.0", "node-addon-api": "^8.5.0",
"sqlite3": "^5.1.7" "sqlite3": "^5.1.7",
"playwright-chromium": "1.57.0"
}, },
"devDependencies": { "devDependencies": {
"@types/node": "^24.0.0", "@types/node": "^24.0.0",

View File

@@ -2,6 +2,7 @@ const fastify = require('fastify')({ logger: true });
const path = require('path'); const path = require('path');
const { spawn } = require('child_process'); const { spawn } = require('child_process');
const fs = require('fs'); const fs = require('fs');
import { initHeadless } from "./src/shared/headless";
const { initDatabase } = require('./src/shared/database'); const { initDatabase } = require('./src/shared/database');
const { loadExtensions } = require('./src/shared/extensions'); const { loadExtensions } = require('./src/shared/extensions');
@@ -85,6 +86,8 @@ const start = async () => {
startCppScraper(); startCppScraper();
await initHeadless();
} catch (err) { } catch (err) {
fastify.log.error(err); fastify.log.error(err);
process.exit(1); process.exit(1);

View File

@@ -85,23 +85,26 @@ export async function searchBooksInExtension(req: any, reply: FastifyReply) {
} }
} }
export async function getChapters(req: BookRequest, reply: FastifyReply) { export async function getChapters(req: any, reply: FastifyReply) {
try { try {
const { id } = req.params; const { id } = req.params;
return await booksService.getChaptersForBook(id); const { ext } = req.query;
return await booksService.getChaptersForBook(id, Boolean(ext));
} catch (err) { } catch (err) {
return { chapters: [] }; return { chapters: [] };
} }
} }
export async function getChapterContent(req: ChapterRequest, reply: FastifyReply) { export async function getChapterContent(req: any, reply: FastifyReply) {
try { try {
const { bookId, chapter, provider } = req.params; const { bookId, chapter, provider } = req.params;
const { ext } = req.query;
const content = await booksService.getChapterContent( const content = await booksService.getChapterContent(
bookId, bookId,
chapter, chapter,
provider provider,
ext
); );
return reply.send(content); return reply.send(content);

View File

@@ -126,6 +126,7 @@ export async function getBookInfoExtension(ext: Extension | null, id: string): P
if (ext.type === 'book-board' && ext.getMetadata) { if (ext.type === 'book-board' && ext.getMetadata) {
try { try {
const info = await ext.getMetadata(id); const info = await ext.getMetadata(id);
if (info) { if (info) {
@@ -198,8 +199,8 @@ async function fetchBookMetadata(id: string): Promise<Book | null> {
} }
} }
async function searchChaptersInExtension(ext: Extension, name: string, searchTitle: string, search: boolean): Promise<ChapterWithProvider[]> { async function searchChaptersInExtension(ext: Extension, name: string, searchTitle: string, search: boolean, origin: string): Promise<ChapterWithProvider[]> {
const cacheKey = `chapters:${name}:${searchTitle}`; const cacheKey = `chapters:${name}:${origin}:${search ? "search" : "id"}:${searchTitle}`;
const cached = await getCache(cacheKey); const cached = await getCache(cacheKey);
if (cached) { if (cached) {
@@ -267,11 +268,11 @@ async function searchChaptersInExtension(ext: Extension, name: string, searchTit
} }
} }
export async function getChaptersForBook(id: string): Promise<{ chapters: ChapterWithProvider[] }> { export async function getChaptersForBook(id: string, ext: Boolean): Promise<{ chapters: ChapterWithProvider[] }> {
let bookData: Book | null = null; let bookData: Book | null = null;
let searchTitle: string = ""; let searchTitle: string = "";
if (!isNaN(Number(id))) { if (!ext) {
const result = await getBookById(id); const result = await getBookById(id);
if (!result || "error" in result) return { chapters: [] } if (!result || "error" in result) return { chapters: [] }
bookData = result; bookData = result;
@@ -292,13 +293,15 @@ export async function getChaptersForBook(id: string): Promise<{ chapters: Chapte
} }
const allChapters: any[] = []; const allChapters: any[] = [];
let exts = "anilist";
if (ext) exts = "ext";
for (const [name, ext] of bookExtensions) { for (const [name, ext] of bookExtensions) {
if (name == extension) { if (name == extension) {
const chapters = await searchChaptersInExtension(ext, name, id, false); const chapters = await searchChaptersInExtension(ext, name, id, false, exts);
allChapters.push(...chapters); allChapters.push(...chapters);
} else { } else {
const chapters = await searchChaptersInExtension(ext, name, searchTitle, true); const chapters = await searchChaptersInExtension(ext, name, searchTitle, true, exts);
allChapters.push(...chapters); allChapters.push(...chapters);
} }
} }
@@ -308,15 +311,17 @@ export async function getChaptersForBook(id: string): Promise<{ chapters: Chapte
}; };
} }
export async function getChapterContent(bookId: string, chapterIndex: string, providerName: string): Promise<ChapterContent> { export async function getChapterContent(bookId: string, chapterIndex: string, providerName: string, name: string): Promise<ChapterContent> {
const extensions = getAllExtensions(); const extensions = getAllExtensions();
const ext = extensions.get(providerName); const ext = extensions.get(providerName);
if (!ext) { if (!ext) {
throw new Error("Provider not found"); throw new Error("Provider not found");
} }
let exts = "anilist";
if (name) exts = "ext";
const contentCacheKey = `content:${providerName}:${bookId}:${chapterIndex}`; const contentCacheKey = `content:${providerName}:${exts}:${bookId}:${chapterIndex}`;
const cachedContent = await getCache(contentCacheKey); const cachedContent = await getCache(contentCacheKey);
if (cachedContent) { if (cachedContent) {
@@ -335,7 +340,7 @@ export async function getChapterContent(bookId: string, chapterIndex: string, pr
} }
} }
const chapterList = await getChaptersForBook(bookId); const chapterList = await getChaptersForBook(bookId, Boolean(name));
if (!chapterList?.chapters || chapterList.chapters.length === 0) { if (!chapterList?.chapters || chapterList.chapters.length === 0) {
throw new Error("Chapters not found"); throw new Error("Chapters not found");

View File

@@ -130,7 +130,7 @@ async function loadChapters(idForFetch) {
try { try {
const fetchUrl = extensionName const fetchUrl = extensionName
? `/api/book/${idForFetch}/chapters` ? `/api/book/${idForFetch}/chapters?ext=${extensionName}`
: `/api/book/${idForFetch}/chapters`; : `/api/book/${idForFetch}/chapters`;
const res = await fetch(fetchUrl); const res = await fetch(fetchUrl);
@@ -174,7 +174,7 @@ function populateProviderFilter() {
if (providers.length > 0) { if (providers.length > 0) {
select.style.display = 'inline-block'; select.style.display = 'inline-block';
select.innerHTML = '<option value="all">All Providers</option>'; select.innerHTML = '';
providers.forEach(prov => { providers.forEach(prov => {
const opt = document.createElement('option'); const opt = document.createElement('option');
@@ -276,7 +276,9 @@ function updatePagination() {
function openReader(bookId, chapterId, provider) { function openReader(bookId, chapterId, provider) {
const c = encodeURIComponent(chapterId); const c = encodeURIComponent(chapterId);
const p = encodeURIComponent(provider); const p = encodeURIComponent(provider);
window.location.href = `/read/${p}/${c}/${bookId}`; let extension = "";
if (extensionName) extension = "?" + extensionName;
window.location.href = `/read/${p}/${c}/${bookId}${extension}`;
} }
init(); init();

View File

@@ -10,6 +10,8 @@ const nextBtn = document.getElementById('next-chapter');
const lnSettings = document.getElementById('ln-settings'); const lnSettings = document.getElementById('ln-settings');
const mangaSettings = document.getElementById('manga-settings'); const mangaSettings = document.getElementById('manga-settings');
const hasQuery = window.location.search.length > 0;
const config = { const config = {
ln: { ln: {
fontSize: 18, fontSize: 18,
@@ -125,7 +127,9 @@ async function loadChapter() {
`; `;
try { try {
const res = await fetch(`/api/book/${bookId}/${chapter}/${provider}`); let ext = ""
if(hasQuery) ext = "?ext=yes"
const res = await fetch(`/api/book/${bookId}/${chapter}/${provider}${ext}`);
const data = await res.json(); const data = await res.json();
if (data.title) { if (data.title) {

View File

@@ -2,6 +2,7 @@ const fs = require('fs');
const path = require('path'); const path = require('path');
const os = require('os'); const os = require('os');
const { queryAll, run } = require('./database'); const { queryAll, run } = require('./database');
const { scrape } = require("./headless");
const extensions = new Map(); const extensions = new Map();
@@ -37,6 +38,7 @@ async function loadExtensions() {
const name = instance.constructor.name; const name = instance.constructor.name;
extensions.set(name, instance); extensions.set(name, instance);
instance.scrape = scrape;
console.log(`📦 Loaded Extension: ${name}`); console.log(`📦 Loaded Extension: ${name}`);
} }
} catch (e) { } catch (e) {

133
src/shared/headless.js Normal file
View File

@@ -0,0 +1,133 @@
const { chromium } = require("playwright-chromium");
// Shared Playwright handles for the whole module. Both stay undefined until
// initHeadless() assigns them and are reset to null by closeScraper().
let browser, context;

// URL substrings identifying ad / analytics / tracking hosts. scrape() aborts
// any request whose lower-cased URL contains one of these.
const BLOCK_LIST = [
  "google-analytics",
  "doubleclick",
  "facebook",
  "twitter",
  "adsystem",
  "analytics",
  "tracker",
  "pixel",
  "quantserve",
  "newrelic"
];
async function initHeadless() {
if (browser) return;
browser = await chromium.launch({
headless: true,
args: [
"--no-sandbox",
"--disable-setuid-sandbox",
"--disable-dev-shm-usage",
"--disable-gpu",
"--disable-extensions",
"--disable-background-networking",
"--disable-sync",
"--disable-translate",
"--mute-audio",
"--no-first-run",
"--no-zygote",
"--single-process"
]
});
context = await browser.newContext({
userAgent:
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/122.0.0.0 Safari/537.36"
});
}
// ✅ Scroll infinito
async function turboScroll(page) {
await page.evaluate(() => {
return new Promise((resolve) => {
let last = 0;
let same = 0;
const timer = setInterval(() => {
const h = document.body.scrollHeight;
window.scrollTo(0, h);
if (h === last) {
same++;
if (same >= 5) {
clearInterval(timer);
resolve();
}
} else {
same = 0;
last = h;
}
}, 20);
});
});
}
// ✅ Scrape principal
async function scrape(url, handler, options = {}) {
const {
waitUntil = "domcontentloaded",
waitSelector = null,
timeout = 10000,
scrollToBottom = false,
renderWaitTime = 0,
loadImages = true
} = options;
if (!browser) await init();
const page = await context.newPage();
// 🔒 Bloqueo de recursos
await page.route("**/*", (route) => {
const req = route.request();
const url = req.url().toLowerCase();
const type = req.resourceType();
if (
type === "font" ||
type === "stylesheet" ||
type === "media" ||
type === "manifest"
) return route.abort();
if (BLOCK_LIST.some(k => url.includes(k))) return route.abort();
if (!loadImages && (
type === "image" || url.match(/\.(jpg|jpeg|png|gif|webp|svg)$/)
)) return route.abort();
route.continue();
});
await page.goto(url, { waitUntil, timeout });
if (waitSelector) {
try {
await page.waitForSelector(waitSelector, { timeout });
} catch {}
}
if (scrollToBottom) {
await turboScroll(page);
}
if (renderWaitTime > 0) {
await new Promise(r => setTimeout(r, renderWaitTime));
}
const result = await handler(page);
await page.close();
return result;
}
async function closeScraper() {
if (context) await context.close();
if (browser) await browser.close();
context = null;
browser = null;
}
// Public API of the headless-browser module: browser start-up, the scrape
// helper, and shutdown.
module.exports = {
initHeadless,
scrape,
closeScraper
};