support for headless browser & changes on book api
This commit is contained in:
32
package-lock.json
generated
32
package-lock.json
generated
@@ -13,6 +13,7 @@
|
||||
"bindings": "^1.5.0",
|
||||
"fastify": "^5.6.2",
|
||||
"node-addon-api": "^8.5.0",
|
||||
"playwright-chromium": "1.57.0",
|
||||
"sqlite3": "^5.1.7"
|
||||
},
|
||||
"devDependencies": {
|
||||
@@ -399,7 +400,6 @@
|
||||
"integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~7.16.0"
|
||||
}
|
||||
@@ -2097,7 +2097,6 @@
|
||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
@@ -2142,6 +2141,34 @@
|
||||
"integrity": "sha512-e906FRY0+tV27iq4juKzSYPbUj2do2X2JX4EzSca1631EB2QJQUqGbDuERal7LCtOpxl6x3+nvo9NPZcmjkiFA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/playwright-chromium": {
|
||||
"version": "1.57.0",
|
||||
"resolved": "https://registry.npmjs.org/playwright-chromium/-/playwright-chromium-1.57.0.tgz",
|
||||
"integrity": "sha512-GCVVTbmIDrZuBxWYoQ70rehRXMb3Q7ccENe63a+rGTWwypeVAgh/DD5o5QQ898oer5pdIv3vGINUlEkHtOZQEw==",
|
||||
"hasInstallScript": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"playwright-core": "1.57.0"
|
||||
},
|
||||
"bin": {
|
||||
"playwright": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/playwright-core": {
|
||||
"version": "1.57.0",
|
||||
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.57.0.tgz",
|
||||
"integrity": "sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ==",
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"playwright-core": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/prebuild-install": {
|
||||
"version": "7.1.3",
|
||||
"resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz",
|
||||
@@ -3261,7 +3288,6 @@
|
||||
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
|
||||
@@ -16,7 +16,8 @@
|
||||
"bindings": "^1.5.0",
|
||||
"fastify": "^5.6.2",
|
||||
"node-addon-api": "^8.5.0",
|
||||
"sqlite3": "^5.1.7"
|
||||
"sqlite3": "^5.1.7",
|
||||
"playwright-chromium": "1.57.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^24.0.0",
|
||||
|
||||
@@ -2,6 +2,7 @@ const fastify = require('fastify')({ logger: true });
|
||||
const path = require('path');
|
||||
const { spawn } = require('child_process');
|
||||
const fs = require('fs');
|
||||
import { initHeadless } from "./src/shared/headless";
|
||||
|
||||
const { initDatabase } = require('./src/shared/database');
|
||||
const { loadExtensions } = require('./src/shared/extensions');
|
||||
@@ -85,6 +86,8 @@ const start = async () => {
|
||||
|
||||
startCppScraper();
|
||||
|
||||
await initHeadless();
|
||||
|
||||
} catch (err) {
|
||||
fastify.log.error(err);
|
||||
process.exit(1);
|
||||
|
||||
@@ -85,23 +85,26 @@ export async function searchBooksInExtension(req: any, reply: FastifyReply) {
|
||||
}
|
||||
}
|
||||
|
||||
export async function getChapters(req: BookRequest, reply: FastifyReply) {
|
||||
export async function getChapters(req: any, reply: FastifyReply) {
|
||||
try {
|
||||
const { id } = req.params;
|
||||
return await booksService.getChaptersForBook(id);
|
||||
const { ext } = req.query;
|
||||
return await booksService.getChaptersForBook(id, Boolean(ext));
|
||||
} catch (err) {
|
||||
return { chapters: [] };
|
||||
}
|
||||
}
|
||||
|
||||
export async function getChapterContent(req: ChapterRequest, reply: FastifyReply) {
|
||||
export async function getChapterContent(req: any, reply: FastifyReply) {
|
||||
try {
|
||||
const { bookId, chapter, provider } = req.params;
|
||||
const { ext } = req.query;
|
||||
|
||||
const content = await booksService.getChapterContent(
|
||||
bookId,
|
||||
chapter,
|
||||
provider
|
||||
provider,
|
||||
ext
|
||||
);
|
||||
|
||||
return reply.send(content);
|
||||
|
||||
@@ -126,6 +126,7 @@ export async function getBookInfoExtension(ext: Extension | null, id: string): P
|
||||
|
||||
if (ext.type === 'book-board' && ext.getMetadata) {
|
||||
try {
|
||||
|
||||
const info = await ext.getMetadata(id);
|
||||
|
||||
if (info) {
|
||||
@@ -198,8 +199,8 @@ async function fetchBookMetadata(id: string): Promise<Book | null> {
|
||||
}
|
||||
}
|
||||
|
||||
async function searchChaptersInExtension(ext: Extension, name: string, searchTitle: string, search: boolean): Promise<ChapterWithProvider[]> {
|
||||
const cacheKey = `chapters:${name}:${searchTitle}`;
|
||||
async function searchChaptersInExtension(ext: Extension, name: string, searchTitle: string, search: boolean, origin: string): Promise<ChapterWithProvider[]> {
|
||||
const cacheKey = `chapters:${name}:${origin}:${search ? "search" : "id"}:${searchTitle}`;
|
||||
const cached = await getCache(cacheKey);
|
||||
|
||||
if (cached) {
|
||||
@@ -267,11 +268,11 @@ async function searchChaptersInExtension(ext: Extension, name: string, searchTit
|
||||
}
|
||||
}
|
||||
|
||||
export async function getChaptersForBook(id: string): Promise<{ chapters: ChapterWithProvider[] }> {
|
||||
export async function getChaptersForBook(id: string, ext: Boolean): Promise<{ chapters: ChapterWithProvider[] }> {
|
||||
let bookData: Book | null = null;
|
||||
let searchTitle: string = "";
|
||||
|
||||
if (!isNaN(Number(id))) {
|
||||
if (!ext) {
|
||||
const result = await getBookById(id);
|
||||
if (!result || "error" in result) return { chapters: [] }
|
||||
bookData = result;
|
||||
@@ -292,13 +293,15 @@ export async function getChaptersForBook(id: string): Promise<{ chapters: Chapte
|
||||
}
|
||||
|
||||
const allChapters: any[] = [];
|
||||
let exts = "anilist";
|
||||
if (ext) exts = "ext";
|
||||
|
||||
for (const [name, ext] of bookExtensions) {
|
||||
if (name == extension) {
|
||||
const chapters = await searchChaptersInExtension(ext, name, id, false);
|
||||
const chapters = await searchChaptersInExtension(ext, name, id, false, exts);
|
||||
allChapters.push(...chapters);
|
||||
} else {
|
||||
const chapters = await searchChaptersInExtension(ext, name, searchTitle, true);
|
||||
const chapters = await searchChaptersInExtension(ext, name, searchTitle, true, exts);
|
||||
allChapters.push(...chapters);
|
||||
}
|
||||
}
|
||||
@@ -308,15 +311,17 @@ export async function getChaptersForBook(id: string): Promise<{ chapters: Chapte
|
||||
};
|
||||
}
|
||||
|
||||
export async function getChapterContent(bookId: string, chapterIndex: string, providerName: string): Promise<ChapterContent> {
|
||||
export async function getChapterContent(bookId: string, chapterIndex: string, providerName: string, name: string): Promise<ChapterContent> {
|
||||
const extensions = getAllExtensions();
|
||||
const ext = extensions.get(providerName);
|
||||
|
||||
if (!ext) {
|
||||
throw new Error("Provider not found");
|
||||
}
|
||||
let exts = "anilist";
|
||||
if (name) exts = "ext";
|
||||
|
||||
const contentCacheKey = `content:${providerName}:${bookId}:${chapterIndex}`;
|
||||
const contentCacheKey = `content:${providerName}:${exts}:${bookId}:${chapterIndex}`;
|
||||
const cachedContent = await getCache(contentCacheKey);
|
||||
|
||||
if (cachedContent) {
|
||||
@@ -335,7 +340,7 @@ export async function getChapterContent(bookId: string, chapterIndex: string, pr
|
||||
}
|
||||
}
|
||||
|
||||
const chapterList = await getChaptersForBook(bookId);
|
||||
const chapterList = await getChaptersForBook(bookId, Boolean(name));
|
||||
|
||||
if (!chapterList?.chapters || chapterList.chapters.length === 0) {
|
||||
throw new Error("Chapters not found");
|
||||
|
||||
@@ -130,7 +130,7 @@ async function loadChapters(idForFetch) {
|
||||
try {
|
||||
|
||||
const fetchUrl = extensionName
|
||||
? `/api/book/${idForFetch}/chapters`
|
||||
? `/api/book/${idForFetch}/chapters?ext=${extensionName}`
|
||||
: `/api/book/${idForFetch}/chapters`;
|
||||
|
||||
const res = await fetch(fetchUrl);
|
||||
@@ -174,7 +174,7 @@ function populateProviderFilter() {
|
||||
if (providers.length > 0) {
|
||||
select.style.display = 'inline-block';
|
||||
|
||||
select.innerHTML = '<option value="all">All Providers</option>';
|
||||
select.innerHTML = '';
|
||||
|
||||
providers.forEach(prov => {
|
||||
const opt = document.createElement('option');
|
||||
@@ -276,7 +276,9 @@ function updatePagination() {
|
||||
function openReader(bookId, chapterId, provider) {
|
||||
const c = encodeURIComponent(chapterId);
|
||||
const p = encodeURIComponent(provider);
|
||||
window.location.href = `/read/${p}/${c}/${bookId}`;
|
||||
let extension = "";
|
||||
if (extensionName) extension = "?" + extensionName;
|
||||
window.location.href = `/read/${p}/${c}/${bookId}${extension}`;
|
||||
}
|
||||
|
||||
init();
|
||||
@@ -10,6 +10,8 @@ const nextBtn = document.getElementById('next-chapter');
|
||||
const lnSettings = document.getElementById('ln-settings');
|
||||
const mangaSettings = document.getElementById('manga-settings');
|
||||
|
||||
const hasQuery = window.location.search.length > 0;
|
||||
|
||||
const config = {
|
||||
ln: {
|
||||
fontSize: 18,
|
||||
@@ -125,7 +127,9 @@ async function loadChapter() {
|
||||
`;
|
||||
|
||||
try {
|
||||
const res = await fetch(`/api/book/${bookId}/${chapter}/${provider}`);
|
||||
let ext = ""
|
||||
if(hasQuery) ext = "?ext=yes"
|
||||
const res = await fetch(`/api/book/${bookId}/${chapter}/${provider}${ext}`);
|
||||
const data = await res.json();
|
||||
|
||||
if (data.title) {
|
||||
|
||||
@@ -2,6 +2,7 @@ const fs = require('fs');
|
||||
const path = require('path');
|
||||
const os = require('os');
|
||||
const { queryAll, run } = require('./database');
|
||||
const { scrape } = require("./headless");
|
||||
|
||||
const extensions = new Map();
|
||||
|
||||
@@ -37,6 +38,7 @@ async function loadExtensions() {
|
||||
|
||||
const name = instance.constructor.name;
|
||||
extensions.set(name, instance);
|
||||
instance.scrape = scrape;
|
||||
console.log(`📦 Loaded Extension: ${name}`);
|
||||
}
|
||||
} catch (e) {
|
||||
|
||||
133
src/shared/headless.js
Normal file
133
src/shared/headless.js
Normal file
@@ -0,0 +1,133 @@
|
||||
const { chromium } = require("playwright-chromium");
|
||||
|
||||
// Chromium browser handle; assigned by initHeadless(), cleared by closeScraper().
let browser;

// Browser context in which scrape() opens its pages; assigned by initHeadless().
let context;

// Substrings matched against the lower-cased request URL in scrape();
// any request whose URL contains one of them is aborted.
const BLOCK_LIST = [
  "google-analytics",
  "doubleclick",
  "facebook",
  "twitter",
  "adsystem",
  "analytics",
  "tracker",
  "pixel",
  "quantserve",
  "newrelic",
];
|
||||
|
||||
/**
 * Launches the shared headless Chromium instance and its browser context.
 *
 * Does nothing when a browser has already been started. The module-level
 * `browser` is only published once the context exists, so a failure while
 * creating the context cannot leave the module half-initialised (browser set,
 * context undefined) or leak the Chromium process.
 *
 * @returns {Promise<void>}
 * @throws whatever `chromium.launch()` or `browser.newContext()` rejects with.
 */
async function initHeadless() {
  if (browser) return;

  const launched = await chromium.launch({
    headless: true,
    args: [
      // NOTE(review): the two sandbox flags turn off Chromium's sandbox while
      // this process renders third-party pages — confirm this is required by
      // the deployment environment.
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
      "--disable-gpu",
      "--disable-extensions",
      "--disable-background-networking",
      "--disable-sync",
      "--disable-translate",
      "--mute-audio",
      "--no-first-run",
      "--no-zygote",
      "--single-process"
    ]
  });

  try {
    context = await launched.newContext({
      userAgent:
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/122.0.0.0 Safari/537.36"
    });
  } catch (err) {
    // Do not leave an orphaned Chromium process behind; the original error is
    // the one worth reporting, so a failure to close is ignored here.
    await launched.close().catch(() => {});
    throw err;
  }

  browser = launched;
}
|
||||
|
||||
// ✅ Scroll infinito
|
||||
async function turboScroll(page) {
|
||||
await page.evaluate(() => {
|
||||
return new Promise((resolve) => {
|
||||
let last = 0;
|
||||
let same = 0;
|
||||
const timer = setInterval(() => {
|
||||
const h = document.body.scrollHeight;
|
||||
window.scrollTo(0, h);
|
||||
if (h === last) {
|
||||
same++;
|
||||
if (same >= 5) {
|
||||
clearInterval(timer);
|
||||
resolve();
|
||||
}
|
||||
} else {
|
||||
same = 0;
|
||||
last = h;
|
||||
}
|
||||
}, 20);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// ✅ Main scrape entry point
/**
 * Opens `url` in a fresh page of the shared context, lets `handler` extract
 * data from it, and always closes the page afterwards.
 *
 * @param url Address to navigate to.
 * @param handler Async callback that receives the page; its result is returned.
 * @param options Optional settings:
 *   - `waitUntil` (default "domcontentloaded"): passed to `page.goto`.
 *   - `waitSelector` (default null): selector to wait for after navigation;
 *     a wait failure or timeout is ignored and scraping continues.
 *   - `timeout` (default 10000): used for both navigation and the selector wait.
 *   - `scrollToBottom` (default false): run turboScroll() before the handler.
 *   - `renderWaitTime` (default 0): extra delay in ms before the handler runs.
 *   - `loadImages` (default true): when false, image requests are aborted.
 * @returns the value resolved by `handler`.
 * @throws if navigation fails or `handler` rejects; the page is closed first.
 */
async function scrape(url, handler, options = {}) {
  const {
    waitUntil = "domcontentloaded",
    waitSelector = null,
    timeout = 10000,
    scrollToBottom = false,
    renderWaitTime = 0,
    loadImages = true
  } = options;

  // Start the browser on demand. (The previous code called an undefined
  // `init()`, which threw a ReferenceError whenever this branch was taken.)
  if (!browser) await initHeadless();

  const page = await context.newPage();

  try {
    // 🔒 Resource blocking
    await page.route("**/*", (route) => {
      const req = route.request();
      const requestUrl = req.url().toLowerCase();
      const type = req.resourceType();

      // Resource types that are never loaded.
      if (
        type === "font" ||
        type === "stylesheet" ||
        type === "media" ||
        type === "manifest"
      ) return route.abort();

      // Requests whose URL contains a block-listed fragment.
      if (BLOCK_LIST.some(k => requestUrl.includes(k))) return route.abort();

      // Images only when the caller opted out of them.
      if (!loadImages && (
        type === "image" || requestUrl.match(/\.(jpg|jpeg|png|gif|webp|svg)$/)
      )) return route.abort();

      return route.continue();
    });

    await page.goto(url, { waitUntil, timeout });

    if (waitSelector) {
      try {
        await page.waitForSelector(waitSelector, { timeout });
      } catch {
        // Deliberately best-effort: the handler still runs against whatever
        // has rendered if the selector never shows up.
      }
    }

    if (scrollToBottom) {
      await turboScroll(page);
    }

    if (renderWaitTime > 0) {
      await new Promise(r => setTimeout(r, renderWaitTime));
    }

    return await handler(page);
  } finally {
    // Close the page on every path so a failed navigation or a throwing
    // handler does not leave pages accumulating in the shared context.
    await page.close();
  }
}
|
||||
|
||||
/**
 * Shuts down the shared browser context and browser, and clears the module
 * state so a later initHeadless() starts from scratch.
 *
 * The handles are detached before closing, and the browser is closed in a
 * `finally`, so an error while closing the context can neither skip the
 * browser shutdown nor leave stale handles behind.
 *
 * @returns {Promise<void>}
 * @throws if closing the context or the browser rejects.
 */
async function closeScraper() {
  const openContext = context;
  const openBrowser = browser;

  context = null;
  browser = null;

  try {
    if (openContext) await openContext.close();
  } finally {
    if (openBrowser) await openBrowser.close();
  }
}
|
||||
|
||||
// Public API of the headless-browser helper.
module.exports = { initHeadless, scrape, closeScraper };
|
||||
Reference in New Issue
Block a user