support for headless browser & changes on book api
This commit is contained in:
32
package-lock.json
generated
32
package-lock.json
generated
@@ -13,6 +13,7 @@
|
|||||||
"bindings": "^1.5.0",
|
"bindings": "^1.5.0",
|
||||||
"fastify": "^5.6.2",
|
"fastify": "^5.6.2",
|
||||||
"node-addon-api": "^8.5.0",
|
"node-addon-api": "^8.5.0",
|
||||||
|
"playwright-chromium": "1.57.0",
|
||||||
"sqlite3": "^5.1.7"
|
"sqlite3": "^5.1.7"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
@@ -399,7 +400,6 @@
|
|||||||
"integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
|
"integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"peer": true,
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"undici-types": "~7.16.0"
|
"undici-types": "~7.16.0"
|
||||||
}
|
}
|
||||||
@@ -2097,7 +2097,6 @@
|
|||||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"peer": true,
|
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=12"
|
"node": ">=12"
|
||||||
},
|
},
|
||||||
@@ -2142,6 +2141,34 @@
|
|||||||
"integrity": "sha512-e906FRY0+tV27iq4juKzSYPbUj2do2X2JX4EzSca1631EB2QJQUqGbDuERal7LCtOpxl6x3+nvo9NPZcmjkiFA==",
|
"integrity": "sha512-e906FRY0+tV27iq4juKzSYPbUj2do2X2JX4EzSca1631EB2QJQUqGbDuERal7LCtOpxl6x3+nvo9NPZcmjkiFA==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/playwright-chromium": {
|
||||||
|
"version": "1.57.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/playwright-chromium/-/playwright-chromium-1.57.0.tgz",
|
||||||
|
"integrity": "sha512-GCVVTbmIDrZuBxWYoQ70rehRXMb3Q7ccENe63a+rGTWwypeVAgh/DD5o5QQ898oer5pdIv3vGINUlEkHtOZQEw==",
|
||||||
|
"hasInstallScript": true,
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"playwright-core": "1.57.0"
|
||||||
|
},
|
||||||
|
"bin": {
|
||||||
|
"playwright": "cli.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/playwright-core": {
|
||||||
|
"version": "1.57.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.57.0.tgz",
|
||||||
|
"integrity": "sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"bin": {
|
||||||
|
"playwright-core": "cli.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/prebuild-install": {
|
"node_modules/prebuild-install": {
|
||||||
"version": "7.1.3",
|
"version": "7.1.3",
|
||||||
"resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz",
|
"resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz",
|
||||||
@@ -3261,7 +3288,6 @@
|
|||||||
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"peer": true,
|
|
||||||
"bin": {
|
"bin": {
|
||||||
"tsc": "bin/tsc",
|
"tsc": "bin/tsc",
|
||||||
"tsserver": "bin/tsserver"
|
"tsserver": "bin/tsserver"
|
||||||
|
|||||||
@@ -16,7 +16,8 @@
|
|||||||
"bindings": "^1.5.0",
|
"bindings": "^1.5.0",
|
||||||
"fastify": "^5.6.2",
|
"fastify": "^5.6.2",
|
||||||
"node-addon-api": "^8.5.0",
|
"node-addon-api": "^8.5.0",
|
||||||
"sqlite3": "^5.1.7"
|
"sqlite3": "^5.1.7",
|
||||||
|
"playwright-chromium": "1.57.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/node": "^24.0.0",
|
"@types/node": "^24.0.0",
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ const fastify = require('fastify')({ logger: true });
|
|||||||
const path = require('path');
|
const path = require('path');
|
||||||
const { spawn } = require('child_process');
|
const { spawn } = require('child_process');
|
||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
|
import { initHeadless } from "./src/shared/headless";
|
||||||
|
|
||||||
const { initDatabase } = require('./src/shared/database');
|
const { initDatabase } = require('./src/shared/database');
|
||||||
const { loadExtensions } = require('./src/shared/extensions');
|
const { loadExtensions } = require('./src/shared/extensions');
|
||||||
@@ -85,6 +86,8 @@ const start = async () => {
|
|||||||
|
|
||||||
startCppScraper();
|
startCppScraper();
|
||||||
|
|
||||||
|
await initHeadless();
|
||||||
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
fastify.log.error(err);
|
fastify.log.error(err);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
|
|||||||
@@ -85,23 +85,26 @@ export async function searchBooksInExtension(req: any, reply: FastifyReply) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function getChapters(req: BookRequest, reply: FastifyReply) {
|
export async function getChapters(req: any, reply: FastifyReply) {
|
||||||
try {
|
try {
|
||||||
const { id } = req.params;
|
const { id } = req.params;
|
||||||
return await booksService.getChaptersForBook(id);
|
const { ext } = req.query;
|
||||||
|
return await booksService.getChaptersForBook(id, Boolean(ext));
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
return { chapters: [] };
|
return { chapters: [] };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function getChapterContent(req: ChapterRequest, reply: FastifyReply) {
|
export async function getChapterContent(req: any, reply: FastifyReply) {
|
||||||
try {
|
try {
|
||||||
const { bookId, chapter, provider } = req.params;
|
const { bookId, chapter, provider } = req.params;
|
||||||
|
const { ext } = req.query;
|
||||||
|
|
||||||
const content = await booksService.getChapterContent(
|
const content = await booksService.getChapterContent(
|
||||||
bookId,
|
bookId,
|
||||||
chapter,
|
chapter,
|
||||||
provider
|
provider,
|
||||||
|
ext
|
||||||
);
|
);
|
||||||
|
|
||||||
return reply.send(content);
|
return reply.send(content);
|
||||||
|
|||||||
@@ -126,6 +126,7 @@ export async function getBookInfoExtension(ext: Extension | null, id: string): P
|
|||||||
|
|
||||||
if (ext.type === 'book-board' && ext.getMetadata) {
|
if (ext.type === 'book-board' && ext.getMetadata) {
|
||||||
try {
|
try {
|
||||||
|
|
||||||
const info = await ext.getMetadata(id);
|
const info = await ext.getMetadata(id);
|
||||||
|
|
||||||
if (info) {
|
if (info) {
|
||||||
@@ -198,8 +199,8 @@ async function fetchBookMetadata(id: string): Promise<Book | null> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function searchChaptersInExtension(ext: Extension, name: string, searchTitle: string, search: boolean): Promise<ChapterWithProvider[]> {
|
async function searchChaptersInExtension(ext: Extension, name: string, searchTitle: string, search: boolean, origin: string): Promise<ChapterWithProvider[]> {
|
||||||
const cacheKey = `chapters:${name}:${searchTitle}`;
|
const cacheKey = `chapters:${name}:${origin}:${search ? "search" : "id"}:${searchTitle}`;
|
||||||
const cached = await getCache(cacheKey);
|
const cached = await getCache(cacheKey);
|
||||||
|
|
||||||
if (cached) {
|
if (cached) {
|
||||||
@@ -244,7 +245,7 @@ async function searchChaptersInExtension(ext: Extension, name: string, searchTit
|
|||||||
|
|
||||||
const chaps = await ext.findChapters!(mediaId);
|
const chaps = await ext.findChapters!(mediaId);
|
||||||
|
|
||||||
if (!chaps?.length) {
|
if (!chaps?.length){
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -267,11 +268,11 @@ async function searchChaptersInExtension(ext: Extension, name: string, searchTit
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function getChaptersForBook(id: string): Promise<{ chapters: ChapterWithProvider[] }> {
|
export async function getChaptersForBook(id: string, ext: Boolean): Promise<{ chapters: ChapterWithProvider[] }> {
|
||||||
let bookData: Book | null = null;
|
let bookData: Book | null = null;
|
||||||
let searchTitle: string = "";
|
let searchTitle: string = "";
|
||||||
|
|
||||||
if (!isNaN(Number(id))) {
|
if (!ext) {
|
||||||
const result = await getBookById(id);
|
const result = await getBookById(id);
|
||||||
if (!result || "error" in result) return { chapters: [] }
|
if (!result || "error" in result) return { chapters: [] }
|
||||||
bookData = result;
|
bookData = result;
|
||||||
@@ -292,13 +293,15 @@ export async function getChaptersForBook(id: string): Promise<{ chapters: Chapte
|
|||||||
}
|
}
|
||||||
|
|
||||||
const allChapters: any[] = [];
|
const allChapters: any[] = [];
|
||||||
|
let exts = "anilist";
|
||||||
|
if (ext) exts = "ext";
|
||||||
|
|
||||||
for (const [name, ext] of bookExtensions) {
|
for (const [name, ext] of bookExtensions) {
|
||||||
if (name == extension) {
|
if (name == extension) {
|
||||||
const chapters = await searchChaptersInExtension(ext, name, id, false);
|
const chapters = await searchChaptersInExtension(ext, name, id, false, exts);
|
||||||
allChapters.push(...chapters);
|
allChapters.push(...chapters);
|
||||||
} else {
|
} else {
|
||||||
const chapters = await searchChaptersInExtension(ext, name, searchTitle, true);
|
const chapters = await searchChaptersInExtension(ext, name, searchTitle, true, exts);
|
||||||
allChapters.push(...chapters);
|
allChapters.push(...chapters);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -308,15 +311,17 @@ export async function getChaptersForBook(id: string): Promise<{ chapters: Chapte
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function getChapterContent(bookId: string, chapterIndex: string, providerName: string): Promise<ChapterContent> {
|
export async function getChapterContent(bookId: string, chapterIndex: string, providerName: string, name: string): Promise<ChapterContent> {
|
||||||
const extensions = getAllExtensions();
|
const extensions = getAllExtensions();
|
||||||
const ext = extensions.get(providerName);
|
const ext = extensions.get(providerName);
|
||||||
|
|
||||||
if (!ext) {
|
if (!ext) {
|
||||||
throw new Error("Provider not found");
|
throw new Error("Provider not found");
|
||||||
}
|
}
|
||||||
|
let exts = "anilist";
|
||||||
|
if (name) exts = "ext";
|
||||||
|
|
||||||
const contentCacheKey = `content:${providerName}:${bookId}:${chapterIndex}`;
|
const contentCacheKey = `content:${providerName}:${exts}:${bookId}:${chapterIndex}`;
|
||||||
const cachedContent = await getCache(contentCacheKey);
|
const cachedContent = await getCache(contentCacheKey);
|
||||||
|
|
||||||
if (cachedContent) {
|
if (cachedContent) {
|
||||||
@@ -335,7 +340,7 @@ export async function getChapterContent(bookId: string, chapterIndex: string, pr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const chapterList = await getChaptersForBook(bookId);
|
const chapterList = await getChaptersForBook(bookId, Boolean(name));
|
||||||
|
|
||||||
if (!chapterList?.chapters || chapterList.chapters.length === 0) {
|
if (!chapterList?.chapters || chapterList.chapters.length === 0) {
|
||||||
throw new Error("Chapters not found");
|
throw new Error("Chapters not found");
|
||||||
|
|||||||
@@ -130,7 +130,7 @@ async function loadChapters(idForFetch) {
|
|||||||
try {
|
try {
|
||||||
|
|
||||||
const fetchUrl = extensionName
|
const fetchUrl = extensionName
|
||||||
? `/api/book/${idForFetch}/chapters`
|
? `/api/book/${idForFetch}/chapters?ext=${extensionName}`
|
||||||
: `/api/book/${idForFetch}/chapters`;
|
: `/api/book/${idForFetch}/chapters`;
|
||||||
|
|
||||||
const res = await fetch(fetchUrl);
|
const res = await fetch(fetchUrl);
|
||||||
@@ -174,7 +174,7 @@ function populateProviderFilter() {
|
|||||||
if (providers.length > 0) {
|
if (providers.length > 0) {
|
||||||
select.style.display = 'inline-block';
|
select.style.display = 'inline-block';
|
||||||
|
|
||||||
select.innerHTML = '<option value="all">All Providers</option>';
|
select.innerHTML = '';
|
||||||
|
|
||||||
providers.forEach(prov => {
|
providers.forEach(prov => {
|
||||||
const opt = document.createElement('option');
|
const opt = document.createElement('option');
|
||||||
@@ -276,7 +276,9 @@ function updatePagination() {
|
|||||||
function openReader(bookId, chapterId, provider) {
|
function openReader(bookId, chapterId, provider) {
|
||||||
const c = encodeURIComponent(chapterId);
|
const c = encodeURIComponent(chapterId);
|
||||||
const p = encodeURIComponent(provider);
|
const p = encodeURIComponent(provider);
|
||||||
window.location.href = `/read/${p}/${c}/${bookId}`;
|
let extension = "";
|
||||||
|
if (extensionName) extension = "?" + extensionName;
|
||||||
|
window.location.href = `/read/${p}/${c}/${bookId}${extension}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
init();
|
init();
|
||||||
@@ -10,6 +10,8 @@ const nextBtn = document.getElementById('next-chapter');
|
|||||||
const lnSettings = document.getElementById('ln-settings');
|
const lnSettings = document.getElementById('ln-settings');
|
||||||
const mangaSettings = document.getElementById('manga-settings');
|
const mangaSettings = document.getElementById('manga-settings');
|
||||||
|
|
||||||
|
const hasQuery = window.location.search.length > 0;
|
||||||
|
|
||||||
const config = {
|
const config = {
|
||||||
ln: {
|
ln: {
|
||||||
fontSize: 18,
|
fontSize: 18,
|
||||||
@@ -125,7 +127,9 @@ async function loadChapter() {
|
|||||||
`;
|
`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`/api/book/${bookId}/${chapter}/${provider}`);
|
let ext = ""
|
||||||
|
if(hasQuery) ext = "?ext=yes"
|
||||||
|
const res = await fetch(`/api/book/${bookId}/${chapter}/${provider}${ext}`);
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
|
|
||||||
if (data.title) {
|
if (data.title) {
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ const fs = require('fs');
|
|||||||
const path = require('path');
|
const path = require('path');
|
||||||
const os = require('os');
|
const os = require('os');
|
||||||
const { queryAll, run } = require('./database');
|
const { queryAll, run } = require('./database');
|
||||||
|
const { scrape } = require("./headless");
|
||||||
|
|
||||||
const extensions = new Map();
|
const extensions = new Map();
|
||||||
|
|
||||||
@@ -37,6 +38,7 @@ async function loadExtensions() {
|
|||||||
|
|
||||||
const name = instance.constructor.name;
|
const name = instance.constructor.name;
|
||||||
extensions.set(name, instance);
|
extensions.set(name, instance);
|
||||||
|
instance.scrape = scrape;
|
||||||
console.log(`📦 Loaded Extension: ${name}`);
|
console.log(`📦 Loaded Extension: ${name}`);
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
|||||||
133
src/shared/headless.js
Normal file
133
src/shared/headless.js
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
const { chromium } = require("playwright-chromium");
|
||||||
|
|
||||||
|
let browser;
|
||||||
|
let context;
|
||||||
|
|
||||||
|
const BLOCK_LIST = [
|
||||||
|
"google-analytics", "doubleclick", "facebook", "twitter",
|
||||||
|
"adsystem", "analytics", "tracker", "pixel", "quantserve", "newrelic"
|
||||||
|
];
|
||||||
|
|
||||||
|
/**
 * Launches the shared headless Chromium instance and its browser context.
 *
 * Calling it again once both `browser` and `context` exist is a no-op.
 * The module-level state is only considered initialized when BOTH objects
 * were created: if creating the context fails, the freshly launched browser
 * is closed and the state is reset so a later call can retry from scratch
 * (previously a failed `newContext` left `browser` set and `context`
 * undefined, which made every later call return early).
 *
 * @returns {Promise<void>}
 * @throws Whatever `chromium.launch` or `browser.newContext` rejects with.
 */
async function initHeadless() {
  if (browser && context) return;

  // Reuse a browser left over from a half-finished earlier attempt, if any.
  const launched = browser || await chromium.launch({
    headless: true,
    // Chromium command-line switches, passed through verbatim.
    args: [
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
      "--disable-gpu",
      "--disable-extensions",
      "--disable-background-networking",
      "--disable-sync",
      "--disable-translate",
      "--mute-audio",
      "--no-first-run",
      "--no-zygote",
      "--single-process"
    ]
  });

  try {
    context = await launched.newContext({
      userAgent:
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/122.0.0.0 Safari/537.36"
    });
    browser = launched;
  } catch (err) {
    // Do not keep a browser around that has no usable context.
    await launched.close().catch(() => {});
    browser = null;
    context = null;
    throw err;
  }
}
|
||||||
|
|
||||||
|
/**
 * Infinite-scroll helper: repeatedly scrolls the page to the bottom until the
 * document height stops changing.
 *
 * Every 20 ms the in-page script reads the scroll height, scrolls to it, and
 * counts consecutive ticks with an unchanged height; after 5 such ticks it
 * stops. Because a page that keeps growing would otherwise never finish, the
 * loop also stops once `maxDurationMs` has elapsed.
 *
 * @param page Playwright page (anything exposing `evaluate(fn, arg)`).
 * @param {number} [maxDurationMs=15000] Upper bound for the whole scroll loop.
 * @returns {Promise<void>} Resolves when scrolling has settled or timed out.
 */
async function turboScroll(page, maxDurationMs = 15000) {
  await page.evaluate((budgetMs) => {
    return new Promise((resolve) => {
      const startedAt = Date.now();
      let lastHeight = 0;
      let unchangedTicks = 0;

      const timer = setInterval(() => {
        // `document.body` can be missing on non-HTML or half-loaded documents.
        const root = document.body || document.documentElement;
        const height = root ? root.scrollHeight : 0;

        window.scrollTo(0, height);

        if (height === lastHeight) {
          unchangedTicks++;
        } else {
          unchangedTicks = 0;
          lastHeight = height;
        }

        const settled = unchangedTicks >= 5;
        const outOfTime = Date.now() - startedAt >= budgetMs;

        if (settled || outOfTime) {
          clearInterval(timer);
          resolve();
        }
      }, 20);
    });
  }, maxDurationMs);
}
|
||||||
|
|
||||||
|
/**
 * Main scrape entry point: opens `url` in a new page of the shared browser
 * context, applies request filtering, optionally waits/scrolls, then runs
 * `handler(page)` and returns whatever it resolves to.
 *
 * @param {string} url Address to navigate to.
 * @param {(page: object) => any} handler Callback that receives the loaded
 *   page and extracts the data; its (awaited) result is returned.
 * @param {object} [options]
 * @param {string} [options.waitUntil="domcontentloaded"] Passed to `page.goto`.
 * @param {string|null} [options.waitSelector=null] If set, wait for this
 *   selector after navigation; a timeout here is ignored (best effort).
 * @param {number} [options.timeout=10000] Timeout in ms for `goto` and for
 *   the selector wait.
 * @param {boolean} [options.scrollToBottom=false] Run `turboScroll` first.
 * @param {number} [options.renderWaitTime=0] Extra delay in ms before the
 *   handler runs.
 * @param {boolean} [options.loadImages=true] When false, image requests are
 *   aborted.
 * @returns {Promise<any>} The handler's result.
 * @throws Navigation and handler errors propagate; the page is closed either way.
 */
async function scrape(url, handler, options = {}) {
  const {
    waitUntil = "domcontentloaded",
    waitSelector = null,
    timeout = 10000,
    scrollToBottom = false,
    renderWaitTime = 0,
    loadImages = true
  } = options;

  // Matches image extensions at the end of the path, including URLs that
  // carry a query string or fragment (e.g. ".../cover.jpg?w=300").
  const imageUrlPattern = /\.(jpg|jpeg|png|gif|webp|svg)(?:[?#]|$)/;

  // Start the browser on first use. (This used to call an undefined `init()`,
  // which threw a ReferenceError whenever the browser was not yet running.)
  if (!browser) await initHeadless();

  const page = await context.newPage();

  try {
    // Resource blocking: drop requests that are not needed for extraction.
    await page.route("**/*", (route) => {
      const request = route.request();
      const requestUrl = request.url().toLowerCase();
      const type = request.resourceType();

      if (
        type === "font" ||
        type === "stylesheet" ||
        type === "media" ||
        type === "manifest"
      ) return route.abort();

      if (BLOCK_LIST.some((keyword) => requestUrl.includes(keyword))) return route.abort();

      if (!loadImages && (
        type === "image" || imageUrlPattern.test(requestUrl)
      )) return route.abort();

      return route.continue();
    });

    await page.goto(url, { waitUntil, timeout });

    if (waitSelector) {
      try {
        await page.waitForSelector(waitSelector, { timeout });
      } catch {
        // Deliberately best-effort: the handler still runs if the selector
        // never shows up.
      }
    }

    if (scrollToBottom) {
      await turboScroll(page);
    }

    if (renderWaitTime > 0) {
      await new Promise((resolve) => setTimeout(resolve, renderWaitTime));
    }

    return await handler(page);
  } finally {
    // Always release the page, even when navigation or the handler throws;
    // a failure while closing must not mask the original error.
    await page.close().catch(() => {});
  }
}
|
||||||
|
|
||||||
|
/**
 * Shuts down the shared browser context and browser and clears the module
 * state so a later `initHeadless()` starts fresh.
 *
 * The references are cleared before closing, and the browser is closed in a
 * `finally`, so a rejection from `context.close()` can no longer leave the
 * browser process running or the stale handles in place.
 *
 * @returns {Promise<void>}
 * @throws Whatever `context.close()` or `browser.close()` rejects with.
 */
async function closeScraper() {
  const openContext = context;
  const openBrowser = browser;

  context = null;
  browser = null;

  try {
    if (openContext) await openContext.close();
  } finally {
    if (openBrowser) await openBrowser.close();
  }
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
initHeadless,
|
||||||
|
scrape,
|
||||||
|
closeScraper
|
||||||
|
};
|
||||||
Reference in New Issue
Block a user