Added in Book Boards! (NEW!) Updated rendering logic Updated search logic Updated extension loading logic Updated image handling logic
181 lines
5.0 KiB
JavaScript
181 lines
5.0 KiB
JavaScript
const { BrowserWindow, session } = require('electron');
|
|
|
|
class HeadlessBrowser {
|
|
constructor() {
|
|
this.win = null;
|
|
this.currentConfig = null;
|
|
}
|
|
|
|
/**
|
|
* Pre-loads the browser window on app startup.
|
|
*/
|
|
async init() {
|
|
console.log('[Headless] Pre-warming browser instance...');
|
|
await this.getWindow(true); // Default to loading images
|
|
console.log('[Headless] Browser ready.');
|
|
}
|
|
|
|
/**
|
|
* Gets an existing window or creates a new one if config changes/window missing.
|
|
*/
|
|
async getWindow(loadImages) {
|
|
// If window exists and config matches, reuse it (FAST PATH)
|
|
if (this.win && !this.win.isDestroyed() && this.currentConfig === loadImages) {
|
|
return this.win;
|
|
}
|
|
|
|
// Otherwise, destroy old window and create new one (SLOW PATH)
|
|
if (this.win && !this.win.isDestroyed()) {
|
|
this.win.destroy();
|
|
}
|
|
|
|
this.currentConfig = loadImages;
|
|
|
|
this.win = new BrowserWindow({
|
|
show: false,
|
|
width: 1920,
|
|
height: 1080,
|
|
webPreferences: {
|
|
offscreen: true,
|
|
contextIsolation: false,
|
|
nodeIntegration: false,
|
|
images: loadImages,
|
|
webgl: false,
|
|
backgroundThrottling: false,
|
|
autoplayPolicy: 'no-user-gesture-required',
|
|
disableHtmlFullscreenWindowResize: true
|
|
},
|
|
});
|
|
|
|
const userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36';
|
|
this.win.webContents.setUserAgent(userAgent);
|
|
|
|
const ses = this.win.webContents.session;
|
|
|
|
ses.webRequest.onBeforeRequest({ urls: ['*://*/*'] }, (details, callback) => {
|
|
const url = details.url.toLowerCase();
|
|
const type = details.resourceType;
|
|
|
|
if (
|
|
type === 'font' ||
|
|
type === 'stylesheet' ||
|
|
type === 'media' ||
|
|
type === 'websocket' ||
|
|
type === 'manifest'
|
|
) {
|
|
return callback({ cancel: true });
|
|
}
|
|
|
|
const blockList = [
|
|
'google-analytics', 'doubleclick', 'facebook', 'twitter', 'adsystem',
|
|
'analytics', 'tracker', 'pixel', 'quantserve', 'newrelic'
|
|
];
|
|
|
|
if (blockList.some(keyword => url.includes(keyword))) return callback({ cancel: true });
|
|
|
|
if (!loadImages && (type === 'image' || url.match(/\.(jpg|jpeg|png|gif|webp|svg)$/))) {
|
|
return callback({ cancel: true });
|
|
}
|
|
|
|
return callback({ cancel: false });
|
|
});
|
|
|
|
// Load a blank page to keep the process alive and ready
|
|
await this.win.loadURL('about:blank');
|
|
|
|
return this.win;
|
|
}
|
|
|
|
async scrape(url, evalFunc, options = {}) {
|
|
const {
|
|
waitSelector = null,
|
|
timeout = 10000,
|
|
args = [],
|
|
scrollToBottom = false,
|
|
renderWaitTime = 0,
|
|
loadImages = true
|
|
} = options;
|
|
|
|
try {
|
|
const win = await this.getWindow(loadImages);
|
|
|
|
await win.loadURL(url);
|
|
|
|
if (waitSelector) {
|
|
try {
|
|
await this.waitForSelector(win, waitSelector, timeout);
|
|
} catch (e) {
|
|
console.warn(`[Headless] Timeout waiting for ${waitSelector}, proceeding...`);
|
|
}
|
|
}
|
|
|
|
if (scrollToBottom) {
|
|
await this.turboScroll(win);
|
|
}
|
|
|
|
if (renderWaitTime > 0) {
|
|
await new Promise(resolve => setTimeout(resolve, renderWaitTime));
|
|
}
|
|
|
|
const result = await win.webContents.executeJavaScript(
|
|
`(${evalFunc.toString()}).apply(null, ${JSON.stringify(args)})`
|
|
);
|
|
|
|
return result;
|
|
|
|
} catch (error) {
|
|
console.error('Headless Scrape Error:', error.message);
|
|
// Force recreation next time if something crashed
|
|
if (this.win) {
|
|
try { this.win.destroy(); } catch(e){}
|
|
this.win = null;
|
|
}
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
async waitForSelector(win, selector, timeout) {
|
|
const script = `
|
|
new Promise((resolve, reject) => {
|
|
const start = Date.now();
|
|
const check = () => {
|
|
if (document.querySelector('${selector}')) {
|
|
resolve(true);
|
|
} else if (Date.now() - start > ${timeout}) {
|
|
reject(new Error('Timeout'));
|
|
} else {
|
|
requestAnimationFrame(check);
|
|
}
|
|
};
|
|
check();
|
|
});
|
|
`;
|
|
await win.webContents.executeJavaScript(script);
|
|
}
|
|
|
|
async turboScroll(win) {
|
|
const script = `
|
|
new Promise((resolve) => {
|
|
let lastHeight = 0;
|
|
let sameHeightCount = 0;
|
|
const timer = setInterval(() => {
|
|
const scrollHeight = document.body.scrollHeight;
|
|
window.scrollTo(0, scrollHeight);
|
|
if (scrollHeight === lastHeight) {
|
|
sameHeightCount++;
|
|
if (sameHeightCount >= 5) {
|
|
clearInterval(timer);
|
|
resolve();
|
|
}
|
|
} else {
|
|
sameHeightCount = 0;
|
|
lastHeight = scrollHeight;
|
|
}
|
|
}, 20);
|
|
});
|
|
`;
|
|
await win.webContents.executeJavaScript(script);
|
|
}
|
|
}
|
|
|
|
module.exports = new HeadlessBrowser(); |