Updated headless browser to support dynamic sites

Removed tabs and moved over to pages
Updated the rendering system
Fixed multiple pages not loading on scroll — previously the list would re-render incorrectly, render nothing at all, or render only page 1.
Fixed the search bar not accepting spaces within a query
Updated how extensions are made
Updated how extensions are loaded
This commit is contained in:
2025-11-21 11:48:07 -05:00
parent c3de5af1f2
commit 04f37218de
11 changed files with 567 additions and 295 deletions

View File

@@ -2,10 +2,11 @@ const fs = require('fs');
const fetchPath = require.resolve('node-fetch');
const cheerioPath = require.resolve('cheerio');
function peekBaseUrl(filePath) {
function peekProperty(filePath, propertyName) {
try {
const content = fs.readFileSync(filePath, 'utf-8');
const match = content.match(/baseUrl\s*=\s*["']([^"']+)["']/);
const regex = new RegExp(`(?:this\\.|^|\\s)${propertyName}\\s*=\\s*["']([^"']+)["']`);
const match = content.match(regex);
return match ? match[1] : null;
} catch (e) {
return null;
@@ -16,23 +17,47 @@ module.exports = function (availableScrapers, headlessBrowser) {
Object.keys(availableScrapers).forEach(name => {
const scraper = availableScrapers[name];
if (!scraper.url) {
const url = peekBaseUrl(scraper.path);
if (url) {
scraper.url = url;
if (scraper.instance && scraper.instance.baseUrl) {
scraper.url = scraper.instance.baseUrl;
} else {
scraper.url = peekProperty(scraper.path, 'baseUrl');
}
}
if (!scraper.type) {
if (scraper.instance && scraper.instance.type) {
scraper.type = scraper.instance.type;
} else {
const typeFromFile = peekProperty(scraper.path, 'type');
if (typeFromFile) {
console.log(`[API] Recovered type for ${name} via static analysis: ${typeFromFile}`);
scraper.type = typeFromFile;
}
}
}
});
return {
getSources: () => {
return Object.keys(availableScrapers).map((name) => {
console.log("[API] Handling getSources request...");
const results = Object.keys(availableScrapers).map((name) => {
const scraper = availableScrapers[name];
const typeToReturn = scraper.type || null;
console.log(`[API] Processing ${name}: Type found = "${typeToReturn}"`);
return {
name: name,
url: scraper.url || name
url: scraper.url || name,
type: typeToReturn
};
});
return results;
},
search: async (event, source, query, page) => {
@@ -46,7 +71,6 @@ module.exports = function (availableScrapers, headlessBrowser) {
console.log(`[LazyLoad] Initializing scraper: ${source}...`);
try {
const scraperModule = require(scraperData.path);
const className = Object.keys(scraperModule)[0];
const ScraperClass = scraperModule[className];
@@ -55,12 +79,10 @@ module.exports = function (availableScrapers, headlessBrowser) {
}
const instance = new ScraperClass(fetchPath, cheerioPath, headlessBrowser);
scraperData.instance = instance;
if (instance.baseUrl) {
scraperData.url = instance.baseUrl;
}
if (instance.type) scraperData.type = instance.type;
if (instance.baseUrl) scraperData.url = instance.baseUrl;
} catch (err) {
console.error(`Failed to lazy load ${source}:`, err);