Updated headless browser to support dynamic sites
- Removed tabs and moved over to pages
- Updated the rendering system
- Fixed multiple pages not loading on scroll, re-rendering, rendering nothing, or rendering only page 1
- Fixed the search bar not accepting spaces in queries
- Updated how extensions are made
- Updated how extensions are loaded
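The commit itself only touches the loader, but the diff below implies a rough shape for an extension: a CommonJS module exporting a single class, constructed with (fetchPath, cheerioPath, headlessBrowser), whose baseUrl and type are written as plain string literals so the regex-based peekProperty fallback can read them without instantiating the class. A minimal sketch under those assumptions — the class name, URL, and search() signature are illustrative, not taken from this repo:

// Hypothetical extension sketch. Everything here (ExampleScraper, the URL,
// the search() method) is assumed for illustration; only the constructor
// arguments and the baseUrl/type properties are implied by the loader diff.
// Assumes CommonJS builds of node-fetch (v2) and cheerio so require() works.
class ExampleScraper {
  constructor(fetchPath, cheerioPath, headlessBrowser) {
    this.fetch = require(fetchPath);
    this.cheerio = require(cheerioPath);
    this.browser = headlessBrowser;

    // Written as string literals so peekProperty() can recover them from the
    // source file even before the class is lazily instantiated.
    this.baseUrl = "https://example.com";
    this.type = "example";
  }

  // Assumed per-scraper method; the API's search handler in the diff receives
  // (event, source, query, page), how it calls the instance is an assumption.
  async search(query, page) {
    const res = await this.fetch(`${this.baseUrl}/search?q=${encodeURIComponent(query)}&page=${page}`);
    const $ = this.cheerio.load(await res.text());
    return $("a").map((i, el) => ({ title: $(el).text() })).get();
  }
}

// The loader picks the first exported key as the class to construct.
module.exports = { ExampleScraper };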
@@ -2,10 +2,11 @@ const fs = require('fs');
 const fetchPath = require.resolve('node-fetch');
 const cheerioPath = require.resolve('cheerio');
 
-function peekBaseUrl(filePath) {
+function peekProperty(filePath, propertyName) {
   try {
     const content = fs.readFileSync(filePath, 'utf-8');
-    const match = content.match(/baseUrl\s*=\s*["']([^"']+)["']/);
+    const regex = new RegExp(`(?:this\\.|^|\\s)${propertyName}\\s*=\\s*["']([^"']+)["']`);
+    const match = content.match(regex);
     return match ? match[1] : null;
   } catch (e) {
     return null;
@@ -16,23 +17,47 @@ module.exports = function (availableScrapers, headlessBrowser) {
 
   Object.keys(availableScrapers).forEach(name => {
     const scraper = availableScrapers[name];
+
     if (!scraper.url) {
-      const url = peekBaseUrl(scraper.path);
-      if (url) {
-        scraper.url = url;
+      if (scraper.instance && scraper.instance.baseUrl) {
+        scraper.url = scraper.instance.baseUrl;
+      } else {
+        scraper.url = peekProperty(scraper.path, 'baseUrl');
       }
     }
+
+    if (!scraper.type) {
+      if (scraper.instance && scraper.instance.type) {
+        scraper.type = scraper.instance.type;
+      } else {
+        const typeFromFile = peekProperty(scraper.path, 'type');
+        if (typeFromFile) {
+          console.log(`[API] Recovered type for ${name} via static analysis: ${typeFromFile}`);
+          scraper.type = typeFromFile;
+        }
+      }
+    }
   });
 
   return {
     getSources: () => {
-      return Object.keys(availableScrapers).map((name) => {
+      console.log("[API] Handling getSources request...");
+
+      const results = Object.keys(availableScrapers).map((name) => {
         const scraper = availableScrapers[name];
+
+        const typeToReturn = scraper.type || null;
+
+        console.log(`[API] Processing ${name}: Type found = "${typeToReturn}"`);
+
         return {
           name: name,
-          url: scraper.url || name
+          url: scraper.url || name,
+          type: typeToReturn
         };
       });
+
+      return results;
     },
 
     search: async (event, source, query, page) => {
@@ -46,7 +71,6 @@ module.exports = function (availableScrapers, headlessBrowser) {
       console.log(`[LazyLoad] Initializing scraper: ${source}...`);
       try {
         const scraperModule = require(scraperData.path);
-
         const className = Object.keys(scraperModule)[0];
         const ScraperClass = scraperModule[className];
 
@@ -55,12 +79,10 @@ module.exports = function (availableScrapers, headlessBrowser) {
         }
 
         const instance = new ScraperClass(fetchPath, cheerioPath, headlessBrowser);
 
         scraperData.instance = instance;
-
-        if (instance.baseUrl) {
-          scraperData.url = instance.baseUrl;
-        }
+        if (instance.type) scraperData.type = instance.type;
+        if (instance.baseUrl) scraperData.url = instance.baseUrl;
 
       } catch (err) {
         console.error(`Failed to lazy load ${source}:`, err);
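For context on the static-analysis fallback above: peekProperty is a plain regex scan over the extension's source text, so it can only recover properties assigned as string literals; anything computed or imported still requires instantiating the class. A small self-contained illustration — peekProperty is copied from this diff so the snippet runs on its own, and the file contents and temp path are hypothetical:

// Illustration only: the written file is a made-up example of what the
// regex fallback can and cannot recover.
const fs = require('fs');
const os = require('os');
const path = require('path');

function peekProperty(filePath, propertyName) {
  try {
    const content = fs.readFileSync(filePath, 'utf-8');
    const regex = new RegExp(`(?:this\\.|^|\\s)${propertyName}\\s*=\\s*["']([^"']+)["']`);
    const match = content.match(regex);
    return match ? match[1] : null;
  } catch (e) {
    return null;
  }
}

const file = path.join(os.tmpdir(), 'example-scraper.js');
fs.writeFileSync(file, `
class ExampleScraper {
  constructor() {
    this.baseUrl = "https://example.com"; // string literal: recoverable
    this.type = TYPES.MANGA;              // not a literal: not recoverable
  }
}
`);

console.log(peekProperty(file, 'baseUrl')); // "https://example.com"
console.log(peekProperty(file, 'type'));    // null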