Merge pull request 'Dev Branch Merge To Main' (#1) from dev into main

Reviewed-on: ItsSkaiya/WaifuBoard-Recode#1
This commit is contained in:
2025-11-30 15:40:36 +01:00
29 changed files with 25756 additions and 296 deletions

14
binding.gyp Normal file
View File

@@ -0,0 +1,14 @@
{
"targets": [
{
"target_name": "anime_core",
"cflags!": [ "-fno-exceptions" ],
"cflags_cc!": [ "-fno-exceptions" ],
"sources": [ "./src/main.cpp" ],
"include_dirs": [
"<!@(node -p \"require('node-addon-api').include\")"
],
"defines": [ "NAPI_DISABLE_CPP_EXCEPTIONS" ]
}
]
}

968
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -13,12 +13,15 @@
"type": "commonjs",
"dependencies": {
"@fastify/static": "^8.3.0",
"bindings": "^1.5.0",
"fastify": "^5.6.2",
"node-addon-api": "^8.5.0",
"sqlite3": "^5.1.7"
},
"devDependencies": {
"@types/node": "^24.0.0",
"typescript": "^5.3.0",
"ts-node": "^10.9.0"
"node-gyp": "^12.1.0",
"ts-node": "^10.9.0",
"typescript": "^5.3.0"
}
}

View File

@@ -1,6 +1,7 @@
const fastify = require('fastify')({ logger: true });
const path = require('path');
const { animeMetadata } = require('./src/metadata/anilist');
const { spawn } = require('child_process');
const fs = require('fs');
const { initDatabase } = require('./src/shared/database');
const { loadExtensions } = require('./src/shared/extensions');
@@ -35,6 +36,41 @@ fastify.register(booksRoutes, { prefix: '/api' });
fastify.register(proxyRoutes, { prefix: '/api' });
fastify.register(extensionsRoutes, { prefix: '/api' });
/**
 * Returns a copy of `baseEnv` with `dir` prepended to the executable search path.
 *
 * The existing PATH entry is located case-insensitively because a plain-object
 * copy of `process.env` on Windows usually carries the key as `Path`; writing a
 * second `PATH` key would shadow the real search path instead of extending it.
 *
 * @param {Record<string, string | undefined>} baseEnv - Environment to copy (not mutated).
 * @param {string} dir - Directory to put first in the search path.
 * @param {string} [delimiter=path.delimiter] - Path list separator (`;` on Windows, `:` elsewhere).
 * @returns {Record<string, string | undefined>} New environment object.
 */
function withPrependedPath(baseEnv, dir, delimiter = path.delimiter) {
  const env = { ...baseEnv };
  const pathKey = Object.keys(env).find((key) => key.toUpperCase() === 'PATH') ?? 'PATH';
  const current = env[pathKey];
  env[pathKey] = current ? `${dir}${delimiter}${current}` : dir;
  return env;
}

/**
 * Launches the native AniList scraper as a child process.
 *
 * The executable is expected at `src/metadata/anilist(.exe)`; the sibling
 * `binaries` directory is added to the child's search path so bundled shared
 * libraries can be resolved. Output is inherited from this process. If the
 * executable is missing, an error is logged and nothing is spawned.
 *
 * @returns {void}
 */
function startCppScraper() {
  const exePath = path.join(
    __dirname,
    'src',
    'metadata',
    process.platform === 'win32' ? 'anilist.exe' : 'anilist'
  );
  const dllPath = path.join(__dirname, 'src', 'metadata', 'binaries');

  if (!fs.existsSync(exePath)) {
    console.error(`❌ C++ Error: Could not find executable at: ${exePath}`);
    console.error(" Did you compile it? (g++ src/metadata/anilist.cpp -o src/metadata/anilist.exe ...)");
    return;
  }

  const env = withPrependedPath(process.env, dllPath);

  console.log("⚡ Starting WaifuBoard Scraper Engine (C++)...");
  const scraper = spawn(exePath, [], {
    stdio: 'inherit',
    // The scraper opens its database via a path relative to the project root.
    cwd: __dirname,
    env,
  });

  scraper.on('error', (err) => {
    console.error('❌ Failed to spawn C++ process:', err);
  });

  scraper.on('close', (code, signal) => {
    if (code === 0) {
      console.log(`✅ Scraper process finished with code ${code}`);
    } else {
      // `code` is null when the child was terminated by a signal.
      console.error(`❌ Scraper process exited abnormally (code ${code}, signal ${signal})`);
    }
  });
}
const start = async () => {
try {
initDatabase();
@@ -43,7 +79,8 @@ const start = async () => {
await fastify.listen({ port: 3000, host: '0.0.0.0' });
console.log(`Server running at http://localhost:3000`);
animeMetadata();
startCppScraper();
} catch (err) {
fastify.log.error(err);
process.exit(1);

BIN
src/metadata/anilist Executable file

Binary file not shown.

424
src/metadata/anilist.cpp Normal file
View File

@@ -0,0 +1,424 @@
#include <iostream>
#include <string>
#include <vector>
#include <thread>
#include <chrono>
#include <fstream>
#include <filesystem>
#include <atomic>
#include <mutex>
#include <map>
#include <iomanip>
#include <sstream>
#ifdef _WIN32
#include <windows.h>
#endif
#include <sqlite3.h>
#include <curl/curl.h>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace fs = std::filesystem;
// Process-wide status board: the worker threads publish their progress here and
// the dashboard thread renders it.
struct AppState {
    // Progress counters. They are atomics, so they are read and written without
    // taking stateMutex. A page value of -1 is rendered as "Caught Up".
    std::atomic<int> animePage{1};
    std::atomic<int> animeTotalRemote{0};
    std::atomic<int> mangaPage{1};
    std::atomic<int> mangaTotalRemote{0};

    // Human-readable status text. Every access in this file holds stateMutex.
    std::string animeAction{"Initializing..."};
    std::string mangaAction{"Initializing..."};
    std::string featuredStatus{"Waiting..."};
    std::string lastLog;  // most recent log message not yet printed by the dashboard

    std::mutex stateMutex;
};

// The single shared instance used by every thread.
AppState appState;
// SQLite file the scraper writes to; relative, so it resolves against the working directory.
const std::string DB_PATH{"src/metadata/anilist_anime.db"};

// Self-imposed request budget for the bulk crawl, and the per-request pause derived from it.
constexpr int REQUESTS_PER_MINUTE = 20;
constexpr int DELAY_MS = 60 * 1000 / REQUESTS_PER_MINUTE;

// Pause between two refreshes of the featured lists (8 minutes, in milliseconds).
constexpr int FEATURED_REFRESH_RATE_MS = 8 * 60 * 1000;
// GraphQL selection set requested for every media entry. It is spliced verbatim
// into both BULK_QUERY and FEATURED_QUERY below, so the two queries return the
// same fields per entry.
const std::string MEDIA_FIELDS = R"(
id idMal title { romaji english native userPreferred } type format status description
startDate { year month day } endDate { year month day } season seasonYear episodes
duration chapters volumes countryOfOrigin isLicensed source hashtag
trailer { id site thumbnail } updatedAt coverImage { extraLarge large medium color }
bannerImage genres synonyms averageScore popularity isLocked trending favourites
isAdult siteUrl tags { id name description category rank isGeneralSpoiler isMediaSpoiler isAdult }
relations { edges { relationType node { id title { romaji } type format status } } }
studios { edges { isMain node { id name isAnimationStudio } } }
nextAiringEpisode { airingAt timeUntilAiring episode }
externalLinks { id url site type language color icon notes }
rankings { id rank type format year season allTime context }
stats { scoreDistribution { score amount } statusDistribution { status amount } }
recommendations(perPage: 7, sort: RATING_DESC) { nodes { mediaRecommendation { id title { romaji } coverImage { medium } format type } } }
)";
// Paged crawl query: takes $page and $type, asks for 50 entries per page sorted
// by ID, and also requests pageInfo (total / currentPage / lastPage / hasNextPage).
const std::string BULK_QUERY = R"(
query ($page: Int, $type: MediaType) {
Page(page: $page, perPage: 50) {
pageInfo { total currentPage lastPage hasNextPage }
media(type: $type, sort: ID) { )" + MEDIA_FIELDS + R"( }
}
}
)";
// Featured-list query: always the first page of 20 non-adult entries, ordered by
// $sort and filtered by $type and $status.
const std::string FEATURED_QUERY = R"(
query ($sort: [MediaSort], $type: MediaType, $status: MediaStatus) {
Page(page: 1, perPage: 20) {
media(type: $type, sort: $sort, status: $status, isAdult: false) { )" + MEDIA_FIELDS + R"( }
}
}
)";
// Stores `message` as the pending log line in the shared state, replacing any
// message that has not been consumed yet. Takes stateMutex for the write.
void safeLog(const std::string& message) {
    const std::lock_guard<std::mutex> guard(appState.stateMutex);
    appState.lastLog.assign(message);
}
// Publishes the current activity text for one scraper.
// `type` equal to "ANIME" targets the anime status; any other value targets the manga status.
void updateAction(std::string type, std::string action) {
    const std::lock_guard<std::mutex> guard(appState.stateMutex);
    const bool isAnime = (type == "ANIME");
    std::string& slot = isAnime ? appState.animeAction : appState.mangaAction;
    slot = action;
}
class Database {
sqlite3* db;
std::mutex db_mutex;
public:
Database(const std::string& path) {
if (sqlite3_open(path.c_str(), &db)) {
safeLog("❌ Error: Can't open database at " + path);
exit(1);
}
init();
}
~Database() { sqlite3_close(db); }
void init() {
std::lock_guard<std::mutex> lock(db_mutex);
char* errMsg = 0;
const char* sql =
"CREATE TABLE IF NOT EXISTS anime (id INTEGER PRIMARY KEY, title TEXT, updatedAt INTEGER, full_data JSON);"
"CREATE TABLE IF NOT EXISTS trending (rank INTEGER PRIMARY KEY, id INTEGER, full_data JSON);"
"CREATE TABLE IF NOT EXISTS top_airing (rank INTEGER PRIMARY KEY, id INTEGER, full_data JSON);"
"CREATE TABLE IF NOT EXISTS books (id INTEGER PRIMARY KEY, title TEXT, updatedAt INTEGER, full_data JSON);"
"CREATE TABLE IF NOT EXISTS trending_books (rank INTEGER PRIMARY KEY, id INTEGER, full_data JSON);"
"CREATE TABLE IF NOT EXISTS popular_books (rank INTEGER PRIMARY KEY, id INTEGER, full_data JSON);";
sqlite3_exec(db, sql, 0, 0, &errMsg);
}
void saveBatch(const std::string& table, const json& mediaList) {
std::lock_guard<std::mutex> lock(db_mutex);
sqlite3_exec(db, "BEGIN TRANSACTION", 0, 0, 0);
std::string sql = "INSERT INTO " + table + " (id, title, updatedAt, full_data) VALUES (?, ?, ?, ?) "
"ON CONFLICT(id) DO UPDATE SET title=excluded.title, updatedAt=excluded.updatedAt, full_data=excluded.full_data "
"WHERE updatedAt < excluded.updatedAt OR title != excluded.title";
sqlite3_stmt* stmt;
sqlite3_prepare_v2(db, sql.c_str(), -1, &stmt, 0);
for (const auto& media : mediaList) {
int id = media["id"].get<int>();
int updatedAt = media.value("updatedAt", 0);
std::string title = "Unknown";
if (media.contains("title")) {
if (media["title"].contains("english") && !media["title"]["english"].is_null())
title = media["title"]["english"];
else if (media["title"].contains("romaji") && !media["title"]["romaji"].is_null())
title = media["title"]["romaji"];
}
std::string jsonDump = media.dump();
sqlite3_bind_int(stmt, 1, id);
sqlite3_bind_text(stmt, 2, title.c_str(), -1, SQLITE_STATIC);
sqlite3_bind_int(stmt, 3, updatedAt);
sqlite3_bind_text(stmt, 4, jsonDump.c_str(), -1, SQLITE_STATIC);
sqlite3_step(stmt);
sqlite3_reset(stmt);
}
sqlite3_finalize(stmt);
sqlite3_exec(db, "COMMIT", 0, 0, 0);
}
void updateFeatured(const std::string& table, const json& mediaList) {
std::lock_guard<std::mutex> lock(db_mutex);
sqlite3_exec(db, ("DELETE FROM " + table).c_str(), 0, 0, 0);
sqlite3_exec(db, "BEGIN TRANSACTION", 0, 0, 0);
sqlite3_stmt* stmt;
sqlite3_prepare_v2(db, ("INSERT INTO " + table + " (rank, id, full_data) VALUES (?, ?, ?)").c_str(), -1, &stmt, 0);
int rank = 1;
for (const auto& media : mediaList) {
int id = media["id"].get<int>();
std::string jsonDump = media.dump();
sqlite3_bind_int(stmt, 1, rank++);
sqlite3_bind_int(stmt, 2, id);
sqlite3_bind_text(stmt, 3, jsonDump.c_str(), -1, SQLITE_STATIC);
sqlite3_step(stmt);
sqlite3_reset(stmt);
}
sqlite3_finalize(stmt);
sqlite3_exec(db, "COMMIT", 0, 0, 0);
}
};
// libcurl write callback: appends the received chunk (size * nmemb bytes) to the
// string behind `userp` and reports the whole chunk as consumed.
size_t WriteCallback(void* contents, size_t size, size_t nmemb, std::string* userp) {
    const size_t byteCount = size * nmemb;
    const char* chunk = static_cast<const char*>(contents);
    userp->append(chunk, byteCount);
    return byteCount;
}
// libcurl header callback: receives one header line per call. Lines containing a
// ':' are split into name and value; the value is stripped of leading/trailing
// spaces, CR and LF and stored in `headers` under the name exactly as received.
// Lines without a ':' are ignored. Always reports the full line as consumed.
size_t HeaderCallback(char* buffer, size_t size, size_t nitems, std::map<std::string, std::string>* headers) {
    const size_t lineLength = size * nitems;
    const std::string line(buffer, lineLength);

    const size_t separator = line.find(':');
    if (separator == std::string::npos) return lineLength;

    static const char* const kPadding = " \r\n";
    std::string value;
    const size_t first = line.find_first_not_of(kPadding, separator + 1);
    if (first != std::string::npos) {
        // A non-padding character exists after the colon, so the last one is at or after `first`.
        const size_t last = line.find_last_not_of(kPadding);
        value = line.substr(first, last - first + 1);
    }

    (*headers)[line.substr(0, separator)] = value;
    return lineLength;
}
// Sends one GraphQL request to the AniList endpoint and returns the `data.Page`
// object of the response.
//
// Parameters:
//   query     - GraphQL document text.
//   variables - JSON object with the query variables.
// Returns the `Page` JSON value on success, or a null json on any failure
// (transport error, non-200 status, unparsable or unexpected body). Failures
// are reported through safeLog.
//
// Blocking behaviour: on HTTP 429 the call sleeps (honouring Retry-After when
// it is a number of seconds, otherwise 60 s) and retries; when the response
// reports fewer than 10 remaining requests it sleeps until the advertised
// reset time before returning.
json fetchGraphQL(const std::string& query, const json& variables) {
    // Header names are case-insensitive and arrive lower-cased over HTTP/2,
    // so the lookup must not depend on the exact spelling stored in the map.
    const auto findHeader = [](const std::map<std::string, std::string>& headers,
                               const std::string& name) -> const std::string* {
        const auto lower = [](char c) {
            return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
        };
        for (const auto& entry : headers) {
            const std::string& key = entry.first;
            if (key.size() != name.size()) continue;
            size_t i = 0;
            while (i < key.size() && lower(key[i]) == lower(name[i])) ++i;
            if (i == key.size()) return &entry.second;
        }
        return nullptr;
    };

    json body;
    body["query"] = query;
    body["variables"] = variables;
    const std::string payload = body.dump();

    while (true) {
        CURL* curl = curl_easy_init();
        if (!curl) return nullptr;

        std::string readBuffer;
        std::map<std::string, std::string> responseHeaders;
        char errbuf[CURL_ERROR_SIZE];
        errbuf[0] = '\0';  // libcurl may leave the buffer untouched on some errors

        struct curl_slist* headers = nullptr;
        headers = curl_slist_append(headers, "Content-Type: application/json");
        headers = curl_slist_append(headers, "Accept: application/json");

        curl_easy_setopt(curl, CURLOPT_URL, "https://graphql.anilist.co");
        curl_easy_setopt(curl, CURLOPT_POST, 1L);
        // POSTFIELDS is not copied by libcurl; `payload` outlives the transfer.
        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, payload.c_str());
        curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer);
        curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, HeaderCallback);
        curl_easy_setopt(curl, CURLOPT_HEADERDATA, &responseHeaders);
        // Keep certificate and host-name verification on: the stored data is
        // served to users, so the response must really come from AniList.
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L);
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 2L);
#if defined(_WIN32) && defined(CURLSSLOPT_NATIVE_CA)
        // Lets OpenSSL-based builds use the Windows certificate store, so no CA bundle file is needed.
        curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, (long)CURLSSLOPT_NATIVE_CA);
#endif
        curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errbuf);
        // Bound the request so a stalled connection cannot block the thread forever.
        curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
        curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 15L);
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, 60L);

        const CURLcode res = curl_easy_perform(curl);
        long http_code = 0;
        curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);

        curl_easy_cleanup(curl);
        curl_slist_free_all(headers);

        if (res != CURLE_OK) {
            const std::string detail = errbuf[0] != '\0' ? std::string(errbuf) : std::string(curl_easy_strerror(res));
            safeLog("❌ Curl Error: " + detail);
            return nullptr;
        }

        if (http_code == 429) {
            long long waitSeconds = 60;
            if (const std::string* retryAfter = findHeader(responseHeaders, "Retry-After")) {
                try {
                    const long long parsed = std::stoll(*retryAfter);
                    if (parsed > 0) waitSeconds = parsed;
                } catch (...) {
                    // Not a plain number of seconds: keep the 60 s default.
                }
            }
            safeLog("⚠️ ABSOLUTE RATE LIMIT. Sleeping " + std::to_string(waitSeconds) + "s.");
            std::this_thread::sleep_for(std::chrono::seconds(waitSeconds));
            continue;
        }

        const std::string* remaining = findHeader(responseHeaders, "X-RateLimit-Remaining");
        const std::string* reset = findHeader(responseHeaders, "X-RateLimit-Reset");
        if (remaining && reset) {
            try {
                if (std::stoi(*remaining) < 10) {
                    const long long resetTime = std::stoll(*reset);
                    const long long now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
                    const long long waitSeconds = (resetTime - now) + 2;
                    if (waitSeconds > 0) {
                        safeLog("⚠️ Rate Limit! Sleeping " + std::to_string(waitSeconds) + "s");
                        std::this_thread::sleep_for(std::chrono::seconds(waitSeconds));
                    }
                }
            } catch (...) {
                // Malformed rate-limit headers: skip the voluntary pause.
            }
        }

        if (http_code != 200) {
            safeLog("❌ HTTP Error: " + std::to_string(http_code));
            return nullptr;
        }

        try {
            const json parsed = json::parse(readBuffer);
            if (parsed.is_object() && parsed.contains("data") && parsed["data"].is_object() &&
                parsed["data"].contains("Page")) {
                return parsed["data"]["Page"];
            }
            return nullptr;
        } catch (...) {
            safeLog("❌ Invalid JSON in response");
            return nullptr;
        }
    }
}
// Endless crawl loop for one media type.
//
// Parameters:
//   db        - database that receives each fetched page.
//   type      - "ANIME" selects the anime counters/status; any other value the
//               manga ones. Also sent as the GraphQL $type variable.
//   tableName - table passed to Database::saveBatch.
//
// Walks the bulk query page by page, pausing DELAY_MS between pages. When the
// server reports no further page, it publishes page -1, sleeps ten minutes and
// starts again from page 1. A failed or empty fetch is retried after five
// seconds. Response fields are validated before use so that a null or missing
// value cannot throw out of the thread. Never returns.
void startScraper(Database& db, std::string type, std::string tableName) {
    const bool isAnime = (type == "ANIME");
    std::atomic<int>& publishedPage = isAnime ? appState.animePage : appState.mangaPage;
    std::atomic<int>& publishedTotal = isAnime ? appState.animeTotalRemote : appState.mangaTotalRemote;

    int page = 1;
    bool isCaughtUp = false;

    while (true) {
        if (isCaughtUp) {
            publishedPage = -1;  // rendered as "Caught Up" by the dashboard
            updateAction(type, "Caught Up (Sleep 10m)");
            std::this_thread::sleep_for(std::chrono::minutes(10));
            page = 1;
            isCaughtUp = false;
        }

        updateAction(type, "Fetching Page " + std::to_string(page) + "...");
        const json data = fetchGraphQL(BULK_QUERY, {{"page", page}, {"type", type}});

        // Read pageInfo defensively: absent or null fields count as
        // "no next page" / "total unknown".
        const bool hasPageInfo = data.is_object() && data.contains("pageInfo") && data["pageInfo"].is_object();
        bool hasNextPage = false;
        int totalRemote = 0;
        if (hasPageInfo) {
            const json& info = data["pageInfo"];
            if (info.contains("hasNextPage") && info["hasNextPage"].is_boolean())
                hasNextPage = info["hasNextPage"].get<bool>();
            if (info.contains("total") && info["total"].is_number_integer())
                totalRemote = info["total"].get<int>();
        }

        const bool hasMedia = data.is_object() && data.contains("media") &&
                              data["media"].is_array() && !data["media"].empty();
        if (!hasMedia) {
            if (hasPageInfo && !hasNextPage) {
                // A valid but empty last page: the crawl is complete.
                isCaughtUp = true;
            } else {
                updateAction(type, "Fetch Failed. Retrying...");
                std::this_thread::sleep_for(std::chrono::seconds(5));
            }
            continue;
        }

        publishedPage = page;
        publishedTotal = totalRemote;

        updateAction(type, "Saving to DB...");
        db.saveBatch(tableName, data["media"]);

        if (hasNextPage) {
            page++;
            updateAction(type, "Waiting " + std::to_string(DELAY_MS) + "ms...");
            std::this_thread::sleep_for(std::chrono::milliseconds(DELAY_MS));
        } else {
            isCaughtUp = true;
        }
    }
}
// Endless refresh loop for the four featured lists (trending anime, top airing
// anime, trending manga, popular manga). Each list is fetched with
// FEATURED_QUERY and, when the fetch returned something, written to its table.
// Sleeps FEATURED_REFRESH_RATE_MS between rounds. Never returns.
void startFeaturedLoop(Database& db) {
    // Publishes the featured-list status text under the state mutex.
    const auto publishStatus = [](const char* text) {
        std::lock_guard<std::mutex> guard(appState.stateMutex);
        appState.featuredStatus = text;
    };

    // Fetches one list and stores it; a null response leaves the table untouched.
    const auto refresh = [&db](const std::string& table, const json& variables) {
        json page = fetchGraphQL(FEATURED_QUERY, variables);
        if (page.is_null()) return;
        db.updateFeatured(table, page["media"]);
    };

    for (;;) {
        publishStatus("Refreshing...");

        refresh("trending", {{"sort", "TRENDING_DESC"}, {"type", "ANIME"}});
        refresh("top_airing", {{"sort", "SCORE_DESC"}, {"type", "ANIME"}, {"status", "RELEASING"}});
        refresh("trending_books", {{"sort", "TRENDING_DESC"}, {"type", "MANGA"}});
        refresh("popular_books", {{"sort", "POPULARITY_DESC"}, {"type", "MANGA"}});

        publishStatus("Idle");
        std::this_thread::sleep_for(std::chrono::milliseconds(FEATURED_REFRESH_RATE_MS));
    }
}
// Formats one dashboard status line.
//
// Parameters:
//   page    - current page number; -1 means the crawl is caught up.
//   total   - total number of remote entries (0 or less when unknown).
//   action  - free-text description of the current activity.
//   perPage - entries fetched per page; defaults to 50, the page size used by
//             the bulk query.
// Returns "Caught Up" for page -1, otherwise "Pg <page> (<percent>%) - <action>"
// with the percentage shown to two decimals. The percentage is capped at 100,
// because the last page is usually only partly filled and page * perPage would
// otherwise overshoot the total.
std::string getStatusLine(int page, int total, std::string action, int perPage = 50) {
    if (page == -1) return "Caught Up";

    double percent = 0.0;
    if (total > 0 && page > 0) {
        percent = ((double)page * (double)perPage) / (double)total * 100.0;
        if (percent > 100.0) percent = 100.0;
    }

    std::stringstream stream;
    stream << "Pg " << page << " (" << std::fixed << std::setprecision(2) << percent << "%) - " << action;
    return stream.str();
}
// Dashboard thread: redraws a four-line status panel roughly ten times per
// second. Any pending log message is taken from the shared state (clearing it
// there) and printed above the panel. After drawing, the cursor is moved back
// up four lines so the next frame overwrites the previous one. Never returns.
void uiThreadLoop() {
    std::cout << "\n\n";

    for (;;) {
        // Counters are atomics and are read without the mutex.
        const int animePage = appState.animePage;
        const int animeTotal = appState.animeTotalRemote;
        const int mangaPage = appState.mangaPage;
        const int mangaTotal = appState.mangaTotalRemote;

        std::string animeAction;
        std::string mangaAction;
        std::string featured;
        std::string pendingLog;
        {
            std::lock_guard<std::mutex> guard(appState.stateMutex);
            animeAction = appState.animeAction;
            mangaAction = appState.mangaAction;
            featured = appState.featuredStatus;
            // pendingLog starts empty, so swapping both takes the message and clears the shared slot.
            pendingLog.swap(appState.lastLog);
        }

        if (!pendingLog.empty()) {
            std::cout << "\r \r" << pendingLog << std::endl;
        }

        std::cout << "\r----------------------------------------------------------------\n";
        std::cout << " 📺 Anime: " << std::left << std::setw(45) << getStatusLine(animePage, animeTotal, animeAction) << "\n";
        std::cout << " 📖 Manga: " << std::left << std::setw(45) << getStatusLine(mangaPage, mangaTotal, mangaAction) << "\n";
        std::cout << " ✨ Feat : " << featured << "\n";
        std::cout << "----------------------------------------------------------------\x1b[4A" << std::flush;

        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }
}
int main() {
#ifdef _WIN32
SetConsoleOutputCP(65001);
#endif
fs::path p(DB_PATH);
if (p.has_parent_path() && !fs::exists(p.parent_path())) {
fs::create_directories(p.parent_path());
}
Database db(DB_PATH);
std::cout << "⚡ Starting WaifuBoard Scraper Engine..." << std::endl;
std::thread featuredThread(startFeaturedLoop, std::ref(db));
std::thread animeThread(startScraper, std::ref(db), "ANIME", "anime");
std::thread mangaThread(startScraper, std::ref(db), "MANGA", "books");
std::thread dashboard(uiThreadLoop);
featuredThread.join();
animeThread.join();
mangaThread.join();
dashboard.join();
return 0;
}

BIN
src/metadata/anilist.exe Executable file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

24596
src/metadata/json.hpp Normal file

File diff suppressed because it is too large Load Diff