From 2668bc5e72637bb105cb8655e054048f818c6ce3 Mon Sep 17 00:00:00 2001 From: lenafx Date: Wed, 31 Dec 2025 16:14:28 +0100 Subject: [PATCH] better auto matching for local entries --- desktop/src/api/local/local.service.ts | 71 ++++++++++++++++++++++++-- docker/src/api/local/local.service.ts | 71 ++++++++++++++++++++++++-- 2 files changed, 134 insertions(+), 8 deletions(-) diff --git a/desktop/src/api/local/local.service.ts b/desktop/src/api/local/local.service.ts index 1d90844..32209b7 100644 --- a/desktop/src/api/local/local.service.ts +++ b/desktop/src/api/local/local.service.ts @@ -13,6 +13,59 @@ const MANGA_IMAGE_EXTS = ['.jpg', '.jpeg', '.png', '.webp']; const MANGA_ARCHIVES = ['.cbz', '.cbr', '.zip']; const NOVEL_EXTS = ['.epub', '.pdf', '.txt', '.md', '.docx', '.mobi']; +function normalize(str: string) { + return str + .toLowerCase() + .normalize('NFD') + .replace(/[\u0300-\u036f]/g, '') + .replace(/[^a-z0-9 ]/g, '') + .trim(); +} + +function levenshtein(a: string, b: string) { + const matrix = Array.from({ length: b.length + 1 }, (_, i) => [i]); + + for (let j = 0; j <= a.length; j++) matrix[0][j] = j; + + for (let i = 1; i <= b.length; i++) { + for (let j = 1; j <= a.length; j++) { + matrix[i][j] = Math.min( + matrix[i - 1][j] + 1, + matrix[i][j - 1] + 1, + matrix[i - 1][j - 1] + (b[i - 1] === a[j - 1] ? 0 : 1) + ); + } + } + return matrix[b.length][a.length]; +} + +function getTitleVariants(media: any): string[] { + const t = media.title || {}; + return [ + t.romaji, + t.english, + t.native, + ...(media.synonyms || []) + ].filter(Boolean); +} + +function scoreEntry(query: string, media: any) { + const q = normalize(query); + let best = Infinity; + + for (const title of getTitleVariants(media)) { + const t = normalize(title); + + if (t.includes(q) || q.includes(t)) { + return 0; + } + + best = Math.min(best, levenshtein(q, t)); + } + + return best; +} + export async function resolveEntryMetadata(entry: any, type: string) { let metadata = null; let matchedId = entry.matched_id; @@ -26,16 +79,26 @@ export async function resolveEntryMetadata(entry: any, type: string) { let picked = null; + let candidates = results; + if (type !== 'anime' && Array.isArray(results)) { - console.log(type); if (entry.type === 'novels') { - picked = results.find(r => r.format === 'NOVEL'); + candidates = results.filter(r => r.format === 'NOVEL'); } else if (entry.type === 'manga') { - picked = results.find(r => r.format !== 'NOVEL'); + candidates = results.filter(r => r.format !== 'NOVEL'); } } - picked ??= results?.[0]; + if (Array.isArray(candidates) && candidates.length) { + const scored = candidates + .map(r => ({ r, score: scoreEntry(entry.folder_name, r) })) + .sort((a, b) => a.score - b.score); + + // cutoff opcional + if (scored[0].score <= 10) { + picked = scored[0].r; + } + } if (picked?.id) { matchedId = picked.id; diff --git a/docker/src/api/local/local.service.ts b/docker/src/api/local/local.service.ts index 1d90844..32209b7 100644 --- a/docker/src/api/local/local.service.ts +++ b/docker/src/api/local/local.service.ts @@ -13,6 +13,59 @@ const MANGA_IMAGE_EXTS = ['.jpg', '.jpeg', '.png', '.webp']; const MANGA_ARCHIVES = ['.cbz', '.cbr', '.zip']; const NOVEL_EXTS = ['.epub', '.pdf', '.txt', '.md', '.docx', '.mobi']; +function normalize(str: string) { + return str + .toLowerCase() + .normalize('NFD') + .replace(/[\u0300-\u036f]/g, '') + .replace(/[^a-z0-9 ]/g, '') + .trim(); +} + +function levenshtein(a: string, b: string) { + const matrix = Array.from({ length: b.length + 1 }, (_, i) => [i]); + + for (let j = 0; j <= a.length; j++) matrix[0][j] = j; + + for (let i = 1; i <= b.length; i++) { + for (let j = 1; j <= a.length; j++) { + matrix[i][j] = Math.min( + matrix[i - 1][j] + 1, + matrix[i][j - 1] + 1, + matrix[i - 1][j - 1] + (b[i - 1] === a[j - 1] ? 0 : 1) + ); + } + } + return matrix[b.length][a.length]; +} + +function getTitleVariants(media: any): string[] { + const t = media.title || {}; + return [ + t.romaji, + t.english, + t.native, + ...(media.synonyms || []) + ].filter(Boolean); +} + +function scoreEntry(query: string, media: any) { + const q = normalize(query); + let best = Infinity; + + for (const title of getTitleVariants(media)) { + const t = normalize(title); + + if (t.includes(q) || q.includes(t)) { + return 0; + } + + best = Math.min(best, levenshtein(q, t)); + } + + return best; +} + export async function resolveEntryMetadata(entry: any, type: string) { let metadata = null; let matchedId = entry.matched_id; @@ -26,16 +79,26 @@ export async function resolveEntryMetadata(entry: any, type: string) { let picked = null; + let candidates = results; + if (type !== 'anime' && Array.isArray(results)) { - console.log(type); if (entry.type === 'novels') { - picked = results.find(r => r.format === 'NOVEL'); + candidates = results.filter(r => r.format === 'NOVEL'); } else if (entry.type === 'manga') { - picked = results.find(r => r.format !== 'NOVEL'); + candidates = results.filter(r => r.format !== 'NOVEL'); } } - picked ??= results?.[0]; + if (Array.isArray(candidates) && candidates.length) { + const scored = candidates + .map(r => ({ r, score: scoreEntry(entry.folder_name, r) })) + .sort((a, b) => a.score - b.score); + + // cutoff opcional + if (scored[0].score <= 10) { + picked = scored[0].r; + } + } if (picked?.id) { matchedId = picked.id;