better auto matching for local entries

This commit is contained in:
2025-12-31 16:14:28 +01:00
parent d07c8de452
commit 2668bc5e72
2 changed files with 134 additions and 8 deletions

View File

@@ -13,6 +13,59 @@ const MANGA_IMAGE_EXTS = ['.jpg', '.jpeg', '.png', '.webp'];
const MANGA_ARCHIVES = ['.cbz', '.cbr', '.zip']; const MANGA_ARCHIVES = ['.cbz', '.cbr', '.zip'];
const NOVEL_EXTS = ['.epub', '.pdf', '.txt', '.md', '.docx', '.mobi']; const NOVEL_EXTS = ['.epub', '.pdf', '.txt', '.md', '.docx', '.mobi'];
/**
 * Canonicalizes a title or folder name for fuzzy comparison.
 *
 * Steps, in order: lowercase, strip Latin combining diacritics
 * (U+0300–U+036F, so "é" becomes "e"), drop every character that is not a
 * letter, combining mark, digit or space, collapse runs of spaces, and trim.
 *
 * Letters and digits of any script are kept. An ASCII-only filter would
 * reduce non-Latin titles (Japanese, Korean, Cyrillic, ...) to an empty
 * string, which makes them useless for matching.
 *
 * @param str Raw text to canonicalize.
 * @returns The canonical form; may be empty when nothing comparable remains.
 */
function normalize(str: string) {
  return str
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    // Recompose what is left so marks outside the Latin range (e.g. kana
    // voicing marks split off by NFD) are folded back into their base letter.
    .normalize('NFC')
    .replace(/[^\p{L}\p{M}\p{N} ]/gu, '')
    // Removed punctuation can leave double spaces ("a - b" -> "a  b").
    .replace(/ {2,}/g, ' ')
    .trim();
}
/**
 * Levenshtein edit distance between two strings: the minimum number of
 * single-character insertions, deletions and substitutions that turn one
 * into the other. Characters are compared by string index (UTF-16 code units).
 *
 * @param a First string.
 * @param b Second string.
 * @returns The edit distance; 0 when the strings are equal.
 */
function levenshtein(a: string, b: string) {
  // Distances from the empty prefix of `b` to every prefix of `a`.
  let previous: number[] = [];
  for (let col = 0; col <= a.length; col++) {
    previous.push(col);
  }

  for (let row = 1; row <= b.length; row++) {
    // First cell: turning `row` characters of `b` into the empty string.
    const current: number[] = [row];
    for (let col = 1; col <= a.length; col++) {
      const sameChar = a[col - 1] === b[row - 1];
      const viaDiagonal = previous[col - 1] + (sameChar ? 0 : 1);
      const viaAbove = previous[col] + 1;
      const viaLeft = current[col - 1] + 1;
      current.push(Math.min(viaAbove, viaLeft, viaDiagonal));
    }
    previous = current;
  }

  return previous[a.length];
}
/**
 * Collects every known name of a media record as a flat list of strings.
 *
 * Reads `media.title` (either a plain string or an object with `romaji`,
 * `english` and `native` fields) followed by the entries of `media.synonyms`.
 * Missing fields, empty strings and non-string values are dropped so callers
 * can safely apply string methods to every element.
 *
 * @param media Media record; `null`/`undefined` yields an empty list.
 * @returns Title strings in the order romaji, english, native, synonyms.
 */
function getTitleVariants(media: any): string[] {
  const title = media?.title;
  const synonyms: unknown[] = Array.isArray(media?.synonyms) ? media.synonyms : [];

  const candidates: unknown[] =
    typeof title === 'string'
      ? [title, ...synonyms]
      : [title?.romaji, title?.english, title?.native, ...synonyms];

  return candidates.filter(
    (value): value is string => typeof value === 'string' && value.length > 0
  );
}
/**
 * Scores how well a media record matches a query string; lower is better.
 *
 * Every title variant of `media` is normalized and compared with the
 * normalized query. If either string contains the other the score is 0;
 * otherwise the score is the smallest Levenshtein distance over all variants.
 *
 * @param query Text to match (for example a folder name).
 * @param media Media record whose titles are compared against the query.
 * @returns 0 for a containment match, otherwise the best edit distance, or
 *          `Infinity` when there is nothing comparable on either side.
 */
function scoreEntry(query: string, media: any) {
  if (typeof query !== 'string') return Infinity;

  const q = normalize(query);
  // '' is a substring of every title, so an empty query would "match" everything.
  if (q.length === 0) return Infinity;

  let best = Infinity;

  for (const title of getTitleVariants(media)) {
    const t = normalize(title);
    // A title that normalizes to '' (every character stripped) would satisfy
    // q.includes('') and hand this entry a perfect score; ignore it instead.
    if (t.length === 0) continue;

    if (t.includes(q) || q.includes(t)) {
      return 0;
    }

    best = Math.min(best, levenshtein(q, t));
  }

  return best;
}
export async function resolveEntryMetadata(entry: any, type: string) { export async function resolveEntryMetadata(entry: any, type: string) {
let metadata = null; let metadata = null;
let matchedId = entry.matched_id; let matchedId = entry.matched_id;
@@ -26,16 +79,26 @@ export async function resolveEntryMetadata(entry: any, type: string) {
let picked = null; let picked = null;
let candidates = results;
if (type !== 'anime' && Array.isArray(results)) { if (type !== 'anime' && Array.isArray(results)) {
console.log(type);
if (entry.type === 'novels') { if (entry.type === 'novels') {
picked = results.find(r => r.format === 'NOVEL'); candidates = results.filter(r => r.format === 'NOVEL');
} else if (entry.type === 'manga') { } else if (entry.type === 'manga') {
picked = results.find(r => r.format !== 'NOVEL'); candidates = results.filter(r => r.format !== 'NOVEL');
} }
} }
picked ??= results?.[0]; if (Array.isArray(candidates) && candidates.length) {
const scored = candidates
.map(r => ({ r, score: scoreEntry(entry.folder_name, r) }))
.sort((a, b) => a.score - b.score);
// cutoff opcional
if (scored[0].score <= 10) {
picked = scored[0].r;
}
}
if (picked?.id) { if (picked?.id) {
matchedId = picked.id; matchedId = picked.id;

View File

@@ -13,6 +13,59 @@ const MANGA_IMAGE_EXTS = ['.jpg', '.jpeg', '.png', '.webp'];
const MANGA_ARCHIVES = ['.cbz', '.cbr', '.zip']; const MANGA_ARCHIVES = ['.cbz', '.cbr', '.zip'];
const NOVEL_EXTS = ['.epub', '.pdf', '.txt', '.md', '.docx', '.mobi']; const NOVEL_EXTS = ['.epub', '.pdf', '.txt', '.md', '.docx', '.mobi'];
/**
 * Canonicalizes a title or folder name for fuzzy comparison.
 *
 * Steps, in order: lowercase, strip Latin combining diacritics
 * (U+0300–U+036F, so "é" becomes "e"), drop every character that is not a
 * letter, combining mark, digit or space, collapse runs of spaces, and trim.
 *
 * Letters and digits of any script are kept. An ASCII-only filter would
 * reduce non-Latin titles (Japanese, Korean, Cyrillic, ...) to an empty
 * string, which makes them useless for matching.
 *
 * @param str Raw text to canonicalize.
 * @returns The canonical form; may be empty when nothing comparable remains.
 */
function normalize(str: string) {
  return str
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    // Recompose what is left so marks outside the Latin range (e.g. kana
    // voicing marks split off by NFD) are folded back into their base letter.
    .normalize('NFC')
    .replace(/[^\p{L}\p{M}\p{N} ]/gu, '')
    // Removed punctuation can leave double spaces ("a - b" -> "a  b").
    .replace(/ {2,}/g, ' ')
    .trim();
}
/**
 * Levenshtein edit distance between two strings: the minimum number of
 * single-character insertions, deletions and substitutions that turn one
 * into the other. Characters are compared by string index (UTF-16 code units).
 *
 * @param a First string.
 * @param b Second string.
 * @returns The edit distance; 0 when the strings are equal.
 */
function levenshtein(a: string, b: string) {
  // Distances from the empty prefix of `b` to every prefix of `a`.
  let previous: number[] = [];
  for (let col = 0; col <= a.length; col++) {
    previous.push(col);
  }

  for (let row = 1; row <= b.length; row++) {
    // First cell: turning `row` characters of `b` into the empty string.
    const current: number[] = [row];
    for (let col = 1; col <= a.length; col++) {
      const sameChar = a[col - 1] === b[row - 1];
      const viaDiagonal = previous[col - 1] + (sameChar ? 0 : 1);
      const viaAbove = previous[col] + 1;
      const viaLeft = current[col - 1] + 1;
      current.push(Math.min(viaAbove, viaLeft, viaDiagonal));
    }
    previous = current;
  }

  return previous[a.length];
}
/**
 * Collects every known name of a media record as a flat list of strings.
 *
 * Reads `media.title` (either a plain string or an object with `romaji`,
 * `english` and `native` fields) followed by the entries of `media.synonyms`.
 * Missing fields, empty strings and non-string values are dropped so callers
 * can safely apply string methods to every element.
 *
 * @param media Media record; `null`/`undefined` yields an empty list.
 * @returns Title strings in the order romaji, english, native, synonyms.
 */
function getTitleVariants(media: any): string[] {
  const title = media?.title;
  const synonyms: unknown[] = Array.isArray(media?.synonyms) ? media.synonyms : [];

  const candidates: unknown[] =
    typeof title === 'string'
      ? [title, ...synonyms]
      : [title?.romaji, title?.english, title?.native, ...synonyms];

  return candidates.filter(
    (value): value is string => typeof value === 'string' && value.length > 0
  );
}
/**
 * Scores how well a media record matches a query string; lower is better.
 *
 * Every title variant of `media` is normalized and compared with the
 * normalized query. If either string contains the other the score is 0;
 * otherwise the score is the smallest Levenshtein distance over all variants.
 *
 * @param query Text to match (for example a folder name).
 * @param media Media record whose titles are compared against the query.
 * @returns 0 for a containment match, otherwise the best edit distance, or
 *          `Infinity` when there is nothing comparable on either side.
 */
function scoreEntry(query: string, media: any) {
  if (typeof query !== 'string') return Infinity;

  const q = normalize(query);
  // '' is a substring of every title, so an empty query would "match" everything.
  if (q.length === 0) return Infinity;

  let best = Infinity;

  for (const title of getTitleVariants(media)) {
    const t = normalize(title);
    // A title that normalizes to '' (every character stripped) would satisfy
    // q.includes('') and hand this entry a perfect score; ignore it instead.
    if (t.length === 0) continue;

    if (t.includes(q) || q.includes(t)) {
      return 0;
    }

    best = Math.min(best, levenshtein(q, t));
  }

  return best;
}
export async function resolveEntryMetadata(entry: any, type: string) { export async function resolveEntryMetadata(entry: any, type: string) {
let metadata = null; let metadata = null;
let matchedId = entry.matched_id; let matchedId = entry.matched_id;
@@ -26,16 +79,26 @@ export async function resolveEntryMetadata(entry: any, type: string) {
let picked = null; let picked = null;
let candidates = results;
if (type !== 'anime' && Array.isArray(results)) { if (type !== 'anime' && Array.isArray(results)) {
console.log(type);
if (entry.type === 'novels') { if (entry.type === 'novels') {
picked = results.find(r => r.format === 'NOVEL'); candidates = results.filter(r => r.format === 'NOVEL');
} else if (entry.type === 'manga') { } else if (entry.type === 'manga') {
picked = results.find(r => r.format !== 'NOVEL'); candidates = results.filter(r => r.format !== 'NOVEL');
} }
} }
picked ??= results?.[0]; if (Array.isArray(candidates) && candidates.length) {
const scored = candidates
.map(r => ({ r, score: scoreEntry(entry.folder_name, r) }))
.sort((a, b) => a.score - b.score);
// cutoff opcional
if (scored[0].score <= 10) {
picked = scored[0].r;
}
}
if (picked?.id) { if (picked?.id) {
matchedId = picked.id; matchedId = picked.id;