From e2466aab8559364285a11a6d5a0f122585c0060e Mon Sep 17 00:00:00 2001 From: Maxime Killinger Date: Thu, 8 Jan 2026 13:16:10 +0000 Subject: [PATCH] =?UTF-8?q?fix(audit):=20am=C3=A9lioration=20compatibilit?= =?UTF-8?q?=C3=A9=20Calibre?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Corrections pour la détection des métadonnées EPUB générées par Calibre : ## Couverture - Corrige la détection du tag meta (name="cover") avec namespace XML - Ajoute un fallback sans namespace pour les anciens formats - Résout les faux positifs "Couverture absente" ## Identifiants - Ajoute le support du format "isbn:XXXXX" (généré par Calibre polish) - Accepte les identifiants Amazon ASIN et MOBI-ASIN comme valides (via attribut scheme= ou préfixe texte asin:/mobi-asin:) - Conserve le rejet des UUID Calibre seuls comme erreur ## Titre - Tolère les titres numériques courts (< 5 caractères) - Permet les titres légitimes comme "1984" --- audit_epubs.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/audit_epubs.py b/audit_epubs.py index 952635b..0fee9ce 100644 --- a/audit_epubs.py +++ b/audit_epubs.py @@ -100,7 +100,8 @@ def validate_epub(filepath: str, root_dir: str) -> VerificationResult: result.add_error("Titre sale (Underscores)") if title.lower().endswith('.epub'): result.add_error("Titre sale (Extension .epub)") - if title.isdigit(): + if title.isdigit() and len(title) >= 5: + # Allow short numeric titles like "1984" result.add_error("Titre sale (Numérique)") # 2. 
Author @@ -177,12 +178,22 @@ def validate_epub(filepath: str, root_dir: str) -> VerificationResult: continue # Don't count as valid ID for the "Only Calibre" rule check elif scheme == 'isbn': valid_id_found = True + elif scheme in ('asin', 'mobi-asin'): + # Amazon identifiers + valid_id_found = True else: # Heuristics - if re.match(r'^\d{9}[\d|X]$', clean_id) or re.match(r'^\d{13}$', clean_id): + # Bare ISBN heuristic: ISBN-10 (9 digits + digit or X check character) or 13-digit ISBN-13 + if re.match(r'^\d{9}[\dX]$', clean_id) or re.match(r'^\d{13}$', clean_id): valid_id_found = True elif text.lower().startswith('urn:isbn'): valid_id_found = True + elif text.lower().startswith('isbn:'): + # Format: isbn:9782732497150 (used by Calibre polish) + valid_id_found = True + elif text.lower().startswith('asin:') or text.lower().startswith('mobi-asin:'): + # Amazon identifiers + valid_id_found = True elif text.lower().startswith('urn:uuid'): # We treat generic UUIDs as valid unless we are strict about "No Calibre UUID ONLY". # The rule: "Vide, ou ne contient aucun identifiant valide (ni ISBN, ni un URN standard). Si uniquement un UUID Calibre est présent -> Erreur." @@ -212,16 +223,20 @@ def validate_epub(filepath: str, root_dir: str) -> VerificationResult: # Look for # Then look for in cover_meta = None - if metadata: + if metadata is not None: # - for meta in metadata.findall(f"{{http://www.idpf.org/2007/opf}}meta"): # opf:meta not usually in 2.0? - pass - # The 'name' attribute is not in a namespace usually for OPF 2.0 meta elements? 
- # Actually in OPF 2.0: - for meta in metadata.findall("meta"): # Searching without namespace + # Try with namespace first (correct for valid EPUB 2) + for meta in metadata.findall(f"{{http://www.idpf.org/2007/opf}}meta"): if meta.get('name') == 'cover': cover_meta = meta.get('content') break + + # Fallback: search without namespace + if not cover_meta: + for meta in metadata.findall("meta"): + if meta.get('name') == 'cover': + cover_meta = meta.get('content') + break # Also check namespaced meta if parsing strict OPF 3.0? # element is in 3.0? # User asked for "cover" meta specifically or general cover check.