From e2466aab8559364285a11a6d5a0f122585c0060e Mon Sep 17 00:00:00 2001
From: Maxime Killinger <max.killinger@outlook.fr>
Date: Thu, 8 Jan 2026 13:16:10 +0000
Subject: [PATCH] =?UTF-8?q?fix(audit):=20am=C3=A9lioration=20compatibilit?=
 =?UTF-8?q?=C3=A9=20Calibre?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Corrections pour la détection des métadonnées EPUB générées par Calibre :

## Couverture
- Corrige la détection du tag <meta name="cover"> avec namespace XML
- Ajoute un fallback sans namespace pour les anciens formats
- Résout les faux positifs "Couverture absente"
- Teste explicitement `metadata is not None` au lieu de la valeur de vérité de l'élément

## Identifiants
- Ajoute le support du format "isbn:XXXXX" (généré par Calibre polish)
- Accepte les identifiants Amazon ASIN et MOBI-ASIN comme valides
  (via attribut scheme= ou préfixe texte asin:/mobi-asin:)
- Conserve le rejet des UUID Calibre seuls comme erreur

## Titre
- Tolère les titres numériques courts (< 5 caractères)
- Permet les titres légitimes comme "1984"
---
 audit_epubs.py | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)
diff --git a/audit_epubs.py b/audit_epubs.py
index 952635b..0fee9ce 100644
--- a/audit_epubs.py
+++ b/audit_epubs.py
@@ -100,7 +100,8 @@ def validate_epub(filepath: str, root_dir: str) -> VerificationResult:
                     result.add_error("Titre sale (Underscores)")
                 if title.lower().endswith('.epub'):
                     result.add_error("Titre sale (Extension .epub)")
-                if title.isdigit():
+                if title.isdigit() and len(title) >= 5:
+                    # Allow short numeric titles like "1984"
                     result.add_error("Titre sale (Numérique)")
 
             # 2. Author
@@ -177,12 +178,22 @@ def validate_epub(filepath: str, root_dir: str) -> VerificationResult:
                     continue # Don't count as valid ID for the "Only Calibre" rule check
                 elif scheme == 'isbn':
                     valid_id_found = True
+                elif scheme in ('asin', 'mobi-asin'):
+                    # Amazon identifiers
+                    valid_id_found = True
                 else: 
                      # Heuristics
-                     if re.match(r'^\d{9}[\d|X]$', clean_id) or re.match(r'^\d{13}$', clean_id):
+                     # Bare ISBN-10 (9 digits + digit or 'X' check character) or bare ISBN-13 (13 digits)
+                     if re.match(r'^\d{9}[\dX]$', clean_id) or re.match(r'^\d{13}$', clean_id):
                           valid_id_found = True
                      elif text.lower().startswith('urn:isbn'):
                           valid_id_found = True
+                     elif text.lower().startswith('isbn:'):
+                          # Format: isbn:9782732497150 (used by Calibre polish)
+                          valid_id_found = True
+                     elif text.lower().startswith('asin:') or text.lower().startswith('mobi-asin:'):
+                          # Amazon identifiers
+                          valid_id_found = True
                      elif text.lower().startswith('urn:uuid'):
                           # We treat generic UUIDs as valid unless we are strict about "No Calibre UUID ONLY".
                           # The rule: "Vide, ou ne contient aucun identifiant valide (ni ISBN, ni un URN standard). Si uniquement un UUID Calibre est présent -> Erreur."
@@ -212,16 +223,20 @@ def validate_epub(filepath: str, root_dir: str) -> VerificationResult:
             # Look for <meta name="cover" content="item_id" />
             # Then look for <item id="item_id" href="..." /> in <manifest>
             cover_meta = None
-            if metadata:
+            if metadata is not None:
                 # <meta name="cover" content="...">
-                for meta in metadata.findall(f"{{http://www.idpf.org/2007/opf}}meta"): # opf:meta not usually in 2.0?
-                    pass
-                # The 'name' attribute is not in a namespace usually for OPF 2.0 meta elements?
-                # Actually in OPF 2.0: <meta name="cover" content="cover-image" />
-                for meta in metadata.findall("meta"): # Searching without namespace
+                # Try with namespace first (correct for valid EPUB 2)
+                for meta in metadata.findall(f"{{http://www.idpf.org/2007/opf}}meta"):
                      if meta.get('name') == 'cover':
                           cover_meta = meta.get('content')
                           break
+                
+                # Fallback: search without namespace
+                if not cover_meta:
+                    for meta in metadata.findall("meta"): 
+                         if meta.get('name') == 'cover':
+                              cover_meta = meta.get('content')
+                              break
                 # Also check namespaced meta if parsing strict OPF 3.0? 
                 # element is <meta property="cover-image"> in 3.0?
                 # User asked for "cover" meta specifically or general cover check.