fix(audit): amélioration compatibilité Calibre
All checks were successful
🚀 Docker Build and Push / build-and-push (push) Successful in 2m14s
All checks were successful
🚀 Docker Build and Push / build-and-push (push) Successful in 2m14s
Corrections pour la détection des métadonnées EPUB générées par Calibre :

## Couverture
- Corrige la détection du tag <meta name="cover"> avec namespace XML
- Ajoute un fallback sans namespace pour les anciens formats
- Résout les faux positifs "Couverture absente"

## Identifiants
- Ajoute le support du format "isbn:XXXXX" (généré par Calibre polish)
- Accepte les identifiants Amazon ASIN et MOBI-ASIN comme valides (via attribut scheme= ou préfixe texte asin:/mobi-asin:)
- Conserve le rejet des UUID Calibre seuls comme erreur

## Titre
- Tolère les titres numériques courts (< 5 caractères)
- Permet les titres légitimes comme "1984"
This commit is contained in:
@@ -100,7 +100,8 @@ def validate_epub(filepath: str, root_dir: str) -> VerificationResult:
|
|||||||
result.add_error("Titre sale (Underscores)")
|
result.add_error("Titre sale (Underscores)")
|
||||||
if title.lower().endswith('.epub'):
|
if title.lower().endswith('.epub'):
|
||||||
result.add_error("Titre sale (Extension .epub)")
|
result.add_error("Titre sale (Extension .epub)")
|
||||||
if title.isdigit():
|
if title.isdigit() and len(title) >= 5:
|
||||||
|
# Allow short numeric titles like "1984"
|
||||||
result.add_error("Titre sale (Numérique)")
|
result.add_error("Titre sale (Numérique)")
|
||||||
|
|
||||||
# 2. Author
|
# 2. Author
|
||||||
@@ -177,12 +178,22 @@ def validate_epub(filepath: str, root_dir: str) -> VerificationResult:
|
|||||||
continue # Don't count as valid ID for the "Only Calibre" rule check
|
continue # Don't count as valid ID for the "Only Calibre" rule check
|
||||||
elif scheme == 'isbn':
|
elif scheme == 'isbn':
|
||||||
valid_id_found = True
|
valid_id_found = True
|
||||||
|
elif scheme in ('asin', 'mobi-asin'):
|
||||||
|
# Amazon identifiers
|
||||||
|
valid_id_found = True
|
||||||
else:
|
else:
|
||||||
# Heuristics
|
# Heuristics
|
||||||
if re.match(r'^\d{9}[\d|X]$', clean_id) or re.match(r'^\d{13}$', clean_id):
|
# Check for ISBN in various formats
|
||||||
|
if re.match(r'^\\d{9}[\\d|X]$', clean_id) or re.match(r'^\\d{13}$', clean_id):
|
||||||
valid_id_found = True
|
valid_id_found = True
|
||||||
elif text.lower().startswith('urn:isbn'):
|
elif text.lower().startswith('urn:isbn'):
|
||||||
valid_id_found = True
|
valid_id_found = True
|
||||||
|
elif text.lower().startswith('isbn:'):
|
||||||
|
# Format: isbn:9782732497150 (used by Calibre polish)
|
||||||
|
valid_id_found = True
|
||||||
|
elif text.lower().startswith('asin:') or text.lower().startswith('mobi-asin:'):
|
||||||
|
# Amazon identifiers
|
||||||
|
valid_id_found = True
|
||||||
elif text.lower().startswith('urn:uuid'):
|
elif text.lower().startswith('urn:uuid'):
|
||||||
# We treat generic UUIDs as valid unless we are strict about "No Calibre UUID ONLY".
|
# We treat generic UUIDs as valid unless we are strict about "No Calibre UUID ONLY".
|
||||||
# The rule: "Vide, ou ne contient aucun identifiant valide (ni ISBN, ni un URN standard). Si uniquement un UUID Calibre est présent -> Erreur."
|
# The rule: "Vide, ou ne contient aucun identifiant valide (ni ISBN, ni un URN standard). Si uniquement un UUID Calibre est présent -> Erreur."
|
||||||
@@ -212,16 +223,20 @@ def validate_epub(filepath: str, root_dir: str) -> VerificationResult:
|
|||||||
# Look for <meta name="cover" content="item_id" />
|
# Look for <meta name="cover" content="item_id" />
|
||||||
# Then look for <item id="item_id" href="..." /> in <manifest>
|
# Then look for <item id="item_id" href="..." /> in <manifest>
|
||||||
cover_meta = None
|
cover_meta = None
|
||||||
if metadata:
|
if metadata is not None:
|
||||||
# <meta name="cover" content="...">
|
# <meta name="cover" content="...">
|
||||||
for meta in metadata.findall(f"{{http://www.idpf.org/2007/opf}}meta"): # opf:meta not usually in 2.0?
|
# Try with namespace first (correct for valid EPUB 2)
|
||||||
pass
|
for meta in metadata.findall(f"{{http://www.idpf.org/2007/opf}}meta"):
|
||||||
# The 'name' attribute is not in a namespace usually for OPF 2.0 meta elements?
|
|
||||||
# Actually in OPF 2.0: <meta name="cover" content="cover-image" />
|
|
||||||
for meta in metadata.findall("meta"): # Searching without namespace
|
|
||||||
if meta.get('name') == 'cover':
|
if meta.get('name') == 'cover':
|
||||||
cover_meta = meta.get('content')
|
cover_meta = meta.get('content')
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# Fallback: search without namespace
|
||||||
|
if not cover_meta:
|
||||||
|
for meta in metadata.findall("meta"):
|
||||||
|
if meta.get('name') == 'cover':
|
||||||
|
cover_meta = meta.get('content')
|
||||||
|
break
|
||||||
# Also check namespaced meta if parsing strict OPF 3.0?
|
# Also check namespaced meta if parsing strict OPF 3.0?
|
||||||
# element is <meta property="cover-image"> in 3.0?
|
# element is <meta property="cover-image"> in 3.0?
|
||||||
# User asked for "cover" meta specifically or general cover check.
|
# User asked for "cover" meta specifically or general cover check.
|
||||||
|
|||||||
Reference in New Issue
Block a user