diff --git a/audit_epubs.py b/audit_epubs.py
index 952635b..0fee9ce 100644
--- a/audit_epubs.py
+++ b/audit_epubs.py
@@ -100,7 +100,8 @@ def validate_epub(filepath: str, root_dir: str) -> VerificationResult:
result.add_error("Titre sale (Underscores)")
if title.lower().endswith('.epub'):
result.add_error("Titre sale (Extension .epub)")
- if title.isdigit():
+ if title.isdigit() and len(title) >= 5:
+ # Only all-digit titles of 5+ characters are flagged; short numeric titles like "1984" are legitimate
result.add_error("Titre sale (Numérique)")
# 2. Author
@@ -177,12 +178,22 @@ def validate_epub(filepath: str, root_dir: str) -> VerificationResult:
continue # Don't count as valid ID for the "Only Calibre" rule check
elif scheme == 'isbn':
valid_id_found = True
+ elif scheme in ('asin', 'mobi-asin'):
+ # Amazon identifiers
+ valid_id_found = True
else:
# Heuristics
- if re.match(r'^\d{9}[\d|X]$', clean_id) or re.match(r'^\d{13}$', clean_id):
+ # Bare ISBN-10 (9 digits + digit-or-X check character) or ISBN-13 (13 digits)
+ if re.match(r'^\d{9}[\dX]$', clean_id) or re.match(r'^\d{13}$', clean_id):
valid_id_found = True
elif text.lower().startswith('urn:isbn'):
valid_id_found = True
+ elif text.lower().startswith('isbn:'):
+ # Format: isbn:9782732497150 (used by Calibre polish)
+ valid_id_found = True
+ elif text.lower().startswith(('asin:', 'mobi-asin:')):
+ # Amazon identifiers
+ valid_id_found = True
elif text.lower().startswith('urn:uuid'):
# We treat generic UUIDs as valid unless we are strict about "No Calibre UUID ONLY".
# The rule: "Vide, ou ne contient aucun identifiant valide (ni ISBN, ni un URN standard). Si uniquement un UUID Calibre est présent -> Erreur."
@@ -212,16 +223,20 @@ def validate_epub(filepath: str, root_dir: str) -> VerificationResult:
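# Look for <meta name="cover" content="..."/> in <metadata>
# Then look for the <item> with that id in <manifest> (TODO confirm against the lookup below)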
cover_meta = None
- if metadata:
+ if metadata is not None:
#
- for meta in metadata.findall(f"{{http://www.idpf.org/2007/opf}}meta"): # opf:meta not usually in 2.0?
- pass
- # The 'name' attribute is not in a namespace usually for OPF 2.0 meta elements?
- # Actually in OPF 2.0:
- for meta in metadata.findall("meta"): # Searching without namespace
+ # Try with namespace first (correct for valid EPUB 2)
+ for meta in metadata.findall(f"{{http://www.idpf.org/2007/opf}}meta"):
if meta.get('name') == 'cover':
cover_meta = meta.get('content')
break
+
+ # Fallback: search without namespace
+ if not cover_meta:
+ for meta in metadata.findall("meta"):
+ if meta.get('name') == 'cover':
+ cover_meta = meta.get('content')
+ break
# Also check OPF 3.0-style cover declarations?
# (e.g. a manifest <item properties="cover-image"> is the 3.0 form -- TODO confirm)
# User asked for "cover" meta specifically or general cover check.