Skip bad PDF pages during text extraction

2026-05-22 22:51:21 +08:00 · 2026-05-22 22:51:21 +08:00 · 69d175c684
parent 5630118f7d
commit 69d175c684
1 changed file with 8 additions and 2 deletions
--- a/scripts/generate_zotero_ai_note.py
+++ b/scripts/generate_zotero_ai_note.py
@@ -409,7 +409,10 @@ def extract_pdf_text(path: Path, max_chars: int) -> str:
        reader = PdfReader(str(path))
        chunks = []
        for page in reader.pages[:max_pages]:
-            chunks.append(page.extract_text() or "")
+            try:
+                chunks.append(page.extract_text() or "")
+            except Exception:
+                continue
            if sum(len(chunk) for chunk in chunks) >= max_chars:
                break
        return "\n".join(chunks)[:max_chars]
@@ -419,7 +422,10 @@ def extract_pdf_text(path: Path, max_chars: int) -> str:
        reader = PdfReader(str(path))
        chunks = []
        for page in reader.pages[:max_pages]:
-            chunks.append(page.extract_text() or "")
+            try:
+                chunks.append(page.extract_text() or "")
+            except Exception:
+                continue
            if sum(len(chunk) for chunk in chunks) >= max_chars:
                break
        return "\n".join(chunks)[:max_chars]