From 69d175c684378c6122fd552095c2d1feb6dd897a Mon Sep 17 00:00:00 2001 From: qyh15 Date: Fri, 22 May 2026 22:51:21 +0800 Subject: [PATCH] Skip bad PDF pages during text extraction --- scripts/generate_zotero_ai_note.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/generate_zotero_ai_note.py b/scripts/generate_zotero_ai_note.py index c9a374c..7b58c41 100644 --- a/scripts/generate_zotero_ai_note.py +++ b/scripts/generate_zotero_ai_note.py @@ -409,7 +409,10 @@ def extract_pdf_text(path: Path, max_chars: int) -> str: reader = PdfReader(str(path)) chunks = [] for page in reader.pages[:max_pages]: - chunks.append(page.extract_text() or "") + try: + chunks.append(page.extract_text() or "") + except Exception: + continue if sum(len(chunk) for chunk in chunks) >= max_chars: break return "\n".join(chunks)[:max_chars] @@ -419,7 +422,10 @@ def extract_pdf_text(path: Path, max_chars: int) -> str: reader = PdfReader(str(path)) chunks = [] for page in reader.pages[:max_pages]: - chunks.append(page.extract_text() or "") + try: + chunks.append(page.extract_text() or "") + except Exception: + continue if sum(len(chunk) for chunk in chunks) >= max_chars: break return "\n".join(chunks)[:max_chars]