From 69d175c684378c6122fd552095c2d1feb6dd897a Mon Sep 17 00:00:00 2001
From: qyh15 <qyh15@local>
Date: Fri, 22 May 2026 22:51:21 +0800
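Subject: [PATCH] Skip bad PDF pages during text extraction

Wrap page.extract_text() in try/except Exception in both page loops of
extract_pdf_text() so that a page whose extraction raises is skipped
and the loop moves on to the next page, rather than the exception
propagating out of the loop. Skipped pages contribute no text and are
not logged.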
---
 scripts/generate_zotero_ai_note.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/scripts/generate_zotero_ai_note.py b/scripts/generate_zotero_ai_note.py
index c9a374c..7b58c41 100644
--- a/scripts/generate_zotero_ai_note.py
+++ b/scripts/generate_zotero_ai_note.py
@@ -409,7 +409,10 @@ def extract_pdf_text(path: Path, max_chars: int) -> str:
         reader = PdfReader(str(path))
         chunks = []
         for page in reader.pages[:max_pages]:
-            chunks.append(page.extract_text() or "")
+            try:
+                chunks.append(page.extract_text() or "")
+            except Exception:
+                continue
             if sum(len(chunk) for chunk in chunks) >= max_chars:
                 break
         return "\n".join(chunks)[:max_chars]
@@ -419,7 +422,10 @@ def extract_pdf_text(path: Path, max_chars: int) -> str:
         reader = PdfReader(str(path))
         chunks = []
         for page in reader.pages[:max_pages]:
-            chunks.append(page.extract_text() or "")
+            try:
+                chunks.append(page.extract_text() or "")
+            except Exception:
+                continue
             if sum(len(chunk) for chunk in chunks) >= max_chars:
                 break
         return "\n".join(chunks)[:max_chars]