# Placeholder handed back when no attachment file can be located.  It is a
# relative name that is not expected to exist, so callers that guard with
# ``path.exists()`` treat it as "no PDF" instead of receiving ``Path("")``
# (which is ``Path(".")`` -- the current directory, and it always exists).
_UNRESOLVED_ATTACHMENT = Path("__zotero_attachment_not_found__.pdf")


def resolve_attachment_path(child: dict[str, Any], path: str | None) -> Path:
    """Locate the PDF file on disk for a Zotero attachment item.

    Resolution order:

    1. ``path`` itself, when it names an existing regular file.
    2. The first PDF (sorted by name, suffix matched case-insensitively)
       inside ``~/Zotero/storage/<child["key"]>``.
    3. ``path`` unchanged when it was given but nothing was found, so the
       caller still sees the location that was tried.
    4. A placeholder path that does not exist when there is nothing to try.

    Args:
        child: Attachment item mapping; only its ``"key"`` entry is read.
        path: File path reported for the attachment, or ``None``/empty.

    Returns:
        A ``Path``.  It never refers to a directory, so a caller that checks
        ``exists()`` before opening the file will not try to parse a folder.
    """
    candidate = Path(path) if path else None
    if candidate is not None and candidate.is_file():
        return candidate

    key = child.get("key")
    if key:
        # NOTE(review): assumes the Zotero data directory is ~/Zotero; a
        # relocated data directory will not be found here -- confirm.
        storage_dir = Path.home() / "Zotero" / "storage" / str(key)
        if storage_dir.is_dir():
            # Sort so the chosen file is stable across runs; directory
            # listing order is otherwise arbitrary.
            pdfs = sorted(
                entry
                for entry in storage_dir.iterdir()
                if entry.is_file() and entry.suffix.lower() == ".pdf"
            )
            if pdfs:
                return pdfs[0]

    if candidate is None or candidate.is_dir():
        return _UNRESOLVED_ATTACHMENT
    return candidate
Path, Path]: def extract_pdf_text(path: Path, max_chars: int) -> str: if not path.exists(): return "" - max_pages = 8 + max_pages = 80 if max_chars >= 50000 else 12 if importlib.util.find_spec("fitz"): import fitz # type: ignore