Extract local Zotero PDFs for deep notes

This commit is contained in:
qyh15 2026-05-22 16:16:55 +08:00
parent 4a080355ad
commit d1e19e1bb8
1 changed file with 20 additions and 4 deletions

View File

@ -42,6 +42,9 @@ ZOTERO_WEB = "https://api.zotero.org"
# Working directory captured once, when this module is imported
# (presumably the fallback vault root; confirm against the CLI defaults).
DEFAULT_VAULT = Path.cwd()
# Parent of the directory that contains this file, after resolving symlinks.
SKILL_DIR = Path(__file__).resolve().parents[1]
# JSON config file expected at <SKILL_DIR>/config/config.local.json.
DEFAULT_PRIVATE_CONFIG = SKILL_DIR / "config" / "config.local.json"
# Fixed per-user directory under the home folder.
# NOTE(review): looks like a scratch location for locally installed Python
# packages; the path is hard-coded to one vault layout, so confirm it is intended.
WORKSPACE_PYDEPS = Path.home() / "Documents" / "Obsidian Vault" / ".codex_tmp" / "pydeps"
if WORKSPACE_PYDEPS.exists():
    # Prepended (index 0) so modules found here take precedence over
    # same-named modules later on sys.path. Runs as an import-time side effect.
    sys.path.insert(0, str(WORKSPACE_PYDEPS))
def fail(message: str) -> None:
@ -324,9 +327,7 @@ def local_fulltext(parent_key: str, max_chars: int) -> str:
if key:
full = zotero_local_optional(f"/items/{urllib.parse.quote(key)}")
path = ((full or {}).get("data") or {}).get("path")
if not path:
continue
extracted = extract_pdf_text(Path(path), max_chars=max_chars)
extracted = extract_pdf_text(resolve_attachment_path(child, path), max_chars=max_chars)
if extracted:
parts.append(extracted)
break
@ -334,6 +335,21 @@ def local_fulltext(parent_key: str, max_chars: int) -> str:
return text[:max_chars]
def resolve_attachment_path(child: dict[str, Any], path: str | None) -> Path:
if path:
candidate = Path(path)
if candidate.exists():
return candidate
key = child.get("key")
if key:
storage_dir = Path.home() / "Zotero" / "storage" / str(key)
if storage_dir.exists():
pdfs = list(storage_dir.glob("*.pdf"))
if pdfs:
return pdfs[0]
return Path(path or "")
def template_paths(vault: Path) -> tuple[Path, Path, Path]:
templates = vault / "00 Templater"
if not templates.exists():
@ -361,7 +377,7 @@ def template_paths(vault: Path) -> tuple[Path, Path, Path]:
def extract_pdf_text(path: Path, max_chars: int) -> str:
if not path.exists():
return ""
max_pages = 8
max_pages = 80 if max_chars >= 50000 else 12
if importlib.util.find_spec("fitz"):
import fitz # type: ignore