Extract local Zotero PDFs for deep notes
This commit is contained in:
parent
4a080355ad
commit
d1e19e1bb8
|
|
@ -42,6 +42,9 @@ ZOTERO_WEB = "https://api.zotero.org"
|
|||
DEFAULT_VAULT = Path.cwd()
|
||||
SKILL_DIR = Path(__file__).resolve().parents[1]
|
||||
DEFAULT_PRIVATE_CONFIG = SKILL_DIR / "config" / "config.local.json"
|
||||
WORKSPACE_PYDEPS = Path.home() / "Documents" / "Obsidian Vault" / ".codex_tmp" / "pydeps"
|
||||
if WORKSPACE_PYDEPS.exists():
|
||||
sys.path.insert(0, str(WORKSPACE_PYDEPS))
|
||||
|
||||
|
||||
def fail(message: str) -> None:
|
||||
|
|
@ -324,9 +327,7 @@ def local_fulltext(parent_key: str, max_chars: int) -> str:
|
|||
if key:
|
||||
full = zotero_local_optional(f"/items/{urllib.parse.quote(key)}")
|
||||
path = ((full or {}).get("data") or {}).get("path")
|
||||
if not path:
|
||||
continue
|
||||
extracted = extract_pdf_text(Path(path), max_chars=max_chars)
|
||||
extracted = extract_pdf_text(resolve_attachment_path(child, path), max_chars=max_chars)
|
||||
if extracted:
|
||||
parts.append(extracted)
|
||||
break
|
||||
|
|
@ -334,6 +335,21 @@ def local_fulltext(parent_key: str, max_chars: int) -> str:
|
|||
return text[:max_chars]
|
||||
|
||||
|
||||
def resolve_attachment_path(child: dict[str, Any], path: str | None) -> Path:
|
||||
if path:
|
||||
candidate = Path(path)
|
||||
if candidate.exists():
|
||||
return candidate
|
||||
key = child.get("key")
|
||||
if key:
|
||||
storage_dir = Path.home() / "Zotero" / "storage" / str(key)
|
||||
if storage_dir.exists():
|
||||
pdfs = list(storage_dir.glob("*.pdf"))
|
||||
if pdfs:
|
||||
return pdfs[0]
|
||||
return Path(path or "")
|
||||
|
||||
|
||||
def template_paths(vault: Path) -> tuple[Path, Path, Path]:
|
||||
templates = vault / "00 Templater"
|
||||
if not templates.exists():
|
||||
|
|
@ -361,7 +377,7 @@ def template_paths(vault: Path) -> tuple[Path, Path, Path]:
|
|||
def extract_pdf_text(path: Path, max_chars: int) -> str:
|
||||
if not path.exists():
|
||||
return ""
|
||||
max_pages = 8
|
||||
max_pages = 80 if max_chars >= 50000 else 12
|
||||
if importlib.util.find_spec("fitz"):
|
||||
import fitz # type: ignore
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue