Extract local Zotero PDFs for deep notes
This commit is contained in:
parent
4a080355ad
commit
d1e19e1bb8
|
|
@ -42,6 +42,9 @@ ZOTERO_WEB = "https://api.zotero.org"
|
||||||
DEFAULT_VAULT = Path.cwd()
|
DEFAULT_VAULT = Path.cwd()
|
||||||
SKILL_DIR = Path(__file__).resolve().parents[1]
|
SKILL_DIR = Path(__file__).resolve().parents[1]
|
||||||
DEFAULT_PRIVATE_CONFIG = SKILL_DIR / "config" / "config.local.json"
|
DEFAULT_PRIVATE_CONFIG = SKILL_DIR / "config" / "config.local.json"
|
||||||
|
WORKSPACE_PYDEPS = Path.home() / "Documents" / "Obsidian Vault" / ".codex_tmp" / "pydeps"
|
||||||
|
if WORKSPACE_PYDEPS.exists():
|
||||||
|
sys.path.insert(0, str(WORKSPACE_PYDEPS))
|
||||||
|
|
||||||
|
|
||||||
def fail(message: str) -> None:
|
def fail(message: str) -> None:
|
||||||
|
|
@ -324,9 +327,7 @@ def local_fulltext(parent_key: str, max_chars: int) -> str:
|
||||||
if key:
|
if key:
|
||||||
full = zotero_local_optional(f"/items/{urllib.parse.quote(key)}")
|
full = zotero_local_optional(f"/items/{urllib.parse.quote(key)}")
|
||||||
path = ((full or {}).get("data") or {}).get("path")
|
path = ((full or {}).get("data") or {}).get("path")
|
||||||
if not path:
|
extracted = extract_pdf_text(resolve_attachment_path(child, path), max_chars=max_chars)
|
||||||
continue
|
|
||||||
extracted = extract_pdf_text(Path(path), max_chars=max_chars)
|
|
||||||
if extracted:
|
if extracted:
|
||||||
parts.append(extracted)
|
parts.append(extracted)
|
||||||
break
|
break
|
||||||
|
|
@ -334,6 +335,21 @@ def local_fulltext(parent_key: str, max_chars: int) -> str:
|
||||||
return text[:max_chars]
|
return text[:max_chars]
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_attachment_path(child: dict[str, Any], path: str | None) -> Path:
|
||||||
|
if path:
|
||||||
|
candidate = Path(path)
|
||||||
|
if candidate.exists():
|
||||||
|
return candidate
|
||||||
|
key = child.get("key")
|
||||||
|
if key:
|
||||||
|
storage_dir = Path.home() / "Zotero" / "storage" / str(key)
|
||||||
|
if storage_dir.exists():
|
||||||
|
pdfs = list(storage_dir.glob("*.pdf"))
|
||||||
|
if pdfs:
|
||||||
|
return pdfs[0]
|
||||||
|
return Path(path or "")
|
||||||
|
|
||||||
|
|
||||||
def template_paths(vault: Path) -> tuple[Path, Path, Path]:
|
def template_paths(vault: Path) -> tuple[Path, Path, Path]:
|
||||||
templates = vault / "00 Templater"
|
templates = vault / "00 Templater"
|
||||||
if not templates.exists():
|
if not templates.exists():
|
||||||
|
|
@ -361,7 +377,7 @@ def template_paths(vault: Path) -> tuple[Path, Path, Path]:
|
||||||
def extract_pdf_text(path: Path, max_chars: int) -> str:
|
def extract_pdf_text(path: Path, max_chars: int) -> str:
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
return ""
|
return ""
|
||||||
max_pages = 8
|
max_pages = 80 if max_chars >= 50000 else 12
|
||||||
if importlib.util.find_spec("fitz"):
|
if importlib.util.find_spec("fitz"):
|
||||||
import fitz # type: ignore
|
import fitz # type: ignore
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue