QQsci-skill/scripts/material_literature_router.py

115 lines
3.5 KiB
Python

#!/usr/bin/env python3
"""Create a manuscript keyword pack and DeepSeek screening prompt.
This helper is intentionally local-config only. It does not call DeepSeek or
Zotero by itself; it prepares a reproducible prompt that the skill can use with
the user's configured note-search route.
"""
from __future__ import annotations
import argparse
import json
import re
import sys
from pathlib import Path
# Material-family keywords that extract_terms() searches for
# (case-insensitively) in the draft text. Hits are reported under the
# "material_family_hints" key, in the order listed here.
MATERIAL_FAMILY_HINTS = [
    "MOF", "COF", "MXene",
    "perovskite",
    "hydrogel", "aerogel",
    "cellulose", "graphene", "polymer",
    "oxide", "sulfide", "carbide", "nitride",
    "composite",
]
# Device / application keywords that extract_terms() searches for
# (case-insensitively) in the draft text. Hits are reported under the
# "device_application_hints" key, in the order listed here.
DEVICE_HINTS = [
    "TENG", "triboelectric", "piezoelectric",
    "sensor",
    "supercapacitor", "battery",
    "catalysis", "photocatalysis", "electrocatalysis",
    "membrane",
    "photodetector",
    "flexible electronics",
]
def read_text(path: Path) -> str:
    """Return the contents of *path* decoded as UTF-8 text.

    A leading UTF-8 byte-order mark (as written by some editors) is dropped
    so it does not leak into the returned text as a stray U+FEFF character.
    Bytes that are not valid UTF-8 are replaced with U+FFFD rather than
    raising ``UnicodeDecodeError``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # "utf-8-sig" decodes plain UTF-8 unchanged and only strips a BOM when
    # one is actually present at the start of the file.
    return path.read_text(encoding="utf-8-sig", errors="replace")
def _hint_pattern(term: str) -> re.Pattern[str]:
    """Compile the case-insensitive matcher used for one hint term."""
    escaped = re.escape(term)
    if term.isupper():
        # All-caps hints are short acronyms (e.g. "COF"). A bare substring
        # search would also fire inside ordinary words such as "coffee", so
        # require the acronym to stand alone: no letter or digit directly
        # before it, an optional plural "s", and no letter directly after.
        return re.compile(rf"(?<![A-Za-z0-9]){escaped}s?(?![A-Za-z])", re.I)
    # Longer word hints stay substring matches so that derived forms such as
    # "nanocomposite" or "dioxide" are still detected.
    return re.compile(escaped, re.I)


def extract_terms(text: str) -> dict[str, list[str]]:
    """Build a keyword profile from manuscript draft text.

    Args:
        text: The draft contents.

    Returns:
        A dict with three lists of strings:

        * ``"exact_or_chemical_terms"`` -- at most 40 distinct tokens, in
          sorted order, that contain a digit, contain a hyphen, or are
          entirely upper-case.
        * ``"material_family_hints"`` -- the entries of
          ``MATERIAL_FAMILY_HINTS`` found in *text*, in list order.
        * ``"device_application_hints"`` -- the entries of ``DEVICE_HINTS``
          found in *text*, in list order.
    """
    # A token starts with an ASCII letter and continues with at least two
    # letters, digits, hyphens, underscores or slashes.
    tokens = sorted(set(re.findall(r"[A-Za-z][A-Za-z0-9\-_/]{2,}", text)))

    families = [
        term for term in MATERIAL_FAMILY_HINTS if _hint_pattern(term).search(text)
    ]
    devices = [term for term in DEVICE_HINTS if _hint_pattern(term).search(text)]

    chemical_like = [
        token
        for token in tokens
        if any(ch.isdigit() for ch in token) or "-" in token or token.isupper()
    ][:40]  # cap the list so the generated prompt stays short

    return {
        "exact_or_chemical_terms": chemical_like,
        "material_family_hints": families,
        "device_application_hints": devices,
    }
def build_prompt(profile: dict[str, object]) -> str:
    """Render the literature-screening prompt for a manuscript profile.

    Args:
        profile: Mapping read with ``.get``. The list-valued keys
            ``exact_or_chemical_terms``, ``material_family_hints`` and
            ``device_application_hints`` are joined with ``", "``; the keys
            ``target_journal`` and ``extra_notes`` are inserted as-is.

    Returns:
        The prompt text. Any field that is missing or empty is rendered as
        the placeholder ``xxx``.
    """

    def joined(key: str) -> str:
        # An absent key or an empty list falls back to the placeholder.
        return ', '.join(profile.get(key, [])) or 'xxx'

    material_terms = joined('exact_or_chemical_terms')
    family = joined('material_family_hints')
    device = joined('device_application_hints')
    journal = profile.get('target_journal') or 'xxx'
    notes = profile.get('extra_notes') or 'xxx'

    return f"""You are screening existing Zotero/Obsidian literature notes for a materials-science manuscript.
Manuscript profile:
- Material/system terms: {material_terms}
- Material family: {family}
- Device/application: {device}
- Target journal or tier: {journal}
- Extra author notes: {notes}
Return:
1. Strongly related papers: same material, same material family plus same device, same mechanism, direct benchmark, or same target-journal positioning.
2. Weakly related papers: same direction, adjacent material family, same characterization logic, similar device architecture, or useful writing pattern.
3. For each paper: title, year, journal, Zotero key if present, relation type, useful claim/evidence, and why it matters for the manuscript.
4. Reject title-only broad keyword matches.
"""
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: build the keyword profile and emit it as JSON.

    Reads the draft named by ``--draft``, extracts its keyword profile, adds
    the ``--target-journal`` and ``--extra-notes`` values plus the generated
    screening prompt, and writes the result as indented JSON either to the
    ``--json-out`` file or to standard output.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        ``0`` on success.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--draft", required=True, help="UTF-8 text or markdown draft path")
    parser.add_argument("--target-journal", default="")
    parser.add_argument("--extra-notes", default="")
    parser.add_argument("--json-out", default="")
    args = parser.parse_args(argv)

    text = read_text(Path(args.draft))
    profile: dict[str, object] = dict(extract_terms(text))
    profile["target_journal"] = args.target_journal
    profile["extra_notes"] = args.extra_notes
    # The prompt is built last so it can include the journal and notes above.
    profile["screening_prompt"] = build_prompt(profile)

    data = json.dumps(profile, ensure_ascii=False, indent=2)
    if args.json_out:
        Path(args.json_out).write_text(data + "\n", encoding="utf-8")
    else:
        # The JSON is emitted with ensure_ascii=False, so force UTF-8 output.
        # Only real text streams offer reconfigure(); a replaced stdout
        # (e.g. io.StringIO) does not, and must not crash the run.
        reconfigure = getattr(sys.stdout, "reconfigure", None)
        if reconfigure is not None:
            reconfigure(encoding="utf-8")
        print(data)
    return 0
if __name__ == "__main__":
    # Run the CLI and use main()'s return value as the process exit status.
    sys.exit(main())