#!/usr/bin/env python3
|
|
"""Create a manuscript keyword pack and DeepSeek screening prompt.

This helper is intentionally local-config only. It does not call DeepSeek or
Zotero by itself; it prepares a reproducible prompt that the skill can use with
the user's configured note-search route.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
# Material-family keywords looked for in the draft. extract_terms() searches
# for each entry as a case-insensitive substring and reports the hits in the
# order they are listed here.
MATERIAL_FAMILY_HINTS: list[str] = [
    "MOF", "COF", "MXene", "perovskite",
    "hydrogel", "aerogel", "cellulose", "graphene", "polymer",
    "oxide", "sulfide", "carbide", "nitride",
    "composite",
]
|
|
|
|
# Device/application keywords looked for in the draft. extract_terms() searches
# for each entry as a case-insensitive substring and reports the hits in the
# order they are listed here.
DEVICE_HINTS: list[str] = [
    "TENG", "triboelectric", "piezoelectric",
    "sensor", "supercapacitor", "battery",
    "catalysis", "photocatalysis", "electrocatalysis",
    "membrane", "photodetector", "flexible electronics",
]
|
|
|
|
|
|
def read_text(path: Path) -> str:
    """Return the full contents of *path* decoded as UTF-8.

    Byte sequences that are not valid UTF-8 are replaced with U+FFFD instead
    of raising, so a partially corrupted draft can still be processed.
    """
    with path.open(mode="r", encoding="utf-8", errors="replace") as handle:
        return handle.read()
|
|
|
|
|
|
def extract_terms(text: str) -> dict[str, list[str]]:
    """Derive a keyword profile from the raw text of a manuscript draft.

    Returns a dict with three lists:

    - ``exact_or_chemical_terms``: at most 40 unique tokens, in sorted order,
      that contain a digit, contain a hyphen, or are entirely upper-case.
      A token starts with an ASCII letter and is followed by at least two
      letters, digits, ``-``, ``_`` or ``/``.
    - ``material_family_hints``: entries of ``MATERIAL_FAMILY_HINTS`` that
      occur in the text as a case-insensitive substring.
    - ``device_application_hints``: entries of ``DEVICE_HINTS`` that occur in
      the text as a case-insensitive substring.
    """

    def _mentioned(hints: list[str]) -> list[str]:
        # Keep the hint list's own ordering; each hint is matched literally.
        found = []
        for hint in hints:
            if re.search(re.escape(hint), text, re.I):
                found.append(hint)
        return found

    unique_words = set(re.findall(r"[A-Za-z][A-Za-z0-9\-_/]{2,}", text))

    candidates: list[str] = []
    for word in sorted(unique_words):
        contains_digit = any(ch.isdigit() for ch in word)
        if contains_digit or "-" in word or word.isupper():
            candidates.append(word)

    return {
        "exact_or_chemical_terms": candidates[:40],
        "material_family_hints": _mentioned(MATERIAL_FAMILY_HINTS),
        "device_application_hints": _mentioned(DEVICE_HINTS),
    }
|
|
|
|
|
|
def build_prompt(profile: dict[str, object]) -> str:
    """Render the literature-screening prompt for a manuscript profile.

    The three list-valued entries (``exact_or_chemical_terms``,
    ``material_family_hints``, ``device_application_hints``) are joined with
    ``", "``; ``target_journal`` and ``extra_notes`` are inserted as given.
    Any entry that is missing or empty is rendered as the placeholder ``xxx``.
    """
    placeholder = "xxx"

    def _listed(key: str) -> str:
        # An empty join result is falsy, so it falls back to the placeholder.
        return ", ".join(profile.get(key, [])) or placeholder

    terms = _listed("exact_or_chemical_terms")
    family = _listed("material_family_hints")
    devices = _listed("device_application_hints")
    journal = profile.get("target_journal") or placeholder
    notes = profile.get("extra_notes") or placeholder

    return f"""You are screening existing Zotero/Obsidian literature notes for a materials-science manuscript.

Manuscript profile:
- Material/system terms: {terms}
- Material family: {family}
- Device/application: {devices}
- Target journal or tier: {journal}
- Extra author notes: {notes}

Return:
1. Strongly related papers: same material, same material family plus same device, same mechanism, direct benchmark, or same target-journal positioning.
2. Weakly related papers: same direction, adjacent material family, same characterization logic, similar device architecture, or useful writing pattern.
3. For each paper: title, year, journal, Zotero key if present, relation type, useful claim/evidence, and why it matters for the manuscript.
4. Reject title-only broad keyword matches.
"""
|
|
|
|
|
|
def main() -> int:
    """Build the keyword pack for a draft and emit it as JSON.

    Command-line options:
        --draft           Path to the UTF-8 text or markdown draft (required).
        --target-journal  Optional journal or tier, copied into the profile.
        --extra-notes     Optional free-form notes, copied into the profile.
        --json-out        Optional output path. When omitted, the JSON is
                          printed to stdout instead.

    Returns:
        0 on success, suitable for use as the process exit status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--draft", required=True, help="UTF-8 text or markdown draft path")
    parser.add_argument("--target-journal", default="")
    parser.add_argument("--extra-notes", default="")
    parser.add_argument("--json-out", default="")
    args = parser.parse_args()

    draft = Path(args.draft)
    text = read_text(draft)
    profile: dict[str, object] = extract_terms(text)
    profile["target_journal"] = args.target_journal
    profile["extra_notes"] = args.extra_notes
    # Built last so the prompt can embed the journal and notes set just above.
    profile["screening_prompt"] = build_prompt(profile)

    data = json.dumps(profile, ensure_ascii=False, indent=2)
    if args.json_out:
        Path(args.json_out).write_text(data + "\n", encoding="utf-8")
    else:
        # ensure_ascii=False may emit non-ASCII characters, so switch stdout to
        # UTF-8 before printing. Replacement streams (e.g. io.StringIO under
        # contextlib.redirect_stdout) have no reconfigure(); skip the call
        # there instead of failing with AttributeError.
        reconfigure = getattr(sys.stdout, "reconfigure", None)
        if callable(reconfigure):
            reconfigure(encoding="utf-8")
        print(data)
    return 0
|
|
|
|
|
|
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
|