#!/usr/bin/env python3
"""Create a manuscript keyword pack and DeepSeek screening prompt.

This helper is intentionally local-config only. It does not call DeepSeek or
Zotero by itself; it prepares a reproducible prompt that the skill can use with
the user's configured note-search route.
"""

from __future__ import annotations

import argparse
import json
import re
import sys
from pathlib import Path

# Material-family names looked up (case-insensitively, as substrings) in the
# draft text.
MATERIAL_FAMILY_HINTS = [
    "MOF",
    "COF",
    "MXene",
    "perovskite",
    "hydrogel",
    "aerogel",
    "cellulose",
    "graphene",
    "polymer",
    "oxide",
    "sulfide",
    "carbide",
    "nitride",
    "composite",
]

# Device/application names looked up the same way as the material families.
DEVICE_HINTS = [
    "TENG",
    "triboelectric",
    "piezoelectric",
    "sensor",
    "supercapacitor",
    "battery",
    "catalysis",
    "photocatalysis",
    "electrocatalysis",
    "membrane",
    "photodetector",
    "flexible electronics",
]

# A token is a letter followed by at least two letters, digits, "-", "_" or "/".
_TOKEN_RE = re.compile(r"[A-Za-z][A-Za-z0-9\-_/]{2,}")

# Text emitted in the prompt for any profile field that is empty or missing.
_PLACEHOLDER = "xxx"


def read_text(path: Path) -> str:
    """Return the contents of *path* decoded as UTF-8.

    Undecodable bytes are replaced rather than raising, so a draft with a few
    bad bytes can still be processed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    return path.read_text(encoding="utf-8", errors="replace")


def _match_hints(hints: list[str], text: str) -> list[str]:
    """Return the entries of *hints* that occur in *text*, ignoring case."""
    return [hint for hint in hints if re.search(re.escape(hint), text, re.I)]


def _looks_chemical(token: str) -> bool:
    """Return True for tokens containing a digit or hyphen, or all uppercase."""
    return any(ch.isdigit() for ch in token) or "-" in token or token.isupper()


def extract_terms(text: str, *, max_terms: int = 40) -> dict[str, list[str]]:
    """Build the keyword portion of the manuscript profile from draft text.

    Args:
        text: Draft manuscript text.
        max_terms: Upper bound on the number of chemical-like tokens returned.

    Returns:
        A dict with three lists:
        ``exact_or_chemical_terms`` (unique tokens, sorted, that contain a
        digit or hyphen or are all uppercase, capped at *max_terms*),
        ``material_family_hints`` and ``device_application_hints`` (entries of
        the module-level hint lists found in *text*, in list order).

    Raises:
        ValueError: If *max_terms* is negative.
    """
    if max_terms < 0:
        raise ValueError("max_terms must be non-negative")

    # Sorting the de-duplicated tokens keeps the output reproducible.
    tokens = sorted(set(_TOKEN_RE.findall(text)))
    chemical_like = [token for token in tokens if _looks_chemical(token)]

    return {
        "exact_or_chemical_terms": chemical_like[:max_terms],
        "material_family_hints": _match_hints(MATERIAL_FAMILY_HINTS, text),
        "device_application_hints": _match_hints(DEVICE_HINTS, text),
    }


def _format_terms(value: object) -> str:
    """Render a profile entry as comma-separated text, or the placeholder.

    A plain string is treated as a single term (joining it directly would
    separate its characters); ``None`` and empty values yield the placeholder.
    """
    if value is None:
        return _PLACEHOLDER
    if isinstance(value, str):
        return value or _PLACEHOLDER
    if isinstance(value, (list, tuple, set, frozenset)):
        return ", ".join(str(item) for item in value) or _PLACEHOLDER
    return str(value) or _PLACEHOLDER


def build_prompt(profile: dict[str, object]) -> str:
    """Return the screening prompt text for the given manuscript profile.

    Args:
        profile: Mapping that may contain ``exact_or_chemical_terms``,
            ``material_family_hints``, ``device_application_hints``,
            ``target_journal`` and ``extra_notes``. Missing or empty entries
            are shown as ``xxx``.

    Returns:
        The prompt as a single string ending with a newline.
    """
    material_terms = _format_terms(profile.get("exact_or_chemical_terms"))
    material_family = _format_terms(profile.get("material_family_hints"))
    device_application = _format_terms(profile.get("device_application_hints"))
    target_journal = profile.get("target_journal") or _PLACEHOLDER
    extra_notes = profile.get("extra_notes") or _PLACEHOLDER

    return f"""You are screening existing Zotero/Obsidian literature notes for a materials-science manuscript.

Manuscript profile:
- Material/system terms: {material_terms}
- Material family: {material_family}
- Device/application: {device_application}
- Target journal or tier: {target_journal}
- Extra author notes: {extra_notes}

Return:
1. Strongly related papers: same material, same material family plus same device, same mechanism, direct benchmark, or same target-journal positioning.
2. Weakly related papers: same direction, adjacent material family, same characterization logic, similar device architecture, or useful writing pattern.
3. For each paper: title, year, journal, Zotero key if present, relation type, useful claim/evidence, and why it matters for the manuscript.
4. Reject title-only broad keyword matches.
"""


def main(argv: list[str] | None = None) -> int:
    """Parse the command line, build the profile and emit it as JSON.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.

    Returns:
        ``0`` on success.

    Raises:
        OSError: If the draft cannot be read or the JSON file cannot be written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--draft", required=True, help="UTF-8 text or markdown draft path")
    parser.add_argument("--target-journal", default="")
    parser.add_argument("--extra-notes", default="")
    parser.add_argument("--json-out", default="")
    args = parser.parse_args(argv)

    text = read_text(Path(args.draft))

    # Copy into a fresh dict so non-list values can be added alongside the lists.
    profile: dict[str, object] = {**extract_terms(text)}
    profile["target_journal"] = args.target_journal
    profile["extra_notes"] = args.extra_notes
    profile["screening_prompt"] = build_prompt(profile)

    data = json.dumps(profile, ensure_ascii=False, indent=2)
    if args.json_out:
        Path(args.json_out).write_text(data + "\n", encoding="utf-8")
    else:
        # Only real text streams offer reconfigure(); a replaced stdout
        # (e.g. io.StringIO) does not, and must not crash the script.
        reconfigure = getattr(sys.stdout, "reconfigure", None)
        if callable(reconfigure):
            reconfigure(encoding="utf-8")
        print(data)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())