#!/usr/bin/env python3
"""Create a manuscript keyword pack and DeepSeek screening prompt.

This helper is intentionally local-config only. It prepares a reproducible
DeepSeek prompt for the required screening pass over QQnote/Zotero/Obsidian
literature notes. It does not call DeepSeek or Zotero by itself.
"""

from __future__ import annotations

import argparse
import json
import re
import sys
from collections.abc import Iterable
from pathlib import Path

# Material-family names looked up (case-insensitively, as substrings) in the draft.
MATERIAL_FAMILY_HINTS = [
    "MOF",
    "COF",
    "MXene",
    "perovskite",
    "hydrogel",
    "aerogel",
    "cellulose",
    "graphene",
    "polymer",
    "oxide",
    "sulfide",
    "carbide",
    "nitride",
    "composite",
]

# Device/application names looked up the same way as the material families.
DEVICE_HINTS = [
    "TENG",
    "triboelectric",
    "piezoelectric",
    "sensor",
    "supercapacitor",
    "battery",
    "catalysis",
    "photocatalysis",
    "electrocatalysis",
    "membrane",
    "photodetector",
    "flexible electronics",
]

# A token starts with an ASCII letter and continues with at least two more
# letters, digits, or "-", "_", "/" characters.
_TOKEN_RE = re.compile(r"[A-Za-z][A-Za-z0-9\-_/]{2,}")

# Text shown in the prompts wherever a profile field is empty or missing.
_PLACEHOLDER = "xxx"


def read_text(path: Path) -> str:
    """Return the content of *path* decoded as UTF-8.

    Undecodable bytes are replaced instead of raising, so a draft with a few
    broken characters can still be processed.
    """
    return path.read_text(encoding="utf-8", errors="replace")


def _matching_hints(hints: list[str], text: str) -> list[str]:
    """Return the entries of *hints* found in *text*, keeping the order of *hints*.

    Matching is a case-insensitive substring search, so "polymer" is also
    found inside "polymers" or "polymeric".
    """
    return [term for term in hints if re.search(re.escape(term), text, re.I)]


def extract_terms(text: str, *, max_chemical_terms: int = 40) -> dict[str, list[str]]:
    """Build the keyword pack for a draft.

    Args:
        text: Full text of the manuscript draft.
        max_chemical_terms: Upper bound on the number of chemical-like tokens
            returned (the first ones in sorted order are kept).

    Returns:
        A dict with three lists:
        ``exact_or_chemical_terms`` holds the unique tokens that contain a
        digit, contain a hyphen, or are entirely upper case;
        ``material_family_hints`` and ``device_application_hints`` hold the
        entries of MATERIAL_FAMILY_HINTS and DEVICE_HINTS found in *text*.
    """
    tokens = sorted(set(_TOKEN_RE.findall(text)))
    chemical_like = [
        token
        for token in tokens
        if any(ch.isdigit() for ch in token) or "-" in token or token.isupper()
    ]
    return {
        "exact_or_chemical_terms": chemical_like[:max_chemical_terms],
        "material_family_hints": _matching_hints(MATERIAL_FAMILY_HINTS, text),
        "device_application_hints": _matching_hints(DEVICE_HINTS, text),
    }


def _format_terms(value: object) -> str:
    """Render a profile value as a comma-separated string.

    Empty or missing values become the placeholder. Non-string items are
    converted with ``str`` so that a hand-built profile cannot make the
    prompt builders raise.
    """
    if value is None:
        return _PLACEHOLDER
    if isinstance(value, str):
        return value or _PLACEHOLDER
    if isinstance(value, Iterable):
        return ", ".join(str(item) for item in value) or _PLACEHOLDER
    return str(value)


def _profile_section(profile: dict[str, object]) -> str:
    """Return the "Manuscript profile" lines shared by both prompts."""
    return "\n".join(
        [
            "Manuscript profile:",
            f"- Material/system terms: {_format_terms(profile.get('exact_or_chemical_terms'))}",
            f"- Material family: {_format_terms(profile.get('material_family_hints'))}",
            f"- Device/application: {_format_terms(profile.get('device_application_hints'))}",
            f"- Target journal or tier: {profile.get('target_journal') or _PLACEHOLDER}",
            f"- Extra author notes: {profile.get('extra_notes') or _PLACEHOLDER}",
        ]
    )


def build_prompt(profile: dict[str, object]) -> str:
    """Return the literature-screening prompt for *profile*.

    The prompt embeds the manuscript profile and asks for strongly related,
    weakly related, rejected, and metadata-uncertain papers as JSON. Empty or
    missing profile fields are shown as "xxx".
    """
    return f"""You are screening existing Zotero/Obsidian literature notes for a materials-science manuscript.

{_profile_section(profile)}

Return:
1. Strongly related papers: same material, same material family plus same device, same mechanism, direct benchmark, or same target-journal positioning.
2. Weakly related papers: same direction, adjacent material family, same characterization logic, similar device architecture, or useful writing pattern.
3. For each paper: title, year, journal, DOI, Zotero key if present, relation type, usable_for, useful claim/evidence, and why it matters for the manuscript.
4. Reject title-only broad keyword matches.
5. Return rejected near matches and missing/uncertain metadata separately.

Output JSON keys:
- keyword_pack
- strongly_related_papers
- weakly_related_papers
- rejected_near_matches
- missing_or_uncertain_metadata
"""


def build_brief_prompt(profile: dict[str, object]) -> str:
    """Return the prompt that requests a QQwrite writing brief for *profile*.

    It is meant to be used after the screening prompt: it refers to the
    screened strongly/weakly related papers as its input. Empty or missing
    profile fields are shown as "xxx".
    """
    return f"""After screening the literature notes, generate a QQwrite writing brief for this materials-science manuscript.

{_profile_section(profile)}

Use the screened strongly_related_papers and weakly_related_papers as input.

Rules:
1. Do not invent experimental results, mechanisms, references, DOI values, or figure numbers.
2. Strong papers may define novelty, direct comparison, benchmark, and citation placement.
3. Weak papers may support field framing, mechanism language, or writing structure, but must not be treated as direct benchmark papers.
4. Every major claim must include author evidence, supporting DOI, or a clear status explaining that it still needs evidence.
5. Preserve DOI strings exactly as provided.
6. Return JSON only.

Output JSON keys:
- target_journal
- article_type
- recommended_word_template
- material_system
- application
- central_claim
- novelty_gap
- design_principle
- key_author_evidence
- strong_related_papers
- weak_related_papers
- section_plan
- claim_reference_map
- must_not_overclaim
- missing_inputs
- qqwrite_instructions
"""


def main(argv: list[str] | None = None) -> int:
    """Run the command-line interface.

    Args:
        argv: Argument list without the program name. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        0 on success.

    The keyword pack, the two profile options, and both prompts are emitted
    as one JSON document, either to ``--json-out`` or to standard output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--draft", required=True, help="UTF-8 text or markdown draft path")
    parser.add_argument("--target-journal", default="")
    parser.add_argument("--extra-notes", default="")
    parser.add_argument("--json-out", default="")
    args = parser.parse_args(argv)

    profile: dict[str, object] = dict(extract_terms(read_text(Path(args.draft))))
    profile["target_journal"] = args.target_journal
    profile["extra_notes"] = args.extra_notes
    profile["screening_prompt"] = build_prompt(profile)
    profile["brief_prompt"] = build_brief_prompt(profile)

    data = json.dumps(profile, ensure_ascii=False, indent=2)
    if args.json_out:
        out_path = Path(args.json_out)
        # Create missing parent folders so a fresh output location works.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(data + "\n", encoding="utf-8")
    else:
        # The JSON may contain non-ASCII text (ensure_ascii=False), so force
        # UTF-8 where possible. Replaced streams such as io.StringIO have no
        # reconfigure(); they are written to as they are.
        reconfigure = getattr(sys.stdout, "reconfigure", None)
        if callable(reconfigure):
            reconfigure(encoding="utf-8")
        print(data)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())