# QQsci-skill/scripts/material_literature_router.py

#!/usr/bin/env python3
"""Create a manuscript keyword pack and DeepSeek screening prompt.

This helper is intentionally local-config only. It does not call DeepSeek or
Zotero by itself; it prepares a reproducible prompt that the skill can use with
the user's configured note-search route.
"""

from __future__ import annotations

import argparse
import json
import re
import sys
from pathlib import Path


# Material-family keywords. extract_terms() reports every entry that occurs in
# the draft as a case-insensitive substring, in the order listed here.
MATERIAL_FAMILY_HINTS = [
    "MOF", "COF", "MXene", "perovskite", "hydrogel", "aerogel", "cellulose",
    "graphene", "polymer", "oxide", "sulfide", "carbide", "nitride", "composite",
]

# Device/application keywords. extract_terms() reports every entry that occurs
# in the draft as a case-insensitive substring, in the order listed here.
DEVICE_HINTS = [
    "TENG", "triboelectric", "piezoelectric", "sensor",
    "supercapacitor", "battery",
    "catalysis", "photocatalysis", "electrocatalysis",
    "membrane", "photodetector", "flexible electronics",
]


def read_text(path: Path) -> str:
    """Return the contents of *path* decoded as UTF-8.

    Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
    raising ``UnicodeDecodeError``.
    """
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        return handle.read()


def extract_terms(text: str) -> dict[str, list[str]]:
    """Pull keyword candidates out of a manuscript draft.

    Args:
        text: Draft text to scan.

    Returns:
        A dict with three lists:

        * ``exact_or_chemical_terms``: unique tokens (an ASCII letter followed
          by at least two letters, digits, ``-``, ``_`` or ``/``) that contain
          a digit, contain a hyphen, or are upper-case; sorted, and capped at
          the first 40.
        * ``material_family_hints``: entries of ``MATERIAL_FAMILY_HINTS`` found
          in *text*.
        * ``device_application_hints``: entries of ``DEVICE_HINTS`` found in
          *text*.

        Hints are matched as case-insensitive substrings and keep the order of
        their source list.
    """

    def _mentioned(hints: list[str]) -> list[str]:
        # re.escape makes each hint a literal pattern, so this is a plain
        # case-insensitive substring search.
        return [
            hint
            for hint in hints
            if re.search(re.escape(hint), text, flags=re.IGNORECASE) is not None
        ]

    def _looks_chemical(word: str) -> bool:
        if "-" in word or word.isupper():
            return True
        return any(char.isdigit() for char in word)

    unique_words = set(re.findall(r"[A-Za-z][A-Za-z0-9\-_/]{2,}", text))
    candidates = list(filter(_looks_chemical, sorted(unique_words)))

    result: dict[str, list[str]] = {}
    result["exact_or_chemical_terms"] = candidates[:40]
    result["material_family_hints"] = _mentioned(MATERIAL_FAMILY_HINTS)
    result["device_application_hints"] = _mentioned(DEVICE_HINTS)
    return result


def build_prompt(profile: dict[str, object]) -> str:
    """Render the literature-screening prompt for a manuscript profile.

    Args:
        profile: Mapping read with ``.get``. The values under
            ``exact_or_chemical_terms``, ``material_family_hints`` and
            ``device_application_hints`` are joined with ``", "`` (a missing
            key counts as an empty list); ``target_journal`` and
            ``extra_notes`` are interpolated directly.

    Returns:
        The prompt text, ending with a newline. Any field that is missing or
        falsy is rendered as the placeholder ``xxx``.
    """
    placeholder = "xxx"

    def _joined(key: str) -> str:
        return ", ".join(profile.get(key, [])) or placeholder

    material_terms = _joined("exact_or_chemical_terms")
    family = _joined("material_family_hints")
    application = _joined("device_application_hints")
    journal = profile.get("target_journal") or placeholder
    notes = profile.get("extra_notes") or placeholder

    lines = [
        "You are screening existing Zotero/Obsidian literature notes for a materials-science manuscript.",
        "",
        "Manuscript profile:",
        f"- Material/system terms: {material_terms}",
        f"- Material family: {family}",
        f"- Device/application: {application}",
        f"- Target journal or tier: {journal}",
        f"- Extra author notes: {notes}",
        "",
        "Return:",
        "1. Strongly related papers: same material, same material family plus same device, same mechanism, direct benchmark, or same target-journal positioning.",
        "2. Weakly related papers: same direction, adjacent material family, same characterization logic, similar device architecture, or useful writing pattern.",
        "3. For each paper: title, year, journal, Zotero key if present, relation type, useful claim/evidence, and why it matters for the manuscript.",
        "4. Reject title-only broad keyword matches.",
        "",  # trailing empty item so the joined text ends with a newline
    ]
    return "\n".join(lines)


def main(argv: list[str] | None = None) -> int:
    """Build the keyword profile for a draft and emit it as JSON.

    The profile holds the extracted term lists, the target journal, the extra
    author notes, and the rendered screening prompt. It is written to the
    ``--json-out`` file when that option is given, otherwise printed to stdout.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        ``0`` once the profile has been written.

    Raises:
        OSError: If the draft cannot be read or the output file cannot be
            written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--draft", required=True, help="UTF-8 text or markdown draft path")
    parser.add_argument("--target-journal", default="")
    parser.add_argument("--extra-notes", default="")
    parser.add_argument("--json-out", default="")
    args = parser.parse_args(argv)

    text = read_text(Path(args.draft))
    # Key order is preserved by json.dumps, so keep the insertion order stable.
    profile: dict[str, object] = dict(extract_terms(text))
    profile["target_journal"] = args.target_journal
    profile["extra_notes"] = args.extra_notes
    profile["screening_prompt"] = build_prompt(profile)

    data = json.dumps(profile, ensure_ascii=False, indent=2)
    if args.json_out:
        Path(args.json_out).write_text(data + "\n", encoding="utf-8")
        return 0

    # ensure_ascii=False can emit non-ASCII characters, so switch stdout to
    # UTF-8 when the stream supports it. Replacement streams such as
    # io.StringIO have no reconfigure(); calling it unguarded would raise
    # AttributeError before anything is printed.
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if callable(reconfigure):
        reconfigure(encoding="utf-8")
    print(data)
    return 0


# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())