#!/usr/bin/env python3
"""Create a manuscript keyword pack plus screening and writing-brief prompts.

This helper is intentionally local-config only. It prepares a reproducible
DeepSeek prompt for the required screening pass over QQnote/Zotero/Obsidian
literature notes, together with a follow-up prompt for a QQwrite writing
brief. It does not call DeepSeek or Zotero by itself.
"""

from __future__ import annotations

import argparse
import json
import re
import sys
from pathlib import Path


# Material-family keywords that extract_terms() searches for in the draft
# text (case-insensitive substring match). Matches are reported under the
# "material_family_hints" key in the order listed here.
MATERIAL_FAMILY_HINTS = [
    "MOF",
    "COF",
    "MXene",
    "perovskite",
    "hydrogel",
    "aerogel",
    "cellulose",
    "graphene",
    "polymer",
    "oxide",
    "sulfide",
    "carbide",
    "nitride",
    "composite",
]

# Device/application keywords that extract_terms() searches for in the draft
# text (case-insensitive substring match). Matches are reported under the
# "device_application_hints" key in the order listed here.
DEVICE_HINTS = [
    "TENG",
    "triboelectric",
    "piezoelectric",
    "sensor",
    "supercapacitor",
    "battery",
    "catalysis",
    "photocatalysis",
    "electrocatalysis",
    "membrane",
    "photodetector",
    "flexible electronics",
]


def read_text(path: Path) -> str:
    """Return the contents of *path* decoded as UTF-8.

    Bytes that are not valid UTF-8 are substituted (``errors="replace"``)
    instead of raising, so a partly mis-encoded draft can still be read.
    """
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        return handle.read()


def extract_terms(text: str) -> dict[str, list[str]]:
    """Collect keyword candidates from the draft *text*.

    Returns:
        A dict with three lists, in this key order:

        - ``exact_or_chemical_terms``: at most 40 distinct tokens, taken in
          sorted order, that contain a digit, contain a hyphen, or are
          entirely upper-case.
        - ``material_family_hints``: the entries of ``MATERIAL_FAMILY_HINTS``
          that occur in *text*.
        - ``device_application_hints``: the entries of ``DEVICE_HINTS`` that
          occur in *text*.
    """

    def _hits(hints: list[str]) -> list[str]:
        # Case-insensitive substring search; the hint list's order is kept.
        return [hint for hint in hints if re.search(re.escape(hint), text, re.I)]

    # A token starts with a letter and has at least three characters overall.
    token_pattern = r"[A-Za-z][A-Za-z0-9\-_/]{2,}"
    distinct_tokens = sorted(set(re.findall(token_pattern, text)))

    chemical_like: list[str] = []
    for candidate in distinct_tokens:
        if len(chemical_like) == 40:
            # Cap reached; later tokens in sort order are dropped.
            break
        has_digit = any(ch.isdigit() for ch in candidate)
        if has_digit or "-" in candidate or candidate.isupper():
            chemical_like.append(candidate)

    return {
        "exact_or_chemical_terms": chemical_like,
        "material_family_hints": _hits(MATERIAL_FAMILY_HINTS),
        "device_application_hints": _hits(DEVICE_HINTS),
    }


def build_prompt(profile: dict[str, object]) -> str:
    """Render the literature-screening prompt for *profile*.

    The list-valued keys ``exact_or_chemical_terms``,
    ``material_family_hints`` and ``device_application_hints`` are joined
    with ``", "``; ``target_journal`` and ``extra_notes`` are inserted as-is.
    Any field that is missing or empty is rendered as the placeholder
    ``xxx``.
    """

    def _listed(key: str) -> str:
        # An empty join result is falsy, so it falls through to the placeholder.
        return ", ".join(profile.get(key, [])) or "xxx"

    system_terms = _listed("exact_or_chemical_terms")
    family = _listed("material_family_hints")
    application = _listed("device_application_hints")
    journal = profile.get("target_journal") or "xxx"
    notes = profile.get("extra_notes") or "xxx"

    return f"""You are screening existing Zotero/Obsidian literature notes for a materials-science manuscript.

Manuscript profile:
- Material/system terms: {system_terms}
- Material family: {family}
- Device/application: {application}
- Target journal or tier: {journal}
- Extra author notes: {notes}

Return:
1. Strongly related papers: same material, same material family plus same device, same mechanism, direct benchmark, or same target-journal positioning.
2. Weakly related papers: same direction, adjacent material family, same characterization logic, similar device architecture, or useful writing pattern.
3. For each paper: title, year, journal, DOI, Zotero key if present, relation type, usable_for, useful claim/evidence, and why it matters for the manuscript.
4. Reject title-only broad keyword matches.
5. Return rejected near matches and missing/uncertain metadata separately.

Output JSON keys:
- keyword_pack
- strongly_related_papers
- weakly_related_papers
- rejected_near_matches
- missing_or_uncertain_metadata
"""


def build_brief_prompt(profile: dict[str, object]) -> str:
    """Render the QQwrite writing-brief prompt for *profile*.

    The manuscript-profile section is filled from the keys
    ``exact_or_chemical_terms``, ``material_family_hints``,
    ``device_application_hints`` (each joined with ``", "``),
    ``target_journal`` and ``extra_notes``. Any field that is missing or
    empty is rendered as the placeholder ``xxx``.
    """
    placeholder = "xxx"

    material_terms = ", ".join(profile.get("exact_or_chemical_terms", [])) or placeholder
    material_family = ", ".join(profile.get("material_family_hints", [])) or placeholder
    device_terms = ", ".join(profile.get("device_application_hints", [])) or placeholder
    target = profile.get("target_journal") or placeholder
    author_notes = profile.get("extra_notes") or placeholder

    return f"""After screening the literature notes, generate a QQwrite writing brief for this materials-science manuscript.

Manuscript profile:
- Material/system terms: {material_terms}
- Material family: {material_family}
- Device/application: {device_terms}
- Target journal or tier: {target}
- Extra author notes: {author_notes}

Use the screened strongly_related_papers and weakly_related_papers as input.

Rules:
1. Do not invent experimental results, mechanisms, references, DOI values, or figure numbers.
2. Strong papers may define novelty, direct comparison, benchmark, and citation placement.
3. Weak papers may support field framing, mechanism language, or writing structure, but must not be treated as direct benchmark papers.
4. Every major claim must include author evidence, supporting DOI, or a clear status explaining that it still needs evidence.
5. Preserve DOI strings exactly as provided.
6. Return JSON only.

Output JSON keys:
- target_journal
- article_type
- recommended_word_template
- material_system
- application
- central_claim
- novelty_gap
- design_principle
- key_author_evidence
- strong_related_papers
- weak_related_papers
- section_plan
- claim_reference_map
- must_not_overclaim
- missing_inputs
- qqwrite_instructions
"""


def main(argv: list[str] | None = None) -> int:
    """Build the keyword pack and both prompts for a draft and emit them as JSON.

    The JSON object holds the extracted term lists, the ``target_journal``
    and ``extra_notes`` options, and the rendered ``screening_prompt`` and
    ``brief_prompt``. It is written to ``--json-out`` when given, otherwise
    printed to stdout.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, so the script behaves as before
            when run from the command line.

    Returns:
        0 on success; 1 if the draft cannot be read or the JSON output file
        cannot be written (a one-line message is printed to stderr).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--draft", required=True, help="UTF-8 text or markdown draft path")
    parser.add_argument("--target-journal", default="")
    parser.add_argument("--extra-notes", default="")
    parser.add_argument("--json-out", default="")
    args = parser.parse_args(argv)

    try:
        text = read_text(Path(args.draft))
    except OSError as exc:
        # Missing file, directory, permission problem: report it without a traceback.
        print(f"error: cannot read draft: {exc}", file=sys.stderr)
        return 1

    # Copy into a dict typed for mixed values before adding the string fields.
    profile: dict[str, object] = dict(extract_terms(text))
    profile["target_journal"] = args.target_journal
    profile["extra_notes"] = args.extra_notes
    # The prompts are rendered after the options are stored because they read them.
    profile["screening_prompt"] = build_prompt(profile)
    profile["brief_prompt"] = build_brief_prompt(profile)

    data = json.dumps(profile, ensure_ascii=False, indent=2)

    if args.json_out:
        try:
            Path(args.json_out).write_text(data + "\n", encoding="utf-8")
        except OSError as exc:
            print(f"error: cannot write JSON output: {exc}", file=sys.stderr)
            return 1
        return 0

    # ensure_ascii=False means the output may contain non-ASCII text, so force
    # UTF-8 on stdout. sys.stdout can be replaced by a stream that has no
    # reconfigure() (e.g. io.StringIO), in which case it is left untouched.
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(encoding="utf-8")
    print(data)
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())