Add deep mode for full literature notes

This commit is contained in:
qyh15 2026-05-22 15:59:23 +08:00
parent d3619931e2
commit 4a080355ad
3 changed files with 52 additions and 14 deletions

View File

@ -43,6 +43,10 @@ config/config.local.json
使用 MYwrite skill,为这个 Zotero 条目生成一条 AI 阅读笔记:SXAIQUJT。
```
```text
使用 MYwrite skill,为这个 Zotero 条目生成满血版深度阅读笔记:SXAIQUJT。不要跳过已有笔记,生成一条新的、更详细的 Zotero 子笔记。
```
```text
使用 MYwrite skill,给这些 Zotero 条目批量生成 AI 阅读笔记:SXAIQUJT、X7GJZ627、ZCZXGRAM。
```
@ -65,6 +69,8 @@ config/config.local.json
更稳妥的方式是每次处理 20-30 篇文献,并要求 AI 跳过已经生成过笔记的条目。这样即使中途失败,也可以继续执行而不会重复写入。
如果需要和手动 AwesomeGPT 生成结果接近的详细笔记,请明确告诉 AI 使用“满血版”或“深度精读”模式。该模式会读取更多全文内容,并要求模型输出更完整的结构化笔记。
## 仓库内容
```text

View File

@ -48,6 +48,14 @@ Do not print the config contents. When checking setup, report only whether each
py "$env:USERPROFILE\.codex\skills\MYwrite\scripts\generate_zotero_ai_note.py" --vault "C:\Users\qyh15\Documents\Obsidian Vault" --item-key SXAIQUJT --skip-existing
```
### Deep full-detail item
Use this when the user asks for `满血版`, `满血版本`, `详细版`, or `深度精读`, or says the batch note is less detailed than a manually generated AwesomeGPT note. Do not pass `--skip-existing` when the user wants a fuller note regenerated; create a new Zotero child note instead. Leave the older notes in place unless the user explicitly asks to delete them.
```powershell
py "$env:USERPROFILE\.codex\skills\MYwrite\scripts\generate_zotero_ai_note.py" --vault "C:\Users\qyh15\Documents\Obsidian Vault" --item-key SXAIQUJT --mode deep --fulltext-chars 80000 --max-tokens 12000
```
### Multiple items
```powershell
@ -92,7 +100,7 @@ Default vault organization:
- For live Zotero writes, state that Zotero child notes will be created.
- Use `--dry-run` for first-time validation or template changes.
- Use `--fulltext-chars 4000` for cheap, lightweight notes; increase only when the user asks for deeper notes.
- Use `--fulltext-chars 4000` for cheap, lightweight notes; use `--mode deep --fulltext-chars 80000 --max-tokens 12000` when the user asks for a full-detail note.
- Do not add visible machine markers to note bodies. Use the Zotero item link for duplicate detection.
- Do not delete Zotero duplicate notes unless the user explicitly requests cleanup.
- For Obsidian cleanup, delete only files that are empty after removing frontmatter, separators, whitespace, and empty HTML placeholders.

View File

@ -406,7 +406,7 @@ def creators_text(creators: list[dict[str, Any]]) -> str:
return "; ".join(names)
def build_prompt(item: dict[str, Any], bibtex: str, fulltext: str, vault: Path) -> str:
def build_prompt(item: dict[str, Any], bibtex: str, fulltext: str, vault: Path, mode: str) -> str:
data = item.get("data") or {}
prompt_path, research_template_path, review_template_path = template_paths(vault)
prompt = prompt_path.read_text(encoding="utf-8")
@ -428,8 +428,7 @@ def build_prompt(item: dict[str, Any], bibtex: str, fulltext: str, vault: Path)
"bibtex": bibtex,
"indexedFullTextExcerpt": fulltext,
}
return "\n\n".join(
[
instructions = [
prompt,
"请先判断文献类型:综述型文献或研究型文献。",
"若为研究型文献,请严格填充下面的研究型模板:",
@ -437,14 +436,29 @@ def build_prompt(item: dict[str, Any], bibtex: str, fulltext: str, vault: Path)
"若为综述型文献,请严格填充下面的综述型模板:",
review_template,
"请将模板中的 ${topItem.getField('title')} 替换为真实题名,将 ${topItem.key} 替换为 Zotero key。",
]
if mode == "deep":
instructions.extend(
[
"这是满血精读模式。请优先追求完整、具体、可复用的文献阅读笔记,而不是短摘要。",
"每个模板栏目都要充分展开;如果原文提供了材料配比、制备参数、测试条件、器件结构、性能数据、机理解释或对照实验,必须具体写出。",
"核心数据必须尽量保留数值和单位;不要用'显著提高''性能优异'这类空泛表达替代具体结果。",
"机制部分要写清因果链条:材料/结构设计如何影响微观结构、界面、导电网络、离子/电子传输、力学响应或器件输出。",
"局限与启发部分要给出可用于后续课题设计的具体启发,而不是泛泛评价。",
"如果全文摘录中缺少某项信息,请明确写'原文摘录中未提供',不要编造。",
]
)
instructions.extend(
[
"只输出最终 Markdown 笔记,不要输出解释、判断过程或代码围栏。",
"文献材料如下:",
json.dumps(source, ensure_ascii=False, indent=2),
]
)
return "\n\n".join(instructions)
def call_llm(prompt: str) -> str:
def call_llm(prompt: str, max_tokens: int | None = None) -> str:
api_key = os.environ.get("AWESOMEGPT_API_KEY")
base_url = (os.environ.get("AWESOMEGPT_BASE_URL") or "").rstrip("/")
model = os.environ.get("AWESOMEGPT_MODEL")
@ -460,6 +474,8 @@ def call_llm(prompt: str) -> str:
],
"temperature": 0.3,
}
if max_tokens:
payload["max_tokens"] = max_tokens
response = http_json(
base_url + "/chat/completions",
method="POST",
@ -524,6 +540,8 @@ def main() -> None:
parser.add_argument("--all", action="store_true", help="Process all top-level Zotero items")
parser.add_argument("--limit", type=int, default=1, help="Maximum number of items to process")
parser.add_argument("--fulltext-chars", type=int, default=12000)
parser.add_argument("--mode", choices=["quick", "deep"], default="quick", help="quick for batch notes; deep for full-detail notes")
parser.add_argument("--max-tokens", type=int, default=0, help="Optional LLM output token limit; deep mode defaults to 12000")
parser.add_argument("--skip-existing", action="store_true", help="Skip items that already have a generated AI child note")
parser.add_argument("--dry-run", action="store_true", help="generate but do not write Zotero note")
parser.add_argument("--vault", default=str(DEFAULT_VAULT), help="Obsidian vault containing 00 Templater")
@ -557,9 +575,15 @@ def main() -> None:
continue
print(f"[{index}/{len(items)}] generating {key}: {title}", file=sys.stderr)
bibtex = export_bibtex(key)
fulltext = local_fulltext(key, args.fulltext_chars)
prompt = build_prompt(item, bibtex, fulltext, vault)
markdown = call_llm(prompt)
fulltext_chars = args.fulltext_chars
if args.mode == "deep" and fulltext_chars == 12000:
fulltext_chars = 80000
max_tokens = args.max_tokens
if args.mode == "deep" and not max_tokens:
max_tokens = 12000
fulltext = local_fulltext(key, fulltext_chars)
prompt = build_prompt(item, bibtex, fulltext, vault, args.mode)
markdown = call_llm(prompt, max_tokens or None)
result = create_child_note(user_id, key, markdown, args.dry_run)
results.append({"index": index, "itemKey": key, "title": title, "status": "ok", "result": result})
except SystemExit as exc: