From 5630118f7de5ffff05d4739b1921d2d83aa317d7 Mon Sep 17 00:00:00 2001
From: qyh15
Date: Fri, 22 May 2026 16:49:40 +0800
Subject: [PATCH] Clean invalid Unicode before LLM requests

---
Notes (free-form text after the three-dash line; not part of the commit):

What the patch does
  * Adds clean_text(value), a recursive helper:
      - str  -> round-tripped through UTF-8 with errors="ignore", which drops
                any code point the UTF-8 encoder rejects (lone surrogates);
      - list -> new list with every element cleaned;
      - dict -> new dict with both keys and values cleaned;
      - anything else is returned unchanged.
  * build_prompt now serializes clean_text(source) instead of source in its
    json.dumps(..., ensure_ascii=False, indent=2) call.

Review observations
  * Characters are dropped silently; nothing is logged or replaced with a
    marker such as U+FFFD.
  * The errors="ignore" on the decode side has no effect: the bytes it reads
    were just produced by the UTF-8 encoder and are therefore valid.
  * Tuples and other containers are not descended into, so strings nested in
    them are not cleaned.
  * Dict keys that become equal after cleaning collapse into one entry (the
    later item wins).

NOTE(review): this file had been flattened onto a single line. Line breaks
and indentation below were reconstructed (4-space Python indentation
assumed; nesting depth of the second hunk is a best guess). Verify the hunk
context against scripts/generate_zotero_ai_note.py, or apply with
`git apply --ignore-whitespace`, before relying on it.

 scripts/generate_zotero_ai_note.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/scripts/generate_zotero_ai_note.py b/scripts/generate_zotero_ai_note.py
index d9db00a..c9a374c 100644
--- a/scripts/generate_zotero_ai_note.py
+++ b/scripts/generate_zotero_ai_note.py
@@ -52,6 +52,16 @@ def fail(message: str) -> None:
     raise SystemExit(1)
 
 
+def clean_text(value: Any) -> Any:
+    if isinstance(value, str):
+        return value.encode("utf-8", errors="ignore").decode("utf-8", errors="ignore")
+    if isinstance(value, list):
+        return [clean_text(item) for item in value]
+    if isinstance(value, dict):
+        return {clean_text(key): clean_text(item) for key, item in value.items()}
+    return value
+
+
 def load_dotenv(path: Path) -> None:
     if not path.exists():
         return
@@ -473,7 +483,7 @@ def build_prompt(item: dict[str, Any], bibtex: str, fulltext: str, vault: Path,
         [
             "只输出最终 Markdown 笔记,不要输出解释、判断过程或代码围栏。",
             "文献材料如下:",
-            json.dumps(source, ensure_ascii=False, indent=2),
+            json.dumps(clean_text(source), ensure_ascii=False, indent=2),
         ]
     )
     return "\n\n".join(instructions)