Clean invalid Unicode before LLM requests
This commit is contained in:
parent
dd6e06665c
commit
5630118f7d
|
|
@ -52,6 +52,16 @@ def fail(message: str) -> None:
|
||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def clean_text(value: Any) -> Any:
    """Return a copy of *value* with non-UTF-8-encodable characters removed.

    Strings are round-tripped through UTF-8 with ``errors="ignore"``, which
    drops any code point that cannot be encoded (for example lone
    surrogates). Lists, tuples and dicts are rebuilt recursively, with dict
    keys cleaned as well as values. Any other value is returned unchanged.

    Args:
        value: A string, a list/tuple/dict nesting of values, or any other
            object.

    Returns:
        The cleaned string, a new container of the same kind holding cleaned
        items, or ``value`` itself when it is none of those types.

    Note:
        Two dict keys that become equal after cleaning collapse into one
        entry; the later item wins.
    """
    if isinstance(value, str):
        # The bytes produced by the encode step are valid UTF-8 by
        # construction, so decoding them needs no error handler.
        return value.encode("utf-8", errors="ignore").decode("utf-8")
    if isinstance(value, dict):
        return {clean_text(key): clean_text(item) for key, item in value.items()}
    if isinstance(value, list):
        return [clean_text(item) for item in value]
    if isinstance(value, tuple):
        # json.dumps serializes tuples as arrays, so strings nested inside
        # them must be cleaned too.
        return tuple(clean_text(item) for item in value)
    return value
|
||||||
|
|
||||||
|
|
||||||
def load_dotenv(path: Path) -> None:
|
def load_dotenv(path: Path) -> None:
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
return
|
return
|
||||||
|
|
@ -473,7 +483,7 @@ def build_prompt(item: dict[str, Any], bibtex: str, fulltext: str, vault: Path,
|
||||||
[
|
[
|
||||||
"只输出最终 Markdown 笔记,不要输出解释、判断过程或代码围栏。",
|
"只输出最终 Markdown 笔记,不要输出解释、判断过程或代码围栏。",
|
||||||
"文献材料如下:",
|
"文献材料如下:",
|
||||||
json.dumps(source, ensure_ascii=False, indent=2),
|
json.dumps(clean_text(source), ensure_ascii=False, indent=2),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
return "\n\n".join(instructions)
|
return "\n\n".join(instructions)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue