Clean invalid Unicode before LLM requests
This commit is contained in:
parent
dd6e06665c
commit
5630118f7d
|
|
@ -52,6 +52,16 @@ def fail(message: str) -> None:
|
|||
raise SystemExit(1)
|
||||
|
||||
|
||||
def clean_text(value: Any) -> Any:
    """Recursively drop characters that cannot be encoded as UTF-8.

    Strings are round-tripped through UTF-8 with the ``ignore`` error
    handler, which removes unencodable code points such as lone surrogates.
    Lists, tuples and dicts (keys and values) are rebuilt with every nested
    element cleaned the same way. Any other value is returned unchanged.

    Args:
        value: A string, a list/tuple/dict container, or any other object.

    Returns:
        A cleaned copy for strings and supported containers; otherwise the
        original object itself.
    """
    if isinstance(value, str):
        # The encoded bytes are always valid UTF-8, so a strict decode is safe.
        return value.encode("utf-8", errors="ignore").decode("utf-8")
    if isinstance(value, list):
        return [clean_text(item) for item in value]
    if isinstance(value, tuple):
        # json.dumps serialises tuples as arrays, so their strings must be
        # cleaned too or invalid characters would still reach the output.
        return tuple(clean_text(item) for item in value)
    if isinstance(value, dict):
        # NOTE: distinct keys that become equal after cleaning collapse into
        # one entry, with the later item winning.
        return {clean_text(key): clean_text(item) for key, item in value.items()}
    return value
|
||||
|
||||
|
||||
def load_dotenv(path: Path) -> None:
|
||||
if not path.exists():
|
||||
return
|
||||
|
|
@ -473,7 +483,7 @@ def build_prompt(item: dict[str, Any], bibtex: str, fulltext: str, vault: Path,
|
|||
[
|
||||
"只输出最终 Markdown 笔记,不要输出解释、判断过程或代码围栏。",
|
||||
"文献材料如下:",
|
||||
json.dumps(source, ensure_ascii=False, indent=2),
|
||||
json.dumps(clean_text(source), ensure_ascii=False, indent=2),
|
||||
]
|
||||
)
|
||||
return "\n\n".join(instructions)
|
||||
|
|
|
|||
Loading…
Reference in New Issue