2023-05-31 22:03:15 +08:00
|
|
|
import json
|
|
|
|
|
2024-09-11 16:40:52 +08:00
|
|
|
from core.llm_generator.output_parser.errors import OutputParserError
|
2023-05-31 22:03:15 +08:00
|
|
|
|
|
|
|
|
|
|
|
def parse_json_markdown(json_string: str) -> dict:
    """Extract a JSON payload from text that may wrap it in backticks and parse it.

    The text is stripped, then searched for the first opening marker that
    occurs, trying them in this order: "```json", "```", "``", "`", "{".
    The closing marker is searched from the right, at or after the start
    position, in this order: "```", "``", "`", "}".  Backtick markers are
    excluded from the extracted span; braces are kept because they belong to
    the JSON document itself.

    :param json_string: raw text expected to contain a JSON document.
    :return: the value produced by ``json.loads`` for the extracted span.
    :raises ValueError: if no opening/closing marker pair delimits a
        non-empty span.
    :raises json.JSONDecodeError: if the extracted span is not valid JSON.
    """
    json_string = json_string.strip()
    starts = ["```json", "```", "``", "`", "{"]
    ends = ["```", "``", "`", "}"]

    # Locate the opening marker; longer fences are tried first so that
    # "```json" is not mistaken for a bare "```" or a single backtick.
    start_index = -1
    for marker in starts:
        start_index = json_string.find(marker)
        if start_index != -1:
            if marker != "{":
                # Skip past the fence; an opening brace is part of the payload.
                start_index += len(marker)
            break

    # Locate the closing marker, searching backwards from the end of the text.
    end_index = -1
    if start_index != -1:
        for marker in ends:
            end_index = json_string.rfind(marker, start_index)
            if end_index != -1:
                if marker == "}":
                    # Include the closing brace in the slice.
                    end_index += 1
                break

    if start_index == -1 or end_index == -1 or start_index >= end_index:
        raise ValueError("Could not find JSON block in the output.")

    extracted_content = json_string[start_index:end_index].strip()
    return json.loads(extracted_content)
|
|
|
|
|
|
|
|
|
2024-02-09 15:21:33 +08:00
|
|
|
def parse_and_check_json_markdown(text: str, expected_keys: list[str]) -> dict:
    """Parse JSON out of ``text`` and verify that every expected key is present.

    :param text: raw text passed to ``parse_json_markdown``.
    :param expected_keys: keys that must be contained in the parsed object.
    :return: the parsed object when all expected keys are present.
    :raises OutputParserError: if the extracted text is not valid JSON, or if
        any key in ``expected_keys`` is missing from the parsed object.

    Errors raised by ``parse_json_markdown`` other than
    ``json.JSONDecodeError`` are not intercepted here and propagate unchanged.
    """
    try:
        json_obj = parse_json_markdown(text)
    except json.JSONDecodeError as e:
        # Chain explicitly so the underlying decode error is kept as __cause__.
        raise OutputParserError(f"Got invalid JSON object. Error: {e}") from e

    # Report the first missing key, in the order given by the caller.
    for key in expected_keys:
        if key not in json_obj:
            raise OutputParserError(
                f"Got invalid return object. Expected key `{key}` to be present, but got {json_obj}"
            )
    return json_obj
|