fix: code block segmentation problem of markdown document (#6465)

This commit is contained in:
灰灰 2024-07-25 17:24:37 +08:00 committed by GitHub
parent 16b4f560cd
commit 5e4ac11df3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -54,8 +54,16 @@ class MarkdownExtractor(BaseExtractor):
current_header = None
current_text = ""
code_block_flag = False
for line in lines:
if line.startswith("```"):
code_block_flag = not code_block_flag
current_text += line + "\n"
continue
if code_block_flag:
current_text += line + "\n"
continue
header_match = re.match(r"^#+\s", line)
if header_match:
if current_header is not None: