Skip to content

Commit

Permalink
feat(document-extractor): Support xml file.
Browse files Browse the repository at this point in the history
  • Loading branch information
laipz8200 committed Sep 27, 2024
1 parent a09a6d0 commit 3b01c0e
Showing 1 changed file with 1 addition and 1 deletion.
Original file line number | Diff line number | Diff line change
Expand Up @@ -76,7 +76,7 @@ def _run(self):

def _extract_text(*, file_content: bytes, mime_type: str) -> str:
"""Extract text from a file based on its MIME type."""
- if mime_type.startswith("text/plain") or mime_type in {"text/html", "text/htm", "text/markdown"}:
+ if mime_type.startswith("text/plain") or mime_type in {"text/html", "text/htm", "text/markdown", "text/xml"}:
return _extract_text_from_plain_text(file_content)
elif mime_type == "application/pdf":
return _extract_text_from_pdf(file_content)
Expand Down

0 comments on commit 3b01c0e

Please sign in to comment.