From cf2a7dbd91197f55fe24afd4390ca70b947e3f64 Mon Sep 17 00:00:00 2001
From: -LAN- <laipz8200@outlook.com>
Date: Fri, 27 Sep 2024 13:57:02 +0800
Subject: [PATCH] 
 feat(api/core/workflow/nodes/document_extractor/document_extractor_node.py):
 Add csv support.

---
 .../document_extractor_node.py                | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/api/core/workflow/nodes/document_extractor/document_extractor_node.py b/api/core/workflow/nodes/document_extractor/document_extractor_node.py
index a9516f63525b4f..c18453103bdb28 100644
--- a/api/core/workflow/nodes/document_extractor/document_extractor_node.py
+++ b/api/core/workflow/nodes/document_extractor/document_extractor_node.py
@@ -1,3 +1,4 @@
+import csv
 import io
 from typing import cast
 
@@ -81,6 +82,8 @@ def _extract_text(*, file_content: bytes, mime_type: str) -> str:
         "application/msword",
     }:
         return _extract_text_from_doc(file_content)
+    elif mime_type == "text/csv":
+        return _extract_text_from_csv(file_content)
     else:
         raise UnsupportedFileTypeError(f"Unsupported MIME type: {mime_type}")
 
@@ -141,3 +144,23 @@ def _extract_text_from_file(file: File):
     file_content = _download_file_content(file)
     extracted_text = _extract_text(file_content=file_content, mime_type=file.mime_type)
     return extracted_text
+
+
+def _extract_text_from_csv(file_content: bytes) -> str:
+    """Convert UTF-8 CSV bytes to a Markdown table; the first row is the header.
+
+    Drops a leading BOM, pads short rows, and escapes "|" and line breaks in cells.
+    Returns "" for empty input; raises TextExtractionError if extraction fails.
+    """
+    try:
+        # newline="" leaves record splitting (CR, LF or CRLF) to csv.reader.
+        rows = list(csv.reader(io.StringIO(file_content.decode("utf-8-sig"), newline="")))
+        if not rows:
+            return ""
+        width = max(map(len, rows))
+        table = [["<br>".join(c.replace("|", "\\|").splitlines()) for c in row] for row in rows]
+        table = [row + [""] * (width - len(row)) for row in table]
+        table.insert(1, ["---"] * width)
+        return "\n".join("| " + " | ".join(row) + " |" for row in table)
+    except Exception as e:
+        raise TextExtractionError(f"Failed to extract text from CSV: {str(e)}") from e