forked from open-webui/open-webui
feat: rag folder scan support
This commit is contained in:
parent
9f869f6573
commit
e07001e5f6
9 changed files with 350 additions and 12 deletions
|
@ -1,3 +1,4 @@
|
|||
from pathlib import Path
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
|
@ -38,3 +39,40 @@ def validate_email_format(email: str) -> bool:
|
|||
if not re.match(r"[^@]+@[^@]+\.[^@]+", email):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def sanitize_filename(file_name: str) -> str:
    """Return a lower-case, dash-separated identifier derived from *file_name*.

    The transformation is applied in three steps:

    1. the name is lower-cased;
    2. every character that is neither a word character (``\\w``: letters,
       digits, underscore) nor whitespace is removed;
    3. each run of whitespace is replaced with a single ``-``.

    Note that ``.`` is not a word character, so the dot separating an
    extension is removed too (``"Report.PDF"`` becomes ``"reportpdf"``).
    Leading or trailing whitespace is not stripped; it turns into a leading
    or trailing dash.

    Args:
        file_name: The raw file name to sanitize.

    Returns:
        The sanitized name.
    """
    # Drop punctuation first so that whitespace left around a removed
    # character collapses into one dash in the second substitution.
    without_specials = re.sub(r"[^\w\s]", "", file_name.lower())
    return re.sub(r"\s+", "-", without_specials)
|
||||
|
||||
|
||||
def extract_folders_after_data_docs(path) -> list:
    """Return cumulative folder tags for a file located below ``data/docs``.

    The path is split into components; the first component named ``data`` is
    located, then the first component named ``docs`` that follows it. ``docs``
    does not have to come immediately after ``data``. Every folder between
    that ``docs`` component and the final component (assumed to be the file
    name, and therefore excluded) contributes one tag made of all folders up
    to and including it, joined with ``/``.

    Example: ``/app/data/docs/a/b/file.txt`` yields ``["a", "a/b"]``.

    Args:
        path: A path string or ``Path`` object.

    Returns:
        A list of tag strings ordered from the shallowest folder to the
        deepest, or an empty list when the path has no ``data`` component
        followed by a ``docs`` component, or no folders below ``docs``.
    """
    parts = Path(path).parts

    try:
        # Each index points one past the matched component, so the search
        # for "docs" starts strictly after "data".
        after_data = parts.index("data") + 1
        after_docs = parts.index("docs", after_data) + 1
    except ValueError:
        return []

    # The last component is treated as the file name and left out.
    folders = parts[after_docs:-1]
    return ["/".join(folders[:depth]) for depth in range(1, len(folders) + 1)]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue