Merge branch 'dev' into dockerfile-optimisation

This commit is contained in:
Jannik S 2024-04-08 09:15:00 +02:00 committed by GitHub
commit 3b3d0cce1e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
65 changed files with 1956 additions and 533 deletions

View file

@@ -8,7 +8,7 @@ from fastapi import (
Form,
)
from fastapi.middleware.cors import CORSMiddleware
import os, shutil, logging
import os, shutil, logging, re
from pathlib import Path
from typing import List
@@ -438,25 +438,11 @@ def store_doc(
log.info(f"file.content_type: {file.content_type}")
try:
is_valid_filename = True
unsanitized_filename = file.filename
if not unsanitized_filename.isascii():
is_valid_filename = False
filename = os.path.basename(unsanitized_filename)
unvalidated_file_path = f"{UPLOAD_DIR}/{unsanitized_filename}"
dereferenced_file_path = str(Path(unvalidated_file_path).resolve(strict=False))
if not dereferenced_file_path.startswith(UPLOAD_DIR):
is_valid_filename = False
file_path = f"{UPLOAD_DIR}/{filename}"
if is_valid_filename:
file_path = dereferenced_file_path
else:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.DEFAULT(),
)
filename = file.filename
contents = file.file.read()
with open(file_path, "wb") as f:
f.write(contents)
@@ -467,7 +453,7 @@ def store_doc(
collection_name = calculate_sha256(f)[:63]
f.close()
loader, known_type = get_loader(file.filename, file.content_type, file_path)
loader, known_type = get_loader(filename, file.content_type, file_path)
data = loader.load()
try: