From a640652a9bf9f0d10848a812bd2b3ada8562dcdc Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 23 Dec 2023 23:05:52 -0800 Subject: [PATCH] fix: large file upload issue --- backend/apps/web/routers/utils.py | 90 ++++++++++++++++++++++++------- 1 file changed, 71 insertions(+), 19 deletions(-) diff --git a/backend/apps/web/routers/utils.py b/backend/apps/web/routers/utils.py index 8c822f02..ee125975 100644 --- a/backend/apps/web/routers/utils.py +++ b/backend/apps/web/routers/utils.py @@ -21,19 +21,74 @@ class UploadBlobForm(BaseModel): filename: str +from urllib.parse import urlparse + + +def parse_huggingface_url(hf_url): + # Parse the URL + parsed_url = urlparse(hf_url) + + # Get the path and split it into components + path_components = parsed_url.path.split("/") + + # Extract the desired output + user_repo = "/".join(path_components[1:3]) + model_file = path_components[-1] + + return [user_repo, model_file] + + +def download_file_stream(url, file_path, chunk_size=1024 * 1024): + done = False + + if os.path.exists(file_path): + current_size = os.path.getsize(file_path) + else: + current_size = 0 + + headers = {"Range": f"bytes={current_size}-"} if current_size > 0 else {} + + with requests.get(url, headers=headers, stream=True) as response: + total_size = int(response.headers.get("content-length", 0)) + current_size + + with open(file_path, "ab") as file: + for data in response.iter_content(chunk_size=chunk_size): + current_size += len(data) + file.write(data) + + done = current_size == total_size + + progress = round((current_size / total_size) * 100, 2) + yield f'data: {{"progress": {progress}, "current": {current_size}, "total": {total_size}}}\n\n' + + +@router.get("/download") +async def download( + url: str = "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main/stablelm-zephyr-3b.Q2_K.gguf", +): + user_repo, model_file = parse_huggingface_url(url) + + os.makedirs("./uploads", exist_ok=True) + file_path = os.path.join("./uploads", f"{model_file}") + + return StreamingResponse( + download_file_stream(url, file_path), media_type="text/event-stream" + ) + + @router.post("/upload") async def upload(file: UploadFile = File(...)): os.makedirs("./uploads", exist_ok=True) file_path = os.path.join("./uploads", file.filename) - def file_write_stream(): + async def file_write_stream(): total = 0 total_size = file.size chunk_size = 1024 * 1024 done = False try: - with open(file_path, "wb") as f: + with open(file_path, "wb+") as f: while True: chunk = file.file.read(chunk_size) if not chunk: @@ -50,26 +105,23 @@ async def upload(file: UploadFile = File(...)): yield f"data: {json.dumps(res)}\n\n" if done: - with open(file_path, "rb") as f: - hashed = calculate_sha256(f) + f.seek(0) + hashed = calculate_sha256(f) + f.seek(0) - f.seek(0) - file_data = f.read() + url = f"{OLLAMA_API_BASE_URL}/blobs/sha256:{hashed}" + response = requests.post(url, data=f) - url = f"{OLLAMA_API_BASE_URL}/blobs/sha256:{hashed}" + if response.ok: + res = { + "done": done, + "blob": f"sha256:{hashed}", + } + os.remove(file_path) - response = requests.post(url, data=file_data) - - if response.ok: - res = { - "done": done, - "blob": f"sha256:{hashed}", - } - os.remove(file_path) - - yield f"data: {json.dumps(res)}\n\n" - else: - raise "Ollama: Could not create blob, Please try again." + yield f"data: {json.dumps(res)}\n\n" + else: + raise "Ollama: Could not create blob, Please try again." except Exception as e: res = {"error": str(e)}