forked from open-webui/open-webui
		
	refac: audio
This commit is contained in:
		
							parent
							
								
									2a10438b4d
								
							
						
					
					
						commit
						710850e442
					
				
					 7 changed files with 133 additions and 9 deletions
				
			
		|  | @ -10,9 +10,18 @@ from fastapi import ( | ||||||
|     File, |     File, | ||||||
|     Form, |     Form, | ||||||
| ) | ) | ||||||
|  | 
 | ||||||
|  | from fastapi.responses import StreamingResponse, JSONResponse, FileResponse | ||||||
|  | 
 | ||||||
| from fastapi.middleware.cors import CORSMiddleware | from fastapi.middleware.cors import CORSMiddleware | ||||||
| from faster_whisper import WhisperModel | from faster_whisper import WhisperModel | ||||||
| 
 | 
 | ||||||
|  | import requests | ||||||
|  | import hashlib | ||||||
|  | from pathlib import Path | ||||||
|  | import json | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| from constants import ERROR_MESSAGES | from constants import ERROR_MESSAGES | ||||||
| from utils.utils import ( | from utils.utils import ( | ||||||
|     decode_token, |     decode_token, | ||||||
|  | @ -30,6 +39,8 @@ from config import ( | ||||||
|     WHISPER_MODEL_DIR, |     WHISPER_MODEL_DIR, | ||||||
|     WHISPER_MODEL_AUTO_UPDATE, |     WHISPER_MODEL_AUTO_UPDATE, | ||||||
|     DEVICE_TYPE, |     DEVICE_TYPE, | ||||||
|  |     OPENAI_API_BASE_URL, | ||||||
|  |     OPENAI_API_KEY, | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| log = logging.getLogger(__name__) | log = logging.getLogger(__name__) | ||||||
|  | @ -44,12 +55,78 @@ app.add_middleware( | ||||||
|     allow_headers=["*"], |     allow_headers=["*"], | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  | app.state.OPENAI_API_BASE_URL = OPENAI_API_BASE_URL | ||||||
|  | app.state.OPENAI_API_KEY = OPENAI_API_KEY | ||||||
|  | 
 | ||||||
| # setting device type for whisper model | # setting device type for whisper model | ||||||
| whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu" | whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu" | ||||||
| log.info(f"whisper_device_type: {whisper_device_type}") | log.info(f"whisper_device_type: {whisper_device_type}") | ||||||
| 
 | 
 | ||||||
|  | SPEECH_CACHE_DIR = Path(CACHE_DIR).joinpath("./audio/speech/") | ||||||
|  | SPEECH_CACHE_DIR.mkdir(parents=True, exist_ok=True) | ||||||
| 
 | 
 | ||||||
| @app.post("/transcribe") | 
 | ||||||
@app.post("/speech")
async def speech(request: Request, user=Depends(get_verified_user)):
    """Proxy a text-to-speech request to the configured OpenAI-compatible API.

    The raw request body (an OpenAI /audio/speech JSON payload) is hashed with
    SHA-256 and the synthesized MP3 is cached on disk under that digest, so
    repeated requests for identical text/voice are served without re-calling
    the upstream API.

    Raises:
        HTTPException: with the upstream status code (or 500 when the request
            never completed) and a best-effort error detail.
    """
    try:
        body = await request.body()
        name = hashlib.sha256(body).hexdigest()

        file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3")
        file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json")

        # Cache hit: serve the previously synthesized audio directly.
        if file_path.is_file():
            return FileResponse(file_path)

        headers = {
            "Authorization": f"Bearer {app.state.OPENAI_API_KEY}",
            "Content-Type": "application/json",
        }

        r = None
        try:
            r = requests.post(
                url=f"{app.state.OPENAI_API_BASE_URL}/audio/speech",
                data=body,
                headers=headers,
                stream=True,
            )

            r.raise_for_status()

            # Stream the audio into the cache file in chunks to avoid
            # holding the whole response in memory.
            with open(file_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

            # Persist the originating request payload next to the audio so
            # the cache entry can be inspected or invalidated later.
            with open(file_body_path, "w") as f:
                json.dump(json.loads(body.decode("utf-8")), f)

            # Return the freshly cached file.
            return FileResponse(file_path)

        except Exception as e:
            log.exception(e)
            error_detail = "Open WebUI: Server Connection Error"
            if r is not None:
                try:
                    res = r.json()
                    if "error" in res:
                        error_detail = f"External: {res['error']}"
                except Exception:
                    error_detail = f"External: {e}"

            # BUGFIX: requests.Response is falsy for 4xx/5xx statuses, so the
            # original `r.status_code if r else 500` always reported 500 on
            # upstream errors; test for None explicitly instead.
            raise HTTPException(
                status_code=r.status_code if r is not None else 500,
                detail=error_detail,
            )

    except ValueError:
        raise HTTPException(status_code=401, detail=ERROR_MESSAGES.OPENAI_NOT_FOUND)
|  | 
 | ||||||
|  | 
 | ||||||
|  | @app.post("/transcriptions") | ||||||
| def transcribe( | def transcribe( | ||||||
|     file: UploadFile = File(...), |     file: UploadFile = File(...), | ||||||
|     user=Depends(get_current_user), |     user=Depends(get_current_user), | ||||||
|  |  | ||||||
|  | @ -35,6 +35,8 @@ from config import ( | ||||||
|     ENABLE_IMAGE_GENERATION, |     ENABLE_IMAGE_GENERATION, | ||||||
|     AUTOMATIC1111_BASE_URL, |     AUTOMATIC1111_BASE_URL, | ||||||
|     COMFYUI_BASE_URL, |     COMFYUI_BASE_URL, | ||||||
|  |     OPENAI_API_BASE_URL, | ||||||
|  |     OPENAI_API_KEY, | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -56,7 +58,9 @@ app.add_middleware( | ||||||
| app.state.ENGINE = "" | app.state.ENGINE = "" | ||||||
| app.state.ENABLED = ENABLE_IMAGE_GENERATION | app.state.ENABLED = ENABLE_IMAGE_GENERATION | ||||||
| 
 | 
 | ||||||
| app.state.OPENAI_API_KEY = "" | app.state.OPENAI_API_BASE_URL = OPENAI_API_BASE_URL | ||||||
|  | app.state.OPENAI_API_KEY = OPENAI_API_KEY | ||||||
|  | 
 | ||||||
| app.state.MODEL = "" | app.state.MODEL = "" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -360,7 +364,7 @@ def generate_image( | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             r = requests.post( |             r = requests.post( | ||||||
|                 url=f"https://api.openai.com/v1/images/generations", |                 url=f"{app.state.OPENAI_API_BASE_URL}/images/generations", | ||||||
|                 json=data, |                 json=data, | ||||||
|                 headers=headers, |                 headers=headers, | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
|  | @ -70,6 +70,8 @@ from config import ( | ||||||
|     RAG_EMBEDDING_ENGINE, |     RAG_EMBEDDING_ENGINE, | ||||||
|     RAG_EMBEDDING_MODEL, |     RAG_EMBEDDING_MODEL, | ||||||
|     RAG_EMBEDDING_MODEL_AUTO_UPDATE, |     RAG_EMBEDDING_MODEL_AUTO_UPDATE, | ||||||
|  |     RAG_OPENAI_API_BASE_URL, | ||||||
|  |     RAG_OPENAI_API_KEY, | ||||||
|     DEVICE_TYPE, |     DEVICE_TYPE, | ||||||
|     CHROMA_CLIENT, |     CHROMA_CLIENT, | ||||||
|     CHUNK_SIZE, |     CHUNK_SIZE, | ||||||
|  | @ -94,8 +96,8 @@ app.state.RAG_EMBEDDING_ENGINE = RAG_EMBEDDING_ENGINE | ||||||
| app.state.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL | app.state.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL | ||||||
| app.state.RAG_TEMPLATE = RAG_TEMPLATE | app.state.RAG_TEMPLATE = RAG_TEMPLATE | ||||||
| 
 | 
 | ||||||
| app.state.RAG_OPENAI_API_BASE_URL = "https://api.openai.com" | app.state.RAG_OPENAI_API_BASE_URL = RAG_OPENAI_API_BASE_URL | ||||||
| app.state.RAG_OPENAI_API_KEY = "" | app.state.RAG_OPENAI_API_KEY = RAG_OPENAI_API_KEY | ||||||
| 
 | 
 | ||||||
| app.state.PDF_EXTRACT_IMAGES = False | app.state.PDF_EXTRACT_IMAGES = False | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -324,11 +324,11 @@ def get_embedding_model_path( | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def generate_openai_embeddings( | def generate_openai_embeddings( | ||||||
|     model: str, text: str, key: str, url: str = "https://api.openai.com" |     model: str, text: str, key: str, url: str = "https://api.openai.com/v1" | ||||||
| ): | ): | ||||||
|     try: |     try: | ||||||
|         r = requests.post( |         r = requests.post( | ||||||
|             f"{url}/v1/embeddings", |             f"{url}/embeddings", | ||||||
|             headers={ |             headers={ | ||||||
|                 "Content-Type": "application/json", |                 "Content-Type": "application/json", | ||||||
|                 "Authorization": f"Bearer {key}", |                 "Authorization": f"Bearer {key}", | ||||||
|  |  | ||||||
|  | @ -321,6 +321,13 @@ OPENAI_API_BASE_URLS = [ | ||||||
|     for url in OPENAI_API_BASE_URLS.split(";") |     for url in OPENAI_API_BASE_URLS.split(";") | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
|  | OPENAI_API_KEY = "" | ||||||
|  | OPENAI_API_KEY = OPENAI_API_KEYS[ | ||||||
|  |     OPENAI_API_BASE_URLS.index("https://api.openai.com/v1") | ||||||
|  | ] | ||||||
|  | OPENAI_API_BASE_URL = "https://api.openai.com/v1" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| #################################### | #################################### | ||||||
| # WEBUI | # WEBUI | ||||||
| #################################### | #################################### | ||||||
|  | @ -447,6 +454,9 @@ And answer according to the language of the user's question. | ||||||
| Given the context information, answer the query. | Given the context information, answer the query. | ||||||
| Query: [query]""" | Query: [query]""" | ||||||
| 
 | 
 | ||||||
# Per-feature override for the RAG embedding endpoint: the RAG_* environment
# variables take precedence, otherwise fall back to the global OpenAI settings.
RAG_OPENAI_API_BASE_URL = os.getenv("RAG_OPENAI_API_BASE_URL", OPENAI_API_BASE_URL)
RAG_OPENAI_API_KEY = os.getenv("RAG_OPENAI_API_KEY", OPENAI_API_KEY)
|  | 
 | ||||||
| #################################### | #################################### | ||||||
| # Transcribe | # Transcribe | ||||||
| #################################### | #################################### | ||||||
|  |  | ||||||
|  | @ -5,7 +5,7 @@ export const transcribeAudio = async (token: string, file: File) => { | ||||||
| 	data.append('file', file); | 	data.append('file', file); | ||||||
| 
 | 
 | ||||||
| 	let error = null; | 	let error = null; | ||||||
| 	const res = await fetch(`${AUDIO_API_BASE_URL}/transcribe`, { | 	const res = await fetch(`${AUDIO_API_BASE_URL}/transcriptions`, { | ||||||
| 		method: 'POST', | 		method: 'POST', | ||||||
| 		headers: { | 		headers: { | ||||||
| 			Accept: 'application/json', | 			Accept: 'application/json', | ||||||
|  | @ -29,3 +29,34 @@ export const transcribeAudio = async (token: string, file: File) => { | ||||||
| 
 | 
 | ||||||
| 	return res; | 	return res; | ||||||
| }; | }; | ||||||
|  | 
 | ||||||
|  | export const synthesizeOpenAISpeech = async ( | ||||||
|  | 	token: string = '', | ||||||
|  | 	speaker: string = 'alloy', | ||||||
|  | 	text: string = '' | ||||||
|  | ) => { | ||||||
|  | 	let error = null; | ||||||
|  | 
 | ||||||
|  | 	const res = await fetch(`${AUDIO_API_BASE_URL}/speech`, { | ||||||
|  | 		method: 'POST', | ||||||
|  | 		headers: { | ||||||
|  | 			Authorization: `Bearer ${token}`, | ||||||
|  | 			'Content-Type': 'application/json' | ||||||
|  | 		}, | ||||||
|  | 		body: JSON.stringify({ | ||||||
|  | 			model: 'tts-1', | ||||||
|  | 			input: text, | ||||||
|  | 			voice: speaker | ||||||
|  | 		}) | ||||||
|  | 	}).catch((err) => { | ||||||
|  | 		console.log(err); | ||||||
|  | 		error = err; | ||||||
|  | 		return null; | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	if (error) { | ||||||
|  | 		throw error; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return res; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | @ -15,7 +15,7 @@ | ||||||
| 	const dispatch = createEventDispatcher(); | 	const dispatch = createEventDispatcher(); | ||||||
| 
 | 
 | ||||||
| 	import { config, settings } from '$lib/stores'; | 	import { config, settings } from '$lib/stores'; | ||||||
| 	import { synthesizeOpenAISpeech } from '$lib/apis/openai'; | 	import { synthesizeOpenAISpeech } from '$lib/apis/audio'; | ||||||
| 	import { imageGenerations } from '$lib/apis/images'; | 	import { imageGenerations } from '$lib/apis/images'; | ||||||
| 	import { | 	import { | ||||||
| 		approximateToHumanReadable, | 		approximateToHumanReadable, | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy J. Baek
						Timothy J. Baek