forked from open-webui/open-webui
		
	feat: external openai tts support
This commit is contained in:
		
							parent
							
								
									713934edb6
								
							
						
					
					
						commit
						cbd18ec63c
					
				
					 5 changed files with 187 additions and 74 deletions
				
			
		|  | @ -101,61 +101,57 @@ async def update_openai_config( | ||||||
| 
 | 
 | ||||||
| @app.post("/speech") | @app.post("/speech") | ||||||
| async def speech(request: Request, user=Depends(get_verified_user)): | async def speech(request: Request, user=Depends(get_verified_user)): | ||||||
|     idx = None |     body = await request.body() | ||||||
|  |     name = hashlib.sha256(body).hexdigest() | ||||||
|  | 
 | ||||||
|  |     file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3") | ||||||
|  |     file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json") | ||||||
|  | 
 | ||||||
|  |     # Check if the file already exists in the cache | ||||||
|  |     if file_path.is_file(): | ||||||
|  |         return FileResponse(file_path) | ||||||
|  | 
 | ||||||
|  |     headers = {} | ||||||
|  |     headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}" | ||||||
|  |     headers["Content-Type"] = "application/json" | ||||||
|  | 
 | ||||||
|  |     r = None | ||||||
|     try: |     try: | ||||||
|         body = await request.body() |         r = requests.post( | ||||||
|         name = hashlib.sha256(body).hexdigest() |             url=f"{app.state.OPENAI_API_BASE_URL}/audio/speech", | ||||||
|  |             data=body, | ||||||
|  |             headers=headers, | ||||||
|  |             stream=True, | ||||||
|  |         ) | ||||||
| 
 | 
 | ||||||
|         file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3") |         r.raise_for_status() | ||||||
|         file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json") |  | ||||||
| 
 | 
 | ||||||
|         # Check if the file already exists in the cache |         # Save the streaming content to a file | ||||||
|         if file_path.is_file(): |         with open(file_path, "wb") as f: | ||||||
|             return FileResponse(file_path) |             for chunk in r.iter_content(chunk_size=8192): | ||||||
|  |                 f.write(chunk) | ||||||
| 
 | 
 | ||||||
|         headers = {} |         with open(file_body_path, "w") as f: | ||||||
|         headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}" |             json.dump(json.loads(body.decode("utf-8")), f) | ||||||
|         headers["Content-Type"] = "application/json" |  | ||||||
| 
 | 
 | ||||||
|         r = None |         # Return the saved file | ||||||
|         try: |         return FileResponse(file_path) | ||||||
|             r = requests.post( |  | ||||||
|                 url=f"{app.state.OPENAI_API_BASE_URL}/audio/speech", |  | ||||||
|                 data=body, |  | ||||||
|                 headers=headers, |  | ||||||
|                 stream=True, |  | ||||||
|             ) |  | ||||||
| 
 | 
 | ||||||
|             r.raise_for_status() |     except Exception as e: | ||||||
|  |         log.exception(e) | ||||||
|  |         error_detail = "Open WebUI: Server Connection Error" | ||||||
|  |         if r is not None: | ||||||
|  |             try: | ||||||
|  |                 res = r.json() | ||||||
|  |                 if "error" in res: | ||||||
|  |                     error_detail = f"External: {res['error']['message']}" | ||||||
|  |             except: | ||||||
|  |                 error_detail = f"External: {e}" | ||||||
| 
 | 
 | ||||||
|             # Save the streaming content to a file |         raise HTTPException( | ||||||
|             with open(file_path, "wb") as f: |             status_code=r.status_code if r != None else 500, | ||||||
|                 for chunk in r.iter_content(chunk_size=8192): |             detail=error_detail, | ||||||
|                     f.write(chunk) |         ) | ||||||
| 
 |  | ||||||
|             with open(file_body_path, "w") as f: |  | ||||||
|                 json.dump(json.loads(body.decode("utf-8")), f) |  | ||||||
| 
 |  | ||||||
|             # Return the saved file |  | ||||||
|             return FileResponse(file_path) |  | ||||||
| 
 |  | ||||||
|         except Exception as e: |  | ||||||
|             log.exception(e) |  | ||||||
|             error_detail = "Open WebUI: Server Connection Error" |  | ||||||
|             if r is not None: |  | ||||||
|                 try: |  | ||||||
|                     res = r.json() |  | ||||||
|                     if "error" in res: |  | ||||||
|                         error_detail = f"External: {res['error']}" |  | ||||||
|                 except: |  | ||||||
|                     error_detail = f"External: {e}" |  | ||||||
| 
 |  | ||||||
|             raise HTTPException( |  | ||||||
|                 status_code=r.status_code if r else 500, detail=error_detail |  | ||||||
|             ) |  | ||||||
| 
 |  | ||||||
|     except ValueError: |  | ||||||
|         raise HTTPException(status_code=401, detail=ERROR_MESSAGES.OPENAI_NOT_FOUND) |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @app.post("/transcriptions") | @app.post("/transcriptions") | ||||||
|  |  | ||||||
|  | @ -1,5 +1,67 @@ | ||||||
| import { AUDIO_API_BASE_URL } from '$lib/constants'; | import { AUDIO_API_BASE_URL } from '$lib/constants'; | ||||||
| 
 | 
 | ||||||
|  | export const getAudioConfig = async (token: string) => { | ||||||
|  | 	let error = null; | ||||||
|  | 
 | ||||||
|  | 	const res = await fetch(`${AUDIO_API_BASE_URL}/config`, { | ||||||
|  | 		method: 'GET', | ||||||
|  | 		headers: { | ||||||
|  | 			'Content-Type': 'application/json', | ||||||
|  | 			Authorization: `Bearer ${token}` | ||||||
|  | 		} | ||||||
|  | 	}) | ||||||
|  | 		.then(async (res) => { | ||||||
|  | 			if (!res.ok) throw await res.json(); | ||||||
|  | 			return res.json(); | ||||||
|  | 		}) | ||||||
|  | 		.catch((err) => { | ||||||
|  | 			console.log(err); | ||||||
|  | 			error = err.detail; | ||||||
|  | 			return null; | ||||||
|  | 		}); | ||||||
|  | 
 | ||||||
|  | 	if (error) { | ||||||
|  | 		throw error; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return res; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | type OpenAIConfigForm = { | ||||||
|  | 	url: string; | ||||||
|  | 	key: string; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | export const updateAudioConfig = async (token: string, payload: OpenAIConfigForm) => { | ||||||
|  | 	let error = null; | ||||||
|  | 
 | ||||||
|  | 	const res = await fetch(`${AUDIO_API_BASE_URL}/config/update`, { | ||||||
|  | 		method: 'POST', | ||||||
|  | 		headers: { | ||||||
|  | 			'Content-Type': 'application/json', | ||||||
|  | 			Authorization: `Bearer ${token}` | ||||||
|  | 		}, | ||||||
|  | 		body: JSON.stringify({ | ||||||
|  | 			...payload | ||||||
|  | 		}) | ||||||
|  | 	}) | ||||||
|  | 		.then(async (res) => { | ||||||
|  | 			if (!res.ok) throw await res.json(); | ||||||
|  | 			return res.json(); | ||||||
|  | 		}) | ||||||
|  | 		.catch((err) => { | ||||||
|  | 			console.log(err); | ||||||
|  | 			error = err.detail; | ||||||
|  | 			return null; | ||||||
|  | 		}); | ||||||
|  | 
 | ||||||
|  | 	if (error) { | ||||||
|  | 		throw error; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return res; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| export const transcribeAudio = async (token: string, file: File) => { | export const transcribeAudio = async (token: string, file: File) => { | ||||||
| 	const data = new FormData(); | 	const data = new FormData(); | ||||||
| 	data.append('file', file); | 	data.append('file', file); | ||||||
|  | @ -48,11 +110,17 @@ export const synthesizeOpenAISpeech = async ( | ||||||
| 			input: text, | 			input: text, | ||||||
| 			voice: speaker | 			voice: speaker | ||||||
| 		}) | 		}) | ||||||
| 	}).catch((err) => { | 	}) | ||||||
| 		console.log(err); | 		.then(async (res) => { | ||||||
| 		error = err; | 			if (!res.ok) throw await res.json(); | ||||||
| 		return null; | 			return res; | ||||||
| 	}); | 		}) | ||||||
|  | 		.catch((err) => { | ||||||
|  | 			error = err.detail; | ||||||
|  | 			console.log(err); | ||||||
|  | 
 | ||||||
|  | 			return null; | ||||||
|  | 		}); | ||||||
| 
 | 
 | ||||||
| 	if (error) { | 	if (error) { | ||||||
| 		throw error; | 		throw error; | ||||||
|  |  | ||||||
|  | @ -176,10 +176,12 @@ | ||||||
| 
 | 
 | ||||||
| 	const toggleSpeakMessage = async () => { | 	const toggleSpeakMessage = async () => { | ||||||
| 		if (speaking) { | 		if (speaking) { | ||||||
| 			speechSynthesis.cancel(); | 			try { | ||||||
|  | 				speechSynthesis.cancel(); | ||||||
| 
 | 
 | ||||||
| 			sentencesAudio[speakingIdx].pause(); | 				sentencesAudio[speakingIdx].pause(); | ||||||
| 			sentencesAudio[speakingIdx].currentTime = 0; | 				sentencesAudio[speakingIdx].currentTime = 0; | ||||||
|  | 			} catch {} | ||||||
| 
 | 
 | ||||||
| 			speaking = null; | 			speaking = null; | ||||||
| 			speakingIdx = null; | 			speakingIdx = null; | ||||||
|  | @ -221,6 +223,10 @@ | ||||||
| 						sentence | 						sentence | ||||||
| 					).catch((error) => { | 					).catch((error) => { | ||||||
| 						toast.error(error); | 						toast.error(error); | ||||||
|  | 
 | ||||||
|  | 						speaking = null; | ||||||
|  | 						loadingSpeech = false; | ||||||
|  | 
 | ||||||
| 						return null; | 						return null; | ||||||
| 					}); | 					}); | ||||||
| 
 | 
 | ||||||
|  | @ -230,7 +236,6 @@ | ||||||
| 						const audio = new Audio(blobUrl); | 						const audio = new Audio(blobUrl); | ||||||
| 						sentencesAudio[idx] = audio; | 						sentencesAudio[idx] = audio; | ||||||
| 						loadingSpeech = false; | 						loadingSpeech = false; | ||||||
| 
 |  | ||||||
| 						lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx)); | 						lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx)); | ||||||
| 					} | 					} | ||||||
| 				} | 				} | ||||||
|  |  | ||||||
|  | @ -1,4 +1,5 @@ | ||||||
| <script lang="ts"> | <script lang="ts"> | ||||||
|  | 	import { getAudioConfig, updateAudioConfig } from '$lib/apis/audio'; | ||||||
| 	import { createEventDispatcher, onMount, getContext } from 'svelte'; | 	import { createEventDispatcher, onMount, getContext } from 'svelte'; | ||||||
| 	import { toast } from 'svelte-sonner'; | 	import { toast } from 'svelte-sonner'; | ||||||
| 	const dispatch = createEventDispatcher(); | 	const dispatch = createEventDispatcher(); | ||||||
|  | @ -9,6 +10,9 @@ | ||||||
| 
 | 
 | ||||||
| 	// Audio | 	// Audio | ||||||
| 
 | 
 | ||||||
|  | 	let OpenAIUrl = ''; | ||||||
|  | 	let OpenAIKey = ''; | ||||||
|  | 
 | ||||||
| 	let STTEngines = ['', 'openai']; | 	let STTEngines = ['', 'openai']; | ||||||
| 	let STTEngine = ''; | 	let STTEngine = ''; | ||||||
| 
 | 
 | ||||||
|  | @ -69,6 +73,18 @@ | ||||||
| 		saveSettings({ speechAutoSend: speechAutoSend }); | 		saveSettings({ speechAutoSend: speechAutoSend }); | ||||||
| 	}; | 	}; | ||||||
| 
 | 
 | ||||||
|  | 	const updateConfigHandler = async () => { | ||||||
|  | 		const res = await updateAudioConfig(localStorage.token, { | ||||||
|  | 			url: OpenAIUrl, | ||||||
|  | 			key: OpenAIKey | ||||||
|  | 		}); | ||||||
|  | 
 | ||||||
|  | 		if (res) { | ||||||
|  | 			OpenAIUrl = res.OPENAI_API_BASE_URL; | ||||||
|  | 			OpenAIKey = res.OPENAI_API_KEY; | ||||||
|  | 		} | ||||||
|  | 	}; | ||||||
|  | 
 | ||||||
| 	onMount(async () => { | 	onMount(async () => { | ||||||
| 		let settings = JSON.parse(localStorage.getItem('settings') ?? '{}'); | 		let settings = JSON.parse(localStorage.getItem('settings') ?? '{}'); | ||||||
| 
 | 
 | ||||||
|  | @ -85,12 +101,20 @@ | ||||||
| 		} else { | 		} else { | ||||||
| 			getWebAPIVoices(); | 			getWebAPIVoices(); | ||||||
| 		} | 		} | ||||||
|  | 
 | ||||||
|  | 		const res = await getAudioConfig(localStorage.token); | ||||||
|  | 
 | ||||||
|  | 		if (res) { | ||||||
|  | 			OpenAIUrl = res.OPENAI_API_BASE_URL; | ||||||
|  | 			OpenAIKey = res.OPENAI_API_KEY; | ||||||
|  | 		} | ||||||
| 	}); | 	}); | ||||||
| </script> | </script> | ||||||
| 
 | 
 | ||||||
| <form | <form | ||||||
| 	class="flex flex-col h-full justify-between space-y-3 text-sm" | 	class="flex flex-col h-full justify-between space-y-3 text-sm" | ||||||
| 	on:submit|preventDefault={() => { | 	on:submit|preventDefault={async () => { | ||||||
|  | 		await updateConfigHandler(); | ||||||
| 		saveSettings({ | 		saveSettings({ | ||||||
| 			audio: { | 			audio: { | ||||||
| 				STTEngine: STTEngine !== '' ? STTEngine : undefined, | 				STTEngine: STTEngine !== '' ? STTEngine : undefined, | ||||||
|  | @ -101,7 +125,7 @@ | ||||||
| 		dispatch('save'); | 		dispatch('save'); | ||||||
| 	}} | 	}} | ||||||
| > | > | ||||||
| 	<div class=" space-y-3 pr-1.5 overflow-y-scroll max-h-80"> | 	<div class=" space-y-3 pr-1.5 overflow-y-scroll max-h-[22rem]"> | ||||||
| 		<div> | 		<div> | ||||||
| 			<div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div> | 			<div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div> | ||||||
| 
 | 
 | ||||||
|  | @ -196,6 +220,24 @@ | ||||||
| 				</div> | 				</div> | ||||||
| 			</div> | 			</div> | ||||||
| 
 | 
 | ||||||
|  | 			{#if TTSEngine === 'openai'} | ||||||
|  | 				<div class="mt-1 flex gap-2 mb-1"> | ||||||
|  | 					<input | ||||||
|  | 						class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none" | ||||||
|  | 						placeholder={$i18n.t('API Base URL')} | ||||||
|  | 						bind:value={OpenAIUrl} | ||||||
|  | 						required | ||||||
|  | 					/> | ||||||
|  | 
 | ||||||
|  | 					<input | ||||||
|  | 						class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none" | ||||||
|  | 						placeholder={$i18n.t('API Key')} | ||||||
|  | 						bind:value={OpenAIKey} | ||||||
|  | 						required | ||||||
|  | 					/> | ||||||
|  | 				</div> | ||||||
|  | 			{/if} | ||||||
|  | 
 | ||||||
| 			<div class=" py-0.5 flex w-full justify-between"> | 			<div class=" py-0.5 flex w-full justify-between"> | ||||||
| 				<div class=" self-center text-xs font-medium">{$i18n.t('Auto-playback response')}</div> | 				<div class=" self-center text-xs font-medium">{$i18n.t('Auto-playback response')}</div> | ||||||
| 
 | 
 | ||||||
|  | @ -241,16 +283,18 @@ | ||||||
| 				<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div> | 				<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div> | ||||||
| 				<div class="flex w-full"> | 				<div class="flex w-full"> | ||||||
| 					<div class="flex-1"> | 					<div class="flex-1"> | ||||||
| 						<select | 						<input | ||||||
| 							class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none" | 							list="voice-list" | ||||||
|  | 							class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none" | ||||||
| 							bind:value={speaker} | 							bind:value={speaker} | ||||||
| 							placeholder="Select a voice" | 							placeholder="Select a voice" | ||||||
| 						> | 						/> | ||||||
|  | 
 | ||||||
|  | 						<datalist id="voice-list"> | ||||||
| 							{#each voices as voice} | 							{#each voices as voice} | ||||||
| 								<option value={voice.name} class="bg-gray-100 dark:bg-gray-700">{voice.name}</option | 								<option value={voice.name} /> | ||||||
| 								> |  | ||||||
| 							{/each} | 							{/each} | ||||||
| 						</select> | 						</datalist> | ||||||
| 					</div> | 					</div> | ||||||
| 				</div> | 				</div> | ||||||
| 			</div> | 			</div> | ||||||
|  |  | ||||||
|  | @ -29,8 +29,8 @@ | ||||||
| 	let embeddingEngine = ''; | 	let embeddingEngine = ''; | ||||||
| 	let embeddingModel = ''; | 	let embeddingModel = ''; | ||||||
| 
 | 
 | ||||||
| 	let openAIKey = ''; | 	let OpenAIKey = ''; | ||||||
| 	let openAIUrl = ''; | 	let OpenAIUrl = ''; | ||||||
| 
 | 
 | ||||||
| 	let chunkSize = 0; | 	let chunkSize = 0; | ||||||
| 	let chunkOverlap = 0; | 	let chunkOverlap = 0; | ||||||
|  | @ -79,7 +79,7 @@ | ||||||
| 			return; | 			return; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		if ((embeddingEngine === 'openai' && openAIKey === '') || openAIUrl === '') { | 		if ((embeddingEngine === 'openai' && OpenAIKey === '') || OpenAIUrl === '') { | ||||||
| 			toast.error($i18n.t('OpenAI URL/Key required.')); | 			toast.error($i18n.t('OpenAI URL/Key required.')); | ||||||
| 			return; | 			return; | ||||||
| 		} | 		} | ||||||
|  | @ -93,8 +93,8 @@ | ||||||
| 			...(embeddingEngine === 'openai' | 			...(embeddingEngine === 'openai' | ||||||
| 				? { | 				? { | ||||||
| 						openai_config: { | 						openai_config: { | ||||||
| 							key: openAIKey, | 							key: OpenAIKey, | ||||||
| 							url: openAIUrl | 							url: OpenAIUrl | ||||||
| 						} | 						} | ||||||
| 				  } | 				  } | ||||||
| 				: {}) | 				: {}) | ||||||
|  | @ -133,8 +133,8 @@ | ||||||
| 			embeddingEngine = embeddingConfig.embedding_engine; | 			embeddingEngine = embeddingConfig.embedding_engine; | ||||||
| 			embeddingModel = embeddingConfig.embedding_model; | 			embeddingModel = embeddingConfig.embedding_model; | ||||||
| 
 | 
 | ||||||
| 			openAIKey = embeddingConfig.openai_config.key; | 			OpenAIKey = embeddingConfig.openai_config.key; | ||||||
| 			openAIUrl = embeddingConfig.openai_config.url; | 			OpenAIUrl = embeddingConfig.openai_config.url; | ||||||
| 		} | 		} | ||||||
| 	}; | 	}; | ||||||
| 
 | 
 | ||||||
|  | @ -192,14 +192,14 @@ | ||||||
| 					<input | 					<input | ||||||
| 						class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none" | 						class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none" | ||||||
| 						placeholder={$i18n.t('API Base URL')} | 						placeholder={$i18n.t('API Base URL')} | ||||||
| 						bind:value={openAIUrl} | 						bind:value={OpenAIUrl} | ||||||
| 						required | 						required | ||||||
| 					/> | 					/> | ||||||
| 
 | 
 | ||||||
| 					<input | 					<input | ||||||
| 						class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none" | 						class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none" | ||||||
| 						placeholder={$i18n.t('API Key')} | 						placeholder={$i18n.t('API Key')} | ||||||
| 						bind:value={openAIKey} | 						bind:value={OpenAIKey} | ||||||
| 						required | 						required | ||||||
| 					/> | 					/> | ||||||
| 				</div> | 				</div> | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue