feat:add tts-streaming config and future (#5492)
parent
b29a36f461
commit
6ef401a9f0
@ -0,0 +1,4 @@
|
||||
TTS_AUTO_PLAY_TIMEOUT = 5
|
||||
|
||||
# Sleep for 20 ms (40 ms => 1280-byte audio file, 20 ms => 640-byte audio file)
|
||||
TTS_AUTO_PLAY_YIELD_CPU_TIME = 0.02
|
||||
@ -0,0 +1,135 @@
|
||||
import base64
|
||||
import concurrent.futures
|
||||
import logging
|
||||
import queue
|
||||
import re
|
||||
import threading
|
||||
|
||||
from core.app.entities.queue_entities import QueueAgentMessageEvent, QueueLLMChunkEvent, QueueTextChunkEvent
|
||||
from core.model_manager import ModelManager
|
||||
from core.model_runtime.entities.model_entities import ModelType
|
||||
|
||||
|
||||
class AudioTrunk:
    """One item of the TTS audio stream: a status marker plus its audio payload.

    In this module the status is "responding" for a chunk that carries
    base64-encoded audio and "finish" for the end-of-stream marker.
    """

    def __init__(self, status: str, audio):
        # Store the stream state first, then the payload it describes.
        self.status = status
        self.audio = audio
|
||||
|
||||
|
||||
def _invoiceTTS(text_content: str, model_instance, tenant_id: str, voice: str):
|
||||
if not text_content or text_content.isspace():
|
||||
return
|
||||
return model_instance.invoke_tts(
|
||||
content_text=text_content.strip(),
|
||||
user="responding_tts",
|
||||
tenant_id=tenant_id,
|
||||
voice=voice
|
||||
)
|
||||
|
||||
|
||||
def _process_future(future_queue, audio_queue):
|
||||
while True:
|
||||
try:
|
||||
future = future_queue.get()
|
||||
if future is None:
|
||||
break
|
||||
for audio in future.result():
|
||||
audio_base64 = base64.b64encode(bytes(audio))
|
||||
audio_queue.put(AudioTrunk("responding", audio=audio_base64))
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(e)
|
||||
break
|
||||
audio_queue.put(AudioTrunk("finish", b''))
|
||||
|
||||
|
||||
class AppGeneratorTTSPublisher:
    """Turns streamed text messages into streamed TTS audio.

    Producers call ``publish`` with queue messages (``None`` marks the end of
    the text). A background thread accumulates the text, cuts it into sentences
    and submits batches to a thread pool for synthesis. Consumers poll
    ``checkAndGetAudio`` for the resulting ``AudioTrunk`` items.
    """

    def __init__(self, tenant_id: str, voice: str):
        """Resolve the default TTS model and start the background worker.

        :param tenant_id: tenant whose default TTS model is used
        :param voice: requested voice; replaced by the first available voice
            when empty or not offered by the model
        """
        self.logger = logging.getLogger(__name__)
        self.tenant_id = tenant_id
        self.msg_text = ''
        self._audio_queue = queue.Queue()
        self._msg_queue = queue.Queue()
        # Sentence terminators used to cut the accumulated text.
        self.match = re.compile(r'[。.!?]')
        self.model_manager = ModelManager()
        self.model_instance = self.model_manager.get_default_model_instance(
            tenant_id=self.tenant_id,
            model_type=ModelType.TTS
        )
        self.voices = self.model_instance.get_tts_voices()
        values = [item.get('value') for item in self.voices]
        self.voice = voice
        # Fall back to the first offered voice; with no voices at all, keep the
        # caller's value instead of failing with an IndexError.
        if (not voice or voice not in values) and self.voices:
            self.voice = self.voices[0].get('value')
        # Number of complete sentences required before a batch is synthesized;
        # grows by one after each batch (see _runtime).
        self.MAX_SENTENCE = 2
        self._last_audio_event = None
        self.last_message = None
        # The executor must exist before the worker thread starts, because
        # _runtime submits to it.
        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=3)
        # Thread.start() returns None, so keep the Thread object itself.
        self._runtime_thread = threading.Thread(target=self._runtime)
        self._runtime_thread.start()

    def publish(self, message):
        """Queue a message for the background worker; ``None`` signals end of text."""
        try:
            self._msg_queue.put(message)
        except Exception as e:
            self.logger.warning(e)

    def _submit_tts(self, text: str):
        """Schedule one TTS invocation on the executor and return its future."""
        return self.executor.submit(_invoiceTTS, text, self.model_instance, self.tenant_id, self.voice)

    def _runtime(self):
        """Worker loop: accumulate text, batch sentences, and submit them for synthesis.

        Starts a second thread running ``_process_future`` that turns finished
        futures into audio trunks. Always terminates the future queue with a
        ``None`` sentinel.
        """
        future_queue = queue.Queue()
        threading.Thread(target=_process_future, args=(future_queue, self._audio_queue)).start()
        while True:
            try:
                message = self._msg_queue.get()
                if message is None:
                    # End of text: flush whatever has not been synthesized yet.
                    if self.msg_text and len(self.msg_text.strip()) > 0:
                        future_queue.put(self._submit_tts(self.msg_text))
                    break
                elif isinstance(message.event, QueueAgentMessageEvent | QueueLLMChunkEvent):
                    content = message.event.chunk.delta.message.content
                    # Only plain-string content can be spoken; None or structured
                    # content is skipped instead of breaking the loop with a TypeError.
                    if isinstance(content, str):
                        self.msg_text += content
                elif isinstance(message.event, QueueTextChunkEvent):
                    if message.event.text:
                        self.msg_text += message.event.text
                self.last_message = message
                sentence_arr, text_tmp = self._extract_sentence(self.msg_text)
                # The batch threshold grows with every submission but the
                # comparison is capped at 7 sentences.
                if len(sentence_arr) >= min(self.MAX_SENTENCE, 7):
                    self.MAX_SENTENCE += 1
                    future_queue.put(self._submit_tts(''.join(sentence_arr)))
                    # Keep the trailing incomplete sentence for the next round.
                    self.msg_text = text_tmp
            except Exception as e:
                self.logger.warning(e)
                break
        future_queue.put(None)

    def checkAndGetAudio(self) -> "AudioTrunk | None":
        """Non-blocking poll for the next audio trunk.

        :return: the next ``AudioTrunk``; ``None`` when nothing is available
            yet; once the "finish" trunk has been seen, that same trunk on
            every later call
        """
        try:
            if self._last_audio_event and self._last_audio_event.status == "finish":
                if self.executor:
                    self.executor.shutdown(wait=False)
                # Keep returning the terminal trunk so callers can test `.status`.
                return self._last_audio_event
            audio = self._audio_queue.get_nowait()
            if audio and audio.status == "finish":
                self.executor.shutdown(wait=False)
                self._runtime_thread = None
            if audio:
                self._last_audio_event = audio
            return audio
        except queue.Empty:
            return None

    def _extract_sentence(self, org_text):
        """Split text after every sentence terminator matched by ``self.match``.

        :param org_text: accumulated text
        :return: ``(sentences, remainder)`` where each sentence ends with its
            terminator and ``remainder`` is the unterminated tail (possibly empty)
        """
        start = 0
        result = []
        for m in self.match.finditer(org_text):
            end = m.end()
            result.append(org_text[start:end])
            start = end
        return result, org_text[start:]
|
||||
@ -0,0 +1,53 @@
|
||||
import AudioPlayer from '@/app/components/base/audio-btn/audio'
|
||||
declare global {
  // eslint-disable-next-line @typescript-eslint/consistent-type-definitions
  interface AudioPlayerManager {
    instance: AudioPlayerManager
  }
}

/**
 * Process-wide holder of the single active AudioPlayer.
 *
 * At most one player is kept; asking for a player with a different message id
 * stops the current one and replaces it.
 */
export class AudioPlayerManager {
  private static instance: AudioPlayerManager
  private audioPlayers: AudioPlayer | null = null
  private msgId: string | undefined

  private constructor() {
  }

  /** Returns the shared manager, creating it on first use. */
  public static getInstance(): AudioPlayerManager {
    if (!AudioPlayerManager.instance)
      AudioPlayerManager.instance = new AudioPlayerManager()

    return AudioPlayerManager.instance
  }

  /**
   * Returns the player for the given message, reusing the current one when the
   * message id matches and otherwise replacing it.
   *
   * @param url - stream endpoint handed to the new player
   * @param isPublic - forwarded to the new player
   * @param id - message id; a truthy id equal to the current one reuses the player
   * @param msgContent - text handed to the new player
   * @param voice - voice stored on the new player
   * @param callback - event callback; installed on the reused or the new player
   * @returns the reused or newly created player
   */
  public getAudioPlayer(url: string, isPublic: boolean, id: string | undefined, msgContent: string | null | undefined, voice: string | undefined, callback: ((event: string) => {}) | null): AudioPlayer {
    if (this.msgId && this.msgId === id && this.audioPlayers) {
      this.audioPlayers.setCallback(callback)
      return this.audioPlayers
    }

    const previous = this.audioPlayers
    if (previous) {
      try {
        previous.pauseAudio()
        previous.cacheBuffers = []
        previous.sourceBuffer?.abort()
      }
      catch (e) {
        // Best effort: stopping the outgoing player must never prevent
        // creating the next one, so failures here are deliberately ignored.
      }
    }

    this.msgId = id
    const player = new AudioPlayer(url, isPublic, id, msgContent, callback)
    // The AudioPlayer constructor takes no voice argument, so set the field
    // directly; otherwise the requested voice would be silently dropped.
    player.voice = voice
    this.audioPlayers = player
    return player
  }

  /** Updates the tracked message id on the manager and on the current player, if any. */
  public resetMsgId(msgId: string) {
    this.msgId = msgId
    this.audioPlayers?.resetMsgId(msgId)
  }
}
|
||||
@ -0,0 +1,263 @@
|
||||
import Toast from '@/app/components/base/toast'
|
||||
import { textToAudioStream } from '@/service/share'
|
||||
|
||||
declare global {
  // eslint-disable-next-line @typescript-eslint/consistent-type-definitions
  interface Window {
    // Fallback constructor used when window.MediaSource is missing (see the AudioPlayer constructor).
    ManagedMediaSource: any
  }
}

/**
 * Plays TTS audio through a MediaSource-backed <audio> element.
 *
 * Audio arrives either from `loadAudio` (fetched via `textToAudioStream`) or
 * from base64 chunks pushed through `playAudioWithAudio`. Chunks are appended
 * to a single SourceBuffer; while the buffer is busy they wait in `cacheBuffers`.
 * State changes are reported to `callback` as plain event-name strings.
 */
export default class AudioPlayer {
  mediaSource: MediaSource | null
  audio: HTMLAudioElement
  audioContext: AudioContext
  sourceBuffer?: SourceBuffer
  cacheBuffers: ArrayBuffer[] = []
  pauseTimer: number | null = null
  msgId: string | undefined
  msgContent: string | null | undefined = null
  voice: string | undefined = undefined
  isLoadData = false
  url: string
  isPublic: boolean
  callback: ((event: string) => {}) | null

  /**
   * @param streamUrl - endpoint passed to `textToAudioStream`
   * @param isPublic - passed to `textToAudioStream`
   * @param msgId - message id sent with the request
   * @param msgContent - text sent with the request
   * @param callback - receives event names such as 'play', 'paused', 'ended', 'error'
   */
  constructor(streamUrl: string, isPublic: boolean, msgId: string | undefined, msgContent: string | null | undefined, callback: ((event: string) => {}) | null) {
    this.audioContext = new AudioContext()
    this.msgId = msgId
    this.msgContent = msgContent
    this.url = streamUrl
    this.isPublic = isPublic
    this.callback = callback

    // Compatible with iphone ios17 ManagedMediaSource
    const MediaSource = window.MediaSource || window.ManagedMediaSource
    if (!MediaSource) {
      Toast.notify({
        message: 'Your browser does not support audio streaming, if you are using an iPhone, please update to iOS 17.1 or later.',
        type: 'error',
      })
    }
    this.mediaSource = MediaSource ? new MediaSource() : null
    this.audio = new Audio()
    this.bindAudioEvents()
    this.audio.src = this.mediaSource ? URL.createObjectURL(this.mediaSource) : ''
    this.audio.autoplay = true

    const source = this.audioContext.createMediaElementSource(this.audio)
    source.connect(this.audioContext.destination)
    this.listenMediaSource('audio/mpeg')
  }

  /** Replaces the message id sent with subsequent `loadAudio` requests. */
  public resetMsgId(msgId: string) {
    this.msgId = msgId
  }

  /** Creates the SourceBuffer once the MediaSource opens; later 'sourceopen' events are ignored. */
  private listenMediaSource(contentType: string) {
    this.mediaSource?.addEventListener('sourceopen', () => {
      if (this.sourceBuffer)
        return

      this.sourceBuffer = this.mediaSource?.addSourceBuffer(contentType)
    })
  }

  /**
   * Registers the element listeners exactly once. Each listener forwards to
   * whatever `this.callback` is at the time the event fires, so `setCallback`
   * can swap the target without stacking listeners.
   */
  private bindAudioEvents() {
    // NOTE(review): 'paused', 'loaded' and 'loadeddate' are not standard
    // HTMLMediaElement events (the standard names are 'pause' and 'loadeddata'),
    // so these three never fire from the element. They are kept unchanged so
    // callers do not start receiving notifications they never got before;
    // 'paused' is reported manually by pauseAudio().
    const forwarded: Array<[string, boolean]> = [
      ['ended', false],
      ['paused', true],
      ['loaded', true],
      ['play', true],
      ['timeupdate', true],
      ['loadeddate', true],
      ['canplay', true],
      ['error', true],
    ]
    for (const [eventName, useCapture] of forwarded) {
      this.audio.addEventListener(eventName, () => {
        this.callback?.(eventName)
      }, useCapture)
    }
  }

  /** Sets (or clears, with null) the callback that receives player events. */
  public setCallback(callback: ((event: string) => {}) | null) {
    this.callback = callback
  }

  /** Fetches the audio stream and feeds every received chunk to the SourceBuffer. */
  private async loadAudio() {
    try {
      const audioResponse: any = await textToAudioStream(this.url, this.isPublic, { content_type: 'audio/mpeg' }, {
        message_id: this.msgId,
        streaming: true,
        voice: this.voice,
        text: this.msgContent,
      })

      if (audioResponse.status !== 200) {
        this.isLoadData = false
        if (this.callback)
          this.callback('error')
        // Do not feed an error response body into the audio buffer.
        return
      }

      const reader = audioResponse.body.getReader()
      while (true) {
        const { value, done } = await reader.read()
        // On `done`, value is undefined, which receiveAudioData treats as end of stream.
        this.receiveAudioData(value)
        if (done)
          break
      }
    }
    catch (error) {
      this.isLoadData = false
      this.callback && this.callback('error')
    }
  }

  // play audio
  /** Starts loading on first use; afterwards resumes or restarts playback and reports 'play'. */
  public playAudio() {
    if (this.isLoadData) {
      if (this.audioContext.state === 'suspended') {
        this.audioContext.resume().then((_) => {
          this.audio.play()
          this.callback && this.callback('play')
        })
      }
      else if (this.audio.ended) {
        this.audio.play()
        this.callback && this.callback('play')
      }
      if (this.callback)
        this.callback('play')
    }
    else {
      this.isLoadData = true
      this.loadAudio()
    }
  }

  /** Ends the MediaSource stream as soon as the SourceBuffer is idle. */
  private theEndOfStream() {
    const endTimer = setInterval(() => {
      if (this.sourceBuffer?.updating)
        return

      // Clear first: endOfStream() throws unless the MediaSource is 'open',
      // and a throw before clearInterval would leave this timer firing forever.
      clearInterval(endTimer)
      if (this.mediaSource?.readyState === 'open')
        this.mediaSource.endOfStream()
    }, 10)
  }

  /** Drains `cacheBuffers` into the SourceBuffer, then ends the stream. */
  private finishStream() {
    const timer = setInterval(() => {
      if (!this.cacheBuffers.length) {
        this.theEndOfStream()
        clearInterval(timer)
        return
      }

      if (!this.sourceBuffer?.updating) {
        const arrayBuffer = this.cacheBuffers.shift()!
        this.sourceBuffer?.appendBuffer(arrayBuffer)
      }
    }, 10)
  }

  /**
   * Feeds one base64-encoded audio chunk to the player.
   *
   * @param audio - base64 audio; an empty value marks the end of the stream
   * @param play - when true, also make sure playback is running
   */
  public async playAudioWithAudio(audio: string, play = true) {
    if (!audio || !audio.length) {
      this.finishStream()
      return
    }

    const audioContent = Buffer.from(audio, 'base64')
    this.receiveAudioData(new Uint8Array(audioContent))
    if (!play)
      return

    this.isLoadData = true
    if (this.audio.paused) {
      this.audioContext.resume().then((_) => {
        this.audio.play()
        this.callback && this.callback('play')
      })
    }
    else if (this.audio.ended) {
      this.audio.play()
      this.callback && this.callback('play')
    }
    // Otherwise the element is already playing and nothing needs to happen.
  }

  /** Reports 'paused', pauses the element and suspends the audio context. */
  public pauseAudio() {
    this.callback && this.callback('paused')
    this.audio.pause()
    this.audioContext.suspend()
  }

  /**
   * Appends a chunk to the SourceBuffer, or caches it while the buffer is busy.
   * A missing or empty chunk is treated as the end of the stream.
   */
  private receiveAudioData(unit8Array: Uint8Array) {
    if (!unit8Array) {
      this.finishStream()
      return
    }
    const audioData = this.byteArrayToArrayBuffer(unit8Array)
    if (!audioData.byteLength) {
      if (this.mediaSource?.readyState === 'open')
        this.finishStream()
      return
    }

    // NOTE(review): while `sourceBuffer` is still undefined (before 'sourceopen'),
    // the optional calls below are no-ops and the chunk is lost — confirm that
    // callers only push data after the source has opened.
    if (this.sourceBuffer?.updating) {
      this.cacheBuffers.push(audioData)
      return
    }

    if (this.cacheBuffers.length) {
      // Preserve order: queue the new chunk and append the oldest cached one.
      this.cacheBuffers.push(audioData)
      const cacheBuffer = this.cacheBuffers.shift()!
      this.sourceBuffer?.appendBuffer(cacheBuffer)
    }
    else {
      this.sourceBuffer?.appendBuffer(audioData)
    }
  }

  /** Copies the bytes into a fresh ArrayBuffer of exactly `byteArray.length` bytes. */
  private byteArrayToArrayBuffer(byteArray: Uint8Array): ArrayBuffer {
    const arrayBuffer = new ArrayBuffer(byteArray.length)
    const uint8Array = new Uint8Array(arrayBuffer)
    uint8Array.set(byteArray)
    return arrayBuffer
  }
}
|
||||
Loading…
Reference in New Issue