package com.digitalperson

import android.Manifest
import android.content.pm.PackageManager
import android.os.Bundle
import android.util.Log
import android.widget.CompoundButton
import android.widget.Toast
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import com.digitalperson.asr.AsrManager
import com.digitalperson.audio.AudioProcessor
import com.digitalperson.cloud.CloudApiManager
import com.digitalperson.config.AppConfig
import com.digitalperson.metrics.TraceManager
import com.digitalperson.metrics.TraceSession
import com.digitalperson.mood.MoodManager
import com.digitalperson.tts.TtsManager
import com.digitalperson.ui.Live2DUiManager
import com.digitalperson.vad.VadManager
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.cancel
import kotlinx.coroutines.delay
import kotlinx.coroutines.isActive
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext

/**
 * Voice-chat activity driving a Live2D character.
 *
 * Pipeline per turn: microphone -> (VAD | hold-to-speak buffering) -> ASR ->
 * cloud LLM (optionally streaming) -> sentence segmentation -> local TTS.
 * [TraceSession] marks are recorded at each stage for latency metrics.
 *
 * Threading model: all native model calls are serialized on [nativeLock] and
 * run on [ioScope] (IO dispatcher); UI mutations always go through
 * [runOnUiThread] because several callbacks arrive on background threads.
 */
class Live2DChatActivity : AppCompatActivity() {

    private lateinit var uiManager: Live2DUiManager
    private lateinit var vadManager: VadManager
    private lateinit var asrManager: AsrManager
    private lateinit var ttsManager: TtsManager
    private lateinit var audioProcessor: AudioProcessor
    private lateinit var cloudApiManager: CloudApiManager

    // Runtime permissions requested at startup.
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    // True while the mic capture loop should run; volatile because it is
    // flipped on the main thread and polled from IO coroutines.
    @Volatile
    private var isRecording: Boolean = false

    // Samples accumulated during a hold-to-speak press. Only touched from the
    // capture coroutine and its successor (which join()s first), never
    // concurrently.
    private val holdToSpeakAudioBuffer = mutableListOf<Float>()

    // Minimum press length: 1 second of audio (assumes 16 kHz mono — TODO confirm
    // against AudioProcessor's sample rate).
    private val HOLD_TO_SPEAK_MIN_SAMPLES = 16000

    // SupervisorJob so one failed child (e.g. the ASR worker) does not tear
    // down the whole scope.
    private val ioScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
    private var recordingJob: Job? = null

    // Serializes access to the native VAD/ASR models (init and release).
    private val nativeLock = Any()

    // Splits the streaming LLM text into TTS-sized sentences.
    private val segmenter = StreamingTextSegmenter(
        maxLen = AppConfig.Tts.MAX_LEN,
        maxWaitMs = AppConfig.Tts.MAX_WAIT_MS
    )

    // Per-turn latency trace; null between turns.
    private var currentTrace: TraceSession? = null

    // Guard so VAD/ASR output is ignored while an LLM request is pending.
    @Volatile
    private var llmInFlight: Boolean = false

    // Whether the LLM response is consumed chunk-by-chunk (toggled by the
    // optional switch in the layout).
    private var enableStreaming = false

    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        val ok = requestCode == AppConfig.REQUEST_RECORD_AUDIO_PERMISSION &&
            grantResults.isNotEmpty() &&
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        if (!ok) {
            // The whole activity is useless without the microphone.
            Log.e(AppConfig.TAG, "Audio record is disallowed")
            finish()
        }
    }

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_live2d_chat)

        uiManager = Live2DUiManager(this)
        uiManager.initViews(
            textViewId = R.id.my_text,
            scrollViewId = R.id.scroll_view,
            startButtonId = R.id.start_button,
            stopButtonId = R.id.stop_button,
            recordButtonId = R.id.record_button,
            traditionalButtonsId = R.id.traditional_buttons,
            silentPlayerViewId = 0,
            speakingPlayerViewId = 0,
            live2dViewId = R.id.live2d_view
        )

        // Pick the interaction mode from config: hold-to-speak button vs.
        // classic start/stop buttons with VAD segmentation.
        uiManager.setUseHoldToSpeak(AppConfig.USE_HOLD_TO_SPEAK)
        if (AppConfig.USE_HOLD_TO_SPEAK) {
            uiManager.setRecordButtonTouchListener { isDown ->
                if (isDown) {
                    onRecordButtonDown() // press: start capturing
                } else {
                    onRecordButtonUp()   // release: stop and submit
                }
            }
        } else {
            uiManager.setStartButtonListener { onStartClicked() }
            uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
        }

        ActivityCompat.requestPermissions(this, permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)

        // The streaming switch is optional in the layout; tolerate its absence.
        // CompoundButton covers both Switch and SwitchCompat usages.
        try {
            val streamingSwitch = findViewById<CompoundButton>(R.id.streaming_switch)
            streamingSwitch.isChecked = enableStreaming
            streamingSwitch.setOnCheckedChangeListener { _, isChecked ->
                enableStreaming = isChecked
                cloudApiManager.setEnableStreaming(isChecked)
                uiManager.showToast("流式输出已${if (isChecked) "启用" else "禁用"}")
            }
        } catch (e: Exception) {
            Log.w(AppConfig.TAG, "Streaming switch not found in layout: ${e.message}")
        }

        // Buttons stay disabled until the native models finish loading below.
        if (AppConfig.USE_HOLD_TO_SPEAK) {
            uiManager.setButtonsEnabled(recordEnabled = false)
        } else {
            uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
        }
        uiManager.setText("初始化中…")

        audioProcessor = AudioProcessor(this)
        ttsManager = TtsManager(this)
        ttsManager.setCallback(createTtsCallback())
        asrManager = AsrManager(this)
        asrManager.setAudioProcessor(audioProcessor)
        asrManager.setCallback(createAsrCallback())
        vadManager = VadManager(this)
        vadManager.setCallback(createVadCallback())

        // Heavy native model loading happens off the main thread.
        ioScope.launch {
            try {
                Log.i(AppConfig.TAG, "Init VAD + SenseVoice(RKNN) + TTS (background)")
                synchronized(nativeLock) {
                    vadManager.initVadModel()
                    asrManager.initSenseVoiceModel()
                }
                val ttsOk = ttsManager.initTtsAndAudioTrack()
                withContext(Dispatchers.Main) {
                    if (!ttsOk) {
                        uiManager.showToast(
                            "TTS 初始化失败:请确认 assets/${AppConfig.Tts.MODEL_DIR}/ 下有 model.onnx、tokens.txt、lexicon.txt 以及 phone/date/number/new_heteronym.fst",
                            Toast.LENGTH_LONG
                        )
                    }
                    uiManager.setText(getString(R.string.hint))
                    if (AppConfig.USE_HOLD_TO_SPEAK) {
                        uiManager.setButtonsEnabled(recordEnabled = true)
                    } else {
                        uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
                    }
                }
            } catch (t: Throwable) {
                Log.e(AppConfig.TAG, "Initialization failed: ${t.message}", t)
                withContext(Dispatchers.Main) {
                    uiManager.setText("初始化失败:${t.javaClass.simpleName}: ${t.message}")
                    uiManager.showToast("初始化失败(请看 Logcat): ${t.javaClass.simpleName}", Toast.LENGTH_LONG)
                    // Leave buttons disabled: the pipeline is unusable.
                    if (AppConfig.USE_HOLD_TO_SPEAK) {
                        uiManager.setButtonsEnabled(recordEnabled = false)
                    } else {
                        uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
                    }
                }
            }
        }

        cloudApiManager = CloudApiManager(createCloudApiListener(), applicationContext)
        cloudApiManager.setEnableStreaming(enableStreaming)

        Log.d(AppConfig.TAG, "Pre-starting ASR worker")
        ioScope.launch { asrManager.runAsrWorker() }
    }

    /** Callbacks from the ASR worker; invoked on a background thread. */
    private fun createAsrCallback() = object : AsrManager.AsrCallback {
        override fun onAsrStarted() {
            currentTrace?.markASRStart()
            runOnUiThread { uiManager.appendToUi("\n[ASR] 开始识别...\n") }
        }

        override fun onAsrResult(text: String) {
            currentTrace?.markASREnd()
            runOnUiThread { uiManager.appendToUi("\n\n[ASR] ${text}\n") }
            currentTrace?.markRecordingDone()
            // NOTE(review): marking "LLM response received" at ASR time (before
            // the LLM is called via onLlmCalled) looks misplaced — confirm
            // against TraceSession's intended semantics.
            currentTrace?.markLlmResponseReceived()
        }

        override fun onAsrSkipped(reason: String) {
            Log.d(AppConfig.TAG, "ASR segment skipped: $reason")
        }

        // Drop segments captured while our own TTS voice is playing.
        override fun shouldSkipAsr(): Boolean = ttsManager.isPlaying()

        override fun isLlmInFlight(): Boolean = llmInFlight

        override fun onLlmCalled(text: String) {
            llmInFlight = true
            Log.d(AppConfig.TAG, "Calling LLM with text: $text")
            cloudApiManager.callLLM(text)
        }
    }

    /** Callbacks from the VAD: forwards finished speech segments to ASR. */
    private fun createVadCallback() = object : VadManager.VadCallback {
        override fun onSpeechSegmentReady(originalAudio: FloatArray, processedAudio: FloatArray) {
            Log.d(AppConfig.TAG, "Sending audio segment to ASR queue, size: ${processedAudio.size}")
            asrManager.enqueueAudioSegment(originalAudio, processedAudio)
        }

        // No point running VAD while TTS is speaking or a request is pending.
        override fun shouldSkipProcessing(): Boolean = ttsManager.isPlaying() || llmInFlight
    }

    /**
     * Listener for LLM results; may be invoked on a network thread, so all UI
     * work is routed through [runOnUiThread].
     */
    private fun createCloudApiListener() = object : CloudApiManager.CloudApiListener {
        // Per-turn flag for the time-to-first-chunk trace mark; reset when a
        // turn completes or errors so later turns are measured too.
        private var llmFirstChunkMarked = false

        override fun onLLMResponseReceived(response: String) {
            currentTrace?.markLlmDone()
            llmInFlight = false
            llmFirstChunkMarked = false
            if (enableStreaming) {
                // Text was already shown/segmented chunk-by-chunk; just flush
                // the segmenter's remainder and close the TTS queue.
                for (seg in segmenter.flush()) {
                    ttsManager.enqueueSegment(seg)
                }
                ttsManager.enqueueEnd()
            } else {
                val previousMood = MoodManager.getCurrentMood()
                val (filteredText, mood) = MoodManager.extractAndFilterMood(response)
                Log.d(AppConfig.TAG, "Final mood: $mood, filtered text: $filteredText")
                runOnUiThread {
                    if (mood != previousMood) {
                        uiManager.setMood(mood)
                    }
                    uiManager.appendToUi("${filteredText}\n")
                }
                ttsManager.enqueueSegment(filteredText)
                ttsManager.enqueueEnd()
            }
        }

        override fun onLLMStreamingChunkReceived(chunk: String) {
            if (!enableStreaming) return
            if (!llmFirstChunkMarked) {
                llmFirstChunkMarked = true
                currentTrace?.markLlmFirstChunk()
            }
            val previousMood = MoodManager.getCurrentMood()
            val (filteredText, mood) = MoodManager.extractAndFilterMood(chunk)
            runOnUiThread {
                if (mood != previousMood) {
                    Log.d(AppConfig.TAG, "Mood changed to: $mood")
                    // Update the Live2D character's mood animation.
                    uiManager.setMood(mood)
                }
                uiManager.appendToUi(filteredText)
            }
            // Feed the segmenter on the caller thread; TTS has its own queue.
            for (seg in segmenter.processChunk(filteredText)) {
                ttsManager.enqueueSegment(seg)
            }
        }

        override fun onTTSAudioReceived(audioFilePath: String) {
            // Cloud TTS is unused; synthesis is done locally by TtsManager.
        }

        override fun onError(errorMessage: String) {
            llmInFlight = false
            llmFirstChunkMarked = false
            runOnUiThread {
                uiManager.showToast(errorMessage, Toast.LENGTH_LONG)
                onStopClicked(userInitiated = false)
            }
        }
    }

    /** Callbacks from TTS; mostly trace-mark forwarding. */
    private fun createTtsCallback() = object : TtsManager.TtsCallback {
        override fun onTtsStarted(text: String) {
            runOnUiThread { uiManager.appendToUi("\n[TTS] 开始合成...\n") }
        }

        override fun onTtsCompleted() {
            runOnUiThread { uiManager.appendToUi("\n[LOG] TTS completed at: ${System.currentTimeMillis()}\n") }
        }

        override fun onTtsSegmentCompleted(durationMs: Long) {}

        // TTS treats "user stopped recording" as its stop signal in the
        // start/stop interaction mode.
        override fun isTtsStopped(): Boolean = !isRecording

        override fun onClearAsrQueue() {
            asrManager.clearQueue()
        }

        override fun onSetSpeaking(speaking: Boolean) {
            uiManager.setSpeaking(speaking)
        }

        override fun getCurrentTrace(): TraceSession? = currentTrace

        override fun onTraceMarkTtsRequestEnqueued() {
            currentTrace?.markTtsRequestEnqueued()
        }

        override fun onTraceMarkTtsSynthesisStart() {
            currentTrace?.markTtsSynthesisStart()
        }

        override fun onTraceMarkTtsFirstPcmReady() {
            currentTrace?.markTtsFirstPcmReady()
        }

        override fun onTraceMarkTtsFirstAudioPlay() {
            currentTrace?.markTtsFirstAudioPlay()
        }

        override fun onTraceMarkTtsDone() {
            currentTrace?.markTtsDone()
        }

        override fun onTraceAddDuration(name: String, value: Long) {
            currentTrace?.addDuration(name, value)
        }

        override fun onEndTurn() {
            TraceManager.getInstance().endTurn()
            currentTrace = null
        }
    }

    override fun onDestroy() {
        super.onDestroy()
        // Best-effort teardown: onCreate may have thrown before every manager
        // was initialized, so each step is isolated from the others.
        try { onStopClicked(userInitiated = false) } catch (_: Throwable) {}
        ioScope.cancel()
        synchronized(nativeLock) {
            try { vadManager.release() } catch (_: Throwable) {}
            try { asrManager.release() } catch (_: Throwable) {}
        }
        try { ttsManager.release() } catch (_: Throwable) {}
        try { uiManager.release() } catch (_: Throwable) {}
        try { audioProcessor.release() } catch (_: Throwable) {}
    }

    override fun onResume() {
        super.onResume()
        uiManager.onResume() // resumes the Live2D GL view
    }

    override fun onPause() {
        uiManager.onPause()
        super.onPause()
    }

    /** Start-button handler for the traditional (VAD) interaction mode. */
    private fun onStartClicked() {
        Log.d(AppConfig.TAG, "onStartClicked called")
        if (isRecording) {
            Log.d(AppConfig.TAG, "Already recording, returning")
            return
        }
        if (!audioProcessor.initMicrophone(permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
            uiManager.showToast("麦克风初始化失败/无权限")
            return
        }
        // Fresh turn: new trace, cleared UI, reset pipeline state.
        currentTrace = TraceManager.getInstance().startNewTurn()
        currentTrace?.mark("turn_start")
        llmInFlight = false
        uiManager.clearText()
        ttsManager.reset()
        ttsManager.setCurrentTrace(currentTrace)
        segmenter.reset()
        vadManager.reset()
        audioProcessor.startRecording()
        isRecording = true
        uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = true)
        Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
        recordingJob?.cancel()
        recordingJob = ioScope.launch { processSamplesLoop() }
        Log.d(AppConfig.TAG, "onStartClicked completed")
    }

    /** Press handler for hold-to-speak mode: begin capturing a single utterance. */
    private fun onRecordButtonDown() {
        Log.d(AppConfig.TAG, "onRecordButtonDown called")
        if (isRecording) {
            Log.d(AppConfig.TAG, "Already recording, returning")
            return
        }
        // Barge-in: if TTS is still speaking, cut it off for the new turn.
        val interrupted = ttsManager.interruptForNewTurn()
        if (interrupted) {
            uiManager.appendToUi("\n[LOG] 已打断TTS播放\n")
        }
        if (!audioProcessor.initMicrophone(permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
            uiManager.showToast("麦克风初始化失败/无权限")
            return
        }
        currentTrace = TraceManager.getInstance().startNewTurn()
        currentTrace?.mark("turn_start")
        llmInFlight = false
        uiManager.clearText()
        // interruptForNewTurn() already prepared TTS state for next turn.
        // Keep reset() only for non-interrupt entry points.
        ttsManager.setCurrentTrace(currentTrace)
        segmenter.reset()
        // Play the "holding to speak" character motion.
        uiManager.startSpecificMotion("hold_to_speak")
        holdToSpeakAudioBuffer.clear()
        audioProcessor.startRecording()
        isRecording = true
        Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
        recordingJob?.cancel()
        recordingJob = ioScope.launch { processSamplesLoop() }
        Log.d(AppConfig.TAG, "onRecordButtonDown completed")
    }

    /** Release handler for hold-to-speak mode: finalize and submit the utterance. */
    private fun onRecordButtonUp() {
        Log.d(AppConfig.TAG, "onRecordButtonUp called")
        if (!isRecording) {
            Log.d(AppConfig.TAG, "Not recording, returning")
            return
        }
        isRecording = false
        audioProcessor.stopRecording()
        // cancel() returns immediately, so join() the capture loop before
        // touching holdToSpeakAudioBuffer — otherwise the final addAll in the
        // loop races with the read below.
        val captureJob = recordingJob
        captureJob?.cancel()
        recordingJob = ioScope.launch {
            captureJob?.join()
            // Flush whatever the recorder still holds.
            val audioData = audioProcessor.getRecordedData()
            holdToSpeakAudioBuffer.addAll(audioData.toList())
            if (holdToSpeakAudioBuffer.size >= HOLD_TO_SPEAK_MIN_SAMPLES) {
                val finalAudio = holdToSpeakAudioBuffer.toFloatArray()
                // Hold-to-speak bypasses VAD: the whole press is one segment.
                asrManager.enqueueAudioSegment(finalAudio, finalAudio)
            } else {
                runOnUiThread { uiManager.showToast("录音时间太短,请长按至少1秒") }
            }
            holdToSpeakAudioBuffer.clear()
        }
        Log.d(AppConfig.TAG, "onRecordButtonUp completed")
    }

    /**
     * Stops recording and TTS and restores the idle button state.
     *
     * @param userInitiated true when triggered by the stop button; only then is
     *   the current trace turn closed (error/teardown paths keep it for TTS).
     */
    private fun onStopClicked(userInitiated: Boolean) {
        isRecording = false
        audioProcessor.stopRecording()
        recordingJob?.cancel()
        recordingJob = null
        ttsManager.stop()
        if (AppConfig.USE_HOLD_TO_SPEAK) {
            uiManager.setButtonsEnabled(recordEnabled = true)
        } else {
            uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
        }
        if (userInitiated) {
            TraceManager.getInstance().endTurn()
            currentTrace = null
        }
    }

    /**
     * Mic capture loop; runs on [ioScope] until [isRecording] clears.
     *
     * Hold-to-speak mode only accumulates samples (ASR is fed on release);
     * traditional mode runs the VAD over fixed-size windows and lets it emit
     * segments via its callback.
     */
    private suspend fun processSamplesLoop() {
        Log.d(AppConfig.TAG, "processSamplesLoop started")
        if (AppConfig.USE_HOLD_TO_SPEAK) {
            while (isRecording && ioScope.coroutineContext.isActive) {
                val audioData = audioProcessor.getAudioData()
                if (audioData.isNotEmpty()) {
                    holdToSpeakAudioBuffer.addAll(audioData.toList())
                }
                delay(10) // keep CPU usage low while polling
            }
        } else {
            val windowSize = AppConfig.WINDOW_SIZE
            val buffer = ShortArray(windowSize)
            var loopCount = 0
            while (isRecording && ioScope.coroutineContext.isActive) {
                loopCount++
                if (loopCount % 100 == 0) {
                    Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsManager.isPlaying()}")
                }
                if (ttsManager.isPlaying()) {
                    if (vadManager.isInSpeech()) {
                        Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
                        vadManager.clearState()
                    }
                    // Keep draining the mic so its buffer doesn't grow while
                    // TTS talks, but discard the samples.
                    audioProcessor.readAudio(buffer)
                    continue
                }
                val ret = audioProcessor.readAudio(buffer)
                if (ret <= 0) continue
                if (ret != windowSize) continue // VAD needs full windows
                val chunk = audioProcessor.convertShortToFloat(buffer)
                val processedChunk = audioProcessor.applyGain(chunk)
                val result = vadManager.processAudioChunk(chunk, processedChunk)
                if (vadManager.vadComputeCount % 100 == 0) {
                    Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}")
                }
                if (loopCount % 1000 == 0) {
                    Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}")
                }
                // Time-based forced segmentation keeps TTS latency bounded
                // even if the LLM stream stalls mid-sentence.
                val forced = segmenter.maybeForceByTime()
                for (seg in forced) ttsManager.enqueueSegment(seg)
            }
            // Emit any trailing speech the VAD was still holding.
            vadManager.forceFinalize()
        }
        Log.d(AppConfig.TAG, "processSamplesLoop stopped")
    }
}