package com.digitalperson

import android.Manifest
import android.content.pm.PackageManager
import android.graphics.Bitmap
import android.os.Bundle
import android.util.Log
import android.widget.Toast
import androidx.appcompat.app.AlertDialog
import androidx.camera.core.CameraSelector
import com.digitalperson.engine.RetinaFaceEngineRKNN
import com.digitalperson.face.FaceBox
import androidx.camera.core.ImageAnalysis
import androidx.camera.core.ImageProxy
import androidx.camera.core.Preview
import androidx.camera.lifecycle.ProcessCameraProvider
import androidx.camera.view.PreviewView
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat
import com.digitalperson.cloud.CloudApiManager
import com.digitalperson.audio.AudioProcessor
import com.digitalperson.vad.VadManager
import com.digitalperson.asr.AsrManager
import com.digitalperson.ui.Live2DUiManager
import com.digitalperson.config.AppConfig
import com.digitalperson.face.FaceDetectionPipeline
import com.digitalperson.face.FaceOverlayView
import com.digitalperson.face.ImageProxyBitmapConverter
import com.digitalperson.metrics.TraceManager
import com.digitalperson.metrics.TraceSession
import com.digitalperson.tts.TtsController
import com.digitalperson.interaction.DigitalHumanInteractionController
import com.digitalperson.data.DatabaseInitializer
import com.digitalperson.interaction.InteractionActionHandler
import com.digitalperson.interaction.InteractionState
import com.digitalperson.interaction.UserMemoryStore
import com.digitalperson.llm.LLMManager
import com.digitalperson.llm.LLMManagerCallback
import com.digitalperson.util.FileHelper
import com.digitalperson.data.AppDatabase
import com.digitalperson.data.entity.ChatMessage
import com.digitalperson.interaction.ConversationBufferMemory
import com.digitalperson.interaction.ConversationSummaryMemory
import java.io.File
import android.graphics.BitmapFactory
import org.json.JSONObject
import java.util.concurrent.ExecutorService
import java.util.concurrent.Executors
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.cancel
import kotlinx.coroutines.isActive
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
import com.digitalperson.onboard_testing.FaceRecognitionTest
import com.digitalperson.onboard_testing.LLMSummaryTest

/**
 * Main activity of the digital-human chat demo.
 *
 * Wires together:
 *  - audio capture ([AudioProcessor]) -> VAD ([VadManager]) -> ASR ([AsrManager]),
 *  - TTS playback ([TtsController]) with optional streaming text segmentation,
 *  - a cloud LLM for dialogue ([CloudApiManager]) and a local RKLLM model for
 *    "inner thoughts" / profile extraction ([LLMManager]),
 *  - CameraX front-camera preview feeding a RetinaFace pipeline
 *    ([FaceDetectionPipeline]) for presence/identity events,
 *  - the interaction state machine ([DigitalHumanInteractionController]) and
 *    per-user memory ([UserMemoryStore], [ConversationBufferMemory],
 *    [ConversationSummaryMemory]).
 *
 * Routing is fixed: user dialogue goes to the cloud LLM; memory/"thought"
 * generation goes to the local LLM.
 */
class Live2DChatActivity : AppCompatActivity() {

    companion object {
        private const val TAG_ACTIVITY = "Live2DChatActivity"
        private const val TAG_LLM = "LLM_ROUTE"
    }

    private lateinit var uiManager: Live2DUiManager
    private lateinit var vadManager: VadManager
    private lateinit var asrManager: AsrManager
    private lateinit var ttsController: TtsController
    private lateinit var audioProcessor: AudioProcessor
    private var llmManager: LLMManager? = null
    // Cloud LLM is the default; this flag only reflects local-LLM availability.
    private var useLocalLLM = false
    private val appPermissions: Array<String> = arrayOf(
        Manifest.permission.RECORD_AUDIO,
        Manifest.permission.CAMERA
    )
    private val micPermissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
    @Volatile
    private var isRecording: Boolean = false
    private val holdToSpeakAudioBuffer = mutableListOf<Float>()
    // 1 second of audio at 16 kHz; shorter hold-to-speak recordings are discarded.
    private val HOLD_TO_SPEAK_MIN_SAMPLES = 16000
    private val ioScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
    private var recordingJob: Job? = null
    // Guards native VAD/ASR init and release (they must not run concurrently).
    private val nativeLock = Any()
    private lateinit var cloudApiManager: CloudApiManager
    private val segmenter = StreamingTextSegmenter(
        maxLen = AppConfig.Tts.MAX_LEN,
        maxWaitMs = AppConfig.Tts.MAX_WAIT_MS
    )
    private var currentTrace: TraceSession? = null
    @Volatile
    private var llmInFlight: Boolean = false
    private var enableStreaming = false
    private lateinit var cameraPreviewView: PreviewView
    private lateinit var faceOverlayView: FaceOverlayView
    private lateinit var faceDetectionPipeline: FaceDetectionPipeline
    private lateinit var interactionController: DigitalHumanInteractionController
    private lateinit var userMemoryStore: UserMemoryStore
    private lateinit var conversationBufferMemory: ConversationBufferMemory
    private lateinit var conversationSummaryMemory: ConversationSummaryMemory
    private var facePipelineReady: Boolean = false
    private var cameraProvider: ProcessCameraProvider? = null
    private lateinit var cameraAnalyzerExecutor: ExecutorService
    private var activeUserId: String = "guest"
    private var pendingLocalThoughtCallback: ((String) -> Unit)? = null
    private var pendingLocalProfileCallback: ((String) -> Unit)? = null
    // When true, local-LLM output is not echoed to the UI/TTS (used for thoughts).
    private var localThoughtSilentMode: Boolean = false
    private val recentConversationLines = ArrayList<String>()
    private var recentConversationDirty: Boolean = false
    private var lastFacePresent: Boolean = false
    private var lastFaceIdentityId: String? = null
    private var lastFaceRecognizedName: String? = null
    private lateinit var faceRecognitionTest: FaceRecognitionTest
    private lateinit var llmSummaryTest: LLMSummaryTest

    /**
     * Handles the combined mic+camera permission request: mic denial is fatal
     * (activity finishes), camera denial only disables face detection.
     */
    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        if (requestCode != AppConfig.REQUEST_RECORD_AUDIO_PERMISSION) return
        if (grantResults.isEmpty()) {
            finish()
            return
        }
        val granted = permissions.zip(grantResults.toTypedArray()).associate { it.first to it.second }
        val micGranted = granted[Manifest.permission.RECORD_AUDIO] == PackageManager.PERMISSION_GRANTED
        val cameraGranted = granted[Manifest.permission.CAMERA] == PackageManager.PERMISSION_GRANTED
        if (!micGranted) {
            Log.e(AppConfig.TAG, "Audio record is disallowed")
            finish()
            return
        }
        if (!cameraGranted) {
            uiManager.showToast("未授予相机权限,暂不启用人脸检测")
            Log.w(AppConfig.TAG, "Camera permission denied")
            return
        }
        // Camera granted after the pipeline finished initializing: start now.
        if (facePipelineReady) {
            startCameraPreviewAndDetection()
        }
    }

    /**
     * Builds the whole object graph: UI, database/memory stores, interaction
     * state machine, face pipeline, audio/ASR/TTS managers, then either
     * downloads the local LLM model (with progress dialog) or initializes it
     * directly before starting the remaining components.
     */
    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        Log.i(TAG_ACTIVITY, "onCreate")
        setContentView(R.layout.activity_live2d_chat)
        uiManager = Live2DUiManager(this)
        uiManager.initViews(
            textViewId = R.id.my_text,
            scrollViewId = R.id.scroll_view,
            startButtonId = R.id.start_button,
            stopButtonId = R.id.stop_button,
            recordButtonId = R.id.record_button,
            traditionalButtonsId = R.id.traditional_buttons,
            llmModeSwitchId = R.id.llm_mode_switch,
            llmModeSwitchRowId = R.id.llm_mode_switch_row,
            silentPlayerViewId = 0,
            speakingPlayerViewId = 0,
            live2dViewId = R.id.live2d_view
        )
        cameraPreviewView = findViewById(R.id.camera_preview)
        cameraPreviewView.implementationMode = PreviewView.ImplementationMode.COMPATIBLE
        faceOverlayView = findViewById(R.id.face_overlay)
        cameraAnalyzerExecutor = Executors.newSingleThreadExecutor()

        // Initialize the database and per-user memory stores.
        val databaseInitializer = DatabaseInitializer(applicationContext)
        databaseInitializer.initialize()
        userMemoryStore = UserMemoryStore(applicationContext)
        val database = AppDatabase.getInstance(applicationContext)
        conversationBufferMemory = ConversationBufferMemory(database)
        // NOTE(review): llmManager is still null here; this instance is replaced
        // after initLLM() succeeds (both branches below do so).
        conversationSummaryMemory = ConversationSummaryMemory(database, llmManager)

        interactionController = DigitalHumanInteractionController(
            scope = ioScope,
            handler = object : InteractionActionHandler {
                override fun onStateChanged(state: InteractionState) {
                    runOnUiThread { uiManager.appendToUi("\n[State] $state\n") }
                    Log.i(TAG_ACTIVITY, "\n[State] $state\n")
                    if (state == InteractionState.IDLE) {
                        // IDLE is the only safe moment to run profile extraction.
                        analyzeUserProfileInIdleIfNeeded()
                        Log.i(TAG_ACTIVITY, "[analyze] done")
                    }
                }

                override fun playMotion(motionName: String) {
                    playInteractionMotion(motionName)
                }

                override fun appendText(text: String) {
                    uiManager.appendToUi(text)
                }

                override fun speak(text: String) {
                    ttsController.enqueueSegment(text)
                    ttsController.enqueueEnd()
                }

                override fun requestCloudReply(userText: String) {
                    llmInFlight = true
                    Log.i(TAG_LLM, "Routing dialogue to CLOUD")
                    cloudApiManager.callLLM(buildCloudPromptWithUserProfile(userText))
                }

                override fun requestLocalThought(prompt: String, onResult: (String) -> Unit) {
                    this@Live2DChatActivity.requestLocalThought(prompt, onResult)
                }

                override fun onRememberUser(faceIdentityId: String, name: String?) {
                    activeUserId = faceIdentityId
                    userMemoryStore.upsertUserSeen(activeUserId, name)
                }

                override fun saveThought(thought: String) {
                    userMemoryStore.upsertUserSeen(activeUserId, null)
                    userMemoryStore.updateThought(activeUserId, thought)
                }

                override fun loadLatestThought(): String? = userMemoryStore.getLatestThought()

                // NOTE(review): element type was lost in extraction — assuming
                // String; confirm against UserMemoryStore.getRecentThoughts().
                override fun loadRecentThoughts(timeRangeMs: Long): List<String> =
                    userMemoryStore.getRecentThoughts(timeRangeMs)

                override fun addToChatHistory(role: String, content: String) {
                    appendConversationLine(role, content)
                }

                override fun addAssistantMessageToCloudHistory(content: String) {
                    cloudApiManager.addAssistantMessage(content)
                }

                override fun getRandomQuestion(faceId: String): String {
                    // Fetch a question this faceId has not been asked yet.
                    val question = userMemoryStore.getRandomUnansweredQuestion(faceId)
                    return question?.content ?: "你喜欢什么颜色呀?"
                }
            },
            context = applicationContext
        )

        faceDetectionPipeline = FaceDetectionPipeline(
            context = applicationContext,
            onResult = { result -> faceOverlayView.updateResult(result) },
            onPresenceChanged = { present, faceIdentityId, recognizedName ->
                // Edge-trigger presence changes into the state machine.
                if (present != lastFacePresent) {
                    lastFacePresent = present
                    Log.d(TAG_ACTIVITY, "presence changed: present=$present")
                    interactionController.onFacePresenceChanged(present)
                    if (!present) {
                        lastFaceIdentityId = null
                        lastFaceRecognizedName = null
                    }
                }
                // Forward identity updates only when they actually change.
                if (present && (faceIdentityId != lastFaceIdentityId || recognizedName != lastFaceRecognizedName)) {
                    lastFaceIdentityId = faceIdentityId
                    lastFaceRecognizedName = recognizedName
                    Log.d(TAG_ACTIVITY, "identity update: faceIdentityId=$faceIdentityId, recognized=$recognizedName")
                    interactionController.onFaceIdentityUpdated(faceIdentityId, recognizedName)
                }
            }
        )

        // Choose the interaction mode from configuration.
        uiManager.setUseHoldToSpeak(AppConfig.USE_HOLD_TO_SPEAK)
        if (AppConfig.USE_HOLD_TO_SPEAK) {
            uiManager.setRecordButtonTouchListener { isDown ->
                if (isDown) {
                    // Button pressed: start recording.
                    onRecordButtonDown()
                } else {
                    // Button released: stop recording.
                    onRecordButtonUp()
                }
            }
        } else {
            uiManager.setStartButtonListener { onStartClicked() }
            uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
        }

        ActivityCompat.requestPermissions(this, appPermissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)

        try {
            // NOTE(review): the findViewById type parameter was lost in
            // extraction — assuming android.widget.Switch; confirm against the
            // layout (could be SwitchCompat).
            val streamingSwitch = findViewById<android.widget.Switch>(R.id.streaming_switch)
            streamingSwitch.isChecked = enableStreaming
            streamingSwitch.setOnCheckedChangeListener { _, isChecked ->
                enableStreaming = isChecked
                // NOTE(review): cloudApiManager is lateinit and only assigned in
                // initializeOtherComponents(); toggling before that would crash.
                cloudApiManager.setEnableStreaming(isChecked)
                uiManager.showToast("流式输出已${if (isChecked) "启用" else "禁用"}")
            }
        } catch (e: Exception) {
            Log.w(AppConfig.TAG, "Streaming switch not found in layout: ${e.message}")
        }

        try {
            // NOTE(review): type parameter lost in extraction — see above.
            val ttsModeSwitch = findViewById<android.widget.Switch>(R.id.tts_mode_switch)
            // NOTE(review): the original comment said "local TTS by default",
            // but checked == true routes to QCloud TTS — confirm intent.
            ttsModeSwitch.isChecked = true
            ttsModeSwitch.setOnCheckedChangeListener { _, isChecked ->
                ttsController.setUseQCloudTts(isChecked)
                uiManager.showToast("TTS模式已切换到${if (isChecked) "腾讯云" else "本地"}")
            }
        } catch (e: Exception) {
            Log.w(AppConfig.TAG, "TTS mode switch not found in layout: ${e.message}")
        }

        // LLM mode switch. Routing is fixed by the state machine (dialogue ->
        // cloud, memory -> local); the switch is only an availability hint.
        uiManager.setLLMSwitchListener { isChecked ->
            useLocalLLM = isChecked
            uiManager.showToast("状态机路由已固定:对话云端,回忆本地")
        }
        // Hidden until the model download completes.
        uiManager.showLLMSwitch(false)

        if (AppConfig.USE_HOLD_TO_SPEAK) {
            uiManager.setButtonsEnabled(recordEnabled = false)
        } else {
            uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
        }
        uiManager.setText("初始化中…")

        audioProcessor = AudioProcessor(this)
        ttsController = TtsController(this)
        ttsController.setCallback(createTtsCallback())
        asrManager = AsrManager(this)
        asrManager.setAudioProcessor(audioProcessor)
        asrManager.setCallback(createAsrCallback())
        vadManager = VadManager(this)
        vadManager.setCallback(createVadCallback())

        // Download the local LLM model if it is not present yet.
        if (!FileHelper.isLocalLLMAvailable(this)) {
            uiManager.showDownloadProgressDialog()
            FileHelper.downloadModelFilesWithProgress(
                this,
                onProgress = { fileName, downloaded, total, progress ->
                    runOnUiThread {
                        val downloadedMB = downloaded / (1024 * 1024)
                        val totalMB = total / (1024 * 1024)
                        uiManager.updateDownloadProgress(fileName, downloadedMB, totalMB, progress)
                    }
                },
                onComplete = { success, message ->
                    runOnUiThread {
                        uiManager.dismissDownloadProgressDialog()
                        if (success) {
                            Log.i(AppConfig.TAG, "Model files downloaded successfully")
                            uiManager.showToast("模型下载完成", Toast.LENGTH_SHORT)
                            if (FileHelper.isLocalLLMAvailable(this)) {
                                Log.i(AppConfig.TAG, "Local LLM is available, enabling local LLM switch")
                                uiManager.showLLMSwitch(false)
                                initLLM()
                                // Re-create with the now-initialized llmManager.
                                conversationSummaryMemory = ConversationSummaryMemory(database, llmManager)
                                interactionController.start()
                                initializeOtherComponents()
                            }
                        } else {
                            Log.e(AppConfig.TAG, "Failed to download model files: $message")
                            // Blocking error dialog — the app cannot proceed.
                            showModelDownloadErrorDialog(message)
                        }
                    }
                }
            )
        } else {
            // Model already present: initialize directly.
            initLLM()
            conversationSummaryMemory = ConversationSummaryMemory(database, llmManager)
            interactionController.start()
            initializeOtherComponents()
            uiManager.showLLMSwitch(false)
        }
    }

    /**
     * Initializes VAD, SenseVoice ASR, TTS and the face pipeline on the IO
     * scope, then enables the UI (or reports failure). Also constructs the
     * cloud API manager, pre-starts the ASR worker and, when enabled, kicks
     * off the on-board face-recognition test.
     */
    private fun initializeOtherComponents() {
        ioScope.launch {
            try {
                Log.i(AppConfig.TAG, "Init VAD + SenseVoice(RKNN) + TTS (background)")
                synchronized(nativeLock) {
                    vadManager.initVadModel()
                    asrManager.initSenseVoiceModel()
                }
                val ttsOk = ttsController.init()
                facePipelineReady = faceDetectionPipeline.initialize()
                withContext(Dispatchers.Main) {
                    if (!ttsOk) {
                        uiManager.showToast(
                            "TTS 初始化失败:请确认 assets/${AppConfig.Tts.MODEL_DIR}/ 下有 model.onnx、tokens.txt、lexicon.txt 以及 phone/date/number/new_heteronym.fst",
                            Toast.LENGTH_LONG
                        )
                    }
                    if (!facePipelineReady) {
                        uiManager.showToast("RetinaFace 初始化失败,请检查模型和 rknn 运行库", Toast.LENGTH_LONG)
                    } else if (allPermissionsGranted()) {
                        startCameraPreviewAndDetection()
                    }
                    uiManager.setText(getString(R.string.hint))
                    if (AppConfig.USE_HOLD_TO_SPEAK) {
                        uiManager.setButtonsEnabled(recordEnabled = true)
                    } else {
                        uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
                    }
                }
            } catch (t: Throwable) {
                Log.e(AppConfig.TAG, "Initialization failed: ${t.message}", t)
                withContext(Dispatchers.Main) {
                    uiManager.setText("初始化失败:${t.javaClass.simpleName}: ${t.message}")
                    uiManager.showToast("初始化失败(请看 Logcat): ${t.javaClass.simpleName}", Toast.LENGTH_LONG)
                    if (AppConfig.USE_HOLD_TO_SPEAK) {
                        uiManager.setButtonsEnabled(recordEnabled = false)
                    } else {
                        uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
                    }
                }
            }
        }
        cloudApiManager = CloudApiManager(createCloudApiListener(), applicationContext)
        cloudApiManager.setEnableStreaming(enableStreaming)
        Log.d(AppConfig.TAG, "Pre-starting ASR worker")
        ioScope.launch { asrManager.runAsrWorker() }

        // On-board face recognition test (delayed so all components are ready).
        if (AppConfig.OnboardTesting.FACE_REGONITION) {
            faceRecognitionTest = FaceRecognitionTest(this)
            faceRecognitionTest.setFaceDetectionPipeline(faceDetectionPipeline)
            CoroutineScope(Dispatchers.IO).launch {
                kotlinx.coroutines.delay(10000)
                runOnUiThread {
                    faceRecognitionTest.runTest("http://192.168.1.19:5000/api/face_test_images") { message ->
                        Log.i(AppConfig.TAG, message)
                        uiManager.appendToUi("\n$message\n")
                    }
                }
            }
        }
    }

    /**
     * Shows a non-cancelable dialog when the model download failed; the only
     * action finishes the activity.
     */
    private fun showModelDownloadErrorDialog(errorMessage: String) {
        AlertDialog.Builder(this)
            .setTitle("模型下载失败")
            .setMessage("本地 LLM 模型下载失败,应用无法正常运行。\n\n错误信息:$errorMessage\n\n请检查网络连接后重启应用。")
            .setCancelable(false)
            .setPositiveButton("退出应用") { _, _ -> finish() }
            .show()
    }

    /**
     * Returns true when a HEAD request to [url] answers 200 within 3 s.
     * Blocking — must not be called on the main thread.
     */
    private fun checkUrlExists(url: String): Boolean {
        return try {
            val connection = java.net.URL(url).openConnection() as java.net.HttpURLConnection
            connection.requestMethod = "HEAD"
            connection.connectTimeout = 3000
            connection.readTimeout = 3000
            val responseCode = connection.responseCode
            connection.disconnect()
            responseCode == 200
        } catch (e: Exception) {
            false
        }
    }

    /** ASR callbacks: trace markers, UI echo, and routing of recognized text. */
    private fun createAsrCallback() = object : AsrManager.AsrCallback {
        override fun onAsrStarted() {
            currentTrace?.markASRStart()
            runOnUiThread { uiManager.appendToUi("\n[ASR] 开始识别...\n") }
        }

        override fun onAsrResult(text: String) {
            currentTrace?.markASREnd()
            runOnUiThread { uiManager.appendToUi("\n\n[ASR] ${text}\n") }
            appendConversationLine("用户", text)
            currentTrace?.markRecordingDone()
            currentTrace?.markLlmResponseReceived()
        }

        override fun onAsrSkipped(reason: String) {
            Log.d(AppConfig.TAG, "ASR segment skipped: $reason")
        }

        // Don't transcribe our own TTS output.
        override fun shouldSkipAsr(): Boolean = ttsController.isPlaying()

        override fun isLlmInFlight(): Boolean = llmInFlight

        override fun onLlmCalled(text: String) {
            Log.d(AppConfig.TAG, "Forward ASR text to interaction controller: $text")
            interactionController.onUserAsrText(text)
        }
    }

    /** VAD callbacks: forward finished speech segments to the ASR queue. */
    private fun createVadCallback() = object : VadManager.VadCallback {
        override fun onSpeechSegmentReady(originalAudio: FloatArray, processedAudio: FloatArray) {
            Log.d(AppConfig.TAG, "Sending audio segment to ASR queue, size: ${processedAudio.size}")
            asrManager.enqueueAudioSegment(originalAudio, processedAudio)
        }

        // Suppress VAD while TTS is speaking or an LLM call is pending.
        override fun shouldSkipProcessing(): Boolean = ttsController.isPlaying() || llmInFlight
    }

    /**
     * Cloud LLM callbacks: mood extraction, streaming segmentation into TTS,
     * and completion/error notification of the interaction controller.
     */
    private fun createCloudApiListener() = object : CloudApiManager.CloudApiListener {
        private var llmFirstChunkMarked = false

        override fun onLLMResponseReceived(response: String) {
            currentTrace?.markLlmDone()
            llmInFlight = false
            appendConversationLine("助手", response)
            if (enableStreaming) {
                // Streaming already fed chunks to TTS; flush the remainder.
                for (seg in segmenter.flush()) {
                    ttsController.enqueueSegment(seg)
                }
                ttsController.enqueueEnd()
            } else {
                val previousMood = com.digitalperson.mood.MoodManager.getCurrentMood()
                val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(response)
                android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Final mood: $mood, filtered text: $filteredText")
                if (mood != previousMood) {
                    uiManager.setMood(mood)
                }
                runOnUiThread { uiManager.appendToUi("${filteredText}\n") }
                ttsController.enqueueSegment(filteredText)
                ttsController.enqueueEnd()
            }
            interactionController.onDialogueResponseFinished()
        }

        override fun onLLMStreamingChunkReceived(chunk: String) {
            if (enableStreaming) {
                if (!llmFirstChunkMarked) {
                    llmFirstChunkMarked = true
                    currentTrace?.markLlmFirstChunk()
                }
                val previousMood = com.digitalperson.mood.MoodManager.getCurrentMood()
                val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(chunk)
                if (mood != previousMood) {
                    android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Mood changed to: $mood")
                    // Update the Live2D character's mood.
                    uiManager.setMood(mood)
                }
                uiManager.appendToUi(filteredText)
                val segments = segmenter.processChunk(filteredText)
                for (seg in segments) {
                    ttsController.enqueueSegment(seg)
                }
            }
        }

        override fun onTTSAudioReceived(audioFilePath: String) {}

        override fun onError(errorMessage: String) {
            llmInFlight = false
            uiManager.showToast(errorMessage, Toast.LENGTH_LONG)
            interactionController.onDialogueResponseFinished()
            onStopClicked(userInitiated = false)
        }
    }

    /** TTS callbacks: UI echo, speaking state, and turn/trace bookkeeping. */
    private fun createTtsCallback() = object : TtsController.TtsCallback {
        override fun onTtsStarted(text: String) {
            runOnUiThread { uiManager.appendToUi("\n[TTS] 开始合成...\n") }
        }

        override fun onTtsCompleted() {
            runOnUiThread { uiManager.appendToUi("\n[LOG] TTS completed at: ${System.currentTimeMillis()}\n") }
        }

        override fun onTtsSegmentCompleted(durationMs: Long) {}

        override fun isTtsStopped(): Boolean = !isRecording

        override fun onClearAsrQueue() {
            asrManager.clearQueue()
        }

        override fun onSetSpeaking(speaking: Boolean) {
            uiManager.setSpeaking(speaking)
        }

        override fun onEndTurn() {
            TraceManager.getInstance().endTurn()
            currentTrace = null
        }
    }

    /**
     * Tears everything down best-effort; each release is isolated so one
     * failure cannot prevent the others.
     */
    override fun onDestroy() {
        super.onDestroy()
        try { interactionController.stop() } catch (_: Throwable) {}
        stopCameraPreviewAndDetection()
        onStopClicked(userInitiated = false)
        ioScope.cancel()
        synchronized(nativeLock) {
            try { vadManager.release() } catch (_: Throwable) {}
            try { asrManager.release() } catch (_: Throwable) {}
        }
        try { faceDetectionPipeline.release() } catch (_: Throwable) {}
        try { cameraAnalyzerExecutor.shutdown() } catch (_: Throwable) {}
        try { ttsController.release() } catch (_: Throwable) {}
        try { llmManager?.destroy() } catch (_: Throwable) {}
        try { uiManager.release() } catch (_: Throwable) {}
        try { audioProcessor.release() } catch (_: Throwable) {}
    }

    override fun onResume() {
        super.onResume()
        Log.i(TAG_ACTIVITY, "onResume")
        uiManager.onResume()
        if (facePipelineReady && allPermissionsGranted()) {
            startCameraPreviewAndDetection()
        }
    }

    override fun onPause() {
        Log.i(TAG_ACTIVITY, "onPause")
        stopCameraPreviewAndDetection()
        uiManager.onPause()
        super.onPause()
    }

    /** True when both mic and camera permissions are granted. */
    private fun allPermissionsGranted(): Boolean {
        return appPermissions.all {
            ContextCompat.checkSelfPermission(this, it) == PackageManager.PERMISSION_GRANTED
        }
    }

    /**
     * Binds a front-camera Preview + ImageAnalysis (keep-only-latest) to this
     * activity's lifecycle; frames are analyzed on [cameraAnalyzerExecutor].
     */
    private fun startCameraPreviewAndDetection() {
        val cameraProviderFuture = ProcessCameraProvider.getInstance(this)
        cameraProviderFuture.addListener({
            try {
                val provider = cameraProviderFuture.get()
                cameraProvider = provider
                provider.unbindAll()
                val preview = Preview.Builder().build().apply {
                    setSurfaceProvider(cameraPreviewView.surfaceProvider)
                }
                cameraPreviewView.scaleType = PreviewView.ScaleType.FIT_CENTER
                val analyzer = ImageAnalysis.Builder()
                    .setBackpressureStrategy(ImageAnalysis.STRATEGY_KEEP_ONLY_LATEST)
                    .build()
                analyzer.setAnalyzer(cameraAnalyzerExecutor) { imageProxy ->
                    analyzeCameraFrame(imageProxy)
                }
                val selector = CameraSelector.Builder()
                    .requireLensFacing(CameraSelector.LENS_FACING_FRONT)
                    .build()
                provider.bindToLifecycle(this, selector, preview, analyzer)
            } catch (t: Throwable) {
                Log.e(AppConfig.TAG, "startCameraPreviewAndDetection failed: ${t.message}", t)
            }
        }, ContextCompat.getMainExecutor(this))
    }

    /** Unbinds all CameraX use cases and forgets the provider. */
    private fun stopCameraPreviewAndDetection() {
        try {
            cameraProvider?.unbindAll()
        } catch (_: Throwable) {
        } finally {
            cameraProvider = null
        }
    }

    /**
     * Converts one camera frame to a Bitmap and submits it to the face
     * pipeline; always closes the ImageProxy.
     */
    private fun analyzeCameraFrame(imageProxy: ImageProxy) {
        try {
            val bitmap: Bitmap? = ImageProxyBitmapConverter.toBitmap(imageProxy)
            if (bitmap != null) {
                faceDetectionPipeline.submitFrame(bitmap)
            }
        } catch (t: Throwable) {
            Log.w(AppConfig.TAG, "analyzeCameraFrame error: ${t.message}")
        } finally {
            imageProxy.close()
        }
    }

    /** Traditional (start/stop button) mode: begins a recording turn. */
    private fun onStartClicked() {
        Log.d(AppConfig.TAG, "onStartClicked called")
        if (isRecording) {
            Log.d(AppConfig.TAG, "Already recording, returning")
            return
        }
        if (!audioProcessor.initMicrophone(micPermissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
            uiManager.showToast("麦克风初始化失败/无权限")
            return
        }
        currentTrace = TraceManager.getInstance().startNewTurn()
        currentTrace?.mark("turn_start")
        llmInFlight = false
        uiManager.clearText()
        ttsController.reset()
        segmenter.reset()
        vadManager.reset()
        audioProcessor.startRecording()
        isRecording = true
        uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = true)
        Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
        recordingJob?.cancel()
        recordingJob = ioScope.launch { processSamplesLoop() }
        Log.d(AppConfig.TAG, "onStartClicked completed")
    }

    /**
     * Hold-to-speak: press handler. Interrupts any ongoing TTS, notifies the
     * state machine, and starts buffering microphone samples.
     */
    private fun onRecordButtonDown() {
        Log.d(AppConfig.TAG, "onRecordButtonDown called")
        if (isRecording) {
            Log.d(AppConfig.TAG, "Already recording, returning")
            return
        }
        // Barge-in: interrupt TTS if it is currently playing.
        val interrupted = ttsController.interruptForNewTurn()
        if (interrupted) {
            uiManager.appendToUi("\n[LOG] 已打断TTS播放\n")
        }
        // Tell the state machine the user started speaking (enter dialogue now).
        interactionController.onUserStartSpeaking()
        if (!audioProcessor.initMicrophone(micPermissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
            uiManager.showToast("麦克风初始化失败/无权限")
            return
        }
        currentTrace = TraceManager.getInstance().startNewTurn()
        currentTrace?.mark("turn_start")
        llmInFlight = false
        uiManager.clearText()
        // interruptForNewTurn() already prepared TTS state for next turn.
        // Keep reset() only for non-interrupt entry points.
        segmenter.reset()
        // Play the hold-to-speak motion.
        uiManager.startSpecificMotion("hold_to_speak")
        holdToSpeakAudioBuffer.clear()
        audioProcessor.startRecording()
        isRecording = true
        Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
        recordingJob?.cancel()
        recordingJob = ioScope.launch { processSamplesLoop() }
        Log.d(AppConfig.TAG, "onRecordButtonDown completed")
    }

    /**
     * Hold-to-speak: release handler. Flushes the remaining samples and sends
     * the whole utterance to ASR if it is at least one second long.
     */
    private fun onRecordButtonUp() {
        Log.d(AppConfig.TAG, "onRecordButtonUp called")
        if (!isRecording) {
            Log.d(AppConfig.TAG, "Not recording, returning")
            return
        }
        isRecording = false
        audioProcessor.stopRecording()
        recordingJob?.cancel()
        recordingJob = ioScope.launch {
            // Drain the tail of the recording.
            val audioData = audioProcessor.getRecordedData()
            holdToSpeakAudioBuffer.addAll(audioData.toList())
            if (holdToSpeakAudioBuffer.size >= HOLD_TO_SPEAK_MIN_SAMPLES) {
                val finalAudio = holdToSpeakAudioBuffer.toFloatArray()
                asrManager.enqueueAudioSegment(finalAudio, finalAudio)
            } else {
                uiManager.showToast("录音时间太短,请长按至少1秒")
            }
            holdToSpeakAudioBuffer.clear()
        }
        Log.d(AppConfig.TAG, "onRecordButtonUp completed")
    }

    /**
     * Stops recording and TTS and restores buttons. Only a user-initiated
     * stop ends the current trace turn.
     */
    private fun onStopClicked(userInitiated: Boolean) {
        isRecording = false
        audioProcessor.stopRecording()
        recordingJob?.cancel()
        recordingJob = null
        ttsController.stop()
        if (AppConfig.USE_HOLD_TO_SPEAK) {
            uiManager.setButtonsEnabled(recordEnabled = true)
        } else {
            uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
        }
        if (userInitiated) {
            TraceManager.getInstance().endTurn()
            currentTrace = null
        }
    }

    /**
     * Main audio loop. Hold-to-speak mode accumulates samples into
     * [holdToSpeakAudioBuffer]; traditional mode runs VAD over fixed windows
     * and discards audio while TTS is playing.
     */
    private suspend fun processSamplesLoop() {
        Log.d(AppConfig.TAG, "processSamplesLoop started")
        if (AppConfig.USE_HOLD_TO_SPEAK) {
            // Hold-to-speak: just accumulate; ASR happens on button release.
            while (isRecording && ioScope.coroutineContext.isActive) {
                val audioData = audioProcessor.getAudioData()
                if (audioData.isNotEmpty()) {
                    holdToSpeakAudioBuffer.addAll(audioData.toList())
                }
                // Avoid busy-waiting.
                kotlinx.coroutines.delay(10)
            }
        } else {
            // Traditional mode: VAD-driven segmentation.
            val windowSize = AppConfig.WINDOW_SIZE
            val buffer = ShortArray(windowSize)
            var loopCount = 0
            while (isRecording && ioScope.coroutineContext.isActive) {
                loopCount++
                if (loopCount % 100 == 0) {
                    Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsController.isPlaying()}")
                }
                if (ttsController.isPlaying()) {
                    if (vadManager.isInSpeech()) {
                        Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
                        vadManager.clearState()
                    }
                    // Read and discard audio while TTS speaks (avoid echo).
                    val ret = audioProcessor.readAudio(buffer)
                    if (ret <= 0) continue
                    continue
                }
                val ret = audioProcessor.readAudio(buffer)
                if (ret <= 0) continue
                if (ret != windowSize) continue
                val chunk = audioProcessor.convertShortToFloat(buffer)
                val processedChunk = audioProcessor.applyGain(chunk)
                val result = vadManager.processAudioChunk(chunk, processedChunk)
                if (vadManager.vadComputeCount % 100 == 0) {
                    Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}")
                }
                if (loopCount % 1000 == 0) {
                    Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}")
                }
                // Time-forced TTS segments from the streaming segmenter.
                val forced = segmenter.maybeForceByTime()
                for (seg in forced) ttsController.enqueueSegment(seg)
            }
            vadManager.forceFinalize()
        }
        Log.d(AppConfig.TAG, "processSamplesLoop stopped")
    }

    /** Maps a Live2D motion file name onto a UI mood. */
    private fun playInteractionMotion(motionName: String) {
        when (motionName) {
            "haru_g_m22.motion3.json" -> uiManager.setMood("高兴")
            "haru_g_m01.motion3.json", "haru_g_m17.motion3.json" -> uiManager.setMood("中性")
            "haru_g_m15.motion3.json" -> uiManager.setMood("关心")
            "haru_g_idle.motion3.json" -> uiManager.setMood("平和")
            else -> uiManager.setMood("中性")
        }
    }

    /**
     * Records one "role: text" line into the rolling window (max 12 lines) and
     * the conversation buffer memory; persists to DB every 5 lines.
     */
    private fun appendConversationLine(role: String, text: String) {
        val line = "$role: ${text.trim()}"
        // Skip lines that carry no content beyond "role: ".
        if (line.length <= 4) return
        recentConversationLines.add(line)
        if (recentConversationLines.size > 12) {
            recentConversationLines.removeAt(0)
        }
        recentConversationDirty = true
        // Mirror into the conversation memory.
        val memoryRole = if (role == "用户") "user" else "assistant"
        conversationBufferMemory.addMessage(activeUserId, memoryRole, text.trim())
        // Periodically flush to the database.
        if (recentConversationLines.size % 5 == 0) {
            conversationBufferMemory.saveToDatabase(activeUserId)
        }
    }

    /**
     * Prefixes the user's utterance with a [用户画像] profile block (name, age,
     * gender, hobbies, summary, conversation summary) when any is known.
     */
    private fun buildCloudPromptWithUserProfile(userText: String): String {
        val profile = userMemoryStore.getMemory(activeUserId) ?: return userText
        val profileParts = ArrayList<String>()
        profile.displayName?.takeIf { it.isNotBlank() }?.let { profileParts.add("姓名:$it") }
        profile.age?.takeIf { it.isNotBlank() }?.let { profileParts.add("年龄:$it") }
        profile.gender?.takeIf { it.isNotBlank() }?.let { profileParts.add("性别:$it") }
        profile.hobbies?.takeIf { it.isNotBlank() }?.let { profileParts.add("爱好:$it") }
        profile.profileSummary?.takeIf { it.isNotBlank() }?.let { profileParts.add("画像:$it") }
        // Append the conversation summary if one exists.
        val conversationSummary = conversationSummaryMemory.getSummary(activeUserId)
        if (conversationSummary.isNotBlank()) {
            profileParts.add("对话摘要:$conversationSummary")
        }
        if (profileParts.isEmpty()) return userText
        return buildString {
            append("[用户画像]\n")
            append(profileParts.joinToString(";"))
            append("\n[/用户画像]\n")
            append(userText)
        }
    }

    /**
     * When IDLE and the active user is a recognized face ("face_" id), runs
     * summary generation and multi-angle profile extraction over the buffered
     * conversation, persisting whatever fields come back non-empty.
     */
    private fun analyzeUserProfileInIdleIfNeeded() {
        if (!activeUserId.startsWith("face_")) {
            Log.d(AppConfig.TAG, "faceID is not face_")
            return
        }
        // Pull the buffered conversation for this user.
        val messages = conversationBufferMemory.getMessages(activeUserId)
        Log.d(AppConfig.TAG, "msg is empty? ${messages.isEmpty()}")
        val hasUserMessages = messages.any { it.role == "user" }
        Log.d(AppConfig.TAG, "msg has user messages? $hasUserMessages")
        if (messages.isEmpty() || !hasUserMessages) return
        // Generate a conversation summary.
        conversationSummaryMemory.generateSummary(activeUserId, messages) { summary ->
            Log.d(AppConfig.TAG, "Generated conversation summary for $activeUserId: $summary")
        }
        // Extract profile fields via multi-angle questioning.
        val dialogue = messages.joinToString("\n") { "${it.role}: ${it.content}" }
        requestMultiAngleProfileExtraction(dialogue) { profileData ->
            try {
                val nameToUpdate = profileData["name"]?.trim()?.ifBlank { null }
                val ageToUpdate = profileData["age"]?.trim()?.ifBlank { null }
                val genderToUpdate = profileData["gender"]?.trim()?.ifBlank { null }
                val hobbiesToUpdate = profileData["hobbies"]?.trim()?.ifBlank { null }
                val summaryToUpdate = profileData["summary"]?.trim()?.ifBlank { null }
                Log.d(TAG_LLM, "profileData: $profileData")
                if (nameToUpdate != null || ageToUpdate != null || genderToUpdate != null ||
                    hobbiesToUpdate != null || summaryToUpdate != null
                ) {
                    if (nameToUpdate != null) {
                        userMemoryStore.updateDisplayName(activeUserId, nameToUpdate)
                        Log.i(TAG_LLM, "Updated display name to $nameToUpdate")
                    }
                    userMemoryStore.updateProfile(activeUserId, ageToUpdate, genderToUpdate, hobbiesToUpdate, summaryToUpdate)
                    // Clear the conversation now that it has been processed.
                    conversationBufferMemory.clear(activeUserId)
                    runOnUiThread { uiManager.appendToUi("\n[Memory] 已更新用户画像: $activeUserId\n") }
                }
            } catch (e: Exception) {
                Log.w(TAG_LLM, "Profile parse failed: ${e.message}")
            }
        }
    }

    /**
     * Asks the local LLM five separate extraction questions (name, age,
     * gender, hobbies, summary) about [dialogue] and delivers the collected
     * answers once all five callbacks have fired. Returns an empty map when
     * no local LLM is available or on failure.
     */
    private fun requestMultiAngleProfileExtraction(dialogue: String, onResult: (Map<String, String>) -> Unit) {
        try {
            val local = llmManager
            if (local == null) {
                onResult(emptyMap())
                return
            }
            val questions = listOf(
                "请从对话中提取用户的姓名,只返回姓名,如果没有提到姓名,请返回未知",
                "请从对话中提取用户的年龄,只返回年龄,如果没有提到年龄,请返回未知",
                "请从对话中提取用户的性别,只返回性别,如果没有提到性别,请返回未知",
                "请从对话中提取用户的爱好,只返回爱好,如果没有提到爱好,请返回未知",
                "请总结对话,只返回总结的内容"
            )
            // NOTE(review): completed/results are mutated from LLM callbacks —
            // assumes callbacks arrive on a single thread; confirm LLMManager's
            // callback threading.
            var completed = 0
            val results = mutableMapOf<String, String>()
            questions.forEach { question ->
                val prompt = buildMultiAnglePrompt(dialogue, question)
                local.generate(prompt) { answer ->
                    val processedAnswer = processProfileAnswer(answer)
                    when {
                        question.contains("姓名") -> results["name"] = processedAnswer
                        question.contains("年龄") -> results["age"] = processedAnswer
                        question.contains("性别") -> results["gender"] = processedAnswer
                        question.contains("爱好") -> results["hobbies"] = processedAnswer
                        question.contains("总结") -> results["summary"] = processedAnswer
                    }
                    completed++
                    if (completed == questions.size) {
                        onResult(results)
                    }
                }
            }
        } catch (e: Exception) {
            Log.e(TAG_LLM, "requestMultiAngleProfileExtraction failed: ${e.message}", e)
            onResult(emptyMap())
        }
    }

    /** Builds the single-question extraction prompt over the dialogue. */
    private fun buildMultiAnglePrompt(dialogue: String, question: String): String {
        return """
            请根据以下对话回答问题:
            对话内容:
            $dialogue
            问题:$question
            回答:
        """.trimIndent()
    }

    /**
     * Normalizes a raw extraction answer: strips angle brackets, maps
     * "unknown"/"null"/"未知" to empty, drops any leading "label:" prefix and
     * removes Chinese full stops.
     */
    private fun processProfileAnswer(answer: String): String {
        var processed = answer.replace("<", "").replace(">", "")
        if (processed.contains("unknown", ignoreCase = true) ||
            processed.contains("null", ignoreCase = true) ||
            processed.contains("未知")
        ) {
            return ""
        }
        if (processed.contains(":")) {
            processed = processed.substringAfter(":").trim()
        }
        processed = processed.replace("。", "").trim()
        return processed
    }

    /**
     * Extracts the first {...} span from [text] and parses it as JSON; falls
     * back to parsing the whole trimmed text.
     */
    private fun parseFirstJsonObject(text: String): JSONObject {
        val raw = text.trim()
        val start = raw.indexOf('{')
        val end = raw.lastIndexOf('}')
        if (start >= 0 && end > start) {
            return JSONObject(raw.substring(start, end + 1))
        }
        return JSONObject(raw)
    }

    /**
     * (Re-)initializes the local LLM used only for the "memory" state. On
     * success also runs the optional on-board summary test; on failure resets
     * [useLocalLLM] and informs the user.
     */
    private fun initLLM() {
        try {
            Log.i(TAG_LLM, "initLLM called for memory-local model")
            llmManager?.destroy()
            llmManager = null
            val modelPath = FileHelper.getLLMModelPath(applicationContext)
            if (!File(modelPath).exists()) {
                throw IllegalStateException("RKLLM model file missing: $modelPath")
            }
            Log.i(AppConfig.TAG, "Initializing local memory LLM with model path: $modelPath")
            val localLlmResponseBuffer = StringBuilder()
            llmManager = LLMManager(modelPath, object : LLMManagerCallback {
                override fun onThinking(msg: String, finished: Boolean) {
                    Log.d(TAG_LLM, "LOCAL onThinking finished=$finished msg=${msg.take(60)}")
                }

                override fun onResult(msg: String, finished: Boolean) {
                    Log.d(TAG_LLM, "LOCAL onResult finished=$finished len=${msg.length}")
                    runOnUiThread {
                        if (!finished) {
                            // Accumulate; optionally echo streamed tokens.
                            localLlmResponseBuffer.append(msg)
                            if (enableStreaming && !localThoughtSilentMode) {
                                uiManager.appendToUi(msg)
                            }
                            return@runOnUiThread
                        }
                        val finalText = localLlmResponseBuffer.toString().trim()
                        localLlmResponseBuffer.setLength(0)
                        // Profile-extraction callback takes priority.
                        val profileCallback = pendingLocalProfileCallback
                        pendingLocalProfileCallback = null
                        if (profileCallback != null) {
                            profileCallback(finalText)
                            localThoughtSilentMode = false
                            return@runOnUiThread
                        }
                        // Then the inner-thought callback.
                        val callback = pendingLocalThoughtCallback
                        pendingLocalThoughtCallback = null
                        if (callback != null) {
                            callback(finalText)
                            localThoughtSilentMode = false
                            return@runOnUiThread
                        }
                        // No pending consumer: speak the result unless silent.
                        if (!localThoughtSilentMode && finalText.isNotEmpty()) {
                            uiManager.appendToUi("$finalText\n")
                            ttsController.enqueueSegment(finalText)
                            ttsController.enqueueEnd()
                        }
                        localThoughtSilentMode = false
                    }
                }
            })
            Log.i(TAG_LLM, "LOCAL memory LLM initialized")
            useLocalLLM = true
            if (AppConfig.OnboardTesting.LOCAL_LLM_SUMMARY) {
                llmSummaryTest = LLMSummaryTest(this)
                ioScope.launch {
                    // Give LLMManager 5 s to finish initializing.
                    kotlinx.coroutines.delay(5000)
                    runOnUiThread {
                        if (llmManager != null) {
                            llmSummaryTest.setLLMManager(llmManager!!)
                            llmSummaryTest.runTest { message ->
                                Log.i(AppConfig.TAG, message)
                                uiManager.appendToUi("\n$message\n")
                            }
                        }
                    }
                }
            }
        } catch (e: Exception) {
            Log.e(AppConfig.TAG, "Failed to initialize LLM: ${e.message}", e)
            Log.e(TAG_LLM, "LOCAL init failed: ${e.message}", e)
            useLocalLLM = false
            runOnUiThread {
                uiManager.setLLMSwitchChecked(false)
                uiManager.showToast("LLM 初始化失败: ${e.message}", Toast.LENGTH_LONG)
                uiManager.appendToUi("\n[错误] LLM 初始化失败: ${e.message}\n")
            }
        }
    }

    /**
     * Memory-state hook: asks the local LLM for a short inner thought without
     * echoing to the UI. Falls back to a canned thought when no local model is
     * available or the call fails.
     */
    private fun requestLocalThought(prompt: String, onResult: (String) -> Unit) {
        try {
            val local = llmManager
            if (local == null) {
                onResult("我在想,下次见面可以聊聊今天的新鲜事。")
                return
            }
            localThoughtSilentMode = true
            pendingLocalThoughtCallback = onResult
            Log.i(TAG_LLM, "Routing memory thought to LOCAL")
            local.generateResponseWithSystem(
                "你是数字人内心独白模块,输出一句简短温和的想法。",
                prompt
            )
        } catch (e: Exception) {
            Log.e(TAG_LLM, "requestLocalThought failed: ${e.message}", e)
            pendingLocalThoughtCallback = null
            localThoughtSilentMode = false
            onResult("我在想,下次见面可以聊聊今天的新鲜事。")
        }
    }
}