// Source file: digital_person/app/src/main/java/com/digitalperson/Live2DChatActivity.kt
// Snapshot: gcw_4spBpAfv 1cae048a7f ("llm testing"), 2026-03-10 19:00:32 +08:00
// 1233 lines, 50 KiB, Kotlin.
//
// NOTE: the original capture included repository-viewer page chrome
// ("Raw Blame History", an "ambiguous Unicode characters" warning, etc.).
// That text was hosting-site furniture, not file content, and has been
// converted to this comment so the file parses as valid Kotlin.
package com.digitalperson
import android.Manifest
import android.content.pm.PackageManager
import android.graphics.Bitmap
import android.os.Bundle
import android.util.Log
import android.widget.Toast
import androidx.appcompat.app.AlertDialog
import androidx.camera.core.CameraSelector
import com.digitalperson.engine.RetinaFaceEngineRKNN
import com.digitalperson.face.FaceBox
import androidx.camera.core.ImageAnalysis
import androidx.camera.core.ImageProxy
import androidx.camera.core.Preview
import androidx.camera.lifecycle.ProcessCameraProvider
import androidx.camera.view.PreviewView
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat
import com.digitalperson.cloud.CloudApiManager
import com.digitalperson.audio.AudioProcessor
import com.digitalperson.vad.VadManager
import com.digitalperson.asr.AsrManager
import com.digitalperson.ui.Live2DUiManager
import com.digitalperson.config.AppConfig
import com.digitalperson.face.FaceDetectionPipeline
import com.digitalperson.face.FaceOverlayView
import com.digitalperson.face.ImageProxyBitmapConverter
import com.digitalperson.metrics.TraceManager
import com.digitalperson.metrics.TraceSession
import com.digitalperson.tts.TtsController
import com.digitalperson.interaction.DigitalHumanInteractionController
import com.digitalperson.data.DatabaseInitializer
import com.digitalperson.interaction.InteractionActionHandler
import com.digitalperson.interaction.InteractionState
import com.digitalperson.interaction.UserMemoryStore
import com.digitalperson.llm.LLMManager
import com.digitalperson.llm.LLMManagerCallback
import com.digitalperson.util.FileHelper
import com.digitalperson.data.AppDatabase
import com.digitalperson.data.entity.ChatMessage
import com.digitalperson.interaction.ConversationBufferMemory
import com.digitalperson.interaction.ConversationSummaryMemory
import java.io.File
import android.graphics.BitmapFactory
import org.json.JSONObject
import java.util.concurrent.ExecutorService
import java.util.concurrent.Executors
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.cancel
import kotlinx.coroutines.isActive
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
import com.digitalperson.onboard_testing.FaceRecognitionTest
import com.digitalperson.onboard_testing.LLMSummaryTest
class Live2DChatActivity : AppCompatActivity() {
companion object {
private const val TAG_ACTIVITY = "Live2DChatActivity"
private const val TAG_LLM = "LLM_ROUTE"
}
private lateinit var uiManager: Live2DUiManager
private lateinit var vadManager: VadManager
private lateinit var asrManager: AsrManager
private lateinit var ttsController: TtsController
private lateinit var audioProcessor: AudioProcessor
private var llmManager: LLMManager? = null
private var useLocalLLM = false // 默认使用云端 LLM
private val appPermissions: Array<String> = arrayOf(
Manifest.permission.RECORD_AUDIO,
Manifest.permission.CAMERA
)
private val micPermissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
@Volatile
private var isRecording: Boolean = false
private val holdToSpeakAudioBuffer = mutableListOf<Float>()
private val HOLD_TO_SPEAK_MIN_SAMPLES = 16000 // 1秒的音频数据
private val ioScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
private var recordingJob: Job? = null
private val nativeLock = Any()
private lateinit var cloudApiManager: CloudApiManager
private val segmenter = StreamingTextSegmenter(
maxLen = AppConfig.Tts.MAX_LEN,
maxWaitMs = AppConfig.Tts.MAX_WAIT_MS
)
private var currentTrace: TraceSession? = null
@Volatile private var llmInFlight: Boolean = false
private var enableStreaming = false
private lateinit var cameraPreviewView: PreviewView
private lateinit var faceOverlayView: FaceOverlayView
private lateinit var faceDetectionPipeline: FaceDetectionPipeline
private lateinit var interactionController: DigitalHumanInteractionController
private lateinit var userMemoryStore: UserMemoryStore
private lateinit var conversationBufferMemory: ConversationBufferMemory
private lateinit var conversationSummaryMemory: ConversationSummaryMemory
private var facePipelineReady: Boolean = false
private var cameraProvider: ProcessCameraProvider? = null
private lateinit var cameraAnalyzerExecutor: ExecutorService
private var activeUserId: String = "guest"
private var pendingLocalThoughtCallback: ((String) -> Unit)? = null
private var pendingLocalProfileCallback: ((String) -> Unit)? = null
private var localThoughtSilentMode: Boolean = false
private val recentConversationLines = ArrayList<String>()
private var recentConversationDirty: Boolean = false
private var lastFacePresent: Boolean = false
private var lastFaceIdentityId: String? = null
private var lastFaceRecognizedName: String? = null
private lateinit var faceRecognitionTest: FaceRecognitionTest
private lateinit var llmSummaryTest: LLMSummaryTest
/**
 * Handles the result of the runtime permission request issued in onCreate.
 *
 * Microphone permission is mandatory: the activity finishes without it.
 * Camera permission is optional: when denied, face detection is skipped
 * but the rest of the app keeps working.
 */
override fun onRequestPermissionsResult(
    requestCode: Int,
    permissions: Array<String>,
    grantResults: IntArray
) {
    super.onRequestPermissionsResult(requestCode, permissions, grantResults)
    if (requestCode != AppConfig.REQUEST_RECORD_AUDIO_PERMISSION) return
    if (grantResults.isEmpty()) {
        finish()
        return
    }
    // Pair each requested permission with its grant result.
    val resultByPermission = permissions.zip(grantResults.toList()).toMap()
    val hasMic = resultByPermission[Manifest.permission.RECORD_AUDIO] == PackageManager.PERMISSION_GRANTED
    val hasCamera = resultByPermission[Manifest.permission.CAMERA] == PackageManager.PERMISSION_GRANTED
    when {
        !hasMic -> {
            Log.e(AppConfig.TAG, "Audio record is disallowed")
            finish()
        }
        !hasCamera -> {
            uiManager.showToast("未授予相机权限,暂不启用人脸检测")
            Log.w(AppConfig.TAG, "Camera permission denied")
        }
        facePipelineReady -> startCameraPreviewAndDetection()
    }
}
/**
 * Builds the whole activity: UI views, database-backed memories, the
 * interaction state machine, the face detection pipeline, audio/VAD/ASR/TTS
 * managers, and finally either downloads the local LLM model or initializes
 * it directly when already present.
 */
override fun onCreate(savedInstanceState: Bundle?) {
    super.onCreate(savedInstanceState)
    Log.i(TAG_ACTIVITY, "onCreate")
    setContentView(R.layout.activity_live2d_chat)
    // --- UI wiring -------------------------------------------------------
    uiManager = Live2DUiManager(this)
    uiManager.initViews(
        textViewId = R.id.my_text,
        scrollViewId = R.id.scroll_view,
        startButtonId = R.id.start_button,
        stopButtonId = R.id.stop_button,
        recordButtonId = R.id.record_button,
        traditionalButtonsId = R.id.traditional_buttons,
        llmModeSwitchId = R.id.llm_mode_switch,
        llmModeSwitchRowId = R.id.llm_mode_switch_row,
        silentPlayerViewId = 0,
        speakingPlayerViewId = 0,
        live2dViewId = R.id.live2d_view
    )
    cameraPreviewView = findViewById(R.id.camera_preview)
    cameraPreviewView.implementationMode = PreviewView.ImplementationMode.COMPATIBLE
    faceOverlayView = findViewById(R.id.face_overlay)
    cameraAnalyzerExecutor = Executors.newSingleThreadExecutor()
    // --- Database and memory stores --------------------------------------
    val databaseInitializer = DatabaseInitializer(applicationContext)
    databaseInitializer.initialize()
    userMemoryStore = UserMemoryStore(applicationContext)
    val database = AppDatabase.getInstance(applicationContext)
    conversationBufferMemory = ConversationBufferMemory(database)
    // NOTE(review): llmManager is still null here; this instance is replaced
    // after initLLM() runs below — confirm ConversationSummaryMemory
    // tolerates a null manager in the meantime.
    conversationSummaryMemory = ConversationSummaryMemory(database, llmManager)
    // --- Interaction state machine ----------------------------------------
    // The handler bridges state-machine actions back into UI, TTS, cloud
    // LLM calls, local-LLM "thoughts" and the user memory store.
    interactionController = DigitalHumanInteractionController(
        scope = ioScope,
        handler = object : InteractionActionHandler {
            override fun onStateChanged(state: InteractionState) {
                runOnUiThread {
                    uiManager.appendToUi("\n[State] $state\n")
                }
                Log.i(TAG_ACTIVITY, "\n[State] $state\n")
                // Idle is the quiet moment used to distill the finished
                // conversation into the user profile.
                if (state == InteractionState.IDLE) {
                    analyzeUserProfileInIdleIfNeeded()
                    Log.i(TAG_ACTIVITY, "[analyze] done")
                }
            }
            override fun playMotion(motionName: String) {
                playInteractionMotion(motionName)
            }
            override fun appendText(text: String) {
                uiManager.appendToUi(text)
            }
            override fun speak(text: String) {
                ttsController.enqueueSegment(text)
                ttsController.enqueueEnd()
            }
            override fun requestCloudReply(userText: String) {
                // Dialogue turns are always routed to the cloud LLM.
                llmInFlight = true
                Log.i(TAG_LLM, "Routing dialogue to CLOUD")
                cloudApiManager.callLLM(buildCloudPromptWithUserProfile(userText))
            }
            override fun requestLocalThought(prompt: String, onResult: (String) -> Unit) {
                this@Live2DChatActivity.requestLocalThought(prompt, onResult)
            }
            override fun onRememberUser(faceIdentityId: String, name: String?) {
                activeUserId = faceIdentityId
                userMemoryStore.upsertUserSeen(activeUserId, name)
            }
            override fun saveThought(thought: String) {
                userMemoryStore.upsertUserSeen(activeUserId, null)
                userMemoryStore.updateThought(activeUserId, thought)
            }
            override fun loadLatestThought(): String? = userMemoryStore.getLatestThought()
            override fun loadRecentThoughts(timeRangeMs: Long): List<String> = userMemoryStore.getRecentThoughts(timeRangeMs)
            override fun addToChatHistory(role: String, content: String) {
                appendConversationLine(role, content)
            }
            override fun addAssistantMessageToCloudHistory(content: String) {
                cloudApiManager.addAssistantMessage(content)
            }
            override fun getRandomQuestion(faceId: String): String {
                // Pull a question this face has not been asked yet; fall
                // back to a default icebreaker when none is available.
                val question = userMemoryStore.getRandomUnansweredQuestion(faceId)
                return question?.content ?: "你喜欢什么颜色呀?"
            }
        },
        context = applicationContext
    )
    // --- Face detection pipeline ------------------------------------------
    faceDetectionPipeline = FaceDetectionPipeline(
        context = applicationContext,
        onResult = { result ->
            faceOverlayView.updateResult(result)
        },
        onPresenceChanged = { present, faceIdentityId, recognizedName ->
            // Edge-triggered: only notify the state machine on transitions.
            if (present != lastFacePresent) {
                lastFacePresent = present
                Log.d(TAG_ACTIVITY, "presence changed: present=$present")
                interactionController.onFacePresenceChanged(present)
                if (!present) {
                    lastFaceIdentityId = null
                    lastFaceRecognizedName = null
                }
            }
            // Forward identity changes (new face or new recognized name).
            if (present && (faceIdentityId != lastFaceIdentityId || recognizedName != lastFaceRecognizedName)) {
                lastFaceIdentityId = faceIdentityId
                lastFaceRecognizedName = recognizedName
                Log.d(TAG_ACTIVITY, "identity update: faceIdentityId=$faceIdentityId, recognized=$recognizedName")
                interactionController.onFaceIdentityUpdated(faceIdentityId, recognizedName)
            }
        }
    )
    // --- Interaction mode: hold-to-speak vs. start/stop buttons -----------
    uiManager.setUseHoldToSpeak(AppConfig.USE_HOLD_TO_SPEAK)
    if (AppConfig.USE_HOLD_TO_SPEAK) {
        uiManager.setRecordButtonTouchListener { isDown ->
            if (isDown) {
                // Button pressed: begin recording.
                onRecordButtonDown()
            } else {
                // Button released: stop and submit the utterance.
                onRecordButtonUp()
            }
        }
    } else {
        uiManager.setStartButtonListener { onStartClicked() }
        uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
    }
    ActivityCompat.requestPermissions(this, appPermissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)
    // Optional layout switches — tolerated to be missing from the layout.
    try {
        val streamingSwitch = findViewById<android.widget.Switch>(R.id.streaming_switch)
        streamingSwitch.isChecked = enableStreaming
        streamingSwitch.setOnCheckedChangeListener { _, isChecked ->
            enableStreaming = isChecked
            cloudApiManager.setEnableStreaming(isChecked)
            uiManager.showToast("流式输出已${if (isChecked) "启用" else "禁用"}")
        }
    } catch (e: Exception) {
        Log.w(AppConfig.TAG, "Streaming switch not found in layout: ${e.message}")
    }
    try {
        val ttsModeSwitch = findViewById<android.widget.Switch>(R.id.tts_mode_switch)
        ttsModeSwitch.isChecked = true // default: local TTS checked state
        ttsModeSwitch.setOnCheckedChangeListener { _, isChecked ->
            ttsController.setUseQCloudTts(isChecked)
            uiManager.showToast("TTS模式已切换到${if (isChecked) "腾讯云" else "本地"}")
        }
    } catch (e: Exception) {
        Log.w(AppConfig.TAG, "TTS mode switch not found in layout: ${e.message}")
    }
    // LLM mode switch: routing is fixed by the state machine (dialogue →
    // cloud, memory → local); the switch only signals local-LLM availability.
    uiManager.setLLMSwitchListener { isChecked ->
        useLocalLLM = isChecked
        uiManager.showToast("状态机路由已固定:对话云端,回忆本地")
    }
    // Keep the LLM switch hidden until the model download completes.
    uiManager.showLLMSwitch(false)
    if (AppConfig.USE_HOLD_TO_SPEAK) {
        uiManager.setButtonsEnabled(recordEnabled = false)
    } else {
        uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
    }
    uiManager.setText("初始化中…")
    // --- Audio stack -------------------------------------------------------
    audioProcessor = AudioProcessor(this)
    ttsController = TtsController(this)
    ttsController.setCallback(createTtsCallback())
    asrManager = AsrManager(this)
    asrManager.setAudioProcessor(audioProcessor)
    asrManager.setCallback(createAsrCallback())
    vadManager = VadManager(this)
    vadManager.setCallback(createVadCallback())
    // --- Local LLM model: download if missing, then finish bring-up --------
    if (!FileHelper.isLocalLLMAvailable(this)) {
        // Show a modal progress dialog during the download.
        uiManager.showDownloadProgressDialog()
        // Download model files asynchronously.
        FileHelper.downloadModelFilesWithProgress(
            this,
            onProgress = { fileName, downloaded, total, progress ->
                runOnUiThread {
                    val downloadedMB = downloaded / (1024 * 1024)
                    val totalMB = total / (1024 * 1024)
                    uiManager.updateDownloadProgress(
                        fileName,
                        downloadedMB,
                        totalMB,
                        progress
                    )
                }
            },
            onComplete = { success, message ->
                runOnUiThread {
                    uiManager.dismissDownloadProgressDialog()
                    if (success) {
                        Log.i(AppConfig.TAG, "Model files downloaded successfully")
                        uiManager.showToast("模型下载完成", Toast.LENGTH_SHORT)
                        // Re-check that the local LLM is now usable.
                        if (FileHelper.isLocalLLMAvailable(this)) {
                            Log.i(AppConfig.TAG, "Local LLM is available, enabling local LLM switch")
                            // NOTE(review): original comment said "show the
                            // LLM switch", but showLLMSwitch(false) keeps it
                            // hidden — routing is fixed, so this appears
                            // intentional; confirm.
                            uiManager.showLLMSwitch(false)
                            // Initialize the local LLM.
                            initLLM()
                            // Rebuild summary memory now that llmManager exists.
                            conversationSummaryMemory = ConversationSummaryMemory(database, llmManager)
                            // Start the interaction state machine.
                            interactionController.start()
                            // Finish bringing up VAD/ASR/TTS/face detection.
                            initializeOtherComponents()
                        }
                    } else {
                        Log.e(AppConfig.TAG, "Failed to download model files: $message")
                        // Fatal: block the app with an error dialog.
                        showModelDownloadErrorDialog(message)
                    }
                }
            }
        )
    } else {
        // Model already on disk: initialize the local LLM directly.
        initLLM()
        // Rebuild summary memory now that llmManager exists.
        conversationSummaryMemory = ConversationSummaryMemory(database, llmManager)
        // Start the interaction state machine.
        interactionController.start()
        // Finish bringing up VAD/ASR/TTS/face detection.
        initializeOtherComponents()
        // NOTE(review): as above, showLLMSwitch(false) keeps the switch hidden.
        uiManager.showLLMSwitch(false)
    }
}
/**
 * Initializes the remaining components off the main thread: VAD, the
 * SenseVoice (RKNN) ASR model, TTS, and the face detection pipeline.
 * On success it enables the capture buttons and (with permission)
 * starts the camera; on failure it reports the error and keeps the
 * buttons disabled. Also constructs the cloud API manager and
 * pre-starts the ASR worker.
 */
private fun initializeOtherComponents() {
    ioScope.launch {
        try {
            Log.i(AppConfig.TAG, "Init VAD + SenseVoice(RKNN) + TTS (background)")
            // VAD/ASR share native state; init under the same lock used
            // for release in onDestroy.
            synchronized(nativeLock) {
                vadManager.initVadModel()
                asrManager.initSenseVoiceModel()
            }
            val ttsOk = ttsController.init()
            facePipelineReady = faceDetectionPipeline.initialize()
            withContext(Dispatchers.Main) {
                if (!ttsOk) {
                    uiManager.showToast(
                        "TTS 初始化失败:请确认 assets/${AppConfig.Tts.MODEL_DIR}/ 下有 model.onnx、tokens.txt、lexicon.txt 以及 phone/date/number/new_heteronym.fst",
                        Toast.LENGTH_LONG
                    )
                }
                if (!facePipelineReady) {
                    uiManager.showToast("RetinaFace 初始化失败,请检查模型和 rknn 运行库", Toast.LENGTH_LONG)
                } else if (allPermissionsGranted()) {
                    startCameraPreviewAndDetection()
                }
                uiManager.setText(getString(R.string.hint))
                if (AppConfig.USE_HOLD_TO_SPEAK) {
                    uiManager.setButtonsEnabled(recordEnabled = true)
                } else {
                    uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
                }
            }
        } catch (t: Throwable) {
            Log.e(AppConfig.TAG, "Initialization failed: ${t.message}", t)
            withContext(Dispatchers.Main) {
                uiManager.setText("初始化失败:${t.javaClass.simpleName}: ${t.message}")
                uiManager.showToast("初始化失败(请看 Logcat: ${t.javaClass.simpleName}", Toast.LENGTH_LONG)
                if (AppConfig.USE_HOLD_TO_SPEAK) {
                    uiManager.setButtonsEnabled(recordEnabled = false)
                } else {
                    uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
                }
            }
        }
    }
    cloudApiManager = CloudApiManager(createCloudApiListener(), applicationContext)
    cloudApiManager.setEnableStreaming(enableStreaming)
    Log.d(AppConfig.TAG, "Pre-starting ASR worker")
    ioScope.launch {
        asrManager.runAsrWorker()
    }
    // Face recognition smoke test (delayed so all components are up).
    if (AppConfig.OnboardTesting.FACE_REGONITION) {
        faceRecognitionTest = FaceRecognitionTest(this)
        faceRecognitionTest.setFaceDetectionPipeline(faceDetectionPipeline)
        // FIX: use the activity-owned ioScope instead of an ad-hoc
        // CoroutineScope(Dispatchers.IO) so this delayed test is cancelled
        // together with the activity in onDestroy.
        ioScope.launch {
            kotlinx.coroutines.delay(10000)
            runOnUiThread {
                faceRecognitionTest.runTest("http://192.168.1.19:5000/api/face_test_images") { message ->
                    Log.i(AppConfig.TAG, message)
                    uiManager.appendToUi("\n$message\n")
                }
            }
        }
    }
}
/**
 * Blocks further use of the app after a failed LLM model download:
 * shows a non-cancelable dialog whose only action exits the activity.
 */
private fun showModelDownloadErrorDialog(errorMessage: String) {
    val dialog = AlertDialog.Builder(this)
        .setTitle("模型下载失败")
        .setMessage("本地 LLM 模型下载失败,应用无法正常运行。\n\n错误信息:$errorMessage\n\n请检查网络连接后重启应用。")
        .setCancelable(false)
        .setPositiveButton("退出应用") { _, _ -> finish() }
        .create()
    dialog.show()
}
/**
 * Probes whether [url] is reachable via an HTTP HEAD request.
 *
 * Blocking network call — must not run on the main thread.
 *
 * @return true when the server answers with any 2xx status,
 *         false on malformed URL, timeout, I/O error, or non-success status.
 */
private fun checkUrlExists(url: String): Boolean {
    var connection: java.net.HttpURLConnection? = null
    return try {
        connection = java.net.URL(url).openConnection() as java.net.HttpURLConnection
        connection.requestMethod = "HEAD"
        connection.connectTimeout = 3000
        connection.readTimeout = 3000
        // FIX: accept the whole 2xx success range, not just 200
        // (e.g. 204 No Content is a perfectly valid "exists" answer).
        connection.responseCode in 200..299
    } catch (e: Exception) {
        false
    } finally {
        // FIX: always release the connection — the original leaked it
        // whenever responseCode threw before disconnect() was reached.
        connection?.disconnect()
    }
}
/**
 * Builds the ASR callback: mirrors recognition progress into the UI,
 * archives recognized user text, and forwards final text to the
 * interaction state machine (which decides whether/where to call an LLM).
 */
private fun createAsrCallback() = object : AsrManager.AsrCallback {
    override fun onAsrStarted() {
        currentTrace?.markASRStart()
        runOnUiThread {
            uiManager.appendToUi("\n[ASR] 开始识别...\n")
        }
    }
    override fun onAsrResult(text: String) {
        currentTrace?.markASREnd()
        runOnUiThread {
            uiManager.appendToUi("\n\n[ASR] ${text}\n")
        }
        // Keep the raw user utterance in the rolling conversation log.
        appendConversationLine("用户", text)
        // NOTE(review): marking "recording done" and "LLM response received"
        // immediately after ASR completes looks out of sequence for the
        // trace timeline — verify against TraceSession's expected call order.
        currentTrace?.markRecordingDone()
        currentTrace?.markLlmResponseReceived()
    }
    override fun onAsrSkipped(reason: String) {
        Log.d(AppConfig.TAG, "ASR segment skipped: $reason")
    }
    // Never transcribe while our own TTS audio is playing.
    override fun shouldSkipAsr(): Boolean = ttsController.isPlaying()
    override fun isLlmInFlight(): Boolean = llmInFlight
    override fun onLlmCalled(text: String) {
        Log.d(AppConfig.TAG, "Forward ASR text to interaction controller: $text")
        interactionController.onUserAsrText(text)
    }
}
/**
 * Builds the VAD callback: completed speech segments are queued for ASR,
 * and VAD processing is suppressed while TTS is playing or an LLM call is
 * still in flight (so we never transcribe our own speech output).
 */
private fun createVadCallback() = object : VadManager.VadCallback {
    override fun onSpeechSegmentReady(originalAudio: FloatArray, processedAudio: FloatArray) {
        Log.d(AppConfig.TAG, "Sending audio segment to ASR queue, size: ${processedAudio.size}")
        asrManager.enqueueAudioSegment(originalAudio, processedAudio)
    }

    override fun shouldSkipProcessing(): Boolean {
        val outputBusy = ttsController.isPlaying()
        return outputBusy || llmInFlight
    }
}
/**
 * Builds the cloud-LLM listener. Handles both streaming chunks and the
 * final response: extracts a mood tag for the Live2D avatar, mirrors text
 * to the UI, feeds sentence segments to TTS, and notifies the interaction
 * state machine when the dialogue turn finishes.
 */
private fun createCloudApiListener() = object : CloudApiManager.CloudApiListener {
    // Marks time-to-first-token once.
    // NOTE(review): this flag is never reset between turns, so only the
    // very first turn's first chunk is marked — confirm this is intended.
    private var llmFirstChunkMarked = false
    override fun onLLMResponseReceived(response: String) {
        currentTrace?.markLlmDone()
        llmInFlight = false
        appendConversationLine("助手", response)
        if (enableStreaming) {
            // Streaming already displayed/queued chunks; just flush what
            // the segmenter still holds and close the TTS stream.
            for (seg in segmenter.flush()) {
                ttsController.enqueueSegment(seg)
            }
            ttsController.enqueueEnd()
        } else {
            val previousMood = com.digitalperson.mood.MoodManager.getCurrentMood()
            val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(response)
            android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Final mood: $mood, filtered text: $filteredText")
            if (mood != previousMood) {
                uiManager.setMood(mood)
            }
            runOnUiThread {
                uiManager.appendToUi("${filteredText}\n")
            }
            ttsController.enqueueSegment(filteredText)
            ttsController.enqueueEnd()
        }
        interactionController.onDialogueResponseFinished()
    }
    override fun onLLMStreamingChunkReceived(chunk: String) {
        if (enableStreaming) {
            if (!llmFirstChunkMarked) {
                llmFirstChunkMarked = true
                currentTrace?.markLlmFirstChunk()
            }
            val previousMood = com.digitalperson.mood.MoodManager.getCurrentMood()
            val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(chunk)
            if (mood != previousMood) {
                android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Mood changed to: $mood")
                // Drive the Live2D avatar's mood from the tag in the stream.
                uiManager.setMood(mood)
            }
            // NOTE(review): appendToUi is called directly here while the
            // non-streaming path wraps it in runOnUiThread — confirm which
            // thread delivers streaming chunks.
            uiManager.appendToUi(filteredText)
            val segments = segmenter.processChunk(filteredText)
            for (seg in segments) {
                ttsController.enqueueSegment(seg)
            }
        }
    }
    // Cloud TTS audio is unused; local TtsController does all synthesis.
    override fun onTTSAudioReceived(audioFilePath: String) {}
    override fun onError(errorMessage: String) {
        llmInFlight = false
        uiManager.showToast(errorMessage, Toast.LENGTH_LONG)
        interactionController.onDialogueResponseFinished()
        onStopClicked(userInitiated = false)
    }
}
/**
 * Builds the TTS callback that mirrors synthesis progress into the UI,
 * clears stale ASR input, toggles the speaking-avatar state, and closes
 * the per-turn trace when a turn ends.
 */
private fun createTtsCallback() = object : TtsController.TtsCallback {
    override fun onTtsStarted(text: String) {
        runOnUiThread { uiManager.appendToUi("\n[TTS] 开始合成...\n") }
    }

    override fun onTtsCompleted() {
        val line = "\n[LOG] TTS completed at: ${System.currentTimeMillis()}\n"
        runOnUiThread { uiManager.appendToUi(line) }
    }

    override fun onTtsSegmentCompleted(durationMs: Long) {
        // Per-segment timing is not used here.
    }

    // TTS counts as stopped whenever we are no longer capturing audio.
    override fun isTtsStopped(): Boolean = !isRecording

    override fun onClearAsrQueue() {
        asrManager.clearQueue()
    }

    override fun onSetSpeaking(speaking: Boolean) {
        uiManager.setSpeaking(speaking)
    }

    override fun onEndTurn() {
        TraceManager.getInstance().endTurn()
        currentTrace = null
    }
}
/**
 * Releases every native / threaded resource owned by the activity.
 *
 * FIX: own teardown now runs BEFORE super.onDestroy() (Android convention
 * for teardown lifecycle callbacks), so components are released while the
 * activity is still in a valid state. Each release is best-effort.
 */
override fun onDestroy() {
    try { interactionController.stop() } catch (_: Throwable) {}
    stopCameraPreviewAndDetection()
    onStopClicked(userInitiated = false)
    ioScope.cancel()
    // VAD/ASR share native state; release under the same lock used for init.
    synchronized(nativeLock) {
        try { vadManager.release() } catch (_: Throwable) {}
        try { asrManager.release() } catch (_: Throwable) {}
    }
    try { faceDetectionPipeline.release() } catch (_: Throwable) {}
    try { cameraAnalyzerExecutor.shutdown() } catch (_: Throwable) {}
    try { ttsController.release() } catch (_: Throwable) {}
    try { llmManager?.destroy() } catch (_: Throwable) {}
    try { uiManager.release() } catch (_: Throwable) {}
    try { audioProcessor.release() } catch (_: Throwable) {}
    super.onDestroy()
}
/** Resumes the Live2D view and, when possible, restarts face detection. */
override fun onResume() {
    super.onResume()
    Log.i(TAG_ACTIVITY, "onResume")
    uiManager.onResume()
    val canDetectFaces = facePipelineReady && allPermissionsGranted()
    if (canDetectFaces) startCameraPreviewAndDetection()
}
/** Stops camera analysis and pauses the Live2D view before losing focus. */
override fun onPause() {
    Log.i(TAG_ACTIVITY, "onPause")
    stopCameraPreviewAndDetection()
    uiManager.onPause()
    super.onPause()
}
/** True when every permission in [appPermissions] (mic + camera) is granted. */
private fun allPermissionsGranted(): Boolean =
    appPermissions.none { permission ->
        ContextCompat.checkSelfPermission(this, permission) != PackageManager.PERMISSION_GRANTED
    }
/**
 * Binds the front camera to this activity's lifecycle with two use cases:
 * a preview surface and a latest-frame-only analyzer feeding the face
 * detection pipeline. Safe to call repeatedly — previous bindings are
 * released first.
 */
private fun startCameraPreviewAndDetection() {
    val providerFuture = ProcessCameraProvider.getInstance(this)
    providerFuture.addListener({
        try {
            val provider = providerFuture.get()
            cameraProvider = provider
            // Drop any previous bindings before re-binding.
            provider.unbindAll()
            val previewUseCase = Preview.Builder().build()
            previewUseCase.setSurfaceProvider(cameraPreviewView.surfaceProvider)
            cameraPreviewView.scaleType = PreviewView.ScaleType.FIT_CENTER
            val analysisUseCase = ImageAnalysis.Builder()
                .setBackpressureStrategy(ImageAnalysis.STRATEGY_KEEP_ONLY_LATEST)
                .build()
            analysisUseCase.setAnalyzer(cameraAnalyzerExecutor) { frame ->
                analyzeCameraFrame(frame)
            }
            val frontCamera = CameraSelector.Builder()
                .requireLensFacing(CameraSelector.LENS_FACING_FRONT)
                .build()
            provider.bindToLifecycle(this, frontCamera, previewUseCase, analysisUseCase)
        } catch (t: Throwable) {
            Log.e(AppConfig.TAG, "startCameraPreviewAndDetection failed: ${t.message}", t)
        }
    }, ContextCompat.getMainExecutor(this))
}
/** Unbinds all camera use cases (best-effort) and forgets the provider. */
private fun stopCameraPreviewAndDetection() {
    val provider = cameraProvider
    cameraProvider = null
    try {
        provider?.unbindAll()
    } catch (_: Throwable) {
        // Ignore: tearing down an already-released provider is harmless.
    }
}
/**
 * Converts one camera frame to a Bitmap and submits it to the face
 * detection pipeline. The ImageProxy is always closed so CameraX can
 * deliver the next frame.
 */
private fun analyzeCameraFrame(imageProxy: ImageProxy) {
    try {
        ImageProxyBitmapConverter.toBitmap(imageProxy)?.let { frame ->
            faceDetectionPipeline.submitFrame(frame)
        }
    } catch (t: Throwable) {
        Log.w(AppConfig.TAG, "analyzeCameraFrame error: ${t.message}")
    } finally {
        imageProxy.close()
    }
}
/**
 * Traditional (VAD) mode: begins a new capture turn. Resets TTS,
 * segmenter and VAD state, opens the microphone and launches the
 * sample-processing loop. No-op when already recording.
 */
private fun onStartClicked() {
    Log.d(AppConfig.TAG, "onStartClicked called")
    if (isRecording) {
        Log.d(AppConfig.TAG, "Already recording, returning")
        return
    }
    if (!audioProcessor.initMicrophone(micPermissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
        uiManager.showToast("麦克风初始化失败/无权限")
        return
    }
    // Fresh trace session for this turn.
    currentTrace = TraceManager.getInstance().startNewTurn()
    currentTrace?.mark("turn_start")
    llmInFlight = false
    uiManager.clearText()
    // Order matters: clear playback / segmentation / VAD state before
    // audio capture starts.
    ttsController.reset()
    segmenter.reset()
    vadManager.reset()
    audioProcessor.startRecording()
    isRecording = true
    uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = true)
    Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
    // Replace any stale capture loop with a fresh one.
    recordingJob?.cancel()
    recordingJob = ioScope.launch {
        processSamplesLoop()
    }
    Log.d(AppConfig.TAG, "onStartClicked completed")
}
/**
 * Hold-to-speak mode: button pressed. Interrupts any ongoing TTS
 * playback (barge-in), tells the state machine the user started
 * speaking, and begins buffering microphone audio until release.
 */
private fun onRecordButtonDown() {
    Log.d(AppConfig.TAG, "onRecordButtonDown called")
    if (isRecording) {
        Log.d(AppConfig.TAG, "Already recording, returning")
        return
    }
    // If TTS is playing, interrupt it (barge-in).
    val interrupted = ttsController.interruptForNewTurn()
    if (interrupted) {
        uiManager.appendToUi("\n[LOG] 已打断TTS播放\n")
    }
    // Notify the state machine: user started speaking, enter dialogue state.
    interactionController.onUserStartSpeaking()
    if (!audioProcessor.initMicrophone(micPermissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
        uiManager.showToast("麦克风初始化失败/无权限")
        return
    }
    currentTrace = TraceManager.getInstance().startNewTurn()
    currentTrace?.mark("turn_start")
    llmInFlight = false
    uiManager.clearText()
    // interruptForNewTurn() already prepared TTS state for next turn.
    // Keep reset() only for non-interrupt entry points.
    segmenter.reset()
    // Play the hold-to-speak listening motion on the avatar.
    uiManager.startSpecificMotion("hold_to_speak")
    holdToSpeakAudioBuffer.clear()
    audioProcessor.startRecording()
    isRecording = true
    Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
    // Replace any stale capture loop with a fresh one.
    recordingJob?.cancel()
    recordingJob = ioScope.launch {
        processSamplesLoop()
    }
    Log.d(AppConfig.TAG, "onRecordButtonDown completed")
}
/**
 * Hold-to-speak mode: button released. Stops capture and, when at least
 * [HOLD_TO_SPEAK_MIN_SAMPLES] (~1 s at 16 kHz) of audio was collected,
 * submits the entire buffered utterance to ASR as a single segment.
 */
private fun onRecordButtonUp() {
    Log.d(AppConfig.TAG, "onRecordButtonUp called")
    if (!isRecording) {
        Log.d(AppConfig.TAG, "Not recording, returning")
        return
    }
    isRecording = false
    audioProcessor.stopRecording()
    // NOTE(review): the just-cancelled polling job may still be appending
    // to holdToSpeakAudioBuffer while this job reads it — the list is not
    // synchronized; confirm both jobs are confined to the same thread.
    recordingJob?.cancel()
    recordingJob = ioScope.launch {
        // Drain whatever the recorder still holds.
        val audioData = audioProcessor.getRecordedData()
        holdToSpeakAudioBuffer.addAll(audioData.toList())
        if (holdToSpeakAudioBuffer.size >= HOLD_TO_SPEAK_MIN_SAMPLES) {
            val finalAudio = holdToSpeakAudioBuffer.toFloatArray()
            asrManager.enqueueAudioSegment(finalAudio, finalAudio)
        } else {
            // NOTE(review): showToast is invoked from an IO coroutine here —
            // confirm Live2DUiManager marshals toasts to the main thread.
            uiManager.showToast("录音时间太短请长按至少1秒")
        }
        holdToSpeakAudioBuffer.clear()
    }
    Log.d(AppConfig.TAG, "onRecordButtonUp completed")
}
/**
 * Stops capture and playback for the current turn and re-enables the
 * appropriate buttons.
 *
 * @param userInitiated true when the user pressed Stop; only then is the
 *        trace turn closed here (programmatic stops let TTS close it).
 */
private fun onStopClicked(userInitiated: Boolean) {
    isRecording = false
    audioProcessor.stopRecording()
    recordingJob?.cancel()
    recordingJob = null
    ttsController.stop()
    when {
        AppConfig.USE_HOLD_TO_SPEAK -> uiManager.setButtonsEnabled(recordEnabled = true)
        else -> uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
    }
    if (!userInitiated) return
    TraceManager.getInstance().endTurn()
    currentTrace = null
}
/**
 * Pulls samples from the microphone for as long as [isRecording] is set.
 *
 * Hold-to-speak mode simply accumulates raw samples into
 * [holdToSpeakAudioBuffer]; the segment is sent to ASR on button release.
 * Traditional mode streams fixed windows through the VAD, which emits
 * speech segments via its callback. While TTS is playing, mic input is
 * drained and discarded so we never transcribe our own voice.
 */
private suspend fun processSamplesLoop() {
    Log.d(AppConfig.TAG, "processSamplesLoop started")
    if (AppConfig.USE_HOLD_TO_SPEAK) {
        // Hold-to-speak: accumulate audio until the button is released.
        while (isRecording && ioScope.coroutineContext.isActive) {
            val audioData = audioProcessor.getAudioData()
            if (audioData.isNotEmpty()) {
                holdToSpeakAudioBuffer.addAll(audioData.toList())
            }
            // Short sleep keeps CPU usage low while polling.
            kotlinx.coroutines.delay(10)
        }
    } else {
        // Traditional mode: fixed-size windows through the VAD.
        val windowSize = AppConfig.WINDOW_SIZE
        val buffer = ShortArray(windowSize)
        var loopCount = 0
        while (isRecording && ioScope.coroutineContext.isActive) {
            loopCount++
            if (loopCount % 100 == 0) {
                Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsController.isPlaying()}")
            }
            if (ttsController.isPlaying()) {
                if (vadManager.isInSpeech()) {
                    Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
                    vadManager.clearState()
                }
                // Drain and discard mic input while TTS speaks.
                // FIX: removed the dead `if (ret <= 0) continue` that was
                // immediately followed by an unconditional `continue`.
                audioProcessor.readAudio(buffer)
                continue
            }
            val ret = audioProcessor.readAudio(buffer)
            // Skip empty reads and partial windows — VAD expects full windows.
            if (ret <= 0) continue
            if (ret != windowSize) continue
            val chunk = audioProcessor.convertShortToFloat(buffer)
            val processedChunk = audioProcessor.applyGain(chunk)
            val result = vadManager.processAudioChunk(chunk, processedChunk)
            if (vadManager.vadComputeCount % 100 == 0) {
                Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}")
            }
            if (loopCount % 1000 == 0) {
                Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}")
            }
            // Time-based forced segmentation keeps TTS latency bounded
            // during long uninterrupted LLM output.
            val forced = segmenter.maybeForceByTime()
            for (seg in forced) ttsController.enqueueSegment(seg)
        }
        // Flush any trailing speech still buffered inside the VAD.
        vadManager.forceFinalize()
    }
    Log.d(AppConfig.TAG, "processSamplesLoop stopped")
}
/**
 * Maps a Live2D motion file name to the avatar mood shown while the
 * interaction controller plays that motion; unknown motions fall back
 * to the neutral mood.
 */
private fun playInteractionMotion(motionName: String) {
    val moodForMotion = mapOf(
        "haru_g_m22.motion3.json" to "高兴",
        "haru_g_m01.motion3.json" to "中性",
        "haru_g_m17.motion3.json" to "中性",
        "haru_g_m15.motion3.json" to "关心",
        "haru_g_idle.motion3.json" to "平和"
    )
    uiManager.setMood(moodForMotion[motionName] ?: "中性")
}
/**
 * Records one "role: text" line in the rolling recent-conversation window
 * (capped at 12 lines) and mirrors it into the conversation buffer memory,
 * flushing to the database every 5 lines.
 */
private fun appendConversationLine(role: String, text: String) {
    val content = text.trim()
    val line = "$role: $content"
    // A line of 4 chars or fewer is effectively just the role prefix.
    if (line.length <= 4) return
    recentConversationLines.add(line)
    while (recentConversationLines.size > 12) {
        recentConversationLines.removeAt(0)
    }
    recentConversationDirty = true
    // Mirror into persistent conversation memory.
    val memoryRole = when (role) {
        "用户" -> "user"
        else -> "assistant"
    }
    conversationBufferMemory.addMessage(activeUserId, memoryRole, content)
    // Periodic flush keeps DB writes cheap.
    if (recentConversationLines.size % 5 == 0) {
        conversationBufferMemory.saveToDatabase(activeUserId)
    }
}
/**
 * Prefixes [userText] with a compact user-profile header (name, age,
 * gender, hobbies, profile summary, conversation digest) so the cloud
 * LLM can personalize its reply. Returns the text unchanged when nothing
 * is known about the active user.
 */
private fun buildCloudPromptWithUserProfile(userText: String): String {
    val profile = userMemoryStore.getMemory(activeUserId) ?: return userText
    val parts = ArrayList<String>()
    fun addField(label: String, value: String?) {
        if (!value.isNullOrBlank()) parts.add("$label$value")
    }
    addField("姓名:", profile.displayName)
    addField("年龄:", profile.age)
    addField("性别:", profile.gender)
    addField("爱好:", profile.hobbies)
    addField("画像:", profile.profileSummary)
    val conversationSummary = conversationSummaryMemory.getSummary(activeUserId)
    if (conversationSummary.isNotBlank()) {
        parts.add("对话摘要:$conversationSummary")
    }
    if (parts.isEmpty()) return userText
    // Parts are concatenated without a separator, matching prior behavior.
    return buildString {
        append("[用户画像]\n")
        append(parts.joinToString(""))
        append("\n[/用户画像]\n")
        append(userText)
    }
}
/**
 * Runs when the interaction state machine enters IDLE: distills the
 * buffered conversation for the current face-identified user into a
 * summary plus structured profile fields (name/age/gender/hobbies),
 * persists them, then clears the processed conversation buffer.
 * No-op for guest users or when there is no user speech to analyze.
 */
private fun analyzeUserProfileInIdleIfNeeded() {
    // Only analyze face-identified users, never the "guest" fallback.
    if (!activeUserId.startsWith("face_")) {
        Log.d(AppConfig.TAG, "faceID is not face_")
        return
    }
    // Pull the buffered conversation for this user.
    val messages = conversationBufferMemory.getMessages(activeUserId)
    Log.d(AppConfig.TAG, "msg is empty? ${messages.isEmpty()}")
    val hasUserMessages = messages.any { it.role == "user" }
    Log.d(AppConfig.TAG, "msg has user messages? $hasUserMessages")
    if (messages.isEmpty() || !hasUserMessages) return
    // Generate a conversation summary (async, result only logged here).
    conversationSummaryMemory.generateSummary(activeUserId, messages) { summary ->
        Log.d(AppConfig.TAG, "Generated conversation summary for $activeUserId: $summary")
    }
    // Extract profile fields via one focused LLM question per field.
    val dialogue = messages.joinToString("\n") { "${it.role}: ${it.content}" }
    requestMultiAngleProfileExtraction(dialogue) { profileData ->
        try {
            val nameToUpdate = profileData["name"]?.trim()?.ifBlank { null }
            val ageToUpdate = profileData["age"]?.trim()?.ifBlank { null }
            val genderToUpdate = profileData["gender"]?.trim()?.ifBlank { null }
            val hobbiesToUpdate = profileData["hobbies"]?.trim()?.ifBlank { null }
            val summaryToUpdate = profileData["summary"]?.trim()?.ifBlank { null }
            Log.d(TAG_LLM, "profileData: $profileData")
            // Persist only when at least one field was actually extracted.
            if (nameToUpdate != null || ageToUpdate != null || genderToUpdate != null || hobbiesToUpdate != null || summaryToUpdate != null) {
                if (nameToUpdate != null) {
                    userMemoryStore.updateDisplayName(activeUserId, nameToUpdate)
                    Log.i(TAG_LLM, "Updated display name to $nameToUpdate")
                }
                userMemoryStore.updateProfile(activeUserId, ageToUpdate, genderToUpdate, hobbiesToUpdate, summaryToUpdate)
                // Clear the conversation that has now been folded into
                // the profile, so it is not re-analyzed next idle.
                conversationBufferMemory.clear(activeUserId)
                runOnUiThread {
                    uiManager.appendToUi("\n[Memory] 已更新用户画像: $activeUserId\n")
                }
            }
        } catch (e: Exception) {
            Log.w(TAG_LLM, "Profile parse failed: ${e.message}")
        }
    }
}
/**
 * Extracts user-profile fields by asking the local LLM one focused
 * question per field (name, age, gender, hobbies, free-form summary)
 * over the same dialogue transcript.
 *
 * [onResult] fires exactly once with the merged map (keys: name / age /
 * gender / hobbies / summary) after all five answers arrive. When no
 * local LLM is loaded, or the dispatch itself fails, an empty map is
 * delivered instead.
 */
private fun requestMultiAngleProfileExtraction(dialogue: String, onResult: (Map<String, String>) -> Unit) {
    try {
        val local = llmManager
        if (local == null) {
            onResult(emptyMap())
            return
        }
        val questions = listOf(
            "请从对话中提取用户的姓名,只返回姓名,如果没有提到姓名,请返回未知",
            "请从对话中提取用户的年龄,只返回年龄,如果没有提到年龄,请返回未知",
            "请从对话中提取用户的性别,只返回性别,如果没有提到性别,请返回未知",
            "请从对话中提取用户的爱好,只返回爱好,如果没有提到爱好,请返回未知",
            "请总结对话,只返回总结的内容"
        )
        // FIX: the LLM callbacks may arrive on arbitrary threads. The
        // original used a plain `completed++` counter and an unsynchronized
        // map, so the "all done" check could be missed or double-fired.
        // An atomic counter and a synchronized map make both race-free.
        val completed = java.util.concurrent.atomic.AtomicInteger(0)
        val results = java.util.Collections.synchronizedMap(mutableMapOf<String, String>())
        questions.forEach { question ->
            val prompt = buildMultiAnglePrompt(dialogue, question)
            local.generate(prompt) { answer ->
                val processedAnswer = processProfileAnswer(answer)
                when {
                    question.contains("姓名") -> results["name"] = processedAnswer
                    question.contains("年龄") -> results["age"] = processedAnswer
                    question.contains("性别") -> results["gender"] = processedAnswer
                    question.contains("爱好") -> results["hobbies"] = processedAnswer
                    question.contains("总结") -> results["summary"] = processedAnswer
                }
                // Fire exactly once, after the final answer lands.
                if (completed.incrementAndGet() == questions.size) {
                    onResult(results)
                }
            }
        }
    } catch (e: Exception) {
        Log.e(TAG_LLM, "requestMultiAngleProfileExtraction failed: ${e.message}", e)
        onResult(emptyMap())
    }
}
/**
 * Wraps the dialogue transcript and a single extraction question into the
 * prompt template used by [requestMultiAngleProfileExtraction].
 *
 * NOTE(review): the template relies on trimIndent(), which runs AFTER
 * interpolation — the rendered prompt therefore depends on both the
 * literal's indentation in source and the indentation of $dialogue lines.
 * Verify the rendered prompt if this block is ever re-indented.
 */
private fun buildMultiAnglePrompt(dialogue: String, question: String): String {
    return """
请根据以下对话回答问题:
对话内容:
$dialogue
问题:$question
回答:
""".trimIndent()
}
/**
 * Normalizes a raw LLM profile answer: strips angle brackets, maps any
 * "unknown"-style answer (English or 未知) to the empty string, keeps only
 * the text after a full-width colon label when present, and drops ASCII
 * periods before trimming.
 */
private fun processProfileAnswer(answer: String): String {
    val stripped = answer.replace("<", "").replace(">", "")
    val isUnknown = stripped.contains("unknown", ignoreCase = true) ||
        stripped.contains("null", ignoreCase = true) ||
        stripped.contains("未知")
    if (isUnknown) return ""
    val afterLabel = if (stripped.contains(":")) {
        stripped.substringAfter(":").trim()
    } else {
        stripped
    }
    return afterLabel.replace(".", "").trim()
}
/**
 * Extracts the first balanced-looking `{...}` span from [text] and parses
 * it as JSON; when no braces are found, the trimmed text itself is parsed
 * (letting JSONObject raise the parse error).
 */
private fun parseFirstJsonObject(text: String): JSONObject {
    val trimmed = text.trim()
    val open = trimmed.indexOf('{')
    val close = trimmed.lastIndexOf('}')
    return if (open >= 0 && close > open) {
        JSONObject(trimmed.substring(open, close + 1))
    } else {
        JSONObject(trimmed)
    }
}
/**
 * Initializes the local RKLLM model (used only for the "memory"/inner
 * thought state, never for dialogue turns).
 *
 * The result callback accumulates streamed chunks into a buffer and, on
 * completion, routes the final text to exactly one consumer in priority
 * order: pending profile callback, then pending thought callback, then
 * (when not in silent mode) directly to the UI + TTS.
 */
private fun initLLM() {
    try {
        Log.i(TAG_LLM, "initLLM called for memory-local model")
        // Tear down any previous instance before re-initializing.
        llmManager?.destroy()
        llmManager = null
        val modelPath = FileHelper.getLLMModelPath(applicationContext)
        if (!File(modelPath).exists()) {
            throw IllegalStateException("RKLLM model file missing: $modelPath")
        }
        Log.i(AppConfig.TAG, "Initializing local memory LLM with model path: $modelPath")
        // Accumulates streamed chunks until the model signals completion.
        val localLlmResponseBuffer = StringBuilder()
        llmManager = LLMManager(modelPath, object : LLMManagerCallback {
            override fun onThinking(msg: String, finished: Boolean) {
                Log.d(TAG_LLM, "LOCAL onThinking finished=$finished msg=${msg.take(60)}")
            }
            override fun onResult(msg: String, finished: Boolean) {
                Log.d(TAG_LLM, "LOCAL onResult finished=$finished len=${msg.length}")
                runOnUiThread {
                    if (!finished) {
                        // Intermediate chunk: buffer it, and echo to the UI
                        // only when streaming is on and not in silent mode.
                        localLlmResponseBuffer.append(msg)
                        if (enableStreaming && !localThoughtSilentMode) {
                            uiManager.appendToUi(msg)
                        }
                        return@runOnUiThread
                    }
                    // Final chunk: take and clear the buffered text.
                    val finalText = localLlmResponseBuffer.toString().trim()
                    localLlmResponseBuffer.setLength(0)
                    // Priority 1: a pending profile-extraction callback.
                    val profileCallback = pendingLocalProfileCallback
                    pendingLocalProfileCallback = null
                    if (profileCallback != null) {
                        profileCallback(finalText)
                        localThoughtSilentMode = false
                        return@runOnUiThread
                    }
                    // Priority 2: a pending "inner thought" callback.
                    val callback = pendingLocalThoughtCallback
                    pendingLocalThoughtCallback = null
                    if (callback != null) {
                        callback(finalText)
                        localThoughtSilentMode = false
                        return@runOnUiThread
                    }
                    // Priority 3: no consumer registered — speak it directly.
                    if (!localThoughtSilentMode && finalText.isNotEmpty()) {
                        uiManager.appendToUi("$finalText\n")
                        ttsController.enqueueSegment(finalText)
                        ttsController.enqueueEnd()
                    }
                    localThoughtSilentMode = false
                }
            }
        })
        Log.i(TAG_LLM, "LOCAL memory LLM initialized")
        useLocalLLM = true
        // Optional on-board summary test for the local model.
        if (AppConfig.OnboardTesting.LOCAL_LLM_SUMMARY) {
            llmSummaryTest = LLMSummaryTest(this)
            ioScope.launch {
                kotlinx.coroutines.delay(5000) // wait 5 s for LLMManager init to settle
                runOnUiThread {
                    if (llmManager != null) {
                        llmSummaryTest.setLLMManager(llmManager!!)
                        llmSummaryTest.runTest { message ->
                            Log.i(AppConfig.TAG, message)
                            uiManager.appendToUi("\n$message\n")
                        }
                    }
                }
            }
        }
    } catch (e: Exception) {
        Log.e(AppConfig.TAG, "Failed to initialize LLM: ${e.message}", e)
        Log.e(TAG_LLM, "LOCAL init failed: ${e.message}", e)
        useLocalLLM = false
        runOnUiThread {
            uiManager.setLLMSwitchChecked(false)
            uiManager.showToast("LLM 初始化失败: ${e.message}", Toast.LENGTH_LONG)
            uiManager.appendToUi("\n[错误] LLM 初始化失败: ${e.message}\n")
        }
    }
}
/**
 * Memory-state query: asks the local LLM for a short inner-monologue
 * line. The reply is delivered asynchronously through
 * [pendingLocalThoughtCallback] by the result handler installed in
 * initLLM(). Falls back to a canned thought when no local model is
 * loaded or the dispatch fails.
 */
private fun requestLocalThought(prompt: String, onResult: (String) -> Unit) {
    try {
        val local = llmManager ?: run {
            onResult("我在想,下次见面可以聊聊今天的新鲜事。")
            return
        }
        localThoughtSilentMode = true
        pendingLocalThoughtCallback = onResult
        Log.i(TAG_LLM, "Routing memory thought to LOCAL")
        local.generateResponseWithSystem(
            "你是数字人内心独白模块,输出一句简短温和的想法。",
            prompt
        )
    } catch (e: Exception) {
        Log.e(TAG_LLM, "requestLocalThought failed: ${e.message}", e)
        pendingLocalThoughtCallback = null
        localThoughtSilentMode = false
        onResult("我在想,下次见面可以聊聊今天的新鲜事。")
    }
}
}