diff --git a/app/build.gradle b/app/build.gradle index 31c87bf..2035fb5 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -32,7 +32,7 @@ android { defaultConfig { applicationId "com.digitalperson" - minSdk 21 + minSdk 22 targetSdk 33 versionCode 1 versionName "1.0" @@ -97,4 +97,7 @@ dependencies { implementation 'androidx.room:room-runtime:2.5.2' kapt 'androidx.room:room-compiler:2.5.2' implementation 'androidx.room:room-ktx:2.5.2' + + implementation project(':tuanjieLibrary') + implementation files('../tuanjieLibrary/libs/unity-classes.jar') } diff --git a/app/note/design_doc b/app/note/design_doc index 92fe7c2..1fb51e6 100644 --- a/app/note/design_doc +++ b/app/note/design_doc @@ -253,4 +253,23 @@ https://tianchi.aliyun.com/dataset/93864 SELECT * FROM table_name; # 查询数据 .headers on # 显示列名 .mode column # 列模式显示 - .quit # 退出 \ No newline at end of file + .quit # 退出 + +13. Unity 集成时遇到的问题: + 1. 问题描述:NDK的版本不对,导致编译错误 + 解决方法: + - 在 build.gradle 中指定 NDK 版本 + ndkVersion "23.1.7779620" + 2. 问题描述:Unity 编译时提示 NDK 路径错误 + 解决方法: + - 在 build.gradle 中指定 NDK 路径 + ndkPath "D:/software/2022.3.62t5/Editor/Data/PlaybackEngines/AndroidPlayer/NDK" + + 3. 问题描述:Build file 'D:\code\digital_person\tuanjieLibrary\build.gradle' + Could not get unknown property 'tuanjieStreamingAssets' for object of type com.android.build.gradle.internal.dsl.LibraryAndroidResourcesImpl$AgpDecorated. + 解决方法: + - 在项目的顶层的 gradle.properties 中添加 tuanjieStreamingAssets 配置 + tuanjieStreamingAssets=.unity3d, google-services-desktop.json, google-services.json, GoogleService-Info.plist + + + diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml index 1b87526..cdd3b40 100644 --- a/app/src/main/AndroidManifest.xml +++ b/app/src/main/AndroidManifest.xml @@ -15,15 +15,17 @@ android:usesCleartextTraffic="true"> + + diff --git a/app/src/main/java/com/digitalperson/DigitalPersonLauncherActivity.kt b/app/src/main/java/com/digitalperson/DigitalPersonLauncherActivity.kt new file mode 100644 index 0000000..e4110f7 --- /dev/null +++ b/app/src/main/java/com/digitalperson/DigitalPersonLauncherActivity.kt @@ -0,0 +1,22 @@ +package com.digitalperson + +import android.content.Intent +import android.os.Bundle +import androidx.appcompat.app.AppCompatActivity +import com.digitalperson.config.AppConfig + +class DigitalPersonLauncherActivity : AppCompatActivity() { + override fun onCreate(savedInstanceState: Bundle?) { + super.onCreate(savedInstanceState) + + // 根据配置启动相应的Activity + val intent = if (AppConfig.Avatar.isUnity()) { + Intent(this, UnityDigitalPersonActivity::class.java) + } else { + Intent(this, Live2DChatActivity::class.java) + } + + startActivity(intent) + finish() + } +} \ No newline at end of file diff --git a/app/src/main/java/com/digitalperson/EntryActivity.kt b/app/src/main/java/com/digitalperson/EntryActivity.kt index 656790a..45a6843 100644 --- a/app/src/main/java/com/digitalperson/EntryActivity.kt +++ b/app/src/main/java/com/digitalperson/EntryActivity.kt @@ -14,12 +14,12 @@ class EntryActivity : AppCompatActivity() { override fun onCreate(savedInstanceState: Bundle?) { super.onCreate(savedInstanceState) - val target = if (AppConfig.Avatar.USE_LIVE2D) { + val target = if (AppConfig.Avatar.isLive2D()) { Live2DChatActivity::class.java } else { MainActivity::class.java } - Log.i(TAG, "USE_LIVE2D=${AppConfig.Avatar.USE_LIVE2D}, target=${target.simpleName}") + Log.i(TAG, "DIGITAL_PERSON_TYPE=${AppConfig.Avatar.DIGITAL_PERSON_TYPE}, target=${target.simpleName}") startActivity(Intent(this, target)) finish() } diff --git a/app/src/main/java/com/digitalperson/Live2DChatActivity.kt b/app/src/main/java/com/digitalperson/Live2DChatActivity.kt index c5fcb42..3d5ed45 100644 --- a/app/src/main/java/com/digitalperson/Live2DChatActivity.kt +++ b/app/src/main/java/com/digitalperson/Live2DChatActivity.kt @@ -637,6 +637,8 @@ class Live2DChatActivity : AppCompatActivity() { } override fun onTtsSegmentCompleted(durationMs: Long) {} + + override fun onTtsAudioData(data: ByteArray) {} override fun isTtsStopped(): Boolean = !isRecording diff --git a/app/src/main/java/com/digitalperson/MainActivity.kt b/app/src/main/java/com/digitalperson/MainActivity.kt index 095dcbd..7bbe5c0 100644 --- a/app/src/main/java/com/digitalperson/MainActivity.kt +++ b/app/src/main/java/com/digitalperson/MainActivity.kt @@ -265,6 +265,8 @@ class MainActivity : AppCompatActivity() { } override fun onTtsSegmentCompleted(durationMs: Long) {} + + override fun onTtsAudioData(data: ByteArray) {} override fun isTtsStopped(): Boolean = !isRecording diff --git a/app/src/main/java/com/digitalperson/UnityDigitalPersonActivity.kt b/app/src/main/java/com/digitalperson/UnityDigitalPersonActivity.kt new file mode 100644 index 0000000..26f1e4b --- /dev/null +++ b/app/src/main/java/com/digitalperson/UnityDigitalPersonActivity.kt @@ -0,0 +1,558 @@ +package com.digitalperson + +import android.Manifest +import android.content.pm.PackageManager +import android.os.Bundle +import android.os.Handler +import android.os.Looper +import android.util.Log +import android.view.MotionEvent +import android.view.ViewGroup +import android.widget.Button +import android.widget.EditText +import android.widget.TextView +import androidx.core.app.ActivityCompat +import androidx.core.content.ContextCompat +import android.util.Base64 +import android.view.View +import com.unity3d.player.UnityPlayer +import com.unity3d.player.UnityPlayerActivity +import com.digitalperson.audio.AudioProcessor +import com.digitalperson.asr.AsrManager +import com.digitalperson.config.AppConfig +import com.digitalperson.data.AppDatabase +import com.digitalperson.face.FaceDetectionPipeline +import com.digitalperson.interaction.ConversationBufferMemory +import com.digitalperson.interaction.ConversationSummaryMemory +import com.digitalperson.interaction.UserMemoryStore +import com.digitalperson.llm.LLMManager +import com.digitalperson.llm.LLMManagerCallback +import com.digitalperson.tts.TtsController +import com.digitalperson.util.FileHelper +import com.digitalperson.vad.VadManager +import kotlinx.coroutines.* + +class UnityDigitalPersonActivity : UnityPlayerActivity() { + + // ==================== 伴生对象(静态成员)==================== + companion object { + private var instance: UnityDigitalPersonActivity? = null + + + + + } + + // ==================== 核心模块 ==================== + private lateinit var conversationBufferMemory: ConversationBufferMemory + private lateinit var conversationSummaryMemory: ConversationSummaryMemory + private var llmManager: LLMManager? = null + private lateinit var faceDetectionPipeline: FaceDetectionPipeline + private lateinit var userMemoryStore: UserMemoryStore + + private lateinit var chatHistoryText: TextView + private lateinit var holdToSpeakButton: Button + private lateinit var messageInput: EditText + private lateinit var sendButton: Button + + + + // 音频和AI模块 + private lateinit var asrManager: AsrManager + private lateinit var ttsController: TtsController + private lateinit var audioProcessor: AudioProcessor + private lateinit var vadManager: VadManager + + // ==================== 状态标志 ==================== + @Volatile + private var isRecording: Boolean = false + + @Volatile + private var llmInFlight: Boolean = false + + private var useLocalLLM = false // 默认使用云端 LLM + + // ==================== TTS回调相关 ==================== + private var isTTSPlaying = false + private val ttsHandler = Handler(Looper.getMainLooper()) + private var ttsStopRunnable: Runnable? = null + private var ttsStartRunnable: Runnable? = null + private var ttsCallback: Runnable? = null + private var ttsStopCallback: Runnable? = null + private var unityAudioTargetObject: String = "DigitalPerson" + + // 非静态方法,供Unity调用 + fun setUnityAudioTarget(gameObjectName: String) { + unityAudioTargetObject = gameObjectName + Log.d("UnityDigitalPerson", "Unity audio target set: $gameObjectName") + } + + fun setTTSCallback(callback: Runnable) { + instance?.ttsCallback = callback + Log.d("UnityDigitalPerson", "TTS callback registered") + } + + + fun setTTSStopCallback(callback: Runnable) { + instance?.ttsStopCallback = callback + Log.d("UnityDigitalPerson", "TTS stop callback registered") + } + + // ==================== 音频处理 ==================== + private val holdToSpeakAudioBuffer = mutableListOf() + private val HOLD_TO_SPEAK_MIN_SAMPLES = 16000 // 1秒的音频数据 + + // ==================== 协程 ==================== + private val ioScope = CoroutineScope(SupervisorJob() + Dispatchers.IO) + private var recordingJob: Job? = null + private var asrWorkerJob: Job? = null + + // ==================== 权限 ==================== + private val micPermissions: Array = arrayOf(Manifest.permission.RECORD_AUDIO) + private val cameraPermissions: Array = arrayOf(Manifest.permission.CAMERA) + + // ==================== 生命周期 ==================== + override fun onCreate(savedInstanceState: Bundle?) { + super.onCreate(savedInstanceState) + + // 设置单例实例 + instance = this + + Log.d("UnityDigitalPerson", "Initializing with config: ${AppConfig.Avatar.UNITY_MODEL_PATH}") + + // 添加对话界面 + addChatUI() + + // 初始化所有组件 + initComponents() + } + + override fun onDestroy() { + super.onDestroy() + // 清理资源 + stopRecording() + recordingJob?.cancel() + asrWorkerJob?.cancel() + ioScope.cancel() + ttsController.stop() + asrManager.release() + llmManager?.destroy() + instance = null + } + + override fun onRequestPermissionsResult( + requestCode: Int, + permissions: Array, + grantResults: IntArray + ) { + super.onRequestPermissionsResult(requestCode, permissions, grantResults) + if (requestCode == AppConfig.REQUEST_RECORD_AUDIO_PERMISSION) { + if (grantResults.isNotEmpty() && grantResults[0] == PackageManager.PERMISSION_GRANTED) { + Log.d("UnityDigitalPerson", "麦克风权限已授予") + } else { + Log.e("UnityDigitalPerson", "麦克风权限被拒绝") + } + } + } + + // ==================== UI初始化 ==================== + private fun addChatUI() { + try { + // 创建一个包含聊天UI的布局 + val chatLayout = layoutInflater.inflate(R.layout.activity_unity_digital_person, null) + + // 获取UI组件 + chatHistoryText = chatLayout.findViewById(R.id.my_text) + holdToSpeakButton = chatLayout.findViewById(R.id.record_button) + + // 根据配置设置按钮可见性 + if (AppConfig.USE_HOLD_TO_SPEAK) { + holdToSpeakButton.visibility = View.VISIBLE + } else { + holdToSpeakButton.visibility = View.GONE + } + + // 设置按钮监听器 + holdToSpeakButton.setOnTouchListener { _, event -> + when (event.action) { + MotionEvent.ACTION_DOWN -> onRecordButtonDown() + MotionEvent.ACTION_UP, MotionEvent.ACTION_CANCEL -> onRecordButtonUp() + } + true + } + + // 将聊天UI添加到Unity视图上方 + addContentView(chatLayout, ViewGroup.LayoutParams( + ViewGroup.LayoutParams.MATCH_PARENT, + ViewGroup.LayoutParams.MATCH_PARENT + )) + + Log.d("UnityDigitalPerson", "Chat UI added successfully") + } catch (e: Exception) { + Log.e("UnityDigitalPerson", "Failed to add chat UI: ${e.message}", e) + } + } + + // ==================== 组件初始化 ==================== + private fun initComponents() { + val database = AppDatabase.getInstance(this) + + // 内存模块 + conversationBufferMemory = ConversationBufferMemory(database) + userMemoryStore = UserMemoryStore(this) + + // 人脸检测 + faceDetectionPipeline = FaceDetectionPipeline( + context = this, + onResult = { result -> + Log.d("UnityDigitalPerson", "Face detection result: ${result.faces.size} faces") + }, + onPresenceChanged = { present, faceIdentityId, recognizedName -> + Log.d("UnityDigitalPerson", "Presence changed: present=$present, faceId=$faceIdentityId, name=$recognizedName") + } + ) + + // 音频处理器 + audioProcessor = AudioProcessor(this) + + // VAD管理器 + vadManager = VadManager(this) + + // ASR管理器 + asrManager = AsrManager(this).apply { + setCallback(object : AsrManager.AsrCallback { + override fun onAsrStarted() { + Log.d("UnityDigitalPerson", "ASR started") + } + + override fun onAsrResult(text: String) { + Log.d("UnityDigitalPerson", "ASR result: $text") + if (text.isNotEmpty()) { + appendChat("用户: $text") + processUserMessage(text) + } + } + + override fun onAsrSkipped(reason: String) { + Log.d("UnityDigitalPerson", "ASR skipped: $reason") + } + + override fun shouldSkipAsr(): Boolean = false + + override fun isLlmInFlight(): Boolean = llmInFlight + + override fun onLlmCalled(text: String) { + Log.d("UnityDigitalPerson", "LLM called with: $text") + } + }) + setAudioProcessor(audioProcessor) + initSenseVoiceModel() + } + asrWorkerJob?.cancel() + asrWorkerJob = ioScope.launch { + asrManager.runAsrWorker() + } + + // TTS控制器 + ttsController = TtsController(this).apply { + setCallback(object : TtsController.TtsCallback { + override fun onTtsStarted(text: String) { + Log.d("UnityDigitalPerson", "TTS started: $text") + startTTSPlayback() + } + + override fun onTtsCompleted() { + Log.d("UnityDigitalPerson", "TTS completed") + stopTTSPlayback() + } + + override fun onTtsSegmentCompleted(durationMs: Long) { + Log.d("UnityDigitalPerson", "TTS segment completed: $durationMs ms") + } + + override fun onTtsAudioData(data: ByteArray) { + sendTTSAudioToUnity(data) + } + + override fun isTtsStopped(): Boolean = false + + override fun onClearAsrQueue() { + Log.d("UnityDigitalPerson", "Clear ASR queue") + } + + override fun onSetSpeaking(speaking: Boolean) { + Log.d("UnityDigitalPerson", "Set speaking: $speaking") + } + + override fun onEndTurn() { + Log.d("UnityDigitalPerson", "End turn") + } + }) + init() + } + + // 初始化LLM + initLLM() + + // 初始化人脸检测 + faceDetectionPipeline.initialize() + + // 检查权限并开始录音 + checkPermissions() + } + + // ==================== LLM初始化 ==================== + private fun initLLM() { + try { + Log.i("UnityDigitalPerson", "initLLM called for memory-local model") + llmManager?.destroy() + llmManager = null + + val modelPath = FileHelper.getLLMModelPath(this) + if (!java.io.File(modelPath).exists()) { + throw IllegalStateException("RKLLM model file missing: $modelPath") + } + + Log.i("UnityDigitalPerson", "Initializing local memory LLM with model path: $modelPath") + val localLlmResponseBuffer = StringBuilder() + + llmManager = LLMManager(modelPath, object : LLMManagerCallback { + override fun onThinking(msg: String, finished: Boolean) { + Log.d("UnityDigitalPerson", "LOCAL onThinking finished=$finished msg=${msg.take(60)}") + } + + override fun onResult(msg: String, finished: Boolean) { + Log.d("UnityDigitalPerson", "LOCAL onResult finished=$finished len=${msg.length}") + runOnUiThread { + if (!finished) { + localLlmResponseBuffer.append(msg) + return@runOnUiThread + } + val finalText = localLlmResponseBuffer.toString().trim() + localLlmResponseBuffer.setLength(0) + if (finalText.isNotEmpty()) { + appendChat("助手: $finalText") + // 使用TTS播放回复 + ttsController.enqueueSegment(finalText) + ttsController.enqueueEnd() + } + llmInFlight = false + } + } + }) + + // 初始化ConversationSummaryMemory + conversationSummaryMemory = ConversationSummaryMemory( + AppDatabase.getInstance(this), + llmManager + ) + + } catch (e: Exception) { + Log.e("UnityDigitalPerson", "Failed to initialize LLM: ${e.message}", e) + } + } + + // ==================== 权限检查 ==================== + private fun checkPermissions() { + if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) + != PackageManager.PERMISSION_GRANTED) { + ActivityCompat.requestPermissions( + this, + micPermissions, + AppConfig.REQUEST_RECORD_AUDIO_PERMISSION + ) + } + + // 可选:检查摄像头权限 +// if (ContextCompat.checkSelfPermission(this, Manifest.permission.CAMERA) +// != PackageManager.PERMISSION_GRANTED) { +// ActivityCompat.requestPermissions( +// this, +// cameraPermissions, +// AppConfig.REQUEST_CAMERA_PERMISSION +// ) +// } + } + + // ==================== 录音控制 ==================== + private fun startRecording() { + if (isRecording) return + + if (!audioProcessor.initMicrophone(micPermissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) { + Log.e("UnityDigitalPerson", "麦克风初始化失败/无权限") + return + } + + llmInFlight = false + ttsController.reset() + vadManager.reset() + audioProcessor.startRecording() + isRecording = true + + Log.d("UnityDigitalPerson", "Starting processSamplesLoop coroutine") + recordingJob?.cancel() + recordingJob = ioScope.launch { + processSamplesLoop() + } + Log.d("UnityDigitalPerson", "startRecording completed") + } + + private fun onRecordButtonDown() { + if (isRecording) return + ttsController.interruptForNewTurn() + holdToSpeakAudioBuffer.clear() + startRecording() + } + + private fun onRecordButtonUp() { + if (!isRecording) return + isRecording = false + audioProcessor.stopRecording() + recordingJob?.cancel() + recordingJob = ioScope.launch { + val audioData = audioProcessor.getRecordedData() + holdToSpeakAudioBuffer.addAll(audioData.toList()) + if (holdToSpeakAudioBuffer.size >= HOLD_TO_SPEAK_MIN_SAMPLES) { + val finalAudio = holdToSpeakAudioBuffer.toFloatArray() + asrManager.enqueueAudioSegment(finalAudio, finalAudio) + } else { + runOnUiThread { appendChat("[系统] 录音时间太短,请长按至少1秒") } + } + holdToSpeakAudioBuffer.clear() + } + } + + private fun stopRecording() { + if (!isRecording) return + + isRecording = false + audioProcessor.stopRecording() + + recordingJob?.cancel() + recordingJob = null + + ttsController.stop() + Log.d("UnityDigitalPerson", "stopRecording completed") + } + + // ==================== 音频处理循环 ==================== + private suspend fun processSamplesLoop() { + Log.d("UnityDigitalPerson", "processSamplesLoop started") + + if (AppConfig.USE_HOLD_TO_SPEAK) { + // 按住说话模式:累积音频数据到一定长度后再发送给ASR + while (isRecording && ioScope.coroutineContext.isActive) { + val audioData = audioProcessor.getAudioData() + if (audioData.isNotEmpty()) { + holdToSpeakAudioBuffer.addAll(audioData.toList()) + } + // 避免CPU占用过高 + delay(10) + } + } else { + // 传统模式:使用VAD + val windowSize = AppConfig.WINDOW_SIZE + val buffer = ShortArray(windowSize) + var loopCount = 0 + + while (isRecording && ioScope.coroutineContext.isActive) { + loopCount++ + if (loopCount % 100 == 0) { + Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsController.isPlaying()}") + } + + if (ttsController.isPlaying()) { + if (vadManager.isInSpeech()) { + Log.d(AppConfig.TAG, "TTS playing, resetting VAD state") + vadManager.clearState() + } + val ret = audioProcessor.readAudio(buffer) + if (ret <= 0) continue + continue + } + + val ret = audioProcessor.readAudio(buffer) + if (ret <= 0) continue + if (ret != windowSize) continue + + val chunk = audioProcessor.convertShortToFloat(buffer) + val processedChunk = audioProcessor.applyGain(chunk) + + val result = vadManager.processAudioChunk(chunk, processedChunk) + + if (vadManager.vadComputeCount % 100 == 0) { + Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}") + } + + if (loopCount % 1000 == 0) { + Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}") + } + } + + vadManager.forceFinalize() + + } + } + + // ==================== 消息处理 ==================== + private fun processUserMessage(message: String) { + conversationBufferMemory.addMessage(activeUserId, "user", message) + + llmInFlight = true + llmManager?.generateResponseWithSystem( + getSystemPrompt(), + message + ) + } + + private fun getSystemPrompt(): String { + return "你是一个友好的数字人助手。" + } + + private fun appendChat(text: String) { + runOnUiThread { + chatHistoryText.append(text + "\n") + } + } + + private val activeUserId: String + get() = "face_1" + + // ==================== TTS控制 ==================== + private fun startTTSPlayback() { + if (isTTSPlaying) return + + isTTSPlaying = true + + ttsStartRunnable?.let { ttsHandler.removeCallbacks(it) } + ttsStartRunnable = Runnable { + if (ttsCallback != null) { + runOnUiThread(ttsCallback!!) + } + } + ttsHandler.postDelayed(ttsStartRunnable!!, 100) // 100ms延迟,等待音频开始 + } + + private fun sendTTSAudioToUnity(data: ByteArray) { + if (data.isEmpty()) return + try { + val base64 = Base64.encodeToString(data, Base64.NO_WRAP) + UnityPlayer.UnitySendMessage(unityAudioTargetObject, "OnTTSAudioDataBase64", base64) + } catch (e: Exception) { + Log.w("UnityDigitalPerson", "sendTTSAudioToUnity failed: ${e.message}") + } + } + + private fun stopTTSPlayback() { + if (!isTTSPlaying) return + + ttsStopRunnable?.let { ttsHandler.removeCallbacks(it) } + ttsStopRunnable = Runnable { + isTTSPlaying = false + if (ttsStopCallback != null) { + runOnUiThread(ttsStopCallback!!) + } + } + ttsHandler.postDelayed(ttsStopRunnable!!, 500) // 500ms延迟,避免短暂中断 + } +} \ No newline at end of file diff --git a/app/src/main/java/com/digitalperson/config/AppConfig.kt b/app/src/main/java/com/digitalperson/config/AppConfig.kt index 9dcef1e..7d80d62 100644 --- a/app/src/main/java/com/digitalperson/config/AppConfig.kt +++ b/app/src/main/java/com/digitalperson/config/AppConfig.kt @@ -61,11 +61,30 @@ object AppConfig { object Avatar { // Compile-time switch in gradle.properties/local.properties: USE_LIVE2D=true|false - const val USE_LIVE2D = BuildConfig.USE_LIVE2D + // const val USE_LIVE2D = BuildConfig.USE_LIVE2D // const val MODEL_DIR = "live2d_model/mao_pro_zh" // const val MODEL_JSON = "mao_pro.model3.json" - const val MODEL_DIR = "live2d_model/Haru_pro_jp" - const val MODEL_JSON = "haru_greeter_t05.model3.json" + // const val MODEL_DIR = "live2d_model/Haru_pro_jp" + // const val MODEL_JSON = "haru_greeter_t05.model3.json" + // 数字人类型: "live2d" 或 "unity" + const val DIGITAL_PERSON_TYPE = "unity" + // Live2D 配置 + const val LIVE2D_MODEL_DIR = "live2d_model/Haru_pro_jp" + const val LIVE2D_MODEL_JSON = "haru_greeter_t05.model3.json" + const val LIVE2D_SCALE = 1.0f + // Unity 配置 + const val UNITY_MODEL_PATH = "asobi_chan_b" + const val UNITY_SCALE = 1.0f + + // 检查是否使用Unity + fun isUnity(): Boolean { + return DIGITAL_PERSON_TYPE == "unity" + } + + // 检查是否使用Live2D + fun isLive2D(): Boolean { + return DIGITAL_PERSON_TYPE == "live2d" + } } object QCloud { diff --git a/app/src/main/java/com/digitalperson/live2d/Live2DRenderer.kt b/app/src/main/java/com/digitalperson/live2d/Live2DRenderer.kt index 2a24e45..5140199 100644 --- a/app/src/main/java/com/digitalperson/live2d/Live2DRenderer.kt +++ b/app/src/main/java/com/digitalperson/live2d/Live2DRenderer.kt @@ -34,10 +34,10 @@ class Live2DRenderer( val model = Live2DCharacter() model.loadFromAssets( assets = context.assets, - modelDir = AppConfig.Avatar.MODEL_DIR, - modelJsonName = AppConfig.Avatar.MODEL_JSON + modelDir = AppConfig.Avatar.LIVE2D_MODEL_DIR, + modelJsonName = AppConfig.Avatar.LIVE2D_MODEL_JSON ) - model.bindTextures(context.assets, AppConfig.Avatar.MODEL_DIR) + model.bindTextures(context.assets, AppConfig.Avatar.LIVE2D_MODEL_DIR) character = model Log.i(TAG, "Live2D model loaded and textures bound") }.onFailure { diff --git a/app/src/main/java/com/digitalperson/tts/QCloudTtsManager.kt b/app/src/main/java/com/digitalperson/tts/QCloudTtsManager.kt index 689e1a0..03a904e 100644 --- a/app/src/main/java/com/digitalperson/tts/QCloudTtsManager.kt +++ b/app/src/main/java/com/digitalperson/tts/QCloudTtsManager.kt @@ -57,6 +57,7 @@ class QCloudTtsManager(private val context: Context) { fun onTtsStarted(text: String) fun onTtsCompleted() fun onTtsSegmentCompleted(durationMs: Long) + fun onTtsAudioData(data: ByteArray) fun isTtsStopped(): Boolean fun onClearAsrQueue() fun onSetSpeaking(speaking: Boolean) @@ -314,6 +315,7 @@ class QCloudTtsManager(private val context: Context) { } val data = ByteArray(buffer.remaining()) buffer.get(data) + callback?.onTtsAudioData(data) writeAudioTrack(audioTrack, data) } diff --git a/app/src/main/java/com/digitalperson/tts/TtsController.kt b/app/src/main/java/com/digitalperson/tts/TtsController.kt index ff5cb1c..7a6d5e7 100644 --- a/app/src/main/java/com/digitalperson/tts/TtsController.kt +++ b/app/src/main/java/com/digitalperson/tts/TtsController.kt @@ -17,6 +17,7 @@ class TtsController(private val context: Context) { fun onTtsStarted(text: String) fun onTtsCompleted() fun onTtsSegmentCompleted(durationMs: Long) + fun onTtsAudioData(data: ByteArray) fun isTtsStopped(): Boolean fun onClearAsrQueue() fun onSetSpeaking(speaking: Boolean) @@ -45,6 +46,10 @@ class TtsController(private val context: Context) { cb.onTtsSegmentCompleted(durationMs) } + override fun onTtsAudioData(data: ByteArray) { + cb.onTtsAudioData(data) + } + override fun isTtsStopped(): Boolean { return cb.isTtsStopped() } @@ -94,6 +99,10 @@ class TtsController(private val context: Context) { cb.onTtsSegmentCompleted(durationMs) } + override fun onTtsAudioData(data: ByteArray) { + cb.onTtsAudioData(data) + } + override fun isTtsStopped(): Boolean { return cb.isTtsStopped() } diff --git a/app/src/main/java/com/digitalperson/tts/TtsManager.kt b/app/src/main/java/com/digitalperson/tts/TtsManager.kt index b5c725a..4366087 100644 --- a/app/src/main/java/com/digitalperson/tts/TtsManager.kt +++ b/app/src/main/java/com/digitalperson/tts/TtsManager.kt @@ -47,6 +47,7 @@ class TtsManager(private val context: Context) { fun onTtsStarted(text: String) fun onTtsCompleted() fun onTtsSegmentCompleted(durationMs: Long) + fun onTtsAudioData(data: ByteArray) fun isTtsStopped(): Boolean fun onClearAsrQueue() fun onSetSpeaking(speaking: Boolean) @@ -305,6 +306,7 @@ class TtsManager(private val context: Context) { trace?.markTtsFirstAudioPlay() callback?.onTraceMarkTtsFirstAudioPlay() } + callback?.onTtsAudioData(floatSamplesToPcm16(samples)) audioTrack.write(samples, 0, samples.size, AudioTrack.WRITE_BLOCKING) ttsTotalSamplesWritten += samples.size 1 @@ -360,4 +362,15 @@ class TtsManager(private val context: Context) { } Thread.sleep(1000) } + + private fun floatSamplesToPcm16(samples: FloatArray): ByteArray { + val out = ByteArray(samples.size * 2) + var j = 0 + for (s in samples) { + val v = (s.coerceIn(-1f, 1f) * 32767f).toInt().toShort() + out[j++] = (v.toInt() and 0xFF).toByte() + out[j++] = ((v.toInt() shr 8) and 0xFF).toByte() + } + return out + } } diff --git a/app/src/main/res/layout/activity_unity_digital_person.xml b/app/src/main/res/layout/activity_unity_digital_person.xml new file mode 100644 index 0000000..a7261f7 --- /dev/null +++ b/app/src/main/res/layout/activity_unity_digital_person.xml @@ -0,0 +1,86 @@ + + + + + + + + + + + + +