tts_asr_with_video

This commit is contained in:
gcw_4spBpAfv
2026-02-28 10:14:03 +08:00
parent 6aa84d6b77
commit d63d4b03cf
13 changed files with 1823 additions and 128 deletions

View File

@@ -0,0 +1,957 @@
package com.digitalperson
import android.Manifest
import android.content.pm.PackageManager
import android.media.AudioAttributes
import android.media.AudioFormat
import android.media.AudioManager
import android.media.AudioRecord
import android.media.AudioTrack
import android.media.MediaRecorder
import android.media.audiofx.AcousticEchoCanceler
import android.media.audiofx.NoiseSuppressor
import android.os.Bundle
import android.os.SystemClock
import android.text.method.ScrollingMovementMethod
import android.util.Log
import android.widget.Button
import android.widget.TextView
import android.widget.Toast
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import com.digitalperson.cloud.CloudApiManager
import com.digitalperson.player.VideoPlayerManager
import com.google.android.exoplayer2.ui.PlayerView
import com.digitalperson.engine.SenseVoiceEngineRKNN
import com.digitalperson.metrics.TraceManager
import com.digitalperson.metrics.TraceSession
import com.k2fsa.sherpa.onnx.OfflineTts
import com.k2fsa.sherpa.onnx.SileroVadModelConfig
import com.k2fsa.sherpa.onnx.Vad
import com.k2fsa.sherpa.onnx.VadModelConfig
import com.k2fsa.sherpa.onnx.getOfflineTtsConfig
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.cancel
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.isActive
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
import java.io.File
import java.io.FileOutputStream
import java.util.concurrent.LinkedBlockingQueue
import java.util.concurrent.atomic.AtomicBoolean
import kotlin.math.max
// Log tag shared by this activity's audio/ASR/TTS pipeline.
private const val TAG = "DigitalPerson"
// Request code used to match the RECORD_AUDIO permission callback.
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
/**
 * Voice-interaction activity: microphone -> VAD -> SenseVoice(RKNN) ASR ->
 * cloud LLM -> sherpa-onnx TTS, with a dual-video "digital person" avatar
 * that switches between silent and speaking clips while TTS plays.
 */
class MainActivity : AppCompatActivity() {
    // UI controls.
    private lateinit var startButton: Button
    private lateinit var stopButton: Button
    private lateinit var textView: TextView

    // Native speech components, created during background init in onCreate.
    private lateinit var vad: Vad
    private var senseVoice: SenseVoiceEngineRKNN? = null
    private var tts: OfflineTts? = null
    private var track: AudioTrack? = null

    // Optional platform audio effects attached to the record session.
    private var aec: AcousticEchoCanceler? = null
    private var ns: NoiseSuppressor? = null

    // Microphone capture configuration: 16 kHz mono PCM16 from the plain MIC source.
    private var audioRecord: AudioRecord? = null
    private val audioSource = MediaRecorder.AudioSource.MIC
    private val sampleRateInHz = 16000
    private val channelConfig = AudioFormat.CHANNEL_IN_MONO
    private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    @Volatile
    private var isRecording: Boolean = false

    // All background work (model init, capture loop, TTS/ASR workers) runs here.
    private val ioScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
    private var recordingJob: Job? = null
    // Serializes every call into the native VAD/ASR engines.
    private val nativeLock = Any()

    private lateinit var cloudApiManager: CloudApiManager
    private var videoPlayerManager: VideoPlayerManager? = null

    // Splits streamed LLM text into TTS-sized segments.
    private val segmenter = StreamingTextSegmenter(
        maxLen = 30,
        maxWaitMs = 600
    )

    // Work items for the TTS worker: text segments terminated by an End sentinel.
    private sealed class TtsQueueItem {
        data class Segment(val text: String) : TtsQueueItem()
        data object End : TtsQueueItem()
    }

    private val ttsQueue = LinkedBlockingQueue<TtsQueueItem>()
    private val ttsStopped = AtomicBoolean(false)
    private val ttsWorkerRunning = AtomicBoolean(false)
    private val ttsPlaying = AtomicBoolean(false)
    // Samples written to the AudioTrack this turn; used by waitForPlaybackComplete.
    @Volatile private var ttsTotalSamplesWritten: Long = 0

    private var currentTrace: TraceSession? = null
    private var lastUiText: String = ""
    @Volatile private var llmInFlight: Boolean = false
    private var enableStreaming = true // streaming LLM output enabled by default

    // ASR queue and worker.
    // NOTE(review): finalizeSegmentIfAny() sends Pair(originalSeg, processedSeg),
    // i.e. two FloatArrays, and the companion diff declares this as
    // Channel<Pair<FloatArray, FloatArray>>(capacity = Channel.UNLIMITED). The
    // TraceSession? element type and the default rendezvous capacity here look
    // stale — confirm against the repository version.
    private val asrQueue = Channel<Pair<FloatArray, TraceSession?>>()
    private val asrWorkerRunning = AtomicBoolean(false)
/** Finishes the activity when the RECORD_AUDIO permission is not granted. */
override fun onRequestPermissionsResult(
    requestCode: Int,
    permissions: Array<String>,
    grantResults: IntArray
) {
    super.onRequestPermissionsResult(requestCode, permissions, grantResults)
    val granted = requestCode == REQUEST_RECORD_AUDIO_PERMISSION &&
        grantResults.firstOrNull() == PackageManager.PERMISSION_GRANTED
    if (granted) return
    Log.e(TAG, "Audio record is disallowed")
    finish()
}
/**
 * Wires the UI, initializes the dual video players, requests the microphone
 * permission, loads VAD/ASR/TTS off the main thread (to avoid ANR), creates
 * the cloud LLM client, and starts the ASR worker.
 */
override fun onCreate(savedInstanceState: Bundle?) {
    super.onCreate(savedInstanceState)
    setContentView(R.layout.activity_main)
    // Dual-player manager: "silent" and "speaking" PlayerViews stacked over each other.
    try {
        val silentPv = findViewById<PlayerView>(R.id.player_view_silent)
        val speakingPv = findViewById<PlayerView>(R.id.player_view_speaking)
        videoPlayerManager = VideoPlayerManager(this, silentPv, speakingPv)
        // The avatar starts in the non-speaking state.
        videoPlayerManager?.setSpeaking(false)
    } catch (e: Exception) {
        Log.w(TAG, "PlayerViews not found or init failed: ${e.message}")
    }
    ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
    startButton = findViewById(R.id.start_button)
    stopButton = findViewById(R.id.stop_button)
    textView = findViewById(R.id.my_text)
    textView.movementMethod = ScrollingMovementMethod()
    startButton.setOnClickListener { onStartClicked() }
    stopButton.setOnClickListener { onStopClicked(userInitiated = true) }
    // Optional switch toggling streaming LLM output at runtime.
    try {
        val streamingSwitch = findViewById<android.widget.Switch>(R.id.streaming_switch)
        streamingSwitch.isChecked = enableStreaming
        streamingSwitch.setOnCheckedChangeListener { _, isChecked ->
            enableStreaming = isChecked
            cloudApiManager.setEnableStreaming(isChecked)
            Toast.makeText(this, "流式输出已${if (isChecked) "启用" else "禁用"}", Toast.LENGTH_SHORT).show()
        }
    } catch (e: Exception) {
        Log.w(TAG, "Streaming switch not found in layout: ${e.message}")
    }
    // Initialize models and the AudioTrack in the background to avoid ANR.
    startButton.isEnabled = false
    stopButton.isEnabled = false
    textView.text = "初始化中…"
    ioScope.launch {
        try {
            Log.i(TAG, "Init VAD + SenseVoice(RKNN) + TTS (background)")
            synchronized(nativeLock) {
                initVadModel()
                initSenseVoiceModel()
            }
            withContext(Dispatchers.Main) {
                initTtsAndAudioTrack()
                textView.text = getString(R.string.hint)
                startButton.isEnabled = true
                stopButton.isEnabled = false
            }
        } catch (t: Throwable) {
            Log.e(TAG, "Initialization failed: ${t.message}", t)
            withContext(Dispatchers.Main) {
                textView.text = "初始化失败:${t.javaClass.simpleName}: ${t.message}"
                Toast.makeText(
                    this@MainActivity,
                    "初始化失败(请看 Logcat: ${t.javaClass.simpleName}",
                    Toast.LENGTH_LONG
                ).show()
                startButton.isEnabled = false
                stopButton.isEnabled = false
            }
        }
    }
    cloudApiManager = CloudApiManager(object : CloudApiManager.CloudApiListener {
        private var llmFirstChunkMarked = false
        override fun onLLMResponseReceived(response: String) {
            currentTrace?.markLlmDone()
            llmInFlight = false
            if (enableStreaming) {
                // Streaming: flush whatever is left in the segmenter buffer.
                for (seg in segmenter.flush()) {
                    enqueueTtsSegment(seg)
                }
                // Signal the end of this turn's TTS queue.
                ttsQueue.offer(TtsQueueItem.End)
            } else {
                runOnUiThread {
                    appendToUi("${response}\n")
                }
                // Non-streaming: synthesize the whole response as one segment.
                enqueueTtsSegment(response)
                ttsQueue.offer(TtsQueueItem.End)
            }
        }
        override fun onLLMStreamingChunkReceived(chunk: String) {
            // Only meaningful while streaming output is enabled.
            if (enableStreaming) {
                if (!llmFirstChunkMarked) {
                    llmFirstChunkMarked = true
                    currentTrace?.markLlmFirstChunk()
                }
                appendToUi(chunk)
                val segments = segmenter.processChunk(chunk)
                for (seg in segments) {
                    enqueueTtsSegment(seg)
                }
            }
        }
        override fun onTTSAudioReceived(audioFilePath: String) {
            // unused
        }
        override fun onError(errorMessage: String) {
            llmInFlight = false
            Toast.makeText(this@MainActivity, errorMessage, Toast.LENGTH_LONG).show()
            onStopClicked(userInitiated = false)
        }
    }, applicationContext)
    // Apply the initial streaming mode.
    cloudApiManager.setEnableStreaming(enableStreaming)
    // Fix: start the ASR worker up-front. asrQueue is a rendezvous channel, so
    // without a running consumer finalizeSegmentIfAny()'s asrQueue.send() would
    // suspend forever and no speech would ever reach the ASR/LLM pipeline.
    // (The commit diff adds exactly this call at the end of onCreate.)
    ensureAsrWorker()
}
/**
 * Tears down recording, coroutines, native engines, TTS, the AudioTrack and
 * the video players. Native releases are serialized under [nativeLock]; every
 * release is best-effort so one failure cannot block the rest of the teardown.
 */
override fun onDestroy() {
    super.onDestroy()
    onStopClicked(userInitiated = false)
    ioScope.cancel()
    synchronized(nativeLock) {
        try {
            vad.release()
        } catch (_: Throwable) {
        }
        try {
            senseVoice?.deinitialize()
        } catch (_: Throwable) {
        }
    }
    try {
        tts?.release()
    } catch (_: Throwable) {
    }
    // Fix: the AudioTrack created in initTtsAndAudioTrack() was never released
    // anywhere, leaking native audio resources across activity recreations.
    try {
        track?.release()
    } catch (_: Throwable) {
    }
    track = null
    try {
        videoPlayerManager?.release()
    } catch (_: Throwable) {
    }
}
/**
 * Starts a new voice turn: opens the microphone, resets per-turn TTS/trace/UI
 * state, begins capture, and launches the sample-processing loop on [ioScope].
 */
private fun onStartClicked() {
    if (isRecording) return
    if (!initMicrophone()) {
        Toast.makeText(this, "麦克风初始化失败/无权限", Toast.LENGTH_SHORT).show()
        return
    }
    // Fix: avoid `audioRecord!!` — use a checked local instead.
    val record = audioRecord
    if (record == null) {
        Toast.makeText(this, "麦克风初始化失败/无权限", Toast.LENGTH_SHORT).show()
        return
    }
    // Start a new trace turn.
    currentTrace = TraceManager.getInstance().startNewTurn()
    currentTrace?.mark("turn_start")
    llmInFlight = false
    lastUiText = ""
    textView.text = ""
    ttsStopped.set(false)
    ttsPlaying.set(false)
    ttsTotalSamplesWritten = 0
    ttsQueue.clear()
    segmenter.reset()
    vad.reset()
    // Fix: startRecording() throws IllegalStateException when the recorder is
    // not properly initialized; fail the turn gracefully instead of crashing.
    try {
        record.startRecording()
    } catch (e: IllegalStateException) {
        Log.e(TAG, "AudioRecord.startRecording failed: ${e.message}", e)
        Toast.makeText(this, "麦克风初始化失败/无权限", Toast.LENGTH_SHORT).show()
        return
    }
    isRecording = true
    startButton.isEnabled = false
    stopButton.isEnabled = true
    recordingJob?.cancel()
    recordingJob = ioScope.launch {
        processSamplesLoop()
    }
}
/**
 * Stops capture and playback and resets per-turn state.
 *
 * @param userInitiated true when the user pressed Stop — only then is the
 *        trace turn closed here; error/teardown paths leave the trace for the
 *        TTS worker to finish.
 */
private fun onStopClicked(userInitiated: Boolean) {
    isRecording = false
    // Best-effort microphone teardown (stop may throw if never started).
    try {
        audioRecord?.stop()
    } catch (_: Throwable) {
    }
    try {
        audioRecord?.release()
    } catch (_: Throwable) {
    }
    audioRecord = null
    recordingJob?.cancel()
    recordingJob = null
    // Tell the TTS worker to abort and drop any queued segments.
    ttsStopped.set(true)
    ttsPlaying.set(false)
    ttsTotalSamplesWritten = 0
    ttsQueue.clear()
    // wake worker if waiting
    ttsQueue.offer(TtsQueueItem.End)
    // Halt playback without releasing the (reused) AudioTrack.
    try {
        track?.pause()
        track?.flush()
    } catch (_: Throwable) {
    }
    // Release the per-session audio effects; they are recreated in initMicrophone().
    try { aec?.release() } catch (_: Throwable) {}
    try { ns?.release() } catch (_: Throwable) {}
    aec = null
    ns = null
    startButton.isEnabled = true
    stopButton.isEnabled = false
    if (userInitiated) {
        TraceManager.getInstance().endTurn()
        currentTrace = null
    }
}
/** Builds the Silero VAD from the bundled model under assets/vad_model/. */
private fun initVadModel() {
    val sileroConfig = SileroVadModelConfig(
        model = "vad_model/silero_vad.onnx",
        threshold = 0.5F,
        minSilenceDuration = 0.25F,
        minSpeechDuration = 0.25F,
        windowSize = 512,
    )
    val vadConfig = VadModelConfig(
        sileroVadModelConfig = sileroConfig,
        sampleRate = sampleRateInHz,
        numThreads = 1,
        provider = "cpu",
    )
    vad = Vad(assetManager = application.assets, config = vadConfig)
}
/**
 * Initializes the SenseVoice RKNN ASR engine from model files copied out of
 * assets into internal storage, logging native-library and model-file
 * diagnostics along the way.
 *
 * @throws IllegalStateException when the native libraries fail to load or the
 *         model cannot be initialized.
 */
private fun initSenseVoiceModel() {
    Log.i(TAG, "ASR: init SenseVoice RKNN (scheme A)")
    // Copy assets/sensevoice_models/* -> filesDir/sensevoice_models/*
    val modelDir = copySenseVoiceAssetsToInternal()
    val modelPath = File(modelDir, "sense-voice-encoder.rknn").absolutePath
    val embeddingPath = File(modelDir, "embedding.npy").absolutePath
    val bpePath = File(modelDir, "chn_jpn_yue_eng_ko_spectok.bpe.model").absolutePath
    // Print quick diagnostics for native libs + model files
    try {
        val libDir = applicationInfo.nativeLibraryDir
        Log.i(TAG, "nativeLibraryDir=$libDir")
        try {
            val names = File(libDir).list()?.joinToString(", ") ?: "(empty)"
            Log.i(TAG, "nativeLibraryDir files: $names")
        } catch (t: Throwable) {
            Log.w(TAG, "Failed to list nativeLibraryDir: ${t.message}")
        }
    } catch (_: Throwable) {
    }
    Log.i(TAG, "SenseVoice model paths:")
    Log.i(TAG, " model=$modelPath exists=${File(modelPath).exists()} size=${File(modelPath).length()}")
    Log.i(TAG, " embedding=$embeddingPath exists=${File(embeddingPath).exists()} size=${File(embeddingPath).length()}")
    Log.i(TAG, " bpe=$bpePath exists=${File(bpePath).exists()} size=${File(bpePath).length()}")
    val t0 = SystemClock.elapsedRealtime()
    val engine = try {
        SenseVoiceEngineRKNN(this)
    } catch (e: UnsatisfiedLinkError) {
        // Most common: libsensevoiceEngine.so not packaged/built, or dependent libs missing
        throw IllegalStateException("Load native libraries failed: ${e.message}", e)
    }
    val ok = try {
        engine.loadModelDirectly(modelPath, embeddingPath, bpePath)
    } catch (t: Throwable) {
        throw IllegalStateException("SenseVoice loadModelDirectly crashed: ${t.message}", t)
    }
    val dt = SystemClock.elapsedRealtime() - t0
    Log.i(TAG, "SenseVoice loadModelDirectly ok=$ok costMs=$dt")
    if (!ok) throw IllegalStateException("SenseVoiceEngineRKNN loadModelDirectly returned false")
    senseVoice = engine
}
/**
 * Initializes the sherpa-onnx VITS TTS engine from assets and creates a
 * float-PCM streaming AudioTrack at the engine's sample rate. When TTS init
 * fails, [tts] stays null, no AudioTrack is created, and synthesis is
 * disabled for the session (a toast explains the expected asset layout).
 */
private fun initTtsAndAudioTrack() {
    try {
        // The sherpa-onnx VITS Chinese model directory:
        // assets/tts_model/sherpa-onnx-vits-zh-ll/{model.onnx,tokens.txt,lexicon.txt,...}
        val modelDir = "tts_model/sherpa-onnx-vits-zh-ll"
        val modelName = "model.onnx"
        val lexicon = "lexicon.txt"
        val dataDir = ""
        val ttsConfig = getOfflineTtsConfig(
            modelDir = modelDir,
            modelName = modelName,
            acousticModelName = "",
            vocoder = "",
            voices = "",
            lexicon = lexicon,
            dataDir = dataDir,
            dictDir = "",
            // Chinese text-normalization rules (these .fst files ship in the model dir).
            ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst,$modelDir/new_heteronym.fst",
            ruleFars = "",
            numThreads = null,
            isKitten = false
        )
        tts = OfflineTts(assetManager = application.assets, config = ttsConfig)
    } catch (t: Throwable) {
        Log.e(TAG, "Init TTS failed: ${t.message}", t)
        tts = null
        runOnUiThread {
            Toast.makeText(
                this,
                "TTS 初始化失败:请确认 assets/tts_model/sherpa-onnx-vits-zh-ll/ 下有 model.onnx、tokens.txt、lexicon.txt 以及 phone/date/number/new_heteronym.fst",
                Toast.LENGTH_LONG
            ).show()
        }
    }
    val t = tts ?: return
    val sr = t.sampleRate()
    // NOTE(review): getMinBufferSize can return a negative error code for
    // unsupported configurations — no guard here; confirm acceptable on target devices.
    val bufLength = AudioTrack.getMinBufferSize(
        sr,
        AudioFormat.CHANNEL_OUT_MONO,
        AudioFormat.ENCODING_PCM_FLOAT
    )
    val attr = AudioAttributes.Builder()
        .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
        .setUsage(AudioAttributes.USAGE_MEDIA)
        .build()
    val format = AudioFormat.Builder()
        .setEncoding(AudioFormat.ENCODING_PCM_FLOAT)
        .setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
        .setSampleRate(sr)
        .build()
    track = AudioTrack(
        attr,
        format,
        bufLength,
        AudioTrack.MODE_STREAM,
        AudioManager.AUDIO_SESSION_ID_GENERATE
    )
    // Start in streaming mode immediately; writes happen in runTtsWorker().
    track?.play()
}
/** Returns true when [path] can be opened from the APK assets. */
private fun assetExists(path: String): Boolean =
    runCatching { application.assets.open(path).close() }.isSuccess
/**
 * Copies the SenseVoice model files from assets/sensevoice_models/ into
 * filesDir/sensevoice_models/ (the RKNN loader needs real file paths, not
 * asset streams). Files already present and non-empty are kept.
 *
 * Fix: each file is written to a ".tmp" sibling and then renamed into place,
 * so an interrupted copy can never leave a truncated-but-non-empty file that
 * the `length() > 0` check would skip (and then feed to the engine) forever.
 *
 * @return the directory containing the copied model files.
 */
private fun copySenseVoiceAssetsToInternal(): File {
    val outDir = File(filesDir, "sensevoice_models")
    if (!outDir.exists()) outDir.mkdirs()
    val files = arrayOf(
        "am.mvn",
        "chn_jpn_yue_eng_ko_spectok.bpe.model",
        "embedding.npy",
        "sense-voice-encoder.rknn"
    )
    for (name in files) {
        val outFile = File(outDir, name)
        if (outFile.exists() && outFile.length() > 0) continue
        val tmpFile = File(outDir, "$name.tmp")
        application.assets.open("sensevoice_models/$name").use { input ->
            FileOutputStream(tmpFile).use { output ->
                input.copyTo(output)
            }
        }
        if (!tmpFile.renameTo(outFile)) {
            tmpFile.delete()
            throw IllegalStateException("Failed to move $tmpFile to $outFile")
        }
    }
    return outDir
}
/**
 * Creates the 16 kHz mono PCM16 AudioRecord and attaches AEC/NS effects when
 * the device supports them.
 *
 * @return true when the recorder is ready; false when permission is missing
 *         (a request is issued) or the recorder could not be initialized.
 */
private fun initMicrophone(): Boolean {
    if (ActivityCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
        != PackageManager.PERMISSION_GRANTED
    ) {
        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
        return false
    }
    val numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
    // Fix: getMinBufferSize returns a negative error code for unsupported
    // configurations; the original constructed a broken recorder from it.
    if (numBytes <= 0) {
        Log.e(TAG, "AudioRecord.getMinBufferSize failed: $numBytes")
        return false
    }
    val record = AudioRecord(
        audioSource,
        sampleRateInHz,
        channelConfig,
        audioFormat,
        numBytes * 2
    )
    // Fix: verify the recorder actually initialized; the original returned true
    // unconditionally and could crash later in startRecording().
    if (record.state != AudioRecord.STATE_INITIALIZED) {
        Log.e(TAG, "AudioRecord failed to initialize")
        try {
            record.release()
        } catch (_: Throwable) {
        }
        return false
    }
    audioRecord = record
    val sessionId = record.audioSessionId
    if (sessionId != 0) {
        // Use the already-imported effect classes (the original repeated the
        // fully-qualified android.media.audiofx names).
        if (AcousticEchoCanceler.isAvailable()) {
            aec = AcousticEchoCanceler.create(sessionId)?.apply {
                enabled = true
            }
            Log.i(TAG, "AEC enabled=${aec?.enabled}")
        } else {
            Log.w(TAG, "AEC not available on this device")
        }
        if (NoiseSuppressor.isAvailable()) {
            ns = NoiseSuppressor.create(sessionId)?.apply {
                enabled = true
            }
            Log.i(TAG, "NS enabled=${ns?.enabled}")
        } else {
            Log.w(TAG, "NS not available on this device")
        }
    }
    return true
}
/**
 * Microphone capture loop: reads 512-sample windows, applies RMS-based
 * automatic gain, runs the Silero VAD per window, and segments speech with a
 * dual-threshold (hysteresis) state machine. Completed segments are queued
 * for the ASR worker; per-window VAD diagnostics are saved to CSV on exit.
 */
private suspend fun processSamplesLoop() {
    // Avoid calling vad.front()/vad.pop() (native queue APIs) since it crashes on some builds.
    // Use vad.compute() and implement a simple VAD segmenter in Kotlin instead.
    val windowSize = 512
    val buffer = ShortArray(windowSize)
    // Dual-threshold (hysteresis) settings.
    val startThreshold = 0.2f // probability at which we enter the speech state
    val endThreshold = 0.15f // probability below which we start counting silence
    val minSilenceSamples = (0.5f * sampleRateInHz).toInt()
    val minSpeechSamples = (0.1f * sampleRateInHz).toInt()
    val maxSpeechSamples = (5.0f * sampleRateInHz).toInt()
    // Per-window VAD diagnostics, dumped via saveVadData() when the loop exits.
    val vadProbabilities = mutableListOf<Float>()
    val vadTimestamps = mutableListOf<Long>()
    val vadRMSValues = mutableListOf<Float>()
    val vadSmoothedRMSValues = mutableListOf<Float>()
    // Exponential smoothing of the RMS level.
    var smoothedRms = 0f
    val alpha = 0.8f // smoothing coefficient
    var inSpeech = false
    var silenceSamples = 0
    var speechBuf = FloatArray(0)
    var speechLen = 0
    var processedSpeechBuf = FloatArray(0)
    var processedSpeechLen = 0
    // Appends one window to both segment buffers (raw and gain-processed),
    // growing each geometrically and capping at maxSpeechSamples.
    fun appendSpeech(chunk: FloatArray, processedChunk: FloatArray) {
        // Raw audio.
        val needed = speechLen + chunk.size
        if (speechBuf.size < needed) {
            var newCap = maxOf(needed, maxOf(1024, speechBuf.size * 2))
            if (newCap > maxSpeechSamples) newCap = maxSpeechSamples
            val n = FloatArray(newCap)
            if (speechLen > 0) System.arraycopy(speechBuf, 0, n, 0, speechLen)
            speechBuf = n
        }
        val copyN = minOf(chunk.size, max(0, maxSpeechSamples - speechLen))
        if (copyN > 0) {
            System.arraycopy(chunk, 0, speechBuf, speechLen, copyN)
            speechLen += copyN
        }
        // Gain-processed audio.
        val processedNeeded = processedSpeechLen + processedChunk.size
        if (processedSpeechBuf.size < processedNeeded) {
            var newCap = maxOf(processedNeeded, maxOf(1024, processedSpeechBuf.size * 2))
            if (newCap > maxSpeechSamples) newCap = maxSpeechSamples
            val n = FloatArray(newCap)
            if (processedSpeechLen > 0) System.arraycopy(processedSpeechBuf, 0, n, 0, processedSpeechLen)
            processedSpeechBuf = n
        }
        val processedCopyN = minOf(processedChunk.size, max(0, maxSpeechSamples - processedSpeechLen))
        if (processedCopyN > 0) {
            System.arraycopy(processedChunk, 0, processedSpeechBuf, processedSpeechLen, processedCopyN)
            processedSpeechLen += processedCopyN
        }
    }
    // Closes the current speech segment (if long enough), queues it for ASR,
    // and resets the segmenter state.
    suspend fun finalizeSegmentIfAny() {
        if (speechLen < minSpeechSamples) {
            speechLen = 0
            processedSpeechLen = 0
            inSpeech = false
            silenceSamples = 0
            return
        }
        // Drop the segment while TTS is playing or an LLM request is in flight
        // (it is most likely echo of our own output).
        if (ttsPlaying.get() || llmInFlight) {
            speechLen = 0
            processedSpeechLen = 0
            inSpeech = false
            silenceSamples = 0
            return
        }
        val originalSeg = speechBuf.copyOf(speechLen)
        val processedSeg = processedSpeechBuf.copyOf(processedSpeechLen)
        speechLen = 0
        processedSpeechLen = 0
        inSpeech = false
        silenceSamples = 0
        // Hand the segment to the ASR worker asynchronously.
        // NOTE(review): this sends two FloatArrays, but asrQueue is declared as
        // Channel<Pair<FloatArray, TraceSession?>> with rendezvous capacity —
        // the element type and capacity look stale (the commit diff shows
        // Pair<FloatArray, FloatArray> with Channel.UNLIMITED); confirm.
        asrQueue.send(Pair(originalSeg, processedSeg))
    }
    while (isRecording && ioScope.coroutineContext.isActive) {
        val ret = audioRecord?.read(buffer, 0, buffer.size) ?: break
        if (ret <= 0) continue
        if (ret != windowSize) continue
        // Convert PCM16 to normalized floats in [-1, 1).
        val chunk = FloatArray(ret) { buffer[it] / 32768.0f }
        // Current window RMS (root mean square).
        val rms = calculateRMS(chunk)
        // Exponential smoothing of the level.
        smoothedRms = if (smoothedRms == 0f) rms else alpha * rms + (1 - alpha) * smoothedRms
        // Dynamic gain toward a target RMS of 0.1 (about -20 dB).
        val targetRMS = 0.1f
        var gainFactor = if (smoothedRms > 0) targetRMS / smoothedRms else 3.0f
        // Clamp the gain to avoid clipping from over-amplification.
        gainFactor = gainFactor.coerceIn(0.1f, 10.0f)
        // Apply the gain.
        val processedChunk = FloatArray(chunk.size) {
            val value = chunk[it] * gainFactor
            // Hard-limit to [-1, 1] to avoid clipping.
            if (value > 1.0f) 1.0f else if (value < -1.0f) -1.0f else value
        }
        // Run the VAD on the gain-processed window.
        val prob = synchronized(nativeLock) { vad.compute(processedChunk) }
        // Record VAD probability, timestamp, raw RMS and smoothed RMS.
        vadProbabilities.add(prob)
        vadTimestamps.add(System.currentTimeMillis())
        vadRMSValues.add(rms)
        vadSmoothedRMSValues.add(smoothedRms)
        // Dual-threshold state machine.
        if (!inSpeech && prob >= startThreshold) {
            // Enter the speech state.
            inSpeech = true
            silenceSamples = 0
            appendSpeech(chunk, processedChunk)
        } else if (inSpeech && prob <= endThreshold) {
            // Count silence samples.
            silenceSamples += ret
            if (silenceSamples >= minSilenceSamples) {
                // Leave the speech state.
                finalizeSegmentIfAny()
            } else {
                // Keep the trailing audio.
                appendSpeech(chunk, processedChunk)
            }
        } else if (inSpeech) {
            // Mid-speech: keep appending audio.
            appendSpeech(chunk, processedChunk)
            silenceSamples = 0 // reset the silence counter
            if (speechLen >= maxSpeechSamples) {
                finalizeSegmentIfAny()
            }
        }
        // Not in speech and below the start threshold: nothing to do.
        // Time-based forced segmentation (avoids a slow first TTS packet when
        // the LLM emits long stretches without punctuation).
        val forced = segmenter.maybeForceByTime()
        for (seg in forced) enqueueTtsSegment(seg)
    }
    // flush last partial segment
    finalizeSegmentIfAny()
    // Persist the VAD diagnostics for offline analysis/plotting.
    saveVadData(vadTimestamps, vadProbabilities, vadRMSValues, vadSmoothedRMSValues)
}
/**
 * Dumps one recording session's VAD trace (timestamp, probability, raw RMS,
 * smoothed RMS) as a CSV file under filesDir/vad_data/ for offline analysis
 * and plotting. Failures are logged and swallowed — diagnostics must never
 * break the capture path.
 */
private fun saveVadData(
    timestamps: List<Long>,
    probabilities: List<Float>,
    rmsValues: List<Float>,
    smoothedRmsValues: List<Float>
) {
    try {
        // Ensure the output directory exists.
        val vadDataDir = File(filesDir, "vad_data")
        if (!vadDataDir.exists()) {
            vadDataDir.mkdirs()
        }
        // Unique, timestamped file name per session.
        val outputFile = File(vadDataDir, "vad_data_${System.currentTimeMillis()}.csv")
        FileOutputStream(outputFile).use { fos ->
            // Header row.
            fos.write("timestamp,probability,rms,smoothed_rms\n".toByteArray())
            // One data row per VAD window.
            for (i in timestamps.indices) {
                fos.write("${timestamps[i]},${probabilities[i]},${rmsValues[i]},${smoothedRmsValues[i]}\n".toByteArray())
            }
        }
        Log.d(TAG, "Saved VAD data to: ${outputFile.absolutePath}")
    } catch (e: Exception) {
        Log.e(TAG, "Error saving VAD data: ${e.message}")
    }
}
/**
 * Strips SenseVoice decorator tokens (e.g. <|zh|>, <|NEUTRAL|>, <|Speech|>)
 * plus stray '>'-like characters, then trims and collapses whitespace.
 */
private fun removeTokens(text: String): String =
    text.replace(Regex("<\\|[^>]+\\|>"), "")
        .replace(Regex("[>>≥≫]"), "")
        .trim()
        .replace(Regex("\\s+"), " ")
/**
 * Strips trailing sentence punctuation from [seg] and hands it to the TTS
 * queue, starting the TTS worker if it is not already running.
 */
// NOTE(review): several char literals below render as empty ('') in this view —
// the fullwidth punctuation (e.g. ！？，；：) appears to have been lost by the
// page rendering; confirm against the repository file before editing this line.
private fun enqueueTtsSegment(seg: String) {
    val cleanedSeg = seg.trimEnd('.', '。', '!', '', '?', '', ',', '', ';', '', ':', '')
    currentTrace?.markTtsRequestEnqueued()
    ttsQueue.offer(TtsQueueItem.Segment(cleanedSeg))
    ensureTtsWorker()
}
/** Launches the TTS worker coroutine unless one is already running. */
private fun ensureTtsWorker() {
    val won = ttsWorkerRunning.compareAndSet(false, true)
    if (!won) return
    ioScope.launch {
        try {
            runTtsWorker()
        } finally {
            // Allow a fresh worker to be started for the next turn.
            ttsWorkerRunning.set(false)
        }
    }
}
/** Launches the ASR worker coroutine unless one is already running. */
private fun ensureAsrWorker() {
    val won = asrWorkerRunning.compareAndSet(false, true)
    if (!won) return
    ioScope.launch {
        try {
            runAsrWorker()
        } finally {
            // Allow a fresh worker to be started later.
            asrWorkerRunning.set(false)
        }
    }
}
/**
 * TTS worker loop: takes items off [ttsQueue], synthesizes each Segment with
 * the offline engine while streaming PCM into the AudioTrack, and on the End
 * sentinel drains playback, flips the avatar back to silent, and closes the
 * trace turn. Runs on [ioScope]; the blocking take()/write() calls are
 * acceptable on the IO dispatcher.
 */
private fun runTtsWorker() {
    val t = tts ?: return
    val audioTrack = track ?: return
    var firstAudioMarked = false
    var isFirstSegment = true
    while (true) {
        val item = ttsQueue.take()
        if (ttsStopped.get()) break
        when (item) {
            is TtsQueueItem.Segment -> {
                ttsPlaying.set(true)
                runOnUiThread { videoPlayerManager?.setSpeaking(true) }
                val trace = currentTrace
                trace?.markTtsSynthesisStart()
                Log.d(TAG, "TTS started: processing segment '${item.text}'")
                runOnUiThread {
                    appendToUi("\n[TTS] 开始合成...\n")
                }
                val startMs = System.currentTimeMillis()
                var firstPcmMarked = false
                // Reset the track position before the first segment of a turn so
                // waitForPlaybackComplete's sample accounting starts from zero.
                if (isFirstSegment) {
                    try {
                        audioTrack.pause()
                        audioTrack.flush()
                        audioTrack.play()
                    } catch (_: Throwable) {
                    }
                    isFirstSegment = false
                }
                t.generateWithCallback(
                    text = item.text,
                    sid = 2, // speaker id — change here to switch voices
                    speed = 1.0f
                ) { samples ->
                    // Returning 0 aborts synthesis once the turn is stopped.
                    if (ttsStopped.get()) return@generateWithCallback 0
                    if (!firstPcmMarked && samples.isNotEmpty()) {
                        firstPcmMarked = true
                        trace?.markTtsFirstPcmReady()
                    }
                    if (!firstAudioMarked && samples.isNotEmpty()) {
                        firstAudioMarked = true
                        trace?.markTtsFirstAudioPlay()
                    }
                    audioTrack.write(samples, 0, samples.size, AudioTrack.WRITE_BLOCKING)
                    ttsTotalSamplesWritten += samples.size
                    1
                }
                val ttsMs = System.currentTimeMillis() - startMs
                trace?.addDuration("tts_segment_ms_total", ttsMs)
            }
            TtsQueueItem.End -> {
                // Drop any queued ASR segments — they are likely echo picked up
                // while TTS was playing.
                while (asrQueue.tryReceive().isSuccess) { }
                waitForPlaybackComplete(audioTrack)
                val ttsCompleteTime = System.currentTimeMillis()
                // Update the transcript on the main thread.
                runOnUiThread {
                    appendToUi("\n[LOG] TTS completed at: ${ttsCompleteTime}\n")
                }
                ttsPlaying.set(false)
                runOnUiThread { videoPlayerManager?.setSpeaking(false) }
                ttsTotalSamplesWritten = 0
                isFirstSegment = true
                currentTrace?.markTtsDone()
                TraceManager.getInstance().endTurn()
                currentTrace = null
                break
            }
        }
    }
}
/**
 * Blocks until the AudioTrack's playback head reaches the number of samples
 * written this turn, the turn is stopped, or a timeout derived from the audio
 * duration (+2 s) elapses. A fixed 1 s tail wait then lets the device buffers
 * drain.
 */
private fun waitForPlaybackComplete(audioTrack: AudioTrack) {
    val totalSamples = ttsTotalSamplesWritten
    if (totalSamples <= 0) return
    val timeoutMs = totalSamples * 1000 / audioTrack.sampleRate + 2000
    val deadline = System.currentTimeMillis() + timeoutMs
    while (!ttsStopped.get()) {
        val playbackPos = audioTrack.playbackHeadPosition.toLong()
        if (playbackPos >= totalSamples) break
        if (System.currentTimeMillis() > deadline) {
            Log.w(TAG, "waitForPlaybackComplete timeout, pos=$playbackPos, total=$totalSamples")
            break
        }
        Thread.sleep(20)
    }
    // Fixed tail wait so all device-side buffers finish draining.
    Thread.sleep(1000)
}
/**
 * ASR worker loop: receives finished speech segments from [asrQueue], runs
 * SenseVoice on them, filters junk transcripts, shows the text, and forwards
 * it to the cloud LLM (one request in flight at a time).
 *
 * NOTE(review): asrQueue is declared Channel<Pair<FloatArray, TraceSession?>>,
 * yet processSamplesLoop sends Pair(originalSeg, processedSeg) — two
 * FloatArrays — so `trace` below would not actually be a TraceSession. One of
 * the two declarations looks stale; confirm against the repository version.
 */
private suspend fun runAsrWorker() {
    while (ioScope.coroutineContext.isActive) {
        val (seg, trace) = try {
            asrQueue.receive()
        } catch (_: Throwable) {
            break
        }
        // Allow only one LLM request in flight (avoids pile-ups/races), and
        // skip ASR while TTS plays so we don't transcribe our own audio.
        if (llmInFlight || ttsPlaying.get()) continue
        trace?.markASRStart()
        Log.d(TAG, "ASR started: processing audio segment")
        withContext(Dispatchers.Main) {
            appendToUi("\n[ASR] 开始识别...\n")
        }
        val raw = synchronized(nativeLock) {
            val e = senseVoice
            if (e == null || !e.isInitialized) "" else e.transcribeBuffer(seg)
        }
        val text = removeTokens(raw)
        // Transcript filtering.
        if (text.isBlank()) continue
        // Drop a lone English "i" (a common ASR artifact).
        if (text.length == 1 && text[0].equals('i', ignoreCase = true)) {
            Log.d(TAG, "ASR segment skipped: single 'i'")
            continue
        }
        // Drop implausibly long transcripts (over 50 characters).
        if (text.length > 50) {
            Log.d(TAG, "ASR segment skipped: too long (${text.length} chars)")
            continue
        }
        trace?.markASREnd()
        withContext(Dispatchers.Main) {
            appendToUi("\n\n[ASR] ${text}\n")
        }
        trace?.markRecordingDone()
        // NOTE(review): this marks the LLM response as received *before*
        // callLLM() runs below — looks misplaced; confirm the intended mark.
        trace?.markLlmResponseReceived()
        if (BuildConfig.LLM_API_KEY.isBlank()) {
            withContext(Dispatchers.Main) {
                Toast.makeText(
                    this@MainActivity,
                    "未配置 LLM_API_KEY在 local.properties 或 gradle.properties 里设置)",
                    Toast.LENGTH_LONG
                ).show()
            }
            continue
        }
        llmInFlight = true
        cloudApiManager.callLLM(text)
    }
}
/**
 * Appends [s] to the running transcript shown in [textView].
 *
 * Fix: made safe to call from any thread by posting the mutation via
 * runOnUiThread (which executes inline when already on the main thread).
 * Some call sites — e.g. onLLMStreamingChunkReceived — invoke this directly
 * from background callbacks while others already wrap it, so the original
 * could touch the TextView off the main thread.
 */
private fun appendToUi(s: String) {
    runOnUiThread {
        lastUiText += s
        textView.text = lastUiText
    }
}
}

View File

@@ -20,6 +20,8 @@ import android.widget.Toast
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import com.digitalperson.cloud.CloudApiManager
import com.digitalperson.player.VideoPlayerManager
import com.google.android.exoplayer2.ui.PlayerView
import com.digitalperson.engine.SenseVoiceEngineRKNN
import com.digitalperson.metrics.TraceManager
import com.digitalperson.metrics.TraceSession
@@ -33,6 +35,7 @@ import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.cancel
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.isActive
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
@@ -66,6 +69,8 @@ class MainActivity : AppCompatActivity() {
private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
@Volatile
private var isRecording: Boolean = false
@@ -74,7 +79,11 @@ class MainActivity : AppCompatActivity() {
private val nativeLock = Any()
private lateinit var cloudApiManager: CloudApiManager
private val segmenter = StreamingTextSegmenter()
private var videoPlayerManager: VideoPlayerManager? = null
private val segmenter = StreamingTextSegmenter(
maxLen = 30,
maxWaitMs = 600
)
private sealed class TtsQueueItem {
data class Segment(val text: String) : TtsQueueItem()
@@ -84,11 +93,18 @@ class MainActivity : AppCompatActivity() {
private val ttsQueue = LinkedBlockingQueue<TtsQueueItem>()
private val ttsStopped = AtomicBoolean(false)
private val ttsWorkerRunning = AtomicBoolean(false)
private val ttsPlaying = AtomicBoolean(false)
@Volatile private var ttsTotalSamplesWritten: Long = 0
private var currentTrace: TraceSession? = null
private var lastUiText: String = ""
@Volatile private var llmInFlight: Boolean = false
private var enableStreaming = false // 默认禁用流式输出
// ASR 队列和工作器
private val asrQueue = Channel<Pair<FloatArray, FloatArray>>(capacity = Channel.UNLIMITED)
private val asrWorkerRunning = AtomicBoolean(false)
override fun onRequestPermissionsResult(
requestCode: Int,
@@ -97,8 +113,8 @@ class MainActivity : AppCompatActivity() {
) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults)
val ok = requestCode == REQUEST_RECORD_AUDIO_PERMISSION &&
grantResults.isNotEmpty() &&
grantResults[0] == PackageManager.PERMISSION_GRANTED
grantResults.isNotEmpty() &&
grantResults[0] == PackageManager.PERMISSION_GRANTED
if (!ok) {
Log.e(TAG, "Audio record is disallowed")
finish()
@@ -109,6 +125,17 @@ class MainActivity : AppCompatActivity() {
super.onCreate(savedInstanceState)
setContentView(R.layout.activity_main)
// 初始化双播放器管理器silent 与 speaking 两个叠加的 PlayerView
try {
val silentPv = findViewById<PlayerView>(R.id.player_view_silent)
val speakingPv = findViewById<PlayerView>(R.id.player_view_speaking)
videoPlayerManager = VideoPlayerManager(this, silentPv, speakingPv)
// 默认 AI 未说话
videoPlayerManager?.setSpeaking(false)
} catch (e: Exception) {
Log.w(TAG, "PlayerViews not found or init failed: ${e.message}")
}
ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
startButton = findViewById(R.id.start_button)
@@ -119,6 +146,19 @@ class MainActivity : AppCompatActivity() {
startButton.setOnClickListener { onStartClicked() }
stopButton.setOnClickListener { onStopClicked(userInitiated = true) }
// 初始化流式输出开关
try {
val streamingSwitch = findViewById<android.widget.Switch>(R.id.streaming_switch)
streamingSwitch.isChecked = enableStreaming
streamingSwitch.setOnCheckedChangeListener { _, isChecked ->
enableStreaming = isChecked
cloudApiManager.setEnableStreaming(isChecked)
Toast.makeText(this, "流式输出已${if (isChecked) "启用" else "禁用"}", Toast.LENGTH_SHORT).show()
}
} catch (e: Exception) {
Log.w(TAG, "Streaming switch not found in layout: ${e.message}")
}
// 避免 UI 线程重初始化导致 ANR在后台初始化模型与 AudioTrack
startButton.isEnabled = false
stopButton.isEnabled = false
@@ -151,30 +191,45 @@ class MainActivity : AppCompatActivity() {
}
}
cloudApiManager = CloudApiManager(object : CloudApiManager.CloudApiListener {
cloudApiManager = CloudApiManager(object : CloudApiManager.CloudApiListener {
private var llmFirstChunkMarked = false
override fun onLLMResponseReceived(response: String) {
currentTrace?.markLlmDone()
llmInFlight = false
// flush remaining buffer into TTS
for (seg in segmenter.flush()) {
enqueueTtsSegment(seg)
// 根据流式输出模式处理响应
if (enableStreaming) {
// 启用流式输出时,刷新剩余缓冲区
for (seg in segmenter.flush()) {
enqueueTtsSegment(seg)
}
// 发送队列结束信号
ttsQueue.offer(TtsQueueItem.End)
} else {
runOnUiThread {
appendToUi("${response}\n")
}
// 禁用流式输出时直接使用整段文本进行TTS
enqueueTtsSegment(response)
// 发送队列结束信号
ttsQueue.offer(TtsQueueItem.End)
}
// signal queue end (no more segments after this)
ttsQueue.offer(TtsQueueItem.End)
}
override fun onLLMStreamingChunkReceived(chunk: String) {
if (!llmFirstChunkMarked) {
llmFirstChunkMarked = true
currentTrace?.markLlmFirstChunk()
}
appendToUi(chunk)
// 启用流式输出时处理流式chunk
if (enableStreaming) {
if (!llmFirstChunkMarked) {
llmFirstChunkMarked = true
currentTrace?.markLlmFirstChunk()
}
appendToUi(chunk)
val segments = segmenter.processChunk(chunk)
for (seg in segments) {
enqueueTtsSegment(seg)
val segments = segmenter.processChunk(chunk)
for (seg in segments) {
enqueueTtsSegment(seg)
}
}
}
@@ -187,7 +242,14 @@ class MainActivity : AppCompatActivity() {
Toast.makeText(this@MainActivity, errorMessage, Toast.LENGTH_LONG).show()
onStopClicked(userInitiated = false)
}
})
}, applicationContext)
// 设置流式输出模式
cloudApiManager.setEnableStreaming(enableStreaming)
// 预先启动ASR worker
Log.d(TAG, "Pre-starting ASR worker")
ensureAsrWorker()
}
override fun onDestroy() {
@@ -208,10 +270,18 @@ class MainActivity : AppCompatActivity() {
tts?.release()
} catch (_: Throwable) {
}
try {
videoPlayerManager?.release()
} catch (_: Throwable) {
}
}
private fun onStartClicked() {
if (isRecording) return
Log.d(TAG, "onStartClicked called")
if (isRecording) {
Log.d(TAG, "Already recording, returning")
return
}
if (!initMicrophone()) {
Toast.makeText(this, "麦克风初始化失败/无权限", Toast.LENGTH_SHORT).show()
@@ -227,6 +297,8 @@ class MainActivity : AppCompatActivity() {
textView.text = ""
ttsStopped.set(false)
ttsPlaying.set(false)
ttsTotalSamplesWritten = 0
ttsQueue.clear()
segmenter.reset()
@@ -237,10 +309,12 @@ class MainActivity : AppCompatActivity() {
startButton.isEnabled = false
stopButton.isEnabled = true
Log.d(TAG, "Starting processSamplesLoop coroutine")
recordingJob?.cancel()
recordingJob = ioScope.launch {
processSamplesLoop()
}
Log.d(TAG, "onStartClicked completed")
}
private fun onStopClicked(userInitiated: Boolean) {
@@ -259,6 +333,8 @@ class MainActivity : AppCompatActivity() {
recordingJob = null
ttsStopped.set(true)
ttsPlaying.set(false)
ttsTotalSamplesWritten = 0
ttsQueue.clear()
// wake worker if waiting
ttsQueue.offer(TtsQueueItem.End)
@@ -480,22 +556,43 @@ class MainActivity : AppCompatActivity() {
}
private suspend fun processSamplesLoop() {
Log.d(TAG, "processSamplesLoop started")
// Avoid calling vad.front()/vad.pop() (native queue APIs) since it crashes on some builds.
// Use vad.compute() and implement a simple VAD segmenter in Kotlin instead.
val windowSize = 512
val buffer = ShortArray(windowSize)
val threshold = 0.5f
val minSilenceSamples = (0.25f * sampleRateInHz).toInt()
val minSpeechSamples = (0.25f * sampleRateInHz).toInt()
// 双阈值设置
val startThreshold = 0.2f // 进入语音的阈值
val endThreshold = 0.15f // 退出语音的阈值
val minSilenceSamples = (0.5f * sampleRateInHz).toInt()
val minSpeechSamples = (0.1f * sampleRateInHz).toInt()
val maxSpeechSamples = (5.0f * sampleRateInHz).toInt()
Log.d(TAG, "VAD thresholds: start=$startThreshold, end=$endThreshold, minSilenceSamples=$minSilenceSamples, minSpeechSamples=$minSpeechSamples")
// VAD 概率数据记录
val vadProbabilities = mutableListOf<Float>()
val vadTimestamps = mutableListOf<Long>()
val vadRMSValues = mutableListOf<Float>()
val vadSmoothedRMSValues = mutableListOf<Float>()
// 指数平滑相关变量
var smoothedRms = 0f
val alpha = 0.8f // 平滑系数
var inSpeech = false
var silenceSamples = 0
var speechBuf = FloatArray(0)
var speechLen = 0
var processedSpeechBuf = FloatArray(0)
var processedSpeechLen = 0
fun appendSpeech(chunk: FloatArray) {
var loopCount = 0
var vadComputeCount = 0
fun appendSpeech(chunk: FloatArray, processedChunk: FloatArray) {
// 保存原始音频
val needed = speechLen + chunk.size
if (speechBuf.size < needed) {
var newCap = maxOf(needed, maxOf(1024, speechBuf.size * 2))
@@ -509,85 +606,152 @@ class MainActivity : AppCompatActivity() {
System.arraycopy(chunk, 0, speechBuf, speechLen, copyN)
speechLen += copyN
}
// 保存增益后的音频
val processedNeeded = processedSpeechLen + processedChunk.size
if (processedSpeechBuf.size < processedNeeded) {
var newCap = maxOf(processedNeeded, maxOf(1024, processedSpeechBuf.size * 2))
if (newCap > maxSpeechSamples) newCap = maxSpeechSamples
val n = FloatArray(newCap)
if (processedSpeechLen > 0) System.arraycopy(processedSpeechBuf, 0, n, 0, processedSpeechLen)
processedSpeechBuf = n
}
val processedCopyN = minOf(processedChunk.size, max(0, maxSpeechSamples - processedSpeechLen))
if (processedCopyN > 0) {
System.arraycopy(processedChunk, 0, processedSpeechBuf, processedSpeechLen, processedCopyN)
processedSpeechLen += processedCopyN
}
}
suspend fun finalizeSegmentIfAny() {
Log.d(TAG, "finalizeSegmentIfAny called: speechLen=$speechLen, minSpeechSamples=$minSpeechSamples, ttsPlaying=${ttsPlaying.get()}, llmInFlight=$llmInFlight")
if (speechLen < minSpeechSamples) {
Log.d(TAG, "finalizeSegmentIfAny: speech too short, discarding")
speechLen = 0
processedSpeechLen = 0
inSpeech = false
silenceSamples = 0
return
}
val seg = speechBuf.copyOf(speechLen)
// ✅ 新增:如果 TTS 正在播放或 LLM 请求中,丢弃此段(避免回声)
if (ttsPlaying.get() || llmInFlight) {
Log.d(TAG, "finalizeSegmentIfAny: TTS playing or LLM in flight, discarding")
speechLen = 0
processedSpeechLen = 0
inSpeech = false
silenceSamples = 0
return
}
val originalSeg = speechBuf.copyOf(speechLen)
val processedSeg = processedSpeechBuf.copyOf(processedSpeechLen)
speechLen = 0
processedSpeechLen = 0
inSpeech = false
silenceSamples = 0
// 每次只允许一个 LLM 请求在飞,避免堆积导致卡死/竞态
if (llmInFlight) return
val trace = currentTrace
trace?.markASRStart()
val raw = synchronized(nativeLock) {
val e = senseVoice
if (e == null || !e.isInitialized) "" else e.transcribeBuffer(seg)
}
val text = removeTokens(raw)
if (text.isBlank()) return
trace?.markASREnd()
if (text.isBlank()) return
withContext(Dispatchers.Main) {
appendToUi("\n\n[ASR] ${text}\n")
}
trace?.markRecordingDone()
trace?.markLlmResponseReceived()
if (BuildConfig.LLM_API_KEY.isBlank()) {
withContext(Dispatchers.Main) {
Toast.makeText(
this@MainActivity,
"未配置 LLM_API_KEY在 local.properties 或 gradle.properties 里设置)",
Toast.LENGTH_LONG
).show()
}
return
}
llmInFlight = true
cloudApiManager.callLLM(text)
// 将语音段加入 ASR 处理队列,异步处理
Log.d(TAG, "Sending audio segment to ASR queue, size: ${processedSeg.size}")
asrQueue.send(Pair(originalSeg, processedSeg))
Log.d(TAG, "Calling ensureAsrWorker")
ensureAsrWorker()
}
while (isRecording && ioScope.coroutineContext.isActive) {
loopCount++
if (loopCount % 100 == 0) {
Log.d(TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsPlaying.get()}")
}
// 如果TTS正在播放跳过VAD处理避免检测到回声
if (ttsPlaying.get()) {
// 如果正在语音状态,立即结束它
if (inSpeech) {
Log.d(TAG, "TTS playing, resetting VAD state")
inSpeech = false
silenceSamples = 0
speechLen = 0
processedSpeechLen = 0
}
// 读取并丢弃音频数据,保持录音状态
val ret = audioRecord?.read(buffer, 0, buffer.size) ?: break
if (ret <= 0) continue
continue
}
val ret = audioRecord?.read(buffer, 0, buffer.size) ?: break
if (ret <= 0) continue
if (ret != windowSize) continue
// 在 processSamplesLoop 方法中
val chunk = FloatArray(ret) { buffer[it] / 32768.0f }
val prob = synchronized(nativeLock) { vad.compute(chunk) }
if (prob >= threshold) {
if (!inSpeech) {
inSpeech = true
silenceSamples = 0
// 计算当前音频的RMS值均方根
val rms = calculateRMS(chunk)
// 应用指数平滑
smoothedRms = if (smoothedRms == 0f) rms else alpha * rms + (1 - alpha) * smoothedRms
// 动态调整增益因子目标RMS设为0.1(约-20dB
val targetRMS = 0.1f
var gainFactor = if (smoothedRms > 0) targetRMS / smoothedRms else 3.0f
// 设置增益的上下限,避免过度增益导致削波
gainFactor = gainFactor.coerceIn(0.1f, 10.0f)
// 应用增益因子
val processedChunk = FloatArray(chunk.size) {
val value = chunk[it] * gainFactor
// 限制音量范围,避免削波
if (value > 1.0f) 1.0f else if (value < -1.0f) -1.0f else value
}
// 使用处理后的音频数据
val prob = synchronized(nativeLock) { vad.compute(processedChunk) }
vadComputeCount++
// 记录VAD概率、时间戳、原始RMS值和平滑后的RMS值
vadProbabilities.add(prob)
vadTimestamps.add(System.currentTimeMillis())
vadRMSValues.add(rms)
vadSmoothedRMSValues.add(smoothedRms)
// 每100次循环输出一次VAD概率
if (vadComputeCount % 100 == 0) {
Log.d(TAG, "VAD prob=$prob, inSpeech=$inSpeech, rms=$rms, smoothedRms=$smoothedRms")
}
// 双阈值状态机逻辑
if (!inSpeech && prob >= startThreshold) {
// 进入语音状态
inSpeech = true
silenceSamples = 0
appendSpeech(chunk, processedChunk)
Log.d(TAG, "VAD: Entered speech state, prob=$prob, speechLen=$speechLen")
} else if (inSpeech && prob <= endThreshold) {
// 开始计数静音样本
silenceSamples += ret
if (silenceSamples >= minSilenceSamples) {
// 退出语音状态
Log.d(TAG, "VAD: Exiting speech state, prob=$prob, silenceSamples=$silenceSamples, speechLen=$speechLen")
finalizeSegmentIfAny()
} else {
// 保留尾音
appendSpeech(chunk, processedChunk)
}
appendSpeech(chunk)
} else if (inSpeech) {
// 语音过程中,持续添加音频
appendSpeech(chunk, processedChunk)
silenceSamples = 0 // 重置静音计数
if (speechLen >= maxSpeechSamples) {
Log.d(TAG, "VAD: Max speech length reached, finalizing segment")
finalizeSegmentIfAny()
}
} else {
if (inSpeech) {
silenceSamples += ret
if (silenceSamples >= minSilenceSamples) {
finalizeSegmentIfAny()
} else {
// keep a bit of trailing silence to avoid chopping
appendSpeech(chunk)
}
}
}
// 非语音状态且概率低于开始阈值,不做处理
// 每1000次循环输出一次VAD状态
if (loopCount % 1000 == 0) {
Log.d(TAG, "VAD status: inSpeech=$inSpeech, prob=$prob, speechLen=$speechLen")
}
// 时间兜底切段(避免长时间无标点导致首包太慢)
@@ -597,6 +761,58 @@ class MainActivity : AppCompatActivity() {
// flush last partial segment
finalizeSegmentIfAny()
// 保存VAD数据到文件
saveVadData(vadTimestamps, vadProbabilities, vadRMSValues, vadSmoothedRMSValues)
}
/**
* 计算音频数据的均方根RMS用于动态调整增益
*/
/**
 * Computes the root-mean-square (RMS) level of an audio buffer; used to drive
 * the dynamic gain adjustment applied before VAD.
 *
 * @param samples PCM samples, nominally in [-1, 1]; an empty buffer yields 0.
 * @return the RMS value of the buffer.
 */
private fun calculateRMS(samples: FloatArray): Float {
    if (samples.isEmpty()) return 0.0f
    // Accumulate in Double: summing many squared floats in Float loses
    // precision on long buffers (seconds of 16 kHz audio).
    var sumSquared = 0.0
    for (sample in samples) {
        sumSquared += sample.toDouble() * sample
    }
    return kotlin.math.sqrt(sumSquared / samples.size).toFloat()
}
/**
* 保存VAD数据到文件方便后续分析和绘图
*/
/**
 * Persists VAD telemetry (timestamp, probability, raw RMS, smoothed RMS) as a
 * CSV under filesDir/vad_data for offline analysis and plotting.
 * Best-effort: any I/O failure is logged and swallowed.
 *
 * @param timestamps wall-clock ms per VAD frame.
 * @param probabilities VAD speech probability per frame.
 * @param rmsValues raw RMS per frame.
 * @param smoothedRmsValues exponentially smoothed RMS per frame.
 */
private fun saveVadData(timestamps: List<Long>, probabilities: List<Float>, rmsValues: List<Float>, smoothedRmsValues: List<Float>) {
    try {
        val vadDataDir = File(filesDir, "vad_data")
        if (!vadDataDir.exists()) {
            vadDataDir.mkdirs()
        }
        // Unique file per recording session.
        val outputFile = File(vadDataDir, "vad_data_${System.currentTimeMillis()}.csv")
        // Guard against accidental length mismatch between the parallel lists:
        // the original indexed all four lists by timestamps.indices, which
        // throws if any list is shorter.
        val rows = minOf(timestamps.size, probabilities.size, rmsValues.size, smoothedRmsValues.size)
        // Buffered writer: one small write per row instead of raw stream writes.
        outputFile.bufferedWriter().use { writer ->
            writer.write("timestamp,probability,rms,smoothed_rms\n")
            for (i in 0 until rows) {
                writer.write("${timestamps[i]},${probabilities[i]},${rmsValues[i]},${smoothedRmsValues[i]}\n")
            }
        }
        Log.d(TAG, "Saved VAD data to: ${outputFile.absolutePath}")
    } catch (e: Exception) {
        Log.e(TAG, "Error saving VAD data: ${e.message}")
    }
}
private fun removeTokens(text: String): String {
@@ -608,8 +824,11 @@ class MainActivity : AppCompatActivity() {
}
/**
 * Queues one text segment for TTS synthesis after stripping trailing sentence
 * punctuation (both ASCII and full-width CJK forms), so the engine does not
 * vocalize or pause on it at segment boundaries.
 *
 * NOTE(review): the full-width '!', '?', ',', ';', ':' literals were dropped
 * by encoding mangling in the previous revision (empty char literals);
 * restored here following the ASCII/full-width pairing pattern — confirm
 * against the originally intended set.
 */
private fun enqueueTtsSegment(seg: String) {
    val cleanedSeg = seg.trimEnd(
        '.', '。', '!', '!', '?', '?', ',', ',', ';', ';', ':', ':'
    )
    currentTrace?.markTtsRequestEnqueued()
    ttsQueue.offer(TtsQueueItem.Segment(cleanedSeg))
    ensureTtsWorker()
}
@@ -624,34 +843,60 @@ class MainActivity : AppCompatActivity() {
}
}
/**
 * Launches the ASR worker coroutine if one is not already active.
 * The AtomicBoolean acts as a single-owner latch: acquired here via CAS and
 * released in the finally block when the worker coroutine terminates.
 */
private fun ensureAsrWorker() {
    Log.d(TAG, "ensureAsrWorker called, asrWorkerRunning=${asrWorkerRunning.get()}")
    val acquired = asrWorkerRunning.compareAndSet(false, true)
    if (!acquired) {
        Log.d(TAG, "ASR worker already running, returning")
        return
    }
    Log.d(TAG, "Starting ASR worker coroutine")
    ioScope.launch {
        try {
            runAsrWorker()
        } finally {
            Log.d(TAG, "ASR worker coroutine finished")
            asrWorkerRunning.set(false)
        }
    }
}
private fun runTtsWorker() {
val t = tts ?: return
val audioTrack = track ?: return
var firstAudioMarked = false
var isFirstSegment = true
while (true) {
val item = ttsQueue.take()
if (ttsStopped.get()) break
when (item) {
is TtsQueueItem.Segment -> {
ttsPlaying.set(true)
runOnUiThread { videoPlayerManager?.setSpeaking(true) }
val trace = currentTrace
trace?.markTtsSynthesisStart()
Log.d(TAG, "TTS started: processing segment '${item.text}'")
runOnUiThread {
appendToUi("\n[TTS] 开始合成...\n")
}
val startMs = System.currentTimeMillis()
var firstPcmMarked = false
// flush to reduce latency between segments
try {
audioTrack.pause()
audioTrack.flush()
audioTrack.play()
} catch (_: Throwable) {
if (isFirstSegment) {
try {
audioTrack.pause()
audioTrack.flush()
audioTrack.play()
} catch (_: Throwable) {
}
isFirstSegment = false
}
t.generateWithCallback(
text = item.text,
sid = 0,
sid = 2, // 这里可以修改说话人
speed = 1.0f
) { samples ->
if (ttsStopped.get()) return@generateWithCallback 0
@@ -664,6 +909,7 @@ class MainActivity : AppCompatActivity() {
trace?.markTtsFirstAudioPlay()
}
audioTrack.write(samples, 0, samples.size, AudioTrack.WRITE_BLOCKING)
ttsTotalSamplesWritten += samples.size
1
}
@@ -672,6 +918,21 @@ class MainActivity : AppCompatActivity() {
}
TtsQueueItem.End -> {
// 清空 ASR 队列,丢弃所有未处理的段(这些可能是 TTS 播放期间的回声)
while (asrQueue.tryReceive().isSuccess) { }
waitForPlaybackComplete(audioTrack)
val ttsCompleteTime = System.currentTimeMillis()
// 在主线程更新UI
runOnUiThread {
appendToUi("\n[LOG] TTS completed at: ${ttsCompleteTime}\n")
}
ttsPlaying.set(false)
runOnUiThread { videoPlayerManager?.setSpeaking(false) }
ttsTotalSamplesWritten = 0
isFirstSegment = true
currentTrace?.markTtsDone()
TraceManager.getInstance().endTurn()
currentTrace = null
@@ -681,9 +942,257 @@ class MainActivity : AppCompatActivity() {
}
}
/**
 * Blocks until the AudioTrack has played out every sample written for the
 * current TTS turn (tracked in ttsTotalSamplesWritten), bounded by a timeout
 * derived from the expected playback duration plus a 2 s margin.
 *
 * NOTE(review): playbackHeadPosition is a wrapping 32-bit frame counter per
 * the AudioTrack docs; acceptable here since the written-sample counter is
 * reset per turn — confirm turns cannot run long enough to wrap.
 */
private fun waitForPlaybackComplete(audioTrack: AudioTrack) {
    val totalSamples = ttsTotalSamplesWritten
    if (totalSamples <= 0) return
    val sampleRate = audioTrack.sampleRate
    // Compute in Long: totalSamples * 1000 overflows Int for long utterances.
    val timeoutMs = totalSamples.toLong() * 1000 / sampleRate + 2000
    val startTime = System.currentTimeMillis()
    while (!ttsStopped.get()) {
        val playbackPos = audioTrack.playbackHeadPosition.toLong()
        if (playbackPos >= totalSamples) {
            break
        }
        if (System.currentTimeMillis() - startTime > timeoutMs) {
            Log.w(TAG, "waitForPlaybackComplete timeout, pos=$playbackPos, total=$totalSamples")
            break
        }
        Thread.sleep(20)
    }
    // Extra settle time so the hardware buffer fully drains before ttsPlaying
    // is flipped off — skipped when the user already requested stop, so the
    // worker does not block an extra second on shutdown.
    if (!ttsStopped.get()) {
        Thread.sleep(1000)
    }
}
/**
 * ASR worker loop: consumes (original, gain-processed) audio segment pairs
 * from asrQueue, transcribes the processed segment with SenseVoice, filters
 * out junk results, shows the text in the UI, and forwards it to the LLM.
 * Runs until the queue receive fails (channel closed) or the scope is
 * cancelled. Exceptions are logged; the caller (ensureAsrWorker) resets the
 * running latch when this returns.
 */
private suspend fun runAsrWorker() {
    Log.d(TAG, "ASR worker started")
    try {
        while (ioScope.coroutineContext.isActive) {
            // Block until a segment arrives; a failed receive (e.g. channel
            // closed on shutdown) terminates the worker.
            val (originalSeg, processedSeg) = try {
                Log.d(TAG, "ASR worker waiting for audio segment")
                asrQueue.receive()
            } catch (e: Throwable) {
                Log.e(TAG, "ASR worker receive failed: ${e.message}")
                break
            }
            Log.d(TAG, "ASR worker received audio segment, size=${processedSeg.size}")
            // Allow only one LLM request in flight at a time, to avoid
            // pile-ups causing hangs/races.
            // Skip ASR while TTS is playing, to avoid recognizing the TTS
            // output itself (echo).
            if (llmInFlight || ttsPlaying.get()) {
                Log.d(TAG, "ASR worker skipping segment: llmInFlight=$llmInFlight, ttsPlaying=${ttsPlaying.get()}")
                continue
            }
            val trace = currentTrace
            trace?.markASRStart()
            Log.d(TAG, "ASR started: processing audio segment")
            withContext(Dispatchers.Main) {
                appendToUi("\n[ASR] 开始识别...\n")
            }
            // Dump both segment variants to disk for debugging.
            saveAsrAudio(originalSeg, processedSeg)
            // Native engine calls are serialized under nativeLock; an
            // uninitialized engine or a transcribe failure yields "".
            val raw = synchronized(nativeLock) {
                val e = senseVoice
                if (e == null || !e.isInitialized) {
                    Log.e(TAG, "ASR failed: SenseVoice engine not initialized")
                    ""
                } else {
                    try {
                        e.transcribeBuffer(processedSeg)
                    } catch (e: Throwable) {
                        Log.e(TAG, "ASR transcribe failed: ${e.message}")
                        ""
                    }
                }
            }
            Log.d(TAG, "ASR raw result: $raw")
            val text = removeTokens(raw)
            // Filtering of unusable recognition results:
            if (text.isBlank()) {
                Log.d(TAG, "ASR segment skipped: blank text")
                continue
            }
            // Filter the single English character "i".
            if (text.length == 1 && text[0].equals('i', ignoreCase = true)) {
                Log.d(TAG, "ASR segment skipped: single 'i'")
                continue
            }
            // Filter texts longer than 50 characters.
            if (text.length > 50) {
                Log.d(TAG, "ASR segment skipped: too long (${text.length} chars)")
                continue
            }
            trace?.markASREnd()
            withContext(Dispatchers.Main) {
                appendToUi("\n\n[ASR] ${text}\n")
            }
            trace?.markRecordingDone()
            // NOTE(review): markLlmResponseReceived is invoked BEFORE
            // cloudApiManager.callLLM below — looks misplaced for a
            // "response received" mark; confirm against TraceSession semantics.
            trace?.markLlmResponseReceived()
            if (BuildConfig.LLM_API_KEY.isBlank()) {
                // NOTE(review): a full-width "(" appears dropped from this
                // message by encoding mangling — restore when editing strings.
                withContext(Dispatchers.Main) {
                    Toast.makeText(
                        this@MainActivity,
                        "未配置 LLM_API_KEY在 local.properties 或 gradle.properties 里设置)",
                        Toast.LENGTH_LONG
                    ).show()
                }
                continue
            }
            // Set before the async call; cleared elsewhere when the LLM
            // round-trip completes.
            llmInFlight = true
            Log.d(TAG, "Calling LLM with text: $text")
            cloudApiManager.callLLM(text)
        }
    } catch (e: Throwable) {
        Log.e(TAG, "ASR worker error: ${e.message}", e)
    } finally {
        Log.d(TAG, "ASR worker exiting")
    }
}
/** Appends [s] to the accumulated transcript and refreshes the TextView. */
private fun appendToUi(s: String) {
    lastUiText = lastUiText + s
    textView.text = lastUiText
}
}
/**
* 保存ASR音频用于调试
*/
/**
 * Dumps both the raw and the gain-processed ASR segment as WAV files under
 * filesDir/asr_audio so recognition issues can be inspected offline.
 * Best-effort: failures are logged and ignored.
 */
private fun saveAsrAudio(originalAudio: FloatArray, processedAudio: FloatArray) {
    try {
        val asrAudioDir = File(filesDir, "asr_audio").apply {
            if (!exists()) mkdirs()
        }
        // Shared timestamp keys the two variants of the same segment together.
        val timestamp = System.currentTimeMillis()
        val originalFile = File(asrAudioDir, "asr_${timestamp}_original.wav")
        saveFloatArrayAsWav(originalFile, originalAudio, sampleRateInHz)
        Log.d(TAG, "Saved original ASR audio to: ${originalFile.absolutePath}")
        val processedFile = File(asrAudioDir, "asr_${timestamp}_processed.wav")
        saveFloatArrayAsWav(processedFile, processedAudio, sampleRateInHz)
        Log.d(TAG, "Saved processed ASR audio to: ${processedFile.absolutePath}")
    } catch (e: Exception) {
        Log.e(TAG, "Error saving ASR audio: ${e.message}")
    }
}
/**
* 将FloatArray保存为WAV文件
*/
/**
 * Writes a mono 16-bit PCM WAV file: a 44-byte RIFF header followed by
 * little-endian samples converted from [-1, 1] floats.
 *
 * @param file destination file (overwritten if present).
 * @param samples mono float samples; values outside [-1, 1] are clamped.
 * @param sampleRate sample rate in Hz recorded in the header.
 */
private fun saveFloatArrayAsWav(file: File, samples: FloatArray, sampleRate: Int) {
    val dataSize = samples.size * 2
    // Assemble header + payload in one little-endian buffer and write once:
    // the previous implementation issued one unbuffered 2-byte write per
    // sample, which is very slow for multi-second segments.
    val buf = java.nio.ByteBuffer
        .allocate(44 + dataSize)
        .order(java.nio.ByteOrder.LITTLE_ENDIAN)
    buf.put("RIFF".toByteArray(Charsets.US_ASCII))
    buf.putInt(36 + dataSize)            // RIFF chunk size (file size - 8)
    buf.put("WAVE".toByteArray(Charsets.US_ASCII))
    buf.put("fmt ".toByteArray(Charsets.US_ASCII))
    buf.putInt(16)                       // fmt sub-chunk size
    buf.putShort(1)                      // audio format: 1 = PCM
    buf.putShort(1)                      // channels: mono
    buf.putInt(sampleRate)
    buf.putInt(sampleRate * 2)           // byte rate = rate * channels * 16 / 8
    buf.putShort(2)                      // block align = channels * 16 / 8
    buf.putShort(16)                     // bits per sample
    buf.put("data".toByteArray(Charsets.US_ASCII))
    buf.putInt(dataSize)
    for (sample in samples) {
        // Clamp to [-1, 1] before scaling to avoid wraparound on overflow.
        val clamped = sample.coerceIn(-1.0f, 1.0f)
        buf.putShort((clamped * 32767.0f).toInt().toShort())
    }
    FileOutputStream(file).use { fos ->
        fos.write(buf.array())
    }
}
/**
* 将int转换为小端序字节数组
*/
/** Writes [value] into [dest] starting at [offset] as 4 little-endian bytes. */
private fun intToByteArray(value: Int, dest: ByteArray, offset: Int) {
    for (i in 0 until 4) {
        dest[offset + i] = ((value ushr (8 * i)) and 0xFF).toByte()
    }
}
/**
* 将short转换为小端序字节数组
*/
/** Writes [value] into [dest] starting at [offset] as 2 little-endian bytes. */
private fun shortToByteArray(value: Short, dest: ByteArray, offset: Int) {
    val v = value.toInt()
    dest[offset] = (v and 0xFF).toByte()
    dest[offset + 1] = ((v shr 8) and 0xFF).toByte()
}
}

View File

@@ -1,10 +1,12 @@
package com.digitalperson.cloud;
import android.content.Context;
import android.os.Handler;
import android.os.Looper;
import android.util.Log;
import com.digitalperson.BuildConfig;
import com.digitalperson.R;
import org.json.JSONArray;
import org.json.JSONException;
@@ -30,6 +32,7 @@ public class CloudApiManager {
private CloudApiListener mListener;
private Handler mMainHandler; // 用于在主线程执行UI更新
private JSONArray mConversationHistory; // 存储对话历史
private boolean mEnableStreaming = true; // 默认启用流式输出
public interface CloudApiListener {
void onLLMResponseReceived(String response);
@@ -38,10 +41,37 @@ public class CloudApiManager {
void onError(String errorMessage);
}
public CloudApiManager(CloudApiListener listener) {
public CloudApiManager(CloudApiListener listener, Context context) {
this.mListener = listener;
this.mMainHandler = new Handler(Looper.getMainLooper()); // 初始化主线程Handler
this.mConversationHistory = new JSONArray(); // 初始化对话历史
// 添加 system message要求回答简洁
try {
JSONObject systemMessage = new JSONObject();
systemMessage.put("role", "system");
String systemPrompt = context.getString(R.string.system_prompt);
systemMessage.put("content", systemPrompt);
mConversationHistory.put(systemMessage);
} catch (JSONException e) {
Log.e(TAG, "Failed to add system message: " + e.getMessage());
}
}
/**
* 设置是否启用流式输出
* @param enableStreaming true: 启用流式输出false: 禁用流式输出(整段输出)
*/
public void setEnableStreaming(boolean enableStreaming) {
this.mEnableStreaming = enableStreaming;
}
/**
* 获取当前是否启用流式输出
* @return true: 启用流式输出false: 禁用流式输出(整段输出)
*/
public boolean isEnableStreaming() {
return mEnableStreaming;
}
public void callLLM(String userInput) {
@@ -64,7 +94,7 @@ public class CloudApiManager {
JSONObject requestBody = new JSONObject();
requestBody.put("model", LLM_MODEL);
requestBody.put("messages", mConversationHistory);
requestBody.put("stream", true); // 启用流式响应
requestBody.put("stream", mEnableStreaming); // 根据配置决定是否启用流式响应
String jsonBody = requestBody.toString();
@@ -84,47 +114,74 @@ public class CloudApiManager {
Log.d(TAG, "LLM Response Code: " + responseCode);
if (responseCode == 200) {
// 逐行读取流式响应
try (BufferedReader br = new BufferedReader(
new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
String line;
while ((line = br.readLine()) != null) {
Log.d(TAG, "LLM Streaming Line: " + line);
// 处理SSE格式的响应
if (line.startsWith("data: ")) {
String dataPart = line.substring(6);
if (dataPart.equals("[DONE]")) {
// 流式响应结束
break;
}
if (mEnableStreaming) {
// 逐行读取流式响应
try (BufferedReader br = new BufferedReader(
new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
String line;
while ((line = br.readLine()) != null) {
Log.d(TAG, "LLM Streaming Line: " + line);
try {
// 解析JSON
JSONObject chunkObj = new JSONObject(dataPart);
JSONArray choices = chunkObj.getJSONArray("choices");
if (choices.length() > 0) {
JSONObject choice = choices.getJSONObject(0);
JSONObject delta = choice.getJSONObject("delta");
if (delta.has("content")) {
String chunkContent = delta.getString("content");
accumulatedContent.append(chunkContent);
// 处理SSE格式的响应
if (line.startsWith("data: ")) {
String dataPart = line.substring(6);
if (dataPart.equals("[DONE]")) {
// 流式响应结束
break;
}
try {
// 解析JSON
JSONObject chunkObj = new JSONObject(dataPart);
JSONArray choices = chunkObj.getJSONArray("choices");
if (choices.length() > 0) {
JSONObject choice = choices.getJSONObject(0);
JSONObject delta = choice.getJSONObject("delta");
// 发送流式chunk到监听器
if (mListener != null) {
mMainHandler.post(() -> {
mListener.onLLMStreamingChunkReceived(chunkContent);
});
if (delta.has("content")) {
String chunkContent = delta.getString("content");
accumulatedContent.append(chunkContent);
// 发送流式chunk到监听器
if (mListener != null) {
mMainHandler.post(() -> {
mListener.onLLMStreamingChunkReceived(chunkContent);
});
}
}
}
} catch (JSONException e) {
Log.e(TAG, "Failed to parse streaming chunk: " + e.getMessage());
}
} catch (JSONException e) {
Log.e(TAG, "Failed to parse streaming chunk: " + e.getMessage());
}
fullResponse.append(line).append("\n");
}
}
} else {
// 读取完整响应
try (BufferedReader br = new BufferedReader(
new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
String line;
while ((line = br.readLine()) != null) {
fullResponse.append(line);
}
}
// 解析完整JSON响应
try {
JSONObject responseObj = new JSONObject(fullResponse.toString());
JSONArray choices = responseObj.getJSONArray("choices");
if (choices.length() > 0) {
JSONObject choice = choices.getJSONObject(0);
JSONObject message = choice.getJSONObject("message");
if (message.has("content")) {
String content = message.getString("content");
accumulatedContent.append(content);
}
}
fullResponse.append(line).append("\n");
} catch (JSONException e) {
Log.e(TAG, "Failed to parse full response: " + e.getMessage());
}
}

View File

@@ -38,13 +38,19 @@ public class TraceSession {
long newValue = (currentValue != null) ? currentValue + deltaMs : deltaMs;
if (currentValue == null) {
// 如果键不存在,尝试添加
if (durations.putIfAbsent(name, newValue) == null) {
break;
synchronized (durations) {
if (!durations.containsKey(name)) {
durations.put(name, newValue);
break;
}
}
} else {
// 如果键存在,尝试更新
if (durations.replace(name, currentValue, newValue)) {
break;
synchronized (durations) {
if (durations.containsKey(name) && durations.get(name).equals(currentValue)) {
durations.put(name, newValue);
break;
}
}
}
}

View File

@@ -0,0 +1,99 @@
package com.digitalperson.player
import android.content.Context
import android.net.Uri
import android.view.View
import com.digitalperson.R
import com.google.android.exoplayer2.ExoPlayer
import com.google.android.exoplayer2.MediaItem
import com.google.android.exoplayer2.Player
import com.google.android.exoplayer2.ui.PlayerView
/**
 * Crossfades between two looping ExoPlayer videos: an idle ("silent") loop
 * and a speaking loop. Both players run continuously with volume 0; switching
 * state only animates the alpha of the two overlaid PlayerViews, avoiding
 * seek/prepare latency at each transition.
 *
 * NOTE(review): all methods touch Views and ExoPlayer, so this class is
 * main-thread only — callers already wrap setSpeaking in runOnUiThread.
 */
class VideoPlayerManager(
    private val context: Context,
    private val silentView: PlayerView,
    private val speakingView: PlayerView
) {
    private var playerSilent: ExoPlayer? = null
    private var playerSpeaking: ExoPlayer? = null
    // false = idle/silent visible, true = speaking visible.
    private var currentState: Boolean = false
    // Crossfade duration in ms (constant; was a never-mutated var).
    private val transitionDuration = 300L

    init {
        // Establish the initial alpha before the players attach surfaces.
        silentView.alpha = 1f
        speakingView.alpha = 0f
        initPlayers()
    }

    /** Builds an android.resource:// URI for a raw video resource. */
    private fun uriForRaw(resId: Int): Uri = Uri.parse("android.resource://${context.packageName}/$resId")

    /** Creates both looping players, binds them to their views, and starts them muted. */
    private fun initPlayers() {
        playerSilent = ExoPlayer.Builder(context).build().apply {
            repeatMode = Player.REPEAT_MODE_ONE
            playWhenReady = true
            setMediaItem(MediaItem.fromUri(uriForRaw(R.raw.silent)))
            prepare()
        }
        playerSpeaking = ExoPlayer.Builder(context).build().apply {
            repeatMode = Player.REPEAT_MODE_ONE
            playWhenReady = true
            setMediaItem(MediaItem.fromUri(uriForRaw(R.raw.speak_no_voice)))
            prepare()
        }
        // Bind each player to its own PlayerView.
        silentView.player = playerSilent
        speakingView.player = playerSpeaking
        // Mute both videos — only the TTS track should be audible.
        playerSilent?.volume = 0f
        playerSpeaking?.volume = 0f
        // Start playback (playWhenReady already set; explicit for clarity).
        playerSilent?.play()
        playerSpeaking?.play()
        // Re-assert the initial alpha state (guards against a Surface/Texture race).
        silentView.alpha = 1f
        speakingView.alpha = 0f
        currentState = false
    }

    /**
     * Crossfades to the speaking view when [speaking] is true, back to the
     * silent view when false. No-op when already in the requested state.
     */
    fun setSpeaking(speaking: Boolean) {
        if (speaking == currentState) return
        currentState = speaking
        // Keep the two loops roughly in sync (silent is the reference).
        syncPositions()
        val fadeInView = if (speaking) speakingView else silentView
        val fadeOutView = if (speaking) silentView else speakingView
        // Cancel any in-flight animations first: rapid toggles otherwise leave
        // two competing animators and can strand both views half-faded.
        fadeInView.animate().cancel()
        fadeOutView.animate().cancel()
        fadeOutView.animate().alpha(0f).setDuration(transitionDuration).start()
        fadeInView.visibility = View.VISIBLE
        fadeInView.animate().alpha(1f).setDuration(transitionDuration).start()
    }

    /** Seeks the speaking player to the silent player's current position. */
    private fun syncPositions() {
        try {
            val pos = playerSilent?.currentPosition ?: 0L
            playerSpeaking?.seekTo(pos)
        } catch (_: Throwable) {}
    }

    /** Detaches the views and releases both players. Safe to call repeatedly. */
    fun release() {
        try { silentView.player = null } catch (_: Throwable) {}
        try { speakingView.player = null } catch (_: Throwable) {}
        try { playerSilent?.release() } catch (_: Throwable) {}
        try { playerSpeaking?.release() } catch (_: Throwable) {}
        playerSilent = null
        playerSpeaking = null
    }
}

View File

@@ -4,8 +4,39 @@
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent"
android:background="#606060"
tools:context="com.digitalperson.MainActivity">
<!-- 双播放器容器:两个重叠的 PlayerViewsilent 在下面speaking 在上面,初始 alpha=0 -->
<FrameLayout
android:id="@+id/video_container"
android:layout_width="0dp"
android:layout_height="0dp"
app:layout_constraintTop_toTopOf="parent"
app:layout_constraintBottom_toBottomOf="parent"
app:layout_constraintStart_toStartOf="parent"
app:layout_constraintEnd_toEndOf="parent">
<com.google.android.exoplayer2.ui.PlayerView
android:id="@+id/player_view_silent"
android:layout_width="match_parent"
android:layout_height="match_parent"
app:use_controller="false"
app:resize_mode="fill"
app:surface_type="texture_view"
android:alpha="1" />
<com.google.android.exoplayer2.ui.PlayerView
android:id="@+id/player_view_speaking"
android:layout_width="match_parent"
android:layout_height="match_parent"
app:use_controller="false"
app:resize_mode="fill"
app:surface_type="texture_view"
android:alpha="0" />
</FrameLayout>
<TextView
android:id="@+id/my_text"
android:layout_width="0dp"
@@ -14,10 +45,37 @@
android:scrollbars="vertical"
android:text="@string/hint"
android:textIsSelectable="true"
app:layout_constraintBottom_toTopOf="@+id/button_row"
app:layout_constraintBottom_toTopOf="@+id/streaming_switch_row"
app:layout_constraintEnd_toEndOf="parent"
app:layout_constraintStart_toStartOf="parent"
app:layout_constraintTop_toTopOf="parent" />
app:layout_constraintTop_toTopOf="parent"
android:background="@android:color/transparent"
/>
<LinearLayout
android:id="@+id/streaming_switch_row"
android:layout_width="0dp"
android:layout_height="wrap_content"
android:gravity="center_vertical"
android:orientation="horizontal"
android:padding="16dp"
app:layout_constraintBottom_toTopOf="@+id/button_row"
app:layout_constraintEnd_toEndOf="parent"
app:layout_constraintStart_toStartOf="parent">
<TextView
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:text="流式输出"
android:textSize="16sp"
android:layout_marginEnd="16dp"/>
<Switch
android:id="@+id/streaming_switch"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:checked="false"/>
</LinearLayout>
<LinearLayout
android:id="@+id/button_row"

Binary file not shown.

Binary file not shown.

View File

@@ -3,4 +3,5 @@
<string name="start">开始</string>
<string name="stop">结束</string>
<string name="hint">点击“开始”说话;识别后会请求大模型并用 TTS 播放回复。</string>
<string name="system_prompt">你是一名小学女老师,喜欢回答学生的各种问题,请简洁但温柔地回答,每个回答不超过30字。</string>
</resources>