mirror of
https://github.com/linyqh/NarratoAI.git
synced 2026-06-17 04:42:05 +00:00
feat: 优化视频生成进度展示与UI细节
- 为视频生成任务的每个处理步骤添加详细的中文状态提示
- 重构WebUI的视频生成弹窗,使用Streamlit原生状态组件优化进度展示
- 清理多语言翻译文本中的冗余表情符号,统一UI文本风格
- 调整TTS设置面板的折叠面板默认展开状态为关闭,并移除标题中的表情前缀
This commit is contained in:
parent
34d5532119
commit
4ab29fd776
@ -434,12 +434,23 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
|
||||
global merged_audio_path, merged_subtitle_path
|
||||
|
||||
logger.info(f"\n\n## 开始统一视频处理任务: {task_id}")
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=0)
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_PROCESSING,
|
||||
progress=0,
|
||||
message="正在初始化视频生成任务",
|
||||
)
|
||||
|
||||
"""
|
||||
1. 加载剪辑脚本
|
||||
"""
|
||||
logger.info("\n\n## 1. 加载视频脚本")
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_PROCESSING,
|
||||
progress=5,
|
||||
message="正在加载剪辑脚本",
|
||||
)
|
||||
video_script_path = path.join(params.video_clip_json_path)
|
||||
|
||||
if path.exists(video_script_path):
|
||||
@ -465,6 +476,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
|
||||
2. 使用 TTS 生成音频素材
|
||||
"""
|
||||
logger.info("\n\n## 2. 根据OST设置生成音频列表")
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_PROCESSING,
|
||||
progress=10,
|
||||
message="正在生成 TTS 配音",
|
||||
)
|
||||
# 只为OST=0 or 2的判断生成音频, OST=0 仅保留解说 OST=2 保留解说和原声
|
||||
tts_segments = [
|
||||
segment for segment in list_script
|
||||
@ -481,12 +498,23 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
|
||||
voice_pitch=params.voice_pitch,
|
||||
)
|
||||
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_PROCESSING,
|
||||
progress=20,
|
||||
message="TTS 配音生成完成",
|
||||
)
|
||||
|
||||
"""
|
||||
3. 统一视频裁剪 - 基于OST类型的差异化裁剪策略
|
||||
"""
|
||||
logger.info("\n\n## 3. 统一视频裁剪(基于OST类型)")
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_PROCESSING,
|
||||
progress=30,
|
||||
message="正在按脚本裁剪视频片段",
|
||||
)
|
||||
|
||||
# 使用新的统一裁剪策略
|
||||
video_clip_result = clip_video.clip_video_unified(
|
||||
@ -505,12 +533,23 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
|
||||
|
||||
logger.info(f"统一裁剪完成,处理了 {len(video_clip_result)} 个视频片段")
|
||||
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60)
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_PROCESSING,
|
||||
progress=60,
|
||||
message="视频片段裁剪完成",
|
||||
)
|
||||
|
||||
"""
|
||||
4. 合并音频和字幕
|
||||
"""
|
||||
logger.info("\n\n## 4. 合并音频和字幕")
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_PROCESSING,
|
||||
progress=65,
|
||||
message="正在合并配音和字幕",
|
||||
)
|
||||
total_duration = sum([script["duration"] for script in new_script_list])
|
||||
if tts_segments:
|
||||
try:
|
||||
@ -540,6 +579,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
|
||||
logger.warning("没有需要合并的音频/字幕")
|
||||
merged_audio_path = ""
|
||||
merged_subtitle_path = ""
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_PROCESSING,
|
||||
progress=70,
|
||||
message="配音和字幕合并完成",
|
||||
)
|
||||
|
||||
"""
|
||||
5. 合并视频
|
||||
@ -549,6 +594,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
|
||||
|
||||
combined_video_path = path.join(utils.task_dir(task_id), f"merger.mp4")
|
||||
logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}")
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_PROCESSING,
|
||||
progress=75,
|
||||
message="正在合并视频片段",
|
||||
)
|
||||
|
||||
# 使用统一裁剪后的视频片段
|
||||
video_clips = []
|
||||
@ -568,7 +619,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
|
||||
video_aspect=params.video_aspect,
|
||||
threads=params.n_threads
|
||||
)
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=80)
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_PROCESSING,
|
||||
progress=80,
|
||||
message="视频片段合并完成",
|
||||
)
|
||||
|
||||
"""
|
||||
6. 合并字幕/BGM/配音/视频
|
||||
@ -581,6 +637,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
|
||||
else output_video_path
|
||||
)
|
||||
logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {merge_output_video_path}")
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_PROCESSING,
|
||||
progress=85,
|
||||
message="正在合成最终视频",
|
||||
)
|
||||
|
||||
bgm_path = utils.get_bgm_file(
|
||||
bgm_type=getattr(params, "bgm_type", "random"),
|
||||
@ -634,10 +696,20 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
|
||||
|
||||
auto_subtitle_path = ""
|
||||
if auto_transcription_enabled:
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=90)
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_PROCESSING,
|
||||
progress=90,
|
||||
message="正在自动转录最终视频",
|
||||
)
|
||||
logger.info("\n\n## 7. 自动转录最终视频字幕")
|
||||
auto_subtitle_path = _transcribe_final_video(task_id, merge_output_video_path, params)
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=95)
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_PROCESSING,
|
||||
progress=95,
|
||||
message="正在压入自动转录字幕",
|
||||
)
|
||||
logger.info(f"\n\n## 8. 压入自动转录字幕 -> {output_video_path}")
|
||||
_merge_auto_transcribed_subtitles(
|
||||
source_video_path=merge_output_video_path,
|
||||
@ -657,7 +729,13 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
|
||||
}
|
||||
if auto_subtitle_path:
|
||||
kwargs["subtitles"] = [auto_subtitle_path]
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs)
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_COMPLETE,
|
||||
progress=100,
|
||||
message="视频生成完成",
|
||||
**kwargs
|
||||
)
|
||||
return kwargs
|
||||
|
||||
|
||||
|
||||
137
webui.py
137
webui.py
@ -170,57 +170,102 @@ def render_generate_button():
|
||||
# 生成一个新的task_id用于本次处理
|
||||
task_id = str(uuid.uuid4())
|
||||
|
||||
# 创建进度条
|
||||
progress_bar = st.progress(0)
|
||||
status_text = st.empty()
|
||||
@st.dialog(tr("Generating Video"), width="large")
|
||||
def generate_video_dialog():
|
||||
st.markdown(
|
||||
"""
|
||||
<style>
|
||||
div[data-testid="stDialog"] div[data-testid="stStatusWidget"] {
|
||||
margin-top: 0.25rem;
|
||||
}
|
||||
div[data-testid="stDialog"] div[data-testid="stProgress"] {
|
||||
margin-bottom: 0.75rem;
|
||||
}
|
||||
</style>
|
||||
""",
|
||||
unsafe_allow_html=True,
|
||||
)
|
||||
|
||||
def run_task():
|
||||
try:
|
||||
tm.start_subclip_unified(
|
||||
task_id=task_id,
|
||||
params=params
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"任务执行失败: {e}")
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED, message=str(e))
|
||||
progress_bar = st.progress(0)
|
||||
status_panel = st.status(tr("Generating Video"), expanded=True)
|
||||
status_panel.write(tr("Generating Video"))
|
||||
|
||||
# 在新线程中启动任务
|
||||
thread = threading.Thread(target=run_task)
|
||||
thread.start()
|
||||
def run_task():
|
||||
try:
|
||||
tm.start_subclip_unified(
|
||||
task_id=task_id,
|
||||
params=params
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"任务执行失败: {e}")
|
||||
current_task = sm.state.get_task(task_id) or {}
|
||||
sm.state.update_task(
|
||||
task_id,
|
||||
state=const.TASK_STATE_FAILED,
|
||||
progress=current_task.get("progress", 0),
|
||||
message=str(e),
|
||||
)
|
||||
|
||||
# 轮询任务状态
|
||||
while True:
|
||||
task = sm.state.get_task(task_id)
|
||||
if task:
|
||||
progress = task.get("progress", 0)
|
||||
state = task.get("state")
|
||||
|
||||
# 更新进度条
|
||||
progress_bar.progress(progress / 100)
|
||||
status_text.text(f"Processing... {progress}%")
|
||||
# 在新线程中启动任务
|
||||
thread = threading.Thread(target=run_task)
|
||||
thread.start()
|
||||
|
||||
last_status_key = None
|
||||
|
||||
# 轮询任务状态
|
||||
while True:
|
||||
task = sm.state.get_task(task_id)
|
||||
if task:
|
||||
progress = task.get("progress", 0)
|
||||
state = task.get("state")
|
||||
|
||||
if state == const.TASK_STATE_COMPLETE:
|
||||
status_text.text(tr("Video Generation Completed"))
|
||||
progress_bar.progress(1.0)
|
||||
|
||||
# 显示结果
|
||||
video_files = task.get("videos", [])
|
||||
try:
|
||||
if video_files:
|
||||
player_cols = st.columns(len(video_files) * 2 + 1)
|
||||
for i, url in enumerate(video_files):
|
||||
player_cols[i * 2 + 1].video(url)
|
||||
except Exception as e:
|
||||
logger.error(f"播放视频失败: {e}")
|
||||
|
||||
st.success(tr("Video Generation Completed"))
|
||||
break
|
||||
|
||||
elif state == const.TASK_STATE_FAILED:
|
||||
st.error(f"{tr('Task failed')}: {task.get('message', 'Unknown error')}")
|
||||
break
|
||||
|
||||
time.sleep(0.5)
|
||||
progress = int(progress)
|
||||
except (TypeError, ValueError):
|
||||
progress = 0
|
||||
progress = max(0, min(progress, 100))
|
||||
|
||||
# 更新进度条和阶段状态
|
||||
progress_bar.progress(progress / 100)
|
||||
current_message = task.get("message") or f"Processing... {progress}%"
|
||||
status_label = f"{current_message} ({progress}%)"
|
||||
status_key = (state, progress, current_message)
|
||||
if status_key != last_status_key:
|
||||
status_panel.write(status_label)
|
||||
last_status_key = status_key
|
||||
|
||||
if state == const.TASK_STATE_COMPLETE:
|
||||
status_panel.update(
|
||||
label=tr("Video Generation Completed"),
|
||||
state="complete",
|
||||
expanded=False,
|
||||
)
|
||||
progress_bar.progress(1.0)
|
||||
|
||||
# 显示结果
|
||||
video_files = task.get("videos", [])
|
||||
try:
|
||||
if video_files:
|
||||
for url in video_files:
|
||||
st.video(url)
|
||||
except Exception as e:
|
||||
logger.error(f"播放视频失败: {e}")
|
||||
|
||||
st.success(tr("Video Generation Completed"))
|
||||
break
|
||||
|
||||
if state == const.TASK_STATE_FAILED:
|
||||
status_panel.update(
|
||||
label=f"{tr('Task failed')}: {task.get('message', 'Unknown error')}",
|
||||
state="error",
|
||||
expanded=True,
|
||||
)
|
||||
st.error(f"{tr('Task failed')}: {task.get('message', 'Unknown error')}")
|
||||
break
|
||||
|
||||
time.sleep(0.5)
|
||||
|
||||
generate_video_dialog()
|
||||
|
||||
|
||||
def get_voice_name_for_tts_engine(tts_engine: str) -> str:
|
||||
|
||||
@ -522,7 +522,7 @@ def render_tts_settings(tr):
|
||||
"""渲染TTS(文本转语音)设置"""
|
||||
|
||||
# 1. TTS引擎选择器
|
||||
# st.subheader("🎤 TTS引擎选择")
|
||||
# st.subheader("TTS引擎选择")
|
||||
|
||||
engine_options = get_tts_engine_options(tr)
|
||||
engine_descriptions = get_tts_engine_descriptions(tr)
|
||||
@ -553,7 +553,7 @@ def render_tts_settings(tr):
|
||||
if selected_engine in engine_descriptions:
|
||||
desc = engine_descriptions[selected_engine]
|
||||
|
||||
with st.expander(tr("TTS Engine Details").format(engine=desc['title']), expanded=True):
|
||||
with st.expander(tr("TTS Engine Details").format(engine=desc['title']), expanded=False):
|
||||
st.markdown(f"**{tr('Features')}:** {desc['features']}")
|
||||
st.markdown(f"**{tr('Use Case')}:** {desc['use_case']}")
|
||||
|
||||
@ -561,7 +561,7 @@ def render_tts_settings(tr):
|
||||
st.markdown(f"**{tr('Registration URL')}:** [{desc['registration']}]({desc['registration']})")
|
||||
|
||||
# 3. 根据选择的引擎渲染对应的配置界面
|
||||
# st.subheader("⚙️ 引擎配置")
|
||||
# st.subheader("引擎配置")
|
||||
|
||||
if selected_engine == "edge_tts":
|
||||
render_edge_tts_settings(tr)
|
||||
|
||||
@ -61,7 +61,7 @@
|
||||
"Preview Background Music Help": "Play the selected background music.",
|
||||
"Upload Background Music File": "Upload Background Music File",
|
||||
"Upload Background Music Help": "Upload an audio file to use as background music.",
|
||||
"Background Music uploaded": "✅ Background music uploaded: {path}",
|
||||
"Background Music uploaded": "Background music uploaded: {path}",
|
||||
"Background Music Volume": "Background Music Volume (0.2 represents 20%, background sound should not be too loud)",
|
||||
"Subtitle Settings": "**Subtitle Settings**",
|
||||
"Enable Subtitles": "Enable Subtitles (If unchecked, the following settings will not take effect)",
|
||||
@ -326,13 +326,13 @@
|
||||
"Cloud Service": "Cloud Service",
|
||||
"Select TTS Engine": "Select TTS Engine",
|
||||
"Select TTS Engine Help": "Choose the text-to-speech engine you want to use.",
|
||||
"TTS Engine Details": "📋 {engine} Details",
|
||||
"TTS Engine Details": "{engine} Details",
|
||||
"Features": "Features",
|
||||
"Use Case": "Use Case",
|
||||
"Registration URL": "Registration URL",
|
||||
"Voice Selection": "Voice Selection",
|
||||
"Select Edge TTS Voice": "Select an Edge TTS voice",
|
||||
"Edge TTS Voice Description": "💡 Edge TTS Voice Notes",
|
||||
"Edge TTS Voice Description": "Edge TTS Voice Notes",
|
||||
"Loaded voice count": "Loaded {count} voices",
|
||||
"Female Voice": "Female voice",
|
||||
"Male Voice": "Male voice",
|
||||
@ -348,21 +348,21 @@
|
||||
"Azure Speech Key Help": "Azure Speech Services API key",
|
||||
"Voice Name": "Voice Name",
|
||||
"Azure Voice Name Help": "Enter an Azure Speech Services voice name. You can use the official voice name directly, such as zh-CN-YunzeNeural.",
|
||||
"Common Voice Reference": "💡 Common Voice Reference",
|
||||
"Common Voice Reference": "Common Voice Reference",
|
||||
"Chinese Voices": "Chinese Voices",
|
||||
"English Voices": "English Voices",
|
||||
"Multilingual": "multilingual",
|
||||
"Azure Voices Docs Notice": "💡 For more voices, see the [Azure Speech Services documentation](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support).",
|
||||
"Azure Voices Docs Notice": "For more voices, see the [Azure Speech Services documentation](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support).",
|
||||
"Quick Select": "Quick Select",
|
||||
"Chinese Female Voice": "Chinese Female Voice",
|
||||
"Chinese Male Voice": "Chinese Male Voice",
|
||||
"English Female Voice": "English Female Voice",
|
||||
"Voice name valid": "✅ Voice name is valid: {voice}",
|
||||
"Voice name format may be invalid": "⚠️ Voice name format may be incorrect: {voice}",
|
||||
"Azure voice name format notice": "💡 Azure voice names usually follow this format: [language]-[region]-[name]Neural",
|
||||
"Azure Speech Services configured": "✅ Azure Speech Services is configured",
|
||||
"Please configure service region": "⚠️ Please configure the service region",
|
||||
"Please configure API Key": "⚠️ Please configure the API Key",
|
||||
"Voice name valid": "Voice name is valid: {voice}",
|
||||
"Voice name format may be invalid": "Voice name format may be incorrect: {voice}",
|
||||
"Azure voice name format notice": "Azure voice names usually follow this format: [language]-[region]-[name]Neural",
|
||||
"Azure Speech Services configured": "Azure Speech Services is configured",
|
||||
"Please configure service region": "Please configure the service region",
|
||||
"Please configure API Key": "Please configure the API Key",
|
||||
"Task failed": "Task failed",
|
||||
"Script file cannot be empty": "Script file cannot be empty",
|
||||
"Video file cannot be empty": "Video file cannot be empty",
|
||||
@ -486,10 +486,10 @@
|
||||
"Tencent Service Region Help": "Select the Tencent Cloud TTS service region",
|
||||
"Custom Voice": "Custom Voice",
|
||||
"Select Tencent TTS Voice": "Select a Tencent Cloud TTS voice",
|
||||
"Tencent Cloud TTS Voice Description": "💡 Tencent Cloud TTS Voice Notes",
|
||||
"Tencent Cloud TTS Voice Description": "Tencent Cloud TTS Voice Notes",
|
||||
"Female Voices": "Female Voices",
|
||||
"Male Voices": "Male Voices",
|
||||
"Tencent More Voices Notice": "💡 See the official Tencent Cloud documentation for more voices.",
|
||||
"Tencent More Voices Notice": "See the official Tencent Cloud documentation for more voices.",
|
||||
"Qwen DashScope API Key Help": "Tongyi Qwen DashScope API Key",
|
||||
"TTS Model Name": "TTS Model Name",
|
||||
"Qwen TTS Model Help": "Qwen TTS model name, for example qwen3-tts-flash",
|
||||
@ -532,12 +532,12 @@
|
||||
"Preview Reference Audio Help": "Play the selected reference audio.",
|
||||
"Upload Reference Audio File": "Upload Reference Audio File",
|
||||
"Upload Reference Audio Help": "Upload a clear audio clip for voice cloning",
|
||||
"Audio uploaded": "✅ Audio uploaded: {path}",
|
||||
"Audio uploaded": "Audio uploaded: {path}",
|
||||
"Inference Mode": "Inference Mode",
|
||||
"Standard Inference": "Standard Inference",
|
||||
"Fast Inference": "Fast Inference",
|
||||
"Inference Mode Help": "Standard inference has higher quality but is slower. Fast inference is faster with slightly lower quality.",
|
||||
"Advanced Parameters": "🔧 Advanced Parameters",
|
||||
"Advanced Parameters": "Advanced Parameters",
|
||||
"Sampling Temperature": "Sampling Temperature",
|
||||
"Sampling Temperature Help": "Controls randomness. Higher values are more random; lower values are more deterministic.",
|
||||
"Top P Help": "Probability threshold for nucleus sampling. Smaller values make results more deterministic.",
|
||||
@ -548,9 +548,9 @@
|
||||
"Repetition Penalty Help": "Higher values reduce repetition, but overly high values may sound unnatural.",
|
||||
"Enable Sampling": "Enable Sampling",
|
||||
"Enable Sampling Help": "Enable sampling for more natural speech.",
|
||||
"IndexTTS Usage Instructions Title": "💡 IndexTTS-1.5 Usage Instructions",
|
||||
"IndexTTS Usage Instructions Title": "IndexTTS-1.5 Usage Instructions",
|
||||
"IndexTTS Usage Instructions": "**Zero-shot voice cloning**\n\n1. **Prepare reference audio**: upload or specify a clear audio file (3-10 seconds recommended)\n2. **Set API URL**: make sure the IndexTTS-1.5 service is running\n3. **Start synthesis**: the system will use the reference voice to synthesize new speech\n\n**Notes**:\n- Reference audio quality directly affects synthesis quality\n- Use clean audio without background noise when possible\n- Keep text length within a reasonable range\n- The first synthesis may take longer",
|
||||
"IndexTTS2 Emotion Parameters": "🎭 Emotion Parameters",
|
||||
"IndexTTS2 Emotion Parameters": "Emotion Parameters",
|
||||
"Emotion Mode": "Emotion Mode",
|
||||
"Emotion Mode Help": "Choose the emotion control source for IndexTTS-2.",
|
||||
"Emotion Mode Speaker": "Same as speaker reference",
|
||||
@ -578,7 +578,7 @@
|
||||
"Max Text Tokens Per Segment Help": "Maximum text tokens per segment for IndexTTS-2 inference.",
|
||||
"Max Mel Tokens": "Max Mel Tokens",
|
||||
"Max Mel Tokens Help": "Controls the maximum mel tokens generated in one request. Higher values can produce longer audio.",
|
||||
"IndexTTS2 Usage Instructions Title": "💡 IndexTTS-2 Usage Instructions",
|
||||
"IndexTTS2 Usage Instructions Title": "IndexTTS-2 Usage Instructions",
|
||||
"IndexTTS2 Usage Instructions": "**IndexTTS-2 voice cloning**\n\n1. **Choose a voice**: reuse IndexTTS-1.5 resource audio or upload a reference audio file\n2. **Set API URL**: for example http://192.168.3.6:7863/tts, or enter the service root\n3. **Tune emotion**: speaker is the default; switch to audio, vector, or text when needed\n4. **Tune generation**: temperature, top_p, top_k, num_beams, repetition_penalty, and max_mel_tokens are sent directly to the IndexTTS-2 API\n\n**Notes**:\n- Reference audio quality directly affects cloning quality\n- The first request may load the model and take longer\n- CPU deployments are much slower than GPU deployments",
|
||||
"OmniVoice Usage Instructions Title": "OmniVoice Usage Instructions",
|
||||
"OmniVoice Usage Instructions": "**OmniVoice-Pack speech synthesis**\n\n1. **Automatic voice**: set the API URL and language, then synthesize directly.\n2. **Voice design**: fill instruct with the desired gender, pitch, accent, or style.\n3. **Reference-audio clone**: upload or choose reference audio and fill its matching transcript.\n\n**Notes**:\n- The default service URL is http://127.0.0.1:7866/tts\n- Reference-audio cloning requires reference text when the service has no ASR model loaded\n- OmniVoice returns WAV audio, and NarratoAI estimates subtitle segment timing from the audio duration",
|
||||
@ -594,7 +594,7 @@
|
||||
"Voice Pitch Help 0.5-1.5": "Adjust voice pitch (0.5-1.5)",
|
||||
"Sentence Silence Duration": "Sentence-end Silence Duration (seconds)",
|
||||
"Sentence Silence Duration Help": "Adjust sentence-end silence duration (0.0-2.0 seconds)",
|
||||
"Doubao TTS API Key Application Process": "💡 Doubao TTS API Key Application Process",
|
||||
"Doubao TTS API Key Application Process": "Doubao TTS API Key Application Process",
|
||||
"Application Steps": "Application Steps",
|
||||
"Doubao TTS Step 1": "1. Open [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)",
|
||||
"Doubao TTS Step 2": "2. Create a new Access Key and Secret Key",
|
||||
@ -602,15 +602,15 @@
|
||||
"Doubao TTS Step 4": "4. Click Start Now",
|
||||
"Doubao TTS Step 5": "5. In the left API Service Center, find Speech Synthesis under Audio Generation (note: Speech Synthesis, not the speech synthesis large model)",
|
||||
"Doubao TTS Step 6": "6. Scroll to the bottom to get the APPID and Access Token",
|
||||
"Doubao TTS Fill Credentials Notice": "💡 Fill the Access Key, Secret Key, AppID, and Token above.",
|
||||
"Doubao TTS configured": "✅ Doubao TTS is configured",
|
||||
"Please configure missing fields": "⚠️ Please configure: {fields}",
|
||||
"Preview Voice Synthesis": "🎵 Preview Voice Synthesis",
|
||||
"Doubao TTS Fill Credentials Notice": "Fill the Access Key, Secret Key, AppID, and Token above.",
|
||||
"Doubao TTS configured": "Doubao TTS is configured",
|
||||
"Please configure missing fields": "Please configure: {fields}",
|
||||
"Preview Voice Synthesis": "Preview Voice Synthesis",
|
||||
"Voice Preview Sample": "Thanks for using NarratoAI. If you have any questions or suggestions, please join the community for help and discussion.",
|
||||
"Please configure voice settings first": "Please configure voice settings first",
|
||||
"Voice synthesis successful": "✅ Voice synthesis successful!",
|
||||
"Voice synthesis failed": "❌ Voice synthesis failed. Please check your configuration.",
|
||||
"SoulVoice pitch not supported": "ℹ️ SoulVoice does not support pitch adjustment",
|
||||
"Voice synthesis successful": "Voice synthesis successful!",
|
||||
"Voice synthesis failed": "Voice synthesis failed. Please check your configuration.",
|
||||
"SoulVoice pitch not supported": "SoulVoice does not support pitch adjustment",
|
||||
"Progress": "Progress",
|
||||
"Generating script...": "Generating script...",
|
||||
"Please select video file first": "Please select a video file first",
|
||||
|
||||
@ -49,7 +49,7 @@
|
||||
"Preview Background Music Help": "播放当前背景音乐",
|
||||
"Upload Background Music File": "上传背景音乐文件",
|
||||
"Upload Background Music Help": "上传一个音频文件作为背景音乐",
|
||||
"Background Music uploaded": "✅ 背景音乐已上传: {path}",
|
||||
"Background Music uploaded": "背景音乐已上传: {path}",
|
||||
"Background Music Volume": "背景音乐音量(0.2表示20%,背景声音不宜过高)",
|
||||
"Subtitle Settings": "**字幕设置**",
|
||||
"Enable Subtitles": "启用字幕(若取消勾选,下面的设置都将不生效)",
|
||||
@ -276,13 +276,13 @@
|
||||
"Cloud Service": "云端服务",
|
||||
"Select TTS Engine": "选择 TTS 引擎",
|
||||
"Select TTS Engine Help": "选择您要使用的文本转语音引擎",
|
||||
"TTS Engine Details": "📋 {engine} 详细说明",
|
||||
"TTS Engine Details": "{engine} 详细说明",
|
||||
"Features": "特点",
|
||||
"Use Case": "适用场景",
|
||||
"Registration URL": "注册地址",
|
||||
"Voice Selection": "音色选择",
|
||||
"Select Edge TTS Voice": "选择 Edge TTS 音色",
|
||||
"Edge TTS Voice Description": "💡 Edge TTS 音色说明",
|
||||
"Edge TTS Voice Description": "Edge TTS 音色说明",
|
||||
"Loaded voice count": "已加载 {count} 个音色",
|
||||
"Female Voice": "女声",
|
||||
"Male Voice": "男声",
|
||||
@ -298,21 +298,21 @@
|
||||
"Azure Speech Key Help": "Azure Speech Services API 密钥",
|
||||
"Voice Name": "音色名称",
|
||||
"Azure Voice Name Help": "输入 Azure Speech Services 音色名称,直接使用官方音色名称即可。例如:zh-CN-YunzeNeural",
|
||||
"Common Voice Reference": "💡 常用音色参考",
|
||||
"Common Voice Reference": "常用音色参考",
|
||||
"Chinese Voices": "中文音色",
|
||||
"English Voices": "英文音色",
|
||||
"Multilingual": "多语言",
|
||||
"Azure Voices Docs Notice": "💡 更多音色请参考 [Azure Speech Services 官方文档](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)",
|
||||
"Azure Voices Docs Notice": "更多音色请参考 [Azure Speech Services 官方文档](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)",
|
||||
"Quick Select": "快速选择",
|
||||
"Chinese Female Voice": "中文女声",
|
||||
"Chinese Male Voice": "中文男声",
|
||||
"English Female Voice": "英文女声",
|
||||
"Voice name valid": "✅ 音色名称有效: {voice}",
|
||||
"Voice name format may be invalid": "⚠️ 音色名称格式可能不正确: {voice}",
|
||||
"Azure voice name format notice": "💡 Azure 音色名称通常格式为: [语言]-[地区]-[名称]Neural",
|
||||
"Azure Speech Services configured": "✅ Azure Speech Services 配置已设置",
|
||||
"Please configure service region": "⚠️ 请配置服务区域",
|
||||
"Please configure API Key": "⚠️ 请配置 API Key",
|
||||
"Voice name valid": "音色名称有效: {voice}",
|
||||
"Voice name format may be invalid": "音色名称格式可能不正确: {voice}",
|
||||
"Azure voice name format notice": "Azure 音色名称通常格式为: [语言]-[地区]-[名称]Neural",
|
||||
"Azure Speech Services configured": "Azure Speech Services 配置已设置",
|
||||
"Please configure service region": "请配置服务区域",
|
||||
"Please configure API Key": "请配置 API Key",
|
||||
"Language": "界面语言",
|
||||
"Task failed": "任务失败",
|
||||
"Script file cannot be empty": "脚本文件不能为空",
|
||||
@ -437,10 +437,10 @@
|
||||
"Tencent Service Region Help": "选择腾讯云 TTS 服务地域",
|
||||
"Custom Voice": "自定义音色",
|
||||
"Select Tencent TTS Voice": "选择腾讯云 TTS 音色",
|
||||
"Tencent Cloud TTS Voice Description": "💡 腾讯云 TTS 音色说明",
|
||||
"Tencent Cloud TTS Voice Description": "腾讯云 TTS 音色说明",
|
||||
"Female Voices": "女声音色",
|
||||
"Male Voices": "男声音色",
|
||||
"Tencent More Voices Notice": "💡 更多音色请参考腾讯云官方文档",
|
||||
"Tencent More Voices Notice": "更多音色请参考腾讯云官方文档",
|
||||
"Qwen DashScope API Key Help": "通义千问 DashScope API Key",
|
||||
"TTS Model Name": "模型名称",
|
||||
"Qwen TTS Model Help": "Qwen TTS 模型名,例如 qwen3-tts-flash",
|
||||
@ -483,12 +483,12 @@
|
||||
"Preview Reference Audio Help": "播放当前参考音频",
|
||||
"Upload Reference Audio File": "上传参考音频文件",
|
||||
"Upload Reference Audio Help": "上传一段清晰的音频用于语音克隆",
|
||||
"Audio uploaded": "✅ 音频已上传: {path}",
|
||||
"Audio uploaded": "音频已上传: {path}",
|
||||
"Inference Mode": "推理模式",
|
||||
"Standard Inference": "普通推理",
|
||||
"Fast Inference": "快速推理",
|
||||
"Inference Mode Help": "普通推理质量更高但速度较慢,快速推理速度更快但质量略低",
|
||||
"Advanced Parameters": "🔧 高级参数",
|
||||
"Advanced Parameters": "高级参数",
|
||||
"Sampling Temperature": "采样温度 (Temperature)",
|
||||
"Sampling Temperature Help": "控制随机性,值越高输出越随机,值越低越确定",
|
||||
"Top P Help": "nucleus 采样的概率阈值,值越小结果越确定",
|
||||
@ -499,9 +499,9 @@
|
||||
"Repetition Penalty Help": "值越大越能避免重复,但过大可能导致不自然",
|
||||
"Enable Sampling": "启用采样",
|
||||
"Enable Sampling Help": "启用采样可以获得更自然的语音",
|
||||
"IndexTTS Usage Instructions Title": "💡 IndexTTS-1.5 使用说明",
|
||||
"IndexTTS Usage Instructions Title": "IndexTTS-1.5 使用说明",
|
||||
"IndexTTS Usage Instructions": "**零样本语音克隆**\n\n1. **准备参考音频**:上传或指定一段清晰的音频文件(建议 3-10 秒)\n2. **设置 API 地址**:确保 IndexTTS-1.5 服务正常运行\n3. **开始合成**:系统会自动使用参考音频的音色合成新语音\n\n**注意事项**:\n- 参考音频质量直接影响合成效果\n- 建议使用无背景噪音的清晰音频\n- 文本长度建议控制在合理范围内\n- 首次合成可能需要较长时间",
|
||||
"IndexTTS2 Emotion Parameters": "🎭 情感参数",
|
||||
"IndexTTS2 Emotion Parameters": "情感参数",
|
||||
"Emotion Mode": "情感控制方式",
|
||||
"Emotion Mode Help": "选择 IndexTTS-2 的情感控制来源",
|
||||
"Emotion Mode Speaker": "与音色参考相同",
|
||||
@ -529,7 +529,7 @@
|
||||
"Max Text Tokens Per Segment Help": "IndexTTS-2 分段推理的最大文本 token 数",
|
||||
"Max Mel Tokens": "最大 Mel Tokens",
|
||||
"Max Mel Tokens Help": "控制单次生成的最大 mel token 数,值越大可生成更长音频",
|
||||
"IndexTTS2 Usage Instructions Title": "💡 IndexTTS-2 使用说明",
|
||||
"IndexTTS2 Usage Instructions Title": "IndexTTS-2 使用说明",
|
||||
"IndexTTS2 Usage Instructions": "**IndexTTS-2 语音克隆**\n\n1. **选择音色**:复用 IndexTTS-1.5 的资源音频或上传参考音频\n2. **设置 API 地址**:例如 http://192.168.3.6:7863/tts,也可以填写服务根地址\n3. **调整情感参数**:默认使用 speaker,可按需切换到 audio、vector 或 text\n4. **调整生成参数**:temperature、top_p、top_k、num_beams、repetition_penalty 和 max_mel_tokens 会直接传给 IndexTTS-2 接口\n\n**注意事项**:\n- 参考音频质量会直接影响克隆效果\n- 首次请求可能需要加载模型,耗时更长\n- CPU 部署生成速度会明显慢于 GPU",
|
||||
"OmniVoice Usage Instructions Title": "OmniVoice 使用说明",
|
||||
"OmniVoice Usage Instructions": "**OmniVoice-Pack 语音合成**\n\n1. **自动音色**:只需要设置 API 地址和语言,可直接合成。\n2. **指令音色**:填写 instruct 描述想要的性别、音高、口音或风格。\n3. **参考音频克隆**:上传或选择参考音频,并填写该音频对应文本。\n\n**注意事项**:\n- 当前默认服务地址为 http://127.0.0.1:7866/tts\n- 参考音频克隆在服务未加载 ASR 模型时必须填写参考文本\n- OmniVoice 返回 WAV 音频,系统会按音频时长估算字幕段落",
|
||||
@ -545,7 +545,7 @@
|
||||
"Voice Pitch Help 0.5-1.5": "调节语音音高 (0.5-1.5)",
|
||||
"Sentence Silence Duration": "句尾静音时长 (秒)",
|
||||
"Sentence Silence Duration Help": "调节句尾静音时长 (0.0-2.0 秒)",
|
||||
"Doubao TTS API Key Application Process": "💡 豆包语音 TTS API Key申请流程",
|
||||
"Doubao TTS API Key Application Process": "豆包语音 TTS API Key申请流程",
|
||||
"Application Steps": "申请步骤",
|
||||
"Doubao TTS Step 1": "1. 打开 [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)",
|
||||
"Doubao TTS Step 2": "2. 新建 Access Key 和 Secret Key",
|
||||
@ -553,15 +553,15 @@
|
||||
"Doubao TTS Step 4": "4. 点击立即使用",
|
||||
"Doubao TTS Step 5": "5. 在最左边的 API 服务中心找到音频生成下面的语音合成(注意:是语音合成,不是语音合成大模型)",
|
||||
"Doubao TTS Step 6": "6. 翻到最下面获取 APPID 和 Access Token",
|
||||
"Doubao TTS Fill Credentials Notice": "💡 请将获取到的 Access Key、Secret Key、AppID 和 Token 填写到上方的配置中",
|
||||
"Doubao TTS configured": "✅ 豆包语音 TTS 配置已设置",
|
||||
"Please configure missing fields": "⚠️ 请配置: {fields}",
|
||||
"Preview Voice Synthesis": "🎵 试听语音合成",
|
||||
"Doubao TTS Fill Credentials Notice": "请将获取到的 Access Key、Secret Key、AppID 和 Token 填写到上方的配置中",
|
||||
"Doubao TTS configured": "豆包语音 TTS 配置已设置",
|
||||
"Please configure missing fields": "请配置: {fields}",
|
||||
"Preview Voice Synthesis": "试听语音合成",
|
||||
"Voice Preview Sample": "感谢关注 NarratoAI,有任何问题或建议,可以加入社区频道求助或讨论",
|
||||
"Please configure voice settings first": "请先配置语音设置",
|
||||
"Voice synthesis successful": "✅ 语音合成成功!",
|
||||
"Voice synthesis failed": "❌ 语音合成失败,请检查配置",
|
||||
"SoulVoice pitch not supported": "ℹ️ SoulVoice 引擎不支持音调调节",
|
||||
"Voice synthesis successful": "语音合成成功!",
|
||||
"Voice synthesis failed": "语音合成失败,请检查配置",
|
||||
"SoulVoice pitch not supported": "SoulVoice 引擎不支持音调调节",
|
||||
"上传字幕文件": "上传字幕",
|
||||
"清除已上传字幕": "清除已上传字幕",
|
||||
"无法读取字幕文件,请检查文件编码(支持 UTF-8、UTF-16、GBK、GB2312)": "无法读取字幕文件,请检查文件编码(支持 UTF-8、UTF-16、GBK、GB2312)",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user