mirror of
https://github.com/linyqh/NarratoAI.git
synced 2025-12-14 13:02:50 +00:00
优化 TTS 新增 proxy 配置,新增语调配置
This commit is contained in:
parent
1d9b27bf46
commit
e926e8676a
@ -347,6 +347,7 @@ class VideoClipParams(BaseModel):
|
|||||||
voice_name: Optional[str] = Field(default="zh-CN-YunjianNeural", description="语音名称")
|
voice_name: Optional[str] = Field(default="zh-CN-YunjianNeural", description="语音名称")
|
||||||
voice_volume: Optional[float] = Field(default=1.0, description="语音音量")
|
voice_volume: Optional[float] = Field(default=1.0, description="语音音量")
|
||||||
voice_rate: Optional[float] = Field(default=1.0, description="语速")
|
voice_rate: Optional[float] = Field(default=1.0, description="语速")
|
||||||
|
voice_pitch: Optional[float] = Field(default=1.0, description="语调")
|
||||||
|
|
||||||
bgm_name: Optional[str] = Field(default="random", description="背景音乐名称")
|
bgm_name: Optional[str] = Field(default="random", description="背景音乐名称")
|
||||||
bgm_type: Optional[str] = Field(default="random", description="背景音乐类型")
|
bgm_type: Optional[str] = Field(default="random", description="背景音乐类型")
|
||||||
|
|||||||
@ -1032,11 +1032,11 @@ def is_azure_v2_voice(voice_name: str):
|
|||||||
|
|
||||||
|
|
||||||
def tts(
|
def tts(
|
||||||
text: str, voice_name: str, voice_rate: float, voice_file: str
|
text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
|
||||||
) -> [SubMaker, None]:
|
) -> [SubMaker, None]:
|
||||||
# if is_azure_v2_voice(voice_name):
|
# if is_azure_v2_voice(voice_name):
|
||||||
# return azure_tts_v2(text, voice_name, voice_file)
|
# return azure_tts_v2(text, voice_name, voice_file)
|
||||||
return azure_tts_v1(text, voice_name, voice_rate, voice_file)
|
return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)
|
||||||
|
|
||||||
|
|
||||||
def convert_rate_to_percent(rate: float) -> str:
|
def convert_rate_to_percent(rate: float) -> str:
|
||||||
@ -1049,18 +1049,29 @@ def convert_rate_to_percent(rate: float) -> str:
|
|||||||
return f"{percent}%"
|
return f"{percent}%"
|
||||||
|
|
||||||
|
|
||||||
|
def convert_pitch_to_percent(rate: float) -> str:
    """Convert a pitch multiplier into a signed ``Hz`` offset string.

    Despite the ``_to_percent`` suffix (kept so existing callers keep
    working), the returned value carries an ``Hz`` unit, e.g. ``"+20Hz"``.

    Args:
        rate: Pitch multiplier where ``1.0`` means "unchanged"; ``1.2`` maps
            to ``"+20Hz"`` and ``0.8`` maps to ``"-20Hz"``.

    Returns:
        The offset ``round((rate - 1.0) * 100)`` rendered with an explicit
        sign followed by ``Hz``.
    """
    offset = round((rate - 1.0) * 100)
    # Always emit an explicit sign: a multiplier such as 1.001 rounds to an
    # offset of 0, and a bare "0Hz" (no sign) is not a valid pitch string for
    # the TTS backend, so zero must be rendered as "+0Hz".
    return f"{offset:+d}Hz"
|
||||||
|
|
||||||
|
|
||||||
def azure_tts_v1(
|
def azure_tts_v1(
|
||||||
text: str, voice_name: str, voice_rate: float, voice_file: str
|
text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
|
||||||
) -> [SubMaker, None]:
|
) -> [SubMaker, None]:
|
||||||
voice_name = parse_voice_name(voice_name)
|
voice_name = parse_voice_name(voice_name)
|
||||||
text = text.strip()
|
text = text.strip()
|
||||||
rate_str = convert_rate_to_percent(voice_rate)
|
rate_str = convert_rate_to_percent(voice_rate)
|
||||||
|
pitch_str = convert_pitch_to_percent(voice_pitch)
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
try:
|
try:
|
||||||
logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
|
logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
|
||||||
|
|
||||||
async def _do() -> SubMaker:
|
async def _do() -> SubMaker:
|
||||||
communicate = edge_tts.Communicate(text, voice_name, rate=rate_str, proxy="http://127.0.0.1:7890")
|
communicate = edge_tts.Communicate(text, voice_name, rate=rate_str, pitch=pitch_str, proxy=config.proxy.get("http"))
|
||||||
sub_maker = edge_tts.SubMaker()
|
sub_maker = edge_tts.SubMaker()
|
||||||
with open(voice_file, "wb") as file:
|
with open(voice_file, "wb") as file:
|
||||||
async for chunk in communicate.stream():
|
async for chunk in communicate.stream():
|
||||||
|
|||||||
@ -1,11 +1,12 @@
|
|||||||
[app]
|
[app]
|
||||||
project_version="0.2.0"
|
project_version="0.2.2"
|
||||||
# 如果你没有 OPENAI API Key,可以使用 g4f 代替,或者使用国内的 Moonshot API
|
# 支持视频理解的大模型提供商
|
||||||
# If you don't have an OPENAI API Key, you can use g4f instead
|
# gemini
|
||||||
|
# qwen2-vl (待增加)
|
||||||
video_llm_provider="gemini"
|
video_llm_provider="gemini"
|
||||||
|
|
||||||
# 支持的提供商 (Supported providers):
|
# 用于生成文案的大模型支持的提供商 (Supported providers):
|
||||||
# openai
|
# openai (默认)
|
||||||
# moonshot (月之暗面)
|
# moonshot (月之暗面)
|
||||||
# oneapi
|
# oneapi
|
||||||
# g4f
|
# g4f
|
||||||
@ -13,8 +14,6 @@
|
|||||||
# qwen (通义千问)
|
# qwen (通义千问)
|
||||||
# gemini
|
# gemini
|
||||||
llm_provider="openai"
|
llm_provider="openai"
|
||||||
# 支持多模态视频理解能力的大模型
|
|
||||||
|
|
||||||
########## Ollama Settings
|
########## Ollama Settings
|
||||||
# No need to set it unless you want to use your own proxy
|
# No need to set it unless you want to use your own proxy
|
||||||
ollama_base_url = ""
|
ollama_base_url = ""
|
||||||
@ -27,7 +26,7 @@
|
|||||||
# No need to set it unless you want to use your own proxy
|
# No need to set it unless you want to use your own proxy
|
||||||
openai_base_url = ""
|
openai_base_url = ""
|
||||||
# Check your available models at https://platform.openai.com/account/limits
|
# Check your available models at https://platform.openai.com/account/limits
|
||||||
openai_model_name = "gpt-4-turbo"
|
openai_model_name = "gpt-4o"
|
||||||
|
|
||||||
########## Moonshot API Key
|
########## Moonshot API Key
|
||||||
# Visit https://platform.moonshot.cn/console/api-keys to get your API key.
|
# Visit https://platform.moonshot.cn/console/api-keys to get your API key.
|
||||||
@ -56,7 +55,7 @@
|
|||||||
|
|
||||||
########## Gemini API Key
|
########## Gemini API Key
|
||||||
gemini_api_key=""
|
gemini_api_key=""
|
||||||
gemini_model_name = "gemini-1.5-flash"
|
gemini_model_name = "gemini-1.5-pro"
|
||||||
|
|
||||||
########## Qwen API Key
|
########## Qwen API Key
|
||||||
# Visit https://dashscope.console.aliyun.com/apiKey to get your API key
|
# Visit https://dashscope.console.aliyun.com/apiKey to get your API key
|
||||||
@ -66,29 +65,23 @@
|
|||||||
qwen_api_key = ""
|
qwen_api_key = ""
|
||||||
qwen_model_name = "qwen-max"
|
qwen_model_name = "qwen-max"
|
||||||
|
|
||||||
|
|
||||||
########## DeepSeek API Key
|
########## DeepSeek API Key
|
||||||
# Visit https://platform.deepseek.com/api_keys to get your API key
|
# Visit https://platform.deepseek.com/api_keys to get your API key
|
||||||
deepseek_api_key = ""
|
deepseek_api_key = ""
|
||||||
deepseek_base_url = "https://api.deepseek.com"
|
deepseek_base_url = "https://api.deepseek.com"
|
||||||
deepseek_model_name = "deepseek-chat"
|
deepseek_model_name = "deepseek-chat"
|
||||||
|
|
||||||
# Subtitle Provider, "whisper"
|
# 字幕提供商(可选),支持 whisper 和 faster-whisper-large-v2
|
||||||
# If empty, the subtitle will not be generated
|
# 默认为 faster-whisper-large-v2 模型地址:https://huggingface.co/guillaumekln/faster-whisper-large-v2
|
||||||
subtitle_provider = "faster-whisper-large-v2"
|
subtitle_provider = "faster-whisper-large-v2"
|
||||||
subtitle_enabled = true
|
subtitle_enabled = true
|
||||||
|
|
||||||
#
|
|
||||||
# ImageMagick
|
# ImageMagick
|
||||||
#
|
# 安装后,将自动检测到 ImageMagick,Windows 除外!
|
||||||
# Once you have installed it, ImageMagick will be automatically detected, except on Windows!
|
# 例如,在 Windows 上 "C:\Program Files (x86)\ImageMagick-7.1.1-Q16-HDRI\magick.exe"
|
||||||
# On Windows, for example "C:\Program Files (x86)\ImageMagick-7.1.1-Q16-HDRI\magick.exe"
|
# 下载位置 https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-29-Q16-x64-static.exe
|
||||||
# Download from https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-29-Q16-x64-static.exe
|
|
||||||
|
|
||||||
# imagemagick_path = "C:\\Program Files (x86)\\ImageMagick-7.1.1-Q16\\magick.exe"
|
# imagemagick_path = "C:\\Program Files (x86)\\ImageMagick-7.1.1-Q16\\magick.exe"
|
||||||
|
|
||||||
|
|
||||||
#
|
|
||||||
# FFMPEG
|
# FFMPEG
|
||||||
#
|
#
|
||||||
# 通常情况下,ffmpeg 会被自动下载,并且会被自动检测到。
|
# 通常情况下,ffmpeg 会被自动下载,并且会被自动检测到。
|
||||||
@ -97,12 +90,6 @@
|
|||||||
# Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
|
# Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
|
||||||
# 此时你可以手动下载 ffmpeg 并设置 ffmpeg_path,下载地址:https://www.gyan.dev/ffmpeg/builds/
|
# 此时你可以手动下载 ffmpeg 并设置 ffmpeg_path,下载地址:https://www.gyan.dev/ffmpeg/builds/
|
||||||
|
|
||||||
# Under normal circumstances, ffmpeg is downloaded automatically and detected automatically.
|
|
||||||
# However, if there is an issue with your environment that prevents automatic downloading, you might encounter the following error:
|
|
||||||
# RuntimeError: No ffmpeg exe could be found.
|
|
||||||
# Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
|
|
||||||
# In such cases, you can manually download ffmpeg and set the ffmpeg_path, download link: https://www.gyan.dev/ffmpeg/builds/
|
|
||||||
|
|
||||||
# ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
|
# ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
|
|
||||||
@ -132,7 +119,7 @@
|
|||||||
|
|
||||||
material_directory = ""
|
material_directory = ""
|
||||||
|
|
||||||
# Used for state management of the task
|
# 用于任务的状态管理
|
||||||
enable_redis = false
|
enable_redis = false
|
||||||
redis_host = "localhost"
|
redis_host = "localhost"
|
||||||
redis_port = 6379
|
redis_port = 6379
|
||||||
@ -143,7 +130,6 @@
|
|||||||
max_concurrent_tasks = 5
|
max_concurrent_tasks = 5
|
||||||
|
|
||||||
# webui界面是否显示配置项
|
# webui界面是否显示配置项
|
||||||
# webui hide baisc config panel
|
|
||||||
hide_config = false
|
hide_config = false
|
||||||
|
|
||||||
|
|
||||||
@ -161,7 +147,7 @@
|
|||||||
|
|
||||||
# recommended model_size: "large-v3"
|
# recommended model_size: "large-v3"
|
||||||
model_size="faster-whisper-large-v2"
|
model_size="faster-whisper-large-v2"
|
||||||
# if you want to use GPU, set device="cuda"
|
# 如果要使用 GPU,请设置 device="cuda"
|
||||||
device="CPU"
|
device="CPU"
|
||||||
compute_type="int8"
|
compute_type="int8"
|
||||||
|
|
||||||
|
|||||||
70
webui.py
70
webui.py
@ -549,37 +549,6 @@ with middle_panel:
|
|||||||
params.voice_name = voice_name
|
params.voice_name = voice_name
|
||||||
config.ui["voice_name"] = voice_name
|
config.ui["voice_name"] = voice_name
|
||||||
|
|
||||||
# 试听语言合成
|
|
||||||
if st.button(tr("Play Voice")):
|
|
||||||
play_content = "感谢关注 NarratoAI,有任何问题或建议,可以关注微信公众号,求助或讨论"
|
|
||||||
if not play_content:
|
|
||||||
play_content = params.video_script
|
|
||||||
if not play_content:
|
|
||||||
play_content = tr("Voice Example")
|
|
||||||
with st.spinner(tr("Synthesizing Voice")):
|
|
||||||
temp_dir = utils.storage_dir("temp", create=True)
|
|
||||||
audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3")
|
|
||||||
sub_maker = voice.tts(
|
|
||||||
text=play_content,
|
|
||||||
voice_name=voice_name,
|
|
||||||
voice_rate=params.voice_rate,
|
|
||||||
voice_file=audio_file,
|
|
||||||
)
|
|
||||||
# 如果语音文件生成失败,请使用默认内容重试。
|
|
||||||
if not sub_maker:
|
|
||||||
play_content = "This is a example voice. if you hear this, the voice synthesis failed with the original content."
|
|
||||||
sub_maker = voice.tts(
|
|
||||||
text=play_content,
|
|
||||||
voice_name=voice_name,
|
|
||||||
voice_rate=params.voice_rate,
|
|
||||||
voice_file=audio_file,
|
|
||||||
)
|
|
||||||
|
|
||||||
if sub_maker and os.path.exists(audio_file):
|
|
||||||
st.audio(audio_file, format="audio/mp3")
|
|
||||||
if os.path.exists(audio_file):
|
|
||||||
os.remove(audio_file)
|
|
||||||
|
|
||||||
if voice.is_azure_v2_voice(voice_name):
|
if voice.is_azure_v2_voice(voice_name):
|
||||||
saved_azure_speech_region = config.azure.get("speech_region", "")
|
saved_azure_speech_region = config.azure.get("speech_region", "")
|
||||||
saved_azure_speech_key = config.azure.get("speech_key", "")
|
saved_azure_speech_key = config.azure.get("speech_key", "")
|
||||||
@ -604,6 +573,45 @@ with middle_panel:
|
|||||||
index=2,
|
index=2,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
params.voice_pitch = st.selectbox(
|
||||||
|
tr("Speech Pitch"),
|
||||||
|
options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
|
||||||
|
index=2,
|
||||||
|
)
|
||||||
|
|
||||||
|
# 试听语言合成
|
||||||
|
if st.button(tr("Play Voice")):
|
||||||
|
play_content = "感谢关注 NarratoAI,有任何问题或建议,可以关注微信公众号,求助或讨论"
|
||||||
|
if not play_content:
|
||||||
|
play_content = params.video_script
|
||||||
|
if not play_content:
|
||||||
|
play_content = tr("Voice Example")
|
||||||
|
with st.spinner(tr("Synthesizing Voice")):
|
||||||
|
temp_dir = utils.storage_dir("temp", create=True)
|
||||||
|
audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3")
|
||||||
|
sub_maker = voice.tts(
|
||||||
|
text=play_content,
|
||||||
|
voice_name=voice_name,
|
||||||
|
voice_rate=params.voice_rate,
|
||||||
|
voice_pitch=params.voice_pitch,
|
||||||
|
voice_file=audio_file,
|
||||||
|
)
|
||||||
|
# 如果语音文件生成失败,请使用默认内容重试。
|
||||||
|
if not sub_maker:
|
||||||
|
play_content = "This is a example voice. if you hear this, the voice synthesis failed with the original content."
|
||||||
|
sub_maker = voice.tts(
|
||||||
|
text=play_content,
|
||||||
|
voice_name=voice_name,
|
||||||
|
voice_rate=params.voice_rate,
|
||||||
|
voice_pitch=params.voice_pitch,
|
||||||
|
voice_file=audio_file,
|
||||||
|
)
|
||||||
|
|
||||||
|
if sub_maker and os.path.exists(audio_file):
|
||||||
|
st.audio(audio_file, format="audio/mp3")
|
||||||
|
if os.path.exists(audio_file):
|
||||||
|
os.remove(audio_file)
|
||||||
|
|
||||||
bgm_options = [
|
bgm_options = [
|
||||||
(tr("No Background Music"), ""),
|
(tr("No Background Music"), ""),
|
||||||
(tr("Random Background Music"), "random"),
|
(tr("Random Background Music"), "random"),
|
||||||
|
|||||||
@ -91,6 +91,7 @@
|
|||||||
"Picture description": "图片描述",
|
"Picture description": "图片描述",
|
||||||
"Narration": "视频文案",
|
"Narration": "视频文案",
|
||||||
"Rebuild": "重新生成",
|
"Rebuild": "重新生成",
|
||||||
"Video Script Load": "加载视频脚本"
|
"Video Script Load": "加载视频脚本",
|
||||||
|
"Speech Pitch": "语调"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user