diff --git a/.gitignore b/.gitignore index c51c4e8..f10a692 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,4 @@ resource/scripts/* resource/videos/* resource/songs/* resource/fonts/* +app/models/faster-whisper-large-v2/* \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 8cf53f0..4beabe4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,3 @@ -# Use an official Python runtime as a parent image FROM python:3.10-slim-bullseye # Set the working directory in the container @@ -12,6 +11,7 @@ ENV PYTHONPATH="/NarratoAI" # Install system dependencies RUN apt-get update && apt-get install -y \ git \ + git-lfs \ imagemagick \ ffmpeg \ wget \ @@ -29,17 +29,11 @@ RUN pip install --no-cache-dir -r requirements.txt # Now copy the rest of the codebase into the image COPY . . +# 安装 git lfs 并下载模型到指定目录 +RUN git lfs install + # Expose the port the app runs on EXPOSE 8501 # Command to run the application -CMD ["streamlit", "run", "./webui/Main.py","--browser.serverAddress=127.0.0.1","--server.enableCORS=True","--browser.gatherUsageStats=False"] - -# 1. Build the Docker image using the following command -# docker build -t moneyprinterturbo . - -# 2. Run the Docker container using the following command -## For Linux or MacOS: -# docker run -v $(pwd)/config.toml:/NarratoAI/config.toml -v $(pwd)/storage:/NarratoAI/storage -p 8501:8501 moneyprinterturbo -## For Windows: -# docker run -v %cd%/config.toml:/NarratoAI/config.toml -v %cd%/storage:/NarratoAI/storage -p 8501:8501 moneyprinterturbo \ No newline at end of file +CMD ["streamlit", "run", "webui.py","--browser.serverAddress=127.0.0.1","--server.enableCORS=True","--browser.gatherUsageStats=False"] diff --git a/app/services/llm.py b/app/services/llm.py index adb3f6d..01bef0a 100644 --- a/app/services/llm.py +++ b/app/services/llm.py @@ -109,26 +109,25 @@ Method = """ def handle_exception(err): if isinstance(err, PermissionDenied): - logger.error("403 用户没有权限访问该资源") + raise Exception("403 用户没有权限访问该资源") elif isinstance(err, ResourceExhausted): - logger.error("429 您的配额已用尽。请稍后重试。请考虑设置自动重试来处理这些错误") + raise Exception("429 您的配额已用尽。请稍后重试。请考虑设置自动重试来处理这些错误") elif isinstance(err, InvalidArgument): - logger.error("400 参数无效。例如,文件过大,超出了载荷大小限制。另一个事件提供了无效的 API 密钥。") + raise Exception("400 参数无效。例如,文件过大,超出了载荷大小限制。另一个事件提供了无效的 API 密钥。") elif isinstance(err, AlreadyExists): - logger.error("409 已存在具有相同 ID 的已调参模型。对新模型进行调参时,请指定唯一的模型 ID。") + raise Exception("409 已存在具有相同 ID 的已调参模型。对新模型进行调参时,请指定唯一的模型 ID。") elif isinstance(err, RetryError): - logger.error("使用不支持 gRPC 的代理时可能会引起此错误。请尝试将 REST 传输与 genai.configure(..., transport=rest) 搭配使用。") + raise Exception("使用不支持 gRPC 的代理时可能会引起此错误。请尝试将 REST 传输与 genai.configure(..., transport=rest) 搭配使用。") elif isinstance(err, BlockedPromptException): - logger.error("400 出于安全原因,该提示已被屏蔽。") + raise Exception("400 出于安全原因,该提示已被屏蔽。") elif isinstance(err, BrokenResponseError): - logger.error("500 流式传输响应已损坏。在访问需要完整响应的内容(例如聊天记录)时引发。查看堆栈轨迹中提供的错误详情。") + raise Exception("500 流式传输响应已损坏。在访问需要完整响应的内容(例如聊天记录)时引发。查看堆栈轨迹中提供的错误详情。") elif isinstance(err, IncompleteIterationError): - logger.error("500 访问需要完整 API 响应但流式响应尚未完全迭代的内容时引发。对响应对象调用 resolve() 以使用迭代器。") + raise Exception("500 访问需要完整 API 响应但流式响应尚未完全迭代的内容时引发。对响应对象调用 resolve() 以使用迭代器。") elif isinstance(err, ConnectionError): - logger.error("网络连接错误,请检查您的网络连接。") + raise Exception("网络连接错误, 请检查您的网络连接(建议使用 NarratoAI 官方提供的 url)") else: - logger.error(f"大模型请求失败, 下面是具体报错信息: \n{traceback.format_exc()}") - return "" + raise Exception(f"大模型请求失败, 下面是具体报错信息: \n\n{traceback.format_exc()}") def _generate_response(prompt: str, llm_provider: str = None) -> str: @@ -398,9 +397,6 @@ def compress_video(input_path: str, output_path: str): input_path: 输入视频文件路径 output_path: 输出压缩后的视频文件路径 """ - # 指定 ffmpeg 的完整路径 - ffmpeg_path = os.getenv("FFMPEG_PATH") or config.app.get("ffmpeg_path") or "ffmpeg" - # 如果压缩后的视频文件已经存在,则直接使用 if os.path.exists(output_path): logger.info(f"压缩视频文件已存在: {output_path}") @@ -409,17 +405,6 @@ def compress_video(input_path: str, output_path: str): try: clip = VideoFileClip(input_path) clip.write_videofile(output_path, codec='libx264', audio_codec='aac', bitrate="500k", audio_bitrate="128k") - # command = [ - # ffmpeg_path, - # "-i", input_path, - # "-c:v", "h264", - # "-b:v", "500k", - # "-c:a", "aac", - # "-b:a", "128k", - # output_path - # ] - # logger.info(f"执行命令: {' '.join(command)}") - # subprocess.run(command, check=True) except subprocess.CalledProcessError as e: logger.error(f"视频压缩失败: {e}") raise @@ -440,41 +425,45 @@ def generate_script( Returns: str: 生成的脚本 """ - # 1. 压缩视频 - compressed_video_path = f"{os.path.splitext(video_path)[0]}_compressed.mp4" - compress_video(video_path, compressed_video_path) + try: + # 1. 压缩视频 + compressed_video_path = f"{os.path.splitext(video_path)[0]}_compressed.mp4" + compress_video(video_path, compressed_video_path) - # 在关键步骤更新进度 - if progress_callback: - progress_callback(15, "压缩完成") # 例如,在压缩视频后 - - # 2. 转录视频 - transcription = gemini_video_transcription( - video_name=video_name, - video_path=compressed_video_path, - language=language, - llm_provider_video="gemini", - progress_callback=progress_callback - ) - if progress_callback: - progress_callback(60, "生成解说文案...") # 例如,在转录视频后 - - # 3. 编写解说文案 - script = writing_short_play(video_plot, video_name, "openai", count=300) - - # 在关键步骤更新进度 - if progress_callback: - progress_callback(70, "匹配画面...") # 例如,在生成脚本后 - - # 4. 文案匹配画面 - if transcription != "": - matched_script = screen_matching(huamian=transcription, wenan=script, llm_provider="openai") # 在关键步骤更新进度 if progress_callback: - progress_callback(80, "匹配成功") - return matched_script - else: - return "" + progress_callback(15, "压缩完成") # 例如,在压缩视频后 + + # 2. 转录视频 + transcription = gemini_video_transcription( + video_name=video_name, + video_path=compressed_video_path, + language=language, + llm_provider_video="gemini", + progress_callback=progress_callback + ) + if progress_callback: + progress_callback(60, "生成解说文案...") # 例如,在转录视频后 + + # 3. 编写解说文案 + script = writing_short_play(video_plot, video_name, "openai", count=300) + + # 在关键步骤更新进度 + if progress_callback: + progress_callback(70, "匹配画面...") # 例如,在生成脚本后 + + # 4. 文案匹配画面 + if transcription != "": + matched_script = screen_matching(huamian=transcription, wenan=script, llm_provider="openai") + # 在关键步骤更新进度 + if progress_callback: + progress_callback(80, "匹配成功") + return matched_script + else: + return "" + except Exception as e: + handle_exception(e) + raise def gemini_video_transcription(video_name: str, video_path: str, language: str, llm_provider_video: str, progress_callback=None): diff --git a/docker-compose.yml b/docker-compose.yml index cc94678..399c588 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,6 +6,7 @@ services: build: context: . dockerfile: Dockerfile + image: linyq1/narratoai:latest container_name: "webui" ports: - "8501:8501" @@ -18,10 +19,11 @@ services: build: context: . dockerfile: Dockerfile + image: linyq1/narratoai:latest container_name: "api" ports: - - "8502:22" - command: [ "sleep", "48h" ] + - "8502:8080" + command: [ "python3", "main.py" ] volumes: *common-volumes environment: - "VPN_PROXY_URL=http://host.docker.internal:7890" diff --git a/requirements.txt b/requirements.txt index a562dcb..af5d8b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ pillow~=10.3.0 pydantic~=2.6.3 g4f~=0.3.0.4 dashscope~=1.15.0 -google.generativeai>=0.7.2 +google.generativeai>=0.8.2 python-multipart~=0.0.9 redis==5.0.3 # if you use pillow~=10.3.0, you will get "PIL.Image' has no attribute 'ANTIALIAS'" error when resize video @@ -25,3 +25,4 @@ opencv-python~=4.9.0.80 azure-cognitiveservices-speech~=1.37.0 git-changelog~=2.5.2 watchdog==5.0.2 +pydub==0.25.1 diff --git a/webui.py b/webui.py index 4410c2d..aa272b6 100644 --- a/webui.py +++ b/webui.py @@ -66,6 +66,8 @@ if 'video_plot' not in st.session_state: st.session_state['video_plot'] = '' if 'ui_language' not in st.session_state: st.session_state['ui_language'] = config.ui.get("language", system_locale) +if 'subclip_videos' not in st.session_state: + st.session_state['subclip_videos'] = {} def get_all_fonts(): @@ -404,8 +406,8 @@ with left_panel: progress_bar.progress(100) status_text.text("脚本生成完成!") st.success("视频脚本生成成功!") - except Exception as e: - st.error(f"生成过程中发生错误: {traceback.format_exc()}") + except Exception as err: + st.error(f"生成过程中发生错误: {str(err)}") finally: time.sleep(2) # 给用户一些时间查看最终状态 progress_bar.empty() @@ -445,7 +447,7 @@ with left_panel: st.session_state['video_clip_json'] = data st.session_state['video_clip_json_path'] = save_path # 刷新页面 - # st.rerun() + st.rerun() # 裁剪视频 with button_columns[1]: @@ -677,7 +679,7 @@ with right_panel: # 视频编辑面板 with st.expander(tr("Video Check"), expanded=False): try: - video_list = st.session_state['video_script_list'] + video_list = st.session_state.video_clip_json except KeyError as e: video_list = [] diff --git a/webui.sh b/webui.sh index c188c2b..dcdea0a 100644 --- a/webui.sh +++ b/webui.sh @@ -1,7 +1,7 @@ #!/bin/bash # 从环境变量中加载VPN代理的配置URL -vpn_proxy_url="http://127.0.0.1:7890" +vpn_proxy_url="$VPN_PROXY_URL" # 检查是否成功加载 if [ -z "$vpn_proxy_url" ]; then echo "VPN代理配置URL未设置,请检查环境变量VPN_PROXY_URL" @@ -44,7 +44,24 @@ for url in "${!urls_paths[@]}"; do echo "下载失败: $url" >&2 } done + +# 安装 git lfs 并下载模型到指定目录 +git lfs install +mkdir -p /NarratoAI/app/models +cd /NarratoAI/app/models +if [ ! -d "faster-whisper-large-v2" ] || [ -z "$(ls -A faster-whisper-large-v2)" ]; then + if git clone https://huggingface.co/guillaumekln/faster-whisper-large-v2; then + echo "下载faster-whisper-large-v2成功" + else + echo "下载faster-whisper-large-v2失败" >&2 + exit 1 + fi +else + echo "faster-whisper-large-v2 已存在,跳过下载" +fi + # 等待所有后台任务完成 wait echo "所有文件已成功下载到指定目录" +cd /NarratoAI/ streamlit run webui.py --browser.serverAddress="0.0.0.0" --server.enableCORS=True --server.maxUploadSize=2048 --browser.gatherUsageStats=False