升级几个依赖包；Python 升级到 3.11，准备对比并优化视频转录效果；对比中英文提示词的效果。

2026-03-13 07:16:01 +00:00 · 2024-10-28 18:32:17 +08:00 · 2024-10-28 18:32:17 +08:00 · 2ff72e8b06
commit 2ff72e8b06
parent 2896966359
3 changed files with 33 additions and 28 deletions
--- a/README.md
+++ b/README.md
@@ -130,7 +130,7 @@ docker-compose up
 ## 开发 💻
 1. 安装依赖
 ```shell
-conda create -n narratoai python=3.10
+conda create -n narratoai python=3.11
 conda activate narratoai
 cd narratoai
 pip install -r requirements.txt
--- a/app/services/llm.py
+++ b/app/services/llm.py
@@ -14,6 +14,7 @@ from googleapiclient.errors import ResumableUploadError
 from google.api_core.exceptions import *
 from google.generativeai.types import *
 import subprocess
+from typing import Union, TextIO

 from app.config import config
 from app.utils.utils import clean_model_output
@@ -353,7 +354,7 @@ def _generate_response(prompt: str, llm_provider: str = None) -> str:
    return content.replace("\n", "")


-def _generate_response_video(prompt: str, llm_provider_video: str, video_file: str | File) -> str:
+def _generate_response_video(prompt: str, llm_provider_video: str, video_file: Union[str, TextIO]) -> str:
    """
    多模态能力大模型
    """
@@ -780,22 +781,28 @@ def screen_matching(huamian: str, wenan: str, llm_provider: str):

 if __name__ == "__main__":
    # 1. 视频转录
-    # video_subject = "第二十条之无罪释放"
-    # video_path = "../../resource/videos/test01.mp4"
-    # language = "zh-CN"
-    # gemini_video_transcription(video_subject, video_path, language)
+    video_subject = "第二十条之无罪释放"
+    video_path = "/Users/apple/Desktop/home/pipedream_project/downloads/jianzao.mp4"
+    language = "zh-CN"
+    gemini_video_transcription(
+        video_name=video_subject,
+        video_path=video_path,
+        language=language,
+        progress_callback=print,
+        llm_provider_video="gemini"
+    )

-    # 2. 解说文案
-    video_path = "/Users/apple/Desktop/home/NarratoAI/resource/videos/1.mp4"
-    # video_path = "E:\\projects\\NarratoAI\\resource\\videos\\1.mp4"
-    video_plot = """
-        李自忠拿着儿子李牧名下的存折，去银行取钱给儿子救命，却被要求证明"你儿子是你儿子"。
-    走投无路时碰到银行被抢劫，劫匪给了他两沓钱救命，李自忠却因此被银行以抢劫罪起诉，并顶格判处20年有期徒刑。
-    苏醒后的李牧坚决为父亲做无罪辩护，面对银行的顶级律师团队，他一个法学院大一学生，能否力挽狂澜，创作奇迹？挥法律之利剑 ，持正义之天平！
-    """
-    res = generate_script(video_path, video_plot, video_name="第二十条之无罪释放")
-    # res = generate_script(video_path, video_plot, video_name="海岸")
-    print("脚本生成成功:\n", res)
-    res = clean_model_output(res)
-    aaa = json.loads(res)
-    print(json.dumps(aaa, indent=2, ensure_ascii=False))
+    # # 2. 解说文案
+    # video_path = "/Users/apple/Desktop/home/NarratoAI/resource/videos/1.mp4"
+    # # video_path = "E:\\projects\\NarratoAI\\resource\\videos\\1.mp4"
+    # video_plot = """
+    #     李自忠拿着儿子李牧名下的存折，去银行取钱给儿子救命，却被要求证明"你儿子是你儿子"。
+    # 走投无路时碰到银行被抢劫，劫匪给了他两沓钱救命，李自忠却因此被银行以抢劫罪起诉，并顶格判处20年有期徒刑。
+    # 苏醒后的李牧坚决为父亲做无罪辩护，面对银行的顶级律师团队，他一个法学院大一学生，能否力挽狂澜，创作奇迹？挥法律之利剑 ，持正义之天平！
+    # """
+    # res = generate_script(video_path, video_plot, video_name="第二十条之无罪释放")
+    # # res = generate_script(video_path, video_plot, video_name="海岸")
+    # print("脚本生成成功:\n", res)
+    # res = clean_model_output(res)
+    # aaa = json.loads(res)
+    # print(json.dumps(aaa, indent=2, ensure_ascii=False))
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,22 +4,20 @@ openai~=1.13.3
 faster-whisper~=1.0.1
 edge_tts~=6.1.15
 uvicorn~=0.27.1
-fastapi~=0.110.0
+fastapi~=0.115.4
 tomli~=2.0.1
-streamlit~=1.33.0
+streamlit~=1.39.0
 loguru~=0.7.2
-aiohttp~=3.9.3
+aiohttp~=3.10.10
 urllib3~=2.2.1
-pillow~=10.3.0
+pillow~=10.4.0
 pydantic~=2.6.3
 g4f~=0.3.0.4
 dashscope~=1.15.0
-google.generativeai>=0.8.2
+google.generativeai>=0.8.3
 python-multipart~=0.0.9
 redis==5.0.3
-# if you use pillow~=10.3.0, you will get "PIL.Image' has no attribute 'ANTIALIAS'" error when resize video
-# please install opencv-python to fix "PIL.Image' has no attribute 'ANTIALIAS'" error
-opencv-python~=4.9.0.80
+opencv-python~=4.10.0.84
 # for azure speech
 # https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/9-more-realistic-ai-voices-for-conversations-now-generally/ba-p/4099471
 azure-cognitiveservices-speech~=1.37.0