升级若干依赖包;将 Python 升级到 3.11,准备对比并优化视频转录;对比中英文提示词的效果。

This commit is contained in:
linyq 2024-10-28 18:32:17 +08:00
parent 2896966359
commit 2ff72e8b06
3 changed files with 33 additions and 28 deletions

View File

@ -130,7 +130,7 @@ docker-compose up
## 开发 💻 ## 开发 💻
1. 安装依赖 1. 安装依赖
```shell ```shell
conda create -n narratoai python=3.10 conda create -n narratoai python=3.11
conda activate narratoai conda activate narratoai
cd narratoai cd narratoai
pip install -r requirements.txt pip install -r requirements.txt

View File

@ -14,6 +14,7 @@ from googleapiclient.errors import ResumableUploadError
from google.api_core.exceptions import * from google.api_core.exceptions import *
from google.generativeai.types import * from google.generativeai.types import *
import subprocess import subprocess
from typing import Union, TextIO
from app.config import config from app.config import config
from app.utils.utils import clean_model_output from app.utils.utils import clean_model_output
@ -353,7 +354,7 @@ def _generate_response(prompt: str, llm_provider: str = None) -> str:
return content.replace("\n", "") return content.replace("\n", "")
def _generate_response_video(prompt: str, llm_provider_video: str, video_file: str | File) -> str: def _generate_response_video(prompt: str, llm_provider_video: str, video_file: Union[str, TextIO]) -> str:
""" """
多模态能力大模型 多模态能力大模型
""" """
@ -780,22 +781,28 @@ def screen_matching(huamian: str, wenan: str, llm_provider: str):
if __name__ == "__main__": if __name__ == "__main__":
# 1. 视频转录 # 1. 视频转录
# video_subject = "第二十条之无罪释放" video_subject = "第二十条之无罪释放"
# video_path = "../../resource/videos/test01.mp4" video_path = "/Users/apple/Desktop/home/pipedream_project/downloads/jianzao.mp4"
# language = "zh-CN" language = "zh-CN"
# gemini_video_transcription(video_subject, video_path, language) gemini_video_transcription(
video_name=video_subject,
video_path=video_path,
language=language,
progress_callback=print,
llm_provider_video="gemini"
)
# 2. 解说文案 # # 2. 解说文案
video_path = "/Users/apple/Desktop/home/NarratoAI/resource/videos/1.mp4" # video_path = "/Users/apple/Desktop/home/NarratoAI/resource/videos/1.mp4"
# video_path = "E:\\projects\\NarratoAI\\resource\\videos\\1.mp4" # # video_path = "E:\\projects\\NarratoAI\\resource\\videos\\1.mp4"
video_plot = """ # video_plot = """
李自忠拿着儿子李牧名下的存折去银行取钱给儿子救命却被要求证明"你儿子是你儿子" # 李自忠拿着儿子李牧名下的存折,去银行取钱给儿子救命,却被要求证明"你儿子是你儿子"。
走投无路时碰到银行被抢劫劫匪给了他两沓钱救命李自忠却因此被银行以抢劫罪起诉并顶格判处20年有期徒刑 # 走投无路时碰到银行被抢劫劫匪给了他两沓钱救命李自忠却因此被银行以抢劫罪起诉并顶格判处20年有期徒刑
苏醒后的李牧坚决为父亲做无罪辩护面对银行的顶级律师团队他一个法学院大一学生能否力挽狂澜创作奇迹挥法律之利剑 持正义之天平 # 苏醒后的李牧坚决为父亲做无罪辩护,面对银行的顶级律师团队,他一个法学院大一学生,能否力挽狂澜,创作奇迹?挥法律之利剑 ,持正义之天平
""" # """
res = generate_script(video_path, video_plot, video_name="第二十条之无罪释放") # res = generate_script(video_path, video_plot, video_name="第二十条之无罪释放")
# res = generate_script(video_path, video_plot, video_name="海岸") # # res = generate_script(video_path, video_plot, video_name="海岸")
print("脚本生成成功:\n", res) # print("脚本生成成功:\n", res)
res = clean_model_output(res) # res = clean_model_output(res)
aaa = json.loads(res) # aaa = json.loads(res)
print(json.dumps(aaa, indent=2, ensure_ascii=False)) # print(json.dumps(aaa, indent=2, ensure_ascii=False))

View File

@ -4,22 +4,20 @@ openai~=1.13.3
faster-whisper~=1.0.1 faster-whisper~=1.0.1
edge_tts~=6.1.15 edge_tts~=6.1.15
uvicorn~=0.27.1 uvicorn~=0.27.1
fastapi~=0.110.0 fastapi~=0.115.4
tomli~=2.0.1 tomli~=2.0.1
streamlit~=1.33.0 streamlit~=1.39.0
loguru~=0.7.2 loguru~=0.7.2
aiohttp~=3.9.3 aiohttp~=3.10.10
urllib3~=2.2.1 urllib3~=2.2.1
pillow~=10.3.0 pillow~=10.4.0
pydantic~=2.6.3 pydantic~=2.6.3
g4f~=0.3.0.4 g4f~=0.3.0.4
dashscope~=1.15.0 dashscope~=1.15.0
google.generativeai>=0.8.2 google.generativeai>=0.8.3
python-multipart~=0.0.9 python-multipart~=0.0.9
redis==5.0.3 redis==5.0.3
# if you use pillow~=10.3.0, you will get "PIL.Image' has no attribute 'ANTIALIAS'" error when resize video opencv-python~=4.10.0.84
# please install opencv-python to fix "PIL.Image' has no attribute 'ANTIALIAS'" error
opencv-python~=4.9.0.80
# for azure speech # for azure speech
# https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/9-more-realistic-ai-voices-for-conversations-now-generally/ba-p/4099471 # https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/9-more-realistic-ai-voices-for-conversations-now-generally/ba-p/4099471
azure-cognitiveservices-speech~=1.37.0 azure-cognitiveservices-speech~=1.37.0