Spaces:

sming256
/

VideoAuto-R1_Demo

Sleeping

App Files Files Community

VideoAuto-R1_Demo / app.py

heyong-ai

Update app.py

84eafbf verified 7 days ago

raw

history blame

3.02 kB

	# ============================================================
	# 第二阶段：AI 视觉审计（Qwen-VL 导演核心）
	# ============================================================
	import dashscope
	from dashscope import MultiModalConversation

	# DashScope (Qwen) API key taken from the environment; empty string when unset.
	# A key typed into the UI is passed to call_qwen_vision and is preferred there.
	DASHSCOPE_API_KEY = os.getenv("DASHSCOPE_API_KEY", "")

	def _extract_clip_list(raw_output: str) -> list:
	    """Pull the JSON array of clip objects out of the model's raw reply.

	    Args:
	        raw_output: Full text returned by the model, possibly containing a
	            ``<think>...</think>`` reasoning section and surrounding prose.

	    Returns:
	        The decoded JSON list, or an empty list when no array of objects is
	        found or the matched text is not valid JSON.
	    """
	    # Remove the reasoning section first so brackets inside it cannot be
	    # mistaken for (or merged into) the final answer.
	    answer = re.sub(r'<think>.*?</think>', '', raw_output, flags=re.DOTALL)

	    # Greedy match from the first "[ {" to the last "} ]" so that an array
	    # holding several objects is captured as a whole.
	    match = re.search(r'\[\s*\{.*\}\s*\]', answer, re.DOTALL)
	    if not match:
	        return []

	    try:
	        clips = json.loads(match.group())
	    except ValueError:  # json.JSONDecodeError is a ValueError subclass
	        return []
	    return clips if isinstance(clips, list) else []


	def _validate_clips(clips: list) -> list:
	    """Normalise raw clip dicts, silently dropping entries that are malformed.

	    Args:
	        clips: Decoded JSON items as produced by the model.

	    Returns:
	        A list of dicts with keys ``start`` (2-decimal float, never a whole
	        number), ``duration`` (clamped to 1.5-8.0), ``speed`` (clamped to
	        0.8-1.2) and ``reason`` (string truncated to 15 characters).
	    """
	    validated = []
	    for clip in clips:
	        if not isinstance(clip, dict):
	            continue
	        try:
	            # Round before the integer test: 2.999 would otherwise pass the
	            # test and then be rounded to the whole number 3.0.
	            start = round(float(clip["start"]), 2)
	            # int() raises on inf/nan, which discards such clips below.
	            if start == int(start):
	                start = round(start + 0.47, 2)  # force a non-integer start
	            validated.append({
	                "start": start,
	                "duration": max(1.5, min(float(clip.get("duration", 3)), 8.0)),
	                "speed": max(0.8, min(float(clip.get("speed", 1.0)), 1.2)),
	                "reason": str(clip.get("reason", "未分类"))[:15],
	            })
	        except (KeyError, TypeError, ValueError, OverflowError):
	            continue
	    return validated


	def call_qwen_vision(frames: list, chunk_idx: int, total_chunks: int,
	                     video_duration: float, api_key: str) -> list:
	    """Send sampled frames to Qwen-VL-Max and return validated clip instructions.

	    The prompt asks the model to reason inside ``<think>`` tags and then emit
	    a JSON list of clips; that list is extracted, decoded and sanitised.

	    Args:
	        frames: Frame image locations for this chunk. Each item must provide
	            ``.absolute()`` (presumably ``pathlib.Path`` objects; confirm
	            against the caller).
	        chunk_idx: Zero-based index of the chunk being audited.
	        total_chunks: Total number of chunks, used only in the prompt text.
	        video_duration: Currently unused by this function.
	        api_key: Key entered in the UI; when blank or ``None`` the
	            module-level ``DASHSCOPE_API_KEY`` is used instead.

	    Returns:
	        A list of clip dicts with keys ``start``, ``duration``, ``speed`` and
	        ``reason``; empty when the reply contains no parseable JSON array.

	    Raises:
	        ValueError: If no API key is available from either source.
	        RuntimeError: If the API response status code is not 200.
	    """
	    # Prefer the key from the UI; fall back to the environment variable.
	    effective_key = (api_key or "").strip() or DASHSCOPE_API_KEY
	    if not effective_key:
	        raise ValueError("❌ 缺少 DashScope API Key，请在界面或环境变量中设置")

	    time_per_frame = 1.0 / FPS_AUDIT
	    chunk_start_time = (chunk_idx * CHUNK_SIZE) * time_per_frame

	    # 1. Build the director prompt (asks for chain-of-thought, then JSON).
	    prompt_text = (
	        f"你现在是一位精通非遗竹编手艺的纪录片导演。现在审计第 {chunk_idx+1}/{total_chunks} 包素材。\n"
	        f"时间范围：{chunk_start_time:.2f}s 起。请先在 <think> 标签内分析画面的手法的精准度、"
	        f"光影的治愈感以及动作的连贯性，然后给出剪辑 JSON 指令。\n"
	        f"要求：start 绝不能是整数（如 3.0 必须写成 3.47），duration 在 1.5-8s 之间。"
	    )

	    # 2. Assemble the multimodal payload, capped at 8 frames to limit the
	    #    request size.
	    sample_frames = frames[::max(1, len(frames) // 8)][:8]
	    content = [{"text": prompt_text}]
	    # Local images are passed to the API as file:// URLs.
	    content.extend({"image": f"file://{fp.absolute()}"} for fp in sample_frames)

	    # 3. Call the model.
	    responses = MultiModalConversation.call(
	        model='qwen-vl-max',
	        api_key=effective_key,
	        messages=[{"role": "user", "content": content}],
	    )
	    if responses.status_code != 200:
	        raise RuntimeError(f"Qwen API 报错: {responses.message}")

	    raw_output = responses.output.choices[0].message.content[0]["text"]

	    # 4. Extract the JSON instructions, ignoring the <think> reasoning.
	    clips = _extract_clip_list(raw_output)

	    # 5. Enforce the clip rules (non-integer start, clamped duration/speed).
	    return _validate_clips(clips)