Spaces:
Sleeping
Sleeping
| # ============================================================ | |
| # 第二阶段:AI 视觉审计(Qwen-VL 导演核心) | |
| # ============================================================ | |
| import dashscope | |
| from dashscope import MultiModalConversation | |
| # Fallback DashScope (Qwen) API key read from the DASHSCOPE_API_KEY environment variable; empty string when unset. call_qwen_vision uses it only when its own api_key argument is blank. | |
| DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", "") | |
def call_qwen_vision(frames: list, chunk_idx: int, total_chunks: int,
                     video_duration: float, api_key: str) -> list:
    """Audit one chunk of video frames with Qwen-VL-Max and return clip instructions.

    Sends up to 8 frames of the chunk together with a "documentary director"
    prompt (which asks the model to reason inside ``<think>`` tags before
    emitting a JSON list of clips), then parses and validates the JSON.

    Args:
        frames: Paths of the frame images for this chunk (``pathlib.Path`` or
            ``str``). An empty list yields ``[]`` without calling the API.
        chunk_idx: Zero-based index of this chunk.
        total_chunks: Total number of chunks, used only in the prompt text.
        video_duration: Not used by this function; kept so the signature stays
            compatible with existing callers.
        api_key: Key entered in the UI. When blank or ``None``, the
            module-level ``DASHSCOPE_API_KEY`` is used instead.

    Returns:
        A list of dicts with keys ``start`` (float, two decimals, never a whole
        number), ``duration`` (clamped to 1.5-8.0), ``speed`` (clamped to
        0.8-1.2) and ``reason`` (at most 15 characters). Empty when the reply
        contains no parseable clip list.

    Raises:
        ValueError: No API key is available from either source.
        RuntimeError: The API answered with a status code other than 200.
    """
    from pathlib import Path

    # Prefer the key typed into the UI; fall back to the environment variable.
    effective_key = (api_key or "").strip() or DASHSCOPE_API_KEY
    if not effective_key:
        raise ValueError("❌ 缺少 DashScope API Key,请在界面或环境变量中设置")

    # Nothing to look at: skip the (billable) request entirely.
    if not frames:
        return []

    time_per_frame = 1.0 / FPS_AUDIT
    chunk_start_time = (chunk_idx * CHUNK_SIZE) * time_per_frame

    # 1. Director prompt, including the chain-of-thought requirement.
    prompt_text = (
        f"你现在是一位精通非遗竹编手艺的纪录片导演。现在审计第 {chunk_idx+1}/{total_chunks} 包素材。\n"
        f"时间范围:{chunk_start_time:.2f}s 起。请先在 <think> 标签内分析画面的手法的精准度、"
        f"光影的治愈感以及动作的连贯性,然后给出剪辑 JSON 指令。\n"
        f"要求:start 绝不能是整数(如 3.0 必须写成 3.47),duration 在 1.5-8s 之间。"
    )

    # 2. Multimodal payload: at most 8 frames to keep the request small.
    #    Indices are spread evenly over the whole chunk so the tail is sampled
    #    too (a fixed stride of len//8 only reaches the first 8 frames when the
    #    chunk holds 9-15 frames).
    max_frames = 8
    frame_count = len(frames)
    if frame_count <= max_frames:
        sample_frames = list(frames)
    else:
        sample_frames = [frames[i * frame_count // max_frames]
                         for i in range(max_frames)]

    content = [{"text": prompt_text}]
    for fp in sample_frames:
        # Local images are passed as file:// URLs; Path() also accepts str paths.
        content.append({"image": f"file://{Path(fp).absolute()}"})

    # 3. Call the model.
    responses = MultiModalConversation.call(
        model='qwen-vl-max',  # a dated snapshot such as qwen-vl-max-2025-01-25 also works
        api_key=effective_key,
        messages=[{"role": "user", "content": content}]
    )
    if responses.status_code != 200:
        raise RuntimeError(f"Qwen API 报错: {responses.message}")

    # Defensive read of the reply: accept either a plain string or a list of
    # parts, concatenating every part that carries a "text" entry.
    message_content = responses.output.choices[0].message.content
    if isinstance(message_content, str):
        raw_output = message_content
    else:
        raw_output = "".join(
            str(part.get("text", ""))
            for part in (message_content or [])
            if isinstance(part, dict)
        )

    # 4. Extract the JSON instructions, then 5. enforce the clip rules.
    validated = []
    for raw_clip in _parse_clip_list(raw_output):
        clip = _normalize_clip(raw_clip)
        if clip is not None:
            validated.append(clip)
    return validated


def _parse_clip_list(raw_output):
    """Return the JSON array of clip objects found in the reply, or [] if none parses."""
    pattern = r'\[\s*\{.*\}\s*\]'
    # Remove closed <think>...</think> sections first so bracketed text inside
    # the reasoning cannot be swallowed by the greedy array match.
    visible = re.sub(r'<think>.*?</think>', '', raw_output, flags=re.DOTALL)
    # Fall back to the untouched reply in case the model put the JSON inside
    # the think section.
    match = (re.search(pattern, visible, re.DOTALL)
             or re.search(pattern, raw_output, re.DOTALL))
    if not match:
        return []
    try:
        clips = json.loads(match.group())
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return []
    return clips if isinstance(clips, list) else []


def _normalize_clip(clip):
    """Coerce one raw clip to the enforced ranges; return None when it is unusable."""
    import math

    if not isinstance(clip, dict):
        return None
    try:
        start = round(float(clip["start"]), 2)
        duration = float(clip.get("duration", 3))
        speed = float(clip.get("speed", 1.0))
    except (KeyError, TypeError, ValueError, OverflowError):
        # Missing "start", null values, or non-numeric text: drop this clip.
        return None
    if not math.isfinite(start):
        return None
    # Check AFTER rounding: a value such as 3.004 rounds to 3.0 and would
    # otherwise slip through as a whole-number start.
    if start.is_integer():
        start = round(start + 0.47, 2)
    return {
        "start": start,
        "duration": max(1.5, min(duration, 8.0)),
        "speed": max(0.8, min(speed, 1.2)),
        "reason": str(clip.get("reason", "未分类"))[:15],
    }