""" Quick Mode 任务 Schema 小白用户一键训练模式的请求/响应模型 参考文档: development.md 4.6.1 + 4.6.3 """ from datetime import datetime from typing import List, Literal, Optional from pydantic import BaseModel, Field class InferenceOptions(BaseModel): """ 推理选项(可选) 训练完成后进行推理测试的配置。如果不提供,将使用默认值自动进行推理。 Attributes: enabled: 是否启用推理阶段 ref_audio_path: 参考音频路径,不提供则使用训练音频的切片 ref_text: 参考音频的文本,不提供则从 ASR 结果自动获取 target_text: 要合成的目标文本 """ enabled: bool = Field( default=True, description="是否启用推理阶段,默认启用" ) ref_audio_path: Optional[str] = Field( default=None, description="参考音频路径,不提供则自动使用训练音频的切片" ) ref_text: Optional[str] = Field( default=None, description="参考音频的文本,不提供则从 ASR 结果自动获取" ) target_text: str = Field( default="这是一段测试语音合成的文本。", description="要合成的目标文本" ) model_config = { "json_schema_extra": { "examples": [ { "enabled": True, "ref_audio_path": None, "ref_text": None, "target_text": "这是一段测试语音合成的文本。" } ] } } class QuickModeOptions(BaseModel): """ Quick Mode 训练选项 用于一键训练时的简化参数配置 Attributes: version: 模型版本 language: 训练语言 quality: 训练质量预设 质量预设说明: - fast: SoVITS 4 epochs, GPT 8 epochs, ~10分钟 - standard: SoVITS 8 epochs, GPT 15 epochs, ~20分钟 - high: SoVITS 16 epochs, GPT 30 epochs, ~40分钟 """ version: Literal["v1", "v2", "v2Pro", "v3", "v4"] = Field( default="v2", description="模型版本" ) language: Literal["zh", "en", "ja", "ko", "yue"] = Field( default="zh", description="训练语言:zh(中文)、en(英语)、ja(日语)、ko(韩语)、yue(粤语)" ) quality: Literal["fast", "standard", "high"] = Field( default="standard", description="训练质量预设:fast(快速)、standard(标准)、high(高质量)" ) inference: Optional[InferenceOptions] = Field( default=None, description="推理配置,不提供则使用默认配置自动推理" ) model_config = { "json_schema_extra": { "examples": [ { "version": "v2", "language": "zh", "quality": "standard", "inference": { "enabled": True, "target_text": "这是一段测试语音合成的文本。" } } ] } } class QuickModeRequest(BaseModel): """ 小白用户一键训练请求 创建一键训练任务,系统自动配置所有参数并执行完整流程: audio_slice -> asr -> text_feature -> hubert_feature -> semantic_token -> sovits_train -> gpt_train -> inference Attributes: exp_name: 实验名称(用于标识训练任务) audio_file_id: 已上传音频文件的 ID options: 训练选项(包含推理配置) """ exp_name: str = Field( ..., min_length=1, max_length=100, description="实验名称,用于标识训练任务和生成的模型" ) audio_file_id: str = Field( ..., description="已上传音频文件的 ID" ) options: QuickModeOptions = Field( default_factory=QuickModeOptions, description="训练选项" ) model_config = { "json_schema_extra": { "examples": [ { "exp_name": "my_voice", "audio_file_id": "550e8400-e29b-41d4-a716-446655440000", "options": { "version": "v2", "language": "zh", "quality": "standard" } } ] } } class TaskResponse(BaseModel): """ 任务响应(Quick Mode) 返回任务的完整状态信息,包括进度、当前阶段等 Attributes: id: 任务唯一标识 exp_name: 实验名称 status: 任务状态 current_stage: 当前执行的阶段 progress: 当前阶段进度 (0.0-1.0) overall_progress: 总体进度 (0.0-1.0) message: 最新状态消息 error_message: 错误消息(失败时) created_at: 任务创建时间 started_at: 任务开始执行时间 completed_at: 任务完成时间 """ id: str = Field(..., description="任务唯一标识") exp_name: str = Field(..., description="实验名称") status: Literal["queued", "running", "completed", "failed", "cancelled", "interrupted"] = Field( ..., description="任务状态" ) current_stage: Optional[str] = Field( default=None, description="当前执行的阶段,如 'audio_slice', 'sovits_train' 等" ) progress: float = Field( default=0.0, ge=0.0, le=1.0, description="当前阶段进度 (0.0-1.0)" ) overall_progress: float = Field( default=0.0, ge=0.0, le=1.0, description="总体进度 (0.0-1.0)" ) message: Optional[str] = Field( default=None, description="最新状态消息" ) error_message: Optional[str] = Field( default=None, description="错误消息(失败时)" ) created_at: Optional[datetime] = Field( default=None, description="任务创建时间" ) started_at: Optional[datetime] = Field( default=None, description="任务开始执行时间" ) completed_at: Optional[datetime] = Field( default=None, description="任务完成时间" ) model_config = { "from_attributes": True, "json_schema_extra": { "examples": [ { "id": "task-550e8400-e29b-41d4-a716-446655440000", "exp_name": "my_voice", "status": "running", "current_stage": "sovits_train", "progress": 0.45, "overall_progress": 0.72, "message": "SoVITS 训练中 Epoch 8/16", "error_message": None, "created_at": "2024-01-01T10:00:00Z", "started_at": "2024-01-01T10:00:05Z", "completed_at": None } ] } } class TaskListResponse(BaseModel): """ 任务列表响应 Attributes: items: 任务列表 total: 总数量 limit: 每页数量 offset: 偏移量 """ items: List[TaskResponse] = Field( default_factory=list, description="任务列表" ) total: int = Field( default=0, ge=0, description="总数量" ) limit: int = Field( default=50, ge=1, le=100, description="每页数量" ) offset: int = Field( default=0, ge=0, description="偏移量" ) model_config = { "json_schema_extra": { "examples": [ { "items": [ { "id": "task-123", "exp_name": "voice_1", "status": "completed", "current_stage": None, "progress": 1.0, "overall_progress": 1.0, "message": "训练完成" } ], "total": 1, "limit": 50, "offset": 0 } ] } } class InferenceOutputItem(BaseModel): """ 推理输出项 表示一个推理生成的音频文件的元信息 Attributes: filename: 文件名 gpt_model: 使用的 GPT 模型名称 sovits_model: 使用的 SoVITS 模型名称 gpt_path: GPT 模型完整路径 sovits_path: SoVITS 模型完整路径 file_path: 文件相对路径 size_bytes: 文件大小(字节) created_at: 创建时间 """ filename: str = Field(..., description="文件名") gpt_model: str = Field(..., description="使用的 GPT 模型名称") sovits_model: str = Field(..., description="使用的 SoVITS 模型名称") gpt_path: str = Field(..., description="GPT 模型完整路径") sovits_path: str = Field(..., description="SoVITS 模型完整路径") file_path: str = Field(..., description="文件相对路径") size_bytes: int = Field(..., ge=0, description="文件大小(字节)") created_at: Optional[datetime] = Field(default=None, description="创建时间") model_config = { "json_schema_extra": { "examples": [ { "filename": "my_voice_gpt-my_voice_e15_s150-sovits_e8_s200.wav", "gpt_model": "my_voice_e15_s150", "sovits_model": "my_voice_e8_s200", "gpt_path": "logs/my_voice/GPT_weights_v2/my_voice_e15_s150.ckpt", "sovits_path": "logs/my_voice/SoVITS_weights_v2/my_voice_e8_s200.pth", "file_path": "logs/my_voice/inference/my_voice_gpt-my_voice_e15_s150-sovits_e8_s200.wav", "size_bytes": 102400, "created_at": "2024-01-01T12:00:00Z" } ] } } class InferenceOutputsResponse(BaseModel): """ 推理输出列表响应 返回任务的所有推理输出文件列表 Attributes: task_id: 任务 ID exp_name: 实验名称 ref_text: 参考音频文本 ref_audio_path: 参考音频路径 target_text: 合成的目标文本 outputs: 推理输出文件列表 total: 总数量 """ task_id: str = Field(..., description="任务 ID") exp_name: str = Field(..., description="实验名称") ref_text: str = Field(default="", description="参考音频文本") ref_audio_path: str = Field(default="", description="参考音频路径") target_text: str = Field(default="", description="合成的目标文本") outputs: List[InferenceOutputItem] = Field( default_factory=list, description="推理输出文件列表" ) total: int = Field(default=0, ge=0, description="总数量") model_config = { "json_schema_extra": { "examples": [ { "task_id": "task-123", "exp_name": "my_voice", "ref_text": "大家好,又到了复盘的时间,今天即使。", "ref_audio_path": "logs/my_voice/slicer_opt/audio_0000012160_0000152320.wav", "target_text": "这是一段测试语音合成的文本。", "outputs": [ { "filename": "my_voice_gpt-my_voice_e15_s150-sovits_e8_s200.wav", "gpt_model": "my_voice_e15_s150", "sovits_model": "my_voice_e8_s200", "gpt_path": "logs/my_voice/GPT_weights_v2/my_voice_e15_s150.ckpt", "sovits_path": "logs/my_voice/SoVITS_weights_v2/my_voice_e8_s200.pth", "file_path": "logs/my_voice/inference/my_voice_gpt-my_voice_e15_s150-sovits_e8_s200.wav", "size_bytes": 102400, "created_at": "2024-01-01T12:00:00Z" } ], "total": 1 } ] } }