"""
Quick Mode task schemas.

Request/response models for the one-click training mode aimed at novice users.
Reference: development.md 4.6.1 + 4.6.3
"""
from datetime import datetime
from typing import List, Literal, Optional
from pydantic import BaseModel, Field
class InferenceOptions(BaseModel):
    """Optional settings for the inference test that runs after training.

    Every field carries a default, so callers may omit this object entirely.

    Attributes:
        enabled: Whether the inference stage is enabled (defaults to True).
        ref_audio_path: Path of the reference audio. Documented as falling
            back to a slice of the training audio when not provided.
        ref_text: Transcript of the reference audio. Documented as being
            taken from the ASR result when not provided.
        target_text: Text to synthesize.
    """

    enabled: bool = Field(True, description="是否启用推理阶段,默认启用")
    ref_audio_path: Optional[str] = Field(
        None,
        description="参考音频路径,不提供则自动使用训练音频的切片",
    )
    ref_text: Optional[str] = Field(
        None,
        description="参考音频的文本,不提供则从 ASR 结果自动获取",
    )
    target_text: str = Field(
        "这是一段测试语音合成的文本。",
        description="要合成的目标文本",
    )

    # Example payload surfaced in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "enabled": True,
                    "ref_audio_path": None,
                    "ref_text": None,
                    "target_text": "这是一段测试语音合成的文本。",
                },
            ],
        },
    }
class QuickModeOptions(BaseModel):
    """Simplified training parameters for a Quick Mode (one-click) run.

    Attributes:
        version: Model version.
        language: Training language.
        quality: Training quality preset.
        inference: Inference configuration; may be left as None.

    Quality presets (as documented for this schema):
        - fast: SoVITS 4 epochs, GPT 8 epochs, ~10 minutes
        - standard: SoVITS 8 epochs, GPT 15 epochs, ~20 minutes
        - high: SoVITS 16 epochs, GPT 30 epochs, ~40 minutes
    """

    version: Literal["v1", "v2", "v2Pro", "v3", "v4"] = Field(
        "v2",
        description="模型版本",
    )
    language: Literal["zh", "en", "ja", "ko", "yue"] = Field(
        "zh",
        description="训练语言:zh(中文)、en(英语)、ja(日语)、ko(韩语)、yue(粤语)",
    )
    quality: Literal["fast", "standard", "high"] = Field(
        "standard",
        description="训练质量预设:fast(快速)、standard(标准)、high(高质量)",
    )
    inference: Optional[InferenceOptions] = Field(
        None,
        description="推理配置,不提供则使用默认配置自动推理",
    )

    # Example payload surfaced in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "version": "v2",
                    "language": "zh",
                    "quality": "standard",
                    "inference": {
                        "enabled": True,
                        "target_text": "这是一段测试语音合成的文本。",
                    },
                },
            ],
        },
    }
class QuickModeRequest(BaseModel):
    """Request body for creating a one-click (Quick Mode) training task.

    The documented pipeline for such a task is:
    audio_slice -> asr -> text_feature -> hubert_feature -> semantic_token
    -> sovits_train -> gpt_train -> inference

    Attributes:
        exp_name: Experiment name identifying the training task
            (required, 1 to 100 characters).
        audio_file_id: ID of an already-uploaded audio file (required).
        options: Training options, including the inference configuration;
            a default ``QuickModeOptions`` is built when omitted.
    """

    exp_name: str = Field(
        ...,
        description="实验名称,用于标识训练任务和生成的模型",
        min_length=1,
        max_length=100,
    )
    audio_file_id: str = Field(..., description="已上传音频文件的 ID")
    options: QuickModeOptions = Field(
        description="训练选项",
        default_factory=QuickModeOptions,
    )

    # Example payload surfaced in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "exp_name": "my_voice",
                    "audio_file_id": "550e8400-e29b-41d4-a716-446655440000",
                    "options": {
                        "version": "v2",
                        "language": "zh",
                        "quality": "standard",
                    },
                },
            ],
        },
    }
class TaskResponse(BaseModel):
    """Full status snapshot of a Quick Mode task.

    ``from_attributes`` is enabled in the model config, so instances can be
    validated from attribute-bearing objects as well as from dicts.

    Attributes:
        id: Unique task identifier.
        exp_name: Experiment name.
        status: Task status.
        current_stage: Stage currently being executed.
        progress: Progress of the current stage (0.0-1.0).
        overall_progress: Overall progress (0.0-1.0).
        message: Latest status message.
        error_message: Error message (on failure).
        created_at: When the task was created.
        started_at: When the task started executing.
        completed_at: When the task completed.
    """

    id: str = Field(..., description="任务唯一标识")
    exp_name: str = Field(..., description="实验名称")
    status: Literal[
        "queued",
        "running",
        "completed",
        "failed",
        "cancelled",
        "interrupted",
    ] = Field(..., description="任务状态")
    current_stage: Optional[str] = Field(
        None,
        description="当前执行的阶段,如 'audio_slice', 'sovits_train' 等",
    )

    # Both progress values are constrained to the closed interval [0.0, 1.0].
    progress: float = Field(
        0.0,
        description="当前阶段进度 (0.0-1.0)",
        ge=0.0,
        le=1.0,
    )
    overall_progress: float = Field(
        0.0,
        description="总体进度 (0.0-1.0)",
        ge=0.0,
        le=1.0,
    )

    message: Optional[str] = Field(None, description="最新状态消息")
    error_message: Optional[str] = Field(None, description="错误消息(失败时)")

    created_at: Optional[datetime] = Field(None, description="任务创建时间")
    started_at: Optional[datetime] = Field(None, description="任务开始执行时间")
    completed_at: Optional[datetime] = Field(None, description="任务完成时间")

    model_config = {
        "from_attributes": True,
        "json_schema_extra": {
            "examples": [
                {
                    "id": "task-550e8400-e29b-41d4-a716-446655440000",
                    "exp_name": "my_voice",
                    "status": "running",
                    "current_stage": "sovits_train",
                    "progress": 0.45,
                    "overall_progress": 0.72,
                    "message": "SoVITS 训练中 Epoch 8/16",
                    "error_message": None,
                    "created_at": "2024-01-01T10:00:00Z",
                    "started_at": "2024-01-01T10:00:05Z",
                    "completed_at": None,
                },
            ],
        },
    }
class TaskListResponse(BaseModel):
    """Paginated list of tasks.

    Attributes:
        items: Tasks on this page (empty list by default).
        total: Total count (non-negative).
        limit: Page size (between 1 and 100, default 50).
        offset: Offset (non-negative).
    """

    items: List[TaskResponse] = Field(
        description="任务列表",
        default_factory=list,
    )
    total: int = Field(0, description="总数量", ge=0)
    limit: int = Field(50, description="每页数量", ge=1, le=100)
    offset: int = Field(0, description="偏移量", ge=0)

    # Example payload surfaced in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "items": [
                        {
                            "id": "task-123",
                            "exp_name": "voice_1",
                            "status": "completed",
                            "current_stage": None,
                            "progress": 1.0,
                            "overall_progress": 1.0,
                            "message": "训练完成",
                        },
                    ],
                    "total": 1,
                    "limit": 50,
                    "offset": 0,
                },
            ],
        },
    }
class InferenceOutputItem(BaseModel):
    """Metadata describing one audio file produced by inference.

    Attributes:
        filename: File name.
        gpt_model: Name of the GPT model used.
        sovits_model: Name of the SoVITS model used.
        gpt_path: Full path of the GPT model.
        sovits_path: Full path of the SoVITS model.
        file_path: Relative path of the file.
        size_bytes: File size in bytes (non-negative).
        created_at: Creation time.
    """

    filename: str = Field(..., description="文件名")

    # Model names and the locations of their weight files.
    gpt_model: str = Field(..., description="使用的 GPT 模型名称")
    sovits_model: str = Field(..., description="使用的 SoVITS 模型名称")
    gpt_path: str = Field(..., description="GPT 模型完整路径")
    sovits_path: str = Field(..., description="SoVITS 模型完整路径")

    file_path: str = Field(..., description="文件相对路径")
    size_bytes: int = Field(..., description="文件大小(字节)", ge=0)
    created_at: Optional[datetime] = Field(None, description="创建时间")

    # Example payload surfaced in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "filename": "my_voice_gpt-my_voice_e15_s150-sovits_e8_s200.wav",
                    "gpt_model": "my_voice_e15_s150",
                    "sovits_model": "my_voice_e8_s200",
                    "gpt_path": "logs/my_voice/GPT_weights_v2/my_voice_e15_s150.ckpt",
                    "sovits_path": "logs/my_voice/SoVITS_weights_v2/my_voice_e8_s200.pth",
                    "file_path": "logs/my_voice/inference/my_voice_gpt-my_voice_e15_s150-sovits_e8_s200.wav",
                    "size_bytes": 102400,
                    "created_at": "2024-01-01T12:00:00Z",
                },
            ],
        },
    }
class InferenceOutputsResponse(BaseModel):
    """Listing of all inference output files belonging to a task.

    Attributes:
        task_id: Task ID.
        exp_name: Experiment name.
        ref_text: Transcript of the reference audio (empty string by default).
        ref_audio_path: Path of the reference audio (empty string by default).
        target_text: Synthesized target text (empty string by default).
        outputs: Inference output files (empty list by default).
        total: Total count (non-negative).
    """

    task_id: str = Field(..., description="任务 ID")
    exp_name: str = Field(..., description="实验名称")

    # Inputs that were used for synthesis.
    ref_text: str = Field("", description="参考音频文本")
    ref_audio_path: str = Field("", description="参考音频路径")
    target_text: str = Field("", description="合成的目标文本")

    outputs: List[InferenceOutputItem] = Field(
        description="推理输出文件列表",
        default_factory=list,
    )
    total: int = Field(0, description="总数量", ge=0)

    # Example payload surfaced in the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "task_id": "task-123",
                    "exp_name": "my_voice",
                    "ref_text": "大家好,又到了复盘的时间,今天即使。",
                    "ref_audio_path": "logs/my_voice/slicer_opt/audio_0000012160_0000152320.wav",
                    "target_text": "这是一段测试语音合成的文本。",
                    "outputs": [
                        {
                            "filename": "my_voice_gpt-my_voice_e15_s150-sovits_e8_s200.wav",
                            "gpt_model": "my_voice_e15_s150",
                            "sovits_model": "my_voice_e8_s200",
                            "gpt_path": "logs/my_voice/GPT_weights_v2/my_voice_e15_s150.ckpt",
                            "sovits_path": "logs/my_voice/SoVITS_weights_v2/my_voice_e8_s200.pth",
                            "file_path": "logs/my_voice/inference/my_voice_gpt-my_voice_e15_s150-sovits_e8_s200.wav",
                            "size_bytes": 102400,
                            "created_at": "2024-01-01T12:00:00Z",
                        },
                    ],
                    "total": 1,
                },
            ],
        },
    }
|