File size: 1,473 Bytes
f71bc95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from __future__ import annotations

import argparse

from .infer import load_generator, write_wave


def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``wfloat-tts`` command line.

    ``--text`` and ``--output`` are required. ``--checkpoint`` is an alias
    for ``--model``; both store into ``model``. ``--sid`` is parsed as an
    int, and ``--intensity``, ``--noise-scale``, ``--length-scale`` and
    ``--noise-w`` as floats; the last three default to ``None``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    # (flag names, add_argument keyword options), in registration order.
    option_table = (
        (("--model", "--checkpoint"), {"dest": "model", "default": "model.safetensors"}),
        (("--config",), {"default": "config.json"}),
        (("--text",), {"required": True}),
        (("--sid",), {"type": int, "default": 0}),
        (("--emotion",), {"default": "neutral"}),
        (("--intensity",), {"type": float, "default": 0.5}),
        (("--noise-scale",), {"type": float, "default": None}),
        (("--length-scale",), {"type": float, "default": None}),
        (("--noise-w",), {"type": float, "default": None}),
        (("--device",), {"default": "cpu"}),
        (("--output",), {"required": True}),
    )

    cli = argparse.ArgumentParser(prog="wfloat-tts")
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli


def main() -> None:
    """Run the ``wfloat-tts`` command-line entry point.

    Parses the process command line with ``build_parser``, obtains a
    generator from ``load_generator`` using the model, config and device
    options, calls its ``generate`` method with the remaining synthesis
    options, and passes the resulting samples and sample rate to
    ``write_wave`` together with the ``--output`` path.
    """
    options = build_parser().parse_args()

    tts = load_generator(
        checkpoint_path=options.model,
        config_path=options.config,
        device=options.device,
    )

    # Forwarded unchanged; the three scale options are None unless they
    # were given on the command line.
    synthesis_options = {
        "text": options.text,
        "sid": options.sid,
        "emotion": options.emotion,
        "intensity": options.intensity,
        "noise_scale": options.noise_scale,
        "length_scale": options.length_scale,
        "noise_w": options.noise_w,
    }
    result = tts.generate(**synthesis_options)

    write_wave(options.output, result.samples, result.sample_rate)


# Run the CLI only when this module is executed as the main program,
# not when it is imported.
if __name__ == "__main__":
    main()