File size: 7,559 Bytes
b28205b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import torch
from TTS.api import TTS
import os
from tqdm import tqdm
import argparse

def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the batch synthesis script.

    Returns:
        A parser exposing ``--text_file``, ``--ref_dir``, ``--savedir`` and
        ``--device`` (all required) plus ``--model_path`` and
        ``--config_path`` (optional, with defaults).
    """
    parser = argparse.ArgumentParser(description="Text-to-Speech Synthesis")
    parser.add_argument('-t', '--text_file', type=str, required=True,
                        help='Path to text file containing text and audio reference files')
    parser.add_argument('-r', '--ref_dir', type=str, required=True,
                        help='Root directory containing reference audio files')
    parser.add_argument('-s', '--savedir', type=str, required=True,
                        help='Directory to store synthesized audio files')
    parser.add_argument('-d', '--device', type=str, required=True,
                        help='Device to use for synthesis (cpu or cuda)')
    parser.add_argument('-m', '--model_path', type=str,
                        default="/app/models/best_model_479919.pth",
                        help='Path to the model file')
    parser.add_argument('-c', '--config_path', type=str,
                        default="/app/models/config.json",
                        help='Path to the config file')
    return parser


def resolve_device(requested: str) -> str:
    """Map the ``--device`` argument to a device string that is usable here.

    Args:
        requested: Raw value of ``--device``.

    Returns:
        ``"cpu"`` when asked for. ``"cuda"`` or ``"cuda:N"`` when asked for and
        CUDA is available (an out-of-range or malformed index degrades to
        plain ``"cuda"``). ``"cpu"`` with a warning when CUDA was requested
        but is not available. For any other value the device is auto-detected.
    """
    if requested == "cpu":
        return "cpu"

    has_cuda = torch.cuda.is_available()
    if requested == "cuda" or requested.startswith("cuda:"):
        if not has_cuda:
            # Moving the model to a CUDA device would raise; degrade instead.
            print(f"Warning: '{requested}' requested but CUDA is not available. Falling back to cpu.")
            return "cpu"
        index = requested.partition(":")[2]
        if not index:
            return "cuda"
        if index.isdigit() and int(index) < torch.cuda.device_count():
            return requested
        print(f"Warning: '{requested}' is not a valid CUDA device here. Using default cuda device.")
        return "cuda"

    # Unrecognised value: pick the best device that is present.
    return "cuda" if has_cuda else "cpu"


def parse_entry(line: str):
    """Split one manifest line into its four tab-separated fields.

    Args:
        line: A raw line from the text file; surrounding whitespace is stripped.

    Returns:
        A ``(idx, lang, text, ref_file)`` tuple of strings, or ``None`` when
        the line does not contain exactly four tab-separated parts.
    """
    fields = line.strip().split('\t')
    if len(fields) != 4:
        return None
    return tuple(fields)


def main() -> None:
    """Synthesize every entry of the manifest given on the command line.

    Each manifest line holds ``idx<TAB>lang<TAB>text<TAB>ref_file``. The
    reference audio is looked up under ``--ref_dir`` and the result is written
    to ``--savedir`` as ``{idx}_{lang}_{basename(ref_file)}``. Malformed lines
    and entries whose reference audio is missing are reported and skipped.
    """
    args = build_parser().parse_args()

    # Get device
    device = resolve_device(args.device)
    print(f"Using device: {device}")

    # Initialize TTS model
    print(f"Loading model from {args.model_path} with config {args.config_path}")
    tts = TTS(
        model_path=args.model_path,
        config_path=args.config_path,
        progress_bar=False,
    ).to(device)

    # Create output directory
    os.makedirs(args.savedir, exist_ok=True)
    print(f"Output directory: {args.savedir}")

    # Read the text file. The encoding is explicit because the manifest carries
    # non-ASCII text and the platform default encoding is not always UTF-8.
    print(f"Reading text file: {args.text_file}")
    with open(args.text_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Process each line. Messages go through tqdm.write so they do not
    # corrupt the progress bar.
    print(f"Processing {len(lines)} entries...")
    synthesized = 0
    skipped = 0
    for line_no, line in enumerate(tqdm(lines), start=1):
        if not line.strip():
            # Blank lines are not entries; ignore them without a warning.
            continue

        entry = parse_entry(line)
        if entry is None:
            tqdm.write(f"Warning: Line {line_no} does not have 4 tab-separated parts. Skipping.")
            skipped += 1
            continue

        idx, lang, text, ref_file = entry
        ref_path = os.path.join(args.ref_dir, ref_file)
        if not os.path.isfile(ref_path):
            # One missing reference should not abort the rest of the batch.
            tqdm.write(f"Warning: Line {line_no} reference audio not found: {ref_path}. Skipping.")
            skipped += 1
            continue

        save_path = os.path.join(args.savedir, f"{idx}_{lang}_{os.path.basename(ref_file)}")

        tqdm.write(f"Synthesizing: {text[:30]}... using reference {ref_path}")
        tts.tts_to_file(text=text, speaker_wav=ref_path, language=lang, file_path=save_path)
        tqdm.write(f"Saved to: {save_path}")
        synthesized += 1

    print(f"Synthesized {synthesized} entries, skipped {skipped}.")
    print("Synthesis complete!")


if __name__ == "__main__":
    main()

# import torch
# from TTS.api import TTS
# import os
# from tqdm import tqdm
# import argparse

# # Get device
# device = "cuda:3" if torch.cuda.is_available() else "cpu"


# sentences_dict = {
#     "te": ["వడ్రంగి, క్షురక వృత్తులలో పెట్టుబడి ప్రధానమై ఇతరులు కూడా ఈ వృత్తిలో ప్రవేశించి వ్యాపారంగా మార్చేసార",
#            "నేను ఈ రోజు నాకు ఇష్టమైన పుస్తకాన్ని చదివాను మరియు తరువాత నా స్నేహితుడితో సినిమాకు వెళ్ళాను",
#            "ఈ వేసవి సెలవులలో నేను నా కుటుంబంతో కలిసి ఒక అందమైన బీచ్‌కి వెళ్ళాలని అనుకుంటున్నాను"],
#     "mr": ["जायकवाडी धरणातून तब्बल अडीच ते तीन लाख हेक्टर शेतीच्या सिंचनासाठी पाणी सोडलं जातं",
#            "मी आज माझ्या आवडत्या पुस्तकाचे वाचन केले आणि नंतर माझ्या मित्रासोबत चित्रपटाला गेलो",
#            "या उन्हाळी सुट्टीत मी माझ्या कुटुंबासोबत एक सुंदर समुद्रकिनाऱ्यावर जाण्याचा विचार करतो"],
#     "bho": ["बिहार के बक्सर जिला के बक्सर नगर निगम क्षेत्र में गंगा नदी पर बने बक्सर पुल का उद्घाटन आज प्रधानमंत्री नरेंद्र मोदी करेंगे",
#            "एन्ट्रापी कंप्यूटिंग में एन्ट्रोपी ऊ ऑपरेटिंग सिस्टम ह जे पे सरा क्रिप्टोग्राफिक फंक्शन सब काम करे लें",
#            "हमार मंडराये वाली जहाज़ सर्पमीनन से भरी है"],
# }

# tts = TTS(
#     model_path="/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/yourtts_syspin_baseline-April-19-2025_10+55AM-0b13ea658/best_model_479919.pth",
#     config_path="/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/yourtts_syspin_baseline-April-19-2025_10+55AM-0b13ea658/config.json",
#     progress_bar=False,
#     ).to(device)


# parser = argparse.ArgumentParser(description="Text-to-Speech Synthesis")
# parser.add_argument('-t', '--text_file', type=str, required=True, 
#                     help='Path to text file containing text and audio reference files')
# parser.add_argument('-r', '--ref_dir', type=str, required=True,
#                         help='Root directory containing reference audio files')
# parser.add_argument('-s', '--savedir', type=str, required=True,
#                         help='Directory to store synthesized audio files')
# parser.add_argument('-d', '--device', type=str, required=True,
#                         help='Device to use for synthesis (cpu or cuda)')

# args = parser.parse_args()




# os.makedirs(args.savedir, exist_ok=True)

# # Read the text file
# with open(args.text_file, 'r') as f:
#     lines = f.readlines()

# for line in lines:
#     idx, lang, text, ref_file = line.strip().split('\t')
#     ref_file = os.path.join(args.ref_dir, ref_file)
#     save_path = os.path.join(args.savedir, f"{idx}_{lang}_{os.path.basename(ref_file)}")
#     tts.tts_to_file(text=text, speaker_wav=ref_file, language=lang, file_path=save_path)
    

# # ref_files = [os.path.join("/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/test_samples/", x) for x in os.listdir("/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/test_samples/")]



# # for ref_file in ref_files:
# #     for language_key in sentences_dict.keys():
# #         for s_idx, sentence in enumerate(sentences_dict[language_key]):
# #             save_path = os.path.join("/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/test_infers/", f"test_{language_key}_{s_idx}_{os.path.basename(ref_file)}")
# #             tts.tts_to_file(text=sentence, speaker_wav=ref_file, language=language_key, file_path=save_path)
    
#     # tts.tts_to_file(text="ಹಸ್ದೇವ್ ನದಿ, ರಿಹಂಡ್ ನದಿ ಮತ್ತು ಕನ್ಹರ್ ನದಿಗಳು ಸುರ್ಗುಜಾದ ಮುಖಜ ಭೂಮಿಯಲ್ಲಿ ಹರಿಯುತ್ತವೆ.", speaker_wav="/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/syspin_data/Chhattisgarhi_Male/wavs/IISc_SYSPINProject_chha_m_AGRI_00001.wav", language="kn", file_path="test_kn.wav")