| | import soundfile |
| | import pyrubberband |
| | import configparser |
| | import pathlib |
| | import os |
| | import io |
| |
|
| | from Scripts.shared_imports import * |
| | import Scripts.TTS as TTS |
| | from Scripts.utils import parseBool |
| |
|
| | from pydub import AudioSegment |
| | from pydub.silence import detect_leading_silence |
| | import langcodes |
| |
|
| | |
# Name of the folder where intermediate per-subtitle audio files are written
# (trimmed/stretched debug WAVs). Assumed relative to the current working
# directory — TODO confirm it is created elsewhere before use.
workingFolder = "workingFolder"
| |
|
| |
|
def trim_clip(inputSound):
    """Remove leading and trailing silence from a pydub AudioSegment.

    Uses pydub's detect_leading_silence on the clip to strip silence at the
    start, and on the reversed clip to strip silence at the end.

    Args:
        inputSound: The AudioSegment to trim.

    Returns:
        A new AudioSegment with leading and trailing silence removed.
    """
    # Fixed: the original assigned lambdas to names annotated as
    # `AudioSegment`, but the values were callables, not AudioSegments —
    # a misleading (wrong) annotation and a PEP 8 violation (named lambdas).
    def _trim_leading(clip):
        # Drop everything before the first non-silent audio.
        return clip[detect_leading_silence(clip):]

    def _trim_trailing(clip):
        # Reverse, trim the now-leading silence, then reverse back.
        return _trim_leading(clip.reverse()).reverse()

    return _trim_trailing(_trim_leading(inputSound))
| |
|
| | |
def insert_audio(canvas, audioToOverlay, startTimeMs):
    """Overlay an audio clip onto the canvas at the given start time.

    Args:
        canvas: The base AudioSegment to overlay onto.
        audioToOverlay: The clip to place on the canvas.
        startTimeMs: Start position in milliseconds (coerced to int).

    Returns:
        A new AudioSegment — pydub's overlay() does not modify its inputs.
    """
    return canvas.overlay(audioToOverlay, position=int(startTimeMs))
| |
|
| | |
def create_canvas(canvasDuration, frame_rate=None):
    """Create a silent AudioSegment "canvas" to overlay speech clips onto.

    Args:
        canvasDuration: Length of the canvas in milliseconds.
        frame_rate: Sample rate of the canvas. Defaults to the
            'synth_sample_rate' config value, now read at call time —
            the original evaluated `int(config['synth_sample_rate'])` as
            the parameter default, freezing it at import time and
            requiring `config` to exist when the module loads.

    Returns:
        A silent AudioSegment of the requested duration and sample rate.
    """
    if frame_rate is None:
        frame_rate = int(config['synth_sample_rate'])
    return AudioSegment.silent(duration=canvasDuration, frame_rate=frame_rate)
| |
|
def get_speed_factor(subsDict, trimmedAudio, desiredDuration, num):
    """Compute how much clip `num` must be sped up to fit its time slot.

    Measures the actual duration of the trimmed WAV clip, divides it by
    the desired duration, and stores the ratio under
    subsDict[num]['speed_factor'] (>1 means the clip is too long and must
    be sped up).

    Args:
        subsDict: Dictionary of subtitle entries keyed by subtitle number.
        trimmedAudio: File-like object holding the trimmed WAV clip.
        desiredDuration: Target duration in milliseconds (string or number).
        num: Key of the subtitle entry to update.

    Returns:
        The updated subsDict.
    """
    clip = AudioSegment.from_file(trimmedAudio, format="wav")
    actualDurationMs = clip.duration_seconds * 1000
    # Rewind the buffer so it can be read again later in the pipeline.
    trimmedAudio.seek(0)
    subsDict[num]['speed_factor'] = actualDurationMs / float(desiredDuration)
    return subsDict
| |
|
def stretch_audio(audioFileToStretch, speedFactor, num):
    """Time-stretch an audio clip by speedFactor using rubberband.

    Reads WAV samples from audioFileToStretch, stretches them with
    pyrubberband (the '--fine' rbarg presumably selects rubberband's
    finer-quality engine — confirm against the rubberband CLI docs), and
    returns the result as a pydub AudioSegment. In debug mode the
    stretched audio is also written to the working folder as '<num>_s.wav'.

    Args:
        audioFileToStretch: File-like object or path with the WAV input.
        speedFactor: Stretch ratio (>1 speeds up / shortens the clip).
        num: Subtitle number, used only for the debug file name.

    Returns:
        The stretched clip as an AudioSegment.
    """
    samples, sampleRate = soundfile.read(audioFileToStretch)
    stretchedSamples = pyrubberband.time_stretch(samples, sampleRate, speedFactor, rbargs={'--fine': '--fine'})

    stretchedBuffer = io.BytesIO()
    soundfile.write(stretchedBuffer, stretchedSamples, sampleRate, format='wav')

    if config['debug_mode']:
        soundfile.write(os.path.join(workingFolder, f'{num}_s.wav'), stretchedSamples, sampleRate)

    return AudioSegment.from_file(stretchedBuffer, format="wav")
| |
|
| |
|
| | from pydub import AudioSegment |
| |
|
def build_audio(subsDict, langDict, totalAudioLength, twoPassVoiceSynth=False):
    """Assemble the final dubbed audio track from per-subtitle TTS clips.

    For each subtitle entry: trims silence from the synthesized clip,
    calculates a speed factor so the clip fits its subtitle time slot,
    optionally re-synthesizes a second pass at the adjusted rate,
    stretches clips that still don't fit, overlays everything onto a
    silent canvas, and exports the result to OUTPUT_FOLDER in the
    configured format.

    Args:
        subsDict: Dict of subtitle entries (keyed by subtitle number),
            each with at least 'TTS_FilePath', 'duration_ms', 'start_ms'.
        langDict: Language info dict containing 'languageCode'.
        totalAudioLength: Total output track length in milliseconds.
        twoPassVoiceSynth: Whether to synthesize a second pass at the
            corrected speaking rate. Forced off for Azure, which applies
            the rate during synthesis.

    Returns:
        The updated subsDict.

    Raises:
        ValueError: If config['output_format'] is not mp3, wav, or aac.
    """
    # Azure applies the speed factor during synthesis, so no second pass.
    if cloudConfig['tts_service'] == 'azure':
        twoPassVoiceSynth = False

    virtualTrimmedFileDict = {}
    # Pass 1: trim silence from each raw TTS clip into an in-memory WAV.
    # enumerate() replaces the original per-iteration
    # list(subsDict.keys()).index(key), which was O(n^2) overall.
    for keyIndex, (key, value) in enumerate(subsDict.items()):
        filePathTrimmed = os.path.join(workingFolder, str(key)) + "_t.wav"
        subsDict[key]['TTS_FilePath_Trimmed'] = filePathTrimmed

        rawClip = AudioSegment.from_file(value['TTS_FilePath'], format="mp3", frame_rate=int(config['synth_sample_rate']))
        trimmedClip = trim_clip(rawClip)
        if config['debug_mode']:
            trimmedClip.export(filePathTrimmed, format="wav")

        # Keep the trimmed clip in memory to avoid disk round-trips.
        tempTrimmedFile = io.BytesIO()
        trimmedClip.export(tempTrimmedFile, format="wav")
        virtualTrimmedFileDict[key] = tempTrimmedFile
        print(f" Trimmed Audio: {keyIndex + 1} of {len(subsDict)}", end="\r")
    print("\n")

    if not cloudConfig['tts_service'] == 'azure':
        # Fixed: original wrote `subsDict.items` without parentheses,
        # which raises TypeError (cannot unpack a bound method).
        for keyIndex, (key, value) in enumerate(subsDict.items()):
            subsDict = get_speed_factor(subsDict, virtualTrimmedFileDict[key], value['duration_ms'], num=key)
            print(f" Calculated Speed Factor: {keyIndex + 1} of {len(subsDict)}", end="\r")
        print("\n")

    if twoPassVoiceSynth and not cloudConfig['tts_service'] == 'azure':
        # NOTE(review): the azure sub-branch below is unreachable — the
        # guard above excludes azure. Kept as-is for parity with the
        # original control flow; confirm whether batch mode should apply
        # to other services too.
        if cloudConfig['batch_tts_synthesize'] and cloudConfig['tts_service'] == 'azure':
            subsDict = TTS.synthesize_dictionary_batch(subsDict, langDict, skipSynthesize=config['skip_synthesize'], secondPass=True)
        else:
            subsDict = TTS.synthesize_dictionary(subsDict, langDict, skipSynthesize=config['skip_synthesize'], secondPass=True)

        # Re-trim the second-pass clips. (Fixed: `.items` -> `.items()`.)
        for keyIndex, (key, value) in enumerate(subsDict.items()):
            rawClip = AudioSegment.from_file(value['TTS_FilePath'], format="mp3", frame_rate=int(config['synth_sample_rate']))
            trimmedClip = trim_clip(rawClip)
            if config['debug_mode']:
                secondPassTrimmedFile = value['TTS_FilePath_Trimmed'][:-4] + "_p2_t.wav"
                trimmedClip.export(secondPassTrimmedFile, format="wav")
            trimmedClip.export(virtualTrimmedFileDict[key], format="wav")
            print(f" Trimmed Audio (2nd Pass): {keyIndex + 1} of {len(subsDict)}", end="\r")
        print("\n")

        if config['force_stretch_with_twopass']:
            # Recalculate speed factors against the second-pass clips.
            # (Fixed: `.items` -> `.items()`.)
            for keyIndex, (key, value) in enumerate(subsDict.items()):
                subsDict = get_speed_factor(subsDict, virtualTrimmedFileDict[key], value['duration_ms'], num=key)
                print(f" Calculated Speed Factor (2nd Pass): {keyIndex + 1} of {len(subsDict)}", end="\r")
            print("\n")

    # Silent canvas the length of the full video's audio track.
    canvas = create_canvas(totalAudioLength)

    # Stretch (if needed) and place every clip at its start time.
    for keyIndex, (key, value) in enumerate(subsDict.items()):
        if (not twoPassVoiceSynth or config['force_stretch_with_twopass']) and not cloudConfig['tts_service'] == 'azure':
            stretchedClip = stretch_audio(virtualTrimmedFileDict[key], speedFactor=subsDict[key]['speed_factor'], num=key)
        else:
            stretchedClip = AudioSegment.from_file(virtualTrimmedFileDict[key], format="wav")
            # Rewind so the buffer stays readable if accessed again.
            virtualTrimmedFileDict[key].seek(0)

        canvas = insert_audio(canvas, stretchedClip, value['start_ms'])
        print(f" Final Audio Processed: {keyIndex + 1} of {len(subsDict)}", end="\r")
    print("\n")

    # Output name: "<video stem> - <language display name> - <code>.<ext>"
    lang = langcodes.get(langDict['languageCode'])
    langName = langcodes.get(langDict['languageCode']).get(lang.to_alpha3()).display_name()
    if config['debug_mode'] and not os.path.isfile(ORIGINAL_VIDEO_PATH):
        outputFileName = "debug" + f" - {langName} - {langDict['languageCode']}."
    else:
        outputFileName = pathlib.Path(ORIGINAL_VIDEO_PATH).stem + f" - {langName} - {langDict['languageCode']}."
    outputFileName = os.path.join(OUTPUT_FOLDER, outputFileName)

    outputFormat = config['output_format'].lower()
    if outputFormat == "mp3":
        outputFileName += "mp3"
        formatString = "mp3"
    elif outputFormat == "wav":
        outputFileName += "wav"
        formatString = "wav"
    elif outputFormat == "aac":
        # ffmpeg's muxer name for raw AAC output is 'adts'.
        outputFileName += "aac"
        formatString = "adts"
    else:
        # Fixed: original fell through with formatString undefined,
        # producing a confusing NameError at export time.
        raise ValueError(f"Unsupported output format: {config['output_format']}")

    canvas = canvas.set_channels(2)  # Force stereo output.
    try:
        print("\nExporting audio file...")
        canvas.export(outputFileName, format=formatString, bitrate="192k")
    except Exception:
        # Export can fail (e.g. permission error while the file is open
        # elsewhere); retry under a .bak name instead of losing the
        # rendered audio. (Fixed: bare `except:` also swallowed
        # KeyboardInterrupt/SystemExit.)
        outputFileName = outputFileName + ".bak"
        canvas.export(outputFileName, format=formatString, bitrate="192k")
        print("\nThere was an issue exporting the audio, it might be a permission error. The file was saved as a backup with the extension .bak")
        print("Try removing the .bak extension then listen to the file to see if it worked.\n")
        input("Press Enter to exit...")

    return subsDict
| |
|