1. 安装 Python 3.7 环境,并从源码安装 TensorFlowTTS
# Notebook cell: fetch TensorFlowTTS from GitHub and install it from source.
import os
# Remove any previous checkout so the clone below starts from a clean directory.
os.system("rm -rf TensorFlowTTS")
# IPython/Jupyter shell escape: clone the TensorFlowTTS repository.
!git clone https://github.com/TensorSpeech/TensorFlowTTS.git
# Install the package from inside the checkout, then return to the parent directory.
os.chdir("TensorFlowTTS")
!pip install .
os.chdir("..")
import sys
# Also put the checkout on the import path; the path is relative to the
# current working directory, so this relies on the chdir("..") above.
sys.path.append("TensorFlowTTS/")
2. 安装其他依赖
# Install german_transliterate straight from its GitHub repository
# (presumably required by TensorFlowTTS's text processors — TODO confirm).
pip3 install git+https://github.com/repodiac/german_transliterate.git#egg=german_transliterate
# Pin h5py to 2.10.0 (presumably for compatibility with the TensorFlow
# version that loads the pretrained weights — TODO confirm).
pip3 install h5py==2.10.0
3.Load Model
import tensorflow as tf
import yaml
import numpy as np
from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor
4.Tacotron2
# Text-to-mel model used for the synthesis below.
tacotron2 = TFAutoModel.from_pretrained("tensorspeech/tts-tacotron2-baker-ch", name="tacotron2")
FastSpeech2
# Alternative text-to-mel model; loaded for reference, not used in the synthesis below.
fastspeech2 = TFAutoModel.from_pretrained("tensorspeech/tts-fastspeech2-baker-ch", name="fastspeech2")
MB-MelGAN
# Vocoder that converts the predicted mel spectrogram into a waveform.
# This load was missing, so the `mb_melgan.inference(...)` call below
# raised NameError.
mb_melgan = TFAutoModel.from_pretrained("tensorspeech/tts-mb_melgan-baker-ch", name="mb_melgan")

# Text front-end: turns the input sentence into the ID sequence fed to the model.
processor = AutoProcessor.from_pretrained("tensorspeech/tts-tacotron2-baker-ch")
input_ids = processor.text_to_sequence("这是一个开源的端到端中文语音合成系统,我喜欢你哦", inference=True)

# Run Tacotron2 on a batch of one sentence. Only `mel_outputs` is used
# afterwards; the other return values are kept for inspection.
_, mel_outputs, stop_token_prediction, alignment_history = tacotron2.inference(
    tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),  # add a leading batch axis
    tf.convert_to_tensor([len(input_ids)], tf.int32),  # length of the ID sequence
    tf.convert_to_tensor([0], dtype=tf.int32),  # presumably the speaker id — TODO confirm
)

# Drop the last `remove_end` samples of the generated waveform
# (presumably to cut trailing noise produced by Tacotron2 — TODO confirm).
remove_end = 1024
audio = mb_melgan.inference(mel_outputs)[0, :-remove_end, 0]

import soundfile
# Write the waveform to disk at 24000 Hz
# (assumed to match the sample rate of the Baker models — TODO confirm).
soundfile.write('test.wav',audio.numpy(),24000)
播放生成的 test.wav,效果正常。
以下页面也提供了 demo:
https://huggingface.co/tensorspeech/tts-fastspeech2-baker-ch
https://huggingface.co/tensorspeech/tts-tacotron2-baker-ch
可惜官方没有提供用于测试的 ASR(语音识别)模型。
不过用于简单场景已经够用。