From 46630ca45bfd161ae64bda0db02cd8b5417a4b57 Mon Sep 17 00:00:00 2001
From: wangyining02 <wangyining02@bilibili.com>
Date: Wed, 26 Mar 2025 19:14:47 +0800
Subject: [PATCH 1/7] =?UTF-8?q?+=E7=AE=80=E5=8D=95=E5=89=8D=E7=AB=AF?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 indextts/infer.py       | 18 +++++---
 indextts/utils/front.py | 96 +++++++++++++++++++++++++++++++++++++++++
 requirements.txt        |  3 +-
 3 files changed, 109 insertions(+), 8 deletions(-)
 create mode 100644 indextts/utils/front.py

diff --git a/indextts/infer.py b/indextts/infer.py
index eed25d4..bc62784 100644
--- a/indextts/infer.py
+++ b/indextts/infer.py
@@ -14,7 +14,7 @@ from indextts.utils.feature_extractors import MelSpectrogramFeatures
 from indextts.utils.common import tokenize_by_CJK_char
 from indextts.vqvae.xtts_dvae import DiscreteVAE
 
-
+from indextts.utils.front import TextNormalizer
 class IndexTTS:
     def __init__(self, cfg_path='checkpoints/config.yaml', model_dir='checkpoints'):
         self.cfg = OmegaConf.load(cfg_path)
@@ -42,16 +42,20 @@ class IndexTTS:
         self.bigvgan = self.bigvgan.to(self.device)
         self.bigvgan.eval()
         print(">> bigvgan weights restored from:", self.bigvgan_path)
+        self.normalizer = TextNormalizer()
+        self.normalizer.load()
+        print(">> TextNormalizer loaded")
 
     def preprocess_text(self, text):
-        chinese_punctuation = "，。！？；：“”‘’（）【】《》"
-        english_punctuation = ",.!?;:\"\"''()[]<>"
-
-        # 创建一个映射字典
-        punctuation_map = str.maketrans(chinese_punctuation, english_punctuation)
+        # chinese_punctuation = "，。！？；：“”‘’（）【】《》"
+        # english_punctuation = ",.!?;:\"\"''()[]<>"
+        #
+        # # 创建一个映射字典
+        # punctuation_map = str.maketrans(chinese_punctuation, english_punctuation)
 
         # 使用translate方法替换标点符号
-        return text.translate(punctuation_map)
+        # return text.translate(punctuation_map)
+        return self.normalizer.infer(text)
 
     def infer(self, audio_prompt, text, output_path):
         text = self.preprocess_text(text)
diff --git a/indextts/utils/front.py b/indextts/utils/front.py
new file mode 100644
index 0000000..24ddf03
--- /dev/null
+++ b/indextts/utils/front.py
@@ -0,0 +1,96 @@
+# -*- coding: utf-8 -*-
+import traceback
+import os
+import sys
+import re
+import re
+
+
+
+
+class TextNormalizer:
+    def __init__(self):
+        # self.normalizer = Normalizer(cache_dir="textprocessing/tn")
+        self.zh_normalizer = None
+        self.en_normalizer = None
+        self.char_rep_map = {
+            "：": ",",
+            "；": ",",
+            ";": ",",
+            "，": ",",
+            "。": ".",
+            "！": "!",
+            "？": "?",
+            "\n": ".",
+            "·": ",",
+            "、": ",",
+            "...": "…",
+            "……": "…",
+            "$": ".",
+            "“": "'",
+            "”": "'",
+            '"': "'",
+            "‘": "'",
+            "’": "'",
+            "（": "'",
+            "）": "'",
+            "(": "'",
+            ")": "'",
+            "《": "'",
+            "》": "'",
+            "【": "'",
+            "】": "'",
+            "[": "'",
+            "]": "'",
+            "—": "-",
+            "～": "-",
+            "~": "-",
+            "「": "'",
+            "」": "'",
+            ":": ",",
+        }
+
+    def match_email(self, email):
+        # Regex for a simple email shape: alphanumerics@alphanumerics.letters
+        pattern = r'^[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+$'
+        return re.match(pattern, email) is not None
+
+    def use_chinese(self, s):
+        has_chinese = bool(re.search(r'[\u4e00-\u9fff]', s))
+        has_digit = bool(re.search(r'\d', s))
+        has_alpha = bool(re.search(r'[a-zA-Z]', s))
+        is_email = self.match_email(s)
+        if has_chinese or not has_alpha or is_email:
+            return True
+        else:
+            return False
+
+    def load(self):
+        # print(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
+        # sys.path.append(model_dir)
+
+        from tn.chinese.normalizer import Normalizer as NormalizerZh
+        from tn.english.normalizer import Normalizer as NormalizerEn
+
+        self.zh_normalizer = NormalizerZh(remove_interjections=False, remove_erhua=False)
+        self.en_normalizer = NormalizerEn()
+
+    def infer(self, text):
+        pattern = re.compile("|".join(re.escape(p) for p in self.char_rep_map.keys()))
+        replaced_text = pattern.sub(lambda x: self.char_rep_map[x.group()], text)
+        if not self.zh_normalizer or not self.en_normalizer:
+            print("Error, text normalizer is not initialized !!!")
+            return ""
+        try:
+            normalizer = self.zh_normalizer if self.use_chinese(text) else self.en_normalizer
+            result = normalizer.normalize(text)
+        except Exception:
+            result = ""
+            print(traceback.format_exc())
+        return result
+
+
+if __name__ == '__main__':
+    # 测试程序
+    text_normalizer = TextNormalizer()
+    print(text_normalizer.infer("2.5平方电线"))
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 846e188..b813fe9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,4 +20,5 @@ sentencepiece
 pypinyin
 librosa
 gradio
-tqdm
\ No newline at end of file
+tqdm
+WeTextProcessing
\ No newline at end of file

From 9a925a14974040b695cf30e4e48aae3cf3e6dcc3 Mon Sep 17 00:00:00 2001
From: wangyining02 <wangyining02@bilibili.com>
Date: Wed, 26 Mar 2025 19:28:44 +0800
Subject: [PATCH 2/7] =?UTF-8?q?=E5=89=8D=E7=AB=AF=E5=85=BC=E5=AE=B9arm?=
 =?UTF-8?q?=E6=9C=BA=E5=99=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 indextts/utils/front.py | 15 ++++++++++-----
 requirements.txt        |  3 ++-
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/indextts/utils/front.py b/indextts/utils/front.py
index 24ddf03..ec3eb7f 100644
--- a/indextts/utils/front.py
+++ b/indextts/utils/front.py
@@ -68,12 +68,17 @@ class TextNormalizer:
     def load(self):
         # print(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
         # sys.path.append(model_dir)
+        import platform
+        if platform.machine() == "aarch64":
+            from wetext import Normalizer
+            self.zh_normalizer = Normalizer(remove_erhua=False,lang="zh",operator="tn")
+            self.en_normalizer = Normalizer(lang="en",operator="tn")
+        else:
+            from tn.chinese.normalizer import Normalizer as NormalizerZh
+            from tn.english.normalizer import Normalizer as NormalizerEn
+            self.zh_normalizer = NormalizerZh(remove_interjections=False, remove_erhua=False)
+            self.en_normalizer = NormalizerEn()
 
-        from tn.chinese.normalizer import Normalizer as NormalizerZh
-        from tn.english.normalizer import Normalizer as NormalizerEn
-
-        self.zh_normalizer = NormalizerZh(remove_interjections=False, remove_erhua=False)
-        self.en_normalizer = NormalizerEn()
 
     def infer(self, text):
         pattern = re.compile("|".join(re.escape(p) for p in self.char_rep_map.keys()))
diff --git a/requirements.txt b/requirements.txt
index b813fe9..803d193 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,4 +21,5 @@ pypinyin
 librosa
 gradio
 tqdm
-WeTextProcessing
\ No newline at end of file
+WeTextProcessing # If installation fails on ARM machines, comment out this line
+wetext
\ No newline at end of file

From fb0bc6a4867db661fead6c53c8618fb63e9e3f41 Mon Sep 17 00:00:00 2001
From: wangyining02 <wangyining02@bilibili.com>
Date: Wed, 26 Mar 2025 19:29:31 +0800
Subject: [PATCH 3/7] fix: pass the punctuation-replaced text to normalize()

---
 indextts/utils/front.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/indextts/utils/front.py b/indextts/utils/front.py
index ec3eb7f..a07cb1e 100644
--- a/indextts/utils/front.py
+++ b/indextts/utils/front.py
@@ -88,7 +88,7 @@ class TextNormalizer:
             return ""
         try:
             normalizer = self.zh_normalizer if self.use_chinese(text) else self.en_normalizer
-            result = normalizer.normalize(text)
+            result = normalizer.normalize(replaced_text)
         except Exception:
             result = ""
             print(traceback.format_exc())

From f6e7b4acf6639e0aa245d723090b8b1f21e9f545 Mon Sep 17 00:00:00 2001
From: wangyining02 <wangyining02@bilibili.com>
Date: Wed, 26 Mar 2025 19:33:12 +0800
Subject: [PATCH 4/7] fix: choose zh/en normalizer from the punctuation-replaced text

---
 indextts/utils/front.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/indextts/utils/front.py b/indextts/utils/front.py
index a07cb1e..5e703e9 100644
--- a/indextts/utils/front.py
+++ b/indextts/utils/front.py
@@ -87,7 +87,7 @@ class TextNormalizer:
             print("Error, text normalizer is not initialized !!!")
             return ""
         try:
-            normalizer = self.zh_normalizer if self.use_chinese(text) else self.en_normalizer
+            normalizer = self.zh_normalizer if self.use_chinese(replaced_text) else self.en_normalizer
             result = normalizer.normalize(replaced_text)
         except Exception:
             result = ""

From 1004452e958d3b771355b5c097720a10eb62846b Mon Sep 17 00:00:00 2001
From: wangyining02 <wangyining02@bilibili.com>
Date: Wed, 26 Mar 2025 20:29:12 +0800
Subject: [PATCH 5/7] =?UTF-8?q?WeTextProcessing:=20overwrite=5Fcache=3DTru?=
 =?UTF-8?q?e=20=E5=88=B7=E6=96=B0=E5=89=8D=E7=AB=AF=E7=BC=93=E5=AD=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 indextts/infer.py       | 3 +++
 indextts/utils/front.py | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/indextts/infer.py b/indextts/infer.py
index bc62784..6cb432d 100644
--- a/indextts/infer.py
+++ b/indextts/infer.py
@@ -58,7 +58,10 @@ class IndexTTS:
         return self.normalizer.infer(text)
 
     def infer(self, audio_prompt, text, output_path):
+        print(f"origin text:{text}")
         text = self.preprocess_text(text)
+        print(f"normalized text:{text}")
+
 
         audio, sr = torchaudio.load(audio_prompt)
         audio = torch.mean(audio, dim=0, keepdim=True)
diff --git a/indextts/utils/front.py b/indextts/utils/front.py
index 5e703e9..4276e08 100644
--- a/indextts/utils/front.py
+++ b/indextts/utils/front.py
@@ -76,8 +76,8 @@ class TextNormalizer:
         else:
             from tn.chinese.normalizer import Normalizer as NormalizerZh
             from tn.english.normalizer import Normalizer as NormalizerEn
-            self.zh_normalizer = NormalizerZh(remove_interjections=False, remove_erhua=False)
-            self.en_normalizer = NormalizerEn()
+            self.zh_normalizer = NormalizerZh(remove_interjections=False, remove_erhua=False,overwrite_cache=True)
+            self.en_normalizer = NormalizerEn(overwrite_cache=True)
 
 
     def infer(self, text):

From fd81f4a5bd80150b6cceafa40e68563b22e07629 Mon Sep 17 00:00:00 2001
From: kemuriririn <10inspiral@gmail.com>
Date: Thu, 27 Mar 2025 14:03:51 +0800
Subject: [PATCH 6/7] =?UTF-8?q?=E6=81=A2=E5=A4=8D=E8=BE=93=E5=85=A5?=
 =?UTF-8?q?=E4=B8=AD=E7=9A=84=E6=8B=BC=E9=9F=B3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 indextts/utils/front.py | 80 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 74 insertions(+), 6 deletions(-)

diff --git a/indextts/utils/front.py b/indextts/utils/front.py
index 4276e08..b2ad70a 100644
--- a/indextts/utils/front.py
+++ b/indextts/utils/front.py
@@ -1,11 +1,6 @@
 # -*- coding: utf-8 -*-
 import traceback
-import os
-import sys
 import re
-import re
-
-
 
 
 class TextNormalizer:
@@ -69,7 +64,7 @@ class TextNormalizer:
         # print(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
         # sys.path.append(model_dir)
         import platform
-        if platform.machine() == "aarch64":
+        if platform.system() == "Darwin":
             from wetext import Normalizer
             self.zh_normalizer = Normalizer(remove_erhua=False,lang="zh",operator="tn")
             self.en_normalizer = Normalizer(lang="en",operator="tn")
@@ -92,8 +87,81 @@ class TextNormalizer:
         except Exception:
             result = ""
             print(traceback.format_exc())
+        result = self.restore_pinyin_tone_numbers(replaced_text, result)
         return result
 
+    def pinyin_match(self, pinyin):
+        pattern = r"(qun)(\d)"
+        repl = r"qvn\g<2>"
+        pinyin = re.sub(pattern, repl, pinyin)
+
+        pattern = r"(quan)(\d)"
+        repl = r"qvan\g<2>"
+        pinyin = re.sub(pattern, repl, pinyin)
+
+        pattern = r"(que)(\d)"
+        repl = r"qve\g<2>"
+        pinyin = re.sub(pattern, repl, pinyin)
+
+        pattern = r"(qu)(\d)"
+        repl = r"qv\g<2>"
+        pinyin = re.sub(pattern, repl, pinyin)
+
+        pattern = r"(ju)(\d)"
+        repl = r"jv\g<2>"
+        pinyin = re.sub(pattern, repl, pinyin)
+
+        pattern = r"(jue)(\d)"
+        repl = r"jve\g<2>"
+        pinyin = re.sub(pattern, repl, pinyin)
+
+        pattern = r"(xun)(\d)"
+        repl = r"xvn\g<2>"
+        pinyin = re.sub(pattern, repl, pinyin)
+
+        pattern = r"(xue)(\d)"
+        repl = r"xve\g<2>"
+        pinyin = re.sub(pattern, repl, pinyin)
+
+        pattern = r"(xu)(\d)"
+        repl = r"xv\g<2>"
+        pinyin = re.sub(pattern, repl, pinyin)
+
+        pattern = r"(juan)(\d)"
+        repl = r"jvan\g<2>"
+        pinyin = re.sub(pattern, repl, pinyin)
+
+        pattern = r"(jun)(\d)"
+        repl = r"jvn\g<2>"
+        pinyin = re.sub(pattern, repl, pinyin)
+
+        pattern = r"(xuan)(\d)"
+        repl = r"xvan\g<2>"
+        pinyin = re.sub(pattern, repl, pinyin)
+        return pinyin
+
+    def restore_pinyin_tone_numbers(self,original_text, processed_text):
+        # Step 1: restore the tone digits (1-4) after pinyin. NOTE(review): original_text is unused here.
+        # Build the mapping from Chinese numerals to Arabic digits
+        chinese_to_num = {'一': '1', '二': '2', '三': '3', '四': '4'}
+
+        # Use a regex to find pinyin + Chinese-numeral combinations (e.g. "xuan四")
+        def replace_tone(match):
+            pinyin = match.group(1)  # pinyin part
+            chinese_num = match.group(2)  # Chinese-numeral part
+            # Convert the Chinese numeral to an Arabic digit
+            num = chinese_to_num.get(chinese_num, chinese_num)
+            return f"{pinyin}{num}"
+
+        # Match a run of ASCII letters (assumed to be pinyin) followed by one of 一/二/三/四
+        pattern = r'([a-zA-Z]+)([一二三四])'
+        restored_text = re.sub(pattern, replace_tone, processed_text)
+        restored_text = restored_text.lower()
+        restored_text = self.pinyin_match(restored_text)
+
+        return restored_text
+
+
 
 if __name__ == '__main__':
     # 测试程序

From 6286b0ffc966ac7b774cfdffffe165ffa5352c7c Mon Sep 17 00:00:00 2001
From: kemuriririn <10inspiral@gmail.com>
Date: Wed, 2 Apr 2025 17:40:41 +0800
Subject: [PATCH 7/7] =?UTF-8?q?=E6=8E=A8=E7=90=86=E6=97=B6=E5=8A=A0?=
 =?UTF-8?q?=E8=BD=BDbpe=20model=E4=BD=BF=E7=94=A8=E7=9B=B8=E5=AF=B9?=
 =?UTF-8?q?=E4=BA=8E=E6=A8=A1=E5=9E=8B=E6=A0=B9=E7=9B=AE=E5=BD=95=E7=9A=84?=
 =?UTF-8?q?=E8=B7=AF=E5=BE=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md               |   2 -
 checkpoints/config.yaml | 112 ++++++++++++++++++++++++++++++++++++++++
 indextts/infer.py       |   2 +-
 3 files changed, 113 insertions(+), 3 deletions(-)
 create mode 100644 checkpoints/config.yaml

diff --git a/README.md b/README.md
index ad7737c..2d5eea3 100644
--- a/README.md
+++ b/README.md
@@ -105,11 +105,9 @@ apt-get install ffmpeg
 ```
 3. Download models:
 ```bash
-mkdir checkpoints
 wget https://huggingface.co/IndexTeam/Index-TTS/resolve/main/bigvgan_discriminator.pth -P checkpoints
 wget https://huggingface.co/IndexTeam/Index-TTS/resolve/main/bigvgan_generator.pth -P checkpoints
 wget https://huggingface.co/IndexTeam/Index-TTS/resolve/main/bpe.model -P checkpoints
-wget https://huggingface.co/IndexTeam/Index-TTS/resolve/main/config.yaml -P checkpoints
 wget https://huggingface.co/IndexTeam/Index-TTS/resolve/main/dvae.pth -P checkpoints
 wget https://huggingface.co/IndexTeam/Index-TTS/resolve/main/gpt.pth -P checkpoints
 wget https://huggingface.co/IndexTeam/Index-TTS/resolve/main/unigram_12000.vocab -P checkpoints
diff --git a/checkpoints/config.yaml b/checkpoints/config.yaml
new file mode 100644
index 0000000..e24336d
--- /dev/null
+++ b/checkpoints/config.yaml
@@ -0,0 +1,112 @@
+dataset:
+    bpe_model: bpe.model
+    sample_rate: 24000
+    squeeze: false
+    mel:
+        sample_rate: 24000
+        n_fft: 1024
+        hop_length: 256
+        win_length: 1024
+        n_mels: 100
+        mel_fmin: 0
+        normalize: false
+
+gpt:
+    model_dim: 1024
+    max_mel_tokens: 605
+    max_text_tokens: 402
+    heads: 16
+    use_mel_codes_as_input: true
+    mel_length_compression: 1024
+    layers: 20
+    number_text_tokens: 12000
+    number_mel_codes: 8194
+    start_mel_token: 8192
+    stop_mel_token: 8193
+    start_text_token: 0
+    stop_text_token: 1
+    train_solo_embeddings: false
+    condition_type: "conformer_perceiver"
+    condition_module:
+        output_size: 512
+        linear_units: 2048
+        attention_heads: 8
+        num_blocks: 6
+        input_layer: "conv2d2"
+        perceiver_mult: 2
+
+vqvae:
+    channels: 100
+    num_tokens: 8192
+    hidden_dim: 512
+    num_resnet_blocks: 3
+    codebook_dim: 512
+    num_layers: 2
+    positional_dims: 1
+    kernel_size: 3
+    smooth_l1_loss: true
+    use_transposed_convs: false
+
+bigvgan:
+    adam_b1: 0.8
+    adam_b2: 0.99
+    lr_decay: 0.999998
+    seed: 1234
+
+    resblock: "1"
+    upsample_rates: [4,4,4,4,2,2]
+    upsample_kernel_sizes: [8,8,4,4,4,4]
+    upsample_initial_channel: 1536
+    resblock_kernel_sizes: [3,7,11]
+    resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
+    feat_upsample: false
+    speaker_embedding_dim: 512
+    cond_d_vector_in_each_upsampling_layer: true
+
+    gpt_dim: 1024
+
+    activation: "snakebeta"
+    snake_logscale: true
+
+    use_cqtd_instead_of_mrd: true
+    cqtd_filters: 128
+    cqtd_max_filters: 1024
+    cqtd_filters_scale: 1
+    cqtd_dilations: [1, 2, 4]
+    cqtd_hop_lengths: [512, 256, 256]
+    cqtd_n_octaves: [9, 9, 9]
+    cqtd_bins_per_octaves: [24, 36, 48]
+
+    resolutions: [[1024, 120, 600], [2048, 240, 1200], [512, 50, 240]]
+    mpd_reshapes: [2, 3, 5, 7, 11]
+    use_spectral_norm: false
+    discriminator_channel_mult: 1
+
+    use_multiscale_melloss: true
+    lambda_melloss: 15
+
+    clip_grad_norm: 1000
+
+    segment_size: 16384
+    num_mels: 100
+    num_freq: 1025
+    n_fft: 1024
+    hop_size: 256
+    win_size: 1024
+
+    sampling_rate: 24000
+
+    fmin: 0
+    fmax: null
+    fmax_for_loss: null
+    mel_type: "pytorch"
+
+    num_workers: 2
+    dist_config:
+        dist_backend: "nccl"
+        dist_url: "tcp://localhost:54321"
+        world_size: 1
+
+dvae_checkpoint: dvae.pth
+gpt_checkpoint: gpt.pth
+bigvgan_checkpoint: bigvgan_generator.pth
\ No newline at end of file
diff --git a/indextts/infer.py b/indextts/infer.py
index 6cb432d..f1f419e 100644
--- a/indextts/infer.py
+++ b/indextts/infer.py
@@ -74,7 +74,7 @@ class IndexTTS:
         auto_conditioning = cond_mel
 
         tokenizer = spm.SentencePieceProcessor()
-        tokenizer.load(self.cfg.dataset['bpe_model'])
+        tokenizer.load(os.path.join(self.model_dir,self.cfg.dataset['bpe_model']))
 
         punctuation = ["!", "?", ".", ";", "！", "？", "。", "；"]
         pattern = r"(?<=[{0}])\s*".format("".join(punctuation))