From eff6eb8f43a9ef97bc4648de4ca218c08791d66b Mon Sep 17 00:00:00 2001
From: root <root@debian>
Date: Thu, 10 Apr 2025 10:52:59 +0800
Subject: [PATCH] infer.py: return codes[:, :-2] when isfix is false

---
 indextts/infer.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/indextts/infer.py b/indextts/infer.py
index 2155780..59d1be3 100644
--- a/indextts/infer.py
+++ b/indextts/infer.py
@@ -111,9 +111,9 @@ class IndexTTS:
             else:
                 codes_list.append(codes[i])
             code_lens.append(len_)
+
+        codes = pad_sequence(codes_list, batch_first=True) if isfix else codes[:, :-2]
         code_lens = torch.LongTensor(code_lens).cuda()
-        if isfix:
-            codes = pad_sequence(codes_list, batch_first=True)
         return codes, code_lens
 
     def infer(self, audio_prompt, text, output_path):
@@ -212,7 +212,7 @@ class IndexTTS:
                 # temporarily fix the long silence bug.
                 codes, code_lens = self.remove_long_silence(codes, silent_token=52, max_consecutive=30)
                 print(codes, type(codes))
-                print(f"codes shape: {codes.shape}, codes type: {codes.dtype}")
+                print(f"fix codes shape: {codes.shape}, codes type: {codes.dtype}")
                 print(f"code len: {code_lens}")
 
                 # latent, text_lens_out, code_lens_out = \
@@ -260,6 +260,7 @@ class IndexTTS:
 
 if __name__ == "__main__":
     prompt_wav="test_data/input.wav"
+    prompt_wav="testwav/spk_1744181067_1.wav"
     #text="晕 XUAN4 是 一 种 GAN3 觉"
     #text='大家好，我现在正在bilibili 体验 ai 科技，说实话，来之前我绝对想不到！AI技术已经发展到这样匪夷所思的地步了！'
     text="There is a vehicle arriving in dock number 7?"