From 64cb31a6c3ac9f16de541ad661b42154753e2bec Mon Sep 17 00:00:00 2001
From: kj863257rc <kj863257@163.com>
Date: Thu, 18 Sep 2025 13:59:45 +0800
Subject: [PATCH] Update infer_v2.py: solve the problem of persistent cache
 buildup (#382)

* Update infer_v2.py

clear old cache

* Update infer_v2.py: solve the problem of persistent cache buildup

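Clear the old cache: when the speaker or emotion audio prompt changes,
reset the previously cached values (cache_spk_cond, cache_s2mel_style,
cache_s2mel_prompt, cache_mel, cache_emo_cond) to None and call
torch.cuda.empty_cache() before regenerating them.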
---
 indextts/infer_v2.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/indextts/infer_v2.py b/indextts/infer_v2.py
index b39090b..d3aaa7d 100644
--- a/indextts/infer_v2.py
+++ b/indextts/infer_v2.py
@@ -370,6 +370,12 @@ class IndexTTS2:
 
         # 如果参考音频改变了，才需要重新生成, 提升速度
         if self.cache_spk_cond is None or self.cache_spk_audio_prompt != spk_audio_prompt:
+            if self.cache_spk_cond is not None:
+                self.cache_spk_cond = None
+                self.cache_s2mel_style = None
+                self.cache_s2mel_prompt = None
+                self.cache_mel = None
+                torch.cuda.empty_cache()
             audio,sr = self._load_and_cut_audio(spk_audio_prompt,15,verbose)
             audio_22k = torchaudio.transforms.Resample(sr, 22050)(audio)
             audio_16k = torchaudio.transforms.Resample(sr, 16000)(audio)
@@ -421,6 +427,9 @@ class IndexTTS2:
             emovec_mat = emovec_mat.unsqueeze(0)
 
         if self.cache_emo_cond is None or self.cache_emo_audio_prompt != emo_audio_prompt:
+            if self.cache_emo_cond is not None:
+                self.cache_emo_cond = None
+                torch.cuda.empty_cache()
             emo_audio, _ = self._load_and_cut_audio(emo_audio_prompt,15,verbose,sr=16000)
             emo_inputs = self.extract_features(emo_audio, sampling_rate=16000, return_tensors="pt")
             emo_input_features = emo_inputs["input_features"]