diff --git a/indextts/infer_v2.py b/indextts/infer_v2.py index b851055..4d53e99 100644 --- a/indextts/infer_v2.py +++ b/indextts/infer_v2.py @@ -614,11 +614,22 @@ class QwenEmotion: # TODO: the "低落" (melancholic) emotion will always be mapped to # "悲伤" (sad) by QwenEmotion's text analysis. it doesn't know the # difference between those emotions even if user writes exact words. + # SEE: `self.melancholic_words` for current workaround. "低落": "melancholic", "惊讶": "surprised", "自然": "calm", } self.desired_vector_order = ["高兴", "愤怒", "悲伤", "恐惧", "反感", "低落", "惊讶", "自然"] + self.melancholic_words = { + # emotion text phrases that will force QwenEmotion's "悲伤" (sad) detection + # to become "低落" (melancholic) instead, to fix limitations mentioned above. + "低落", + "melancholy", + "melancholic", + "depression", + "depressed", + "gloomy", + } self.max_score = 1.2 self.min_score = 0.0 @@ -686,6 +697,15 @@ class QwenEmotion: } # print(">> dict result", content) + # workaround for QwenEmotion's inability to distinguish "悲伤" (sad) vs "低落" (melancholic). + # if we detect any of the IndexTTS "melancholic" words, we swap those vectors + # to encode the "sad" emotion as "melancholic" (instead of sadness). + text_input_lower = text_input.lower() + if any(word in text_input_lower for word in self.melancholic_words): + # print(">> before vec swap", content) + content["悲伤"], content["低落"] = content.get("低落", 0.0), content.get("悲伤", 0.0) + # print(">> after vec swap", content) + return self.convert(content)