diff --git a/indextts/infer_v2.py b/indextts/infer_v2.py
index 7c5484c..b851055 100644
--- a/indextts/infer_v2.py
+++ b/indextts/infer_v2.py
@@ -2,6 +2,8 @@ import os
 from subprocess import CalledProcessError
 
 os.environ['HF_HUB_CACHE'] = './checkpoints/hf_cache'
+import json
+import re
 import time
 import librosa
 import torch
@@ -315,8 +317,9 @@ class IndexTTS2:
             # assert emo_alpha == 1.0
             if emo_text is None:
                 emo_text = text
-            emo_dict, content = self.qwen_emo.inference(emo_text)
+            emo_dict = self.qwen_emo.inference(emo_text)
             print(emo_dict)
+            # flatten the ordered dict into the emotion vector (a list of scores); the order is VERY important!
             emo_vector = list(emo_dict.values())
 
         if emo_vector is not None:
@@ -602,59 +605,41 @@ class QwenEmotion:
             device_map="auto"
         )
         self.prompt = "文本情感分类"
-        self.convert_dict = {
+        self.cn_key_to_en = {
             "高兴": "happy",
             "愤怒": "angry",
             "悲伤": "sad",
             "恐惧": "afraid",
             "反感": "disgusted",
+            # TODO: the "低落" (melancholic) emotion will always be mapped to
+            # "悲伤" (sad) by QwenEmotion's text analysis. It cannot tell those
+            # emotions apart, even if the user writes those exact words.
             "低落": "melancholic",
             "惊讶": "surprised",
             "自然": "calm",
         }
-        self.backup_dict = {"happy": 0, "angry": 0, "sad": 0, "afraid": 0, "disgusted": 0, "melancholic": 0,
-                            "surprised": 0, "calm": 1.0}
+        self.desired_vector_order = ["高兴", "愤怒", "悲伤", "恐惧", "反感", "低落", "惊讶", "自然"]
         self.max_score = 1.2
         self.min_score = 0.0
 
+    def clamp_score(self, value):
+        return max(self.min_score, min(self.max_score, value))  # limit value to [min_score, max_score]
+
     def convert(self, content):
-        content = content.replace("\n", " ")
-        content = content.replace(" ", "")
-        content = content.replace("{", "")
-        content = content.replace("}", "")
-        content = content.replace('"', "")
-        parts = content.strip().split(',')
-        print(parts)
-        parts_dict = {}
-        desired_order = ["高兴", "愤怒", "悲伤", "恐惧", "反感", "低落", "惊讶", "自然"]
-        for part in parts:
-            key_value = part.strip().split(':')
-            if len(key_value) == 2:
-                parts_dict[key_value[0].strip()] = part
-        # 按照期望顺序重新排列
-        ordered_parts = [parts_dict[key] for key in desired_order if key in parts_dict]
-        parts = ordered_parts
-        if len(parts) != len(self.convert_dict):
-            return self.backup_dict
+        # build the emotion dict from `content` (a label -> score mapping):
+        # - insert keys in `desired_vector_order` (Python 3.7+ `dict` keeps insertion order)
+        # - translate the Chinese labels to English via `cn_key_to_en`
+        # - clamp every score to the [min_score, max_score] range
+        # - labels that are missing from `content` default to 0.0
+        emotion_dict = {
+            self.cn_key_to_en[cn_key]: self.clamp_score(content.get(cn_key, 0.0))
+            for cn_key in self.desired_vector_order
+        }
 
-        emotion_dict = {}
-        for part in parts:
-            key_value = part.strip().split(':')
-            if len(key_value) == 2:
-                try:
-                    key = self.convert_dict[key_value[0].strip()]
-                    value = float(key_value[1].strip())
-                    value = max(self.min_score, min(self.max_score, value))
-                    emotion_dict[key] = value
-                except Exception:
-                    continue
-
-        for key in self.backup_dict:
-            if key not in emotion_dict:
-                emotion_dict[key] = 0.0
-
-        if sum(emotion_dict.values()) <= 0:
-            return self.backup_dict
+        # fall back to a calm/neutral voice when no emotion has a positive score
+        if all(val <= 0.0 for val in emotion_dict.values()):
+            print(">> no emotions detected; using default calm/neutral voice")
+            emotion_dict["calm"] = 1.0
 
         return emotion_dict
 
@@ -687,9 +672,21 @@ class QwenEmotion:
         except ValueError:
             index = 0
 
-        content = self.tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
-        emotion_dict = self.convert(content)
-        return emotion_dict, content
+        content = self.tokenizer.decode(output_ids[index:], skip_special_tokens=True)
+
+        # decode the JSON emotion detections as a dictionary
+        try:
+            content = json.loads(content)
+        except json.decoder.JSONDecodeError:
+            # invalid JSON; fall back to regex parsing of `key: number` pairs.
+            # braces are excluded from keys so the first key stays clean, and only
+            # well-formed decimals are matched so that `float()` cannot raise.
+            content = {
+                m.group(1): float(m.group(2))
+                for m in re.finditer(r'([^\s":.,{}]+?)"?\s*:\s*(\d+(?:\.\d*)?|\.\d+)', content)
+            }
+
+        return self.convert(content)
 
 
 if __name__ == "__main__":