diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6f509e --- /dev/null +++ b/.gitattributes @@ -0,0 +1,15 @@ +examples/voice_02.wav filter=lfs diff=lfs merge=lfs -text +examples/voice_04.wav filter=lfs diff=lfs merge=lfs -text +examples/emo_sad.wav filter=lfs diff=lfs merge=lfs -text +examples/voice_03.wav filter=lfs diff=lfs merge=lfs -text +examples/voice_06.wav filter=lfs diff=lfs merge=lfs -text +examples/voice_08.wav filter=lfs diff=lfs merge=lfs -text +tests/sample_prompt.wav filter=lfs diff=lfs merge=lfs -text +examples/emo_hate.wav filter=lfs diff=lfs merge=lfs -text +examples/voice_01.wav filter=lfs diff=lfs merge=lfs -text +examples/voice_05.wav filter=lfs diff=lfs merge=lfs -text +examples/voice_09.wav filter=lfs diff=lfs merge=lfs -text +examples/voice_10.wav filter=lfs diff=lfs merge=lfs -text +examples/voice_12.wav filter=lfs diff=lfs merge=lfs -text +examples/voice_07.wav filter=lfs diff=lfs merge=lfs -text +examples/voice_11.wav filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index d2eee28..0db34f7 100644 --- a/.gitignore +++ b/.gitignore @@ -4,10 +4,7 @@ __pycache__ *.DS_Store .idea/ .vscode/ -checkpoints/*.pth -checkpoints/*.vocab -checkpoints/*.model -checkpoints/.cache +checkpoints/* outputs/ build/ *.py[cod] diff --git a/INDEX_MODEL_LICENSE_EN.txt b/INDEX_MODEL_LICENSE_EN.txt new file mode 100644 index 0000000..ddc3bcd --- /dev/null +++ b/INDEX_MODEL_LICENSE_EN.txt @@ -0,0 +1,58 @@ +bilibili Model Use License Agreement + +By clicking “I agree” to this bilibili Model Use License Agreement (“this Agreement”) , or by otherwise using any portion or element of the Model or any Derivative Work, you will be deemed to have recognized and accepted the content of this Agreement, which is effective immediately. If you do not agree to this Agreement, you must immediately cease all use and permanently delete the Model and any Derivative Works. + +1. 
Definitions +1.1 “This Agreement”: means the bilibili Model Use License Agreement, including all of its terms and conditions. +1.2 “We”, “us”, or “our”: means bilibili , the original right-holder of the Model. +1.3 “You”: means any natural person or legal entity exercising rights granted by this Agreement and/or using the Model for any purpose and in any field of use. +1.4 “Model”: means the artificial-intelligence model named “bilibili indextts2”, including but not limited to model weights and final code, in each case only to the extent that such components are published by us at https://github.com/index-tts/index-tts. +1.5 “Derivative Work”: means any derivative of the Model, including without limitation: + (i) any modification of the Model, model outputs, or their derivatives; + (ii) any work based on the Model, model outputs, or their derivatives; + (iii) any other machine learning model which is created by re-training, fine-tuning, quantizing, LoRA, parameter-efficient fine-tuning, or any other method involving incremental weights or merged checkpoints, in each case based on the Model, model outputs, or their derivatives. +1.6 “Use”: means downloading, copying, training, modifying, creating Derivative Works, distributing, publishing, running, fine-tuning, publicly displaying, communicating to the public, or otherwise exploiting the Model or any Derivative Work. + +2. Scope of License and Restrictions +2.1 Subject to the terms and conditions of this Agreement, we grant you a worldwide, non-exclusive, non-transferable, royalty-free limited license to Use the Model or any Derivative Work based on the intellectual properties or other rights owned by Us embodied in the Model or any Derivative Work. 
+2.2 If You intend to Use, or have already Used, the Model or any Derivative Work, and either (i) your or any of your Affiliates’ products or services had more than 100 million monthly active users in the immediately preceding calendar month, or (ii) your or any of your Affiliates’ annual revenue in the immediately preceding calendar year exceeded RMB 1 billion, You must request a separated license from us, which We may grant to You in our sole discretion. You are not authorized to exercise any of the rights under this Agreement unless and until We have expressly granted You such rights in writing. +2.3 This Agreement is an open-source license for the Model in which we possess intellectual properties and other rights. It governs your Use of the Model only and does not limit any rights that we have regarding the Model. + +3. Disclaimer and Risk Allocation +3.1 The Model and any outputs generated thereby are provided “AS IS,” without warranty of any kind, express or implied, including but not limited to warranties of merchantability, fitness for a particular purpose, non-infringement, absence of errors or omissions, continuity, accuracy, reliability, or stability. You are solely responsible for determining the appropriateness of using or redistributing the Model and assume all risks associated with exercising any rights granted under this Agreement. +3.2 You shall bear sole responsibility for any infringement, illegality, breach of contract, damages, fines, regulatory investigations, or other liabilities (including, without limitation, infringement of third-party patents, copyrights, trademarks, trade secrets, personality rights, data-protection rights, or any other rights) arising out of or related to your Use of the Model or any outputs generated thereby. We assume no joint, several, supplementary, or advance payment liability. 
+3.3 Under no circumstances shall we be liable to you or any third party for any direct, indirect, incidental, special, punitive, or consequential damages (including, without limitation, loss of data, business interruption, or loss of profits) arising out of or related to the Use of the Model, even if we have been advised of the possibility of such damages. +3.4 Additional Obligations for You and Downstream Recipients +a) You must ensure that any downstream recipient of the Model or any Derivative Work that you distribute complies with this Agreement, and you must impose appropriate contractual terms on such downstream recipients. If any downstream recipient breaches this Agreement, you shall be responsible for the consequences thereof. +b) You must retain all original copyright notices and a copy of this Agreement in every copy of the Model or any Derivative Work that you Use. +c) You may not Use the bilibili indextts2 or any Derivative Work to improve any AI model, except for the bilibili indextts2 itself, its Derivative Works,or non-commercial AI models. + +4. Compliance Obligations +4.1 Usage Restrictions +a) If you distribute a Derivative Work, you must clearly state in the distribution page or accompanying documentation: “Any modifications made to the original model in this Derivative Work are not endorsed, warranted, or guaranteed by the original right-holder of the original model, and the original right-holder disclaims all liability related to this Derivative Work.” +b) If your Use of the Model or any Derivative Work incorporates any third-party data or weights, you must obtain all necessary authorizations on your own and bear full responsibility for compliance. 
+c) You may not Use the Model or any Derivative Work for any purpose that violates the laws or regulatory requirements of the jurisdiction where the outputs and/or the Model are generated or used (including, without limitation, generating false information, discriminatory content, or content that infringes privacy). +d) If the Model or any Derivative Work is capable of generating content, you must ensure that such content does not violate the laws or regulatory requirements of the applicable jurisdiction (including, without limitation, generating false information, discriminatory content, or content that infringes privacy). +4.2 Prohibited High-Risk Use +You must ensure that the Model and any Derivative Work are not deployed, directly or indirectly, in high-risk scenarios such as medical diagnosis, autonomous driving, military applications, critical-infrastructure control, large-scale biometric surveillance, or automated decision-making (e.g., credit or employment evaluations). If you insist on such deployment, you must independently complete all compliance obligations under applicable laws and regulations (including but not limited to GDPR, CCPA, HIPAA, export-control laws, and AI-specific regulations), and we shall bear no liability for any consequences arising therefrom. +4.3 Infringement Liability +Should any third party raise claims against you with respect to any Derivative Work you develop or your Use of the Model or any Derivative Work, you shall bear full and independent responsibility for defending against and resolving such claims. If your actions cause us to incur any third-party claims, administrative penalties, or other losses, you shall indemnify us for all losses we thereby suffer, including but not limited to attorney fees, litigation costs, damages, and fines, and shall take all necessary measures to eliminate any adverse impact on us. + +5. 
Reserved Rights +5.1 We reserve the right to revoke the license granted to you under this Agreement in the event of your breach. Upon revocation, you must immediately cease all Use and permanently delete all copies of the Model and any Derivative Work. Sections 3 and 6 of this Agreement shall survive termination of this Agreement under this circumstance. +5.2 Nothing in this Agreement grants you any right to use our trade names, trademarks, service marks, or product names, except as reasonably and customarily required to describe the origin of the Model or any Derivative Work—such as reproducing the content of a NOTICE file under Section 3.4 of this Agreement. +5.3 If you or any of your Affiliates institutes or participates in any legal proceeding (including any cross-claim or counterclaim in a lawsuit) against us or any of our Affiliates, alleging that the Model or any output or any portion thereof infringes any intellectual property or other rights that you own or control, all licenses granted to you under this Agreement shall terminate automatically as of the date such proceeding is filed. + +6. Governing Law and Dispute Resolution +6.1 This Agreement shall be governed by and construed in accordance with the laws of the People’s Republic of China. +6.2 In the event of any dispute arising out of or in connection with this Agreement, the parties shall first attempt to resolve such dispute through friendly negotiation. If negotiation fails, the dispute shall be submitted to the Shanghai Arbitration Commission for arbitration in accordance with its then-effective arbitration rules. The arbitration award shall be final and binding on both parties. The prevailing party shall be entitled to recover reasonable costs, including notarization and investigation fees, arbitration costs, attorneys’ fees, and travel expenses. + +7. 
Severability +If any provision of this Agreement is held to be invalid or unenforceable, the remaining provisions shall remain in full force and effect. The invalid or unenforceable provision shall be replaced with a valid and enforceable provision that, to the maximum extent permitted by law, most closely reflects the original intent of the invalid or unenforceable provision. + +8. Version Updates +We may release new versions of the AI Model Use License Agreement. Any new version will apply only to Uses occurring after the date of its release. If you obtained the Model under an earlier version, the new version will not have retroactive effect; nevertheless, you are encouraged to adopt the new version voluntarily. + +9. Language Version +In the event of any discrepancy or conflict between the English-language version set forth above and the Chinese-language version of this bilibili Model Use License Agreement, the Chinese-language version shall prevail for all purposes and shall govern the rights and obligations of the parties. + diff --git a/INDEX_MODEL_LICENSE_ZH.txt b/INDEX_MODEL_LICENSE_ZH.txt new file mode 100644 index 0000000..519ba44 --- /dev/null +++ b/INDEX_MODEL_LICENSE_ZH.txt @@ -0,0 +1,52 @@ +bilibili模型使用许可协议 + +若您点击同意《bilibili模型使用许可协议》(“本协议”),或使用我方模型或衍生品的任何部分或元素,即视为您已确认并接受本协议内容,本协议立即生效。若您不同意本协议,应立即停止使用并删除模型及衍生品。 + +1.定义 +1.1 本协议:指《bilibili 模型使用许可协议》,包括本协议所规定的所有条款和条件。 +1.2 我方:指bilibili即模型的原始权利人。 +1.3 您:指行使本许可协议授予的权利和/或使用“模型”的自然人或法人实体。 +1.4 模型:指名为“bilibili indextts2”的AI模型,包括模型权重、最终代码等组件,具体范围以我方在https://github.com/index-tts/index-tts发布的组件为限。 +1.5 衍生品:指模型的衍生品,包括但不限于:(i)对模型、模型输出及其衍生品的修改;(ii)基于模型、模型输出及其衍生品的创作;(iii)对模型、模型输出及其衍生品再训练、微调、量化、LoRA、参数高效微调、以任何增量权重或合并的检查点等方式创建的任何模型。 +1.6 使用:指通过下载、复制、训练、修改、创作衍生品、分发、发布、运行、微调、公开展示、传播或以其他方式利用本模型或其衍生品的行为。 + +2. 
许可范围和限制 +2.1 根据本协议的条款与条件,基于对模型或其衍生品中包含的我方拥有的任何知识产权和其他权利,我方特此授予您一项全球范围、非独占、不可转让、免费的使用许可。 +2.2若您拟使用或者已使用我方模型或其衍生品,如果您或者您的关联方提供的产品或服务在前一自然月的月活跃用户数超过1亿,或者如果您或者您的关联方在上一自然年的年收入超过1亿人民币的,您必须向我方申请该模型或其衍生品的商业许可,我方可自行决定是否授予您该许可。您无权行使本协议项下的任何权利,除非我方另行明确授予您该等许可。 +2.3 本协议作为我方享有知识产权和其他权利的模型的开源许可协议,仅约束您对我方模型的使用行为,并不限制我方对该模型享有的任何权利。 + +3. 免责声明与风险约定 +3.1 模型及其任何输出均“按原样”提供,我方及其关联方不提供任何形式的明示或暗示的保证,包括但不限于适销性、特定用途适用性、不侵权、没有错误或疏漏、持续性、准确性、可靠性、稳定性的保证。您需自行负责判断使用或再分发本作品的适当性,并承担行使本许可证所授予权限相关的所有风险。 +3.2 您因使用模型或利用其输出内容而产生的任何侵权、违法、违约、赔偿、罚款、监管调查或其他法律责任(包括但不限于侵犯第三方专利、版权、商标、商业秘密、人格权、数据保护权等),均由您独自承担。我方不承担任何连带责任、补充责任或垫付责任。 +3.3 在任何情况下,我方对因使用本模型而产生的任何直接、间接、附带、特殊、惩罚性或后果性损失(包括但不限于数据丢失、业务中断、利润损失等)不承担责任,即使我方已被告知该等损失的可能性。 +3.4 对您和下游用户的其他约束 +a)您应确保下游用户在使用您发布的本模型或您基于本模型开发的衍生品时,同样遵守本协议的相关规定,并通过合适的协议或条款对下游用户进行约束。若下游用户违反本协议规定,您需承担相应责任。 +b)您需在您使用的本模型或您基于本模型开发的衍生品的所有副本中保留原始版权声明及本使用许可协议。 +c)您不得使用bilibili indextts2或其衍生品来改进任何AI模型(bilibili indextts2或其衍生品、非商业用途的AI模型除外)。 + +4. 合规义务 +4.1使用限制 +a) 若您发布模型的衍生品,必须在发布页面或附随文档中清晰声明“该衍生品对原模型所作的任何改动与原模型原始权利人无关,原始权利人对该衍生品不背书、不担保、不承担责任”。 +b) 若您使用模型或模型衍生品的过程中引入任何第三方数据或权重,您须自行取得合法授权并承担全部合规责任。 +c) 不得将模型及模型衍生品用于违反输出地/使用地法律或监管要求的用途(包括但不限于生成虚假信息、歧视性内容、侵犯隐私等)。 +d) 若模型或模型衍生品具备生成内容功能,您须确保其输出内容不违反输出地/使用地法律或监管要求的用途(包括但不限于生成虚假信息、歧视性内容、侵犯隐私等)。 +4.2 禁止高风险场景 +您须自行确保不在医疗诊断、自动驾驶、军事、关键基础设施控制、大规模生物识别监控、自动化决策(如信贷、就业评估)等高风险场景直接部署本模型及其衍生品。若您坚持部署,应自行完成符合适用法规(包括 GDPR、CCPA、HIPAA、出口管制、AI 特定法规等)的全部合规要求,我方对因此产生的任何后果概不负责。 +4.3 侵权责任 +如第三方就您开发的模型衍生品或您使用模型或其衍生品等行为主张权利,您应独立承担全部责任。若因您的行为导致我方遭受任何第三方索赔、行政处罚或其他损失,您应负责赔偿我方因此遭受的全部损失,包括但不限于律师费、诉讼费、赔偿金、罚款等,并采取一切必要措施消除对我方的负面影响。 + +5. 保留权利 +5.1我方保留在您违反协议的情况下撤销本协议对您授权之权利。协议撤销后,您必须立即删除并停止使用材料。在本协议终止后,本协议第3条、第6条仍然有效。 +5.2 本许可证不授予使用我方的商号、商标、服务标记或产品名称的权限,除非在合理且惯例性地描述模型或衍生品的来源,例如本许可证3.4的规定,以及复制 NOTICE 文件内容时需要使用。 +5.3 若您或您的关联方对我方或我方任何关联实体提起诉讼或其他程序(包括诉讼中的交叉索赔或反诉),主张模型或其任何输出结果或其任何部分侵犯了您拥有或可许可的知识产权或其他权利,则本协议授予您的所有许可自该诉讼或程序提起之日起终止。 + +6. 
法律适用与争议解决 +6.1 本协议适用中华人民共和国法律法规。 +6.2 在本协议履行中,若发生争议,双方应本着友好协商的原则解决问题;如协商不成,双方均应将争议提交至上海仲裁委员会根据其仲裁规则进行仲裁,仲裁是一裁终局的,对双方均有约束力。由仲裁败诉方承担本次仲裁产生的公证调查费、仲裁费、律师费、差旅费等实际产生费用。 + +7. 可分割性 +若本协议任何条款被认定为无效或不可执行,不影响其余条款之效力;无效部分应在法律允许的最大范围内按最接近原意的有效条款替代。 + +8. 协议版本更新 +我方可发布新版 AI模型使用许可协议。新版仅适用于发布后新产生的使用行为,若您已按旧版获取模型,新版协议并无溯及力,但鼓励您主动更新。 + diff --git a/MANIFEST.in b/MANIFEST.in index a1f90e6..ad8e99e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,3 @@ global-exclude *~ *.py[cod] -include indextts/BigVGAN/alias_free_activation/cuda/*.cu indextts/BigVGAN/alias_free_activation/cuda/*.cpp -include indextts/BigVGAN/alias_free_activation/cuda/*.h +include *.cu *.cpp +include *.h *.hpp diff --git a/README.md b/README.md index 16c52c0..204fbf2 100644 --- a/README.md +++ b/README.md @@ -4,111 +4,93 @@ -
+
+
+
+## 👉🏻 IndexTTS 👈🏻
+
+[[HuggingFace Demo]](https://huggingface.co/spaces/IndexTeam/IndexTTS) [[ModelScope Demo]](https://modelscope.cn/studios/IndexTeam/IndexTTS-Demo) \
+[[Paper]](https://arxiv.org/abs/2502.05512) [[Demos]](https://index-tts.github.io)
+
+**IndexTTS** is a GPT-style text-to-speech (TTS) model mainly based on XTTS and Tortoise. It is capable of correcting the pronunciation of Chinese characters using pinyin and controlling pauses at any position through punctuation marks. We enhanced multiple modules of the system, including the improvement of speaker condition feature representation, and the integration of BigVGAN2 to optimize audio quality. Trained on tens of thousands of hours of data, our system achieves state-of-the-art performance, outperforming current popular TTS systems such as XTTS, CosyVoice2, Fish-Speech, and F5-TTS.
+
+Experience **IndexTTS**: Please contact xuanwu@bilibili.com for more detailed information.
+### Contact
+QQ群(二群):1048202584 \
+Discord:https://discord.gg/uT32E7KDmy \
+简历:indexspeech@bilibili.com \
+欢迎大家来交流讨论!
+## 📣 Updates
+
+- `2025/05/14` 🔥🔥 We released **IndexTTS-1.5**, significantly improving the model's stability and its performance in the English language.
+- `2025/03/25` 🔥 We release IndexTTS-1.0 model parameters and inference code.
+- `2025/02/12` 🔥 We submitted our paper on arXiv, and released our demos and test sets.
+
+## 🖥️ Method
+
+The overview of IndexTTS is shown as follows.
+
+
+
+ """
+ )
+ with gr.Row():
+ gr.Markdown(markdown_table_v2)
+ with gr.Row():
+ gr.Markdown(description)
+ with gr.Column():
+ gr.Video('https://github.com/myshell-ai/OpenVoice/assets/40556743/3cba936f-82bf-476c-9e52-09f0f417bb2f', autoplay=True)
+
+ with gr.Row():
+ gr.HTML(wrapped_markdown_content)
+
+ with gr.Row():
+ with gr.Column():
+ input_text_gr = gr.Textbox(
+ label="Text Prompt",
+ info="One or two sentences at a time is better. Up to 200 text characters.",
+ value="He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.",
+ )
+ style_gr = gr.Dropdown(
+ label="Style",
+ info="Select a style of output audio for the synthesised speech. (Chinese only support 'default' now)",
+ choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
+ max_choices=1,
+ value="default",
+ )
+ ref_gr = gr.Audio(
+ label="Reference Audio",
+ info="Click on the ✎ button to upload your own target speaker audio",
+ type="filepath",
+ value="resources/demo_speaker2.mp3",
+ )
+ tos_gr = gr.Checkbox(
+ label="Agree",
+ value=False,
+ info="I agree to the terms of the cc-by-nc-4.0 license-: https://github.com/myshell-ai/OpenVoice/blob/main/LICENSE",
+ )
+
+ tts_button = gr.Button("Send", elem_id="send-btn", visible=True)
+
+
+ with gr.Column():
+ out_text_gr = gr.Text(label="Info")
+ audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
+ ref_audio_gr = gr.Audio(label="Reference Audio Used")
+
+ gr.Examples(examples,
+ label="Examples",
+ inputs=[input_text_gr, style_gr, ref_gr, tos_gr],
+ outputs=[out_text_gr, audio_gr, ref_audio_gr],
+ fn=predict,
+ cache_examples=False,)
+ tts_button.click(predict, [input_text_gr, style_gr, ref_gr, tos_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
+
+demo.queue()
+demo.launch(debug=True, show_api=True, share=args.share)
diff --git a/indextts/s2mel/modules/openvoice/se_extractor.py b/indextts/s2mel/modules/openvoice/se_extractor.py
new file mode 100644
index 0000000..d087179
--- /dev/null
+++ b/indextts/s2mel/modules/openvoice/se_extractor.py
@@ -0,0 +1,153 @@
+import os
+import glob
+import torch
+import hashlib
+import librosa
+import base64
+from glob import glob
+import numpy as np
+from pydub import AudioSegment
+from faster_whisper import WhisperModel
+import hashlib
+import base64
+import librosa
+# from whisper_timestamped.transcribe import get_audio_tensor, get_vad_segments
+
# faster-whisper checkpoint size used by split_audio_whisper.
model_size = "medium"
# Run on GPU with FP16
# Module-level singleton; loaded lazily on first use in split_audio_whisper
# so importing this module does not pull the model into memory.
model = None
def split_audio_whisper(audio_path, audio_name, target_dir='processed'):
    """Transcribe `audio_path` with faster-whisper and export one WAV per segment.

    Segment WAVs are written to `<target_dir>/<audio_name>/wavs/`, keeping only
    segments between 1.5 s and 20 s long whose cleaned text has 2-199 characters.

    Args:
        audio_path: path to the source audio file (any format pydub can read).
        audio_name: name used for the output folder and segment file prefix.
        target_dir: root directory for the per-audio output folders.

    Returns:
        Path to the folder containing the exported segment WAV files.
    """
    global model
    # Lazily create the shared WhisperModel singleton.
    # NOTE(review): device is hard-coded to "cuda" with FP16 — this fails on
    # CPU-only hosts; confirm whether a fallback is needed.
    if model is None:
        model = WhisperModel(model_size, device="cuda", compute_type="float16")
    audio = AudioSegment.from_file(audio_path)
    max_len = len(audio)  # total duration in milliseconds (pydub convention)

    target_folder = os.path.join(target_dir, audio_name)

    segments, info = model.transcribe(audio_path, beam_size=5, word_timestamps=True)
    segments = list(segments)

    # create directory
    os.makedirs(target_folder, exist_ok=True)
    wavs_folder = os.path.join(target_folder, 'wavs')
    os.makedirs(wavs_folder, exist_ok=True)

    # segments
    s_ind = 0
    start_time = None

    for k, w in enumerate(segments):
        # process with the time
        if k == 0:
            start_time = max(0, w.start)

        end_time = w.end

        # calculate confidence (mean word probability; computed but not used below)
        if len(w.words) > 0:
            confidence = sum([s.probability for s in w.words]) / len(w.words)
        else:
            confidence = 0.
        # clean text
        text = w.text.replace('...', '')

        # left 0.08s for each audios
        audio_seg = audio[int( start_time * 1000) : min(max_len, int(end_time * 1000) + 80)]

        # segment file name
        fname = f"{audio_name}_seg{s_ind}.wav"

        # filter out the segment shorter than 1.5s and longer than 20s
        save = audio_seg.duration_seconds > 1.5 and \
                audio_seg.duration_seconds < 20. and \
                len(text) >= 2 and len(text) < 200

        if save:
            output_file = os.path.join(wavs_folder, fname)
            audio_seg.export(output_file, format='wav')

        if k < len(segments) - 1:
            start_time = max(0, segments[k+1].start - 0.08)

        # NOTE(review): s_ind advances even for filtered-out segments, so
        # exported file indices may be non-contiguous — confirm intended.
        s_ind = s_ind + 1
    return wavs_folder
+
+
def split_audio_vad(audio_path, audio_name, target_dir, split_seconds=10.0):
    """Strip silence via VAD, then split the voiced audio into ~equal chunks.

    NOTE(review): depends on get_audio_tensor/get_vad_segments from
    whisper_timestamped, whose import is commented out at the top of this
    module — calling this function as-is raises NameError. Confirm whether
    the dependency should be restored or this function removed.

    Args:
        audio_path: path to the source audio file.
        audio_name: name used for the output folder and segment file prefix.
        target_dir: root directory for the per-audio output folders.
        split_seconds: approximate target length of each exported chunk.

    Returns:
        Path to the folder containing the exported segment WAV files.

    Raises:
        AssertionError: if the voiced audio is too short to yield one chunk.
    """
    SAMPLE_RATE = 16000
    audio_vad = get_audio_tensor(audio_path)
    segments = get_vad_segments(
        audio_vad,
        output_sample=True,
        min_speech_duration=0.1,
        min_silence_duration=1,
        method="silero",
    )
    # Convert sample offsets to seconds at the assumed 16 kHz VAD rate.
    segments = [(seg["start"], seg["end"]) for seg in segments]
    segments = [(float(s) / SAMPLE_RATE, float(e) / SAMPLE_RATE) for s,e in segments]
    print(segments)
    # Concatenate only the voiced spans into one continuous clip.
    audio_active = AudioSegment.silent(duration=0)
    audio = AudioSegment.from_file(audio_path)

    for start_time, end_time in segments:
        audio_active += audio[int( start_time * 1000) : int(end_time * 1000)]

    audio_dur = audio_active.duration_seconds
    print(f'after vad: dur = {audio_dur}')
    target_folder = os.path.join(target_dir, audio_name)
    wavs_folder = os.path.join(target_folder, 'wavs')
    os.makedirs(wavs_folder, exist_ok=True)
    start_time = 0.
    count = 0
    # Split into round(duration / split_seconds) equal-length chunks.
    num_splits = int(np.round(audio_dur / split_seconds))
    assert num_splits > 0, 'input audio is too short'
    interval = audio_dur / num_splits

    for i in range(num_splits):
        end_time = min(start_time + interval, audio_dur)
        if i == num_splits - 1:
            end_time = audio_dur  # last chunk absorbs any rounding remainder
        output_file = f"{wavs_folder}/{audio_name}_seg{count}.wav"
        audio_seg = audio_active[int(start_time * 1000): int(end_time * 1000)]
        audio_seg.export(output_file, format='wav')
        start_time = end_time
        count += 1
    return wavs_folder
+
def hash_numpy_array(audio_path):
    """Return a 16-character, filesystem-safe fingerprint of the audio content.

    The file is decoded to mono samples (native sample rate), the raw sample
    bytes are SHA-256 hashed, and the digest is base64-encoded and truncated.
    """
    samples, _ = librosa.load(audio_path, sr=None, mono=True)
    digest = hashlib.sha256(samples.tobytes()).digest()
    token = base64.b64encode(digest).decode('utf-8')[:16]
    # '/' is a valid base64 character but not safe in file names.
    return token.replace('/', '_^')
+
def get_se(audio_path, vc_model, target_dir='processed', vad=True):
    """Extract a speaker embedding ("se") for `audio_path` with `vc_model`.

    Args:
        audio_path: path to the reference audio file.
        vc_model: tone-color converter; must expose `.version` and
            `.extract_se(paths, se_save_path=...)`.
        target_dir: root directory under which the embedding is cached.
        vad: unused; kept for backward compatibility with callers that
            previously selected VAD- vs whisper-based segmentation.

    Returns:
        Tuple of (result of `vc_model.extract_se`, audio_name), where
        audio_name combines the file stem, model version, and a content hash.
    """
    version = vc_model.version
    print("OpenVoice version:", version)

    # Cache key: stem + model version + content hash, so the same audio under
    # a different name (or a changed file under the same name) is detected.
    audio_name = f"{os.path.basename(audio_path).rsplit('.', 1)[0]}_{version}_{hash_numpy_array(audio_path)}"
    se_path = os.path.join(target_dir, audio_name, 'se.pth')

    # Segmentation/caching paths from the original OpenVoice pipeline were
    # removed here: the embedding is now extracted from the whole file.
    return vc_model.extract_se([audio_path], se_save_path=se_path), audio_name
+
diff --git a/indextts/s2mel/modules/openvoice/transforms.py b/indextts/s2mel/modules/openvoice/transforms.py
new file mode 100644
index 0000000..a11f799
--- /dev/null
+++ b/indextts/s2mel/modules/openvoice/transforms.py
@@ -0,0 +1,209 @@
+import torch
+from torch.nn import functional as F
+
+import numpy as np
+
+
# Numerical floors keeping every spline bin width, bin height, and knot
# derivative strictly positive (see rational_quadratic_spline below).
DEFAULT_MIN_BIN_WIDTH = 1e-3
DEFAULT_MIN_BIN_HEIGHT = 1e-3
DEFAULT_MIN_DERIVATIVE = 1e-3
+
+
def piecewise_rational_quadratic_transform(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnormalized_derivatives,
    inverse=False,
    tails=None,
    tail_bound=1.0,
    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
    min_derivative=DEFAULT_MIN_DERIVATIVE,
):
    """Apply a monotonic piecewise rational-quadratic spline to `inputs`.

    Dispatches to the unconstrained variant (identity map outside
    [-tail_bound, tail_bound]) when `tails` is given, otherwise to the plain
    spline whose whole domain must contain the inputs.

    Returns:
        (outputs, logabsdet): transformed values and the per-element
        log-absolute-determinant of the transform.
    """
    if tails is None:
        return rational_quadratic_spline(
            inputs=inputs,
            unnormalized_widths=unnormalized_widths,
            unnormalized_heights=unnormalized_heights,
            unnormalized_derivatives=unnormalized_derivatives,
            inverse=inverse,
            min_bin_width=min_bin_width,
            min_bin_height=min_bin_height,
            min_derivative=min_derivative,
        )
    return unconstrained_rational_quadratic_spline(
        inputs=inputs,
        unnormalized_widths=unnormalized_widths,
        unnormalized_heights=unnormalized_heights,
        unnormalized_derivatives=unnormalized_derivatives,
        inverse=inverse,
        tails=tails,
        tail_bound=tail_bound,
        min_bin_width=min_bin_width,
        min_bin_height=min_bin_height,
        min_derivative=min_derivative,
    )
+
+
def searchsorted(bin_locations, inputs, eps=1e-6):
    """Return the index of the bin (along the last axis) containing each input.

    NOTE: nudges the last bin edge outward by `eps` IN PLACE so that inputs
    exactly on the right boundary fall into the final bin.
    """
    bin_locations[..., -1] += eps
    hits = inputs[..., None] >= bin_locations
    return hits.sum(dim=-1) - 1
+
+
def unconstrained_rational_quadratic_spline(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnormalized_derivatives,
    inverse=False,
    tails="linear",
    tail_bound=1.0,
    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
    min_derivative=DEFAULT_MIN_DERIVATIVE,
):
    """Rational-quadratic spline on [-tail_bound, tail_bound], identity outside.

    Only `tails == "linear"` is implemented: values outside the interval pass
    through unchanged with zero log-determinant.

    Returns:
        (outputs, logabsdet) with the same shape as `inputs`.
    """
    if tails != "linear":
        raise RuntimeError("{} tails are not implemented.".format(tails))

    in_domain = (inputs >= -tail_bound) & (inputs <= tail_bound)
    out_domain = ~in_domain

    outputs = torch.zeros_like(inputs)
    logabsdet = torch.zeros_like(inputs)

    # Pad the derivative parameters with a boundary constant chosen so that
    # min_derivative + softplus(constant) == 1, i.e. unit slope at the edges,
    # matching the identity tails.
    unnormalized_derivatives = F.pad(unnormalized_derivatives, pad=(1, 1))
    edge_value = np.log(np.exp(1 - min_derivative) - 1)
    unnormalized_derivatives[..., 0] = edge_value
    unnormalized_derivatives[..., -1] = edge_value

    # Identity map (log|det| = 0) outside the interval.
    outputs[out_domain] = inputs[out_domain]
    logabsdet[out_domain] = 0

    # Spline transform for the in-domain elements only.
    outputs[in_domain], logabsdet[in_domain] = rational_quadratic_spline(
        inputs=inputs[in_domain],
        unnormalized_widths=unnormalized_widths[in_domain, :],
        unnormalized_heights=unnormalized_heights[in_domain, :],
        unnormalized_derivatives=unnormalized_derivatives[in_domain, :],
        inverse=inverse,
        left=-tail_bound,
        right=tail_bound,
        bottom=-tail_bound,
        top=tail_bound,
        min_bin_width=min_bin_width,
        min_bin_height=min_bin_height,
        min_derivative=min_derivative,
    )

    return outputs, logabsdet
+
+
def rational_quadratic_spline(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnormalized_derivatives,
    inverse=False,
    left=0.0,
    right=1.0,
    bottom=0.0,
    top=1.0,
    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
    min_derivative=DEFAULT_MIN_DERIVATIVE,
):
    """Monotonic rational-quadratic spline mapping [left, right] -> [bottom, top].

    Bin widths and heights are softmax-normalized over the last parameter axis
    (floored by min_bin_width/min_bin_height); knot derivatives come from a
    softplus floored by min_derivative. When `inverse` is True the spline is
    inverted analytically by solving a quadratic per element.

    Returns:
        (outputs, logabsdet): transformed values and per-element
        log|d outputs / d inputs| of the applied (forward or inverse) map.

    Raises:
        ValueError: if any input lies outside [left, right], or if the
            min-bin floors are infeasible for the number of bins.
    """
    if torch.min(inputs) < left or torch.max(inputs) > right:
        raise ValueError("Input to a transform is not within its domain")

    num_bins = unnormalized_widths.shape[-1]

    if min_bin_width * num_bins > 1.0:
        raise ValueError("Minimal bin width too large for the number of bins")
    if min_bin_height * num_bins > 1.0:
        raise ValueError("Minimal bin height too large for the number of bins")

    # Bin widths: softmax over the parameter axis, floored at min_bin_width,
    # then accumulated into knot x-positions spanning [left, right].
    widths = F.softmax(unnormalized_widths, dim=-1)
    widths = min_bin_width + (1 - min_bin_width * num_bins) * widths
    cumwidths = torch.cumsum(widths, dim=-1)
    cumwidths = F.pad(cumwidths, pad=(1, 0), mode="constant", value=0.0)
    cumwidths = (right - left) * cumwidths + left
    cumwidths[..., 0] = left
    cumwidths[..., -1] = right  # pin endpoints exactly despite float rounding
    widths = cumwidths[..., 1:] - cumwidths[..., :-1]

    # Knot derivatives: softplus keeps them positive, min_derivative floors them.
    derivatives = min_derivative + F.softplus(unnormalized_derivatives)

    # Bin heights / knot y-positions, mirroring the width computation.
    heights = F.softmax(unnormalized_heights, dim=-1)
    heights = min_bin_height + (1 - min_bin_height * num_bins) * heights
    cumheights = torch.cumsum(heights, dim=-1)
    cumheights = F.pad(cumheights, pad=(1, 0), mode="constant", value=0.0)
    cumheights = (top - bottom) * cumheights + bottom
    cumheights[..., 0] = bottom
    cumheights[..., -1] = top
    heights = cumheights[..., 1:] - cumheights[..., :-1]

    # Locate each input's bin: search the y-knots for the inverse map,
    # the x-knots for the forward map. (searchsorted nudges the last knot
    # by eps in place so boundary values land in the final bin.)
    if inverse:
        bin_idx = searchsorted(cumheights, inputs)[..., None]
    else:
        bin_idx = searchsorted(cumwidths, inputs)[..., None]

    # Gather the per-element quantities for the selected bin.
    input_cumwidths = cumwidths.gather(-1, bin_idx)[..., 0]
    input_bin_widths = widths.gather(-1, bin_idx)[..., 0]

    input_cumheights = cumheights.gather(-1, bin_idx)[..., 0]
    delta = heights / widths  # average slope of each bin
    input_delta = delta.gather(-1, bin_idx)[..., 0]

    input_derivatives = derivatives.gather(-1, bin_idx)[..., 0]
    input_derivatives_plus_one = derivatives[..., 1:].gather(-1, bin_idx)[..., 0]

    input_heights = heights.gather(-1, bin_idx)[..., 0]

    if inverse:
        # Invert the rational-quadratic map by solving a*theta^2 + b*theta + c = 0
        # for the in-bin coordinate theta in [0, 1].
        a = (inputs - input_cumheights) * (
            input_derivatives + input_derivatives_plus_one - 2 * input_delta
        ) + input_heights * (input_delta - input_derivatives)
        b = input_heights * input_derivatives - (inputs - input_cumheights) * (
            input_derivatives + input_derivatives_plus_one - 2 * input_delta
        )
        c = -input_delta * (inputs - input_cumheights)

        discriminant = b.pow(2) - 4 * a * c
        assert (discriminant >= 0).all()

        # Alternate quadratic-formula root: theta = 2c / (-b - sqrt(disc)).
        root = (2 * c) / (-b - torch.sqrt(discriminant))
        outputs = root * input_bin_widths + input_cumwidths

        theta_one_minus_theta = root * (1 - root)
        denominator = input_delta + (
            (input_derivatives + input_derivatives_plus_one - 2 * input_delta)
            * theta_one_minus_theta
        )
        derivative_numerator = input_delta.pow(2) * (
            input_derivatives_plus_one * root.pow(2)
            + 2 * input_delta * theta_one_minus_theta
            + input_derivatives * (1 - root).pow(2)
        )
        logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)

        # logabsdet above is for the forward map at `root`; negate for the inverse.
        return outputs, -logabsdet
    else:
        # theta: relative position of the input within its bin, in [0, 1].
        theta = (inputs - input_cumwidths) / input_bin_widths
        theta_one_minus_theta = theta * (1 - theta)

        numerator = input_heights * (
            input_delta * theta.pow(2) + input_derivatives * theta_one_minus_theta
        )
        denominator = input_delta + (
            (input_derivatives + input_derivatives_plus_one - 2 * input_delta)
            * theta_one_minus_theta
        )
        outputs = input_cumheights + numerator / denominator

        derivative_numerator = input_delta.pow(2) * (
            input_derivatives_plus_one * theta.pow(2)
            + 2 * input_delta * theta_one_minus_theta
            + input_derivatives * (1 - theta).pow(2)
        )
        logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)

        return outputs, logabsdet
diff --git a/indextts/s2mel/modules/openvoice/utils.py b/indextts/s2mel/modules/openvoice/utils.py
new file mode 100644
index 0000000..4e80909
--- /dev/null
+++ b/indextts/s2mel/modules/openvoice/utils.py
@@ -0,0 +1,194 @@
+import re
+import json
+import numpy as np
+
+
def get_hparams_from_file(config_path):
    """Load a JSON config file and return it as a nested ``HParams`` object.

    Args:
        config_path: Path to a UTF-8 encoded JSON configuration file.

    Returns:
        HParams: Attribute-style view of the parsed configuration.
    """
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)
    return HParams(**config)
+
class HParams:
    """Hyperparameter container allowing both dict-style and attribute access.

    Nested plain dicts are recursively wrapped, so ``hp.model.hidden`` works
    for an input like ``{"model": {"hidden": 256}}``.
    """

    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            # Recursively wrap plain dicts (exact type check kept on purpose).
            if type(value) == dict:
                value = HParams(**value)
            self[key] = value

    def __getitem__(self, key):
        return getattr(self, key)

    def __setitem__(self, key, value):
        return setattr(self, key, value)

    def __contains__(self, key):
        return key in self.__dict__

    def __len__(self):
        return len(self.__dict__)

    def keys(self):
        return self.__dict__.keys()

    def values(self):
        return self.__dict__.values()

    def items(self):
        return self.__dict__.items()

    def __repr__(self):
        return repr(self.__dict__)
+
+
def string_to_bits(string, pad_len=8):
    """Encode a string as a ``(pad_len, 8)`` matrix of ASCII bits (MSB first).

    Output is truncated or padded to exactly ``pad_len`` rows; padding rows
    carry the sentinel pattern ``[0, 0, 1, 0, 0, 0, 0, 0]`` so decoders can
    tell padding apart from real characters.
    """
    # One row of 8 bits per character.
    rows = [[int(bit) for bit in format(ord(ch), "08b")] for ch in string]
    bits = np.array(rows)

    out = np.zeros((pad_len, 8), dtype=bits.dtype)
    out[:, 2] = 1  # sentinel bit marking padding rows
    keep = min(pad_len, len(bits))
    out[:keep] = bits[:keep]
    return out
+
+
def bits_to_string(bits_array):
    """Decode a 2-D bit matrix (one 8-bit row per char, MSB first) to a string."""
    chars = []
    for row in bits_array:
        # Re-join the row into a binary literal, then back to a character.
        binary = ''.join(str(bit) for bit in row)
        chars.append(chr(int(binary, 2)))
    return ''.join(chars)
+
+
def split_sentence(text, min_len=10, language_str='[EN]'):
    """Split ``text`` into chunks of at least ``min_len`` units.

    English text goes through the Latin splitter (word-count based); anything
    else goes through the Chinese splitter (character-count based).

    Bug fix: the default language tag is ``'[EN]'`` but the original check was
    ``language_str in ['EN']``, so callers using the default were silently
    routed to the Chinese splitter. Both spellings are now accepted.
    """
    if language_str in ('EN', '[EN]'):
        sentences = split_sentences_latin(text, min_len=min_len)
    else:
        sentences = split_sentences_zh(text, min_len=min_len)
    return sentences
+
def split_sentences_latin(text, min_len=10):
    """Split long text into chunks of at least ``min_len`` words.

    Args:
        text (str): Input text.
        min_len (int): Minimum word count accumulated per output chunk.

    Returns:
        List[str]: list of output sentences.
    """
    # Normalize CJK/fullwidth punctuation and curly quotes to ASCII.
    text = re.sub('[。!?;]', '.', text)
    text = re.sub('[,]', ',', text)
    text = re.sub('[“”]', '"', text)
    text = re.sub('[‘’]', "'", text)
    # Drop brackets and guillemets entirely.
    text = re.sub(r"[\<\>\(\)\[\]\"\«\»]+", "", text)
    # Collapse whitespace runs to a single space.
    text = re.sub('[\n\t ]+', ' ', text)
    # Insert the '$#!' sentinel after each punctuation mark, then split on it.
    text = re.sub('([,.!?;])', r'\1 $#!', text)
    # split
    sentences = [s.strip() for s in text.split('$#!')]
    if len(sentences[-1]) == 0: del sentences[-1]

    new_sentences = []
    new_sent = []
    count_len = 0
    # Greedily accumulate fragments until the running word count exceeds
    # min_len (the final fragment is always flushed).
    for ind, sent in enumerate(sentences):
        new_sent.append(sent)
        count_len += len(sent.split(" "))
        if count_len > min_len or ind == len(sentences) - 1:
            count_len = 0
            new_sentences.append(' '.join(new_sent))
            new_sent = []
    return merge_short_sentences_latin(new_sentences)
+
+
def merge_short_sentences_latin(sens):
    """Merge chunks of <= 2 words into a neighboring chunk.

    Args:
        sens (List[str]): list of input sentences.

    Returns:
        List[str]: list of output sentences.
    """
    merged = []
    for sentence in sens:
        # If the previously emitted chunk is too short, glue the current
        # sentence onto it instead of starting a new chunk.
        if merged and len(merged[-1].split(" ")) <= 2:
            merged[-1] = merged[-1] + " " + sentence
        else:
            merged.append(sentence)
    # Fold a trailing too-short chunk into its predecessor, if one exists.
    try:
        if len(merged[-1].split(" ")) <= 2:
            merged[-2] = merged[-2] + " " + merged[-1]
            merged.pop(-1)
    except:
        # Bare except mirrors upstream: empty / single-element lists fall through.
        pass
    return merged
+
def split_sentences_zh(text, min_len=10):
    """Split Chinese text into chunks of at least ``min_len`` characters."""
    # Normalize CJK sentence-ending punctuation to '.' and fullwidth comma to ','.
    text = re.sub('[。!?;]', '.', text)
    text = re.sub('[,]', ',', text)
    # Collapse newlines, tabs and space runs into single spaces.
    text = re.sub('[\n\t ]+', ' ', text)
    # Insert the '$#!' sentinel after each punctuation mark.
    text = re.sub('([,.!?;])', r'\1 $#!', text)
    # Split on the sentinel and strip surrounding whitespace.
    # sentences = [s.strip() for s in re.split('(。|!|?|;)', text)]
    sentences = [s.strip() for s in text.split('$#!')]
    if len(sentences[-1]) == 0: del sentences[-1]

    new_sentences = []
    new_sent = []
    count_len = 0
    # Accumulate fragments until the running character count exceeds min_len
    # (character count, unlike the word count used by the Latin splitter).
    for ind, sent in enumerate(sentences):
        new_sent.append(sent)
        count_len += len(sent)
        if count_len > min_len or ind == len(sentences) - 1:
            count_len = 0
            new_sentences.append(' '.join(new_sent))
            new_sent = []
    return merge_short_sentences_zh(new_sentences)
+
+
def merge_short_sentences_zh(sens):
    """Merge chunks of <= 2 characters into a neighboring chunk.

    Args:
        sens (List[str]): list of input sentences.

    Returns:
        List[str]: list of output sentences.
    """
    merged = []
    for sentence in sens:
        # If the previously emitted chunk is too short (by character count),
        # glue the current sentence onto it.
        if merged and len(merged[-1]) <= 2:
            merged[-1] = merged[-1] + " " + sentence
        else:
            merged.append(sentence)
    # Fold a trailing too-short chunk into its predecessor, if one exists.
    try:
        if len(merged[-1]) <= 2:
            merged[-2] = merged[-2] + " " + merged[-1]
            merged.pop(-1)
    except:
        # Bare except mirrors upstream: empty / single-element lists fall through.
        pass
    return merged
\ No newline at end of file
diff --git a/indextts/s2mel/modules/quantize.py b/indextts/s2mel/modules/quantize.py
new file mode 100644
index 0000000..c81603b
--- /dev/null
+++ b/indextts/s2mel/modules/quantize.py
@@ -0,0 +1,229 @@
+from dac.nn.quantize import ResidualVectorQuantize
+from torch import nn
+from modules.wavenet import WN
+import torch
+import torchaudio
+import torchaudio.functional as audio_F
+import numpy as np
+from .alias_free_torch import *
+from torch.nn.utils import weight_norm
+from torch import nn, sin, pow
+from einops.layers.torch import Rearrange
+from dac.model.encodec import SConv1d
+
def init_weights(m):
    """Weight-init hook for ``module.apply``: truncated-normal init for Conv1d.

    Bug fix: the original unconditionally called ``constant_(m.bias, 0)``,
    which raises for convolutions built with ``bias=False`` (``m.bias`` is
    ``None``); the bias init is now guarded.
    """
    if isinstance(m, nn.Conv1d):
        nn.init.trunc_normal_(m.weight, std=0.02)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
+
+
def WNConv1d(*args, **kwargs):
    """Build an ``nn.Conv1d`` wrapped in weight normalization."""
    conv = nn.Conv1d(*args, **kwargs)
    return weight_norm(conv)
+
+
def WNConvTranspose1d(*args, **kwargs):
    """Build an ``nn.ConvTranspose1d`` wrapped in weight normalization."""
    deconv = nn.ConvTranspose1d(*args, **kwargs)
    return weight_norm(deconv)
+
class SnakeBeta(nn.Module):
    """Snake activation with separate frequency (alpha) and magnitude (beta)
    parameters: ``SnakeBeta(x) = x + sin^2(alpha * x) / beta``.

    Shape:
        - Input:  (B, C, T)
        - Output: (B, C, T), same as input

    With ``alpha_logscale=True`` the parameters are stored in log space
    (initialized to zeros, i.e. effective value 1 after ``exp``); otherwise
    they are stored linearly (initialized to ``alpha``). Note that, as in
    upstream BigVGAN, ``beta`` is also initialized from the ``alpha`` argument.

    Reference: Liu Ziyin, Tilman Hartwig, Masahito Ueda,
    https://arxiv.org/abs/2006.08195
    """

    def __init__(
        self, in_features, alpha=1.0, alpha_trainable=True, alpha_logscale=False
    ):
        """
        Args:
            in_features: number of channels C of the input.
            alpha: initial value for both parameters (linear scale only).
            alpha_trainable: whether alpha/beta receive gradients.
            alpha_logscale: store parameters in log space.
        """
        super(SnakeBeta, self).__init__()
        self.in_features = in_features
        self.alpha_logscale = alpha_logscale

        if self.alpha_logscale:
            # Log-scale parameters start at zero -> exp(0) == 1.
            init = torch.zeros(in_features)
            self.alpha = nn.Parameter(init)
            self.beta = nn.Parameter(init.clone())
        else:
            # Linear-scale parameters start at `alpha`.
            self.alpha = nn.Parameter(torch.ones(in_features) * alpha)
            self.beta = nn.Parameter(torch.ones(in_features) * alpha)

        self.alpha.requires_grad = alpha_trainable
        self.beta.requires_grad = alpha_trainable

        # Small epsilon so a zero beta cannot divide by zero.
        self.no_div_by_zero = 0.000000001

    def forward(self, x):
        """Apply the activation elementwise: x + sin^2(x * alpha) / beta."""
        # Broadcast per-channel parameters against (B, C, T).
        freq = self.alpha.unsqueeze(0).unsqueeze(-1)
        mag = self.beta.unsqueeze(0).unsqueeze(-1)
        if self.alpha_logscale:
            freq = torch.exp(freq)
            mag = torch.exp(mag)
        return x + (1.0 / (mag + self.no_div_by_zero)) * torch.pow(torch.sin(x * freq), 2)
+
class ResidualUnit(nn.Module):
    """Dilated 7-tap convolution block with Snake activations and a residual add."""

    def __init__(self, dim: int = 16, dilation: int = 1):
        super().__init__()
        # "Same" padding for a kernel of 7 at the given dilation.
        same_pad = ((7 - 1) * dilation) // 2
        self.block = nn.Sequential(
            Activation1d(activation=SnakeBeta(dim, alpha_logscale=True)),
            WNConv1d(dim, dim, kernel_size=7, dilation=dilation, padding=same_pad),
            Activation1d(activation=SnakeBeta(dim, alpha_logscale=True)),
            WNConv1d(dim, dim, kernel_size=1),
        )

    def forward(self, x):
        # Residual connection around the conv stack.
        return self.block(x) + x
+
class CNNLSTM(nn.Module):
    """Residual CNN feature extractor followed by one or more linear heads.

    With ``global_pred=True`` the time axis is mean-pooled before the heads,
    yielding one prediction per sequence instead of one per frame.
    """

    def __init__(self, indim, outdim, head, global_pred=False):
        super().__init__()
        self.global_pred = global_pred
        self.model = nn.Sequential(
            ResidualUnit(indim, dilation=1),
            ResidualUnit(indim, dilation=2),
            ResidualUnit(indim, dilation=3),
            Activation1d(activation=SnakeBeta(indim, alpha_logscale=True)),
            Rearrange("b c t -> b t c"),
        )
        self.heads = nn.ModuleList(nn.Linear(indim, outdim) for _ in range(head))

    def forward(self, x):
        # x: [B, C, T] -> features [B, T, C]
        feats = self.model(x)
        if self.global_pred:
            # Collapse time for sequence-level prediction.
            feats = torch.mean(feats, dim=1, keepdim=False)
        return [head(feats) for head in self.heads]
+
def sequence_mask(length, max_length=None):
    """Boolean mask of shape (B, T_max): True where position < length[b]."""
    if max_length is None:
        max_length = length.max()
    positions = torch.arange(max_length, dtype=length.dtype, device=length.device)
    return positions[None, :] < length[:, None]
class FAquantizer(nn.Module):
    """Factorized-attribute quantizer: separate residual vector quantizers for
    prosody, content, and the remaining residual stream (FACodec-style).

    NOTE(review): ``n_t_codebooks``, ``separate_prosody_encoder`` and
    ``timbre_norm`` are accepted but never used in this implementation —
    presumably kept for config compatibility; confirm against callers.
    """

    def __init__(self, in_dim=1024,
                 n_p_codebooks=1,
                 n_c_codebooks=2,
                 n_t_codebooks=2,
                 n_r_codebooks=3,
                 codebook_size=1024,
                 codebook_dim=8,
                 quantizer_dropout=0.5,
                 causal=False,
                 separate_prosody_encoder=False,
                 timbre_norm=False,):
        super(FAquantizer, self).__init__()
        conv1d_type = SConv1d# if causal else nn.Conv1d
        # One RVQ per factored attribute stream.
        self.prosody_quantizer = ResidualVectorQuantize(
            input_dim=in_dim,
            n_codebooks=n_p_codebooks,
            codebook_size=codebook_size,
            codebook_dim=codebook_dim,
            quantizer_dropout=quantizer_dropout,
        )

        self.content_quantizer = ResidualVectorQuantize(
            input_dim=in_dim,
            n_codebooks=n_c_codebooks,
            codebook_size=codebook_size,
            codebook_dim=codebook_dim,
            quantizer_dropout=quantizer_dropout,
        )

        self.residual_quantizer = ResidualVectorQuantize(
            input_dim=in_dim,
            n_codebooks=n_r_codebooks,
            codebook_size=codebook_size,
            codebook_dim=codebook_dim,
            quantizer_dropout=quantizer_dropout,
        )

        # Small encoder lifting the 20 lowest mel bands to the quantizer dim.
        self.melspec_linear = conv1d_type(in_channels=20, out_channels=256, kernel_size=1, causal=causal)
        self.melspec_encoder = WN(hidden_channels=256, kernel_size=5, dilation_rate=1, n_layers=8, gin_channels=0, p_dropout=0.2, causal=causal)
        self.melspec_linear2 = conv1d_type(in_channels=256, out_channels=1024, kernel_size=1, causal=causal)

        self.prob_random_mask_residual = 0.75

        SPECT_PARAMS = {
            "n_fft": 2048,
            "win_length": 1200,
            "hop_length": 300,
        }
        MEL_PARAMS = {
            "n_mels": 80,
        }

        # 24 kHz mel front end used by preprocess().
        self.to_mel = torchaudio.transforms.MelSpectrogram(
            n_mels=MEL_PARAMS["n_mels"], sample_rate=24000, **SPECT_PARAMS
        )
        # Fixed log-mel normalization statistics.
        self.mel_mean, self.mel_std = -4, 4
        self.frame_rate = 24000 / 300
        self.hop_length = 300

    def preprocess(self, wave_tensor, n_bins=20):
        """Normalized log-mel features: keep only the lowest ``n_bins`` bands
        and trim the time axis to ``len(wave) / hop_length`` frames."""
        mel_tensor = self.to_mel(wave_tensor.squeeze(1))
        mel_tensor = (torch.log(1e-5 + mel_tensor) - self.mel_mean) / self.mel_std
        return mel_tensor[:, :n_bins, :int(wave_tensor.size(-1) / self.hop_length)]

    def forward(self, x, wave_segments):
        """Quantize features ``x`` into prosody / content / residual streams.

        Returns:
            quantized: list [z_p, z_c, z_r] of quantized feature tensors.
            codes: list [codes_p, codes_c, codes_r] of codebook indices.
        """
        outs = 0
        prosody_feature = self.preprocess(wave_segments)

        f0_input = prosody_feature  # (B, 20, T) low mel bands (channel-first, not (B, T, 20))
        f0_input = self.melspec_linear(f0_input)
        f0_input = self.melspec_encoder(f0_input, torch.ones(f0_input.shape[0], 1, f0_input.shape[2]).to(
            f0_input.device).bool())
        f0_input = self.melspec_linear2(f0_input)

        # Align time lengths between the prosody branch and x before quantizing.
        common_min_size = min(f0_input.size(2), x.size(2))
        f0_input = f0_input[:, :, :common_min_size]

        x = x[:, :, :common_min_size]

        z_p, codes_p, latents_p, commitment_loss_p, codebook_loss_p = self.prosody_quantizer(
            f0_input, 1
        )
        # NOTE(review): `outs` is accumulated but never returned — vestigial.
        outs += z_p.detach()

        z_c, codes_c, latents_c, commitment_loss_c, codebook_loss_c = self.content_quantizer(
            x, 2
        )
        outs += z_c.detach()

        # Residual stream: whatever prosody + content did not explain.
        residual_feature = x - z_p.detach() - z_c.detach()

        z_r, codes_r, latents_r, commitment_loss_r, codebook_loss_r = self.residual_quantizer(
            residual_feature, 3
        )

        quantized = [z_p, z_c, z_r]
        codes = [codes_p, codes_c, codes_r]

        return quantized, codes
\ No newline at end of file
diff --git a/indextts/s2mel/modules/rmvpe.py b/indextts/s2mel/modules/rmvpe.py
new file mode 100644
index 0000000..895c580
--- /dev/null
+++ b/indextts/s2mel/modules/rmvpe.py
@@ -0,0 +1,631 @@
+from io import BytesIO
+import os
+from typing import List, Optional, Tuple
+import numpy as np
+import torch
+
+import torch.nn as nn
+import torch.nn.functional as F
+from librosa.util import normalize, pad_center, tiny
+from scipy.signal import get_window
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
class STFT(torch.nn.Module):
    def __init__(
        self, filter_length=1024, hop_length=512, win_length=None, window="hann"
    ):
        """
        This module implements an STFT using 1D convolution and 1D transpose convolutions.
        This is a bit tricky so there are some cases that probably won't work as working
        out the same sizes before and after in all overlap add setups is tough. Right now,
        this code should work with hop lengths that are half the filter length (50% overlap
        between frames).

        Keyword Arguments:
            filter_length {int} -- Length of filters used (default: {1024})
            hop_length {int} -- Hop length of STFT (restrict to 50% overlap between frames) (default: {512})
            win_length {[type]} -- Length of the window function applied to each frame (if not specified, it
                equals the filter length). (default: {None})
            window {str} -- Type of window to use (options are bartlett, hann, hamming, blackman, blackmanharris)
                (default: {'hann'})
        """
        super(STFT, self).__init__()
        self.filter_length = filter_length
        self.hop_length = hop_length
        self.win_length = win_length if win_length else filter_length
        self.window = window
        self.forward_transform = None
        self.pad_amount = int(self.filter_length / 2)
        # DFT matrix realized as a real-valued basis: the first `cutoff` rows
        # are the real parts, the rest the imaginary parts.
        fourier_basis = np.fft.fft(np.eye(self.filter_length))

        cutoff = int((self.filter_length / 2 + 1))
        fourier_basis = np.vstack(
            [np.real(fourier_basis[:cutoff, :]), np.imag(fourier_basis[:cutoff, :])]
        )
        forward_basis = torch.FloatTensor(fourier_basis)
        # Pseudo-inverse gives the synthesis (iSTFT) basis.
        inverse_basis = torch.FloatTensor(np.linalg.pinv(fourier_basis))

        assert filter_length >= self.win_length
        # get window and zero center pad it to filter_length
        fft_window = get_window(window, self.win_length, fftbins=True)
        fft_window = pad_center(fft_window, size=filter_length)
        fft_window = torch.from_numpy(fft_window).float()

        # window the bases
        forward_basis *= fft_window
        inverse_basis = (inverse_basis.T * fft_window).T

        self.register_buffer("forward_basis", forward_basis.float())
        self.register_buffer("inverse_basis", inverse_basis.float())
        self.register_buffer("fft_window", fft_window.float())

    def transform(self, input_data, return_phase=False):
        """Take input data (audio) to STFT domain.

        Arguments:
            input_data {tensor} -- Tensor of floats, with shape (num_batch, num_samples)

        Returns:
            magnitude {tensor} -- Magnitude of STFT with shape (num_batch,
                num_frequencies, num_frames)
            phase {tensor} -- Phase of STFT with shape (num_batch,
                num_frequencies, num_frames)
        """
        # Center-pad with reflection so frames align with the signal center.
        input_data = F.pad(
            input_data,
            (self.pad_amount, self.pad_amount),
            mode="reflect",
        )
        # Frame the signal with unfold, then project onto the windowed DFT basis.
        forward_transform = input_data.unfold(
            1, self.filter_length, self.hop_length
        ).permute(0, 2, 1)
        forward_transform = torch.matmul(self.forward_basis, forward_transform)
        cutoff = int((self.filter_length / 2) + 1)
        real_part = forward_transform[:, :cutoff, :]
        imag_part = forward_transform[:, cutoff:, :]
        magnitude = torch.sqrt(real_part**2 + imag_part**2)
        if return_phase:
            phase = torch.atan2(imag_part.data, real_part.data)
            return magnitude, phase
        else:
            return magnitude

    def inverse(self, magnitude, phase):
        """Call the inverse STFT (iSTFT), given magnitude and phase tensors produced
        by the ```transform``` function.

        Arguments:
            magnitude {tensor} -- Magnitude of STFT with shape (num_batch,
                num_frequencies, num_frames)
            phase {tensor} -- Phase of STFT with shape (num_batch,
                num_frequencies, num_frames)

        Returns:
            inverse_transform {tensor} -- Reconstructed audio given magnitude and phase. Of
                shape (num_batch, num_samples)
        """
        # Rebuild the real/imaginary stack from polar form.
        cat = torch.cat(
            [magnitude * torch.cos(phase), magnitude * torch.sin(phase)], dim=1
        )
        # nn.Fold performs the overlap-add of the synthesized frames.
        fold = torch.nn.Fold(
            output_size=(1, (cat.size(-1) - 1) * self.hop_length + self.filter_length),
            kernel_size=(1, self.filter_length),
            stride=(1, self.hop_length),
        )
        inverse_transform = torch.matmul(self.inverse_basis, cat)
        inverse_transform = fold(inverse_transform)[
            :, 0, 0, self.pad_amount : -self.pad_amount
        ]
        # Compensate for the summed squared window (COLA normalization).
        # NOTE(review): positions where the window envelope sums to ~0 would
        # divide by zero — presumably avoided by the 50% overlap constraint.
        window_square_sum = (
            self.fft_window.pow(2).repeat(cat.size(-1), 1).T.unsqueeze(0)
        )
        window_square_sum = fold(window_square_sum)[
            :, 0, 0, self.pad_amount : -self.pad_amount
        ]
        inverse_transform /= window_square_sum
        return inverse_transform

    def forward(self, input_data):
        """Take input data (audio) to STFT domain and then back to audio.

        Arguments:
            input_data {tensor} -- Tensor of floats, with shape (num_batch, num_samples)

        Returns:
            reconstruction {tensor} -- Reconstructed audio given magnitude and phase. Of
                shape (num_batch, num_samples)
        """
        # Note: stores magnitude/phase on self as a side effect.
        self.magnitude, self.phase = self.transform(input_data, return_phase=True)
        reconstruction = self.inverse(self.magnitude, self.phase)
        return reconstruction
+
+
+from time import time as ttime
+
+
class BiGRU(nn.Module):
    """Bidirectional GRU stack that returns only the output sequence.

    Input:  (B, T, input_features)
    Output: (B, T, 2 * hidden_features) — forward/backward states concatenated.
    """

    def __init__(self, input_features, hidden_features, num_layers):
        super(BiGRU, self).__init__()
        self.gru = nn.GRU(
            input_features,
            hidden_features,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )

    def forward(self, x):
        # Discard the final hidden state; only the per-step outputs are used.
        output, _ = self.gru(x)
        return output
+
+
class ConvBlockRes(nn.Module):
    """Two 3x3 Conv-BN-ReLU layers with a residual connection.

    A 1x1 convolution shortcut is created only when the channel counts differ;
    its absence is detected via ``hasattr`` in ``forward``.
    """

    def __init__(self, in_channels, out_channels, momentum=0.01):
        super(ConvBlockRes, self).__init__()

        def conv3x3(cin, cout):
            # Bias-free 3x3 conv; BatchNorm supplies the affine shift.
            return nn.Conv2d(
                in_channels=cin,
                out_channels=cout,
                kernel_size=(3, 3),
                stride=(1, 1),
                padding=(1, 1),
                bias=False,
            )

        self.conv = nn.Sequential(
            conv3x3(in_channels, out_channels),
            nn.BatchNorm2d(out_channels, momentum=momentum),
            nn.ReLU(),
            conv3x3(out_channels, out_channels),
            nn.BatchNorm2d(out_channels, momentum=momentum),
            nn.ReLU(),
        )
        if in_channels != out_channels:
            self.shortcut = nn.Conv2d(in_channels, out_channels, (1, 1))

    def forward(self, x: torch.Tensor):
        residual = self.shortcut(x) if hasattr(self, "shortcut") else x
        return self.conv(x) + residual
+
+
class Encoder(nn.Module):
    """Encoder half of the RMVPE U-Net: a stack of ``ResEncoderBlock`` stages
    that doubles channels and halves the spatial size each stage, collecting
    pre-pool features as skip connections for the decoder.
    """

    def __init__(
        self,
        in_channels,
        in_size,
        n_encoders,
        kernel_size,
        n_blocks,
        out_channels=16,
        momentum=0.01,
    ):
        super(Encoder, self).__init__()
        self.n_encoders = n_encoders
        self.bn = nn.BatchNorm2d(in_channels, momentum=momentum)
        self.layers = nn.ModuleList()
        self.latent_channels = []
        channels_in, channels_out, size = in_channels, out_channels, in_size
        for _ in range(n_encoders):
            self.layers.append(
                ResEncoderBlock(
                    channels_in, channels_out, kernel_size, n_blocks, momentum=momentum
                )
            )
            self.latent_channels.append([channels_out, size])
            channels_in = channels_out
            channels_out *= 2
            size //= 2
        self.out_size = size
        self.out_channel = channels_out

    def forward(self, x: torch.Tensor):
        """Return the deepest features plus the list of skip tensors."""
        skips: List[torch.Tensor] = []
        x = self.bn(x)
        for layer in self.layers:
            skip, x = layer(x)
            skips.append(skip)
        return x, skips
+
+
class ResEncoderBlock(nn.Module):
    """``n_blocks`` ConvBlockRes units, optionally followed by average pooling.

    ``forward`` returns ``(features, pooled)`` when ``kernel_size`` is given,
    otherwise just the features (pool-free mode used by ``Intermediate``).
    """

    def __init__(
        self, in_channels, out_channels, kernel_size, n_blocks=1, momentum=0.01
    ):
        super(ResEncoderBlock, self).__init__()
        self.n_blocks = n_blocks
        self.conv = nn.ModuleList([ConvBlockRes(in_channels, out_channels, momentum)])
        self.conv.extend(
            ConvBlockRes(out_channels, out_channels, momentum)
            for _ in range(n_blocks - 1)
        )
        self.kernel_size = kernel_size
        if kernel_size is not None:
            self.pool = nn.AvgPool2d(kernel_size=kernel_size)

    def forward(self, x):
        for block in self.conv:
            x = block(x)
        if self.kernel_size is None:
            return x
        return x, self.pool(x)
+
+
class Intermediate(nn.Module):
    """Chain of ``n_inters`` pool-free ResEncoderBlocks at constant resolution,
    bridging the U-Net encoder and decoder."""

    def __init__(self, in_channels, out_channels, n_inters, n_blocks, momentum=0.01):
        super(Intermediate, self).__init__()
        self.n_inters = n_inters
        blocks = [ResEncoderBlock(in_channels, out_channels, None, n_blocks, momentum)]
        blocks += [
            ResEncoderBlock(out_channels, out_channels, None, n_blocks, momentum)
            for _ in range(n_inters - 1)
        ]
        self.layers = nn.ModuleList(blocks)

    def forward(self, x):
        for block in self.layers:
            x = block(x)
        return x
+
+
class ResDecoderBlock(nn.Module):
    """Transposed-conv upsampling stage: upsample, concatenate the encoder skip
    tensor, then run ``n_blocks`` ConvBlockRes units."""

    def __init__(self, in_channels, out_channels, stride, n_blocks=1, momentum=0.01):
        super(ResDecoderBlock, self).__init__()
        # A (1, 2) stride upsamples only the last axis, so only that axis
        # needs output padding.
        out_padding = (0, 1) if stride == (1, 2) else (1, 1)
        self.n_blocks = n_blocks
        self.conv1 = nn.Sequential(
            nn.ConvTranspose2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=(3, 3),
                stride=stride,
                padding=(1, 1),
                output_padding=out_padding,
                bias=False,
            ),
            nn.BatchNorm2d(out_channels, momentum=momentum),
            nn.ReLU(),
        )
        blocks = [ConvBlockRes(out_channels * 2, out_channels, momentum)]
        blocks += [
            ConvBlockRes(out_channels, out_channels, momentum)
            for _ in range(n_blocks - 1)
        ]
        self.conv2 = nn.ModuleList(blocks)

    def forward(self, x, concat_tensor):
        # Channel-concatenate the skip tensor after upsampling.
        x = torch.cat((self.conv1(x), concat_tensor), dim=1)
        for block in self.conv2:
            x = block(x)
        return x
+
+
class Decoder(nn.Module):
    """Decoder half of the RMVPE U-Net: halves channels each stage while
    consuming the encoder's skip tensors in reverse order."""

    def __init__(self, in_channels, n_decoders, stride, n_blocks, momentum=0.01):
        super(Decoder, self).__init__()
        self.layers = nn.ModuleList()
        self.n_decoders = n_decoders
        channels = in_channels
        for _ in range(n_decoders):
            self.layers.append(
                ResDecoderBlock(channels, channels // 2, stride, n_blocks, momentum)
            )
            channels //= 2

    def forward(self, x: torch.Tensor, concat_tensors: List[torch.Tensor]):
        for stage, layer in enumerate(self.layers):
            # Deepest skip tensor first.
            x = layer(x, concat_tensors[-1 - stage])
        return x
+
+
class DeepUnet(nn.Module):
    """U-Net backbone for RMVPE: Encoder -> Intermediate -> Decoder with skips.

    NOTE(review): ``kernel_size`` is forwarded as the decoder's ``stride``
    argument — this matches upstream RMVPE, where pooling kernel and
    upsampling stride are the same (2, 2).
    """

    def __init__(
        self,
        kernel_size,
        n_blocks,
        en_de_layers=5,
        inter_layers=4,
        in_channels=1,
        en_out_channels=16,
    ):
        super(DeepUnet, self).__init__()
        self.encoder = Encoder(
            in_channels, 128, en_de_layers, kernel_size, n_blocks, en_out_channels
        )
        self.intermediate = Intermediate(
            self.encoder.out_channel // 2,
            self.encoder.out_channel,
            inter_layers,
            n_blocks,
        )
        self.decoder = Decoder(
            self.encoder.out_channel, en_de_layers, kernel_size, n_blocks
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x, skips = self.encoder(x)
        x = self.intermediate(x)
        return self.decoder(x, skips)
+
+
class E2E(nn.Module):
    """RMVPE end-to-end pitch network: DeepUnet backbone, a 3-channel Conv2d
    projection, and a classification head over 360 pitch bins.

    With ``n_gru`` truthy the head is a BiGRU followed by a linear layer;
    otherwise it is a single linear layer.
    """

    # Mel-bin count expected by the head and number of output pitch classes.
    # Grounded by the GRU branch below: BiGRU(3 * 128, ...) / Linear(512, 360).
    N_MELS = 128
    N_CLASS = 360

    def __init__(
        self,
        n_blocks,
        n_gru,
        kernel_size,
        en_de_layers=5,
        inter_layers=4,
        in_channels=1,
        en_out_channels=16,
    ):
        super(E2E, self).__init__()
        self.unet = DeepUnet(
            kernel_size,
            n_blocks,
            en_de_layers,
            inter_layers,
            in_channels,
            en_out_channels,
        )
        self.cnn = nn.Conv2d(en_out_channels, 3, (3, 3), padding=(1, 1))
        if n_gru:
            self.fc = nn.Sequential(
                BiGRU(3 * 128, 256, n_gru),
                nn.Linear(512, 360),
                nn.Dropout(0.25),
                nn.Sigmoid(),
            )
        else:
            # Bug fix: the original used nn.N_MELS / nn.N_CLASS, attributes
            # that do not exist on torch.nn, so this branch always raised
            # AttributeError. Use the class constants instead.
            self.fc = nn.Sequential(
                nn.Linear(3 * self.N_MELS, self.N_CLASS),
                nn.Dropout(0.25),
                nn.Sigmoid(),
            )

    def forward(self, mel):
        """mel: (B, T, n_mels) -> per-frame pitch-bin probabilities (B, T, 360)."""
        # (B, T, M) -> (B, 1, M, T) for the 2-D U-Net.
        mel = mel.transpose(-1, -2).unsqueeze(1)
        # Project to 3 channels, then flatten (channels, mel) for the head.
        x = self.cnn(self.unet(mel)).transpose(1, 2).flatten(-2)
        x = self.fc(x)
        return x
+
+
+from librosa.filters import mel
+
+
class MelSpectrogram(torch.nn.Module):
    """Log-mel spectrogram front end with optional key-shift and speed scaling.

    Uses librosa HTK mel filters and caches one Hann window per
    (keyshift, device) pair. On DirectML ("privateuseone") devices it falls
    back to the conv-based STFT module instead of torch.stft.
    """

    def __init__(
        self,
        is_half,          # if True, the mel output is cast to float16
        n_mel_channels,
        sampling_rate,
        win_length,
        hop_length,
        n_fft=None,       # defaults to win_length when None
        mel_fmin=0,
        mel_fmax=None,
        clamp=1e-5,       # floor applied before the log
    ):
        super().__init__()
        n_fft = win_length if n_fft is None else n_fft
        self.hann_window = {}
        mel_basis = mel(
            sr=sampling_rate,
            n_fft=n_fft,
            n_mels=n_mel_channels,
            fmin=mel_fmin,
            fmax=mel_fmax,
            htk=True,
        )
        mel_basis = torch.from_numpy(mel_basis).float()
        self.register_buffer("mel_basis", mel_basis)
        self.n_fft = win_length if n_fft is None else n_fft
        self.hop_length = hop_length
        self.win_length = win_length
        self.sampling_rate = sampling_rate
        self.n_mel_channels = n_mel_channels
        self.clamp = clamp
        self.is_half = is_half

    def forward(self, audio, keyshift=0, speed=1, center=True):
        """audio: (B, num_samples) -> log-mel (B, n_mel_channels, n_frames)."""
        # Key shift scales FFT/window length by 2^(semitones/12); speed scales
        # the hop length.
        factor = 2 ** (keyshift / 12)
        n_fft_new = int(np.round(self.n_fft * factor))
        win_length_new = int(np.round(self.win_length * factor))
        hop_length_new = int(np.round(self.hop_length * speed))
        # One cached Hann window per (keyshift, device) combination.
        keyshift_key = str(keyshift) + "_" + str(audio.device)
        if keyshift_key not in self.hann_window:
            self.hann_window[keyshift_key] = torch.hann_window(win_length_new).to(
                audio.device
            )
        if "privateuseone" in str(audio.device):
            # DirectML path: torch.stft is unavailable; use the conv STFT.
            # NOTE(review): self.stft is built once with the first call's FFT
            # sizes and reused for later keyshift/speed values — confirm callers
            # never vary them on this device.
            if not hasattr(self, "stft"):
                self.stft = STFT(
                    filter_length=n_fft_new,
                    hop_length=hop_length_new,
                    win_length=win_length_new,
                    window="hann",
                ).to(audio.device)
            magnitude = self.stft.transform(audio)
        else:
            fft = torch.stft(
                audio,
                n_fft=n_fft_new,
                hop_length=hop_length_new,
                win_length=win_length_new,
                window=self.hann_window[keyshift_key],
                center=center,
                return_complex=True,
            )
            magnitude = torch.sqrt(fft.real.pow(2) + fft.imag.pow(2))
        if keyshift != 0:
            # Resize the frequency axis back to the unshifted bin count and
            # rescale magnitudes for the changed window length.
            size = self.n_fft // 2 + 1
            resize = magnitude.size(1)
            if resize < size:
                magnitude = F.pad(magnitude, (0, 0, 0, size - resize))
            magnitude = magnitude[:, :size, :] * self.win_length / win_length_new
        mel_output = torch.matmul(self.mel_basis, magnitude)
        if self.is_half == True:
            mel_output = mel_output.half()
        log_mel_spec = torch.log(torch.clamp(mel_output, min=self.clamp))
        return log_mel_spec
+
+
class RMVPE:
    """F0 (pitch) estimator wrapping the RMVPE E2E model.

    Supports a torch path (CUDA/CPU) and a DirectML ("privateuseone") path
    that runs an ONNX export via onnxruntime.
    """

    def __init__(self, model_path: str, is_half, device=None, use_jit=False):
        # NOTE(review): duplicated assignment kept from upstream — the second
        # line is redundant. `use_jit` is accepted but unused.
        self.resample_kernel = {}
        self.resample_kernel = {}
        self.is_half = is_half
        if device is None:
            device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.device = device
        # 128-band mel front end at 16 kHz, hop 160 (10 ms), fmin 30 / fmax 8000.
        self.mel_extractor = MelSpectrogram(
            is_half, 128, 16000, 1024, 160, None, 30, 8000
        ).to(device)
        if "privateuseone" in str(device):
            import onnxruntime as ort

            # DirectML: load the ONNX export from $rmvpe_root/rmvpe.onnx.
            ort_session = ort.InferenceSession(
                "%s/rmvpe.onnx" % os.environ["rmvpe_root"],
                providers=["DmlExecutionProvider"],
            )
            self.model = ort_session
        else:
            if str(self.device) == "cuda":
                self.device = torch.device("cuda:0")

            def get_default_model():
                # Build the E2E network and load weights from model_path.
                model = E2E(4, 1, (2, 2))
                ckpt = torch.load(model_path, map_location="cpu")
                model.load_state_dict(ckpt)
                model.eval()
                if is_half:
                    model = model.half()
                else:
                    model = model.float()
                return model

            self.model = get_default_model()

            self.model = self.model.to(device)
        # 360 pitch bins, 20 cents apart, starting at ~1997.38 cents; padded by
        # 4 on each side for the local average in to_local_average_cents.
        cents_mapping = 20 * np.arange(360) + 1997.3794084376191
        self.cents_mapping = np.pad(cents_mapping, (4, 4))  # 368

    def mel2hidden(self, mel):
        """Run the network on a mel spectrogram.

        Frames are right-padded to a multiple of 32 (required by the U-Net's
        five 2x downsamplings); the output is trimmed back to n_frames.
        """
        with torch.no_grad():
            n_frames = mel.shape[-1]
            n_pad = 32 * ((n_frames - 1) // 32 + 1) - n_frames
            if n_pad > 0:
                mel = F.pad(mel, (0, n_pad), mode="constant")
            if "privateuseone" in str(self.device):
                onnx_input_name = self.model.get_inputs()[0].name
                onnx_outputs_names = self.model.get_outputs()[0].name
                hidden = self.model.run(
                    [onnx_outputs_names],
                    input_feed={onnx_input_name: mel.cpu().numpy()},
                )[0]
            else:
                mel = mel.half() if self.is_half else mel.float()
                hidden = self.model(mel)
            return hidden[:, :n_frames]

    def decode(self, hidden, thred=0.03):
        """Convert salience to F0 in Hz.

        cents == 0 (unvoiced / below threshold) maps to exactly 10 Hz, which
        is then zeroed out.
        """
        cents_pred = self.to_local_average_cents(hidden, thred=thred)
        f0 = 10 * (2 ** (cents_pred / 1200))
        f0[f0 == 10] = 0
        # f0 = np.array([10 * (2 ** (cent_pred / 1200)) if cent_pred else 0 for cent_pred in cents_pred])
        return f0

    def infer_from_audio(self, audio, thred=0.03):
        """Estimate the F0 contour (numpy array, Hz) of a 1-D 16 kHz waveform."""
        if not torch.is_tensor(audio):
            audio = torch.from_numpy(audio)
        mel = self.mel_extractor(
            audio.float().to(self.device).unsqueeze(0), center=True
        )
        hidden = self.mel2hidden(mel)
        if "privateuseone" not in str(self.device):
            hidden = hidden.squeeze(0).cpu().numpy()
        else:
            # ONNX path already returned a numpy array; drop the batch dim.
            hidden = hidden[0]
        if self.is_half == True:
            hidden = hidden.astype("float32")

        f0 = self.decode(hidden, thred=thred)
        return f0

    def infer_from_audio_batch(self, audio, thred=0.03):
        """Batched variant of infer_from_audio; returns a (B, n_frames) tensor
        of F0 values on self.device."""
        if not torch.is_tensor(audio):
            audio = torch.from_numpy(audio)
        mel = self.mel_extractor(
            audio.float().to(self.device), center=True
        )
        hidden = self.mel2hidden(mel)
        if "privateuseone" not in str(self.device):
            hidden = hidden.cpu().numpy()
        else:
            pass
        if self.is_half == True:
            hidden = hidden.astype("float32")

        # Decode each batch item independently, then restack.
        f0s = []
        for bib in range(hidden.shape[0]):
            f0s.append(self.decode(hidden[bib], thred=thred))
        f0s = np.stack(f0s)
        f0s = torch.from_numpy(f0s).to(self.device)
        return f0s

    def to_local_average_cents(self, salience, thred=0.05):
        """Salience-weighted average of cents in a ±4-bin window around each
        frame's argmax; frames whose peak salience is <= thred get 0."""
        center = np.argmax(salience, axis=1)  # (n_frames,) peak bin per frame
        salience = np.pad(salience, ((0, 0), (4, 4)))  # (n_frames, 368)
        center += 4
        todo_salience = []
        todo_cents_mapping = []
        starts = center - 4
        ends = center + 5
        for idx in range(salience.shape[0]):
            todo_salience.append(salience[:, starts[idx] : ends[idx]][idx])
            todo_cents_mapping.append(self.cents_mapping[starts[idx] : ends[idx]])
        todo_salience = np.array(todo_salience)  # (n_frames, 9)
        todo_cents_mapping = np.array(todo_cents_mapping)  # (n_frames, 9)
        product_sum = np.sum(todo_salience * todo_cents_mapping, 1)
        weight_sum = np.sum(todo_salience, 1)  # (n_frames,)
        devided = product_sum / weight_sum  # weighted average, in cents
        maxx = np.max(salience, axis=1)  # per-frame peak salience
        devided[maxx <= thred] = 0
        return devided
diff --git a/indextts/s2mel/modules/vocos/__init__.py b/indextts/s2mel/modules/vocos/__init__.py
new file mode 100644
index 0000000..3a11ba9
--- /dev/null
+++ b/indextts/s2mel/modules/vocos/__init__.py
@@ -0,0 +1,4 @@
+from .pretrained import Vocos
+
+
+__version__ = "0.1.0"
diff --git a/indextts/s2mel/modules/vocos/heads.py b/indextts/s2mel/modules/vocos/heads.py
new file mode 100644
index 0000000..0591799
--- /dev/null
+++ b/indextts/s2mel/modules/vocos/heads.py
@@ -0,0 +1,164 @@
+from typing import Optional
+
+import torch
+from torch import nn
+from torchaudio.functional.functional import _hz_to_mel, _mel_to_hz
+
+from .spectral_ops import IMDCT, ISTFT
+from .modules import symexp
+
+
class FourierHead(nn.Module):
    """Abstract base for inverse-Fourier synthesis heads.

    Concrete subclasses turn a (B, L, H) feature sequence into a (B, T)
    time-domain waveform by overriding :meth:`forward`.
    """

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map hidden features (B, L, H) to an audio waveform (B, T).

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError("Subclasses must implement the forward method.")
+
+
class ISTFTHead(FourierHead):
    """
    ISTFT head that predicts complex STFT coefficients and inverts them.

    Args:
        dim (int): Hidden dimension of the model.
        n_fft (int): Size of Fourier transform.
        hop_length (int): The distance between neighboring sliding window frames,
            which should align with the resolution of the input features.
        padding (str, optional): Type of padding. Options are "center" or "same".
            Defaults to "same".
    """

    def __init__(self, dim: int, n_fft: int, hop_length: int, padding: str = "same"):
        super().__init__()
        # One linear layer predicts log-magnitude and phase for every rFFT
        # bin: (n_fft // 2 + 1) bins * 2 quantities = n_fft + 2 outputs.
        self.out = torch.nn.Linear(dim, n_fft + 2)
        self.istft = ISTFT(n_fft=n_fft, hop_length=hop_length, win_length=n_fft, padding=padding)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Reconstruct a (B, T) waveform from (B, L, H) hidden features."""
        pred = self.out(x).transpose(1, 2)
        log_mag, phase = pred.chunk(2, dim=1)
        # Exponentiate, then cap so a wild prediction cannot blow up the
        # waveform.
        mag = torch.exp(log_mag)
        mag = torch.clip(mag, max=1e2)
        # Build the complex spectrogram directly from cos/sin of the phase;
        # recomputing the phase via atan2 + exp would produce nothing new.
        real = torch.cos(phase)
        imag = torch.sin(phase)
        spec = mag * (real + 1j * imag)
        return self.istft(spec)
+
+
class IMDCTSymExpHead(FourierHead):
    """
    IMDCT Head module for predicting MDCT coefficients with symmetric exponential function

    Args:
        dim (int): Hidden dimension of the model.
        mdct_frame_len (int): Length of the MDCT frame.
        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
        sample_rate (int, optional): The sample rate of the audio. If provided, the last layer will be initialized
            based on perceptual scaling. Defaults to None.
        clip_audio (bool, optional): Whether to clip the audio output within the range of [-1.0, 1.0]. Defaults to False.
    """

    def __init__(
        self,
        dim: int,
        mdct_frame_len: int,
        padding: str = "same",
        sample_rate: Optional[int] = None,
        clip_audio: bool = False,
    ):
        super().__init__()
        out_dim = mdct_frame_len // 2
        self.out = nn.Linear(dim, out_dim)
        self.imdct = IMDCT(frame_len=mdct_frame_len, padding=padding)
        self.clip_audio = clip_audio

        if sample_rate is not None:
            # Optionally init the last layer following the mel scale: bands
            # mapping to higher frequencies start with smaller weights.
            m_max = _hz_to_mel(sample_rate // 2)
            m_pts = torch.linspace(0, m_max, out_dim)
            f_pts = _mel_to_hz(m_pts)
            scale = 1 - (f_pts / f_pts.max())

            with torch.no_grad():
                self.out.weight.mul_(scale.view(-1, 1))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass of the IMDCTSymExpHead module.

        Args:
            x (Tensor): Input tensor of shape (B, L, H), where B is the batch size,
                L is the sequence length, and H denotes the model dimension.

        Returns:
            Tensor: Reconstructed time-domain audio signal of shape (B, T).
        """
        x = self.out(x)
        x = symexp(x)
        x = torch.clip(x, min=-1e2, max=1e2)  # safeguard to prevent excessively large magnitudes
        audio = self.imdct(x)
        if self.clip_audio:
            # BUG FIX: clamp the synthesized waveform. The original clipped
            # `x` (the MDCT coefficient tensor) and returned that instead of
            # the audio when clip_audio was enabled.
            audio = torch.clip(audio, min=-1.0, max=1.0)

        return audio
+
+
class IMDCTCosHead(FourierHead):
    """
    IMDCT Head module for predicting MDCT coefficients with parametrizing MDCT = exp(m) · cos(p)

    Args:
        dim (int): Hidden dimension of the model.
        mdct_frame_len (int): Length of the MDCT frame.
        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
        clip_audio (bool, optional): Whether to clip the audio output within the range of [-1.0, 1.0]. Defaults to False.
    """

    def __init__(self, dim: int, mdct_frame_len: int, padding: str = "same", clip_audio: bool = False):
        super().__init__()
        self.clip_audio = clip_audio
        self.out = nn.Linear(dim, mdct_frame_len)
        self.imdct = IMDCT(frame_len=mdct_frame_len, padding=padding)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass of the IMDCTCosHead module.

        Args:
            x (Tensor): Input tensor of shape (B, L, H), where B is the batch size,
                L is the sequence length, and H denotes the model dimension.

        Returns:
            Tensor: Reconstructed time-domain audio signal of shape (B, T).
        """
        x = self.out(x)
        m, p = x.chunk(2, dim=2)
        m = torch.exp(m).clip(max=1e2)  # safeguard to prevent excessively large magnitudes
        audio = self.imdct(m * torch.cos(p))
        if self.clip_audio:
            # BUG FIX: clamp the synthesized waveform. The original clipped
            # `x` (the linear-layer output) and returned that instead of the
            # audio when clip_audio was enabled.
            audio = torch.clip(audio, min=-1.0, max=1.0)
        return audio
diff --git a/indextts/s2mel/modules/vocos/helpers.py b/indextts/s2mel/modules/vocos/helpers.py
new file mode 100644
index 0000000..3d30301
--- /dev/null
+++ b/indextts/s2mel/modules/vocos/helpers.py
@@ -0,0 +1,71 @@
+import matplotlib
+import numpy as np
+import torch
+from matplotlib import pyplot as plt
+from pytorch_lightning import Callback
+
+matplotlib.use("Agg")
+
+
def save_figure_to_numpy(fig: plt.Figure) -> np.ndarray:
    """
    Save a matplotlib figure to a numpy array.

    Args:
        fig (Figure): Matplotlib figure object; its canvas must already be drawn.

    Returns:
        ndarray: RGB image of shape (height, width, 3), dtype uint8.
    """
    canvas = fig.canvas
    if hasattr(canvas, "buffer_rgba"):
        # Modern matplotlib: `tostring_rgb` was deprecated in 3.8 and later
        # removed; `buffer_rgba` is the supported accessor. Drop the alpha
        # channel to keep the original (H, W, 3) RGB contract.
        rgba = np.asarray(canvas.buffer_rgba())
        return rgba[..., :3].copy()
    # Fallback for old matplotlib. np.frombuffer replaces np.fromstring,
    # which is deprecated for binary input; copy() restores writability
    # (frombuffer returns a read-only view, fromstring returned a copy).
    data = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8).copy()
    return data.reshape(canvas.get_width_height()[::-1] + (3,))
+
+
def plot_spectrogram_to_numpy(spectrogram: np.ndarray) -> np.ndarray:
    """
    Plot a spectrogram and convert it to a numpy array.

    Args:
        spectrogram (ndarray): 2-D spectrogram data (channels x frames).

    Returns:
        ndarray: Numpy array representing the plotted spectrogram.
    """
    spectrogram = spectrogram.astype(np.float32)
    fig, ax = plt.subplots(figsize=(12, 3))
    im = ax.imshow(spectrogram, aspect="auto", origin="lower", interpolation="none")
    plt.colorbar(im, ax=ax)
    plt.xlabel("Frames")
    plt.ylabel("Channels")
    plt.tight_layout()

    fig.canvas.draw()
    data = save_figure_to_numpy(fig)
    # Close this specific figure; a bare plt.close() closes whatever figure
    # happens to be "current", which may not be ours if other code plotted
    # in the meantime.
    plt.close(fig)
    return data
+
+
class GradNormCallback(Callback):
    """Lightning callback that logs the model's gradient norm after backward."""

    def on_after_backward(self, trainer, model):
        # Compute once, then log under the metric key used elsewhere.
        norm = gradient_norm(model)
        model.log("grad_norm", norm)
+
+
def gradient_norm(model: torch.nn.Module, norm_type: float = 2.0) -> torch.Tensor:
    """
    Compute the total gradient norm over all parameters of ``model``.

    Args:
        model (Module): PyTorch model.
        norm_type (float, optional): Type of the norm. Defaults to 2.0.

    Returns:
        Tensor: Scalar tensor with the gradient norm. Returns 0.0 when no
        parameter has a gradient yet (the original raised on
        ``torch.stack([])`` if called before any backward pass).
    """
    grads = [p.grad for p in model.parameters() if p.grad is not None]
    if not grads:
        return torch.tensor(0.0)
    per_param = torch.stack([torch.norm(g.detach(), norm_type) for g in grads])
    return torch.norm(per_param, norm_type)
diff --git a/indextts/s2mel/modules/vocos/loss.py b/indextts/s2mel/modules/vocos/loss.py
new file mode 100644
index 0000000..029f6ac
--- /dev/null
+++ b/indextts/s2mel/modules/vocos/loss.py
@@ -0,0 +1,114 @@
+from typing import List, Tuple
+
+import torch
+import torchaudio
+from torch import nn
+
from .modules import safe_log
+
+
class MelSpecReconstructionLoss(nn.Module):
    """
    L1 distance between the mel-scaled magnitude spectrograms of the ground
    truth sample and the generated sample.
    """

    def __init__(
        self, sample_rate: int = 24000, n_fft: int = 1024, hop_length: int = 256, n_mels: int = 100,
    ):
        super().__init__()
        # Magnitude (power=1) mel spectrogram shared by both inputs.
        self.mel_spec = torchaudio.transforms.MelSpectrogram(
            sample_rate=sample_rate,
            n_fft=n_fft,
            hop_length=hop_length,
            n_mels=n_mels,
            center=True,
            power=1,
        )

    def forward(self, y_hat, y) -> torch.Tensor:
        """
        Args:
            y_hat (Tensor): Predicted audio waveform.
            y (Tensor): Ground truth audio waveform.

        Returns:
            Tensor: L1 loss between the log-mel magnitude spectrograms.
        """
        # Compare in the log-mel domain; safe_log guards against log(0).
        pred_mel = safe_log(self.mel_spec(y_hat))
        target_mel = safe_log(self.mel_spec(y))
        return torch.nn.functional.l1_loss(target_mel, pred_mel)
+
+
class GeneratorLoss(nn.Module):
    """
    Hinge-style generator loss computed from the discriminator outputs.
    """

    def forward(self, disc_outputs: List[torch.Tensor]) -> Tuple[torch.Tensor, List[torch.Tensor]]:
        """
        Args:
            disc_outputs (List[Tensor]): List of discriminator outputs.

        Returns:
            Tuple[Tensor, List[Tensor]]: Total loss and the per-sub-discriminator
            loss values.
        """
        # Per-discriminator hinge loss: mean(max(0, 1 - D(G(z)))).
        gen_losses = [torch.mean(torch.clamp(1 - dg, min=0)) for dg in disc_outputs]
        loss = torch.zeros(1, device=disc_outputs[0].device, dtype=disc_outputs[0].dtype)
        for partial in gen_losses:
            loss = loss + partial
        return loss, gen_losses
+
+
class DiscriminatorLoss(nn.Module):
    """
    Hinge loss for the discriminator over real and generated outputs.
    """

    def forward(
        self, disc_real_outputs: List[torch.Tensor], disc_generated_outputs: List[torch.Tensor]
    ) -> Tuple[torch.Tensor, List[torch.Tensor], List[torch.Tensor]]:
        """
        Args:
            disc_real_outputs (List[Tensor]): Discriminator outputs for real samples.
            disc_generated_outputs (List[Tensor]): Discriminator outputs for generated samples.

        Returns:
            Tuple[Tensor, List[Tensor], List[Tensor]]: Total loss, per-discriminator
            real losses, and per-discriminator generated losses.
        """
        # Pair up sub-discriminator outputs; hinge terms are
        # mean(max(0, 1 - D(x))) for real and mean(max(0, 1 + D(G(z)))) for fake.
        pairs = list(zip(disc_real_outputs, disc_generated_outputs))
        r_losses = [torch.mean(torch.clamp(1 - dr, min=0)) for dr, _ in pairs]
        g_losses = [torch.mean(torch.clamp(1 + dg, min=0)) for _, dg in pairs]
        loss = torch.zeros(1, device=disc_real_outputs[0].device, dtype=disc_real_outputs[0].dtype)
        for r_loss, g_loss in zip(r_losses, g_losses):
            loss = loss + r_loss + g_loss
        return loss, r_losses, g_losses
+
+
class FeatureMatchingLoss(nn.Module):
    """
    L1 feature-matching loss between discriminator feature maps of real and
    generated samples.
    """

    def forward(self, fmap_r: List[List[torch.Tensor]], fmap_g: List[List[torch.Tensor]]) -> torch.Tensor:
        """
        Args:
            fmap_r (List[List[Tensor]]): Feature maps from real samples.
            fmap_g (List[List[Tensor]]): Feature maps from generated samples.

        Returns:
            Tensor: The accumulated feature matching loss.
        """
        loss = torch.zeros(1, device=fmap_r[0][0].device, dtype=fmap_r[0][0].dtype)
        # Sum mean absolute differences over every feature map of every
        # sub-discriminator.
        for real_maps, gen_maps in zip(fmap_r, fmap_g):
            for real_feat, gen_feat in zip(real_maps, gen_maps):
                loss = loss + torch.mean(torch.abs(real_feat - gen_feat))
        return loss
diff --git a/indextts/s2mel/modules/vocos/models.py b/indextts/s2mel/modules/vocos/models.py
new file mode 100644
index 0000000..a953275
--- /dev/null
+++ b/indextts/s2mel/modules/vocos/models.py
@@ -0,0 +1,118 @@
+from typing import Optional
+
+import torch
+from torch import nn
+from torch.nn.utils import weight_norm
+
+from .modules import ConvNeXtBlock, ResBlock1, AdaLayerNorm
+
+
class Backbone(nn.Module):
    """Base class for the generator's backbone. It preserves the same temporal resolution across all layers."""

    def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
        """Map (B, C, L) input features to (B, L, H) hidden states.

        Raises:
            NotImplementedError: always; concrete backbones must override.
        """
        raise NotImplementedError("Subclasses must implement the forward method.")
+
+
class VocosBackbone(Backbone):
    """
    Vocos backbone module built with ConvNeXt blocks. Supports additional conditioning with Adaptive Layer Normalization

    Args:
        input_channels (int): Number of input features channels.
        dim (int): Hidden dimension of the model.
        intermediate_dim (int): Intermediate dimension used in ConvNeXtBlock.
        num_layers (int): Number of ConvNeXtBlock layers.
        layer_scale_init_value (float, optional): Initial value for layer scaling. Defaults to `1 / num_layers`.
        adanorm_num_embeddings (int, optional): Number of embeddings for AdaLayerNorm.
            None (or 0) means non-conditional model. Defaults to None.
    """

    def __init__(
        self,
        input_channels: int,
        dim: int,
        intermediate_dim: int,
        num_layers: int,
        layer_scale_init_value: Optional[float] = None,
        adanorm_num_embeddings: Optional[int] = None,
    ):
        super().__init__()
        self.input_channels = input_channels
        self.embed = nn.Conv1d(input_channels, dim, kernel_size=7, padding=3)
        # CONSISTENCY FIX: the flag previously used `is not None` while the
        # branch below used truthiness, so passing 0 built a plain LayerNorm
        # but forward() still tried to call it with cond_embedding_id.
        # Both now agree: None/0 means non-conditional.
        self.adanorm = bool(adanorm_num_embeddings)
        if self.adanorm:
            self.norm = AdaLayerNorm(adanorm_num_embeddings, dim, eps=1e-6)
        else:
            self.norm = nn.LayerNorm(dim, eps=1e-6)
        layer_scale_init_value = layer_scale_init_value or 1 / num_layers
        self.convnext = nn.ModuleList(
            [
                ConvNeXtBlock(
                    dim=dim,
                    intermediate_dim=intermediate_dim,
                    layer_scale_init_value=layer_scale_init_value,
                    adanorm_num_embeddings=adanorm_num_embeddings,
                )
                for _ in range(num_layers)
            ]
        )
        self.final_layer_norm = nn.LayerNorm(dim, eps=1e-6)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        # Truncated-normal init for conv/linear weights; guard the bias,
        # which may be None for layers constructed with bias=False.
        if isinstance(m, (nn.Conv1d, nn.Linear)):
            nn.init.trunc_normal_(m.weight, std=0.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)

    def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
        """Map (B, C, L) input features to (B, L, H) hidden features.

        Keyword Args:
            bandwidth_id (Tensor, optional): Conditioning embedding index;
                required when the model was built with AdaLayerNorm.
        """
        bandwidth_id = kwargs.get('bandwidth_id', None)
        x = self.embed(x)
        if self.adanorm:
            assert bandwidth_id is not None
            x = self.norm(x.transpose(1, 2), cond_embedding_id=bandwidth_id)
        else:
            x = self.norm(x.transpose(1, 2))
        x = x.transpose(1, 2)
        for conv_block in self.convnext:
            x = conv_block(x, cond_embedding_id=bandwidth_id)
        x = self.final_layer_norm(x.transpose(1, 2))
        return x
+
+
class VocosResNetBackbone(Backbone):
    """
    Vocos backbone module built with ResBlocks.

    Args:
        input_channels (int): Number of input features channels.
        dim (int): Hidden dimension of the model.
        num_blocks (int): Number of ResBlock1 blocks.
        layer_scale_init_value (float, optional): Initial value for layer scaling. Defaults to None.
    """

    def __init__(
        self, input_channels, dim, num_blocks, layer_scale_init_value=None,
    ):
        super().__init__()
        self.input_channels = input_channels
        self.embed = weight_norm(nn.Conv1d(input_channels, dim, kernel_size=3, padding=1))
        # Default layer scale follows the original recipe: 1 / (3 * num_blocks).
        if not layer_scale_init_value:
            layer_scale_init_value = 1 / num_blocks / 3
        blocks = [
            ResBlock1(dim=dim, layer_scale_init_value=layer_scale_init_value)
            for _ in range(num_blocks)
        ]
        self.resnet = nn.Sequential(*blocks)

    def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
        """Map (B, C, L) features to (B, L, H) hidden states."""
        hidden = self.resnet(self.embed(x))
        return hidden.transpose(1, 2)
diff --git a/indextts/s2mel/modules/vocos/modules.py b/indextts/s2mel/modules/vocos/modules.py
new file mode 100644
index 0000000..af1d6db
--- /dev/null
+++ b/indextts/s2mel/modules/vocos/modules.py
@@ -0,0 +1,213 @@
+from typing import Optional, Tuple
+
+import torch
+from torch import nn
+from torch.nn.utils import weight_norm, remove_weight_norm
+
+
class ConvNeXtBlock(nn.Module):
    """ConvNeXt Block adapted from https://github.com/facebookresearch/ConvNeXt to 1D audio signal.

    Args:
        dim (int): Number of input channels.
        intermediate_dim (int): Dimensionality of the intermediate layer.
        layer_scale_init_value (float, optional): Initial value for the layer scale. None means no scaling.
            Defaults to None.
        adanorm_num_embeddings (int, optional): Number of embeddings for AdaLayerNorm.
            None (or 0) means non-conditional LayerNorm. Defaults to None.
    """

    def __init__(
        self,
        dim: int,
        intermediate_dim: int,
        layer_scale_init_value: float,
        adanorm_num_embeddings: Optional[int] = None,
    ):
        super().__init__()
        self.dwconv = nn.Conv1d(dim, dim, kernel_size=7, padding=3, groups=dim)  # depthwise conv
        # CONSISTENCY FIX: the flag previously used `is not None` while the
        # branch below used truthiness, which disagreed for 0 and produced a
        # LayerNorm that forward() then called with cond_embedding_id.
        # Both now agree: None/0 means non-conditional.
        self.adanorm = bool(adanorm_num_embeddings)
        if self.adanorm:
            self.norm = AdaLayerNorm(adanorm_num_embeddings, dim, eps=1e-6)
        else:
            self.norm = nn.LayerNorm(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(dim, intermediate_dim)  # pointwise/1x1 convs, implemented with linear layers
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(intermediate_dim, dim)
        # Optional layer-scale parameter; disabled when init value is <= 0.
        self.gamma = (
            nn.Parameter(layer_scale_init_value * torch.ones(dim), requires_grad=True)
            if layer_scale_init_value > 0
            else None
        )

    def forward(self, x: torch.Tensor, cond_embedding_id: Optional[torch.Tensor] = None) -> torch.Tensor:
        """Apply the block; input and output are both (B, C, T)."""
        residual = x
        x = self.dwconv(x)
        x = x.transpose(1, 2)  # (B, C, T) -> (B, T, C)
        if self.adanorm:
            assert cond_embedding_id is not None
            x = self.norm(x, cond_embedding_id)
        else:
            x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.gamma is not None:
            x = self.gamma * x
        x = x.transpose(1, 2)  # (B, T, C) -> (B, C, T)
        return residual + x
+
+
class AdaLayerNorm(nn.Module):
    """
    Adaptive Layer Normalization module with learnable embeddings per `num_embeddings` classes

    Args:
        num_embeddings (int): Number of embeddings.
        embedding_dim (int): Dimension of the embeddings.
        eps (float): Numerical-stability constant for the normalization.
    """

    def __init__(self, num_embeddings: int, embedding_dim: int, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        self.dim = embedding_dim
        # Per-class affine parameters, initialized to the identity transform
        # (scale 1, shift 0).
        self.scale = nn.Embedding(num_embeddings=num_embeddings, embedding_dim=embedding_dim)
        self.shift = nn.Embedding(num_embeddings=num_embeddings, embedding_dim=embedding_dim)
        torch.nn.init.ones_(self.scale.weight)
        torch.nn.init.zeros_(self.shift.weight)

    def forward(self, x: torch.Tensor, cond_embedding_id: torch.Tensor) -> torch.Tensor:
        """Normalize over the last dim, then apply the class-conditional affine."""
        normed = nn.functional.layer_norm(x, (self.dim,), eps=self.eps)
        return normed * self.scale(cond_embedding_id) + self.shift(cond_embedding_id)
+
+
class ResBlock1(nn.Module):
    """
    ResBlock adapted from HiFi-GAN V1 (https://github.com/jik876/hifi-gan) with dilated 1D convolutions,
    but without upsampling layers.

    Args:
        dim (int): Number of input channels.
        kernel_size (int, optional): Size of the convolutional kernel. Defaults to 3.
        dilation (tuple[int], optional): Dilation factors for the dilated convolutions.
            Defaults to (1, 3, 5).
        lrelu_slope (float, optional): Negative slope of the LeakyReLU activation function.
            Defaults to 0.1.
        layer_scale_init_value (float, optional): Initial value for the layer scale. None means no scaling.
            Defaults to None.
    """

    def __init__(
        self,
        dim: int,
        kernel_size: int = 3,
        dilation: Tuple[int, int, int] = (1, 3, 5),
        lrelu_slope: float = 0.1,
        layer_scale_init_value: Optional[float] = None,
    ):
        super().__init__()
        self.lrelu_slope = lrelu_slope
        # First conv of each branch uses an increasing dilation.
        self.convs1 = nn.ModuleList(
            [
                weight_norm(
                    nn.Conv1d(
                        dim,
                        dim,
                        kernel_size,
                        1,
                        dilation=d,
                        padding=self.get_padding(kernel_size, d),
                    )
                )
                for d in dilation
            ]
        )
        # Second conv of each branch always uses dilation 1.
        self.convs2 = nn.ModuleList(
            [
                weight_norm(
                    nn.Conv1d(
                        dim,
                        dim,
                        kernel_size,
                        1,
                        dilation=1,
                        padding=self.get_padding(kernel_size, 1),
                    )
                )
                for _ in dilation
            ]
        )
        # Optional per-branch layer-scale parameters (None disables scaling).
        self.gamma = nn.ParameterList(
            [
                nn.Parameter(layer_scale_init_value * torch.ones(dim, 1), requires_grad=True)
                if layer_scale_init_value is not None
                else None
                for _ in dilation
            ]
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the three dilated residual branches; (B, C, T) in and out."""
        for conv_a, conv_b, gamma in zip(self.convs1, self.convs2, self.gamma):
            branch = torch.nn.functional.leaky_relu(x, negative_slope=self.lrelu_slope)
            branch = conv_a(branch)
            branch = torch.nn.functional.leaky_relu(branch, negative_slope=self.lrelu_slope)
            branch = conv_b(branch)
            if gamma is not None:
                branch = gamma * branch
            x = x + branch
        return x

    def remove_weight_norm(self):
        """Strip the weight-norm reparametrization from all convolutions."""
        for conv in self.convs1:
            remove_weight_norm(conv)
        for conv in self.convs2:
            remove_weight_norm(conv)

    @staticmethod
    def get_padding(kernel_size: int, dilation: int = 1) -> int:
        # "Same" padding for a stride-1 dilated convolution.
        return int((kernel_size * dilation - dilation) / 2)
+
+
def safe_log(x: torch.Tensor, clip_val: float = 1e-7) -> torch.Tensor:
    """
    Computes the element-wise logarithm of the input tensor with clipping to avoid near-zero values.

    Args:
        x (Tensor): Input tensor.
        clip_val (float, optional): Minimum value to clip the input tensor. Defaults to 1e-7.

    Returns:
        Tensor: Element-wise logarithm of the input tensor with clipping applied.
    """
    # Clamp away from zero first so log never sees values below clip_val.
    return torch.clip(x, min=clip_val).log()
+
+
def symlog(x: torch.Tensor) -> torch.Tensor:
    """Sign-preserving log: symlog(x) = sign(x) * log(1 + |x|)."""
    magnitude = torch.log1p(torch.abs(x))
    return torch.sign(x) * magnitude
+
+
def symexp(x: torch.Tensor) -> torch.Tensor:
    """Inverse of symlog: symexp(x) = sign(x) * (exp(|x|) - 1)."""
    magnitude = torch.exp(torch.abs(x)) - 1
    return torch.sign(x) * magnitude
diff --git a/indextts/s2mel/modules/vocos/pretrained.py b/indextts/s2mel/modules/vocos/pretrained.py
new file mode 100644
index 0000000..dd8e494
--- /dev/null
+++ b/indextts/s2mel/modules/vocos/pretrained.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from typing import Any, Dict, Tuple, Union, Optional
+
+import torch
+import yaml
+from torch import nn
+from .heads import ISTFTHead
+from .models import VocosBackbone
+
+
class Vocos(nn.Module):
    """
    The Vocos class represents a Fourier-based neural vocoder for audio synthesis.
    This class is primarily designed for inference, with support for loading from
    pretrained model checkpoints. It consists of a ConvNeXt backbone and an ISTFT
    head, both configured from the `vocos` section of `args`.
    """

    def __init__(
        self, args,
    ):
        super().__init__()
        backbone_cfg = args.vocos.backbone
        head_cfg = args.vocos.head
        self.backbone = VocosBackbone(
            input_channels=backbone_cfg.input_channels,
            dim=backbone_cfg.dim,
            intermediate_dim=backbone_cfg.intermediate_dim,
            num_layers=backbone_cfg.num_layers,
        )
        self.head = ISTFTHead(
            dim=head_cfg.dim,
            n_fft=head_cfg.n_fft,
            hop_length=head_cfg.hop_length,
            padding=head_cfg.padding,
        )

    def forward(self, features_input: torch.Tensor, **kwargs: Any) -> torch.Tensor:
        """
        Decode an audio waveform from already-calculated features. The features
        are passed through the backbone and the head.

        Args:
            features_input (Tensor): Feature tensor of shape (B, C, L), where B is
                the batch size, C the feature dimension, and L the sequence length.

        Returns:
            Tensor: Reconstructed audio waveform of shape (B, T).
        """
        hidden = self.backbone(features_input, **kwargs)
        return self.head(hidden)
diff --git a/indextts/s2mel/modules/vocos/spectral_ops.py b/indextts/s2mel/modules/vocos/spectral_ops.py
new file mode 100644
index 0000000..a8eda1c
--- /dev/null
+++ b/indextts/s2mel/modules/vocos/spectral_ops.py
@@ -0,0 +1,192 @@
+import numpy as np
+import scipy
+import torch
+from torch import nn, view_as_real, view_as_complex
+
+
class ISTFT(nn.Module):
    """
    Custom implementation of ISTFT since torch.istft doesn't allow custom padding (other than `center=True`) with
    windowing. This is because the NOLA (Nonzero Overlap Add) check fails at the edges.
    See issue: https://github.com/pytorch/pytorch/issues/62323
    Specifically, in the context of neural vocoding we are interested in "same" padding analogous to CNNs.
    The NOLA constraint is met as we trim padded samples anyway.

    Args:
        n_fft (int): Size of Fourier transform.
        hop_length (int): The distance between neighboring sliding window frames.
        win_length (int): The size of window frame and STFT filter.
        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
    """

    def __init__(self, n_fft: int, hop_length: int, win_length: int, padding: str = "same"):
        super().__init__()
        if padding not in ["center", "same"]:
            raise ValueError("Padding must be 'center' or 'same'.")
        self.padding = padding
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.win_length = win_length
        self.register_buffer("window", torch.hann_window(win_length))

    def forward(self, spec: torch.Tensor) -> torch.Tensor:
        """
        Compute the Inverse Short Time Fourier Transform of a complex spectrogram.

        Args:
            spec (Tensor): Complex spectrogram of shape (B, N, T), with N frequency
                bins and T time frames.

        Returns:
            Tensor: Reconstructed time-domain signal of shape (B, L).
        """
        if self.padding == "center":
            # Fall back to the native implementation, which supports center padding.
            return torch.istft(spec, self.n_fft, self.hop_length, self.win_length, self.window, center=True)
        if self.padding != "same":
            raise ValueError("Padding must be 'center' or 'same'.")
        pad = (self.win_length - self.hop_length) // 2

        assert spec.dim() == 3, "Expected a 3D tensor as input"
        _, _, num_frames = spec.shape

        # Per-frame inverse FFT, then windowing.
        frames = torch.fft.irfft(spec, self.n_fft, dim=1, norm="backward")
        frames = frames * self.window[None, :, None]

        # Overlap-add via fold, then trim the "same" padding.
        output_size = (num_frames - 1) * self.hop_length + self.win_length
        signal = torch.nn.functional.fold(
            frames,
            output_size=(1, output_size),
            kernel_size=(1, self.win_length),
            stride=(1, self.hop_length),
        )[:, 0, 0, pad:-pad]

        # Sum of squared windows: normalizer for the overlap-add.
        window_sq = self.window.square().expand(1, num_frames, -1).transpose(1, 2)
        envelope = torch.nn.functional.fold(
            window_sq,
            output_size=(1, output_size),
            kernel_size=(1, self.win_length),
            stride=(1, self.hop_length),
        ).squeeze()[pad:-pad]

        # NOLA check: every retained sample must be covered by the window.
        assert (envelope > 1e-11).all()
        return signal / envelope
+
+
class MDCT(nn.Module):
    """
    Modified Discrete Cosine Transform (MDCT) module.

    Args:
        frame_len (int): Length of the MDCT frame.
        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
    """

    def __init__(self, frame_len: int, padding: str = "same"):
        super().__init__()
        if padding not in ["center", "same"]:
            raise ValueError("Padding must be 'center' or 'same'.")
        self.padding = padding
        self.frame_len = frame_len
        N = frame_len // 2
        n0 = (N + 1) / 2
        # FIX: `scipy.signal.cosine` was deprecated and removed (SciPy >= 1.13);
        # the window function now lives in `scipy.signal.windows`.
        from scipy.signal import windows
        window = torch.from_numpy(windows.cosine(frame_len)).float()
        self.register_buffer("window", window)

        pre_twiddle = torch.exp(-1j * torch.pi * torch.arange(frame_len) / frame_len)
        post_twiddle = torch.exp(-1j * torch.pi * n0 * (torch.arange(N) + 0.5) / N)
        # view_as_real: NCCL Backend does not support ComplexFloat data type
        # https://github.com/pytorch/pytorch/issues/71613
        self.register_buffer("pre_twiddle", view_as_real(pre_twiddle))
        self.register_buffer("post_twiddle", view_as_real(post_twiddle))

    def forward(self, audio: torch.Tensor) -> torch.Tensor:
        """
        Apply the Modified Discrete Cosine Transform (MDCT) to the input audio.

        Args:
            audio (Tensor): Input audio waveform of shape (B, T).

        Returns:
            Tensor: MDCT coefficients of shape (B, L, N), where L is the number of
            output frames and N is the number of frequency bins.
        """
        if self.padding == "center":
            audio = torch.nn.functional.pad(audio, (self.frame_len // 2, self.frame_len // 2))
        elif self.padding == "same":
            # hop_length is 1/2 frame_len
            audio = torch.nn.functional.pad(audio, (self.frame_len // 4, self.frame_len // 4))
        else:
            raise ValueError("Padding must be 'center' or 'same'.")

        x = audio.unfold(-1, self.frame_len, self.frame_len // 2)
        N = self.frame_len // 2
        x = x * self.window.expand(x.shape)
        X = torch.fft.fft(x * view_as_complex(self.pre_twiddle).expand(x.shape), dim=-1)[..., :N]
        res = X * view_as_complex(self.post_twiddle).expand(X.shape) * np.sqrt(1 / N)
        return torch.real(res) * np.sqrt(2)
+
+
class IMDCT(nn.Module):
    """
    Inverse Modified Discrete Cosine Transform (IMDCT) module.

    Args:
        frame_len (int): Length of the MDCT frame.
        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
    """

    def __init__(self, frame_len: int, padding: str = "same"):
        super().__init__()
        if padding not in ["center", "same"]:
            raise ValueError("Padding must be 'center' or 'same'.")
        self.padding = padding
        self.frame_len = frame_len
        N = frame_len // 2
        n0 = (N + 1) / 2
        # FIX: `scipy.signal.cosine` was deprecated and removed (SciPy >= 1.13);
        # the window function now lives in `scipy.signal.windows`.
        from scipy.signal import windows
        window = torch.from_numpy(windows.cosine(frame_len)).float()
        self.register_buffer("window", window)

        pre_twiddle = torch.exp(1j * torch.pi * n0 * torch.arange(N * 2) / N)
        post_twiddle = torch.exp(1j * torch.pi * (torch.arange(N * 2) + n0) / (N * 2))
        # Stored as real pairs: NCCL does not support complex tensors.
        self.register_buffer("pre_twiddle", view_as_real(pre_twiddle))
        self.register_buffer("post_twiddle", view_as_real(post_twiddle))

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """
        Apply the Inverse Modified Discrete Cosine Transform (IMDCT) to the input
        MDCT coefficients.

        Args:
            X (Tensor): Input MDCT coefficients of shape (B, L, N), with L frames
                and N frequency bins.

        Returns:
            Tensor: Reconstructed audio waveform of shape (B, T).
        """
        B, L, N = X.shape
        # Mirror the spectrum to the full frame length with odd symmetry.
        Y = torch.zeros((B, L, N * 2), dtype=X.dtype, device=X.device)
        Y[..., :N] = X
        Y[..., N:] = -1 * torch.conj(torch.flip(X, dims=(-1,)))
        y = torch.fft.ifft(Y * view_as_complex(self.pre_twiddle).expand(Y.shape), dim=-1)
        y = torch.real(y * view_as_complex(self.post_twiddle).expand(y.shape)) * np.sqrt(N) * np.sqrt(2)
        result = y * self.window.expand(y.shape)
        # Overlap-add with hop = frame_len // 2.
        output_size = (1, (L + 1) * N)
        audio = torch.nn.functional.fold(
            result.transpose(1, 2),
            output_size=output_size,
            kernel_size=(1, self.frame_len),
            stride=(1, self.frame_len // 2),
        )[:, 0, 0, :]

        if self.padding == "center":
            pad = self.frame_len // 2
        elif self.padding == "same":
            pad = self.frame_len // 4
        else:
            raise ValueError("Padding must be 'center' or 'same'.")

        return audio[:, pad:-pad]
diff --git a/indextts/s2mel/modules/wavenet.py b/indextts/s2mel/modules/wavenet.py
new file mode 100644
index 0000000..3989be1
--- /dev/null
+++ b/indextts/s2mel/modules/wavenet.py
@@ -0,0 +1,174 @@
+import math
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from indextts.s2mel.modules.encodec import SConv1d
+
+from . import commons
+LRELU_SLOPE = 0.1
+
+class LayerNorm(nn.Module):
+ def __init__(self, channels, eps=1e-5):
+ super().__init__()
+ self.channels = channels
+ self.eps = eps
+
+ self.gamma = nn.Parameter(torch.ones(channels))
+ self.beta = nn.Parameter(torch.zeros(channels))
+
+ def forward(self, x):
+ x = x.transpose(1, -1)
+ x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps)
+ return x.transpose(1, -1)
+
+
+class ConvReluNorm(nn.Module):
+ def __init__(self, in_channels, hidden_channels, out_channels, kernel_size, n_layers, p_dropout):
+ super().__init__()
+ self.in_channels = in_channels
+ self.hidden_channels = hidden_channels
+ self.out_channels = out_channels
+ self.kernel_size = kernel_size
+ self.n_layers = n_layers
+ self.p_dropout = p_dropout
+ assert n_layers > 1, "Number of layers should be larger than 0."
+
+ self.conv_layers = nn.ModuleList()
+ self.norm_layers = nn.ModuleList()
+ self.conv_layers.append(nn.Conv1d(in_channels, hidden_channels, kernel_size, padding=kernel_size // 2))
+ self.norm_layers.append(LayerNorm(hidden_channels))
+ self.relu_drop = nn.Sequential(
+ nn.ReLU(),
+ nn.Dropout(p_dropout))
+ for _ in range(n_layers - 1):
+ self.conv_layers.append(nn.Conv1d(hidden_channels, hidden_channels, kernel_size, padding=kernel_size // 2))
+ self.norm_layers.append(LayerNorm(hidden_channels))
+ self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
+ self.proj.weight.data.zero_()
+ self.proj.bias.data.zero_()
+
+ def forward(self, x, x_mask):
+ x_org = x
+ for i in range(self.n_layers):
+ x = self.conv_layers[i](x * x_mask)
+ x = self.norm_layers[i](x)
+ x = self.relu_drop(x)
+ x = x_org + self.proj(x)
+ return x * x_mask
+
+
+class DDSConv(nn.Module):
+    """
+    Dilated and Depth-Separable Convolution
+
+    n_layers of: depthwise conv (dilation = kernel_size**i) -> LayerNorm ->
+    GELU -> 1x1 pointwise conv -> LayerNorm -> GELU -> Dropout, each with a
+    residual connection. Operates on (B, C, T) under a (B, 1, T) mask.
+    """
+
+    def __init__(self, channels, kernel_size, n_layers, p_dropout=0.):
+        super().__init__()
+        self.channels = channels
+        self.kernel_size = kernel_size
+        self.n_layers = n_layers
+        self.p_dropout = p_dropout
+
+        self.drop = nn.Dropout(p_dropout)
+        self.convs_sep = nn.ModuleList()
+        self.convs_1x1 = nn.ModuleList()
+        self.norms_1 = nn.ModuleList()
+        self.norms_2 = nn.ModuleList()
+        for i in range(n_layers):
+            dilation = kernel_size ** i
+            # 'same' padding for the dilated depthwise (grouped) conv.
+            padding = (kernel_size * dilation - dilation) // 2
+            self.convs_sep.append(nn.Conv1d(channels, channels, kernel_size,
+                                            groups=channels, dilation=dilation, padding=padding
+                                            ))
+            self.convs_1x1.append(nn.Conv1d(channels, channels, 1))
+            self.norms_1.append(LayerNorm(channels))
+            self.norms_2.append(LayerNorm(channels))
+
+    def forward(self, x, x_mask, g=None):
+        # Optional additive conditioning g (same shape as x).
+        if g is not None:
+            x = x + g
+        for i in range(self.n_layers):
+            y = self.convs_sep[i](x * x_mask)
+            y = self.norms_1[i](y)
+            y = F.gelu(y)
+            y = self.convs_1x1[i](y)
+            y = self.norms_2[i](y)
+            y = F.gelu(y)
+            y = self.drop(y)
+            x = x + y
+        return x * x_mask
+
+
+class WN(torch.nn.Module):
+ def __init__(self, hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=0, p_dropout=0, causal=False):
+ super(WN, self).__init__()
+ conv1d_type = SConv1d
+ assert (kernel_size % 2 == 1)
+ self.hidden_channels = hidden_channels
+ self.kernel_size = kernel_size,
+ self.dilation_rate = dilation_rate
+ self.n_layers = n_layers
+ self.gin_channels = gin_channels
+ self.p_dropout = p_dropout
+
+ self.in_layers = torch.nn.ModuleList()
+ self.res_skip_layers = torch.nn.ModuleList()
+ self.drop = nn.Dropout(p_dropout)
+
+ if gin_channels != 0:
+ self.cond_layer = conv1d_type(gin_channels, 2 * hidden_channels * n_layers, 1, norm='weight_norm')
+
+ for i in range(n_layers):
+ dilation = dilation_rate ** i
+ padding = int((kernel_size * dilation - dilation) / 2)
+ in_layer = conv1d_type(hidden_channels, 2 * hidden_channels, kernel_size, dilation=dilation,
+ padding=padding, norm='weight_norm', causal=causal)
+ self.in_layers.append(in_layer)
+
+ # last one is not necessary
+ if i < n_layers - 1:
+ res_skip_channels = 2 * hidden_channels
+ else:
+ res_skip_channels = hidden_channels
+
+ res_skip_layer = conv1d_type(hidden_channels, res_skip_channels, 1, norm='weight_norm', causal=causal)
+ self.res_skip_layers.append(res_skip_layer)
+
+ def forward(self, x, x_mask, g=None, **kwargs):
+ output = torch.zeros_like(x)
+ n_channels_tensor = torch.IntTensor([self.hidden_channels])
+
+ if g is not None:
+ g = self.cond_layer(g)
+
+ for i in range(self.n_layers):
+ x_in = self.in_layers[i](x)
+ if g is not None:
+ cond_offset = i * 2 * self.hidden_channels
+ g_l = g[:, cond_offset:cond_offset + 2 * self.hidden_channels, :]
+ else:
+ g_l = torch.zeros_like(x_in)
+
+ acts = commons.fused_add_tanh_sigmoid_multiply(
+ x_in,
+ g_l,
+ n_channels_tensor)
+ acts = self.drop(acts)
+
+ res_skip_acts = self.res_skip_layers[i](acts)
+ if i < self.n_layers - 1:
+ res_acts = res_skip_acts[:, :self.hidden_channels, :]
+ x = (x + res_acts) * x_mask
+ output = output + res_skip_acts[:, self.hidden_channels:, :]
+ else:
+ output = output + res_skip_acts
+ return output * x_mask
+
+ def remove_weight_norm(self):
+ if self.gin_channels != 0:
+ torch.nn.utils.remove_weight_norm(self.cond_layer)
+ for l in self.in_layers:
+ torch.nn.utils.remove_weight_norm(l)
+ for l in self.res_skip_layers:
+ torch.nn.utils.remove_weight_norm(l)
diff --git a/indextts/s2mel/optimizers.py b/indextts/s2mel/optimizers.py
new file mode 100644
index 0000000..979ed1b
--- /dev/null
+++ b/indextts/s2mel/optimizers.py
@@ -0,0 +1,96 @@
+#coding:utf-8
+import os, sys
+import os.path as osp
+import numpy as np
+import torch
+from torch import nn
+from torch.optim import Optimizer
+from functools import reduce
+from torch.optim import AdamW
+
+class MultiOptimizer:
+ def __init__(self, optimizers={}, schedulers={}):
+ self.optimizers = optimizers
+ self.schedulers = schedulers
+ self.keys = list(optimizers.keys())
+ self.param_groups = reduce(lambda x,y: x+y, [v.param_groups for v in self.optimizers.values()])
+
+ def state_dict(self):
+ state_dicts = [(key, self.optimizers[key].state_dict())\
+ for key in self.keys]
+ return state_dicts
+
+ def scheduler_state_dict(self):
+ state_dicts = [(key, self.schedulers[key].state_dict())\
+ for key in self.keys]
+ return state_dicts
+
+ def load_state_dict(self, state_dict):
+ for key, val in state_dict:
+ try:
+ self.optimizers[key].load_state_dict(val)
+ except:
+ print("Unloaded %s" % key)
+
+ def load_scheduler_state_dict(self, state_dict):
+ for key, val in state_dict:
+ try:
+ self.schedulers[key].load_state_dict(val)
+ except:
+ print("Unloaded %s" % key)
+
+ def step(self, key=None, scaler=None):
+ keys = [key] if key is not None else self.keys
+ _ = [self._step(key, scaler) for key in keys]
+
+ def _step(self, key, scaler=None):
+ if scaler is not None:
+ scaler.step(self.optimizers[key])
+ scaler.update()
+ else:
+ self.optimizers[key].step()
+
+ def zero_grad(self, key=None):
+ if key is not None:
+ self.optimizers[key].zero_grad()
+ else:
+ _ = [self.optimizers[key].zero_grad() for key in self.keys]
+
+ def scheduler(self, *args, key=None):
+ if key is not None:
+ self.schedulers[key].step(*args)
+ else:
+ _ = [self.schedulers[key].step_batch(*args) for key in self.keys]
+
+def define_scheduler(optimizer, params):
+ scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=params['gamma'])
+
+ return scheduler
+
+def build_optimizer(model_dict, lr, type='AdamW'):
+ optim = {}
+ for key, model in model_dict.items():
+ model_parameters = model.parameters()
+ parameters_names = []
+ parameters_names.append(
+ [
+ name_param_pair[0]
+ for name_param_pair in model.named_parameters()
+ ]
+ )
+ if type == 'AdamW':
+ optim[key] = AdamW(
+ model_parameters,
+ lr=lr,
+ betas=(0.9, 0.98),
+ eps=1e-9,
+ weight_decay=0.1,
+ )
+ else:
+ raise ValueError('Unknown optimizer type: %s' % type)
+
+ schedulers = dict([(key, torch.optim.lr_scheduler.ExponentialLR(opt, gamma=0.999996))
+ for key, opt in optim.items()])
+
+ multi_optim = MultiOptimizer(optim, schedulers)
+ return multi_optim
\ No newline at end of file
diff --git a/indextts/s2mel/wav2vecbert_extract.py b/indextts/s2mel/wav2vecbert_extract.py
new file mode 100644
index 0000000..2aa845d
--- /dev/null
+++ b/indextts/s2mel/wav2vecbert_extract.py
@@ -0,0 +1,148 @@
+from transformers import SeamlessM4TFeatureExtractor
+from transformers import Wav2Vec2BertModel
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+import librosa
+import os
+import pickle
+import math
+import json
+import safetensors
+import json5
+# from codec.kmeans.repcodec_model import RepCodec
+from startts.examples.ftchar.models.codec.kmeans.repcodec_model import RepCodec
+
+class JsonHParams:
+ def __init__(self, **kwargs):
+ for k, v in kwargs.items():
+ if type(v) == dict:
+ v = JsonHParams(**v)
+ self[k] = v
+
+ def keys(self):
+ return self.__dict__.keys()
+
+ def items(self):
+ return self.__dict__.items()
+
+ def values(self):
+ return self.__dict__.values()
+
+ def __len__(self):
+ return len(self.__dict__)
+
+ def __getitem__(self, key):
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ return setattr(self, key, value)
+
+ def __contains__(self, key):
+ return key in self.__dict__
+
+ def __repr__(self):
+ return self.__dict__.__repr__()
+
+
+def _load_config(config_fn, lowercase=False):
+    """Load configurations into a dictionary
+
+    Args:
+        config_fn (str): path to configuration file
+        lowercase (bool, optional): whether changing keys to lower case. Defaults to False.
+
+    Returns:
+        dict: dictionary that stores configurations
+    """
+    with open(config_fn, "r") as f:
+        data = f.read()
+    config_ = json5.loads(data)
+    if "base_config" in config_:
+        # load configurations from new path
+        # NOTE(review): requires the WORK_DIR environment variable to be set —
+        # os.getenv returns None otherwise and os.path.join would raise.
+        p_config_path = os.path.join(os.getenv("WORK_DIR"), config_["base_config"])
+        p_config_ = _load_config(p_config_path)
+        # NOTE(review): override_config is not defined in this module —
+        # presumably imported elsewhere; verify before relying on base_config.
+        config_ = override_config(p_config_, config_)
+    if lowercase:
+        # change keys in config_ to lower case
+        # NOTE(review): get_lowercase_keys_config is also not defined here — confirm.
+        config_ = get_lowercase_keys_config(config_)
+    return config_
+
+
+def load_config(config_fn, lowercase=False):
+ """Load configurations into a dictionary
+
+ Args:
+ config_fn (str): path to configuration file
+ lowercase (bool, optional): _description_. Defaults to False.
+
+ Returns:
+ JsonHParams: an object that stores configurations
+ """
+ config_ = _load_config(config_fn, lowercase=lowercase)
+ # create an JsonHParams object with configuration dict
+ cfg = JsonHParams(**config_)
+ return cfg
+
+class Extract_wav2vectbert:
+    """Extract Wav2Vec2-BERT semantic features and discrete semantic codes.
+
+    Loads a local w2v-bert checkpoint, per-dimension normalization statistics,
+    and a RepCodec semantic codec. NOTE(review): all checkpoint paths are
+    hard-coded relative to the working directory (./MaskGCT_model/...), so this
+    must be run from the project root.
+    """
+
+    def __init__(self,device):
+        #semantic_model = Wav2Vec2BertModel.from_pretrained("facebook/w2v-bert-2.0")
+        self.semantic_model = Wav2Vec2BertModel.from_pretrained("./MaskGCT_model/w2v_bert/")
+        self.semantic_model.eval()
+        self.semantic_model.to(device)
+        # Per-dimension mean/std used to whiten the layer-17 hidden states.
+        self.stat_mean_var = torch.load("./MaskGCT_model/wav2vec2bert_stats.pt")
+        self.semantic_mean = self.stat_mean_var["mean"]
+        self.semantic_std = torch.sqrt(self.stat_mean_var["var"])
+        self.semantic_mean = self.semantic_mean.to(device)
+        self.semantic_std = self.semantic_std.to(device)
+        self.processor = SeamlessM4TFeatureExtractor.from_pretrained(
+            "./MaskGCT_model/w2v_bert/")
+        self.device = device
+
+        # Semantic codec (RepCodec) configured from the MaskGCT config file.
+        cfg_maskgct = load_config('./MaskGCT_model/maskgct.json')
+        cfg = cfg_maskgct.model.semantic_codec
+        self.semantic_code_ckpt = r'./MaskGCT_model/semantic_codec/model.safetensors'
+        self.semantic_codec = RepCodec(cfg=cfg)
+        self.semantic_codec.eval()
+        self.semantic_codec.to(device)
+        safetensors.torch.load_model(self.semantic_codec, self.semantic_code_ckpt)
+
+    @torch.no_grad()
+    def extract_features(self, speech): # speech [b,T]
+        """Run the SeamlessM4T feature extractor on 16 kHz waveforms.
+
+        Returns (input_features, attention_mask) as CPU tensors.
+        """
+        inputs = self.processor(speech, sampling_rate=16000, return_tensors="pt")
+        input_features = inputs["input_features"]
+        attention_mask = inputs["attention_mask"]
+        return input_features, attention_mask #[2, 620, 160] [2, 620]
+
+    @torch.no_grad()
+    def extract_semantic_code(self, input_features, attention_mask):
+        """Encode with w2v-bert and quantize whitened layer-17 states via RepCodec."""
+        vq_emb = self.semantic_model( # Wav2Vec2BertModel
+            input_features=input_features,
+            attention_mask=attention_mask,
+            output_hidden_states=True,
+        )
+        feat = vq_emb.hidden_states[17] # (B, T, C)
+        # Whiten with the precomputed corpus statistics before quantization.
+        feat = (feat - self.semantic_mean.to(feat)) / self.semantic_std.to(feat)
+
+        semantic_code, rec_feat = self.semantic_codec.quantize(feat) # (B, T)
+        return semantic_code, rec_feat
+
+    def feature_extract(self, prompt_speech):
+        """End-to-end helper: waveform -> (semantic_code, reconstructed features)."""
+        input_features, attention_mask = self.extract_features(prompt_speech)
+        input_features = input_features.to(self.device)
+        attention_mask = attention_mask.to(self.device)
+        semantic_code, rec_feat = self.extract_semantic_code(input_features, attention_mask)
+        return semantic_code,rec_feat
+
+if __name__=='__main__':
+    # Smoke test: load a wav at 16 kHz and run extraction on a stacked batch.
+    speech_path = 'test/magi1.wav'
+    speech = librosa.load(speech_path, sr=16000)[0]
+    # np.c_ stacks three copies column-wise; .T yields shape (3, T).
+    # NOTE(review): the old inline comment claimed [2, 198559], but three
+    # copies are stacked, so the batch dimension is 3.
+    speech = np.c_[speech,speech,speech].T
+    print(speech.shape)
+
+    Extract_feature = Extract_wav2vectbert('cuda:0')
+    semantic_code,rec_feat = Extract_feature.feature_extract(speech)
+    print(semantic_code.shape,rec_feat.shape)
+
diff --git a/indextts/utils/front.py b/indextts/utils/front.py
index 93da183..e6c2e44 100644
--- a/indextts/utils/front.py
+++ b/indextts/utils/front.py
@@ -111,7 +111,6 @@ class TextNormalizer:
self.en_normalizer = NormalizerEn(overwrite_cache=False)
def normalize(self, text: str) -> str:
- text = text.replace("嗯", "恩").replace("呣", "母")
if not self.zh_normalizer or not self.en_normalizer:
print("Error, text normalizer is not initialized !!!")
return ""
diff --git a/indextts/utils/maskgct/models/codec/__init__.py b/indextts/utils/maskgct/models/codec/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/indextts/utils/maskgct/models/codec/amphion_codec/codec.py b/indextts/utils/maskgct/models/codec/amphion_codec/codec.py
new file mode 100644
index 0000000..65d6234
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/amphion_codec/codec.py
@@ -0,0 +1,427 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+from torch.nn.utils import weight_norm
+
+from indextts.utils.maskgct.models.codec.amphion_codec.quantize import (
+ ResidualVQ,
+ VectorQuantize,
+ FactorizedVectorQuantize,
+ LookupFreeQuantize,
+)
+
+from indextts.utils.maskgct.models.codec.amphion_codec.vocos import Vocos
+
+
+def WNConv1d(*args, **kwargs):
+ return weight_norm(nn.Conv1d(*args, **kwargs))
+
+
+def WNConvTranspose1d(*args, **kwargs):
+ return weight_norm(nn.ConvTranspose1d(*args, **kwargs))
+
+
+# Scripting this brings model speed up 1.4x
+@torch.jit.script
+def snake(x, alpha):
+    """Snake activation: x + sin^2(alpha * x) / alpha, applied elementwise.
+
+    Flattens trailing dims to (B, C, -1) for the computation and restores the
+    original shape; the 1e-9 guards the reciprocal against alpha == 0.
+    """
+    shape = x.shape
+    x = x.reshape(shape[0], shape[1], -1)
+    x = x + (alpha + 1e-9).reciprocal() * torch.sin(alpha * x).pow(2)
+    x = x.reshape(shape)
+    return x
+
+
+class Snake1d(nn.Module):
+ def __init__(self, channels):
+ super().__init__()
+ self.alpha = nn.Parameter(torch.ones(1, channels, 1))
+
+ def forward(self, x):
+ return snake(x, self.alpha)
+
+
+def init_weights(m):
+ if isinstance(m, nn.Conv1d):
+ nn.init.trunc_normal_(m.weight, std=0.02)
+ nn.init.constant_(m.bias, 0)
+ if isinstance(m, nn.Linear):
+ nn.init.trunc_normal_(m.weight, std=0.02)
+ nn.init.constant_(m.bias, 0)
+
+
+class ResidualUnit(nn.Module):
+ def __init__(self, dim: int = 16, dilation: int = 1):
+ super().__init__()
+ pad = ((7 - 1) * dilation) // 2
+ self.block = nn.Sequential(
+ Snake1d(dim),
+ WNConv1d(dim, dim, kernel_size=7, dilation=dilation, padding=pad),
+ Snake1d(dim),
+ WNConv1d(dim, dim, kernel_size=1),
+ )
+
+ def forward(self, x):
+ y = self.block(x)
+ pad = (x.shape[-1] - y.shape[-1]) // 2
+ if pad > 0:
+ x = x[..., pad:-pad]
+ return x + y
+
+
+class EncoderBlock(nn.Module):
+ def __init__(self, dim: int = 16, stride: int = 1):
+ super().__init__()
+ self.block = nn.Sequential(
+ ResidualUnit(dim // 2, dilation=1),
+ ResidualUnit(dim // 2, dilation=3),
+ ResidualUnit(dim // 2, dilation=9),
+ Snake1d(dim // 2),
+ WNConv1d(
+ dim // 2,
+ dim,
+ kernel_size=2 * stride,
+ stride=stride,
+ padding=math.ceil(stride / 2),
+ ),
+ )
+
+ def forward(self, x):
+ return self.block(x)
+
+
+class CodecEncoder(nn.Module):
+    """Waveform encoder: initial conv, strided EncoderBlocks (channels double
+    per stage), and a final projection to out_channels.
+
+    NOTE(review): the mutable default up_ratios=[4, 5, 5, 6] is only read,
+    never mutated, so the shared-default pitfall does not bite here.
+    """
+
+    def __init__(
+        self,
+        d_model: int = 64,
+        up_ratios: list = [4, 5, 5, 6],
+        out_channels: int = 256,
+        use_tanh: bool = False,
+        cfg=None,
+    ):
+        super().__init__()
+
+        # cfg, when supplied, overrides every keyword argument.
+        d_model = cfg.d_model if cfg is not None else d_model
+        up_ratios = cfg.up_ratios if cfg is not None else up_ratios
+        out_channels = cfg.out_channels if cfg is not None else out_channels
+        use_tanh = cfg.use_tanh if cfg is not None else use_tanh
+
+        # Create first convolution
+        self.block = [WNConv1d(1, d_model, kernel_size=7, padding=3)]
+
+        # Create EncoderBlocks that double channels as they downsample by `stride`
+        for stride in up_ratios:
+            d_model *= 2
+            self.block += [EncoderBlock(d_model, stride=stride)]
+
+        # Create last convolution
+        self.block += [
+            Snake1d(d_model),
+            WNConv1d(d_model, out_channels, kernel_size=3, padding=1),
+        ]
+
+        if use_tanh:
+            self.block += [nn.Tanh()]
+
+        # Wrap the layer list into nn.Sequential
+        self.block = nn.Sequential(*self.block)
+        self.enc_dim = d_model
+
+        self.reset_parameters()
+
+    def forward(self, x):
+        """Encode (B, 1, T) audio; time axis is downsampled by prod(up_ratios)."""
+        return self.block(x)
+
+    def reset_parameters(self):
+        # Truncated-normal init for all conv/linear layers.
+        self.apply(init_weights)
+
+
+class DecoderBlock(nn.Module):
+ def __init__(self, input_dim: int = 16, output_dim: int = 8, stride: int = 1):
+ super().__init__()
+ self.block = nn.Sequential(
+ Snake1d(input_dim),
+ WNConvTranspose1d(
+ input_dim,
+ output_dim,
+ kernel_size=2 * stride,
+ stride=stride,
+ padding=stride // 2 + stride % 2,
+ output_padding=stride % 2,
+ ),
+ ResidualUnit(output_dim, dilation=1),
+ ResidualUnit(output_dim, dilation=3),
+ ResidualUnit(output_dim, dilation=9),
+ )
+
+ def forward(self, x):
+ return self.block(x)
+
+
+class CodecDecoder(nn.Module):
+    """Residual-VQ quantizer plus a waveform decoder (conv upsampler or Vocos).
+
+    Every keyword argument is overridden by the matching attribute of `cfg`
+    when one is supplied; the explicit keyword values act as defaults.
+    """
+
+    def __init__(
+        self,
+        in_channels: int = 256,
+        upsample_initial_channel: int = 1536,
+        up_ratios: list = [5, 5, 4, 2],
+        num_quantizers: int = 8,
+        codebook_size: int = 1024,
+        codebook_dim: int = 256,
+        quantizer_type: str = "vq",
+        quantizer_dropout: float = 0.5,
+        commitment: float = 0.25,
+        codebook_loss_weight: float = 1.0,
+        use_l2_normlize: bool = False,
+        codebook_type: str = "euclidean",
+        kmeans_init: bool = False,
+        kmeans_iters: int = 10,
+        decay: float = 0.8,
+        eps: float = 1e-5,
+        threshold_ema_dead_code: int = 2,
+        weight_init: bool = False,
+        use_vocos: bool = False,
+        vocos_dim: int = 384,
+        vocos_intermediate_dim: int = 1152,
+        vocos_num_layers: int = 8,
+        n_fft: int = 800,
+        hop_size: int = 200,
+        padding: str = "same",
+        cfg=None,
+    ):
+        super().__init__()
+
+        # --- cfg overrides: prefer cfg.<name> when present, else the default ---
+        in_channels = (
+            cfg.in_channels
+            if cfg is not None and hasattr(cfg, "in_channels")
+            else in_channels
+        )
+        upsample_initial_channel = (
+            cfg.upsample_initial_channel
+            if cfg is not None and hasattr(cfg, "upsample_initial_channel")
+            else upsample_initial_channel
+        )
+        up_ratios = (
+            cfg.up_ratios
+            if cfg is not None and hasattr(cfg, "up_ratios")
+            else up_ratios
+        )
+        num_quantizers = (
+            cfg.num_quantizers
+            if cfg is not None and hasattr(cfg, "num_quantizers")
+            else num_quantizers
+        )
+        codebook_size = (
+            cfg.codebook_size
+            if cfg is not None and hasattr(cfg, "codebook_size")
+            else codebook_size
+        )
+        codebook_dim = (
+            cfg.codebook_dim
+            if cfg is not None and hasattr(cfg, "codebook_dim")
+            else codebook_dim
+        )
+        quantizer_type = (
+            cfg.quantizer_type
+            if cfg is not None and hasattr(cfg, "quantizer_type")
+            else quantizer_type
+        )
+        quantizer_dropout = (
+            cfg.quantizer_dropout
+            if cfg is not None and hasattr(cfg, "quantizer_dropout")
+            else quantizer_dropout
+        )
+        commitment = (
+            cfg.commitment
+            if cfg is not None and hasattr(cfg, "commitment")
+            else commitment
+        )
+        codebook_loss_weight = (
+            cfg.codebook_loss_weight
+            if cfg is not None and hasattr(cfg, "codebook_loss_weight")
+            else codebook_loss_weight
+        )
+        use_l2_normlize = (
+            cfg.use_l2_normlize
+            if cfg is not None and hasattr(cfg, "use_l2_normlize")
+            else use_l2_normlize
+        )
+        codebook_type = (
+            cfg.codebook_type
+            if cfg is not None and hasattr(cfg, "codebook_type")
+            else codebook_type
+        )
+        kmeans_init = (
+            cfg.kmeans_init
+            if cfg is not None and hasattr(cfg, "kmeans_init")
+            else kmeans_init
+        )
+        kmeans_iters = (
+            cfg.kmeans_iters
+            if cfg is not None and hasattr(cfg, "kmeans_iters")
+            else kmeans_iters
+        )
+        decay = cfg.decay if cfg is not None and hasattr(cfg, "decay") else decay
+        eps = cfg.eps if cfg is not None and hasattr(cfg, "eps") else eps
+        threshold_ema_dead_code = (
+            cfg.threshold_ema_dead_code
+            if cfg is not None and hasattr(cfg, "threshold_ema_dead_code")
+            else threshold_ema_dead_code
+        )
+        weight_init = (
+            cfg.weight_init
+            if cfg is not None and hasattr(cfg, "weight_init")
+            else weight_init
+        )
+        use_vocos = (
+            cfg.use_vocos
+            if cfg is not None and hasattr(cfg, "use_vocos")
+            else use_vocos
+        )
+        vocos_dim = (
+            cfg.vocos_dim
+            if cfg is not None and hasattr(cfg, "vocos_dim")
+            else vocos_dim
+        )
+        vocos_intermediate_dim = (
+            cfg.vocos_intermediate_dim
+            if cfg is not None and hasattr(cfg, "vocos_intermediate_dim")
+            else vocos_intermediate_dim
+        )
+        vocos_num_layers = (
+            cfg.vocos_num_layers
+            if cfg is not None and hasattr(cfg, "vocos_num_layers")
+            else vocos_num_layers
+        )
+        n_fft = cfg.n_fft if cfg is not None and hasattr(cfg, "n_fft") else n_fft
+        hop_size = (
+            cfg.hop_size if cfg is not None and hasattr(cfg, "hop_size") else hop_size
+        )
+        padding = (
+            cfg.padding if cfg is not None and hasattr(cfg, "padding") else padding
+        )
+
+        # --- quantizer: all three variants are ResidualVQ stacks that differ
+        # only in per-stage quantizer type and accepted kwargs ---
+        if quantizer_type == "vq":
+            self.quantizer = ResidualVQ(
+                input_dim=in_channels,
+                num_quantizers=num_quantizers,
+                codebook_size=codebook_size,
+                codebook_dim=codebook_dim,
+                quantizer_type=quantizer_type,
+                quantizer_dropout=quantizer_dropout,
+                commitment=commitment,
+                codebook_loss_weight=codebook_loss_weight,
+                use_l2_normlize=use_l2_normlize,
+                codebook_type=codebook_type,
+                kmeans_init=kmeans_init,
+                kmeans_iters=kmeans_iters,
+                decay=decay,
+                eps=eps,
+                threshold_ema_dead_code=threshold_ema_dead_code,
+                weight_init=weight_init,
+            )
+        elif quantizer_type == "fvq":
+            self.quantizer = ResidualVQ(
+                input_dim=in_channels,
+                num_quantizers=num_quantizers,
+                codebook_size=codebook_size,
+                codebook_dim=codebook_dim,
+                quantizer_type=quantizer_type,
+                quantizer_dropout=quantizer_dropout,
+                commitment=commitment,
+                codebook_loss_weight=codebook_loss_weight,
+                use_l2_normlize=use_l2_normlize,
+            )
+        elif quantizer_type == "lfq":
+            self.quantizer = ResidualVQ(
+                input_dim=in_channels,
+                num_quantizers=num_quantizers,
+                codebook_size=codebook_size,
+                codebook_dim=codebook_dim,
+                quantizer_type=quantizer_type,
+            )
+        else:
+            raise ValueError(f"Unknown quantizer type {quantizer_type}")
+
+        if not use_vocos:
+            # Convolutional decoder: first conv, channel-halving DecoderBlocks,
+            # then snake + final conv + tanh.
+            channels = upsample_initial_channel
+            layers = [WNConv1d(in_channels, channels, kernel_size=7, padding=3)]
+
+            # Add upsampling + MRF blocks
+            for i, stride in enumerate(up_ratios):
+                input_dim = channels // 2**i
+                output_dim = channels // 2 ** (i + 1)
+                layers += [DecoderBlock(input_dim, output_dim, stride)]
+
+            # Add final conv layer
+            layers += [
+                Snake1d(output_dim),
+                WNConv1d(output_dim, 1, kernel_size=7, padding=3),
+                nn.Tanh(),
+            ]
+
+            self.model = nn.Sequential(*layers)
+
+        if use_vocos:
+            # Vocos-based decoder head replaces the conv stack.
+            self.model = Vocos(
+                input_channels=in_channels,
+                dim=vocos_dim,
+                intermediate_dim=vocos_intermediate_dim,
+                num_layers=vocos_num_layers,
+                adanorm_num_embeddings=None,
+                n_fft=n_fft,
+                hop_size=hop_size,
+                padding=padding,
+            )
+
+        self.reset_parameters()
+
+    def forward(self, x=None, vq=False, eval_vq=False, n_quantizers=None):
+        """
+        if vq is True, x = encoder output, then return quantized output;
+        else, x = quantized output, then return decoder output
+        """
+        if vq is True:
+            if eval_vq:
+                self.quantizer.eval()
+            (
+                quantized_out,
+                all_indices,
+                all_commit_losses,
+                all_codebook_losses,
+                all_quantized,
+            ) = self.quantizer(x, n_quantizers=n_quantizers)
+            return (
+                quantized_out,
+                all_indices,
+                all_commit_losses,
+                all_codebook_losses,
+                all_quantized,
+            )
+
+        return self.model(x)
+
+    def quantize(self, x, n_quantizers=None):
+        """Quantize encoder output x in eval mode; returns (quantized, indices)."""
+        self.quantizer.eval()
+        quantized_out, vq, _, _, _ = self.quantizer(x, n_quantizers=n_quantizers)
+        return quantized_out, vq
+
+    # TODO: check consistency of vq2emb and quantize
+    def vq2emb(self, vq, n_quantizers=None):
+        """Decode code indices back to embeddings via the quantizer."""
+        return self.quantizer.vq2emb(vq, n_quantizers=n_quantizers)
+
+    def decode(self, x):
+        """Run only the waveform decoder on quantized features."""
+        return self.model(x)
+
+    def latent2dist(self, x, n_quantizers=None):
+        """Return (distances, indices, embeddings) from the quantizer for latents x."""
+        return self.quantizer.latent2dist(x, n_quantizers=n_quantizers)
+
+    def reset_parameters(self):
+        # Truncated-normal init for all conv/linear layers.
+        self.apply(init_weights)
diff --git a/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py b/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py
new file mode 100644
index 0000000..6c2cf45
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from indextts.utils.maskgct.models.codec.amphion_codec.quantize.factorized_vector_quantize import (
+ FactorizedVectorQuantize,
+)
+from indextts.utils.maskgct.models.codec.amphion_codec.quantize.vector_quantize import VectorQuantize
+from indextts.utils.maskgct.models.codec.amphion_codec.quantize.lookup_free_quantize import LookupFreeQuantize
+from indextts.utils.maskgct.models.codec.amphion_codec.quantize.residual_vq import ResidualVQ
diff --git a/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py b/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py
new file mode 100644
index 0000000..5c359f8
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py
@@ -0,0 +1,150 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+from torch.nn.utils import weight_norm
+
+
+def WNConv1d(*args, **kwargs):
+ return weight_norm(nn.Conv1d(*args, **kwargs))
+
+
+def WNConvTranspose1d(*args, **kwargs):
+ return weight_norm(nn.ConvTranspose1d(*args, **kwargs))
+
+
+class FactorizedVectorQuantize(nn.Module):
+    """VQ layer that quantizes in a low-dimensional "factorized" code space.
+
+    Input is projected input_dim -> codebook_dim, matched against the codebook
+    (optionally under L2 normalization, making the distance equivalent to
+    cosine distance), and projected back. Training uses the straight-through
+    estimator for gradients.
+    """
+
+    def __init__(
+        self,
+        input_dim,
+        codebook_size,
+        codebook_dim,
+        commitment=0.005,
+        codebook_loss_weight=1.0,
+        use_l2_normlize=True,
+    ):
+        super().__init__()
+        self.input_dim = input_dim
+        self.codebook_size = codebook_size
+        self.codebook_dim = codebook_dim
+        self.commitment = commitment
+        self.codebook_loss_weight = codebook_loss_weight
+        self.use_l2_normlize = use_l2_normlize
+
+        # Projections are only needed when the code space has a different width.
+        if self.input_dim != self.codebook_dim:
+            self.in_project = WNConv1d(self.input_dim, self.codebook_dim, kernel_size=1)
+            self.out_project = WNConv1d(
+                self.codebook_dim, self.input_dim, kernel_size=1
+            )
+
+        else:
+            self.in_project = nn.Identity()
+            self.out_project = nn.Identity()
+
+        self.codebook = nn.Embedding(self.codebook_size, self.codebook_dim)
+
+    def forward(self, z):
+        """
+        Parameters
+        ----------
+        z: torch.Tensor[B x D x T]
+
+        Returns
+        -------
+        z_q: torch.Tensor[B x D x T]
+            Quantized continuous representation of input
+        commit_loss: Tensor[B]
+            Commitment loss to train encoder to predict vectors closer to codebook entries
+        codebook_loss: Tensor[B]
+            Codebook loss to update the codebook
+        indices: torch.Tensor[B x T]
+            Codebook indices (quantized discrete representation of input)
+        z_e: torch.Tensor[B x D x T]
+            Projected latents (continuous representation of input before quantization)
+        """
+
+        # Factorized codes project input into low-dimensional space if self.input_dim != self.codebook_dim
+        z_e = self.in_project(z)
+        z_q, indices = self.decode_latents(z_e)
+
+        # Compute commitment loss and codebook loss (training only; zeros in eval).
+        if self.training:
+            commit_loss = (
+                F.mse_loss(z_e, z_q.detach(), reduction="none").mean([1, 2])
+                * self.commitment
+            )
+            codebook_loss = (
+                F.mse_loss(z_q, z_e.detach(), reduction="none").mean([1, 2])
+                * self.codebook_loss_weight
+            )
+        else:
+            commit_loss = torch.zeros(z.shape[0], device=z.device)
+            codebook_loss = torch.zeros(z.shape[0], device=z.device)
+
+        # Straight-through estimator: values from z_q, gradients flow to z_e.
+        z_q = z_e + (z_q - z_e).detach()
+
+        z_q = self.out_project(z_q)
+
+        return z_q, commit_loss, codebook_loss, indices, z_e
+
+    def embed_code(self, embed_id):
+        """Look up codebook vectors for integer indices; returns (..., D)."""
+        return F.embedding(embed_id, self.codebook.weight)
+
+    def decode_code(self, embed_id):
+        """Look up codebook vectors and move D to the channel axis: (B, D, T)."""
+        return self.embed_code(embed_id).transpose(1, 2)
+
+    def decode_latents(self, latents):
+        """Nearest-codebook-entry quantization of (B, D, T) latents."""
+        encodings = rearrange(latents, "b d t -> (b t) d")
+        codebook = self.codebook.weight
+
+        # L2 normalize encodings and codebook
+        if self.use_l2_normlize:
+            encodings = F.normalize(encodings)
+            codebook = F.normalize(codebook)
+
+        # Compute euclidean distance between encodings and codebook,
+        # if use_l2_normlize is True, the distance is equal to cosine distance
+        dist = (
+            encodings.pow(2).sum(1, keepdim=True)
+            - 2 * encodings @ codebook.t()
+            + codebook.pow(2).sum(1, keepdim=True).t()
+        )
+        indices = rearrange((-dist).max(1)[1], "(b t) -> b t", b=latents.size(0))
+        z_q = self.decode_code(indices)
+
+        return z_q, indices
+
+    def vq2emb(self, vq, out_proj=True):
+        """Decode integer indices back to (projected) embeddings."""
+        emb = self.decode_code(vq)
+        if out_proj:
+            emb = self.out_project(emb)
+        return emb
+
+    def latent2dist(self, latents):
+        """Return (-distances, indices, quantized) for (B, D, T) latents."""
+        encodings = rearrange(latents, "b d t -> (b t) d")
+        codebook = self.codebook.weight
+
+        # L2 normalize encodings and codebook
+        if self.use_l2_normlize:
+            encodings = F.normalize(encodings)
+            codebook = F.normalize(codebook)
+
+        # Compute euclidean distance between encodings and codebook,
+        # if use_l2_normlize is True, the distance is equal to cosine distance
+        dist = (
+            encodings.pow(2).sum(1, keepdim=True)
+            - 2 * encodings @ codebook.t()
+            + codebook.pow(2).sum(1, keepdim=True).t()
+        )  # (b*t, k)
+
+        indices = rearrange((-dist).max(1)[1], "(b t) -> b t", b=latents.size(0))
+        dist = rearrange(dist, "(b t) k -> b t k", b=latents.size(0))
+        z_q = self.decode_code(indices)
+
+        return -dist, indices, z_q
diff --git a/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py b/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py
new file mode 100644
index 0000000..6b18015
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py
@@ -0,0 +1,77 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+from torch.nn.utils import weight_norm
+
+
+def WNConv1d(*args, **kwargs):
+ return weight_norm(nn.Conv1d(*args, **kwargs))
+
+
+def WNConvTranspose1d(*args, **kwargs):
+ return weight_norm(nn.ConvTranspose1d(*args, **kwargs))
+
+
+class LookupFreeQuantize(nn.Module):
+    """Binary lookup-free quantization (LFQ).
+
+    Each of codebook_dim channels is squashed with a sigmoid and rounded to
+    {0, 1}; the code index is the binary number formed by the channels with
+    channel 0 as the least-significant bit. Requires
+    codebook_size == 2 ** codebook_dim.
+    """
+
+    def __init__(
+        self,
+        input_dim,
+        codebook_size,
+        codebook_dim,
+    ):
+        super().__init__()
+        self.input_dim = input_dim
+        self.codebook_size = codebook_size
+        self.codebook_dim = codebook_dim
+
+        assert 2**codebook_dim == codebook_size
+
+        if self.input_dim != self.codebook_dim:
+            self.in_project = WNConv1d(self.input_dim, self.codebook_dim, kernel_size=1)
+            self.out_project = WNConv1d(
+                self.codebook_dim, self.input_dim, kernel_size=1
+            )
+
+        else:
+            self.in_project = nn.Identity()
+            self.out_project = nn.Identity()
+
+    def forward(self, z):
+        z_e = self.in_project(z)
+        z_e = F.sigmoid(z_e)
+
+        # Straight-through rounding to {0, 1}.
+        z_q = z_e + (torch.round(z_e) - z_e).detach()
+
+        z_q = self.out_project(z_q)
+
+        # LFQ trains no codebook: losses are zero placeholders kept for
+        # interface parity with the other quantizers.
+        commit_loss = torch.zeros(z.shape[0], device=z.device)
+        codebook_loss = torch.zeros(z.shape[0], device=z.device)
+
+        # Pack the binary channels into integer indices (LSB = channel 0).
+        bits = (
+            2
+            ** torch.arange(self.codebook_dim, device=z.device)
+            .unsqueeze(0)
+            .unsqueeze(-1)
+            .long()
+        )  # (1, d, 1)
+        indices = (torch.round(z_e.clone().detach()).long() * bits).sum(1).long()
+
+        return z_q, commit_loss, codebook_loss, indices, z_e
+
+    def vq2emb(self, vq, out_proj=True):
+        """Decode integer indices back to binary embeddings (inverse of forward's packing)."""
+        emb = torch.zeros(
+            vq.shape[0], self.codebook_dim, vq.shape[-1], device=vq.device
+        )  # (B, d, T)
+        for i in range(self.codebook_dim):
+            emb[:, i, :] = (vq % 2).float()
+            vq = vq // 2
+        if out_proj:
+            emb = self.out_project(emb)
+        return emb
diff --git a/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py b/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py
new file mode 100644
index 0000000..d3b5538
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py
@@ -0,0 +1,177 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Union
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+from torch.nn.utils import weight_norm
+
+from indextts.utils.maskgct.models.codec.amphion_codec.quantize.factorized_vector_quantize import (
+ FactorizedVectorQuantize,
+)
+from indextts.utils.maskgct.models.codec.amphion_codec.quantize.vector_quantize import VectorQuantize
+from indextts.utils.maskgct.models.codec.amphion_codec.quantize.lookup_free_quantize import LookupFreeQuantize
+
+
class ResidualVQ(nn.Module):
    """
    Introduced in SoundStream: An end2end neural audio codec
    https://arxiv.org/abs/2107.03312

    A stack of quantizers where each stage quantizes the residual error left
    by the previous stages, so the summed stage outputs refine the input
    reconstruction additively.
    """

    def __init__(
        self,
        input_dim: int = 256,
        num_quantizers: int = 8,
        codebook_size: int = 1024,
        codebook_dim: int = 256,
        quantizer_type: str = "vq",  # "vq" or "fvq" or "lfq"
        quantizer_dropout: float = 0.5,
        **kwargs,
    ):
        super().__init__()

        self.input_dim = input_dim
        self.num_quantizers = num_quantizers
        self.codebook_size = codebook_size
        self.codebook_dim = codebook_dim
        self.quantizer_type = quantizer_type
        self.quantizer_dropout = quantizer_dropout

        # Map the type tag to the quantizer class used for every stage.
        if quantizer_type == "vq":
            VQ = VectorQuantize
        elif quantizer_type == "fvq":
            VQ = FactorizedVectorQuantize
        elif quantizer_type == "lfq":
            VQ = LookupFreeQuantize
        else:
            raise ValueError(f"Unknown quantizer type {quantizer_type}")

        # All stages share the same configuration; extra kwargs are forwarded
        # to the underlying quantizer class.
        self.quantizers = nn.ModuleList(
            [
                VQ(
                    input_dim=input_dim,
                    codebook_size=codebook_size,
                    codebook_dim=codebook_dim,
                    **kwargs,
                )
                for _ in range(num_quantizers)
            ]
        )

    def forward(self, z, n_quantizers: int = None):
        """
        Parameters
        ----------
        z : Tensor[B x D x T]
        n_quantizers : int, optional
            No. of quantizers to use
            (n_quantizers < self.n_codebooks ex: for quantizer dropout)
            Note: if `self.quantizer_dropout` is True, this argument is ignored
            when in training mode, and a random number of quantizers is used.
        Returns
        -------
        "quantized_out" : Tensor[B x D x T]
            Quantized continuous representation of input
        "all_indices" : Tensor[N x B x T]
            Codebook indices for each codebook
            (quantized discrete representation of input)
        "all_commit_losses" : Tensor[N]
        "all_codebook_losses" : Tensor[N]
        "all_quantized" : Tensor[N x B x D x T]
        """

        quantized_out = 0.0
        residual = z

        all_commit_losses = []
        all_codebook_losses = []
        all_indices = []
        all_quantized = []

        if n_quantizers is None:
            n_quantizers = self.num_quantizers

        if self.training:
            # Quantizer dropout (DAC-style): by default every sample keeps all
            # stages — the +1 keeps the `i < n_quantizers` mask true for every
            # stage index — while the first `n_dropout` samples of the batch
            # are reassigned a random number of active stages in [1, N].
            n_quantizers = torch.ones((z.shape[0],)) * self.num_quantizers + 1
            dropout = torch.randint(1, self.num_quantizers + 1, (z.shape[0],))
            n_dropout = int(z.shape[0] * self.quantizer_dropout)
            n_quantizers[:n_dropout] = dropout[:n_dropout]
            n_quantizers = n_quantizers.to(z.device)

        for i, quantizer in enumerate(self.quantizers):
            # Eval mode: n_quantizers is a plain int, so later stages can be
            # skipped outright. Training mode: every stage runs and the
            # per-sample mask below decides which samples use its output.
            if self.training is False and i >= n_quantizers:
                break

            z_q_i, commit_loss_i, codebook_loss_i, indices_i, z_e_i = quantizer(
                residual
            )

            # Create mask to apply quantizer dropout
            mask = (
                torch.full((z.shape[0],), fill_value=i, device=z.device) < n_quantizers
            )
            quantized_out = quantized_out + z_q_i * mask[:, None, None]
            residual = residual - z_q_i

            # Zero out losses for samples whose mask dropped this stage.
            commit_loss_i = (commit_loss_i * mask).mean()
            codebook_loss_i = (codebook_loss_i * mask).mean()

            all_commit_losses.append(commit_loss_i)
            all_codebook_losses.append(codebook_loss_i)
            all_indices.append(indices_i)
            all_quantized.append(z_q_i)

        all_commit_losses, all_codebook_losses, all_indices, all_quantized = map(
            torch.stack,
            (all_commit_losses, all_codebook_losses, all_indices, all_quantized),
        )

        return (
            quantized_out,
            all_indices,
            all_commit_losses,
            all_codebook_losses,
            all_quantized,
        )

    def vq2emb(self, vq, n_quantizers=None):
        """Decode stacked stage indices ``vq`` (N x B x T) by summing each
        stage's decoded embedding contribution."""
        quantized_out = 0.0
        if n_quantizers is None:
            n_quantizers = self.num_quantizers
        for idx, quantizer in enumerate(self.quantizers):
            if idx >= n_quantizers:
                break
            quantized_out += quantizer.vq2emb(vq[idx])
        return quantized_out

    def latent2dist(self, z, n_quantizers=None):
        """Return per-stage distance matrices and code indices for ``z``.

        The running quantized sum is only used to form the next residual;
        unlike forward, it is not returned.
        """
        quantized_out = 0.0
        residual = z

        all_dists = []
        all_indices = []

        if n_quantizers is None:
            n_quantizers = self.num_quantizers

        for i, quantizer in enumerate(self.quantizers):
            if self.training is False and i >= n_quantizers:
                break
            dist_i, indices_i, z_q_i = quantizer.latent2dist(residual)
            all_dists.append(dist_i)
            all_indices.append(indices_i)

            quantized_out = quantized_out + z_q_i
            residual = residual - z_q_i

        all_dists = torch.stack(all_dists)
        all_indices = torch.stack(all_indices)

        return all_dists, all_indices
diff --git a/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py b/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py
new file mode 100644
index 0000000..8296893
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py
@@ -0,0 +1,401 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange, repeat
+from torch.nn.utils import weight_norm
+
+
def WNConv1d(*args, **kwargs):
    """Weight-normalized nn.Conv1d factory."""
    return weight_norm(nn.Conv1d(*args, **kwargs))
+
+
def WNConvTranspose1d(*args, **kwargs):
    """Weight-normalized nn.ConvTranspose1d factory."""
    return weight_norm(nn.ConvTranspose1d(*args, **kwargs))
+
+
def l2norm(t):
    """Scale ``t`` to unit L2 norm along its last dimension."""
    normed = F.normalize(t, dim=-1, p=2)
    return normed
+
+
def ema_inplace(moving_avg, new, decay):
    """In-place exponential moving average: avg <- decay*avg + (1-decay)*new."""
    moving_avg.data.mul_(decay)
    moving_avg.data.add_(new, alpha=1 - decay)
+
+
def laplace_smoothing(x, n_categories, eps=1e-5):
    """Additively smooth counts ``x`` into probabilities that are never zero."""
    total = x.sum() + n_categories * eps
    return (x + eps) / total
+
+
def sample_vectors(samples, num):
    """Draw ``num`` rows from ``samples`` — without replacement when enough
    rows exist, otherwise with replacement via random indices."""
    device = samples.device
    n = samples.shape[0]
    if n >= num:
        idx = torch.randperm(n, device=device)[:num]
    else:
        idx = torch.randint(0, n, (num,), device=device)
    return samples[idx]
+
+
def kmeans(samples, num_clusters, num_iters=10, use_cosine_sim=False):
    """Cluster ``samples`` (n, d) into ``num_clusters`` centroids with Lloyd's
    algorithm.

    Returns (means, bins): the centroids (c, d) and the member count per
    cluster from the final assignment.
    """
    dim, dtype, device = samples.shape[-1], samples.dtype, samples.device

    # Initialize centroids from randomly drawn samples.
    means = sample_vectors(samples, num_clusters)

    for _ in range(num_iters):
        if use_cosine_sim:
            # Dot product as similarity (meaningful for normalized inputs).
            dists = samples @ means.t()
        else:
            # Negative squared euclidean distance, broadcast (n, 1, d)-(1, c, d).
            diffs = rearrange(samples, "n d -> n () d") - rearrange(
                means, "c d -> () c d"
            )
            dists = -(diffs**2).sum(dim=-1)

        # Assign each sample to the centroid with the highest score.
        buckets = dists.max(dim=-1).indices
        bins = torch.bincount(buckets, minlength=num_clusters)
        zero_mask = bins == 0
        # Clamp empty-cluster counts to 1 to avoid dividing by zero below.
        bins_min_clamped = bins.masked_fill(zero_mask, 1)

        # Sum member vectors per cluster, then divide by counts to get means.
        new_means = buckets.new_zeros(num_clusters, dim, dtype=dtype)
        new_means.scatter_add_(0, repeat(buckets, "n -> n d", d=dim), samples)
        new_means = new_means / bins_min_clamped[..., None]

        if use_cosine_sim:
            new_means = l2norm(new_means)

        # Clusters that received no samples keep their previous centroid.
        means = torch.where(zero_mask[..., None], means, new_means)

    return means, bins
+
+
class EuclideanCodebook(nn.Module):
    """EMA-updated codebook with nearest-neighbour lookup by (negative)
    squared euclidean distance, optional k-means initialization, and
    dead-code re-sampling.

    Args:
        dim (int): Dimension of each code vector.
        codebook_size (int): Number of codes.
        kmeans_init (bool): Lazily initialize codes with k-means on the first
            batch seen instead of random normal init.
        kmeans_iters (int): Iterations used for that k-means initialization.
        decay (float): EMA decay for cluster sizes and code-vector sums.
        eps (float): Laplace-smoothing epsilon for cluster sizes.
        threshold_ema_dead_code (int): Codes whose EMA cluster size drops
            below this are replaced by vectors sampled from the batch;
            0 disables expiration.
        weight_init (bool): Use uniform init for the codebook weights.
    """

    def __init__(
        self,
        dim,
        codebook_size,
        kmeans_init=False,
        kmeans_iters=10,
        decay=0.8,
        eps=1e-5,
        threshold_ema_dead_code=2,
        weight_init=False,
    ):
        super().__init__()

        self.decay = decay
        # Random-normal init by default; zeros when uniform init overwrites below.
        init_fn = torch.randn if not weight_init else torch.zeros
        embed = init_fn(codebook_size, dim)

        if weight_init:
            nn.init.uniform_(embed, -1 / codebook_size, 1 / codebook_size)

        self.codebook_size = codebook_size
        self.kmeans_iters = kmeans_iters
        self.eps = eps
        self.threshold_ema_dead_code = threshold_ema_dead_code

        self.register_buffer(
            "initted", torch.Tensor([not kmeans_init])
        )  # if kmeans_init is True, then initted is False; otherwise, initted is True
        self.register_buffer("cluster_size", torch.zeros(codebook_size))
        self.register_buffer("embed", embed)
        self.register_buffer("embed_avg", embed.clone())

    def init_embed_(self, data):
        # Lazy k-means initialization from the first batch of latents.
        embed, cluster_size = kmeans(data, self.codebook_size, self.kmeans_iters)
        self.embed.data.copy_(embed)
        self.embed_avg.data.copy_(embed)
        self.cluster_size.data.copy_(cluster_size)
        self.initted.data.copy_(torch.Tensor([True]))

    def replace(self, samples, mask):
        # Overwrite masked (dead) codes with random vectors from `samples`.
        modified_codebook = torch.where(
            mask[..., None], sample_vectors(samples, self.codebook_size), self.embed
        )
        self.embed.data.copy_(modified_codebook)

    def expire_codes_(self, batch_samples):
        # Re-sample codes whose EMA usage fell below the dead-code threshold.
        if self.threshold_ema_dead_code == 0:
            return

        expired_codes = self.cluster_size < self.threshold_ema_dead_code
        if not torch.any(expired_codes):
            return
        batch_samples = rearrange(batch_samples, "... d -> (...) d")
        self.replace(batch_samples, mask=expired_codes)

    def forward(self, x):
        """Quantize ``x`` (..., dim) to (quantize, embed_ind); in training
        mode also EMA-update the codebook in place."""
        shape, dtype = x.shape, x.dtype
        flatten = rearrange(x, "... d -> (...) d")
        embed = self.embed.t()  # (codebook_size, dim) -> (dim, codebook_size)

        if not self.initted:
            self.init_embed_(flatten)

        # Negative squared distance via the ||a||^2 - 2ab + ||b||^2 expansion,
        # so one matmul does the heavy lifting.
        dist = -(
            flatten.pow(2).sum(1, keepdim=True)
            - 2 * flatten @ embed
            + embed.pow(2).sum(0, keepdim=True)
        )

        embed_ind = dist.max(dim=-1).indices
        embed_onehot = F.one_hot(embed_ind, self.codebook_size).type(dtype)
        embed_ind = embed_ind.view(*shape[:-1])
        quantize = F.embedding(embed_ind, self.embed)

        if self.training:
            # EMA update of per-code usage counts and code-vector sums.
            ema_inplace(self.cluster_size, embed_onehot.sum(0), self.decay)
            embed_sum = (
                flatten.t() @ embed_onehot
            )  # (dim, ...) @ (..., codebook_size) -> (dim, codebook_size)
            ema_inplace(self.embed_avg, embed_sum.t(), self.decay)
            # Smooth cluster sizes so rarely used codes don't divide by ~0.
            cluster_size = (
                laplace_smoothing(self.cluster_size, self.codebook_size, self.eps)
                * self.cluster_size.sum()
            )
            embed_normalized = self.embed_avg / cluster_size.unsqueeze(1)
            self.embed.data.copy_(embed_normalized)
            self.expire_codes_(x)

        return quantize, embed_ind

    def vq2emb(self, vq):
        # Plain table lookup for precomputed indices.
        quantize = F.embedding(vq, self.embed)
        return quantize

    def latent2dist(self, x):
        """Like forward, but also returns the negative-squared-distance matrix
        reshaped to (..., codebook_size); performs no EMA update."""
        shape, dtype = x.shape, x.dtype
        flatten = rearrange(x, "... d -> (...) d")
        embed = self.embed.t()  # (codebook_size, dim) -> (dim, codebook_size)

        if not self.initted:
            self.init_embed_(flatten)

        dist = -(
            flatten.pow(2).sum(1, keepdim=True)
            - 2 * flatten @ embed
            + embed.pow(2).sum(0, keepdim=True)
        )

        embed_ind = dist.max(dim=-1).indices
        embed_ind = embed_ind.view(*shape[:-1])
        quantize = F.embedding(embed_ind, self.embed)

        dist = dist.view(*shape[:-1], -1)

        return dist, embed_ind, quantize
+
+
class SimpleCodebook(nn.Module):
    """Plain nearest-neighbour codebook backed by a learnable embedding table.

    Args:
        dim (int): Embedding dimension.
        codebook_size (int): Number of codebook entries.
        use_l2_normlize (bool): If True, compare L2-normalized latents and
            codes (cosine-style lookup).
    """

    def __init__(
        self,
        dim,
        codebook_size,
        use_l2_normlize=False,
    ):
        super().__init__()

        self.dim = dim
        self.codebook_size = codebook_size
        self.use_l2_normlize = use_l2_normlize

        self.embed = nn.Embedding(self.codebook_size, self.dim)

    def forward(self, x):
        """Quantize ``x`` (..., dim) to its nearest codebook entries.

        Returns:
            quantize: Tensor (..., dim) of the selected code vectors.
            embed_ind: Long tensor (...) of the selected code indices.
        """
        shape = x.shape
        flatten = x.reshape(-1, shape[-1])
        embed = self.embed.weight.t()  # (codebook_size, dim) -> (dim, codebook_size)

        if self.use_l2_normlize:
            flatten = F.normalize(flatten)
            # Normalize each code vector (columns of the transposed table),
            # not each feature row — dim=0 after the transpose above.
            embed = F.normalize(embed, dim=0)

        # Negative squared euclidean distance via ||a||^2 - 2ab + ||b||^2.
        dist = -(
            flatten.pow(2).sum(1, keepdim=True)
            - 2 * flatten @ embed
            + embed.pow(2).sum(0, keepdim=True)
        )

        embed_ind = dist.max(dim=-1).indices
        embed_ind = embed_ind.view(*shape[:-1])
        # Bug fix: F.embedding expects the weight tensor; passing the
        # nn.Embedding module itself raised a TypeError.
        quantize = F.embedding(embed_ind, self.embed.weight)

        return quantize, embed_ind

    def vq2emb(self, vq):
        """Look up code vectors for precomputed indices ``vq``."""
        quantize = F.embedding(vq, self.embed.weight)
        return quantize

    def latent2dist(self, x):
        """Return (dist, embed_ind, quantize) where ``dist`` has shape
        (..., codebook_size) and holds the lookup scores for every code."""
        shape = x.shape
        flatten = x.reshape(-1, shape[-1])
        embed = self.embed.weight.t()  # (codebook_size, dim) -> (dim, codebook_size)

        if self.use_l2_normlize:
            flatten = F.normalize(flatten)
            # Same axis fix as in forward: normalize per code vector.
            embed = F.normalize(embed, dim=0)

        dist = -(
            flatten.pow(2).sum(1, keepdim=True)
            - 2 * flatten @ embed
            + embed.pow(2).sum(0, keepdim=True)
        )

        embed_ind = dist.max(dim=-1).indices
        embed_ind = embed_ind.view(*shape[:-1])
        # Same fix as forward: pass the weight tensor to F.embedding.
        quantize = F.embedding(embed_ind, self.embed.weight)

        dist = dist.view(*shape[:-1], -1)

        return dist, embed_ind, quantize
+
+
class VectorQuantize(nn.Module):
    """Single-codebook vector quantizer with optional factorized projection.

    When ``input_dim != codebook_dim`` the latents are projected down to the
    codebook dimension before lookup and back up afterwards (factorized VQ);
    otherwise both projections are identities. Code lookup is delegated to
    either an EMA-updated ``EuclideanCodebook`` or a plain learnable
    ``SimpleCodebook``.

    Args:
        input_dim (int): Dimension of the incoming latents.
        codebook_size (int): Number of codebook entries.
        codebook_dim (int): Dimension of each code vector; use
            ``codebook_dim == input_dim`` for plain euclidean VQ, or a small
            value (e.g. 8 or 32, usually with ``use_l2_normlize=True``) for
            factorized VQ.
        commitment (float): Weight of the commitment loss.
        codebook_loss_weight (float): Weight of the codebook loss.
        use_l2_normlize (bool): L2-normalize codes for factorized lookup.
        codebook_type (str): "euclidean" or "simple".
        kmeans_init (bool): Initialize the codebook with k-means.
        kmeans_iters (int): k-means iterations for that initialization.
        decay (float): EMA decay for codebook updates.
        eps (float): Numerical-stability epsilon.
        threshold_ema_dead_code (int): EMA cluster-size threshold below which
            a code is re-sampled from the current batch.
        weight_init (bool): Use uniform initialization for codebook weights.
    """

    def __init__(
        self,
        input_dim,
        codebook_size,
        codebook_dim,
        commitment=0.005,
        codebook_loss_weight=1.0,
        use_l2_normlize=False,
        codebook_type="euclidean",  # "euclidean" or "simple"
        kmeans_init=False,
        kmeans_iters=10,
        decay=0.8,
        eps=1e-5,
        threshold_ema_dead_code=2,
        weight_init=False,
    ):
        super().__init__()
        self.input_dim = input_dim
        self.codebook_size = codebook_size
        self.codebook_dim = codebook_dim
        self.commitment = commitment
        self.codebook_loss_weight = codebook_loss_weight
        self.use_l2_normlize = use_l2_normlize
        self.codebook_type = codebook_type
        self.kmeans_init = kmeans_init
        self.kmeans_iters = kmeans_iters
        self.decay = decay
        self.eps = eps
        self.threshold_ema_dead_code = threshold_ema_dead_code
        self.weight_init = weight_init

        # Factorized VQ projects to/from the codebook dimension; otherwise
        # both directions are no-ops.
        needs_projection = self.input_dim != self.codebook_dim
        if needs_projection:
            self.in_project = WNConv1d(self.input_dim, self.codebook_dim, kernel_size=1)
            self.out_project = WNConv1d(
                self.codebook_dim, self.input_dim, kernel_size=1
            )
        else:
            self.in_project = nn.Identity()
            self.out_project = nn.Identity()

        if self.codebook_type == "euclidean":
            self.codebook = EuclideanCodebook(
                self.codebook_dim,
                codebook_size=self.codebook_size,
                kmeans_init=self.kmeans_init,
                kmeans_iters=self.kmeans_iters,
                decay=self.decay,
                eps=self.eps,
                threshold_ema_dead_code=self.threshold_ema_dead_code,
                weight_init=self.weight_init,
            )
        elif self.codebook_type == "simple":
            self.codebook = SimpleCodebook(
                self.codebook_dim,
                codebook_size=self.codebook_size,
                use_l2_normlize=self.use_l2_normlize,
            )
        else:
            raise NotImplementedError(
                f"codebook_type {self.codebook_type} is not implemented!"
            )

    def forward(self, z):
        """Quantize latents ``z`` (B, D, T).

        Returns:
            z_q (B, D, T): straight-through quantized latents.
            commit_loss (B,): commitment loss (zeros in eval mode).
            codebook_loss (B,): codebook loss (zeros in eval mode).
            indices (B, T): selected code indices.
            z_e (B, d, T): projected pre-quantization latents.
        """
        # Factorized codes: project down to the codebook dimension first.
        z_e = self.in_project(z)
        z_q, indices = self.decode_latents(z_e)

        if self.training:
            commit_loss = self.commitment * F.mse_loss(
                z_e, z_q.detach(), reduction="none"
            ).mean([1, 2])
            codebook_loss = self.codebook_loss_weight * F.mse_loss(
                z_q, z_e.detach(), reduction="none"
            ).mean([1, 2])
        else:
            commit_loss = torch.zeros(z.shape[0], device=z.device)
            codebook_loss = torch.zeros(z.shape[0], device=z.device)

        # Straight-through estimator: gradients flow to z_e as if quantization
        # were the identity.
        z_q = z_e + (z_q - z_e).detach()

        z_q = self.out_project(z_q)

        return z_q, commit_loss, codebook_loss, indices, z_e

    def decode_latents(self, latents):
        """Run codebook lookup on latents (B, d, T); return (z_q, indices)."""
        z_q, indices = self.codebook(latents.transpose(1, 2))
        z_q = z_q.transpose(1, 2)
        return z_q, indices

    def vq2emb(self, vq, out_proj=True):
        """Decode code indices back to embeddings (B, D, T)."""
        emb = self.codebook.vq2emb(vq).transpose(1, 2)
        if out_proj:
            emb = self.out_project(emb)
        return emb

    def latent2dist(self, latents):
        """Return (dist, indices, quantized) for latents (B, d, T)."""
        dist, embed_ind, quantize = self.codebook.latent2dist(
            latents.transpose(1, 2)
        )
        return dist, embed_ind, quantize.transpose(1, 2)
diff --git a/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py b/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py
new file mode 100644
index 0000000..038d8ef
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py
@@ -0,0 +1,881 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Optional, Tuple
+
+import numpy as np
+import scipy
+import torch
+from torch import nn, view_as_real, view_as_complex
+from torch import nn
+from torch.nn.utils import weight_norm, remove_weight_norm
+from torchaudio.functional.functional import _hz_to_mel, _mel_to_hz
+import librosa
+
+
def safe_log(x: torch.Tensor, clip_val: float = 1e-7) -> torch.Tensor:
    """Element-wise natural log with the input clamped to at least ``clip_val``.

    Args:
        x (Tensor): Input tensor.
        clip_val (float, optional): Lower clamp so values at or near zero
            cannot produce -inf/NaN. Defaults to 1e-7.

    Returns:
        Tensor: ``log(max(x, clip_val))`` element-wise.
    """
    clipped = torch.clip(x, min=clip_val)
    return torch.log(clipped)
+
+
def symlog(x: torch.Tensor) -> torch.Tensor:
    """Symmetric log: log1p of the magnitude with the original sign kept."""
    return x.sign() * x.abs().log1p()
+
+
def symexp(x: torch.Tensor) -> torch.Tensor:
    """Inverse of symlog: sign(x) * (exp(|x|) - 1)."""
    magnitude = torch.exp(x.abs()) - 1
    return torch.sign(x) * magnitude
+
+
class STFT(nn.Module):
    """Short-time Fourier transform front-end returning log-magnitude and
    phase spectrograms.

    Args:
        n_fft (int): Size of the Fourier transform.
        hop_length (int): Distance between neighboring frames.
        win_length (int): Window (and STFT filter) size.
        center (bool): If True, let torch.stft pad internally; otherwise
            apply reflect "same"-style padding here before the transform.
    """

    def __init__(
        self,
        n_fft: int,
        hop_length: int,
        win_length: int,
        center=True,
    ):
        super().__init__()
        self.center = center
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.win_length = win_length
        # Registered as a buffer so the window follows the module's device/dtype.
        window = torch.hann_window(win_length)
        self.register_buffer("window", window)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute (log_mag, phase), each of shape (B, n_fft // 2 + 1, T),
        for a batch of waveforms ``x`` of shape (B, T * hop_length)."""
        # x: (B, T * hop_length)

        if not self.center:
            pad = self.win_length - self.hop_length
            x = torch.nn.functional.pad(x, (pad // 2, pad // 2), mode="reflect")

        # return_complex=False is deprecated in torch.stft; request the
        # complex result and split it into real/imag pairs explicitly.
        stft_spec = torch.view_as_real(
            torch.stft(
                x,
                self.n_fft,
                hop_length=self.hop_length,
                win_length=self.win_length,
                window=self.window,
                center=self.center,
                return_complex=True,
            )
        )  # (B, n_fft // 2 + 1, T, 2)

        rea = stft_spec[:, :, :, 0]  # real part, (B, n_fft // 2 + 1, T)
        imag = stft_spec[:, :, :, 1]  # imaginary part, (B, n_fft // 2 + 1, T)

        # sqrt is already non-negative, so no abs is needed around it; the
        # 1e-5 floor keeps the log finite for silent bins.
        log_mag = torch.log(
            torch.sqrt(torch.pow(rea, 2) + torch.pow(imag, 2)) + 1e-5
        )  # (B, n_fft // 2 + 1, T)
        phase = torch.atan2(imag, rea)  # (B, n_fft // 2 + 1, T)

        return log_mag, phase
+
+
class ISTFT(nn.Module):
    """
    Custom implementation of ISTFT since torch.istft doesn't allow custom padding (other than `center=True`) with
    windowing. This is because the NOLA (Nonzero Overlap Add) check fails at the edges.
    See issue: https://github.com/pytorch/pytorch/issues/62323
    Specifically, in the context of neural vocoding we are interested in "same" padding analogous to CNNs.
    The NOLA constraint is met as we trim padded samples anyway.

    Args:
        n_fft (int): Size of Fourier transform.
        hop_length (int): The distance between neighboring sliding window frames.
        win_length (int): The size of window frame and STFT filter.
        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
    """

    def __init__(
        self, n_fft: int, hop_length: int, win_length: int, padding: str = "same"
    ):
        super().__init__()
        if padding not in ["center", "same"]:
            raise ValueError("Padding must be 'center' or 'same'.")
        self.padding = padding
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.win_length = win_length
        # Synthesis window; a buffer so it follows the module across devices.
        window = torch.hann_window(win_length)
        self.register_buffer("window", window)

    def forward(self, spec: torch.Tensor) -> torch.Tensor:
        """
        Compute the Inverse Short Time Fourier Transform (ISTFT) of a complex spectrogram.

        Args:
            spec (Tensor): Input complex spectrogram of shape (B, N, T), where B is the batch size,
                            N is the number of frequency bins, and T is the number of time frames.

        Returns:
            Tensor: Reconstructed time-domain signal of shape (B, L), where L is the length of the output signal.
        """
        if self.padding == "center":
            # Fallback to pytorch native implementation
            return torch.istft(
                spec,
                self.n_fft,
                self.hop_length,
                self.win_length,
                self.window,
                center=True,
            )
        elif self.padding == "same":
            # "Same" padding: trim this many samples from each edge after
            # overlap-add, mirroring the analysis-side padding.
            pad = (self.win_length - self.hop_length) // 2
        else:
            raise ValueError("Padding must be 'center' or 'same'.")

        assert spec.dim() == 3, "Expected a 3D tensor as input"
        B, N, T = spec.shape

        # Inverse FFT
        # Per-frame inverse real FFT, then apply the synthesis window.
        ifft = torch.fft.irfft(spec, self.n_fft, dim=1, norm="backward")
        ifft = ifft * self.window[None, :, None]

        # Overlap and Add
        # fold() sums overlapping frames back into a single 1D signal.
        output_size = (T - 1) * self.hop_length + self.win_length
        y = torch.nn.functional.fold(
            ifft,
            output_size=(1, output_size),
            kernel_size=(1, self.win_length),
            stride=(1, self.hop_length),
        )[:, 0, 0, pad:-pad]

        # Window envelope
        # Accumulated squared-window weight per output sample, used to undo
        # the window gain after overlap-add.
        window_sq = self.window.square().expand(1, T, -1).transpose(1, 2)
        window_envelope = torch.nn.functional.fold(
            window_sq,
            output_size=(1, output_size),
            kernel_size=(1, self.win_length),
            stride=(1, self.hop_length),
        ).squeeze()[pad:-pad]

        # Normalize
        # NOLA check: every retained sample must have nonzero window weight.
        assert (window_envelope > 1e-11).all()
        y = y / window_envelope

        return y
+
+
class MDCT(nn.Module):
    """
    Modified Discrete Cosine Transform (MDCT) module.

    Args:
        frame_len (int): Length of the MDCT frame.
        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
    """

    def __init__(self, frame_len: int, padding: str = "same"):
        super().__init__()
        if padding not in ["center", "same"]:
            raise ValueError("Padding must be 'center' or 'same'.")
        self.padding = padding
        self.frame_len = frame_len
        N = frame_len // 2
        n0 = (N + 1) / 2
        # Fix: scipy.signal.cosine was removed from the scipy.signal
        # namespace (gone as of SciPy 1.13); the window lives in
        # scipy.signal.windows.
        window = torch.from_numpy(scipy.signal.windows.cosine(frame_len)).float()
        self.register_buffer("window", window)

        # Twiddle factors that turn a plain FFT into an MDCT.
        pre_twiddle = torch.exp(-1j * torch.pi * torch.arange(frame_len) / frame_len)
        post_twiddle = torch.exp(-1j * torch.pi * n0 * (torch.arange(N) + 0.5) / N)
        # view_as_real: NCCL Backend does not support ComplexFloat data type
        # https://github.com/pytorch/pytorch/issues/71613
        self.register_buffer("pre_twiddle", view_as_real(pre_twiddle))
        self.register_buffer("post_twiddle", view_as_real(post_twiddle))

    def forward(self, audio: torch.Tensor) -> torch.Tensor:
        """
        Apply the Modified Discrete Cosine Transform (MDCT) to the input audio.

        Args:
            audio (Tensor): Input audio waveform of shape (B, T), where B is the batch size
                and T is the length of the audio.

        Returns:
            Tensor: MDCT coefficients of shape (B, L, N), where L is the number of output frames
                and N is the number of frequency bins.
        """
        if self.padding == "center":
            audio = torch.nn.functional.pad(
                audio, (self.frame_len // 2, self.frame_len // 2)
            )
        elif self.padding == "same":
            # hop_length is 1/2 frame_len
            audio = torch.nn.functional.pad(
                audio, (self.frame_len // 4, self.frame_len // 4)
            )
        else:
            raise ValueError("Padding must be 'center' or 'same'.")

        # Split into 50%-overlapping windowed frames of length frame_len.
        x = audio.unfold(-1, self.frame_len, self.frame_len // 2)
        N = self.frame_len // 2
        x = x * self.window.expand(x.shape)
        # MDCT computed as a twiddled FFT, keeping the first N bins.
        X = torch.fft.fft(
            x * view_as_complex(self.pre_twiddle).expand(x.shape), dim=-1
        )[..., :N]
        res = X * view_as_complex(self.post_twiddle).expand(X.shape) * np.sqrt(1 / N)
        return torch.real(res) * np.sqrt(2)
+
+
class IMDCT(nn.Module):
    """
    Inverse Modified Discrete Cosine Transform (IMDCT) module.

    Args:
        frame_len (int): Length of the MDCT frame.
        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
    """

    def __init__(self, frame_len: int, padding: str = "same"):
        super().__init__()
        if padding not in ["center", "same"]:
            raise ValueError("Padding must be 'center' or 'same'.")
        self.padding = padding
        self.frame_len = frame_len
        N = frame_len // 2
        n0 = (N + 1) / 2
        # Fix: scipy.signal.cosine was removed from the scipy.signal
        # namespace (gone as of SciPy 1.13); the window lives in
        # scipy.signal.windows.
        window = torch.from_numpy(scipy.signal.windows.cosine(frame_len)).float()
        self.register_buffer("window", window)

        pre_twiddle = torch.exp(1j * torch.pi * n0 * torch.arange(N * 2) / N)
        post_twiddle = torch.exp(1j * torch.pi * (torch.arange(N * 2) + n0) / (N * 2))
        # view_as_real: NCCL Backend does not support ComplexFloat data type
        # https://github.com/pytorch/pytorch/issues/71613
        self.register_buffer("pre_twiddle", view_as_real(pre_twiddle))
        self.register_buffer("post_twiddle", view_as_real(post_twiddle))

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """
        Apply the Inverse Modified Discrete Cosine Transform (IMDCT) to the input MDCT coefficients.

        Args:
            X (Tensor): Input MDCT coefficients of shape (B, L, N), where B is the batch size,
                L is the number of frames, and N is the number of frequency bins.

        Returns:
            Tensor: Reconstructed audio waveform of shape (B, T), where T is the length of the audio.
        """
        B, L, N = X.shape
        # Rebuild the length-2N spectrum from the N MDCT bins using the
        # MDCT's odd symmetry, then invert with a twiddled IFFT.
        Y = torch.zeros((B, L, N * 2), dtype=X.dtype, device=X.device)
        Y[..., :N] = X
        Y[..., N:] = -1 * torch.conj(torch.flip(X, dims=(-1,)))
        y = torch.fft.ifft(
            Y * view_as_complex(self.pre_twiddle).expand(Y.shape), dim=-1
        )
        y = (
            torch.real(y * view_as_complex(self.post_twiddle).expand(y.shape))
            * np.sqrt(N)
            * np.sqrt(2)
        )
        result = y * self.window.expand(y.shape)
        # Overlap-add the windowed frames at a 50% hop via fold().
        output_size = (1, (L + 1) * N)
        audio = torch.nn.functional.fold(
            result.transpose(1, 2),
            output_size=output_size,
            kernel_size=(1, self.frame_len),
            stride=(1, self.frame_len // 2),
        )[:, 0, 0, :]

        if self.padding == "center":
            pad = self.frame_len // 2
        elif self.padding == "same":
            pad = self.frame_len // 4
        else:
            raise ValueError("Padding must be 'center' or 'same'.")

        # Trim the synthesis padding that mirrors the MDCT's analysis padding.
        audio = audio[:, pad:-pad]
        return audio
+
+
class FourierHead(nn.Module):
    """Abstract base for inverse-Fourier heads mapping features to waveforms."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x (Tensor): Input tensor of shape (B, L, H), where B is the batch size,
                L is the sequence length, and H denotes the model dimension.

        Returns:
            Tensor: Reconstructed time-domain audio signal of shape (B, T), where T is the length of the output signal.

        Raises:
            NotImplementedError: always; subclasses must override forward.
        """
        raise NotImplementedError("Subclasses must implement the forward method.")
+
+
class ISTFTHead(FourierHead):
    """
    ISTFT head: projects hidden features to STFT log-magnitude/phase pairs
    and reconstructs the waveform with an inverse STFT.

    Args:
        dim (int): Hidden dimension of the model.
        n_fft (int): Size of Fourier transform.
        hop_length (int): Hop between neighboring frames; must match the
            temporal resolution of the input features.
        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
    """

    def __init__(self, dim: int, n_fft: int, hop_length: int, padding: str = "same"):
        super().__init__()
        out_dim = n_fft + 2
        self.out = torch.nn.Linear(dim, out_dim)
        self.istft = ISTFT(
            n_fft=n_fft, hop_length=hop_length, win_length=n_fft, padding=padding
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Project features (B, L, H) to complex STFT coefficients and invert
        them to a time-domain signal (B, T)."""
        projected = self.out(x).transpose(1, 2)
        mag, phase = projected.chunk(2, dim=1)
        # Exponentiate the predicted log-magnitude; clamp as a safeguard
        # against excessively large magnitudes.
        mag = torch.exp(mag)
        mag = torch.clip(mag, max=1e2)
        # Build the complex spectrogram directly from cos/sin of the phase;
        # recomputing atan2 here would only add cost, not information.
        real = torch.cos(phase)
        imag = torch.sin(phase)
        spec = mag * (real + 1j * imag)
        return self.istft(spec)
+
+
class IMDCTSymExpHead(FourierHead):
    """
    IMDCT Head module for predicting MDCT coefficients with symmetric exponential function

    Args:
        dim (int): Hidden dimension of the model.
        mdct_frame_len (int): Length of the MDCT frame.
        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
        sample_rate (int, optional): The sample rate of the audio. If provided, the last layer will be initialized
            based on perceptual scaling. Defaults to None.
        clip_audio (bool, optional): Whether to clip the audio output within the range of [-1.0, 1.0]. Defaults to False.
    """

    def __init__(
        self,
        dim: int,
        mdct_frame_len: int,
        padding: str = "same",
        sample_rate: Optional[int] = None,
        clip_audio: bool = False,
    ):
        super().__init__()
        out_dim = mdct_frame_len // 2
        self.out = nn.Linear(dim, out_dim)
        self.imdct = IMDCT(frame_len=mdct_frame_len, padding=padding)
        self.clip_audio = clip_audio

        if sample_rate is not None:
            # Optionally init the last layer following the mel scale: lower
            # (perceptually more important) frequency bins start with larger
            # weights.
            m_max = _hz_to_mel(sample_rate // 2)
            m_pts = torch.linspace(0, m_max, out_dim)
            f_pts = _mel_to_hz(m_pts)
            scale = 1 - (f_pts / f_pts.max())

            with torch.no_grad():
                self.out.weight.mul_(scale.view(-1, 1))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass of the IMDCTSymExpHead module.

        Args:
            x (Tensor): Input tensor of shape (B, L, H), where B is the batch size,
                L is the sequence length, and H denotes the model dimension.

        Returns:
            Tensor: Reconstructed time-domain audio signal of shape (B, T), where T is the length of the output signal.
        """
        x = self.out(x)
        x = symexp(x)
        x = torch.clip(
            x, min=-1e2, max=1e2
        )  # safeguard to prevent excessively large magnitudes
        audio = self.imdct(x)
        if self.clip_audio:
            # Bug fix: clamp the reconstructed audio — the previous code
            # clipped the MDCT coefficients `x` and discarded the IMDCT output.
            audio = torch.clip(audio, min=-1.0, max=1.0)

        return audio
+
+
class IMDCTCosHead(FourierHead):
    """
    IMDCT Head module for predicting MDCT coefficients with parametrizing MDCT = exp(m) · cos(p)

    Args:
        dim (int): Hidden dimension of the model.
        mdct_frame_len (int): Length of the MDCT frame.
        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
        clip_audio (bool, optional): Whether to clip the audio output within the range of [-1.0, 1.0]. Defaults to False.
    """

    def __init__(
        self,
        dim: int,
        mdct_frame_len: int,
        padding: str = "same",
        clip_audio: bool = False,
    ):
        super().__init__()
        self.clip_audio = clip_audio
        # Twice as many outputs as MDCT bins: one half for m, one for p.
        self.out = nn.Linear(dim, mdct_frame_len)
        self.imdct = IMDCT(frame_len=mdct_frame_len, padding=padding)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass of the IMDCTCosHead module.

        Args:
            x (Tensor): Input tensor of shape (B, L, H), where B is the batch size,
                L is the sequence length, and H denotes the model dimension.

        Returns:
            Tensor: Reconstructed time-domain audio signal of shape (B, T), where T is the length of the output signal.
        """
        x = self.out(x)
        m, p = x.chunk(2, dim=2)
        m = torch.exp(m).clip(
            max=1e2
        )  # safeguard to prevent excessively large magnitudes
        audio = self.imdct(m * torch.cos(p))
        if self.clip_audio:
            # Bug fix: clamp the reconstructed audio — the previous code
            # clipped the raw projection `x` and discarded the IMDCT output.
            audio = torch.clip(audio, min=-1.0, max=1.0)
        return audio
+
+
class ConvNeXtBlock(nn.Module):
    """ConvNeXt Block adapted from https://github.com/facebookresearch/ConvNeXt to 1D audio signal.

    Args:
        dim (int): Number of input channels.
        intermediate_dim (int): Dimensionality of the intermediate layer.
        layer_scale_init_value (float): Initial value for the layer scale.
            Values <= 0 disable layer scaling.
        adanorm_num_embeddings (int, optional): Number of embeddings for AdaLayerNorm.
            None means non-conditional LayerNorm. Defaults to None.
    """

    def __init__(
        self,
        dim: int,
        intermediate_dim: int,
        layer_scale_init_value: float,
        adanorm_num_embeddings: Optional[int] = None,
    ):
        super().__init__()
        self.dwconv = nn.Conv1d(
            dim, dim, kernel_size=7, padding=3, groups=dim
        )  # depthwise conv
        self.adanorm = adanorm_num_embeddings is not None
        # Consistency fix: branch on the same `is not None` predicate stored
        # in self.adanorm — the previous truthiness test disagreed with the
        # flag when adanorm_num_embeddings == 0.
        if self.adanorm:
            self.norm = AdaLayerNorm(adanorm_num_embeddings, dim, eps=1e-6)
        else:
            self.norm = nn.LayerNorm(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(
            dim, intermediate_dim
        )  # pointwise/1x1 convs, implemented with linear layers
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(intermediate_dim, dim)
        self.gamma = (
            nn.Parameter(layer_scale_init_value * torch.ones(dim), requires_grad=True)
            if layer_scale_init_value > 0
            else None
        )

    def forward(
        self, x: torch.Tensor, cond_embedding_id: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        """Apply the residual block to ``x`` (B, C, T).

        Args:
            x (Tensor): Input of shape (B, C, T).
            cond_embedding_id (Tensor, optional): Embedding ids for
                AdaLayerNorm; required iff conditional normalization is on.

        Returns:
            Tensor: Output of shape (B, C, T).
        """
        residual = x
        x = self.dwconv(x)
        x = x.transpose(1, 2)  # (B, C, T) -> (B, T, C)
        if self.adanorm:
            assert cond_embedding_id is not None
            x = self.norm(x, cond_embedding_id)
        else:
            x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.gamma is not None:
            # Learnable per-channel layer scale (ConvNeXt).
            x = self.gamma * x
        x = x.transpose(1, 2)  # (B, T, C) -> (B, C, T)

        x = residual + x
        return x
+
+
class AdaLayerNorm(nn.Module):
    """
    Adaptive LayerNorm conditioned on a class id.

    A per-class scale and shift are looked up from learnable embedding tables
    (initialized to identity: scale = 1, shift = 0) and applied on top of a
    parameter-free layer normalization.

    Args:
        num_embeddings (int): Number of condition classes.
        embedding_dim (int): Dimension of the normalized feature axis.
        eps (float): Numerical stability constant for the normalization.
    """

    def __init__(self, num_embeddings: int, embedding_dim: int, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        self.dim = embedding_dim
        self.scale = nn.Embedding(
            num_embeddings=num_embeddings, embedding_dim=embedding_dim
        )
        self.shift = nn.Embedding(
            num_embeddings=num_embeddings, embedding_dim=embedding_dim
        )
        # Identity initialization: behaves like plain LayerNorm until trained.
        nn.init.ones_(self.scale.weight)
        nn.init.zeros_(self.shift.weight)

    def forward(self, x: torch.Tensor, cond_embedding_id: torch.Tensor) -> torch.Tensor:
        normalized = nn.functional.layer_norm(x, (self.dim,), eps=self.eps)
        return normalized * self.scale(cond_embedding_id) + self.shift(cond_embedding_id)
+
+
class ResBlock1(nn.Module):
    """
    ResBlock adapted from HiFi-GAN V1 (https://github.com/jik876/hifi-gan) with dilated 1D convolutions,
    but without upsampling layers.

    Args:
        dim (int): Number of input channels.
        kernel_size (int, optional): Size of the convolutional kernel. Defaults to 3.
        dilation (tuple[int], optional): Dilation factors for the dilated convolutions.
            Defaults to (1, 3, 5).
        lrelu_slope (float, optional): Negative slope of the LeakyReLU activation function.
            Defaults to 0.1.
        layer_scale_init_value (float, optional): Initial value for the layer scale. None means no scaling.
            Defaults to None.
    """

    def __init__(
        self,
        dim: int,
        kernel_size: int = 3,
        dilation: Tuple[int, int, int] = (1, 3, 5),
        lrelu_slope: float = 0.1,
        layer_scale_init_value: Optional[float] = None,
    ):
        super().__init__()
        self.lrelu_slope = lrelu_slope

        def make_conv(dil: int) -> nn.Module:
            # Weight-normalized conv with "same"-style padding for dilation `dil`.
            return weight_norm(
                nn.Conv1d(
                    dim,
                    dim,
                    kernel_size,
                    1,
                    dilation=dil,
                    padding=self.get_padding(kernel_size, dil),
                )
            )

        # First conv in each pair uses the increasing dilations, the second
        # always dilation 1 — same construction order as a hand-written list,
        # so seeded initialization is unchanged.
        self.convs1 = nn.ModuleList([make_conv(dil) for dil in dilation])
        self.convs2 = nn.ModuleList([make_conv(1) for _ in dilation])

        # Optional per-pair layer-scale parameters (None disables scaling).
        self.gamma = nn.ParameterList(
            [
                nn.Parameter(
                    layer_scale_init_value * torch.ones(dim, 1), requires_grad=True
                )
                if layer_scale_init_value is not None
                else None
                for _ in dilation
            ]
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        for conv_a, conv_b, gamma in zip(self.convs1, self.convs2, self.gamma):
            residual = x
            y = torch.nn.functional.leaky_relu(x, negative_slope=self.lrelu_slope)
            y = conv_a(y)
            y = torch.nn.functional.leaky_relu(y, negative_slope=self.lrelu_slope)
            y = conv_b(y)
            if gamma is not None:
                y = gamma * y
            x = residual + y
        return x

    def remove_weight_norm(self):
        # Strip weight normalization from every conv (used before export/inference).
        for conv in (*self.convs1, *self.convs2):
            remove_weight_norm(conv)

    @staticmethod
    def get_padding(kernel_size: int, dilation: int = 1) -> int:
        return ((kernel_size - 1) * dilation) // 2
+
+
class Backbone(nn.Module):
    """Abstract base class for generator backbones.

    Concrete backbones keep the temporal resolution constant across layers and
    map (B, C, L) input features to (B, L, H) hidden states.
    """

    def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
        """
        Args:
            x (Tensor): Input of shape (B, C, L) — batch, feature channels, length.

        Returns:
            Tensor: Hidden states of shape (B, L, H), H being the model dimension.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError("Subclasses must implement the forward method.")
+
+
class VocosBackbone(Backbone):
    """
    Vocos backbone built from a stack of ConvNeXt blocks, with optional
    conditioning through Adaptive LayerNorm.

    Args:
        input_channels (int): Number of input features channels.
        dim (int): Hidden dimension of the model.
        intermediate_dim (int): Intermediate dimension used in ConvNeXtBlock.
        num_layers (int): Number of ConvNeXtBlock layers.
        layer_scale_init_value (float, optional): Initial value for layer scaling. Defaults to `1 / num_layers`.
        adanorm_num_embeddings (int, optional): Number of embeddings for AdaLayerNorm.
            None means non-conditional model. Defaults to None.
    """

    def __init__(
        self,
        input_channels: int,
        dim: int,
        intermediate_dim: int,
        num_layers: int,
        layer_scale_init_value: Optional[float] = None,
        adanorm_num_embeddings: Optional[int] = None,
    ):
        super().__init__()
        self.input_channels = input_channels
        self.embed = nn.Conv1d(input_channels, dim, kernel_size=7, padding=3)
        self.adanorm = adanorm_num_embeddings is not None
        self.norm = (
            AdaLayerNorm(adanorm_num_embeddings, dim, eps=1e-6)
            if adanorm_num_embeddings
            else nn.LayerNorm(dim, eps=1e-6)
        )
        scale_init = layer_scale_init_value or 1 / num_layers
        self.convnext = nn.ModuleList(
            ConvNeXtBlock(
                dim=dim,
                intermediate_dim=intermediate_dim,
                layer_scale_init_value=scale_init,
                adanorm_num_embeddings=adanorm_num_embeddings,
            )
            for _ in range(num_layers)
        )
        self.final_layer_norm = nn.LayerNorm(dim, eps=1e-6)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        # Truncated-normal weights, zero biases for every conv/linear submodule.
        # (Assumes bias=True on those layers, which holds for this module.)
        if isinstance(m, (nn.Conv1d, nn.Linear)):
            nn.init.trunc_normal_(m.weight, std=0.02)
            nn.init.constant_(m.bias, 0)

    def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
        """Map (B, C, L) features to (B, L, H) hidden states.

        `bandwidth_id` must be supplied via kwargs when built with AdaLayerNorm.
        """
        bandwidth_id = kwargs.get("bandwidth_id", None)
        h = self.embed(x)
        h = h.transpose(1, 2)  # (B, C, T) -> (B, T, C) for normalization
        if self.adanorm:
            assert bandwidth_id is not None
            h = self.norm(h, cond_embedding_id=bandwidth_id)
        else:
            h = self.norm(h)
        h = h.transpose(1, 2)  # back to (B, C, T) for the conv blocks
        for block in self.convnext:
            h = block(h, cond_embedding_id=bandwidth_id)
        return self.final_layer_norm(h.transpose(1, 2))
+
+
class VocosResNetBackbone(Backbone):
    """
    Vocos backbone built from a stack of HiFi-GAN style ResBlock1 modules.

    Args:
        input_channels (int): Number of input features channels.
        dim (int): Hidden dimension of the model.
        num_blocks (int): Number of ResBlock1 blocks.
        layer_scale_init_value (float, optional): Initial value for layer scaling. Defaults to None.
    """

    def __init__(
        self,
        input_channels,
        dim,
        num_blocks,
        layer_scale_init_value=None,
    ):
        super().__init__()
        self.input_channels = input_channels
        self.embed = weight_norm(
            nn.Conv1d(input_channels, dim, kernel_size=3, padding=1)
        )
        # Default scale spreads residual contributions across blocks and
        # the three conv pairs inside each block.
        scale_init = layer_scale_init_value or 1 / num_blocks / 3
        blocks = [
            ResBlock1(dim=dim, layer_scale_init_value=scale_init)
            for _ in range(num_blocks)
        ]
        self.resnet = nn.Sequential(*blocks)

    def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
        hidden = self.resnet(self.embed(x))
        return hidden.transpose(1, 2)  # (B, C, T) -> (B, T, C)
+
+
class Vocos(nn.Module):
    """Feature-to-waveform generator: VocosBackbone followed by an ISTFT head.

    Constructor defaults can be overridden by attributes of the optional
    `cfg` object; any attribute missing on `cfg` falls back to the argument.
    """

    def __init__(
        self,
        input_channels: int = 256,
        dim: int = 384,
        intermediate_dim: int = 1152,
        num_layers: int = 8,
        n_fft: int = 800,
        hop_size: int = 200,
        padding: str = "same",
        adanorm_num_embeddings=None,
        cfg=None,
    ):
        super().__init__()

        def from_cfg(name, fallback):
            # cfg attributes take precedence; equivalent to the hasattr check.
            return getattr(cfg, name, fallback) if cfg is not None else fallback

        input_channels = from_cfg("input_channels", input_channels)
        dim = from_cfg("dim", dim)
        intermediate_dim = from_cfg("intermediate_dim", intermediate_dim)
        num_layers = from_cfg("num_layers", num_layers)
        adanorm_num_embeddings = from_cfg(
            "adanorm_num_embeddings", adanorm_num_embeddings
        )
        n_fft = from_cfg("n_fft", n_fft)
        hop_size = from_cfg("hop_size", hop_size)
        padding = from_cfg("padding", padding)

        self.backbone = VocosBackbone(
            input_channels=input_channels,
            dim=dim,
            intermediate_dim=intermediate_dim,
            num_layers=num_layers,
            adanorm_num_embeddings=adanorm_num_embeddings,
        )
        self.head = ISTFTHead(dim, n_fft, hop_size, padding)

    def forward(self, x):
        # (B, C, L) features -> (B, 1, T) waveform (channel axis inserted).
        return self.head(self.backbone(x))[:, None, :]
diff --git a/indextts/utils/maskgct/models/codec/codec_dataset.py b/indextts/utils/maskgct/models/codec/codec_dataset.py
new file mode 100644
index 0000000..be0a308
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/codec_dataset.py
@@ -0,0 +1,264 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Iterable
+import torch
+import numpy as np
+import torch.utils.data
+from torch.nn.utils.rnn import pad_sequence
+from utils.data_utils import *
+from torch.utils.data import ConcatDataset, Dataset
+
+
class CodecDataset(torch.utils.data.Dataset):
    """Dataset of per-utterance codec training features stored as .npy files.

    Feature paths follow the layout
    ``<processed_dir>/<dataset>/<feature_dir>/<uid>.npy`` and are indexed by
    the key ``"<dataset>_<uid>"``. Which feature maps are built is controlled
    by the ``cfg.preprocess.use_*`` flags.
    """

    def __init__(self, cfg, dataset, is_valid=False):
        """
        Args:
            cfg: config
            dataset: dataset name
            is_valid: whether to use train or valid dataset
        """
        assert isinstance(dataset, str)

        processed_data_dir = os.path.join(cfg.preprocess.processed_dir, dataset)

        meta_file = cfg.preprocess.valid_file if is_valid else cfg.preprocess.train_file
        self.metafile_path = os.path.join(processed_data_dir, meta_file)
        self.metadata = self.get_metadata()

        self.data_root = processed_data_dir
        self.cfg = cfg

        # The original code repeated the same utt -> path loop once per
        # feature; it is factored into _build_path_map (behavior unchanged).
        # audio / label / one-hot are mutually exclusive (elif chain kept).
        if cfg.preprocess.use_audio:
            self.utt2audio_path = self._build_path_map(cfg.preprocess.audio_dir)
        elif cfg.preprocess.use_label:
            self.utt2label_path = self._build_path_map(cfg.preprocess.label_dir)
        elif cfg.preprocess.use_one_hot:
            self.utt2one_hot_path = self._build_path_map(cfg.preprocess.one_hot_dir)

        if cfg.preprocess.use_mel:
            self.utt2mel_path = self._build_path_map(cfg.preprocess.mel_dir)

        if cfg.preprocess.use_frame_pitch:
            self.utt2frame_pitch_path = self._build_path_map(cfg.preprocess.pitch_dir)

        if cfg.preprocess.use_uv:
            self.utt2uv_path = self._build_path_map(cfg.preprocess.uv_dir)

        if cfg.preprocess.use_amplitude_phase:
            self.utt2logamp_path = self._build_path_map(
                cfg.preprocess.log_amplitude_dir
            )
            self.utt2pha_path = self._build_path_map(cfg.preprocess.phase_dir)
            self.utt2rea_path = self._build_path_map(cfg.preprocess.real_dir)
            self.utt2imag_path = self._build_path_map(cfg.preprocess.imaginary_dir)

    def _build_path_map(self, feature_dir):
        """Return {"<dataset>_<uid>": <processed_dir>/<dataset>/<feature_dir>/<uid>.npy}
        for every utterance in self.metadata."""
        path_map = {}
        for utt_info in self.metadata:
            # Per-utterance dataset name (may differ from the constructor arg).
            dataset = utt_info["Dataset"]
            uid = utt_info["Uid"]
            utt = "{}_{}".format(dataset, uid)
            path_map[utt] = os.path.join(
                self.cfg.preprocess.processed_dir,
                dataset,
                feature_dir,
                uid + ".npy",
            )
        return path_map

    def __getitem__(self, index):
        """Load the enabled features for one utterance into a dict.

        Keys (when enabled): "mel" (n_mels, T), "frame_pitch" (aligned to
        target_len), "audio", plus "target_len" (number of frames).
        """
        utt_info = self.metadata[index]

        dataset = utt_info["Dataset"]
        uid = utt_info["Uid"]
        utt = "{}_{}".format(dataset, uid)

        single_feature = dict()

        if self.cfg.preprocess.use_mel:
            mel = np.load(self.utt2mel_path[utt])
            assert mel.shape[0] == self.cfg.preprocess.n_mel  # [n_mels, T]

            if "target_len" not in single_feature.keys():
                single_feature["target_len"] = mel.shape[1]

            single_feature["mel"] = mel

        if self.cfg.preprocess.use_frame_pitch:
            frame_pitch = np.load(self.utt2frame_pitch_path[utt])

            if "target_len" not in single_feature.keys():
                single_feature["target_len"] = len(frame_pitch)

            # Stretch/trim pitch to the mel frame count (align_length comes
            # from utils.data_utils).
            aligned_frame_pitch = align_length(
                frame_pitch, single_feature["target_len"]
            )

            single_feature["frame_pitch"] = aligned_frame_pitch

        if self.cfg.preprocess.use_audio:
            audio = np.load(self.utt2audio_path[utt])

            single_feature["audio"] = audio

        return single_feature

    def get_metadata(self):
        """Read the utterance list (JSON) referenced by self.metafile_path."""
        with open(self.metafile_path, "r", encoding="utf-8") as f:
            metadata = json.load(f)

        return metadata

    def get_dataset_name(self):
        """Dataset name of the first utterance (assumes a homogeneous metafile)."""
        return self.metadata[0]["Dataset"]

    def __len__(self):
        return len(self.metadata)
+
+
class CodecConcatDataset(ConcatDataset):
    """ConcatDataset variant that also merges per-dataset metadata and,
    optionally, the per-dataset full-audio evaluation features."""

    def __init__(self, datasets: Iterable[Dataset], full_audio_inference=False):
        """Concatenate a series of datasets with their random inference audio merged."""
        super().__init__(datasets)

        # Config is taken from the first dataset (all are assumed compatible).
        self.cfg = self.datasets[0].cfg

        # Flatten per-dataset metadata, preserving dataset order.
        self.metadata = [entry for ds in self.datasets for entry in ds.metadata]

        if full_audio_inference:
            # NOTE(review): relies on each dataset exposing eval_audio /
            # eval_mel / eval_pitch attributes — confirm the dataset class
            # actually sets them before enabling full_audio_inference.
            self.eval_audios = []
            self.eval_dataset_names = []
            if self.cfg.preprocess.use_mel:
                self.eval_mels = []
            if self.cfg.preprocess.use_frame_pitch:
                self.eval_pitchs = []
            for ds in self.datasets:
                self.eval_audios.append(ds.eval_audio)
                self.eval_dataset_names.append(ds.get_dataset_name())
                if self.cfg.preprocess.use_mel:
                    self.eval_mels.append(ds.eval_mel)
                if self.cfg.preprocess.use_frame_pitch:
                    self.eval_pitchs.append(ds.eval_pitch)
+
+
class CodecCollator(object):
    """Zero-pads a list of per-utterance feature dicts into batched tensors.

    Per-item shapes: mel (n_mels, T), frame_pitch (T,), audio (T * hop_size,).
    Output adds a "mask" of shape (B, max_T, 1) marking valid frames.
    """

    def __init__(self, cfg):
        self.cfg = cfg

    def __call__(self, batch):
        collated = dict()

        for key in batch[0].keys():
            if key == "target_len":
                lengths = [item["target_len"] for item in batch]
                collated["target_len"] = torch.LongTensor(lengths)
                # Frame-validity mask, zero-padded to the longest item.
                collated["mask"] = pad_sequence(
                    [torch.ones((length, 1), dtype=torch.long) for length in lengths],
                    batch_first=True,
                    padding_value=0,
                )
            elif key == "mel":
                # Transpose (n_mels, T) -> (T, n_mels) so frames get padded.
                collated[key] = pad_sequence(
                    [torch.from_numpy(item[key]).T for item in batch],
                    batch_first=True,
                    padding_value=0,
                )
            else:
                collated[key] = pad_sequence(
                    [torch.from_numpy(item[key]) for item in batch],
                    batch_first=True,
                    padding_value=0,
                )

        return collated
diff --git a/indextts/utils/maskgct/models/codec/codec_inference.py b/indextts/utils/maskgct/models/codec/codec_inference.py
new file mode 100644
index 0000000..95e354c
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/codec_inference.py
@@ -0,0 +1,515 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import torch
+import json
+import json5
+import time
+import accelerate
+import random
+import numpy as np
+import shutil
+
+from pathlib import Path
+from tqdm import tqdm
+from glob import glob
+from accelerate.logging import get_logger
+from torch.utils.data import DataLoader
+
+from models.vocoders.vocoder_dataset import (
+ VocoderDataset,
+ VocoderCollator,
+ VocoderConcatDataset,
+)
+
+from models.vocoders.gan.generator import bigvgan, hifigan, melgan, nsfhifigan, apnet
+from models.vocoders.flow.waveglow import waveglow
+from models.vocoders.diffusion.diffwave import diffwave
+from models.vocoders.autoregressive.wavenet import wavenet
+from models.vocoders.autoregressive.wavernn import wavernn
+
+from models.vocoders.gan import gan_vocoder_inference
+from models.vocoders.diffusion import diffusion_vocoder_inference
+
+from utils.io import save_audio
+
# Registry of constructable vocoder generator classes, keyed by
# cfg.model.generator.
_vocoders = {
    "diffwave": diffwave.DiffWave,
    "wavernn": wavernn.WaveRNN,
    "wavenet": wavenet.WaveNet,
    "waveglow": waveglow.WaveGlow,
    "nsfhifigan": nsfhifigan.NSFHiFiGAN,
    "bigvgan": bigvgan.BigVGAN,
    "hifigan": hifigan.HiFiGAN,
    "melgan": melgan.MelGAN,
    "apnet": apnet.APNet,
}

# Forward call for the generalized inferencer (VocoderInference.inference).
# NOTE(review): wavernn/wavenet/waveglow are registered above but have no
# entry here — confirm they are not expected to go through this path.
_vocoder_forward_funcs = {
    # "world": world_inference.synthesis_audios,
    # "wavernn": wavernn_inference.synthesis_audios,
    # "wavenet": wavenet_inference.synthesis_audios,
    "diffwave": diffusion_vocoder_inference.vocoder_inference,
    "nsfhifigan": gan_vocoder_inference.vocoder_inference,
    "bigvgan": gan_vocoder_inference.vocoder_inference,
    "melgan": gan_vocoder_inference.vocoder_inference,
    "hifigan": gan_vocoder_inference.vocoder_inference,
    "apnet": gan_vocoder_inference.vocoder_inference,
}

# APIs for other tasks. e.g. SVC, TTS, TTA... (used by synthesis() below)
_vocoder_infer_funcs = {
    # "world": world_inference.synthesis_audios,
    # "wavernn": wavernn_inference.synthesis_audios,
    # "wavenet": wavenet_inference.synthesis_audios,
    "diffwave": diffusion_vocoder_inference.synthesis_audios,
    "nsfhifigan": gan_vocoder_inference.synthesis_audios,
    "bigvgan": gan_vocoder_inference.synthesis_audios,
    "melgan": gan_vocoder_inference.synthesis_audios,
    "hifigan": gan_vocoder_inference.synthesis_audios,
    "apnet": gan_vocoder_inference.synthesis_audios,
}
+
+
class VocoderInference(object):
    """Batch inference driver for the vocoders registered in ``_vocoders``.

    Builds a dataloader (optionally materializing a temporary "tmp" dataset
    from raw features or audio), restores a checkpoint via HuggingFace
    Accelerate, and writes predicted / ground-truth wav pairs under
    ``args.output_dir/pred`` and ``args.output_dir/gt``.
    """

    def __init__(self, args=None, cfg=None, infer_type="from_dataset"):
        """
        Args:
            args: CLI namespace; must expose vocoder_dir, output_dir, log_level
                and, depending on infer_type, infer_datasets / feature_folder /
                audio_folder.
            cfg: experiment config (preprocess / model / train / inference).
            infer_type (str): "infer_from_dataset", "infer_from_feature" or
                "infer_from_audio".
                NOTE(review): the default "from_dataset" matches none of the
                branches below, leaving cfg.dataset untouched — confirm intended.
        """
        super().__init__()

        start = time.monotonic_ns()
        self.args = args
        self.cfg = cfg
        self.infer_type = infer_type

        # Init accelerator
        self.accelerator = accelerate.Accelerator()
        self.accelerator.wait_for_everyone()

        # Get logger
        with self.accelerator.main_process_first():
            self.logger = get_logger("inference", log_level=args.log_level)

        # Log some info
        self.logger.info("=" * 56)
        self.logger.info("||\t\t" + "New inference process started." + "\t\t||")
        self.logger.info("=" * 56)
        self.logger.info("\n")

        self.vocoder_dir = args.vocoder_dir
        self.logger.debug(f"Vocoder dir: {args.vocoder_dir}")

        # Recreate a clean pred/ and gt/ output layout for this run.
        os.makedirs(args.output_dir, exist_ok=True)
        if os.path.exists(os.path.join(args.output_dir, "pred")):
            shutil.rmtree(os.path.join(args.output_dir, "pred"))
        if os.path.exists(os.path.join(args.output_dir, "gt")):
            shutil.rmtree(os.path.join(args.output_dir, "gt"))
        os.makedirs(os.path.join(args.output_dir, "pred"), exist_ok=True)
        os.makedirs(os.path.join(args.output_dir, "gt"), exist_ok=True)

        # Set random seed
        with self.accelerator.main_process_first():
            start = time.monotonic_ns()
            self._set_random_seed(self.cfg.train.random_seed)
            end = time.monotonic_ns()
            self.logger.debug(
                f"Setting random seed done in {(end - start) / 1e6:.2f}ms"
            )
            self.logger.debug(f"Random seed: {self.cfg.train.random_seed}")

        # Setup inference mode: feature/audio modes build a temporary "tmp"
        # dataset under the processed dir; it is deleted again in inference().
        if self.infer_type == "infer_from_dataset":
            self.cfg.dataset = self.args.infer_datasets
        elif self.infer_type == "infer_from_feature":
            self._build_tmp_dataset_from_feature()
            self.cfg.dataset = ["tmp"]
        elif self.infer_type == "infer_from_audio":
            self._build_tmp_dataset_from_audio()
            self.cfg.dataset = ["tmp"]

        # Setup data loader
        with self.accelerator.main_process_first():
            self.logger.info("Building dataset...")
            start = time.monotonic_ns()
            self.test_dataloader = self._build_dataloader()
            end = time.monotonic_ns()
            self.logger.info(f"Building dataset done in {(end - start) / 1e6:.2f}ms")

        # Build model
        with self.accelerator.main_process_first():
            self.logger.info("Building model...")
            start = time.monotonic_ns()
            self.model = self._build_model()
            end = time.monotonic_ns()
            self.logger.info(f"Building model done in {(end - start) / 1e6:.3f}ms")

        # Init with accelerate
        # NOTE(review): a second Accelerator is created here, replacing the one
        # built at the top of __init__ — confirm this is intentional.
        self.logger.info("Initializing accelerate...")
        start = time.monotonic_ns()
        self.accelerator = accelerate.Accelerator()
        (self.model, self.test_dataloader) = self.accelerator.prepare(
            self.model, self.test_dataloader
        )
        end = time.monotonic_ns()
        self.accelerator.wait_for_everyone()
        self.logger.info(f"Initializing accelerate done in {(end - start) / 1e6:.3f}ms")

        with self.accelerator.main_process_first():
            self.logger.info("Loading checkpoint...")
            start = time.monotonic_ns()
            # Prefer an accelerate "checkpoint" subfolder when present.
            if os.path.isdir(args.vocoder_dir):
                if os.path.isdir(os.path.join(args.vocoder_dir, "checkpoint")):
                    self._load_model(os.path.join(args.vocoder_dir, "checkpoint"))
                else:
                    self._load_model(os.path.join(args.vocoder_dir))
            else:
                self._load_model(os.path.join(args.vocoder_dir))
            end = time.monotonic_ns()
            self.logger.info(f"Loading checkpoint done in {(end - start) / 1e6:.3f}ms")

        self.model.eval()
        self.accelerator.wait_for_everyone()

    def _build_tmp_dataset_from_feature(self):
        """Materialize a "tmp" dataset from pre-extracted feature folders in
        ``args.feature_folder`` (metadata + copied feature subfolders)."""
        if os.path.exists(os.path.join(self.cfg.preprocess.processed_dir, "tmp")):
            shutil.rmtree(os.path.join(self.cfg.preprocess.processed_dir, "tmp"))

        # One metadata entry per mel file; uid is the file stem.
        # NOTE(review): "/"-based path splitting assumes POSIX separators.
        utts = []
        mels = glob(os.path.join(self.args.feature_folder, "mels", "*.npy"))
        for i, mel in enumerate(mels):
            uid = mel.split("/")[-1].split(".")[0]
            utt = {"Dataset": "tmp", "Uid": uid, "index": i}
            utts.append(utt)

        os.makedirs(os.path.join(self.cfg.preprocess.processed_dir, "tmp"))
        with open(
            os.path.join(self.cfg.preprocess.processed_dir, "tmp", "test.json"), "w"
        ) as f:
            json.dump(utts, f)

        meta_info = {"dataset": "tmp", "test": {"size": len(utts)}}

        with open(
            os.path.join(self.cfg.preprocess.processed_dir, "tmp", "meta_info.json"),
            "w",
        ) as f:
            json.dump(meta_info, f)

        # Copy every feature sub-folder (skip plain files) next to the metadata.
        features = glob(os.path.join(self.args.feature_folder, "*"))
        for feature in features:
            feature_name = feature.split("/")[-1]
            if os.path.isfile(feature):
                continue
            shutil.copytree(
                os.path.join(self.args.feature_folder, feature_name),
                os.path.join(self.cfg.preprocess.processed_dir, "tmp", feature_name),
            )

    def _build_tmp_dataset_from_audio(self):
        """Materialize a "tmp" dataset from raw audio in ``args.audio_folder``,
        extracting acoustic features with the project's acoustic_extractor."""
        if os.path.exists(os.path.join(self.cfg.preprocess.processed_dir, "tmp")):
            shutil.rmtree(os.path.join(self.cfg.preprocess.processed_dir, "tmp"))

        utts = []
        audios = glob(os.path.join(self.args.audio_folder, "*"))
        for i, audio in enumerate(audios):
            uid = audio.split("/")[-1].split(".")[0]
            utt = {"Dataset": "tmp", "Uid": uid, "index": i, "Path": audio}
            utts.append(utt)

        os.makedirs(os.path.join(self.cfg.preprocess.processed_dir, "tmp"))
        with open(
            os.path.join(self.cfg.preprocess.processed_dir, "tmp", "test.json"), "w"
        ) as f:
            json.dump(utts, f)

        meta_info = {"dataset": "tmp", "test": {"size": len(utts)}}

        with open(
            os.path.join(self.cfg.preprocess.processed_dir, "tmp", "meta_info.json"),
            "w",
        ) as f:
            json.dump(meta_info, f)

        # Imported lazily so dataset-based inference has no extractor dependency.
        from processors import acoustic_extractor

        acoustic_extractor.extract_utt_acoustic_features_serial(
            utts, os.path.join(self.cfg.preprocess.processed_dir, "tmp"), self.cfg
        )

    def _build_test_dataset(self):
        """Return the (Dataset, Collator) classes used for test data."""
        return VocoderDataset, VocoderCollator

    def _build_model(self):
        """Instantiate the generator selected by cfg.model.generator."""
        model = _vocoders[self.cfg.model.generator](self.cfg)
        return model

    def _build_dataloader(self):
        """Build dataloader which merges a series of datasets."""
        Dataset, Collator = self._build_test_dataset()

        datasets_list = []
        for dataset in self.cfg.dataset:
            subdataset = Dataset(self.cfg, dataset, is_valid=True)
            datasets_list.append(subdataset)
        test_dataset = VocoderConcatDataset(datasets_list, full_audio_inference=False)
        test_collate = Collator(self.cfg)
        # Never ask for a larger batch than the dataset holds.
        test_batch_size = min(self.cfg.inference.batch_size, len(test_dataset))
        test_dataloader = DataLoader(
            test_dataset,
            collate_fn=test_collate,
            num_workers=1,
            batch_size=test_batch_size,
            shuffle=False,
        )
        self.test_batch_size = test_batch_size
        self.test_dataset = test_dataset
        return test_dataloader

    def _load_model(self, checkpoint_dir, from_multi_gpu=False):
        """Load model from checkpoint. If a folder is given, it will
        load the latest checkpoint in checkpoint_dir. If a path is given
        it will load the checkpoint specified by checkpoint_path.
        **Only use this method after** ``accelerator.prepare()``.
        """
        if os.path.isdir(checkpoint_dir):
            if "epoch" in checkpoint_dir and "step" in checkpoint_dir:
                # Already points at a specific epoch/step checkpoint folder.
                checkpoint_path = checkpoint_dir
            else:
                # Load the latest accelerator state dicts
                # NOTE(review): the sort key assumes folder names like
                # "epoch-<n>_step-..." — confirm against the trainer's naming.
                ls = [
                    str(i)
                    for i in Path(checkpoint_dir).glob("*")
                    if not "audio" in str(i)
                ]
                ls.sort(
                    key=lambda x: int(x.split("/")[-1].split("_")[0].split("-")[-1]),
                    reverse=True,
                )
                checkpoint_path = ls[0]
            accelerate.load_checkpoint_and_dispatch(
                self.accelerator.unwrap_model(self.model),
                os.path.join(checkpoint_path, "pytorch_model.bin"),
            )
            return str(checkpoint_path)
        else:
            # Load old .pt checkpoints
            if self.cfg.model.generator in [
                "bigvgan",
                "hifigan",
                "melgan",
                "nsfhifigan",
            ]:
                ckpt = torch.load(
                    checkpoint_dir,
                    map_location=(
                        torch.device("cuda")
                        if torch.cuda.is_available()
                        else torch.device("cpu")
                    ),
                )
                if from_multi_gpu:
                    # Strip DataParallel/DDP "module." prefixes and drop any
                    # entries whose shapes no longer match the model.
                    pretrained_generator_dict = ckpt["generator_state_dict"]
                    generator_dict = self.model.state_dict()

                    new_generator_dict = {
                        k.split("module.")[-1]: v
                        for k, v in pretrained_generator_dict.items()
                        if (
                            k.split("module.")[-1] in generator_dict
                            and v.shape == generator_dict[k.split("module.")[-1]].shape
                        )
                    }

                    generator_dict.update(new_generator_dict)

                    self.model.load_state_dict(generator_dict)
                else:
                    self.model.load_state_dict(ckpt["generator_state_dict"])
            else:
                self.model.load_state_dict(torch.load(checkpoint_dir)["state_dict"])
            return str(checkpoint_dir)

    def inference(self):
        """Inference via batches"""
        for i, batch in tqdm(enumerate(self.test_dataloader)):
            # Pitch-conditioned vocoders additionally receive per-frame f0.
            if self.cfg.preprocess.use_frame_pitch:
                audio_pred = _vocoder_forward_funcs[self.cfg.model.generator](
                    self.cfg,
                    self.model,
                    batch["mel"].transpose(-1, -2),
                    f0s=batch["frame_pitch"].float(),
                    device=next(self.model.parameters()).device,
                )
            else:
                audio_pred = _vocoder_forward_funcs[self.cfg.model.generator](
                    self.cfg,
                    self.model,
                    batch["mel"].transpose(-1, -2),
                    device=next(self.model.parameters()).device,
                )
            # Split the batch back into single utterances and trim padding
            # (target_len frames * hop_size samples) before saving.
            audio_ls = audio_pred.chunk(self.test_batch_size)
            audio_gt_ls = batch["audio"].cpu().chunk(self.test_batch_size)
            length_ls = batch["target_len"].cpu().chunk(self.test_batch_size)
            j = 0
            for it, it_gt, l in zip(audio_ls, audio_gt_ls, length_ls):
                l = l.item()
                it = it.squeeze(0).squeeze(0)[: l * self.cfg.preprocess.hop_size]
                it_gt = it_gt.squeeze(0)[: l * self.cfg.preprocess.hop_size]
                uid = self.test_dataset.metadata[i * self.test_batch_size + j]["Uid"]
                save_audio(
                    os.path.join(self.args.output_dir, "pred", "{}.wav").format(uid),
                    it,
                    self.cfg.preprocess.sample_rate,
                )
                save_audio(
                    os.path.join(self.args.output_dir, "gt", "{}.wav").format(uid),
                    it_gt,
                    self.cfg.preprocess.sample_rate,
                )
                j += 1

        # Clean up the temporary dataset created for feature/audio inference.
        if os.path.exists(os.path.join(self.cfg.preprocess.processed_dir, "tmp")):
            shutil.rmtree(os.path.join(self.cfg.preprocess.processed_dir, "tmp"))

    def _set_random_seed(self, seed):
        """Set random seed for all possible random modules."""
        random.seed(seed)
        np.random.seed(seed)
        torch.random.manual_seed(seed)

    def _count_parameters(self, model):
        """Total number of parameters (trainable or not) in `model`."""
        return sum(p.numel() for p in model.parameters())

    def _dump_cfg(self, path):
        """Serialize the current config as JSON5 to `path`, creating parents."""
        os.makedirs(os.path.dirname(path), exist_ok=True)
        json5.dump(
            self.cfg,
            open(path, "w"),
            indent=4,
            sort_keys=True,
            ensure_ascii=False,
            quote_keys=True,
        )
+
+
def load_nnvocoder(
    cfg,
    vocoder_name,
    weights_file,
    from_multi_gpu=False,
):
    """Load the specified vocoder.

    Args:
        cfg: the vocoder config.
        vocoder_name: key into the ``_vocoders`` registry.
        weights_file: a folder (accelerate state dicts) or a .pt path.
        from_multi_gpu: automatically remove the "module." prefix in state
            dicts if True (for checkpoints saved from DataParallel/DDP).

    Returns:
        The vocoder model in eval mode, moved to CUDA when available.
    """
    print("Loading Vocoder from Weights file: {}".format(weights_file))

    # Build model
    model = _vocoders[vocoder_name](cfg)
    if not os.path.isdir(weights_file):
        # Load from .pt file
        if vocoder_name in ["bigvgan", "hifigan", "melgan", "nsfhifigan"]:
            ckpt = torch.load(
                weights_file,
                map_location=(
                    torch.device("cuda")
                    if torch.cuda.is_available()
                    else torch.device("cpu")
                ),
            )
            if from_multi_gpu:
                # Strip "module." prefixes and drop shape-mismatched entries.
                pretrained_generator_dict = ckpt["generator_state_dict"]
                generator_dict = model.state_dict()

                new_generator_dict = {
                    k.split("module.")[-1]: v
                    for k, v in pretrained_generator_dict.items()
                    if (
                        k.split("module.")[-1] in generator_dict
                        and v.shape == generator_dict[k.split("module.")[-1]].shape
                    )
                }

                generator_dict.update(new_generator_dict)

                model.load_state_dict(generator_dict)
            else:
                model.load_state_dict(ckpt["generator_state_dict"])
        else:
            model.load_state_dict(torch.load(weights_file)["state_dict"])
    else:
        # Load from accelerator state dict: pick the newest checkpoint folder.
        # NOTE(review): the sort key parses "..._<epoch-n>_..." style names —
        # confirm against the trainer's checkpoint naming scheme.
        weights_file = os.path.join(weights_file, "checkpoint")
        ls = [str(i) for i in Path(weights_file).glob("*") if not "audio" in str(i)]
        ls.sort(key=lambda x: int(x.split("_")[-3].split("-")[-1]), reverse=True)
        checkpoint_path = ls[0]
        accelerator = accelerate.Accelerator()
        model = accelerator.prepare(model)
        accelerator.load_state(checkpoint_path)

    if torch.cuda.is_available():
        model = model.cuda()

    model = model.eval()
    return model
+
+
def tensorize(data, device, n_samples):
    """Convert a list of numpy arrays to torch tensors on the given device.

    Args:
        data (list): List of numpy arrays (anything ``torch.as_tensor`` accepts).
        device: Target device for the resulting tensors.
        n_samples: If truthy, only the first ``n_samples`` entries are
            converted; falsy (None/0) keeps the whole list.

    Returns:
        list[torch.Tensor]: Tensors residing on ``device``.
    """
    # isinstance instead of an exact-type comparison so list subclasses pass.
    assert isinstance(data, list)
    if n_samples:
        data = data[:n_samples]
    data = [torch.as_tensor(x, device=device) for x in data]
    return data
+
+
def synthesis(
    cfg,
    vocoder_weight_file,
    n_samples,
    pred,
    f0s=None,
    batch_size=64,
    fast_inference=False,
):
    """Synthesis audios from a given vocoder and series of given features.
    cfg: vocoder config.
    vocoder_weight_file: a folder of accelerator state dict or a path to the .pt file.
    pred: a list of numpy arrays. [(seq_len1, acoustic_features_dim), (seq_len2, acoustic_features_dim), ...]
    """
    vocoder_name = cfg.model.generator

    print("Synthesis audios using {} vocoder...".format(vocoder_name))

    # TODO: the World vocoder is not handled here yet and needs refactoring.

    # Load the neural vocoder once and reuse its device for the inputs.
    vocoder = load_nnvocoder(
        cfg, vocoder_name, weights_file=vocoder_weight_file, from_multi_gpu=True
    )
    device = next(vocoder.parameters()).device

    # Predicted features arrive as (frame_len, n_mels); the vocoders expect
    # (n_mels, frame_len), hence the transpose before tensorizing.
    mels_pred = tensorize([p.T for p in pred], device, n_samples)
    print("For predicted mels, #sample = {}...".format(len(mels_pred)))

    return _vocoder_infer_funcs[vocoder_name](
        cfg,
        vocoder,
        mels_pred,
        f0s=f0s,
        batch_size=batch_size,
        fast_inference=fast_inference,
    )
diff --git a/indextts/utils/maskgct/models/codec/codec_sampler.py b/indextts/utils/maskgct/models/codec/codec_sampler.py
new file mode 100644
index 0000000..9d29f88
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/codec_sampler.py
@@ -0,0 +1,126 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+import random
+
+from torch.utils.data import ConcatDataset, Dataset
+from torch.utils.data.sampler import (
+ BatchSampler,
+ RandomSampler,
+ Sampler,
+ SequentialSampler,
+)
+
+
class ScheduledSampler(Sampler):
    """A sampler that samples data from a given concat-dataset.

    Indices are emitted batch-by-batch: every batch is drawn from a single
    sub-dataset, and the order of the batches is shuffled globally.

    Args:
        concat_dataset (ConcatDataset): a concatenated dataset consisting of all datasets
        batch_size (int): batch size
        holistic_shuffle (bool): shuffle inside each sub-dataset (True) or
            keep its native order (False)
        logger (logging.Logger): logger to print warning message (may be None)
        type (str): "train" or "valid"; validation keeps incomplete tail batches

    Usage:
        For cfg.train.batch_size = 3, cfg.train.holistic_shuffle = False, cfg.train.drop_last = True:
        >>> list(ScheduledSampler(ConcatDataset([[0, 1, 2], [3, 4, 5], [6, 7, 8]])))
        [3, 4, 5, 0, 1, 2, 6, 7, 8]
    """

    def __init__(
        self, concat_dataset, batch_size, holistic_shuffle, logger=None, type="train"
    ):
        # NOTE: the `type` parameter shadows the builtin type() (name kept for
        # backward compatibility), so the messages below use `__class__`
        # instead — the original called type(...) here, which crashed with
        # "'str' object is not callable".
        if not isinstance(concat_dataset, ConcatDataset):
            raise ValueError(
                "concat_dataset must be an instance of ConcatDataset, but got {}".format(
                    concat_dataset.__class__
                )
            )
        if not isinstance(batch_size, int):
            raise ValueError(
                "batch_size must be an integer, but got {}".format(
                    batch_size.__class__
                )
            )
        if not isinstance(holistic_shuffle, bool):
            raise ValueError(
                "holistic_shuffle must be a boolean, but got {}".format(
                    holistic_shuffle.__class__
                )
            )

        self.concat_dataset = concat_dataset
        self.batch_size = batch_size
        self.holistic_shuffle = holistic_shuffle

        # Collect sub-datasets smaller than one batch so we can warn about them.
        affected_dataset_name = []
        affected_dataset_len = []
        for dataset in concat_dataset.datasets:
            dataset_len = len(dataset)
            dataset_name = dataset.get_dataset_name()
            if dataset_len < batch_size:
                affected_dataset_name.append(dataset_name)
                affected_dataset_len.append(dataset_len)

        self.type = type
        for dataset_name, dataset_len in zip(
            affected_dataset_name, affected_dataset_len
        ):
            # Guard against logger=None (the default) — the original crashed.
            if logger is not None and not type == "valid":
                logger.warning(
                    "The {} dataset {} has a length of {}, which is smaller than the batch size {}. This may cause unexpected behavior.".format(
                        type, dataset_name, dataset_len, batch_size
                    )
                )

    def __len__(self):
        # Number of indices yielded in training mode (full batches only,
        # i.e. with drop-last semantics).
        num_of_batches = sum(
            math.floor(len(dataset) / self.batch_size)
            for dataset in self.concat_dataset.datasets
        )
        # NOTE: in "valid" mode __iter__ also yields the tail of each
        # sub-dataset, so it can produce more indices than reported here.
        return num_of_batches * self.batch_size

    def __iter__(self):
        # One index stream per sub-dataset. FIX: the original inverted the
        # flag (SequentialSampler when shuffling was requested, RandomSampler
        # when it was not), contradicting the docstring example above.
        iters = []
        for dataset in self.concat_dataset.datasets:
            iters.append(
                RandomSampler(dataset).__iter__()
                if self.holistic_shuffle
                else SequentialSampler(dataset).__iter__()
            )
        # Offset of each sub-dataset inside the concatenated index space.
        init_indices = [0] + self.concat_dataset.cumulative_sizes[:-1]
        output_batches = []
        for dataset_idx in range(len(self.concat_dataset.datasets)):
            cur_batch = []
            for idx in iters[dataset_idx]:
                cur_batch.append(idx + init_indices[dataset_idx])
                if len(cur_batch) == self.batch_size:
                    output_batches.append(cur_batch)
                    cur_batch = []
            # Keep the incomplete tail batch for validation only;
            # training force-drops it.
            if self.type == "valid" and len(cur_batch) > 0:
                output_batches.append(cur_batch)
                cur_batch = []
        random.shuffle(output_batches)
        output_indices = [item for sublist in output_batches for item in sublist]
        return iter(output_indices)
+
+
def build_samplers(concat_dataset: Dataset, cfg, logger, type):
    """Create the (sampler, batch_sampler) pair used by the dataloaders.

    Validation keeps every sample (drop_last forced off); training honors
    cfg.train.sampler.drop_last.
    """
    scheduled = ScheduledSampler(
        concat_dataset,
        cfg.train.batch_size,
        cfg.train.sampler.holistic_shuffle,
        logger,
        type,
    )
    drop_last = False if type == "valid" else cfg.train.sampler.drop_last
    batch_sampler = BatchSampler(scheduled, cfg.train.batch_size, drop_last)
    return scheduled, batch_sampler
diff --git a/indextts/utils/maskgct/models/codec/codec_trainer.py b/indextts/utils/maskgct/models/codec/codec_trainer.py
new file mode 100644
index 0000000..8a6f838
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/codec_trainer.py
@@ -0,0 +1,166 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import random
+from pathlib import Path
+import re
+
+import accelerate
+import json5
+import numpy as np
+import torch
+from accelerate.utils import ProjectConfiguration
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+
+from models.codec.codec_sampler import build_samplers
+
+
class CodecTrainer:
    """Base class for codec trainers.

    Owns the accelerate setup, dataloader construction, checkpoint
    resume/finetune loading and small shared utilities. Subclasses implement
    the ``_build_*`` hooks and the training-loop methods.
    """

    def __init__(self):
        super().__init__()

    def _init_accelerator(self):
        """Initialize the accelerator components."""
        # Experiment directory: <log_dir>/<exp_name>; tracker logs go to "log".
        self.exp_dir = os.path.join(
            os.path.abspath(self.cfg.log_dir), self.args.exp_name
        )
        project_config = ProjectConfiguration(
            project_dir=self.exp_dir, logging_dir=os.path.join(self.exp_dir, "log")
        )
        self.accelerator = accelerate.Accelerator(
            gradient_accumulation_steps=self.cfg.train.gradient_accumulation_step,
            log_with=self.cfg.train.tracker,
            project_config=project_config,
        )
        if self.accelerator.is_main_process:
            os.makedirs(project_config.project_dir, exist_ok=True)
            os.makedirs(project_config.logging_dir, exist_ok=True)
        with self.accelerator.main_process_first():
            self.accelerator.init_trackers(self.args.exp_name)

    def _build_dataset(self):
        """Return (DatasetClass, CollatorClass); implemented by subclasses."""
        pass

    def _build_criterion(self):
        """Return the loss function(s); implemented by subclasses."""
        pass

    def _build_model(self):
        """Return the model(s); implemented by subclasses."""
        pass

    def _build_dataloader(self):
        """Build dataloader which merges a series of datasets.

        Returns:
            (train_loader, None) — no validation loader is built here.
        """
        # Build dataset instance for each dataset and combine them by ConcatDataset
        Dataset, Collator = self._build_dataset()

        # Build train set
        train_dataset = Dataset(self.cfg, self.cfg.dataset, is_valid=False)
        train_collate = Collator(self.cfg)
        sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset,
            num_replicas=self.accelerator.num_processes,
            rank=self.accelerator.local_process_index,
            shuffle=True,
            seed=self.cfg.train.random_seed,
        )
        train_loader = DataLoader(
            train_dataset,
            batch_size=self.cfg.train.batch_size,
            collate_fn=train_collate,
            sampler=sampler,
            num_workers=self.cfg.train.dataloader.num_worker,
            pin_memory=self.cfg.train.dataloader.pin_memory,
        )
        return train_loader, None

    def _build_optimizer(self):
        """Return the optimizer(s); implemented by subclasses."""
        pass

    def _build_scheduler(self):
        """Return the LR scheduler(s); implemented by subclasses."""
        pass

    def _load_model(self, checkpoint_dir, checkpoint_path=None, resume_type="resume"):
        """Load model from checkpoint. If a folder is given, it will
        load the latest checkpoint in checkpoint_dir. If a path is given
        it will load the checkpoint specified by checkpoint_path.
        **Only use this method after** ``accelerator.prepare()``.

        Raises:
            ValueError: on an unknown *resume_type*.
        """
        if checkpoint_path is None:
            # Checkpoint names are assumed to look like
            # "epoch-XXXX_step-XXXXXXX_loss-...", so field [-3] is the epoch;
            # pick the newest (highest epoch) entry.
            ls = [str(i) for i in Path(checkpoint_dir).glob("*")]
            ls.sort(key=lambda x: int(x.split("_")[-3].split("-")[-1]), reverse=True)
            checkpoint_path = ls[0]
        if resume_type == "resume":
            # Restore the full accelerate state (model, optimizer, RNG, ...).
            self.accelerator.load_state(checkpoint_path)
        elif resume_type == "finetune":
            # Only load model weights; training state starts fresh.
            accelerate.load_checkpoint_and_dispatch(
                self.accelerator.unwrap_model(self.model),
                os.path.join(checkpoint_path, "pytorch_model.bin"),
            )
            self.logger.info("Load model weights for finetune SUCCESS!")
        else:
            raise ValueError("Unsupported resume type: {}".format(resume_type))
        # Resume counters from the checkpoint's file name.
        self.epoch = int(checkpoint_path.split("_")[-3].split("-")[-1]) + 1
        self.step = int(checkpoint_path.split("_")[-2].split("-")[-1]) + 1
        return checkpoint_path

    def train_loop(self):
        """Run the whole training; implemented by subclasses."""
        pass

    def _train_epoch(self):
        pass

    def _valid_epoch(self):
        pass

    def _train_step(self):
        pass

    def _valid_step(self):
        pass

    def _inference(self):
        pass

    def _set_random_seed(self, seed):
        """Set random seed for all possible random modules."""
        random.seed(seed)
        np.random.seed(seed)
        torch.random.manual_seed(seed)

    def _check_nan(self, loss):
        """Log fatally when *loss* contains NaNs (does not abort by itself)."""
        if torch.any(torch.isnan(loss)):
            self.logger.fatal("Fatal Error: NaN!")
            self.logger.error("loss = {:.6f}".format(loss.item()), in_order=True)

    def _check_basic_configs(self):
        """Validate config values that would silently break training."""
        if self.cfg.train.gradient_accumulation_step <= 0:
            self.logger.fatal("Invalid gradient_accumulation_step value!")
            self.logger.error(
                f"Invalid gradient_accumulation_step value: {self.cfg.train.gradient_accumulation_step}. It should be positive."
            )
            self.accelerator.end_training()
            raise ValueError(
                f"Invalid gradient_accumulation_step value: {self.cfg.train.gradient_accumulation_step}. It should be positive."
            )

    def _count_parameters(self):
        """Return the model's parameter count; implemented by subclasses."""
        pass

    def _dump_cfg(self, path):
        """Serialize self.cfg as JSON5 to *path*, creating parent dirs."""
        os.makedirs(os.path.dirname(path), exist_ok=True)
        # FIX: use a context manager so the handle is closed deterministically
        # (the original opened the file inline and relied on the GC).
        with open(path, "w") as f:
            json5.dump(
                self.cfg,
                f,
                indent=4,
                sort_keys=True,
                ensure_ascii=False,
                quote_keys=True,
            )

    def _is_valid_pattern(self, directory_name):
        """True when *directory_name* starts with the checkpoint naming
        pattern "epoch-XXXX_step-XXXXXXX_loss-X.XXXXXX" (prefix match)."""
        directory_name = str(directory_name)
        pattern = r"^epoch-\d{4}_step-\d{7}_loss-\d{1}\.\d{6}"
        return re.match(pattern, directory_name) is not None
diff --git a/indextts/utils/maskgct/models/codec/facodec/__init__.py b/indextts/utils/maskgct/models/codec/facodec/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py b/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py
new file mode 100644
index 0000000..b3bccdb
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py
@@ -0,0 +1,5 @@
+# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
+
+from .filter import *
+from .resample import *
+from .act import *
diff --git a/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py b/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py
new file mode 100644
index 0000000..779d58d
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py
@@ -0,0 +1,29 @@
+# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
+
+import torch.nn as nn
+from .resample import UpSample1d, DownSample1d
+
+
class Activation1d(nn.Module):
    """Anti-aliased activation: upsample, apply the activation, downsample.

    Running the nonlinearity at a higher sample rate suppresses the aliasing
    it would otherwise introduce (alias-free-torch scheme).
    """

    def __init__(
        self,
        activation,
        up_ratio: int = 2,
        down_ratio: int = 2,
        up_kernel_size: int = 12,
        down_kernel_size: int = 12,
    ):
        super().__init__()
        self.up_ratio = up_ratio
        self.down_ratio = down_ratio
        self.act = activation
        self.upsample = UpSample1d(up_ratio, up_kernel_size)
        self.downsample = DownSample1d(down_ratio, down_kernel_size)

    def forward(self, x):
        """x: [B, C, T] -> [B, C, T * up_ratio / down_ratio]."""
        return self.downsample(self.act(self.upsample(x)))
diff --git a/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py b/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py
new file mode 100644
index 0000000..ece8e02
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py
@@ -0,0 +1,96 @@
+# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
+
# Use the native implementation when this torch version provides it.
if hasattr(torch, "sinc"):
    sinc = torch.sinc
else:
    # This code is adopted from adefossez's julius.core.sinc under the MIT License
    # https://adefossez.github.io/julius/julius/core.html
    def sinc(x: torch.Tensor):
        """
        Implementation of sinc, i.e. sin(pi * x) / (pi * x)
        __Warning__: Different to julius.sinc, the input is multiplied by `pi`!
        """
        one = torch.tensor(1.0, device=x.device, dtype=x.dtype)
        return torch.where(x == 0, one, torch.sin(math.pi * x) / (math.pi * x))
+
+
+# This code is adopted from adefossez's julius.lowpass.LowPassFilters under the MIT License
+# https://adefossez.github.io/julius/julius/lowpass.html
def kaiser_sinc_filter1d(
    cutoff, half_width, kernel_size
):  # return filter [1,1,kernel_size]
    """Design a Kaiser-windowed sinc low-pass filter.

    Args:
        cutoff: normalized cutoff frequency (Nyquist = 0.5); 0 yields an
            all-zero filter.
        half_width: normalized transition-band half width.
        kernel_size: number of taps (even or odd).

    Returns:
        torch.Tensor of shape [1, 1, kernel_size]; non-zero filters are
        normalized to unit sum.
    """
    even = kernel_size % 2 == 0
    half_size = kernel_size // 2

    # Kaiser window design: estimate the attenuation A from the transition
    # width, then pick beta via the standard Kaiser formula.
    delta_f = 4 * half_width
    A = 2.285 * (half_size - 1) * math.pi * delta_f + 7.95
    if A > 50.0:
        beta = 0.1102 * (A - 8.7)
    elif A >= 21.0:
        beta = 0.5842 * (A - 21) ** 0.4 + 0.07886 * (A - 21.0)
    else:
        beta = 0.0
    window = torch.kaiser_window(kernel_size, beta=beta, periodic=False)

    # Time axis centered on zero; even kernels are offset by half a sample.
    # ratio = 0.5/cutoff -> 2 * cutoff = 1 / ratio
    if even:
        time = torch.arange(-half_size, half_size) + 0.5
    else:
        time = torch.arange(kernel_size) - half_size
    if cutoff == 0:
        filter_ = torch.zeros_like(time)
    else:
        filter_ = 2 * cutoff * window * sinc(2 * cutoff * time)
        # Normalize filter to have sum = 1, otherwise we will have a small
        # leakage of the constant component in the input signal.
        # (Guarded inside this branch: normalizing the all-zero cutoff==0
        # filter would divide by zero and produce NaNs.)
        filter_ /= filter_.sum()
    return filter_.view(1, 1, kernel_size)
+
+
class LowPassFilter1d(nn.Module):
    """Kaiser-windowed sinc low-pass filter applied as a grouped 1-D conv.

    With stride > 1 this also performs anti-aliased decimation.
    """

    def __init__(
        self,
        cutoff=0.5,
        half_width=0.6,
        stride: int = 1,
        padding: bool = True,
        padding_mode: str = "replicate",
        kernel_size: int = 12,
    ):
        # kernel_size should be even number for stylegan3 setup,
        # in this implementation, odd number is also possible.
        super().__init__()
        # cutoff is a normalized frequency (Nyquist = 0.5).
        if cutoff < -0.0:
            raise ValueError("Minimum cutoff must be larger than zero.")
        if cutoff > 0.5:
            raise ValueError("A cutoff above 0.5 does not make sense.")
        self.kernel_size = kernel_size
        self.even = kernel_size % 2 == 0
        # Asymmetric padding keeps even-sized kernels centered.
        self.pad_left = kernel_size // 2 - int(self.even)
        self.pad_right = kernel_size // 2
        self.stride = stride
        self.padding = padding
        self.padding_mode = padding_mode
        # Precompute the [1, 1, kernel_size] filter and keep it as a buffer
        # so it moves with the module across devices/dtypes.
        filter = kaiser_sinc_filter1d(cutoff, half_width, kernel_size)
        self.register_buffer("filter", filter)

    # input [B, C, T]
    def forward(self, x):
        """x: [B, C, T] -> filtered (and possibly strided) [B, C, T']."""
        _, C, _ = x.shape

        if self.padding:
            x = F.pad(x, (self.pad_left, self.pad_right), mode=self.padding_mode)
        # groups=C applies the same single filter to every channel independently.
        out = F.conv1d(x, self.filter.expand(C, -1, -1), stride=self.stride, groups=C)

        return out
diff --git a/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py b/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py
new file mode 100644
index 0000000..ee993b1
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py
@@ -0,0 +1,57 @@
+# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
+
+import torch.nn as nn
+from torch.nn import functional as F
+from .filter import LowPassFilter1d
+from .filter import kaiser_sinc_filter1d
+
+
class UpSample1d(nn.Module):
    """Anti-aliased 1-D upsampling by an integer *ratio*.

    Implemented as a transposed convolution with a Kaiser-windowed sinc
    low-pass filter (alias-free-torch scheme).
    """

    def __init__(self, ratio=2, kernel_size=None):
        super().__init__()
        self.ratio = ratio
        # Default kernel size: ~6 taps per unit ratio, rounded to even.
        self.kernel_size = (
            int(6 * ratio // 2) * 2 if kernel_size is None else kernel_size
        )
        self.stride = ratio
        # Padding amounts compensate for the transposed-conv boundary and the
        # filter delay; pad_right absorbs the extra sample for odd remainders.
        self.pad = self.kernel_size // ratio - 1
        self.pad_left = self.pad * self.stride + (self.kernel_size - self.stride) // 2
        self.pad_right = (
            self.pad * self.stride + (self.kernel_size - self.stride + 1) // 2
        )
        # Low-pass at the ORIGINAL Nyquist (0.5 / ratio) to suppress images.
        filter = kaiser_sinc_filter1d(
            cutoff=0.5 / ratio, half_width=0.6 / ratio, kernel_size=self.kernel_size
        )
        self.register_buffer("filter", filter)

    # x: [B, C, T]
    def forward(self, x):
        """x: [B, C, T] -> upsampled [B, C, ratio * T] (approximately)."""
        _, C, _ = x.shape

        x = F.pad(x, (self.pad, self.pad), mode="replicate")
        # Scale by ratio to preserve signal amplitude after interpolation.
        x = self.ratio * F.conv_transpose1d(
            x, self.filter.expand(C, -1, -1), stride=self.stride, groups=C
        )
        # Trim the transition regions introduced by the padding above.
        x = x[..., self.pad_left : -self.pad_right]

        return x
+
+
class DownSample1d(nn.Module):
    """Anti-aliased 1-D downsampling by *ratio*: low-pass, then stride."""

    def __init__(self, ratio=2, kernel_size=None):
        super().__init__()
        self.ratio = ratio
        # Default kernel size: ~6 taps per unit ratio, rounded to even.
        if kernel_size is None:
            kernel_size = int(6 * ratio // 2) * 2
        self.kernel_size = kernel_size
        # A strided low-pass filter is exactly decimation with anti-aliasing.
        self.lowpass = LowPassFilter1d(
            cutoff=0.5 / ratio,
            half_width=0.6 / ratio,
            stride=ratio,
            kernel_size=self.kernel_size,
        )

    def forward(self, x):
        """x: [B, C, T] -> [B, C, T // ratio] (approximately)."""
        return self.lowpass(x)
diff --git a/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py b/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py
new file mode 100644
index 0000000..e86b82d
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py
@@ -0,0 +1,98 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+import random
+
+import numpy as np
+
+import torchaudio
+import librosa
+from torch.nn import functional as F
+
+from torch.nn.utils.rnn import pad_sequence
+from utils.data_utils import *
+from models.codec.codec_dataset import CodecDataset
+
+
class FAcodecDataset(torch.utils.data.Dataset):
    """Audio dataset for FACodec training; yields (wave, mel) pairs."""

    def __init__(self, cfg, dataset, is_valid=False):
        """
        Args:
            cfg: config
            dataset: dataset name (not referenced in this body)
            is_valid: whether to use train or valid dataset (not referenced
                in this body)
        """
        self.data_root_dir = cfg.dataset
        self.data_list = []
        # walk through the dataset directory recursively, save all files ends with .wav/.mp3/.opus/.flac/.m4a
        # NOTE(review): `os` is not imported in this module directly; it is
        # presumably re-exported by `from utils.data_utils import *` — confirm.
        for root, _, files in os.walk(self.data_root_dir):
            for file in files:
                if file.endswith((".wav", ".mp3", ".opus", ".flac", ".m4a")):
                    self.data_list.append(os.path.join(root, file))
        self.sr = cfg.preprocess_params.sr
        self.duration_range = cfg.preprocess_params.duration_range
        self.to_mel = torchaudio.transforms.MelSpectrogram(
            n_mels=cfg.preprocess_params.spect_params.n_mels,
            n_fft=cfg.preprocess_params.spect_params.n_fft,
            win_length=cfg.preprocess_params.spect_params.win_length,
            hop_length=cfg.preprocess_params.spect_params.hop_length,
        )
        # Fixed log-mel normalization statistics used by preprocess().
        self.mean, self.std = -4, 4

    def preprocess(self, wave):
        """Waveform -> standardized log-mel spectrogram (1, n_mels, frames)."""
        wave_tensor = (
            torch.from_numpy(wave).float() if isinstance(wave, np.ndarray) else wave
        )
        mel_tensor = self.to_mel(wave_tensor)
        # log(1e-5 + mel) guards against log(0); then standardize by mean/std.
        mel_tensor = (torch.log(1e-5 + mel_tensor.unsqueeze(0)) - self.mean) / self.std
        return mel_tensor

    def __len__(self):
        # return len(self.data_list)
        return len(self.data_list)  # return a fixed number for testing

    def __getitem__(self, index):
        wave, _ = librosa.load(self.data_list[index], sr=self.sr)
        # NOTE(review): the decoded audio above is immediately discarded and
        # replaced by peak-normalized white noise of a random duration — this
        # looks like debug/dummy-data code left enabled; confirm before
        # training on real data.
        wave = np.random.randn(self.sr * random.randint(*self.duration_range))
        wave = wave / np.max(np.abs(wave))
        mel = self.preprocess(wave).squeeze(0)
        wave = torch.from_numpy(wave).float()
        return wave, mel
+
+
class FAcodecCollator(object):
    """Zero-pads model inputs and targets based on number of frames per step."""

    def __init__(self, cfg):
        self.cfg = cfg

    def __call__(self, batch):
        # batch items are (wave, mel) pairs; order them by mel length,
        # longest first.
        mel_lens = [sample[1].shape[1] for sample in batch]
        order = np.argsort(mel_lens)[::-1]
        batch = [batch[i] for i in order]

        n_samples = len(batch)
        n_mels = batch[0][1].size(0)
        mel_pad_len = max(sample[1].shape[1] for sample in batch)
        wave_pad_len = max(sample[0].size(0) for sample in batch)

        # Mels are padded with -10 (log-mel "silence"); waves with zeros.
        mels = torch.full((n_samples, n_mels, mel_pad_len), -10.0).float()
        waves = torch.zeros((n_samples, wave_pad_len)).float()
        mel_lengths = torch.zeros(n_samples).long()
        wave_lengths = torch.zeros(n_samples).long()

        for i, (wave, mel) in enumerate(batch):
            frames = mel.size(1)
            mels[i, :, :frames] = mel
            waves[i, : wave.size(0)] = wave
            mel_lengths[i] = frames
            wave_lengths[i] = wave.size(0)

        return waves, mels, wave_lengths, mel_lengths
diff --git a/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py b/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py
new file mode 100644
index 0000000..c494349
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py
@@ -0,0 +1,137 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import shutil
+import warnings
+import argparse
+import torch
+import os
+import yaml
+
+warnings.simplefilter("ignore")
+
+from .modules.commons import *
+import time
+
+import torchaudio
+import librosa
+from collections import OrderedDict
+
+
class FAcodecInference(object):
    """Offline inference wrapper around a trained FACodec model.

    Supports waveform reconstruction (encode -> quantize -> decode) and
    voice conversion from a source utterance to a reference speaker.
    """

    def __init__(self, args=None, cfg=None):
        # args.checkpoint_path must point to a trained checkpoint
        # (see _load_checkpoint); cfg carries model/preprocess parameters.
        self.args = args
        self.cfg = cfg
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self._build_model()
        self._load_checkpoint()

    def _build_model(self):
        # build_model returns a bundle of submodules keyed by name
        # (iterated below); every submodule is moved to self.device.
        model = build_model(self.cfg.model_params)
        _ = [model[key].to(self.device) for key in model]
        return model

    def _load_checkpoint(self):
        """Load per-submodule weights from args.checkpoint_path and set eval mode."""
        sd = torch.load(self.args.checkpoint_path, map_location="cpu")
        # Training checkpoints nest the weights under a "net" key.
        sd = sd["net"] if "net" in sd else sd
        new_params = dict()
        for key, state_dict in sd.items():
            # Strip the "module." prefix left by DataParallel/DDP wrapping.
            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                if k.startswith("module."):
                    k = k[7:]
                new_state_dict[k] = v
            new_params[key] = new_state_dict
        # Only load weights for submodules this model actually has.
        for key in new_params:
            if key in self.model:
                self.model[key].load_state_dict(new_params[key])
        _ = [self.model[key].eval() for key in self.model]

    @torch.no_grad()
    def inference(self, source, output_dir):
        """Reconstruct *source* through the codec and save the wav.

        Args:
            source: path to the input audio file.
            output_dir: directory for "reconstructed_<name>.wav" (created).

        Returns:
            (quantized, codes) as produced by the quantizer.
        """
        source_audio = librosa.load(source, sr=self.cfg.preprocess_params.sr)[0]
        source_audio = torch.tensor(source_audio).unsqueeze(0).float().to(self.device)

        # source_audio is (1, T); [None, ...] makes it (1, 1, T) —
        # presumably the layout the encoder expects; confirm.
        z = self.model.encoder(source_audio[None, ...].to(self.device).float())
        (
            z,
            quantized,
            commitment_loss,
            codebook_loss,
            timbre,
            codes,
        ) = self.model.quantizer(
            z,
            source_audio[None, ...].to(self.device).float(),
            n_c=self.cfg.model_params.n_c_codebooks,
            return_codes=True,
        )

        full_pred_wave = self.model.decoder(z)

        os.makedirs(output_dir, exist_ok=True)
        # NOTE(review): "/"-based splitting is not Windows-safe; confirm.
        source_name = source.split("/")[-1].split(".")[0]
        torchaudio.save(
            f"{output_dir}/reconstructed_{source_name}.wav",
            full_pred_wave[0].cpu(),
            self.cfg.preprocess_params.sr,
        )

        print(
            "Reconstructed audio saved as: ",
            f"{output_dir}/reconstructed_{source_name}.wav",
        )

        return quantized, codes

    @torch.no_grad()
    def voice_conversion(self, source, reference, output_dir):
        """Convert *source* to the voice of *reference* and save the wav.

        Args:
            source: path to the content-providing audio file.
            reference: path to the timbre-providing audio file.
            output_dir: directory for the converted wav (created).
        """
        source_audio = librosa.load(source, sr=self.cfg.preprocess_params.sr)[0]
        source_audio = torch.tensor(source_audio).unsqueeze(0).float().to(self.device)

        reference_audio = librosa.load(reference, sr=self.cfg.preprocess_params.sr)[0]
        reference_audio = (
            torch.tensor(reference_audio).unsqueeze(0).float().to(self.device)
        )

        # Encode + quantize the source (without return_codes: 5-tuple result).
        z = self.model.encoder(source_audio[None, ...].to(self.device).float())
        z, quantized, commitment_loss, codebook_loss, timbre = self.model.quantizer(
            z,
            source_audio[None, ...].to(self.device).float(),
            n_c=self.cfg.model_params.n_c_codebooks,
        )

        # Encode + quantize the reference (its results are unused below;
        # presumably kept for parity/debugging — confirm).
        z_ref = self.model.encoder(reference_audio[None, ...].to(self.device).float())
        (
            z_ref,
            quantized_ref,
            commitment_loss_ref,
            codebook_loss_ref,
            timbre_ref,
        ) = self.model.quantizer(
            z_ref,
            reference_audio[None, ...].to(self.device).float(),
            n_c=self.cfg.model_params.n_c_codebooks,
        )

        # Combine the source's first two quantizer streams with the reference
        # waveform's timbre to produce the converted latent.
        z_conv = self.model.quantizer.voice_conversion(
            quantized[0] + quantized[1],
            reference_audio[None, ...].to(self.device).float(),
        )
        full_pred_wave = self.model.decoder(z_conv)

        os.makedirs(output_dir, exist_ok=True)
        source_name = source.split("/")[-1].split(".")[0]
        reference_name = reference.split("/")[-1].split(".")[0]
        torchaudio.save(
            f"{output_dir}/converted_{source_name}_to_{reference_name}.wav",
            full_pred_wave[0].cpu(),
            self.cfg.preprocess_params.sr,
        )

        print(
            "Voice conversion results saved as: ",
            f"{output_dir}/converted_{source_name}_to_{reference_name}.wav",
        )
diff --git a/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py b/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py
new file mode 100644
index 0000000..6e0b685
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py
@@ -0,0 +1,776 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import time
+import random
+from pathlib import Path
+import re
+import glob
+
+import accelerate
+import json
+import numpy as np
+import torch
+from accelerate.utils import ProjectConfiguration
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+
+import torch
+import torch.nn.functional as F
+import torchaudio
+
+from accelerate.logging import get_logger
+
+from models.codec.facodec.facodec_dataset import FAcodecDataset, FAcodecCollator
+from models.codec.codec_sampler import build_samplers
+from models.codec.codec_trainer import CodecTrainer
+
+from modules.dac.nn.loss import (
+ MultiScaleSTFTLoss,
+ MelSpectrogramLoss,
+ GANLoss,
+ L1Loss,
+ FocalLoss,
+)
+from audiotools import AudioSignal
+
+from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+
+try:
+ import nemo.collections.asr as nemo_asr
+except ImportError:
+ print(
+ "Unable to import nemo_asr, titanet outputs will be set to random values, you may only run debugging mode. DO NOT USE THIS FOR TRAINING"
+ )
+ nemo_asr = None
+
+from models.codec.facodec.modules.commons import (
+ build_model,
+ load_checkpoint,
+ load_F0_models,
+ log_norm,
+)
+from models.codec.facodec.optimizer import build_optimizer
+
+
+class FAcodecTrainer(CodecTrainer):
    def __init__(self, args, cfg):
        """Set up the full FACodec training state.

        Builds, in order: accelerator, logger, dataloaders, models,
        optimizers/schedulers, frozen helper models, criterions, and finally
        resumes from a checkpoint when args.resume_type is set.
        """
        super().__init__()

        self.args = args
        self.cfg = cfg

        cfg.exp_name = args.exp_name

        # Init accelerator
        self._init_accelerator()
        self.accelerator.wait_for_everyone()

        # Init logger
        with self.accelerator.main_process_first():
            self.logger = get_logger(args.exp_name, log_level=args.log_level)

        self.logger.info("=" * 56)
        self.logger.info("||\t\t" + "New training process started." + "\t\t||")
        self.logger.info("=" * 56)
        self.logger.info("\n")
        self.logger.debug(f"Using {args.log_level.upper()} logging level.")
        self.logger.info(f"Experiment name: {args.exp_name}")
        self.logger.info(f"Experiment directory: {self.exp_dir}")
        self.checkpoint_dir = os.path.join(self.exp_dir, "checkpoint")
        if self.accelerator.is_main_process:
            os.makedirs(self.checkpoint_dir, exist_ok=True)
        self.logger.debug(f"Checkpoint directory: {self.checkpoint_dir}")

        # Init training status
        self.batch_count: int = 0
        self.step: int = 0
        self.epoch: int = 0

        # max_epoch <= 0 means "train forever".
        self.max_epoch = (
            self.cfg.train.max_epoch if self.cfg.train.max_epoch > 0 else float("inf")
        )
        self.logger.info(
            "Max epoch: {}".format(
                self.max_epoch if self.max_epoch < float("inf") else "Unlimited"
            )
        )

        # Check potential errors
        # NOTE(review): these attributes are only set on the main process;
        # other ranks will lack save_checkpoint_stride/run_eval — confirm
        # they are never read off the main process.
        if self.accelerator.is_main_process:
            self._check_basic_configs()
            self.save_checkpoint_stride = self.cfg.train.save_checkpoint_stride
            self.checkpoints_path = [
                [] for _ in range(len(self.save_checkpoint_stride))
            ]
            self.run_eval = self.cfg.train.run_eval

        # Set random seed
        with self.accelerator.main_process_first():
            start = time.monotonic_ns()
            self._set_random_seed(self.cfg.train.random_seed)
            end = time.monotonic_ns()
            self.logger.debug(
                f"Setting random seed done in {(end - start) / 1e6:.2f}ms"
            )
            self.logger.debug(f"Random seed: {self.cfg.train.random_seed}")

        # Build dataloader
        with self.accelerator.main_process_first():
            self.logger.info("Building dataset...")
            start = time.monotonic_ns()
            self.train_dataloader, self.valid_dataloader = self._build_dataloader()
            end = time.monotonic_ns()
            self.logger.info(f"Building dataset done in {(end - start) / 1e6:.2f}ms")

        # Build model
        with self.accelerator.main_process_first():
            self.logger.info("Building model...")
            start = time.monotonic_ns()
            self.model = self._build_model()
            end = time.monotonic_ns()
            for _, model in self.model.items():
                self.logger.debug(model)
            self.logger.info(f"Building model done in {(end - start) / 1e6:.2f}ms")
            # NOTE(review): CodecTrainer._count_parameters returns None;
            # unless it is overridden later in this file, this f-string
            # format will raise — confirm.
            self.logger.info(f"Model parameters: {self._count_parameters()/1e6:.2f}M")

        # Build optimizers and schedulers
        with self.accelerator.main_process_first():
            self.logger.info("Building optimizer and scheduler...")
            start = time.monotonic_ns()
            self.optimizer = self._build_optimizer()
            end = time.monotonic_ns()
            self.logger.info(
                f"Building optimizer and scheduler done in {(end - start) / 1e6:.2f}ms"
            )

        # Build helper models
        with self.accelerator.main_process_first():
            self.logger.info("Building helper models...")
            start = time.monotonic_ns()
            self._built_helper_model()
            end = time.monotonic_ns()
            self.logger.info(
                f"Building helper models done in {(end - start) / 1e6:.2f}ms"
            )

        # Accelerator preparing
        self.logger.info("Initializing accelerate...")
        start = time.monotonic_ns()
        for k in self.model:
            self.model[k] = self.accelerator.prepare(self.model[k])
        for k, v in self.optimizer.optimizers.items():
            self.optimizer.optimizers[k] = self.accelerator.prepare(
                self.optimizer.optimizers[k]
            )
            self.optimizer.schedulers[k] = self.accelerator.prepare(
                self.optimizer.schedulers[k]
            )
        end = time.monotonic_ns()
        self.logger.info(f"Initializing accelerate done in {(end - start) / 1e6:.2f}ms")

        # Build criterions
        with self.accelerator.main_process_first():
            self.logger.info("Building criterion...")
            start = time.monotonic_ns()
            self.criterions = self._build_criterion()
            end = time.monotonic_ns()
            self.logger.info(f"Building criterion done in {(end - start) / 1e6:.2f}ms")

        # Resume checkpoints
        with self.accelerator.main_process_first():
            self.checkpoint_dir = os.path.join(self.exp_dir, "checkpoint")
            if args.resume_type:
                self.logger.info("Resuming from checkpoint...")
                start = time.monotonic_ns()
                ckpt_path = Path(args.checkpoint)
                if self._is_valid_pattern(ckpt_path.parts[-1]):
                    # NOTE(review): this passes args.resume_type as the
                    # *checkpoint_path* positional of _load_model
                    # (checkpoint_dir, checkpoint_path, resume_type) — looks
                    # like a latent argument mix-up; confirm against
                    # CodecTrainer._load_model's signature.
                    ckpt_path = self._load_model(args.checkpoint, args.resume_type)
                else:
                    ckpt_path = self._load_model(
                        args.checkpoint, resume_type=args.resume_type
                    )
                end = time.monotonic_ns()
                self.logger.info(
                    f"Resuming from checkpoint done in {(end - start) / 1e6:.2f}ms"
                )
                self.checkpoints_path = json.load(
                    open(os.path.join(ckpt_path, "ckpts.json"), "r")
                )

        if self.accelerator.is_main_process:
            os.makedirs(self.checkpoint_dir, exist_ok=True)
        self.logger.debug(f"Checkpoint directory: {self.checkpoint_dir}")

        # Save config
        self.config_save_path = os.path.join(self.exp_dir, "args.json")
+
    def _build_dataset(self):
        """Return the (DatasetClass, CollatorClass) pair for FACodec."""
        return FAcodecDataset, FAcodecCollator
+
    def _build_criterion(self):
        """Assemble the training losses.

        Returns a dict with:
          - "stft": multi-scale STFT loss
          - "mel": mel-spectrogram loss over 7 resolutions
          - "l1": L1 loss
          - "content": focal loss (presumably for content/phoneme targets —
            confirm in the training loop)
        """
        criterions = dict()
        stft_criterion = MultiScaleSTFTLoss()
        mel_criterion = MelSpectrogramLoss(
            n_mels=[5, 10, 20, 40, 80, 160, 320],
            window_lengths=[32, 64, 128, 256, 512, 1024, 2048],
            mel_fmin=[0, 0, 0, 0, 0, 0, 0],
            mel_fmax=[None, None, None, None, None, None, None],
            pow=1.0,
            mag_weight=0.0,
            clamp_eps=1e-5,
        )
        content_criterion = FocalLoss(gamma=2)
        l1_criterion = L1Loss()
        criterions["stft"] = stft_criterion
        criterions["mel"] = mel_criterion
        criterions["l1"] = l1_criterion
        criterions["content"] = content_criterion

        return criterions
+
    def _build_model(self):
        """Build the FACodec submodule bundle and move it to the device."""
        model = build_model(self.cfg.model_params)
        _ = [model[key].to(self.accelerator.device) for key in model]
        return model
+
    def _built_helper_model(self):
        """Load frozen auxiliary models used during training.

        Loads an F0 extractor, a wav2vec2 phoneme-CTC model, and (when nemo
        is installed) a TitaNet speaker-verification model. Pretrained
        weights are fetched from the HuggingFace/NGC hubs on first use.
        """
        device = self.accelerator.device
        self.pitch_extractor = load_F0_models(self.cfg.F0_path).to(device)

        # load model and processor
        self.w2v_processor = Wav2Vec2Processor.from_pretrained(
            "facebook/wav2vec2-xlsr-53-espeak-cv-ft"
        )
        self.w2v_model = Wav2Vec2ForCTC.from_pretrained(
            "facebook/wav2vec2-xlsr-53-espeak-cv-ft"
        ).to(device)
        self.w2v_model.eval()

        # Speaker embedding model; left as None when nemo_asr could not be
        # imported (callers must handle the debugging fallback).
        if nemo_asr is None:
            self.speaker_model = None
        else:
            self.speaker_model = (
                nemo_asr.models.EncDecSpeakerLabelModel.from_pretrained(
                    "nvidia/speakerverification_en_titanet_large"
                )
            )
            self.speaker_model = self.speaker_model.to(device)
            self.speaker_model.eval()
+
    def _build_optimizer(self):
        """Create the multi-module optimizer wrapper (one entry per submodule).

        Warmup/base-lr parameters come from cfg.loss_params; the concrete
        optimizer/scheduler types are decided by build_optimizer.
        """
        scheduler_params = {
            "warmup_steps": self.cfg.loss_params.warmup_steps,
            "base_lr": self.cfg.loss_params.base_lr,
        }
        optimizer = build_optimizer(
            {key: self.model[key] for key in self.model},
            # copy() so each submodule gets an independent params dict
            scheduler_params_dict={key: scheduler_params.copy() for key in self.model},
            lr=float(scheduler_params["base_lr"]),
        )

        return optimizer
+
+ def train_loop(self):
+ """Training process"""
+ self.accelerator.wait_for_everyone()
+
+ # Dump config
+ if self.accelerator.is_main_process:
+ self._dump_cfg(self.config_save_path)
+ _ = [self.model[key].train() for key in self.model]
+ self.optimizer.zero_grad()
+
+ # Sync and start training
+ self.accelerator.wait_for_everyone()
+ while self.epoch < self.max_epoch:
+ self.logger.info("\n")
+ self.logger.info("-" * 32)
+ self.logger.info("Epoch {}: ".format(self.epoch))
+
+ # Train and Validate
+ train_total_loss, train_losses = self._train_epoch()
+ for key, loss in train_losses.items():
+ self.logger.info(" |- Train/{} Loss: {:.6f}".format(key, loss))
+ self.accelerator.log(
+ {"Epoch/Train {} Loss".format(key): loss},
+ step=self.epoch,
+ )
+ self.accelerator.log(
+ {
+ "Epoch/Train Total Loss": train_total_loss,
+ },
+ step=self.epoch,
+ )
+
+ # Update scheduler
+ self.accelerator.wait_for_everyone()
+
+ # Check save checkpoint interval
+ run_eval = False
+ if self.accelerator.is_main_process:
+ save_checkpoint = False
+ for i, num in enumerate(self.save_checkpoint_stride):
+ if self.epoch % num == 0:
+ save_checkpoint = True
+ run_eval |= self.run_eval[i]
+
+ # Save checkpoints
+ self.accelerator.wait_for_everyone()
+ if self.accelerator.is_main_process and save_checkpoint:
+ print("Saving..")
+ state = {
+ "net": {key: self.model[key].state_dict() for key in self.model},
+ "optimizer": self.optimizer.state_dict(),
+ "scheduler": self.optimizer.scheduler_state_dict(),
+ "iters": self.step,
+ "epoch": self.epoch,
+ }
+ save_path = os.path.join(
+ self.checkpoint_dir,
+ "FAcodec_epoch_%05d_step_%05d.pth" % (self.epoch, self.iters),
+ )
+ torch.save(state, save_path)
+ json.dump(
+ self.checkpoints_path,
+ open(os.path.join(self.checkpoint_dir, "ckpts.json"), "w"),
+ ensure_ascii=False,
+ indent=4,
+ )
+
+ self.accelerator.wait_for_everyone()
+
+ self.epoch += 1
+
+ # Finish training
+ self.accelerator.wait_for_everyone()
+ if self.accelerator.is_main_process:
+ path = os.path.join(
+ self.checkpoint_dir,
+ "epoch-{:04d}_step-{:07d}".format(
+ self.epoch,
+ self.step,
+ ),
+ )
+ print("Saving..")
+ state = {
+ "net": {key: self.model[key].state_dict() for key in self.model},
+ "optimizer": self.optimizer.state_dict(),
+ "scheduler": self.optimizer.scheduler_state_dict(),
+ "iters": self.step,
+ "epoch": self.epoch,
+ }
+ save_path = os.path.join(
+ self.checkpoint_dir,
+ "FAcodec_epoch_%05d_step_%05d.pth" % (self.epoch, self.iters),
+ )
+ torch.save(state, save_path)
+
    def _train_epoch(self):
        """Training epoch. Should return average loss of a batch (sample) over
        one epoch. See ``train_loop`` for usage.

        Returns:
            (epoch_total_loss, epoch_losses): the averaged generator total
            loss and a dict of averaged per-component losses. Both are
            normalized per *optimizer step* (hence the multiplication by
            ``gradient_accumulation_step`` at the end).
        """
        _ = [self.model[key].train() for key in self.model]

        epoch_losses: dict = {}
        # Running sum of the generator total loss; float, not int, since
        # _train_step returns Python floats.
        epoch_total_loss: float = 0

        for batch in tqdm(
            self.train_dataloader,
            desc=f"Training Epoch {self.epoch}",
            unit="batch",
            colour="GREEN",
            leave=False,
            dynamic_ncols=True,
            smoothing=0.04,
            # Only the main process draws the progress bar.
            disable=not self.accelerator.is_main_process,
        ):
            # Get losses for this batch.
            total_loss, losses = self._train_step(batch)
            self.batch_count += 1

            # Log / accumulate only on gradient-accumulation boundaries, so
            # ``self.step`` counts optimizer steps rather than batches.
            if self.batch_count % self.cfg.train.gradient_accumulation_step == 0:
                self.accelerator.log(
                    {
                        "Step/Learning Rate": (
                            # NOTE(review): uses the "encoder" scheduler's LR
                            # as the representative learning rate.
                            self.optimizer.schedulers["encoder"].get_last_lr()[0]
                            if self.step != 0
                            else 0
                        )
                    },
                    step=self.step,
                )
                for key, _ in losses.items():
                    self.accelerator.log(
                        {
                            "Step/Train {} Loss".format(key): losses[key],
                        },
                        step=self.step,
                    )

                # First accumulation step seeds the dict; later ones add in.
                if not epoch_losses:
                    epoch_losses = losses
                else:
                    for key, value in losses.items():
                        epoch_losses[key] += value
                epoch_total_loss += total_loss
                self.step += 1

        # Get and log total losses, averaged over the number of optimizer
        # steps in the epoch (= batches / accumulation factor).
        self.accelerator.wait_for_everyone()
        epoch_total_loss = (
            epoch_total_loss
            / len(self.train_dataloader)
            * self.cfg.train.gradient_accumulation_step
        )
        for key in epoch_losses.keys():
            epoch_losses[key] = (
                epoch_losses[key]
                / len(self.train_dataloader)
                * self.cfg.train.gradient_accumulation_step
            )
        return epoch_total_loss, epoch_losses
+
    def _train_step(self, data):
        """Training forward step. Should return average loss of a sample over
        one batch. Provoke ``_forward_step`` is recommended except for special case.
        See ``_train_epoch`` for usage.

        Pipeline for one batch (waves, mels, wave_lengths, mel_input_length):
          1. derive phoneme-id targets from a frozen wav2vec2 CTC model;
          2. crop random fixed-length mel / wave / phone segments;
          3. extract energy (log_norm) and normalized-log-F0 targets;
          4. encode -> quantize -> decode, run factorized predictors;
          5. one discriminator update, then one generator update combining
             mel/adv/feature-matching/commitment/codebook/F0/energy/content/
             speaker losses.

        Returns:
            (total_loss, train_losses): the generator total loss (float) and
            a dict of per-component loss floats.
        """
        # Init losses
        train_losses = {}
        total_loss = 0

        # Use input feature to get predictions
        data = [b.to(self.accelerator.device, non_blocking=True) for b in data]
        waves, mels, wave_lengths, mel_input_length = data

        # extract semantic latent with w2v model
        # (waveforms are assumed to be 24 kHz here and below -- the decoder
        # output is also wrapped as 24 kHz AudioSignal further down)
        waves_16k = torchaudio.functional.resample(waves, 24000, 16000)
        w2v_input = self.w2v_processor(
            waves_16k, sampling_rate=16000, return_tensors="pt"
        ).input_values.to(self.accelerator.device)
        with torch.no_grad():
            w2v_outputs = self.w2v_model(w2v_input.squeeze(0)).logits
            predicted_ids = torch.argmax(w2v_outputs, dim=-1)
            # Stretch the per-frame phoneme ids to the mel time axis.
            phone_ids = (
                F.interpolate(
                    predicted_ids.unsqueeze(0).float(), mels.size(-1), mode="nearest"
                )
                .long()
                .squeeze(0)
            )

        # get clips: every sample in the batch is cropped to the same length,
        # the shorter of the shortest mel and the configured maximum.
        mel_seg_len = min(
            [int(mel_input_length.min().item()), self.cfg.train.max_frame_len]
        )

        gt_mel_seg = []
        wav_seg = []
        w2v_seg = []

        for bib in range(len(mel_input_length)):
            mel_length = int(mel_input_length[bib].item())

            # Random crop start (0 when the sample is exactly segment-sized).
            random_start = (
                np.random.randint(0, mel_length - mel_seg_len)
                if mel_length != mel_seg_len
                else 0
            )
            gt_mel_seg.append(mels[bib, :, random_start : random_start + mel_seg_len])

            # w2v_seg.append(w2v_latent[bib, :, random_start:random_start + mel_seg_len])
            w2v_seg.append(phone_ids[bib, random_start : random_start + mel_seg_len])

            # assumes a mel hop size of 300 samples at 24 kHz -- TODO confirm
            # against the preprocessing config
            y = waves[bib][random_start * 300 : (random_start + mel_seg_len) * 300]

            wav_seg.append(y.to(self.accelerator.device))

        gt_mel_seg = torch.stack(gt_mel_seg).detach()

        wav_seg = torch.stack(wav_seg).float().detach().unsqueeze(1)
        w2v_seg = torch.stack(w2v_seg).float().detach()

        # Targets come from frozen helpers; no gradients needed.
        with torch.no_grad():
            real_norm = log_norm(gt_mel_seg.unsqueeze(1)).squeeze(1).detach()
            F0_real, _, _ = self.pitch_extractor(gt_mel_seg.unsqueeze(1))

        # normalize f0
        # Remove unvoiced frames (replace with -1)
        # NOTE(review): gt_glob_f0s is collected but never used below.
        gt_glob_f0s = []
        f0_targets = []
        for bib in range(len(F0_real)):
            voiced_indices = F0_real[bib] > 5.0
            f0_voiced = F0_real[bib][voiced_indices]

            if len(f0_voiced) != 0:
                # Convert to log scale
                log_f0 = f0_voiced.log2()

                # Calculate mean and standard deviation
                mean_f0 = log_f0.mean()
                std_f0 = log_f0.std()

                # Normalize the F0 sequence
                normalized_f0 = (log_f0 - mean_f0) / std_f0

                # Create the normalized F0 sequence with unvoiced frames
                normalized_sequence = torch.zeros_like(F0_real[bib])
                normalized_sequence[voiced_indices] = normalized_f0
                normalized_sequence[~voiced_indices] = (
                    -10
                )  # Assign -10 to unvoiced frames

                gt_glob_f0s.append(mean_f0)
            else:
                # Fully-unvoiced sample: flat -10 target, zero global F0.
                normalized_sequence = torch.zeros_like(F0_real[bib]) - 10.0
                gt_glob_f0s.append(torch.tensor(0.0).to(self.accelerator.device))

            # f0_targets.append(normalized_sequence[single_side_context // 200:-single_side_context // 200])
            f0_targets.append(normalized_sequence)
        f0_targets = torch.stack(f0_targets).to(self.accelerator.device)
        # fill nan with -10 (std can be 0 for a constant voiced stretch)
        f0_targets[torch.isnan(f0_targets)] = -10.0
        # fill inf with -10
        f0_targets[torch.isinf(f0_targets)] = -10.0
        # if frame_rate not equal to 80, interpolate f0 from frame rate of 80 to target frame rate
        if self.cfg.preprocess_params.frame_rate != 80:
            f0_targets = F.interpolate(
                f0_targets.unsqueeze(1),
                mel_seg_len // 80 * self.cfg.preprocess_params.frame_rate,
                mode="nearest",
            ).squeeze(1)
            # NOTE(review): w2v_seg is 2-D (B, T) at this point, but
            # F.interpolate expects a (B, C, T) input -- this branch looks
            # like it would raise; confirm for frame_rate != 80 configs.
            w2v_seg = F.interpolate(
                w2v_seg,
                mel_seg_len // 80 * self.cfg.preprocess_params.frame_rate,
                mode="nearest",
            )

        wav_seg_input = wav_seg
        wav_seg_target = wav_seg

        # --- forward pass: encode, factorized quantization, decode ---
        z = self.model.encoder(wav_seg_input)
        z, quantized, commitment_loss, codebook_loss, timbre = self.model.quantizer(
            z, wav_seg_input, n_c=2, full_waves=waves, wave_lens=wave_lengths
        )
        preds, rev_preds = self.model.fa_predictors(quantized, timbre)

        pred_wave = self.model.decoder(z)

        # The decoder may trim a few samples; center-crop the target to match.
        len_diff = wav_seg_target.size(-1) - pred_wave.size(-1)
        if len_diff > 0:
            wav_seg_target = wav_seg_target[..., len_diff // 2 : -len_diff // 2]

        # discriminator loss (LSGAN: fake -> 0, real -> 1 on the final map)
        d_fake = self.model.discriminator(pred_wave.detach())
        d_real = self.model.discriminator(wav_seg_target)
        loss_d = 0
        for x_fake, x_real in zip(d_fake, d_real):
            loss_d += torch.mean(x_fake[-1] ** 2)
            loss_d += torch.mean((1 - x_real[-1]) ** 2)

        self.optimizer.zero_grad()
        self.accelerator.backward(loss_d)
        grad_norm_d = torch.nn.utils.clip_grad_norm_(
            self.model.discriminator.parameters(), 10.0
        )
        self.optimizer.step("discriminator")
        self.optimizer.scheduler(key="discriminator")

        # generator loss
        signal = AudioSignal(wav_seg_target, sample_rate=24000)
        recons = AudioSignal(pred_wave, sample_rate=24000)
        stft_loss = self.criterions["stft"](recons, signal)
        mel_loss = self.criterions["mel"](recons, signal)
        waveform_loss = self.criterions["l1"](recons, signal)

        # Re-run the discriminator on the non-detached prediction for the
        # adversarial and feature-matching terms.
        d_fake = self.model.discriminator(pred_wave)
        d_real = self.model.discriminator(wav_seg_target)

        loss_g = 0
        for x_fake in d_fake:
            loss_g += torch.mean((1 - x_fake[-1]) ** 2)

        # Feature matching over all intermediate discriminator maps.
        loss_feature = 0

        for i in range(len(d_fake)):
            for j in range(len(d_fake[i]) - 1):
                loss_feature += F.l1_loss(d_fake[i][j], d_real[i][j].detach())

        # --- factor predictor losses (and their gradient-reversal "rev_"
        # counterparts, which may be None and then contribute zero) ---
        pred_f0, pred_uv = preds["f0"], preds["uv"]
        rev_pred_f0, rev_pred_uv = rev_preds["rev_f0"], rev_preds["rev_uv"]

        common_min_size = min(pred_f0.size(-2), f0_targets.size(-1))
        f0_targets = f0_targets[..., :common_min_size]
        real_norm = real_norm[..., :common_min_size]

        f0_loss = F.smooth_l1_loss(
            f0_targets, pred_f0.squeeze(-1)[..., :common_min_size]
        )
        # NOTE(review): the "uv" head is trained against the frame energy
        # (real_norm from log_norm), not a binary voiced/unvoiced flag.
        uv_loss = F.smooth_l1_loss(
            real_norm, pred_uv.squeeze(-1)[..., :common_min_size]
        )
        rev_f0_loss = (
            F.smooth_l1_loss(f0_targets, rev_pred_f0.squeeze(-1)[..., :common_min_size])
            if rev_pred_f0 is not None
            else torch.FloatTensor([0]).to(self.accelerator.device)
        )
        rev_uv_loss = (
            F.smooth_l1_loss(real_norm, rev_pred_uv.squeeze(-1)[..., :common_min_size])
            if rev_pred_uv is not None
            else torch.FloatTensor([0]).to(self.accelerator.device)
        )

        tot_f0_loss = f0_loss + rev_f0_loss
        tot_uv_loss = uv_loss + rev_uv_loss

        pred_content = preds["content"]
        rev_pred_content = rev_preds["rev_content"]

        target_content_latents = w2v_seg[..., :common_min_size]

        content_loss = self.criterions["content"](
            pred_content.transpose(1, 2)[..., :common_min_size],
            target_content_latents.long(),
        )
        rev_content_loss = (
            self.criterions["content"](
                rev_pred_content.transpose(1, 2)[..., :common_min_size],
                target_content_latents.long(),
            )
            if rev_pred_content is not None
            else torch.FloatTensor([0]).to(self.accelerator.device)
        )

        tot_content_loss = content_loss + rev_content_loss

        # Speaker labels come from the frozen TitaNet classifier, or are all
        # zeros when nemo_asr was unavailable at startup.
        if self.speaker_model is not None:
            spk_logits = torch.cat(
                [
                    self.speaker_model.infer_segment(w16.cpu()[..., :wl])[1]
                    for w16, wl in zip(waves_16k, wave_lengths)
                ],
                dim=0,
            )
            spk_labels = spk_logits.argmax(dim=-1)
        else:
            spk_labels = torch.zeros([len(waves_16k)], dtype=torch.long).to(
                self.accelerator.device
            )

        spk_pred_logits = preds["timbre"]
        spk_loss = F.cross_entropy(spk_pred_logits, spk_labels)
        x_spk_pred_logits = rev_preds["x_timbre"]

        x_spk_loss = (
            F.cross_entropy(x_spk_pred_logits, spk_labels)
            if x_spk_pred_logits is not None
            else torch.FloatTensor([0]).to(self.accelerator.device)
        )

        tot_spk_loss = spk_loss + x_spk_loss

        # NOTE(review): stft_loss and waveform_loss are computed and logged
        # but deliberately excluded from the weighted generator objective.
        loss_gen_all = (
            mel_loss * 15.0
            + loss_feature * 1.0
            + loss_g * 1.0
            + commitment_loss * 0.25
            + codebook_loss * 1.0
            + tot_f0_loss * 1.0
            + tot_uv_loss * 1.0
            + tot_content_loss * 5.0
            + tot_spk_loss * 5.0
        )

        self.optimizer.zero_grad()
        self.accelerator.backward(loss_gen_all)

        # Collect scalar values for logging (no graph needed).
        with torch.no_grad():
            total_loss = loss_gen_all.item()
            train_losses["stft"] = stft_loss.item()
            train_losses["mel"] = mel_loss.item()
            train_losses["l1"] = waveform_loss.item()
            train_losses["f0"] = f0_loss.item()
            train_losses["uv"] = uv_loss.item()
            train_losses["content"] = content_loss.item()
            train_losses["speaker"] = spk_loss.item()
            train_losses["rev_f0"] = rev_f0_loss.item()
            train_losses["rev_uv"] = rev_uv_loss.item()
            train_losses["rev_content"] = rev_content_loss.item()
            train_losses["rev_speaker"] = x_spk_loss.item()

            train_losses["feature"] = loss_feature.item()
            train_losses["generator"] = loss_g.item()
            train_losses["commitment"] = commitment_loss.item()
            train_losses["codebook"] = codebook_loss.item()

            # discriminators
            train_losses["discriminator"] = loss_d.item()

        return total_loss, train_losses
+
+ def _inference(self, eval_wave):
+ """Inference during training for test audios."""
+ z = self.model.encoder(
+ eval_wave[None, None, ...].to(self.accelerator.device).float()
+ )
+ z, quantized, commitment_loss, codebook_loss, timbre = self.model.quantizer(
+ z, eval_wave[None, None, ...], n_c=self.cfg.model_params.n_c_codebooks
+ )
+ full_pred_wave = self.model.decoder(z)
+ return full_pred_wave[0]
+
+ def _load_model(self, checkpoint_path=None, resume_type="resume"):
+ """Load model from checkpoint. If checkpoint_path is None, it will
+ load the latest checkpoint in checkpoint_dir. If checkpoint_path is not
+ None, it will load the checkpoint specified by checkpoint_path. **Only use this
+ method after** ``accelerator.prepare()``.
+ """
+ if resume_type == "resume":
+ if checkpoint_path is None:
+ available_checkpoints = glob.glob(
+ os.path.join(self.checkpoint_dir, "FAcodc_epoch_*_step_*.pth")
+ )
+ # find the checkpoint that has the highest step number
+ latest_checkpoint = max(
+ available_checkpoints,
+ key=lambda x: int(x.split("_")[-1].split(".")[0]),
+ )
+ earliest_checkpoint = min(
+ available_checkpoints,
+ key=lambda x: int(x.split("_")[-1].split(".")[0]),
+ )
+ # delete the earliest checkpoint
+ if (
+ earliest_checkpoint != latest_checkpoint
+ and self.accelerator.is_main_process
+ and len(available_checkpoints) > 4
+ ):
+ os.remove(earliest_checkpoint)
+ print(f"Removed {earliest_checkpoint}")
+ else:
+ latest_checkpoint = checkpoint_path
+
+ self.model, self.optimizer, self.epoch, self.step = load_checkpoint(
+ self.model,
+ self.optimizer,
+ latest_checkpoint,
+ load_only_params=False,
+ ignore_modules=[],
+ is_distributed=self.accelerator.num_processes > 1,
+ )
+
+ else:
+ raise ValueError("Invalid resume type")
+ return checkpoint_path
+
+ def _count_parameters(self):
+ total_num = sum(
+ sum(p.numel() for p in self.model[key].parameters()) for key in self.model
+ )
+ # trainable_num = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
+ return total_num
diff --git a/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py b/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py
@@ -0,0 +1 @@
+
diff --git a/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 b/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7
new file mode 100644
index 0000000..d6cf419
Binary files /dev/null and b/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 differ
diff --git a/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py b/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py
new file mode 100644
index 0000000..601ec96
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py
@@ -0,0 +1,219 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This code is borrowed from https://github.com/yl4579/PitchExtractor/blob/main/model.py
+
+"""
+Implementation of model from:
+Kum et al. - "Joint Detection and Classification of Singing Voice Melody Using
+Convolutional Recurrent Neural Networks" (2019)
+Link: https://www.semanticscholar.org/paper/Joint-Detection-and-Classification-of-Singing-Voice-Kum-Nam/60a2ad4c7db43bace75805054603747fcd062c0d
+"""
+import torch
+from torch import nn
+
+
class JDCNet(nn.Module):
    """
    Joint Detection and Classification Network model for singing voice melody.

    Used here as a frozen F0 extractor: ``forward`` returns per-frame pitch
    activations plus intermediate features rather than the (classifier,
    detector) pair of the original paper.
    """

    def __init__(self, num_class=722, seq_len=31, leaky_relu_slope=0.01):
        # num_class: number of pitch bins; seq_len is unused at runtime and
        # kept only for signature compatibility.
        super().__init__()
        self.num_class = num_class

        # input = (b, 1, 31, 513), b = batch size
        self.conv_block = nn.Sequential(
            nn.Conv2d(
                in_channels=1, out_channels=64, kernel_size=3, padding=1, bias=False
            ),  # out: (b, 64, 31, 513)
            nn.BatchNorm2d(num_features=64),
            nn.LeakyReLU(leaky_relu_slope, inplace=True),
            nn.Conv2d(64, 64, 3, padding=1, bias=False),  # (b, 64, 31, 513)
        )

        # res blocks: each halves the frequency axis via its internal pooling
        self.res_block1 = ResBlock(
            in_channels=64, out_channels=128
        )  # (b, 128, 31, 128)
        self.res_block2 = ResBlock(
            in_channels=128, out_channels=192
        )  # (b, 192, 31, 32)
        self.res_block3 = ResBlock(in_channels=192, out_channels=256)  # (b, 256, 31, 8)

        # pool block
        self.pool_block = nn.Sequential(
            nn.BatchNorm2d(num_features=256),
            nn.LeakyReLU(leaky_relu_slope, inplace=True),
            nn.MaxPool2d(kernel_size=(1, 4)),  # (b, 256, 31, 2)
            nn.Dropout(p=0.2),
        )

        # maxpool layers (for auxiliary network inputs)
        # NOTE(review): the maxpool1-3 / detector_conv / bilstm_detector /
        # detector branch is registered but not used by forward() below;
        # it is kept so pretrained checkpoints load cleanly.
        # in = (b, 128, 31, 513) from conv_block, out = (b, 128, 31, 2)
        self.maxpool1 = nn.MaxPool2d(kernel_size=(1, 40))
        # in = (b, 128, 31, 128) from res_block1, out = (b, 128, 31, 2)
        self.maxpool2 = nn.MaxPool2d(kernel_size=(1, 20))
        # in = (b, 128, 31, 32) from res_block2, out = (b, 128, 31, 2)
        self.maxpool3 = nn.MaxPool2d(kernel_size=(1, 10))

        # in = (b, 640, 31, 2), out = (b, 256, 31, 2)
        self.detector_conv = nn.Sequential(
            nn.Conv2d(640, 256, 1, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(leaky_relu_slope, inplace=True),
            nn.Dropout(p=0.2),
        )

        # input: (b, 31, 512) - resized from (b, 256, 31, 2)
        self.bilstm_classifier = nn.LSTM(
            input_size=512, hidden_size=256, batch_first=True, bidirectional=True
        )  # (b, 31, 512)

        # input: (b, 31, 512) - resized from (b, 256, 31, 2)
        self.bilstm_detector = nn.LSTM(
            input_size=512, hidden_size=256, batch_first=True, bidirectional=True
        )  # (b, 31, 512)

        # input: (b * 31, 512)
        self.classifier = nn.Linear(
            in_features=512, out_features=self.num_class
        )  # (b * 31, num_class)

        # input: (b * 31, 512)
        self.detector = nn.Linear(
            in_features=512, out_features=2
        )  # (b * 31, 2) - binary classifier

        # initialize weights
        self.apply(self.init_weights)

    def get_feature_GAN(self, x):
        """Return pre-pool features (channels-last) for GAN feature matching."""
        seq_len = x.shape[-2]  # unused; kept from the original implementation
        x = x.float().transpose(-1, -2)

        convblock_out = self.conv_block(x)

        resblock1_out = self.res_block1(convblock_out)
        resblock2_out = self.res_block2(resblock1_out)
        resblock3_out = self.res_block3(resblock2_out)
        # Apply only BatchNorm + LeakyReLU of the pool block (no pooling).
        poolblock_out = self.pool_block[0](resblock3_out)
        poolblock_out = self.pool_block[1](poolblock_out)

        return poolblock_out.transpose(-1, -2)

    def get_feature(self, x):
        """Return pooled convolutional features (BN + LReLU + MaxPool)."""
        seq_len = x.shape[-2]  # unused; kept from the original implementation
        x = x.float().transpose(-1, -2)

        convblock_out = self.conv_block(x)

        resblock1_out = self.res_block1(convblock_out)
        resblock2_out = self.res_block2(resblock1_out)
        resblock3_out = self.res_block3(resblock2_out)
        poolblock_out = self.pool_block[0](resblock3_out)
        poolblock_out = self.pool_block[1](poolblock_out)

        return self.pool_block[2](poolblock_out)

    def forward(self, x):
        """
        Returns:
            (pitch_activations, GAN_feature, poolblock_out):
              * pitch_activations -- abs of the classifier output,
                shape (b, seq, num_class) before the squeeze;
              * GAN_feature -- pre-pool features (channels-last);
              * poolblock_out -- pooled features fed to the BiLSTM.
            (Unlike the original JDC paper, the detector branch is not run.)
        """
        ###############################
        # forward pass for classifier #
        ###############################
        seq_len = x.shape[-1]
        x = x.float().transpose(-1, -2)

        convblock_out = self.conv_block(x)

        resblock1_out = self.res_block1(convblock_out)
        resblock2_out = self.res_block2(resblock1_out)
        resblock3_out = self.res_block3(resblock2_out)

        poolblock_out = self.pool_block[0](resblock3_out)
        poolblock_out = self.pool_block[1](poolblock_out)
        GAN_feature = poolblock_out.transpose(-1, -2)
        poolblock_out = self.pool_block[2](poolblock_out)

        # (b, 256, 31, 2) => (b, 31, 256, 2) => (b, 31, 512)
        classifier_out = (
            poolblock_out.permute(0, 2, 1, 3).contiguous().view((-1, seq_len, 512))
        )
        classifier_out, _ = self.bilstm_classifier(
            classifier_out
        )  # ignore the hidden states

        classifier_out = classifier_out.contiguous().view((-1, 512))  # (b * 31, 512)
        classifier_out = self.classifier(classifier_out)
        classifier_out = classifier_out.view(
            (-1, seq_len, self.num_class)
        )  # (b, 31, num_class)

        # classifier output consists of predicted pitch-class activations per
        # frame; abs() keeps them non-negative for the downstream F0 decoding
        return torch.abs(classifier_out.squeeze(-1)), GAN_feature, poolblock_out

    @staticmethod
    def init_weights(m):
        """Layer-type-dependent weight init, applied via ``self.apply``."""
        if isinstance(m, nn.Linear):
            nn.init.kaiming_uniform_(m.weight)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Conv2d):
            nn.init.xavier_normal_(m.weight)
        elif isinstance(m, nn.LSTM) or isinstance(m, nn.LSTMCell):
            for p in m.parameters():
                # NOTE(review): p.data is never None for registered LSTM
                # parameters; this guard looks vestigial.
                if p.data is None:
                    continue

                # Orthogonal init for matrices, normal for bias vectors.
                if len(p.shape) >= 2:
                    nn.init.orthogonal_(p.data)
                else:
                    nn.init.normal_(p.data)
+
+
class ResBlock(nn.Module):
    """Pre-activation residual block that halves the frequency (last) axis.

    Layout follows Figure 1b of the JDC paper: BN -> LeakyReLU -> MaxPool,
    then two 3x3 convs; a 1x1 conv matches channels on the skip path when
    in_channels != out_channels.
    """

    def __init__(self, in_channels: int, out_channels: int, leaky_relu_slope=0.01):
        super().__init__()
        self.downsample = in_channels != out_channels

        # BN / LReLU / MaxPool layer before the conv layer - see Figure 1b in the paper
        self.pre_conv = nn.Sequential(
            nn.BatchNorm2d(num_features=in_channels),
            nn.LeakyReLU(leaky_relu_slope, inplace=True),
            nn.MaxPool2d(kernel_size=(1, 2)),  # apply downsampling on the y axis only
        )

        # conv layers
        self.conv = nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=3,
                padding=1,
                bias=False,
            ),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(leaky_relu_slope, inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False),
        )

        # 1 x 1 convolution layer to match the feature dimensions
        self.conv1by1 = None
        if self.downsample:
            self.conv1by1 = nn.Conv2d(in_channels, out_channels, 1, bias=False)

    def forward(self, x):
        x = self.pre_conv(x)
        # Skip path: channel-matching 1x1 conv when downsampling, identity
        # otherwise.
        shortcut = self.conv1by1(x) if self.downsample else x
        return self.conv(x) + shortcut
diff --git a/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py b/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py
new file mode 100644
index 0000000..c29854f
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py
@@ -0,0 +1,437 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This code is modified from https://github.com/sh-lee-prml/HierSpeechpp/blob/main/ttv_v1/attentions.py
+
+import copy
+import math
+import numpy as np
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from . import commons
+
+
class LayerNorm(nn.Module):
    """Layer normalization over the channel dimension of a (B, C, ...) tensor.

    Channels are moved to the last axis, normalized with learnable
    gamma/beta, and moved back.
    """

    def __init__(self, channels, eps=1e-5):
        super().__init__()
        self.channels = channels
        self.eps = eps
        self.gamma = nn.Parameter(torch.ones(channels))
        self.beta = nn.Parameter(torch.zeros(channels))

    def forward(self, x):
        channels_last = x.transpose(1, -1)
        normalized = F.layer_norm(
            channels_last, (self.channels,), self.gamma, self.beta, self.eps
        )
        return normalized.transpose(1, -1)
+
+
class Encoder(nn.Module):
    """Transformer encoder stack with windowed relative-position self-attention.

    Each of ``n_layers`` layers applies self-attention -> dropout -> residual
    + LayerNorm, then FFN -> dropout -> residual + LayerNorm. ``x_mask`` of
    shape (B, 1, T) zeroes padded positions before and after each layer.
    Extra ``**kwargs`` are accepted for config compatibility and ignored.
    """

    def __init__(
        self,
        hidden_channels,
        filter_channels,
        n_heads,
        n_layers,
        kernel_size=1,
        p_dropout=0.0,
        window_size=4,
        **kwargs
    ):
        super().__init__()
        self.hidden_channels = hidden_channels
        self.filter_channels = filter_channels
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.window_size = window_size

        self.drop = nn.Dropout(p_dropout)
        self.attn_layers = nn.ModuleList()
        self.norm_layers_1 = nn.ModuleList()
        self.ffn_layers = nn.ModuleList()
        self.norm_layers_2 = nn.ModuleList()
        for i in range(self.n_layers):
            self.attn_layers.append(
                MultiHeadAttention(
                    hidden_channels,
                    hidden_channels,
                    n_heads,
                    p_dropout=p_dropout,
                    window_size=window_size,
                )
            )
            self.norm_layers_1.append(LayerNorm(hidden_channels))
            self.ffn_layers.append(
                FFN(
                    hidden_channels,
                    hidden_channels,
                    filter_channels,
                    kernel_size,
                    p_dropout=p_dropout,
                )
            )
            self.norm_layers_2.append(LayerNorm(hidden_channels))

    def forward(self, x, x_mask):
        """x: (B, C, T); x_mask: (B, 1, T) with 1 for valid positions."""
        # Pairwise validity mask for attention: (B, 1, T, T).
        attn_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1)
        x = x * x_mask
        for i in range(self.n_layers):
            y = self.attn_layers[i](x, x, attn_mask)
            y = self.drop(y)
            x = self.norm_layers_1[i](x + y)

            y = self.ffn_layers[i](x, x_mask)
            y = self.drop(y)
            x = self.norm_layers_2[i](x + y)
        x = x * x_mask
        return x
+
+
class Decoder(nn.Module):
    """Transformer decoder stack with causal self-attention and encoder-decoder
    cross-attention.

    Each layer: masked self-attention -> residual + LayerNorm, cross-attention
    over encoder output ``h`` -> residual + LayerNorm, causal FFN -> residual
    + LayerNorm. Extra ``**kwargs`` are accepted and ignored.
    """

    def __init__(
        self,
        hidden_channels,
        filter_channels,
        n_heads,
        n_layers,
        kernel_size=1,
        p_dropout=0.0,
        proximal_bias=False,
        proximal_init=True,
        **kwargs
    ):
        super().__init__()
        self.hidden_channels = hidden_channels
        self.filter_channels = filter_channels
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.proximal_bias = proximal_bias
        self.proximal_init = proximal_init

        self.drop = nn.Dropout(p_dropout)
        self.self_attn_layers = nn.ModuleList()
        self.norm_layers_0 = nn.ModuleList()
        self.encdec_attn_layers = nn.ModuleList()
        self.norm_layers_1 = nn.ModuleList()
        self.ffn_layers = nn.ModuleList()
        self.norm_layers_2 = nn.ModuleList()
        for i in range(self.n_layers):
            self.self_attn_layers.append(
                MultiHeadAttention(
                    hidden_channels,
                    hidden_channels,
                    n_heads,
                    p_dropout=p_dropout,
                    proximal_bias=proximal_bias,
                    proximal_init=proximal_init,
                )
            )
            self.norm_layers_0.append(LayerNorm(hidden_channels))
            self.encdec_attn_layers.append(
                MultiHeadAttention(
                    hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout
                )
            )
            self.norm_layers_1.append(LayerNorm(hidden_channels))
            self.ffn_layers.append(
                FFN(
                    hidden_channels,
                    hidden_channels,
                    filter_channels,
                    kernel_size,
                    p_dropout=p_dropout,
                    causal=True,  # decoder FFN uses causal (left) padding
                )
            )
            self.norm_layers_2.append(LayerNorm(hidden_channels))

    def forward(self, x, x_mask, h, h_mask):
        """
        x: decoder input
        h: encoder output
        """
        # Lower-triangular mask forbids attending to future positions.
        self_attn_mask = commons.subsequent_mask(x_mask.size(2)).to(
            device=x.device, dtype=x.dtype
        )
        # Cross-attention mask: decoder positions x encoder positions.
        encdec_attn_mask = h_mask.unsqueeze(2) * x_mask.unsqueeze(-1)
        x = x * x_mask
        for i in range(self.n_layers):
            y = self.self_attn_layers[i](x, x, self_attn_mask)
            y = self.drop(y)
            x = self.norm_layers_0[i](x + y)

            y = self.encdec_attn_layers[i](x, h, encdec_attn_mask)
            y = self.drop(y)
            x = self.norm_layers_1[i](x + y)

            y = self.ffn_layers[i](x, x_mask)
            y = self.drop(y)
            x = self.norm_layers_2[i](x + y)
        x = x * x_mask
        return x
+
+
class MultiHeadAttention(nn.Module):
    """Multi-head attention over (B, C, T) tensors with optional relative
    position embeddings (``window_size``), local block masking
    (``block_length``) and a proximal bias for self-attention.

    Q comes from ``x``; K and V come from ``c`` (use x == c for
    self-attention). All projections are 1x1 Conv1d so inputs and outputs
    stay channels-first.
    """

    def __init__(
        self,
        channels,
        out_channels,
        n_heads,
        p_dropout=0.0,
        window_size=None,
        heads_share=True,
        block_length=None,
        proximal_bias=False,
        proximal_init=False,
    ):
        super().__init__()
        assert channels % n_heads == 0

        self.channels = channels
        self.out_channels = out_channels
        self.n_heads = n_heads
        self.p_dropout = p_dropout
        self.window_size = window_size
        self.heads_share = heads_share
        self.block_length = block_length
        self.proximal_bias = proximal_bias
        self.proximal_init = proximal_init
        # Last attention map, stored for inspection/alignment extraction.
        self.attn = None

        self.k_channels = channels // n_heads
        self.conv_q = nn.Conv1d(channels, channels, 1)
        self.conv_k = nn.Conv1d(channels, channels, 1)
        self.conv_v = nn.Conv1d(channels, channels, 1)
        self.conv_o = nn.Conv1d(channels, out_channels, 1)
        self.drop = nn.Dropout(p_dropout)

        if window_size is not None:
            # Learned relative-position embeddings for keys and values,
            # covering offsets in [-window_size, window_size].
            n_heads_rel = 1 if heads_share else n_heads
            rel_stddev = self.k_channels**-0.5
            self.emb_rel_k = nn.Parameter(
                torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels)
                * rel_stddev
            )
            self.emb_rel_v = nn.Parameter(
                torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels)
                * rel_stddev
            )

        nn.init.xavier_uniform_(self.conv_q.weight)
        nn.init.xavier_uniform_(self.conv_k.weight)
        nn.init.xavier_uniform_(self.conv_v.weight)
        if proximal_init:
            # Start K identical to Q (common trick for decoder self-attention).
            with torch.no_grad():
                self.conv_k.weight.copy_(self.conv_q.weight)
                self.conv_k.bias.copy_(self.conv_q.bias)

    def forward(self, x, c, attn_mask=None):
        """x: query source (B, C, T_t); c: key/value source (B, C, T_s)."""
        q = self.conv_q(x)
        k = self.conv_k(c)
        v = self.conv_v(c)

        x, self.attn = self.attention(q, k, v, mask=attn_mask)

        x = self.conv_o(x)
        return x

    def attention(self, query, key, value, mask=None):
        """Scaled dot-product attention; returns (output, attention weights)."""
        # reshape [b, d, t] -> [b, n_h, t, d_k]
        b, d, t_s, t_t = (*key.size(), query.size(2))
        query = query.view(b, self.n_heads, self.k_channels, t_t).transpose(2, 3)
        key = key.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)
        value = value.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)

        scores = torch.matmul(query / math.sqrt(self.k_channels), key.transpose(-2, -1))
        if self.window_size is not None:
            assert (
                t_s == t_t
            ), "Relative attention is only available for self-attention."
            key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s)
            rel_logits = self._matmul_with_relative_keys(
                query / math.sqrt(self.k_channels), key_relative_embeddings
            )
            scores_local = self._relative_position_to_absolute_position(rel_logits)
            scores = scores + scores_local
        if self.proximal_bias:
            assert t_s == t_t, "Proximal bias is only available for self-attention."
            scores = scores + self._attention_bias_proximal(t_s).to(
                device=scores.device, dtype=scores.dtype
            )
        if mask is not None:
            # -1e4 (rather than -inf) keeps fp16-safe masking.
            scores = scores.masked_fill(mask == 0, -1e4)
            if self.block_length is not None:
                assert (
                    t_s == t_t
                ), "Local attention is only available for self-attention."
                # Keep only a band of width block_length around the diagonal.
                block_mask = (
                    torch.ones_like(scores)
                    .triu(-self.block_length)
                    .tril(self.block_length)
                )
                scores = scores.masked_fill(block_mask == 0, -1e4)
        p_attn = F.softmax(scores, dim=-1)  # [b, n_h, t_t, t_s]
        p_attn = self.drop(p_attn)
        output = torch.matmul(p_attn, value)
        if self.window_size is not None:
            # Add the relative-position value contribution.
            relative_weights = self._absolute_position_to_relative_position(p_attn)
            value_relative_embeddings = self._get_relative_embeddings(
                self.emb_rel_v, t_s
            )
            output = output + self._matmul_with_relative_values(
                relative_weights, value_relative_embeddings
            )
        output = (
            output.transpose(2, 3).contiguous().view(b, d, t_t)
        )  # [b, n_h, t_t, d_k] -> [b, d, t_t]
        return output, p_attn

    def _matmul_with_relative_values(self, x, y):
        """
        x: [b, h, l, m]
        y: [h or 1, m, d]
        ret: [b, h, l, d]
        """
        ret = torch.matmul(x, y.unsqueeze(0))
        return ret

    def _matmul_with_relative_keys(self, x, y):
        """
        x: [b, h, l, d]
        y: [h or 1, m, d]
        ret: [b, h, l, m]
        """
        ret = torch.matmul(x, y.unsqueeze(0).transpose(-2, -1))
        return ret

    def _get_relative_embeddings(self, relative_embeddings, length):
        """Pad/slice the (2*window+1)-entry table to 2*length-1 offsets."""
        max_relative_position = 2 * self.window_size + 1
        # Pad first before slice to avoid using cond ops.
        pad_length = max(length - (self.window_size + 1), 0)
        slice_start_position = max((self.window_size + 1) - length, 0)
        slice_end_position = slice_start_position + 2 * length - 1
        if pad_length > 0:
            padded_relative_embeddings = F.pad(
                relative_embeddings,
                commons.convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]]),
            )
        else:
            padded_relative_embeddings = relative_embeddings
        used_relative_embeddings = padded_relative_embeddings[
            :, slice_start_position:slice_end_position
        ]
        return used_relative_embeddings

    def _relative_position_to_absolute_position(self, x):
        """
        x: [b, h, l, 2*l-1]
        ret: [b, h, l, l]

        Standard pad-and-reshape trick from Music Transformer / relative
        attention: shift relative-offset logits into absolute positions.
        """
        batch, heads, length, _ = x.size()
        # Concat columns of pad to shift from relative to absolute indexing.
        x = F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, 1]]))

        # Concat extra elements so to add up to shape (len+1, 2*len-1).
        x_flat = x.view([batch, heads, length * 2 * length])
        x_flat = F.pad(
            x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [0, length - 1]])
        )

        # Reshape and slice out the padded elements.
        x_final = x_flat.view([batch, heads, length + 1, 2 * length - 1])[
            :, :, :length, length - 1 :
        ]
        return x_final

    def _absolute_position_to_relative_position(self, x):
        """
        x: [b, h, l, l]
        ret: [b, h, l, 2*l-1]

        Inverse of :meth:`_relative_position_to_absolute_position`.
        """
        batch, heads, length, _ = x.size()
        # padd along column
        x = F.pad(
            x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length - 1]])
        )
        x_flat = x.view([batch, heads, length**2 + length * (length - 1)])
        # add 0's in the beginning that will skew the elements after reshape
        x_flat = F.pad(x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [length, 0]]))
        x_final = x_flat.view([batch, heads, length, 2 * length])[:, :, :, 1:]
        return x_final

    def _attention_bias_proximal(self, length):
        """Bias for self-attention to encourage attention to close positions.
        Args:
          length: an integer scalar.
        Returns:
          a Tensor with shape [1, 1, length, length]
        """
        r = torch.arange(length, dtype=torch.float32)
        diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1)
        return torch.unsqueeze(torch.unsqueeze(-torch.log1p(torch.abs(diff)), 0), 0)
+
+
class FFN(nn.Module):
    """Position-wise feed-forward block for (B, C, T) tensors.

    conv1d -> activation ("gelu"-approx via x*sigmoid(1.702x), else ReLU)
    -> dropout -> conv1d, with masking applied before each conv and on the
    output. ``causal=True`` selects left-only padding so no future leakage
    occurs; otherwise symmetric "same" padding is used.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        filter_channels,
        kernel_size,
        p_dropout=0.0,
        activation=None,
        causal=False,
    ):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.filter_channels = filter_channels
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.activation = activation
        self.causal = causal

        # Bind the padding strategy once at construction time.
        self.padding = self._causal_padding if causal else self._same_padding

        self.conv_1 = nn.Conv1d(in_channels, filter_channels, kernel_size)
        self.conv_2 = nn.Conv1d(filter_channels, out_channels, kernel_size)
        self.drop = nn.Dropout(p_dropout)

    def forward(self, x, x_mask):
        h = self.conv_1(self.padding(x * x_mask))
        if self.activation == "gelu":
            h = h * torch.sigmoid(1.702 * h)
        else:
            h = torch.relu(h)
        h = self.drop(h)
        h = self.conv_2(self.padding(h * x_mask))
        return h * x_mask

    def _causal_padding(self, x):
        # All padding goes on the left so position t never sees t+1.
        if self.kernel_size == 1:
            return x
        pad = [[0, 0], [0, 0], [self.kernel_size - 1, 0]]
        return F.pad(x, commons.convert_pad_shape(pad))

    def _same_padding(self, x):
        # Symmetric padding keeping the output length equal to the input.
        if self.kernel_size == 1:
            return x
        left = (self.kernel_size - 1) // 2
        right = self.kernel_size // 2
        return F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [left, right]]))
diff --git a/indextts/utils/maskgct/models/codec/facodec/modules/commons.py b/indextts/utils/maskgct/models/codec/facodec/modules/commons.py
new file mode 100644
index 0000000..89baaf4
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/modules/commons.py
@@ -0,0 +1,331 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import math
+import os.path
+
+import numpy as np
+import torch
+from torch import nn
+from torch.nn import functional as F
+from munch import Munch
+import json
+
+
class AttrDict(dict):
    """Dictionary whose entries are also reachable as attributes.

    Aliases the instance ``__dict__`` to the dict itself, so ``d.key`` and
    ``d["key"]`` address the same storage.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.__dict__ = self
+
+
def init_weights(m, mean=0.0, std=0.01):
    """Re-initialise conv-layer weights in place from N(mean, std).

    Modules whose class name does not contain "Conv" are left untouched;
    intended for use with ``Module.apply``.
    """
    if "Conv" in m.__class__.__name__:
        m.weight.data.normal_(mean, std)
+
+
def get_padding(kernel_size, dilation=1):
    """Return the "same" padding for a dilated 1-D convolution."""
    return (dilation * (kernel_size - 1)) // 2
+
+
def convert_pad_shape(pad_shape):
    """Flatten a per-dim [[left, right], ...] pad spec into F.pad's reversed flat form."""
    return [amount for pair in reversed(pad_shape) for amount in pair]
+
+
def intersperse(lst, item):
    """Return lst with `item` inserted before, between, and after every element."""
    out = [item]
    for element in lst:
        out.append(element)
        out.append(item)
    return out
+
+
def kl_divergence(m_p, logs_p, m_q, logs_q):
    """KL(P||Q) between diagonal Gaussians given means and log-std tensors."""
    scale_term = 0.5 * (torch.exp(2.0 * logs_p) + (m_p - m_q) ** 2) * torch.exp(
        -2.0 * logs_q
    )
    return (logs_q - logs_p) - 0.5 + scale_term
+
+
def rand_gumbel(shape):
    """Sample Gumbel(0, 1) noise of `shape`, keeping the uniforms away from {0, 1}."""
    u = 0.00001 + 0.99998 * torch.rand(shape)
    return -torch.log(-torch.log(u))
+
+
def rand_gumbel_like(x):
    """Gumbel noise with the same shape, dtype and device as `x`."""
    return rand_gumbel(x.size()).to(dtype=x.dtype, device=x.device)
+
+
def slice_segments(x, ids_str, segment_size=4):
    """Gather one fixed-length time slice per batch element from (B, C, T) `x`."""
    out = torch.zeros_like(x[:, :, :segment_size])
    for b, start in enumerate(ids_str):
        out[b] = x[b, :, start : start + segment_size]
    return out
+
+
def slice_segments_audio(x, ids_str, segment_size=4):
    """Gather one fixed-length slice per batch element from (B, T) audio `x`."""
    out = torch.zeros_like(x[:, :segment_size])
    for b, start in enumerate(ids_str):
        out[b] = x[b, start : start + segment_size]
    return out
+
+
def rand_slice_segments(x, x_lengths=None, segment_size=4):
    """Pick one random `segment_size` slice per batch element.

    Returns the sliced tensor and the chosen start indices.
    """
    b, _, t = x.size()
    if x_lengths is None:
        x_lengths = t
    max_start = x_lengths - segment_size + 1
    ids_str = ((torch.rand([b]).to(device=x.device) * max_start).clip(0)).to(
        dtype=torch.long
    )
    return slice_segments(x, ids_str, segment_size), ids_str
+
+
def get_timing_signal_1d(length, channels, min_timescale=1.0, max_timescale=1.0e4):
    """Transformer-style sinusoidal position signal of shape (1, channels, length)."""
    position = torch.arange(length, dtype=torch.float)
    num_timescales = channels // 2
    log_increment = math.log(float(max_timescale) / float(min_timescale)) / (
        num_timescales - 1
    )
    inv_timescales = min_timescale * torch.exp(
        -log_increment * torch.arange(num_timescales, dtype=torch.float)
    )
    scaled = inv_timescales.unsqueeze(1) * position.unsqueeze(0)
    signal = torch.cat([scaled.sin(), scaled.cos()], dim=0)
    # Zero-pad one extra row for odd channel counts.
    signal = F.pad(signal, [0, 0, 0, channels % 2])
    return signal.view(1, channels, length)
+
+
def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4):
    """Add the sinusoidal timing signal to `x` of shape (B, C, T)."""
    _, channels, length = x.size()
    sig = get_timing_signal_1d(length, channels, min_timescale, max_timescale)
    return x + sig.to(dtype=x.dtype, device=x.device)
+
+
def cat_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4, axis=1):
    """Concatenate the sinusoidal timing signal onto `x` along `axis`."""
    _, channels, length = x.size()
    sig = get_timing_signal_1d(length, channels, min_timescale, max_timescale)
    return torch.cat([x, sig.to(dtype=x.dtype, device=x.device)], axis)
+
+
def subsequent_mask(length):
    """Causal (lower-triangular) mask of shape (1, 1, length, length)."""
    return torch.tril(torch.ones(length, length))[None, None]
+
+
@torch.jit.script
def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
    """WaveNet-style gated activation, compiled with TorchScript.

    Adds the two inputs, then splits the channel axis at n_channels[0]: the
    first part goes through tanh (the "filter"), the rest through sigmoid
    (the "gate"), and the two are multiplied.

    n_channels is assumed to be a 1-element int tensor (only n_channels[0]
    is read) — TODO confirm against callers.
    """
    n_channels_int = n_channels[0]
    in_act = input_a + input_b
    # Channels [0, n) -> tanh branch, channels [n, end) -> sigmoid branch.
    t_act = torch.tanh(in_act[:, :n_channels_int, :])
    s_act = torch.sigmoid(in_act[:, n_channels_int:, :])
    acts = t_act * s_act
    return acts
+
+
def convert_pad_shape(pad_shape):
    """Flatten [[left, right], ...] per-dim padding into F.pad's reversed flat list.

    NOTE(review): duplicates the identical helper defined earlier in this
    module; kept so the later definition remains the one in effect.
    """
    flattened = []
    for pair in pad_shape[::-1]:
        flattened.extend(pair)
    return flattened
+
+
def shift_1d(x):
    """Shift `x` right by one step along the last (time) axis, zero-filling t=0."""
    # Same as F.pad(x, convert_pad_shape([[0, 0], [0, 0], [1, 0]]))[:, :, :-1],
    # with the pad-spec helper inlined.
    return F.pad(x, (1, 0, 0, 0, 0, 0))[:, :, :-1]
+
+
def sequence_mask(length, max_length=None):
    """Boolean mask (B, max_length): True where the position index < length[b]."""
    if max_length is None:
        max_length = length.max()
    positions = torch.arange(max_length, dtype=length.dtype, device=length.device)
    return positions.unsqueeze(0) < length.unsqueeze(1)
+
+
def generate_path(duration, mask):
    """Build a monotonic alignment path from per-position durations.

    Args:
        duration: (b, 1, t_x) durations per text position.
        mask: (b, 1, t_y, t_x) validity mask.
    Returns:
        (b, 1, t_y, t_x) 0/1 path tensor.
    """
    b, _, t_y, t_x = mask.shape
    cum_duration = torch.cumsum(duration, -1)

    flat = cum_duration.view(b * t_x)
    path = sequence_mask(flat, t_y).to(mask.dtype).view(b, t_x, t_y)
    # Difference of shifted cumulative masks leaves a 1 exactly on the frames
    # assigned to each text position.
    path = path - F.pad(path, (0, 0, 1, 0, 0, 0))[:, :-1]
    return path.unsqueeze(1).transpose(2, 3) * mask
+
+
def clip_grad_value_(parameters, clip_value, norm_type=2):
    """Clamp gradients element-wise to [-clip_value, clip_value] in place.

    Returns the total gradient norm, computed before clamping.  Pass
    clip_value=None to only measure the norm.
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    grads = [p for p in parameters if p.grad is not None]
    norm_type = float(norm_type)
    if clip_value is not None:
        clip_value = float(clip_value)

    total = 0.0
    for p in grads:
        total += p.grad.data.norm(norm_type).item() ** norm_type
        if clip_value is not None:
            p.grad.data.clamp_(min=-clip_value, max=clip_value)
    return total ** (1.0 / norm_type)
+
+
def log_norm(x, mean=-4, std=4, dim=2):
    """Log L2-norm of a de-normalised mel: log(||exp(x * std + mean)||) over `dim`."""
    return torch.log(torch.exp(x * std + mean).norm(dim=dim))
+
+
+from huggingface_hub import hf_hub_download
+
+
def load_F0_models(path):
    """Load the JDC pitch-extraction network from `path`.

    Falls back to downloading the reference checkpoint ("bst.t7") from the
    HuggingFace Hub when `path` does not exist.  The model is returned in
    train() mode.
    """
    # load F0 model
    from .JDC.model import JDCNet

    F0_model = JDCNet(num_class=1, seq_len=192)
    if not os.path.exists(path):
        path = hf_hub_download(repo_id="Plachta/JDCnet", filename="bst.t7")
    # NOTE(review): torch.load unpickles arbitrary objects — only load
    # checkpoints from trusted sources.
    params = torch.load(path, map_location="cpu")["net"]
    F0_model.load_state_dict(params)
    _ = F0_model.train()

    return F0_model
+
+
+# Generators
+from modules.dac.model.dac import Encoder, Decoder
+from .quantize import FAquantizer, FApredictors
+
+# Discriminators
+from modules.dac.model.discriminator import Discriminator
+
+
def build_model(args):
    """Assemble the FACodec sub-networks from a config namespace.

    Args:
        args: attribute-style config (e.g. a Munch) providing DAC encoder /
            decoder settings and the quantizer/predictor switches used below.
    Returns:
        Munch with encoder, quantizer, decoder, discriminator, fa_predictors.
    """
    encoder = Encoder(
        d_model=args.DAC.encoder_dim,
        strides=args.DAC.encoder_rates,
        d_latent=1024,
        causal=args.causal,
        lstm=args.lstm,
    )

    # Factorised quantizer: prosody / content / timbre / residual codebooks.
    quantizer = FAquantizer(
        in_dim=1024,
        n_p_codebooks=1,
        n_c_codebooks=args.n_c_codebooks,
        n_t_codebooks=2,
        n_r_codebooks=3,
        codebook_size=1024,
        codebook_dim=8,
        quantizer_dropout=0.5,
        causal=args.causal,
        separate_prosody_encoder=args.separate_prosody_encoder,
        timbre_norm=args.timbre_norm,
    )

    # Gradient-reversal predictors used for attribute disentanglement.
    fa_predictors = FApredictors(
        in_dim=1024,
        use_gr_content_f0=args.use_gr_content_f0,
        use_gr_prosody_phone=args.use_gr_prosody_phone,
        use_gr_residual_f0=True,
        use_gr_residual_phone=True,
        use_gr_timbre_content=True,
        use_gr_timbre_prosody=args.use_gr_timbre_prosody,
        use_gr_x_timbre=True,
        norm_f0=args.norm_f0,
        timbre_norm=args.timbre_norm,
        use_gr_content_global_f0=args.use_gr_content_global_f0,
    )

    decoder = Decoder(
        input_channel=1024,
        channels=args.DAC.decoder_dim,
        rates=args.DAC.decoder_rates,
        causal=args.causal,
        lstm=args.lstm,
    )

    # Multi-period + multi-band discriminator (no rate-based sub-discriminators).
    discriminator = Discriminator(
        rates=[],
        periods=[2, 3, 5, 7, 11],
        fft_sizes=[2048, 1024, 512],
        sample_rate=args.DAC.sr,
        bands=[(0.0, 0.1), (0.1, 0.25), (0.25, 0.5), (0.5, 0.75), (0.75, 1.0)],
    )

    nets = Munch(
        encoder=encoder,
        quantizer=quantizer,
        decoder=decoder,
        discriminator=discriminator,
        fa_predictors=fa_predictors,
    )

    return nets
+
+
def load_checkpoint(
    model,
    optimizer,
    path,
    load_only_params=True,
    ignore_modules=None,
    is_distributed=False,
):
    """Restore a dict of sub-modules (and optionally the optimizer) from `path`.

    Args:
        model: mapping of name -> nn.Module; each entry present in the
            checkpoint's "net" dict (and not ignored) is loaded strictly and
            switched to eval() mode.
        optimizer: optimizer wrapper; its state is restored only when
            `load_only_params` is False.
        path: checkpoint file produced by torch.save.
        load_only_params: when True, skip optimizer/scheduler state.
        ignore_modules: iterable of sub-module names to skip (default: none).
            Fixed from a mutable-default-argument (`[]`) in the original.
        is_distributed: when False, the DDP "module." prefix is stripped from
            checkpoint keys before loading.
    Returns:
        (model, optimizer, epoch, iters) with epoch already advanced by one.
    """
    if ignore_modules is None:
        ignore_modules = []
    # NOTE(review): torch.load unpickles arbitrary objects; only load trusted files.
    state = torch.load(path, map_location="cpu")
    params = state["net"]
    for key in model:
        if key in params and key not in ignore_modules:
            if not is_distributed:
                # Strip the DDP "module." prefix in place so the keys match a
                # non-distributed module's state dict.
                for k in list(params[key].keys()):
                    if k.startswith("module."):
                        params[key][k[len("module.") :]] = params[key][k]
                        del params[key][k]
            print("%s loaded" % key)
            model[key].load_state_dict(params[key], strict=True)
    _ = [model[key].eval() for key in model]

    # epoch/iters are needed on both paths; computed once (was duplicated in
    # both branches of the original).
    epoch = state["epoch"] + 1
    iters = state["iters"]
    if not load_only_params:
        optimizer.load_state_dict(state["optimizer"])
        optimizer.load_scheduler_state_dict(state["scheduler"])

    return model, optimizer, epoch, iters
+
+
def recursive_munch(d):
    """Deep-convert nested dicts (and lists of them) into Munch objects."""
    if isinstance(d, dict):
        return Munch((key, recursive_munch(value)) for key, value in d.items())
    if isinstance(d, list):
        return [recursive_munch(item) for item in d]
    return d
diff --git a/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py b/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py
new file mode 100644
index 0000000..d09396e
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from torch.autograd import Function
+import torch
+from torch import nn
+
+
class GradientReversal(Function):
    """Identity in the forward pass; scales gradients by -alpha in backward.

    Used for adversarial (domain-confusion) training: layers below the
    reversal receive the negated, scaled gradient of the loss above it.
    """

    @staticmethod
    def forward(ctx, x, alpha):
        # Only `alpha` is needed in backward; the original also saved `x`,
        # which kept the whole activation tensor alive for no reason.
        ctx.save_for_backward(alpha)
        return x

    @staticmethod
    def backward(ctx, grad_output):
        grad_input = None
        (alpha,) = ctx.saved_tensors
        if ctx.needs_input_grad[0]:
            grad_input = -alpha * grad_output
        # No gradient flows to alpha.
        return grad_input, None


revgrad = GradientReversal.apply
+
+
class GradientReversal(nn.Module):
    """Module wrapper around the gradient-reversal autograd function.

    NOTE(review): intentionally shadows the `Function` subclass of the same
    name defined above; importers of this module get the nn.Module version.
    """

    def __init__(self, alpha):
        super().__init__()
        # Plain non-trainable tensor attribute (not a registered buffer),
        # matching the original checkpoint layout.
        self.alpha = torch.tensor(alpha, requires_grad=False)

    def forward(self, x):
        return revgrad(x, self.alpha)
diff --git a/indextts/utils/maskgct/models/codec/facodec/modules/layers.py b/indextts/utils/maskgct/models/codec/facodec/modules/layers.py
new file mode 100644
index 0000000..186cbe7
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/modules/layers.py
@@ -0,0 +1,460 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+import torch
+from torch import nn
+from typing import Optional, Any
+from torch import Tensor
+import torch.nn.functional as F
+import torchaudio
+import torchaudio.functional as audio_F
+
+import random
+
+random.seed(0)
+
+
+def _get_activation_fn(activ):
+ if activ == "relu":
+ return nn.ReLU()
+ elif activ == "lrelu":
+ return nn.LeakyReLU(0.2)
+ elif activ == "swish":
+ return lambda x: x * torch.sigmoid(x)
+ else:
+ raise RuntimeError(
+ "Unexpected activ type %s, expected [relu, lrelu, swish]" % activ
+ )
+
+
class LinearNorm(torch.nn.Module):
    """Linear layer with Xavier-uniform weight initialisation."""

    def __init__(self, in_dim, out_dim, bias=True, w_init_gain="linear"):
        super(LinearNorm, self).__init__()
        self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias)
        gain = torch.nn.init.calculate_gain(w_init_gain)
        torch.nn.init.xavier_uniform_(self.linear_layer.weight, gain=gain)

    def forward(self, x):
        return self.linear_layer(x)
+
+
class ConvNorm(torch.nn.Module):
    """1-D convolution with automatic "same" padding and Xavier init."""

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size=1,
        stride=1,
        padding=None,
        dilation=1,
        bias=True,
        w_init_gain="linear",
        param=None,
    ):
        super(ConvNorm, self).__init__()
        if padding is None:
            # Automatic "same" padding only works for odd kernels.
            assert kernel_size % 2 == 1
            padding = (dilation * (kernel_size - 1)) // 2

        self.conv = torch.nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            bias=bias,
        )
        gain = torch.nn.init.calculate_gain(w_init_gain, param=param)
        torch.nn.init.xavier_uniform_(self.conv.weight, gain=gain)

    def forward(self, signal):
        return self.conv(signal)
+
+
class CausualConv(nn.Module):
    """Causal 1-D convolution: pads both sides, then trims the right overhang.

    Bug fixes vs. the original:
      * ``padding=None`` used to assign only a local variable, leaving
        ``self.padding`` unset and crashing with AttributeError.
      * ``self.padding == 0`` used to slice ``x[:, :, :-0]``, which returns
        an empty tensor; trimming is now skipped when there is nothing to trim.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size=1,
        stride=1,
        padding=1,
        dilation=1,
        bias=True,
        w_init_gain="linear",
        param=None,
    ):
        super(CausualConv, self).__init__()
        if padding is None:
            # Automatic padding only works for odd kernels; doubled so the
            # forward pass can trim the entire look-ahead from the right.
            assert kernel_size % 2 == 1
            self.padding = int(dilation * (kernel_size - 1) / 2) * 2
        else:
            self.padding = padding * 2
        self.conv = nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=self.padding,
            dilation=dilation,
            bias=bias,
        )

        torch.nn.init.xavier_uniform_(
            self.conv.weight,
            gain=torch.nn.init.calculate_gain(w_init_gain, param=param),
        )

    def forward(self, x):
        """Convolve and drop the trailing frames so no output sees the future."""
        x = self.conv(x)
        if self.padding > 0:
            x = x[:, :, : -self.padding]
        return x
+
+
class CausualBlock(nn.Module):
    """Stack of residual causal conv sub-blocks with exponentially growing dilation."""

    def __init__(self, hidden_dim, n_conv=3, dropout_p=0.2, activ="lrelu"):
        super(CausualBlock, self).__init__()
        self.blocks = nn.ModuleList(
            [
                self._get_conv(hidden_dim, dilation=3**i, activ=activ, dropout_p=dropout_p)
                for i in range(n_conv)
            ]
        )

    def forward(self, x):
        for block in self.blocks:
            x = x + block(x)
        return x

    def _get_conv(self, hidden_dim, dilation, activ="lrelu", dropout_p=0.2):
        # conv -> act -> BN -> drop -> conv -> act -> drop
        return nn.Sequential(
            CausualConv(
                hidden_dim,
                hidden_dim,
                kernel_size=3,
                padding=dilation,
                dilation=dilation,
            ),
            _get_activation_fn(activ),
            nn.BatchNorm1d(hidden_dim),
            nn.Dropout(p=dropout_p),
            CausualConv(hidden_dim, hidden_dim, kernel_size=3, padding=1, dilation=1),
            _get_activation_fn(activ),
            nn.Dropout(p=dropout_p),
        )
+
+
class ConvBlock(nn.Module):
    """Stack of residual (non-causal) conv sub-blocks with growing dilation."""

    def __init__(self, hidden_dim, n_conv=3, dropout_p=0.2, activ="relu"):
        super().__init__()
        self._n_groups = 8
        self.blocks = nn.ModuleList(
            [
                self._get_conv(hidden_dim, dilation=3**i, activ=activ, dropout_p=dropout_p)
                for i in range(n_conv)
            ]
        )

    def forward(self, x):
        for block in self.blocks:
            x = x + block(x)
        return x

    def _get_conv(self, hidden_dim, dilation, activ="relu", dropout_p=0.2):
        # conv -> act -> GroupNorm -> drop -> conv -> act -> drop
        return nn.Sequential(
            ConvNorm(
                hidden_dim,
                hidden_dim,
                kernel_size=3,
                padding=dilation,
                dilation=dilation,
            ),
            _get_activation_fn(activ),
            nn.GroupNorm(num_groups=self._n_groups, num_channels=hidden_dim),
            nn.Dropout(p=dropout_p),
            ConvNorm(hidden_dim, hidden_dim, kernel_size=3, padding=1, dilation=1),
            _get_activation_fn(activ),
            nn.Dropout(p=dropout_p),
        )
+
+
class LocationLayer(nn.Module):
    """Project cumulative/previous attention weights into the attention space."""

    def __init__(self, attention_n_filters, attention_kernel_size, attention_dim):
        super(LocationLayer, self).__init__()
        pad = (attention_kernel_size - 1) // 2
        self.location_conv = ConvNorm(
            2,
            attention_n_filters,
            kernel_size=attention_kernel_size,
            padding=pad,
            bias=False,
            stride=1,
            dilation=1,
        )
        self.location_dense = LinearNorm(
            attention_n_filters, attention_dim, bias=False, w_init_gain="tanh"
        )

    def forward(self, attention_weights_cat):
        # (B, 2, T) -> (B, T, attention_dim)
        processed = self.location_conv(attention_weights_cat)
        return self.location_dense(processed.transpose(1, 2))
+
+
class Attention(nn.Module):
    """Location-sensitive attention (Tacotron 2 style).

    Scores memory positions from the query, the pre-processed memory, and a
    convolutional encoding of the previous/cumulative attention weights.
    """

    def __init__(
        self,
        attention_rnn_dim,
        embedding_dim,
        attention_dim,
        attention_location_n_filters,
        attention_location_kernel_size,
    ):
        super(Attention, self).__init__()
        self.query_layer = LinearNorm(
            attention_rnn_dim, attention_dim, bias=False, w_init_gain="tanh"
        )
        self.memory_layer = LinearNorm(
            embedding_dim, attention_dim, bias=False, w_init_gain="tanh"
        )
        self.v = LinearNorm(attention_dim, 1, bias=False)
        self.location_layer = LocationLayer(
            attention_location_n_filters, attention_location_kernel_size, attention_dim
        )
        # Padded positions are driven to -inf so softmax gives them zero weight.
        self.score_mask_value = -float("inf")

    def get_alignment_energies(self, query, processed_memory, attention_weights_cat):
        """
        PARAMS
        ------
        query: decoder output (batch, n_mel_channels * n_frames_per_step)
        processed_memory: processed encoder outputs (B, T_in, attention_dim)
        attention_weights_cat: cumulative and prev. att weights (B, 2, max_time)
        RETURNS
        -------
        alignment (batch, max_time)
        """

        processed_query = self.query_layer(query.unsqueeze(1))
        processed_attention_weights = self.location_layer(attention_weights_cat)
        # Additive (Bahdanau-style) scoring with an extra location term.
        energies = self.v(
            torch.tanh(processed_query + processed_attention_weights + processed_memory)
        )

        energies = energies.squeeze(-1)
        return energies

    def forward(
        self,
        attention_hidden_state,
        memory,
        processed_memory,
        attention_weights_cat,
        mask,
    ):
        """
        PARAMS
        ------
        attention_hidden_state: attention rnn last output
        memory: encoder outputs
        processed_memory: processed encoder outputs
        attention_weights_cat: previous and cummulative attention weights
        mask: binary mask for padded data
        RETURNS
        -------
        (attention_context, attention_weights)
        """
        alignment = self.get_alignment_energies(
            attention_hidden_state, processed_memory, attention_weights_cat
        )

        if mask is not None:
            # In-place fill on .data keeps this masking outside the autograd graph.
            alignment.data.masked_fill_(mask, self.score_mask_value)

        attention_weights = F.softmax(alignment, dim=1)
        # Weighted sum of memory rows: (B, 1, T) x (B, T, D) -> (B, D).
        attention_context = torch.bmm(attention_weights.unsqueeze(1), memory)
        attention_context = attention_context.squeeze(1)

        return attention_context, attention_weights
+
+
class ForwardAttentionV2(nn.Module):
    """Location-sensitive attention with a forward (near-monotonic) recursion.

    Like `Attention`, but combines the additive energies with a running
    log-alpha term so each step effectively stays at, or advances by one
    from, the previously attended position (forward-sum style).
    """

    def __init__(
        self,
        attention_rnn_dim,
        embedding_dim,
        attention_dim,
        attention_location_n_filters,
        attention_location_kernel_size,
    ):
        super(ForwardAttentionV2, self).__init__()
        self.query_layer = LinearNorm(
            attention_rnn_dim, attention_dim, bias=False, w_init_gain="tanh"
        )
        self.memory_layer = LinearNorm(
            embedding_dim, attention_dim, bias=False, w_init_gain="tanh"
        )
        self.v = LinearNorm(attention_dim, 1, bias=False)
        self.location_layer = LocationLayer(
            attention_location_n_filters, attention_location_kernel_size, attention_dim
        )
        # Large negative instead of -inf — presumably to keep the logsumexp
        # below free of inf/NaN arithmetic; confirm before changing.
        self.score_mask_value = -float(1e20)

    def get_alignment_energies(self, query, processed_memory, attention_weights_cat):
        """
        PARAMS
        ------
        query: decoder output (batch, n_mel_channels * n_frames_per_step)
        processed_memory: processed encoder outputs (B, T_in, attention_dim)
        attention_weights_cat: prev. and cumulative att weights (B, 2, max_time)
        RETURNS
        -------
        alignment (batch, max_time)
        """

        processed_query = self.query_layer(query.unsqueeze(1))
        processed_attention_weights = self.location_layer(attention_weights_cat)
        # Additive (Bahdanau-style) scoring with a location term.
        energies = self.v(
            torch.tanh(processed_query + processed_attention_weights + processed_memory)
        )

        energies = energies.squeeze(-1)
        return energies

    def forward(
        self,
        attention_hidden_state,
        memory,
        processed_memory,
        attention_weights_cat,
        mask,
        log_alpha,
    ):
        """
        PARAMS
        ------
        attention_hidden_state: attention rnn last output
        memory: encoder outputs
        processed_memory: processed encoder outputs
        attention_weights_cat: previous and cummulative attention weights
        mask: binary mask for padded data
        log_alpha: (B, max_time) running log-probabilities from the last step
        RETURNS
        -------
        (attention_context, attention_weights, log_alpha_new)
        """
        log_energy = self.get_alignment_energies(
            attention_hidden_state, processed_memory, attention_weights_cat
        )

        # log_energy =

        if mask is not None:
            log_energy.data.masked_fill_(mask, self.score_mask_value)

        # attention_weights = F.softmax(alignment, dim=1)

        # content_score = log_energy.unsqueeze(1) #[B, MAX_TIME] -> [B, 1, MAX_TIME]
        # log_alpha = log_alpha.unsqueeze(2) #[B, MAX_TIME] -> [B, MAX_TIME, 1]

        # log_total_score = log_alpha + content_score

        # previous_attention_weights = attention_weights_cat[:,0,:]

        # Forward recursion: combine the "stay" (shift 0) and "advance by one"
        # (shift 1) hypotheses of the previous log-alpha in log space.
        log_alpha_shift_padded = []
        max_time = log_energy.size(1)
        for sft in range(2):
            shifted = log_alpha[:, : max_time - sft]
            shift_padded = F.pad(shifted, (sft, 0), "constant", self.score_mask_value)
            log_alpha_shift_padded.append(shift_padded.unsqueeze(2))

        biased = torch.logsumexp(torch.cat(log_alpha_shift_padded, 2), 2)

        log_alpha_new = biased + log_energy

        attention_weights = F.softmax(log_alpha_new, dim=1)

        # Weighted sum of memory rows: (B, 1, T) x (B, T, D) -> (B, D).
        attention_context = torch.bmm(attention_weights.unsqueeze(1), memory)
        attention_context = attention_context.squeeze(1)

        return attention_context, attention_weights, log_alpha_new
+
+
class PhaseShuffle2d(nn.Module):
    """Randomly rotate the last (width) axis of a (B, C, M, L) tensor by up to ±n.

    The rotation amount comes from a privately-seeded RNG unless `move` is
    given explicitly.
    """

    def __init__(self, n=2):
        super(PhaseShuffle2d, self).__init__()
        self.n = n
        self.random = random.Random(1)

    def forward(self, x, move=None):
        # x: (B, C, M, L)
        if move is None:
            move = self.random.randint(-self.n, self.n)
        if move == 0:
            return x
        head, tail = x[:, :, :, :move], x[:, :, :, move:]
        return torch.cat([tail, head], dim=3)
+
+
class PhaseShuffle1d(nn.Module):
    """Randomly rotate the last (time) axis of a (B, C, T) tensor by up to ±n."""

    def __init__(self, n=2):
        super(PhaseShuffle1d, self).__init__()
        self.n = n
        self.random = random.Random(1)

    def forward(self, x, move=None):
        # x: (B, C, T)
        if move is None:
            move = self.random.randint(-self.n, self.n)
        if move == 0:
            return x
        head, tail = x[:, :, :move], x[:, :, move:]
        return torch.cat([tail, head], dim=2)
+
+
class MFCC(nn.Module):
    """Compute MFCCs from a (log-)mel spectrogram via a fixed ortho DCT matrix."""

    def __init__(self, n_mfcc=40, n_mels=80):
        super(MFCC, self).__init__()
        self.n_mfcc = n_mfcc
        self.n_mels = n_mels
        self.norm = "ortho"
        self.register_buffer("dct_mat", audio_F.create_dct(n_mfcc, n_mels, self.norm))

    def forward(self, mel_specgram):
        # Accept un-batched (n_mels, T) input by temporarily adding a batch dim.
        squeezed = len(mel_specgram.shape) == 2
        if squeezed:
            mel_specgram = mel_specgram.unsqueeze(0)
        # (B, n_mels, T) x (n_mels, n_mfcc) -> (B, n_mfcc, T)
        mfcc = torch.matmul(mel_specgram.transpose(1, 2), self.dct_mat).transpose(1, 2)
        if squeezed:
            mfcc = mfcc.squeeze(0)
        return mfcc
diff --git a/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py b/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py
new file mode 100644
index 0000000..d9cb55b
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py
@@ -0,0 +1,741 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from modules.dac.nn.quantize import ResidualVectorQuantize
+from torch import nn
+from .wavenet import WN
+from .style_encoder import StyleEncoder
+from .gradient_reversal import GradientReversal
+import torch
+import torchaudio
+import torchaudio.functional as audio_F
+import numpy as np
+from ..alias_free_torch import *
+from torch.nn.utils import weight_norm
+from torch import nn, sin, pow
+from einops.layers.torch import Rearrange
+from modules.dac.model.encodec import SConv1d
+
+
def init_weights(m):
    """Truncated-normal init (std 0.02) for Conv1d weights with zero bias.

    Leaves any non-Conv1d module untouched; intended for Module.apply.
    """
    if isinstance(m, nn.Conv1d):
        nn.init.trunc_normal_(m.weight, std=0.02)
        nn.init.constant_(m.bias, 0)
+
+
def WNConv1d(*args, **kwargs):
    """Build a Conv1d and wrap it in weight normalisation."""
    conv = nn.Conv1d(*args, **kwargs)
    return weight_norm(conv)
+
+
def WNConvTranspose1d(*args, **kwargs):
    """Build a ConvTranspose1d and wrap it in weight normalisation."""
    conv = nn.ConvTranspose1d(*args, **kwargs)
    return weight_norm(conv)
+
+
class SnakeBeta(nn.Module):
    """Snake activation with separate frequency (alpha) and magnitude (beta) params.

    Computes ``x + (1 / (beta + eps)) * sin(alpha * x) ** 2`` per channel,
    following https://arxiv.org/abs/2006.08195.  With ``alpha_logscale`` the
    parameters are kept in log space and exponentiated on the fly.

    Shape: (B, C, T) in, (B, C, T) out.
    """

    def __init__(
        self, in_features, alpha=1.0, alpha_trainable=True, alpha_logscale=False
    ):
        """Initialise per-channel parameters.

        Log-scale params start at 0 (so exp() = 1); linear-scale params start
        at `alpha`.  Both alpha and beta train together when `alpha_trainable`.
        """
        super(SnakeBeta, self).__init__()
        self.in_features = in_features

        self.alpha_logscale = alpha_logscale
        if self.alpha_logscale:
            base = torch.zeros(in_features)
        else:
            base = torch.ones(in_features)
        self.alpha = nn.Parameter(base * alpha)
        self.beta = nn.Parameter(base * alpha)

        self.alpha.requires_grad = alpha_trainable
        self.beta.requires_grad = alpha_trainable

        # Guard against division by a zero magnitude parameter.
        self.no_div_by_zero = 0.000000001

    def forward(self, x):
        """Apply SnakeBeta element-wise: x + sin^2(alpha * x) / (beta + eps)."""
        # Reshape (C,) params to (1, C, 1) to broadcast over (B, C, T).
        alpha = self.alpha.unsqueeze(0).unsqueeze(-1)
        beta = self.beta.unsqueeze(0).unsqueeze(-1)
        if self.alpha_logscale:
            alpha = torch.exp(alpha)
            beta = torch.exp(beta)
        return x + (1.0 / (beta + self.no_div_by_zero)) * pow(sin(x * alpha), 2)
+
+
class ResidualUnit(nn.Module):
    """Residual block: Snake-activated dilated conv followed by a 1x1 conv."""

    def __init__(self, dim: int = 16, dilation: int = 1):
        super().__init__()
        pad = ((7 - 1) * dilation) // 2  # "same" padding for kernel size 7
        self.block = nn.Sequential(
            Activation1d(activation=SnakeBeta(dim, alpha_logscale=True)),
            WNConv1d(dim, dim, kernel_size=7, dilation=dilation, padding=pad),
            Activation1d(activation=SnakeBeta(dim, alpha_logscale=True)),
            WNConv1d(dim, dim, kernel_size=1),
        )

    def forward(self, x):
        return self.block(x) + x
+
+
class CNNLSTM(nn.Module):
    """Residual CNN feature extractor with one or more linear prediction heads.

    NOTE(review): despite the name, this implementation contains no LSTM.
    """

    def __init__(self, indim, outdim, head, global_pred=False):
        super().__init__()
        self.global_pred = global_pred
        self.model = nn.Sequential(
            ResidualUnit(indim, dilation=1),
            ResidualUnit(indim, dilation=2),
            ResidualUnit(indim, dilation=3),
            Activation1d(activation=SnakeBeta(indim, alpha_logscale=True)),
            Rearrange("b c t -> b t c"),
        )
        self.heads = nn.ModuleList([nn.Linear(indim, outdim) for _ in range(head)])

    def forward(self, x):
        # x: (B, C, T) -> features (B, T, C); optionally mean-pool over time.
        feats = self.model(x)
        if self.global_pred:
            feats = torch.mean(feats, dim=1, keepdim=False)
        return [head(feats) for head in self.heads]
+
+
def sequence_mask(length, max_length=None):
    """Boolean mask (B, T): True for positions before each sequence's length.

    NOTE(review): duplicates the helper of the same name in modules/commons.py.
    """
    if max_length is None:
        max_length = length.max()
    idx = torch.arange(max_length, dtype=length.dtype, device=length.device)
    return idx[None, :] < length[:, None]
+
+
class MFCC(nn.Module):
    """Compute MFCCs from a (log-)mel spectrogram via a fixed ortho DCT matrix.

    NOTE(review): duplicates the MFCC module in modules/layers.py.
    """

    def __init__(self, n_mfcc=40, n_mels=80):
        super(MFCC, self).__init__()
        self.n_mfcc = n_mfcc
        self.n_mels = n_mels
        self.norm = "ortho"
        self.register_buffer("dct_mat", audio_F.create_dct(n_mfcc, n_mels, self.norm))

    def forward(self, mel_specgram):
        # Accept un-batched (n_mels, T) input by temporarily adding a batch dim.
        squeezed = len(mel_specgram.shape) == 2
        if squeezed:
            mel_specgram = mel_specgram.unsqueeze(0)
        # (B, n_mels, T) x (n_mels, n_mfcc) -> (B, n_mfcc, T)
        mfcc = torch.matmul(mel_specgram.transpose(1, 2), self.dct_mat).transpose(1, 2)
        if squeezed:
            mfcc = mfcc.squeeze(0)
        return mfcc
+
+
+class FAquantizer(nn.Module):
    def __init__(
        self,
        in_dim=1024,
        n_p_codebooks=1,
        n_c_codebooks=2,
        n_t_codebooks=2,
        n_r_codebooks=3,
        codebook_size=1024,
        codebook_dim=8,
        quantizer_dropout=0.5,
        causal=False,
        separate_prosody_encoder=False,
        timbre_norm=False,
    ):
        """Factorised-attribute quantizer: separate RVQ stacks for prosody,
        content, timbre and residual streams.

        Args:
            in_dim: channel width of the incoming latent features.
            n_p_codebooks / n_c_codebooks / n_t_codebooks / n_r_codebooks:
                RVQ depth of the prosody / content / timbre / residual stacks.
            codebook_size: entries per codebook.
            codebook_dim: projected dimension inside each codebook.
            quantizer_dropout: RVQ quantizer-dropout probability.
            causal: build the prosody branch with causal convolutions.
            separate_prosody_encoder: derive prosody codes from a mel-based
                WaveNet encoder instead of the shared latents.
            timbre_norm: replace the timbre RVQ with a style encoder plus a
                learned scale/shift over LayerNorm-ed features.
        """
        super(FAquantizer, self).__init__()
        conv1d_type = SConv1d  # if causal else nn.Conv1d
        self.prosody_quantizer = ResidualVectorQuantize(
            input_dim=in_dim,
            n_codebooks=n_p_codebooks,
            codebook_size=codebook_size,
            codebook_dim=codebook_dim,
            quantizer_dropout=quantizer_dropout,
        )

        self.content_quantizer = ResidualVectorQuantize(
            input_dim=in_dim,
            n_codebooks=n_c_codebooks,
            codebook_size=codebook_size,
            codebook_dim=codebook_dim,
            quantizer_dropout=quantizer_dropout,
        )

        if not timbre_norm:
            self.timbre_quantizer = ResidualVectorQuantize(
                input_dim=in_dim,
                n_codebooks=n_t_codebooks,
                codebook_size=codebook_size,
                codebook_dim=codebook_dim,
                quantizer_dropout=quantizer_dropout,
            )
        else:
            self.timbre_encoder = StyleEncoder(
                in_dim=80, hidden_dim=512, out_dim=in_dim
            )
            self.timbre_linear = nn.Linear(1024, 1024 * 2)
            # First half of the bias acts as scale (init 1), second as shift (init 0).
            self.timbre_linear.bias.data[:1024] = 1
            self.timbre_linear.bias.data[1024:] = 0
            self.timbre_norm = nn.LayerNorm(1024, elementwise_affine=False)

        self.residual_quantizer = ResidualVectorQuantize(
            input_dim=in_dim,
            n_codebooks=n_r_codebooks,
            codebook_size=codebook_size,
            codebook_dim=codebook_dim,
            quantizer_dropout=quantizer_dropout,
        )

        if separate_prosody_encoder:
            # Mel (first 20 bins) -> 256 channels -> WaveNet -> back to in_dim.
            self.melspec_linear = conv1d_type(
                in_channels=20, out_channels=256, kernel_size=1, causal=causal
            )
            self.melspec_encoder = WN(
                hidden_channels=256,
                kernel_size=5,
                dilation_rate=1,
                n_layers=8,
                gin_channels=0,
                p_dropout=0.2,
                causal=causal,
            )
            self.melspec_linear2 = conv1d_type(
                in_channels=256, out_channels=1024, kernel_size=1, causal=causal
            )
        else:
            pass
        self.separate_prosody_encoder = separate_prosody_encoder

        # Probability of zeroing the residual stream during training.
        self.prob_random_mask_residual = 0.75

        SPECT_PARAMS = {
            "n_fft": 2048,
            "win_length": 1200,
            "hop_length": 300,
        }
        MEL_PARAMS = {
            "n_mels": 80,
        }

        self.to_mel = torchaudio.transforms.MelSpectrogram(
            n_mels=MEL_PARAMS["n_mels"], sample_rate=24000, **SPECT_PARAMS
        )
        # Normalisation constants applied to the log-mel features.
        self.mel_mean, self.mel_std = -4, 4
        self.frame_rate = 24000 / 300
        self.hop_length = 300

        self.is_timbre_norm = timbre_norm
        if timbre_norm:
            # NOTE(review): rebinds forward to forward_v2 (defined elsewhere in
            # this class, not visible here) when timbre norm is enabled —
            # confirm forward_v2 exists on all code paths.
            self.forward = self.forward_v2
+
+ def preprocess(self, wave_tensor, n_bins=20):
+ mel_tensor = self.to_mel(wave_tensor.squeeze(1))
+ mel_tensor = (torch.log(1e-5 + mel_tensor) - self.mel_mean) / self.mel_std
+ return mel_tensor[:, :n_bins, : int(wave_tensor.size(-1) / self.hop_length)]
+
+ @torch.no_grad()
+ def decode(self, codes):
+ code_c, code_p, code_t = codes.split([1, 1, 2], dim=1)
+
+ z_c = self.content_quantizer.from_codes(code_c)[0]
+ z_p = self.prosody_quantizer.from_codes(code_p)[0]
+ z_t = self.timbre_quantizer.from_codes(code_t)[0]
+
+ z = z_c + z_p + z_t
+
+ return z, [z_c, z_p, z_t]
+
    @torch.no_grad()
    def encode(self, x, wave_segments, n_c=1):
        """Quantize latents into (content, prosody, timbre, residual) codes.

        Args:
            x: latent features with a (batch, channels, time) layout — sliced
                along dim 2 below.
            wave_segments: waveform used to build the mel prosody input when a
                separate prosody encoder is configured.
            n_c: number of content codebooks to use.
        Returns:
            ([codes_c, codes_p, codes_t, codes_r], [z_c, z_p, z_t, z_r])
        NOTE(review): relies on self.timbre_quantizer, which is only created
        when timbre_norm=False — confirm callers respect that.
        """
        outs = 0
        if self.separate_prosody_encoder:
            prosody_feature = self.preprocess(wave_segments)

            f0_input = prosody_feature  # (B, T, 20)
            f0_input = self.melspec_linear(f0_input)
            f0_input = self.melspec_encoder(
                f0_input,
                torch.ones(f0_input.shape[0], 1, f0_input.shape[2])
                .to(f0_input.device)
                .bool(),
            )
            f0_input = self.melspec_linear2(f0_input)

            # Align the prosody and latent streams on the shorter time length.
            common_min_size = min(f0_input.size(2), x.size(2))
            f0_input = f0_input[:, :, :common_min_size]

            x = x[:, :, :common_min_size]

            (
                z_p,
                codes_p,
                latents_p,
                commitment_loss_p,
                codebook_loss_p,
            ) = self.prosody_quantizer(f0_input, 1)
            outs += z_p.detach()
        else:
            (
                z_p,
                codes_p,
                latents_p,
                commitment_loss_p,
                codebook_loss_p,
            ) = self.prosody_quantizer(x, 1)
            outs += z_p.detach()

        (
            z_c,
            codes_c,
            latents_c,
            commitment_loss_c,
            codebook_loss_c,
        ) = self.content_quantizer(x, n_c)
        outs += z_c.detach()

        # Whatever prosody/content did not explain feeds the timbre quantizer.
        timbre_residual_feature = x - z_p.detach() - z_c.detach()

        (
            z_t,
            codes_t,
            latents_t,
            commitment_loss_t,
            codebook_loss_t,
        ) = self.timbre_quantizer(timbre_residual_feature, 2)
        outs += z_t  # we should not detach timbre

        residual_feature = timbre_residual_feature - z_t

        (
            z_r,
            codes_r,
            latents_r,
            commitment_loss_r,
            codebook_loss_r,
        ) = self.residual_quantizer(residual_feature, 3)

        return [codes_c, codes_p, codes_t, codes_r], [z_c, z_p, z_t, z_r]
+
    def forward(
        self, x, wave_segments, noise_added_flags, recon_noisy_flags, n_c=2, n_t=2
    ):
        """Factorized quantization of `x` into prosody/content/timbre/residual.

        The four quantized latents are summed into a single reconstruction
        target `outs`; the residual branch is randomly masked per sample and
        forced on/off by the noise flags (see below).

        Args:
            x: feature tensor of shape (B, C, T).
            wave_segments: raw waveform segments for the prosody branch when
                `self.separate_prosody_encoder` is set.
            noise_added_flags: per-sample flags — presumably bool tensors of
                shape (B,) marking samples with added noise; TODO confirm.
            recon_noisy_flags: per-sample flags selecting whether the noisy
                version should be reconstructed.
            n_c: content quantizer depth.
            n_t: timbre quantizer depth.

        Returns:
            (outs, [z_p, z_c, z_t, z_r], commitment_losses, codebook_losses)
        """
        # timbre = self.timbre_encoder(mels, sequence_mask(mel_lens, mels.size(-1)).unsqueeze(1))
        # timbre = self.timbre_encoder(mel_segments, torch.ones(mel_segments.size(0), 1, mel_segments.size(2)).bool().to(mel_segments.device))
        outs = 0
        if self.separate_prosody_encoder:
            # Prosody branch computed from raw audio rather than from `x`.
            prosody_feature = self.preprocess(wave_segments)

            f0_input = prosody_feature  # (B, T, 20)
            f0_input = self.melspec_linear(f0_input)
            f0_input = self.melspec_encoder(
                f0_input,
                torch.ones(f0_input.shape[0], 1, f0_input.shape[2])
                .to(f0_input.device)
                .bool(),
            )
            f0_input = self.melspec_linear2(f0_input)

            # Truncate both branches to a common time length.
            common_min_size = min(f0_input.size(2), x.size(2))
            f0_input = f0_input[:, :, :common_min_size]

            x = x[:, :, :common_min_size]

            (
                z_p,
                codes_p,
                latents_p,
                commitment_loss_p,
                codebook_loss_p,
            ) = self.prosody_quantizer(f0_input, 1)
            outs += z_p.detach()
        else:
            (
                z_p,
                codes_p,
                latents_p,
                commitment_loss_p,
                codebook_loss_p,
            ) = self.prosody_quantizer(x, 1)
            outs += z_p.detach()

        (
            z_c,
            codes_c,
            latents_c,
            commitment_loss_c,
            codebook_loss_c,
        ) = self.content_quantizer(x, n_c)
        outs += z_c.detach()

        # Residual after prosody + content; detached so timbre gradients do
        # not flow back into the other quantizers through the subtraction.
        timbre_residual_feature = x - z_p.detach() - z_c.detach()

        (
            z_t,
            codes_t,
            latents_t,
            commitment_loss_t,
            codebook_loss_t,
        ) = self.timbre_quantizer(timbre_residual_feature, n_t)
        outs += z_t  # we should not detach timbre

        residual_feature = timbre_residual_feature - z_t

        (
            z_r,
            codes_r,
            latents_r,
            commitment_loss_r,
            codebook_loss_r,
        ) = self.residual_quantizer(residual_feature, 3)

        # Randomly drop the residual branch per sample with probability
        # `self.prob_random_mask_residual` (mask value 0 drops it).
        bsz = z_r.shape[0]
        res_mask = np.random.choice(
            [0, 1],
            size=bsz,
            p=[
                self.prob_random_mask_residual,
                1 - self.prob_random_mask_residual,
            ],
        )
        res_mask = torch.from_numpy(res_mask).unsqueeze(1).unsqueeze(1)  # (B, 1, 1)
        res_mask = res_mask.to(device=z_r.device, dtype=z_r.dtype)
        # Noise-augmented samples override the random mask: when the noisy
        # version must be reconstructed the residual is forced on, otherwise
        # forced off.
        noise_must_on = noise_added_flags * recon_noisy_flags
        noise_must_off = noise_added_flags * (~recon_noisy_flags)
        res_mask[noise_must_on] = 1
        res_mask[noise_must_off] = 0

        outs += z_r * res_mask

        quantized = [z_p, z_c, z_t, z_r]
        commitment_losses = (
            commitment_loss_p
            + commitment_loss_c
            + commitment_loss_t
            + commitment_loss_r
        )
        codebook_losses = (
            codebook_loss_p + codebook_loss_c + codebook_loss_t + codebook_loss_r
        )

        return outs, quantized, commitment_losses, codebook_losses
+
    def forward_v2(
        self,
        x,
        wave_segments,
        n_c=1,
        n_t=2,
        full_waves=None,
        wave_lens=None,
        return_codes=False,
    ):
        """Timbre-normalized variant: no timbre quantizer, style via FiLM.

        Unlike `forward`, the timbre is encoded continuously from a mel
        spectrogram and applied to the summed latents as a per-channel
        affine (gamma, beta) after `self.timbre_norm`.

        Args:
            x: feature tensor of shape (B, C, T).
            wave_segments: waveform segments; used for the timbre mel when
                `full_waves` is None, and for the prosody branch.
            n_c: content quantizer depth.
            n_t: unused here — kept for signature parity with `forward`.
            full_waves: optional full-length waveforms for timbre encoding.
            wave_lens: lengths of `full_waves` in samples (used to build the
                mel mask via `self.hop_length`).
            return_codes: also return the raw code indices.

        Returns:
            (outs, [z_p, z_c, z_r], commitment_losses, codebook_losses,
             timbre[, codes]) — `codes` only when `return_codes` is True.
        """
        # timbre = self.timbre_encoder(x, sequence_mask(mel_lens, mels.size(-1)).unsqueeze(1))
        if full_waves is None:
            # Timbre from the (unpadded) segments: all-ones mask.
            mel = self.preprocess(wave_segments, n_bins=80)
            timbre = self.timbre_encoder(
                mel, torch.ones(mel.size(0), 1, mel.size(2)).bool().to(mel.device)
            )
        else:
            # Timbre from full utterances: mask out padding frames.
            mel = self.preprocess(full_waves, n_bins=80)
            timbre = self.timbre_encoder(
                mel,
                sequence_mask(wave_lens // self.hop_length, mel.size(-1)).unsqueeze(1),
            )
        outs = 0
        if self.separate_prosody_encoder:
            prosody_feature = self.preprocess(wave_segments)

            f0_input = prosody_feature  # (B, T, 20)
            f0_input = self.melspec_linear(f0_input)
            f0_input = self.melspec_encoder(
                f0_input,
                torch.ones(f0_input.shape[0], 1, f0_input.shape[2])
                .to(f0_input.device)
                .bool(),
            )
            f0_input = self.melspec_linear2(f0_input)

            # Truncate both branches to a common time length.
            common_min_size = min(f0_input.size(2), x.size(2))
            f0_input = f0_input[:, :, :common_min_size]

            x = x[:, :, :common_min_size]

            (
                z_p,
                codes_p,
                latents_p,
                commitment_loss_p,
                codebook_loss_p,
            ) = self.prosody_quantizer(f0_input, 1)
            outs += z_p.detach()
        else:
            (
                z_p,
                codes_p,
                latents_p,
                commitment_loss_p,
                codebook_loss_p,
            ) = self.prosody_quantizer(x, 1)
            outs += z_p.detach()

        (
            z_c,
            codes_c,
            latents_c,
            commitment_loss_c,
            codebook_loss_c,
        ) = self.content_quantizer(x, n_c)
        outs += z_c.detach()

        # No timbre quantizer in v2: everything left over goes to the
        # residual quantizer directly.
        residual_feature = x - z_p.detach() - z_c.detach()

        (
            z_r,
            codes_r,
            latents_r,
            commitment_loss_r,
            codebook_loss_r,
        ) = self.residual_quantizer(residual_feature, 3)

        # Random per-sample residual dropout during training only.
        bsz = z_r.shape[0]
        res_mask = np.random.choice(
            [0, 1],
            size=bsz,
            p=[
                self.prob_random_mask_residual,
                1 - self.prob_random_mask_residual,
            ],
        )
        res_mask = torch.from_numpy(res_mask).unsqueeze(1).unsqueeze(1)  # (B, 1, 1)
        res_mask = res_mask.to(device=z_r.device, dtype=z_r.dtype)

        if not self.training:
            # At eval time the residual is always kept.
            res_mask = torch.ones_like(res_mask)
        outs += z_r * res_mask

        quantized = [z_p, z_c, z_r]
        codes = [codes_p, codes_c, codes_r]
        commitment_losses = commitment_loss_p + commitment_loss_c + commitment_loss_r
        codebook_losses = codebook_loss_p + codebook_loss_c + codebook_loss_r

        # FiLM-style conditioning: normalize, then scale/shift with the
        # timbre-derived (gamma, beta).
        style = self.timbre_linear(timbre).unsqueeze(2)  # (B, 2d, 1)
        gamma, beta = style.chunk(2, 1)  # (B, d, 1)
        outs = outs.transpose(1, 2)
        outs = self.timbre_norm(outs)
        outs = outs.transpose(1, 2)
        outs = outs * gamma + beta

        if return_codes:
            return outs, quantized, commitment_losses, codebook_losses, timbre, codes
        else:
            return outs, quantized, commitment_losses, codebook_losses, timbre
+
+ def voice_conversion(self, z, ref_wave):
+ ref_mel = self.preprocess(ref_wave, n_bins=80)
+ ref_timbre = self.timbre_encoder(
+ ref_mel,
+ sequence_mask(
+ torch.LongTensor([ref_wave.size(-1)]).to(z.device) // self.hop_length,
+ ref_mel.size(-1),
+ ).unsqueeze(1),
+ )
+ style = self.timbre_linear(ref_timbre).unsqueeze(2) # (B, 2d, 1)
+ gamma, beta = style.chunk(2, 1) # (B, d, 1)
+ outs = z.transpose(1, 2)
+ outs = self.timbre_norm(outs)
+ outs = outs.transpose(1, 2)
+ outs = outs * gamma + beta
+
+ return outs
+
+
class FApredictors(nn.Module):
    """Factor predictors with gradient-reversal probes.

    Predicts f0/uv, phone (content) and speaker (timbre) targets from the
    factorized latents, plus "reversed" predictions through gradient-reversal
    layers that penalize information leakage between factors.
    """

    def __init__(
        self,
        in_dim=1024,
        use_gr_content_f0=False,
        use_gr_prosody_phone=False,
        use_gr_residual_f0=False,
        use_gr_residual_phone=False,
        use_gr_timbre_content=True,
        use_gr_timbre_prosody=True,
        use_gr_x_timbre=False,
        norm_f0=True,
        timbre_norm=False,
        use_gr_content_global_f0=False,
    ):
        """Build the forward predictors and the gradient-reversal probes.

        Args:
            in_dim: latent channel dimension fed to every predictor.
            use_gr_*: which latents are added into each reversal probe's
                input (see `forward` / `forward_v2`).
            norm_f0: if True, f0/timbre are predicted from single factors;
                otherwise prosody and timbre latents are mixed first.
            timbre_norm: if True, `forward` is replaced by `forward_v2` and
                the timbre predictor becomes a plain Linear over a global
                timbre embedding.
            use_gr_content_global_f0: add a reversal probe predicting a
                global f0 value from the content latent.
        """
        super(FApredictors, self).__init__()
        # f0 predictor outputs 2 heads (f0 and voiced/unvoiced).
        self.f0_predictor = CNNLSTM(in_dim, 1, 2)
        self.phone_predictor = CNNLSTM(in_dim, 1024, 1)
        if timbre_norm:
            # Global timbre embedding -> speaker logits (20000 classes).
            self.timbre_predictor = nn.Linear(in_dim, 20000)
        else:
            self.timbre_predictor = CNNLSTM(in_dim, 20000, 1, global_pred=True)

        self.use_gr_content_f0 = use_gr_content_f0
        self.use_gr_prosody_phone = use_gr_prosody_phone
        self.use_gr_residual_f0 = use_gr_residual_f0
        self.use_gr_residual_phone = use_gr_residual_phone
        self.use_gr_timbre_content = use_gr_timbre_content
        self.use_gr_timbre_prosody = use_gr_timbre_prosody
        self.use_gr_x_timbre = use_gr_x_timbre

        # Gradient-reversal probes: trained to predict a factor from latents
        # that should NOT contain it, pushing that information out.
        self.rev_f0_predictor = nn.Sequential(
            GradientReversal(alpha=1.0), CNNLSTM(in_dim, 1, 2)
        )
        self.rev_content_predictor = nn.Sequential(
            GradientReversal(alpha=1.0), CNNLSTM(in_dim, 1024, 1)
        )
        self.rev_timbre_predictor = nn.Sequential(
            GradientReversal(alpha=1.0), CNNLSTM(in_dim, 20000, 1, global_pred=True)
        )

        self.norm_f0 = norm_f0
        self.timbre_norm = timbre_norm
        if timbre_norm:
            # v2 pipeline: swap in forward_v2 and add a global f0 head.
            self.forward = self.forward_v2
            self.global_f0_predictor = nn.Linear(in_dim, 1)

        self.use_gr_content_global_f0 = use_gr_content_global_f0
        if use_gr_content_global_f0:
            self.rev_global_f0_predictor = nn.Sequential(
                GradientReversal(alpha=1.0), CNNLSTM(in_dim, 1, 1, global_pred=True)
            )

    def forward(self, quantized):
        """Predict targets from [prosody, content, timbre, residual] latents.

        Returns:
            (preds, rev_preds) dicts with keys f0/uv/content/timbre and
            rev_f0/rev_uv/rev_content/x_timbre respectively.
        """
        prosody_latent = quantized[0]
        content_latent = quantized[1]
        timbre_latent = quantized[2]
        # NOTE(review): residual_latent is only used via quantized[3] below.
        residual_latent = quantized[3]
        content_pred = self.phone_predictor(content_latent)[0]

        if self.norm_f0:
            spk_pred = self.timbre_predictor(timbre_latent)[0]
            f0_pred, uv_pred = self.f0_predictor(prosody_latent)
        else:
            # Un-normalized f0: prosody and timbre are entangled, so both
            # latents feed both predictors.
            spk_pred = self.timbre_predictor(timbre_latent + prosody_latent)[0]
            f0_pred, uv_pred = self.f0_predictor(prosody_latent + timbre_latent)

        # Reversal probe for f0: sum of latents that should not carry f0.
        prosody_rev_latent = torch.zeros_like(quantized[0])
        if self.use_gr_content_f0:
            prosody_rev_latent += quantized[1]
        if self.use_gr_timbre_prosody:
            prosody_rev_latent += quantized[2]
        if self.use_gr_residual_f0:
            prosody_rev_latent += quantized[3]
        rev_f0_pred, rev_uv_pred = self.rev_f0_predictor(prosody_rev_latent)

        # Reversal probe for content.
        content_rev_latent = torch.zeros_like(quantized[1])
        if self.use_gr_prosody_phone:
            content_rev_latent += quantized[0]
        if self.use_gr_timbre_content:
            content_rev_latent += quantized[2]
        if self.use_gr_residual_phone:
            content_rev_latent += quantized[3]
        rev_content_pred = self.rev_content_predictor(content_rev_latent)[0]

        # Reversal probe for timbre (all non-timbre latents).
        if self.norm_f0:
            timbre_rev_latent = quantized[0] + quantized[1] + quantized[3]
        else:
            timbre_rev_latent = quantized[1] + quantized[3]
        if self.use_gr_x_timbre:
            x_spk_pred = self.rev_timbre_predictor(timbre_rev_latent)[0]
        else:
            x_spk_pred = None

        preds = {
            "f0": f0_pred,
            "uv": uv_pred,
            "content": content_pred,
            "timbre": spk_pred,
        }

        rev_preds = {
            "rev_f0": rev_f0_pred,
            "rev_uv": rev_uv_pred,
            "rev_content": rev_content_pred,
            "x_timbre": x_spk_pred,
        }
        return preds, rev_preds

    def forward_v2(self, quantized, timbre):
        """v2 variant: latents are [prosody, content, residual]; timbre is a
        separate global embedding (see FACodec's `forward_v2`).

        Returns:
            (preds, rev_preds) dicts, same keys as `forward`.
        """
        prosody_latent = quantized[0]
        content_latent = quantized[1]
        residual_latent = quantized[2]
        content_pred = self.phone_predictor(content_latent)[0]

        # timbre_predictor is nn.Linear here (timbre_norm=True), hence no [0].
        spk_pred = self.timbre_predictor(timbre)
        f0_pred, uv_pred = self.f0_predictor(prosody_latent)

        prosody_rev_latent = torch.zeros_like(prosody_latent)
        if self.use_gr_content_f0:
            prosody_rev_latent += content_latent
        if self.use_gr_residual_f0:
            prosody_rev_latent += residual_latent
        rev_f0_pred, rev_uv_pred = self.rev_f0_predictor(prosody_rev_latent)

        content_rev_latent = torch.zeros_like(content_latent)
        if self.use_gr_prosody_phone:
            content_rev_latent += prosody_latent
        if self.use_gr_residual_phone:
            content_rev_latent += residual_latent
        rev_content_pred = self.rev_content_predictor(content_rev_latent)[0]

        # All frame-level latents should be timbre-free.
        timbre_rev_latent = prosody_latent + content_latent + residual_latent
        if self.use_gr_x_timbre:
            x_spk_pred = self.rev_timbre_predictor(timbre_rev_latent)[0]
        else:
            x_spk_pred = None

        preds = {
            "f0": f0_pred,
            "uv": uv_pred,
            "content": content_pred,
            "timbre": spk_pred,
        }

        rev_preds = {
            "rev_f0": rev_f0_pred,
            "rev_uv": rev_uv_pred,
            "rev_content": rev_content_pred,
            "x_timbre": x_spk_pred,
        }
        return preds, rev_preds
diff --git a/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py b/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py
new file mode 100644
index 0000000..e437c1a
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py
@@ -0,0 +1,110 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This code is modified from https://github.com/sh-lee-prml/HierSpeechpp/blob/main/ttv_v1/styleencoder.py
+
+from . import attentions
+from torch import nn
+import torch
+from torch.nn import functional as F
+
+
class Mish(nn.Module):
    """Mish activation: x * tanh(softplus(x)).

    See https://arxiv.org/abs/1908.08681. Delegates to torch's built-in
    `F.mish`, which computes exactly the same expression as the original
    hand-rolled `x * torch.tanh(F.softplus(x))` in one fused op.
    """

    def __init__(self):
        super(Mish, self).__init__()

    def forward(self, x):
        """Apply mish element-wise; shape is preserved."""
        return F.mish(x)
+
+
class Conv1dGLU(nn.Module):
    """
    Conv1d + GLU(Gated Linear Unit) with residual connection.
    For GLU refer to https://arxiv.org/abs/1612.08083 paper.

    The residual add requires in_channels == out_channels and a
    length-preserving convolution.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dropout):
        super(Conv1dGLU, self).__init__()
        self.out_channels = out_channels
        # 'same' padding for any odd kernel_size so the residual add is
        # valid. (Fixes the original hard-coded padding=2, which was only
        # correct for kernel_size == 5 — unchanged behavior for that case.)
        self.conv1 = nn.Conv1d(
            in_channels,
            2 * out_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Gated conv with residual: x + dropout(a * sigmoid(b))."""
        residual = x
        x = self.conv1(x)
        # Split the doubled channels into value and gate halves.
        x1, x2 = torch.split(x, split_size_or_sections=self.out_channels, dim=1)
        x = x1 * torch.sigmoid(x2)
        x = residual + self.dropout(x)
        return x
+
+
class StyleEncoder(torch.nn.Module):
    """Utterance-level style encoder.

    Spectral 1x1 convs -> gated temporal convs -> one self-attention layer
    -> 1x1 projection -> masked temporal average pooling, producing a single
    style vector per utterance.
    """

    def __init__(self, in_dim=513, hidden_dim=128, out_dim=256):
        """Args:
            in_dim: input feature channels (e.g. 513 for linear spectrogram,
                1024 for wav2vec 2.0 features).
            hidden_dim: internal channel width.
            out_dim: size of the output style vector.
        """
        super().__init__()

        self.in_dim = in_dim  # Linear 513 wav2vec 2.0 1024
        self.hidden_dim = hidden_dim
        self.out_dim = out_dim
        self.kernel_size = 5
        self.n_head = 2
        self.dropout = 0.1

        # Per-frame channel mixing (1x1 convs), no temporal context yet.
        self.spectral = nn.Sequential(
            nn.Conv1d(self.in_dim, self.hidden_dim, 1),
            Mish(),
            nn.Dropout(self.dropout),
            nn.Conv1d(self.hidden_dim, self.hidden_dim, 1),
            Mish(),
            nn.Dropout(self.dropout),
        )

        # Gated convolutions add local temporal context.
        self.temporal = nn.Sequential(
            Conv1dGLU(self.hidden_dim, self.hidden_dim, self.kernel_size, self.dropout),
            Conv1dGLU(self.hidden_dim, self.hidden_dim, self.kernel_size, self.dropout),
        )

        self.slf_attn = attentions.MultiHeadAttention(
            self.hidden_dim,
            self.hidden_dim,
            self.n_head,
            p_dropout=self.dropout,
            proximal_bias=False,
            proximal_init=True,
        )
        self.atten_drop = nn.Dropout(self.dropout)
        self.fc = nn.Conv1d(self.hidden_dim, self.out_dim, 1)

    def forward(self, x, mask=None):
        """Encode (B, in_dim, T) features into a (B, out_dim) style vector.

        Args:
            x: input features, (B, in_dim, T).
            mask: frame validity mask, presumably (B, 1, T) with 1 for valid
                frames — TODO confirm; despite the None default, `mask` is
                required (it is multiplied in unconditionally).
        """
        # spectral
        x = self.spectral(x) * mask
        # temporal
        x = self.temporal(x) * mask

        # self-attention
        # Pairwise mask: position (i, j) is valid iff both frames are valid.
        attn_mask = mask.unsqueeze(2) * mask.unsqueeze(-1)
        y = self.slf_attn(x, x, attn_mask=attn_mask)
        x = x + self.atten_drop(y)

        # fc
        x = self.fc(x)

        # temoral average pooling
        w = self.temporal_avg_pool(x, mask=mask)

        return w

    def temporal_avg_pool(self, x, mask=None):
        """Average (B, C, T) over time; with a mask, divide by valid length."""
        if mask is None:
            out = torch.mean(x, dim=2)
        else:
            # Sum over time and normalize by the number of valid frames.
            len_ = mask.sum(dim=2)
            x = x.sum(dim=2)

            out = torch.div(x, len_)
        return out
diff --git a/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py b/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py
new file mode 100644
index 0000000..3a2da54
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py
@@ -0,0 +1,224 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This code is modified from https://github.com/sh-lee-prml/HierSpeechpp/blob/main/ttv_v1/modules.py
+
+import math
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from modules.dac.model.encodec import SConv1d
+
+from . import commons
+
+LRELU_SLOPE = 0.1
+
+
class LayerNorm(nn.Module):
    """LayerNorm over the channel axis of (B, C, ...) tensors.

    Moves channels to the last axis, applies F.layer_norm with learnable
    affine parameters, and moves them back.
    """

    def __init__(self, channels, eps=1e-5):
        super().__init__()
        self.channels = channels
        self.eps = eps
        # Learnable scale/shift, analogous to nn.LayerNorm's weight/bias.
        self.gamma = nn.Parameter(torch.ones(channels))
        self.beta = nn.Parameter(torch.zeros(channels))

    def forward(self, x):
        # Channels-last view for F.layer_norm, then restore the layout.
        y = x.transpose(1, -1)
        y = F.layer_norm(y, (self.channels,), self.gamma, self.beta, self.eps)
        return y.transpose(1, -1)
+
+
class ConvReluNorm(nn.Module):
    """Stack of Conv1d -> LayerNorm -> ReLU -> Dropout layers with a final
    zero-initialized 1x1 projection added residually to the input.

    The zero-initialized projection makes the module an identity map at
    initialization.
    """

    def __init__(
        self,
        in_channels,
        hidden_channels,
        out_channels,
        kernel_size,
        n_layers,
        p_dropout,
    ):
        """Args:
            in_channels: input channel count (must equal out_channels for
                the residual add to be valid).
            hidden_channels: width of the intermediate conv layers.
            out_channels: output channel count.
            kernel_size: conv kernel size ('same' padding is applied).
            n_layers: number of conv layers; must be > 1.
            p_dropout: dropout probability after each ReLU.
        """
        super().__init__()
        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.n_layers = n_layers
        self.p_dropout = p_dropout
        # Fixed: message now matches the condition (it said "larger than 0").
        assert n_layers > 1, "Number of layers should be larger than 1."

        self.conv_layers = nn.ModuleList()
        self.norm_layers = nn.ModuleList()
        self.conv_layers.append(
            nn.Conv1d(
                in_channels, hidden_channels, kernel_size, padding=kernel_size // 2
            )
        )
        self.norm_layers.append(LayerNorm(hidden_channels))
        self.relu_drop = nn.Sequential(nn.ReLU(), nn.Dropout(p_dropout))
        for _ in range(n_layers - 1):
            self.conv_layers.append(
                nn.Conv1d(
                    hidden_channels,
                    hidden_channels,
                    kernel_size,
                    padding=kernel_size // 2,
                )
            )
            self.norm_layers.append(LayerNorm(hidden_channels))
        self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
        # Zero init -> the residual branch contributes nothing initially.
        self.proj.weight.data.zero_()
        self.proj.bias.data.zero_()

    def forward(self, x, x_mask):
        """Apply the conv stack under `x_mask` and add the input residually.

        Args:
            x: (B, in_channels, T) input.
            x_mask: (B, 1, T) validity mask multiplied before each conv and
                on the output.
        """
        x_org = x
        for i in range(self.n_layers):
            x = self.conv_layers[i](x * x_mask)
            x = self.norm_layers[i](x)
            x = self.relu_drop(x)
        x = x_org + self.proj(x)
        return x * x_mask
+
+
class DDSConv(nn.Module):
    """
    Dilated and Depth-Separable Convolution

    Each layer is a depthwise conv (dilation = kernel_size**i) followed by a
    pointwise 1x1 conv, with LayerNorm + GELU after each and a residual add.
    """

    def __init__(self, channels, kernel_size, n_layers, p_dropout=0.0):
        super().__init__()
        self.channels = channels
        self.kernel_size = kernel_size
        self.n_layers = n_layers
        self.p_dropout = p_dropout

        self.drop = nn.Dropout(p_dropout)
        self.convs_sep = nn.ModuleList()  # depthwise (grouped) convs
        self.convs_1x1 = nn.ModuleList()  # pointwise channel-mixing convs
        self.norms_1 = nn.ModuleList()
        self.norms_2 = nn.ModuleList()
        for i in range(n_layers):
            # Exponentially growing dilation; padding keeps length fixed.
            dilation = kernel_size**i
            padding = (kernel_size * dilation - dilation) // 2
            self.convs_sep.append(
                nn.Conv1d(
                    channels,
                    channels,
                    kernel_size,
                    groups=channels,
                    dilation=dilation,
                    padding=padding,
                )
            )
            self.convs_1x1.append(nn.Conv1d(channels, channels, 1))
            self.norms_1.append(LayerNorm(channels))
            self.norms_2.append(LayerNorm(channels))

    def forward(self, x, x_mask, g=None):
        """Apply the dilated depthwise-separable stack under `x_mask`.

        Args:
            x: (B, channels, T) input.
            x_mask: (B, 1, T) validity mask.
            g: optional conditioning tensor added to `x` before the stack
               (must broadcast against x).
        """
        if g is not None:
            x = x + g
        for i in range(self.n_layers):
            y = self.convs_sep[i](x * x_mask)
            y = self.norms_1[i](y)
            y = F.gelu(y)
            y = self.convs_1x1[i](y)
            y = self.norms_2[i](y)
            y = F.gelu(y)
            y = self.drop(y)
            x = x + y
        return x * x_mask
+
+
class WN(torch.nn.Module):
    """WaveNet-style stack of gated dilated convolutions with optional
    global conditioning, built on weight-normed SConv1d layers.

    Each layer computes a gated activation tanh(a) * sigmoid(b); all but the
    last layer split their output into a residual half (added to x) and a
    skip half (accumulated into the output).
    """

    def __init__(
        self,
        hidden_channels,
        kernel_size,
        dilation_rate,
        n_layers,
        gin_channels=0,
        p_dropout=0,
        causal=False,
    ):
        """Args:
            hidden_channels: channel width of the stack.
            kernel_size: conv kernel size; must be odd for symmetric padding.
            dilation_rate: per-layer dilation base (dilation = rate**i).
            n_layers: number of gated conv layers.
            gin_channels: conditioning channels; 0 disables conditioning.
            p_dropout: dropout on the gated activations.
            causal: passed through to SConv1d for causal convolution.
        """
        super(WN, self).__init__()
        conv1d_type = SConv1d
        assert kernel_size % 2 == 1
        self.hidden_channels = hidden_channels
        # NOTE(review): stored as a 1-tuple in the original; kept as-is since
        # nothing here reads it back.
        self.kernel_size = (kernel_size,)
        self.dilation_rate = dilation_rate
        self.n_layers = n_layers
        self.gin_channels = gin_channels
        self.p_dropout = p_dropout

        self.in_layers = torch.nn.ModuleList()
        self.res_skip_layers = torch.nn.ModuleList()
        self.drop = nn.Dropout(p_dropout)

        if gin_channels != 0:
            # One conditioning projection shared by all layers; its output is
            # sliced per layer in forward().
            self.cond_layer = conv1d_type(
                gin_channels, 2 * hidden_channels * n_layers, 1, norm="weight_norm"
            )

        for i in range(n_layers):
            # Exponentially growing dilation with 'same' padding.
            dilation = dilation_rate**i
            padding = int((kernel_size * dilation - dilation) / 2)
            in_layer = conv1d_type(
                hidden_channels,
                2 * hidden_channels,
                kernel_size,
                dilation=dilation,
                padding=padding,
                norm="weight_norm",
                causal=causal,
            )
            self.in_layers.append(in_layer)

            # last one is not necessary
            if i < n_layers - 1:
                res_skip_channels = 2 * hidden_channels
            else:
                res_skip_channels = hidden_channels

            res_skip_layer = conv1d_type(
                hidden_channels, res_skip_channels, 1, norm="weight_norm", causal=causal
            )
            self.res_skip_layers.append(res_skip_layer)

    def forward(self, x, x_mask, g=None, **kwargs):
        """Run the gated conv stack.

        Args:
            x: (B, hidden_channels, T) input.
            x_mask: (B, 1, T) validity mask applied to residual updates and
                the final output.
            g: optional conditioning, projected by `self.cond_layer`.

        Returns:
            Accumulated skip connections, masked — (B, hidden_channels, T).
        """
        output = torch.zeros_like(x)
        n_channels_tensor = torch.IntTensor([self.hidden_channels])

        if g is not None:
            g = self.cond_layer(g)

        for i in range(self.n_layers):
            x_in = self.in_layers[i](x)
            if g is not None:
                # Slice this layer's share of the shared conditioning output.
                cond_offset = i * 2 * self.hidden_channels
                g_l = g[:, cond_offset : cond_offset + 2 * self.hidden_channels, :]
            else:
                g_l = torch.zeros_like(x_in)

            # Gated activation: tanh/sigmoid halves fused in one kernel.
            acts = commons.fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor)
            acts = self.drop(acts)

            res_skip_acts = self.res_skip_layers[i](acts)
            if i < self.n_layers - 1:
                # First half updates the residual path, second half the skips.
                res_acts = res_skip_acts[:, : self.hidden_channels, :]
                x = (x + res_acts) * x_mask
                output = output + res_skip_acts[:, self.hidden_channels :, :]
            else:
                # Final layer contributes skip connections only.
                output = output + res_skip_acts
        return output * x_mask

    def remove_weight_norm(self):
        """Strip weight normalization from every conv (for inference export)."""
        if self.gin_channels != 0:
            torch.nn.utils.remove_weight_norm(self.cond_layer)
        for l in self.in_layers:
            torch.nn.utils.remove_weight_norm(l)
        for l in self.res_skip_layers:
            torch.nn.utils.remove_weight_norm(l)
diff --git a/indextts/utils/maskgct/models/codec/facodec/optimizer.py b/indextts/utils/maskgct/models/codec/facodec/optimizer.py
new file mode 100644
index 0000000..4d6d798
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/facodec/optimizer.py
@@ -0,0 +1,104 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os, sys
+import os.path as osp
+import numpy as np
+import torch
+from torch import nn
+from torch.optim import Optimizer
+from functools import reduce
+from torch.optim import AdamW
+
+
class MultiOptimizer:
    """Bundle several named optimizers/schedulers behind one interface.

    `step`, `zero_grad` and `scheduler` operate on one optimizer when `key`
    is given, otherwise on all of them.
    """

    def __init__(self, optimizers=None, schedulers=None):
        """Args:
            optimizers: dict mapping name -> torch optimizer.
            schedulers: dict mapping name -> LR scheduler (same keys).
        """
        # None sentinels instead of mutable `{}` defaults.
        self.optimizers = optimizers if optimizers is not None else {}
        self.schedulers = schedulers if schedulers is not None else {}
        self.keys = list(self.optimizers.keys())
        # Flatten all param_groups; the [] initializer keeps reduce() valid
        # for an empty dict (the original raised TypeError in that case).
        self.param_groups = reduce(
            lambda x, y: x + y,
            [v.param_groups for v in self.optimizers.values()],
            [],
        )

    def state_dict(self):
        """Return [(key, optimizer_state_dict), ...] for checkpointing."""
        state_dicts = [(key, self.optimizers[key].state_dict()) for key in self.keys]
        return state_dicts

    def scheduler_state_dict(self):
        """Return [(key, scheduler_state_dict), ...] for checkpointing."""
        state_dicts = [(key, self.schedulers[key].state_dict()) for key in self.keys]
        return state_dicts

    def load_state_dict(self, state_dict):
        """Restore optimizer states; skip (with a message) entries that fail."""
        for key, val in state_dict:
            try:
                self.optimizers[key].load_state_dict(val)
            except Exception:
                # Narrowed from a bare except; best-effort load is kept.
                print("Unloaded %s" % key)

    def load_scheduler_state_dict(self, state_dict):
        """Restore scheduler states; skip (with a message) entries that fail."""
        for key, val in state_dict:
            try:
                self.schedulers[key].load_state_dict(val)
            except Exception:
                print("Unloaded %s" % key)

    def step(self, key=None, scaler=None):
        """Step one optimizer (by key) or all of them, optionally via a
        GradScaler for AMP training."""
        keys = [key] if key is not None else self.keys
        _ = [self._step(key, scaler) for key in keys]

    def _step(self, key, scaler=None):
        # With AMP, the scaler unscales/steps and must be updated afterwards.
        if scaler is not None:
            scaler.step(self.optimizers[key])
            scaler.update()
        else:
            self.optimizers[key].step()

    def zero_grad(self, key=None):
        """Zero gradients of one optimizer (by key) or all of them."""
        if key is not None:
            self.optimizers[key].zero_grad()
        else:
            _ = [self.optimizers[key].zero_grad() for key in self.keys]

    def scheduler(self, *args, key=None):
        """Advance one scheduler (by key) or all of them.

        NOTE(review): the all-keys branch calls `step_batch`, which standard
        torch schedulers (e.g. ExponentialLR) do not define — presumably a
        custom scheduler API; confirm before relying on it.
        """
        if key is not None:
            self.schedulers[key].step(*args)
        else:
            _ = [self.schedulers[key].step_batch(*args) for key in self.keys]
+
+
def define_scheduler(optimizer, params):
    """Build an exponential LR-decay scheduler for `optimizer`.

    Args:
        optimizer: a torch optimizer.
        params: dict with a "gamma" entry (multiplicative decay per step).

    Returns:
        A torch.optim.lr_scheduler.ExponentialLR instance.
    """
    return torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=params["gamma"])
+
+
def build_optimizer(model_dict, scheduler_params_dict, lr, type="AdamW"):
    """Create one AdamW optimizer + ExponentialLR scheduler per model and
    wrap them in a MultiOptimizer.

    Args:
        model_dict: dict mapping name -> nn.Module.
        scheduler_params_dict: NOTE(review): currently unused — the scheduler
            gamma is hard-coded to 0.999996 below; confirm whether it should
            be read from here.
        lr: learning rate shared by all optimizers.
        type: optimizer type; only "AdamW" is supported.

    Returns:
        A MultiOptimizer over all models.

    Raises:
        ValueError: if `type` is not "AdamW".
    """
    optim = {}
    for key, model in model_dict.items():
        # (Removed an unused `parameters_names` list from the original.)
        if type == "AdamW":
            optim[key] = AdamW(
                model.parameters(),
                lr=lr,
                betas=(0.9, 0.98),
                eps=1e-9,
                weight_decay=0.1,
            )
        else:
            raise ValueError("Unknown optimizer type: %s" % type)

    schedulers = dict(
        [
            (key, torch.optim.lr_scheduler.ExponentialLR(opt, gamma=0.999996))
            for key, opt in optim.items()
        ]
    )

    multi_optim = MultiOptimizer(optim, schedulers)
    return multi_optim
diff --git a/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py b/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py
new file mode 100644
index 0000000..fdc9c3e
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py
@@ -0,0 +1,210 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from concurrent.futures import ALL_COMPLETED
+import numpy as np
+import torch
+import torch.nn as nn
+
+from torch.nn import functional as F
+from einops import rearrange, repeat
+
+from indextts.utils.maskgct.models.codec.amphion_codec.quantize import ResidualVQ
+from indextts.utils.maskgct.models.codec.kmeans.vocos import VocosBackbone
+
+
def init_weights(m):
    """Weight-init hook for `module.apply`.

    Conv1d and Linear layers get truncated-normal weights (std=0.02) and
    zero biases; every other module type is left untouched.
    """
    if isinstance(m, (nn.Conv1d, nn.Linear)):
        nn.init.trunc_normal_(m.weight, std=0.02)
        nn.init.constant_(m.bias, 0)
+
+
def compute_codebook_perplexity(indices, codebook_size):
    """Perplexity (exp of entropy) of codebook usage.

    Uniform usage over all entries gives `codebook_size`; a single repeated
    code gives ~1. The 1e-10 inside the log keeps zero-probability entries
    from producing NaNs.
    """
    flat = indices.flatten()
    prob = torch.bincount(flat, minlength=codebook_size).float() / flat.size(0)
    entropy = -torch.sum(prob * torch.log(prob + 1e-10))
    return torch.exp(entropy)
+
+
class RepCodec(nn.Module):
    """Semantic-feature codec: Vocos encoder -> residual VQ -> Vocos decoder.

    Quantizes hidden representations of shape (B, T, hidden_size) and
    reconstructs them, optionally with 2x temporal downsampling before
    quantization and matching upsampling after decoding.
    """

    def __init__(
        self,
        codebook_size=8192,
        hidden_size=1024,
        codebook_dim=8,
        vocos_dim=384,
        vocos_intermediate_dim=2048,
        vocos_num_layers=12,
        num_quantizers=1,
        downsample_scale=1,
        cfg=None,
    ):
        """Every argument can be overridden by the matching attribute on
        `cfg` when present (cfg wins over the keyword default)."""
        super().__init__()
        codebook_size = (
            cfg.codebook_size
            if cfg is not None and hasattr(cfg, "codebook_size")
            else codebook_size
        )
        codebook_dim = (
            cfg.codebook_dim
            if cfg is not None and hasattr(cfg, "codebook_dim")
            else codebook_dim
        )
        hidden_size = (
            cfg.hidden_size
            if cfg is not None and hasattr(cfg, "hidden_size")
            else hidden_size
        )
        vocos_dim = (
            cfg.vocos_dim
            if cfg is not None and hasattr(cfg, "vocos_dim")
            else vocos_dim
        )
        # Bug fix: the original guarded the two overrides below with
        # hasattr(cfg, "vocos_dim") (copy-paste), which crashed for cfgs
        # that define vocos_dim but not these attributes.
        vocos_intermediate_dim = (
            cfg.vocos_intermediate_dim
            if cfg is not None and hasattr(cfg, "vocos_intermediate_dim")
            else vocos_intermediate_dim
        )
        vocos_num_layers = (
            cfg.vocos_num_layers
            if cfg is not None and hasattr(cfg, "vocos_num_layers")
            else vocos_num_layers
        )
        num_quantizers = (
            cfg.num_quantizers
            if cfg is not None and hasattr(cfg, "num_quantizers")
            else num_quantizers
        )
        downsample_scale = (
            cfg.downsample_scale
            if cfg is not None and hasattr(cfg, "downsample_scale")
            else downsample_scale
        )

        self.codebook_size = codebook_size
        self.codebook_dim = codebook_dim
        self.hidden_size = hidden_size
        self.vocos_dim = vocos_dim
        self.vocos_intermediate_dim = vocos_intermediate_dim
        self.vocos_num_layers = vocos_num_layers
        self.num_quantizers = num_quantizers
        self.downsample_scale = downsample_scale

        # Optional temporal down/up-sampling around the codec.
        # NOTE(review): `down` uses stride 2 and forward() upsamples by a
        # fixed factor of 2, so only downsample_scale == 2 is supported.
        if self.downsample_scale is not None and self.downsample_scale > 1:
            self.down = nn.Conv1d(
                self.hidden_size, self.hidden_size, kernel_size=3, stride=2, padding=1
            )
            self.up = nn.Conv1d(
                self.hidden_size, self.hidden_size, kernel_size=3, stride=1, padding=1
            )

        self.encoder = nn.Sequential(
            VocosBackbone(
                input_channels=self.hidden_size,
                dim=self.vocos_dim,
                intermediate_dim=self.vocos_intermediate_dim,
                num_layers=self.vocos_num_layers,
                adanorm_num_embeddings=None,
            ),
            nn.Linear(self.vocos_dim, self.hidden_size),
        )
        self.decoder = nn.Sequential(
            VocosBackbone(
                input_channels=self.hidden_size,
                dim=self.vocos_dim,
                intermediate_dim=self.vocos_intermediate_dim,
                num_layers=self.vocos_num_layers,
                adanorm_num_embeddings=None,
            ),
            nn.Linear(self.vocos_dim, self.hidden_size),
        )

        self.quantizer = ResidualVQ(
            input_dim=hidden_size,
            num_quantizers=num_quantizers,
            codebook_size=codebook_size,
            codebook_dim=codebook_dim,
            quantizer_type="fvq",
            quantizer_dropout=0.0,
            commitment=0.15,
            codebook_loss_weight=1.0,
            use_l2_normlize=True,
        )

        self.reset_parameters()

    def forward(self, x):
        """Encode, quantize and reconstruct `x`.

        Args:
            x: (B, T, hidden_size) features.

        Returns:
            (x_rec, codebook_loss, all_indices): reconstruction with the same
            layout as `x`, mean of codebook + commitment losses, and the code
            indices from each quantizer stage.
        """
        # downsample
        if self.downsample_scale is not None and self.downsample_scale > 1:
            x = x.transpose(1, 2)
            x = self.down(x)
            x = F.gelu(x)
            x = x.transpose(1, 2)

        # encoder
        x = self.encoder(x.transpose(1, 2)).transpose(1, 2)

        # vq
        (
            quantized_out,
            all_indices,
            all_commit_losses,
            all_codebook_losses,
            _,
        ) = self.quantizer(x)

        # decoder
        x = self.decoder(quantized_out)

        # upsample back to the input frame rate
        if self.downsample_scale is not None and self.downsample_scale > 1:
            x = x.transpose(1, 2)
            x = F.interpolate(x, scale_factor=2, mode="nearest")
            x_rec = self.up(x).transpose(1, 2)
        else:
            # Bug fix: without downsampling the original never assigned
            # x_rec and raised NameError; the decoder output is already the
            # reconstruction at the input frame rate.
            x_rec = x

        codebook_loss = (all_codebook_losses + all_commit_losses).mean()

        return x_rec, codebook_loss, all_indices

    def quantize(self, x):
        """Return code indices and quantized embeddings for `x`.

        Args:
            x: (B, T, hidden_size) features.

        Returns:
            (indices, embeddings) — indices squeezed to (B, T') when there is
            a single quantizer stage; embeddings transposed to (B, T', C).
        """
        if self.downsample_scale is not None and self.downsample_scale > 1:
            x = x.transpose(1, 2)
            x = self.down(x)
            x = F.gelu(x)
            x = x.transpose(1, 2)

        x = self.encoder(x.transpose(1, 2)).transpose(1, 2)

        (
            quantized_out,
            all_indices,
            all_commit_losses,
            all_codebook_losses,
            _,
        ) = self.quantizer(x)

        if all_indices.shape[0] == 1:
            return all_indices.squeeze(0), quantized_out.transpose(1, 2)
        return all_indices, quantized_out.transpose(1, 2)

    def reset_parameters(self):
        """Apply truncated-normal/zero init to all Conv1d/Linear layers."""
        self.apply(init_weights)
+
+
if __name__ == "__main__":
    # Smoke test: build a RepCodec with 2x temporal downsampling and push a
    # random batch through both the reconstruction and quantization paths.
    repcodec = RepCodec(vocos_dim=1024, downsample_scale=2)
    print(repcodec)
    # Parameter count in millions.
    print(sum(p.numel() for p in repcodec.parameters()) / 1e6)
    x = torch.randn(5, 10, 1024)  # (batch, frames, hidden_size)
    x_rec, codebook_loss, all_indices = repcodec(x)
    print(x_rec.shape, codebook_loss, all_indices.shape)
    vq_id, emb = repcodec.quantize(x)
    print(vq_id.shape, emb.shape)
diff --git a/indextts/utils/maskgct/models/codec/kmeans/vocos.py b/indextts/utils/maskgct/models/codec/kmeans/vocos.py
new file mode 100644
index 0000000..7d9f5a5
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/kmeans/vocos.py
@@ -0,0 +1,850 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Optional, Tuple
+
+import numpy as np
+import scipy
+import torch
+from torch import nn, view_as_real, view_as_complex
+from torch import nn
+from torch.nn.utils import weight_norm, remove_weight_norm
+from torchaudio.functional.functional import _hz_to_mel, _mel_to_hz
+
+
+def safe_log(x: torch.Tensor, clip_val: float = 1e-7) -> torch.Tensor:
+ """
+ Computes the element-wise logarithm of the input tensor with clipping to avoid near-zero values.
+
+ Args:
+ x (Tensor): Input tensor.
+ clip_val (float, optional): Minimum value to clip the input tensor. Defaults to 1e-7.
+
+ Returns:
+ Tensor: Element-wise logarithm of the input tensor with clipping applied.
+ """
+ return torch.log(torch.clip(x, min=clip_val))
+
+
+def symlog(x: torch.Tensor) -> torch.Tensor:
+ return torch.sign(x) * torch.log1p(x.abs())
+
+
+def symexp(x: torch.Tensor) -> torch.Tensor:
+ return torch.sign(x) * (torch.exp(x.abs()) - 1)
+
+
+class STFT(nn.Module):
+    """Short-Time Fourier Transform returning log-magnitude and phase.
+
+    Args:
+        n_fft (int): Size of the Fourier transform.
+        hop_length (int): Distance between neighboring frames.
+        win_length (int): Window (and STFT filter) size.
+        center (bool, optional): If False, "same"-style reflect padding is
+            applied manually before torch.stft. Defaults to True.
+    """
+
+    def __init__(
+        self,
+        n_fft: int,
+        hop_length: int,
+        win_length: int,
+        center=True,
+    ):
+        super().__init__()
+        self.center = center
+        self.n_fft = n_fft
+        self.hop_length = hop_length
+        self.win_length = win_length
+        # registered as a buffer so it follows the module across devices
+        window = torch.hann_window(win_length)
+        self.register_buffer("window", window)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Return (log_mag, phase), each (B, n_fft // 2 + 1, T), for x of shape (B, samples)."""
+        # x: (B, T * hop_length)
+
+        if not self.center:
+            # emulate "same" padding when torch.stft does not center frames
+            pad = self.win_length - self.hop_length
+            x = torch.nn.functional.pad(x, (pad // 2, pad // 2), mode="reflect")
+
+        stft_spec = torch.stft(
+            x,
+            self.n_fft,
+            hop_length=self.hop_length,
+            win_length=self.win_length,
+            window=self.window,
+            center=self.center,
+            return_complex=False,
+        )  # (B, n_fft // 2 + 1, T, 2) -- last axis is (real, imag)
+
+        rea = stft_spec[:, :, :, 0]  # real part, (B, n_fft // 2 + 1, T)
+        imag = stft_spec[:, :, :, 1]  # imaginary part, (B, n_fft // 2 + 1, T)
+
+        # log-magnitude with a small epsilon to avoid log(0)
+        log_mag = torch.log(
+            torch.abs(torch.sqrt(torch.pow(rea, 2) + torch.pow(imag, 2))) + 1e-5
+        )  # (B, n_fft // 2 + 1, T)
+        phase = torch.atan2(imag, rea)  # (B, n_fft // 2 + 1, T)
+
+        return log_mag, phase
+
+
+class ISTFT(nn.Module):
+    """
+    Custom implementation of ISTFT since torch.istft doesn't allow custom padding (other than `center=True`) with
+    windowing. This is because the NOLA (Nonzero Overlap Add) check fails at the edges.
+    See issue: https://github.com/pytorch/pytorch/issues/62323
+    Specifically, in the context of neural vocoding we are interested in "same" padding analogous to CNNs.
+    The NOLA constraint is met as we trim padded samples anyway.
+
+    Args:
+        n_fft (int): Size of Fourier transform.
+        hop_length (int): The distance between neighboring sliding window frames.
+        win_length (int): The size of window frame and STFT filter.
+        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
+    """
+
+    def __init__(
+        self, n_fft: int, hop_length: int, win_length: int, padding: str = "same"
+    ):
+        super().__init__()
+        if padding not in ["center", "same"]:
+            raise ValueError("Padding must be 'center' or 'same'.")
+        self.padding = padding
+        self.n_fft = n_fft
+        self.hop_length = hop_length
+        self.win_length = win_length
+        # buffer so the synthesis window follows the module across devices
+        window = torch.hann_window(win_length)
+        self.register_buffer("window", window)
+
+    def forward(self, spec: torch.Tensor) -> torch.Tensor:
+        """
+        Compute the Inverse Short Time Fourier Transform (ISTFT) of a complex spectrogram.
+
+        Args:
+            spec (Tensor): Input complex spectrogram of shape (B, N, T), where B is the batch size,
+                N is the number of frequency bins, and T is the number of time frames.
+
+        Returns:
+            Tensor: Reconstructed time-domain signal of shape (B, L), where L is the length of the output signal.
+        """
+        if self.padding == "center":
+            # Fallback to pytorch native implementation
+            return torch.istft(
+                spec,
+                self.n_fft,
+                self.hop_length,
+                self.win_length,
+                self.window,
+                center=True,
+            )
+        elif self.padding == "same":
+            # number of samples trimmed from each side after overlap-add
+            pad = (self.win_length - self.hop_length) // 2
+        else:
+            raise ValueError("Padding must be 'center' or 'same'.")
+
+        assert spec.dim() == 3, "Expected a 3D tensor as input"
+        B, N, T = spec.shape
+
+        # Inverse FFT of each frame, then apply the synthesis window
+        ifft = torch.fft.irfft(spec, self.n_fft, dim=1, norm="backward")
+        ifft = ifft * self.window[None, :, None]
+
+        # Overlap and Add via fold: windowed frames -> 1D signal
+        output_size = (T - 1) * self.hop_length + self.win_length
+        y = torch.nn.functional.fold(
+            ifft,
+            output_size=(1, output_size),
+            kernel_size=(1, self.win_length),
+            stride=(1, self.hop_length),
+        )[:, 0, 0, pad:-pad]
+
+        # Window envelope: overlap-added squared windows, used for normalization
+        window_sq = self.window.square().expand(1, T, -1).transpose(1, 2)
+        window_envelope = torch.nn.functional.fold(
+            window_sq,
+            output_size=(1, output_size),
+            kernel_size=(1, self.win_length),
+            stride=(1, self.hop_length),
+        ).squeeze()[pad:-pad]
+
+        # Normalize; the NOLA condition guarantees a nonzero envelope
+        assert (window_envelope > 1e-11).all()
+        y = y / window_envelope
+
+        return y
+
+
+class MDCT(nn.Module):
+    """
+    Modified Discrete Cosine Transform (MDCT) module.
+
+    Args:
+        frame_len (int): Length of the MDCT frame.
+        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
+    """
+
+    def __init__(self, frame_len: int, padding: str = "same"):
+        super().__init__()
+        if padding not in ["center", "same"]:
+            raise ValueError("Padding must be 'center' or 'same'.")
+        self.padding = padding
+        self.frame_len = frame_len
+        N = frame_len // 2  # number of MDCT bins per frame
+        n0 = (N + 1) / 2  # standard MDCT phase offset
+        window = torch.from_numpy(scipy.signal.cosine(frame_len)).float()
+        self.register_buffer("window", window)
+
+        # twiddle factors for computing the MDCT via an FFT
+        pre_twiddle = torch.exp(-1j * torch.pi * torch.arange(frame_len) / frame_len)
+        post_twiddle = torch.exp(-1j * torch.pi * n0 * (torch.arange(N) + 0.5) / N)
+        # view_as_real: NCCL Backend does not support ComplexFloat data type
+        # https://github.com/pytorch/pytorch/issues/71613
+        self.register_buffer("pre_twiddle", view_as_real(pre_twiddle))
+        self.register_buffer("post_twiddle", view_as_real(post_twiddle))
+
+    def forward(self, audio: torch.Tensor) -> torch.Tensor:
+        """
+        Apply the Modified Discrete Cosine Transform (MDCT) to the input audio.
+
+        Args:
+            audio (Tensor): Input audio waveform of shape (B, T), where B is the batch size
+                and T is the length of the audio.
+
+        Returns:
+            Tensor: MDCT coefficients of shape (B, L, N), where L is the number of output frames
+                and N is the number of frequency bins.
+        """
+        if self.padding == "center":
+            audio = torch.nn.functional.pad(
+                audio, (self.frame_len // 2, self.frame_len // 2)
+            )
+        elif self.padding == "same":
+            # hop_length is 1/2 frame_len
+            audio = torch.nn.functional.pad(
+                audio, (self.frame_len // 4, self.frame_len // 4)
+            )
+        else:
+            raise ValueError("Padding must be 'center' or 'same'.")
+
+        # split into overlapping frames with 50% overlap
+        x = audio.unfold(-1, self.frame_len, self.frame_len // 2)
+        N = self.frame_len // 2
+        x = x * self.window.expand(x.shape)
+        # FFT-based MDCT: pre-twiddle, FFT, keep N bins, post-twiddle
+        X = torch.fft.fft(
+            x * view_as_complex(self.pre_twiddle).expand(x.shape), dim=-1
+        )[..., :N]
+        res = X * view_as_complex(self.post_twiddle).expand(X.shape) * np.sqrt(1 / N)
+        return torch.real(res) * np.sqrt(2)
+
+
+class IMDCT(nn.Module):
+    """
+    Inverse Modified Discrete Cosine Transform (IMDCT) module.
+
+    Args:
+        frame_len (int): Length of the MDCT frame.
+        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
+    """
+
+    def __init__(self, frame_len: int, padding: str = "same"):
+        super().__init__()
+        if padding not in ["center", "same"]:
+            raise ValueError("Padding must be 'center' or 'same'.")
+        self.padding = padding
+        self.frame_len = frame_len
+        N = frame_len // 2  # number of MDCT bins per frame
+        n0 = (N + 1) / 2  # standard MDCT phase offset
+        window = torch.from_numpy(scipy.signal.cosine(frame_len)).float()
+        self.register_buffer("window", window)
+
+        # twiddle factors for the FFT-based inverse transform;
+        # stored via view_as_real because NCCL does not support complex buffers
+        pre_twiddle = torch.exp(1j * torch.pi * n0 * torch.arange(N * 2) / N)
+        post_twiddle = torch.exp(1j * torch.pi * (torch.arange(N * 2) + n0) / (N * 2))
+        self.register_buffer("pre_twiddle", view_as_real(pre_twiddle))
+        self.register_buffer("post_twiddle", view_as_real(post_twiddle))
+
+    def forward(self, X: torch.Tensor) -> torch.Tensor:
+        """
+        Apply the Inverse Modified Discrete Cosine Transform (IMDCT) to the input MDCT coefficients.
+
+        Args:
+            X (Tensor): Input MDCT coefficients of shape (B, L, N), where B is the batch size,
+                L is the number of frames, and N is the number of frequency bins.
+
+        Returns:
+            Tensor: Reconstructed audio waveform of shape (B, T), where T is the length of the audio.
+        """
+        B, L, N = X.shape
+        # extend the spectrum to 2N bins using the MDCT odd symmetry
+        Y = torch.zeros((B, L, N * 2), dtype=X.dtype, device=X.device)
+        Y[..., :N] = X
+        Y[..., N:] = -1 * torch.conj(torch.flip(X, dims=(-1,)))
+        y = torch.fft.ifft(
+            Y * view_as_complex(self.pre_twiddle).expand(Y.shape), dim=-1
+        )
+        y = (
+            torch.real(y * view_as_complex(self.post_twiddle).expand(y.shape))
+            * np.sqrt(N)
+            * np.sqrt(2)
+        )
+        result = y * self.window.expand(y.shape)
+        # overlap-add the windowed frames (50% overlap)
+        output_size = (1, (L + 1) * N)
+        audio = torch.nn.functional.fold(
+            result.transpose(1, 2),
+            output_size=output_size,
+            kernel_size=(1, self.frame_len),
+            stride=(1, self.frame_len // 2),
+        )[:, 0, 0, :]
+
+        if self.padding == "center":
+            pad = self.frame_len // 2
+        elif self.padding == "same":
+            pad = self.frame_len // 4
+        else:
+            raise ValueError("Padding must be 'center' or 'same'.")
+
+        # trim the padding added by the forward MDCT
+        audio = audio[:, pad:-pad]
+        return audio
+
+
+class FourierHead(nn.Module):
+    """Base class for inverse fourier modules."""
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            x (Tensor): Input tensor of shape (B, L, H), where B is the batch size,
+                L is the sequence length, and H denotes the model dimension.
+
+        Returns:
+            Tensor: Reconstructed time-domain audio signal of shape (B, T), where T is the length of the output signal.
+        """
+        # Implemented by ISTFTHead, IMDCTSymExpHead and IMDCTCosHead below.
+        raise NotImplementedError("Subclasses must implement the forward method.")
+
+
+class ISTFTHead(FourierHead):
+    """
+    ISTFT Head module for predicting STFT complex coefficients.
+
+    Args:
+        dim (int): Hidden dimension of the model.
+        n_fft (int): Size of Fourier transform.
+        hop_length (int): The distance between neighboring sliding window frames, which should align with
+            the resolution of the input features.
+        padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
+    """
+
+    def __init__(self, dim: int, n_fft: int, hop_length: int, padding: str = "same"):
+        super().__init__()
+        # n_fft + 2 = (n_fft // 2 + 1) magnitude bins + as many phase bins
+        out_dim = n_fft + 2
+        self.out = torch.nn.Linear(dim, out_dim)
+        self.istft = ISTFT(
+            n_fft=n_fft, hop_length=hop_length, win_length=n_fft, padding=padding
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Forward pass of the ISTFTHead module.
+
+        Args:
+            x (Tensor): Input tensor of shape (B, L, H), where B is the batch size,
+                L is the sequence length, and H denotes the model dimension.
+
+        Returns:
+            Tensor: Reconstructed time-domain audio signal of shape (B, T), where T is the length of the output signal.
+        """
+        x = self.out(x).transpose(1, 2)
+        # first half of the channels predicts log-magnitude, second half phase
+        mag, p = x.chunk(2, dim=1)
+        mag = torch.exp(mag)
+        mag = torch.clip(
+            mag, max=1e2
+        )  # safeguard to prevent excessively large magnitudes
+        # wrapping happens here. These two lines produce real and imaginary value
+        x = torch.cos(p)
+        y = torch.sin(p)
+        # recalculating phase here does not produce anything new
+        # only costs time
+        # phase = torch.atan2(y, x)
+        # S = mag * torch.exp(phase * 1j)
+        # better directly produce the complex value
+        S = mag * (x + 1j * y)
+        audio = self.istft(S)
+        return audio
+
+
+class IMDCTSymExpHead(FourierHead):
+ """
+ IMDCT Head module for predicting MDCT coefficients with symmetric exponential function
+
+ Args:
+ dim (int): Hidden dimension of the model.
+ mdct_frame_len (int): Length of the MDCT frame.
+ padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
+ sample_rate (int, optional): The sample rate of the audio. If provided, the last layer will be initialized
+ based on perceptual scaling. Defaults to None.
+ clip_audio (bool, optional): Whether to clip the audio output within the range of [-1.0, 1.0]. Defaults to False.
+ """
+
+ def __init__(
+ self,
+ dim: int,
+ mdct_frame_len: int,
+ padding: str = "same",
+ sample_rate: Optional[int] = None,
+ clip_audio: bool = False,
+ ):
+ super().__init__()
+ out_dim = mdct_frame_len // 2
+ self.out = nn.Linear(dim, out_dim)
+ self.imdct = IMDCT(frame_len=mdct_frame_len, padding=padding)
+ self.clip_audio = clip_audio
+
+ if sample_rate is not None:
+ # optionally init the last layer following mel-scale
+ m_max = _hz_to_mel(sample_rate // 2)
+ m_pts = torch.linspace(0, m_max, out_dim)
+ f_pts = _mel_to_hz(m_pts)
+ scale = 1 - (f_pts / f_pts.max())
+
+ with torch.no_grad():
+ self.out.weight.mul_(scale.view(-1, 1))
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """
+ Forward pass of the IMDCTSymExpHead module.
+
+ Args:
+ x (Tensor): Input tensor of shape (B, L, H), where B is the batch size,
+ L is the sequence length, and H denotes the model dimension.
+
+ Returns:
+ Tensor: Reconstructed time-domain audio signal of shape (B, T), where T is the length of the output signal.
+ """
+ x = self.out(x)
+ x = symexp(x)
+ x = torch.clip(
+ x, min=-1e2, max=1e2
+ ) # safeguard to prevent excessively large magnitudes
+ audio = self.imdct(x)
+ if self.clip_audio:
+ audio = torch.clip(x, min=-1.0, max=1.0)
+
+ return audio
+
+
+class IMDCTCosHead(FourierHead):
+ """
+ IMDCT Head module for predicting MDCT coefficients with parametrizing MDCT = exp(m) · cos(p)
+
+ Args:
+ dim (int): Hidden dimension of the model.
+ mdct_frame_len (int): Length of the MDCT frame.
+ padding (str, optional): Type of padding. Options are "center" or "same". Defaults to "same".
+ clip_audio (bool, optional): Whether to clip the audio output within the range of [-1.0, 1.0]. Defaults to False.
+ """
+
+ def __init__(
+ self,
+ dim: int,
+ mdct_frame_len: int,
+ padding: str = "same",
+ clip_audio: bool = False,
+ ):
+ super().__init__()
+ self.clip_audio = clip_audio
+ self.out = nn.Linear(dim, mdct_frame_len)
+ self.imdct = IMDCT(frame_len=mdct_frame_len, padding=padding)
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """
+ Forward pass of the IMDCTCosHead module.
+
+ Args:
+ x (Tensor): Input tensor of shape (B, L, H), where B is the batch size,
+ L is the sequence length, and H denotes the model dimension.
+
+ Returns:
+ Tensor: Reconstructed time-domain audio signal of shape (B, T), where T is the length of the output signal.
+ """
+ x = self.out(x)
+ m, p = x.chunk(2, dim=2)
+ m = torch.exp(m).clip(
+ max=1e2
+ ) # safeguard to prevent excessively large magnitudes
+ audio = self.imdct(m * torch.cos(p))
+ if self.clip_audio:
+ audio = torch.clip(x, min=-1.0, max=1.0)
+ return audio
+
+
+class ConvNeXtBlock(nn.Module):
+    """ConvNeXt Block adapted from https://github.com/facebookresearch/ConvNeXt to 1D audio signal.
+
+    Args:
+        dim (int): Number of input channels.
+        intermediate_dim (int): Dimensionality of the intermediate layer.
+        layer_scale_init_value (float, optional): Initial value for the layer scale. None means no scaling.
+            Defaults to None.
+        adanorm_num_embeddings (int, optional): Number of embeddings for AdaLayerNorm.
+            None means non-conditional LayerNorm. Defaults to None.
+    """
+
+    def __init__(
+        self,
+        dim: int,
+        intermediate_dim: int,
+        layer_scale_init_value: float,
+        adanorm_num_embeddings: Optional[int] = None,
+    ):
+        super().__init__()
+        self.dwconv = nn.Conv1d(
+            dim, dim, kernel_size=7, padding=3, groups=dim
+        )  # depthwise conv
+        self.adanorm = adanorm_num_embeddings is not None
+        if adanorm_num_embeddings:
+            self.norm = AdaLayerNorm(adanorm_num_embeddings, dim, eps=1e-6)
+        else:
+            self.norm = nn.LayerNorm(dim, eps=1e-6)
+        self.pwconv1 = nn.Linear(
+            dim, intermediate_dim
+        )  # pointwise/1x1 convs, implemented with linear layers
+        self.act = nn.GELU()
+        self.pwconv2 = nn.Linear(intermediate_dim, dim)
+        # learnable per-channel layer scale.
+        # NOTE(review): the docstring says None disables scaling, but
+        # `None > 0` would raise a TypeError here; callers in this file
+        # always pass a float -- confirm before passing None.
+        self.gamma = (
+            nn.Parameter(layer_scale_init_value * torch.ones(dim), requires_grad=True)
+            if layer_scale_init_value > 0
+            else None
+        )
+
+    def forward(
+        self, x: torch.Tensor, cond_embedding_id: Optional[torch.Tensor] = None
+    ) -> torch.Tensor:
+        """Apply the block to x of shape (B, C, T); returns the same shape."""
+        residual = x
+        x = self.dwconv(x)
+        x = x.transpose(1, 2)  # (B, C, T) -> (B, T, C)
+        if self.adanorm:
+            # conditional normalization requires a condition id
+            assert cond_embedding_id is not None
+            x = self.norm(x, cond_embedding_id)
+        else:
+            x = self.norm(x)
+        x = self.pwconv1(x)
+        x = self.act(x)
+        x = self.pwconv2(x)
+        if self.gamma is not None:
+            x = self.gamma * x
+        x = x.transpose(1, 2)  # (B, T, C) -> (B, C, T)
+
+        # residual connection
+        x = residual + x
+        return x
+
+
+class AdaLayerNorm(nn.Module):
+ """
+ Adaptive Layer Normalization module with learnable embeddings per `num_embeddings` classes
+
+ Args:
+ num_embeddings (int): Number of embeddings.
+ embedding_dim (int): Dimension of the embeddings.
+ """
+
+ def __init__(self, num_embeddings: int, embedding_dim: int, eps: float = 1e-6):
+ super().__init__()
+ self.eps = eps
+ self.dim = embedding_dim
+ self.scale = nn.Embedding(
+ num_embeddings=num_embeddings, embedding_dim=embedding_dim
+ )
+ self.shift = nn.Embedding(
+ num_embeddings=num_embeddings, embedding_dim=embedding_dim
+ )
+ torch.nn.init.ones_(self.scale.weight)
+ torch.nn.init.zeros_(self.shift.weight)
+
+ def forward(self, x: torch.Tensor, cond_embedding_id: torch.Tensor) -> torch.Tensor:
+ scale = self.scale(cond_embedding_id)
+ shift = self.shift(cond_embedding_id)
+ x = nn.functional.layer_norm(x, (self.dim,), eps=self.eps)
+ x = x * scale + shift
+ return x
+
+
+class ResBlock1(nn.Module):
+    """
+    ResBlock adapted from HiFi-GAN V1 (https://github.com/jik876/hifi-gan) with dilated 1D convolutions,
+    but without upsampling layers.
+
+    Args:
+        dim (int): Number of input channels.
+        kernel_size (int, optional): Size of the convolutional kernel. Defaults to 3.
+        dilation (tuple[int], optional): Dilation factors for the dilated convolutions.
+            Defaults to (1, 3, 5).
+        lrelu_slope (float, optional): Negative slope of the LeakyReLU activation function.
+            Defaults to 0.1.
+        layer_scale_init_value (float, optional): Initial value for the layer scale. None means no scaling.
+            Defaults to None.
+    """
+
+    def __init__(
+        self,
+        dim: int,
+        kernel_size: int = 3,
+        dilation: Tuple[int, int, int] = (1, 3, 5),
+        lrelu_slope: float = 0.1,
+        layer_scale_init_value: Optional[float] = None,
+    ):
+        super().__init__()
+        self.lrelu_slope = lrelu_slope
+        # three dilated convs, one per dilation factor; "same" padding keeps T
+        self.convs1 = nn.ModuleList(
+            [
+                weight_norm(
+                    nn.Conv1d(
+                        dim,
+                        dim,
+                        kernel_size,
+                        1,
+                        dilation=dilation[0],
+                        padding=self.get_padding(kernel_size, dilation[0]),
+                    )
+                ),
+                weight_norm(
+                    nn.Conv1d(
+                        dim,
+                        dim,
+                        kernel_size,
+                        1,
+                        dilation=dilation[1],
+                        padding=self.get_padding(kernel_size, dilation[1]),
+                    )
+                ),
+                weight_norm(
+                    nn.Conv1d(
+                        dim,
+                        dim,
+                        kernel_size,
+                        1,
+                        dilation=dilation[2],
+                        padding=self.get_padding(kernel_size, dilation[2]),
+                    )
+                ),
+            ]
+        )
+
+        # matching undilated convs applied after each dilated conv
+        self.convs2 = nn.ModuleList(
+            [
+                weight_norm(
+                    nn.Conv1d(
+                        dim,
+                        dim,
+                        kernel_size,
+                        1,
+                        dilation=1,
+                        padding=self.get_padding(kernel_size, 1),
+                    )
+                ),
+                weight_norm(
+                    nn.Conv1d(
+                        dim,
+                        dim,
+                        kernel_size,
+                        1,
+                        dilation=1,
+                        padding=self.get_padding(kernel_size, 1),
+                    )
+                ),
+                weight_norm(
+                    nn.Conv1d(
+                        dim,
+                        dim,
+                        kernel_size,
+                        1,
+                        dilation=1,
+                        padding=self.get_padding(kernel_size, 1),
+                    )
+                ),
+            ]
+        )
+
+        # optional per-channel layer scales, one per conv pair; entries are
+        # None when layer_scale_init_value is None (scaling disabled)
+        self.gamma = nn.ParameterList(
+            [
+                (
+                    nn.Parameter(
+                        layer_scale_init_value * torch.ones(dim, 1), requires_grad=True
+                    )
+                    if layer_scale_init_value is not None
+                    else None
+                ),
+                (
+                    nn.Parameter(
+                        layer_scale_init_value * torch.ones(dim, 1), requires_grad=True
+                    )
+                    if layer_scale_init_value is not None
+                    else None
+                ),
+                (
+                    nn.Parameter(
+                        layer_scale_init_value * torch.ones(dim, 1), requires_grad=True
+                    )
+                    if layer_scale_init_value is not None
+                    else None
+                ),
+            ]
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Apply three residual (LReLU -> dilated conv -> LReLU -> conv) stages to (B, C, T)."""
+        for c1, c2, gamma in zip(self.convs1, self.convs2, self.gamma):
+            xt = torch.nn.functional.leaky_relu(x, negative_slope=self.lrelu_slope)
+            xt = c1(xt)
+            xt = torch.nn.functional.leaky_relu(xt, negative_slope=self.lrelu_slope)
+            xt = c2(xt)
+            if gamma is not None:
+                xt = gamma * xt
+            x = xt + x
+        return x
+
+    def remove_weight_norm(self):
+        """Strip weight normalization from all convolutions (for inference export)."""
+        for l in self.convs1:
+            remove_weight_norm(l)
+        for l in self.convs2:
+            remove_weight_norm(l)
+
+    @staticmethod
+    def get_padding(kernel_size: int, dilation: int = 1) -> int:
+        """Padding that keeps the sequence length unchanged for stride 1."""
+        return int((kernel_size * dilation - dilation) / 2)
+
+
+class Backbone(nn.Module):
+    """Base class for the generator's backbone. It preserves the same temporal resolution across all layers."""
+
+    def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
+        """
+        Args:
+            x (Tensor): Input tensor of shape (B, C, L), where B is the batch size,
+                C denotes output features, and L is the sequence length.
+
+        Returns:
+            Tensor: Output of shape (B, L, H), where B is the batch size, L is the sequence length,
+                and H denotes the model dimension.
+        """
+        # Implemented by VocosBackbone and VocosResNetBackbone below.
+        raise NotImplementedError("Subclasses must implement the forward method.")
+
+
+class VocosBackbone(Backbone):
+    """
+    Vocos backbone module built with ConvNeXt blocks. Supports additional conditioning with Adaptive Layer Normalization
+
+    Args:
+        input_channels (int): Number of input features channels.
+        dim (int): Hidden dimension of the model.
+        intermediate_dim (int): Intermediate dimension used in ConvNeXtBlock.
+        num_layers (int): Number of ConvNeXtBlock layers.
+        layer_scale_init_value (float, optional): Initial value for layer scaling. Defaults to `1 / num_layers`.
+        adanorm_num_embeddings (int, optional): Number of embeddings for AdaLayerNorm.
+            None means non-conditional model. Defaults to None.
+    """
+
+    def __init__(
+        self,
+        input_channels: int,
+        dim: int,
+        intermediate_dim: int,
+        num_layers: int,
+        layer_scale_init_value: Optional[float] = None,
+        adanorm_num_embeddings: Optional[int] = None,
+    ):
+        super().__init__()
+        self.input_channels = input_channels
+        self.embed = nn.Conv1d(input_channels, dim, kernel_size=7, padding=3)
+        self.adanorm = adanorm_num_embeddings is not None
+        if adanorm_num_embeddings:
+            self.norm = AdaLayerNorm(adanorm_num_embeddings, dim, eps=1e-6)
+        else:
+            self.norm = nn.LayerNorm(dim, eps=1e-6)
+        layer_scale_init_value = layer_scale_init_value or 1 / num_layers
+        self.convnext = nn.ModuleList(
+            [
+                ConvNeXtBlock(
+                    dim=dim,
+                    intermediate_dim=intermediate_dim,
+                    layer_scale_init_value=layer_scale_init_value,
+                    adanorm_num_embeddings=adanorm_num_embeddings,
+                )
+                for _ in range(num_layers)
+            ]
+        )
+        self.final_layer_norm = nn.LayerNorm(dim, eps=1e-6)
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        # truncated-normal init for conv/linear weights, zero bias
+        # (assumes bias is present; all layers created here use the default bias)
+        if isinstance(m, (nn.Conv1d, nn.Linear)):
+            nn.init.trunc_normal_(m.weight, std=0.02)
+            nn.init.constant_(m.bias, 0)
+
+    def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
+        """Map (B, C, L) features to (B, L, H); optional `bandwidth_id` kwarg selects the AdaLayerNorm condition."""
+        bandwidth_id = kwargs.get("bandwidth_id", None)
+        x = self.embed(x)
+        if self.adanorm:
+            # conditional path requires a bandwidth/condition id
+            assert bandwidth_id is not None
+            x = self.norm(x.transpose(1, 2), cond_embedding_id=bandwidth_id)
+        else:
+            x = self.norm(x.transpose(1, 2))
+        x = x.transpose(1, 2)
+        for conv_block in self.convnext:
+            x = conv_block(x, cond_embedding_id=bandwidth_id)
+        x = self.final_layer_norm(x.transpose(1, 2))
+        return x
+
+
+class VocosResNetBackbone(Backbone):
+    """
+    Vocos backbone module built with ResBlocks.
+
+    Args:
+        input_channels (int): Number of input features channels.
+        dim (int): Hidden dimension of the model.
+        num_blocks (int): Number of ResBlock1 blocks.
+        layer_scale_init_value (float, optional): Initial value for layer scaling. Defaults to None.
+    """
+
+    def __init__(
+        self,
+        input_channels,
+        dim,
+        num_blocks,
+        layer_scale_init_value=None,
+    ):
+        super().__init__()
+        self.input_channels = input_channels
+        self.embed = weight_norm(
+            nn.Conv1d(input_channels, dim, kernel_size=3, padding=1)
+        )
+        # default scale accounts for num_blocks blocks of 3 conv pairs each
+        layer_scale_init_value = layer_scale_init_value or 1 / num_blocks / 3
+        self.resnet = nn.Sequential(
+            *[
+                ResBlock1(dim=dim, layer_scale_init_value=layer_scale_init_value)
+                for _ in range(num_blocks)
+            ]
+        )
+
+    def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
+        """Map (B, C, L) input features to (B, L, H) hidden features."""
+        x = self.embed(x)
+        x = self.resnet(x)
+        x = x.transpose(1, 2)  # channel-first -> channel-last for the head
+        return x
+
+
+class Vocos(nn.Module):
+    """Convenience vocoder: VocosBackbone followed by an ISTFTHead.
+
+    Args:
+        input_channels (int): Number of input feature channels.
+        dim (int): Backbone hidden dimension.
+        intermediate_dim (int): ConvNeXt block intermediate dimension.
+        num_layers (int): Number of ConvNeXt blocks.
+        adanorm_num_embeddings (int): Number of AdaLayerNorm condition classes.
+        n_fft (int): FFT size of the ISTFT head.
+        hop_size (int): Hop length of the ISTFT head.
+        padding (str): "center" or "same" padding for the ISTFT head.
+    """
+
+    def __init__(
+        self,
+        input_channels: int = 256,
+        dim: int = 384,
+        intermediate_dim: int = 1152,
+        num_layers: int = 8,
+        adanorm_num_embeddings: int = 4,
+        n_fft: int = 800,
+        hop_size: int = 200,
+        padding: str = "same",
+    ):
+        super().__init__()
+
+        self.backbone = VocosBackbone(
+            input_channels=input_channels,
+            dim=dim,
+            intermediate_dim=intermediate_dim,
+            num_layers=num_layers,
+            adanorm_num_embeddings=adanorm_num_embeddings,
+        )
+        self.head = ISTFTHead(dim, n_fft, hop_size, padding)
+
+    def forward(self, x):
+        """Return a waveform of shape (B, 1, T) for input features (B, C, L)."""
+        x = self.backbone(x)
+        x = self.head(x)
+
+        # add a channel axis: (B, T) -> (B, 1, T)
+        return x[:, None, :]
diff --git a/indextts/utils/maskgct/models/codec/melvqgan/melspec.py b/indextts/utils/maskgct/models/codec/melvqgan/melspec.py
new file mode 100755
index 0000000..0cfa6b8
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/melvqgan/melspec.py
@@ -0,0 +1,108 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+import pyworld as pw
+import numpy as np
+import soundfile as sf
+import os
+from torchaudio.functional import pitch_shift
+import librosa
+from librosa.filters import mel as librosa_mel_fn
+import torch.nn as nn
+import torch.nn.functional as F
+import tqdm
+
+
+def dynamic_range_compression(x, C=1, clip_val=1e-5):
+ return np.log(np.clip(x, a_min=clip_val, a_max=None) * C)
+
+
+def dynamic_range_decompression(x, C=1):
+ return np.exp(x) / C
+
+
+def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
+ return torch.log(torch.clamp(x, min=clip_val) * C)
+
+
+def dynamic_range_decompression_torch(x, C=1):
+ return torch.exp(x) / C
+
+
+def spectral_normalize_torch(magnitudes):
+    """Log-compress spectrogram magnitudes (see dynamic_range_compression_torch)."""
+    output = dynamic_range_compression_torch(magnitudes)
+    return output
+
+
+def spectral_de_normalize_torch(magnitudes):
+    """Invert spectral_normalize_torch (see dynamic_range_decompression_torch)."""
+    output = dynamic_range_decompression_torch(magnitudes)
+    return output
+
+
+class MelSpectrogram(nn.Module):
+ def __init__(
+ self,
+ n_fft,
+ num_mels,
+ sampling_rate,
+ hop_size,
+ win_size,
+ fmin,
+ fmax,
+ center=False,
+ ):
+ super(MelSpectrogram, self).__init__()
+ self.n_fft = n_fft
+ self.hop_size = hop_size
+ self.win_size = win_size
+ self.sampling_rate = sampling_rate
+ self.num_mels = num_mels
+ self.fmin = fmin
+ self.fmax = fmax
+ self.center = center
+
+ mel_basis = {}
+ hann_window = {}
+
+ mel = librosa_mel_fn(
+ sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax
+ )
+ mel_basis = torch.from_numpy(mel).float()
+ hann_window = torch.hann_window(win_size)
+
+ self.register_buffer("mel_basis", mel_basis)
+ self.register_buffer("hann_window", hann_window)
+
+ def forward(self, y):
+ y = torch.nn.functional.pad(
+ y.unsqueeze(1),
+ (
+ int((self.n_fft - self.hop_size) / 2),
+ int((self.n_fft - self.hop_size) / 2),
+ ),
+ mode="reflect",
+ )
+ y = y.squeeze(1)
+ spec = torch.stft(
+ y,
+ self.n_fft,
+ hop_length=self.hop_size,
+ win_length=self.win_size,
+ window=self.hann_window,
+ center=self.center,
+ pad_mode="reflect",
+ normalized=False,
+ onesided=True,
+ return_complex=True,
+ )
+ spec = torch.view_as_real(spec)
+
+ spec = torch.sqrt(spec.pow(2).sum(-1) + (1e-9))
+
+ spec = torch.matmul(self.mel_basis, spec)
+ spec = spectral_normalize_torch(spec)
+
+ return spec
diff --git a/indextts/utils/maskgct/models/codec/ns3_codec/README.md b/indextts/utils/maskgct/models/codec/ns3_codec/README.md
new file mode 100644
index 0000000..1283d67
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/ns3_codec/README.md
@@ -0,0 +1,216 @@
+## FACodec: Speech Codec with Attribute Factorization used for NaturalSpeech 3
+
+[
+
+Install Amphion
+```bash
+git clone https://github.com/open-mmlab/Amphion.git
+```
+
+Few lines of code to use the pre-trained FACodec model
+```python
+from Amphion.models.codec.ns3_codec import FACodecEncoder, FACodecDecoder
+from huggingface_hub import hf_hub_download
+
+fa_encoder = FACodecEncoder(
+ ngf=32,
+ up_ratios=[2, 4, 5, 5],
+ out_channels=256,
+)
+
+fa_decoder = FACodecDecoder(
+ in_channels=256,
+ upsample_initial_channel=1024,
+ ngf=32,
+ up_ratios=[5, 5, 4, 2],
+ vq_num_q_c=2,
+ vq_num_q_p=1,
+ vq_num_q_r=3,
+ vq_dim=256,
+ codebook_dim=8,
+ codebook_size_prosody=10,
+ codebook_size_content=10,
+ codebook_size_residual=10,
+ use_gr_x_timbre=True,
+ use_gr_residual_f0=True,
+ use_gr_residual_phone=True,
+)
+
+encoder_ckpt = hf_hub_download(repo_id="amphion/naturalspeech3_facodec", filename="ns3_facodec_encoder.bin")
+decoder_ckpt = hf_hub_download(repo_id="amphion/naturalspeech3_facodec", filename="ns3_facodec_decoder.bin")
+
+fa_encoder.load_state_dict(torch.load(encoder_ckpt))
+fa_decoder.load_state_dict(torch.load(decoder_ckpt))
+
+fa_encoder.eval()
+fa_decoder.eval()
+
+```
+
+Inference
+```python
+test_wav_path = "test.wav"
+test_wav = librosa.load(test_wav_path, sr=16000)[0]
+test_wav = torch.from_numpy(test_wav).float()
+test_wav = test_wav.unsqueeze(0).unsqueeze(0)
+
+with torch.no_grad():
+
+ # encode
+ enc_out = fa_encoder(test_wav)
+ print(enc_out.shape)
+
+ # quantize
+ vq_post_emb, vq_id, _, quantized, spk_embs = fa_decoder(enc_out, eval_vq=False, vq=True)
+
+ # latent after quantization
+ print(vq_post_emb.shape)
+
+ # codes
+ print("vq id shape:", vq_id.shape)
+
+ # get prosody code
+ prosody_code = vq_id[:1]
+ print("prosody code shape:", prosody_code.shape)
+
+ # get content code
+    content_code = vq_id[1:3]
+    print("content code shape:", content_code.shape)
+
+ # get residual code (acoustic detail codes)
+ residual_code = vq_id[3:]
+ print("residual code shape:", residual_code.shape)
+
+ # speaker embedding
+ print("speaker embedding shape:", spk_embs.shape)
+
+    # decode (recommended)
+ recon_wav = fa_decoder.inference(vq_post_emb, spk_embs)
+ print(recon_wav.shape)
+ sf.write("recon.wav", recon_wav[0][0].cpu().numpy(), 16000)
+```
+
+FACodec can achieve zero-shot voice conversion with FACodecEncoderV2/FACodecDecoderV2 or FACodecRedecoder
+```python
+from Amphion.models.codec.ns3_codec import FACodecEncoderV2, FACodecDecoderV2
+
+# Same parameters as FACodecEncoder/FACodecDecoder
+fa_encoder_v2 = FACodecEncoderV2(...)
+fa_decoder_v2 = FACodecDecoderV2(...)
+
+encoder_v2_ckpt = hf_hub_download(repo_id="amphion/naturalspeech3_facodec", filename="ns3_facodec_encoder_v2.bin")
+decoder_v2_ckpt = hf_hub_download(repo_id="amphion/naturalspeech3_facodec", filename="ns3_facodec_decoder_v2.bin")
+
+fa_encoder_v2.load_state_dict(torch.load(encoder_v2_ckpt))
+fa_decoder_v2.load_state_dict(torch.load(decoder_v2_ckpt))
+
+with torch.no_grad():
+ enc_out_a = fa_encoder_v2(wav_a)
+ prosody_a = fa_encoder_v2.get_prosody_feature(wav_a)
+ enc_out_b = fa_encoder_v2(wav_b)
+ prosody_b = fa_encoder_v2.get_prosody_feature(wav_b)
+
+ vq_post_emb_a, vq_id_a, _, quantized, spk_embs_a = fa_decoder_v2(
+ enc_out_a, prosody_a, eval_vq=False, vq=True
+ )
+ vq_post_emb_b, vq_id_b, _, quantized, spk_embs_b = fa_decoder_v2(
+ enc_out_b, prosody_b, eval_vq=False, vq=True
+ )
+
+ vq_post_emb_a_to_b = fa_decoder_v2.vq2emb(vq_id_a, use_residual=False)
+ recon_wav_a_to_b = fa_decoder_v2.inference(vq_post_emb_a_to_b, spk_embs_b)
+```
+
+or
+
+```python
+from Amphion.models.codec.ns3_codec import FACodecRedecoder
+
+fa_redecoder = FACodecRedecoder()
+
+redecoder_ckpt = hf_hub_download(repo_id="amphion/naturalspeech3_facodec", filename="ns3_facodec_redecoder.bin")
+
+fa_redecoder.load_state_dict(torch.load(redecoder_ckpt))
+
+with torch.no_grad():
+ enc_out_a = fa_encoder(wav_a)
+ enc_out_b = fa_encoder(wav_b)
+
+ vq_post_emb_a, vq_id_a, _, quantized_a, spk_embs_a = fa_decoder(enc_out_a, eval_vq=False, vq=True)
+ vq_post_emb_b, vq_id_b, _, quantized_b, spk_embs_b = fa_decoder(enc_out_b, eval_vq=False, vq=True)
+
+ # convert speaker
+ vq_post_emb_a_to_b = fa_redecoder.vq2emb(vq_id_a, spk_embs_b, use_residual=False)
+ recon_wav_a_to_b = fa_redecoder.inference(vq_post_emb_a_to_b, spk_embs_b)
+
+ sf.write("recon_a_to_b.wav", recon_wav_a_to_b[0][0].cpu().numpy(), 16000)
+```
+
+## Q&A
+
+Q1: What audio sample rate does FACodec support? What is the hop size? How many codes will be generated for each frame?
+
+A1: FACodec supports 16KHz speech audio. The hop size is 200 samples, and (16000/200) * 6 (total number of codebooks) codes will be generated for each frame.
+
+Q2: Is it possible to train an autoregressive TTS model like VALL-E using FACodec?
+
+A2: Yes. In fact, the authors of NaturalSpeech 3 have already explored autoregressive generative models for discrete token generation with FACodec. They use an autoregressive language model to generate prosody codes, followed by a non-autoregressive model to generate the remaining content and acoustic details codes.
+
+Q3: Is it possible to train a latent diffusion TTS model like NaturalSpeech2 using FACodec?
+
+A3: Yes. You can use the latent obtained after quantization as the modeling target for the latent diffusion model.
+
+Q4: Can FACodec compress and reconstruct audio from other domains? Such as sound effects, music, etc.
+
+A4: Since FACodec is designed for speech, it may not be suitable for other audio domains. However, it is possible to use the FACodec model to compress and reconstruct audio from other domains, but the quality may not be as good as the original audio.
+
+Q5: Can FACodec be used for content feature for some other tasks like voice conversion?
+
+A5: I think the answer is yes. Researchers can use the content code of FACodec as the content feature for voice conversion. We hope to see more research in this direction.
+
+## Citations
+
+If you use our FACodec model, please cite the following paper:
+
+```bibtex
+@article{ju2024naturalspeech,
+ title={NaturalSpeech 3: Zero-Shot Speech Synthesis with Factorized Codec and Diffusion Models},
+ author={Ju, Zeqian and Wang, Yuancheng and Shen, Kai and Tan, Xu and Xin, Detai and Yang, Dongchao and Liu, Yanqing and Leng, Yichong and Song, Kaitao and Tang, Siliang and others},
+ journal={arXiv preprint arXiv:2403.03100},
+ year={2024}
+}
+
+@article{zhang2023amphion,
+ title={Amphion: An Open-Source Audio, Music and Speech Generation Toolkit},
+ author={Xueyao Zhang and Liumeng Xue and Yicheng Gu and Yuancheng Wang and Haorui He and Chaoren Wang and Xi Chen and Zihao Fang and Haopeng Chen and Junan Zhang and Tze Ying Tang and Lexiao Zou and Mingxuan Wang and Jun Han and Kai Chen and Haizhou Li and Zhizheng Wu},
+ journal={arXiv},
+ year={2024},
+ volume={abs/2312.09911}
+}
+```
+
diff --git a/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py b/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py
new file mode 100644
index 0000000..1f0e4c1
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .facodec import *
diff --git a/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py b/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py
new file mode 100644
index 0000000..b3bccdb
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py
@@ -0,0 +1,5 @@
+# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
+
+from .filter import *
+from .resample import *
+from .act import *
diff --git a/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py b/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py
new file mode 100644
index 0000000..779d58d
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py
@@ -0,0 +1,29 @@
+# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
+
+import torch.nn as nn
+from .resample import UpSample1d, DownSample1d
+
+
class Activation1d(nn.Module):
    """Anti-aliased activation: upsample -> pointwise nonlinearity -> downsample.

    Applying the activation at a higher sample rate suppresses the aliasing
    that its harmonics would otherwise introduce at the base rate.
    """

    def __init__(
        self,
        activation,
        up_ratio: int = 2,
        down_ratio: int = 2,
        up_kernel_size: int = 12,
        down_kernel_size: int = 12,
    ):
        super().__init__()
        self.up_ratio = up_ratio
        self.down_ratio = down_ratio
        self.act = activation
        self.upsample = UpSample1d(up_ratio, up_kernel_size)
        self.downsample = DownSample1d(down_ratio, down_kernel_size)

    def forward(self, x):
        # x: [B, C, T]
        return self.downsample(self.act(self.upsample(x)))
diff --git a/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py b/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py
new file mode 100644
index 0000000..ece8e02
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py
@@ -0,0 +1,96 @@
+# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
+
# torch.sinc was only added in PyTorch 1.8; provide a fallback for older builds.
if hasattr(torch, "sinc"):
    sinc = torch.sinc
else:
    # This code is adopted from adefossez's julius.core.sinc under the MIT License
    # https://adefossez.github.io/julius/julius/core.html
    def sinc(x: torch.Tensor):
        """
        Implementation of sinc, i.e. sin(pi * x) / (pi * x)
        __Warning__: Different to julius.sinc, the input is multiplied by `pi`!
        """
        one = torch.tensor(1.0, device=x.device, dtype=x.dtype)
        return torch.where(x == 0, one, torch.sin(math.pi * x) / (math.pi * x))
+
+
+# This code is adopted from adefossez's julius.lowpass.LowPassFilters under the MIT License
+# https://adefossez.github.io/julius/julius/lowpass.html
# This code is adopted from adefossez's julius.lowpass.LowPassFilters under the MIT License
# https://adefossez.github.io/julius/julius/lowpass.html
def kaiser_sinc_filter1d(
    cutoff, half_width, kernel_size
):  # return filter [1,1,kernel_size]
    """Build a Kaiser-windowed sinc low-pass FIR filter.

    Args:
        cutoff: normalized cutoff frequency in [0, 0.5] (0.5 == Nyquist).
        half_width: normalized transition-band half width.
        kernel_size: number of taps (even or odd).

    Returns:
        Filter taps of shape [1, 1, kernel_size], normalized to sum to 1.
        For cutoff == 0 the filter is all zeros (the original code divided
        by the zero sum in that case, producing NaNs).
    """
    even = kernel_size % 2 == 0
    half_size = kernel_size // 2

    # Kaiser window design: estimate stop-band attenuation A from the
    # transition width, then derive beta via Kaiser's empirical formula.
    delta_f = 4 * half_width
    A = 2.285 * (half_size - 1) * math.pi * delta_f + 7.95
    if A > 50.0:
        beta = 0.1102 * (A - 8.7)
    elif A >= 21.0:
        beta = 0.5842 * (A - 21) ** 0.4 + 0.07886 * (A - 21.0)
    else:
        beta = 0.0
    window = torch.kaiser_window(kernel_size, beta=beta, periodic=False)

    # ratio = 0.5/cutoff -> 2 * cutoff = 1 / ratio
    if even:
        # Even-length kernels sample time at half-integer offsets.
        time = torch.arange(-half_size, half_size) + 0.5
    else:
        time = torch.arange(kernel_size) - half_size
    if cutoff == 0:
        # Degenerate all-zero filter; skip normalization, which would divide
        # by a zero sum (NaN for float taps, an error for integer taps).
        taps = torch.zeros(kernel_size)
    else:
        taps = 2 * cutoff * window * sinc(2 * cutoff * time)
        # Normalize filter to have sum = 1, otherwise we will have a small leakage
        # of the constant component in the input signal.
        taps /= taps.sum()

    return taps.view(1, 1, kernel_size)
+
+
class LowPassFilter1d(nn.Module):
    """Channel-wise anti-aliasing low-pass FIR filter (grouped conv1d)."""

    def __init__(
        self,
        cutoff=0.5,
        half_width=0.6,
        stride: int = 1,
        padding: bool = True,
        padding_mode: str = "replicate",
        kernel_size: int = 12,
    ):
        # kernel_size should be even number for stylegan3 setup,
        # in this implementation, odd number is also possible.
        super().__init__()
        if cutoff < -0.0:
            raise ValueError("Minimum cutoff must be larger than zero.")
        if cutoff > 0.5:
            raise ValueError("A cutoff above 0.5 does not make sense.")
        self.kernel_size = kernel_size
        self.even = kernel_size % 2 == 0
        # Asymmetric padding keeps the output length equal to the input
        # length when stride == 1 (for even kernels).
        self.pad_left = kernel_size // 2 - int(self.even)
        self.pad_right = kernel_size // 2
        self.stride = stride
        self.padding = padding
        self.padding_mode = padding_mode
        self.register_buffer(
            "filter", kaiser_sinc_filter1d(cutoff, half_width, kernel_size)
        )

    def forward(self, x):
        # x: [B, C, T]; the same taps are applied to every channel (groups=C).
        num_channels = x.shape[1]
        if self.padding:
            x = F.pad(x, (self.pad_left, self.pad_right), mode=self.padding_mode)
        return F.conv1d(
            x,
            self.filter.expand(num_channels, -1, -1),
            stride=self.stride,
            groups=num_channels,
        )
diff --git a/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py b/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py
new file mode 100644
index 0000000..ee993b1
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py
@@ -0,0 +1,57 @@
+# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
+
+import torch.nn as nn
+from torch.nn import functional as F
+from .filter import LowPassFilter1d
+from .filter import kaiser_sinc_filter1d
+
+
class UpSample1d(nn.Module):
    """Integer-ratio upsampler: transposed conv with a Kaiser-sinc
    interpolation filter, producing exactly T * ratio output samples."""

    def __init__(self, ratio=2, kernel_size=None):
        super().__init__()
        self.ratio = ratio
        # Default tap count: roughly 6 taps per input sample, rounded to even.
        if kernel_size is None:
            kernel_size = int(6 * ratio // 2) * 2
        self.kernel_size = kernel_size
        self.stride = ratio
        self.pad = self.kernel_size // ratio - 1
        # Trim amounts chosen so that the replicate padding plus the
        # transposed-conv expansion cancel out to an exact T * ratio length.
        self.pad_left = self.pad * self.stride + (self.kernel_size - self.stride) // 2
        self.pad_right = (
            self.pad * self.stride + (self.kernel_size - self.stride + 1) // 2
        )
        self.register_buffer(
            "filter",
            kaiser_sinc_filter1d(
                cutoff=0.5 / ratio,
                half_width=0.6 / ratio,
                kernel_size=self.kernel_size,
            ),
        )

    def forward(self, x):
        # x: [B, C, T] -> [B, C, T * ratio]
        num_channels = x.shape[1]
        x = F.pad(x, (self.pad, self.pad), mode="replicate")
        # Multiply by ratio to restore unit DC gain after zero insertion.
        y = self.ratio * F.conv_transpose1d(
            x, self.filter.expand(num_channels, -1, -1),
            stride=self.stride, groups=num_channels,
        )
        return y[..., self.pad_left : -self.pad_right]
+
+
class DownSample1d(nn.Module):
    """Integer-ratio downsampler: strided anti-aliasing low-pass filter."""

    def __init__(self, ratio=2, kernel_size=None):
        super().__init__()
        self.ratio = ratio
        # Default tap count mirrors UpSample1d: ~6 taps per sample, even.
        if kernel_size is None:
            kernel_size = int(6 * ratio // 2) * 2
        self.kernel_size = kernel_size
        self.lowpass = LowPassFilter1d(
            cutoff=0.5 / ratio,
            half_width=0.6 / ratio,
            stride=ratio,
            kernel_size=self.kernel_size,
        )

    def forward(self, x):
        # x: [B, C, T] -> low-passed and decimated by `ratio`.
        return self.lowpass(x)
diff --git a/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py b/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py
new file mode 100644
index 0000000..87f661b
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py
@@ -0,0 +1,1222 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import numpy as np
+import torch
+from torch import nn, sin, pow
+from torch.nn import Parameter
+import torch.nn.functional as F
+from torch.nn.utils import weight_norm
+from .alias_free_torch import *
+from .quantize import *
+from einops import rearrange
+from einops.layers.torch import Rearrange
+from .transformer import TransformerEncoder
+from .gradient_reversal import GradientReversal
+from .melspec import MelSpectrogram
+
+
def init_weights(m):
    """Weight initializer for `Module.apply`.

    Conv1d weights get a truncated-normal init (std=0.02); biases are zeroed
    when present. Conv1d layers built with bias=False (m.bias is None) are
    skipped instead of crashing. All other module types are ignored.
    """
    if isinstance(m, nn.Conv1d):
        nn.init.trunc_normal_(m.weight, std=0.02)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
+
+
def WNConv1d(*args, **kwargs):
    """Conv1d wrapped with weight normalization."""
    conv = nn.Conv1d(*args, **kwargs)
    return weight_norm(conv)
+
+
def WNConvTranspose1d(*args, **kwargs):
    """ConvTranspose1d wrapped with weight normalization."""
    deconv = nn.ConvTranspose1d(*args, **kwargs)
    return weight_norm(deconv)
+
+
class CNNLSTM(nn.Module):
    """Convolutional trunk with one linear output head per prediction target.

    NOTE(review): despite the name there is no LSTM here — the trunk is a
    stack of residual conv units plus an anti-aliased activation.
    """

    def __init__(self, indim, outdim, head, global_pred=False):
        super().__init__()
        self.global_pred = global_pred
        self.model = nn.Sequential(
            ResidualUnit(indim, dilation=1),
            ResidualUnit(indim, dilation=2),
            ResidualUnit(indim, dilation=3),
            Activation1d(activation=SnakeBeta(indim, alpha_logscale=True)),
            Rearrange("b c t -> b t c"),
        )
        self.heads = nn.ModuleList(nn.Linear(indim, outdim) for _ in range(head))

    def forward(self, x):
        # x: [B, C, T]; trunk emits [B, T, C].
        feats = self.model(x)
        if self.global_pred:
            # Average over time so each head predicts one vector per sequence.
            feats = feats.mean(dim=1)
        return [head(feats) for head in self.heads]
+
+
class SnakeBeta(nn.Module):
    """Snake activation with separate learnable frequency and magnitude.

    Computes f(x) = x + (1 / beta) * sin^2(alpha * x), with per-channel
    alpha (frequency) and beta (magnitude) parameters.

    Shape:
        - Input: (B, C, T)
        - Output: (B, C, T), same shape as the input
    Reference:
        Modified from the Snake activation of Liu Ziyin, Tilman Hartwig,
        Masahito Ueda: https://arxiv.org/abs/2006.08195
    Example:
        >>> act = SnakeBeta(256)
        >>> y = act(torch.randn(4, 256, 10))
    """

    def __init__(
        self, in_features, alpha=1.0, alpha_trainable=True, alpha_logscale=False
    ):
        """
        Args:
            in_features: number of channels of the input.
            alpha: initial scale for both parameters; higher alpha means
                higher frequency, higher beta means higher magnitude.
            alpha_trainable: whether alpha/beta are learned with the model.
            alpha_logscale: if True, parameters live in log space and start
                at zero (so exp(0) == 1 effectively at initialization).
        """
        super().__init__()
        self.in_features = in_features
        self.alpha_logscale = alpha_logscale

        # Log-scale parameters start at zero, linear-scale at one.
        if alpha_logscale:
            base = torch.zeros(in_features)
        else:
            base = torch.ones(in_features)
        self.alpha = Parameter(base * alpha)
        self.beta = Parameter((base * alpha).clone())
        self.alpha.requires_grad = alpha_trainable
        self.beta.requires_grad = alpha_trainable

        # Guards the 1/beta term against division by zero.
        self.no_div_by_zero = 0.000000001

    def forward(self, x):
        """Apply SnakeBeta elementwise: x + 1/b * sin^2(x * a)."""
        # Broadcast per-channel parameters to (1, C, 1) so they line up
        # with the (B, C, T) input.
        alpha = self.alpha.unsqueeze(0).unsqueeze(-1)
        beta = self.beta.unsqueeze(0).unsqueeze(-1)
        if self.alpha_logscale:
            alpha = torch.exp(alpha)
            beta = torch.exp(beta)
        return x + (1.0 / (beta + self.no_div_by_zero)) * pow(sin(x * alpha), 2)
+
+
class ResidualUnit(nn.Module):
    """Dilated residual unit: act -> dilated 7-tap conv -> act -> 1x1 conv,
    with an identity skip connection."""

    def __init__(self, dim: int = 16, dilation: int = 1):
        super().__init__()
        # "Same" padding for the dilated kernel-7 convolution.
        pad = ((7 - 1) * dilation) // 2
        self.block = nn.Sequential(
            Activation1d(activation=SnakeBeta(dim, alpha_logscale=True)),
            WNConv1d(dim, dim, kernel_size=7, dilation=dilation, padding=pad),
            Activation1d(activation=SnakeBeta(dim, alpha_logscale=True)),
            WNConv1d(dim, dim, kernel_size=1),
        )

    def forward(self, x):
        residual = self.block(x)
        return x + residual
+
+
class EncoderBlock(nn.Module):
    """Encoder stage: three dilated residual units on dim//2 channels, then a
    strided conv that downsamples by `stride` and doubles channels to `dim`."""

    def __init__(self, dim: int = 16, stride: int = 1):
        super().__init__()
        units = [ResidualUnit(dim // 2, dilation=d) for d in (1, 3, 9)]
        self.block = nn.Sequential(
            *units,
            Activation1d(activation=SnakeBeta(dim // 2, alpha_logscale=True)),
            WNConv1d(
                dim // 2,
                dim,
                kernel_size=2 * stride,
                stride=stride,
                padding=stride // 2 + stride % 2,
            ),
        )

    def forward(self, x):
        return self.block(x)
+
+
class FACodecEncoder(nn.Module):
    """Waveform encoder: conv stem, strided EncoderBlocks, latent projection.

    The total downsampling factor (hop length) is the product of `up_ratios`.
    """

    def __init__(
        self,
        ngf=32,
        up_ratios=(2, 4, 5, 5),
        out_channels=1024,
    ):
        super().__init__()
        self.hop_length = np.prod(up_ratios)
        self.up_ratios = up_ratios

        # Stem: mono waveform -> ngf channels.
        d_model = ngf
        layers = [WNConv1d(1, d_model, kernel_size=7, padding=3)]

        # Each stage downsamples time by `stride` and doubles the channels.
        for stride in up_ratios:
            d_model *= 2
            layers.append(EncoderBlock(d_model, stride=stride))

        # Head: final activation plus projection to the latent width.
        layers += [
            Activation1d(activation=SnakeBeta(d_model, alpha_logscale=True)),
            WNConv1d(d_model, out_channels, kernel_size=3, padding=1),
        ]

        # Wrap the whole stack into nn.Sequential.
        self.block = nn.Sequential(*layers)
        self.enc_dim = d_model

        self.reset_parameters()

    def forward(self, x):
        # x: [B, 1, T] waveform -> [B, out_channels, T / hop_length] latent.
        return self.block(x)

    def inference(self, x):
        # Identical to forward; kept for API symmetry with the decoder.
        return self.block(x)

    def remove_weight_norm(self):
        """Remove weight normalization from all layers (for inference/export)."""

        def _remove_weight_norm(m):
            try:
                torch.nn.utils.remove_weight_norm(m)
            except ValueError:  # this module didn't have weight norm
                return

        self.apply(_remove_weight_norm)

    def apply_weight_norm(self):
        """Apply weight normalization to every Conv1d layer."""

        def _apply_weight_norm(m):
            if isinstance(m, nn.Conv1d):
                torch.nn.utils.weight_norm(m)

        self.apply(_apply_weight_norm)

    def reset_parameters(self):
        self.apply(init_weights)
+
+
class DecoderBlock(nn.Module):
    """Decoder stage: transposed conv upsampling by `stride` (input_dim ->
    output_dim channels), followed by three dilated residual units."""

    def __init__(self, input_dim: int = 16, output_dim: int = 8, stride: int = 1):
        super().__init__()
        layers = [
            Activation1d(activation=SnakeBeta(input_dim, alpha_logscale=True)),
            WNConvTranspose1d(
                input_dim,
                output_dim,
                kernel_size=2 * stride,
                stride=stride,
                padding=stride // 2 + stride % 2,
                output_padding=stride % 2,
            ),
        ]
        layers += [ResidualUnit(output_dim, dilation=d) for d in (1, 3, 9)]
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        return self.block(x)
+
+
class FACodecDecoder(nn.Module):
    """FACodec decoder with a factorized residual-VQ bottleneck.

    The encoder latent is quantized into up to three parallel streams:
    prosody (quantizer[0]), content/"phone" (quantizer[1]) and, when
    vq_num_q_r > 0, an acoustic residual (quantizer[2]). Timbre is modeled
    separately: a TransformerEncoder pools the unquantized latent into a
    global speaker embedding, which conditions synthesis via a learned
    scale/shift (timbre_linear + timbre_norm). Auxiliary CNNLSTM heads —
    optionally behind gradient-reversal layers — predict f0/uv and phone
    labels to encourage disentanglement, and `self.model` upsamples the
    recombined latent back to a waveform.
    """

    def __init__(
        self,
        in_channels=256,
        upsample_initial_channel=1536,
        ngf=32,
        up_ratios=(5, 5, 4, 2),
        vq_num_q_c=2,
        vq_num_q_p=1,
        vq_num_q_r=3,
        vq_dim=1024,
        vq_commit_weight=0.005,
        vq_weight_init=False,
        vq_full_commit_loss=False,
        codebook_dim=8,
        codebook_size_prosody=10,  # true codebook size is equal to 2^codebook_size
        codebook_size_content=10,
        codebook_size_residual=10,
        quantizer_dropout=0.0,
        dropout_type="linear",
        use_gr_content_f0=False,
        use_gr_prosody_phone=False,
        use_gr_residual_f0=False,
        use_gr_residual_phone=False,
        use_gr_x_timbre=False,
        use_random_mask_residual=True,
        prob_random_mask_residual=0.75,
    ):
        super().__init__()
        # Total upsampling factor of the waveform synthesizer.
        self.hop_length = np.prod(up_ratios)
        self.ngf = ngf
        self.up_ratios = up_ratios

        # During training the residual stream can be randomly dropped so the
        # prosody + content streams learn to carry the essential information.
        self.use_random_mask_residual = use_random_mask_residual
        self.prob_random_mask_residual = prob_random_mask_residual

        self.vq_num_q_p = vq_num_q_p
        self.vq_num_q_c = vq_num_q_c
        self.vq_num_q_r = vq_num_q_r

        self.codebook_size_prosody = codebook_size_prosody
        self.codebook_size_content = codebook_size_content
        self.codebook_size_residual = codebook_size_residual

        quantizer_class = ResidualVQ

        # quantizer[0] = prosody, quantizer[1] = content, quantizer[2] = residual.
        self.quantizer = nn.ModuleList()

        # prosody
        quantizer = quantizer_class(
            num_quantizers=vq_num_q_p,
            dim=vq_dim,
            codebook_size=codebook_size_prosody,
            codebook_dim=codebook_dim,
            threshold_ema_dead_code=2,
            commitment=vq_commit_weight,
            weight_init=vq_weight_init,
            full_commit_loss=vq_full_commit_loss,
            quantizer_dropout=quantizer_dropout,
            dropout_type=dropout_type,
        )
        self.quantizer.append(quantizer)

        # phone
        quantizer = quantizer_class(
            num_quantizers=vq_num_q_c,
            dim=vq_dim,
            codebook_size=codebook_size_content,
            codebook_dim=codebook_dim,
            threshold_ema_dead_code=2,
            commitment=vq_commit_weight,
            weight_init=vq_weight_init,
            full_commit_loss=vq_full_commit_loss,
            quantizer_dropout=quantizer_dropout,
            dropout_type=dropout_type,
        )
        self.quantizer.append(quantizer)

        # residual
        if self.vq_num_q_r > 0:
            quantizer = quantizer_class(
                num_quantizers=vq_num_q_r,
                dim=vq_dim,
                codebook_size=codebook_size_residual,
                codebook_dim=codebook_dim,
                threshold_ema_dead_code=2,
                commitment=vq_commit_weight,
                weight_init=vq_weight_init,
                full_commit_loss=vq_full_commit_loss,
                quantizer_dropout=quantizer_dropout,
                dropout_type=dropout_type,
            )
            self.quantizer.append(quantizer)

        # Add first conv layer
        channels = upsample_initial_channel
        layers = [WNConv1d(in_channels, channels, kernel_size=7, padding=3)]

        # Add upsampling + MRF blocks
        for i, stride in enumerate(up_ratios):
            input_dim = channels // 2**i
            output_dim = channels // 2 ** (i + 1)
            layers += [DecoderBlock(input_dim, output_dim, stride)]

        # Add final conv layer
        layers += [
            Activation1d(activation=SnakeBeta(output_dim, alpha_logscale=True)),
            WNConv1d(output_dim, 1, kernel_size=7, padding=3),
            nn.Tanh(),
        ]

        self.model = nn.Sequential(*layers)

        # Pools the unquantized latent into a global timbre representation.
        self.timbre_encoder = TransformerEncoder(
            enc_emb_tokens=None,
            encoder_layer=4,
            encoder_hidden=256,
            encoder_head=4,
            conv_filter_size=1024,
            conv_kernel_size=5,
            encoder_dropout=0.1,
            use_cln=False,
        )

        # Speaker-conditioned affine (gamma, beta); bias initialized so the
        # transform starts as identity (gamma=1, beta=0).
        self.timbre_linear = nn.Linear(in_channels, in_channels * 2)
        self.timbre_linear.bias.data[:in_channels] = 1
        self.timbre_linear.bias.data[in_channels:] = 0
        self.timbre_norm = nn.LayerNorm(in_channels, elementwise_affine=False)

        # Auxiliary supervision heads: f0 + voicing from prosody, phone
        # posteriors (5003 classes) from content.
        self.f0_predictor = CNNLSTM(in_channels, 1, 2)
        self.phone_predictor = CNNLSTM(in_channels, 5003, 1)

        self.use_gr_content_f0 = use_gr_content_f0
        self.use_gr_prosody_phone = use_gr_prosody_phone
        self.use_gr_residual_f0 = use_gr_residual_f0
        self.use_gr_residual_phone = use_gr_residual_phone
        self.use_gr_x_timbre = use_gr_x_timbre

        # Gradient-reversal (adversarial) heads that push information OUT of
        # a stream: e.g. res_f0_predictor discourages f0 in the residual.
        if self.vq_num_q_r > 0 and self.use_gr_residual_f0:
            self.res_f0_predictor = nn.Sequential(
                GradientReversal(alpha=1.0), CNNLSTM(in_channels, 1, 2)
            )

        if self.vq_num_q_r > 0 and self.use_gr_residual_phone > 0:
            self.res_phone_predictor = nn.Sequential(
                GradientReversal(alpha=1.0), CNNLSTM(in_channels, 5003, 1)
            )

        if self.use_gr_content_f0:
            self.content_f0_predictor = nn.Sequential(
                GradientReversal(alpha=1.0), CNNLSTM(in_channels, 1, 2)
            )

        if self.use_gr_prosody_phone:
            self.prosody_phone_predictor = nn.Sequential(
                GradientReversal(alpha=1.0), CNNLSTM(in_channels, 5003, 1)
            )

        if self.use_gr_x_timbre:
            # Adversarial speaker-ID head (245200 classes) over the combined
            # latent, to keep timbre out of the quantized streams.
            self.x_timbre_predictor = nn.Sequential(
                GradientReversal(alpha=1),
                CNNLSTM(in_channels, 245200, 1, global_pred=True),
            )

        self.reset_parameters()

    def quantize(self, x, n_quantizers=None):
        """Quantize latent `x` [B, d, T] through the factorized VQ streams.

        Returns (outs, qs, commit_loss, quantized_buf) where `outs` is the
        summed quantizer outputs, `qs` the stacked code indices, and
        `quantized_buf` the per-stream quantized latents.
        """
        outs, qs, commit_loss, quantized_buf = 0, [], [], []

        # prosody
        f0_input = x  # (B, d, T)
        f0_quantizer = self.quantizer[0]
        out, q, commit, quantized = f0_quantizer(f0_input, n_quantizers=n_quantizers)
        outs += out
        qs.append(q)
        quantized_buf.append(quantized.sum(0))
        commit_loss.append(commit)

        # phone
        phone_input = x
        phone_quantizer = self.quantizer[1]
        out, q, commit, quantized = phone_quantizer(
            phone_input, n_quantizers=n_quantizers
        )
        outs += out
        qs.append(q)
        quantized_buf.append(quantized.sum(0))
        commit_loss.append(commit)

        # residual
        if self.vq_num_q_r > 0:
            residual_quantizer = self.quantizer[2]
            # The residual stream only models what prosody + content missed;
            # detach so residual gradients don't flow into the other streams.
            residual_input = x - (quantized_buf[0] + quantized_buf[1]).detach()
            out, q, commit, quantized = residual_quantizer(
                residual_input, n_quantizers=n_quantizers
            )
            outs += out
            qs.append(q)
            quantized_buf.append(quantized.sum(0))  # [L, B, C, T] -> [B, C, T]
            commit_loss.append(commit)

        qs = torch.cat(qs, dim=0)
        commit_loss = torch.cat(commit_loss, dim=0)
        return outs, qs, commit_loss, quantized_buf

    def forward(
        self,
        x,
        vq=True,
        get_vq=False,
        eval_vq=True,
        speaker_embedding=None,
        n_quantizers=None,
        quantized=None,
    ):
        """Two-stage forward.

        With vq=True: quantize `x` and extract the speaker embedding; returns
        (outs, qs, commit_loss, quantized_buf, spk_embs). With vq=False:
        decode the previously obtained `quantized` streams (conditioned on
        `speaker_embedding`) into a dict with auxiliary predictions and the
        synthesized "audio".
        """
        if get_vq:
            # NOTE(review): self.quantizer is an nn.ModuleList, which has no
            # get_emb(); this path presumably expects a patched/wrapped
            # quantizer — verify before relying on it.
            return self.quantizer.get_emb()
        if vq is True:
            if eval_vq:
                self.quantizer.eval()
            x_timbre = x
            outs, qs, commit_loss, quantized_buf = self.quantize(
                x, n_quantizers=n_quantizers
            )

            # Timbre is pooled from the unquantized latent over time.
            x_timbre = x_timbre.transpose(1, 2)
            x_timbre = self.timbre_encoder(x_timbre, None, None)
            x_timbre = x_timbre.transpose(1, 2)
            spk_embs = torch.mean(x_timbre, dim=2)
            return outs, qs, commit_loss, quantized_buf, spk_embs

        out = {}

        # Auxiliary predictions: f0/uv from the prosody stream.
        layer_0 = quantized[0]
        f0, uv = self.f0_predictor(layer_0)
        f0 = rearrange(f0, "... 1 -> ...")
        uv = rearrange(uv, "... 1 -> ...")

        # Phone posteriors from the content stream.
        layer_1 = quantized[1]
        (phone,) = self.phone_predictor(layer_1)

        out = {"f0": f0, "uv": uv, "phone": phone}

        # Adversarial (gradient-reversal) predictions for disentanglement.
        if self.use_gr_prosody_phone:
            (prosody_phone,) = self.prosody_phone_predictor(layer_0)
            out["prosody_phone"] = prosody_phone

        if self.use_gr_content_f0:
            content_f0, content_uv = self.content_f0_predictor(layer_1)
            content_f0 = rearrange(content_f0, "... 1 -> ...")
            content_uv = rearrange(content_uv, "... 1 -> ...")
            out["content_f0"] = content_f0
            out["content_uv"] = content_uv

        if self.vq_num_q_r > 0:
            layer_2 = quantized[2]

            if self.use_gr_residual_f0:
                res_f0, res_uv = self.res_f0_predictor(layer_2)
                res_f0 = rearrange(res_f0, "... 1 -> ...")
                res_uv = rearrange(res_uv, "... 1 -> ...")
                out["res_f0"] = res_f0
                out["res_uv"] = res_uv

            if self.use_gr_residual_phone:
                (res_phone,) = self.res_phone_predictor(layer_2)
                out["res_phone"] = res_phone

        # Speaker-conditioned affine parameters for the synthesizer input.
        style = self.timbre_linear(speaker_embedding).unsqueeze(2)  # (B, 2d, 1)
        gamma, beta = style.chunk(2, 1)  # (B, d, 1)
        if self.vq_num_q_r > 0:
            if self.use_random_mask_residual:
                # Randomly zero the residual stream per sample so the model
                # can synthesize from prosody + content alone.
                bsz = quantized[2].shape[0]
                res_mask = np.random.choice(
                    [0, 1],
                    size=bsz,
                    p=[
                        self.prob_random_mask_residual,
                        1 - self.prob_random_mask_residual,
                    ],
                )
                res_mask = (
                    torch.from_numpy(res_mask).unsqueeze(1).unsqueeze(1)
                )  # (B, 1, 1)
                res_mask = res_mask.to(
                    device=quantized[2].device, dtype=quantized[2].dtype
                )
                # Prosody/content are detached: only the residual stream
                # receives reconstruction gradients here.
                x = (
                    quantized[0].detach()
                    + quantized[1].detach()
                    + quantized[2] * res_mask
                )
                # x = quantized_perturbe[0].detach() + quantized[1].detach() + quantized[2] * res_mask
            else:
                x = quantized[0].detach() + quantized[1].detach() + quantized[2]
                # x = quantized_perturbe[0].detach() + quantized[1].detach() + quantized[2]
        else:
            x = quantized[0].detach() + quantized[1].detach()
            # x = quantized_perturbe[0].detach() + quantized[1].detach()

        if self.use_gr_x_timbre:
            (x_timbre,) = self.x_timbre_predictor(x)
            out["x_timbre"] = x_timbre

        # Normalize, apply the speaker style, and synthesize the waveform.
        x = x.transpose(1, 2)
        x = self.timbre_norm(x)
        x = x.transpose(1, 2)
        x = x * gamma + beta

        x = self.model(x)
        out["audio"] = x

        return out

    def vq2emb(self, vq, use_residual_code=True):
        """Map code indices back to a summed latent.

        vq: [num_quantizer, B, T], ordered prosody | content | residual.
        """
        self.quantizer = self.quantizer.eval()
        out = 0
        out += self.quantizer[0].vq2emb(vq[0 : self.vq_num_q_p])
        out += self.quantizer[1].vq2emb(
            vq[self.vq_num_q_p : self.vq_num_q_p + self.vq_num_q_c]
        )
        if self.vq_num_q_r > 0 and use_residual_code:
            out += self.quantizer[2].vq2emb(vq[self.vq_num_q_p + self.vq_num_q_c :])
        return out

    def inference(self, x, speaker_embedding):
        """Synthesize a waveform from a combined latent and speaker embedding."""
        style = self.timbre_linear(speaker_embedding).unsqueeze(2)  # (B, 2d, 1)
        gamma, beta = style.chunk(2, 1)  # (B, d, 1)
        x = x.transpose(1, 2)
        x = self.timbre_norm(x)
        x = x.transpose(1, 2)
        x = x * gamma + beta
        x = self.model(x)
        return x

    def remove_weight_norm(self):
        """Remove weight normalization module from all of the layers."""

        def _remove_weight_norm(m):
            try:
                torch.nn.utils.remove_weight_norm(m)
            except ValueError:  # this module didn't have weight norm
                return

        self.apply(_remove_weight_norm)

    def apply_weight_norm(self):
        """Apply weight normalization module from all of the layers."""

        def _apply_weight_norm(m):
            if isinstance(m, nn.Conv1d) or isinstance(m, nn.ConvTranspose1d):
                torch.nn.utils.weight_norm(m)

        self.apply(_apply_weight_norm)

    def reset_parameters(self):
        self.apply(init_weights)
+
+
class FACodecRedecoder(nn.Module):
    """Decoder that resynthesizes audio directly from FACodec token indices.

    Unlike FACodecDecoder, this module owns its own embedding tables for the
    prosody / content / residual code streams, so it can decode from discrete
    codes alone. The prosody stream is additionally conditioned on the target
    speaker embedding through a conditional-layer-norm transformer, which is
    what enables voice conversion (swap the speaker embedding, keep the codes).
    """

    def __init__(
        self,
        in_channels=256,
        upsample_initial_channel=1280,
        up_ratios=(5, 5, 4, 2),
        vq_num_q_c=2,
        vq_num_q_p=1,
        vq_num_q_r=3,
        vq_dim=256,
        codebook_size_prosody=10,
        codebook_size_content=10,
        codebook_size_residual=10,
    ):
        super().__init__()
        # Total upsampling factor of the waveform synthesizer.
        self.hop_length = np.prod(up_ratios)
        self.up_ratios = up_ratios

        self.vq_num_q_p = vq_num_q_p
        self.vq_num_q_c = vq_num_q_c
        self.vq_num_q_r = vq_num_q_r

        self.vq_dim = vq_dim

        self.codebook_size_prosody = codebook_size_prosody
        self.codebook_size_content = codebook_size_content
        self.codebook_size_residual = codebook_size_residual

        # One embedding table per quantizer level; codebook size is
        # 2**codebook_size_*. Near-zero init keeps the initial sum small.
        self.prosody_embs = nn.ModuleList()
        for i in range(self.vq_num_q_p):
            emb_tokens = nn.Embedding(
                num_embeddings=2**self.codebook_size_prosody,
                embedding_dim=self.vq_dim,
            )
            emb_tokens.weight.data.normal_(mean=0.0, std=1e-5)
            self.prosody_embs.append(emb_tokens)
        self.content_embs = nn.ModuleList()
        for i in range(self.vq_num_q_c):
            emb_tokens = nn.Embedding(
                num_embeddings=2**self.codebook_size_content,
                embedding_dim=self.vq_dim,
            )
            emb_tokens.weight.data.normal_(mean=0.0, std=1e-5)
            self.content_embs.append(emb_tokens)
        self.residual_embs = nn.ModuleList()
        for i in range(self.vq_num_q_r):
            emb_tokens = nn.Embedding(
                num_embeddings=2**self.codebook_size_residual,
                embedding_dim=self.vq_dim,
            )
            emb_tokens.weight.data.normal_(mean=0.0, std=1e-5)
            self.residual_embs.append(emb_tokens)

        # Add first conv layer
        channels = upsample_initial_channel
        layers = [WNConv1d(in_channels, channels, kernel_size=7, padding=3)]

        # Add upsampling + MRF blocks
        for i, stride in enumerate(up_ratios):
            input_dim = channels // 2**i
            output_dim = channels // 2 ** (i + 1)
            layers += [DecoderBlock(input_dim, output_dim, stride)]

        # Add final conv layer
        layers += [
            Activation1d(activation=SnakeBeta(output_dim, alpha_logscale=True)),
            WNConv1d(output_dim, 1, kernel_size=7, padding=3),
            nn.Tanh(),
        ]

        self.model = nn.Sequential(*layers)

        # Speaker-conditioned affine (gamma, beta); bias initialized so the
        # transform starts as identity (gamma=1, beta=0).
        self.timbre_linear = nn.Linear(in_channels, in_channels * 2)
        self.timbre_linear.bias.data[:in_channels] = 1
        self.timbre_linear.bias.data[in_channels:] = 0
        self.timbre_norm = nn.LayerNorm(in_channels, elementwise_affine=False)

        # Conditional-layer-norm transformer that injects the speaker
        # embedding into the prosody stream.
        self.timbre_cond_prosody_enc = TransformerEncoder(
            enc_emb_tokens=None,
            encoder_layer=4,
            encoder_hidden=256,
            encoder_head=4,
            conv_filter_size=1024,
            conv_kernel_size=5,
            encoder_dropout=0.1,
            use_cln=True,
            cfg=None,
        )

    def forward(
        self,
        vq,
        speaker_embedding,
        use_residual_code=False,
    ):
        """Decode code indices `vq` [num_quantizer, B, T] into a waveform.

        Codes are ordered prosody | content | residual along the first axis;
        `speaker_embedding` is a (B, d) timbre vector.
        """

        x = 0

        # Prosody: sum the level embeddings, then condition on the speaker.
        x_p = 0
        for i in range(self.vq_num_q_p):
            x_p = x_p + self.prosody_embs[i](vq[i])  # (B, T, d)
        spk_cond = speaker_embedding.unsqueeze(1).expand(-1, x_p.shape[1], -1)
        x_p = self.timbre_cond_prosody_enc(
            x_p, key_padding_mask=None, condition=spk_cond
        )
        x = x + x_p

        # Content: plain embedding sum.
        x_c = 0
        for i in range(self.vq_num_q_c):
            x_c = x_c + self.content_embs[i](vq[self.vq_num_q_p + i])

        x = x + x_c

        # Optional residual detail stream.
        if use_residual_code:

            x_r = 0
            for i in range(self.vq_num_q_r):
                x_r = x_r + self.residual_embs[i](
                    vq[self.vq_num_q_p + self.vq_num_q_c + i]
                )
            x = x + x_r

        # Normalize, apply the speaker style, and synthesize the waveform.
        style = self.timbre_linear(speaker_embedding).unsqueeze(2)  # (B, 2d, 1)
        gamma, beta = style.chunk(2, 1)  # (B, d, 1)
        x = x.transpose(1, 2)
        x = self.timbre_norm(x)
        x = x.transpose(1, 2)
        x = x * gamma + beta
        x = self.model(x)

        return x

    def vq2emb(self, vq, speaker_embedding, use_residual=True):
        """Map code indices to a combined latent (B, d, T) without synthesis."""

        out = 0

        # Prosody embeddings, conditioned on the speaker embedding.
        x_t = 0
        for i in range(self.vq_num_q_p):
            x_t += self.prosody_embs[i](vq[i])  # (B, T, d)
        spk_cond = speaker_embedding.unsqueeze(1).expand(-1, x_t.shape[1], -1)
        x_t = self.timbre_cond_prosody_enc(
            x_t, key_padding_mask=None, condition=spk_cond
        )

        # prosody
        out += x_t

        # content
        for i in range(self.vq_num_q_c):
            out += self.content_embs[i](vq[self.vq_num_q_p + i])

        # residual
        if use_residual:
            for i in range(self.vq_num_q_r):
                out += self.residual_embs[i](vq[self.vq_num_q_p + self.vq_num_q_c + i])

        out = out.transpose(1, 2)  # (B, T, d) -> (B, d, T)
        return out

    def inference(self, x, speaker_embedding):
        """Synthesize a waveform from a combined latent and speaker embedding."""
        style = self.timbre_linear(speaker_embedding).unsqueeze(2)  # (B, 2d, 1)
        gamma, beta = style.chunk(2, 1)  # (B, d, 1)
        x = x.transpose(1, 2)
        x = self.timbre_norm(x)
        x = x.transpose(1, 2)
        x = x * gamma + beta
        x = self.model(x)
        return x
+
+
class FACodecEncoderV2(nn.Module):
    """Waveform encoder (V2): the FACodecEncoder conv trunk plus a mel
    front-end whose lowest bands serve as a prosody feature."""

    def __init__(
        self,
        ngf=32,
        up_ratios=(2, 4, 5, 5),
        out_channels=1024,
    ):
        super().__init__()
        self.hop_length = np.prod(up_ratios)
        self.up_ratios = up_ratios

        # Stem: mono waveform -> ngf channels.
        d_model = ngf
        layers = [WNConv1d(1, d_model, kernel_size=7, padding=3)]

        # Each stage downsamples time by `stride` and doubles the channels.
        for stride in up_ratios:
            d_model *= 2
            layers.append(EncoderBlock(d_model, stride=stride))

        # Head: final activation plus projection to the latent width.
        layers += [
            Activation1d(activation=SnakeBeta(d_model, alpha_logscale=True)),
            WNConv1d(d_model, out_channels, kernel_size=3, padding=1),
        ]

        # Wrap the whole stack into nn.Sequential.
        self.block = nn.Sequential(*layers)
        self.enc_dim = d_model

        # 16 kHz mel front-end; only the lowest 20 of the 80 bins are used
        # as the prosody feature (see get_prosody_feature).
        self.mel_transform = MelSpectrogram(
            n_fft=1024,
            num_mels=80,
            sampling_rate=16000,
            hop_size=200,
            win_size=800,
            fmin=0,
            fmax=8000,
        )

        self.reset_parameters()

    def forward(self, x):
        return self.block(x)

    def inference(self, x):
        # Identical to forward; kept for API symmetry with the decoder.
        return self.block(x)

    def get_prosody_feature(self, x):
        # x: [B, 1, T] waveform -> low-frequency mel bands [B, 20, frames].
        return self.mel_transform(x.squeeze(1))[:, :20, :]

    def remove_weight_norm(self):
        """Remove weight normalization from all layers (for inference/export)."""

        def _remove_weight_norm(m):
            try:
                torch.nn.utils.remove_weight_norm(m)
            except ValueError:  # this module didn't have weight norm
                return

        self.apply(_remove_weight_norm)

    def apply_weight_norm(self):
        """Apply weight normalization to every Conv1d layer."""

        def _apply_weight_norm(m):
            if isinstance(m, nn.Conv1d):
                torch.nn.utils.weight_norm(m)

        self.apply(_apply_weight_norm)

    def reset_parameters(self):
        self.apply(init_weights)
+
+
+class FACodecDecoderV2(nn.Module):
+ def __init__(
+ self,
+ in_channels=256,
+ upsample_initial_channel=1536,
+ ngf=32,
+ up_ratios=(5, 5, 4, 2),
+ vq_num_q_c=2,
+ vq_num_q_p=1,
+ vq_num_q_r=3,
+ vq_dim=1024,
+ vq_commit_weight=0.005,
+ vq_weight_init=False,
+ vq_full_commit_loss=False,
+ codebook_dim=8,
+ codebook_size_prosody=10, # true codebook size is equal to 2^codebook_size
+ codebook_size_content=10,
+ codebook_size_residual=10,
+ quantizer_dropout=0.0,
+ dropout_type="linear",
+ use_gr_content_f0=False,
+ use_gr_prosody_phone=False,
+ use_gr_residual_f0=False,
+ use_gr_residual_phone=False,
+ use_gr_x_timbre=False,
+ use_random_mask_residual=True,
+ prob_random_mask_residual=0.75,
+ ):
+ super().__init__()
+ self.hop_length = np.prod(up_ratios)
+ self.ngf = ngf
+ self.up_ratios = up_ratios
+
+ self.use_random_mask_residual = use_random_mask_residual
+ self.prob_random_mask_residual = prob_random_mask_residual
+
+ self.vq_num_q_p = vq_num_q_p
+ self.vq_num_q_c = vq_num_q_c
+ self.vq_num_q_r = vq_num_q_r
+
+ self.codebook_size_prosody = codebook_size_prosody
+ self.codebook_size_content = codebook_size_content
+ self.codebook_size_residual = codebook_size_residual
+
+ quantizer_class = ResidualVQ
+
+ self.quantizer = nn.ModuleList()
+
+ # prosody
+ quantizer = quantizer_class(
+ num_quantizers=vq_num_q_p,
+ dim=vq_dim,
+ codebook_size=codebook_size_prosody,
+ codebook_dim=codebook_dim,
+ threshold_ema_dead_code=2,
+ commitment=vq_commit_weight,
+ weight_init=vq_weight_init,
+ full_commit_loss=vq_full_commit_loss,
+ quantizer_dropout=quantizer_dropout,
+ dropout_type=dropout_type,
+ )
+ self.quantizer.append(quantizer)
+
+ # phone
+ quantizer = quantizer_class(
+ num_quantizers=vq_num_q_c,
+ dim=vq_dim,
+ codebook_size=codebook_size_content,
+ codebook_dim=codebook_dim,
+ threshold_ema_dead_code=2,
+ commitment=vq_commit_weight,
+ weight_init=vq_weight_init,
+ full_commit_loss=vq_full_commit_loss,
+ quantizer_dropout=quantizer_dropout,
+ dropout_type=dropout_type,
+ )
+ self.quantizer.append(quantizer)
+
+ # residual
+ if self.vq_num_q_r > 0:
+ quantizer = quantizer_class(
+ num_quantizers=vq_num_q_r,
+ dim=vq_dim,
+ codebook_size=codebook_size_residual,
+ codebook_dim=codebook_dim,
+ threshold_ema_dead_code=2,
+ commitment=vq_commit_weight,
+ weight_init=vq_weight_init,
+ full_commit_loss=vq_full_commit_loss,
+ quantizer_dropout=quantizer_dropout,
+ dropout_type=dropout_type,
+ )
+ self.quantizer.append(quantizer)
+
+ # Add first conv layer
+ channels = upsample_initial_channel
+ layers = [WNConv1d(in_channels, channels, kernel_size=7, padding=3)]
+
+ # Add upsampling + MRF blocks
+ for i, stride in enumerate(up_ratios):
+ input_dim = channels // 2**i
+ output_dim = channels // 2 ** (i + 1)
+ layers += [DecoderBlock(input_dim, output_dim, stride)]
+
+ # Add final conv layer
+ layers += [
+ Activation1d(activation=SnakeBeta(output_dim, alpha_logscale=True)),
+ WNConv1d(output_dim, 1, kernel_size=7, padding=3),
+ nn.Tanh(),
+ ]
+
+ self.model = nn.Sequential(*layers)
+
+ self.timbre_encoder = TransformerEncoder(
+ enc_emb_tokens=None,
+ encoder_layer=4,
+ encoder_hidden=256,
+ encoder_head=4,
+ conv_filter_size=1024,
+ conv_kernel_size=5,
+ encoder_dropout=0.1,
+ use_cln=False,
+ )
+
+ self.timbre_linear = nn.Linear(in_channels, in_channels * 2)
+ self.timbre_linear.bias.data[:in_channels] = 1
+ self.timbre_linear.bias.data[in_channels:] = 0
+ self.timbre_norm = nn.LayerNorm(in_channels, elementwise_affine=False)
+
+ self.f0_predictor = CNNLSTM(in_channels, 1, 2)
+ self.phone_predictor = CNNLSTM(in_channels, 5003, 1)
+
+ self.use_gr_content_f0 = use_gr_content_f0
+ self.use_gr_prosody_phone = use_gr_prosody_phone
+ self.use_gr_residual_f0 = use_gr_residual_f0
+ self.use_gr_residual_phone = use_gr_residual_phone
+ self.use_gr_x_timbre = use_gr_x_timbre
+
+ if self.vq_num_q_r > 0 and self.use_gr_residual_f0:
+ self.res_f0_predictor = nn.Sequential(
+ GradientReversal(alpha=1.0), CNNLSTM(in_channels, 1, 2)
+ )
+
+ if self.vq_num_q_r > 0 and self.use_gr_residual_phone > 0:
+ self.res_phone_predictor = nn.Sequential(
+ GradientReversal(alpha=1.0), CNNLSTM(in_channels, 5003, 1)
+ )
+
+ if self.use_gr_content_f0:
+ self.content_f0_predictor = nn.Sequential(
+ GradientReversal(alpha=1.0), CNNLSTM(in_channels, 1, 2)
+ )
+
+ if self.use_gr_prosody_phone:
+ self.prosody_phone_predictor = nn.Sequential(
+ GradientReversal(alpha=1.0), CNNLSTM(in_channels, 5003, 1)
+ )
+
+ if self.use_gr_x_timbre:
+ self.x_timbre_predictor = nn.Sequential(
+ GradientReversal(alpha=1),
+ CNNLSTM(in_channels, 245200, 1, global_pred=True),
+ )
+
+ self.melspec_linear = nn.Linear(20, 256)
+ self.melspec_encoder = TransformerEncoder(
+ enc_emb_tokens=None,
+ encoder_layer=4,
+ encoder_hidden=256,
+ encoder_head=4,
+ conv_filter_size=1024,
+ conv_kernel_size=5,
+ encoder_dropout=0.1,
+ use_cln=False,
+ cfg=None,
+ )
+
+ self.reset_parameters()
+
    def quantize(self, x, prosody_feature, n_quantizers=None):
        """Run the three quantizer branches (prosody, phone/content, residual).

        Args:
            x: content features, channel-first (B, C, T); fed directly to the
                phone/content quantizer and used to form the residual input.
            prosody_feature: low-band mel features; transposed to (B, T, 20),
                projected and encoded by the small melspec transformer before
                prosody quantization.
            n_quantizers: optional cap on the number of RVQ layers per branch.

        Returns:
            outs: sum of the straight-through quantized outputs of all branches.
            qs: code indices of all branches, concatenated along dim 0.
            commit_loss: per-branch commitment losses, concatenated along dim 0.
            quantized_buf: list of per-branch quantized tensors, each (B, C, T)
                (per-layer outputs are summed over the layer dimension).
        """
        outs, qs, commit_loss, quantized_buf = 0, [], [], []

        # prosody branch: mel slice -> linear -> transformer -> quantizer[0]
        f0_input = prosody_feature.transpose(1, 2)  # (B, T, 20)
        f0_input = self.melspec_linear(f0_input)
        f0_input = self.melspec_encoder(f0_input, None, None)
        f0_input = f0_input.transpose(1, 2)
        f0_quantizer = self.quantizer[0]
        out, q, commit, quantized = f0_quantizer(f0_input, n_quantizers=n_quantizers)
        outs += out
        qs.append(q)
        quantized_buf.append(quantized.sum(0))
        commit_loss.append(commit)

        # phone/content branch: quantize x as-is with quantizer[1]
        phone_input = x
        phone_quantizer = self.quantizer[1]
        out, q, commit, quantized = phone_quantizer(
            phone_input, n_quantizers=n_quantizers
        )
        outs += out
        qs.append(q)
        quantized_buf.append(quantized.sum(0))
        commit_loss.append(commit)

        # residual branch: quantize what prosody+content did not capture;
        # detached so residual gradients do not flow into the other branches.
        if self.vq_num_q_r > 0:
            residual_quantizer = self.quantizer[2]
            residual_input = x - (quantized_buf[0] + quantized_buf[1]).detach()
            out, q, commit, quantized = residual_quantizer(
                residual_input, n_quantizers=n_quantizers
            )
            outs += out
            qs.append(q)
            quantized_buf.append(quantized.sum(0))  # [L, B, C, T] -> [B, C, T]
            commit_loss.append(commit)

        qs = torch.cat(qs, dim=0)
        commit_loss = torch.cat(commit_loss, dim=0)
        return outs, qs, commit_loss, quantized_buf
+
    def forward(
        self,
        x,
        prosody_feature,
        vq=True,
        get_vq=False,
        eval_vq=True,
        speaker_embedding=None,
        n_quantizers=None,
        quantized=None,
    ):
        """Two-mode entry point of the FACodec-style decoder.

        Modes (checked in order):
          * get_vq=True: return codebook embeddings and nothing else.
          * vq=True: quantize `x`/`prosody_feature` and extract a mean-pooled
            timbre embedding; returns
            (outs, qs, commit_loss, quantized_buf, spk_embs).
          * otherwise: decode pre-quantized features `quantized` (list of
            per-branch (B, C, T) tensors) to audio, conditioned on
            `speaker_embedding`; returns a dict with "audio" plus auxiliary
            predictions (f0/uv/phone and optional gradient-reversal heads).
        """
        if get_vq:
            # NOTE(review): self.quantizer is an nn.ModuleList, which has no
            # get_emb(); this path looks like it expects a single quantizer —
            # confirm before relying on get_vq=True.
            return self.quantizer.get_emb()
        if vq is True:
            if eval_vq:
                # Freeze codebook statistics (EMA/dropout) during quantization.
                self.quantizer.eval()
            x_timbre = x
            outs, qs, commit_loss, quantized_buf = self.quantize(
                x, prosody_feature, n_quantizers=n_quantizers
            )

            # Timbre embedding: encode x and mean-pool over time.
            x_timbre = x_timbre.transpose(1, 2)
            x_timbre = self.timbre_encoder(x_timbre, None, None)
            x_timbre = x_timbre.transpose(1, 2)
            spk_embs = torch.mean(x_timbre, dim=2)
            return outs, qs, commit_loss, quantized_buf, spk_embs

        out = {}

        # Auxiliary heads on the prosody codes: F0 and voicedness (uv).
        layer_0 = quantized[0]
        f0, uv = self.f0_predictor(layer_0)
        f0 = rearrange(f0, "... 1 -> ...")
        uv = rearrange(uv, "... 1 -> ...")

        # Phone prediction from the content codes.
        layer_1 = quantized[1]
        (phone,) = self.phone_predictor(layer_1)

        out = {"f0": f0, "uv": uv, "phone": phone}

        # Gradient-reversal heads: discourage information leakage between
        # branches (e.g. phone content in the prosody codes).
        if self.use_gr_prosody_phone:
            (prosody_phone,) = self.prosody_phone_predictor(layer_0)
            out["prosody_phone"] = prosody_phone

        if self.use_gr_content_f0:
            content_f0, content_uv = self.content_f0_predictor(layer_1)
            content_f0 = rearrange(content_f0, "... 1 -> ...")
            content_uv = rearrange(content_uv, "... 1 -> ...")
            out["content_f0"] = content_f0
            out["content_uv"] = content_uv

        if self.vq_num_q_r > 0:
            layer_2 = quantized[2]

            if self.use_gr_residual_f0:
                res_f0, res_uv = self.res_f0_predictor(layer_2)
                res_f0 = rearrange(res_f0, "... 1 -> ...")
                res_uv = rearrange(res_uv, "... 1 -> ...")
                out["res_f0"] = res_f0
                out["res_uv"] = res_uv

            if self.use_gr_residual_phone:
                (res_phone,) = self.res_phone_predictor(layer_2)
                out["res_phone"] = res_phone

        # FiLM-style speaker conditioning: per-channel scale/shift.
        style = self.timbre_linear(speaker_embedding).unsqueeze(2)  # (B, 2d, 1)
        gamma, beta = style.chunk(2, 1)  # (B, d, 1)
        if self.vq_num_q_r > 0:
            if self.use_random_mask_residual:
                # Randomly drop the residual branch per sample (kept with
                # probability 1 - prob_random_mask_residual).
                bsz = quantized[2].shape[0]
                res_mask = np.random.choice(
                    [0, 1],
                    size=bsz,
                    p=[
                        self.prob_random_mask_residual,
                        1 - self.prob_random_mask_residual,
                    ],
                )
                res_mask = (
                    torch.from_numpy(res_mask).unsqueeze(1).unsqueeze(1)
                )  # (B, 1, 1)
                res_mask = res_mask.to(
                    device=quantized[2].device, dtype=quantized[2].dtype
                )
                # Prosody/content detached: only the residual branch trains here.
                x = (
                    quantized[0].detach()
                    + quantized[1].detach()
                    + quantized[2] * res_mask
                )
                # x = quantized_perturbe[0].detach() + quantized[1].detach() + quantized[2] * res_mask
            else:
                x = quantized[0].detach() + quantized[1].detach() + quantized[2]
                # x = quantized_perturbe[0].detach() + quantized[1].detach() + quantized[2]
        else:
            x = quantized[0].detach() + quantized[1].detach()
            # x = quantized_perturbe[0].detach() + quantized[1].detach()

        # Adversarial timbre head on the combined (timbre-free) representation.
        if self.use_gr_x_timbre:
            (x_timbre,) = self.x_timbre_predictor(x)
            out["x_timbre"] = x_timbre

        # Normalize, apply speaker scale/shift, then vocode to waveform.
        x = x.transpose(1, 2)
        x = self.timbre_norm(x)
        x = x.transpose(1, 2)
        x = x * gamma + beta

        x = self.model(x)
        out["audio"] = x

        return out
+
+ def vq2emb(self, vq, use_residual=True):
+ # vq: [num_quantizer, B, T]
+ self.quantizer = self.quantizer.eval()
+ out = 0
+ out += self.quantizer[0].vq2emb(vq[0 : self.vq_num_q_p])
+ out += self.quantizer[1].vq2emb(
+ vq[self.vq_num_q_p : self.vq_num_q_p + self.vq_num_q_c]
+ )
+ if self.vq_num_q_r > 0 and use_residual:
+ out += self.quantizer[2].vq2emb(vq[self.vq_num_q_p + self.vq_num_q_c :])
+ return out
+
+ def inference(self, x, speaker_embedding):
+ style = self.timbre_linear(speaker_embedding).unsqueeze(2) # (B, 2d, 1)
+ gamma, beta = style.chunk(2, 1) # (B, d, 1)
+ x = x.transpose(1, 2)
+ x = self.timbre_norm(x)
+ x = x.transpose(1, 2)
+ x = x * gamma + beta
+ x = self.model(x)
+ return x
+
+ def remove_weight_norm(self):
+ """Remove weight normalization module from all of the layers."""
+
+ def _remove_weight_norm(m):
+ try:
+ torch.nn.utils.remove_weight_norm(m)
+ except ValueError: # this module didn't have weight norm
+ return
+
+ self.apply(_remove_weight_norm)
+
+ def apply_weight_norm(self):
+ """Apply weight normalization module from all of the layers."""
+
+ def _apply_weight_norm(m):
+ if isinstance(m, nn.Conv1d) or isinstance(m, nn.ConvTranspose1d):
+ torch.nn.utils.weight_norm(m)
+
+ self.apply(_apply_weight_norm)
+
    def reset_parameters(self):
        """Re-initialize all sub-module weights via the module-level init_weights."""
        self.apply(init_weights)
diff --git a/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py b/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py
new file mode 100644
index 0000000..d09396e
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from torch.autograd import Function
+import torch
+from torch import nn
+
+
class GradientReversal(Function):
    """Identity in the forward pass; scales gradients by ``-alpha`` in backward.

    Gradient reversal layer (Ganin & Lempitsky, 2015), used here for the
    adversarial disentanglement heads.
    """

    @staticmethod
    def forward(ctx, x, alpha):
        # Only alpha is needed in backward; the original code also saved x,
        # which needlessly keeps the whole activation alive until backward.
        ctx.save_for_backward(alpha)
        return x

    @staticmethod
    def backward(ctx, grad_output):
        grad_input = None
        (alpha,) = ctx.saved_tensors
        if ctx.needs_input_grad[0]:
            grad_input = -alpha * grad_output
        # alpha is a hyper-parameter, not a learnable input -> no gradient.
        return grad_input, None
+
+
+revgrad = GradientReversal.apply
+
+
class GradientReversal(nn.Module):
    """Module wrapper around ``revgrad``.

    Intentionally shadows the autograd Function of the same name defined
    above; ``revgrad`` was captured first and still refers to the Function.
    """

    def __init__(self, alpha):
        super().__init__()
        # NOTE(review): stored as a plain attribute, not register_buffer, so
        # alpha stays on CPU and is absent from state_dict — confirm intended.
        self.alpha = torch.tensor(alpha, requires_grad=False)

    def forward(self, x):
        return revgrad(x, self.alpha)
diff --git a/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py b/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py
new file mode 100644
index 0000000..cbf1cd2
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py
@@ -0,0 +1,102 @@
+import torch
+import pyworld as pw
+import numpy as np
+import soundfile as sf
+import os
+from torchaudio.functional import pitch_shift
+import librosa
+from librosa.filters import mel as librosa_mel_fn
+import torch.nn as nn
+import torch.nn.functional as F
+
+
def dynamic_range_compression(x, C=1, clip_val=1e-5):
    """Log-compress *x*: ``log(clip(x, clip_val) * C)``, element-wise (NumPy)."""
    clipped = np.clip(x, a_min=clip_val, a_max=None)
    return np.log(clipped * C)
+
+
def dynamic_range_decompression(x, C=1):
    """Invert dynamic_range_compression: ``exp(x) / C`` (NumPy)."""
    expanded = np.exp(x)
    return expanded / C
+
+
def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
    """Log-compress *x*: ``log(clamp(x, min=clip_val) * C)``, element-wise (torch)."""
    clamped = torch.clamp(x, min=clip_val)
    return torch.log(clamped * C)
+
+
def dynamic_range_decompression_torch(x, C=1):
    """Invert dynamic_range_compression_torch: ``exp(x) / C`` (torch)."""
    expanded = torch.exp(x)
    return expanded / C
+
+
def spectral_normalize_torch(magnitudes):
    """Log-compress spectrogram magnitudes (thin wrapper, default C/clip_val)."""
    return dynamic_range_compression_torch(magnitudes)
+
+
def spectral_de_normalize_torch(magnitudes):
    """Undo spectral_normalize_torch (thin wrapper, default C)."""
    return dynamic_range_decompression_torch(magnitudes)
+
+
class MelSpectrogram(nn.Module):
    """Log-mel spectrogram front end: STFT -> mel projection -> log compression.

    The mel filter bank and Hann window are computed once at construction and
    registered as buffers so they follow the module across devices.
    """

    def __init__(
        self,
        n_fft,
        num_mels,
        sampling_rate,
        hop_size,
        win_size,
        fmin,
        fmax,
        center=False,
    ):
        super(MelSpectrogram, self).__init__()
        self.n_fft = n_fft
        self.hop_size = hop_size
        self.win_size = win_size
        self.sampling_rate = sampling_rate
        self.num_mels = num_mels
        self.fmin = fmin
        self.fmax = fmax
        self.center = center

        # (Original code initialized throwaway dicts here; removed as dead code.)
        mel = librosa_mel_fn(
            sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax
        )
        self.register_buffer("mel_basis", torch.from_numpy(mel).float())
        self.register_buffer("hann_window", torch.hann_window(win_size))

    def forward(self, y):
        """Compute the log-mel spectrogram of waveforms ``y``.

        Assumes y is (B, T) — TODO confirm against callers. Returns
        (B, num_mels, frames).
        """
        # Reflect-pad so the frame count matches the non-centered convention.
        y = torch.nn.functional.pad(
            y.unsqueeze(1),
            (
                int((self.n_fft - self.hop_size) / 2),
                int((self.n_fft - self.hop_size) / 2),
            ),
            mode="reflect",
        )
        y = y.squeeze(1)
        spec = torch.stft(
            y,
            self.n_fft,
            hop_length=self.hop_size,
            win_length=self.win_size,
            window=self.hann_window,
            center=self.center,
            pad_mode="reflect",
            normalized=False,
            onesided=True,
            return_complex=True,
        )
        spec = torch.view_as_real(spec)

        # Magnitude with a small epsilon for numerical stability at silence.
        spec = torch.sqrt(spec.pow(2).sum(-1) + (1e-9))

        spec = torch.matmul(self.mel_basis, spec)
        spec = spectral_normalize_torch(spec)

        return spec
diff --git a/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py b/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py
new file mode 100644
index 0000000..2cb7b40
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .fvq import *
+from .rvq import *
diff --git a/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py b/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py
new file mode 100644
index 0000000..4ade35d
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py
@@ -0,0 +1,116 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Union
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+from torch.nn.utils import weight_norm
+
+
class FactorizedVectorQuantize(nn.Module):
    """Vector quantizer with a factorized (low-dimensional) codebook.

    Inputs are projected down to ``codebook_dim`` before nearest-neighbour
    search and projected back afterwards; the lookup operates on L2-normalized
    vectors (cosine similarity).
    """

    def __init__(self, dim, codebook_size, codebook_dim, commitment, **kwargs):
        super().__init__()
        self.codebook_size = codebook_size
        self.codebook_dim = codebook_dim
        self.commitment = commitment

        # Project only when the model width differs from the codebook width.
        if dim != self.codebook_dim:
            self.in_proj = weight_norm(nn.Linear(dim, self.codebook_dim))
            self.out_proj = weight_norm(nn.Linear(self.codebook_dim, dim))
        else:
            self.in_proj = nn.Identity()
            self.out_proj = nn.Identity()
        self._codebook = nn.Embedding(codebook_size, self.codebook_dim)

    @property
    def codebook(self):
        # Read-only view of the embedding table.
        return self._codebook

    def forward(self, z):
        """Quantize the input tensor using the learned codebook.

        Parameters
        ----------
        z : Tensor[B x D x T]

        Returns
        -------
        z_q : Tensor[B x D x T]
            Quantized continuous representation of input
            (straight-through gradient estimator).
        indices : Tensor[B x T]
            Codebook indices (quantized discrete representation of input).
        commit_loss : Tensor[B]
            Commitment + codebook loss per batch item; zeros in eval mode.
        """
        # transpose since we use linear

        z = rearrange(z, "b d t -> b t d")

        # Factorized codes project input into low-dimensional space
        z_e = self.in_proj(z)  # z_e : (B x T x D)
        z_e = rearrange(z_e, "b t d -> b d t")
        z_q, indices = self.decode_latents(z_e)

        if self.training:
            # Commitment pulls the encoder towards the codes; codebook loss
            # pulls the codes towards the encoder output.
            commitment_loss = (
                F.mse_loss(z_e, z_q.detach(), reduction="none").mean([1, 2])
                * self.commitment
            )
            codebook_loss = F.mse_loss(z_q, z_e.detach(), reduction="none").mean([1, 2])
            commit_loss = commitment_loss + codebook_loss
        else:
            commit_loss = torch.zeros(z.shape[0], device=z.device)

        z_q = (
            z_e + (z_q - z_e).detach()
        )  # noop in forward pass, straight-through gradient estimator in backward pass

        z_q = rearrange(z_q, "b d t -> b t d")
        z_q = self.out_proj(z_q)
        z_q = rearrange(z_q, "b t d -> b d t")

        return z_q, indices, commit_loss

    def vq2emb(self, vq, proj=True):
        """Map code indices back to (optionally up-projected) embeddings, (B, D, T)."""
        emb = self.embed_code(vq)
        if proj:
            emb = self.out_proj(emb)
        return emb.transpose(1, 2)

    def get_emb(self):
        # Raw codebook weights, (codebook_size x codebook_dim).
        return self.codebook.weight

    def embed_code(self, embed_id):
        # Indices -> codebook vectors (last dim = codebook_dim).
        return F.embedding(embed_id, self.codebook.weight)

    def decode_code(self, embed_id):
        # Same as embed_code but channel-first: (B, D, T).
        return self.embed_code(embed_id).transpose(1, 2)

    def decode_latents(self, latents):
        """Nearest-neighbour search in the normalized codebook space."""
        encodings = rearrange(latents, "b d t -> (b t) d")
        codebook = self.codebook.weight  # codebook: (N x D)
        # L2 normalize encodings and codebook
        encodings = F.normalize(encodings)
        codebook = F.normalize(codebook)

        # Squared euclidean distance to each normalized codebook entry; on
        # unit vectors this is equivalent to cosine distance (up to scale).
        dist = (
            encodings.pow(2).sum(1, keepdim=True)
            - 2 * encodings @ codebook.t()
            + codebook.pow(2).sum(1, keepdim=True).t()
        )
        indices = rearrange((-dist).max(1)[1], "(b t) -> b t", b=latents.size(0))
        z_q = self.decode_code(indices)
        return z_q, indices
diff --git a/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py b/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py
new file mode 100644
index 0000000..d22d88d
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py
@@ -0,0 +1,87 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+import torch
+from torch import nn
+from .fvq import FactorizedVectorQuantize
+
+
class ResidualVQ(nn.Module):
    """Follows Algorithm 1. in https://arxiv.org/pdf/2107.03312.pdf"""

    def __init__(self, *, num_quantizers, codebook_size, **kwargs):
        super().__init__()
        VQ = FactorizedVectorQuantize
        # codebook_size entries are exponents: each layer's true codebook size
        # is 2**size (matches the "2^codebook_size" note at the call site).
        if type(codebook_size) == int:
            codebook_size = [codebook_size] * num_quantizers
        self.layers = nn.ModuleList(
            [VQ(codebook_size=2**size, **kwargs) for size in codebook_size]
        )
        self.num_quantizers = num_quantizers
        self.quantizer_dropout = kwargs.get("quantizer_dropout", 0.0)
        self.dropout_type = kwargs.get("dropout_type", None)

    def forward(self, x, n_quantizers=None):
        """Residually quantize x (B, D, T).

        Returns (quantized_out, all_indices, all_losses, all_quantized), the
        last three stacked over the quantizer-layer dimension.
        """
        quantized_out = 0.0
        residual = x

        all_losses = []
        all_indices = []
        all_quantized = []

        if n_quantizers is None:
            n_quantizers = self.num_quantizers
            if self.training:
                # Quantizer dropout: a random per-sample number of layers.
                # NOTE(review): the base value is num_quantizers + 1 so the
                # idx < n_quantizers mask never trims undropped samples; the
                # 'exp' branch needs num_quantizers >= 4 or randint's range is
                # empty, and other dropout_type values leave `dropout`
                # undefined when n_dropout > 0 — confirm intended.
                n_quantizers = torch.ones((x.shape[0],)) * self.num_quantizers + 1
                if self.dropout_type == "linear":
                    dropout = torch.randint(1, self.num_quantizers + 1, (x.shape[0],))
                elif self.dropout_type == "exp":
                    dropout = torch.randint(
                        1, int(math.log2(self.num_quantizers)), (x.shape[0],)
                    )
                    dropout = torch.pow(2, dropout)
                n_dropout = int(x.shape[0] * self.quantizer_dropout)
                n_quantizers[:n_dropout] = dropout[:n_dropout]
                n_quantizers = n_quantizers.to(x.device)

        for idx, layer in enumerate(self.layers):
            # At eval time simply stop after n_quantizers layers; at training
            # time run all layers and mask per-sample below.
            if not self.training and idx >= n_quantizers:
                break
            quantized, indices, loss = layer(residual)

            # Per-sample mask: layer idx contributes only where idx < n_quantizers.
            mask = (
                torch.full((x.shape[0],), fill_value=idx, device=x.device)
                < n_quantizers
            )

            residual = residual - quantized

            quantized_out = quantized_out + quantized * mask[:, None, None]

            # loss
            loss = (loss * mask).mean()

            all_indices.append(indices)
            all_losses.append(loss)
            all_quantized.append(quantized)
        all_losses, all_indices, all_quantized = map(
            torch.stack, (all_losses, all_indices, all_quantized)
        )
        return quantized_out, all_indices, all_losses, all_quantized

    def vq2emb(self, vq):
        # vq: [n_quantizers, B, T]
        # Sum each layer's decoded embedding, mirroring the residual structure.
        quantized_out = 0.0
        for idx, layer in enumerate(self.layers):
            quantized = layer.vq2emb(vq[idx])
            quantized_out += quantized
        return quantized_out

    def get_emb(self):
        # List of per-layer codebook weight tensors.
        embs = []
        for idx, layer in enumerate(self.layers):
            embs.append(layer.get_emb())
        return embs
diff --git a/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py b/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py
new file mode 100644
index 0000000..146d0f3
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py
@@ -0,0 +1,234 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import numpy as np
+import torch
+import torch.nn as nn
+import math
+from torch.nn import functional as F
+
+
class StyleAdaptiveLayerNorm(nn.Module):
    """LayerNorm whose scale/shift are predicted from a style condition (SALN)."""

    def __init__(self, normalized_shape, eps=1e-5):
        super().__init__()
        self.in_dim = normalized_shape
        self.norm = nn.LayerNorm(self.in_dim, eps=eps, elementwise_affine=False)
        self.style = nn.Linear(self.in_dim, self.in_dim * 2)
        # Bias initialized so the layer starts as identity: gamma = 1, beta = 0.
        self.style.bias.data[: self.in_dim] = 1
        self.style.bias.data[self.in_dim :] = 0

    def forward(self, x, condition):
        """x: (B, T, d); condition: (B, T, d) -> (B, T, d)."""
        pooled = torch.mean(condition, dim=1, keepdim=True)  # (B, 1, d)
        gamma, beta = self.style(pooled).chunk(2, -1)
        return gamma * self.norm(x) + beta
+
+
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding (Vaswani et al.) with dropout.

    The table ``pe`` has shape (max_len, 1, d_model), i.e. position along
    dim 0, as in the classic sequence-first layout.
    """

    def __init__(self, d_model, dropout, max_len=5000):
        super().__init__()

        # Dropout probability (applied functionally in forward).
        self.dropout = dropout
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model)
        )
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer("pe", pe)

    def forward(self, x):
        # NOTE(review): pe is indexed by x.size(0), but TransformerEncoder in
        # this file passes batch-first (B, T, d) input, so this adds a
        # per-batch-index constant rather than per-time-step encodings —
        # looks intended for (T, B, d) input; confirm against training code.
        x = x + self.pe[: x.size(0)]
        return F.dropout(x, self.dropout, training=self.training)
+
+
class TransformerFFNLayer(nn.Module):
    """Transformer feed-forward block: Conv1d over time -> ReLU -> dropout -> Linear."""

    def __init__(
        self, encoder_hidden, conv_filter_size, conv_kernel_size, encoder_dropout
    ):
        super().__init__()

        self.encoder_hidden = encoder_hidden
        self.conv_filter_size = conv_filter_size
        self.conv_kernel_size = conv_kernel_size
        self.encoder_dropout = encoder_dropout

        # First projection is a convolution so the FFN mixes a local temporal
        # context (kernel_size) instead of being purely position-wise.
        self.ffn_1 = nn.Conv1d(
            self.encoder_hidden,
            self.conv_filter_size,
            self.conv_kernel_size,
            padding=self.conv_kernel_size // 2,
        )
        self.ffn_1.weight.data.normal_(0.0, 0.02)
        self.ffn_2 = nn.Linear(self.conv_filter_size, self.encoder_hidden)
        self.ffn_2.weight.data.normal_(0.0, 0.02)

    def forward(self, x):
        # x: (B, T, d)
        x = self.ffn_1(x.permute(0, 2, 1)).permute(
            0, 2, 1
        )  # (B, T, d) -> (B, d, T) -> (B, T, d)
        x = F.relu(x)
        x = F.dropout(x, self.encoder_dropout, training=self.training)
        x = self.ffn_2(x)
        return x
+
+
class TransformerEncoderLayer(nn.Module):
    """Pre-norm transformer layer: (C)LN -> self-attention -> residual, then
    (C)LN -> conv-FFN -> residual. With use_cln=True the layer norms are
    style-conditioned (StyleAdaptiveLayerNorm)."""

    def __init__(
        self,
        encoder_hidden,
        encoder_head,
        conv_filter_size,
        conv_kernel_size,
        encoder_dropout,
        use_cln,
    ):
        super().__init__()
        self.encoder_hidden = encoder_hidden
        self.encoder_head = encoder_head
        self.conv_filter_size = conv_filter_size
        self.conv_kernel_size = conv_kernel_size
        self.encoder_dropout = encoder_dropout
        self.use_cln = use_cln

        if not self.use_cln:
            self.ln_1 = nn.LayerNorm(self.encoder_hidden)
            self.ln_2 = nn.LayerNorm(self.encoder_hidden)
        else:
            self.ln_1 = StyleAdaptiveLayerNorm(self.encoder_hidden)
            self.ln_2 = StyleAdaptiveLayerNorm(self.encoder_hidden)

        self.self_attn = nn.MultiheadAttention(
            self.encoder_hidden, self.encoder_head, batch_first=True
        )

        self.ffn = TransformerFFNLayer(
            self.encoder_hidden,
            self.conv_filter_size,
            self.conv_kernel_size,
            self.encoder_dropout,
        )

    # Note: the "conditon" parameter name is a historical typo kept for
    # compatibility; callers in this file pass it positionally.
    def forward(self, x, key_padding_mask, conditon=None):
        # x: (B, T, d); key_padding_mask: (B, T), mask is 0; condition: (B, T, d)

        # self attention
        residual = x
        if self.use_cln:
            x = self.ln_1(x, conditon)
        else:
            x = self.ln_1(x)

        # Incoming mask uses 0 for padding; MultiheadAttention expects True
        # at padded positions, hence the boolean inversion.
        if key_padding_mask != None:
            key_padding_mask_input = ~(key_padding_mask.bool())
        else:
            key_padding_mask_input = None
        x, _ = self.self_attn(
            query=x, key=x, value=x, key_padding_mask=key_padding_mask_input
        )
        x = F.dropout(x, self.encoder_dropout, training=self.training)
        x = residual + x

        # ffn
        residual = x
        if self.use_cln:
            x = self.ln_2(x, conditon)
        else:
            x = self.ln_2(x)
        x = self.ffn(x)
        x = residual + x

        return x
+
+
class TransformerEncoder(nn.Module):
    """Stack of TransformerEncoderLayer with sinusoidal positions and a final
    (optionally style-conditional) layer norm.

    Each hyper-parameter falls back to the corresponding ``cfg`` attribute
    when passed as None.
    """

    def __init__(
        self,
        enc_emb_tokens=None,
        encoder_layer=4,
        encoder_hidden=256,
        encoder_head=4,
        conv_filter_size=1024,
        conv_kernel_size=5,
        encoder_dropout=0.1,
        use_cln=False,
        cfg=None,
    ):
        super().__init__()

        self.encoder_layer = (
            encoder_layer if encoder_layer is not None else cfg.encoder_layer
        )
        self.encoder_hidden = (
            encoder_hidden if encoder_hidden is not None else cfg.encoder_hidden
        )
        self.encoder_head = (
            encoder_head if encoder_head is not None else cfg.encoder_head
        )
        self.conv_filter_size = (
            conv_filter_size if conv_filter_size is not None else cfg.conv_filter_size
        )
        self.conv_kernel_size = (
            conv_kernel_size if conv_kernel_size is not None else cfg.conv_kernel_size
        )
        self.encoder_dropout = (
            encoder_dropout if encoder_dropout is not None else cfg.encoder_dropout
        )
        self.use_cln = use_cln if use_cln is not None else cfg.use_cln

        # Optional shared token embedding: lets the encoder accept raw id
        # sequences (B, T) instead of pre-embedded features.
        if enc_emb_tokens != None:
            self.use_enc_emb = True
            self.enc_emb_tokens = enc_emb_tokens
        else:
            self.use_enc_emb = False

        self.position_emb = PositionalEncoding(
            self.encoder_hidden, self.encoder_dropout
        )

        self.layers = nn.ModuleList([])
        self.layers.extend(
            [
                TransformerEncoderLayer(
                    self.encoder_hidden,
                    self.encoder_head,
                    self.conv_filter_size,
                    self.conv_kernel_size,
                    self.encoder_dropout,
                    self.use_cln,
                )
                for i in range(self.encoder_layer)
            ]
        )

        if self.use_cln:
            self.last_ln = StyleAdaptiveLayerNorm(self.encoder_hidden)
        else:
            self.last_ln = nn.LayerNorm(self.encoder_hidden)

    def forward(self, x, key_padding_mask, condition=None):
        """x: (B, T, d) features or (B, T) token ids (if enc_emb_tokens was given);
        key_padding_mask: (B, T) with 0 at padded positions, or None."""
        # 2-D input means token ids: embed first, then add positions.
        if len(x.shape) == 2 and self.use_enc_emb:
            x = self.enc_emb_tokens(x)
            x = self.position_emb(x)
        else:
            x = self.position_emb(x)  # (B, T, d)

        for layer in self.layers:
            x = layer(x, key_padding_mask, condition)

        if self.use_cln:
            x = self.last_ln(x, condition)
        else:
            x = self.last_ln(x)

        return x
diff --git a/indextts/utils/maskgct/models/codec/speechtokenizer/model.py b/indextts/utils/maskgct/models/codec/speechtokenizer/model.py
new file mode 100644
index 0000000..b722d38
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/speechtokenizer/model.py
@@ -0,0 +1,184 @@
+# Copyright (c) 2023 Amphion.
+#
+# This code is modified from https://github.com/ZhangXInFD/SpeechTokenizer/blob/main/speechtokenizer/model.py
+# Licensed under Apache License 2.0
+
+from .modules.seanet import SEANetEncoder, SEANetDecoder
+from .modules.quantization import ResidualVectorQuantizer
+import torch.nn as nn
+from einops import rearrange
+import torch
+import numpy as np
+
+
class SpeechTokenizer(nn.Module):
    """SEANet encoder + residual VQ + SEANet decoder speech codec.

    The first RVQ layer carries semantic content; its quantized output can be
    projected to ``semantic_dimension`` via ``self.transform``.
    """

    def __init__(self, config):
        """
        Parameters
        ----------
        config : dict-like
            Model config; read via ``config.get(...)``.
        """
        super().__init__()
        self.encoder = SEANetEncoder(
            n_filters=config.get("n_filters"),
            dimension=config.get("dimension"),
            ratios=config.get("strides"),
            lstm=config.get("lstm_layers"),
            bidirectional=config.get("bidirectional"),
            dilation_base=config.get("dilation_base"),
            residual_kernel_size=config.get("residual_kernel_size"),
            n_residual_layers=config.get("n_residual_layers"),
            activation=config.get("activation"),
        )
        self.sample_rate = config.get("sample_rate")
        self.n_q = config.get("n_q")
        # Total temporal downsampling = product of encoder strides.
        self.downsample_rate = np.prod(config.get("strides"))
        if config.get("dimension") != config.get("semantic_dimension"):
            self.transform = nn.Linear(
                config.get("dimension"), config.get("semantic_dimension")
            )
        else:
            self.transform = nn.Identity()
        self.quantizer = ResidualVectorQuantizer(
            dimension=config.get("dimension"),
            n_q=config.get("n_q"),
            bins=config.get("codebook_size"),
        )
        # Decoder is always unidirectional regardless of the encoder setting.
        self.decoder = SEANetDecoder(
            n_filters=config.get("n_filters"),
            dimension=config.get("dimension"),
            ratios=config.get("strides"),
            lstm=config.get("lstm_layers"),
            bidirectional=False,
            dilation_base=config.get("dilation_base"),
            residual_kernel_size=config.get("residual_kernel_size"),
            n_residual_layers=config.get("n_residual_layers"),
            activation=config.get("activation"),
        )

    @classmethod
    def load_from_checkpoint(cls, config_path: str, ckpt_path: str):
        """Build a SpeechTokenizer from a JSON config file and load weights.

        Parameters
        ----------
        config_path : str
            Path of model configuration file.
        ckpt_path : str
            Path of model checkpoint.

        Returns
        -------
        model : SpeechTokenizer
        """
        import json

        with open(config_path) as f:
            cfg = json.load(f)
        model = cls(cfg)
        params = torch.load(ckpt_path, map_location="cpu")
        model.load_state_dict(params)
        return model

    def forward(self, x: torch.Tensor, n_q: int = None, layers: list = None):
        """Encode, quantize and reconstruct ``x``.

        Parameters
        ----------
        x : torch.Tensor
            Input wavs. Shape: (batch, channels, timesteps).
        n_q : int, optional
            Number of RVQ quantizers used to encode. The default is all layers.
        layers : list[int], optional
            RVQ layers that should return quantized results; defaults to the
            first layer only (historical default ``[0]``).

        Returns
        -------
        o : torch.Tensor
            Output wavs. Shape: (batch, channels, timesteps).
        commit_loss : torch.Tensor
            Commitment loss from the residual vector quantizers.
        feature : torch.Tensor
            First requested layer's output projected to the semantic
            dimension. Shape: (batch, timesteps, dimension).
        """
        # Sentinel instead of a mutable default argument (B006); behavior
        # is unchanged: omitting `layers` still means [0].
        layers = [0] if layers is None else layers
        n_q = n_q if n_q else self.n_q
        e = self.encoder(x)
        quantized, codes, commit_loss, quantized_list = self.quantizer(
            e, n_q=n_q, layers=layers
        )
        feature = rearrange(quantized_list[0], "b d t -> b t d")
        feature = self.transform(feature)
        o = self.decoder(quantized)
        return o, commit_loss, feature

    def forward_feature(self, x: torch.Tensor, layers: list = None):
        """Return the quantized outputs of the requested RVQ layers.

        Parameters
        ----------
        x : torch.Tensor
            Input wavs. Shape: (batch, channels, timesteps).
        layers : list[int], optional
            RVQ layers to return; defaults to all layers.

        Returns
        -------
        quantized_list : list[torch.Tensor]
        """
        e = self.encoder(x)
        layers = layers if layers else list(range(self.n_q))
        quantized, codes, commit_loss, quantized_list = self.quantizer(e, layers=layers)
        return quantized_list

    def encode(self, x: torch.Tensor, n_q: int = None, st: int = None):
        """Encode wavs to RVQ code indices.

        Parameters
        ----------
        x : torch.Tensor
            Input wavs. Shape: (batch, channels, timesteps).
        n_q : int, optional
            Number of quantizers used. The default is all layers.
        st : int, optional
            Start quantizer index in RVQ. The default is 0.

        Returns
        -------
        codes : torch.Tensor
            Indices for each quantizer. Shape: (n_q, batch, timesteps).
        """
        e = self.encoder(x)
        if st is None:
            st = 0
        n_q = n_q if n_q else self.n_q
        codes = self.quantizer.encode(e, n_q=n_q, st=st)
        return codes

    def decode(self, codes: torch.Tensor, st: int = 0):
        """Reconstruct wavs from RVQ ``codes`` starting at quantizer ``st``.

        Parameters
        ----------
        codes : torch.Tensor
            Indices for each quantizer. Shape: (n_q, batch, timesteps).
        st : int, optional
            Start quantizer index in RVQ. The default is 0.

        Returns
        -------
        o : torch.Tensor
            Reconstructed wavs. Shape: (batch, channels, timesteps).
        """
        quantized = self.quantizer.decode(codes, st=st)
        o = self.decoder(quantized)
        return o
diff --git a/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py
new file mode 100644
index 0000000..0581347
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+# This source file is copied from https://github.com/facebookresearch/encodec
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Torch modules."""
+
+# flake8: noqa
+from .conv import (
+ pad1d,
+ unpad1d,
+ NormConv1d,
+ NormConvTranspose1d,
+ NormConv2d,
+ NormConvTranspose2d,
+ SConv1d,
+ SConvTranspose1d,
+)
+from .lstm import SLSTM
+from .seanet import SEANetEncoder, SEANetDecoder
diff --git a/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py
new file mode 100644
index 0000000..0352b8b
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py
@@ -0,0 +1,346 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+# This source file is copied from https://github.com/facebookresearch/encodec
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Convolutional layers wrappers and utilities."""
+
+import math
+import typing as tp
+import warnings
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torch.nn.utils import spectral_norm, weight_norm
+
+from .norm import ConvLayerNorm
+
+
+CONV_NORMALIZATIONS = frozenset(
+ [
+ "none",
+ "weight_norm",
+ "spectral_norm",
+ "time_layer_norm",
+ "layer_norm",
+ "time_group_norm",
+ ]
+)
+
+
+def apply_parametrization_norm(module: nn.Module, norm: str = "none") -> nn.Module:
+ assert norm in CONV_NORMALIZATIONS
+ if norm == "weight_norm":
+ return weight_norm(module)
+ elif norm == "spectral_norm":
+ return spectral_norm(module)
+ else:
+        # We already checked that norm is in CONV_NORMALIZATIONS, so any
+        # other choice doesn't need reparametrization.
+ return module
+
+
+def get_norm_module(
+ module: nn.Module, causal: bool = False, norm: str = "none", **norm_kwargs
+) -> nn.Module:
+ """Return the proper normalization module. If causal is True, this will ensure the returned
+ module is causal, or return an error if the normalization doesn't support causal evaluation.
+ """
+ assert norm in CONV_NORMALIZATIONS
+ if norm == "layer_norm":
+ assert isinstance(module, nn.modules.conv._ConvNd)
+ return ConvLayerNorm(module.out_channels, **norm_kwargs)
+ elif norm == "time_group_norm":
+ if causal:
+ raise ValueError("GroupNorm doesn't support causal evaluation.")
+ assert isinstance(module, nn.modules.conv._ConvNd)
+ return nn.GroupNorm(1, module.out_channels, **norm_kwargs)
+ else:
+ return nn.Identity()
+
+
+def get_extra_padding_for_conv1d(
+ x: torch.Tensor, kernel_size: int, stride: int, padding_total: int = 0
+) -> int:
+ """See `pad_for_conv1d`."""
+ length = x.shape[-1]
+ n_frames = (length - kernel_size + padding_total) / stride + 1
+ ideal_length = (math.ceil(n_frames) - 1) * stride + (kernel_size - padding_total)
+ return ideal_length - length
+
+
+def pad_for_conv1d(
+ x: torch.Tensor, kernel_size: int, stride: int, padding_total: int = 0
+):
+ """Pad for a convolution to make sure that the last window is full.
+ Extra padding is added at the end. This is required to ensure that we can rebuild
+ an output of the same length, as otherwise, even with padding, some time steps
+ might get removed.
+ For instance, with total padding = 4, kernel size = 4, stride = 2:
+ 0 0 1 2 3 4 5 0 0 # (0s are padding)
+ 1 2 3 # (output frames of a convolution, last 0 is never used)
+ 0 0 1 2 3 4 5 0 # (output of tr. conv., but pos. 5 is going to get removed as padding)
+ 1 2 3 4 # once you removed padding, we are missing one time step !
+ """
+ extra_padding = get_extra_padding_for_conv1d(x, kernel_size, stride, padding_total)
+ return F.pad(x, (0, extra_padding))
+
+
+def pad1d(
+ x: torch.Tensor,
+ paddings: tp.Tuple[int, int],
+ mode: str = "zero",
+ value: float = 0.0,
+):
+ """Tiny wrapper around F.pad, just to allow for reflect padding on small input.
+ If this is the case, we insert extra 0 padding to the right before the reflection happen.
+ """
+ length = x.shape[-1]
+ padding_left, padding_right = paddings
+ assert padding_left >= 0 and padding_right >= 0, (padding_left, padding_right)
+ if mode == "reflect":
+ max_pad = max(padding_left, padding_right)
+ extra_pad = 0
+ if length <= max_pad:
+ extra_pad = max_pad - length + 1
+ x = F.pad(x, (0, extra_pad))
+ padded = F.pad(x, paddings, mode, value)
+ end = padded.shape[-1] - extra_pad
+ return padded[..., :end]
+ else:
+ return F.pad(x, paddings, mode, value)
+
+
+def unpad1d(x: torch.Tensor, paddings: tp.Tuple[int, int]):
+ """Remove padding from x, handling properly zero padding. Only for 1d!"""
+ padding_left, padding_right = paddings
+ assert padding_left >= 0 and padding_right >= 0, (padding_left, padding_right)
+ assert (padding_left + padding_right) <= x.shape[-1]
+ end = x.shape[-1] - padding_right
+ return x[..., padding_left:end]
+
+
+class NormConv1d(nn.Module):
+ """Wrapper around Conv1d and normalization applied to this conv
+ to provide a uniform interface across normalization approaches.
+ """
+
+ def __init__(
+ self,
+ *args,
+ causal: bool = False,
+ norm: str = "none",
+ norm_kwargs: tp.Dict[str, tp.Any] = {},
+ **kwargs,
+ ):
+ super().__init__()
+ self.conv = apply_parametrization_norm(nn.Conv1d(*args, **kwargs), norm)
+ self.norm = get_norm_module(self.conv, causal, norm, **norm_kwargs)
+ self.norm_type = norm
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.norm(x)
+ return x
+
+
+class NormConv2d(nn.Module):
+ """Wrapper around Conv2d and normalization applied to this conv
+ to provide a uniform interface across normalization approaches.
+ """
+
+ def __init__(
+ self,
+ *args,
+ norm: str = "none",
+ norm_kwargs: tp.Dict[str, tp.Any] = {},
+ **kwargs,
+ ):
+ super().__init__()
+ self.conv = apply_parametrization_norm(nn.Conv2d(*args, **kwargs), norm)
+ self.norm = get_norm_module(self.conv, causal=False, norm=norm, **norm_kwargs)
+ self.norm_type = norm
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.norm(x)
+ return x
+
+
+class NormConvTranspose1d(nn.Module):
+ """Wrapper around ConvTranspose1d and normalization applied to this conv
+ to provide a uniform interface across normalization approaches.
+ """
+
+ def __init__(
+ self,
+ *args,
+ causal: bool = False,
+ norm: str = "none",
+ norm_kwargs: tp.Dict[str, tp.Any] = {},
+ **kwargs,
+ ):
+ super().__init__()
+ self.convtr = apply_parametrization_norm(
+ nn.ConvTranspose1d(*args, **kwargs), norm
+ )
+ self.norm = get_norm_module(self.convtr, causal, norm, **norm_kwargs)
+ self.norm_type = norm
+
+ def forward(self, x):
+ x = self.convtr(x)
+ x = self.norm(x)
+ return x
+
+
+class NormConvTranspose2d(nn.Module):
+ """Wrapper around ConvTranspose2d and normalization applied to this conv
+ to provide a uniform interface across normalization approaches.
+ """
+
+ def __init__(
+ self,
+ *args,
+ norm: str = "none",
+ norm_kwargs: tp.Dict[str, tp.Any] = {},
+ **kwargs,
+ ):
+ super().__init__()
+ self.convtr = apply_parametrization_norm(
+ nn.ConvTranspose2d(*args, **kwargs), norm
+ )
+ self.norm = get_norm_module(self.convtr, causal=False, norm=norm, **norm_kwargs)
+
+ def forward(self, x):
+ x = self.convtr(x)
+ x = self.norm(x)
+ return x
+
+
+class SConv1d(nn.Module):
+ """Conv1d with some builtin handling of asymmetric or causal padding
+ and normalization.
+ """
+
+ def __init__(
+ self,
+ in_channels: int,
+ out_channels: int,
+ kernel_size: int,
+ stride: int = 1,
+ dilation: int = 1,
+ groups: int = 1,
+ bias: bool = True,
+ causal: bool = False,
+ norm: str = "none",
+ norm_kwargs: tp.Dict[str, tp.Any] = {},
+ pad_mode: str = "reflect",
+ ):
+ super().__init__()
+ # warn user on unusual setup between dilation and stride
+ if stride > 1 and dilation > 1:
+ warnings.warn(
+ "SConv1d has been initialized with stride > 1 and dilation > 1"
+ f" (kernel_size={kernel_size} stride={stride}, dilation={dilation})."
+ )
+ self.conv = NormConv1d(
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride,
+ dilation=dilation,
+ groups=groups,
+ bias=bias,
+ causal=causal,
+ norm=norm,
+ norm_kwargs=norm_kwargs,
+ )
+ self.causal = causal
+ self.pad_mode = pad_mode
+
+ def forward(self, x):
+ B, C, T = x.shape
+ kernel_size = self.conv.conv.kernel_size[0]
+ stride = self.conv.conv.stride[0]
+ dilation = self.conv.conv.dilation[0]
+ padding_total = (kernel_size - 1) * dilation - (stride - 1)
+ extra_padding = get_extra_padding_for_conv1d(
+ x, kernel_size, stride, padding_total
+ )
+ if self.causal:
+ # Left padding for causal
+ x = pad1d(x, (padding_total, extra_padding), mode=self.pad_mode)
+ else:
+ # Asymmetric padding required for odd strides
+ padding_right = padding_total // 2
+ padding_left = padding_total - padding_right
+ x = pad1d(
+ x, (padding_left, padding_right + extra_padding), mode=self.pad_mode
+ )
+ return self.conv(x)
+
+
+class SConvTranspose1d(nn.Module):
+ """ConvTranspose1d with some builtin handling of asymmetric or causal padding
+ and normalization.
+ """
+
+ def __init__(
+ self,
+ in_channels: int,
+ out_channels: int,
+ kernel_size: int,
+ stride: int = 1,
+ causal: bool = False,
+ norm: str = "none",
+ trim_right_ratio: float = 1.0,
+ norm_kwargs: tp.Dict[str, tp.Any] = {},
+ ):
+ super().__init__()
+ self.convtr = NormConvTranspose1d(
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride,
+ causal=causal,
+ norm=norm,
+ norm_kwargs=norm_kwargs,
+ )
+ self.causal = causal
+ self.trim_right_ratio = trim_right_ratio
+ assert (
+ self.causal or self.trim_right_ratio == 1.0
+ ), "`trim_right_ratio` != 1.0 only makes sense for causal convolutions"
+ assert self.trim_right_ratio >= 0.0 and self.trim_right_ratio <= 1.0
+
+ def forward(self, x):
+ kernel_size = self.convtr.convtr.kernel_size[0]
+ stride = self.convtr.convtr.stride[0]
+ padding_total = kernel_size - stride
+
+ y = self.convtr(x)
+
+ # We will only trim fixed padding. Extra padding from `pad_for_conv1d` would be
+ # removed at the very end, when keeping only the right length for the output,
+ # as removing it here would require also passing the length at the matching layer
+ # in the encoder.
+ if self.causal:
+ # Trim the padding on the right according to the specified ratio
+ # if trim_right_ratio = 1.0, trim everything from right
+ padding_right = math.ceil(padding_total * self.trim_right_ratio)
+ padding_left = padding_total - padding_right
+ y = unpad1d(y, (padding_left, padding_right))
+ else:
+ # Asymmetric padding required for odd strides
+ padding_right = padding_total // 2
+ padding_left = padding_total - padding_right
+ y = unpad1d(y, (padding_left, padding_right))
+ return y
diff --git a/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py
new file mode 100644
index 0000000..7f7e431
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py
@@ -0,0 +1,46 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+# This source file is copied from https://github.com/facebookresearch/encodec
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""LSTM layers module."""
+
+from torch import nn
+
+
+class SLSTM(nn.Module):
+ """
+ LSTM without worrying about the hidden state, nor the layout of the data.
+ Expects input as convolutional layout.
+ """
+
+ def __init__(
+ self,
+ dimension: int,
+ num_layers: int = 2,
+ skip: bool = True,
+ bidirectional: bool = False,
+ ):
+ super().__init__()
+ self.bidirectional = bidirectional
+ self.skip = skip
+ self.lstm = nn.LSTM(
+ dimension, dimension, num_layers, bidirectional=bidirectional
+ )
+
+ def forward(self, x):
+ x = x.permute(2, 0, 1)
+ y, _ = self.lstm(x)
+ if self.bidirectional:
+ x = x.repeat(1, 1, 2)
+ if self.skip:
+ y = y + x
+ y = y.permute(1, 2, 0)
+ return y
diff --git a/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py
new file mode 100644
index 0000000..ff5eaef
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+# This source file is copied from https://github.com/facebookresearch/encodec
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Normalization modules."""
+
+import typing as tp
+
+import einops
+import torch
+from torch import nn
+
+
+class ConvLayerNorm(nn.LayerNorm):
+ """
+ Convolution-friendly LayerNorm that moves channels to last dimensions
+ before running the normalization and moves them back to original position right after.
+ """
+
+ def __init__(
+ self, normalized_shape: tp.Union[int, tp.List[int], torch.Size], **kwargs
+ ):
+ super().__init__(normalized_shape, **kwargs)
+
+    def forward(self, x):
+        x = einops.rearrange(x, "b ... t -> b t ...")
+        x = super().forward(x)
+        x = einops.rearrange(x, "b t ... -> b ... t")
+        return x
diff --git a/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py
new file mode 100644
index 0000000..79d90a1
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+# This source file is copied from https://github.com/facebookresearch/encodec
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+# flake8: noqa
+from .vq import QuantizedResult, ResidualVectorQuantizer
diff --git a/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py
new file mode 100644
index 0000000..5695ea8
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py
@@ -0,0 +1,317 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+# This source file is copied from https://github.com/facebookresearch/encodec
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Arithmetic coder."""
+
+import io
+import math
+import random
+import typing as tp
+import torch
+
+from ..binary import BitPacker, BitUnpacker
+
+
+def build_stable_quantized_cdf(
+ pdf: torch.Tensor,
+ total_range_bits: int,
+ roundoff: float = 1e-8,
+ min_range: int = 2,
+ check: bool = True,
+) -> torch.Tensor:
+ """Turn the given PDF into a quantized CDF that splits
+ [0, 2 ** self.total_range_bits - 1] into chunks of size roughly proportional
+ to the PDF.
+
+ Args:
+ pdf (torch.Tensor): probability distribution, shape should be `[N]`.
+ total_range_bits (int): see `ArithmeticCoder`, the typical range we expect
+ during the coding process is `[0, 2 ** total_range_bits - 1]`.
+ roundoff (float): will round the pdf up to that level to remove difference coming
+ from e.g. evaluating the Language Model on different architectures.
+ min_range (int): minimum range width. Should always be at least 2 for numerical
+            stability. Use this to avoid pathological behavior if a value
+            that is expected to be rare actually happens in real life.
+ check (bool): if True, checks that nothing bad happened, can be deactivated for speed.
+ """
+ pdf = pdf.detach()
+ if roundoff:
+ pdf = (pdf / roundoff).floor() * roundoff
+ # interpolate with uniform distribution to achieve desired minimum probability.
+ total_range = 2**total_range_bits
+ cardinality = len(pdf)
+ alpha = min_range * cardinality / total_range
+ assert alpha <= 1, "you must reduce min_range"
+ ranges = (((1 - alpha) * total_range) * pdf).floor().long()
+ ranges += min_range
+ quantized_cdf = torch.cumsum(ranges, dim=-1)
+ if min_range < 2:
+ raise ValueError("min_range must be at least 2.")
+ if check:
+ assert quantized_cdf[-1] <= 2**total_range_bits, quantized_cdf[-1]
+ if (
+ (quantized_cdf[1:] - quantized_cdf[:-1]) < min_range
+ ).any() or quantized_cdf[0] < min_range:
+ raise ValueError("You must increase your total_range_bits.")
+ return quantized_cdf
+
+
+class ArithmeticCoder:
+ """ArithmeticCoder,
+ Let us take a distribution `p` over `N` symbols, and assume we have a stream
+ of random variables `s_t` sampled from `p`. Let us assume that we have a budget
+ of `B` bits that we can afford to write on device. There are `2**B` possible numbers,
+ corresponding to the range `[0, 2 ** B - 1]`. We can map each of those number to a single
+ sequence `(s_t)` by doing the following:
+
+    1) Initialize the current range to `[0, 2 ** B - 1]`.
+ 2) For each time step t, split the current range into contiguous chunks,
+ one for each possible outcome, with size roughly proportional to `p`.
+ For instance, if `p = [0.75, 0.25]`, and the range is `[0, 3]`, the chunks
+ would be `{[0, 2], [3, 3]}`.
+ 3) Select the chunk corresponding to `s_t`, and replace the current range with this.
+ 4) When done encoding all the values, just select any value remaining in the range.
+
+ You will notice that this procedure can fail: for instance if at any point in time
+ the range is smaller than `N`, then we can no longer assign a non-empty chunk to each
+ possible outcome. Intuitively, the more likely a value is, the less the range width
+ will reduce, and the longer we can go on encoding values. This makes sense: for any efficient
+ coding scheme, likely outcomes would take less bits, and more of them can be coded
+ with a fixed budget.
+
+ In practice, we do not know `B` ahead of time, but we have a way to inject new bits
+ when the current range decreases below a given limit (given by `total_range_bits`), without
+ having to redo all the computations. If we encode mostly likely values, we will seldom
+ need to inject new bits, but a single rare value can deplete our stock of entropy!
+
+ In this explanation, we assumed that the distribution `p` was constant. In fact, the present
+ code works for any sequence `(p_t)` possibly different for each timestep.
+ We also assume that `s_t ~ p_t`, but that doesn't need to be true, although the smaller
+ the KL between the true distribution and `p_t`, the most efficient the coding will be.
+
+ Args:
+ fo (IO[bytes]): file-like object to which the bytes will be written to.
+ total_range_bits (int): the range `M` described above is `2 ** total_range_bits.
+ Any time the current range width fall under this limit, new bits will
+ be injected to rescale the initial range.
+ """
+
+ def __init__(self, fo: tp.IO[bytes], total_range_bits: int = 24):
+ assert total_range_bits <= 30
+ self.total_range_bits = total_range_bits
+ self.packer = BitPacker(bits=1, fo=fo) # we push single bits at a time.
+ self.low: int = 0
+ self.high: int = 0
+ self.max_bit: int = -1
+ self._dbg: tp.List[tp.Any] = []
+ self._dbg2: tp.List[tp.Any] = []
+
+ @property
+ def delta(self) -> int:
+ """Return the current range width."""
+ return self.high - self.low + 1
+
+ def _flush_common_prefix(self):
+ # If self.low and self.high start with the sames bits,
+ # those won't change anymore as we always just increase the range
+ # by powers of 2, and we can flush them out to the bit stream.
+ assert self.high >= self.low, (self.low, self.high)
+ assert self.high < 2 ** (self.max_bit + 1)
+ while self.max_bit >= 0:
+ b1 = self.low >> self.max_bit
+ b2 = self.high >> self.max_bit
+ if b1 == b2:
+ self.low -= b1 << self.max_bit
+ self.high -= b1 << self.max_bit
+ assert self.high >= self.low, (self.high, self.low, self.max_bit)
+ assert self.low >= 0
+ self.max_bit -= 1
+ self.packer.push(b1)
+ else:
+ break
+
+ def push(self, symbol: int, quantized_cdf: torch.Tensor):
+ """Push the given symbol on the stream, flushing out bits
+ if possible.
+
+ Args:
+ symbol (int): symbol to encode with the AC.
+ quantized_cdf (torch.Tensor): use `build_stable_quantized_cdf`
+ to build this from your pdf estimate.
+ """
+ while self.delta < 2**self.total_range_bits:
+ self.low *= 2
+ self.high = self.high * 2 + 1
+ self.max_bit += 1
+
+ range_low = 0 if symbol == 0 else quantized_cdf[symbol - 1].item()
+ range_high = quantized_cdf[symbol].item() - 1
+ effective_low = int(
+ math.ceil(range_low * (self.delta / (2**self.total_range_bits)))
+ )
+ effective_high = int(
+ math.floor(range_high * (self.delta / (2**self.total_range_bits)))
+ )
+ assert self.low <= self.high
+ self.high = self.low + effective_high
+ self.low = self.low + effective_low
+ assert self.low <= self.high, (
+ effective_low,
+ effective_high,
+ range_low,
+ range_high,
+ )
+ self._dbg.append((self.low, self.high))
+ self._dbg2.append((self.low, self.high))
+ outs = self._flush_common_prefix()
+ assert self.low <= self.high
+ assert self.max_bit >= -1
+ assert self.max_bit <= 61, self.max_bit
+ return outs
+
+ def flush(self):
+ """Flush the remaining information to the stream."""
+ while self.max_bit >= 0:
+ b1 = (self.low >> self.max_bit) & 1
+ self.packer.push(b1)
+ self.max_bit -= 1
+ self.packer.flush()
+
+
+class ArithmeticDecoder:
+ """ArithmeticDecoder, see `ArithmeticCoder` for a detailed explanation.
+
+ Note that this must be called with **exactly** the same parameters and sequence
+ of quantized cdf as the arithmetic encoder or the wrong values will be decoded.
+
+    If the AC encoder current range is [L, H], with `L` and `H` having the same common
+ prefix (i.e. the same most significant bits), then this prefix will be flushed to the stream.
+ For instances, having read 3 bits `b1 b2 b3`, we know that `[L, H]` is contained inside
+    `[b1 b2 b3 0 ... 0 b1 b2 b3 1 ... 1]`. Now this specific sub-range can only be obtained
+ for a specific sequence of symbols and a binary-search allows us to decode those symbols.
+ At some point, the prefix `b1 b2 b3` will no longer be sufficient to decode new symbols,
+ and we will need to read new bits from the stream and repeat the process.
+
+ """
+
+ def __init__(self, fo: tp.IO[bytes], total_range_bits: int = 24):
+ self.total_range_bits = total_range_bits
+ self.low: int = 0
+ self.high: int = 0
+ self.current: int = 0
+ self.max_bit: int = -1
+ self.unpacker = BitUnpacker(bits=1, fo=fo) # we pull single bits at a time.
+ # Following is for debugging
+ self._dbg: tp.List[tp.Any] = []
+ self._dbg2: tp.List[tp.Any] = []
+ self._last: tp.Any = None
+
+ @property
+ def delta(self) -> int:
+ return self.high - self.low + 1
+
+ def _flush_common_prefix(self):
+ # Given the current range [L, H], if both have a common prefix,
+ # we know we can remove it from our representation to avoid handling large numbers.
+ while self.max_bit >= 0:
+ b1 = self.low >> self.max_bit
+ b2 = self.high >> self.max_bit
+ if b1 == b2:
+ self.low -= b1 << self.max_bit
+ self.high -= b1 << self.max_bit
+ self.current -= b1 << self.max_bit
+ assert self.high >= self.low
+ assert self.low >= 0
+ self.max_bit -= 1
+ else:
+ break
+
+ def pull(self, quantized_cdf: torch.Tensor) -> tp.Optional[int]:
+ """Pull a symbol, reading as many bits from the stream as required.
+ This returns `None` when the stream has been exhausted.
+
+ Args:
+ quantized_cdf (torch.Tensor): use `build_stable_quantized_cdf`
+                to build this from your pdf estimate. This must be **exactly**
+ the same cdf as the one used at encoding time.
+ """
+ while self.delta < 2**self.total_range_bits:
+ bit = self.unpacker.pull()
+ if bit is None:
+ return None
+ self.low *= 2
+ self.high = self.high * 2 + 1
+ self.current = self.current * 2 + bit
+ self.max_bit += 1
+
+ def bin_search(low_idx: int, high_idx: int):
+ # Binary search is not just for coding interviews :)
+ if high_idx < low_idx:
+ raise RuntimeError("Binary search failed")
+ mid = (low_idx + high_idx) // 2
+ range_low = quantized_cdf[mid - 1].item() if mid > 0 else 0
+ range_high = quantized_cdf[mid].item() - 1
+ effective_low = int(
+ math.ceil(range_low * (self.delta / (2**self.total_range_bits)))
+ )
+ effective_high = int(
+ math.floor(range_high * (self.delta / (2**self.total_range_bits)))
+ )
+ low = effective_low + self.low
+ high = effective_high + self.low
+ if self.current >= low:
+ if self.current <= high:
+ return (mid, low, high, self.current)
+ else:
+ return bin_search(mid + 1, high_idx)
+ else:
+ return bin_search(low_idx, mid - 1)
+
+ self._last = (self.low, self.high, self.current, self.max_bit)
+ sym, self.low, self.high, self.current = bin_search(0, len(quantized_cdf) - 1)
+ self._dbg.append((self.low, self.high, self.current))
+ self._flush_common_prefix()
+ self._dbg2.append((self.low, self.high, self.current))
+
+ return sym
+
+
+def test():
+ torch.manual_seed(1234)
+ random.seed(1234)
+ for _ in range(4):
+ pdfs = []
+ cardinality = random.randrange(4000)
+ steps = random.randrange(100, 500)
+ fo = io.BytesIO()
+ encoder = ArithmeticCoder(fo)
+ symbols = []
+ for step in range(steps):
+ pdf = torch.softmax(torch.randn(cardinality), dim=0)
+ pdfs.append(pdf)
+ q_cdf = build_stable_quantized_cdf(pdf, encoder.total_range_bits)
+ symbol = torch.multinomial(pdf, 1).item()
+ symbols.append(symbol)
+ encoder.push(symbol, q_cdf)
+ encoder.flush()
+
+ fo.seek(0)
+ decoder = ArithmeticDecoder(fo)
+ for idx, (pdf, symbol) in enumerate(zip(pdfs, symbols)):
+ q_cdf = build_stable_quantized_cdf(pdf, encoder.total_range_bits)
+ decoded_symbol = decoder.pull(q_cdf)
+ assert decoded_symbol == symbol, idx
+ assert decoder.pull(torch.zeros(1)) is None
+
+
+if __name__ == "__main__":
+ test()
diff --git a/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py
new file mode 100644
index 0000000..5799725
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py
@@ -0,0 +1,388 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+# This source file is copied from https://github.com/facebookresearch/encodec
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+#
+# This implementation is inspired from
+# https://github.com/lucidrains/vector-quantize-pytorch
+# which is released under MIT License. Hereafter, the original license:
+# MIT License
+#
+# Copyright (c) 2020 Phil Wang
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+"""Core vector quantization implementation."""
+import typing as tp
+
+from einops import rearrange, repeat
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+from .distrib import broadcast_tensors, rank
+
+
+def default(val: tp.Any, d: tp.Any) -> tp.Any:
+ return val if val is not None else d
+
+
+def ema_inplace(moving_avg, new, decay: float):
+ moving_avg.data.mul_(decay).add_(new, alpha=(1 - decay))
+
+
+def laplace_smoothing(x, n_categories: int, epsilon: float = 1e-5):
+ return (x + epsilon) / (x.sum() + n_categories * epsilon)
+
+
+def uniform_init(*shape: int):
+ t = torch.empty(shape)
+ nn.init.kaiming_uniform_(t)
+ return t
+
+
+def sample_vectors(samples, num: int):
+ num_samples, device = samples.shape[0], samples.device
+
+ if num_samples >= num:
+ indices = torch.randperm(num_samples, device=device)[:num]
+ else:
+ indices = torch.randint(0, num_samples, (num,), device=device)
+
+ return samples[indices]
+
+
+def kmeans(samples, num_clusters: int, num_iters: int = 10):
+ dim, dtype = samples.shape[-1], samples.dtype
+
+ means = sample_vectors(samples, num_clusters)
+
+ for _ in range(num_iters):
+ diffs = rearrange(samples, "n d -> n () d") - rearrange(means, "c d -> () c d")
+ dists = -(diffs**2).sum(dim=-1)
+
+ buckets = dists.max(dim=-1).indices
+ bins = torch.bincount(buckets, minlength=num_clusters)
+ zero_mask = bins == 0
+ bins_min_clamped = bins.masked_fill(zero_mask, 1)
+
+ new_means = buckets.new_zeros(num_clusters, dim, dtype=dtype)
+ new_means.scatter_add_(0, repeat(buckets, "n -> n d", d=dim), samples)
+ new_means = new_means / bins_min_clamped[..., None]
+
+ means = torch.where(zero_mask[..., None], means, new_means)
+
+ return means, bins
+
+
+class EuclideanCodebook(nn.Module):
+ """Codebook with Euclidean distance.
+ Args:
+ dim (int): Dimension.
+ codebook_size (int): Codebook size.
+ kmeans_init (bool): Whether to use k-means to initialize the codebooks.
+ If set to true, run the k-means algorithm on the first training batch and use
+ the learned centroids as initialization.
+ kmeans_iters (int): Number of iterations used for k-means algorithm at initialization.
+ decay (float): Decay for exponential moving average over the codebooks.
+ epsilon (float): Epsilon value for numerical stability.
+ threshold_ema_dead_code (int): Threshold for dead code expiration. Replace any codes
+ that have an exponential moving average cluster size less than the specified threshold with
+ randomly selected vector from the current batch.
+ """
+
+ def __init__(
+ self,
+ dim: int,
+ codebook_size: int,
+ kmeans_init: int = False,
+ kmeans_iters: int = 10,
+ decay: float = 0.99,
+ epsilon: float = 1e-5,
+ threshold_ema_dead_code: int = 2,
+ ):
+ super().__init__()
+ self.decay = decay
+ init_fn: tp.Union[tp.Callable[..., torch.Tensor], tp.Any] = (
+ uniform_init if not kmeans_init else torch.zeros
+ )
+ embed = init_fn(codebook_size, dim)
+
+ self.codebook_size = codebook_size
+
+ self.kmeans_iters = kmeans_iters
+ self.epsilon = epsilon
+ self.threshold_ema_dead_code = threshold_ema_dead_code
+
+ self.register_buffer("inited", torch.Tensor([not kmeans_init]))
+ self.register_buffer("cluster_size", torch.zeros(codebook_size))
+ self.register_buffer("embed", embed)
+ self.register_buffer("embed_avg", embed.clone())
+
+ @torch.jit.ignore
+ def init_embed_(self, data):
+ if self.inited:
+ return
+
+ embed, cluster_size = kmeans(data, self.codebook_size, self.kmeans_iters)
+ self.embed.data.copy_(embed)
+ self.embed_avg.data.copy_(embed.clone())
+ self.cluster_size.data.copy_(cluster_size)
+ self.inited.data.copy_(torch.Tensor([True]))
+ # Make sure all buffers across workers are in sync after initialization
+ # broadcast_tensors(self.buffers())
+
+ def replace_(self, samples, mask):
+ modified_codebook = torch.where(
+ mask[..., None], sample_vectors(samples, self.codebook_size), self.embed
+ )
+ self.embed.data.copy_(modified_codebook)
+
+ def expire_codes_(self, batch_samples):
+ if self.threshold_ema_dead_code == 0:
+ return
+
+ expired_codes = self.cluster_size < self.threshold_ema_dead_code
+ if not torch.any(expired_codes):
+ return
+
+ batch_samples = rearrange(batch_samples, "... d -> (...) d")
+ self.replace_(batch_samples, mask=expired_codes)
+ # broadcast_tensors(self.buffers())
+
+ def preprocess(self, x):
+ x = rearrange(x, "... d -> (...) d")
+ return x
+
+ def quantize(self, x):
+ embed = self.embed.t()
+ dist = -(
+ x.pow(2).sum(1, keepdim=True)
+ - 2 * x @ embed
+ + embed.pow(2).sum(0, keepdim=True)
+ )
+ embed_ind = dist.max(dim=-1).indices
+ return embed_ind
+
+ def postprocess_emb(self, embed_ind, shape):
+ return embed_ind.view(*shape[:-1])
+
+ def dequantize(self, embed_ind):
+ quantize = F.embedding(embed_ind, self.embed)
+ return quantize
+
+ def encode(self, x):
+ shape = x.shape
+ # pre-process
+ x = self.preprocess(x)
+ # quantize
+ embed_ind = self.quantize(x)
+ # post-process
+ embed_ind = self.postprocess_emb(embed_ind, shape)
+ return embed_ind
+
+ def decode(self, embed_ind):
+ quantize = self.dequantize(embed_ind)
+ return quantize
+
    def forward(self, x):
        """Quantize `x`; in training mode also update the codebook via EMA.

        Returns:
            quantize: codebook vectors for each position (same leading shape as x).
            embed_ind: the selected code indices.
        """
        shape, dtype = x.shape, x.dtype
        x = self.preprocess(x)

        # One-time k-means initialization (no-op once `inited` is set).
        self.init_embed_(x)

        embed_ind = self.quantize(x)
        embed_onehot = F.one_hot(embed_ind, self.codebook_size).type(dtype)
        embed_ind = self.postprocess_emb(embed_ind, shape)
        quantize = self.dequantize(embed_ind)

        if self.training:
            # We do the expiry of code at that point as buffers are in sync
            # and all the workers will take the same decision.
            self.expire_codes_(x)
            # EMA-update per-code usage counts and running sums, then
            # renormalize with Laplace smoothing to avoid division by zero
            # for unused codes. Order matters: counts first, then sums.
            ema_inplace(self.cluster_size, embed_onehot.sum(0), self.decay)
            embed_sum = x.t() @ embed_onehot
            ema_inplace(self.embed_avg, embed_sum.t(), self.decay)
            cluster_size = (
                laplace_smoothing(self.cluster_size, self.codebook_size, self.epsilon)
                * self.cluster_size.sum()
            )
            embed_normalized = self.embed_avg / cluster_size.unsqueeze(1)
            self.embed.data.copy_(embed_normalized)

        return quantize, embed_ind
+
+
class VectorQuantization(nn.Module):
    """Vector quantization implementation.
    Currently supports only euclidean distance.

    Optionally projects inputs into a smaller codebook dimension (and back)
    when `codebook_dim` differs from `dim`. Training uses a straight-through
    estimator plus an optional commitment loss.

    Args:
        dim (int): Dimension
        codebook_size (int): Codebook size
        codebook_dim (int): Codebook dimension. If not defined, uses the specified dimension in dim.
        decay (float): Decay for exponential moving average over the codebooks.
        epsilon (float): Epsilon value for numerical stability.
        kmeans_init (bool): Whether to use kmeans to initialize the codebooks.
        kmeans_iters (int): Number of iterations used for kmeans initialization.
        threshold_ema_dead_code (int): Threshold for dead code expiration. Replace any codes
        that have an exponential moving average cluster size less than the specified threshold with
        randomly selected vector from the current batch.
        commitment_weight (float): Weight for commitment loss.
    """

    def __init__(
        self,
        dim: int,
        codebook_size: int,
        codebook_dim: tp.Optional[int] = None,
        decay: float = 0.99,
        epsilon: float = 1e-5,
        kmeans_init: bool = True,
        kmeans_iters: int = 50,
        threshold_ema_dead_code: int = 2,
        commitment_weight: float = 1.0,
    ):
        super().__init__()
        _codebook_dim: int = default(codebook_dim, dim)

        # Only insert projection layers when the codebook lives in a
        # different dimensionality than the inputs.
        requires_projection = _codebook_dim != dim
        self.project_in = (
            nn.Linear(dim, _codebook_dim) if requires_projection else nn.Identity()
        )
        self.project_out = (
            nn.Linear(_codebook_dim, dim) if requires_projection else nn.Identity()
        )

        self.epsilon = epsilon
        self.commitment_weight = commitment_weight

        self._codebook = EuclideanCodebook(
            dim=_codebook_dim,
            codebook_size=codebook_size,
            kmeans_init=kmeans_init,
            kmeans_iters=kmeans_iters,
            decay=decay,
            epsilon=epsilon,
            threshold_ema_dead_code=threshold_ema_dead_code,
        )
        self.codebook_size = codebook_size

    @property
    def codebook(self):
        # Raw codebook tensor of the underlying EuclideanCodebook.
        return self._codebook.embed

    def encode(self, x):
        """Encode a (B, D, N) tensor into (B, N) code indices."""
        x = rearrange(x, "b d n -> b n d")
        x = self.project_in(x)
        embed_in = self._codebook.encode(x)
        return embed_in

    def decode(self, embed_ind):
        """Decode (B, N) code indices back into a (B, D, N) tensor."""
        quantize = self._codebook.decode(embed_ind)
        quantize = self.project_out(quantize)
        quantize = rearrange(quantize, "b n d -> b d n")
        return quantize

    def forward(self, x):
        """Quantize a (B, D, N) tensor.

        Returns:
            quantize: quantized tensor, same shape as `x`.
            embed_ind: selected code indices.
            loss: commitment loss (zero outside training).
        """
        device = x.device
        x = rearrange(x, "b d n -> b n d")
        x = self.project_in(x)

        quantize, embed_ind = self._codebook(x)

        if self.training:
            # Straight-through estimator: forward uses the codes,
            # backward passes gradients through to `x` unchanged.
            quantize = x + (quantize - x).detach()

        loss = torch.tensor([0.0], device=device, requires_grad=self.training)

        if self.training:
            if self.commitment_weight > 0:
                # Pull encoder outputs towards their (detached) codes.
                commit_loss = F.mse_loss(quantize.detach(), x)
                loss = loss + commit_loss * self.commitment_weight

        quantize = self.project_out(quantize)
        quantize = rearrange(quantize, "b n d -> b d n")
        return quantize, embed_ind, loss
+
+
class ResidualVectorQuantization(nn.Module):
    """Residual vector quantization implementation.
    Follows Algorithm 1. in https://arxiv.org/pdf/2107.03312.pdf
    Each quantizer encodes the residual left over by the previous ones.
    """

    def __init__(self, *, num_quantizers, **kwargs):
        super().__init__()
        self.layers = nn.ModuleList(
            [VectorQuantization(**kwargs) for _ in range(num_quantizers)]
        )

    def forward(
        self, x, n_q: tp.Optional[int] = None, layers: tp.Optional[list] = None
    ):
        """Quantize `x` with the first `n_q` quantizers.

        Returns the summed quantization, stacked indices, stacked losses, and
        (when `layers` is given) the per-layer quantized outputs for those layers.
        """
        num_active = n_q or len(self.layers)

        summed = 0.0
        leftover = x
        losses: list = []
        indices_list: list = []
        picked: list = []

        for position, vq_layer in enumerate(self.layers[:num_active]):
            quantized, idx_tensor, vq_loss = vq_layer(leftover)
            leftover = leftover - quantized
            summed = summed + quantized
            indices_list.append(idx_tensor)
            losses.append(vq_loss)
            if layers and position in layers:
                picked.append(quantized)

        return summed, torch.stack(indices_list), torch.stack(losses), picked

    def encode(
        self, x: torch.Tensor, n_q: tp.Optional[int] = None, st: tp.Optional[int] = None
    ) -> torch.Tensor:
        """Encode `x` into stacked indices using quantizer layers st .. n_q-1."""
        stop = n_q or len(self.layers)
        start = st or 0

        leftover = x
        codes: list = []
        for vq_layer in self.layers[start:stop]:
            idx_tensor = vq_layer.encode(leftover)
            leftover = leftover - vq_layer.decode(idx_tensor)
            codes.append(idx_tensor)
        return torch.stack(codes)

    def decode(self, q_indices: torch.Tensor, st: int = 0) -> torch.Tensor:
        """Sum each quantizer's decoded contribution, starting at layer `st`."""
        out = torch.tensor(0.0, device=q_indices.device)
        for offset, idx_tensor in enumerate(q_indices):
            out = out + self.layers[st + offset].decode(idx_tensor)
        return out
diff --git a/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py
new file mode 100644
index 0000000..7b9a9b8
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py
@@ -0,0 +1,135 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+# This source file is copied from https://github.com/facebookresearch/encodec
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Torch distributed utilities."""
+
+import typing as tp
+
+import torch
+
+
def rank():
    """Return this process's rank; 0 when torch.distributed is not initialized."""
    if not torch.distributed.is_initialized():
        return 0
    return torch.distributed.get_rank()
+
+
def world_size():
    """Return the number of workers; 1 when torch.distributed is not initialized."""
    if not torch.distributed.is_initialized():
        return 1
    return torch.distributed.get_world_size()
+
+
def is_distributed():
    """True when running under an initialized process group with >1 workers."""
    # Inlined the world_size() helper: same torch.distributed checks.
    if not torch.distributed.is_initialized():
        return False
    return torch.distributed.get_world_size() > 1
+
+
def all_reduce(tensor: torch.Tensor, op=torch.distributed.ReduceOp.SUM):
    """All-reduce `tensor` in place across workers; no-op outside distributed runs."""
    # Inlined is_distributed(): initialized process group with >1 workers.
    if torch.distributed.is_initialized() and torch.distributed.get_world_size() > 1:
        return torch.distributed.all_reduce(tensor, op)
+
+
+def _is_complex_or_float(tensor):
+ return torch.is_floating_point(tensor) or torch.is_complex(tensor)
+
+
def _check_number_of_params(params: tp.List[torch.Tensor]):
    """Verify all workers see the same number of params.

    A mismatch would deadlock the subsequent async all-reduce, so raise
    loudly instead. No-op outside distributed runs or for an empty list.
    """
    if not is_distributed() or not params:
        return
    count = torch.tensor([len(params)], device=params[0].device, dtype=torch.long)
    all_reduce(count)
    if count.item() != len(params) * world_size():
        # If not all the workers have the same number, for at least one of them,
        # this inequality will be verified.
        raise RuntimeError(
            f"Mismatch in number of params: ours is {len(params)}, "
            "at least one worker has a different one."
        )
+
+
def broadcast_tensors(tensors: tp.Iterable[torch.Tensor], src: int = 0):
    """Broadcast the tensors from the given parameters to all workers.
    This can be used to ensure that all workers have the same model to start with.
    """
    if not is_distributed():
        return
    # Only float/complex tensors are broadcast; integer buffers are skipped.
    floats = [t for t in tensors if _is_complex_or_float(t)]
    _check_number_of_params(floats)
    # Launch all broadcasts asynchronously, then wait for the batch.
    pending = [
        torch.distributed.broadcast(t.data, src=src, async_op=True) for t in floats
    ]
    for work in pending:
        work.wait()
+
+
def sync_buffer(buffers, average=True):
    """
    Sync grad for buffers. If average is False, broadcast instead of averaging.

    Args:
        buffers: iterable of module buffers; only floating-point ones are synced.
        average: when True, all-reduce (sum) then divide by the world size;
            when False, broadcast rank 0's value to everyone.
    """
    if not is_distributed():
        return
    handles = []
    for buffer in buffers:
        if torch.is_floating_point(buffer.data):
            if average:
                handle = torch.distributed.all_reduce(
                    buffer.data, op=torch.distributed.ReduceOp.SUM, async_op=True
                )
            else:
                handle = torch.distributed.broadcast(buffer.data, src=0, async_op=True)
            handles.append((buffer, handle))
    for buffer, handle in handles:
        handle.wait()
        if average:
            # Bug fix: the original divided by `world_size` (the function object,
            # a TypeError at runtime) instead of calling it, as sync_grad does.
            buffer.data /= world_size()
+
+
def sync_grad(params):
    """
    Simpler alternative to DistributedDataParallel, that doesn't rely
    on any black magic. For simple models it can also be as fast.
    Just call this on your model parameters after the call to backward!
    """
    if not is_distributed():
        return
    in_flight = []
    for param in params:
        if param.grad is None:
            continue
        work = torch.distributed.all_reduce(
            param.grad.data, op=torch.distributed.ReduceOp.SUM, async_op=True
        )
        in_flight.append((param, work))
    # Wait for all reduces, then normalize the summed gradients to a mean.
    for param, work in in_flight:
        work.wait()
        param.grad.data /= world_size()
+
+
def average_metrics(metrics: tp.Dict[str, float], count=1.0):
    """Average a dictionary of metrics across all workers, using the optional
    `count` as unnormalized weight.
    """
    # Inlined is_distributed(): nothing to do on a single worker.
    if not (
        torch.distributed.is_initialized() and torch.distributed.get_world_size() > 1
    ):
        return metrics
    keys, values = zip(*metrics.items())
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Append the weight so the reduced tensor carries its own normalizer.
    packed = torch.tensor(list(values) + [1], device=device, dtype=torch.float32)
    packed *= count
    all_reduce(packed)
    normalized = (packed[:-1] / packed[-1]).cpu().tolist()
    return dict(zip(keys, normalized))
diff --git a/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py
new file mode 100644
index 0000000..ec7df0f
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py
@@ -0,0 +1,125 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+# This source file is copied from https://github.com/facebookresearch/encodec
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Residual vector quantizer implementation."""
+
+from dataclasses import dataclass, field
+import math
+import typing as tp
+
+import torch
+from torch import nn
+
+from .core_vq import ResidualVectorQuantization
+
+
@dataclass
class QuantizedResult:
    """Container for the outputs of a quantizer forward pass."""

    quantized: torch.Tensor  # quantized (decoded) representation
    codes: torch.Tensor  # codebook indices, per quantizer
    bandwidth: torch.Tensor  # bandwidth in kb/s used, per batch item.
    penalty: tp.Optional[torch.Tensor] = None  # presumably a training penalty (e.g. commitment loss); not populated in this file — verify against callers
    metrics: dict = field(default_factory=dict)  # extra, free-form metrics
+
+
class ResidualVectorQuantizer(nn.Module):
    """Residual Vector Quantizer.
    Args:
        dimension (int): Dimension of the codebooks.
        n_q (int): Number of residual vector quantizers used.
        bins (int): Codebook size.
        decay (float): Decay for exponential moving average over the codebooks.
        kmeans_init (bool): Whether to use kmeans to initialize the codebooks.
        kmeans_iters (int): Number of iterations used for kmeans initialization.
        threshold_ema_dead_code (int): Threshold for dead code expiration. Replace any codes
        that have an exponential moving average cluster size less than the specified threshold with
        randomly selected vector from the current batch.
    """

    def __init__(
        self,
        dimension: int = 256,
        n_q: int = 8,
        bins: int = 1024,
        decay: float = 0.99,
        kmeans_init: bool = True,
        kmeans_iters: int = 50,
        threshold_ema_dead_code: int = 2,
    ):
        super().__init__()
        self.n_q = n_q
        self.dimension = dimension
        self.bins = bins
        self.decay = decay
        self.kmeans_init = kmeans_init
        self.kmeans_iters = kmeans_iters
        self.threshold_ema_dead_code = threshold_ema_dead_code
        self.vq = ResidualVectorQuantization(
            dim=self.dimension,
            codebook_size=self.bins,
            num_quantizers=self.n_q,
            decay=self.decay,
            kmeans_init=self.kmeans_init,
            kmeans_iters=self.kmeans_iters,
            threshold_ema_dead_code=self.threshold_ema_dead_code,
        )

    # NOTE(review): the original annotation said `-> QuantizedResult`, but the
    # body returns a plain tuple; annotation corrected to match the code.
    def forward(
        self,
        x: torch.Tensor,
        n_q: tp.Optional[int] = None,
        layers: tp.Optional[list] = None,
    ) -> tp.Tuple[torch.Tensor, torch.Tensor, torch.Tensor, tp.List[torch.Tensor]]:
        """Residual vector quantization on the given input tensor.
        Args:
            x (torch.Tensor): Input tensor.
            n_q (int): Number of quantizers used to quantize. Default: All quantizers.
            layers (list): Layers that need to return quantized. Default: None.
        Returns:
            A tuple of (quantized output, stacked codes, mean commitment loss,
            per-layer quantized outputs for the requested `layers`).
        Raises:
            ValueError: if any index in `layers` is >= the active `n_q`.
        """
        n_q = n_q if n_q else self.n_q
        if layers and max(layers) >= n_q:
            raise ValueError(
                f"Last layer index in layers: A {max(layers)}. Number of quantizers in RVQ: B {self.n_q}. A must less than B."
            )
        quantized, codes, commit_loss, quantized_list = self.vq(
            x, n_q=n_q, layers=layers
        )
        return quantized, codes, torch.mean(commit_loss), quantized_list

    def encode(
        self, x: torch.Tensor, n_q: tp.Optional[int] = None, st: tp.Optional[int] = None
    ) -> torch.Tensor:
        """Encode a given input tensor with the specified sample rate at the given bandwidth.
        The RVQ encode method sets the appropriate number of quantizers to use
        and returns indices for each quantizer.
        Args:
            x (torch.Tensor): Input tensor.
            n_q (int): Number of quantizers used to quantize. Default: All quantizers.
            st (int): Start to encode input from which layer. Default: 0.
        """
        n_q = n_q if n_q else self.n_q
        st = st or 0
        codes = self.vq.encode(x, n_q=n_q, st=st)
        return codes

    def decode(self, codes: torch.Tensor, st: int = 0) -> torch.Tensor:
        """Decode the given codes to the quantized representation.
        Args:
            codes (torch.Tensor): Input indices for each quantizer.
            st (int): Start to decode input codes from which layer. Default: 0.
        """
        quantized = self.vq.decode(codes, st=st)
        return quantized
diff --git a/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py
new file mode 100644
index 0000000..481de20
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py
@@ -0,0 +1,414 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+# This source file is copied from https://github.com/facebookresearch/encodec
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Encodec SEANet-based encoder and decoder implementation."""
+
+import typing as tp
+
+import numpy as np
+import torch.nn as nn
+import torch
+
+from . import SConv1d, SConvTranspose1d, SLSTM
+
+
@torch.jit.script
def snake(x, alpha):
    """Snake activation: x + (1/alpha) * sin^2(alpha * x), elementwise.

    Flattens trailing dims to a fixed rank before the op and restores the
    original shape afterwards. The 1e-9 term guards the reciprocal against
    division by zero when alpha == 0.
    """
    shape = x.shape
    x = x.reshape(shape[0], shape[1], -1)
    x = x + (alpha + 1e-9).reciprocal() * torch.sin(alpha * x).pow(2)
    x = x.reshape(shape)
    return x
+
+
class Snake1d(nn.Module):
    """Snake activation with one learnable `alpha` per channel."""

    def __init__(self, channels):
        super().__init__()
        # Shaped (1, C, 1) so alpha broadcasts over batch and time dims.
        self.alpha = nn.Parameter(torch.ones(1, channels, 1))

    def forward(self, x):
        return snake(x, self.alpha)
+
+
class SEANetResnetBlock(nn.Module):
    """Residual block from SEANet model.
    Args:
        dim (int): Dimension of the input/output
        kernel_sizes (list): List of kernel sizes for the convolutions.
        dilations (list): List of dilations for the convolutions.
        activation (str): Activation function.
        activation_params (dict): Parameters to provide to the activation function
        norm (str): Normalization method.
        norm_params (dict): Parameters to provide to the underlying normalization used along with the convolution.
        causal (bool): Whether to use fully causal convolution.
        pad_mode (str): Padding mode for the convolutions.
        compress (int): Reduced dimensionality in residual branches (from Demucs v3)
        true_skip (bool): Whether to use true skip connection or a simple convolution as the skip connection.
    """

    def __init__(
        self,
        dim: int,
        kernel_sizes: tp.List[int] = [3, 1],
        dilations: tp.List[int] = [1, 1],
        activation: str = "ELU",
        activation_params: dict = {"alpha": 1.0},
        norm: str = "weight_norm",
        norm_params: tp.Dict[str, tp.Any] = {},
        causal: bool = False,
        pad_mode: str = "reflect",
        compress: int = 2,
        true_skip: bool = True,
    ):
        super().__init__()
        assert len(kernel_sizes) == len(
            dilations
        ), "Number of kernel sizes should match number of dilations"
        # Snake is constructed per-channel-count, unlike the nn activations
        # which take keyword params — handled at each instantiation below.
        act = getattr(nn, activation) if activation != "Snake" else Snake1d
        # Bottleneck width of the residual branch (Demucs v3 style).
        hidden = dim // compress
        block = []
        for i, (kernel_size, dilation) in enumerate(zip(kernel_sizes, dilations)):
            # First conv narrows dim -> hidden; last widens back hidden -> dim.
            in_chs = dim if i == 0 else hidden
            out_chs = dim if i == len(kernel_sizes) - 1 else hidden
            block += [
                act(**activation_params) if activation != "Snake" else act(in_chs),
                SConv1d(
                    in_chs,
                    out_chs,
                    kernel_size=kernel_size,
                    dilation=dilation,
                    norm=norm,
                    norm_kwargs=norm_params,
                    causal=causal,
                    pad_mode=pad_mode,
                ),
            ]
        self.block = nn.Sequential(*block)
        self.shortcut: nn.Module
        # Identity skip when true_skip, otherwise a learned 1x1-style conv skip.
        if true_skip:
            self.shortcut = nn.Identity()
        else:
            self.shortcut = SConv1d(
                dim,
                dim,
                kernel_size=1,
                norm=norm,
                norm_kwargs=norm_params,
                causal=causal,
                pad_mode=pad_mode,
            )

    def forward(self, x):
        # Standard residual sum: skip path + transformed path.
        return self.shortcut(x) + self.block(x)
+
+
class SEANetEncoder(nn.Module):
    """SEANet encoder.
    Args:
        channels (int): Audio channels.
        dimension (int): Intermediate representation dimension.
        n_filters (int): Base width for the model.
        n_residual_layers (int): nb of residual layers.
        ratios (Sequence[int]): kernel size and stride ratios. The encoder uses downsampling ratios instead of
            upsampling ratios, hence it will use the ratios in the reverse order to the ones specified here
            that must match the decoder order
        activation (str): Activation function.
        activation_params (dict): Parameters to provide to the activation function
        norm (str): Normalization method.
        norm_params (dict): Parameters to provide to the underlying normalization used along with the convolution.
        kernel_size (int): Kernel size for the initial convolution.
        last_kernel_size (int): Kernel size for the initial convolution.
        residual_kernel_size (int): Kernel size for the residual layers.
        dilation_base (int): How much to increase the dilation with each layer.
        causal (bool): Whether to use fully causal convolution.
        pad_mode (str): Padding mode for the convolutions.
        true_skip (bool): Whether to use true skip connection or a simple
            (streamable) convolution as the skip connection in the residual network blocks.
        compress (int): Reduced dimensionality in residual branches (from Demucs v3).
        lstm (int): Number of LSTM layers at the end of the encoder.
    """

    def __init__(
        self,
        channels: int = 1,
        dimension: int = 128,
        n_filters: int = 32,
        n_residual_layers: int = 1,
        ratios: tp.List[int] = [8, 5, 4, 2],
        activation: str = "ELU",
        activation_params: dict = {"alpha": 1.0},
        norm: str = "weight_norm",
        norm_params: tp.Dict[str, tp.Any] = {},
        kernel_size: int = 7,
        last_kernel_size: int = 7,
        residual_kernel_size: int = 3,
        dilation_base: int = 2,
        causal: bool = False,
        pad_mode: str = "reflect",
        true_skip: bool = False,
        compress: int = 2,
        lstm: int = 2,
        bidirectional: bool = False,
    ):
        super().__init__()
        self.channels = channels
        self.dimension = dimension
        self.n_filters = n_filters
        # Encoder downsamples, so apply the (decoder-ordered) ratios reversed.
        self.ratios = list(reversed(ratios))
        del ratios
        self.n_residual_layers = n_residual_layers
        # Product of all strides = total downsampling factor (hop length).
        self.hop_length = np.prod(self.ratios)

        act = getattr(nn, activation) if activation != "Snake" else Snake1d
        # Channel multiplier, doubled after every downsampling stage.
        mult = 1
        model: tp.List[nn.Module] = [
            SConv1d(
                channels,
                mult * n_filters,
                kernel_size,
                norm=norm,
                norm_kwargs=norm_params,
                causal=causal,
                pad_mode=pad_mode,
            )
        ]
        # Downsample to raw audio scale
        for i, ratio in enumerate(self.ratios):
            # Add residual layers
            for j in range(n_residual_layers):
                model += [
                    SEANetResnetBlock(
                        mult * n_filters,
                        kernel_sizes=[residual_kernel_size, 1],
                        dilations=[dilation_base**j, 1],
                        norm=norm,
                        norm_params=norm_params,
                        activation=activation,
                        activation_params=activation_params,
                        causal=causal,
                        pad_mode=pad_mode,
                        compress=compress,
                        true_skip=true_skip,
                    )
                ]

            # Add downsampling layers
            model += [
                (
                    act(**activation_params)
                    if activation != "Snake"
                    else act(mult * n_filters)
                ),
                SConv1d(
                    mult * n_filters,
                    mult * n_filters * 2,
                    kernel_size=ratio * 2,
                    stride=ratio,
                    norm=norm,
                    norm_kwargs=norm_params,
                    causal=causal,
                    pad_mode=pad_mode,
                ),
            ]
            mult *= 2

        if lstm:
            model += [
                SLSTM(mult * n_filters, num_layers=lstm, bidirectional=bidirectional)
            ]

        # A bidirectional LSTM doubles the feature width for the final conv.
        mult = mult * 2 if bidirectional else mult
        model += [
            (
                act(**activation_params)
                if activation != "Snake"
                else act(mult * n_filters)
            ),
            SConv1d(
                mult * n_filters,
                dimension,
                last_kernel_size,
                norm=norm,
                norm_kwargs=norm_params,
                causal=causal,
                pad_mode=pad_mode,
            ),
        ]

        self.model = nn.Sequential(*model)

    def forward(self, x):
        return self.model(x)
+
+
class SEANetDecoder(nn.Module):
    """SEANet decoder.
    Args:
        channels (int): Audio channels.
        dimension (int): Intermediate representation dimension.
        n_filters (int): Base width for the model.
        n_residual_layers (int): nb of residual layers.
        ratios (Sequence[int]): kernel size and stride ratios
        activation (str): Activation function.
        activation_params (dict): Parameters to provide to the activation function
        final_activation (str): Final activation function after all convolutions.
        final_activation_params (dict): Parameters to provide to the activation function
        norm (str): Normalization method.
        norm_params (dict): Parameters to provide to the underlying normalization used along with the convolution.
        kernel_size (int): Kernel size for the initial convolution.
        last_kernel_size (int): Kernel size for the initial convolution.
        residual_kernel_size (int): Kernel size for the residual layers.
        dilation_base (int): How much to increase the dilation with each layer.
        causal (bool): Whether to use fully causal convolution.
        pad_mode (str): Padding mode for the convolutions.
        true_skip (bool): Whether to use true skip connection or a simple
            (streamable) convolution as the skip connection in the residual network blocks.
        compress (int): Reduced dimensionality in residual branches (from Demucs v3).
        lstm (int): Number of LSTM layers at the end of the encoder.
        trim_right_ratio (float): Ratio for trimming at the right of the transposed convolution under the causal setup.
            If equal to 1.0, it means that all the trimming is done at the right.
    """

    def __init__(
        self,
        channels: int = 1,
        dimension: int = 128,
        n_filters: int = 32,
        n_residual_layers: int = 1,
        ratios: tp.List[int] = [8, 5, 4, 2],
        activation: str = "ELU",
        activation_params: dict = {"alpha": 1.0},
        final_activation: tp.Optional[str] = None,
        final_activation_params: tp.Optional[dict] = None,
        norm: str = "weight_norm",
        norm_params: tp.Dict[str, tp.Any] = {},
        kernel_size: int = 7,
        last_kernel_size: int = 7,
        residual_kernel_size: int = 3,
        dilation_base: int = 2,
        causal: bool = False,
        pad_mode: str = "reflect",
        true_skip: bool = False,
        compress: int = 2,
        lstm: int = 2,
        trim_right_ratio: float = 1.0,
        bidirectional: bool = False,
    ):
        super().__init__()
        self.dimension = dimension
        self.channels = channels
        self.n_filters = n_filters
        self.ratios = ratios
        del ratios
        self.n_residual_layers = n_residual_layers
        # Product of all strides = total upsampling factor (hop length).
        self.hop_length = np.prod(self.ratios)

        act = getattr(nn, activation) if activation != "Snake" else Snake1d
        # Start at the widest channel count; halved after every upsampling stage.
        mult = int(2 ** len(self.ratios))
        model: tp.List[nn.Module] = [
            SConv1d(
                dimension,
                mult * n_filters,
                kernel_size,
                norm=norm,
                norm_kwargs=norm_params,
                causal=causal,
                pad_mode=pad_mode,
            )
        ]

        if lstm:
            model += [
                SLSTM(mult * n_filters, num_layers=lstm, bidirectional=bidirectional)
            ]

        # Upsample to raw audio scale
        for i, ratio in enumerate(self.ratios):
            # Add upsampling layers
            model += [
                (
                    act(**activation_params)
                    if activation != "Snake"
                    else act(mult * n_filters)
                ),
                SConvTranspose1d(
                    mult * n_filters,
                    mult * n_filters // 2,
                    kernel_size=ratio * 2,
                    stride=ratio,
                    norm=norm,
                    norm_kwargs=norm_params,
                    causal=causal,
                    trim_right_ratio=trim_right_ratio,
                ),
            ]
            # Add residual layers
            for j in range(n_residual_layers):
                model += [
                    SEANetResnetBlock(
                        mult * n_filters // 2,
                        kernel_sizes=[residual_kernel_size, 1],
                        dilations=[dilation_base**j, 1],
                        activation=activation,
                        activation_params=activation_params,
                        norm=norm,
                        norm_params=norm_params,
                        causal=causal,
                        pad_mode=pad_mode,
                        compress=compress,
                        true_skip=true_skip,
                    )
                ]

            mult //= 2

        # Add final layers
        model += [
            act(**activation_params) if activation != "Snake" else act(n_filters),
            SConv1d(
                n_filters,
                channels,
                last_kernel_size,
                norm=norm,
                norm_kwargs=norm_params,
                causal=causal,
                pad_mode=pad_mode,
            ),
        ]
        # Add optional final activation to decoder (eg. tanh)
        if final_activation is not None:
            final_act = getattr(nn, final_activation)
            final_activation_params = final_activation_params or {}
            model += [final_act(**final_activation_params)]
        self.model = nn.Sequential(*model)

    def forward(self, z):
        y = self.model(z)
        return y
+
+
def test():
    """Smoke-test the SEANet encoder/decoder round-trip at default settings.

    Checks the encoder produces the expected latent shape for 1s of 24kHz
    audio and that the decoder restores the input shape exactly.
    """
    import torch

    encoder = SEANetEncoder()
    decoder = SEANetDecoder()
    x = torch.randn(1, 1, 24000)
    z = encoder(x)
    print("z ", z.shape)
    # Bug fix: removed a stray `assert 1 == 2` debug guard that made this
    # test fail unconditionally before reaching the real assertions below.
    assert list(z.shape) == [1, 128, 75], z.shape
    y = decoder(z)
    assert y.shape == x.shape, (x.shape, y.shape)
diff --git a/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py b/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py
new file mode 100755
index 0000000..224ad25
--- /dev/null
+++ b/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py
@@ -0,0 +1,592 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+#
+# Copyright (c) ByteDance, Inc. and its affiliates.
+# Copyright (c) Chutong Meng
+#
+# This source code is licensed under the CC BY-NC license found in the
+# LICENSE file in the root directory of this source tree.
+# Based on AudioDec (https://github.com/facebookresearch/AudioDec)
+
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
class VectorQuantize(nn.Module):
    """Vector quantization w/ exponential moving averages (EMA)

    The codebook is stored as a (dim, n_embed) buffer and updated by EMA
    during training (no gradient-based codebook learning).
    """

    def __init__(
        self,
        dim: int,
        codebook_size: int,
        decay=0.8,
        commitment=1.0,
        eps=1e-5,
        n_embed=None,
    ):
        super().__init__()
        # n_embed defaults to codebook_size when not given explicitly.
        n_embed = self.default(n_embed, codebook_size)

        self.dim = dim
        self.n_embed = n_embed
        self.decay = decay
        self.eps = eps
        self.commitment = commitment

        # Buffers (not parameters): maintained by the EMA updates in forward.
        embed = torch.randn(dim, n_embed)
        self.register_buffer("embed", embed)
        self.register_buffer("cluster_size", torch.zeros(n_embed))
        self.register_buffer("embed_avg", embed.clone())

    @property
    def codebook(self):
        # Exposed as (n_embed, dim), i.e. one code vector per row.
        return self.embed.transpose(0, 1)

    def exists(self, val):
        """True when `val` is not None."""
        return val is not None

    def default(self, val, d):
        """Return `val` when set, otherwise the fallback `d`."""
        return val if self.exists(val) else d

    def ema_inplace(self, moving_avg, new, decay):
        """In-place exponential moving average update of `moving_avg`."""
        moving_avg.data.mul_(decay).add_(new, alpha=(1 - decay))

    def laplace_smoothing(self, x, n_categories, eps=1e-5):
        """Smooth counts so no category has exactly zero probability mass."""
        return (x + eps) / (x.sum() + n_categories * eps)

    def forward(self, input):
        """Quantize `input`; in training mode also EMA-update the codebook.

        Returns:
            quantize: straight-through quantized tensor (same shape as input).
            loss: commitment loss scaled by `self.commitment`.
            perplexity: exp(entropy) of the code usage distribution.
        """
        dtype = input.dtype
        flatten = input.reshape(-1, self.dim)
        # Squared euclidean distance via ||x||^2 - 2 x.e + ||e||^2.
        dist = (
            flatten.pow(2).sum(1, keepdim=True)
            - 2 * flatten @ self.embed
            + self.embed.pow(2).sum(0, keepdim=True)
        )
        _, embed_ind = (-dist).max(1)
        embed_onehot = F.one_hot(embed_ind, self.n_embed).type(dtype)
        embed_ind = embed_ind.view(*input.shape[:-1])
        quantize = F.embedding(embed_ind, self.embed.transpose(0, 1))

        if self.training:
            # EMA codebook update; order matters: counts, then sums, then
            # renormalization with Laplace smoothing to avoid divide-by-zero.
            self.ema_inplace(self.cluster_size, embed_onehot.sum(0), self.decay)
            embed_sum = flatten.transpose(0, 1) @ embed_onehot
            self.ema_inplace(self.embed_avg, embed_sum, self.decay)
            cluster_size = (
                self.laplace_smoothing(self.cluster_size, self.n_embed, self.eps)
                * self.cluster_size.sum()
            )
            embed_normalized = self.embed_avg / cluster_size.unsqueeze(0)
            self.embed.data.copy_(embed_normalized)

        loss = F.mse_loss(quantize.detach(), input) * self.commitment
        # Straight-through estimator: gradients bypass the discrete lookup.
        quantize = input + (quantize - input).detach()

        avg_probs = torch.mean(embed_onehot, dim=0)
        perplexity = torch.exp(-torch.sum(avg_probs * torch.log(avg_probs + 1e-10)))

        return quantize, loss, perplexity

    def forward_index(self, input):
        """Like forward, but returns (quantize, indices) without EMA updates or loss."""
        dtype = input.dtype
        flatten = input.reshape(-1, self.dim)
        dist = (
            flatten.pow(2).sum(1, keepdim=True)
            - 2 * flatten @ self.embed
            + self.embed.pow(2).sum(0, keepdim=True)
        )
        _, embed_ind = (-dist).max(1)
        embed_onehot = F.one_hot(embed_ind, self.n_embed).type(dtype)
        embed_ind = embed_ind.view(*input.shape[:-1])
        quantize = F.embedding(embed_ind, self.embed.transpose(0, 1))
        quantize = input + (quantize - input).detach()

        return quantize, embed_ind
+
+
class ResidualVQ(nn.Module):
    """Residual VQ following algorithm 1. in https://arxiv.org/pdf/2107.03312.pdf"""

    def __init__(self, *, num_quantizers, **kwargs):
        super().__init__()
        self.layers = nn.ModuleList(
            [VectorQuantize(**kwargs) for _ in range(num_quantizers)]
        )

    def forward(self, x):
        """Quantize `x`; returns (summed quantization, stacked losses, stacked perplexities)."""
        total = 0.0
        leftover = x
        losses = []
        perplexities = []
        for vq_layer in self.layers:
            quantized, vq_loss, ppl = vq_layer(leftover)
            # Issue: https://github.com/lucidrains/vector-quantize-pytorch/issues/33
            # We found considering only the 1st layer VQ's graident results in better performance
            # residual = residual - quantized.detach() # considering all layers' graidents
            leftover = (
                leftover - quantized
            )  # considering only the first layer's graident
            total = total + quantized
            losses.append(vq_loss)
            perplexities.append(ppl)
        return total, torch.stack(losses), torch.stack(perplexities)

    def forward_index(self, x, flatten_idx=False):
        """
        all_indices: [num_of_quantizers, B, T]
        """
        total = 0.0
        leftover = x
        codes = []
        for layer_idx, vq_layer in enumerate(self.layers):
            quantized, idx = vq_layer.forward_index(leftover)
            # residual = residual - quantized.detach()
            leftover = leftover - quantized
            total = total + quantized
            if flatten_idx:
                # Shift so each layer's indices occupy a disjoint id range.
                idx += self.codebook_size * layer_idx
            codes.append(idx)
        return total, torch.stack(codes)

    def initial(self):
        """Flatten all per-layer codebooks into one (num_q * size, dim) lookup table."""
        books = [vq_layer.codebook for vq_layer in self.layers]
        self.codebook_size = books[0].size(0)
        stacked = torch.stack(books)
        self.codebook = stacked.reshape(-1, stacked.size(-1))

    def lookup(self, indices):
        quantized_out = F.embedding(indices, self.codebook)  # Num x T x C
        return torch.sum(quantized_out, dim=0, keepdim=True)
+
+
class Quantizer(nn.Module):
    """Thin wrapper around ResidualVQ that accepts (B, C, T) tensors."""

    def __init__(
        self,
        code_dim: int,
        codebook_num: int,
        codebook_size: int,
    ):
        super().__init__()
        self.codebook = ResidualVQ(
            dim=code_dim, num_quantizers=codebook_num, codebook_size=codebook_size
        )

    def initial(self):
        """Build the flattened codebook required by encode()/decode()."""
        self.codebook.initial()

    def forward(self, z):
        """Quantize ``z`` (B, C, T); returns (zq (B, C, T), vq losses, perplexities)."""
        quantized, vqloss, perplexity = self.codebook(z.transpose(2, 1))
        return quantized.transpose(2, 1), vqloss, perplexity

    def inference(self, z):
        """Quantize ``z`` (B, C, T); returns (zq (B, C, T), per-stage indices)."""
        quantized, indices = self.codebook.forward_index(z.transpose(2, 1))
        return quantized.transpose(2, 1), indices

    def encode(self, z):
        """Return (quantized latents in (B, T, C) layout, flattened code indices)."""
        return self.codebook.forward_index(z.transpose(2, 1), flatten_idx=True)

    def decode(self, indices):
        """Look up vectors for flattened ``indices``."""
        return self.codebook.lookup(indices)
+
+
class Conv1d1x1(nn.Conv1d):
    """Pointwise (kernel size 1) 1D convolution."""

    def __init__(self, in_channels, out_channels, bias=True):
        super().__init__(in_channels, out_channels, kernel_size=1, bias=bias)
+
+
class Conv1d(nn.Module):
    """1D convolution that auto-pads to preserve length when padding is omitted."""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 1,
        padding: int = -1,
        dilation: int = 1,
        groups: int = 1,
        bias: bool = True,
    ):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.dilation = dilation
        # padding < 0 is a sentinel meaning "same"-style padding (exact for
        # odd kernels at stride 1).
        if padding < 0:
            padding = (kernel_size - 1) // 2 * dilation
        self.conv = nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )

    def forward(self, x):
        """
        Args:
            x (Tensor): Float tensor variable with the shape (B, C, T).
        Returns:
            Tensor: Float tensor variable with the shape (B, C, T).
        """
        return self.conv(x)
+
+
class ConvTranspose1d(nn.Module):
    """Transposed 1D convolution with stride-derived default padding.

    The defaults are chosen so that kernel_size == 2 * stride upsamples by
    exactly ``stride`` (T_out = stride * T_in) for both even and odd strides.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int,
        padding=-1,
        output_padding=-1,
        groups=1,
        bias=True,
    ):
        super().__init__()
        # Negative values are sentinels for the stride-derived defaults.
        if padding < 0:
            padding = (stride + 1) // 2
        if output_padding < 0:
            output_padding = 1 if stride % 2 else 0
        self.deconv = nn.ConvTranspose1d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            output_padding=output_padding,
            groups=groups,
            bias=bias,
        )

    def forward(self, x):
        """
        Args:
            x (Tensor): Float tensor variable with the shape (B, C, T).
        Returns:
            Tensor: Float tensor variable with the shape (B, C', T').
        """
        return self.deconv(x)
+
+
class ResidualUnit(nn.Module):
    """Residual block: two activated convolutions plus a skip connection.

    NOTE: the residual add ``x + y`` requires in_channels == out_channels.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size=3,
        dilation=1,
        bias=False,
        nonlinear_activation="ELU",
        nonlinear_activation_params=None,
    ):
        super().__init__()
        # Fix: the default used to be a mutable `{}` shared across all calls.
        if nonlinear_activation_params is None:
            nonlinear_activation_params = {}
        # Look up the activation class by name from torch.nn (e.g. nn.ELU).
        self.activation = getattr(nn, nonlinear_activation)(
            **nonlinear_activation_params
        )
        self.conv1 = Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1,
            dilation=dilation,
            bias=bias,
        )
        self.conv2 = Conv1d1x1(out_channels, out_channels, bias)

    def forward(self, x):
        """Apply activation->conv1->activation->conv2 and add the input (residual)."""
        y = self.conv1(self.activation(x))
        y = self.conv2(self.activation(y))
        return x + y
+
+
class Projector(nn.Module):
    """Projects encoder features down to the code dimension with one conv."""

    def __init__(
        self, input_channels: int, code_dim: int, kernel_size=3, stride=1, bias=False
    ):
        super().__init__()
        self.project = Conv1d(
            input_channels, code_dim, kernel_size=kernel_size, stride=stride, bias=bias
        )

    def forward(self, x):
        """Map ``x`` (B, input_channels, T) to (B, code_dim, T')."""
        projected = self.project(x)
        return projected
+
+
class EncoderBlock(nn.Module):
    """A stack of residual units followed by one (optionally strided) conv."""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        stride: int,
        dilations=(1, 1),
        unit_kernel_size=3,
        bias=True,
    ):
        super().__init__()
        self.res_units = torch.nn.ModuleList(
            ResidualUnit(
                in_channels,
                in_channels,
                kernel_size=unit_kernel_size,
                dilation=d,
            )
            for d in dilations
        )
        self.num_res = len(self.res_units)

        # special case: stride=1 keeps the length, so use kernel 3 instead of 2
        down_kernel = 3 if stride == 1 else (2 * stride)
        self.conv = Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=down_kernel,
            stride=stride,
            bias=bias,
        )

    def forward(self, x):
        """Run the residual units, then the (down-sampling) conv."""
        for unit in self.res_units:
            x = unit(x)
        return self.conv(x)
+
+
class Encoder(nn.Module):
    """Initial conv followed by a chain of (optionally down-sampling) EncoderBlocks."""

    def __init__(
        self,
        input_channels: int,
        encode_channels: int,
        channel_ratios=(1, 1),
        strides=(1, 1),
        kernel_size=3,
        bias=True,
        block_dilations=(1, 1),
        unit_kernel_size=3,
    ):
        super().__init__()
        assert len(channel_ratios) == len(strides)

        self.conv = Conv1d(
            in_channels=input_channels,
            out_channels=encode_channels,
            kernel_size=kernel_size,
            stride=1,
            bias=False,
        )
        self.conv_blocks = torch.nn.ModuleList()
        block_in = encode_channels
        for ratio, stride in zip(channel_ratios, strides):
            block_out = int(encode_channels * ratio)  # ratio may be a float
            self.conv_blocks.append(
                EncoderBlock(
                    block_in,
                    block_out,
                    stride,
                    dilations=block_dilations,
                    unit_kernel_size=unit_kernel_size,
                    bias=bias,
                )
            )
            block_in = block_out
        self.num_blocks = len(self.conv_blocks)
        self.out_channels = block_out

    def forward(self, x):
        """Encode ``x`` (B, input_channels, T) to (B, out_channels, T')."""
        x = self.conv(x)
        for block in self.conv_blocks:
            x = block(x)
        return x
+
+
class DecoderBlock(nn.Module):
    """Decoder block: up-sampling conv (plain conv at stride 1) + residual units."""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        stride: int,
        dilations=(1, 1),
        unit_kernel_size=3,
        bias=True,
    ):
        super().__init__()

        if stride == 1:
            # fix kernel=3 when stride=1 for unchanged shape
            self.conv = Conv1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=3,
                stride=stride,
                bias=bias,
            )
        else:
            self.conv = ConvTranspose1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=(2 * stride),
                stride=stride,
                bias=bias,
            )

        self.res_units = torch.nn.ModuleList(
            ResidualUnit(
                out_channels,
                out_channels,
                kernel_size=unit_kernel_size,
                dilation=d,
            )
            for d in dilations
        )
        self.num_res = len(self.res_units)

    def forward(self, x):
        """Up-sample ``x`` then refine it with the residual units."""
        x = self.conv(x)
        for unit in self.res_units:
            x = unit(x)
        return x
+
+
class Decoder(nn.Module):
    """Mirror of Encoder: input conv -> chain of DecoderBlocks -> output conv."""

    def __init__(
        self,
        code_dim: int,
        output_channels: int,
        decode_channels: int,
        channel_ratios=(1, 1),
        strides=(1, 1),
        kernel_size=3,
        bias=True,
        block_dilations=(1, 1),
        unit_kernel_size=3,
    ):
        super().__init__()
        assert len(channel_ratios) == len(strides)

        self.conv1 = Conv1d(
            in_channels=code_dim,
            out_channels=int(decode_channels * channel_ratios[0]),
            kernel_size=kernel_size,
            stride=1,
            bias=False,
        )

        self.conv_blocks = torch.nn.ModuleList()
        for idx, stride in enumerate(strides):
            block_in = int(decode_channels * channel_ratios[idx])
            if idx + 1 < len(channel_ratios):
                block_out = int(decode_channels * channel_ratios[idx + 1])
            else:
                block_out = decode_channels  # last block returns to base width
            self.conv_blocks.append(
                DecoderBlock(
                    block_in,
                    block_out,
                    stride,
                    dilations=block_dilations,
                    unit_kernel_size=unit_kernel_size,
                    bias=bias,
                )
            )
        self.num_blocks = len(self.conv_blocks)

        self.conv2 = Conv1d(block_out, output_channels, kernel_size, 1, bias=False)

    def forward(self, z):
        """Decode latents ``z`` (B, code_dim, T) to (B, output_channels, T')."""
        x = self.conv1(z)
        for block in self.conv_blocks:
            x = block(x)
        return self.conv2(x)
+
+
class VevoRepCodec(nn.Module):
    """Autoencoder with a residual-VQ bottleneck over (B, C, T) feature sequences.

    Pipeline: encoder -> projector -> residual vector quantizer -> decoder.
    """

    def __init__(
        self,
        input_channels=768,
        output_channels=768,
        encode_channels=768,
        decode_channels=768,
        code_dim=768,
        codebook_num=1,
        codebook_size=1024,
        bias=True,
        enc_ratios=(1, 1),
        dec_ratios=(1, 1),
        enc_strides=(1, 1),
        dec_strides=(1, 1),
        enc_kernel_size=3,
        dec_kernel_size=3,
        enc_block_dilations=(1, 1),
        enc_block_kernel_size=3,
        dec_block_dilations=(1, 1),
        dec_block_kernel_size=3,
    ):
        super().__init__()

        self.input_channels = input_channels

        # Analysis path: raw features -> latent sequence.
        self.encoder = Encoder(
            input_channels=input_channels,
            encode_channels=encode_channels,
            channel_ratios=enc_ratios,
            strides=enc_strides,
            kernel_size=enc_kernel_size,
            bias=bias,
            block_dilations=enc_block_dilations,
            unit_kernel_size=enc_block_kernel_size,
        )

        # Synthesis path: quantized codes -> reconstructed features.
        self.decoder = Decoder(
            code_dim=code_dim,
            output_channels=output_channels,
            decode_channels=decode_channels,
            channel_ratios=dec_ratios,
            strides=dec_strides,
            kernel_size=dec_kernel_size,
            bias=bias,
            block_dilations=dec_block_dilations,
            unit_kernel_size=dec_block_kernel_size,
        )

        # Narrows the encoder output to the codebook dimension.
        self.projector = Projector(
            input_channels=self.encoder.out_channels,
            code_dim=code_dim,
            kernel_size=3,
            stride=1,
            bias=False,
        )

        self.quantizer = Quantizer(
            code_dim=code_dim, codebook_num=codebook_num, codebook_size=codebook_size
        )

    def forward(self, x):
        """Encode, quantize and reconstruct ``x`` (B, input_channels, T).

        Returns:
            (reconstruction, quantized latents zq, pre-quantization latents z,
            VQ losses, perplexities)
        """
        encoded = self.encoder(x)
        z = self.projector(encoded)
        zq, vqloss, perplexity = self.quantizer(z)
        y = self.decoder(zq)
        return y, zq, z, vqloss, perplexity
diff --git a/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt b/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt
new file mode 100644
index 0000000..bd06a43
Binary files /dev/null and b/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt differ
diff --git a/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py b/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py
new file mode 100644
index 0000000..14c8ae8
--- /dev/null
+++ b/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py
@@ -0,0 +1,650 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from transformers import LlamaConfig, LlamaForCausalLM, LlamaModel
+import torch
+import torch.nn.functional as F
+import numpy as np
+import os
+import torch.nn as nn
+from typing import List, Optional, Tuple, Union
+import math
+
+from transformers.models.llama.modeling_llama import LlamaDecoderLayer
+from transformers.models.llama.modeling_llama import BaseModelOutputWithPast
+
+
+# sinusoidal positional encoding
class SinusoidalPosEmb(nn.Module):
    """Sinusoidal embedding for scalar positions / diffusion steps.

    Maps a 1-D tensor of positions to (len(x), dim) features: the first
    ``dim // 2`` channels are sines, the rest cosines, with frequencies spaced
    geometrically between 1 and 1/10000.
    """

    def __init__(self, dim):
        super().__init__()
        self.dim = dim  # output feature size; assumed even — TODO confirm

    def forward(self, x):
        half = self.dim // 2
        # log-spaced frequencies: exp(-i * log(10000) / (half - 1))
        step = math.log(10000) / (half - 1)
        freqs = torch.exp(torch.arange(half, device=x.device) * -step)
        args = x[:, None] * freqs[None, :]
        return torch.cat((args.sin(), args.cos()), dim=-1)
+
+
class LlamaAdaptiveRMSNorm(nn.Module):
    """RMSNorm whose per-channel scale is predicted from a conditioning embedding.

    The projection is initialized to weight=0 / bias=1, so at initialization the
    module behaves exactly like an unconditional RMSNorm.
    """

    def __init__(self, hidden_size=1024, eps=1e-6, dim_cond=1024):
        super().__init__()
        self.to_weight = nn.Linear(dim_cond, hidden_size)
        nn.init.zeros_(self.to_weight.weight)
        nn.init.ones_(self.to_weight.bias)
        self.variance_epsilon = eps
        self._is_hf_initialized = True  # disable automatic (HF post_init) re-init

    def forward(self, hidden_states, cond_embedding):
        orig_dtype = hidden_states.dtype
        # variance is computed in float32 for numerical stability
        variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
        normed = hidden_states * torch.rsqrt(variance + self.variance_epsilon)

        scale = self.to_weight(cond_embedding)
        if scale.dim() == 2:
            scale = scale.unsqueeze(1)  # broadcast over the sequence dimension

        return (scale * normed).to(orig_dtype)
+
+
class LlamaNARDecoderLayer(LlamaDecoderLayer):
    """Llama decoder layer whose norms are adaptive (condition-dependent) RMSNorms.

    NOTE(review): the original class defined ``__init__`` and ``forward`` TWICE;
    Python silently kept only the second definition of each. The duplicates were
    removed, preserving the effective (second) versions.
    """

    def __init__(self, config: LlamaConfig, layer_idx: int):
        """Override to adaptive layer norm"""
        super().__init__(config, layer_idx)  # init attention, mlp, etc.
        self.layer_idx = layer_idx
        self.input_layernorm = LlamaAdaptiveRMSNorm(
            config.hidden_size, eps=config.rms_norm_eps, dim_cond=config.hidden_size
        )
        self.post_attention_layernorm = LlamaAdaptiveRMSNorm(
            config.hidden_size, eps=config.rms_norm_eps, dim_cond=config.hidden_size
        )

    # adds `cond_embedding` relative to the stock LlamaDecoderLayer.forward
    def forward(
        self,
        hidden_states: torch.Tensor,
        cond_embedding: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
    ) -> Tuple[
        torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]
    ]:
        """
        Args:
            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
            cond_embedding (`torch.FloatTensor`): conditioning fed to both adaptive norms
            attention_mask (`torch.FloatTensor`, *optional*): attention mask of size
                `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers.
            use_cache (`bool`, *optional*):
                If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding.
            past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
        """
        residual = hidden_states

        hidden_states = self.input_layernorm(
            hidden_states, cond_embedding=cond_embedding
        )

        # Self Attention
        hidden_states, self_attn_weights, present_key_value = self.self_attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_value=past_key_value,
            output_attentions=output_attentions,
            use_cache=use_cache,
        )
        hidden_states = residual + hidden_states

        # Fully Connected
        residual = hidden_states
        hidden_states = self.post_attention_layernorm(
            hidden_states, cond_embedding=cond_embedding
        )
        hidden_states = self.mlp(hidden_states)
        hidden_states = residual + hidden_states

        outputs = (hidden_states,)

        if output_attentions:
            outputs += (self_attn_weights,)

        if use_cache:
            outputs += (present_key_value,)

        return outputs
+
+
class DiffLlama(LlamaModel):
    """Non-autoregressive Llama transformer used as a diffusion backbone.

    Attention is bidirectional (non-causal) and every adaptive RMSNorm is
    conditioned on the diffusion-step embedding. ``forward`` returns the final
    hidden states tensor directly rather than a ``BaseModelOutputWithPast``.
    """

    def __init__(
        self,
        hidden_size=1024,
        num_heads=16,
        num_layers=16,
        config=None,
    ):
        """
        Args:
            hidden_size: transformer width.
            num_heads: attention heads per layer.
            num_layers: number of decoder layers.
            config: optional LlamaConfig passed to the base class. Fix: the
                default used to be ``LlamaConfig(0, 256, 1024, 1, 1)`` built once
                at import time and shared (mutably) by every instance; now a
                fresh placeholder config is created per instance.
        """
        if config is None:
            config = LlamaConfig(0, 256, 1024, 1, 1)
        super().__init__(config)

        # Replace the base class's layers with adaptive-norm NAR layers sized
        # by the explicit hyper-parameters (not by `config`).
        self.layers = nn.ModuleList(
            [
                LlamaNARDecoderLayer(
                    LlamaConfig(
                        hidden_size=hidden_size,
                        num_attention_heads=num_heads,
                        max_position_embeddings=4096,
                        intermediate_size=hidden_size * 4,
                    ),
                    layer_idx=i,
                )
                for i in range(num_layers)
            ]
        )

        self.norm = LlamaAdaptiveRMSNorm(hidden_size, dim_cond=hidden_size)

        # Diffusion-step conditioning: sinusoidal embedding followed by an MLP.
        self.diff_step_embedding = SinusoidalPosEmb(hidden_size)
        self.diff_step_mlp = nn.Sequential(
            nn.Linear(hidden_size, hidden_size * 4),
            nn.SiLU(),
            nn.Linear(hidden_size * 4, hidden_size),
        )

        self.cond_mlp = nn.Sequential(
            nn.Linear(hidden_size, hidden_size * 4),
            nn.SiLU(),
            nn.Linear(hidden_size * 4, hidden_size),
        )

        # Re-create the adaptive norms sized by `hidden_size` (overriding the
        # ones LlamaNARDecoderLayer built from its per-layer config).
        for layer in self.layers:
            layer.input_layernorm = LlamaAdaptiveRMSNorm(
                hidden_size, dim_cond=hidden_size
            )
            layer.post_attention_layernorm = LlamaAdaptiveRMSNorm(
                hidden_size, dim_cond=hidden_size
            )

        self.post_init()

    def _prepare_decoder_attention_mask(
        self, attention_mask, input_shape, inputs_embeds, past_key_values_length
    ):
        # create noncausal mask (every position may attend to every other)
        # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
        combined_attention_mask = None

        def _expand_mask(
            mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int] = None
        ):
            """
            Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
            """
            bsz, src_len = mask.size()
            tgt_len = tgt_len if tgt_len is not None else src_len

            expanded_mask = (
                mask[:, None, None, :].expand(bsz, 1, tgt_len, src_len).to(dtype)
            )

            inverted_mask = 1.0 - expanded_mask

            # masked positions become the most negative representable value
            return inverted_mask.masked_fill(
                inverted_mask.to(torch.bool), torch.finfo(dtype).min
            )

        if attention_mask is not None:
            # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
            expanded_attn_mask = _expand_mask(
                attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]
            ).to(inputs_embeds.device)
            combined_attention_mask = (
                expanded_attn_mask
                if combined_attention_mask is None
                else expanded_attn_mask + combined_attention_mask
            )

        return combined_attention_mask

    def forward(
        self,
        x,
        diffusion_step,
        cond,
        x_mask,
        input_ids: torch.LongTensor = None,  # [num_quant, B, T]
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        """Run the non-causal transformer.

        Args:
            x: (B, T, C) input sequence.
            diffusion_step: per-sample diffusion timestep, embedded and fed to
                every adaptive norm.
            cond: conditioning sequence; after ``cond_mlp`` it is added to x.
            x_mask: (B, T) padding mask used as the attention mask.
        Returns:
            (B, T, C) final hidden states. NOTE(review): despite the return
            annotation, a plain tensor is returned.
        """
        # retrieve some shape info
        batch_size, seq_length, _ = x.shape

        # condition mlp
        cond_embedding = self.cond_mlp(cond)  # (B, T, C)

        # diffusion step embedding
        diffusion_step = self.diff_step_embedding(diffusion_step).to(x.device)
        diffusion_step = self.diff_step_mlp(diffusion_step)  # (B, C)
        x = x + cond_embedding

        inputs_embeds = x
        attention_mask = x_mask

        output_attentions = (
            output_attentions
            if output_attentions is not None
            else self.config.output_attentions
        )
        output_hidden_states = (
            output_hidden_states
            if output_hidden_states is not None
            else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache

        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        seq_length_with_past = seq_length
        past_key_values_length = 0

        if past_key_values is not None:
            past_key_values_length = past_key_values[0][0].shape[2]
            seq_length_with_past = seq_length_with_past + past_key_values_length

        if position_ids is None:
            device = input_ids.device if input_ids is not None else inputs_embeds.device
            position_ids = torch.arange(
                past_key_values_length,
                seq_length + past_key_values_length,
                dtype=torch.long,
                device=device,
            )
            position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
        else:
            position_ids = position_ids.view(-1, seq_length).long()

        # embed positions
        if attention_mask is None:
            attention_mask = torch.ones(
                (batch_size, seq_length_with_past),
                dtype=torch.bool,
                device=inputs_embeds.device,
            )
        attention_mask = self._prepare_decoder_attention_mask(
            attention_mask,
            (batch_size, seq_length),
            inputs_embeds,
            past_key_values_length,
        )

        hidden_states = inputs_embeds

        if self.gradient_checkpointing and self.training:
            if use_cache:
                use_cache = False

        # decoder layers
        # NOTE(review): these accumulators are collected but never returned;
        # kept to preserve the original behavior exactly.
        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None
        next_decoder_cache = () if use_cache else None

        for idx, decoder_layer in enumerate(self.layers):
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            past_key_value = (
                past_key_values[idx] if past_key_values is not None else None
            )

            if self.gradient_checkpointing and self.training:
                raise NotImplementedError

            else:
                # the diffusion-step embedding drives the adaptive norms
                layer_outputs = decoder_layer(
                    hidden_states,
                    attention_mask=attention_mask,
                    position_ids=position_ids,
                    past_key_value=past_key_value,
                    output_attentions=output_attentions,
                    use_cache=use_cache,
                    cond_embedding=diffusion_step,
                )

            hidden_states = layer_outputs[0]

            if use_cache:
                next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)

            if output_attentions:
                all_self_attns += (layer_outputs[1],)

        hidden_states = self.norm(hidden_states, cond_embedding=diffusion_step)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        next_cache = next_decoder_cache if use_cache else None

        return hidden_states
+
+
class DiffLlamaPrefix(LlamaModel):
    """Non-autoregressive Llama diffusion backbone with a phone-embedding prefix.

    The phone embeddings are prepended to ``x`` along the time axis, attention
    is bidirectional, and the returned hidden states have the prefix stripped
    so their length matches ``x``.
    """

    def __init__(
        self,
        hidden_size=1024,
        num_heads=16,
        num_layers=16,
        config=None,
    ):
        """
        Args:
            hidden_size: transformer width.
            num_heads: attention heads per layer.
            num_layers: number of decoder layers.
            config: optional LlamaConfig passed to the base class. Fix: the
                default used to be ``LlamaConfig(0, 256, 1024, 1, 1)`` built once
                at import time and shared (mutably) by every instance; now a
                fresh placeholder config is created per instance.
        """
        if config is None:
            config = LlamaConfig(0, 256, 1024, 1, 1)
        super().__init__(config)

        self.layers = nn.ModuleList(
            [
                LlamaNARDecoderLayer(
                    LlamaConfig(
                        hidden_size=hidden_size,
                        num_attention_heads=num_heads,
                        max_position_embeddings=4096,
                        intermediate_size=hidden_size * 4,
                    ),
                    layer_idx=i,
                )
                for i in range(num_layers)
            ]
        )

        self.norm = LlamaAdaptiveRMSNorm(hidden_size, dim_cond=hidden_size)

        # Diffusion-step conditioning: sinusoidal embedding followed by an MLP.
        self.diff_step_embedding = SinusoidalPosEmb(hidden_size)
        self.diff_step_mlp = nn.Sequential(
            nn.Linear(hidden_size, hidden_size * 4),
            nn.SiLU(),
            nn.Linear(hidden_size * 4, hidden_size),
        )

        # Projects the phone embeddings before they are used as a prefix.
        self.cond_mlp = nn.Sequential(
            nn.Linear(hidden_size, hidden_size * 4),
            nn.SiLU(),
            nn.Linear(hidden_size * 4, hidden_size),
        )

        # Re-create the adaptive norms sized by `hidden_size` (overriding the
        # ones LlamaNARDecoderLayer built from its per-layer config).
        for layer in self.layers:
            layer.input_layernorm = LlamaAdaptiveRMSNorm(
                hidden_size, dim_cond=hidden_size
            )
            layer.post_attention_layernorm = LlamaAdaptiveRMSNorm(
                hidden_size, dim_cond=hidden_size
            )

        # Inputs are continuous features, so the token embedding is unused.
        self.embed_tokens = None

        self.post_init()

    def _prepare_decoder_attention_mask(
        self, attention_mask, input_shape, inputs_embeds, past_key_values_length
    ):
        # create noncausal mask (every position may attend to every other)
        # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
        combined_attention_mask = None

        def _expand_mask(
            mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int] = None
        ):
            """
            Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
            """
            bsz, src_len = mask.size()
            tgt_len = tgt_len if tgt_len is not None else src_len

            expanded_mask = (
                mask[:, None, None, :].expand(bsz, 1, tgt_len, src_len).to(dtype)
            )

            inverted_mask = 1.0 - expanded_mask

            # masked positions become the most negative representable value
            return inverted_mask.masked_fill(
                inverted_mask.to(torch.bool), torch.finfo(dtype).min
            )

        if attention_mask is not None:
            # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
            expanded_attn_mask = _expand_mask(
                attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]
            ).to(inputs_embeds.device)
            combined_attention_mask = (
                expanded_attn_mask
                if combined_attention_mask is None
                else expanded_attn_mask + combined_attention_mask
            )

        return combined_attention_mask

    def forward(
        self,
        x,
        diffusion_step,
        x_mask,
        phone_embedding: Optional[torch.LongTensor] = None,
        phone_mask: Optional[torch.FloatTensor] = None,
        input_ids: torch.LongTensor = None,  # [num_quant, B, T]
        attention_mask: Optional[torch.LongTensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        """Run the transformer on [phone prefix ; x] and return only x's part.

        Args:
            x: (B, T, C) input sequence.
            diffusion_step: per-sample diffusion timestep fed to every
                adaptive norm.
            x_mask: (B, T) padding mask for ``x``.
            phone_embedding: (B, T_phone, C) prefix conditioning sequence.
            phone_mask: (B, T_phone) padding mask for the prefix.
        Returns:
            (B, T, C) hidden states for the ``x`` positions only.
            NOTE(review): despite the return annotation, a plain tensor is
            returned.
        """
        # build the prefixed sequence: [projected phones ; x]
        phone_embedding = self.cond_mlp(phone_embedding)  # (B, T, C)
        phone_length = phone_embedding.shape[1]
        inputs_embeds = torch.cat([phone_embedding, x], dim=1)
        attention_mask = torch.cat([phone_mask, x_mask], dim=1)

        # diffusion step embedding
        diffusion_step = self.diff_step_embedding(diffusion_step).to(x.device)
        diffusion_step = self.diff_step_mlp(diffusion_step)  # (B, C)

        batch_size, seq_length, _ = inputs_embeds.shape

        output_attentions = (
            output_attentions
            if output_attentions is not None
            else self.config.output_attentions
        )
        output_hidden_states = (
            output_hidden_states
            if output_hidden_states is not None
            else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache

        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        seq_length_with_past = seq_length
        past_key_values_length = 0

        if past_key_values is not None:
            past_key_values_length = past_key_values[0][0].shape[2]
            seq_length_with_past = seq_length_with_past + past_key_values_length

        if position_ids is None:
            device = input_ids.device if input_ids is not None else inputs_embeds.device
            position_ids = torch.arange(
                past_key_values_length,
                seq_length + past_key_values_length,
                dtype=torch.long,
                device=device,
            )
            position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
        else:
            position_ids = position_ids.view(-1, seq_length).long()

        # embed positions
        if attention_mask is None:
            attention_mask = torch.ones(
                (batch_size, seq_length_with_past),
                dtype=torch.bool,
                device=inputs_embeds.device,
            )
        attention_mask = self._prepare_decoder_attention_mask(
            attention_mask,
            (batch_size, seq_length),
            inputs_embeds,
            past_key_values_length,
        )

        hidden_states = inputs_embeds

        if self.gradient_checkpointing and self.training:
            if use_cache:
                use_cache = False

        # decoder layers
        # NOTE(review): these accumulators are collected but never returned;
        # kept to preserve the original behavior exactly.
        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None
        next_decoder_cache = () if use_cache else None

        for idx, decoder_layer in enumerate(self.layers):
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            past_key_value = (
                past_key_values[idx] if past_key_values is not None else None
            )

            if self.gradient_checkpointing and self.training:
                raise NotImplementedError

            else:
                # the diffusion-step embedding drives the adaptive norms
                layer_outputs = decoder_layer(
                    hidden_states,
                    attention_mask=attention_mask,
                    position_ids=position_ids,
                    past_key_value=past_key_value,
                    output_attentions=output_attentions,
                    use_cache=use_cache,
                    cond_embedding=diffusion_step,
                )

            hidden_states = layer_outputs[0]

            if use_cache:
                next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)

            if output_attentions:
                all_self_attns += (layer_outputs[1],)

        hidden_states = self.norm(hidden_states, cond_embedding=diffusion_step)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        next_cache = next_decoder_cache if use_cache else None

        # drop the phone prefix so the output aligns with `x`
        return hidden_states[:, phone_length:]
diff --git a/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py b/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py
new file mode 100644
index 0000000..8e3fe12
--- /dev/null
+++ b/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py
@@ -0,0 +1,503 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+import numpy as np
+import torch.nn as nn
+import math
+from einops import rearrange
+from indextts.utils.maskgct.models.tts.maskgct.llama_nar import DiffLlama
+
+
def top_k(logits, thres=0.9):
    """Keep only the top ``ceil((1 - thres) * vocab)`` logits per position.

    Args:
        logits: (B, T, vocab) tensor of unnormalized scores.
        thres (float): fraction of the vocabulary to discard; higher thres
            keeps fewer candidates.

    Returns:
        Tensor of the same shape with every non-top-k entry set to -inf.
    """
    k = math.ceil((1 - thres) * logits.shape[-1])
    val, ind = logits.topk(k, dim=-1)
    probs = torch.full_like(logits, float("-inf"))
    # scatter_ along dim=2, so `logits` is expected to be 3-D (B, T, vocab).
    probs.scatter_(2, ind, val)
    return probs


def log(t, eps=1e-10):
    """Numerically-safe log: computes log(t + eps) to avoid log(0)."""
    return torch.log(t + eps)


def gumbel_noise(t):
    """Sample standard Gumbel noise with the same shape/device as *t*."""
    noise = torch.zeros_like(t).uniform_(0, 1)
    return -log(-log(noise))


def gumbel_sample(t, temperature=1.0, dim=-1):
    """Sample indices from logits *t* via the Gumbel-max trick.

    Lower temperature approaches greedy argmax; the divisor is clamped to
    1e-10 to avoid division by zero.
    """
    return ((t / max(temperature, 1e-10)) + gumbel_noise(t)).argmax(dim=dim)
+
+
class MaskGCT_S2A(nn.Module):
    """Semantic-to-acoustic (S2A) masked generative model (MaskGCT style).

    Predicts multi-layer RVQ acoustic codes from semantic codes. Training
    masks a single quantizer layer (chosen by ``mask_layer_schedule``) and
    learns to in-fill it; inference (``reverse_diffusion``) decodes the
    layers one after another with iterative mask-predict sampling.
    """

    def __init__(
        self,
        num_quantizer=12,
        hidden_size=1024,
        num_layers=16,
        num_heads=16,
        codebook_size=1024,
        cfg_scale=0.15,
        mask_layer_schedule="linear",
        cond_codebook_size=1024,
        cond_dim=1024,
        predict_layer_1=True,
        cfg=None,
    ):
        super().__init__()

        # Any attribute present on `cfg` overrides the keyword default.
        num_quantizer = (
            cfg.num_quantizer
            if cfg is not None and hasattr(cfg, "num_quantizer")
            else num_quantizer
        )
        hidden_size = (
            cfg.hidden_size
            if cfg is not None and hasattr(cfg, "hidden_size")
            else hidden_size
        )
        num_layers = (
            cfg.num_layers
            if cfg is not None and hasattr(cfg, "num_layers")
            else num_layers
        )
        num_heads = (
            cfg.num_heads
            if cfg is not None and hasattr(cfg, "num_heads")
            else num_heads
        )
        codebook_size = (
            cfg.codebook_size
            if cfg is not None and hasattr(cfg, "codebook_size")
            else codebook_size
        )
        cfg_scale = (
            cfg.cfg_scale
            if cfg is not None and hasattr(cfg, "cfg_scale")
            else cfg_scale
        )
        mask_layer_schedule = (
            cfg.mask_layer_schedule
            if cfg is not None and hasattr(cfg, "mask_layer_schedule")
            else mask_layer_schedule
        )
        cond_codebook_size = (
            cfg.cond_codebook_size
            if cfg is not None and hasattr(cfg, "cond_codebook_size")
            else cond_codebook_size
        )
        cond_dim = (
            cfg.cond_dim if cfg is not None and hasattr(cfg, "cond_dim") else cond_dim
        )
        predict_layer_1 = (
            cfg.predict_layer_1
            if cfg is not None and hasattr(cfg, "predict_layer_1")
            else predict_layer_1
        )

        self.num_quantizer = num_quantizer
        self.hidden_size = hidden_size
        self.codebook_size = codebook_size
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.cfg_scale = cfg_scale
        self.mask_layer_schedule = mask_layer_schedule
        self.cond_codebook_size = cond_codebook_size
        self.cond_dim = cond_dim
        self.predict_layer_1 = predict_layer_1

        # Embedding for the index of the quantizer layer being predicted.
        self.layer_emb = nn.Embedding(self.num_quantizer, self.hidden_size)
        # Single learned [MASK] token embedding.
        self.mask_emb = nn.Embedding(1, self.hidden_size)

        # One token-embedding table and one output head per quantizer layer.
        self.token_emb = torch.nn.ModuleList(
            [
                nn.Embedding(self.codebook_size, self.hidden_size)
                for _ in range(self.num_quantizer)
            ]
        )

        self.to_logits = torch.nn.ModuleList(
            [
                nn.Linear(self.hidden_size, self.codebook_size)
                for _ in range(self.num_quantizer)
            ]
        )

        # Embedding for the conditioning semantic tokens.
        self.cond_emb = nn.Embedding(cond_codebook_size, self.hidden_size)

        self.reset_parameters()

        # NOTE: created after reset_parameters() so DiffLlama keeps its own
        # initialization.
        self.diff_estimator = DiffLlama(
            hidden_size=hidden_size,
            num_heads=self.num_heads,
            num_layers=num_layers,
        )

    def mask_prob(self, t):
        """Masking schedule: fraction of tokens to mask at time t in [0, 1]."""
        return torch.sin(t * np.pi / 2).to(t.device)

    def mask_layer(self, t):
        """Sample which quantizer layer to mask for this training step.

        "cosine"/"linear" schedules favor lower layers; layer 0 is excluded
        when ``predict_layer_1`` is False.

        Returns:
            tuple: (mask_layer (1,) LongTensor, t passed through unchanged)
        """
        if self.mask_layer_schedule == "uniform":
            if self.predict_layer_1:
                mask_layer = torch.randint(0, self.num_quantizer, (1,)).to(t.device)
            else:
                mask_layer = torch.randint(1, self.num_quantizer, (1,)).to(t.device)
        elif self.mask_layer_schedule == "cosine":
            if self.predict_layer_1:
                weights = torch.tensor(
                    [
                        np.cos(i / self.num_quantizer * np.pi / 2)
                        for i in range(self.num_quantizer)
                    ]
                )
            else:
                # Weight 0 for layer 0 so it is never selected.
                weights = torch.tensor(
                    [0]
                    + [
                        np.cos((i - 1) / self.num_quantizer * np.pi / 2)
                        for i in range(1, self.num_quantizer)
                    ]
                )
            mask_layer = torch.multinomial(weights, 1).to(t.device)
        elif self.mask_layer_schedule == "linear":
            if self.predict_layer_1:
                weights = torch.tensor(
                    [self.num_quantizer - i for i in range(self.num_quantizer)]
                )
            else:
                weights = torch.tensor(
                    [0]
                    + [
                        self.num_quantizer - (i - 1)
                        for i in range(1, self.num_quantizer)
                    ]
                )
            weights = weights / weights.sum()
            mask_layer = torch.multinomial(weights, 1).to(t.device)
        new_t = t

        return mask_layer, new_t

    def forward_diffusion(self, x0, t):
        """Corrupt x0: embed layers below the sampled layer, partially mask
        the sampled layer, and fully mask the layers above (outside the prompt).

        Args:
            x0: (B, T, num_quantizer) ground-truth acoustic codes.
            t: (B,) diffusion times in (0, 1].

        Returns:
            tuple: (xt (B, T, hidden_size), new_t (B,), mask_layer (1,),
            mask (B, T, 1), prompt_len (B,), mask_prob (B,))
        """
        mask_layer, new_t = self.mask_layer(t)  # (1,)
        mask_prob = self.mask_prob(new_t)  # (B,)
        mask_token = self.mask_emb(torch.zeros_like(mask_layer))  # (1, hidden_size)

        xt = torch.zeros(x0.shape[0], x0.shape[1], self.hidden_size).to(x0.device)

        cfg_scale = self.cfg_scale

        # With probability (1 - cfg_scale) keep a random prompt prefix;
        # otherwise drop the prompt entirely (classifier-free guidance training).
        if torch.rand(1) > cfg_scale:
            prompt_len = torch.randint(
                min(x0.shape[1] // 4, 5), x0.shape[1] // 2, (x0.shape[0],)
            ).to(
                x0.device
            )  # (B,)
        else:
            prompt_len = torch.zeros(x0.shape[0]).to(x0)  # (B,)

        # get is prompt
        is_prompt = torch.zeros_like(x0[:, :, 0])  # (B, T)
        col_indices = (
            torch.arange(is_prompt.shape[1])
            .repeat(is_prompt.shape[0], 1)
            .to(prompt_len)
        )  # (B, T)
        is_prompt[col_indices < prompt_len.unsqueeze(1)] = 1  # (B, T) 1 if prompt

        for idx, token_emb_idx in enumerate(self.token_emb):
            if idx < mask_layer:
                # Layers below the masked one are given as clean context.
                xt = xt + token_emb_idx(x0[:, :, idx])  # (B, T, hidden_size)

            elif idx == mask_layer:
                mask = torch.bernoulli(
                    torch.ones_like(x0[:, :, idx]) * mask_prob[..., None]
                )  # mask if 1, not mask if 0
                # prompt part don't need to be masked
                mask[is_prompt.bool()] = 0
                # Ensure at least one token is masked
                mask_num = mask.sum(dim=1, keepdim=False)
                all_zero_mask = (mask_num == 0).bool()
                row_indices_to_modify = torch.nonzero(all_zero_mask)
                # mask the first token if all tokens are not masked (may mask pad if random indices)
                mask[row_indices_to_modify, prompt_len[row_indices_to_modify]] = 1

                mask = mask[..., None]  # (B, T, 1)
                xt = (
                    xt
                    + mask * mask_token[:, None, :]
                    + (1 - mask) * token_emb_idx(x0[:, :, idx])
                )  # (B, T, hidden_size)

            else:
                # Layers above: clean inside the prompt, [MASK] everywhere else.
                xt = (
                    xt
                    + token_emb_idx(x0[:, :, idx]) * is_prompt[..., None]
                    + mask_token * (1 - is_prompt[..., None])
                )

        return xt, new_t, mask_layer, mask, prompt_len, mask_prob

    def loss_t(self, x0, x_mask, t, cond=None):
        """Run one corrupted forward pass and produce logits for the masked layer.

        Args:
            x0: (B, T, num_quantizer) target codes.
            x_mask: (B, T) 0 marks padding.
            t: (B,) diffusion times.
            cond: (B, T, hidden_size) conditioning embeddings.

        Returns:
            tuple: (logits (B, T, codebook_size), mask_layer (1,),
            final_mask (B, T, 1), x0, prompt_len (B,), mask_prob (B,))
        """
        xt, new_t, mask_layer, mask, prompt_len, mask_prob = self.forward_diffusion(
            x0, t
        )

        # Inject which layer is being predicted into the condition.
        mask_layer_cond = self.layer_emb(mask_layer).unsqueeze(1)  # (1, 1, hidden_size)
        cond = cond + mask_layer_cond  # (B, T, hidden_size)

        embeds = self.diff_estimator(xt, new_t, cond, x_mask)  # (B, T, hidden_size)

        logits = self.to_logits[mask_layer.item()](embeds)  # (B, T, codebook_size)

        # final mask used for loss calculation: masked AND not padding
        final_mask = mask * x_mask[..., None]  # (B, T, 1)

        return logits, mask_layer, final_mask, x0, prompt_len, mask_prob

    def compute_loss(self, x0, x_mask, cond=None):
        """Sample a diffusion time per batch element and delegate to loss_t.

        Args:
            x0: (B, T, num_quantizer) target codes.
            x_mask: (B, T) mask, 0 for padding.
            cond: (B, T, hidden_size) conditioning embeddings.
        """
        t = torch.rand(x0.shape[0], device=x0.device, requires_grad=False)
        t = torch.clamp(t, 1e-5, 1.0)
        return self.loss_t(x0, x_mask, t, cond)

    def reset_parameters(self):
        """Re-initialize submodule weights with N(0, 0.02) (BERT-style init)."""

        def _reset_parameters(m):
            if isinstance(m, nn.MultiheadAttention):
                if m._qkv_same_embed_dim:
                    nn.init.normal_(m.in_proj_weight, std=0.02)
                else:
                    nn.init.normal_(m.q_proj_weight, std=0.02)
                    nn.init.normal_(m.k_proj_weight, std=0.02)
                    nn.init.normal_(m.v_proj_weight, std=0.02)

                if m.in_proj_bias is not None:
                    nn.init.constant_(m.in_proj_bias, 0.0)
                    nn.init.constant_(m.out_proj.bias, 0.0)
                if m.bias_k is not None:
                    nn.init.xavier_normal_(m.bias_k)
                if m.bias_v is not None:
                    nn.init.xavier_normal_(m.bias_v)

            elif (
                isinstance(m, nn.Conv1d)
                or isinstance(m, nn.ConvTranspose1d)
                or isinstance(m, nn.Conv2d)
                or isinstance(m, nn.ConvTranspose2d)
            ):
                m.weight.data.normal_(0.0, 0.02)

            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(mean=0.0, std=0.02)
                if m.bias is not None:
                    m.bias.data.zero_()

            elif isinstance(m, nn.Embedding):
                m.weight.data.normal_(mean=0.0, std=0.02)
                if m.padding_idx is not None:
                    m.weight.data[m.padding_idx].zero_()

        self.apply(_reset_parameters)

    @torch.no_grad()
    def reverse_diffusion(
        self,
        cond,
        prompt,
        x_mask=None,
        prompt_mask=None,
        temp=1.5,
        filter_thres=0.98,
        max_layer=None,
        gt_code=None,
        # Immutable tuple instead of a mutable list default.
        # NOTE(review): the 8-entry default only satisfies the assert below
        # when num_quantizer == 8; callers normally pass an explicit list.
        n_timesteps=(10, 4, 4, 4, 4, 4, 4, 4),
        cfg=1.0,
        rescale_cfg=1.0,
    ):
        """Iteratively decode acoustic codes layer by layer.

        Args:
            cond: (B, prompt_len + T, hidden_size) semantic condition embeddings.
            prompt: (B, prompt_len, num_quantizer) prompt acoustic codes.
            x_mask: (B, T) target mask (1 = valid); defaults to all ones.
            prompt_mask: (B, prompt_len) prompt mask; defaults to all ones.
            temp: initial Gumbel sampling temperature (annealed per step).
            filter_thres: top-k filtering threshold for logits.
            max_layer: number of layers to decode (defaults to num_quantizer).
            gt_code: (B, T, gt_layer) already-known lower layers to keep fixed.
            n_timesteps: per-layer number of mask-predict iterations.
            cfg: classifier-free guidance strength (0 disables).
            rescale_cfg: interpolation weight for std-rescaled CFG output.

        Returns:
            LongTensor (B, T, max_layer) of predicted acoustic codes.
        """

        assert (
            len(n_timesteps) == self.num_quantizer
        )  # each layer has a number of steps

        prompt_code = prompt  # (B, prompt_len, num_quantizer)
        prompt_len = prompt_code.shape[1]
        target_len = cond.shape[1] - prompt_len

        if x_mask is None:
            x_mask = torch.ones(cond.shape[0], target_len).to(cond.device)  # (B, T)
        if prompt_mask is None:
            prompt_mask = torch.ones(cond.shape[0], prompt_len).to(
                cond.device
            )  # (B, prompt_len)

        # Running sum of the embeddings of all already-decoded layers.
        cum = torch.zeros(x_mask.shape[0], x_mask.shape[1], self.hidden_size).to(
            x_mask.device
        )  # (B, T, hidden_size)

        bsz, seq_len, _ = cum.shape

        choice_temp = 1.0
        start_temp = temp  # temperature for sampling
        start_choice_temp = choice_temp  # temperature for choicing mask tokens

        if max_layer is None:
            max_layer = self.num_quantizer

        # Uninitialized output buffer; every layer slice is overwritten below.
        xt = torch.LongTensor(bsz, seq_len, max_layer).to(x_mask.device)

        if gt_code is not None:
            # Lower layers are given: copy them and accumulate their embeddings.
            gt_layer = gt_code.shape[-1]
            xt[:, :, :gt_layer] = gt_code
            for i in range(gt_layer):
                cum += self.token_emb[i](xt[:, :, i])
        else:
            gt_layer = 0

        for mask_layer in range(gt_layer, max_layer):
            steps = n_timesteps[mask_layer]
            to_logits = self.to_logits[mask_layer]
            token_emb = self.token_emb[mask_layer]
            mask_layer = torch.tensor(mask_layer).to(x_mask.device).long().unsqueeze(0)
            mask_layer_cond = self.layer_emb(mask_layer).unsqueeze(
                1
            )  # (1,) -> (1, 1, hidden_size)
            temp_cond = cond + mask_layer_cond  # (B, T, hidden_size)

            mask_token = self.mask_emb(torch.zeros_like(mask_layer))  # (1, hidden_size)
            mask = torch.full((bsz, seq_len, 1), True).to(x_mask.device)  # (B, T, 1)
            seq = torch.full((bsz, seq_len), 0).to(x_mask.device)

            h = 1.0 / steps

            # prompt_code: (B, prompt_len, num_quantizer)
            cur_prompt = 0
            for idx, emb in enumerate(self.token_emb):
                cur_prompt = cur_prompt + emb(
                    prompt_code[:, :, idx]
                )  # (B, prompt_len, hidden_size)

            t_list = [1.0 - i * h for i in range(steps)]
            t_list.append(0.0)
            for i in range(steps):
                t = t_list[i] * torch.ones(bsz).to(x_mask.device)
                token = token_emb(seq)  # (B, T, hidden_size)
                cur = cum + mask * mask_token[:, None, :] + (~mask) * token
                # Layers above the current one are represented as [MASK].
                cur = cur + mask_token[:, None, :] * (max_layer - 1 - mask_layer)

                xt_input = torch.cat([cur_prompt, cur], dim=1)  # (B, T, hidden_size)
                xt_mask = torch.cat(
                    [prompt_mask, x_mask], dim=1
                )  # (B, T), mask is 0 for padding

                embeds = self.diff_estimator(xt_input, t, temp_cond, xt_mask)
                embeds = embeds[:, prompt_len:, :]

                # cfg: contrast against a prompt-free (unconditional) pass
                if cfg > 0:
                    mask_embeds = self.diff_estimator(
                        cur, t, temp_cond[:, prompt_len:, :], x_mask
                    )
                    pos_emb_std = embeds.std()  # std(g_cond)
                    embeds = embeds + cfg * (embeds - mask_embeds)  # g_cfg
                    rescale_embeds = embeds * pos_emb_std / embeds.std()  # g_final
                    embeds = rescale_cfg * rescale_embeds + (1 - rescale_cfg) * embeds

                logits = to_logits(embeds)  # (B, T, codebook_size)
                annealing_scale = t_list[i]

                choice_temp = start_choice_temp * annealing_scale
                temp = start_temp * annealing_scale
                logits = top_k(logits, filter_thres)

                if i == steps - 1:
                    # greedy on the last step (sampled when only one step total)
                    if steps == 1:
                        temp = 0.2
                        sampled_ids = gumbel_sample(logits, temperature=max(temp, 1e-3))
                    else:
                        sampled_ids = logits.argmax(dim=-1)

                else:
                    # sampling
                    sampled_ids = gumbel_sample(logits, temperature=max(temp, 1e-3))

                seq = torch.where(mask.squeeze(-1), sampled_ids, seq)

                scores = logits.softmax(dim=-1)
                scores = scores.gather(2, rearrange(sampled_ids, "b n -> b n 1"))
                scores = rearrange(scores, "b n 1 -> b n")

                # Low-confidence positions get re-masked on the next step.
                scores = choice_temp * gumbel_noise(scores) + scores
                scores = 1 - scores

                next_t = t_list[i + 1] * torch.ones(bsz).to(x_mask.device)

                next_mask_num = (self.mask_prob(next_t) * seq_len).long()[0].item()

                if next_mask_num == 0:
                    break
                scores = scores.masked_fill(
                    ~mask.squeeze(-1), -torch.finfo(scores.dtype).max
                )

                mask_indices = scores.topk(next_mask_num, dim=-1).indices
                mask = torch.zeros_like(scores, dtype=torch.bool).scatter(
                    1, mask_indices, True
                )
                seq = seq.masked_fill(mask, 0)

                mask = mask.unsqueeze(-1)

            cum = cum + token_emb(seq)
            xt[..., mask_layer.squeeze(0).item()] = seq

        return xt

    def forward(self, x0, x_mask, cond_code=None):
        """Training forward: embed semantic codes and compute masked-layer logits.

        Args:
            x0: (B, T, num_quantizer) target acoustic codes.
            x_mask: (B, T) mask, 0 for padding.
            cond_code: (B, T) semantic tokens used as conditioning.
        """
        cond = self.cond_emb(cond_code)

        logits, mask_layer, final_mask, x0, prompt_len, mask_prob = self.compute_loss(
            x0,
            x_mask,
            cond,
        )
        return logits, mask_layer, final_mask, x0, prompt_len, mask_prob
diff --git a/indextts/utils/maskgct_utils.py b/indextts/utils/maskgct_utils.py
new file mode 100644
index 0000000..40b9cb0
--- /dev/null
+++ b/indextts/utils/maskgct_utils.py
@@ -0,0 +1,259 @@
+import torch
+import librosa
+import json5
+from huggingface_hub import hf_hub_download
+from transformers import SeamlessM4TFeatureExtractor, Wav2Vec2BertModel
+import safetensors
+import numpy as np
+
+from indextts.utils.maskgct.models.codec.kmeans.repcodec_model import RepCodec
+from indextts.utils.maskgct.models.tts.maskgct.maskgct_s2a import MaskGCT_S2A
+from indextts.utils.maskgct.models.codec.amphion_codec.codec import CodecEncoder, CodecDecoder
+import time
+
+
def _load_config(config_fn, lowercase=False):
    """Load a json5 configuration file into a plain dictionary.

    Args:
        config_fn (str): path to the configuration file.
        lowercase (bool, optional): whether to lowercase all keys. Defaults to False.

    Returns:
        dict: dictionary that stores configurations
    """
    # Local import: `os` is not imported at module level in this file, so the
    # "base_config" branch below previously raised NameError.
    import os

    with open(config_fn, "r") as f:
        data = f.read()
    config_ = json5.loads(data)
    if "base_config" in config_:
        # Resolve the parent config relative to $WORK_DIR and let this
        # config override it.
        # NOTE(review): os.getenv("WORK_DIR") is None when unset, which makes
        # os.path.join raise — confirm WORK_DIR is always exported here.
        p_config_path = os.path.join(os.getenv("WORK_DIR"), config_["base_config"])
        p_config_ = _load_config(p_config_path)
        # NOTE(review): override_config is not defined in this module; this
        # branch raises NameError if taken.
        config_ = override_config(p_config_, config_)
    if lowercase:
        # NOTE(review): get_lowercase_keys_config is also undefined in this module.
        config_ = get_lowercase_keys_config(config_)
    return config_
+
+
def load_config(config_fn, lowercase=False):
    """Load a configuration file and wrap it in a JsonHParams object.

    Args:
        config_fn (str): path to the configuration file.
        lowercase (bool, optional): whether to lowercase all keys. Defaults to False.

    Returns:
        JsonHParams: attribute-accessible configuration object.
    """
    return JsonHParams(**_load_config(config_fn, lowercase=lowercase))
+
+
class JsonHParams:
    """Attribute-style view over a (possibly nested) configuration dict.

    Nested dicts are wrapped recursively, so ``cfg.model.hidden_size`` works
    for ``{"model": {"hidden_size": 1024}}``. Mapping-style access is also
    supported: ``cfg["key"]``, ``key in cfg``, ``len(cfg)``, and
    ``keys()/items()/values()``.
    """

    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            # isinstance instead of `type(v) == dict` so dict subclasses
            # (e.g. OrderedDict) are wrapped too.
            if isinstance(v, dict):
                v = JsonHParams(**v)
            self[k] = v

    def keys(self):
        return self.__dict__.keys()

    def items(self):
        return self.__dict__.items()

    def values(self):
        return self.__dict__.values()

    def __len__(self):
        return len(self.__dict__)

    def __getitem__(self, key):
        return getattr(self, key)

    def __setitem__(self, key, value):
        return setattr(self, key, value)

    def __contains__(self, key):
        return key in self.__dict__

    def __repr__(self):
        return self.__dict__.__repr__()
+
+
def build_semantic_model(path_='./models/tts/maskgct/ckpt/wav2vec2bert_stats.pt'):
    """Load the w2v-bert-2.0 semantic model plus feature-normalization stats.

    Args:
        path_ (str): checkpoint containing "mean" and "var" tensors.

    Returns:
        tuple: (semantic_model in eval mode, semantic_mean, semantic_std)
    """
    semantic_model = Wav2Vec2BertModel.from_pretrained("facebook/w2v-bert-2.0")
    semantic_model.eval()
    # map_location="cpu" so GPU-saved stats still load on CPU-only hosts;
    # callers relocate them later via .to(feat).
    stat_mean_var = torch.load(path_, map_location="cpu")
    semantic_mean = stat_mean_var["mean"]
    semantic_std = torch.sqrt(stat_mean_var["var"])
    return semantic_model, semantic_mean, semantic_std
+
+
def build_semantic_codec(cfg):
    """Construct the semantic codec (RepCodec) and switch it to eval mode."""
    model = RepCodec(cfg=cfg)
    model.eval()
    return model
+
+
def build_s2a_model(cfg, device):
    """Construct a MaskGCT_S2A model in eval mode on *device*."""
    model = MaskGCT_S2A(cfg=cfg)
    model.eval()
    model.to(device)
    return model
+
+
def build_acoustic_codec(cfg, device):
    """Construct the acoustic codec encoder/decoder pair in eval mode on *device*.

    Args:
        cfg: config object with `.encoder` and `.decoder` sub-configs.
        device: target torch device.

    Returns:
        tuple: (codec_encoder, codec_decoder)
    """
    encoder = CodecEncoder(cfg=cfg.encoder)
    decoder = CodecDecoder(cfg=cfg.decoder)
    for module in (encoder, decoder):
        module.eval()
        module.to(device)
    return encoder, decoder
+
+
class Inference_Pipeline():
    """End-to-end semantic-to-waveform inference pipeline.

    Bundles the w2v-bert semantic model (+ normalization stats), the semantic
    codec, the acoustic codec encoder/decoder, and the two S2A predictors
    (a coarse single-layer model and the full multi-layer model).
    """

    def __init__(
        self,
        semantic_model,
        semantic_codec,
        semantic_mean,
        semantic_std,
        codec_encoder,
        codec_decoder,
        s2a_model_1layer,
        s2a_model_full,
    ):
        # Semantic feature extractor and its per-dim normalization stats.
        self.semantic_model = semantic_model
        self.semantic_codec = semantic_codec
        self.semantic_mean = semantic_mean
        self.semantic_std = semantic_std

        # Acoustic codec plus coarse (1-layer) and full S2A models.
        self.codec_encoder = codec_encoder
        self.codec_decoder = codec_decoder
        self.s2a_model_1layer = s2a_model_1layer
        self.s2a_model_full = s2a_model_full

    @torch.no_grad()
    def get_emb(self, input_features, attention_mask):
        """Extract normalized semantic features from w2v-bert hidden states.

        Returns hidden state of layer 17 normalized with the precomputed
        mean/std. NOTE(review): the choice of layer 17 is a fixed design
        constant here — confirm it matches the stats checkpoint.
        """
        vq_emb = self.semantic_model(
            input_features=input_features,
            attention_mask=attention_mask,
            output_hidden_states=True,
        )
        feat = vq_emb.hidden_states[17]  # (B, T, C)
        feat = (feat - self.semantic_mean.to(feat)) / self.semantic_std.to(feat)
        return feat

    @torch.no_grad()
    def extract_acoustic_code(self, speech):
        """Encode a waveform into multi-layer acoustic codes.

        Args:
            speech: (B, T) waveform tensor; a channel dim is added for the encoder.

        Returns:
            acoustic codes permuted to (B, T', num_quantizer).
        """
        vq_emb = self.codec_encoder(speech.unsqueeze(1))
        _, vq, _, _, _ = self.codec_decoder.quantizer(vq_emb)
        acoustic_code = vq.permute(1, 2, 0)
        return acoustic_code

    @torch.no_grad()
    def get_scode(self, inputs):
        """Quantize semantic features into semantic codes."""
        semantic_code, feat = self.semantic_codec.quantize(inputs)
        # vq = self.semantic_codec.quantizer.vq2emb(semantic_code.unsqueeze(1))
        # vq = vq.transpose(1,2)
        return semantic_code

    @torch.no_grad()
    def semantic2acoustic(
        self,
        combine_semantic_code,
        acoustic_code,
        n_timesteps=[25, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        cfg=2.5,
        rescale_cfg=0.75,
    ):
        """Two-stage S2A decoding followed by codec vocoding.

        Stage 1 predicts the first quantizer layer; stage 2 predicts the
        remaining layers conditioned on it. The prompt's acoustic codes are
        also re-synthesized and prepended to the output.

        Returns:
            tuple: (combine_audio with prompt prefix, recovered_audio only),
            both as numpy arrays.
        """
        semantic_code = combine_semantic_code

        # Stage 1: coarse model predicts only the first code layer.
        cond = self.s2a_model_1layer.cond_emb(semantic_code)
        prompt = acoustic_code[:, :, :]
        predict_1layer = self.s2a_model_1layer.reverse_diffusion(
            cond=cond,
            prompt=prompt,
            temp=1.5,
            filter_thres=0.98,
            n_timesteps=n_timesteps[:1],
            cfg=cfg,
            rescale_cfg=rescale_cfg,
        )

        # Stage 2: full model fills in the remaining layers (gt_code pins
        # the stage-1 prediction).
        cond = self.s2a_model_full.cond_emb(semantic_code)
        prompt = acoustic_code[:, :, :]
        predict_full = self.s2a_model_full.reverse_diffusion(
            cond=cond,
            prompt=prompt,
            temp=1.5,
            filter_thres=0.98,
            n_timesteps=n_timesteps,
            cfg=cfg,
            rescale_cfg=rescale_cfg,
            gt_code=predict_1layer,
        )

        # Decode predicted codes (and the prompt codes) back to waveforms.
        vq_emb = self.codec_decoder.vq2emb(
            predict_full.permute(2, 0, 1), n_quantizers=12
        )
        recovered_audio = self.codec_decoder(vq_emb)
        prompt_vq_emb = self.codec_decoder.vq2emb(
            prompt.permute(2, 0, 1), n_quantizers=12
        )
        recovered_prompt_audio = self.codec_decoder(prompt_vq_emb)
        recovered_prompt_audio = recovered_prompt_audio[0][0].cpu().numpy()
        recovered_audio = recovered_audio[0][0].cpu().numpy()
        combine_audio = np.concatenate([recovered_prompt_audio, recovered_audio])

        return combine_audio, recovered_audio

    def s2a_inference(
        self,
        prompt_speech_path,
        combine_semantic_code,
        cfg=2.5,  # NOTE(review): unused — cfg_s2a is what is forwarded below
        n_timesteps_s2a=[25, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        cfg_s2a=2.5,
        rescale_cfg_s2a=0.75,
    ):
        """Synthesize audio for *combine_semantic_code* using a spoken prompt.

        The prompt wav (loaded at 24 kHz) supplies the acoustic prompt codes.

        Returns:
            numpy array with the synthesized audio (prompt part excluded).
        """
        speech = librosa.load(prompt_speech_path, sr=24000)[0]
        acoustic_code = self.extract_acoustic_code(
            torch.tensor(speech).unsqueeze(0).to(combine_semantic_code.device)
        )
        _, recovered_audio = self.semantic2acoustic(
            combine_semantic_code,
            acoustic_code,
            n_timesteps=n_timesteps_s2a,
            cfg=cfg_s2a,
            rescale_cfg=rescale_cfg_s2a,
        )

        return recovered_audio

    @torch.no_grad()
    def gt_inference(
        self,
        prompt_speech_path,
        combine_semantic_code,
    ):
        """Round-trip the prompt wav through the codec (debug/reference path).

        NOTE(review): `combine_semantic_code` is only used for its device;
        the semantic codes themselves are not consumed here.
        """
        speech = librosa.load(prompt_speech_path, sr=24000)[0]
        '''
        acoustic_code = self.extract_acoustic_code(
            torch.tensor(speech).unsqueeze(0).to(combine_semantic_code.device)
        )
        prompt = acoustic_code[:, :, :]
        prompt_vq_emb = self.codec_decoder.vq2emb(
            prompt.permute(2, 0, 1), n_quantizers=12
        )
        '''

        prompt_vq_emb = self.codec_encoder(torch.tensor(speech).unsqueeze(0).unsqueeze(1).to(combine_semantic_code.device))
        recovered_prompt_audio = self.codec_decoder(prompt_vq_emb)
        recovered_prompt_audio = recovered_prompt_audio[0][0].cpu().numpy()
        return recovered_prompt_audio
diff --git a/indextts/utils/text_utils.py b/indextts/utils/text_utils.py
new file mode 100644
index 0000000..d8cfc6e
--- /dev/null
+++ b/indextts/utils/text_utils.py
@@ -0,0 +1,41 @@
+import re
+
+from textstat import textstat
+
+
def contains_chinese(text):
    """Return True if *text* contains any CJK character or ASCII digit.

    Digits are deliberately treated as "Chinese" here so that numbered text
    is routed through the per-character syllable counter.
    """
    return re.search(r'[\u4e00-\u9fff0-9]', text) is not None
+
+
def get_text_syllable_num(text):
    """Estimate the number of syllables in *text*.

    For text containing CJK or digits: each CJK character / digit counts as
    one syllable, while Latin runs are counted with textstat. Pure
    non-Chinese text is handed to textstat wholesale.
    """
    if not contains_chinese(text):
        return textstat.syllable_count(text)

    cjk_or_digit = re.compile(r'[\u4e00-\u9fff0-9]')
    total = 0
    # Split into homogeneous runs of CJK chars, Latin letters, or digits.
    for token in re.findall(r'[\u4e00-\u9fff]+|[a-zA-Z]+|[0-9]+', text):
        if cjk_or_digit.search(token):
            total += len(token)
        else:
            total += textstat.syllable_count(token)
    return total
+
+
def get_text_tts_dur(text):
    """Estimate duration bounds (seconds) for synthesizing *text*.

    Speeds are in syllables per second; Chinese text gets a 0.8517 scaling
    ratio.

    Returns:
        tuple: (max_dur, min_dur). NOTE(review): the first value is computed
        with the *max* speed and is therefore the shorter duration — the
        names look swapped relative to their magnitudes, but the return
        order is preserved for existing callers.
    """
    min_speed = 3  # slowest assumed speech rate (syllables/s)
    max_speed = 5.50  # fastest assumed speech rate (syllables/s)

    ratio = 0.8517 if contains_chinese(text) else 1.0
    scaled_syllables = get_text_syllable_num(text) * ratio

    return scaled_syllables / max_speed, scaled_syllables / min_speed
\ No newline at end of file
diff --git a/indextts/utils/utils.py b/indextts/utils/utils.py
new file mode 100644
index 0000000..82cf878
--- /dev/null
+++ b/indextts/utils/utils.py
@@ -0,0 +1,93 @@
+import os
+import re
+import random
+import torch
+import torchaudio
+
+MATPLOTLIB_FLAG = False
+
+
def load_audio(audiopath, sampling_rate):
    """Load *audiopath* as a mono (1, T) tensor resampled to *sampling_rate*.

    Returns:
        Tensor of shape (1, T) clipped to [-1, 1], or None if resampling fails.
    """
    audio, sr = torchaudio.load(audiopath)

    # "Mono" here means keeping channel 0 only — channels are not averaged.
    if audio.size(0) > 1:
        audio = audio[0].unsqueeze(0)

    if sr == sampling_rate:
        audio.clip_(-1, 1)
        return audio

    try:
        audio = torchaudio.functional.resample(audio, sr, sampling_rate)
    except Exception:
        # Best-effort: report and signal failure to the caller instead of raising.
        print(f"Warning: {audiopath}, wave shape: {audio.shape}, sample_rate: {sr}")
        return None

    # Clamp out-of-range samples in place.
    audio.clip_(-1, 1)
    return audio
+
+
def tokenize_by_CJK_char(line: str) -> str:
    """Split a line so that every CJK character becomes its own token.

    All output is upper-cased; non-CJK runs (e.g. Latin words) stay intact.

    Example:
        input = "你好世界是 hello world 的中文"
        output = "你 好 世 界 是 HELLO WORLD 的 中 文"

    Args:
        line: the input text.

    Return:
        A single space-joined string of tokens.
    """
    # The CJK ranges is from https://github.com/alvations/nltk/blob/79eed6ddea0d0a2c212c1060b477fc268fec4d4b/nltk/tokenize/util.py
    cjk = re.compile(
        r"([\u1100-\u11ff\u2e80-\ua4cf\ua840-\uD7AF\uF900-\uFAFF\uFE30-\uFE4F\uFF65-\uFFDC\U00020000-\U0002FFFF])"
    )
    # Capturing split keeps each CJK char as its own piece; drop empties.
    pieces = cjk.split(line.strip().upper())
    return " ".join(piece.strip() for piece in pieces if piece.strip())
+
+
def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor:
    """Build a boolean padding mask from sequence lengths.

    Positions at or beyond each sequence's length are True (padded).

    Args:
        lengths (torch.Tensor): Batch of lengths, shape (B,).
        max_len (int): Mask width; when <= 0, lengths.max() is used.
    Returns:
        torch.Tensor: bool mask of shape (B, max_len), True at padded slots.

    Examples:
        >>> lengths = [5, 3, 2]
        >>> make_pad_mask(lengths)
        masks = [[0, 0, 0, 0 ,0],
                 [0, 0, 0, 1, 1],
                 [0, 0, 1, 1, 1]]
    """
    if max_len <= 0:
        max_len = lengths.max().item()
    positions = torch.arange(max_len, dtype=torch.int64, device=lengths.device)
    # (1, max_len) >= (B, 1) broadcasts to the (B, max_len) mask.
    return positions.unsqueeze(0) >= lengths.unsqueeze(-1)
+
+
def safe_log(x: torch.Tensor, clip_val: float = 1e-7) -> torch.Tensor:
    """
    Element-wise logarithm with the input floored at *clip_val* to avoid log(0).

    Args:
        x (Tensor): Input tensor.
        clip_val (float, optional): Minimum value to clip the input tensor. Defaults to 1e-7.

    Returns:
        Tensor: Element-wise logarithm of the clipped input.
    """
    return torch.log(x.clamp(min=clip_val))
diff --git a/pyproject.toml b/pyproject.toml
index 638dd9c..16a5ecd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,40 @@
-[build-system]
-requires = ["setuptools>=61.0"]
-build-backend = "setuptools.build_meta"
+[project]
+name = "index-tts"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "accelerate==1.8.1",
+ "cn2an==0.5.22",
+ "cython==3.0.7",
+ "deepspeed==0.17.1",
+ "descript-audiotools==0.7.2",
+ "ffmpeg-python==0.2.0",
+ "g2p-en==2.1.0",
+ "gradio>=5.44.1",
+ "jieba==0.42.1",
+ "json5==0.10.0",
+ "keras==2.9.0",
+ "librosa==0.10.2.post1",
+ "matplotlib==3.8.2",
+ "modelscope==1.27.0",
+ "munch==4.0.0",
+ "numba==0.58.1",
+ "numpy==1.26.2",
+ "omegaconf>=2.3.0",
+ "opencv-python==4.9.0.80",
+ "pandas==2.1.3",
+ "safetensors==0.5.2",
+ "sentencepiece>=0.2.1",
+ "tensorboard==2.9.1",
+ "textstat>=0.7.10",
+ "tokenizers==0.21.0",
+ "tqdm>=4.67.1",
+ "transformers==4.52.1",
+ "wetext>=0.0.9;sys_platform == 'darwin'",
+ "WeTextProcessing;sys_platform != 'darwin'",
+]
+
+[tool.uv]
+extra-index-url = ["https://pypi.bilibili.co/repository/pypi-public/simple/"]
diff --git a/requirements.txt b/requirements.txt
index 5491e63..3276b05 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,25 +1,30 @@
-accelerate==0.25.0
-transformers==4.36.2
-tokenizers==0.15.0
+accelerate==1.8.1
+descript-audiotools==0.7.2
+transformers==4.52.1
+tokenizers==0.21.0
cn2an==0.5.22
ffmpeg-python==0.2.0
Cython==3.0.7
g2p-en==2.1.0
jieba==0.42.1
+json5==0.10.0
keras==2.9.0
numba==0.58.1
numpy==1.26.2
pandas==2.1.3
matplotlib==3.8.2
+munch==4.0.0
opencv-python==4.9.0.80
-vocos==0.1.0
-accelerate==0.25.0
tensorboard==2.9.1
+librosa==0.10.2.post1
+safetensors==0.5.2
+deepspeed==0.17.1
+modelscope==1.27.0
omegaconf
sentencepiece
-librosa
gradio
tqdm
+textstat
WeTextProcessing; platform_machine != "Darwin"
-wetext; platform_system == "Darwin"
\ No newline at end of file
+wetext; platform_system == "Darwin"
diff --git a/tests/sample_prompt.wav b/tests/sample_prompt.wav
index ccc65c2..b031ae1 100644
Binary files a/tests/sample_prompt.wav and b/tests/sample_prompt.wav differ
diff --git a/tools/i18n/i18n.py b/tools/i18n/i18n.py
index e256941..484e236 100644
--- a/tools/i18n/i18n.py
+++ b/tools/i18n/i18n.py
@@ -18,7 +18,7 @@ def scan_language_list():
class I18nAuto:
def __init__(self, language=None):
if language in ["Auto", None]:
- language = locale.getdefaultlocale()[0]
+ language = locale.getdefaultlocale()[0]
# getlocale can't identify the system's language ((None, None))
if not os.path.exists(os.path.join(I18N_JSON_DIR, f"{language}.json")):
language = "en_US"
diff --git a/tools/i18n/locale/en_US.json b/tools/i18n/locale/en_US.json
index f1c8016..8ef3daf 100644
--- a/tools/i18n/locale/en_US.json
+++ b/tools/i18n/locale/en_US.json
@@ -1,4 +1,46 @@
{
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.": "This software is open-sourced under the MIT License. The author has no control over the software, and users of the software, as well as those who distribute the audio generated by the software, assume full responsibility.",
- "如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "If you do not agree to these terms, you are not permitted to use or reference any code or files within the software package. For further details, please refer to the LICENSE file in the root directory."
+ "如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "If you do not agree to these terms, you are not permitted to use or reference any code or files within the software package. For further details, please refer to the LICENSE file in the root directory.",
+ "时长必须为正数": "Duration must be a positive number",
+ "请输入有效的浮点数": "Please enter a valid floating-point number",
+ "使用情感参考音频": "Use emotion reference audio",
+ "使用情感向量控制": "Use emotion vector",
+ "使用情感描述文本控制": "Use text description to control emotion",
+ "上传情感参考音频": "Upload emotion reference audio",
+ "情感权重": "Emotion control weight",
+ "喜": "Happy",
+ "怒": "Angry",
+ "哀": "Sad",
+ "惧": "Fear",
+ "厌恶": "Hate",
+ "低落": "Low",
+ "惊喜": "Surprise",
+ "平静": "Neutral",
+ "情感描述文本": "Emotion description",
+ "请输入情感描述文本": "Please input emotion description",
+ "高级生成参数设置": "Advanced generation parameter settings",
+ "情感向量之和不能超过1.5,请调整后重试。": "The sum of the emotion vectors cannot exceed 1.5. Please adjust and try again.",
+ "音色参考音频": "Voice reference",
+ "音频生成": "Speech Synthesis",
+ "文本": "Text",
+ "生成语音": "Synthesize",
+ "生成结果": "Synthesis Result",
+ "功能设置": "Settings",
+ "分句设置": "Sentence segmentation settings",
+ "参数会影响音频质量和生成速度": "Parameters below affect audio quality and generation speed",
+ "分句最大Token数": "Max tokens per sentence",
+ "建议80~200之间,值越大,分句越长;值越小,分句越碎;过小过大都可能导致音频质量不高": "Recommended between 80 and 200. The larger the value, the longer the sentences; the smaller the value, the more fragmented the sentences. Values that are too small or too large may lead to poor audio quality.",
+ "预览分句结果": "Preview sentence segmentation result",
+ "序号": "Index",
+ "分句内容": "Content",
+ "Token数": "Token Count",
+ "情感控制方式": "Emotion control method",
+ "GPT2 采样设置": "GPT-2 Sampling Configuration",
+    "参数会影响音频多样性和生成速度详见": "Influences both the diversity of the generated audio and the generation speed. For further details, refer to",
+ "请上传情感参考音频": "Please upload emotion reference audio",
+ "当前模型版本": "Current model version ",
+ "请输入目标文本": "Please input text to synthesize",
+ "例如:高兴,愤怒,悲伤等": "e.g., happy, angry, sad, etc.",
+ "与音色参考音频相同": "Same as the voice reference",
+ "情感随机采样": "Random emotion sampling"
}
\ No newline at end of file
diff --git a/tools/i18n/locale/zh_CN.json b/tools/i18n/locale/zh_CN.json
new file mode 100644
index 0000000..26a7596
--- /dev/null
+++ b/tools/i18n/locale/zh_CN.json
@@ -0,0 +1,40 @@
+{
+ "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.": "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.",
+ "如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.",
+ "时长必须为正数": "时长必须为正数",
+ "请输入有效的浮点数": "请输入有效的浮点数",
+ "使用情感参考音频": "使用情感参考音频",
+ "使用情感向量控制": "使用情感向量控制",
+ "使用情感描述文本控制": "使用情感描述文本控制",
+ "上传情感参考音频": "上传情感参考音频",
+ "情感权重": "情感权重",
+ "喜": "喜",
+ "怒": "怒",
+ "哀": "哀",
+ "惧": "惧",
+ "厌恶": "厌恶",
+ "低落": "低落",
+ "惊喜": "惊喜",
+ "平静": "平静",
+ "情感描述文本": "情感描述文本",
+ "请输入情感描述文本": "请输入情感描述文本",
+ "高级生成参数设置": "高级生成参数设置",
+ "情感向量之和不能超过1.5,请调整后重试。": "情感向量之和不能超过1.5,请调整后重试。",
+ "音色参考音频": "音色参考音频",
+ "音频生成": "音频生成",
+ "文本": "文本",
+ "生成语音": "生成语音",
+ "生成结果": "生成结果",
+ "功能设置": "功能设置",
+ "分句设置": "分句设置",
+ "参数会影响音频质量和生成速度": "参数会影响音频质量和生成速度",
+ "分句最大Token数": "分句最大Token数",
+ "建议80~200之间,值越大,分句越长;值越小,分句越碎;过小过大都可能导致音频质量不高": "建议80~200之间,值越大,分句越长;值越小,分句越碎;过小过大都可能导致音频质量不高",
+ "预览分句结果": "预览分句结果",
+ "序号": "序号",
+ "分句内容": "分句内容",
+ "Token数": "Token数",
+ "情感控制方式": "情感控制方式",
+ "GPT2 采样设置": "GPT2 采样设置",
+ "参数会影响音频多样性和生成速度详见": "参数会影响音频多样性和生成速度详见"
+}
\ No newline at end of file
diff --git a/uv.lock b/uv.lock
new file mode 100644
index 0000000..d675771
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,3701 @@
+version = 1
+revision = 3
+requires-python = ">=3.10"
+resolution-markers = [
+ "python_full_version >= '3.13' and sys_platform == 'darwin'",
+ "python_full_version == '3.12.*' and sys_platform == 'darwin'",
+ "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
+ "(python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
+ "python_full_version == '3.11.*' and sys_platform == 'darwin'",
+ "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
+ "python_full_version < '3.11' and sys_platform == 'darwin'",
+ "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')",
+]
+
+[[package]]
+name = "absl-py"
+version = "2.3.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/absl-py/2.3.1/absl_py-2.3.1.tar.gz", hash = "sha256:a97820526f7fbfd2ec1bce83f3f25e3a14840dac0d8e02a0b71cd75db3f77fc9" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/absl-py/2.3.1/absl_py-2.3.1-py3-none-any.whl", hash = "sha256:eeecf07f0c2a93ace0772c92e596ace6d3d3996c042b2128459aaae2a76de11d" },
+]
+
+[[package]]
+name = "accelerate"
+version = "1.8.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "huggingface-hub" },
+ { name = "numpy" },
+ { name = "packaging" },
+ { name = "psutil" },
+ { name = "pyyaml" },
+ { name = "safetensors" },
+ { name = "torch" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/accelerate/1.8.1/accelerate-1.8.1.tar.gz", hash = "sha256:f60df931671bc4e75077b852990469d4991ce8bd3a58e72375c3c95132034db9" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/accelerate/1.8.1/accelerate-1.8.1-py3-none-any.whl", hash = "sha256:c47b8994498875a2b1286e945bd4d20e476956056c7941d512334f4eb44ff991" },
+]
+
+[[package]]
+name = "aiofiles"
+version = "24.1.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/aiofiles/24.1.0/aiofiles-24.1.0.tar.gz", hash = "sha256:22a075c9e5a3810f0c2e48f3008c94d68c65d763b9b03857924c99e57355166c" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/aiofiles/24.1.0/aiofiles-24.1.0-py3-none-any.whl", hash = "sha256:b4ec55f4195e3eb5d7abd1bf7e061763e864dd4954231fb8539a0ef8bb8260e5" },
+]
+
+[[package]]
+name = "annotated-types"
+version = "0.7.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/annotated-types/0.7.0/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/annotated-types/0.7.0/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53" },
+]
+
+[[package]]
+name = "antlr4-python3-runtime"
+version = "4.9.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/antlr4-python3-runtime/4.9.3/antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b" }
+
+[[package]]
+name = "anyio"
+version = "4.10.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "exceptiongroup", marker = "python_full_version < '3.11'" },
+ { name = "idna" },
+ { name = "sniffio" },
+ { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/anyio/4.10.0/anyio-4.10.0.tar.gz", hash = "sha256:3f3fae35c96039744587aa5b8371e7e8e603c0702999535961dd336026973ba6" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/anyio/4.10.0/anyio-4.10.0-py3-none-any.whl", hash = "sha256:60e474ac86736bbfd6f210f7a61218939c318f43f9972497381f1c5e930ed3d1" },
+]
+
+[[package]]
+name = "argbind"
+version = "0.3.9"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "docstring-parser" },
+ { name = "pyyaml" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/argbind/0.3.9/argbind-0.3.9.tar.gz", hash = "sha256:1b159c04af56858a91d59c7a47bc9ea39d96adfce1d7fcfa38050d7ac9815745" }
+
+[[package]]
+name = "asttokens"
+version = "3.0.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/asttokens/3.0.0/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/asttokens/3.0.0/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2" },
+]
+
+[[package]]
+name = "audioop-lts"
+version = "0.2.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2.tar.gz", hash = "sha256:64d0c62d88e67b98a1a5e71987b7aa7b5bcffc7dcee65b635823dbdd0a8dbbd0" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-macosx_10_13_universal2.whl", hash = "sha256:fd3d4602dc64914d462924a08c1a9816435a2155d74f325853c1f1ac3b2d9800" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-macosx_10_13_x86_64.whl", hash = "sha256:550c114a8df0aafe9a05442a1162dfc8fec37e9af1d625ae6060fed6e756f303" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-macosx_11_0_arm64.whl", hash = "sha256:9a13dc409f2564de15dd68be65b462ba0dde01b19663720c68c1140c782d1d75" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:51c916108c56aa6e426ce611946f901badac950ee2ddaf302b7ed35d9958970d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47eba38322370347b1c47024defbd36374a211e8dd5b0dcbce7b34fdb6f8847b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba7c3a7e5f23e215cb271516197030c32aef2e754252c4c70a50aaff7031a2c8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:def246fe9e180626731b26e89816e79aae2276f825420a07b4a647abaa84becc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e160bf9df356d841bb6c180eeeea1834085464626dc1b68fa4e1d59070affdc3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4b4cd51a57b698b2d06cb9993b7ac8dfe89a3b2878e96bc7948e9f19ff51dba6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_ppc64le.whl", hash = "sha256:4a53aa7c16a60a6857e6b0b165261436396ef7293f8b5c9c828a3a203147ed4a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_riscv64.whl", hash = "sha256:3fc38008969796f0f689f1453722a0f463da1b8a6fbee11987830bfbb664f623" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_s390x.whl", hash = "sha256:15ab25dd3e620790f40e9ead897f91e79c0d3ce65fe193c8ed6c26cffdd24be7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:03f061a1915538fd96272bac9551841859dbb2e3bf73ebe4a23ef043766f5449" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-win32.whl", hash = "sha256:3bcddaaf6cc5935a300a8387c99f7a7fbbe212a11568ec6cf6e4bc458c048636" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-win_amd64.whl", hash = "sha256:a2c2a947fae7d1062ef08c4e369e0ba2086049a5e598fda41122535557012e9e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-abi3-win_arm64.whl", hash = "sha256:5f93a5db13927a37d2d09637ccca4b2b6b48c19cd9eda7b17a2e9f77edee6a6f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:73f80bf4cd5d2ca7814da30a120de1f9408ee0619cc75da87d0641273d202a09" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:106753a83a25ee4d6f473f2be6b0966fc1c9af7e0017192f5531a3e7463dce58" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fbdd522624141e40948ab3e8cdae6e04c748d78710e9f0f8d4dae2750831de19" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:143fad0311e8209ece30a8dbddab3b65ab419cbe8c0dde6e8828da25999be911" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dfbbc74ec68a0fd08cfec1f4b5e8cca3d3cd7de5501b01c4b5d209995033cde9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cfcac6aa6f42397471e4943e0feb2244549db5c5d01efcd02725b96af417f3fe" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:752d76472d9804ac60f0078c79cdae8b956f293177acd2316cd1e15149aee132" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:83c381767e2cc10e93e40281a04852facc4cd9334550e0f392f72d1c0a9c5753" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c0022283e9556e0f3643b7c3c03f05063ca72b3063291834cca43234f20c60bb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:a2d4f1513d63c795e82948e1305f31a6d530626e5f9f2605408b300ae6095093" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:c9c8e68d8b4a56fda8c025e538e639f8c5953f5073886b596c93ec9b620055e7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:96f19de485a2925314f5020e85911fb447ff5fbef56e8c7c6927851b95533a1c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e541c3ef484852ef36545f66209444c48b28661e864ccadb29daddb6a4b8e5f5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-win32.whl", hash = "sha256:d5e73fa573e273e4f2e5ff96f9043858a5e9311e94ffefd88a3186a910c70917" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:9191d68659eda01e448188f60364c7763a7ca6653ed3f87ebb165822153a8547" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:c174e322bb5783c099aaf87faeb240c8d210686b04bd61dfd05a8e5a83d88969" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:f9ee9b52f5f857fbaf9d605a360884f034c92c1c23021fb90b2e39b8e64bede6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:49ee1a41738a23e98d98b937a0638357a2477bc99e61b0f768a8f654f45d9b7a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5b00be98ccd0fc123dcfad31d50030d25fcf31488cde9e61692029cd7394733b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a6d2e0f9f7a69403e388894d4ca5ada5c47230716a03f2847cfc7bd1ecb589d6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9b0b8a03ef474f56d1a842af1a2e01398b8f7654009823c6d9e0ecff4d5cfbf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2b267b70747d82125f1a021506565bdc5609a2b24bcb4773c16d79d2bb260bbd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0337d658f9b81f4cd0fdb1f47635070cc084871a3d4646d9de74fdf4e7c3d24a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:167d3b62586faef8b6b2275c3218796b12621a60e43f7e9d5845d627b9c9b80e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0d9385e96f9f6da847f4d571ce3cb15b5091140edf3db97276872647ce37efd7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:48159d96962674eccdca9a3df280e864e8ac75e40a577cc97c5c42667ffabfc5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8fefe5868cd082db1186f2837d64cfbfa78b548ea0d0543e9b28935ccce81ce9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:58cf54380c3884fb49fdd37dfb7a772632b6701d28edd3e2904743c5e1773602" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:088327f00488cdeed296edd9215ca159f3a5a5034741465789cad403fcf4bec0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-win32.whl", hash = "sha256:068aa17a38b4e0e7de771c62c60bbca2455924b67a8814f3b0dee92b5820c0b3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:a5bf613e96f49712073de86f20dbdd4014ca18efd4d34ed18c75bd808337851b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioop-lts/0.2.2/audioop_lts-0.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:b492c3b040153e68b9fdaff5913305aaaba5bb433d8a7f73d5cf6a64ed3cc1dd" },
+]
+
+[[package]]
+name = "audioread"
+version = "3.0.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioread/3.0.1/audioread-3.0.1.tar.gz", hash = "sha256:ac5460a5498c48bdf2e8e767402583a4dcd13f4414d286f42ce4379e8b35066d" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/audioread/3.0.1/audioread-3.0.1-py3-none-any.whl", hash = "sha256:4cdce70b8adc0da0a3c9e0d85fb10b3ace30fbdf8d1670fd443929b61d117c33" },
+]
+
+[[package]]
+name = "brotli"
+version = "1.1.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e1140c64812cb9b06c922e77f1c26a75ec5e3f0fb2bf92cc8c58720dec276752" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c8fd5270e906eef71d4a8d19b7c6a43760c6abcfcc10c9101d14eb2357418de9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ae56aca0402a0f9a3431cddda62ad71666ca9d4dc3a10a142b9dce2e3c0cda3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:43ce1b9935bfa1ede40028054d7f48b5469cd02733a365eec8a329ffd342915d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7c4855522edb2e6ae7fdb58e07c3ba9111e7621a8956f481c68d5d979c93032e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:38025d9f30cf4634f8309c6874ef871b841eb3c347e90b0851f63d1ded5212da" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6a904cb26bfefc2f0a6f240bdf5233be78cd2488900a2f846f3c3ac8489ab80" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a37b8f0391212d29b3a91a799c8e4a2855e0576911cdfb2515487e30e322253d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e84799f09591700a4154154cab9787452925578841a94321d5ee8fb9a9a328f0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f66b5337fa213f1da0d9000bc8dc0cb5b896b726eefd9c6046f699b169c41b9e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5dab0844f2cf82be357a0eb11a9087f70c5430b2c241493fc122bb6f2bb0917c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e4fe605b917c70283db7dfe5ada75e04561479075761a0b3866c081d035b01c1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:1e9a65b5736232e7a7f91ff3d02277f11d339bf34099a56cdab6a8b3410a02b2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:58d4b711689366d4a03ac7957ab8c28890415e267f9b6589969e74b6e42225ec" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-win32.whl", hash = "sha256:be36e3d172dc816333f33520154d708a2657ea63762ec16b62ece02ab5e4daf2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:0c6244521dda65ea562d5a69b9a26120769b7a9fb3db2fe9545935ed6735b128" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a3daabb76a78f829cafc365531c972016e4aa8d5b4bf60660ad8ecee19df7ccc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c8146669223164fc87a7e3de9f81e9423c67a79d6b3447994dfb9c95da16e2d6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30924eb4c57903d5a7526b08ef4a584acc22ab1ffa085faceb521521d2de32dd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ceb64bbc6eac5a140ca649003756940f8d6a7c444a68af170b3187623b43bebf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a469274ad18dc0e4d316eefa616d1d0c2ff9da369af19fa6f3daa4f09671fd61" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524f35912131cc2cabb00edfd8d573b07f2d9f21fa824bd3fb19725a9cf06327" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5b3cc074004d968722f51e550b41a27be656ec48f8afaeeb45ebf65b561481dd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:19c116e796420b0cee3da1ccec3b764ed2952ccfcc298b55a10e5610ad7885f9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:510b5b1bfbe20e1a7b3baf5fed9e9451873559a976c1a78eebaa3b86c57b4265" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c247dd99d39e0338a604f8c2b3bc7061d5c2e9e2ac7ba9cc1be5a69cb6cd832f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1b2c248cd517c222d89e74669a4adfa5577e06ab68771a529060cf5a156e9757" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2a24c50840d89ded6c9a8fdc7b6ed3692ed4e86f1c4a4a938e1e92def92933e0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f31859074d57b4639318523d6ffdca586ace54271a73ad23ad021acd807eb14b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:32d95b80260d79926f5fab3c41701dbb818fde1c9da590e77e571eefd14abe28" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b760c65308ff1e462f65d69c12e4ae085cff3b332d894637f6273a12a482d09f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f4bf76817c14aa98cc6697ac02f3972cb8c3da93e9ef16b9c66573a68014f91" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0c5516f0aed654134a2fc936325cc2e642f8a0e096d075209672eb321cff408" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c3020404e0b5eefd7c9485ccf8393cfb75ec38ce75586e046573c9dc29967a0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ed11165dd45ce798d99a136808a794a748d5dc38511303239d4e2363c0695dc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:87a3044c3a35055527ac75e419dfa9f4f3667a1e887ee80360589eb8c90aabb9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c5529b34c1c9d937168297f2c1fde7ebe9ebdd5e121297ff9c043bdb2ae3d6fb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ca63e1890ede90b2e4454f9a65135a4d387a4585ff8282bb72964fab893f2111" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e79e6520141d792237c70bcd7a3b122d00f2613769ae0cb61c52e89fd3443839" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8bf32b98b75c13ec7cf774164172683d6e7891088f6316e54425fde1efc276d5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7bc37c4d6b87fb1017ea28c9508b36bbcb0c3d18b4260fcdf08b200c74a6aee8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c0ef38c7a7014ffac184db9e04debe495d317cc9c6fb10071f7fefd93100a4f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91d7cc2a76b5567591d12c01f019dd7afce6ba8cba6571187e21e2fc418ae648" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a93dde851926f4f2678e704fadeb39e16c35d8baebd5252c9fd94ce8ce68c4a0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0db75f47be8b8abc8d9e31bc7aad0547ca26f24a54e6fd10231d623f183d089" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6967ced6730aed543b8673008b5a391c3b1076d834ca438bbd70635c73775368" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7eedaa5d036d9336c95915035fb57422054014ebdeb6f3b42eac809928e40d0c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d487f5432bf35b60ed625d7e1b448e2dc855422e87469e3f450aa5552b0eb284" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832436e59afb93e1836081a20f324cb185836c617659b07b129141a8426973c7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp313-cp313-win32.whl", hash = "sha256:43395e90523f9c23a3d5bdf004733246fba087f2948f87ab28015f12359ca6a0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/brotli/1.1.0/Brotli-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:9011560a466d2eb3f5a6e4929cf4a09be405c64154e12df0dd72713f6500e32b" },
+]
+
+[[package]]
+name = "cachetools"
+version = "5.5.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cachetools/5.5.2/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cachetools/5.5.2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a" },
+]
+
+[[package]]
+name = "certifi"
+version = "2025.8.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/certifi/2025.8.3/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/certifi/2025.8.3/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5" },
+]
+
+[[package]]
+name = "cffi"
+version = "1.17.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "pycparser" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cffi/1.17.1/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a" },
+]
+
+[[package]]
+name = "charset-normalizer"
+version = "3.4.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3.tar.gz", hash = "sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fb7f67a1bfa6e40b438170ebdc8158b78dc465a5a67b6dde178a46987b244a72" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc9370a2da1ac13f0153780040f465839e6cccb4a1e44810124b4e22483c93fe" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:07a0eae9e2787b586e129fdcbe1af6997f8d0e5abaa0bc98c0e20e124d67e601" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:74d77e25adda8581ffc1c720f1c81ca082921329452eba58b16233ab1842141c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0e909868420b7049dafd3a31d45125b31143eec59235311fc4c57ea26a4acd2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c6f162aabe9a91a309510d74eeb6507fab5fff92337a15acbe77753d88d9dcf0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4ca4c094de7771a98d7fbd67d9e5dbf1eb73efa4f744a730437d8a3a5cf994f0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:02425242e96bcf29a49711b0ca9f37e451da7c70562bc10e8ed992a5a7a25cc0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:78deba4d8f9590fe4dae384aeff04082510a709957e968753ff3c48399f6f92a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp310-cp310-win32.whl", hash = "sha256:d79c198e27580c8e958906f803e63cddb77653731be08851c7df0b1a14a8fc0f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp310-cp310-win_amd64.whl", hash = "sha256:c6e490913a46fa054e03699c70019ab869e990270597018cef1d8562132c2669" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b256ee2e749283ef3ddcff51a675ff43798d92d746d1a6e4631bf8c707d22d0b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:13faeacfe61784e2559e690fc53fa4c5ae97c6fcedb8eb6fb8d0a15b475d2c64" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:00237675befef519d9af72169d8604a067d92755e84fe76492fef5441db05b91" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:585f3b2a80fbd26b048a0be90c5aae8f06605d3c92615911c3a2b03a8a3b796f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e78314bdc32fa80696f72fa16dc61168fda4d6a0c014e0380f9d02f0e5d8a07" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:96b2b3d1a83ad55310de8c7b4a2d04d9277d5591f40761274856635acc5fcb30" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:939578d9d8fd4299220161fdd76e86c6a251987476f5243e8864a7844476ba14" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:fd10de089bcdcd1be95a2f73dbe6254798ec1bda9f450d5828c96f93e2536b9c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1e8ac75d72fa3775e0b7cb7e4629cec13b7514d928d15ef8ea06bca03ef01cae" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp311-cp311-win32.whl", hash = "sha256:6cf8fd4c04756b6b60146d98cd8a77d0cdae0e1ca20329da2ac85eed779b6849" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:31a9a6f775f9bcd865d88ee350f0ffb0e25936a7f930ca98995c05abf1faf21c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28e334d3ff134e88989d90ba04b47d84382a828c061d0d1027b1b12a62b39b1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cacf8f7297b0c4fcb74227692ca46b4a5852f8f4f24b3c766dd94a1075c4884" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c6fd51128a41297f5409deab284fecbe5305ebd7e5a1f959bee1c054622b7018" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cfb2aad70f2c6debfbcb717f23b7eb55febc0bb23dcffc0f076009da10c6392" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1606f4a55c0fd363d754049cdf400175ee96c992b1f8018b993941f221221c5f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:027b776c26d38b7f15b26a5da1044f376455fb3766df8fc38563b4efbc515154" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:42e5088973e56e31e4fa58eb6bd709e42fc03799c11c42929592889a2e54c491" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cc34f233c9e71701040d772aa7490318673aa7164a0efe3172b2981218c26d93" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:320e8e66157cc4e247d9ddca8e21f427efc7a04bbd0ac8a9faf56583fa543f9f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp312-cp312-win32.whl", hash = "sha256:fb6fecfd65564f208cbf0fba07f107fb661bcd1a7c389edbced3f7a493f70e37" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:86df271bf921c2ee3818f0522e9a5b8092ca2ad8b065ece5d7d9d0e9f4849bcc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:14c2a87c65b351109f6abfc424cab3927b3bdece6f706e4d12faaf3d52ee5efe" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41d1fc408ff5fdfb910200ec0e74abc40387bccb3252f3f27c0676731df2b2c8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1bb60174149316da1c35fa5233681f7c0f9f514509b8e399ab70fea5f17e45c9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30d006f98569de3459c2fc1f2acde170b7b2bd265dc1943e87e1a4efe1b67c31" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:416175faf02e4b0810f1f38bcb54682878a4af94059a1cd63b8747244420801f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6aab0f181c486f973bc7262a97f5aca3ee7e1437011ef0c2ec04b5a11d16c927" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabf8315679312cfa71302f9bd509ded4f2f263fb5b765cf1433b39106c3cc9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:bd28b817ea8c70215401f657edef3a8aa83c29d447fb0b622c35403780ba11d5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:18343b2d246dc6761a249ba1fb13f9ee9a2bcd95decc767319506056ea4ad4dc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp313-cp313-win32.whl", hash = "sha256:6fb70de56f1859a3f71261cbe41005f56a7842cc348d3aeb26237560bfa5e0ce" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:cf1ebb7d78e1ad8ec2a8c4732c7be2e736f6e5123a4146c5b89c9d1f585f8cef" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3cd35b7e8aedeb9e34c41385fda4f73ba609e561faedfae0a9e75e44ac558a15" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b89bc04de1d83006373429975f8ef9e7932534b8cc9ca582e4db7d20d91816db" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2001a39612b241dae17b4687898843f254f8748b796a2e16f1051a17078d991d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8dcfc373f888e4fb39a7bc57e93e3b845e7f462dacc008d9749568b1c4ece096" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b97b8404387b96cdbd30ad660f6407799126d26a39ca65729162fd810a99aa" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ccf600859c183d70eb47e05a44cd80a4ce77394d1ac0f79dbd2dd90a69a3a049" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:53cd68b185d98dde4ad8990e56a58dea83a4162161b1ea9272e5c9182ce415e0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:30a96e1e1f865f78b030d65241c1ee850cdf422d869e9028e2fc1d5e4db73b92" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d716a916938e03231e86e43782ca7878fb602a125a91e7acb8b5112e2e96ac16" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp314-cp314-win32.whl", hash = "sha256:c6dbd0ccdda3a2ba7c2ecd9d77b37f3b5831687d8dc1b6ca5f56a4880cc7b7ce" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:73dc19b562516fc9bcf6e5d6e596df0b4eb98d87e4f79f3ae71840e6ed21361c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/charset-normalizer/3.4.3/charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a" },
+]
+
+[[package]]
+name = "click"
+version = "8.2.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/click/8.2.2/click-8.2.2.tar.gz", hash = "sha256:068616e6ef9705a07b6db727cb9c248f4eb9dae437a30239f56fa94b18b852ef" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/click/8.2.2/click-8.2.2-py3-none-any.whl", hash = "sha256:52e1e9f5d3db8c85aa76968c7c67ed41ddbacb167f43201511c8fd61eb5ba2ca" },
+]
+
+[[package]]
+name = "cn2an"
+version = "0.5.22"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "proces" },
+ { name = "setuptools" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cn2an/0.5.22/cn2an-0.5.22.tar.gz", hash = "sha256:27ae5b56441d7329ed2ececffa026bfa8fc353dcf1fb0d9146b303b9cce3ac37" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cn2an/0.5.22/cn2an-0.5.22-py3-none-any.whl", hash = "sha256:cba4c8f305b43da01f50696047cca3116c727424ac62338da6a3426e01454f3e" },
+]
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/colorama/0.4.6/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/colorama/0.4.6/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" },
+]
+
+[[package]]
+name = "contourpy"
+version = "1.3.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+resolution-markers = [
+ "python_full_version < '3.11' and sys_platform == 'darwin'",
+ "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')",
+]
+dependencies = [
+ { name = "numpy", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2.tar.gz", hash = "sha256:b6945942715a034c671b7fc54f9588126b0b8bf23db2696e3ca8328f3ff0ab54" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ba38e3f9f330af820c4b27ceb4b9c7feee5fe0493ea53a8720f4792667465934" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc41ba0714aa2968d1f8674ec97504a8f7e334f48eeacebcaa6256213acb0989" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9be002b31c558d1ddf1b9b415b162c603405414bacd6932d031c5b5a8b757f0d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8d2e74acbcba3bfdb6d9d8384cdc4f9260cae86ed9beee8bd5f54fee49a430b9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e259bced5549ac64410162adc973c5e2fb77f04df4a439d00b478e57a0e65512" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad687a04bc802cbe8b9c399c07162a3c35e227e2daccf1668eb1f278cb698631" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cdd22595308f53ef2f891040ab2b93d79192513ffccbd7fe19be7aa773a5e09f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b4f54d6a2defe9f257327b0f243612dd051cc43825587520b1bf74a31e2f6ef2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp310-cp310-win32.whl", hash = "sha256:f939a054192ddc596e031e50bb13b657ce318cf13d264f095ce9db7dc6ae81c0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:c440093bbc8fc21c637c03bafcbef95ccd963bc6e0514ad887932c18ca2a759a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6a37a2fb93d4df3fc4c0e363ea4d16f83195fc09c891bc8ce072b9d084853445" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b7cd50c38f500bbcc9b6a46643a40e0913673f869315d8e70de0438817cb7773" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6658ccc7251a4433eebd89ed2672c2ed96fba367fd25ca9512aa92a4b46c4f1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:70771a461aaeb335df14deb6c97439973d253ae70660ca085eec25241137ef43" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65a887a6e8c4cd0897507d814b14c54a8c2e2aa4ac9f7686292f9769fcf9a6ab" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3859783aefa2b8355697f16642695a5b9792e7a46ab86da1118a4a23a51a33d7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:eab0f6db315fa4d70f1d8ab514e527f0366ec021ff853d7ed6a2d33605cf4b83" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d91a3ccc7fea94ca0acab82ceb77f396d50a1f67412efe4c526f5d20264e6ecd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp311-cp311-win32.whl", hash = "sha256:1c48188778d4d2f3d48e4643fb15d8608b1d01e4b4d6b0548d9b336c28fc9b6f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:5ebac872ba09cb8f2131c46b8739a7ff71de28a24c869bcad554477eb089a878" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4caf2bcd2969402bf77edc4cb6034c7dd7c0803213b3523f111eb7460a51b8d2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:82199cb78276249796419fe36b7386bd8d2cc3f28b3bc19fe2454fe2e26c4c15" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:106fab697af11456fcba3e352ad50effe493a90f893fca6c2ca5c033820cea92" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d14f12932a8d620e307f715857107b1d1845cc44fdb5da2bc8e850f5ceba9f87" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:532fd26e715560721bb0d5fc7610fce279b3699b018600ab999d1be895b09415" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b383144cf2d2c29f01a1e8170f50dacf0eac02d64139dcd709a8ac4eb3cfe" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c49f73e61f1f774650a55d221803b101d966ca0c5a2d6d5e4320ec3997489441" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3d80b2c0300583228ac98d0a927a1ba6a2ba6b8a742463c564f1d419ee5b211e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp312-cp312-win32.whl", hash = "sha256:90df94c89a91b7362e1142cbee7568f86514412ab8a2c0d0fca72d7e91b62912" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:8c942a01d9163e2e5cfb05cb66110121b8d07ad438a17f9e766317bcb62abf73" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:de39db2604ae755316cb5967728f4bea92685884b1e767b7c24e983ef5f771cb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3f9e896f447c5c8618f1edb2bafa9a4030f22a575ec418ad70611450720b5b08" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71e2bd4a1c4188f5c2b8d274da78faab884b59df20df63c34f74aa1813c4427c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de425af81b6cea33101ae95ece1f696af39446db9682a0b56daaa48cfc29f38f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:977e98a0e0480d3fe292246417239d2d45435904afd6d7332d8455981c408b85" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:434f0adf84911c924519d2b08fc10491dd282b20bdd3fa8f60fd816ea0b48841" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c66c4906cdbc50e9cba65978823e6e00b45682eb09adbb78c9775b74eb222422" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8b7fc0cd78ba2f4695fd0a6ad81a19e7e3ab825c31b577f384aa9d7817dc3bef" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313-win32.whl", hash = "sha256:15ce6ab60957ca74cff444fe66d9045c1fd3e92c8936894ebd1f3eef2fff075f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:e1578f7eafce927b168752ed7e22646dad6cd9bca673c60bff55889fa236ebf9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0475b1f6604896bc7c53bb070e355e9321e1bc0d381735421a2d2068ec56531f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c85bb486e9be652314bb5b9e2e3b0d1b2e643d5eec4992c0fbe8ac71775da739" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:745b57db7758f3ffc05a10254edd3182a2a83402a89c00957a8e8a22f5582823" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:970e9173dbd7eba9b4e01aab19215a48ee5dd3f43cef736eebde064a171f89a5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c6c4639a9c22230276b7bffb6a850dfc8258a2521305e1faefe804d006b2e532" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc829960f34ba36aad4302e78eabf3ef16a3a100863f0d4eeddf30e8a485a03b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d32530b534e986374fc19eaa77fcb87e8a99e5431499949b828312bdcd20ac52" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e298e7e70cf4eb179cc1077be1c725b5fd131ebc81181bf0c03525c8abc297fd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313t-win32.whl", hash = "sha256:d0e589ae0d55204991450bb5c23f571c64fe43adaa53f93fc902a84c96f52fe1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:78e9253c3de756b3f6a5174d024c4835acd59eb3f8e2ca13e775dbffe1558f69" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fd93cc7f3139b6dd7aab2f26a90dde0aa9fc264dbf70f6740d498a70b860b82c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:107ba8a6a7eec58bb475329e6d3b95deba9440667c4d62b9b6063942b61d7f16" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ded1706ed0c1049224531b81128efbd5084598f18d8a2d9efae833edbd2b40ad" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5f5964cdad279256c084b69c3f412b7801e15356b16efa9d78aa974041903da0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49b65a95d642d4efa8f64ba12558fcb83407e58a2dfba9d796d77b63ccfcaff5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.2/contourpy-1.3.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8c5acb8dddb0752bf252e01a3035b21443158910ac16a3b0d20e7fed7d534ce5" },
+]
+
+[[package]]
+name = "contourpy"
+version = "1.3.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+resolution-markers = [
+ "python_full_version >= '3.13' and sys_platform == 'darwin'",
+ "python_full_version == '3.12.*' and sys_platform == 'darwin'",
+ "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
+ "(python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
+ "python_full_version == '3.11.*' and sys_platform == 'darwin'",
+ "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
+]
+dependencies = [
+ { name = "numpy", marker = "python_full_version >= '3.11'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:709a48ef9a690e1343202916450bc48b9e51c049b089c7f79a267b46cffcdaa1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:23416f38bfd74d5d28ab8429cc4d63fa67d5068bd711a85edb1c3fb0c3e2f381" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:929ddf8c4c7f348e4c0a5a3a714b5c8542ffaa8c22954862a46ca1813b667ee7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp311-cp311-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9e999574eddae35f1312c2b4b717b7885d4edd6cb46700e04f7f02db454e67c1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp311-cp311-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf67e0e3f482cb69779dd3061b534eb35ac9b17f163d851e2a547d56dba0a3a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51e79c1f7470158e838808d4a996fa9bac72c498e93d8ebe5119bc1e6becb0db" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:598c3aaece21c503615fd59c92a3598b428b2f01bfb4b8ca9c4edeecc2438620" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:322ab1c99b008dad206d406bb61d014cf0174df491ae9d9d0fac6a6fda4f977f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp311-cp311-win32.whl", hash = "sha256:fd907ae12cd483cd83e414b12941c632a969171bf90fc937d0c9f268a31cafff" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:3519428f6be58431c56581f1694ba8e50626f2dd550af225f82fb5f5814d2a42" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp311-cp311-win_arm64.whl", hash = "sha256:15ff10bfada4bf92ec8b31c62bf7c1834c244019b4a33095a68000d7075df470" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b08a32ea2f8e42cf1d4be3169a98dd4be32bafe4f22b6c4cb4ba810fa9e5d2cb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:556dba8fb6f5d8742f2923fe9457dbdd51e1049c4a43fd3986a0b14a1d815fc6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92d9abc807cf7d0e047b95ca5d957cf4792fcd04e920ca70d48add15c1a90ea7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2e8faa0ed68cb29af51edd8e24798bb661eac3bd9f65420c1887b6ca89987c8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp312-cp312-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:626d60935cf668e70a5ce6ff184fd713e9683fb458898e4249b63be9e28286ea" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d00e655fcef08aba35ec9610536bfe90267d7ab5ba944f7032549c55a146da1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:451e71b5a7d597379ef572de31eeb909a87246974d960049a9848c3bc6c41bf7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:459c1f020cd59fcfe6650180678a9993932d80d44ccde1fa1868977438f0b411" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp312-cp312-win32.whl", hash = "sha256:023b44101dfe49d7d53932be418477dba359649246075c996866106da069af69" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:8153b8bfc11e1e4d75bcb0bff1db232f9e10b274e0929de9d608027e0d34ff8b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:07ce5ed73ecdc4a03ffe3e1b3e3c1166db35ae7584be76f65dbbe28a7791b0cc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:177fb367556747a686509d6fef71d221a4b198a3905fe824430e5ea0fda54eb5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d002b6f00d73d69333dac9d0b8d5e84d9724ff9ef044fd63c5986e62b7c9e1b1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:348ac1f5d4f1d66d3322420f01d42e43122f43616e0f194fc1c9f5d830c5b286" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:655456777ff65c2c548b7c454af9c6f33f16c8884f11083244b5819cc214f1b5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:644a6853d15b2512d67881586bd03f462c7ab755db95f16f14d7e238f2852c67" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4debd64f124ca62069f313a9cb86656ff087786016d76927ae2cf37846b006c9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a15459b0f4615b00bbd1e91f1b9e19b7e63aea7483d03d804186f278c0af2659" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca0fdcd73925568ca027e0b17ab07aad764be4706d0a925b89227e447d9737b7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313-win32.whl", hash = "sha256:b20c7c9a3bf701366556e1b1984ed2d0cedf999903c51311417cf5f591d8c78d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:1cadd8b8969f060ba45ed7c1b714fe69185812ab43bd6b86a9123fe8f99c3263" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313-win_arm64.whl", hash = "sha256:fd914713266421b7536de2bfa8181aa8c699432b6763a0ea64195ebe28bff6a9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:88df9880d507169449d434c293467418b9f6cbe82edd19284aa0409e7fdb933d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d06bb1f751ba5d417047db62bca3c8fde202b8c11fb50742ab3ab962c81e8216" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e4e6b05a45525357e382909a4c1600444e2a45b4795163d3b22669285591c1ae" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ab3074b48c4e2cf1a960e6bbeb7f04566bf36b1861d5c9d4d8ac04b82e38ba20" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c3d53c796f8647d6deb1abe867daeb66dcc8a97e8455efa729516b997b8ed99" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50ed930df7289ff2a8d7afeb9603f8289e5704755c7e5c3bbd929c90c817164b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4feffb6537d64b84877da813a5c30f1422ea5739566abf0bd18065ac040e120a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2b7e9480ffe2b0cd2e787e4df64270e3a0440d9db8dc823312e2c940c167df7e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313t-win32.whl", hash = "sha256:283edd842a01e3dcd435b1c5116798d661378d83d36d337b8dde1d16a5fc9ba3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:87acf5963fc2b34825e5b6b048f40e3635dd547f590b04d2ab317c2619ef7ae8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:3c30273eb2a55024ff31ba7d052dde990d7d8e5450f4bbb6e913558b3d6c2301" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fde6c716d51c04b1c25d0b90364d0be954624a0ee9d60e23e850e8d48353d07a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:cbedb772ed74ff5be440fa8eee9bd49f64f6e3fc09436d9c7d8f1c287b121d77" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22e9b1bd7a9b1d652cd77388465dc358dafcd2e217d35552424aa4f996f524f5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a22738912262aa3e254e4f3cb079a95a67132fc5a063890e224393596902f5a4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:afe5a512f31ee6bd7d0dda52ec9864c984ca3d66664444f2d72e0dc4eb832e36" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f64836de09927cba6f79dcd00fdd7d5329f3fccc633468507079c829ca4db4e3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1fd43c3be4c8e5fd6e4f2baeae35ae18176cf2e5cced681cca908addf1cdd53b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6afc576f7b33cf00996e5c1102dc2a8f7cc89e39c0b55df93a0b78c1bd992b36" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314-win32.whl", hash = "sha256:66c8a43a4f7b8df8b71ee1840e4211a3c8d93b214b213f590e18a1beca458f7d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:cf9022ef053f2694e31d630feaacb21ea24224be1c3ad0520b13d844274614fd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:95b181891b4c71de4bb404c6621e7e2390745f887f2a026b2d99e92c17892339" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:33c82d0138c0a062380332c861387650c82e4cf1747aaa6938b9b6516762e772" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ea37e7b45949df430fe649e5de8351c423430046a2af20b1c1961cae3afcda77" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d304906ecc71672e9c89e87c4675dc5c2645e1f4269a5063b99b0bb29f232d13" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca658cd1a680a5c9ea96dc61cdbae1e85c8f25849843aa799dfd3cb370ad4fbe" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ab2fd90904c503739a75b7c8c5c01160130ba67944a7b77bbf36ef8054576e7f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7301b89040075c30e5768810bc96a8e8d78085b47d8be6e4c3f5a0b4ed478a0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2a2a8b627d5cc6b7c41a4beff6c5ad5eb848c88255fda4a8745f7e901b32d8e4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fd6ec6be509c787f1caf6b247f0b1ca598bef13f4ddeaa126b7658215529ba0f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314t-win32.whl", hash = "sha256:e74a9a0f5e3fff48fb5a7f2fd2b9b70a3fe014a67522f79b7cca4c0c7e43c9ae" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:13b68d6a62db8eafaebb8039218921399baf6e47bf85006fd8529f2a08ef33fc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:b7448cb5a725bb1e35ce88771b86fba35ef418952474492cf7c764059933ff8b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:cd5dfcaeb10f7b7f9dc8941717c6c2ade08f587be2226222c12b25f0483ed497" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:0c1fc238306b35f246d61a1d416a627348b5cf0648648a031e14bb8705fcdfe8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70f9aad7de812d6541d29d2bbf8feb22ff7e1c299523db288004e3157ff4674e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ed3657edf08512fc3fe81b510e35c2012fbd3081d2e26160f27ca28affec989" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/contourpy/1.3.3/contourpy-1.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:3d1a3799d62d45c18bafd41c5fa05120b96a28079f2393af559b843d1a966a77" },
+]
+
+[[package]]
+name = "cycler"
+version = "0.12.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cycler/0.12.1/cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cycler/0.12.1/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30" },
+]
+
+[[package]]
+name = "cython"
+version = "3.0.7"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7.tar.gz", hash = "sha256:fb299acf3a578573c190c858d49e0cf9d75f4bc49c3f24c5a63804997ef09213" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3c0e19bb41de6be9d8afc85795159ca16296be81a586cd9588be0400d44a855" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e8bf00ec1dd1d92e9ae74d2e6891f087a939e1dfb40c9c7fa5d8d6a26c94f5a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd6ae43ef2e596c9a88dbf2a8895be2e32cc2f5bc3c8ba2e7753b69068fc0b2d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f674be92673e87dd8ee7cfe553d5960ec4effc5ab15063b9a5e265a51585a31a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:861cf254bf5836d47c2aee86aa75dd93d3de00ccd1b077c3c7a2bb22cba358e7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp310-cp310-win32.whl", hash = "sha256:f6d8ff62ad55dc0393686438eac4b457a916e4d1118a0b550746bb52b4c756cc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp310-cp310-win_amd64.whl", hash = "sha256:e13abb14843397b76d0472c7d33cd260d5f262ab05cc27ed423317e645e29643" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c636c9ab92c7838231a1ba769e519d953af8294612f3f772a54d3a5250ff23f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22d2a684122dfb531853d57c8c85c1d5d44be709e12466dca99fa6aee7d8054f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1bdf8a107fdf9e174991aa87a0be7504f60de1ec6bfb1ccfb30e33acac818a0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3a83e04fde663b84905f3a20213a4333d13a07b79434300704b70dc552761f8b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e34b4b08d795ccca920fa26b099558f4f1e4e3f794e4ba8d3433c5bc2454d50a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp311-cp311-win32.whl", hash = "sha256:133057ac45b6fa7fe5d7baada9d3545d09339432f75c0545f556e8c6fecc2932" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp311-cp311-win_amd64.whl", hash = "sha256:b65abca78aa5ebc8675c8480b9a53006f6efea9910ad099cf32c9fb5617ef251" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23ceac5315fe899c229e874328742154e331fa41337bb03f6f5264636c351c9e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ea936cf5931297ba07bce121388c4c6266c1b63a9f4d648ae16c92ff090204b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9fcd9a18ee3ac7f460e0841954feb495102ffbdbec0e6c78562f3495cda000dd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7c8d579d13cb81abe704c8b0908d122b81d6e2623265a19c4a6a7377f440debb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ef5bb0268bfe5992da3ef9292463a5a895ed8700b134ed2c00008d5471b3ba6e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp312-cp312-win32.whl", hash = "sha256:55f93d3822bc196b37a8bdfa4ec6a35232a399e97f2baa714bd5ed8ea9b0ce68" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-cp312-cp312-win_amd64.whl", hash = "sha256:f3845c4506e0d207c5e268fb02813928f3a1e135de954a379f165ef0d581da47" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/cython/3.0.7/Cython-3.0.7-py2.py3-none-any.whl", hash = "sha256:936ec37b261b226d7404eff23a9aad284098338150d42a53d6a9af12b18d3892" },
+]
+
+[[package]]
+name = "decorator"
+version = "5.2.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/decorator/5.2.1/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/decorator/5.2.1/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a" },
+]
+
+[[package]]
+name = "deepspeed"
+version = "0.17.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "einops" },
+ { name = "hjson" },
+ { name = "msgpack" },
+ { name = "ninja" },
+ { name = "numpy" },
+ { name = "packaging" },
+ { name = "psutil" },
+ { name = "py-cpuinfo" },
+ { name = "pydantic" },
+ { name = "torch" },
+ { name = "tqdm" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/deepspeed/0.17.1/deepspeed-0.17.1.tar.gz", hash = "sha256:6d6e21796982b9e024f489e1c211666cc6c0be6e344751368610b9d2da285d6e" }
+
+[[package]]
+name = "descript-audiotools"
+version = "0.7.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "argbind" },
+ { name = "ffmpy" },
+ { name = "flatten-dict" },
+ { name = "importlib-resources" },
+ { name = "ipython", version = "8.37.0", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version < '3.11'" },
+ { name = "ipython", version = "9.5.0", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version >= '3.11'" },
+ { name = "julius" },
+ { name = "librosa" },
+ { name = "markdown2" },
+ { name = "matplotlib" },
+ { name = "numpy" },
+ { name = "protobuf" },
+ { name = "pyloudnorm" },
+ { name = "pystoi" },
+ { name = "randomname" },
+ { name = "rich" },
+ { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version < '3.11'" },
+ { name = "scipy", version = "1.16.1", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version >= '3.11'" },
+ { name = "soundfile" },
+ { name = "tensorboard" },
+ { name = "torch" },
+ { name = "torch-stoi" },
+ { name = "torchaudio" },
+ { name = "tqdm" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/descript-audiotools/0.7.2/descript-audiotools-0.7.2.tar.gz", hash = "sha256:2cdd363025c771b8acc53d5ef9ec77e34f92e182272c4be7fd0118a99b6a5e2a" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/descript-audiotools/0.7.2/descript_audiotools-0.7.2-py2.py3-none-any.whl", hash = "sha256:bb7a4e837f92395e43894d99a89406485325b88c6f158caf59e18876cf5b06ea" },
+]
+
+[[package]]
+name = "distance"
+version = "0.1.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/distance/0.1.3/Distance-0.1.3.tar.gz", hash = "sha256:60807584f5b6003f5c521aa73f39f51f631de3be5cccc5a1d67166fcbf0d4551" }
+
+[[package]]
+name = "docstring-parser"
+version = "0.17.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/docstring-parser/0.17.0/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/docstring-parser/0.17.0/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708" },
+]
+
+[[package]]
+name = "einops"
+version = "0.8.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/einops/0.8.1/einops-0.8.1.tar.gz", hash = "sha256:de5d960a7a761225532e0f1959e5315ebeafc0cd43394732f103ca44b9837e84" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/einops/0.8.1/einops-0.8.1-py3-none-any.whl", hash = "sha256:919387eb55330f5757c6bea9165c5ff5cfe63a642682ea788a6d472576d81737" },
+]
+
+[[package]]
+name = "exceptiongroup"
+version = "1.3.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "typing-extensions", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/exceptiongroup/1.3.0/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/exceptiongroup/1.3.0/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10" },
+]
+
+[[package]]
+name = "executing"
+version = "2.2.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/executing/2.2.1/executing-2.2.1.tar.gz", hash = "sha256:3632cc370565f6648cc328b32435bd120a1e4ebb20c77e3fdde9a13cd1e533c4" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/executing/2.2.1/executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017" },
+]
+
+[[package]]
+name = "fastapi"
+version = "0.116.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "pydantic" },
+ { name = "starlette" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fastapi/0.116.1/fastapi-0.116.1.tar.gz", hash = "sha256:ed52cbf946abfd70c5a0dccb24673f0670deeb517a88b3544d03c2a6bf283143" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fastapi/0.116.1/fastapi-0.116.1-py3-none-any.whl", hash = "sha256:c46ac7c312df840f0c9e220f7964bada936781bc4e2e6eb71f1c4d7553786565" },
+]
+
+[[package]]
+name = "ffmpeg-python"
+version = "0.2.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "future" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ffmpeg-python/0.2.0/ffmpeg-python-0.2.0.tar.gz", hash = "sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ffmpeg-python/0.2.0/ffmpeg_python-0.2.0-py3-none-any.whl", hash = "sha256:ac441a0404e053f8b6a1113a77c0f452f1cfc62f6344a769475ffdc0f56c23c5" },
+]
+
+[[package]]
+name = "ffmpy"
+version = "0.6.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ffmpy/0.6.1/ffmpy-0.6.1.tar.gz", hash = "sha256:b5830fd05f72bace05b8fb28724d54a7a63c5119d7f74ca36a75df33f749142d" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ffmpy/0.6.1/ffmpy-0.6.1-py3-none-any.whl", hash = "sha256:69a37e2d7d6feb840e233d5640f3499a8b0a8657336774c86e4c52a3219222d4" },
+]
+
+[[package]]
+name = "filelock"
+version = "3.19.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/filelock/3.19.1/filelock-3.19.1.tar.gz", hash = "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/filelock/3.19.1/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d" },
+]
+
+[[package]]
+name = "fire"
+version = "0.7.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "termcolor" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fire/0.7.1/fire-0.7.1.tar.gz", hash = "sha256:3b208f05c736de98fb343310d090dcc4d8c78b2a89ea4f32b837c586270a9cbf" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fire/0.7.1/fire-0.7.1-py3-none-any.whl", hash = "sha256:e43fd8a5033a9001e7e2973bab96070694b9f12f2e0ecf96d4683971b5ab1882" },
+]
+
+[[package]]
+name = "flatten-dict"
+version = "0.4.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "six" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/flatten-dict/0.4.2/flatten-dict-0.4.2.tar.gz", hash = "sha256:506a96b6e6f805b81ae46a0f9f31290beb5fa79ded9d80dbe1b7fa236ab43076" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/flatten-dict/0.4.2/flatten_dict-0.4.2-py2.py3-none-any.whl", hash = "sha256:7e245b20c4c718981212210eec4284a330c9f713e632e98765560e05421e48ad" },
+]
+
+[[package]]
+name = "fonttools"
+version = "4.59.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2.tar.gz", hash = "sha256:e72c0749b06113f50bcb80332364c6be83a9582d6e3db3fe0b280f996dc2ef22" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2a159e36ae530650acd13604f364b3a2477eff7408dcac6a640d74a3744d2514" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8bd733e47bf4c6dee2b2d8af7a1f7b0c091909b22dbb969a29b2b991e61e5ba4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7bb32e0e33795e3b7795bb9b88cb6a9d980d3cbe26dd57642471be547708e17a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cdcdf7aad4bab7fd0f2938624a5a84eb4893be269f43a6701b0720b726f24df0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4d974312a9f405628e64f475b1f5015a61fd338f0a1b61d15c4822f97d6b045b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:12dc4670e6e6cc4553e8de190f86a549e08ca83a036363115d94a2d67488831e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp310-cp310-win32.whl", hash = "sha256:1603b85d5922042563eea518e272b037baf273b9a57d0f190852b0b075079000" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp310-cp310-win_amd64.whl", hash = "sha256:2543b81641ea5b8ddfcae7926e62aafd5abc604320b1b119e5218c014a7a5d3c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:511946e8d7ea5c0d6c7a53c4cb3ee48eda9ab9797cd9bf5d95829a398400354f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8e5e2682cf7be766d84f462ba8828d01e00c8751a8e8e7ce12d7784ccb69a30d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5729e12a982dba3eeae650de48b06f3b9ddb51e9aee2fcaf195b7d09a96250e2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c52694eae5d652361d59ecdb5a2246bff7cff13b6367a12da8499e9df56d148d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f1f1bbc23ba1312bd8959896f46f667753b90216852d2a8cfa2d07e0cb234144" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1a1bfe5378962825dabe741720885e8b9ae9745ec7ecc4a5ec1f1ce59a6062bf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp311-cp311-win32.whl", hash = "sha256:e937790f3c2c18a1cbc7da101550a84319eb48023a715914477d2e7faeaba570" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp311-cp311-win_amd64.whl", hash = "sha256:9836394e2f4ce5f9c0a7690ee93bd90aa1adc6b054f1a57b562c5d242c903104" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:82906d002c349cad647a7634b004825a7335f8159d0d035ae89253b4abf6f3ea" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a10c1bd7644dc58f8862d8ba0cf9fb7fef0af01ea184ba6ce3f50ab7dfe74d5a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:738f31f23e0339785fd67652a94bc69ea49e413dfdb14dcb8c8ff383d249464e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ec99f9bdfee9cdb4a9172f9e8fd578cce5feb231f598909e0aecf5418da4f25" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0476ea74161322e08c7a982f83558a2b81b491509984523a1a540baf8611cc31" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:95922a922daa1f77cc72611747c156cfb38030ead72436a2c551d30ecef519b9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp312-cp312-win32.whl", hash = "sha256:39ad9612c6a622726a6a130e8ab15794558591f999673f1ee7d2f3d30f6a3e1c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp312-cp312-win_amd64.whl", hash = "sha256:980fd7388e461b19a881d35013fec32c713ffea1fc37aef2f77d11f332dfd7da" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:381bde13216ba09489864467f6bc0c57997bd729abfbb1ce6f807ba42c06cceb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f33839aa091f7eef4e9078f5b7ab1b8ea4b1d8a50aeaef9fdb3611bba80869ec" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6235fc06bcbdb40186f483ba9d5d68f888ea68aa3c8dac347e05a7c54346fbc8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83ad6e5d06ef3a2884c4fa6384a20d6367b5cfe560e3b53b07c9dc65a7020e73" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d029804c70fddf90be46ed5305c136cae15800a2300cb0f6bba96d48e770dde0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:95807a3b5e78f2714acaa26a33bc2143005cc05c0217b322361a772e59f32b89" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp313-cp313-win32.whl", hash = "sha256:b3ebda00c3bb8f32a740b72ec38537d54c7c09f383a4cfefb0b315860f825b08" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp313-cp313-win_amd64.whl", hash = "sha256:a72155928d7053bbde499d32a9c77d3f0f3d29ae72b5a121752481bcbd71e50f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:d09e487d6bfbe21195801323ba95c91cb3523f0fcc34016454d4d9ae9eaa57fe" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:dec2f22486d7781087b173799567cffdcc75e9fb2f1c045f05f8317ccce76a3e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1647201af10993090120da2e66e9526c4e20e88859f3e34aa05b8c24ded2a564" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47742c33fe65f41eabed36eec2d7313a8082704b7b808752406452f766c573fc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:92ac2d45794f95d1ad4cb43fa07e7e3776d86c83dc4b9918cf82831518165b4b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:fa9ecaf2dcef8941fb5719e16322345d730f4c40599bbf47c9753de40eb03882" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314-win32.whl", hash = "sha256:a8d40594982ed858780e18a7e4c80415af65af0f22efa7de26bdd30bf24e1e14" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314-win_amd64.whl", hash = "sha256:9cde8b6a6b05f68516573523f2013a3574cb2c75299d7d500f44de82ba947b80" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:036cd87a2dbd7ef72f7b68df8314ced00b8d9973aee296f2464d06a836aeb9a9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:14870930181493b1d740b6f25483e20185e5aea58aec7d266d16da7be822b4bb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7ff58ea1eb8fc7e05e9a949419f031890023f8785c925b44d6da17a6a7d6e85d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6dee142b8b3096514c96ad9e2106bf039e2fe34a704c587585b569a36df08c3c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8991bdbae39cf78bcc9cd3d81f6528df1f83f2e7c23ccf6f990fa1f0b6e19708" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:53c1a411b7690042535a4f0edf2120096a39a506adeb6c51484a232e59f2aa0c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314t-win32.whl", hash = "sha256:59d85088e29fa7a8f87d19e97a1beae2a35821ee48d8ef6d2c4f965f26cb9f8a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-cp314-cp314t-win_amd64.whl", hash = "sha256:7ad5d8d8cc9e43cb438b3eb4a0094dd6d4088daa767b0a24d52529361fd4c199" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fonttools/4.59.2/fonttools-4.59.2-py3-none-any.whl", hash = "sha256:8bd0f759020e87bb5d323e6283914d9bf4ae35a7307dafb2cbd1e379e720ad37" },
+]
+
+[[package]]
+name = "fsspec"
+version = "2025.9.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fsspec/2025.9.0/fsspec-2025.9.0.tar.gz", hash = "sha256:19fd429483d25d28b65ec68f9f4adc16c17ea2c7c7bf54ec61360d478fb19c19" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/fsspec/2025.9.0/fsspec-2025.9.0-py3-none-any.whl", hash = "sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7" },
+]
+
+[[package]]
+name = "future"
+version = "1.0.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/future/1.0.0/future-1.0.0.tar.gz", hash = "sha256:bd2968309307861edae1458a4f8a4f3598c03be43b97521076aebf5d94c07b05" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/future/1.0.0/future-1.0.0-py3-none-any.whl", hash = "sha256:929292d34f5872e70396626ef385ec22355a1fae8ad29e1a734c3e43f9fbc216" },
+]
+
+[[package]]
+name = "g2p-en"
+version = "2.1.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "distance" },
+ { name = "inflect" },
+ { name = "nltk" },
+ { name = "numpy" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/g2p-en/2.1.0/g2p_en-2.1.0.tar.gz", hash = "sha256:32ecb119827a3b10ea8c1197276f4ea4f44070ae56cbbd01f0f261875f556a58" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/g2p-en/2.1.0/g2p_en-2.1.0-py3-none-any.whl", hash = "sha256:2a7aabf1fc7f270fcc3349881407988c9245173c2413debbe5432f4a4f31319f" },
+]
+
+[[package]]
+name = "google-auth"
+version = "2.40.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "cachetools" },
+ { name = "pyasn1-modules" },
+ { name = "rsa" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/google-auth/2.40.3/google_auth-2.40.3.tar.gz", hash = "sha256:500c3a29adedeb36ea9cf24b8d10858e152f2412e3ca37829b3fa18e33d63b77" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/google-auth/2.40.3/google_auth-2.40.3-py2.py3-none-any.whl", hash = "sha256:1370d4593e86213563547f97a92752fc658456fe4514c809544f330fed45a7ca" },
+]
+
+[[package]]
+name = "google-auth-oauthlib"
+version = "0.4.6"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "google-auth" },
+ { name = "requests-oauthlib" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/google-auth-oauthlib/0.4.6/google-auth-oauthlib-0.4.6.tar.gz", hash = "sha256:a90a072f6993f2c327067bf65270046384cda5a8ecb20b94ea9a687f1f233a7a" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/google-auth-oauthlib/0.4.6/google_auth_oauthlib-0.4.6-py2.py3-none-any.whl", hash = "sha256:3f2a6e802eebbb6fb736a370fbf3b055edcb6b52878bf2f26330b5e041316c73" },
+]
+
+[[package]]
+name = "gradio"
+version = "5.44.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "aiofiles" },
+ { name = "anyio" },
+ { name = "audioop-lts", marker = "python_full_version >= '3.13'" },
+ { name = "brotli" },
+ { name = "fastapi" },
+ { name = "ffmpy" },
+ { name = "gradio-client" },
+ { name = "groovy" },
+ { name = "httpx" },
+ { name = "huggingface-hub" },
+ { name = "jinja2" },
+ { name = "markupsafe" },
+ { name = "numpy" },
+ { name = "orjson" },
+ { name = "packaging" },
+ { name = "pandas" },
+ { name = "pillow" },
+ { name = "pydantic" },
+ { name = "pydub" },
+ { name = "python-multipart" },
+ { name = "pyyaml" },
+ { name = "ruff", marker = "sys_platform != 'emscripten'" },
+ { name = "safehttpx" },
+ { name = "semantic-version" },
+ { name = "starlette", marker = "sys_platform != 'emscripten'" },
+ { name = "tomlkit" },
+ { name = "typer", marker = "sys_platform != 'emscripten'" },
+ { name = "typing-extensions" },
+ { name = "urllib3", marker = "sys_platform == 'emscripten'" },
+ { name = "uvicorn", marker = "sys_platform != 'emscripten'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/gradio/5.44.1/gradio-5.44.1.tar.gz", hash = "sha256:8527837aa6de4b0d2398dab11baac8e3eac9da69140ed0da6efc6ac497fa818d" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/gradio/5.44.1/gradio-5.44.1-py3-none-any.whl", hash = "sha256:cb22dd519c3bb2f8c7960cdcc23ca3b869511c85e320f486d7aef6e3627f97b9" },
+]
+
+[[package]]
+name = "gradio-client"
+version = "1.12.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "fsspec" },
+ { name = "httpx" },
+ { name = "huggingface-hub" },
+ { name = "packaging" },
+ { name = "typing-extensions" },
+ { name = "websockets" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/gradio-client/1.12.1/gradio_client-1.12.1.tar.gz", hash = "sha256:64ae7b1d951482194e3a2f8d20cd3fbdaaa13418ee988445d3c9edb28da50ea2" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/gradio-client/1.12.1/gradio_client-1.12.1-py3-none-any.whl", hash = "sha256:37c0bcd0e6b3794b2b2e0b5039696d6962d8125bdb96960ad1b79412326b1664" },
+]
+
+[[package]]
+name = "groovy"
+version = "0.1.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/groovy/0.1.2/groovy-0.1.2.tar.gz", hash = "sha256:25c1dc09b3f9d7e292458aa762c6beb96ea037071bf5e917fc81fb78d2231083" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/groovy/0.1.2/groovy-0.1.2-py3-none-any.whl", hash = "sha256:7f7975bab18c729a257a8b1ae9dcd70b7cafb1720481beae47719af57c35fa64" },
+]
+
+[[package]]
+name = "grpcio"
+version = "1.74.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0.tar.gz", hash = "sha256:80d1f4fbb35b0742d3e3d3bb654b7381cd5f015f8497279a1e9c21ba623e01b1" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:85bd5cdf4ed7b2d6438871adf6afff9af7096486fcf51818a81b77ef4dd30907" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:68c8ebcca945efff9d86d8d6d7bfb0841cf0071024417e2d7f45c5e46b5b08eb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:e154d230dc1bbbd78ad2fdc3039fa50ad7ffcf438e4eb2fa30bce223a70c7486" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e8978003816c7b9eabe217f88c78bc26adc8f9304bf6a594b02e5a49b2ef9c11" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3d7bd6e3929fd2ea7fbc3f562e4987229ead70c9ae5f01501a46701e08f1ad9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:136b53c91ac1d02c8c24201bfdeb56f8b3ac3278668cbb8e0ba49c88069e1bdc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fe0f540750a13fd8e5da4b3eaba91a785eea8dca5ccd2bc2ffe978caa403090e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4e4181bfc24413d1e3a37a0b7889bea68d973d4b45dd2bc68bb766c140718f82" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp310-cp310-win32.whl", hash = "sha256:1733969040989f7acc3d94c22f55b4a9501a30f6aaacdbccfaba0a3ffb255ab7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp310-cp310-win_amd64.whl", hash = "sha256:9e912d3c993a29df6c627459af58975b2e5c897d93287939b9d5065f000249b5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:69e1a8180868a2576f02356565f16635b99088da7df3d45aaa7e24e73a054e31" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:8efe72fde5500f47aca1ef59495cb59c885afe04ac89dd11d810f2de87d935d4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:a8f0302f9ac4e9923f98d8e243939a6fb627cd048f5cd38595c97e38020dffce" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f609a39f62a6f6f05c7512746798282546358a37ea93c1fcbadf8b2fed162e3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c98e0b7434a7fa4e3e63f250456eaef52499fba5ae661c58cc5b5477d11e7182" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:662456c4513e298db6d7bd9c3b8df6f75f8752f0ba01fb653e252ed4a59b5a5d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3d14e3c4d65e19d8430a4e28ceb71ace4728776fd6c3ce34016947474479683f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1bf949792cee20d2078323a9b02bacbbae002b9e3b9e2433f2741c15bdeba1c4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp311-cp311-win32.whl", hash = "sha256:55b453812fa7c7ce2f5c88be3018fb4a490519b6ce80788d5913f3f9d7da8c7b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp311-cp311-win_amd64.whl", hash = "sha256:86ad489db097141a907c559988c29718719aa3e13370d40e20506f11b4de0d11" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:8533e6e9c5bd630ca98062e3a1326249e6ada07d05acf191a77bc33f8948f3d8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:2918948864fec2a11721d91568effffbe0a02b23ecd57f281391d986847982f6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:60d2d48b0580e70d2e1954d0d19fa3c2e60dd7cbed826aca104fff518310d1c5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3601274bc0523f6dc07666c0e01682c94472402ac2fd1226fd96e079863bfa49" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:176d60a5168d7948539def20b2a3adcce67d72454d9ae05969a2e73f3a0feee7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e759f9e8bc908aaae0412642afe5416c9f983a80499448fcc7fab8692ae044c3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9e7c4389771855a92934b2846bd807fc25a3dfa820fd912fe6bd8136026b2707" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cce634b10aeab37010449124814b05a62fb5f18928ca878f1bf4750d1f0c815b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp312-cp312-win32.whl", hash = "sha256:885912559974df35d92219e2dc98f51a16a48395f37b92865ad45186f294096c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp312-cp312-win_amd64.whl", hash = "sha256:42f8fee287427b94be63d916c90399ed310ed10aadbf9e2e5538b3e497d269bc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:2bc2d7d8d184e2362b53905cb1708c84cb16354771c04b490485fa07ce3a1d89" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:c14e803037e572c177ba54a3e090d6eb12efd795d49327c5ee2b3bddb836bf01" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f6ec94f0e50eb8fa1744a731088b966427575e40c2944a980049798b127a687e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:566b9395b90cc3d0d0c6404bc8572c7c18786ede549cdb540ae27b58afe0fb91" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1ea6176d7dfd5b941ea01c2ec34de9531ba494d541fe2057c904e601879f249" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:64229c1e9cea079420527fa8ac45d80fc1e8d3f94deaa35643c381fa8d98f362" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:0f87bddd6e27fc776aacf7ebfec367b6d49cad0455123951e4488ea99d9b9b8f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3b03d8f2a07f0fea8c8f74deb59f8352b770e3900d143b3d1475effcb08eec20" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp313-cp313-win32.whl", hash = "sha256:b6a73b2ba83e663b2480a90b82fdae6a7aa6427f62bf43b29912c0cfd1aa2bfa" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/grpcio/1.74.0/grpcio-1.74.0-cp313-cp313-win_amd64.whl", hash = "sha256:fd3c71aeee838299c5887230b8a1822795325ddfea635edd82954c1eaa831e24" },
+]
+
+[[package]]
+name = "h11"
+version = "0.16.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/h11/0.16.0/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/h11/0.16.0/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86" },
+]
+
+[[package]]
+name = "hf-xet"
+version = "1.1.9"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/hf-xet/1.1.9/hf_xet-1.1.9.tar.gz", hash = "sha256:c99073ce404462e909f1d5839b2d14a3827b8fe75ed8aed551ba6609c026c803" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/hf-xet/1.1.9/hf_xet-1.1.9-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:a3b6215f88638dd7a6ff82cb4e738dcbf3d863bf667997c093a3c990337d1160" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/hf-xet/1.1.9/hf_xet-1.1.9-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:9b486de7a64a66f9a172f4b3e0dfe79c9f0a93257c501296a2521a13495a698a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/hf-xet/1.1.9/hf_xet-1.1.9-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4c5a840c2c4e6ec875ed13703a60e3523bc7f48031dfd750923b2a4d1a5fc3c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/hf-xet/1.1.9/hf_xet-1.1.9-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:96a6139c9e44dad1c52c52520db0fffe948f6bce487cfb9d69c125f254bb3790" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/hf-xet/1.1.9/hf_xet-1.1.9-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ad1022e9a998e784c97b2173965d07fe33ee26e4594770b7785a8cc8f922cd95" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/hf-xet/1.1.9/hf_xet-1.1.9-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:86754c2d6d5afb11b0a435e6e18911a4199262fe77553f8c50d75e21242193ea" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/hf-xet/1.1.9/hf_xet-1.1.9-cp37-abi3-win_amd64.whl", hash = "sha256:5aad3933de6b725d61d51034e04174ed1dce7a57c63d530df0014dea15a40127" },
+]
+
+[[package]]
+name = "hjson"
+version = "3.1.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/hjson/3.1.0/hjson-3.1.0.tar.gz", hash = "sha256:55af475a27cf83a7969c808399d7bccdec8fb836a07ddbd574587593b9cdcf75" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/hjson/3.1.0/hjson-3.1.0-py3-none-any.whl", hash = "sha256:65713cdcf13214fb554eb8b4ef803419733f4f5e551047c9b711098ab7186b89" },
+]
+
+[[package]]
+name = "httpcore"
+version = "1.0.9"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "certifi" },
+ { name = "h11" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/httpcore/1.0.9/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/httpcore/1.0.9/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55" },
+]
+
+[[package]]
+name = "httpx"
+version = "0.28.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "anyio" },
+ { name = "certifi" },
+ { name = "httpcore" },
+ { name = "idna" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/httpx/0.28.1/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/httpx/0.28.1/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad" },
+]
+
+[[package]]
+name = "huggingface-hub"
+version = "0.34.4"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "filelock" },
+ { name = "fsspec" },
+ { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
+ { name = "packaging" },
+ { name = "pyyaml" },
+ { name = "requests" },
+ { name = "tqdm" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/huggingface-hub/0.34.4/huggingface_hub-0.34.4.tar.gz", hash = "sha256:a4228daa6fb001be3f4f4bdaf9a0db00e1739235702848df00885c9b5742c85c" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/huggingface-hub/0.34.4/huggingface_hub-0.34.4-py3-none-any.whl", hash = "sha256:9b365d781739c93ff90c359844221beef048403f1bc1f1c123c191257c3c890a" },
+]
+
+[[package]]
+name = "idna"
+version = "3.10"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/idna/3.10/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/idna/3.10/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3" },
+]
+
+[[package]]
+name = "importlib-resources"
+version = "6.5.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/importlib-resources/6.5.2/importlib_resources-6.5.2.tar.gz", hash = "sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/importlib-resources/6.5.2/importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec" },
+]
+
+[[package]]
+name = "index-tts"
+version = "0.1.0"
+source = { virtual = "." }
+dependencies = [
+ { name = "accelerate" },
+ { name = "cn2an" },
+ { name = "cython" },
+ { name = "deepspeed" },
+ { name = "descript-audiotools" },
+ { name = "ffmpeg-python" },
+ { name = "g2p-en" },
+ { name = "gradio" },
+ { name = "jieba" },
+ { name = "json5" },
+ { name = "keras" },
+ { name = "librosa" },
+ { name = "matplotlib" },
+ { name = "modelscope" },
+ { name = "munch" },
+ { name = "numba" },
+ { name = "numpy" },
+ { name = "omegaconf" },
+ { name = "opencv-python" },
+ { name = "pandas" },
+ { name = "safetensors" },
+ { name = "sentencepiece" },
+ { name = "tensorboard" },
+ { name = "textstat" },
+ { name = "tokenizers" },
+ { name = "tqdm" },
+ { name = "transformers" },
+ { name = "wetext" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "accelerate", specifier = "==1.8.1" },
+ { name = "cn2an", specifier = "==0.5.22" },
+ { name = "cython", specifier = "==3.0.7" },
+ { name = "deepspeed", specifier = "==0.17.1" },
+ { name = "descript-audiotools", specifier = "==0.7.2" },
+ { name = "ffmpeg-python", specifier = "==0.2.0" },
+ { name = "g2p-en", specifier = "==2.1.0" },
+ { name = "gradio", specifier = ">=5.44.1" },
+ { name = "jieba", specifier = "==0.42.1" },
+ { name = "json5", specifier = "==0.10.0" },
+ { name = "keras", specifier = "==2.9.0" },
+ { name = "librosa", specifier = "==0.10.2.post1" },
+ { name = "matplotlib", specifier = "==3.8.2" },
+ { name = "modelscope", specifier = "==1.27.0" },
+ { name = "munch", specifier = "==4.0.0" },
+ { name = "numba", specifier = "==0.58.1" },
+ { name = "numpy", specifier = "==1.26.2" },
+ { name = "omegaconf", specifier = ">=2.3.0" },
+ { name = "opencv-python", specifier = "==4.9.0.80" },
+ { name = "pandas", specifier = "==2.1.3" },
+ { name = "safetensors", specifier = "==0.5.2" },
+ { name = "sentencepiece", specifier = ">=0.2.1" },
+ { name = "tensorboard", specifier = "==2.9.1" },
+ { name = "textstat", specifier = ">=0.7.10" },
+ { name = "tokenizers", specifier = "==0.21.0" },
+ { name = "tqdm", specifier = ">=4.67.1" },
+ { name = "transformers", specifier = "==4.52.1" },
+ { name = "wetext", specifier = ">=0.0.9" },
+]
+
+[[package]]
+name = "inflect"
+version = "7.5.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "more-itertools" },
+ { name = "typeguard" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/inflect/7.5.0/inflect-7.5.0.tar.gz", hash = "sha256:faf19801c3742ed5a05a8ce388e0d8fe1a07f8d095c82201eb904f5d27ad571f" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/inflect/7.5.0/inflect-7.5.0-py3-none-any.whl", hash = "sha256:2aea70e5e70c35d8350b8097396ec155ffd68def678c7ff97f51aa69c1d92344" },
+]
+
+[[package]]
+name = "ipython"
+version = "8.37.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+resolution-markers = [
+ "python_full_version < '3.11' and sys_platform == 'darwin'",
+ "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')",
+]
+dependencies = [
+ { name = "colorama", marker = "python_full_version < '3.11' and sys_platform == 'win32'" },
+ { name = "decorator", marker = "python_full_version < '3.11'" },
+ { name = "exceptiongroup", marker = "python_full_version < '3.11'" },
+ { name = "jedi", marker = "python_full_version < '3.11'" },
+ { name = "matplotlib-inline", marker = "python_full_version < '3.11'" },
+ { name = "pexpect", marker = "python_full_version < '3.11' and sys_platform != 'emscripten' and sys_platform != 'win32'" },
+ { name = "prompt-toolkit", marker = "python_full_version < '3.11'" },
+ { name = "pygments", marker = "python_full_version < '3.11'" },
+ { name = "stack-data", marker = "python_full_version < '3.11'" },
+ { name = "traitlets", marker = "python_full_version < '3.11'" },
+ { name = "typing-extensions", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ipython/8.37.0/ipython-8.37.0.tar.gz", hash = "sha256:ca815841e1a41a1e6b73a0b08f3038af9b2252564d01fc405356d34033012216" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ipython/8.37.0/ipython-8.37.0-py3-none-any.whl", hash = "sha256:ed87326596b878932dbcb171e3e698845434d8c61b8d8cd474bf663041a9dcf2" },
+]
+
+[[package]]
+name = "ipython"
+version = "9.5.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+resolution-markers = [
+ "python_full_version >= '3.13' and sys_platform == 'darwin'",
+ "python_full_version == '3.12.*' and sys_platform == 'darwin'",
+ "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
+ "(python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
+ "python_full_version == '3.11.*' and sys_platform == 'darwin'",
+ "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
+]
+dependencies = [
+ { name = "colorama", marker = "python_full_version >= '3.11' and sys_platform == 'win32'" },
+ { name = "decorator", marker = "python_full_version >= '3.11'" },
+ { name = "ipython-pygments-lexers", marker = "python_full_version >= '3.11'" },
+ { name = "jedi", marker = "python_full_version >= '3.11'" },
+ { name = "matplotlib-inline", marker = "python_full_version >= '3.11'" },
+ { name = "pexpect", marker = "python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'win32'" },
+ { name = "prompt-toolkit", marker = "python_full_version >= '3.11'" },
+ { name = "pygments", marker = "python_full_version >= '3.11'" },
+ { name = "stack-data", marker = "python_full_version >= '3.11'" },
+ { name = "traitlets", marker = "python_full_version >= '3.11'" },
+ { name = "typing-extensions", marker = "python_full_version == '3.11.*'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ipython/9.5.0/ipython-9.5.0.tar.gz", hash = "sha256:129c44b941fe6d9b82d36fc7a7c18127ddb1d6f02f78f867f402e2e3adde3113" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ipython/9.5.0/ipython-9.5.0-py3-none-any.whl", hash = "sha256:88369ffa1d5817d609120daa523a6da06d02518e582347c29f8451732a9c5e72" },
+]
+
+[[package]]
+name = "ipython-pygments-lexers"
+version = "1.1.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "pygments", marker = "python_full_version >= '3.11'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ipython-pygments-lexers/1.1.1/ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ipython-pygments-lexers/1.1.1/ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c" },
+]
+
+[[package]]
+name = "jedi"
+version = "0.19.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "parso" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/jedi/0.19.2/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/jedi/0.19.2/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9" },
+]
+
+[[package]]
+name = "jieba"
+version = "0.42.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/jieba/0.42.1/jieba-0.42.1.tar.gz", hash = "sha256:055ca12f62674fafed09427f176506079bc135638a14e23e25be909131928db2" }
+
+[[package]]
+name = "jinja2"
+version = "3.1.6"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "markupsafe" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/jinja2/3.1.6/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/jinja2/3.1.6/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67" },
+]
+
+[[package]]
+name = "joblib"
+version = "1.5.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/joblib/1.5.2/joblib-1.5.2.tar.gz", hash = "sha256:3faa5c39054b2f03ca547da9b2f52fde67c06240c31853f306aea97f13647b55" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/joblib/1.5.2/joblib-1.5.2-py3-none-any.whl", hash = "sha256:4e1f0bdbb987e6d843c70cf43714cb276623def372df3c22fe5266b2670bc241" },
+]
+
+[[package]]
+name = "json5"
+version = "0.10.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/json5/0.10.0/json5-0.10.0.tar.gz", hash = "sha256:e66941c8f0a02026943c52c2eb34ebeb2a6f819a0be05920a6f5243cd30fd559" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/json5/0.10.0/json5-0.10.0-py3-none-any.whl", hash = "sha256:19b23410220a7271e8377f81ba8aacba2fdd56947fbb137ee5977cbe1f5e8dfa" },
+]
+
+[[package]]
+name = "julius"
+version = "0.2.7"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "torch" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/julius/0.2.7/julius-0.2.7.tar.gz", hash = "sha256:3c0f5f5306d7d6016fcc95196b274cae6f07e2c9596eed314e4e7641554fbb08" }
+
+[[package]]
+name = "kaldifst"
+version = "1.7.17"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17.tar.gz", hash = "sha256:58d9255c9d5e37bcdc786847d24ce36b59c703c956155112084fca6743d59253" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:c0a26415455d5b3923871206ce744974515c2ad28fec1cf5293c8b1f14952983" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:dabc7a0aea82f96ebadf5632ef6b134c53a125a7aa22ee6b84402dbddee7aa7c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:27416ece4f23e9f7611d6050b12c1a70693a95a1e11e8d3430d1490c84d221d7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5ea6261af24c39383defa5a4f3ef0eb9276bd3688692e232b5577cb3ae004c87" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3963876d0343ad344edf3507d539b9f822a308f9990c6b7c32399fd480361352" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp310-cp310-win32.whl", hash = "sha256:9a20cd4ba43aefc973787c3baaf754d7d4886b327b0632c097a279a097c63d26" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp310-cp310-win_amd64.whl", hash = "sha256:1b6e3667a6c758c18da9e1a1e5803640c954e22a15dfd7bad9d5247c1d1ce279" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:194ac06ab4962b2e0fb1874b7512102745afed04c761ac604fd4faa949663a73" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:41621b463b1e4dcfb6b38042c2db64a13e6bcfe336eea33f5e182b740fe2811c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ebfead12e5a1b82cecafda2ea4c926ce6d263123e3740f416bf8058e2bd85788" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1d30710bc1f3eb86af349858366a8b0fc837114e6b51dd8feb10e82f297c928f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d1760fc4e94bd15e02befd5e80b440af7e86460da7be857f6792b335a8025215" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp311-cp311-win32.whl", hash = "sha256:8bd88d6c1a7ae43f0b852ed20378286ba9b0b170b10b6796706c6583a432e53d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp311-cp311-win_amd64.whl", hash = "sha256:8afbe843b03dee44c4927962cc83f43c3da005f9ff34a7fa23a558a97b8a4310" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:aa5d968f47f0d0f6a30e3563045e809fa2a2ca7fc26a2eefe0a7606cb01237a2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:516317d46a7d35a508056a2a77e50a679ba463452cfeb2be228db8f370333ad8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d840d7ff1f311d619262e9b5b6147cec48a356817861337abf8301e37e9219d7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bb7aa861c4639cf9788a85411afa29f5b22d1aee65d64f6c37809e615f78fa64" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d92a55c7cd0913e40a9849d14f433b4ea035c12a214ba945da7bfc076879efc6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp312-cp312-win32.whl", hash = "sha256:8e4cf45fc60d9cf7a4dba65803df5928a96523ba5e0ad6ec4c538495c9edac46" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp312-cp312-win_amd64.whl", hash = "sha256:6134c001853dc8b5fab2dc3c99e0cf0adfbd4f12c43d153d9ce5638ebaaffc8d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:31dcdb92ce43052b96443330ca82c603b7f10153123962f6015f3d96d5c62d22" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:88523ee64936833a89469efb658542349ea90683eea3506aee496db54bc22530" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:47681b0e74ed633e7e081e6986608d3ed110d6e47d0684eb6cf66326bf98cde3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:31d21c62bac03afeb15a66641aed019784d44342ecc95eeeab98561c70c0d65a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e54604e03c41b2e0f0a56a5343cfb5a49194845f5c17fd666e8cad1e2ac671aa" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp313-cp313-win32.whl", hash = "sha256:502217ea878f24a7044371502ed74463c20ad554738eb78416b949847f227baf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp313-cp313-win_amd64.whl", hash = "sha256:9c0cf66c71a71e790d6a8b8a0ee8b4ac1f25d5be0d186f847a0c2ed8e936889b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8a9cbd55dc5363d7308deb1d11c63bf143a5646b2821e0c90ff008af24dadc1f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e2145ab4d4b96cf6af045aa3e30a57f49859367f47b2cc63f935965d060720c7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e4d01992a5bd69f5a6a2775024f268e8ab63fb2865e947c27f8760979d23daa9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10ed2f58c4ffb1d3203fa1ee6cd6ccdbd577a6947e13102310b776e7b540391c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31bc316702382a4c2427999d2bdb3d57b2bfb1333b8a20d10d328e2a7ef29a65" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kaldifst/1.7.17/kaldifst-1.7.17-cp314-cp314-win32.whl", hash = "sha256:8cde76483ffe39edf747a7b1133e364546d2eca49e36975c27e92842f9499b72" },
+]
+
+[[package]]
+name = "keras"
+version = "2.9.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/keras/2.9.0/keras-2.9.0-py2.py3-none-any.whl", hash = "sha256:55911256f89cfc9343c9fbe4b61ec45a2d33d89729cbe1ab9dcacf8b07b8b6ab" },
+]
+
+[[package]]
+name = "kiwisolver"
+version = "1.4.9"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9.tar.gz", hash = "sha256:c3b22c26c6fd6811b0ae8363b95ca8ce4ea3c202d3d0975b2914310ceb1bcc4d" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b4b4d74bda2b8ebf4da5bd42af11d02d04428b2c32846e4c2c93219df8a7987b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fb3b8132019ea572f4611d770991000d7f58127560c4889729248eb5852a102f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84fd60810829c27ae375114cd379da1fa65e6918e1da405f356a775d49a62bcf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b78efa4c6e804ecdf727e580dbb9cba85624d2e1c6b5cb059c66290063bd99a9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4efec7bcf21671db6a3294ff301d2fc861c31faa3c8740d1a94689234d1b415" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:90f47e70293fc3688b71271100a1a5453aa9944a81d27ff779c108372cf5567b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8fdca1def57a2e88ef339de1737a1449d6dbf5fab184c54a1fca01d541317154" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9cf554f21be770f5111a1690d42313e140355e687e05cf82cb23d0a721a64a48" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fc1795ac5cd0510207482c3d1d3ed781143383b8cfd36f5c645f3897ce066220" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:ccd09f20ccdbbd341b21a67ab50a119b64a403b09288c27481575105283c1586" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:540c7c72324d864406a009d72f5d6856f49693db95d1fbb46cf86febef873634" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp310-cp310-win_amd64.whl", hash = "sha256:ede8c6d533bc6601a47ad4046080d36b8fc99f81e6f1c17b0ac3c2dc91ac7611" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp310-cp310-win_arm64.whl", hash = "sha256:7b4da0d01ac866a57dd61ac258c5607b4cd677f63abaec7b148354d2b2cdd536" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:eb14a5da6dc7642b0f3a18f13654847cd8b7a2550e2645a5bda677862b03ba16" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:39a219e1c81ae3b103643d2aedb90f1ef22650deb266ff12a19e7773f3e5f089" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2405a7d98604b87f3fc28b1716783534b1b4b8510d8142adca34ee0bc3c87543" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dc1ae486f9abcef254b5618dfb4113dd49f94c68e3e027d03cf0143f3f772b61" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a1f570ce4d62d718dce3f179ee78dac3b545ac16c0c04bb363b7607a949c0d1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb27e7b78d716c591e88e0a09a2139c6577865d7f2e152488c2cc6257f460872" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:15163165efc2f627eb9687ea5f3a28137217d217ac4024893d753f46bce9de26" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bdee92c56a71d2b24c33a7d4c2856bd6419d017e08caa7802d2963870e315028" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:412f287c55a6f54b0650bd9b6dce5aceddb95864a1a90c87af16979d37c89771" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2c93f00dcba2eea70af2be5f11a830a742fe6b579a1d4e00f47760ef13be247a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f117e1a089d9411663a3207ba874f31be9ac8eaa5b533787024dc07aeb74f464" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp311-cp311-win_amd64.whl", hash = "sha256:be6a04e6c79819c9a8c2373317d19a96048e5a3f90bec587787e86a1153883c2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp311-cp311-win_arm64.whl", hash = "sha256:0ae37737256ba2de764ddc12aed4956460277f00c4996d51a197e72f62f5eec7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ac5a486ac389dddcc5bef4f365b6ae3ffff2c433324fb38dd35e3fab7c957999" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2ba92255faa7309d06fe44c3a4a97efe1c8d640c2a79a5ef728b685762a6fd2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a2899935e724dd1074cb568ce7ac0dce28b2cd6ab539c8e001a8578eb106d14" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f6008a4919fdbc0b0097089f67a1eb55d950ed7e90ce2cc3e640abadd2757a04" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:67bb8b474b4181770f926f7b7d2f8c0248cbcb78b660fdd41a47054b28d2a752" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2327a4a30d3ee07d2fbe2e7933e8a37c591663b96ce42a00bc67461a87d7df77" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7a08b491ec91b1d5053ac177afe5290adacf1f0f6307d771ccac5de30592d198" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8fc5c867c22b828001b6a38d2eaeb88160bf5783c6cb4a5e440efc981ce286d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3b3115b2581ea35bb6d1f24a4c90af37e5d9b49dcff267eeed14c3893c5b86ab" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:858e4c22fb075920b96a291928cb7dea5644e94c0ee4fcd5af7e865655e4ccf2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ed0fecd28cc62c54b262e3736f8bb2512d8dcfdc2bcf08be5f47f96bf405b145" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp312-cp312-win_amd64.whl", hash = "sha256:f68208a520c3d86ea51acf688a3e3002615a7f0238002cccc17affecc86a8a54" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp312-cp312-win_arm64.whl", hash = "sha256:2c1a4f57df73965f3f14df20b80ee29e6a7930a57d2d9e8491a25f676e197c60" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5d0432ccf1c7ab14f9949eec60c5d1f924f17c037e9f8b33352fa05799359b8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efb3a45b35622bb6c16dbfab491a8f5a391fe0e9d45ef32f4df85658232ca0e2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a12cf6398e8a0a001a059747a1cbf24705e18fe413bc22de7b3d15c67cffe3f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b67e6efbf68e077dd71d1a6b37e43e1a99d0bff1a3d51867d45ee8908b931098" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5656aa670507437af0207645273ccdfee4f14bacd7f7c67a4306d0dcaeaf6eed" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bfc08add558155345129c7803b3671cf195e6a56e7a12f3dde7c57d9b417f525" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:40092754720b174e6ccf9e845d0d8c7d8e12c3d71e7fc35f55f3813e96376f78" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:497d05f29a1300d14e02e6441cf0f5ee81c1ff5a304b0d9fb77423974684e08b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bdd1a81a1860476eb41ac4bc1e07b3f07259e6d55bbf739b79c8aaedcf512799" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e6b93f13371d341afee3be9f7c5964e3fe61d5fa30f6a30eb49856935dfe4fc3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d75aa530ccfaa593da12834b86a0724f58bff12706659baa9227c2ccaa06264c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313-win_amd64.whl", hash = "sha256:dd0a578400839256df88c16abddf9ba14813ec5f21362e1fe65022e00c883d4d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313-win_arm64.whl", hash = "sha256:d4188e73af84ca82468f09cadc5ac4db578109e52acb4518d8154698d3a87ca2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:5a0f2724dfd4e3b3ac5a82436a8e6fd16baa7d507117e4279b660fe8ca38a3a1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1b11d6a633e4ed84fc0ddafd4ebfd8ea49b3f25082c04ad12b8315c11d504dc1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61874cdb0a36016354853593cffc38e56fc9ca5aa97d2c05d3dcf6922cd55a11" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:60c439763a969a6af93b4881db0eed8fadf93ee98e18cbc35bc8da868d0c4f0c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92a2f997387a1b79a75e7803aa7ded2cfbe2823852ccf1ba3bcf613b62ae3197" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a31d512c812daea6d8b3be3b2bfcbeb091dbb09177706569bcfc6240dcf8b41c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:52a15b0f35dad39862d376df10c5230155243a2c1a436e39eb55623ccbd68185" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a30fd6fdef1430fd9e1ba7b3398b5ee4e2887783917a687d86ba69985fb08748" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cc9617b46837c6468197b5945e196ee9ca43057bb7d9d1ae688101e4e1dddf64" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:0ab74e19f6a2b027ea4f845a78827969af45ce790e6cb3e1ebab71bdf9f215ff" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dba5ee5d3981160c28d5490f0d1b7ed730c22470ff7f6cc26cfcfaacb9896a07" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp313-cp313t-win_arm64.whl", hash = "sha256:0749fd8f4218ad2e851e11cc4dc05c7cbc0cbc4267bdfdb31782e65aace4ee9c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:9928fe1eb816d11ae170885a74d074f57af3a0d65777ca47e9aeb854a1fba386" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d0005b053977e7b43388ddec89fa567f43d4f6d5c2c0affe57de5ebf290dc552" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2635d352d67458b66fd0667c14cb1d4145e9560d503219034a18a87e971ce4f3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:767c23ad1c58c9e827b649a9ab7809fd5fd9db266a9cf02b0e926ddc2c680d58" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72d0eb9fba308b8311685c2268cf7d0a0639a6cd027d8128659f72bdd8a024b4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f68e4f3eeca8fb22cc3d731f9715a13b652795ef657a13df1ad0c7dc0e9731df" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d84cd4061ae292d8ac367b2c3fa3aad11cb8625a95d135fe93f286f914f3f5a6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a60ea74330b91bd22a29638940d115df9dc00af5035a9a2a6ad9399ffb4ceca5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ce6a3a4e106cf35c2d9c4fa17c05ce0b180db622736845d4315519397a77beaf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:77937e5e2a38a7b48eef0585114fe7930346993a88060d0bf886086d2aa49ef5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:24c175051354f4a28c5d6a31c93906dc653e2bf234e8a4bbfb964892078898ce" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314-win_amd64.whl", hash = "sha256:0763515d4df10edf6d06a3c19734e2566368980d21ebec439f33f9eb936c07b7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314-win_arm64.whl", hash = "sha256:0e4e2bf29574a6a7b7f6cb5fa69293b9f96c928949ac4a53ba3f525dffb87f9c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d976bbb382b202f71c67f77b0ac11244021cfa3f7dfd9e562eefcea2df711548" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2489e4e5d7ef9a1c300a5e0196e43d9c739f066ef23270607d45aba368b91f2d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e2ea9f7ab7fbf18fffb1b5434ce7c69a07582f7acc7717720f1d69f3e806f90c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b34e51affded8faee0dfdb705416153819d8ea9250bbbf7ea1b249bdeb5f1122" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8aacd3d4b33b772542b2e01beb50187536967b514b00003bdda7589722d2a64" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7cf974dd4e35fa315563ac99d6287a1024e4dc2077b8a7d7cd3d2fb65d283134" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:85bd218b5ecfbee8c8a82e121802dcb519a86044c9c3b2e4aef02fa05c6da370" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0856e241c2d3df4efef7c04a1e46b1936b6120c9bcf36dd216e3acd84bc4fb21" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9af39d6551f97d31a4deebeac6f45b156f9755ddc59c07b402c148f5dbb6482a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:bb4ae2b57fc1d8cbd1cf7b1d9913803681ffa903e7488012be5b76dedf49297f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:aedff62918805fb62d43a4aa2ecd4482c380dc76cd31bd7c8878588a61bd0369" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314t-win_amd64.whl", hash = "sha256:1fa333e8b2ce4d9660f2cda9c0e1b6bafcfb2457a9d259faa82289e73ec24891" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-cp314-cp314t-win_arm64.whl", hash = "sha256:4a48a2ce79d65d363597ef7b567ce3d14d68783d2b2263d98db3d9477805ba32" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4d1d9e582ad4d63062d34077a9a1e9f3c34088a2ec5135b1f7190c07cf366527" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:deed0c7258ceb4c44ad5ec7d9918f9f14fd05b2be86378d86cf50e63d1e7b771" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a590506f303f512dff6b7f75fd2fd18e16943efee932008fe7140e5fa91d80e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-pp310-pypy310_pp73-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e09c2279a4d01f099f52d5c4b3d9e208e91edcbd1a175c9662a8b16e000fece9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c9e7cdf45d594ee04d5be1b24dd9d49f3d1590959b2271fb30b5ca2b262c00fb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:720e05574713db64c356e86732c0f3c5252818d05f9df320f0ad8380641acea5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:17680d737d5335b552994a2008fab4c851bcd7de33094a82067ef3a576ff02fa" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:85b5352f94e490c028926ea567fc569c52ec79ce131dadb968d3853e809518c2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-pp311-pypy311_pp73-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:464415881e4801295659462c49461a24fb107c140de781d55518c4b80cb6790f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/kiwisolver/1.4.9/kiwisolver-1.4.9-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:fb940820c63a9590d31d88b815e7a3aa5915cad3ce735ab45f0c730b39547de1" },
+]
+
+[[package]]
+name = "lazy-loader"
+version = "0.4"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "packaging" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/lazy-loader/0.4/lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/lazy-loader/0.4/lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc" },
+]
+
+[[package]]
+name = "librosa"
+version = "0.10.2.post1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "audioread" },
+ { name = "decorator" },
+ { name = "joblib" },
+ { name = "lazy-loader" },
+ { name = "msgpack" },
+ { name = "numba" },
+ { name = "numpy" },
+ { name = "pooch" },
+ { name = "scikit-learn" },
+ { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version < '3.11'" },
+ { name = "scipy", version = "1.16.1", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version >= '3.11'" },
+ { name = "soundfile" },
+ { name = "soxr" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/librosa/0.10.2.post1/librosa-0.10.2.post1.tar.gz", hash = "sha256:cd99f16717cbcd1e0983e37308d1db46a6f7dfc2e396e5a9e61e6821e44bd2e7" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/librosa/0.10.2.post1/librosa-0.10.2.post1-py3-none-any.whl", hash = "sha256:dc882750e8b577a63039f25661b7e39ec4cfbacc99c1cffba666cd664fb0a7a0" },
+]
+
+[[package]]
+name = "llvmlite"
+version = "0.41.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/llvmlite/0.41.1/llvmlite-0.41.1.tar.gz", hash = "sha256:f19f767a018e6ec89608e1f6b13348fa2fcde657151137cb64e56d48598a92db" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/llvmlite/0.41.1/llvmlite-0.41.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1e1029d47ee66d3a0c4d6088641882f75b93db82bd0e6178f7bd744ebce42b9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/llvmlite/0.41.1/llvmlite-0.41.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:150d0bc275a8ac664a705135e639178883293cf08c1a38de3bbaa2f693a0a867" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/llvmlite/0.41.1/llvmlite-0.41.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1eee5cf17ec2b4198b509272cf300ee6577229d237c98cc6e63861b08463ddc6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/llvmlite/0.41.1/llvmlite-0.41.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dd0338da625346538f1173a17cabf21d1e315cf387ca21b294ff209d176e244" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/llvmlite/0.41.1/llvmlite-0.41.1-cp310-cp310-win32.whl", hash = "sha256:fa1469901a2e100c17eb8fe2678e34bd4255a3576d1a543421356e9c14d6e2ae" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/llvmlite/0.41.1/llvmlite-0.41.1-cp310-cp310-win_amd64.whl", hash = "sha256:2b76acee82ea0e9304be6be9d4b3840208d050ea0dcad75b1635fa06e949a0ae" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/llvmlite/0.41.1/llvmlite-0.41.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:210e458723436b2469d61b54b453474e09e12a94453c97ea3fbb0742ba5a83d8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/llvmlite/0.41.1/llvmlite-0.41.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:855f280e781d49e0640aef4c4af586831ade8f1a6c4df483fb901cbe1a48d127" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/llvmlite/0.41.1/llvmlite-0.41.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b67340c62c93a11fae482910dc29163a50dff3dfa88bc874872d28ee604a83be" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/llvmlite/0.41.1/llvmlite-0.41.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2181bb63ef3c607e6403813421b46982c3ac6bfc1f11fa16a13eaafb46f578e6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/llvmlite/0.41.1/llvmlite-0.41.1-cp311-cp311-win_amd64.whl", hash = "sha256:9564c19b31a0434f01d2025b06b44c7ed422f51e719ab5d24ff03b7560066c9a" },
+]
+
+[[package]]
+name = "markdown"
+version = "3.9"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markdown/3.9/markdown-3.9.tar.gz", hash = "sha256:d2900fe1782bd33bdbbd56859defef70c2e78fc46668f8eb9df3128138f2cb6a" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markdown/3.9/markdown-3.9-py3-none-any.whl", hash = "sha256:9f4d91ed810864ea88a6f32c07ba8bee1346c0cc1f6b1f9f6c822f2a9667d280" },
+]
+
+[[package]]
+name = "markdown-it-py"
+version = "4.0.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "mdurl" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markdown-it-py/4.0.0/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markdown-it-py/4.0.0/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147" },
+]
+
+[[package]]
+name = "markdown2"
+version = "2.5.4"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markdown2/2.5.4/markdown2-2.5.4.tar.gz", hash = "sha256:a09873f0b3c23dbfae589b0080587df52ad75bb09a5fa6559147554736676889" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markdown2/2.5.4/markdown2-2.5.4-py3-none-any.whl", hash = "sha256:3c4b2934e677be7fec0e6f2de4410e116681f4ad50ec8e5ba7557be506d3f439" },
+]
+
+[[package]]
+name = "markupsafe"
+version = "3.0.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/markupsafe/3.0.2/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f" },
+]
+
+[[package]]
+name = "matplotlib"
+version = "3.8.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "contourpy", version = "1.3.2", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version < '3.11'" },
+ { name = "contourpy", version = "1.3.3", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version >= '3.11'" },
+ { name = "cycler" },
+ { name = "fonttools" },
+ { name = "kiwisolver" },
+ { name = "numpy" },
+ { name = "packaging" },
+ { name = "pillow" },
+ { name = "pyparsing" },
+ { name = "python-dateutil" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2.tar.gz", hash = "sha256:01a978b871b881ee76017152f1f1a0cbf6bd5f7b8ff8c96df0df1bd57d8755a1" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:09796f89fb71a0c0e1e2f4bdaf63fb2cefc84446bb963ecdeb40dfee7dfa98c7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f9c6976748a25e8b9be51ea028df49b8e561eed7809146da7a47dbecebab367" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b78e4f2cedf303869b782071b55fdde5987fda3038e9d09e58c91cc261b5ad18" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e208f46cf6576a7624195aa047cb344a7f802e113bb1a06cfd4bee431de5e31" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:46a569130ff53798ea5f50afce7406e91fdc471ca1e0e26ba976a8c734c9427a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:830f00640c965c5b7f6bc32f0d4ce0c36dfe0379f7dd65b07a00c801713ec40a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d86593ccf546223eb75a39b44c32788e6f6440d13cfc4750c1c15d0fcb850b63" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9a5430836811b7652991939012f43d2808a2db9b64ee240387e8c43e2e5578c8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9576723858a78751d5aacd2497b8aef29ffea6d1c95981505877f7ac28215c6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ba9cbd8ac6cf422f3102622b20f8552d601bf8837e49a3afed188d560152788" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:03f9d160a29e0b65c0790bb07f4f45d6a181b1ac33eb1bb0dd225986450148f0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:3773002da767f0a9323ba1a9b9b5d00d6257dbd2a93107233167cfb581f64717" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:4c318c1e95e2f5926fba326f68177dee364aa791d6df022ceb91b8221bd0a627" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:091275d18d942cf1ee9609c830a1bc36610607d8223b1b981c37d5c9fc3e46a4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b0f3b8ea0e99e233a4bcc44590f01604840d833c280ebb8fe5554fd3e6cfe8d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7b1704a530395aaf73912be741c04d181f82ca78084fbd80bc737be04848331" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:533b0e3b0c6768eef8cbe4b583731ce25a91ab54a22f830db2b031e83cca9213" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib/3.8.2/matplotlib-3.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:0f4fc5d72b75e2c18e55eb32292659cf731d9d5b312a6eb036506304f4675630" },
+]
+
+[[package]]
+name = "matplotlib-inline"
+version = "0.1.7"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "traitlets" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib-inline/0.1.7/matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/matplotlib-inline/0.1.7/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca" },
+]
+
+[[package]]
+name = "mdurl"
+version = "0.1.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/mdurl/0.1.2/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/mdurl/0.1.2/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8" },
+]
+
+[[package]]
+name = "modelscope"
+version = "1.27.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "requests" },
+ { name = "setuptools" },
+ { name = "tqdm" },
+ { name = "urllib3" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/modelscope/1.27.0/modelscope-1.27.0.tar.gz", hash = "sha256:645e8b9cd346f3538ddce9f905b0ac7e57fb6f48710c6c256ae3e5bb37303df5" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/modelscope/1.27.0/modelscope-1.27.0-py3-none-any.whl", hash = "sha256:95071d3f9a7bf944a485d1ae86e96a75e4148030e96969b8d1971672795a47d4" },
+]
+
+[[package]]
+name = "more-itertools"
+version = "10.8.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/more-itertools/10.8.0/more_itertools-10.8.0.tar.gz", hash = "sha256:f638ddf8a1a0d134181275fb5d58b086ead7c6a72429ad725c67503f13ba30bd" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/more-itertools/10.8.0/more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b" },
+]
+
+[[package]]
+name = "mpmath"
+version = "1.3.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/mpmath/1.3.0/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/mpmath/1.3.0/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c" },
+]
+
+[[package]]
+name = "msgpack"
+version = "1.1.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1.tar.gz", hash = "sha256:77b79ce34a2bdab2594f490c8e80dd62a02d650b91a75159a63ec413b8d104cd" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:353b6fc0c36fde68b661a12949d7d49f8f51ff5fa019c1e47c87c4ff34b080ed" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:79c408fcf76a958491b4e3b103d1c417044544b68e96d06432a189b43d1215c8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78426096939c2c7482bf31ef15ca219a9e24460289c00dd0b94411040bb73ad2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b17ba27727a36cb73aabacaa44b13090feb88a01d012c0f4be70c00f75048b4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a17ac1ea6ec3c7687d70201cfda3b1e8061466f28f686c24f627cae4ea8efd0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:88d1e966c9235c1d4e2afac21ca83933ba59537e2e2727a999bf3f515ca2af26" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f6d58656842e1b2ddbe07f43f56b10a60f2ba5826164910968f5933e5178af75" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:96decdfc4adcbc087f5ea7ebdcfd3dee9a13358cae6e81d54be962efc38f6338" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp310-cp310-win32.whl", hash = "sha256:6640fd979ca9a212e4bcdf6eb74051ade2c690b862b679bfcb60ae46e6dc4bfd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:8b65b53204fe1bd037c40c4148d00ef918eb2108d24c9aaa20bc31f9810ce0a8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:71ef05c1726884e44f8b1d1773604ab5d4d17729d8491403a705e649116c9558" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:36043272c6aede309d29d56851f8841ba907a1a3d04435e43e8a19928e243c1d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a32747b1b39c3ac27d0670122b57e6e57f28eefb725e0b625618d1b59bf9d1e0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a8b10fdb84a43e50d38057b06901ec9da52baac6983d3f709d8507f3889d43f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba0c325c3f485dc54ec298d8b024e134acf07c10d494ffa24373bea729acf704" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:88daaf7d146e48ec71212ce21109b66e06a98e5e44dca47d853cbfe171d6c8d2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d8b55ea20dc59b181d3f47103f113e6f28a5e1c89fd5b67b9140edb442ab67f2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4a28e8072ae9779f20427af07f53bbb8b4aa81151054e882aee333b158da8752" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp311-cp311-win32.whl", hash = "sha256:7da8831f9a0fdb526621ba09a281fadc58ea12701bc709e7b8cbc362feabc295" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:5fd1b58e1431008a57247d6e7cc4faa41c3607e8e7d4aaf81f7c29ea013cb458" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ae497b11f4c21558d95de9f64fff7053544f4d1a17731c866143ed6bb4591238" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:33be9ab121df9b6b461ff91baac6f2731f83d9b27ed948c5b9d1978ae28bf157" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f64ae8fe7ffba251fecb8408540c34ee9df1c26674c50c4544d72dbf792e5ce" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a494554874691720ba5891c9b0b39474ba43ffb1aaf32a5dac874effb1619e1a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb643284ab0ed26f6957d969fe0dd8bb17beb567beb8998140b5e38a90974f6c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d275a9e3c81b1093c060c3837e580c37f47c51eca031f7b5fb76f7b8470f5f9b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4fd6b577e4541676e0cc9ddc1709d25014d3ad9a66caa19962c4f5de30fc09ef" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb29aaa613c0a1c40d1af111abf025f1732cab333f96f285d6a93b934738a68a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp312-cp312-win32.whl", hash = "sha256:870b9a626280c86cff9c576ec0d9cbcc54a1e5ebda9cd26dab12baf41fee218c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:5692095123007180dca3e788bb4c399cc26626da51629a31d40207cb262e67f4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3765afa6bd4832fc11c3749be4ba4b69a0e8d7b728f78e68120a157a4c5d41f0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8ddb2bcfd1a8b9e431c8d6f4f7db0773084e107730ecf3472f1dfe9ad583f3d9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:196a736f0526a03653d829d7d4c5500a97eea3648aebfd4b6743875f28aa2af8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d592d06e3cc2f537ceeeb23d38799c6ad83255289bb84c2e5792e5a8dea268a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4df2311b0ce24f06ba253fda361f938dfecd7b961576f9be3f3fbd60e87130ac" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e4141c5a32b5e37905b5940aacbc59739f036930367d7acce7a64e4dec1f5e0b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b1ce7f41670c5a69e1389420436f41385b1aa2504c3b0c30620764b15dded2e7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4147151acabb9caed4e474c3344181e91ff7a388b888f1e19ea04f7e73dc7ad5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp313-cp313-win32.whl", hash = "sha256:500e85823a27d6d9bba1d057c871b4210c1dd6fb01fbb764e37e4e8847376323" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/msgpack/1.1.1/msgpack-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:6d489fba546295983abd142812bda76b57e33d0b9f5d5b71c09a583285506f69" },
+]
+
+[[package]]
+name = "munch"
+version = "4.0.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/munch/4.0.0/munch-4.0.0.tar.gz", hash = "sha256:542cb151461263216a4e37c3fd9afc425feeaf38aaa3025cd2a981fadb422235" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/munch/4.0.0/munch-4.0.0-py2.py3-none-any.whl", hash = "sha256:71033c45db9fb677a0b7eb517a4ce70ae09258490e419b0e7f00d1e386ecb1b4" },
+]
+
+[[package]]
+name = "networkx"
+version = "3.4.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+resolution-markers = [
+ "python_full_version < '3.11' and sys_platform == 'darwin'",
+ "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')",
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/networkx/3.4.2/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/networkx/3.4.2/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f" },
+]
+
+[[package]]
+name = "networkx"
+version = "3.5"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+resolution-markers = [
+ "python_full_version >= '3.13' and sys_platform == 'darwin'",
+ "python_full_version == '3.12.*' and sys_platform == 'darwin'",
+ "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
+ "(python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
+ "python_full_version == '3.11.*' and sys_platform == 'darwin'",
+ "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/networkx/3.5/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/networkx/3.5/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec" },
+]
+
+[[package]]
+name = "ninja"
+version = "1.13.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0.tar.gz", hash = "sha256:4a40ce995ded54d9dc24f8ea37ff3bf62ad192b547f6c7126e7e25045e76f978" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-macosx_10_9_universal2.whl", hash = "sha256:fa2a8bfc62e31b08f83127d1613d10821775a0eb334197154c4d6067b7068ff1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3d00c692fb717fd511abeb44b8c5d00340c36938c12d6538ba989fe764e79630" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:be7f478ff9f96a128b599a964fc60a6a87b9fa332ee1bd44fa243ac88d50291c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:60056592cf495e9a6a4bea3cd178903056ecb0943e4de45a2ea825edb6dc8d3e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:1c97223cdda0417f414bf864cfb73b72d8777e57ebb279c5f6de368de0062988" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fb46acf6b93b8dd0322adc3a4945452a4e774b75b91293bafcc7b7f8e6517dfa" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4be9c1b082d244b1ad7ef41eb8ab088aae8c109a9f3f0b3e56a252d3e00f42c1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:6739d3352073341ad284246f81339a384eec091d9851a886dfa5b00a6d48b3e2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:11be2d22027bde06f14c343f01d31446747dbb51e72d00decca2eb99be911e2f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:aa45b4037b313c2f698bc13306239b8b93b4680eb47e287773156ac9e9304714" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5f8e1e8a1a30835eeb51db05cf5a67151ad37542f5a4af2a438e9490915e5b72" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:3d7d7779d12cb20c6d054c61b702139fd23a7a964ec8f2c823f1ab1b084150db" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:d741a5e6754e0bda767e3274a0f0deeef4807f1fec6c0d7921a0244018926ae5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:e8bad11f8a00b64137e9b315b137d8bb6cbf3086fbdc43bf1f90fd33324d2e96" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b4f2a072db3c0f944c32793e91532d8948d20d9ab83da9c0c7c15b5768072200" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-win32.whl", hash = "sha256:8cfbb80b4a53456ae8a39f90ae3d7a2129f45ea164f43fadfa15dc38c4aef1c9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-win_amd64.whl", hash = "sha256:fb8ee8719f8af47fed145cced4a85f0755dd55d45b2bddaf7431fa89803c5f3e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ninja/1.13.0/ninja-1.13.0-py3-none-win_arm64.whl", hash = "sha256:3c0b40b1f0bba764644385319028650087b4c1b18cdfa6f45cb39a3669b81aa9" },
+]
+
+[[package]]
+name = "nltk"
+version = "3.9.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "click" },
+ { name = "joblib" },
+ { name = "regex" },
+ { name = "tqdm" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nltk/3.9.1/nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nltk/3.9.1/nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1" },
+]
+
+[[package]]
+name = "numba"
+version = "0.58.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "llvmlite" },
+ { name = "numpy" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numba/0.58.1/numba-0.58.1.tar.gz", hash = "sha256:487ded0633efccd9ca3a46364b40006dbdaca0f95e99b8b83e778d1195ebcbaa" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numba/0.58.1/numba-0.58.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:07f2fa7e7144aa6f275f27260e73ce0d808d3c62b30cff8906ad1dec12d87bbe" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numba/0.58.1/numba-0.58.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7bf1ddd4f7b9c2306de0384bf3854cac3edd7b4d8dffae2ec1b925e4c436233f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numba/0.58.1/numba-0.58.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bc2d904d0319d7a5857bd65062340bed627f5bfe9ae4a495aef342f072880d50" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numba/0.58.1/numba-0.58.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e79b6cc0d2bf064a955934a2e02bf676bc7995ab2db929dbbc62e4c16551be6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numba/0.58.1/numba-0.58.1-cp310-cp310-win_amd64.whl", hash = "sha256:81fe5b51532478149b5081311b0fd4206959174e660c372b94ed5364cfb37c82" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numba/0.58.1/numba-0.58.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bcecd3fb9df36554b342140a4d77d938a549be635d64caf8bd9ef6c47a47f8aa" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numba/0.58.1/numba-0.58.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a1eaa744f518bbd60e1f7ccddfb8002b3d06bd865b94a5d7eac25028efe0e0ff" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numba/0.58.1/numba-0.58.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bf68df9c307fb0aa81cacd33faccd6e419496fdc621e83f1efce35cdc5e79cac" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numba/0.58.1/numba-0.58.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:55a01e1881120e86d54efdff1be08381886fe9f04fc3006af309c602a72bc44d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numba/0.58.1/numba-0.58.1-cp311-cp311-win_amd64.whl", hash = "sha256:811305d5dc40ae43c3ace5b192c670c358a89a4d2ae4f86d1665003798ea7a1a" },
+]
+
+[[package]]
+name = "numpy"
+version = "1.26.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2.tar.gz", hash = "sha256:f65738447676ab5777f11e6bbbdb8ce11b785e105f690bc45966574816b6d3ea" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3703fc9258a4a122d17043e57b35e5ef1c5a5837c3db8be396c82e04c1cf9b0f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc392fdcbd21d4be6ae1bb4475a03ce3b025cd49a9be5345d76d7585aea69440" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36340109af8da8805d8851ef1d74761b3b88e81a9bd80b290bbfed61bd2b4f75" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcc008217145b3d77abd3e4d5ef586e3bdfba8fe17940769f8aa09b99e856c00" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ced40d4e9e18242f70dd02d739e44698df3dcb010d31f495ff00a31ef6014fe" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b272d4cecc32c9e19911891446b72e986157e6a1809b7b56518b4f3755267523" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp310-cp310-win32.whl", hash = "sha256:22f8fc02fdbc829e7a8c578dd8d2e15a9074b630d4da29cda483337e300e3ee9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp310-cp310-win_amd64.whl", hash = "sha256:26c9d33f8e8b846d5a65dd068c14e04018d05533b348d9eaeef6c1bd787f9919" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b96e7b9c624ef3ae2ae0e04fa9b460f6b9f17ad8b4bec6d7756510f1f6c0c841" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aa18428111fb9a591d7a9cc1b48150097ba6a7e8299fb56bdf574df650e7d1f1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06fa1ed84aa60ea6ef9f91ba57b5ed963c3729534e6e54055fc151fad0423f0a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96ca5482c3dbdd051bcd1fce8034603d6ebfc125a7bd59f55b40d8f5d246832b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:854ab91a2906ef29dc3925a064fcd365c7b4da743f84b123002f6139bcb3f8a7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f43740ab089277d403aa07567be138fc2a89d4d9892d113b76153e0e412409f8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp311-cp311-win32.whl", hash = "sha256:a2bbc29fcb1771cd7b7425f98b05307776a6baf43035d3b80c4b0f29e9545186" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp311-cp311-win_amd64.whl", hash = "sha256:2b3fca8a5b00184828d12b073af4d0fc5fdd94b1632c2477526f6bd7842d700d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a4cd6ed4a339c21f1d1b0fdf13426cb3b284555c27ac2f156dfdaaa7e16bfab0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5d5244aabd6ed7f312268b9247be47343a654ebea52a60f002dc70c769048e75" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a3cdb4d9c70e6b8c0814239ead47da00934666f668426fc6e94cce869e13fd7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa317b2325f7aa0a9471663e6093c210cb2ae9c0ad824732b307d2c51983d5b6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:174a8880739c16c925799c018f3f55b8130c1f7c8e75ab0a6fa9d41cab092fd6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f79b231bf5c16b1f39c7f4875e1ded36abee1591e98742b05d8a0fb55d8a3eec" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp312-cp312-win32.whl", hash = "sha256:4a06263321dfd3598cacb252f51e521a8cb4b6df471bb12a7ee5cbab20ea9167" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/numpy/1.26.2/numpy-1.26.2-cp312-cp312-win_amd64.whl", hash = "sha256:b04f5dc6b3efdaab541f7857351aac359e6ae3c126e2edb376929bd3b7f92d7e" },
+]
+
+[[package]]
+name = "nvidia-cublas-cu12"
+version = "12.8.4.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nvidia-cublas-cu12/12.8.4.1/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142" },
+]
+
+[[package]]
+name = "nvidia-cuda-cupti-cu12"
+version = "12.8.90"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nvidia-cuda-cupti-cu12/12.8.90/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182" },
+]
+
+[[package]]
+name = "nvidia-cuda-nvrtc-cu12"
+version = "12.8.93"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nvidia-cuda-nvrtc-cu12/12.8.93/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994" },
+]
+
+[[package]]
+name = "nvidia-cuda-runtime-cu12"
+version = "12.8.90"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nvidia-cuda-runtime-cu12/12.8.90/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90" },
+]
+
+[[package]]
+name = "nvidia-cudnn-cu12"
+version = "9.10.2.21"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+]
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nvidia-cudnn-cu12/9.10.2.21/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8" },
+]
+
+[[package]]
+name = "nvidia-cufft-cu12"
+version = "11.3.3.83"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+]
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nvidia-cufft-cu12/11.3.3.83/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74" },
+]
+
+[[package]]
+name = "nvidia-cufile-cu12"
+version = "1.13.1.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nvidia-cufile-cu12/1.13.1.3/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc" },
+]
+
+[[package]]
+name = "nvidia-curand-cu12"
+version = "10.3.9.90"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nvidia-curand-cu12/10.3.9.90/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9" },
+]
+
+[[package]]
+name = "nvidia-cusolver-cu12"
+version = "11.7.3.90"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+]
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nvidia-cusolver-cu12/11.7.3.90/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450" },
+]
+
+[[package]]
+name = "nvidia-cusparse-cu12"
+version = "12.5.8.93"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+]
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nvidia-cusparse-cu12/12.5.8.93/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b" },
+]
+
+[[package]]
+name = "nvidia-cusparselt-cu12"
+version = "0.7.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nvidia-cusparselt-cu12/0.7.1/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623" },
+]
+
+[[package]]
+name = "nvidia-nccl-cu12"
+version = "2.27.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nvidia-nccl-cu12/2.27.3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039" },
+]
+
+[[package]]
+name = "nvidia-nvjitlink-cu12"
+version = "12.8.93"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nvidia-nvjitlink-cu12/12.8.93/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88" },
+]
+
+[[package]]
+name = "nvidia-nvtx-cu12"
+version = "12.8.90"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/nvidia-nvtx-cu12/12.8.90/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f" },
+]
+
+[[package]]
+name = "oauthlib"
+version = "3.3.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/oauthlib/3.3.1/oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/oauthlib/3.3.1/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1" },
+]
+
+[[package]]
+name = "omegaconf"
+version = "2.3.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "antlr4-python3-runtime" },
+ { name = "pyyaml" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/omegaconf/2.3.0/omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/omegaconf/2.3.0/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b" },
+]
+
+[[package]]
+name = "opencv-python"
+version = "4.9.0.80"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "numpy" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/opencv-python/4.9.0.80/opencv-python-4.9.0.80.tar.gz", hash = "sha256:1a9f0e6267de3a1a1db0c54213d022c7c8b5b9ca4b580e80bdc58516c922c9e1" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/opencv-python/4.9.0.80/opencv_python-4.9.0.80-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:7e5f7aa4486651a6ebfa8ed4b594b65bd2d2f41beeb4241a3e4b1b85acbbbadb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/opencv-python/4.9.0.80/opencv_python-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:71dfb9555ccccdd77305fc3dcca5897fbf0cf28b297c51ee55e079c065d812a3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/opencv-python/4.9.0.80/opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b34a52e9da36dda8c151c6394aed602e4b17fa041df0b9f5b93ae10b0fcca2a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/opencv-python/4.9.0.80/opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4088cab82b66a3b37ffc452976b14a3c599269c247895ae9ceb4066d8188a57" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/opencv-python/4.9.0.80/opencv_python-4.9.0.80-cp37-abi3-win32.whl", hash = "sha256:dcf000c36dd1651118a2462257e3a9e76db789a78432e1f303c7bac54f63ef6c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/opencv-python/4.9.0.80/opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl", hash = "sha256:3f16f08e02b2a2da44259c7cc712e779eff1dd8b55fdb0323e8cab09548086c0" },
+]
+
+[[package]]
+name = "orjson"
+version = "3.11.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3.tar.gz", hash = "sha256:1c0603b1d2ffcd43a411d64797a19556ef76958aef1c182f22dc30860152a98a" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:29cb1f1b008d936803e2da3d7cba726fc47232c45df531b29edf0b232dd737e7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97dceed87ed9139884a55db8722428e27bd8452817fbf1869c58b49fecab1120" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:58533f9e8266cb0ac298e259ed7b4d42ed3fa0b78ce76860626164de49e0d467" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c212cfdd90512fe722fa9bd620de4d46cda691415be86b2e02243242ae81873" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ff835b5d3e67d9207343effb03760c00335f8b5285bfceefd4dc967b0e48f6a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f5aa4682912a450c2db89cbd92d356fef47e115dffba07992555542f344d301b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7d18dd34ea2e860553a579df02041845dee0af8985dff7f8661306f95504ddf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d8b11701bc43be92ea42bd454910437b355dfb63696c06fe953ffb40b5f763b4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:90368277087d4af32d38bd55f9da2ff466d25325bf6167c8f382d8ee40cb2bbc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fd7ff459fb393358d3a155d25b275c60b07a2c83dcd7ea962b1923f5a1134569" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f8d902867b699bcd09c176a280b1acdab57f924489033e53d0afe79817da37e6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp310-cp310-win32.whl", hash = "sha256:bb93562146120bb51e6b154962d3dadc678ed0fce96513fa6bc06599bb6f6edc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp310-cp310-win_amd64.whl", hash = "sha256:976c6f1975032cc327161c65d4194c549f2589d88b105a5e3499429a54479770" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9d2ae0cc6aeb669633e0124531f342a17d8e97ea999e42f12a5ad4adaa304c5f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:ba21dbb2493e9c653eaffdc38819b004b7b1b246fb77bfc93dc016fe664eac91" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00f1a271e56d511d1569937c0447d7dce5a99a33ea0dec76673706360a051904" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b67e71e47caa6680d1b6f075a396d04fa6ca8ca09aafb428731da9b3ea32a5a6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d7d012ebddffcce8c85734a6d9e5f08180cd3857c5f5a3ac70185b43775d043d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dd759f75d6b8d1b62012b7f5ef9461d03c804f94d539a5515b454ba3a6588038" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6890ace0809627b0dff19cfad92d69d0fa3f089d3e359a2a532507bb6ba34efb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9d4a5e041ae435b815e568537755773d05dac031fee6a57b4ba70897a44d9d2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2d68bf97a771836687107abfca089743885fb664b90138d8761cce61d5625d55" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:bfc27516ec46f4520b18ef645864cee168d2a027dbf32c5537cb1f3e3c22dac1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f66b001332a017d7945e177e282a40b6997056394e3ed7ddb41fb1813b83e824" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:212e67806525d2561efbfe9e799633b17eb668b8964abed6b5319b2f1cfbae1f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-win32.whl", hash = "sha256:6e8e0c3b85575a32f2ffa59de455f85ce002b8bdc0662d6b9c2ed6d80ab5d204" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-win_amd64.whl", hash = "sha256:6be2f1b5d3dc99a5ce5ce162fc741c22ba9f3443d3dd586e6a1211b7bc87bc7b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp311-cp311-win_arm64.whl", hash = "sha256:fafb1a99d740523d964b15c8db4eabbfc86ff29f84898262bf6e3e4c9e97e43e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8c752089db84333e36d754c4baf19c0e1437012242048439c7e80eb0e6426e3b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:9b8761b6cf04a856eb544acdd82fc594b978f12ac3602d6374a7edb9d86fd2c2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b13974dc8ac6ba22feaa867fc19135a3e01a134b4f7c9c28162fed4d615008a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f83abab5bacb76d9c821fd5c07728ff224ed0e52d7a71b7b3de822f3df04e15c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6fbaf48a744b94091a56c62897b27c31ee2da93d826aa5b207131a1e13d4064" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc779b4f4bba2847d0d2940081a7b6f7b5877e05408ffbb74fa1faf4a136c424" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd4b909ce4c50faa2192da6bb684d9848d4510b736b0611b6ab4020ea6fd2d23" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:524b765ad888dc5518bbce12c77c2e83dee1ed6b0992c1790cc5fb49bb4b6667" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:84fd82870b97ae3cdcea9d8746e592b6d40e1e4d4527835fc520c588d2ded04f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:fbecb9709111be913ae6879b07bafd4b0785b44c1eb5cac8ac76da048b3885a1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9dba358d55aee552bd868de348f4736ca5a4086d9a62e2bfbbeeb5629fe8b0cc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eabcf2e84f1d7105f84580e03012270c7e97ecb1fb1618bda395061b2a84a049" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-win32.whl", hash = "sha256:3782d2c60b8116772aea8d9b7905221437fdf53e7277282e8d8b07c220f96cca" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-win_amd64.whl", hash = "sha256:79b44319268af2eaa3e315b92298de9a0067ade6e6003ddaef72f8e0bedb94f1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp312-cp312-win_arm64.whl", hash = "sha256:0e92a4e83341ef79d835ca21b8bd13e27c859e4e9e4d7b63defc6e58462a3710" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:af40c6612fd2a4b00de648aa26d18186cd1322330bd3a3cc52f87c699e995810" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:9f1587f26c235894c09e8b5b7636a38091a9e6e7fe4531937534749c04face43" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61dcdad16da5bb486d7227a37a2e789c429397793a6955227cedbd7252eb5a27" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:11c6d71478e2cbea0a709e8a06365fa63da81da6498a53e4c4f065881d21ae8f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff94112e0098470b665cb0ed06efb187154b63649403b8d5e9aedeb482b4548c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae8b756575aaa2a855a75192f356bbda11a89169830e1439cfb1a3e1a6dde7be" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c9416cc19a349c167ef76135b2fe40d03cea93680428efee8771f3e9fb66079d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b822caf5b9752bc6f246eb08124c3d12bf2175b66ab74bac2ef3bbf9221ce1b2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:414f71e3bdd5573893bf5ecdf35c32b213ed20aa15536fe2f588f946c318824f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:828e3149ad8815dc14468f36ab2a4b819237c155ee1370341b91ea4c8672d2ee" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ac9e05f25627ffc714c21f8dfe3a579445a5c392a9c8ae7ba1d0e9fb5333f56e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e44fbe4000bd321d9f3b648ae46e0196d21577cf66ae684a96ff90b1f7c93633" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-win32.whl", hash = "sha256:2039b7847ba3eec1f5886e75e6763a16e18c68a63efc4b029ddf994821e2e66b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-win_amd64.whl", hash = "sha256:29be5ac4164aa8bdcba5fa0700a3c9c316b411d8ed9d39ef8a882541bd452fae" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp313-cp313-win_arm64.whl", hash = "sha256:18bd1435cb1f2857ceb59cfb7de6f92593ef7b831ccd1b9bfb28ca530e539dce" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp314-cp314-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:cf4b81227ec86935568c7edd78352a92e97af8da7bd70bdfdaa0d2e0011a1ab4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:bc8bc85b81b6ac9fc4dae393a8c159b817f4c2c9dee5d12b773bddb3b95fc07e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp314-cp314-manylinux_2_34_aarch64.whl", hash = "sha256:88dcfc514cfd1b0de038443c7b3e6a9797ffb1b3674ef1fd14f701a13397f82d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:d61cd543d69715d5fc0a690c7c6f8dcc307bc23abef9738957981885f5f38229" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2b7b153ed90ababadbef5c3eb39549f9476890d339cf47af563aea7e07db2451" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:7909ae2460f5f494fecbcd10613beafe40381fd0316e35d6acb5f3a05bfda167" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:2030c01cbf77bc67bee7eef1e7e31ecf28649353987775e3583062c752da0077" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a0169ebd1cbd94b26c7a7ad282cf5c2744fce054133f959e02eb5265deae1872" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp314-cp314-win32.whl", hash = "sha256:0c6d7328c200c349e3a4c6d8c83e0a5ad029bdc2d417f234152bf34842d0fc8d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp314-cp314-win_amd64.whl", hash = "sha256:317bbe2c069bbc757b1a2e4105b64aacd3bc78279b66a6b9e51e846e4809f804" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/orjson/3.11.3/orjson-3.11.3-cp314-cp314-win_arm64.whl", hash = "sha256:e8f6a7a27d7b7bec81bd5924163e9af03d49bbb63013f107b48eb5d16db711bc" },
+]
+
+[[package]]
+name = "packaging"
+version = "25.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/packaging/25.0/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/packaging/25.0/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484" },
+]
+
+[[package]]
+name = "pandas"
+version = "2.1.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "numpy" },
+ { name = "python-dateutil" },
+ { name = "pytz" },
+ { name = "tzdata" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3.tar.gz", hash = "sha256:22929f84bca106921917eb73c1521317ddd0a4c71b395bcf767a106e3494209f" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:acf08a73b5022b479c1be155d4988b72f3020f308f7a87c527702c5f8966d34f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3cc4469ff0cf9aa3a005870cb49ab8969942b7156e0a46cc3f5abd6b11051dfb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35172bff95f598cc5866c047f43c7f4df2c893acd8e10e6653a4b792ed7f19bb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59dfe0e65a2f3988e940224e2a70932edc964df79f3356e5f2997c7d63e758b4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0296a66200dee556850d99b24c54c7dfa53a3264b1ca6f440e42bad424caea03" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:465571472267a2d6e00657900afadbe6097c8e1dc43746917db4dfc862e8863e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:04d4c58e1f112a74689da707be31cf689db086949c71828ef5da86727cfe3f82" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fa2ad4ff196768ae63a33f8062e6838efed3a319cf938fdf8b95e956c813042" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4441ac94a2a2613e3982e502ccec3bdedefe871e8cea54b8775992485c5660ef" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5ded6ff28abbf0ea7689f251754d3789e1edb0c4d0d91028f0b980598418a58" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fca5680368a5139d4920ae3dc993eb5106d49f814ff24018b64d8850a52c6ed2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:de21e12bf1511190fc1e9ebc067f14ca09fccfb189a813b38d63211d54832f5f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a5d53c725832e5f1645e7674989f4c106e4b7249c1d57549023ed5462d73b140" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7cf4cf26042476e39394f1f86868d25b265ff787c9b2f0d367280f11afbdee6d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72c84ec1b1d8e5efcbff5312abe92bfb9d5b558f11e0cf077f5496c4f4a3c99e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f539e113739a3e0cc15176bf1231a553db0239bfa47a2c870283fd93ba4f683" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fc77309da3b55732059e484a1efc0897f6149183c522390772d3561f9bf96c00" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pandas/2.1.3/pandas-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:08637041279b8981a062899da0ef47828df52a1838204d2b3761fbd3e9fcb549" },
+]
+
+[[package]]
+name = "parso"
+version = "0.8.5"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/parso/0.8.5/parso-0.8.5.tar.gz", hash = "sha256:034d7354a9a018bdce352f48b2a8a450f05e9d6ee85db84764e9b6bd96dafe5a" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/parso/0.8.5/parso-0.8.5-py2.py3-none-any.whl", hash = "sha256:646204b5ee239c396d040b90f9e272e9a8017c630092bf59980beb62fd033887" },
+]
+
+[[package]]
+name = "pexpect"
+version = "4.9.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "ptyprocess" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pexpect/4.9.0/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pexpect/4.9.0/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523" },
+]
+
+[[package]]
+name = "pillow"
+version = "11.3.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0.tar.gz", hash = "sha256:3828ee7586cd0b2091b6209e5ad53e20d0649bbe87164a459d0676e035e8f523" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1b9c17fd4ace828b3003dfd1e30bff24863e0eb59b535e8f80194d9cc7ecf860" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:65dc69160114cdd0ca0f35cb434633c75e8e7fad4cf855177a05bf38678f73ad" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7107195ddc914f656c7fc8e4a5e1c25f32e9236ea3ea860f257b0436011fddd0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc3e831b563b3114baac7ec2ee86819eb03caa1a2cef0b481a5675b59c4fe23b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1f182ebd2303acf8c380a54f615ec883322593320a9b00438eb842c1f37ae50" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4445fa62e15936a028672fd48c4c11a66d641d2c05726c7ec1f8ba6a572036ae" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:71f511f6b3b91dd543282477be45a033e4845a40278fa8dcdbfdb07109bf18f9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:040a5b691b0713e1f6cbe222e0f4f74cd233421e105850ae3b3c0ceda520f42e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp310-cp310-win32.whl", hash = "sha256:89bd777bc6624fe4115e9fac3352c79ed60f3bb18651420635f26e643e3dd1f6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:19d2ff547c75b8e3ff46f4d9ef969a06c30ab2d4263a9e287733aa8b2429ce8f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp310-cp310-win_arm64.whl", hash = "sha256:819931d25e57b513242859ce1876c58c59dc31587847bf74cfe06b2e0cb22d2f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1cd110edf822773368b396281a2293aeb91c90a2db00d78ea43e7e861631b722" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c412fddd1b77a75aa904615ebaa6001f169b26fd467b4be93aded278266b288" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1aa4de119a0ecac0a34a9c8bde33f34022e2e8f99104e47a3ca392fd60e37d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:91da1d88226663594e3f6b4b8c3c8d85bd504117d043740a8e0ec449087cc494" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:643f189248837533073c405ec2f0bb250ba54598cf80e8c1e043381a60632f58" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:106064daa23a745510dabce1d84f29137a37224831d88eb4ce94bb187b1d7e5f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd8ff254faf15591e724dc7c4ddb6bf4793efcbe13802a4ae3e863cd300b493e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:932c754c2d51ad2b2271fd01c3d121daaa35e27efae2a616f77bf164bc0b3e94" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp311-cp311-win32.whl", hash = "sha256:b4b8f3efc8d530a1544e5962bd6b403d5f7fe8b9e08227c6b255f98ad82b4ba0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:1a992e86b0dd7aeb1f053cd506508c0999d710a8f07b4c791c63843fc6a807ac" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:30807c931ff7c095620fe04448e2c2fc673fcbb1ffe2a7da3fb39613489b1ddd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdae223722da47b024b867c1ea0be64e0df702c5e0a60e27daad39bf960dd1e4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:921bd305b10e82b4d1f5e802b6850677f965d8394203d182f078873851dada69" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb76541cba2f958032d79d143b98a3a6b3ea87f0959bbe256c0b5e416599fd5d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67172f2944ebba3d4a7b54f2e95c786a3a50c21b88456329314caaa28cda70f6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f07ed9f56a3b9b5f49d3661dc9607484e85c67e27f3e8be2c7d28ca032fec7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:676b2815362456b5b3216b4fd5bd89d362100dc6f4945154ff172e206a22c024" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3e184b2f26ff146363dd07bde8b711833d7b0202e27d13540bfe2e35a323a809" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6be31e3fc9a621e071bc17bb7de63b85cbe0bfae91bb0363c893cbe67247780d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp312-cp312-win32.whl", hash = "sha256:7b161756381f0918e05e7cb8a371fff367e807770f8fe92ecb20d905d0e1c149" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6444696fce635783440b7f7a9fc24b3ad10a9ea3f0ab66c5905be1c19ccf17d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:2aceea54f957dd4448264f9bf40875da0415c83eb85f55069d89c0ed436e3542" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:1c627742b539bba4309df89171356fcb3cc5a9178355b2727d1b74a6cf155fbd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:30b7c02f3899d10f13d7a48163c8969e4e653f8b43416d23d13d1bbfdc93b9f8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:7859a4cc7c9295f5838015d8cc0a9c215b77e43d07a25e460f35cf516df8626f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec1ee50470b0d050984394423d96325b744d55c701a439d2bd66089bff963d3c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7db51d222548ccfd274e4572fdbf3e810a5e66b00608862f947b163e613b67dd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2d6fcc902a24ac74495df63faad1884282239265c6839a0a6416d33faedfae7e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0f5d8f4a08090c6d6d578351a2b91acf519a54986c055af27e7a93feae6d3f1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c37d8ba9411d6003bba9e518db0db0c58a680ab9fe5179f040b0463644bc9805" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13f87d581e71d9189ab21fe0efb5a23e9f28552d5be6979e84001d3b8505abe8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:023f6d2d11784a465f09fd09a34b150ea4672e85fb3d05931d89f373ab14abb2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:45dfc51ac5975b938e9809451c51734124e73b04d0f0ac621649821a63852e7b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313-win32.whl", hash = "sha256:a4d336baed65d50d37b88ca5b60c0fa9d81e3a87d4a7930d3880d1624d5b31f3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0bce5c4fd0921f99d2e858dc4d4d64193407e1b99478bc5cacecba2311abde51" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:1904e1264881f682f02b7f8167935cce37bc97db457f8e7849dc3a6a52b99580" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4c834a3921375c48ee6b9624061076bc0a32a60b5532b322cc0ea64e639dd50e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5e05688ccef30ea69b9317a9ead994b93975104a677a36a8ed8106be9260aa6d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1019b04af07fc0163e2810167918cb5add8d74674b6267616021ab558dc98ced" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f944255db153ebb2b19c51fe85dd99ef0ce494123f21b9db4877ffdfc5590c7c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f85acb69adf2aaee8b7da124efebbdb959a104db34d3a2cb0f3793dbae422a8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05f6ecbeff5005399bb48d198f098a9b4b6bdf27b8487c7f38ca16eeb070cd59" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a7bc6e6fd0395bc052f16b1a8670859964dbd7003bd0af2ff08342eb6e442cfe" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:83e1b0161c9d148125083a35c1c5a89db5b7054834fd4387499e06552035236c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313t-win32.whl", hash = "sha256:2a3117c06b8fb646639dce83694f2f9eac405472713fcb1ae887469c0d4f6788" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:857844335c95bea93fb39e0fa2726b4d9d758850b34075a7e3ff4f4fa3aa3b31" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:8797edc41f3e8536ae4b10897ee2f637235c94f27404cac7297f7b607dd0716e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d9da3df5f9ea2a89b81bb6087177fb1f4d1c7146d583a3fe5c672c0d94e55e12" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0b275ff9b04df7b640c59ec5a3cb113eefd3795a8df80bac69646ef699c6981a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0743841cabd3dba6a83f38a92672cccbd69af56e3e91777b0ee7f4dba4385632" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2465a69cf967b8b49ee1b96d76718cd98c4e925414ead59fdf75cf0fd07df673" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41742638139424703b4d01665b807c6468e23e699e8e90cffefe291c5832b027" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93efb0b4de7e340d99057415c749175e24c8864302369e05914682ba642e5d77" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7966e38dcd0fa11ca390aed7c6f20454443581d758242023cf36fcb319b1a874" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:98a9afa7b9007c67ed84c57c9e0ad86a6000da96eaa638e4f8abe5b65ff83f0a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314-win32.whl", hash = "sha256:02a723e6bf909e7cea0dac1b0e0310be9d7650cd66222a5f1c571455c0a45214" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:a418486160228f64dd9e9efcd132679b7a02a5f22c982c78b6fc7dab3fefb635" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314-win_arm64.whl", hash = "sha256:155658efb5e044669c08896c0c44231c5e9abcaadbc5cd3648df2f7c0b96b9a6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:59a03cdf019efbfeeed910bf79c7c93255c3d54bc45898ac2a4140071b02b4ae" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f8a5827f84d973d8636e9dc5764af4f0cf2318d26744b3d902931701b0d46653" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ee92f2fd10f4adc4b43d07ec5e779932b4eb3dbfbc34790ada5a6669bc095aa6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c96d333dcf42d01f47b37e0979b6bd73ec91eae18614864622d9b87bbd5bbf36" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c96f993ab8c98460cd0c001447bff6194403e8b1d7e149ade5f00594918128b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41342b64afeba938edb034d122b2dda5db2139b9a4af999729ba8818e0056477" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:068d9c39a2d1b358eb9f245ce7ab1b5c3246c7c8c7d9ba58cfa5b43146c06e50" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bc6ba083b145187f648b667e05a2534ecc4b9f2784c2cbe3089e44868f2b9b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314t-win32.whl", hash = "sha256:118ca10c0d60b06d006be10a501fd6bbdfef559251ed31b794668ed569c87e12" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8924748b688aa210d79883357d102cd64690e56b923a186f35a82cbc10f997db" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3cee80663f29e3843b68199b9d6f4f54bd1d4a6b59bdd91bceefc51238bcb967" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b5f56c3f344f2ccaf0dd875d3e180f631dc60a51b314295a3e681fe8cf851fbe" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e67d793d180c9df62f1f40aee3accca4829d3794c95098887edc18af4b8b780c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d000f46e2917c705e9fb93a3606ee4a819d1e3aa7a9b442f6444f07e77cf5e25" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-pp310-pypy310_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:527b37216b6ac3a12d7838dc3bd75208ec57c1c6d11ef01902266a5a0c14fc27" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-pp310-pypy310_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:be5463ac478b623b9dd3937afd7fb7ab3d79dd290a28e2b6df292dc75063eb8a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:8dc70ca24c110503e16918a658b869019126ecfe03109b754c402daff12b3d9f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7c8ec7a017ad1bd562f93dbd8505763e688d388cde6e4a010ae1486916e713e6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:9ab6ae226de48019caa8074894544af5b53a117ccb9d3b3dcb2871464c829438" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fe27fb049cdcca11f11a7bfda64043c37b30e6b91f10cb5bab275806c32f6ab3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:465b9e8844e3c3519a983d58b80be3f668e2a7a5db97f2784e7079fbc9f9822c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5418b53c0d59b3824d05e029669efa023bbef0f3e92e75ec8428f3799487f361" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:504b6f59505f08ae014f724b6207ff6222662aab5cc9542577fb084ed0676ac7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pillow/11.3.0/pillow-11.3.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:c84d689db21a1c397d001aa08241044aa2069e7587b398c8cc63020390b1c1b8" },
+]
+
+[[package]]
+name = "platformdirs"
+version = "4.4.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/platformdirs/4.4.0/platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/platformdirs/4.4.0/platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85" },
+]
+
+[[package]]
+name = "pooch"
+version = "1.8.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "packaging" },
+ { name = "platformdirs" },
+ { name = "requests" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pooch/1.8.2/pooch-1.8.2.tar.gz", hash = "sha256:76561f0de68a01da4df6af38e9955c4c9d1a5c90da73f7e40276a5728ec83d10" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pooch/1.8.2/pooch-1.8.2-py3-none-any.whl", hash = "sha256:3529a57096f7198778a5ceefd5ac3ef0e4d06a6ddaf9fc2d609b806f25302c47" },
+]
+
+[[package]]
+name = "proces"
+version = "0.1.7"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/proces/0.1.7/proces-0.1.7.tar.gz", hash = "sha256:70a05d9e973dd685f7a9092c58be695a8181a411d63796c213232fd3fdc43775" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/proces/0.1.7/proces-0.1.7-py3-none-any.whl", hash = "sha256:308325bbc96877263f06e57e5e9c760c4b42cc722887ad60be6b18fc37d68762" },
+]
+
+[[package]]
+name = "prompt-toolkit"
+version = "3.0.52"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "wcwidth" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/prompt-toolkit/3.0.52/prompt_toolkit-3.0.52.tar.gz", hash = "sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/prompt-toolkit/3.0.52/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955" },
+]
+
+[[package]]
+name = "protobuf"
+version = "3.19.6"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/protobuf/3.19.6/protobuf-3.19.6.tar.gz", hash = "sha256:5f5540d57a43042389e87661c6eaa50f47c19c6176e8cf1c4f287aeefeccb5c4" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/protobuf/3.19.6/protobuf-3.19.6-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:010be24d5a44be7b0613750ab40bc8b8cedc796db468eae6c779b395f50d1fa1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/protobuf/3.19.6/protobuf-3.19.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11478547958c2dfea921920617eb457bc26867b0d1aa065ab05f35080c5d9eb6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/protobuf/3.19.6/protobuf-3.19.6-cp310-cp310-win32.whl", hash = "sha256:559670e006e3173308c9254d63facb2c03865818f22204037ab76f7a0ff70b5f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/protobuf/3.19.6/protobuf-3.19.6-cp310-cp310-win_amd64.whl", hash = "sha256:347b393d4dd06fb93a77620781e11c058b3b0a5289262f094379ada2920a3730" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/protobuf/3.19.6/protobuf-3.19.6-py2.py3-none-any.whl", hash = "sha256:14082457dc02be946f60b15aad35e9f5c69e738f80ebbc0900a19bc83734a5a4" },
+]
+
+[[package]]
+name = "psutil"
+version = "7.0.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/psutil/7.0.0/psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/psutil/7.0.0/psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/psutil/7.0.0/psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/psutil/7.0.0/psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/psutil/7.0.0/psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/psutil/7.0.0/psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/psutil/7.0.0/psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/psutil/7.0.0/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553" },
+]
+
+[[package]]
+name = "ptyprocess"
+version = "0.7.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ptyprocess/0.7.0/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ptyprocess/0.7.0/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35" },
+]
+
+[[package]]
+name = "pure-eval"
+version = "0.2.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pure-eval/0.2.3/pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pure-eval/0.2.3/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0" },
+]
+
+[[package]]
+name = "py-cpuinfo"
+version = "9.0.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/py-cpuinfo/9.0.0/py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/py-cpuinfo/9.0.0/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5" },
+]
+
+[[package]]
+name = "pyasn1"
+version = "0.6.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyasn1/0.6.1/pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyasn1/0.6.1/pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629" },
+]
+
+[[package]]
+name = "pyasn1-modules"
+version = "0.4.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "pyasn1" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyasn1-modules/0.4.2/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyasn1-modules/0.4.2/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a" },
+]
+
+[[package]]
+name = "pycparser"
+version = "2.22"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pycparser/2.22/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pycparser/2.22/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc" },
+]
+
+[[package]]
+name = "pydantic"
+version = "2.11.7"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "annotated-types" },
+ { name = "pydantic-core" },
+ { name = "typing-extensions" },
+ { name = "typing-inspection" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic/2.11.7/pydantic-2.11.7.tar.gz", hash = "sha256:d989c3c6cb79469287b1569f7447a17848c998458d49ebe294e975b9baf0f0db" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic/2.11.7/pydantic-2.11.7-py3-none-any.whl", hash = "sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b" },
+]
+
+[[package]]
+name = "pydantic-core"
+version = "2.33.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0069c9acc3f3981b9ff4cdfaf088e98d83440a4c7ea1bc07460af3d4dc22e72d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d53b22f2032c42eaaf025f7c40c2e3b94568ae077a606f006d206a463bc69572" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0405262705a123b7ce9f0b92f123334d67b70fd1f20a9372b907ce1080c7ba02" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b25d91e288e2c4e0662b8038a28c6a07eaac3e196cfc4ff69de4ea3db992a1b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bdfe4b3789761f3bcb4b1ddf33355a71079858958e3a552f16d5af19768fef2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:efec8db3266b76ef9607c2c4c419bdb06bf335ae433b80816089ea7585816f6a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:031c57d67ca86902726e0fae2214ce6770bbe2f710dc33063187a68744a5ecac" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:f8de619080e944347f5f20de29a975c2d815d9ddd8be9b9b7268e2e3ef68605a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:73662edf539e72a9440129f231ed3757faab89630d291b784ca99237fb94db2b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp310-cp310-win32.whl", hash = "sha256:0a39979dcbb70998b0e505fb1556a1d550a0781463ce84ebf915ba293ccb7e22" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp310-cp310-win_amd64.whl", hash = "sha256:b0379a2b24882fef529ec3b4987cb5d003b9cda32256024e6fe1586ac45fc640" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c4aa4e82353f65e548c476b37e64189783aa5384903bfea4f41580f255fddfa" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d946c8bf0d5c24bf4fe333af284c59a19358aa3ec18cb3dc4370080da1e8ad29" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87b31b6846e361ef83fedb187bb5b4372d0da3f7e28d85415efa92d6125d6e6d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa9d91b338f2df0508606f7009fde642391425189bba6d8c653afd80fd6bb64e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2058a32994f1fde4ca0480ab9d1e75a0e8c87c22b53a3ae66554f9af78f2fe8c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0e03262ab796d986f978f79c943fc5f620381be7287148b8010b4097f79a39ec" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1a8695a8d00c73e50bff9dfda4d540b7dee29ff9b8053e38380426a85ef10052" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa754d1850735a0b0e03bcffd9d4b4343eb417e47196e4485d9cca326073a42c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydantic-core/2.33.2/pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1" },
+]
+
+[[package]]
+name = "pydub"
+version = "0.25.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydub/0.25.1/pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pydub/0.25.1/pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6" },
+]
+
+[[package]]
+name = "pygments"
+version = "2.19.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pygments/2.19.2/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pygments/2.19.2/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b" },
+]
+
+[[package]]
+name = "pyloudnorm"
+version = "0.1.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "future" },
+ { name = "numpy" },
+ { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version < '3.11'" },
+ { name = "scipy", version = "1.16.1", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version >= '3.11'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyloudnorm/0.1.1/pyloudnorm-0.1.1.tar.gz", hash = "sha256:63cd4e197dea4e7795160ea08ed02d318091bce883e436a6dbc5963326b71e1e" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyloudnorm/0.1.1/pyloudnorm-0.1.1-py3-none-any.whl", hash = "sha256:d7f12ebdd097a464d87ce2878fc4d942f15f8233e26cc03f33fefa226f869a14" },
+]
+
+[[package]]
+name = "pyparsing"
+version = "3.2.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyparsing/3.2.3/pyparsing-3.2.3.tar.gz", hash = "sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyparsing/3.2.3/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf" },
+]
+
+[[package]]
+name = "pyphen"
+version = "0.17.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyphen/0.17.2/pyphen-0.17.2.tar.gz", hash = "sha256:f60647a9c9b30ec6c59910097af82bc5dd2d36576b918e44148d8b07ef3b4aa3" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyphen/0.17.2/pyphen-0.17.2-py3-none-any.whl", hash = "sha256:3a07fb017cb2341e1d9ff31b8634efb1ae4dc4b130468c7c39dd3d32e7c3affd" },
+]
+
+[[package]]
+name = "pystoi"
+version = "0.4.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "numpy" },
+ { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version < '3.11'" },
+ { name = "scipy", version = "1.16.1", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version >= '3.11'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pystoi/0.4.1/pystoi-0.4.1.tar.gz", hash = "sha256:1c6f50d6fbfee46b00c922458cdbd27228d9830ca81cea788fd600fc2f7de6e4" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pystoi/0.4.1/pystoi-0.4.1-py2.py3-none-any.whl", hash = "sha256:e277b671663d26d35a2416c9c8010a74084e6c3970354506398051a554896939" },
+]
+
+[[package]]
+name = "python-dateutil"
+version = "2.9.0.post0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "six" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/python-dateutil/2.9.0.post0/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/python-dateutil/2.9.0.post0/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" },
+]
+
+[[package]]
+name = "python-multipart"
+version = "0.0.20"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/python-multipart/0.0.20/python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/python-multipart/0.0.20/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104" },
+]
+
+[[package]]
+name = "pytz"
+version = "2025.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pytz/2025.2/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pytz/2025.2/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00" },
+]
+
+[[package]]
+name = "pyyaml"
+version = "6.0.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/pyyaml/6.0.2/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563" },
+]
+
+[[package]]
+name = "randomname"
+version = "0.2.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "fire" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/randomname/0.2.1/randomname-0.2.1.tar.gz", hash = "sha256:b79b98302ba4479164b0a4f87995b7bebbd1d91012aeda483341e3e58ace520e" }
+
+[[package]]
+name = "regex"
+version = "2025.9.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1.tar.gz", hash = "sha256:88ac07b38d20b54d79e704e38aa3bd2c0f8027432164226bdee201a1c0c9c9ff" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5aa2a6a73bf218515484b36a0d20c6ad9dc63f6339ff6224147b0e2c095ee55" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8c2ff5c01d5e47ad5fc9d31bcd61e78c2fa0068ed00cab86b7320214446da766" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d49dc84e796b666181de8a9973284cad6616335f01b52bf099643253094920fc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d9914fe1040874f83c15fcea86d94ea54091b0666eab330aaab69e30d106aabe" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e71bceb3947362ec5eabd2ca0870bb78eae4edfc60c6c21495133c01b6cd2df4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:67a74456f410fe5e869239ee7a5423510fe5121549af133809d9591a8075893f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5c3b96ed0223b32dbdc53a83149b6de7ca3acd5acd9c8e64b42a166228abe29c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:113d5aa950f428faf46fd77d452df62ebb4cc6531cb619f6cc30a369d326bfbd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:fcdeb38de4f7f3d69d798f4f371189061446792a84e7c92b50054c87aae9c07c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4bcdff370509164b67a6c8ec23c9fb40797b72a014766fdc159bb809bd74f7d8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:7383efdf6e8e8c61d85e00cfb2e2e18da1a621b8bfb4b0f1c2747db57b942b8f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1ec2bd3bdf0f73f7e9f48dca550ba7d973692d5e5e9a90ac42cc5f16c4432d8b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-win32.whl", hash = "sha256:9627e887116c4e9c0986d5c3b4f52bcfe3df09850b704f62ec3cbf177a0ae374" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:94533e32dc0065eca43912ee6649c90ea0681d59f56d43c45b5bcda9a740b3dd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp310-cp310-win_arm64.whl", hash = "sha256:a874a61bb580d48642ffd338570ee24ab13fa023779190513fcacad104a6e251" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e5bcf112b09bfd3646e4db6bf2e598534a17d502b0c01ea6550ba4eca780c5e6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:67a0295a3c31d675a9ee0238d20238ff10a9a2fdb7a1323c798fc7029578b15c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea8267fbadc7d4bd7c1301a50e85c2ff0de293ff9452a1a9f8d82c6cafe38179" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6aeff21de7214d15e928fb5ce757f9495214367ba62875100d4c18d293750cc1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d89f1bbbbbc0885e1c230f7770d5e98f4f00b0ee85688c871d10df8b184a6323" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ca3affe8ddea498ba9d294ab05f5f2d3b5ad5d515bc0d4a9016dd592a03afe52" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:91892a7a9f0a980e4c2c85dd19bc14de2b219a3a8867c4b5664b9f972dcc0c78" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e1cb40406f4ae862710615f9f636c1e030fd6e6abe0e0f65f6a695a2721440c6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:94f6cff6f7e2149c7e6499a6ecd4695379eeda8ccbccb9726e8149f2fe382e92" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:6c0226fb322b82709e78c49cc33484206647f8a39954d7e9de1567f5399becd0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a12f59c7c380b4fcf7516e9cbb126f95b7a9518902bcf4a852423ff1dcd03e6a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp311-cp311-win32.whl", hash = "sha256:49865e78d147a7a4f143064488da5d549be6bfc3f2579e5044cac61f5c92edd4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:d34b901f6f2f02ef60f4ad3855d3a02378c65b094efc4b80388a3aeb700a5de7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp311-cp311-win_arm64.whl", hash = "sha256:47d7c2dab7e0b95b95fd580087b6ae196039d62306a592fa4e162e49004b6299" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:84a25164bd8dcfa9f11c53f561ae9766e506e580b70279d05a7946510bdd6f6a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:645e88a73861c64c1af558dd12294fb4e67b5c1eae0096a60d7d8a2143a611c7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:10a450cba5cd5409526ee1d4449f42aad38dd83ac6948cbd6d7f71ca7018f7db" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e9dc5991592933a4192c166eeb67b29d9234f9c86344481173d1bc52f73a7104" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a32291add816961aab472f4fad344c92871a2ee33c6c219b6598e98c1f0108f2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:588c161a68a383478e27442a678e3b197b13c5ba51dbba40c1ccb8c4c7bee9e9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47829ffaf652f30d579534da9085fe30c171fa2a6744a93d52ef7195dc38218b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1e978e5a35b293ea43f140c92a3269b6ab13fe0a2bf8a881f7ac740f5a6ade85" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4cf09903e72411f4bf3ac1eddd624ecfd423f14b2e4bf1c8b547b72f248b7bf7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:d016b0f77be63e49613c9e26aaf4a242f196cd3d7a4f15898f5f0ab55c9b24d2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:656563e620de6908cd1c9d4f7b9e0777e3341ca7db9d4383bcaa44709c90281e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp312-cp312-win32.whl", hash = "sha256:df33f4ef07b68f7ab637b1dbd70accbf42ef0021c201660656601e8a9835de45" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:5aba22dfbc60cda7c0853516104724dc904caa2db55f2c3e6e984eb858d3edf3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp312-cp312-win_arm64.whl", hash = "sha256:ec1efb4c25e1849c2685fa95da44bfde1b28c62d356f9c8d861d4dad89ed56e9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bc6834727d1b98d710a63e6c823edf6ffbf5792eba35d3fa119531349d4142ef" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c3dc05b6d579875719bccc5f3037b4dc80433d64e94681a0061845bd8863c025" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:22213527df4c985ec4a729b055a8306272d41d2f45908d7bacb79be0fa7a75ad" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8e3f6e3c5a5a1adc3f7ea1b5aec89abfc2f4fbfba55dafb4343cd1d084f715b2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bcb89c02a0d6c2bec9b0bb2d8c78782699afe8434493bfa6b4021cc51503f249" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b0e2f95413eb0c651cd1516a670036315b91b71767af83bc8525350d4375ccba" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:09a41dc039e1c97d3c2ed3e26523f748e58c4de3ea7a31f95e1cf9ff973fff5a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4f0b4258b161094f66857a26ee938d3fe7b8a5063861e44571215c44fbf0e5df" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bf70e18ac390e6977ea7e56f921768002cb0fa359c4199606c7219854ae332e0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:b84036511e1d2bb0a4ff1aec26951caa2dea8772b223c9e8a19ed8885b32dbac" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c2e05dcdfe224047f2a59e70408274c325d019aad96227ab959403ba7d58d2d7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp313-cp313-win32.whl", hash = "sha256:3b9a62107a7441b81ca98261808fed30ae36ba06c8b7ee435308806bd53c1ed8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp313-cp313-win_amd64.whl", hash = "sha256:b38afecc10c177eb34cfae68d669d5161880849ba70c05cbfbe409f08cc939d7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp313-cp313-win_arm64.whl", hash = "sha256:ec329890ad5e7ed9fc292858554d28d58d56bf62cf964faf0aa57964b21155a0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:72fb7a016467d364546f22b5ae86c45680a4e0de6b2a6f67441d22172ff641f1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c9527fa74eba53f98ad86be2ba003b3ebe97e94b6eb2b916b31b5f055622ef03" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c905d925d194c83a63f92422af7544ec188301451b292c8b487f0543726107ca" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:74df7c74a63adcad314426b1f4ea6054a5ab25d05b0244f0c07ff9ce640fa597" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4f6e935e98ea48c7a2e8be44494de337b57a204470e7f9c9c42f912c414cd6f5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4a62d033cd9ebefc7c5e466731a508dfabee827d80b13f455de68a50d3c2543d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef971ebf2b93bdc88d8337238be4dfb851cc97ed6808eb04870ef67589415171" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d936a1db208bdca0eca1f2bb2c1ba1d8370b226785c1e6db76e32a228ffd0ad5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:7e786d9e4469698fc63815b8de08a89165a0aa851720eb99f5e0ea9d51dd2b6a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:6b81d7dbc5466ad2c57ce3a0ddb717858fe1a29535c8866f8514d785fdb9fc5b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cd4890e184a6feb0ef195338a6ce68906a8903a0f2eb7e0ab727dbc0a3156273" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp314-cp314-win32.whl", hash = "sha256:34679a86230e46164c9e0396b56cab13c0505972343880b9e705083cc5b8ec86" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp314-cp314-win_amd64.whl", hash = "sha256:a1196e530a6bfa5f4bde029ac5b0295a6ecfaaffbfffede4bbaf4061d9455b70" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/regex/2025.9.1/regex-2025.9.1-cp314-cp314-win_arm64.whl", hash = "sha256:f46d525934871ea772930e997d577d48c6983e50f206ff7b66d4ac5f8941e993" },
+]
+
+[[package]]
+name = "requests"
+version = "2.32.5"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "certifi" },
+ { name = "charset-normalizer" },
+ { name = "idna" },
+ { name = "urllib3" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/requests/2.32.5/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/requests/2.32.5/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6" },
+]
+
+[[package]]
+name = "requests-oauthlib"
+version = "2.0.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "oauthlib" },
+ { name = "requests" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/requests-oauthlib/2.0.0/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/requests-oauthlib/2.0.0/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36" },
+]
+
+[[package]]
+name = "rich"
+version = "14.1.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "markdown-it-py" },
+ { name = "pygments" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/rich/14.1.0/rich-14.1.0.tar.gz", hash = "sha256:e497a48b844b0320d45007cdebfeaeed8db2a4f4bcf49f15e455cfc4af11eaa8" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/rich/14.1.0/rich-14.1.0-py3-none-any.whl", hash = "sha256:536f5f1785986d6dbdea3c75205c473f970777b4a0d6c6dd1b696aa05a3fa04f" },
+]
+
+[[package]]
+name = "rsa"
+version = "4.9.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "pyasn1" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/rsa/4.9.1/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/rsa/4.9.1/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762" },
+]
+
+[[package]]
+name = "ruff"
+version = "0.12.12"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12.tar.gz", hash = "sha256:b86cd3415dbe31b3b46a71c598f4c4b2f550346d1ccf6326b347cc0c8fd063d6" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-linux_armv6l.whl", hash = "sha256:de1c4b916d98ab289818e55ce481e2cacfaad7710b01d1f990c497edf217dafc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:7acd6045e87fac75a0b0cdedacf9ab3e1ad9d929d149785903cff9bb69ad9727" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-macosx_11_0_arm64.whl", hash = "sha256:abf4073688d7d6da16611f2f126be86523a8ec4343d15d276c614bda8ec44edb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:968e77094b1d7a576992ac078557d1439df678a34c6fe02fd979f973af167577" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42a67d16e5b1ffc6d21c5f67851e0e769517fb57a8ebad1d0781b30888aa704e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b216ec0a0674e4b1214dcc998a5088e54eaf39417327b19ffefba1c4a1e4971e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:59f909c0fdd8f1dcdbfed0b9569b8bf428cf144bec87d9de298dcd4723f5bee8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9ac93d87047e765336f0c18eacad51dad0c1c33c9df7484c40f98e1d773876f5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:01543c137fd3650d322922e8b14cc133b8ea734617c4891c5a9fccf4bfc9aa92" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2afc2fa864197634e549d87fb1e7b6feb01df0a80fd510d6489e1ce8c0b1cc45" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:0c0945246f5ad776cb8925e36af2438e66188d2b57d9cf2eed2c382c58b371e5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a0fbafe8c58e37aae28b84a80ba1817f2ea552e9450156018a478bf1fa80f4e4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:b9c456fb2fc8e1282affa932c9e40f5ec31ec9cbb66751a316bd131273b57c23" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5f12856123b0ad0147d90b3961f5c90e7427f9acd4b40050705499c98983f489" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:26a1b5a2bf7dd2c47e3b46d077cd9c0fc3b93e6c6cc9ed750bd312ae9dc302ee" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-win32.whl", hash = "sha256:173be2bfc142af07a01e3a759aba6f7791aa47acf3604f610b1c36db888df7b1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-win_amd64.whl", hash = "sha256:e99620bf01884e5f38611934c09dd194eb665b0109104acae3ba6102b600fd0d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/ruff/0.12.12/ruff-0.12.12-py3-none-win_arm64.whl", hash = "sha256:2a8199cab4ce4d72d158319b63370abf60991495fb733db96cd923a34c52d093" },
+]
+
+[[package]]
+name = "safehttpx"
+version = "0.1.6"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "httpx" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safehttpx/0.1.6/safehttpx-0.1.6.tar.gz", hash = "sha256:b356bfc82cee3a24c395b94a2dbeabbed60aff1aa5fa3b5fe97c4f2456ebce42" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safehttpx/0.1.6/safehttpx-0.1.6-py3-none-any.whl", hash = "sha256:407cff0b410b071623087c63dd2080c3b44dc076888d8c5823c00d1e58cb381c" },
+]
+
+[[package]]
+name = "safetensors"
+version = "0.5.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2.tar.gz", hash = "sha256:cb4a8d98ba12fa016f4241932b1fc5e702e5143f5374bba0bbcf7ddc1c4cf2b8" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:45b6092997ceb8aa3801693781a71a99909ab9cc776fbc3fa9322d29b1d3bef2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:6d0d6a8ee2215a440e1296b843edf44fd377b055ba350eaba74655a2fe2c4bae" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86016d40bcaa3bcc9a56cd74d97e654b5f4f4abe42b038c71e4f00a089c4526c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:990833f70a5f9c7d3fc82c94507f03179930ff7d00941c287f73b6fcbf67f19e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dfa7c2f3fe55db34eba90c29df94bcdac4821043fc391cb5d082d9922013869" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:46ff2116150ae70a4e9c490d2ab6b6e1b1b93f25e520e540abe1b81b48560c3a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ab696dfdc060caffb61dbe4066b86419107a24c804a4e373ba59be699ebd8d5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:03c937100f38c9ff4c1507abea9928a6a9b02c9c1c9c3609ed4fb2bf413d4975" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:a00e737948791b94dad83cf0eafc09a02c4d8c2171a239e8c8572fe04e25960e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:d3a06fae62418ec8e5c635b61a8086032c9e281f16c63c3af46a6efbab33156f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:1506e4c2eda1431099cebe9abf6c76853e95d0b7a95addceaa74c6019c65d8cf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5c5b5d9da594f638a259fca766046f44c97244cc7ab8bef161b3e80d04becc76" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2-cp38-abi3-win32.whl", hash = "sha256:fe55c039d97090d1f85277d402954dd6ad27f63034fa81985a9cc59655ac3ee2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/safetensors/0.5.2/safetensors-0.5.2-cp38-abi3-win_amd64.whl", hash = "sha256:78abdddd03a406646107f973c7843276e7b64e5e32623529dc17f3d94a20f589" },
+]
+
+[[package]]
+name = "scikit-learn"
+version = "1.7.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "joblib" },
+ { name = "numpy" },
+ { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version < '3.11'" },
+ { name = "scipy", version = "1.16.1", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version >= '3.11'" },
+ { name = "threadpoolctl" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1.tar.gz", hash = "sha256:24b3f1e976a4665aa74ee0fcaac2b8fccc6ae77c8e07ab25da3ba6d3292b9802" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:406204dd4004f0517f0b23cf4b28c6245cbd51ab1b6b78153bc784def214946d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:16af2e44164f05d04337fd1fc3ae7c4ea61fd9b0d527e22665346336920fe0e1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2f2e78e56a40c7587dea9a28dc4a49500fa2ead366869418c66f0fd75b80885c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b62b76ad408a821475b43b7bb90a9b1c9a4d8d125d505c2df0539f06d6e631b1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:9963b065677a4ce295e8ccdee80a1dd62b37249e667095039adcd5bce6e90deb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90c8494ea23e24c0fb371afc474618c1019dc152ce4a10e4607e62196113851b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:bb870c0daf3bf3be145ec51df8ac84720d9972170786601039f024bf6d61a518" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:40daccd1b5623f39e8943ab39735cadf0bdce80e67cdca2adcb5426e987320a8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:30d1f413cfc0aa5a99132a554f1d80517563c34a9d3e7c118fde2d273c6fe0f7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:c711d652829a1805a95d7fe96654604a8f16eab5a9e9ad87b3e60173415cb650" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3cee419b49b5bbae8796ecd690f97aa412ef1674410c23fc3257c6b8b85b8087" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2fd8b8d35817b0d9ebf0b576f7d5ffbbabdb55536b0655a8aaae629d7ffd2e1f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:588410fa19a96a69763202f1d6b7b91d5d7a5d73be36e189bc6396bfb355bd87" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e3142f0abe1ad1d1c31a2ae987621e41f6b578144a911ff4ac94781a583adad7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:3ddd9092c1bd469acab337d87930067c87eac6bd544f8d5027430983f1e1ae88" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b7839687fa46d02e01035ad775982f2470be2668e13ddd151f0f55a5bf123bae" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a10f276639195a96c86aa572ee0698ad64ee939a7b042060b98bd1930c261d10" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:13679981fdaebc10cc4c13c43344416a86fcbc61449cb3e6517e1df9d12c8309" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f1262883c6a63f067a980a8cdd2d2e7f2513dddcef6a9eaada6416a7a7cbe43" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:ca6d31fb10e04d50bfd2b50d66744729dbb512d4efd0223b864e2fdbfc4cee11" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:781674d096303cfe3d351ae6963ff7c958db61cde3421cd490e3a5a58f2a94ae" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:10679f7f125fe7ecd5fad37dd1aa2daae7e3ad8df7f3eefa08901b8254b3e12c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1f812729e38c8cb37f760dce71a9b83ccfb04f59b3dca7c6079dcdc60544fa9e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:88e1a20131cf741b84b89567e1717f27a2ced228e0f29103426102bc2e3b8ef7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scikit-learn/1.7.1/scikit_learn-1.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:b1bd1d919210b6a10b7554b717c9000b5485aa95a1d0f177ae0d7ee8ec750da5" },
+]
+
+[[package]]
+name = "scipy"
+version = "1.15.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+resolution-markers = [
+ "python_full_version < '3.11' and sys_platform == 'darwin'",
+ "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')",
+]
+dependencies = [
+ { name = "numpy", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:aef683a9ae6eb00728a542b796f52a5477b78252edede72b8327a886ab63293f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:1c832e1bd78dea67d5c16f786681b28dd695a8cb1fb90af2e27580d3d0967e92" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:263961f658ce2165bbd7b99fa5135195c3a12d9bef045345016b8b50c315cb82" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2abc762b0811e09a0d3258abee2d98e0c703eee49464ce0069590846f31d40" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ed7284b21a7a0c8f1b6e5977ac05396c0d008b89e05498c8b7e8f4a1423bba0e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5380741e53df2c566f4d234b100a484b420af85deb39ea35a1cc1be84ff53a5c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp310-cp310-win_amd64.whl", hash = "sha256:9d61e97b186a57350f6d6fd72640f9e99d5a4a2b8fbf4b9ee9a841eab327dc13" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:993439ce220d25e3696d1b23b233dd010169b62f6456488567e830654ee37a6b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:34716e281f181a02341ddeaad584205bd2fd3c242063bd3423d61ac259ca7eba" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b0334816afb8b91dab859281b1b9786934392aa3d527cd847e41bb6f45bee65" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6db907c7368e3092e24919b5e31c76998b0ce1684d51a90943cb0ed1b4ffd6c1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721d6b4ef5dc82ca8968c25b111e307083d7ca9091bc38163fb89243e85e3889" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39cb9c62e471b1bb3750066ecc3a3f3052b37751c7c3dfd0fd7e48900ed52982" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:795c46999bae845966368a3c013e0e00947932d68e235702b5c3f6ea799aa8c9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:18aaacb735ab38b38db42cb01f6b92a2d0d4b6aabefeb07f02849e47f8fb3594" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp311-cp311-win_amd64.whl", hash = "sha256:ae48a786a28412d744c62fd7816a4118ef97e5be0bee968ce8f0a2fba7acf3bb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp312-cp312-win_amd64.whl", hash = "sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313-win_amd64.whl", hash = "sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.15.3/scipy-1.15.3-cp313-cp313t-win_amd64.whl", hash = "sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca" },
+]
+
+[[package]]
+name = "scipy"
+version = "1.16.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+resolution-markers = [
+ "python_full_version >= '3.13' and sys_platform == 'darwin'",
+ "python_full_version == '3.12.*' and sys_platform == 'darwin'",
+ "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
+ "(python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
+ "python_full_version == '3.11.*' and sys_platform == 'darwin'",
+ "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+ "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
+]
+dependencies = [
+ { name = "numpy", marker = "python_full_version >= '3.11'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1.tar.gz", hash = "sha256:44c76f9e8b6e8e488a586190ab38016e4ed2f8a038af7cd3defa903c0a2238b3" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:c033fa32bab91dc98ca59d0cf23bb876454e2bb02cbe592d5023138778f70030" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:6e5c2f74e5df33479b5cd4e97a9104c511518fbd979aa9b8f6aec18b2e9ecae7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:0a55ffe0ba0f59666e90951971a884d1ff6f4ec3275a48f472cfb64175570f77" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:f8a5d6cd147acecc2603fbd382fed6c46f474cccfcf69ea32582e033fb54dcfe" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb18899127278058bcc09e7b9966d41a5a43740b5bb8dcba401bd983f82e885b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adccd93a2fa937a27aae826d33e3bfa5edf9aa672376a4852d23a7cd67a2e5b7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:18aca1646a29ee9a0625a1be5637fa798d4d81fdf426481f06d69af828f16958" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d85495cef541729a70cdddbbf3e6b903421bc1af3e8e3a9a72a06751f33b7c39" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp311-cp311-win_amd64.whl", hash = "sha256:226652fca853008119c03a8ce71ffe1b3f6d2844cc1686e8f9806edafae68596" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:81b433bbeaf35728dad619afc002db9b189e45eebe2cd676effe1fb93fef2b9c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:886cc81fdb4c6903a3bb0464047c25a6d1016fef77bb97949817d0c0d79f9e04" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:15240c3aac087a522b4eaedb09f0ad061753c5eebf1ea430859e5bf8640d5919" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:65f81a25805f3659b48126b5053d9e823d3215e4a63730b5e1671852a1705921" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6c62eea7f607f122069b9bad3f99489ddca1a5173bef8a0c75555d7488b6f725" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f965bbf3235b01c776115ab18f092a95aa74c271a52577bcb0563e85738fd618" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f006e323874ffd0b0b816d8c6a8e7f9a73d55ab3b8c3f72b752b226d0e3ac83d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8fd15fc5085ab4cca74cb91fe0a4263b1f32e4420761ddae531ad60934c2119" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp312-cp312-win_amd64.whl", hash = "sha256:f7b8013c6c066609577d910d1a2a077021727af07b6fab0ee22c2f901f22352a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:5451606823a5e73dfa621a89948096c6528e2896e40b39248295d3a0138d594f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:89728678c5ca5abd610aee148c199ac1afb16e19844401ca97d43dc548a354eb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e756d688cb03fd07de0fffad475649b03cb89bee696c98ce508b17c11a03f95c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5aa2687b9935da3ed89c5dbed5234576589dd28d0bf7cd237501ccfbdf1ad608" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0851f6a1e537fe9399f35986897e395a1aa61c574b178c0d456be5b1a0f5ca1f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fedc2cbd1baed37474b1924c331b97bdff611d762c196fac1a9b71e67b813b1b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2ef500e72f9623a6735769e4b93e9dcb158d40752cdbb077f305487e3e2d1f45" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:978d8311674b05a8f7ff2ea6c6bce5d8b45a0cb09d4c5793e0318f448613ea65" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313-win_amd64.whl", hash = "sha256:81929ed0fa7a5713fcdd8b2e6f73697d3b4c4816d090dd34ff937c20fa90e8ab" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:bcc12db731858abda693cecdb3bdc9e6d4bd200213f49d224fe22df82687bdd6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:744d977daa4becb9fc59135e75c069f8d301a87d64f88f1e602a9ecf51e77b27" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:dc54f76ac18073bcecffb98d93f03ed6b81a92ef91b5d3b135dcc81d55a724c7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:367d567ee9fc1e9e2047d31f39d9d6a7a04e0710c86e701e053f237d14a9b4f6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4cf5785e44e19dcd32a0e4807555e1e9a9b8d475c6afff3d21c3c543a6aa84f4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3d0b80fb26d3e13a794c71d4b837e2a589d839fd574a6bbb4ee1288c213ad4a3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8503517c44c18d1030d666cb70aaac1cc8913608816e06742498833b128488b7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:30cc4bb81c41831ecfd6dc450baf48ffd80ef5aed0f5cf3ea775740e80f16ecc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp313-cp313t-win_amd64.whl", hash = "sha256:c24fa02f7ed23ae514460a22c57eca8f530dbfa50b1cfdbf4f37c05b5309cc39" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:796a5a9ad36fa3a782375db8f4241ab02a091308eb079746bc0f874c9b998318" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:3ea0733a2ff73fd6fdc5fecca54ee9b459f4d74f00b99aced7d9a3adb43fb1cc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:85764fb15a2ad994e708258bb4ed8290d1305c62a4e1ef07c414356a24fcfbf8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:ca66d980469cb623b1759bdd6e9fd97d4e33a9fad5b33771ced24d0cb24df67e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e7cc1ffcc230f568549fc56670bcf3df1884c30bd652c5da8138199c8c76dae0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3ddfb1e8d0b540cb4ee9c53fc3dea3186f97711248fb94b4142a1b27178d8b4b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4dc0e7be79e95d8ba3435d193e0d8ce372f47f774cffd882f88ea4e1e1ddc731" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f23634f9e5adb51b2a77766dac217063e764337fbc816aa8ad9aaebcd4397fd3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314-win_amd64.whl", hash = "sha256:57d75524cb1c5a374958a2eae3d84e1929bb971204cc9d52213fb8589183fc19" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:d8da7c3dd67bcd93f15618938f43ed0995982eb38973023d46d4646c4283ad65" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:cc1d2f2fd48ba1e0620554fe5bc44d3e8f5d4185c8c109c7fbdf5af2792cfad2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:21a611ced9275cb861bacadbada0b8c0623bc00b05b09eb97f23b370fc2ae56d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:8dfbb25dffc4c3dd9371d8ab456ca81beeaf6f9e1c2119f179392f0dc1ab7695" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f0ebb7204f063fad87fc0a0e4ff4a2ff40b2a226e4ba1b7e34bf4b79bf97cd86" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f1b9e5962656f2734c2b285a8745358ecb4e4efbadd00208c80a389227ec61ff" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e1a106f8c023d57a2a903e771228bf5c5b27b5d692088f457acacd3b54511e4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:709559a1db68a9abc3b2c8672c4badf1614f3b440b3ab326d86a5c0491eafae3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/scipy/1.16.1/scipy-1.16.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c0c804d60492a0aad7f5b2bb1862f4548b990049e27e828391ff2bf6f7199998" },
+]
+
+[[package]]
+name = "semantic-version"
+version = "2.10.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/semantic-version/2.10.0/semantic_version-2.10.0.tar.gz", hash = "sha256:bdabb6d336998cbb378d4b9db3a4b56a1e3235701dc05ea2690d9a997ed5041c" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/semantic-version/2.10.0/semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177" },
+]
+
+[[package]]
+name = "sentencepiece"
+version = "0.2.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1.tar.gz", hash = "sha256:8138cec27c2f2282f4a34d9a016e3374cd40e5c6e9cb335063db66a0a3b71fad" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e10fa50bdbaa5e2445dbd387979980d391760faf0ec99a09bd7780ff37eaec44" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f27ae6deea72efdb6f361750c92f6c21fd0ad087445082770cc34015213c526" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60937c959e6f44159fdd9f56fbdd302501f96114a5ba436829496d5f32d8de3f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8b1d91545578852f128650b8cce4ec20f93d39b378ff554ebe66290f2dabb92" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27e38eee653abc3d387862e67bc5c8b6f428cd604e688b85d29170b7e725c26c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp310-cp310-win32.whl", hash = "sha256:251874d720ac7f28024a168501f3c7bb15d1802245f6e66de565f18bbb9b5eaa" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:e52144670738b4b477fade6c2a9b6af71a8d0094514c9853ac9f6fc1fcfabae7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp310-cp310-win_arm64.whl", hash = "sha256:9076430ac25dfa7147d9d05751dbc66a04bc1aaac371c07f84952979ea59f0d0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6356d0986b8b8dc351b943150fcd81a1c6e6e4d439772e8584c64230e58ca987" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8f8ba89a3acb3dc1ae90f65ec1894b0b9596fdb98ab003ff38e058f898b39bc7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:02593eca45440ef39247cee8c47322a34bdcc1d8ae83ad28ba5a899a2cf8d79a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a0d15781a171d188b661ae4bde1d998c303f6bd8621498c50c671bd45a4798e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f5a3e0d9f445ed9d66c0fec47d4b23d12cfc858b407a03c194c1b26c2ac2a63" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp311-cp311-win32.whl", hash = "sha256:6d297a1748d429ba8534eebe5535448d78b8acc32d00a29b49acf28102eeb094" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:82d9ead6591015f009cb1be1cb1c015d5e6f04046dbb8c9588b931e869a29728" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp311-cp311-win_arm64.whl", hash = "sha256:39f8651bd10974eafb9834ce30d9bcf5b73e1fc798a7f7d2528f9820ca86e119" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:57cae326c8727de58c85977b175af132a7138d84c764635d7e71bbee7e774133" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:56dd39a3c4d6493db3cdca7e8cc68c6b633f0d4195495cbadfcf5af8a22d05a6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d9381351182ff9888cc80e41c632e7e274b106f450de33d67a9e8f6043da6f76" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:99f955df238021bf11f0fc37cdb54fd5e5b5f7fd30ecc3d93fb48b6815437167" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0cdfecef430d985f1c2bcbfff3defd1d95dae876fbd0173376012d2d7d24044b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp312-cp312-win32.whl", hash = "sha256:a483fd29a34c3e34c39ac5556b0a90942bec253d260235729e50976f5dba1068" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:4cdc7c36234fda305e85c32949c5211faaf8dd886096c7cea289ddc12a2d02de" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:daeb5e9e9fcad012324807856113708614d534f596d5008638eb9b40112cd9e4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dcd8161eee7b41aae57ded06272905dbd680a0a04b91edd0f64790c796b2f706" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c6c8f42949f419ff8c7e9960dbadcfbc982d7b5efc2f6748210d3dd53a7de062" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:097f3394e99456e9e4efba1737c3749d7e23563dd1588ce71a3d007f25475fff" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7b670879c370d350557edabadbad1f6561a9e6968126e6debca4029e5547820" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c7f0fd2f2693309e6628aeeb2e2faf6edd221134dfccac3308ca0de01f8dab47" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313-win32.whl", hash = "sha256:92b3816aa2339355fda2c8c4e021a5de92180b00aaccaf5e2808972e77a4b22f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:10ed3dab2044c47f7a2e7b4969b0c430420cdd45735d78c8f853191fa0e3148b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac650534e2251083c5f75dde4ff28896ce7c8904133dc8fef42780f4d5588fcd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:8dd4b477a7b069648d19363aad0cab9bad2f4e83b2d179be668efa672500dc94" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0c0f672da370cc490e4c59d89e12289778310a0e71d176c541e4834759e1ae07" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ad8493bea8432dae8d6830365352350f3b4144415a1d09c4c8cb8d30cf3b6c3c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b81a24733726e3678d2db63619acc5a8dccd074f7aa7a54ecd5ca33ca6d2d596" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0a81799d0a68d618e89063fb423c3001a034c893069135ffe51fee439ae474d6" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313t-win32.whl", hash = "sha256:89a3ea015517c42c0341d0d962f3e6aaf2cf10d71b1932d475c44ba48d00aa2b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:33f068c9382dc2e7c228eedfd8163b52baa86bb92f50d0488bf2b7da7032e484" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:b3616ad246f360e52c85781e47682d31abfb6554c779e42b65333d4b5f44ecc0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:5d0350b686c320068702116276cfb26c066dc7e65cfef173980b11bb4d606719" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c7f54a31cde6fa5cb030370566f68152a742f433f8d2be458463d06c208aef33" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c83b85ab2d6576607f31df77ff86f28182be4a8de6d175d2c33ca609925f5da1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1855f57db07b51fb51ed6c9c452f570624d2b169b36f0f79ef71a6e6c618cd8b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01e6912125cb45d3792f530a4d38f8e21bf884d6b4d4ade1b2de5cf7a8d2a52b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314-win32.whl", hash = "sha256:c415c9de1447e0a74ae3fdb2e52f967cb544113a3a5ce3a194df185cbc1f962f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314-win_amd64.whl", hash = "sha256:881b2e44b14fc19feade3cbed314be37de639fc415375cefaa5bc81a4be137fd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314-win_arm64.whl", hash = "sha256:2005242a16d2dc3ac5fe18aa7667549134d37854823df4c4db244752453b78a8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:a19adcec27c524cb7069a1c741060add95f942d1cbf7ad0d104dffa0a7d28a2b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:e37e4b4c4a11662b5db521def4e44d4d30ae69a1743241412a93ae40fdcab4bb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:477c81505db072b3ab627e7eab972ea1025331bd3a92bacbf798df2b75ea86ec" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:010f025a544ef770bb395091d57cb94deb9652d8972e0d09f71d85d5a0816c8c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:733e59ff1794d26db706cd41fc2d7ca5f6c64a820709cb801dc0ea31780d64ab" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314t-win32.whl", hash = "sha256:d3233770f78e637dc8b1fda2cd7c3b99ec77e7505041934188a4e7fe751de3b0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314t-win_amd64.whl", hash = "sha256:5e4366c97b68218fd30ea72d70c525e6e78a6c0a88650f57ac4c43c63b234a9d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sentencepiece/0.2.1/sentencepiece-0.2.1-cp314-cp314t-win_arm64.whl", hash = "sha256:105e36e75cbac1292642045458e8da677b2342dcd33df503e640f0b457cb6751" },
+]
+
+[[package]]
+name = "setuptools"
+version = "80.9.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/setuptools/80.9.0/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/setuptools/80.9.0/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922" },
+]
+
+[[package]]
+name = "shellingham"
+version = "1.5.4"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/shellingham/1.5.4/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/shellingham/1.5.4/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686" },
+]
+
+[[package]]
+name = "six"
+version = "1.17.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/six/1.17.0/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/six/1.17.0/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274" },
+]
+
+[[package]]
+name = "sniffio"
+version = "1.3.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sniffio/1.3.1/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sniffio/1.3.1/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2" },
+]
+
+[[package]]
+name = "soundfile"
+version = "0.13.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "cffi" },
+ { name = "numpy" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soundfile/0.13.1/soundfile-0.13.1.tar.gz", hash = "sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soundfile/0.13.1/soundfile-0.13.1-py2.py3-none-any.whl", hash = "sha256:a23c717560da2cf4c7b5ae1142514e0fd82d6bbd9dfc93a50423447142f2c445" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soundfile/0.13.1/soundfile-0.13.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:82dc664d19831933fe59adad199bf3945ad06d84bc111a5b4c0d3089a5b9ec33" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soundfile/0.13.1/soundfile-0.13.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:743f12c12c4054921e15736c6be09ac26b3b3d603aef6fd69f9dde68748f2593" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soundfile/0.13.1/soundfile-0.13.1-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9c9e855f5a4d06ce4213f31918653ab7de0c5a8d8107cd2427e44b42df547deb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soundfile/0.13.1/soundfile-0.13.1-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:03267c4e493315294834a0870f31dbb3b28a95561b80b134f0bd3cf2d5f0e618" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soundfile/0.13.1/soundfile-0.13.1-py2.py3-none-win32.whl", hash = "sha256:c734564fab7c5ddf8e9be5bf70bab68042cd17e9c214c06e365e20d64f9a69d5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soundfile/0.13.1/soundfile-0.13.1-py2.py3-none-win_amd64.whl", hash = "sha256:1e70a05a0626524a69e9f0f4dd2ec174b4e9567f4d8b6c11d38b5c289be36ee9" },
+]
+
+[[package]]
+name = "soxr"
+version = "0.5.0.post1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "numpy" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1.tar.gz", hash = "sha256:7092b9f3e8a416044e1fa138c8172520757179763b85dc53aa9504f4813cff73" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:7406d782d85f8cf64e66b65e6b7721973de8a1dc50b9e88bc2288c343a987484" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fa0a382fb8d8e2afed2c1642723b2d2d1b9a6728ff89f77f3524034c8885b8c9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b01d3efb95a2851f78414bcd00738b0253eec3f5a1e5482838e965ffef84969" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcc049b0a151a65aa75b92f0ac64bb2dba785d16b78c31c2b94e68c141751d6d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp310-cp310-win_amd64.whl", hash = "sha256:97f269bc26937c267a2ace43a77167d0c5c8bba5a2b45863bb6042b5b50c474e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:6fb77b626773a966e3d8f6cb24f6f74b5327fa5dc90f1ff492450e9cdc03a378" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:39e0f791ba178d69cd676485dbee37e75a34f20daa478d90341ecb7f6d9d690f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f0b558f445ba4b64dbcb37b5f803052eee7d93b1dbbbb97b3ec1787cb5a28eb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca6903671808e0a6078b0d146bb7a2952b118dfba44008b2aa60f221938ba829" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp311-cp311-win_amd64.whl", hash = "sha256:c4d8d5283ed6f5efead0df2c05ae82c169cfdfcf5a82999c2d629c78b33775e8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp312-abi3-macosx_10_14_x86_64.whl", hash = "sha256:fef509466c9c25f65eae0ce1e4b9ac9705d22c6038c914160ddaf459589c6e31" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:4704ba6b13a3f1e41d12acf192878384c1c31f71ce606829c64abdf64a8d7d32" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd052a66471a7335b22a6208601a9d0df7b46b8d087dce4ff6e13eed6a33a2a1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3f16810dd649ab1f433991d2a9661e9e6a116c2b4101039b53b3c3e90a094fc" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/soxr/0.5.0.post1/soxr-0.5.0.post1-cp312-abi3-win_amd64.whl", hash = "sha256:b1be9fee90afb38546bdbd7bde714d1d9a8c5a45137f97478a83b65e7f3146f6" },
+]
+
+[[package]]
+name = "stack-data"
+version = "0.6.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "asttokens" },
+ { name = "executing" },
+ { name = "pure-eval" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/stack-data/0.6.3/stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/stack-data/0.6.3/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695" },
+]
+
+[[package]]
+name = "starlette"
+version = "0.47.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "anyio" },
+ { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/starlette/0.47.3/starlette-0.47.3.tar.gz", hash = "sha256:6bc94f839cc176c4858894f1f8908f0ab79dfec1a6b8402f6da9be26ebea52e9" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/starlette/0.47.3/starlette-0.47.3-py3-none-any.whl", hash = "sha256:89c0778ca62a76b826101e7c709e70680a1699ca7da6b44d38eb0a7e61fe4b51" },
+]
+
+[[package]]
+name = "sympy"
+version = "1.14.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "mpmath" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sympy/1.14.0/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/sympy/1.14.0/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5" },
+]
+
+[[package]]
+name = "tensorboard"
+version = "2.9.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "absl-py" },
+ { name = "google-auth" },
+ { name = "google-auth-oauthlib" },
+ { name = "grpcio" },
+ { name = "markdown" },
+ { name = "numpy" },
+ { name = "protobuf" },
+ { name = "requests" },
+ { name = "setuptools" },
+ { name = "tensorboard-data-server" },
+ { name = "tensorboard-plugin-wit" },
+ { name = "werkzeug" },
+ { name = "wheel" },
+]
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tensorboard/2.9.1/tensorboard-2.9.1-py3-none-any.whl", hash = "sha256:baa727f791776f9e5841d347127720ceed4bbd59c36b40604b95fb2ae6029276" },
+]
+
+[[package]]
+name = "tensorboard-data-server"
+version = "0.6.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tensorboard-data-server/0.6.1/tensorboard_data_server-0.6.1-py3-none-any.whl", hash = "sha256:809fe9887682d35c1f7d1f54f0f40f98bb1f771b14265b453ca051e2ce58fca7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tensorboard-data-server/0.6.1/tensorboard_data_server-0.6.1-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:fa8cef9be4fcae2f2363c88176638baf2da19c5ec90addb49b1cde05c95c88ee" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tensorboard-data-server/0.6.1/tensorboard_data_server-0.6.1-py3-none-manylinux2010_x86_64.whl", hash = "sha256:d8237580755e58eff68d1f3abefb5b1e39ae5c8b127cc40920f9c4fb33f4b98a" },
+]
+
+[[package]]
+name = "tensorboard-plugin-wit"
+version = "1.8.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tensorboard-plugin-wit/1.8.1/tensorboard_plugin_wit-1.8.1-py3-none-any.whl", hash = "sha256:ff26bdd583d155aa951ee3b152b3d0cffae8005dc697f72b44a8e8c2a77a8cbe" },
+]
+
+[[package]]
+name = "termcolor"
+version = "3.1.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/termcolor/3.1.0/termcolor-3.1.0.tar.gz", hash = "sha256:6a6dd7fbee581909eeec6a756cff1d7f7c376063b14e4a298dc4980309e55970" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/termcolor/3.1.0/termcolor-3.1.0-py3-none-any.whl", hash = "sha256:591dd26b5c2ce03b9e43f391264626557873ce1d379019786f99b0c2bee140aa" },
+]
+
+[[package]]
+name = "textstat"
+version = "0.7.10"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "nltk" },
+ { name = "pyphen" },
+ { name = "setuptools" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/textstat/0.7.10/textstat-0.7.10.tar.gz", hash = "sha256:b197ada1137cda8b19eadccd2e31ac8b69fb5a9cb281690535154e8af7ba3ba8" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/textstat/0.7.10/textstat-0.7.10-py3-none-any.whl", hash = "sha256:b8cfa0d2cefddc52acb249db9800394c052811ddf9eba5f2d7518064509172fb" },
+]
+
+[[package]]
+name = "threadpoolctl"
+version = "3.6.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/threadpoolctl/3.6.0/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/threadpoolctl/3.6.0/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb" },
+]
+
+[[package]]
+name = "tokenizers"
+version = "0.21.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "huggingface-hub" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0.tar.gz", hash = "sha256:ee0894bf311b75b0c03079f33859ae4b2334d675d4e93f5a4132e1eae2834fe4" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b177fb54c4702ef611de0c069d9169f0004233890e0c4c5bd5508ae05abf193" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6b43779a269f4629bebb114e19c3fca0223296ae9fea8bb9a7a6c6fb0657ff8e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aeb255802be90acfd363626753fda0064a8df06031012fe7d52fd9a905eb00e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8b09dbeb7a8d73ee204a70f94fc06ea0f17dcf0844f16102b9f414f0b7463ba" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:400832c0904f77ce87c40f1a8a27493071282f785724ae62144324f171377273" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e84ca973b3a96894d1707e189c14a774b701596d579ffc7e69debfc036a61a04" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:eb7202d231b273c34ec67767378cd04c767e967fda12d4a9e36208a34e2f137e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:089d56db6782a73a27fd8abf3ba21779f5b85d4a9f35e3b493c7bbcbbf0d539b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:c87ca3dc48b9b1222d984b6b7490355a6fdb411a2d810f6f05977258400ddb74" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4145505a973116f91bc3ac45988a92e618a6f83eb458f49ea0790df94ee243ff" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0-cp39-abi3-win32.whl", hash = "sha256:eb1702c2f27d25d9dd5b389cc1f2f51813e99f8ca30d9e25348db6585a97e24a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tokenizers/0.21.0/tokenizers-0.21.0-cp39-abi3-win_amd64.whl", hash = "sha256:87841da5a25a3a5f70c102de371db120f41873b854ba65e52bccd57df5a3780c" },
+]
+
+[[package]]
+name = "tomlkit"
+version = "0.13.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tomlkit/0.13.3/tomlkit-0.13.3.tar.gz", hash = "sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tomlkit/0.13.3/tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0" },
+]
+
+[[package]]
+name = "torch"
+version = "2.8.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "filelock" },
+ { name = "fsspec" },
+ { name = "jinja2" },
+ { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version < '3.11'" },
+ { name = "networkx", version = "3.5", source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }, marker = "python_full_version >= '3.11'" },
+ { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "setuptools", marker = "python_full_version >= '3.12'" },
+ { name = "sympy" },
+ { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+ { name = "typing-extensions" },
+]
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:0be92c08b44009d4131d1ff7a8060d10bafdb7ddcb7359ef8d8c5169007ea905" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:89aa9ee820bb39d4d72b794345cccef106b574508dd17dbec457949678c76011" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:e8e5bf982e87e2b59d932769938b698858c64cc53753894be25629bdf5cf2f46" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:a3f16a58a9a800f589b26d47ee15aca3acf065546137fc2af039876135f4c760" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:220a06fd7af8b653c35d359dfe1aaf32f65aa85befa342629f716acb134b9710" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c12fa219f51a933d5f80eeb3a7a5d0cbe9168c0a14bbb4055f1979431660879b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:8c7ef765e27551b2fbfc0f41bcf270e1292d9bf79f8e0724848b1682be6e80aa" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:5ae0524688fb6707c57a530c2325e13bb0090b745ba7b4a2cd6a3ce262572916" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e2fab4153768d433f8ed9279c8133a114a034a61e77a3a104dcdf54388838705" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b2aca0939fb7e4d842561febbd4ffda67a8e958ff725c1c27e244e85e982173c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:2f4ac52f0130275d7517b03a33d2493bab3693c83dcfadf4f81688ea82147d2e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:619c2869db3ada2c0105487ba21b5008defcc472d23f8b80ed91ac4a380283b0" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2b2f96814e0345f5a5aed9bf9734efa913678ed19caf6dc2cddb7930672d6128" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:65616ca8ec6f43245e1f5f296603e33923f4c30f93d65e103d9e50c25b35150b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:659df54119ae03e83a800addc125856effda88b016dfc54d9f65215c3975be16" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:1a62a1ec4b0498930e2543535cf70b1bef8c777713de7ceb84cd79115f553767" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:83c13411a26fac3d101fe8035a6b0476ae606deb8688e904e796a3534c197def" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8f0a9d617a66509ded240add3754e462430a6c1fc5589f86c17b433dd808f97a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a7242b86f42be98ac674b88a4988643b9bc6145437ec8f048fea23f72feb5eca" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch/2.8.0/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7b677e17f5a3e69fdef7eb3b9da72622f8d322692930297e4ccb52fefc6c8211" },
+]
+
+[[package]]
+name = "torch-stoi"
+version = "0.2.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "numpy" },
+ { name = "pystoi" },
+ { name = "torch" },
+ { name = "torchaudio" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch-stoi/0.2.3/torch_stoi-0.2.3.tar.gz", hash = "sha256:228207b8d63548336c5520b156f1e6b30d3ae3db1fb3c41999f01aee087c5f85" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torch-stoi/0.2.3/torch_stoi-0.2.3-py3-none-any.whl", hash = "sha256:6eee85e33b42fe843a2150de46000f72e7b87cbeb19ae6ab9bbd94b6ec6b3cd2" },
+]
+
+[[package]]
+name = "torchaudio"
+version = "2.8.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "torch" },
+]
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c2f44cf279f673cfcdd8f576c349eee8bedf8caab351a5dd78b32970cc34a212" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:d3c1b85b26a09832d139f6d6da6b66caeb51d2e16e08f8587665c44a9e1aa8f9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:58f912bf2d289c709b42a55475b2b483becec79d9affb7684b606bb1f896b434" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:4e2b4712ad6d7547ce82d84567c8c29d5e2966ff1d31d94e1644024fb4b2649f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c9276857d241c6de257af765c0f51fc011af38cb725401495121b280913007cf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:4573c6042950c20278e3608a9a38050ba0bc72e0049e1bbfd249caf859a8029b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:776c0b4ba84b9e3ddf6304b9c47cd63549d7896a6f3d5184ece074cc3d76ed6b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:078105bf80f725c0215a0bebac8cb2fb1b3993ab32bdc3fcd50145a5b4127001" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ddef94bf181e6447cbb05f38beaca8f6c5bb8d2b9ddced1aa3452025b9fc70d3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:862e2e40bf09d865e5df080a84c1a39bbcef40e43140f4b1737eb3a389d3b38f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:93a8583f280fe83ba021aa713319381ea71362cc87b67ee38e97a43cb2254aee" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:4b82cacd1b8ccd543b1149d8cab257a40dfda8119023d2e3a96c66349c84bffb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f851d32e94ca05e470f0c60e25726ec1e0eb71cb2ca5a0206b7fd03272ccc3c8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:09535a9b727c0793cd07c1ace99f3f353626281bcc3e30c2f2314e3ebc9d3f96" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:d2a85b124494736241884372fe1c6dd8c15e9bc1931bd325838c5c00238c7378" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:c1b5139c840367a7855a062a06688a416619f6fd2ca46d9b9299b49a7d133dfd" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:68df9c9068984edff8065c2b6656725e6114fe89281b0cf122c7505305fc98a4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:1951f10ed092f2dda57634f6a3950ef21c9d9352551aa84a9fccd51bbda18095" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:4f7d97494698d98854129349b12061e8c3398d33bd84c929fa9aed5fd1389f73" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/torchaudio/2.8.0/torchaudio-2.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d4a715d09ac28c920d031ee1e60ecbc91e8a5079ad8c61c0277e658436c821a6" },
+]
+
+[[package]]
+name = "tqdm"
+version = "4.67.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tqdm/4.67.1/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tqdm/4.67.1/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2" },
+]
+
+[[package]]
+name = "traitlets"
+version = "5.14.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/traitlets/5.14.3/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/traitlets/5.14.3/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f" },
+]
+
+[[package]]
+name = "transformers"
+version = "4.52.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "filelock" },
+ { name = "huggingface-hub" },
+ { name = "numpy" },
+ { name = "packaging" },
+ { name = "pyyaml" },
+ { name = "regex" },
+ { name = "requests" },
+ { name = "safetensors" },
+ { name = "tokenizers" },
+ { name = "tqdm" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/transformers/4.52.1/transformers-4.52.1.tar.gz", hash = "sha256:c380d583ed9c7ebe3e30ca5e55ec1249db39eb9ee277f8e74dab1abc6a03c938" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/transformers/4.52.1/transformers-4.52.1-py3-none-any.whl", hash = "sha256:604b2bb357c480dc5883b7944e8562c967f6b06f63dfb6a1c4665d13d067148f" },
+]
+
+[[package]]
+name = "triton"
+version = "3.4.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "setuptools", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+]
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/triton/3.4.0/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/triton/3.4.0/triton-3.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b70f5e6a41e52e48cfc087436c8a28c17ff98db369447bcaff3b887a3ab4467" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/triton/3.4.0/triton-3.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31c1d84a5c0ec2c0f8e8a072d7fd150cab84a9c239eaddc6706c081bfae4eb04" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/triton/3.4.0/triton-3.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00be2964616f4c619193cb0d1b29a99bd4b001d7dc333816073f92cf2a8ccdeb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/triton/3.4.0/triton-3.4.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7936b18a3499ed62059414d7df563e6c163c5e16c3773678a3ee3d417865035d" },
+]
+
+[[package]]
+name = "typeguard"
+version = "4.4.4"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/typeguard/4.4.4/typeguard-4.4.4.tar.gz", hash = "sha256:3a7fd2dffb705d4d0efaed4306a704c89b9dee850b688f060a8b1615a79e5f74" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/typeguard/4.4.4/typeguard-4.4.4-py3-none-any.whl", hash = "sha256:b5f562281b6bfa1f5492470464730ef001646128b180769880468bd84b68b09e" },
+]
+
+[[package]]
+name = "typer"
+version = "0.17.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "click" },
+ { name = "rich" },
+ { name = "shellingham" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/typer/0.17.3/typer-0.17.3.tar.gz", hash = "sha256:0c600503d472bcf98d29914d4dcd67f80c24cc245395e2e00ba3603c9332e8ba" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/typer/0.17.3/typer-0.17.3-py3-none-any.whl", hash = "sha256:643919a79182ab7ac7581056d93c6a2b865b026adf2872c4d02c72758e6f095b" },
+]
+
+[[package]]
+name = "typing-extensions"
+version = "4.15.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/typing-extensions/4.15.0/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/typing-extensions/4.15.0/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548" },
+]
+
+[[package]]
+name = "typing-inspection"
+version = "0.4.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/typing-inspection/0.4.1/typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/typing-inspection/0.4.1/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51" },
+]
+
+[[package]]
+name = "tzdata"
+version = "2025.2"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tzdata/2025.2/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/tzdata/2025.2/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8" },
+]
+
+[[package]]
+name = "urllib3"
+version = "2.5.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/urllib3/2.5.0/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/urllib3/2.5.0/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc" },
+]
+
+[[package]]
+name = "uvicorn"
+version = "0.35.0"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "click" },
+ { name = "h11" },
+ { name = "typing-extensions", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/uvicorn/0.35.0/uvicorn-0.35.0.tar.gz", hash = "sha256:bc662f087f7cf2ce11a1d7fd70b90c9f98ef2e2831556dd078d131b96cc94a01" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/uvicorn/0.35.0/uvicorn-0.35.0-py3-none-any.whl", hash = "sha256:197535216b25ff9b785e29a0b79199f55222193d47f820816e7da751e9bc8d4a" },
+]
+
+[[package]]
+name = "wcwidth"
+version = "0.2.13"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/wcwidth/0.2.13/wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/wcwidth/0.2.13/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859" },
+]
+
+[[package]]
+name = "websockets"
+version = "15.0.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5756779642579d902eed757b21b0164cd6fe338506a8083eb58af5c372e39d9a" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdfe3e2a29e4db3659dbd5bbf04560cea53dd9610273917799f1cde46aa725e" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c2529b320eb9e35af0fa3016c187dffb84a3ecc572bcee7c3ce302bfeba52bf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac1e5c9054fe23226fb11e05a6e630837f074174c4c2f0fe442996112a6de4fb" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5df592cd503496351d6dc14f7cdad49f268d8e618f80dce0cd5a36b93c3fc08d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0a34631031a8f05657e8e90903e656959234f3a04552259458aac0b0f9ae6fd9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d00075aa65772e7ce9e990cab3ff1de702aa09be3940d1dc88d5abf1ab8a09c" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp310-cp310-win32.whl", hash = "sha256:1234d4ef35db82f5446dca8e35a7da7964d02c127b095e172e54397fb6a6c256" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:39c1fec2c11dc8d89bba6b2bf1556af381611a173ac2b511cf7231622058af41" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0c9e74d766f2818bb95f84c25be4dea09841ac0f734d1966f415e4edfc4ef1c3" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1009ee0c7739c08a0cd59de430d6de452a55e42d6b522de7aa15e6f67db0b8e1" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d1f20b1c7a2fa82367e04982e708723ba0e7b8d43aa643d3dcd404d74f1475" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f29d80eb9a9263b8d109135351caf568cc3f80b9928bccde535c235de55c22d9" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b359ed09954d7c18bbc1680f380c7301f92c60bf924171629c5db97febb12f04" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:cad21560da69f4ce7658ca2cb83138fb4cf695a2ba3e475e0559e05991aa8122" },
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/websockets/15.0.1/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f" },
+]
+
+[[package]]
+name = "werkzeug"
+version = "3.1.3"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "markupsafe" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/werkzeug/3.1.3/werkzeug-3.1.3.tar.gz", hash = "sha256:60723ce945c19328679790e3282cc758aa4a6040e4bb330f53d30fa546d44746" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/werkzeug/3.1.3/werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e" },
+]
+
+[[package]]
+name = "wetext"
+version = "0.0.9"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "kaldifst" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/wetext/0.0.9/wetext-0.0.9.tar.gz", hash = "sha256:7bae42acae5515653497bd2e783799a12cc48694de6497d50da45850d7d2ebd9" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/wetext/0.0.9/wetext-0.0.9-py3-none-any.whl", hash = "sha256:effa5565c0bacfaed0684ed667f60fd0b59fe076c08b4600ad5d8473923802d9" },
+]
+
+[[package]]
+name = "wheel"
+version = "0.46.1"
+source = { registry = "https://pypi.bilibili.co/repository/pypi-public/simple/" }
+dependencies = [
+ { name = "packaging" },
+]
+sdist = { url = "https://pypi.bilibili.co/repository/pypi-public/packages/wheel/0.46.1/wheel-0.46.1.tar.gz", hash = "sha256:fd477efb5da0f7df1d3c76c73c14394002c844451bd63229d8570f376f5e6a38" }
+wheels = [
+ { url = "https://pypi.bilibili.co/repository/pypi-public/packages/wheel/0.46.1/wheel-0.46.1-py3-none-any.whl", hash = "sha256:f796f65d72750ccde090663e466d0ca37cd72b62870f7520b96d34cdc07d86d8" },
+]
diff --git a/webui.py b/webui.py
index 94b2ab4..ecac7f2 100644
--- a/webui.py
+++ b/webui.py
@@ -1,10 +1,14 @@
import json
+import logging
import os
import sys
import threading
import time
import warnings
+
+import pandas as pd
+
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)
@@ -16,8 +20,9 @@ import argparse
parser = argparse.ArgumentParser(description="IndexTTS WebUI")
parser.add_argument("--verbose", action="store_true", default=False, help="Enable verbose mode")
parser.add_argument("--port", type=int, default=7860, help="Port to run the web UI on")
-parser.add_argument("--host", type=str, default="127.0.0.1", help="Host to run the web UI on")
+parser.add_argument("--host", type=str, default="0.0.0.0", help="Host to run the web UI on")
parser.add_argument("--model_dir", type=str, default="checkpoints", help="Model checkpoints directory")
+parser.add_argument("--is_fp16", action="store_true", default=False, help="Fp16 infer")
cmd_args = parser.parse_args()
if not os.path.exists(cmd_args.model_dir):
@@ -25,10 +30,11 @@ if not os.path.exists(cmd_args.model_dir):
sys.exit(1)
for file in [
- "bigvgan_generator.pth",
"bpe.model",
"gpt.pth",
"config.yaml",
+ "s2mel.pth",
+ "wav2vec2bert_stats.pt"
]:
file_path = os.path.join(cmd_args.model_dir, file)
if not os.path.exists(file_path):
@@ -36,29 +42,61 @@ for file in [
sys.exit(1)
import gradio as gr
-
-from indextts.infer import IndexTTS
+from indextts import infer
+from indextts.infer_v2 import IndexTTS2
from tools.i18n.i18n import I18nAuto
+from modelscope.hub import api
-i18n = I18nAuto(language="zh_CN")
+i18n = I18nAuto(language="Auto")
MODE = 'local'
-tts = IndexTTS(model_dir=cmd_args.model_dir, cfg_path=os.path.join(cmd_args.model_dir, "config.yaml"),)
-
+tts = IndexTTS2(model_dir=cmd_args.model_dir, cfg_path=os.path.join(cmd_args.model_dir, "config.yaml"),is_fp16=cmd_args.is_fp16)
+# 支持的语言列表
+LANGUAGES = {
+ "中文": "zh_CN",
+ "English": "en_US"
+}
+EMO_CHOICES = [i18n("与音色参考音频相同"),
+ i18n("使用情感参考音频"),
+ i18n("使用情感向量控制"),
+ i18n("使用情感描述文本控制")]
os.makedirs("outputs/tasks",exist_ok=True)
os.makedirs("prompts",exist_ok=True)
-with open("tests/cases.jsonl", "r", encoding="utf-8") as f:
+MAX_LENGTH_TO_USE_SPEED = 70
+with open("examples/cases.jsonl", "r", encoding="utf-8") as f:
example_cases = []
for line in f:
line = line.strip()
if not line:
continue
example = json.loads(line)
- example_cases.append([os.path.join("tests", example.get("prompt_audio", "sample_prompt.wav")),
- example.get("text"), ["普通推理", "批次推理"][example.get("infer_mode", 0)]])
+ if example.get("emo_audio",None):
+ emo_audio_path = os.path.join("examples",example["emo_audio"])
+ else:
+ emo_audio_path = None
+ example_cases.append([os.path.join("examples", example.get("prompt_audio", "sample_prompt.wav")),
+ EMO_CHOICES[example.get("emo_mode",0)],
+ example.get("text"),
+ emo_audio_path,
+ example.get("emo_weight",1.0),
+ example.get("emo_text",""),
+ example.get("emo_vec_1",0),
+ example.get("emo_vec_2",0),
+ example.get("emo_vec_3",0),
+ example.get("emo_vec_4",0),
+ example.get("emo_vec_5",0),
+ example.get("emo_vec_6",0),
+ example.get("emo_vec_7",0),
+ example.get("emo_vec_8",0)]
+ )
-def gen_single(prompt, text, infer_mode, max_text_tokens_per_sentence=120, sentences_bucket_max_size=4,
+
+def gen_single(emo_control_method,prompt, text,
+ emo_ref_path, emo_weight,
+ vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8,
+ emo_text,emo_random,
+ max_text_tokens_per_sentence=120,
*args, progress=gr.Progress()):
output_path = None
if not output_path:
@@ -79,16 +117,31 @@ def gen_single(prompt, text, infer_mode, max_text_tokens_per_sentence=120, sente
# "typical_sampling": bool(typical_sampling),
# "typical_mass": float(typical_mass),
}
- if infer_mode == "普通推理":
- output = tts.infer(prompt, text, output_path, verbose=cmd_args.verbose,
- max_text_tokens_per_sentence=int(max_text_tokens_per_sentence),
- **kwargs)
+ if type(emo_control_method) is not int:
+ emo_control_method = emo_control_method.value
+ if emo_control_method == 0:
+ emo_ref_path = None
+ emo_weight = 1.0
+ if emo_control_method == 1:
+ emo_weight = emo_weight
+ if emo_control_method == 2:
+ vec = [vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8]
+ vec_sum = sum([vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8])
+ if vec_sum > 1.5:
+ gr.Warning(i18n("情感向量之和不能超过1.5,请调整后重试。"))
+ return
else:
- # 批次推理
- output = tts.infer_fast(prompt, text, output_path, verbose=cmd_args.verbose,
- max_text_tokens_per_sentence=int(max_text_tokens_per_sentence),
- sentences_bucket_max_size=(sentences_bucket_max_size),
- **kwargs)
+ vec = None
+
+ print(f"Emo control mode:{emo_control_method},vec:{vec}")
+ output = tts.infer(spk_audio_prompt=prompt, text=text,
+ output_path=output_path,
+ emo_audio_prompt=emo_ref_path, emo_alpha=emo_weight,
+ emo_vector=vec,
+ use_emo_text=(emo_control_method==3), emo_text=emo_text,use_random=emo_random,
+ verbose=cmd_args.verbose,
+ max_text_tokens_per_sentence=int(max_text_tokens_per_sentence),
+ **kwargs)
return gr.update(value=output,visible=True)
def update_prompt_audio():
@@ -98,33 +151,68 @@ def update_prompt_audio():
with gr.Blocks(title="IndexTTS Demo") as demo:
mutex = threading.Lock()
gr.HTML('''
-
-
+
''')
- with gr.Tab("音频生成"):
+ with gr.Tab(i18n("音频生成")):
with gr.Row():
os.makedirs("prompts",exist_ok=True)
- prompt_audio = gr.Audio(label="参考音频",key="prompt_audio",
+ prompt_audio = gr.Audio(label=i18n("音色参考音频"),key="prompt_audio",
sources=["upload","microphone"],type="filepath")
prompt_list = os.listdir("prompts")
default = ''
if prompt_list:
default = prompt_list[0]
with gr.Column():
- input_text_single = gr.TextArea(label="文本",key="input_text_single", placeholder="请输入目标文本", info="当前模型版本{}".format(tts.model_version or "1.0"))
- infer_mode = gr.Radio(choices=["普通推理", "批次推理"], label="推理模式",info="批次推理:更适合长句,性能翻倍",value="普通推理")
- gen_button = gr.Button("生成语音", key="gen_button",interactive=True)
- output_audio = gr.Audio(label="生成结果", visible=True,key="output_audio")
- with gr.Accordion("高级生成参数设置", open=False):
+ input_text_single = gr.TextArea(label=i18n("文本"),key="input_text_single", placeholder=i18n("请输入目标文本"), info=f"{i18n('当前模型版本')}{tts.model_version or '1.0'}")
+ gen_button = gr.Button(i18n("生成语音"), key="gen_button",interactive=True)
+ output_audio = gr.Audio(label=i18n("生成结果"), visible=True,key="output_audio")
+ with gr.Accordion(i18n("功能设置")):
+ # 情感控制选项部分
+ with gr.Row():
+ emo_control_method = gr.Radio(
+ choices=EMO_CHOICES,
+ type="index",
+ value=EMO_CHOICES[0],label=i18n("情感控制方式"))
+ # 情感参考音频部分
+ with gr.Group(visible=False) as emotion_reference_group:
+ with gr.Row():
+ emo_upload = gr.Audio(label=i18n("上传情感参考音频"), type="filepath")
+
+ with gr.Row():
+ emo_weight = gr.Slider(label=i18n("情感权重"), minimum=0.0, maximum=1.6, value=0.8, step=0.01)
+
+ # 情感随机采样
+ with gr.Row():
+ emo_random = gr.Checkbox(label=i18n("情感随机采样"),value=False,visible=False)
+
+ # 情感向量控制部分
+ with gr.Group(visible=False) as emotion_vector_group:
+ with gr.Row():
+ with gr.Column():
+ vec1 = gr.Slider(label=i18n("喜"), minimum=0.0, maximum=1.4, value=0.0, step=0.05)
+ vec2 = gr.Slider(label=i18n("怒"), minimum=0.0, maximum=1.4, value=0.0, step=0.05)
+ vec3 = gr.Slider(label=i18n("哀"), minimum=0.0, maximum=1.4, value=0.0, step=0.05)
+ vec4 = gr.Slider(label=i18n("惧"), minimum=0.0, maximum=1.4, value=0.0, step=0.05)
+ with gr.Column():
+ vec5 = gr.Slider(label=i18n("厌恶"), minimum=0.0, maximum=1.4, value=0.0, step=0.05)
+ vec6 = gr.Slider(label=i18n("低落"), minimum=0.0, maximum=1.4, value=0.0, step=0.05)
+ vec7 = gr.Slider(label=i18n("惊喜"), minimum=0.0, maximum=1.4, value=0.0, step=0.05)
+ vec8 = gr.Slider(label=i18n("平静"), minimum=0.0, maximum=1.4, value=0.0, step=0.05)
+
+ with gr.Group(visible=False) as emo_text_group:
+ with gr.Row():
+ emo_text = gr.Textbox(label=i18n("情感描述文本"), placeholder=i18n("请输入情感描述文本"), value="", info=i18n("例如:高兴,愤怒,悲伤等"))
+
+ with gr.Accordion(i18n("高级生成参数设置"), open=False):
with gr.Row():
with gr.Column(scale=1):
- gr.Markdown("**GPT2 采样设置** _参数会影响音频多样性和生成速度详见[Generation strategies](https://huggingface.co/docs/transformers/main/en/generation_strategies)_")
+ gr.Markdown(f"**{i18n('GPT2 采样设置')}** _{i18n('参数会影响音频多样性和生成速度详见')}[Generation strategies](https://huggingface.co/docs/transformers/main/en/generation_strategies)_")
with gr.Row():
do_sample = gr.Checkbox(label="do_sample", value=True, info="是否进行采样")
- temperature = gr.Slider(label="temperature", minimum=0.1, maximum=2.0, value=1.0, step=0.1)
+ temperature = gr.Slider(label="temperature", minimum=0.1, maximum=2.0, value=0.8, step=0.1)
with gr.Row():
top_p = gr.Slider(label="top_p", minimum=0.0, maximum=1.0, value=0.8, step=0.01)
top_k = gr.Slider(label="top_k", minimum=0, maximum=100, value=30, step=1)
@@ -132,24 +220,20 @@ with gr.Blocks(title="IndexTTS Demo") as demo:
with gr.Row():
repetition_penalty = gr.Number(label="repetition_penalty", precision=None, value=10.0, minimum=0.1, maximum=20.0, step=0.1)
length_penalty = gr.Number(label="length_penalty", precision=None, value=0.0, minimum=-2.0, maximum=2.0, step=0.1)
- max_mel_tokens = gr.Slider(label="max_mel_tokens", value=600, minimum=50, maximum=tts.cfg.gpt.max_mel_tokens, step=10, info="生成Token最大数量,过小导致音频被截断", key="max_mel_tokens")
+ max_mel_tokens = gr.Slider(label="max_mel_tokens", value=1500, minimum=50, maximum=tts.cfg.gpt.max_mel_tokens, step=10, info="生成Token最大数量,过小导致音频被截断", key="max_mel_tokens")
# with gr.Row():
# typical_sampling = gr.Checkbox(label="typical_sampling", value=False, info="不建议使用")
# typical_mass = gr.Slider(label="typical_mass", value=0.9, minimum=0.0, maximum=1.0, step=0.1)
with gr.Column(scale=2):
- gr.Markdown("**分句设置** _参数会影响音频质量和生成速度_")
+ gr.Markdown(f'**{i18n("分句设置")}** _{i18n("参数会影响音频质量和生成速度")}_')
with gr.Row():
max_text_tokens_per_sentence = gr.Slider(
- label="分句最大Token数", value=120, minimum=20, maximum=tts.cfg.gpt.max_text_tokens, step=2, key="max_text_tokens_per_sentence",
- info="建议80~200之间,值越大,分句越长;值越小,分句越碎;过小过大都可能导致音频质量不高",
+ label=i18n("分句最大Token数"), value=120, minimum=20, maximum=tts.cfg.gpt.max_text_tokens, step=2, key="max_text_tokens_per_sentence",
+ info=i18n("建议80~200之间,值越大,分句越长;值越小,分句越碎;过小过大都可能导致音频质量不高"),
)
- sentences_bucket_max_size = gr.Slider(
- label="分句分桶的最大容量(批次推理生效)", value=4, minimum=1, maximum=16, step=1, key="sentences_bucket_max_size",
- info="建议2-8之间,值越大,一批次推理包含的分句数越多,过大可能导致内存溢出",
- )
- with gr.Accordion("预览分句结果", open=True) as sentences_settings:
+ with gr.Accordion(i18n("预览分句结果"), open=True) as sentences_settings:
sentences_preview = gr.Dataframe(
- headers=["序号", "分句内容", "Token数"],
+ headers=[i18n("序号"), i18n("分句内容"), i18n("Token数")],
key="sentences_preview",
wrap=True,
)
@@ -162,7 +246,14 @@ with gr.Blocks(title="IndexTTS Demo") as demo:
if len(example_cases) > 0:
gr.Examples(
examples=example_cases,
- inputs=[prompt_audio, input_text_single, infer_mode],
+ examples_per_page=20,
+ inputs=[prompt_audio,
+ emo_control_method,
+ input_text_single,
+ emo_upload,
+ emo_weight,
+ emo_text,
+ vec1,vec2,vec3,vec4,vec5,vec6,vec7,vec8]
)
def on_input_text_change(text, max_tokens_per_sentence):
@@ -175,15 +266,47 @@ with gr.Blocks(title="IndexTTS Demo") as demo:
sentence_str = ''.join(s)
tokens_count = len(s)
data.append([i, sentence_str, tokens_count])
-
return {
sentences_preview: gr.update(value=data, visible=True, type="array"),
}
else:
- df = pd.DataFrame([], columns=["序号", "分句内容", "Token数"])
+ df = pd.DataFrame([], columns=[i18n("序号"), i18n("分句内容"), i18n("Token数")])
return {
- sentences_preview: gr.update(value=df)
+ sentences_preview: gr.update(value=df),
}
+ def on_method_select(emo_control_method):
+ if emo_control_method == 1:
+ return (gr.update(visible=True),
+ gr.update(visible=False),
+ gr.update(visible=False),
+ gr.update(visible=False)
+ )
+ elif emo_control_method == 2:
+ return (gr.update(visible=False),
+ gr.update(visible=True),
+ gr.update(visible=True),
+ gr.update(visible=False)
+ )
+ elif emo_control_method == 3:
+ return (gr.update(visible=False),
+ gr.update(visible=True),
+ gr.update(visible=False),
+ gr.update(visible=True)
+ )
+ else:
+ return (gr.update(visible=False),
+ gr.update(visible=False),
+ gr.update(visible=False),
+ gr.update(visible=False)
+ )
+
+ emo_control_method.select(on_method_select,
+ inputs=[emo_control_method],
+ outputs=[emotion_reference_group,
+ emo_random,
+ emotion_vector_group,
+ emo_text_group]
+ )
input_text_single.change(
on_input_text_change,
@@ -200,13 +323,16 @@ with gr.Blocks(title="IndexTTS Demo") as demo:
outputs=[gen_button])
gen_button.click(gen_single,
- inputs=[prompt_audio, input_text_single, infer_mode,
- max_text_tokens_per_sentence, sentences_bucket_max_size,
+ inputs=[emo_control_method,prompt_audio, input_text_single, emo_upload, emo_weight,
+ vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8,
+ emo_text,emo_random,
+ max_text_tokens_per_sentence,
*advanced_params,
],
outputs=[output_audio])
+
if __name__ == "__main__":
demo.queue(20)
demo.launch(server_name=cmd_args.host, server_port=cmd_args.port)