From 471a45435cda20959dd8d5a9f491d4b6df12f6d2 Mon Sep 17 00:00:00 2001 From: Yrom Date: Fri, 11 Apr 2025 14:57:20 +0800 Subject: [PATCH] Add cli mode for inference --- .gitignore | 8 +++++++- README.md | 35 +++++++++++++++++++++++++++++++++ indextts/cli.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 6 +++--- setup.py | 50 +++++++++++++++++++++++++++++++++++++++++++++++ webui.py | 2 +- 6 files changed, 147 insertions(+), 5 deletions(-) create mode 100644 indextts/cli.py create mode 100644 setup.py diff --git a/.gitignore b/.gitignore index f9f2d87..23e0d7b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,10 @@ venv/ __pycache__ +*.egg-info *.DS_Store -.idea/ \ No newline at end of file +.idea/ +checkpoints/*.pth +checkpoints/*.vocab +checkpoints/*.model +checkpoints/.cache +outputs/ diff --git a/README.md b/README.md index 2d5eea3..0d15775 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,21 @@ conda activate index-tts pip install -r requirements.txt apt-get install ffmpeg ``` + 3. Download models: + +Download by `huggingface-cli`: + +```bash +# 如果下载速度慢,可以使用官方的镜像 +export HF_ENDPOINT="https://hf-mirror.com" +huggingface-cli download IndexTeam/Index-TTS \ + bigvgan_discriminator.pth bigvgan_generator.pth bpe.model dvae.pth gpt.pth unigram_12000.vocab \ + --local-dir checkpoints +``` + +Or by `wget`: + ```bash wget https://huggingface.co/IndexTeam/Index-TTS/resolve/main/bigvgan_discriminator.pth -P checkpoints wget https://huggingface.co/IndexTeam/Index-TTS/resolve/main/bigvgan_generator.pth -P checkpoints @@ -112,11 +126,32 @@ wget https://huggingface.co/IndexTeam/Index-TTS/resolve/main/dvae.pth -P checkpo wget https://huggingface.co/IndexTeam/Index-TTS/resolve/main/gpt.pth -P checkpoints wget https://huggingface.co/IndexTeam/Index-TTS/resolve/main/unigram_12000.vocab -P checkpoints ``` + 4. Run test script: + + ```bash # Please put your prompt audio in 'test_data' and rename it to 'input.wav' PYTHONPATH=. 
import os
import sys
import warnings

# Suppress noisy warnings from tensorflow and other libraries so CLI output stays readable.
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)


def main():
    """Command-line entry point: synthesize speech for TEXT with a reference voice prompt.

    Exits with status 1 when the voice prompt, config file, or PyTorch is missing,
    or when the output file already exists and --force was not given.
    """
    import argparse
    parser = argparse.ArgumentParser(description="IndexTTS Command Line")
    parser.add_argument("text", type=str, help="Text to be synthesized")
    parser.add_argument("-v", "--voice", type=str, required=True, help="Path to the audio prompt file (wav format)")
    parser.add_argument("-o", "--output_path", type=str, default="gen.wav", help="Path to the output wav file")
    parser.add_argument("-c", "--config", type=str, default="checkpoints/config.yaml", help="Path to the config file. Default is 'checkpoints/config.yaml'")
    parser.add_argument("--model_dir", type=str, default="checkpoints", help="Path to the model directory. Default is 'checkpoints'")
    # FIX: the original used `action="store_true", default=True`, which made the
    # flag a no-op (always True, impossible to disable). BooleanOptionalAction
    # keeps the old default and `--fp16` spelling but also accepts `--no-fp16`.
    parser.add_argument("--fp16", action=argparse.BooleanOptionalAction, default=True, help="Use FP16 for inference if available")
    parser.add_argument("-f", "--force", action="store_true", default=False, help="Force to overwrite the output file if it exists")
    args = parser.parse_args()

    if not os.path.exists(args.voice):
        print(f"Audio prompt file {args.voice} does not exist.")
        parser.print_help()
        sys.exit(1)
    if not os.path.exists(args.config):
        print(f"Config file {args.config} does not exist.")
        parser.print_help()
        sys.exit(1)

    output_path = args.output_path
    if os.path.exists(output_path):
        if not args.force:
            print(f"ERROR: Output file {output_path} already exists. Use --force to overwrite.")
            parser.print_help()
            sys.exit(1)
        else:
            os.remove(output_path)

    # Probe torch before importing the heavy model stack so a missing install
    # produces a clear message instead of a deep traceback.
    try:
        import torch
        if not torch.cuda.is_available():
            print("WARNING: CUDA is not available. Running in CPU mode.")
    except ImportError:
        print("ERROR: PyTorch is not installed. Please install it first.")
        sys.exit(1)

    # Imported lazily so `indextts --help` and argument errors stay fast.
    from indextts.infer import IndexTTS
    tts = IndexTTS(cfg_path=args.config, model_dir=args.model_dir, is_fp16=args.fp16)
    tts.infer(audio_prompt=args.voice, text=args.text, output_path=output_path)


if __name__ == "__main__":
    main()
from pathlib import Path

from setuptools import find_packages, setup

setup(
    name="indextts",
    version="0.1.0",
    author="Index SpeechTeam",
    author_email="xuanwu@bilibili.com",
    description="An Industrial-Level Controllable and Efficient Zero-Shot Text-To-Speech System",
    # pathlib.read_text closes the file; the original bare open() leaked the handle.
    long_description=Path("README.md").read_text(encoding="utf8"),
    long_description_content_type="text/markdown",
    url="https://github.com/index-tts/index-tts",
    packages=find_packages(),
    include_package_data=True,
    install_requires=[
        "torch==2.6.0",
        "torchaudio",
        "transformers==4.36.2",
        "accelerate",
        "tokenizers==0.15.0",
        "einops==0.8.1",
        "matplotlib==3.8.2",
        "omegaconf",
        "sentencepiece",
        "librosa",
        "numpy",
        # FIX: use PEP 508 environment markers evaluated at *install* time.
        # The original `platform.system()` check ran on the build machine and
        # was baked into the wheel metadata, so a wheel built on Linux would
        # wrongly require WeTextProcessing when installed on macOS.
        'WeTextProcessing; platform_system != "Darwin"',
        'wetext; platform_system == "Darwin"',
    ],
    extras_require={
        "webui": ["gradio"],
    },
    entry_points={
        "console_scripts": [
            "indextts = indextts.cli:main",
        ]
    },
    license="Apache-2.0",
    python_requires=">=3.10",
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: OS Independent",
        "License :: OSI Approved :: Apache Software License",
        "Intended Audience :: Science/Research",
        "Topic :: Scientific/Engineering",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
)