fix packages
This commit is contained in:
parent
4c086f954b
commit
2fe6a73ada
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
venv/
|
||||
__pycache__
|
||||
@ -12,6 +12,7 @@ from indextts.BigVGAN.nnet.CNN import Conv1d as _Conv1d
|
||||
from indextts.BigVGAN.nnet.linear import Linear
|
||||
from indextts.BigVGAN.nnet.normalization import BatchNorm1d as _BatchNorm1d
|
||||
|
||||
|
||||
def length_to_mask(length, max_len=None, dtype=None, device=None):
|
||||
"""Creates a binary mask for each sequence.
|
||||
|
||||
|
||||
0
indextts/BigVGAN/__init__.py
Normal file
0
indextts/BigVGAN/__init__.py
Normal file
@ -2,7 +2,7 @@
|
||||
# LICENSE is in incl_licenses directory.
|
||||
|
||||
import torch
|
||||
from torch import nn, sin, pow
|
||||
from torch import nn, pow, sin
|
||||
from torch.nn import Parameter
|
||||
|
||||
|
||||
@ -22,6 +22,7 @@ class Snake(nn.Module):
|
||||
>>> x = torch.randn(256)
|
||||
>>> x = a1(x)
|
||||
'''
|
||||
|
||||
def __init__(self, in_features, alpha=1.0, alpha_trainable=True, alpha_logscale=False):
|
||||
'''
|
||||
Initialization.
|
||||
@ -76,6 +77,7 @@ class SnakeBeta(nn.Module):
|
||||
>>> x = torch.randn(256)
|
||||
>>> x = a1(x)
|
||||
'''
|
||||
|
||||
def __init__(self, in_features, alpha=1.0, alpha_trainable=True, alpha_logscale=False):
|
||||
'''
|
||||
Initialization.
|
||||
|
||||
@ -3,10 +3,9 @@
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from alias_free_activation.torch.resample import UpSample1d, DownSample1d
|
||||
|
||||
# load fused CUDA kernel: this enables importing anti_alias_activation_cuda
|
||||
from alias_free_activation.cuda import load
|
||||
from alias_free_activation.torch.resample import DownSample1d, UpSample1d
|
||||
|
||||
anti_alias_activation_cuda = load.load()
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
|
||||
# LICENSE is in incl_licenses directory.
|
||||
|
||||
from .act import *
|
||||
from .filter import *
|
||||
from .resample import *
|
||||
from .act import *
|
||||
|
||||
@ -2,7 +2,8 @@
|
||||
# LICENSE is in incl_licenses directory.
|
||||
|
||||
import torch.nn as nn
|
||||
from .resample import UpSample1d, DownSample1d
|
||||
|
||||
from .resample import DownSample1d, UpSample1d
|
||||
|
||||
|
||||
class Activation1d(nn.Module):
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
|
||||
# LICENSE is in incl_licenses directory.
|
||||
|
||||
import math
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import math
|
||||
|
||||
if "sinc" in dir(torch):
|
||||
sinc = torch.sinc
|
||||
|
||||
@ -3,8 +3,8 @@
|
||||
|
||||
import torch.nn as nn
|
||||
from torch.nn import functional as F
|
||||
from .filter import LowPassFilter1d
|
||||
from .filter import kaiser_sinc_filter1d
|
||||
|
||||
from .filter import LowPassFilter1d, kaiser_sinc_filter1d
|
||||
|
||||
|
||||
class UpSample1d(nn.Module):
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
|
||||
# LICENSE is in incl_licenses directory.
|
||||
|
||||
from .act import *
|
||||
from .filter import *
|
||||
from .resample import *
|
||||
from .act import *
|
||||
@ -2,7 +2,8 @@
|
||||
# LICENSE is in incl_licenses directory.
|
||||
|
||||
import torch.nn as nn
|
||||
from .resample import UpSample1d, DownSample1d
|
||||
|
||||
from .resample import DownSample1d, UpSample1d
|
||||
|
||||
|
||||
class Activation1d(nn.Module):
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
|
||||
# LICENSE is in incl_licenses directory.
|
||||
|
||||
import math
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import math
|
||||
|
||||
if 'sinc' in dir(torch):
|
||||
sinc = torch.sinc
|
||||
|
||||
@ -3,8 +3,8 @@
|
||||
|
||||
import torch.nn as nn
|
||||
from torch.nn import functional as F
|
||||
from .filter import LowPassFilter1d
|
||||
from .filter import kaiser_sinc_filter1d
|
||||
|
||||
from .filter import LowPassFilter1d, kaiser_sinc_filter1d
|
||||
|
||||
|
||||
class UpSample1d(nn.Module):
|
||||
|
||||
@ -4,23 +4,23 @@
|
||||
# Adapted from https://github.com/jik876/hifi-gan under the MIT license.
|
||||
# LICENSE is in incl_licenses directory.
|
||||
|
||||
import os
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional, Union, Dict
|
||||
from typing import Dict, Optional, Union
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
|
||||
from torch.nn import Conv1d, ConvTranspose1d
|
||||
from torch.nn.utils import weight_norm, remove_weight_norm
|
||||
from torch.nn.utils import remove_weight_norm, weight_norm
|
||||
|
||||
import indextts.BigVGAN.activations as activations
|
||||
from indextts.BigVGAN.utils import init_weights, get_padding
|
||||
from indextts.BigVGAN.alias_free_activation.torch.act import Activation1d as TorchActivation1d
|
||||
from indextts.BigVGAN.env import AttrDict
|
||||
|
||||
from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
|
||||
from indextts.BigVGAN.alias_free_activation.torch.act import \
|
||||
Activation1d as TorchActivation1d
|
||||
from indextts.BigVGAN.ECAPA_TDNN import ECAPA_TDNN
|
||||
from indextts.BigVGAN.env import AttrDict
|
||||
from indextts.BigVGAN.utils import get_padding, init_weights
|
||||
|
||||
|
||||
def load_hparams_from_json(path) -> AttrDict:
|
||||
@ -94,9 +94,8 @@ class AMPBlock1(torch.nn.Module):
|
||||
|
||||
# Select which Activation1d, lazy-load cuda version to ensure backward compatibility
|
||||
if self.h.get("use_cuda_kernel", False):
|
||||
from alias_free_activation.cuda.activation1d import (
|
||||
Activation1d as CudaActivation1d,
|
||||
)
|
||||
from alias_free_activation.cuda.activation1d import \
|
||||
Activation1d as CudaActivation1d
|
||||
|
||||
Activation1d = CudaActivation1d
|
||||
else:
|
||||
@ -194,9 +193,8 @@ class AMPBlock2(torch.nn.Module):
|
||||
|
||||
# Select which Activation1d, lazy-load cuda version to ensure backward compatibility
|
||||
if self.h.get("use_cuda_kernel", False):
|
||||
from alias_free_activation.cuda.activation1d import (
|
||||
Activation1d as CudaActivation1d,
|
||||
)
|
||||
from alias_free_activation.cuda.activation1d import \
|
||||
Activation1d as CudaActivation1d
|
||||
|
||||
Activation1d = CudaActivation1d
|
||||
else:
|
||||
@ -241,6 +239,7 @@ class AMPBlock2(torch.nn.Module):
|
||||
for l in self.convs:
|
||||
remove_weight_norm(l)
|
||||
|
||||
|
||||
'''
|
||||
PyTorchModelHubMixin,
|
||||
library_name="bigvgan",
|
||||
@ -251,6 +250,7 @@ class AMPBlock2(torch.nn.Module):
|
||||
tags=["neural-vocoder", "audio-generation", "arxiv:2206.04658"],
|
||||
'''
|
||||
|
||||
|
||||
class BigVGAN(
|
||||
torch.nn.Module,
|
||||
):
|
||||
@ -274,9 +274,8 @@ class BigVGAN(
|
||||
|
||||
# Select which Activation1d, lazy-load cuda version to ensure backward compatibility
|
||||
if self.h.get("use_cuda_kernel", False):
|
||||
from alias_free_activation.cuda.activation1d import (
|
||||
Activation1d as CudaActivation1d,
|
||||
)
|
||||
from alias_free_activation.cuda.activation1d import \
|
||||
Activation1d as CudaActivation1d
|
||||
|
||||
Activation1d = CudaActivation1d
|
||||
else:
|
||||
|
||||
@ -4,14 +4,13 @@
|
||||
# Adapted from https://github.com/jik876/hifi-gan under the MIT license.
|
||||
# LICENSE is in incl_licenses directory.
|
||||
|
||||
from torch.nn import Conv1d, ConvTranspose1d, Conv2d
|
||||
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
||||
from torch.nn import Conv1d, Conv2d, ConvTranspose1d
|
||||
from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
|
||||
|
||||
import indextts.BigVGAN.activations as activations
|
||||
from indextts.BigVGAN.utils import init_weights, get_padding
|
||||
from indextts.BigVGAN.alias_free_torch import *
|
||||
|
||||
from indextts.BigVGAN.ECAPA_TDNN import ECAPA_TDNN
|
||||
from indextts.BigVGAN.utils import get_padding, init_weights
|
||||
|
||||
LRELU_SLOPE = 0.1
|
||||
|
||||
@ -180,7 +179,6 @@ class BigVGAN(torch.nn.Module):
|
||||
|
||||
# self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07))
|
||||
|
||||
|
||||
def forward(self, x, mel_ref, lens=None):
|
||||
speaker_embedding = self.speaker_encoder(mel_ref, lens)
|
||||
n_batch = x.size(0)
|
||||
@ -432,4 +430,3 @@ def generator_loss(disc_outputs):
|
||||
loss += l
|
||||
|
||||
return loss, gen_losses
|
||||
|
||||
|
||||
@ -19,6 +19,7 @@ import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torchaudio
|
||||
|
||||
|
||||
class SincConv(nn.Module):
|
||||
"""This function implements SincConv (SincNet).
|
||||
|
||||
|
||||
0
indextts/BigVGAN/nnet/__init__.py
Normal file
0
indextts/BigVGAN/nnet/__init__.py
Normal file
@ -3,13 +3,14 @@
|
||||
|
||||
import glob
|
||||
import os
|
||||
|
||||
import matplotlib
|
||||
import matplotlib.pylab as plt
|
||||
import torch
|
||||
from scipy.io.wavfile import write
|
||||
from torch.nn.utils import weight_norm
|
||||
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pylab as plt
|
||||
from scipy.io.wavfile import write
|
||||
|
||||
MAX_WAV_VALUE = 32768.0
|
||||
|
||||
|
||||
0
indextts/__init__.py
Normal file
0
indextts/__init__.py
Normal file
@ -21,6 +21,7 @@ from typing import Tuple, Union
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
class PositionalEncoding(torch.nn.Module):
|
||||
"""Positional encoding.
|
||||
|
||||
|
||||
@ -3,11 +3,18 @@ from typing import Optional, Tuple
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from gpt.conformer.subsampling import Conv2dSubsampling4, Conv2dSubsampling6, \
|
||||
Conv2dSubsampling8, LinearNoSubsampling, Conv2dSubsampling2
|
||||
from gpt.conformer.embedding import PositionalEncoding, RelPositionalEncoding, NoPositionalEncoding
|
||||
from gpt.conformer.attention import MultiHeadedAttention, RelPositionMultiHeadedAttention
|
||||
from utils.utils import make_pad_mask
|
||||
|
||||
from indextts.gpt.conformer.attention import (MultiHeadedAttention,
|
||||
RelPositionMultiHeadedAttention)
|
||||
from indextts.gpt.conformer.embedding import (NoPositionalEncoding,
|
||||
PositionalEncoding,
|
||||
RelPositionalEncoding)
|
||||
from indextts.gpt.conformer.subsampling import (Conv2dSubsampling2,
|
||||
Conv2dSubsampling4,
|
||||
Conv2dSubsampling6,
|
||||
Conv2dSubsampling8,
|
||||
LinearNoSubsampling)
|
||||
from indextts.utils.utils import make_pad_mask
|
||||
|
||||
|
||||
class PositionwiseFeedForward(torch.nn.Module):
|
||||
@ -22,6 +29,7 @@ class PositionwiseFeedForward(torch.nn.Module):
|
||||
dropout_rate (float): Dropout rate.
|
||||
activation (torch.nn.Module): Activation function
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
idim: int,
|
||||
hidden_units: int,
|
||||
@ -47,6 +55,7 @@ class PositionwiseFeedForward(torch.nn.Module):
|
||||
|
||||
class ConvolutionModule(nn.Module):
|
||||
"""ConvolutionModule in Conformer model."""
|
||||
|
||||
def __init__(self,
|
||||
channels: int,
|
||||
kernel_size: int = 15,
|
||||
@ -181,6 +190,7 @@ class ConformerEncoderLayer(nn.Module):
|
||||
True: x -> x + linear(concat(x, att(x)))
|
||||
False: x -> x + att(x)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
size: int,
|
||||
@ -428,6 +438,7 @@ class BaseEncoder(torch.nn.Module):
|
||||
|
||||
class ConformerEncoder(BaseEncoder):
|
||||
"""Conformer encoder module."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
input_size: int,
|
||||
@ -507,4 +518,3 @@ class ConformerEncoder(BaseEncoder):
|
||||
concat_after,
|
||||
) for _ in range(num_blocks)
|
||||
])
|
||||
|
||||
|
||||
@ -5,11 +5,13 @@ import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from transformers import GPT2Config, GPT2PreTrainedModel, LogitsProcessorList
|
||||
from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
|
||||
from transformers.utils.model_parallel_utils import get_device_map, assert_device_map
|
||||
from gpt.perceiver import PerceiverResampler
|
||||
from gpt.conformer_encoder import ConformerEncoder
|
||||
from transformers.utils.model_parallel_utils import (assert_device_map,
|
||||
get_device_map)
|
||||
|
||||
from indextts.gpt.conformer_encoder import ConformerEncoder
|
||||
from indextts.gpt.perceiver import PerceiverResampler
|
||||
from indextts.utils.arch_util import AttentionBlock
|
||||
from utils.typical_sampling import TypicalLogitsWarper
|
||||
from indextts.utils.typical_sampling import TypicalLogitsWarper
|
||||
|
||||
|
||||
def null_position_embeddings(range, dim):
|
||||
@ -20,6 +22,7 @@ class ResBlock(nn.Module):
|
||||
"""
|
||||
Basic residual convolutional block that uses GroupNorm.
|
||||
"""
|
||||
|
||||
def __init__(self, chan):
|
||||
super().__init__()
|
||||
self.net = nn.Sequential(
|
||||
@ -619,7 +622,3 @@ class UnifiedVoice(nn.Module):
|
||||
max_length=max_length, logits_processor=logits_processor,
|
||||
num_return_sequences=num_return_sequences, **hf_generate_kwargs)
|
||||
return gen[:, trunc_index:]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -1,16 +1,18 @@
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
import sentencepiece as spm
|
||||
import torch
|
||||
import torchaudio
|
||||
from omegaconf import OmegaConf
|
||||
import sentencepiece as spm
|
||||
from utils.utils import tokenize_by_CJK_char
|
||||
from utils.feature_extractors import MelSpectrogramFeatures
|
||||
from indextts.vqvae.xtts_dvae import DiscreteVAE
|
||||
from indextts.utils.checkpoint import load_checkpoint
|
||||
from indextts.gpt.model import UnifiedVoice
|
||||
|
||||
from indextts.BigVGAN.models import BigVGAN as Generator
|
||||
from indextts.gpt.model import UnifiedVoice
|
||||
from indextts.utils.checkpoint import load_checkpoint
|
||||
from indextts.utils.feature_extractors import MelSpectrogramFeatures
|
||||
from indextts.utils.utils import tokenize_by_CJK_char
|
||||
from indextts.vqvae.xtts_dvae import DiscreteVAE
|
||||
|
||||
|
||||
class IndexTTS:
|
||||
|
||||
0
indextts/utils/__init__.py
Normal file
0
indextts/utils/__init__.py
Normal file
@ -1,6 +1,8 @@
|
||||
import math
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import math
|
||||
|
||||
from indextts.utils.xtransformers import RelativePositionBias
|
||||
|
||||
|
||||
|
||||
@ -12,15 +12,14 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
||||
import yaml
|
||||
import torch
|
||||
from collections import OrderedDict
|
||||
|
||||
import datetime
|
||||
import torch
|
||||
import yaml
|
||||
|
||||
|
||||
def load_checkpoint(model: torch.nn.Module, model_pth: str) -> dict:
|
||||
|
||||
@ -1,7 +1,8 @@
|
||||
import torch
|
||||
import torchaudio
|
||||
from torch import nn
|
||||
from utils import safe_log
|
||||
|
||||
from indextts.utils.utils import safe_log
|
||||
|
||||
|
||||
class FeatureExtractor(nn.Module):
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import os
|
||||
import re
|
||||
import random
|
||||
import re
|
||||
|
||||
import torch
|
||||
import torchaudio
|
||||
|
||||
|
||||
@ -6,7 +6,7 @@ from inspect import isfunction
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from einops import rearrange, repeat
|
||||
from torch import nn, einsum
|
||||
from torch import einsum, nn
|
||||
|
||||
DEFAULT_DIM_HEAD = 64
|
||||
|
||||
|
||||
@ -14,8 +14,8 @@ matplotlib==3.8.2
|
||||
opencv-python==4.9.0.80
|
||||
vocos==0.1.0
|
||||
accelerate==0.25.0
|
||||
omegaconf==2.0.6
|
||||
tensorboard==2.9.1
|
||||
omegaconf
|
||||
sentencepiece
|
||||
pypinyin
|
||||
librosa
|
||||
|
||||
6
webui.py
6
webui.py
@ -1,16 +1,18 @@
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import sys
|
||||
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
sys.path.append(current_dir)
|
||||
sys.path.append(os.path.join(current_dir, "indextts"))
|
||||
|
||||
import gradio as gr
|
||||
from utils.webui_utils import next_page, prev_page
|
||||
|
||||
from indextts.infer import IndexTTS
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
from utils.webui_utils import next_page, prev_page
|
||||
|
||||
i18n = I18nAuto(language="zh_CN")
|
||||
MODE = 'local'
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user