KAIST-MACLab
diff --git a/‎.gitignore
Lines changed: 5 additions & 1 deletion b/‎.gitignore
Lines changed: 5 additions & 1 deletion
diff --git a/‎.travis.yml
Lines changed: 11 additions & 0 deletions b/‎.travis.yml
Lines changed: 11 additions & 0 deletions
diff --git a/‎LICENSE
Lines changed: 674 additions & 0 deletions b/‎LICENSE
Lines changed: 674 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 8 additions & 6 deletions b/‎README.md
Lines changed: 8 additions & 6 deletions
diff --git a/‎pyproject.toml
Lines changed: 5 additions & 4 deletions b/‎pyproject.toml
Lines changed: 5 additions & 4 deletions
diff --git a/‎pytsmod/__init__.py
Lines changed: 1 addition & 1 deletion b/‎pytsmod/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎pytsmod/pvtsm.py
Lines changed: 13 additions & 33 deletions b/‎pytsmod/pvtsm.py
Lines changed: 13 additions & 33 deletions
diff --git a/‎pytsmod/tdpsolatsm.py
Lines changed: 31 additions & 65 deletions b/‎pytsmod/tdpsolatsm.py
Lines changed: 31 additions & 65 deletions
diff --git a/‎pytsmod/utils/__init__.py
Lines changed: 1 addition & 0 deletions b/‎pytsmod/utils/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎pytsmod/utils/stft.py
Lines changed: 8 additions & 9 deletions b/‎pytsmod/utils/stft.py
Lines changed: 8 additions & 9 deletions
@@ -1 +1,5 @@
-poetry.lock
+poetry.lock
+dist
+.vscode
+.ipynb_checkpoints
+__pycache__
@@ -0,0 +1,11 @@
+language: python
+python:
+  - "3.6"
+  - "3.7"
+  - "3.8"
+before_install:
+  - pip install poetry
+install:
+  - poetry install
+script:
+  - pytest
@@ -1,5 +1,7 @@
 PyTSMod
 ===================
+[![PyPI](https://img.shields.io/pypi/v/pytsmod.svg)](https://pypi.python.org/pypi/pytsmod)
+[![Build Status](https://travis-ci.org/KAIST-MACLab/PyTSMod.svg?branch=master)](https://travis-ci.org/KAIST-MACLab/PyTSMod)
 
 PyTSMod is an open-source library for Time-Scale Modification algorithms in Python 3. PyTSMod contains basic TSM algorithms such as Overlap-Add (OLA), Waveform-Similarity Overlap-Add (WSOLA), Time-Domain Pitch-Synchronous Overlap-Add (TD-PSOLA), and Phase Vocoder (PV-TSM). We are also planning to add more TSM algorithms and pitch shifting algorithms.
 
@@ -34,11 +36,11 @@ $ poetry build
 
 ### Requirements
 
-To use PyTSMod, following packages are required.
-- NumPy (>=1.13.0)
+To use PyTSMod, Python 3.6 or later and the following packages are required.
+- NumPy (>=1.16.0)
 - SciPy (>=1.0.0)
-- libROSA
-- soundfile
+- libROSA (>=0.8.0)
+- soundfile (>=0.10.0)
 
 ## Using PyTSMod
 
@@ -48,7 +50,7 @@ OLA, WSOLA, and PV-TSM can be imported as module to be used directly in Python.
 
 ```python
 import numpy as np
-import PyTSMod as tsm
+import pytsmod as tsm
 import soundfile as sf  # you can use other audio load packages.
 
 x, sr = sf.read('/FILEPATH/AUDIOFILE.wav')
@@ -73,7 +75,7 @@ When using TD-PSOLA, the estimated pitch information of the source you want to m
 
 ```python
 import numpy as np
-import PyTSMod as tsm
+import pytsmod as tsm
 import crepe  # you can use other pitch tracking algorithms.
 import soundfile as sf  # you can use other audio load packages.
 
 
@@ -1,14 +1,15 @@
 [tool.poetry]
 name = "pytsmod"
-version = "0.1.1"
+version = "0.2.0"
 description = ""
 authors = ["Sangeon Yong <koragon2@kaist.ac.kr>"]
 
 [tool.poetry.dependencies]
 python = "^3.6"
-numpy = "^1.19.2"
-nptyping = "^1.3.0"
-data-science-types = "^0.2.17"
+numpy = "^1.16.0"
+scipy = "^1.0.0"
+soundfile = "^0.10.0"
+librosa = "^0.8"
 
 [tool.poetry.dev-dependencies]
 pytest = "^5.2"
 
@@ -1,4 +1,4 @@
-__version__ = '0.1.0'
+__version__ = '0.2.0'
 
 from .tdpsolatsm import *
 from .wsolatsm import *
 
@@ -1,6 +1,6 @@
 import numpy as np
-from .utils import win as stft, istft
 from scipy.interpolate import interp1d
+from .utils import stft, istft, _validate_audio, _validate_scale_factor
 
 
 def phase_vocoder(x, s, win_type='sin', win_size=2048, syn_hop_size=512,
@@ -41,25 +41,12 @@ def phase_vocoder(x, s, win_type='sin', win_size=2048, syn_hop_size=512,
     y : numpy.ndarray [shape=(channel, num_samples) or (num_samples)]
         the modified output audio sequence.
     """
-
-    if x.ndim == 1:  # make mono source to 2D array with a single row.
-        x = np.expand_dims(x, 0)
-    else:
-        raise Exception("Please use the valid audio source. "
-                        + "Number of dimension of input should be less than 3.")
-
-    if np.isscalar(s):
-        anc_points = np.array([[0, np.shape(x)[1] - 1],
-                               [0, np.ceil(s * np.shape(x)[1]) - 1]])
-    elif s.shape[1] == 2:
-        anc_points = s
-    else:
-        raise Exception('Please use the valid anchorPoints. '
-                        + '(scalar or pair of input/output sample points)')
-
-    output_length = int(anc_points[-1, -1]) + 1
+    # validate the input audio and scale factor.
+    x = _validate_audio(x)
+    anc_points = _validate_scale_factor(x, s)
 
     n_chan = x.shape[0]
+    output_length = int(anc_points[-1, -1]) + 1
 
     sw_pos = np.arange(0, output_length + win_size // 2, syn_hop_size)
     ana_interpolated = interp1d(anc_points[1, :], anc_points[0, :],
@@ -153,28 +140,21 @@ def phase_vocoder_int(x, s, win_type='hann', win_size=2048, syn_hop_size=512,
     y : numpy.ndarray [shape=(channel, num_samples) or (num_samples)]
         the modified output audio sequence.
     """
-
-    if zero_pad is None:
-        zero_pad = s * win_size // 2
-
-    if x.ndim == 1:  # make mono source to 2D array with a single row.
-        x = np.expand_dims(x, 0)
-    elif x.ndim > 2:
-        raise Exception("Please use the valid audio source. "
-                        + "Number of dimension of input should be less than 3.")
-
+    # validate the input audio and scale factor.
+    x = _validate_audio(x)
     if np.isscalar(s) and isinstance(s, int) and s >= 1:
         anchor_points = np.array([[0, np.shape(x)[1] - 1],
                                   [0, np.ceil(s * np.shape(x)[1]) - 1]])
     else:
         raise Exception("Please use the valid stretching rate. "
                         + "(integer stretching factors larger than 0)")
 
-    output_length = int(anchor_points[-1, -1]) + 1
+    if zero_pad is None:
+        zero_pad = s * win_size // 2
 
-    win_size_half = int(np.round(win_size / 2))
+    output_length = int(anchor_points[-1, -1]) + 1
 
-    out_win_pos = np.arange(0, output_length + win_size_half, syn_hop_size)
+    out_win_pos = np.arange(0, output_length + win_size // 2, syn_hop_size)
     in_win_pos = ((out_win_pos - 1) / s + 1).astype(int)
 
     n_channels = x.shape[0]
@@ -227,10 +207,10 @@ def _find_peaks(spec):
         return peaks, np.empty(0)
 
     # Find region of influence. Axis 0 represents start and end each.
-    infl_region = np.zeros(2, peaks.shape)
+    infl_region = np.zeros((2, peaks.size))
     infl_region[0, 0] = 0
     infl_region[0, 1:] = np.ceil((peaks[1:] + peaks[: -1]) / 2)
     infl_region[1, : -1] = infl_region[0, 1:] - 1
-    infl_region[1, -1] = infl_region.shape[-1]
+    infl_region[1, -1] = spec.size - 1
 
     return peaks, infl_region.astype(int)
@@ -1,6 +1,7 @@
 import numpy as np
 
 from .utils import win as win_func
+from .utils import _validate_audio, _validate_f0
 
 
 def tdpsola(x, sr, src_f0, tgt_f0=None, alpha=1, beta=None,
@@ -38,85 +39,50 @@ def tdpsola(x, sr, src_f0, tgt_f0=None, alpha=1, beta=None,
     y : numpy.ndarray [shape=(channel, num_samples) or (num_samples)]
         the modified output audio sequence.
     """
-
-    if x.ndim == 1:  # make mono source to 2D array with a single row.
-        x = np.expand_dims(x, 0)
-    elif x.ndim > 2:
-        raise Exception("Please use the valid audio source. "
-                        + "Number of dimension of input should be less than 3.")
-
-    if src_f0.ndim == 1:
-        src_f0 = np.expand_dims(src_f0, 0)
-    elif src_f0.ndim == 2:
-        if x.shape[0] != src_f0.shape[0] and src_f0.shape[0] != 1:
-            raise Exception("The number of channels of source f0 value "
-                            + "should 1 or same as the source.")
-    elif src_f0.ndim > 2:
-        raise Exception("Please use the valid source f0 value. "
-                        + "Number of dimension of source f0 "
-                        + "should be less than 3.")
-
-    # Check if system uses target_f0 or beta.
-    if (tgt_f0 is None) and beta is None:
+    # validate the input audio, input pitch and scale factor.
+    x = _validate_audio(x)
+    src_f0 = _validate_f0(x, src_f0)
+    if tgt_f0 is not None:
+        if beta is not None:
+            raise Exception("You cannot use both tgt_f0 and beta as an input.")
+        tgt_f0 = _validate_f0(x, tgt_f0)
+    elif beta is None:
         beta = 1
-    elif beta is None:  # Uses target_f0
-        if tgt_f0.ndim == 1:
-            tgt_f0 = np.expand_dims(tgt_f0, 0)
-        elif tgt_f0.ndim == 2:
-            if x.shape[0] != tgt_f0.shape[0] and tgt_f0.shape[0] != 1:
-                raise Exception("The number of channels of target f0 value "
-                                + "should 1 or same as the source.")
-        elif tgt_f0.ndim > 2:
-            raise Exception("Please use the valid target f0 value. "
-                            + "Number of dimension of target f0 "
-                            + "should be less than 3.")
-    elif (tgt_f0 is not None) and (beta is not None):
-        raise Exception("You cannot use both target_f0 and beta as an input.")
-    elif not np.isscalar(beta):
-        raise Exception("The beta value should be a scalar.")
-
-    n_channels = x.shape[0]
+
+    n_chan = x.shape[0]
     output_length = int(np.ceil(x.shape[1] * alpha))
-    y = np.zeros((n_channels, output_length))
+    y = np.zeros((n_chan, output_length))
 
     for c, x_chan in enumerate(x):
-        if src_f0.ndim == 1:
-            src_f0_chan = src_f0
-        else:
-            src_f0_chan = src_f0[c]
-
+        src_f0_chan = src_f0[c]
         src_f0_chan[np.isnan(src_f0_chan)] = 0
         pm_chan = _find_pitch_marks(x_chan, sr, src_f0_chan, p_hop_size,
                                     p_win_size)
-
-        if tgt_f0 is not None:
-            if tgt_f0.ndim == 1:
-                tgt_f0_chan = tgt_f0
-            else:
-                tgt_f0_chan = tgt_f0[c]
-            beta = _target_f0_to_beta(x_chan, pm_chan,
-                                      src_f0_chan, tgt_f0_chan)
-
         pitch_period = np.diff(pm_chan)  # compute pitch periods
 
-        # make beta to an array if beta is a fixed value.
-        if np.isscalar(beta):
-            beta = np.ones(pitch_period.size) * beta
+        if tgt_f0 is not None:
+            tgt_f0_chan = tgt_f0[c]
+            beta_seq = _target_f0_to_beta(x_chan, pm_chan,
+                                          src_f0_chan, tgt_f0_chan)
+        else:
+            beta_seq = np.ones(pitch_period.size) * beta
 
         if pm_chan[0] <= pitch_period[0]:  # remove first pitch mark
             pm_chan = pm_chan[1:]
             pitch_period = pitch_period[1:]
-            beta = beta[1:]
+            beta_seq = beta_seq[1:]
 
         if pm_chan[-1] + pitch_period[-1] > x_chan.size:  # remove last pitch mark
             pm_chan = pm_chan[: -1]
         else:
             pitch_period = np.append(pitch_period, pitch_period[-1])
-            beta = np.append(beta, beta[-1])
+            beta_seq = np.append(beta_seq, beta_seq[-1])
+
+        output_length = int(np.ceil(x_chan.size * alpha))
 
-        output_length = int(np.ceil(x.size * alpha))
-        x_chan = np.pad(x_chan, (1024, 1024))
-        y_chan = np.zeros(output_length + 2 * 1024)  # output signal
+        pad = int(np.ceil(sr / 100))
+        x_chan = np.pad(x_chan, (pad, pad))
+        y_chan = np.zeros(output_length + 2 * pad)  # output signal
 
         tk = pitch_period[0] + 1  # output pitch mark
         ow = np.zeros(y_chan.shape)
@@ -131,19 +97,19 @@ def tdpsola(x, sr, src_f0, tgt_f0=None, alpha=1, beta=None,
             st = pm_chan[i] - pit
             en = pm_chan[i] + pit
 
-            gr = x_chan[st + 1024: en + 1024 + 1] * win
+            gr = x_chan[st + pad: en + pad + 1] * win
 
-            ini_gr = round(tk) - pit + 1024
-            end_gr = round(tk) + pit + 1024
+            ini_gr = round(tk) - pit + pad
+            end_gr = round(tk) + pit + pad
 
             y_chan[ini_gr: end_gr + 1] = y_chan[ini_gr: end_gr + 1] + gr
             ow[ini_gr: end_gr + 1] = ow[ini_gr: end_gr + 1] + win
-            tk = tk + pit / beta[i]
+            tk = tk + pit / beta_seq[i]
 
         ow[ow < 1e-3] = 1
 
         y_chan = y_chan / ow
-        y_chan = y_chan[1024:]
+        y_chan = y_chan[pad:]
         y_chan = y_chan[: output_length]
         y[c, :] = y_chan
 
 
@@ -1,2 +1,3 @@
 from .stft import *
 from .win import *
+from .validate import _validate_audio, _validate_scale_factor, _validate_f0
@@ -161,17 +161,16 @@ def lsee_mstft(X, syn_hop, win_type, win_size, zero_pad, fft_shift,
     w = win_func(win_type, win_size, zero_pad)
 
     win_len = len(w)
-    win_len_half = round(win_len / 2)
-    num_of_frames = X.shape[1]
-    win_pos = np.arange(num_of_frames) * syn_hop
+    n_frames = X.shape[1]
+    win_pos = np.arange(n_frames) * syn_hop
     signal_length = win_pos[-1] + win_len
 
     x = np.zeros(signal_length)
     ow = np.zeros(signal_length)
-    for i in range(num_of_frames):
+    for i in range(n_frames):
         curr_spec = X[:, i]
 
-        Xi = np.concatenate((curr_spec, np.flip(np.conj(curr_spec[1:-1]))))
+        Xi = np.append(curr_spec, np.flip(np.conj(curr_spec[1:-1])))
         xi = np.real(np.fft.ifft(Xi))
         if fft_shift:
             xi = np.fft.fftshift(xi)
@@ -183,13 +182,13 @@ def lsee_mstft(X, syn_hop, win_type, win_size, zero_pad, fft_shift,
             xiw_energy = np.sum(abs(xiw))
             xiw = xiw * (xi_energy / (xiw_energy + np.finfo(np.float).eps))
 
-        x[win_pos[i]: win_pos[i] + win_len] = x[win_pos[i]: win_pos[i] + win_len] + xiw
+        x[win_pos[i]: win_pos[i] + win_len] += xiw
 
-        ow[win_pos[i]: win_pos[i] + win_len] = ow[win_pos[i]: win_pos[i] + win_len] + np.power(w, 2)
+        ow[win_pos[i]: win_pos[i] + win_len] += np.power(w, 2)
 
-    ow[ow < pow(10, -3)] = 1  # avoid potential division by zero
+    ow[ow < 1e-3] = 1
     x = x / ow
 
-    x = x[win_len_half: - win_len_half + 1]
+    x = x[win_len // 2: - win_len // 2]
 
     return x
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-__version__ = '0.1.0'`
	`1`	`+__version__ = '0.2.0'`
`2`	`2`
`3`	`3`	`from .tdpsolatsm import *`
`4`	`4`	`from .wsolatsm import *`
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,3 @@`
`1`	`1`	`from .stft import *`
`2`	`2`	`from .win import *`
	`3`	`+from .validate import _validate_audio, _validate_scale_factor, _validate_f0`