Merge pull request #6 from KAIST-MACLab/develop

seyong92 · web-flow · commit a9eb050187d9 · 2020-09-30T00:10:35.000+09:00
Update CLI interface
diff --git a/README.md b/README.md
@@ -96,6 +96,18 @@ In this version, TD-PSOLA only supports the fixed time stretching factor alpha.
 
 You can modify pitch of the audio sequence in two ways. The first one is beta, which is the fixed pitch shifting factor. The other one is target_f0, which supports target pitch sequence you want to convert. You cannot use both of the parameters.
 
+### Using PyTSMod from the command line
+
+From version 0.3.0, this package includes a command-line tool named `tsmod`, which lets you create the result file directly from a shell. For example, to apply WSOLA to `input.wav` with a stretching factor of 1.3 and save the result to `output.wav`, run:
+
+```shell
+$ tsmod wsola input.wav output.wav 1.3  # ola, wsola, pv, pv_int are available.
+```
+
+Currently, OLA, WSOLA, and Phase Vocoder (PV) are supported. TD-PSOLA is excluded because it is difficult to pass extracted pitch data to it from the command line. Non-linear TSM is also not supported from the command line.
+
+For more information, use the `-h` or `--help` option to see the detailed usage of `tsmod`.
+
 ## Audio examples
 
 The original audio is from TSM toolbox.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pytsmod"
-version = "0.2.0"
+version = "0.3.0"
 description = "An open-source Python library for audio time-scale modification."
 authors = ["Sangeon Yong <koragon2@kaist.ac.kr>"]
 
@@ -18,8 +18,8 @@ librosa = "^0.8"
 pytest = "^5.2"
 flake8 = "^3.8.3"
 
-# [tool.poetry.scripts]
-# tsmod = 'pytsmod.console:run'
+[tool.poetry.scripts]
+tsmod = 'pytsmod.console:run'
 
 [build-system]
 requires = ["poetry>=0.12"]
diff --git a/pytsmod/__init__.py b/pytsmod/__init__.py
@@ -1,4 +1,4 @@
-__version__ = '0.2.0'
+__version__ = '0.3.0'
 
 from .tdpsolatsm import *
 from .wsolatsm import *
diff --git a/pytsmod/console/__init__.py b/pytsmod/console/__init__.py
@@ -0,0 +1,2 @@
+from .console import run
+from .descs import *
diff --git a/pytsmod/console/console.py b/pytsmod/console/console.py
@@ -0,0 +1,114 @@
+import sys
+sys.path.append('./')
+
+from pytsmod import ola, wsola
+from pytsmod import phase_vocoder as pv
+from pytsmod import phase_vocoder_int as pv_int
+from pytsmod.console import *
+import argparse
+import soundfile as sf
+
+
+def run():
+    """Entry point of the ``tsmod`` command-line tool.
+
+    Builds an argument parser with one sub-command per algorithm (``ola``,
+    ``wsola``, ``pv`` and ``pv_int``), reads ``input_file`` with soundfile,
+    applies the selected algorithm with the given stretching factor and
+    options, and writes the transposed result to ``output_file`` using the
+    sample rate of the input file.
+
+    When no sub-command is given the function exits through
+    ``parser.error`` (usage message, exit status 2).
+    """
+    parser = argparse.ArgumentParser(description=TSMOD_DESC)
+    subparsers = parser.add_subparsers(help=SUBPARSER_HELP,
+                                       dest='subparser_name')
+
+    # create parser for OLA.
+    parser_ola = subparsers.add_parser('ola', help=OLA_HELP,
+                                       description=OLA_DESC)
+    parser_ola.add_argument('input_file', type=str, help=INPUT_HELP)
+    parser_ola.add_argument('output_file', type=str, help=OUTPUT_HELP)
+    parser_ola.add_argument('alpha', type=float, help=A_HELP)
+    parser_ola.add_argument('--win_type', '-wt', default='hann', type=str,
+                            help=WT_HELP)
+    parser_ola.add_argument('--win_size', '-ws', default=1024, type=int,
+                            help=WS_HELP)
+    parser_ola.add_argument('--syn_hop_size', '-sh', default=512, type=int,
+                            help=SH_HELP)
+
+    # create parser for WSOLA.
+    parser_wsola = subparsers.add_parser('wsola', help=WSOLA_HELP,
+                                         description=WSOLA_DESC)
+    parser_wsola.add_argument('input_file', type=str, help=INPUT_HELP)
+    parser_wsola.add_argument('output_file', type=str, help=OUTPUT_HELP)
+    parser_wsola.add_argument('alpha', type=float, help=A_HELP)
+    parser_wsola.add_argument('--win_type', '-wt', default='hann', type=str,
+                              help=WT_HELP)
+    parser_wsola.add_argument('--win_size', '-ws', default=1024, type=int,
+                              help=WS_HELP)
+    parser_wsola.add_argument('--syn_hop_size', '-sh', default=512, type=int,
+                              help=SH_HELP)
+    parser_wsola.add_argument('--tolerance', '-t', default=512, type=int,
+                              help=TOL_HELP)
+
+    # create parser for phase-vocoder.
+    parser_pv = subparsers.add_parser('pv', help=PV_HELP,
+                                      description=PV_DESC)
+    parser_pv.add_argument('input_file', type=str, help=INPUT_HELP)
+    parser_pv.add_argument('output_file', type=str, help=OUTPUT_HELP)
+    parser_pv.add_argument('alpha', type=float, help=A_HELP)
+    parser_pv.add_argument('--win_type', '-wt', default='sin', type=str,
+                           help=WT_HELP)
+    parser_pv.add_argument('--win_size', '-ws', default=2048, type=int,
+                           help=WS_HELP)
+    parser_pv.add_argument('--syn_hop_size', '-sh', default=512, type=int,
+                           help=SH_HELP)
+    parser_pv.add_argument('--zero_pad', '-z', default=0, type=int,
+                           help=ZP_HELP)
+    parser_pv.add_argument('--restore_energy', '-e', action='store_true',
+                           help=RE_HELP)
+    parser_pv.add_argument('--fft_shift', '-fs', action='store_true',
+                           help=FS_HELP)
+    parser_pv.add_argument('--phase_lock', '-pl', action='store_true',
+                           help=PL_HELP)
+
+    # create parser for phase-vocoder int.
+    parser_pvi = subparsers.add_parser('pv_int', help=PVI_HELP,
+                                       description=PVI_DESC)
+    parser_pvi.add_argument('input_file', type=str, help=INPUT_HELP)
+    parser_pvi.add_argument('output_file', type=str, help=OUTPUT_HELP)
+    parser_pvi.add_argument('alpha', type=int, help=A_PVI_HELP)
+    parser_pvi.add_argument('--win_type', '-wt', default='hann', type=str,
+                            help=WT_HELP)
+    parser_pvi.add_argument('--win_size', '-ws', default=2048, type=int,
+                            help=WS_HELP)
+    parser_pvi.add_argument('--syn_hop_size', '-sh', default=512, type=int,
+                            help=SH_HELP)
+    parser_pvi.add_argument('--zero_pad', '-z', default=None, type=int,
+                            help=ZP_HELP)
+    parser_pvi.add_argument('--restore_energy', '-e', action='store_true',
+                            help=RE_HELP)
+    parser_pvi.add_argument('--fft_shift', '-fs', action='store_true',
+                            help=FS_HELP)
+
+    args = parser.parse_args()
+
+    # Sub-commands are optional by default in Python 3 argparse. Without
+    # this guard a bare ``tsmod`` call fails with an AttributeError on
+    # ``args.input_file`` instead of showing the usage message.
+    if args.subparser_name is None:
+        parser.error('please choose one of the TSM algorithms: '
+                     'ola, wsola, pv, pv_int')
+
+    x, sr = sf.read(args.input_file)
+
+    if args.subparser_name == 'ola':
+        y = ola(x, args.alpha, win_type=args.win_type, win_size=args.win_size,
+                syn_hop_size=args.syn_hop_size)
+    elif args.subparser_name == 'wsola':
+        y = wsola(x, args.alpha, win_type=args.win_type,
+                  win_size=args.win_size, syn_hop_size=args.syn_hop_size,
+                  tolerance=args.tolerance)
+    elif args.subparser_name == 'pv':
+        y = pv(x, args.alpha, win_type=args.win_type, win_size=args.win_size,
+               syn_hop_size=args.syn_hop_size, zero_pad=args.zero_pad,
+               restore_energy=args.restore_energy, fft_shift=args.fft_shift,
+               phase_lock=args.phase_lock)
+    elif args.subparser_name == 'pv_int':
+        y = pv_int(x, args.alpha, win_type=args.win_type,
+                   win_size=args.win_size, syn_hop_size=args.syn_hop_size,
+                   zero_pad=args.zero_pad,
+                   restore_energy=args.restore_energy,
+                   fft_shift=args.fft_shift)
+
+    # The result is transposed before writing; presumably the TSM functions
+    # return multi-channel audio as (channels, samples) -- TODO confirm.
+    sf.write(args.output_file, y.T, sr)
+
+
+if __name__ == '__main__':
+    run()
diff --git a/pytsmod/console/descs.py b/pytsmod/console/descs.py
@@ -0,0 +1,28 @@
+# Description and help strings used by the argparse parsers of the
+# command-line tool.
+
+# Top-level parser.
+TSMOD_DESC = 'Processing time-scale modification for given audio file.'
+SUBPARSER_HELP = 'Available TSM algorithms'
+
+# Positional file arguments.
+INPUT_HELP = 'Input audio file to modify.'
+OUTPUT_HELP = 'Output file path to save.'
+
+# Stretching factor and window options.
+A_HELP = 'The time stretching factor alpha.'
+WT_HELP = 'Type of the window function. hann and sin are available.'
+WS_HELP = 'Size of the window function.'
+SH_HELP = 'Hop size of the synthesis window.'
+
+# OLA.
+OLA_HELP = 'Using OLA to modify audio file.'
+OLA_DESC = 'Using OLA to modify audio file.'
+
+# WSOLA.
+WSOLA_HELP = 'Using WSOLA to modify audio file.'
+WSOLA_DESC = 'Using WSOLA to modify audio file.'
+TOL_HELP = 'Number of samples the window positions in the input signal may be shifted'
+
+# Phase vocoder.
+PV_HELP = 'Using phase vocoder to modify audio file.'
+PV_DESC = 'Using phase vocoder to modify audio file.'
+ZP_HELP = 'The size of the zero pad in the window function.'
+RE_HELP = 'Try to reserve potential energy loss.'
+FS_HELP = 'Apply circular shift to STFT and ISTFT.'
+PL_HELP = 'Apply phase locking.'
+
+# Phase vocoder for integer stretching factors.
+PVI_HELP = 'Using phase vocoder specialized for integer stretching factor.'
+PVI_DESC = 'Using phase vocoder specialized for integer stretching factor.'
+A_PVI_HELP = 'The time stretching factor alpha. Only integer value is allowed.'
diff --git a/pytsmod/pvtsm.py b/pytsmod/pvtsm.py
@@ -172,7 +172,7 @@ def phase_vocoder_int(x, s, win_type='hann', win_size=2048, syn_hop_size=512,
 
         y[c, :] = y_chan
 
-    return y
+    return y.squeeze()
 
 
 def _find_peaks(spec):
diff --git a/tests/test_console.py b/tests/test_console.py
@@ -0,0 +1,155 @@
+import pytest
+from pytsmod import ola, wsola
+from pytsmod import phase_vocoder as pv
+from pytsmod import phase_vocoder_int as pv_int
+import soundfile as sf
+import numpy as np
+import os
+from subprocess import call
+
+
+@pytest.mark.parametrize('algorithm', ['ola', 'wsola', 'pv', 'pv_int'])
+def test_console_default_params(algorithm):
+    """Each CLI sub-command with default options must match the direct call.
+
+    The library result is written to ``temp.wav`` and read back, so both
+    arrays being compared have gone through a soundfile write/read cycle.
+    """
+    test_file = 'tests/data/castanetsviolin.wav'
+    alpha = 2
+    signal, rate = sf.read(test_file)
+
+    # The algorithm is looked up by name among this module's imports.
+    tsm = globals()[algorithm]
+    expected = tsm(signal, alpha)
+
+    # NOTE(review): '-fs' is passed only for pv_int, presumably because the
+    # library default of fft_shift differs from the CLI default -- confirm.
+    extra = ['-fs'] if algorithm == 'pv_int' else []
+    call(['python', 'pytsmod/console/console.py', algorithm,
+          test_file, 'temp_cli.wav', str(alpha)] + extra)
+
+    sf.write('temp.wav', expected, rate)
+    from_api, _ = sf.read('temp.wav')
+    from_cli, _ = sf.read('temp_cli.wav')
+
+    # Remove the temporary files before asserting.
+    for leftover in ('temp.wav', 'temp_cli.wav'):
+        os.remove(leftover)
+
+    assert np.allclose(from_api, from_cli)
+
+
+@pytest.mark.parametrize('alpha', [1.25])
+@pytest.mark.parametrize('win_type', ['sin'])
+@pytest.mark.parametrize('win_size', [512])
+@pytest.mark.parametrize('syn_hop_size', [256])
+def test_console_ola(alpha, win_type, win_size, syn_hop_size):
+    """CLI ``ola`` with explicit options must match the direct ``ola`` call."""
+    test_file = 'tests/data/castanetsviolin.wav'
+    signal, rate = sf.read(test_file)
+    options = dict(win_type=win_type, win_size=win_size,
+                   syn_hop_size=syn_hop_size)
+    expected = ola(signal, alpha, **options)
+
+    cli_args = ['python', 'pytsmod/console/console.py', 'ola',
+                test_file, 'temp_cli.wav', str(alpha)]
+    cli_args += ['-wt', win_type, '-ws', str(win_size),
+                 '-sh', str(syn_hop_size)]
+    call(cli_args)
+
+    # Write and re-read the library result so that both arrays have gone
+    # through a soundfile write/read cycle.
+    sf.write('temp.wav', expected, rate)
+    from_api, _ = sf.read('temp.wav')
+    from_cli, _ = sf.read('temp_cli.wav')
+
+    # Remove the temporary files before asserting.
+    for leftover in ('temp.wav', 'temp_cli.wav'):
+        os.remove(leftover)
+
+    assert np.allclose(from_api, from_cli)
+
+
+@pytest.mark.parametrize('alpha', [1.25])
+@pytest.mark.parametrize('win_type', ['sin'])
+@pytest.mark.parametrize('win_size', [512])
+@pytest.mark.parametrize('syn_hop_size', [256])
+@pytest.mark.parametrize('tolerance', [256])
+def test_console_wsola(alpha, win_type, win_size, syn_hop_size, tolerance):
+    """CLI ``wsola`` with explicit options must match the direct call."""
+    test_file = 'tests/data/castanetsviolin.wav'
+    signal, rate = sf.read(test_file)
+    options = dict(win_type=win_type, win_size=win_size,
+                   syn_hop_size=syn_hop_size, tolerance=tolerance)
+    expected = wsola(signal, alpha, **options)
+
+    cli_args = ['python', 'pytsmod/console/console.py', 'wsola',
+                test_file, 'temp_cli.wav', str(alpha)]
+    cli_args += ['-wt', win_type, '-ws', str(win_size),
+                 '-sh', str(syn_hop_size), '-t', str(tolerance)]
+    call(cli_args)
+
+    # Write and re-read the library result so that both arrays have gone
+    # through a soundfile write/read cycle.
+    sf.write('temp.wav', expected, rate)
+    from_api, _ = sf.read('temp.wav')
+    from_cli, _ = sf.read('temp_cli.wav')
+
+    # Remove the temporary files before asserting.
+    for leftover in ('temp.wav', 'temp_cli.wav'):
+        os.remove(leftover)
+
+    assert np.allclose(from_api, from_cli)
+
+
+@pytest.mark.parametrize('alpha', [1.25])
+@pytest.mark.parametrize('win_type', ['hann'])
+@pytest.mark.parametrize('win_size', [1024])
+@pytest.mark.parametrize('syn_hop_size', [256])
+@pytest.mark.parametrize('zero_pad', [256])
+@pytest.mark.parametrize('restore_energy', [True])
+@pytest.mark.parametrize('fft_shift', [True])
+@pytest.mark.parametrize('phase_lock', [True])
+def test_console_pv(alpha, win_type, win_size, syn_hop_size, zero_pad,
+                    restore_energy, fft_shift, phase_lock):
+    """CLI ``pv`` with explicit options must match the direct ``pv`` call."""
+    test_file = 'tests/data/castanetsviolin.wav'
+    signal, rate = sf.read(test_file)
+    options = dict(win_type=win_type, win_size=win_size,
+                   syn_hop_size=syn_hop_size, zero_pad=zero_pad,
+                   restore_energy=restore_energy, fft_shift=fft_shift,
+                   phase_lock=phase_lock)
+    expected = pv(signal, alpha, **options)
+
+    cli_args = ['python', 'pytsmod/console/console.py', 'pv',
+                test_file, 'temp_cli.wav', str(alpha)]
+    cli_args += ['-wt', win_type, '-ws', str(win_size),
+                 '-sh', str(syn_hop_size), '-z', str(zero_pad)]
+    # The three boolean flags are always passed, matching the single
+    # all-True parametrization above.
+    cli_args += ['-e', '-fs', '-pl']
+    call(cli_args)
+
+    # Write and re-read the library result so that both arrays have gone
+    # through a soundfile write/read cycle.
+    sf.write('temp.wav', expected, rate)
+    from_api, _ = sf.read('temp.wav')
+    from_cli, _ = sf.read('temp_cli.wav')
+
+    # Remove the temporary files before asserting.
+    for leftover in ('temp.wav', 'temp_cli.wav'):
+        os.remove(leftover)
+
+    assert np.allclose(from_api, from_cli)
+
+
+@pytest.mark.parametrize('alpha', [2])
+@pytest.mark.parametrize('win_type', ['sin'])
+@pytest.mark.parametrize('win_size', [1024])
+@pytest.mark.parametrize('syn_hop_size', [256])
+@pytest.mark.parametrize('zero_pad', [256])
+@pytest.mark.parametrize('restore_energy', [True])
+@pytest.mark.parametrize('fft_shift', [False])
+def test_console_pv_int(alpha, win_type, win_size, syn_hop_size, zero_pad,
+                        restore_energy, fft_shift):
+    """CLI ``pv_int`` with explicit options must match the direct call.
+
+    The library result is written to ``temp.wav`` and read back, so both
+    arrays being compared have gone through a soundfile write/read cycle.
+    """
+    test_file = 'tests/data/castanetsviolin.wav'
+    x, sr = sf.read(test_file)
+    # Exercise pv_int on both sides; calling ``pv`` and the ``pv``
+    # sub-command here would leave pv_int untested with non-default options.
+    y = pv_int(x, alpha, win_type=win_type, win_size=win_size,
+               syn_hop_size=syn_hop_size, zero_pad=zero_pad,
+               restore_energy=restore_energy, fft_shift=fft_shift)
+
+    cmd = ['python', 'pytsmod/console/console.py', 'pv_int',
+           test_file, 'temp_cli.wav', str(alpha),
+           '-wt', win_type, '-ws', str(win_size),
+           '-sh', str(syn_hop_size), '-z', str(zero_pad)]
+    # Both options are store_true flags in the CLI, so each is passed only
+    # when the matching parameter is enabled.
+    if restore_energy:
+        cmd.append('-e')
+    if fft_shift:
+        cmd.append('-fs')
+    call(cmd)
+
+    sf.write('temp.wav', y, sr)
+    y_, _ = sf.read('temp.wav')
+
+    y_cli, _ = sf.read('temp_cli.wav')
+
+    # Remove the temporary files before asserting.
+    os.remove('temp.wav')
+    os.remove('temp_cli.wav')
+
+    assert np.allclose(y_, y_cli)
diff --git a/tests/test_pytsmod.py b/tests/test_pytsmod.py
@@ -2,4 +2,4 @@
 
 
 def test_version():
-    assert __version__ == '0.2.0'
+    assert __version__ == '0.3.0'

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-__version__ = '0.2.0'`
	`1`	`+__version__ = '0.3.0'`
`2`	`2`
`3`	`3`	`from .tdpsolatsm import *`
`4`	`4`	`from .wsolatsm import *`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+from .console import run`
	`2`	`+from .descs import *`
Original file line number	Diff line number	Diff line change
`@@ -2,4 +2,4 @@`
`2`	`2`
`3`	`3`
`4`	`4`	`def test_version():`
`5`		`- assert __version__ == '0.2.0'`
	`5`	`+ assert __version__ == '0.3.0'`