Skip to content

Commit a9eb050

Browse files
authored
Merge pull request #6 from KAIST-MACLab/develop
Update CLI interface
2 parents 8935d9b + 680368a commit a9eb050

File tree

9 files changed

+317
-6
lines changed

9 files changed

+317
-6
lines changed

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,18 @@ In this version, TD-PSOLA only supports the fixed time stretching factor alpha.
9696

9797
You can modify the pitch of the audio sequence in two ways. The first is `beta`, a fixed pitch-shifting factor. The other is `target_f0`, a target pitch sequence that the audio should be converted to. The two parameters cannot be used together.
9898

99+
### Using PyTSMod from the command line
100+
101+
Starting with version 0.3.0, this package includes a command-line tool named `tsmod`, which makes it easy to produce a result file from a shell. To generate the WSOLA result of `input.wav` with a stretching factor of 1.3 and save it to `output.wav`, run:
102+
103+
```shell
104+
$ tsmod wsola input.wav output.wav 1.3 # ola, wsola, pv, pv_int are available.
105+
```
106+
107+
Currently, OLA, WSOLA, and Phase Vocoder (PV) are supported. TD-PSOLA is excluded because it is difficult to pass extracted pitch data to it from the command line. Non-linear TSM is also not supported by the command-line tool.
108+
109+
For more information, use the `-h` or `--help` option to see the detailed usage of `tsmod`.
110+
99111
## Audio examples
100112

101113
The original audio is from TSM toolbox.

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "pytsmod"
3-
version = "0.2.0"
3+
version = "0.3.0"
44
description = "An open-source Python library for audio time-scale modification."
55
authors = ["Sangeon Yong <koragon2@kaist.ac.kr>"]
66

@@ -18,8 +18,8 @@ librosa = "^0.8"
1818
pytest = "^5.2"
1919
flake8 = "^3.8.3"
2020

21-
# [tool.poetry.scripts]
22-
# tsmod = 'pytsmod.console:run'
21+
[tool.poetry.scripts]
22+
tsmod = 'pytsmod.console:run'
2323

2424
[build-system]
2525
requires = ["poetry>=0.12"]

pytsmod/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = '0.2.0'
1+
__version__ = '0.3.0'
22

33
from .tdpsolatsm import *
44
from .wsolatsm import *

pytsmod/console/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from .console import run
2+
from .descs import *

pytsmod/console/console.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import sys
2+
sys.path.append('./')
3+
4+
from pytsmod import ola, wsola
5+
from pytsmod import phase_vocoder as pv
6+
from pytsmod import phase_vocoder_int as pv_int
7+
from pytsmod.console import *
8+
import argparse
9+
import soundfile as sf
10+
11+
12+
def _add_common_arguments(subparser, descs, alpha_type, alpha_help,
                          win_type, win_size):
    """Register the arguments shared by every TSM sub-command.

    Adds the three positionals (input_file, output_file, alpha) followed by
    the window options, in the same order for every algorithm.

    Parameters
    ----------
    subparser : argparse.ArgumentParser
        The sub-command parser to extend.
    descs : module
        Module holding the help-text constants.
    alpha_type : callable
        Converter for the stretching factor (float, or int for pv_int).
    alpha_help : str
        Help text shown for the stretching factor.
    win_type : str
        Default value of --win_type for this algorithm.
    win_size : int
        Default value of --win_size for this algorithm.
    """
    subparser.add_argument('input_file', type=str, help=descs.INPUT_HELP)
    subparser.add_argument('output_file', type=str, help=descs.OUTPUT_HELP)
    subparser.add_argument('alpha', type=alpha_type, help=alpha_help)
    subparser.add_argument('--win_type', '-wt', default=win_type, type=str,
                           help=descs.WT_HELP)
    subparser.add_argument('--win_size', '-ws', default=win_size, type=int,
                           help=descs.WS_HELP)
    subparser.add_argument('--syn_hop_size', '-sh', default=512, type=int,
                           help=descs.SH_HELP)


def _build_parser(descs):
    """Create the ``tsmod`` parser with ola, wsola, pv and pv_int commands."""
    parser = argparse.ArgumentParser(description=descs.TSMOD_DESC)
    subparsers = parser.add_subparsers(help=descs.SUBPARSER_HELP,
                                       dest='subparser_name')

    # OLA.
    parser_ola = subparsers.add_parser('ola', help=descs.OLA_HELP,
                                       description=descs.OLA_DESC)
    _add_common_arguments(parser_ola, descs, float, descs.A_HELP,
                          'hann', 1024)

    # WSOLA.
    parser_wsola = subparsers.add_parser('wsola', help=descs.WSOLA_HELP,
                                         description=descs.WSOLA_DESC)
    _add_common_arguments(parser_wsola, descs, float, descs.A_HELP,
                          'hann', 1024)
    parser_wsola.add_argument('--tolerance', '-t', default=512, type=int,
                              help=descs.TOL_HELP)

    # Phase vocoder.
    parser_pv = subparsers.add_parser('pv', help=descs.PV_HELP,
                                      description=descs.PV_DESC)
    _add_common_arguments(parser_pv, descs, float, descs.A_HELP,
                          'sin', 2048)
    parser_pv.add_argument('--zero_pad', '-z', default=0, type=int,
                           help=descs.ZP_HELP)
    parser_pv.add_argument('--restore_energy', '-e', action='store_true',
                           help=descs.RE_HELP)
    parser_pv.add_argument('--fft_shift', '-fs', action='store_true',
                           help=descs.FS_HELP)
    parser_pv.add_argument('--phase_lock', '-pl', action='store_true',
                           help=descs.PL_HELP)

    # Phase vocoder for integer stretching factors.
    parser_pvi = subparsers.add_parser('pv_int', help=descs.PVI_HELP,
                                       description=descs.PVI_DESC)
    _add_common_arguments(parser_pvi, descs, int, descs.A_PVI_HELP,
                          'hann', 2048)
    parser_pvi.add_argument('--zero_pad', '-z', default=None, type=int,
                            help=descs.ZP_HELP)
    parser_pvi.add_argument('--restore_energy', '-e', action='store_true',
                            help=descs.RE_HELP)
    parser_pvi.add_argument('--fft_shift', '-fs', action='store_true',
                            help=descs.FS_HELP)

    return parser


def run():
    """Entry point of the ``tsmod`` command-line tool.

    Parses ``sys.argv``, reads the input audio file, applies the selected
    TSM algorithm (ola, wsola, pv or pv_int) with the given options and
    writes the result to the output file using the input's sample rate.

    Exits with the usual argparse usage error (status 2) when no algorithm
    is given on the command line.
    """
    # Imported here instead of relying on the module-level
    # ``from pytsmod.console import *``: when this module is loaded through
    # the package ``__init__`` (the ``tsmod`` entry point), the package is
    # only partially initialised at that moment and the star import binds
    # none of the help-text constants.
    from pytsmod.console import descs

    parser = _build_parser(descs)
    args = parser.parse_args()

    # Without a sub-command none of the positional attributes exist on
    # ``args``; fail with a usage message rather than an AttributeError.
    if args.subparser_name is None:
        parser.error('a TSM algorithm is required: ola, wsola, pv or pv_int.')

    x, sr = sf.read(args.input_file)
    # soundfile returns multi-channel audio as (frames, channels). The result
    # is transposed before writing below, so the input must be transposed the
    # same way on the way in. A no-op for mono (1-D) input.
    # NOTE(review): assumes the TSM routines are channel-first -- confirm.
    x = x.T

    common = dict(win_type=args.win_type, win_size=args.win_size,
                  syn_hop_size=args.syn_hop_size)

    if args.subparser_name == 'ola':
        y = ola(x, args.alpha, **common)
    elif args.subparser_name == 'wsola':
        y = wsola(x, args.alpha, tolerance=args.tolerance, **common)
    elif args.subparser_name == 'pv':
        y = pv(x, args.alpha, zero_pad=args.zero_pad,
               restore_energy=args.restore_energy, fft_shift=args.fft_shift,
               phase_lock=args.phase_lock, **common)
    elif args.subparser_name == 'pv_int':
        y = pv_int(x, args.alpha, zero_pad=args.zero_pad,
                   restore_energy=args.restore_energy,
                   fft_shift=args.fft_shift, **common)
    else:
        # Unreachable while argparse restricts the choices; kept so that a
        # newly registered sub-command cannot reach sf.write without output.
        parser.error('unsupported algorithm: %s' % args.subparser_name)

    sf.write(args.output_file, y.T, sr)
111+
112+
113+
if __name__ == '__main__':
114+
run()

pytsmod/console/descs.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Help and description strings shown by the ``tsmod`` command-line tool.

# Top-level parser.
TSMOD_DESC = 'Processing time-scale modification for given audio file.'
SUBPARSER_HELP = 'Available TSM algorithms'

# Positional arguments shared by every algorithm.
INPUT_HELP = 'Input audio file to modify.'
OUTPUT_HELP = 'Output file path to save.'

# Options shared by every algorithm.
A_HELP = 'The time stretching factor alpha.'
WT_HELP = 'Type of the window function. hann and sin are available.'
WS_HELP = 'Size of the window function.'
SH_HELP = 'Hop size of the synthesis window.'

# OLA.
OLA_HELP = 'Using OLA to modify audio file.'
OLA_DESC = 'Using OLA to modify audio file.'

# WSOLA.
WSOLA_HELP = 'Using WSOLA to modify audio file.'
WSOLA_DESC = 'Using WSOLA to modify audio file.'
TOL_HELP = ('Number of samples the window positions in the input signal '
            'may be shifted.')

# Phase vocoder.
PV_HELP = 'Using phase vocoder to modify audio file.'
PV_DESC = 'Using phase vocoder to modify audio file.'
ZP_HELP = 'The size of the zero pad in the window function.'
# Help for --restore_energy; the previous wording said "reserve".
RE_HELP = 'Try to compensate for potential energy loss.'
FS_HELP = 'Apply circular shift to STFT and ISTFT.'
PL_HELP = 'Apply phase locking.'

# Phase vocoder for integer stretching factors.
PVI_HELP = 'Using phase vocoder specialized for integer stretching factor.'
PVI_DESC = 'Using phase vocoder specialized for integer stretching factor.'
A_PVI_HELP = 'The time stretching factor alpha. Only integer value is allowed.'

pytsmod/pvtsm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ def phase_vocoder_int(x, s, win_type='hann', win_size=2048, syn_hop_size=512,
172172

173173
y[c, :] = y_chan
174174

175-
return y
175+
return y.squeeze()
176176

177177

178178
def _find_peaks(spec):

tests/test_console.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
import pytest
2+
from pytsmod import ola, wsola
3+
from pytsmod import phase_vocoder as pv
4+
from pytsmod import phase_vocoder_int as pv_int
5+
import soundfile as sf
6+
import numpy as np
7+
import os
8+
from subprocess import call
9+
10+
11+
@pytest.mark.parametrize('algorithm', ['ola', 'wsola', 'pv', 'pv_int'])
def test_console_default_params(algorithm):
    """CLI output with default options matches the direct library call.

    Both results are written to WAV and read back so that they go through
    the same file encoding before being compared.
    """
    test_file = 'tests/data/castanetsviolin.wav'
    alpha = 2
    x, sr = sf.read(test_file)
    # The algorithm names are the aliases imported at the top of this module.
    y = globals()[algorithm](x, alpha)

    cmd = ['python', 'pytsmod/console/console.py', algorithm,
           test_file, 'temp_cli.wav', str(alpha)]
    if algorithm == 'pv_int':
        # NOTE(review): presumably phase_vocoder_int enables fft_shift by
        # default while the CLI flag is off unless given -- confirm.
        cmd.append('-fs')

    try:
        # A failing CLI run must fail the test here, not later as a
        # confusing "file not found" from sf.read.
        assert call(cmd) == 0, 'console command exited with an error'

        sf.write('temp.wav', y, sr)
        y_, _ = sf.read('temp.wav')
        y_cli, _ = sf.read('temp_cli.wav')
    finally:
        # Always clean up, even when the run or a read fails.
        for path in ('temp.wav', 'temp_cli.wav'):
            if os.path.exists(path):
                os.remove(path)

    assert np.allclose(y_, y_cli)
33+
34+
35+
@pytest.mark.parametrize('alpha', [1.25])
@pytest.mark.parametrize('win_type', ['sin'])
@pytest.mark.parametrize('win_size', [512])
@pytest.mark.parametrize('syn_hop_size', [256])
def test_console_ola(alpha, win_type, win_size, syn_hop_size):
    """The ``ola`` sub-command with explicit options matches ``ola()``."""
    test_file = 'tests/data/castanetsviolin.wav'
    x, sr = sf.read(test_file)
    y = ola(x, alpha, win_type=win_type, win_size=win_size,
            syn_hop_size=syn_hop_size)

    cmd = ['python', 'pytsmod/console/console.py', 'ola',
           test_file, 'temp_cli.wav', str(alpha),
           '-wt', win_type, '-ws', str(win_size),
           '-sh', str(syn_hop_size)]

    try:
        # Surface a failing CLI run directly instead of through a later
        # missing-file error.
        assert call(cmd) == 0, 'console command exited with an error'

        sf.write('temp.wav', y, sr)
        y_, _ = sf.read('temp.wav')
        y_cli, _ = sf.read('temp_cli.wav')
    finally:
        # Always clean up, even when the run or a read fails.
        for path in ('temp.wav', 'temp_cli.wav'):
            if os.path.exists(path):
                os.remove(path)

    assert np.allclose(y_, y_cli)
60+
61+
62+
@pytest.mark.parametrize('alpha', [1.25])
@pytest.mark.parametrize('win_type', ['sin'])
@pytest.mark.parametrize('win_size', [512])
@pytest.mark.parametrize('syn_hop_size', [256])
@pytest.mark.parametrize('tolerance', [256])
def test_console_wsola(alpha, win_type, win_size, syn_hop_size, tolerance):
    """The ``wsola`` sub-command with explicit options matches ``wsola()``."""
    test_file = 'tests/data/castanetsviolin.wav'
    x, sr = sf.read(test_file)
    y = wsola(x, alpha, win_type=win_type, win_size=win_size,
              syn_hop_size=syn_hop_size, tolerance=tolerance)

    cmd = ['python', 'pytsmod/console/console.py', 'wsola',
           test_file, 'temp_cli.wav', str(alpha),
           '-wt', win_type, '-ws', str(win_size),
           '-sh', str(syn_hop_size), '-t', str(tolerance)]

    try:
        # Surface a failing CLI run directly instead of through a later
        # missing-file error.
        assert call(cmd) == 0, 'console command exited with an error'

        sf.write('temp.wav', y, sr)
        y_, _ = sf.read('temp.wav')
        y_cli, _ = sf.read('temp_cli.wav')
    finally:
        # Always clean up, even when the run or a read fails.
        for path in ('temp.wav', 'temp_cli.wav'):
            if os.path.exists(path):
                os.remove(path)

    assert np.allclose(y_, y_cli)
88+
89+
90+
@pytest.mark.parametrize('alpha', [1.25])
@pytest.mark.parametrize('win_type', ['hann'])
@pytest.mark.parametrize('win_size', [1024])
@pytest.mark.parametrize('syn_hop_size', [256])
@pytest.mark.parametrize('zero_pad', [256])
@pytest.mark.parametrize('restore_energy', [True])
@pytest.mark.parametrize('fft_shift', [True])
@pytest.mark.parametrize('phase_lock', [True])
def test_console_pv(alpha, win_type, win_size, syn_hop_size, zero_pad,
                    restore_energy, fft_shift, phase_lock):
    """The ``pv`` sub-command with explicit options matches ``pv()``."""
    test_file = 'tests/data/castanetsviolin.wav'
    x, sr = sf.read(test_file)
    y = pv(x, alpha, win_type=win_type, win_size=win_size,
           syn_hop_size=syn_hop_size, zero_pad=zero_pad,
           restore_energy=restore_energy, fft_shift=fft_shift,
           phase_lock=phase_lock)

    cmd = ['python', 'pytsmod/console/console.py', 'pv',
           test_file, 'temp_cli.wav', str(alpha),
           '-wt', win_type, '-ws', str(win_size),
           '-sh', str(syn_hop_size), '-z', str(zero_pad)]
    # Derive the switches from the parameters so the command line always
    # mirrors the library call, whatever values are parametrized.
    for enabled, flag in ((restore_energy, '-e'), (fft_shift, '-fs'),
                          (phase_lock, '-pl')):
        if enabled:
            cmd.append(flag)

    try:
        # Surface a failing CLI run directly instead of through a later
        # missing-file error.
        assert call(cmd) == 0, 'console command exited with an error'

        sf.write('temp.wav', y, sr)
        y_, _ = sf.read('temp.wav')
        y_cli, _ = sf.read('temp_cli.wav')
    finally:
        # Always clean up, even when the run or a read fails.
        for path in ('temp.wav', 'temp_cli.wav'):
            if os.path.exists(path):
                os.remove(path)

    assert np.allclose(y_, y_cli)
123+
124+
125+
@pytest.mark.parametrize('alpha', [2])
@pytest.mark.parametrize('win_type', ['sin'])
@pytest.mark.parametrize('win_size', [1024])
@pytest.mark.parametrize('syn_hop_size', [256])
@pytest.mark.parametrize('zero_pad', [256])
@pytest.mark.parametrize('restore_energy', [True])
@pytest.mark.parametrize('fft_shift', [False])
def test_console_pv_int(alpha, win_type, win_size, syn_hop_size, zero_pad,
                        restore_energy, fft_shift):
    """The ``pv_int`` sub-command with explicit options matches ``pv_int()``.

    Earlier this test called ``pv`` and the ``pv`` sub-command, so the
    integer phase vocoder was never exercised with explicit options.
    """
    test_file = 'tests/data/castanetsviolin.wav'
    x, sr = sf.read(test_file)
    y = pv_int(x, alpha, win_type=win_type, win_size=win_size,
               syn_hop_size=syn_hop_size, zero_pad=zero_pad,
               restore_energy=restore_energy, fft_shift=fft_shift)

    cmd = ['python', 'pytsmod/console/console.py', 'pv_int',
           test_file, 'temp_cli.wav', str(alpha),
           '-wt', win_type, '-ws', str(win_size),
           '-sh', str(syn_hop_size), '-z', str(zero_pad)]
    # Derive the switches from the parameters so the command line always
    # mirrors the library call, whatever values are parametrized.
    for enabled, flag in ((restore_energy, '-e'), (fft_shift, '-fs')):
        if enabled:
            cmd.append(flag)

    try:
        # Surface a failing CLI run directly instead of through a later
        # missing-file error.
        assert call(cmd) == 0, 'console command exited with an error'

        sf.write('temp.wav', y, sr)
        y_, _ = sf.read('temp.wav')
        y_cli, _ = sf.read('temp_cli.wav')
    finally:
        # Always clean up, even when the run or a read fails.
        for path in ('temp.wav', 'temp_cli.wav'):
            if os.path.exists(path):
                os.remove(path)

    assert np.allclose(y_, y_cli)

tests/test_pytsmod.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22

33

44
def test_version():
5-
assert __version__ == '0.2.0'
5+
assert __version__ == '0.3.0'

0 commit comments

Comments
 (0)