Skip to content

Commit 077dcb7

Browse files
committed
added pitcher_gui.py and exe version
1 parent 7bb3058 commit 077dcb7

File tree

4 files changed

+340
-1
lines changed

4 files changed

+340
-1
lines changed

jealous6ud1.mp3.reapeaks

229 KB
Binary file not shown.

pitcher.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from librosa import load
1616
from librosa.core import resample
1717
from librosa.effects import time_stretch
18+
from librosa.util import normalize
1819

1920
ZOH_MULTIPLIER = 4
2021
RESAMPLE_MULTIPLIER = 2
@@ -213,10 +214,13 @@ def pitch(st, log_level, input_file, output_file, quantize_bits, skip_normalize,
213214
ratio = len(pitched) / len(resampled)
214215
log.info('\"skipping\" time stretch: stretching back to original length...')
215216
pitched = time_stretch(pitched, ratio)
217+
pitched = normalize(pitched)
216218

217219
if custom_time_stretch:
218-
log.info('running custom time stretch of ratio: {custom_time_stretch}')
220+
log.info(f'running custom time stretch of ratio: {custom_time_stretch}')
219221
pitched = time_stretch(pitched, custom_time_stretch)
222+
pitched = normalize(pitched)
223+
220224

221225
# oversample again (default factor of 4) to simulate ZOH
222226
# TODO: retest output against freq aliased sinc fn

pitcher_gui.py

Lines changed: 335 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,335 @@
1+
#! /usr/bin/env python3
2+
# Pitcher v 0.1
3+
# Copyright (C) 2020 Morgan Mitchell
4+
# Based on: Physical and Behavioral Circuit Modeling of the SP-12, DT Yeh, 2007
5+
# https://ccrma.stanford.edu/~dtyeh/sp12/yeh2007icmcsp12slides.pdf
6+
7+
8+
import logging
9+
import click
10+
import numpy as np
11+
import scipy as sp
12+
import audiofile as af
13+
import tkinter as tk
14+
15+
from pydub import AudioSegment
16+
from librosa import load
17+
from librosa.core import resample
18+
from librosa.effects import time_stretch
19+
from librosa.util import normalize
20+
21+
# Oversampling factors: ZOH_MULTIPLIER is the repeat count handed to
# zero_order_hold(); RESAMPLE_MULTIPLIER is the intermediate oversampling
# factor handed to scipy_resample().
ZOH_MULTIPLIER = 4
RESAMPLE_MULTIPLIER = 2

# Sample rates (Hz): rate the input file is loaded at, rate the output file
# is written at, and the rate the signal is resampled to in between.
INPUT_SR = 96000
OUTPUT_SR = 48000
TARGET_SR = 26040

# Per-semitone ratio used for upward shifts (raised to the power -st).
POSITIVE_TUNING_RATIO = 1.02930223664

# Length ratios for downward shifts, keyed by (negative) semitone count.
NEGATIVE_TUNING_RATIOS = {
    -1: 1.05652677103003,
    -2: 1.1215356033380033,
    -3: 1.1834835840896631,
    -4: 1.253228360845465,
    -5: 1.3310440397149297,
    -6: 1.4039714929646099,
    -7: 1.5028019735639886,
    -8: 1.5766735700797954,
}

# Accepted log-level names mapped to the matching logging constants.
log_levels = {
    level_name: getattr(logging, level_name)
    for level_name in ('INFO', 'DEBUG', 'WARNING', 'ERROR', 'CRITICAL')
}
43+
44+
45+
def calc_quantize_function(quantize_bits, log):
    """Build the quantization level tables for a given bit depth.

    Args:
        quantize_bits: number of bits; 2 ** quantize_bits levels are produced.
        log: logger used for progress messages.

    Returns:
        Tuple ``(s_midrise, s_midtread)`` of 1-D arrays of levels spanning a
        maximum amplitude of 1. The midtread table starts at -1 and includes
        0; the midrise table is the same grid shifted up by half a step.
    """
    # https://dspillustrations.com/pages/posts/misc/quantization-and-quantization-noise.html
    log.info(f'calculating quantize fn with {quantize_bits} quantize bits')
    max_amplitude = 1  # max amplitude to quantize
    level_count = 2 ** quantize_bits
    step = 2 * max_amplitude / level_count  # distance between adjacent levels
    offsets = np.arange(level_count) * step
    s_midrise = (-max_amplitude + step / 2) + offsets
    s_midtread = -max_amplitude + offsets
    log.info('done calculating quantize fn')
    return s_midrise, s_midtread
55+
56+
57+
def adjust_pitch(x, st, skip_time_stretch, log):
    """Shift the pitch of ``x`` by ``st`` semitones via nearest-index resampling.

    A length ratio ``t`` is chosen from ``st`` and the signal is re-indexed to
    roughly ``len(x) * t`` samples, so the duration changes along with pitch.

    Args:
        x: 1-D audio signal (array-like).
        st: semitones to shift. Positive values use POSITIVE_TUNING_RATIO,
            values found in NEGATIVE_TUNING_RATIOS use the table entry, and
            any other negative value (below -8, or fractional) is linearly
            inter-/extrapolated from the table.
        skip_time_stretch: accepted for signature compatibility; not used here.
        log: logger used for progress messages.

    Returns:
        ``x`` itself when ``st == 0``, otherwise a new numpy array.
    """
    log.info(f'adjusting audio pitch by {st} semitones')
    if st == 0:  # no change
        return x

    if st > 0:
        t = POSITIVE_TUNING_RATIO ** -st
    elif st in NEGATIVE_TUNING_RATIOS:
        t = NEGATIVE_TUNING_RATIOS[st]
    else:
        # Not in the table: interpolate between entries, or extrapolate past
        # either end (seems to lose a few points of precision below -8).
        f = sp.interpolate.interp1d(
            list(NEGATIVE_TUNING_RATIOS),
            list(NEGATIVE_TUNING_RATIOS.values()),
            fill_value='extrapolate'
        )
        t = f(st)

    n = int(np.round(len(x) * t))
    r = np.linspace(0, len(x) - 1, n).round().astype(np.int32)
    # Vectorised lookup instead of a per-sample Python loop. The final index
    # is deliberately left out so the output length (n - 1) matches what this
    # function has always produced.
    pitched = np.asarray(x)[r[:-1]]
    log.info('done pitching audio')

    return pitched
81+
82+
83+
def filter_input(x, log):
    """Apply the input anti-aliasing filter to ``x`` and return the result.

    The filter is a 4th-order digital elliptic low-pass (1 dB passband
    ripple, 72 dB stopband attenuation, normalized cutoff 0.666) run as
    second-order sections.
    """
    log.info('applying anti aliasing filter')
    # approximating the anti aliasing filter, don't think this needs to be
    # perfect since at fs/2=13.02kHz only -10dB attenuation, might be able to
    # improve accuracy in the 15 -> 20kHz range with firwin?
    sections = sp.signal.ellip(N=4, rp=1, rs=72, Wn=0.666,
                               analog=False, output='sos')
    filtered = sp.signal.sosfilt(sections, x)
    log.info('done applying anti aliasing filter')
    return filtered
92+
93+
94+
# could use sosfiltfilt for zero phase filtering, but it doubles filter order
95+
def filter_output(x, log):
    """Apply the output EQ filter to ``x`` and return the filtered signal.

    A 45-tap linear-phase FIR is designed with ``firwin2`` from the
    frequency/attenuation table below (attenuation in dB, converted to linear
    gain) at a sample rate of OUTPUT_SR.

    The taps are applied directly with ``lfilter``. Converting a 45-tap FIR
    to second-order sections requires finding the roots of a 44th-order
    polynomial, which is numerically ill-conditioned and can distort the
    response or fail conjugate pairing; direct FIR filtering avoids that.
    """
    log.info('applying output eq filter')
    freq = np.array([0, 6510, 8000, 10000, 11111, 13020, 15000, 17500, 20000, 24000])
    att = np.array([0, 0, -5, -10, -15, -23, -28, -35, -41, -40])
    gain = np.power(10, att/20)  # dB -> linear amplitude
    taps = sp.signal.firwin2(45, freq, gain, fs=OUTPUT_SR, antisymmetric=False)
    y = sp.signal.lfilter(taps, [1.0], x)
    log.info('done applying output eq filter')
    return y
105+
106+
107+
def scipy_resample(y, input_sr, target_sr, factor, log):
    """Resample ``y`` from ``input_sr`` to ``target_sr`` in two stages.

    Stage 1 resamples to ``target_sr * factor``; stage 2 decimates by
    ``factor`` to land on ``target_sr``. Returns the decimated signal.
    """
    oversampled_sr = target_sr * factor
    log.info(f'resampling audio to sample rate of {oversampled_sr}')
    duration = len(y) / input_sr  # length of the input in seconds
    sample_count = int(duration * oversampled_sr) + 1
    oversampled = sp.signal.resample(y, sample_count)
    log.info('done resample 1/2')

    log.info(f'resampling audio to sample rate of {target_sr}')
    result = sp.signal.decimate(oversampled, factor)
    log.info('done resample 2/2')

    log.info('done resampling audio')
    return result
119+
120+
121+
def zero_order_hold(y, zoh_multiplier, log):
    """Repeat every sample of ``y`` ``zoh_multiplier`` times, as float32."""
    log.info(f'applying zero order hold of {zoh_multiplier}')
    # intentionally oversample by repeating each sample 4 times
    # could also try a freq aliased sinc filter
    held = np.repeat(y, zoh_multiplier)
    held = held.astype(np.float32)
    log.info('done applying zero order hold')
    return held
128+
129+
130+
def nearest_values(x, y):
    """For each value in ``x`` return the index of the closest value in ``y``.

    Both inputs are treated as 1-D; lookup uses a KD-tree built over ``y``.
    """
    queries = np.asarray(x)
    candidates = np.asarray(y)
    tree = sp.spatial.cKDTree(candidates[:, None])
    _distances, indices = tree.query(queries[:, None], 1)
    return indices
135+
136+
137+
# no audible difference after audacity invert test @ 12 bits
138+
# however, when plotted the scaled amplitude of quantized audio is
139+
# noticeably higher than the original, leaving for now
140+
def quantize(x, S, bits, log):
    """Snap every sample of ``x`` to its nearest level in ``S``.

    Args:
        x: array of samples.
        S: array of allowed quantization levels.
        bits: bit depth, used only in the log message.
        log: logger used for progress messages.

    Returns:
        Array with the same shape as ``x`` holding values drawn from ``S``.
    """
    log.info(f'quantizing audio @ {bits} bits')
    level_indices = nearest_values(x, S)
    snapped = np.take(S, level_indices)
    quantized = snapped.reshape(x.shape)
    log.info('done quantizing')
    return quantized
146+
147+
148+
# https://stackoverflow.com/questions/53633177/how-to-read-a-mp3-audio-file-into-a-numpy-array-save-a-numpy-array-to-mp3
149+
def write_mp3(f, x, sr, normalized=False):
    """Encode a numpy array as a 320 kbit/s MP3.

    Args:
        f: output path or file object handed to ``AudioSegment.export``.
        x: samples; treated as stereo when shaped ``(n, 2)``, mono otherwise.
        sr: sample rate in Hz.
        normalized: when True, ``x`` holds floats nominally in [-1, 1) and is
            scaled by 2 ** 15; when False, ``x`` is taken as already being in
            int16 units.
    """
    channels = 2 if (x.ndim == 2 and x.shape[1] == 2) else 1
    scaled = x * 2 ** 15 if normalized else x
    # Clip before the cast: a sample at or beyond full scale (e.g. exactly
    # 1.0, or filter overshoot) would otherwise overflow int16 and come out
    # with the wrong sign, which is audible as a click.
    limits = np.iinfo(np.int16)
    y = np.clip(scaled, limits.min, limits.max).astype(np.int16)
    song = AudioSegment(y.tobytes(), frame_rate=sr, sample_width=2, channels=channels)
    song.export(f, format="mp3", bitrate="320k")
    return
159+
160+
161+
#@click.command()
162+
#@click.option('--st', default=0, help='number of semitones to shift')
163+
#@click.option('--log-level', default='INFO')
164+
#@click.option('--input-file', required=True)
165+
#@click.option('--output-file', required=True)
166+
#@click.option('--quantize-bits', default=12, help='bit rate of quantized output')
167+
#@click.option('--skip-quantize', is_flag=True, default=False)
168+
#@click.option('--skip-normalize', is_flag=True, default=False)
169+
#@click.option('--skip-input-filter', is_flag=True, default=False)
170+
#@click.option('--skip-output-filter', is_flag=True, default=False)
171+
#@click.option('--skip-time-stretch', is_flag=True, default=False)
172+
#@click.option('--custom-time-stretch', default=0, type=float)
173+
def pitch(st, input_file, output_file, log_level='INFO', quantize_bits=12, skip_normalize=False,
          skip_quantize=False, skip_input_filter=False, skip_output_filter=False, skip_time_stretch=False,
          custom_time_stretch=0):
    """Run the whole processing chain on ``input_file`` and write ``output_file``.

    Steps, in order: load at INPUT_SR, input anti-aliasing filter, resample
    to TARGET_SR, quantize, pitch shift, optional time stretch(es), zero
    order hold, resample to OUTPUT_SR, output EQ filter, write to disk.

    Args:
        st: number of semitones to shift.
        input_file: path of the audio file to read.
        output_file: path to write; a name ending in ``.mp3`` is written with
            write_mp3, anything else with ``audiofile.write``.
        log_level: one of the keys of ``log_levels`` (case-insensitive);
            anything else falls back to ``'INFO'`` with a warning.
        quantize_bits: bit depth used to build the quantization table.
        skip_normalize: passed (negated) to the writer.
        skip_quantize: skip the quantize step.
        skip_input_filter: skip the input anti-aliasing filter.
        skip_output_filter: skip the output EQ filter.
        skip_time_stretch: stretch the pitched audio back to the pre-pitch
            length instead of leaving the duration changed.
        custom_time_stretch: extra time-stretch rate; 0 disables it.
    """
    log = logging.getLogger(__name__)
    # pitch() runs once per button press in the GUI; attach the handler only
    # the first time so messages are not printed once per previous call.
    if not log.handlers:
        sh = logging.StreamHandler()
        sh.setFormatter(logging.Formatter('%(levelname)-8s %(message)s'))
        log.addHandler(sh)

    valid_levels = list(log_levels.keys())
    # Normalize case once so validation and lookup agree ('info' -> 'INFO').
    level_name = log_level.upper() if isinstance(log_level, str) else ''
    if level_name not in log_levels:
        log.warning(f'Invalid log-level: "{log_level}", log-level set to "INFO", '
                    f'valid log levels are {valid_levels}')
        level_name = 'INFO'

    log.setLevel(log_levels[level_name])

    log.info(f'loading: "{input_file}" at oversampled rate: {INPUT_SR}')
    y, _ = load(input_file, sr=INPUT_SR)
    log.info('done loading')

    _, midtread = calc_quantize_function(quantize_bits, log)

    if skip_input_filter:
        log.info('skipping input anti aliasing filter')
    else:
        y = filter_input(y, log)

    resampled = scipy_resample(y, INPUT_SR, TARGET_SR, RESAMPLE_MULTIPLIER, log)

    if skip_quantize:
        log.info('skipping quantize')
    else:
        # simulate analog -> digital conversion
        # TODO: midtread/midrise option?
        resampled = quantize(resampled, midtread, quantize_bits, log)

    pitched = adjust_pitch(resampled, st, skip_time_stretch, log)

    if skip_time_stretch:
        ratio = len(pitched) / len(resampled)
        log.info('\"skipping\" time stretch: stretching back to original length...')
        # rate is passed by keyword: newer librosa releases reject it positionally
        pitched = time_stretch(pitched, rate=ratio)
        pitched = normalize(pitched)

    if custom_time_stretch:
        log.info(f'running custom time stretch of ratio: {custom_time_stretch}')
        pitched = time_stretch(pitched, rate=custom_time_stretch)
        pitched = normalize(pitched)

    # oversample again (default factor of 4) to simulate ZOH
    # TODO: retest output against freq aliased sinc fn
    post_zero_order_hold = zero_order_hold(pitched, ZOH_MULTIPLIER, log)

    # TODO: try using scipy resample here?
    output = resample(np.asfortranarray(post_zero_order_hold),
                      orig_sr=TARGET_SR * ZOH_MULTIPLIER, target_sr=OUTPUT_SR)

    if skip_output_filter:
        log.info('skipping output eq filter')
    else:
        output = filter_output(output, log)  # eq filter

    log.info(f'writing {output_file}, at sample rate {OUTPUT_SR} '
             f'with skip_normalize set to {skip_normalize}')

    # Decide on the actual extension; a substring test would also match
    # names such as "take.mp3.wav".
    if output_file.lower().endswith('.mp3'):
        write_mp3(output_file, output, OUTPUT_SR, not skip_normalize)
    else:
        af.write(output_file, output, OUTPUT_SR, '16bit', not skip_normalize)

    log.info(f'done! output_file at: {output_file}')
    return
250+
251+
252+
if __name__ == '__main__':
    # tkinter does not load these submodules with a bare "import tkinter".
    from tkinter import filedialog, messagebox

    AUDIO_FILETYPES = [("audio files", "*.mp3 *.wav *.flac")]

    window = tk.Tk()
    window.geometry('600x200')
    window.resizable(True, False)
    window.title('P I T C H E R')

    window.columnconfigure(0, weight=1)
    window.columnconfigure(1, weight=3)

    # Backing variable for the semitone slider.
    current_value = tk.DoubleVar()

    def get_current_value():
        """Return the slider value formatted with two decimals."""
        return '{: .2f}'.format(current_value.get())

    def slider_changed(event):
        """Keep the numeric label in sync with the slider."""
        value_label.configure(text=get_current_value())

    st_slider = tk.Scale(
        window,
        from_=12,
        to=-12,
        orient='vertical',
        tickinterval=1,
        command=slider_changed,
        variable=current_value
    )
    st_slider.grid(column=1, row=0, sticky='we')

    slider_label = tk.Label(window, text='Semitones:')
    slider_label.grid(column=0, row=0, sticky='w')

    value_label = tk.Label(window, text=get_current_value())
    value_label.grid(column=1, row=0, sticky='n')

    input_entry = tk.Entry(window, width=60)
    input_entry.grid(column=1, row=3, sticky='w')

    output_entry = tk.Entry(window, width=60)
    output_entry.grid(column=1, row=4, sticky='w')

    def askopeninputfilename():
        """Let the user pick an existing audio file as the input."""
        input_file = filedialog.askopenfilename(
            filetypes=AUDIO_FILETYPES, parent=window, title='Choose a file')
        input_entry.delete(0, tk.END)
        input_entry.insert(0, input_file)

    def askopenoutputfilename():
        """Let the user choose where to save the output.

        Uses a save dialog: an open dialog only accepts files that already
        exist, which made it impossible to name a new output file.
        """
        output_file = filedialog.asksaveasfilename(
            filetypes=AUDIO_FILETYPES, parent=window, title='Choose a file')
        output_entry.delete(0, tk.END)
        output_entry.insert(0, output_file)

    def run_pitch():
        """Validate the form and run pitch() with the current settings."""
        input_file = input_entry.get()
        output_file = output_entry.get()
        if not input_file or not output_file:
            messagebox.showerror(
                'P I T C H E R',
                'Please choose both an input file and an output file.',
                parent=window)
            return
        pitch(int(float(get_current_value())), input_file, output_file)

    input_browse_button = tk.Button(window, text='Input File', command=askopeninputfilename)
    input_browse_button.grid(column=0, padx=5, row=3, sticky='w')

    output_browse_button = tk.Button(window, text='Output File', command=askopenoutputfilename)
    output_browse_button.grid(column=0, padx=5, row=4, sticky='w')

    run_button = tk.Button(window, text='Pitch', command=run_pitch)
    run_button.grid(column=0, padx=5, row=5, sticky='w')

    window.mainloop()

warning6ud4.wav.reapeaks

129 KB
Binary file not shown.

0 commit comments

Comments
 (0)