1
+ #! /usr/bin/env python3
2
+ # Pitcher v 0.1
3
+ # Copyright (C) 2020 Morgan Mitchell
4
+ # Based on: Physical and Behavioral Circuit Modeling of the SP-12, DT Yeh, 2007
5
+ # https://ccrma.stanford.edu/~dtyeh/sp12/yeh2007icmcsp12slides.pdf
6
+
7
+
8
+ import logging
9
+ import click
10
+ import numpy as np
11
+ import scipy as sp
12
+ import audiofile as af
13
+ import tkinter as tk
14
+
15
+ from pydub import AudioSegment
16
+ from librosa import load
17
+ from librosa .core import resample
18
+ from librosa .effects import time_stretch
19
+ from librosa .util import normalize
20
+
21
# Oversampling factors: samples repeated per input sample by the zero order
# hold, and the multiple of TARGET_SR used as the intermediate resample rate.
ZOH_MULTIPLIER = 4
RESAMPLE_MULTIPLIER = 2

# Sample rates in Hz: file load rate, file write rate, and the rate the
# audio is resampled to before quantizing and pitching.
INPUT_SR = 96000
OUTPUT_SR = 48000
TARGET_SR = 26040

# adjust_pitch raises this to the power -st for upward shifts (st > 0).
POSITIVE_TUNING_RATIO = 1.02930223664

# Length ratios for downward shifts, keyed by semitone count -1 .. -8.
NEGATIVE_TUNING_RATIOS = dict(zip(
    range(-1, -9, -1),
    (
        1.05652677103003,
        1.1215356033380033,
        1.1834835840896631,
        1.253228360845465,
        1.3310440397149297,
        1.4039714929646099,
        1.5028019735639886,
        1.5766735700797954,
    ),
))

# Accepted log level names mapped to the logging module's numeric levels.
log_levels = {
    level_name: getattr(logging, level_name)
    for level_name in ('INFO', 'DEBUG', 'WARNING', 'ERROR', 'CRITICAL')
}
43
+
44
+
45
def calc_quantize_function(quantize_bits, log):
    """Build the uniform quantizer level tables for amplitudes in [-1, 1].

    Args:
        quantize_bits: bit depth; 2 ** quantize_bits levels are generated.
        log: logger used for progress messages.

    Returns:
        Tuple ``(s_midrise, s_midtread)`` of 1-D arrays. The mid-tread levels
        start at -1 and step by the level distance; the mid-rise levels are
        the same grid shifted up by half a level.
    """
    # https://dspillustrations.com/pages/posts/misc/quantization-and-quantization-noise.html
    log.info(f'calculating quantize fn with {quantize_bits} quantize bits')
    max_amplitude = 1
    level_count = 2 ** quantize_bits
    step = 2 * max_amplitude / level_count  # distance between adjacent levels
    offsets = np.arange(level_count) * step
    s_midtread = -max_amplitude + offsets
    s_midrise = -max_amplitude + step / 2 + offsets
    log.info('done calculating quantize fn')
    return s_midrise, s_midtread
55
+
56
+
57
def adjust_pitch(x, st, skip_time_stretch, log):
    """Shift the pitch of ``x`` by ``st`` semitones using nearest-sample lookup.

    The signal is stretched (st < 0) or shortened (st > 0) to
    ``round(len(x) * ratio)`` samples by picking, for every output position,
    the nearest input sample on an evenly spaced grid over the input.

    Args:
        x: 1-D sequence of samples.
        st: semitone shift. -1..-8 use NEGATIVE_TUNING_RATIOS directly, values
            below -8 are linearly extrapolated from that table, positive
            values use POSITIVE_TUNING_RATIO ** -st.
        skip_time_stretch: unused here; kept for call compatibility.
        log: logger used for progress messages.

    Returns:
        ``x`` itself when ``st == 0``, otherwise a new array holding the
        resampled signal.

    Raises:
        KeyError: if ``st`` lies in [-8, 0) but is not a key of
            NEGATIVE_TUNING_RATIOS (e.g. a fractional value).
    """
    log.info(f'adjusting audio pitch by {st} semitones')
    if st == 0:  # no change
        return x

    if 0 > st >= -8:
        t = NEGATIVE_TUNING_RATIOS[st]
    elif st > 0:
        t = POSITIVE_TUNING_RATIO ** -st
    else:  # -8 > st: extrapolate, seems to lose a few points of precision?
        f = sp.interpolate.interp1d(
            list(NEGATIVE_TUNING_RATIOS.keys()),
            list(NEGATIVE_TUNING_RATIOS.values()),
            fill_value='extrapolate'
        )
        t = float(f(st))

    x = np.asarray(x)
    n = int(np.round(len(x) * t))
    # n source indices spread evenly over the whole input, first to last
    r = np.linspace(0, len(x) - 1, n).round().astype(np.intp)
    # index all n positions at once; looping to n - 1 dropped the last sample
    pitched = x[r]
    log.info('done pitching audio')

    return pitched
81
+
82
+
83
def filter_input(x, log):
    """Apply the input anti aliasing low-pass filter to ``x``.

    The filter is a 4th order digital elliptic design (1 dB passband ripple,
    72 dB stopband attenuation, critical frequency 0.666 of Nyquist) applied
    as second-order sections.

    Args:
        x: samples to filter.
        log: logger used for progress messages.

    Returns:
        The filtered samples.
    """
    log.info('applying anti aliasing filter')
    # approximating the anti aliasing filter, don't think this needs to be
    # perfect since at fs/2=13.02kHz only -10dB attenuation, might be able to
    # improve accuracy in the 15 -> 20kHz range with firwin?
    sections = sp.signal.ellip(
        N=4,
        rp=1,
        rs=72,
        Wn=0.666,
        analog=False,
        output='sos',
    )
    filtered = sp.signal.sosfilt(sections, x)
    log.info('done applying anti aliasing filter')
    return filtered
92
+
93
+
94
+ # could use sosfiltfilt for zero phase filtering, but it doubles filter order
95
def filter_output(x, log):
    """Apply the output EQ filter to ``x``.

    A 45-tap FIR filter is designed with ``firwin2`` from a table of
    frequency (Hz) / attenuation (dB) points at sample rate OUTPUT_SR and
    applied to the signal.

    Args:
        x: samples at OUTPUT_SR.
        log: logger used for progress messages.

    Returns:
        The filtered samples.
    """
    log.info('applying output eq filter')
    freq = np.array([0, 6510, 8000, 10000, 11111, 13020, 15000, 17500, 20000, 24000])
    att = np.array([0, 0, -5, -10, -15, -23, -28, -35, -41, -40])
    gain = np.power(10, att / 20)  # dB -> linear amplitude
    taps = sp.signal.firwin2(45, freq, gain, fs=OUTPUT_SR, antisymmetric=False)
    # Apply the FIR taps directly. Converting them to second-order sections
    # needs the roots of a degree-44 polynomial, which is slow and adds
    # numerical error without any benefit for an all-zero filter.
    y = sp.signal.lfilter(taps, [1.0], x)
    log.info('done applying output eq filter')
    return y
105
+
106
+
107
def scipy_resample(y, input_sr, target_sr, factor, log):
    """Resample ``y`` from ``input_sr`` to ``target_sr`` in two steps.

    The signal is first resampled to ``target_sr * factor`` with
    ``scipy.signal.resample`` and then decimated by ``factor``.

    Args:
        y: samples at ``input_sr``.
        input_sr: sample rate of ``y``.
        target_sr: final sample rate.
        factor: integer oversampling factor of the intermediate rate.
        log: logger used for progress messages.

    Returns:
        The decimated signal.
    """
    oversampled_sr = target_sr * factor
    log.info(f'resampling audio to sample rate of {oversampled_sr}')
    duration = len(y) / input_sr  # length of the input in seconds
    oversampled_len = 1 + int(duration * oversampled_sr)
    oversampled = sp.signal.resample(y, oversampled_len)
    log.info('done resample 1/2')

    log.info(f'resampling audio to sample rate of {target_sr}')
    result = sp.signal.decimate(oversampled, factor)
    log.info('done resample 2/2')

    log.info('done resampling audio')
    return result
119
+
120
+
121
def zero_order_hold(y, zoh_multiplier, log):
    """Repeat every sample of ``y`` ``zoh_multiplier`` times.

    Args:
        y: samples to hold.
        zoh_multiplier: number of copies emitted per input sample.
        log: logger used for progress messages.

    Returns:
        A flat float32 array that is ``zoh_multiplier`` times as long as ``y``.
    """
    log.info(f'applying zero order hold of {zoh_multiplier}')
    # intentionally oversample by repeating each sample 4 times
    # could also try a freq aliased sinc filter
    held = np.asarray(y).repeat(zoh_multiplier)
    held = held.astype(np.float32)
    log.info('done applying zero order hold')
    return held
128
+
129
+
130
def nearest_values(x, y):
    """Return, for each value in ``x``, the index of the closest value in ``y``.

    Args:
        x: 1-D sequence of query values.
        y: 1-D sequence of candidate values.

    Returns:
        Array of indices into ``y``, one per element of ``x``.
    """
    queries = np.asarray(x)
    candidates = np.asarray(y)
    # the KD-tree works on points, so treat each scalar as a 1-D point
    tree = sp.spatial.cKDTree(candidates[:, np.newaxis])
    _, ordered_neighbors = tree.query(queries[:, np.newaxis], k=1)
    return ordered_neighbors
135
+
136
+
137
+ # no audible difference after audacity invert test @ 12 bits
138
+ # however, when plotted the scaled amplitude of quantized audio is
139
+ # noticeably higher than the original, leaving for now
140
def quantize(x, S, bits, log):
    """Snap every sample of ``x`` to its nearest level in ``S``.

    Args:
        x: array of samples.
        S: array of quantization levels.
        bits: bit depth, used only in the log message.
        log: logger used for progress messages.

    Returns:
        Array with the shape of ``x`` whose entries are taken from ``S``.
    """
    log.info(f'quantizing audio @ {bits} bits')
    level_indices = nearest_values(x, S)
    snapped = S.flat[level_indices]
    quantized = np.reshape(snapped, x.shape)
    log.info('done quantizing')
    return quantized
146
+
147
+
148
+ # https://stackoverflow.com/questions/53633177/how-to-read-a-mp3-audio-file-into-a-numpy-array-save-a-numpy-array-to-mp3
149
def write_mp3(f, x, sr, normalized=False):
    """Write a numpy array to ``f`` as a 320k MP3 via pydub.

    Args:
        f: output path or file object handed to ``AudioSegment.export``.
        x: sample array; treated as stereo when its shape is ``(n, 2)``,
            otherwise as mono.
        sr: sample rate of ``x``.
        normalized: when True the samples are floats in [-1, 1) and are
            scaled by 2 ** 15; when False they are already on the int16 scale.
    """
    x = np.asarray(x)
    channels = 2 if (x.ndim == 2 and x.shape[1] == 2) else 1
    scaled = x * 2 ** 15 if normalized else x
    # Clip before the cast: a sample of exactly 1.0 (or any overshoot) would
    # otherwise fall outside the int16 range and wrap around.
    limits = np.iinfo(np.int16)
    y = np.clip(scaled, limits.min, limits.max).astype(np.int16)
    song = AudioSegment(y.tobytes(), frame_rate=sr, sample_width=2, channels=channels)
    song.export(f, format="mp3", bitrate="320k")
159
+
160
+
161
+ #@click.command()
162
+ #@click.option('--st', default=0, help='number of semitones to shift')
163
+ #@click.option('--log-level', default='INFO')
164
+ #@click.option('--input-file', required=True)
165
+ #@click.option('--output-file', required=True)
166
+ #@click.option('--quantize-bits', default=12, help='bit rate of quantized output')
167
+ #@click.option('--skip-quantize', is_flag=True, default=False)
168
+ #@click.option('--skip-normalize', is_flag=True, default=False)
169
+ #@click.option('--skip-input-filter', is_flag=True, default=False)
170
+ #@click.option('--skip-output-filter', is_flag=True, default=False)
171
+ #@click.option('--skip-time-stretch', is_flag=True, default=False)
172
+ #@click.option('--custom-time-stretch', default=0, type=float)
173
def pitch(st, input_file, output_file, log_level='INFO', quantize_bits=12, skip_normalize=False,
          skip_quantize=False, skip_input_filter=False, skip_output_filter=False, skip_time_stretch=False,
          custom_time_stretch=0):
    """Run the full pitching pipeline on ``input_file`` and write ``output_file``.

    Steps: load at INPUT_SR, optional anti aliasing filter, resample to
    TARGET_SR, optional quantization, pitch shift, optional time stretches,
    zero order hold, resample to OUTPUT_SR, optional output EQ filter, write.

    Args:
        st: number of semitones to shift.
        input_file: path of the audio file to load.
        output_file: destination path; a name ending in ``.mp3`` is written
            with write_mp3, anything else with ``audiofile.write``.
        log_level: name of a level in ``log_levels`` (case-insensitive);
            invalid values fall back to 'INFO' with a warning.
        quantize_bits: bit depth used for the quantization step.
        skip_normalize: passed (negated) to the writer as its normalize flag.
        skip_quantize: skip the quantization step.
        skip_input_filter: skip the input anti aliasing filter.
        skip_output_filter: skip the output EQ filter.
        skip_time_stretch: stretch the pitched audio back to its pre-pitch
            length.
        custom_time_stretch: when non-zero, additional time stretch rate.
    """
    log = logging.getLogger(__name__)
    # pitch() can run many times per process (once per GUI click); attach the
    # handler only once so messages are not printed repeatedly.
    if not log.handlers:
        sh = logging.StreamHandler()
        sh.setFormatter(logging.Formatter('%(levelname)-8s %(message)s'))
        log.addHandler(sh)

    valid_levels = list(log_levels.keys())
    # normalise the case once so validation and lookup agree
    level_name = log_level.upper() if isinstance(log_level, str) else None
    if level_name not in log_levels:
        log.warning(f'Invalid log-level: "{log_level}", log-level set to "INFO", '
                    f'valid log levels are {valid_levels}')
        level_name = 'INFO'

    log.setLevel(log_levels[level_name])

    log.info(f'loading: "{input_file}" at oversampled rate: {INPUT_SR}')
    y, _ = load(input_file, sr=INPUT_SR)
    log.info('done loading')

    _, midtread = calc_quantize_function(quantize_bits, log)

    if skip_input_filter:
        log.info('skipping input anti aliasing filter')
    else:
        y = filter_input(y, log)

    resampled = scipy_resample(y, INPUT_SR, TARGET_SR, RESAMPLE_MULTIPLIER, log)

    if skip_quantize:
        log.info('skipping quantize')
    else:
        # simulate analog -> digital conversion
        # TODO: midtread/midrise option?
        resampled = quantize(resampled, midtread, quantize_bits, log)

    pitched = adjust_pitch(resampled, st, skip_time_stretch, log)

    # librosa >= 0.10 accepts rate / orig_sr / target_sr as keywords only;
    # the keyword names are also valid on older releases.
    if skip_time_stretch:
        ratio = len(pitched) / len(resampled)
        log.info('\" skipping\" time stretch: stretching back to original length...')
        pitched = time_stretch(pitched, rate=ratio)
        pitched = normalize(pitched)

    if custom_time_stretch:
        log.info(f'running custom time stretch of ratio: {custom_time_stretch}')
        pitched = time_stretch(pitched, rate=custom_time_stretch)
        pitched = normalize(pitched)

    # oversample again (default factor of 4) to simulate ZOH
    # TODO: retest output against freq aliased sinc fn
    post_zero_order_hold = zero_order_hold(pitched, ZOH_MULTIPLIER, log)

    # TODO: try using scipy resample here?
    output = resample(np.asfortranarray(post_zero_order_hold),
                      orig_sr=TARGET_SR * ZOH_MULTIPLIER, target_sr=OUTPUT_SR)

    if skip_output_filter:
        log.info('skipping output eq filter')
    else:
        output = filter_output(output, log)  # eq filter

    log.info(f'writing {output_file}, at sample rate {OUTPUT_SR} '
             f'with skip_normalize set to {skip_normalize}')

    # decide by extension, not by '.mp3' appearing anywhere in the path
    if output_file.lower().endswith('.mp3'):
        write_mp3(output_file, output, OUTPUT_SR, not skip_normalize)
    else:
        # NOTE(review): '16bit' looks like audiofile's older string
        # `precision` argument; newer releases appear to expect an integer
        # bit depth -- confirm against the installed audiofile version.
        af.write(output_file, output, OUTPUT_SR, '16bit', not skip_normalize)

    log.info(f'done! output_file at: {output_file}')
250
+
251
+
252
if __name__ == '__main__':
    # Small tkinter front end: a semitone slider, input/output path entries
    # with browse buttons, and a button that runs pitch() with those values.
    from tkinter import filedialog

    audio_filetypes = [("audio files", "*.mp3 *.wav *.flac")]

    window = tk.Tk()
    window.geometry('600x200')
    window.resizable(True, False)
    window.title('P I T C H E R')

    window.columnconfigure(0, weight=1)
    window.columnconfigure(1, weight=3)

    current_value = tk.DoubleVar()

    def get_current_value():
        """Return the slider value formatted with two decimals."""
        return '{: .2f}'.format(current_value.get())

    def slider_changed(event):
        """Mirror the slider position into the value label."""
        value_label.configure(text=get_current_value())

    st_slider = tk.Scale(
        window,
        from_=12,
        to=-12,
        orient='vertical',
        tickinterval=1,
        command=slider_changed,
        variable=current_value
    )
    st_slider.grid(column=1, row=0, sticky='we')

    slider_label = tk.Label(window, text='Semitones:')
    slider_label.grid(column=0, row=0, sticky='w')

    value_label = tk.Label(window, text=get_current_value())
    value_label.grid(column=1, row=0, sticky='n')

    input_entry = tk.Entry(width=60)
    input_entry.grid(column=1, row=3, sticky='w')

    output_entry = tk.Entry(width=60)
    output_entry.grid(column=1, row=4, sticky='w')

    def set_entry(entry, path):
        """Replace the entry text with ``path``; keep it if the dialog was cancelled."""
        if not path:
            return
        entry.delete(0, tk.END)
        entry.insert(0, path)

    def askopeninputfilename():
        """Let the user pick an existing audio file as input."""
        input_file = filedialog.askopenfilename(
            filetypes=audio_filetypes, parent=window, title='Choose a file')
        set_entry(input_entry, input_file)

    def askopenoutputfilename():
        """Let the user pick the output path, which may not exist yet."""
        # a save dialog is required here: an open dialog only accepts
        # files that already exist
        output_file = filedialog.asksaveasfilename(
            filetypes=audio_filetypes, parent=window, title='Choose a file')
        set_entry(output_entry, output_file)

    def run_pitch():
        """Run pitch() with the current slider value and the two paths."""
        pitch(int(float(get_current_value())), input_entry.get(), output_entry.get())

    input_browse_button = tk.Button(window, text='Input File', command=askopeninputfilename)
    input_browse_button.grid(column=0, padx=5, row=3, sticky='w')

    output_browse_button = tk.Button(window, text='Output File', command=askopenoutputfilename)
    output_browse_button.grid(column=0, padx=5, row=4, sticky='w')

    run_button = tk.Button(window, text='Pitch', command=run_pitch)
    run_button.grid(column=0, padx=5, row=5, sticky='w')

    window.mainloop()
0 commit comments