Skip to content

Commit d46145c

Browse files
Merge pull request #6 from schuler-henry/4-playground-windowing
4 playground windowing
2 parents 8ece8ec + bf6c617 commit d46145c

File tree

10 files changed

+1250
-0
lines changed

10 files changed

+1250
-0
lines changed

code/main.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from preprocessing.AudioPreprocessor import AudioPreprocessor
2+
3+
def main():
    """Run the preprocessing pipeline on ``./audio.wav`` and print the result."""
    windowed = AudioPreprocessor.load_preprocessed_frames("./audio.wav")
    print(windowed)


if __name__ == "__main__":
    main()
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import numpy as np
2+
import librosa
3+
import librosa.display
4+
import noisereduce as nr
5+
6+
class AudioPreprocessor:
    """
    Collection of static helpers that turn an audio signal into windowed frames.

    ``load_preprocessed_frames`` chains the helpers in this order:
    noise reduction, silence removal, framing, windowing.
    """

    @staticmethod
    def int_to_float(array, type=np.float32):
        """
        Change an integer np.array into a peak-normalised floating point array.

        Parameters
        ----------
        array: np.array
            Samples to convert.
        type: np.float32
            Floating point dtype of the result.

        Returns
        -------
        result : np.array
            ``array`` itself when it already has dtype ``type`` or any other
            floating point dtype (float16 / float32 / float64). Otherwise a new
            array of dtype ``type`` divided by its largest absolute value
            (all zeros when the input is all zeros or empty).
        """

        if array.dtype == type:
            return array

        if array.dtype in [np.float16, np.float32, np.float64]:
            return array

        # Cast before taking the absolute value: np.abs on an integer array
        # wraps around for the most negative value (e.g. -32768 for int16),
        # which would corrupt the peak used for normalisation.
        converted = array.astype(type)

        if converted.size == 0:
            # np.max raises on empty input; nothing to normalise anyway.
            return converted

        peak = np.max(np.abs(converted))
        if peak == 0:
            return converted

        return converted / peak

    @staticmethod
    def float_to_int(array, type=np.int16, divide_max_abs=True):
        """
        Change np.array float32 / float64 into np.int16.

        Parameters
        ----------
        array: np.array
            Samples to convert. The input is never modified.
        type: np.int16
            Integer dtype of the result; its maximum value is the scale factor.
        divide_max_abs: bool
            When True the samples are divided by their largest absolute value
            before scaling, so the peak maps to the integer maximum. When
            False the samples are scaled as they are.

        Returns
        -------
        result : np.array
            ``array`` itself when it already has dtype ``type`` or is int16 /
            int32 / int64. Otherwise a new array of dtype ``type``.
        """

        if array.dtype == type:
            return array

        if array.dtype in [np.int16, np.int32, np.int64]:
            return array

        if array.size == 0:
            # np.max raises on empty input; only the dtype has to change.
            return array.astype(type)

        full_scale = np.iinfo(type).max
        peak = np.max(np.abs(array))

        # An all-zero signal cannot be normalised (division by zero), so it
        # is scaled directly, which yields zeros.
        if divide_max_abs and peak != 0:
            array = array / peak

        return (array * full_scale).astype(type)

    @staticmethod
    def remove_silence(y, threshold=0.005, min_pause_samples=200, keep_samples=50):
        """
        Cut the inner part of long quiet stretches out of a 1-D signal.

        A stretch is a run of consecutive samples whose absolute value is
        below ``threshold``. Runs longer than ``min_pause_samples`` are
        shortened so that only ``keep_samples`` samples remain at each end of
        the run. Shorter runs are left untouched.

        Parameters
        ----------
        y: np.array
            1-D signal.
        threshold: float
            Absolute amplitude below which a sample counts as silent.
        min_pause_samples: int
            Run length (in samples, not milliseconds) that must be exceeded
            before a run is shortened.
        keep_samples: int
            Number of silent samples kept at the start and at the end of a
            shortened run.

        Returns
        -------
        result : np.array
            New array without the removed samples.

        Raises
        ------
        ValueError
            If ``min_pause_samples`` or ``keep_samples`` is negative.
        """

        if min_pause_samples < 0 or keep_samples < 0:
            raise ValueError("min_pause_samples and keep_samples must not be negative.")

        samples = np.asarray(y)
        is_silent = np.abs(samples) < threshold

        # Pad with False on both sides so that runs touching the start or the
        # end of the signal still produce a start edge and a stop edge.
        padded = np.concatenate(([False], is_silent, [False]))
        edges = np.flatnonzero(padded[1:] != padded[:-1])

        keep_mask = np.ones(samples.shape[0], dtype=bool)

        # Edges alternate: run start (inclusive), run stop (exclusive).
        for start, stop in zip(edges[::2], edges[1::2]):
            cut_from = start + keep_samples
            cut_to = stop - keep_samples
            if stop - start > min_pause_samples and cut_from < cut_to:
                keep_mask[cut_from:cut_to] = False

        return samples[keep_mask]

    @staticmethod
    def remove_noise(y, sr):
        """
        Reduce noise in ``y`` with ``noisereduce.reduce_noise``.

        Parameters
        ----------
        y: np.array
            Signal.
        sr: int
            Sampling rate of ``y``.

        Returns
        -------
        result : np.array
            Output of ``nr.reduce_noise``.
        """
        # prop_decrease 0.8 only reduces noise by 0.8 -> sound quality is better than at 1.0
        return nr.reduce_noise(y=y, sr=sr, prop_decrease=0.8)

    @staticmethod
    def create_frames(y, frame_size, overlap):
        """
        Split ``y`` into frames of ``frame_size`` samples.

        Consecutive frames start ``frame_size - overlap`` samples apart. Only
        complete frames are returned; leftover samples at the end are dropped.

        Parameters
        ----------
        y: np.array
            Signal; frames are taken along the first axis.
        frame_size: int
            Number of samples per frame.
        overlap: int
            Number of samples shared by two consecutive frames.

        Returns
        -------
        result : list of np.array
            Slices of ``y``. Empty when the arguments are invalid
            (``frame_size <= 0``, ``overlap < 0`` or ``overlap >= frame_size``)
            or when ``y`` is shorter than one frame.
        """
        if overlap >= frame_size or frame_size <= 0 or overlap < 0:
            return []

        hop = frame_size - overlap
        # Last admissible start index; "+ 1" below makes the bound inclusive so
        # a frame that ends exactly on the last sample is not dropped.
        last_start = y.shape[0] - frame_size

        return [y[start: start + frame_size] for start in range(0, last_start + 1, hop)]

    @staticmethod
    def window_frames(frames, window_function=np.hanning):
        """
        Multiply every frame with a window of matching length.

        Parameters
        ----------
        frames: iterable of np.array
            Frames to window.
        window_function: callable
            Called with the frame length, must return the window samples.

        Returns
        -------
        result : list of np.array
            Windowed frames in input order.
        """
        return [frame * window_function(frame.shape[0]) for frame in frames]

    @staticmethod
    def load_preprocessed_frames(filepath=None, y=None, sr=None, frame_size=1000, overlap=100):
        """
        Run the whole preprocessing chain and return windowed frames.

        Parameters
        ----------
        filepath: str
            Audio file passed to ``librosa.load``. Used whenever ``y`` or
            ``sr`` is missing.
        y: np.array
            Already loaded signal; only used together with ``sr``.
        sr: int
            Sampling rate of ``y``.
        frame_size: int
            Samples per frame.
        overlap: int
            Samples shared by consecutive frames.

        Returns
        -------
        result : list of np.array
            Windowed frames of the denoised, silence-trimmed signal.

        Raises
        ------
        ValueError
            If neither ``filepath`` nor both ``y`` and ``sr`` are given.
        """
        if filepath is None and (y is None or sr is None):
            raise ValueError("Either filepath or y and sr must be given.")

        if y is None or sr is None:
            y, sr = librosa.load(filepath)

        y = AudioPreprocessor.remove_noise(y=y, sr=sr)
        y = AudioPreprocessor.remove_silence(y=y)

        frames = AudioPreprocessor.create_frames(y=y, frame_size=frame_size, overlap=overlap)

        return AudioPreprocessor.window_frames(frames=frames)
131+
132+
def main():
    """Preprocess ``./audio.wav`` and print the resulting windowed frames."""
    windowed = AudioPreprocessor.load_preprocessed_frames("./audio.wav")
    print(windowed)


if __name__ == '__main__':
    main()
Binary file not shown.
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
from tracemalloc import start
2+
import librosa
3+
import numpy as np
4+
import matplotlib.pyplot as plt
5+
6+
class FileHandler:
    """Playground wrapper around one audio file loaded with ``librosa.load``."""

    def __init__(self, filepath):
        """Load ``filepath`` and print the duration (samples / sampling rate)."""
        self.y, self.sampling_rate = librosa.load(filepath)
        self.total_time = self.y.size / self.sampling_rate

        print(self.total_time)

    def get_sampling_rate(self):
        """Return the sampling rate reported by ``librosa.load``."""
        return self.sampling_rate

    def get_frame(self, frame_time, start_frame):
        """
        Return a slice of the signal and its nominal length.

        Parameters
        ----------
        frame_time: float
            Frame duration; multiplied by the sampling rate to get the length.
        start_frame: int
            Index of the first sample of the frame.

        Returns
        -------
        frame : np.array
            ``self.y[start_frame:start_frame + length]``. Shorter than
            ``length`` when the slice runs past the end of the signal.
        length : int
            Nominal number of samples, ``int(sampling_rate * frame_time)``.
        """
        frame_frames = int(self.sampling_rate * frame_time)
        return self.y[start_frame:(start_frame + frame_frames)], frame_frames

    @staticmethod
    def _hann_windowed(frame):
        """Multiply ``frame`` with a Hann window of the frame's actual length."""
        # Using the actual length (not the nominal one) avoids a shape
        # mismatch when the frame was truncated at the end of the signal.
        return frame * np.hanning(frame.shape[0])

    def view(self):
        """Plot the whole signal against its sample index."""
        plt.plot(np.arange(self.y.size), self.y)
        plt.show()

    def autocorrelate(self, frame_size, start_frame=3200):
        """
        Compare an FFT based autocorrelation with ``librosa.autocorrelate``.

        Prints and plots both results for one frame, then returns the librosa
        autocorrelation of the Hann-windowed frame.

        Parameters
        ----------
        frame_size: float
            Frame duration; multiplied by the sampling rate to get the length.
        start_frame: int
            Index of the first sample of the analysed frame.

        Returns
        -------
        result : np.array
            ``librosa.autocorrelate`` of the windowed frame.
        """
        frame_y, _ = self.get_frame(frame_size, start_frame)
        Fr = np.fft.fft(frame_y)
        S = Fr * np.conjugate(Fr)
        print(Fr)

        # Compute each autocorrelation once and reuse it for printing and plotting.
        fft_autocorr = np.fft.ifft(S)
        librosa_autocorr = librosa.autocorrelate(frame_y)

        print(abs(fft_autocorr)[:10])
        print(abs(fft_autocorr).size)

        print(librosa_autocorr[:10])
        print(librosa_autocorr.size)

        sample_axis = np.arange(frame_y.size)

        plt.plot(sample_axis, frame_y)
        plt.show()
        # The inverse FFT is complex typed; plot its real part explicitly
        # instead of letting matplotlib discard the imaginary part.
        plt.plot(sample_axis, fft_autocorr.real)
        plt.plot(sample_axis, librosa_autocorr)
        plt.show()
        return librosa.autocorrelate(self._hann_windowed(frame_y))

    def get_lpc(self, frame_time, order, start_frame=3200):
        """
        Return ``librosa.lpc`` coefficients of one Hann-windowed frame.

        Parameters
        ----------
        frame_time: float
            Frame duration; multiplied by the sampling rate to get the length.
        order: int
            Passed to ``librosa.lpc`` as ``order``.
        start_frame: int
            Index of the first sample of the analysed frame.

        Returns
        -------
        result : np.array
            Output of ``librosa.lpc``.
        """
        frame_y, _ = self.get_frame(frame_time, start_frame)

        return librosa.lpc(self._hann_windowed(frame_y), order=order)

0 commit comments

Comments
 (0)