Skip to content

Commit 7ed6dfc

Browse files
committed
* support audio playback
1 parent 584dd9b commit 7ed6dfc

File tree

5 files changed

+354
-331
lines changed

5 files changed

+354
-331
lines changed

components/maixcam_lib/include/maixcam2/ax_middleware.hpp

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,18 @@ namespace maix::middleware::maixcam2 {
159159
} ax_audio_in_param_t;
160160

161161
typedef struct {
162+
int channels;
163+
int rate;
164+
int encode_rate;
165+
AX_AUDIO_BIT_WIDTH_E bits;
166+
AX_PAYLOAD_TYPE_E payload_type;
167+
unsigned int period_size;
168+
unsigned int period_count;
169+
bool insert_silence;
170+
bool cfg_pool_en;
171+
AX_POOL_CONFIG_T pool_cfg;
172+
bool cfg_pub_attr;
173+
AX_AO_ATTR_T pub_attr;
162174
bool vqe_en;
163175
AX_AP_DNVQE_ATTR_T vqe_attr;
164176
bool hpf_en;
@@ -256,12 +268,14 @@ namespace maix::middleware::maixcam2 {
256268
typedef struct {
257269
pthread_mutex_t lock;
258270
int init_count;
259-
SYS *sys;
260271
int card;
261272
int device;
273+
SYS *sys;
262274
AX_POOL pool_id;
263275
ax_audio_out_param_t param;
264-
276+
bool eq_en;
277+
bool hpf_en;
278+
bool lpf_en;
265279
AX_AO_ATTR_T attr;
266280
} ax_ao_mod_t;
267281

@@ -1025,6 +1039,7 @@ namespace maix::middleware::maixcam2 {
10251039
FRAME_FROM_VDEC_GET_STREAM,
10261040
FRAME_FROM_AX_MALLOC,
10271041
FRAME_FROM_AUDIO_GET_FRAME,
1042+
FRAME_FROM_AUDIO_FRAME,
10281043
} frame_from_e;
10291044

10301045
class Frame {
@@ -1044,6 +1059,7 @@ namespace maix::middleware::maixcam2 {
10441059
Frame(int pool_id, int w, int h, void *data, int data_size, AX_IMG_FORMAT_E fmt);
10451060
Frame(void *data, int data_size, frame_from_e from = FRAME_FROM_MALLOC);
10461061
Frame(int card, int device, AX_AUDIO_FRAME_T *frame, frame_from_e from = FRAME_FROM_AUDIO_GET_FRAME);
1062+
Frame(int card, int device, void *data, int data_size, AX_AUDIO_BIT_WIDTH_E bit_width, AX_AUDIO_SOUND_MODE_E sound_mode, frame_from_e from = FRAME_FROM_AUDIO_FRAME);
10471063
~Frame();
10481064
frame_from_e from();
10491065
err::Err get_video_frame(AX_VIDEO_FRAME_T * frame);
@@ -1135,7 +1151,7 @@ namespace maix::middleware::maixcam2 {
11351151
~AudioIn();
11361152
err::Err init();
11371153
err::Err deinit();
1138-
maixcam2::Frame *read(int32_t timeout_ms);
1154+
maixcam2::Frame *read(int32_t timeout_ms = -1);
11391155
float volume(float volume);
11401156
err::Err reset();
11411157
int period_size(int size);
@@ -1154,7 +1170,12 @@ namespace maix::middleware::maixcam2 {
11541170
float volume(float volume); // volume = -1, get volume, volume = 0~1, set volume
11551171
err::Err pause();
11561172
err::Err resume();
1157-
err::Err write(maixcam2::Frame *frame);
1173+
err::Err write(maixcam2::Frame *frame, int32_t timeout_ms = -1);
1174+
err::Err clear(void);
1175+
err::Err wait(int32_t timeout_ms = -1);
1176+
err::Err state(int &total_num, int &free_num, int &busy_num, int &pcm_delay);
1177+
int period_size(int size = -1);
1178+
int period_count(int count = -1);
11581179
private:
11591180
void *param;
11601181
};

components/nn/include/maix_nn_whisper.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ namespace maix::nn
441441
return "";
442442
}
443443

444-
audio::WavReader wav_reader(file);
444+
audio::AudioFileReader wav_reader(file);
445445
auto pcm = wav_reader.pcm(false);
446446
return forward_raw(pcm, wav_reader.sample_rate(), wav_reader.channels(), wav_reader.sample_bits());
447447
}

components/voice/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,11 @@ endif()
3636
###### Add required/dependent components ######
3737
list(APPEND ADD_REQUIREMENTS basic)
3838
if(PLATFORM_LINUX)
39-
list(APPEND ADD_REQUIREMENTS WavFileReader)
39+
list(APPEND ADD_REQUIREMENTS)
4040
elseif(PLATFORM_MAIXCAM)
41-
list(APPEND ADD_REQUIREMENTS tinyalsa WavFileReader)
41+
list(APPEND ADD_REQUIREMENTS tinyalsa)
4242
elseif(PLATFORM_MAIXCAM2)
43-
list(APPEND ADD_REQUIREMENTS maixcam_lib tinyalsa WavFileReader)
43+
list(APPEND ADD_REQUIREMENTS maixcam_lib tinyalsa)
4444
endif()
4545
###############################################
4646

components/voice/include/maix_audio.hpp

Lines changed: 187 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
#pragma once
99
#include "maix_basic.hpp"
1010
#include <memory>
11-
#include "wav_file_reader.h"
1211

1312
/**
1413
* @brief maix.audio module
@@ -36,44 +35,207 @@ namespace maix::audio
3635
};
3736

3837
/**
39-
* Wav file reader
40-
* @maixpy maix.audio.WavReader
38+
* Map the audio format to the number of bits
39+
* @param format audio format
40+
* @return number of bits
41+
* @maixpy maix.audio.fmt_bits
4142
*/
42-
class WavReader
43+
// Lookup table: element i holds the sample width in bits for audio format
// value i (index 0 maps to 0 bits).
// NOTE(review): the order presumably mirrors the audio Format enum — confirm.
const std::vector<int> fmt_bits{0, 8, 16, 32, 16, 32, 8, 16, 32, 16, 32};
46+
47+
/**
48+
* Audio file reader
49+
* @maixpy maix.audio.AudioFileReader
50+
*/
51+
class AudioFileReader
4352
{
53+
private:
4454
int _channels;
4555
int _sample_rate;
4656
int _sample_bits;
4757
int _data_size;
48-
std::unique_ptr<sakado::WavFileReader> _reader;
4958
std::unique_ptr<maix::Bytes> _pcm;
59+
// Fields parsed from a WAV file's format and data chunks.
//
// The 32-bit fields are filled by 4-byte fread() calls, so they must be
// exactly four bytes wide. Declared as `unsigned long` they are 8 bytes on
// LP64 targets, and the upper half of each field kept indeterminate stack
// contents (which then leaked into the fseek() offset computed from fmt_size).
typedef struct {
    unsigned int fmt_size;          // size of the format chunk payload
    unsigned short fmt_id;          // format tag
    unsigned short channels;        // number of channels
    unsigned int sample_rate;       // samples per second
    unsigned short bits_per_sample; // bits per sample
    unsigned short block_align;     // block alignment
    unsigned int bytes_per_sec;     // byte rate
    unsigned int data_size;         // size of pcm
} wav_header_t;
static_assert(sizeof(unsigned int) == 4, "wav_header_t expects a 4-byte unsigned int");
69+
70+
// Parse the RIFF/WAVE header of the file at `path`.
//
// Walks the chunk list after the "RIFF....WAVE" preamble, decodes the "fmt "
// chunk into `header`, skips every other chunk, and stops at the "data" chunk.
//
// @param path         file to parse
// @param header       receives the format fields and the data chunk size
// @param header_seek  receives the file offset of the first PCM byte
// @return err::ERR_NONE on success. err::ERR_RUNTIME if the file cannot be
//         opened, is truncated, is not RIFF/WAVE, has no "fmt " chunk before
//         "data", or is not 8/16-bit mono/stereo.
err::Err read_wav_header(std::string path, wav_header_t &header, int &header_seek) {
    FILE *fp = fopen(path.c_str(), "rb");
    if (!fp) {
        return err::ERR_RUNTIME;
    }

    // Close the file on every exit path instead of repeating fclose().
    struct FileCloser {
        void operator()(FILE *f) const { fclose(f); }
    };
    std::unique_ptr<FILE, FileCloser> closer(fp);

    // Reads a four-character chunk tag into a NUL-terminated 5-byte buffer.
    auto read_tag = [fp](char *tag) {
        tag[4] = '\0';
        return fread(tag, 1, 4, fp) == 4;
    };

    // Reads a little-endian unsigned integer of `width` (<= 4) bytes.
    // Decoding byte by byte keeps the result independent of the width and
    // endianness of the destination type.
    auto read_le = [fp](unsigned long &value, size_t width) {
        unsigned char raw[4] = {0, 0, 0, 0};
        if (fread(raw, 1, width, fp) != width) {
            return false;
        }
        value = 0;
        for (size_t i = width; i > 0; --i) {
            value = (value << 8) | raw[i - 1];
        }
        return true;
    };

    char tag[5];
    unsigned long riff_size = 0; // read only to advance past the field

    if (!read_tag(tag) || strcmp(tag, "RIFF") != 0) {
        return err::ERR_RUNTIME;
    }
    if (!read_le(riff_size, 4)) {
        return err::ERR_RUNTIME;
    }
    if (!read_tag(tag) || strcmp(tag, "WAVE") != 0) {
        return err::ERR_RUNTIME;
    }

    bool fmt_found = false;
    for (;;) {
        unsigned long chunk_size = 0;
        if (!read_tag(tag) || !read_le(chunk_size, 4)) {
            return err::ERR_RUNTIME; // truncated file or no data chunk
        }

        if (strcmp(tag, "data") == 0) {
            header.data_size = static_cast<decltype(header.data_size)>(chunk_size);
            break;
        }

        // Reject sizes that cannot be represented as a positive seek offset.
        if (chunk_size > 0x7ffffffeUL) {
            return err::ERR_RUNTIME;
        }
        // RIFF chunks are word aligned: odd-sized payloads carry one pad byte.
        unsigned long to_skip = chunk_size + (chunk_size & 1UL);

        if (strcmp(tag, "fmt ") == 0) {
            if (chunk_size < 16) {
                return err::ERR_RUNTIME; // too small to hold the PCM format fields
            }
            unsigned long fmt_id = 0, channels = 0, sample_rate = 0;
            unsigned long bytes_per_sec = 0, block_align = 0, bits_per_sample = 0;
            if (!read_le(fmt_id, 2) || !read_le(channels, 2) ||
                !read_le(sample_rate, 4) || !read_le(bytes_per_sec, 4) ||
                !read_le(block_align, 2) || !read_le(bits_per_sample, 2)) {
                return err::ERR_RUNTIME;
            }
            header.fmt_size = static_cast<decltype(header.fmt_size)>(chunk_size);
            header.fmt_id = static_cast<decltype(header.fmt_id)>(fmt_id);
            header.channels = static_cast<decltype(header.channels)>(channels);
            header.sample_rate = static_cast<decltype(header.sample_rate)>(sample_rate);
            header.bytes_per_sec = static_cast<decltype(header.bytes_per_sec)>(bytes_per_sec);
            header.block_align = static_cast<decltype(header.block_align)>(block_align);
            header.bits_per_sample = static_cast<decltype(header.bits_per_sample)>(bits_per_sample);
            to_skip -= 16; // the 16 format bytes were consumed above
            fmt_found = true;
        }

        if (fseek(fp, (long)to_skip, fs::SEEK_CUR) != 0) {
            return err::ERR_RUNTIME;
        }
    }

    if (!fmt_found) {
        return err::ERR_RUNTIME;
    }

    if (header.bits_per_sample != 8 && header.bits_per_sample != 16) {
        return err::ERR_RUNTIME;
    }

    if (header.channels != 1 && header.channels != 2) {
        return err::ERR_RUNTIME;
    }

    long data_pos = ftell(fp);
    if (data_pos < 0) {
        return err::ERR_RUNTIME;
    }
    header_seek = (int)data_pos;

    return err::ERR_NONE;
}
130+
131+
// Load a .wav file: parse its header, then read the PCM payload into _pcm and
// record channels / sample rate / sample bits / data size on the object.
//
// @param path wav file path
// @return err::ERR_NONE on success; the error from read_wav_header() if the
//         header is rejected; err::ERR_RUNTIME if the file cannot be opened or
//         does not contain as many PCM bytes as the header declares.
err::Err read_wav(std::string path) {
    // Zero-initialise so no field is left indeterminate when the header
    // parser fills only part of it.
    wav_header_t wav_header{};
    int header_seek = 0;
    err::Err ret = read_wav_header(path, wav_header, header_seek);
    if (ret != err::ERR_NONE) {
        log::error("read_wav_header error");
        return ret;
    }

    FILE *new_file = fopen(path.c_str(), "rb");
    if (!new_file) {
        log::error("open wav file failed!");
        return err::ERR_RUNTIME;
    }

    // Measure the file so the declared payload size can be validated before
    // any memory is allocated for it.
    long file_end = -1;
    if (fseek(new_file, 0, fs::SEEK_END) == 0) {
        file_end = ftell(new_file);
    }
    if (header_seek < 0 || file_end < (long)header_seek ||
        fseek(new_file, header_seek, fs::SEEK_SET) != 0) {
        log::error("read wav file failed!");
        fclose(new_file);
        return err::ERR_RUNTIME;
    }

    const unsigned long available = (unsigned long)(file_end - (long)header_seek);
    const unsigned long declared = (unsigned long)wav_header.data_size;
    // A size beyond the file, or one that does not fit the int member, would
    // otherwise become a negative or enormous allocation.
    if (declared > available || declared > 0x7fffffffUL) {
        log::error("read wav file failed!");
        fclose(new_file);
        return err::ERR_RUNTIME;
    }

    _channels = wav_header.channels;
    _sample_rate = wav_header.sample_rate;
    _data_size = (int)declared;
    _sample_bits = wav_header.bits_per_sample;

    // std::make_unique reports allocation failure by throwing, so there is no
    // null result to test for.
    _pcm = std::make_unique<maix::Bytes>(nullptr, _data_size);

    if (fread(_pcm->data, 1, _data_size, new_file) != (size_t)_data_size) {
        log::error("read wav file failed!");
        fclose(new_file);
        return err::ERR_RUNTIME;
    }
    fclose(new_file);

    return err::ERR_NONE;
}
171+
172+
173+
// Load a headerless .pcm file into _pcm using the caller-supplied format.
// The stored size is rounded down to a whole number of frames
// (channels * bits_per_sample / 8 bytes each).
//
// @param path            pcm file path
// @param sample_rate     sample rate recorded on the object
// @param channels        channel count recorded on the object
// @param bits_per_sample bits per sample recorded on the object
// @return err::ERR_NONE on success; err::ERR_RUNTIME if the format yields a
//         non-positive frame size, or the file cannot be opened, measured or
//         read.
err::Err read_pcm(std::string path, int sample_rate = 16000, int channels = 1, int bits_per_sample = 16) {
    // Guard the division below: e.g. channels == 0 or bits_per_sample < 8
    // makes the frame size zero.
    const int bytes_per_frame = channels * bits_per_sample / 8;
    if (bytes_per_frame <= 0) {
        log::error("invalid pcm format!");
        return err::ERR_RUNTIME;
    }

    FILE *new_file = fopen(path.c_str(), "rb");
    if (!new_file) {
        log::error("open pcm file failed!");
        return err::ERR_RUNTIME;
    }

    // Determine the file size; ftell() returns -1 on failure, which must not
    // be used as a buffer size.
    long file_size = -1;
    if (fseek(new_file, 0, fs::SEEK_END) == 0) {
        file_size = ftell(new_file);
    }
    if (file_size < 0 || file_size > 0x7fffffffL ||
        fseek(new_file, 0, fs::SEEK_SET) != 0) {
        log::error("read pcm file failed!");
        fclose(new_file);
        return err::ERR_RUNTIME;
    }

    _data_size = ((int)file_size / bytes_per_frame) * bytes_per_frame;
    _sample_rate = sample_rate;
    _channels = channels;
    _sample_bits = bits_per_sample;

    // std::make_unique reports allocation failure by throwing, so there is no
    // null result to test for.
    _pcm = std::make_unique<maix::Bytes>(nullptr, _data_size);

    if (fread(_pcm->data, 1, _data_size, new_file) != (size_t)_data_size) {
        log::error("read pcm file failed!");
        fclose(new_file);
        return err::ERR_RUNTIME;
    }
    fclose(new_file);

    return err::ERR_NONE;
}
50205
public:
51206
/**
52-
* @brief Construct a new WavReader object.
53-
* @param path wav file path
54-
* @maixpy maix.audio.WavReader.__init__
207+
* @brief Construct a new AudioFileReader object.
208+
* @param path wav or pcm file path
209+
* @param sample_rate sample rate, need to be filled in when parsing .pcm files
210+
* @param channels channels, need to be filled in when parsing .pcm files
211+
* @param bits_per_sample bits per sample, need to be filled in when parsing .pcm files
212+
* @maixpy maix.audio.AudioFileReader.__init__
55213
*/
56-
WavReader(std::string path) {
57-
_reader = std::make_unique<sakado::WavFileReader>(path);
58-
_sample_bits = _reader->BitsPerSample;
59-
_channels = _reader->NumChannels;
60-
_sample_rate = _reader->SampleRate;
61-
_data_size = _reader->DataSize;
62-
_pcm = std::make_unique<maix::Bytes>(nullptr, _data_size);
63-
if (_reader->BitsPerSample == 16) {
64-
_reader->Read((short *)_pcm->data, _data_size / _channels / _sample_bits * 8);
65-
} else if (_reader->BitsPerSample == 8) {
66-
_reader->Read((uint8_t *)_pcm->data, _data_size / _channels / _sample_bits * 8);
214+
AudioFileReader(std::string path, int sample_rate = 16000, int channels = 1, int bits_per_sample = 16) {
215+
err::check_bool_raise(path.size() > 0, "path is empty");
216+
_sample_bits = bits_per_sample;
217+
_channels = channels;
218+
_sample_rate = sample_rate;
219+
220+
if (fs::exists(path)) {
221+
auto extension = fs::splitext(path)[1];
222+
if (extension == ".wav") {
223+
err::check_raise(read_wav(path));
224+
} else if (extension == ".pcm") {
225+
err::check_raise(read_pcm(path, _sample_rate, _channels, _sample_bits));
226+
} else {
227+
err::check_raise(err::ERR_NOT_FOUND, "Only files with the `.pcm` and `.wav` extensions are supported.");
228+
}
67229
}
68230
}
69-
~WavReader() {
231+
// Nothing to release by hand: _pcm is a std::unique_ptr and frees itself.
~AudioFileReader() = default;
72234

73235
/**
74236
* Get pcm data
75237
* @return pcm data. datatype @see Bytes
76-
* @maixpy maix.audio.WavReader.pcm
238+
* @maixpy maix.audio.AudioFileReader.pcm
77239
*/
78240
Bytes *pcm(bool copy = true) {
79241
return new maix::Bytes(_pcm->data, _pcm->data_len, false, copy);
@@ -82,28 +244,28 @@ namespace maix::audio
82244
/**
83245
* Get sample bit
84246
* @return sample bit
85-
* @maixpy maix.audio.WavReader.sample_bits
247+
* @maixpy maix.audio.AudioFileReader.sample_bits
86248
*/
87249
int sample_bits() {
    // Stored bits-per-sample value.
    return _sample_bits;
}
88250

89251
/**
90252
* Get channels
91253
* @return channels
92-
* @maixpy maix.audio.WavReader.channels
254+
* @maixpy maix.audio.AudioFileReader.channels
93255
*/
94256
int channels() {
    // Stored channel count.
    return _channels;
}
95257

96258
/**
97259
* Get sample rate
98260
* @return sample rate
99-
* @maixpy maix.audio.WavReader.sample_rate
261+
* @maixpy maix.audio.AudioFileReader.sample_rate
100262
*/
101263
int sample_rate() {
    // Stored sample rate.
    return _sample_rate;
}
102264

103265
/**
104266
* Get data size
105267
* @return data size
106-
* @maixpy maix.audio.WavReader.data_size
268+
* @maixpy maix.audio.AudioFileReader.data_size
107269
*/
108270
int data_size() {
    // Stored size of the loaded PCM data.
    return _data_size;
}
109271
};

0 commit comments

Comments
 (0)