Skip to content
This repository was archived by the owner on Dec 20, 2023. It is now read-only.

Commit cd76a6c

Browse files
authored
Merge branch 'dev' into master
2 parents 527581e + 37c62af commit cd76a6c

File tree

18 files changed

+302
-115
lines changed

18 files changed

+302
-115
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ sudo: required
1818
# Some discussion: https://github.com/koalaman/shellcheck/issues/785
1919
before_install:
2020
- sudo apt-get update
21-
- sudo apt-get install -y wget git swig libasound2-dev libpulse-dev vlc-nox shellcheck
21+
- sudo apt-get install -y wget git swig portaudio19-dev libpulse-dev vlc-nox shellcheck
2222

2323
install:
2424
- pip install -r src/requirements.txt

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/).
55

66
## [Unreleased]
77

8+
### Added
9+
- Windows support
10+
11+
### Changed
12+
- Switched from pyalsaaudio (ALSA) to pyaudio (PortAudio)
13+
- You might have to change the `input_device` in your config. The new device names are expected to stay stable (no further changes are planned).
14+
- Removed the `allow_unlisted_input_device` config option: unlisted devices are no longer supported, so you have to select a device from the list.
15+
816
## [1.5.1] - 2017-10-08
917
Small bugfix release.
1018

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# AlexaPi (the new & awesome version) [![Gitter chat](https://badges.gitter.im/alexa-pi/Lobby.png)](https://gitter.im/alexa-pi/Lobby)
22

3-
This is a client for Amazon's Alexa service. It is intended and tested to run on a wide range of platforms, such as Raspberry Pi, Orange Pi, CHIP and ordinary Linux desktops.
3+
This is a client for Amazon's Alexa service. It is intended and tested to run on a wide range of platforms, such as Raspberry Pi, Orange Pi, CHIP and ordinary Linux or Windows desktops.
44

55
### Do you want to help out? Read the [Contribution Guide](CONTRIBUTING.md).
66

@@ -10,16 +10,16 @@ This is a client for Amazon's Alexa service. It is intended and tested to run on
1010

1111
You will need:
1212

13-
1. **A Linux box**
13+
1. **A Linux or Windows box**
1414
- a Raspberry Pi and an SD Card with a fresh install of Raspbian
1515
- or an Orange Pi with Armbian
16-
- or pretty much any up-to-date Linux system
16+
- or pretty much any up-to-date Linux/Windows system
1717
2. **Audio peripherals**
1818
- external speaker with 3.5mm Jack
1919
- USB Sound Dongle and microphone
20-
3. Other
21-
- (optional) (Raspberry Pi) a push button connected between GPIO 18 and GND (configurable)
22-
- (optional) (Raspberry Pi) a dual colour LED (or 2 single LEDs) connected to GPIO 24 & 25 (configurable)
20+
3. **Optional (only for Raspberry Pi)**
21+
- a push button connected between GPIO 18 and GND (configurable)
22+
- a dual colour LED (or 2 single LEDs) connected between GPIO 24 & 25 and GND (configurable)
2323

2424
## You wanna give it a try? Check out the [Installation Guide](https://github.com/alexa-pi/AlexaPi/wiki/Installation).
2525

pylintrc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ load-plugins=
2121
# A comma-separated list of package or module names from where C extensions may
2222
# be loaded. Extensions are loading into the active Python interpreter and may
2323
# run arbitrary code
24-
extension-pkg-whitelist=alsaaudio,pyA20,CHIP_IO
24+
extension-pkg-whitelist=pyA20,CHIP_IO
2525

2626
[MESSAGES CONTROL]
2727

@@ -158,7 +158,7 @@ notes=XXX
158158
[SIMILARITIES]
159159

160160
# Minimum lines number of a similarity.
161-
min-similarity-lines=4
161+
min-similarity-lines=5
162162

163163
# Ignore comments when computing similarities.
164164
ignore-comments=yes

src/alexapi/capture.py

Lines changed: 166 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,46 @@
11
import logging
22
import time
3+
import threading
4+
import os
5+
import queue
36

4-
import alsaaudio
57
import webrtcvad
68

79
from .exceptions import ConfigurationException
810

11+
os.environ["PA_ALSA_PLUGHW"] = "1"
12+
import pyaudio # pylint: disable=wrong-import-position,wrong-import-order
13+
14+
915
logger = logging.getLogger(__name__)
1016

1117

18+
class DeviceInfo(object):
    """Enumerate the audio devices reported by PyAudio.

    Each instance creates its own ``pyaudio.PyAudio`` handle in ``__init__``
    and terminates it when the instance is finalized.
    """

    # PyAudio handle; remains None if PyAudio() raises before __init__ assigns it.
    _pa = None

    def __init__(self):
        self._pa = pyaudio.PyAudio()

    def get_device_list(self, input_only=False):
        """Return the device names in device-index order.

        :param input_only: when true, keep only devices whose
            ``maxInputChannels`` is greater than zero.
        :return: list of device names. With ``input_only=False`` nothing is
            filtered out, so a name's position equals its device index.
        """
        device_list = []
        for i in range(self._pa.get_device_count()):
            # Query each device once instead of once per field.
            info = self._pa.get_device_info_by_index(i)
            if not input_only or info['maxInputChannels'] > 0:
                device_list.append(info['name'])

        return device_list

    def get_device_index(self, name):
        """Return the index of the first device called *name*.

        :param name: device name as returned by ``get_device_list``; a falsy
            value (``None`` or ``''``) yields ``None``.
        :return: integer device index, or ``None`` when *name* is falsy.
        :raises ValueError: if no device has that name.
        """
        if not name:
            return None

        # Search the unfiltered list: only there does position == device index.
        return self.get_device_list().index(name)

    def __del__(self):
        # _pa is still None when PyAudio() raised in __init__ (or __init__ was
        # bypassed); calling terminate() on None would raise AttributeError.
        if self._pa is not None:
            self._pa.terminate()
            self._pa = None
42+
43+
1244
class Capture(object):
1345

1446
MAX_RECORDING_LENGTH = 8
@@ -19,22 +51,41 @@ class Capture(object):
1951
VAD_SILENCE_TIMEOUT = 1000
2052
VAD_THROWAWAY_FRAMES = 10
2153

54+
_pa = None
55+
_pa_exception_on_overflow = False
56+
57+
_handle = None
58+
_handle_chunk_size = None
59+
60+
_device_info = None
61+
_stream = None
62+
_callback_data = None
63+
_queue = None
2264
_vad = None
2365
_config = None
2466
_tmp_path = None
2567
_state_callback = None
68+
_interrupt = False
69+
_recording_lock_inverted = None
2670

2771
def __init__(self, config, tmp_path):
    """Store the settings, create the audio helpers and validate the config.

    :param config: configuration mapping; ``config['sound']['input_device']``
        is read by ``validate_config``.
    :param tmp_path: path prefix under which the debug recording is written.
    :raises ConfigurationException: from ``validate_config`` when the
        configured input device is not among the available input devices.
    """
    self._config, self._tmp_path = config, tmp_path

    # Audio backend handle, the queue fed by the stream callback, and the
    # device enumerator.
    self._pa = pyaudio.PyAudio()
    self._queue = queue.Queue()
    self._device_info = DeviceInfo()

    # "Inverted" lock: the event is set while no recording is running, so it
    # starts out set.
    idle = threading.Event()
    idle.set()
    self._recording_lock_inverted = idle

    self.validate_config()
3283

3384
def validate_config(self):
3485
input_device = self._config['sound']['input_device']
35-
input_devices = alsaaudio.pcms(alsaaudio.PCM_CAPTURE)
86+
input_devices = self._device_info.get_device_list(True)
3687

37-
if (input_device not in input_devices) and (not self._config['sound']['allow_unlisted_input_device']):
88+
if input_device and (input_device not in input_devices):
3889
raise ConfigurationException(
3990
"Your input_device '" + input_device + "' is invalid. Use one of the following:\n"
4091
+ '\n'.join(input_devices))
@@ -43,88 +94,141 @@ def setup(self, state_callback):
4394
self._vad = webrtcvad.Vad(2)
4495
self._state_callback = state_callback
4596

46-
def silence_listener(self, throwaway_frames=None, force_record=None):
97+
def cleanup(self):
4798

48-
throwaway_frames = throwaway_frames or self.VAD_THROWAWAY_FRAMES
99+
if not self._recording_lock_inverted.isSet():
100+
self._interrupt = True
101+
self._recording_lock_inverted.wait()
49102

50-
logger.debug("Setting up recording")
103+
self._pa.terminate()
51104

52-
# Reenable reading microphone raw data
53-
inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, self._config['sound']['input_device'])
54-
inp.setchannels(1)
55-
inp.setrate(self.VAD_SAMPLERATE)
56-
inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
57-
inp.setperiodsize(self.VAD_PERIOD)
105+
def handle_init(self, rate, chunk_size):
58106

59-
debug = logging.getLogger('alexapi').getEffectiveLevel() == logging.DEBUG
107+
self._handle = self._pa.open(
108+
input=True,
109+
input_device_index=self._device_info.get_device_index(self._config['sound']['input_device']),
110+
format=pyaudio.paInt16,
111+
channels=1,
112+
rate=rate,
113+
frames_per_buffer=chunk_size
114+
)
60115

61-
logger.debug("Start recording")
116+
self._handle_chunk_size = chunk_size
62117

63-
if self._state_callback:
64-
self._state_callback()
118+
def handle_read(self):
119+
return self._handle.read(self._handle_chunk_size, exception_on_overflow=self._pa_exception_on_overflow)
65120

66-
def _listen():
67-
start = time.time()
121+
def handle_release(self):
122+
self._handle.close()
68123

69-
do_VAD = True
70-
if force_record and not force_record[1]:
71-
do_VAD = False
124+
def _callback(self, in_data, frame_count, time_info, status): # pylint: disable=unused-argument
72125

73-
# Buffer as long as we haven't heard enough silence or the total size is within max size
74-
thresholdSilenceMet = False
75-
frames = 0
76-
numSilenceRuns = 0
77-
silenceRun = 0
126+
debug = logging.getLogger('alexapi').getEffectiveLevel() == logging.DEBUG
78127

79-
if debug:
80-
audio = b''
128+
if not in_data:
129+
self._queue.put(False)
130+
return None, pyaudio.paAbort
131+
132+
do_VAD = True
133+
if self._callback_data['force_record'] and not self._callback_data['force_record'][1]:
134+
do_VAD = False
135+
136+
# do not count first 10 frames when doing VAD
137+
if do_VAD and (self._callback_data['frames'] < self._callback_data['throwaway_frames']):
138+
self._callback_data['frames'] += 1
139+
140+
# now do VAD
141+
elif (self._callback_data['force_record'] and self._callback_data['force_record'][0]()) \
142+
or (do_VAD and (self._callback_data['thresholdSilenceMet'] is False)
143+
and ((time.time() - self._callback_data['start']) < self.MAX_RECORDING_LENGTH)):
81144

82145
if do_VAD:
83-
# do not count first 10 frames when doing VAD
84-
while frames < throwaway_frames:
85-
length, data = inp.read()
86-
frames += 1
87-
if length:
88-
yield data
89-
90-
if debug:
91-
audio += data
92-
93-
# now do VAD
94-
while (force_record and force_record[0]()) \
95-
or (do_VAD and (thresholdSilenceMet is False) and ((time.time() - start) < self.MAX_RECORDING_LENGTH)):
96-
97-
length, data = inp.read()
98-
if length:
99-
yield data
100146

101-
if debug:
102-
audio += data
147+
if int(len(in_data) / 2) == self.VAD_PERIOD:
148+
isSpeech = self._vad.is_speech(in_data, self.VAD_SAMPLERATE)
149+
150+
if not isSpeech:
151+
self._callback_data['silenceRun'] += 1
152+
else:
153+
self._callback_data['silenceRun'] = 0
154+
self._callback_data['numSilenceRuns'] += 1
103155

104-
if do_VAD and (length == self.VAD_PERIOD):
105-
isSpeech = self._vad.is_speech(data, self.VAD_SAMPLERATE)
156+
# only count silence runs after the first one
157+
# (allow user to speak for total of max recording length if they haven't said anything yet)
158+
if (self._callback_data['numSilenceRuns'] != 0) \
159+
and ((self._callback_data['silenceRun'] * self.VAD_FRAME_MS) > self.VAD_SILENCE_TIMEOUT):
160+
self._callback_data['thresholdSilenceMet'] = True
106161

107-
if not isSpeech:
108-
silenceRun += 1
109-
else:
110-
silenceRun = 0
111-
numSilenceRuns += 1
162+
else:
163+
self._queue.put(False)
164+
return None, pyaudio.paComplete
112165

113-
if do_VAD:
114-
# only count silence runs after the first one
115-
# (allow user to speak for total of max recording length if they haven't said anything yet)
116-
if (numSilenceRuns != 0) and ((silenceRun * self.VAD_FRAME_MS) > self.VAD_SILENCE_TIMEOUT):
117-
thresholdSilenceMet = True
166+
self._queue.put(in_data)
167+
if debug:
168+
self._callback_data['audio'] += in_data
118169

119-
logger.debug("End recording")
170+
return None, pyaudio.paContinue
120171

121-
inp.close()
172+
def silence_listener(self, throwaway_frames=None, force_record=None):
173+
174+
logger.debug("Recording: Setting up")
175+
176+
self._recording_lock_inverted.clear()
177+
178+
debug = logging.getLogger('alexapi').getEffectiveLevel() == logging.DEBUG
179+
180+
if self._state_callback:
181+
self._state_callback()
182+
183+
self._queue.queue.clear()
184+
185+
self._callback_data = {
186+
'start': time.time(),
187+
'thresholdSilenceMet': False, # Buffer as long as we haven't heard enough silence or the total size is within max size
188+
'frames': 0,
189+
'throwaway_frames': throwaway_frames or self.VAD_THROWAWAY_FRAMES,
190+
'numSilenceRuns': 0,
191+
'silenceRun': 0,
192+
'force_record': force_record,
193+
'audio': b'' if debug else False,
194+
}
195+
196+
stream = self._pa.open(
197+
input=True,
198+
input_device_index=self._device_info.get_device_index(self._config['sound']['input_device']),
199+
format=pyaudio.paInt16,
200+
channels=1,
201+
rate=self.VAD_SAMPLERATE,
202+
frames_per_buffer=self.VAD_PERIOD,
203+
stream_callback=self._callback,
204+
start=False
205+
)
206+
207+
logger.debug("Recording: Start")
208+
stream.start_stream()
209+
210+
def _listen():
211+
while True:
212+
try:
213+
data = self._queue.get(block=True, timeout=2)
214+
if not data or self._interrupt:
215+
break
216+
217+
yield data
218+
except queue.Empty:
219+
break
220+
221+
stream.stop_stream()
222+
logger.debug("Recording: End")
223+
stream.close()
122224

123225
if self._state_callback:
124226
self._state_callback(False)
125227

126228
if debug:
127229
with open(self._tmp_path + 'recording.wav', 'wb') as rf:
128-
rf.write(audio)
230+
rf.write(self._callback_data['audio'])
231+
232+
self._recording_lock_inverted.set()
129233

130234
return _listen()

0 commit comments

Comments
 (0)