资源占用方面,程序运行 10 小时,占用内存 120 MB,睡眠时 CPU 占用不到 1%,被唤醒后则多一些,约 5%。简单说就是不占资源,随意使用。响应速度方面,第一个版本比较慢,从说完话到开始听到语音回答要 10 到 20 秒。现在则只需要 4 到 6 秒,已经主要是 ChatGPT 的网络和计算时间开销了。虽然还有点慢,但换个角度想,其实这也不错,毕竟有时回答得太快会显得不够认真、不够尊重。声卡功能方面,在程序运行期间,不影响系统的录音功能,也不影响其他的花样玩法。可移植性方面,则可以无缝迁移到其他平台,只是唤醒词需要区分平台单独训练。
import pvporcupine
import struct
import pyaudio
import pvcobra
import time
import os
import speech_recognition as sr
from agent import chatGPTAgent as gpt
from agent import speechAgent as speech
from utils.logger import logger
# Module-level placeholders for the wake-word engine, the PyAudio instance
# and the input stream. picovoice() binds same-named *local* variables (it has
# no ``global`` statement), so these module-level names stay None.
porcupine = pa = audio_stream = None

# speech_recognition recognizer instance; it is not referenced in the visible
# part of this file (NOTE(review): confirm whether other code uses it).
r = sr.Recognizer()
def picovoice():
    """Listen for the wake word forever and run one interaction per detection.

    Creates a Porcupine wake-word engine, opens a mono 16-bit PyAudio input
    stream at the engine's sample rate, and feeds it one frame at a time.
    Whenever ``porcupine.process`` returns a non-negative keyword index,
    ``run()`` is called synchronously before listening resumes.

    The loop has no exit condition; it ends only when an exception
    (including ``KeyboardInterrupt``) propagates. In that case the audio
    stream, the PyAudio instance and the Porcupine handle are released
    before the exception is re-raised to the caller.
    """
    # Placeholder credentials/paths: replace with a real Picovoice access key
    # and the path to a trained keyword (.ppn) file.
    access_key = 'your key'

    # Initialised to None so the ``finally`` clause can tell which resources
    # were actually acquired if setup fails part-way through.
    porcupine = None
    pa = None
    audio_stream = None
    try:
        porcupine = pvporcupine.create(
            access_key=access_key,
            keyword_paths=['your path']
        )
        pa = pyaudio.PyAudio()
        frame_length = porcupine.frame_length
        audio_stream = pa.open(
            rate=porcupine.sample_rate,
            channels=1,
            format=pyaudio.paInt16,
            input=True,
            frames_per_buffer=frame_length)

        # Precompile the frame layout once: ``frame_length`` native shorts,
        # equivalent to the format string "h" * frame_length.
        frame_struct = struct.Struct(f"{frame_length}h")

        while True:
            # exception_on_overflow=False: do not raise if input frames were
            # dropped (e.g. while run() was busy handling a request).
            pcm = audio_stream.read(frame_length, exception_on_overflow=False)
            keyword_index = porcupine.process(frame_struct.unpack_from(pcm))
            if keyword_index >= 0:
                run()
    finally:
        # Release in reverse order of acquisition.
        if audio_stream is not None:
            audio_stream.close()
        if pa is not None:
            pa.terminate()
        if porcupine is not None:
            porcupine.delete()
def chatGPT(text):
    """Send a question to ChatGPT and return the reply.

    Args:
        text: The question text. Newlines are replaced with spaces, carriage
            returns are removed and surrounding whitespace is stripped
            before the text is sent.

    Returns:
        Whatever ``gpt.ask`` returns for the normalized text, or ``None``
        when ``text`` is ``None``, empty, or contains only whitespace (in
        which case no request is made).
    """
    if not text:
        return None

    text = text.replace('\n', ' ').replace('\r', '').strip()
    if not text:
        # Whitespace-only input normalizes to an empty prompt; do not send it.
        return None

    logger.info(f'chatGPT Q: {text}')
    res = gpt.ask(text)
    logger.info(f'chatGPT A: {res}')
    return res
def run():
    """Handle one voice interaction: listen, ask ChatGPT, speak the answer.

    Calls ``speech.recognize_from_microphone()`` to obtain the question,
    passes it to ``chatGPT`` and hands the reply to ``speech.tts``. If
    nothing was recognized, or no reply was produced, the function returns
    without calling ``speech.tts``.
    """
    logger.info('start recognize_from_microphone')
    q = speech.recognize_from_microphone()
    logger.info(f'recognize_from_microphone, text={q}')
    if not q:
        # Nothing recognized (None or empty): there is no question to ask.
        return

    res = chatGPT(q)
    if not res:
        # chatGPT() returns None for empty input; avoid passing that to TTS.
        return

    speech.tts(res)
# Script entry: start the wake-word loop as soon as this module is executed.
# picovoice() contains a ``while True`` loop, so this call does not return
# unless an exception propagates out of it.
picovoice()