CSDN@如何基于本地语音识别,搭建一款智能聊天机器人?( 二 )

2、本地语音识别
目的在于让电脑知道你说了什么。主要借助 Keras 库读取模型，用 glob 读取录音文件，用 pickle 加载（反序列化）字典文件，具体如下：
from keras.models import load_model
from keras import backend as K
import numpy as np
import librosa
from python_speech_features import mfcc
import pickle
import glob

# Local speech-recognition demo: pick one random recording from data/, trim
# the low-energy head and tail, extract normalised MFCC features and decode
# the model output with CTC beam search.

wavs = glob.glob('data/*.wav')
if not wavs:
    # Without this guard np.random.randint(0) fails with an opaque
    # "low >= high" ValueError.
    raise FileNotFoundError("no .wav files found under data/")

# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load a dictionary.pkl that you generated yourself.
with open('dictionary.pkl', 'rb') as fr:
    char2id, id2char, mfcc_mean, mfcc_std = pickle.load(fr)

mfcc_dim = 13
model = load_model('asr.h5')

index = np.random.randint(len(wavs))
print(wavs[index])

audio, sr = librosa.load(wavs[index])

# librosa 0.7 renamed feature.rmse to feature.rms; accept either so the demo
# runs on old and new releases. The keyword form y=... works with both.
frame_energy = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse
energy = frame_energy(y=audio)

# Keep only the span between the first and last frame whose energy reaches
# one fifth of the peak.
frames = np.nonzero(energy >= np.max(energy) / 5)
# np.nonzero returns one index array per axis; [1] selects the second axis
# (presumably the frame axis of a (1, n_frames) energy array -- confirm).
indices = librosa.core.frames_to_samples(frames)[1]
audio = audio[indices[0]:indices[-1]] if indices.size else audio[0:0]

X_data = mfcc(audio, sr, numcep=mfcc_dim, nfft=551)
# The small epsilon avoids division by zero for a zero standard deviation.
X_data = (X_data - mfcc_mean) / (mfcc_std + 1e-14)
print(X_data.shape)

# The reference transcript sits next to the recording as "<name>.wav.trn";
# its first line is printed for comparison with the prediction.
with open(wavs[index] + '.trn', 'r', encoding='utf8') as fr:
    label = fr.readlines()[0]
    print(label)

pred = model.predict(np.expand_dims(X_data, axis=0))
pred_ids = K.eval(
    K.ctc_decode(
        pred,
        [X_data.shape[0]],
        greedy=False,
        beam_width=10,
        top_paths=1,
    )[0][0]
)
pred_ids = pred_ids.flatten().tolist()
print(''.join([id2char[i] for i in pred_ids]))

# 3、加载聊天  (next article section: "3. Loading the chat")
在识别语音的基础上，让电脑在网上搜索如何合理地回复你的话。这里的输入是语音识别后的文字，输出同样为文字，最后再把文字转为语音，达到语音回复的效果！
def baidu():
    """Search Baidu for the recognised text and return result snippets.

    NOTE(review): the ``def`` line was lost when the article listing was
    extracted; the name and the empty parameter list are inferred from the
    ``txtd=baidu`` call site and the ``return txt`` statement.

    Reads the module-level ``process`` string (defined outside this listing)
    as the search query.

    Returns:
        The concatenated snippet text, or an empty string when ``process``
        is empty, so that the caller's ``txtk + txtd`` does not fail on None.
    """
    num = 3  # number of pagination links to follow
    if process == '':
        print("not")
        return ''

    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
    # Pass the query through params so requests URL-encodes it; plain string
    # concatenation breaks on characters such as '&' or '#'.
    url = requests.get("https://www.baidu.com/s", params={'wd': process}, headers=header)
    # To avoid garbled Chinese text, decode with the page's detected encoding.
    url.raise_for_status()
    url.encoding = url.apparent_encoding
    page = etree.HTML(url.text)

    # Links to the further result pages listed in the pagination bar.
    head = page.xpath('//div[@id="page"]//a/@href')

    abstract_text = '/html/body//div[@class="c-abstract"]/text()'
    abstract_em = '/html/body//div[@class="c-abstract"]/em/text()'

    txt0 = ''
    # Slicing instead of head[i] avoids an IndexError when fewer than `num`
    # pagination links are present.
    for href in head[:num]:
        # The User-Agent value of the per-page header was lost in the
        # listing; the same header as the first request is reused.
        url0 = requests.get("https://www.baidu.com" + href, headers=header)
        url0.raise_for_status()
        url0.encoding = url0.apparent_encoding
        # Parse the page that was just fetched (the listing re-parsed the
        # first response and then ignored the result).
        page0 = etree.HTML(url0.text)
        para0 = page0.xpath(abstract_text)
        para10 = page0.xpath(abstract_em)
        # zip stops at the shorter list, matching the original loop that
        # skipped indexes missing from para10.
        for plain, emphasised in zip(para0, para10):
            txt0 = txt0 + plain + emphasised

    # Detailed content of the first result page.
    # NOTE(review): the XPath arguments of these two calls were lost in the
    # listing; the abstract expressions above are assumed -- confirm.
    para = page.xpath(abstract_text)
    para1 = page.xpath(abstract_em)
    txt = ''
    for plain, emphasised in zip(para, para1):
        # NOTE(review): txt0 is appended after every snippet, as in the
        # original listing; confirm it was not meant to be appended once.
        txt = txt + plain + emphasised + txt0
    return txt


# `baike` is defined outside this listing; presumably it returns text the
# same way baidu() does -- confirm.
txtk = baike()
txtd = baidu()
result = txtk + txtd
print(result)

# 4、文字转语音进行回复  (next article section: "4. Replying via text-to-speech")
# Text to speech.
# NOTE(review): credentials are hard-coded in the source; anyone who can read
# this file can use them. Consider loading them from the environment or a
# private config file, and rotating these values.
APP_ID = '15118279'
API_KEY = 'xUx0Gm2AG2YMtA3FnGfwoKdP'
SECRET_KEY = 'hdxyMvABhUD4xnacGtDdeHbEOUGmdjNx'
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


def text_to_audio(text):
    """Synthesise ``text`` to speech and save it as ``luyin.mp3``.

    Args:
        text: The text to be spoken.

    Returns:
        The name of the written mp3 file, or ``None`` when the service
        returned an error and no file was written.
    """
    # The output name is constant, so every call overwrites the previous
    # recording.
    file_name = 'luyin'
    result = client.synthesis(text, 'zh', 1, {
        'spd': 5,
        'vol': 5,
        'pit': 5,
        'per': 0,
    })
    # On success the call returns the audio bytes; on failure it returns a
    # dict describing the error.
    if isinstance(result, dict):
        # Do not hand back a file name for audio that was never written;
        # the caller would otherwise play a stale or missing file.
        print(result)
        return None
    with open(f'{file_name}.mp3', 'wb') as f:
        f.write(result)
    return f'{file_name}.mp3'


推荐阅读