以下是我用来创建识别请求的代码:
func recordSpeech() throws {
// Cancel the previous task if it's running.
if let recognitionTask = recognitionTask {
recognitionTask.cancel()
self.recognitionTask = nil
}
isRecognizing = true
self.delegate?.recognitionStarted(sender: self)
let audioSession = AVAudioSession.sharedInstance()
try audioSession.setCategory(AVAudioSessionCategoryRecord)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let inputNode = audioEngine.inputNode else {
print("there was an error in audioEngine.inputNode")
fatalError("Audio engine has no input node")
}
guard let recognitionRequest = recognitionRequest else {
fatalError("Unable to create a SFSpeechAudioBufferRecognitionRequest object")
}
// Configure request so that results are returned before audio recording is finished
recognitionRequest.shouldReportPartialResults = true
// A recognition task represents a speech recognition session.
// We keep a reference to the task so that it can be cancelled.
recognitionTask = recognizer.recognitionTask(with: recognitionRequest) { result, error in
func finalizeResult() {
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
}
guard error == nil else {
finalizeResult()
return
}
if !(result?.isFinal)! {
guard self.isRecognizing else {
return
}
// process partial result
self.processRecognition(result: result)
} else {
finalizeResult()
}
}
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
do {
try audioEngine.start()
} catch let error as NSError {
print("audio engine start error=\(error)")
}
}
要随时停止或取消识别,我使用以下方法:
@objc func stopRecording() {
isRecognizing = false
audioEngine.stop()
recognitionRequest?.endAudio()
self.delegate?.recognitionFinished()
}
func cancelRecording() {
isRecognizing = false
audioEngine.stop()
recognitionTask?.cancel()
self.delegate?.recognitionFinished()
}
我会设置一个按钮来触发语音识别,并将其与 recordSpeech()
关联起来。然后设置另一个按钮并将其绑定到 stopRecording()
。当用户停止请求时,result?.isFinal
将为真,你就知道这是第一段输入的最终文本。之后用户可以再次使用语音输入来进行第二段讲话。
我的大部分代码来自 2016 年 WWDC 关于语音识别的会议,你可以在这里找到:
Transcript
Video
我不太清楚你上面问的是什么,但听起来你可能希望用户先说话生成文本,然后编辑或补充该文本,再继续说话。如果是这样,我会把它作为两个单独的识别请求来处理:结束第一个请求,并将第二个请求的结果附加到第一个请求的结果之后。 –
@DavidL:你对这个问题的理解是正确的,但你给出的解决方案还不够清楚:如何创建多个请求?何时启动第二个识别请求? –
我实现它的方式是在语音识别开始时在屏幕上发出提示,并允许用户在完成时停止提示。然后,您可以使用口述文本添加到用户可以编辑它的文本字段中,或者再次点击语音识别按钮来交谈并添加更多文本。 –