/**
 * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
 * Reserved. MIT License (https://opensource.org/licenses/MIT)
 */
|
|
//
//  AudioCapture.m
//  paraformer_online
//
//  Created by 邱威 on 2023/6/6.
//
|
|
#import "AudioCapture.h"
|
#import <AVFoundation/AVFoundation.h>
|
#include <thread>
|
|
#import "AudioRecorder.h"
|
|
#include "precomp.h"
|
|
// Capture sample rate in Hz.
#define Recorder_Sample_Rate    16000
// Samples in one frame: 1/100 of a second of audio (160 samples at 16 kHz).
#define Samples_Per_Frame       (Recorder_Sample_Rate / 100)

// Framing parameters, counted in frames.
#define k_input_frames          10
#define k_left_padding_frames   5
#define k_right_padding_frames  5

// 960 samples, i.e. 60 ms at Recorder_Sample_Rate.
#define k_input_samples         960
|
|
|
// Strong file-scope reference to the capture instance; assigned in -startRecorder.
static AudioCapture *selfClass = nil;

// Private state of AudioCapture.
@interface AudioCapture () <AVCaptureAudioDataOutputSampleBufferDelegate>

@property (nonatomic, strong) AVCaptureSession *capture;

// Raw PCM accumulated for the debug dump written by -writeData.
// Must be `strong`, not `copy`: a synthesized `copy` setter would store an
// immutable NSData, and the later -appendData: / -setLength: calls would throw.
@property (nonatomic, strong) NSMutableData *sampleData;

// Microphone source; created lazily by its getter.
@property (nonatomic, strong) AudioRecorder *audioRecorder;

// YES between -startRecorder and -stopRecorder.
@property (nonatomic, assign) BOOL isRecording;

// Created lazily by its getter.
@property (nonatomic, strong) NSLock *lock;

@end
|
|
using namespace funasr;

@implementation AudioCapture {
    // Recognizer handles: asr_handle_ comes from FunASRInit and online_handle_
    // from FunASROnlineInit (both in -initASROnnx); both are released in
    // -stopRecorder.
    FUNASR_HANDLE asr_handle_;
    FUNASR_HANDLE online_handle_;

    // Set from the flag passed to -initWithOnnxModel:.
    bool is_onnx;

    // Heap buffer that accumulates PCM bytes between recognizer calls;
    // allocated in -startRecorder and freed in -stopRecorder.
    char *speech_buff;

    // Not referenced by the methods in this file.
    const char *output_path;
    void *denoiser;
    void *resampler;
}

// File-level counter; not referenced by the methods in this file.
int packetIndex = 0;
|
|
/// Creates a capture object and, when `onnxModel` is YES, loads the ONNX
/// recognizer through -initASROnnx.
///
/// @param onnxModel YES to use the ONNX model; with NO no model is loaded here
///                  (the alternative -initASR call is disabled).
/// @return The initialized instance, or nil if the superclass initializer fails.
- (id)initWithOnnxModel:(BOOL)onnxModel
{
    if (!(self = [super init])) {
        return self;
    }

    is_onnx = (onnxModel != NO);
    if (is_onnx) {
        [self initASROnnx];
    }

    self.isRecording = NO;
    NSLog(@"model init done!");
    return self;
}
|
|
// Widens `size` signed 16-bit samples from `src` into floats in `dest`.
// Values are converted as-is (no scaling); `dest` must hold `size` floats.
void S16ToFloatS16_1(const int16_t* src, size_t size, float* dest) {
    const int16_t* const src_end = src + size;
    while (src != src_end) {
        *dest++ = (float)(*src++);
    }
}
|
|
// Reinterprets `size` bytes at `src` as 16-bit samples and converts the
// resulting size/2 samples to unscaled floats in `dst` (a trailing odd byte
// is ignored).
void CharToFloat(const char* src, size_t size, float* dst) {
    const size_t sample_count = size / 2;
    S16ToFloatS16_1(reinterpret_cast<const int16_t*>(src), sample_count, dst);
}
|
|
// Converts one frame of 16-bit PCM bytes to floats scaled by 1/32767.
//
// Exactly Samples_Per_Frame floats are written to `dst`: the first size/2
// come from `src`, any remainder is zero-filled. Input longer than one frame
// is truncated to Samples_Per_Frame samples; the previous implementation
// copied all size/2 samples into a fixed Samples_Per_Frame-sized stack array
// and overflowed it in that case.
//
// src  : raw bytes, read as int16_t samples.
// size : number of bytes available at `src`.
// dst  : output; must have room for Samples_Per_Frame floats.
void CharToFloat_1(const char* src, size_t size, float* dst) {
    const int16_t* sample_data = reinterpret_cast<const int16_t*>(src);
    const size_t frame_samples = (size_t)Samples_Per_Frame;

    size_t sample_count = size / 2;
    if (sample_count > frame_samples) {
        sample_count = frame_samples;
    }

    size_t i = 0;
    for (; i < sample_count; ++i) {
        dst[i] = (float)sample_data[i] / 32767.0;
    }
    // Keep the original contract of always producing a full frame.
    for (; i < frame_samples; ++i) {
        dst[i] = 0.0f;
    }
}
|
|
// Copies src_size/2 16-bit samples from the byte buffer `src` into `dst`.
// A trailing odd byte is ignored. `dst` must hold src_size/2 shorts.
void CharToS16(const char* src, size_t src_size, short* dst) {
    const int16_t* sample_data = reinterpret_cast<const int16_t*>(src);
    const size_t sample_count = src_size / 2;
    // size_t index: the old `int` counter was compared against a size_t bound
    // and would overflow for very large buffers.
    for (size_t i = 0; i < sample_count; ++i) {
        dst[i] = sample_data[i];
    }
}
|
|
/// Loads the quantized ONNX model from the bundle's "model" directory and
/// creates the recognizer handles.
///
/// On success asr_handle_ and online_handle_ are both set. On any failure the
/// reason is printed and the affected handle(s) are left NULL, so callers can
/// detect that the recognizer is unavailable.
- (void)initASROnnx {
    asr_handle_ = NULL;
    online_handle_ = NULL;

    // The model directory is the folder that contains config.yaml.
    NSString *configPath = [[NSBundle mainBundle] pathForResource:@"config" ofType:@".yaml" inDirectory:@"model"];
    if (configPath == nil) {
        // Without this check a NULL C string would be turned into std::string below.
        std::cout << "FunASR model config not found in bundle" << std::endl;
        return;
    }
    NSString *modelDirectory = [configPath stringByReplacingOccurrencesOfString:@"config.yaml" withString:@""];
    const char *model_dir = [modelDirectory UTF8String];
    if (model_dir == NULL) {
        std::cout << "FunASR model path is not representable as UTF-8" << std::endl;
        return;
    }

    std::map<std::string, std::string> model_path;
    model_path.insert({MODEL_DIR, model_dir});
    model_path.insert({QUANTIZE, "true"});

    int thread_num = 1;

    // Time the model load. The previous code subtracted two uninitialized
    // timeval structs, so the reported duration was meaningless.
    const CFAbsoluteTime loadStart = CFAbsoluteTimeGetCurrent();
    asr_handle_ = FunASRInit(model_path, thread_num, ASR_ONLINE);
    const CFAbsoluteTime loadSeconds = CFAbsoluteTimeGetCurrent() - loadStart;

    if (!asr_handle_) {
        std::cout << "FunASR init failed" << std::endl;
        return;  // do not create an online handle on top of a NULL model handle
    }
    std::cout << "Model initialization takes " << (double)loadSeconds << " s" << std::endl;

    // init online features
    online_handle_ = FunASROnlineInit(asr_handle_);
    if (!online_handle_) {
        std::cout << "FunASR online init failed" << std::endl;
    }
}
|
|
//static FILE *pf_file_out = NULL;
|
//int audio_index = 0;
|
// Size in bytes of the accumulation buffer handed to the recognizer in one
// call: 9600 samples of 2 bytes each.
static const size_t kSpeechBufferByteCount = 9600 * 2;

/// Runs the recognizer over the full speech buffer and forwards the text of
/// the result, if any, to resultBlock.
- (void)recognizeBufferedSpeech {
    if (online_handle_ == NULL || speech_buff == NULL) {
        return;
    }

    FUNASR_RESULT result = FunASRInferBuffer(online_handle_, speech_buff, (int)kSpeechBufferByteCount, RASR_NONE, NULL, false, 16000);
    if (!result) {
        return;
    }

    // Copy the text out before the result is released.
    const char *text = FunASRGetResult(result, 0);
    NSString *message = (text != NULL) ? [NSString stringWithUTF8String:text] : nil;
    FunASRFreeResult(result);

    if (self.resultBlock) {
        self.resultBlock(message ?: @"");
    }
}

/// Starts microphone capture and streams the audio to the recognizer.
///
/// Incoming PCM chunks are appended to speech_buff on the main queue; each
/// time kSpeechBufferByteCount bytes have been collected the buffer is passed
/// to the recognizer and the text is reported through resultBlock. Chunks of
/// any size are accepted. Calling this while already recording is a no-op.
- (void)startRecorder {
    if (self.isRecording) {
        // A second start would leak the current buffer and restart the recorder.
        return;
    }

    selfClass = self;
    [self.sampleData setLength:0];

    // Re-create the recognizer if its handles have been cleared.
    if (is_onnx && online_handle_ == NULL) {
        [self initASROnnx];
    }

    speech_buff = (char *)calloc(kSpeechBufferByteCount, sizeof(char));
    if (speech_buff == NULL) {
        NSLog(@"AudioCapture: unable to allocate speech buffer");
        return;
    }

    __weak __typeof(self) weakSelf = self;
    __block size_t bufferedBytes = 0;

    // audio record call-back
    self.audioRecorder.inputBlock = ^(NSData *speechData) {
        dispatch_async(dispatch_get_main_queue(), ^{
            __strong __typeof(weakSelf) strongSelf = weakSelf;
            if (strongSelf == nil || !strongSelf.isRecording || !strongSelf->is_onnx) {
                return;
            }

            // Always reach ivars through strongSelf: a bare `speech_buff` would
            // capture self strongly and create a retain cycle with inputBlock.
            char *speechBuffer = strongSelf->speech_buff;
            if (speechBuffer == NULL) {
                return;
            }

            const char *incoming = (const char *)speechData.bytes;
            size_t remaining = (size_t)speechData.length;

            while (remaining > 0) {
                // Never copy past the end of the buffer, whatever the chunk size.
                const size_t space = kSpeechBufferByteCount - bufferedBytes;
                const size_t chunk = (remaining < space) ? remaining : space;

                memcpy(speechBuffer + bufferedBytes, incoming, chunk);
                bufferedBytes += chunk;
                incoming += chunk;
                remaining -= chunk;

                if (bufferedBytes == kSpeechBufferByteCount) {
                    [strongSelf recognizeBufferedSpeech];
                    bufferedBytes = 0;
                }
            }
        });
    };

    [self.audioRecorder start];
    self.isRecording = YES;
}
|
|
/// Intentionally empty; kept as a placeholder.
- (void)pushData
{
}
|
|
/// Stops capture, releases the recognizer handles and the speech buffer, and
/// writes the collected PCM data to disk via -writeData.
///
/// Safe to call more than once: released handles and the freed buffer are
/// reset to NULL so a repeated call cannot release them a second time.
- (void)stopRecorder {
    self.isRecording = NO;

    // Stop the audio source before tearing down what its callback uses.
    [self.audioRecorder stop];

    if (is_onnx) {
        // NOTE(review): the online handle is derived from asr_handle_; the
        // original release order is kept here — confirm it is the intended one.
        if (asr_handle_ != NULL) {
            FunASRUninit(asr_handle_);
            asr_handle_ = NULL;
        }
        if (online_handle_ != NULL) {
            FunASRUninit(online_handle_);
            online_handle_ = NULL;
        }
    }

    free(speech_buff);
    speech_buff = NULL;

    if (![self writeData]) {
        NSLog(@"AudioCapture: failed to write PCM data to %@", [self getPCMPath]);
    }
}
|
|
/// Writes the accumulated sample data to the file returned by -getPCMPath
/// (non-atomic write).
/// @return YES if the file was written, NO otherwise.
- (BOOL)writeData
{
    NSString *destination = [self getPCMPath];
    BOOL didWrite = [self.sampleData writeToFile:destination atomically:NO];
    return didWrite;
}
|
|
/// Hands a chunk of PCM data to -audioProcessing:.
/// @param pcmData The PCM bytes to append.
- (void)appendPCMData:(NSData *)pcmData
{
    NSData *chunk = pcmData;
    [self audioProcessing:chunk];
}
|
|
/// Appends `data` to the in-memory sample buffer.
/// @param data The bytes to add to sampleData.
- (void)audioProcessing:(NSData *)data
{
    NSMutableData *collected = self.sampleData;
    [collected appendData:data];
}
|
|
/// Lazy getter: creates the empty sample buffer on first access.
- (NSMutableData *)sampleData
{
    if (_sampleData) {
        return _sampleData;
    }
    _sampleData = [NSMutableData data];
    return _sampleData;
}
|
|
/// Lazy getter: creates the AudioRecorder on first access.
- (AudioRecorder *)audioRecorder
{
    if (_audioRecorder) {
        return _audioRecorder;
    }
    _audioRecorder = [[AudioRecorder alloc] init];
    return _audioRecorder;
}
|
|
/// Lazy getter: creates the NSLock on first access.
- (NSLock *)lock
{
    if (_lock) {
        return _lock;
    }
    _lock = [[NSLock alloc] init];
    return _lock;
}
|
|
/// Path of the PCM dump file: "mic_ori.pcm" inside the first user Documents
/// directory reported by the system.
- (NSString *)getPCMPath
{
    NSArray *documentDirs = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES);
    NSString *documentsDir = [documentDirs firstObject];
    return [documentsDir stringByAppendingPathComponent:@"mic_ori.pcm"];
}
|
|
|
@end
|