Skip to content

Commit

Permalink
Merge branch 'main' of github.com:alibaba-damo-academy/FunASR
Browse files Browse the repository at this point in the history
add
  • Loading branch information
LauraGPT committed Jan 23, 2024
2 parents 9c9e02b + 652627e commit ec1c462
Show file tree
Hide file tree
Showing 10 changed files with 69 additions and 20 deletions.
4 changes: 2 additions & 2 deletions runtime/docs/SDK_tutorial_online.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ For more client version support, please refer to the [websocket_protocol](./webs
If you want to run the client directly for testing, you can refer to the following simple instructions, using the Python version as an example:

```shell
python3 funasr_wss_client.py --host "127.0.0.1" --port 10095 --mode offline --audio_in "../audio/asr_example.wav"
python3 funasr_wss_client.py --host "127.0.0.1" --port 10095 --mode offline --audio_in "../audio/asr_example.pcm"
```

Command parameter description:
Expand All @@ -79,7 +79,7 @@ Command parameter instructions:

After entering the samples/cpp directory, you can test with the C++ client. The command is as follows:
```shell
./funasr-wss-client-2pass --server-ip 127.0.0.1 --port 10095 --wav-path ../audio/asr_example.wav
./funasr-wss-client-2pass --server-ip 127.0.0.1 --port 10095 --wav-path ../audio/asr_example.pcm
```

Command parameter description:
Expand Down
2 changes: 1 addition & 1 deletion runtime/docs/SDK_tutorial_online_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ python3 funasr_wss_client.py --host "127.0.0.1" --port 10095 --mode 2pass
进入samples/cpp目录后,可以用cpp进行测试,指令如下:
```shell
./funasr-wss-client-2pass --server-ip 127.0.0.1 --port 10095 --mode 2pass \
--wav-path ../audio/asr_example.wav
--wav-path ../audio/asr_example.pcm
```

命令参数说明:
Expand Down
1 change: 1 addition & 0 deletions runtime/onnxruntime/include/audio.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ class Audio {
int Fetch(float *&dout, int &len, int &flag, float &start_time);
void Padding();
void Split(OfflineStream* offline_streamj);
void CutSplit(OfflineStream* offline_streamj);
void Split(VadModel* vad_obj, vector<std::vector<int>>& vad_segments, bool input_finished=true);
void Split(VadModel* vad_obj, int chunk_len, bool input_finished=true, ASR_TYPE asr_mode=ASR_TWO_PASS);
float GetTimeLen();
Expand Down
54 changes: 54 additions & 0 deletions runtime/onnxruntime/src/audio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1085,6 +1085,60 @@ void Audio::Split(OfflineStream* offline_stream)
}
}

/**
 * Re-segment the loaded audio with a streaming (online) FSMN VAD, feeding the
 * samples in fixed-size chunks rather than in one single pass.
 *
 * The frame at the head of frame_queue is removed and destroyed; one new
 * AudioFrame is then pushed for every speech segment for which both a start
 * and an end boundary have been reported by the VAD.
 *
 * @param offline_stream Stream whose vad_handle backs the online VAD wrapper.
 *                       Must be non-null and hold an FsmnVad instance.
 */
void Audio::CutSplit(OfflineStream* offline_stream)
{
    // front()/pop() on an empty queue is undefined behaviour, so bail out early.
    if (frame_queue.empty()) {
        LOG(ERROR) << "frame_queue is empty, nothing to split.";
        return;
    }

    std::unique_ptr<VadModel> vad_online_handle =
        make_unique<FsmnVadOnline>(static_cast<FsmnVad*>(offline_stream->vad_handle.get()));

    // Discard the frame currently queued; it is replaced by per-segment frames below.
    AudioFrame* queued_frame = frame_queue.front();
    frame_queue.pop();
    delete queued_frame;
    queued_frame = nullptr;

    // Feed the VAD dest_sample_rate*10 samples at a time
    // (presumably 10 s of audio, if dest_sample_rate is in Hz -- TODO confirm).
    const int chunk_len = dest_sample_rate * 10;
    vector<std::vector<int>> vad_segments;
    int sample_offset = 0;
    while (sample_offset < speech_len) {
        int step = chunk_len;
        bool is_final = false;
        // When at most one sample would remain after this chunk, absorb the
        // remainder into it and flag it as the last chunk for the VAD.
        if (sample_offset + step >= speech_len - 1) {
            step = speech_len - sample_offset;
            is_final = true;
        }

        std::vector<float> pcm_data(speech_data + sample_offset,
                                    speech_data + sample_offset + step);
        vector<std::vector<int>> cut_segments = vad_online_handle->Infer(pcm_data, is_final);
        vad_segments.insert(vad_segments.end(), cut_segments.begin(), cut_segments.end());

        sample_offset += step;
    }

    // A segment is a [start, end] pair where -1 means "boundary not reported
    // in this entry"; a start seen in one entry is paired with the next end
    // that shows up, possibly in a later entry.
    int speech_start_i = -1;
    int speech_end_i = -1;
    for (const auto& vad_segment : vad_segments) {
        if (vad_segment.size() != 2) {
            LOG(ERROR) << "Size of vad_segment is not 2.";
            break;
        }
        if (vad_segment[0] != -1) {
            speech_start_i = vad_segment[0];
        }
        if (vad_segment[1] != -1) {
            speech_end_i = vad_segment[1];
        }

        if (speech_start_i != -1 && speech_end_i != -1) {
            // Scale VAD units to sample indices via seg_sample
            // (presumably samples per millisecond -- TODO confirm).
            const int start = speech_start_i * seg_sample;
            const int end = speech_end_i * seg_sample;

            // frame_queue stores raw pointers; the pushed frame is deleted by
            // whoever pops it, as done for the queued frame above.
            AudioFrame* segment_frame = new AudioFrame();
            segment_frame->SetStart(start);
            segment_frame->SetEnd(end);
            frame_queue.push(segment_frame);

            speech_start_i = -1;
            speech_end_i = -1;
        }
    }
}

void Audio::Split(VadModel* vad_obj, vector<std::vector<int>>& vad_segments, bool input_finished)
{
AudioFrame *frame;
Expand Down
2 changes: 1 addition & 1 deletion runtime/onnxruntime/src/funasrruntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@
return p_result;
}
if(offline_stream->UseVad()){
audio.Split(offline_stream);
audio.CutSplit(offline_stream);
}

float* buff;
Expand Down
8 changes: 4 additions & 4 deletions runtime/onnxruntime/src/util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,8 +590,8 @@ std::string TimestampSentence(std::string &text, std::string &str_time){
// format
ts_sent += "{\"text_seg\":\"" + text_seg + "\",";
ts_sent += "\"punc\":\"" + characters[idx_str] + "\",";
ts_sent += "\"start\":\"" + to_string(start) + "\",";
ts_sent += "\"end\":\"" + to_string(end) + "\",";
ts_sent += "\"start\":" + to_string(start) + ",";
ts_sent += "\"end\":" + to_string(end) + ",";
ts_sent += "\"ts_list\":" + VectorToString(ts_seg, false) + "}";

if (idx_str == characters.size()-1){
Expand Down Expand Up @@ -627,8 +627,8 @@ std::string TimestampSentence(std::string &text, std::string &str_time){
// format
ts_sent += "{\"text_seg\":\"" + text_seg + "\",";
ts_sent += "\"punc\":\"\",";
ts_sent += "\"start\":\"" + to_string(start) + "\",";
ts_sent += "\"end\":\"" + to_string(end) + "\",";
ts_sent += "\"start\":" + to_string(start) + ",";
ts_sent += "\"end\":" + to_string(end) + ",";
ts_sent += "\"ts_list\":" + VectorToString(ts_seg, false) + "}";
ts_sentences += ts_sent;
}
Expand Down
6 changes: 3 additions & 3 deletions runtime/websocket/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ if(ENABLE_WEBSOCKET)
# cmake_policy(SET CMP0135 NEW)
include(FetchContent)

if(NOT EXISTS ${PROJECT_SOURCE_DIR}/third_party/websocket )
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/third_party/websocket/websocketpp )
FetchContent_Declare(websocketpp
GIT_REPOSITORY https://github.com/zaphoyd/websocketpp.git
GIT_TAG 0.8.2
Expand All @@ -42,7 +42,7 @@ if(ENABLE_WEBSOCKET)
endif()
include_directories(${PROJECT_SOURCE_DIR}/third_party/websocket)

if(NOT EXISTS ${PROJECT_SOURCE_DIR}/third_party/asio )
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/third_party/asio/asio )
FetchContent_Declare(asio
URL https://github.com/chriskohlhoff/asio/archive/refs/tags/asio-1-24-0.tar.gz
SOURCE_DIR ${PROJECT_SOURCE_DIR}/third_party/asio
Expand All @@ -52,7 +52,7 @@ if(ENABLE_WEBSOCKET)
endif()
include_directories(${PROJECT_SOURCE_DIR}/third_party/asio/asio/include)

if(NOT EXISTS ${PROJECT_SOURCE_DIR}/third_party/json )
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/third_party/json/ChangeLog.md )
FetchContent_Declare(json
URL https://github.com/nlohmann/json/archive/refs/tags/v3.11.2.tar.gz
SOURCE_DIR ${PROJECT_SOURCE_DIR}/third_party/json
Expand Down
5 changes: 1 addition & 4 deletions runtime/websocket/bin/funasr-wss-client-2pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,7 @@ class WebsocketClient {
funasr::Audio audio(1);
int32_t sampling_rate = audio_fs;
std::string wav_format = "pcm";
if (funasr::IsTargetFile(wav_path.c_str(), "wav")) {
if (!audio.LoadWav(wav_path.c_str(), &sampling_rate, false))
return;
} else if (funasr::IsTargetFile(wav_path.c_str(), "pcm")) {
if (funasr::IsTargetFile(wav_path.c_str(), "pcm")) {
if (!audio.LoadPcmwav(wav_path.c_str(), &sampling_rate, false)) return;
} else {
wav_format = "others";
Expand Down
5 changes: 1 addition & 4 deletions runtime/websocket/bin/funasr-wss-client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,7 @@ class WebsocketClient {
funasr::Audio audio(1);
int32_t sampling_rate = audio_fs;
std::string wav_format = "pcm";
if (funasr::IsTargetFile(wav_path.c_str(), "wav")) {
if (!audio.LoadWav(wav_path.c_str(), &sampling_rate, false))
return;
} else if(funasr::IsTargetFile(wav_path.c_str(), "pcm")){
if(funasr::IsTargetFile(wav_path.c_str(), "pcm")){
if (!audio.LoadPcmwav(wav_path.c_str(), &sampling_rate, false))
return ;
}else{
Expand Down
2 changes: 1 addition & 1 deletion runtime/websocket/bin/websocket-server-2pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ void WebSocketServer::do_decoder(
if(wav_format != "pcm" && wav_format != "PCM"){
websocketpp::lib::error_code ec;
nlohmann::json jsonresult;
jsonresult["text"] = "ERROR. Real-time transcription service ONLY SUPPORT wav_format pcm.";
jsonresult["text"] = "ERROR. Real-time transcription service ONLY SUPPORT PCM stream.";
jsonresult["wav_name"] = wav_name;
jsonresult["is_final"] = true;
if (is_ssl) {
Expand Down

0 comments on commit ec1c462

Please sign in to comment.