Skip to content

Commit 7fb4fea

Browse files
author
panhehe
committed
[runtime/xpu] Support the execution of non-streaming parsing on the Kunlun XPU card #1455
1 parent 89e8d0d commit 7fb4fea

28 files changed

+3404
-6
lines changed

runtime/core/cmake/xpu.cmake

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Define ANSI escape sequences (ColourReset, ColourBold, and the plain/bold
# colour variables) for colouring message() text. On WIN32 this block sets
# nothing, so unless these variables are defined elsewhere, references to them
# expand to empty strings.
if(NOT WIN32)
2+
string(ASCII 27 Esc)
3+
set(ColourReset "${Esc}[m")
4+
set(ColourBold "${Esc}[1m")
5+
set(Red "${Esc}[31m")
6+
set(Green "${Esc}[32m")
7+
set(Yellow "${Esc}[33m")
8+
set(Blue "${Esc}[34m")
9+
set(Magenta "${Esc}[35m")
10+
set(Cyan "${Esc}[36m")
11+
set(White "${Esc}[37m")
12+
set(BoldRed "${Esc}[1;31m")
13+
set(BoldGreen "${Esc}[1;32m")
14+
set(BoldYellow "${Esc}[1;33m")
15+
set(BoldBlue "${Esc}[1;34m")
16+
set(BoldMagenta "${Esc}[1;35m")
17+
set(BoldCyan "${Esc}[1;36m")
18+
set(BoldWhite "${Esc}[1;37m")
19+
endif()
20+
21+
# Kunlun XPU build settings, applied only when the XPU option is enabled.
# Locates the xpu/ directory under the current source dir and the external XPU
# API tree named by the XPU_API_PATH environment variable (read at configure
# time), then adds the include/link directories and the USE_XPU definition at
# directory scope.
if(XPU)
22+
set(RUNTIME_XPU_PATH ${CMAKE_CURRENT_SOURCE_DIR})
23+
message(STATUS "RUNTIME_XPU_PATH is ${RUNTIME_XPU_PATH} .\n")
24+
set(XPU_KUNLUN_PATH ${RUNTIME_XPU_PATH}/xpu/)
25+
# Treat an exported-but-empty XPU_API_PATH like a missing one; otherwise the
# include/link paths below would silently resolve relative to "/".
if(NOT DEFINED ENV{XPU_API_PATH} OR "$ENV{XPU_API_PATH}" STREQUAL "")
26+
message(FATAL_ERROR "${BoldRed}NO ENV{XPU_API_PATH} in your env. Please set XPU_API_PATH.${ColourReset}\n")
27+
else()
28+
set(XPU_API_PATH $ENV{XPU_API_PATH})
29+
message("set XPU_API_PATH from env_var. Val is $ENV{XPU_API_PATH}.")
30+
endif()
31+
32+
include_directories(${XPU_KUNLUN_PATH}/
33+
${XPU_API_PATH}/output/include ${XPU_API_PATH}/../runtime/include)
34+
link_directories(${XPU_API_PATH}/output/so/ ${XPU_API_PATH}/../runtime/output/so/)
35+
36+
add_definitions(-DUSE_XPU)
37+
endif()

runtime/core/decoder/CMakeLists.txt

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ set(decoder_srcs
77
ctc_endpoint.cc
88
)
99

10-
if(NOT TORCH AND NOT ONNX)
11-
message(FATAL_ERROR "Please build with TORCH or ONNX!!!")
10+
if(NOT TORCH AND NOT ONNX AND NOT XPU)
11+
message(FATAL_ERROR "Please build with TORCH or ONNX or XPU!!!")
1212
endif()
1313
if(TORCH)
1414
list(APPEND decoder_srcs torch_asr_model.cc)
@@ -17,8 +17,21 @@ if(ONNX)
1717
list(APPEND decoder_srcs onnx_asr_model.cc)
1818
endif()
1919

20+
if(XPU)
21+
list(APPEND decoder_srcs ../xpu/xpu_asr_model.cc)
22+
list(APPEND decoder_srcs ../xpu/xpu_conformer.cpp)
23+
list(APPEND decoder_srcs ../xpu/xpu_util.cpp)
24+
message(STATUS "xpu decoder_srcs is :: ${decoder_srcs} \n")
25+
endif()
26+
2027
add_library(decoder STATIC ${decoder_srcs})
21-
target_link_libraries(decoder PUBLIC kaldi-decoder frontend post_processor utils)
28+
if(XPU)
29+
target_link_libraries(decoder PUBLIC kaldi-decoder frontend
30+
post_processor utils xpuapi xpurt)
31+
else()
32+
target_link_libraries(decoder PUBLIC kaldi-decoder frontend
33+
post_processor utils)
34+
endif()
2235

2336
if(ANDROID)
2437
target_link_libraries(decoder PUBLIC ${PYTORCH_LIBRARY} ${FBJNI_LIBRARY})

runtime/core/decoder/params.h

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
// See the License for the specific language governing permissions and
1414
// limitations under the License.
1515

16-
1716
#ifndef DECODER_PARAMS_H_
1817
#define DECODER_PARAMS_H_
1918

@@ -29,17 +28,24 @@
2928
#ifdef USE_TORCH
3029
#include "decoder/torch_asr_model.h"
3130
#endif
31+
#ifdef USE_XPU
32+
#include "xpu/xpu_asr_model.h"
33+
#endif
3234
#include "frontend/feature_pipeline.h"
3335
#include "post_processor/post_processor.h"
3436
#include "utils/flags.h"
3537
#include "utils/string.h"
3638

3739
DEFINE_int32(num_threads, 1, "num threads for ASR model");
40+
DEFINE_int32(device_id, 0, "set XPU DeviceID for ASR model");
3841

3942
// TorchAsrModel flags
4043
DEFINE_string(model_path, "", "pytorch exported model path");
4144
// OnnxAsrModel flags
4245
DEFINE_string(onnx_dir, "", "directory where the onnx model is saved");
46+
// XPUAsrModel flags
47+
DEFINE_string(xpu_model_dir, "",
48+
"directory where the XPU model and weights is saved");
4349

4450
// FeaturePipelineConfig flags
4551
DEFINE_int32(num_bins, 80, "num mel bins for fbank feature");
@@ -66,7 +72,8 @@ DEFINE_double(lattice_beam, 10.0, "lattice beam in ctc wfst search");
6672
DEFINE_double(acoustic_scale, 1.0, "acoustic scale for ctc wfst search");
6773
DEFINE_double(blank_skip_thresh, 1.0,
6874
"blank skip thresh for ctc wfst search, 1.0 means no skip");
69-
DEFINE_double(length_penalty, 0.0, "length penalty ctc wfst search, will not"
75+
// The help text is built from adjacent string literals, which the compiler
// joins with no separator, so each fragment carries its own trailing space.
DEFINE_double(length_penalty, 0.0,
76+
"length penalty ctc wfst search, will not "
7077
"apply on self-loop arc, for balancing the del/ins ratio, "
7178
"suggest set to -3.0");
7279
DEFINE_int32(nbest, 10, "nbest for ctc wfst or prefix search");
@@ -130,7 +137,7 @@ std::shared_ptr<DecodeResource> InitDecodeResourceFromFlags() {
130137
#else
131138
LOG(FATAL) << "Please rebuild with cmake options '-DONNX=ON'.";
132139
#endif
133-
} else {
140+
} else if (!FLAGS_model_path.empty()) {
134141
#ifdef USE_TORCH
135142
LOG(INFO) << "Reading torch model " << FLAGS_model_path;
136143
TorchAsrModel::InitEngineThreads(FLAGS_num_threads);
@@ -140,6 +147,19 @@ std::shared_ptr<DecodeResource> InitDecodeResourceFromFlags() {
140147
#else
141148
LOG(FATAL) << "Please rebuild with cmake options '-DTORCH=ON'.";
142149
#endif
150+
} else if (!FLAGS_xpu_model_dir.empty()) {
151+
#ifdef USE_XPU
152+
LOG(INFO) << "Reading XPU WeNet model weight from " << FLAGS_xpu_model_dir;
153+
auto model = std::make_shared<XPUAsrModel>();
154+
model->SetEngineThreads(FLAGS_num_threads);
155+
model->SetDeviceId(FLAGS_device_id);
156+
model->Read(FLAGS_xpu_model_dir);
157+
resource->model = model;
158+
#else
159+
LOG(FATAL) << "Please rebuild with cmake options '-DXPU=ON'.";
160+
#endif
161+
} else {
162+
LOG(FATAL) << "Please set ONNX, TORCH or XPU model path!!!";
143163
}
144164

145165
LOG(INFO) << "Reading unit table " << FLAGS_unit_path;

runtime/kunlun/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
build/
2+
fc_base/

runtime/kunlun/CMakeLists.txt

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
2+
3+
project(wenet VERSION 0.1)
4+
5+
option(CXX11_ABI "whether to use CXX11_ABI libtorch" OFF)
6+
option(FST_HAVE_BIN "whether to build fst binaries" OFF)
7+
option(BUILD_TESTING "whether to build unit test" OFF)
8+
option(GRPC "whether to build with gRPC" OFF)
9+
# WEBSOCKET defaults to OFF in this build because it depends on boost,
10+
# which is a very big library.
11+
option(WEBSOCKET "whether to build with websocket" OFF)
12+
option(TORCH "whether to build with Torch" OFF)
13+
option(XPU "whether to build with XPU" ON)
14+
option(ONNX "whether to build with ONNX" OFF)
15+
option(GPU "whether to build with GPU" OFF)
16+
17+
set(CMAKE_VERBOSE_MAKEFILE OFF)
18+
19+
include(FetchContent)
20+
include(ExternalProject)
21+
set(FETCHCONTENT_QUIET OFF)
22+
get_filename_component(fc_base "fc_base" REALPATH BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
23+
set(FETCHCONTENT_BASE_DIR ${fc_base})
24+
25+
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
26+
27+
if(NOT MSVC)
28+
# Keep the same with openfst, -fPIC or -fpic
29+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -pthread -fPIC")
30+
else()
31+
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
32+
add_compile_options("$<$<CXX_COMPILER_ID:MSVC>:/utf-8>")
33+
endif()
34+
35+
# Include all dependency
36+
include(libtorch)
37+
if(ONNX)
38+
include(onnx)
39+
endif()
40+
if(XPU)
41+
include(xpu)
42+
# compile conformer_test
43+
add_subdirectory(xpu)
44+
endif()
45+
include(openfst)
46+
include_directories(
47+
${CMAKE_CURRENT_SOURCE_DIR}
48+
${CMAKE_CURRENT_SOURCE_DIR}/kaldi
49+
)
50+
51+
# Build all libraries
52+
add_subdirectory(utils)
53+
if(NOT MSVC)
54+
add_dependencies(utils openfst)
55+
endif()
56+
add_subdirectory(frontend)
57+
add_subdirectory(post_processor)
58+
add_subdirectory(kaldi) # kaldi: wfst based decoder
59+
add_subdirectory(decoder)
60+
add_subdirectory(api)
61+
62+
# Optionally, you can build with websocket
63+
if(WEBSOCKET)
64+
include(boost)
65+
add_subdirectory(websocket)
66+
endif()
67+
68+
# Optionally, you can build with gRPC
69+
if(GRPC)
70+
include(grpc)
71+
add_subdirectory(grpc)
72+
endif()
73+
74+
# Build all bins
75+
add_subdirectory(bin)
76+
77+
# Unit Test
78+
if(BUILD_TESTING)
79+
include(gtest)
80+
add_subdirectory(test)
81+
endif()

runtime/kunlun/README.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# 在昆仑芯片上运行Wenet
2+
## 介绍
3+
下面的示例展示了如何在XPU上部署WeNet离线或在线的ASR模型。XPU是一种由昆仑芯100%自主研发的通用人工智能计算核心架构。
4+
5+
## 准备XPU运行环境
6+
7+
在开始之前,请确认您获得以下必须的环境。
8+
9+
XRE(XPU Runtime Environment):昆仑芯片的基础运行环境,包括芯片驱动程序、runtime api库、固件FW工具等功能模块。
10+
XDNN(XPU Deep Neural Network Library):加速深度神经网络的昆仑芯片库,提供应用程序中使用的高性能DNN功能库。
11+
12+
如果您需要任何帮助,或是想要进一步了解昆仑芯片,请通过官方网址联系我们:
13+
https://www.kunlunxin.com.cn/
14+
15+
## 操作步骤
16+
- 第一步:构建,需要cmake 3.14及以上版本
17+
18+
``` sh
19+
export CXX=${your_g++_path}
20+
export CC=${your_gcc_path}
21+
export XPU_API_PATH=${your_api_path}
22+
23+
# -r : release version; -d : debug version
24+
bash ./compile.sh -r
25+
```
26+
27+
- 第二步:测试,测试结果将在控制台输出
28+
29+
``` sh
30+
## set KUNLUN XPU visible device
31+
export XPU_VISIBLE_DEVICES=0
32+
export XPUSIM_DEVICE_MODEL=KUNLUN2
33+
## set logging level
34+
export GLOG_logtostderr=1
35+
export GLOG_v=3
36+
## set speech wav and model/weight path
37+
wav_path=${your_test_wav_path}
38+
xpu_model_dir=${your_xpu_weight_dir}
39+
units=${your_units.txt}
40+
## executive command
41+
./build/bin/decoder_main \
42+
--chunk_size -1 \
43+
--wav_path ${wav_path} \
44+
--xpu_model_dir ${xpu_model_dir} \
45+
--unit_path ${units} \
46+
--device_id 0 \
47+
--nbest 3 2>&1 | tee log.txt
48+
```

runtime/kunlun/README_EN.md

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# WeNet running on KUNLUNXIN XPU device
2+
## Introduction
3+
The below example shows how to deploy WeNet offline and online ASR models on XPUs.
4+
XPU is a core architecture 100% independently developed by KUNLUNXIN for general artificial intelligence computing.
5+
6+
## Setup environment for XPU device
7+
8+
Before you start, make sure you have the following required environment components.
9+
10+
XRE(XPU Runtime Environment):The basic operating environment of the XPUs
11+
includes functional modules such as chip drivers, runtime api library, and firmware tools.
12+
13+
XDNN(XPU Deep Neural Network Library): XPU library for accelerating deep neural networks, providing high-performance DNN function library used in applications.
14+
15+
If you would like to know more about XPUs or need any help, please contact us through the official website:
16+
17+
https://www.kunlunxin.com.cn/
18+
19+
## Instruction
20+
- Step 1. Build, the build requires cmake 3.14 or above.
21+
22+
``` sh
23+
export CXX=${your_g++_path}
24+
export CC=${your_gcc_path}
25+
export XPU_API_PATH=${your_api_path}
26+
27+
# -r : release version; -d : debug version
28+
bash ./compile.sh -r
29+
```
30+
31+
- Step 2. Testing, the result is shown in the console.
32+
33+
``` sh
34+
## set KUNLUN XPU visible device
35+
export XPU_VISIBLE_DEVICES=0
36+
export XPUSIM_DEVICE_MODEL=KUNLUN2
37+
## set logging level
38+
export GLOG_logtostderr=1
39+
export GLOG_v=3
40+
## set speech wav and model/weight/units path
41+
wav_path=${your_test_wav_path}
42+
xpu_model_dir=${your_xpu_weight_dir}
43+
units=${your_units.txt}
44+
## executive command
45+
./build/bin/decoder_main \
46+
--chunk_size -1 \
47+
--wav_path $wav_path \
48+
--xpu_model_dir $xpu_model_dir \
49+
--unit_path $units \
50+
--device_id 0 \
51+
--nbest 3 2>&1 | tee log.txt
52+
```

runtime/kunlun/api

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../core/api

runtime/kunlun/bin

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../core/bin

runtime/kunlun/cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../core/cmake

0 commit comments

Comments
 (0)