Skip to content

Commit 429736b

Browse files
authored
feat(horizonxj3): Support horizon x3 pi (#1597)
* feat(horizonXJ3): export onnx and convert to bin * feat(horizonxj3): add quantization pipeline * fest(horizonxj3): minor update * feat(horizonxj3): add runtime, 1st commit * fest(horizonxj3): rename horizonxj3 -> horizonbpu * fest(horizonxj3): pipeline stage && nchw->nhwc * fest(horizonxj3): mhwc -> nchw * fest(horizonxj3): remove attn_run_on_bpu * fest(horizonxj3): rm grpc * fest(horizonxj3): fix att_mask shape * fest(horizonxj3): fix runtime * fest(horizonxj3): remove XxxManager from being class members * fest(horizonxj3): rename X3??? to BPU??? * fest(horizonxj3): add args: extra_ops_run_on_cpu * fest(horizonxj3): refine readme * fest(horizonxj3): refine readme * fest(horizonxj3): refine readme * fix(run_on_cpu): [cr_id_skip] fix run_on_cpu for torch < 1.12.0 * refactor(horizonxj3): update readme * refactor(horizonxj3): update doc * refactor(horizonxj3): update readme * fix(run_on_cpu): [cr_id_skip] fix run_on_cpu for torch < 1.13.0 * fix(r3pi): Fix readme * refactor(horizonxj3): Move core/decoder/bpu_asr_model.* to horizonbpu/bpu/bpu_asr_model.* * fix(horizonxj3): Fix lint
1 parent f862576 commit 429736b

File tree

23 files changed

+2136
-3
lines changed

23 files changed

+2136
-3
lines changed

runtime/core/cmake/bpu.cmake

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Horizon BPU backend setup.
#
# Input:
#   BPU - when false, this module is a no-op.
#
# Effects when BPU is enabled:
#   * Aborts configuration unless the target is Linux on aarch64.
#   * Downloads the prebuilt easy_dnn package through FetchContent.
#   * Adds the easy_dnn / dnn / hlog header and library directories at
#     directory scope (targets link these libraries by bare name, so the
#     link directories must stay visible to them).
#   * Defines USE_BPU and appends `-fuse-ld=gold` to CMAKE_CXX_FLAGS.
if(BPU)
  # Make the module self-contained; including FetchContent twice is harmless.
  include(FetchContent)

  # Pass the variable *name* to if(): writing ${CMAKE_SYSTEM_NAME} would
  # expand it first, which can trigger a second dereference or a syntax
  # error if the variable is ever empty.
  if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
    if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
      set(EASY_DNN_URL "https://github.com/xingchensong/toolchain_pkg/releases/download/easy_dnn/easy_dnn.0.4.11.tar.gz")
      set(URL_HASH "SHA256=a1a6f77d1baae7181d75ec5d37a2ee529ac4e1c4400babd6ceb1c007392a4904")
    else()
      message(FATAL_ERROR "Unsupported CMake System Processor '${CMAKE_SYSTEM_PROCESSOR}' (expected 'aarch64')")
    endif()
  else()
    message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Linux')")
  endif()

  FetchContent_Declare(easy_dnn
    URL "${EASY_DNN_URL}"
    URL_HASH "${URL_HASH}"
  )
  FetchContent_MakeAvailable(easy_dnn)

  # Every package inside the archive lives under
  #   <name>/<version>_<abi>/files/<name>/{include,lib}
  set(bpu_abi "linux_aarch64-j3_hobot_gcc6.5.0")
  set(bpu_easy_dnn_root "${easy_dnn_SOURCE_DIR}/easy_dnn/0.4.11_${bpu_abi}/files/easy_dnn")
  set(bpu_dnn_root "${easy_dnn_SOURCE_DIR}/dnn/1.7.0_${bpu_abi}/files/dnn")
  set(bpu_hlog_root "${easy_dnn_SOURCE_DIR}/hlog/0.4.7_${bpu_abi}/files/hlog")

  # Same directories, in the same order, as before.
  include_directories(
    "${bpu_easy_dnn_root}/include"
    "${bpu_dnn_root}/include"
    "${bpu_hlog_root}/include"
  )
  link_directories(
    "${bpu_easy_dnn_root}/lib"
    "${bpu_dnn_root}/lib"
    "${bpu_hlog_root}/lib"
  )

  add_definitions(-DUSE_BPU)
  # NOTE(xcsong): Reasons for adding flag `-fuse-ld=gold`:
  # https://stackoverflow.com/questions/59915966/unknown-gcc-linker-error-but-builds-sucessfully/59916438#59916438
  # https://github.com/tensorflow/tensorflow/issues/47849
  string(APPEND CMAKE_CXX_FLAGS " -fuse-ld=gold")
endif()

runtime/core/decoder/CMakeLists.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ set(decoder_srcs
77
ctc_endpoint.cc
88
)
99

10-
if(NOT TORCH AND NOT ONNX AND NOT XPU AND NOT IOS)
11-
message(FATAL_ERROR "Please build with TORCH or ONNX or XPU or IOS!!!")
10+
if(NOT TORCH AND NOT ONNX AND NOT XPU AND NOT IOS AND NOT BPU)
11+
message(FATAL_ERROR "Please build with TORCH or ONNX or XPU or IOS or BPU!!!")
1212
endif()
1313
if(TORCH OR IOS)
1414
list(APPEND decoder_srcs torch_asr_model.cc)
@@ -30,6 +30,9 @@ else()
3030
if(ONNX)
3131
target_link_libraries(decoder PUBLIC onnxruntime)
3232
endif()
33+
if(BPU)
34+
target_link_libraries(decoder PUBLIC bpu_asr_model)
35+
endif()
3336
if(XPU)
3437
target_link_libraries(decoder PUBLIC xpu_conformer)
3538
endif()

runtime/core/decoder/params.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
#ifdef USE_XPU
3232
#include "xpu/xpu_asr_model.h"
3333
#endif
34+
#ifdef USE_BPU
35+
#include "bpu/bpu_asr_model.h"
36+
#endif
3437
#include "frontend/feature_pipeline.h"
3538
#include "post_processor/post_processor.h"
3639
#include "utils/flags.h"
@@ -45,6 +48,9 @@ DEFINE_string(onnx_dir, "", "directory where the onnx model is saved");
4548
// XPUAsrModel flags
4649
DEFINE_string(xpu_model_dir, "",
4750
"directory where the XPU model and weights is saved");
51+
// BPUAsrModel flags
52+
DEFINE_string(bpu_model_dir, "",
53+
"directory where the HORIZON BPU model is saved");
4854

4955
// FeaturePipelineConfig flags
5056
DEFINE_int32(num_bins, 80, "num mel bins for fbank feature");
@@ -156,9 +162,18 @@ std::shared_ptr<DecodeResource> InitDecodeResourceFromFlags() {
156162
resource->model = model;
157163
#else
158164
LOG(FATAL) << "Please rebuild with cmake options '-DXPU=ON'.";
165+
#endif
166+
} else if (!FLAGS_bpu_model_dir.empty()) {
167+
#ifdef USE_BPU
168+
LOG(INFO) << "Reading Horizon BPU model from " << FLAGS_bpu_model_dir;
169+
auto model = std::make_shared<BPUAsrModel>();
170+
model->Read(FLAGS_bpu_model_dir);
171+
resource->model = model;
172+
#else
173+
LOG(FATAL) << "Please rebuild with cmake options '-DBPU=ON'.";
159174
#endif
160175
} else {
161-
LOG(FATAL) << "Please set ONNX, TORCH or XPU model path!!!";
176+
LOG(FATAL) << "Please set ONNX, TORCH, XPU or BPU model path!!!";
162177
}
163178

164179
LOG(INFO) << "Reading unit table " << FLAGS_unit_path;

runtime/horizonbpu/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
build/
2+
fc_base/

runtime/horizonbpu/CMakeLists.txt

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Top-level build script for the WeNet runtime on Horizon BPU.
# Only the BPU backend is wired up here; the onnx, libtorch, gpu and
# windows parts of the generic runtime build are intentionally absent.
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

project(wenet VERSION 0.1)

# ---- Build switches --------------------------------------------------------
option(BPU "whether to build with BPU" ON)
option(CXX11_ABI "whether to use CXX11_ABI libtorch" OFF)
option(GRAPH_TOOLS "whether to build TLG graph tools" OFF)
option(BUILD_TESTING "whether to build unit test" ON)
option(GRPC "whether to build with gRPC" OFF)
# TODO(Binbin Zhang): Change websocket to OFF since it depends on boost
# which is a very big library
option(WEBSOCKET "whether to build with websocket" OFF)

set(CMAKE_VERBOSE_MAKEFILE OFF)

# ---- Third-party download location ----------------------------------------
# All FetchContent downloads land in <source dir>/fc_base, with progress
# output enabled.
include(FetchContent)
set(FETCHCONTENT_QUIET OFF)
get_filename_component(
  fc_base "fc_base"
  REALPATH
  BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}"
)
set(FETCHCONTENT_BASE_DIR "${fc_base}")

# Dependency modules (openfst, bpu, boost, grpc, gtest) are looked up here.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

# Keep the same with openfst, -fPIC or -fpic
string(APPEND CMAKE_CXX_FLAGS " -std=c++14 -pthread -fPIC")

# ---- Dependencies ----------------------------------------------------------
include(openfst)
include(bpu)
# bpu_asr_model is compiled before the project-wide include paths are set.
add_subdirectory(bpu)

include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/kaldi)

# ---- Libraries -------------------------------------------------------------
add_subdirectory(utils)
add_subdirectory(frontend)
add_subdirectory(post_processor)
add_subdirectory(kaldi)  # wfst based decoder
add_subdirectory(decoder)
add_subdirectory(api)

# ---- Optional servers ------------------------------------------------------
if(WEBSOCKET)
  include(boost)
  add_subdirectory(websocket)
endif()

if(GRPC)
  include(grpc)
  add_subdirectory(grpc)
endif()

# ---- Executables -----------------------------------------------------------
add_subdirectory(bin)

# ---- Unit tests ------------------------------------------------------------
if(BUILD_TESTING)
  include(gtest)
  add_subdirectory(test)
endif()

runtime/horizonbpu/README.md

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# WeNet & Horizon BPU (Cross Compile)
2+
3+
* Step 1. Set up the environment (install Horizon packages and cross-compile tools) on the PC. (~10min)
4+
5+
```sh
6+
# Conda env (This conda env is only used for converting bpu models, not for training torch models,
7+
# It's OK to install cpu-version pytorch)
8+
conda create -n horizonbpu python=3.8
9+
conda activate horizonbpu
10+
git clone https://github.com/wenet-e2e/wenet.git
11+
cd wenet/runtime/horizonbpu
12+
pip install -r ../../requirements.txt -i https://mirrors.aliyun.com/pypi/simple
13+
pip install torch==1.13.0 torchaudio==0.13.0 torchvision==0.14.0 onnx onnxruntime -i https://mirrors.aliyun.com/pypi/simple
14+
15+
# Horizon packages
16+
wget https://gitee.com/xcsong-thu/toolchain_pkg/releases/download/resource/wheels.tar.gz
17+
tar -xzf wheels.tar.gz
18+
pip install wheels/* -i https://mirrors.aliyun.com/pypi/simple
19+
20+
# Cross compile tools
21+
sudo apt-get install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
22+
```
23+
24+
25+
* Step 2. Build decoder_main (requires cmake 3.14 or above) and send the binary/libraries to Horizon X3PI. (~20min)
26+
27+
``` sh
28+
# Assume current dir is `wenet/runtime/horizonbpu`
29+
cmake -B build -DBPU=ON -DONNX=OFF -DTORCH=OFF -DWEBSOCKET=OFF -DGRPC=OFF -DCMAKE_TOOLCHAIN_FILE=toolchains/aarch64-linux-gnu.toolchain.cmake
30+
cmake --build build
31+
32+
# Send binary and libraries
33+
export BPUIP=xxx.xxx.xxx.xxx
34+
export DEMO_PATH_ON_BOARD=/path/to/demo
35+
scp build/bin/decoder_main sunrise@$BPUIP:$DEMO_PATH_ON_BOARD
36+
scp fc_base/easy_dnn-src/dnn/*j3*/*/*/lib/libdnn.so sunrise@$BPUIP:$DEMO_PATH_ON_BOARD
37+
scp fc_base/easy_dnn-src/easy_dnn/*j3*/*/*/lib/libeasy_dnn.so sunrise@$BPUIP:$DEMO_PATH_ON_BOARD
38+
scp fc_base/easy_dnn-src/hlog/*j3*/*/*/lib/libhlog.so sunrise@$BPUIP:$DEMO_PATH_ON_BOARD
39+
```
40+
41+
* Step 3. Export the model to ONNX, convert the ONNX model to Horizon .bin, and send the model/dict/test_wav to Horizon X3PI. (~40min)
42+
43+
``` sh
44+
# Assume current dir is `wenet/runtime/horizonbpu`
45+
conda activate horizonbpu
46+
export WENET_DIR=$PWD/../../
47+
export PYTHONIOENCODING=UTF-8
48+
export PYTHONPATH=$WENET_DIR:$PYTHONPATH
49+
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION='python'
50+
51+
# Download torch model
52+
wget https://ghproxy.com/https://github.com/xingchensong/toolchain_pkg/releases/download/conformer_subsample8_110M/model_subsample8_parameter110M.tar.gz
53+
tar -xzf model_subsample8_parameter110M.tar.gz
54+
55+
# Convert torch model to bpu model (*.pt -> *.onnx -> *.bin)
56+
# NOTE(xcsong): Convert model with 110M parameters requires CPU MEM >= 16G,
57+
# if your CPU does not meet the requirement, you can download pre-converted encoder.bin/ctc.bin
58+
# via this link: https://github.com/xingchensong/toolchain_pkg/releases
59+
python3 $WENET_DIR/tools/onnx2horizonbin.py \
60+
--config ./model_subsample8_parameter110M/train.yaml \
61+
--checkpoint ./model_subsample8_parameter110M/final.pt \
62+
--output_dir ./model_subsample8_parameter110M/sample50_chunk8_leftchunk16 \
63+
--chunk_size 8 \
64+
--num_decoding_left_chunks 16 \
65+
--max_samples 50 \
66+
--dict ./model_subsample8_parameter110M/units.txt \
67+
--cali_datalist ./model_subsample8_parameter110M/calibration_data/data.list
68+
69+
# scp test wav file and dictionary
70+
scp ./model_subsample8_parameter110M/test_wav.wav sunrise@$BPUIP:$DEMO_PATH_ON_BOARD
71+
scp ./model_subsample8_parameter110M/units.txt sunrise@$BPUIP:$DEMO_PATH_ON_BOARD
72+
# scp bpu models
73+
scp ./model_subsample8_parameter110M/sample50_chunk8_leftchunk16/hb_makertbin_output_encoder/encoder.bin sunrise@$BPUIP:$DEMO_PATH_ON_BOARD
74+
scp ./model_subsample8_parameter110M/sample50_chunk8_leftchunk16/hb_makertbin_output_ctc/ctc.bin sunrise@$BPUIP:$DEMO_PATH_ON_BOARD
75+
```
76+
77+
* Step 4. Test on X3PI; the RTF (real-time factor) is shown in Horizon X3PI's console. (~1min)
78+
79+
``` sh
80+
cd /path/to/demo
81+
export LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH
82+
export GLOG_logtostderr=1
83+
export GLOG_v=2
84+
./decoder_main \
85+
--chunk_size 8 \
86+
--num_left_chunks 16 \
87+
--rescoring_weight 0.0 \
88+
--wav_path ./test_wav.wav \
89+
--bpu_model_dir ./ \
90+
--unit_path ./units.txt 2>&1 | tee log.txt
91+
```

runtime/horizonbpu/api

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../core/api

runtime/horizonbpu/bin

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../core/bin
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Builds `bpu_asr_model`, the static library wrapping the Horizon BPU ASR
# model. Nothing is built unless BPU is enabled.
#
# The library links against `easy_dnn` and `dnn` by name; the directories
# those libraries are found in are not set in this file.

# STATUS keeps this informational line on stdout with the usual "-- " prefix
# instead of being emitted as a notice on stderr.
message(STATUS "cmake build type is ${CMAKE_BUILD_TYPE} .")

if(BPU)
  list(APPEND bpu_asr_model_srcs ./bpu_asr_model.cc)
  message(STATUS "Use src_files: [ ${bpu_asr_model_srcs} ] to compile bpu_asr_model .")

  # compile bpu_asr_model
  add_library(bpu_asr_model STATIC ${bpu_asr_model_srcs})
  # Headers are included relative to the parent directory
  # (e.g. "bpu/bpu_asr_model.h"). PUBLIC attaches the path to the target so
  # that anything linking bpu_asr_model inherits it as well.
  target_include_directories(bpu_asr_model PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..")
  target_link_libraries(bpu_asr_model PUBLIC easy_dnn dnn)
endif()

0 commit comments

Comments
 (0)