|
| 1 | +# WeNet & Horizon BPU (Cross Compile) |
| 2 | + |
| 3 | +* Step 1. Set up the environment (install Horizon packages and cross-compile tools) on the PC. (~10min) |
| 4 | + |
| 5 | +```sh |
| 6 | +# Conda env (this env is only used for converting BPU models, not for training torch models, |
| 7 | +# so it is OK to install the CPU-only version of PyTorch) |
| 8 | +conda create -n horizonbpu python=3.8 |
| 9 | +conda activate horizonbpu |
| 10 | +git clone https://github.com/wenet-e2e/wenet.git |
| 11 | +cd wenet/runtime/horizonbpu |
| 12 | +pip install -r ../../requirements.txt -i https://mirrors.aliyun.com/pypi/simple |
| 13 | +pip install torch==1.13.0 torchaudio==0.13.0 torchvision==0.14.0 onnx onnxruntime -i https://mirrors.aliyun.com/pypi/simple |
| 14 | + |
| 15 | +# Horizon packages |
| 16 | +wget https://gitee.com/xcsong-thu/toolchain_pkg/releases/download/resource/wheels.tar.gz |
| 17 | +tar -xzf wheels.tar.gz |
| 18 | +pip install wheels/* -i https://mirrors.aliyun.com/pypi/simple |
| 19 | + |
| 20 | +# Cross compile tools |
| 21 | +sudo apt-get install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu |
| 22 | +``` |
| 23 | + |
| 24 | + |
| 25 | +* Step 2. Build decoder_main (requires cmake 3.14 or above) and send the binary/libraries to Horizon X3PI. (~20min) |
| 26 | + |
| 27 | +``` sh |
| 28 | +# Assume current dir is `wenet/runtime/horizonbpu` |
| 29 | +cmake -B build -DBPU=ON -DONNX=OFF -DTORCH=OFF -DWEBSOCKET=OFF -DGRPC=OFF -DCMAKE_TOOLCHAIN_FILE=toolchains/aarch64-linux-gnu.toolchain.cmake |
| 30 | +cmake --build build |
| 31 | + |
| 32 | +# Send binary and libraries |
| 33 | +export BPUIP=xxx.xxx.xxx.xxx |
| 34 | +export DEMO_PATH_ON_BOARD=/path/to/demo |
| 35 | +scp build/bin/decoder_main sunrise@$BPUIP:$DEMO_PATH_ON_BOARD |
| 36 | +scp fc_base/easy_dnn-src/dnn/*j3*/*/*/lib/libdnn.so sunrise@$BPUIP:$DEMO_PATH_ON_BOARD |
| 37 | +scp fc_base/easy_dnn-src/easy_dnn/*j3*/*/*/lib/libeasy_dnn.so sunrise@$BPUIP:$DEMO_PATH_ON_BOARD |
| 38 | +scp fc_base/easy_dnn-src/hlog/*j3*/*/*/lib/libhlog.so sunrise@$BPUIP:$DEMO_PATH_ON_BOARD |
| 39 | +``` |
| 40 | + |
| 41 | +* Step 3. Export the model to ONNX, convert the ONNX model to Horizon .bin, and send the model/dict/test_wav to Horizon X3PI. (~40min) |
| 42 | + |
| 43 | +``` sh |
| 44 | +# Assume current dir is `wenet/runtime/horizonbpu` |
| 45 | +conda activate horizonbpu |
| 46 | +export WENET_DIR=$PWD/../../ |
| 47 | +export PYTHONIOENCODING=UTF-8 |
| 48 | +export PYTHONPATH=$WENET_DIR:$PYTHONPATH |
| 49 | +export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION='python' |
| 50 | + |
| 51 | +# Download torch model |
| 52 | +wget https://ghproxy.com/https://github.com/xingchensong/toolchain_pkg/releases/download/conformer_subsample8_110M/model_subsample8_parameter110M.tar.gz |
| 53 | +tar -xzf model_subsample8_parameter110M.tar.gz |
| 54 | + |
| 55 | +# Convert torch model to bpu model (*.pt -> *.onnx -> *.bin) |
| 56 | +# NOTE(xcsong): Converting the model with 110M parameters requires at least 16 GB of host memory (RAM). |
| 57 | +# If your machine does not meet this requirement, you can download the pre-converted encoder.bin/ctc.bin |
| 58 | +# via this link: https://github.com/xingchensong/toolchain_pkg/releases |
| 59 | +python3 $WENET_DIR/tools/onnx2horizonbin.py \ |
| 60 | + --config ./model_subsample8_parameter110M/train.yaml \ |
| 61 | + --checkpoint ./model_subsample8_parameter110M/final.pt \ |
| 62 | + --output_dir ./model_subsample8_parameter110M/sample50_chunk8_leftchunk16 \ |
| 63 | + --chunk_size 8 \ |
| 64 | + --num_decoding_left_chunks 16 \ |
| 65 | + --max_samples 50 \ |
| 66 | + --dict ./model_subsample8_parameter110M/units.txt \ |
| 67 | + --cali_datalist ./model_subsample8_parameter110M/calibration_data/data.list |
| 68 | + |
| 69 | +# scp test wav file and dictionary |
| 70 | +scp ./model_subsample8_parameter110M/test_wav.wav sunrise@$BPUIP:$DEMO_PATH_ON_BOARD |
| 71 | +scp ./model_subsample8_parameter110M/units.txt sunrise@$BPUIP:$DEMO_PATH_ON_BOARD |
| 72 | +# scp bpu models |
| 73 | +scp ./model_subsample8_parameter110M/sample50_chunk8_leftchunk16/hb_makertbin_output_encoder/encoder.bin sunrise@$BPUIP:$DEMO_PATH_ON_BOARD |
| 74 | +scp ./model_subsample8_parameter110M/sample50_chunk8_leftchunk16/hb_makertbin_output_ctc/ctc.bin sunrise@$BPUIP:$DEMO_PATH_ON_BOARD |
| 75 | +``` |
| 76 | + |
| 77 | +* Step 4. Test on X3PI. The RTF (real time factor) is shown in the Horizon X3PI's console. (~1min) |
| 78 | + |
| 79 | +``` sh |
| 80 | +cd /path/to/demo |
| 81 | +export LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH |
| 82 | +export GLOG_logtostderr=1 |
| 83 | +export GLOG_v=2 |
| 84 | +./decoder_main \ |
| 85 | + --chunk_size 8 \ |
| 86 | + --num_left_chunks 16 \ |
| 87 | + --rescoring_weight 0.0 \ |
| 88 | + --wav_path ./test_wav.wav \ |
| 89 | + --bpu_model_dir ./ \ |
| 90 | + --unit_path ./units.txt 2>&1 | tee log.txt |
| 91 | +``` |
0 commit comments