diff --git a/README.md b/README.md index f93080b55a801b62b4e7ba7e80abf16c306baefe..331e63e53e071aa15babaed3e2f7f0b36ff68c79 100644 --- a/README.md +++ b/README.md @@ -202,6 +202,8 @@ | RetinaFace | FP16 | [✅](models/cv/object_detection/retinaface/igie) | [✅](models/cv/object_detection/retinaface/ixrt) | 4.3.0 | | RetinaNet | FP16 | [✅](models/cv/object_detection/retinanet/igie) | [✅](models/cv/object_detection/retinanet/ixrt) | 4.3.0 | | RTMDet | FP16 | [✅](models/cv/object_detection/rtmdet/igie) | | 4.3.0 | +| RTDETR | FP16 | [✅](models/cv/object_detection/rtdetr/igie) | [✅](models/cv/object_detection/rtdetr/ixrt) | dev-only | +| | INT8 | [✅](models/cv/object_detection/rtdetr/igie) | | dev-only | | SABL | FP16 | [✅](models/cv/object_detection/sabl/igie) | | 4.3.0 | | SSD | FP16 | [✅](models/cv/object_detection/ssd/igie) | | 4.3.0 | | YOLOF | FP16 | [✅](models/cv/object_detection/yolof/igie) | [✅](models/cv/object_detection/yolof/ixrt) | 4.3.0 | @@ -233,6 +235,15 @@ | YOLOX | FP16 | [✅](models/cv/object_detection/yolox/igie) | [✅](models/cv/object_detection/yolox/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/object_detection/yolox/igie) | [✅](models/cv/object_detection/yolox/ixrt) | 4.3.0 | + +| Model | Prec. | PaddlePaddle | IXUCA SDK | +|------------|-------|---------------------------------------------------------|-----------| +| RTDETR | FP16 | [✅](models/cv/object_detection/rtdetr/paddlepaddle) | dev-only | + +| Model | Prec. | Pytorch | IXUCA SDK | +|------------|-------|-------------------------------------------------|-----------| +| YOLOv8n | FP16 | [✅](models/cv/object_detection/yolov8n/pytorch) | dev-only | + #### 人脸识别 | Model | Prec. | IGIE | ixRT | IXUCA SDK | diff --git a/README_en.md b/README_en.md index 6488b16d5ecfb11554cdb85270a6ba35f0b19ab9..ac3550173e308dc79933f57ef60fff825ab785f7 100644 --- a/README_en.md +++ b/README_en.md @@ -212,6 +212,8 @@ inference to be expanded in the future. | RetinaFace | FP16 | [✅](models/cv/object_detection/retinaface/igie) | [✅](models/cv/object_detection/retinaface/ixrt) | 4.3.0 | | RetinaNet | FP16 | [✅](models/cv/object_detection/retinanet/igie) | [✅](models/cv/object_detection/retinanet/ixrt) | 4.3.0 | | RTMDet | FP16 | [✅](models/cv/object_detection/rtmdet/igie) | | 4.3.0 | +| RTDETR | FP16 | [✅](models/cv/object_detection/rtdetr/igie) | [✅](models/cv/object_detection/rtdetr/ixrt) | dev-only | +| | INT8 | [✅](models/cv/object_detection/rtdetr/igie) | | dev-only | | SABL | FP16 | [✅](models/cv/object_detection/sabl/igie) | | 4.3.0 | | SSD | FP16 | [✅](models/cv/object_detection/ssd/igie) | | 4.3.0 | | YOLOF | FP16 | [✅](models/cv/object_detection/yolof/igie) | [✅](models/cv/object_detection/yolof/ixrt) | 4.3.0 | @@ -243,6 +245,14 @@ inference to be expanded in the future. | YOLOX | FP16 | [✅](models/cv/object_detection/yolox/igie) | [✅](models/cv/object_detection/yolox/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/object_detection/yolox/igie) | [✅](models/cv/object_detection/yolox/ixrt) | 4.3.0 | +| Model | Prec. | PaddlePaddle | IXUCA SDK | +|------------|-------|---------------------------------------------------------|-----------| +| RTDETR | FP16 | [✅](models/cv/object_detection/rtdetr/paddlepaddle) | dev-only | + +| Model | Prec. | Pytorch | IXUCA SDK | +|------------|-------|-------------------------------------------------|-----------| +| YOLOv8n | FP16 | [✅](models/cv/object_detection/yolov8n/pytorch) | dev-only | + #### Face Recognition | Model | Prec. 
| IGIE | ixRT | IXUCA SDK | diff --git a/models/cv/classification/clip/ixrt/README.md b/models/cv/classification/clip/ixrt/README.md index adb345be58cfa5d8991a66c205a7a4147960e09c..59f1886cf7064dcef61ddf82022588668eb3679d 100644 --- a/models/cv/classification/clip/ixrt/README.md +++ b/models/cv/classification/clip/ixrt/README.md @@ -27,11 +27,10 @@ git clone https://huggingface.co/openai/clip-vit-base-patch32 clip-vit-base-patc Contact the Iluvatar administrator to get the missing packages: -- ixrt-1.0.0a0+corex.4.3.0.20250723-cp310-cp310-linux_x86_64.whl or later +- ixrt-*.whl ```bash pip3 install -r requirements.txt -pip3 install ixrt-1.0.0a0+corex.4.3.0.20250723-cp310-cp310-linux_x86_64.whl ``` ### Model Conversion diff --git a/models/cv/classification/clip/ixrt/ci/prepare.sh b/models/cv/classification/clip/ixrt/ci/prepare.sh index 9866b8e1555917074d71bf78b3e153e2d8bf45f6..636638a293c6201a16732e18cdc9fc9e33d30a40 100644 --- a/models/cv/classification/clip/ixrt/ci/prepare.sh +++ b/models/cv/classification/clip/ixrt/ci/prepare.sh @@ -19,5 +19,4 @@ set -x pip3 install -r requirements.txt mkdir -p checkpoints/clip python3 export.py --output checkpoints/clip/clip.onnx -cp -r /root/data/3rd_party/iluvatar-corex-ixrt ./ -pip3 install /root/data/install/ixrt-1.0.0a0+corex.4.3.0.20250723-cp310-cp310-linux_x86_64.whl \ No newline at end of file +cp -r /root/data/3rd_party/iluvatar-corex-ixrt ./ \ No newline at end of file diff --git a/models/cv/classification/resnet50_sample/igie/ci/prepare.sh b/models/cv/classification/resnet50_sample/igie/ci/prepare.sh index 92b57eded0228f7cf6a1c9cd63730de780457b0d..fb64d7d170b81599d1b7c414430bb06c24f93583 100644 --- a/models/cv/classification/resnet50_sample/igie/ci/prepare.sh +++ b/models/cv/classification/resnet50_sample/igie/ci/prepare.sh @@ -16,19 +16,15 @@ set -x -if [ -f /etc/redhat-release ]; then - if grep -qi "CentOS" /etc/redhat-release; then - yum install -y numactl - fi -elif [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - yum install -y numactl - fi -else +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then apt install numactl +elif [[ ${ID} == "centos" ]]; then + yum install -y numactl +else + echo "Not Support Os" fi pip3 install pycocotools pytest opencv-python==4.6.0.66 tqdm -pip3 install /mnt/deepspark/data/install/tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl ln -s /mnt/deepspark/data/checkpoints/resnet50.onnx ./ ln -s /mnt/deepspark/data/checkpoints/resnet50-fp32.pt ./ \ No newline at end of file diff --git a/models/cv/classification/swin_transformer_large/ixrt/README.md b/models/cv/classification/swin_transformer_large/ixrt/README.md index 9a951a1bb10fa51802ddf188c61ea17eb1a9ba99..8a74748e9d819715bdadb3dae1feaa18974beb14 100644 --- a/models/cv/classification/swin_transformer_large/ixrt/README.md +++ b/models/cv/classification/swin_transformer_large/ixrt/README.md @@ -28,7 +28,7 @@ bash ./scripts/prepare_model_and_dataset.sh ### Install Dependencies Contact the Iluvatar administrator to get the missing packages: -- tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- tensorflow-*.whl ```bash export PROJ_ROOT=/PATH/TO/DEEPSPARKINFERENCE diff --git a/models/cv/classification/vgg16_sample/igie/ci/prepare.sh b/models/cv/classification/vgg16_sample/igie/ci/prepare.sh index 72f8042dd813c1d9d8b7c73ebeb002a6128d07f4..484e403e11bc7b96275c53aa97011f6eb10e5373 100644 --- a/models/cv/classification/vgg16_sample/igie/ci/prepare.sh +++ 
b/models/cv/classification/vgg16_sample/igie/ci/prepare.sh @@ -16,18 +16,14 @@ set -x -if [ -f /etc/redhat-release ]; then - if grep -qi "CentOS" /etc/redhat-release; then - yum install -y numactl - fi -elif [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - yum install -y numactl - fi -else +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then apt install numactl +elif [[ ${ID} == "centos" ]]; then + yum install -y numactl +else + echo "Not Support Os" fi pip3 install pycocotools pytest opencv-python==4.6.0.66 tqdm -pip3 install /mnt/deepspark/data/install/tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl ln -s /mnt/deepspark/data/checkpoints/vgg16.onnx ./ \ No newline at end of file diff --git a/models/cv/instance_segmentation/mask_rcnn/ixrt/ci/prepare.sh b/models/cv/instance_segmentation/mask_rcnn/ixrt/ci/prepare.sh index 34e7de538bd4b91e725ee267ee78e1a9dfdd2223..c23d091c11c150410e8ded4c4c165403cdf6460b 100644 --- a/models/cv/instance_segmentation/mask_rcnn/ixrt/ci/prepare.sh +++ b/models/cv/instance_segmentation/mask_rcnn/ixrt/ci/prepare.sh @@ -18,8 +18,6 @@ set -x ln -s /root/data/checkpoints/maskrcnn.wts ./python/ ln -s /root/data/datasets/coco ./coco -# install ixrt run -bash /root/data/install/ixrt-1.0.0.alpha+corex.4.3.0-linux_x86_64.run if [ "$1" = "nvidia" ]; then cd scripts && bash init_nv.sh diff --git a/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh b/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh index 107ffda40c31521179d432506d46984ab75805fc..75fa99d6a7ba4de64d6f925fb28ef8d1d796bb63 100644 --- a/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh +++ b/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh @@ -27,6 +27,7 @@ fi pip install -r requirements.txt +# it need low mmcv version pip install /root/data/install/mmcv_full-1.7.0+corex.20250108131027-cp310-cp310-linux_x86_64.whl mkdir -p checkpoints diff --git a/models/cv/object_detection/atss/igie/README.md b/models/cv/object_detection/atss/igie/README.md index ae51eb99ea10405f3007d443113306a0d4875652..26aca7ad565f445f0de289007a63046f72409eba 100644 --- a/models/cv/object_detection/atss/igie/README.md +++ b/models/cv/object_detection/atss/igie/README.md @@ -54,7 +54,7 @@ wget https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/a Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/atss/ixrt/README.md b/models/cv/object_detection/atss/ixrt/README.md index 2e837a4055524ec5bc523d83c836da66eb5ae951..9531f5565c6f962247e9baba80a763eb1c9c20ba 100644 --- a/models/cv/object_detection/atss/ixrt/README.md +++ b/models/cv/object_detection/atss/ixrt/README.md @@ -53,7 +53,7 @@ wget https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/a Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash pip3 install -r requirements.txt diff --git a/models/cv/object_detection/centernet/igie/README.md b/models/cv/object_detection/centernet/igie/README.md index 03f396210e234f6b8a6229da245795f26f45bd05..e2cf0d68dd98fb12b80a8596b27156455f89e7da 100644 --- a/models/cv/object_detection/centernet/igie/README.md +++ b/models/cv/object_detection/centernet/igie/README.md @@ -50,7 +50,7 @@ coco Contact the Iluvatar administrator to get the missing packages: -- 
mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/centernet/ixrt/README.md b/models/cv/object_detection/centernet/ixrt/README.md index fa9dd5713395498a5e63ce1e2220fcaff1379c24..5f0b6fd588361d946a071975440c420f1c3a80b8 100644 --- a/models/cv/object_detection/centernet/ixrt/README.md +++ b/models/cv/object_detection/centernet/ixrt/README.md @@ -50,7 +50,7 @@ coco Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/detr/ixrt/README.md b/models/cv/object_detection/detr/ixrt/README.md index a496547dccce1cc0db5b411d9214bd14db382a39..f91c775a8379cfc257c486adf5b29963c5d68f45 100755 --- a/models/cv/object_detection/detr/ixrt/README.md +++ b/models/cv/object_detection/detr/ixrt/README.md @@ -50,7 +50,7 @@ coco Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL @@ -76,7 +76,7 @@ export PROJ_DIR=./ export DATASETS_DIR=/path/to/coco2017/ export CHECKPOINTS_DIR=./checkpoints export COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json -export EVAL_DIR=${DATASETS_DIR}/val2017 +export EVAL_DIR=${DATASETS_DIR}/images/val2017 export RUN_DIR=./ export CONFIG_DIR=config/DETR_CONFIG ``` diff --git a/models/cv/object_detection/fcos/igie/README.md b/models/cv/object_detection/fcos/igie/README.md index d41c2437414c7fcf1ee5da07dbb8432e9013fab8..bf3cf639b3159d160561fe1e9686ad4cfd7fd003 100644 --- a/models/cv/object_detection/fcos/igie/README.md +++ b/models/cv/object_detection/fcos/igie/README.md @@ -54,7 +54,7 @@ wget https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn- Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/fcos/ixrt/README.md b/models/cv/object_detection/fcos/ixrt/README.md index a9d2b6140c73ef2762495e84fce25205300d4fd0..894a1e72f295681089fdbfe3374ae5d2bba42cd0 100755 --- a/models/cv/object_detection/fcos/ixrt/README.md +++ b/models/cv/object_detection/fcos/ixrt/README.md @@ -49,10 +49,6 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash # Install libGL ## CentOS diff --git a/models/cv/object_detection/fcos/ixrt/ci/prepare.sh b/models/cv/object_detection/fcos/ixrt/ci/prepare.sh index 633e4f20e94da82794295a5c83bf6f4596fdf226..1a22e892c2ac3f568765f0a8795eecd5ae605ba8 100644 --- a/models/cv/object_detection/fcos/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/fcos/ixrt/ci/prepare.sh @@ -25,6 +25,5 @@ else echo "Not Support Os" fi pip3 install -r requirements.txt -pip install /root/data/install/mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl mkdir -p checkpoints cp /root/data/checkpoints/fcos_opt.onnx checkpoints/ diff --git a/models/cv/object_detection/foveabox/igie/README.md b/models/cv/object_detection/foveabox/igie/README.md index f7155fcb5df8f99ba2d355a9e5ecdf22934d8d02..7d742556cfe2d1fce39cfe1531490a865e837da0 100644 --- a/models/cv/object_detection/foveabox/igie/README.md +++ b/models/cv/object_detection/foveabox/igie/README.md @@ -50,7 +50,7 @@ coco Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl 
```bash # Install libGL diff --git a/models/cv/object_detection/foveabox/ixrt/README.md b/models/cv/object_detection/foveabox/ixrt/README.md index 075cd716e26acb23f631a1c63d2bd851706c20cf..4aecc35ac4a4eff0090b0f8fc4a8db75877802e6 100644 --- a/models/cv/object_detection/foveabox/ixrt/README.md +++ b/models/cv/object_detection/foveabox/ixrt/README.md @@ -50,7 +50,7 @@ coco Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/freeanchor/igie/README.md b/models/cv/object_detection/freeanchor/igie/README.md index 9bceac9d0535e8017991866886cc55949797fda6..b4deee6f440ae2f6cf528c03949c925003adeb6d 100644 --- a/models/cv/object_detection/freeanchor/igie/README.md +++ b/models/cv/object_detection/freeanchor/igie/README.md @@ -52,11 +52,10 @@ wget https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_ ### Install Dependencies Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash pip3 install -r requirements.txt -pip3 install mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl ``` ### Model Conversion diff --git a/models/cv/object_detection/freeanchor/igie/ci/prepare.sh b/models/cv/object_detection/freeanchor/igie/ci/prepare.sh index 84905c33fe6033d0cb29d533be69ac43bac92c9a..2242acf9e06b10f46bebfdc0f4c2aec238e2fb15 100644 --- a/models/cv/object_detection/freeanchor/igie/ci/prepare.sh +++ b/models/cv/object_detection/freeanchor/igie/ci/prepare.sh @@ -17,7 +17,7 @@ set -x pip3 install -r requirements.txt -pip3 install /mnt/deepspark/data/install/mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + # export onnx model python3 export.py --weight retinanet_free_anchor_r50_fpn_1x_coco_20200130-0f67375f.pth --cfg freeanchor_r50_fpn_1x_coco.py --output freeanchor_r50.onnx diff --git a/models/cv/object_detection/fsaf/igie/README.md b/models/cv/object_detection/fsaf/igie/README.md index 4619d6ada5b386d020438dc014b86604b274c435..f6ed5c77d785a431cbcf9355db8992d9b27c0d2c 100644 --- a/models/cv/object_detection/fsaf/igie/README.md +++ b/models/cv/object_detection/fsaf/igie/README.md @@ -54,7 +54,7 @@ wget https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/f Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/fsaf/ixrt/README.md b/models/cv/object_detection/fsaf/ixrt/README.md index 6715f9f161908c02184a11addce6e74db5eae150..f099a5d3c45a56501b7181cadce06a4e342e946e 100644 --- a/models/cv/object_detection/fsaf/ixrt/README.md +++ b/models/cv/object_detection/fsaf/ixrt/README.md @@ -54,7 +54,7 @@ wget https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/f Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/gfl/igie/README.md b/models/cv/object_detection/gfl/igie/README.md index eabac8601886cf59beec18a17e0f35fd044c5e5c..f850cc20e718d3d02e4bb592bb137cf1cdbb4c27 100644 --- a/models/cv/object_detection/gfl/igie/README.md +++ b/models/cv/object_detection/gfl/igie/README.md @@ -50,7 +50,7 @@ coco Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL 
diff --git a/models/cv/object_detection/hrnet/igie/README.md b/models/cv/object_detection/hrnet/igie/README.md index 6d5f8d8ecd9c7ba66a76c6567efc321223f22bda..e5fc0d113ef99f830cf8bb16be3481ea90f39796 100644 --- a/models/cv/object_detection/hrnet/igie/README.md +++ b/models/cv/object_detection/hrnet/igie/README.md @@ -50,7 +50,7 @@ coco Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/hrnet/ixrt/README.md b/models/cv/object_detection/hrnet/ixrt/README.md index d94ed8a6c6e2638c07b047062457a2ce0cf7d9bb..7338b7664725addfd0bc8a0b10f742dd771c7fe1 100644 --- a/models/cv/object_detection/hrnet/ixrt/README.md +++ b/models/cv/object_detection/hrnet/ixrt/README.md @@ -50,7 +50,7 @@ coco Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/paa/igie/README.md b/models/cv/object_detection/paa/igie/README.md index 8d8d418592ee7fe1942b826c172149f64e471470..7391dc0501432a161c4687597f75a960a7f5053e 100644 --- a/models/cv/object_detection/paa/igie/README.md +++ b/models/cv/object_detection/paa/igie/README.md @@ -50,7 +50,7 @@ coco Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/paa/ixrt/README.md b/models/cv/object_detection/paa/ixrt/README.md index 67ac6f126d6939953a5e30eab532afbefd5f73fd..828729e2c49013666b9071be255f92e4f0ff7fc3 100644 --- a/models/cv/object_detection/paa/ixrt/README.md +++ b/models/cv/object_detection/paa/ixrt/README.md @@ -53,7 +53,7 @@ wget https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1x_coco/paa Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash pip3 install -r requirements.txt diff --git a/models/cv/object_detection/pisa/igie/README.md b/models/cv/object_detection/pisa/igie/README.md index 2ca8e8af74955abe3fdf4644d9a4a6a4b9596226..9e7c832b07211c590154896ae627e5c82b5db7a7 100644 --- a/models/cv/object_detection/pisa/igie/README.md +++ b/models/cv/object_detection/pisa/igie/README.md @@ -52,11 +52,10 @@ wget https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_r50_fpn ### Install Dependencies Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash pip3 install -r requirements.txt -pip3 install mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl ``` ### Model Conversion diff --git a/models/cv/object_detection/pisa/igie/ci/prepare.sh b/models/cv/object_detection/pisa/igie/ci/prepare.sh index 8ff3fb98c7d89a96ba91357c0643d2d4505b6eb4..ab6892ade104eb0125d22c9b545f8daee7f50e02 100644 --- a/models/cv/object_detection/pisa/igie/ci/prepare.sh +++ b/models/cv/object_detection/pisa/igie/ci/prepare.sh @@ -17,7 +17,7 @@ set -x pip3 install -r requirements.txt -pip3 install /mnt/deepspark/data/install/mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + # export onnx model python3 export.py --weight pisa_retinanet_r50_fpn_1x_coco-76409952.pth --cfg pisa_retinanet_r50_fpn_1x_coco.py --output pisa.onnx diff --git a/models/cv/object_detection/retinaface/igie/README.md b/models/cv/object_detection/retinaface/igie/README.md index 
a6afd7fb2e7dc67d6ca8a056c2fc71d0dedbd394..7f354187accd4e59d49567b8403a46e21c17798f 100755 --- a/models/cv/object_detection/retinaface/igie/README.md +++ b/models/cv/object_detection/retinaface/igie/README.md @@ -21,10 +21,6 @@ Dataset: to download the validation dataset. ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash # Install libGL ## CentOS diff --git a/models/cv/object_detection/retinaface/ixrt/README.md b/models/cv/object_detection/retinaface/ixrt/README.md index 18cd56c6a147e08b1117047ca7b9d12be177948b..b80e3113d4c579fa2c8fba0baaa5d0fe6de474b8 100644 --- a/models/cv/object_detection/retinaface/ixrt/README.md +++ b/models/cv/object_detection/retinaface/ixrt/README.md @@ -25,10 +25,6 @@ wget https://github.com/biubug6/Face-Detector-1MB-with-landmark/raw/master/weigh ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash # Install libGL ## CentOS diff --git a/models/cv/object_detection/retinanet/igie/README.md b/models/cv/object_detection/retinanet/igie/README.md index af42e63f5655292ec1258f2461b878d8ea2aad8e..8bd77c7407abcc1140a5bcb6cd792d833da741fa 100644 --- a/models/cv/object_detection/retinanet/igie/README.md +++ b/models/cv/object_detection/retinanet/igie/README.md @@ -50,7 +50,7 @@ coco Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/retinanet/ixrt/README.md b/models/cv/object_detection/retinanet/ixrt/README.md index 5123a272e33250bd36f75eafe512e3cf69e6dd9c..f2b99cf4ab0096d82a2a180cabd1ebd5c5da97cd 100644 --- a/models/cv/object_detection/retinanet/ixrt/README.md +++ b/models/cv/object_detection/retinanet/ixrt/README.md @@ -53,7 +53,7 @@ wget https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash pip3 install -r requirements.txt diff --git a/models/cv/object_detection/rtdetr/igie/README.md b/models/cv/object_detection/rtdetr/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3346cdd55e323b616eed2b1fb3f880b773f024c5 --- /dev/null +++ b/models/cv/object_detection/rtdetr/igie/README.md @@ -0,0 +1,83 @@ +# RTDetr (IGIE) + +## Model Description + +Real-Time Detection Transformer (RT-DETR), developed by Baidu, is a cutting-edge end-to-end object detector that provides real-time performance while maintaining high accuracy. It is based on the idea of DETR (the NMS-free framework), meanwhile introducing conv-based backbone and an efficient hybrid encoder to gain real-time speed. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | dev-only | 26.03 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: + - to download the labels dataset. + - to download the validation dataset. + - to download the train dataset. 
+ +```bash +unzip -q -d ./ coco2017labels.zip +unzip -q -d ./coco/images/ train2017.zip +unzip -q -d ./coco/images/ val2017.zip + +coco +├── annotations +│   └── instances_val2017.json +├── images +│   ├── train2017 +│   └── val2017 +├── labels +│   ├── train2017 +│   └── val2017 +├── LICENSE +├── README.txt +├── test-dev2017.txt +├── train2017.cache +├── train2017.txt +├── val2017.cache +└── val2017.txt +``` + +### Install Dependencies + +```bash +pip3 install -r requirements.txt +``` + +## Model Inference + +```bash +export DATASETS_DIR=/Path/to/coco/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_rtdetr_fp16_accuracy.sh +# Performance +bash scripts/infer_rtdetr_fp16_performance.sh +``` + +### INT8 + +```bash +# Accuracy +bash scripts/infer_rtdetr_int8_accuracy.sh +# Performance +bash scripts/infer_rtdetr_int8_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | MAP@0.5 | +| :-----: | :----: | :----: | :----: | :----: | +| RTDetr | 32 | FP16 | 335.8 | 0.654 | +| RTDetr | 32 | INT8 | 298.2 | 0.651 | + diff --git a/models/cv/object_detection/rtdetr/igie/build_engine.py b/models/cv/object_detection/rtdetr/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..41e9ce4c62826ebf1dada65d88515d1f5486f8b6 --- /dev/null +++ b/models/cv/object_detection/rtdetr/igie/build_engine.py @@ -0,0 +1,76 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie +import os + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. 
+ """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + if os.path.exists(args.engine_path): + return + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/igie/ci/prepare.sh b/models/cv/object_detection/rtdetr/igie/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..b0df2ecbd1999ebf37959d6f582407b095f0fd12 --- /dev/null +++ b/models/cv/object_detection/rtdetr/igie/ci/prepare.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x +pip3 install -r requirements.txt \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/igie/inference.py b/models/cv/object_detection/rtdetr/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..4e86a8eb622bfe85f42ce8b894817d5f5ad5e654 --- /dev/null +++ b/models/cv/object_detection/rtdetr/igie/inference.py @@ -0,0 +1,166 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import os +import argparse +import tvm +import torch +import numpy as np +from tvm import relay +from tqdm import tqdm + +from utils import COCO2017Dataset, COCO2017Evaluator + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--conf", + type=float, + default=0.001, + help="confidence threshold.") + + parser.add_argument("--iou", + type=float, + default=0.65, + help="iou threshold.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, label_path, batch_size, num_workers): + + dataset = COCO2017Dataset(data_path, label_path, image_size=(640, 640)) + + dataloader = torch.utils.data.DataLoader(dataset, + batch_size=batch_size, + drop_last=False, + num_workers=num_workers, + collate_fn=dataset.collate_fn) + return dataloader + +def main(): + args = parse_args() + + batch_size = args.batchsize + data_path = os.path.join(args.datasets, "images", "val2017") + label_path = os.path.join(args.datasets, "annotations", "instances_val2017.json") + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(data_path, label_path, batch_size, args.num_workers) + + # get evaluator + evaluator = COCO2017Evaluator(label_path=label_path, + conf_thres=args.conf, + iou_thres=args.iou, + image_size=640) + + for all_inputs in tqdm(dataloader): + image = all_inputs[0] + pred = None + + pad_batch = len(image) != batch_size + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + module.run() + + for i in range(module.get_num_outputs()): + output = module.get_output(i).asnumpy() + + if pad_batch: + output = output[:origin_size] + + if pred is None: + pred = torch.from_numpy(output) + else: + pred = torch.cat((pred, torch.from_numpy(output)), dim=-1) + + evaluator.evaluate(pred, all_inputs) + + + evaluator.summary() + +if __name__ == "__main__": 
+ main() \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/igie/quantize.py b/models/cv/object_detection/rtdetr/igie/quantize.py new file mode 100644 index 0000000000000000000000000000000000000000..07be844882462b37eee3d094106f2ce4e5eddb54 --- /dev/null +++ b/models/cv/object_detection/rtdetr/igie/quantize.py @@ -0,0 +1,108 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import onnx +import psutil +import argparse +import numpy as np +from inference import get_dataloader +from onnxruntime.quantization import (CalibrationDataReader, QuantFormat, + quantize_static, QuantType, + CalibrationMethod) + +class CalibrationDataLoader(CalibrationDataReader): + def __init__(self, input_name, dataloader, cnt_limit=100): + self.cnt = 0 + self.input_name = input_name + self.cnt_limit = cnt_limit + self.iter = iter(dataloader) + + # avoid oom + @staticmethod + def _exceed_memory_upper_bound(upper_bound=80): + info = psutil.virtual_memory() + total_percent = info.percent + if total_percent >= upper_bound: + return True + return False + + def get_next(self): + if self._exceed_memory_upper_bound() or self.cnt >= self.cnt_limit: + return None + self.cnt += 1 + print(f"onnx calibration data count: {self.cnt}") + input_info = next(self.iter) + + ort_input = {k: np.array(v) for k, v in zip(self.input_name, input_info)} + return ort_input + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--out_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="calibration datasets path.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + model = onnx.load(args.model_path) + input_names = [input.name for input in model.graph.input] + + data_path = os.path.join(args.datasets, "images", "val2017") + label_path = os.path.join(args.datasets, "annotations", "instances_val2017.json") + + dataloader = get_dataloader(data_path, label_path, batch_size=1, num_workers=args.num_workers) + calibration = CalibrationDataLoader(input_names, dataloader, cnt_limit=20) + + quantize_static(args.model_path, + args.out_path, + calibration_data_reader=calibration, + quant_format=QuantFormat.QOperator, + per_channel=False, + activation_type=QuantType.QInt8, + weight_type=QuantType.QInt8, + use_external_data_format=False, + op_types_to_quantize=['Conv'], + calibrate_method=CalibrationMethod.Percentile, + extra_options = { + 'ActivationSymmetric': True, + 'WeightSymmetric': True + } + ) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/igie/requirements.txt 
b/models/cv/object_detection/rtdetr/igie/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f88d8c99cba6217fa4c53365cab95f6e2ab5abf5 --- /dev/null +++ b/models/cv/object_detection/rtdetr/igie/requirements.txt @@ -0,0 +1,5 @@ +tqdm +onnx +onnxsim +pycocotools +ninja==1.11.1.3 \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/igie/scripts/infer_rtdetr_fp16_accuracy.sh b/models/cv/object_detection/rtdetr/igie/scripts/infer_rtdetr_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..efff00b42398f757e90b39a3817efbf83e283527 --- /dev/null +++ b/models/cv/object_detection/rtdetr/igie/scripts/infer_rtdetr_fp16_accuracy.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="rtdetrv3_r18vd_6x_coco_image.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input image:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path rtdetrv3_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine rtdetrv3_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name image \ + --datasets ${datasets_path} diff --git a/models/cv/object_detection/rtdetr/igie/scripts/infer_rtdetr_fp16_performance.sh b/models/cv/object_detection/rtdetr/igie/scripts/infer_rtdetr_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..f0ee22ba9bc673eed1417796b4f143bc7026bd45 --- /dev/null +++ b/models/cv/object_detection/rtdetr/igie/scripts/infer_rtdetr_fp16_performance.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
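+
+# Performance-only run: builds an FP16 IGIE engine from the RT-DETR ONNX model
+# (input "image", shape ${batchsize}x3x640x640) and benchmarks it with
+# inference.py --perf_only True. Batch size defaults to 32 and can be
+# overridden, e.g. `bash scripts/infer_rtdetr_fp16_performance.sh --bs 16`.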
+ +batchsize=32 +model_path="rtdetrv3_r18vd_6x_coco_image.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input image:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path rtdetrv3_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine rtdetrv3_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name image \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/igie/scripts/infer_rtdetr_int8_accuracy.sh b/models/cv/object_detection/rtdetr/igie/scripts/infer_rtdetr_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..d2ce2a321afae49980e66afbc651e257f126bca8 --- /dev/null +++ b/models/cv/object_detection/rtdetr/igie/scripts/infer_rtdetr_int8_accuracy.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="rtdetrv3_r18vd_6x_coco_image.onnx" +quantized_model_path="rtdetrv3_r18vd_6x_coco_image_int8.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +if [ ! -e $quantized_model_path ]; then + # quantize model to int8 + python3 quantize.py \ + --model_path ${model_path} \ + --out_path ${quantized_model_path} \ + --datasets ${datasets_path} +fi + +# build engine +python3 build_engine.py \ + --model_path ${quantized_model_path} \ + --input image:${batchsize},3,640,640 \ + --precision int8 \ + --engine_path rtdetrv3_bs_${batchsize}_int8.so + + +# inference +python3 inference.py \ + --engine rtdetrv3_bs_${batchsize}_int8.so \ + --batchsize ${batchsize} \ + --input_name image \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/igie/scripts/infer_rtdetr_int8_performance.sh b/models/cv/object_detection/rtdetr/igie/scripts/infer_rtdetr_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..a96aaa549ace6a27421b097bd5efca66da961dff --- /dev/null +++ b/models/cv/object_detection/rtdetr/igie/scripts/infer_rtdetr_int8_performance.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="rtdetrv3_r18vd_6x_coco_image.onnx" +quantized_model_path="rtdetrv3_r18vd_6x_coco_image_int8.onnx" +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +if [ ! -e $quantized_model_path ]; then + # quantize model to int8 + python3 quantize.py \ + --model_path ${model_path} \ + --out_path ${quantized_model_path} \ + --datasets ${datasets_path} +fi + +# build engine +python3 build_engine.py \ + --model_path ${quantized_model_path} \ + --input image:${batchsize},3,640,640 \ + --precision int8 \ + --engine_path rtdetrv3_bs_${batchsize}_int8.so + + +# inference +python3 inference.py \ + --engine rtdetrv3_bs_${batchsize}_int8.so \ + --batchsize ${batchsize} \ + --input_name image \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/igie/utils.py b/models/cv/object_detection/rtdetr/igie/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..2df9e975fdbf54f72946a35c0bdf6afc3914bd4c --- /dev/null +++ b/models/cv/object_detection/rtdetr/igie/utils.py @@ -0,0 +1,238 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
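+
+# Shared helpers for the IGIE RT-DETR pipeline:
+# - coco80_to_coco91 / coco91_to_coco80_dict: remap between the contiguous
+#   80-class index space and the 91 COCO annotation category ids.
+# - clip_boxes / xyxy2xywh / xyxy2xywhn: box clipping and format conversion.
+# - COCO2017Dataset: loads val2017 images, resizes to 640x640, BGR->RGB, /255.
+# - COCO2017Evaluator: rescales predictions back to the original image size,
+#   dumps them to a COCO-style JSON and scores them with COCOeval.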
+ +import os +import cv2 +import json +import torch +import torchvision +import numpy as np + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +coco80_to_coco91 = [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, + 89, 90 +] + +coco80_to_coco91_dict = {idx: i for idx, i in enumerate(coco80_to_coco91)} +coco91_to_coco80_dict = {i: idx for idx, i in enumerate(coco80_to_coco91)} + +def clip_boxes(boxes, shape): + # Clip boxes (xyxy) to image shape (height, width) + if isinstance(boxes, torch.Tensor): # faster individually + boxes[:, 0].clamp_(0, shape[1]) # x1 + boxes[:, 1].clamp_(0, shape[0]) # y1 + boxes[:, 2].clamp_(0, shape[1]) # x2 + boxes[:, 3].clamp_(0, shape[0]) # y2 + else: # np.array (faster grouped) + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 + +def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right + if clip: + clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center + y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center + y[:, 2] = (x[:, 2] - x[:, 0]) / w # width + y[:, 3] = (x[:, 3] - x[:, 1]) / h # height + return y + +def xyxy2xywh(x): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center + y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center + y[:, 2] = x[:, 2] - x[:, 0] # width + y[:, 3] = x[:, 3] - x[:, 1] # height + return y + +class COCO2017Dataset(torch.utils.data.Dataset): + def __init__(self, + image_dir_path, + label_json_path, + image_size=640, + pad_color=114, + val_mode=True, + input_layout="NCHW"): + + self.image_dir_path = image_dir_path + self.label_json_path = label_json_path + self.image_size = image_size + self.pad_color = pad_color + self.val_mode = val_mode + self.input_layout = input_layout + + self.coco = COCO(annotation_file=self.label_json_path) + + if self.val_mode: + self.img_ids = list(sorted(self.coco.imgs.keys())) + else: + self.img_ids = sorted(list(self.coco.imgToAnns.keys())) + + def __len__(self): + return len(self.img_ids) + + def __getitem__(self, index): + img_path = self._get_image_path(index) + org_img = self._load_image(img_path) + + img = self.preproc(org_img, input_size=self.image_size) + + org_shape = [org_img.shape[0], org_img.shape[1]] + + return img, img_path, org_shape + + def _get_image_path(self, index): + idx = self.img_ids[index] + path = self.coco.loadImgs(idx)[0]["file_name"] + img_path = os.path.join(self.image_dir_path, path) + return img_path + + def _load_image(self, img_path): + img = cv2.imread(img_path) + assert img is not None, f"file {img_path} not found" + + return img + + def preproc(self, img, input_size, swap=(2, 0, 1)): + image = cv2.resize(img, (input_size[0], input_size[1])) + image = image.transpose(swap)[::-1] # HWC to CHW, BGR to RGB + + image = np.ascontiguousarray(image, dtype=np.float32) / 255.0 + return image + + + def _load_json_label(self, index): + _, (h0, w0), _ = 
self._load_image(index) + + idx = self.img_ids[index] + ann_ids = self.coco.getAnnIds(imgIds=idx) + targets = self.coco.loadAnns(ids=ann_ids) + + labels = [] + for target in targets: + cat = target["category_id"] + coco80_cat = coco91_to_coco80_dict[cat] + cat = np.array([[coco80_cat]]) + + x, y, w, h = target["bbox"] + x1, y1, x2, y2 = x, y, int(x + w), int(y + h) + xyxy = np.array([[x1, y1, x2, y2]]) + xywhn = xyxy2xywhn(xyxy, w0, h0) + labels.append(np.hstack((cat, xywhn))) + + if labels: + labels = np.vstack(labels) + else: + if self.val_mode: + labels = np.zeros((1, 5)) + else: + raise ValueError(f"set val_mode = False to use images with labels") + + return labels + + @staticmethod + def collate_fn(batch): + im, img_path, org_shape = zip(*batch) + return np.concatenate([i[None] for i in im], axis=0), img_path, org_shape + +def get_coco_accuracy(pred_json, ann_json): + coco = COCO(annotation_file=ann_json) + coco_pred = coco.loadRes(pred_json) + + coco_evaluator = COCOeval(cocoGt=coco, cocoDt=coco_pred, iouType="bbox") + + coco_evaluator.evaluate() + coco_evaluator.accumulate() + coco_evaluator.summarize() + return coco_evaluator.stats + +class COCO2017Evaluator: + def __init__(self, + label_path, + image_size=640, + conf_thres=0.001, + iou_thres=0.65): + + self.conf_thres = conf_thres + self.iou_thres = iou_thres + self.label_path = label_path + self.image_size = image_size + + self.jdict = [] + + # iou vector for mAP@0.5:0.95 + self.iouv = torch.linspace(0.5, 0.95, 10) + self.niou = self.iouv.numel() + + def Detect(self, boxes, img_size, org_size): + x_scale, y_scale = img_size[1] / org_size[1], img_size[0] / org_size[0] + + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + boxes[:, 4] /= x_scale + boxes[:, 5] /= y_scale + + clip_boxes(boxes, org_size) + return boxes + + def evaluate(self, pred, all_inputs): + im = all_inputs[0] + img_path = all_inputs[1] + img_info = all_inputs[2] + + _, _, height, width = im.shape + + for (output, org_img, path) in zip(pred, img_info, img_path): + if output is None: + continue + + detected_boxes = self.Detect(output, [height, width], org_img) + + detected_boxes[:, 4] -= detected_boxes[:, 2] + detected_boxes[:, 5] -= detected_boxes[:, 3] + + cls = detected_boxes[:, 0] + scores = detected_boxes[:, 1] + bboxes = detected_boxes[:, 2:] + + self._save_one_json(bboxes, cls, scores, self.jdict, path, coco80_to_coco91) + + def _save_one_json(self, bboxes, class_, scores, jdict, path, class_map): + image_id = int(os.path.splitext(os.path.basename(path))[0]) + for box, score, cls in zip(bboxes.numpy().tolist(), scores.numpy().tolist(), class_.numpy().tolist()): + jdict.append({ + 'image_id': image_id, + 'category_id': class_map[int(cls)], + 'bbox': box, + 'score': score + }) + + def summary(self): + if len(self.jdict): + pred_json = os.path.join("coco2017_predictions.json") + with open(pred_json, 'w') as f: + json.dump(self.jdict, f) + result = get_coco_accuracy(pred_json, self.label_path) + else: + raise ValueError("can not find generated json dict for pycocotools") + return result diff --git a/models/cv/object_detection/rtdetr/ixrt/README.md b/models/cv/object_detection/rtdetr/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fe0ad7e3c0a285e79f05883d3ce66f59b3d0c7a5 --- /dev/null +++ b/models/cv/object_detection/rtdetr/ixrt/README.md @@ -0,0 +1,89 @@ +# RTDETR (ixRT) + +## Model Description + +RT-DETR is the first real-time end-to-end object detector. 
Specifically, we design an efficient hybrid encoder that effectively processes multi-scale features by decoupling intra-scale interaction and cross-scale fusion. Additionally, we propose an IoU-aware query selection mechanism to optimize the initialization of decoder queries. Moreover, RT-DETR supports flexible adjustment of inference speed by using a different number of decoder layers without requiring retraining, which greatly facilitates practical deployment of real-time object detectors. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | dev-only | 26.03 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: + +- to download the labels dataset. +- to download the validation dataset. +- to download the train dataset. +- to download the annotations dataset. + +```bash +unzip -q -d ./ coco2017labels.zip +unzip -q -d ./coco/images/ train2017.zip +unzip -q -d ./coco/images/ val2017.zip +unzip -q -d ./coco annotations_trainval2017.zip + +coco +├── annotations +│ └── instances_train2017.json +│ └── instances_val2017.json +│ └── captions_train2017.json +│ └── captions_val2017.json +│ └── person_keypoints_train2017.json +│ └── person_keypoints_val2017.json +├── images +│   ├── train2017 +│   └── val2017 +├── labels +│   ├── train2017 +│   └── val2017 +├── LICENSE +├── README.txt +├── test-dev2017.txt +├── train2017.cache +├── train2017.txt +├── val2017.cache +└── val2017.txt +``` + +### Install Dependencies + +```bash +pip3 install tqdm +pip3 install onnx +pip3 install pycocotools +pip3 install opencv-python==4.6.0.66 +``` + +### Model Conversion + +```bash +mkdir -p checkpoints +# download onnx into checkpoints +``` + +## Model Inference + +```bash +export DATASETS_DIR=/Path/to/coco/ +``` + +### FP16 +```bash +# Accuracy +bash scripts/infer_rtdetr_fp16_accuracy.sh +# Performance +bash scripts/infer_rtdetr_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | IOU@0.5 | IOU@0.5:0.95 | +|:------:|:---------:|:---------:|:-----:|:-------:|:------------:| +| RT-DETR| 32 | FP16 | 71.4 | 0.729 | 0.543 | diff --git a/models/cv/object_detection/rtdetr/ixrt/build_dynamic_engine.py b/models/cv/object_detection/rtdetr/ixrt/build_dynamic_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..1eb7453231d93363677f8a2d06312866e83a048e --- /dev/null +++ b/models/cv/object_detection/rtdetr/ixrt/build_dynamic_engine.py @@ -0,0 +1,58 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
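+
+# Builds an ixRT engine (through the TensorRT-compatible Python API) from the
+# RT-DETR ONNX model using a dynamic-batch optimization profile on input
+# "image": min 1x3x640x640, opt 32x3x640x640, max 64x3x640x640. FP16 is used
+# by default; --precision int8 additionally sets the INT8 builder flag.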
+ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt +from tensorrt import Dims + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + profile = builder.create_optimization_profile() + profile.set_shape("image", Dims([1, 3, 640, 640]), Dims([32, 3, 640, 640]), Dims([64, 3, 640, 640])) + build_config.add_optimization_profile(profile) + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + build_config.set_flag(precision) + if config.precision == "int8": + build_config.set_flag(tensorrt.BuilderFlag.FP16) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="float16", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/ixrt/calibration_dataset.py b/models/cv/object_detection/rtdetr/ixrt/calibration_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..0deb8df5055c35817d98ee0eb3cdf612ef4d6457 --- /dev/null +++ b/models/cv/object_detection/rtdetr/ixrt/calibration_dataset.py @@ -0,0 +1,27 @@ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=1, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/ixrt/ci/prepare.sh b/models/cv/object_detection/rtdetr/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..808c89b3c88743b779be5a6345987cce9785286b --- /dev/null +++ b/models/cv/object_detection/rtdetr/ixrt/ci/prepare.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -x +pip3 install tqdm +pip3 install onnx +pip3 install pycocotools +pip3 install opencv-python==4.6.0.66 +mkdir -p checkpoints +ln -s /root/data/checkpoints/rtdetrv3_r18vd_6x_coco_image_sim.onnx checkpoints/rtdetrv3_r18vd_6x_coco_image_sim.onnx \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s/igie/coco_labels.py b/models/cv/object_detection/rtdetr/ixrt/coco_labels.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/coco_labels.py rename to models/cv/object_detection/rtdetr/ixrt/coco_labels.py diff --git a/models/cv/object_detection/rtdetr/ixrt/common.py b/models/cv/object_detection/rtdetr/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..22f3fa601b6381c3325f429245ca0ed4b0037b11 --- /dev/null +++ b/models/cv/object_detection/rtdetr/ixrt/common.py @@ -0,0 +1,49 @@ +import numpy as np +from tqdm import tqdm + +import tensorrt +from cuda import cuda, cudart + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result,class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + if image_id == -1: + continue + for box in boxes: + c, p, x, y, w, h = box + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context diff --git a/models/cv/object_detection/yolov5s/igie/datasets/__init__.py b/models/cv/object_detection/rtdetr/ixrt/datasets/__init__.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/datasets/__init__.py rename to models/cv/object_detection/rtdetr/ixrt/datasets/__init__.py diff --git a/models/cv/object_detection/rtdetr/ixrt/datasets/coco.py b/models/cv/object_detection/rtdetr/ixrt/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..2b35e753c80d341a92a2276508b56c4b9a798756 --- /dev/null +++ b/models/cv/object_detection/rtdetr/ixrt/datasets/coco.py @@ -0,0 +1,116 @@ +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_pre_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. 
+ transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_pre_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/object_detection/rtdetr/ixrt/datasets/common.py b/models/cv/object_detection/rtdetr/ixrt/datasets/common.py new file mode 100644 index 0000000000000000000000000000000000000000..8c8adc73e1cf2329ae93a8c0730797b6c127b6c8 --- /dev/null +++ b/models/cv/object_detection/rtdetr/ixrt/datasets/common.py @@ -0,0 +1,94 @@ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def scale_rtdetr_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / 
ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [2, 4]] -= pad[0] # x padding + boxes[:, [3, 5]] -= pad[1] # y padding + boxes[:, 2:] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + boxes[:, 4] /= x_scale + boxes[:, 5] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 + + return boxes \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/ixrt/datasets/post_process.py b/models/cv/object_detection/rtdetr/ixrt/datasets/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..710616a649b1afebdf80238494be5c79b2aa8194 --- /dev/null +++ b/models/cv/object_detection/rtdetr/ixrt/datasets/post_process.py @@ -0,0 +1,192 @@ +import cv2 +import math +import numpy as np +import torch +import torch.nn.functional as F + +from .common import letterbox, scale_boxes, clip_boxes, scale_rtdetr_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + elif data_process_type == "detr": + return DetrPostprocess + elif data_process_type == "rtdetrv3": + return RtDetrV3Postprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) 
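+        # The NMS output buffer reserves a fixed max_det*6 slot per sample,
+        # so advance by the full stride even when fewer boxes were kept.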
+ data_offset += max_det * 6 + + return all_box + +def box_cxcywh_to_xyxy(x): + x_c, y_c, w, h = x.unbind(-1) + b = [(x_c - 0.5 * w), (y_c - 0.5 * h), + (x_c + 0.5 * w), (y_c + 0.5 * h)] + return torch.stack(b, dim=-1) + + +def convert_to_xywh(boxes): + xmin, ymin, xmax, ymax = boxes.unbind(-1) + return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) + +def DetrPostprocess(pred_logits, pred_boxes, target_sizes): + + out_logits = torch.from_numpy(pred_logits) + out_bbox = torch.from_numpy(pred_boxes) + assert len(target_sizes) == 2 + + prob = F.softmax(out_logits, -1) + scores, labels = prob[..., :-1].max(-1) + + # convert to [x0, y0, x1, y1] format + boxes = box_cxcywh_to_xyxy(out_bbox) + # and from relative [0, 1] to absolute [0, height] coordinates + img_w, img_h = target_sizes + scale_fct = torch.tensor([img_w, img_h, img_w, img_h]) + boxes = boxes * scale_fct + + boxes = clip_boxes(boxes, target_sizes) + boxes = convert_to_xywh(boxes) + + labels = labels.unsqueeze(1) + scores =scores.unsqueeze(1) + pred_boxes = torch.cat([ + boxes, + labels, + scores], dim=1).numpy().tolist() + return pred_boxes + +def RtDetrV3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_num, + sample_num, + max_det=300, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + + for i in range(sample_num): + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_rtdetr_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 4] -= boxes[:, 2] + boxes[:, 5] -= boxes[:, 3] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/ixrt/datasets/pre_process.py b/models/cv/object_detection/rtdetr/ixrt/datasets/pre_process.py new file mode 100644 index 0000000000000000000000000000000000000000..da87d016a53475f662283a636904b2d1a57828ee --- /dev/null +++ b/models/cv/object_detection/rtdetr/ixrt/datasets/pre_process.py @@ -0,0 +1,83 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_pre_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + elif data_process_type == "detr": + return DetrPreprocess + elif data_process_type == "rtdetrv3": + return RtDetrV3Preprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 
255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img + +def DetrPreprocess(image, img_size): + # img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) + # img = img.resize((img_size, img_size)) + + std = [0.485, 0.456, 0.406] + mean = [0.229, 0.224, 0.225] + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + + image[0,:,:] = (image[0,:,:]- std[0])/mean[0] + image[1,:,:] = (image[1,:,:]- std[1])/mean[1] + image[2,:,:] = (image[2,:,:]- std[2])/mean[2] + + return image + +def RtDetrV3Preprocess(image, img_size): + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s/igie/datasets/vision.py b/models/cv/object_detection/rtdetr/ixrt/datasets/vision.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/datasets/vision.py rename to models/cv/object_detection/rtdetr/ixrt/datasets/vision.py diff --git a/models/cv/object_detection/rtdetr/ixrt/inference.py b/models/cv/object_detection/rtdetr/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..06074e93fa0222dd8cd9032480ffda38b6d852c5 --- /dev/null +++ b/models/cv/object_detection/rtdetr/ixrt/inference.py @@ -0,0 +1,239 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import json +import os +import time +import numpy as np +from cuda import cuda, cudart + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json +from common import create_engine_context +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process +from PIL import Image, ImageDraw +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine + engine, context = create_engine_context(config.model_engine, logger) + + # Setup I/O bindings + inputs = [] + 
outputs = [] + for i in range(engine.num_io_tensors): + tensor_name = engine.get_tensor_name(i) + dtype = engine.get_tensor_dtype(tensor_name) + shape = engine.get_tensor_shape(tensor_name) + size = np.dtype(tensorrt.nptype(dtype)).itemsize + np_dtype = np.dtype(tensorrt.nptype(dtype)) + if -1 in list(shape): + if engine.get_tensor_mode(tensor_name) == tensorrt.TensorIOMode.INPUT: + shape = engine.get_tensor_profile_shape(tensor_name, 0)[2] + context.set_input_shape(tensor_name, shape) + for s in shape: + size *= s + else: + shape = context.get_tensor_shape(tensor_name) + for s in shape: + size *= s + else: + for s in shape: + size *= s + + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + context.set_tensor_address(tensor_name, int(allocation)) + binding = { + "name": tensor_name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "allocation": allocation, + } + if engine.get_tensor_mode(tensor_name) == tensorrt.TensorIOMode.INPUT: + inputs.append(binding) + else: + outputs.append(binding) + + err, stream = cudart.cudaStreamCreate() + assert err == cudart.cudaError_t.cudaSuccess + + # Warm up + print(config.warm_up) + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_async_v3(stream) + print("Warm Done.") + + # Prepare the output data + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + cur_bsz_sample = batch_data.shape[0] + # Set input + context.set_input_shape(inputs[0]["name"], batch_data.shape) + err, = cudart.cudaMemcpy(inputs[0]["allocation"], batch_data, batch_data.nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice) + assert err == cudart.cudaError_t.cudaSuccess + + # Forward + start_time = time.time() + context.execute_async_v3(stream) + end_time = time.time() + forward_time += end_time - start_time + + if config.test_mode == "MAP": + # Fetch output + output_shape = context.get_tensor_shape(outputs[0]["name"]) + model_output = np.zeros(output_shape, outputs[0]["dtype"]) + output_size = outputs[0]["dtype"].itemsize + for s in output_shape: + output_size *= s + err, = cudart.cudaMemcpy(model_output, outputs[0]["allocation"], output_size, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost) + assert err == cudart.cudaError_t.cudaSuccess + + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=model_output, + box_num=output_shape[1], + sample_num=cur_bsz_sample, + max_det=300 + ) + save2json(batch_img_id, pred_boxes, json_result,class_map) + + + fps = num_samples / forward_time + # Free + for i in range(engine.num_io_tensors): + tensor_name = engine.get_tensor_name(i) + (err,) = cudart.cudaFree(context.get_tensor_address(tensor_name)) + assert err == cudart.cudaError_t.cudaSuccess + + if config.test_mode == "FPS": + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(1) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + pred_json = os.path.join( + config.pred_dir, f"rt-detr-v3_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = 
anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + eval.evaluate() + eval.accumulate() + print( + f"==============================eval rt-detr-v3 {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=0, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/ixrt/scripts/infer_rtdetr_fp16_accuracy.sh b/models/cv/object_detection/rtdetr/ixrt/scripts/infer_rtdetr_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..c59bd141cf94c26016e0b302a0c71dc65ccf010c --- /dev/null +++ b/models/cv/object_detection/rtdetr/ixrt/scripts/infer_rtdetr_fp16_accuracy.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
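+#
+# COCO accuracy check for RT-DETR on ixRT (FP16): builds the dynamic FP16
+# engine with build_dynamic_engine.py if it is not already cached in
+# checkpoints/, then runs inference.py in MAP mode on the COCO val2017 split.
+# Requires DATASETS_DIR to point at the COCO root; optional overrides:
+#   --bs <batch size> (default 8), --tgt <target mAP@0.5> (default -1).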
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=8 +WARM_UP=-1 +TGT=-1 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=float16 +DATA_PROCESS_TYPE=rtdetrv3 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR=${DATASETS_DIR} +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +ORIGINE_MODEL=${CHECKPOINTS_DIR}/rtdetrv3_r18vd_6x_coco_image_sim.onnx + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo ====================== Model Info ====================== +echo Model Name : rt-detr-v3 +echo Onnx Path : ${ORIGINE_MODEL} + +CURRENT_MODEL=${CHECKPOINTS_DIR}/rtdetrv3_r18vd_6x_coco_image_sim.onnx + +# Build Engine +echo Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/rtdetrv3_fp16.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_dynamic_engine.py \ + --precision float16 \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +echo Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine ${ENGINE_FILE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --precision ${PRECISION} \ + --imgsz 640 \ + --test_mode ${RUN_MODE} \ + --map_target ${TGT} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/ixrt/scripts/infer_rtdetr_fp16_performance.sh b/models/cv/object_detection/rtdetr/ixrt/scripts/infer_rtdetr_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..7c1dd82cde932c9ce64cfe49338d17451faff7e1 --- /dev/null +++ b/models/cv/object_detection/rtdetr/ixrt/scripts/infer_rtdetr_fp16_performance.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
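+#
+# FPS performance check for RT-DETR on ixRT (FP16): reuses (or builds) the
+# same dynamic FP16 engine and runs inference.py in FPS mode.
+# Requires DATASETS_DIR; optional overrides:
+#   --bs <batch size> (default 8), --tgt <target FPS> (default -1).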
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=8 +WARM_UP=-1 +TGT=-1 +LOOP_COUNT=-1 +RUN_MODE=FPS +PRECISION=float16 +DATA_PROCESS_TYPE=rtdetrv3 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR=${DATASETS_DIR} +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +ORIGINE_MODEL=${CHECKPOINTS_DIR}/rtdetrv3_r18vd_6x_coco_image_sim.onnx + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo ====================== Model Info ====================== +echo Model Name : rt-detr-v3 +echo Onnx Path : ${ORIGINE_MODEL} + +CURRENT_MODEL=${CHECKPOINTS_DIR}/rtdetrv3_r18vd_6x_coco_image_sim.onnx + +# Build Engine +echo Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/rtdetrv3_fp16.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_dynamic_engine.py \ + --precision float16 \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +echo Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine ${ENGINE_FILE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --precision ${PRECISION} \ + --imgsz 640 \ + --test_mode ${RUN_MODE} \ + --fps_target ${TGT} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/paddlepaddle/README.md b/models/cv/object_detection/rtdetr/paddlepaddle/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7caca7e86f55d3809a0bf7c660c4ad372984effe --- /dev/null +++ b/models/cv/object_detection/rtdetr/paddlepaddle/README.md @@ -0,0 +1,103 @@ +# RTDETR (PaddlePaddle) + +## Model Description + +RT-DETR is the first real-time end-to-end object detector. Specifically, we design an efficient hybrid encoder that effectively processes multi-scale features by decoupling intra-scale interaction and cross-scale fusion. Additionally, we propose an IoU-aware query selection mechanism to optimize the initialization of decoder queries. Moreover, RT-DETR supports flexible adjustment of inference speed by using a different number of decoder layers without requiring retraining, which greatly facilitates practical deployment of real-time object detectors. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | dev-only | 26.03 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: + +- to download the labels dataset. +- to download the validation dataset. +- to download the train dataset. +- to download the annotations dataset. 
+
+```bash
+unzip -q -d ./ coco2017labels.zip
+unzip -q -d ./coco/ train2017.zip
+unzip -q -d ./coco/ val2017.zip
+unzip -q -d ./coco annotations_trainval2017.zip
+
+coco
+├── annotations
+│   ├── instances_train2017.json
+│   ├── instances_val2017.json
+│   ├── captions_train2017.json
+│   ├── captions_val2017.json
+│   ├── person_keypoints_train2017.json
+│   └── person_keypoints_val2017.json
+├── val2017
+├── train2017
+├── labels
+│   ├── train2017
+│   └── val2017
+├── LICENSE
+├── README.txt
+├── test-dev2017.txt
+├── train2017.cache
+├── train2017.txt
+├── val2017.cache
+└── val2017.txt
+```
+
+### Install Dependencies
+
+Contact the Iluvatar administrator to get the missing packages:
+
+- paddlepaddle-3.1.0+corex*.whl
+
+```bash
+pip3 install paddlepaddle-3.1.0+corex*.whl
+pip3 install scikit-learn
+# Install PaddleDetection (the v2.8.1 release archive downloads as v2.8.1.zip and extracts to PaddleDetection-2.8.1/)
+wget https://github.com/PaddlePaddle/PaddleDetection/archive/refs/tags/v2.8.1.zip
+unzip v2.8.1.zip
+cd PaddleDetection-2.8.1
+
+pip3 install -r requirements.txt
+pip3 install -e .
+cd ..
+rm -rf v2.8.1.zip
+```
+
+### Model Conversion
+
+```bash
+mkdir -p output_inference
+# export paddle inference model
+python3 export_model.py -c PaddleDetection-2.8.1/configs/rtdetr/rtdetr_r101vd_6x_coco.yml -o weights=https://bj.bcebos.com/v1/paddledet/models/rtdetr_r101vd_6x_coco.pdparams --output_dir=output_inference
+```
+
+## Model Inference
+
+```bash
+export DATASETS_DIR=/Path/to/coco/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_rtdetr_fp16_accuracy.sh
+# Performance
+bash scripts/infer_rtdetr_fp16_performance.sh
+```
+
+## Model Results
+
+| Model | BatchSize | Precision | FPS | IOU@0.5 | IOU@0.5:0.95 |
+|:------:|:---------:|:---------:|:-----:|:-------:|:------------:|
+| RT-DETR | 32 | FP16 | 71.4 | 0.729 | 0.543 |
+
+## References
+
+- [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection.git)
diff --git a/models/cv/object_detection/rtdetr/paddlepaddle/ci/prepare.sh b/models/cv/object_detection/rtdetr/paddlepaddle/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..28f0601b3c7ad261f460ffe1f047d526ac662266
--- /dev/null
+++ b/models/cv/object_detection/rtdetr/paddlepaddle/ci/prepare.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+pip3 install scikit-learn
+unzip -q /mnt/deepspark/data/3rd_party/v2.8.1.zip -d ./
+cd PaddleDetection-2.8.1
+pip3 install -r requirements.txt
+pip3 install -e .
+cd ..
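+# NOTE: assumes rtdetr_r101vd_6x_coco.pdparams is already available in the
+# working directory (the README's export step downloads it from the
+# PaddleDetection model zoo URL instead).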
+mkdir -p output_inference +# export paddle inference model +python3 export_model.py -c PaddleDetection-2.8.1/configs/rtdetr/rtdetr_r101vd_6x_coco.yml -o weights=./rtdetr_r101vd_6x_coco.pdparams --output_dir=output_inference \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/paddlepaddle/export_model.py b/models/cv/object_detection/rtdetr/paddlepaddle/export_model.py new file mode 100644 index 0000000000000000000000000000000000000000..8e00a5277a2d443286663ecc9bfedbcf51d66000 --- /dev/null +++ b/models/cv/object_detection/rtdetr/paddlepaddle/export_model.py @@ -0,0 +1,123 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys + +# add python path of PaddleDetection to sys.path +parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2))) +sys.path.insert(0, parent_path) + +# ignore warning log +import warnings +warnings.filterwarnings('ignore') + +import paddle +from ppdet.core.workspace import load_config, merge_config +from ppdet.utils.check import check_gpu, check_version, check_config +from ppdet.utils.cli import ArgsParser +from ppdet.engine import Trainer +from ppdet.engine.trainer_ssod import Trainer_ARSL +from ppdet.slim import build_slim_model + +from ppdet.utils.logger import setup_logger +logger = setup_logger('export_model') + + +def parse_args(): + parser = ArgsParser() + parser.add_argument( + "--output_dir", + type=str, + default="output_inference", + help="Directory for storing the output model files.") + parser.add_argument( + "--export_serving_model", + type=bool, + default=False, + help="Whether to export serving model or not.") + parser.add_argument( + "--slim_config", + default=None, + type=str, + help="Configuration file of slim method.") + parser.add_argument("--for_fd", action='store_true') + args = parser.parse_args() + return args + + +def run(FLAGS, cfg): + ssod_method = cfg.get('ssod_method', None) + if ssod_method is not None and ssod_method == 'ARSL': + trainer = Trainer_ARSL(cfg, mode='test') + trainer.load_weights(cfg.weights, ARSL_eval=True) + # build detector + else: + trainer = Trainer(cfg, mode='test') + + # load weights + if cfg.architecture in ['DeepSORT', 'ByteTrack']: + trainer.load_weights_sde(cfg.det_weights, cfg.reid_weights) + else: + trainer.load_weights(cfg.weights) + + # export model + trainer.export(FLAGS.output_dir, for_fd=FLAGS.for_fd) + + if FLAGS.export_serving_model: + assert not FLAGS.for_fd + from paddle_serving_client.io import inference_model_to_serving + model_name = os.path.splitext(os.path.split(cfg.filename)[-1])[0] + + inference_model_to_serving( + dirname="{}/{}".format(FLAGS.output_dir, model_name), + serving_server="{}/{}/serving_server".format(FLAGS.output_dir, + model_name), + serving_client="{}/{}/serving_client".format(FLAGS.output_dir, + model_name), + 
model_filename="model.pdmodel", + params_filename="model.pdiparams") + + +def main(): + if 'npu' in paddle.device.get_device(): + paddle.set_device("npu") + else: + paddle.set_device("cpu") + + FLAGS = parse_args() + cfg = load_config(FLAGS.config) + merge_config(FLAGS.opt) + + if FLAGS.slim_config: + cfg = build_slim_model(cfg, FLAGS.slim_config, mode='test') + + # FIXME: Temporarily solve the priority problem of FLAGS.opt + merge_config(FLAGS.opt) + check_config(cfg) + if 'use_gpu' not in cfg: + cfg.use_gpu = False + check_gpu(cfg.use_gpu) + check_version() + + run(FLAGS, cfg) + + +if __name__ == '__main__': + main() diff --git a/models/cv/object_detection/rtdetr/paddlepaddle/post_process.py b/models/cv/object_detection/rtdetr/paddlepaddle/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..c38e6ca23bdb5649c63e5a8d5070329357a023b7 --- /dev/null +++ b/models/cv/object_detection/rtdetr/paddlepaddle/post_process.py @@ -0,0 +1,158 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import numpy as np +import cv2 + + +def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): + """ + Args: + box_scores (N, 5): boxes in corner-form and probabilities. + iou_threshold: intersection over union threshold. + top_k: keep top_k results. If k <= 0, keep all the results. + candidate_size: only consider the candidates with the highest scores. + Returns: + picked: a list of indexes of the kept boxes + """ + scores = box_scores[:, -1] + boxes = box_scores[:, :-1] + picked = [] + indexes = np.argsort(scores) + indexes = indexes[-candidate_size:] + while len(indexes) > 0: + current = indexes[-1] + picked.append(current) + if 0 < top_k == len(picked) or len(indexes) == 1: + break + current_box = boxes[current, :] + indexes = indexes[:-1] + rest_boxes = boxes[indexes, :] + iou = iou_of( + rest_boxes, + np.expand_dims( + current_box, axis=0), ) + indexes = indexes[iou <= iou_threshold] + + return box_scores[picked, :] + + +def iou_of(boxes0, boxes1, eps=1e-5): + """Return intersection-over-union (Jaccard index) of boxes. + Args: + boxes0 (N, 4): ground truth boxes. + boxes1 (N or 1, 4): predicted boxes. + eps: a small number to avoid 0 as denominator. + Returns: + iou (N): IoU values. + """ + overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) + overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) + + overlap_area = area_of(overlap_left_top, overlap_right_bottom) + area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) + area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) + return overlap_area / (area0 + area1 - overlap_area + eps) + + +def area_of(left_top, right_bottom): + """Compute the areas of rectangles given two corners. + Args: + left_top (N, 2): left top corner. + right_bottom (N, 2): right bottom corner. + Returns: + area (N): return the area. 
+ """ + hw = np.clip(right_bottom - left_top, 0.0, None) + return hw[..., 0] * hw[..., 1] + + +class PPYOLOEPostProcess(object): + """ + Args: + input_shape (int): network input image size + scale_factor (float): scale factor of ori image + """ + + def __init__(self, + score_threshold=0.4, + nms_threshold=0.5, + nms_top_k=10000, + keep_top_k=300): + self.score_threshold = score_threshold + self.nms_threshold = nms_threshold + self.nms_top_k = nms_top_k + self.keep_top_k = keep_top_k + + def _non_max_suppression(self, prediction, scale_factor): + batch_size = prediction.shape[0] + out_boxes_list = [] + box_num_list = [] + for batch_id in range(batch_size): + bboxes, confidences = prediction[batch_id][..., :4], prediction[ + batch_id][..., 4:] + # nms + picked_box_probs = [] + picked_labels = [] + for class_index in range(0, confidences.shape[1]): + probs = confidences[:, class_index] + mask = probs > self.score_threshold + probs = probs[mask] + if probs.shape[0] == 0: + continue + subset_boxes = bboxes[mask, :] + box_probs = np.concatenate( + [subset_boxes, probs.reshape(-1, 1)], axis=1) + box_probs = hard_nms( + box_probs, + iou_threshold=self.nms_threshold, + top_k=self.nms_top_k) + picked_box_probs.append(box_probs) + picked_labels.extend([class_index] * box_probs.shape[0]) + + if len(picked_box_probs) == 0: + out_boxes_list.append(np.empty((0, 4))) + + else: + picked_box_probs = np.concatenate(picked_box_probs) + # resize output boxes + picked_box_probs[:, 0] /= scale_factor[batch_id][1] + picked_box_probs[:, 2] /= scale_factor[batch_id][1] + picked_box_probs[:, 1] /= scale_factor[batch_id][0] + picked_box_probs[:, 3] /= scale_factor[batch_id][0] + + # clas score box + out_box = np.concatenate( + [ + np.expand_dims( + np.array(picked_labels), axis=-1), np.expand_dims( + picked_box_probs[:, 4], axis=-1), + picked_box_probs[:, :4] + ], + axis=1) + if out_box.shape[0] > self.keep_top_k: + out_box = out_box[out_box[:, 1].argsort()[::-1] + [:self.keep_top_k]] + out_boxes_list.append(out_box) + box_num_list.append(out_box.shape[0]) + + out_boxes_list = np.concatenate(out_boxes_list, axis=0) + box_num_list = np.array(box_num_list) + return out_boxes_list, box_num_list + + def __call__(self, outs, scale_factor): + out_boxes_list, box_num_list = self._non_max_suppression(outs, + scale_factor) + return {'bbox': out_boxes_list, 'bbox_num': box_num_list} diff --git a/models/cv/object_detection/rtdetr/paddlepaddle/scripts/infer_rtdetr_fp16_accuracy.sh b/models/cv/object_detection/rtdetr/paddlepaddle/scripts/infer_rtdetr_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..ebdceac1fe054f090de1332a5453641f221ed4ad --- /dev/null +++ b/models/cv/object_detection/rtdetr/paddlepaddle/scripts/infer_rtdetr_fp16_accuracy.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
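+#
+# COCO accuracy evaluation for the exported RT-DETR Paddle inference model:
+# runs test_det.py in FP16 against output_inference/rtdetr_r101vd_6x_coco
+# using the PaddleDetection rtdetr_reader.yml config. DATASETS_DIR must point
+# at the COCO root; --bs overrides the default batch size of 32.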
+ +batchsize=32 +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +echo "batch size is ${batchsize}" + +python3 test_det.py \ + --model_path output_inference/rtdetr_r101vd_6x_coco \ + --config PaddleDetection-2.8.1/deploy/auto_compression/configs/rtdetr_reader.yml \ + --precision fp16 \ + --dataset_dir ${datasets_path} \ + --batch_size ${batchsize} \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/paddlepaddle/scripts/infer_rtdetr_fp16_performance.sh b/models/cv/object_detection/rtdetr/paddlepaddle/scripts/infer_rtdetr_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..106e47cb14c0fdf45c5aa32526b3cc926b726100 --- /dev/null +++ b/models/cv/object_detection/rtdetr/paddlepaddle/scripts/infer_rtdetr_fp16_performance.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +datasets_path=${DATASETS_DIR} + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) batchsize=${arguments[index]};; + esac +done + +python3 test_det.py \ + --model_path output_inference/rtdetr_r101vd_6x_coco \ + --config PaddleDetection-2.8.1/deploy/auto_compression/configs/rtdetr_reader.yml \ + --precision fp16 \ + --dataset_dir ${datasets_path} \ + --batch_size ${batchsize} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/object_detection/rtdetr/paddlepaddle/test_det.py b/models/cv/object_detection/rtdetr/paddlepaddle/test_det.py new file mode 100644 index 0000000000000000000000000000000000000000..f1a28da54a9af658794ca089d5299e30a7d7f812 --- /dev/null +++ b/models/cv/object_detection/rtdetr/paddlepaddle/test_det.py @@ -0,0 +1,563 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
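+#
+# test_det.py evaluates the exported RT-DETR model with the Paddle Inference
+# API: it loads model.pdmodel/model.pdiparams from --model_path, builds the
+# COCO EvalReader from the --config reader yaml, and reports COCO mAP through
+# COCOMetric (or only FPS when --perf_only is set). Precision fp32/fp16/int8
+# is chosen with --precision; TensorRT/MKLDNN paths and a single-image demo
+# mode (--image_file) are also available.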
+ +import os +import argparse +import time +import sys +import cv2 +import numpy as np + +import paddle +from paddle.inference import Config +from paddle.inference import create_predictor +from ppdet.core.workspace import load_config, create +from ppdet.metrics import COCOMetric + +from post_process import PPYOLOEPostProcess + + +def str_to_bool(value): + if isinstance(value, bool): + return value + if value.lower() in ('True', 'true'): + return True + elif value.lower() in ('False', 'false'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +def argsparser(): + """ + argsparser func + """ + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_path", type=str, help="inference model filepath") + parser.add_argument( + "--image_file", + type=str, + default=None, + help="image path, if set image_file, it will not eval coco.") + parser.add_argument( + "--config", + type=str, + default=None, + help="path of datset and reader config.") + parser.add_argument( + "--benchmark", + type=str_to_bool, + default=False, + help="Whether run benchmark or not.") + parser.add_argument( + "--use_trt", + type=str_to_bool, + default=False, + help="Whether use TensorRT or not.") + parser.add_argument( + "--precision", + type=str, + default="paddle", + help="mode of running(fp32/fp16/int8)") + parser.add_argument( + "--device", + type=str, + default="GPU", + help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is GPU", + ) + parser.add_argument( + "--use_dynamic_shape", + type=str_to_bool, + default=False, + help="Whether use dynamic shape or not.") + parser.add_argument( + "--use_mkldnn", + type=str_to_bool, + default=False, + help="Whether use mkldnn or not.") + parser.add_argument( + "--cpu_threads", type=int, default=10, help="Num of cpu threads.") + parser.add_argument("--img_shape", type=int, default=640, help="input_size") + parser.add_argument( + '--include_nms', + type=str_to_bool, + default=True, + help="Whether include nms or not.") + parser.add_argument( + "--use_multi_img_for_dynamic_shape_collect", + type=str_to_bool, + default=True, + help="Whether it is necessary to use multiple images to collect shape infomation,\ + When the image sizes in the data set are different, it needs to be set to True.") + parser.add_argument( + "--delete_pass_name", + default=None, + type=str, + help="Pass that need to be deleted during the ir optimization process") + parser.add_argument( + "--dataset_dir", + type=str, + required=True, + help="Path to COCO dataset root (e.g., /home/datasets/coco)") + parser.add_argument( + "--batch_size", + type=int, + default=1, + help="Batch size for evaluation") + parser.add_argument( + "--perf_only", + type=bool, + default=False, + help="Run performance test only") + return parser + + +CLASS_LABEL = [ + 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', + 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', + 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', + 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', + 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', + 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', + 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', + 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', + 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', + 
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', + 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', + 'hair drier', 'toothbrush' +] + + +def generate_scale(im, target_shape, keep_ratio=True): + """ + Args: + im (np.ndarray): image (np.ndarray) + Returns: + im_scale_x: the resize ratio of X + im_scale_y: the resize ratio of Y + """ + origin_shape = im.shape[:2] + if keep_ratio: + im_size_min = np.min(origin_shape) + im_size_max = np.max(origin_shape) + target_size_min = np.min(target_shape) + target_size_max = np.max(target_shape) + im_scale = float(target_size_min) / float(im_size_min) + if np.round(im_scale * im_size_max) > target_size_max: + im_scale = float(target_size_max) / float(im_size_max) + im_scale_x = im_scale + im_scale_y = im_scale + else: + resize_h, resize_w = target_shape + im_scale_y = resize_h / float(origin_shape[0]) + im_scale_x = resize_w / float(origin_shape[1]) + return im_scale_y, im_scale_x + + +def image_preprocess(img_path, target_shape): + """ + image_preprocess func + """ + img = cv2.imread(img_path) + im_scale_y, im_scale_x = generate_scale(img, target_shape, keep_ratio=False) + img = cv2.resize( + img, (target_shape[0], target_shape[0]), + interpolation=cv2.INTER_LANCZOS4) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = np.transpose(img, [2, 0, 1]) / 255 + img = np.expand_dims(img, 0) + img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) + img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + img -= img_mean + img /= img_std + scale_factor = np.array([[im_scale_y, im_scale_x]]) + return img.astype(np.float32), scale_factor.astype(np.float32) + + +def get_color_map_list(num_classes): + """ + get_color_map_list func + """ + color_map = num_classes * [0, 0, 0] + for i in range(0, num_classes): + j = 0 + lab = i + while lab: + color_map[i * 3] |= ((lab >> 0) & 1) << (7 - j) + color_map[i * 3 + 1] |= ((lab >> 1) & 1) << (7 - j) + color_map[i * 3 + 2] |= ((lab >> 2) & 1) << (7 - j) + j += 1 + lab >>= 3 + color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)] + return color_map + + +def draw_box(image_file, results, class_label, threshold=0.5): + """ + draw_box func + """ + srcimg = cv2.imread(image_file, 1) + for i in range(len(results)): + color_list = get_color_map_list(len(class_label)) + clsid2color = {} + classid, conf = int(results[i, 0]), results[i, 1] + if conf < threshold: + continue + xmin, ymin, xmax, ymax = int(results[i, 2]), int(results[i, 3]), int( + results[i, 4]), int(results[i, 5]) + + if classid not in clsid2color: + clsid2color[classid] = color_list[classid] + color = tuple(clsid2color[classid]) + + cv2.rectangle(srcimg, (xmin, ymin), (xmax, ymax), color, thickness=2) + print(class_label[classid] + ": " + str(round(conf, 3))) + cv2.putText( + srcimg, + class_label[classid] + ":" + str(round(conf, 3)), + (xmin, ymin - 10), + cv2.FONT_HERSHEY_SIMPLEX, + 0.8, + (0, 255, 0), + thickness=2, ) + return srcimg + +def find_images_with_bounding_size(loader): + max_length_index = -1 + max_width_index = -1 + min_length_index = -1 + min_width_index = -1 + + max_length = float('-inf') + max_width = float('-inf') + min_length = float('inf') + min_width = float('inf') + for idx, data in enumerate(loader): + data_all = {k: np.array(v) for k, v in data.items()} + # print(idx) + h,w = data_all["im_shape"][0] + # print(h, w) + if int(w)==800 and h > max_length: + max_length = h + max_length_index = idx + if int(h)==800 and w > max_width: + max_width = w + max_width_index = idx + if h < 
min_length: + min_length = h + min_length_index = idx + if w < min_width: + min_width = w + min_width_index = idx + print(f"Found max image length: {max_length}, index: {max_length_index}") + print(f"Found max image width: {max_width}, index: {max_width_index}") + print(f"Found min image length: {min_length}, index: {min_length_index}") + print(f"Found min image width: {min_width}, index: {min_width_index}") + + roidbs = loader.dataset.roidbs + subset = loader.dataset + subset.roidbs = [roidbs[i] for i in [max_length_index, max_width_index, min_length_index, min_width_index]] + return subset + +def load_predictor( + model_dir, + precision="fp32", + use_trt=False, + use_mkldnn=False, + batch_size=1, + device="CPU", + min_subgraph_size=3, + use_dynamic_shape=False, + trt_min_shape=1, + trt_max_shape=1280, + trt_opt_shape=640, + cpu_threads=1, ): + """set AnalysisConfig, generate AnalysisPredictor + Args: + model_dir (str): root path of __model__ and __params__ + precision (str): mode of running(fp32/fp16/int8) + use_trt (bool): whether use TensorRT or not. + use_mkldnn (bool): whether use MKLDNN or not in CPU. + device (str): Choose the device you want to run, it can be: CPU/GPU, default is CPU + use_dynamic_shape (bool): use dynamic shape or not + trt_min_shape (int): min shape for dynamic shape in trt + trt_max_shape (int): max shape for dynamic shape in trt + trt_opt_shape (int): opt shape for dynamic shape in trt + Returns: + predictor (PaddlePredictor): AnalysisPredictor + Raises: + ValueError: predict by TensorRT need device == 'GPU'. + """ + rerun_flag = False + if device != "GPU" and use_trt: + raise ValueError( + "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}". + format(precision, device)) + config = Config( + os.path.join(model_dir, "model.pdmodel"), + os.path.join(model_dir, "model.pdiparams")) + if device == "GPU": + # Map precision string to Paddle Inference Precision enum + precision_map = { + "fp32": Config.Precision.Float32, + "fp16": Config.Precision.Half, + # Note: Int8 is only effective when use_trt=True + "int8": Config.Precision.Int8, + } + gpu_precision = precision_map.get(precision, Config.Precision.Float32) + + config.enable_use_gpu(200, 0, gpu_precision) + config.switch_ir_optim(False) + else: + config.disable_gpu() + config.set_cpu_math_library_num_threads(cpu_threads) + config.switch_ir_optim() + if use_mkldnn: + config.enable_mkldnn() + if precision == "int8": + if "picodet_s" in FLAGS.config: + config.enable_mkldnn_int8({"conv2d"}) + else: + config.enable_mkldnn_int8({"conv2d", "depthwise_conv2d"}) + + precision_map = { + "int8": Config.Precision.Int8, + "fp32": Config.Precision.Float32, + "fp16": Config.Precision.Half, + } + if precision in precision_map.keys() and use_trt: + config.enable_tensorrt_engine( + workspace_size=(1 << 30) * batch_size, + max_batch_size=batch_size, + min_subgraph_size=min_subgraph_size, + precision_mode=precision_map[precision], + use_static=True, + use_calib_mode=False, ) + + if use_dynamic_shape: + dynamic_shape_file = os.path.join(FLAGS.model_path, + "dynamic_shape.txt") + if os.path.exists(dynamic_shape_file): + config.enable_tuned_tensorrt_dynamic_shape(dynamic_shape_file, + True) + print("trt set dynamic shape done!") + else: + config.disable_gpu() + config.set_cpu_math_library_num_threads(10) + config.collect_shape_range_info(dynamic_shape_file) + print("Start collect dynamic shape...") + rerun_flag = True + + if "dino" in FLAGS.config: + config.exp_disable_tensorrt_ops(["reshape2", "slice", "stack", 
"elementwise_add"]) + if "rtdetr" in FLAGS.config: + config.delete_pass("fc_mkldnn_pass") + config.delete_pass("fc_act_mkldnn_fuse_pass") + if FLAGS.delete_pass_name is not None: + config.delete_pass(FLAGS.delete_pass_name) + predictor = create_predictor(config) + return predictor, rerun_flag + + +def predict_image(predictor, + image_file, + image_shape=[640, 640], + warmup=1, + repeats=1, + threshold=0.5): + """ + predict image main func + """ + img, scale_factor = image_preprocess(image_file, image_shape) + inputs = {} + inputs["image"] = img + if FLAGS.include_nms: + inputs['scale_factor'] = scale_factor + input_names = predictor.get_input_names() + for i, _ in enumerate(input_names): + input_tensor = predictor.get_input_handle(input_names[i]) + input_tensor.copy_from_cpu(inputs[input_names[i]]) + + for i in range(warmup): + predictor.run() + + np_boxes, np_boxes_num = None, None + cpu_mems, gpu_mems = 0, 0 + predict_time = 0.0 + time_min = float("inf") + time_max = float("-inf") + for i in range(repeats): + start_time = time.time() + predictor.run() + output_names = predictor.get_output_names() + boxes_tensor = predictor.get_output_handle(output_names[0]) + np_boxes = boxes_tensor.copy_to_cpu() + if FLAGS.include_nms: + boxes_num = predictor.get_output_handle(output_names[1]) + np_boxes_num = boxes_num.copy_to_cpu() + end_time = time.time() + timed = end_time - start_time + time_min = min(time_min, timed) + time_max = max(time_max, timed) + predict_time += timed + time_avg = predict_time / repeats + print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format( + round(time_min * 1000, 2), + round(time_max * 1000, 1), round(time_avg * 1000, 1))) + if not FLAGS.include_nms: + postprocess = PPYOLOEPostProcess(score_threshold=0.3, nms_threshold=0.6) + res = postprocess(np_boxes, scale_factor) + else: + res = {'bbox': np_boxes, 'bbox_num': np_boxes_num} + res_img = draw_box( + image_file, res["bbox"], CLASS_LABEL, threshold=threshold) + cv2.imwrite("result.jpg", res_img) + + +def eval(predictor, val_loader, metric, rerun_flag=False): + """ + eval main func + """ + cpu_mems, gpu_mems = 0, 0 + predict_time = 0.0 + time_min = float("inf") + time_max = float("-inf") + sample_nums = len(val_loader.dataset) + input_names = predictor.get_input_names() + output_names = predictor.get_output_names() + boxes_tensor = predictor.get_output_handle(output_names[0]) + if FLAGS.include_nms: + boxes_num = predictor.get_output_handle(output_names[1]) + for batch_id, data in enumerate(val_loader): + data_all = {k: np.array(v) for k, v in data.items()} + for i, _ in enumerate(input_names): + input_tensor = predictor.get_input_handle(input_names[i]) + input_tensor.copy_from_cpu(data_all[input_names[i]]) + start_time = time.time() + predictor.run() + np_boxes = boxes_tensor.copy_to_cpu() + if FLAGS.include_nms: + np_boxes_num = boxes_num.copy_to_cpu() + end_time = time.time() + timed = end_time - start_time + time_min = min(time_min, timed) + time_max = max(time_max, timed) + predict_time += timed + if rerun_flag: + if FLAGS.use_multi_img_for_dynamic_shape_collect: + if batch_id == 3: + print( + "***** Collect dynamic shape done, Please rerun the program to get correct results. *****" + ) + return + else: + continue + else: + print( + "***** Collect dynamic shape done, Please rerun the program to get correct results. 
*****" + ) + return + + if not FLAGS.include_nms: + postprocess = PPYOLOEPostProcess( + score_threshold=0.3, nms_threshold=0.6) + res = postprocess(np_boxes, data_all['scale_factor']) + else: + res = {'bbox': np_boxes, 'bbox_num': np_boxes_num} + metric.update(data_all, res) + if batch_id % 100 == 0: + print("Eval iter:", batch_id) + sys.stdout.flush() + if FLAGS.perf_only: + fps = sample_nums / predict_time if predict_time > 0 else 0.0 + print("[Benchmark] FPS: {}".format(round(fps, 2))) + else: + metric.accumulate() + metric.log() + map_res = metric.get_results() + metric.reset() + time_avg = predict_time / sample_nums + print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format( + round(time_min * 1000, 2), + round(time_max * 1000, 1), round(time_avg * 1000, 1))) + print("[Benchmark] COCO mAP: {}".format(map_res["bbox"][0])) + sys.stdout.flush() + + +def main(): + """ + main func + """ + predictor, rerun_flag = load_predictor( + FLAGS.model_path, + device=FLAGS.device, + use_trt=FLAGS.use_trt, + use_mkldnn=FLAGS.use_mkldnn, + precision=FLAGS.precision, + use_dynamic_shape=FLAGS.use_dynamic_shape, + cpu_threads=FLAGS.cpu_threads) + + if FLAGS.image_file: + warmup, repeats = 1, 1 + if FLAGS.benchmark: + warmup, repeats = 50, 100 + predict_image( + predictor, + FLAGS.image_file, + image_shape=[FLAGS.img_shape, FLAGS.img_shape], + warmup=warmup, + repeats=repeats) + else: + reader_cfg = load_config(FLAGS.config) + + reader_cfg["EvalDataset"].dataset_dir = FLAGS.dataset_dir + reader_cfg["EvalReader"]["batch_size"] = FLAGS.batch_size + + dataset = reader_cfg["EvalDataset"] + # global val_loader + val_loader = create("EvalReader")(reader_cfg["EvalDataset"], + reader_cfg["worker_num"], + return_list=True) + + if rerun_flag: + sub_dataset = find_images_with_bounding_size(val_loader) + batch_sampler = paddle.io.BatchSampler( + sub_dataset, batch_size=1, shuffle=True, drop_last=False) + val_loader = paddle.io.DataLoader( + dataset=sub_dataset, + batch_sampler=batch_sampler, + collate_fn=val_loader._batch_transforms, + num_workers=1, + return_list=True + ) + + clsid2catid = {v: k for k, v in dataset.catid2clsid.items()} + anno_file = dataset.get_anno() + metric = COCOMetric( + anno_file=anno_file, clsid2catid=clsid2catid, IouType="bbox") + eval(predictor, val_loader, metric, rerun_flag=rerun_flag) + + + +if __name__ == "__main__": + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + + # DataLoader need run on cpu + paddle.set_device("cpu") + + main() diff --git a/models/cv/object_detection/rtmdet/igie/README.md b/models/cv/object_detection/rtmdet/igie/README.md index 2739e619da6a86c3c29768154e7b0a27d7cd5b18..bc22fe8983cb4ef6cb585e5999318ff918b56ba4 100644 --- a/models/cv/object_detection/rtmdet/igie/README.md +++ b/models/cv/object_detection/rtmdet/igie/README.md @@ -61,7 +61,7 @@ wget https://download.openmmlab.com/mmpose/v1/projects/rtmpose/rtmdet_nano_8xb32 Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/sabl/igie/README.md b/models/cv/object_detection/sabl/igie/README.md index 7bb2d14f6d53a0e77e29e3da8208ea21ac436a79..a26458e05eb0ecd9ad6d1cdbb7989279fbaa14b1 100644 --- a/models/cv/object_detection/sabl/igie/README.md +++ b/models/cv/object_detection/sabl/igie/README.md @@ -54,7 +54,7 @@ wget https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn Contact the Iluvatar administrator to get the 
missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/ssd/igie/README.md b/models/cv/object_detection/ssd/igie/README.md index 80d4b03e9a2cc4d2552069d2acdef9a4e84213e0..8b0bb5432f3a1fbf1dd571af7d16fdcbdafeb5c7 100644 --- a/models/cv/object_detection/ssd/igie/README.md +++ b/models/cv/object_detection/ssd/igie/README.md @@ -49,7 +49,7 @@ coco Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/yolof/igie/README.md b/models/cv/object_detection/yolof/igie/README.md index 731f0633c3f140e36d6667cc16728f7ce95dffbd..e93bf359a5a04cd88d0e6c2aea86ad9f7bc71d29 100644 --- a/models/cv/object_detection/yolof/igie/README.md +++ b/models/cv/object_detection/yolof/igie/README.md @@ -49,7 +49,7 @@ coco Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash # Install libGL diff --git a/models/cv/object_detection/yolof/ixrt/README.md b/models/cv/object_detection/yolof/ixrt/README.md index 2402c67f8d9789628d5bf707a7865d12a52cf175..cd57909c42341cf9f4954d066ef732cb0602f338 100644 --- a/models/cv/object_detection/yolof/ixrt/README.md +++ b/models/cv/object_detection/yolof/ixrt/README.md @@ -49,11 +49,10 @@ coco Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash pip3 install -r requirements.txt -pip3 install mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl ``` ### Model Conversion diff --git a/models/cv/object_detection/yolov10/igie/README.md b/models/cv/object_detection/yolov10/igie/README.md index e530952c6fcad98ed04b5d4806fad365b2e2717b..6a2b1448773ed28a44f36ab01e3a1bda690b35e7 100644 --- a/models/cv/object_detection/yolov10/igie/README.md +++ b/models/cv/object_detection/yolov10/igie/README.md @@ -48,13 +48,8 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash pip3 install -r requirements.txt -pip3 install mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl ``` ## Model Conversion diff --git a/models/cv/object_detection/yolov11/igie/README.md b/models/cv/object_detection/yolov11/igie/README.md index 2396ab5df3e09bcf99e362ab98bd52a68610dd80..ab39a4c2f212036afacd3f00029e646cba9ff973 100644 --- a/models/cv/object_detection/yolov11/igie/README.md +++ b/models/cv/object_detection/yolov11/igie/README.md @@ -48,13 +48,8 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash pip3 install -r requirements.txt -pip3 install mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl ``` ## Model Conversion diff --git a/models/cv/object_detection/yolov12/igie/README.md b/models/cv/object_detection/yolov12/igie/README.md index 46e9beb5d786c24e816d7f4618805804e6c60ced..f81ac624547b7fea9e30cec1eea7410dec86a36b 100644 --- a/models/cv/object_detection/yolov12/igie/README.md +++ b/models/cv/object_detection/yolov12/igie/README.md @@ -48,10 +48,6 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash pip3 install -r requirements.txt ``` diff --git 
a/models/cv/object_detection/yolov13/igie/README.md b/models/cv/object_detection/yolov13/igie/README.md index 911e0186785481797abe4013bf960ddb73d858a2..5571c98b24bf0ec00359c16eb43dba7c94aa5d06 100644 --- a/models/cv/object_detection/yolov13/igie/README.md +++ b/models/cv/object_detection/yolov13/igie/README.md @@ -48,13 +48,8 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash pip3 install -r requirements.txt -pip3 install mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl ``` ## Model Conversion diff --git a/models/cv/object_detection/yolov3/igie/README.md b/models/cv/object_detection/yolov3/igie/README.md index 2e22a56899a2be6ebf1f6e53d73c21013708be61..12b47ee1d4f00bc58bd0b8ab89ec8f702671d47a 100644 --- a/models/cv/object_detection/yolov3/igie/README.md +++ b/models/cv/object_detection/yolov3/igie/README.md @@ -48,13 +48,8 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash pip3 install -r requirements.txt -pip3 install mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl ``` ### Model Conversion diff --git a/models/cv/object_detection/yolov3/igie/ci/prepare.sh b/models/cv/object_detection/yolov3/igie/ci/prepare.sh index 73e4fd9e1bb486242b5535281a31edf746646692..ca3233849b2f8e55c0e9e2f66279088abfe77507 100644 --- a/models/cv/object_detection/yolov3/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov3/igie/ci/prepare.sh @@ -16,15 +16,6 @@ set -x -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx -elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL -else - echo "Not Support Os" -fi - pip3 install -r requirements.txt python3 export.py --weight yolov3.pt --output yolov3.onnx diff --git a/models/cv/object_detection/yolov3/ixrt/README.md b/models/cv/object_detection/yolov3/ixrt/README.md index 74497f064017182dc74b83ff46671aa67ce7c89f..8ccd0aca7afe327510e6233d88faec0141719b95 100644 --- a/models/cv/object_detection/yolov3/ixrt/README.md +++ b/models/cv/object_detection/yolov3/ixrt/README.md @@ -48,10 +48,6 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash # Install libGL ## CentOS diff --git a/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh index c67a36fb0882934a0481f54897a90ad0cce58559..219cffa026e8c503c7214de8acc032cf2aa5f8d2 100644 --- a/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh @@ -16,15 +16,6 @@ set -x -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx -elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL -else - echo "Not Support Os" -fi - pip3 install -r ../../ixrt_common/requirements.txt mkdir checkpoints unzip -q /root/data/3rd_party/onnx_tflite_yolov3.zip -d ./ diff --git a/models/cv/object_detection/yolov3_sample/igie/build_engine.py b/models/cv/object_detection/yolov3_sample/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..7b11d92d4d0be000a3fa873d582cd529bb681bc6 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/build_engine.py @@ -0,0 +1,33 @@ +import argparse +import numpy as np +import tvm 
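+# This script builds an IGIE engine from an ONNX model: the graph is imported
+# into TVM Relay via import_model_to_igie, compiled for the Iluvatar MR target
+# with the cudnn/cublas/ixinfer libraries at the requested precision, and the
+# compiled library is exported to the path given by --engine for later loading.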
+from tvm import relay +from tvm.relay.import_model import import_model_to_igie + + +def main(config): + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + precision = config.precision + + inputs_info = {"images": ([config.bsz, 3, 416, 416], "float32")} + mod, params = import_model_to_igie(config.model, inputs_info, precision=precision, backend="tensorrt") + lib = relay.build(mod, target=target, params=params, precision=precision, device=device) + lib.export_library(config.engine) + print("Build engine done!") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--bsz", type=int) + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s/igie/build_nms_engine.py b/models/cv/object_detection/yolov3_sample/igie/build_nms_engine.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/build_nms_engine.py rename to models/cv/object_detection/yolov3_sample/igie/build_nms_engine.py diff --git a/models/cv/object_detection/yolov5s/igie/calibration_dataset.py b/models/cv/object_detection/yolov3_sample/igie/calibration_dataset.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/calibration_dataset.py rename to models/cv/object_detection/yolov3_sample/igie/calibration_dataset.py diff --git a/models/cv/object_detection/yolov3_sample/igie/ci/prepare.sh b/models/cv/object_detection/yolov3_sample/igie/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..2034e9e8dc1149b2ec421deb945c8ae52cc52b02 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/ci/prepare.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
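+# CI preparation for the yolov3_sample IGIE pipeline: install the Python
+# dependencies used by the quantization and inference scripts, then link the
+# pre-exported yolov3 ONNX checkpoint from the shared CI data directory into
+# ./checkpoints (the /mnt/deepspark path below is specific to the internal CI
+# environment).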
+ +set -x + +pip3 install tqdm onnxsim pycuda +pip3 install pycocotools opencv_python==4.6.0.66 +mkdir -p checkpoints +ln -s /mnt/deepspark/data/datasets/corex-inference-data-4.0.0/checkpoints/yolov3/yolov3.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/coco_labels.py b/models/cv/object_detection/yolov3_sample/igie/coco_labels.py new file mode 100644 index 0000000000000000000000000000000000000000..69d38878ff16d66dfe7550fcd170ac91d0862318 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/coco_labels.py @@ -0,0 +1,89 @@ +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/object_detection/yolov5s/igie/common.py b/models/cv/object_detection/yolov3_sample/igie/common.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/common.py rename to models/cv/object_detection/yolov3_sample/igie/common.py diff --git a/models/cv/object_detection/yolov3_sample/igie/config/YOLOV3_CONFIG b/models/cv/object_detection/yolov3_sample/igie/config/YOLOV3_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..828be17df1bcb7b4cdbad91680dd957a57fc81af --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/config/YOLOV3_CONFIG @@ -0,0 +1,49 @@ +# BSZ : 构建engine以及推理时的batchsize +# IMGSIZE : 模型输入hw大小 +# RUN_MODE : [FPS, MAP] +# PRECISION : [float16, int8] +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件 +# COCO_GT : COCOEVAL标签文件 +# DATASET_DIR : 量化/推理数据集路径 +# CHECKPOINTS_DIR : 存放生成的onnx/engine路径 +# LAYER_FUSION : decoder部分走融合算子实现 0不融合 1融合 +# DECODER_FASTER : 有两种融合实现,faster版本速度快且可以直接对接gpu nms;另一种实现的输出和onnx保持一致. 
1:faster +IMGSIZE=416 +MODEL_NAME=yolov3 +ORIGINE_MODEL=yolov3.onnx +DATA_PROCESS_TYPE=yolov3 +MODEL_INPUT_NAMES=(images) + +LAYER_FUSION=1 +DECODER_FASTER=1 +DECODER_NUM_CLASS=80 +DECODER_INPUT_NAMES=(416 353 290) +DECODER_8_ANCHOR=(10 13 16 30 33 23) +DECODER_16_ANCHOR=(30 61 62 45 59 119) +DECODER_32_ANCHOR=(116 90 156 198 373 326) + +# NMS CONFIG + # IOU_THRESH : iou阈值 + # SCORE_THRESH : bbox置信度阈值 + # MAX_BOX_PRE_IMG : 每张图片预测bbox的数量上限 + # ALL_BOX_NUM : nms接收每张图片的box数量 + # NMS_TYPE : GPU/CPU(TODO) +IOU_THRESH=0.6 +SCORE_THRESH=0.001 +MAX_BOX_PRE_IMG=1000 +ALL_BOX_NUM=10647 +NMS_TYPE=GPU + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST=() +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s/igie/cut_model.py b/models/cv/object_detection/yolov3_sample/igie/cut_model.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/cut_model.py rename to models/cv/object_detection/yolov3_sample/igie/cut_model.py diff --git a/models/cv/object_detection/yolov3_sample/igie/datasets/__init__.py b/models/cv/object_detection/yolov3_sample/igie/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/cv/object_detection/yolov5s/igie/datasets/coco.py b/models/cv/object_detection/yolov3_sample/igie/datasets/coco.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/datasets/coco.py rename to models/cv/object_detection/yolov3_sample/igie/datasets/coco.py diff --git a/models/cv/object_detection/yolov5s/igie/datasets/common.py b/models/cv/object_detection/yolov3_sample/igie/datasets/common.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/datasets/common.py rename to models/cv/object_detection/yolov3_sample/igie/datasets/common.py diff --git a/models/cv/object_detection/yolov5s/igie/datasets/post_process.py b/models/cv/object_detection/yolov3_sample/igie/datasets/post_process.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/datasets/post_process.py rename to models/cv/object_detection/yolov3_sample/igie/datasets/post_process.py diff --git a/models/cv/object_detection/yolov5s/igie/datasets/pre_process.py b/models/cv/object_detection/yolov3_sample/igie/datasets/pre_process.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/datasets/pre_process.py rename to models/cv/object_detection/yolov3_sample/igie/datasets/pre_process.py diff --git a/models/cv/object_detection/yolov3_sample/igie/datasets/vision.py b/models/cv/object_detection/yolov3_sample/igie/datasets/vision.py new file mode 100644 index 0000000000000000000000000000000000000000..32da4a789767939efc1e83d89f2955145798a5f3 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/datasets/vision.py @@ -0,0 +1,136 @@ +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. 
+ In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. + transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. 
+ """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/object_detection/yolov5s/igie/deploy.py b/models/cv/object_detection/yolov3_sample/igie/deploy.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/deploy.py rename to models/cv/object_detection/yolov3_sample/igie/deploy.py diff --git a/models/cv/object_detection/yolov3_sample/igie/inference.py b/models/cv/object_detection/yolov3_sample/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..1cfaa78449d23d1bda5a100415a1ddd3f9debfaf --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/inference.py @@ -0,0 +1,267 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import cuda.cuda as cuda +import cuda.cudart as cudart + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +import tensorrt +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + + +import tvm +from tvm.contrib import graph_executor + +def init_by_igie(engine_path): + device = tvm.device("iluvatar", 0) + lib = tvm.runtime.load_module(engine_path) + module = graph_executor.GraphModule(lib["default"](device)) + # engine, context = module.engine, module.context + # inputs, outputs, allocations = module.inputs, module.outputs, module.allocations + return module + +def igie_infer(module, batch_data): + # set input + 
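+    # (module.set_input copies the host batch into the graph input selected by
+    #  name, module.run() executes the compiled IGIE graph, and get_output(0)
+    #  returns the first output as a tvm.nd.NDArray)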
module.set_input(module.inputs[0]["name"], batch_data) + ### infer model + module.run() + # get output data + output = module.get_output(0) + return output + + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + module = init_by_igie(config.model_engine) + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + module.run() + print("Warm Done.") + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + # batch_img_id = batch_img_id.numpy() + cur_bsz_sample = batch_data.shape[0] + + err, = cuda.cuMemcpyHtoD(module.inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + module.run() + + if config.test_mode == "MAP": + # Fetch output + output = igie_infer(module, batch_data) + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + err, = cuda.cuMemcpyHtoD(nms_inputs[0]["allocation"], nms_input, nms_input.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + nms_context.execute_v2(nms_allocations) + err, = cuda.cuMemcpyDtoH(nms_output0, nms_outputs[0]["allocation"], nms_outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyDtoH(nms_output1, nms_outputs[1]["allocation"], nms_outputs[1]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + # fps = num_samples / forward_time + + if config.test_mode == "FPS": + start_time = time.time() + 
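+        # FPS mode: time loop_count back-to-back executions and report
+        # (loop_count * bsz) / elapsed seconds against the fps_target threshold.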
for i in range(config.loop_count): + # module.run() + module.context.execute_v2(module.allocations) + end_time = time.time() + forward_time = end_time - start_time + fps = (config.loop_count*config.bsz) / forward_time + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(10) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/object_detection/yolov5s/igie/load_ixrt_plugin.py 
b/models/cv/object_detection/yolov3_sample/igie/load_ixrt_plugin.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/load_ixrt_plugin.py rename to models/cv/object_detection/yolov3_sample/igie/load_ixrt_plugin.py diff --git a/models/cv/object_detection/yolov3_sample/igie/modify_batchsize.py b/models/cv/object_detection/yolov3_sample/igie/modify_batchsize.py new file mode 100644 index 0000000000000000000000000000000000000000..f696ae5517dfb15c020c533332c02a2b6b06c873 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/modify_batchsize.py @@ -0,0 +1,54 @@ +import onnx +import argparse +import copy +import numpy as np + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. + dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +def modify_resize_nodes(model, bsz): + print("modify resize") + for node in model.graph.node: + if node.op_type == "Resize": + if len(node.input) >= 4 and node.input[3]: + sizes_name = node.input[3] + for initializer in model.graph.initializer: + if initializer.name == sizes_name: + shape = copy.deepcopy(onnx.numpy_helper.to_array(initializer)) + shape[0] = shape[0] * bsz + new_sizes = np.array(shape, dtype=np.int64) + initializer.CopyFrom(onnx.numpy_helper.from_array(new_sizes, name=initializer.name)) + break + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +modify_resize_nodes(model, args.batch_size) +onnx.save(model, args.output_model) diff --git a/models/cv/object_detection/yolov5s/igie/quant.py b/models/cv/object_detection/yolov3_sample/igie/quant.py similarity index 93% rename from models/cv/object_detection/yolov5s/igie/quant.py rename to models/cv/object_detection/yolov3_sample/igie/quant.py index bcf5d9b6f73ee58fee41e27252425e7b9dc4e6fb..d73212ca60a4985cc036f67e8fb0b3c70ba24e4d 100644 --- a/models/cv/object_detection/yolov5s/igie/quant.py +++ b/models/cv/object_detection/yolov3_sample/igie/quant.py @@ -5,6 +5,9 @@ import numpy as np from tensorrt.deploy import static_quantize import torch +import sys +sys.path.append("/home/haoyuan.chen/temp/inferencesamples/benchmarks/cv/detection/yolov3/tensorrt") +print(sys.path) from calibration_dataset import create_dataloaders def setseed(seed=42): diff --git a/models/cv/object_detection/yolov3_sample/igie/scripts/infer_yolov3_sample_int8_accuracy.sh b/models/cv/object_detection/yolov3_sample/igie/scripts/infer_yolov3_sample_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..c2c7653905b88010c1647c4d394a07782f67e6fc --- /dev/null +++ 
b/models/cv/object_detection/yolov3_sample/igie/scripts/infer_yolov3_sample_int8_accuracy.sh @@ -0,0 +1,209 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=0.65 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV3_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV3Decoder \ + 
--with_nms True \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}_with_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_with_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/scripts/infer_yolov3_sample_int8_performance.sh b/models/cv/object_detection/yolov3_sample/igie/scripts/infer_yolov3_sample_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..d93e51e8e6d1c191dd54ea35c32889db1e8a55e5 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/scripts/infer_yolov3_sample_int8_performance.sh @@ -0,0 +1,210 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=1010 +LOOP_COUNT=100 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV3_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo 
DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion_no_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV3Decoder \ + --with_nms False \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}_without_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_without_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE 
has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s/igie/simplify_model.py b/models/cv/object_detection/yolov3_sample/igie/simplify_model.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/simplify_model.py rename to models/cv/object_detection/yolov3_sample/igie/simplify_model.py diff --git a/models/cv/object_detection/yolov4/igie/README.md b/models/cv/object_detection/yolov4/igie/README.md index eb7c70b370f30a20b5eb845b4ca936080a7144a2..1f6b6ab7d1b0906a71c87bbd99924cf6a18b801f 100644 --- a/models/cv/object_detection/yolov4/igie/README.md +++ b/models/cv/object_detection/yolov4/igie/README.md @@ -50,10 +50,6 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash # Install libGL ## CentOS diff --git a/models/cv/object_detection/yolov4/ixrt/README.md b/models/cv/object_detection/yolov4/ixrt/README.md index bf57bb154f9f769426c2624ba233d9fa6ea33391..bdcc21f149abc3256739cd3f4bd24d823961e899 100644 --- a/models/cv/object_detection/yolov4/ixrt/README.md +++ b/models/cv/object_detection/yolov4/ixrt/README.md @@ -50,10 +50,6 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash # Install libGL ## CentOS diff --git a/models/cv/object_detection/yolov5/igie/README.md b/models/cv/object_detection/yolov5/igie/README.md index 44d1a6e9424fbc47684dbb823274a9c97c482aa1..b0be77947a47c8ba716fb1da3cc9f3e08d2412e6 100644 --- a/models/cv/object_detection/yolov5/igie/README.md +++ b/models/cv/object_detection/yolov5/igie/README.md @@ -48,13 +48,8 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash pip3 install -r requirements.txt -pip3 install mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl ``` ### Model Conversion diff --git a/models/cv/object_detection/yolov5/ixrt/README.md b/models/cv/object_detection/yolov5/ixrt/README.md index 41bec0dc1070ca19897fe0385a83155c2064b074..07e53eca1c77c5094cf46167f585384bcc90793d 100644 --- a/models/cv/object_detection/yolov5/ixrt/README.md +++ b/models/cv/object_detection/yolov5/ixrt/README.md @@ -48,10 +48,6 @@ coco ### 
Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash # Install libGL ## CentOS diff --git a/models/cv/object_detection/yolov5_sample/igie/build_engine.py b/models/cv/object_detection/yolov5_sample/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..cdace84b400d7140653700dea1488c460826dafa --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/build_engine.py @@ -0,0 +1,39 @@ +import os +import cv2 +import argparse +import numpy as np + +import tvm +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + + +def main(config): + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + precision = config.precision + if config.precision == "float16": + precision = "fp16" + + inputs_info = {"images": ([config.bsz, 3, 640, 640], "float32")} + mod, params = import_model_to_igie(config.model, inputs_info, outputs_info=None, precision=precision, backend="tensorrt") + lib = relay.build(mod, target=target, params=params, precision=precision, device=device) + lib.export_library(config.engine) + print("Build engine done!") + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--bsz", type=int) + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/build_nms_engine.py b/models/cv/object_detection/yolov5_sample/igie/build_nms_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..51d70747679443f85a314a6072eb83e35b0e30ed --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/build_nms_engine.py @@ -0,0 +1,82 @@ +import os +import argparse +import torch +import onnx +from onnx import helper +from onnx import TensorProto, numpy_helper +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def create_onnx(args): + nms = helper.make_node( + "DetectionNMS_IxRT", + name="NMS", + inputs=["nms_input"], + outputs=["nms_output0", "nms_output1"], + nMaxKeep=args.max_box_pre_img, + fIoUThresh=args.iou_thresh, + fScoreThresh=args.score_thresh + ) + graph = helper.make_graph( + nodes=[nms], + name="gpu_nms", + inputs=[ + helper.make_tensor_value_info( + "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6) + ) + ], + outputs=[ + helper.make_tensor_value_info( + "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6) + ), + helper.make_tensor_value_info( + "nms_output1", onnx.TensorProto.INT32, (args.bsz,) + ) + ], + initializer=[] + ) + + op = onnx.OperatorSetIdProto() + op.version = 13 + model = onnx.helper.make_model(graph) + + model = onnx.helper.make_model(graph, opset_imports=[op]) + onnx_path = args.path + "/nms.onnx" + onnx.save(model, onnx_path) + +def build_engine(args): + onnx_path = args.path + "/nms.onnx" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = 
builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(onnx_path) + plan = builder.build_serialized_network(network, build_config) + + engine_path = args.path + "/nms.engine" + with open(engine_path, "wb") as f: + f.write(plan) + +def main(args): + create_onnx(args) + build_engine(args) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--bsz", type=int, default=1, help="batch size") + parser.add_argument("--path", type=str) + parser.add_argument("--all_box_num", type=int, default=25200) + parser.add_argument("--max_box_pre_img", type=int, default=1000) + parser.add_argument("--iou_thresh", type=float, default=0.6) + parser.add_argument("--score_thresh", type=float, default=0.001) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/calibration_dataset.py b/models/cv/object_detection/yolov5_sample/igie/calibration_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..7d3e3e489605f7f9ec0cd042658daf98798d1eb5 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/calibration_dataset.py @@ -0,0 +1,30 @@ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader + + +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/ci/prepare.sh b/models/cv/object_detection/yolov5_sample/igie/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..2662da090a63183bec96d5e277010c311eb67b8c --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/ci/prepare.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +pip3 install pycocotools onnxsim pycuda +pip3 install tqdm +pip3 install opencv-python==4.6.0.66 + +mkdir -p checkpoints +ln -s /mnt/deepspark/data/datasets/corex-inference-data-4.0.0/checkpoints/yolov5m/yolov5m.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/coco_labels.py b/models/cv/object_detection/yolov5_sample/igie/coco_labels.py new file mode 100644 index 0000000000000000000000000000000000000000..69d38878ff16d66dfe7550fcd170ac91d0862318 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/coco_labels.py @@ -0,0 +1,89 @@ +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/object_detection/yolov5_sample/igie/common.py b/models/cv/object_detection/yolov5_sample/igie/common.py new file mode 100644 index 0000000000000000000000000000000000000000..5f5435557ecb72c836cc5a3c253482b0458657f6 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/common.py @@ -0,0 +1,86 @@ +import numpy as np +from tqdm import tqdm + +import tensorrt +import cuda.cuda as cuda +import cuda.cudart as cudart + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + 
"category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/config/YOLOV5M_CONFIG b/models/cv/object_detection/yolov5_sample/igie/config/YOLOV5M_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..510b359ada10646ad2427fddcf1957cab4c5b6dc --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/config/YOLOV5M_CONFIG @@ -0,0 +1,49 @@ +# BSZ : 构建engine以及推理时的batchsize +# IMGSIZE : 模型输入hw大小 +# RUN_MODE : [FPS, MAP] +# PRECISION : [float16, int8] +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件 +# COCO_GT : COCOEVAL标签文件 +# DATASET_DIR : 量化/推理数据集路径 +# CHECKPOINTS_DIR : 存放生成的onnx/engine路径 +# LAYER_FUSION : decoder部分走融合算子实现 0不融合 1融合 +# DECODER_FASTER : 有两种融合实现,faster版本速度快且可以直接对接gpu nms;另一种实现的输出和onnx保持一致. 
1:faster +IMGSIZE=640 +MODEL_NAME=yolov5m +ORIGINE_MODEL=yolov5m.onnx +DATA_PROCESS_TYPE=yolov5 +MODEL_INPUT_NAMES=(images) + +LAYER_FUSION=1 +DECODER_FASTER=1 +DECODER_NUM_CLASS=80 +DECODER_INPUT_NAMES=(443 482 521) +DECODER_8_ANCHOR=(10 13 16 30 33 23) +DECODER_16_ANCHOR=(30 61 62 45 59 119) +DECODER_32_ANCHOR=(116 90 156 198 373 326) + +# NMS CONFIG + # IOU_THRESH : iou阈值 + # SCORE_THRESH : bbox置信度阈值 + # MAX_BOX_PRE_IMG : 每张图片预测bbox的数量上限 + # ALL_BOX_NUM : nms接收每张图片的box数量 + # NMS_TYPE : GPU/CPU(TODO) +IOU_THRESH=0.6 +SCORE_THRESH=0.001 +MAX_BOX_PRE_IMG=1000 +ALL_BOX_NUM=25200 +NMS_TYPE=GPU + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST=() +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/cut_model.py b/models/cv/object_detection/yolov5_sample/igie/cut_model.py new file mode 100644 index 0000000000000000000000000000000000000000..af0a3a4f0cc3caf05b95be3c77dea7728c931e3f --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/cut_model.py @@ -0,0 +1,16 @@ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--input_names", nargs='+', type=str) + parser.add_argument("--output_names", nargs='+', type=str) + args = parser.parse_args() + return args + +args = parse_args() +onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names) +print(" Cut Model Done.") \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/datasets/__init__.py b/models/cv/object_detection/yolov5_sample/igie/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/cv/object_detection/yolov5_sample/igie/datasets/coco.py b/models/cv/object_detection/yolov5_sample/igie/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7f355b8444e2bc8d38d5c89cb3217328c497420e --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/datasets/coco.py @@ -0,0 +1,116 @@ +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_post_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. 
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/object_detection/yolov5_sample/igie/datasets/common.py b/models/cv/object_detection/yolov5_sample/igie/datasets/common.py new file mode 100644 index 0000000000000000000000000000000000000000..e120e00fece2055a96d2ed24010f61b2ca1e3837 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/datasets/common.py @@ -0,0 +1,66 @@ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file 
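The `letterbox`/`scale_boxes` pair in `datasets/common.py` above does the geometric bookkeeping for YOLOv5-style evaluation: `letterbox` scales and pads the frame to the network resolution, and `scale_boxes(..., use_letterbox=True)` undoes that padding when mapping detections back to the original image. Below is a minimal sketch of that round trip (not part of the patch), assuming it is run from the sample's `igie` directory so `datasets.common` is importable; the 720x1280 dummy frame and the box values are purely illustrative.

```python
import numpy as np

from datasets.common import letterbox, scale_boxes

# Fake 720x1280 frame standing in for a COCO image (pixel values are irrelevant here).
ori = np.zeros((720, 1280, 3), dtype=np.uint8)

# Preprocess: scale and pad to the 640x640 network input, as Yolov5Preprocess does.
net_input, ratio, (dw, dh) = letterbox(ori, new_shape=640, auto=False, scaleup=False)
print(net_input.shape, ratio, (dw, dh))   # (640, 640, 3) (0.5, 0.5) (0.0, 140.0)

# One detection in 640x640 letterboxed coordinates: x1, y1, x2, y2, class_id, score.
boxes = np.array([[100.0, 200.0, 300.0, 400.0, 1.0, 0.9]], dtype=np.float32)

# Postprocess: remove the padding and rescale back to the original 720x1280 frame.
boxes = scale_boxes((640, 640), boxes, ori.shape[:2], use_letterbox=True)
print(boxes[0, :4])                       # [200. 120. 600. 520.]
```

The same mapping is what `Yolov5Postprocess` applies to every box coming out of the NMS engine before the results are written to the COCO-format JSON.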
diff --git a/models/cv/object_detection/yolov5_sample/igie/datasets/post_process.py b/models/cv/object_detection/yolov5_sample/igie/datasets/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..a58c02f810baa41bf6ee96092a8a3955fe311640 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/datasets/post_process.py @@ -0,0 +1,115 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/datasets/pre_process.py b/models/cv/object_detection/yolov5_sample/igie/datasets/pre_process.py new file mode 100644 index 0000000000000000000000000000000000000000..8cc643a88528b7c7bbd7e3b1eb8095116ba53568 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/datasets/pre_process.py @@ -0,0 +1,56 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = 
img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/datasets/vision.py b/models/cv/object_detection/yolov5_sample/igie/datasets/vision.py new file mode 100644 index 0000000000000000000000000000000000000000..32da4a789767939efc1e83d89f2955145798a5f3 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/datasets/vision.py @@ -0,0 +1,136 @@ +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. 
+ transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. + """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/object_detection/yolov5_sample/igie/deploy.py b/models/cv/object_detection/yolov5_sample/igie/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..ec56b7ab83c6b271c92de6e5c36153927f629887 --- 
/dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/deploy.py @@ -0,0 +1,134 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:num*2+1], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + elif args.with_nms: + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + else: + graph.outputs.clear() + graph.add_output("decoder_8") + graph.outputs["decoder_8"].dtype = "FLOAT" + graph.add_output("decoder_16") + graph.outputs["decoder_16"].dtype = "FLOAT" + graph.add_output("decoder_32") + graph.outputs["decoder_32"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--with_nms", type=bool, default=False, help="engine with nms") + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", 
nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/inference.py b/models/cv/object_detection/yolov5_sample/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..8042fc5658b9494bb98ab6715aad7e7c3a6cdfb9 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/inference.py @@ -0,0 +1,263 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import cuda.cuda as cuda +import cuda.cudart as cudart + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +import tvm +from tvm.contrib import graph_executor + +def init_by_igie(engine_path): + device = tvm.device("iluvatar", 0) + lib = tvm.runtime.load_module(engine_path) + module = graph_executor.GraphModule(lib["default"](device)) + # engine, context = module.engine, module.context + # inputs, outputs, allocations = module.inputs, module.outputs, module.allocations + return module + +def igie_infer(module, batch_data): + # set input + module.set_input(module.inputs[0]["name"], batch_data) + ### infer model + module.run() + # get output data + output = module.get_output(0) + return output + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + + # Load Engine + module = init_by_igie(config.model_engine) + + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = 
get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + module.run() + print("Warm Done.") + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + # batch_img_id = batch_img_id.numpy() + + cur_bsz_sample = batch_data.shape[0] + + if config.test_mode == "MAP": + # Fetch output + output = igie_infer(module, batch_data) + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + err, = cuda.cuMemcpyHtoD(nms_inputs[0]["allocation"], nms_input, nms_input.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + nms_context.execute_v2(nms_allocations) + err, = cuda.cuMemcpyDtoH(nms_output0, nms_outputs[0]["allocation"], nms_outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyDtoH(nms_output1, nms_outputs[1]["allocation"], nms_outputs[1]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + # fps = num_samples / forward_time + + if config.test_mode == "FPS": + start_time = time.time() + for i in range(config.loop_count): + module.run() + end_time = time.time() + forward_time = end_time - start_time + fps = (config.loop_count*config.bsz) / forward_time + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(10) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 
YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/load_ixrt_plugin.py b/models/cv/object_detection/yolov5_sample/igie/load_ixrt_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..932efbdfd1a4e91d8ddfd363adf6bce989df1709 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/load_ixrt_plugin.py @@ -0,0 +1,12 @@ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s/igie/modify_batchsize.py b/models/cv/object_detection/yolov5_sample/igie/modify_batchsize.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/modify_batchsize.py rename to models/cv/object_detection/yolov5_sample/igie/modify_batchsize.py diff --git a/models/cv/object_detection/yolov5_sample/igie/quant.py b/models/cv/object_detection/yolov5_sample/igie/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..d73212ca60a4985cc036f67e8fb0b3c70ba24e4d --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/quant.py @@ -0,0 
+1,55 @@ +import os +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import sys +sys.path.append("/home/haoyuan.chen/temp/inferencesamples/benchmarks/cv/detection/yolov3/tensorrt") +print(sys.path) +from calibration_dataset import create_dataloaders + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=640) + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + +out_dir = args.save_dir +dataloader = create_dataloaders( + data_path=args.dataset_dir, + annFile=args.ann_file, + img_sz=args.imgsz, + batch_size=args.bsz, + step=args.step, + data_process_type=args.data_process_type +) +# print("disable_quant_names : ", args.disable_quant_names) +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/scripts/infer_yolov5_sample_int8_accuracy.sh b/models/cv/object_detection/yolov5_sample/igie/scripts/infer_yolov5_sample_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..c8729053b6b5a51ea609f3aad52eb400e0e8769a --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/scripts/infer_yolov5_sample_int8_accuracy.sh @@ -0,0 +1,209 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=0.626 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV5M_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== 
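+# Pipeline overview (the numbered STEPs below): simplify the ONNX, cut off the decoder
+# head, quantize to int8 against the COCO calibration dataloader, re-attach the fused
+# YoloV5Decoder with NMS-ready output, rewrite the batch size, build the engine and a
+# GPU NMS engine, then run inference.py in MAP mode and check mAP@0.5 against ${TGT}.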
+echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --with_nms True \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}}_with_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_with_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + 
echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/scripts/infer_yolov5_sample_int8_performance.sh b/models/cv/object_detection/yolov5_sample/igie/scripts/infer_yolov5_sample_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..a9db515ac573fdba05b4c37737b346714d01f50a --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/scripts/infer_yolov5_sample_int8_performance.sh @@ -0,0 +1,210 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=735 +LOOP_COUNT=100 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV5M_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model 
${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion_no_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --with_nms False \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}_without_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_without_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz 
${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/simplify_model.py b/models/cv/object_detection/yolov5_sample/igie/simplify_model.py new file mode 100644 index 0000000000000000000000000000000000000000..b4254b6f903cb5f8775e43b2f80d5572bf45b1d6 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/simplify_model.py @@ -0,0 +1,21 @@ +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s/ixrt/README.md b/models/cv/object_detection/yolov5s/ixrt/README.md index d6d07b1518bc2c32ce02499b370470994c5cbb99..a375cb5b40d05c62ef52182d7a02b43747428e0e 100755 --- a/models/cv/object_detection/yolov5s/ixrt/README.md +++ b/models/cv/object_detection/yolov5s/ixrt/README.md @@ -48,10 +48,6 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash # Install libGL ## CentOS diff --git a/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh index a08c47d7053f6c3a910a533b81e4331e8dbf3dfc..4672c649731fe233dde275144445f20028c58344 100644 --- a/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh @@ -16,15 +16,6 @@ set -x -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx -elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL -else - echo "Not Support Os" -fi - pip3 install -r ../../ixrt_common/requirements.txt mkdir -p checkpoints diff --git a/models/cv/object_detection/yolov5s/igie/build_engine.py b/models/cv/object_detection/yolov5s_sample/igie/build_engine.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/build_engine.py rename to models/cv/object_detection/yolov5s_sample/igie/build_engine.py diff --git a/models/cv/object_detection/yolov5s_sample/igie/build_nms_engine.py b/models/cv/object_detection/yolov5s_sample/igie/build_nms_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..51d70747679443f85a314a6072eb83e35b0e30ed --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/build_nms_engine.py @@ -0,0 +1,82 @@ +import os +import argparse +import torch +import onnx +from onnx import helper +from onnx import TensorProto, numpy_helper +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def create_onnx(args): + nms = helper.make_node( + "DetectionNMS_IxRT", + name="NMS", + inputs=["nms_input"], + outputs=["nms_output0", "nms_output1"], + nMaxKeep=args.max_box_pre_img, + fIoUThresh=args.iou_thresh, + fScoreThresh=args.score_thresh + ) + graph = helper.make_graph( + nodes=[nms], + name="gpu_nms", + inputs=[ + helper.make_tensor_value_info( + "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6) + ) + ], + outputs=[ + 
helper.make_tensor_value_info( + "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6) + ), + helper.make_tensor_value_info( + "nms_output1", onnx.TensorProto.INT32, (args.bsz,) + ) + ], + initializer=[] + ) + + op = onnx.OperatorSetIdProto() + op.version = 13 + model = onnx.helper.make_model(graph) + + model = onnx.helper.make_model(graph, opset_imports=[op]) + onnx_path = args.path + "/nms.onnx" + onnx.save(model, onnx_path) + +def build_engine(args): + onnx_path = args.path + "/nms.onnx" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(onnx_path) + plan = builder.build_serialized_network(network, build_config) + + engine_path = args.path + "/nms.engine" + with open(engine_path, "wb") as f: + f.write(plan) + +def main(args): + create_onnx(args) + build_engine(args) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--bsz", type=int, default=1, help="batch size") + parser.add_argument("--path", type=str) + parser.add_argument("--all_box_num", type=int, default=25200) + parser.add_argument("--max_box_pre_img", type=int, default=1000) + parser.add_argument("--iou_thresh", type=float, default=0.6) + parser.add_argument("--score_thresh", type=float, default=0.001) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/igie/calibration_dataset.py b/models/cv/object_detection/yolov5s_sample/igie/calibration_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..578e013db932c53f0cfa2790e375d7b699081168 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/calibration_dataset.py @@ -0,0 +1,31 @@ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader + + + +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s/igie/ci/prepare.sh b/models/cv/object_detection/yolov5s_sample/igie/ci/prepare.sh similarity index 55% rename from models/cv/object_detection/yolov5s/igie/ci/prepare.sh rename to models/cv/object_detection/yolov5s_sample/igie/ci/prepare.sh index b53ca6d194ba2c395ee60bbce719cfc299a6bf8a..34ec0222b1fd1c59cee47e03df0a8ae73b61fdd5 100644 --- a/models/cv/object_detection/yolov5s/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov5s_sample/igie/ci/prepare.sh @@ -16,20 +16,9 @@ set -x -pip3 install -r ../../ixrt_common/requirements.txt +pip3 install pycocotools onnxsim pycuda +pip3 install tqdm +pip3 install opencv-python==4.6.0.66 mkdir -p checkpoints 
-cp -r /mnt/deepspark/data/3rd_party/yolov5 ./ - -cd yolov5/ - -# 有一些环境需要安装 -# wget https://ultralytics.com/assets/Arial.ttf -mkdir -p /root/.config/Ultralytics -cp /mnt/deepspark/data/3rd_party/Arial.ttf /root/.config/Ultralytics/Arial.ttf - -ln -s /mnt/deepspark/data/checkpoints/yolov5s.pt ./ -# 转换为onnx (具体实现可以参考 export.py 中的 export_onnx 函数) -python3 export.py --weights yolov5s.pt --include onnx --opset 11 --batch-size 32 -mv yolov5s.onnx ../checkpoints -cd .. +ln -s /mnt/deepspark/data/datasets/corex-inference-data-4.0.0/checkpoints/yolov5s/yolov5s.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/igie/coco_labels.py b/models/cv/object_detection/yolov5s_sample/igie/coco_labels.py new file mode 100644 index 0000000000000000000000000000000000000000..69d38878ff16d66dfe7550fcd170ac91d0862318 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/coco_labels.py @@ -0,0 +1,89 @@ +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/object_detection/yolov5s_sample/igie/common.py b/models/cv/object_detection/yolov5s_sample/igie/common.py new file mode 100644 index 0000000000000000000000000000000000000000..5f5435557ecb72c836cc5a3c253482b0458657f6 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/common.py @@ -0,0 +1,86 @@ +import numpy as np +from tqdm import tqdm + +import tensorrt +import cuda.cuda as cuda +import cuda.cudart as cudart + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, 
json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s/igie/config/YOLOV5S_CONFIG b/models/cv/object_detection/yolov5s_sample/igie/config/YOLOV5S_CONFIG similarity index 94% rename from models/cv/object_detection/yolov5s/igie/config/YOLOV5S_CONFIG rename to models/cv/object_detection/yolov5s_sample/igie/config/YOLOV5S_CONFIG index c3f46cf87029af585f8c40a6b5e435b4a41fc956..1330489abda4aea77dee2b8ad233b34d04f4d27d 100644 --- a/models/cv/object_detection/yolov5s/igie/config/YOLOV5S_CONFIG +++ b/models/cv/object_detection/yolov5s_sample/igie/config/YOLOV5S_CONFIG @@ -18,7 +18,7 @@ MODEL_INPUT_NAMES=(images) LAYER_FUSION=1 DECODER_FASTER=1 DECODER_NUM_CLASS=80 -DECODER_INPUT_NAMES=(/model.24/m.0/Conv_output_0 /model.24/m.1/Conv_output_0 /model.24/m.2/Conv_output_0) +DECODER_INPUT_NAMES=(326 364 402) DECODER_8_ANCHOR=(10 13 16 30 33 23) DECODER_16_ANCHOR=(30 61 62 45 59 119) DECODER_32_ANCHOR=(116 90 156 198 373 326) diff --git a/models/cv/object_detection/yolov5s_sample/igie/cut_model.py b/models/cv/object_detection/yolov5s_sample/igie/cut_model.py new file mode 100644 index 0000000000000000000000000000000000000000..af0a3a4f0cc3caf05b95be3c77dea7728c931e3f --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/cut_model.py @@ -0,0 +1,16 @@ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--input_names", nargs='+', type=str) + parser.add_argument("--output_names", nargs='+', type=str) + args = parser.parse_args() + return args + +args = parse_args() +onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names) +print(" Cut Model Done.") \ No newline at end of file diff --git 
a/models/cv/object_detection/yolov5s_sample/igie/datasets/__init__.py b/models/cv/object_detection/yolov5s_sample/igie/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/cv/object_detection/yolov5s_sample/igie/datasets/coco.py b/models/cv/object_detection/yolov5s_sample/igie/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7f355b8444e2bc8d38d5c89cb3217328c497420e --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/datasets/coco.py @@ -0,0 +1,116 @@ +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_post_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/object_detection/yolov5s_sample/igie/datasets/common.py b/models/cv/object_detection/yolov5s_sample/igie/datasets/common.py new file mode 100644 index 0000000000000000000000000000000000000000..e120e00fece2055a96d2ed24010f61b2ca1e3837 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/datasets/common.py @@ -0,0 +1,66 @@ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file 
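The `letterbox` helper added above is what the YOLOv5 preprocess path relies on: it rescales the frame so the longer side fits the target size, then pads the shorter side with gray (114) pixels, returning the padding offsets so boxes can later be mapped back. Below is a minimal, standalone sketch of that behaviour, assuming it is run from the sample directory so `datasets.common` is importable; the 480x640 dummy frame and the printed values are illustrative only, not part of the patch.

```python
# Hedged sketch: letterbox a 480x640 dummy frame to the 640x640 network input.
import numpy as np

from datasets.common import letterbox  # assumes cwd is the yolov5s_sample dir

img = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy HxWxC BGR frame
padded, ratio, (dw, dh) = letterbox(img, new_shape=640, auto=False, scaleup=False)

# The width already matches, so only the height is padded: 80 px top and bottom.
print(padded.shape)   # (640, 640, 3)
print(ratio)          # (1.0, 1.0)
print((dw, dh))       # (0.0, 80.0)
```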
diff --git a/models/cv/object_detection/yolov5s_sample/igie/datasets/post_process.py b/models/cv/object_detection/yolov5s_sample/igie/datasets/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..a58c02f810baa41bf6ee96092a8a3955fe311640 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/datasets/post_process.py @@ -0,0 +1,115 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/igie/datasets/pre_process.py b/models/cv/object_detection/yolov5s_sample/igie/datasets/pre_process.py new file mode 100644 index 0000000000000000000000000000000000000000..8cc643a88528b7c7bbd7e3b1eb8095116ba53568 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/datasets/pre_process.py @@ -0,0 +1,56 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = 
img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/igie/datasets/vision.py b/models/cv/object_detection/yolov5s_sample/igie/datasets/vision.py new file mode 100644 index 0000000000000000000000000000000000000000..32da4a789767939efc1e83d89f2955145798a5f3 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/datasets/vision.py @@ -0,0 +1,136 @@ +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. 
+ transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. + """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/object_detection/yolov5s_sample/igie/deploy.py b/models/cv/object_detection/yolov5s_sample/igie/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..ec56b7ab83c6b271c92de6e5c36153927f629887 --- 
/dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/deploy.py @@ -0,0 +1,134 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:num*2+1], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + elif args.with_nms: + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + else: + graph.outputs.clear() + graph.add_output("decoder_8") + graph.outputs["decoder_8"].dtype = "FLOAT" + graph.add_output("decoder_16") + graph.outputs["decoder_16"].dtype = "FLOAT" + graph.add_output("decoder_32") + graph.outputs["decoder_32"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--with_nms", type=bool, default=False, help="engine with nms") + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", 
nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s/igie/inference.py b/models/cv/object_detection/yolov5s_sample/igie/inference.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/inference.py rename to models/cv/object_detection/yolov5s_sample/igie/inference.py diff --git a/models/cv/object_detection/yolov5s_sample/igie/load_ixrt_plugin.py b/models/cv/object_detection/yolov5s_sample/igie/load_ixrt_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..932efbdfd1a4e91d8ddfd363adf6bce989df1709 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/load_ixrt_plugin.py @@ -0,0 +1,12 @@ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/igie/modify_batchsize.py b/models/cv/object_detection/yolov5s_sample/igie/modify_batchsize.py new file mode 100644 index 0000000000000000000000000000000000000000..00ed65dd16bf19445396df7f72d81d653eed756d --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/modify_batchsize.py @@ -0,0 +1,37 @@ +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. 
+ dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/igie/quant.py b/models/cv/object_detection/yolov5s_sample/igie/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..d73212ca60a4985cc036f67e8fb0b3c70ba24e4d --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/quant.py @@ -0,0 +1,55 @@ +import os +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import sys +sys.path.append("/home/haoyuan.chen/temp/inferencesamples/benchmarks/cv/detection/yolov3/tensorrt") +print(sys.path) +from calibration_dataset import create_dataloaders + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=640) + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + +out_dir = args.save_dir +dataloader = create_dataloaders( + data_path=args.dataset_dir, + annFile=args.ann_file, + img_sz=args.imgsz, + batch_size=args.bsz, + step=args.step, + data_process_type=args.data_process_type +) +# print("disable_quant_names : ", args.disable_quant_names) +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s/igie/scripts/infer_yolov5s_fp16_accuracy.sh b/models/cv/object_detection/yolov5s_sample/igie/scripts/infer_yolov5s_sample_fp16_accuracy.sh similarity index 100% rename from models/cv/object_detection/yolov5s/igie/scripts/infer_yolov5s_fp16_accuracy.sh rename to 
models/cv/object_detection/yolov5s_sample/igie/scripts/infer_yolov5s_sample_fp16_accuracy.sh diff --git a/models/cv/object_detection/yolov5s/igie/scripts/infer_yolov5s_fp16_performance.sh b/models/cv/object_detection/yolov5s_sample/igie/scripts/infer_yolov5s_sample_fp16_performance.sh similarity index 99% rename from models/cv/object_detection/yolov5s/igie/scripts/infer_yolov5s_fp16_performance.sh rename to models/cv/object_detection/yolov5s_sample/igie/scripts/infer_yolov5s_sample_fp16_performance.sh index c1252f166803c0e513a640e633999344d7f9a2ac..35cc5785820d17e52b8b24c057a50450ba6c035e 100644 --- a/models/cv/object_detection/yolov5s/igie/scripts/infer_yolov5s_fp16_performance.sh +++ b/models/cv/object_detection/yolov5s_sample/igie/scripts/infer_yolov5s_sample_fp16_performance.sh @@ -12,7 +12,7 @@ check_status() # Run paraments BSZ=32 WARM_UP=3 -TGT=-1 +TGT=840 LOOP_COUNT=100 RUN_MODE=FPS PRECISION=float16 diff --git a/models/cv/object_detection/yolov5s_sample/igie/simplify_model.py b/models/cv/object_detection/yolov5s_sample/igie/simplify_model.py new file mode 100644 index 0000000000000000000000000000000000000000..b4254b6f903cb5f8775e43b2f80d5572bf45b1d6 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/igie/simplify_model.py @@ -0,0 +1,21 @@ +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/README.md b/models/cv/object_detection/yolov5s_sample/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..05187af80cf4f3161696a8bf6ddfc3793c29dc1f --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/README.md @@ -0,0 +1,25 @@ +# 此代码是检测网络基于coco数据集的通用实现 + + +## 推理流程(以Yolov5s为例进行说明) +在ixrt-modelzoo/executables/yolov5s路径下 + +1. 下载onnx文件、数据集 && 安装依赖包 +``` +bash init.sh +``` + +2. 执行脚本(所需的量化、build engine等步骤都包含) + +``` +bash infer_yolov5s_int8_accuracy.sh --bs 32 --tgt 0.55 +bash infer_yolov5s_int8_performance.sh --bs 32 --tgt 2000 +``` + + +## 如何添加新模型 +1. 添加模型相关配置 +ixrt-modelzoo/benchmarks/cv/detection/general_impl/trt/config/{MODELNAME_CONFIG} +2. 
添加执行脚本 ++ ixrt-modelzoo/executables/{model_name}/init.sh ++ ixrt-modelzoo/executables/{model_name}/infer_{model_name}_{precision}_{task}.sh \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/build_engine.py b/models/cv/object_detection/yolov5s_sample/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d47e45e518cc0bd35d2fd27f19f7da17bec44abf --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/build_engine.py @@ -0,0 +1,43 @@ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/build_nms_engine.py b/models/cv/object_detection/yolov5s_sample/ixrt/build_nms_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..25f0ab8abee2d4a6948250e3fcc4bb4705777550 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/build_nms_engine.py @@ -0,0 +1,81 @@ +import os +import argparse +import torch +import onnx +from onnx import helper +from onnx import TensorProto, numpy_helper +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() +def create_onnx(args): + nms = helper.make_node( + "DetectionNMS_IxRT", + name="NMS", + inputs=["nms_input"], + outputs=["nms_output0", "nms_output1"], + nMaxKeep=args.max_box_pre_img, + fIoUThresh=args.iou_thresh, + fScoreThresh=args.score_thresh + ) + graph = helper.make_graph( + nodes=[nms], + name="gpu_nms", + inputs=[ + helper.make_tensor_value_info( + "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6) + ) + ], + outputs=[ + helper.make_tensor_value_info( + "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6) + ), + helper.make_tensor_value_info( + "nms_output1", onnx.TensorProto.INT32, (args.bsz,) + ) + ], + initializer=[] + ) + + op = onnx.OperatorSetIdProto() + op.version = 13 + model = onnx.helper.make_model(graph) + + model = onnx.helper.make_model(graph, opset_imports=[op]) + onnx_path = args.path + "/nms.onnx" + onnx.save(model, onnx_path) + +def build_engine(args): + onnx_path = args.path + "/nms.onnx" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << 
(int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(onnx_path) + plan = builder.build_serialized_network(network, build_config) + + engine_path = args.path + "/nms.engine" + with open(engine_path, "wb") as f: + f.write(plan) + +def main(args): + create_onnx(args) + build_engine(args) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--bsz", type=int, default=1, help="batch size") + parser.add_argument("--path", type=str) + parser.add_argument("--all_box_num", type=int, default=25200) + parser.add_argument("--max_box_pre_img", type=int, default=1000) + parser.add_argument("--iou_thresh", type=float, default=0.6) + parser.add_argument("--score_thresh", type=float, default=0.001) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/calibration_dataset.py b/models/cv/object_detection/yolov5s_sample/ixrt/calibration_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..578e013db932c53f0cfa2790e375d7b699081168 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/calibration_dataset.py @@ -0,0 +1,31 @@ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader + + + +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov5s_sample/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..78e9936de990d75d7e5002697f29552f4ca84de7 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/ci/prepare.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +pip3 install pycocotools onnxsim pycuda +pip3 install tqdm +pip3 install opencv-python==4.6.0.66 + +mkdir -p checkpoints +ln -s /root/data/datasets/corex-inference-data-4.0.0/checkpoints/yolov5s/yolov5s.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/coco_labels.py b/models/cv/object_detection/yolov5s_sample/ixrt/coco_labels.py new file mode 100644 index 0000000000000000000000000000000000000000..69d38878ff16d66dfe7550fcd170ac91d0862318 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/coco_labels.py @@ -0,0 +1,89 @@ +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/common.py b/models/cv/object_detection/yolov5s_sample/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..5f5435557ecb72c836cc5a3c253482b0458657f6 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/common.py @@ -0,0 +1,86 @@ +import numpy as np +from tqdm import tqdm + +import tensorrt +import cuda.cuda as cuda +import cuda.cudart as cudart + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + 
"category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/config/YOLOV5S_CONFIG b/models/cv/object_detection/yolov5s_sample/ixrt/config/YOLOV5S_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..1330489abda4aea77dee2b8ad233b34d04f4d27d --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/config/YOLOV5S_CONFIG @@ -0,0 +1,49 @@ +# BSZ : 构建engine以及推理时的batchsize +# IMGSIZE : 模型输入hw大小 +# RUN_MODE : [FPS, MAP] +# PRECISION : [float16, int8] +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件 +# COCO_GT : COCOEVAL标签文件 +# DATASET_DIR : 量化/推理数据集路径 +# CHECKPOINTS_DIR : 存放生成的onnx/engine路径 +# LAYER_FUSION : decoder部分走融合算子实现 0不融合 1融合 +# DECODER_FASTER : 有两种融合实现,faster版本速度快且可以直接对接gpu nms;另一种实现的输出和onnx保持一致. 
1:faster +IMGSIZE=640 +MODEL_NAME=yolov5s +ORIGINE_MODEL=yolov5s.onnx +DATA_PROCESS_TYPE=yolov5 +MODEL_INPUT_NAMES=(images) + +LAYER_FUSION=1 +DECODER_FASTER=1 +DECODER_NUM_CLASS=80 +DECODER_INPUT_NAMES=(326 364 402) +DECODER_8_ANCHOR=(10 13 16 30 33 23) +DECODER_16_ANCHOR=(30 61 62 45 59 119) +DECODER_32_ANCHOR=(116 90 156 198 373 326) + +# NMS CONFIG + # IOU_THRESH : iou阈值 + # SCORE_THRESH : bbox置信度阈值 + # MAX_BOX_PRE_IMG : 每张图片预测bbox的数量上限 + # ALL_BOX_NUM : nms接收每张图片的box数量 + # NMS_TYPE : GPU/CPU(TODO) +IOU_THRESH=0.6 +SCORE_THRESH=0.001 +MAX_BOX_PRE_IMG=1000 +ALL_BOX_NUM=25200 +NMS_TYPE=GPU + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST=() +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/cut_model.py b/models/cv/object_detection/yolov5s_sample/ixrt/cut_model.py new file mode 100644 index 0000000000000000000000000000000000000000..af0a3a4f0cc3caf05b95be3c77dea7728c931e3f --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/cut_model.py @@ -0,0 +1,16 @@ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--input_names", nargs='+', type=str) + parser.add_argument("--output_names", nargs='+', type=str) + args = parser.parse_args() + return args + +args = parse_args() +onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names) +print(" Cut Model Done.") \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/datasets/__init__.py b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/datasets/coco.py b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7f355b8444e2bc8d38d5c89cb3217328c497420e --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/coco.py @@ -0,0 +1,116 @@ +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_post_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. 
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/datasets/common.py b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/common.py new file mode 100644 index 0000000000000000000000000000000000000000..e120e00fece2055a96d2ed24010f61b2ca1e3837 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/common.py @@ -0,0 +1,66 @@ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file 
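The two helpers above form a matched pair: `letterbox` pads an image up to the network input shape, and `scale_boxes(..., use_letterbox=True)` removes exactly that padding and scaling from the predicted boxes. Below is a minimal round-trip sketch, assuming it is run from the `yolov5s_sample/ixrt` directory so the `datasets` package is importable; the 480x640 dummy image and the sample box are illustrative values only, not taken from the repo.

```python
import numpy as np

from datasets.common import letterbox, scale_boxes

# dummy 480x640 BGR image standing in for a COCO sample
orig = np.zeros((480, 640, 3), dtype=np.uint8)
padded, ratio, (dw, dh) = letterbox(orig, new_shape=640, auto=False, scaleup=False)
print(padded.shape, ratio, (dw, dh))   # (640, 640, 3) (1.0, 1.0) (0.0, 80.0)

# one xyxy box predicted in the 640x640 letterboxed frame
boxes = np.array([[100.0, 180.0, 300.0, 400.0]])
restored = scale_boxes((640, 640), boxes, (480, 640), use_letterbox=True)
print(restored)                        # [[100. 100. 300. 320.]] in original-image pixels
```

The YOLOv3 path in `post_process.py` below calls the same `scale_boxes` with `use_letterbox=False`, because its preprocessing is a plain square resize rather than a letterbox.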
diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/datasets/post_process.py b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..a58c02f810baa41bf6ee96092a8a3955fe311640 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/post_process.py @@ -0,0 +1,115 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/datasets/pre_process.py b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/pre_process.py new file mode 100644 index 0000000000000000000000000000000000000000..8cc643a88528b7c7bbd7e3b1eb8095116ba53568 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/pre_process.py @@ -0,0 +1,56 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = 
img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/datasets/vision.py b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/vision.py new file mode 100644 index 0000000000000000000000000000000000000000..32da4a789767939efc1e83d89f2955145798a5f3 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/vision.py @@ -0,0 +1,136 @@ +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. 
+ transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. + """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/deploy.py b/models/cv/object_detection/yolov5s_sample/ixrt/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..ec56b7ab83c6b271c92de6e5c36153927f629887 --- 
/dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/deploy.py @@ -0,0 +1,134 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:num*2+1], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + elif args.with_nms: + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + else: + graph.outputs.clear() + graph.add_output("decoder_8") + graph.outputs["decoder_8"].dtype = "FLOAT" + graph.add_output("decoder_16") + graph.outputs["decoder_16"].dtype = "FLOAT" + graph.add_output("decoder_32") + graph.outputs["decoder_32"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--with_nms", type=bool, default=False, help="engine with nms") + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", 
nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/inference.py b/models/cv/object_detection/yolov5s_sample/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..5f5452d56f4c7850aa1a816b90e0e1465ea2300d --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/inference.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import cuda.cuda as cuda +import cuda.cudart as cudart + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine + engine, context = create_engine_context(config.model_engine, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + 
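+ # each warm-up iteration runs one synchronous inference over the pre-allocated device buffers; + # outputs are discarded so one-time runtime initialization does not skew the timed FPS loop or the MAP evaluation below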
print("Warm Done.") + + # Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + # batch_img_id = batch_img_id.numpy() + + cur_bsz_sample = batch_data.shape[0] + + # Set input + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Forward + # start_time = time.time() + context.execute_v2(allocations) + # end_time = time.time() + # forward_time += end_time - start_time + + if config.test_mode == "MAP": + # Fetch output + err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + err, = cuda.cuMemcpyHtoD(nms_inputs[0]["allocation"], nms_input, nms_input.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + nms_context.execute_v2(nms_allocations) + err, = cuda.cuMemcpyDtoH(nms_output0, nms_outputs[0]["allocation"], nms_outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyDtoH(nms_output1, nms_outputs[1]["allocation"], nms_outputs[1]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + # fps = num_samples / forward_time + + if config.test_mode == "FPS": + start_time = time.time() + for i in range(config.loop_count): + context.execute_v2(allocations) + end_time = time.time() + forward_time = end_time - start_time + fps = (config.loop_count*config.bsz) / forward_time + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(10) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + +def parse_config(): + parser = argparse.ArgumentParser() + 
parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/load_ixrt_plugin.py b/models/cv/object_detection/yolov5s_sample/ixrt/load_ixrt_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..932efbdfd1a4e91d8ddfd363adf6bce989df1709 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/load_ixrt_plugin.py @@ -0,0 +1,12 @@ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/modify_batchsize.py b/models/cv/object_detection/yolov5s_sample/ixrt/modify_batchsize.py new file mode 100644 index 0000000000000000000000000000000000000000..00ed65dd16bf19445396df7f72d81d653eed756d --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/modify_batchsize.py @@ -0,0 +1,37 @@ +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # 
Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted. This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. + dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/quant.py b/models/cv/object_detection/yolov5s_sample/ixrt/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..d73212ca60a4985cc036f67e8fb0b3c70ba24e4d --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/quant.py @@ -0,0 +1,55 @@ +import os +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import sys +# make the local calibration_dataset module importable regardless of the caller's cwd +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from calibration_dataset import create_dataloaders + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=640) + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + +out_dir = args.save_dir +dataloader = create_dataloaders( + data_path=args.dataset_dir, + annFile=args.ann_file, + img_sz=args.imgsz, + batch_size=args.bsz, + step=args.step, + data_process_type=args.data_process_type +) +# print("disable_quant_names : ", args.disable_quant_names) +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git 
a/models/cv/object_detection/yolov5s_sample/ixrt/scripts/infer_yolov5s_sample_fp16_accuracy.sh b/models/cv/object_detection/yolov5s_sample/ixrt/scripts/infer_yolov5s_sample_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..95dbf1d09535c48825060e8d26de554ffe44d852 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/scripts/infer_yolov5s_sample_fp16_accuracy.sh @@ -0,0 +1,208 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=0.56 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV5S_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + 
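+ # deploy.py grafts the fused YoloV5Decoder ops onto the cut backbone; on this accuracy path (--with_nms True) + # their outputs are concatenated into the single tensor consumed by the GPU NMS engine built below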
echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_fusion_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --with_nms True \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}_with_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_with_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/scripts/infer_yolov5s_sample_fp16_performance.sh b/models/cv/object_detection/yolov5s_sample/ixrt/scripts/infer_yolov5s_sample_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..556e529c471ec78fc9bc812283e8bec7a9bd69a9 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/scripts/infer_yolov5s_sample_fp16_performance.sh @@ -0,0 +1,208 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=840 +LOOP_COUNT=100 +RUN_MODE=FPS +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" 
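+# expects the COCO val2017 layout used throughout these scripts: annotations/instances_val2017.json and images/val2017 under ${DATASETS_DIR}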
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV5S_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_fusion_no_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --with_nms False \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}_without_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + 
--output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_without_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/simplify_model.py b/models/cv/object_detection/yolov5s_sample/ixrt/simplify_model.py new file mode 100644 index 0000000000000000000000000000000000000000..b4254b6f903cb5f8775e43b2f80d5572bf45b1d6 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/simplify_model.py @@ -0,0 +1,21 @@ +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov6/igie/README.md b/models/cv/object_detection/yolov6/igie/README.md index bb67aa60d9b0b9d057cac7185bed9d57c03f63b7..cd372a903346e1928d8a83d5c66f2d6637585f59 100644 --- a/models/cv/object_detection/yolov6/igie/README.md +++ b/models/cv/object_detection/yolov6/igie/README.md @@ -48,10 +48,6 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash # Install libGL ## CentOS diff --git a/models/cv/object_detection/yolov6/ixrt/README.md b/models/cv/object_detection/yolov6/ixrt/README.md index c7fd888948511ec2c655c62229f62431d743a5b9..521a43494670b1ff732ae4ce23567c7c7cce005e 100644 --- a/models/cv/object_detection/yolov6/ixrt/README.md +++ b/models/cv/object_detection/yolov6/ixrt/README.md @@ -48,10 +48,6 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash # Install libGL ## CentOS diff --git 
a/models/cv/object_detection/yolov7/igie/README.md b/models/cv/object_detection/yolov7/igie/README.md index 5b0ce58703502f11bca09f266769d8b921a3f2e7..d75e116693d94aa444f2001a67b611b5f5909b33 100644 --- a/models/cv/object_detection/yolov7/igie/README.md +++ b/models/cv/object_detection/yolov7/igie/README.md @@ -48,10 +48,6 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash # Install libGL ## CentOS @@ -68,7 +64,9 @@ pip3 install -r requirements.txt # clone yolov7 git clone https://github.com/WongKinYiu/yolov7.git cd yolov7 - +git checkout a207844b1ce82d204ab36d87d496728d3d2348e7 +# set weights_only=False to be comaptible with pytorch 2.7 +sed -i '252 s/map_location)/map_location, weights_only=False)/' ./models/experimental.py # export onnx model python3 export.py --weights ../yolov7.pt --simplify --img-size 640 640 --dynamic-batch --grid diff --git a/models/cv/object_detection/yolov7/igie/ci/prepare.sh b/models/cv/object_detection/yolov7/igie/ci/prepare.sh index e73fc3cc70f9319c85b4532838a5c62d3cea6278..e023787249d32970dbf655bc3c6895fa51aeb97c 100644 --- a/models/cv/object_detection/yolov7/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov7/igie/ci/prepare.sh @@ -16,24 +16,9 @@ set -x -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx -elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL -else - echo "Not Support Os" -fi - pip3 install -r requirements.txt -# clone yolov7 -REPO_URL="https://gitee.com/monkeycc/yolov7.git" -TARGET_DIR="yolov7" -if [ ! -d "$TARGET_DIR" ]; then - git clone --depth 1 "$REPO_URL" "$TARGET_DIR" -fi -cd $TARGET_DIR - +cp -r /mnt/deepspark/data/3rd_party/yolov7 ./ +cd yolov7 # export onnx model python3 export.py --weights ../yolov7.pt --simplify --img-size 640 640 --dynamic-batch --grid diff --git a/models/cv/object_detection/yolov7/ixrt/README.md b/models/cv/object_detection/yolov7/ixrt/README.md index ea91a845e942acf842f43fa2cee5f2a10a4ec4c7..54ca3229e31ae77c8a29b7dcef51950c99299c89 100644 --- a/models/cv/object_detection/yolov7/ixrt/README.md +++ b/models/cv/object_detection/yolov7/ixrt/README.md @@ -48,13 +48,8 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash pip3 install -r ../../ixrt_common/requirements.txt -pip3 install mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl ``` ### Model Conversion diff --git a/models/cv/object_detection/yolov7_sample/igie/build_engine.py b/models/cv/object_detection/yolov7_sample/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..cdace84b400d7140653700dea1488c460826dafa --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/build_engine.py @@ -0,0 +1,39 @@ +import os +import cv2 +import argparse +import numpy as np + +import tvm +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + + +def main(config): + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + precision = config.precision + if config.precision == "float16": + precision = "fp16" + + inputs_info = {"images": ([config.bsz, 3, 640, 640], "float32")} + mod, params = import_model_to_igie(config.model, inputs_info, outputs_info=None, precision=precision, backend="tensorrt") + lib = relay.build(mod, 
target=target, params=params, precision=precision, device=device) + lib.export_library(config.engine) + print("Build engine done!") + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--bsz", type=int) + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/build_nms_engine.py b/models/cv/object_detection/yolov7_sample/igie/build_nms_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..51d70747679443f85a314a6072eb83e35b0e30ed --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/build_nms_engine.py @@ -0,0 +1,82 @@ +import os +import argparse +import torch +import onnx +from onnx import helper +from onnx import TensorProto, numpy_helper +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def create_onnx(args): + nms = helper.make_node( + "DetectionNMS_IxRT", + name="NMS", + inputs=["nms_input"], + outputs=["nms_output0", "nms_output1"], + nMaxKeep=args.max_box_pre_img, + fIoUThresh=args.iou_thresh, + fScoreThresh=args.score_thresh + ) + graph = helper.make_graph( + nodes=[nms], + name="gpu_nms", + inputs=[ + helper.make_tensor_value_info( + "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6) + ) + ], + outputs=[ + helper.make_tensor_value_info( + "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6) + ), + helper.make_tensor_value_info( + "nms_output1", onnx.TensorProto.INT32, (args.bsz,) + ) + ], + initializer=[] + ) + + op = onnx.OperatorSetIdProto() + op.version = 13 + model = onnx.helper.make_model(graph) + + model = onnx.helper.make_model(graph, opset_imports=[op]) + onnx_path = args.path + "/nms.onnx" + onnx.save(model, onnx_path) + +def build_engine(args): + onnx_path = args.path + "/nms.onnx" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(onnx_path) + plan = builder.build_serialized_network(network, build_config) + + engine_path = args.path + "/nms.engine" + with open(engine_path, "wb") as f: + f.write(plan) + +def main(args): + create_onnx(args) + build_engine(args) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--bsz", type=int, default=1, help="batch size") + parser.add_argument("--path", type=str) + parser.add_argument("--all_box_num", type=int, default=25200) + parser.add_argument("--max_box_pre_img", type=int, default=1000) + parser.add_argument("--iou_thresh", type=float, default=0.6) + parser.add_argument("--score_thresh", type=float, default=0.001) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/calibration_dataset.py b/models/cv/object_detection/yolov7_sample/igie/calibration_dataset.py new file mode 100644 index 
0000000000000000000000000000000000000000..578e013db932c53f0cfa2790e375d7b699081168 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/calibration_dataset.py @@ -0,0 +1,31 @@ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader + + + +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/ci/prepare.sh b/models/cv/object_detection/yolov7_sample/igie/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..543e7e365593785d6fadccc4a5c1007965269a81 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/ci/prepare.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
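+# CI preparation for the yolov7_sample IGIE benchmark: install the Python dependencies used by the sample scripts and link the pre-exported yolov7m.onnx into ./checkpoints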
+ +set -x + +pip3 install pycocotools onnxsim pycuda +pip3 install tqdm +pip3 install opencv-python==4.6.0.66 + +mkdir -p checkpoints +ln -s /mnt/deepspark/data/datasets/corex-inference-data-4.0.0/checkpoints/yolov7/yolov7m.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/coco_labels.py b/models/cv/object_detection/yolov7_sample/igie/coco_labels.py new file mode 100644 index 0000000000000000000000000000000000000000..69d38878ff16d66dfe7550fcd170ac91d0862318 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/coco_labels.py @@ -0,0 +1,89 @@ +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/object_detection/yolov7_sample/igie/common.py b/models/cv/object_detection/yolov7_sample/igie/common.py new file mode 100644 index 0000000000000000000000000000000000000000..5f5435557ecb72c836cc5a3c253482b0458657f6 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/common.py @@ -0,0 +1,86 @@ +import numpy as np +from tqdm import tqdm + +import tensorrt +import cuda.cuda as cuda +import cuda.cudart as cudart + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + 
"category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/config/YOLOV7M_CONFIG b/models/cv/object_detection/yolov7_sample/igie/config/YOLOV7M_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..51bd2d14cfe9b1058b7a37acc932d606e189af02 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/config/YOLOV7M_CONFIG @@ -0,0 +1,49 @@ +# BSZ : 构建engine以及推理时的batchsize +# IMGSIZE : 模型输入hw大小 +# RUN_MODE : [FPS, MAP] +# PRECISION : [float16, int8] +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件 +# COCO_GT : COCOEVAL标签文件 +# DATASET_DIR : 量化/推理数据集路径 +# CHECKPOINTS_DIR : 存放生成的onnx/engine路径 +# LAYER_FUSION : decoder部分走融合算子实现 0不融合 1融合 +# DECODER_FASTER : 有两种融合实现,faster版本速度快且可以直接对接gpu nms;另一种实现的输出和onnx保持一致. 
1:faster +IMGSIZE=640 +MODEL_NAME=yolov7m +ORIGINE_MODEL=yolov7m.onnx +DATA_PROCESS_TYPE=yolov5 +MODEL_INPUT_NAMES=(images) + +LAYER_FUSION=1 +DECODER_FASTER=1 +DECODER_NUM_CLASS=80 +DECODER_INPUT_NAMES=(/model.105/m.0/Conv_output_0 /model.105/m.1/Conv_output_0 /model.105/m.2/Conv_output_0) +DECODER_8_ANCHOR=(12 16 19 36 40 28) +DECODER_16_ANCHOR=(36 75 76 55 72 146) +DECODER_32_ANCHOR=(142 110 192 243 459 401) + +# NMS CONFIG + # IOU_THRESH : iou阈值 + # SCORE_THRESH : bbox置信度阈值 + # MAX_BOX_PRE_IMG : 每张图片预测bbox的数量上限 + # ALL_BOX_NUM : nms接收每张图片的box数量 + # NMS_TYPE : GPU/CPU(TODO) +IOU_THRESH=0.6 +SCORE_THRESH=0.001 +MAX_BOX_PRE_IMG=1000 +ALL_BOX_NUM=25200 +NMS_TYPE=GPU + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST=() +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/cut_model.py b/models/cv/object_detection/yolov7_sample/igie/cut_model.py new file mode 100644 index 0000000000000000000000000000000000000000..af0a3a4f0cc3caf05b95be3c77dea7728c931e3f --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/cut_model.py @@ -0,0 +1,16 @@ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--input_names", nargs='+', type=str) + parser.add_argument("--output_names", nargs='+', type=str) + args = parser.parse_args() + return args + +args = parse_args() +onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names) +print(" Cut Model Done.") \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/datasets/__init__.py b/models/cv/object_detection/yolov7_sample/igie/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/cv/object_detection/yolov7_sample/igie/datasets/coco.py b/models/cv/object_detection/yolov7_sample/igie/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7f355b8444e2bc8d38d5c89cb3217328c497420e --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/datasets/coco.py @@ -0,0 +1,116 @@ +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_post_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. 
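+        img_size (int): Target input edge length; the selected pipeline resizes/pads the
+            image to ``img_size x img_size`` before it is returned.
+        data_process_type (str): Name of the preprocessing pipeline to apply, one of
+            ``yolov5``, ``yolov3`` or ``yolox``.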
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/object_detection/yolov7_sample/igie/datasets/common.py b/models/cv/object_detection/yolov7_sample/igie/datasets/common.py new file mode 100644 index 0000000000000000000000000000000000000000..e120e00fece2055a96d2ed24010f61b2ca1e3837 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/datasets/common.py @@ -0,0 +1,66 @@ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file 
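The `letterbox`/`scale_boxes` pair above is the round trip the YOLOv5-style pipeline relies on: pad the input up to the network resolution before inference, then undo that padding and scaling so the predicted boxes land back in original-image coordinates. Below is a minimal sketch of that round trip, assuming the helpers are importable as `datasets.common` from the sample's run directory and using a synthetic frame in place of a real COCO image:

```python
import numpy as np

from datasets.common import letterbox, scale_boxes  # helpers defined above

# A synthetic 480x640 BGR frame stands in for a real COCO image.
image = np.zeros((480, 640, 3), dtype=np.uint8)

# Pad to the 640x640 network input with the same flags Yolov5Preprocess uses.
padded, ratio, (dw, dh) = letterbox(image, new_shape=640, auto=False, scaleup=False)
print(padded.shape, ratio, (dw, dh))   # (640, 640, 3) (1.0, 1.0) (0.0, 80.0)

# Pretend the detector kept one xyxy box in network coordinates ...
boxes = np.array([[100.0, 150.0, 300.0, 400.0]])

# ... and map it back onto the original 480x640 frame, removing the letterbox padding.
restored = scale_boxes((640, 640), boxes, image.shape[:2], use_letterbox=True)
print(restored)                        # [[100.  70. 300. 320.]]
```

`Yolov5Postprocess` in `datasets/post_process.py` applies exactly this `scale_boxes(..., use_letterbox=True)` step to the NMS output before converting the surviving boxes to xywh for the COCO json.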
diff --git a/models/cv/object_detection/yolov7_sample/igie/datasets/post_process.py b/models/cv/object_detection/yolov7_sample/igie/datasets/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..a58c02f810baa41bf6ee96092a8a3955fe311640 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/datasets/post_process.py @@ -0,0 +1,115 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/datasets/pre_process.py b/models/cv/object_detection/yolov7_sample/igie/datasets/pre_process.py new file mode 100644 index 0000000000000000000000000000000000000000..8cc643a88528b7c7bbd7e3b1eb8095116ba53568 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/datasets/pre_process.py @@ -0,0 +1,56 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = 
img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/datasets/vision.py b/models/cv/object_detection/yolov7_sample/igie/datasets/vision.py new file mode 100644 index 0000000000000000000000000000000000000000..32da4a789767939efc1e83d89f2955145798a5f3 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/datasets/vision.py @@ -0,0 +1,136 @@ +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. 
+ transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. + """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/object_detection/yolov7_sample/igie/deploy.py b/models/cv/object_detection/yolov7_sample/igie/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..83f80a9ec314975979d283bcd6b5a4ee958485a4 --- 
/dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/deploy.py @@ -0,0 +1,125 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:num*2+1], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + else: + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", type=str, default=None) + 
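+    # --focus_output / --focus_last_node below, together with --focus_input above, are only
+    # needed when the exported graph still contains an unfused Focus/slice block (typical of
+    # older YOLOv5 exports); the YOLOv7 scripts leave them unset, which skips ReplaceFocus.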
parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/inference.py b/models/cv/object_detection/yolov7_sample/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..517a3f6d5bc0aa41332f4c927dbcfd97757bf8d0 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/inference.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import cuda.cuda as cuda +import cuda.cudart as cudart + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + + +import tvm +from tvm.contrib import graph_executor + +def init_by_igie(engine_path): + device = tvm.device("iluvatar", 0) + lib = tvm.runtime.load_module(engine_path) + module = graph_executor.GraphModule(lib["default"](device)) + # engine, context = module.engine, module.context + # inputs, outputs, allocations = module.inputs, module.outputs, module.allocations + return module + +def igie_infer(module, batch_data): + # set input + module.set_input(module.inputs[0]["name"], batch_data) + ### infer model + module.run() + # get output data + output = module.get_output(0) + return output + + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + # Load Engine + module = init_by_igie(config.model_engine) + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + 
+ # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + module.run() + print("Warm Done.") + + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + # batch_img_id = batch_img_id.numpy() + cur_bsz_sample = batch_data.shape[0] + + err, = cuda.cuMemcpyHtoD(module.inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + module.run() + + if config.test_mode == "MAP": + # Fetch output + output = igie_infer(module, batch_data) + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + err, = cuda.cuMemcpyHtoD(nms_inputs[0]["allocation"], nms_input, nms_input.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + nms_context.execute_v2(nms_allocations) + err, = cuda.cuMemcpyDtoH(nms_output0, nms_outputs[0]["allocation"], nms_outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyDtoH(nms_output1, nms_outputs[1]["allocation"], nms_outputs[1]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + + if config.test_mode == "FPS": + start_time = time.time() + for i in range(config.loop_count): + module.run() + end_time = time.time() + forward_time = end_time - start_time + fps = (config.loop_count*config.bsz) / forward_time + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(10) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + 
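+    # --test_mode MAP runs the full COCO evaluation (requires the NMS engine and annotations);
+    # FPS only times repeated engine executions and checks the result against --fps_target.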
parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/object_detection/yolov7_sample/igie/load_ixrt_plugin.py b/models/cv/object_detection/yolov7_sample/igie/load_ixrt_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..932efbdfd1a4e91d8ddfd363adf6bce989df1709 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/load_ixrt_plugin.py @@ -0,0 +1,12 @@ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/modify_batchsize.py b/models/cv/object_detection/yolov7_sample/igie/modify_batchsize.py new file mode 100644 index 0000000000000000000000000000000000000000..00ed65dd16bf19445396df7f72d81d653eed756d --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/modify_batchsize.py @@ -0,0 +1,37 @@ +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. 
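+        # dim[0] is the batch dimension; ONNX stores it either as a symbolic name (dim_param)
+        # or as a fixed integer (dim_value), so both cases are handled below.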
+        dim1 = input.type.tensor_type.shape.dim[0]
+        # update dim to be a symbolic value
+        if isinstance(batch_size, str):
+            # set dynamic batch size
+            dim1.dim_param = batch_size
+        elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int):
+            # set given batch size
+            dim1.dim_value = int(batch_size)
+        else:
+            # set batch size of 1
+            dim1.dim_value = 1
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--batch_size", type=int)
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)
+onnx.save(model, args.output_model)
\ No newline at end of file
diff --git a/models/cv/object_detection/yolov7_sample/igie/quant.py b/models/cv/object_detection/yolov7_sample/igie/quant.py
new file mode 100644
index 0000000000000000000000000000000000000000..d73212ca60a4985cc036f67e8fb0b3c70ba24e4d
--- /dev/null
+++ b/models/cv/object_detection/yolov7_sample/igie/quant.py
@@ -0,0 +1,52 @@
+import os
+import random
+import argparse
+import numpy as np
+from tensorrt.deploy import static_quantize
+
+import torch
+from calibration_dataset import create_dataloaders
+
+def setseed(seed=42):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model_name", type=str)
+    parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx")
+    parser.add_argument("--data_process_type", type=str, default="none")
+    parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017")
+    parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json")
+    parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile")
+    parser.add_argument("--disable_quant_names", nargs='*', type=str)
+    parser.add_argument("--save_dir", type=str, help="save path", default=None)
+    parser.add_argument("--bsz", type=int, default=32)
+    parser.add_argument("--step", type=int, default=20)
+    parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument("--imgsz", type=int, default=640)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+setseed(args.seed)
+model_name = args.model_name
+
+out_dir = args.save_dir
+dataloader = create_dataloaders(
+    data_path=args.dataset_dir,
+    annFile=args.ann_file,
+    img_sz=args.imgsz,
+    batch_size=args.bsz,
+    step=args.step,
+    data_process_type=args.data_process_type
+)
+# print("disable_quant_names : ", args.disable_quant_names)
+static_quantize(args.model,
+            calibration_dataloader=dataloader,
+            save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"),
+            observer=args.observer,
+            data_preprocess=lambda x: x[0].to("cuda"),
+            quant_format="qdq",
+            disable_quant_names=args.disable_quant_names)
\ No newline at end of file
diff --git a/models/cv/object_detection/yolov7_sample/igie/scripts/infer_yolov7_sample_int8_accuracy.sh b/models/cv/object_detection/yolov7_sample/igie/scripts/infer_yolov7_sample_int8_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..958c8b021dab4ab7d2c3aaeec8c9a9953a7335a4
--- /dev/null
+++ 
b/models/cv/object_detection/yolov7_sample/igie/scripts/infer_yolov7_sample_int8_accuracy.sh @@ -0,0 +1,208 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=0.68 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV7M_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + 
--decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/scripts/infer_yolov7_sample_int8_performance.sh b/models/cv/object_detection/yolov7_sample/igie/scripts/infer_yolov7_sample_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..980e788aab4960549d477afb20c8b0c079c32772 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/scripts/infer_yolov7_sample_int8_performance.sh @@ -0,0 +1,209 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=425 +LOOP_COUNT=100 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV7M_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo 
RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ 
+ --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} diff --git a/models/cv/object_detection/yolov7_sample/igie/simplify_model.py b/models/cv/object_detection/yolov7_sample/igie/simplify_model.py new file mode 100644 index 0000000000000000000000000000000000000000..b4254b6f903cb5f8775e43b2f80d5572bf45b1d6 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/simplify_model.py @@ -0,0 +1,21 @@ +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov8n/igie/README.md b/models/cv/object_detection/yolov8n/igie/README.md index dc481a4dfab90fc9ad9ac44cdfd6e41344f16056..f30d3a5a395d8c1796f0b88f352a93e3a18d2e5f 100644 --- a/models/cv/object_detection/yolov8n/igie/README.md +++ b/models/cv/object_detection/yolov8n/igie/README.md @@ -8,7 +8,7 @@ YOLOv8n combines exceptional speed and competitive accuracy in real-time object | GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | | :----: | :----: | :----: | -| MR-V100 | 4.3.0 | 25.23 | +| MR-V100 | 4.3.0 | 25.12 | ## Model Preparation @@ -47,17 +47,7 @@ coco ### Install Dependencies -Contact the Iluvatar administrator to get the missing packages: - -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - ```bash -# Install libGL -## CentOS -yum install -y mesa-libGL -## Ubuntu -apt install -y libgl1-mesa-glx - pip3 install -r requirements.txt ``` diff --git a/models/cv/object_detection/yolov8n/igie/inference.py b/models/cv/object_detection/yolov8n/igie/inference.py index be549137973d9b2a98b4bbf5f7305dcd65ccdbc2..4ca50b00b702bbe5ef962cf735c4ff62f9fa3d43 100644 --- a/models/cv/object_detection/yolov8n/igie/inference.py +++ b/models/cv/object_detection/yolov8n/igie/inference.py @@ -150,6 +150,12 @@ class IGIEValidator(DetectionValidator): return stats + def preprocess(self, batch): + """Preprocess without PyTorch device transfer (for TVM).""" + if 
'img' in batch: + batch['img'] = batch['img'].float() / 255.0 + return batch + def init_metrics(self): """Initialize evaluation metrics for YOLO.""" val = self.data.get(self.args.split, '') # validation path @@ -159,10 +165,11 @@ class IGIEValidator(DetectionValidator): self.names = self.data['names'] self.nc = len(self.names) self.metrics.names = self.names - self.confusion_matrix = ConfusionMatrix(nc=80) + self.confusion_matrix = ConfusionMatrix(names=self.names) self.seen = 0 self.jdict = [] - self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[]) + self.end2end = False + self.is_lvis = isinstance(val, str) and "lvis" in val and not self.is_coco # is LVIS def main(): args = parse_args() diff --git a/models/cv/object_detection/yolov8n/igie/quantize.py b/models/cv/object_detection/yolov8n/igie/quantize.py index 5a8747883afad7d88b40abdeb499cd943c1d90d0..8a4c42f771cbd2702fb9e091cb115f992a682de5 100644 --- a/models/cv/object_detection/yolov8n/igie/quantize.py +++ b/models/cv/object_detection/yolov8n/igie/quantize.py @@ -105,6 +105,12 @@ class PreProcessDatasets(DetectionValidator): return datasets + def preprocess(self, batch): + """Preprocess without PyTorch device transfer (for TVM).""" + if 'img' in batch: + batch['img'] = batch['img'].float() / 255.0 + return batch + class CalibrationDataset(torch.utils.data.Dataset): def __init__(self, datasets): self.datasets = datasets diff --git a/models/cv/object_detection/yolov8n/igie/requirements.txt b/models/cv/object_detection/yolov8n/igie/requirements.txt index d69fe4dc8f45e4ac14e980b6b0c498132fb8f915..5cc8ec9ca0360dbf1e5d7ca15da6df106abeab21 100644 --- a/models/cv/object_detection/yolov8n/igie/requirements.txt +++ b/models/cv/object_detection/yolov8n/igie/requirements.txt @@ -1,6 +1,5 @@ tqdm onnx pycocotools -# FAILed in 8.2.51 -ultralytics==8.1.34 +ultralytics opencv-python diff --git a/models/cv/object_detection/yolov8n/ixrt/inference.py b/models/cv/object_detection/yolov8n/ixrt/inference.py index 81725723d3f5eb2eadc06a55c17a1834dcd3b3ec..df5017495719e1c7267b45d280b9127c9ab82b05 100644 --- a/models/cv/object_detection/yolov8n/ixrt/inference.py +++ b/models/cv/object_detection/yolov8n/ixrt/inference.py @@ -205,6 +205,12 @@ class IxRT_Validator(DetectionValidator): return stats + def preprocess(self, batch): + """Preprocess without PyTorch device transfer.""" + if 'img' in batch: + batch['img'] = batch['img'].float() / 255.0 + return batch + def init_metrics(self): """Initialize evaluation metrics for YOLO.""" val = self.data.get(self.args.split, '') # validation path @@ -214,10 +220,11 @@ class IxRT_Validator(DetectionValidator): self.names = self.data['names'] self.nc = len(self.names) self.metrics.names = self.names - self.confusion_matrix = ConfusionMatrix(nc=80) + self.confusion_matrix = ConfusionMatrix(names=self.names) self.seen = 0 self.jdict = [] - self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[]) + self.end2end = False + self.is_lvis = isinstance(val, str) and "lvis" in val and not self.is_coco # is LVIS def main(): config = parse_args() diff --git a/models/cv/object_detection/yolov8n/ixrt/requirements.txt b/models/cv/object_detection/yolov8n/ixrt/requirements.txt index 346fe1c7d21fa2d840c877dec15c1e30bd58a1d3..eb001fe64f3cefd450ebfe965ddfb345bf9faa4b 100644 --- a/models/cv/object_detection/yolov8n/ixrt/requirements.txt +++ b/models/cv/object_detection/yolov8n/ixrt/requirements.txt @@ -3,5 +3,5 @@ onnx onnxsim pycocotools opencv-python==4.8.0.74 -ultralytics==8.2.51 +ultralytics cuda-python \ No 
newline at end of file diff --git a/models/cv/object_detection/yolov5s/igie/README.md b/models/cv/object_detection/yolov8n/pytorch/README.md similarity index 48% rename from models/cv/object_detection/yolov5s/igie/README.md rename to models/cv/object_detection/yolov8n/pytorch/README.md index 1c97e8f50f9d25af4d6a5d1c37ccdcc0eb551cf7..a901ba8e3e25ba4955c16ef861ea6301279eb5cc 100644 --- a/models/cv/object_detection/yolov5s/igie/README.md +++ b/models/cv/object_detection/yolov8n/pytorch/README.md @@ -1,20 +1,20 @@ -# YOLOv5s (IGIE) +# YOLOv8n (Pytorch) ## Model Description -The YOLOv5 architecture is designed for efficient and accurate object detection tasks in real-time scenarios. It employs a single convolutional neural network to simultaneously predict bounding boxes and class probabilities for multiple objects within an image. The YOLOV5s is a tiny model. +YOLOv8n combines exceptional speed and competitive accuracy in real-time object detection tasks. With a focus on simplicity and efficiency, this compact model employs a single neural network to make predictions, enabling rapid and reliable identification of objects in images or video streams, making it ideal for resource-constrained environments. ## Supported Environments | GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | | :----: | :----: | :----: | -| MR-V100 | 4.3.0 | 25.12 | +| MR-V100 | dev-only | 26.03 | ## Model Preparation ### Prepare Resources -Pretrained model: +Pretrained model: Dataset: @@ -46,48 +46,32 @@ coco ``` ### Install Dependencies - ```bash -pip3 install -r ../../ixrt_common/requirements.txt +pip3 install -r requirements.txt ``` -### Model Conversion +## Model Inference ```bash -mkdir checkpoints -git clone -b v6.1 --depth 1 https://github.com/ultralytics/yolov5 - -# 有一些环境需要安装 -wget https://ultralytics.com/assets/Arial.ttf -cp Arial.ttf /root/.config/Ultralytics/Arial.ttf - -# 转换为onnx (具体实现可以参考 export.py 中的 export_onnx 函数) -pushd ./yolov5 -# set weights_only=False to be comaptible with pytorch 2.7 -sed -i '96 s/map_location)/map_location, weights_only=False)/' ./models/experimental.py - -python3 export.py --weights yolov5s.pt --include onnx --opset 11 --batch-size 32 -mv yolov5s.onnx ../checkpoints -popd +export DATASETS_DIR=/Path/to/coco/ ``` -## Model Inference - +### Generate coco.yaml ```bash -export DATASETS_DIR=./coco/ +bash generate_coco.sh ``` ### FP16 ```bash # Accuracy -bash scripts/infer_yolov5s_fp16_accuracy.sh +bash scripts/infer_yolov8n_fp16_accuracy.sh # Performance -bash scripts/infer_yolov5s_fp16_performance.sh +bash scripts/infer_yolov8n_fp16_performance.sh ``` ## Model Results -| Model | BatchSize | Precision | FPS | MAP@0.5 | MAP@0.5:0.95 | +| Model | BatchSize | Precision | FPS | MAP@0.5 | MAP@0.5:0.95 | | :----: | :----: | :----: | :----: | :----: | :----: | -| YOLOv5s | 32 | FP16 | 1112.66 | 0.565 | 0.370 | +| YOLOv8n | 32 | FP16 | 134 | 0.526 | 0.374 | diff --git a/models/cv/object_detection/yolov8n/pytorch/ci/prepare.sh b/models/cv/object_detection/yolov8n/pytorch/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..6b3db025d780462a53110704e8744af4cc808f1c --- /dev/null +++ b/models/cv/object_detection/yolov8n/pytorch/ci/prepare.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +pip3 install -r requirements.txt +bash generate_coco.sh \ No newline at end of file diff --git a/models/cv/object_detection/yolov8n/pytorch/generate_coco.sh b/models/cv/object_detection/yolov8n/pytorch/generate_coco.sh new file mode 100644 index 0000000000000000000000000000000000000000..9b1251ca8abe4db48dffd93d7a09516ae0366d26 --- /dev/null +++ b/models/cv/object_detection/yolov8n/pytorch/generate_coco.sh @@ -0,0 +1,125 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +set -e + +# 1. Check environment variable +if [ -z "$DATASETS_DIR" ]; then + echo "Error: DATASETS_DIR environment variable is not set." + exit 1 +fi + +# 2. Check directory validity +if [ ! -d "$DATASETS_DIR" ]; then + echo "Error: COCO dataset directory not found at '$COCO_DIR'." + exit 1 +fi + +# Get script directory (more robust than pwd) +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +yaml_pth="${SCRIPT_DIR}/coco.yaml" + +# 3. Generate coco.yaml +cat > "$yaml_pth" < 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/ci/prepare.sh b/models/cv/object_detection/yolox_sample/igie/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..ca7bb94581d66d5378298649ba172f2934f58996 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/ci/prepare.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
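+#
+# CI prepare script for yolox_sample (IGIE): installs numactl for the detected OS
+# (Ubuntu via apt, CentOS via yum), installs the Python dependencies used by the
+# inference scripts, and links the exported YOLOX-m ONNX checkpoint into ./checkpoints.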
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y numactl +elif [[ ${ID} == "centos" ]]; then + yum install -y numactl +else + echo "Not Support Os" +fi + +pip3 install pycocotools onnxsim pycuda +pip3 install loguru +pip3 install tabulate +pip3 install tqdm +pip3 install opencv-python==4.6.0.66 +pip3 install simplejson + +mkdir -p checkpoints +ln -s /mnt/deepspark/data/datasets/corex-inference-data-4.0.0/checkpoints/yolox/yolox_m_export.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/coco_labels.py b/models/cv/object_detection/yolox_sample/igie/coco_labels.py new file mode 100644 index 0000000000000000000000000000000000000000..69d38878ff16d66dfe7550fcd170ac91d0862318 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/coco_labels.py @@ -0,0 +1,89 @@ +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/object_detection/yolox_sample/igie/common.py b/models/cv/object_detection/yolox_sample/igie/common.py new file mode 100644 index 0000000000000000000000000000000000000000..677051f2b3ead5cfb27e0f8f767c1d2b45010ec3 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/common.py @@ -0,0 +1,89 @@ +import numpy as np +from tqdm import tqdm + +import tensorrt +import cuda.cuda as cuda +import cuda.cudart as cudart + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in 
enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + if c<1 or c>80: + print("error class: ", c) + continue + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations diff --git a/models/cv/object_detection/yolox_sample/igie/config/YOLOXM_CONFIG b/models/cv/object_detection/yolox_sample/igie/config/YOLOXM_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..6da9d04e458b598072925c855ed950bbbb075661 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/config/YOLOXM_CONFIG @@ -0,0 +1,56 @@ +# BSZ : 构建engine以及推理时的batchsize +# IMGSIZE : 模型输入hw大小 +# RUN_MODE : [FPS, MAP] +# PRECISION : [float16, int8] +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件 +# COCO_GT : COCOEVAL标签文件 +# DATASET_DIR : 量化/推理数据集路径 +# CHECKPOINTS_DIR : 存放生成的onnx/engine路径 +# LAYER_FUSION : decoder部分走融合算子实现 0不融合 1融合 +# DECODER_FASTER : 有两种融合实现,faster版本速度快且可以直接对接gpu nms;另一种实现的输出和onnx保持一致. 
1:faster +IMGSIZE=640 +MODEL_NAME=yolox +ORIGINE_MODEL=yolox_m_export.onnx +DATA_PROCESS_TYPE=yolox +MODEL_INPUT_NAMES=(images) + +LAYER_FUSION=1 +DECODER_FASTER=1 +DECODER_NUM_CLASS=80 +# nx4x80x80 nx1x80x80 nx80x80x80 +DECODER0_INPUT_NAMES=(1041 1042 1032) +# nx4x40x40 nx1x40x40 nx80x40x40 +DECODER1_INPUT_NAMES=(1067 1068 1058) +# nx4x20x20 nx1x20x20 nx80x20x20 +DECODER2_INPUT_NAMES=(1093 1094 1084) + +# Fuse Focus +FOCUS_INPUT_EDGE=images +FOCUS_OUTPUT_EDGE=input +FOCUS_LAST_NODE=Concat_40 + +# NMS CONFIG + # IOU_THRESH : iou阈值 + # SCORE_THRESH : bbox置信度阈值 + # MAX_BOX_PRE_IMG : 每张图片预测bbox的数量上限 + # ALL_BOX_NUM : nms接收每张图片的box数量 + # NMS_TYPE : GPU/CPU(TODO) +IOU_THRESH=0.65 +SCORE_THRESH=0.01 +MAX_BOX_PRE_IMG=1000 +ALL_BOX_NUM=8400 +NMS_TYPE=GPU + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST=() +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/cut_model.py b/models/cv/object_detection/yolox_sample/igie/cut_model.py new file mode 100644 index 0000000000000000000000000000000000000000..af0a3a4f0cc3caf05b95be3c77dea7728c931e3f --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/cut_model.py @@ -0,0 +1,16 @@ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--input_names", nargs='+', type=str) + parser.add_argument("--output_names", nargs='+', type=str) + args = parser.parse_args() + return args + +args = parse_args() +onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names) +print(" Cut Model Done.") \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/datasets/__init__.py b/models/cv/object_detection/yolox_sample/igie/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/cv/object_detection/yolox_sample/igie/datasets/coco.py b/models/cv/object_detection/yolox_sample/igie/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7f355b8444e2bc8d38d5c89cb3217328c497420e --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/datasets/coco.py @@ -0,0 +1,116 @@ +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_post_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. 
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/object_detection/yolox_sample/igie/datasets/common.py b/models/cv/object_detection/yolox_sample/igie/datasets/common.py new file mode 100644 index 0000000000000000000000000000000000000000..e120e00fece2055a96d2ed24010f61b2ca1e3837 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/datasets/common.py @@ -0,0 +1,66 @@ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file diff 
--git a/models/cv/object_detection/yolox_sample/igie/datasets/post_process.py b/models/cv/object_detection/yolox_sample/igie/datasets/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..a58c02f810baa41bf6ee96092a8a3955fe311640 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/datasets/post_process.py @@ -0,0 +1,115 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/datasets/pre_process.py b/models/cv/object_detection/yolox_sample/igie/datasets/pre_process.py new file mode 100644 index 0000000000000000000000000000000000000000..8cc643a88528b7c7bbd7e3b1eb8095116ba53568 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/datasets/pre_process.py @@ -0,0 +1,56 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / 
max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/datasets/vision.py b/models/cv/object_detection/yolox_sample/igie/datasets/vision.py new file mode 100644 index 0000000000000000000000000000000000000000..32da4a789767939efc1e83d89f2955145798a5f3 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/datasets/vision.py @@ -0,0 +1,136 @@ +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. 
+ transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. + """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/object_detection/yolox_sample/igie/deploy.py b/models/cv/object_detection/yolox_sample/igie/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..668b342040d2f83dc20eaa784c047ddd55ac0234 --- 
/dev/null +++ b/models/cv/object_detection/yolox_sample/igie/deploy.py @@ -0,0 +1,135 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + print("AddYoloDecoderOp:", ) + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + elif args.with_nms: + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + else: + graph.outputs.clear() + graph.add_output("decoder_8") + graph.outputs["decoder_8"].dtype = "FLOAT" + graph.add_output("decoder_16") + graph.outputs["decoder_16"].dtype = "FLOAT" + graph.add_output("decoder_32") + graph.outputs["decoder_32"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--with_nms", type=bool, default=False, help="engine with nms") + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + 
parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/inference.py b/models/cv/object_detection/yolox_sample/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..7fbb611d054ea14f8e092a707af5d8ab5bf586cb --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/inference.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import cuda.cuda as cuda +import cuda.cudart as cudart + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + + +import tvm +from tvm.contrib import graph_executor + +def init_by_igie(engine_path): + device = tvm.device("iluvatar", 0) + lib = tvm.runtime.load_module(engine_path) + module = graph_executor.GraphModule(lib["default"](device)) + # engine, context = module.engine, module.context + # inputs, outputs, allocations = module.inputs, module.outputs, module.allocations + return module + +def igie_infer(module, batch_data): + # set input + module.set_input(module.inputs[0]["name"], batch_data) + ### infer model + module.run() + # get output data + output = module.get_output(0) + return output + + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + # Load Engine + module = init_by_igie(config.model_engine) + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, 
nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + module.run() + print("Warm Done.") + + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + # batch_img_id = batch_img_id.numpy() + cur_bsz_sample = batch_data.shape[0] + + err, = cuda.cuMemcpyHtoD(module.inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + module.run() + + if config.test_mode == "MAP": + # Fetch output + output = igie_infer(module, batch_data) + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + err, = cuda.cuMemcpyHtoD(nms_inputs[0]["allocation"], nms_input, nms_input.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + nms_context.execute_v2(nms_allocations) + err, = cuda.cuMemcpyDtoH(nms_output0, nms_outputs[0]["allocation"], nms_outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyDtoH(nms_output1, nms_outputs[1]["allocation"], nms_outputs[1]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + # fps = num_samples / forward_time + + if config.test_mode == "FPS": + start_time = time.time() + for i in range(config.loop_count): + module.run() + end_time = time.time() + forward_time = end_time - start_time + fps = (config.loop_count*config.bsz) / forward_time + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(10) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: 
+ print("failed!") + exit(10) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/object_detection/yolox_sample/igie/load_ixrt_plugin.py b/models/cv/object_detection/yolox_sample/igie/load_ixrt_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..932efbdfd1a4e91d8ddfd363adf6bce989df1709 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/load_ixrt_plugin.py @@ -0,0 +1,12 @@ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/modify_batchsize.py b/models/cv/object_detection/yolox_sample/igie/modify_batchsize.py new file mode 100644 index 0000000000000000000000000000000000000000..00ed65dd16bf19445396df7f72d81d653eed756d --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/modify_batchsize.py @@ -0,0 +1,37 @@ +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the 
first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. + dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/quant.py b/models/cv/object_detection/yolox_sample/igie/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..d73212ca60a4985cc036f67e8fb0b3c70ba24e4d --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/quant.py @@ -0,0 +1,55 @@ +import os +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import sys +sys.path.append("/home/haoyuan.chen/temp/inferencesamples/benchmarks/cv/detection/yolov3/tensorrt") +print(sys.path) +from calibration_dataset import create_dataloaders + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=640) + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + +out_dir = args.save_dir +dataloader = create_dataloaders( + data_path=args.dataset_dir, + annFile=args.ann_file, + img_sz=args.imgsz, + batch_size=args.bsz, + step=args.step, + data_process_type=args.data_process_type +) +# print("disable_quant_names : ", args.disable_quant_names) +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git 
a/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_accuracy.sh b/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..7c23bc050bd08a015dce710247ac911ba7064af4 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_accuracy.sh @@ -0,0 +1,210 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=0.645 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOXM_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +DECODER_INPUT_NAMES=("${DECODER0_INPUT_NAMES[@]}" "${DECODER1_INPUT_NAMES[@]}" "${DECODER2_INPUT_NAMES[@]}") +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + 
CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}}_quant_fusion_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloxDecoder \ + --with_nms True \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} \ + --focus_input images_DequantizeLinear_Output \ + --focus_output ${FOCUS_OUTPUT_EDGE} \ + --focus_last_node ${FOCUS_LAST_NODE} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}_with_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_with_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_performance.sh b/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..42ea520d862bb14fef5683999e49713847d42ea4 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_performance.sh @@ -0,0 +1,211 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=540 +CPU_AFFINITY=$(ixsmi topo -m|grep "^GPU0" |awk '{print $(NF-1)}') +LOOP_COUNT=100 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case 
$argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOXM_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +DECODER_INPUT_NAMES=("${DECODER0_INPUT_NAMES[@]}" "${DECODER1_INPUT_NAMES[@]}" "${DECODER2_INPUT_NAMES[@]}") +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion_no_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloxDecoder \ + --with_nms False \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} \ + --focus_input images_DequantizeLinear_Output \ + --focus_output ${FOCUS_OUTPUT_EDGE} \ + --focus_last_node ${FOCUS_LAST_NODE} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize 
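+# modify_batchsize.py rewrites the first (batch) dimension of every graph input to
+# ${BSZ}; the rebuilt ONNX is then compiled into a fixed-batch engine, and the FPS run
+# below is judged against the --tgt / ${TGT} target.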
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}_without_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_without_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +numactl --physcpubind=${CPU_AFFINITY} python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/simplify_model.py b/models/cv/object_detection/yolox_sample/igie/simplify_model.py new file mode 100644 index 0000000000000000000000000000000000000000..b4254b6f903cb5f8775e43b2f80d5572bf45b1d6 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/simplify_model.py @@ -0,0 +1,21 @@ +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/build_engine.py b/models/cv/object_detection/yolox_sample/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..63936d601802d92198fc27fb20427ed508d1abe4 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/build_engine.py @@ -0,0 +1,46 @@ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = 
builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + if config.precision == "int8": + build_config.set_flag(tensorrt.BuilderFlag.FP16) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/build_nms_engine.py b/models/cv/object_detection/yolox_sample/ixrt/build_nms_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..25f0ab8abee2d4a6948250e3fcc4bb4705777550 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/build_nms_engine.py @@ -0,0 +1,81 @@ +import os +import argparse +import torch +import onnx +from onnx import helper +from onnx import TensorProto, numpy_helper +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() +def create_onnx(args): + nms = helper.make_node( + "DetectionNMS_IxRT", + name="NMS", + inputs=["nms_input"], + outputs=["nms_output0", "nms_output1"], + nMaxKeep=args.max_box_pre_img, + fIoUThresh=args.iou_thresh, + fScoreThresh=args.score_thresh + ) + graph = helper.make_graph( + nodes=[nms], + name="gpu_nms", + inputs=[ + helper.make_tensor_value_info( + "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6) + ) + ], + outputs=[ + helper.make_tensor_value_info( + "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6) + ), + helper.make_tensor_value_info( + "nms_output1", onnx.TensorProto.INT32, (args.bsz,) + ) + ], + initializer=[] + ) + + op = onnx.OperatorSetIdProto() + op.version = 13 + model = onnx.helper.make_model(graph) + + model = onnx.helper.make_model(graph, opset_imports=[op]) + onnx_path = args.path + "/nms.onnx" + onnx.save(model, onnx_path) + +def build_engine(args): + onnx_path = args.path + "/nms.onnx" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(onnx_path) + plan = builder.build_serialized_network(network, build_config) + + engine_path = args.path + "/nms.engine" + with open(engine_path, "wb") as f: + f.write(plan) + +def main(args): + create_onnx(args) + build_engine(args) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--bsz", type=int, default=1, help="batch size") + parser.add_argument("--path", type=str) + parser.add_argument("--all_box_num", type=int, default=25200) + parser.add_argument("--max_box_pre_img", type=int, default=1000) + parser.add_argument("--iou_thresh", type=float, default=0.6) + 
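+    # These defaults are fallbacks only; the driver scripts pass IOU_THRESH and
+    # SCORE_THRESH from config/YOLOXM_CONFIG (0.65 and 0.01 for this YOLOX-M sample).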
parser.add_argument("--score_thresh", type=float, default=0.001) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/calibration_dataset.py b/models/cv/object_detection/yolox_sample/ixrt/calibration_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..0f39a87ac9cd6d806fee9ebd623abbf1b5530b09 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/calibration_dataset.py @@ -0,0 +1,29 @@ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader + +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/ci/prepare.sh b/models/cv/object_detection/yolox_sample/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..f56a16d24665826a9053be338ae241f785b8b962 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/ci/prepare.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y numactl +elif [[ ${ID} == "centos" ]]; then + yum install -y numactl +else + echo "Not Support Os" +fi + +pip3 install pycocotools onnxsim pycuda +pip3 install loguru +pip3 install tabulate +pip3 install tqdm +pip3 install opencv-python==4.6.0.66 +pip3 install simplejson + +mkdir -p checkpoints +ln -s /root/data/datasets/corex-inference-data-4.0.0/checkpoints/yolox/yolox_m_export.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/coco_labels.py b/models/cv/object_detection/yolox_sample/ixrt/coco_labels.py new file mode 100644 index 0000000000000000000000000000000000000000..69d38878ff16d66dfe7550fcd170ac91d0862318 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/coco_labels.py @@ -0,0 +1,89 @@ +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/object_detection/yolox_sample/ixrt/common.py b/models/cv/object_detection/yolox_sample/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..5f5435557ecb72c836cc5a3c253482b0458657f6 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/common.py @@ -0,0 +1,86 @@ +import numpy as np +from tqdm import tqdm + +import tensorrt +import cuda.cuda as cuda +import cuda.cudart as cudart + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): 
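+        # Each row of `boxes` is (x, y, w, h, class_id, score) in original-image pixels;
+        # class_id is 1-based over the 80 contiguous training classes, so class_trans[c - 1]
+        # recovers the original COCO 91-category id that COCOeval expects.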
+ if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/config/YOLOXM_CONFIG b/models/cv/object_detection/yolox_sample/ixrt/config/YOLOXM_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..6da9d04e458b598072925c855ed950bbbb075661 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/config/YOLOXM_CONFIG @@ -0,0 +1,56 @@ +# BSZ : 构建engine以及推理时的batchsize +# IMGSIZE : 模型输入hw大小 +# RUN_MODE : [FPS, MAP] +# PRECISION : [float16, int8] +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件 +# COCO_GT : COCOEVAL标签文件 +# DATASET_DIR : 量化/推理数据集路径 +# CHECKPOINTS_DIR : 存放生成的onnx/engine路径 +# LAYER_FUSION : decoder部分走融合算子实现 0不融合 1融合 +# DECODER_FASTER : 有两种融合实现,faster版本速度快且可以直接对接gpu nms;另一种实现的输出和onnx保持一致. 
1:faster +IMGSIZE=640 +MODEL_NAME=yolox +ORIGINE_MODEL=yolox_m_export.onnx +DATA_PROCESS_TYPE=yolox +MODEL_INPUT_NAMES=(images) + +LAYER_FUSION=1 +DECODER_FASTER=1 +DECODER_NUM_CLASS=80 +# nx4x80x80 nx1x80x80 nx80x80x80 +DECODER0_INPUT_NAMES=(1041 1042 1032) +# nx4x40x40 nx1x40x40 nx80x40x40 +DECODER1_INPUT_NAMES=(1067 1068 1058) +# nx4x20x20 nx1x20x20 nx80x20x20 +DECODER2_INPUT_NAMES=(1093 1094 1084) + +# Fuse Focus +FOCUS_INPUT_EDGE=images +FOCUS_OUTPUT_EDGE=input +FOCUS_LAST_NODE=Concat_40 + +# NMS CONFIG + # IOU_THRESH : iou阈值 + # SCORE_THRESH : bbox置信度阈值 + # MAX_BOX_PRE_IMG : 每张图片预测bbox的数量上限 + # ALL_BOX_NUM : nms接收每张图片的box数量 + # NMS_TYPE : GPU/CPU(TODO) +IOU_THRESH=0.65 +SCORE_THRESH=0.01 +MAX_BOX_PRE_IMG=1000 +ALL_BOX_NUM=8400 +NMS_TYPE=GPU + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST=() +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/cut_model.py b/models/cv/object_detection/yolox_sample/ixrt/cut_model.py new file mode 100644 index 0000000000000000000000000000000000000000..af0a3a4f0cc3caf05b95be3c77dea7728c931e3f --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/cut_model.py @@ -0,0 +1,16 @@ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--input_names", nargs='+', type=str) + parser.add_argument("--output_names", nargs='+', type=str) + args = parser.parse_args() + return args + +args = parse_args() +onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names) +print(" Cut Model Done.") \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/datasets/__init__.py b/models/cv/object_detection/yolox_sample/ixrt/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/cv/object_detection/yolox_sample/ixrt/datasets/coco.py b/models/cv/object_detection/yolox_sample/ixrt/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7f355b8444e2bc8d38d5c89cb3217328c497420e --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/datasets/coco.py @@ -0,0 +1,116 @@ +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_post_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. 
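+
+    Note: the ``transform``/``target_transform`` arguments are kept only for API
+    compatibility in this sample; ``__init__`` overwrites ``self.transforms`` with the
+    pre-processing function selected by ``data_process_type`` (yolov5, yolov3 or yolox).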
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/object_detection/yolox_sample/ixrt/datasets/common.py b/models/cv/object_detection/yolox_sample/ixrt/datasets/common.py new file mode 100644 index 0000000000000000000000000000000000000000..e120e00fece2055a96d2ed24010f61b2ca1e3837 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/datasets/common.py @@ -0,0 +1,66 @@ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file diff 
--git a/models/cv/object_detection/yolox_sample/ixrt/datasets/post_process.py b/models/cv/object_detection/yolox_sample/ixrt/datasets/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..a58c02f810baa41bf6ee96092a8a3955fe311640 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/datasets/post_process.py @@ -0,0 +1,115 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/datasets/pre_process.py b/models/cv/object_detection/yolox_sample/ixrt/datasets/pre_process.py new file mode 100644 index 0000000000000000000000000000000000000000..8cc643a88528b7c7bbd7e3b1eb8095116ba53568 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/datasets/pre_process.py @@ -0,0 +1,56 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / 
max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/datasets/vision.py b/models/cv/object_detection/yolox_sample/ixrt/datasets/vision.py new file mode 100644 index 0000000000000000000000000000000000000000..32da4a789767939efc1e83d89f2955145798a5f3 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/datasets/vision.py @@ -0,0 +1,136 @@ +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. 
+ transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. + """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/object_detection/yolox_sample/ixrt/deploy.py b/models/cv/object_detection/yolox_sample/ixrt/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..668b342040d2f83dc20eaa784c047ddd55ac0234 --- 
/dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/deploy.py @@ -0,0 +1,135 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + print("AddYoloDecoderOp:", ) + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + elif args.with_nms: + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + else: + graph.outputs.clear() + graph.add_output("decoder_8") + graph.outputs["decoder_8"].dtype = "FLOAT" + graph.add_output("decoder_16") + graph.outputs["decoder_16"].dtype = "FLOAT" + graph.add_output("decoder_32") + graph.outputs["decoder_32"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--with_nms", type=bool, default=False, help="engine with nms") + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + 
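+    # YOLOX is anchor-free, so the *_anchor options are normally left unset for
+    # YoloxDecoder; AddYoloDecoderOp drops the attribute when it is None. They are only
+    # meaningful for the anchor-based YoloV3/V5/V7 decoder types.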
parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/inference.py b/models/cv/object_detection/yolox_sample/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..1860545a5509832e76ca1ac9a144945e94296929 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/inference.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import cuda.cuda as cuda +import cuda.cudart as cudart + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine + engine, context = create_engine_context(config.model_engine, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + 
context.execute_v2(allocations) + print("Warm Done.") + + # Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + # batch_img_id = batch_img_id.numpy() + + cur_bsz_sample = batch_data.shape[0] + + # Set input + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Forward + # start_time = time.time() + context.execute_v2(allocations) + # end_time = time.time() + # forward_time += end_time - start_time + + if config.test_mode == "MAP": + # Fetch output + err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + err, = cuda.cuMemcpyHtoD(nms_inputs[0]["allocation"], nms_input, nms_input.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + nms_context.execute_v2(nms_allocations) + err, = cuda.cuMemcpyDtoH(nms_output0, nms_outputs[0]["allocation"], nms_outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyDtoH(nms_output1, nms_outputs[1]["allocation"], nms_outputs[1]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + # fps = num_samples / forward_time + + if config.test_mode == "FPS": + start_time = time.time() + for i in range(config.loop_count): + context.execute_v2(allocations) + end_time = time.time() + forward_time = end_time - start_time + fps = (config.loop_count*config.bsz) / forward_time + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(10) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + +def parse_config(): + parser = 
argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/load_ixrt_plugin.py b/models/cv/object_detection/yolox_sample/ixrt/load_ixrt_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..932efbdfd1a4e91d8ddfd363adf6bce989df1709 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/load_ixrt_plugin.py @@ -0,0 +1,12 @@ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/modify_batchsize.py b/models/cv/object_detection/yolox_sample/ixrt/modify_batchsize.py new file mode 100644 index 0000000000000000000000000000000000000000..00ed65dd16bf19445396df7f72d81d653eed756d --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/modify_batchsize.py @@ -0,0 +1,37 @@ +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be 
batch_size
+    # Modify as appropriate ... note that this requires all inputs to
+    # have the same batch_size
+    inputs = model.graph.input
+    for input in inputs:
+        # Checks omitted. This assumes that all inputs are tensors and have a shape with first dim.
+        # Add checks as needed.
+        dim1 = input.type.tensor_type.shape.dim[0]
+        # update dim to be a symbolic value
+        if isinstance(batch_size, str):
+            # set dynamic batch size
+            dim1.dim_param = batch_size
+        elif isinstance(batch_size, int):
+            # set given batch size
+            dim1.dim_value = int(batch_size)
+        else:
+            # set batch size of 1
+            dim1.dim_value = 1
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--batch_size", type=int)
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)
+onnx.save(model, args.output_model)
\ No newline at end of file
diff --git a/models/cv/object_detection/yolox_sample/ixrt/quant.py b/models/cv/object_detection/yolox_sample/ixrt/quant.py
new file mode 100644
index 0000000000000000000000000000000000000000..d73212ca60a4985cc036f67e8fb0b3c70ba24e4d
--- /dev/null
+++ b/models/cv/object_detection/yolox_sample/ixrt/quant.py
@@ -0,0 +1,52 @@
+import os
+import random
+import argparse
+import numpy as np
+from tensorrt.deploy import static_quantize
+
+import torch
+from calibration_dataset import create_dataloaders
+
+def setseed(seed=42):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model_name", type=str)
+    parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx")
+    parser.add_argument("--data_process_type", type=str, default="none")
+    parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017")
+    parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json")
+    parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile")
+    parser.add_argument("--disable_quant_names", nargs='*', type=str)
+    parser.add_argument("--save_dir", type=str, help="save path", default=None)
+    parser.add_argument("--bsz", type=int, default=32)
+    parser.add_argument("--step", type=int, default=20)
+    parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument("--imgsz", type=int, default=640)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+setseed(args.seed)
+model_name = args.model_name
+
+out_dir = args.save_dir
+dataloader = create_dataloaders(
+    data_path=args.dataset_dir,
+    annFile=args.ann_file,
+    img_sz=args.imgsz,
+    batch_size=args.bsz,
+    step=args.step,
+    data_process_type=args.data_process_type
+)
+# print("disable_quant_names : ", args.disable_quant_names)
+static_quantize(args.model,
+    calibration_dataloader=dataloader,
+    save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"),
+    observer=args.observer,
+    data_preprocess=lambda x: x[0].to("cuda"),
+    quant_format="qdq",
+    disable_quant_names=args.disable_quant_names)
\ No newline at end of file
diff --git 
a/models/cv/object_detection/yolox_sample/ixrt/scripts/infer_yolox_sample_int8_accuracy.sh b/models/cv/object_detection/yolox_sample/ixrt/scripts/infer_yolox_sample_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..162f5ab88c407e3287378cc5e83d080f20d94e7f --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/scripts/infer_yolox_sample_int8_accuracy.sh @@ -0,0 +1,209 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=0.645 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOXM_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +DECODER_INPUT_NAMES=("${DECODER0_INPUT_NAMES[@]}" "${DECODER1_INPUT_NAMES[@]}" "${DECODER2_INPUT_NAMES[@]}") +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + 
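+    # From here on the QDQ-quantized ONNX is the working model: the decoder fusion,
+    # batch-size rewrite and engine build below all start from ${QUANT_EXIST_ONNX}.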
CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}}_quant_fusion_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloxDecoder \ + --with_nms True \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} \ + --focus_input images_DequantizeLinear_Output \ + --focus_output ${FOCUS_OUTPUT_EDGE} \ + --focus_last_node ${FOCUS_LAST_NODE} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}_with_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_with_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/scripts/infer_yolox_sample_int8_performance.sh b/models/cv/object_detection/yolox_sample/ixrt/scripts/infer_yolox_sample_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..f31f12b726deca82284f6f5004dfa25175ea65a9 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/scripts/infer_yolox_sample_int8_performance.sh @@ -0,0 +1,210 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=540 +CPU_AFFINITY=$(ixsmi topo -m|grep "^GPU0" |awk '{print $(NF-1)}') +LOOP_COUNT=100 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + 
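+    # Lookahead parsing: `index` already points at the token after the current flag,
+    # so `--bs N` overrides BSZ and `--tgt N` overrides the FPS target defined above.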
--bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOXM_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +DECODER_INPUT_NAMES=("${DECODER0_INPUT_NAMES[@]}" "${DECODER1_INPUT_NAMES[@]}" "${DECODER2_INPUT_NAMES[@]}") +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion_no_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloxDecoder \ + --with_nms False \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} \ + --focus_input images_DequantizeLinear_Output \ + --focus_output ${FOCUS_OUTPUT_EDGE} \ + --focus_last_node ${FOCUS_LAST_NODE} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize 
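+# modify_batchsize.py rewrites the first dimension of every graph input to ${BSZ};
+# this assumes all inputs share the same leading batch dimension.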
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}_without_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_without_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +numactl --physcpubind=${CPU_AFFINITY} python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/simplify_model.py b/models/cv/object_detection/yolox_sample/ixrt/simplify_model.py new file mode 100644 index 0000000000000000000000000000000000000000..b4254b6f903cb5f8775e43b2f80d5572bf45b1d6 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/simplify_model.py @@ -0,0 +1,21 @@ +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file diff --git a/models/cv/ocr/kie_layoutxlm/igie/README.md b/models/cv/ocr/kie_layoutxlm/igie/README.md index ebe6ec11d1e1c4e500c6a6a541471a14df70917b..01f3819ec156977e7d2a3766b9e3b3e5b749d144 100644 --- a/models/cv/ocr/kie_layoutxlm/igie/README.md +++ b/models/cv/ocr/kie_layoutxlm/igie/README.md @@ -22,7 +22,7 @@ Dataset: to down ### Install Dependencies Contact the Iluvatar administrator to get the missing packages: -- paddlepaddle-3.0.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- paddlepaddle-*.whl ```bash pip3 install -r requirements.txt diff --git a/models/cv/ocr/svtr/igie/README.md b/models/cv/ocr/svtr/igie/README.md index 341e9fea20b8870377e134002c07bf8366e3f872..0c2ecb0d1e3ab4bc6fe47d9e1e10c3fb17e15ba9 100644 --- 
a/models/cv/ocr/svtr/igie/README.md +++ b/models/cv/ocr/svtr/igie/README.md @@ -22,11 +22,10 @@ Dataset: to download the dataset. ### Install Dependencies Contact the Iluvatar administrator to get the missing packages: -- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- mmcv-*.whl ```bash pip3 install -r requirements.txt -pip3 install mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl ``` ### Model Conversion diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md b/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md index d2c8855b619efdc01ef44778b3e35aa00beb3f01..cb0b6938b58c24628ae12fe9c245b4800bf5e9fe 100755 --- a/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md +++ b/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md @@ -31,7 +31,7 @@ mkdir data/ ### Install Dependencies Contact the Iluvatar administrator to get the missing packages: -- transformers-4.45.2+corex.4.3.0-py3-none-any.whl +- transformers-*.whl ```bash # Install libGL diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh index a3756a47e4396dc3bb999f5b3cf2a7a129f86f7e..003a15085302c59c266f7412f697cd7325a45948 100644 --- a/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh +++ b/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh @@ -24,5 +24,4 @@ else echo "Not Support Os" fi -cp -r ../../vllm_public_assets/ ./ -pip install /mnt/deepspark/install/transformers-4.45.2+corex.4.3.0-py3-none-any.whl \ No newline at end of file +cp -r ../../vllm_public_assets/ ./ \ No newline at end of file diff --git a/models/multimodal/vision_language_model/minicpm_o/vllm/README.md b/models/multimodal/vision_language_model/minicpm_o/vllm/README.md index c437f74aad966c567da92471f62ad0e853f3e1db..0d6e3fd453c41b1d49815cf21f946cab829df1c3 100644 --- a/models/multimodal/vision_language_model/minicpm_o/vllm/README.md +++ b/models/multimodal/vision_language_model/minicpm_o/vllm/README.md @@ -30,7 +30,7 @@ cp -r ../../vllm_public_assets/ ./ Contact the Iluvatar administrator to get the missing packages: -- transformers-4.45.2+corex.4.3.0-py3-none-any.whl +- transformers-*.whl ## Model Inference diff --git a/models/multimodal/vision_language_model/minicpm_o/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/minicpm_o/vllm/ci/prepare.sh index 072ab4383b422e31d394f51664d99127c41d4bbe..ec26235fe230da2eb0a00a6e15b04aa8c46b6530 100644 --- a/models/multimodal/vision_language_model/minicpm_o/vllm/ci/prepare.sh +++ b/models/multimodal/vision_language_model/minicpm_o/vllm/ci/prepare.sh @@ -16,4 +16,3 @@ set -x cp -r ../../vllm_public_assets/ ./ -pip install /mnt/deepspark/install/transformers-4.45.2+corex.4.3.0-py3-none-any.whl diff --git a/models/multimodal/vision_language_model/pixtral/vllm/offline_inference_vision_language.py b/models/multimodal/vision_language_model/pixtral/vllm/offline_inference_vision_language.py index 3feb8a86605d89dd3116d7584e8ef3faf8b2c45e..d3285b6b2dc68273cb77a0bfa7ee32a13148035a 100644 --- a/models/multimodal/vision_language_model/pixtral/vllm/offline_inference_vision_language.py +++ b/models/multimodal/vision_language_model/pixtral/vllm/offline_inference_vision_language.py @@ -23,91 +23,128 @@ on HuggingFace model repository. 
""" import sys from pathlib import Path +import argparse as _argparse +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= import io import time -sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse import dataclasses import inspect from PIL import Image import base64 -from vllm import LLM, EngineArgs, SamplingParams -from utils import sampling_add_cli_args +# ==================== PATCH argparse to ignore 'deprecated' ==================== +def make_action_compat(action_class): + original_init = action_class.__init__ + def patched_init(self, *args, **kwargs): + kwargs.pop('deprecated', None) + original_init(self, *args, **kwargs) + action_class.__init__ = patched_init -# Pixtral -def run_pixtral(question,engine_params): +actions_to_patch = [ + argparse._StoreAction, + argparse._StoreTrueAction, + argparse._StoreFalseAction, +] - prompt = f"{question}" - # Note: The default setting of max_num_seqs (256) and - # max_model_len (128k) for this model may cause OOM. - # You may lower either to run this example on lower-end GPUs. +if hasattr(argparse, 'BooleanOptionalAction'): + actions_to_patch.append(argparse.BooleanOptionalAction) - # In this example, we override max_num_seqs to 5 while - # keeping the original context length of 128k. - llm = LLM(**engine_params) - stop_token_ids = None - return llm, prompt, stop_token_ids +for action_cls in actions_to_patch: + make_action_compat(action_cls) +# ============================================================================== +# Add parent path for utils +sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) -if __name__ == "__main__": +from vllm import LLM, EngineArgs, SamplingParams +try: + from utils import sampling_add_cli_args +except ImportError: + # Fallback: define minimal sampling CLI args if utils missing + def sampling_add_cli_args(parser): + parser.add_argument("--temperature", type=float, default=1.0) + parser.add_argument("--top-p", type=float, default=1.0) + parser.add_argument("--max-tokens", type=int, default=16) + return parser + +def main(): parser = argparse.ArgumentParser() parser = EngineArgs.add_cli_args(parser) parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] - sampling_args = [ - param.name - for param in list( - inspect.signature(SamplingParams).parameters.values() - ) - ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} + + # --- Build EngineArgs properly --- + engine_args = EngineArgs.from_cli_args(args) + # Use dataclasses.asdict instead of .to_dict() + engine_params = dataclasses.asdict(engine_args) + + # --- Build SamplingParams safely --- + sampling_signature = inspect.signature(SamplingParams) + sampling_arg_names = set(sampling_signature.parameters.keys()) sampling_params = { - attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + k: v for k, v in vars(args).items() + if k in sampling_arg_names and v is not None } - - prompt = "Describe this image in one sentence." 
+ sampling_params_obj = SamplingParams(**sampling_params) - llm, prompt, stop_token_ids = run_pixtral(prompt,engine_params) - sampling_params['stop_token_ids'] = stop_token_ids + # --- Prepare input --- + prompt = "Describe this image in one sentence." + image_path = "./vllm_public_assets/cherry_blossom.jpg" + try: + image = Image.open(image_path).convert("RGB") + except FileNotFoundError: + print(f"❌ Image not found: {image_path}") + print("Please ensure the image exists or update the path.") + sys.exit(1) - # We set temperature to 0.2 so that outputs can be different - # even when all prompts are identical when running batch inference. - sampling_params = SamplingParams(**sampling_params) - - image: Image = Image.open("./vllm_public_assets/cherry_blossom.jpg") - image = image.convert("RGB") image_data = io.BytesIO() image.save(image_data, format='JPEG') image_base64 = base64.b64encode(image_data.getvalue()).decode("utf-8") - messages = [ - # {"role": "system", "content": SYSTEM_PROMPT}, - { - "role": "user", - "content": [ - { - "type": "text", - "text": prompt - }, - {"type": "image_url", "image_url": { - "url": f"data:image/jpeg;base64,{image_base64}" - }, - } - ], - }, - ] + messages = [{ + "role": "user", + "content": [ + {"type": "text", "text": prompt}, + {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}, + ], + }] + + # --- Run inference --- + print("🚀 Initializing LLM...") + llm = LLM(**engine_params) + print("🧠 Generating response...") start_time = time.perf_counter() - outputs = llm.chat(messages, sampling_params=sampling_params) + outputs = llm.chat(messages, sampling_params=sampling_params_obj) end_time = time.perf_counter() - duration_time = end_time - start_time + duration = end_time - start_time + + # --- Output results --- num_tokens = 0 for o in outputs: + text = o.outputs[0].text num_tokens += len(o.outputs[0].token_ids) - generated_text = o.outputs[0].text - print(generated_text) - num_requests = len(messages) # 请求的数量 - qps = num_requests / duration_time - print(f"requests: {num_requests}, QPS: {qps}, tokens: {num_tokens}, Token/s: {num_tokens/duration_time}") \ No newline at end of file + print(f"✅ Output: {text}") + + num_reqs = len(outputs) + qps = num_reqs / duration if duration > 0 else 0 + token_per_sec = num_tokens / duration if duration > 0 else 0 + + print(f"\n📊 Summary:") + print(f" Requests: {num_reqs}") + print(f" QPS: {qps:.2f}") + print(f" Total tokens: {num_tokens}") + print(f" Token/s: {token_per_sec:.2f}") + print(f" Duration: {duration:.2f}s") + +if __name__ == "__main__": + main() diff --git a/models/multimodal/vision_language_model/qwen2_5_vl/vllm/README.md b/models/multimodal/vision_language_model/qwen2_5_vl/vllm/README.md index 2092dcc64c567b8631439a61fd186f7da1f4b44b..bf11ca80b8e0699868b7a002d488cfb1b50f5938 100644 --- a/models/multimodal/vision_language_model/qwen2_5_vl/vllm/README.md +++ b/models/multimodal/vision_language_model/qwen2_5_vl/vllm/README.md @@ -24,10 +24,6 @@ cp -r ../../vllm_public_assets/ ./ In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website. 
-```bash -pip install transformers==4.50.3 -``` - ## Model Inference ```bash diff --git a/models/multimodal/vision_language_model/qwen2_5_vl/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/qwen2_5_vl/vllm/ci/prepare.sh index cc6608c240adf4526fc66d01d049232f64da883b..1ce243cbc5197ba4f8526707e50605e75b46e691 100644 --- a/models/multimodal/vision_language_model/qwen2_5_vl/vllm/ci/prepare.sh +++ b/models/multimodal/vision_language_model/qwen2_5_vl/vllm/ci/prepare.sh @@ -16,5 +16,4 @@ set -x -cp -r ../../vllm_public_assets/ ./ -pip install transformers==4.50.3 +cp -r ../../vllm_public_assets/ ./ \ No newline at end of file diff --git a/models/multimodal/vision_language_model/qwen2_5_vl/vllm/offline_inference_vision_language.py b/models/multimodal/vision_language_model/qwen2_5_vl/vllm/offline_inference_vision_language.py index dc0b7ea239a7f3d69d345fd39f1e0cab48e1cc3b..3954253b3fab223e2f94f2f2772aeb59f21bcc66 100644 --- a/models/multimodal/vision_language_model/qwen2_5_vl/vllm/offline_inference_vision_language.py +++ b/models/multimodal/vision_language_model/qwen2_5_vl/vllm/offline_inference_vision_language.py @@ -24,7 +24,16 @@ on HuggingFace model repository. """ import sys from pathlib import Path -import os +import argparse as _argparse +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= import time sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse @@ -98,14 +107,14 @@ if __name__ == "__main__": parser = EngineArgs.add_cli_args(parser) parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_args = EngineArgs.from_cli_args(args) + engine_params = dataclasses.asdict(engine_args) sampling_args = [ param.name for param in list( inspect.signature(SamplingParams).parameters.values() ) ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} sampling_params = { attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) } diff --git a/models/multimodal/vision_language_model/qwen2_vl/vllm/README.md b/models/multimodal/vision_language_model/qwen2_vl/vllm/README.md index c694ff03cb37f1ddb65343a3aa857b2892d5194e..ab5fee0964dfca570445aaf068688c3dfaa930fa 100644 --- a/models/multimodal/vision_language_model/qwen2_vl/vllm/README.md +++ b/models/multimodal/vision_language_model/qwen2_vl/vllm/README.md @@ -24,10 +24,6 @@ cp -r ../../vllm_public_assets/ ./ In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website. 
-```bash -pip install transformers==4.50.3 -``` - ## Model Inference ```bash diff --git a/models/multimodal/vision_language_model/qwen2_vl/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/qwen2_vl/vllm/ci/prepare.sh index cc6608c240adf4526fc66d01d049232f64da883b..b54c8d39af69827a8993f582b3029fd463c58c0b 100644 --- a/models/multimodal/vision_language_model/qwen2_vl/vllm/ci/prepare.sh +++ b/models/multimodal/vision_language_model/qwen2_vl/vllm/ci/prepare.sh @@ -17,4 +17,3 @@ set -x cp -r ../../vllm_public_assets/ ./ -pip install transformers==4.50.3 diff --git a/models/multimodal/vision_language_model/qwen2_vl/vllm/offline_inference_vision_language.py b/models/multimodal/vision_language_model/qwen2_vl/vllm/offline_inference_vision_language.py index 998bf36fbdf21824d155b6010f2827522948ed32..79566081bb448f34403c74ba5919112f92b7870b 100644 --- a/models/multimodal/vision_language_model/qwen2_vl/vllm/offline_inference_vision_language.py +++ b/models/multimodal/vision_language_model/qwen2_vl/vllm/offline_inference_vision_language.py @@ -24,7 +24,16 @@ on HuggingFace model repository. """ import sys from pathlib import Path -import os +import argparse as _argparse +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= import time sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse @@ -93,14 +102,14 @@ if __name__ == "__main__": parser = EngineArgs.add_cli_args(parser) parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_args = EngineArgs.from_cli_args(args) + engine_params = dataclasses.asdict(engine_args) sampling_args = [ param.name for param in list( inspect.signature(SamplingParams).parameters.values() ) ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} sampling_params = { attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) } diff --git a/models/multimodal/vision_language_model/qwen_vl/vllm/offline_inference_vision_language.py b/models/multimodal/vision_language_model/qwen_vl/vllm/offline_inference_vision_language.py index 8274b92c44312aeb2b3f8af7239ed05726d9b191..b43c6bad9242f36cd6779401a4105dafa0af5cd1 100644 --- a/models/multimodal/vision_language_model/qwen_vl/vllm/offline_inference_vision_language.py +++ b/models/multimodal/vision_language_model/qwen_vl/vllm/offline_inference_vision_language.py @@ -24,7 +24,16 @@ on HuggingFace model repository. """ import sys from pathlib import Path -import os +import argparse as _argparse +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= import time sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse @@ -70,8 +79,6 @@ def get_multi_modal_input(args): msg = f"Modality {args.modality} is not supported." 
raise ValueError(msg) - - if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('--num-prompts', @@ -89,14 +96,14 @@ if __name__ == "__main__": parser = EngineArgs.add_cli_args(parser) parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_args = EngineArgs.from_cli_args(args) + engine_params = dataclasses.asdict(engine_args) sampling_args = [ param.name for param in list( inspect.signature(SamplingParams).parameters.values() ) ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} sampling_params = { attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) } diff --git a/models/nlp/llm/deepseek-r1-distill-llama-70b/vllm/offline_inference.py b/models/nlp/llm/deepseek-r1-distill-llama-70b/vllm/offline_inference.py index e597ded92d8ac36ac9c3201b835b96830f302d1c..5289214eaf79836fab8779cf268f82d3bec93e85 100644 --- a/models/nlp/llm/deepseek-r1-distill-llama-70b/vllm/offline_inference.py +++ b/models/nlp/llm/deepseek-r1-distill-llama-70b/vllm/offline_inference.py @@ -16,10 +16,21 @@ import sys from pathlib import Path import os +import argparse as _argparse +import dataclasses + +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse -import dataclasses import inspect import logging import time @@ -41,46 +52,35 @@ if __name__ == "__main__": parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_args = EngineArgs.from_cli_args(args) + engine_params = dataclasses.asdict(engine_args) + sampling_args = [ param.name - for param in list( - inspect.signature(SamplingParams).parameters.values() - ) + for param in inspect.signature(SamplingParams).parameters.values() ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} - sampling_params = { - attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + sampling_params_dict = { + attr: getattr(args, attr) for attr in sampling_args if hasattr(args, attr) } + sampling_params = SamplingParams(**sampling_params_dict) model_name = os.path.dirname(args.model).rsplit("/")[-1] - # Sample prompts. prompts = ["哪些迹象可能表明一个人正在经历焦虑?", "描述一下如何制作芝士披萨。", "写一篇有关5G网络研发的综述文章。"] - # Create a sampling params object. - sampling_params = SamplingParams(**sampling_params) - # Create an LLM. llm = LLM(**engine_params) - # process chat template + # Process chat template if args.remove_chat_template: if "chat" in model_name.lower(): logging.warning( - f"The model name from model path is {model_name}, so we guess you are using the chat model and the additional processing is required for the input prompt. " - f"If the result is not quite correct, please ensure you do not pass --remove_chat_template in CLI." + f"The model name from model path is {model_name}, so we guess you are using the chat model. " + f"If the result is not quite correct, please do not pass --remove_chat_template." 
) prompts_new = prompts else: - # Build chat model promopt - logging.warning( - "If you are using a non chat model, please pass the --remove_chat_template in CLI." - ) - # Try use transformers's apply_chat_template, if chat_template is None, will use defalut template. - # For some old models, the default template may cause bad answers. we don't consider this situation, - # because the Transformers team is advancing the chat template. For more informatino about it, - # please refer to https://huggingface.co/docs/transformers/main/chat_templating + logging.warning("If you are using a non-chat model, please pass --remove_chat_template.") try: load_chat_template(llm.get_tokenizer(), args.chat_template) prompts_new = [] @@ -90,43 +90,29 @@ if __name__ == "__main__": messages, tokenize=False, add_generation_prompt=True ) prompts_new.append(text) - except: - logging.warning( - "use tokenizer apply_chat_template function failed, may because of low transformers version...(try use transformers>=4.34.0)" - ) + except Exception as e: + logging.warning(f"apply_chat_template failed: {e}. may because of low transformers version...(try use transformers>=4.34.0)") prompts_new = prompts - # Generate texts from the prompts. The output is a list of RequestOutput objects - # that contain the prompt, generated text, and other information. - outputs = ( - llm.generate(prompts_new, sampling_params, use_tqdm=False) - if isinstance(prompts_new[0], str) - else llm.generate( - sampling_params=sampling_params, - prompt_token_ids=prompts_new, - use_tqdm=False, - ) - ) + # Warmup (optional but avoids first-run overhead in timing) + _ = llm.generate(prompts_new[:1], sampling_params, use_tqdm=False) torch.cuda.synchronize() + # Timed inference start_time = time.perf_counter() - outputs = ( - llm.generate(prompts_new, sampling_params) - if isinstance(prompts_new[0], str) - else llm.generate(sampling_params=sampling_params, prompt_token_ids=prompts_new) - ) + outputs = llm.generate(prompts_new, sampling_params, use_tqdm=False) torch.cuda.synchronize() end_time = time.perf_counter() duration_time = end_time - start_time num_tokens = 0 - # Print the outputs. for i, output in enumerate(outputs): - prompt = prompts[i] # show the origin prompt. 
actully prompt is "output.prompt" + prompt = prompts[i] generated_text = output.outputs[0].text - num_tokens += len(output.outputs[0].token_ids) - print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n") - num_requests = len(prompts_new) # 请求的数量 - qps = num_requests / duration_time - print(f"requests: {num_requests}, QPS: {qps}, tokens: {num_tokens}, Token/s: {num_tokens/duration_time}") + print(f"Prompt: {prompt}\nGenerated text: {generated_text}\n") + + num_requests = len(prompts_new) + qps = num_requests / duration_time if duration_time > 0 else float('inf') + token_per_sec = num_tokens / duration_time if duration_time > 0 else float('inf') + print(f"requests: {num_requests}, QPS: {qps:.2f}, tokens: {num_tokens}, Token/s: {token_per_sec:.2f}") diff --git a/models/nlp/llm/deepseek-r1-distill-llama-8b/vllm/offline_inference.py b/models/nlp/llm/deepseek-r1-distill-llama-8b/vllm/offline_inference.py index e597ded92d8ac36ac9c3201b835b96830f302d1c..5289214eaf79836fab8779cf268f82d3bec93e85 100644 --- a/models/nlp/llm/deepseek-r1-distill-llama-8b/vllm/offline_inference.py +++ b/models/nlp/llm/deepseek-r1-distill-llama-8b/vllm/offline_inference.py @@ -16,10 +16,21 @@ import sys from pathlib import Path import os +import argparse as _argparse +import dataclasses + +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse -import dataclasses import inspect import logging import time @@ -41,46 +52,35 @@ if __name__ == "__main__": parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_args = EngineArgs.from_cli_args(args) + engine_params = dataclasses.asdict(engine_args) + sampling_args = [ param.name - for param in list( - inspect.signature(SamplingParams).parameters.values() - ) + for param in inspect.signature(SamplingParams).parameters.values() ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} - sampling_params = { - attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + sampling_params_dict = { + attr: getattr(args, attr) for attr in sampling_args if hasattr(args, attr) } + sampling_params = SamplingParams(**sampling_params_dict) model_name = os.path.dirname(args.model).rsplit("/")[-1] - # Sample prompts. prompts = ["哪些迹象可能表明一个人正在经历焦虑?", "描述一下如何制作芝士披萨。", "写一篇有关5G网络研发的综述文章。"] - # Create a sampling params object. - sampling_params = SamplingParams(**sampling_params) - # Create an LLM. llm = LLM(**engine_params) - # process chat template + # Process chat template if args.remove_chat_template: if "chat" in model_name.lower(): logging.warning( - f"The model name from model path is {model_name}, so we guess you are using the chat model and the additional processing is required for the input prompt. " - f"If the result is not quite correct, please ensure you do not pass --remove_chat_template in CLI." + f"The model name from model path is {model_name}, so we guess you are using the chat model. " + f"If the result is not quite correct, please do not pass --remove_chat_template." 
) prompts_new = prompts else: - # Build chat model promopt - logging.warning( - "If you are using a non chat model, please pass the --remove_chat_template in CLI." - ) - # Try use transformers's apply_chat_template, if chat_template is None, will use defalut template. - # For some old models, the default template may cause bad answers. we don't consider this situation, - # because the Transformers team is advancing the chat template. For more informatino about it, - # please refer to https://huggingface.co/docs/transformers/main/chat_templating + logging.warning("If you are using a non-chat model, please pass --remove_chat_template.") try: load_chat_template(llm.get_tokenizer(), args.chat_template) prompts_new = [] @@ -90,43 +90,29 @@ if __name__ == "__main__": messages, tokenize=False, add_generation_prompt=True ) prompts_new.append(text) - except: - logging.warning( - "use tokenizer apply_chat_template function failed, may because of low transformers version...(try use transformers>=4.34.0)" - ) + except Exception as e: + logging.warning(f"apply_chat_template failed: {e}. may because of low transformers version...(try use transformers>=4.34.0)") prompts_new = prompts - # Generate texts from the prompts. The output is a list of RequestOutput objects - # that contain the prompt, generated text, and other information. - outputs = ( - llm.generate(prompts_new, sampling_params, use_tqdm=False) - if isinstance(prompts_new[0], str) - else llm.generate( - sampling_params=sampling_params, - prompt_token_ids=prompts_new, - use_tqdm=False, - ) - ) + # Warmup (optional but avoids first-run overhead in timing) + _ = llm.generate(prompts_new[:1], sampling_params, use_tqdm=False) torch.cuda.synchronize() + # Timed inference start_time = time.perf_counter() - outputs = ( - llm.generate(prompts_new, sampling_params) - if isinstance(prompts_new[0], str) - else llm.generate(sampling_params=sampling_params, prompt_token_ids=prompts_new) - ) + outputs = llm.generate(prompts_new, sampling_params, use_tqdm=False) torch.cuda.synchronize() end_time = time.perf_counter() duration_time = end_time - start_time num_tokens = 0 - # Print the outputs. for i, output in enumerate(outputs): - prompt = prompts[i] # show the origin prompt. 
actully prompt is "output.prompt" + prompt = prompts[i] generated_text = output.outputs[0].text - num_tokens += len(output.outputs[0].token_ids) - print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n") - num_requests = len(prompts_new) # 请求的数量 - qps = num_requests / duration_time - print(f"requests: {num_requests}, QPS: {qps}, tokens: {num_tokens}, Token/s: {num_tokens/duration_time}") + print(f"Prompt: {prompt}\nGenerated text: {generated_text}\n") + + num_requests = len(prompts_new) + qps = num_requests / duration_time if duration_time > 0 else float('inf') + token_per_sec = num_tokens / duration_time if duration_time > 0 else float('inf') + print(f"requests: {num_requests}, QPS: {qps:.2f}, tokens: {num_tokens}, Token/s: {token_per_sec:.2f}") diff --git a/models/nlp/llm/deepseek-r1-distill-qwen-1.5b/vllm/offline_inference.py b/models/nlp/llm/deepseek-r1-distill-qwen-1.5b/vllm/offline_inference.py index e597ded92d8ac36ac9c3201b835b96830f302d1c..5289214eaf79836fab8779cf268f82d3bec93e85 100644 --- a/models/nlp/llm/deepseek-r1-distill-qwen-1.5b/vllm/offline_inference.py +++ b/models/nlp/llm/deepseek-r1-distill-qwen-1.5b/vllm/offline_inference.py @@ -16,10 +16,21 @@ import sys from pathlib import Path import os +import argparse as _argparse +import dataclasses + +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse -import dataclasses import inspect import logging import time @@ -41,46 +52,35 @@ if __name__ == "__main__": parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_args = EngineArgs.from_cli_args(args) + engine_params = dataclasses.asdict(engine_args) + sampling_args = [ param.name - for param in list( - inspect.signature(SamplingParams).parameters.values() - ) + for param in inspect.signature(SamplingParams).parameters.values() ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} - sampling_params = { - attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + sampling_params_dict = { + attr: getattr(args, attr) for attr in sampling_args if hasattr(args, attr) } + sampling_params = SamplingParams(**sampling_params_dict) model_name = os.path.dirname(args.model).rsplit("/")[-1] - # Sample prompts. prompts = ["哪些迹象可能表明一个人正在经历焦虑?", "描述一下如何制作芝士披萨。", "写一篇有关5G网络研发的综述文章。"] - # Create a sampling params object. - sampling_params = SamplingParams(**sampling_params) - # Create an LLM. llm = LLM(**engine_params) - # process chat template + # Process chat template if args.remove_chat_template: if "chat" in model_name.lower(): logging.warning( - f"The model name from model path is {model_name}, so we guess you are using the chat model and the additional processing is required for the input prompt. " - f"If the result is not quite correct, please ensure you do not pass --remove_chat_template in CLI." + f"The model name from model path is {model_name}, so we guess you are using the chat model. " + f"If the result is not quite correct, please do not pass --remove_chat_template." 
) prompts_new = prompts else: - # Build chat model promopt - logging.warning( - "If you are using a non chat model, please pass the --remove_chat_template in CLI." - ) - # Try use transformers's apply_chat_template, if chat_template is None, will use defalut template. - # For some old models, the default template may cause bad answers. we don't consider this situation, - # because the Transformers team is advancing the chat template. For more informatino about it, - # please refer to https://huggingface.co/docs/transformers/main/chat_templating + logging.warning("If you are using a non-chat model, please pass --remove_chat_template.") try: load_chat_template(llm.get_tokenizer(), args.chat_template) prompts_new = [] @@ -90,43 +90,29 @@ if __name__ == "__main__": messages, tokenize=False, add_generation_prompt=True ) prompts_new.append(text) - except: - logging.warning( - "use tokenizer apply_chat_template function failed, may because of low transformers version...(try use transformers>=4.34.0)" - ) + except Exception as e: + logging.warning(f"apply_chat_template failed: {e}. may because of low transformers version...(try use transformers>=4.34.0)") prompts_new = prompts - # Generate texts from the prompts. The output is a list of RequestOutput objects - # that contain the prompt, generated text, and other information. - outputs = ( - llm.generate(prompts_new, sampling_params, use_tqdm=False) - if isinstance(prompts_new[0], str) - else llm.generate( - sampling_params=sampling_params, - prompt_token_ids=prompts_new, - use_tqdm=False, - ) - ) + # Warmup (optional but avoids first-run overhead in timing) + _ = llm.generate(prompts_new[:1], sampling_params, use_tqdm=False) torch.cuda.synchronize() + # Timed inference start_time = time.perf_counter() - outputs = ( - llm.generate(prompts_new, sampling_params) - if isinstance(prompts_new[0], str) - else llm.generate(sampling_params=sampling_params, prompt_token_ids=prompts_new) - ) + outputs = llm.generate(prompts_new, sampling_params, use_tqdm=False) torch.cuda.synchronize() end_time = time.perf_counter() duration_time = end_time - start_time num_tokens = 0 - # Print the outputs. for i, output in enumerate(outputs): - prompt = prompts[i] # show the origin prompt. 
actully prompt is "output.prompt" + prompt = prompts[i] generated_text = output.outputs[0].text - num_tokens += len(output.outputs[0].token_ids) - print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n") - num_requests = len(prompts_new) # 请求的数量 - qps = num_requests / duration_time - print(f"requests: {num_requests}, QPS: {qps}, tokens: {num_tokens}, Token/s: {num_tokens/duration_time}") + print(f"Prompt: {prompt}\nGenerated text: {generated_text}\n") + + num_requests = len(prompts_new) + qps = num_requests / duration_time if duration_time > 0 else float('inf') + token_per_sec = num_tokens / duration_time if duration_time > 0 else float('inf') + print(f"requests: {num_requests}, QPS: {qps:.2f}, tokens: {num_tokens}, Token/s: {token_per_sec:.2f}") diff --git a/models/nlp/llm/deepseek-r1-distill-qwen-14b/vllm/offline_inference.py b/models/nlp/llm/deepseek-r1-distill-qwen-14b/vllm/offline_inference.py index e597ded92d8ac36ac9c3201b835b96830f302d1c..5289214eaf79836fab8779cf268f82d3bec93e85 100644 --- a/models/nlp/llm/deepseek-r1-distill-qwen-14b/vllm/offline_inference.py +++ b/models/nlp/llm/deepseek-r1-distill-qwen-14b/vllm/offline_inference.py @@ -16,10 +16,21 @@ import sys from pathlib import Path import os +import argparse as _argparse +import dataclasses + +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse -import dataclasses import inspect import logging import time @@ -41,46 +52,35 @@ if __name__ == "__main__": parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_args = EngineArgs.from_cli_args(args) + engine_params = dataclasses.asdict(engine_args) + sampling_args = [ param.name - for param in list( - inspect.signature(SamplingParams).parameters.values() - ) + for param in inspect.signature(SamplingParams).parameters.values() ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} - sampling_params = { - attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + sampling_params_dict = { + attr: getattr(args, attr) for attr in sampling_args if hasattr(args, attr) } + sampling_params = SamplingParams(**sampling_params_dict) model_name = os.path.dirname(args.model).rsplit("/")[-1] - # Sample prompts. prompts = ["哪些迹象可能表明一个人正在经历焦虑?", "描述一下如何制作芝士披萨。", "写一篇有关5G网络研发的综述文章。"] - # Create a sampling params object. - sampling_params = SamplingParams(**sampling_params) - # Create an LLM. llm = LLM(**engine_params) - # process chat template + # Process chat template if args.remove_chat_template: if "chat" in model_name.lower(): logging.warning( - f"The model name from model path is {model_name}, so we guess you are using the chat model and the additional processing is required for the input prompt. " - f"If the result is not quite correct, please ensure you do not pass --remove_chat_template in CLI." + f"The model name from model path is {model_name}, so we guess you are using the chat model. " + f"If the result is not quite correct, please do not pass --remove_chat_template." 
) prompts_new = prompts else: - # Build chat model promopt - logging.warning( - "If you are using a non chat model, please pass the --remove_chat_template in CLI." - ) - # Try use transformers's apply_chat_template, if chat_template is None, will use defalut template. - # For some old models, the default template may cause bad answers. we don't consider this situation, - # because the Transformers team is advancing the chat template. For more informatino about it, - # please refer to https://huggingface.co/docs/transformers/main/chat_templating + logging.warning("If you are using a non-chat model, please pass --remove_chat_template.") try: load_chat_template(llm.get_tokenizer(), args.chat_template) prompts_new = [] @@ -90,43 +90,29 @@ if __name__ == "__main__": messages, tokenize=False, add_generation_prompt=True ) prompts_new.append(text) - except: - logging.warning( - "use tokenizer apply_chat_template function failed, may because of low transformers version...(try use transformers>=4.34.0)" - ) + except Exception as e: + logging.warning(f"apply_chat_template failed: {e}. may because of low transformers version...(try use transformers>=4.34.0)") prompts_new = prompts - # Generate texts from the prompts. The output is a list of RequestOutput objects - # that contain the prompt, generated text, and other information. - outputs = ( - llm.generate(prompts_new, sampling_params, use_tqdm=False) - if isinstance(prompts_new[0], str) - else llm.generate( - sampling_params=sampling_params, - prompt_token_ids=prompts_new, - use_tqdm=False, - ) - ) + # Warmup (optional but avoids first-run overhead in timing) + _ = llm.generate(prompts_new[:1], sampling_params, use_tqdm=False) torch.cuda.synchronize() + # Timed inference start_time = time.perf_counter() - outputs = ( - llm.generate(prompts_new, sampling_params) - if isinstance(prompts_new[0], str) - else llm.generate(sampling_params=sampling_params, prompt_token_ids=prompts_new) - ) + outputs = llm.generate(prompts_new, sampling_params, use_tqdm=False) torch.cuda.synchronize() end_time = time.perf_counter() duration_time = end_time - start_time num_tokens = 0 - # Print the outputs. for i, output in enumerate(outputs): - prompt = prompts[i] # show the origin prompt. 
actully prompt is "output.prompt" + prompt = prompts[i] generated_text = output.outputs[0].text - num_tokens += len(output.outputs[0].token_ids) - print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n") - num_requests = len(prompts_new) # 请求的数量 - qps = num_requests / duration_time - print(f"requests: {num_requests}, QPS: {qps}, tokens: {num_tokens}, Token/s: {num_tokens/duration_time}") + print(f"Prompt: {prompt}\nGenerated text: {generated_text}\n") + + num_requests = len(prompts_new) + qps = num_requests / duration_time if duration_time > 0 else float('inf') + token_per_sec = num_tokens / duration_time if duration_time > 0 else float('inf') + print(f"requests: {num_requests}, QPS: {qps:.2f}, tokens: {num_tokens}, Token/s: {token_per_sec:.2f}") diff --git a/models/nlp/llm/deepseek-r1-distill-qwen-32b/vllm/offline_inference.py b/models/nlp/llm/deepseek-r1-distill-qwen-32b/vllm/offline_inference.py index e597ded92d8ac36ac9c3201b835b96830f302d1c..5289214eaf79836fab8779cf268f82d3bec93e85 100644 --- a/models/nlp/llm/deepseek-r1-distill-qwen-32b/vllm/offline_inference.py +++ b/models/nlp/llm/deepseek-r1-distill-qwen-32b/vllm/offline_inference.py @@ -16,10 +16,21 @@ import sys from pathlib import Path import os +import argparse as _argparse +import dataclasses + +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse -import dataclasses import inspect import logging import time @@ -41,46 +52,35 @@ if __name__ == "__main__": parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_args = EngineArgs.from_cli_args(args) + engine_params = dataclasses.asdict(engine_args) + sampling_args = [ param.name - for param in list( - inspect.signature(SamplingParams).parameters.values() - ) + for param in inspect.signature(SamplingParams).parameters.values() ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} - sampling_params = { - attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + sampling_params_dict = { + attr: getattr(args, attr) for attr in sampling_args if hasattr(args, attr) } + sampling_params = SamplingParams(**sampling_params_dict) model_name = os.path.dirname(args.model).rsplit("/")[-1] - # Sample prompts. prompts = ["哪些迹象可能表明一个人正在经历焦虑?", "描述一下如何制作芝士披萨。", "写一篇有关5G网络研发的综述文章。"] - # Create a sampling params object. - sampling_params = SamplingParams(**sampling_params) - # Create an LLM. llm = LLM(**engine_params) - # process chat template + # Process chat template if args.remove_chat_template: if "chat" in model_name.lower(): logging.warning( - f"The model name from model path is {model_name}, so we guess you are using the chat model and the additional processing is required for the input prompt. " - f"If the result is not quite correct, please ensure you do not pass --remove_chat_template in CLI." + f"The model name from model path is {model_name}, so we guess you are using the chat model. " + f"If the result is not quite correct, please do not pass --remove_chat_template." 
) prompts_new = prompts else: - # Build chat model promopt - logging.warning( - "If you are using a non chat model, please pass the --remove_chat_template in CLI." - ) - # Try use transformers's apply_chat_template, if chat_template is None, will use defalut template. - # For some old models, the default template may cause bad answers. we don't consider this situation, - # because the Transformers team is advancing the chat template. For more informatino about it, - # please refer to https://huggingface.co/docs/transformers/main/chat_templating + logging.warning("If you are using a non-chat model, please pass --remove_chat_template.") try: load_chat_template(llm.get_tokenizer(), args.chat_template) prompts_new = [] @@ -90,43 +90,29 @@ if __name__ == "__main__": messages, tokenize=False, add_generation_prompt=True ) prompts_new.append(text) - except: - logging.warning( - "use tokenizer apply_chat_template function failed, may because of low transformers version...(try use transformers>=4.34.0)" - ) + except Exception as e: + logging.warning(f"apply_chat_template failed: {e}. may because of low transformers version...(try use transformers>=4.34.0)") prompts_new = prompts - # Generate texts from the prompts. The output is a list of RequestOutput objects - # that contain the prompt, generated text, and other information. - outputs = ( - llm.generate(prompts_new, sampling_params, use_tqdm=False) - if isinstance(prompts_new[0], str) - else llm.generate( - sampling_params=sampling_params, - prompt_token_ids=prompts_new, - use_tqdm=False, - ) - ) + # Warmup (optional but avoids first-run overhead in timing) + _ = llm.generate(prompts_new[:1], sampling_params, use_tqdm=False) torch.cuda.synchronize() + # Timed inference start_time = time.perf_counter() - outputs = ( - llm.generate(prompts_new, sampling_params) - if isinstance(prompts_new[0], str) - else llm.generate(sampling_params=sampling_params, prompt_token_ids=prompts_new) - ) + outputs = llm.generate(prompts_new, sampling_params, use_tqdm=False) torch.cuda.synchronize() end_time = time.perf_counter() duration_time = end_time - start_time num_tokens = 0 - # Print the outputs. for i, output in enumerate(outputs): - prompt = prompts[i] # show the origin prompt. 
actully prompt is "output.prompt" + prompt = prompts[i] generated_text = output.outputs[0].text - num_tokens += len(output.outputs[0].token_ids) - print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n") - num_requests = len(prompts_new) # 请求的数量 - qps = num_requests / duration_time - print(f"requests: {num_requests}, QPS: {qps}, tokens: {num_tokens}, Token/s: {num_tokens/duration_time}") + print(f"Prompt: {prompt}\nGenerated text: {generated_text}\n") + + num_requests = len(prompts_new) + qps = num_requests / duration_time if duration_time > 0 else float('inf') + token_per_sec = num_tokens / duration_time if duration_time > 0 else float('inf') + print(f"requests: {num_requests}, QPS: {qps:.2f}, tokens: {num_tokens}, Token/s: {token_per_sec:.2f}") diff --git a/models/nlp/llm/deepseek-r1-distill-qwen-7b/vllm/offline_inference.py b/models/nlp/llm/deepseek-r1-distill-qwen-7b/vllm/offline_inference.py index e597ded92d8ac36ac9c3201b835b96830f302d1c..5289214eaf79836fab8779cf268f82d3bec93e85 100644 --- a/models/nlp/llm/deepseek-r1-distill-qwen-7b/vllm/offline_inference.py +++ b/models/nlp/llm/deepseek-r1-distill-qwen-7b/vllm/offline_inference.py @@ -16,10 +16,21 @@ import sys from pathlib import Path import os +import argparse as _argparse +import dataclasses + +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse -import dataclasses import inspect import logging import time @@ -41,46 +52,35 @@ if __name__ == "__main__": parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_args = EngineArgs.from_cli_args(args) + engine_params = dataclasses.asdict(engine_args) + sampling_args = [ param.name - for param in list( - inspect.signature(SamplingParams).parameters.values() - ) + for param in inspect.signature(SamplingParams).parameters.values() ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} - sampling_params = { - attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + sampling_params_dict = { + attr: getattr(args, attr) for attr in sampling_args if hasattr(args, attr) } + sampling_params = SamplingParams(**sampling_params_dict) model_name = os.path.dirname(args.model).rsplit("/")[-1] - # Sample prompts. prompts = ["哪些迹象可能表明一个人正在经历焦虑?", "描述一下如何制作芝士披萨。", "写一篇有关5G网络研发的综述文章。"] - # Create a sampling params object. - sampling_params = SamplingParams(**sampling_params) - # Create an LLM. llm = LLM(**engine_params) - # process chat template + # Process chat template if args.remove_chat_template: if "chat" in model_name.lower(): logging.warning( - f"The model name from model path is {model_name}, so we guess you are using the chat model and the additional processing is required for the input prompt. " - f"If the result is not quite correct, please ensure you do not pass --remove_chat_template in CLI." + f"The model name from model path is {model_name}, so we guess you are using the chat model. " + f"If the result is not quite correct, please do not pass --remove_chat_template." 
) prompts_new = prompts else: - # Build chat model promopt - logging.warning( - "If you are using a non chat model, please pass the --remove_chat_template in CLI." - ) - # Try use transformers's apply_chat_template, if chat_template is None, will use defalut template. - # For some old models, the default template may cause bad answers. we don't consider this situation, - # because the Transformers team is advancing the chat template. For more informatino about it, - # please refer to https://huggingface.co/docs/transformers/main/chat_templating + logging.warning("If you are using a non-chat model, please pass --remove_chat_template.") try: load_chat_template(llm.get_tokenizer(), args.chat_template) prompts_new = [] @@ -90,43 +90,29 @@ if __name__ == "__main__": messages, tokenize=False, add_generation_prompt=True ) prompts_new.append(text) - except: - logging.warning( - "use tokenizer apply_chat_template function failed, may because of low transformers version...(try use transformers>=4.34.0)" - ) + except Exception as e: + logging.warning(f"apply_chat_template failed: {e}. may because of low transformers version...(try use transformers>=4.34.0)") prompts_new = prompts - # Generate texts from the prompts. The output is a list of RequestOutput objects - # that contain the prompt, generated text, and other information. - outputs = ( - llm.generate(prompts_new, sampling_params, use_tqdm=False) - if isinstance(prompts_new[0], str) - else llm.generate( - sampling_params=sampling_params, - prompt_token_ids=prompts_new, - use_tqdm=False, - ) - ) + # Warmup (optional but avoids first-run overhead in timing) + _ = llm.generate(prompts_new[:1], sampling_params, use_tqdm=False) torch.cuda.synchronize() + # Timed inference start_time = time.perf_counter() - outputs = ( - llm.generate(prompts_new, sampling_params) - if isinstance(prompts_new[0], str) - else llm.generate(sampling_params=sampling_params, prompt_token_ids=prompts_new) - ) + outputs = llm.generate(prompts_new, sampling_params, use_tqdm=False) torch.cuda.synchronize() end_time = time.perf_counter() duration_time = end_time - start_time num_tokens = 0 - # Print the outputs. for i, output in enumerate(outputs): - prompt = prompts[i] # show the origin prompt. 
actully prompt is "output.prompt" + prompt = prompts[i] generated_text = output.outputs[0].text - num_tokens += len(output.outputs[0].token_ids) - print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n") - num_requests = len(prompts_new) # 请求的数量 - qps = num_requests / duration_time - print(f"requests: {num_requests}, QPS: {qps}, tokens: {num_tokens}, Token/s: {num_tokens/duration_time}") + print(f"Prompt: {prompt}\nGenerated text: {generated_text}\n") + + num_requests = len(prompts_new) + qps = num_requests / duration_time if duration_time > 0 else float('inf') + token_per_sec = num_tokens / duration_time if duration_time > 0 else float('inf') + print(f"requests: {num_requests}, QPS: {qps:.2f}, tokens: {num_tokens}, Token/s: {token_per_sec:.2f}") diff --git a/models/nlp/llm/internlm3/lmdeploy/README.md b/models/nlp/llm/internlm3/lmdeploy/README.md index fb40fe8f1b8b6831e31bfe06576aeb1069447d48..179fdd68fff57368b8383b46888dcf6911c456ef 100644 --- a/models/nlp/llm/internlm3/lmdeploy/README.md +++ b/models/nlp/llm/internlm3/lmdeploy/README.md @@ -21,11 +21,8 @@ InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8 ### Install Dependencies -In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website. - -```bash -pip install lmdeploy-0.7.2+corex.4.3.0-py3-none-any.whl -``` +Contact the Iluvatar administrator to get the missing packages: +- lmdeploy-*.whl ## Model Inference diff --git a/models/nlp/llm/internlm3/lmdeploy/ci/prepare.sh b/models/nlp/llm/internlm3/lmdeploy/ci/prepare.sh index 34aa71cee97e8a388ab721416acfdaf00cd4ec33..d6fa2d8c4444e68238722ee515b6237608fed3bd 100644 --- a/models/nlp/llm/internlm3/lmdeploy/ci/prepare.sh +++ b/models/nlp/llm/internlm3/lmdeploy/ci/prepare.sh @@ -14,5 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -set -x -pip install /mnt/deepspark/data/install/lmdeploy-0.7.2+corex.4.3.0-py3-none-any.whl \ No newline at end of file +set -x \ No newline at end of file diff --git a/models/nlp/llm/llama2-7b/vllm/offline_inference.py b/models/nlp/llm/llama2-7b/vllm/offline_inference.py index 260b163893cabc0233ff2b10383d6cc414ef0523..5289214eaf79836fab8779cf268f82d3bec93e85 100644 --- a/models/nlp/llm/llama2-7b/vllm/offline_inference.py +++ b/models/nlp/llm/llama2-7b/vllm/offline_inference.py @@ -1,10 +1,36 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
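
The llama2-7b script (like the other vLLM `offline_inference.py` files touched in this change) now opens with a monkey-patch whose comment translates to "compatibility shim: older argparse does not support 'deprecated'". Stock `argparse` only accepts a `deprecated` keyword from Python 3.13 onward, so an `add_argument(..., deprecated=True)` call coming out of newer vLLM CLI helpers would raise `TypeError` on older interpreters; the shim simply drops the keyword. A standalone sketch of the same idea, with an illustrative group and flag name rather than anything from this repo:

```python
# Minimal sketch of the argparse compatibility shim: drop the 'deprecated'
# kwarg so add_argument() also works on Python < 3.13. The parser, group and
# flag below are hypothetical, for demonstration only.
import argparse

_original_add_argument = argparse._ArgumentGroup.add_argument

def _patched_add_argument(self, *args, **kwargs):
    kwargs.pop("deprecated", None)  # silently discard the unsupported kwarg
    return _original_add_argument(self, *args, **kwargs)

argparse._ArgumentGroup.add_argument = _patched_add_argument

parser = argparse.ArgumentParser()
group = parser.add_argument_group("engine")
group.add_argument("--old-flag", type=int, default=0, deprecated=True)
print(parser.parse_args(["--old-flag", "1"]).old_flag)  # -> 1
```

Because only `_ArgumentGroup.add_argument` is wrapped, plain `parser.add_argument(...)` calls are untouched, which keeps the shim's blast radius small.
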
+ import sys from pathlib import Path import os +import argparse as _argparse +import dataclasses + +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse -import dataclasses import inspect import logging import time @@ -26,49 +52,35 @@ if __name__ == "__main__": parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_args = EngineArgs.from_cli_args(args) + engine_params = dataclasses.asdict(engine_args) + sampling_args = [ param.name - for param in list( - inspect.signature(SamplingParams).parameters.values() - ) + for param in inspect.signature(SamplingParams).parameters.values() ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} - sampling_params = { - attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + sampling_params_dict = { + attr: getattr(args, attr) for attr in sampling_args if hasattr(args, attr) } + sampling_params = SamplingParams(**sampling_params_dict) model_name = os.path.dirname(args.model).rsplit("/")[-1] - # Sample prompts. - prompts = [ - "Shanghai is one of the most prosperous cities in China, with a GDP of over $300 billion. Shanghai has the fastest growing economy in China and is the second busiest port in the world. In addition to being a hub for business, Shanghai is also a major tourist destination. It is known for its diverse culture and many historical sites.\nThe city of Shanghai is located on the coast of the Pacific Ocean in east-central China. It is bordered by Jiangsu Province to the north, Zhejiang Province to the south, and Jiangsu Province to the west.", - "What signs may indicate that a person is experiencing anxiety?", - "Describe how to make cheese pizza.", - "Write a review article on the development of 5G networks.", - ] - - # Create a sampling params object. - sampling_params = SamplingParams(**sampling_params) + prompts = ["哪些迹象可能表明一个人正在经历焦虑?", "描述一下如何制作芝士披萨。", "写一篇有关5G网络研发的综述文章。"] # Create an LLM. llm = LLM(**engine_params) - # process chat template + # Process chat template if args.remove_chat_template: - prompts_new = prompts if "chat" in model_name.lower(): logging.warning( - f"The model name from model path is {model_name}, so we guess you are using the chat model and the additional processing is required for the input prompt. " - f"If the result is not quite correct, please ensure you do not pass --remove_chat_template in CLI." + f"The model name from model path is {model_name}, so we guess you are using the chat model. " + f"If the result is not quite correct, please do not pass --remove_chat_template." ) + prompts_new = prompts else: - # Build chat model promopt - # logging.warning("If you are using a non chat model, please pass the --remove_chat_template in CLI.") - # Try use transformers's apply_chat_template, if chat_template is None, will use defalut template. - # For some old models, the default template may cause bad answers. we don't consider this situation, - # because the Transformers team is advancing the chat template. 
For more informatino about it, - # please refer to https://huggingface.co/docs/transformers/main/chat_templating + logging.warning("If you are using a non-chat model, please pass --remove_chat_template.") try: load_chat_template(llm.get_tokenizer(), args.chat_template) prompts_new = [] @@ -78,41 +90,29 @@ if __name__ == "__main__": messages, tokenize=False, add_generation_prompt=True ) prompts_new.append(text) - except: - logging.warning( - "use tokenizer apply_chat_template function failed, may because of low transformers version...(try use transformers>=4.34.0)" - ) + except Exception as e: + logging.warning(f"apply_chat_template failed: {e}. may because of low transformers version...(try use transformers>=4.34.0)") prompts_new = prompts - outputs = ( - llm.generate(prompts_new, sampling_params, use_tqdm=False) - if isinstance(prompts_new[0], str) - else llm.generate( - sampling_params=sampling_params, - prompt_token_ids=prompts_new, - use_tqdm=False, - ) - ) + # Warmup (optional but avoids first-run overhead in timing) + _ = llm.generate(prompts_new[:1], sampling_params, use_tqdm=False) torch.cuda.synchronize() + # Timed inference start_time = time.perf_counter() - outputs = ( - llm.generate(prompts_new, sampling_params) - if isinstance(prompts_new[0], str) - else llm.generate(sampling_params=sampling_params, prompt_token_ids=prompts_new) - ) + outputs = llm.generate(prompts_new, sampling_params, use_tqdm=False) torch.cuda.synchronize() end_time = time.perf_counter() duration_time = end_time - start_time num_tokens = 0 - # Print the outputs. for i, output in enumerate(outputs): - prompt = prompts[i] # show the origin prompt. actully prompt is "output.prompt" + prompt = prompts[i] generated_text = output.outputs[0].text - num_tokens += len(output.outputs[0].token_ids) - print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n") - num_requests = len(prompts_new) # 请求的数量 - qps = num_requests / duration_time - print(f"requests: {num_requests}, QPS: {qps}, tokens: {num_tokens}, Token/s: {num_tokens/duration_time}") + print(f"Prompt: {prompt}\nGenerated text: {generated_text}\n") + + num_requests = len(prompts_new) + qps = num_requests / duration_time if duration_time > 0 else float('inf') + token_per_sec = num_tokens / duration_time if duration_time > 0 else float('inf') + print(f"requests: {num_requests}, QPS: {qps:.2f}, tokens: {num_tokens}, Token/s: {token_per_sec:.2f}") diff --git a/models/nlp/llm/llama3-70b/vllm/offline_inference.py b/models/nlp/llm/llama3-70b/vllm/offline_inference.py index 260b163893cabc0233ff2b10383d6cc414ef0523..5289214eaf79836fab8779cf268f82d3bec93e85 100644 --- a/models/nlp/llm/llama3-70b/vllm/offline_inference.py +++ b/models/nlp/llm/llama3-70b/vllm/offline_inference.py @@ -1,10 +1,36 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
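
In the llama3-70b script the hand-rolled reflection over `dataclasses.fields(EngineArgs)` is replaced by `EngineArgs.from_cli_args(args)` plus `dataclasses.asdict(...)`, and the sampling options are narrowed to whatever `SamplingParams` actually accepts via `inspect.signature`. The underlying pattern, filtering an argparse namespace down to a callable's parameters, can be shown without vLLM installed; `decode()` and its options below are hypothetical stand-ins for `SamplingParams`:

```python
# Sketch of the signature-filtering pattern: keep only the parsed CLI options
# that the target callable accepts. decode() is a made-up stand-in for
# vllm.SamplingParams; nothing here comes from the vLLM API itself.
import argparse
import inspect

def decode(temperature: float = 1.0, top_p: float = 1.0, max_tokens: int = 16):
    return {"temperature": temperature, "top_p": top_p, "max_tokens": max_tokens}

parser = argparse.ArgumentParser()
parser.add_argument("--temperature", type=float, default=0.7)
parser.add_argument("--top-p", dest="top_p", type=float, default=0.95)
parser.add_argument("--model", type=str, default="dummy")  # not a decode() option
args = parser.parse_args([])

accepted = [p.name for p in inspect.signature(decode).parameters.values()]
kwargs = {name: getattr(args, name) for name in accepted if hasattr(args, name)}
print(decode(**kwargs))  # {'temperature': 0.7, 'top_p': 0.95, 'max_tokens': 16}
```

Here `hasattr(args, name)` is the idiomatic replacement for the old `args.__contains__(attr)` call.
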
+ import sys from pathlib import Path import os +import argparse as _argparse +import dataclasses + +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse -import dataclasses import inspect import logging import time @@ -26,49 +52,35 @@ if __name__ == "__main__": parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_args = EngineArgs.from_cli_args(args) + engine_params = dataclasses.asdict(engine_args) + sampling_args = [ param.name - for param in list( - inspect.signature(SamplingParams).parameters.values() - ) + for param in inspect.signature(SamplingParams).parameters.values() ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} - sampling_params = { - attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + sampling_params_dict = { + attr: getattr(args, attr) for attr in sampling_args if hasattr(args, attr) } + sampling_params = SamplingParams(**sampling_params_dict) model_name = os.path.dirname(args.model).rsplit("/")[-1] - # Sample prompts. - prompts = [ - "Shanghai is one of the most prosperous cities in China, with a GDP of over $300 billion. Shanghai has the fastest growing economy in China and is the second busiest port in the world. In addition to being a hub for business, Shanghai is also a major tourist destination. It is known for its diverse culture and many historical sites.\nThe city of Shanghai is located on the coast of the Pacific Ocean in east-central China. It is bordered by Jiangsu Province to the north, Zhejiang Province to the south, and Jiangsu Province to the west.", - "What signs may indicate that a person is experiencing anxiety?", - "Describe how to make cheese pizza.", - "Write a review article on the development of 5G networks.", - ] - - # Create a sampling params object. - sampling_params = SamplingParams(**sampling_params) + prompts = ["哪些迹象可能表明一个人正在经历焦虑?", "描述一下如何制作芝士披萨。", "写一篇有关5G网络研发的综述文章。"] # Create an LLM. llm = LLM(**engine_params) - # process chat template + # Process chat template if args.remove_chat_template: - prompts_new = prompts if "chat" in model_name.lower(): logging.warning( - f"The model name from model path is {model_name}, so we guess you are using the chat model and the additional processing is required for the input prompt. " - f"If the result is not quite correct, please ensure you do not pass --remove_chat_template in CLI." + f"The model name from model path is {model_name}, so we guess you are using the chat model. " + f"If the result is not quite correct, please do not pass --remove_chat_template." ) + prompts_new = prompts else: - # Build chat model promopt - # logging.warning("If you are using a non chat model, please pass the --remove_chat_template in CLI.") - # Try use transformers's apply_chat_template, if chat_template is None, will use defalut template. - # For some old models, the default template may cause bad answers. we don't consider this situation, - # because the Transformers team is advancing the chat template. 
For more informatino about it, - # please refer to https://huggingface.co/docs/transformers/main/chat_templating + logging.warning("If you are using a non-chat model, please pass --remove_chat_template.") try: load_chat_template(llm.get_tokenizer(), args.chat_template) prompts_new = [] @@ -78,41 +90,29 @@ if __name__ == "__main__": messages, tokenize=False, add_generation_prompt=True ) prompts_new.append(text) - except: - logging.warning( - "use tokenizer apply_chat_template function failed, may because of low transformers version...(try use transformers>=4.34.0)" - ) + except Exception as e: + logging.warning(f"apply_chat_template failed: {e}. may because of low transformers version...(try use transformers>=4.34.0)") prompts_new = prompts - outputs = ( - llm.generate(prompts_new, sampling_params, use_tqdm=False) - if isinstance(prompts_new[0], str) - else llm.generate( - sampling_params=sampling_params, - prompt_token_ids=prompts_new, - use_tqdm=False, - ) - ) + # Warmup (optional but avoids first-run overhead in timing) + _ = llm.generate(prompts_new[:1], sampling_params, use_tqdm=False) torch.cuda.synchronize() + # Timed inference start_time = time.perf_counter() - outputs = ( - llm.generate(prompts_new, sampling_params) - if isinstance(prompts_new[0], str) - else llm.generate(sampling_params=sampling_params, prompt_token_ids=prompts_new) - ) + outputs = llm.generate(prompts_new, sampling_params, use_tqdm=False) torch.cuda.synchronize() end_time = time.perf_counter() duration_time = end_time - start_time num_tokens = 0 - # Print the outputs. for i, output in enumerate(outputs): - prompt = prompts[i] # show the origin prompt. actully prompt is "output.prompt" + prompt = prompts[i] generated_text = output.outputs[0].text - num_tokens += len(output.outputs[0].token_ids) - print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n") - num_requests = len(prompts_new) # 请求的数量 - qps = num_requests / duration_time - print(f"requests: {num_requests}, QPS: {qps}, tokens: {num_tokens}, Token/s: {num_tokens/duration_time}") + print(f"Prompt: {prompt}\nGenerated text: {generated_text}\n") + + num_requests = len(prompts_new) + qps = num_requests / duration_time if duration_time > 0 else float('inf') + token_per_sec = num_tokens / duration_time if duration_time > 0 else float('inf') + print(f"requests: {num_requests}, QPS: {qps:.2f}, tokens: {num_tokens}, Token/s: {token_per_sec:.2f}") diff --git a/models/nlp/llm/qwen-7b/vllm/offline_inference.py b/models/nlp/llm/qwen-7b/vllm/offline_inference.py index 3433001da70cbf4e36406755267404f58d6a955f..5289214eaf79836fab8779cf268f82d3bec93e85 100644 --- a/models/nlp/llm/qwen-7b/vllm/offline_inference.py +++ b/models/nlp/llm/qwen-7b/vllm/offline_inference.py @@ -1,10 +1,36 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
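
Each rewritten script (qwen-7b included) now runs a one-prompt warmup generation before the measured pass and brackets the timed call with `torch.cuda.synchronize()`, so the reported duration covers completed device work rather than asynchronous launches plus first-run setup. The measurement skeleton, with stand-ins for the vLLM and torch calls so it runs anywhere:

```python
# Skeleton of the warmup + synchronized timing used in these scripts.
# generate() and sync() are placeholders for llm.generate / torch.cuda.synchronize.
import time

def generate(prompts):            # pretend inference call
    return [p.upper() for p in prompts]

def sync():                       # pretend device-side synchronization
    pass

prompts = ["first prompt", "second prompt", "third prompt"]

_ = generate(prompts[:1])         # warmup pass, excluded from timing

sync()
start = time.perf_counter()
outputs = generate(prompts)       # the only call that is timed
sync()
duration = time.perf_counter() - start

print(f"{len(outputs)} outputs in {duration:.6f}s")
```
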
+ import sys from pathlib import Path import os +import argparse as _argparse +import dataclasses + +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse -import dataclasses import inspect import logging import time @@ -26,46 +52,35 @@ if __name__ == "__main__": parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_args = EngineArgs.from_cli_args(args) + engine_params = dataclasses.asdict(engine_args) + sampling_args = [ param.name - for param in list( - inspect.signature(SamplingParams).parameters.values() - ) + for param in inspect.signature(SamplingParams).parameters.values() ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} - sampling_params = { - attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + sampling_params_dict = { + attr: getattr(args, attr) for attr in sampling_args if hasattr(args, attr) } + sampling_params = SamplingParams(**sampling_params_dict) model_name = os.path.dirname(args.model).rsplit("/")[-1] - # Sample prompts. prompts = ["哪些迹象可能表明一个人正在经历焦虑?", "描述一下如何制作芝士披萨。", "写一篇有关5G网络研发的综述文章。"] - # Create a sampling params object. - sampling_params = SamplingParams(**sampling_params) - # Create an LLM. llm = LLM(**engine_params) - # process chat template + # Process chat template if args.remove_chat_template: if "chat" in model_name.lower(): logging.warning( - f"The model name from model path is {model_name}, so we guess you are using the chat model and the additional processing is required for the input prompt. " - f"If the result is not quite correct, please ensure you do not pass --remove_chat_template in CLI." + f"The model name from model path is {model_name}, so we guess you are using the chat model. " + f"If the result is not quite correct, please do not pass --remove_chat_template." ) prompts_new = prompts else: - # Build chat model promopt - logging.warning( - "If you are using a non chat model, please pass the --remove_chat_template in CLI." - ) - # Try use transformers's apply_chat_template, if chat_template is None, will use defalut template. - # For some old models, the default template may cause bad answers. we don't consider this situation, - # because the Transformers team is advancing the chat template. For more informatino about it, - # please refer to https://huggingface.co/docs/transformers/main/chat_templating + logging.warning("If you are using a non-chat model, please pass --remove_chat_template.") try: load_chat_template(llm.get_tokenizer(), args.chat_template) prompts_new = [] @@ -75,44 +90,29 @@ if __name__ == "__main__": messages, tokenize=False, add_generation_prompt=True ) prompts_new.append(text) - except: - logging.warning( - "use tokenizer apply_chat_template function failed, may because of low transformers version...(try use transformers>=4.34.0)" - ) - + except Exception as e: + logging.warning(f"apply_chat_template failed: {e}. may because of low transformers version...(try use transformers>=4.34.0)") prompts_new = prompts - # Generate texts from the prompts. 
The output is a list of RequestOutput objects - # that contain the prompt, generated text, and other information. - outputs = ( - llm.generate(prompts_new, sampling_params, use_tqdm=False) - if isinstance(prompts_new[0], str) - else llm.generate( - sampling_params=sampling_params, - prompt_token_ids=prompts_new, - use_tqdm=False, - ) - ) + # Warmup (optional but avoids first-run overhead in timing) + _ = llm.generate(prompts_new[:1], sampling_params, use_tqdm=False) torch.cuda.synchronize() + # Timed inference start_time = time.perf_counter() - outputs = ( - llm.generate(prompts_new, sampling_params) - if isinstance(prompts_new[0], str) - else llm.generate(sampling_params=sampling_params, prompt_token_ids=prompts_new) - ) + outputs = llm.generate(prompts_new, sampling_params, use_tqdm=False) torch.cuda.synchronize() end_time = time.perf_counter() duration_time = end_time - start_time num_tokens = 0 - # Print the outputs. for i, output in enumerate(outputs): - prompt = prompts[i] # show the origin prompt. actully prompt is "output.prompt" + prompt = prompts[i] generated_text = output.outputs[0].text - num_tokens += len(output.outputs[0].token_ids) - print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n") - num_requests = len(prompts_new) # 请求的数量 - qps = num_requests / duration_time - print(f"requests: {num_requests}, QPS: {qps}, tokens: {num_tokens}, Token/s: {num_tokens/duration_time}") + print(f"Prompt: {prompt}\nGenerated text: {generated_text}\n") + + num_requests = len(prompts_new) + qps = num_requests / duration_time if duration_time > 0 else float('inf') + token_per_sec = num_tokens / duration_time if duration_time > 0 else float('inf') + print(f"requests: {num_requests}, QPS: {qps:.2f}, tokens: {num_tokens}, Token/s: {token_per_sec:.2f}") diff --git a/models/nlp/llm/qwen2-72b/vllm/offline_inference.py b/models/nlp/llm/qwen2-72b/vllm/offline_inference.py index 6ff276cda138d2671cbbab918c35fda5960a88d2..5289214eaf79836fab8779cf268f82d3bec93e85 100644 --- a/models/nlp/llm/qwen2-72b/vllm/offline_inference.py +++ b/models/nlp/llm/qwen2-72b/vllm/offline_inference.py @@ -1,10 +1,36 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
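
The reporting block at the end of each script (qwen2-72b here) now guards the QPS and Token/s divisions against a zero-length window and prints them with two decimals. Note that `num_tokens` only carries information if it is still accumulated from each output's `token_ids` inside the loop; the sketch below keeps that accumulation, with plain dictionaries standing in for vLLM's `RequestOutput` objects:

```python
# Sketch of the throughput reporting: dummy outputs stand in for vLLM
# RequestOutput objects, and the token counts are illustrative.
duration_time = 0.85                         # seconds measured around generate()
outputs = [
    {"token_ids": list(range(42))},          # 42 generated tokens
    {"token_ids": list(range(57))},
    {"token_ids": list(range(31))},
]

num_tokens = 0
for output in outputs:
    num_tokens += len(output["token_ids"])   # per-request generated tokens

num_requests = len(outputs)
qps = num_requests / duration_time if duration_time > 0 else float("inf")
token_per_sec = num_tokens / duration_time if duration_time > 0 else float("inf")
print(f"requests: {num_requests}, QPS: {qps:.2f}, "
      f"tokens: {num_tokens}, Token/s: {token_per_sec:.2f}")
```
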
+ import sys from pathlib import Path import os +import argparse as _argparse +import dataclasses + +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse -import dataclasses import inspect import logging import time @@ -26,46 +52,35 @@ if __name__ == "__main__": parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_args = EngineArgs.from_cli_args(args) + engine_params = dataclasses.asdict(engine_args) + sampling_args = [ param.name - for param in list( - inspect.signature(SamplingParams).parameters.values() - ) + for param in inspect.signature(SamplingParams).parameters.values() ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} - sampling_params = { - attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + sampling_params_dict = { + attr: getattr(args, attr) for attr in sampling_args if hasattr(args, attr) } + sampling_params = SamplingParams(**sampling_params_dict) model_name = os.path.dirname(args.model).rsplit("/")[-1] - # Sample prompts. prompts = ["哪些迹象可能表明一个人正在经历焦虑?", "描述一下如何制作芝士披萨。", "写一篇有关5G网络研发的综述文章。"] - # Create a sampling params object. - sampling_params = SamplingParams(**sampling_params) - # Create an LLM. llm = LLM(**engine_params) - # process chat template + # Process chat template if args.remove_chat_template: if "chat" in model_name.lower(): logging.warning( - f"The model name from model path is {model_name}, so we guess you are using the chat model and the additional processing is required for the input prompt. " - f"If the result is not quite correct, please ensure you do not pass --remove_chat_template in CLI." + f"The model name from model path is {model_name}, so we guess you are using the chat model. " + f"If the result is not quite correct, please do not pass --remove_chat_template." ) prompts_new = prompts else: - # Build chat model promopt - logging.warning( - "If you are using a non chat model, please pass the --remove_chat_template in CLI." - ) - # Try use transformers's apply_chat_template, if chat_template is None, will use defalut template. - # For some old models, the default template may cause bad answers. we don't consider this situation, - # because the Transformers team is advancing the chat template. For more informatino about it, - # please refer to https://huggingface.co/docs/transformers/main/chat_templating + logging.warning("If you are using a non-chat model, please pass --remove_chat_template.") try: load_chat_template(llm.get_tokenizer(), args.chat_template) prompts_new = [] @@ -75,43 +90,29 @@ if __name__ == "__main__": messages, tokenize=False, add_generation_prompt=True ) prompts_new.append(text) - except: - logging.warning( - "use tokenizer apply_chat_template function failed, may because of low transformers version...(try use transformers>=4.34.0)" - ) + except Exception as e: + logging.warning(f"apply_chat_template failed: {e}. may because of low transformers version...(try use transformers>=4.34.0)") prompts_new = prompts - # Generate texts from the prompts. 
The output is a list of RequestOutput objects - # that contain the prompt, generated text, and other information. - outputs = ( - llm.generate(prompts_new, sampling_params, use_tqdm=False) - if isinstance(prompts_new[0], str) - else llm.generate( - sampling_params=sampling_params, - prompt_token_ids=prompts_new, - use_tqdm=False, - ) - ) + # Warmup (optional but avoids first-run overhead in timing) + _ = llm.generate(prompts_new[:1], sampling_params, use_tqdm=False) torch.cuda.synchronize() + # Timed inference start_time = time.perf_counter() - outputs = ( - llm.generate(prompts_new, sampling_params) - if isinstance(prompts_new[0], str) - else llm.generate(sampling_params=sampling_params, prompt_token_ids=prompts_new) - ) + outputs = llm.generate(prompts_new, sampling_params, use_tqdm=False) torch.cuda.synchronize() end_time = time.perf_counter() duration_time = end_time - start_time num_tokens = 0 - # Print the outputs. for i, output in enumerate(outputs): - prompt = prompts[i] # show the origin prompt. actully prompt is "output.prompt" + prompt = prompts[i] generated_text = output.outputs[0].text - num_tokens += len(output.outputs[0].token_ids) - print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n") - num_requests = len(prompts_new) # 请求的数量 - qps = num_requests / duration_time - print(f"requests: {num_requests}, QPS: {qps}, tokens: {num_tokens}, Token/s: {num_tokens/duration_time}") + print(f"Prompt: {prompt}\nGenerated text: {generated_text}\n") + + num_requests = len(prompts_new) + qps = num_requests / duration_time if duration_time > 0 else float('inf') + token_per_sec = num_tokens / duration_time if duration_time > 0 else float('inf') + print(f"requests: {num_requests}, QPS: {qps:.2f}, tokens: {num_tokens}, Token/s: {token_per_sec:.2f}") diff --git a/models/nlp/llm/qwen2-7b/vllm/offline_inference.py b/models/nlp/llm/qwen2-7b/vllm/offline_inference.py index 6ff276cda138d2671cbbab918c35fda5960a88d2..5289214eaf79836fab8779cf268f82d3bec93e85 100644 --- a/models/nlp/llm/qwen2-7b/vllm/offline_inference.py +++ b/models/nlp/llm/qwen2-7b/vllm/offline_inference.py @@ -1,10 +1,36 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
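
When `--remove_chat_template` is not passed, the qwen2-7b script (like the others) wraps every prompt in a single-turn `messages` list, renders it with the tokenizer's chat template, and falls back to the raw prompts if that fails, for example on a tokenizer without a template or a `transformers` release older than 4.34. A sketch of that wrapping, assuming a chat-capable tokenizer has already been downloaded to a local path (the path below is a placeholder):

```python
# Sketch of the chat-template wrapping with fallback. The tokenizer path is a
# placeholder; any transformers>=4.34 chat-model tokenizer should behave the same.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("/path/to/local/chat-model")
prompts = ["哪些迹象可能表明一个人正在经历焦虑?", "描述一下如何制作芝士披萨。"]

try:
    prompts_new = []
    for prompt in prompts:
        messages = [{"role": "user", "content": prompt}]
        text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        prompts_new.append(text)
except Exception as exc:   # no template, or transformers too old
    print(f"apply_chat_template failed: {exc}; falling back to raw prompts")
    prompts_new = prompts
```
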
+ import sys from pathlib import Path import os +import argparse as _argparse +import dataclasses + +# ====== PATCH: 兼容旧版 argparse 不支持 'deprecated' ====== +_original_add_argument = _argparse._ArgumentGroup.add_argument + +def _patched_add_argument(self, *args, **kwargs): + kwargs.pop('deprecated', None) + return _original_add_argument(self, *args, **kwargs) + +_argparse._ArgumentGroup.add_argument = _patched_add_argument +# ========================================================= sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import argparse -import dataclasses import inspect import logging import time @@ -26,46 +52,35 @@ if __name__ == "__main__": parser = sampling_add_cli_args(parser) args = parser.parse_args() - engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_args = EngineArgs.from_cli_args(args) + engine_params = dataclasses.asdict(engine_args) + sampling_args = [ param.name - for param in list( - inspect.signature(SamplingParams).parameters.values() - ) + for param in inspect.signature(SamplingParams).parameters.values() ] - engine_params = {attr: getattr(args, attr) for attr in engine_args} - sampling_params = { - attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + sampling_params_dict = { + attr: getattr(args, attr) for attr in sampling_args if hasattr(args, attr) } + sampling_params = SamplingParams(**sampling_params_dict) model_name = os.path.dirname(args.model).rsplit("/")[-1] - # Sample prompts. prompts = ["哪些迹象可能表明一个人正在经历焦虑?", "描述一下如何制作芝士披萨。", "写一篇有关5G网络研发的综述文章。"] - # Create a sampling params object. - sampling_params = SamplingParams(**sampling_params) - # Create an LLM. llm = LLM(**engine_params) - # process chat template + # Process chat template if args.remove_chat_template: if "chat" in model_name.lower(): logging.warning( - f"The model name from model path is {model_name}, so we guess you are using the chat model and the additional processing is required for the input prompt. " - f"If the result is not quite correct, please ensure you do not pass --remove_chat_template in CLI." + f"The model name from model path is {model_name}, so we guess you are using the chat model. " + f"If the result is not quite correct, please do not pass --remove_chat_template." ) prompts_new = prompts else: - # Build chat model promopt - logging.warning( - "If you are using a non chat model, please pass the --remove_chat_template in CLI." - ) - # Try use transformers's apply_chat_template, if chat_template is None, will use defalut template. - # For some old models, the default template may cause bad answers. we don't consider this situation, - # because the Transformers team is advancing the chat template. For more informatino about it, - # please refer to https://huggingface.co/docs/transformers/main/chat_templating + logging.warning("If you are using a non-chat model, please pass --remove_chat_template.") try: load_chat_template(llm.get_tokenizer(), args.chat_template) prompts_new = [] @@ -75,43 +90,29 @@ if __name__ == "__main__": messages, tokenize=False, add_generation_prompt=True ) prompts_new.append(text) - except: - logging.warning( - "use tokenizer apply_chat_template function failed, may because of low transformers version...(try use transformers>=4.34.0)" - ) + except Exception as e: + logging.warning(f"apply_chat_template failed: {e}. may because of low transformers version...(try use transformers>=4.34.0)") prompts_new = prompts - # Generate texts from the prompts. 
The output is a list of RequestOutput objects - # that contain the prompt, generated text, and other information. - outputs = ( - llm.generate(prompts_new, sampling_params, use_tqdm=False) - if isinstance(prompts_new[0], str) - else llm.generate( - sampling_params=sampling_params, - prompt_token_ids=prompts_new, - use_tqdm=False, - ) - ) + # Warmup (optional but avoids first-run overhead in timing) + _ = llm.generate(prompts_new[:1], sampling_params, use_tqdm=False) torch.cuda.synchronize() + # Timed inference start_time = time.perf_counter() - outputs = ( - llm.generate(prompts_new, sampling_params) - if isinstance(prompts_new[0], str) - else llm.generate(sampling_params=sampling_params, prompt_token_ids=prompts_new) - ) + outputs = llm.generate(prompts_new, sampling_params, use_tqdm=False) torch.cuda.synchronize() end_time = time.perf_counter() duration_time = end_time - start_time num_tokens = 0 - # Print the outputs. for i, output in enumerate(outputs): - prompt = prompts[i] # show the origin prompt. actully prompt is "output.prompt" + prompt = prompts[i] generated_text = output.outputs[0].text - num_tokens += len(output.outputs[0].token_ids) - print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n") - num_requests = len(prompts_new) # 请求的数量 - qps = num_requests / duration_time - print(f"requests: {num_requests}, QPS: {qps}, tokens: {num_tokens}, Token/s: {num_tokens/duration_time}") + print(f"Prompt: {prompt}\nGenerated text: {generated_text}\n") + + num_requests = len(prompts_new) + qps = num_requests / duration_time if duration_time > 0 else float('inf') + token_per_sec = num_tokens / duration_time if duration_time > 0 else float('inf') + print(f"requests: {num_requests}, QPS: {qps:.2f}, tokens: {num_tokens}, Token/s: {token_per_sec:.2f}") diff --git a/models/nlp/plm/albert/ixrt/README.md b/models/nlp/plm/albert/ixrt/README.md index e2fc2d46b9536addd18d5672454f5774b631334a..3118d15d69e8522995b2bc778a237bc31db92637 100644 --- a/models/nlp/plm/albert/ixrt/README.md +++ b/models/nlp/plm/albert/ixrt/README.md @@ -31,9 +31,9 @@ bash ./scripts/prepare_model_and_dataset.sh ### Install Dependencies Contact the Iluvatar administrator to get the missing packages: -- tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl -- ixrt-1.0.0a0+corex.4.3.0-cp310-cp310-linux_x86_64.whl -- cuda_python-11.8.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- tensorflow-*.whl +- ixrt-*.whl +- cuda_python-*.whl ```bash apt install -y libnuma-dev diff --git a/models/nlp/plm/bert_base_squad_sample/igie/ci/prepare.sh b/models/nlp/plm/bert_base_squad_sample/igie/ci/prepare.sh index 41a05302909e464c70e0ba9008da5971d89f3745..a9227cdd8d2583da6d6f52415dec958e5a7f9921 100644 --- a/models/nlp/plm/bert_base_squad_sample/igie/ci/prepare.sh +++ b/models/nlp/plm/bert_base_squad_sample/igie/ci/prepare.sh @@ -21,16 +21,13 @@ mkdir -p ./data/datasets/bert_base_squad ln -s /mnt/deepspark/data/checkpoints/bert_base_uncased_squad ./data/checkpoints/bert_base_squad/ ln -s /mnt/deepspark/data/datasets/squad ./data/datasets/bert_base_squad/ -if [ -f /etc/redhat-release ]; then - if grep -qi "CentOS" /etc/redhat-release; then - yum install -y numactl - fi -elif [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - yum install -y numactl - fi +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install numactl +elif [[ ${ID} == "centos" ]]; then + yum install -y numactl else - apt install -y numactl + echo "Not Support Os" fi pip3 install 
--no-dependencies transformers diff --git a/models/nlp/plm/bert_large_squad/ixrt/README.md b/models/nlp/plm/bert_large_squad/ixrt/README.md index 9833c52529e3d4afb7a99b7e77b0178600ea1fae..8d81c2306c3b6cd00dd987053f9d2fb836f11d72 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/README.md +++ b/models/nlp/plm/bert_large_squad/ixrt/README.md @@ -26,8 +26,8 @@ wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json -O data/da ### Install Dependencies Contact the Iluvatar administrator to get the missing packages: -- ixrt-1.0.0a0+corex.4.3.0-cp310-cp310-linux_x86_64.whl -- cuda_python-11.8.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- ixrt-*.whl +- cuda_python-*.whl ## Model Inference diff --git a/models/nlp/plm/bert_large_squad/ixrt/builder.py b/models/nlp/plm/bert_large_squad/ixrt/builder.py index 970f91bc27011be0ca26e1ac2a4f4cc255010ec8..f52be8b7d6708f3be01d35be499060c85dd2c3c1 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/builder.py +++ b/models/nlp/plm/bert_large_squad/ixrt/builder.py @@ -1,109 +1,36 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# - -#!/usr/bin/env python3 -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# +import os import argparse -import ctypes import json -import os -import sys +import tensorrt as trt import time - +import sys +import ctypes +import os import numpy as np -import ixrt -from builder_utils import ( # Attention Keys; Transformer Keys; SQuAD Output Keys - B_AOUT, - B_LOUT, - B_MID, - BQKV, - SQD_B, - SQD_W, - W_AOUT, - W_LOUT, - W_MID, - WQKV, - load_onnx_weights_and_quant, - load_pytorch_weights_and_quant, -) - -plugin_lib_name = ( - "libnvinfer_plugin.so" if os.getenv("USE_TRT") == "True" else "libixrt_plugin.so" -) +from builder_utils import load_onnx_weights_and_quant, load_pytorch_weights_and_quant +from builder_utils import WQKV, BQKV # Attention Keys +from builder_utils import W_AOUT, B_AOUT, W_MID, B_MID, W_LOUT, B_LOUT # Transformer Keys +from builder_utils import SQD_W, SQD_B # SQuAD Output Keys + +trt_version = [int(n) for n in trt.__version__.split('.')] +plugin_lib_name = "libnvinfer_plugin.so" if os.getenv('USE_TRT') == 'True' else "libixrt_plugin.so" print(plugin_lib_name) -TRT_LOGGER = ixrt.Logger(ixrt.Logger.WARNING) +TRT_LOGGER = trt.Logger(trt.Logger.WARNING) from load_ixrt_plugin import load_ixrt_plugin - load_ixrt_plugin(TRT_LOGGER) -plg_registry = ixrt.get_plugin_registry() +plg_registry = trt.get_plugin_registry() registry_list = plg_registry.plugin_creator_list -print( - "registry_list: ", - [registry.name + "/" + registry.plugin_version for registry in registry_list], -) -emln_plg_creator = plg_registry.get_plugin_creator( - "CustomEmbLayerNormPluginDynamic_IxRT", "1", "" -) -qkv2_plg_creator = plg_registry.get_plugin_creator( - "CustomQKVToContextPluginDynamic_IxRT", "1", "" -) -skln_plg_creator = plg_registry.get_plugin_creator( - "CustomSkipLayerNormPluginDynamic_IxRT", "1", "" -) -ffn_plg_creator = plg_registry.get_plugin_creator( - "CustomFFNPluginDynamic_IxRT", "1", "" -) -gelu_plg_creator = plg_registry.get_plugin_creator( - "CustomGeluPluginDynamic_IxRT", "1", "" -) +print("registry_list: ", [registry.name + '/' + registry.plugin_version for registry in registry_list]) +emln_plg_creator = plg_registry.get_plugin_creator("CustomEmbLayerNormPluginDynamic_IxRT", "1", "") +qkv2_plg_creator = plg_registry.get_plugin_creator("CustomQKVToContextPluginDynamic_IxRT", "1", "") +skln_plg_creator = plg_registry.get_plugin_creator("CustomSkipLayerNormPluginDynamic_IxRT", "1", "") +ffn_plg_creator = plg_registry.get_plugin_creator("CustomFFNPluginDynamic_IxRT", "1", "") +gelu_plg_creator = plg_registry.get_plugin_creator("CustomGeluPluginDynamic_IxRT", "1", "") fc_plg_creator = plg_registry.get_plugin_creator("CustomFCPluginDynamic_IxRT", "1", "") - class BertConfig: def __init__(self, bert_config_path, use_fp16, use_trt): with open(bert_config_path, "r") as f: @@ -116,46 +43,36 @@ class BertConfig: self.use_fp16 = use_fp16 self.use_trt = use_trt - def set_tensor_name(tensor, prefix, name): tensor.name = prefix + name - -def set_output_name(layer, prefix, name, out_idx=0): +def set_output_name(layer, prefix, name, out_idx = 0): set_tensor_name(layer.get_output(out_idx), prefix, name) - -def set_output_range(layer, maxval, out_idx=0): +def set_output_range(layer, maxval, out_idx = 0): layer.get_output(out_idx).set_dynamic_range(-maxval, maxval) - def get_mha_dtype(config): - dtype = ixrt.float32 + dtype = trt.float32 if config.use_fp16: - dtype = ixrt.float16 + dtype = trt.float16 return int(dtype) - def custom_fc(network, input_tensor, out_dims, W, B): - pf_out_dims = ixrt.PluginField( - "out_dims", np.array(out_dims, dtype=np.int32), 
ixrt.PluginFieldType.INT32 - ) - pf_type = ixrt.PluginField( - "type_id", np.array(int(ixrt.float16), dtype=np.int32), ixrt.PluginFieldType.INT32 - ) - pf_W = ixrt.PluginField("W", W, ixrt.PluginFieldType.FLOAT32) + pf_out_dims = trt.PluginField("out_dims", np.array(out_dims, dtype=np.int32), trt.PluginFieldType.INT32) + pf_type = trt.PluginField("type_id", np.array(int(trt.float16), dtype=np.int32), trt.PluginFieldType.INT32) + pf_W = trt.PluginField("W", W, trt.PluginFieldType.FLOAT32) fields = [pf_out_dims, pf_type, pf_W] if B is not None: - pf_B = ixrt.PluginField("B", B, ixrt.PluginFieldType.FLOAT32) + pf_B = trt.PluginField("B", B, trt.PluginFieldType.FLOAT32) fields.append(pf_B) - pfc = ixrt.PluginFieldCollection(fields) + pfc = trt.PluginFieldCollection(fields) fc_plugin = fc_plg_creator.create_plugin("fcplugin", pfc) plug_inputs = [input_tensor] out_dense = network.add_plugin_v2(plug_inputs, fc_plugin) return out_dense - def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask): """ Add the attention layer @@ -172,23 +89,11 @@ def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask) has_mask = imask is not None # QKV2CTX - pf_type = ixrt.PluginField( - "type_id", - np.array([get_mha_dtype(config)], np.int32), - ixrt.PluginFieldType.INT32, - ) - pf_hidden_size = ixrt.PluginField( - "hidden_size", np.array([hidden_size], np.int32), ixrt.PluginFieldType.INT32 - ) - pf_num_heads = ixrt.PluginField( - "num_heads", np.array([num_heads], np.int32), ixrt.PluginFieldType.INT32 - ) - pf_has_mask = ixrt.PluginField( - "has_mask", np.array([has_mask], np.int32), ixrt.PluginFieldType.INT32 - ) - pfc = ixrt.PluginFieldCollection( - [pf_hidden_size, pf_num_heads, pf_has_mask, pf_type] - ) + pf_type = trt.PluginField("type_id", np.array([get_mha_dtype(config)], np.int32), trt.PluginFieldType.INT32) + pf_hidden_size = trt.PluginField("hidden_size", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32) + pf_num_heads = trt.PluginField("num_heads", np.array([num_heads], np.int32), trt.PluginFieldType.INT32) + pf_has_mask = trt.PluginField("has_mask", np.array([has_mask], np.int32), trt.PluginFieldType.INT32) + pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_has_mask, pf_type]) qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc) qkv_in = [mult_all.get_output(0)] @@ -205,54 +110,43 @@ def skipln(prefix, config, init_dict, network, input_tensor, skip, bias=None): idims = input_tensor.shape hidden_size = idims[2] - dtype = ixrt.float32 + dtype = trt.float32 if config.use_fp16: - dtype = ixrt.float16 + dtype = trt.float16 - pf_ld = ixrt.PluginField( - "ld", np.array([hidden_size], np.int32), ixrt.PluginFieldType.INT32 - ) + pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32) wbeta = init_dict[prefix + "beta"] - pf_beta = ixrt.PluginField("beta", wbeta, ixrt.PluginFieldType.FLOAT32) + pf_beta = trt.PluginField("beta", wbeta, trt.PluginFieldType.FLOAT32) wgamma = init_dict[prefix + "gamma"] - pf_gamma = ixrt.PluginField("gamma", wgamma, ixrt.PluginFieldType.FLOAT32) - pf_type = ixrt.PluginField( - "type_id", np.array([int(dtype)], np.int32), ixrt.PluginFieldType.INT32 - ) + pf_gamma = trt.PluginField("gamma", wgamma, trt.PluginFieldType.FLOAT32) + pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32) - fields = [pf_ld, pf_beta, pf_gamma, pf_type] + fields = [pf_ld, pf_beta, pf_gamma, pf_type ] if bias is not None: - pf_bias = 
ixrt.PluginField("bias", bias, ixrt.PluginFieldType.FLOAT32) + pf_bias = trt.PluginField("bias", bias, trt.PluginFieldType.FLOAT32) fields.append(pf_bias) - pfc = ixrt.PluginFieldCollection(fields) + pfc = trt.PluginFieldCollection(fields) skipln_plug = skln_plg_creator.create_plugin("skipln", pfc) skipln_inputs = [input_tensor, skip] layer = network.add_plugin_v2(skipln_inputs, skipln_plug) return layer - def ffn_trt(prefix, config, init_dict, network, input_tensor): - # FC1 + GELU + # FC1 + GELU B_mid = init_dict[prefix + B_MID] W_mid = init_dict[prefix + W_MID] - mid_dense = network.add_fully_connected( - input_tensor, config.intermediate_size, W_mid, B_mid - ) + mid_dense = network.add_fully_connected(input_tensor, config.intermediate_size, W_mid, B_mid) - dtype = ixrt.float32 + dtype = trt.float32 if config.use_fp16: - dtype = ixrt.float16 - pf_type = ixrt.PluginField( - "type_id", np.array([int(dtype)], np.int32), ixrt.PluginFieldType.INT32 - ) - pf_ld = ixrt.PluginField( - "ld", np.array([config.hidden_size], np.int32), ixrt.PluginFieldType.INT32 - ) - - pfc = ixrt.PluginFieldCollection([pf_type, pf_ld]) + dtype = trt.float16 + pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32) + pf_ld = trt.PluginField("ld", np.array([config.hidden_size], np.int32), trt.PluginFieldType.INT32) + + pfc = trt.PluginFieldCollection([pf_type, pf_ld]) gelu_plug = gelu_plg_creator.create_plugin("gelu", pfc) gelu_inputs = [mid_dense.get_output(0)] @@ -264,61 +158,33 @@ def ffn_trt(prefix, config, init_dict, network, input_tensor): # Dense to hidden size B_lout = init_dict[prefix + B_LOUT] W_lout = init_dict[prefix + W_LOUT] - out_dense = network.add_fully_connected( - intermediate_act, config.hidden_size, W_lout, B_lout - ) + out_dense = network.add_fully_connected(intermediate_act, config.hidden_size, W_lout, B_lout) B_lout = None - out_layer = skipln( - prefix + "output_layernorm_", - config, - init_dict, - network, - out_dense.get_output(0), - input_tensor, - B_lout, - ) + out_layer = skipln(prefix + "output_layernorm_", config, init_dict, network, out_dense.get_output(0), input_tensor, B_lout) return out_layer - def ffn(prefix, config, init_dict, network, input_tensor): # FC1 + GELU B_mid = init_dict[prefix + B_MID] W_mid = init_dict[prefix + W_MID] B_lout = init_dict[prefix + B_LOUT] W_lout = init_dict[prefix + W_LOUT] - pf_out_dim = ixrt.PluginField( - "out_dims", np.array(config.hidden_size, np.int32), ixrt.PluginFieldType.INT32 - ) - pf_type = ixrt.PluginField( - "type_id", np.array(int(ixrt.float16), np.int32), ixrt.PluginFieldType.INT32 - ) - pf_W1 = ixrt.PluginField("W1", W_mid, ixrt.PluginFieldType.FLOAT32) - pf_W2 = ixrt.PluginField("W2", W_lout, ixrt.PluginFieldType.FLOAT32) - pf_B1 = ixrt.PluginField("B1", B_mid, ixrt.PluginFieldType.FLOAT32) - pf_act_type = ixrt.PluginField( - "act_type", np.array(int(3), np.int32), ixrt.PluginFieldType.INT32 - ) - pfc = ixrt.PluginFieldCollection( - [pf_out_dim, pf_type, pf_W1, pf_W2, pf_B1, pf_act_type] - ) + pf_out_dim = trt.PluginField("out_dims", np.array(config.hidden_size, np.int32), trt.PluginFieldType.INT32) + pf_type = trt.PluginField("type_id", np.array(int(trt.float16), np.int32), trt.PluginFieldType.INT32) + pf_W1 = trt.PluginField("W1", W_mid, trt.PluginFieldType.FLOAT32) + pf_W2 = trt.PluginField("W2", W_lout, trt.PluginFieldType.FLOAT32) + pf_B1 = trt.PluginField("B1", B_mid, trt.PluginFieldType.FLOAT32) + pf_act_type = trt.PluginField("act_type", np.array(int(3), np.int32), 
trt.PluginFieldType.INT32) + pfc = trt.PluginFieldCollection([pf_out_dim, pf_type, pf_W1, pf_W2, pf_B1, pf_act_type]) ffn_plug = ffn_plg_creator.create_plugin("ffn", pfc) ffn_inputs = [input_tensor] ffn_layer = network.add_plugin_v2(ffn_inputs, ffn_plug) - out_layer = skipln( - prefix + "output_layernorm_", - config, - init_dict, - network, - ffn_layer.get_output(0), - input_tensor, - B_lout, - ) + out_layer = skipln(prefix + "output_layernorm_", config, init_dict, network, ffn_layer.get_output(0), input_tensor, B_lout) return out_layer - def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imask): """ Add the transformer layer @@ -326,26 +192,16 @@ def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imas idims = input_tensor.shape hidden_size = idims[2] - context_transposed = attention_layer_opt( - prefix + "attention_", config, init_dict, network, input_tensor, imask - ) + context_transposed = attention_layer_opt(prefix + "attention_", config, init_dict, network, input_tensor, imask) attention_heads = context_transposed.get_output(0) - + # FC0 B_aout = init_dict[prefix + B_AOUT] W_aout = init_dict[prefix + W_AOUT] attention_out_fc = custom_fc(network, attention_heads, hidden_size, W_aout, B_aout) - B_aout = None - - skiplayer = skipln( - prefix + "attention_output_layernorm_", - config, - init_dict, - network, - attention_out_fc.get_output(0), - input_tensor, - B_aout, - ) + B_aout = None + + skiplayer = skipln(prefix + "attention_output_layernorm_",config, init_dict, network, attention_out_fc.get_output(0), input_tensor, B_aout) attention_ln = skiplayer.get_output(0) if config.use_trt: @@ -354,21 +210,17 @@ def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imas ffn_layer = ffn(prefix, config, init_dict, network, attention_ln) return ffn_layer - def bert_model(config, init_dict, network, input_tensor, input_mask): """ Create the bert model """ prev_input = input_tensor for layer in range(0, config.num_hidden_layers): - ss = "l{}_".format(layer) - out_layer = transformer_layer_opt( - ss, config, init_dict, network, prev_input, input_mask - ) + ss = "l{}_".format(layer) + out_layer = transformer_layer_opt(ss, config, init_dict, network, prev_input, input_mask) prev_input = out_layer.get_output(0) return prev_input - def squad_output(prefix, config, init_dict, network, input_tensor): """ Create the squad output @@ -388,98 +240,34 @@ def squad_output(prefix, config, init_dict, network, input_tensor): return OUT return dense - -def emb_layernorm( - builder, - network, - config, - weights_dict, - builder_config, - sequence_lengths, - batch_sizes, -): - input_ids = network.add_input( - name="input_ids", - dtype=ixrt.int32, - shape=( - -1 if len(batch_sizes) > 1 else batch_sizes[0], - -1 if len(sequence_lengths) > 1 else sequence_lengths[0], - ), - ) - segment_ids = network.add_input( - name="segment_ids", - dtype=ixrt.int32, - shape=( - -1 if len(batch_sizes) > 1 else batch_sizes[0], - -1 if len(sequence_lengths) > 1 else sequence_lengths[0], - ), - ) - input_mask = network.add_input( - name="input_mask", - dtype=ixrt.int32, - shape=( - -1 if len(batch_sizes) > 1 else batch_sizes[0], - -1 if len(sequence_lengths) > 1 else sequence_lengths[0], - ), - ) +def emb_layernorm(builder, network, config, weights_dict, builder_config, sequence_lengths, batch_sizes): + input_ids = network.add_input(name="input_ids", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else 
sequence_lengths[0])) + segment_ids = network.add_input(name="segment_ids", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) + input_mask = network.add_input(name="input_mask", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) if len(sequence_lengths) > 1: profile = builder.create_optimization_profile() min_shape = (batch_sizes[0], sequence_lengths[0]) opt_shape = (batch_sizes[1], sequence_lengths[1]) max_shape = (batch_sizes[2], sequence_lengths[2]) - assert ( - sequence_lengths[0] <= sequence_lengths[1] - and sequence_lengths[1] <= sequence_lengths[2] - ) - - print("set dynamic shape -> ", min_shape, opt_shape, max_shape) + assert(sequence_lengths[0] <= sequence_lengths[1] and sequence_lengths[1] <= sequence_lengths[2]) + + print('set dynamic shape -> ', min_shape, opt_shape, max_shape) profile.set_shape("input_ids", min_shape, opt_shape, max_shape) profile.set_shape("segment_ids", min_shape, opt_shape, max_shape) profile.set_shape("input_mask", min_shape, opt_shape, max_shape) builder_config.add_optimization_profile(profile) - wbeta = ixrt.PluginField( - "bert_embeddings_layernorm_beta", - weights_dict["bert_embeddings_layernorm_beta"], - ixrt.PluginFieldType.FLOAT32, - ) - - wgamma = ixrt.PluginField( - "bert_embeddings_layernorm_gamma", - weights_dict["bert_embeddings_layernorm_gamma"], - ixrt.PluginFieldType.FLOAT32, - ) - wwordemb = ixrt.PluginField( - "bert_embeddings_word_embeddings", - weights_dict["bert_embeddings_word_embeddings"], - ixrt.PluginFieldType.FLOAT32, - ) - wtokemb = ixrt.PluginField( - "bert_embeddings_token_type_embeddings", - weights_dict["bert_embeddings_token_type_embeddings"], - ixrt.PluginFieldType.FLOAT32, - ) - wposemb = ixrt.PluginField( - "bert_embeddings_position_embeddings", - weights_dict["bert_embeddings_position_embeddings"], - ixrt.PluginFieldType.FLOAT32, - ) - - output_fp16 = ixrt.PluginField( - "output_fp16", - np.array([1 if config.use_fp16 else 0]).astype(np.int32), - ixrt.PluginFieldType.INT32, - ) - mha_type = ixrt.PluginField( - "mha_type_id", - np.array([get_mha_dtype(config)], np.int32), - ixrt.PluginFieldType.INT32, - ) - - pfc = ixrt.PluginFieldCollection( - [wbeta, wgamma, wwordemb, wtokemb, wposemb, output_fp16, mha_type] - ) + wbeta = trt.PluginField("bert_embeddings_layernorm_beta", weights_dict["bert_embeddings_layernorm_beta"], trt.PluginFieldType.FLOAT32) + wgamma = trt.PluginField("bert_embeddings_layernorm_gamma", weights_dict["bert_embeddings_layernorm_gamma"], trt.PluginFieldType.FLOAT32) + wwordemb = trt.PluginField("bert_embeddings_word_embeddings", weights_dict["bert_embeddings_word_embeddings"], trt.PluginFieldType.FLOAT32) + wtokemb = trt.PluginField("bert_embeddings_token_type_embeddings", weights_dict["bert_embeddings_token_type_embeddings"], trt.PluginFieldType.FLOAT32) + wposemb = trt.PluginField("bert_embeddings_position_embeddings", weights_dict["bert_embeddings_position_embeddings"], trt.PluginFieldType.FLOAT32) + + output_fp16 = trt.PluginField("output_fp16", np.array([1 if config.use_fp16 else 0]).astype(np.int32), trt.PluginFieldType.INT32) + mha_type = trt.PluginField("mha_type_id", np.array([get_mha_dtype(config)], np.int32), trt.PluginFieldType.INT32) + + pfc = trt.PluginFieldCollection([wbeta, wgamma, wwordemb, wtokemb, wposemb, output_fp16, mha_type]) fn = emln_plg_creator.create_plugin("embeddings", pfc) if config.use_trt: @@ -489,142 +277,62 @@ 
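
The embedding construction above declares its three inputs with `-1` dimensions and, when several sequence lengths are requested, registers an optimization profile with min/opt/max shapes on the builder config before the engine is serialized. Condensed to its essentials, and assuming an API-compatible `tensorrt` (or ixRT) module is importable, the pattern looks like this; the concrete shape values are illustrative, not the repo's defaults:

```python
# Condensed sketch of the dynamic-shape setup in builder.py: declare inputs
# with -1 dims, then register min/opt/max shapes through a profile.
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(TRT_LOGGER)
flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(flags)
config = builder.create_builder_config()

input_ids = network.add_input(name="input_ids", dtype=trt.int32, shape=(-1, -1))

profile = builder.create_optimization_profile()
profile.set_shape("input_ids", (1, 128), (8, 256), (32, 384))  # min, opt, max
config.add_optimization_profile(profile)
# ... add the BERT layers, then builder.build_serialized_network(network, config)
```
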
def emb_layernorm( segment_ids.second_transpose = (1, 0) input_mask = network.add_shuffle(input_mask) input_mask.second_transpose = (1, 0) - inputs = [ - input_ids.get_output(0), - segment_ids.get_output(0), - input_mask.get_output(0), - ] + inputs = [input_ids.get_output(0), segment_ids.get_output(0), input_mask.get_output(0)] else: inputs = [input_ids, segment_ids, input_mask] emb_layer = network.add_plugin_v2(inputs, fn) return emb_layer - def build_engine(batch_sizes, sequence_lengths, config, weights_dict): - explicit_batch_flag = 1 << int(ixrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - builder = ixrt.Builder(TRT_LOGGER) - with builder.create_network( - explicit_batch_flag - ) as network, builder.create_builder_config() as builder_config: + builder = trt.Builder(TRT_LOGGER) + with builder.create_network(explicit_batch_flag) as network, builder.create_builder_config() as builder_config: if config.use_fp16: - builder_config.set_flag(ixrt.BuilderFlag.FP16) + builder_config.set_flag(trt.BuilderFlag.FP16) # Create the network - emb_layer = emb_layernorm( - builder, - network, - config, - weights_dict, - builder_config, - sequence_lengths, - batch_sizes, - ) + emb_layer = emb_layernorm(builder, network, config, weights_dict, builder_config, sequence_lengths, batch_sizes) embeddings = emb_layer.get_output(0) mask_idx = emb_layer.get_output(1) - + bert_out = bert_model(config, weights_dict, network, embeddings, mask_idx) squad_logits = squad_output("cls_", config, weights_dict, network, bert_out) squad_logits_out = squad_logits.get_output(0) - squad_logits.set_output_type(0, ixrt.float32) network.mark_output(squad_logits_out) build_start_time = time.time() - serialized_engine = builder.build_serialized_network(network, builder_config) - build_time_elapsed = time.time() - build_start_time - TRT_LOGGER.log( - TRT_LOGGER.INFO, "build serialized_engine in {:.3f} Sec".format(build_time_elapsed) - ) - return serialized_engine - + plan = builder.build_serialized_network(network, builder_config) + build_time_elapsed = (time.time() - build_start_time) + TRT_LOGGER.log(TRT_LOGGER.INFO, "build engine in {:.3f} Sec".format(build_time_elapsed)) + return plan def str2bool(v): - return v.lower() in ("yes", "true") - + return v.lower() in ('yes', 'true') def main(): - parser = argparse.ArgumentParser( - description="IxRT BERT Sample", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument( - "-z", - "--use_trt", - type=str2bool, - default=False, - help="Whether to use ixrt or IxRT", - ) - parser.add_argument( - "-x", "--onnx", required=False, help="The ONNX model file path." - ) - parser.add_argument( - "-pt", "--pytorch", required=False, help="The PyTorch checkpoint file path." - ) - parser.add_argument( - "-o", - "--output", - required=True, - default="bert_base_384.engine", - help="The bert engine file, ex bert.engine", - ) - parser.add_argument( - "-b", - "--batch-size", - nargs="+", - help="Batch size(s) to optimize for. The engine will be usable with any batch size below this, but may not be optimal for smaller sizes. Can be specified multiple times to optimize for more than one batch size.", - type=int, - ) - parser.add_argument( - "-s", - "--sequence-length", - nargs="+", - help="Sequence length of the BERT model", - type=int, - ) - parser.add_argument( - "-c", - "--config-dir", - required=True, - help="The folder containing the bert_config.json, which can be downloaded e.g. 
from https://github.com/google-research/bert#pre-trained-models or by running download_models.py in dle/TensorFlow/LanguageModeling/BERT/data/pretrained_models_google", - ) - parser.add_argument( - "-f", - "--fp16", - action="store_true", - help="Indicates that inference should be run in FP16 precision", - required=False, - ) - parser.add_argument( - "-j", - "--squad-json", - default="squad/dev-v1.1.json", - help="squad json dataset used for int8 calibration", - required=False, - ) - parser.add_argument( - "-v", - "--vocab-file", - default="./pre-trained_model/uncased_L-24_H-1024_A-16/vocab.txt", - help="Path to file containing entire understandable vocab", - required=False, - ) - parser.add_argument( - "--verbose", - action="store_true", - help="Turn on verbose logger and set profiling verbosity to DETAILED", - required=False, - ) + parser = argparse.ArgumentParser(description="TensorRT BERT Sample", formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("-z", "--use_trt", type=str2bool, default=False, help = "Whether to use tensorRT or IxRT") + parser.add_argument("-x", "--onnx", required=False, help="The ONNX model file path.") + parser.add_argument("-pt", "--pytorch", required=False, help="The PyTorch checkpoint file path.") + parser.add_argument("-o", "--output", required=True, default="bert_base_384.engine", help="The bert engine file, ex bert.engine") + parser.add_argument("-b", "--batch-size", nargs='+', help="Batch size(s) to optimize for. The engine will be usable with any batch size below this, but may not be optimal for smaller sizes. Can be specified multiple times to optimize for more than one batch size.", type=int) + parser.add_argument("-s", "--sequence-length", nargs='+', help="Sequence length of the BERT model", type=int) + parser.add_argument("-c", "--config-dir", required=True, + help="The folder containing the bert_config.json, which can be downloaded e.g. from https://github.com/google-research/bert#pre-trained-models or by running download_models.py in dle/TensorFlow/LanguageModeling/BERT/data/pretrained_models_google") + parser.add_argument("-f", "--fp16", action="store_true", help="Indicates that inference should be run in FP16 precision", required=False) + parser.add_argument("-j", "--squad-json", default="squad/dev-v1.1.json", help="squad json dataset used for int8 calibration", required=False) + parser.add_argument("-v", "--vocab-file", default="./pre-trained_model/uncased_L-24_H-1024_A-16/vocab.txt", help="Path to file containing entire understandable vocab", required=False) + parser.add_argument("--verbose", action="store_true", help="Turn on verbose logger and set profiling verbosity to DETAILED", required=False) args, _ = parser.parse_known_args() args.batch_size = args.batch_size or [1] args.sequence_length = args.sequence_length or [128] if len(args.sequence_length) not in [1, 3]: - print( - "Error: You must provide either one or three integers." 
- ) + print("Error: You must provide either one or three integers.") sys.exit(1) if len(args.batch_size) not in [1, 3]: @@ -635,9 +343,7 @@ def main(): TRT_LOGGER.min_severity = TRT_LOGGER.VERBOSE bert_config_path = args.config_dir - TRT_LOGGER.log( - TRT_LOGGER.INFO, "Using configuration file: {:}".format(bert_config_path) - ) + TRT_LOGGER.log(TRT_LOGGER.INFO, "Using configuration file: {:}".format(bert_config_path)) config = BertConfig(bert_config_path, args.fp16, args.use_trt) @@ -646,18 +352,13 @@ def main(): elif args.pytorch != None: weights_dict = load_pytorch_weights_and_quant(args.pytorch, config) else: - raise RuntimeError( - "You need either specify TF checkpoint using option --ckpt or ONNX using option --onnx to build TRT BERT model." - ) + raise RuntimeError("You need either specify TF checkpoint using option --ckpt or ONNX using option --onnx to build TRT BERT model.") - with build_engine( - args.batch_size, args.sequence_length, config, weights_dict - ) as serialized_engine: + with build_engine(args.batch_size, args.sequence_length, config, weights_dict) as serialized_engine: TRT_LOGGER.log(TRT_LOGGER.INFO, "Saving Engine to {:}".format(args.output)) with open(args.output, "wb") as fout: fout.write(serialized_engine) TRT_LOGGER.log(TRT_LOGGER.INFO, "Done.") - if __name__ == "__main__": main() diff --git a/models/nlp/plm/bert_large_squad/ixrt/helpers/data_processing.py b/models/nlp/plm/bert_large_squad/ixrt/helpers/data_processing.py index 88459ebfafbd84c11356c0a3dfc3838882e4b2f8..712e1a61d29a198eb276f41a9249b0c66e3786ba 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/helpers/data_processing.py +++ b/models/nlp/plm/bert_large_squad/ixrt/helpers/data_processing.py @@ -159,14 +159,14 @@ def convert_example_to_features(doc_tokens, question_text, tokenizer, max_seq_le input_mask = [1] * len(input_ids) # Zero-pad up to the sequence length. - while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids.append(0) - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length + # while len(input_ids) < max_seq_length: + # input_ids.append(0) + # input_mask.append(0) + # segment_ids.append(0) + + # assert len(input_ids) == max_seq_length + # assert len(input_mask) == max_seq_length + # assert len(segment_ids) == max_seq_length def create_int_feature(values): feature = np.asarray(values, dtype=np.int32, order=None) diff --git a/models/nlp/plm/bert_large_squad/ixrt/perf.py b/models/nlp/plm/bert_large_squad/ixrt/perf.py deleted file mode 100644 index 8343c95d0a57f374f091617b8099951ed66c2ea1..0000000000000000000000000000000000000000 --- a/models/nlp/plm/bert_large_squad/ixrt/perf.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
- - -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -import ctypes -import time -import numpy as np -import tensorrt as trt -import pycuda.driver as cuda -import pycuda.autoinit - -import numpy as np - -TRT_LOGGER = trt.Logger(trt.Logger.ERROR) -from load_ixrt_plugin import load_ixrt_plugin - -class DeviceBuffer(object): - def __init__(self, shape, dtype=trt.int32): - self.buf = cuda.mem_alloc(trt.volume(shape) * 4) - - def binding(self): - return int(self.buf) - - def free(self): - self.buf.free() - - -def main(): - parser = argparse.ArgumentParser(description='BERT Inference Benchmark') - parser.add_argument("-z", "--use_trt", action="store_false", help="Whether to use tensorRT or IxRT") - parser.add_argument("-e", "--engine", help='Path to BERT TensorRT engine') - parser.add_argument('-b', '--batch-size', default=[], action="append", help='Batch size(s) to benchmark. Can be specified multiple times for more than one batch size. This script assumes that the engine has been built with one optimization profile for each batch size, and that these profiles are in order of increasing batch size.', type=int) - parser.add_argument('-s', '--sequence-length', default=128, help='Sequence length of the BERT model', type=int) - parser.add_argument('-i', '--iterations', default=200, help='Number of iterations to run when benchmarking each batch size.', type=int) - parser.add_argument('-w', '--warm-up-runs', default=10, help='Number of iterations to run prior to benchmarking.', type=int) - parser.add_argument('-d', '--duration', default=0.0, help='Minimal number of seconds to run when benchmarking each batch size.', type=float) - parser.add_argument('-r', '--random-seed', required=False, default=12345, help='Random seed.', type=int) - args, _ = parser.parse_known_args() - args.batch_size = args.batch_size or [1] - - # Import necessary plugins for BERT TensorRT - load_ixrt_plugin(TRT_LOGGER) - - with open(args.engine, 'rb') as f: - runtime = trt.Runtime(TRT_LOGGER) - engine = runtime.deserialize_cuda_engine(f.read()) - context = engine.create_execution_context() - - # Allocate buffers large enough to store the largest batch size - max_input_shape = (max(args.batch_size), args.sequence_length) - max_output_shape = (max(args.batch_size), args.sequence_length, 2, 1, 1) - buffers = [ - DeviceBuffer(max_input_shape), - DeviceBuffer(max_input_shape), - DeviceBuffer(max_input_shape), - DeviceBuffer(max_output_shape) - ] - - # Prepare random input - pseudo_vocab_size = 30522 - pseudo_type_vocab_size = 2 - np.random.seed(args.random_seed) - test_word_ids = np.random.randint(0, pseudo_vocab_size, (max(args.batch_size), args.sequence_length), dtype=np.int32) - test_segment_ids = np.random.randint(0, pseudo_type_vocab_size, (max(args.batch_size), args.sequence_length), dtype=np.int32) - test_input_mask = np.ones((max(args.batch_size), args.sequence_length), dtype=np.int32) - - # Copy input h2d - cuda.memcpy_htod(buffers[0].buf, test_word_ids.ravel()) - 
cuda.memcpy_htod(buffers[1].buf, test_segment_ids.ravel()) - cuda.memcpy_htod(buffers[2].buf, test_input_mask.ravel()) - - num_binding_per_profile = engine.num_bindings // engine.num_optimization_profiles - - bench_times = {} - - stream = cuda.Stream() - for batch_size in sorted(args.batch_size): - # # Select engine profile - selected_profile = -1 - for idx in range(engine.num_optimization_profiles): - profile_shape = engine.get_profile_shape(idx, idx * num_binding_per_profile) - if profile_shape[0][0] <= batch_size and profile_shape[2][0] >= batch_size and profile_shape[0][1] <= args.sequence_length and profile_shape[2][1] >= args.sequence_length: - selected_profile = idx - break - if selected_profile == -1: - raise RuntimeError("None of the dynamic shape profiles meets the requirement batch = {} and sequence = {}.".format(batch_size, args.sequence_length)) - context.set_optimization_profile_async(selected_profile, stream.handle) - - # Each profile has unique bindings - binding_idx_offset = selected_profile * num_binding_per_profile - bindings = [0] * binding_idx_offset + [buf.binding() for buf in buffers] - - shapes = { - 0 : (batch_size, args.sequence_length), - 1 : (batch_size, args.sequence_length), - 2 : (batch_size, args.sequence_length), - } - - for binding, shape in shapes.items(): - context.set_binding_shape(binding, shape) - assert context.all_binding_shapes_specified - - # Inference - total_time = 0 - start = cuda.Event() - end = cuda.Event() - - # Warmup - for _ in range(args.warm_up_runs): - context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) - stream.synchronize() - - # Timing loop - times = [] - actual_iterations = 0 - start_time = time.time() - while actual_iterations < args.iterations or (time.time() - start_time) < args.duration: - start.record(stream) - context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) - end.record(stream) - stream.synchronize() - times.append(end.time_since(start)) - actual_iterations += 1 - - # Compute average time, 95th percentile time and 99th percentile time. 
- bench_times[batch_size] = times - - [b.free() for b in buffers] - - for batch_size, times in bench_times.items(): - total_time = sum(times) - avg_time = total_time / float(actual_iterations) - times.sort() - percentile95 = times[int(actual_iterations * 0.95)] - percentile99 = times[int(actual_iterations * 0.99)] - print("Running {:} iterations with Batch Size: {:}\n\tTotal Time: {:} ms \tAverage Time: {:} ms\t95th Percentile Time: {:} ms\t99th Percentile Time: {:}".format(actual_iterations, batch_size, total_time, avg_time, percentile95, percentile99)) - - del context - del engine - -if __name__ == '__main__': - main() diff --git a/models/nlp/plm/bert_large_squad_sample/igie/ci/prepare.sh b/models/nlp/plm/bert_large_squad_sample/igie/ci/prepare.sh index b5ea193cb4c13c28793c45f014951c8b752dd0bb..c8a5bb56f2b1feae68a0cfc22add8f35e52aa8db 100644 --- a/models/nlp/plm/bert_large_squad_sample/igie/ci/prepare.sh +++ b/models/nlp/plm/bert_large_squad_sample/igie/ci/prepare.sh @@ -21,16 +21,13 @@ mkdir -p ./data/datasets/bert_large_squad ln -s /mnt/deepspark/data/checkpoints/bert-large-uncased ./data/checkpoints/bert_large_squad/ ln -s /mnt/deepspark/data/datasets/squad ./data/datasets/bert_large_squad/ -if [ -f /etc/redhat-release ]; then - if grep -qi "CentOS" /etc/redhat-release; then - yum install -y numactl - fi -elif [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - yum install -y numactl - fi +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install numactl +elif [[ ${ID} == "centos" ]]; then + yum install -y numactl else - apt install -y numactl + echo "Not Support Os" fi pip3 install --no-dependencies transformers diff --git a/models/nlp/plm/deberta/ixrt/README.md b/models/nlp/plm/deberta/ixrt/README.md index 087e2b368f2c32b646a674ab46a2c2e776674ebb..92784e5c0bf1d5a3b738053de1cd46827d780ad7 100644 --- a/models/nlp/plm/deberta/ixrt/README.md +++ b/models/nlp/plm/deberta/ixrt/README.md @@ -31,9 +31,9 @@ bash ./scripts/prepare_model_and_dataset.sh ### Install Dependencies Contact the Iluvatar administrator to get the missing packages: -- tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl -- ixrt-1.0.0a0+corex.4.3.0-cp310-cp310-linux_x86_64.whl -- cuda_python-11.8.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- tensorflow-*.whl +- ixrt-*.whl +- cuda_python-*.whl ```bash export PROJ_ROOT=/PATH/TO/DEEPSPARKINFERENCE diff --git a/models/nlp/plm/roberta/ixrt/README.md b/models/nlp/plm/roberta/ixrt/README.md index 346f64a1ef242778ca8f4dfd041dc624e29fda93..b4ebd3788d5521287d1bbab2ae10609eb607f604 100644 --- a/models/nlp/plm/roberta/ixrt/README.md +++ b/models/nlp/plm/roberta/ixrt/README.md @@ -29,9 +29,9 @@ Dataset: ### Install Dependencies Contact the Iluvatar administrator to get the missing packages: -- tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl -- ixrt-1.0.0a0+corex.4.3.0-cp310-cp310-linux_x86_64.whl -- cuda_python-11.8.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- tensorflow-*.whl +- ixrt-*.whl +- cuda_python-*.whl ```bash export PROJ_ROOT=/PATH/TO/DEEPSPARKINFERENCE diff --git a/models/nlp/plm/roformer/ixrt/README.md b/models/nlp/plm/roformer/ixrt/README.md index 3838aa976183fd1ab75a7b6d94101d3ce56f2851..47bd8d8fb38a5373aa3ba2122dd38cc377a8c199 100644 --- a/models/nlp/plm/roformer/ixrt/README.md +++ b/models/nlp/plm/roformer/ixrt/README.md @@ -46,9 +46,9 @@ popd ### Install Dependencies Contact the Iluvatar administrator to get the missing packages: -- 
tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl -- ixrt-1.0.0a0+corex.4.3.0-cp310-cp310-linux_x86_64.whl -- cuda_python-11.8.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl +- tensorflow-*.whl +- ixrt-*.whl +- cuda_python-*.whl ```bash apt install -y libnuma-dev diff --git a/models/nlp/plm/transformer/igie/ci/prepare.sh b/models/nlp/plm/transformer/igie/ci/prepare.sh index 7df2d9d99ff5a17e5cd7a800b61bd29e27125c02..f1f93e8024188a474c5aacaf4b297c8b0c413c42 100644 --- a/models/nlp/plm/transformer/igie/ci/prepare.sh +++ b/models/nlp/plm/transformer/igie/ci/prepare.sh @@ -25,13 +25,11 @@ else echo "Not Support Os" fi -if [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - pip3 install --no-cache-dir --force-reinstall --upgrade --index-url https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn scikit-learn - pip3 install numpy==1.26.4 - yum install -y libgomp - export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) - fi +if [[ $(uname -m) == "aarch64" ]]; then + echo "Architecture is aarch64." + pip3 install --no-cache-dir --force-reinstall --upgrade --index-url https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn scikit-learn + pip3 install numpy==1.26.4 + apt install -y libgomp1 fi pip3 install -r requirements.txt diff --git a/models/nlp/plm/transformer/ixrt/ci/prepare.sh b/models/nlp/plm/transformer/ixrt/ci/prepare.sh index 9a3a30cd83d6341d9cd64fb9fca4f9199f9c5274..46fe1a4d4c88c25ad4178ffc8588fd120061cc8d 100644 --- a/models/nlp/plm/transformer/ixrt/ci/prepare.sh +++ b/models/nlp/plm/transformer/ixrt/ci/prepare.sh @@ -25,13 +25,11 @@ else echo "Not Support Os" fi -if [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - pip3 install --no-cache-dir --force-reinstall --upgrade --index-url https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn scikit-learn - pip3 install numpy==1.26.4 - yum install -y libgomp - export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) - fi +if [[ $(uname -m) == "aarch64" ]]; then + echo "Architecture is aarch64." + pip3 install --no-cache-dir --force-reinstall --upgrade --index-url https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn scikit-learn + pip3 install numpy==1.26.4 + apt install -y libgomp1 fi pip3 install -r requirements.txt diff --git a/models/speech/asr/whisper/vllm/README.md b/models/speech/asr/whisper/vllm/README.md index 995adae271b01011691fc6ef2a7266458e4da115..b20871521bd91b49f5d391c623214dbe039cb91e 100644 --- a/models/speech/asr/whisper/vllm/README.md +++ b/models/speech/asr/whisper/vllm/README.md @@ -23,7 +23,6 @@ Whisper large-v3-turbo is a finetuned version of a pruned Whisper large-v3. In o In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website. 
 ```bash
-pip3 install transformers==4.50.3
 pip3 install librosa
 ```
 
diff --git a/models/speech/asr/whisper/vllm/ci/prepare.sh b/models/speech/asr/whisper/vllm/ci/prepare.sh
index a7e2b7e81929af29a1557a9a6fe607c5ab7a5742..263747e0227558d1c51847ef2b17252500a6a25b 100644
--- a/models/speech/asr/whisper/vllm/ci/prepare.sh
+++ b/models/speech/asr/whisper/vllm/ci/prepare.sh
@@ -16,5 +16,4 @@
 
 set -x
 
-pip3 install transformers==4.50.3
 pip3 install librosa
\ No newline at end of file
diff --git a/models/speech/asr/whisper/vllm/offline_inference_audio_language.py b/models/speech/asr/whisper/vllm/offline_inference_audio_language.py
index ca866ff125c869f263e588dd857d508232fcca10..61d667b36ecdc459b83f8373be949b2ae6818f75 100644
--- a/models/speech/asr/whisper/vllm/offline_inference_audio_language.py
+++ b/models/speech/asr/whisper/vllm/offline_inference_audio_language.py
@@ -21,7 +21,16 @@ Run `pytest tests/models/encoder_decoder/audio/test_whisper.py`.
 """
 import time
 from typing import Optional
+import argparse as _argparse
 
+# ====== PATCH: compatibility shim for older argparse versions that lack the 'deprecated' kwarg ======
+_original_add_argument = _argparse._ArgumentGroup.add_argument
+def _patched_add_argument(self, *args, **kwargs):
+    kwargs.pop('deprecated', None)
+    return _original_add_argument(self, *args, **kwargs)
+
+_argparse._ArgumentGroup.add_argument = _patched_add_argument
+# =========================================================
 import argparse
 import sys
 from pathlib import Path
@@ -144,14 +153,14 @@ if __name__ == "__main__":
     parser = EngineArgs.add_cli_args(parser)
     parser = sampling_add_cli_args(parser)
     args = parser.parse_args()
-    engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)]
+    engine_args = EngineArgs.from_cli_args(args)
+    engine_params = dataclasses.asdict(engine_args)
     sampling_args = [
         param.name
         for param in list(
             inspect.signature(SamplingParams).parameters.values()
         )
     ]
-    engine_params = {attr: getattr(args, attr) for attr in engine_args}
     sampling_params = {
         attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr)
     }
diff --git a/models/speech/speech_synthesis/cosyvoice/ixrt/README.md b/models/speech/speech_synthesis/cosyvoice/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..683b2bbeb331fdd3604306b64a6e0e19d106dae6
--- /dev/null
+++ b/models/speech/speech_synthesis/cosyvoice/ixrt/README.md
@@ -0,0 +1,57 @@
+# CosyVoice2 (ixRT)
+
+## Model Description
+
+CosyVoice2-0.5B is a small speech model designed to understand and generate human-like speech. It can be used for tasks like voice assistants, text-to-speech, or voice cloning. With 0.5 billion parameters, it is lightweight and works well on devices with limited computing power. It focuses on natural-sounding voices and easy customization.
+ +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | dev-only | 26.03 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +### Install Dependencies + +```bash +pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ +pip install onnxsim +git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git +# If you failed to clone the submodule due to network failures, please run the following command until success +cd CosyVoice +git checkout 1fc843514689daa61b471f1bc862893b3a5035a7 +git submodule update --init --recursive + +# cp modify files +cp -rf ../cosyvoice ./ +cp ../asset/zero_shot_reference.wav ./asset/ +cp -r ../scripts ./ +cp ../build_dynamic_engine.py ./ +cp ../inference.py ./ + +mkdir -p pretrained_models +# download CosyVoice2-0.5B model into pretrained_models dir + +onnxsim ./pretrained_models/CosyVoice2-0.5B/flow.decoder.estimator.fp32.onnx ./pretrained_models/CosyVoice2-0.5B/flow.decoder.estimator.fp32_sim.onnx + +# If you encounter sox compatibility issues +# ubuntu +sudo apt-get install sox libsox-dev +# centos +sudo yum install sox sox-devel +``` + +## Model Inference + +```bash +bash scripts/infer_cosyvoice2_fp16.sh +``` + +## References + +- [CosyVoice](https://github.com/FunAudioLLM/CosyVoice/commit/1fc843514689daa61b471f1bc862893b3a5035a7) \ No newline at end of file diff --git a/models/speech/speech_synthesis/cosyvoice/ixrt/asset/zero_shot_reference.wav b/models/speech/speech_synthesis/cosyvoice/ixrt/asset/zero_shot_reference.wav new file mode 100644 index 0000000000000000000000000000000000000000..9a614a7555c0315b7814e3a19f5d701e0c962d0a Binary files /dev/null and b/models/speech/speech_synthesis/cosyvoice/ixrt/asset/zero_shot_reference.wav differ diff --git a/models/speech/speech_synthesis/cosyvoice/ixrt/build_dynamic_engine.py b/models/speech/speech_synthesis/cosyvoice/ixrt/build_dynamic_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..9e6ff4d94cfe10dfdb1b43b8e4fce0a0db1f299f --- /dev/null +++ b/models/speech/speech_synthesis/cosyvoice/ixrt/build_dynamic_engine.py @@ -0,0 +1,70 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
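+
+# Builds and serializes a dynamic-shape engine for the CosyVoice2
+# flow.decoder.estimator ONNX model: a single optimization profile covers the
+# "x", "mask", "mu" and "cond" inputs up to 3000 mel frames, and
+# --precision float16 enables the FP16 builder flag and casts the network
+# input/output tensors to HALF.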
+ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt +from tensorrt import Dims + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + profile = builder.create_optimization_profile() + profile.set_shape("x", Dims([2, 80, 40]), Dims([2, 80, 500]), Dims([2, 80, 3000])) + profile.set_shape("mask", Dims([2, 1, 4]), Dims([2, 1, 500]), Dims([2, 1, 3000])) + profile.set_shape("mu", Dims([2, 80, 4]), Dims([2, 80, 500]), Dims([2, 80, 3000])) + profile.set_shape("cond", Dims([2, 80, 4]), Dims([2, 80, 500]), Dims([2, 80, 3000])) + + tensor_dtype = tensorrt.DataType.FLOAT + if config.precision == "float16": + build_config.set_flag(tensorrt.BuilderFlag.FP16) + tensor_dtype = tensorrt.DataType.HALF + + build_config.add_optimization_profile(profile) + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + # set input and output data type + for i in range(network.num_inputs): + input_tensor = network.get_input(i) + input_tensor.dtype = tensor_dtype + for i in range(network.num_outputs): + output_tensor = network.get_output(i) + output_tensor.dtype = tensor_dtype + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "float32"], default="float16", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/cli/cosyvoice.py b/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/cli/cosyvoice.py new file mode 100644 index 0000000000000000000000000000000000000000..64c7457f34931b345d7a49f44557b3fcbf84a2a0 --- /dev/null +++ b/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/cli/cosyvoice.py @@ -0,0 +1,194 @@ +# Copyright (c) 2024 Alibaba Inc (authors: Xiang Lyu) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
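+
+# CLI wrapper classes for CosyVoice / CosyVoice2: they construct the text and
+# speech frontend, load the llm/flow/hift checkpoints, and can optionally load
+# JIT modules or a pre-built flow.decoder.estimator engine through load_trt.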
+import os +import time +from typing import Generator +from tqdm import tqdm +from hyperpyyaml import load_hyperpyyaml +from modelscope import snapshot_download +import torch +from cosyvoice.cli.frontend import CosyVoiceFrontEnd +from cosyvoice.cli.model import CosyVoiceModel, CosyVoice2Model +from cosyvoice.utils.file_utils import logging +from cosyvoice.utils.class_utils import get_model_type + + +class CosyVoice: + + def __init__(self, model_dir, load_jit=False, load_trt=False, fp16=False, trt_concurrent=1): + self.instruct = True if '-Instruct' in model_dir else False + self.model_dir = model_dir + self.fp16 = fp16 + if not os.path.exists(model_dir): + model_dir = snapshot_download(model_dir) + hyper_yaml_path = '{}/cosyvoice.yaml'.format(model_dir) + if not os.path.exists(hyper_yaml_path): + raise ValueError('{} not found!'.format(hyper_yaml_path)) + with open(hyper_yaml_path, 'r') as f: + configs = load_hyperpyyaml(f) + assert get_model_type(configs) != CosyVoice2Model, 'do not use {} for CosyVoice initialization!'.format(model_dir) + self.frontend = CosyVoiceFrontEnd(configs['get_tokenizer'], + configs['feat_extractor'], + '{}/campplus.onnx'.format(model_dir), + '{}/speech_tokenizer_v1.onnx'.format(model_dir), + '{}/spk2info.pt'.format(model_dir), + configs['allowed_special']) + self.sample_rate = configs['sample_rate'] + if torch.cuda.is_available() is False and (load_jit is True or load_trt is True or fp16 is True): + load_jit, load_trt, fp16 = False, False, False + logging.warning('no cuda device, set load_jit/load_trt/fp16 to False') + self.model = CosyVoiceModel(configs['llm'], configs['flow'], configs['hift'], fp16) + self.model.load('{}/llm.pt'.format(model_dir), + '{}/flow.pt'.format(model_dir), + '{}/hift.pt'.format(model_dir)) + if load_jit: + self.model.load_jit('{}/llm.text_encoder.{}.zip'.format(model_dir, 'fp16' if self.fp16 is True else 'fp32'), + '{}/llm.llm.{}.zip'.format(model_dir, 'fp16' if self.fp16 is True else 'fp32'), + '{}/flow.encoder.{}.zip'.format(model_dir, 'fp16' if self.fp16 is True else 'fp32')) + if load_trt: + self.model.load_trt('{}/flow.decoder.estimator.{}.mygpu.plan'.format(model_dir, 'fp16' if self.fp16 is True else 'fp32'), + '{}/flow.decoder.estimator.fp32_sim.onnx'.format(model_dir), + trt_concurrent, + self.fp16) + del configs + + def list_available_spks(self): + spks = list(self.frontend.spk2info.keys()) + return spks + + def add_zero_shot_spk(self, prompt_text, prompt_speech_16k, zero_shot_spk_id): + assert zero_shot_spk_id != '', 'do not use empty zero_shot_spk_id' + model_input = self.frontend.frontend_zero_shot('', prompt_text, prompt_speech_16k, self.sample_rate, '') + del model_input['text'] + del model_input['text_len'] + self.frontend.spk2info[zero_shot_spk_id] = model_input + return True + + def save_spkinfo(self): + torch.save(self.frontend.spk2info, '{}/spk2info.pt'.format(self.model_dir)) + + def inference_sft(self, tts_text, spk_id, stream=False, speed=1.0, text_frontend=True): + for i in tqdm(self.frontend.text_normalize(tts_text, split=True, text_frontend=text_frontend)): + model_input = self.frontend.frontend_sft(i, spk_id) + start_time = time.time() + logging.info('synthesis text {}'.format(i)) + for model_output in self.model.tts(**model_input, stream=stream, speed=speed): + speech_len = model_output['tts_speech'].shape[1] / self.sample_rate + logging.info('yield speech len {}, rtf {}'.format(speech_len, (time.time() - start_time) / speech_len)) + yield model_output + start_time = time.time() + + def 
inference_zero_shot(self, tts_text, prompt_text, prompt_speech_16k, zero_shot_spk_id='', stream=False, speed=1.0, text_frontend=True): + prompt_text = self.frontend.text_normalize(prompt_text, split=False, text_frontend=text_frontend) + for i in tqdm(self.frontend.text_normalize(tts_text, split=True, text_frontend=text_frontend)): + if (not isinstance(i, Generator)) and len(i) < 0.5 * len(prompt_text): + logging.warning('synthesis text {} too short than prompt text {}, this may lead to bad performance'.format(i, prompt_text)) + model_input = self.frontend.frontend_zero_shot(i, prompt_text, prompt_speech_16k, self.sample_rate, zero_shot_spk_id) + start_time = time.time() + logging.info('synthesis text {}'.format(i)) + for model_output in self.model.tts(**model_input, stream=stream, speed=speed): + speech_len = model_output['tts_speech'].shape[1] / self.sample_rate + logging.info('yield speech len {}, rtf {}'.format(speech_len, (time.time() - start_time) / speech_len)) + yield model_output + start_time = time.time() + + def inference_cross_lingual(self, tts_text, prompt_speech_16k, zero_shot_spk_id='', stream=False, speed=1.0, text_frontend=True): + for i in tqdm(self.frontend.text_normalize(tts_text, split=True, text_frontend=text_frontend)): + model_input = self.frontend.frontend_cross_lingual(i, prompt_speech_16k, self.sample_rate, zero_shot_spk_id) + start_time = time.time() + logging.info('synthesis text {}'.format(i)) + for model_output in self.model.tts(**model_input, stream=stream, speed=speed): + speech_len = model_output['tts_speech'].shape[1] / self.sample_rate + logging.info('yield speech len {}, rtf {}'.format(speech_len, (time.time() - start_time) / speech_len)) + yield model_output + start_time = time.time() + + def inference_instruct(self, tts_text, spk_id, instruct_text, stream=False, speed=1.0, text_frontend=True): + assert isinstance(self.model, CosyVoiceModel), 'inference_instruct is only implemented for CosyVoice!' 
+ if self.instruct is False: + raise ValueError('{} do not support instruct inference'.format(self.model_dir)) + instruct_text = self.frontend.text_normalize(instruct_text, split=False, text_frontend=text_frontend) + for i in tqdm(self.frontend.text_normalize(tts_text, split=True, text_frontend=text_frontend)): + model_input = self.frontend.frontend_instruct(i, spk_id, instruct_text) + start_time = time.time() + logging.info('synthesis text {}'.format(i)) + for model_output in self.model.tts(**model_input, stream=stream, speed=speed): + speech_len = model_output['tts_speech'].shape[1] / self.sample_rate + logging.info('yield speech len {}, rtf {}'.format(speech_len, (time.time() - start_time) / speech_len)) + yield model_output + start_time = time.time() + + def inference_vc(self, source_speech_16k, prompt_speech_16k, stream=False, speed=1.0): + model_input = self.frontend.frontend_vc(source_speech_16k, prompt_speech_16k, self.sample_rate) + start_time = time.time() + for model_output in self.model.tts(**model_input, stream=stream, speed=speed): + speech_len = model_output['tts_speech'].shape[1] / self.sample_rate + logging.info('yield speech len {}, rtf {}'.format(speech_len, (time.time() - start_time) / speech_len)) + yield model_output + start_time = time.time() + + +class CosyVoice2(CosyVoice): + + def __init__(self, model_dir, load_jit=False, load_trt=False, load_vllm=False, fp16=False, trt_concurrent=1): + self.instruct = True if '-Instruct' in model_dir else False + self.model_dir = model_dir + self.fp16 = fp16 + if not os.path.exists(model_dir): + model_dir = snapshot_download(model_dir) + hyper_yaml_path = '{}/cosyvoice2.yaml'.format(model_dir) + if not os.path.exists(hyper_yaml_path): + raise ValueError('{} not found!'.format(hyper_yaml_path)) + with open(hyper_yaml_path, 'r') as f: + configs = load_hyperpyyaml(f, overrides={'qwen_pretrain_path': os.path.join(model_dir, 'CosyVoice-BlankEN')}) + assert get_model_type(configs) == CosyVoice2Model, 'do not use {} for CosyVoice2 initialization!'.format(model_dir) + self.frontend = CosyVoiceFrontEnd(configs['get_tokenizer'], + configs['feat_extractor'], + '{}/campplus.onnx'.format(model_dir), + '{}/speech_tokenizer_v2.onnx'.format(model_dir), + '{}/spk2info.pt'.format(model_dir), + configs['allowed_special']) + self.sample_rate = configs['sample_rate'] + if torch.cuda.is_available() is False and (load_jit is True or load_trt is True or fp16 is True): + load_jit, load_trt, fp16 = False, False, False + logging.warning('no cuda device, set load_jit/load_trt/fp16 to False') + self.model = CosyVoice2Model(configs['llm'], configs['flow'], configs['hift'], fp16) + self.model.load('{}/llm.pt'.format(model_dir), + '{}/flow.pt'.format(model_dir), + '{}/hift.pt'.format(model_dir)) + if load_vllm: + self.model.load_vllm('{}/vllm'.format(model_dir)) + if load_jit: + self.model.load_jit('{}/flow.encoder.{}.zip'.format(model_dir, 'fp16' if self.fp16 is True else 'fp32')) + if load_trt: + self.model.load_trt('{}/flow.decoder.estimator.{}.mygpu.plan'.format(model_dir, 'fp16' if self.fp16 is True else 'fp32'), + '{}/flow.decoder.estimator.fp32_sim.onnx'.format(model_dir), + trt_concurrent, + self.fp16) + del configs + + def inference_instruct(self, *args, **kwargs): + raise NotImplementedError('inference_instruct is not implemented for CosyVoice2!') + + def inference_instruct2(self, tts_text, instruct_text, prompt_speech_16k, zero_shot_spk_id='', stream=False, speed=1.0, text_frontend=True): + assert isinstance(self.model, CosyVoice2Model), 
'inference_instruct2 is only implemented for CosyVoice2!' + for i in tqdm(self.frontend.text_normalize(tts_text, split=True, text_frontend=text_frontend)): + model_input = self.frontend.frontend_instruct2(i, instruct_text, prompt_speech_16k, self.sample_rate, zero_shot_spk_id) + start_time = time.time() + logging.info('synthesis text {}'.format(i)) + for model_output in self.model.tts(**model_input, stream=stream, speed=speed): + speech_len = model_output['tts_speech'].shape[1] / self.sample_rate + logging.info('yield speech len {}, rtf {}'.format(speech_len, (time.time() - start_time) / speech_len)) + yield model_output + start_time = time.time() diff --git a/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/cli/model.py b/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/cli/model.py new file mode 100644 index 0000000000000000000000000000000000000000..957089375030c089d4d83acf3777773916559aa1 --- /dev/null +++ b/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/cli/model.py @@ -0,0 +1,388 @@ +# Copyright (c) 2024 Alibaba Inc (authors: Xiang Lyu) +# 2025 Alibaba Inc (authors: Xiang Lyu, Bofan Zhou) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from typing import Generator +import torch +import numpy as np +import threading +import time +from torch.nn import functional as F +from contextlib import nullcontext +import uuid +from cosyvoice.utils.common import fade_in_out +from cosyvoice.utils.file_utils import convert_onnx_to_trt, export_cosyvoice2_vllm +from cosyvoice.utils.common import TrtContextWrapper + + +class CosyVoiceModel: + + def __init__(self, + llm: torch.nn.Module, + flow: torch.nn.Module, + hift: torch.nn.Module, + fp16: bool = False): + self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + self.llm = llm + self.flow = flow + self.hift = hift + self.fp16 = fp16 + if self.fp16 is True: + self.llm.half() + self.flow.half() + self.token_min_hop_len = 2 * self.flow.input_frame_rate + self.token_max_hop_len = 4 * self.flow.input_frame_rate + self.token_overlap_len = 20 + # mel fade in out + self.mel_overlap_len = int(self.token_overlap_len / self.flow.input_frame_rate * 22050 / 256) + self.mel_window = np.hamming(2 * self.mel_overlap_len) + # hift cache + self.mel_cache_len = 20 + self.source_cache_len = int(self.mel_cache_len * 256) + # speech fade in out + self.speech_window = np.hamming(2 * self.source_cache_len) + # rtf and decoding related + self.stream_scale_factor = 1 + assert self.stream_scale_factor >= 1, 'stream_scale_factor should be greater than 1, change it according to your actual rtf' + self.llm_context = torch.cuda.stream(torch.cuda.Stream(self.device)) if torch.cuda.is_available() else nullcontext() + self.lock = threading.Lock() + # dict used to store session related variable + self.tts_speech_token_dict = {} + self.llm_end_dict = {} + self.mel_overlap_dict = {} + self.flow_cache_dict = {} + self.hift_cache_dict = {} + + def load(self, llm_model, flow_model, hift_model): + 
self.llm.load_state_dict(torch.load(llm_model, map_location=self.device), strict=True) + self.llm.to(self.device).eval() + self.flow.load_state_dict(torch.load(flow_model, map_location=self.device), strict=True) + self.flow.to(self.device).eval() + # in case hift_model is a hifigan model + hift_state_dict = {k.replace('generator.', ''): v for k, v in torch.load(hift_model, map_location=self.device).items()} + self.hift.load_state_dict(hift_state_dict, strict=True) + self.hift.to(self.device).eval() + + def load_jit(self, llm_text_encoder_model, llm_llm_model, flow_encoder_model): + llm_text_encoder = torch.jit.load(llm_text_encoder_model, map_location=self.device) + self.llm.text_encoder = llm_text_encoder + llm_llm = torch.jit.load(llm_llm_model, map_location=self.device) + self.llm.llm = llm_llm + flow_encoder = torch.jit.load(flow_encoder_model, map_location=self.device) + self.flow.encoder = flow_encoder + + def load_trt(self, flow_decoder_estimator_model, flow_decoder_onnx_model, trt_concurrent, fp16): + assert torch.cuda.is_available(), 'tensorrt only supports gpu!' + if not os.path.exists(flow_decoder_estimator_model) or os.path.getsize(flow_decoder_estimator_model) == 0: + convert_onnx_to_trt(flow_decoder_estimator_model, self.get_trt_kwargs(), flow_decoder_onnx_model, fp16) + del self.flow.decoder.estimator + import tensorrt as trt + from tensorrt.utils import load_ixrt_plugin + load_ixrt_plugin() + with open(flow_decoder_estimator_model, 'rb') as f: + estimator_engine = trt.Runtime(trt.Logger(trt.Logger.WARNING)).deserialize_cuda_engine(f.read()) + assert estimator_engine is not None, 'failed to load trt {}'.format(flow_decoder_estimator_model) + self.flow.decoder.estimator = TrtContextWrapper(estimator_engine, trt_concurrent=trt_concurrent, device=self.device) + + def get_trt_kwargs(self): + min_shape = [(2, 80, 4), (2, 1, 4), (2, 80, 4), (2, 80, 4)] + opt_shape = [(2, 80, 500), (2, 1, 500), (2, 80, 500), (2, 80, 500)] + max_shape = [(2, 80, 3000), (2, 1, 3000), (2, 80, 3000), (2, 80, 3000)] + input_names = ["x", "mask", "mu", "cond"] + return {'min_shape': min_shape, 'opt_shape': opt_shape, 'max_shape': max_shape, 'input_names': input_names} + + def llm_job(self, text, prompt_text, llm_prompt_speech_token, llm_embedding, uuid): + with self.llm_context, torch.cuda.amp.autocast(self.fp16 is True and hasattr(self.llm, 'vllm') is False): + if isinstance(text, Generator): + assert isinstance(self, CosyVoice2Model) and not hasattr(self.llm, 'vllm'), 'streaming input text is only implemented for CosyVoice2 and do not support vllm!' 
+ for i in self.llm.inference_bistream(text=text, + prompt_text=prompt_text.to(self.device), + prompt_text_len=torch.tensor([prompt_text.shape[1]], dtype=torch.int32).to(self.device), + prompt_speech_token=llm_prompt_speech_token.to(self.device), + prompt_speech_token_len=torch.tensor([llm_prompt_speech_token.shape[1]], dtype=torch.int32).to(self.device), + embedding=llm_embedding.to(self.device)): + self.tts_speech_token_dict[uuid].append(i) + else: + for i in self.llm.inference(text=text.to(self.device), + text_len=torch.tensor([text.shape[1]], dtype=torch.int32).to(self.device), + prompt_text=prompt_text.to(self.device), + prompt_text_len=torch.tensor([prompt_text.shape[1]], dtype=torch.int32).to(self.device), + prompt_speech_token=llm_prompt_speech_token.to(self.device), + prompt_speech_token_len=torch.tensor([llm_prompt_speech_token.shape[1]], dtype=torch.int32).to(self.device), + embedding=llm_embedding.to(self.device), + uuid=uuid): + self.tts_speech_token_dict[uuid].append(i) + self.llm_end_dict[uuid] = True + + def vc_job(self, source_speech_token, uuid): + self.tts_speech_token_dict[uuid] = source_speech_token.flatten().tolist() + self.llm_end_dict[uuid] = True + + def token2wav(self, token, prompt_token, prompt_feat, embedding, uuid, finalize=False, speed=1.0): + with torch.cuda.amp.autocast(self.fp16): + tts_mel, self.flow_cache_dict[uuid] = self.flow.inference(token=token.to(self.device), + token_len=torch.tensor([token.shape[1]], dtype=torch.int32).to(self.device), + prompt_token=prompt_token.to(self.device), + prompt_token_len=torch.tensor([prompt_token.shape[1]], dtype=torch.int32).to(self.device), + prompt_feat=prompt_feat.to(self.device), + prompt_feat_len=torch.tensor([prompt_feat.shape[1]], dtype=torch.int32).to(self.device), + embedding=embedding.to(self.device), + flow_cache=self.flow_cache_dict[uuid]) + + # mel overlap fade in out + if self.mel_overlap_dict[uuid].shape[2] != 0: + tts_mel = fade_in_out(tts_mel, self.mel_overlap_dict[uuid], self.mel_window) + # append hift cache + if self.hift_cache_dict[uuid] is not None: + hift_cache_mel, hift_cache_source = self.hift_cache_dict[uuid]['mel'], self.hift_cache_dict[uuid]['source'] + tts_mel = torch.concat([hift_cache_mel, tts_mel], dim=2) + else: + hift_cache_source = torch.zeros(1, 1, 0) + # keep overlap mel and hift cache + if finalize is False: + self.mel_overlap_dict[uuid] = tts_mel[:, :, -self.mel_overlap_len:] + tts_mel = tts_mel[:, :, :-self.mel_overlap_len] + tts_speech, tts_source = self.hift.inference(speech_feat=tts_mel, cache_source=hift_cache_source) + if self.hift_cache_dict[uuid] is not None: + tts_speech = fade_in_out(tts_speech, self.hift_cache_dict[uuid]['speech'], self.speech_window) + self.hift_cache_dict[uuid] = {'mel': tts_mel[:, :, -self.mel_cache_len:], + 'source': tts_source[:, :, -self.source_cache_len:], + 'speech': tts_speech[:, -self.source_cache_len:]} + tts_speech = tts_speech[:, :-self.source_cache_len] + else: + if speed != 1.0: + assert self.hift_cache_dict[uuid] is None, 'speed change only support non-stream inference mode' + tts_mel = F.interpolate(tts_mel, size=int(tts_mel.shape[2] / speed), mode='linear') + tts_speech, tts_source = self.hift.inference(speech_feat=tts_mel, cache_source=hift_cache_source) + if self.hift_cache_dict[uuid] is not None: + tts_speech = fade_in_out(tts_speech, self.hift_cache_dict[uuid]['speech'], self.speech_window) + return tts_speech + + def tts(self, text=torch.zeros(1, 0, dtype=torch.int32), flow_embedding=torch.zeros(0, 192), 
llm_embedding=torch.zeros(0, 192), + prompt_text=torch.zeros(1, 0, dtype=torch.int32), + llm_prompt_speech_token=torch.zeros(1, 0, dtype=torch.int32), + flow_prompt_speech_token=torch.zeros(1, 0, dtype=torch.int32), + prompt_speech_feat=torch.zeros(1, 0, 80), source_speech_token=torch.zeros(1, 0, dtype=torch.int32), stream=False, speed=1.0, **kwargs): + # this_uuid is used to track variables related to this inference thread + this_uuid = str(uuid.uuid1()) + with self.lock: + self.tts_speech_token_dict[this_uuid], self.llm_end_dict[this_uuid] = [], False + self.hift_cache_dict[this_uuid] = None + self.mel_overlap_dict[this_uuid] = torch.zeros(1, 80, 0) + self.flow_cache_dict[this_uuid] = torch.zeros(1, 80, 0, 2) + if source_speech_token.shape[1] == 0: + p = threading.Thread(target=self.llm_job, args=(text, prompt_text, llm_prompt_speech_token, llm_embedding, this_uuid)) + else: + p = threading.Thread(target=self.vc_job, args=(source_speech_token, this_uuid)) + p.start() + if stream is True: + token_hop_len = self.token_min_hop_len + while True: + time.sleep(0.1) + if len(self.tts_speech_token_dict[this_uuid]) >= token_hop_len + self.token_overlap_len: + this_tts_speech_token = torch.tensor(self.tts_speech_token_dict[this_uuid][:token_hop_len + self.token_overlap_len]) \ + .unsqueeze(dim=0) + this_tts_speech = self.token2wav(token=this_tts_speech_token, + prompt_token=flow_prompt_speech_token, + prompt_feat=prompt_speech_feat, + embedding=flow_embedding, + uuid=this_uuid, + finalize=False) + yield {'tts_speech': this_tts_speech.cpu()} + with self.lock: + self.tts_speech_token_dict[this_uuid] = self.tts_speech_token_dict[this_uuid][token_hop_len:] + # increase token_hop_len for better speech quality + token_hop_len = min(self.token_max_hop_len, int(token_hop_len * self.stream_scale_factor)) + if self.llm_end_dict[this_uuid] is True and len(self.tts_speech_token_dict[this_uuid]) < token_hop_len + self.token_overlap_len: + break + p.join() + # deal with remain tokens, make sure inference remain token len equals token_hop_len when cache_speech is not None + this_tts_speech_token = torch.tensor(self.tts_speech_token_dict[this_uuid]).unsqueeze(dim=0) + this_tts_speech = self.token2wav(token=this_tts_speech_token, + prompt_token=flow_prompt_speech_token, + prompt_feat=prompt_speech_feat, + embedding=flow_embedding, + uuid=this_uuid, + finalize=True) + yield {'tts_speech': this_tts_speech.cpu()} + else: + # deal with all tokens + p.join() + this_tts_speech_token = torch.tensor(self.tts_speech_token_dict[this_uuid]).unsqueeze(dim=0) + this_tts_speech = self.token2wav(token=this_tts_speech_token, + prompt_token=flow_prompt_speech_token, + prompt_feat=prompt_speech_feat, + embedding=flow_embedding, + uuid=this_uuid, + finalize=True, + speed=speed) + yield {'tts_speech': this_tts_speech.cpu()} + with self.lock: + self.tts_speech_token_dict.pop(this_uuid) + self.llm_end_dict.pop(this_uuid) + self.mel_overlap_dict.pop(this_uuid) + self.hift_cache_dict.pop(this_uuid) + self.flow_cache_dict.pop(this_uuid) + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.current_stream().synchronize() + + +class CosyVoice2Model(CosyVoiceModel): + + def __init__(self, + llm: torch.nn.Module, + flow: torch.nn.Module, + hift: torch.nn.Module, + fp16: bool = False): + self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + self.llm = llm + self.flow = flow + self.hift = hift + self.fp16 = fp16 + if self.fp16 is True: + self.llm.half() + self.flow.half() + # NOTE must matching training 
static_chunk_size + self.token_hop_len = 25 + # hift cache + self.mel_cache_len = 8 + self.source_cache_len = int(self.mel_cache_len * 480) + # speech fade in out + self.speech_window = np.hamming(2 * self.source_cache_len) + # rtf and decoding related + self.llm_context = torch.cuda.stream(torch.cuda.Stream(self.device)) if torch.cuda.is_available() else nullcontext() + self.lock = threading.Lock() + # dict used to store session related variable + self.tts_speech_token_dict = {} + self.llm_end_dict = {} + self.hift_cache_dict = {} + + def load_jit(self, flow_encoder_model): + flow_encoder = torch.jit.load(flow_encoder_model, map_location=self.device) + self.flow.encoder = flow_encoder + + def load_vllm(self, model_dir): + export_cosyvoice2_vllm(self.llm, model_dir, self.device) + from vllm import EngineArgs, LLMEngine + engine_args = EngineArgs(model=model_dir, + skip_tokenizer_init=True, + enable_prompt_embeds=True, + gpu_memory_utilization=0.2) + self.llm.vllm = LLMEngine.from_engine_args(engine_args) + self.llm.lock = threading.Lock() + del self.llm.llm.model.model.layers + + def token2wav(self, token, prompt_token, prompt_feat, embedding, token_offset, uuid, stream=False, finalize=False, speed=1.0): + with torch.cuda.amp.autocast(self.fp16): + tts_mel, _ = self.flow.inference(token=token.to(self.device), + token_len=torch.tensor([token.shape[1]], dtype=torch.int32).to(self.device), + prompt_token=prompt_token.to(self.device), + prompt_token_len=torch.tensor([prompt_token.shape[1]], dtype=torch.int32).to(self.device), + prompt_feat=prompt_feat.to(self.device), + prompt_feat_len=torch.tensor([prompt_feat.shape[1]], dtype=torch.int32).to(self.device), + embedding=embedding.to(self.device), + streaming=stream, + finalize=finalize) + tts_mel = tts_mel[:, :, token_offset * self.flow.token_mel_ratio:] + # append hift cache + if self.hift_cache_dict[uuid] is not None: + hift_cache_mel, hift_cache_source = self.hift_cache_dict[uuid]['mel'], self.hift_cache_dict[uuid]['source'] + tts_mel = torch.concat([hift_cache_mel, tts_mel], dim=2) + else: + hift_cache_source = torch.zeros(1, 1, 0) + # keep overlap mel and hift cache + if finalize is False: + tts_speech, tts_source = self.hift.inference(speech_feat=tts_mel, cache_source=hift_cache_source) + if self.hift_cache_dict[uuid] is not None: + tts_speech = fade_in_out(tts_speech, self.hift_cache_dict[uuid]['speech'], self.speech_window) + self.hift_cache_dict[uuid] = {'mel': tts_mel[:, :, -self.mel_cache_len:], + 'source': tts_source[:, :, -self.source_cache_len:], + 'speech': tts_speech[:, -self.source_cache_len:]} + tts_speech = tts_speech[:, :-self.source_cache_len] + else: + if speed != 1.0: + assert self.hift_cache_dict[uuid] is None, 'speed change only support non-stream inference mode' + tts_mel = F.interpolate(tts_mel, size=int(tts_mel.shape[2] / speed), mode='linear') + tts_speech, tts_source = self.hift.inference(speech_feat=tts_mel, cache_source=hift_cache_source) + if self.hift_cache_dict[uuid] is not None: + tts_speech = fade_in_out(tts_speech, self.hift_cache_dict[uuid]['speech'], self.speech_window) + return tts_speech + + def tts(self, text=torch.zeros(1, 0, dtype=torch.int32), flow_embedding=torch.zeros(0, 192), llm_embedding=torch.zeros(0, 192), + prompt_text=torch.zeros(1, 0, dtype=torch.int32), + llm_prompt_speech_token=torch.zeros(1, 0, dtype=torch.int32), + flow_prompt_speech_token=torch.zeros(1, 0, dtype=torch.int32), + prompt_speech_feat=torch.zeros(1, 0, 80), source_speech_token=torch.zeros(1, 0, dtype=torch.int32), 
stream=False, speed=1.0, **kwargs): + # this_uuid is used to track variables related to this inference thread + this_uuid = str(uuid.uuid1()) + with self.lock: + self.tts_speech_token_dict[this_uuid], self.llm_end_dict[this_uuid] = [], False + self.hift_cache_dict[this_uuid] = None + if source_speech_token.shape[1] == 0: + p = threading.Thread(target=self.llm_job, args=(text, prompt_text, llm_prompt_speech_token, llm_embedding, this_uuid)) + else: + p = threading.Thread(target=self.vc_job, args=(source_speech_token, this_uuid)) + p.start() + if stream is True: + token_offset = 0 + prompt_token_pad = int(np.ceil(flow_prompt_speech_token.shape[1] / self.token_hop_len) * self.token_hop_len - flow_prompt_speech_token.shape[1]) + while True: + time.sleep(0.1) + this_token_hop_len = self.token_hop_len + prompt_token_pad if token_offset == 0 else self.token_hop_len + if len(self.tts_speech_token_dict[this_uuid]) - token_offset >= this_token_hop_len + self.flow.pre_lookahead_len: + this_tts_speech_token = torch.tensor(self.tts_speech_token_dict[this_uuid][:token_offset + this_token_hop_len + self.flow.pre_lookahead_len]).unsqueeze(dim=0) + this_tts_speech = self.token2wav(token=this_tts_speech_token, + prompt_token=flow_prompt_speech_token, + prompt_feat=prompt_speech_feat, + embedding=flow_embedding, + token_offset=token_offset, + uuid=this_uuid, + stream=stream, + finalize=False) + token_offset += this_token_hop_len + yield {'tts_speech': this_tts_speech.cpu()} + if self.llm_end_dict[this_uuid] is True and len(self.tts_speech_token_dict[this_uuid]) - token_offset < this_token_hop_len + self.flow.pre_lookahead_len: + break + p.join() + # deal with remain tokens, make sure inference remain token len equals token_hop_len when cache_speech is not None + this_tts_speech_token = torch.tensor(self.tts_speech_token_dict[this_uuid]).unsqueeze(dim=0) + this_tts_speech = self.token2wav(token=this_tts_speech_token, + prompt_token=flow_prompt_speech_token, + prompt_feat=prompt_speech_feat, + embedding=flow_embedding, + token_offset=token_offset, + uuid=this_uuid, + finalize=True) + yield {'tts_speech': this_tts_speech.cpu()} + else: + # deal with all tokens + p.join() + this_tts_speech_token = torch.tensor(self.tts_speech_token_dict[this_uuid]).unsqueeze(dim=0) + this_tts_speech = self.token2wav(token=this_tts_speech_token, + prompt_token=flow_prompt_speech_token, + prompt_feat=prompt_speech_feat, + embedding=flow_embedding, + token_offset=0, + uuid=this_uuid, + finalize=True, + speed=speed) + yield {'tts_speech': this_tts_speech.cpu()} + with self.lock: + self.tts_speech_token_dict.pop(this_uuid) + self.llm_end_dict.pop(this_uuid) + self.hift_cache_dict.pop(this_uuid) + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.current_stream().synchronize() diff --git a/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/flow/flow_matching.py b/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/flow/flow_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..68a867044f61ab9dd60e1abfbd2cce8beec46b79 --- /dev/null +++ b/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/flow/flow_matching.py @@ -0,0 +1,225 @@ +# Copyright (c) 2024 Alibaba Inc (authors: Xiang Lyu, Zhihao Du) +# 2025 Alibaba Inc (authors: Xiang Lyu, Bofan Zhou) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn.functional as F +from matcha.models.components.flow_matching import BASECFM +from cosyvoice.utils.common import set_all_random_seed + + +import cuda.cudart as cudart +class ConditionalCFM(BASECFM): + def __init__(self, in_channels, cfm_params, n_spks=1, spk_emb_dim=64, estimator: torch.nn.Module = None): + super().__init__( + n_feats=in_channels, + cfm_params=cfm_params, + n_spks=n_spks, + spk_emb_dim=spk_emb_dim, + ) + self.t_scheduler = cfm_params.t_scheduler + self.training_cfg_rate = cfm_params.training_cfg_rate + self.inference_cfg_rate = cfm_params.inference_cfg_rate + in_channels = in_channels + (spk_emb_dim if n_spks > 0 else 0) + # Just change the architecture of the estimator here + self.estimator = estimator + + @torch.inference_mode() + def forward(self, mu, mask, n_timesteps, temperature=1.0, spks=None, cond=None, prompt_len=0, cache=torch.zeros(1, 80, 0, 2)): + """Forward diffusion + + Args: + mu (torch.Tensor): output of encoder + shape: (batch_size, n_feats, mel_timesteps) + mask (torch.Tensor): output_mask + shape: (batch_size, 1, mel_timesteps) + n_timesteps (int): number of diffusion steps + temperature (float, optional): temperature for scaling noise. Defaults to 1.0. + spks (torch.Tensor, optional): speaker ids. Defaults to None. + shape: (batch_size, spk_emb_dim) + cond: Not used but kept for future purposes + + Returns: + sample: generated mel-spectrogram + shape: (batch_size, n_feats, mel_timesteps) + """ + + z = torch.randn_like(mu).to(mu.device).to(mu.dtype) * temperature + cache_size = cache.shape[2] + # fix prompt and overlap part mu and z + if cache_size != 0: + z[:, :, :cache_size] = cache[:, :, :, 0] + mu[:, :, :cache_size] = cache[:, :, :, 1] + z_cache = torch.concat([z[:, :, :prompt_len], z[:, :, -34:]], dim=2) + mu_cache = torch.concat([mu[:, :, :prompt_len], mu[:, :, -34:]], dim=2) + cache = torch.stack([z_cache, mu_cache], dim=-1) + + t_span = torch.linspace(0, 1, n_timesteps + 1, device=mu.device, dtype=mu.dtype) + if self.t_scheduler == 'cosine': + t_span = 1 - torch.cos(t_span * 0.5 * torch.pi) + return self.solve_euler(z, t_span=t_span, mu=mu, mask=mask, spks=spks, cond=cond), cache + + def solve_euler(self, x, t_span, mu, mask, spks, cond, streaming=False): + """ + Fixed euler solver for ODEs. + Args: + x (torch.Tensor): random noise + t_span (torch.Tensor): n_timesteps interpolated + shape: (n_timesteps + 1,) + mu (torch.Tensor): output of encoder + shape: (batch_size, n_feats, mel_timesteps) + mask (torch.Tensor): output_mask + shape: (batch_size, 1, mel_timesteps) + spks (torch.Tensor, optional): speaker ids. Defaults to None. + shape: (batch_size, spk_emb_dim) + cond: Not used but kept for future purposes + """ + t, _, dt = t_span[0], t_span[-1], t_span[1] - t_span[0] + t = t.unsqueeze(dim=0) + + # I am storing this because I can later plot it by putting a debugger here and saving it to a file + # Or in future might add like a return_all_steps flag + sol = [] + + # Do not use concat, it may cause memory format changed and trt infer with wrong results! 
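+        # Pre-allocate fixed batch-of-2 buffers and fill them in-place on every Euler step:
+        # row 0 carries the conditional branch (mu/spks/cond filled in) and row 1 the
+        # unconditional branch (left as zeros), so a single estimator call produces both
+        # outputs needed for the classifier-free guidance combination below.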
+ x_in = torch.zeros([2, 80, x.size(2)], device=x.device, dtype=x.dtype) + mask_in = torch.zeros([2, 1, x.size(2)], device=x.device, dtype=x.dtype) + mu_in = torch.zeros([2, 80, x.size(2)], device=x.device, dtype=x.dtype) + t_in = torch.zeros([2], device=x.device, dtype=x.dtype) + spks_in = torch.zeros([2, 80], device=x.device, dtype=x.dtype) + cond_in = torch.zeros([2, 80, x.size(2)], device=x.device, dtype=x.dtype) + for step in range(1, len(t_span)): + # Classifier-Free Guidance inference introduced in VoiceBox + x_in[:] = x + mask_in[:] = mask + mu_in[0] = mu + t_in[:] = t.unsqueeze(0) + spks_in[0] = spks + cond_in[0] = cond + dphi_dt = self.forward_estimator( + x_in, mask_in, + mu_in, t_in, + spks_in, + cond_in, + streaming + ) + dphi_dt, cfg_dphi_dt = torch.split(dphi_dt, [x.size(0), x.size(0)], dim=0) + dphi_dt = ((1.0 + self.inference_cfg_rate) * dphi_dt - self.inference_cfg_rate * cfg_dphi_dt) + x = x + dt * dphi_dt + t = t + dt + sol.append(x) + if step < len(t_span) - 1: + dt = t_span[step + 1] - t + + return sol[-1].float() + + def forward_estimator(self, x, mask, mu, t, spks, cond, streaming=False): + if isinstance(self.estimator, torch.nn.Module): + return self.estimator(x, mask, mu, t, spks, cond, streaming=streaming) + else: + [estimator, stream], trt_engine = self.estimator.acquire_estimator() + # NOTE need to synchronize when switching stream + torch.cuda.current_stream().synchronize() + estimator.set_input_shape('x', (2, 80, x.size(2))) + estimator.set_input_shape('mask', (2, 1, x.size(2))) + estimator.set_input_shape('mu', (2, 80, x.size(2))) + estimator.set_input_shape('cond', (2, 80, x.size(2))) + data_ptrs = [x.contiguous().data_ptr(), + mask.contiguous().data_ptr(), + mu.contiguous().data_ptr(), + t.contiguous().data_ptr(), + spks.contiguous().data_ptr(), + cond.contiguous().data_ptr(), + x.data_ptr()] + for i, j in enumerate(data_ptrs): + estimator.set_tensor_address(trt_engine.get_tensor_name(i), j) + # run trt engine + assert estimator.execute_async_v3(stream) is True + cudart.cudaStreamSynchronize(stream) + self.estimator.release_estimator(estimator, stream) + return x + + def compute_loss(self, x1, mask, mu, spks=None, cond=None, streaming=False): + """Computes diffusion loss + + Args: + x1 (torch.Tensor): Target + shape: (batch_size, n_feats, mel_timesteps) + mask (torch.Tensor): target mask + shape: (batch_size, 1, mel_timesteps) + mu (torch.Tensor): output of encoder + shape: (batch_size, n_feats, mel_timesteps) + spks (torch.Tensor, optional): speaker embedding. Defaults to None. 
+ shape: (batch_size, spk_emb_dim) + + Returns: + loss: conditional flow matching loss + y: conditional flow + shape: (batch_size, n_feats, mel_timesteps) + """ + b, _, t = mu.shape + + # random timestep + t = torch.rand([b, 1, 1], device=mu.device, dtype=mu.dtype) + if self.t_scheduler == 'cosine': + t = 1 - torch.cos(t * 0.5 * torch.pi) + # sample noise p(x_0) + z = torch.randn_like(x1) + + y = (1 - (1 - self.sigma_min) * t) * z + t * x1 + u = x1 - (1 - self.sigma_min) * z + + # during training, we randomly drop condition to trade off mode coverage and sample fidelity + if self.training_cfg_rate > 0: + cfg_mask = torch.rand(b, device=x1.device) > self.training_cfg_rate + mu = mu * cfg_mask.view(-1, 1, 1) + spks = spks * cfg_mask.view(-1, 1) + cond = cond * cfg_mask.view(-1, 1, 1) + + pred = self.estimator(y, mask, mu, t.squeeze(), spks, cond, streaming=streaming) + loss = F.mse_loss(pred * mask, u * mask, reduction="sum") / (torch.sum(mask) * u.shape[1]) + return loss, y + + +class CausalConditionalCFM(ConditionalCFM): + def __init__(self, in_channels, cfm_params, n_spks=1, spk_emb_dim=64, estimator: torch.nn.Module = None): + super().__init__(in_channels, cfm_params, n_spks, spk_emb_dim, estimator) + set_all_random_seed(0) + self.rand_noise = torch.randn([1, 80, 50 * 300]) + + @torch.inference_mode() + def forward(self, mu, mask, n_timesteps, temperature=1.0, spks=None, cond=None, streaming=False): + """Forward diffusion + + Args: + mu (torch.Tensor): output of encoder + shape: (batch_size, n_feats, mel_timesteps) + mask (torch.Tensor): output_mask + shape: (batch_size, 1, mel_timesteps) + n_timesteps (int): number of diffusion steps + temperature (float, optional): temperature for scaling noise. Defaults to 1.0. + spks (torch.Tensor, optional): speaker ids. Defaults to None. + shape: (batch_size, spk_emb_dim) + cond: Not used but kept for future purposes + + Returns: + sample: generated mel-spectrogram + shape: (batch_size, n_feats, mel_timesteps) + """ + + z = self.rand_noise[:, :, :mu.size(2)].to(mu.device).to(mu.dtype) * temperature + # fix prompt and overlap part mu and z + t_span = torch.linspace(0, 1, n_timesteps + 1, device=mu.device, dtype=mu.dtype) + if self.t_scheduler == 'cosine': + t_span = 1 - torch.cos(t_span * 0.5 * torch.pi) + return self.solve_euler(z, t_span=t_span, mu=mu, mask=mask, spks=spks, cond=cond, streaming=streaming), None diff --git a/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/utils/common.py b/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/utils/common.py new file mode 100644 index 0000000000000000000000000000000000000000..9e58ad79b6bc16a65d87e4b4669daf64b2fafde6 --- /dev/null +++ b/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/utils/common.py @@ -0,0 +1,187 @@ +# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) +# 2024 Alibaba Inc (authors: Xiang Lyu) +# 2025 Alibaba Inc (authors: Xiang Lyu, Bofan Zhou) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# Modified from ESPnet(https://github.com/espnet/espnet) +"""Unility functions for Transformer.""" + +import queue +import random +from typing import List + +import numpy as np +import torch + +IGNORE_ID = -1 + + +def pad_list(xs: List[torch.Tensor], pad_value: int): + """Perform padding for the list of tensors. + + Args: + xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. + pad_value (float): Value for padding. + + Returns: + Tensor: Padded tensor (B, Tmax, `*`). + + Examples: + >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] + >>> x + [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] + >>> pad_list(x, 0) + tensor([[1., 1., 1., 1.], + [1., 1., 0., 0.], + [1., 0., 0., 0.]]) + + """ + max_len = max([len(item) for item in xs]) + batchs = len(xs) + ndim = xs[0].ndim + if ndim == 1: + pad_res = torch.zeros(batchs, + max_len, + dtype=xs[0].dtype, + device=xs[0].device) + elif ndim == 2: + pad_res = torch.zeros(batchs, + max_len, + xs[0].shape[1], + dtype=xs[0].dtype, + device=xs[0].device) + elif ndim == 3: + pad_res = torch.zeros(batchs, + max_len, + xs[0].shape[1], + xs[0].shape[2], + dtype=xs[0].dtype, + device=xs[0].device) + else: + raise ValueError(f"Unsupported ndim: {ndim}") + pad_res.fill_(pad_value) + for i in range(batchs): + pad_res[i, :len(xs[i])] = xs[i] + return pad_res + + +def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor, + ignore_label: int) -> torch.Tensor: + """Calculate accuracy. + + Args: + pad_outputs (Tensor): Prediction tensors (B * Lmax, D). + pad_targets (LongTensor): Target label tensors (B, Lmax). + ignore_label (int): Ignore label id. + + Returns: + torch.Tensor: Accuracy value (0.0 - 1.0). + + """ + pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1), + pad_outputs.size(1)).argmax(2) + mask = pad_targets != ignore_label + numerator = torch.sum( + pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) + denominator = torch.sum(mask) + return (numerator / denominator).detach() + + +def get_padding(kernel_size, dilation=1): + return int((kernel_size * dilation - dilation) / 2) + + +def init_weights(m, mean=0.0, std=0.01): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + m.weight.data.normal_(mean, std) + + +# Repetition Aware Sampling in VALL-E 2 +def ras_sampling(weighted_scores, decoded_tokens, sampling, top_p=0.8, top_k=25, win_size=10, tau_r=0.1): + top_ids = nucleus_sampling(weighted_scores, top_p=top_p, top_k=top_k) + rep_num = (torch.tensor(decoded_tokens[-win_size:]).to(weighted_scores.device) == top_ids).sum().item() + if rep_num >= win_size * tau_r: + top_ids = random_sampling(weighted_scores, decoded_tokens, sampling) + return top_ids + + +def nucleus_sampling(weighted_scores, top_p=0.8, top_k=25): + prob, indices = [], [] + cum_prob = 0.0 + sorted_value, sorted_idx = weighted_scores.softmax(dim=0).sort(descending=True, stable=True) + for i in range(len(sorted_idx)): + # sampling both top-p and numbers. 
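+        # accumulate candidates until the cumulative probability reaches top_p or
+        # top_k tokens have been collected, whichever happens first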
+ if cum_prob < top_p and len(prob) < top_k: + cum_prob += sorted_value[i] + prob.append(sorted_value[i]) + indices.append(sorted_idx[i]) + else: + break + prob = torch.tensor(prob).to(weighted_scores) + indices = torch.tensor(indices, dtype=torch.long).to(weighted_scores.device) + top_ids = indices[prob.multinomial(1, replacement=True)] + return top_ids + + +def random_sampling(weighted_scores, decoded_tokens, sampling): + top_ids = weighted_scores.softmax(dim=0).multinomial(1, replacement=True) + return top_ids + + +def fade_in_out(fade_in_mel, fade_out_mel, window): + device = fade_in_mel.device + fade_in_mel, fade_out_mel = fade_in_mel.cpu(), fade_out_mel.cpu() + mel_overlap_len = int(window.shape[0] / 2) + if fade_in_mel.device == torch.device('cpu'): + fade_in_mel = fade_in_mel.clone() + fade_in_mel[..., :mel_overlap_len] = fade_in_mel[..., :mel_overlap_len] * window[:mel_overlap_len] + \ + fade_out_mel[..., -mel_overlap_len:] * window[mel_overlap_len:] + return fade_in_mel.to(device) + + +def set_all_random_seed(seed): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + +def mask_to_bias(mask: torch.Tensor, dtype: torch.dtype) -> torch.Tensor: + assert mask.dtype == torch.bool + assert dtype in [torch.float32, torch.bfloat16, torch.float16] + mask = mask.to(dtype) + # attention mask bias + # NOTE(Mddct): torch.finfo jit issues + # chunk_masks = (1.0 - chunk_masks) * torch.finfo(dtype).min + mask = (1.0 - mask) * -1.0e+10 + return mask + +import cuda.cudart as cudart + +class TrtContextWrapper: + def __init__(self, trt_engine, trt_concurrent=1, device='cuda:0'): + self.trt_context_pool = queue.Queue(maxsize=trt_concurrent) + self.trt_engine = trt_engine + for _ in range(trt_concurrent): + trt_context = trt_engine.create_execution_context() + err, trt_stream = cudart.cudaStreamCreate() + assert trt_context is not None, 'failed to create trt context, maybe not enough CUDA memory, try reduce current trt concurrent {}'.format(trt_concurrent) + self.trt_context_pool.put([trt_context, trt_stream]) + assert self.trt_context_pool.empty() is False, 'no avaialbe estimator context' + + def acquire_estimator(self): + return self.trt_context_pool.get(), self.trt_engine + + def release_estimator(self, context, stream): + self.trt_context_pool.put([context, stream]) diff --git a/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/utils/file_utils.py b/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/utils/file_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..950e5ee001d03b4451a5949f2266c5b075cb342c --- /dev/null +++ b/models/speech/speech_synthesis/cosyvoice/ixrt/cosyvoice/utils/file_utils.py @@ -0,0 +1,131 @@ +# Copyright (c) 2021 Mobvoi Inc. (authors: Binbin Zhang) +# 2024 Alibaba Inc (authors: Xiang Lyu, Zetao Hu) +# 2025 Alibaba Inc (authors: Xiang Lyu, Yabin Li) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
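+
+# Helpers for reading list/JSON manifests and loading audio, plus utilities to convert
+# an ONNX model into a TensorRT/ixRT engine and to export the CosyVoice2 LLM weights
+# for vLLM serving.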
+ +import os +import json +import torch +import torchaudio +import logging +logging.getLogger('matplotlib').setLevel(logging.WARNING) +logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(levelname)s %(message)s') + + +def read_lists(list_file): + lists = [] + with open(list_file, 'r', encoding='utf8') as fin: + for line in fin: + lists.append(line.strip()) + return lists + + +def read_json_lists(list_file): + lists = read_lists(list_file) + results = {} + for fn in lists: + with open(fn, 'r', encoding='utf8') as fin: + results.update(json.load(fin)) + return results + + +def load_wav(wav, target_sr): + speech, sample_rate = torchaudio.load(wav, backend='soundfile') + speech = speech.mean(dim=0, keepdim=True) + if sample_rate != target_sr: + assert sample_rate > target_sr, 'wav sample rate {} must be greater than {}'.format(sample_rate, target_sr) + speech = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sr)(speech) + return speech + + +def convert_onnx_to_trt(trt_model, trt_kwargs, onnx_model, fp16): + import tensorrt as trt + from tensorrt.utils import load_ixrt_plugin + logging.info("Converting onnx to trt...") + network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + logger = trt.Logger(trt.Logger.INFO) + load_ixrt_plugin(logger) + builder = trt.Builder(logger) + network = builder.create_network(network_flags) + parser = trt.OnnxParser(network, logger) + config = builder.create_builder_config() + config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 32) # 4GB + if fp16: + config.set_flag(trt.BuilderFlag.FP16) + profile = builder.create_optimization_profile() + # load onnx model + with open(onnx_model, "rb") as f: + if not parser.parse(f.read()): + for error in range(parser.num_errors): + print(parser.get_error(error)) + raise ValueError('failed to parse {}'.format(onnx_model)) + # set input shapes + for i in range(len(trt_kwargs['input_names'])): + profile.set_shape(trt_kwargs['input_names'][i], trt_kwargs['min_shape'][i], trt_kwargs['opt_shape'][i], trt_kwargs['max_shape'][i]) + tensor_dtype = trt.DataType.HALF if fp16 else trt.DataType.FLOAT + # set input and output data type + for i in range(network.num_inputs): + input_tensor = network.get_input(i) + input_tensor.dtype = tensor_dtype + for i in range(network.num_outputs): + output_tensor = network.get_output(i) + output_tensor.dtype = tensor_dtype + config.add_optimization_profile(profile) + engine_bytes = builder.build_serialized_network(network, config) + # save trt engine + with open(trt_model, "wb") as f: + f.write(engine_bytes) + logging.info("Succesfully convert onnx to trt...") + + +def export_cosyvoice2_vllm(model, model_path, device): + if os.path.exists(model_path): + return + pad_to = DEFAULT_VOCAB_PADDING_SIZE = 64 + vocab_size = model.speech_embedding.num_embeddings + feature_size = model.speech_embedding.embedding_dim + pad_vocab_size = ((vocab_size + pad_to - 1) // pad_to) * pad_to + + dtype = torch.bfloat16 + # lm_head + new_lm_head = torch.nn.Linear(in_features=feature_size, out_features=pad_vocab_size, bias=True) + with torch.no_grad(): + new_lm_head.weight[:vocab_size] = model.llm_decoder.weight + new_lm_head.bias[:vocab_size] = model.llm_decoder.bias + new_lm_head.weight[vocab_size:] = 0 + new_lm_head.bias[vocab_size:] = 0 + model.llm.model.lm_head = new_lm_head + new_codec_embed = torch.nn.Linear(in_features=feature_size, out_features=pad_vocab_size) + # embed_tokens + embed_tokens = model.llm.model.model.embed_tokens + with torch.no_grad(): + 
new_codec_embed.weight[:vocab_size] = model.speech_embedding.weight + new_codec_embed.weight[vocab_size:] = 0 + model.llm.model.set_input_embeddings(new_codec_embed) + model.llm.model.to(device) + model.llm.model.to(dtype) + tmp_vocab_size = model.llm.model.config.vocab_size + tmp_tie_embedding = model.llm.model.config.tie_word_embeddings + del model.llm.model.generation_config.eos_token_id + del model.llm.model.config.bos_token_id + del model.llm.model.config.eos_token_id + model.llm.model.config.vocab_size = pad_vocab_size + model.llm.model.config.tie_word_embeddings = False + model.llm.model.config.use_bias = True + model.llm.model.save_pretrained(model_path) + os.system('sed -i s@Qwen2ForCausalLM@CosyVoice2ForCausalLM@g {}/config.json'.format(os.path.abspath(model_path))) + model.llm.model.config.vocab_size = tmp_vocab_size + model.llm.model.config.tie_word_embeddings = tmp_tie_embedding + model.llm.model.set_input_embeddings(embed_tokens) diff --git a/models/speech/speech_synthesis/cosyvoice/ixrt/inference.py b/models/speech/speech_synthesis/cosyvoice/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..40e6087c8afe83b9f00357d8d4cf354386f2a69a --- /dev/null +++ b/models/speech/speech_synthesis/cosyvoice/ixrt/inference.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import json +import os +import time +import numpy as np + +import sys +sys.path.append('third_party/Matcha-TTS') +from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2 +from cosyvoice.utils.file_utils import load_wav +import torchaudio + +import soundfile as sf +from pystoi import stoi + +def main(config): + use_fp16 = False + if config.precision == "float16": + use_fp16 = True + cosyvoice = CosyVoice2(config.model_dir, load_jit=False, load_trt=True, load_vllm=False, fp16=use_fp16) + + prompt_speech_16k = load_wav('./asset/zero_shot_prompt.wav', 16000) + + start_time = time.time() + for i, j in enumerate(cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '希望你以后能够做的比我还好呦。', prompt_speech_16k, stream=False)): + torchaudio.save('zero_shot_{}_ixrt_fp16.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate) + end_time = time.time() + forward_time = end_time - start_time + + print("execute time {} s:".format(forward_time)) + + ref, sr0 = sf.read('./asset/zero_shot_reference.wav') + deg, sr1 = sf.read('zero_shot_0_ixrt_fp16.wav') + if sr0 != sr1: + print('采样率错误') + exit(1) + + min_len = min(len(ref), len(deg)) + ref, deg = ref[:min_len], deg[:min_len] + stoi_score = stoi(ref, deg, sr0, extended=False) + if stoi_score < config.stoi_target: + print('精度异常') + exit(1) + print('stoi_score:',stoi_score) + exit() + + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument("--precision", type=str, choices=["float16", "float32"], default="float16", + help="The precision of datatype") + parser.add_argument( + "--model_dir", + type=str, + default="pretrained_models/CosyVoice2-0.5B", + help="model dir path", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=5, help="loop count") + parser.add_argument("--stoi_target", type=float, default=0.8, help="target mAP") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/speech/speech_synthesis/cosyvoice/ixrt/requirements.txt 
b/models/speech/speech_synthesis/cosyvoice/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..452c4bfb6e7de2254ad0c163d936335cacf358bc --- /dev/null +++ b/models/speech/speech_synthesis/cosyvoice/ixrt/requirements.txt @@ -0,0 +1,32 @@ +conformer==0.3.2 +deepspeed==0.15.1; sys_platform == 'linux' +diffusers==0.29.0 +fastapi==0.115.6 +fastapi-cli==0.0.4 +gdown==5.1.0 +gradio==5.4.0 +grpcio==1.57.0 +grpcio-tools==1.57.0 +hydra-core==1.3.2 +HyperPyYAML +inflect==7.3.1 +librosa==0.10.2 +lightning==2.2.4 +matplotlib==3.7.5 +modelscope==1.20.0 +networkx==3.1 +omegaconf==2.3.0 +onnx==1.16.0 +openai-whisper==20231117 +protobuf==4.25 +pyarrow==18.1.0 +pydantic==2.7.0 +pyworld==0.3.4 +rich==13.7.1 +soundfile==0.12.1 +tensorboard==2.14.0 +transformers==4.51.3 +uvicorn==0.30.0 +wetext==0.0.4 +wget==3.2 +pystoi diff --git a/models/speech/speech_synthesis/cosyvoice/ixrt/scripts/infer_cosyvoice2_fp16.sh b/models/speech/speech_synthesis/cosyvoice/ixrt/scripts/infer_cosyvoice2_fp16.sh new file mode 100644 index 0000000000000000000000000000000000000000..62048b72eed2228d9be4f057755b0b0a2c5f7d53 --- /dev/null +++ b/models/speech/speech_synthesis/cosyvoice/ixrt/scripts/infer_cosyvoice2_fp16.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
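+
+# Build the FP16 ixRT engine for the flow decoder estimator (skipped if the engine file
+# already exists), then run zero-shot inference and fail when the STOI score is below --tgt.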
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +WARM_UP=-1 +TGT=0.8 +LOOP_COUNT=-1 +RUN_MODE=FPS +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +CHECKPOINTS_DIR="${PROJ_DIR}/pretrained_models/CosyVoice2-0.5B" +RUN_DIR="${PROJ_DIR}" +ORIGINE_MODEL=${CHECKPOINTS_DIR}/flow.decoder.estimator.fp32_sim.onnx + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo ====================== Model Info ====================== +echo Model Name : cosyvoice2 +echo Onnx Path : ${ORIGINE_MODEL} + +CURRENT_MODEL=${CHECKPOINTS_DIR}/flow.decoder.estimator.fp32_sim.onnx + +# Build Engine +echo Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/flow.decoder.estimator.fp16.mygpu.plan +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_dynamic_engine.py \ + --precision float16 \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +echo Inference +cd ${RUN_DIR} +python3 inference.py \ + --model_dir ${CHECKPOINTS_DIR} \ + --precision ${PRECISION} \ + --stoi_target ${TGT}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/speech/speech_synthesis/cosyvoice/pytorch/README.md b/models/speech/speech_synthesis/cosyvoice/pytorch/README.md index 677f5919d2b74a32b8dfe6c95d67fafb2f1f23e1..05ad42ba46b3533d83aca0fbd445439ce7a47c35 100644 --- a/models/speech/speech_synthesis/cosyvoice/pytorch/README.md +++ b/models/speech/speech_synthesis/cosyvoice/pytorch/README.md @@ -22,8 +22,9 @@ Pretrained model: pip3 install -r requirements.txt pip3 install onnxruntime==1.18.0 git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git -# If you failed to clone the submodule due to network failures, please run the following command until success cd CosyVoice +git checkout 2db78e705835e56778f69b80cae51e4314d635b0 +# If you failed to clone the submodule due to network failures, please run the following command until success git submodule update --init --recursive mkdir -p pretrained_models @@ -39,12 +40,32 @@ sudo yum install sox sox-devel ## Model Inference ```bash -cp ../inference_test.py ./ -python3 inference_test.py +# Make sure run cosyvoice2_example() in example.py, default is run cosyvoice3_example() +python3 example.py +``` + +## Model Eval +```bash +git clone https://github.com/FunAudioLLM/CV3-Eval.git +cd CV3-Eval +mv ../CosyVoice ./ +pip3 install -r requirements.txt +pip3 install jiwer==3.1.0 +cp ../get_infer_wavs.py scripts/ +cp ../inference.sh scripts/ + +# if you want to run eval for en/hrad_en set, please add the following command +# cp -f ../run_wer.py utils/ + +cp ../run_inference_fp16_eval.sh ./ +bash run_inference_fp16_eval.sh ``` ## Model Results +| Model | Model Size | Precision | test-zh
CER/WER(%) ↓ | test_zh
Speaker Similarity(%) ↑ | +| :---- | :----: | :----: | :----: | :----: | +| CosyVoice2 | 0.5B | FP16 | 4.525 | 77.23 | ## References -- [CosyVoice](https://github.com/FunAudioLLM/CosyVoice/commit/0a496c18f78ca993c63f6d880fcc60778bfc85c1) \ No newline at end of file +- [CosyVoice](https://github.com/FunAudioLLM/CosyVoice/commit/2db78e705835e56778f69b80cae51e4314d635b0) \ No newline at end of file diff --git a/models/speech/speech_synthesis/cosyvoice/pytorch/ci/prepare.sh b/models/speech/speech_synthesis/cosyvoice/pytorch/ci/prepare.sh index 7eb40df5168b2f5e6360d08971d85f39beeb44e9..c7821ca43019e924c127d956485168bf21437b7e 100644 --- a/models/speech/speech_synthesis/cosyvoice/pytorch/ci/prepare.sh +++ b/models/speech/speech_synthesis/cosyvoice/pytorch/ci/prepare.sh @@ -21,7 +21,20 @@ pip3 install -r requirements.txt pip3 install onnxruntime==1.18.0 cp -r /mnt/deepspark/data/repos/CosyVoice ./ cd CosyVoice +git checkout 2db78e705835e56778f69b80cae51e4314d635b0 mkdir -p pretrained_models ln -s /mnt/deepspark/data/checkpoints/CosyVoice2-0.5B pretrained_models/ -cp ../inference_test.py ./ \ No newline at end of file +python3 example.py + +cd .. +mkdir -p /root/.cache/modelscope/hub/iic +ln -s /mnt/deepspark/data/checkpoints/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch /root/.cache/modelscope/hub/iic/ +cp -r /mnt/deepspark/data/repos/CV3-Eval ./ +cd CV3-Eval +mv ../CosyVoice ./ +pip3 install -r requirements.txt +pip3 install jiwer==3.1.0 +cp ../get_infer_wavs.py scripts/ +cp ../inference.sh scripts/ +cp ../run_inference_fp16_eval.sh ./ \ No newline at end of file diff --git a/models/speech/speech_synthesis/cosyvoice/pytorch/get_infer_wavs.py b/models/speech/speech_synthesis/cosyvoice/pytorch/get_infer_wavs.py new file mode 100644 index 0000000000000000000000000000000000000000..db0e3540b7276117c54c7e7ea336403e89c42543 --- /dev/null +++ b/models/speech/speech_synthesis/cosyvoice/pytorch/get_infer_wavs.py @@ -0,0 +1,63 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
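+
+# Synthesize evaluation wavs with CosyVoice2 zero-shot inference: for each utterance id in
+# --input_text, pair it with the matching prompt text and prompt wav, generate speech, and
+# save the result to --output_dir/<uttid>.wav.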
+ +import argparse +import os +import sys +from tqdm import tqdm +import torchaudio +sys.path.append('third_party/Matcha-TTS') +from cosyvoice.cli.cosyvoice import CosyVoice2 + +def parse_args(): + parser = argparse.ArgumentParser(description='Extract CosyVoice2 inference embeddings.') + parser.add_argument('--inference_dir', default='', type=str, help='The root path inference eval') + parser.add_argument('--input_text', default='', type=str, help='The text required for inference wavs') + parser.add_argument('--prompt_text', default='', type=str, help='The text required for prompt wavs') + parser.add_argument('--prompt_wav_scp', default='', type=str, help='The path of prompt wavs') + parser.add_argument('--fp16', action='store_true', help='Enable FP16 precision') + parser.add_argument('--output_dir', default='', type=str, help='Output directory for inference wavs') + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + print(args) + + input_texts = {} + for line in tqdm(open(args.input_text, 'r', encoding='utf-8').readlines()): + utt_i, infer_text = line.strip().split(maxsplit=1) + input_texts[utt_i] = infer_text + prompt_texts = {} + for line in tqdm(open(args.prompt_text, 'r', encoding='utf-8').readlines()): + utt_p, pro_text = line.strip().split(maxsplit=1) + prompt_texts[utt_p] = pro_text + prompt_wavs = {} + for line in tqdm(open(args.prompt_wav_scp, 'r', encoding='utf-8').readlines()): + utt, prompt_wav = line.strip().split(maxsplit=1) + prompt_wavs[utt] = os.path.join(args.inference_dir, prompt_wav) + + # inference output wavs + cosyvoice = CosyVoice2('pretrained_models/CosyVoice2-0.5B', load_jit=False, load_trt=False, fp16=args.fp16) + for uttid in tqdm(input_texts.keys()): + for i, j in enumerate(cosyvoice.inference_zero_shot(input_texts[uttid], prompt_texts[uttid], prompt_wavs[uttid], stream=False)): + wav_name = os.path.join(args.output_dir, f"{uttid}.wav") + torchaudio.save(wav_name, j['tts_speech'], cosyvoice.sample_rate) + + print(f"Inference results have been saved to {args.output_dir}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/speech/speech_synthesis/cosyvoice/pytorch/inference.sh b/models/speech/speech_synthesis/cosyvoice/pytorch/inference.sh new file mode 100644 index 0000000000000000000000000000000000000000..5ec9210c18d5477bb52bc9ae3537772e6b77799e --- /dev/null +++ b/models/speech/speech_synthesis/cosyvoice/pytorch/inference.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
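+
+# Usage: bash inference.sh <inference_dir> <data_dir> <output_dir>
+# Installs system audio dependencies, prepares the CosyVoice repo and the CosyVoice2-0.5B
+# checkpoint if needed, then runs get_infer_wavs.py in FP16 to synthesize wavs for the
+# given eval set into <output_dir>.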
+ +inference_dir=$1 +data_dir=$2 +output_dir=$3 + +OS_ID=$(awk -F= '/^ID=/{print $2}' /etc/os-release | tr -d '"') +if [[ "$OS_ID" == "ubuntu" ]]; then + apt-get install -y sox libsox-dev libmagic1 libmagic-dev libgl1 libglib2.0-0 + apt-get update && apt-get install -y ffmpeg +elif [[ "$OS_ID" == "centos" ]]; then + yum install -y sox sox-devel file-devel mesa-libGL + yum install -y epel-release + yum install -y https://mirrors.rpmfusion.org/free/el/rpmfusion-free-release-7.noarch.rpm + yum install -y https://mirrors.rpmfusion.org/nonfree/el/rpmfusion-nonfree-release-7.noarch.rpm + yum install -y ffmpeg ffmpeg-devel + ffmpeg -version +fi + +REPO_DIR="CosyVoice" +if [ ! -d "$REPO_DIR" ]; then + echo "Cloning CosyVoice repository for the first time..." + git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git + cd "$REPO_DIR" || exit 1 + git submodule update --init --recursive + echo "Repository cloned and submodules initialized." +else + echo "CosyVoice repository already exists. Skipping git clone." + cd "$REPO_DIR" + # git pull && git submodule update --init --recursive +fi + +model_name="CosyVoice2-0.5B" +target_dir="pretrained_models/${model_name}" +if [ -d "${target_dir}" ] && [ -n "$(ls -A "${target_dir}" 2>/dev/null)" ]; then + echo "✅ Model already exists at: ${target_dir}" + echo " Skipping download." +else + echo "Preparing to download model: iic/${model_name} to ${target_dir}" + mkdir -p "${target_dir}" + python3 -c " +from modelscope import snapshot_download +snapshot_download('iic/CosyVoice2-0.5B', local_dir='${target_dir}') +" || { + echo "❌ ERROR: Model download failed (non-zero exit code from Python)." + exit 1 + } + if [ ! -d "${target_dir}" ] || [ -z "$(ls -A "${target_dir}" 2>/dev/null)" ]; then + echo "❌ ERROR: Downloaded directory is empty or does not exist: ${target_dir}" + exit 1 + fi + echo "✅ Model downloaded successfully to: ${target_dir}" +fi + +mkdir -p "${output_dir}" +cp -f ../scripts/get_infer_wavs.py . +python3 get_infer_wavs.py \ +--inference_dir $inference_dir \ +--input_text $inference_dir/$data_dir/text \ +--prompt_text $inference_dir/$data_dir/prompt_text \ +--prompt_wav_scp $inference_dir/$data_dir/prompt_wav.scp \ +--output_dir $output_dir \ +--fp16 + +cd .. diff --git a/models/speech/speech_synthesis/cosyvoice/pytorch/inference_test.py b/models/speech/speech_synthesis/cosyvoice/pytorch/inference_test.py deleted file mode 100644 index 66cfac19faf44a20bd67cd2abe55805d7445acf6..0000000000000000000000000000000000000000 --- a/models/speech/speech_synthesis/cosyvoice/pytorch/inference_test.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -sys.path.append('third_party/Matcha-TTS') -from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2 -from cosyvoice.utils.file_utils import load_wav -import torchaudio -cosyvoice = CosyVoice2('pretrained_models/CosyVoice2-0.5B', load_jit=False, load_trt=False, fp16=False) - -# NOTE if you want to reproduce the results on https://funaudiollm.github.io/cosyvoice2, please add text_frontend=False during inference -# zero_shot usage -prompt_speech_16k = load_wav('./asset/zero_shot_prompt.wav', 16000) -text = ('收到好友从远方寄来的生日礼物,' - '那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,' - '笑容如花儿般绽放。') -text_1 = '希望你以后能够做的比我还好呦。' -for i, j in enumerate(cosyvoice.inference_zero_shot(text, text_1, prompt_speech_16k, stream=False)): - torchaudio.save('zero_shot_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate) - -# fine grained control, for supported control, check cosyvoice/tokenizer/tokenizer.py#L248 -text = ('在他讲述那个荒诞故事的过程中,' - '他突然[laughter]停下来,' - '因为他自己也被逗笑了[laughter]。') -for i, j in enumerate(cosyvoice.inference_cross_lingual(text, prompt_speech_16k, stream=False)): - torchaudio.save('fine_grained_control_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate) - -# instruct usage -text = ('收到好友从远方寄来的生日礼物,' - '那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,' - '笑容如花儿般绽放。') -for i, j in enumerate(cosyvoice.inference_instruct2(text, '用四川话说这句话', prompt_speech_16k, stream=False)): - torchaudio.save('instruct_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate) -print("Offline inference is successful!") \ No newline at end of file diff --git a/models/speech/speech_synthesis/cosyvoice/pytorch/requirements.txt b/models/speech/speech_synthesis/cosyvoice/pytorch/requirements.txt index 2bc0604ee8811262e2952ba2b9f954e333b5c675..258e2b66571c69a0a4820ff647425a75ef159c61 100644 --- a/models/speech/speech_synthesis/cosyvoice/pytorch/requirements.txt +++ b/models/speech/speech_synthesis/cosyvoice/pytorch/requirements.txt @@ -10,7 +10,7 @@ gradio==5.4.0 grpcio==1.57.0 grpcio-tools==1.57.0 hydra-core==1.3.2 -HyperPyYAML==1.2.2 +HyperPyYAML inflect==7.3.1 librosa==0.10.2 lightning==2.2.4 diff --git a/models/speech/speech_synthesis/cosyvoice/pytorch/run_inference_fp16_eval.sh b/models/speech/speech_synthesis/cosyvoice/pytorch/run_inference_fp16_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..0181054f5b95a8090baafa7b4ac504b5d32aaf69 --- /dev/null +++ b/models/speech/speech_synthesis/cosyvoice/pytorch/run_inference_fp16_eval.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
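+
+# End-to-end FP16 zero-shot evaluation: synthesize wavs for each selected test set, then
+# score CER/WER, speaker similarity (3D-Speaker) and DNSMOS, and write the summary metrics
+# to eval.log under the decode directory.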
+ +SPK_LAB=utils/3D-Speaker +DNSMOS_LAB=utils/DNSMOS + +export PYTHONPATH=${SPK_LAB}:${PYTHONPATH} + +nj=1 +asr_gpu=1 + +task=zero_shot +dumpdir=data/${task} +# test_set="zh en hard_zh hard_en" +test_set="zh" + +# task=cross_lingual_zeroshot +# dumpdir=data/${task} +# # test_set="to_zh to_en to_hard_zh to_hard_en to_ja to_ko" +# test_set="to_zh to_en" + +# task=emotion_zeroshot +# dumpdir=data/${task} +# test_set="en zh" + +# task=subjective_zeroshot +# dumpdir=data/${task} + +# task=subjective_continue +# dumpdir=data/${task} +# test_set="emotion rhyme speed volume" + +inference_dir=$(pwd) +. utils/parse_options.sh || exit 1; +inference_tag="${inference_dir}/${task}" +decode_dir=${inference_dir}/results/${task} + +for lang in ${test_set}; do + name_without_extension=$lang + + echo "Do inference..." + bash scripts/inference.sh ${inference_dir} ${dumpdir}/${name_without_extension} ${decode_dir}/${name_without_extension}/wavs + + echo "Score WER for ${decode_dir}/${name_without_extension}" + + bash scripts/run_score_wer.sh ${dumpdir}/${name_without_extension}/text ${decode_dir}/${name_without_extension} ${name_without_extension} ${asr_gpu} + + find ${decode_dir}/${name_without_extension}/wavs -name *.wav | awk -F '/' '{print $NF, $0}' | sed "s@\.wav @ @g" > ${decode_dir}/${name_without_extension}/wav.scp + + echo "Score 3DSpeaker for ${decode_dir}/${name_without_extension}" + python3 scripts/eval_speaker_similarity.py \ + --model_id damo/speech_eres2net_sv_en_voxceleb_16k \ + --local_model_dir ${SPK_LAB}/pretrained \ + --prompt_wavs ${dumpdir}/${name_without_extension}/prompt_wav.scp \ + --hyp_wavs ${decode_dir}/${name_without_extension}/wav.scp \ + --log_file ${decode_dir}/${name_without_extension}/spk_simi_scores.txt \ + --devices "0" + + echo "Score DNSMOS for ${decode_dir}/${name_without_extension}" + python3 ${DNSMOS_LAB}/dnsmos_local_wavscp.py -t ${decode_dir}/${name_without_extension}/wav.scp -e ${DNSMOS_LAB} -o ${decode_dir}/${name_without_extension}/mos.csv + + cat ${decode_dir}/${name_without_extension}/mos.csv | sed '1d' |awk -F ',' '{ sum += $NF; count++ } END { if (count > 0) print sum / count }' > ${decode_dir}/${name_without_extension}/dnsmos_mean.txt + + + spk_simi_file="${decode_dir}/${name_without_extension}/spk_simi_scores.txt" + if [ ! -f "$spk_simi_file" ]; then + echo "❌ Error: File not found: $spk_simi_file" + exit 1 + fi + spk_simi_score=$(awk '/^avg[[:space:]]/ { print $NF }' "$spk_simi_file") + if [ -z "$spk_simi_score" ]; then + echo "❌ Error: No 'avg' line found in $spk_simi_file" + exit 1 + fi + + wer_file="${decode_dir}/${name_without_extension}/wav_res_ref_text.wer" + if [ ! -f "$wer_file" ]; then + echo "❌ Error: File not found: $wer_file" + exit 1 + fi + wer_score=$(grep -oP '^WER:\s*\K[0-9.]+(?=,|$)' "$wer_file") + if [ -z "$wer_score" ]; then + echo "❌ Error: No valid WER value found in $wer_file" + exit 1 + fi + { + echo {\'metricResult\': {\'CER/WER\': $wer_score, \'Speaker Similarity\': $spk_simi_score}} + echo "CER/WER: $wer_score" + echo "Speaker Similarity: $spk_simi_score" + } | tee "${decode_dir}/${name_without_extension}/eval.log" +done + + diff --git a/models/speech/speech_synthesis/cosyvoice/pytorch/run_wer.py b/models/speech/speech_synthesis/cosyvoice/pytorch/run_wer.py new file mode 100644 index 0000000000000000000000000000000000000000..c31f1aab460deea524844c69144d9e77d21638a2 --- /dev/null +++ b/models/speech/speech_synthesis/cosyvoice/pytorch/run_wer.py @@ -0,0 +1,130 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
+# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import sys, os +import torch +from tqdm import tqdm +import multiprocessing +from jiwer import compute_measures +from zhon.hanzi import punctuation +import string +import numpy as np +from transformers import WhisperProcessor, WhisperForConditionalGeneration +import soundfile as sf +import scipy +import zhconv +from funasr import AutoModel +import whisper +punctuation_all = punctuation + string.punctuation + +wav_res_text_path = sys.argv[1] +res_path = sys.argv[2] +lang = sys.argv[3] # zh or en + +device = "cuda" if torch.cuda.is_available() else 'cpu' + +def load_en_model(): + # model_id = "large-v3" + # model = whisper.load_model(model_id).to(device) + model_path = os.path.expanduser("./whisper-large-v3/large-v3.pt") + if not os.path.exists('./whisper-large-v3'): + from modelscope import snapshot_download + snapshot_download('iic/whisper-large-v3', local_dir='./whisper-large-v3') + model = whisper.load_model(model_path).to(device) + model.eval() + return model + +def load_zh_model(): + model = AutoModel(model="paraformer-zh") + return model + +def process_one(hypo, truth): + raw_truth = truth + raw_hypo = hypo + + for x in punctuation_all: + if x == '\'': + continue + truth = truth.replace(x, '') + hypo = hypo.replace(x, '') + + truth = truth.replace(' ', ' ') + hypo = hypo.replace(' ', ' ') + + if lang[-2:] in ["zh", "ja", "ko"]: + truth = " ".join([x for x in truth]) + hypo = " ".join([x for x in hypo]) # 中文hypo自带空格 + else: + # elif lang == "en": + truth = truth.lower() + hypo = hypo.lower() + # else: + # raise NotImplementedError + + measures = compute_measures(truth, hypo) + ref_list = truth.split(" ") + wer = measures["wer"] + subs = measures["substitutions"] / len(ref_list) + dele = measures["deletions"] / len(ref_list) + inse = measures["insertions"] / len(ref_list) + return (raw_truth, raw_hypo, wer, subs, dele, inse) + + +def run_asr(wav_res_text_path, res_path): + if lang[-2:] in ["zh", "hard_zh"]: + model = load_zh_model() + else: + model = load_en_model() + + params = [] + for line in open(wav_res_text_path).readlines(): + line = line.strip() + if len(line.split('|')) == 2: + wav_res_path, text_ref = line.split('|') + elif len(line.split('|')) == 3: + wav_res_path, wav_ref_path, text_ref = line.split('|') + elif len(line.split('|')) == 4: # for edit + wav_res_path, _, text_ref, wav_ref_path = line.split('|') + else: + raise NotImplementedError + + if not os.path.exists(wav_res_path): + continue + params.append((wav_res_path, text_ref)) + fout = open(res_path, "w") + + n_higher_than_50 = 0 + wers_below_50 = [] + for wav_res_path, text_ref in tqdm(params): + try: + if lang[-2:] in ["zh", "hard_zh"]: + res = model.generate(input=wav_res_path, + batch_size_s=300) + transcription = res[0]["text"] + else: + result = model.transcribe(wav_res_path, language=lang[-2:]) + transcription = result["text"].strip() + except Exception as e: + print(e) + continue + if 'zh' in lang: + transcription = 
zhconv.convert(transcription, 'zh-cn') + + raw_truth, raw_hypo, wer, subs, dele, inse = process_one(transcription, text_ref) + fout.write(f"{wav_res_path}\t{wer}\t{raw_truth}\t{raw_hypo}\t{inse}\t{dele}\t{subs}\n") + fout.flush() + +run_asr(wav_res_text_path, res_path) + diff --git a/tests/model_info.json b/tests/model_info.json index eeeebc923eecd933bb60c158065aef6bb2f4e288..32f4d5dbad970562520b7e9efb9320b0578ce0f7 100644 --- a/tests/model_info.json +++ b/tests/model_info.json @@ -8834,8 +8834,8 @@ "demoType": "" }, { - "display_name": "YOLOv5s", - "model_name": "yolov5s", + "display_name": "YOLOv5s_Sample", + "model_name": "yolov5s_sample", "framework": "igie", "release_version": "25.12", "release_sdk": "4.3.0", @@ -8847,8 +8847,8 @@ "mdims": "", "dataset": "", "license": "", - "model_path": "models/cv/object_detection/yolov5s/igie/", - "readme_file": "models/cv/object_detection/yolov5s/igie/README.md", + "model_path": "models/cv/object_detection/yolov5s_sample/igie/", + "readme_file": "models/cv/object_detection/yolov5s_sample/igie/README.md", "bitbucket_repo": "", "bitbucket_branch": "", "bitbucket_path": "", @@ -9065,6 +9065,370 @@ "type": "inference", "hasDemo": false, "demoType": "" + }, + { + "display_name": "YOLOv3_Sample", + "model_name": "yolov3_sample", + "framework": "igie", + "release_version": "25.12", + "release_sdk": "4.3.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.3.0", + "latest_gpgpu": "", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/yolov3_sample/igie/", + "readme_file": "models/cv/object_detection/yolov3_sample/igie/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov3.pt", + "need_third_part": false, + "precisions": [ + "int8" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "YOLOv5_Sample", + "model_name": "yolov5_sample", + "framework": "igie", + "release_version": "25.12", + "release_sdk": "4.3.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.3.0", + "latest_gpgpu": "", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/yolov5_sample/igie/", + "readme_file": "models/cv/object_detection/yolov5_sample/igie/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m.pt", + "need_third_part": false, + "precisions": [ + "int8" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "YOLOv7_Sample", + "model_name": "yolov7_sample", + "framework": "igie", + "release_version": "25.12", + "release_sdk": "4.3.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.3.0", + "latest_gpgpu": "", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/yolov7_sample/igie/", + "readme_file": "models/cv/object_detection/yolov7_sample/igie/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + 
"github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt", + "need_third_part": true, + "precisions": [ + "int8" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "YOLOX_Sample", + "model_name": "yolox_sample", + "framework": "igie", + "release_version": "25.12", + "release_sdk": "4.3.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.3.0", + "latest_gpgpu": "", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/yolox_sample/igie/", + "readme_file": "models/cv/object_detection/yolox_sample/igie/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.pth", + "need_third_part": true, + "precisions": [ + "int8" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "YOLOX_Sample", + "model_name": "yolox_sample", + "framework": "ixrt", + "release_version": "25.12", + "release_sdk": "4.3.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.3.0", + "latest_gpgpu": "", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/yolox_sample/ixrt/", + "readme_file": "models/cv/object_detection/yolox_sample/ixrt/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.pth", + "need_third_part": true, + "precisions": [ + "int8" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "YOLOv5s_Sample", + "model_name": "yolov5s_sample", + "framework": "ixrt", + "release_version": "25.12", + "release_sdk": "4.3.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.3.0", + "latest_gpgpu": "", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/yolov5s_sample/ixrt/", + "readme_file": "models/cv/object_detection/yolov5s_sample/ixrt/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt", + "need_third_part": true, + "precisions": [ + "fp16" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "RTDETR", + "model_name": "rtdetr", + "framework": "paddlepaddle", + "release_version": "26.03", + "release_sdk": "4.4.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.4.0", + "latest_gpgpu": "MR-V100", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/rtdetr/paddlepaddle", + "readme_file": "models/cv/object_detection/rtdetr/paddlepaddle/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + 
"github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "https://bj.bcebos.com/v1/paddledet/models/rtdetr_r101vd_6x_coco.pdparams", + "need_third_part": false, + "precisions": [ + "fp16" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "YOLOv8n", + "model_name": "yolov8n", + "framework": "pytorch", + "release_version": "26.03", + "release_sdk": "4.4.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.4.0", + "latest_gpgpu": "", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/yolov8n/pytorch/", + "readme_file": "models/cv/object_detection/yolov8n/pytorch/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt", + "need_third_part": false, + "precisions": [ + "fp16" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "RTDETR", + "model_name": "rtdetr", + "framework": "igie", + "release_version": "26.03", + "release_sdk": "4.4.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.4.0", + "latest_gpgpu": "MR-V100", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/rtdetr/igie", + "readme_file": "models/cv/object_detection/rtdetr/igie/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "http://files.deepspark.org.cn:880/deepspark/data/checkpoints/rtdetrv3_r18vd_6x_coco_image.onnx", + "need_third_part": false, + "precisions": [ + "fp16", + "int8" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "RTDETR", + "model_name": "rtdetr", + "framework": "ixrt", + "release_version": "26.03", + "release_sdk": "4.4.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.4.0", + "latest_gpgpu": "MR-V100", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/rtdetr/ixrt", + "readme_file": "models/cv/object_detection/rtdetr/ixrt/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "http://files.deepspark.org.cn:880/deepspark/data/checkpoints/rtdetrv3_r18vd_6x_coco_image_sim.onnx", + "need_third_part": false, + "precisions": [ + "fp16" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "CosyVoice2-0.5B", + "model_name": "cosyvoice", + "framework": "ixrt", + "release_version": "26.03", + "release_sdk": "4.4.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.4.0", + "latest_gpgpu": "", + "category": "speech/speech_synthesis", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/speech/speech_synthesis/cosyvoice/ixrt", + "readme_file": "models/speech/speech_synthesis/cosyvoice/ixrt/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + 
"github_branch": "", + "github_path": "", + "datasets": "", + "download_url": "https://huggingface.co/FunAudioLLM/CosyVoice2-0.5B", + "need_third_part": false, + "precisions": [ + "fp16" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" } ] } \ No newline at end of file diff --git a/tests/run_igie.py b/tests/run_igie.py index 355aa1eb0babbc06bcda0005d999fde3caeab563..76fa721a41f98092a4d4c58473c44b3d74888794 100644 --- a/tests/run_igie.py +++ b/tests/run_igie.py @@ -21,7 +21,7 @@ import logging import os import sys import argparse - +import platform import utils # 配置日志 @@ -48,7 +48,8 @@ def main(): logging.error("test model case is empty") sys.exit(-1) batch_size = os.environ.get("BS_LISTS") - model = get_model_config(test_model) + model_framework = os.environ.get("MODEL_FW").lower() + model = get_model_config(test_model, model_framework) if not model: logging.error("mode config is empty") sys.exit(-1) @@ -74,7 +75,7 @@ def main(): logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_detec_testcase(model, batch_size) + result = run_detec_testcase(model, batch_size, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -84,7 +85,7 @@ def main(): logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_ocr_testcase(model) + result = run_ocr_testcase(model, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -131,12 +132,12 @@ def main(): logging.info(f"Full text result: {result}") -def get_model_config(mode_name): +def get_model_config(mode_name, model_framework): with open("model_info.json", mode='r', encoding='utf-8') as file: models = json.load(file) for model in models['models']: - if model["model_name"] == mode_name.lower() and model["framework"] == "igie": + if model["model_name"] == mode_name.lower() and model["framework"] == model_framework: return model return @@ -164,14 +165,13 @@ def run_clf_testcase(model, batch_size, whl_url): ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./ """ if model["category"] == "cv/semantic_segmentation": - prepare_script += """ - pip install /mnt/deepspark/install/mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + prepare_script += f""" + pip install {whl_url}`curl -s {whl_url} | grep -o 'mmcv-[^"]*\.whl' | head -n1` """ if model_name in ["resnet50_sample", "vgg16_sample"]: - if whl_url and whl_url != "None": - prepare_script += f""" - pip install {whl_url}`curl -s {whl_url} | grep -o 'tensorflow-[^"]*\.whl' | head -n1` - """ + prepare_script += f""" + pip install {whl_url}`curl -s {whl_url} | grep -o 'tensorflow-[^"]*\.whl' | head -n1` + """ prepare_script += f""" bash ci/prepare.sh ls -l | grep onnx @@ -264,7 +264,7 @@ def run_clf_testcase(model, batch_size, whl_url): logging.debug(f"matchs:\n{matchs}") return result -def run_detec_testcase(model, batch_size): +def run_detec_testcase(model, batch_size, whl_url): batch_size_list = batch_size.split(",") if batch_size else [] model_name = model["model_name"] result = { @@ -279,11 +279,16 @@ def run_detec_testcase(model, batch_size): ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./ ln -s 
/mnt/deepspark/data/datasets/{dataset_n} ./ """ - # for 4.3.0 sdk need pre install mmcv - prepare_script += """ - pip install /mnt/deepspark/install/mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + + prepare_script += f""" + pip install {whl_url}`curl -s {whl_url} | grep -o 'mmcv-[^"]*\.whl' | head -n1` """ + if model_name == "rtdetr": + prepare_script += f""" + pip install {whl_url}`curl -s {whl_url} | grep -o 'paddlepaddle-[^"]*\.whl' | head -n1` + """ + # if model["need_third_part"] and model["3rd_party_repo"]: # third_party_repo = model["3rd_party_repo"] # prepare_script += f"unzip /mnt/deepspark/data/3rd_party/{third_party_repo}.zip -d ./\n" @@ -301,6 +306,11 @@ def run_detec_testcase(model, batch_size): export DATASETS_DIR=./{dataset_n}/ """ + if platform.machine() == "aarch64": + base_script += """ + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + """ + for prec in model["precisions"]: result["result"].setdefault(prec, {"status": "FAIL"}) for bs in batch_size_list: @@ -332,7 +342,6 @@ def run_detec_testcase(model, batch_size): pattern = r"\* ([\w\d ]+):\s*([\d.]+)[ ms%]*, ([\w\d ]+):\s*([\d.]+)[ ms%]*" matchs = re.findall(pattern, sout) for m in matchs: - result["result"].setdefault(prec, {"status": "FAIL"}) try: result["result"][prec][bs] = result["result"][prec][bs] | {m[0]: float(m[1]), m[2]: float(m[3])} except ValueError: @@ -353,7 +362,7 @@ def run_detec_testcase(model, batch_size): except ValueError: print("The string cannot be converted to a float.") result["result"][prec][bs] = result["result"][prec].get(bs, {}) | {m[0]: m[1]} - if matchs and len(matchs) == 2: + if matchs and len(matchs) >= 2: result["result"][prec]["status"] = "PASS" else: pattern = METRIC_PATTERN @@ -366,7 +375,7 @@ def run_detec_testcase(model, batch_size): return result -def run_ocr_testcase(model): +def run_ocr_testcase(model, whl_url): model_name = model["model_name"] result = { "name": model_name, @@ -380,7 +389,7 @@ def run_ocr_testcase(model): cd ../{model['model_path']} ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./ ln -s /mnt/deepspark/data/datasets/{dataset_n} ./ - pip install /mnt/deepspark/install/paddlepaddle-3.0.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + pip install {whl_url}`curl -s {whl_url} | grep -o 'paddlepaddle-[^"]*\.whl' | head -n1` unzip -q /mnt/deepspark/data/3rd_party/PaddleOCR-release-2.6.zip -d ./PaddleOCR bash ci/prepare.sh """ @@ -582,6 +591,11 @@ def run_nlp_testcase(model, batch_size): export DATASETS_DIR=/mnt/deepspark/data/datasets/{dataset_n} cd ../{model['model_path']} """ + if model_name == "transformer" and platform.machine() == "aarch64": + base_script += """ + export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + """ for prec in model["precisions"]: result["result"].setdefault(prec, {"status": "FAIL"}) for bs in batch_size_list: diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index 6d0e7ef95bb0e27e9b17ea78e29ca8959cb87630..5c3878876549f2d6509794edd19c15efaae73fc6 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import platform import yaml import subprocess import json @@ -58,12 +59,14 @@ def main(): logging.error(f"model name {model['model_name']} is not support for IXUCA SDK v4.3.0.") sys.exit(-1) + whl_url = os.environ.get("WHL_URL") + result = {} if model["category"] == "cv/classification": logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_clf_testcase(model, batch_size) + result = run_clf_testcase(model, batch_size, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -73,7 +76,7 @@ def main(): logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_detec_testcase(model, batch_size) + result = run_detec_testcase(model, batch_size, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -103,7 +106,7 @@ def main(): logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_instance_segmentation_testcase(model) + result = run_instance_segmentation_testcase(model, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -113,7 +116,7 @@ def main(): logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_nlp_testcase(model, batch_size) + result = run_nlp_testcase(model, batch_size, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -148,7 +151,7 @@ def check_model_result(result): break result["status"] = status -def run_clf_testcase(model, batch_size): +def run_clf_testcase(model, batch_size, whl_url): batch_size_list = batch_size.split(",") if batch_size else [] model_name = model["model_name"] result = { @@ -163,8 +166,8 @@ def run_clf_testcase(model, batch_size): ln -s /root/data/checkpoints/{checkpoint_n} ./ """ if model_name == "swin_transformer_large": - prepare_script += """ - pip install /root/data/install/tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl + prepare_script += f""" + pip install {whl_url}`curl -s {whl_url} | grep -o 'tensorflow-[^"]*\.whl' | head -n1` """ prepare_script += """ bash ci/prepare.sh @@ -274,7 +277,7 @@ def run_clf_testcase(model, batch_size): logging.debug(f"matchs:\n{matchs}") return result -def run_detec_testcase(model, batch_size): +def run_detec_testcase(model, batch_size, whl_url): batch_size_list = batch_size.split(",") if batch_size else [] model_name = model["model_name"] result = { @@ -288,7 +291,7 @@ def run_detec_testcase(model, batch_size): cd ../{model['model_path']} ln -s /root/data/checkpoints/{checkpoint_n} ./ ln -s /root/data/datasets/{dataset_n} ./ - pip install /root/data/install/mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + pip install {whl_url}`curl -s {whl_url} | grep -o 'mmcv-[^"]*\.whl' | head -n1` bash ci/prepare.sh """ @@ -328,6 +331,11 @@ def 
run_detec_testcase(model, batch_size): export CONFIG_DIR=config/{config_name}_CONFIG """ + if platform.machine() == "aarch64": + base_script += """ + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + """ + for prec in model["precisions"]: result["result"].setdefault(prec, {"status": "FAIL"}) for bs in batch_size_list: @@ -544,7 +552,7 @@ def run_multi_object_tracking_testcase(model): return result # BERT series models -def run_nlp_testcase(model, batch_size): +def run_nlp_testcase(model, batch_size, whl_url): batch_size_list = batch_size.split(",") if batch_size else [] model_name = model["model_name"] result = { @@ -555,18 +563,17 @@ def run_nlp_testcase(model, batch_size): prepare_script = f""" set -x cd ../{model['model_path']} - pip install /root/data/install/tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl - pip install /root/data/install/ixrt-1.0.0a0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - pip install /root/data/install/cuda_python-11.8.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - bash /root/data/install/ixrt-1.0.0.alpha+corex.4.3.0-linux_x86_64.run + pip install {whl_url}`curl -s {whl_url} | grep -o 'tensorflow-[^"]*\.whl' | head -n1` + pip install {whl_url}`curl -s {whl_url} | grep -o 'ixrt-[^"]*\.whl' | head -n1` + pip install {whl_url}`curl -s {whl_url} | grep -o 'cuda_python-[^"]*\.whl' | head -n1` bash ci/prepare.sh """ else: prepare_script = f""" set -x cd ../{model['model_path']} - pip install /root/data/install/ixrt-1.0.0a0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - pip install /root/data/install/cuda_python-11.8.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + pip install {whl_url}`curl -s {whl_url} | grep -o 'ixrt-[^"]*\.whl' | head -n1` + pip install {whl_url}`curl -s {whl_url} | grep -o 'cuda_python-[^"]*\.whl' | head -n1` bash ci/prepare.sh """ @@ -609,20 +616,40 @@ def run_nlp_testcase(model, batch_size): if bs == "None": bs = "Default" if model_name in ["bert_base_squad", "bert_large_squad", "transformer"]: - script = f""" + if model_name == "transformer" and platform.machine() == "aarch64": + script = f""" set -x + export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD cd ../{model['model_path']}/ bash scripts/infer_{model_name}_{prec}_accuracy.sh bash scripts/infer_{model_name}_{prec}_performance.sh - """ + """ + else: + script = f""" + set -x + cd ../{model['model_path']}/ + bash scripts/infer_{model_name}_{prec}_accuracy.sh + bash scripts/infer_{model_name}_{prec}_performance.sh + """ else: if model_name in ["bert_base_squad", "bert_large_squad", "transformer"]: - script = f""" + if model_name == "transformer" and platform.machine() == "aarch64": + script = f""" set -x + export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD cd ../{model['model_path']}/ bash scripts/infer_{model_name}_{prec}_accuracy.sh --bs {bs} bash scripts/infer_{model_name}_{prec}_performance.sh --bs {bs} - """ + """ + else: + script = f""" + set -x + cd ../{model['model_path']}/ + bash scripts/infer_{model_name}_{prec}_accuracy.sh --bs {bs} + bash scripts/infer_{model_name}_{prec}_performance.sh --bs {bs} + """ result["result"][prec].setdefault(bs, {}) logging.info(f"Start running {model_name} {prec} bs: {bs} test case") @@ -726,7 +753,7 @@ def run_speech_testcase(model, 
batch_size): logging.debug(f"matchs:\n{matchs}") return result -def run_instance_segmentation_testcase(model): +def run_instance_segmentation_testcase(model, whl_url): model_name = model["model_name"] result = { "name": model_name, @@ -739,7 +766,7 @@ def run_instance_segmentation_testcase(model): cd ../{model['model_path']} ln -s /root/data/checkpoints/{checkpoint_n} ./ ln -s /root/data/datasets/{dataset_n} ./ - pip install /root/data/install/mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + pip install {whl_url}`curl -s {whl_url} | grep -o 'mmcv-[^"]*\.whl' | head -n1` bash ci/prepare.sh ls -l | grep onnx """ diff --git a/tests/run_trtllm.py b/tests/run_trtllm.py index f8a684c32788b95ee45268b9a74863f5bd40aff7..a7cf196346d81764d583b733548c057b670c0729 100644 --- a/tests/run_trtllm.py +++ b/tests/run_trtllm.py @@ -58,13 +58,14 @@ def main(): logging.error(f"model name {model['model_name']} is not support for IXUCA SDK v4.3.0.") sys.exit(-1) + whl_url = os.environ.get("WHL_URL") result = {} # NLP模型 if model["category"] in ["nlp/llm"]: logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_nlp_testcase(model) + result = run_nlp_testcase(model, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -89,7 +90,7 @@ def check_model_result(result): break result["status"] = status -def run_nlp_testcase(model): +def run_nlp_testcase(model, whl_url): model_name = model["model_name"] result = { "name": model_name, @@ -101,10 +102,9 @@ def run_nlp_testcase(model): prepare_script = f""" set -x cd ../{model['model_path']} - pip install /mnt/deepspark/install/tensorrt_llm-0.12.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - pip install /mnt/deepspark/install/ixrt-1.0.0a0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - bash /mnt/deepspark/install/ixrt-1.0.0.alpha+corex.4.3.0-linux_x86_64.run - pip install /mnt/deepspark/install/cuda_python-11.8.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + pip install {whl_url}`curl -s {whl_url} | grep -o 'tensorrt_llm-[^"]*\.whl' | head -n1` + pip install {whl_url}`curl -s {whl_url} | grep -o 'ixrt-[^"]*\.whl' | head -n1` + pip install {whl_url}`curl -s {whl_url} | grep -o 'cuda_python-[^"]*\.whl' | head -n1` bash ci/prepare.sh """ diff --git a/tests/run_vllm.py b/tests/run_vllm.py index d2617b911000853e35499c36fb7d876710e24216..450ed5931557905e73613b69d9ad072aa949655f 100644 --- a/tests/run_vllm.py +++ b/tests/run_vllm.py @@ -57,13 +57,14 @@ def main(): logging.error(f"model name {model['model_name']} is not support for IXUCA SDK v4.3.0.") sys.exit(-1) + whl_url = os.environ.get("WHL_URL") result = {} # NLP模型 if model["category"] in ["nlp/llm", "multimodal/vision_language_model", "speech/asr", "speech/speech_synthesis"]: logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_nlp_testcase(model) + result = run_nlp_testcase(model, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -163,7 +164,7 @@ def _build_inference_script(model: Dict[str, Any], prec: str) -> str: return base_script + f"python3 offline_inference.py --model-path /mnt/deepspark/data/checkpoints/{checkpoint_n} --tp 1" case 
"cosyvoice": - return base_script + "cd CosyVoice\npython3 inference_test.py" + return base_script + "cd CV3-Eval\nbash run_inference_fp16_eval.sh" case "xlmroberta": return base_script + ( @@ -221,13 +222,13 @@ def _append_benchmark_script(script: str, model: Dict[str, Any]) -> str: bench = ( "python3 vllm/benchmarks/benchmark_throughput.py --model ./qwen1.5-14b " "--dataset-name sonnet --dataset-path vllm/benchmarks/sonnet.txt " - "--num-prompts 10 --trust_remote_code --max-model-len 896 -tp 2" + "--num-prompts 10 --trust-remote-code --max-model-len 896 -tp 2" ) else: bench = ( "CUDA_VISIBLE_DEVICES=0,1,3,4 python3 vllm/benchmarks/benchmark_throughput.py " f"--model ./{model_name} --dataset-name sonnet --dataset-path vllm/benchmarks/sonnet.txt " - "--num-prompts 10 --trust_remote_code --max-model-len 3096 -tp 4" + "--num-prompts 10 --trust-remote-code --max-model-len 3096 -tp 4" ) return script + common_bench + bench @@ -238,7 +239,7 @@ def _append_benchmark_script(script: str, model: Dict[str, Any]) -> str: "CUDA_VISIBLE_DEVICES=0,1,3,4 python3 vllm/benchmarks/benchmark_throughput.py " f"--model ./{model_name} --backend vllm-chat --dataset-name hf " "--dataset-path lmarena-ai/VisionArena-Chat --num-prompts 10 --hf-split train " - "-tp 4 --max-model-len 4096 --max-num-seqs 2 --trust_remote_code" + "-tp 4 --max-model-len 4096 --max-num-seqs 2 --trust-remote-code" ) return script + common_bench + bench @@ -291,15 +292,25 @@ def _parse_script_output(sout: str, prec: str, display_name: str) -> Dict[str, A "status": "PASS" } + matchs = re.findall(METRIC_PATTERN, sout) + if matchs and len(matchs) == 1: + result_entry.update(get_metric_result(matchs[0])) + result_entry["status"] = "PASS" + return result_entry + # Final fallback: generic success message if "Offline inference is successful!" in sout: return {"status": "PASS"} return result_entry +def get_metric_result(str): + if str: + return json.loads(str.replace("'", "\""))["metricResult"] + return None # --- Main function (now simple and low complexity) --- -def run_nlp_testcase(model: Dict[str, Any]) -> Dict[str, Any]: +def run_nlp_testcase(model: Dict[str, Any], whl_url: str) -> Dict[str, Any]: get_num_devices_script = "ixsmi -L | wc -l" result, _ = run_script(get_num_devices_script) num_devices = int(result.stdout.strip()) @@ -320,9 +331,13 @@ def run_nlp_testcase(model: Dict[str, Any]) -> Dict[str, Any]: set -x cd ../{model['model_path']} ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./{model_name} -pip install /mnt/deepspark/install/xformers-0.0.26.post1+corex.4.3.0-cp310-cp310-linux_x86_64.whl +pip install {whl_url}`curl -s {whl_url} | grep -o 'xformers-[^"]*\.whl' | head -n1` bash ci/prepare.sh """ + if model_name == "internlm3": + prepare_script += f""" + pip install {whl_url}`curl -s {whl_url} | grep -o 'lmdeploy-[^"]*\.whl' | head -n1` + """ if utils.is_debug(): pip_list = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n"