diff --git a/OWNERS b/OWNERS index 0cd9b621a7bfe427b94fe74b6f75ed0f01921904..8d29f4f1f05cb79f8c37835e41939722535c2667 100644 --- a/OWNERS +++ b/OWNERS @@ -10,3 +10,4 @@ approvers: - baochong - luoyang42 - wang_hua_2019 +- zhangyifan999 diff --git a/official/audio/MELGAN/README.md b/official/audio/MELGAN/README.md index 448fa1bb5d57e140d2f7777fa10e3379f0642e13..4185a79a59de82d5220dd46f58de302259a598ca 100644 --- a/official/audio/MELGAN/README.md +++ b/official/audio/MELGAN/README.md @@ -46,7 +46,7 @@ Dataset used: [LJ Speech]() - Dataset size:2.6GB,13,100 short audio clips of a single speaker reading passages from 7 non-fiction books. - Data format:Each audio file is a single-channel 16-bit PCM WAV with a sample rate of 22050 Hz - - The audio data needs to be processed to a mel-spectrum, and you can refer to the script in [mel-spectrogram data creation](https://github.com/seungwonpark/melgan/blob/master/preprocess.py). Non CUDA environment needs to delete `. cuda()` in `utils/stfy.py`. To save data in the `npy` format, `preprocess.py` also needs to be modified. As follows: + - The audio data needs to be processed to a mel-spectrum, and you can refer to the script in [mel-spectrogram data creation](https://github.com/seungwonpark/melgan/blob/master/preprocess.py). In a non-CUDA environment, delete `.cuda()` in `utils/stft.py`. To save data in the `npy` format, `preprocess.py` also needs to be modified as follows: ``` # 37 - 38 lines diff --git a/official/audio/MELGAN/README_CN.md b/official/audio/MELGAN/README_CN.md index 03a3b7badd31889dc4666668db587d7ce3afd340..5d80d749a4a672ce67e1c1d17d4ca158284f8af1 100644 --- a/official/audio/MELGAN/README_CN.md +++ b/official/audio/MELGAN/README_CN.md @@ -46,7 +46,7 @@ MelGAN模型是非自回归全卷积模型。它的参数比同类模型少得 - Dataset size:2.6GB,包含13,100条只有一个说话人的短语音。语音的内容来自7本纪实书籍。 - 数据格式:每条语音文件都是单声道、16-bit以及采样率为22050。 - - 语音需要被处理为Mel谱, 可以参考脚本[Mel谱处理脚本](https://github.com/seungwonpark/melgan/blob/master/preprocess.py)。非CUDA环境需删除`utils/stfy.py`中的`.cuda()`,因为要保存`npy`格式的数据,所以`preproccess.py`也需要修改以下,参考代码如下: + - 语音需要被处理为Mel谱, 可以参考脚本[Mel谱处理脚本](https://github.com/seungwonpark/melgan/blob/master/preprocess.py)。非CUDA环境需删除`utils/stft.py`中的`.cuda()`,因为要保存`npy`格式的数据,所以`preprocess.py`也需要修改一下,参考代码如下: ``` # 37 - 38 行
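The MELGAN hunks above stop at the opening fence, so the snippet the README refers to (around lines 37-38 of the upstream `preprocess.py`) is not reproduced in this patch. A minimal sketch of the intended `.npy` change, assuming the upstream script yields each mel spectrogram as a torch tensor; the helper name `save_mel_as_npy` is hypothetical and not part of either repository:

```python
# Hypothetical sketch of the README's suggested edit to preprocess.py:
# write each mel spectrogram as a float32 .npy file instead of a torch
# tensor, so the MindSpore data pipeline can load it without torch.
import numpy as np

def save_mel_as_npy(mel_tensor, wav_path):
    """mel_tensor: torch.Tensor shaped (1, n_mels, frames); wav_path: source .wav path."""
    mel = mel_tensor.squeeze(0).cpu().numpy().astype(np.float32)
    np.save(wav_path.replace('.wav', '.npy'), mel)
```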
diff --git a/official/audio/Tacotron2/README.md b/official/audio/Tacotron2/README.md index 53283ba9b019691a9454d9b9a7c8e51337f86611..51c939693002b061c2d6f4d6a4d5c54f134e39ef 100644 --- a/official/audio/Tacotron2/README.md +++ b/official/audio/Tacotron2/README.md @@ -76,8 +76,8 @@ After installing MindSpore via the official website, you can start training and # example: bash run_standalone_train.sh /path/ljspeech.hdf5 0 # run distributed training - bash run_distributed_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN] - # example: bash run_distributed_train.sh /path/ljspeech.h5 ../hccl_8p_01234567_127.0.0.1.json 8 0 + bash run_distribute_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN] + # example: bash run_distribute_train.sh /path/ljspeech.h5 ../hccl_8p_01234567_127.0.0.1.json ljspeech 8 0 # run evaluation bash run_eval.sh [OUTPUT_PATH] [MODEL_CKPT] [DEVICE_ID] text is set in config.py( can modify text of ljspeech_config.yaml) @@ -246,7 +246,7 @@ Parameters for both training and evaluation can be set in [DATASET]_config.yaml ```bash cd scripts - bash run_distributed_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN] + bash run_distribute_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN] ``` Note: `DATASET_PATH` is the directory contains hdf5 file. diff --git a/official/audio/Tacotron2/README_CN.md b/official/audio/Tacotron2/README_CN.md index e104f6e4872f367bd2ab838ea118ee80fc74952c..aaf135f92cb8ce673929f52cde79a20551ee576d 100644 --- a/official/audio/Tacotron2/README_CN.md +++ b/official/audio/Tacotron2/README_CN.md @@ -76,8 +76,8 @@ Tacotron2实质上是一种包含编码器和解码器的序列到序列模型 # 示例:bash run_standalone_train.sh /path/ljspeech.hdf5 0 # 运行分布式训练 - bash run_distributed_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN] - # 示例:bash run_distributed_train.sh /path/ljspeech.h5 ../hccl_8p_01234567_127.0.0.1.json 8 0 + bash run_distribute_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN] + # 示例:bash run_distribute_train.sh /path/ljspeech.h5 ../hccl_8p_01234567_127.0.0.1.json ljspeech 8 0 # 运行评估 bash run_eval.sh [OUTPUT_PATH] [MODEL_CKPT] [DEVICE_ID] text is set in config.py( can modify text of ljspeech_config.yaml) @@ -246,7 +246,7 @@ tacotron2/ ```bash cd scripts - bash run_distributed_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN] + bash run_distribute_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN] ``` 注:`DATASET_PATH`是包含HDF5文件的目录。 diff --git a/official/audio/Tacotron2/src/tacotron2.py b/official/audio/Tacotron2/src/tacotron2.py index e685cf0f8d0ca516cc18a92674488e9a12b6ca44..a5b573f31a1615e71fb4faa2fd502f8cdfa4ef5e 100644 --- a/official/audio/Tacotron2/src/tacotron2.py +++ b/official/audio/Tacotron2/src/tacotron2.py @@ -1168,7 +1168,7 @@ class TrainStepWrap(nn.Cell): overflow = ops.logical_not(amp.all_finite(grads)) if self.reducer_flag: - overflow = self.allreduce(overflow.to(mstype.float32)) >= self.base + overflow = self.all_reduce(overflow.to(mstype.float32)) >= self.base overflow = self.loss_scaling_manager(self.loss_scale, overflow) diff --git a/official/cv/FasterRCNN/README_CN.md b/official/cv/FasterRCNN/README_CN.md index 676b27231976d34369334b0f11406f711df07f2a..ace823dbfc7e00a1fb8f7ef972b5d92316c3b737 100644 --- a/official/cv/FasterRCNN/README_CN.md +++ b/official/cv/FasterRCNN/README_CN.md @@ -625,7 +625,7 @@ bash run_infer_cpp.sh [MINDIR_PATH] [DATA_PATH] [ANNO_PATH] [DEVICE_TYPE] [IMAGE | 上传日期 | 2020/8/31 | 2021/2/10 |2022/8/10| | MindSpore版本 | 1.0.0 |1.2.0 |1.7.0| | 数据集 | COCO 2017 |COCO 2017 |FaceMaskDetection| -| 训练参数 | epoch=12, batch_size=2 |epoch=12, batch_size=2 |epoch=20,batch_size=2| +| 训练参数 | epoch=12, batch_size=2 |epoch=20, batch_size=2 |epoch=20,batch_size=2| | 优化器 | SGD |SGD |SGD| | 损失函数 | Softmax交叉熵,Sigmoid交叉熵,SmoothL1Loss |Softmax交叉熵,Sigmoid交叉熵,SmoothL1Loss |Softmax交叉熵,Sigmoid交叉熵,SmoothL1Loss| | 速度 | 1卡:190毫秒/步;8卡:200毫秒/步 | 1卡:320毫秒/步;8卡:335毫秒/步 |1卡:7328毫秒/步| diff --git a/official/cv/FasterRCNN/scripts/run_distribute_train_ascend.sh b/official/cv/FasterRCNN/scripts/run_distribute_train_ascend.sh index 86588bf774e0ab70aae3eed4e1acc60a4d6da7a7..8087e1f88701b48b668a4737dd5ebf9b0f4023b3 100644 --- a/official/cv/FasterRCNN/scripts/run_distribute_train_ascend.sh +++ b/official/cv/FasterRCNN/scripts/run_distribute_train_ascend.sh @@ -96,6 +96,7 @@ export HCCL_CONNECT_TIMEOUT=600 export DEVICE_NUM=8 export RANK_SIZE=8 export RANK_TABLE_FILE=$PATH1 +export MS_ASCEND_CHECK_OVERFLOW_MODE="SATURATION_MODE" for((i=0; i<${DEVICE_NUM}; i++)) do diff --git a/official/cv/FasterRCNN/scripts/run_distribute_train_gpu.sh b/official/cv/FasterRCNN/scripts/run_distribute_train_gpu.sh index
8b27d1c67045df723712934d7eea82a5de73f9ff..d7af4ca64d193b2713d27dc7928443a75132dbcc 100644 --- a/official/cv/FasterRCNN/scripts/run_distribute_train_gpu.sh +++ b/official/cv/FasterRCNN/scripts/run_distribute_train_gpu.sh @@ -97,4 +97,5 @@ mpirun -n $RANK_SIZE --output-filename log_output --merge-stderr-to-stdout --all --pre_trained=$PRETRAINED_PATH \ --backbone=$3 \ --coco_root=$PATH3 \ + --base_lr=0.008 \ --mindrecord_dir=$mindrecord_dir > train.log 2>&1 & \ No newline at end of file diff --git a/official/cv/FasterRCNN/scripts/run_eval_ascend.sh b/official/cv/FasterRCNN/scripts/run_eval_ascend.sh index d276546174af5532e7d89df4ac8276edf8b492fe..d9b2be113aacc44c9811fb762a30e45f2f00274e 100644 --- a/official/cv/FasterRCNN/scripts/run_eval_ascend.sh +++ b/official/cv/FasterRCNN/scripts/run_eval_ascend.sh @@ -95,6 +95,7 @@ export DEVICE_NUM=1 export RANK_SIZE=$DEVICE_NUM export DEVICE_ID=$5 export RANK_ID=0 +export MS_ASCEND_CHECK_OVERFLOW_MODE="SATURATION_MODE" if [ -d "eval" ]; then diff --git a/official/cv/FasterRCNN/scripts/run_standalone_train_ascend.sh b/official/cv/FasterRCNN/scripts/run_standalone_train_ascend.sh index 565f1c56245b75d9a9cd6ad781504c41b89f18fd..828f5133e125583a73247bc1c9c433bb6806a3ce 100644 --- a/official/cv/FasterRCNN/scripts/run_standalone_train_ascend.sh +++ b/official/cv/FasterRCNN/scripts/run_standalone_train_ascend.sh @@ -88,6 +88,7 @@ export DEVICE_NUM=1 export DEVICE_ID=$4 export RANK_ID=0 export RANK_SIZE=1 +export MS_ASCEND_CHECK_OVERFLOW_MODE="SATURATION_MODE" if [ -d "train" ]; then diff --git a/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample.py b/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample.py index a49572c6aee2e115d8bfe6dd98923189fa3cceae..57c758a5110db92f064ca04b2c46fb86662d24b8 100644 --- a/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample.py +++ b/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample.py @@ -144,13 +144,7 @@ class BboxAssignSample(nn.Cell): num_pos = self.cast(self.logicalnot(valid_pos_index), self.ms_type) num_pos = self.sum_inds(num_pos, -1) unvalid_pos_index = self.less(self.range_pos_size, num_pos) - valid_neg_index = self.logicaland( - self.cast(self.concat(( - self.cast(self.check_neg_mask, ms.int32), - self.cast(unvalid_pos_index, ms.int32) - )), ms.bool_), - self.cast(valid_neg_index, ms.bool_) - ) + valid_neg_index = self.logicaland(self.concat((self.check_neg_mask, unvalid_pos_index)), valid_neg_index) pos_bboxes_ = self.gatherND(bboxes, pos_index) pos_gt_bboxes_ = self.gatherND(gt_bboxes_i, pos_assigned_gt_index) diff --git a/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample_stage2.py b/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample_stage2.py index 7602adcc53ae5f30bb56657a26e4c56cbbe8b5ec..5cbc16ee5d2ea22a82253db0fa5c020dea5246ae 100644 --- a/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample_stage2.py +++ b/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample_stage2.py @@ -114,7 +114,7 @@ class BboxAssignSampleForRcnn(nn.Cell): gt_bboxes_i, self.check_gt_one) bboxes = self.select(self.cast(self.tile(self.reshape(self.cast(valid_mask, ms.int32), \ (self.num_bboxes, 1)), (1, 4)), ms.bool_), \ - self.cast(bboxes, ms.float16), self.check_anchor_two) + bboxes, self.check_anchor_two) overlaps = self.iou(bboxes, gt_bboxes_i) @@ -171,13 +171,7 @@ class BboxAssignSampleForRcnn(nn.Cell): neg_index, valid_neg_index = self.random_choice_with_mask_neg(self.equal(assigned_gt_inds5, 0)) unvalid_pos_index = self.less(self.range_pos_size, num_pos) - valid_neg_index = self.logicaland( - 
self.cast(self.concat(( - self.cast(self.check_neg_mask, ms.int32), - self.cast(unvalid_pos_index, ms.int32) - )), ms.bool_), - self.cast(valid_neg_index, ms.bool_) - ) + valid_neg_index = self.logicaland(self.concat((self.check_neg_mask, unvalid_pos_index)), valid_neg_index) neg_index = self.reshape(neg_index, self.reshape_shape_neg) valid_neg_index = self.cast(valid_neg_index, ms.int32) diff --git a/official/cv/FasterRCNN/src/FasterRcnn/proposal_generator.py b/official/cv/FasterRCNN/src/FasterRcnn/proposal_generator.py index 5317ca51c35a6ba8f3ee3de144f4e0b06f4f4623..16e2b42655ab47f2110d66a1ff803d684f7f90d6 100644 --- a/official/cv/FasterRCNN/src/FasterRcnn/proposal_generator.py +++ b/official/cv/FasterRCNN/src/FasterRcnn/proposal_generator.py @@ -183,21 +183,13 @@ class Proposal(nn.Cell): mlvl_proposals = mlvl_proposals + (proposals,) mlvl_mask = mlvl_mask + (mask_valid,) - proposals = self.concat_axis0( - tuple(self.cast(proposal, ms.int64) for proposal in mlvl_proposals) - ) - masks = self.concat_axis0( - tuple(self.cast(mask, ms.int64) for mask in mlvl_mask) - ) + proposals = self.concat_axis0(mlvl_proposals) + masks = self.concat_axis0(mlvl_mask) _, _, _, _, scores = self.split(proposals) scores = self.squeeze(scores) topk_mask = self.cast(self.topK_mask, self.ms_type) - scores_using = self.cast(self.select( - self.cast(masks, ms.bool_), - self.cast(scores, ms.bool_), - self.cast(topk_mask, ms.bool_) - ), ms.int32) + scores_using = self.select(masks, scores, topk_mask) _, topk_inds = self.topKv2(scores_using, self.max_num) diff --git a/official/cv/MaskRCNN/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample_stage2.py b/official/cv/MaskRCNN/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample_stage2.py index 07c5bc62115426cf410795b4aedcba4160ab0f3b..470f709d62aa59301115b744b159f16212641f3a 100644 --- a/official/cv/MaskRCNN/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample_stage2.py +++ b/official/cv/MaskRCNN/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample_stage2.py @@ -214,6 +214,7 @@ class BboxAssignSampleForRcnn(nn.Cell): # normalized box coordinate boxes = boxes / self.image_h_w box_ids = F.range(self.start, self.limit, self.delta) + box_ids = self.cast(box_ids, mstype.int32) pos_masks_fb = self.expand_dims(pos_masks_fb, -1) boxes = self.cast(boxes, mstype.float32) pos_masks_fb = self.crop_and_resize(pos_masks_fb, boxes, box_ids, self.mask_shape) diff --git a/official/cv/MaskRCNN/maskrcnn_resnet50/src/maskrcnn/bbox_assign_sample_stage2.py b/official/cv/MaskRCNN/maskrcnn_resnet50/src/maskrcnn/bbox_assign_sample_stage2.py index e97ee0a832da2b0c92de87095209168c849cd7ff..681b8a300d05b8f17a4d483075b5f96e5f781430 100644 --- a/official/cv/MaskRCNN/maskrcnn_resnet50/src/maskrcnn/bbox_assign_sample_stage2.py +++ b/official/cv/MaskRCNN/maskrcnn_resnet50/src/maskrcnn/bbox_assign_sample_stage2.py @@ -212,6 +212,7 @@ class BboxAssignSampleForRcnn(nn.Cell): # normalized box coordinate boxes = boxes / self.image_h_w box_ids = F.range(self.start, self.limit, self.delta) + box_ids = self.cast(box_ids, mstype.int32) pos_masks_fb = self.expand_dims(pos_masks_fb, -1) boxes = self.cast(boxes, mstype.float32) pos_masks_fb = self.crop_and_resize(pos_masks_fb, boxes, box_ids, self.mask_shape) diff --git a/official/cv/SSD/config/ssd_mobilenet_v1_fpn_config.yaml b/official/cv/SSD/config/ssd_mobilenet_v1_fpn_config.yaml index 099ea3758203638f96c6cae7886c69047b21b8f5..6fa43d8a50492f5ddfc0eaabfa45243fb115e70a 100644 --- 
a/official/cv/SSD/config/ssd_mobilenet_v1_fpn_config.yaml +++ b/official/cv/SSD/config/ssd_mobilenet_v1_fpn_config.yaml @@ -23,7 +23,7 @@ match_threshold: 0.5 nms_threshold: 0.6 min_score: 0.1 max_boxes: 100 -all_reduce_fusion_config: [29, 58, 89] +all_reduce_fusion_config: [29, 58, 89, 201] # learning rate settings lr_init: 0.01333 diff --git a/official/cv/SSD/scripts/run_distribute_train.sh b/official/cv/SSD/scripts/run_distribute_train.sh index 893f4a76db748e5d14b823f63a163a1747f27ccf..37fe46d37a076ecb0b05b08de202b1f3954bc83d 100644 --- a/official/cv/SSD/scripts/run_distribute_train.sh +++ b/official/cv/SSD/scripts/run_distribute_train.sh @@ -17,7 +17,7 @@ echo "==============================================================================================================" echo "Please run the script as: " echo "bash run_distribute_train.sh DEVICE_NUM EPOCH_SIZE LR DATASET RANK_TABLE_FILE CONFIG_PATH PRE_TRAINED PRE_TRAINED_EPOCH_SIZE" -echo "for example: bash run_distribute_train.sh 8 500 0.2 coco /data/hccl.json /config_path /opt/ssd-300.ckpt(optional) 200(optional)" +echo "for example: bash run_distribute_train.sh 8 500 0.05 coco /data/hccl.json /config_path /opt/ssd-300.ckpt(optional) 200(optional)" echo "It is better to use absolute path." echo "=================================================================================================================" diff --git a/official/cv/SSD/scripts/run_distribute_train_gpu.sh b/official/cv/SSD/scripts/run_distribute_train_gpu.sh index 0778ad70ff9414d874f0d1e3ce2ffc9d77c499f5..0ff4b1818130aded3bdd9bd5c351148704c69422 100644 --- a/official/cv/SSD/scripts/run_distribute_train_gpu.sh +++ b/official/cv/SSD/scripts/run_distribute_train_gpu.sh @@ -17,7 +17,7 @@ echo "==============================================================================================================" echo "Please run the script as: " echo "bash run_distribute_train_gpu.sh DEVICE_NUM EPOCH_SIZE LR DATASET CONFIG_PATH PRE_TRAINED PRE_TRAINED_EPOCH_SIZE" -echo "for example: bash run_distribute_train_gpu.sh 8 500 0.2 coco /config_path /opt/ssd-300.ckpt(optional) 200(optional)" +echo "for example: bash run_distribute_train_gpu.sh 8 500 0.05 coco /config_path /opt/ssd-300.ckpt(optional) 200(optional)" echo "It is better to use absolute path." 
echo "=================================================================================================================" diff --git a/official/nlp/Pangu_alpha/src/pangu_alpha.py b/official/nlp/Pangu_alpha/src/pangu_alpha.py index 00f594b3d9a199a9e1476e472ffbef7e72caf088..02f1570be402c2206ec26e2ea417b3a2a0a35eee 100644 --- a/official/nlp/Pangu_alpha/src/pangu_alpha.py +++ b/official/nlp/Pangu_alpha/src/pangu_alpha.py @@ -23,7 +23,6 @@ from mindspore import Tensor, Parameter from mindspore.ops import operations as P from mindspore.ops import functional as F from mindspore.nn import Cell -from mindspore.ops._tracefunc import trace from mindformers.modules.transformer import VocabEmbedding, TransformerEncoder, TransformerEncoderLayer, \ AttentionMask, MoEConfig @@ -306,7 +305,6 @@ class PanguAlpha_Model(Cell): self.load_embedding_from_ckpt(config.load_ckpt_path) self.run_type = config.run_type - @trace def construct_blocks(self, hidden_state, encoder_masks, init_reset, batch_valid_length): if self.blocks is not None: for i in range(self.num_layers - 1): diff --git a/research/cv/TinySAM/README.md b/research/cv/TinySAM/README.md index bd0e17c077d17d03598b7475f25e466c5bc8b466..d01ba5f57034af42f5aebf6ca94a569a37544f3d 100644 --- a/research/cv/TinySAM/README.md +++ b/research/cv/TinySAM/README.md @@ -54,6 +54,10 @@ SNN-MLP After installing MindSpore via the official website, you can start evaluation as follows: +### Download + +Download ckpts from [modelzoo](https://download-mindspore.osinfra.cn/model_zoo/research/cv/TinySAM/tinysam_mindspore.ckpt). + ### Launch ```bash diff --git a/research/recommend/ULC/README.md b/research/recommend/ULC/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8b46f25d05c4bc9a052cc9c0101c6995b49bc6e9 --- /dev/null +++ b/research/recommend/ULC/README.md @@ -0,0 +1,90 @@ + +# Contents + +- [Contents](#contents) +- [UCL Description](#TAML-description) +- [Dataset](#dataset) +- [Environment Requirements](#environment-requirements) +- [Quick Start](#quick-start) +- [Script Description](#script-description) + - [Script and Sample Code](#script-and-sample-code) + - [Training Process](#training-process) + - [Training](#training) +- [ModelZoo Homepage](#modelzoo-homepage) + +# [ULC Description](#contents) + +Conversion rate prediction is critical to many online applications such as digital display advertising. To capture +dynamic data distribution, industrial systems often require retraining models on recent data daily or weekly. However, +the delay of conversion behavior usually leads to incorrect labeling, which is called delayed feedback problem. Existing +work may fail to introduce the correct information about false negative samples due to data sparsity and dynamic data +distribution. To directly introduce the correct feedback label information, we propose an Unbiased delayed feedback +Label Correction framework (ULC), which uses an auxiliary model to correct labels for observed negative feedback +samples. Firstly, we theoretically prove that the label-corrected loss is an unbiased estimate of the oracle loss using +true labels. Then, as there are no ready training data for label correction, counterfactual labeling is used to +construct artificial training data. Furthermore, since counterfactual labeling utilizes only partial training data, we +design an embedding-based alternative training method to enhance performance. 
+# [Dataset](#contents) + +- [Criteo dataset](https://drive.google.com/file/d/1x4KktfZtls9QjNdFYKCjTpfjM4tG2PcK/view?usp=sharing) + +# [Environment Requirements](#contents) + +- Hardware(CPU) + - Prepare a hardware environment with a CPU processor. +- Framework + - [MindSpore-2.0.0](https://www.mindspore.cn/install/en) + +- Requirements + +```shell + + $ conda create --name <env_name> --file requirements.txt + +``` + +- For more information, please check the resources below: + - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/en/r2.0/index.html) + - [MindSpore Python API](https://www.mindspore.cn/docs/en/r2.0/index.html) + +# [Quick Start](#contents) + +After installing MindSpore via the official website, you can start training and evaluation as follows: + +- process the dataset, then launch training as shown in [Training Process](#training-process) + +# [Script Description](#contents) + +## [Script and Sample Code](#contents) + +```bash +. +└─ULC + └─src + ├─alternate_train.py # alternating training loop of ULC + ├─data.py # data process + ├─loss.py # loss in ULC + ├─main.py # train ULC + ├─metrics.py # metrics in ULC + ├─models.py # ULC structure + └─utils.py # optimizer and argument-parsing helpers +``` + +## [Training Process](#contents) + +### Training + +- running on CPU + + ```bash + python ./src/main.py --method ULC --l2_reg 0.00001 --cuda_device 0 --lr 0.0001 --CD 7 --batch_size 1024 --optimizer Adam --seed 0 + ``` + +# [ModelZoo Homepage](#contents) + + Please check the official [homepage](https://gitee.com/mindspore/models) \ No newline at end of file diff --git a/research/recommend/ULC/src/alternate_train.py b/research/recommend/ULC/src/alternate_train.py new file mode 100644 index 0000000000000000000000000000000000000000..8a2945c01705461ae72b6e6a58a0b7418b7330bd --- /dev/null +++ b/research/recommend/ULC/src/alternate_train.py @@ -0,0 +1,234 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================ + +from copy import deepcopy + +from models import get_model +from loss import get_loss_fn +from utils import get_optimizer +from metrics import cal_llloss_with_logits, cal_auc, cal_prauc +from data import get_criteo_dataset, RandomAccessDataset +from tqdm import tqdm +import numpy as np +import mindspore.dataset as ds +import mindspore.nn as nn +import mindspore + +def test(model, test_data, params): + all_logits = [] + all_probs = [] + all_labels = [] + model.set_train(False) + + for batch in tqdm(test_data): + batch_x = batch[0] + batch_y = batch[1] + logits = model(batch_x) + all_logits.append(logits.asnumpy()) + all_labels.append(batch_y.asnumpy()) + all_probs.append(nn.Sigmoid()(logits).asnumpy()) + + all_logits = np.reshape(np.concatenate(all_logits, axis=0), (-1,)) + all_labels = np.reshape(np.concatenate(all_labels, axis=0), (-1,)) + all_probs = np.reshape(np.concatenate(all_probs, axis=0), (-1,)) + llloss = cal_llloss_with_logits(all_labels, all_logits) + auc = cal_auc(all_labels, all_probs) + prauc = cal_prauc(all_labels, all_probs) + return auc, prauc, llloss + +def get_valid_llloss_blc(model, test_data, correction_model): + all_logits = [] + all_labels = [] + model.set_train(False) + + for batch in tqdm(test_data): + batch_x = batch[0] + batch_y = batch[1] + logits0 = correction_model(batch_x) + corrections = (nn.Sigmoid()(logits0)).flatten() + corrections = mindspore.numpy.where(batch_y < 1, corrections, batch_y.float()) + logits = model(batch_x) + all_logits.append(logits.asnumpy()) + all_labels.append(corrections.asnumpy()) + + all_logits = np.reshape(np.concatenate(all_logits, axis=0), (-1,)) + all_labels = np.reshape(np.concatenate(all_labels, axis=0), (-1,)) + + + llloss = cal_llloss_with_logits(all_labels, all_logits) + return llloss + +def alternate_run(params, wandb): + cvr_model = None + sub_model = None + dataset = get_criteo_dataset(params) + sub_params = deepcopy(params) + + sub_params["dataset"] = "fsiwsg_cd_"+str(params["CD"])\ + +"_end_"+str(params["training_end_day"])+"_seed_"+str(params["seed"]) + np.random.seed(params["seed"]) + sub_dataset = get_criteo_dataset(sub_params)["train"] + np.random.seed(params["seed"]) + + params["log_step"] = 0 + params["idx"] = 1 + for _ in range(2): + sub_model = sub_train(cvr_model, sub_dataset, params) + cvr_model = cvr_train(sub_model, dataset, params, wandb) + params["idx"] += 1 + +def sub_train(cvr_model, sub_dataset, params): + train_data_x = sub_dataset["x"].to_numpy().astype(np.float32) + train_data_label = sub_dataset["labels"] + train_data_label = 1 - train_data_label + train_data = RandomAccessDataset(train_data_x, train_data_label) + train_data_loader = ds.GeneratorDataset(source=train_data, shuffle=True, column_names=['feature', 'label']) + train_data_loader = train_data_loader.batch(batch_size=params["batch_size"]) + + model = get_model("MLP_FSIW", params) + + if cvr_model is not None: + sd = cvr_model.parameters_dict() + part_sd = {k: v for k, v in sd.items() if ("category_embeddings" in k) or ("numeric_embeddings" in k)} + model_dict = model.parameters_dict() + model_dict.update(part_sd) + mindspore.load_param_into_net(model, model_dict) + + optimizer = nn.Adam(params=model.trainable_params(), learning_rate=0.001, weight_decay=0) + loss_fn = get_loss_fn("cross_entropy_loss") + + def forward_fn(data, label): + outputs = model(data) + targets = {"label": label} + loss_dict = loss_fn(targets, outputs, params) + loss = loss_dict["loss"] 
+ return loss + + grad_fn = mindspore.value_and_grad(forward_fn, None, optimizer.parameters) + + for _ in range(5): + for batch in train_data_loader: + batch_x = batch[0] + batch_y = batch[1][:, 0] + targets = {"label": batch_y} + + model.set_train(True) + _, grads = grad_fn(batch_x, targets["label"]) + + optimizer(grads) + + return model + +def cvr_train(sub_model, datasets, params, wandb): + model = get_model("MLP_SIG", params) + models = {"model": model, "submodel": sub_model} + + optimizer = get_optimizer(models["model"].trainable_params(), params["optimizer"], params) + + train_dataset = datasets["train"] + train_data_x = train_dataset["x"].to_numpy().astype(np.float32) + train_data_label = train_dataset["labels"] + train_data = RandomAccessDataset(train_data_x, train_data_label) + train_data_loader = ds.GeneratorDataset(source=train_data, shuffle=True, column_names=['feature', 'label']) + train_data_loader = train_data_loader.batch(batch_size=params["batch_size"]) + + valid_dataset = datasets["valid"] + valid_data_x = valid_dataset["x"].to_numpy().astype(np.float32) + valid_data_label = valid_dataset["labels"] + valid_data = RandomAccessDataset(valid_data_x, valid_data_label) + valid_data_loader = ds.GeneratorDataset(source=valid_data, column_names=['feature', 'label']) + valid_data_loader = valid_data_loader.batch(batch_size=params["batch_size"]) + + test_dataset = datasets["test"] + test_data_x = test_dataset["x"].to_numpy().astype(np.float32) + test_data_label = test_dataset["labels"] + test_data = RandomAccessDataset(test_data_x, test_data_label) + test_data_loader = ds.GeneratorDataset(source=test_data, column_names=['feature', 'label']) + test_data_loader = test_data_loader.batch(batch_size=params["batch_size"]) + + data_loaders = { + "train_data": train_data_loader, + "test_data": test_data_loader, + "valid_data": valid_data_loader + } + optimizers = { + "optimizer": optimizer + } + + + return train(models, optimizers, data_loaders, params, wandb) + + +def train(models, optimizers, data_loaders, params, wandb): + train_data = data_loaders["train_data"] + valid_data = data_loaders["valid_data"] + test_data = data_loaders["test_data"] + best_model = None + + optimizer = optimizers["optimizer"] + + loss_fn = get_loss_fn(params["loss"]) + val_llloss = [] + test_auc, test_prauc, test_llloss = [], [], [] + + def forward_fn(data, label): + outputs = models["model"](data) + logits0 = models["submodel"](data) + correction_label = nn.Sigmoid()(logits0).flatten() + label = mindspore.numpy.where(label < 1, correction_label, label.float()) + targets = {"label": label} + loss_dict = loss_fn(targets, outputs, params) + loss = loss_dict["loss"] + + return loss + + grad_fn = mindspore.value_and_grad(forward_fn, None, optimizer.parameters) + + for ep in range(params["train_epoch"]): + vllloss = get_valid_llloss_blc(models["model"], valid_data, models["submodel"]) + print("Val ep{}, llloss {}".format(ep, vllloss)) + tauc, tprauc, tllloss = test(models["model"], test_data, params) + print("Test ep{}, auc {}, prauc {}, llloss {}".format(ep, tauc, tprauc, tllloss)) + + if not val_llloss or vllloss < min(val_llloss): + best_model = models["model"].parameters_dict() + + val_llloss.append(vllloss) + test_auc.append(tauc) + test_prauc.append(tprauc) + test_llloss.append(tllloss) + + if len(val_llloss) - val_llloss.index(min(val_llloss)) > params["early_stop"]: + best_ep = val_llloss.index(min(val_llloss)) + print("Early stop at ep {}. Best ep {}. 
Best val_lloss {}.".format(ep, best_ep, min(val_llloss))) + print("Final test evaluation: auc {}, prauc {}, llloss {}."\ + .format(test_auc[best_ep], test_prauc[best_ep], test_llloss[best_ep])) + break + train_loss = [] + for batch in tqdm(train_data): + batch_x = batch[0] + batch_y = batch[1] + + models["model"].set_train(True) + models["submodel"].set_train(False) + loss, grads = grad_fn(batch_x, batch_y) + + train_loss.append(loss.asnumpy()) + optimizer(grads) + params["log_step"] += 1 + print("Train ep{}, loss {}".format(ep, np.mean(train_loss))) + + mindspore.load_param_into_net(models["model"], best_model) + return models["model"] diff --git a/research/recommend/ULC/src/data.py b/research/recommend/ULC/src/data.py new file mode 100644 index 0000000000000000000000000000000000000000..b15ee49d497c85af4b05a1500e1e552fb9e0b1af --- /dev/null +++ b/research/recommend/ULC/src/data.py @@ -0,0 +1,284 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import copy +import os +import pickle +import datetime +import pandas as pd +import numpy as np + +from sklearn.preprocessing import LabelEncoder + +from utils import parse_float_arg + +SECONDS_A_DAY = 60 * 60 * 24 +SECONDS_AN_HOUR = 60 * 60 +SECONDS_DELAY_NORM = 1 +SECONDS_FSIW_NORM = SECONDS_A_DAY * 5 +num_bin_size = (64, 16, 128, 64, 128, 64, 512, 512) + + +class RandomAccessDataset: + def __init__(self, data, label): + self._data = np.array(data) + self._label = np.array(label) + + def __getitem__(self, index): + return (self._data[index], self._label[index]) + + def __len__(self): + return len(self._data) + + +def get_data_df(params): + if params["dataset_source"] in ["criteo"]: + df = pd.read_csv(params["data_path"], sep="\t", header=None) + click_ts = df[df.columns[0]].to_numpy() + pay_ts = df[df.columns[1]].fillna(-1).to_numpy() + + if params["dataset_source"] == "criteo": + df = df[df.columns[2:]] + for c in df.columns[8:]: + df[c] = df[c].fillna("") + df[c] = df[c].astype(str) + + label_encoder = LabelEncoder() + for c in df.columns[8:]: + df[c] = label_encoder.fit_transform(df[c]) + + for i, c in enumerate(df.columns[:8]): + df[c] = df[c].fillna(-1) + df[c] = (df[c] - df[c].min()) / (df[c].max() - df[c].min()) + df[c] = np.floor(df[c] * (num_bin_size[i] - 0.00001)).astype(str) + df.columns = [str(i) for i in range(17)] + return df, click_ts, pay_ts + + +class DataDF: + + def __init__(self, features, click_ts, pay_ts, sample_ts=None, labels=None, delay_label=None): + self.x = features.copy(deep=True) + self.click_ts = copy.deepcopy(click_ts) + self.pay_ts = copy.deepcopy(pay_ts) + self.delay_label = delay_label + if sample_ts is not None: + self.sample_ts = copy.deepcopy(sample_ts) + else: + self.sample_ts = copy.deepcopy(click_ts) + if labels is not None: + self.labels = copy.deepcopy(labels) + else: + self.labels = (pay_ts > 0).astype(np.int32) + + def sub_days(self, start_day, end_day): + start_ts = start_day * 
SECONDS_A_DAY + end_ts = end_day * SECONDS_A_DAY + mask = np.logical_and(self.sample_ts >= start_ts, + self.sample_ts < end_ts) + return DataDF(self.x.iloc[mask], + self.click_ts[mask], + self.pay_ts[mask], + self.sample_ts[mask], + self.labels[mask]) + + def to_fsiw_1(self, cd, T): # build pre-training dataset 1 of FSIW + mask = np.logical_and(self.click_ts < T - cd, self.pay_ts > 0) + mask = np.logical_and(mask, self.pay_ts < T) + x = self.x.iloc[mask].copy(deep=True) + pay_ts = self.pay_ts[mask] + click_ts = self.click_ts[mask] + sample_ts = self.click_ts[mask] + label = np.zeros((x.shape[0],)) + label[pay_ts < T - cd] = 1 + # FSIW needs elapsed time information + x.insert(x.shape[1], column="elapse", value=( + T - click_ts - cd) / SECONDS_FSIW_NORM) + return DataDF(x, + click_ts, + pay_ts, + sample_ts, + label) + + def to_fsiw_0(self, cd, T): # build pre-training dataset 0 of FSIW + mask = np.logical_or(self.pay_ts >= T - cd, self.pay_ts < 0) + mask = np.logical_or(mask, self.pay_ts > T) + mask = np.logical_and(self.click_ts < T - cd, mask) + x = self.x.iloc[mask].copy(deep=True) + pay_ts = self.pay_ts[mask] + click_ts = self.click_ts[mask] + sample_ts = self.sample_ts[mask] + label = np.zeros((x.shape[0],)) + label[np.logical_or(pay_ts < 0, pay_ts > T)] = 1 + x.insert(x.shape[1], column="elapse", value=( + T - click_ts - cd) / SECONDS_FSIW_NORM) + return DataDF(x, + click_ts, + pay_ts, + sample_ts, + label) + + def shuffle(self): + idx = list(range(self.x.shape[0])) + np.random.shuffle(idx) + return DataDF(self.x.iloc[idx], + self.click_ts[idx], + self.pay_ts[idx], + self.sample_ts[idx], + self.labels[idx]) + + +def get_criteo_dataset(params): + name = params["dataset"] + print("loading dataset {}".format(name)) + cache_path = os.path.join( + params["data_cache_path"], "{}.pkl".format(name)) + if params["data_cache_path"] != "None" and os.path.isfile(cache_path): + print("cache_path {}".format(cache_path)) + print("\nloading from dataset cache") + with open(cache_path, "rb") as f: + data = pickle.load(f) + train_data = data["train"] + test_data = data["test"] + if "valid" in data: + valid_data = data["valid"] + if "clean" in data: + _ = data["clean"] + if "fn" in data: + fn_data = data["fn"] + else: + train_data, test_data, valid_data, fn_data = build_criteo_dataset(params, name, cache_path) + result = { + "train": { + "x": train_data.x, + "click_ts": train_data.click_ts, + "pay_ts": train_data.pay_ts, + "sample_ts": train_data.sample_ts, + "labels": train_data.labels, + }, + "test": { + "x": test_data.x, + "click_ts": test_data.click_ts, + "pay_ts": test_data.pay_ts, + "sample_ts": test_data.sample_ts, + "labels": test_data.labels, + } + } + if ("next" in name) or ("oracle" in name): + result["valid"] = { + "x": valid_data.x, + "click_ts": valid_data.click_ts, + "pay_ts": valid_data.pay_ts, + "sample_ts": valid_data.sample_ts, + "labels": valid_data.labels, + } + result["fn"] = { + "x": fn_data.x, + "click_ts": fn_data.click_ts, + "pay_ts": fn_data.pay_ts, + "sample_ts": fn_data.sample_ts, + "labels": fn_data.labels, + } + return result + + +def build_criteo_dataset(params, name, cache_path): + print("\nbuilding dataset") + + starttime = datetime.datetime.now() + if params["dataset_source"] == "criteo": + source_cache_path = "./cache_data.pkl" + if os.path.isfile(source_cache_path): + with open(source_cache_path, "rb") as f: + data = pickle.load(f) + else: + df, click_ts, pay_ts = get_data_df(params) + data = DataDF(df, click_ts, pay_ts) + with open(source_cache_path, "wb") as f: +
pickle.dump(data, f, protocol=4) + endtime = datetime.datetime.now() + print("Time:{}s".format((endtime - starttime).total_seconds())) + + if "fsiwsg" in name: + cd = parse_float_arg(name, "cd") + training_start = params["training_end_day"] - params["training_duration"] + train_data = data.sub_days(training_start, params["training_end_day"]).shuffle() + test_data = data.sub_days(params["training_end_day"], params["training_end_day"] + 1) + train_data = train_data.to_fsiw_0( + cd=cd * SECONDS_A_DAY, T=params["training_end_day"] * SECONDS_A_DAY) + cvrs = np.reshape(train_data.pay_ts > 0, (-1, 1)) + pot_cvr = np.reshape(train_data.pay_ts > params["training_end_day"] * SECONDS_A_DAY, (-1, 1)) + train_data.labels = np.reshape(train_data.labels, (-1, 1)) + train_data.labels = np.concatenate( + [train_data.labels, cvrs, pot_cvr], axis=1) + test_data = test_data.to_fsiw_0( + cd=cd * SECONDS_A_DAY, T=params["training_end_day"] * SECONDS_A_DAY) + elif "fsiw_next" in name: + cd = parse_float_arg(name, "cd") + training_start = params["training_end_day"] - params["training_duration"] + train_data = data.sub_days(training_start, params["training_end_day"]).shuffle() + mask = train_data.pay_ts > (params["training_end_day"] * SECONDS_A_DAY) + train_data.labels[mask] = 0 + train_data.x.insert(train_data.x.shape[1], column="elapse", value=(params[ + "training_end_day"] * SECONDS_A_DAY - train_data.click_ts) / SECONDS_FSIW_NORM) + fn_data = DataDF(train_data.x.iloc[mask], + train_data.click_ts[mask], + train_data.pay_ts[mask], + train_data.sample_ts[mask], + train_data.labels[mask]) + valid_data = data.sub_days(params["training_end_day"], + params["training_end_day"] + 1 * params["valid_test_size"]) + valid_data.x.insert(valid_data.x.shape[1], column="elapse", value=(params[ + "training_end_day"] * SECONDS_A_DAY - valid_data.click_ts) / SECONDS_FSIW_NORM) + val_mask = valid_data.pay_ts > ( + (params["training_end_day"] + 1 * params["valid_test_size"]) * SECONDS_A_DAY) + valid_data.labels[val_mask] = 0 + test_data = data.sub_days(params["training_end_day"] + 1 * params["valid_test_size"], + params["training_end_day"] + 2 * params["valid_test_size"]) + test_data.x.insert(test_data.x.shape[1], column="elapse", value=(params[ + "training_end_day"] * SECONDS_A_DAY - test_data.click_ts) / SECONDS_FSIW_NORM) + elif "oracle" in name: + cd = parse_float_arg(name, "cd") + training_start = params["training_end_day"] - params["training_duration"] + train_data = data.sub_days(training_start, params["training_end_day"]).shuffle() + train_data.x.insert(train_data.x.shape[1], column="elapse", value=(params[ + "training_end_day"] * SECONDS_A_DAY - train_data.click_ts) / SECONDS_FSIW_NORM) + + mask = train_data.pay_ts > (params["training_end_day"] * SECONDS_A_DAY) + fn_data = DataDF(train_data.x.iloc[mask], + train_data.click_ts[mask], + train_data.pay_ts[mask], + train_data.sample_ts[mask], + train_data.labels[mask]) + fn_data.labels[:] = 0 + + valid_data = data.sub_days(params["training_end_day"], + params["training_end_day"] + 1 * params["valid_test_size"]) + valid_data.x.insert(valid_data.x.shape[1], column="elapse", value=(params[ + "training_end_day"] * SECONDS_A_DAY - valid_data.click_ts) / SECONDS_FSIW_NORM) + test_data = data.sub_days(params["training_end_day"] + 1 * params["valid_test_size"], + params["training_end_day"] + 2 * params["valid_test_size"]) + test_data.x.insert(test_data.x.shape[1], column="elapse", value=(params[ + "training_end_day"] * SECONDS_A_DAY - test_data.click_ts) / SECONDS_FSIW_NORM) + else: + 
raise NotImplementedError("{} dataset does not exist".format(name)) + if params["data_cache_path"] != "None": + with open(cache_path, "wb") as f: + if ("next" in name) or ("oracle" in name): + pickle.dump({"train": train_data, "test": test_data, "valid": valid_data, "fn": fn_data}, f) + else: + pickle.dump({"train": train_data, "test": test_data}, f) + + return train_data, test_data, valid_data, fn_data diff --git a/research/recommend/ULC/src/loss.py b/research/recommend/ULC/src/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..743d958334e186bec5f9aef14e5f48fd69696ed9 --- /dev/null +++ b/research/recommend/ULC/src/loss.py @@ -0,0 +1,36 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import mindspore.numpy as np +import mindspore.ops as ops +import mindspore + + +def stable_log1pex(x): + return -np.where(x < 0, x, np.zeros_like(x)) + np.log(1 + np.exp(-np.absolute(x))) + +def cross_entropy_loss(targets, outputs, params=None): + z = targets["label"] + x = outputs + x = ops.Reshape()(x, (-1,)) + z = z.float() + loss_value = ops.binary_cross_entropy_with_logits(x, z, mindspore.Tensor([1.0]), mindspore.Tensor([1.0])) + + return {"loss": loss_value} + +def get_loss_fn(name): + if name == "cross_entropy_loss": + return cross_entropy_loss + raise NotImplementedError("{} loss does not implemented".format(name)) diff --git a/research/recommend/ULC/src/main.py b/research/recommend/ULC/src/main.py new file mode 100644 index 0000000000000000000000000000000000000000..406db7bdab98adbfd93d92479ab124eebca845dc --- /dev/null +++ b/research/recommend/ULC/src/main.py @@ -0,0 +1,79 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import argparse +import os +import pathlib +from copy import deepcopy + +import numpy as np + +from alternate_train import alternate_run + +wandb = None + + +def run_params(args): + params = deepcopy(vars(args)) + params["model"] = "MLP_SIG" + if args.data_cache_path != "None": + pathlib.Path(args.data_cache_path).mkdir(parents=True, exist_ok=True) + + if args.method == "ULC": + params["loss"] = "cross_entropy_loss" + params["dataset"] = "last_30_train_test_fsiw_next" + "_end_" + str(args.training_end_day) + "_seed_" + str( + args.seed) + + return params + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--method", choices=["ULC"], + type=str, required=True) + parser.add_argument("--seed", type=int, default=0) + parser.add_argument("--dataset_source", type=str, default="criteo", choices=["criteo"]) + parser.add_argument("--CD", type=int, default=7, + help="interval between counterfactual deadline and actual deadline") + parser.add_argument("--lr", type=float, default=1e-3) + parser.add_argument("--data_path", type=str, default="./data/data.txt", + help="path of the data.txt in criteo dataset") + parser.add_argument("--data_cache_path", type=str, default="./data") + parser.add_argument("--batch_size", type=int, + default=1024) + parser.add_argument("--epoch", type=int, default=5, + help="training epoch of pretraining") + parser.add_argument("--l2_reg", type=float, default=0, + help="l2 regularizer strength") + parser.add_argument("--training_end_day", type=int, default=58, + help="deadline for training data") + parser.add_argument("--training_duration", type=int, default=21, + help="duration of training data") + parser.add_argument("--valid_test_size", type=float, default=1, + help="duration of valid/test data") + parser.add_argument("--train_epoch", type=int, default=100, + help="max train epoch") + parser.add_argument("--early_stop", type=int, default=4) + parser.add_argument("--cuda_device", type=str, default="0") + parser.add_argument("--optimizer", type=str, default="Adam") + parser.add_argument("--save_model", type=int, default=0) + parser.add_argument("--base_model", type=str, default="MLP", choices=["MLP"]) + + args = parser.parse_args() + params = run_params(args) + os.environ["CUDA_VISIBLE_DEVICES"] = params["cuda_device"] + np.random.seed(args.seed) + + alternate_run(params, wandb) diff --git a/research/recommend/ULC/src/metrics.py b/research/recommend/ULC/src/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..274a0e5080840871819d0589ecc9f7132dcb5500 --- /dev/null +++ b/research/recommend/ULC/src/metrics.py @@ -0,0 +1,73 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +from sklearn import metrics +import numpy as np + +def sigmoid(x): + return 1/(1+np.exp(np.clip(-x, a_min=-1e50, a_max=1e20))) + +def cal_auc(label, pos_prob): + fpr, tpr, _ = metrics.roc_curve(label, pos_prob, pos_label=1) + auc = metrics.auc(fpr, tpr) + return auc + +def stable_log1pex(x): + return -np.minimum(x, 0) + np.log(1+np.exp(-np.abs(x))) + +def cal_llloss_with_logits(label, logits): + ll = -np.mean(label*(-stable_log1pex(logits)) + (1-label)*(-logits - stable_log1pex(logits))) + return ll + +def cal_llloss_with_logits_and_weight(label, logits, logits0, logits1): + x = logits + + pos_loss = stable_log1pex(x) + neg_loss = x + stable_log1pex(x) + + pos_weight = 1/(logits1+1e-8) + neg_weight = logits0 + + clf_loss = np.mean( + pos_loss*pos_weight*label + neg_loss*neg_weight*(1-label)) + + weight = np.mean(pos_weight*label + neg_weight*(1-label)) + + return clf_loss/weight + +def prob_clip(x): + return np.clip(x, a_min=1e-20, a_max=1) + +def cal_llloss_with_neg_log_prob(label, neg_log_prob): + ll = -np.mean((1-label)*neg_log_prob + label*(np.log(prob_clip(1 - prob_clip(np.exp(neg_log_prob)))))) + return ll + +def cal_llloss_with_prob(label, prob): + ll = -np.mean(label*np.log(prob_clip(prob)) + (1-label)*(np.log(prob_clip(1-prob)))) + return ll + +def cal_prauc(label, pos_prob): + precision, recall, _ = metrics.precision_recall_curve(label, pos_prob) + area = metrics.auc(recall, precision) + return area + +def cal_acc(label, prob): + label = np.reshape(label, (-1,)) + prob = np.reshape(prob, (-1,)) + prob_acc = np.mean(label*prob) + return prob_acc + +def stable_softplus(x): + return np.log(1 + np.exp(-np.abs(x))) + np.maximum(x, 0) diff --git a/research/recommend/ULC/src/models.py b/research/recommend/ULC/src/models.py new file mode 100644 index 0000000000000000000000000000000000000000..d8d63ab2575f88533668da2d7b68cb020ebde6c5 --- /dev/null +++ b/research/recommend/ULC/src/models.py @@ -0,0 +1,103 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================ + +from mindspore import nn +from mindspore import ops + +class MLP(nn.Cell): + def __init__(self, name, params): + super(MLP, self).__init__() + self.model_name = name + self.params = params + self.dataset_source = params["dataset_source"] + if params["dataset_source"] == "criteo": + self.category_embeddings = nn.CellList([ + nn.Embedding(55824, 64), + nn.Embedding(5443, 64), + nn.Embedding(13073, 64), + nn.Embedding(13170, 64), + nn.Embedding(3145, 64), + nn.Embedding(33843, 64), + nn.Embedding(14304, 64), + nn.Embedding(11, 64), + nn.Embedding(13601, 64) + ]) + + self.numeric_embeddings = nn.CellList([ + nn.Embedding(64, 64), + nn.Embedding(16, 64), + nn.Embedding(128, 64), + nn.Embedding(64, 64), + nn.Embedding(128, 64), + nn.Embedding(64, 64), + nn.Embedding(512, 64), + nn.Embedding(512, 64) + ]) + presize = 1088 + + if name == "MLP_FSIW": + print("using elapse feature") + presize += 1 + + self.mlp = nn.CellList([ + nn.Dense(presize, 256, activation='leakyrelu'), + nn.Dense(256, 256, activation='leakyrelu'), + nn.Dense(256, 128, activation='leakyrelu') + ]) + + if self.model_name in ["MLP_SIG", "MLP_FSIW"]: + self.mlp.append(nn.Dense(128, 1)) + else: + raise ValueError("model name {} not exist".format(name)) + + def construct(self, x): + if self.dataset_source == "criteo": + cate_embeddings = [] + nume_embeddings = [] + if self.model_name == "MLP_FSIW": + for i in range(8): + nume_embeddings.append(self.numeric_embeddings[i](x[:, i].int())) + + for i in range(9): + cate_embeddings.append(self.category_embeddings[8 - i](x[:, -i - 2].int())) + + features = nume_embeddings + cate_embeddings + [x[:, -1:]] + x = ops.Concat(axis=1)(features) + else: + for i in range(8): + nume_embeddings.append(self.numeric_embeddings[i](x[:, i].int())) + + for i in range(9): + cate_embeddings.append(self.category_embeddings[8 - i](x[:, -i - 2].int())) + + features = nume_embeddings + cate_embeddings + x = ops.Concat(axis=1)(features) + + for layer in self.mlp: + x = layer(x) + + if self.model_name in ["MLP_SIG", "MLP_FSIW"]: + return x + raise NotImplementedError() + +def get_model(name, params): + if name in ["MLP_tn_dp", "MLP_FSIW"]: + return MLP(name, params) + if name == "MLP_SIG": + if params["base_model"] == "MLP": + return MLP(name, params) + else: + raise NotImplementedError() + return 0 diff --git a/research/recommend/ULC/src/utils.py b/research/recommend/ULC/src/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b5aca4c044eea0cfdeb5077537e9576e121aab8d --- /dev/null +++ b/research/recommend/ULC/src/utils.py @@ -0,0 +1,34 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import re +import mindspore.nn as nn + + +def get_optimizer(p, name, params): + if name == "Adam": + return nn.Adam(params=p, learning_rate=params["lr"], weight_decay=params["l2_reg"]) + return 0 + + +def parse_float_arg(Input, prefix): + p = re.compile(prefix+"_[+-]?([0-9]*[.])?[0-9]+") + m = p.search(Input) + if m is None: + return None + Input = m.group() + p = re.compile("[+-]?([0-9]*[.])?[0-9]+") + m = p.search(Input) + return float(m.group())
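As a closing usage note (not part of the patch): `parse_float_arg` is how `data.py` recovers hyperparameters that `alternate_run` encodes into dataset names such as `fsiwsg_cd_7_end_58_seed_0`. A quick sanity check of the regex defined above:

```python
# Illustration only: parse_float_arg pulls the number that follows a given
# prefix out of a dataset name built in alternate_run(), e.g. "fsiwsg_cd_7_...".
from utils import parse_float_arg

name = "fsiwsg_cd_7_end_58_seed_0"
print(parse_float_arg(name, "cd"))    # 7.0
print(parse_float_arg(name, "end"))   # 58.0
print(parse_float_arg(name, "lr"))    # None -- prefix not present
```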