diff --git a/OWNERS b/OWNERS
index 0cd9b621a7bfe427b94fe74b6f75ed0f01921904..8d29f4f1f05cb79f8c37835e41939722535c2667 100644
--- a/OWNERS
+++ b/OWNERS
@@ -10,3 +10,4 @@ approvers:
- baochong
- luoyang42
- wang_hua_2019
+- zhangyifan999
diff --git a/official/audio/MELGAN/README.md b/official/audio/MELGAN/README.md
index 448fa1bb5d57e140d2f7777fa10e3379f0642e13..4185a79a59de82d5220dd46f58de302259a598ca 100644
--- a/official/audio/MELGAN/README.md
+++ b/official/audio/MELGAN/README.md
@@ -46,7 +46,7 @@ Dataset used: [LJ Speech]()
- Dataset size:2.6GB,13,100 short audio clips of a single speaker reading passages from 7 non-fiction books.
- Data format:Each audio file is a single-channel 16-bit PCM WAV with a sample rate of 22050 Hz
- - The audio data needs to be processed to a mel-spectrum, and you can refer to the script in [mel-spectrogram data creation](https://github.com/seungwonpark/melgan/blob/master/preprocess.py). Non CUDA environment needs to delete `. cuda()` in `utils/stfy.py`. To save data in the `npy` format, `preprocess.py` also needs to be modified. As follows:
+ - The audio data needs to be processed into mel-spectrograms; you can refer to the script in [mel-spectrogram data creation](https://github.com/seungwonpark/melgan/blob/master/preprocess.py). In a non-CUDA environment, delete the `.cuda()` calls in `utils/stft.py`. To save data in the `npy` format, `preprocess.py` also needs to be modified as follows:
```
# 37 - 38 lines
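For reference, the change at lines 37-38 of the upstream `preprocess.py` amounts to swapping the tensor dump (a `torch.save`, per the linked repository) for an `.npy` file. This is a hedged sketch only; `save_mel_as_npy`, the tensor shape, and the path handling are illustrative assumptions, not the exact upstream code:

```python
import numpy as np
import torch

def save_mel_as_npy(mel: torch.Tensor, wav_path: str) -> None:
    # drop the batch dimension and move the tensor to host memory
    mel_np = mel.squeeze(0).cpu().numpy()
    # replaces the original torch-format dump with a .npy file that the
    # MindSpore data pipeline can load directly
    np.save(wav_path.replace('.wav', '.npy'), mel_np)
```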
diff --git a/official/audio/MELGAN/README_CN.md b/official/audio/MELGAN/README_CN.md
index 03a3b7badd31889dc4666668db587d7ce3afd340..5d80d749a4a672ce67e1c1d17d4ca158284f8af1 100644
--- a/official/audio/MELGAN/README_CN.md
+++ b/official/audio/MELGAN/README_CN.md
@@ -46,7 +46,7 @@ MelGAN模型是非自回归全卷积模型。它的参数比同类模型少得
- Dataset size:2.6GB,包含13,100条只有一个说话人的短语音。语音的内容来自7本纪实书籍。
- 数据格式:每条语音文件都是单声道、16-bit以及采样率为22050。
- - 语音需要被处理为Mel谱, 可以参考脚本[Mel谱处理脚本](https://github.com/seungwonpark/melgan/blob/master/preprocess.py)。非CUDA环境需删除`utils/stfy.py`中的`.cuda()`,因为要保存`npy`格式的数据,所以`preproccess.py`也需要修改以下,参考代码如下:
+ - 语音需要被处理为Mel谱, 可以参考脚本[Mel谱处理脚本](https://github.com/seungwonpark/melgan/blob/master/preprocess.py)。非CUDA环境需删除`utils/stft.py`中的`.cuda()`,因为要保存`npy`格式的数据,所以`preprocess.py`也需要修改一下,参考代码如下:
```
# 37 - 38 行
diff --git a/official/audio/Tacotron2/README.md b/official/audio/Tacotron2/README.md
index 53283ba9b019691a9454d9b9a7c8e51337f86611..51c939693002b061c2d6f4d6a4d5c54f134e39ef 100644
--- a/official/audio/Tacotron2/README.md
+++ b/official/audio/Tacotron2/README.md
@@ -76,8 +76,8 @@ After installing MindSpore via the official website, you can start training and
# example: bash run_standalone_train.sh /path/ljspeech.hdf5 0
# run distributed training
- bash run_distributed_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN]
- # example: bash run_distributed_train.sh /path/ljspeech.h5 ../hccl_8p_01234567_127.0.0.1.json 8 0
+ bash run_distribute_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN]
+ # example: bash run_distribute_train.sh /path/ljspeech.h5 ../hccl_8p_01234567_127.0.0.1.json ljspeech 8 0
# run evaluation
bash run_eval.sh [OUTPUT_PATH] [MODEL_CKPT] [DEVICE_ID]  # the text to synthesize is set in config.py (you can modify `text` in ljspeech_config.yaml)
@@ -246,7 +246,7 @@ Parameters for both training and evaluation can be set in [DATASET]_config.yaml
```bash
cd scripts
- bash run_distributed_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN]
+ bash run_distribute_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN]
```
Note: `DATASET_PATH` is the directory containing the hdf5 file.
diff --git a/official/audio/Tacotron2/README_CN.md b/official/audio/Tacotron2/README_CN.md
index e104f6e4872f367bd2ab838ea118ee80fc74952c..aaf135f92cb8ce673929f52cde79a20551ee576d 100644
--- a/official/audio/Tacotron2/README_CN.md
+++ b/official/audio/Tacotron2/README_CN.md
@@ -76,8 +76,8 @@ Tacotron2实质上是一种包含编码器和解码器的序列到序列模型
# 示例:bash run_standalone_train.sh /path/ljspeech.hdf5 0
# 运行分布式训练
- bash run_distributed_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN]
- # 示例:bash run_distributed_train.sh /path/ljspeech.h5 ../hccl_8p_01234567_127.0.0.1.json 8 0
+ bash run_distribute_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN]
+ # 示例:bash run_distribute_train.sh /path/ljspeech.h5 ../hccl_8p_01234567_127.0.0.1.json ljspeech 8 0
# 运行评估
bash run_eval.sh [OUTPUT_PATH] [MODEL_CKPT] [DEVICE_ID]  # the text to synthesize is set in config.py (you can modify `text` in ljspeech_config.yaml)
@@ -246,7 +246,7 @@ tacotron2/
```bash
cd scripts
- bash run_distributed_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN]
+ bash run_distribute_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] [RANK_SIZE] [DEVICE_BEGIN]
```
注:`DATASET_PATH`是包含HDF5文件的目录。
diff --git a/official/audio/Tacotron2/src/tacotron2.py b/official/audio/Tacotron2/src/tacotron2.py
index e685cf0f8d0ca516cc18a92674488e9a12b6ca44..a5b573f31a1615e71fb4faa2fd502f8cdfa4ef5e 100644
--- a/official/audio/Tacotron2/src/tacotron2.py
+++ b/official/audio/Tacotron2/src/tacotron2.py
@@ -1168,7 +1168,7 @@ class TrainStepWrap(nn.Cell):
overflow = ops.logical_not(amp.all_finite(grads))
if self.reducer_flag:
- overflow = self.allreduce(overflow.to(mstype.float32)) >= self.base
+ overflow = self.all_reduce(overflow.to(mstype.float32)) >= self.base
overflow = self.loss_scaling_manager(self.loss_scale, overflow)
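The renamed `self.all_reduce` is what keeps loss-scale handling synchronized in data-parallel training: each rank sums its local overflow flag across the communication group, so every rank reaches the same verdict and skips (or applies) the optimizer step in lockstep. A minimal sketch of the pattern, assuming MindSpore 2.x `amp`/`ops` APIs and an already-initialized communication group (`global_overflow` is an illustrative name):

```python
import mindspore as ms
from mindspore import amp, ops

def global_overflow(grads):
    # 1.0 on this rank if any local gradient is inf/nan, else 0.0
    local_flag = ops.logical_not(amp.all_finite(grads)).to(ms.float32)
    # summing the flags across ranks gives every device the same answer
    return ops.AllReduce()(local_flag) >= 1.0
```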
diff --git a/official/cv/FasterRCNN/README_CN.md b/official/cv/FasterRCNN/README_CN.md
index 676b27231976d34369334b0f11406f711df07f2a..ace823dbfc7e00a1fb8f7ef972b5d92316c3b737 100644
--- a/official/cv/FasterRCNN/README_CN.md
+++ b/official/cv/FasterRCNN/README_CN.md
@@ -625,7 +625,7 @@ bash run_infer_cpp.sh [MINDIR_PATH] [DATA_PATH] [ANNO_PATH] [DEVICE_TYPE] [IMAGE
| 上传日期 | 2020/8/31 | 2021/2/10 |2022/8/10|
| MindSpore版本 | 1.0.0 |1.2.0 |1.7.0|
| 数据集 | COCO 2017 |COCO 2017 |FaceMaskDetection|
-| 训练参数 | epoch=12, batch_size=2 |epoch=12, batch_size=2 |epoch=20,batch_size=2|
+| 训练参数 | epoch=12, batch_size=2 |epoch=20, batch_size=2 |epoch=20,batch_size=2|
| 优化器 | SGD |SGD |SGD|
| 损失函数 | Softmax交叉熵,Sigmoid交叉熵,SmoothL1Loss |Softmax交叉熵,Sigmoid交叉熵,SmoothL1Loss |Softmax交叉熵,Sigmoid交叉熵,SmoothL1Loss|
| 速度 | 1卡:190毫秒/步;8卡:200毫秒/步 | 1卡:320毫秒/步;8卡:335毫秒/步 |1卡:7328毫秒/步|
diff --git a/official/cv/FasterRCNN/scripts/run_distribute_train_ascend.sh b/official/cv/FasterRCNN/scripts/run_distribute_train_ascend.sh
index 86588bf774e0ab70aae3eed4e1acc60a4d6da7a7..8087e1f88701b48b668a4737dd5ebf9b0f4023b3 100644
--- a/official/cv/FasterRCNN/scripts/run_distribute_train_ascend.sh
+++ b/official/cv/FasterRCNN/scripts/run_distribute_train_ascend.sh
@@ -96,6 +96,7 @@ export HCCL_CONNECT_TIMEOUT=600
export DEVICE_NUM=8
export RANK_SIZE=8
export RANK_TABLE_FILE=$PATH1
+export MS_ASCEND_CHECK_OVERFLOW_MODE="SATURATION_MODE"
for((i=0; i<${DEVICE_NUM}; i++))
do
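`MS_ASCEND_CHECK_OVERFLOW_MODE=SATURATION_MODE` switches Ascend floating-point overflow behavior from INF/NAN mode to saturation mode. If it is more convenient to set from Python, a sketch, under the assumption that the variable must be in the environment before MindSpore initializes the device:

```python
import os

# mirror the `export` added to the launch scripts; set this before
# MindSpore touches the Ascend device
os.environ["MS_ASCEND_CHECK_OVERFLOW_MODE"] = "SATURATION_MODE"

import mindspore  # noqa: E402  deliberately imported after the env setup
```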
diff --git a/official/cv/FasterRCNN/scripts/run_distribute_train_gpu.sh b/official/cv/FasterRCNN/scripts/run_distribute_train_gpu.sh
index 8b27d1c67045df723712934d7eea82a5de73f9ff..d7af4ca64d193b2713d27dc7928443a75132dbcc 100644
--- a/official/cv/FasterRCNN/scripts/run_distribute_train_gpu.sh
+++ b/official/cv/FasterRCNN/scripts/run_distribute_train_gpu.sh
@@ -97,4 +97,5 @@ mpirun -n $RANK_SIZE --output-filename log_output --merge-stderr-to-stdout --all
--pre_trained=$PRETRAINED_PATH \
--backbone=$3 \
--coco_root=$PATH3 \
+ --base_lr=0.008 \
--mindrecord_dir=$mindrecord_dir > train.log 2>&1 &
\ No newline at end of file
diff --git a/official/cv/FasterRCNN/scripts/run_eval_ascend.sh b/official/cv/FasterRCNN/scripts/run_eval_ascend.sh
index d276546174af5532e7d89df4ac8276edf8b492fe..d9b2be113aacc44c9811fb762a30e45f2f00274e 100644
--- a/official/cv/FasterRCNN/scripts/run_eval_ascend.sh
+++ b/official/cv/FasterRCNN/scripts/run_eval_ascend.sh
@@ -95,6 +95,7 @@ export DEVICE_NUM=1
export RANK_SIZE=$DEVICE_NUM
export DEVICE_ID=$5
export RANK_ID=0
+export MS_ASCEND_CHECK_OVERFLOW_MODE="SATURATION_MODE"
if [ -d "eval" ];
then
diff --git a/official/cv/FasterRCNN/scripts/run_standalone_train_ascend.sh b/official/cv/FasterRCNN/scripts/run_standalone_train_ascend.sh
index 565f1c56245b75d9a9cd6ad781504c41b89f18fd..828f5133e125583a73247bc1c9c433bb6806a3ce 100644
--- a/official/cv/FasterRCNN/scripts/run_standalone_train_ascend.sh
+++ b/official/cv/FasterRCNN/scripts/run_standalone_train_ascend.sh
@@ -88,6 +88,7 @@ export DEVICE_NUM=1
export DEVICE_ID=$4
export RANK_ID=0
export RANK_SIZE=1
+export MS_ASCEND_CHECK_OVERFLOW_MODE="SATURATION_MODE"
if [ -d "train" ];
then
diff --git a/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample.py b/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample.py
index a49572c6aee2e115d8bfe6dd98923189fa3cceae..57c758a5110db92f064ca04b2c46fb86662d24b8 100644
--- a/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample.py
+++ b/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample.py
@@ -144,13 +144,7 @@ class BboxAssignSample(nn.Cell):
num_pos = self.cast(self.logicalnot(valid_pos_index), self.ms_type)
num_pos = self.sum_inds(num_pos, -1)
unvalid_pos_index = self.less(self.range_pos_size, num_pos)
- valid_neg_index = self.logicaland(
- self.cast(self.concat((
- self.cast(self.check_neg_mask, ms.int32),
- self.cast(unvalid_pos_index, ms.int32)
- )), ms.bool_),
- self.cast(valid_neg_index, ms.bool_)
- )
+ valid_neg_index = self.logicaland(self.concat((self.check_neg_mask, unvalid_pos_index)), valid_neg_index)
pos_bboxes_ = self.gatherND(bboxes, pos_index)
pos_gt_bboxes_ = self.gatherND(gt_bboxes_i, pos_assigned_gt_index)
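The simplification works because `ops.Concat` and `ops.LogicalAnd` accept boolean tensors directly, so the deleted int32/bool round-trips were redundant work in the sampling hot path. A toy sketch with invented shapes:

```python
import numpy as np
from mindspore import Tensor, ops

concat = ops.Concat(axis=0)
logicaland = ops.LogicalAnd()

check_neg_mask = Tensor(np.array([True, True, False]))
unvalid_pos_index = Tensor(np.array([False, True]))
valid_neg_index = Tensor(np.array([True, False, True, True, False]))

# bool tensors concatenate and AND without any casting
result = logicaland(concat((check_neg_mask, unvalid_pos_index)), valid_neg_index)
print(result)  # [ True False False False False]
```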
diff --git a/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample_stage2.py b/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample_stage2.py
index 7602adcc53ae5f30bb56657a26e4c56cbbe8b5ec..5cbc16ee5d2ea22a82253db0fa5c020dea5246ae 100644
--- a/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample_stage2.py
+++ b/official/cv/FasterRCNN/src/FasterRcnn/bbox_assign_sample_stage2.py
@@ -114,7 +114,7 @@ class BboxAssignSampleForRcnn(nn.Cell):
gt_bboxes_i, self.check_gt_one)
bboxes = self.select(self.cast(self.tile(self.reshape(self.cast(valid_mask, ms.int32), \
(self.num_bboxes, 1)), (1, 4)), ms.bool_), \
- self.cast(bboxes, ms.float16), self.check_anchor_two)
+ bboxes, self.check_anchor_two)
overlaps = self.iou(bboxes, gt_bboxes_i)
@@ -171,13 +171,7 @@ class BboxAssignSampleForRcnn(nn.Cell):
neg_index, valid_neg_index = self.random_choice_with_mask_neg(self.equal(assigned_gt_inds5, 0))
unvalid_pos_index = self.less(self.range_pos_size, num_pos)
- valid_neg_index = self.logicaland(
- self.cast(self.concat((
- self.cast(self.check_neg_mask, ms.int32),
- self.cast(unvalid_pos_index, ms.int32)
- )), ms.bool_),
- self.cast(valid_neg_index, ms.bool_)
- )
+ valid_neg_index = self.logicaland(self.concat((self.check_neg_mask, unvalid_pos_index)), valid_neg_index)
neg_index = self.reshape(neg_index, self.reshape_shape_neg)
valid_neg_index = self.cast(valid_neg_index, ms.int32)
diff --git a/official/cv/FasterRCNN/src/FasterRcnn/proposal_generator.py b/official/cv/FasterRCNN/src/FasterRcnn/proposal_generator.py
index 5317ca51c35a6ba8f3ee3de144f4e0b06f4f4623..16e2b42655ab47f2110d66a1ff803d684f7f90d6 100644
--- a/official/cv/FasterRCNN/src/FasterRcnn/proposal_generator.py
+++ b/official/cv/FasterRCNN/src/FasterRcnn/proposal_generator.py
@@ -183,21 +183,13 @@ class Proposal(nn.Cell):
mlvl_proposals = mlvl_proposals + (proposals,)
mlvl_mask = mlvl_mask + (mask_valid,)
- proposals = self.concat_axis0(
- tuple(self.cast(proposal, ms.int64) for proposal in mlvl_proposals)
- )
- masks = self.concat_axis0(
- tuple(self.cast(mask, ms.int64) for mask in mlvl_mask)
- )
+ proposals = self.concat_axis0(mlvl_proposals)
+ masks = self.concat_axis0(mlvl_mask)
_, _, _, _, scores = self.split(proposals)
scores = self.squeeze(scores)
topk_mask = self.cast(self.topK_mask, self.ms_type)
- scores_using = self.cast(self.select(
- self.cast(masks, ms.bool_),
- self.cast(scores, ms.bool_),
- self.cast(topk_mask, ms.bool_)
- ), ms.int32)
+ scores_using = self.select(masks, scores, topk_mask)
_, topk_inds = self.topKv2(scores_using, self.max_num)
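This hunk is more than a cleanup: the deleted code cast `scores` to bool before `select`, which collapses every non-zero score to 1 and destroys the ranking that `topKv2` needs. `ops.Select` only requires the condition to be boolean; the two value branches keep their float dtype. A toy sketch with invented values:

```python
import numpy as np
from mindspore import Tensor, ops

masks = Tensor(np.array([True, False, True]))
scores = Tensor(np.array([0.9, 0.5, 0.1], np.float32))
topk_mask = Tensor(np.full(3, -1.0, np.float32))

# valid slots keep their real objectness score; invalid slots get the
# sentinel, so the subsequent top-k ranks on meaningful values
print(ops.Select()(masks, scores, topk_mask))  # [ 0.9 -1.   0.1]
```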
diff --git a/official/cv/MaskRCNN/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample_stage2.py b/official/cv/MaskRCNN/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample_stage2.py
index 07c5bc62115426cf410795b4aedcba4160ab0f3b..470f709d62aa59301115b744b159f16212641f3a 100644
--- a/official/cv/MaskRCNN/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample_stage2.py
+++ b/official/cv/MaskRCNN/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample_stage2.py
@@ -214,6 +214,7 @@ class BboxAssignSampleForRcnn(nn.Cell):
# normalized box coordinate
boxes = boxes / self.image_h_w
box_ids = F.range(self.start, self.limit, self.delta)
+ box_ids = self.cast(box_ids, mstype.int32)
pos_masks_fb = self.expand_dims(pos_masks_fb, -1)
boxes = self.cast(boxes, mstype.float32)
pos_masks_fb = self.crop_and_resize(pos_masks_fb, boxes, box_ids, self.mask_shape)
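`ops.CropAndResize` validates its `box_index` input as int32, while `F.range(self.start, self.limit, self.delta)` inherits the dtype of its arguments, hence the added cast. A toy sketch with invented image and box values:

```python
import numpy as np
import mindspore as ms
from mindspore import Tensor, ops

images = Tensor(np.random.rand(2, 32, 32, 3).astype(np.float32))  # NHWC batch
boxes = Tensor(np.array([[0.0, 0.0, 0.5, 0.5]], np.float32))      # normalized y1, x1, y2, x2
box_ids = Tensor(np.arange(1), ms.float32)                        # stand-in for the F.range output
box_ids = box_ids.astype(ms.int32)                                # the fix: indices must be int32
patches = ops.CropAndResize()(images, boxes, box_ids, (14, 14))
print(patches.shape)  # (1, 14, 14, 3)
```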
diff --git a/official/cv/MaskRCNN/maskrcnn_resnet50/src/maskrcnn/bbox_assign_sample_stage2.py b/official/cv/MaskRCNN/maskrcnn_resnet50/src/maskrcnn/bbox_assign_sample_stage2.py
index e97ee0a832da2b0c92de87095209168c849cd7ff..681b8a300d05b8f17a4d483075b5f96e5f781430 100644
--- a/official/cv/MaskRCNN/maskrcnn_resnet50/src/maskrcnn/bbox_assign_sample_stage2.py
+++ b/official/cv/MaskRCNN/maskrcnn_resnet50/src/maskrcnn/bbox_assign_sample_stage2.py
@@ -212,6 +212,7 @@ class BboxAssignSampleForRcnn(nn.Cell):
# normalized box coordinate
boxes = boxes / self.image_h_w
box_ids = F.range(self.start, self.limit, self.delta)
+ box_ids = self.cast(box_ids, mstype.int32)
pos_masks_fb = self.expand_dims(pos_masks_fb, -1)
boxes = self.cast(boxes, mstype.float32)
pos_masks_fb = self.crop_and_resize(pos_masks_fb, boxes, box_ids, self.mask_shape)
diff --git a/official/cv/SSD/config/ssd_mobilenet_v1_fpn_config.yaml b/official/cv/SSD/config/ssd_mobilenet_v1_fpn_config.yaml
index 099ea3758203638f96c6cae7886c69047b21b8f5..6fa43d8a50492f5ddfc0eaabfa45243fb115e70a 100644
--- a/official/cv/SSD/config/ssd_mobilenet_v1_fpn_config.yaml
+++ b/official/cv/SSD/config/ssd_mobilenet_v1_fpn_config.yaml
@@ -23,7 +23,7 @@ match_threshold: 0.5
nms_threshold: 0.6
min_score: 0.1
max_boxes: 100
-all_reduce_fusion_config: [29, 58, 89]
+all_reduce_fusion_config: [29, 58, 89, 201]
# learning rate settings
lr_init: 0.01333
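`all_reduce_fusion_config` lists the parameter indices at which gradient AllReduce operations are split into fused buckets; the added index 201 gives the tail of this (larger, FPN-based) network's parameter list its own bucket. Inside the training script the YAML value feeds the auto-parallel context, roughly as sketched below (assuming the standard MindSpore call):

```python
import mindspore as ms

# gradients up to each listed parameter index form one fused AllReduce,
# balancing launch overhead against overlap with back-propagation
ms.set_auto_parallel_context(all_reduce_fusion_config=[29, 58, 89, 201])
```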
diff --git a/official/cv/SSD/scripts/run_distribute_train.sh b/official/cv/SSD/scripts/run_distribute_train.sh
index 893f4a76db748e5d14b823f63a163a1747f27ccf..37fe46d37a076ecb0b05b08de202b1f3954bc83d 100644
--- a/official/cv/SSD/scripts/run_distribute_train.sh
+++ b/official/cv/SSD/scripts/run_distribute_train.sh
@@ -17,7 +17,7 @@
echo "=============================================================================================================="
echo "Please run the script as: "
echo "bash run_distribute_train.sh DEVICE_NUM EPOCH_SIZE LR DATASET RANK_TABLE_FILE CONFIG_PATH PRE_TRAINED PRE_TRAINED_EPOCH_SIZE"
-echo "for example: bash run_distribute_train.sh 8 500 0.2 coco /data/hccl.json /config_path /opt/ssd-300.ckpt(optional) 200(optional)"
+echo "for example: bash run_distribute_train.sh 8 500 0.05 coco /data/hccl.json /config_path /opt/ssd-300.ckpt(optional) 200(optional)"
echo "It is better to use absolute path."
echo "================================================================================================================="
diff --git a/official/cv/SSD/scripts/run_distribute_train_gpu.sh b/official/cv/SSD/scripts/run_distribute_train_gpu.sh
index 0778ad70ff9414d874f0d1e3ce2ffc9d77c499f5..0ff4b1818130aded3bdd9bd5c351148704c69422 100644
--- a/official/cv/SSD/scripts/run_distribute_train_gpu.sh
+++ b/official/cv/SSD/scripts/run_distribute_train_gpu.sh
@@ -17,7 +17,7 @@
echo "=============================================================================================================="
echo "Please run the script as: "
echo "bash run_distribute_train_gpu.sh DEVICE_NUM EPOCH_SIZE LR DATASET CONFIG_PATH PRE_TRAINED PRE_TRAINED_EPOCH_SIZE"
-echo "for example: bash run_distribute_train_gpu.sh 8 500 0.2 coco /config_path /opt/ssd-300.ckpt(optional) 200(optional)"
+echo "for example: bash run_distribute_train_gpu.sh 8 500 0.05 coco /config_path /opt/ssd-300.ckpt(optional) 200(optional)"
echo "It is better to use absolute path."
echo "================================================================================================================="
diff --git a/official/nlp/Pangu_alpha/src/pangu_alpha.py b/official/nlp/Pangu_alpha/src/pangu_alpha.py
index 00f594b3d9a199a9e1476e472ffbef7e72caf088..02f1570be402c2206ec26e2ea417b3a2a0a35eee 100644
--- a/official/nlp/Pangu_alpha/src/pangu_alpha.py
+++ b/official/nlp/Pangu_alpha/src/pangu_alpha.py
@@ -23,7 +23,6 @@ from mindspore import Tensor, Parameter
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.nn import Cell
-from mindspore.ops._tracefunc import trace
from mindformers.modules.transformer import VocabEmbedding, TransformerEncoder, TransformerEncoderLayer, \
AttentionMask, MoEConfig
@@ -306,7 +305,6 @@ class PanguAlpha_Model(Cell):
self.load_embedding_from_ckpt(config.load_ckpt_path)
self.run_type = config.run_type
- @trace
def construct_blocks(self, hidden_state, encoder_masks, init_reset, batch_valid_length):
if self.blocks is not None:
for i in range(self.num_layers - 1):
diff --git a/research/cv/TinySAM/README.md b/research/cv/TinySAM/README.md
index bd0e17c077d17d03598b7475f25e466c5bc8b466..d01ba5f57034af42f5aebf6ca94a569a37544f3d 100644
--- a/research/cv/TinySAM/README.md
+++ b/research/cv/TinySAM/README.md
@@ -54,6 +54,10 @@ SNN-MLP
After installing MindSpore via the official website, you can start evaluation as follows:
+### Download
+
+Download the pretrained checkpoint from [modelzoo](https://download-mindspore.osinfra.cn/model_zoo/research/cv/TinySAM/tinysam_mindspore.ckpt).
+
### Launch
```bash
diff --git a/research/recommend/ULC/README.md b/research/recommend/ULC/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8b46f25d05c4bc9a052cc9c0101c6995b49bc6e9
--- /dev/null
+++ b/research/recommend/ULC/README.md
@@ -0,0 +1,90 @@
+
+# Contents
+
+- [Contents](#contents)
+- [ULC Description](#ulc-description)
+- [Dataset](#dataset)
+- [Environment Requirements](#environment-requirements)
+- [Quick Start](#quick-start)
+- [Script Description](#script-description)
+ - [Script and Sample Code](#script-and-sample-code)
+ - [Training Process](#training-process)
+ - [Training](#training)
+- [ModelZoo Homepage](#modelzoo-homepage)
+
+# [ULC Description](#contents)
+
+Conversion rate prediction is critical to many online applications such as digital display advertising. To capture
+dynamic data distribution, industrial systems often require retraining models on recent data daily or weekly. However,
+the delay of conversion behavior usually leads to incorrect labeling, which is called delayed feedback problem. Existing
+work may fail to introduce the correct information about false negative samples due to data sparsity and dynamic data
+distribution. To directly introduce the correct feedback label information, we propose an Unbiased delayed feedback
+Label Correction framework (ULC), which uses an auxiliary model to correct labels for observed negative feedback
+samples. Firstly, we theoretically prove that the label-corrected loss is an unbiased estimate of the oracle loss using
+true labels. Then, as there are no ready training data for label correction, counterfactual labeling is used to
+construct artificial training data. Furthermore, since counterfactual labeling utilizes only partial training data, we
+design an embedding-based alternative training method to enhance performance. Comparative experiments on both public and
+private datasets and detailed analyses show that our proposed approach effectively alleviates the delayed feedback
+problem and consistently outperforms the previous state-of-the-art methods.
+
+A preprint version of our paper is available at http://arxiv.org/abs/2307.12756.
+
+# [Dataset](#contents)
+
+- [Criteo dataset](https://drive.google.com/file/d/1x4KktfZtls9QjNdFYKCjTpfjM4tG2PcK/view?usp=sharing)
+
+# [Environment Requirements](#contents)
+
+- Hardware (CPU/GPU)
+ - Prepare the hardware environment with a CPU or GPU processor.
+- Framework
+ - [MindSpore-2.0.0](https://www.mindspore.cn/install/en)
+
+- Requirements
+
+```shell
+
+ conda create --name <env_name> --file requirements.txt
+
+```
+
+- For more information, please check the resources below:
+ - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/en/r2.0/index.html)
+ - [MindSpore Python API](https://www.mindspore.cn/docs/en/r2.0/index.html)
+
+# [Quick Start](#contents)
+
+After installing MindSpore via the official website, you can start training and evaluation as follows:
+
+- Process the dataset: point `--data_path` to the Criteo `data.txt` (see [Dataset](#dataset)); the cached training data is built automatically on the first run. Then launch training as shown in [Training](#training).
+
+# [Script Description](#contents)
+
+## [Script and Sample Code](#contents)
+
+```bash
+.
+└─ULC
+ └─src
+ ├─alternate_train.py # alternate training of the correction and CVR models
+ ├─data.py # data process
+ ├─loss.py # loss in ULC
+ ├─main.py # train ULC
+ ├─metrics.py # metrics in ULC
+ ├─models.py # ULC structure
+ └─utils.py # modules in ULC
+```
+
+## [Training Process](#contents)
+
+### Training
+
+- running on CPU
+
+ ```bash
+ python ./src/main.py --method ULC --l2_reg 0.00001 --cuda_device 0 --lr 0.0001 --CD 7 --batch_size 1024 --optimizer Adam --seed 0
+ ```
+
+# [ModelZoo Homepage](#contents)
+
+ Please check the official [homepage](https://gitee.com/mindspore/models)
\ No newline at end of file
diff --git a/research/recommend/ULC/src/alternate_train.py b/research/recommend/ULC/src/alternate_train.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a2945c01705461ae72b6e6a58a0b7418b7330bd
--- /dev/null
+++ b/research/recommend/ULC/src/alternate_train.py
@@ -0,0 +1,234 @@
+# Copyright 2023 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+from copy import deepcopy
+
+from models import get_model
+from loss import get_loss_fn
+from utils import get_optimizer
+from metrics import cal_llloss_with_logits, cal_auc, cal_prauc
+from data import get_criteo_dataset, RandomAccessDataset
+from tqdm import tqdm
+import numpy as np
+import mindspore.dataset as ds
+import mindspore.nn as nn
+import mindspore
+
+def test(model, test_data, params):
+ all_logits = []
+ all_probs = []
+ all_labels = []
+ model.set_train(False)
+
+ for batch in tqdm(test_data):
+ batch_x = batch[0]
+ batch_y = batch[1]
+ logits = model(batch_x)
+ all_logits.append(logits.asnumpy())
+ all_labels.append(batch_y.asnumpy())
+ all_probs.append(nn.Sigmoid()(logits).asnumpy())
+
+ all_logits = np.reshape(np.concatenate(all_logits, axis=0), (-1,))
+ all_labels = np.reshape(np.concatenate(all_labels, axis=0), (-1,))
+ all_probs = np.reshape(np.concatenate(all_probs, axis=0), (-1,))
+ llloss = cal_llloss_with_logits(all_labels, all_logits)
+ auc = cal_auc(all_labels, all_probs)
+ prauc = cal_prauc(all_labels, all_probs)
+ return auc, prauc, llloss
+
+def get_valid_llloss_blc(model, test_data, correction_model):
+ all_logits = []
+ all_labels = []
+ model.set_train(False)
+
+ for batch in tqdm(test_data):
+ batch_x = batch[0]
+ batch_y = batch[1]
+ logits0 = correction_model(batch_x)
+ corrections = (nn.Sigmoid()(logits0)).flatten()
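+ # label correction: observed positives keep their label of 1, while observed
+ # negatives take the auxiliary model's predicted probability of being a
+ # delayed (false negative) conversion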
+ corrections = mindspore.numpy.where(batch_y < 1, corrections, batch_y.float())
+ logits = model(batch_x)
+ all_logits.append(logits.asnumpy())
+ all_labels.append(corrections.asnumpy())
+
+ all_logits = np.reshape(np.concatenate(all_logits, axis=0), (-1,))
+ all_labels = np.reshape(np.concatenate(all_labels, axis=0), (-1,))
+
+
+ llloss = cal_llloss_with_logits(all_labels, all_logits)
+ return llloss
+
+def alternate_run(params, wandb):
+ cvr_model = None
+ sub_model = None
+ dataset = get_criteo_dataset(params)
+ sub_params = deepcopy(params)
+
+ sub_params["dataset"] = "fsiwsg_cd_"+str(params["CD"])\
+ +"_end_"+str(params["training_end_day"])+"_seed_"+str(params["seed"])
+ np.random.seed(params["seed"])
+ sub_dataset = get_criteo_dataset(sub_params)["train"]
+ np.random.seed(params["seed"])
+
+ params["log_step"] = 0
+ params["idx"] = 1
+ for _ in range(2):
+ sub_model = sub_train(cvr_model, sub_dataset, params)
+ cvr_model = cvr_train(sub_model, dataset, params, wandb)
+ params["idx"] += 1
+
+def sub_train(cvr_model, sub_dataset, params):
+ train_data_x = sub_dataset["x"].to_numpy().astype(np.float32)
+ train_data_label = sub_dataset["labels"]
+ train_data_label = 1 - train_data_label
+ train_data = RandomAccessDataset(train_data_x, train_data_label)
+ train_data_loader = ds.GeneratorDataset(source=train_data, shuffle=True, column_names=['feature', 'label'])
+ train_data_loader = train_data_loader.batch(batch_size=params["batch_size"])
+
+ model = get_model("MLP_FSIW", params)
+
+ if cvr_model is not None:
+ sd = cvr_model.parameters_dict()
+ part_sd = {k: v for k, v in sd.items() if ("category_embeddings" in k) or ("numeric_embeddings" in k)}
+ model_dict = model.parameters_dict()
+ model_dict.update(part_sd)
+ mindspore.load_param_into_net(model, model_dict)
+
+ optimizer = nn.Adam(params=model.trainable_params(), learning_rate=0.001, weight_decay=0)
+ loss_fn = get_loss_fn("cross_entropy_loss")
+
+ def forward_fn(data, label):
+ outputs = model(data)
+ targets = {"label": label}
+ loss_dict = loss_fn(targets, outputs, params)
+ loss = loss_dict["loss"]
+ return loss
+
+ grad_fn = mindspore.value_and_grad(forward_fn, None, optimizer.parameters)
+
+ for _ in range(5):
+ for batch in train_data_loader:
+ batch_x = batch[0]
+ batch_y = batch[1][:, 0]
+ targets = {"label": batch_y}
+
+ model.set_train(True)
+ _, grads = grad_fn(batch_x, targets["label"])
+
+ optimizer(grads)
+
+ return model
+
+def cvr_train(sub_model, datasets, params, wandb):
+ model = get_model("MLP_SIG", params)
+ models = {"model": model, "submodel": sub_model}
+
+ optimizer = get_optimizer(models["model"].trainable_params(), params["optimizer"], params)
+
+ train_dataset = datasets["train"]
+ train_data_x = train_dataset["x"].to_numpy().astype(np.float32)
+ train_data_label = train_dataset["labels"]
+ train_data = RandomAccessDataset(train_data_x, train_data_label)
+ train_data_loader = ds.GeneratorDataset(source=train_data, shuffle=True, column_names=['feature', 'label'])
+ train_data_loader = train_data_loader.batch(batch_size=params["batch_size"])
+
+ valid_dataset = datasets["valid"]
+ valid_data_x = valid_dataset["x"].to_numpy().astype(np.float32)
+ valid_data_label = valid_dataset["labels"]
+ valid_data = RandomAccessDataset(valid_data_x, valid_data_label)
+ valid_data_loader = ds.GeneratorDataset(source=valid_data, column_names=['feature', 'label'])
+ valid_data_loader = valid_data_loader.batch(batch_size=params["batch_size"])
+
+ test_dataset = datasets["test"]
+ test_data_x = test_dataset["x"].to_numpy().astype(np.float32)
+ test_data_label = test_dataset["labels"]
+ test_data = RandomAccessDataset(test_data_x, test_data_label)
+ test_data_loader = ds.GeneratorDataset(source=test_data, column_names=['feature', 'label'])
+ test_data_loader = test_data_loader.batch(batch_size=params["batch_size"])
+
+ data_loaders = {
+ "train_data": train_data_loader,
+ "test_data": test_data_loader,
+ "valid_data": valid_data_loader
+ }
+ optimizers = {
+ "optimizer": optimizer
+ }
+
+
+ return train(models, optimizers, data_loaders, params, wandb)
+
+
+def train(models, optimizers, data_loaders, params, wandb):
+ train_data = data_loaders["train_data"]
+ valid_data = data_loaders["valid_data"]
+ test_data = data_loaders["test_data"]
+ best_model = None
+
+ optimizer = optimizers["optimizer"]
+
+ loss_fn = get_loss_fn(params["loss"])
+ val_llloss = []
+ test_auc, test_prauc, test_llloss = [], [], []
+
+ def forward_fn(data, label):
+ outputs = models["model"](data)
+ logits0 = models["submodel"](data)
+ correction_label = nn.Sigmoid()(logits0).flatten()
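+ # same label correction as in evaluation: only observed negatives
+ # (label < 1) receive the correction model's soft label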
+ label = mindspore.numpy.where(label < 1, correction_label, label.float())
+ targets = {"label": label}
+ loss_dict = loss_fn(targets, outputs, params)
+ loss = loss_dict["loss"]
+
+ return loss
+
+ grad_fn = mindspore.value_and_grad(forward_fn, None, optimizer.parameters)
+
+ for ep in range(params["train_epoch"]):
+ vllloss = get_valid_llloss_blc(models["model"], valid_data, models["submodel"])
+ print("Val ep{}, llloss {}".format(ep, vllloss))
+ tauc, tprauc, tllloss = test(models["model"], test_data, params)
+ print("Test ep{}, auc {}, prauc {}, llloss {}".format(ep, tauc, tprauc, tllloss))
+
+ if not val_llloss or vllloss < min(val_llloss):
+ best_model = models["model"].parameters_dict()
+
+ val_llloss.append(vllloss)
+ test_auc.append(tauc)
+ test_prauc.append(tprauc)
+ test_llloss.append(tllloss)
+
+ if len(val_llloss) - val_llloss.index(min(val_llloss)) > params["early_stop"]:
+ best_ep = val_llloss.index(min(val_llloss))
+ print("Early stop at ep {}. Best ep {}. Best val_lloss {}.".format(ep, best_ep, min(val_llloss)))
+ print("Final test evaluation: auc {}, prauc {}, llloss {}."\
+ .format(test_auc[best_ep], test_prauc[best_ep], test_llloss[best_ep]))
+ break
+ train_loss = []
+ for batch in tqdm(train_data):
+ batch_x = batch[0]
+ batch_y = batch[1]
+
+ models["model"].set_train(True)
+ models["submodel"].set_train(False)
+ loss, grads = grad_fn(batch_x, batch_y)
+
+ train_loss.append(loss.asnumpy())
+ optimizer(grads)
+ params["log_step"] += 1
+ print("Train ep{}, loss {}".format(ep, np.mean(train_loss)))
+
+ mindspore.load_param_into_net(models["model"], best_model)
+ return models["model"]
diff --git a/research/recommend/ULC/src/data.py b/research/recommend/ULC/src/data.py
new file mode 100644
index 0000000000000000000000000000000000000000..b15ee49d497c85af4b05a1500e1e552fb9e0b1af
--- /dev/null
+++ b/research/recommend/ULC/src/data.py
@@ -0,0 +1,284 @@
+# Copyright 2023 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import copy
+import os
+import pickle
+import datetime
+import pandas as pd
+import numpy as np
+
+from sklearn.preprocessing import LabelEncoder
+
+from utils import parse_float_arg
+
+SECONDS_A_DAY = 60 * 60 * 24
+SECONDS_AN_HOUR = 60 * 60
+SECONDS_DELAY_NORM = 1
+SECONDS_FSIW_NORM = SECONDS_A_DAY * 5
+num_bin_size = (64, 16, 128, 64, 128, 64, 512, 512)
+
+
+class RandomAccessDataset:
+ def __init__(self, data, label):
+ self._data = np.array(data)
+ self._label = np.array(label)
+
+ def __getitem__(self, index):
+ return (self._data[index], self._label[index])
+
+ def __len__(self):
+ return len(self._data)
+
+
+def get_data_df(params):
+ if params["dataset_source"] in ["criteo"]:
+ df = pd.read_csv(params["data_path"], sep="\t", header=None)
+ click_ts = df[df.columns[0]].to_numpy()
+ pay_ts = df[df.columns[1]].fillna(-1).to_numpy()
+
+ if params["dataset_source"] == "criteo":
+ df = df[df.columns[2:]]
+ for c in df.columns[8:]:
+ df[c] = df[c].fillna("")
+ df[c] = df[c].astype(str)
+
+ label_encoder = LabelEncoder()
+ for c in df.columns[8:]:
+ df[c] = label_encoder.fit_transform(df[c])
+
+ for i, c in enumerate(df.columns[:8]):
+ df[c] = df[c].fillna(-1)
+ df[c] = (df[c] - df[c].min()) / (df[c].max() - df[c].min())
+ df[c] = np.floor(df[c] * (num_bin_size[i] - 0.00001)).astype(str)
+ df.columns = [str(i) for i in range(17)]
+ return df, click_ts, pay_ts
+
+
+class DataDF:
+
+ def __init__(self, features, click_ts, pay_ts, sample_ts=None, labels=None, delay_label=None):
+ self.x = features.copy(deep=True)
+ self.click_ts = copy.deepcopy(click_ts)
+ self.pay_ts = copy.deepcopy(pay_ts)
+ self.delay_label = delay_label
+ if sample_ts is not None:
+ self.sample_ts = copy.deepcopy(sample_ts)
+ else:
+ self.sample_ts = copy.deepcopy(click_ts)
+ if labels is not None:
+ self.labels = copy.deepcopy(labels)
+ else:
+ self.labels = (pay_ts > 0).astype(np.int32)
+
+ def sub_days(self, start_day, end_day):
+ start_ts = start_day * SECONDS_A_DAY
+ end_ts = end_day * SECONDS_A_DAY
+ mask = np.logical_and(self.sample_ts >= start_ts,
+ self.sample_ts < end_ts)
+ return DataDF(self.x.iloc[mask],
+ self.click_ts[mask],
+ self.pay_ts[mask],
+ self.sample_ts[mask],
+ self.labels[mask])
+
+ def to_fsiw_1(self, cd, T): # build pre-training dataset 1 of FSIW
+ mask = np.logical_and(self.click_ts < T - cd, self.pay_ts > 0)
+ mask = np.logical_and(mask, self.pay_ts < T)
+ x = self.x.iloc[mask].copy(deep=True)
+ pay_ts = self.pay_ts[mask]
+ click_ts = self.click_ts[mask]
+ sample_ts = self.click_ts[mask]
+ label = np.zeros((x.shape[0],))
+ label[pay_ts < T - cd] = 1
+ # FSIW needs elapsed time information
+ x.insert(x.shape[1], column="elapse", value=(
+ T - click_ts - cd) / SECONDS_FSIW_NORM)
+ return DataDF(x,
+ click_ts,
+ pay_ts,
+ sample_ts,
+ label)
+
+ def to_fsiw_0(self, cd, T): # build pre-training dataset 0 of FSIW
+ mask = np.logical_or(self.pay_ts >= T - cd, self.pay_ts < 0)
+ mask = np.logical_or(mask, self.pay_ts > T)
+ mask = np.logical_and(self.click_ts < T - cd, mask)
+ x = self.x.iloc[mask].copy(deep=True)
+ pay_ts = self.pay_ts[mask]
+ click_ts = self.click_ts[mask]
+ sample_ts = self.sample_ts[mask]
+ label = np.zeros((x.shape[0],))
+ label[np.logical_or(pay_ts < 0, pay_ts > T)] = 1
+ x.insert(x.shape[1], column="elapse", value=(
+ T - click_ts - cd) / SECONDS_FSIW_NORM)
+ return DataDF(x,
+ click_ts,
+ pay_ts,
+ sample_ts,
+ label)
+
+ def shuffle(self):
+ idx = list(range(self.x.shape[0]))
+ np.random.shuffle(idx)
+ return DataDF(self.x.iloc[idx],
+ self.click_ts[idx],
+ self.pay_ts[idx],
+ self.sample_ts[idx],
+ self.labels[idx])
+
+
+def get_criteo_dataset(params):
+ name = params["dataset"]
+ print("loading datasest {}".format(name))
+ cache_path = os.path.join(
+ params["data_cache_path"], "{}.pkl".format(name))
+ if params["data_cache_path"] != "None" and os.path.isfile(cache_path):
+ print("cache_path {}".format(cache_path))
+ print("\nloading from dataset cache")
+ with open(cache_path, "rb") as f:
+ data = pickle.load(f)
+ train_data = data["train"]
+ test_data = data["test"]
+ if "valid" in data:
+ valid_data = data["valid"]
+ if "clean" in data:
+ _ = data["clean"]
+ if "fn" in data:
+ fn_data = data["fn"]
+ else:
+ train_data, test_data, valid_data, fn_data = build_criteo_dataset(params, name, cache_path)
+ result = {
+ "train": {
+ "x": train_data.x,
+ "click_ts": train_data.click_ts,
+ "pay_ts": train_data.pay_ts,
+ "sample_ts": train_data.sample_ts,
+ "labels": train_data.labels,
+ },
+ "test": {
+ "x": test_data.x,
+ "click_ts": test_data.click_ts,
+ "pay_ts": test_data.pay_ts,
+ "sample_ts": train_data.sample_ts,
+ "labels": test_data.labels,
+ }
+ }
+ if ("next" in name) or ("oracle" in name):
+ result["valid"] = {
+ "x": valid_data.x,
+ "click_ts": valid_data.click_ts,
+ "pay_ts": valid_data.pay_ts,
+ "sample_ts": valid_data.sample_ts,
+ "labels": valid_data.labels,
+ }
+ result["fn"] = {
+ "x": fn_data.x,
+ "click_ts": fn_data.click_ts,
+ "pay_ts": fn_data.pay_ts,
+ "sample_ts": fn_data.sample_ts,
+ "labels": fn_data.labels,
+ }
+ return result
+
+
+def build_criteo_dataset(params, name, cache_path):
+ print("\nbuilding dataset")
+
+ starttime = datetime.datetime.now()
+ if params["dataset_source"] == "criteo":
+ source_cache_path = "./cache_data.pkl"
+ if os.path.isfile(source_cache_path):
+ with open(source_cache_path, "rb") as f:
+ data = pickle.load(f)
+ else:
+ df, click_ts, pay_ts = get_data_df(params)
+ data = DataDF(df, click_ts, pay_ts)
+ with open(source_cache_path, "wb") as f:
+ pickle.dump(data, f, protocol=4)
+ endtime = datetime.datetime.now()
+ print("Time:{}s".format((endtime - starttime).total_seconds()))
+
+ if "fsiwsg" in name:
+ cd = parse_float_arg(name, "cd")
+ training_start = params["training_end_day"] - params["training_duration"]
+ train_data = data.sub_days(training_start, params["training_end_day"]).shuffle()
+ test_data = data.sub_days(params["training_end_day"], params["training_end_day"] + 1)
+ train_data = train_data.to_fsiw_0(
+ cd=cd * SECONDS_A_DAY, T=params["training_end_day"] * SECONDS_A_DAY)
+ cvrs = np.reshape(train_data.pay_ts > 0, (-1, 1))
+ pot_cvr = np.reshape(train_data.pay_ts > params["training_end_day"] * SECONDS_A_DAY, (-1, 1))
+ train_data.labels = np.reshape(train_data.labels, (-1, 1))
+ train_data.labels = np.concatenate(
+ [train_data.labels, cvrs, pot_cvr], axis=1)
+ test_data = test_data.to_fsiw_0(
+ cd=cd * SECONDS_A_DAY, T=params["training_end_day"] * SECONDS_A_DAY)
+ elif "fsiw_next" in name:
+ cd = parse_float_arg(name, "cd")
+ training_start = params["training_end_day"] - params["training_duration"]
+ train_data = data.sub_days(training_start, params["training_end_day"]).shuffle()
+ mask = train_data.pay_ts > (params["training_end_day"] * SECONDS_A_DAY)
+ train_data.labels[mask] = 0
+ train_data.x.insert(train_data.x.shape[1], column="elapse", value=(params[
+ "training_end_day"] * SECONDS_A_DAY - train_data.click_ts) / SECONDS_FSIW_NORM)
+ fn_data = DataDF(train_data.x.iloc[mask],
+ train_data.click_ts[mask],
+ train_data.pay_ts[mask],
+ train_data.sample_ts[mask],
+ train_data.labels[mask])
+ valid_data = data.sub_days(params["training_end_day"],
+ params["training_end_day"] + 1 * params["valid_test_size"])
+ valid_data.x.insert(valid_data.x.shape[1], column="elapse", value=(params[
+ "training_end_day"] * SECONDS_A_DAY - valid_data.click_ts) / SECONDS_FSIW_NORM)
+ val_mask = valid_data.pay_ts > (
+ (params["training_end_day"] + 1 * params["valid_test_size"]) * SECONDS_A_DAY)
+ valid_data.labels[val_mask] = 0
+ test_data = data.sub_days(params["training_end_day"] + 1 * params["valid_test_size"],
+ params["training_end_day"] + 2 * params["valid_test_size"])
+ test_data.x.insert(test_data.x.shape[1], column="elapse", value=(params[
+ "training_end_day"] * SECONDS_A_DAY - test_data.click_ts) / SECONDS_FSIW_NORM)
+ elif "oracle" in name:
+ cd = parse_float_arg(name, "cd")
+ training_start = params["training_end_day"] - params["training_duration"]
+ train_data = data.sub_days(training_start, params["training_end_day"]).shuffle()
+ train_data.x.insert(train_data.x.shape[1], column="elapse", value=(params[
+ "training_end_day"] * SECONDS_A_DAY - train_data.click_ts) / SECONDS_FSIW_NORM)
+
+ mask = train_data.pay_ts > (params["training_end_day"] * SECONDS_A_DAY)
+ fn_data = DataDF(train_data.x.iloc[mask],
+ train_data.click_ts[mask],
+ train_data.pay_ts[mask],
+ train_data.sample_ts[mask],
+ train_data.labels[mask])
+ fn_data.labels[:] = 0
+
+ valid_data = data.sub_days(params["training_end_day"],
+ params["training_end_day"] + 1 * params["valid_test_size"])
+ valid_data.x.insert(valid_data.x.shape[1], column="elapse", value=(params[
+ "training_end_day"] * SECONDS_A_DAY - valid_data.click_ts) / SECONDS_FSIW_NORM)
+ test_data = data.sub_days(params["training_end_day"] + 1 * params["valid_test_size"],
+ params["training_end_day"] + 2 * params["valid_test_size"])
+ test_data.x.insert(test_data.x.shape[1], column="elapse", value=(params[
+ "training_end_day"] * SECONDS_A_DAY - test_data.click_ts) / SECONDS_FSIW_NORM)
+ else:
+ raise NotImplementedError("{} dataset does not exist".format(name))
+ if params["data_cache_path"] != "None":
+ with open(cache_path, "wb") as f:
+ if ("next" in name) or ("oracle" in name):
+ pickle.dump({"train": train_data, "test": test_data, "valid": valid_data, "fn": fn_data}, f)
+ else:
+ pickle.dump({"train": train_data, "test": test_data}, f)
+
+ return train_data, test_data, valid_data, fn_data
diff --git a/research/recommend/ULC/src/loss.py b/research/recommend/ULC/src/loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..743d958334e186bec5f9aef14e5f48fd69696ed9
--- /dev/null
+++ b/research/recommend/ULC/src/loss.py
@@ -0,0 +1,36 @@
+# Copyright 2023 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import mindspore.numpy as np
+import mindspore.ops as ops
+import mindspore
+
+
+def stable_log1pex(x):
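+ # numerically stable log(1 + exp(-x)) = max(-x, 0) + log(1 + exp(-|x|))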
+ return -np.where(x < 0, x, np.zeros_like(x)) + np.log(1 + np.exp(-np.absolute(x)))
+
+def cross_entropy_loss(targets, outputs, params=None):
+ z = targets["label"]
+ x = outputs
+ x = ops.Reshape()(x, (-1,))
+ z = z.float()
+ loss_value = ops.binary_cross_entropy_with_logits(x, z, mindspore.Tensor([1.0]), mindspore.Tensor([1.0]))
+
+ return {"loss": loss_value}
+
+def get_loss_fn(name):
+ if name == "cross_entropy_loss":
+ return cross_entropy_loss
+ raise NotImplementedError("{} loss does not implemented".format(name))
diff --git a/research/recommend/ULC/src/main.py b/research/recommend/ULC/src/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..406db7bdab98adbfd93d92479ab124eebca845dc
--- /dev/null
+++ b/research/recommend/ULC/src/main.py
@@ -0,0 +1,79 @@
+# Copyright 2023 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import argparse
+import os
+import pathlib
+from copy import deepcopy
+
+import numpy as np
+
+from alternate_train import alternate_run
+
+wandb = None
+
+
+def run_params(args):
+ params = deepcopy(vars(args))
+ params["model"] = "MLP_SIG"
+ if args.data_cache_path != "None":
+ pathlib.Path(args.data_cache_path).mkdir(parents=True, exist_ok=True)
+
+ if args.method == "ULC":
+ params["loss"] = "cross_entropy_loss"
+ params["dataset"] = "last_30_train_test_fsiw_next" + "_end_" + str(args.training_end_day) + "_seed_" + str(
+ args.seed)
+
+ return params
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--method", choices=["ULC"],
+ type=str, required=True)
+ parser.add_argument("--seed", type=int, default=0)
+ parser.add_argument("--dataset_source", type=str, default="criteo", choices=["criteo"])
+ parser.add_argument("--CD", type=int, default=7,
+ help="interval between counterfactual deadline and actual deadline")
+ parser.add_argument("--lr", type=float, default=1e-3)
+ parser.add_argument("--data_path", type=str, default="./data/data.txt",
+ help="path of the data.txt in criteo dataset")
+ parser.add_argument("--data_cache_path", type=str, default="./data")
+ parser.add_argument("--batch_size", type=int,
+ default=1024)
+ parser.add_argument("--epoch", type=int, default=5,
+ help="training epoch of pretraining")
+ parser.add_argument("--l2_reg", type=float, default=0,
+ help="l2 regularizer strength")
+ parser.add_argument("--training_end_day", type=int, default=58,
+ help="deadline for training data")
+ parser.add_argument("--training_duration", type=int, default=21,
+ help="duration of training data")
+ parser.add_argument("--valid_test_size", type=float, default=1,
+ help="duration of valid/test data")
+ parser.add_argument("--train_epoch", type=int, default=100,
+ help="max train epoch")
+ parser.add_argument("--early_stop", type=int, default=4)
+ parser.add_argument("--cuda_device", type=str, default="0")
+ parser.add_argument("--optimizer", type=str, default="Adam")
+ parser.add_argument("--save_model", type=int, default=0)
+ parser.add_argument("--base_model", type=str, default="MLP", choices=["MLP"])
+
+ args = parser.parse_args()
+ params = run_params(args)
+ os.environ["CUDA_VISIBLE_DEVICES"] = params["cuda_device"]
+ np.random.seed(args.seed)
+
+ alternate_run(params, wandb)
diff --git a/research/recommend/ULC/src/metrics.py b/research/recommend/ULC/src/metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..274a0e5080840871819d0589ecc9f7132dcb5500
--- /dev/null
+++ b/research/recommend/ULC/src/metrics.py
@@ -0,0 +1,73 @@
+# Copyright 2023 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+from sklearn import metrics
+import numpy as np
+
+def sigmoid(x):
+ return 1/(1+np.exp(np.clip(-x, a_min=-500, a_max=500)))  # clip to keep exp from overflowing
+
+def cal_auc(label, pos_prob):
+ fpr, tpr, _ = metrics.roc_curve(label, pos_prob, pos_label=1)
+ auc = metrics.auc(fpr, tpr)
+ return auc
+
+def stable_log1pex(x):
+ return -np.minimum(x, 0) + np.log(1+np.exp(-np.abs(x)))
+
+def cal_llloss_with_logits(label, logits):
+ ll = -np.mean(label*(-stable_log1pex(logits)) + (1-label)*(-logits - stable_log1pex(logits)))
+ return ll
+
+def cal_llloss_with_logits_and_weight(label, logits, logits0, logits1):
+ x = logits
+
+ pos_loss = stable_log1pex(x)
+ neg_loss = x + stable_log1pex(x)
+
+ pos_weight = 1/(logits1+1e-8)
+ neg_weight = logits0
+
+ clf_loss = np.mean(
+ pos_loss*pos_weight*label + neg_loss*neg_weight*(1-label))
+
+ weight = np.mean(pos_weight*label + neg_weight*(1-label))
+
+ return clf_loss/weight
+
+def prob_clip(x):
+ return np.clip(x, a_min=1e-20, a_max=1)
+
+def cal_llloss_with_neg_log_prob(label, neg_log_prob):
+ ll = -np.mean((1-label)*neg_log_prob + label*(np.log(prob_clip(1 - prob_clip(np.exp(neg_log_prob))))))
+ return ll
+
+def cal_llloss_with_prob(label, prob):
+ ll = -np.mean(label*np.log(prob_clip(prob)) + (1-label)*(np.log(prob_clip(1-prob))))
+ return ll
+
+def cal_prauc(label, pos_prob):
+ precision, recall, _ = metrics.precision_recall_curve(label, pos_prob)
+ area = metrics.auc(recall, precision)
+ return area
+
+def cal_acc(label, prob):
+ label = np.reshape(label, (-1,))
+ prob = np.reshape(prob, (-1,))
+ prob_acc = np.mean(label*prob)
+ return prob_acc
+
+def stable_softplus(x):
+ return np.log(1 + np.exp(-np.abs(x))) + np.maximum(x, 0)
diff --git a/research/recommend/ULC/src/models.py b/research/recommend/ULC/src/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8d63ab2575f88533668da2d7b68cb020ebde6c5
--- /dev/null
+++ b/research/recommend/ULC/src/models.py
@@ -0,0 +1,103 @@
+# Copyright 2023 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+from mindspore import nn
+from mindspore import ops
+
+class MLP(nn.Cell):
+ def __init__(self, name, params):
+ super(MLP, self).__init__()
+ self.model_name = name
+ self.params = params
+ self.dataset_source = params["dataset_source"]
+ if params["dataset_source"] == "criteo":
+ self.category_embeddings = nn.CellList([
+ nn.Embedding(55824, 64),
+ nn.Embedding(5443, 64),
+ nn.Embedding(13073, 64),
+ nn.Embedding(13170, 64),
+ nn.Embedding(3145, 64),
+ nn.Embedding(33843, 64),
+ nn.Embedding(14304, 64),
+ nn.Embedding(11, 64),
+ nn.Embedding(13601, 64)
+ ])
+
+ self.numeric_embeddings = nn.CellList([
+ nn.Embedding(64, 64),
+ nn.Embedding(16, 64),
+ nn.Embedding(128, 64),
+ nn.Embedding(64, 64),
+ nn.Embedding(128, 64),
+ nn.Embedding(64, 64),
+ nn.Embedding(512, 64),
+ nn.Embedding(512, 64)
+ ])
+ presize = 1088
+
+ if name == "MLP_FSIW":
+ print("using elapse feature")
+ presize += 1
+
+ self.mlp = nn.CellList([
+ nn.Dense(presize, 256, activation='leakyrelu'),
+ nn.Dense(256, 256, activation='leakyrelu'),
+ nn.Dense(256, 128, activation='leakyrelu')
+ ])
+
+ if self.model_name in ["MLP_SIG", "MLP_FSIW"]:
+ self.mlp.append(nn.Dense(128, 1))
+ else:
+ raise ValueError("model name {} not exist".format(name))
+
+ def construct(self, x):
+ if self.dataset_source == "criteo":
+ cate_embeddings = []
+ nume_embeddings = []
+ if self.model_name == "MLP_FSIW":
+ for i in range(8):
+ nume_embeddings.append(self.numeric_embeddings[i](x[:, i].int()))
+
+ for i in range(9):
+ cate_embeddings.append(self.category_embeddings[8 - i](x[:, -i - 2].int()))
+
+ features = nume_embeddings + cate_embeddings + [x[:, -1:]]
+ x = ops.Concat(axis=1)(features)
+ else:
+ for i in range(8):
+ nume_embeddings.append(self.numeric_embeddings[i](x[:, i].int()))
+
+ for i in range(9):
+ cate_embeddings.append(self.category_embeddings[8 - i](x[:, -i - 2].int()))
+
+ features = nume_embeddings + cate_embeddings
+ x = ops.Concat(axis=1)(features)
+
+ for layer in self.mlp:
+ x = layer(x)
+
+ if self.model_name in ["MLP_SIG", "MLP_FSIW"]:
+ return x
+ raise NotImplementedError()
+
+def get_model(name, params):
+ if name in ["MLP_tn_dp", "MLP_FSIW"]:
+ return MLP(name, params)
+ if name == "MLP_SIG":
+ if params["base_model"] == "MLP":
+ return MLP(name, params)
+ else:
+ raise NotImplementedError()
+ raise NotImplementedError("model {} is not implemented".format(name))
diff --git a/research/recommend/ULC/src/utils.py b/research/recommend/ULC/src/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..b5aca4c044eea0cfdeb5077537e9576e121aab8d
--- /dev/null
+++ b/research/recommend/ULC/src/utils.py
@@ -0,0 +1,34 @@
+# Copyright 2023 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import re
+import mindspore.nn as nn
+
+
+def get_optimizer(p, name, params):
+ if name == "Adam":
+ return nn.Adam(params=p, learning_rate=params["lr"], weight_decay=params["l2_reg"])
+ raise NotImplementedError("{} optimizer is not implemented".format(name))
+
+
+def parse_float_arg(Input, prefix):
+ p = re.compile(prefix+"_[+-]?([0-9]*[.])?[0-9]+")
+ m = p.search(Input)
+ if m is None:
+ return None
+ Input = m.group()
+ p = re.compile("[+-]?([0-9]*[.])?[0-9]+")
+ m = p.search(Input)
+ return float(m.group())