- Before getting to the main content, thanks to all the Ascend (昇腾) staff: without your hard work there would be no progress on our side.
- This post is meant to share, for the competition, the process of developing the MseLossGrad operator.
- MseLossGrad is a backward (gradient) operator, so the first difficulty is figuring out how it is computed: without even knowing the formula, how do you go on writing the operator?
- By analyzing the provided sample cases, the computation formula was derived as follows:
reduction = "mean"
if 'mean' == reduction:
    # with reduction="mean", the coefficient is 2/N, where N is the element count of predict
    reduce_elts = 1.0
    for i in input_predict.shape:
        reduce_elts *= i
    cof = (reduce_elts ** (-1)) * 2.0
else:
    # otherwise the coefficient is simply 2
    cof = 2.0
sub_res = input_predict - input_label   # predict - label
norm_grad = sub_res * cof               # scale by the coefficient
golden = norm_grad * input_dout         # multiply by the upstream gradient dout
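- This golden reference is just the analytic gradient of the MSE loss: with reduction="mean" the gradient with respect to predict is 2 * (predict - label) / N times the upstream gradient dout, where N is the total element count; otherwise the 1/N factor is dropped. Below is a minimal NumPy sketch that wraps the same computation into a function for quick self-checks; the function name mse_loss_grad_golden and the sample values are my own illustration, not part of the competition framework.

# Minimal NumPy sketch of the golden reference above.
# Assumption: mse_loss_grad_golden and the sample inputs are illustrative only.
import numpy as np

def mse_loss_grad_golden(input_predict, input_label, input_dout, reduction="mean"):
    if reduction == "mean":
        # coefficient 2/N, with N the total element count of predict
        cof = 2.0 / float(np.prod(input_predict.shape))
    else:
        cof = 2.0
    return (input_predict - input_label) * cof * input_dout

# Worked example: predict=[1, 3], label=[0, 1], dout=[1, 1], N=2
# expected gradient: 2/2 * ([1, 3] - [0, 1]) = [1.0, 2.0]
print(mse_loss_grad_golden(np.array([1.0, 3.0]),
                           np.array([0.0, 1.0]),
                           np.array([1.0, 1.0])))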
- From the formula we can see that a scalar (the coefficient cof) takes part in the computation.
- So another difficulty is how to turn the input string attribute "mean" into a float and feed it into the op_kernel for the computation.
// Read the "reduction" string attribute on the host side and turn it into
// a scalar coefficient that the kernel can consume directly.
const char *pvalue = context->GetAttrs()->GetStr(0);
float reduce_elts = 1.0;
float cof = 1.0;
uint8_t i = 0;
printf("getstr %s\r\n", pvalue);
if (strcmp(pvalue, "mean") == 0)
{
    // total element count of the predict tensor
    for (i = 0; i < dimNum0; i++)
    {
        reduce_elts = reduce_elts * shape0[i];
    }
    cof = 1.0 / reduce_elts * 2.0;
    printf("cof info %f", cof);
    tiling.set_cof(cof);    // pass the coefficient to the kernel via tiling data
}
else
{
    tiling.set_cof(2.0);    // no averaging: the coefficient is simply 2
}
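- The design choice here is to handle the string attribute entirely on the host side: the op_host tiling function reads "reduction" with GetStr, compares it with strcmp, and stores only the resulting float coefficient cof in the tiling data, so the kernel never touches a string and simply consumes a scalar.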
__aicore__ inline void Compute(int32_t progress) {
    LocalTensor<DTYPE_PREDICT> inLocal = inQueueIN.DeQue<DTYPE_PREDICT>();
    // the three inputs are packed into one queue, one tile after another
    LocalTensor<DTYPE_PREDICT> predictLocal = inLocal;
    LocalTensor<DTYPE_LABEL> labelLocal = inLocal[this->tileLength];
    LocalTensor<DTYPE_LABEL> doutLocal = inLocal[2 * this->tileLength];
    LocalTensor<DTYPE_Y> outLocal = outQueueOUT.AllocTensor<DTYPE_Y>();
    // implemented with Sub + Muls + Mul
    Sub(outLocal, predictLocal, labelLocal, this->tileLength);                      // predict - label
    Muls(outLocal, outLocal, static_cast<DTYPE_Y>(this->cof), this->tileLength);    // * cof (scalar)
    Mul(outLocal, outLocal, doutLocal, this->tileLength);                           // * dout
    outQueueOUT.EnQue<DTYPE_Y>(outLocal);
    inQueueIN.FreeTensor(inLocal);
}
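- Note that the three vector calls map one-to-one onto the golden formula: Sub computes predict - label, Muls scales the result by the scalar cof passed in through the tiling data, and Mul multiplies element-wise by dout to produce the final gradient.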