Skip to content

Commit

Permalink
multiply add int dtype (#3119)
Browse files Browse the repository at this point in the history
* multiply add int dtype

* non-user multiply op add int

Co-authored-by: guoran <[email protected]>
Former-commit-id: b9c3bb5
  • Loading branch information
guo-ran and guoran authored Jul 2, 2020
1 parent df33356 commit a67f0fe
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 10 deletions.
4 changes: 4 additions & 0 deletions oneflow/core/kernel/kernel_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,10 @@ KU_INTEGRAL_METHOD InitializeWithConf(DeviceCtx* ctx, const InitializerConf& ini
}
}

// Element-wise product for integral types on CPU: z[i] = x[i] * y[i] for i in [0, n).
// `ctx` is unused here; the loop runs synchronously on the calling thread.
KU_INTEGRAL_METHOD Mul(DeviceCtx* ctx, const int64_t n, const T* x, const T* y, T* z) {
  int64_t idx = 0;
  while (idx < n) {
    z[idx] = x[idx] * y[idx];
    ++idx;
  }
}

#define INSTANTIATE_KERNEL_UTIL(type_cpp, type_proto) \
template struct CpuKernelUtilIf<type_cpp, KernelUtil<DeviceType::kCPU, type_cpp>>; \
template struct KernelUtil<DeviceType::kCPU, type_cpp>;
Expand Down
5 changes: 5 additions & 0 deletions oneflow/core/kernel/kernel_util.cu
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,11 @@ KU_INTEGRAL_METHOD Axpy(DeviceCtx* ctx, const int n, const T alpha, const T* x,
n, alpha, x, incx, y, incy);
}
// Element-wise product for integral types on GPU: launches the MulGpu kernel so that
// z[i] = x[i] * y[i] for i in [0, n). The launch is enqueued on ctx->cuda_stream() and
// is therefore asynchronous with respect to the host caller.
// NOTE(review): BlocksNum4ThreadsNum(n) presumably derives the grid size from the element
// count and kCudaThreadsNumPerBlock is the block size — confirm against their definitions.
KU_INTEGRAL_METHOD Mul(DeviceCtx* ctx, const int64_t n, const T* x, const T* y, T* z) {
MulGpu<T>
<<<BlocksNum4ThreadsNum(n), kCudaThreadsNumPerBlock, 0, ctx->cuda_stream()>>>(n, x, y, z);
}
#define INSTANTIATE_KERNEL_UTIL(type_cpp, type_proto) \
template struct GpuKernelUtilIf<type_cpp, KernelUtil<DeviceType::kGPU, type_cpp>>; \
template struct KernelUtil<DeviceType::kGPU, type_cpp>;
Expand Down
2 changes: 2 additions & 0 deletions oneflow/core/kernel/kernel_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ struct KernelUtil<DeviceType::kCPU, T, typename std::enable_if<IsIntegral<T>::va
const int incy);
static void InitializeWithConf(DeviceCtx* ctx, const InitializerConf& initializer_conf,
uint32_t random_seed, Blob* blob);
static void Mul(DeviceCtx* ctx, const int64_t n, const T* x, const T* y, T* z);
};

// GPU, Integral, Floating
Expand Down Expand Up @@ -305,6 +306,7 @@ struct KernelUtil<DeviceType::kGPU, T, typename std::enable_if<IsIntegral<T>::va
public GpuKernelUtilIf<T, KernelUtil<DeviceType::kGPU, T>> {
static void Axpy(DeviceCtx* ctx, const int n, const T alpha, const T* x, const int incx, T* y,
const int incy);
static void Mul(DeviceCtx* ctx, const int64_t n, const T* x, const T* y, T* z);
};

using CopyBlobFieldMthd = void (Blob::*)(DeviceCtx*, const Blob*);
Expand Down
2 changes: 1 addition & 1 deletion oneflow/core/kernel/multiply_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,6 @@ const PbMessage& MultiplyKernel<device_type, T>::GetCustomizedOpConf() const {
return this->op_conf().multiply_conf();
}

ADD_DEFAULT_KERNEL_CREATOR(OperatorConf::kMultiplyConf, MultiplyKernel, FLOATING_DATA_TYPE_SEQ);
ADD_DEFAULT_KERNEL_CREATOR(OperatorConf::kMultiplyConf, MultiplyKernel, ARITHMETIC_DATA_TYPE_SEQ);

} // namespace oneflow
3 changes: 2 additions & 1 deletion oneflow/customized/kernels/multiply_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ class MultiplyKernel final : public user_op::OpKernel {
return Maybe<void>::Ok(); \
});

OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(REGISTER_MULTIPLY_KERNEL, DEVICE_TYPE_SEQ, FLOATING_DATA_TYPE_SEQ)
OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(REGISTER_MULTIPLY_KERNEL, DEVICE_TYPE_SEQ,
ARITHMETIC_DATA_TYPE_SEQ)
#undef REGISTER_MULTIPLY_KERNEL

} // namespace oneflow
19 changes: 11 additions & 8 deletions oneflow/python/test/ops/test_multiply.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,23 +45,26 @@ def _test_element_wise_mul_fw_bw(test_case, device, shape, type_name):

@flow.global_function(func_config)
def test_element_wise_mul_job(
x=flow.FixedTensorDef(shape, dtype=flow_type),
y=flow.FixedTensorDef(shape, dtype=flow_type),
x=flow.FixedTensorDef(shape, dtype=flow.float),
y=flow.FixedTensorDef(shape, dtype=flow.float),
):
with flow.fixed_placement(device, "0:0"):
x += flow.get_variable(
name="vx",
shape=(1,),
dtype=flow_type,
dtype=flow.float,
initializer=flow.zeros_initializer(),
)
y += flow.get_variable(
name="vy",
shape=(1,),
dtype=flow_type,
dtype=flow.float,
initializer=flow.zeros_initializer(),
)
x = flow.cast(x, dtype=flow_type)
y = flow.cast(y, dtype=flow_type)
out = flow.math.multiply(x, y)
out = flow.cast(out, dtype=flow.float)
flow.losses.add_loss(out)

flow.watch(x, test_global_storage.Setter("x"))
Expand All @@ -74,9 +77,9 @@ def test_element_wise_mul_job(

check_point = flow.train.CheckPoint()
check_point.init()
test_element_wise_mul_job(
np.random.rand(*shape).astype(np_type), np.random.rand(*shape).astype(np_type)
).get()
x = np.random.randint(low=0, high=10, size=shape).astype(np.float32)
y = np.random.randint(low=0, high=10, size=shape).astype(np.float32)
test_element_wise_mul_job(x, y).get()
test_case.assertTrue(
np.allclose(
test_global_storage.Get("x") * test_global_storage.Get("y"),
Expand All @@ -101,6 +104,6 @@ def test_element_wise_mul_fw_bw(test_case):
arg_dict = OrderedDict()
arg_dict["device"] = ["gpu", "cpu"]
arg_dict["shape"] = [(96, 96)]
arg_dict["type_name"] = ["float32", "double"]
arg_dict["type_name"] = ["float32", "double", "int8", "int32", "int64"]
for arg in GenArgDict(arg_dict):
_test_element_wise_mul_fw_bw(test_case, **arg)

0 comments on commit a67f0fe

Please sign in to comment.