Tensor Comprehensions integration (Ref. SINGA-482)
Your Name authored and dcslin committed Nov 1, 2019
1 parent 806dbe7 commit dbdf054
Showing 12 changed files with 707 additions and 30 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -67,6 +67,7 @@ OPTION(ENABLE_DIST "Enable distributed training" OFF)
OPTION(DISABLE_WARNINGS "Disable warnings under windows" ON)
OPTION(USE_MODULES "Compile dependent libs as submodules together with singa" OFF)
OPTION(USE_MKLDNN "Use mkl-dnn libs" OFF)
OPTION(USE_TC "Use tensor comprehension" OFF)


# TODO: remove all USE_CBLAS in codes
47 changes: 45 additions & 2 deletions cmake/Dependencies.cmake
@@ -125,11 +125,11 @@ ENDIF()

IF(USE_PYTHON)
IF(USE_PYTHON3)
set(Python_ADDITIONAL_VERSIONS 3.6 3.5 3.4)
FIND_PACKAGE(PythonInterp 3 REQUIRED)
FIND_PACKAGE(PythonLibs 3 REQUIRED)
FIND_PACKAGE(SWIG 3.0.10 REQUIRED)
ELSE()
FIND_PACKAGE(PythonInterp 2.7 REQUIRED)
FIND_PACKAGE(PythonLibs 2.7 REQUIRED)
FIND_PACKAGE(SWIG 3.0.8 REQUIRED)
@@ -142,10 +142,53 @@ IF(USE_JAVA)
FIND_PACKAGE(SWIG 3.0 REQUIRED)
ENDIF()


IF(USE_MKLDNN)
FIND_PATH(MKLDNN_INCLUDE_DIR NAME "mkldnn.hpp" PATHS "$ENV{CMAKE_INCLUDE_PATH}")
FIND_LIBRARY(MKLDNN_LIBRARIES NAME "mkldnn" PATHS "$ENV{CMAKE_LIBRARY_PATH}")
MESSAGE(STATUS "Found MKLDNN at ${MKLDNN_INCLUDE_DIR}")
INCLUDE_DIRECTORIES(${MKLDNN_INCLUDE_DIR})
LIST(APPEND SINGA_LINKER_LIBS ${MKLDNN_LIBRARIES})
ENDIF()


IF(USE_TC)
### Tensor comprehensions
INCLUDE_DIRECTORIES(/root/TensorComprehensions)
INCLUDE_DIRECTORIES(/root/TensorComprehensions/tc/version)
INCLUDE_DIRECTORIES(/root/TensorComprehensions/build)
# polyhedral model required
INCLUDE_DIRECTORIES(/root/TensorComprehensions/isl_interface/include)
# dlpack
INCLUDE_DIRECTORIES(/root/TensorComprehensions/third-party/dlpack/include)
# islpp
INCLUDE_DIRECTORIES(/root/TensorComprehensions/third-party/islpp/include)
# gflags
INCLUDE_DIRECTORIES(/root/TensorComprehensions/build/third-party/googlelibraries/gflags/include)
# glog
INCLUDE_DIRECTORIES(/root/TensorComprehensions/build/third-party/googlelibraries/glog)
# Halide
INCLUDE_DIRECTORIES(/root/conda/envs/tc_build/include/Halide)
# llvm
INCLUDE_DIRECTORIES(/root/conda/envs/tc_build/include)
# torch ATen header
INCLUDE_DIRECTORIES(/root/conda/envs/tc_build/lib/python3.6/site-packages/torch/lib/include)

# find Halide lib
set(HALIDE_PREFIX "/root/conda/envs/tc_build")
find_library(HALIDE_LIBRARIES REQUIRED NAMES Halide PATHS ${HALIDE_PREFIX} PATH_SUFFIXES lib lib64 NO_DEFAULT_PATH)
message(STATUS "Found Halide.so file: ${HALIDE_LIBRARIES}")

# find tc lib
link_directories(/root/TensorComprehensions/build/tc/aten)
link_directories(/root/TensorComprehensions/build/tc/lang)
link_directories(/root/TensorComprehensions/build/tc/core)
link_directories(/root/TensorComprehensions/build/tc/autotuner)
link_directories(/root/TensorComprehensions/build/tc/proto)

# torch(aten)
link_directories(/root/conda/envs/tc_build/lib/python3.6/site-packages/torch/lib)

LIST(APPEND SINGA_LINKER_LIBS ${HALIDE_LIBRARIES} tc_aten tc_lang tc_core_cpu tc_cuda tc_core_cuda_no_sdk tc_core tc_autotuner tc_proto ATen)
### Tensor comprehensions end
ENDIF()
1 change: 1 addition & 0 deletions cmake/Templates/singa_config.h.in
@@ -53,3 +53,4 @@
// #cmakedefine CUDNN_VERSION @CUDNN_VERSION@

#cmakedefine USE_MKLDNN
#cmakedefine USE_TC
93 changes: 93 additions & 0 deletions include/singa/core/tensor.h
@@ -23,6 +23,20 @@
#include <tuple>
#include <memory>

#ifdef USE_TC
#include <dlpack/dlpack.h>
#include <tc/core/tensor.h>
#include <tc/utils/compiler_options.h>
#include <tc/core/compiler.h>
#include <tc/core/utils/time.h>
#include <tc/core/cuda/cuda_backend.h>
#include <tc/core/cuda/cuda_tc_executor.h>
#include <tc/core/cpu/cpu_backend.h>
#include <tc/core/cpu/cpu_tc_executor.h>
#include <tc/core/check.h>
#include <tc/core/tc_executor.h>
#endif // USE_TC

#include "singa/core/common.h"
#include "singa/core/device.h"
#include "singa/proto/core.pb.h"
@@ -603,6 +617,85 @@ Tensor ConcatRows(const vector<Tensor> &in);
Tensor ConcatenateColumns(const vector<Tensor> &in);
/// Alias name for function ConcatenateColumns
Tensor ConcatColumns(const vector<Tensor> &in);

#ifdef USE_TC
/// tc integration start
DLManagedTensor *toDLPack(const Tensor &src);

inline std::vector<tc::DLTensorUPtr>
makeDLTensors(const std::vector<Tensor> &tensors);

template <typename Backend>
std::unique_ptr<typename Backend::ExecutorType>
compileTC(const std::string &tc, const std::string &entryPoint,
const std::vector<Tensor> &inputs,
const typename Backend::MappingOptionsType &options,
const tc::CompilerOptions &compilerOptions = tc::CompilerOptions());

std::vector<tc::DLTensorUPtr>
inferOutputTensorInfo(const std::string &tc, const std::string &entryPoint,
const std::vector<Tensor> &inputs);

std::vector<Tensor> prepareOutputs(const std::string &tc,
const std::string &entryPoint,
const std::vector<Tensor> &inputs);

template <typename Executor>
void runTC(const Executor &executor, const std::vector<Tensor> &inputs,
std::vector<Tensor> &outputs);

// makeDLConstTensors implementation
inline std::vector<tc::DLConstTensorUPtr>
makeDLConstTensors(const std::vector<Tensor> &tensors) {
std::vector<tc::DLConstTensorUPtr> dlTensors;
for (auto tensor : tensors) {
auto dlMTensor = toDLPack(tensor);
dlTensors.push_back(tc::makeDLConstTensor(&(dlMTensor->dl_tensor)));
dlMTensor->deleter(dlMTensor);
}
return dlTensors;
}

// makeDLTensors implementation
inline std::vector<tc::DLTensorUPtr>
makeDLTensors(const std::vector<Tensor> &tensors) {
std::vector<tc::DLTensorUPtr> dlTensors;
for (auto tensor : tensors) {
auto dlMTensor = toDLPack(tensor);
dlTensors.push_back(tc::makeDLTensor(&(dlMTensor->dl_tensor)));
dlMTensor->deleter(dlMTensor);
}
return dlTensors;
}

// compile implementation
template <typename Backend>
std::unique_ptr<typename Backend::ExecutorType>
compileTC(const std::string &tc, const std::string &entryPoint,
const std::vector<Tensor> &inputs,
const typename Backend::MappingOptionsType &options,
const tc::CompilerOptions &compilerOptions) {
auto inputDLTensors = makeDLConstTensors(inputs);
return tc::compile<Backend>(tc, entryPoint, extractRawPtrs(inputDLTensors),
options, compilerOptions);
}

// run implementation
template <typename Executor>
void runTC(const Executor &executor, const std::vector<Tensor> &inputs,
std::vector<Tensor> &outputs) {
auto inputDLTensors = makeDLConstTensors(inputs);
auto outputDLTensors = makeDLTensors(outputs);
return executor.run(extractRawPtrs(inputDLTensors),
extractRawPtrs(outputDLTensors));
}
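
// Putting the pieces together: a minimal sketch of the intended call
// sequence (it mirrors src/model/operation/tc_fn.cc; the TC string
// `tcStr`, the entry point "matmul", and the `inputs` vector below are
// illustrative assumptions, not APIs defined in this header):
//
//   auto options =
//       tc::CudaBackend::MappingOptionsType::makeNaiveMappingOptions();
//   auto executor =
//       compileTC<tc::CudaBackend>(tcStr, "matmul", inputs, options);
//   std::vector<Tensor> outputs = prepareOutputs(tcStr, "matmul", inputs);
//   runTC(*executor, inputs, outputs);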

/// tc integration end
#endif // USE_TC

} // namespace singa

#endif // SINGA_CORE_TENSOR_H_
113 changes: 113 additions & 0 deletions src/core/tensor/tensor.cc
@@ -27,6 +27,12 @@

#define Noaxis 9999

#ifdef USE_TC
// a `lang` namespace already exists inside singa;
// alias TC's `lang` namespace to avoid the name clash
namespace tclang = lang;
#endif // USE_TC

namespace singa {

Tensor::~Tensor() {
@@ -1334,4 +1340,111 @@ Tensor Reshape(const Tensor &in, const Shape &s) {
return out.Reshape(s);
}


#ifdef USE_TC
/// tc integration start
struct SingaDLManagedTensor {
  Tensor handle;
  // shape and strides live here so that `deleter` below releases them
  // together with the Tensor handle
  std::vector<int64_t> shape;
  std::vector<int64_t> strides;
  DLManagedTensor tensor;
};

void deleter(DLManagedTensor *arg) {
delete static_cast<SingaDLManagedTensor *>(arg->manager_ctx);
}

static DLDataType getDLDataType(const Tensor &t) {
DLDataType dtype;
dtype.lanes = 1;
dtype.bits = SizeOf(t.data_type()) * 8;
switch (t.data_type()) {
case kFloat32:
dtype.code = DLDataTypeCode::kDLFloat;
break;
default:
throw std::logic_error("only kFloat32 is supported for dlpack conversion");
break;
}
return dtype;
}

static DLContext getDLContext(const Tensor &tensor, const int64_t &device_id) {
DLContext ctx;
ctx.device_id = device_id;
if (tensor.device()->lang() == kCuda) {
ctx.device_type = DLDeviceType::kDLGPU;
} else {
ctx.device_type = DLDeviceType::kDLCPU;
}
return ctx;
}

// Returns a pointer to a memory-managed DLPack tensor
// constructed from a singa Tensor
DLManagedTensor *toDLPack(const Tensor &src) {
SingaDLManagedTensor *singaDLManagedTensor(new SingaDLManagedTensor);
singaDLManagedTensor->handle = src;
singaDLManagedTensor->tensor.manager_ctx = singaDLManagedTensor;
singaDLManagedTensor->tensor.deleter = &deleter;
singaDLManagedTensor->tensor.dl_tensor.data = src.block()->mutable_data();
int64_t device_id = src.device()->id();
singaDLManagedTensor->tensor.dl_tensor.ctx = getDLContext(src, device_id);
singaDLManagedTensor->tensor.dl_tensor.ndim = src.nDim();
singaDLManagedTensor->tensor.dl_tensor.dtype = getDLDataType(src);

  singaDLManagedTensor->shape.assign(src.shape().begin(), src.shape().end());
  singaDLManagedTensor->tensor.dl_tensor.shape =
      singaDLManagedTensor->shape.data();

  singaDLManagedTensor->strides.assign(src.stride().begin(),
                                       src.stride().end());
  singaDLManagedTensor->tensor.dl_tensor.strides =
      singaDLManagedTensor->strides.data();

singaDLManagedTensor->tensor.dl_tensor.byte_offset = 0;
return &(singaDLManagedTensor->tensor);
}
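
// Consumer-side sketch of the DLPack ownership contract assumed above:
// the consumer calls the deleter exactly once when it is done with the
// tensor (makeDLConstTensors/makeDLTensors in tensor.h do exactly this):
//
//   DLManagedTensor *dlm = toDLPack(t);
//   // ... read dlm->dl_tensor.data, dlm->dl_tensor.shape, ...
//   dlm->deleter(dlm);  // frees the SingaDLManagedTensor keeping `t` alive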

// infer the shapes and metadata of the outputs for a given TC entry point
std::vector<tc::DLTensorUPtr>
inferOutputTensorInfo(const std::string &tc, const std::string &entryPoint,
const std::vector<Tensor> &inputs) {
auto parsedTcs = tc::detail::parse(tc);
if (parsedTcs.count(entryPoint) != 1u) {
TC_CHECK_GE(parsedTcs.size(), 1u)
<< "No TC was parsed, should have thrown earlier";
throw tclang::ErrorReport(parsedTcs.begin()->second)
<< "\nattempting to access undefined entryPoint: " << entryPoint;
}
auto inputDLTensors = makeDLConstTensors(inputs);
return makeDLTensorVector(tc::detail::inferOutputTensorInfo(
parsedTcs.at(entryPoint), extractRawPtrs(inputDLTensors)));
}

std::vector<Tensor> prepareOutputs(const std::string &tc,
const std::string &entryPoint,
const std::vector<Tensor> &inputs) {
std::vector<Tensor> outputs;
auto outTensorInfo = inferOutputTensorInfo(tc, entryPoint, inputs);
if (outTensorInfo.size() == 0) {
return outputs;
}
TC_CHECK_GE(inputs.size(), 1u)
<< "NYI: need >= 1 input tensor to determine the "
<< "backend and prepare the outputs";

auto dev = inputs[0].device();
auto dtype = inputs[0].data_type();
for (size_t i = 0; i < outTensorInfo.size(); ++i) {
tc::TensorInfo info(outTensorInfo[i]);
Shape shape(info.shape.begin(), info.shape.end());

Tensor tmp(shape, dev, dtype);
outputs.push_back(tmp);
}
return outputs;
}
/// tc integration end
#endif // USE_TC


} // namespace singa
42 changes: 42 additions & 0 deletions src/model/operation/tc_fn.cc
@@ -0,0 +1,42 @@
/*********************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
************************************************************/
#ifdef USE_TC
#include "./tc_fn.h"

namespace singa {

TcFnHandle::TcFnHandle(std::string tcDefinition, std::string entryFn,
                       const std::vector<Tensor> &inputs) {
  tc_string = tcDefinition;
  tc_name = entryFn;
  auto naiveOptions =
      tc::CudaBackend::MappingOptionsType::makeNaiveMappingOptions();
  pExecutor = singa::compileTC<tc::CudaBackend>(tcDefinition, entryFn, inputs,
                                                naiveOptions);
}

Tensor tcExecute(const TcFnHandle &tcFnHandle,
                 const std::vector<Tensor> &inputs) {
  auto outputs =
      singa::prepareOutputs(tcFnHandle.tc_string, tcFnHandle.tc_name, inputs);
  singa::runTC(*(tcFnHandle.pExecutor), inputs, outputs);
  return outputs[0];
}

}  // namespace singa
#endif // USE_TC
42 changes: 42 additions & 0 deletions src/model/operation/tc_fn.h
@@ -0,0 +1,42 @@
/*********************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
************************************************************/
#ifndef SINGA_MODEL_OPERATION_TC_FN_H_
#define SINGA_MODEL_OPERATION_TC_FN_H_

#ifdef USE_TC

#include "singa/core/tensor.h"

namespace singa {

class TcFnHandle {
public:
TcFnHandle(std::string tcDefinition, std::string entryFn, const std::vector<Tensor> &inputs);
std::string tc_string;
std::string tc_name;
std::unique_ptr<tc::CudaBackend::ExecutorType> pExecutor;
};

Tensor tcExecute(const TcFnHandle &tcFnHandle, const std::vector<Tensor> &inputs);

} // namespace singa

#endif // USE_TC

#endif // SINGA_MODEL_OPERATION_TC_FN_H_
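
For reference, a minimal end-to-end sketch of the API added by this commit. It is not part of the diff: the device setup, shapes, fill values, and the TC definition are illustrative assumptions (singa::CudaGPU, singa::Shape, and Tensor::SetValue are pre-existing singa APIs; a CUDA build configured with -DUSE_TC=ON is assumed).

#ifdef USE_TC
#include <memory>
#include <string>

#include "singa/core/device.h"
#include "singa/core/tensor.h"
#include "tc_fn.h"

void MatmulExample() {
  // Place both operands on one CUDA device; TcFnHandle compiles for CUDA.
  auto dev = std::make_shared<singa::CudaGPU>();
  singa::Tensor A(singa::Shape{8, 16}, dev);  // kFloat32 is the default type
  singa::Tensor B(singa::Shape{16, 4}, dev);
  A.SetValue(1.0f);
  B.SetValue(2.0f);

  // A TC definition; "matmul" is the entry point and `kk` is inferred
  // as a reduction index.
  std::string tc = R"TC(
def matmul(float(M, K) A, float(K, N) B) -> (C) {
  C(m, n) +=! A(m, kk) * B(kk, n)
}
)TC";

  // Compile once, then reuse the handle for repeated executions.
  singa::TcFnHandle handle(tc, "matmul", {A, B});
  singa::Tensor C = singa::tcExecute(handle, {A, B});  // shape {8, 4}
}
#endif  // USE_TC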