From 73603a0aae7e94a8dd40214a2b871063e24eb9c3 Mon Sep 17 00:00:00 2001
From: Shenghang Tsai
Date: Mon, 7 Sep 2020 22:25:40 +0800
Subject: [PATCH] upgrade XRT XLA to TF 2.3.0 (#3531)

* compile tf 2.3.0 with gcc 7.3

* fix oneflow eigen

* minor fix

* fix include

* update protobuf if xla is on

* update path of tf proto generated cpp files

* fix path in script

* add .clangd to git ignore

* update xla ifs

* update scripts

* update path in script for clangd

* add gitignore

* add cmake flag XRT_TF_URL

* rm comment

* check in changes

* bash tricks to enable gcc 7.3

* use arg to control tuna

* bumpversion

* fix build wheel

* use real path

* add dir for cpu

* fix unwanted yum update cublas

* uncomment all

* rm suffix of wheelhouse_dir

* add log info

Co-authored-by: tsai
Co-authored-by: tsai

Former-commit-id: da12e8db4f52d3c5351f0e43f3677dd948d3801d
---
 .gitignore                                 |  2 +
 cmake/third_party.cmake                    |  1 +
 cmake/third_party/eigen.cmake              |  6 +-
 cmake/third_party/protobuf.cmake           |  2 +-
 cmake/third_party/tensorflow.cmake         | 67 ++++++++++++++++------
 docker/package/manylinux/Dockerfile        | 20 +++++--
 docker/package/manylinux/build_wheel.sh    | 13 ++++-
 docker/package/manylinux/build_xla.sh      | 23 ++++++++
 docker/package/manylinux/launch.sh         |  4 ++
 docker/package/manylinux/make_release.sh   | 48 +++++++++++++---
 oneflow/python/test/ops/test_optimizers.py |  4 +-
 oneflow/python/version.py                  |  2 +-
 oneflow/xrt/xla/xla_allocator.cpp          |  3 +-
 oneflow/xrt/xla/xla_allocator.h            | 11 +++-
 oneflow/xrt/xla/xla_graph_compiler.cpp     |  7 ++-
 15 files changed, 167 insertions(+), 46 deletions(-)
 create mode 100644 docker/package/manylinux/build_xla.sh
 create mode 100644 docker/package/manylinux/launch.sh

diff --git a/.gitignore b/.gitignore
index 81f4241c588..b4b03a602cf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,5 @@ wheelhouse*
 /oneflow/python/__export_symbols__.py
 /oneflow/python/compatibility.py
 /oneflow/python/framework/sysconfig_gen.py
+.clangd
+compile_commands.json
diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake
index a908ecb0bc8..886c7400adb 100644
--- a/cmake/third_party.cmake
+++ b/cmake/third_party.cmake
@@ -214,6 +214,7 @@ include_directories(${ONEFLOW_INCLUDE_SRC_DIRS})
 
 if(WITH_XLA)
   list(APPEND oneflow_third_party_dependencies tensorflow_copy_libs_to_destination)
+  list(APPEND oneflow_third_party_dependencies tensorflow_symlink_headers)
   list(APPEND oneflow_third_party_libs ${TENSORFLOW_XLA_LIBRARIES})
 endif()
diff --git a/cmake/third_party/eigen.cmake b/cmake/third_party/eigen.cmake
index 178b11f8f7e..4302429c25f 100644
--- a/cmake/third_party/eigen.cmake
+++ b/cmake/third_party/eigen.cmake
@@ -4,8 +4,8 @@ set(EIGEN_INCLUDE_DIR ${THIRD_PARTY_DIR}/eigen/include/eigen3)
 set(EIGEN_INSTALL_DIR ${THIRD_PARTY_DIR}/eigen)
 
 if(WITH_XLA)
-  #set(EIGEN_URL "https://storage.googleapis.com/mirror.tensorflow.org/bitbucket.org/eigen/eigen/get/8071cda5714d.tar.gz")
-  set(EIGEN_URL "https://bitbucket.org/eigen/eigen/get/8071cda5714d.tar.gz")
+  #set(EIGEN_URL "https://storage.googleapis.com/mirror.tensorflow.org/gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
+  set(EIGEN_URL "https://gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
 else()
   set(EIGEN_URL ${THIRD_PARTY_SUBMODULE_DIR}/eigen/src/eigen)
 endif()
@@ -17,7 +17,7 @@ endif()
 #add_definitions(-DEIGEN_NO_AUTOMATIC_RESIZING -DEIGEN_NO_MALLOC -DEIGEN_USE_GPU)
 
 if (THIRD_PARTY)
-
+
 ExternalProject_Add(eigen
    PREFIX eigen
    URL ${EIGEN_URL}
diff --git a/cmake/third_party/protobuf.cmake b/cmake/third_party/protobuf.cmake
index 023ae812de1..2d4dfcde9d0 100644
--- a/cmake/third_party/protobuf.cmake
+++ b/cmake/third_party/protobuf.cmake
@@ -6,7 +6,7 @@ set(PROTOBUF_BINARY_DIR ${THIRD_PARTY_DIR}/protobuf/bin)
 set(PROTOBUF_SRC_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src)
 
 if(WITH_XLA)
-  set(PROTOBUF_URL "https://storage.googleapis.com/mirror.tensorflow.org/github.com/protocolbuffers/protobuf/archive/310ba5ee72661c081129eb878c1bbcec936b20f0.tar.gz")
+  set(PROTOBUF_URL "https://github.com/protocolbuffers/protobuf/archive/v3.9.2.zip")
 else()
   set(PROTOBUF_URL ${THIRD_PARTY_SUBMODULE_DIR}/protobuf/src/protobuf)
 endif()
diff --git a/cmake/third_party/tensorflow.cmake b/cmake/third_party/tensorflow.cmake
index ac1868a1803..997b9ef8dd5 100644
--- a/cmake/third_party/tensorflow.cmake
+++ b/cmake/third_party/tensorflow.cmake
@@ -36,8 +36,7 @@ set(TENSORFLOW_INSTALL_DIR ${THIRD_PARTY_DIR}/tensorflow)
 set(PATCHES_DIR ${PROJECT_SOURCE_DIR}/oneflow/xrt/patches)
 
 set(TENSORFLOW_JIT_DIR ${TENSORFLOW_SRCS_DIR}/tensorflow/compiler/jit)
-
-set(TENSORFLOW_GEN_DIR ${TENSORFLOW_SRCS_DIR}/bazel-out/${TENSORFLOW_GENFILE_DIR}/genfiles)
+set(TENSORFLOW_GEN_DIR ${TENSORFLOW_SRCS_DIR}/bazel-out/${TENSORFLOW_GENFILE_DIR}/bin)
 set(TENSORFLOW_EXTERNAL_DIR ${TENSORFLOW_SRCS_DIR}/bazel-tensorflow/external)
 set(THIRD_ABSL_DIR ${TENSORFLOW_EXTERNAL_DIR}/com_google_absl)
 set(THIRD_PROTOBUF_DIR ${TENSORFLOW_EXTERNAL_DIR}/com_google_protobuf/src)
@@ -54,36 +53,68 @@ list(APPEND TENSORFLOW_XLA_INCLUDE_DIR
   ${THIRD_SNAPPY_DIR}
   ${THIRD_RE2_DIR}
 )
-include_directories(${TENSORFLOW_XLA_INCLUDE_DIR})
+
+list(APPEND TENSORFLOW_XLA_INCLUDE_INSTALL_DIR
+  "${TENSORFLOW_INSTALL_DIR}/include/tensorflow_inc"
+  "${TENSORFLOW_INSTALL_DIR}/include/tensorflow_gen"
+  "${TENSORFLOW_INSTALL_DIR}/include/absl"
+  "${TENSORFLOW_INSTALL_DIR}/include/protobuf"
+  "${TENSORFLOW_INSTALL_DIR}/include/boringssl"
+  "${TENSORFLOW_INSTALL_DIR}/include/snappy"
+  "${TENSORFLOW_INSTALL_DIR}/include/re2"
+)
+
 list(APPEND TENSORFLOW_XLA_LIBRARIES libtensorflow_framework.so.1)
 list(APPEND TENSORFLOW_XLA_LIBRARIES libxla_core.so)
 link_directories(${TENSORFLOW_INSTALL_DIR}/lib)
 
+if(NOT XRT_TF_URL)
+  set(XRT_TF_URL https://github.com/Oneflow-Inc/tensorflow/archive/1f_dep_v2.3.0r4.zip)
+endif()
 if (THIRD_PARTY)
 
 ExternalProject_Add(${TENSORFLOW_PROJECT}
     PREFIX ${TENSORFLOW_SOURCES_DIR}
-    GIT_REPOSITORY ${TENSORFLOW_GIT_URL}
-    GIT_TAG ${TENSORFLOW_GIT_TAG}
+    URL ${XRT_TF_URL}
     CONFIGURE_COMMAND ""
    BUILD_COMMAND cd ${TENSORFLOW_SRCS_DIR} &&
-                  bazel build ${TENSORFLOW_BUILD_CMD} -j 20 //tensorflow/compiler/jit/xla_lib:libxla_core.so
+                  bazel build ${TENSORFLOW_BUILD_CMD} -j HOST_CPUS //tensorflow/compiler/jit/xla_lib:libxla_core.so
     INSTALL_COMMAND ""
 )
 
-set(TENSORFLOW_XLA_FRAMEWORK_LIB ${TENSORFLOW_SRCS_DIR}/bazel-bin/tensorflow/libtensorflow_framework.so.1)
-set(TENSORFLOW_XLA_CORE_LIB ${TENSORFLOW_SRCS_DIR}/bazel-bin/tensorflow/compiler/jit/xla_lib/libxla_core.so)
+  set(TENSORFLOW_XLA_FRAMEWORK_LIB ${TENSORFLOW_SRCS_DIR}/bazel-bin/tensorflow/libtensorflow_framework.so.2)
+  set(TENSORFLOW_XLA_CORE_LIB ${TENSORFLOW_SRCS_DIR}/bazel-bin/tensorflow/compiler/jit/xla_lib/libxla_core.so)
 
-add_custom_target(tensorflow_create_library_dir
-  COMMAND ${CMAKE_COMMAND} -E make_directory ${TENSORFLOW_INSTALL_DIR}/lib
-  DEPENDS ${TENSORFLOW_PROJECT})
+  add_custom_target(tensorflow_create_library_dir
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${TENSORFLOW_INSTALL_DIR}/lib
+    DEPENDS ${TENSORFLOW_PROJECT})
+
+  add_custom_target(tensorflow_copy_libs_to_destination
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different
+      ${TENSORFLOW_XLA_FRAMEWORK_LIB} ${TENSORFLOW_XLA_CORE_LIB} ${TENSORFLOW_INSTALL_DIR}/lib
+    COMMAND ${CMAKE_COMMAND} -E create_symlink
+      ${TENSORFLOW_INSTALL_DIR}/lib/libtensorflow_framework.so.2
+      ${TENSORFLOW_INSTALL_DIR}/lib/libtensorflow_framework.so
+    DEPENDS tensorflow_create_library_dir)
+
+  add_custom_target(tensorflow_create_include_dir
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${TENSORFLOW_INSTALL_DIR}/include
+    DEPENDS ${TENSORFLOW_PROJECT})
+
+  add_custom_target(tensorflow_symlink_headers
+    DEPENDS tensorflow_create_include_dir)
+
+  foreach(src_dst_pair IN ZIP_LISTS TENSORFLOW_XLA_INCLUDE_DIR TENSORFLOW_XLA_INCLUDE_INSTALL_DIR)
+    set(src ${src_dst_pair_0})
+    set(dst ${src_dst_pair_1})
+    add_custom_command(TARGET tensorflow_symlink_headers
+      COMMAND ${CMAKE_COMMAND} -E create_symlink
+        ${src}
+        ${dst}
+    )
+  endforeach()
 
-add_custom_target(tensorflow_copy_libs_to_destination
-  COMMAND ${CMAKE_COMMAND} -E copy_if_different
-    ${TENSORFLOW_XLA_FRAMEWORK_LIB} ${TENSORFLOW_XLA_CORE_LIB} ${TENSORFLOW_INSTALL_DIR}/lib
-  COMMAND ${CMAKE_COMMAND} -E create_symlink
-    ${TENSORFLOW_INSTALL_DIR}/lib/libtensorflow_framework.so.1
-    ${TENSORFLOW_INSTALL_DIR}/lib/libtensorflow_framework.so
-  DEPENDS tensorflow_create_library_dir)
 endif(THIRD_PARTY)
 
+include_directories(${TENSORFLOW_XLA_INCLUDE_INSTALL_DIR})
+
 endif(WITH_XLA)
diff --git a/docker/package/manylinux/Dockerfile b/docker/package/manylinux/Dockerfile
index 6c1544a709e..f29b60d9ce4 100644
--- a/docker/package/manylinux/Dockerfile
+++ b/docker/package/manylinux/Dockerfile
@@ -1,4 +1,6 @@
 ARG from
+ARG use_tuna_yum=1
+ARG pip_args="-i https://pypi.tuna.tsinghua.edu.cn/simple"
 FROM ${from}
 LABEL maintainer="OneFlow Maintainers"
 
@@ -13,9 +15,11 @@ ENV LD_LIBRARY_PATH /usr/local/lib64:/usr/local/lib
 ENV PKG_CONFIG_PATH /usr/local/lib/pkgconfig
 
 # use tuna mirror
-COPY docker/package/manylinux/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo
-RUN yum makecache
+COPY docker/package/manylinux/CentOS-Base.repo /tmp/CentOS-Base.repo
+RUN if [ "${use_tuna}" = "1" ]; then mv /tmp/CentOS-Base.repo /etc/yum.repos.d/ && yum makecache ; fi
 
+# in 10.1, cuda yum repo will update cublas to 10.2 and breaks build
+RUN yum-config-manager --disable cuda
 ARG MANYLINUX_SHA=f5da004
 RUN yum -y install unzip && curl -L -o manylinux.zip https://github.com/pypa/manylinux/archive/${MANYLINUX_SHA}.zip && unzip manylinux.zip -d tmp && cp -r tmp/*/docker/build_scripts /build_scripts && bash build_scripts/build.sh && rm -r build_scripts tmp manylinux.zip
@@ -25,10 +29,10 @@ ENV SSL_CERT_FILE=/opt/_internal/certs.pem
 RUN yum-config-manager --add-repo https://yum.repos.intel.com/setup/intelproducts.repo && \
     rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && \
     yum update -y && yum install -y epel-release && \
-    yum install -y intel-mkl-64bit-2020.0-088 nasm swig rdma-core-devel
+    yum -y install centos-release-scl && \
+    yum install -y intel-mkl-64bit-2020.0-088 nasm swig rdma-core-devel devtoolset-7-gcc*
 
-ENV TUNA_INDEX="-i https://pypi.tuna.tsinghua.edu.cn/simple"
-RUN /opt/python/cp35-cp35m/bin/pip install $TUNA_INDEX -U cmake && ln -s /opt/_internal/cpython-3.5.9/bin/cmake /usr/bin/cmake
+RUN /opt/python/cp35-cp35m/bin/pip install $pip_args -U cmake && ln -s /opt/_internal/cpython-3.5.9/bin/cmake /usr/bin/cmake
 # overwrite patchelf to fix "maximum size exceed" problem
 RUN mkdir -p /tmp && curl -L -o 0.11.zip https://github.com/NixOS/patchelf/archive/0.11.zip && unzip 0.11.zip && cd patchelf-0.11 && sed -i 's/32/64/g' src/patchelf.cc && ./bootstrap.sh && ./configure && make -j`nproc` && make install && cd .. && rm -rf patchelf-0.11 0.11.zip
@@ -40,4 +44,10 @@ RUN /opt/python/cp35-cp35m/bin/pip install $TUNA_INDEX -r /tmp/dev-requirements.
     && /opt/python/cp38-cp38/bin/pip install $TUNA_INDEX -r /tmp/dev-requirements.txt --user \
     && rm /tmp/dev-requirements.txt
 
+RUN curl -L https://github.com/bazelbuild/bazel/releases/download/3.4.1/bazel-3.4.1-linux-x86_64 -o /usr/local/bin/bazel \
+    && chmod +x /usr/local/bin/bazel \
+    && bazel
+
+RUN echo "source scl_source enable devtoolset-7" >> ~/.bashrc
+
 CMD ["/oneflow-src/docker/package/manylinux/build_wheel.sh"]
diff --git a/docker/package/manylinux/build_wheel.sh b/docker/package/manylinux/build_wheel.sh
index d0ed5847a2b..c3ed5d9d991 100755
--- a/docker/package/manylinux/build_wheel.sh
+++ b/docker/package/manylinux/build_wheel.sh
@@ -48,6 +48,14 @@ fi
 
 cd $ONEFLOW_SRC_DIR
 
+# TF requires py3 to build
+export PATH=/opt/python/cp37-cp37m/bin:$PATH
+python --version
+gcc --version
+
+# specify a mounted dir as bazel cache dir
+export TEST_TMPDIR=$CACHE_DIR/bazel_cache
+
 THIRD_PARTY_BUILD_DIR=$CACHE_DIR/build-third-party
 THIRD_PARTY_INSTALL_DIR=$CACHE_DIR/build-third-party-install
 COMMON_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DBUILD_RDMA=ON -DTHIRD_PARTY_DIR=$THIRD_PARTY_INSTALL_DIR"
@@ -86,9 +94,10 @@ do
     rm -rf $ONEFLOW_BUILD_DIR/python_scripts/oneflow/*.so
     rm -rf $ONEFLOW_SRC_DIR/build/bdist.linux-x86_64
     rm -rf $ONEFLOW_SRC_DIR/build/lib
-    cmake -DTHIRD_PARTY=OFF -DONEFLOW=ON\
+    cmake -DTHIRD_PARTY=OFF -DONEFLOW=ON \
+        -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
         $COMMON_CMAKE_ARGS \
-        -DPython3_ROOT_DIR=$PY_ROOT \
+        -DPython3_EXECUTABLE=${PY_BIN} \
         $EXTRA_ONEFLOW_CMAKE_ARGS \
         $ONEFLOW_SRC_DIR
     cmake --build . -j `nproc`
diff --git a/docker/package/manylinux/build_xla.sh b/docker/package/manylinux/build_xla.sh
new file mode 100644
index 00000000000..f203061a8dc
--- /dev/null
+++ b/docker/package/manylinux/build_xla.sh
@@ -0,0 +1,23 @@
+set -ex
+ONEFLOW_SRC_DIR=${ONEFLOW_SRC_DIR:-${PWD}}
+wheelhouse_dir=${ONEFLOW_SRC_DIR}/wheelhouse-xla
+
+# TF requires py3 to build
+PY_ROOT=/opt/python/cp37-cp37m
+PY_BIN=${PY_ROOT}/bin
+export PATH=$PY_BIN:$PATH
+python --version
+
+source scl_source enable devtoolset-7
+
+cache_dir=$ONEFLOW_SRC_DIR/manylinux2014-build-cache-cuda-10.2-xla
+cache_dir=$ONEFLOW_SRC_DIR/manylinux2014-build-cache-cuda-11.0-xla
+export TEST_TMPDIR=$cache_dir/bazel_cache
+gcc --version
+
+bash docker/package/manylinux/build_wheel.sh \
+    --python3.6 \
+    --cache-dir $cache_dir \
+    --house-dir $wheelhouse_dir \
+    -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
+    -DWITH_XLA=ON
diff --git a/docker/package/manylinux/launch.sh b/docker/package/manylinux/launch.sh
new file mode 100644
index 00000000000..b0f540d5ed7
--- /dev/null
+++ b/docker/package/manylinux/launch.sh
@@ -0,0 +1,4 @@
+set -ex
+docker run --rm -it \
+    -v `pwd`:`pwd` \
+    -w `pwd` oneflow:rel-manylinux2014-cuda-11.0 bash
diff --git a/docker/package/manylinux/make_release.sh b/docker/package/manylinux/make_release.sh
index 3663fc55c04..61cd91c0306 100644
--- a/docker/package/manylinux/make_release.sh
+++ b/docker/package/manylinux/make_release.sh
@@ -1,9 +1,12 @@
 set -ex
 
-wheelhouse_dir=/oneflow-src/wheelhouse
+wheelhouse_dir=`pwd`/wheelhouse
 package_name=oneflow
 
+tuna_build_args=""
+tuna_build_args="--build-arg use_tuna_yum=0 --build-arg pip_args="""
+
 function release() {
     set -ex
     docker_tag=oneflow:rel-manylinux2014-cuda-$1
@@ -12,22 +15,41 @@ function release() {
     else
        cudnn_version=7
     fi
-    docker build --build-arg from=nvidia/cuda:$1-cudnn${cudnn_version}-devel-centos7 -f docker/package/manylinux/Dockerfile -t $docker_tag .
-    docker run --rm -it -v `pwd`:/oneflow-src -w /oneflow-src $docker_tag \
-        /oneflow-src/docker/package/manylinux/build_wheel.sh --cache-dir /oneflow-src/manylinux2014-build-cache-cuda-$1 \
-        --house-dir $wheelhouse_dir \
+    docker build --build-arg from=nvidia/cuda:$1-cudnn${cudnn_version}-devel-centos7 \
+        ${tuna_build_args} \
+        -f docker/package/manylinux/Dockerfile -t $docker_tag .
+    docker run --rm -it -v `pwd`:`pwd` -w `pwd` $docker_tag \
+        docker/package/manylinux/build_wheel.sh --cache-dir `pwd`/manylinux2014-build-cache-cuda-$1 \
+        --house-dir ${wheelhouse_dir} \
         --package-name ${package_name}_cu`echo $1 | tr -d .`
 }
 
 function release_cpu() {
-    docker run --rm -it -v `pwd`:/oneflow-src -w /oneflow-src oneflow:rel-manylinux2014-cuda-10.2 \
-        /oneflow-src/docker/package/manylinux/build_wheel.sh --cache-dir /oneflow-src/manylinux2014-build-cache-cpu \
-        --house-dir $wheelhouse_dir \
+    docker run --rm -it -v `pwd`:`pwd` -w `pwd` oneflow:rel-manylinux2014-cuda-10.2 \
+        docker/package/manylinux/build_wheel.sh --cache-dir `pwd`/manylinux2014-build-cache-cpu \
+        --house-dir ${wheelhouse_dir} \
         -DBUILD_CUDA=OFF \
         --package-name "${package_name}_cpu"
 }
 
-release_cpu
+function release_xla() {
+    set -ex
+    docker_tag=oneflow:rel-manylinux2014-cuda-$1
+    if [ "$1" == "11.0" ]; then
+        cudnn_version=8
+    else
+        cudnn_version=7
+    fi
+    docker build --build-arg from=nvidia/cuda:$1-cudnn${cudnn_version}-devel-centos7 \
+        ${tuna_build_args} \
+        -f docker/package/manylinux/Dockerfile -t $docker_tag .
+    docker run --rm -it -v `pwd`:`pwd` -w `pwd` $docker_tag \
+        bash -l docker/package/manylinux/build_wheel.sh --cache-dir `pwd`/manylinux2014-build-cache-cuda-$1-xla \
+        --house-dir ${wheelhouse_dir} \
+        --package-name ${package_name}_cu`echo $1 | tr -d .`_xla \
+        -DWITH_XLA=ON
+}
+
 release 11.0
 release 10.2
 release 10.1
 release 10.0
 release 9.2
 release 9.1
 release 9.0
+
+release_cpu
+
+release_xla 11.0
+release_xla 10.2
+release_xla 10.1
+release_xla 10.0
+# failed to build XLA with CUDA 9.X
diff --git a/oneflow/python/test/ops/test_optimizers.py b/oneflow/python/test/ops/test_optimizers.py
index 0e2b2372c0d..f07ee9dc3fe 100644
--- a/oneflow/python/test/ops/test_optimizers.py
+++ b/oneflow/python/test/ops/test_optimizers.py
@@ -87,7 +87,9 @@ def testRmsprop(
         gradients = tape.gradient(loss, var)
         opt.apply_gradients(zip([gradients], [var]))
 
-    assert np.allclose(x.flatten(), var.numpy().flatten(), rtol=1e-3, atol=1e-3,)
+    assert np.allclose(x.flatten(), var.numpy().flatten(), rtol=1e-3, atol=1e-3,), (
+        x.flatten() - var.numpy().flatten()
+    )
 
 
 def compare_with_tensorflow_adam(
diff --git a/oneflow/python/version.py b/oneflow/python/version.py
index 639f0c49dbf..0a201297351 100644
--- a/oneflow/python/version.py
+++ b/oneflow/python/version.py
@@ -13,4 +13,4 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-__version__ = "0.1.10"
+__version__ = "0.1.11b1"
diff --git a/oneflow/xrt/xla/xla_allocator.cpp b/oneflow/xrt/xla/xla_allocator.cpp
index 24694a5fbae..19ab7143b99 100644
--- a/oneflow/xrt/xla/xla_allocator.cpp
+++ b/oneflow/xrt/xla/xla_allocator.cpp
@@ -38,7 +38,8 @@ XlaAllocator::XlaAllocator(const se::Platform *platform, DeviceBufferAllocator *
 XlaAllocator::~XlaAllocator() {}
 
 xla::StatusOr<se::OwningDeviceMemory> XlaAllocator::Allocate(int device_ordinal, uint64 size,
-                                                             bool retry_on_failure) {
+                                                             bool retry_on_failure,
+                                                             int64 /*memory_space*/) {
   se::DeviceMemoryBase memory_base;
   if (allocate_index_ < populated_buffers_.size() &&
       populated_buffers_[allocate_index_].populated) {
diff --git a/oneflow/xrt/xla/xla_allocator.h b/oneflow/xrt/xla/xla_allocator.h
index e33fa9b7b9e..f746bc10d87 100644
--- a/oneflow/xrt/xla/xla_allocator.h
+++ b/oneflow/xrt/xla/xla_allocator.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef ONEFLOW_XRT_XLA_XLA_ALLOCATOR_H_
 #define ONEFLOW_XRT_XLA_XLA_ALLOCATOR_H_
 
+#include "oneflow/core/common/util.h"
 #include "oneflow/xrt/xla/memory/device_buffer_allocator.h"
 #include "tensorflow/compiler/xla/statusor.h"
 
@@ -28,14 +29,16 @@ namespace mola {
 namespace se = tensorflow::se;
 using uint64 = tensorflow::uint64;
+using int64 = tensorflow::int64;
 
 class XlaAllocator : public se::DeviceMemoryAllocator {
  public:
  explicit XlaAllocator(const se::Platform *platform, DeviceBufferAllocator *allocator);
  virtual ~XlaAllocator();
-
+  using se::DeviceMemoryAllocator::Allocate;
  xla::StatusOr<se::OwningDeviceMemory> Allocate(int device_ordinal, uint64 size,
-                                                 bool retry_on_failure) override;
+                                                 bool retry_on_failure,
+                                                 int64 /*memory_space*/) override;
  tensorflow::Status Deallocate(int device_ordinal, se::DeviceMemoryBase mem) override;
 
  bool AllowsAsynchronousDeallocation() const override { return true; }
@@ -47,6 +50,10 @@ class XlaAllocator : public se::DeviceMemoryAllocator {
 
  void PopulateDeviceMemory(const std::vector<se::DeviceMemoryBase> &device_buffers,
                            const std::vector<int64_t> &allocation_indices);
+  stream_executor::port::StatusOr<stream_executor::Stream *> GetStream(
+      int device_ordinal) override {
+    UNIMPLEMENTED();
+  };
 
  private:
  DeviceBufferAllocator *allocator_;
diff --git a/oneflow/xrt/xla/xla_graph_compiler.cpp b/oneflow/xrt/xla/xla_graph_compiler.cpp
index 767ea91aacf..883e406c28c 100644
--- a/oneflow/xrt/xla/xla_graph_compiler.cpp
+++ b/oneflow/xrt/xla/xla_graph_compiler.cpp
@@ -137,10 +137,11 @@ std::shared_ptr<Executable> XlaGraphCompiler::BuildExecutable(
   xla::ExecutableBuildOptions build_options;
   build_options.set_device_ordinal(this->device_ordinal_);
   build_options.set_result_layout(xla_output_shape);
-  MOLA_CHECK_AND_ASSIGN(auto executable,
+  MOLA_CHECK_AND_ASSIGN(auto executables,
                         client->Compile(computation, argument_layouts, build_options));
-  return std::make_shared<XlaExecutable>(builder_->name(), this->device_, xla_input_shapes, xla_output_shape,
-                                         std::move(executable));
+  CHECK(executables.size() == 1);
+  return std::make_shared<XlaExecutable>(builder_->name(), this->device_, xla_input_shapes,
+                                         xla_output_shape, std::move(executables.at(0)));
 }
 
 void XlaGraphCompiler::BuildEntryParameters(const std::vector<Parameter> &entry_params,