From 4ed2054c4ca94086585fe32ea48b8885b06a453f Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Fri, 18 Mar 2022 14:49:07 +0000
Subject: [PATCH 01/51] add arepo directory

---
 src/amuse/community/arepo/Makefile      | 42 +++++++++++++++++++++++++
 src/amuse/community/arepo/__init__.py   |  1 +
 src/amuse/community/arepo/interface.cc  | 11 +++++++
 src/amuse/community/arepo/interface.py  | 24 ++++++++++++++
 src/amuse/community/arepo/src/Makefile  | 27 ++++++++++++++++
 src/amuse/community/arepo/src/test.cc   |  6 ++++
 src/amuse/community/arepo/test_arepo.py | 14 +++++++++
 7 files changed, 125 insertions(+)
 create mode 100644 src/amuse/community/arepo/Makefile
 create mode 100644 src/amuse/community/arepo/__init__.py
 create mode 100644 src/amuse/community/arepo/interface.cc
 create mode 100644 src/amuse/community/arepo/interface.py
 create mode 100644 src/amuse/community/arepo/src/Makefile
 create mode 100644 src/amuse/community/arepo/src/test.cc
 create mode 100644 src/amuse/community/arepo/test_arepo.py

diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile
new file mode 100644
index 0000000000..7c392db261
--- /dev/null
+++ b/src/amuse/community/arepo/Makefile
@@ -0,0 +1,42 @@
+# standard amuse configuration include
+# config.mk will be made after ./configure has run
+ifeq ($(origin AMUSE_DIR), undefined)
+  AMUSE_DIR := $(shell amusifier --get-amuse-dir)
+endif
+-include $(AMUSE_DIR)/config.mk
+
+MPICXX   ?= mpicxx
+
+CFLAGS   += -Wall -g
+CXXFLAGS += $(CFLAGS)
+LDFLAGS  += -lm $(MUSE_LD_FLAGS)
+
+OBJS = interface.o
+
+CODELIB = src/libarepo.a
+
+.PHONY: all clean distclean
+all: arepo_worker
+
+clean:
+	$(RM) -rf __pycache__
+	$(RM) -f *.so *.o *.pyc *~ arepo_worker worker_code.cc worker_code.h
+	$(MAKE) -C src clean
+
+distclean: clean
+	$(MAKE) -C src distclean
+# Build the code library in src/ (no prerequisites: only runs when it is missing).
+$(CODELIB):
+	$(MAKE) -C src all
+# Generate the worker source and header from interface.py with $(CODE_GENERATOR).
+worker_code.cc: interface.py
+	$(CODE_GENERATOR) --type=c interface.py arepoInterface -o $@
+
+worker_code.h: interface.py
+	$(CODE_GENERATOR) --type=H interface.py arepoInterface -o $@
+# Link the worker; $< is worker_code.cc, compiled and linked in a single step.
+arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS)
+	$(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@
+# Compile each .cc file into the matching .o.
+%.o: %.cc
+	$(CXX) $(CXXFLAGS) -c -o $@ $<
diff --git a/src/amuse/community/arepo/__init__.py b/src/amuse/community/arepo/__init__.py
new file mode 100644
index 0000000000..abe3ba85b6
--- /dev/null
+++ b/src/amuse/community/arepo/__init__.py
@@ -0,0 +1 @@
+# generated file
\ No newline at end of file
diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
new file mode 100644
index 0000000000..a590e82689
--- /dev/null
+++ b/src/amuse/community/arepo/interface.cc
@@ -0,0 +1,11 @@
+extern int echo(int input);
+
+/*
+ * Interface code: echo_int() stores echo(input) in *output and returns 0.
+ */
+ 
+int echo_int(int input, int * output){
+    *output = echo(input);
+    return 0;
+}
+
diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
new file mode 100644
index 0000000000..64130dc384
--- /dev/null
+++ b/src/amuse/community/arepo/interface.py
@@ -0,0 +1,24 @@
+from amuse.community import *
+
+class arepoInterface(CodeInterface):
+    """Interface to the "arepo_worker" worker; currently exposes only echo_int."""
+    include_headers = ['worker_code.h']
+    
+    def __init__(self, **keyword_arguments):
+        CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments)
+    
+    @legacy_function
+    def echo_int():
+        function = LegacyFunctionSpecification()  
+        function.addParameter('int_in', dtype='int32', direction=function.IN)
+        function.addParameter('int_out', dtype='int32', direction=function.OUT)
+        function.result_type = 'int32'
+        function.can_handle_array = True
+        return function
+        
+    
+class arepo(InCodeComponentImplementation):
+    """Component implementation built on top of an arepoInterface instance."""
+    def __init__(self, **options):
+        InCodeComponentImplementation.__init__(self,  arepoInterface(**options), **options)
+    
diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile
new file mode 100644
index 0000000000..66f18374a1
--- /dev/null
+++ b/src/amuse/community/arepo/src/Makefile
@@ -0,0 +1,27 @@
+CFLAGS   += -Wall -g
+CXXFLAGS += $(CFLAGS)
+LDFLAGS  += -lm $(MUSE_LD_FLAGS)
+
+CODELIB = libarepo.a
+
+CODEOBJS = test.o
+
+AR = ar ruv
+RANLIB = ranlib
+RM = rm
+
+.PHONY: all clean distclean
+all: $(CODELIB)
+
+clean:
+	$(RM) -f *.o *.a
+
+distclean: clean
+# Recreate the static library from scratch out of the object files.
+$(CODELIB): $(CODEOBJS)
+	$(RM) -f $@
+	$(AR) $@ $(CODEOBJS)
+	$(RANLIB) $@
+# Compile each .cc file into the matching .o.
+%.o: %.cc
+	$(CXX) $(CXXFLAGS) -c -o $@ $<
diff --git a/src/amuse/community/arepo/src/test.cc b/src/amuse/community/arepo/src/test.cc
new file mode 100644
index 0000000000..c30eeef8cb
--- /dev/null
+++ b/src/amuse/community/arepo/src/test.cc
@@ -0,0 +1,6 @@
+/*
+ * Example function for a code: returns its argument unchanged.
+ */
+int echo(int input){
+    return input;
+}
diff --git a/src/amuse/community/arepo/test_arepo.py b/src/amuse/community/arepo/test_arepo.py
new file mode 100644
index 0000000000..8cdeabb474
--- /dev/null
+++ b/src/amuse/community/arepo/test_arepo.py
@@ -0,0 +1,14 @@
+from amuse.test.amusetest import TestWithMPI
+
+from .interface import arepoInterface
+from .interface import arepo
+
+class arepoInterfaceTests(TestWithMPI):
+    def test1(self):
+        instance = arepoInterface()
+        try:  # always stop the worker, even when an assertion fails
+            result, error = instance.echo_int(12)
+            self.assertEqual(error, 0)
+            self.assertEqual(result, 12)
+        finally:
+            instance.stop()

From 5507dc490d9f8760e9edb0a9dae7b4141881dd86 Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Fri, 18 Mar 2022 14:57:05 +0000
Subject: [PATCH 02/51] add arepo source code

---
 .../arepo/src/add_backgroundgrid/add_bggrid.c |  492 ++
 .../arepo/src/add_backgroundgrid/add_bggrid.h |   58 +
 .../src/add_backgroundgrid/calc_weights.c     |  301 +
 .../arepo/src/add_backgroundgrid/distribute.c |  328 ++
 .../community/arepo/src/cooling/cooling.c     |  870 +++
 .../arepo/src/cooling/cooling_proto.h         |   49 +
 .../arepo/src/cooling/cooling_vars.h          |   80 +
 src/amuse/community/arepo/src/debug_md5/Md5.c |  472 ++
 src/amuse/community/arepo/src/debug_md5/Md5.h |   92 +
 .../arepo/src/debug_md5/calc_checksum.c       |  121 +
 .../community/arepo/src/domain/bsd_tree.h     |  865 +++
 src/amuse/community/arepo/src/domain/domain.c |  633 ++
 src/amuse/community/arepo/src/domain/domain.h |  156 +
 .../arepo/src/domain/domain_DC_update.c       |  699 +++
 .../arepo/src/domain/domain_balance.c         | 1154 ++++
 .../community/arepo/src/domain/domain_box.c   |  336 ++
 .../arepo/src/domain/domain_counttogo.c       |   84 +
 .../arepo/src/domain/domain_exchange.c        |  399 ++
 .../arepo/src/domain/domain_rearrange.c       |  129 +
 .../arepo/src/domain/domain_sort_kernels.c    |  158 +
 .../arepo/src/domain/domain_toplevel.c        |  393 ++
 .../community/arepo/src/domain/domain_vars.c  |  117 +
 src/amuse/community/arepo/src/domain/peano.c  |  569 ++
 src/amuse/community/arepo/src/fof/fof.c       |  967 ++++
 src/amuse/community/arepo/src/fof/fof.h       |  319 +
 .../community/arepo/src/fof/fof_distribute.c  |  420 ++
 .../community/arepo/src/fof/fof_findgroups.c  |  720 +++
 src/amuse/community/arepo/src/fof/fof_io.c    | 3151 ++++++++++
 .../community/arepo/src/fof/fof_nearest.c     |  473 ++
 .../arepo/src/fof/fof_sort_kernels.c          |  495 ++
 src/amuse/community/arepo/src/fof/fof_vars.c  |   79 +
 .../community/arepo/src/gitversion/version    |    7 +
 .../community/arepo/src/gitversion/version.h  |   38 +
 src/amuse/community/arepo/src/gravity/accel.c |  347 ++
 .../community/arepo/src/gravity/forcetree.c   | 1827 ++++++
 .../community/arepo/src/gravity/forcetree.h   |  168 +
 .../arepo/src/gravity/forcetree_ewald.c       |  529 ++
 .../src/gravity/forcetree_optimizebalance.c   |  486 ++
 .../arepo/src/gravity/forcetree_walk.c        |  709 +++
 .../arepo/src/gravity/grav_external.c         |  579 ++
 .../arepo/src/gravity/grav_softening.c        |  215 +
 .../community/arepo/src/gravity/gravdirect.c  |  259 +
 .../community/arepo/src/gravity/gravtree.c    |  749 +++
 .../arepo/src/gravity/gravtree_forcetest.c    | 1089 ++++
 .../community/arepo/src/gravity/longrange.c   |  199 +
 .../arepo/src/gravity/pm/pm_mpi_fft.c         | 1771 ++++++
 .../arepo/src/gravity/pm/pm_nonperiodic.c     | 2087 +++++++
 .../arepo/src/gravity/pm/pm_periodic.c        | 2034 +++++++
 .../arepo/src/gravity/pm/pm_periodic2d.c      |  905 +++
 .../arepo/src/hydro/finite_volume_solver.c    | 1895 ++++++
 .../community/arepo/src/hydro/gradients.c     |  149 +
 src/amuse/community/arepo/src/hydro/mhd.c     |   99 +
 src/amuse/community/arepo/src/hydro/riemann.c |  955 +++
 .../community/arepo/src/hydro/riemann_hllc.c  |  213 +
 .../community/arepo/src/hydro/riemann_hlld.c  |  567 ++
 src/amuse/community/arepo/src/hydro/scalars.c |  107 +
 .../src/hydro/update_primitive_variables.c    |  343 ++
 src/amuse/community/arepo/src/init/begrun.c   |  344 ++
 src/amuse/community/arepo/src/init/density.c  |  635 ++
 src/amuse/community/arepo/src/init/init.c     |  835 +++
 src/amuse/community/arepo/src/io/global.c     |  257 +
 src/amuse/community/arepo/src/io/hdf5_util.c  |  881 +++
 src/amuse/community/arepo/src/io/io.c         | 2226 +++++++
 src/amuse/community/arepo/src/io/io_fields.c  |  765 +++
 src/amuse/community/arepo/src/io/logs.c       |  623 ++
 src/amuse/community/arepo/src/io/parameters.c |  861 +++
 src/amuse/community/arepo/src/io/read_ic.c    | 1900 ++++++
 src/amuse/community/arepo/src/io/restart.c    | 1549 +++++
 src/amuse/community/arepo/src/main/allvars.c  |  331 ++
 src/amuse/community/arepo/src/main/allvars.h  | 1924 +++++++
 src/amuse/community/arepo/src/main/main.c     |  296 +
 .../community/arepo/src/main/main_original.c  |  299 +
 .../community/arepo/src/main/main_reduced.c   |  135 +
 src/amuse/community/arepo/src/main/proto.h    |  665 +++
 src/amuse/community/arepo/src/main/run.c      |  660 +++
 .../arepo/src/mesh/criterion_derefinement.c   |  181 +
 .../arepo/src/mesh/criterion_refinement.c     |  267 +
 src/amuse/community/arepo/src/mesh/mesh.h     |  268 +
 .../community/arepo/src/mesh/refinement.c     |  217 +
 .../arepo/src/mesh/set_vertex_velocities.c    |  321 ++
 .../arepo/src/mesh/voronoi/voronoi.c          | 1163 ++++
 .../arepo/src/mesh/voronoi/voronoi.h          |  379 ++
 .../arepo/src/mesh/voronoi/voronoi_1d.c       |  363 ++
 .../src/mesh/voronoi/voronoi_1d_spherical.c   |  339 ++
 .../arepo/src/mesh/voronoi/voronoi_2d.c       | 2110 +++++++
 .../arepo/src/mesh/voronoi/voronoi_3d.c       | 5111 +++++++++++++++++
 .../arepo/src/mesh/voronoi/voronoi_check.c    |  407 ++
 .../src/mesh/voronoi/voronoi_derefinement.c   | 1088 ++++
 .../src/mesh/voronoi/voronoi_dynamic_update.c | 1037 ++++
 .../arepo/src/mesh/voronoi/voronoi_exchange.c |  531 ++
 .../src/mesh/voronoi/voronoi_ghost_search.c   | 1773 ++++++
 .../src/mesh/voronoi/voronoi_gradients_lsf.c  |  944 +++
 .../mesh/voronoi/voronoi_gradients_onedims.c  |  204 +
 .../src/mesh/voronoi/voronoi_refinement.c     |  425 ++
 .../arepo/src/mesh/voronoi/voronoi_utils.c    |  501 ++
 .../src/mpi_utils/checksummed_sendrecv.c      |  321 ++
 .../src/mpi_utils/hypercube_allgatherv.c      |   94 +
 .../community/arepo/src/mpi_utils/mpi_util.c  |  375 ++
 .../arepo/src/mpi_utils/myIBarrier.c          |  175 +
 .../arepo/src/mpi_utils/myIBarrier.h          |   51 +
 .../arepo/src/mpi_utils/myalltoall.c          |  122 +
 .../community/arepo/src/mpi_utils/pinning.c   |  292 +
 .../src/mpi_utils/sizelimited_sendrecv.c      |  116 +
 .../community/arepo/src/ngbtree/ngbtree.c     | 1394 +++++
 .../arepo/src/ngbtree/ngbtree_search.c        |  376 ++
 .../arepo/src/ngbtree/ngbtree_walk.c          |  225 +
 .../arepo/src/star_formation/sfr_eEOS.c       |  539 ++
 .../arepo/src/star_formation/starformation.c  |  437 ++
 .../community/arepo/src/subfind/subfind.c     |  577 ++
 .../community/arepo/src/subfind/subfind.h     |  213 +
 .../arepo/src/subfind/subfind_coll_domain.c   |  620 ++
 .../arepo/src/subfind/subfind_coll_tree.c     |  992 ++++
 .../arepo/src/subfind/subfind_coll_treewalk.c |  460 ++
 .../arepo/src/subfind/subfind_collective.c    | 2417 ++++++++
 .../arepo/src/subfind/subfind_density.c       |  662 +++
 .../arepo/src/subfind/subfind_distribute.c    |  421 ++
 .../arepo/src/subfind/subfind_findlinkngb.c   |  539 ++
 .../community/arepo/src/subfind/subfind_io.c  |  156 +
 .../arepo/src/subfind/subfind_loctree.c       |  930 +++
 .../arepo/src/subfind/subfind_nearesttwo.c    |  475 ++
 .../arepo/src/subfind/subfind_properties.c    | 1195 ++++
 .../arepo/src/subfind/subfind_reprocess.c     |  240 +
 .../arepo/src/subfind/subfind_serial.c        |  807 +++
 .../community/arepo/src/subfind/subfind_so.c  |  964 ++++
 .../arepo/src/subfind/subfind_so_potegy.c     |  853 +++
 .../arepo/src/subfind/subfind_sort_kernels.c  |  442 ++
 .../arepo/src/subfind/subfind_vars.c          |  102 +
 .../arepo/src/time_integration/darkenergy.c   |   74 +
 .../src/time_integration/do_gravity_hydro.c   |  484 ++
 .../arepo/src/time_integration/driftfac.c     |  307 +
 .../arepo/src/time_integration/predict.c      |  506 ++
 .../arepo/src/time_integration/timestep.c     |  980 ++++
 .../arepo/src/time_integration/timestep.h     |   88 +
 .../src/time_integration/timestep_treebased.c |  494 ++
 .../community/arepo/src/utils/allocate.c      |  133 +
 src/amuse/community/arepo/src/utils/debug.c   |  148 +
 src/amuse/community/arepo/src/utils/dtypes.h  |  195 +
 .../arepo/src/utils/generic_comm_helpers2.h   |  724 +++
 .../community/arepo/src/utils/mpz_extension.c |  119 +
 .../community/arepo/src/utils/mymalloc.c      |  792 +++
 .../community/arepo/src/utils/parallel_sort.c |  743 +++
 .../community/arepo/src/utils/predicates.c    | 4292 ++++++++++++++
 src/amuse/community/arepo/src/utils/system.c  | 1300 +++++
 src/amuse/community/arepo/src/utils/tags.h    |   50 +
 src/amuse/community/arepo/src/utils/timer.h   |  251 +
 145 files changed, 95582 insertions(+)
 create mode 100644 src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.c
 create mode 100644 src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.h
 create mode 100644 src/amuse/community/arepo/src/add_backgroundgrid/calc_weights.c
 create mode 100644 src/amuse/community/arepo/src/add_backgroundgrid/distribute.c
 create mode 100644 src/amuse/community/arepo/src/cooling/cooling.c
 create mode 100644 src/amuse/community/arepo/src/cooling/cooling_proto.h
 create mode 100644 src/amuse/community/arepo/src/cooling/cooling_vars.h
 create mode 100644 src/amuse/community/arepo/src/debug_md5/Md5.c
 create mode 100644 src/amuse/community/arepo/src/debug_md5/Md5.h
 create mode 100644 src/amuse/community/arepo/src/debug_md5/calc_checksum.c
 create mode 100644 src/amuse/community/arepo/src/domain/bsd_tree.h
 create mode 100644 src/amuse/community/arepo/src/domain/domain.c
 create mode 100644 src/amuse/community/arepo/src/domain/domain.h
 create mode 100644 src/amuse/community/arepo/src/domain/domain_DC_update.c
 create mode 100644 src/amuse/community/arepo/src/domain/domain_balance.c
 create mode 100644 src/amuse/community/arepo/src/domain/domain_box.c
 create mode 100644 src/amuse/community/arepo/src/domain/domain_counttogo.c
 create mode 100644 src/amuse/community/arepo/src/domain/domain_exchange.c
 create mode 100644 src/amuse/community/arepo/src/domain/domain_rearrange.c
 create mode 100644 src/amuse/community/arepo/src/domain/domain_sort_kernels.c
 create mode 100644 src/amuse/community/arepo/src/domain/domain_toplevel.c
 create mode 100644 src/amuse/community/arepo/src/domain/domain_vars.c
 create mode 100644 src/amuse/community/arepo/src/domain/peano.c
 create mode 100644 src/amuse/community/arepo/src/fof/fof.c
 create mode 100644 src/amuse/community/arepo/src/fof/fof.h
 create mode 100644 src/amuse/community/arepo/src/fof/fof_distribute.c
 create mode 100644 src/amuse/community/arepo/src/fof/fof_findgroups.c
 create mode 100644 src/amuse/community/arepo/src/fof/fof_io.c
 create mode 100644 src/amuse/community/arepo/src/fof/fof_nearest.c
 create mode 100644 src/amuse/community/arepo/src/fof/fof_sort_kernels.c
 create mode 100644 src/amuse/community/arepo/src/fof/fof_vars.c
 create mode 100644 src/amuse/community/arepo/src/gitversion/version
 create mode 100644 src/amuse/community/arepo/src/gitversion/version.h
 create mode 100644 src/amuse/community/arepo/src/gravity/accel.c
 create mode 100644 src/amuse/community/arepo/src/gravity/forcetree.c
 create mode 100644 src/amuse/community/arepo/src/gravity/forcetree.h
 create mode 100644 src/amuse/community/arepo/src/gravity/forcetree_ewald.c
 create mode 100644 src/amuse/community/arepo/src/gravity/forcetree_optimizebalance.c
 create mode 100644 src/amuse/community/arepo/src/gravity/forcetree_walk.c
 create mode 100644 src/amuse/community/arepo/src/gravity/grav_external.c
 create mode 100644 src/amuse/community/arepo/src/gravity/grav_softening.c
 create mode 100644 src/amuse/community/arepo/src/gravity/gravdirect.c
 create mode 100644 src/amuse/community/arepo/src/gravity/gravtree.c
 create mode 100644 src/amuse/community/arepo/src/gravity/gravtree_forcetest.c
 create mode 100644 src/amuse/community/arepo/src/gravity/longrange.c
 create mode 100644 src/amuse/community/arepo/src/gravity/pm/pm_mpi_fft.c
 create mode 100644 src/amuse/community/arepo/src/gravity/pm/pm_nonperiodic.c
 create mode 100644 src/amuse/community/arepo/src/gravity/pm/pm_periodic.c
 create mode 100644 src/amuse/community/arepo/src/gravity/pm/pm_periodic2d.c
 create mode 100644 src/amuse/community/arepo/src/hydro/finite_volume_solver.c
 create mode 100644 src/amuse/community/arepo/src/hydro/gradients.c
 create mode 100644 src/amuse/community/arepo/src/hydro/mhd.c
 create mode 100644 src/amuse/community/arepo/src/hydro/riemann.c
 create mode 100644 src/amuse/community/arepo/src/hydro/riemann_hllc.c
 create mode 100644 src/amuse/community/arepo/src/hydro/riemann_hlld.c
 create mode 100644 src/amuse/community/arepo/src/hydro/scalars.c
 create mode 100644 src/amuse/community/arepo/src/hydro/update_primitive_variables.c
 create mode 100644 src/amuse/community/arepo/src/init/begrun.c
 create mode 100644 src/amuse/community/arepo/src/init/density.c
 create mode 100644 src/amuse/community/arepo/src/init/init.c
 create mode 100644 src/amuse/community/arepo/src/io/global.c
 create mode 100644 src/amuse/community/arepo/src/io/hdf5_util.c
 create mode 100644 src/amuse/community/arepo/src/io/io.c
 create mode 100644 src/amuse/community/arepo/src/io/io_fields.c
 create mode 100644 src/amuse/community/arepo/src/io/logs.c
 create mode 100644 src/amuse/community/arepo/src/io/parameters.c
 create mode 100644 src/amuse/community/arepo/src/io/read_ic.c
 create mode 100644 src/amuse/community/arepo/src/io/restart.c
 create mode 100644 src/amuse/community/arepo/src/main/allvars.c
 create mode 100644 src/amuse/community/arepo/src/main/allvars.h
 create mode 100644 src/amuse/community/arepo/src/main/main.c
 create mode 100644 src/amuse/community/arepo/src/main/main_original.c
 create mode 100644 src/amuse/community/arepo/src/main/main_reduced.c
 create mode 100644 src/amuse/community/arepo/src/main/proto.h
 create mode 100644 src/amuse/community/arepo/src/main/run.c
 create mode 100644 src/amuse/community/arepo/src/mesh/criterion_derefinement.c
 create mode 100644 src/amuse/community/arepo/src/mesh/criterion_refinement.c
 create mode 100644 src/amuse/community/arepo/src/mesh/mesh.h
 create mode 100644 src/amuse/community/arepo/src/mesh/refinement.c
 create mode 100644 src/amuse/community/arepo/src/mesh/set_vertex_velocities.c
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi.c
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi.h
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d.c
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d_spherical.c
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_2d.c
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_3d.c
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_check.c
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_derefinement.c
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_dynamic_update.c
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_exchange.c
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_ghost_search.c
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_lsf.c
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_onedims.c
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_refinement.c
 create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_utils.c
 create mode 100644 src/amuse/community/arepo/src/mpi_utils/checksummed_sendrecv.c
 create mode 100644 src/amuse/community/arepo/src/mpi_utils/hypercube_allgatherv.c
 create mode 100644 src/amuse/community/arepo/src/mpi_utils/mpi_util.c
 create mode 100644 src/amuse/community/arepo/src/mpi_utils/myIBarrier.c
 create mode 100644 src/amuse/community/arepo/src/mpi_utils/myIBarrier.h
 create mode 100644 src/amuse/community/arepo/src/mpi_utils/myalltoall.c
 create mode 100644 src/amuse/community/arepo/src/mpi_utils/pinning.c
 create mode 100644 src/amuse/community/arepo/src/mpi_utils/sizelimited_sendrecv.c
 create mode 100644 src/amuse/community/arepo/src/ngbtree/ngbtree.c
 create mode 100644 src/amuse/community/arepo/src/ngbtree/ngbtree_search.c
 create mode 100644 src/amuse/community/arepo/src/ngbtree/ngbtree_walk.c
 create mode 100644 src/amuse/community/arepo/src/star_formation/sfr_eEOS.c
 create mode 100644 src/amuse/community/arepo/src/star_formation/starformation.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind.h
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_coll_domain.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_coll_tree.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_coll_treewalk.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_collective.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_density.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_distribute.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_findlinkngb.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_io.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_loctree.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_nearesttwo.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_properties.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_reprocess.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_serial.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_so.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_so_potegy.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_sort_kernels.c
 create mode 100644 src/amuse/community/arepo/src/subfind/subfind_vars.c
 create mode 100644 src/amuse/community/arepo/src/time_integration/darkenergy.c
 create mode 100644 src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c
 create mode 100644 src/amuse/community/arepo/src/time_integration/driftfac.c
 create mode 100644 src/amuse/community/arepo/src/time_integration/predict.c
 create mode 100644 src/amuse/community/arepo/src/time_integration/timestep.c
 create mode 100644 src/amuse/community/arepo/src/time_integration/timestep.h
 create mode 100644 src/amuse/community/arepo/src/time_integration/timestep_treebased.c
 create mode 100644 src/amuse/community/arepo/src/utils/allocate.c
 create mode 100644 src/amuse/community/arepo/src/utils/debug.c
 create mode 100644 src/amuse/community/arepo/src/utils/dtypes.h
 create mode 100644 src/amuse/community/arepo/src/utils/generic_comm_helpers2.h
 create mode 100644 src/amuse/community/arepo/src/utils/mpz_extension.c
 create mode 100644 src/amuse/community/arepo/src/utils/mymalloc.c
 create mode 100644 src/amuse/community/arepo/src/utils/parallel_sort.c
 create mode 100644 src/amuse/community/arepo/src/utils/predicates.c
 create mode 100644 src/amuse/community/arepo/src/utils/system.c
 create mode 100644 src/amuse/community/arepo/src/utils/tags.h
 create mode 100644 src/amuse/community/arepo/src/utils/timer.h

diff --git a/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.c b/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.c
new file mode 100644
index 0000000000..ea94880120
--- /dev/null
+++ b/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.c
@@ -0,0 +1,492 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/add_backgroundgrid/add_bggrid.c
+ * \date        05/2018
+ * \brief       Re-gridding of ICs to ensure that the entire computational
+ *              domain contains gas cells.
+ * \details     Can be used to convert SPH ICs to Arepo ICs.
+ *              contains functions:
+ *                int add_backgroundgrid(void)
+ *                void modify_boxsize(double new_val)
+ *                void prepare_domain_backgroundgrid(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 11.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "add_bggrid.h"
+
+#ifdef ADDBACKGROUNDGRID
+
+static void modify_boxsize(double new_val);
+
+MyIDType IDNew;
+
+/*! \brief Re-gridding of ICs onto oct-tree nodes.
+ *
+ *  If this is active, no simulation is performed.
+ *
+ *  \return void
+ */
+int add_backgroundgrid(void)
+{
+  int i, no, numnodes;
+  long long ngas_count_all_old;
+  double vol, voltot, mgas, mtot;
+  int flag_all, flag = 0;
+
+  mpi_printf("\n\nADD BACKGROUND GRID: Adding background grid to IC file\n\n");
+
+  for(i = 0, mgas = 0; i < NumGas; i++)
+    if(P[i].Type == 0)
+      mgas += P[i].Mass;
+
+  MPI_Allreduce(&mgas, &mtot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  mpi_printf("ADD BACKGROUND GRID: Total gas mass before remap=%g\n", mtot);
+
+  ngas_count_all_old = All.TotNumGas;
+
+  ngb_treefree();
+
+  domain_free();
+
+  domain_Decomposition(); /* do new domain decomposition, will also make a new chained-list of synchronized particles */
+
+  numnodes = construct_forcetree(1, 1, 0, 0); /* build tree only with gas cells */
+
+  for(i = Tree_MaxPart, vol = 0; i < numnodes + Tree_MaxPart; i++)
+    {
+      if(Nodes[i].u.d.sibling == Nodes[i].u.d.nextnode) /* node is a leave */
+        {
+          vol += Nodes[i].len * Nodes[i].len * Nodes[i].len;
+        }
+    }
+
+  for(i = 0; i < NumGas; i++)
+    {
+      no = Father[i];
+      vol += Nodes[no].len * Nodes[no].len * Nodes[no].len / 8;
+    }
+
+  MPI_Allreduce(&vol, &voltot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  mpi_printf("\nADD BACKGROUND GRID: voltot=%g  %g\n", voltot, pow(DomainLen, 3));
+
+  int count_leaves = 0, count_leaves_all;
+
+  for(i = Tree_MaxPart, vol = 0; i < numnodes + Tree_MaxPart; i++)
+    {
+      if(Nodes[i].u.d.sibling == Nodes[i].u.d.nextnode) /* node is a leave */
+        {
+          if(Nodes[i].center[0] > 0 && Nodes[i].center[0] < All.BoxSize)
+            if(Nodes[i].center[1] > 0 && Nodes[i].center[1] < All.BoxSize)
+              if(Nodes[i].center[2] > 0 && Nodes[i].center[2] < All.BoxSize)
+                count_leaves++;
+        }
+    }
+
+  MPI_Allreduce(&count_leaves, &count_leaves_all, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  mpi_printf("ADD BACKGROUND GRID: count_leaves_all=%d\n\n", count_leaves_all);
+
+  if((NumGas + count_leaves >= All.MaxPartSph) || (NumPart + count_leaves >= All.MaxPart))
+    flag = 1;
+
+  MPI_Allreduce(&flag, &flag_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  /*Increase storage for newly added gas particles */
+  if(flag_all)
+    domain_resize_storage(count_leaves, count_leaves, 0);
+
+  /* determine maximum ID */
+  MyIDType maxid, newid, *tmp;
+  int *list;
+
+  for(i = 0, maxid = 0; i < NumPart; i++)
+    if(P[i].ID > maxid)
+      maxid = P[i].ID;
+
+  tmp = mymalloc("tmp", NTask * sizeof(MyIDType));
+
+  MPI_Allgather(&maxid, sizeof(MyIDType), MPI_BYTE, tmp, sizeof(MyIDType), MPI_BYTE, MPI_COMM_WORLD);
+
+  for(i = 0; i < NTask; i++)
+    if(tmp[i] > maxid)
+      maxid = tmp[i];
+
+  myfree(tmp);
+  // maxid is now the total maximum ID number of all particles
+
+  list = mymalloc("list", NTask * sizeof(int));
+
+  MPI_Allgather(&count_leaves, 1, MPI_INT, list, 1, MPI_INT, MPI_COMM_WORLD);
+
+  newid = maxid + 1;
+
+  for(i = 0; i < ThisTask; i++)
+    newid += list[i];
+
+  myfree(list);
+
+  // newid is now the maxid+total of count_leaves over all previous tasks
+
+  IDNew = maxid + 1; /* old gas particles will have IDs below this */
+
+  // move all particle and sph particle data down the arrays by
+  // count_leaves.
+
+  memmove(P + count_leaves, P, sizeof(struct particle_data) * NumPart);
+  memmove(SphP + count_leaves, SphP, sizeof(struct sph_particle_data) * NumGas);
+
+  NumPart += count_leaves;
+  NumGas += count_leaves;
+
+  // this is the same loop as determined count_leaves above, so
+  // it will be applied count_leaves times again.
+  count_leaves = 0;
+  for(i = Tree_MaxPart, vol = 0; i < numnodes + Tree_MaxPart; i++)
+    {
+      if(Nodes[i].u.d.sibling == Nodes[i].u.d.nextnode) /* node is a leave */
+        {
+          if(Nodes[i].center[0] > 0 && Nodes[i].center[0] < All.BoxSize)
+            if(Nodes[i].center[1] > 0 && Nodes[i].center[1] < All.BoxSize)
+              if(Nodes[i].center[2] > 0 && Nodes[i].center[2] < All.BoxSize)
+                {
+                  P[count_leaves].Pos[0] = Nodes[i].center[0];
+                  P[count_leaves].Pos[1] = Nodes[i].center[1];
+                  P[count_leaves].Pos[2] = Nodes[i].center[2];
+                  P[count_leaves].Vel[0] = 0;
+                  P[count_leaves].Vel[1] = 0;
+                  P[count_leaves].Vel[2] = 0;
+
+                  P[count_leaves].Mass         = 0;
+                  P[count_leaves].TimeBinHydro = 0;
+                  P[count_leaves].TimeBinGrav  = 0;
+
+                  P[count_leaves].Ti_Current = All.Ti_Current;
+
+#ifdef MHD
+                  SphP[count_leaves].B[0] = 0;
+                  SphP[count_leaves].B[1] = 0;
+                  SphP[count_leaves].B[2] = 0;
+                  SphP[count_leaves].DivB = 0;
+#endif /* #ifdef MHD */
+
+                  P[count_leaves].Type          = 0;
+                  P[count_leaves].SofteningType = All.SofteningTypeOfPartType[0];
+
+                  // this puts the new ID at the right spot
+                  P[count_leaves].ID = newid++;
+
+                  SphP[count_leaves].Volume      = Nodes[i].len * Nodes[i].len * Nodes[i].len;
+                  SphP[count_leaves].Utherm      = 0;
+                  SphP[count_leaves].Energy      = 0;
+                  SphP[count_leaves].Momentum[0] = 0;
+                  SphP[count_leaves].Momentum[1] = 0;
+                  SphP[count_leaves].Momentum[2] = 0;
+
+                  count_leaves++;
+                }
+        }
+    }
+
+  /* Delete the force tree */
+  myfree(Father);
+  myfree(Nextnode);
+  myfree(Tree_Points);
+  force_treefree();
+
+  calculate_weights();
+  distribute_particles();
+
+  int count_elim = 0, count_elim_all;
+
+  for(i = 0; i < NumGas; i++)
+    if(P[i].Type == 0)
+      {
+        if(P[i].ID <= maxid)
+          {
+            // remove particle i by swapping in the last sph particle
+            // and then swap the last particle to that spot
+            P[i]          = P[NumGas - 1];
+            P[NumGas - 1] = P[NumPart - 1];
+
+            SphP[i] = SphP[NumGas - 1];
+
+            NumPart--;
+            NumGas--;
+            i--;
+
+            count_elim++;
+          }
+        else
+          {
+            if(P[i].Mass > 0)
+              {
+                SphP[i].Utherm = SphP[i].Energy / P[i].Mass;
+                P[i].Vel[0]    = SphP[i].Momentum[0] / P[i].Mass;
+                P[i].Vel[1]    = SphP[i].Momentum[1] / P[i].Mass;
+                P[i].Vel[2]    = SphP[i].Momentum[2] / P[i].Mass;
+              }
+          }
+      }
+
+  MPI_Allreduce(&count_elim, &count_elim_all, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  sumup_large_ints(1, &NumPart, &All.TotNumPart);
+  sumup_large_ints(1, &NumGas, &All.TotNumGas);
+
+  mpi_printf("\nADD BACKGROUND GRID: count_elim_all=%d  IDNew=%d\n", count_elim_all, IDNew);
+  mpi_printf("ADD BACKGROUND GRID: added particles=%d  (task 0: NumGas=%d)\n", count_leaves_all - count_elim_all, NumGas);
+  mpi_printf("ADD BACKGROUND GRID: new particle number=%d\n", All.TotNumPart);
+  mpi_printf("ADD BACKGROUND GRID: new gas particle number=%d\n\n", All.TotNumGas);
+
+  for(i = 0, mgas = 0; i < NumGas; i++)
+    if(P[i].Type == 0)
+      mgas += P[i].Mass;
+
+  MPI_Allreduce(&mgas, &mtot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  mpi_printf("ADD BACKGROUND GRID: Total gas mass after remap=%g\n", mtot);
+
+  savepositions(0, 0);
+
+  mpi_printf("\nADD BACKGROUND GRID: GridSize = %d\n", All.GridSize);
+  mpi_printf(
+      "ADD BACKGROUND GRID: Suggested value for MeanVolume = %g\nADD BACKGROUND GRID: Suggested value for ReferenceGasPartMass = %g\n",
+      pow(All.BoxSize / All.GridSize, 3), mtot / ngas_count_all_old);
+  mpi_printf("ADD BACKGROUND GRID: Suggested value for BoxSize = %g\n", All.BoxSize);
+  mpi_printf("ADD BACKGROUND GRID: Done!\n\n");
+
+  return 0;
+}
+
+/*! \brief Sets a new box size and refreshes the cached box dimensions.
+ *
+ *  Writes new_val to All.BoxSize and re-derives boxSize and boxHalf from it.
+ *  For every LONG_X / LONG_Y / LONG_Z that is defined in the configuration,
+ *  the stretched size and half-size along that axis are re-derived as well.
+ *
+ *  \param[in] new_val New box size.
+ *
+ *  \return void
+ */
+void modify_boxsize(double new_val)
+{
+  /* unstretched box */
+  All.BoxSize = new_val;
+  boxSize     = All.BoxSize;
+  boxHalf     = 0.5 * All.BoxSize;
+
+  /* per-axis stretched box, only for the axes enabled at compile time */
+#ifdef LONG_X
+  boxSize_X = boxSize * LONG_X;
+  boxHalf_X = boxHalf * LONG_X;
+#endif /* #ifdef LONG_X */
+
+#ifdef LONG_Y
+  boxSize_Y = boxSize * LONG_Y;
+  boxHalf_Y = boxHalf * LONG_Y;
+#endif /* #ifdef LONG_Y */
+
+#ifdef LONG_Z
+  boxSize_Z = boxSize * LONG_Z;
+  boxHalf_Z = boxHalf * LONG_Z;
+#endif /* #ifdef LONG_Z */
+}
+
+/*! \brief Prepares computational box; makes sure simulation volume is large
+ *         enough.
+ *
+ *  Steps performed:
+ *   - All.GridSize is validated: values below 0 or above ADDBACKGROUNDGRIDMAX
+ *     terminate the run. A value > 0 is treated as a user choice, 0 requests
+ *     an automatic choice.
+ *   - GridSize is rounded down to a power of two.
+ *   - The global extent of all particles and of the first NumGas particles
+ *     (the gas) is determined over all tasks.
+ *   - All.BoxSize is clamped to the range
+ *     [FACTOR_MIN_BOX_SIZE, FACTOR_MAX_BOX_SIZE] times the gas extent.
+ *   - For an automatic choice, GridSize is doubled until the estimated number
+ *     of grid cells covering the gas extent reaches NTask, or until one of the
+ *     other loop limits is hit.
+ *   - The run is terminated if that estimate is still below NTask, or if
+ *     len_gas / BoxSize exceeds GridSize.
+ *   - If any global minimum coordinate is negative, all particles are shifted
+ *     by half a box size in each dimension.
+ *
+ *  \return void
+ */
+void prepare_domain_backgroundgrid(void)
+{
+  int i, j, shift_half_box = 0, min_topleave_num = 0, set_grid_size_flag = 0;
+  unsigned int size;
+  double len, xmin[3], xmax[3], xmin_glob[3], xmax_glob[3];
+  double len_gas, xmin_gas[3], xmax_gas[3], xmin_gas_glob[3], xmax_gas_glob[3];
+  double min_box_size, max_box_size;
+
+  mpi_printf("\n\nADD BACKGROUND GRID: preparing domain for first domain decomposition\n");
+
+  /* Checking GridSize limits */
+  if(All.GridSize < 0)
+    terminate("GridSize = %d is less than zero. This is not allowed.", All.GridSize);
+
+  if(All.GridSize > ADDBACKGROUNDGRIDMAX)
+    terminate("GridSize = %d is exceeding the max grid size = %d", All.GridSize, ADDBACKGROUNDGRIDMAX);
+
+  if(All.GridSize > 0)
+    set_grid_size_flag = 1;
+
+  /* Round GridSize down to a power of two. 'size' becomes the largest power
+   * of two that does not exceed GridSize (1 if GridSize is 0); half of it is
+   * stored here and the factor of two is re-applied further below.
+   * This is a plain doubling loop so that no shift count can go out of range,
+   * including for GridSize = 0, 1 and ADDBACKGROUNDGRIDMAX. */
+  size = 1;
+  while(2 * size <= (unsigned int)All.GridSize)
+    size <<= 1;
+
+  mpi_printf("ADD BACKGROUND GRID: original value of GridSize =  %d\n", All.GridSize);
+
+  All.GridSize = size >> 1;
+
+  if(All.GridSize < 1)
+    All.GridSize = 1;
+
+  mpi_printf("ADD BACKGROUND GRID: closest power of two corresponding to GridSize = %d is taken as initial guess\n", 2 * All.GridSize);
+
+  /* determine local extension */
+  for(j = 0; j < 3; j++)
+    {
+      xmin[j]     = MAX_REAL_NUMBER;
+      xmax[j]     = -MAX_REAL_NUMBER;
+      xmin_gas[j] = MAX_REAL_NUMBER;
+      xmax_gas[j] = -MAX_REAL_NUMBER;
+    }
+
+  /* bounding box of all local particles */
+  for(i = 0; i < NumPart; i++)
+    {
+      for(j = 0; j < 3; j++)
+        {
+          if(xmin[j] > P[i].Pos[j])
+            xmin[j] = P[i].Pos[j];
+
+          if(xmax[j] < P[i].Pos[j])
+            xmax[j] = P[i].Pos[j];
+        }
+    }
+
+  /* bounding box of the first NumGas local particles (the gas) */
+  for(i = 0; i < NumGas; i++)
+    {
+      for(j = 0; j < 3; j++)
+        {
+          if(xmin_gas[j] > P[i].Pos[j])
+            xmin_gas[j] = P[i].Pos[j];
+
+          if(xmax_gas[j] < P[i].Pos[j])
+            xmax_gas[j] = P[i].Pos[j];
+        }
+    }
+
+  /* combine the local bounding boxes of all tasks */
+  MPI_Allreduce(xmin, xmin_glob, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+  MPI_Allreduce(xmax, xmax_glob, 3, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+  MPI_Allreduce(xmin_gas, xmin_gas_glob, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+  MPI_Allreduce(xmax_gas, xmax_gas_glob, 3, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+
+  mpi_printf("ADD BACKGROUND GRID: Min and max coordinates.\n");
+  mpi_printf("ADD BACKGROUND GRID: xmin|ymin|zmin=% g|% g|% g.\n", xmin_glob[0], xmin_glob[1], xmin_glob[2]);
+  mpi_printf("ADD BACKGROUND GRID: xmax|ymax|zmax=% g|% g|% g.\n", xmax_glob[0], xmax_glob[1], xmax_glob[2]);
+  mpi_printf("ADD BACKGROUND GRID: xmin_gas|ymin_gas|zmin_gas=% g|% g|% g.\n", xmin_gas_glob[0], xmin_gas_glob[1], xmin_gas_glob[2]);
+  mpi_printf("ADD BACKGROUND GRID: xmax_gas|ymax_gas|zmax_gas=% g|% g|% g.\n", xmax_gas_glob[0], xmax_gas_glob[1], xmax_gas_glob[2]);
+
+  /* largest extent along any axis, for all particles and for the gas only */
+  len     = 0;
+  len_gas = 0;
+  for(j = 0; j < 3; j++)
+    {
+      if(xmax_glob[j] - xmin_glob[j] > len)
+        len = xmax_glob[j] - xmin_glob[j];
+
+      if(xmax_gas_glob[j] - xmin_gas_glob[j] > len_gas)
+        len_gas = xmax_gas_glob[j] - xmin_gas_glob[j];
+
+      if(xmin_glob[j] < 0)
+        shift_half_box = 1;
+    }
+
+  /* keep the box between FACTOR_MIN_BOX_SIZE and FACTOR_MAX_BOX_SIZE gas extents */
+  max_box_size = FACTOR_MAX_BOX_SIZE * len_gas;
+  min_box_size = FACTOR_MIN_BOX_SIZE * len_gas;
+
+  if(All.BoxSize < min_box_size)
+    {
+      mpi_printf("ADD BACKGROUND GRID: Need to increase the BoxSize. Old value = %g, new value = %g\n", All.BoxSize, min_box_size);
+      modify_boxsize(min_box_size);
+    }
+  if(All.BoxSize > max_box_size)
+    {
+      mpi_printf("ADD BACKGROUND GRID: Need to decrease the BoxSize. Old value = %g, new value = %g\n", All.BoxSize, max_box_size);
+      modify_boxsize(max_box_size);
+    }
+
+  mpi_printf("ADD BACKGROUND GRID: Domain extent %g, BoxSize = %g, ratio = %g\n", len, All.BoxSize, len / All.BoxSize);
+  mpi_printf("ADD BACKGROUND GRID: Gas extent %g, BoxSize = %g, ratio = %g\n", len_gas, All.BoxSize, len_gas / All.BoxSize);
+
+  /* the terminate condition must be checked properly */
+  if(!set_grid_size_flag)
+    {
+      /* automatic choice: keep doubling while the grid is too coarse for NTask tasks */
+      while(min_topleave_num < NTask && (All.BoxSize / len_gas) > All.GridSize && All.GridSize < ADDBACKGROUNDGRIDMAX)
+        {
+          All.GridSize <<= 1;
+          min_topleave_num = (int)pow(len_gas * All.GridSize / All.BoxSize, 3.0);
+          mpi_printf("ADD BACKGROUND GRID: GridSize=%3d, min_topleave_num=%6d, NTask=%6d, BoxSize/GridSize=%g, len_gas/GridSize=%g\n",
+                     All.GridSize, min_topleave_num, NTask, All.BoxSize / All.GridSize, len_gas / All.BoxSize);
+        }
+    }
+  else
+    {
+      /* user choice: undo the halving applied during the power-of-two rounding */
+      All.GridSize <<= 1;
+      min_topleave_num = (int)pow(len_gas * All.GridSize / All.BoxSize, 3.0);
+      mpi_printf("ADD BACKGROUND GRID: GridSize=%3d, min_topleave_num=%6d, NTask=%6d, BoxSize/GridSize=%g, len_gas/GridSize=%g\n",
+                 All.GridSize, min_topleave_num, NTask, All.BoxSize / All.GridSize, len_gas / All.BoxSize);
+    }
+
+  if(min_topleave_num < NTask)
+    terminate(
+        "min_topleave_num=%d < NTask=%d, MaxGridSize=%d. Try either to run with less task or to set the BoxSize to a smaller "
+        "value\n",
+        min_topleave_num, NTask, ADDBACKGROUNDGRIDMAX);
+
+  if(len_gas / All.BoxSize > All.GridSize)
+    terminate("len_gas/BoxSize=%g > GridSize=%d, MaxGridSize=%d. GridSize should be increased if possible\n", len_gas / All.BoxSize,
+              All.GridSize, ADDBACKGROUNDGRIDMAX);
+
+  if(shift_half_box)
+    {
+      mpi_printf("ADD BACKGROUND GRID: Need to shift particles by half box size\n\n");
+      for(i = 0; i < NumPart; i++)
+        {
+          P[i].Pos[0] += 0.5 * All.BoxSize;
+          P[i].Pos[1] += 0.5 * All.BoxSize;
+          P[i].Pos[2] += 0.5 * All.BoxSize;
+        }
+    }
+}
+
+#endif /* #ifdef ADDBACKGROUNDGRID */
diff --git a/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.h b/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.h
new file mode 100644
index 0000000000..47c81c199b
--- /dev/null
+++ b/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.h
@@ -0,0 +1,58 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/add_backgroundgrid/add_bggrid.h
+ * \date        05/2018
+ * \brief       Re-gridding of ICs to ensure that the entire computational
+ *              domain contains gas cells.
+ * \details     Can be used to convert SPH ICs to Arepo ICs.
+ *              Interface functions:
+ *                int add_backgroundgrid(void);
+ *                void prepare_domain_backgroundgrid(void);
+ *              Functions of this module called in:
+ *                init() (init.c)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 11.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef ADD_BGGRID_H
+#define ADD_BGGRID_H
+
+#include "../main/allvars.h"
+
+#ifdef ADDBACKGROUNDGRID
+
+/* Upper limit for All.GridSize; prepare_domain_backgroundgrid() terminates
+ * the run for larger values. */
+#define ADDBACKGROUNDGRIDMAX 256
+/* prepare_domain_backgroundgrid() keeps All.BoxSize between
+ * FACTOR_MIN_BOX_SIZE and FACTOR_MAX_BOX_SIZE times the extent of the gas. */
+#define FACTOR_MAX_BOX_SIZE 15.0
+#define FACTOR_MIN_BOX_SIZE 2.0
+
+/* Set by add_backgroundgrid(); gas particles that existed before the
+ * background grid was added have IDs below this value. */
+extern MyIDType IDNew;
+
+/* All four functions take no arguments; spelled with (void) so that the
+ * compiler checks calls against a real prototype. */
+int add_backgroundgrid(void);
+void prepare_domain_backgroundgrid(void);
+void calculate_weights(void);
+void distribute_particles(void);
+
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+#endif /* ADD_BGGRID_H */
diff --git a/src/amuse/community/arepo/src/add_backgroundgrid/calc_weights.c b/src/amuse/community/arepo/src/add_backgroundgrid/calc_weights.c
new file mode 100644
index 0000000000..8e0f2ea04e
--- /dev/null
+++ b/src/amuse/community/arepo/src/add_backgroundgrid/calc_weights.c
@@ -0,0 +1,301 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/add_backgroundgrid/calc_weights.c
+ * \date        05/2018
+ * \brief       Routine that calculates the cumulative weights of neighboring
+ *              cells.
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                void calculate_weights()
+ *                int find_cells_evaluate(int target, int mode, int thread_id)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 11.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <mpi.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "add_bggrid.h"
+
+#ifdef ADDBACKGROUNDGRID
+
+static int find_cells_evaluate(int target, int mode, int thread_id);
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ *
+ *  Filled by particle2in(); read by find_cells_evaluate(), either from a
+ *  local copy or from DataGet[] for imported entries.
+ */
+typedef struct
+{
+  MyDouble Pos[3]; /* copy of P[i].Pos; centre of the neighbour search */
+  MyFloat Hsml;    /* copy of SphP[i].Hsml; search radius and kernel size */
+
+  int Firstnode; /* 'firstnode' argument handed to particle2in() */
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Copies the data needed for the weight calculation of one cell into
+ *         a data_in record. Needed by generic_comm_helpers2.
+ *
+ *  \param[out] in Record to fill (position, Hsml, first node).
+ *  \param[in] i Index of particle in P and SphP arrays.
+ *  \param[in] firstnode First node of communication.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  int k;
+
+  for(k = 0; k < 3; k++)
+    in->Pos[k] = P[i].Pos[k];
+
+  in->Hsml      = SphP[i].Hsml;
+  in->Firstnode = firstnode;
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ *
+ *  Written by find_cells_evaluate() and consumed by out2particle().
+ */
+typedef struct
+{
+  MyFloat Weight; /* sum of kernel value times SphP[j].Volume over the accepted neighbours */
+} data_out;
+
+static data_out *DataResult, *DataOut;
+
+/*! \brief Writes a computed weight back into SphP[i].Weight. Needed by
+ *         generic_comm_helpers2.
+ *
+ *  \param[in] out Result record holding the weight.
+ *  \param[in] i Index of particle in P and SphP arrays.
+ *  \param[in] mode MODE_LOCAL_PARTICLES overwrites the stored weight; any
+ *             other mode adds the value to what is already stored.
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  if(mode != MODE_LOCAL_PARTICLES)
+    {
+      /* partial result that has to be combined with the stored one */
+      SphP[i].Weight += out->Weight;
+      return;
+    }
+
+  /* first result for this cell: plain store */
+  SphP[i].Weight = out->Weight;
+}
+
+#include "../utils/generic_comm_helpers2.h"
+
+/*! \brief Processes the local active cells.
+ *
+ *  Resets the export flags of the calling thread to -1 for every task, then
+ *  takes indices from NextParticle and calls find_cells_evaluate() in
+ *  MODE_LOCAL_PARTICLES for each valid entry of the TimeBinsGravity active
+ *  list. Stops when the thread's export space drops below MinSpace or the
+ *  list is exhausted.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  int threadid = get_thread_num();
+  int task;
+
+  for(task = 0; task < NTask; task++)
+    Thread[threadid].Exportflag[task] = -1;
+
+  while(Thread[threadid].ExportSpace >= MinSpace)
+    {
+      int idx = NextParticle++;
+
+      if(idx >= TimeBinsGravity.NActiveParticles)
+        break;
+
+      int i = TimeBinsGravity.ActiveParticleList[idx];
+
+      /* negative entries are skipped */
+      if(i >= 0)
+        find_cells_evaluate(i, MODE_LOCAL_PARTICLES, threadid);
+    }
+}
+
+/*! \brief Processes the cells that were sent to this task.
+ *
+ *  Calls find_cells_evaluate() in MODE_IMPORTED_PARTICLES once for each of
+ *  the Nimport imported entries, in index order.
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  int threadid = get_thread_num();
+  int i;
+
+  for(i = 0; i < Nimport; i++)
+    find_cells_evaluate(i, MODE_IMPORTED_PARTICLES, threadid);
+}
+
+/*! \brief Calculates SPH weights of each cell.
+ *
+ *  Redoes the domain decomposition, builds the neighbour tree over the NumGas
+ *  gas cells and puts every gas cell with positive mass on the
+ *  TimeBinsGravity active list. For each listed cell, find_cells_evaluate()
+ *  then stores in SphP[i].Weight the kernel-weighted volume sum of the
+ *  neighbouring cells with ID >= IDNew.
+ *
+ *  The active list built here is left in place; distribute_particles()
+ *  iterates over TimeBinsGravity.NActiveParticles entries as well.
+ *
+ *  \return void
+ */
+void calculate_weights(void)
+{
+  domain_free();
+  domain_Decomposition(); /* do new domain decomposition, will also make a new chained-list of synchronized particles */
+
+  ngb_treeallocate();
+  ngb_treebuild(NumGas);
+
+  mpi_printf("ADD BACKGROUND GRID: distribution of fluid quantities in a SPH-like fashion\n");
+  mpi_printf("ADD BACKGROUND GRID: finding the normalization factors\n");
+
+  /* rebuild the active list from scratch: only cells that carry mass take part */
+  TimeBinsGravity.NActiveParticles = 0;
+
+  int i;
+  for(i = 0; i < NumGas; i++)
+    {
+      if(P[i].Mass > 0)
+        {
+          TimeBinsGravity.ActiveParticleList[TimeBinsGravity.NActiveParticles] = i;
+          TimeBinsGravity.NActiveParticles++;
+        }
+    }
+
+  generic_set_MaxNexport();
+
+  generic_comm_pattern(TimeBinsGravity.NActiveParticles, kernel_local, kernel_imported);
+
+  mpi_printf("ADD BACKGROUND GRID: done\n");
+}
+
+/*! \brief Sums the kernel-weighted volumes of the background cells around one
+ *         particle.
+ *
+ *  Searches the neighbours within Hsml of the target position, keeps those
+ *  with ID >= IDNew that lie inside the kernel radius, and accumulates
+ *  kernel value times SphP[j].Volume. The sum is stored through
+ *  out2particle() for local targets and in DataResult[] for imported ones.
+ *
+ *  \param[in] target Index of particle/cell
+ *  \param[in] mode Flag if it operates on local or imported data
+ *  \param[in] thread_id ID of thread
+ *
+ *  \return 0
+ */
+int find_cells_evaluate(int target, int mode, int thread_id)
+{
+  int n, numnodes, *firstnode;
+  data_in local, *target_data;
+  data_out out;
+  /* not referenced by name below; presumably scratch variables expected by
+   * the NGB_PERIODIC_LONG_* macros, so they must stay declared */
+  double xtmp, ytmp, ztmp;
+
+  if(mode != MODE_LOCAL_PARTICLES)
+    {
+      target_data = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+  else
+    {
+      particle2in(&local, target, 0);
+      target_data = &local;
+
+      numnodes  = 1;
+      firstnode = NULL;
+    }
+
+  MyDouble *pos = target_data->Pos;
+  double h      = target_data->Hsml;
+  double h2     = h * h;
+  double hinv   = 1.0 / h;
+#ifndef TWODIMS
+  double hinv3 = hinv * hinv * hinv;
+#else  /* #ifndef TWODIMS */
+  double hinv3 = hinv * hinv / boxSize_Z;
+#endif /* #ifndef TWODIMS #else */
+
+  int nfound = ngb_treefind_variable_threads(pos, h, target, mode, thread_id, numnodes, firstnode);
+
+  double weight = 0;
+
+  for(n = 0; n < nfound; n++)
+    {
+      int j = Thread[thread_id].Ngblist[n];
+
+      /* only cells of the newly added background grid contribute */
+      if(P[j].ID < IDNew)
+        continue;
+
+      MyDouble dx = NGB_PERIODIC_LONG_X(pos[0] - P[j].Pos[0]);
+      MyDouble dy = NGB_PERIODIC_LONG_Y(pos[1] - P[j].Pos[1]);
+      MyDouble dz = NGB_PERIODIC_LONG_Z(pos[2] - P[j].Pos[2]);
+
+      double r2 = dx * dx + dy * dy + dz * dz;
+
+      /* outside the kernel support */
+      if(!(r2 < h2))
+        continue;
+
+      MyDouble r = sqrt(r2);
+      double u   = r * hinv;
+
+      /* two-branch kernel, switching at u = 0.5 */
+      double wk = (u < 0.5) ? hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u)
+                            : hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u);
+
+      weight += wk * SphP[j].Volume;
+    }
+
+  out.Weight = weight;
+
+  /* Now collect the result at the right place */
+  if(mode != MODE_LOCAL_PARTICLES)
+    DataResult[target] = out;
+  else
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+
+  return 0;
+}
+
+#endif /* #ifdef ADDBACKGROUNDGRID */
diff --git a/src/amuse/community/arepo/src/add_backgroundgrid/distribute.c b/src/amuse/community/arepo/src/add_backgroundgrid/distribute.c
new file mode 100644
index 0000000000..aad7d150c5
--- /dev/null
+++ b/src/amuse/community/arepo/src/add_backgroundgrid/distribute.c
@@ -0,0 +1,328 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/add_backgroundgrid/distribute.c
+ * \date        05/2018
+ * \brief       Distributes the cell properties in an SPH kernel weighted
+ *              fashion to neighboring cells.
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                void distribute_particles(void)
+ *                int find_cells_evaluate(int target, int mode, int thread_id)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 11.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <mpi.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "add_bggrid.h"
+
+#ifdef ADDBACKGROUNDGRID
+
+static int find_cells_evaluate(int target, int mode, int thread_id);
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ *
+ *  Filled by particle2in(); read by find_cells_evaluate(), either from a
+ *  local copy or from DataGet[] for imported entries.
+ */
+typedef struct
+{
+  MyDouble Pos[3];        /* copy of P[i].Pos; centre of the neighbour search */
+  MyFloat Hsml;           /* copy of SphP[i].Hsml; search radius and kernel size */
+  MyFloat Weight;         /* copy of SphP[i].Weight; divisor of the per-neighbour share */
+  MyFloat Mass;           /* copy of P[i].Mass */
+  MyFloat InternalEnergy; /* SphP[i].Utherm * P[i].Mass */
+  MyFloat Momentum[3];    /* P[i].Vel[k] * P[i].Mass */
+#ifdef MHD
+  MyFloat B[3]; /* copy of SphP[i].B */
+#endif /* #ifdef MHD */
+  int Firstnode; /* 'firstnode' argument handed to particle2in() */
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Collects the quantities of one cell that are to be spread over its
+ *         neighbours into a data_in record. Needed by generic_comm_helpers2.
+ *
+ *  Besides position, Hsml and weight, the record carries the mass, the
+ *  internal energy (Utherm * Mass) and the momentum (Vel * Mass) of the cell,
+ *  plus the magnetic field if MHD is enabled.
+ *
+ *  \param[out] in Record to fill.
+ *  \param[in] i Index of particle in P and SphP arrays.
+ *  \param[in] firstnode First node of communication.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  int k;
+
+  in->Hsml           = SphP[i].Hsml;
+  in->Weight         = SphP[i].Weight;
+  in->Mass           = P[i].Mass;
+  in->InternalEnergy = SphP[i].Utherm * P[i].Mass;
+  in->Firstnode      = firstnode;
+
+  /* per-axis quantities */
+  for(k = 0; k < 3; k++)
+    {
+      in->Pos[k]      = P[i].Pos[k];
+      in->Momentum[k] = P[i].Vel[k] * P[i].Mass;
+#ifdef MHD
+      in->B[k] = SphP[i].B[k];
+#endif /* #ifdef MHD */
+    }
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ *
+ *  Carries no information in this file: out2particle() ignores it.
+ */
+typedef struct
+{
+  char nothing; /* placeholder; set to 0 in find_cells_evaluate() */
+} data_out;
+
+static data_out *DataResult, *DataOut;
+
+/*! \brief Result handler required by generic_comm_helpers2; does nothing.
+ *
+ *  The distribution step writes directly into the neighbouring cells, so
+ *  there is no per-target result to store or combine.
+ *
+ *  \param[in] out Unused.
+ *  \param[in] i Unused.
+ *  \param[in] mode Unused.
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  /* intentionally empty; the casts only mark the parameters as unused */
+  (void)out;
+  (void)i;
+  (void)mode;
+}
+
+#include "../utils/generic_comm_helpers2.h"
+
+/*! \brief Processes the local active cells.
+ *
+ *  Resets the export flags of the calling thread to -1 for every task, then
+ *  takes indices from NextParticle and calls find_cells_evaluate() in
+ *  MODE_LOCAL_PARTICLES for each valid entry of the TimeBinsGravity active
+ *  list. Stops when the thread's export space drops below MinSpace or the
+ *  list is exhausted.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  int threadid = get_thread_num();
+  int task;
+
+  for(task = 0; task < NTask; task++)
+    Thread[threadid].Exportflag[task] = -1;
+
+  while(Thread[threadid].ExportSpace >= MinSpace)
+    {
+      int idx = NextParticle++;
+
+      if(idx >= TimeBinsGravity.NActiveParticles)
+        break;
+
+      int i = TimeBinsGravity.ActiveParticleList[idx];
+
+      /* negative entries are skipped */
+      if(i >= 0)
+        find_cells_evaluate(i, MODE_LOCAL_PARTICLES, threadid);
+    }
+}
+
+/*! \brief Processes the cells that were sent to this task.
+ *
+ *  Calls find_cells_evaluate() in MODE_IMPORTED_PARTICLES once for each of
+ *  the Nimport imported entries, in index order.
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  int threadid = get_thread_num();
+  int i;
+
+  for(i = 0; i < Nimport; i++)
+    find_cells_evaluate(i, MODE_IMPORTED_PARTICLES, threadid);
+}
+
+/*! \brief Spreads the hydro quantities of the active cells over their
+ *         neighbours with a kernel average.
+ *
+ *  Runs find_cells_evaluate() for all entries of the TimeBinsGravity active
+ *  list through the generic communication pattern. With MHD enabled, the
+ *  accumulated magnetic field of every cell with ID >= IDNew on the
+ *  TimeBinsHydro active list is afterwards divided by the kernel sum that
+ *  find_cells_evaluate() accumulated in SphP.DivB.
+ *
+ *  \return void
+ */
+void distribute_particles(void)
+{
+  mpi_printf("ADD BACKGROUND GRID: distributing the fluid quantities\n");
+
+  generic_set_MaxNexport();
+  generic_comm_pattern(TimeBinsGravity.NActiveParticles, kernel_local, kernel_imported);
+
+#ifdef MHD
+  /* normalise B by the sum of the wk's, which was stored in SphP.DivB */
+  int idx, k;
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      int i = TimeBinsHydro.ActiveParticleList[idx];
+
+      /* skip invalid list entries and cells that are not part of the new grid */
+      if(i < 0 || P[i].ID < IDNew)
+        continue;
+
+      /* nothing was accumulated for this cell */
+      if(!(SphP[i].DivB > 0))
+        continue;
+
+      for(k = 0; k < 3; k++)
+        SphP[i].B[k] /= SphP[i].DivB;
+    }
+#endif /* #ifdef MHD */
+
+  mpi_printf("ADD BACKGROUND GRID: done\n");
+}
+
+/*! \brief Distributes the properties of one particle on neighbouring cells.
+ *
+ *  Searches the neighbours within Hsml of the target position. Every
+ *  neighbour with ID >= IDNew inside the kernel radius receives the share
+ *  SphP[j].Volume * wk / Weight of the target's mass, internal energy and
+ *  momentum (and of its magnetic field if MHD is enabled, in which case the
+ *  kernel value wk is also accumulated in SphP[j].DivB).
+ *
+ *  The run is terminated if the shares handed out for one target sum to more
+ *  than 1.01.
+ *
+ *  \param[in] target Index of particle/cell.
+ *  \param[in] mode Flag if it operates on local or imported data.
+ *  \param[in] thread_id ID of thread.
+ *
+ *  \return 0
+ */
+int find_cells_evaluate(int target, int mode, int thread_id)
+{
+  int j, n, numnodes, *firstnode;
+  double h, h2, hinv, hinv3;
+  MyDouble dx, dy, dz, r;
+  MyDouble *pos;
+  /* not referenced by name below; presumably scratch variables expected by
+   * the NGB_PERIODIC_LONG_* macros, so they must stay declared */
+  double xtmp, ytmp, ztmp;
+
+  data_in local, *target_data;
+  data_out out;
+  out.nothing = 0;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0);
+      target_data = &local;
+
+      numnodes  = 1;
+      firstnode = NULL;
+    }
+  else
+    {
+      target_data = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos  = target_data->Pos;
+  h    = target_data->Hsml;
+  h2   = h * h;
+  hinv = 1.0 / h;
+#ifndef TWODIMS
+  hinv3 = hinv * hinv * hinv;
+#else  /* #ifndef  TWODIMS */
+  hinv3 = hinv * hinv / boxSize_Z;
+#endif /* #ifndef  TWODIMS #else */
+
+  int nfound = ngb_treefind_variable_threads(pos, h, target, mode, thread_id, numnodes, firstnode);
+
+  /* total fraction of the target's content handed out so far */
+  double wsum = 0;
+
+  for(n = 0; n < nfound; n++)
+    {
+      j = Thread[thread_id].Ngblist[n];
+
+      /* only cells of the newly added background grid receive anything */
+      if(P[j].ID < IDNew)
+        continue;
+
+      dx = NGB_PERIODIC_LONG_X(pos[0] - P[j].Pos[0]);
+      dy = NGB_PERIODIC_LONG_Y(pos[1] - P[j].Pos[1]);
+      dz = NGB_PERIODIC_LONG_Z(pos[2] - P[j].Pos[2]);
+
+      double r2 = dx * dx + dy * dy + dz * dz;
+
+      if(r2 < h2)
+        {
+          r = sqrt(r2);
+
+          double u = r * hinv;
+          double wk;
+          if(u < 0.5)
+            wk = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u);
+          else
+            wk = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u);
+
+          /* share of the target's content that goes to neighbour j */
+          double weight = SphP[j].Volume * wk / target_data->Weight;
+
+          wsum += weight;
+
+          P[j].Mass += target_data->Mass * weight;
+          SphP[j].Energy += target_data->InternalEnergy * weight;
+
+          int k;
+          for(k = 0; k < 3; k++)
+            SphP[j].Momentum[k] += target_data->Momentum[k] * weight;
+
+#ifdef MHD
+          for(k = 0; k < 3; k++)
+            SphP[j].B[k] += target_data->B[k] * weight;
+          SphP[j].DivB += wk;
+#endif /* #ifdef MHD */
+        }
+    }
+
+  /* the shares should not add up to more than one (1% tolerance); put the
+   * offending values into the abort message itself so they are not lost */
+  if(wsum > 1.01)
+    terminate("ADD BACKGROUND GRID: distributed weight fraction exceeds 1.01: wsum=%g, Weight=%g, target=%d", wsum,
+              target_data->Weight, target);
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+#endif /* #ifdef ADDBACKGROUNDGRID */
diff --git a/src/amuse/community/arepo/src/cooling/cooling.c b/src/amuse/community/arepo/src/cooling/cooling.c
new file mode 100644
index 0000000000..7e7cebbc98
--- /dev/null
+++ b/src/amuse/community/arepo/src/cooling/cooling.c
@@ -0,0 +1,870 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/cooling/cooling.c
+ * \date        05/2018
+ * \brief       Module for gas radiative cooling
+ * \details     contains functions:
+ *                double DoCooling(double u_old, double rho, double dt, double
+ *                  *ne_guess)
+ *                double GetCoolingTime(double u_old, double rho, double
+ *                  *ne_guess)
+ *                double convert_u_to_temp(double u, double rho, double
+ *                  *ne_guess)
+ *                void find_abundances_and_rates(double logT, double rho,
+ *                  double *ne_guess)
+ *                double CoolingRateFromU(double u, double rho, double
+ *                  *ne_guess)
+ *                void SetOutputGasState(int i, double *ne_guess, double *nH0,
+ *                  double *coolrate)
+ *                double CoolingRate(double logT, double rho, double *nelec)
+ *                void MakeRateTable(void)
+ *                void ReadIonizeParams(char *fname, int which)
+ *                void IonizeParamsUVB(void)
+ *                void SetZeroIonization(void)
+ *                void IonizeParams(void)
+ *                void InitCool(void)
+ *                void cooling_only(void)
+ *                void cool_cell(int i)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef COOLING
+
+static double Tmin = 0.0;     /*!< log10 of the lowest tabulated temperature; reset in MakeRateTable() */
+static double Tmax = 9.0;     /*!< log10 of the highest tabulated temperature */
+static double deltaT;         /*!< spacing of the interpolation tables in log10(T): (Tmax - Tmin) / NCOOLTAB */
+static GasState gs;           /*!< abundances and interpolated rates left by the most recent evaluation */
+static RateTable *RateT;      /*!< tabulated rates; NCOOLTAB + 1 entries are allocated in InitCool() */
+static PhotoTable *PhotoTUVB; /*!< photo-ionization/heating rate table for UV background */
+static PhotoCurrent pc;       /*!< photo rates interpolated to the current redshift by IonizeParamsUVB() */
+static int NheattabUVB;       /*!< number of usable entries in PhotoTUVB */
+static DoCoolData DoCool;     /*!< copy of the latest inputs, printed when an iteration fails to converge */
+
+/*! \brief Computes the new internal energy per unit mass.
+ *
+ *  The function solves for the new internal energy per unit mass of the gas
+ *  by integrating the equation for the internal energy with an implicit
+ *  Euler scheme. The root of the resulting nonlinear equation,
+ *  which gives the new internal energy, is found with the bisection method.
+ *  Arguments are passed in code units.
+ *
+ *  \param[in] u_old the initial (before cooling is applied) internal energy
+ *             per unit mass of the gas cell.
+ *  \param[in] rho   the proper density of the gas cell.
+ *  \param[in] dt    the duration of the time step.
+ *  \param[in, out] ne_guess electron number density relative to hydrogen number
+ *             density (for molecular weight computation); updated in place.
+ *
+ *  \return The new internal energy per unit mass of the gas cell.
+ */
+double DoCooling(double u_old, double rho, double dt, double *ne_guess)
+{
+  double u, du;
+  double u_lower, u_upper;
+  double ratefact;
+  double LambdaNet;
+
+  int iter = 0;
+  /* keep the untouched inputs (code units) for the report on convergence failure */
+  DoCool.u_old_input    = u_old;
+  DoCool.rho_input      = rho;
+  DoCool.dt_input       = dt;
+  DoCool.ne_guess_input = *ne_guess;
+
+  if(!gsl_finite(u_old))
+    terminate("invalid input: u_old=%g\n", u_old);
+
+  if(u_old < 0 || rho < 0)
+    terminate("invalid input: task=%d u_old=%g  rho=%g  dt=%g  All.MinEgySpec=%g\n", ThisTask, u_old, rho, dt, All.MinEgySpec);
+
+  rho *= All.UnitDensity_in_cgs * All.HubbleParam * All.HubbleParam; /* convert to physical cgs units */
+  u_old *= All.UnitPressure_in_cgs / All.UnitDensity_in_cgs;
+  dt *= All.UnitTime_in_s / All.HubbleParam;
+
+  gs.nHcgs = gs.XH * rho / PROTONMASS; /* hydrogen number dens in cgs units */
+  ratefact = gs.nHcgs * gs.nHcgs / rho;
+  /* the bracket starts with zero width at u_old and is widened below */
+  u       = u_old;
+  u_lower = u;
+  u_upper = u;
+
+  LambdaNet = CoolingRateFromU(u, rho, ne_guess);
+
+  /* bracketing: the root of f(u) = u - u_old - ratefact * LambdaNet(u) * dt is sought */
+  if(u - u_old - ratefact * LambdaNet * dt < 0) /* heating */
+    {
+      u_upper *= sqrt(1.1);
+      u_lower /= sqrt(1.1);
+      while(u_upper - u_old - ratefact * CoolingRateFromU(u_upper, rho, ne_guess) * dt < 0)
+        {
+          u_upper *= 1.1;
+          u_lower *= 1.1;
+        }
+    }
+
+  if(u - u_old - ratefact * LambdaNet * dt > 0) /* cooling */
+    {
+      u_lower /= sqrt(1.1);
+      u_upper *= sqrt(1.1);
+      while(u_lower - u_old - ratefact * CoolingRateFromU(u_lower, rho, ne_guess) * dt > 0)
+        {
+          u_upper /= 1.1;
+          u_lower /= 1.1;
+        }
+    }
+  /* bisection: halve the bracket until its relative width is at most 1e-6 or MAXITER is reached */
+  do
+    {
+      u = 0.5 * (u_lower + u_upper);
+
+      LambdaNet = CoolingRateFromU(u, rho, ne_guess);
+
+      if(u - u_old - ratefact * LambdaNet * dt > 0)
+        {
+          u_upper = u;
+        }
+      else
+        {
+          u_lower = u;
+        }
+
+      du = u_upper - u_lower;
+
+      iter++;
+
+      if(iter >= (MAXITER - 10)) /* trace the final iterations before giving up */
+        printf("u= %g\n", u);
+    }
+  while(fabs(du / u) > 1.0e-6 && iter < MAXITER);
+
+  if(iter >= MAXITER)
+    terminate(
+        "failed to converge in DoCooling(): DoCool.u_old_input=%g\nDoCool.rho_input= %g\nDoCool.dt_input= %g\nDoCool.ne_guess_input= "
+        "%g\n",
+        DoCool.u_old_input, DoCool.rho_input, DoCool.dt_input, DoCool.ne_guess_input);
+
+  u *= All.UnitDensity_in_cgs / All.UnitPressure_in_cgs; /* to internal units */
+
+  return u;
+}
+
+/*! \brief Estimates the cooling time of a gas cell.
+ *
+ *  The arguments are given in code units. They are converted to physical cgs
+ *  units for the rate evaluation, and the result is converted back to code
+ *  time units.
+ *
+ *  \param[in] u_old Internal energy per unit mass of the gas cell.
+ *  \param[in] rho The proper density of the gas cell.
+ *  \param[in, out] ne_guess Electron number density relative to hydrogen
+ *                  number density; refined by the rate evaluation.
+ *
+ *  \return Cooling time; 0 if the net rate is heating (or exactly zero).
+ */
+double GetCoolingTime(double u_old, double rho, double *ne_guess)
+{
+  double rate_prefactor, net_rate;
+
+  /* remember the inputs (code units) for convergence diagnostics */
+  DoCool.u_old_input    = u_old;
+  DoCool.rho_input      = rho;
+  DoCool.ne_guess_input = *ne_guess;
+
+  /* code units -> physical cgs units */
+  rho *= All.UnitDensity_in_cgs * All.HubbleParam * All.HubbleParam;
+  u_old *= All.UnitPressure_in_cgs / All.UnitDensity_in_cgs;
+
+  /* hydrogen number density in cgs units and the resulting rate prefactor */
+  gs.nHcgs       = gs.XH * rho / PROTONMASS;
+  rate_prefactor = gs.nHcgs * gs.nHcgs / rho;
+
+  net_rate = CoolingRateFromU(u_old, rho, ne_guess);
+
+  /* a non-negative net rate means heating (e.g. by the UV background) */
+  if(net_rate >= 0)
+    return 0;
+
+  const double t_cool_cgs = u_old / (-rate_prefactor * net_rate);
+
+  /* seconds -> code time units */
+  return t_cool_cgs * (All.HubbleParam / All.UnitTime_in_s);
+}
+
+/*! \brief Compute gas temperature from internal energy per unit mass.
+ *
+ *   This function determines the electron fraction, and hence the mean
+ *   molecular weight. With it, the function arrives at a self-consistent
+ *   temperature. Abundances and emission rates are also computed (in gs).
+ *
+ *  \param[in] u   internal energy per unit mass (callers in this file pass cgs values).
+ *  \param[in] rho gas density (callers in this file pass cgs values).
+ *  \param[in, out] ne_guess electron number density relative to hydrogen
+ *                  number density
+ *
+ *  \return The gas temperature.
+ */
+double convert_u_to_temp(double u, double rho, double *ne_guess)
+{
+  double temp, temp_old, temp_new, max = 0, ne_old;
+  double mu;
+  int iter = 0;
+
+  double u_input, rho_input, ne_input;
+  /* copies of the inputs, used only by the failure report below */
+  u_input   = u;
+  rho_input = rho;
+  ne_input  = *ne_guess;
+  /* first estimate from the mean molecular weight implied by the incoming ne_guess */
+  mu   = (1 + 4 * gs.yhelium) / (1 + gs.yhelium + *ne_guess);
+  temp = GAMMA_MINUS1 / BOLTZMANN * u * PROTONMASS * mu;
+
+  do
+    {
+      ne_old = *ne_guess;
+
+      find_abundances_and_rates(log10(temp), rho, ne_guess);
+      temp_old = temp;
+
+      mu = (1 + 4 * gs.yhelium) / (1 + gs.yhelium + *ne_guess);
+
+      temp_new = GAMMA_MINUS1 / BOLTZMANN * u * PROTONMASS * mu;
+      /* max never decreases: it keeps the largest response of ne to a temperature change seen so far */
+      max = dmax(max, temp_new / (1 + gs.yhelium + *ne_guess) * fabs((*ne_guess - ne_old) / (temp_new - temp_old + 1.0)));
+      /* damped step from temp_old towards temp_new */
+      temp = temp_old + (temp_new - temp_old) / (1 + max);
+      iter++;
+
+      if(iter > (MAXITER - 10)) /* trace the final iterations before giving up */
+        printf("-> temp= %g ne=%g\n", temp, *ne_guess);
+    }
+  while(fabs(temp - temp_old) > 1.0e-3 * temp && iter < MAXITER);
+
+  if(iter >= MAXITER)
+    {
+      printf("failed to converge in convert_u_to_temp()\n");
+      printf("u_input= %g\nrho_input=%g\n ne_input=%g\n", u_input, rho_input, ne_input);
+      printf("DoCool.u_old_input=%g\nDoCool.rho_input= %g\nDoCool.dt_input= %g\nDoCool.ne_guess_input= %g\n", DoCool.u_old_input,
+             DoCool.rho_input, DoCool.dt_input, DoCool.ne_guess_input);
+      terminate("convergence failure");
+    }
+
+  gs.mu = mu; /* mean molecular weight of the converged state */
+
+  return temp;
+}
+
+/*! \brief Computes the actual abundance ratios.
+ *
+ *  The chemical composition of the gas is primordial (no metals are present).
+ *
+ *  \param[in] logT log10 of gas temperature.
+ *  \param[in] rho Gas density (cgs units, see the computation of gs.nHcgs).
+ *  \param[in, out] ne_guess Electron number density relative to hydrogen
+ *                  number density.
+ *
+ *  \return void; the run is terminated if the iteration does not converge.
+ */
+void find_abundances_and_rates(double logT, double rho, double *ne_guess)
+{
+  double neold, nenew;
+  int j, niter;
+  double flow, fhi, t;
+
+  /* copies of the inputs, used only by the failure report at the end */
+  double logT_input = logT, rho_input = rho, ne_input = *ne_guess;
+
+  if(!gsl_finite(logT))
+    terminate("logT=%g\n", logT);
+
+  if(logT <= Tmin) /* everything neutral */
+    {
+      gs.nH0    = 1.0;
+      gs.nHe0   = gs.yhelium;
+      gs.nHp    = 0;
+      gs.nHep   = 0;
+      gs.nHepp  = 0;
+      gs.ne     = 0;
+      *ne_guess = 0;
+      return;
+    }
+
+  if(logT >= Tmax) /* everything is ionized */
+    {
+      gs.nH0    = 0;
+      gs.nHe0   = 0;
+      gs.nHp    = 1.0;
+      gs.nHep   = 0;
+      gs.nHepp  = gs.yhelium;
+      gs.ne     = gs.nHp + 2.0 * gs.nHepp;
+      *ne_guess = gs.ne; /* note: in units of the hydrogen number density */
+      return;
+    }
+
+  /* linear interpolation weights between table nodes j and j + 1 */
+  t    = (logT - Tmin) / deltaT;
+  j    = (int)t;
+  fhi  = t - j;
+  flow = 1 - fhi;
+
+  if(*ne_guess == 0)
+    *ne_guess = 1.0;
+
+  gs.nHcgs = gs.XH * rho / PROTONMASS; /* hydrogen number dens in cgs units */
+
+  gs.ne    = *ne_guess;
+  neold    = gs.ne;
+  niter    = 0;
+  gs.necgs = gs.ne * gs.nHcgs;
+
+  /* evaluate number densities iteratively (cf KWH eqns 33-38) in units of nH */
+  do
+    {
+      niter++;
+
+      gs.aHp   = flow * RateT[j].AlphaHp + fhi * RateT[j + 1].AlphaHp;
+      gs.aHep  = flow * RateT[j].AlphaHep + fhi * RateT[j + 1].AlphaHep;
+      gs.aHepp = flow * RateT[j].AlphaHepp + fhi * RateT[j + 1].AlphaHepp;
+      gs.ad    = flow * RateT[j].Alphad + fhi * RateT[j + 1].Alphad;
+      gs.geH0  = flow * RateT[j].GammaeH0 + fhi * RateT[j + 1].GammaeH0;
+      gs.geHe0 = flow * RateT[j].GammaeHe0 + fhi * RateT[j + 1].GammaeHe0;
+      gs.geHep = flow * RateT[j].GammaeHep + fhi * RateT[j + 1].GammaeHep;
+
+      if(gs.necgs <= 1.e-25 || pc.J_UV == 0)
+        {
+          gs.gJH0ne = gs.gJHe0ne = gs.gJHepne = 0;
+        }
+      else
+        {
+          gs.gJH0ne  = pc.gJH0 / gs.necgs;
+          gs.gJHe0ne = pc.gJHe0 / gs.necgs;
+          gs.gJHepne = pc.gJHep / gs.necgs;
+        }
+
+      gs.nH0 = gs.aHp / (gs.aHp + gs.geH0 + gs.gJH0ne); /* eqn (33) */
+      gs.nHp = 1.0 - gs.nH0;                            /* eqn (34) */
+
+      if((gs.gJHe0ne + gs.geHe0) <= SMALLNUM) /* no ionization at all */
+        {
+          gs.nHep  = 0.0;
+          gs.nHepp = 0.0;
+          gs.nHe0  = gs.yhelium;
+        }
+      else
+        {
+          gs.nHep =
+              gs.yhelium / (1.0 + (gs.aHep + gs.ad) / (gs.geHe0 + gs.gJHe0ne) + (gs.geHep + gs.gJHepne) / gs.aHepp); /* eqn (35) */
+          gs.nHe0  = gs.nHep * (gs.aHep + gs.ad) / (gs.geHe0 + gs.gJHe0ne);                                          /* eqn (36) */
+          gs.nHepp = gs.nHep * (gs.geHep + gs.gJHepne) / gs.aHepp;                                                   /* eqn (37) */
+        }
+
+      neold = gs.ne;
+
+      gs.ne    = gs.nHp + gs.nHep + 2 * gs.nHepp; /* eqn (38) */
+      gs.necgs = gs.ne * gs.nHcgs;
+
+      if(pc.J_UV == 0) /* without photo-ionization the rates do not depend on ne: one pass suffices */
+        break;
+
+      nenew    = 0.5 * (gs.ne + neold);
+      gs.ne    = nenew;
+      gs.necgs = gs.ne * gs.nHcgs;
+
+      if(fabs(gs.ne - neold) < 1.0e-4)
+        break;
+
+      if(niter > (MAXITER - 10))
+        printf("ne= %g  niter=%d\n", gs.ne, niter);
+    }
+  while(niter < MAXITER);
+
+  if(niter >= MAXITER)
+    {
+      printf("gs.aHp = %le\n", gs.aHp);
+      char buff[1000];
+      snprintf(buff, sizeof(buff), "%s/cooling_task%d.dat", All.OutputDir, ThisTask);
+      FILE *fp = fopen(buff, "w");
+      if(fp != NULL) /* the dump is best effort: a failed fopen() must not be dereferenced */
+        {
+          fwrite(&All.Time, sizeof(double), 1, fp);
+          fwrite(&logT_input, sizeof(double), 1, fp);
+          fwrite(&rho_input, sizeof(double), 1, fp);
+          fwrite(&ne_input, sizeof(double), 1, fp);
+          fclose(fp);
+        }
+      terminate(
+          "no convergence reached in find_abundances_and_rates(): logT_input= %g  rho_input= %g  ne_input= %g "
+          "DoCool.u_old_input=%g\nDoCool.rho_input= %g\nDoCool.dt_input= %g\nDoCool.ne_guess_input= %g\n",
+          logT_input, rho_input, ne_input, DoCool.u_old_input, DoCool.rho_input, DoCool.dt_input, DoCool.ne_guess_input);
+    }
+  gs.bH0  = flow * RateT[j].BetaH0 + fhi * RateT[j + 1].BetaH0;
+  gs.bHep = flow * RateT[j].BetaHep + fhi * RateT[j + 1].BetaHep;
+  gs.bff  = flow * RateT[j].Betaff + fhi * RateT[j + 1].Betaff;
+
+  *ne_guess = gs.ne;
+}
+
+/*! \brief Net heating rate of the gas for a given internal energy.
+ *
+ *  The self-consistent temperature and abundance ratios are determined
+ *  first; (heating rate-cooling rate)/n_h^2 in cgs units is then evaluated
+ *  at that temperature.
+ *
+ *  \param[in] u Gas internal energy per unit mass.
+ *  \param[in] rho Gas density.
+ *  \param[in, out] ne_guess Electron number density relative to hydrogen
+ *                  number density.
+ *
+ *  \return (heating rate-cooling rate)/n_h^2; negative for net cooling.
+ */
+double CoolingRateFromU(double u, double rho, double *ne_guess)
+{
+  /* ne_guess is refined in place by the temperature solve and then reused,
+   * through the same pointer, by the rate evaluation */
+  const double log_temperature = log10(convert_u_to_temp(u, rho, ne_guess));
+
+  return CoolingRate(log_temperature, rho, ne_guess);
+}
+
+/*! \brief  Evaluates the self-consistent gas state of a cell for output.
+ *
+ *  Used only in io_fields.c for calculating output fields.
+ *
+ *  \param[in] i index into SphP for gas cell to consider.
+ *  \param[in, out] ne_guess pointer to electron number density relative to
+ *                  hydrogen number density (modified).
+ *  \param[out] nH0 Pointer to the neutral hydrogen fraction (set to value in
+ *              the GasState struct).
+ *  \param[out] coolrate Pointer to cooling rate (set to value from
+ *              CoolingRateFromU).
+ *
+ *  \return void
+ */
+void SetOutputGasState(int i, double *ne_guess, double *nH0, double *coolrate)
+{
+  double sfr         = 0;
+  double rho_code    = SphP[i].Density * All.cf_a3inv;
+  double utherm_code = dmax(All.MinEgySpec, SphP[i].Utherm);
+
+#if defined(USE_SFR)
+  /* star formation rate of the cell; evaluated only when USE_SFR is compiled in */
+  sfr = get_starformation_rate(i);
+#endif /* #if defined(USE_SFR) */
+
+  /* remember the inputs (code units) for convergence diagnostics */
+  DoCool.u_old_input    = utherm_code;
+  DoCool.rho_input      = rho_code;
+  DoCool.ne_guess_input = *ne_guess;
+
+  /* code units -> physical cgs units */
+  const double rho_cgs    = rho_code * (All.UnitDensity_in_cgs * All.HubbleParam * All.HubbleParam);
+  const double utherm_cgs = utherm_code * (All.UnitPressure_in_cgs / All.UnitDensity_in_cgs);
+
+  /* the rate evaluation also refreshes ne_guess and the whole of gs */
+  *coolrate = CoolingRateFromU(utherm_cgs, rho_cgs, ne_guess);
+
+  /* neutral hydrogen abundance left in gs by that evaluation */
+  *nH0 = gs.nH0;
+}
+
+/*! \brief  Calculate (heating rate-cooling rate)/n_h^2 in cgs units.
+ *
+ *  \param[in] logT log10 of gas temperature.
+ *  \param[in] rho Gas density (cgs units, see the computation of gs.nHcgs).
+ *  \param[in, out] nelec Electron number density relative to hydrogen number
+ *                  density.
+ *
+ *  \return (heating rate-cooling rate)/n_h^2.
+ */
+double CoolingRate(double logT, double rho, double *nelec)
+{
+  double Lambda, Heat;
+  double LambdaExc, LambdaIon, LambdaRec, LambdaFF, LambdaCmptn = 0.0;
+  double LambdaExcH0, LambdaExcHep, LambdaIonH0, LambdaIonHe0, LambdaIonHep;
+  double LambdaRecHp, LambdaRecHep, LambdaRecHepp, LambdaRecHepd;
+  double redshift;
+  double T;
+  double LambdaPrim = 0.0, LambdaMet = 0.0, LambdaDust = 0.0, LambdaMol = 0.0; /* only LambdaPrim is ever assigned below */
+
+  if(logT <= Tmin)
+    logT = Tmin + 0.5 * deltaT; /* floor at Tmin */
+
+  gs.nHcgs = gs.XH * rho / PROTONMASS; /* hydrogen number dens in cgs units */
+
+  if(logT < Tmax)
+    {
+      find_abundances_and_rates(logT, rho, nelec);
+
+      /* Compute cooling and heating rate (cf KWH Table 1) in units of nH**2 */
+      T = pow(10.0, logT);
+
+      LambdaExcH0   = gs.bH0 * gs.ne * gs.nH0;
+      LambdaExcHep  = gs.bHep * gs.ne * gs.nHep;
+      LambdaExc     = LambdaExcH0 + LambdaExcHep; /* excitation */
+      LambdaIonH0   = 2.18e-11 * gs.geH0 * gs.ne * gs.nH0;
+      LambdaIonHe0  = 3.94e-11 * gs.geHe0 * gs.ne * gs.nHe0;
+      LambdaIonHep  = 8.72e-11 * gs.geHep * gs.ne * gs.nHep;
+      LambdaIon     = LambdaIonH0 + LambdaIonHe0 + LambdaIonHep; /* ionization */
+      LambdaRecHp   = 1.036e-16 * T * gs.ne * (gs.aHp * gs.nHp);
+      LambdaRecHep  = 1.036e-16 * T * gs.ne * (gs.aHep * gs.nHep);
+      LambdaRecHepp = 1.036e-16 * T * gs.ne * (gs.aHepp * gs.nHepp);
+      LambdaRecHepd = 6.526e-11 * gs.ad * gs.ne * gs.nHep;
+      LambdaRec     = LambdaRecHp + LambdaRecHep + LambdaRecHepp + LambdaRecHepd; /* recombination */
+      LambdaFF      = gs.bff * (gs.nHp + gs.nHep + 4 * gs.nHepp) * gs.ne;         /* free-free */
+      LambdaPrim    = LambdaExc + LambdaIon + LambdaRec + LambdaFF;
+
+      if(All.ComovingIntegrationOn)
+        {
+          redshift    = 1 / All.Time - 1;
+          LambdaCmptn = 5.65e-36 * gs.ne * (T - 2.73 * (1. + redshift)) * pow(1. + redshift, 4.) / gs.nHcgs;
+        }
+      else
+        LambdaCmptn = 0;
+
+      Lambda = LambdaPrim + LambdaMet + LambdaDust + LambdaCmptn + LambdaMol;
+      /* photo-heating is added only while a UV background is active (pc.J_UV != 0) */
+      Heat = 0;
+      if(pc.J_UV != 0)
+        Heat += (gs.nH0 * pc.epsH0 + gs.nHe0 * pc.epsHe0 + gs.nHep * pc.epsHep) / gs.nHcgs;
+    }
+  else /* here we're outside of tabulated rates, T>Tmax K */
+    {
+      /* at high T (fully ionized); only free-free and Compton cooling are present. Assumes no heating. */
+      Heat = 0;
+
+      LambdaExcH0 = LambdaExcHep = LambdaIonH0 = LambdaIonHe0 = LambdaIonHep = LambdaRecHp = LambdaRecHep = LambdaRecHepp =
+          LambdaRecHepd                                                                                   = 0;
+
+      /* very hot: H and He both fully ionized */
+      gs.nHp   = 1.0;
+      gs.nHep  = 0;
+      gs.nHepp = gs.yhelium;
+      gs.ne    = gs.nHp + 2.0 * gs.nHepp;
+      *nelec   = gs.ne; /* note: in units of the hydrogen number density */
+      /* NOTE(review): prefactor 1.42e-27 here vs 1.43e-27 for Betaff in MakeRateTable() - confirm which is intended */
+      T        = pow(10.0, logT);
+      LambdaFF = 1.42e-27 * sqrt(T) * (1.1 + 0.34 * exp(-(5.5 - logT) * (5.5 - logT) / 3)) * (gs.nHp + 4 * gs.nHepp) * gs.ne;
+
+      if(All.ComovingIntegrationOn)
+        {
+          redshift = 1 / All.Time - 1;
+          /* add inverse Compton cooling off the microwave background */
+          LambdaCmptn = 5.65e-36 * gs.ne * (T - 2.73 * (1. + redshift)) * pow(1. + redshift, 4.) / gs.nHcgs;
+        }
+      else
+        LambdaCmptn = 0;
+
+      Lambda = LambdaFF + LambdaCmptn;
+    }
+
+  return (Heat - Lambda);
+}
+
+/*! \brief Make cooling rates interpolation table.
+ *
+ *  Set up interpolation tables in T for cooling rates given in
+ *  KWH, ApJS, 105, 19. Also sets gs.yhelium, gs.mhboltz, gs.ethmin, Tmin and deltaT.
+ *
+ *  \return void
+ */
+void MakeRateTable(void)
+{
+  int i;
+  double T;
+  double Tfact;
+
+  gs.yhelium = (1 - gs.XH) / (4 * gs.XH);
+  gs.mhboltz = PROTONMASS / BOLTZMANN;
+  if(All.MinGasTemp > 0.0)
+    Tmin = log10(0.1 * All.MinGasTemp); /* start the table one decade below the minimum gas temperature */
+  else
+    Tmin = 1.0;
+  deltaT    = (Tmax - Tmin) / NCOOLTAB;
+  gs.ethmin = pow(10.0, Tmin) * (1. + gs.yhelium) / ((1. + 4. * gs.yhelium) * gs.mhboltz * GAMMA_MINUS1);
+  /* minimum internal energy for neutral gas */
+  /* NCOOLTAB intervals give NCOOLTAB + 1 nodes, hence the inclusive upper bound of the loop */
+  for(i = 0; i <= NCOOLTAB; i++)
+    {
+      RateT[i].BetaH0 = RateT[i].BetaHep = RateT[i].Betaff = RateT[i].AlphaHp = RateT[i].AlphaHep = RateT[i].AlphaHepp =
+          RateT[i].Alphad = RateT[i].GammaeH0 = RateT[i].GammaeHe0 = RateT[i].GammaeHep = 0;
+      /* rates start at 0; each "< 70" guard below leaves a rate at 0 when its exponent would be below -70 */
+      T     = pow(10.0, Tmin + deltaT * i);
+      Tfact = 1.0 / (1 + sqrt(T / 1.0e5));
+
+      /* collisional excitation */
+      /* Cen 1992 */
+      if(118348 / T < 70)
+        RateT[i].BetaH0 = 7.5e-19 * exp(-118348 / T) * Tfact;
+      if(473638 / T < 70)
+        RateT[i].BetaHep = 5.54e-17 * pow(T, -0.397) * exp(-473638 / T) * Tfact;
+
+      /* free-free */
+      RateT[i].Betaff = 1.43e-27 * sqrt(T) * (1.1 + 0.34 * exp(-(5.5 - log10(T)) * (5.5 - log10(T)) / 3));
+
+      /* recombination */
+      /* Cen 1992 */
+      /* Hydrogen II */
+      RateT[i].AlphaHp = 8.4e-11 * pow(T / 1000, -0.2) / (1. + pow(T / 1.0e6, 0.7)) / sqrt(T);
+      /* Helium II */
+      RateT[i].AlphaHep = 1.5e-10 * pow(T, -0.6353);
+      /* Helium III */
+      RateT[i].AlphaHepp = 4. * RateT[i].AlphaHp;
+
+      /* Cen 1992 */
+      /* dielectric recombination */
+      if(470000 / T < 70)
+        RateT[i].Alphad = 1.9e-3 * pow(T, -1.5) * exp(-470000 / T) * (1. + 0.3 * exp(-94000 / T));
+
+      /* collisional ionization */
+      /* Cen 1992 */
+      /* Hydrogen */
+      if(157809.1 / T < 70)
+        RateT[i].GammaeH0 = 5.85e-11 * sqrt(T) * exp(-157809.1 / T) * Tfact;
+      /* Helium */
+      if(285335.4 / T < 70)
+        RateT[i].GammaeHe0 = 2.38e-11 * sqrt(T) * exp(-285335.4 / T) * Tfact;
+      /* Helium II */
+      if(631515.0 / T < 70)
+        RateT[i].GammaeHep = 5.68e-12 * sqrt(T) * exp(-631515.0 / T) * Tfact;
+    }
+}
+
+/*! \brief Read table input for ionizing parameters.
+ *
+ *  Only which == 0 is handled: pass 0 counts the complete 7-number records
+ *  and allocates the table, pass 1 stores them; anything else ends the read.
+ *
+ *  \param[in] fname Name of file that contains the tabulated parameters.
+ *  \param[in] which Flag used to identify the type of the ionizing background
+ *                   (0 = UV background, 1 = AGN background, 2=RADCOOL).
+ *
+ *  \return void
+ */
+void ReadIonizeParams(char *fname, int which)
+{
+  int iter, i;
+  FILE *fdcool;
+  float dummy;
+
+  if(which == 0)
+    {
+      NheattabUVB = 0;
+      for(iter = 0, i = 0; iter < 2; iter++)
+        {
+          if(!(fdcool = fopen(fname, "r")))
+            terminate("COOLING: cannot read ionization table in file `%s'\n", fname);
+          if(iter == 0) /* compare with 7, not EOF: a malformed record yields 0..6 and would never reach EOF */
+            while(fscanf(fdcool, "%g %g %g %g %g %g %g", &dummy, &dummy, &dummy, &dummy, &dummy, &dummy, &dummy) == 7)
+              NheattabUVB++;
+          if(iter == 1) /* never store more records than were counted and allocated in pass 0 */
+            while(i < NheattabUVB && fscanf(fdcool, "%g %g %g %g %g %g %g", &PhotoTUVB[i].variable, &PhotoTUVB[i].gH0, &PhotoTUVB[i].gHe,
+                                            &PhotoTUVB[i].gHep, &PhotoTUVB[i].eH0, &PhotoTUVB[i].eHe, &PhotoTUVB[i].eHep) == 7)
+              i++;
+          fclose(fdcool);
+          if(iter == 0)
+            {
+              PhotoTUVB = (PhotoTable *)mymalloc("PhotoT", NheattabUVB * sizeof(PhotoTable));
+              mpi_printf("COOLING: read ionization table with %d entries in file `%s'.\n", NheattabUVB, fname);
+            }
+        }
+      /* ignore zeros at end of treecool file */
+      for(i = 0; i < NheattabUVB; ++i)
+        if(PhotoTUVB[i].gH0 == 0.0)
+          break;
+      NheattabUVB = i;
+      mpi_printf("COOLING: using %d ionization table entries from file `%s'.\n", NheattabUVB, fname);
+    }
+}
+
+/*! \brief Set the ionization parameters for the UV background.
+ *
+ *  Interpolates the tabulated rates in log10(1+z) to the current redshift and
+ *  stores them in pc; all rates are zeroed when the table cannot bracket it.
+ *
+ *  \return void
+ */
+void IonizeParamsUVB(void)
+{
+  int i, ilow;
+  double logz, dzlow, dzhi;
+  double redshift;
+
+  if(All.ComovingIntegrationOn)
+    redshift = 1 / All.Time - 1;
+  else
+    redshift = 0.0;
+
+  logz = log10(redshift + 1.0);
+  ilow = 0;
+  for(i = 0; i < NheattabUVB; i++)
+    {
+      if(PhotoTUVB[i].variable < logz)
+        ilow = i;
+      else
+        break;
+    }
+
+  /* check the bounds first: entries ilow and ilow + 1 must both exist before either of them is read */
+  if(ilow + 1 >= NheattabUVB || logz > PhotoTUVB[NheattabUVB - 1].variable || PhotoTUVB[ilow].gH0 == 0 ||
+     PhotoTUVB[ilow + 1].gH0 == 0)
+    {
+      SetZeroIonization();
+      return;
+    }
+
+  pc.J_UV = 1;
+  dzlow   = logz - PhotoTUVB[ilow].variable;
+  dzhi    = PhotoTUVB[ilow + 1].variable - logz;
+
+  pc.gJH0   = pow(10., (dzhi * log10(PhotoTUVB[ilow].gH0) + dzlow * log10(PhotoTUVB[ilow + 1].gH0)) / (dzlow + dzhi));
+  pc.gJHe0  = pow(10., (dzhi * log10(PhotoTUVB[ilow].gHe) + dzlow * log10(PhotoTUVB[ilow + 1].gHe)) / (dzlow + dzhi));
+  pc.gJHep  = pow(10., (dzhi * log10(PhotoTUVB[ilow].gHep) + dzlow * log10(PhotoTUVB[ilow + 1].gHep)) / (dzlow + dzhi));
+  pc.epsH0  = pow(10., (dzhi * log10(PhotoTUVB[ilow].eH0) + dzlow * log10(PhotoTUVB[ilow + 1].eH0)) / (dzlow + dzhi));
+  pc.epsHe0 = pow(10., (dzhi * log10(PhotoTUVB[ilow].eHe) + dzlow * log10(PhotoTUVB[ilow + 1].eHe)) / (dzlow + dzhi));
+  pc.epsHep = pow(10., (dzhi * log10(PhotoTUVB[ilow].eHep) + dzlow * log10(PhotoTUVB[ilow + 1].eHep)) / (dzlow + dzhi));
+}
+
+/*! \brief Clear the current photo rates by zeroing every field of pc.
+ *
+ *  \return void
+ */
+void SetZeroIonization(void) { memset(&pc, 0, sizeof(pc)); }
+
+/*! \brief Wrapper function to set the ionizing background.
+ *  Only the UV background is handled: the call is forwarded to IonizeParamsUVB().
+ *  \return void
+ */
+void IonizeParams(void) { IonizeParamsUVB(); }
+
+/*! \brief Initialize the cooling module.
+ *
+ *  Sets the default hydrogen mass fraction, clears the photo rates, allocates
+ *  and fills the cooling rate table, reads the UV background table and finally
+ *  evaluates the background at the start time of the run.
+ *
+ *  \return void
+ */
+void InitCool(void)
+{
+  gs.XH = HYDROGEN_MASSFRAC; /* default hydrogen mass fraction */
+
+  SetZeroIonization(); /* no photo-ionization/heating rates yet */
+
+  /* rate table: NCOOLTAB intervals, hence NCOOLTAB + 1 nodes */
+  RateT = (RateTable *)mymalloc("RateT", (NCOOLTAB + 1) * sizeof(*RateT));
+
+  MakeRateTable();
+
+  /* photo-ionization/heating table of the UV background */
+  ReadIonizeParams(All.TreecoolFile, 0);
+
+  mpi_printf("GFM_COOLING: time, time begin = %le\t%le\n", All.Time, All.TimeBegin);
+
+  /* the background is evaluated at the start time, so the clock is moved there first */
+  All.Time = All.TimeBegin;
+  set_cosmo_factors_for_current_time();
+
+  IonizeParams();
+}
+
+/*! \brief Cool every active gas cell (used when star formation is disabled).
+ *
+ *  \return void
+ */
+void cooling_only(void)
+{
+  int n, cell;
+
+  /* book what measure_time() returns now under CPU_MISC, the loop below under CPU_COOLINGSFR */
+  CPU_Step[CPU_MISC] += measure_time();
+
+  for(n = 0; n < TimeBinsHydro.NActiveParticles; n++)
+    {
+      cell = TimeBinsHydro.ActiveParticleList[n];
+      if(cell < 0)
+        continue;
+
+      if(P[cell].Mass == 0 && P[cell].ID == 0)
+        continue; /* skip cells that have been swallowed or eliminated */
+      cool_cell(cell);
+    }
+  CPU_Step[CPU_COOLINGSFR] += measure_time();
+}
+
+/*! \brief Apply the isochoric cooling to a given gas cell.
+ *
+ *  The cell is cooled over the time step of its hydro time bin. Afterwards
+ *  the internal energy per unit mass, the total energy and the pressure of
+ *  the cell are updated. The internal energy is kept at or above
+ *  All.MinEgySpec, and the electron abundance of the cell is refreshed.
+ *
+ *  \param[in] i Index of the gas cell to which cooling is applied.
+ *
+ *  \return void
+ */
+void cool_cell(int i)
+{
+  const double density = SphP[i].Density;
+
+  /* time step of the cell's hydro time bin; a cell in bin 0 gets a zero step */
+  const double dt_bin = (P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval;
+
+  /* same interval after applying the cosmological conversion factors */
+  const double dt_cool = All.cf_atime * dt_bin / All.cf_time_hubble_a;
+
+  /* electron abundance (gives ionization state and mean molecular weight) */
+  double ne = SphP[i].Ne;
+
+  const double u_cooled = DoCooling(dmax(All.MinEgySpec, SphP[i].Utherm), density * All.cf_a3inv, dt_cool, &ne);
+
+  SphP[i].Ne = ne;
+
+  if(u_cooled < 0)
+    terminate("invalid temperature: Thistask=%d i=%d unew=%g\n", ThisTask, i, u_cooled);
+
+  /* energy change of the cell; a result below the floor is lifted to the floor */
+  const double du = (u_cooled < All.MinEgySpec) ? All.MinEgySpec - SphP[i].Utherm : u_cooled - SphP[i].Utherm;
+
+  SphP[i].Utherm += du;
+  SphP[i].Energy += All.cf_atime * All.cf_atime * du * P[i].Mass;
+
+#ifdef OUTPUT_COOLHEAT
+  if(dt_cool > 0)
+    SphP[i].CoolHeat = du * P[i].Mass / dt_cool;
+#endif /* #ifdef OUTPUT_COOLHEAT */
+
+  /* bring the cell pressure in line with the new internal energy */
+  set_pressure_of_cell(i);
+}
+
+#endif /* #ifdef COOLING */
diff --git a/src/amuse/community/arepo/src/cooling/cooling_proto.h b/src/amuse/community/arepo/src/cooling/cooling_proto.h
new file mode 100644
index 0000000000..cbd304a838
--- /dev/null
+++ b/src/amuse/community/arepo/src/cooling/cooling_proto.h
@@ -0,0 +1,49 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/cooling/cooling_proto.h
+ * \date        05/2018
+ * \brief       Header for cooling functions.
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 27.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef INLINE_FUNC
+#define INLINE_FUNC
+#endif /* #ifndef INLINE_FUNC */
+
+void SetOutputGasState(int i, double *ne_guess, double *nH0, double *coolrate); /* fills *nH0 and *coolrate for cell i */
+/* thermodynamics and rates (cooling.c); every ne_guess/nelec pointer is read and updated in place */
+double convert_u_to_temp(double u, double rho, double *ne_guess);
+double CoolingRate(double logT, double rho, double *nelec);
+double CoolingRateFromU(double u, double rho, double *ne_guess);
+double DoCooling(double u_old, double rho, double dt, double *ne_guess);
+double GetCoolingTime(double u_old, double rho, double *ne_guess);
+/* ionization balance, module initialization and UV background handling (cooling.c) */
+void find_abundances_and_rates(double logT, double rho, double *ne_guess);
+void InitCool(void);
+void IonizeParamsUVB(void);
+void IonizeParams(void);
+void ReadIonizeParams(char *fname, int which);
+void SetZeroIonization(void);
diff --git a/src/amuse/community/arepo/src/cooling/cooling_vars.h b/src/amuse/community/arepo/src/cooling/cooling_vars.h
new file mode 100644
index 0000000000..22a737522d
--- /dev/null
+++ b/src/amuse/community/arepo/src/cooling/cooling_vars.h
@@ -0,0 +1,80 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/cooling/cooling_vars.h
+ * \date        05/2018
+ * \brief       Variables for cooling.
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 27.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#define NCOOLTAB 2000
+#define SMALLNUM 1.0e-60
+#define COOLLIM 0.1
+#define HEATLIM 20.0
+#define eV_to_K 11606.0 /* NOTE(review): CODATA gives ~11604.5 K per eV; value left unchanged -- confirm */
+#define eV_to_erg 1.60184e-12 /* NOTE(review): CODATA gives ~1.60218e-12 erg per eV; value left unchanged -- confirm */
+#define MAX_TABLESIZE 250 /* Max # of lines in TREECOOL */
+
+/* data for gas state */
+typedef struct
+{
+  double ne, necgs, nHcgs;
+  double bH0, bHep, bff, aHp, aHep, aHepp, ad, geH0, geHe0, geHep;
+  double gJH0ne, gJHe0ne, gJHepne;
+  double nH0, nHp, nHep, nHe0, nHepp;
+  double XH, yhelium;
+  double mhboltz;
+  double ethmin; /* minimum internal energy for neutral gas */
+  double mu;
+} GasState;
+
+/* tabulated rates */
+typedef struct
+{
+  double BetaH0, BetaHep, Betaff;
+  double AlphaHp, AlphaHep, Alphad, AlphaHepp;
+  double GammaeH0, GammaeHe0, GammaeHep;
+} RateTable;
+
+/* photo-ionization/heating rate table */
+typedef struct
+{
+  float variable;       /* logz for UVB */
+  float gH0, gHe, gHep; /* photo-ionization rates */
+  float eH0, eHe, eHep; /* photo-heating rates */
+} PhotoTable;
+
+/* current interpolated photo-ionization/heating rates */
+typedef struct
+{
+  char J_UV;
+  double gJH0, gJHep, gJHe0, epsH0, epsHep, epsHe0;
+} PhotoCurrent;
+
+/* cooling data */
+typedef struct
+{
+  double u_old_input, rho_input, dt_input, ne_guess_input;
+} DoCoolData;
diff --git a/src/amuse/community/arepo/src/debug_md5/Md5.c b/src/amuse/community/arepo/src/debug_md5/Md5.c
new file mode 100644
index 0000000000..5ac2d223fe
--- /dev/null
+++ b/src/amuse/community/arepo/src/debug_md5/Md5.c
@@ -0,0 +1,472 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/debug_md5/Md5.c
+ * \date        MM/YYYY
+ * \brief
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ */
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*
+ **********************************************************************
+ ** md5.c                                                            **
+ ** RSA Data Security, Inc. MD5 Message Digest Algorithm             **
+ ** Created: 2/17/90 RLR                                             **
+ ** Revised: 1/91 SRD,AJ,BSK,JT Reference C Version                  **
+ **********************************************************************
+ */
+
+/*
+ **********************************************************************
+ ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. **
+ **                                                                  **
+ ** License to copy and use this software is granted provided that   **
+ ** it is identified as the "RSA Data Security, Inc. MD5 Message     **
+ ** Digest Algorithm" in all material mentioning or referencing this **
+ ** software or this function.                                       **
+ **                                                                  **
+ ** License is also granted to make and use derivative works         **
+ ** provided that such works are identified as "derived from the RSA **
+ ** Data Security, Inc. MD5 Message Digest Algorithm" in all         **
+ ** material mentioning or referencing the derived work.             **
+ **                                                                  **
+ ** RSA Data Security, Inc. makes no representations concerning      **
+ ** either the merchantability of this software or the suitability   **
+ ** of this software for any particular purpose.  It is provided "as **
+ ** is" without express or implied warranty of any kind.             **
+ **                                                                  **
+ ** These notices must be retained in any copies of any part of this **
+ ** documentation and/or software.                                   **
+ **********************************************************************
+ */
+
+/* -- include the following line if the md5.h header file is separate -- */
+#include "Md5.h"
+
+/* forward declarations (full prototypes, so that every call is type-checked) */
+static void Transform(UINT4 *buf, UINT4 *in);
+static void MD5Update(MD5_CTX *mdContext, unsigned char *inBuf, unsigned int inLen);
+
+/* MD5 padding block: one 0x80 byte (a single 1 bit) followed by zero bytes.
+ * Only the first element is spelled out; the remaining 63 elements are
+ * zero-initialised implicitly.  MD5Final() feeds a prefix of it to MD5Update(). */
+static unsigned char PADDING[64] = {0x80};
+
+/* F, G, H and I are the four basic MD5 functions; every argument is parenthesised */
+#define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
+#define G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define I(x, y, z) ((y) ^ ((x) | (~(z))))
+
+/* ROTATE_LEFT rotates x left n bits; a true rotation only if x is an unsigned 32-bit value */
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+
+/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4 */
+/* Rotation is separate from addition; do/while(0) makes each use one statement */
+#define FF(a, b, c, d, x, s, ac)                 \
+  do {                                           \
+    (a) += F((b), (c), (d)) + (x) + (UINT4)(ac); \
+    (a) = ROTATE_LEFT((a), (s));                 \
+    (a) += (b);                                  \
+  } while(0)
+#define GG(a, b, c, d, x, s, ac)                 \
+  do {                                           \
+    (a) += G((b), (c), (d)) + (x) + (UINT4)(ac); \
+    (a) = ROTATE_LEFT((a), (s));                 \
+    (a) += (b);                                  \
+  } while(0)
+#define HH(a, b, c, d, x, s, ac)                 \
+  do {                                           \
+    (a) += H((b), (c), (d)) + (x) + (UINT4)(ac); \
+    (a) = ROTATE_LEFT((a), (s));                 \
+    (a) += (b);                                  \
+  } while(0)
+#define II(a, b, c, d, x, s, ac)                 \
+  do {                                           \
+    (a) += I((b), (c), (d)) + (x) + (UINT4)(ac); \
+    (a) = ROTATE_LEFT((a), (s));                 \
+    (a) += (b);                                  \
+  } while(0)
+
+void MD5Init(MD5_CTX *mdContext)
+{
+  /* reset both words of the bit counter */
+  mdContext->i[0] = (UINT4)0;
+  mdContext->i[1] = (UINT4)0;
+  /* load the four scratch-buffer words with the magic initialization constants */
+  mdContext->buf[0] = (UINT4)0x67452301;
+  mdContext->buf[1] = (UINT4)0xefcdab89;
+  mdContext->buf[2] = (UINT4)0x98badcfe;
+  mdContext->buf[3] = (UINT4)0x10325476;
+}
+
+void MD5UpdateLong(MD5_CTX *mdContext, unsigned char *inBuf, unsigned long long inLenLong)
+{
+  /* hand the data to MD5Update() in pieces of at most 0x10000000 bytes */
+  const unsigned int max_chunk = 0x10000000;
+  while(inLenLong != 0)
+    {
+      unsigned int chunk = (inLenLong < max_chunk) ? (unsigned int)inLenLong : max_chunk;
+      MD5Update(mdContext, inBuf, chunk);
+      inBuf += chunk;
+      inLenLong -= chunk;
+    }
+}
+
+void MD5Update(MD5_CTX *mdContext, unsigned char *inBuf, unsigned int inLen)
+{
+  UINT4 in[16];
+  int mdi;
+  unsigned int i, ii;
+  /* Absorbs inLen bytes from inBuf into mdContext, transforming every completed 64-byte block. */
+  /* number of bytes already buffered in mdContext->in (bit count / 8, mod 64) */
+  mdi = (int)((mdContext->i[0] >> 3) & 0x3F);
+
+  /* add 8 * inLen to the two-word bit counter; the carry test relies on UINT4 wrapping at 32 bits */
+  if((mdContext->i[0] + ((UINT4)inLen << 3)) < mdContext->i[0])
+    mdContext->i[1]++;
+  mdContext->i[0] += ((UINT4)inLen << 3);
+  mdContext->i[1] += ((UINT4)inLen >> 29);
+
+  while(inLen--)
+    {
+      /* add new character to buffer, increment mdi */
+      mdContext->in[mdi++] = *inBuf++;
+
+      /* buffer full (0x40 = 64 bytes): pack it into 16 little-endian words and transform */
+      if(mdi == 0x40)
+        {
+          for(i = 0, ii = 0; i < 16; i++, ii += 4)
+            in[i] = (((UINT4)mdContext->in[ii + 3]) << 24) | (((UINT4)mdContext->in[ii + 2]) << 16) |
+                    (((UINT4)mdContext->in[ii + 1]) << 8) | ((UINT4)mdContext->in[ii]);
+          Transform(mdContext->buf, in);
+          mdi = 0;
+        }
+    }
+}
+
+void MD5Final(MD5_CTX *mdContext)
+{
+  UINT4 in[16];
+  int mdi;
+  unsigned int i, ii;
+  unsigned int padLen;
+  /* Pads the message, runs the last transform and stores the 16-byte result in mdContext->digest. */
+  /* save number of bits now: the MD5Update() call below changes mdContext->i[] */
+  in[14] = mdContext->i[0];
+  in[15] = mdContext->i[1];
+
+  /* compute number of bytes mod 64 */
+  mdi = (int)((mdContext->i[0] >> 3) & 0x3F);
+
+  /* pad out to 56 mod 64 (PADDING starts with 0x80, the rest is zero) */
+  padLen = (mdi < 56) ? (56 - mdi) : (120 - mdi);
+  MD5Update(mdContext, PADDING, padLen);
+
+  /* words 0..13 come from the 56 buffered bytes; words 14 and 15 hold the saved bit count */
+  for(i = 0, ii = 0; i < 14; i++, ii += 4)
+    in[i] = (((UINT4)mdContext->in[ii + 3]) << 24) | (((UINT4)mdContext->in[ii + 2]) << 16) | (((UINT4)mdContext->in[ii + 1]) << 8) |
+            ((UINT4)mdContext->in[ii]);
+  Transform(mdContext->buf, in);
+
+  /* store buffer in digest, low-order byte of each word first */
+  for(i = 0, ii = 0; i < 4; i++, ii += 4)
+    {
+      mdContext->digest[ii]     = (unsigned char)(mdContext->buf[i] & 0xFF);
+      mdContext->digest[ii + 1] = (unsigned char)((mdContext->buf[i] >> 8) & 0xFF);
+      mdContext->digest[ii + 2] = (unsigned char)((mdContext->buf[i] >> 16) & 0xFF);
+      mdContext->digest[ii + 3] = (unsigned char)((mdContext->buf[i] >> 24) & 0xFF);
+    }
+}
+
+/* Basic MD5 step: updates the four words in buf from the sixteen message
+ * words in in (64 steps in four rounds).  Written as a prototype-style
+ * definition because K&R-style parameter lists are no longer valid in C23. */
+static void Transform(UINT4 *buf, UINT4 *in)
+{
+  UINT4 a = buf[0], b = buf[1], c = buf[2], d = buf[3];
+
+  /* Round 1 */
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+  FF(a, b, c, d, in[0], S11, 3614090360);  /* 1 */
+  FF(d, a, b, c, in[1], S12, 3905402710);  /* 2 */
+  FF(c, d, a, b, in[2], S13, 606105819);   /* 3 */
+  FF(b, c, d, a, in[3], S14, 3250441966);  /* 4 */
+  FF(a, b, c, d, in[4], S11, 4118548399);  /* 5 */
+  FF(d, a, b, c, in[5], S12, 1200080426);  /* 6 */
+  FF(c, d, a, b, in[6], S13, 2821735955);  /* 7 */
+  FF(b, c, d, a, in[7], S14, 4249261313);  /* 8 */
+  FF(a, b, c, d, in[8], S11, 1770035416);  /* 9 */
+  FF(d, a, b, c, in[9], S12, 2336552879);  /* 10 */
+  FF(c, d, a, b, in[10], S13, 4294925233); /* 11 */
+  FF(b, c, d, a, in[11], S14, 2304563134); /* 12 */
+  FF(a, b, c, d, in[12], S11, 1804603682); /* 13 */
+  FF(d, a, b, c, in[13], S12, 4254626195); /* 14 */
+  FF(c, d, a, b, in[14], S13, 2792965006); /* 15 */
+  FF(b, c, d, a, in[15], S14, 1236535329); /* 16 */
+
+  /* Round 2 */
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+  GG(a, b, c, d, in[1], S21, 4129170786);  /* 17 */
+  GG(d, a, b, c, in[6], S22, 3225465664);  /* 18 */
+  GG(c, d, a, b, in[11], S23, 643717713);  /* 19 */
+  GG(b, c, d, a, in[0], S24, 3921069994);  /* 20 */
+  GG(a, b, c, d, in[5], S21, 3593408605);  /* 21 */
+  GG(d, a, b, c, in[10], S22, 38016083);   /* 22 */
+  GG(c, d, a, b, in[15], S23, 3634488961); /* 23 */
+  GG(b, c, d, a, in[4], S24, 3889429448);  /* 24 */
+  GG(a, b, c, d, in[9], S21, 568446438);   /* 25 */
+  GG(d, a, b, c, in[14], S22, 3275163606); /* 26 */
+  GG(c, d, a, b, in[3], S23, 4107603335);  /* 27 */
+  GG(b, c, d, a, in[8], S24, 1163531501);  /* 28 */
+  GG(a, b, c, d, in[13], S21, 2850285829); /* 29 */
+  GG(d, a, b, c, in[2], S22, 4243563512);  /* 30 */
+  GG(c, d, a, b, in[7], S23, 1735328473);  /* 31 */
+  GG(b, c, d, a, in[12], S24, 2368359562); /* 32 */
+
+  /* Round 3 */
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+  HH(a, b, c, d, in[5], S31, 4294588738);  /* 33 */
+  HH(d, a, b, c, in[8], S32, 2272392833);  /* 34 */
+  HH(c, d, a, b, in[11], S33, 1839030562); /* 35 */
+  HH(b, c, d, a, in[14], S34, 4259657740); /* 36 */
+  HH(a, b, c, d, in[1], S31, 2763975236);  /* 37 */
+  HH(d, a, b, c, in[4], S32, 1272893353);  /* 38 */
+  HH(c, d, a, b, in[7], S33, 4139469664);  /* 39 */
+  HH(b, c, d, a, in[10], S34, 3200236656); /* 40 */
+  HH(a, b, c, d, in[13], S31, 681279174);  /* 41 */
+  HH(d, a, b, c, in[0], S32, 3936430074);  /* 42 */
+  HH(c, d, a, b, in[3], S33, 3572445317);  /* 43 */
+  HH(b, c, d, a, in[6], S34, 76029189);    /* 44 */
+  HH(a, b, c, d, in[9], S31, 3654602809);  /* 45 */
+  HH(d, a, b, c, in[12], S32, 3873151461); /* 46 */
+  HH(c, d, a, b, in[15], S33, 530742520);  /* 47 */
+  HH(b, c, d, a, in[2], S34, 3299628645);  /* 48 */
+
+  /* Round 4 */
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+  II(a, b, c, d, in[0], S41, 4096336452);  /* 49 */
+  II(d, a, b, c, in[7], S42, 1126891415);  /* 50 */
+  II(c, d, a, b, in[14], S43, 2878612391); /* 51 */
+  II(b, c, d, a, in[5], S44, 4237533241);  /* 52 */
+  II(a, b, c, d, in[12], S41, 1700485571); /* 53 */
+  II(d, a, b, c, in[3], S42, 2399980690);  /* 54 */
+  II(c, d, a, b, in[10], S43, 4293915773); /* 55 */
+  II(b, c, d, a, in[1], S44, 2240044497);  /* 56 */
+  II(a, b, c, d, in[8], S41, 1873313359);  /* 57 */
+  II(d, a, b, c, in[15], S42, 4264355552); /* 58 */
+  II(c, d, a, b, in[6], S43, 2734768916);  /* 59 */
+  II(b, c, d, a, in[13], S44, 1309151649); /* 60 */
+  II(a, b, c, d, in[4], S41, 4149444226);  /* 61 */
+  II(d, a, b, c, in[11], S42, 3174756917); /* 62 */
+  II(c, d, a, b, in[2], S43, 718787259);   /* 63 */
+  II(b, c, d, a, in[9], S44, 3951481745);  /* 64 */
+
+  buf[0] += a;
+  buf[1] += b;
+  buf[2] += c;
+  buf[3] += d;
+}
+
+/*
+ **********************************************************************
+ ** End of md5.c                                                     **
+ ******************************* (cut) ********************************
+ */
+
+/*
+ **********************************************************************
+ ** md5driver.c -- sample routines to test                           **
+ ** RSA Data Security, Inc. MD5 message digest algorithm.            **
+ ** Created: 2/16/90 RLR                                             **
+ ** Updated: 1/91 SRD                                                **
+ **********************************************************************
+ */
+
+/*
+ **********************************************************************
+ ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. **
+ **                                                                  **
+ ** RSA Data Security, Inc. makes no representations concerning      **
+ ** either the merchantability of this software or the suitability   **
+ ** of this software for any particular purpose.  It is provided "as **
+ ** is" without express or implied warranty of any kind.             **
+ **                                                                  **
+ ** These notices must be retained in any copies of any part of this **
+ ** documentation and/or software.                                   **
+ **********************************************************************
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <time.h>
+/* -- include the following file if the file md5.h is separate -- */
+/* #include "md5.h" */
+
+/* Prints the message digest stored in mdContext as 32 hexadecimal digits.
+   Bytes are printed in array order, digest[0] first, two digits per byte
+   (high-order hexadecimal digit first).  Prototype-style definition.
+ */
+static void MDPrint(MD5_CTX *mdContext)
+{
+  int i;
+
+  for(i = 0; i < 16; i++)
+    printf("%02x", mdContext->digest[i]);
+}
+
+/* size of test block */
+#define TEST_BLOCK_SIZE 1000
+
+/* number of blocks to process */
+#define TEST_BLOCKS 10000
+
+/* number of test bytes = TEST_BLOCK_SIZE * TEST_BLOCKS */
+static long TEST_BYTES = (long)TEST_BLOCK_SIZE * (long)TEST_BLOCKS;
+
+/* Time trial: digests TEST_BLOCKS blocks of TEST_BLOCK_SIZE bytes and prints
+   the digest, the elapsed wall time in whole seconds and the throughput.
+   An elapsed time of 0 s counts as 1 s for the rate (avoids division by zero).
+ */
+static void MDTimeTrial(void)
+{
+  MD5_CTX mdContext;
+  time_t endTime, startTime;
+  unsigned char data[TEST_BLOCK_SIZE];
+  unsigned int i;
+
+  /* initialize test data */
+  for(i = 0; i < TEST_BLOCK_SIZE; i++)
+    data[i] = (unsigned char)(i & 0xFF);
+
+  /* start timer */
+  printf("MD5 time trial. Processing %ld characters...\n", TEST_BYTES);
+  time(&startTime);
+
+  /* digest data in TEST_BLOCK_SIZE byte blocks */
+  MD5Init(&mdContext);
+  for(i = TEST_BLOCKS; i > 0; i--)
+    MD5Update(&mdContext, data, TEST_BLOCK_SIZE);
+  MD5Final(&mdContext);
+
+  /* stop timer, get time difference (time() has a resolution of one second) */
+  time(&endTime);
+  MDPrint(&mdContext);
+  printf(" is digest of test input.\n");
+  printf("Seconds to process test input: %ld\n", (long)(endTime - startTime));
+  printf("Characters processed per second: %ld\n", TEST_BYTES / (endTime > startTime ? (long)(endTime - startTime) : 1L));
+}
+
+/* Computes the message digest for the NUL-terminated string inString.
+   Prints out message digest, a space, the string (in quotes) and two
+   newlines.  Prototype-style definition (K&R form is invalid in C23).
+ */
+static void MDString(char *inString)
+{
+  MD5_CTX mdContext;
+  unsigned int len = (unsigned int)strlen(inString);
+
+  MD5Init(&mdContext);
+  MD5Update(&mdContext, (unsigned char *)inString, len);
+  MD5Final(&mdContext);
+  MDPrint(&mdContext);
+  printf(" \"%s\"\n\n", inString);
+}
+
+/* Computes the message digest for a specified file, read in 1024-byte pieces.
+   Prints out message digest, a space, the file name, and a newline; prints
+   a "can't be opened" message instead if fopen() fails.
+ */
+static void MDFile(char *filename)
+{
+  FILE *inFile = fopen(filename, "rb");
+  MD5_CTX mdContext;
+  int bytes;
+  unsigned char data[1024];
+
+  if(inFile == NULL)
+    {
+      printf("%s can't be opened.\n", filename);
+      return;
+    }
+
+  MD5Init(&mdContext);
+  while((bytes = (int)fread(data, 1, 1024, inFile)) != 0)
+    MD5Update(&mdContext, data, bytes);
+  MD5Final(&mdContext);
+  MDPrint(&mdContext);
+  printf(" %s\n", filename);
+  fclose(inFile);
+}
+
+/* Digests everything readable from stdin, 16 bytes at a time, and writes
+   the resulting message digest to stdout followed by a newline.
+ */
+static void MDFilter()
+{
+  MD5_CTX ctx;
+  unsigned char chunk[16];
+  int got;
+
+  MD5Init(&ctx);
+  for(got = fread(chunk, 1, 16, stdin); got != 0; got = fread(chunk, 1, 16, stdin))
+    MD5Update(&ctx, chunk, got);
+  MD5Final(&ctx);
+  MDPrint(&ctx);
+  printf("\n");
+}
+
+/* Runs the standard suite of test data: prints the digests of seven
+   reference strings (in the order listed) and of the file "foo". */
+static void MDTestSuite()
+{
+  static char *const samples[] = {"", "a", "abc", "message digest", "abcdefghijklmnopqrstuvwxyz",
+                                  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
+                                  "1234567890123456789012345678901234567890"
+                                  "1234567890123456789012345678901234567890"};
+  unsigned int k;
+
+  printf("MD5 test suite results:\n\n");
+  for(k = 0; k < sizeof(samples) / sizeof(samples[0]); k++)
+    MDString(samples[k]);
+
+  /* Contents of file foo are "abc" */
+  MDFile("foo");
+}
diff --git a/src/amuse/community/arepo/src/debug_md5/Md5.h b/src/amuse/community/arepo/src/debug_md5/Md5.h
new file mode 100644
index 0000000000..df809ed5d2
--- /dev/null
+++ b/src/amuse/community/arepo/src/debug_md5/Md5.h
@@ -0,0 +1,92 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/debug_md5/Md5.h
+ * \date        05/2018
+ * \brief       Header for implementation of MD5 checksums.
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 27.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+/*
+ **********************************************************************
+ ** md5.h -- Header file for implementation of MD5                   **
+ ** RSA Data Security, Inc. MD5 Message Digest Algorithm             **
+ ** Created: 2/17/90 RLR                                             **
+ ** Revised: 12/27/90 SRD,AJ,BSK,JT Reference C version              **
+ ** Revised (for MD5): RLR 4/27/91                                   **
+ **   -- G modified to have y&~z instead of y&z                      **
+ **   -- FF, GG, HH modified to add in last register done            **
+ **   -- Access pattern: round 2 works mod 5, round 3 works mod 3    **
+ **   -- distinct additive constant for each step                    **
+ **   -- round 4 added, working mod 7                                **
+ **********************************************************************
+ */
+
+/*
+ **********************************************************************
+ ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. **
+ **                                                                  **
+ ** License to copy and use this software is granted provided that   **
+ ** it is identified as the "RSA Data Security, Inc. MD5 Message     **
+ ** Digest Algorithm" in all material mentioning or referencing this **
+ ** software or this function.                                       **
+ **                                                                  **
+ ** License is also granted to make and use derivative works         **
+ ** provided that such works are identified as "derived from the RSA **
+ ** Data Security, Inc. MD5 Message Digest Algorithm" in all         **
+ ** material mentioning or referencing the derived work.             **
+ **                                                                  **
+ ** RSA Data Security, Inc. makes no representations concerning      **
+ ** either the merchantability of this software or the suitability   **
+ ** of this software for any particular purpose.  It is provided "as **
+ ** is" without express or implied warranty of any kind.             **
+ **                                                                  **
+ ** These notices must be retained in any copies of any part of this **
+ ** documentation and/or software.                                   **
+ **********************************************************************
+ */
+
+/* typedef a 32 bit type ("unsigned long" is 64 bits wide on LP64 platforms, which breaks the mod-2^32 arithmetic) */
+typedef unsigned int UINT4;
+
+/* Data structure for MD5 (Message Digest) computation */
+typedef struct
+{
+  UINT4 i[2];               /* number of _bits_ handled mod 2^64 */
+  UINT4 buf[4];             /* scratch buffer */
+  unsigned char in[64];     /* input buffer */
+  unsigned char digest[16]; /* actual digest after MD5Final call */
+} MD5_CTX;
+/* Call order: MD5Init(), then MD5UpdateLong() for the data, then MD5Final(); read digest[] afterwards. */
+void MD5Final(MD5_CTX *mdContext);
+/* MD5Update() is not declared here: Md5.c forward-declares it static, so callers use MD5UpdateLong(). */
+void MD5UpdateLong(MD5_CTX *mdContext, unsigned char *inBuf, unsigned long long inLenLong);
+void MD5Init(MD5_CTX *mdContext);
+
+/*
+ **********************************************************************
+ ** End of md5.h                                                     **
+ ******************************* (cut) ********************************
+ */
diff --git a/src/amuse/community/arepo/src/debug_md5/calc_checksum.c b/src/amuse/community/arepo/src/debug_md5/calc_checksum.c
new file mode 100644
index 0000000000..3f710ef036
--- /dev/null
+++ b/src/amuse/community/arepo/src/debug_md5/calc_checksum.c
@@ -0,0 +1,121 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/debug_md5/calc_checksum.c
+ * \date        05/2018
+ * \brief       Functions to calculate an MD5 checksum from a dataset.
+ * \details     contains functions:
+ *                void calc_memory_checksum(void *base, size_t bytes)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "Md5.h"
+
+/*! \brief Calculates an MD5 checksum of a memory block on every MPI task and
+ *         prints the element-wise sum of the per-task digests on task 0.
+ *
+ *  \param[in] base Pointer to start of data.
+ *  \param[in] bytes Number of bytes to be checked.
+ *  \return void
+ */
+void calc_memory_checksum(void *base, size_t bytes)
+{
+  MD5_CTX sum;
+  int i;
+  union
+  {
+    unsigned char digest[16];
+    unsigned int val[4];
+  } u, uglob;
+
+  MD5Init(&sum);
+  MD5UpdateLong(&sum, base, bytes);
+  MD5Final(&sum);
+
+  for(i = 0; i < 16; i++)
+    u.digest[i] = sum.digest[i];
+
+  /* digest words are summed as unsigned values so that the sum wraps instead of overflowing a signed int */
+  MPI_Allreduce(u.val, uglob.val, 4, MPI_UNSIGNED, MPI_SUM, MPI_COMM_WORLD);
+
+  if(ThisTask == 0)
+    {
+      printf("Step=%d  MD5=", All.NumCurrentTiStep);
+      for(i = 0; i < 16; i++)
+        printf("%02x", uglob.digest[i]);
+      printf("\n");
+    }
+}
+
+#ifdef RESTART_DEBUG
+/*! \brief Writes MD5 fingerprints of the in-memory P[] and SphP[] arrays
+ *         (per-task digests summed over all MPI tasks) to FdRestartTest.
+ *  \return void
+ */
+void log_restart_debug(void)
+{
+  MD5_CTX sum;
+  union
+  {
+    unsigned char digest[16];
+    unsigned int val[4];
+  } u, uglob_P, uglob_SphP;
+  int i;
+
+  MD5Init(&sum);
+  MD5UpdateLong(&sum, (void *)P, NumPart * sizeof(struct particle_data));
+  MD5Final(&sum);
+
+  for(i = 0; i < 16; i++)
+    u.digest[i] = sum.digest[i];
+  /* digest words are summed as unsigned values so that the sum wraps instead of overflowing a signed int */
+  MPI_Allreduce(u.val, uglob_P.val, 4, MPI_UNSIGNED, MPI_SUM, MPI_COMM_WORLD);
+
+  MD5Init(&sum);
+  MD5UpdateLong(&sum, (void *)SphP, NumGas * sizeof(struct sph_particle_data));
+  MD5Final(&sum);
+
+  for(i = 0; i < 16; i++)
+    u.digest[i] = sum.digest[i];
+  /* same unsigned reduction for the gas cell data */
+  MPI_Allreduce(u.val, uglob_SphP.val, 4, MPI_UNSIGNED, MPI_SUM, MPI_COMM_WORLD);
+
+  if(ThisTask == 0)
+    {
+      fprintf(FdRestartTest, "\n");
+      fprintf(FdRestartTest, "Step=%8d  P[]        ", All.NumCurrentTiStep);
+      for(i = 0; i < 16; i++)
+        fprintf(FdRestartTest, "%02x", uglob_P.digest[i]);
+      fprintf(FdRestartTest, "\n");
+      fprintf(FdRestartTest, "               SphP[]     ");
+      for(i = 0; i < 16; i++)
+        fprintf(FdRestartTest, "%02x", uglob_SphP.digest[i]);
+      fprintf(FdRestartTest, "\n");
+      fflush(FdRestartTest);
+    }
+}
+#endif
diff --git a/src/amuse/community/arepo/src/domain/bsd_tree.h b/src/amuse/community/arepo/src/domain/bsd_tree.h
new file mode 100644
index 0000000000..c8f763abdf
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/bsd_tree.h
@@ -0,0 +1,865 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/domain/bsd_tree.h
+ * \date        05/2018
+ * \brief       Splay tree and red-black tree macros (BSD sys/tree.h).
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 29.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+/*      $NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $  */
+/*      $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $    */
+/* $FreeBSD: src/sys/sys/tree.h,v 1.9.4.2 2012/11/17 11:37:26 svnexp Exp $ */
+
+/*-
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_TREE_H_
+#define _SYS_TREE_H_
+
+#include <sys/cdefs.h>
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure.  Every operation
+ * on the tree causes a splay to happen.  The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree.  On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n).  The
+ * amortized cost of each access to a splay tree is therefore O(lg n).
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute.  It fulfills a set of conditions:
+ *      - every search path from the root to a leaf consists of the
+ *        same number of black nodes,
+ *      - each red node (except for the root) has a black parent,
+ *      - each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+#define SPLAY_HEAD(name, type)                    \
+  struct name /* head of a splay tree */          \
+  {                                               \
+    struct type *sph_root; /* root of the tree */ \
+  }
+
+#define SPLAY_INITIALIZER(root) \
+  { /* brace initializer */     \
+    NULL /* empty tree */       \
+  }
+
+#define SPLAY_INIT(root)       \
+  do /* reset to empty tree */ \
+    {                          \
+      (root)->sph_root = NULL; \
+    }                          \
+  while(/*CONSTCOND*/ 0)
+
+#define SPLAY_ENTRY(type)                       \
+  struct /* per-node link fields */             \
+  {                                             \
+    struct type *spe_left;  /* left element */  \
+    struct type *spe_right; /* right element */ \
+  }
+
+#define SPLAY_LEFT(elm, field) (elm)->field.spe_left   /* left child of elm */
+#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right /* right child of elm */
+#define SPLAY_ROOT(head) (head)->sph_root              /* root node, NULL if the tree is empty */
+#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL)   /* true if the tree has no nodes */
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp holds SPLAY_{RIGHT,LEFT} of the root */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field)                         \
+  do /* tmp becomes root, old root its right child */                \
+    {                                                                \
+      SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \
+      SPLAY_RIGHT(tmp, field)             = (head)->sph_root;        \
+      (head)->sph_root                    = tmp;                     \
+    }                                                                \
+  while(/*CONSTCOND*/ 0)
+
+#define SPLAY_ROTATE_LEFT(head, tmp, field)                          \
+  do /* tmp becomes root, old root its left child */                 \
+    {                                                                \
+      SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \
+      SPLAY_LEFT(tmp, field)               = (head)->sph_root;       \
+      (head)->sph_root                     = tmp;                    \
+    }                                                                \
+  while(/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKLEFT(head, tmp, field)                            \
+  do /* hang root under tmp's left, descend left */                 \
+    {                                                               \
+      SPLAY_LEFT(tmp, field) = (head)->sph_root;                    \
+      tmp                    = (head)->sph_root;                    \
+      (head)->sph_root       = SPLAY_LEFT((head)->sph_root, field); \
+    }                                                               \
+  while(/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKRIGHT(head, tmp, field)                             \
+  do /* hang root under tmp's right, descend right */                 \
+    {                                                                 \
+      SPLAY_RIGHT(tmp, field) = (head)->sph_root;                     \
+      tmp                     = (head)->sph_root;                     \
+      (head)->sph_root        = SPLAY_RIGHT((head)->sph_root, field); \
+    }                                                                 \
+  while(/*CONSTCOND*/ 0)
+
+#define SPLAY_ASSEMBLE(head, node, left, right, field)                             \
+  do /* hook the chains hung off node back under root */                           \
+    {                                                                              \
+      SPLAY_RIGHT(left, field)             = SPLAY_LEFT((head)->sph_root, field);  \
+      SPLAY_LEFT(right, field)             = SPLAY_RIGHT((head)->sph_root, field); \
+      SPLAY_LEFT((head)->sph_root, field)  = SPLAY_RIGHT(node, field);             \
+      SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field);              \
+    }                                                                              \
+  while(/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions */
+
+#define SPLAY_PROTOTYPE(name, type, field, cmp)                                       \
+  void name##_SPLAY(struct name *, struct type *);                                    \
+  void name##_SPLAY_MINMAX(struct name *, int);                                       \
+  struct type *name##_SPLAY_INSERT(struct name *, struct type *);                     \
+  struct type *name##_SPLAY_REMOVE(struct name *, struct type *);                     \
+                                                                                      \
+  /* Looks up the node whose key compares equal to elm; NULL if absent */             \
+  static __inline struct type *name##_SPLAY_FIND(struct name *head, struct type *elm) \
+  {                                                                                   \
+    struct type *__hit = NULL;                                                        \
+    if(!SPLAY_EMPTY(head))                                                            \
+      {                                                                               \
+        name##_SPLAY(head, elm);                                                      \
+        if((cmp)(elm, (head)->sph_root) == 0)                                         \
+          __hit = (head)->sph_root;                                                   \
+      }                                                                               \
+    return (__hit);                                                                   \
+  }                                                                                   \
+                                                                                      \
+  /* Splays elm to the root and returns the next node in key order, or NULL */        \
+  static __inline struct type *name##_SPLAY_NEXT(struct name *head, struct type *elm) \
+  {                                                                                   \
+    struct type *__succ;                                                              \
+    name##_SPLAY(head, elm);                                                          \
+    /* leftmost node of the right subtree, if there is one */                         \
+    __succ = SPLAY_RIGHT(elm, field);                                                 \
+    if(__succ != NULL)                                                                \
+      while(SPLAY_LEFT(__succ, field) != NULL)                                        \
+        __succ = SPLAY_LEFT(__succ, field);                                           \
+    return (__succ);                                                                  \
+  }                                                                                   \
+                                                                                      \
+  /* Splays the minimum (val < 0) or maximum (val > 0) to the root and returns it */  \
+  static __inline struct type *name##_SPLAY_MIN_MAX(struct name *head, int val)       \
+  {                                                                                   \
+    name##_SPLAY_MINMAX(head, val);                                                   \
+    return (SPLAY_ROOT(head));                                                        \
+  }
+
+/* Generates the function bodies.  The main splay operation, name##_SPLAY,
+ * moves the node closest to the key of elm to the top.
+ */
+#define SPLAY_GENERATE(name, type, field, cmp)                                           \
+  struct type *name##_SPLAY_INSERT(struct name *head, struct type *elm)                  \
+  { /* inserts elm; returns NULL, or the node already holding that key */                \
+    if(SPLAY_EMPTY(head))                                                                \
+      {                                                                                  \
+        SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; /* sole node */         \
+      }                                                                                  \
+    else                                                                                 \
+      {                                                                                  \
+        int __comp;                                                                      \
+        name##_SPLAY(head, elm); /* nearest key is now the root */                       \
+        __comp = (cmp)(elm, (head)->sph_root);                                           \
+        if(__comp < 0) /* elm < root: root becomes elm's right child */                  \
+          {                                                                              \
+            SPLAY_LEFT(elm, field)              = SPLAY_LEFT((head)->sph_root, field);   \
+            SPLAY_RIGHT(elm, field)             = (head)->sph_root;                      \
+            SPLAY_LEFT((head)->sph_root, field) = NULL;                                  \
+          }                                                                              \
+        else if(__comp > 0) /* elm > root: root becomes elm's left child */              \
+          {                                                                              \
+            SPLAY_RIGHT(elm, field)              = SPLAY_RIGHT((head)->sph_root, field); \
+            SPLAY_LEFT(elm, field)               = (head)->sph_root;                     \
+            SPLAY_RIGHT((head)->sph_root, field) = NULL;                                 \
+          }                                                                              \
+        else                                                                             \
+          return ((head)->sph_root); /* key already present: tree keeps old node */      \
+      }                                                                                  \
+    (head)->sph_root = (elm);                                                            \
+    return (NULL);                                                                       \
+  }                                                                                      \
+                                                                                         \
+  struct type *name##_SPLAY_REMOVE(struct name *head, struct type *elm)                  \
+  {                                                                                      \
+    struct type *__rsub;                                                                 \
+    if(SPLAY_EMPTY(head))                                                                \
+      return (NULL);                                                                     \
+    /* bring the node matching elm's key (if any) to the root */                         \
+    name##_SPLAY(head, elm);                                                             \
+    if((cmp)(elm, (head)->sph_root) != 0)                                                \
+      return (NULL);                                                                     \
+    if(SPLAY_LEFT((head)->sph_root, field) == NULL)                                      \
+      {                                                                                  \
+        /* no left subtree: the right subtree is all that remains */                     \
+        (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);                         \
+        return (elm);                                                                    \
+      }                                                                                  \
+    /* splay the left subtree for elm's key; its new root then takes over */             \
+    /* the old right subtree (any right child it had would be overwritten) */            \
+    __rsub           = SPLAY_RIGHT((head)->sph_root, field);                             \
+    (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);                              \
+    name##_SPLAY(head, elm);                                                             \
+    SPLAY_RIGHT((head)->sph_root, field) = __rsub;                                       \
+    return (elm);                                                                        \
+  }                                                                                      \
+                                                                                         \
+  void name##_SPLAY(struct name *head, struct type *elm)                                 \
+  { /* moves the node nearest elm's key to the root; tree must be non-empty */           \
+    struct type __node, *__left, *__right, *__tmp; /* __node is a dummy anchor */        \
+    int __comp;                                                                          \
+                                                                                         \
+    SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;                     \
+    __left = __right = &__node;                                                          \
+                                                                                         \
+    while((__comp = (cmp)(elm, (head)->sph_root)) != 0)                                  \
+      {                                                                                  \
+        if(__comp < 0) /* elm sorts before the root: go left */                          \
+          {                                                                              \
+            __tmp = SPLAY_LEFT((head)->sph_root, field);                                 \
+            if(__tmp == NULL)                                                            \
+              break;                                                                     \
+            if((cmp)(elm, __tmp) < 0) /* same direction twice: rotate first */           \
+              {                                                                          \
+                SPLAY_ROTATE_RIGHT(head, __tmp, field);                                  \
+                if(SPLAY_LEFT((head)->sph_root, field) == NULL)                          \
+                  break;                                                                 \
+              }                                                                          \
+            SPLAY_LINKLEFT(head, __right, field);                                        \
+          }                                                                              \
+        else if(__comp > 0) /* elm sorts after the root: go right */                     \
+          {                                                                              \
+            __tmp = SPLAY_RIGHT((head)->sph_root, field);                                \
+            if(__tmp == NULL)                                                            \
+              break;                                                                     \
+            if((cmp)(elm, __tmp) > 0)                                                    \
+              {                                                                          \
+                SPLAY_ROTATE_LEFT(head, __tmp, field);                                   \
+                if(SPLAY_RIGHT((head)->sph_root, field) == NULL)                         \
+                  break;                                                                 \
+              }                                                                          \
+            SPLAY_LINKRIGHT(head, __left, field);                                        \
+          }                                                                              \
+      }                                                                                  \
+    SPLAY_ASSEMBLE(head, &__node, __left, __right, field); /* rejoin the pieces */       \
+  }                                                                                      \
+                                                                                         \
+  /* Splays either the minimum (__comp < 0) or the maximum (__comp > 0)                  \
+   * element to the root.  An empty tree or __comp == 0 is left untouched                \
+   * instead of dereferencing a NULL root or looping forever.                            \
+   */                                                                                    \
+  void name##_SPLAY_MINMAX(struct name *head, int __comp)                                \
+  {                                                                                      \
+    struct type __node, *__left, *__right, *__tmp;                                       \
+                                                                                         \
+    /* nothing to do; this also keeps the loop below from spinning */                    \
+    if(SPLAY_EMPTY(head) || __comp == 0)                                                 \
+      return;                                                                            \
+    SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;                     \
+    __left = __right = &__node;                                                          \
+                                                                                         \
+    while(1)                                                                             \
+      {                                                                                  \
+        if(__comp < 0)                                                                   \
+          {                                                                              \
+            __tmp = SPLAY_LEFT((head)->sph_root, field);                                 \
+            if(__tmp == NULL)                                                            \
+              break;                                                                     \
+            /* the direction never changes, so always rotate before linking */           \
+            SPLAY_ROTATE_RIGHT(head, __tmp, field);                                      \
+            if(SPLAY_LEFT((head)->sph_root, field) == NULL)                              \
+              break;                                                                     \
+            SPLAY_LINKLEFT(head, __right, field);                                        \
+          }                                                                              \
+        else                                                                             \
+          {                                                                              \
+            /* mirror image of the branch above */                                       \
+            __tmp = SPLAY_RIGHT((head)->sph_root, field);                                \
+            if(__tmp == NULL)                                                            \
+              break;                                                                     \
+            SPLAY_ROTATE_LEFT(head, __tmp, field);                                       \
+            if(SPLAY_RIGHT((head)->sph_root, field) == NULL)                             \
+              break;                                                                     \
+            SPLAY_LINKRIGHT(head, __left, field);                                        \
+          }                                                                              \
+      }                                                                                  \
+    SPLAY_ASSEMBLE(head, &__node, __left, __right, field);                               \
+  }
+
+#define SPLAY_NEGINF -1 /* direction argument: splay the minimum to the root */
+#define SPLAY_INF 1     /* direction argument: splay the maximum to the root */
+
+#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y) /* x: tree head, y: element */
+#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF)) /* NULL if the tree is empty */
+#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL : name##_SPLAY_MIN_MAX(x, SPLAY_INF))    /* NULL if the tree is empty */
+
+#define SPLAY_FOREACH(x, name, head) for((x) = SPLAY_MIN(name, head); (x) != NULL; (x) = SPLAY_NEXT(name, head, x)) /* walks from the minimum upwards; every step splays the tree */
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type)                       \
+  struct name /* head of a red-black tree */      \
+  {                                               \
+    struct type *rbh_root; /* root of the tree */ \
+  }
+
+#define RB_INITIALIZER(root) \
+  { /* brace initializer */  \
+    NULL /* empty tree */    \
+  }
+
+#define RB_INIT(root)          \
+  do /* reset to empty tree */ \
+    {                          \
+      (root)->rbh_root = NULL; \
+    }                          \
+  while(/*CONSTCOND*/ 0)
+
+#define RB_BLACK 0 /* RB_BLACK and RB_RED are the values stored in rbe_color */
+#define RB_RED 1
+#define RB_ENTRY(type)                            \
+  struct /* per-node link fields */               \
+  {                                               \
+    struct type *rbe_left;   /* left element */   \
+    struct type *rbe_right;  /* right element */  \
+    struct type *rbe_parent; /* parent element */ \
+    int rbe_color;           /* node color */     \
+  }
+
+#define RB_LEFT(elm, field) (elm)->field.rbe_left     /* left child of elm */
+#define RB_RIGHT(elm, field) (elm)->field.rbe_right   /* right child of elm */
+#define RB_PARENT(elm, field) (elm)->field.rbe_parent /* parent of elm */
+#define RB_COLOR(elm, field) (elm)->field.rbe_color   /* RB_BLACK or RB_RED */
+#define RB_ROOT(head) (head)->rbh_root                /* root node, NULL if the tree is empty */
+#define RB_EMPTY(head) (RB_ROOT(head) == NULL)        /* true if the tree has no nodes */
+
+#define RB_SET(elm, parent, field)                         \
+  do /* init elm: red, no children, given parent */        \
+    {                                                      \
+      RB_PARENT(elm, field) = parent;                      \
+      RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL;   \
+      RB_COLOR(elm, field)                       = RB_RED; \
+    }                                                      \
+  while(/*CONSTCOND*/ 0)
+
+#define RB_SET_BLACKRED(black, red, field) \
+  do /* recolor two nodes at once */       \
+    {                                      \
+      RB_COLOR(black, field) = RB_BLACK;   \
+      RB_COLOR(red, field)   = RB_RED;     \
+    }                                      \
+  while(/*CONSTCOND*/ 0)
+
+#ifndef RB_AUGMENT /* default hook is a no-op; the rotations invoke it on the nodes they relink */
+#define RB_AUGMENT(x) \
+  do                  \
+    {                 \
+    }                 \
+  while(0)
+#endif
+
+#define RB_ROTATE_LEFT(head, elm, tmp, field)                     \
+  do /* tmp = right child of elm; tmp takes elm's place */        \
+    {                                                             \
+      (tmp) = RB_RIGHT(elm, field);                               \
+      if((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL)    \
+        {                                                         \
+          RB_PARENT(RB_LEFT(tmp, field), field) = (elm);          \
+        }                                                         \
+      RB_AUGMENT(elm);                                            \
+      if((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) \
+        {                                                         \
+          if((elm) == RB_LEFT(RB_PARENT(elm, field), field))      \
+            RB_LEFT(RB_PARENT(elm, field), field) = (tmp);        \
+          else                                                    \
+            RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);       \
+        }                                                         \
+      else /* elm was the root */                                 \
+        (head)->rbh_root = (tmp);                                 \
+      RB_LEFT(tmp, field)   = (elm); /* elm now left child */     \
+      RB_PARENT(elm, field) = (tmp);                              \
+      RB_AUGMENT(tmp);                                            \
+      if((RB_PARENT(tmp, field)))                                 \
+        RB_AUGMENT(RB_PARENT(tmp, field));                        \
+    }                                                             \
+  while(/*CONSTCOND*/ 0)
+
+#define RB_ROTATE_RIGHT(head, elm, tmp, field)                    \
+  do /* tmp = left child of elm; tmp takes elm's place */         \
+    {                                                             \
+      (tmp) = RB_LEFT(elm, field);                                \
+      if((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL)    \
+        {                                                         \
+          RB_PARENT(RB_RIGHT(tmp, field), field) = (elm);         \
+        }                                                         \
+      RB_AUGMENT(elm);                                            \
+      if((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) \
+        {                                                         \
+          if((elm) == RB_LEFT(RB_PARENT(elm, field), field))      \
+            RB_LEFT(RB_PARENT(elm, field), field) = (tmp);        \
+          else                                                    \
+            RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);       \
+        }                                                         \
+      else /* elm was the root */                                 \
+        (head)->rbh_root = (tmp);                                 \
+      RB_RIGHT(tmp, field)  = (elm); /* elm now right child */    \
+      RB_PARENT(elm, field) = (tmp);                              \
+      RB_AUGMENT(tmp);                                            \
+      if((RB_PARENT(tmp, field)))                                 \
+        RB_AUGMENT(RB_PARENT(tmp, field));                        \
+    }                                                             \
+  while(/*CONSTCOND*/ 0)
+
+/* Generates the prototypes; attr is empty (external linkage) or static */
+#define RB_PROTOTYPE(name, type, field, cmp) RB_PROTOTYPE_INTERNAL(name, type, field, cmp, )
+#define RB_PROTOTYPE_STATIC(name, type, field, cmp) RB_PROTOTYPE_INTERNAL(name, type, field, cmp, static)
+#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr)                      \
+  attr void name##_RB_INSERT_COLOR(struct name *, struct type *);                \
+  attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *); \
+  attr struct type *name##_RB_REMOVE(struct name *, struct type *);              \
+  attr struct type *name##_RB_INSERT(struct name *, struct type *);              \
+  attr struct type *name##_RB_FIND(struct name *, struct type *);                \
+  attr struct type *name##_RB_NFIND(struct name *, struct type *);               \
+  attr struct type *name##_RB_NEXT(struct type *);                               \
+  attr struct type *name##_RB_PREV(struct type *);                               \
+  attr struct type *name##_RB_MINMAX(struct name *, int);
+
+/* Generates the function definitions of the red-black tree.
+ * attr is empty for external linkage or static for file-local functions.
+ */
+#define RB_GENERATE(name, type, field, cmp) RB_GENERATE_INTERNAL(name, type, field, cmp, )
+#define RB_GENERATE_STATIC(name, type, field, cmp) RB_GENERATE_INTERNAL(name, type, field, cmp, static)
+#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr)                                            \
+  attr void name##_RB_INSERT_COLOR(struct name *head, struct type *elm)                               \
+  {                                                                                                   \
+    struct type *parent, *gparent, *tmp;                                                              \
+    while((parent = RB_PARENT(elm, field)) != NULL && RB_COLOR(parent, field) == RB_RED)              \
+      {                                                                                               \
+        gparent = RB_PARENT(parent, field);                                                           \
+        if(parent == RB_LEFT(gparent, field))                                                         \
+          {                                                                                           \
+            tmp = RB_RIGHT(gparent, field);                                                           \
+            if(tmp && RB_COLOR(tmp, field) == RB_RED)                                                 \
+              {                                                                                       \
+                RB_COLOR(tmp, field) = RB_BLACK;                                                      \
+                RB_SET_BLACKRED(parent, gparent, field);                                              \
+                elm = gparent;                                                                        \
+                continue;                                                                             \
+              }                                                                                       \
+            if(RB_RIGHT(parent, field) == elm)                                                        \
+              {                                                                                       \
+                RB_ROTATE_LEFT(head, parent, tmp, field);                                             \
+                tmp    = parent;                                                                      \
+                parent = elm;                                                                         \
+                elm    = tmp;                                                                         \
+              }                                                                                       \
+            RB_SET_BLACKRED(parent, gparent, field);                                                  \
+            RB_ROTATE_RIGHT(head, gparent, tmp, field);                                               \
+          }                                                                                           \
+        else                                                                                          \
+          {                                                                                           \
+            tmp = RB_LEFT(gparent, field);                                                            \
+            if(tmp && RB_COLOR(tmp, field) == RB_RED)                                                 \
+              {                                                                                       \
+                RB_COLOR(tmp, field) = RB_BLACK;                                                      \
+                RB_SET_BLACKRED(parent, gparent, field);                                              \
+                elm = gparent;                                                                        \
+                continue;                                                                             \
+              }                                                                                       \
+            if(RB_LEFT(parent, field) == elm)                                                         \
+              {                                                                                       \
+                RB_ROTATE_RIGHT(head, parent, tmp, field);                                            \
+                tmp    = parent;                                                                      \
+                parent = elm;                                                                         \
+                elm    = tmp;                                                                         \
+              }                                                                                       \
+            RB_SET_BLACKRED(parent, gparent, field);                                                  \
+            RB_ROTATE_LEFT(head, gparent, tmp, field);                                                \
+          }                                                                                           \
+      }                                                                                               \
+    RB_COLOR(head->rbh_root, field) = RB_BLACK;                                                       \
+  }                                                                                                   \
+                                                                                                      \
+  attr void name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm)          \
+  {                                                                                                   \
+    struct type *tmp;                                                                                 \
+    while((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && elm != RB_ROOT(head))                  \
+      {                                                                                               \
+        if(RB_LEFT(parent, field) == elm)                                                             \
+          {                                                                                           \
+            tmp = RB_RIGHT(parent, field);                                                            \
+            if(RB_COLOR(tmp, field) == RB_RED)                                                        \
+              {                                                                                       \
+                RB_SET_BLACKRED(tmp, parent, field);                                                  \
+                RB_ROTATE_LEFT(head, parent, tmp, field);                                             \
+                tmp = RB_RIGHT(parent, field);                                                        \
+              }                                                                                       \
+            if((RB_LEFT(tmp, field) == NULL || RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&   \
+               (RB_RIGHT(tmp, field) == NULL || RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK))   \
+              {                                                                                       \
+                RB_COLOR(tmp, field) = RB_RED;                                                        \
+                elm                  = parent;                                                        \
+                parent               = RB_PARENT(elm, field);                                         \
+              }                                                                                       \
+            else                                                                                      \
+              {                                                                                       \
+                if(RB_RIGHT(tmp, field) == NULL || RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) \
+                  {                                                                                   \
+                    struct type *oleft;                                                               \
+                    if((oleft = RB_LEFT(tmp, field)) != NULL)                                         \
+                      RB_COLOR(oleft, field) = RB_BLACK;                                              \
+                    RB_COLOR(tmp, field) = RB_RED;                                                    \
+                    RB_ROTATE_RIGHT(head, tmp, oleft, field);                                         \
+                    tmp = RB_RIGHT(parent, field);                                                    \
+                  }                                                                                   \
+                RB_COLOR(tmp, field)    = RB_COLOR(parent, field);                                    \
+                RB_COLOR(parent, field) = RB_BLACK;                                                   \
+                if(RB_RIGHT(tmp, field))                                                              \
+                  RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;                                   \
+                RB_ROTATE_LEFT(head, parent, tmp, field);                                             \
+                elm = RB_ROOT(head);                                                                  \
+                break;                                                                                \
+              }                                                                                       \
+          }                                                                                           \
+        else                                                                                          \
+          {                                                                                           \
+            tmp = RB_LEFT(parent, field);                                                             \
+            if(RB_COLOR(tmp, field) == RB_RED)                                                        \
+              {                                                                                       \
+                RB_SET_BLACKRED(tmp, parent, field);                                                  \
+                RB_ROTATE_RIGHT(head, parent, tmp, field);                                            \
+                tmp = RB_LEFT(parent, field);                                                         \
+              }                                                                                       \
+            if((RB_LEFT(tmp, field) == NULL || RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&   \
+               (RB_RIGHT(tmp, field) == NULL || RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK))   \
+              {                                                                                       \
+                RB_COLOR(tmp, field) = RB_RED;                                                        \
+                elm                  = parent;                                                        \
+                parent               = RB_PARENT(elm, field);                                         \
+              }                                                                                       \
+            else                                                                                      \
+              {                                                                                       \
+                if(RB_LEFT(tmp, field) == NULL || RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK)   \
+                  {                                                                                   \
+                    struct type *oright;                                                              \
+                    if((oright = RB_RIGHT(tmp, field)) != NULL)                                       \
+                      RB_COLOR(oright, field) = RB_BLACK;                                             \
+                    RB_COLOR(tmp, field) = RB_RED;                                                    \
+                    RB_ROTATE_LEFT(head, tmp, oright, field);                                         \
+                    tmp = RB_LEFT(parent, field);                                                     \
+                  }                                                                                   \
+                RB_COLOR(tmp, field)    = RB_COLOR(parent, field);                                    \
+                RB_COLOR(parent, field) = RB_BLACK;                                                   \
+                if(RB_LEFT(tmp, field))                                                               \
+                  RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;                                    \
+                RB_ROTATE_RIGHT(head, parent, tmp, field);                                            \
+                elm = RB_ROOT(head);                                                                  \
+                break;                                                                                \
+              }                                                                                       \
+          }                                                                                           \
+      }                                                                                               \
+    if(elm)                                                                                           \
+      RB_COLOR(elm, field) = RB_BLACK;                                                                \
+  }                                                                                                   \
+                                                                                                      \
+  attr struct type *name##_RB_REMOVE(struct name *head, struct type *elm)                             \
+  {                                                                                                   \
+    struct type *child, *parent, *old = elm;                                                          \
+    int color;                                                                                        \
+    if(RB_LEFT(elm, field) == NULL)                                                                   \
+      child = RB_RIGHT(elm, field);                                                                   \
+    else if(RB_RIGHT(elm, field) == NULL)                                                             \
+      child = RB_LEFT(elm, field);                                                                    \
+    else                                                                                              \
+      {                                                                                               \
+        struct type *left;                                                                            \
+        elm = RB_RIGHT(elm, field);                                                                   \
+        while((left = RB_LEFT(elm, field)) != NULL)                                                   \
+          elm = left;                                                                                 \
+        child  = RB_RIGHT(elm, field);                                                                \
+        parent = RB_PARENT(elm, field);                                                               \
+        color  = RB_COLOR(elm, field);                                                                \
+        if(child)                                                                                     \
+          RB_PARENT(child, field) = parent;                                                           \
+        if(parent)                                                                                    \
+          {                                                                                           \
+            if(RB_LEFT(parent, field) == elm)                                                         \
+              RB_LEFT(parent, field) = child;                                                         \
+            else                                                                                      \
+              RB_RIGHT(parent, field) = child;                                                        \
+            RB_AUGMENT(parent);                                                                       \
+          }                                                                                           \
+        else                                                                                          \
+          RB_ROOT(head) = child;                                                                      \
+        if(RB_PARENT(elm, field) == old)                                                              \
+          parent = elm;                                                                               \
+        (elm)->field = (old)->field;                                                                  \
+        if(RB_PARENT(old, field))                                                                     \
+          {                                                                                           \
+            if(RB_LEFT(RB_PARENT(old, field), field) == old)                                          \
+              RB_LEFT(RB_PARENT(old, field), field) = elm;                                            \
+            else                                                                                      \
+              RB_RIGHT(RB_PARENT(old, field), field) = elm;                                           \
+            RB_AUGMENT(RB_PARENT(old, field));                                                        \
+          }                                                                                           \
+        else                                                                                          \
+          RB_ROOT(head) = elm;                                                                        \
+        RB_PARENT(RB_LEFT(old, field), field) = elm;                                                  \
+        if(RB_RIGHT(old, field))                                                                      \
+          RB_PARENT(RB_RIGHT(old, field), field) = elm;                                               \
+        if(parent)                                                                                    \
+          {                                                                                           \
+            left = parent;                                                                            \
+            do                                                                                        \
+              {                                                                                       \
+                RB_AUGMENT(left);                                                                     \
+              }                                                                                       \
+            while((left = RB_PARENT(left, field)) != NULL);                                           \
+          }                                                                                           \
+        goto color;                                                                                   \
+      }                                                                                               \
+    parent = RB_PARENT(elm, field);                                                                   \
+    color  = RB_COLOR(elm, field);                                                                    \
+    if(child)                                                                                         \
+      RB_PARENT(child, field) = parent;                                                               \
+    if(parent)                                                                                        \
+      {                                                                                               \
+        if(RB_LEFT(parent, field) == elm)                                                             \
+          RB_LEFT(parent, field) = child;                                                             \
+        else                                                                                          \
+          RB_RIGHT(parent, field) = child;                                                            \
+        RB_AUGMENT(parent);                                                                           \
+      }                                                                                               \
+    else                                                                                              \
+      RB_ROOT(head) = child;                                                                          \
+  color:                                                                                              \
+    if(color == RB_BLACK)                                                                             \
+      name##_RB_REMOVE_COLOR(head, parent, child);                                                    \
+    return (old);                                                                                     \
+  }                                                                                                   \
+                                                                                                      \
+  /* Inserts a node into the RB tree */                                                               \
+  attr struct type *name##_RB_INSERT(struct name *head, struct type *elm)                             \
+  {                                                                                                   \
+    struct type *tmp;                                                                                 \
+    struct type *parent = NULL;                                                                       \
+    int comp            = 0;                                                                          \
+    tmp                 = RB_ROOT(head);                                                              \
+    while(tmp)                                                                                        \
+      {                                                                                               \
+        parent = tmp;                                                                                 \
+        comp   = (cmp)(elm, parent);                                                                  \
+        if(comp < 0)                                                                                  \
+          tmp = RB_LEFT(tmp, field);                                                                  \
+        else if(comp > 0)                                                                             \
+          tmp = RB_RIGHT(tmp, field);                                                                 \
+        else                                                                                          \
+          return (tmp);                                                                               \
+      }                                                                                               \
+    RB_SET(elm, parent, field);                                                                       \
+    if(parent != NULL)                                                                                \
+      {                                                                                               \
+        if(comp < 0)                                                                                  \
+          RB_LEFT(parent, field) = elm;                                                               \
+        else                                                                                          \
+          RB_RIGHT(parent, field) = elm;                                                              \
+        RB_AUGMENT(parent);                                                                           \
+      }                                                                                               \
+    else                                                                                              \
+      RB_ROOT(head) = elm;                                                                            \
+    name##_RB_INSERT_COLOR(head, elm);                                                                \
+    return (NULL);                                                                                    \
+  }                                                                                                   \
+                                                                                                      \
+  /* Finds the node with the same key as elm */                                                       \
+  attr struct type *name##_RB_FIND(struct name *head, struct type *elm)                               \
+  {                                                                                                   \
+    struct type *tmp = RB_ROOT(head);                                                                 \
+    int comp;                                                                                         \
+    while(tmp)                                                                                        \
+      {                                                                                               \
+        comp = cmp(elm, tmp);                                                                         \
+        if(comp < 0)                                                                                  \
+          tmp = RB_LEFT(tmp, field);                                                                  \
+        else if(comp > 0)                                                                             \
+          tmp = RB_RIGHT(tmp, field);                                                                 \
+        else                                                                                          \
+          return (tmp);                                                                               \
+      }                                                                                               \
+    return (NULL);                                                                                    \
+  }                                                                                                   \
+                                                                                                      \
+  /* Finds the first node greater than or equal to the search key */                                  \
+  attr struct type *name##_RB_NFIND(struct name *head, struct type *elm)                              \
+  {                                                                                                   \
+    struct type *tmp = RB_ROOT(head);                                                                 \
+    struct type *res = NULL;                                                                          \
+    int comp;                                                                                         \
+    while(tmp)                                                                                        \
+      {                                                                                               \
+        comp = cmp(elm, tmp);                                                                         \
+        if(comp < 0)                                                                                  \
+          {                                                                                           \
+            res = tmp;                                                                                \
+            tmp = RB_LEFT(tmp, field);                                                                \
+          }                                                                                           \
+        else if(comp > 0)                                                                             \
+          tmp = RB_RIGHT(tmp, field);                                                                 \
+        else                                                                                          \
+          return (tmp);                                                                               \
+      }                                                                                               \
+    return (res);                                                                                     \
+  }                                                                                                   \
+                                                                                                      \
+  /* ARGSUSED */                                                                                      \
+  attr struct type *name##_RB_NEXT(struct type *elm)                                                  \
+  {                                                                                                   \
+    if(RB_RIGHT(elm, field))                                                                          \
+      {                                                                                               \
+        elm = RB_RIGHT(elm, field);                                                                   \
+        while(RB_LEFT(elm, field))                                                                    \
+          elm = RB_LEFT(elm, field);                                                                  \
+      }                                                                                               \
+    else                                                                                              \
+      {                                                                                               \
+        if(RB_PARENT(elm, field) && (elm == RB_LEFT(RB_PARENT(elm, field), field)))                   \
+          elm = RB_PARENT(elm, field);                                                                \
+        else                                                                                          \
+          {                                                                                           \
+            while(RB_PARENT(elm, field) && (elm == RB_RIGHT(RB_PARENT(elm, field), field)))           \
+              elm = RB_PARENT(elm, field);                                                            \
+            elm = RB_PARENT(elm, field);                                                              \
+          }                                                                                           \
+      }                                                                                               \
+    return (elm);                                                                                     \
+  }                                                                                                   \
+                                                                                                      \
+  /* ARGSUSED */                                                                                      \
+  attr struct type *name##_RB_PREV(struct type *elm)                                                  \
+  {                                                                                                   \
+    if(RB_LEFT(elm, field))                                                                           \
+      {                                                                                               \
+        elm = RB_LEFT(elm, field);                                                                    \
+        while(RB_RIGHT(elm, field))                                                                   \
+          elm = RB_RIGHT(elm, field);                                                                 \
+      }                                                                                               \
+    else                                                                                              \
+      {                                                                                               \
+        if(RB_PARENT(elm, field) && (elm == RB_RIGHT(RB_PARENT(elm, field), field)))                  \
+          elm = RB_PARENT(elm, field);                                                                \
+        else                                                                                          \
+          {                                                                                           \
+            while(RB_PARENT(elm, field) && (elm == RB_LEFT(RB_PARENT(elm, field), field)))            \
+              elm = RB_PARENT(elm, field);                                                            \
+            elm = RB_PARENT(elm, field);                                                              \
+          }                                                                                           \
+      }                                                                                               \
+    return (elm);                                                                                     \
+  }                                                                                                   \
+                                                                                                      \
+  attr struct type *name##_RB_MINMAX(struct name *head, int val)                                      \
+  {                                                                                                   \
+    struct type *tmp    = RB_ROOT(head);                                                              \
+    struct type *parent = NULL;                                                                       \
+    while(tmp)                                                                                        \
+      {                                                                                               \
+        parent = tmp;                                                                                 \
+        if(val < 0)                                                                                   \
+          tmp = RB_LEFT(tmp, field);                                                                  \
+        else                                                                                          \
+          tmp = RB_RIGHT(tmp, field);                                                                 \
+      }                                                                                               \
+    return (parent);                                                                                  \
+  }
+
+#define RB_NEGINF -1
+#define RB_INF 1
+/* Wrappers around the generated functions. RB_NEXT/RB_PREV ignore x; RB_MIN/RB_MAX make name##_RB_MINMAX() follow left/right links. */
+#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y) name##_RB_FIND(x, y)
+#define RB_NFIND(name, x, y) name##_RB_NFIND(x, y)
+#define RB_NEXT(name, x, y) name##_RB_NEXT(y)
+#define RB_PREV(name, x, y) name##_RB_PREV(y)
+#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF)
+/* Forward traversal from the minimum; name##_RB_NEXT(x) runs after the body, so (x) must still be a valid, linked node then. */
+#define RB_FOREACH(x, name, head) for((x) = RB_MIN(name, head); (x) != NULL; (x) = name##_RB_NEXT(x))
+/* Forward traversal starting at (y); (y) is overwritten with the successor of (x) before each execution of the body. */
+#define RB_FOREACH_FROM(x, name, y) for((x) = (y); ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); (x) = (y))
+/* Like RB_FOREACH, but the successor is stored in (y) before the body runs, so (x) is not dereferenced after the body. */
+#define RB_FOREACH_SAFE(x, name, head, y) \
+  for((x) = RB_MIN(name, head); ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); (x) = (y))
+/* Backward traversal from the maximum; name##_RB_PREV(x) runs after the body, so (x) must still be a valid, linked node then. */
+#define RB_FOREACH_REVERSE(x, name, head) for((x) = RB_MAX(name, head); (x) != NULL; (x) = name##_RB_PREV(x))
+/* Backward traversal starting at (y); (y) is overwritten with the predecessor of (x) before each execution of the body. */
+#define RB_FOREACH_REVERSE_FROM(x, name, y) for((x) = (y); ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); (x) = (y))
+/* Reverse counterpart of RB_FOREACH_SAFE: the predecessor is stored in (y) before the body runs. */
+#define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \
+  for((x) = RB_MAX(name, head); ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); (x) = (y))
+
+#endif /* _SYS_TREE_H_ */
diff --git a/src/amuse/community/arepo/src/domain/domain.c b/src/amuse/community/arepo/src/domain/domain.c
new file mode 100644
index 0000000000..4557c25ff5
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain.c
@@ -0,0 +1,633 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/domain.c
+ * \date        05/2018
+ * \brief       Code for domain decomposition.
+ * \details     This file contains the code for the domain decomposition of the
+ *              simulation volume.  The domains are constructed from disjoint
+ *              subsets of the leaves of a fiducial top-level tree that covers
+ *              the full simulation volume. Domain boundaries hence run along
+ *              tree-node divisions of a fiducial global Barnes-Hut tree. As a
+ *              result of this method, the tree forces are in principle strictly
+ *              independent of the way the domains are cut. The domain
+ *              decomposition can be carried out for an arbitrary number of
+ *              CPUs. Individual domains are not cubical, but spatially
+ *              coherent since the leaves are traversed in a Peano-Hilbert
+ *              order and individual domains form segments along this order.
+ *              This also ensures that each domain has a small surface to
+ *              volume ratio, which minimizes communication.
+ *              contains functions:
+ *                void domain_Decomposition(void)
+ *                void domain_prepare_voronoi_dynamic_update(void)
+ *                void domain_voronoi_dynamic_flag_particles(void)
+ *                void domain_voronoi_dynamic_update_execute(void)
+ *                void domain_preserve_relevant_topnode_data(void)
+ *                void domain_find_total_cost(void)
+ *                peano1D domain_double_to_int(double d)
+ *                void domain_allocate(void)
+ *                void domain_free(void)
+ *                void domain_printf(char *buf)
+ *                void domain_report_balance(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 16.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+#include "domain.h"
+
+/*! \brief The main routine for the domain decomposition.
+ *
+ *  It acts as a driver routine that allocates various temporary buffers,
+ *  maps the particles back onto the periodic box if needed, and then does the
+ *  domain decomposition, and a final Peano-Hilbert order of all particles
+ *  as a tuning measure.
+ *
+ *  \return void
+ */
+void domain_Decomposition(void)
+{
+  TIMER_START(CPU_DOMAIN);
+
+  double t0 = second();
+
+  mpi_printf("DOMAIN: Begin domain decomposition (sync-point %d).\n", All.NumCurrentTiStep);
+
+  /* allocate the translation table and determine Largest_Nvc for the Voronoi dynamic update */
+  domain_prepare_voronoi_dynamic_update();
+
+  /* map the particles back onto the box */
+  do_box_wrapping();
+
+  /* Initialize and allocate */
+  domain_init_sum_cost();
+  domain_allocate();
+  domain_allocate_lists();
+
+  topNodes = (struct local_topnode_data *)mymalloc_movable(&topNodes, "topNodes", (MaxTopNodes * sizeof(struct local_topnode_data)));
+  /* find total cost factors */
+  domain_find_total_cost();
+  /* determine global dimensions of domain grid */
+  domain_findExtent();
+
+  /* determine top-level tree */
+  domain_determineTopTree();
+
+  /* find the split of the top-level tree */
+  domain_combine_topleaves_to_domains(All.MultipleDomains * NTask, NTopleaves);
+
+  /* combine on each MPI task several of the domains (namely the number All.MultipleDomains) */
+  domain_combine_multipledomains();
+
+  /* permute the task assignment such that the smallest number of particles needs to be moved */
+  domain_optimize_domain_to_task_mapping();
+
+  double ta = second();
+  /* in case we retain the neighbor connectivity, do some preparatory flagging */
+  domain_voronoi_dynamic_flag_particles();
+  /* drop cells that were eliminated or turned into stars from the particle sequence */
+  domain_rearrange_particle_sequence();
+  /* determine for each cpu how many particles have to be shifted to other cpus */
+  domain_countToGo();
+  double tb = second();
+  mpi_printf("DOMAIN: particle rearrangement work took %g sec\n", timediff(ta, tb));
+
+  /* finally, carry out the actual particle exchange */
+  domain_exchange();
+
+  /* copy what we need for the topnodes */
+  domain_preserve_relevant_topnode_data();
+  myfree(topNodes);
+  domain_free_lists();
+  TimeOfLastDomainConstruction = All.Time;
+
+  double t1 = second();
+  mpi_printf("DOMAIN: domain decomposition done. (took in total %g sec)\n", timediff(t0, t1));
+
+  TIMER_STOP(CPU_DOMAIN);
+  TIMER_START(CPU_PEANO);
+
+  peano_hilbert_order();
+  myfree(Key); /* NOTE(review): Key is not allocated in this function -- confirm which helper owns the allocation */
+
+  TIMER_STOPSTART(CPU_PEANO, CPU_DOMAIN);
+  /* release the MaxTopNodes-sized list from domain_allocate(); it is re-allocated below with NTopleaves entries */
+  myfree(DomainListOfLocalTopleaves);
+
+#ifdef ONEDIMS
+  voronoi_1D_order();
+#endif /* #ifdef ONEDIMS */
+  /* shrink the arrays that domain_allocate() sized for MaxTopNodes entries to the number of entries in use */
+  TopNodes   = (struct topnode_data *)myrealloc_movable(TopNodes, NTopnodes * sizeof(struct topnode_data));
+  DomainTask = (int *)myrealloc_movable(DomainTask, NTopleaves * sizeof(int));
+
+  domain_voronoi_dynamic_update_execute();
+
+  DomainListOfLocalTopleaves =
+      (int *)mymalloc_movable(&DomainListOfLocalTopleaves, "DomainListOfLocalTopleaves", (NTopleaves * sizeof(int)));
+  /* group the top-leaves by owning task -- pass 1: count the leaves of every task */
+  memset(DomainNLocalTopleave, 0, NTask * sizeof(int));
+
+  for(int i = 0; i < NTopleaves; i++)
+    DomainNLocalTopleave[DomainTask[i]]++;
+  /* running sum of the counts: index of each task's first slot in DomainListOfLocalTopleaves */
+  DomainFirstLocTopleave[0] = 0;
+  for(int i = 1; i < NTask; i++)
+    DomainFirstLocTopleave[i] = DomainFirstLocTopleave[i - 1] + DomainNLocalTopleave[i - 1];
+  /* pass 2: the counters are zeroed and reused as per-task fill positions; they end up at the pass-1 counts again */
+  memset(DomainNLocalTopleave, 0, NTask * sizeof(int));
+
+  for(int i = 0; i < NTopleaves; i++)
+    {
+      int task                        = DomainTask[i];
+      int off                         = DomainFirstLocTopleave[task] + DomainNLocalTopleave[task]++;
+      DomainListOfLocalTopleaves[off] = i;
+    }
+
+  reconstruct_timebins();
+
+  for(int i = 0; i < GRAVCOSTLEVELS; i++)
+    All.LevelHasBeenMeasured[i] = 0;
+
+  domain_report_balance();
+
+  TIMER_STOP(CPU_DOMAIN);
+}
+
+/*! \brief Prepares for voronoi dynamic update.
+ *
+ *  Allocates required arrays and communicates required information.
+ *
+ *  \return void
+ */
+void domain_prepare_voronoi_dynamic_update(void)
+{
+  N_trans = NumGas; /* the translation table gets NumGas entries */
+  /* movable allocation of the table; it is released again in domain_voronoi_dynamic_update_execute() */
+  trans_table = (struct trans_data *)mymalloc_movable(&trans_table, "trans_table", N_trans * sizeof(*trans_table));
+  MPI_Allreduce(&Nvc, &Largest_Nvc, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); /* Largest_Nvc = maximum of Nvc over all tasks */
+}
+
+/*! \brief Flag particles that need to be exported.
+ *
+ *  Go through all local particles and check if they are within new local
+ *  domains. If this is not the case, flag them as to be exported.
+ *
+ *  \return void
+ */
+void domain_voronoi_dynamic_flag_particles(void)
+{
+  for(int part = 0; part < NumPart; part++)
+    {
+      /* walk down the top-level tree from the root (index 0) until a node without daughters is reached */
+      int node = 0;
+      while(topNodes[node].Daughter >= 0)
+        {
+          /* a node's key range is split into 8 equal parts; step into the daughter whose part contains the key */
+          node = topNodes[node].Daughter + (Key[part] - topNodes[node].StartKey) / (topNodes[node].Size >> 3);
+        }
+
+      /* the leaf number indexes DomainTask; the resulting task is handed to domain_mark_in_trans_table() */
+      domain_mark_in_trans_table(part, DomainTask[topNodes[node].Leaf]);
+    }
+}
+
+/*! \brief Execute voronoi_dynamic_update
+ *
+ *  Calls domain_exchange_and_update_DC() if needed.
+ *
+ *  \return void
+ */
+void domain_voronoi_dynamic_update_execute(void)
+{
+  CPU_Step[CPU_DOMAIN] += measure_time(); /* presumably the time since the previous measure_time() call -- TODO confirm */
+  if(Largest_Nvc > 0) /* Largest_Nvc comes from an MPI_MAX reduction of Nvc in domain_prepare_voronoi_dynamic_update() */
+    domain_exchange_and_update_DC();
+  /* release the translation table allocated in domain_prepare_voronoi_dynamic_update() */
+  myfree_movable(trans_table);
+  /* the exchange and the free above are booked under CPU_MESH_DYNAMIC instead of CPU_DOMAIN */
+  CPU_Step[CPU_MESH_DYNAMIC] += measure_time();
+}
+
+/*! \brief Save the new top-level tree data into global arrays.
+ *
+ *  \return void
+ */
+void domain_preserve_relevant_topnode_data(void)
+{
+  for(int n = 0; n < NTopnodes; n++)
+    {
+      const struct local_topnode_data *src = &topNodes[n];
+      struct topnode_data *dst             = &TopNodes[n];
+
+      dst->StartKey = src->StartKey;
+      dst->Size     = src->Size;
+      dst->Daughter = src->Daughter;
+      dst->Leaf     = src->Leaf;
+      /* my_ffsll() presumably yields the 1-based position of the lowest set bit of Size -- TODO confirm */
+      int lowbit = my_ffsll(dst->Size);
+      int shift  = (lowbit - 1) / 3 - 1;
+
+      for(int sub = 0; sub < 8; sub++)
+        {
+          peano1D xb, yb, zb;
+          peano_hilbert_key_inverse(dst->StartKey + sub * (dst->Size >> 3), BITS_PER_DIMENSION, &xb, &yb, &zb);
+          /* one bit per axis (x: bit 0, y: bit 1, z: bit 2) forms the table index for the sub-node that starts at this key */
+          int octant = ((xb >> shift) & 1) | (((yb >> shift) & 1) << 1) | (((zb >> shift) & 1) << 2);
+          if(octant < 0 || octant > 7)
+            terminate("j=%d  idx=%d", sub, octant);
+          dst->MortonToPeanoSubnode[octant] = sub;
+        }
+    }
+}
+
+/*! \brief Calculates the total cost of different operations.
+ *
+ *  This function gathers information about the cost of gravity and
+ *  hydrodynamics calculation as well as the particle load.
+ *
+ *  \return void
+ */
+void domain_find_total_cost(void)
+{
+  /* Outline: (1) accumulate the local cost sums and particle counts, (2) sum them over all tasks,
+   * (3) derive the weights normsum_* and the per-unit factors fac_* from the global totals. */
+  if(All.MultipleDomains < 1 || All.MultipleDomains > 512)
+    terminate("All.MultipleDomains < 1 || All.MultipleDomains > 512");
+
+  /* the cost sums are accumulated in the file-level variables; the two counters are only needed locally */
+  double n_counted = 0, n_with_hydro = 0;
+  sphcost  = 0;
+  gravcost = 0;
+
+  for(int p = 0; p < NumPart; p++)
+    {
+#ifdef ADDBACKGROUNDGRID
+      if(P[p].Type != 0)
+        continue;
+#endif /* #ifdef ADDBACKGROUNDGRID */
+      n_counted += 1.0;
+      gravcost += domain_grav_tot_costfactor(p);
+
+      double cost_hydro = domain_hydro_tot_costfactor(p);
+      sphcost += cost_hydro;
+      n_with_hydro += (cost_hydro > 0) ? 1.0 : 0.0;
+    }
+
+  /* a single collective call sums all four local quantities over the tasks */
+  double local_vals[4], global_vals[4];
+  local_vals[0] = gravcost;
+  local_vals[1] = sphcost;
+  local_vals[2] = n_counted;
+  local_vals[3] = n_with_hydro;
+
+  MPI_Allreduce(local_vals, global_vals, 4, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  totgravcost  = global_vals[0];
+  totsphcost   = global_vals[1];
+  totpartcount = global_vals[2];
+
+  const int grav_active  = (totgravcost > 0);
+  const int hydro_active = (totsphcost > 0);
+  const int many_hydro   = (global_vals[3] > (All.TopNodeFactor * All.MultipleDomains * NTask));
+
+  /* step 1: choose the weights */
+  if(hydro_active && grav_active && many_hydro)
+    {
+      /* gravitational work-load, hydro work-load and particle load are balanced with equal weight */
+      normsum_work = normsum_load = normsum_worksph = 0.333333;
+    }
+  else if(grav_active)
+    {
+      /* gravitational work-load and particle load share the weight; the final pieces should have
+       * at most imbalance 2.0 in either of the two */
+      normsum_work = normsum_load = 0.5;
+      normsum_worksph             = 0;
+    }
+  else if(hydro_active)
+    {
+      /* only hydrodynamics appears to be done: SPH cost and particle load share the weight */
+      normsum_work = 0;
+      normsum_load = normsum_worksph = 0.5;
+    }
+  else
+    {
+      terminate("strange: totsphcost=%g  totgravcost=%g\n", totsphcost, totgravcost);
+      return; /* only reached if terminate() returns; no weight or factor is modified in that case */
+    }
+
+  /* step 2: factor = weight / global total; a zero weight gives a zero factor without dividing */
+  fac_work    = (normsum_work > 0) ? normsum_work / totgravcost : 0.0;
+  fac_load    = normsum_load / totpartcount;
+  fac_worksph = (normsum_worksph > 0) ? normsum_worksph / totsphcost : 0.0;
+}
+
+/*! \brief Coordinate conversion to integer.
+ *
+ *  \param[in] d coordinate in double precision.
+ *
+ *  \return coordinate in integer of type peano1D.
+ */
+peano1D domain_double_to_int(double d)
+{
+  /* assumes the IEEE-754 binary64 layout, in which the low 52 bits of the bit pattern hold the mantissa */
+  const unsigned long long mantissa_mask = 0xFFFFFFFFFFFFFllu;
+  unsigned long long pattern             = 0;
+
+  memcpy(&pattern, &d, sizeof(d)); /* copy the bit pattern of d unchanged */
+  /* keep only the BITS_PER_DIMENSION most significant mantissa bits */
+  return (peano1D)((pattern & mantissa_mask) >> (52 - BITS_PER_DIMENSION));
+}
+
+/*! \brief Allocates memory
+ *
+ *  This function allocates all the stuff that will be required for the
+ *  tree-construction/walk later on.
+ *
+ *  \return void
+ */
+void domain_allocate(void)
+{
+  MaxTopNodes = (int)(All.TopNodeAllocFactor * All.MaxPart + 1);
+  /* DomainStartList doubles as the "already allocated" marker; domain_free() resets it to NULL */
+  if(DomainStartList)
+    terminate("domain storage already allocated");
+  /* keep this allocation order: domain_free() releases the blocks in exactly the reverse order */
+  DomainStartList        = (int *)mymalloc_movable(&DomainStartList, "DomainStartList", (NTask * All.MultipleDomains * sizeof(int)));
+  DomainEndList          = (int *)mymalloc_movable(&DomainEndList, "DomainEndList", (NTask * All.MultipleDomains * sizeof(int)));
+  DomainFirstLocTopleave = (int *)mymalloc_movable(&DomainFirstLocTopleave, "DomainFirstLocTopleave", NTask * sizeof(int));
+  DomainNLocalTopleave   = (int *)mymalloc_movable(&DomainNLocalTopleave, "DomainNLocalTopleave", NTask * sizeof(int));
+  TopNodes               = (struct topnode_data *)mymalloc_movable(&TopNodes, "TopNodes", (MaxTopNodes * sizeof(struct topnode_data)));
+  DomainTask             = (int *)mymalloc_movable(&DomainTask, "DomainTask", (MaxTopNodes * sizeof(int)));
+  DomainListOfLocalTopleaves =
+      (int *)mymalloc_movable(&DomainListOfLocalTopleaves, "DomainListOfLocalTopleaves", (MaxTopNodes * sizeof(int)));
+}
+
+/*! \brief Free arrays needed in domain decomposition.
+ *
+ *  Counterpart to domain_allocate(): the arrays are released in reverse
+ *  allocation order and every pointer is reset to NULL afterwards.
+ * \return void
+ */
+void domain_free(void)
+{
+  if(!DomainStartList)
+    terminate("domain storage not allocated");
+
+  myfree_movable(DomainListOfLocalTopleaves);
+  myfree_movable(DomainTask);
+  myfree_movable(TopNodes);
+  myfree_movable(DomainNLocalTopleave);
+  myfree_movable(DomainFirstLocTopleave);
+  myfree_movable(DomainEndList);
+  myfree_movable(DomainStartList);
+  /* reset all seven pointers (DomainListOfLocalTopleaves used to be left dangling); NULL DomainStartList re-arms domain_allocate() */
+  DomainListOfLocalTopleaves = NULL;
+  DomainTask                 = NULL;
+  TopNodes                   = NULL;
+  DomainNLocalTopleave       = NULL;
+  DomainFirstLocTopleave     = NULL;
+  DomainEndList              = NULL;
+  DomainStartList            = NULL;
+}
+
+/*! \brief Print message in domain.txt logfile.
+ *
+ *  \param[in] buf String to be printed to domain.txt.
+ *
+ *  \return void
+ */
+void domain_printf(char *buf)
+{
+  if(!(RestartFlag > 2)) /* the domain log is only written for RestartFlag values up to 2 */
+    fputs(buf, FdDomain);
+}
+
+/*! \brief Function that reports load-balancing
+ *
+ *  Function calculates load-balancing of the simulation and prints
+ *  it to domain.txt
+ *
+ *  \return void
+ */
+void domain_report_balance(void)
+{
+  /* per-timebin particle counts: [0, TIMEBINS) gravity bins, [TIMEBINS, 2 * TIMEBINS) hydro bins */
+  long long loc_count[2 * TIMEBINS], glob_count[2 * TIMEBINS];
+
+  for(int i = 0; i < TIMEBINS; i++)
+    {
+      loc_count[i]            = TimeBinsGravity.TimeBinCount[i];
+      loc_count[TIMEBINS + i] = TimeBinsHydro.TimeBinCount[i];
+    }
+  /* sum the counts over all tasks; the result is only delivered to task 0 */
+  MPI_Reduce(loc_count, glob_count, 2 * TIMEBINS, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+  /* loc_max_data layout: [0, TIMEBINS) hydro cost, [TIMEBINS, 2 * TIMEBINS) gravity cost, then three particle-count slots */
+  double loc_max_data[2 * TIMEBINS + 3], glob_max_data[2 * TIMEBINS + 3];
+
+  loc_max_data[2 * TIMEBINS + 0] = NumPart;
+  loc_max_data[2 * TIMEBINS + 1] = NumGas;
+  loc_max_data[2 * TIMEBINS + 2] = NumPart - NumGas;
+
+  double glob_sum_data[2 * TIMEBINS];
+
+  double *loc_HydroCost  = &loc_max_data[0];
+  double *loc_GravCost   = &loc_max_data[TIMEBINS];
+  double *max_HydroCost  = &glob_max_data[0];
+  double *max_GravCost   = &glob_max_data[TIMEBINS];
+  double *glob_HydroCost = &glob_sum_data[0];
+  double *glob_GravCost  = &glob_sum_data[TIMEBINS];
+
+  for(int i = 0; i < TIMEBINS; i++)
+    {
+      loc_GravCost[i]  = 0;
+      loc_HydroCost[i] = 0;
+    }
+
+#ifdef SELFGRAVITY
+  for(int i = 0; i < NumPart; i++)
+    {
+      for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++)
+        {
+#ifdef HIERARCHICAL_GRAVITY
+          if(bin >= P[i].TimeBinGrav)
+#endif /*  #ifdef HIERARCHICAL_GRAVITY */
+            {
+              if(domain_bintolevel[bin] >= 0)
+                loc_GravCost[bin] += MIN_FLOAT_NUMBER + domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[bin]];
+              else
+                {
+                  if(domain_refbin[bin] >= 0)
+                    loc_GravCost[bin] +=
+                        MIN_FLOAT_NUMBER + domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[domain_refbin[bin]]];
+                  else
+                    loc_GravCost[bin] += 1.0;
+                }
+            }
+        }
+    }
+#endif /* #ifdef SELFGRAVITY */
+
+  for(int i = 0; i < NumPart; i++)
+    if(P[i].Type == 0)
+      loc_HydroCost[P[i].TimeBinHydro] += 1.0;
+
+  /* now determine the cumulative cost for the hydrodynamics */
+  for(int i = 1; i <= All.HighestOccupiedTimeBin; i++)
+    loc_HydroCost[i] += loc_HydroCost[i - 1];
+  /* cost slots: sum over tasks; all slots including the three particle counts: maximum over tasks (both on task 0 only) */
+  MPI_Reduce(loc_max_data, glob_sum_data, 2 * TIMEBINS, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(loc_max_data, glob_max_data, 2 * TIMEBINS + 3, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
+
+  if(ThisTask == 0)
+    {
+      double max_tot = glob_max_data[2 * TIMEBINS + 0];
+      double max_sph = glob_max_data[2 * TIMEBINS + 1];
+      double max_dm  = glob_max_data[2 * TIMEBINS + 2];
+
+      long long *tot_count     = &glob_count[0];
+      long long *tot_count_sph = &glob_count[TIMEBINS];
+
+      long long tot_cumulative[TIMEBINS];
+      tot_cumulative[0] = tot_count[0];
+
+      for(int i = 1; i < TIMEBINS; i++)
+        tot_cumulative[i] = tot_count[i] + tot_cumulative[i - 1];
+
+      double tot_gravcost = 0, max_gravcost = 0, tot_hydrocost = 0, max_hydrocost = 0;
+
+      All.TotGravCost = 0; /* accumulated below on task 0 only, then broadcast at the end of the function */
+
+      for(int i = 0; i < TIMEBINS; i++)
+        {
+          All.TotGravCost += domain_to_be_balanced[i] * glob_GravCost[i] / NTask;
+
+          tot_gravcost += domain_to_be_balanced[i] * glob_GravCost[i] / NTask;
+          max_gravcost += domain_to_be_balanced[i] * max_GravCost[i];
+
+          tot_hydrocost += domain_to_be_balanced[i] * glob_HydroCost[i] / NTask;
+          max_hydrocost += domain_to_be_balanced[i] * max_HydroCost[i];
+        }
+
+      double bal_grav_bin[TIMEBINS], bal_grav_bin_rel[TIMEBINS];
+      double bal_hydro_bin[TIMEBINS], bal_hydro_bin_rel[TIMEBINS];
+
+      for(int i = 0; i < TIMEBINS; i++)
+        {
+          if(tot_count[i] > 0)
+            {
+              bal_grav_bin[i] = max_GravCost[i] / (glob_GravCost[i] / NTask + 1.0e-60);
+              bal_grav_bin_rel[i] =
+                  (tot_gravcost + domain_to_be_balanced[i] * (max_GravCost[i] - glob_GravCost[i] / NTask)) / (tot_gravcost + 1.0e-60);
+            }
+          else
+            {
+              bal_grav_bin[i]     = 0.0;
+              bal_grav_bin_rel[i] = 0.0;
+            }
+
+          if(tot_count_sph[i] > 0)
+            {
+              bal_hydro_bin[i]     = max_HydroCost[i] / (glob_HydroCost[i] / NTask + 1.0e-60);
+              bal_hydro_bin_rel[i] = (tot_hydrocost + domain_to_be_balanced[i] * (max_HydroCost[i] - glob_HydroCost[i] / NTask)) /
+                                     (tot_hydrocost + 1.0e-60);
+            }
+          else
+            {
+              bal_hydro_bin[i]     = 0.0;
+              bal_hydro_bin_rel[i] = 0.0;
+            }
+        }
+
+      char buf[1000];
+
+      sprintf(buf, "\nDOMAIN BALANCE, Sync-Point %d, Time: %g\n", All.NumCurrentTiStep, All.Time);
+
+      domain_printf(buf);
+
+      sprintf(buf, "Timebins:       Gravity       Hydro  cumulative      grav-balance       hydro-balance\n");
+
+      domain_printf(buf);
+
+      long long tot = 0, tot_sph = 0;
+
+      for(int i = TIMEBINS - 1; i >= 0; i--)
+        {
+#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
+          if(tot_count_sph[i] > 0 || tot_count[i] > 0)
+#else  /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) */
+          if(tot_count[i] > 0)
+            tot += tot_count[i];
+          /* NOTE(review): tot is incremented again inside the block below, so a bin with both counts > 0 is added twice here -- confirm */
+          if(tot_count_sph[i] > 0)
+#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) \
+          #else */
+            {
+              char buf[1000]; /* shadows the outer buf inside this block */
+
+              sprintf(buf, "%c%cbin=%2d     %10llu  %10llu  %10llu  %c %6.3f |%6.3f  %c   %6.3f |%6.3f\n",
+                      i == All.HighestActiveTimeBin ? '>' : ' ', i >= All.SmallestTimeBinWithDomainDecomposition ? '|' : ' ', i,
+                      tot_count[i], tot_count_sph[i], tot_cumulative[i], domain_bintolevel[i] >= 0 ? 'm' : ' ', bal_grav_bin[i],
+                      bal_grav_bin_rel[i], domain_to_be_balanced[i] > 0 ? '*' : ' ', bal_hydro_bin[i], bal_hydro_bin_rel[i]);
+
+              domain_printf(buf);
+
+              tot += tot_count[i];
+              tot_sph += tot_count_sph[i];
+            }
+        }
+
+      sprintf(buf, "-------------------------------------------------------------------------------------\n");
+
+      domain_printf(buf);
+
+      sprintf(buf, "BALANCE,  LOAD:  %6.3f      %6.3f      %6.3f  WORK:     %6.3f              %6.3f\n",
+              max_dm / (tot - tot_sph + 1.0e-60) * NTask, max_sph / (tot_sph + 1.0e-60) * NTask, max_tot / (tot + 1.0e-60) * NTask,
+              max_gravcost / (tot_gravcost + 1.0e-60), max_hydrocost / (tot_hydrocost + 1.0e-60));
+
+      domain_printf(buf);
+
+      sprintf(buf, "-------------------------------------------------------------------------------------\n");
+
+      domain_printf(buf);
+
+      sprintf(buf, "\n");
+
+      domain_printf(buf);
+
+      myflush(FdDomain);
+    }
+
+  /* the following needs to be known by all the tasks */
+  MPI_Bcast(&All.TotGravCost, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+}
diff --git a/src/amuse/community/arepo/src/domain/domain.h b/src/amuse/community/arepo/src/domain/domain.h
new file mode 100644
index 0000000000..f52781918a
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain.h
@@ -0,0 +1,156 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/domain.h
+ * \date        05/2018
+ * \brief       Header for domain decomposition.
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 28.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef ALLVARS_H
+#include "../main/allvars.h"
+#endif /* #ifndef ALLVARS_H */
+
+#ifndef DOMAIN_H
+#define DOMAIN_H
+
+#define MASK_ACTIVE_FLAG_IN_TYPE 127 /* 0x7f: the seven low bits */
+#define SET_ACTIVE_FLAG_IN_TYPE 128 /* 0x80: bit 7; NOTE(review): presumably a flag kept in a type byte -- confirm at the use sites */
+/* mode argument of domain_displacePosition() */
+enum domain_displace_mode
+{
+  DISPLACE_POSITION_FORWARD,
+  DISPLACE_POSITION_BACKWARD
+};
+
+extern struct local_topnode_data
+{
+  peanokey Size;     /*!< number of Peano-Hilbert mesh-cells represented by top-level node */
+  peanokey StartKey; /*!< first Peano-Hilbert key in top-level node */
+  long long Count;   /*!< counts the number of particles in this top-level node */
+  double Cost; /*!< presumably the gravity cost accumulated for this node -- TODO confirm */
+  double SphCost; /*!< presumably the hydro ("SPH") cost accumulated for this node -- TODO confirm */
+  int Daughter; /*!< index of first daughter cell (out of 8) of top-level node; negative if the node has no daughters */
+  int Leaf;     /*!< if the node is a leaf, this gives its number when all leaves are traversed in Peano-Hilbert order */
+  int Parent; /*!< presumably the index of the parent node -- TODO confirm */
+  int PIndex; /*!< first particle in node */
+
+} * topNodes, *branchNodes; /*!< points to the root node of the top-level tree */
+
+struct domain_count_data /* NOTE(review): not used in domain.c; the field notes below are guesses from the names */
+{
+  int task; /* presumably an MPI task number -- TODO confirm */
+  int count; /* presumably a particle count associated with that task -- TODO confirm */
+  int origintask; /* presumably the task the count originates from -- TODO confirm */
+};
+
+extern struct domain_peano_hilbert_data
+{
+  peanokey key; /* presumably the Peano-Hilbert key used as sort key -- TODO confirm */
+  int index; /* presumably the index of the particle the key belongs to -- TODO confirm */
+} * mp;
+
+extern struct trans_data
+{
+  MyIDType ID; /* presumably the ID of the cell this entry describes -- TODO confirm */
+  int new_task; /* presumably the task the cell is assigned to, see domain_mark_in_trans_table(i, task) -- TODO confirm */
+  int new_index; /* presumably the cell's index on new_task -- TODO confirm */
+  int wrapped; /* NOTE(review): meaning not visible in this header -- confirm at the use sites */
+} * trans_table; /*!< translation table with N_trans entries; allocated in domain_prepare_voronoi_dynamic_update() */
+
+extern int N_trans; /*!< length of the translation table trans_table */
+
+extern int Nbranch;
+/* normsum_*: balance weights; fac_*: weight divided by the matching global total (both set in domain_find_total_cost()) */
+extern double fac_work, fac_load, fac_worksph;
+extern double normsum_work, normsum_load, normsum_worksph;
+/* tot*: sums over all tasks; gravcost and sphcost: the sums of this task (all set in domain_find_total_cost()) */
+extern double totgravcost, totpartcount, gravcost, totsphcost, sphcost;
+
+extern struct domain_cost_data
+{
+  int no; /* presumably the index of the node this entry refers to -- TODO confirm */
+  float Work;    /*!< total "work" due to the particles stored by a leaf node */
+  float WorkSph; /*!< presumably the hydro ("SPH") counterpart of Work -- TODO confirm */
+  int Count;     /*!< NOTE(review): a single int, not a table; presumably the number of particles of the leaf node -- confirm */
+  int CountSph;  /*!< NOTE(review): a single int, not a table; presumably the number of SPH particles of the leaf node -- confirm */
+} * DomainLeaveNode;
+
+/* toGo[partner] gives the number of particles on the current task that have to go to task 'partner'
+ * (toGoSph, toGet and toGetSph: presumably the same for gas cells and for particles to be received -- TODO confirm) */
+extern int *toGo, *toGoSph;
+extern int *toGet, *toGetSph;
+extern int *list_NumPart;
+extern int *list_NumGas;
+extern int *list_load;
+extern int *list_loadsph;
+extern double *list_work;
+extern double *list_worksph;
+
+/* functions for domain decomposition */
+peano1D domain_double_to_int(double d);
+double domain_grav_tot_costfactor(int i);
+double domain_hydro_tot_costfactor(int i);
+void domain_init_sum_cost(void);
+void domain_printf(char *buf);
+void domain_report_balance(void);
+void domain_rearrange_particle_sequence(void);
+void domain_combine_topleaves_to_domains(int ncpu, int ndomain);
+void domain_combine_multipledomains(void);
+void domain_allocate(void);
+void domain_Decomposition(void);
+int domain_countToGo(void);
+int domain_determineTopTree(void);
+void domain_exchange(void);
+void domain_findExtent(void);
+void domain_free(void);
+void domain_sumCost(void);
+void domain_walktoptree(int no);
+void domain_optimize_domain_to_task_mapping(void);
+void domain_allocate_lists(void);
+void domain_free_lists(void);
+int domain_unpack_tree_branch(int no, int parent);
+void domain_do_local_refine(int n, int *list);
+void domain_preserve_relevant_topnode_data(void);
+void domain_find_total_cost(void);
+void domain_displacePosition(MyDouble *pos, enum domain_displace_mode mode);
+/* Voronoi dynamic update (translation table handling) */
+void domain_voronoi_dynamic_update_execute(void);
+void domain_prepare_voronoi_dynamic_update(void);
+void domain_voronoi_dynamic_flag_particles(void);
+void domain_mark_in_trans_table(int i, int task);
+void domain_exchange_and_update_DC(void);
+/* sorting: comparison callbacks taking (const void *, const void *), each declared exactly once, and mysort_domain() */
+int domain_sort_load(const void *a, const void *b);
+int domain_compare_count(const void *a, const void *b);
+int domain_sort_task(const void *a, const void *b);
+int domain_compare_key(const void *a, const void *b);
+int domain_compare_connection_ID(const void *a, const void *b);
+int domain_compare_local_trans_data_ID(const void *a, const void *b);
+int domain_compare_recv_trans_data_ID(const void *a, const void *b);
+int domain_compare_recv_trans_data_oldtask(const void *a, const void *b);
+void mysort_domain(void *b, size_t n, size_t s);
+
+#endif /* #ifndef DOMAIN_H */
diff --git a/src/amuse/community/arepo/src/domain/domain_DC_update.c b/src/amuse/community/arepo/src/domain/domain_DC_update.c
new file mode 100644
index 0000000000..bf960ebfc1
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_DC_update.c
@@ -0,0 +1,699 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/domain/domain_DC_update.c
+ * \date        05/2018
+ * \brief       Algorithms for voronoi dynamic update
+ * \details     contains functions:
+ *                void domain_mark_in_trans_table(int i, int task)
+ *                void domain_exchange_and_update_DC(void)
+ *                int domain_compare_connection_ID(const void *a,
+ *                  const void *b)
+ *                int domain_compare_local_trans_data_ID(const void *a,
+ *                  const void *b)
+ *                int domain_compare_recv_trans_data_ID(const void *a,
+ *                  const void *b)
+ *                int domain_compare_recv_trans_data_oldtask(const void *a,
+ *                  const void *b)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 17.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+#include "domain.h"
+
+struct trans_data *trans_table; /* translation table, indexed by the local particle index used before the exchange */
+int N_trans; /* exclusive upper bound of the trans_table indices that are scanned and accepted */
+
+/*! \brief Data structure for local auxiliary translation table.
+ *  Holds one (ID, local index) pair per local gas cell; the list is sorted by ID for matching. */
+static struct local_aux_trans_data
+{
+  MyIDType ID; /* copy of P[i].ID */
+  int new_index; /* local index i of the particle that carries this ID */
+} * local_trans_data;
+
+/*! \brief Data structure for communicating the translation table.
+ *  Sent to the new task of a particle and returned with new_index filled in. */
+static struct aux_trans_data
+{
+  MyIDType ID; /* ID taken from the sender's trans_table entry */
+  int old_task; /* ThisTask of the sending task */
+  int old_index; /* index of the entry in the sender's trans_table */
+  int new_index; /* set by the receiver: local index of the matching ID, or -1 if no match is found */
+} * send_trans_data, *recv_trans_data;
+
+/*! \brief Data structure for transcribing data.
+ *  Sent to the task stored in a DC entry and returned with the new task/index of the referenced particle. */
+static struct aux_transscribe_data
+{
+  int old_index; /* DC[].index as stored before the exchange; used to look up trans_table on the queried task */
+  int new_task; /* filled from trans_table[old_index].new_task */
+  int new_index; /* filled from trans_table[old_index].new_index */
+  int image_flags; /* DC[].image_flags; re-encoded on the queried task if the trans_table entry is marked as wrapped */
+} * send_transscribe_data, *recv_transscribe_data;
+
+/*! \brief Records the destination task of a particle in the translation table.
+ *
+ *  For a gas cell the destination is also stored in the 'next' field of all
+ *  entries of its connection list in DC, so that they go to the same task.
+ *
+ *  \param[in] i Index in P and SphP arrays.
+ *  \param[in] task Task to which particle i is exported.
+ *
+ *  \return void
+ */
+void domain_mark_in_trans_table(int i, int task)
+{
+  if(Largest_Nvc <= 0)
+    return;
+
+  if(i >= NumGas)
+    {
+      /* index beyond the gas cells: this one has been removed by domain_rearrange_particle_sequence() */
+      if(i < N_trans)
+        trans_table[i].new_task = -1;
+
+      return;
+    }
+
+  trans_table[i].ID       = P[i].ID;
+  trans_table[i].new_task = task;
+
+  /* a cell that has been deleted or turned into a star gets -1 instead of a destination task */
+  const int is_gone = (P[i].Mass == 0 && P[i].ID == 0) || P[i].Type != 0;
+
+  for(int cur = SphP[i].first_connection; cur >= 0;)
+    {
+      const int succ = DC[cur].next;
+      if(succ == cur)
+        terminate("preventing getting stuck in a loop due to q == DC[q].next : i=%d q=%d last_connection=%d", i, cur,
+                  SphP[i].last_connection);
+      DC[cur].next = is_gone ? -1 : task; /* the next variable temporarily stores the new task */
+
+      if(cur == SphP[i].last_connection)
+        break;
+      cur = succ;
+    }
+}
+
+/*! \brief Communicates connections.
+ *
+ *  This algorithm communicates Delaunay connections and updates them on the
+ *  new task.
+ *
+ *  \return void
+ */
+void domain_exchange_and_update_DC(void)
+{
+  double t0 = second(); /* start time, used for the timing message at the end */
+
+#if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY)
+  /* remove all image flags, after our box movement stunt they are all incorrect anyway */
+  for(int i = 0; i < MaxNvc; i++)
+    {
+      DC[i].image_flags = 1;
+    }
+#endif /* #if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) */
+
+  /* first, we need to complete the translation table: count the entries that go to each new task */
+  for(int j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  for(int i = 0; i < N_trans; i++)
+    if(trans_table[i].new_task >= 0)
+      Send_count[trans_table[i].new_task]++;
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); /* exchange the per-task counts */
+
+  int nimport = 0, nexport = 0;
+  Recv_offset[0] = Send_offset[0] = 0;
+
+  for(int j = 0; j < NTask; j++)
+    {
+      nexport += Send_count[j];
+      nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  send_trans_data = mymalloc("send_trans_data", nexport * sizeof(struct aux_trans_data));
+  recv_trans_data = mymalloc("recv_trans_data", nimport * sizeof(struct aux_trans_data));
+
+  for(int j = 0; j < NTask; j++)
+    Send_count[j] = 0; /* reused as fill counter per destination task */
+
+  for(int i = 0; i < N_trans; i++)
+    {
+      int task = trans_table[i].new_task;
+      if(task >= 0)
+        {
+          send_trans_data[Send_offset[task] + Send_count[task]].ID        = trans_table[i].ID;
+          send_trans_data[Send_offset[task] + Send_count[task]].old_index = i;
+          send_trans_data[Send_offset[task] + Send_count[task]].old_task  = ThisTask;
+          Send_count[task]++;
+        }
+    }
+
+  /* exchange the data (pairwise, the partner of each step is ThisTask ^ ngrp) */
+  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+          MPI_Sendrecv(&send_trans_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct aux_trans_data), MPI_BYTE,
+                       recvTask, TAG_DENS_B, &recv_trans_data[Recv_offset[recvTask]],
+                       Recv_count[recvTask] * sizeof(struct aux_trans_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD,
+                       MPI_STATUS_IGNORE);
+    }
+
+  /* let's now sort the incoming list according to ID */
+  mysort(recv_trans_data, nimport, sizeof(struct aux_trans_data), domain_compare_recv_trans_data_ID);
+
+  /* make an auxiliary list for the local particles that we will also sort according to ID */
+  local_trans_data = mymalloc("local_trans_data", NumGas * sizeof(struct local_aux_trans_data));
+  for(int i = 0; i < NumGas; i++)
+    {
+      local_trans_data[i].ID        = P[i].ID;
+      local_trans_data[i].new_index = i;
+    }
+  mysort(local_trans_data, NumGas, sizeof(struct local_aux_trans_data), domain_compare_local_trans_data_ID);
+
+  int i, j;
+  /* now we go through and put in the new index for matching IDs (simultaneous walk over both ID-sorted lists) */
+  for(i = 0, j = 0; i < nimport && j < NumGas;)
+    {
+      if(recv_trans_data[i].ID < local_trans_data[j].ID)
+        {
+          recv_trans_data[i].new_index = -1; /* this particle has been eliminated */
+          i++;
+        }
+      else if(recv_trans_data[i].ID > local_trans_data[j].ID)
+        j++;
+      else
+        {
+          recv_trans_data[i].new_index = local_trans_data[j].new_index;
+          i++;
+          j++;
+        }
+    }
+
+  for(; i < nimport; i++)
+    recv_trans_data[i].new_index = -1; /* this particle has been eliminated */
+
+  myfree(local_trans_data);
+
+  /* now order the received data by sending task, so that we can return it */
+  mysort(recv_trans_data, nimport, sizeof(struct aux_trans_data), domain_compare_recv_trans_data_oldtask);
+
+  /* return the data (send and receive buffers swap roles compared to the exchange above) */
+  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+          MPI_Sendrecv(&recv_trans_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct aux_trans_data), MPI_BYTE,
+                       recvTask, TAG_DENS_B, &send_trans_data[Send_offset[recvTask]],
+                       Send_count[recvTask] * sizeof(struct aux_trans_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD,
+                       MPI_STATUS_IGNORE);
+    }
+
+  /* now let's fill in the new_index entry into the translation table */
+  for(int i = 0; i < nexport; i++)
+    trans_table[send_trans_data[i].old_index].new_index = send_trans_data[i].new_index;
+
+  myfree(recv_trans_data);
+  myfree(send_trans_data);
+
+  /* it's now time to transcribe the task and index fields in the DC list */
+  for(int j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  for(int i = 0; i < MaxNvc; i++)
+    {
+      int task = DC[i].task;
+      if(task >= 0) /* entries with a negative task are skipped */
+        {
+          if(task >= NTask)
+            terminate("i=%d Nvc=%d MaxNvc=%d task=%d\n", i, Nvc, MaxNvc, task);
+
+          Send_count[task]++;
+        }
+    }
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  nimport = nexport = 0;
+  Recv_offset[0] = Send_offset[0] = 0;
+
+  for(int j = 0; j < NTask; j++)
+    {
+      nexport += Send_count[j];
+      nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  send_transscribe_data = mymalloc("send_transscribe_data", nexport * sizeof(struct aux_transscribe_data));
+  recv_transscribe_data = mymalloc("recv_transscribe_data", nimport * sizeof(struct aux_transscribe_data));
+
+  for(int j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  for(int i = 0; i < MaxNvc; i++)
+    {
+      int task = DC[i].task;
+      if(task >= 0)
+        {
+          send_transscribe_data[Send_offset[task] + Send_count[task]].old_index   = DC[i].index;
+          send_transscribe_data[Send_offset[task] + Send_count[task]].image_flags = DC[i].image_flags;
+          Send_count[task]++;
+        }
+    }
+
+  /* exchange the data */
+  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+          MPI_Sendrecv(&send_transscribe_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct aux_transscribe_data),
+                       MPI_BYTE, recvTask, TAG_DENS_B, &recv_transscribe_data[Recv_offset[recvTask]],
+                       Recv_count[recvTask] * sizeof(struct aux_transscribe_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD,
+                       MPI_STATUS_IGNORE);
+    }
+
+  for(int i = 0; i < nimport; i++)
+    {
+      if(recv_transscribe_data[i].old_index >= N_trans)
+        terminate("recv_transscribe_data[i].old_index >= N_trans");
+
+      if(recv_transscribe_data[i].old_index < 0)
+        terminate("recv_transscribe_data[i].old_index < 0");
+
+      int old_index = recv_transscribe_data[i].old_index;
+
+      recv_transscribe_data[i].new_task  = trans_table[old_index].new_task;
+      recv_transscribe_data[i].new_index = trans_table[old_index].new_index;
+
+#if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY)
+      // Nothing to do here
+#else  /* #if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) */
+      if(recv_transscribe_data[i].new_task >= 0)
+        {
+          if(trans_table[old_index].wrapped)
+            {
+              int bitflags = ffs(recv_transscribe_data[i].image_flags) - 1; /* zero-based position of the lowest set bit */
+              int zbits    = (bitflags / 9); /* the position is decomposed as zbits * 9 + ybits * 3 + xbits */
+              int ybits    = (bitflags - zbits * 9) / 3;
+              int xbits    = bitflags - zbits * 9 - ybits * 3;
+
+              if(trans_table[old_index].wrapped & 1) /* bits 1 and 2 of 'wrapped' select how xbits is remapped */
+                {
+                  if(xbits == 1)
+                    xbits = 0;
+                  else if(xbits == 0)
+                    xbits = 2;
+                  else /* xbits == 2 */
+                    terminate("b");
+                }
+              else if(trans_table[old_index].wrapped & 2)
+                {
+                  if(xbits == 1)
+                    {
+                      terminate("a");
+                    }
+                  else if(xbits == 0)
+                    xbits = 1;
+                  else /* xbits == 2 */
+                    xbits = 0;
+                }
+
+              if(trans_table[old_index].wrapped & 4) /* bits 4 and 8 of 'wrapped' select how ybits is remapped */
+                {
+                  if(ybits == 1)
+                    ybits = 0;
+                  else if(ybits == 0)
+                    ybits = 2;
+                  else
+                    {
+                      terminate("b");
+                    }
+                }
+              else if(trans_table[old_index].wrapped & 8)
+                {
+                  if(ybits == 1)
+                    {
+                      terminate("a");
+                    }
+                  else if(ybits == 0)
+                    ybits = 1;
+                  else
+                    ybits = 0;
+                }
+
+              if(trans_table[old_index].wrapped & 16) /* bits 16 and 32 of 'wrapped' select how zbits is remapped */
+                {
+                  if(zbits == 1)
+                    zbits = 0;
+                  else if(zbits == 0)
+                    zbits = 2;
+                  else
+                    {
+                      terminate("b");
+                    }
+                }
+              else if(trans_table[old_index].wrapped & 32)
+                {
+                  if(zbits == 1)
+                    {
+                      terminate("a");
+                    }
+                  else if(zbits == 0)
+                    zbits = 1;
+                  else
+                    zbits = 0;
+                }
+
+              recv_transscribe_data[i].image_flags = (1 << (zbits * 9 + ybits * 3 + xbits)); /* re-encode as a single set bit */
+            }
+        }
+#endif /* #if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) #else */
+    }
+
+  /* now return the data */
+  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+          MPI_Sendrecv(&recv_transscribe_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct aux_transscribe_data),
+                       MPI_BYTE, recvTask, TAG_DENS_B, &send_transscribe_data[Send_offset[recvTask]],
+                       Send_count[recvTask] * sizeof(struct aux_transscribe_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD,
+                       MPI_STATUS_IGNORE);
+    }
+
+  for(int j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  /* copy the results over to the DC structure (same traversal order as when the send buffer was filled) */
+  for(int i = 0; i < MaxNvc; i++)
+    {
+      int task = DC[i].task;
+      if(task >= 0)
+        {
+          DC[i].task        = send_transscribe_data[Send_offset[task] + Send_count[task]].new_task;
+          DC[i].index       = send_transscribe_data[Send_offset[task] + Send_count[task]].new_index;
+          DC[i].image_flags = send_transscribe_data[Send_offset[task] + Send_count[task]].image_flags;
+          Send_count[task]++;
+        }
+    }
+
+  myfree(recv_transscribe_data);
+  myfree(send_transscribe_data);
+
+  /* now we can exchange the DC data. The task where each item should go is stored in 'next' at this point */
+  for(int j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  /* count where they should go */
+  for(int i = 0; i < MaxNvc; i++)
+    {
+      if(DC[i].task >= 0)
+        {
+          int task = DC[i].next;
+          if(task >= 0)
+            {
+              if(task >= NTask)
+                terminate("Thistask=%d  i=%d Nvc=%d MaxNvc=%d DC[i].task=%d DC[i].next=%d\n", ThisTask, i, Nvc, MaxNvc, DC[i].task,
+                          DC[i].next);
+
+              if(DC[i].index >= 0) /* entries whose new index is negative are not sent */
+                Send_count[task]++;
+            }
+        }
+    }
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  nimport = nexport = 0;
+  Recv_offset[0] = Send_offset[0] = 0;
+
+  for(int j = 0; j < NTask; j++)
+    {
+      nexport += Send_count[j];
+      nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  /* make sure that we have enough room to store the new DC list */
+  while(nimport > MaxNvc)
+    {
+      int old_MaxNvc = MaxNvc;
+      Mesh.Indi.AllocFacNvc *= ALLOC_INCREASE_FACTOR;
+      MaxNvc = Mesh.Indi.AllocFacNvc;
+#ifdef VERBOSE
+      printf("Task=%d: increase memory allocation, MaxNvc=%d Indi.AllocFacNvc=%g\n", ThisTask, MaxNvc, Mesh.Indi.AllocFacNvc);
+#endif /* #ifdef VERBOSE */
+      DC = myrealloc_movable(DC, MaxNvc * sizeof(connection));
+      for(int n = old_MaxNvc; n < MaxNvc; n++)
+        DC[n].task = -1; /* mark the newly added entries so that the loop below skips them */
+    }
+
+  connection *tmpDC = mymalloc("tmpDC", nexport * sizeof(connection)); /* send buffer for the outgoing connections */
+
+  for(int j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  for(int i = 0; i < MaxNvc; i++)
+    {
+      if(DC[i].task >= 0)
+        {
+          int task = DC[i].next;
+
+          if(task >= 0 && DC[i].index >= 0)
+            tmpDC[Send_offset[task] + Send_count[task]++] = DC[i];
+        }
+    }
+
+  /* exchange the connection information */
+
+  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+          MPI_Sendrecv(&tmpDC[Send_offset[recvTask]], Send_count[recvTask] * sizeof(connection), MPI_BYTE, recvTask, TAG_DENS_B,
+                       &DC[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(connection), MPI_BYTE, recvTask, TAG_DENS_B,
+                       MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+
+  myfree(tmpDC);
+
+  Nvc = nimport; /* the received connections were written to the start of DC */
+
+  /* mark the remaining ones as available */
+  for(int i = Nvc; i < MaxNvc - 1; i++)
+    {
+      DC[i].next = i + 1;
+      DC[i].task = -1;
+    }
+  DC[MaxNvc - 1].next = -1;
+  DC[MaxNvc - 1].task = -1;
+
+  if(Nvc < MaxNvc)
+    FirstUnusedConnection = Nvc;
+  else
+    FirstUnusedConnection = -1;
+
+  /* now we need to connect the information to the particles, this we do via the IDs */
+
+  local_trans_data = mymalloc("local_trans_data", NumGas * sizeof(struct local_aux_trans_data));
+  for(int i = 0; i < NumGas; i++)
+    {
+      local_trans_data[i].ID        = P[i].ID;
+      local_trans_data[i].new_index = i; /* is here used as rank of the particle */
+    }
+  mysort(local_trans_data, NumGas, sizeof(struct local_aux_trans_data), domain_compare_local_trans_data_ID);
+
+  mysort(DC, Nvc, sizeof(connection), domain_compare_connection_ID);
+
+  int last = -1; /* most recently linked connection; its 'next' is set when the following one is found */
+  for(i = 0, j = 0; i < NumGas && j < Nvc; i++)
+    {
+      int k = local_trans_data[i].new_index;
+
+      if(P[k].ID < DC[j].ID)
+        {
+          /* this particle has no connection information (new cell) */
+          SphP[k].first_connection = -1;
+          SphP[k].last_connection  = -1;
+        }
+      else if(P[k].ID == DC[j].ID)
+        {
+          SphP[k].first_connection = j;
+
+          while(j < Nvc) /* consume the run of consecutive connections that carry this ID */
+            {
+              SphP[k].last_connection = j;
+
+              if(last >= 0)
+                DC[last].next = j;
+
+              last = j;
+              j++;
+              if(j >= Nvc)
+                break;
+              if(P[k].ID != DC[j].ID)
+                break;
+            }
+        }
+      else
+        {
+          terminate("strange");
+        }
+    }
+
+  for(; i < NumGas; i++) /* remaining cells have no connections left to match */
+    {
+      int k                    = local_trans_data[i].new_index;
+      SphP[k].first_connection = -1;
+      SphP[k].last_connection  = -1;
+    }
+
+  if(last >= 0)
+    DC[last].next = -1;
+
+  myfree(local_trans_data);
+
+  double t1 = second();
+  mpi_printf("DOMAIN: done with rearranging connection information (took %g sec)\n", timediff(t0, t1));
+}
+
+/*! \brief Comparison function ordering connection entries by their ID.
+ *
+ *  Passed to mysort() to sort the DC array.
+ *
+ *  \param[in] a Pointer to first connection.
+ *  \param[in] b Pointer to second connection.
+ *
+ *  \return -1 if a->ID is smaller, +1 if a->ID is larger, 0 otherwise.
+ */
+int domain_compare_connection_ID(const void *a, const void *b)
+{
+  const connection *lhs = (const connection *)a;
+  const connection *rhs = (const connection *)b;
+
+  if(lhs->ID < rhs->ID)
+    return -1;
+
+  return (lhs->ID > rhs->ID) ? +1 : 0;
+}
+
+/*! \brief Comparison function ordering local_aux_trans_data entries by ID.
+ *
+ *  Passed to mysort() to sort the local_trans_data list.
+ *
+ *  \param[in] a Pointer to first entry.
+ *  \param[in] b Pointer to second entry.
+ *
+ *  \return -1 if a->ID is smaller, +1 if a->ID is larger, 0 otherwise.
+ */
+int domain_compare_local_trans_data_ID(const void *a, const void *b)
+{
+  const struct local_aux_trans_data *lhs = (const struct local_aux_trans_data *)a;
+  const struct local_aux_trans_data *rhs = (const struct local_aux_trans_data *)b;
+
+  if(lhs->ID < rhs->ID)
+    return -1;
+
+  return (lhs->ID > rhs->ID) ? +1 : 0;
+}
+
+/*! \brief Comparison function ordering aux_trans_data entries by ID.
+ *
+ *  Passed to mysort() to sort the received translation data by ID.
+ *
+ *  \param[in] a Pointer to first entry.
+ *  \param[in] b Pointer to second entry.
+ *
+ *  \return -1 if a->ID is smaller, +1 if a->ID is larger, 0 otherwise.
+ */
+int domain_compare_recv_trans_data_ID(const void *a, const void *b)
+{
+  const struct aux_trans_data *lhs = (const struct aux_trans_data *)a;
+  const struct aux_trans_data *rhs = (const struct aux_trans_data *)b;
+
+  if(lhs->ID < rhs->ID)
+    return -1;
+
+  return (lhs->ID > rhs->ID) ? +1 : 0;
+}
+
+/*! \brief Comparison function ordering aux_trans_data entries by old_task.
+ *
+ *  Passed to mysort() to group the received data by the task that sent it.
+ *
+ *  \param[in] a Pointer to first entry.
+ *  \param[in] b Pointer to second entry.
+ *
+ *  \return -1 if a->old_task is smaller, +1 if it is larger, 0 otherwise.
+ */
+int domain_compare_recv_trans_data_oldtask(const void *a, const void *b)
+{
+  const struct aux_trans_data *lhs = (const struct aux_trans_data *)a;
+  const struct aux_trans_data *rhs = (const struct aux_trans_data *)b;
+
+  if(lhs->old_task < rhs->old_task)
+    return -1;
+
+  return (lhs->old_task > rhs->old_task) ? +1 : 0;
+}
diff --git a/src/amuse/community/arepo/src/domain/domain_balance.c b/src/amuse/community/arepo/src/domain/domain_balance.c
new file mode 100644
index 0000000000..fcb384ae38
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_balance.c
@@ -0,0 +1,1154 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/domain/domain_balance.c
+ * \date        05/2018
+ * \brief       Load-balancing algorithms.
+ * \details     Algorithms to estimate cost of different particles and cells
+ *              and to balance the workload and memory usage equally over the
+ *              mpi tasks.
+ *              contains functions:
+ *                double domain_grav_tot_costfactor(int i)
+ *                double domain_hydro_tot_costfactor(int i)
+ *                void domain_init_sum_cost(void)
+ *                void domain_sumCost(void)
+ *                void domain_combine_topleaves_to_domains(int ncpu, int
+ *                  ndomain)
+ *                int domain_sort_task(const void *a, const void *b)
+ *                int domain_sort_load(const void *a, const void *b)
+ *                static int mydata_cmp(struct mydata *lhs, struct mydata *rhs)
+ *                void domain_combine_multipledomains(void)
+ *                void domain_optimize_domain_to_task_mapping(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 17.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/bsd_tree.h"
+#include "../domain/domain.h"
+#include "../mesh/voronoi/voronoi.h"
+
+/* preparation work for using a red-black ordered binary tree that is generated by the BSD tree macros */
+
+/*! \brief Defines structure of mytree nodes.
+ */
+struct mydata
+{
+  double pri; /* NOTE(review): presumably the ordering key evaluated by mydata_cmp -- confirm against its definition */
+  int target; /* NOTE(review): payload of the node; its meaning is not visible in this part of the file -- confirm */
+  RB_ENTRY(mydata) linkage; /* this creates the linkage pointers needed by the RB tree, using symbolic name 'linkage' */
+};
+
+/* prototype of the comparison function for tree elements; it is handed to the RB macros below */
+static int mydata_cmp(struct mydata *lhs, struct mydata *rhs);
+
+/* the following macro declares 'struct mytree', which is the header element needed as handle for a tree */
+RB_HEAD(mytree, mydata);
+
+/* the following macros declare appropriate function prototypes and functions needed for this type of tree */
+RB_PROTOTYPE_STATIC(mytree, mydata, linkage, mydata_cmp);
+RB_GENERATE_STATIC(mytree, mydata, linkage, mydata_cmp);
+
+/*! \brief Gravity cost-factor of a particle for the domain balancing.
+ *
+ *  Adds up the weighted gravity cost of particle i over the time bins that are
+ *  to be balanced; a bin without own cost level uses its reference bin.
+ *
+ *  \param[in] i Index of particle in P array.
+ *
+ *  \return cost-factor.
+ */
+double domain_grav_tot_costfactor(int i)
+{
+  double cost = MIN_FLOAT_NUMBER;
+
+#ifdef SELFGRAVITY
+  for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestActiveTimeBin; bin++)
+    {
+      if(!domain_to_be_balanced[bin])
+        continue;
+
+#ifdef HIERARCHICAL_GRAVITY
+      if(bin < P[i].TimeBinGrav)
+        continue;
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+
+      const int level   = domain_bintolevel[bin];
+      const int ref_bin = domain_refbin[bin];
+
+      if(level >= 0)
+        cost += domain_grav_weight[bin] * P[i].GravCost[level];
+      else if(ref_bin >= 0)
+        cost += domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[ref_bin]];
+      else
+        cost += domain_grav_weight[bin];
+    }
+#endif /* #ifdef SELFGRAVITY */
+
+  return cost;
+}
+
+/*! \brief Hydro cost-factor of a particle for the domain balancing.
+ *
+ *  Only particles of type 0 contribute: the hydro weights of all time bins to
+ *  be balanced, from the particle's own hydro time bin upwards, are added up.
+ *
+ *  \param[in] i Index of particle in P array.
+ *  \return cost-factor (0 for particles that are not of type 0).
+ */
+double domain_hydro_tot_costfactor(int i)
+{
+  double cost = 0;
+
+  if(P[i].Type != 0)
+    return cost;
+
+  for(int bin = P[i].TimeBinHydro; bin <= All.HighestOccupiedTimeBin; bin++)
+    if(domain_to_be_balanced[bin])
+      cost += domain_hydro_weight[bin];
+
+  return cost;
+}
+
+/*! \brief Prepares cost measurement.
+ *
+ *  This function prepares the measurement of the total cost on each domain.
+ *  In particular, we determine how the timebins are mapped to the explicit
+ *  measurements of the gravity cost stored in the P.GravCost[] array (which
+ *  in general will only be available for a subset of all timebins). For the
+ *  unmatched timebins, a closest bin is selected that is the most similar in
+ *  terms of particle number on the bin. Finally, the routine also determines
+ *  how often each timebin is executed in one cycle associated with the
+ *  highest occupied timebin.
+ *
+ *  \return void
+ */
+void domain_init_sum_cost(void)
+{
+  long long tot_count[TIMEBINS], tot_count_sph[TIMEBINS];
+
+  /* NOTE(review): sumup_large_ints presumably adds up the per-task bin counts over all tasks -- confirm */
+  sumup_large_ints(TIMEBINS, TimeBinsGravity.TimeBinCount, tot_count);
+  sumup_large_ints(TIMEBINS, TimeBinsHydro.TimeBinCount, tot_count_sph);
+
+  for(int i = 0; i < TIMEBINS; i++)
+    {
+      domain_bintolevel[i] = -1; /* -1: no cost level known for this bin */
+      domain_refbin[i]     = -1; /* -1: no reference bin assigned */
+    }
+
+  for(int j = 0; j < GRAVCOSTLEVELS; j++) /* bins that have known levels at this point */
+    if(All.LevelToTimeBin[j] >= 0)
+      domain_bintolevel[All.LevelToTimeBin[j]] = j;
+
+  for(int i = 0; i < TIMEBINS; i++)
+    if(tot_count[i] > 0 && domain_bintolevel[i] < 0) /* need to find a reference bin for this one */
+      {
+        double mindiff = MAX_REAL_NUMBER;
+        int ref_bin    = -1;
+        for(int j = 0; j < TIMEBINS; j++)
+          if(domain_bintolevel[j] >= 0 && tot_count[j] > 0) /* candidates: occupied bins that have a cost level */
+            {
+              if(mindiff > llabs(tot_count[i] - tot_count[j])) /* keep the bin whose count is closest to that of bin i */
+                {
+                  mindiff = llabs(tot_count[i] - tot_count[j]);
+                  ref_bin = j;
+                }
+            }
+
+        if(ref_bin >= 0)
+          domain_refbin[i] = ref_bin;
+      }
+
+  for(int i = 0; i < TIMEBINS; i++) /* defaults: bin not balanced, unit weights */
+    {
+      domain_to_be_balanced[i] = 0;
+      domain_grav_weight[i]    = 1;
+      domain_hydro_weight[i]   = 1;
+    }
+
+#ifdef HIERARCHICAL_GRAVITY
+
+  domain_to_be_balanced[All.HighestActiveTimeBin] = 1; /* the highest active bin is always balanced */
+  domain_grav_weight[All.HighestActiveTimeBin]    = 1;
+  domain_hydro_weight[All.HighestActiveTimeBin]   = 1;
+
+  for(int j = All.HighestActiveTimeBin - 1; j >= All.LowestOccupiedTimeBin; j--)
+    {
+      if(tot_count[j] > 0 || tot_count_sph[j] > 0)
+        domain_to_be_balanced[j] = 1;
+
+      domain_grav_weight[j] += 2;
+    }
+
+  /* 'weight' doubles with every step towards lower bins */
+  for(int i = All.SmallestTimeBinWithDomainDecomposition - 1, weight = 1; i >= All.LowestOccupiedTimeBin; i--, weight *= 2)
+    {
+      if(tot_count[i] > 0)
+        {
+          domain_grav_weight[i] = weight;
+
+          for(int j = i - 1; j >= All.LowestOccupiedTimeBin; j--)
+            domain_grav_weight[j] += 2 * weight;
+        }
+
+      if(tot_count_sph[i] > 0)
+        domain_hydro_weight[i] = weight;
+    }
+
+#else /* #ifdef HIERARCHICAL_GRAVITY */
+
+  domain_to_be_balanced[All.HighestActiveTimeBin] = 1; /* the highest active bin is always balanced */
+  domain_grav_weight[All.HighestActiveTimeBin]    = 1;
+  domain_hydro_weight[All.HighestActiveTimeBin]   = 1;
+
+  /* 'weight' doubles with every step towards lower bins */
+  for(int i = All.SmallestTimeBinWithDomainDecomposition - 1, weight = 1; i >= All.LowestOccupiedTimeBin; i--, weight *= 2)
+    {
+      if(tot_count[i] > 0 || tot_count_sph[i] > 0)
+        domain_to_be_balanced[i] = 1;
+
+      if(tot_count[i] > 0)
+        domain_grav_weight[i] = weight;
+
+      if(tot_count_sph[i] > 0)
+        domain_hydro_weight[i] = weight;
+    }
+
+#endif /* #ifdef HIERARCHICAL_GRAVITY #else */
+}
+
+/*! \brief Determine cost and load
+ *
+ *  This function determines the cost and load associated with each top-level
+ *  leaf node of the tree. These leaf nodes can be distributed among the
+ *  processors in order to reach a good work-load and memory-load balance.
+ *
+ *  \return void
+ */
+void domain_sumCost(void)
+{
+  int i, j, n, no, nexport = 0, nimport = 0, ngrp, task, loc_first_no;
+
+  struct domain_cost_data *loc_DomainLeaveNode, *listCost, *export_node_data, *import_node_data;
+
+  int *blocksize = mymalloc("blocksize", sizeof(int) * NTask);
+  int blk        = NTopleaves / NTask;
+  int rmd        = NTopleaves - blk * NTask; /* remainder */
+  int pivot_no   = rmd * (blk + 1);
+
+  for(task = 0, loc_first_no = 0; task < NTask; task++)
+    {
+      if(task < rmd)
+        blocksize[task] = blk + 1;
+      else
+        blocksize[task] = blk;
+
+      if(task < ThisTask)
+        loc_first_no += blocksize[task];
+    }
+
+  loc_DomainLeaveNode = mymalloc("loc_DomainLeaveNode", blocksize[ThisTask] * sizeof(struct domain_cost_data));
+  memset(loc_DomainLeaveNode, 0, blocksize[ThisTask] * sizeof(struct domain_cost_data));
+
+  listCost = mymalloc("listCost", NTopleaves * sizeof(struct domain_cost_data));
+
+  int *no_place = mymalloc("no_place", NTopleaves * sizeof(int));
+  memset(no_place, -1, NTopleaves * sizeof(int));
+
+  for(j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  /* find for each particle its top-leave, and then add the associated cost with it */
+  for(n = 0; n < NumPart; n++)
+    {
+#ifdef ADDBACKGROUNDGRID
+      if(P[n].Type != 0)
+        continue;
+#endif /* #ifdef ADDBACKGROUNDGRID */
+      no = 0;
+
+      peanokey mask = ((peanokey)7) << (3 * (BITS_PER_DIMENSION - 1));
+      int shift     = 3 * (BITS_PER_DIMENSION - 1);
+
+      while(topNodes[no].Daughter >= 0)
+        {
+          no = topNodes[no].Daughter + (int)((Key[n] & mask) >> shift);
+          mask >>= 3;
+          shift -= 3;
+        }
+
+      no = topNodes[no].Leaf;
+
+      int p = no_place[no];
+      if(p < 0)
+        {
+          p            = nexport++;
+          no_place[no] = p;
+
+          memset(&listCost[p], 0, sizeof(struct domain_cost_data));
+          listCost[p].no = no;
+
+          if(no < pivot_no)
+            task = no / (blk + 1);
+          else
+            task = rmd + (no - pivot_no) / blk; /* note: if blk=0, then this case can not occur, since then always no < pivot_no */
+
+          if(task < 0 || task > NTask)
+            terminate("task < 0 || task > NTask");
+
+          Send_count[task]++;
+        }
+
+      listCost[p].Count += 1;
+      listCost[p].Work += domain_grav_tot_costfactor(n);
+      listCost[p].WorkSph += domain_hydro_tot_costfactor(n);
+
+      if(P[n].Type == 0)
+        listCost[p].CountSph += 1;
+    }
+
+  myfree(no_place);
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
+    {
+      nimport += Recv_count[j];
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  export_node_data = mymalloc("export_node_data", nexport * sizeof(struct domain_cost_data));
+  import_node_data = mymalloc("import_node_data", nimport * sizeof(struct domain_cost_data));
+
+  for(j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  for(i = 0; i < nexport; i++)
+    {
+      if(listCost[i].no < pivot_no)
+        task = listCost[i].no / (blk + 1);
+      else
+        task = rmd +
+               (listCost[i].no - pivot_no) / blk; /* note: if blk=0, then this case can not occur, since then always no < pivot_no */
+
+      int ind               = Send_offset[task] + Send_count[task]++;
+      export_node_data[ind] = listCost[i];
+    }
+
+  for(ngrp = 0; ngrp < (1 << PTask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */
+    {
+      int recvTask = ThisTask ^ ngrp;
+      if(recvTask < NTask)
+        if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+          MPI_Sendrecv(&export_node_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct domain_cost_data), MPI_BYTE,
+                       recvTask, TAG_DENS_B, &import_node_data[Recv_offset[recvTask]],
+                       Recv_count[recvTask] * sizeof(struct domain_cost_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD,
+                       MPI_STATUS_IGNORE);
+    }
+
+  for(i = 0; i < nimport; i++)
+    {
+      int j = import_node_data[i].no - loc_first_no;
+
+      if(j < 0 || j >= blocksize[ThisTask])
+        terminate("j=%d < 0 || j>= blocksize[ThisTask]=%d   loc_first_no=%d  import_node_data[i].no=%d  i=%d  nimport=%d", j,
+                  blocksize[ThisTask], loc_first_no, import_node_data[i].no, i, nimport);
+
+      loc_DomainLeaveNode[j].Count += import_node_data[i].Count;
+      loc_DomainLeaveNode[j].Work += import_node_data[i].Work;
+      loc_DomainLeaveNode[j].CountSph += import_node_data[i].CountSph;
+      loc_DomainLeaveNode[j].WorkSph += import_node_data[i].WorkSph;
+    }
+
+  myfree(import_node_data);
+  myfree(export_node_data);
+
+  /* now share the cost data across all processors */
+  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
+  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);
+
+  for(task = 0; task < NTask; task++)
+    bytecounts[task] = blocksize[task] * sizeof(struct domain_cost_data);
+
+  for(task = 1, byteoffset[0] = 0; task < NTask; task++)
+    byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+
+  MPI_Allgatherv(loc_DomainLeaveNode, bytecounts[ThisTask], MPI_BYTE, DomainLeaveNode, bytecounts, byteoffset, MPI_BYTE,
+                 MPI_COMM_WORLD);
+
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(listCost);
+  myfree(loc_DomainLeaveNode);
+  myfree(blocksize);
+}
+
+/*! \brief Uses cost function to combine top-level nodes to domains.
+ *
+ *  This function uses the cumulative cost (gravity work, SPH work and particle
+ *  count, weighted by fac_work, fac_worksph and fac_load) to cut the list of
+ *  top-level leaf nodes into contiguous pieces of (approximately) equal cost.
+ *
+ *  \param[in] ncpu Number of chunks/domains to create.
+ *  \param[in] ndomain Number of top-level leaf nodes.
+ *
+ *  \return void
+ */
+void domain_combine_topleaves_to_domains(int ncpu, int ndomain)
+{
+  double t0 = second();
+
+  double max_work     = 0; /* largest cost put into a single domain; only used for the log message */
+  double workhalfnode = 0.5 / ndomain;
+  double workavg      = 1.0 / ncpu; /* cost budget that is added for every domain */
+  double work_before = 0, workavg_before = 0; /* cost and budget accumulated by the domains that are already closed */
+  int start = 0;
+
+  int nabove_grav = 0, nabove_sph = 0; /* leaves above the threshold; only counted here, not evaluated further */
+  double todistribute_grav = 0.0;
+  double todistribute_sph  = 0.0;
+  double weightsum_grav    = 0.0;
+  double weightsum_sph     = 0.0;
+
+  for(int i = 0; i < ndomain; i++)
+    {
+      if(fac_work * DomainLeaveNode[i].Work > normsum_work / ncpu)
+        {
+          nabove_grav++;
+          todistribute_grav += DomainLeaveNode[i].Work - normsum_work / ncpu / fac_work; /* excess above the threshold */
+        }
+      else
+        weightsum_grav += DomainLeaveNode[i].Count; /* particle counts of the other leaves act as redistribution weights */
+
+      if(fac_worksph * DomainLeaveNode[i].WorkSph > normsum_worksph / ncpu)
+        {
+          nabove_sph++;
+          todistribute_sph += DomainLeaveNode[i].WorkSph - normsum_worksph / ncpu / fac_worksph;
+        }
+      else
+        weightsum_sph += DomainLeaveNode[i].Count;
+    }
+
+  struct leafnode_data
+  {
+    double workgrav;
+    double worksph;
+  };
+
+  struct leafnode_data *leaf = (struct leafnode_data *)mymalloc("leaf", ndomain * sizeof(struct leafnode_data));
+
+  /* build the adjusted per-leaf work: leaves above the threshold are capped, the excess is spread over the others */
+  for(int i = 0; i < ndomain; i++)
+    {
+      leaf[i].workgrav = DomainLeaveNode[i].Work;
+      leaf[i].worksph  = DomainLeaveNode[i].WorkSph;
+
+      if(fac_work > 0 && weightsum_grav > 0)
+        {
+          if(fac_work * DomainLeaveNode[i].Work > normsum_work / ncpu)
+            leaf[i].workgrav = normsum_work / ncpu / fac_work; /* cap at the threshold */
+          else
+            leaf[i].workgrav += (DomainLeaveNode[i].Count / weightsum_grav) * todistribute_grav;
+        }
+
+      if(fac_worksph > 0 && weightsum_sph > 0)
+        {
+          if(fac_worksph * DomainLeaveNode[i].WorkSph > normsum_worksph / ncpu)
+            leaf[i].worksph = normsum_worksph / ncpu / fac_worksph; /* cap at the threshold */
+          else
+            leaf[i].worksph += (DomainLeaveNode[i].Count / weightsum_sph) * todistribute_sph;
+        }
+    }
+
+  /* walk through the leaves in order and close a domain once its cumulative budget is used up */
+  for(int i = 0; i < ncpu; i++)
+    {
+      double work = 0;
+      int end     = start;
+
+      work += fac_work * leaf[end].workgrav + fac_load * DomainLeaveNode[end].Count + fac_worksph * leaf[end].worksph;
+
+      while((work + work_before +
+                 (end + 1 < ndomain ? fac_work * leaf[end + 1].workgrav + fac_load * DomainLeaveNode[end + 1].Count +
+                                          fac_worksph * leaf[end + 1].worksph
+                                    : 0) <
+             workavg + workavg_before + workhalfnode) ||
+            (i == ncpu - 1 && end < ndomain - 1)) /* the last domain takes all leaves that are left */
+        {
+          if((ndomain - end) > (ncpu - i)) /* only grow if every remaining domain can still get at least one leaf */
+            end++;
+          else
+            break;
+
+          work += fac_work * leaf[end].workgrav + fac_load * DomainLeaveNode[end].Count + fac_worksph * leaf[end].worksph;
+        }
+
+      DomainStartList[i] = start;
+      DomainEndList[i]   = end; /* inclusive */
+
+      work_before += work;
+      workavg_before += workavg;
+      start = end + 1;
+
+      if(max_work < work)
+        max_work = work;
+    }
+
+  myfree(leaf);
+
+  double t1 = second();
+  mpi_printf("DOMAIN: balance reached among multiple-domains=%g, average leave-nodes per domain=%g  (took %g sec)\n",
+             max_work / workavg, ((double)ndomain) / ncpu, timediff(t0, t1));
+}
+
+/*! \brief One domain piece (a contiguous range of top-leaves) that is assigned to a task by domain_combine_multipledomains().
+ */
+static struct domain_segments_data
+{
+  int task, start, end; /* task the piece is assigned to; first and last top-leaf index of the piece (inclusive) */
+  double bin_GravCost[TIMEBINS]; /* gravity cost of the piece for each timebin */
+  double bin_HydroCost[TIMEBINS]; /* hydro cost of the piece for each timebin */
+  double work; /* sum of DomainLeaveNode[].Work over the piece */
+  double load; /* sum of DomainLeaveNode[].Count over the piece */
+  double worksph; /* sum of DomainLeaveNode[].WorkSph over the piece */
+  double normalized_load; /* sum of the work, worksph and load fractions of the totals; sort key of domain_sort_load() */
+} * domainAssign;
+
+/*! \brief Running totals of everything that has been assigned to one MPI task so far.
+ */
+struct tasklist_data
+{
+  double bin_GravCost[TIMEBINS]; /* accumulated gravity cost for each timebin */
+  double bin_HydroCost[TIMEBINS]; /* accumulated hydro cost for each timebin */
+  double work; /* accumulated domain_segments_data.work */
+  double load; /* accumulated domain_segments_data.load */
+  double worksph; /* accumulated domain_segments_data.worksph */
+  int count; /* number of domain pieces assigned to the task */
+} * tasklist;
+
+/*! \brief Sort kernel that orders domain segments by their assigned task.
+ *
+ *  Only the integer field task takes part in the comparison.
+ *
+ *  \param a Pointer to the first domain_segments_data object.
+ *  \param b Pointer to the second domain_segments_data object.
+ *
+ *  \return -1 if the task of a is smaller, +1 if it is larger, 0 if equal.
+ */
+int domain_sort_task(const void *a, const void *b)
+{
+  const struct domain_segments_data *lhs = (const struct domain_segments_data *)a;
+  const struct domain_segments_data *rhs = (const struct domain_segments_data *)b;
+
+  if(lhs->task != rhs->task)
+    return (lhs->task < rhs->task) ? -1 : +1;
+
+  return 0;
+}
+
+/*! \brief Sort kernel that orders domain segments by decreasing load.
+ *
+ *  Only the field normalized_load takes part in the comparison.
+ *
+ *  \param a Pointer to the first domain_segments_data object.
+ *  \param b Pointer to the second domain_segments_data object.
+ *
+ *  \return -1 if the load of a is larger, +1 if it is smaller, 0 otherwise.
+ */
+int domain_sort_load(const void *a, const void *b)
+{
+  const double load_a = ((const struct domain_segments_data *)a)->normalized_load;
+  const double load_b = ((const struct domain_segments_data *)b)->normalized_load;
+
+  if(load_a > load_b)
+    return -1;
+
+  return (load_a < load_b) ? +1 : 0;
+}
+
+/*! \brief Ordering relation for objects of type mydata.
+ *
+ *  Elements are ordered by pri; equal pri values are ordered by target.
+ *
+ *  \param lhs Pointer to the first object.
+ *  \param rhs Pointer to the second object.
+ *
+ *  \return (-1,0,1); -1 if lhs comes before rhs, 1 if it comes after.
+ */
+static int mydata_cmp(struct mydata *lhs, struct mydata *rhs)
+{
+  /* primary criterion: the priority */
+  if(lhs->pri < rhs->pri)
+    return -1;
+  if(lhs->pri > rhs->pri)
+    return 1;
+
+  /* the priorities do not decide: fall back to the target */
+  if(lhs->target < rhs->target)
+    return -1;
+  return (lhs->target > rhs->target) ? 1 : 0;
+}
+
+/*! \brief Assigns the domain pieces to individual MPI tasks with the goal to
+ *         balance the work-load on different timebins.
+ *
+ *  The algorithm used works as follows:
+ *  The domains are assigned to the CPUs in sequence of decreasing "effective
+ *  load", which is a simple combined measure of relative total gravity, hydro
+ *  and memory load. For each assignment, a number of possible target CPUs are
+ *  evaluated, and the assignment leading to the lowest total runtime is
+ *  adopted. The set of target CPUs that is tested in each step is the one
+ *  that consists of the CPUs that currently have the lowest load in the set
+ *  of primary tasks that are examined.
+ *
+ *  \return void
+ */
+void domain_combine_multipledomains(void)
+{
+  double t0 = second();
+
+  int ndomains = All.MultipleDomains * NTask; /* number of domain pieces that have to be distributed */
+
+  domainAssign = (struct domain_segments_data *)mymalloc("domainAssign", ndomains * sizeof(struct domain_segments_data));
+
+  tasklist = mymalloc("tasklist", NTask * sizeof(struct tasklist_data));
+
+  for(int ta = 0; ta < NTask; ta++)
+    {
+      tasklist[ta].load    = 0;
+      tasklist[ta].work    = 0;
+      tasklist[ta].worksph = 0;
+      tasklist[ta].count   = 0;
+
+      for(int i = 0; i < TIMEBINS; i++)
+        {
+          tasklist[ta].bin_GravCost[i]  = 0;
+          tasklist[ta].bin_HydroCost[i] = 0;
+        }
+    }
+
+  for(int n = 0; n < ndomains; n++)
+    for(int i = DomainStartList[n]; i <= DomainEndList[n]; i++)
+      DomainTask[i] = n; /* for now this maps a top-leaf to its domain piece; the task is filled in at the end */
+
+  /* we first determine the grav-cost and hydro-cost separately for each
+   * timebin of all the domain-pieces that are available for a
+   * mapping to individual MPI tasks
+   */
+
+  struct cost_data
+  {
+    double GravCost;
+    double HydroCost;
+  } * loc_bin_Cost, *glob_bin_Cost; /* both arrays are indexed as [bin * ndomains + piece] */
+
+  loc_bin_Cost  = mymalloc_clear("loc_bin_Cost", sizeof(struct cost_data) * ndomains * TIMEBINS);
+  glob_bin_Cost = mymalloc_clear("glob_bin_Cost", sizeof(struct cost_data) * ndomains * TIMEBINS);
+
+  for(int i = 0; i < NumPart; i++)
+    {
+#ifdef ADDBACKGROUNDGRID
+      if(P[i].Type != 0)
+        continue;
+#endif /* #ifdef ADDBACKGROUNDGRID */
+      int no = 0;
+
+      peanokey mask = ((peanokey)7) << (3 * (BITS_PER_DIMENSION - 1));
+      int shift     = 3 * (BITS_PER_DIMENSION - 1);
+
+      while(topNodes[no].Daughter >= 0)
+        {
+          no = topNodes[no].Daughter + (int)((Key[i] & mask) >> shift);
+          mask >>= 3;
+          shift -= 3;
+        }
+
+      no = topNodes[no].Leaf;
+
+      int n = DomainTask[no]; /* domain piece that contains this particle */
+
+#ifdef SELFGRAVITY
+      for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestActiveTimeBin; bin++)
+        {
+          if(domain_to_be_balanced[bin])
+            {
+#ifdef HIERARCHICAL_GRAVITY
+              if(bin >= P[i].TimeBinGrav)
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+                {
+                  if(domain_bintolevel[bin] >= 0)
+                    loc_bin_Cost[bin * ndomains + n].GravCost +=
+                        MIN_FLOAT_NUMBER + domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[bin]];
+                  else
+                    {
+                      if(domain_refbin[bin] >= 0)
+                        loc_bin_Cost[bin * ndomains + n].GravCost +=
+                            MIN_FLOAT_NUMBER + domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[domain_refbin[bin]]];
+                      else
+                        loc_bin_Cost[bin * ndomains + n].GravCost += domain_grav_weight[bin];
+                    }
+                }
+            }
+        }
+#endif /* #ifdef SELFGRAVITY */
+
+      if(P[i].Type == 0)
+        {
+          for(int bin = P[i].TimeBinHydro; bin <= All.HighestActiveTimeBin; bin++)
+            if(domain_to_be_balanced[bin])
+              loc_bin_Cost[bin * ndomains + n].HydroCost += domain_hydro_weight[bin];
+        }
+    }
+
+  allreduce_sparse_double_sum((double *)(loc_bin_Cost + All.LowestOccupiedTimeBin * ndomains),
+                              (double *)(glob_bin_Cost + All.LowestOccupiedTimeBin * ndomains),
+                              2 * ndomains * (All.HighestOccupiedTimeBin - All.LowestOccupiedTimeBin + 1)); /* 2 doubles per entry */
+
+  /* now assign this cost to the domainAssign-structure, which keeps track of the different pieces */
+  double tot_work    = 0;
+  double tot_load    = 0;
+  double tot_worksph = 0;
+
+  for(int n = 0; n < ndomains; n++)
+    {
+      domainAssign[n].start   = DomainStartList[n];
+      domainAssign[n].end     = DomainEndList[n];
+      domainAssign[n].work    = 0;
+      domainAssign[n].load    = 0;
+      domainAssign[n].worksph = 0;
+
+      for(int i = 0; i < TIMEBINS; i++)
+        {
+          domainAssign[n].bin_GravCost[i]  = glob_bin_Cost[i * ndomains + n].GravCost;
+          domainAssign[n].bin_HydroCost[i] = glob_bin_Cost[i * ndomains + n].HydroCost;
+        }
+
+      for(int i = DomainStartList[n]; i <= DomainEndList[n]; i++)
+        {
+          domainAssign[n].work += DomainLeaveNode[i].Work;
+          domainAssign[n].load += DomainLeaveNode[i].Count;
+          domainAssign[n].worksph += DomainLeaveNode[i].WorkSph;
+        }
+
+      tot_work += domainAssign[n].work;
+      tot_load += domainAssign[n].load;
+      tot_worksph += domainAssign[n].worksph;
+    }
+
+  for(int n = 0; n < ndomains; n++)
+    {
+      domainAssign[n].normalized_load = domainAssign[n].work / (tot_work + MIN_FLOAT_NUMBER) +
+                                        domainAssign[n].worksph / (tot_worksph + MIN_FLOAT_NUMBER) +
+                                        domainAssign[n].load / ((double)tot_load + MIN_FLOAT_NUMBER);
+    }
+
+  myfree(glob_bin_Cost);
+  myfree(loc_bin_Cost);
+
+  /* sort the pieces according to their normalized work-load, with the most heavily loaded coming first */
+  mysort(domainAssign, ndomains, sizeof(struct domain_segments_data), domain_sort_load);
+
+  /* initialize a structure that stores the maximum gravity and hydro cost load for each timebin */
+  double max_GravCost[TIMEBINS], max_HydroCost[TIMEBINS];
+  for(int i = 0; i < TIMEBINS; i++)
+    {
+      max_GravCost[i]  = 0;
+      max_HydroCost[i] = 0;
+    }
+
+  double max_load = 0;
+
+  /* create priority trees, one for the cost of each occupied timebin,
+   * one for the hydro cost of each occupied timebin */
+  struct mytree queue_gravcost[TIMEBINS];
+  struct mytree queue_hydrocost[TIMEBINS];
+  struct mytree queue_load;
+  struct mydata *ngrav[TIMEBINS];
+  struct mydata *nhydro[TIMEBINS];
+  struct mydata *nload;
+
+  for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++)
+    {
+      if(domain_to_be_balanced[bin]) /* trees and node arrays exist only for bins that are balanced */
+        {
+          RB_INIT(&queue_gravcost[bin]);
+          ngrav[bin] = mymalloc("ngrav[bin]", NTask * sizeof(struct mydata));
+
+          RB_INIT(&queue_hydrocost[bin]);
+          nhydro[bin] = mymalloc("nhydro[bin]", NTask * sizeof(struct mydata));
+        }
+    }
+
+  RB_INIT(&queue_load);
+  nload = mymalloc("nload", NTask * sizeof(struct mydata));
+  for(int i = 0; i < NTask; i++)
+    {
+      nload[i].pri    = 0;
+      nload[i].target = i;
+      RB_INSERT(mytree, &queue_load, &nload[i]);
+    }
+
+  /* fill in all the tasks into each queue. The priority 'pri' starts at zero cost for every bin, the field 'target' labels the task
+   */
+  for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++)
+    {
+      if(!domain_to_be_balanced[bin])
+        continue;
+
+      for(int i = 0; i < NTask; i++)
+        {
+          ngrav[bin][i].pri    = 0;
+          ngrav[bin][i].target = i;
+          RB_INSERT(mytree, &queue_gravcost[bin], &ngrav[bin][i]);
+
+          nhydro[bin][i].pri    = 0;
+          nhydro[bin][i].target = i;
+          RB_INSERT(mytree, &queue_hydrocost[bin], &nhydro[bin][i]);
+        }
+    }
+
+  int n_lowest = MAX_FIRST_ELEMENTS_CONSIDERED; /* number of candidate tasks taken from the front of each queue */
+  if(n_lowest > NTask)
+    n_lowest = NTask;
+
+  int rep, *candidates = mymalloc("candidates", n_lowest * sizeof(int));
+  struct mydata *np;
+
+  /* now assign each of the domains to a CPU, trying to minimize the overall runtime */
+  for(int n = 0; n < ndomains; n++)
+    {
+      double best_runtime = MAX_FLOAT_NUMBER;
+      int best_target     = -1;
+
+      for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++)
+        {
+          if(!domain_to_be_balanced[bin])
+            continue;
+
+          int target;
+
+          for(int set = 0; set < 2; set++) /* set 0: gravity queue of this bin, set 1: hydro queue of this bin */
+            {
+              if(set == 0)
+                {
+#ifndef SELFGRAVITY
+                  continue;
+#endif /* #ifndef SELFGRAVITY */
+                  /* look up the n_lowest smallest elements from the tree */
+                  for(np = RB_MIN(mytree, &queue_gravcost[bin]), rep = 0; np != NULL && rep < n_lowest;
+                      np = RB_NEXT(mytree, &queue_gravcost[bin], np), rep++)
+                    candidates[rep] = np->target;
+                }
+              else
+                {
+                  for(np = RB_MIN(mytree, &queue_hydrocost[bin]), rep = 0; np != NULL && rep < n_lowest;
+                      np = RB_NEXT(mytree, &queue_hydrocost[bin], np), rep++)
+                    candidates[rep] = np->target;
+                }
+
+              for(rep = 0; rep < n_lowest; rep++)
+                {
+                  target = candidates[rep];
+
+                  double runtime = 0; /* estimated cost if piece n were given to this candidate */
+
+                  for(int i = 0; i < TIMEBINS; i++)
+                    {
+                      double sum = domainAssign[n].bin_GravCost[i] + tasklist[target].bin_GravCost[i];
+                      if(sum < max_GravCost[i]) /* a bin costs at least the current maximum over all tasks */
+                        sum = max_GravCost[i];
+
+                      runtime += sum / (totgravcost + MIN_FLOAT_NUMBER);
+                    }
+
+                  for(int i = 0; i < TIMEBINS; i++)
+                    {
+                      double sum = domainAssign[n].bin_HydroCost[i] + tasklist[target].bin_HydroCost[i];
+                      if(sum < max_HydroCost[i])
+                        sum = max_HydroCost[i];
+
+                      runtime += sum / (totsphcost + MIN_FLOAT_NUMBER);
+                    }
+
+                  double load = domainAssign[n].load + tasklist[target].load;
+                  if(load < max_load)
+                    load = max_load;
+
+                  runtime += ((double)load) / totpartcount;
+
+                  if(runtime < best_runtime || best_target < 0)
+                    {
+                      best_runtime = runtime;
+                      best_target  = target;
+                    }
+                }
+            }
+        }
+
+      /* now check also the load queue */
+      for(np = RB_MIN(mytree, &queue_load), rep = 0; np != NULL && rep < n_lowest; np = RB_NEXT(mytree, &queue_load, np), rep++)
+        candidates[rep] = np->target;
+
+      int target;
+
+      for(rep = 0; rep < n_lowest; rep++)
+        {
+          target = candidates[rep];
+
+          double runtime = 0; /* same estimate as above, except that 1.0e-60 guards the divisions here */
+
+          for(int i = 0; i < TIMEBINS; i++)
+            {
+              double sum = domainAssign[n].bin_GravCost[i] + tasklist[target].bin_GravCost[i];
+              if(sum < max_GravCost[i])
+                sum = max_GravCost[i];
+
+              runtime += sum / (totgravcost + 1.0e-60);
+            }
+
+          for(int i = 0; i < TIMEBINS; i++)
+            {
+              double sum = domainAssign[n].bin_HydroCost[i] + tasklist[target].bin_HydroCost[i];
+              if(sum < max_HydroCost[i])
+                sum = max_HydroCost[i];
+
+              runtime += sum / (totsphcost + 1.0e-60);
+            }
+
+          double load = domainAssign[n].load + tasklist[target].load;
+          if(load < max_load)
+            load = max_load;
+
+          runtime += ((double)load) / totpartcount;
+
+          if(runtime < best_runtime || best_target < 0)
+            {
+              best_runtime = runtime;
+              best_target  = target;
+            }
+        }
+
+      if(best_target < 0)
+        terminate("best_target < 0");
+
+      target = best_target;
+
+      domainAssign[n].task = target;
+      tasklist[target].work += domainAssign[n].work;
+      tasklist[target].load += domainAssign[n].load;
+      tasklist[target].worksph += domainAssign[n].worksph;
+      tasklist[target].count++;
+
+      /* now update the elements in the sorted trees */
+
+      RB_REMOVE(mytree, &queue_load, &nload[target]); /* taken out and re-inserted because the priority changes */
+      nload[target].pri = tasklist[target].load;
+      RB_INSERT(mytree, &queue_load, &nload[target]);
+
+      if(max_load < tasklist[target].load)
+        max_load = tasklist[target].load;
+
+      for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++)
+        {
+          if(domain_to_be_balanced[bin])
+            {
+              tasklist[target].bin_GravCost[bin] += domainAssign[n].bin_GravCost[bin];
+              tasklist[target].bin_HydroCost[bin] += domainAssign[n].bin_HydroCost[bin];
+
+              double eps_grav = 1.0e-9 * (domainAssign[n].load / totpartcount) *
+                                totgravcost; /* these will be added in order to break degeneracies in the sort-order in case the
+                                                grav/hydro cost in certain cells is zero */
+              double eps_hydro = 1.0e-9 * (domainAssign[n].load / totpartcount) * totsphcost;
+
+              RB_REMOVE(mytree, &queue_gravcost[bin], &ngrav[bin][target]);
+              ngrav[bin][target].pri = ngrav[bin][target].pri + domainAssign[n].bin_GravCost[bin] + eps_grav;
+              RB_INSERT(mytree, &queue_gravcost[bin], &ngrav[bin][target]);
+
+              RB_REMOVE(mytree, &queue_hydrocost[bin], &nhydro[bin][target]);
+              nhydro[bin][target].pri = nhydro[bin][target].pri + domainAssign[n].bin_HydroCost[bin] + eps_hydro;
+              RB_INSERT(mytree, &queue_hydrocost[bin], &nhydro[bin][target]);
+
+              if(max_GravCost[bin] < tasklist[target].bin_GravCost[bin])
+                max_GravCost[bin] = tasklist[target].bin_GravCost[bin];
+
+              if(max_HydroCost[bin] < tasklist[target].bin_HydroCost[bin])
+                max_HydroCost[bin] = tasklist[target].bin_HydroCost[bin];
+            }
+        }
+    }
+
+  myfree(candidates);
+
+  /* free the elements for the RB tree again */
+  myfree(nload);
+  for(int bin = All.HighestOccupiedTimeBin; bin >= All.LowestOccupiedTimeBin; bin--) /* reverse order of the allocation */
+    {
+      if(domain_to_be_balanced[bin])
+        {
+          myfree(nhydro[bin]);
+          myfree(ngrav[bin]);
+        }
+    }
+
+  mysort(domainAssign, ndomains, sizeof(struct domain_segments_data), domain_sort_task); /* group the pieces by assigned task */
+
+  for(int n = 0; n < ndomains; n++)
+    {
+      DomainStartList[n] = domainAssign[n].start;
+      DomainEndList[n]   = domainAssign[n].end;
+
+      for(int i = DomainStartList[n]; i <= DomainEndList[n]; i++)
+        DomainTask[i] = domainAssign[n].task; /* from here on DomainTask holds the task of each top-leaf */
+    }
+
+  myfree(tasklist);
+  myfree(domainAssign);
+
+  double t1 = second();
+  mpi_printf("DOMAIN: combining multiple-domains took %g sec\n", timediff(t0, t1));
+}
+
+/*! \brief Relabel the target tasks such that particle exchange is reduced.
+ *
+ *  Every task looks up which target task would receive most of its local
+ *  particles. These wishes are gathered and sorted on all tasks, and from them
+ *  a permutation of the task labels is built and applied to the domain lists.
+ *
+ *  \return void
+ */
+void domain_optimize_domain_to_task_mapping(void)
+{
+  double tstart = second();
+
+  int *tally = mymalloc_clear("count_per_task", NTask * sizeof(int));
+
+  /* histogram of the destination tasks of all local particles */
+  for(int p = 0; p < NumPart; p++)
+    {
+      int node = 0;
+
+      while(topNodes[node].Daughter >= 0)
+        node = topNodes[node].Daughter + (Key[p] - topNodes[node].StartKey) / (topNodes[node].Size >> 3);
+
+      node = topNodes[node].Leaf;
+
+      int dest = DomainTask[node];
+      tally[dest]++;
+    }
+
+  /* the destination with the largest share of our particles is the label we would like to carry ourselves */
+
+  int want_task = 0, want_count = tally[0];
+  for(int t = 1; t < NTask; t++)
+    if(want_count < tally[t])
+      {
+        want_task  = t;
+        want_count = tally[t];
+      }
+
+  struct domain_count_data request;
+  struct domain_count_data *all_requests = mymalloc("domain_count", NTask * sizeof(struct domain_count_data));
+
+  request.origintask = ThisTask;
+  request.task       = want_task;
+  request.count      = want_count;
+
+  MPI_Allgather(&request, sizeof(struct domain_count_data), MPI_BYTE, all_requests, sizeof(struct domain_count_data), MPI_BYTE,
+                MPI_COMM_WORLD);
+
+  qsort(all_requests, NTask, sizeof(struct domain_count_data), domain_compare_count);
+
+  /* relabel[t] will be the task that takes over the domains currently
+     labelled t; together the entries form a permutation of all tasks */
+
+  int *relabel = mymalloc("new_task", NTask * sizeof(int));
+
+  /* from here on tally[] only flags the tasks that are already used */
+  for(int t = 0; t < NTask; t++)
+    {
+      relabel[t] = -1;
+      tally[t]   = 0;
+    }
+
+  for(int k = 0; k < NTask; k++)
+    {
+      const int wanted = all_requests[k].task;
+      const int owner  = all_requests[k].origintask;
+
+      if(relabel[wanted] != -1 || tally[owner] != 0)
+        continue; /* label already given away, or this task already got one */
+
+      tally[owner]    = 1; /* taken */
+      relabel[wanted] = owner;
+    }
+
+  /* collisions leave some labels open: hand them to the still unused tasks in ascending order */
+  for(int t = 0, spare = 0; t < NTask; t++)
+    {
+      if(relabel[t] != -1)
+        continue;
+
+      while(tally[spare])
+        spare++;
+
+      relabel[t]   = spare;
+      tally[spare] = 1;
+    }
+
+  int *old_start = mymalloc("copy_DomainStartList", All.MultipleDomains * NTask * sizeof(int));
+  int *old_end   = mymalloc("copy_DomainEndList", All.MultipleDomains * NTask * sizeof(int));
+
+  memcpy(old_start, DomainStartList, All.MultipleDomains * NTask * sizeof(int));
+  memcpy(old_end, DomainEndList, All.MultipleDomains * NTask * sizeof(int));
+
+  /* apply permutation to DomainTask assignment */
+  for(int t = 0; t < NTask; t++)
+    for(int m = 0; m < All.MultipleDomains; m++)
+      {
+        const int src = t * All.MultipleDomains + m;
+        const int dst = relabel[t] * All.MultipleDomains + m;
+        DomainStartList[dst] = old_start[src];
+        DomainEndList[dst]   = old_end[src];
+      }
+
+  myfree(old_end);
+  myfree(old_start);
+
+  for(int leaf = 0; leaf < NTopleaves; leaf++)
+    DomainTask[leaf] = relabel[DomainTask[leaf]];
+
+  myfree(relabel);
+  myfree(all_requests);
+  myfree(tally);
+
+  double tend = second();
+  mpi_printf("DOMAIN: task reshuffling took %g sec\n", timediff(tstart, tend));
+}
diff --git a/src/amuse/community/arepo/src/domain/domain_box.c b/src/amuse/community/arepo/src/domain/domain_box.c
new file mode 100644
index 0000000000..d7466f2449
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_box.c
@@ -0,0 +1,336 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/domain_box.c
+ * \date        05/2018
+ * \brief       Routines that determine domain box and do periodic wrapping.
+ * \details     contains files:
+ *                void domain_findExtent(void)
+ *                void do_box_wrapping(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+#include "domain.h"
+
+/*! \brief Shifts the coordinate in pos by the global displacement vector and passes it through WRAP_X/Y/Z.
+ *
+ *  \param[in,out] pos coordinate vector (3 entries), modified in place.
+ *  \param[in] mode displacement mode, either DISPLACE_POSITION_FORWARD (add) or DISPLACE_POSITION_BACKWARD (subtract)
+ *
+ *  \return void; any other mode ends in terminate().
+ */
+void domain_displacePosition(MyDouble *pos, enum domain_displace_mode mode)
+{
+  if(mode == DISPLACE_POSITION_FORWARD)
+    {
+      double xtmp, ytmp, ztmp; /* not read here directly; presumably scratch variables for WRAP_X/Y/Z -- confirm */
+      pos[0] = WRAP_X(pos[0] + All.GlobalDisplacementVector[0]);
+      pos[1] = WRAP_Y(pos[1] + All.GlobalDisplacementVector[1]);
+      pos[2] = WRAP_Z(pos[2] + All.GlobalDisplacementVector[2]);
+    }
+  else if(mode == DISPLACE_POSITION_BACKWARD)
+    {
+      double xtmp, ytmp, ztmp; /* same role as in the forward branch */
+      pos[0] = WRAP_X(pos[0] - All.GlobalDisplacementVector[0]);
+      pos[1] = WRAP_Y(pos[1] - All.GlobalDisplacementVector[1]);
+      pos[2] = WRAP_Z(pos[2] - All.GlobalDisplacementVector[2]);
+    }
+  else
+    terminate("Unknown mode %d.", (int)mode); /* cast keeps the argument an int for %d whatever the enum's type */
+}
+
+/*! \brief Applies domain_displacePosition() to every local particle position and gas cell center.
+ *
+ *  \param[in] mode displacement mode, either DISPLACE_POSITION_FORWARD or DISPLACE_POSITION_BACKWARD
+ *
+ *  \return void
+ */
+static void domain_displacePositions(enum domain_displace_mode mode)
+{
+  /* for the first NumGas of the NumPart local entries SphP[].Center is shifted along with P[].Pos */
+  for(int idx = 0; idx < NumPart; idx++)
+    {
+      const int is_derefined = (P[idx].ID == 0) && (P[idx].Mass == 0);
+      if(is_derefined)
+        continue;
+      domain_displacePosition(P[idx].Pos, mode);
+      if(idx < NumGas)
+        domain_displacePosition(SphP[idx].Center, mode);
+    }
+
+#ifdef PLACEHIGHRESREGION
+  domain_displacePosition(All.Xmintot[1], mode);
+  domain_displacePosition(All.Xmaxtot[1], mode);
+  domain_displacePosition(All.Corner[1], mode);
+  domain_displacePosition(All.UpperCorner[1], mode);
+#endif
+}
+
+/*! \brief Finds the extent of the global domain grid.
+ *
+ *  The minimum extent is the box size.
+ *
+ *  \return void
+ */
+void domain_findExtent(void)
+{
+  int i, j;
+  double len, xmin[3], xmax[3], xmin_glob[3], xmax_glob[3];
+
+  /* determine local extent; the bounds start at the box, so the result can only grow beyond it */
+  for(j = 0; j < 3; j++)
+    {
+      /* preset to simulation box */
+      xmin[j] = 0;
+      xmax[j] = boxSize;
+    }
+  /* stretched boxes: replace the upper bound of each affected axis */
+#ifdef LONG_X
+  xmax[0] = boxSize_X;
+#endif /* #ifdef LONG_X */
+#ifdef LONG_Y
+  xmax[1] = boxSize_Y;
+#endif /* #ifdef LONG_Y */
+#ifdef LONG_Z
+  xmax[2] = boxSize_Z;
+#endif /* #ifdef LONG_Z */
+  /* widen the local bounds to contain every particle (with ADDBACKGROUNDGRID only Type 0 entries count) */
+  for(i = 0; i < NumPart; i++)
+    {
+#ifdef ADDBACKGROUNDGRID
+      if(P[i].Type != 0)
+        continue;
+#endif /* #ifdef ADDBACKGROUNDGRID */
+      for(j = 0; j < 3; j++)
+        {
+          if(xmin[j] > P[i].Pos[j])
+            xmin[j] = P[i].Pos[j];
+
+          if(xmax[j] < P[i].Pos[j])
+            xmax[j] = P[i].Pos[j];
+        }
+    }
+  /* combine the per-task bounds; afterwards every task holds the same global minimum and maximum */
+  MPI_Allreduce(xmin, xmin_glob, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+  MPI_Allreduce(xmax, xmax_glob, 3, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+
+#ifdef ADDBACKGROUNDGRID
+  for(j = 0; j < 3; j++)
+    if(xmax_glob[j] < All.BoxSize)
+      xmax_glob[j] = All.BoxSize;
+
+  for(j = 0; j < 3; j++)
+    if(xmin_glob[j] > 0)
+      xmin_glob[j] = 0;
+#endif /* #ifdef ADDBACKGROUNDGRID */
+  /* a single side length is used for all axes: the largest global extent over the three dimensions */
+  len = 0;
+  for(j = 0; j < 3; j++)
+    if(xmax_glob[j] - xmin_glob[j] > len)
+      len = xmax_glob[j] - xmin_glob[j];
+
+#if defined(GRAVITY_NOT_PERIODIC) && !defined(ADDBACKGROUNDGRID)
+  len *= 1.2; /* enlarge box a bit to avoid triggering of an out of box recovery */
+#else         /* #if defined(GRAVITY_NOT_PERIODIC) && !defined(ADDBACKGROUNDGRID) */
+  len *= 1.00001;
+#endif        /* #if defined(GRAVITY_NOT_PERIODIC) && !defined(ADDBACKGROUNDGRID) #else */
+
+#if defined(DO_NOT_RANDOMIZE_DOMAINCENTER) || !defined(GRAVITY_NOT_PERIODIC) || defined(ONEDIMS) || defined(TWODIMS)
+  for(j = 0; j < 3; j++)
+    {
+      DomainCenter[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]);
+      DomainCorner[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]) - 0.5 * len;
+    }
+#else  /* #if defined(DO_NOT_RANDOMIZE_DOMAINCENTER) || !defined(GRAVITY_NOT_PERIODIC) || defined(ONEDIMS) || defined(TWODIMS) */
+  for(j = 0; j < 3; j++)
+    {
+      DomainCenter[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]);
+      DomainCenter[j] += (2. * get_random_number() - 1.) * 0.5 * len; /* random offset of at most half a side length */
+    }
+
+  MPI_Bcast(DomainCenter, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); /* all tasks adopt the center computed on task 0 */
+
+  len *= 2; /* twice the side: the cube around the shifted center then still spans the unshifted extent */
+
+  for(j = 0; j < 3; j++)
+    DomainCorner[j] = DomainCenter[j] - 0.5 * len;
+#endif /* #if defined(DO_NOT_RANDOMIZE_DOMAINCENTER) || !defined(GRAVITY_NOT_PERIODIC) || defined(ONEDIMS) || defined(TWODIMS) #else \
+        */
+
+  DomainLen = len;
+
+  DomainInverseLen = 1.0 / DomainLen;
+  DomainFac        = 1.0 / len * (((peanokey)1) << (BITS_PER_DIMENSION));
+  DomainBigFac     = (DomainLen / (((long long)1) << 52));
+}
+
+/*! \brief Makes sure all particles are within box.
+ *
+ *  Periodically maps particle coordinates (Pos) onto [0, boxsize) per axis; on axes built with REFLECTIVE_X/Y/Z an
+ *  out-of-box coordinate terminates the run instead.  With GRAVITY_NOT_PERIODIC only Type 0 entries are treated.
+ *  After this function has been called, a new domain decomposition should be done, which will
+ *  also force a new tree construction.
+ *
+ *  \return void
+ */
+void do_box_wrapping(void)
+{
+  int j;
+  double boxsize[3];
+
+#ifdef ADDBACKGROUNDGRID
+  return; /* nothing is wrapped in this configuration */
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+  for(j = 0; j < 3; j++)
+    boxsize[j] = All.BoxSize;
+
+#ifdef LONG_X
+  boxsize[0] *= LONG_X;
+#endif /* #ifdef LONG_X */
+#ifdef LONG_Y
+  boxsize[1] *= LONG_Y;
+#endif /* #ifdef LONG_Y */
+#ifdef LONG_Z
+  boxsize[2] *= LONG_Z;
+#endif /* #ifdef LONG_Z */
+
+#if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) && (NUMDIMS > 2)
+  domain_displacePositions(DISPLACE_POSITION_BACKWARD); /* take out the displacement that is currently applied */
+  /* task 0 draws the new vector; the broadcast below makes all tasks apply the same shift */
+  if(ThisTask == 0)
+    {
+      double prefac = 1.;
+#ifdef PLACEHIGHRESREGION
+      prefac = 0.5;
+#endif
+      for(j = 0; j < 3; j++)
+        All.GlobalDisplacementVector[j] = (get_random_number() - 0.5) * boxsize[j] * prefac;
+    }
+
+  mpi_printf("DOMAIN: New global displacement vector: %g, %g, %g\n", All.GlobalDisplacementVector[0], All.GlobalDisplacementVector[1],
+             All.GlobalDisplacementVector[2]);
+  MPI_Bcast(All.GlobalDisplacementVector, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  domain_displacePositions(DISPLACE_POSITION_FORWARD);
+#endif /* #if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) && (NUMDIMS > 2) */
+
+  int i;
+  for(i = 0; i < NumPart; i++)
+    {
+      if(i < NumGas)
+        trans_table[i].wrapped = 0; /* bit mask filled below: 1/2 for x, 4/8 for y, 16/32 for z (raised / lowered) */
+
+#if defined(GRAVITY_NOT_PERIODIC)
+      if(P[i].Type != 0)
+        continue;
+#endif /* #if defined(GRAVITY_NOT_PERIODIC) */
+
+#if !defined(REFLECTIVE_X)
+      while(P[i].Pos[0] < 0)
+        {
+          P[i].Pos[0] += boxsize[0];
+          if(i < NumGas)
+            trans_table[i].wrapped |= 1;
+        }
+
+      while(P[i].Pos[0] >= boxsize[0])
+        {
+          P[i].Pos[0] -= boxsize[0];
+          if(i < NumGas)
+            trans_table[i].wrapped |= 2;
+        }
+
+#else  /* #if !defined(REFLECTIVE_X) */
+      if(P[i].Pos[0] < 0 || P[i].Pos[0] >= boxsize[0])
+        {
+          /* Hand the format to terminate() directly (it takes printf-style arguments) instead of
+           * reusing a pre-formatted buffer as the format string; the ID is widened to long long so
+           * that the conversion specifier is correct whatever integer type the ID field has. */
+          terminate("i=%d ID=%lld type=%d moved out of box. x=%g", i, (long long)P[i].ID, P[i].Type, P[i].Pos[0]);
+        }
+#endif /* #if !defined(REFLECTIVE_X) #else */
+
+#if !defined(REFLECTIVE_Y)
+      while(P[i].Pos[1] < 0)
+        {
+          P[i].Pos[1] += boxsize[1];
+          if(i < NumGas)
+            trans_table[i].wrapped |= 4;
+        }
+
+      while(P[i].Pos[1] >= boxsize[1])
+        {
+          P[i].Pos[1] -= boxsize[1];
+          if(i < NumGas)
+            trans_table[i].wrapped |= 8;
+        }
+
+#else  /* #if !defined(REFLECTIVE_Y) */
+      if(P[i].Pos[1] < 0 || P[i].Pos[1] >= boxsize[1])
+        {
+          /* same reporting scheme as for the x-axis: the format goes straight to terminate(),
+           * and the ID is widened to long long to match %lld
+           */
+          terminate("i=%d ID=%lld type=%d moved out of box. y=%g", i, (long long)P[i].ID, P[i].Type, P[i].Pos[1]);
+        }
+#endif /* #if !defined(REFLECTIVE_Y) #else */
+
+#if !defined(REFLECTIVE_Z)
+      while(P[i].Pos[2] < 0)
+        {
+          P[i].Pos[2] += boxsize[2];
+          if(i < NumGas)
+            trans_table[i].wrapped |= 16;
+        }
+
+      while(P[i].Pos[2] >= boxsize[2])
+        {
+          P[i].Pos[2] -= boxsize[2];
+          if(i < NumGas)
+            trans_table[i].wrapped |= 32;
+        }
+
+#else  /* #if !defined(REFLECTIVE_Z) */
+      if(P[i].Pos[2] < 0 || P[i].Pos[2] >= boxsize[2])
+        {
+          /* same reporting scheme as for the x-axis: the format goes straight to terminate(),
+           * and the ID is widened to long long to match %lld
+           */
+          terminate("i=%d ID=%lld type=%d moved out of box. z=%g", i, (long long)P[i].ID, P[i].Type, P[i].Pos[2]);
+        }
+#endif /* #if !defined(REFLECTIVE_Z) #else */
+    }
+}
diff --git a/src/amuse/community/arepo/src/domain/domain_counttogo.c b/src/amuse/community/arepo/src/domain/domain_counttogo.c
new file mode 100644
index 0000000000..82e798ef7e
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_counttogo.c
@@ -0,0 +1,84 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/domain_counttogo.c
+ * \date        05/2018
+ * \brief       Functions to determine number of exchanged particles.
+ * \details     contains functions:
+ *                int domain_countToGo(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+#include "domain.h"
+
+/*! \brief Counts, per destination task, the particles and cells that must leave this task.
+ *
+ *  Fills toGo[] and toGoSph[] (the latter for Type 0 entries only) by looking up
+ *  the task assigned to the top leaf of each local particle, then exchanges the
+ *  counts with all tasks so that toGet[] and toGetSph[] hold the incoming numbers.
+ *
+ *  \return 0
+ */
+int domain_countToGo(void)
+{
+  /* reset the outgoing tallies for every destination task */
+  for(int task = 0; task < NTask; task++)
+    toGo[task] = toGoSph[task] = 0;
+
+  for(int p = 0; p < NumPart; p++)
+    {
+      /* walk down from top node 0 while the current node has daughters, picking the daughter from the key offset */
+      int node = 0;
+      while(topNodes[node].Daughter >= 0)
+        node = topNodes[node].Daughter + (Key[p] - topNodes[node].StartKey) / (topNodes[node].Size >> 3);
+
+      /* the node reached names a top leaf; DomainTask[] gives the task that leaf is assigned to */
+      const int dest = DomainTask[topNodes[node].Leaf];
+
+      if(dest == ThisTask)
+        continue; /* stays on this task, nothing to send */
+
+      toGo[dest]++;
+      if(P[p].Type == 0)
+        toGoSph[dest]++;
+    }
+
+  /* one int per task pair: the count sent to a task arrives there as its toGet[] entry for this task */
+  MPI_Alltoall(toGo, 1, MPI_INT, toGet, 1, MPI_INT, MPI_COMM_WORLD);
+  MPI_Alltoall(toGoSph, 1, MPI_INT, toGetSph, 1, MPI_INT, MPI_COMM_WORLD);
+
+  /* the return value carries no information; it is always 0 */
+  return 0;
+}
diff --git a/src/amuse/community/arepo/src/domain/domain_exchange.c b/src/amuse/community/arepo/src/domain/domain_exchange.c
new file mode 100644
index 0000000000..454cfafc82
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_exchange.c
@@ -0,0 +1,399 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/domain_exchange.c
+ * \date        05/2018
+ * \brief       Algorithms for exchanging particle data and associated
+ *              rearrangements.
+ * \details     This includes changing the size of the P and SphP arrays as
+ *              well as the particle exchange routine itself.
+ *              contains functions:
+ *                void domain_resize_storage(int count_get, int count_get_sph,
+ *                  int option_flag)
+ *                void domain_exchange(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+#include "domain.h"
+
+/*! \brief Changes memory allocation if necessary for particle and cell data.
+ *
+ *  The largest post-import load over all tasks is compared against the current
+ *  All.MaxPart / All.MaxPartSph; if it lies outside the band set by
+ *  ALLOC_TOLERANCE, the limit is recomputed and the storage is reallocated.
+ *
+ *  \param[in] count_get How many particles are imported?
+ *  \param[in] count_get_sph How many cells are imported?
+ *  \param[in] option_flag 1: also resize the Key array; 2: also extend the ngb tree if the new cell limit exceeds it.
+ *
+ *  \return void
+ */
+void domain_resize_storage(int count_get, int count_get_sph, int option_flag)
+{
+  /* slot 0: particles, slot 1: gas cells -- local counts once the pending import has arrived */
+  int local_load[2] = {NumPart + count_get, NumGas + count_get_sph}, peak_load[2];
+
+  /* every task obtains the maximum over all tasks, so all of them take the same decision below */
+  MPI_Allreduce(local_load, peak_load, 2, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  const int peak_part = peak_load[0];
+  const int peak_sph  = peak_load[1];
+
+  const int part_over  = peak_part > (1.0 - ALLOC_TOLERANCE) * All.MaxPart;
+  const int part_under = peak_part < (1.0 - 3 * ALLOC_TOLERANCE) * All.MaxPart;
+  if(part_over || part_under)
+    {
+      All.MaxPart = peak_part / (1.0 - 2 * ALLOC_TOLERANCE);
+      reallocate_memory_maxpart();
+      if(option_flag == 1)
+        Key = (peanokey *)myrealloc_movable(Key, sizeof(peanokey) * All.MaxPart);
+    }
+
+  const int sph_over  = peak_sph >= (1.0 - ALLOC_TOLERANCE) * All.MaxPartSph; /* note: >= here, > for particles */
+  const int sph_under = peak_sph < (1.0 - 3 * ALLOC_TOLERANCE) * All.MaxPartSph;
+  if(sph_over || sph_under)
+    {
+      All.MaxPartSph = peak_sph / (1.0 - 2 * ALLOC_TOLERANCE);
+      if(option_flag == 2 && All.MaxPartSph > Ngb_MaxPart)
+        ngb_treemodifylength(All.MaxPartSph - Ngb_MaxPart);
+      reallocate_memory_maxpartsph();
+    }
+}
+
+/*! \brief Exchanges particles and cells according to new domain decomposition.
+ *
+ *  Communicates particles and cells to their new task. P and SphP arrays are
+ *  changed in size accordingly.
+ *
+ *  \return void
+ */
+void domain_exchange(void)
+{
+  double t0 = second();
+
+  int count_togo = 0, count_togo_sph = 0, count_get = 0, count_get_sph = 0;
+  int *count, *count_sph, *offset, *offset_sph;
+  int *count_recv, *count_recv_sph, *offset_recv, *offset_recv_sph;
+  int i, n, no, target;
+  struct particle_data *partBuf;
+  struct sph_particle_data *sphBuf;
+
+  peanokey *keyBuf;
+  /* number of particles leaving this task; sumup_longs() presumably sums it over all tasks (only logged at the end) -- confirm */
+  long long sumtogo = 0;
+
+  for(i = 0; i < NTask; i++)
+    sumtogo += toGo[i];
+
+  sumup_longs(1, &sumtogo, &sumtogo);
+
+  count           = (int *)mymalloc_movable(&count, "count", NTask * sizeof(int));
+  count_sph       = (int *)mymalloc_movable(&count_sph, "count_sph", NTask * sizeof(int));
+  offset          = (int *)mymalloc_movable(&offset, "offset", NTask * sizeof(int));
+  offset_sph      = (int *)mymalloc_movable(&offset_sph, "offset_sph", NTask * sizeof(int));
+  count_recv      = (int *)mymalloc_movable(&count_recv, "count_recv", NTask * sizeof(int));
+  count_recv_sph  = (int *)mymalloc_movable(&count_recv_sph, "count_recv_sph", NTask * sizeof(int));
+  offset_recv     = (int *)mymalloc_movable(&offset_recv, "offset_recv", NTask * sizeof(int));
+  offset_recv_sph = (int *)mymalloc_movable(&offset_recv_sph, "offset_recv_sph", NTask * sizeof(int));
+
+  int prec_offset;
+  int *decrease;
+
+  decrease = (int *)mymalloc_movable(&decrease, "decrease", NTask * sizeof(int));
+  /* send buffer layout: all outgoing gas entries first, grouped by target task, then the remaining outgoing particles */
+  for(i = 1, offset_sph[0] = 0, decrease[0] = 0; i < NTask; i++)
+    {
+      offset_sph[i] = offset_sph[i - 1] + toGoSph[i - 1];
+      decrease[i]   = toGoSph[i - 1];
+    }
+
+  prec_offset = offset_sph[NTask - 1] + toGoSph[NTask - 1];
+
+  offset[0] = prec_offset;
+  for(i = 1; i < NTask; i++)
+    offset[i] = offset[i - 1] + (toGo[i - 1] - decrease[i]); /* toGo[] includes the gas entries, hence the subtraction */
+
+  myfree(decrease);
+
+  for(i = 0; i < NTask; i++)
+    {
+      count_togo += toGo[i];
+      count_togo_sph += toGoSph[i];
+      count_get += toGet[i];
+      count_get_sph += toGetSph[i];
+    }
+
+  partBuf = (struct particle_data *)mymalloc_movable(&partBuf, "partBuf", count_togo * sizeof(struct particle_data));
+  sphBuf  = (struct sph_particle_data *)mymalloc_movable(&sphBuf, "sphBuf", count_togo_sph * sizeof(struct sph_particle_data));
+
+  keyBuf = (peanokey *)mymalloc_movable(&keyBuf, "keyBuf", count_togo * sizeof(peanokey));
+
+  for(i = 0; i < NTask; i++)
+    {
+      count[i] = count_sph[i] = 0;
+    }
+  /* copy every particle whose top leaf belongs to another task into the send buffers and close the gap it leaves */
+  for(n = 0; n < NumPart; n++)
+    {
+      no = 0;
+      /* the key is consumed three bits at a time, most significant triplet first, to select a daughter per level */
+      peanokey mask = ((peanokey)7) << (3 * (BITS_PER_DIMENSION - 1));
+      int shift     = 3 * (BITS_PER_DIMENSION - 1);
+
+      while(topNodes[no].Daughter >= 0)
+        {
+          no = topNodes[no].Daughter + (int)((Key[n] & mask) >> shift);
+          mask >>= 3;
+          shift -= 3;
+        }
+
+      no = topNodes[no].Leaf;
+
+      target = DomainTask[no];
+
+      if(target != ThisTask)
+        {
+          /* copy this particle into the exchange buffer */
+          if(P[n].Type == 0)
+            {
+              partBuf[offset_sph[target] + count_sph[target]] = P[n];
+              keyBuf[offset_sph[target] + count_sph[target]]  = Key[n];
+              sphBuf[offset_sph[target] + count_sph[target]]  = SphP[n];
+              count_sph[target]++;
+            }
+          else
+            {
+              partBuf[offset[target] + count[target]] = P[n];
+              keyBuf[offset[target] + count[target]]  = Key[n];
+              count[target]++;
+            }
+          /* fill the hole: for gas the last gas entry moves to n and the last particle takes the freed gas slot */
+          if(P[n].Type == 0)
+            {
+              P[n]          = P[NumGas - 1];
+              P[NumGas - 1] = P[NumPart - 1];
+
+              Key[n]          = Key[NumGas - 1];
+              Key[NumGas - 1] = Key[NumPart - 1];
+
+              SphP[n] = SphP[NumGas - 1];
+
+              NumGas--;
+            }
+          else
+            {
+              P[n]   = P[NumPart - 1];
+              Key[n] = Key[NumPart - 1];
+            }
+
+          NumPart--;
+          n--; /* slot n now holds a different particle that still has to be examined */
+
+        } /* target != ThisTask */
+    }     /* n < NumPart */
+
+  /**** now resize the storage for the P[] and SphP[] arrays if needed ****/
+  domain_resize_storage(count_get, count_get_sph, 1);
+
+  /*****  space has been created, now can do the actual exchange *****/
+  int count_totget = count_get_sph;
+  /* shift the non-gas particles up so that the incoming gas entries can be stored right behind the local gas */
+  if(count_totget)
+    {
+      memmove(P + NumGas + count_totget, P + NumGas, (NumPart - NumGas) * sizeof(struct particle_data));
+      memmove(Key + NumGas + count_totget, Key + NumGas, (NumPart - NumGas) * sizeof(peanokey));
+    }
+
+  for(i = 0; i < NTask; i++)
+    {
+      count_recv_sph[i] = toGetSph[i];
+      count_recv[i]     = toGet[i] - toGetSph[i];
+    }
+  /* receive layout: incoming gas behind the local gas, incoming other particles behind the local non-gas particles */
+  int prec_count;
+  for(i = 1, offset_recv_sph[0] = NumGas; i < NTask; i++)
+    offset_recv_sph[i] = offset_recv_sph[i - 1] + count_recv_sph[i - 1];
+  prec_count = NumGas + count_get_sph;
+
+  offset_recv[0] = NumPart - NumGas + prec_count;
+
+  for(i = 1; i < NTask; i++)
+    offset_recv[i] = offset_recv[i - 1] + count_recv[i - 1];
+
+#ifndef USE_MPIALLTOALLV_IN_DOMAINDECOMP
+
+  int ngrp;
+#ifdef NO_ISEND_IRECV_IN_DOMAIN /* synchronous communication */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      target = ThisTask ^ ngrp;
+
+      if(target < NTask)
+        {
+          if(count_sph[target] > 0 || count_recv_sph[target] > 0)
+            {
+              MPI_Sendrecv(partBuf + offset_sph[target], count_sph[target] * sizeof(struct particle_data), MPI_BYTE, target,
+                           TAG_PDATA_SPH, P + offset_recv_sph[target], count_recv_sph[target] * sizeof(struct particle_data), MPI_BYTE,
+                           target, TAG_PDATA_SPH, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              MPI_Sendrecv(sphBuf + offset_sph[target], count_sph[target] * sizeof(struct sph_particle_data), MPI_BYTE, target,
+                           TAG_SPHDATA, SphP + offset_recv_sph[target], count_recv_sph[target] * sizeof(struct sph_particle_data),
+                           MPI_BYTE, target, TAG_SPHDATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              MPI_Sendrecv(keyBuf + offset_sph[target], count_sph[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY_SPH,
+                           Key + offset_recv_sph[target], count_recv_sph[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY_SPH,
+                           MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+
+          if(count[target] > 0 || count_recv[target] > 0)
+            {
+              MPI_Sendrecv(partBuf + offset[target], count[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
+                           P + offset_recv[target], count_recv[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
+                           MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              MPI_Sendrecv(keyBuf + offset[target], count[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY,
+                           Key + offset_recv[target], count_recv[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+#else  /* #ifdef NO_ISEND_IRECV_IN_DOMAIN */
+  /* asynchronous communication */
+
+  MPI_Request *requests = (MPI_Request *)mymalloc_movable(&requests, "requests", 30 * NTask * sizeof(MPI_Request));
+  int n_requests        = 0;
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      target = ThisTask ^ ngrp;
+
+      if(target < NTask)
+        {
+          if(count_recv_sph[target] > 0)
+            {
+              MPI_Irecv(P + offset_recv_sph[target], count_recv_sph[target] * sizeof(struct particle_data), MPI_BYTE, target,
+                        TAG_PDATA_SPH, MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Irecv(SphP + offset_recv_sph[target], count_recv_sph[target] * sizeof(struct sph_particle_data), MPI_BYTE, target,
+                        TAG_SPHDATA, MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Irecv(Key + offset_recv_sph[target], count_recv_sph[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY_SPH,
+                        MPI_COMM_WORLD, &requests[n_requests++]);
+            }
+
+          if(count_recv[target] > 0)
+            {
+              MPI_Irecv(P + offset_recv[target], count_recv[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
+                        MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Irecv(Key + offset_recv[target], count_recv[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY, MPI_COMM_WORLD,
+                        &requests[n_requests++]);
+            }
+        }
+    }
+
+  MPI_Barrier(MPI_COMM_WORLD); /* not really necessary, but this will guarantee that all receives are
+                                  posted before the sends, which helps the stability of MPI on
+                                  bluegene, and perhaps some mpich1-clusters */
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      target = ThisTask ^ ngrp;
+
+      if(target < NTask)
+        {
+          if(count_sph[target] > 0)
+            {
+              MPI_Isend(partBuf + offset_sph[target], count_sph[target] * sizeof(struct particle_data), MPI_BYTE, target,
+                        TAG_PDATA_SPH, MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Isend(sphBuf + offset_sph[target], count_sph[target] * sizeof(struct sph_particle_data), MPI_BYTE, target,
+                        TAG_SPHDATA, MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Isend(keyBuf + offset_sph[target], count_sph[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY_SPH,
+                        MPI_COMM_WORLD, &requests[n_requests++]);
+            }
+
+          if(count[target] > 0)
+            {
+              MPI_Isend(partBuf + offset[target], count[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
+                        MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Isend(keyBuf + offset[target], count[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY, MPI_COMM_WORLD,
+                        &requests[n_requests++]);
+            }
+        }
+    }
+
+  MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);
+  myfree(requests);
+#endif /* #ifdef NO_ISEND_IRECV_IN_DOMAIN #else */
+
+#else /* #ifndef USE_MPIALLTOALLV_IN_DOMAINDECOMP */
+  /* begins block of myMPI_Alltoallv communications */
+
+  myMPI_Alltoallv(partBuf, count_sph, offset_sph, P, count_recv_sph, offset_recv_sph, sizeof(struct particle_data), 0, MPI_COMM_WORLD);
+
+  myMPI_Alltoallv(sphBuf, count_sph, offset_sph, SphP, count_recv_sph, offset_recv_sph, sizeof(struct sph_particle_data), 0,
+                  MPI_COMM_WORLD);
+
+  myMPI_Alltoallv(keyBuf, count_sph, offset_sph, Key, count_recv_sph, offset_recv_sph, sizeof(peanokey), 0, MPI_COMM_WORLD);
+
+  myMPI_Alltoallv(partBuf, count, offset, P, count_recv, offset_recv, sizeof(struct particle_data), 0, MPI_COMM_WORLD);
+
+  myMPI_Alltoallv(keyBuf, count, offset, Key, count_recv, offset_recv, sizeof(peanokey), 0, MPI_COMM_WORLD);
+
+#endif /* #ifndef USE_MPIALLTOALLV_IN_DOMAINDECOMP #else */
+       /* close block of myMPI_Alltoallv communications */
+
+  NumPart += count_get;
+  NumGas += count_get_sph;
+  /* the buffers are released in exactly the reverse order of their allocation above */
+  myfree(keyBuf);
+  myfree(sphBuf);
+  myfree(partBuf);
+  myfree(offset_recv_sph);
+  myfree(offset_recv);
+  myfree(count_recv_sph);
+  myfree(count_recv);
+  myfree(offset_sph);
+  myfree(offset);
+  myfree(count_sph);
+  myfree(count);
+
+  double t1 = second();
+  mpi_printf("DOMAIN: exchange of %lld particles done. (took %g sec)\n", sumtogo, timediff(t0, t1));
+}
diff --git a/src/amuse/community/arepo/src/domain/domain_rearrange.c b/src/amuse/community/arepo/src/domain/domain_rearrange.c
new file mode 100644
index 0000000000..e75b37872e
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_rearrange.c
@@ -0,0 +1,129 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/domain_rearrange.c
+ * \date        05/2018
+ * \brief       Rearranges particle and cell arrays and gets rid of inactive
+ *              particles.
+ * \details     contains functions:
+ *                void domain_rearrange_particle_sequence(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+#include "domain.h"
+
+/*! \brief Gets rid of inactive/eliminated cells and particles.
+ *
+ *  Cells that were de-refined or turned into star particles stay in the arrays, flagged as inactive, until this
+ *  routine compacts them. USE_SFR: entries in the gas range with Type != 0 are swapped behind the gas block.
+ *  REFINEMENT_MERGE_CELLS: entries with Mass == 0 and ID == 0, or Type == 4 and Mass == 0, are removed from
+ *  P/SphP/Key; NumPart, NumGas and the global totals in All are reduced accordingly.
+ *
+ *  \return void
+ */
+void domain_rearrange_particle_sequence(void)
+{
+#if defined(USE_SFR)
+  if(Stars_converted)
+    {
+      struct particle_data psave;
+      peanokey key;
+
+      for(int i = 0; i < NumGas; i++)
+        if(P[i].Type != 0) /* If not a gas particle, swap to the end of the list */
+          {
+            psave = P[i];
+            key   = Key[i];
+
+            P[i]    = P[NumGas - 1];
+            SphP[i] = SphP[NumGas - 1];
+            Key[i]  = Key[NumGas - 1];
+
+            P[NumGas - 1]   = psave;
+            Key[NumGas - 1] = key;
+
+            NumGas--;
+            i--; /* slot i now holds the former last gas entry, which has not been checked yet */
+          }
+      /* Now we have rearranged the particles, we don't need to do it again unless there are more stars */
+      Stars_converted = 0;
+    }
+#endif /* #if defined(USE_SFR) */
+
+#if defined(REFINEMENT_MERGE_CELLS)
+  int i, count_elim = 0, count_gaselim = 0;
+
+  for(i = 0; i < NumPart; i++)
+    if((P[i].Mass == 0 && P[i].ID == 0) || (P[i].Type == 4 && P[i].Mass == 0))
+      {
+        if(P[i].Type == 0) /* gas: last gas entry -> slot i, last particle -> freed gas slot (gas block stays contiguous) */
+          {
+            P[i]    = P[NumGas - 1];
+            SphP[i] = SphP[NumGas - 1];
+            Key[i]  = Key[NumGas - 1];
+            P[NumGas - 1]   = P[NumPart - 1];
+            Key[NumGas - 1] = Key[NumPart - 1];
+            NumGas--;
+            count_gaselim++;
+          }
+        else
+          {
+            /* non-gas entry: overwrite it with the last particle, otherwise NumPart-- below would drop an
+             * unrelated particle and slot i would match again (same scheme as in domain_exchange()) */
+            P[i]   = P[NumPart - 1];
+            Key[i] = Key[NumPart - 1];
+          }
+
+        NumPart--;
+        i--; /* re-examine slot i, which now holds a particle that has not been checked */
+        count_elim++;
+      }
+
+  int count[2] = {count_elim, count_gaselim};
+  int tot[2] = {0, 0}, nelem = 2;
+
+  MPI_Allreduce(count, tot, nelem, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  if(ThisTask == 0)
+    {
+      printf("DOMAIN: Eliminated %d derefined/swallowed gas cells.\n", tot[1]);
+      myflush(stdout);
+    }
+
+  All.TotNumPart -= tot[0];
+  All.TotNumGas -= tot[1];
+
+#endif /* #if defined(REFINEMENT_MERGE_CELLS) */
+}
diff --git a/src/amuse/community/arepo/src/domain/domain_sort_kernels.c b/src/amuse/community/arepo/src/domain/domain_sort_kernels.c
new file mode 100644
index 0000000000..b0ad2c7a28
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_sort_kernels.c
@@ -0,0 +1,158 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/domain/domain_sort_kernels.c
+ * \date        05/2018
+ * \brief       Comparison and sorting functions for Peano-Hilbert data.
+ * \details     contains functions:
+ *                int domain_compare_count(const void *a, const void *b)
+ *                int domain_compare_key(const void *a, const void *b)
+ *                static void msort_domain_with_tmp(struct
+ *                  domain_peano_hilbert_data *b, size_t n, struct
+ *                  domain_peano_hilbert_data *t)
+ *                void mysort_domain(void *b, size_t n, size_t s)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 04.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+#include "domain.h"
+
+/*! \brief Three-way comparison of two domain_count_data objects by count.
+ *
+ *  The element with the larger count compares as smaller.
+ *
+ *  \param[in] a Pointer to first domain_count_data object.
+ *  \param[in] b Pointer to second domain_count_data object.
+ *
+ *  \return -1 if a->count > b->count; 1 if a->count < b->count; otherwise 0.
+ */
+int domain_compare_count(const void *a, const void *b)
+{
+  const struct domain_count_data *lhs = (const struct domain_count_data *)a;
+  const struct domain_count_data *rhs = (const struct domain_count_data *)b;
+
+  if(lhs->count > rhs->count)
+    return -1;
+
+  return (lhs->count < rhs->count) ? +1 : 0;
+}
+
+/*! \brief Comparison function for domain_peano_hilbert_data objects.
+ *
+ *  Compares element key; the element with the smaller key compares as smaller.
+ *
+ *  \param[in] a Pointer to first domain_peano_hilbert_data object.
+ *  \param[in] b Pointer to second domain_peano_hilbert_data object.
+ *
+ *  \return -1 if a->key < b->key; 1 if a->key > b->key; otherwise 0.
+ */
+int domain_compare_key(const void *a, const void *b)
+{
+  const struct domain_peano_hilbert_data *lhs = (const struct domain_peano_hilbert_data *)a;
+  const struct domain_peano_hilbert_data *rhs = (const struct domain_peano_hilbert_data *)b;
+
+  if(lhs->key < rhs->key)
+    return -1;
+
+  return (lhs->key > rhs->key) ? +1 : 0;
+}
+
+/*! \brief Recursive merge sort by key that uses a caller-supplied scratch array.
+ *
+ *  \param[in, out] b domain_peano_hilbert data array that is sorted in place.
+ *  \param[in] n Number of elements in b.
+ *  \param[in, out] t Scratch array; the merge step writes up to n elements.
+ *
+ *  \return void
+ */
+static void msort_domain_with_tmp(struct domain_peano_hilbert_data *b, size_t n, struct domain_peano_hilbert_data *t)
+{
+  if(n <= 1)
+    return;
+
+  size_t nleft  = n / 2;
+  size_t nright = n - nleft;
+
+  struct domain_peano_hilbert_data *left  = b;
+  struct domain_peano_hilbert_data *right = b + nleft;
+  struct domain_peano_hilbert_data *out   = t;
+
+  /* sort both halves independently; each call may overwrite the scratch array */
+  msort_domain_with_tmp(left, nleft, t);
+  msort_domain_with_tmp(right, nright, t);
+
+  /* merge into the scratch array; on equal keys the left element goes first */
+  while(nleft > 0 && nright > 0)
+    {
+      if(right->key < left->key)
+        {
+          *out++ = *right++;
+          --nright;
+        }
+      else
+        {
+          *out++ = *left++;
+          --nleft;
+        }
+    }
+
+  /* leftover left elements belong at the end of the merged output */
+  if(nleft > 0)
+    memcpy(out, left, nleft * sizeof(struct domain_peano_hilbert_data));
+
+  /* leftover right elements are already in their final position in b */
+  memcpy(b, t, (n - nright) * sizeof(struct domain_peano_hilbert_data));
+}
+
+/*! \brief Merge-sort front end for domain_peano_hilbert_data arrays.
+ *
+ *  Sorts b by key using a scratch buffer of n * s bytes. Tends to work slightly
+ *  faster than a call of qsort() for this list, at least on most platforms.
+ *
+ *  \param[in, out] b domain_peano_hilbert data array that is to be sorted.
+ *  \param[in] n Number of elements.
+ *  \param[in] s Size of one element in bytes; only used to size the buffer.
+ *
+ *  \return void
+ */
+void mysort_domain(void *b, size_t n, size_t s)
+{
+  struct domain_peano_hilbert_data *data = (struct domain_peano_hilbert_data *)b;
+
+  /* scratch buffer of the same byte size as the input array */
+  struct domain_peano_hilbert_data *scratch = (struct domain_peano_hilbert_data *)mymalloc("tmp", n * s);
+
+  msort_domain_with_tmp(data, n, scratch);
+
+  myfree(scratch);
+}
diff --git a/src/amuse/community/arepo/src/domain/domain_toplevel.c b/src/amuse/community/arepo/src/domain/domain_toplevel.c
new file mode 100644
index 0000000000..6c1fc22ac5
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_toplevel.c
@@ -0,0 +1,393 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/domain/domain_toplevel.c
+ * \date        05/2018
+ * \brief       Top level tree construction and walk routines used for the
+ *              domain decomposition.
+ * \details     Uses BSD macros.
+ *              contains functions:
+ *                static int mydata_cmp(struct mydata *lhs, struct mydata *rhs)
+ *                int domain_determineTopTree(void)
+ *                void domain_do_local_refine(int n, int *list)
+ *                void domain_walktoptree(int no)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 17.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+#include "bsd_tree.h"
+#include "domain.h"
+
+/*! \brief Element of the RB tree queue_load: the workload of one top-level node.
+ */
+struct mydata
+{
+  double workload;   /* primary sort key; mydata_cmp() places larger values first */
+  int topnode_index; /* index into topNodes[]; tie-breaker in mydata_cmp() */
+
+  RB_ENTRY(mydata) linkage; /* this creates the linkage pointers needed by the RB tree, using symbolic name 'linkage' */
+};
+
+/*! \brief Ordering function for the elements of the workload tree.
+ *
+ *  Descending workload first, then ascending topnode_index on ties.
+ *
+ *  \param[in] lhs pointer to left hand side top level tree node.
+ *  \param[in] rhs pointer to right hand side top level tree node.
+ *
+ *  \return -1 if lhs sorts before rhs, 1 if it sorts after, 0 if both are equal.
+ */
+static int mydata_cmp(struct mydata *lhs, struct mydata *rhs)
+{
+  /* primary criterion: larger workload sorts first */
+  if(lhs->workload > rhs->workload)
+    return -1;
+  if(lhs->workload < rhs->workload)
+    return 1;
+
+  /* tie-breaker: smaller topnode index sorts first */
+  if(lhs->topnode_index != rhs->topnode_index)
+    return (lhs->topnode_index < rhs->topnode_index) ? -1 : 1;
+  return 0;
+}
+
+/* the following macro declares 'struct mytree', which is the header element
+ * needed as handle for a tree
+ */
+RB_HEAD(mytree, mydata);
+
+static struct mydata *nload;     /* tree elements, indexed by top-level node index; allocated in domain_determineTopTree() */
+static struct mytree queue_load; /* tree of top-level nodes ordered by mydata_cmp() */
+
+/* the following macros declare appropriate function prototypes and functions
+ * needed for this type of tree
+ */
+RB_PROTOTYPE_STATIC(mytree, mydata, linkage, mydata_cmp);
+RB_GENERATE_STATIC(mytree, mydata, linkage, mydata_cmp);
+
+static double *list_cost, *list_sphcost; /* per-particle cost factors, filled in domain_determineTopTree() */
+
+/*! \brief Construct top-level tree.
+ *
+ *  This function constructs the global top-level tree that is used for the
+ *  domain decomposition. The local particles are sorted by Peano-Hilbert
+ *  key, and starting from a single root node the heaviest nodes of a
+ *  workload-ordered queue are repeatedly split into eight daughters
+ *  (domain_do_local_refine()) until no node qualifies for refinement.
+ *
+ *  \return 0 (always).
+ */
+int domain_determineTopTree(void)
+{
+  double t0 = second();
+  int count = 0, message_printed = 0;
+
+  mp           = (struct domain_peano_hilbert_data *)mymalloc_movable(&mp, "mp", sizeof(struct domain_peano_hilbert_data) * NumPart);
+  list_cost    = mymalloc_movable(&list_cost, "list_cost", sizeof(double) * NumPart);
+  list_sphcost = mymalloc_movable(&list_sphcost, "listsph_cost", sizeof(double) * NumPart);
+
+  for(int i = 0; i < NumPart; i++)
+    {
+      peano1D xb = domain_double_to_int(((P[i].Pos[0] - DomainCorner[0]) * DomainInverseLen) + 1.0);
+      peano1D yb = domain_double_to_int(((P[i].Pos[1] - DomainCorner[1]) * DomainInverseLen) + 1.0);
+      peano1D zb = domain_double_to_int(((P[i].Pos[2] - DomainCorner[2]) * DomainInverseLen) + 1.0);
+
+      mp[count].key = Key[i] = peano_hilbert_key(xb, yb, zb, BITS_PER_DIMENSION);
+      mp[count].index        = i; /* position of the particle in P[] */
+      count++;
+
+      list_cost[i]    = domain_grav_tot_costfactor(i);
+      list_sphcost[i] = domain_hydro_tot_costfactor(i);
+    }
+
+  /* sort according to key (local particles!) */
+  mysort_domain(mp, count, sizeof(struct domain_peano_hilbert_data));
+
+  NTopnodes            = 1; /* start from a single root node that spans all PEANOCELLS keys */
+  NTopleaves           = 1;
+  topNodes[0].Daughter = -1;
+  topNodes[0].Parent   = -1;
+  topNodes[0].Size     = PEANOCELLS;
+  topNodes[0].StartKey = 0;
+  topNodes[0].PIndex   = 0;
+  topNodes[0].Count    = count;
+  topNodes[0].Cost     = gravcost;
+  topNodes[0].SphCost  = sphcost;
+
+  int limitNTopNodes = 2 * imax(1 + (NTask / 7 + 1) * 8, All.TopNodeFactor * All.MultipleDomains * NTask);
+
+#ifdef ADDBACKGROUNDGRID
+  limitNTopNodes = imax(limitNTopNodes, 2 * All.GridSize * All.GridSize * All.GridSize);
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+  while(limitNTopNodes > MaxTopNodes) /* grow the top-node arrays until they can hold limitNTopNodes entries */
+    {
+      mpi_printf("DOMAIN: Increasing TopNodeAllocFactor=%g  ", All.TopNodeAllocFactor);
+      All.TopNodeAllocFactor *= 1.3;
+      mpi_printf("new value=%g\n", All.TopNodeAllocFactor);
+      if(All.TopNodeAllocFactor > 1000)
+        terminate("something seems to be going seriously wrong here. Stopping.\n");
+
+      MaxTopNodes = (int)(All.TopNodeAllocFactor * All.MaxPart + 1);
+
+      topNodes        = (struct local_topnode_data *)myrealloc_movable(topNodes, (MaxTopNodes * sizeof(struct local_topnode_data)));
+      TopNodes        = (struct topnode_data *)myrealloc_movable(TopNodes, (MaxTopNodes * sizeof(struct topnode_data)));
+      DomainTask      = (int *)myrealloc_movable(DomainTask, (MaxTopNodes * sizeof(int)));
+      DomainLeaveNode = (struct domain_cost_data *)myrealloc_movable(DomainLeaveNode, (MaxTopNodes * sizeof(struct domain_cost_data)));
+    }
+
+  RB_INIT(&queue_load);
+  nload     = mymalloc("nload", limitNTopNodes * sizeof(struct mydata));
+  int *list = mymalloc("list", limitNTopNodes * sizeof(int)); /* indices of the nodes selected for refinement in the current pass */
+
+#ifdef ADDBACKGROUNDGRID
+  peanokey MaxTopleaveSize = (PEANOCELLS / (All.GridSize * All.GridSize * All.GridSize));
+#else  /* #ifdef ADDBACKGROUNDGRID */
+  double limit = 1.0 / (All.TopNodeFactor * All.MultipleDomains * NTask);
+#endif /* #ifdef ADDBACKGROUNDGRID #else */
+
+  /* insert the root node */
+  nload[0].workload      = 1.0;
+  nload[0].topnode_index = 0;
+  RB_INSERT(mytree, &queue_load, &nload[0]);
+
+  int iter = 0; /* number of refinement passes that actually refined something */
+
+  do
+    {
+      count = 0;
+
+      double first_workload = 0; /* workload of the first queue entry that can still be split (Size >= 8) */
+
+      for(struct mydata *nfirst = RB_MIN(mytree, &queue_load); nfirst != NULL; nfirst = RB_NEXT(mytree, &queue_load, nfirst))
+        {
+          if(topNodes[nfirst->topnode_index].Size >= 8)
+            {
+              first_workload = nfirst->workload;
+              break;
+            }
+        }
+
+      for(struct mydata *np = RB_MIN(mytree, &queue_load); np != NULL; np = RB_NEXT(mytree, &queue_load, np))
+        {
+#ifndef ADDBACKGROUNDGRID
+          if(np->workload < 0.125 * first_workload) /* queue is in descending workload order, so all later entries are lighter too */
+            break;
+
+          if(NTopnodes + 8 * (count + 1) >= limitNTopNodes) /* selecting one more node would exceed the node limit */
+            break;
+#endif /* #ifndef ADDBACKGROUNDGRID */
+
+#ifdef ADDBACKGROUNDGRID
+          if(topNodes[np->topnode_index].Size > MaxTopleaveSize)
+#else  /* #ifdef ADDBACKGROUNDGRID */
+          if(np->workload > limit || (NTopleaves < All.MultipleDomains * NTask && count == 0))
+#endif /* #ifdef ADDBACKGROUNDGRID #else */
+            {
+              if(topNodes[np->topnode_index].Size < 8) /* fewer than 8 key cells left: cannot be split into eight daughters */
+                {
+                  if(message_printed == 0)
+                    {
+                      mpi_printf("DOMAIN: Note: we would like to refine top-tree, but PEANOGRID is not fine enough\n");
+#ifndef OVERRIDE_PEANOGRID_WARNING
+                      terminate(
+                          "Consider setting BITS_PER_DIMENSION up to a value of 42 to get a fine enough PEANOGRID, or force a "
+                          "continuation by activating OVERRIDE_PEANOGRID_WARNING");
+#endif /* #ifndef OVERRIDE_PEANOGRID_WARNING */
+                      message_printed = 1;
+                    }
+                }
+              else
+                {
+                  list[count] = np->topnode_index;
+                  count++;
+                }
+            }
+        }
+
+      if(count > 0)
+        {
+          domain_do_local_refine(count, list); /* splits the selected nodes and replaces them in queue_load by their daughters */
+          iter++;
+        }
+    }
+  while(count > 0); /* stop once a pass selects no node for refinement */
+
+  myfree(list); /* the five buffers are released in reverse order of their allocation above */
+  myfree(nload);
+  myfree(list_sphcost);
+  myfree(list_cost);
+  myfree(mp);
+
+  /* count the number of top leaves */
+  NTopleaves = 0;
+  domain_walktoptree(0);
+
+  double t1 = second();
+  mpi_printf("DOMAIN: NTopleaves=%d, determination of top-level tree involved %d iterations and took %g sec\n", NTopleaves, iter,
+             timediff(t0, t1));
+
+  t0 = second();
+
+  domain_sumCost();
+
+  t1 = second();
+  mpi_printf("DOMAIN: cost summation for top-level tree took %g sec\n", timediff(t0, t1));
+
+  return 0;
+}
+
+/*! \brief Refine top-level tree locally by splitting each listed node into eight daughters.
+ *
+ *  Requires arrays list_cost, list_sphcost and mp; replaces the listed nodes in queue_load by their daughters.
+ *
+ *  \param[in] n Number of nodes that should be refined.
+ *  \param[in] list List of node indices that should be refined.
+ *
+ *  \return void
+ */
+void domain_do_local_refine(int n, int *list)
+{
+  double *worktotlist = mymalloc("worktotlist", 8 * n * sizeof(double)); /* daughter workloads summed over all tasks */
+  double *worklist    = mymalloc("worklist", 8 * n * sizeof(double));    /* local contribution to the daughter workloads */
+
+  double non_zero = 0, non_zero_tot; /* number of non-zero worklist entries: local count and sum over all tasks */
+
+  /* create the new nodes */
+  for(int k = 0; k < n; k++)
+    {
+      int i                = list[k];
+      topNodes[i].Daughter = NTopnodes; /* the eight daughters occupy consecutive slots starting here */
+      NTopnodes += 8;
+      NTopleaves += 7; /* one leaf is replaced by eight */
+
+      for(int j = 0; j < 8; j++)
+        {
+          int sub = topNodes[i].Daughter + j;
+
+          topNodes[sub].Daughter = -1;
+          topNodes[sub].Parent   = i;
+          topNodes[sub].Size     = (topNodes[i].Size >> 3); /* each daughter covers one eighth of the parent's key range */
+          topNodes[sub].StartKey = topNodes[i].StartKey + j * topNodes[sub].Size;
+          topNodes[sub].PIndex   = topNodes[i].PIndex;
+          topNodes[sub].Count    = 0;
+          topNodes[sub].Cost     = 0;
+          topNodes[sub].SphCost  = 0;
+        }
+
+      int sub = topNodes[i].Daughter;
+
+      for(int p = topNodes[i].PIndex, j = 0; p < topNodes[i].PIndex + topNodes[i].Count; p++)
+        {
+          if(j < 7)
+            while(mp[p].key >= topNodes[sub + 1].StartKey) /* step to the daughter whose key range contains this key */
+              {
+                j++;
+                sub++;
+                topNodes[sub].PIndex = p;
+                if(j >= 7)
+                  break;
+              }
+
+          topNodes[sub].Cost += list_cost[mp[p].index];
+          topNodes[sub].SphCost += list_sphcost[mp[p].index];
+          topNodes[sub].Count++;
+        }
+
+      for(int j = 0; j < 8; j++)
+        {
+          int sub             = topNodes[i].Daughter + j;
+          worklist[k * 8 + j] = fac_work * topNodes[sub].Cost + fac_worksph * topNodes[sub].SphCost + fac_load * topNodes[sub].Count;
+
+          if(worklist[k * 8 + j] != 0)
+            non_zero++;
+        }
+    }
+
+  MPI_Allreduce(&non_zero, &non_zero_tot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  if(non_zero_tot > 0.05 * (NTask * 8 * n)) /* plain reduction if more than 5% of all entries are non-zero, sparse variant otherwise */
+    MPI_Allreduce(worklist, worktotlist, 8 * n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  else
+    allreduce_sparse_double_sum(worklist, worktotlist, 8 * n);
+
+  for(int k = 0; k < n; k++)
+    {
+      int i = list[k];
+      RB_REMOVE(mytree, &queue_load, &nload[i]); /* the refined parent leaves the queue */
+    }
+
+  for(int k = 0, l = 0; k < n; k++)
+    {
+      int i = list[k];
+
+      for(int j = 0; j < 8; j++, l++)
+        {
+          int sub = topNodes[i].Daughter + j;
+
+          /* insert the daughter node with its globally summed workload */
+          nload[sub].workload      = worktotlist[l];
+          nload[sub].topnode_index = sub;
+          RB_INSERT(mytree, &queue_load, &nload[sub]);
+        }
+    }
+
+  myfree(worklist);
+  myfree(worktotlist);
+}
+
+/*! \brief Recursively numbers the leaves of the top-level tree.
+ *
+ *  Descends from the given node, visiting the eight daughters of every
+ *  internal node in index order (i.e. along the Peano-Hilbert curve). Each
+ *  node without daughters receives the current value of NTopleaves as its
+ *  leaf number, after which NTopleaves is incremented by one.
+ *
+ *  \param[in] no Present node.
+ *
+ *  \return void
+ */
+void domain_walktoptree(int no)
+{
+  const int first_daughter = topNodes[no].Daughter;
+
+  if(first_daughter != -1)
+    {
+      for(int oct = 0; oct < 8; oct++)
+        domain_walktoptree(first_daughter + oct);
+      return;
+    }
+
+  topNodes[no].Leaf = NTopleaves++;
+}
diff --git a/src/amuse/community/arepo/src/domain/domain_vars.c b/src/amuse/community/arepo/src/domain/domain_vars.c
new file mode 100644
index 0000000000..d108a7dc8f
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_vars.c
@@ -0,0 +1,117 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/domain/domain_vars.c
+ * \date        05/2018
+ * \brief       Variables and memory allocation functions for domain
+ *              decomposition.
+ * \details     contains functions:
+ *                void domain_allocate_lists(void)
+ *                void domain_free_lists(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+#include "domain.h"
+
+struct domain_peano_hilbert_data *mp; /*!< key/index pairs of the local particles; allocated and freed in domain_determineTopTree() */
+
+struct local_topnode_data *topNodes, *branchNodes; /*!< points to the root node of the top-level tree */
+
+double totgravcost, totpartcount, gravcost, totsphcost, sphcost; /*!< gravcost/sphcost seed the root node's Cost/SphCost */
+
+struct domain_cost_data *DomainLeaveNode; /*!< MaxTopNodes entries; allocated in domain_allocate_lists() */
+
+double fac_work, fac_load, fac_worksph; /*!< weights of Cost, Count and SphCost in a top-level node's workload */
+double normsum_work, normsum_load, normsum_worksph;
+
+int Nbranch;
+
+/*! toGo[partner] gives the number of particles on the current task that have to go to task 'partner'
+ */
+int *toGo, *toGoSph;
+int *toGet, *toGetSph; /* presumably the receive-side counterparts of toGo/toGoSph -- TODO confirm */
+int *list_NumPart;     /* this and the following lists hold NTask entries each, see domain_allocate_lists() */
+int *list_NumGas;
+int *list_load;
+int *list_loadsph;
+double *list_work;
+double *list_worksph;
+
+/*! \brief Allocates the bookkeeping lists of the domain decomposition.
+ *
+ *  Besides the Key array (All.MaxPart entries) and DomainLeaveNode
+ *  (MaxTopNodes entries), every list allocated here has NTask entries.
+ *
+ *  \return void
+ */
+void domain_allocate_lists(void)
+{
+  Key             = mymalloc_movable(&Key, "domain_key", All.MaxPart * sizeof(peanokey));
+  toGo            = mymalloc_movable(&toGo, "toGo", NTask * sizeof(*toGo));
+  toGoSph         = mymalloc_movable(&toGoSph, "toGoSph", NTask * sizeof(*toGoSph));
+  toGet           = mymalloc_movable(&toGet, "toGet", NTask * sizeof(*toGet));
+  toGetSph        = mymalloc_movable(&toGetSph, "toGetSph", NTask * sizeof(*toGetSph));
+  list_NumPart    = mymalloc_movable(&list_NumPart, "list_NumPart", NTask * sizeof(*list_NumPart));
+  list_NumGas     = mymalloc_movable(&list_NumGas, "list_NumGas", NTask * sizeof(*list_NumGas));
+  list_load       = mymalloc_movable(&list_load, "list_load", NTask * sizeof(*list_load));
+  list_loadsph    = mymalloc_movable(&list_loadsph, "list_loadsph", NTask * sizeof(*list_loadsph));
+  list_work       = mymalloc_movable(&list_work, "list_work", NTask * sizeof(*list_work));
+  list_worksph    = mymalloc_movable(&list_worksph, "list_worksph", NTask * sizeof(*list_worksph));
+  DomainLeaveNode = mymalloc_movable(&DomainLeaveNode, "DomainLeaveNode",
+                                     MaxTopNodes * sizeof(*DomainLeaveNode));
+}
+
+/*! \brief Frees lists needed for communication in domain decomposition.
+ *
+ *  Counterpart of domain_allocate_lists(void): frees all arrays allocated
+ *  there in reverse order of allocation, except Key, which is freed
+ *  elsewhere (in void domain_Decomposition(void); see domain.c).
+ *
+ *  \return void
+ */
+void domain_free_lists(void)
+{
+  myfree(DomainLeaveNode); /* allocated last in domain_allocate_lists(), hence released first */
+  myfree(list_worksph);
+  myfree(list_work);
+  myfree(list_loadsph);
+  myfree(list_load);
+  myfree(list_NumGas);
+  myfree(list_NumPart);
+  myfree(toGetSph);
+  myfree(toGet);
+  myfree(toGoSph);
+  myfree(toGo);
+}
diff --git a/src/amuse/community/arepo/src/domain/peano.c b/src/amuse/community/arepo/src/domain/peano.c
new file mode 100644
index 0000000000..adb30ca43d
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/peano.c
@@ -0,0 +1,569 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/domain/peano.c
+ * \date        05/2018
+ * \brief       Order particles along Peano-Hilbert curve.
+ * \details     contains functions:
+ *                void peano_hilbert_order(void)
+ *                void peano_hilbert_order_DP(void)
+ *                int peano_compare_key(const void *a, const void *b)
+ *                void reorder_DP(void)
+ *                void reorder_gas(int *Id)
+ *                void reorder_particles(int *Id)
+ *                peanokey peano_hilbert_key(peano1D x, peano1D y, peano1D z,
+ *                  int bits)
+ *                void peano_hilbert_key_inverse(peanokey key, int bits,
+ *                  peano1D * x, peano1D * y, peano1D * z)
+ *                static void msort_peano_with_tmp(struct peano_hilbert_data
+ *                  *b, size_t n, struct peano_hilbert_data *t)
+ *                void mysort_peano(void *b, size_t n, size_t s, int (*cmp)
+ *                  (const void *, const void *))
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../mesh/voronoi/voronoi.h"
+
+#include <gsl/gsl_heapsort.h>
+
+/*! Key/index pair used for sorting along the Peano-Hilbert curve; pmp is the working array of such pairs.
+ */
+static struct peano_hilbert_data
+{
+  peanokey key; /* sort key, compared by peano_compare_key() */
+  int index;    /* position of the item before sorting */
+} * pmp;
+
+static int *Id; /* Id[old index] = new index; read by reorder_DP() and passed to reorder_gas()/reorder_particles() */
+
+/*! \brief Sorts gas cells and, separately, all other particles by their Peano-Hilbert key (Key[]).
+ *
+ *  \return void
+ */
+void peano_hilbert_order(void)
+{
+  int i;
+
+  double t0 = second();
+
+  // mpi_printf("DOMAIN: begin Peano-Hilbert order...\n");
+
+  if(NumGas) /* first pass: gas cells, indices [0, NumGas) */
+    {
+      pmp = (struct peano_hilbert_data *)mymalloc("pmp", sizeof(struct peano_hilbert_data) * NumGas);
+      Id  = (int *)mymalloc("Id", sizeof(int) * NumGas);
+
+      for(i = 0; i < NumGas; i++)
+        {
+          pmp[i].index = i;
+          pmp[i].key   = Key[i];
+        }
+
+      mysort_peano(pmp, NumGas, sizeof(struct peano_hilbert_data), peano_compare_key);
+
+      for(i = 0; i < NumGas; i++)
+        Id[pmp[i].index] = i; /* invert the sorted list: old index -> new index */
+
+      reorder_gas(Id); /* permutes P[] and SphP[]; Key[] is left in its old order */
+
+      myfree(Id);
+      myfree(pmp);
+    }
+
+  if(NumPart - NumGas > 0) /* second pass: remaining particles, indices [NumGas, NumPart) */
+    {
+      pmp = (struct peano_hilbert_data *)mymalloc("pmp", sizeof(struct peano_hilbert_data) * (NumPart - NumGas));
+      pmp -= (NumGas); /* shift the base so that pmp[] can be addressed with absolute particle indices */
+
+      Id = (int *)mymalloc("Id", sizeof(int) * (NumPart - NumGas));
+      Id -= (NumGas); /* same shift; reorder_particles() indexes Id[] from NumGas to NumPart - 1 */
+
+      for(i = NumGas; i < NumPart; i++)
+        {
+          pmp[i].index = i;
+          pmp[i].key   = Key[i];
+        }
+
+      mysort_peano(pmp + NumGas, NumPart - NumGas, sizeof(struct peano_hilbert_data), peano_compare_key);
+
+      for(i = NumGas; i < NumPart; i++)
+        Id[pmp[i].index] = i; /* invert the sorted list: old index -> new index */
+
+      reorder_particles(Id); /* permutes P[] only; Key[] is left in its old order */
+
+      Id += NumGas; /* undo the shift so that myfree() receives the pointer returned by mymalloc() */
+      myfree(Id);
+      pmp += NumGas; /* likewise for pmp */
+      myfree(pmp);
+    }
+
+  double t1 = second();
+  mpi_printf("DOMAIN: Peano-Hilbert order done, took %g sec.\n", timediff(t0, t1));
+}
+
+/*! \brief Reorders the Delaunay points (Mesh.DP) by Peano-Hilbert key.
+ *
+ *  \return void
+ */
+void peano_hilbert_order_DP(void)
+{
+#ifdef ONEDIMS
+  return;
+#endif /* #ifdef ONEDIMS */
+
+  /* an empty mesh skips the sort below; the closing message is printed
+   * regardless */
+  if(Mesh.Ndp != 0)
+    {
+      pmp = mymalloc("pmp", Mesh.Ndp * sizeof(*pmp));
+      Id  = mymalloc("Id", Mesh.Ndp * sizeof(*Id));
+
+      const point *sites = Mesh.DP;
+
+      for(int k = 0; k < Mesh.Ndp; k++)
+        {
+          const int ix = (int)((sites[k].x + DomainLen) * DomainFac / 3);
+          const int iy = (int)((sites[k].y + DomainLen) * DomainFac / 3);
+          const int iz = (int)((sites[k].z + DomainLen) * DomainFac / 3);
+
+          pmp[k].key   = peano_hilbert_key(ix, iy, iz, BITS_PER_DIMENSION);
+          pmp[k].index = k;
+        }
+
+      mysort_peano(pmp, Mesh.Ndp, sizeof(struct peano_hilbert_data), peano_compare_key);
+      for(int k = 0; k < Mesh.Ndp; k++)
+        Id[pmp[k].index] = k; /* old position -> new position */
+      reorder_DP();
+      myfree(Id);
+      myfree(pmp);
+    }
+
+  mpi_printf("VORONOI: Peano-Hilbert of DP points done.\n");
+}
+
+/*! \brief Three-way comparison of two peano_hilbert_data objects by key.
+ *
+ *  Sorting kernel for sorting along Peano-Hilbert curve.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return -1 if a->key < b->key, 1 if a->key > b->key, 0 if the keys match.
+ */
+int peano_compare_key(const void *a, const void *b)
+{
+  const peanokey key_a = ((const struct peano_hilbert_data *)a)->key;
+  const peanokey key_b = ((const struct peano_hilbert_data *)b)->key;
+
+  if(key_a < key_b)
+    return -1;
+
+  return (key_a > key_b) ? +1 : 0;
+}
+
+/*! \brief Permutes the Delaunay points in Mesh.DP in place.
+ *
+ *  Uses the file-scope array Id, which must hold the new index of each of
+ *  the Mesh.Ndp Delaunay points; Id is updated along with the points.
+ *
+ *  \return void
+ */
+void reorder_DP(void)
+{
+  point *DP = Mesh.DP;
+
+  /* follow every permutation cycle once; Id[k] == k marks slots that are
+   * already final (initially or after an earlier cycle) */
+  for(int start = 0; start < Mesh.Ndp; start++)
+    {
+      if(Id[start] == start)
+        continue;
+
+      /* point (and its target index) currently being carried along the cycle */
+      point carried  = DP[start];
+      int carried_id = Id[start];
+      int dest       = carried_id;
+
+      for(;;)
+        {
+          /* save the occupant of the destination slot before overwriting it */
+          point displaced  = DP[dest];
+          int displaced_id = Id[dest];
+
+          DP[dest] = carried;
+          Id[dest] = carried_id;
+
+          /* the cycle is closed once the starting slot has been filled */
+          if(dest == start)
+            break;
+
+          /* continue with the point that was just displaced */
+          carried    = displaced;
+          carried_id = displaced_id;
+          dest       = carried_id;
+        }
+    }
+}
+
+/*! \brief Permutes the gas cells in the P and SphP arrays in place.
+ *
+ *  \param[in, out] Id Array with one entry per gas cell (NumGas entries) that
+ *             holds the new index of each cell; it is updated along the way.
+ *
+ *  \return void
+ */
+void reorder_gas(int *Id)
+{
+  /* follow every permutation cycle once; Id[k] == k marks cells that are
+   * already at their final position, either from the start or because an
+   * earlier cycle has placed them */
+  for(int start = 0; start < NumGas; start++)
+    {
+      if(Id[start] == start)
+        continue;
+
+      /* cell data (and its target index) currently being carried along the
+       * cycle */
+      struct particle_data carried_p       = P[start];
+      struct sph_particle_data carried_sph = SphP[start];
+      int carried_id                       = Id[start];
+      int dest                             = carried_id;
+
+      for(;;)
+        {
+          /* save the occupant of the destination slot before overwriting it */
+          struct particle_data displaced_p       = P[dest];
+          struct sph_particle_data displaced_sph = SphP[dest];
+          int displaced_id                       = Id[dest];
+
+          P[dest]    = carried_p;
+          SphP[dest] = carried_sph;
+          Id[dest]   = carried_id;
+
+          /* the cycle is closed once the starting slot has been filled */
+          if(dest == start)
+            break;
+
+          /* continue with the cell that was just displaced */
+          carried_p   = displaced_p;
+          carried_sph = displaced_sph;
+          carried_id  = displaced_id;
+          dest        = carried_id;
+        }
+    }
+}
+
+/*! \brief Permutes the particles with indices NumGas..NumPart-1 in P in place.
+ *
+ *  \param[in, out] Id Array that is indexed with the particle index (entries
+ *             NumGas..NumPart-1 are used) and holds each particle's new index.
+ *
+ *  \return void
+ */
+void reorder_particles(int *Id)
+{
+  /* follow every permutation cycle once; Id[k] == k marks particles that
+   * are already at their final position, either from the start or because
+   * an earlier cycle has placed them */
+  for(int start = NumGas; start < NumPart; start++)
+    {
+      if(Id[start] == start)
+        continue;
+
+      /* particle (and its target index) being carried along the cycle */
+      struct particle_data carried = P[start];
+      int carried_id               = Id[start];
+      int dest                     = carried_id;
+
+      for(;;)
+        {
+          /* save the occupant of the destination slot before overwriting it */
+          struct particle_data displaced = P[dest];
+          int displaced_id               = Id[dest];
+
+          P[dest]  = carried;
+          Id[dest] = carried_id;
+
+          /* the cycle is closed once the starting slot has been filled */
+          if(dest == start)
+            break;
+
+          /* continue with the particle that was just displaced */
+          carried    = displaced;
+          carried_id = displaced_id;
+          dest       = carried_id;
+        }
+    }
+}
+
+/*  Lookup tables of the rewrite of peano_hilbert_key_old() by MARTIN REINECKE (about a factor 2.3 - 2.5 faster than
+ *  Volker's old routine). Both are indexed [rotation state][pix], pix being the 3-bit cell index formed in
+ *  peano_hilbert_key(): rottable3 holds the next rotation state, subpix3 the three key bits to append.
+ */
+const unsigned char rottable3[48][8] = {
+    {36, 28, 25, 27, 10, 10, 25, 27}, {29, 11, 24, 24, 37, 11, 26, 26}, {8, 8, 25, 27, 30, 38, 25, 27},
+    {9, 39, 24, 24, 9, 31, 26, 26},   {40, 24, 44, 32, 40, 6, 44, 6},   {25, 7, 33, 7, 41, 41, 45, 45},
+    {4, 42, 4, 46, 26, 42, 34, 46},   {43, 43, 47, 47, 5, 27, 5, 35},   {33, 35, 36, 28, 33, 35, 2, 2},
+    {32, 32, 29, 3, 34, 34, 37, 3},   {33, 35, 0, 0, 33, 35, 30, 38},   {32, 32, 1, 39, 34, 34, 1, 31},
+    {24, 42, 32, 46, 14, 42, 14, 46}, {43, 43, 47, 47, 25, 15, 33, 15}, {40, 12, 44, 12, 40, 26, 44, 34},
+    {13, 27, 13, 35, 41, 41, 45, 45}, {28, 41, 28, 22, 38, 43, 38, 22}, {42, 40, 23, 23, 29, 39, 29, 39},
+    {41, 36, 20, 36, 43, 30, 20, 30}, {37, 31, 37, 31, 42, 40, 21, 21}, {28, 18, 28, 45, 38, 18, 38, 47},
+    {19, 19, 46, 44, 29, 39, 29, 39}, {16, 36, 45, 36, 16, 30, 47, 30}, {37, 31, 37, 31, 17, 17, 46, 44},
+    {12, 4, 1, 3, 34, 34, 1, 3},      {5, 35, 0, 0, 13, 35, 2, 2},      {32, 32, 1, 3, 6, 14, 1, 3},
+    {33, 15, 0, 0, 33, 7, 2, 2},      {16, 0, 20, 8, 16, 30, 20, 30},   {1, 31, 9, 31, 17, 17, 21, 21},
+    {28, 18, 28, 22, 2, 18, 10, 22},  {19, 19, 23, 23, 29, 3, 29, 11},  {9, 11, 12, 4, 9, 11, 26, 26},
+    {8, 8, 5, 27, 10, 10, 13, 27},    {9, 11, 24, 24, 9, 11, 6, 14},    {8, 8, 25, 15, 10, 10, 25, 7},
+    {0, 18, 8, 22, 38, 18, 38, 22},   {19, 19, 23, 23, 1, 39, 9, 39},   {16, 36, 20, 36, 16, 2, 20, 10},
+    {37, 3, 37, 11, 17, 17, 21, 21},  {4, 17, 4, 46, 14, 19, 14, 46},   {18, 16, 47, 47, 5, 15, 5, 15},
+    {17, 12, 44, 12, 19, 6, 44, 6},   {13, 7, 13, 7, 18, 16, 45, 45},   {4, 42, 4, 21, 14, 42, 14, 23},
+    {43, 43, 22, 20, 5, 15, 5, 15},   {40, 12, 21, 12, 40, 6, 23, 6},   {13, 7, 13, 7, 41, 41, 22, 20}};
+
+const unsigned char subpix3[48][8] = {
+    {0, 7, 1, 6, 3, 4, 2, 5}, {7, 4, 6, 5, 0, 3, 1, 2}, {4, 3, 5, 2, 7, 0, 6, 1}, {3, 0, 2, 1, 4, 7, 5, 6}, {1, 0, 6, 7, 2, 3, 5, 4},
+    {0, 3, 7, 4, 1, 2, 6, 5}, {3, 2, 4, 5, 0, 1, 7, 6}, {2, 1, 5, 6, 3, 0, 4, 7}, {6, 1, 7, 0, 5, 2, 4, 3}, {1, 2, 0, 3, 6, 5, 7, 4},
+    {2, 5, 3, 4, 1, 6, 0, 7}, {5, 6, 4, 7, 2, 1, 3, 0}, {7, 6, 0, 1, 4, 5, 3, 2}, {6, 5, 1, 2, 7, 4, 0, 3}, {5, 4, 2, 3, 6, 7, 1, 0},
+    {4, 7, 3, 0, 5, 6, 2, 1}, {6, 7, 5, 4, 1, 0, 2, 3}, {7, 0, 4, 3, 6, 1, 5, 2}, {0, 1, 3, 2, 7, 6, 4, 5}, {1, 6, 2, 5, 0, 7, 3, 4},
+    {2, 3, 1, 0, 5, 4, 6, 7}, {3, 4, 0, 7, 2, 5, 1, 6}, {4, 5, 7, 6, 3, 2, 0, 1}, {5, 2, 6, 1, 4, 3, 7, 0}, {7, 0, 6, 1, 4, 3, 5, 2},
+    {0, 3, 1, 2, 7, 4, 6, 5}, {3, 4, 2, 5, 0, 7, 1, 6}, {4, 7, 5, 6, 3, 0, 2, 1}, {6, 7, 1, 0, 5, 4, 2, 3}, {7, 4, 0, 3, 6, 5, 1, 2},
+    {4, 5, 3, 2, 7, 6, 0, 1}, {5, 6, 2, 1, 4, 7, 3, 0}, {1, 6, 0, 7, 2, 5, 3, 4}, {6, 5, 7, 4, 1, 2, 0, 3}, {5, 2, 4, 3, 6, 1, 7, 0},
+    {2, 1, 3, 0, 5, 6, 4, 7}, {0, 1, 7, 6, 3, 2, 4, 5}, {1, 2, 6, 5, 0, 3, 7, 4}, {2, 3, 5, 4, 1, 0, 6, 7}, {3, 0, 4, 7, 2, 1, 5, 6},
+    {1, 0, 2, 3, 6, 7, 5, 4}, {0, 7, 3, 4, 1, 6, 2, 5}, {7, 6, 4, 5, 0, 1, 3, 2}, {6, 1, 5, 2, 7, 0, 4, 3}, {5, 4, 6, 7, 2, 3, 1, 0},
+    {4, 3, 7, 0, 5, 2, 6, 1}, {3, 2, 0, 1, 4, 5, 7, 6}, {2, 5, 1, 6, 3, 4, 0, 7}};
+
+/*! \brief Maps an integer triplet (x,y,z) onto its Peano-Hilbert key.
+ *
+ *  The coordinates are consumed bit by bit, starting at bit number bits-1,
+ *  so x, y and z are expected in the range between 0 and 2^bits-1.
+ *
+ *  \param[in] x X position.
+ *  \param[in] y Y position.
+ *  \param[in] z Z position.
+ *  \param[in] bits Number of bits used per coordinate for the Peano key.
+ *
+ *  \return Peano-Hilbert key corresponding to position x,y,z.
+ */
+peanokey peano_hilbert_key(peano1D x, peano1D y, peano1D z, int bits)
+{
+  peanokey result     = 0;
+  unsigned char state = 0; /* current row in rottable3 / subpix3 */
+
+  for(peano1D bit = ((peano1D)1) << (bits - 1); bit > 0; bit >>= 1)
+    {
+      unsigned char octant = ((x & bit) ? 4 : 0) + ((y & bit) ? 2 : 0) + ((z & bit) ? 1 : 0);
+
+      result = (result << 3) | subpix3[state][octant]; /* append three key bits */
+      state  = rottable3[state][octant];
+    }
+
+  return result;
+}
+
+static int quadrants[24][2][2][2] = {
+    /* indexed [rotation][bitx][bity][bitz], value = quad (0-7); rotx=0, roty=0-3 */
+    {{{0, 7}, {1, 6}}, {{3, 4}, {2, 5}}},
+    {{{7, 4}, {6, 5}}, {{0, 3}, {1, 2}}},
+    {{{4, 3}, {5, 2}}, {{7, 0}, {6, 1}}},
+    {{{3, 0}, {2, 1}}, {{4, 7}, {5, 6}}},
+    /* rotx=1, roty=0-3 */
+    {{{1, 0}, {6, 7}}, {{2, 3}, {5, 4}}},
+    {{{0, 3}, {7, 4}}, {{1, 2}, {6, 5}}},
+    {{{3, 2}, {4, 5}}, {{0, 1}, {7, 6}}},
+    {{{2, 1}, {5, 6}}, {{3, 0}, {4, 7}}},
+    /* rotx=2, roty=0-3 */
+    {{{6, 1}, {7, 0}}, {{5, 2}, {4, 3}}},
+    {{{1, 2}, {0, 3}}, {{6, 5}, {7, 4}}},
+    {{{2, 5}, {3, 4}}, {{1, 6}, {0, 7}}},
+    {{{5, 6}, {4, 7}}, {{2, 1}, {3, 0}}},
+    /* rotx=3, roty=0-3 */
+    {{{7, 6}, {0, 1}}, {{4, 5}, {3, 2}}},
+    {{{6, 5}, {1, 2}}, {{7, 4}, {0, 3}}},
+    {{{5, 4}, {2, 3}}, {{6, 7}, {1, 0}}},
+    {{{4, 7}, {3, 0}}, {{5, 6}, {2, 1}}},
+    /* rotx=4, roty=0-3 */
+    {{{6, 7}, {5, 4}}, {{1, 0}, {2, 3}}},
+    {{{7, 0}, {4, 3}}, {{6, 1}, {5, 2}}},
+    {{{0, 1}, {3, 2}}, {{7, 6}, {4, 5}}},
+    {{{1, 6}, {2, 5}}, {{0, 7}, {3, 4}}},
+    /* rotx=5, roty=0-3 */
+    {{{2, 3}, {1, 0}}, {{5, 4}, {6, 7}}},
+    {{{3, 4}, {0, 7}}, {{2, 5}, {1, 6}}},
+    {{{4, 5}, {7, 6}}, {{3, 2}, {0, 1}}},
+    {{{5, 2}, {6, 1}}, {{4, 3}, {7, 0}}}};
+/* rotxmap_table[r] / rotymap_table[r]: rotation state that follows state r, see peano_hilbert_key_inverse() */
+static int rotxmap_table[24] = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 17, 18, 19, 16, 23, 20, 21, 22};
+
+static int rotymap_table[24] = {1, 2, 3, 0, 16, 17, 18, 19, 11, 8, 9, 10, 22, 23, 20, 21, 14, 15, 12, 13, 4, 5, 6, 7};
+/* per quad: how often rotxmap_table / rotymap_table is applied before the next key digit is decoded */
+static int rotx_table[8] = {3, 0, 0, 2, 2, 0, 0, 1};
+static int roty_table[8] = {0, 1, 1, 2, 2, 3, 3, 0};
+/* per quad: factor multiplied into the running sense; a sense other than 1 makes a digit be read as 7 - digit */
+static int sense_table[8] = {-1, -1, -1, +1, +1, -1, -1, -1};
+/* the quadrants_inverse_* tables are filled on the first call of peano_hilbert_key_inverse() */
+static int flag_quadrants_inverse = 1; /* 1 until the inverse tables below have been built */
+static char quadrants_inverse_x[24][8]; /* [rotation][quad] -> bitx */
+static char quadrants_inverse_y[24][8]; /* [rotation][quad] -> bity */
+static char quadrants_inverse_z[24][8]; /* [rotation][quad] -> bitz */
+
+/*! \brief Computes position from Peano-Hilbert key; on first use also builds the quadrants_inverse_* lookup tables.
+ *
+ *  \param[in] key Peano-Hilbert key, decoded three bits at a time starting at bit 3 * (bits - 1).
+ *  \param[in] bits Number of 3-bit digits in the key; each digit contributes one bit to x, y and z.
+ *  \param[out] x X position.
+ *  \param[out] y Y position.
+ *  \param[out] z Z position.
+ */
+void peano_hilbert_key_inverse(peanokey key, int bits, peano1D *x, peano1D *y, peano1D *z)
+{
+  if(flag_quadrants_inverse) /* first call only: invert the quadrants table */
+    {
+      flag_quadrants_inverse = 0;
+      for(int rotation = 0; rotation < 24; rotation++)
+        for(int bitx = 0; bitx < 2; bitx++)
+          for(int bity = 0; bity < 2; bity++)
+            for(int bitz = 0; bitz < 2; bitz++)
+              {
+                int quad                            = quadrants[rotation][bitx][bity][bitz];
+                quadrants_inverse_x[rotation][quad] = bitx;
+                quadrants_inverse_y[rotation][quad] = bity;
+                quadrants_inverse_z[rotation][quad] = bitz;
+              }
+    }
+
+  int shift     = 3 * (bits - 1); /* bit offset of the most significant 3-bit digit */
+  peanokey mask = ((peanokey)7) << shift;
+  int rotation  = 0;
+  char sense    = 1;
+
+  *x = *y = *z = 0;
+
+  for(int i = 0; i < bits; i++, mask >>= 3, shift -= 3)
+    {
+      peanokey keypart = (key & mask) >> shift; /* current 3-bit digit, 0..7 */
+
+      int quad = (sense == 1) ? (keypart) : (7 - keypart);
+
+      /* each digit appends one bit to every coordinate */
+      *x = (*x << 1) + quadrants_inverse_x[rotation][quad];
+      *y = (*y << 1) + quadrants_inverse_y[rotation][quad];
+      *z = (*z << 1) + quadrants_inverse_z[rotation][quad];
+
+      char rotx = rotx_table[quad];
+      char roty = roty_table[quad];
+      sense *= sense_table[quad];
+
+      while(rotx > 0) /* step the rotation state rotx times through rotxmap_table */
+        {
+          rotation = rotxmap_table[rotation];
+          rotx--;
+        }
+
+      while(roty > 0) /* then roty times through rotymap_table */
+        {
+          rotation = rotymap_table[rotation];
+          roty--;
+        }
+    }
+}
+
+/*! \brief Sorting algorithm for sorting along Peano-Hilbert curve.
+ *
+ *  Recursive merge sort ordering the elements by ascending key; elements with equal keys keep their relative order.
+ *
+ *  \param[in, out] b Array to be sorted.
+ *  \param[in] n size of array.
+ *  \param[in] t Scratch array used while merging; up to n elements are written to it.
+ *
+ *  \return void
+ */
+static void msort_peano_with_tmp(struct peano_hilbert_data *b, size_t n, struct peano_hilbert_data *t)
+{
+  struct peano_hilbert_data *tmp;
+  struct peano_hilbert_data *b1, *b2;
+  size_t n1, n2;
+
+  if(n <= 1)
+    return;
+
+  n1 = n / 2;
+  n2 = n - n1;
+  b1 = b;
+  b2 = b + n1;
+
+  msort_peano_with_tmp(b1, n1, t); /* sort both halves, sharing the scratch array */
+  msort_peano_with_tmp(b2, n2, t);
+
+  tmp = t;
+
+  while(n1 > 0 && n2 > 0) /* merge into the scratch array; ties are taken from the left half */
+    {
+      if(b1->key <= b2->key)
+        {
+          --n1;
+          *tmp++ = *b1++;
+        }
+      else
+        {
+          --n2;
+          *tmp++ = *b2++;
+        }
+    }
+
+  if(n1 > 0) /* left half not used up: append its rest to the merged run */
+    memcpy(tmp, b1, n1 * sizeof(struct peano_hilbert_data));
+  memcpy(b, t, (n - n2) * sizeof(struct peano_hilbert_data)); /* copy merged run back; any rest of b2 is already in place */
+}
+
+/*! \brief Sorts an array of peano_hilbert_data elements by key.
+ *
+ *  Allocates the scratch array needed by msort_peano_with_tmp, runs the merge
+ *  sort and frees the scratch array again. qsort(b, n, s, cmp) could be used
+ *  instead, but this merge sort is usually a bit faster for this array.
+ *
+ *  \param[in, out] b Array to be sorted.
+ *  \param[in] n Size of array.
+ *  \param[in] s Size of single array elements (only used for the memory allocation).
+ *  \param[in] cmp Sorting kernel function (not used here; kept in case
+ *             another sorting algorithm should be used).
+ *
+ *  \return void
+ */
+void mysort_peano(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *))
+{
+  struct peano_hilbert_data *entries = (struct peano_hilbert_data *)b;
+
+  struct peano_hilbert_data *scratch = (struct peano_hilbert_data *)mymalloc("tmp", n * s);
+
+  msort_peano_with_tmp(entries, n, scratch);
+
+  myfree(scratch);
+}
diff --git a/src/amuse/community/arepo/src/fof/fof.c b/src/amuse/community/arepo/src/fof/fof.c
new file mode 100644
index 0000000000..210f07f79c
--- /dev/null
+++ b/src/amuse/community/arepo/src/fof/fof.c
@@ -0,0 +1,967 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/fof/fof.c
+ * \date        05/2018
+ * \brief       Parallel friends-of-friends (FoF) group finder.
+ * \details     contains functions:
+ *                void fof_fof(int num)
+ *                void fof_prepare_output_order(void)
+ *                double fof_get_comoving_linking_length(void)
+ *                void fof_compile_catalogue(void)
+ *                void fof_assign_group_numbers(void)
+ *                void fof_compute_group_properties(int gr, int start, int len)
+ *                void fof_exchange_group_data(void)
+ *                void fof_finish_group_properties(void)
+ *                double fof_periodic(double x)
+ *                double fof_periodic_wrap(double x)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <inttypes.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../subfind/subfind.h"
+#include "fof.h"
+
+#ifdef FOF
+
+static MyIDType *MinID;                           /* work array, one entry per local particle; allocated and freed in fof_fof() */
+static int *Head, *Len, *Next, *Tail, *MinIDTask; /* likewise; Head/Len/Next/Tail start out as one-particle link lists */
+
+/*! \brief Main routine to execute the friend of friends group finder.
+ *
+ *  If called with num == -1 as argument, only FOF is carried out and no group
+ *  catalogs are saved to disk. If num >= 0, the code will store the
+ *  group/subgroup catalogs, and bring the particles into output order.
+ *  In this case, the calling routine (which is normally savepositions()) will
+ *  need to free PS[] and bring the particles back into the original order,
+ *  as well as reestablish the mesh.
+ *
+ *  \param[in] num Index of output; if negative, no output written.
+ *
+ *  \return void
+ */
+void fof_fof(int num)
+{
+  int i, start, lenloc, largestgroup;
+  double t0, t1, cputime;
+
+  TIMER_START(CPU_FOF);
+
+  mpi_printf("FOF: Begin to compute FoF group catalogue...  (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+
+  if(num >= 0 && RestartFlag != 3 && RestartFlag != 6)
+    {
+      /* let's discard an existing mesh - we do this here to reduce the peak memory usage, even at the price of
+       * having to recreate it later */
+      free_mesh();
+    }
+
+  if(RestartFlag != 6)
+    {
+      ngb_treefree();
+
+      domain_free();
+    }
+
+  domain_Decomposition();
+
+  ngb_treeallocate();
+  ngb_treebuild(NumGas);
+
+  /* check */
+  for(i = 0; i < NumPart; i++)
+    if((P[i].Mass == 0 && P[i].ID == 0) || (P[i].Type == 4 && P[i].Mass == 0))
+      terminate("this should not happen");
+
+  /* this structure will hold auxiliary information for each particle, needed only during group finding */
+  PS = (struct subfind_data *)mymalloc_movable(&PS, "PS", All.MaxPart * sizeof(struct subfind_data));
+
+  memset(PS, 0, NumPart * sizeof(struct subfind_data));
+
+  /* First, we save the original location of the particles, in order to be able to revert to this layout later on */
+  for(i = 0; i < NumPart; i++)
+    {
+      PS[i].OriginTask  = ThisTask;
+      PS[i].OriginIndex = i;
+    }
+
+  fof_OldMaxPart    = All.MaxPart;
+  fof_OldMaxPartSph = All.MaxPartSph;
+
+  LinkL = fof_get_comoving_linking_length();
+
+  mpi_printf("FOF: Comoving linking length: %g    (presently allocated=%g MB)\n", LinkL, AllocatedBytes / (1024.0 * 1024.0));
+
+  MinID     = (MyIDType *)mymalloc("MinID", NumPart * sizeof(MyIDType));
+  MinIDTask = (int *)mymalloc("MinIDTask", NumPart * sizeof(int));
+
+  Head = (int *)mymalloc("Head", NumPart * sizeof(int));
+  Len  = (int *)mymalloc("Len", NumPart * sizeof(int));
+  Next = (int *)mymalloc("Next", NumPart * sizeof(int));
+  Tail = (int *)mymalloc("Tail", NumPart * sizeof(int));
+
+#ifdef HIERARCHICAL_GRAVITY
+  timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestOccupiedTimeBin);
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+
+  construct_forcetree(0, 0, 1, All.HighestOccupiedTimeBin); /* build tree for all particles */
+
+#if defined(SUBFIND)
+  subfind_density_hsml_guess();
+#endif /* #if defined(SUBFIND) */
+
+  /* initialize link-lists */
+  for(i = 0; i < NumPart; i++)
+    {
+      Head[i] = Tail[i] = i;
+      Len[i]            = 1;
+      Next[i]           = -1;
+      MinID[i]          = P[i].ID;
+      MinIDTask[i]      = ThisTask;
+    }
+
+  /* call routine to find primary groups */
+  cputime = fof_find_groups(MinID, Head, Len, Next, Tail, MinIDTask);
+  mpi_printf("FOF: group finding took = %g sec\n", cputime);
+
+#ifdef FOF_SECONDARY_LINK_TARGET_TYPES
+  myfree(Father);
+  myfree(Nextnode);
+  myfree(Tree_Points);
+
+  /* now rebuild the tree with all the types selected as secondary link targets */
+  construct_forcetree(0, 0, 2, All.HighestOccupiedTimeBin);
+#endif /* #ifdef FOF_SECONDARY_LINK_TARGET_TYPES */
+
+#ifdef HIERARCHICAL_GRAVITY
+  timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin);
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+
+  /* call routine to attach secondary particles/cells to primary groups */
+  cputime = fof_find_nearest_dmparticle(MinID, Head, Len, Next, Tail, MinIDTask);
+
+  mpi_printf("FOF: attaching gas and star particles to nearest dm particles took = %g sec\n", cputime);
+
+  myfree(Father);
+  myfree(Nextnode);
+  myfree(Tree_Points);
+  force_treefree();
+
+  myfree(Tail);
+  myfree(Next);
+  myfree(Len);
+
+  t0 = second();
+
+  FOF_PList = (struct fof_particle_list *)mymalloc_movable(&FOF_PList, "FOF_PList", NumPart * sizeof(struct fof_particle_list));
+
+  for(i = 0; i < NumPart; i++)
+    {
+      FOF_PList[i].MinID     = MinID[Head[i]];
+      FOF_PList[i].MinIDTask = MinIDTask[Head[i]];
+      FOF_PList[i].Pindex    = i;
+    }
+
+  myfree_movable(Head);
+  myfree_movable(MinIDTask);
+  myfree_movable(MinID);
+
+  FOF_GList = (struct fof_group_list *)mymalloc_movable(&FOF_GList, "FOF_GList", sizeof(struct fof_group_list) * NumPart);
+
+  fof_compile_catalogue();
+
+  t1 = second();
+  mpi_printf("FOF: compiling local group data and catalogue took = %g sec\n", timediff(t0, t1));
+
+  MPI_Allreduce(&Ngroups, &TotNgroups, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+  sumup_large_ints(1, &Nids, &TotNids);
+
+  if(TotNgroups > 0)
+    {
+      int largestloc = 0;
+
+      for(i = 0; i < NgroupsExt; i++)
+        if(FOF_GList[i].LocCount + FOF_GList[i].ExtCount > largestloc)
+          largestloc = FOF_GList[i].LocCount + FOF_GList[i].ExtCount;
+      MPI_Allreduce(&largestloc, &largestgroup, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+    }
+  else
+    largestgroup = 0;
+
+  mpi_printf("FOF: Total number of FOF groups with at least %d particles: %d\n", FOF_GROUP_MIN_LEN, TotNgroups);
+  mpi_printf("FOF: Largest FOF group has %d particles.\n", largestgroup);
+  mpi_printf("FOF: Total number of particles in FOF groups: %lld\n", TotNids);
+
+  t0 = second();
+
+  MaxNgroups = 2 * imax(NgroupsExt, TotNgroups / NTask + 1);
+
+  Group = (struct group_properties *)mymalloc_movable(&Group, "Group", sizeof(struct group_properties) * MaxNgroups);
+
+  mpi_printf("FOF: group properties are now allocated.. (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+
+  for(i = 0, start = 0; i < NgroupsExt; i++)
+    {
+      while(start < NumPart && FOF_PList[start].MinID < FOF_GList[i].MinID)
+        start++;
+
+      /* FOF_PList holds NumPart entries, so index NumPart itself must never be read */
+      if(start >= NumPart)
+        terminate("start >= NumPart");
+
+      if(FOF_PList[start].MinID != FOF_GList[i].MinID)
+        terminate("ID mismatch");
+
+      for(lenloc = 0; start + lenloc < NumPart;)
+        if(FOF_PList[start + lenloc].MinID == FOF_GList[i].MinID)
+          lenloc++;
+        else
+          break;
+
+      Group[i].MinID     = FOF_GList[i].MinID;
+      Group[i].MinIDTask = FOF_GList[i].MinIDTask;
+
+      fof_compute_group_properties(i, start, lenloc);
+
+      start += lenloc;
+    }
+
+  fof_exchange_group_data();
+
+  fof_finish_group_properties();
+
+  t1 = second();
+  mpi_printf("FOF: computation of group properties took = %g sec\n", timediff(t0, t1));
+
+  fof_assign_group_numbers();
+
+  mpi_printf("FOF: Finished computing FoF groups.  (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+
+  myfree_movable(FOF_GList);
+  myfree_movable(FOF_PList);
+
+#ifdef SUBFIND
+  if(num >= 0)
+    {
+      TIMER_STOP(CPU_FOF);
+
+      subfind(num);
+
+      TIMER_START(CPU_FOF);
+    }
+#else  /* #ifdef SUBFIND */
+  Nsubgroups    = 0;
+  TotNsubgroups = 0;
+  if(num >= 0)
+    {
+      TIMER_STOP(CPU_FOF);
+      TIMER_START(CPU_SNAPSHOT);
+
+      fof_save_groups(num);
+
+      TIMER_STOP(CPU_SNAPSHOT);
+      TIMER_START(CPU_FOF);
+    }
+#endif /* #ifdef SUBFIND #else */
+
+  myfree_movable(Group);
+
+  mpi_printf("FOF: All FOF related work finished.  (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+
+#ifndef FOF_STOREIDS
+  if(num >= 0)
+    {
+      TIMER_STOP(CPU_FOF);
+      TIMER_START(CPU_SNAPSHOT);
+
+      /* now distribute the particles into output order */
+      t0 = second();
+      fof_prepare_output_order();
+      fof_subfind_exchange(
+          MPI_COMM_WORLD); /* distribute particles such that FOF groups will appear in coherent way in snapshot files */
+      t1 = second();
+      mpi_printf("FOF: preparing output order of particles took %g sec\n", timediff(t0, t1));
+
+      TIMER_STOP(CPU_SNAPSHOT);
+      TIMER_START(CPU_FOF);
+    }
+  else
+    myfree(PS);
+#else  /* #ifndef FOF_STOREIDS */
+  myfree(PS);
+#endif /* #ifndef FOF_STOREIDS #else */
+
+  TIMER_STOP(CPU_FOF);
+}
+
+/*! \brief Determines for every local particle its target task and index in output order (PS[].TargetTask/TargetIndex).
+ *
+ *  \return void
+ */
+void fof_prepare_output_order(void)
+{
+  int i, off, ntype[NTYPES];
+
+  struct data_aux_sort *aux_sort = (struct data_aux_sort *)mymalloc("aux_sort", sizeof(struct data_aux_sort) * NumPart);
+
+  for(i = 0; i < NTYPES; i++)
+    ntype[i] = 0;
+
+  for(i = 0; i < NumPart; i++) /* one sort record per local particle, plus a count of particles per type */
+    {
+      aux_sort[i].OriginTask  = ThisTask;
+      aux_sort[i].OriginIndex = i;
+      aux_sort[i].GrNr        = PS[i].GrNr;
+#ifdef SUBFIND
+      aux_sort[i].SubNr            = PS[i].SubNr;
+      aux_sort[i].DM_BindingEnergy = PS[i].BindingEnergy;
+#endif /* #ifdef SUBFIND */
+      aux_sort[i].Type = P[i].Type;
+      aux_sort[i].ID   = P[i].ID;
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
+      aux_sort[i].FileOrder = P[i].FileOrder;
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
+
+      ntype[P[i].Type]++;
+    }
+
+  /* local sort; the per-type slices used below assume it leaves the records grouped by Type */
+  qsort(aux_sort, NumPart, sizeof(struct data_aux_sort), fof_compare_aux_sort_Type);
+
+  if(RestartFlag == 18) /* NOTE(review): without RECOMPUTE_POTENTIAL_IN_SNAPSHOT nothing is sorted in this branch */
+    {
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
+      for(i = 0, off = 0; i < NTYPES; off += ntype[i], i++)
+        parallel_sort(aux_sort + off, ntype[i], sizeof(struct data_aux_sort), fof_compare_aux_sort_FileOrder);
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
+    }
+  else
+    {
+      for(i = 0, off = 0; i < NTYPES; off += ntype[i], i++) /* one parallel sort per particle type */
+        parallel_sort(aux_sort + off, ntype[i], sizeof(struct data_aux_sort), fof_compare_aux_sort_GrNr);
+    }
+
+  for(i = 0; i < NumPart; i++) /* the place a record has now is the target location of its particle */
+    {
+      aux_sort[i].TargetTask  = ThisTask;
+      aux_sort[i].TargetIndex = i;
+    }
+
+  /* now bring back into starting order */
+  parallel_sort(aux_sort, NumPart, sizeof(struct data_aux_sort), fof_compare_aux_sort_OriginTask_OriginIndex);
+
+  for(i = 0; i < NumPart; i++)
+    {
+      PS[i].TargetTask  = aux_sort[i].TargetTask;
+      PS[i].TargetIndex = aux_sort[i].TargetIndex;
+    }
+
+  myfree(aux_sort);
+}
+
+/*! \brief Calculate linking length from the mean separation of the FOF_PRIMARY_LINK_TYPES particles of all tasks.
+ *
+ *  \return Linking length.
+ */
+double fof_get_comoving_linking_length(void)
+{
+  int nlocal       = 0; /* local number of primary link particles */
+  double masslocal = 0; /* and the sum of their masses */
+  for(int p = 0; p < NumPart; p++)
+    if((1 << P[p].Type) & (FOF_PRIMARY_LINK_TYPES))
+      {
+        nlocal++;
+        masslocal += P[p].Mass;
+      }
+
+  long long ndmtot;
+  double masstot;
+  sumup_large_ints(1, &nlocal, &ndmtot);
+  MPI_Allreduce(&masslocal, &masstot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  const double rhodm = (All.Omega0 - All.OmegaBaryon) * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G);
+  return FOF_LINKLENGTH * pow(masstot / ndmtot / rhodm, 1.0 / 3);
+}
+
+/*! \brief Compiles the group catalogue (FOF_GList, NgroupsExt, Ngroups, Nids) from the particle list FOF_PList.
+ *
+ *  Group sizes are combined across tasks; groups below FOF_GROUP_MIN_LEN are dropped and the list ends up sorted by MinID.
+ *
+ *  \return void
+ */
+void fof_compile_catalogue(void)
+{
+  int i, j, start, nimport, ngrp, recvTask;
+  struct fof_group_list *get_FOF_GList;
+
+  /* sort according to MinID */
+  mysort(FOF_PList, NumPart, sizeof(struct fof_particle_list), fof_compare_FOF_PList_MinID);
+
+  for(i = 0; i < NumPart; i++) /* start with one single-particle group entry per particle */
+    {
+      FOF_GList[i].MinID     = FOF_PList[i].MinID;
+      FOF_GList[i].MinIDTask = FOF_PList[i].MinIDTask;
+      if(FOF_GList[i].MinIDTask == ThisTask)
+        {
+          FOF_GList[i].LocCount = 1;
+          FOF_GList[i].ExtCount = 0;
+        }
+      else
+        {
+          FOF_GList[i].LocCount = 0;
+          FOF_GList[i].ExtCount = 1;
+        }
+    }
+
+  /* eliminate duplicates in FOF_GList with respect to MinID */
+
+  if(NumPart)
+    NgroupsExt = 1;
+  else
+    NgroupsExt = 0;
+
+  for(i = 1, start = 0; i < NumPart; i++)
+    {
+      if(FOF_GList[i].MinID == FOF_GList[start].MinID)
+        {
+          FOF_GList[start].LocCount += FOF_GList[i].LocCount;
+          FOF_GList[start].ExtCount += FOF_GList[i].ExtCount;
+        }
+      else
+        {
+          start            = NgroupsExt;
+          FOF_GList[start] = FOF_GList[i];
+          NgroupsExt++;
+        }
+    }
+
+  /* sort the remaining ones according to task */
+  mysort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinIDTask);
+
+  /* count how many we have of each task */
+  for(i = 0; i < NTask; i++)
+    Send_count[i] = 0;
+  for(i = 0; i < NgroupsExt; i++)
+    Send_count[FOF_GList[i].MinIDTask]++;
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
+    {
+      if(j == ThisTask) /* we will not exchange the ones that are local */
+        Recv_count[j] = 0;
+      nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  get_FOF_GList = (struct fof_group_list *)mymalloc("get_FOF_GList", nimport * sizeof(struct fof_group_list));
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              /* get the group info */
+              MPI_Sendrecv(&FOF_GList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct fof_group_list), MPI_BYTE, recvTask,
+                           TAG_DENS_A, &get_FOF_GList[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct fof_group_list),
+                           MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  for(i = 0; i < nimport; i++)
+    get_FOF_GList[i].MinIDTask = i; /* remember the receive position; the list is sorted back by this field below */
+
+  /* sort the groups according to MinID */
+  mysort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinID);
+  mysort(get_FOF_GList, nimport, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinID);
+
+  /* merge the imported ones with the local ones */
+  for(i = 0, start = 0; i < nimport; i++)
+    {
+      while(FOF_GList[start].MinID < get_FOF_GList[i].MinID)
+        {
+          start++;
+          if(start >= NgroupsExt)
+            terminate("start >= NgroupsExt");
+        }
+
+      if(get_FOF_GList[i].LocCount != 0) /* an imported entry may only carry an external count */
+        terminate("get_FOF_GList[i].LocCount != 0");
+
+      if(FOF_GList[start].MinIDTask != ThisTask)
+        terminate("FOF_GList[start].MinIDTask != ThisTask");
+
+      if(FOF_GList[start].MinID != get_FOF_GList[i].MinID)
+        terminate(
+            "FOF_GList[start].MinID != get_FOF_GList[i].MinID start=%d i=%d FOF_GList[start].MinID=%llu get_FOF_GList[i].MinID=%llu\n",
+            start, i, (unsigned long long)FOF_GList[start].MinID, (unsigned long long)get_FOF_GList[i].MinID);
+
+      FOF_GList[start].ExtCount += get_FOF_GList[i].ExtCount;
+    }
+
+  /* copy the size information back into the list, to inform the others */
+  for(i = 0, start = 0; i < nimport; i++)
+    {
+      while(FOF_GList[start].MinID < get_FOF_GList[i].MinID)
+        {
+          start++;
+          if(start >= NgroupsExt)
+            terminate("start >= NgroupsExt");
+        }
+
+      get_FOF_GList[i].ExtCount = FOF_GList[start].ExtCount;
+      get_FOF_GList[i].LocCount = FOF_GList[start].LocCount;
+    }
+
+  /* sort the imported/exported list according to MinIDTask */
+  mysort(get_FOF_GList, nimport, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinIDTask);
+  mysort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinIDTask);
+
+  for(i = 0; i < nimport; i++)
+    get_FOF_GList[i].MinIDTask = ThisTask;
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++) /* reverse exchange: send the completed counts back */
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              /* get the group info */
+              MPI_Sendrecv(&get_FOF_GList[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct fof_group_list), MPI_BYTE,
+                           recvTask, TAG_DENS_A, &FOF_GList[Send_offset[recvTask]],
+                           Send_count[recvTask] * sizeof(struct fof_group_list), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  myfree(get_FOF_GList);
+
+  /* eliminate all groups that are too small, and count local groups */
+  for(i = 0, Ngroups = 0, Nids = 0; i < NgroupsExt; i++)
+    {
+      if(FOF_GList[i].LocCount + FOF_GList[i].ExtCount < FOF_GROUP_MIN_LEN)
+        {
+          FOF_GList[i] = FOF_GList[NgroupsExt - 1]; /* fill the gap with the last entry and examine slot i again */
+          NgroupsExt--;
+          i--;
+        }
+      else
+        {
+          if(FOF_GList[i].MinIDTask == ThisTask)
+            {
+              Ngroups++;
+              Nids += FOF_GList[i].LocCount + FOF_GList[i].ExtCount;
+            }
+        }
+    }
+
+  /* sort the group list according to MinID */
+  mysort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinID);
+}
+
+/*! \brief Assigns each group a global group number and stores it in Group[].GrNr and in PS[].GrNr of its particles.
+ *
+ *  \return void
+ */
+void fof_assign_group_numbers(void)
+{
+  int i, j, ngr, start, lenloc;
+  long long totNids;
+  double t0, t1;
+
+  mpi_printf("FOF: start assigning group numbers\n");
+
+  t0 = second();
+
+  /* assign group numbers (at this point, both Group and FOF_GList are sorted by MinID) */
+  for(i = 0; i < NgroupsExt; i++)
+    {
+      FOF_GList[i].LocCount += FOF_GList[i].ExtCount; /* total length */
+      FOF_GList[i].ExtCount = ThisTask;               /* original task */
+    }
+
+  parallel_sort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_LocCountTaskDiffMinID);
+
+  for(i = 0, ngr = 0; i < NgroupsExt; i++)
+    {
+      if(FOF_GList[i].ExtCount == FOF_GList[i].MinIDTask) /* entry contributed by the task given in MinIDTask: a new group */
+        ngr++;
+
+      FOF_GList[i].GrNr = ngr - 1;
+    }
+
+  MPI_Allgather(&ngr, 1, MPI_INT, Send_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  /* count how many groups there are on earlier CPUs */
+  long long ngr_sum;
+  for(j = 0, ngr_sum = 0; j < ThisTask; j++)
+    ngr_sum += Send_count[j];
+
+  for(i = 0; i < NgroupsExt; i++)
+    FOF_GList[i].GrNr += ngr_sum;
+
+  sumup_large_ints(1, &ngr, &ngr_sum);
+  if(ngr_sum != TotNgroups)
+    {
+      printf("ngr_sum=%lld\n", ngr_sum); /* print the full 64-bit sum instead of truncating it to int */
+      terminate("inconsistency");
+    }
+
+  /* bring the group list back into the original order */
+  parallel_sort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_ExtCountMinID);
+
+  /* Assign the group numbers to the group properties array */
+  for(i = 0, start = 0; i < Ngroups; i++)
+    {
+      while(FOF_GList[start].MinID < Group[i].MinID)
+        {
+          start++;
+          if(start >= NgroupsExt)
+            terminate("start >= NgroupsExt");
+        }
+      Group[i].GrNr = FOF_GList[start].GrNr;
+    }
+
+  /* sort the groups according to group-number */
+  parallel_sort(Group, Ngroups, sizeof(struct group_properties), fof_compare_Group_GrNr);
+
+  for(i = 0; i < NumPart; i++)
+    PS[i].GrNr = TotNgroups + 1; /* this marks all particles that are not in any group */
+
+  for(i = 0, start = 0, Nids = 0; i < NgroupsExt; i++)
+    {
+      while(start < NumPart && FOF_PList[start].MinID < FOF_GList[i].MinID)
+        start++;
+
+      /* the loops in this function index FOF_PList only below NumPart, so index NumPart must never be read */
+      if(start >= NumPart)
+        terminate("start >= NumPart");
+
+      if(FOF_PList[start].MinID != FOF_GList[i].MinID)
+        terminate("FOF_PList[start=%d].MinID=%lld != FOF_GList[i=%d].MinID=%lld", start, (long long)FOF_PList[start].MinID, i,
+                  (long long)FOF_GList[i].MinID);
+
+      for(lenloc = 0; start + lenloc < NumPart;)
+        if(FOF_PList[start + lenloc].MinID == FOF_GList[i].MinID)
+          {
+            PS[FOF_PList[start + lenloc].Pindex].GrNr = FOF_GList[i].GrNr;
+            Nids++;
+            lenloc++;
+          }
+        else
+          break;
+
+      start += lenloc;
+    }
+
+  sumup_large_ints(1, &Nids, &totNids);
+
+  if(totNids != TotNids)
+    {
+      char buf[1000];
+      sprintf(buf, "Task=%d Nids=%d totNids=%lld TotNids=%lld\n", ThisTask, Nids, (long long)totNids, (long long)TotNids);
+      terminate(buf);
+    }
+
+  t1 = second();
+
+  mpi_printf("FOF: Assigning of group numbers took = %g sec\n", timediff(t0, t1));
+}
+
+/*! \brief Accumulates the raw (not yet normalized) properties of one group.
+ *
+ *  Sums the particle count, mass, per-type count and mass, and the
+ *  mass-weighted offset and velocity of the listed particles. The sums are
+ *  finalized afterwards by fof_finish_group_properties.
+ *
+ *  \param[in] gr Index in Group array.
+ *  \param[in] start Start index in FOF_PList.
+ *  \param[in] len Number of particles in this group.
+ *
+ *  \return void
+ */
+void fof_compute_group_properties(int gr, int start, int len)
+{
+  const int first = FOF_PList[start].Pindex; /* reference particle for the periodic offsets */
+  int n, dim, t;
+  /* double-precision accumulators; copied into Group[gr] once all members are summed */
+  double mass_sum = 0;
+  double cm_sum[3], vel_sum[3];
+  double mass_by_type[NTYPES];
+#ifdef USE_SFR
+  double sfr_sum = 0;
+#endif /* #ifdef USE_SFR */
+
+  for(dim = 0; dim < 3; dim++)
+    {
+      cm_sum[dim]             = 0;
+      vel_sum[dim]            = 0;
+      Group[gr].FirstPos[dim] = P[first].Pos[dim];
+    }
+
+  for(t = 0; t < NTYPES; t++)
+    {
+      mass_by_type[t]      = 0;
+      Group[gr].LenType[t] = 0;
+    }
+
+  Group[gr].Len = 0;
+
+  /* visit the len members listed from FOF_PList[start] onwards */
+  for(n = 0; n < len; n++)
+    {
+      const int idx                 = FOF_PList[start + n].Pindex;
+      const struct particle_data *p = &P[idx];
+
+      t = p->Type;
+      Group[gr].Len++;
+      Group[gr].LenType[t]++;
+      mass_sum += p->Mass;
+      mass_by_type[t] += p->Mass;
+
+#ifdef USE_SFR
+      if(p->Type == 0)
+        sfr_sum += SphP[idx].Sfr;
+#endif /* #ifdef USE_SFR */
+
+      for(dim = 0; dim < 3; dim++)
+        {
+          /* offset from the reference particle, passed through fof_periodic */
+          double dx = p->Pos[dim];
+          dx        = fof_periodic(dx - P[first].Pos[dim]);
+          cm_sum[dim] += p->Mass * dx;
+          vel_sum[dim] += p->Mass * p->Vel[dim];
+        }
+    }
+
+  /* write the sums into the group record */
+  Group[gr].Mass = mass_sum;
+#ifdef USE_SFR
+  Group[gr].Sfr = sfr_sum;
+#endif /* #ifdef USE_SFR */
+  for(dim = 0; dim < 3; dim++)
+    {
+      Group[gr].CM[dim]  = cm_sum[dim];
+      Group[gr].Vel[dim] = vel_sum[dim];
+    }
+
+  for(t = 0; t < NTYPES; t++)
+    Group[gr].MassType[t] = mass_by_type[t];
+}
+
+/*! \brief Global exchange of identified groups to their appropriate task.
+ *  \details Group records are sent to other tasks in the order produced by fof_compare_Group_MinIDTask; imported ones are summed in.
+ *  \return void
+ */
+void fof_exchange_group_data(void)
+{
+  struct group_properties *get_Group; /* receive buffer for the imported group records */
+  int i, j, ngrp, recvTask, nimport, start;
+  double xyz[3];
+
+  /* sort the groups according to task */
+  mysort(Group, NgroupsExt, sizeof(struct group_properties), fof_compare_Group_MinIDTask);
+
+  /* count how many we have of each task */
+  for(i = 0; i < NTask; i++)
+    Send_count[i] = 0;
+  for(i = 0; i < NgroupsExt; i++)
+    Send_count[FOF_GList[i].MinIDTask]++; /* NOTE(review): counted from FOF_GList, assumes same MinIDTask values as Group -- confirm */
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
+    {
+      if(j == ThisTask) /* we will not exchange the ones that are local */
+        Recv_count[j] = 0;
+      nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; /* Send_count[ThisTask] is kept, so offsets index Group directly */
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  get_Group = (struct group_properties *)mymalloc("get_Group", sizeof(struct group_properties) * nimport);
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++) /* pairwise exchange: the partner is ThisTask XOR ngrp, never ThisTask itself */
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask) /* skip partner numbers beyond the number of tasks */
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              /* get the group data */
+              MPI_Sendrecv(&Group[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct group_properties), MPI_BYTE, recvTask,
+                           TAG_DENS_A, &get_Group[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct group_properties),
+                           MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  /* sort the groups again according to MinID */
+  mysort(Group, NgroupsExt, sizeof(struct group_properties), fof_compare_Group_MinID);
+  mysort(get_Group, nimport, sizeof(struct group_properties), fof_compare_Group_MinID);
+
+  /* now add in the partial imported group data to the main ones */
+  for(i = 0, start = 0; i < nimport; i++)
+    {
+      while(Group[start].MinID < get_Group[i].MinID) /* both arrays were just sorted the same way, so start only moves forward */
+        {
+          start++;
+          if(start >= NgroupsExt)
+            terminate("start >= NgroupsExt");
+        }
+
+      Group[start].Len += get_Group[i].Len;
+      Group[start].Mass += get_Group[i].Mass;
+
+      for(j = 0; j < NTYPES; j++)
+        {
+          Group[start].LenType[j] += get_Group[i].LenType[j];
+          Group[start].MassType[j] += get_Group[i].MassType[j];
+        }
+
+#ifdef USE_SFR
+      Group[start].Sfr += get_Group[i].Sfr;
+#endif /* #ifdef USE_SFR */
+
+      for(j = 0; j < 3; j++)
+        {
+          xyz[j] = get_Group[i].CM[j] / get_Group[i].Mass; /* imported mean offset, relative to the imported FirstPos */
+          xyz[j] = fof_periodic(xyz[j] + get_Group[i].FirstPos[j] - Group[start].FirstPos[j]); /* re-express relative to local FirstPos */
+          Group[start].CM[j] += get_Group[i].Mass * xyz[j]; /* back to a mass-weighted sum */
+          Group[start].Vel[j] += get_Group[i].Vel[j];
+        }
+    }
+
+  myfree(get_Group);
+}
+
+/*! \brief Turns the accumulated group sums into final values.
+ *
+ *  For groups whose MinIDTask is this task, Vel is divided by the mass and CM
+ *  becomes a wrapped position; all other groups are removed from Group.
+ *  \return void
+ */
+void fof_finish_group_properties(void)
+{
+  int g, dim, nlocal;
+
+  /* normalize the sums of the groups that belong to this task */
+  for(g = 0; g < NgroupsExt; g++)
+    {
+      if(Group[g].MinIDTask != ThisTask)
+        continue;
+      for(dim = 0; dim < 3; dim++)
+        {
+          double center;
+          Group[g].Vel[dim] /= Group[g].Mass;
+          center           = Group[g].CM[dim] / Group[g].Mass;
+          center           = fof_periodic_wrap(center + Group[g].FirstPos[dim]);
+          Group[g].CM[dim] = center;
+        }
+    }
+
+  /* eliminate the non-local groups: overwrite each with the current last entry and test the same slot again */
+  g      = 0;
+  nlocal = NgroupsExt;
+  while(g < nlocal)
+    {
+      if(Group[g].MinIDTask != ThisTask)
+        Group[g] = Group[--nlocal];
+      else
+        g++;
+    }
+
+  if(nlocal != Ngroups)
+    terminate("ngr != Ngroups");
+
+  mysort(Group, Ngroups, sizeof(struct group_properties), fof_compare_Group_MinID);
+}
+
+/*! \brief Folds a coordinate difference by at most one box length.
+ *
+ *  The same All.BoxSize is used for every axis, so this only works for a cubic
+ *  box. With GRAVITY_NOT_PERIODIC defined, x is returned unchanged.
+ *  \param[in] x Coordinate.
+ *  \return x, shifted by one BoxSize if it was outside [-0.5*BoxSize,0.5*BoxSize).
+ */
+double fof_periodic(double x)
+{
+#ifndef GRAVITY_NOT_PERIODIC
+  const double half_box = 0.5 * All.BoxSize;
+  if(x >= half_box)
+    x -= All.BoxSize;
+  if(x < -half_box)
+    x += All.BoxSize;
+#endif /* #ifndef GRAVITY_NOT_PERIODIC */
+  return x;
+}
+
+/*! \brief Wraps a coordinate back into the simulation box.
+ *
+ *  The same All.BoxSize is used for every axis, so this only works for a cubic
+ *  box. With GRAVITY_NOT_PERIODIC defined, x is returned unchanged.
+ *  \param[in] x Coordinate.
+ *  \return coordinate within [0,BoxSize).
+ */
+double fof_periodic_wrap(double x)
+{
+#ifndef GRAVITY_NOT_PERIODIC
+  const double box = All.BoxSize;
+  while(x >= box)
+    x -= box;
+  while(x < 0)
+    x += box;
+#endif /* #ifndef GRAVITY_NOT_PERIODIC */
+  return x;
+}
+
+#endif /* of FOF */
diff --git a/src/amuse/community/arepo/src/fof/fof.h b/src/amuse/community/arepo/src/fof/fof.h
new file mode 100644
index 0000000000..e60771b3aa
--- /dev/null
+++ b/src/amuse/community/arepo/src/fof/fof.h
@@ -0,0 +1,319 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/fof/fof.h
+ * \date        05/2018
+ * \brief       Header for Friend-of-Friends halo finder.
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 27.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef FOF_H
+#define FOF_H
+
+#include "../main/allvars.h"
+
+extern int Ngroups, NgroupsExt, MaxNgroups, TotNgroups, Nsubgroups, TotNsubgroups; /* group counters (local, extended, global) */
+extern int Nids;          /* number of local particles that were given a group number */
+extern long long TotNids; /* expected sum of Nids over all tasks */
+
+extern int fof_OldMaxPart;    /* NOTE(review): presumably a saved All.MaxPart -- confirm */
+extern int fof_OldMaxPartSph; /* NOTE(review): presumably a saved All.MaxPartSph -- confirm */
+
+extern double LinkL; /* NOTE(review): presumably the FOF linking length -- confirm */
+extern unsigned char *flag_node_inside_linkinglength;
+
+#define BITFLAG_INSIDE_LINKINGLENGTH 1
+
+#ifndef FOF_SECONDARY_LINK_TARGET_TYPES
+#define FOF_SECONDARY_LINK_TARGET_TYPES FOF_PRIMARY_LINK_TYPES /* default: same as the primary link types */
+#endif
+
+extern struct group_properties
+{
+  int Len;                  /* number of particles in the group */
+  MyIDType MinID;           /* key used to sort groups and to match them between lists */
+  MyIDType MinIDTask;       /* task that keeps the group; entries with another value are dropped locally */
+  int GrNr;                 /* group number, set by fof_assign_group_numbers */
+  int LenType[NTYPES];      /* particle count per particle type */
+  MyFloat MassType[NTYPES]; /* mass per particle type */
+  MyFloat Mass;             /* total mass */
+  MyDouble CM[3];           /* mass-weighted offset sum while accumulating; wrapped position once finished */
+  MyFloat Vel[3];           /* mass-weighted velocity sum while accumulating; divided by Mass once finished */
+  MyDouble Pos[3];          /* NOTE(review): presumably the group position, set elsewhere -- confirm */
+
+  MyDouble FirstPos[3]; /* position of the first listed particle; reference point for the periodic CM offsets */
+#ifdef USE_SFR
+  MyFloat Sfr; /* sum of SphP[].Sfr over the type-0 members */
+#endif /* #ifdef USE_SFR */
+
+#ifdef SUBFIND
+  int TargetTask; /* primary CPU responsible for this group */
+  int Nsubs;
+  int FirstSub;
+  MyFloat M_Mean200, R_Mean200; /* NOTE(review): M_xxx / R_xxx presumably are overdensity masses and radii -- confirm */
+  MyFloat M_Crit200, R_Crit200;
+  MyFloat M_Crit500, R_Crit500;
+  MyFloat M_TopHat200, R_TopHat200;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  MyFloat J_Mean200[3], JDM_Mean200[3], JGas_Mean200[3], JStars_Mean200[3], MassType_Mean200[NTYPES], CMFrac_Mean200,
+      CMFracType_Mean200[NTYPES];
+  MyFloat J_Crit200[3], JDM_Crit200[3], JGas_Crit200[3], JStars_Crit200[3], MassType_Crit200[NTYPES], CMFrac_Crit200,
+      CMFracType_Crit200[NTYPES];
+  MyFloat J_Crit500[3], JDM_Crit500[3], JGas_Crit500[3], JStars_Crit500[3], MassType_Crit500[NTYPES], CMFrac_Crit500,
+      CMFracType_Crit500[NTYPES];
+  MyFloat J_TopHat200[3], JDM_TopHat200[3], JGas_TopHat200[3], JStars_TopHat200[3], MassType_TopHat200[NTYPES], CMFrac_TopHat200,
+      CMFracType_TopHat200[NTYPES];
+  int LenType_Mean200[NTYPES], LenType_Crit200[NTYPES], LenType_Crit500[NTYPES], LenType_TopHat200[NTYPES];
+  MyFloat J[3], JDM[3], JGas[3], JStars[3], CMFrac, CMFracType[NTYPES];
+  MyFloat Ekin, Epot, Ethr;
+  MyFloat Ekin_Crit200, Epot_Crit200, Ethr_Crit200;
+  MyFloat Ekin_Crit500, Epot_Crit500, Ethr_Crit500;
+  MyFloat Ekin_Mean200, Epot_Mean200, Ethr_Mean200;
+  MyFloat Ekin_TopHat200, Epot_TopHat200, Ethr_TopHat200;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+#endif /* #ifdef SUBFIND */
+
+} * Group;
+
+struct data_aux_sort /* NOTE(review): presumably a per-particle record used when sorting particles for output -- confirm */
+{
+  int OriginTask, OriginIndex; /* NOTE(review): presumably where the particle currently lives -- confirm */
+  int TargetTask, TargetIndex; /* NOTE(review): presumably where the particle should go -- confirm */
+  int GrNr;
+  int Type;
+  MyIDType ID;
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
+  MyIDType FileOrder;
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
+#ifdef SUBFIND
+  int SubNr;
+  MyFloat DM_BindingEnergy;
+#endif /* #ifdef SUBFIND */
+};
+
+extern struct fof_particle_list
+{
+  MyIDType MinID; /* group key of this particle; entries with the same MinID get the same group number */
+  int MinIDTask;  /* NOTE(review): presumably the task associated with MinID -- confirm */
+  int Pindex;     /* index of the particle in the local P[] and PS[] arrays */
+} * FOF_PList;
+
+extern struct fof_group_list
+{
+  MyIDType MinID; /* group key; matched against Group[].MinID and FOF_PList[].MinID */
+  int MinIDTask;  /* task number compared with the entry's origin task when groups are counted */
+  int LocCount;   /* fof_assign_group_numbers adds ExtCount to it, giving the total length */
+  int ExtCount;   /* overwritten with the originating task number in fof_assign_group_numbers */
+  int GrNr;       /* group number, set in fof_assign_group_numbers */
+} * FOF_GList;
+
+extern struct id_list /* NOTE(review): presumably one record per particle ID written to the group catalogue -- confirm */
+{
+  MyIDType ID;
+  int GrNr;
+  int Type;
+#ifdef SUBFIND
+  int SubNr;
+  MyFloat BindingEgy;
+#endif /* #ifdef SUBFIND */
+} * ID_list;
+
+extern struct bit_flags /* four 2-bit flag fields declared on an unsigned char */
+{
+  unsigned char Nonlocal : 2, MinIDChanged : 2, Marked : 2, Changed : 2;
+} * Flags;
+
+struct fof_local_sort_data /* sort record used by fof_subfind_exchange to derive the local reordering permutation */
+{
+  int targetindex; /* copy of PS[].TargetIndex */
+  int index;       /* index of the particle before sorting */
+};
+
+extern struct fof_subfind_header /* NOTE(review): presumably the header stored with each group catalogue file -- confirm */
+{
+  int Ngroups;
+  int Nsubgroups;
+  int Nids;
+  int TotNgroups;
+  int TotNsubgroups;
+  long long TotNids;
+  int num_files;
+  double time;
+  double redshift;
+  double HubbleParam;
+  double BoxSize;
+  double Omega0;
+  double OmegaLambda;
+  int flag_doubleprecision;
+} catalogue_header;
+
+enum fof_subfind_iofields /* field identifiers passed as blocknr to the fof_subfind_* functions declared below */
+{
+  IO_FOF_LEN,
+  IO_FOF_MTOT,
+  IO_FOF_POS,
+  IO_FOF_CM,
+  IO_FOF_VEL,
+  IO_FOF_LENTYPE,
+  IO_FOF_MASSTYPE,
+  IO_FOF_SFR,
+
+  IO_FOF_M_MEAN200,
+  IO_FOF_R_MEAN200,
+  IO_FOF_M_CRIT200,
+  IO_FOF_R_CRIT200,
+  IO_FOF_M_TOPHAT200,
+  IO_FOF_R_TOPHAT200,
+  IO_FOF_M_CRIT500,
+  IO_FOF_R_CRIT500,
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  IO_FOF_J_MEAN200,
+  IO_FOF_JDM_MEAN200,
+  IO_FOF_JGAS_MEAN200,
+  IO_FOF_JSTARS_MEAN200,
+  IO_FOF_MASSTYPE_MEAN200,
+  IO_FOF_LENTYPE_MEAN200,
+  IO_FOF_CMFRAC_MEAN200,
+  IO_FOF_CMFRACTYPE_MEAN200,
+  IO_FOF_J_CRIT200,
+  IO_FOF_JDM_CRIT200,
+  IO_FOF_JGAS_CRIT200,
+  IO_FOF_JSTARS_CRIT200,
+  IO_FOF_MASSTYPE_CRIT200,
+  IO_FOF_LENTYPE_CRIT200,
+  IO_FOF_CMFRAC_CRIT200,
+  IO_FOF_CMFRACTYPE_CRIT200,
+  IO_FOF_J_TOPHAT200,
+  IO_FOF_JDM_TOPHAT200,
+  IO_FOF_JGAS_TOPHAT200,
+  IO_FOF_JSTARS_TOPHAT200,
+  IO_FOF_MASSTYPE_TOPHAT200,
+  IO_FOF_LENTYPE_TOPHAT200,
+  IO_FOF_CMFRAC_TOPHAT200,
+  IO_FOF_CMFRACTYPE_TOPHAT200,
+  IO_FOF_J_CRIT500,
+  IO_FOF_JDM_CRIT500,
+  IO_FOF_JGAS_CRIT500,
+  IO_FOF_JSTARS_CRIT500,
+  IO_FOF_MASSTYPE_CRIT500,
+  IO_FOF_LENTYPE_CRIT500,
+  IO_FOF_CMFRAC_CRIT500,
+  IO_FOF_CMFRACTYPE_CRIT500,
+  IO_FOF_J,
+  IO_FOF_JDM,
+  IO_FOF_JGAS,
+  IO_FOF_JSTARS,
+  IO_FOF_CMFRAC,
+  IO_FOF_CMFRACTYPE,
+  IO_FOF_EKIN,
+  IO_FOF_ETHR,
+  IO_FOF_EPOT,
+  IO_FOF_EPOT_CRIT200,
+  IO_FOF_EKIN_CRIT200,
+  IO_FOF_ETHR_CRIT200,
+  IO_FOF_EPOT_MEAN200,
+  IO_FOF_EKIN_MEAN200,
+  IO_FOF_ETHR_MEAN200,
+  IO_FOF_EPOT_TOPHAT200,
+  IO_FOF_EKIN_TOPHAT200,
+  IO_FOF_ETHR_TOPHAT200,
+  IO_FOF_EPOT_CRIT500,
+  IO_FOF_EKIN_CRIT500,
+  IO_FOF_ETHR_CRIT500,
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  IO_FOF_NSUBS,
+  IO_FOF_FIRSTSUB,
+  IO_FOF_FUZZOFFTYPE,
+
+  IO_SUB_LEN, /* NOTE(review): the IO_SUB_* entries presumably describe subgroups rather than FOF groups -- confirm */
+  IO_SUB_MTOT,
+  IO_SUB_POS,
+  IO_SUB_VEL,
+  IO_SUB_LENTYPE,
+  IO_SUB_MASSTYPE,
+  IO_SUB_CM,
+  IO_SUB_SPIN,
+  IO_SUB_BFLD_HALO,
+  IO_SUB_BFLD_DISK,
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  IO_SUB_EKIN,
+  IO_SUB_ETHR,
+  IO_SUB_EPOT,
+  IO_SUB_J,
+  IO_SUB_JDM,
+  IO_SUB_JGAS,
+  IO_SUB_JSTARS,
+  IO_SUB_JINHALFRAD,
+  IO_SUB_JDMINHALFRAD,
+  IO_SUB_JGASINHALFRAD,
+  IO_SUB_JSTARSINHALFRAD,
+  IO_SUB_JINRAD,
+  IO_SUB_JDMINRAD,
+  IO_SUB_JGASINRAD,
+  IO_SUB_JSTARSINRAD,
+  IO_SUB_CMFRAC,
+  IO_SUB_CMFRACTYPE,
+  IO_SUB_CMFRACINHALFRAD,
+  IO_SUB_CMFRACTYPEINHALFRAD,
+  IO_SUB_CMFRACINRAD,
+  IO_SUB_CMFRACTYPEINRAD,
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  IO_SUB_VELDISP,
+  IO_SUB_VMAX,
+  IO_SUB_VMAXRAD,
+  IO_SUB_HALFMASSRAD,
+  IO_SUB_HALFMASSRADTYPE,
+  IO_SUB_MASSINRAD,
+  IO_SUB_MASSINHALFRAD,
+  IO_SUB_MASSINMAXRAD,
+  IO_SUB_MASSINRADTYPE,
+  IO_SUB_MASSINHALFRADTYPE,
+  IO_SUB_MASSINMAXRADTYPE,
+  IO_SUB_IDMOSTBOUND,
+  IO_SUB_GRNR,
+  IO_SUB_PARENT,
+  IO_SUB_SFR,
+  IO_SUB_SFRINRAD,
+  IO_SUB_SFRINHALFRAD,
+  IO_SUB_SFRINMAXRAD,
+  IO_FOFSUB_IDS,
+  IO_FOF_LASTENTRY /* last enumerator; no explicit values are given, so it equals the number of fields listed above */
+};
+
+int fof_subfind_blockpresent(enum fof_subfind_iofields blocknr); /* NOTE(review): the functions below are defined elsewhere; */
+int fof_subfind_get_datatype(enum fof_subfind_iofields blocknr); /* their behaviour is not visible in this header -- confirm */
+int fof_subfind_get_bytes_per_blockelement(enum fof_subfind_iofields blocknr);
+int fof_subfind_get_particles_in_block(enum fof_subfind_iofields blocknr);
+void fof_subfind_get_dataset_name(enum fof_subfind_iofields blocknr, char *label);
+void fof_subfind_get_Tab_IO_Label(enum fof_subfind_iofields blocknr, char *label);
+int fof_subfind_get_dataset_group(enum fof_subfind_iofields blocknr);
+void fof_subfind_fill_write_buffer(enum fof_subfind_iofields blocknr, int *startindex, int pc);
+int fof_subfind_get_values_per_blockelement(enum fof_subfind_iofields blocknr);
+
+#endif /* #ifndef FOF_H */
diff --git a/src/amuse/community/arepo/src/fof/fof_distribute.c b/src/amuse/community/arepo/src/fof/fof_distribute.c
new file mode 100644
index 0000000000..57c01ff81a
--- /dev/null
+++ b/src/amuse/community/arepo/src/fof/fof_distribute.c
@@ -0,0 +1,420 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/fof/fof_distribute.c
+ * \date        05/2018
+ * \brief       Communication and reordering routines for FoF.
+ * \details     contains functions:
+ *                void fof_subfind_exchange(MPI_Comm Communicator)
+ *                void fof_reorder_PS(int *Id, int Nstart, int N)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../subfind/subfind.h"
+#include "fof.h"
+
+#ifdef FOF
+
+/*! \brief Redistributes the particles according to what is stored in
+ *         PS[].TargetTask, and PS[].TargetIndex.
+ *  \details Particles are exchanged one type at a time; a type is repeated while any task ran out of export space.
+ *  \param[in] Communicator MPI communicator.
+ *
+ *  \return void
+ */
+void fof_subfind_exchange(MPI_Comm Communicator)
+{
+  int nimport, nexport;
+  int i, j, n, type, ngrp, target;
+  int max_load, max_loadsph, load;
+  struct particle_data *partBuf;
+  struct subfind_data *subBuf;
+  struct sph_particle_data *sphBuf; /* only allocated and used while type == 0 */
+
+  int CommThisTask, CommNTask;
+
+  MPI_Comm_size(Communicator, &CommNTask);
+  MPI_Comm_rank(Communicator, &CommThisTask);
+
+  int old_AllMaxPart    = All.MaxPart; /* remembered so the allocations never end up below their initial size */
+  int old_AllMaxPartSph = All.MaxPartSph;
+
+  for(type = 0; type < NTYPES; type++)
+    {
+      size_t ExportSpace = 0.5 * (FreeBytes); /* we will try to grab at most half of the still available memory  */
+      size_t PartSpace   = sizeof(struct particle_data) + sizeof(struct subfind_data) + sizeof(struct sph_particle_data);
+      if(PartSpace > ExportSpace)
+        terminate("seems like we have insufficient storage, PartSpace=%lld ExportSpace=%lld", (long long)PartSpace,
+                  (long long)ExportSpace);
+
+      int glob_flag = 0;
+
+      do /* repeated as long as glob_flag is non-zero, i.e. some task stopped early for lack of space */
+        {
+          for(n = 0; n < CommNTask; n++)
+            {
+              Send_count[n] = 0;
+            }
+
+          ptrdiff_t AvailableSpace = ExportSpace; /* this must be a type that can become negative */
+
+          /* first pass: count the particles of this type that leave for each task, within the space budget */
+          for(n = 0; n < NumPart; n++)
+            {
+              if(AvailableSpace < 0)
+                break;
+
+              if(P[n].Type == type && PS[n].TargetTask != CommThisTask)
+                {
+                  target = PS[n].TargetTask;
+
+                  if(target < 0 || target >= CommNTask)
+                    terminate("n=%d targettask=%d", n, target);
+
+                  AvailableSpace -= PartSpace;
+
+                  Send_count[target]++;
+                }
+            }
+
+          MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
+
+          for(j = 0, nimport = 0, nexport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < CommNTask; j++)
+            {
+              nexport += Send_count[j];
+              nimport += Recv_count[j];
+
+              if(j > 0)
+                {
+                  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+                  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+                }
+            }
+
+          /* for resize */
+          load = (NumPart + nimport - nexport);
+          MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, Communicator);
+
+          if(type == 0)
+            {
+              load = (NumGas + nimport - nexport);
+              MPI_Allreduce(&load, &max_loadsph, 1, MPI_INT, MPI_MAX, Communicator);
+            }
+
+          partBuf = (struct particle_data *)mymalloc_movable(&partBuf, "partBuf", nexport * sizeof(struct particle_data));
+          subBuf  = (struct subfind_data *)mymalloc_movable(&subBuf, "subBuf", nexport * sizeof(struct subfind_data));
+          if(type == 0)
+            sphBuf = (struct sph_particle_data *)mymalloc_movable(&sphBuf, "sphBuf", nexport * sizeof(struct sph_particle_data));
+
+          for(i = 0; i < CommNTask; i++)
+            {
+              Send_count[i] = 0;
+            }
+
+          AvailableSpace = ExportSpace; /* this must be allowed to become negative */
+
+          int nstay         = 0; /* number of visited particles that remain on this task */
+          int delta_numpart = 0; /* number of particles copied into the send buffers */
+          int delta_numgas  = 0; /* number of those that are of type 0 */
+          /* second pass: same walk as above; leaving particles go to the buffers, staying ones are packed to the front */
+          for(n = 0; n < NumPart; n++)
+            {
+              if(AvailableSpace < 0)
+                break;
+
+              if(P[n].Type == type && PS[n].TargetTask != CommThisTask)
+                {
+                  target = PS[n].TargetTask;
+
+                  AvailableSpace -= PartSpace;
+
+                  partBuf[Send_offset[target] + Send_count[target]] = P[n];
+                  subBuf[Send_offset[target] + Send_count[target]]  = PS[n];
+
+                  if(P[n].Type == 0)
+                    {
+                      sphBuf[Send_offset[target] + Send_count[target]] = SphP[n];
+                      delta_numgas++;
+                    }
+
+                  Send_count[target]++;
+                  delta_numpart++;
+                }
+              else
+                {
+                  if(nstay != n)
+                    {
+                      /* now move P[n] to P[nstay] */
+
+                      P[nstay]  = P[n];
+                      PS[nstay] = PS[n];
+
+                      if(P[nstay].Type == 0)
+                        SphP[nstay] = SphP[n];
+                    }
+
+                  nstay++;
+                }
+            }
+
+          if(delta_numgas > 0)
+            if(delta_numpart != delta_numgas)
+              terminate("delta_numpart=%d != delta_numgas=%d", delta_numpart, delta_numgas);
+
+          /* now close gap (if present): shift down the particles the loop above did not reach */
+          memmove(P + nstay, P + nstay + delta_numpart, (NumPart - (nstay + delta_numpart)) * sizeof(struct particle_data));
+          memmove(PS + nstay, PS + nstay + delta_numpart, (NumPart - (nstay + delta_numpart)) * sizeof(struct subfind_data));
+
+          if(delta_numgas > 0)
+            if(NumGas - (nstay + delta_numgas) > 0)
+              memmove(SphP + nstay, SphP + nstay + delta_numpart,
+                      (NumGas - (nstay + delta_numgas)) * sizeof(struct sph_particle_data));
+
+          NumPart -= delta_numpart;
+          NumGas -= delta_numgas;
+
+          /* do resize, but only increase arrays!! (otherwise data in ActiveParticleList etc. gets lost */
+          if(max_load > (1.0 - ALLOC_TOLERANCE) * All.MaxPart)
+            {
+              All.MaxPart = max_load / (1.0 - 2 * ALLOC_TOLERANCE);
+              reallocate_memory_maxpart();
+              PS = (struct subfind_data *)myrealloc_movable(PS, All.MaxPart * sizeof(struct subfind_data));
+            }
+
+          if(type == 0)
+            {
+              if(max_loadsph > (1.0 - ALLOC_TOLERANCE) * All.MaxPartSph)
+                {
+                  All.MaxPartSph = max_loadsph / (1.0 - 2 * ALLOC_TOLERANCE);
+                  reallocate_memory_maxpartsph();
+                }
+            }
+
+          /* create a gap behind the existing gas particles where we will insert the incoming particles */
+          memmove(P + NumGas + nimport, P + NumGas, (NumPart - NumGas) * sizeof(struct particle_data));
+          memmove(PS + NumGas + nimport, PS + NumGas, (NumPart - NumGas) * sizeof(struct subfind_data));
+
+          for(i = 0; i < CommNTask; i++)
+            Recv_offset[i] += NumGas; /* received particles are stored starting at index NumGas */
+
+          for(ngrp = 1; ngrp < (1 << PTask); ngrp++) /* pairwise exchange: the partner is CommThisTask XOR ngrp */
+            {
+              target = CommThisTask ^ ngrp;
+
+              if(target < CommNTask)
+                {
+                  if(Send_count[target] > 0 || Recv_count[target] > 0)
+                    {
+                      MPI_Sendrecv(partBuf + Send_offset[target], Send_count[target] * sizeof(struct particle_data), MPI_BYTE, target,
+                                   TAG_PDATA, P + Recv_offset[target], Recv_count[target] * sizeof(struct particle_data), MPI_BYTE,
+                                   target, TAG_PDATA, Communicator, MPI_STATUS_IGNORE);
+
+                      MPI_Sendrecv(subBuf + Send_offset[target], Send_count[target] * sizeof(struct subfind_data), MPI_BYTE, target,
+                                   TAG_KEY, PS + Recv_offset[target], Recv_count[target] * sizeof(struct subfind_data), MPI_BYTE,
+                                   target, TAG_KEY, Communicator, MPI_STATUS_IGNORE);
+
+                      if(type == 0)
+                        MPI_Sendrecv(sphBuf + Send_offset[target], Send_count[target] * sizeof(struct sph_particle_data), MPI_BYTE,
+                                     target, TAG_SPHDATA, SphP + Recv_offset[target],
+                                     Recv_count[target] * sizeof(struct sph_particle_data), MPI_BYTE, target, TAG_SPHDATA,
+                                     Communicator, MPI_STATUS_IGNORE);
+                    }
+                }
+            }
+
+          if(type == 0)
+            NumGas += nimport;
+
+          NumPart += nimport;
+
+          if(type == 0)
+            myfree_movable(sphBuf);
+
+          myfree_movable(subBuf);
+          myfree_movable(partBuf);
+
+          int loc_flag = 0;
+          if(AvailableSpace < 0) /* this task exhausted its export budget in the pass above */
+            loc_flag = 1;
+
+          MPI_Allreduce(&loc_flag, &glob_flag, 1, MPI_INT, MPI_SUM, Communicator); /* glob_flag = number of such tasks */
+          if(glob_flag > 0 && CommThisTask == 0)
+            {
+              printf(
+                  "FOF-DISTRIBUTE: Need to cycle in particle exchange due to memory shortage. type=%d glob_flag=%d ThisTask=%d "
+                  "CommThisTask=%d   PartSpace=%lld  ExportSpace=%lld\n",
+                  type, glob_flag, ThisTask, CommThisTask, (long long)PartSpace, (long long)ExportSpace);
+              fflush(stdout);
+            }
+        }
+      while(glob_flag);
+    }
+
+  /* if there was a temporary memory shortage during the exchange, we may have had to increase the maximum allocations. Go back to
+   * smaller values again if possible */
+
+  load = NumPart;
+  MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, Communicator);
+  max_load = max_load / (1.0 - 2 * ALLOC_TOLERANCE);
+  if(max_load < old_AllMaxPart)
+    max_load = old_AllMaxPart;
+  if(max_load != All.MaxPart)
+    {
+      All.MaxPart = max_load;
+      reallocate_memory_maxpart();
+      PS = (struct subfind_data *)myrealloc_movable(PS, All.MaxPart * sizeof(struct subfind_data));
+    }
+
+  load = NumGas;
+  MPI_Allreduce(&load, &max_loadsph, 1, MPI_INT, MPI_MAX, Communicator);
+  max_loadsph = max_loadsph / (1.0 - 2 * ALLOC_TOLERANCE);
+  if(max_loadsph < old_AllMaxPartSph)
+    max_loadsph = old_AllMaxPartSph;
+  if(max_loadsph != All.MaxPartSph)
+    {
+      All.MaxPartSph = max_loadsph;
+      reallocate_memory_maxpartsph();
+    }
+
+  /* finally, let's also address the desired local order according to PS[].TargetIndex */
+
+  struct fof_local_sort_data *mp;
+  int *Id;
+
+  if(NumGas) /* the first NumGas particles are reordered among themselves */
+    {
+      mp = (struct fof_local_sort_data *)mymalloc("mp", sizeof(struct fof_local_sort_data) * NumGas);
+      Id = (int *)mymalloc("Id", sizeof(int) * NumGas);
+
+      for(i = 0; i < NumGas; i++)
+        {
+          mp[i].index       = i;
+          mp[i].targetindex = PS[i].TargetIndex;
+        }
+
+      qsort(mp, NumGas, sizeof(struct fof_local_sort_data), fof_compare_local_sort_data_targetindex);
+
+      for(i = 0; i < NumGas; i++)
+        Id[mp[i].index] = i; /* Id[old index] = new index */
+
+      reorder_gas(Id);
+
+      for(i = 0; i < NumGas; i++)
+        Id[mp[i].index] = i; /* NOTE(review): rebuilt, presumably because reorder_gas modifies Id -- confirm */
+
+      fof_reorder_PS(Id, 0, NumGas);
+
+      myfree(Id);
+      myfree(mp);
+    }
+
+  if(NumPart - NumGas > 0) /* the remaining particles are reordered among themselves */
+    {
+      mp = (struct fof_local_sort_data *)mymalloc("mp", sizeof(struct fof_local_sort_data) * (NumPart - NumGas));
+      mp -= NumGas; /* shift the pointer so that it can be indexed with particle indices NumGas..NumPart-1 */
+
+      Id = (int *)mymalloc("Id", sizeof(int) * (NumPart - NumGas));
+      Id -= NumGas; /* same shift as for mp */
+
+      for(i = NumGas; i < NumPart; i++)
+        {
+          mp[i].index       = i;
+          mp[i].targetindex = PS[i].TargetIndex;
+        }
+
+      qsort(mp + NumGas, NumPart - NumGas, sizeof(struct fof_local_sort_data), fof_compare_local_sort_data_targetindex);
+
+      for(i = NumGas; i < NumPart; i++)
+        Id[mp[i].index] = i;
+
+      reorder_particles(Id);
+
+      for(i = NumGas; i < NumPart; i++)
+        Id[mp[i].index] = i; /* NOTE(review): rebuilt, presumably because reorder_particles modifies Id -- confirm */
+
+      fof_reorder_PS(Id, NumGas, NumPart);
+
+      Id += NumGas; /* undo the pointer shift before freeing */
+      myfree(Id);
+      mp += NumGas; /* undo the pointer shift before freeing */
+      myfree(mp);
+    }
+}
+
+/*! \brief Applies a permutation to the PS array in place.
+ *
+ *  \param[in, out] Id Destination index of every element, i.e. the element at
+ *                  PS[i] is moved to PS[ Id[i] ]; each slot that has been
+ *                  filled gets Id[slot] == slot.
+ *  \param[in] Nstart First index to process in Id and PS.
+ *  \param[in] N One past the last index to process in Id and PS.
+ *
+ *  \return void
+ */
+void fof_reorder_PS(int *Id, int Nstart, int N)
+{
+  int head;
+
+  for(head = Nstart; head < N; head++)
+    {
+      struct subfind_data carried, displaced;
+      int carried_id, displaced_id, slot;
+
+      if(Id[head] == head)
+        continue; /* element already sits at its final position */
+      /* follow the chain of destinations that starts at 'head' */
+      carried    = PS[head];
+      carried_id = Id[head];
+      slot       = carried_id;
+
+      for(;;)
+        {
+          /* remember what currently occupies the destination slot ... */
+          displaced    = PS[slot];
+          displaced_id = Id[slot];
+
+          /* ... and put the carried element there */
+          PS[slot] = carried;
+          Id[slot] = carried_id;
+
+          if(slot == head)
+            break; /* back at the starting slot: chain complete */
+
+          carried    = displaced;
+          carried_id = displaced_id;
+          slot       = carried_id;
+        }
+    }
+}
+
+#endif /* #ifdef FOF */
diff --git a/src/amuse/community/arepo/src/fof/fof_findgroups.c b/src/amuse/community/arepo/src/fof/fof_findgroups.c
new file mode 100644
index 0000000000..55e2ae3d2e
--- /dev/null
+++ b/src/amuse/community/arepo/src/fof/fof_findgroups.c
@@ -0,0 +1,720 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/fof/fof_findgroups.c
+ * \date        05/2018
+ * \brief       Routines to identify friends-of-friends (FoF) groups.
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                double fof_find_groups(MyIDType * vMinID, int *vHead,
+ *                  int *vLen, int *vNext, int *vTail, int *vMinIDTask)
+ *                static int fof_find_dmparticles_evaluate(int target,
+ *                  int mode, int threadid)
+ *                static int fof_treefind_fof_primary(MyDouble searchcenter[3],
+ *                  MyFloat hsml, int target, int numnodes, int *firstnode,
+ *                  int mode, int threadid)
+ *                void fof_check_for_full_nodes_recursive(int no)
+ *                int fof_return_a_particle_in_cell_recursive(int no)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <inttypes.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../subfind/subfind.h"
+#include "fof.h"
+
+#ifdef FOF
+
+static int fof_find_dmparticles_evaluate(int target, int mode, int threadid);
+static int fof_treefind_fof_primary(MyDouble searchcenter[3], MyFloat hsml, int target, int numnodes, int *firstnode, int mode,
+                                    int threadid);
+
+static int *Tree_Head;
+
+static MyIDType *MinID;
+static int *Head, *Len, *Next, *Tail, *MinIDTask;
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3];
+
+  MyIDType MinID;
+  int MinIDTask;
+
+  int Firstnode;
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Packs the data of one local particle into a communication record.
+ *         Needed by generic_comm_helpers2.
+ *
+ *  \param[out] in Record to be filled.
+ *  \param[in] i Index of the particle in the P array.
+ *  \param[in] firstnode First node of communication.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  int k, head = Head[i]; /* head of the list the particle is currently linked into */
+
+  for(k = 0; k < 3; k++)
+    in->Pos[k] = P[i].Pos[k];
+
+  in->Firstnode = firstnode;
+  in->MinIDTask = MinIDTask[head];
+  in->MinID     = MinID[head];
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  char link_count_flag;
+} data_out;
+
+static data_out *DataResult, *DataOut;
+
+/*! \brief Merges a result that came back from a remote task into the local
+ *         particle flags. Needed by generic_comm_helpers2.
+ *
+ *  \param[in] out Result record; only its link_count_flag is evaluated.
+ *  \param[in] i Index of the local particle the result belongs to (index
+ *             into the Flags array).
+ *  \param[in] mode MODE_LOCAL_PARTICLES (initial store, not supported here)
+ *             or any other value (combine with the existing particle state).
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  if(mode != MODE_LOCAL_PARTICLES) /* combine */
+    {
+      if(out->link_count_flag)
+        Flags[i].Marked = 1;
+    }
+  else /* initial store */
+    {
+      terminate("here not used");
+    }
+}
+
+#include "../utils/generic_comm_helpers2.h"
+
+static int link_across;
+static int nprocessed;
+
+/*! \brief Routine that defines what to do with local particles.
+ *
+ *  Calls the *_evaluate function in MODE_LOCAL_PARTICLES.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  int i;
+  /* do local particles */
+  {
+    int j, threadid = get_thread_num();
+
+    for(j = 0; j < NTask; j++)
+      Thread[threadid].Exportflag[j] = -1; /* reset the per-task export markers of this thread */
+
+    while(1)
+      {
+        if(Thread[threadid].ExportSpace < MinSpace) /* checked before a new particle is claimed */
+          break;
+
+        i = NextParticle++; /* claim the next particle index */
+
+        if(i >= NumPart)
+          break;
+
+        if(((1 << P[i].Type) & (FOF_PRIMARY_LINK_TYPES))) /* only primary link types take part */
+          {
+            if(Flags[i].Nonlocal && Flags[i].Changed) /* skip particles without export need or change */
+              {
+                fof_find_dmparticles_evaluate(i, MODE_LOCAL_PARTICLES, threadid);
+
+                nprocessed++;
+              }
+          }
+      }
+  }
+}
+
+/*! \brief Processes the entries that were imported from other tasks.
+ *
+ *  Calls the *_evaluate function in MODE_IMPORTED_PARTICLES for each of them.
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  int threadid = get_thread_num();
+  int idx;
+
+  /* now do the particles that were sent to us: each of the Nimport entries
+   * is evaluated exactly once, in import order */
+  for(idx = 0; idx < Nimport; idx++)
+    {
+      int nlinks;
+
+      nlinks = fof_find_dmparticles_evaluate(idx, MODE_IMPORTED_PARTICLES, threadid);
+
+      /* the evaluation returns the number of links it registered; add it to
+       * the file-level link_across counter */
+      link_across += nlinks;
+    }
+}
+
+/*! \brief Links particles to groups, first locally and then across tasks.
+ *
+ *  \param[in, out] vMinID Pointer to MinID array.
+ *  \param[in, out] vHead Pointer to Head array.
+ *  \param[in, out] vLen Pointer to Len array.
+ *  \param[in, out] vNext Pointer to Next array.
+ *  \param[in, out] vTail Pointer to Tail array.
+ *  \param[in, out] vMinIDTask Pointer to MinIDTask array.
+ *
+ *  \return Time spent in this function.
+ */
+double fof_find_groups(MyIDType *vMinID, int *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask)
+{
+  MinID     = vMinID; /* keep the caller's arrays in file-level statics for the helper routines */
+  Head      = vHead;
+  Len       = vLen;
+  Next      = vNext;
+  Tail      = vTail;
+  MinIDTask = vMinIDTask;
+
+  int i, npart, marked;
+  long long totmarked, totnpart;
+  long long link_across_tot, ntot;
+  double t0, t1, tstart, tend;
+
+  tstart = second();
+
+  mpi_printf("FOF: Start linking particles (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+
+  /* allocate a flag field that is used to mark nodes that are fully inside the linking length */
+  flag_node_inside_linkinglength = (unsigned char *)mymalloc("flag_node_inside_linkinglength", Tree_MaxNodes * sizeof(unsigned char));
+  memset(flag_node_inside_linkinglength, 0, Tree_MaxNodes * sizeof(unsigned char));
+  flag_node_inside_linkinglength -= Tree_MaxPart; /* shift so that the array is indexed by node number */
+
+  Flags = (struct bit_flags *)mymalloc("Flags", NumPart * sizeof(struct bit_flags));
+
+  generic_set_MaxNexport();
+
+  Tree_Head = mymalloc("Tree_Head", Tree_NumNodes * sizeof(int));
+  Tree_Head -= Tree_MaxPart; /* same index shift as for the flag field above */
+
+  /* allocate buffers to arrange communication */
+  generic_alloc_partlist_nodelist_ngblist_threadbufs();
+
+  t0 = second();
+
+  /* first, link only among local particles */
+  for(i = 0, marked = 0, npart = 0; i < NumPart; i++)
+    {
+      if(((1 << P[i].Type) & (FOF_PRIMARY_LINK_TYPES)))
+        {
+          fof_find_dmparticles_evaluate(i, MODE_LOCAL_NO_EXPORT, 0);
+
+          npart++;
+
+          if(Flags[i].Nonlocal) /* set by the tree search when a pseudo particle was encountered */
+            marked++;
+        }
+    }
+
+  sumup_large_ints(1, &marked, &totmarked);
+  sumup_large_ints(1, &npart, &totnpart);
+  t1 = second();
+  mpi_printf("FOF: links on local processor done (took %g sec).\nFOF: Marked=%lld out of the %lld primaries which are linked\n",
+             timediff(t0, t1), totmarked, totnpart);
+
+  generic_free_partlist_nodelist_ngblist_threadbufs();
+
+  t0 = second();
+  fof_check_for_full_nodes_recursive(Tree_MaxPart); /* fills Tree_Head, starting at the root node */
+  t1 = second();
+  mpi_printf("FOF: fully linked nodes determined (took %g sec).\n", timediff(t0, t1));
+  mpi_printf("FOF: begin linking across processors (presently allocated=%g MB) \n", AllocatedBytes / (1024.0 * 1024.0));
+
+  for(i = 0; i < NumPart; i++)
+    Flags[i].Marked = 1; /* in the first iteration every particle counts as changed */
+
+  do
+    {
+      t0 = second();
+
+      for(i = 0; i < NumPart; i++)
+        {
+          Flags[i].Changed      = Flags[i].Marked; /* particles marked in the previous iteration */
+          Flags[i].Marked       = 0;
+          Flags[i].MinIDChanged = 0;
+        }
+
+      NextParticle = 0; /* begin with this index */
+
+      link_across = 0;
+      nprocessed  = 0;
+
+      generic_comm_pattern(NumPart, kernel_local, kernel_imported);
+
+      sumup_large_ints(1, &link_across, &link_across_tot);
+      sumup_large_ints(1, &nprocessed, &ntot);
+
+      t1 = second();
+
+      mpi_printf("FOF: have done %15lld cross links (processed %14lld, took %g sec)\n", link_across_tot, ntot, timediff(t0, t1));
+
+      /* let's check out which particles have changed their MinID */
+      for(i = 0; i < NumPart; i++)
+        if(Flags[i].Nonlocal)
+          {
+            if(Flags[Head[i]].MinIDChanged) /* the change is recorded on the head of the list */
+              Flags[i].Marked = 1;
+          }
+    }
+  while(link_across_tot > 0); /* iterate until no task reports a new cross link */
+
+  Tree_Head += Tree_MaxPart; /* undo the index shift before freeing */
+  myfree(Tree_Head);
+  myfree(Flags);
+  /* free flag */
+  myfree(flag_node_inside_linkinglength + Tree_MaxPart);
+
+  mpi_printf("FOF: Local groups found.\n");
+
+  tend = second();
+  return timediff(tstart, tend);
+}
+
+/*! \brief Searches the neighbours of one particle and links the lists.
+ *
+ *  \param[in] target Index of particle/cell.
+ *  \param[in] mode Flag if it operates on local or imported data.
+ *  \param[in] threadid ID of thread.
+ *
+ *  \return Number of links (the count returned by the tree search).
+ */
+static int fof_find_dmparticles_evaluate(int target, int mode, int threadid)
+{
+  int j, n, links, p, s, ss, numnodes, *firstnode;
+  int numngb;
+  MyDouble *pos;
+  data_in local, *target_data;
+
+  links = 0;
+
+  if(mode == MODE_LOCAL_NO_EXPORT || mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0); /* build the input record from the local particle */
+      target_data = &local;
+
+      numnodes  = 1;
+      firstnode = NULL;
+    }
+  else
+    {
+      target_data = &DataGet[target]; /* target indexes the import buffer in this mode */
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos = target_data->Pos;
+
+  numngb = fof_treefind_fof_primary(pos, LinkL, target, numnodes, firstnode, mode, threadid);
+
+  if(mode == MODE_LOCAL_PARTICLES || mode == MODE_LOCAL_NO_EXPORT)
+    for(n = 0; n < numngb; n++)
+      {
+        j = Thread[threadid].Ngblist[n];
+
+        if(Head[target] != Head[j]) /* only if not yet linked */
+          {
+            if(Len[Head[target]] > Len[Head[j]]) /* p group is longer */
+              {
+                p = target;
+                s = j;
+              }
+            else
+              {
+                p = j;
+                s = target;
+              }
+            Next[Tail[Head[p]]] = Head[s]; /* append the list of s to the end of the list of p */
+
+            Tail[Head[p]] = Tail[Head[s]];
+
+            Len[Head[p]] += Len[Head[s]];
+
+            if(MinID[Head[s]] < MinID[Head[p]]) /* the merged list keeps the smaller MinID */
+              {
+                MinID[Head[p]]     = MinID[Head[s]];
+                MinIDTask[Head[p]] = MinIDTask[Head[s]];
+              }
+
+            ss = Head[s];
+            do
+              Head[ss] = Head[p]; /* relabel every member of the appended list */
+            while((ss = Next[ss]) >= 0); /* a negative Next entry ends the list */
+          }
+      }
+
+  if(mode == MODE_IMPORTED_PARTICLES)
+    {
+      if(numngb > 0)
+        DataResult[target].link_count_flag = 1; /* report back whether anything was found */
+      else
+        DataResult[target].link_count_flag = 0;
+    }
+
+  links += numngb;
+
+  return links;
+}
+
+/*! \brief Finds the neighbors among the primary link types which are within a
+ *         certain distance.
+ *
+ *  \param[in] searchcenter Position of search center.
+ *  \param[in] hsml Search radius.
+ *  \param[in] target Index of particle (index into DataGet for imported points).
+ *  \param[in] numnodes Number of nodes.
+ *  \param[in] firstnode Array of start nodes; only read for imported points.
+ *  \param[in] mode
+ *             MODE_LOCAL_NO_EXPORT: local particles are found, no export occurs;
+ *             MODE_LOCAL_PARTICLES: export occurs, but local particles are ignored;
+ *             MODE_IMPORTED_PARTICLES: particles are found for an imported point.
+ *  \param[in] threadid ID of thread.
+ *
+ *  \return Number of neighbours found (imported points: number of MinID updates).
+ */
+static int fof_treefind_fof_primary(MyDouble searchcenter[3], MyFloat hsml, int target, int numnodes, int *firstnode, int mode,
+                                    int threadid)
+{
+  int k, numngb, no, p, nexport_flag = 0;
+  MyDouble dx, dy, dz, dist, r2;
+
+#define FACT2 0.866025403785 /* sqrt(3)/2 */
+#define FACT3 (2.0 * FACT2)  /* sqrt(3)   */
+
+  MyDouble xtmp, ytmp, ztmp;
+
+  numngb = 0;
+
+  for(k = 0; k < numnodes; k++)
+    {
+      if(mode == MODE_LOCAL_PARTICLES || mode == MODE_LOCAL_NO_EXPORT)
+        {
+          no = Tree_MaxPart; /* root node */
+        }
+      else
+        {
+          no = firstnode[k];
+          no = Nodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0)
+        {
+          if(no < Tree_MaxPart) /* single particle */
+            {
+              p  = no;
+              no = Nextnode[no];
+
+              if(!((1 << P[p].Type) & (FOF_PRIMARY_LINK_TYPES)))
+                continue;
+
+              if(mode == MODE_LOCAL_PARTICLES) /* local particles are ignored in this mode */
+                continue;
+
+              dist = hsml;
+              dx   = FOF_NEAREST_LONG_X(Tree_Pos_list[3 * p + 0] - searchcenter[0]);
+              if(dx > dist)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(Tree_Pos_list[3 * p + 1] - searchcenter[1]);
+              if(dy > dist)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(Tree_Pos_list[3 * p + 2] - searchcenter[2]);
+              if(dz > dist)
+                continue;
+              if(dx * dx + dy * dy + dz * dz > dist * dist)
+                continue;
+
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  if(MinID[Head[p]] > DataGet[target].MinID) /* the imported point carries a smaller MinID */
+                    {
+                      MinID[Head[p]]              = DataGet[target].MinID;
+                      MinIDTask[Head[p]]          = DataGet[target].MinIDTask;
+                      Flags[Head[p]].MinIDChanged = 1;
+                      numngb++;
+                    }
+                }
+              else
+                {
+                  /* this will only be done for MODE_LOCAL_NO_EXPORT */
+                  Thread[threadid].Ngblist[numngb++] = p;
+                }
+            }
+          else if(no < Tree_MaxPart + Tree_MaxNodes) /* internal node */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  if(no <
+                     Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */
+                    break;
+
+                  if(Tree_Head[no] >= 0) /* all primary particles in this node share one list head */
+                    if(MinID[Tree_Head[no]] <= DataGet[target].MinID)
+                      {
+                        no = Nodes[no].u.d.sibling; /* the node can be discarded */
+                        continue;
+                      }
+                }
+
+              struct NODE *current = &Nodes[no];
+              int nocur            = no;
+              no                   = current->u.d.sibling; /* in case the node can be discarded */
+
+              if(mode == MODE_LOCAL_PARTICLES)
+                {
+                  if(nocur >= Tree_FirstNonTopLevelNode)
+                    {
+                      /* we have a node with only local particles, hence we can skip it for MODE_LOCAL_PARTICLES */
+                      continue;
+                    }
+                }
+
+              dist = hsml + 0.5 * current->len;
+              dx   = FOF_NEAREST_LONG_X(current->center[0] - searchcenter[0]);
+              if(dx > dist)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(current->center[1] - searchcenter[1]);
+              if(dy > dist)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(current->center[2] - searchcenter[2]);
+              if(dz > dist)
+                continue;
+
+              /* now test against the minimal sphere enclosing everything */
+              dist += FACT1 * current->len;
+              r2 = dx * dx + dy * dy + dz * dz;
+              if(r2 > dist * dist)
+                continue;
+
+              if(mode != MODE_LOCAL_PARTICLES)
+                {
+                  /* test whether the node is contained within the sphere */
+                  dist = hsml - FACT2 * current->len;
+                  if(dist > 0)
+                    if(r2 < dist * dist && hsml > FACT3 * current->len)
+                      {
+                        if(flag_node_inside_linkinglength[nocur] & (1 << BITFLAG_INSIDE_LINKINGLENGTH)) /* already flagged */
+                          {
+                            /* sufficient to return only one particle inside this cell */
+                            p = fof_return_a_particle_in_cell_recursive(nocur);
+
+                            if(p >= 0) /* -1 means the cell holds no primary-type particle */
+                              {
+                                if(mode == MODE_IMPORTED_PARTICLES)
+                                  {
+                                    if(MinID[Head[p]] > DataGet[target].MinID)
+                                      {
+                                        MinID[Head[p]]              = DataGet[target].MinID;
+                                        MinIDTask[Head[p]]          = DataGet[target].MinIDTask;
+                                        Flags[Head[p]].MinIDChanged = 1;
+                                        numngb++;
+                                      }
+                                  }
+                                else
+                                  Thread[threadid].Ngblist[numngb++] = p;
+                              }
+
+                            continue;
+                          }
+                        else
+                          {
+                            /* flag it now */
+                            flag_node_inside_linkinglength[nocur] |= (1 << BITFLAG_INSIDE_LINKINGLENGTH);
+                          }
+                      }
+                }
+
+              no = current->u.d.nextnode; /* ok, we need to open the node */
+            }
+          else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */
+            {
+              terminate("do not expect imported points here");
+            }
+          else /* pseudo particle */
+            {
+              if(mode == MODE_LOCAL_PARTICLES)
+                {
+                  if(target >= 0)
+                    tree_treefind_export_node_threads(no, target, threadid);
+                }
+              else if(mode == MODE_LOCAL_NO_EXPORT)
+                {
+                  nexport_flag = 1; /* only remember that the search touched a pseudo particle */
+                }
+              else if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("stop no=%d Tree_MaxPart=%d Tree_MaxNodes=%d", no, Tree_MaxPart, Tree_MaxNodes);
+
+              no = Nextnode[no - Tree_MaxNodes];
+              continue;
+            }
+        }
+    }
+
+  if(mode == MODE_LOCAL_NO_EXPORT)
+    {
+      if(nexport_flag == 0)
+        Flags[target].Nonlocal = 0;
+      else
+        Flags[target].Nonlocal = 1;
+    }
+
+  return numngb;
+}
+
+/*! \brief Walks a tree recursively and sets Tree_Head of node.
+ *
+ *  \param[in] no Index of node we are in; nothing is done unless it is an internal node.
+ *
+ *  \return void
+ */
+void fof_check_for_full_nodes_recursive(int no)
+{
+  if(no >= Tree_MaxPart && no < Tree_MaxPart + Tree_MaxNodes) /* internal node */
+    {
+      int head = -1; /* no particle yet */
+
+      int p = Nodes[no].u.d.nextnode;
+
+      while(p != Nodes[no].u.d.sibling) /* visit everything linked below this node */
+        {
+          if(p < Tree_MaxPart) /* a particle */
+            {
+              if((1 << P[p].Type) & (FOF_PRIMARY_LINK_TYPES))
+                {
+                  if(head == -1)
+                    head = Head[p]; /* first primary particle defines the candidate head */
+                  else if(head >= 0)
+                    {
+                      if(head != Head[p])
+                        head = -2; /* particles with different heads: no common head */
+                    }
+                }
+
+              p = Nextnode[p];
+            }
+          else if(p < Tree_MaxPart + Tree_MaxNodes) /* an internal node  */
+            {
+              fof_check_for_full_nodes_recursive(p);
+
+              if(head == -1)
+                head = Tree_Head[p]; /* take over the daughter's result (may itself be -1 or -2) */
+              else if(head >= 0)
+                {
+                  if(head != Tree_Head[p])
+                    head = -2;
+                }
+
+              p = Nodes[p].u.d.sibling;
+            }
+          else /* a pseudo particle */
+            p = Nextnode[p - Tree_MaxNodes];
+        }
+
+      Tree_Head[no] = head; /* >= 0: common head; -1: no primary particle; -2: mixed heads */
+    }
+}
+
+/*! \brief Returns one particle of a primary link type contained in a node.
+ *
+ *  \param[in] no Index of node.
+ *
+ *  \return Particle index; -1 if no particle was found.
+ */
+int fof_return_a_particle_in_cell_recursive(int no)
+{
+  int cur, hit;
+
+  /* only an internal node has daughters that could be searched */
+  if(no < Tree_MaxPart || no >= Tree_MaxPart + Tree_MaxNodes)
+    return -1;
+
+  /* walk the linked entries until the sibling of this node is reached */
+  for(cur = Nodes[no].u.d.nextnode; cur != Nodes[no].u.d.sibling;)
+    {
+      if(cur >= Tree_MaxPart + Tree_MaxNodes) /* a pseudo particle: step over it */
+        cur = Nextnode[cur - Tree_MaxNodes];
+      else if(cur >= Tree_MaxPart) /* an internal node: search it recursively */
+        {
+          hit = fof_return_a_particle_in_cell_recursive(cur);
+
+          if(hit >= 0)
+            return hit;
+
+          cur = Nodes[cur].u.d.sibling;
+        }
+      else /* a particle: accept it if it is of a primary link type */
+        {
+          if((1 << P[cur].Type) & (FOF_PRIMARY_LINK_TYPES))
+            return cur;
+
+          cur = Nextnode[cur];
+        }
+    }
+
+  return -1;
+}
+
+#endif /* #ifdef FOF */
diff --git a/src/amuse/community/arepo/src/fof/fof_io.c b/src/amuse/community/arepo/src/fof/fof_io.c
new file mode 100644
index 0000000000..3c0755ab69
--- /dev/null
+++ b/src/amuse/community/arepo/src/fof/fof_io.c
@@ -0,0 +1,3151 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/fof/fof_io.c
+ * \date        05/2018
+ * \brief       Output functions for parallel FoF; also used by subfind.
+ * \details     contains functions:
+ *                void fof_save_groups(int num)
+ *                void fof_subfind_prepare_ID_list(void)
+ *                void fof_subfind_write_file(char *fname, int writeTask,
+ *                  int lastTask)
+ *                void fof_subfind_fill_write_buffer(enum fof_subfind_iofields
+ *                  blocknr, int *startindex, int pc)
+ *                void fof_subfind_get_dataset_name(enum fof_subfind_iofields
+ *                  blocknr, char *label)
+ *                int fof_subfind_get_dataset_group(enum fof_subfind_iofields
+ *                  blocknr)
+ *                int fof_subfind_get_particles_in_block(enum
+ *                  fof_subfind_iofields blocknr)
+ *                int fof_subfind_get_values_per_blockelement(enum
+ *                  fof_subfind_iofields blocknr)
+ *                int fof_subfind_get_bytes_per_blockelement(enum
+ *                  fof_subfind_iofields blocknr)
+ *                int fof_subfind_get_datatype(enum fof_subfind_iofields
+ *                  blocknr)
+ *                int fof_subfind_blockpresent(enum fof_subfind_iofields
+ *                  blocknr)
+ *                void fof_subfind_get_Tab_IO_Label(enum fof_subfind_iofields
+ *                  blocknr, char *label)
+ *                void fof_subfind_write_header_attributes_in_hdf5(hid_t
+ *                  handle)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <inttypes.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../gitversion/version.h"
+#include "../subfind/subfind.h"
+#include "fof.h"
+
+#ifdef HAVE_HDF5
+#include <hdf5.h>
+void fof_subfind_write_header_attributes_in_hdf5(hid_t handle);
+void write_parameters_attributes_in_hdf5(hid_t handle);
+void write_compile_time_options_in_hdf5(hid_t handle);
+#endif /* #ifdef HAVE_HDF5 */
+
+#ifdef FOF
+
+/*! \brief Make sure a position lies in the box in case of periodic boundaries.
+ *
+ *  \param[in] pos Single coordinate in one dimension to be wrapped
+ *  \param[in] dim Index of coordinate [0/1/2]
+ *
+ *  \return Wrapped coordinate (MyOutputFloat); unchanged along reflective dimensions
+ */
+static MyOutputFloat wrap_position(MyOutputFloat pos, int dim)
+{
+#if defined(REFLECTIVE_X)
+  if(dim == 0) /* no periodic wrapping along a reflective dimension */
+    return pos;
+#endif
+
+#if defined(REFLECTIVE_Y)
+  if(dim == 1)
+    return pos;
+#endif
+
+#if defined(REFLECTIVE_Z)
+  if(dim == 2)
+    return pos;
+#endif
+
+  double boxsize = All.BoxSize;
+
+#ifdef LONG_X
+  if(dim == 0) /* stretched box: scale the extent of this dimension */
+    boxsize *= LONG_X;
+#endif
+#ifdef LONG_Y
+  if(dim == 1)
+    boxsize *= LONG_Y;
+#endif
+#ifdef LONG_Z
+  if(dim == 2)
+    boxsize *= LONG_Z;
+#endif
+
+  while(pos < 0) /* shift up by whole box lengths until non-negative */
+    pos += boxsize;
+
+  while(pos >= boxsize) /* shift down by whole box lengths until below the box extent */
+    pos -= boxsize;
+
+  return pos;
+}
+
+/*! \brief Main routine for group output; path strings are built with bounded writes.
+ *
+ *  \param[in] num Index of group file (snapshot index for this output).
+ *
+ *  \return void
+ */
+void fof_save_groups(int num)
+{
+  int filenr, gr, ngrps, masterTask, lastTask;
+  double t0, t1;
+  char buf[500];
+
+#ifdef FOF_STOREIDS
+  fof_subfind_prepare_ID_list();
+#endif /* #ifdef FOF_STOREIDS */
+
+  t0 = second();
+
+  CommBuffer = mymalloc("CommBuffer", COMMBUFFERSIZE);
+
+  if(NTask < All.NumFilesPerSnapshot)
+    {
+      warn(
+          "Number of processors must be larger or equal than All.NumFilesPerSnapshot! Reducing All.NumFilesPerSnapshot "
+          "accordingly.\n");
+      All.NumFilesPerSnapshot = NTask;
+    }
+
+  if(All.SnapFormat < 1 || All.SnapFormat > 3) /* only reported; execution continues */
+    mpi_printf("Unsupported File-Format. All.SnapFormat=%d\n", All.SnapFormat);
+
+#ifndef HAVE_HDF5
+  if(All.SnapFormat == 3)
+    {
+      mpi_terminate("Code wasn't compiled with HDF5 support enabled!\n");
+    }
+#endif /* #ifndef  HAVE_HDF5 */
+
+  /* assign processors to output files */
+  distribute_file(All.NumFilesPerSnapshot, 0, 0, NTask - 1, &filenr, &masterTask, &lastTask);
+
+  if(All.NumFilesPerSnapshot > 1)
+    {
+      if(ThisTask == 0)
+        {
+          snprintf(buf, sizeof(buf), "%s/groups_%03d", All.OutputDir, num); /* bounded: OutputDir is configured */
+          mkdir(buf, 02755); /* return value is not checked */
+        }
+      MPI_Barrier(MPI_COMM_WORLD); /* all tasks wait until task 0 has issued the mkdir */
+    }
+
+  if(All.NumFilesPerSnapshot > 1)
+    snprintf(buf, sizeof(buf), "%s/groups_%03d/%s_%03d.%d", All.OutputDir, num, "fof_tab", num, filenr);
+  else
+    snprintf(buf, sizeof(buf), "%s%s_%03d", All.OutputDir, "fof_tab", num);
+
+  ngrps = All.NumFilesPerSnapshot / All.NumFilesWrittenInParallel; /* number of write rounds ... */
+  if((All.NumFilesPerSnapshot % All.NumFilesWrittenInParallel))
+    ngrps++; /* ... rounded up */
+
+  for(gr = 0; gr < ngrps; gr++)
+    {
+      if((filenr / All.NumFilesWrittenInParallel) == gr) /* ok, it's this processor's turn */
+        fof_subfind_write_file(buf, masterTask, lastTask);
+
+      MPI_Barrier(MPI_COMM_WORLD);
+    }
+
+  myfree(CommBuffer);
+
+#ifdef FOF_STOREIDS
+  myfree(ID_list); /* allocated by fof_subfind_prepare_ID_list() above */
+#endif /* #ifdef FOF_STOREIDS */
+
+  t1 = second();
+
+  mpi_printf("FOF: Group catalogues saved. took = %g sec\n", timediff(t0, t1));
+}
+
+/*! \brief Prepares the globally sorted ID list (ID_list) for option FOF_STOREIDS.
+ *
+ *  \return void
+ */
+void fof_subfind_prepare_ID_list(void)
+{
+  int i, nids;
+  long long totNids;
+  double t0, t1;
+
+  t0 = second();
+
+  ID_list = mymalloc("ID_list", sizeof(struct id_list) * Nids); /* not freed in this routine */
+
+  for(i = 0, nids = 0; i < NumPart; i++)
+    {
+      if(PS[i].GrNr < TotNgroups) /* only entries whose group number is below TotNgroups are listed */
+        {
+          if(nids >= Nids) /* guard against writing past the allocated list */
+            terminate("nids >= Nids");
+
+          ID_list[nids].GrNr = PS[i].GrNr;
+          ID_list[nids].Type = P[i].Type;
+          ID_list[nids].ID   = P[i].ID;
+#ifdef SUBFIND
+          ID_list[nids].SubNr      = PS[i].SubNr;
+          ID_list[nids].BindingEgy = PS[i].BindingEnergy;
+#endif /* #ifdef SUBFIND */
+          nids++;
+        }
+    }
+
+  sumup_large_ints(1, &nids, &totNids);
+  if(totNids != TotNids) /* consistency check of the summed count against TotNids */
+    {
+      char buf[1000];
+      sprintf(buf, "Task=%d Nids=%d totNids=%lld TotNids=%lld\n", ThisTask, Nids, totNids, TotNids);
+      terminate(buf);
+    }
+
+    /* sort the particle IDs according to group-number, and optionally subhalo number and binding energy  */
+#ifdef SUBFIND
+  parallel_sort(ID_list, Nids, sizeof(struct id_list), subfind_compare_ID_list);
+#else  /* #ifdef SUBFIND */
+  parallel_sort(ID_list, Nids, sizeof(struct id_list), fof_compare_ID_list_GrNrID);
+#endif /* #ifdef SUBFIND #else */
+
+  t1 = second();
+  mpi_printf("FOF/SUBFIND: Particle/cell IDs in groups globally sorted. took = %g sec\n", timediff(t0, t1));
+}
+
+/*! \brief Writes the group catalogue file fname with the data of tasks writeTask..lastTask; only writeTask does the file
+ *         I/O, the other tasks send their counts and data to it block by block via MPI.
+ *
+ *  \param[in] fname Filename of the output file; ".hdf5" is appended to it for SnapFormat 3.
+ *  \param[in] writeTask Task responsible for writing the file.
+ *  \param[in] lastTask Last task whose data is still in this file.
+ *
+ *  \return void
+ */
+void fof_subfind_write_file(char *fname, int writeTask, int lastTask)
+{
+  int bytes_per_blockelement, npart, nextblock;
+  int n_for_this_task, n, p, pc, offset = 0, task;
+  int blockmaxlen, n_type[3], ntot_type[3], nn[3];
+  enum fof_subfind_iofields blocknr;
+  char label[8];
+  int bnr;
+  int blksize;
+  MPI_Status status;
+  FILE *fd = 0;
+#ifdef HAVE_HDF5
+  hid_t hdf5_file = 0, hdf5_grp[3], hdf5_headergrp = 0, hdf5_dataspace_memory;
+  hid_t hdf5_datatype = 0, hdf5_dataspace_in_file = 0, hdf5_dataset = 0;
+  hid_t hdf5_paramsgrp = 0, hdf5_configgrp = 0;
+  herr_t hdf5_status;
+  hsize_t dims[2], count[2], start[2];
+  int rank = 0, pcsum = 0;
+  char buf[1000];
+#endif /* #ifdef HAVE_HDF5 */
+  /* SKIP writes the current value of blksize as an int-sized record marker (used in the non-HDF5 formats) */
+#define SKIP                                 \
+  {                                          \
+    my_fwrite(&blksize, sizeof(int), 1, fd); \
+  }
+
+  /* determine group/id numbers of each type in file */
+  n_type[0] = Ngroups;
+  n_type[1] = Nsubgroups;
+  n_type[2] = Nids;
+  /* writeTask sums the per-task counts and sends the per-file totals back to every other task in its range */
+  if(ThisTask == writeTask)
+    {
+      for(n = 0; n < 3; n++)
+        ntot_type[n] = n_type[n];
+
+      for(task = writeTask + 1; task <= lastTask; task++)
+        {
+          MPI_Recv(&nn[0], 3, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status);
+          for(n = 0; n < 3; n++)
+            ntot_type[n] += nn[n];
+        }
+
+      for(task = writeTask + 1; task <= lastTask; task++)
+        MPI_Send(&ntot_type[0], 3, MPI_INT, task, TAG_N, MPI_COMM_WORLD);
+    }
+  else
+    {
+      MPI_Send(&n_type[0], 3, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD);
+      MPI_Recv(&ntot_type[0], 3, MPI_INT, writeTask, TAG_N, MPI_COMM_WORLD, &status);
+    }
+
+  /* fill file header */
+  catalogue_header.Ngroups    = ntot_type[0];
+  catalogue_header.Nsubgroups = ntot_type[1];
+  catalogue_header.Nids       = ntot_type[2];
+
+  catalogue_header.TotNgroups    = TotNgroups;
+  catalogue_header.TotNsubgroups = TotNsubgroups;
+  catalogue_header.TotNids       = TotNids;
+
+  catalogue_header.num_files = All.NumFilesPerSnapshot;
+
+  catalogue_header.time = All.Time;
+  if(All.ComovingIntegrationOn)
+    catalogue_header.redshift = 1.0 / All.Time - 1;
+  else
+    catalogue_header.redshift = 0;
+  catalogue_header.HubbleParam = All.HubbleParam;
+  catalogue_header.BoxSize     = All.BoxSize;
+  catalogue_header.Omega0      = All.Omega0;
+  catalogue_header.OmegaLambda = All.OmegaLambda;
+
+#ifdef OUTPUT_IN_DOUBLEPRECISION
+  catalogue_header.flag_doubleprecision = 1;
+#else  /* #ifdef OUTPUT_IN_DOUBLEPRECISION */
+  catalogue_header.flag_doubleprecision = 0;
+#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION #else */
+
+  /* open file and write header */
+
+  if(ThisTask == writeTask)
+    {
+      if(All.SnapFormat == 3)
+        {
+#ifdef HAVE_HDF5
+          snprintf(buf, sizeof(buf), "%s.hdf5", fname); /* bounded: an overlong fname must not overrun buf */
+          hdf5_file = my_H5Fcreate(buf, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+          mpi_printf("FOF/SUBFIND: writing group catalogue: '%s' (file 1 of %d)\n", fname, All.NumFilesPerSnapshot);
+          hdf5_headergrp = my_H5Gcreate(hdf5_file, "/Header", 0);
+
+          hdf5_grp[0] = my_H5Gcreate(hdf5_file, "/Group", 0);
+          hdf5_grp[1] = my_H5Gcreate(hdf5_file, "/Subhalo", 0);
+          hdf5_grp[2] = my_H5Gcreate(hdf5_file, "/IDs", 0);
+
+          fof_subfind_write_header_attributes_in_hdf5(hdf5_headergrp);
+
+          hdf5_paramsgrp = my_H5Gcreate(hdf5_file, "/Parameters", 0);
+          write_parameters_attributes_in_hdf5(hdf5_paramsgrp);
+
+          hdf5_configgrp = my_H5Gcreate(hdf5_file, "/Config", 0);
+          write_compile_time_options_in_hdf5(hdf5_configgrp);
+
+#endif /* #ifdef HAVE_HDF5 */
+        }
+      else
+        {
+          if(!(fd = fopen(fname, "w")))
+            {
+              printf("can't open file `%s' for writing snapshot.\n", fname);
+              terminate("file open error");
+            }
+
+          mpi_printf("FOF/SUBFIND: writing group catalogue: '%s' (file 1 of %d)\n", fname, All.NumFilesPerSnapshot);
+
+          if(All.SnapFormat == 2) /* format 2 puts a small record with a 4-char label and the next block's size first */
+            {
+              blksize = sizeof(int) + 4 * sizeof(char);
+              SKIP;
+              my_fwrite((void *)"HEAD", sizeof(char), 4, fd);
+              nextblock = sizeof(catalogue_header) + 2 * sizeof(int);
+              my_fwrite(&nextblock, sizeof(int), 1, fd);
+              SKIP;
+            }
+
+          blksize = sizeof(catalogue_header);
+
+          SKIP;
+          my_fwrite(&catalogue_header, sizeof(catalogue_header), 1, fd);
+          SKIP;
+        }
+    }
+  /* walk through the block identifiers in enum order until the IO_FOF_LASTENTRY sentinel is reached */
+  for(bnr = 0; bnr < 1000; bnr++)
+    {
+      blocknr = (enum fof_subfind_iofields)bnr;
+
+      if(blocknr == IO_FOF_LASTENTRY)
+        break;
+
+      if(fof_subfind_blockpresent(blocknr))
+        {
+          bytes_per_blockelement = fof_subfind_get_bytes_per_blockelement(blocknr);
+
+          blockmaxlen = (int)(COMMBUFFERSIZE / bytes_per_blockelement); /* upper limit of elements per chunk */
+
+          npart   = fof_subfind_get_particles_in_block(blocknr);
+          int grp = fof_subfind_get_dataset_group(blocknr); /* used as index into n_type, ntot_type and hdf5_grp */
+
+          if(npart > 0)
+            {
+              if(ThisTask == 0)
+                {
+                  char buf[1000];
+
+                  fof_subfind_get_dataset_name(blocknr, buf);
+                  printf("FOF/SUBFIND: writing block %d (%s)...\n", blocknr, buf);
+                }
+
+              if(ThisTask == writeTask)
+                {
+                  if(All.SnapFormat == 1 || All.SnapFormat == 2)
+                    {
+                      if(All.SnapFormat == 2)
+                        {
+                          blksize = sizeof(int) + 4 * sizeof(char);
+                          SKIP;
+                          fof_subfind_get_Tab_IO_Label(blocknr, label);
+                          my_fwrite(label, sizeof(char), 4, fd);
+                          nextblock = npart * bytes_per_blockelement + 2 * sizeof(int);
+                          my_fwrite(&nextblock, sizeof(int), 1, fd);
+                          SKIP;
+                        }
+
+                      blksize = npart * bytes_per_blockelement;
+                      SKIP;
+                    }
+                  else if(All.SnapFormat == 3)
+                    {
+#ifdef HAVE_HDF5
+                      switch(fof_subfind_get_datatype(blocknr))
+                        {
+                          case 0:
+                            hdf5_datatype = my_H5Tcopy(H5T_NATIVE_INT);
+                            break;
+                          case 1:
+#ifdef OUTPUT_IN_DOUBLEPRECISION
+                            hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE);
+#else  /* #ifdef OUTPUT_IN_DOUBLEPRECISION */
+                            hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT);
+#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION #else */
+                            break;
+                          case 2:
+                            hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT64);
+                            break;
+                        }
+
+                      dims[0] = ntot_type[grp];
+                      dims[1] = fof_subfind_get_values_per_blockelement(blocknr);
+                      if(dims[1] == 1)
+                        rank = 1;
+                      else
+                        rank = 2;
+
+                      fof_subfind_get_dataset_name(blocknr, buf); /* from here on buf holds the dataset name */
+
+                      hdf5_dataspace_in_file = my_H5Screate_simple(rank, dims, NULL);
+
+                      hdf5_dataset = my_H5Dcreate(hdf5_grp[grp], buf, hdf5_datatype, hdf5_dataspace_in_file, H5P_DEFAULT);
+
+                      pcsum = 0;
+#endif /* #ifdef HAVE_HDF5 */
+                    }
+                }
+              /* the tasks take turns: each announces its element count to the others, then its data is moved in chunks */
+              for(task = writeTask, offset = 0; task <= lastTask; task++)
+                {
+                  if(task == ThisTask)
+                    {
+                      n_for_this_task = n_type[grp];
+
+                      for(p = writeTask; p <= lastTask; p++)
+                        if(p != ThisTask)
+                          MPI_Send(&n_for_this_task, 1, MPI_INT, p, TAG_NFORTHISTASK, MPI_COMM_WORLD);
+                    }
+                  else
+                    MPI_Recv(&n_for_this_task, 1, MPI_INT, task, TAG_NFORTHISTASK, MPI_COMM_WORLD, &status);
+
+                  while(n_for_this_task > 0)
+                    {
+                      pc = n_for_this_task;
+
+                      if(pc > blockmaxlen)
+                        pc = blockmaxlen;
+                      /* offset goes in by pointer: consecutive chunks rely on the fill routine advancing it */
+                      if(ThisTask == task)
+                        fof_subfind_fill_write_buffer(blocknr, &offset, pc);
+
+                      if(ThisTask == writeTask && task != writeTask)
+                        MPI_Recv(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, task, TAG_PDATA, MPI_COMM_WORLD, &status);
+
+                      if(ThisTask != writeTask && task == ThisTask)
+                        MPI_Ssend(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, writeTask, TAG_PDATA, MPI_COMM_WORLD);
+
+                      if(ThisTask == writeTask)
+                        {
+                          if(All.SnapFormat == 3)
+                            {
+#ifdef HAVE_HDF5
+                              start[0] = pcsum; /* number of elements already written to this dataset */
+                              start[1] = 0;
+
+                              count[0] = pc;
+                              count[1] = fof_subfind_get_values_per_blockelement(blocknr);
+                              pcsum += pc;
+
+                              my_H5Sselect_hyperslab(hdf5_dataspace_in_file, H5S_SELECT_SET, start, NULL, count, NULL);
+
+                              dims[0]               = pc;
+                              dims[1]               = fof_subfind_get_values_per_blockelement(blocknr);
+                              hdf5_dataspace_memory = my_H5Screate_simple(rank, dims, NULL);
+
+                              hdf5_status = my_H5Dwrite(hdf5_dataset, hdf5_datatype, hdf5_dataspace_memory, hdf5_dataspace_in_file,
+                                                        H5P_DEFAULT, CommBuffer, buf);
+
+                              (void)hdf5_status;
+
+                              my_H5Sclose(hdf5_dataspace_memory, H5S_SIMPLE);
+#endif /* #ifdef HAVE_HDF5 */
+                            }
+                          else
+                            {
+                              my_fwrite(CommBuffer, bytes_per_blockelement, pc, fd);
+                            }
+                        }
+
+                      n_for_this_task -= pc;
+                    }
+                }
+
+              if(ThisTask == writeTask)
+                {
+                  if(All.SnapFormat == 3)
+                    {
+#ifdef HAVE_HDF5
+                      my_H5Dclose(hdf5_dataset, buf);
+                      my_H5Sclose(hdf5_dataspace_in_file, H5S_SIMPLE);
+                      my_H5Tclose(hdf5_datatype);
+#endif /* #ifdef HAVE_HDF5 */
+                    }
+                  else
+                    SKIP;
+                }
+            }
+        }
+    }
+
+  if(ThisTask == writeTask)
+    {
+      if(All.SnapFormat == 3)
+        {
+#ifdef HAVE_HDF5
+          my_H5Gclose(hdf5_grp[0], "/Group");
+          my_H5Gclose(hdf5_grp[1], "/Subhalo");
+          my_H5Gclose(hdf5_grp[2], "/IDs");
+          my_H5Gclose(hdf5_headergrp, "/Header");
+          my_H5Gclose(hdf5_paramsgrp, "/Parameters");
+          my_H5Gclose(hdf5_configgrp, "/Config");
+
+          my_H5Fclose(hdf5_file, fname);
+#endif /* #ifdef HAVE_HDF5 */
+        }
+      else
+        fclose(fd);
+    }
+}
+
+/*! \brief Copies pc elements of field blocknr from Group, SubGroup or ID_list into CommBuffer.
+ *
+ *  \param[in] blocknr Number (identifier) of the field to be written.
+ *  \param[in,out] startindex First element to copy; advanced by pc on return so that the next chunk continues there.
+ *  \param[in] pc Number of elements (groups, subhalos or IDs) to be copied.
+ *
+ *  \return void
+ */
+void fof_subfind_fill_write_buffer(enum fof_subfind_iofields blocknr, int *startindex, int pc)
+{
+  int n, k, pindex, *ip;
+  MyOutputFloat *fp;
+  MyIDType *idp;
+  /* three typed write cursors over the same CommBuffer; every case below advances exactly one of them */
+  fp  = (MyOutputFloat *)CommBuffer;
+  ip  = (int *)CommBuffer;
+  idp = (MyIDType *)CommBuffer;
+
+  pindex = *startindex;
+
+  for(n = 0; n < pc; pindex++, n++)
+    {
+      switch(blocknr)
+        {
+          case IO_FOF_LEN:
+            *ip++ = Group[pindex].Len;
+            break;
+          case IO_FOF_MTOT:
+            *fp++ = Group[pindex].Mass;
+            break;
+          case IO_FOF_POS:
+            for(k = 0; k < 3; k++)
+#ifdef SUBFIND
+              *fp++ = wrap_position(Group[pindex].Pos[k] - All.GlobalDisplacementVector[k], k);
+#else  /* #ifdef SUBFIND */
+              *fp++ = wrap_position(Group[pindex].CM[k] - All.GlobalDisplacementVector[k], k);
+#endif /* #ifdef SUBFIND #else */
+            break;
+          case IO_FOF_CM:
+            for(k = 0; k < 3; k++)
+              *fp++ = wrap_position(Group[pindex].CM[k] - All.GlobalDisplacementVector[k], k);
+            break;
+          case IO_FOF_VEL:
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].Vel[k];
+            break;
+          case IO_FOF_LENTYPE:
+            for(k = 0; k < NTYPES; k++)
+              *ip++ = Group[pindex].LenType[k];
+            break;
+          case IO_FOF_MASSTYPE:
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = Group[pindex].MassType[k];
+            break;
+          case IO_FOF_SFR:
+#ifdef USE_SFR
+            *fp++ = Group[pindex].Sfr;
+#endif /* #ifdef USE_SFR */
+            break;
+          case IO_FOF_M_MEAN200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].M_Mean200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_R_MEAN200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].R_Mean200;
+#endif /* #ifdef SUBFIND */
+            break;
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+          case IO_FOF_J_MEAN200:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].J_Mean200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JDM_MEAN200:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JDM_Mean200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JGAS_MEAN200:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JGas_Mean200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JSTARS_MEAN200:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JStars_Mean200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_MASSTYPE_MEAN200:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = Group[pindex].MassType_Mean200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_LENTYPE_MEAN200:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *ip++ = Group[pindex].LenType_Mean200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_CMFRAC_MEAN200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].CMFrac_Mean200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_CMFRACTYPE_MEAN200:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = Group[pindex].CMFracType_Mean200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_J_CRIT200:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].J_Crit200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JDM_CRIT200:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JDM_Crit200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JGAS_CRIT200:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JGas_Crit200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JSTARS_CRIT200:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JStars_Crit200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_MASSTYPE_CRIT200:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = Group[pindex].MassType_Crit200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_LENTYPE_CRIT200:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *ip++ = Group[pindex].LenType_Crit200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_CMFRAC_CRIT200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].CMFrac_Crit200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_CMFRACTYPE_CRIT200:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = Group[pindex].CMFracType_Crit200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_J_CRIT500:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].J_Crit500[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JDM_CRIT500:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JDM_Crit500[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JGAS_CRIT500:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JGas_Crit500[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JSTARS_CRIT500:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JStars_Crit500[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_MASSTYPE_CRIT500:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = Group[pindex].MassType_Crit500[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_LENTYPE_CRIT500:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *ip++ = Group[pindex].LenType_Crit500[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_CMFRAC_CRIT500:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].CMFrac_Crit500;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_CMFRACTYPE_CRIT500:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = Group[pindex].CMFracType_Crit500[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_J_TOPHAT200:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].J_TopHat200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JDM_TOPHAT200:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JDM_TopHat200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JGAS_TOPHAT200:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JGas_TopHat200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JSTARS_TOPHAT200:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JStars_TopHat200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_MASSTYPE_TOPHAT200:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = Group[pindex].MassType_TopHat200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_LENTYPE_TOPHAT200:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *ip++ = Group[pindex].LenType_TopHat200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_CMFRAC_TOPHAT200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].CMFrac_TopHat200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_CMFRACTYPE_TOPHAT200:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = Group[pindex].CMFracType_TopHat200[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_EPOT_CRIT200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Epot_Crit200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_EKIN_CRIT200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Ekin_Crit200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_ETHR_CRIT200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Ethr_Crit200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_EPOT_MEAN200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Epot_Mean200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_EKIN_MEAN200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Ekin_Mean200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_ETHR_MEAN200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Ethr_Mean200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_EPOT_TOPHAT200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Epot_TopHat200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_EKIN_TOPHAT200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Ekin_TopHat200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_ETHR_TOPHAT200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Ethr_TopHat200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_EPOT_CRIT500:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Epot_Crit500;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_EKIN_CRIT500:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Ekin_Crit500;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_ETHR_CRIT500:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Ethr_Crit500;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_J:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].J[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JDM:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JDM[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JGAS:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JGas[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_JSTARS:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = Group[pindex].JStars[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_CMFRAC:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].CMFrac;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_CMFRACTYPE:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = Group[pindex].CMFracType[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_EKIN:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Ekin;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_ETHR:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Ethr;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_EPOT:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].Epot;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_EKIN:
+#ifdef SUBFIND
+            *fp++ = SubGroup[pindex].Ekin;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_ETHR:
+#ifdef SUBFIND
+            *fp++ = SubGroup[pindex].Ethr;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_EPOT:
+#ifdef SUBFIND
+            *fp++ = SubGroup[pindex].Epot;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_J: /* loop kept inside the guard: otherwise "break" would become the loop body without SUBFIND */
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = SubGroup[pindex].J[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_JDM:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = SubGroup[pindex].Jdm[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_JGAS:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = SubGroup[pindex].Jgas[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_JSTARS:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = SubGroup[pindex].Jstars[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_JINHALFRAD:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = SubGroup[pindex].J_inHalfRad[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_JDMINHALFRAD:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = SubGroup[pindex].Jdm_inHalfRad[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_JGASINHALFRAD:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = SubGroup[pindex].Jgas_inHalfRad[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_JSTARSINHALFRAD:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = SubGroup[pindex].Jstars_inHalfRad[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_JINRAD:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = SubGroup[pindex].J_inRad[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_JDMINRAD:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = SubGroup[pindex].Jdm_inRad[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_JGASINRAD:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = SubGroup[pindex].Jgas_inRad[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_JSTARSINRAD:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = SubGroup[pindex].Jstars_inRad[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_CMFRAC:
+#ifdef SUBFIND
+            *fp++ = SubGroup[pindex].CMFrac;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_CMFRACTYPE:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = SubGroup[pindex].CMFracType[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_CMFRACINHALFRAD:
+#ifdef SUBFIND
+            *fp++ = SubGroup[pindex].CMFrac_inHalfRad;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_CMFRACTYPEINHALFRAD:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = SubGroup[pindex].CMFracType_inHalfRad[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_CMFRACINRAD:
+#ifdef SUBFIND
+            *fp++ = SubGroup[pindex].CMFrac_inRad;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_CMFRACTYPEINRAD:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = SubGroup[pindex].CMFracType_inRad[k];
+#endif /* #ifdef SUBFIND */
+            break;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+          case IO_FOF_M_CRIT200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].M_Crit200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_R_CRIT200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].R_Crit200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_M_CRIT500:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].M_Crit500;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_R_CRIT500:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].R_Crit500;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_M_TOPHAT200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].M_TopHat200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_R_TOPHAT200:
+#ifdef SUBFIND
+            *fp++ = Group[pindex].R_TopHat200;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_NSUBS:
+#ifdef SUBFIND
+            *ip++ = Group[pindex].Nsubs;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_FIRSTSUB:
+#ifdef SUBFIND
+            *ip++ = Group[pindex].FirstSub;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_FOF_FUZZOFFTYPE: /* nothing is copied for this block */
+            break;
+          case IO_SUB_LEN:
+#ifdef SUBFIND
+            *ip++ = SubGroup[pindex].Len;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_MTOT:
+#ifdef SUBFIND
+            *fp++ = SubGroup[pindex].Mass;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_POS:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = wrap_position(SubGroup[pindex].Pos[k] - All.GlobalDisplacementVector[k], k);
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_VEL:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = SubGroup[pindex].Vel[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_LENTYPE:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *ip++ = SubGroup[pindex].LenType[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_MASSTYPE:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = SubGroup[pindex].MassType[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_CM:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = wrap_position(SubGroup[pindex].CM[k] - All.GlobalDisplacementVector[k], k);
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_SPIN:
+#ifdef SUBFIND
+            for(k = 0; k < 3; k++)
+              *fp++ = SubGroup[pindex].Spin[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_VELDISP:
+#ifdef SUBFIND
+            *fp++ = SubGroup[pindex].SubVelDisp;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_VMAX:
+#ifdef SUBFIND
+            *fp++ = SubGroup[pindex].SubVmax;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_VMAXRAD:
+#ifdef SUBFIND
+            *fp++ = SubGroup[pindex].SubVmaxRad;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_HALFMASSRAD:
+#ifdef SUBFIND
+            *fp++ = SubGroup[pindex].SubHalfMassRad;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_HALFMASSRADTYPE:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = SubGroup[pindex].SubHalfMassRadType[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_MASSINRAD:
+#ifdef SUBFIND
+            *fp++ = SubGroup[pindex].SubMassInRad;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_MASSINRADTYPE:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = SubGroup[pindex].SubMassInRadType[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_MASSINHALFRAD:
+#ifdef SUBFIND
+            *fp++ = SubGroup[pindex].SubMassInHalfRad;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_MASSINHALFRADTYPE:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = SubGroup[pindex].SubMassInHalfRadType[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_MASSINMAXRAD:
+#ifdef SUBFIND
+            *fp++ = SubGroup[pindex].SubMassInMaxRad;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_MASSINMAXRADTYPE:
+#ifdef SUBFIND
+            for(k = 0; k < NTYPES; k++)
+              *fp++ = SubGroup[pindex].SubMassInMaxRadType[k];
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_IDMOSTBOUND:
+#ifdef SUBFIND
+            *idp++ = SubGroup[pindex].SubMostBoundID;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_GRNR:
+#ifdef SUBFIND
+            *ip++ = SubGroup[pindex].GrNr;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_PARENT:
+#ifdef SUBFIND
+            *ip++ = SubGroup[pindex].SubParent;
+#endif /* #ifdef SUBFIND */
+            break;
+          case IO_SUB_BFLD_HALO:
+#if defined(MHD) && defined(SUBFIND)
+            *fp++ = SubGroup[pindex].Bfld_Halo * sqrt(4. * M_PI);
+#endif /* #if defined(MHD) && defined(SUBFIND) */
+            break;
+          case IO_SUB_BFLD_DISK:
+#if defined(MHD) && defined(SUBFIND)
+            *fp++ = SubGroup[pindex].Bfld_Disk * sqrt(4. * M_PI);
+#endif /* #if defined(MHD) && defined(SUBFIND) */
+            break;
+          case IO_SUB_SFR:
+#if defined(USE_SFR) && defined(SUBFIND)
+            *fp++ = SubGroup[pindex].Sfr;
+#endif /* #if defined(USE_SFR) && defined(SUBFIND) */
+            break;
+          case IO_SUB_SFRINRAD:
+#if defined(USE_SFR) && defined(SUBFIND)
+            *fp++ = SubGroup[pindex].SfrInRad;
+#endif /* #if defined(USE_SFR) && defined(SUBFIND) */
+            break;
+          case IO_SUB_SFRINHALFRAD:
+#if defined(USE_SFR) && defined(SUBFIND)
+            *fp++ = SubGroup[pindex].SfrInHalfRad;
+#endif /* #if defined(USE_SFR) && defined(SUBFIND) */
+            break;
+          case IO_SUB_SFRINMAXRAD:
+#if defined(USE_SFR) && defined(SUBFIND)
+            *fp++ = SubGroup[pindex].SfrInMaxRad;
+#endif /* #if defined(USE_SFR) && defined(SUBFIND) */
+            break;
+          case IO_FOFSUB_IDS:
+#ifdef FOF_STOREIDS
+            *idp++ = ID_list[pindex].ID;
+#endif /* #ifdef FOF_STOREIDS */
+            break;
+
+          case IO_FOF_LASTENTRY:
+            terminate("should not be reached");
+            break;
+        }
+    }
+  *startindex = pindex; /* hand the advanced position back so the caller's next chunk does not repeat these elements */
+}
+
+/*! \brief Associates the output variable blocknumber with its name.
+ *
+ *  \param[in] blocknr Number (identifier) of the field to be written.
+ *  \param[out] label Caller-provided buffer that receives the name as a
+ *              null-terminated string via strcpy() (no bounds check).
+ *  \return void
+ */
+void fof_subfind_get_dataset_name(enum fof_subfind_iofields blocknr, char *label)
+{
+  switch(blocknr) /* no default case: an unlisted blocknr leaves label unchanged */
+    {
+      case IO_FOF_LEN:
+        strcpy(label, "GroupLen");
+        break;
+      case IO_FOF_MTOT:
+        strcpy(label, "GroupMass");
+        break;
+      case IO_FOF_POS:
+        strcpy(label, "GroupPos");
+        break;
+      case IO_FOF_CM:
+        strcpy(label, "GroupCM");
+        break;
+      case IO_FOF_VEL:
+        strcpy(label, "GroupVel");
+        break;
+      case IO_FOF_LENTYPE:
+        strcpy(label, "GroupLenType");
+        break;
+      case IO_FOF_MASSTYPE:
+        strcpy(label, "GroupMassType");
+        break;
+      case IO_FOF_SFR:
+        strcpy(label, "GroupSFR");
+        break;
+      case IO_FOF_M_MEAN200:
+        strcpy(label, "Group_M_Mean200");
+        break;
+      case IO_FOF_R_MEAN200:
+        strcpy(label, "Group_R_Mean200");
+        break;
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_FOF_J_MEAN200:
+        strcpy(label, "Group_J_Mean200");
+        break;
+      case IO_FOF_JDM_MEAN200:
+        strcpy(label, "Group_Jdm_Mean200");
+        break;
+      case IO_FOF_JGAS_MEAN200:
+        strcpy(label, "Group_Jgas_Mean200");
+        break;
+      case IO_FOF_JSTARS_MEAN200:
+        strcpy(label, "Group_Jstars_Mean200");
+        break;
+      case IO_FOF_MASSTYPE_MEAN200:
+        strcpy(label, "Group_MassType_Mean200");
+        break;
+      case IO_FOF_LENTYPE_MEAN200:
+        strcpy(label, "Group_LenType_Mean200");
+        break;
+      case IO_FOF_CMFRAC_MEAN200:
+        strcpy(label, "Group_CMFrac_Mean200");
+        break;
+      case IO_FOF_CMFRACTYPE_MEAN200:
+        strcpy(label, "Group_CMFracType_Mean200");
+        break;
+      case IO_FOF_J_CRIT200:
+        strcpy(label, "Group_J_Crit200");
+        break;
+      case IO_FOF_JDM_CRIT200:
+        strcpy(label, "Group_Jdm_Crit200");
+        break;
+      case IO_FOF_JGAS_CRIT200:
+        strcpy(label, "Group_Jgas_Crit200");
+        break;
+      case IO_FOF_JSTARS_CRIT200:
+        strcpy(label, "Group_Jstars_Crit200");
+        break;
+      case IO_FOF_MASSTYPE_CRIT200:
+        strcpy(label, "Group_MassType_Crit200");
+        break;
+      case IO_FOF_LENTYPE_CRIT200:
+        strcpy(label, "Group_LenType_Crit200");
+        break;
+      case IO_FOF_CMFRAC_CRIT200:
+        strcpy(label, "Group_CMFrac_Crit200");
+        break;
+      case IO_FOF_CMFRACTYPE_CRIT200:
+        strcpy(label, "Group_CMFracType_Crit200");
+        break;
+      case IO_FOF_J_CRIT500:
+        strcpy(label, "Group_J_Crit500");
+        break;
+      case IO_FOF_JDM_CRIT500:
+        strcpy(label, "Group_Jdm_Crit500");
+        break;
+      case IO_FOF_JGAS_CRIT500:
+        strcpy(label, "Group_Jgas_Crit500");
+        break;
+      case IO_FOF_JSTARS_CRIT500:
+        strcpy(label, "Group_Jstars_Crit500");
+        break;
+      case IO_FOF_MASSTYPE_CRIT500:
+        strcpy(label, "Group_MassType_Crit500");
+        break;
+      case IO_FOF_LENTYPE_CRIT500:
+        strcpy(label, "Group_LenType_Crit500");
+        break;
+      case IO_FOF_CMFRAC_CRIT500:
+        strcpy(label, "Group_CMFrac_Crit500");
+        break;
+      case IO_FOF_CMFRACTYPE_CRIT500:
+        strcpy(label, "Group_CMFracType_Crit500");
+        break;
+      case IO_FOF_J_TOPHAT200:
+        strcpy(label, "Group_J_TopHat200");
+        break;
+      case IO_FOF_JDM_TOPHAT200:
+        strcpy(label, "Group_Jdm_TopHat200");
+        break;
+      case IO_FOF_JGAS_TOPHAT200:
+        strcpy(label, "Group_Jgas_TopHat200");
+        break;
+      case IO_FOF_JSTARS_TOPHAT200:
+        strcpy(label, "Group_Jstars_TopHat200");
+        break;
+      case IO_FOF_MASSTYPE_TOPHAT200:
+        strcpy(label, "Group_MassType_TopHat200");
+        break;
+      case IO_FOF_LENTYPE_TOPHAT200:
+        strcpy(label, "Group_LenType_TopHat200");
+        break;
+      case IO_FOF_CMFRAC_TOPHAT200:
+        strcpy(label, "Group_CMFrac_TopHat200");
+        break;
+      case IO_FOF_CMFRACTYPE_TOPHAT200:
+        strcpy(label, "Group_CMFracType_TopHat200");
+        break;
+      case IO_FOF_EPOT_CRIT200:
+        strcpy(label, "Group_Epot_Crit200");
+        break;
+      case IO_FOF_EKIN_CRIT200:
+        strcpy(label, "Group_Ekin_Crit200");
+        break;
+      case IO_FOF_ETHR_CRIT200:
+        strcpy(label, "Group_Ethr_Crit200");
+        break;
+      case IO_FOF_EPOT_MEAN200:
+        strcpy(label, "Group_Epot_Mean200");
+        break;
+      case IO_FOF_EKIN_MEAN200:
+        strcpy(label, "Group_Ekin_Mean200");
+        break;
+      case IO_FOF_ETHR_MEAN200:
+        strcpy(label, "Group_Ethr_Mean200");
+        break;
+      case IO_FOF_EPOT_TOPHAT200:
+        strcpy(label, "Group_Epot_TopHat200");
+        break;
+      case IO_FOF_EKIN_TOPHAT200:
+        strcpy(label, "Group_Ekin_TopHat200");
+        break;
+      case IO_FOF_ETHR_TOPHAT200:
+        strcpy(label, "Group_Ethr_TopHat200");
+        break;
+      case IO_FOF_EPOT_CRIT500:
+        strcpy(label, "Group_Epot_Crit500");
+        break;
+      case IO_FOF_EKIN_CRIT500:
+        strcpy(label, "Group_Ekin_Crit500");
+        break;
+      case IO_FOF_ETHR_CRIT500:
+        strcpy(label, "Group_Ethr_Crit500");
+        break;
+      case IO_FOF_J:
+        strcpy(label, "Group_J");
+        break;
+      case IO_FOF_JDM:
+        strcpy(label, "Group_Jdm");
+        break;
+      case IO_FOF_JGAS:
+        strcpy(label, "Group_Jgas");
+        break;
+      case IO_FOF_JSTARS:
+        strcpy(label, "Group_Jstars");
+        break;
+      case IO_FOF_CMFRAC:
+        strcpy(label, "Group_CMFrac");
+        break;
+      case IO_FOF_CMFRACTYPE:
+        strcpy(label, "Group_CMFracType");
+        break;
+      case IO_FOF_EKIN:
+        strcpy(label, "GroupEkin");
+        break;
+      case IO_FOF_ETHR:
+        strcpy(label, "GroupEthr");
+        break;
+      case IO_FOF_EPOT:
+        strcpy(label, "GroupEpot");
+        break;
+      case IO_SUB_EKIN:
+        strcpy(label, "SubhaloEkin");
+        break;
+      case IO_SUB_ETHR:
+        strcpy(label, "SubhaloEthr");
+        break;
+      case IO_SUB_EPOT:
+        strcpy(label, "SubhaloEpot");
+        break;
+      case IO_SUB_J:
+        strcpy(label, "Subhalo_J");
+        break;
+      case IO_SUB_JDM:
+        strcpy(label, "Subhalo_Jdm");
+        break;
+      case IO_SUB_JGAS:
+        strcpy(label, "Subhalo_Jgas");
+        break;
+      case IO_SUB_JSTARS:
+        strcpy(label, "Subhalo_Jstars");
+        break;
+      case IO_SUB_JINHALFRAD:
+        strcpy(label, "Subhalo_JInHalfRad");
+        break;
+      case IO_SUB_JDMINHALFRAD:
+        strcpy(label, "Subhalo_JdmInHalfRad");
+        break;
+      case IO_SUB_JGASINHALFRAD:
+        strcpy(label, "Subhalo_JgasInHalfRad");
+        break;
+      case IO_SUB_JSTARSINHALFRAD:
+        strcpy(label, "Subhalo_JstarsInHalfRad");
+        break;
+      case IO_SUB_JINRAD:
+        strcpy(label, "Subhalo_JInRad");
+        break;
+      case IO_SUB_JDMINRAD:
+        strcpy(label, "Subhalo_JdmInRad");
+        break;
+      case IO_SUB_JGASINRAD:
+        strcpy(label, "Subhalo_JgasInRad");
+        break;
+      case IO_SUB_JSTARSINRAD:
+        strcpy(label, "Subhalo_JstarsInRad");
+        break;
+      case IO_SUB_CMFRAC:
+        strcpy(label, "Subhalo_CMFrac");
+        break;
+      case IO_SUB_CMFRACTYPE:
+        strcpy(label, "Subhalo_CMFracType");
+        break;
+      case IO_SUB_CMFRACINHALFRAD:
+        strcpy(label, "Subhalo_CMFracInHalfRad");
+        break;
+      case IO_SUB_CMFRACTYPEINHALFRAD:
+        strcpy(label, "Subhalo_CMFracTypeInHalfRad");
+        break;
+      case IO_SUB_CMFRACINRAD:
+        strcpy(label, "Subhalo_CMFracInRad");
+        break;
+      case IO_SUB_CMFRACTYPEINRAD:
+        strcpy(label, "Subhalo_CMFracTypeInRad");
+        break;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+      case IO_FOF_M_CRIT200:
+        strcpy(label, "Group_M_Crit200");
+        break;
+      case IO_FOF_R_CRIT200:
+        strcpy(label, "Group_R_Crit200");
+        break;
+      case IO_FOF_M_CRIT500:
+        strcpy(label, "Group_M_Crit500");
+        break;
+      case IO_FOF_R_CRIT500:
+        strcpy(label, "Group_R_Crit500");
+        break;
+      case IO_FOF_M_TOPHAT200:
+        strcpy(label, "Group_M_TopHat200");
+        break;
+      case IO_FOF_R_TOPHAT200:
+        strcpy(label, "Group_R_TopHat200");
+        break;
+      case IO_FOF_NSUBS:
+        strcpy(label, "GroupNsubs");
+        break;
+      case IO_FOF_FIRSTSUB:
+        strcpy(label, "GroupFirstSub");
+        break;
+      case IO_FOF_FUZZOFFTYPE:
+        strcpy(label, "GroupFuzzOffsetType");
+        break;
+      case IO_SUB_LEN:
+        strcpy(label, "SubhaloLen");
+        break;
+      case IO_SUB_MTOT:
+        strcpy(label, "SubhaloMass");
+        break;
+      case IO_SUB_POS:
+        strcpy(label, "SubhaloPos");
+        break;
+      case IO_SUB_VEL:
+        strcpy(label, "SubhaloVel");
+        break;
+      case IO_SUB_LENTYPE:
+        strcpy(label, "SubhaloLenType");
+        break;
+      case IO_SUB_MASSTYPE:
+        strcpy(label, "SubhaloMassType");
+        break;
+      case IO_SUB_CM:
+        strcpy(label, "SubhaloCM");
+        break;
+      case IO_SUB_SPIN:
+        strcpy(label, "SubhaloSpin");
+        break;
+      case IO_SUB_VELDISP:
+        strcpy(label, "SubhaloVelDisp");
+        break;
+      case IO_SUB_VMAX:
+        strcpy(label, "SubhaloVmax");
+        break;
+      case IO_SUB_VMAXRAD:
+        strcpy(label, "SubhaloVmaxRad");
+        break;
+      case IO_SUB_HALFMASSRAD:
+        strcpy(label, "SubhaloHalfmassRad");
+        break;
+      case IO_SUB_HALFMASSRADTYPE:
+        strcpy(label, "SubhaloHalfmassRadType");
+        break;
+      case IO_SUB_MASSINRAD:
+        strcpy(label, "SubhaloMassInRad");
+        break;
+      case IO_SUB_MASSINHALFRAD:
+        strcpy(label, "SubhaloMassInHalfRad");
+        break;
+      case IO_SUB_MASSINMAXRAD:
+        strcpy(label, "SubhaloMassInMaxRad");
+        break;
+      case IO_SUB_MASSINRADTYPE:
+        strcpy(label, "SubhaloMassInRadType");
+        break;
+      case IO_SUB_MASSINHALFRADTYPE:
+        strcpy(label, "SubhaloMassInHalfRadType");
+        break;
+      case IO_SUB_MASSINMAXRADTYPE:
+        strcpy(label, "SubhaloMassInMaxRadType");
+        break;
+      case IO_SUB_IDMOSTBOUND:
+        strcpy(label, "SubhaloIDMostbound");
+        break;
+      case IO_SUB_GRNR:
+        strcpy(label, "SubhaloGrNr");
+        break;
+      case IO_SUB_PARENT:
+        strcpy(label, "SubhaloParent");
+        break;
+      case IO_SUB_BFLD_HALO:
+        strcpy(label, "SubhaloBfldHalo");
+        break;
+      case IO_SUB_BFLD_DISK:
+        strcpy(label, "SubhaloBfldDisk");
+        break;
+      case IO_SUB_SFR:
+        strcpy(label, "SubhaloSFR");
+        break;
+      case IO_SUB_SFRINRAD:
+        strcpy(label, "SubhaloSFRinRad");
+        break;
+      case IO_SUB_SFRINHALFRAD:
+        strcpy(label, "SubhaloSFRinHalfRad");
+        break;
+      case IO_SUB_SFRINMAXRAD:
+        strcpy(label, "SubhaloSFRinMaxRad");
+        break;
+      case IO_FOFSUB_IDS:
+        strcpy(label, "ID");
+        break;
+
+      case IO_FOF_LASTENTRY: /* no name is copied for this value; terminate() is called instead */
+        terminate("should not be reached");
+        break;
+    }
+}
+
+/*! \brief Is this output field a group or subhalo property?
+ *
+ *  \param[in] blocknr Number (identifier) of the field to be written.
+ *             IO_FOF_LASTENTRY or an unlisted value reaches a terminate() call.
+ *  \return 0: group property; 1: subhalo property; 2: both (IO_FOFSUB_IDS only)
+ */
+int fof_subfind_get_dataset_group(enum fof_subfind_iofields blocknr)
+{
+  switch(blocknr)
+    {
+      case IO_FOF_LEN:
+      case IO_FOF_MTOT:
+      case IO_FOF_POS:
+      case IO_FOF_CM:
+      case IO_FOF_VEL:
+      case IO_FOF_LENTYPE:
+      case IO_FOF_MASSTYPE:
+      case IO_FOF_SFR:
+      case IO_FOF_M_MEAN200:
+      case IO_FOF_R_MEAN200:
+      case IO_FOF_M_CRIT200:
+      case IO_FOF_R_CRIT200:
+      case IO_FOF_M_TOPHAT200:
+      case IO_FOF_R_TOPHAT200:
+      case IO_FOF_M_CRIT500:
+      case IO_FOF_R_CRIT500:
+      case IO_FOF_NSUBS:
+      case IO_FOF_FIRSTSUB:
+      case IO_FOF_FUZZOFFTYPE:
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_FOF_J_MEAN200:
+      case IO_FOF_JDM_MEAN200:
+      case IO_FOF_JGAS_MEAN200:
+      case IO_FOF_JSTARS_MEAN200:
+      case IO_FOF_MASSTYPE_MEAN200:
+      case IO_FOF_LENTYPE_MEAN200:
+      case IO_FOF_CMFRAC_MEAN200:
+      case IO_FOF_CMFRACTYPE_MEAN200:
+      case IO_FOF_J_CRIT200:
+      case IO_FOF_JDM_CRIT200:
+      case IO_FOF_JGAS_CRIT200:
+      case IO_FOF_JSTARS_CRIT200:
+      case IO_FOF_MASSTYPE_CRIT200:
+      case IO_FOF_LENTYPE_CRIT200:
+      case IO_FOF_CMFRAC_CRIT200:
+      case IO_FOF_CMFRACTYPE_CRIT200:
+      case IO_FOF_J_TOPHAT200:
+      case IO_FOF_JDM_TOPHAT200:
+      case IO_FOF_JGAS_TOPHAT200:
+      case IO_FOF_JSTARS_TOPHAT200:
+      case IO_FOF_MASSTYPE_TOPHAT200:
+      case IO_FOF_LENTYPE_TOPHAT200:
+      case IO_FOF_CMFRAC_TOPHAT200:
+      case IO_FOF_CMFRACTYPE_TOPHAT200:
+      case IO_FOF_J_CRIT500:
+      case IO_FOF_JDM_CRIT500:
+      case IO_FOF_JGAS_CRIT500:
+      case IO_FOF_JSTARS_CRIT500:
+      case IO_FOF_MASSTYPE_CRIT500:
+      case IO_FOF_LENTYPE_CRIT500:
+      case IO_FOF_CMFRAC_CRIT500:
+      case IO_FOF_CMFRACTYPE_CRIT500:
+      case IO_FOF_J:
+      case IO_FOF_JDM:
+      case IO_FOF_JGAS:
+      case IO_FOF_JSTARS:
+      case IO_FOF_CMFRAC:
+      case IO_FOF_CMFRACTYPE:
+      case IO_FOF_EKIN:
+      case IO_FOF_ETHR:
+      case IO_FOF_EPOT:
+      case IO_FOF_EPOT_CRIT200:
+      case IO_FOF_EKIN_CRIT200:
+      case IO_FOF_ETHR_CRIT200:
+      case IO_FOF_EPOT_MEAN200:
+      case IO_FOF_EKIN_MEAN200:
+      case IO_FOF_ETHR_MEAN200:
+      case IO_FOF_EPOT_TOPHAT200:
+      case IO_FOF_EKIN_TOPHAT200:
+      case IO_FOF_ETHR_TOPHAT200:
+      case IO_FOF_EPOT_CRIT500:
+      case IO_FOF_EKIN_CRIT500:
+      case IO_FOF_ETHR_CRIT500:
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+        return 0; /* every IO_FOF_* label above falls through to here */
+
+      case IO_SUB_LEN:
+      case IO_SUB_MTOT:
+      case IO_SUB_POS:
+      case IO_SUB_VEL:
+      case IO_SUB_LENTYPE:
+      case IO_SUB_MASSTYPE:
+      case IO_SUB_CM:
+      case IO_SUB_SPIN:
+      case IO_SUB_VELDISP:
+      case IO_SUB_VMAX:
+      case IO_SUB_VMAXRAD:
+      case IO_SUB_HALFMASSRAD:
+      case IO_SUB_HALFMASSRADTYPE:
+      case IO_SUB_MASSINRAD:
+      case IO_SUB_MASSINHALFRAD:
+      case IO_SUB_MASSINMAXRAD:
+      case IO_SUB_MASSINRADTYPE:
+      case IO_SUB_MASSINHALFRADTYPE:
+      case IO_SUB_MASSINMAXRADTYPE:
+      case IO_SUB_IDMOSTBOUND:
+      case IO_SUB_GRNR:
+      case IO_SUB_PARENT:
+      case IO_SUB_BFLD_HALO:
+      case IO_SUB_BFLD_DISK:
+      case IO_SUB_SFR:
+      case IO_SUB_SFRINRAD:
+      case IO_SUB_SFRINHALFRAD:
+      case IO_SUB_SFRINMAXRAD:
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_SUB_EKIN:
+      case IO_SUB_ETHR:
+      case IO_SUB_EPOT:
+      case IO_SUB_J:
+      case IO_SUB_JDM:
+      case IO_SUB_JGAS:
+      case IO_SUB_JSTARS:
+      case IO_SUB_JINHALFRAD:
+      case IO_SUB_JDMINHALFRAD:
+      case IO_SUB_JGASINHALFRAD:
+      case IO_SUB_JSTARSINHALFRAD:
+      case IO_SUB_JINRAD:
+      case IO_SUB_JDMINRAD:
+      case IO_SUB_JGASINRAD:
+      case IO_SUB_JSTARSINRAD:
+      case IO_SUB_CMFRAC:
+      case IO_SUB_CMFRACTYPE:
+      case IO_SUB_CMFRACINHALFRAD:
+      case IO_SUB_CMFRACTYPEINHALFRAD:
+      case IO_SUB_CMFRACINRAD:
+      case IO_SUB_CMFRACTYPEINRAD:
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+        return 1; /* every IO_SUB_* label above falls through to here */
+
+      case IO_FOFSUB_IDS:
+        return 2;
+
+      case IO_FOF_LASTENTRY:
+        terminate("reached last entry in switch - strange.");
+        break;
+    }
+
+  terminate("reached end of function - this should not happen");
+  return 0; /* only reached if terminate() returns */
+}
+
+/*! \brief Returns the number of entries (groups, subhalos or IDs) of a field.
+ *
+ *  \param[in] blocknr Number (identifier) of the field to be written.
+ *  \return catalogue_header.Ngroups, .Nsubgroups or .Nids depending on the
+ *          field; 0 for the fields guarded by SUBFIND when it is not defined.
+ */
+int fof_subfind_get_particles_in_block(enum fof_subfind_iofields blocknr)
+{
+  switch(blocknr)
+    {
+      case IO_FOF_LEN:
+      case IO_FOF_MTOT:
+      case IO_FOF_POS:
+      case IO_FOF_CM:
+      case IO_FOF_VEL:
+      case IO_FOF_LENTYPE:
+      case IO_FOF_MASSTYPE:
+      case IO_FOF_SFR:
+      case IO_FOF_FUZZOFFTYPE:
+        return catalogue_header.Ngroups; /* returned whether or not SUBFIND is defined */
+
+      case IO_FOF_M_MEAN200:
+      case IO_FOF_R_MEAN200:
+      case IO_FOF_M_CRIT200:
+      case IO_FOF_R_CRIT200:
+      case IO_FOF_M_TOPHAT200:
+      case IO_FOF_R_TOPHAT200:
+      case IO_FOF_M_CRIT500:
+      case IO_FOF_R_CRIT500:
+      case IO_FOF_NSUBS:
+      case IO_FOF_FIRSTSUB:
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_FOF_J_MEAN200:
+      case IO_FOF_JDM_MEAN200:
+      case IO_FOF_JGAS_MEAN200:
+      case IO_FOF_JSTARS_MEAN200:
+      case IO_FOF_MASSTYPE_MEAN200:
+      case IO_FOF_LENTYPE_MEAN200:
+      case IO_FOF_CMFRAC_MEAN200:
+      case IO_FOF_CMFRACTYPE_MEAN200:
+      case IO_FOF_J_CRIT200:
+      case IO_FOF_JDM_CRIT200:
+      case IO_FOF_JGAS_CRIT200:
+      case IO_FOF_JSTARS_CRIT200:
+      case IO_FOF_MASSTYPE_CRIT200:
+      case IO_FOF_LENTYPE_CRIT200:
+      case IO_FOF_CMFRAC_CRIT200:
+      case IO_FOF_CMFRACTYPE_CRIT200:
+      case IO_FOF_J_TOPHAT200:
+      case IO_FOF_JDM_TOPHAT200:
+      case IO_FOF_JGAS_TOPHAT200:
+      case IO_FOF_JSTARS_TOPHAT200:
+      case IO_FOF_MASSTYPE_TOPHAT200:
+      case IO_FOF_LENTYPE_TOPHAT200:
+      case IO_FOF_CMFRAC_TOPHAT200:
+      case IO_FOF_CMFRACTYPE_TOPHAT200:
+      case IO_FOF_J_CRIT500:
+      case IO_FOF_JDM_CRIT500:
+      case IO_FOF_JGAS_CRIT500:
+      case IO_FOF_JSTARS_CRIT500:
+      case IO_FOF_MASSTYPE_CRIT500:
+      case IO_FOF_LENTYPE_CRIT500:
+      case IO_FOF_CMFRAC_CRIT500:
+      case IO_FOF_CMFRACTYPE_CRIT500:
+      case IO_FOF_J:
+      case IO_FOF_JDM:
+      case IO_FOF_JGAS:
+      case IO_FOF_JSTARS:
+      case IO_FOF_CMFRAC:
+      case IO_FOF_CMFRACTYPE:
+      case IO_FOF_EKIN:
+      case IO_FOF_ETHR:
+      case IO_FOF_EPOT:
+      case IO_FOF_EPOT_CRIT200:
+      case IO_FOF_EKIN_CRIT200:
+      case IO_FOF_ETHR_CRIT200:
+      case IO_FOF_EPOT_MEAN200:
+      case IO_FOF_EKIN_MEAN200:
+      case IO_FOF_ETHR_MEAN200:
+      case IO_FOF_EPOT_TOPHAT200:
+      case IO_FOF_EKIN_TOPHAT200:
+      case IO_FOF_ETHR_TOPHAT200:
+      case IO_FOF_EPOT_CRIT500:
+      case IO_FOF_EKIN_CRIT500:
+      case IO_FOF_ETHR_CRIT500:
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+#ifdef SUBFIND
+        return catalogue_header.Ngroups;
+#else  /* #ifdef SUBFIND */
+        return 0;
+#endif /* #ifdef SUBFIND #else */
+
+      case IO_SUB_LEN:
+      case IO_SUB_MTOT:
+      case IO_SUB_POS:
+      case IO_SUB_VEL:
+      case IO_SUB_LENTYPE:
+      case IO_SUB_MASSTYPE:
+      case IO_SUB_CM:
+      case IO_SUB_SPIN:
+      case IO_SUB_VELDISP:
+      case IO_SUB_VMAX:
+      case IO_SUB_VMAXRAD:
+      case IO_SUB_HALFMASSRAD:
+      case IO_SUB_HALFMASSRADTYPE:
+      case IO_SUB_MASSINRAD:
+      case IO_SUB_MASSINHALFRAD:
+      case IO_SUB_MASSINMAXRAD:
+      case IO_SUB_MASSINRADTYPE:
+      case IO_SUB_MASSINHALFRADTYPE:
+      case IO_SUB_MASSINMAXRADTYPE:
+      case IO_SUB_IDMOSTBOUND:
+      case IO_SUB_GRNR:
+      case IO_SUB_PARENT:
+      case IO_SUB_BFLD_HALO:
+      case IO_SUB_BFLD_DISK:
+      case IO_SUB_SFR:
+      case IO_SUB_SFRINRAD:
+      case IO_SUB_SFRINHALFRAD:
+      case IO_SUB_SFRINMAXRAD:
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_SUB_EKIN:
+      case IO_SUB_ETHR:
+      case IO_SUB_EPOT:
+      case IO_SUB_J:
+      case IO_SUB_JDM:
+      case IO_SUB_JGAS:
+      case IO_SUB_JSTARS:
+      case IO_SUB_JINHALFRAD:
+      case IO_SUB_JDMINHALFRAD:
+      case IO_SUB_JGASINHALFRAD:
+      case IO_SUB_JSTARSINHALFRAD:
+      case IO_SUB_JINRAD:
+      case IO_SUB_JDMINRAD:
+      case IO_SUB_JGASINRAD:
+      case IO_SUB_JSTARSINRAD:
+      case IO_SUB_CMFRAC:
+      case IO_SUB_CMFRACTYPE:
+      case IO_SUB_CMFRACINHALFRAD:
+      case IO_SUB_CMFRACTYPEINHALFRAD:
+      case IO_SUB_CMFRACINRAD:
+      case IO_SUB_CMFRACTYPEINRAD:
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+#ifdef SUBFIND
+        return catalogue_header.Nsubgroups;
+#else  /* #ifdef SUBFIND */
+        return 0;
+#endif /* #ifdef SUBFIND #else */
+
+      case IO_FOFSUB_IDS:
+        return catalogue_header.Nids;
+
+      case IO_FOF_LASTENTRY:
+        terminate("reached last entry in switch - strange.");
+        break;
+    }
+
+  terminate("reached end of function - this should not happen");
+  return 0; /* only reached if terminate() returns */
+}
+
+/*! \brief Returns the number of elements per entry of a given property.
+ *
+ *  \param[in] blocknr Number (identifier) of the field to be written.
+ *  \return Number of values per element: 1, NTYPES (the *TYPE fields) or
+ *          3 (e.g. IO_FOF_POS, IO_FOF_VEL); 0 if blocknr matches no case.
+ */
+int fof_subfind_get_values_per_blockelement(enum fof_subfind_iofields blocknr)
+{
+  int values = 0; /* stays 0 if no case below assigns it */
+
+  switch(blocknr)
+    {
+      case IO_FOF_LEN:
+      case IO_FOF_NSUBS:
+      case IO_FOF_FIRSTSUB:
+      case IO_SUB_LEN:
+      case IO_SUB_GRNR:
+      case IO_SUB_PARENT:
+      case IO_FOF_MTOT:
+      case IO_FOF_SFR:
+      case IO_FOF_M_MEAN200:
+      case IO_FOF_R_MEAN200:
+      case IO_FOF_M_CRIT200:
+      case IO_FOF_R_CRIT200:
+      case IO_FOF_M_TOPHAT200:
+      case IO_FOF_R_TOPHAT200:
+      case IO_FOF_M_CRIT500:
+      case IO_FOF_R_CRIT500:
+      case IO_SUB_MTOT:
+      case IO_SUB_VELDISP:
+      case IO_SUB_VMAX:
+      case IO_SUB_VMAXRAD:
+      case IO_SUB_HALFMASSRAD:
+      case IO_SUB_MASSINRAD:
+      case IO_SUB_MASSINHALFRAD:
+      case IO_SUB_MASSINMAXRAD:
+      case IO_SUB_IDMOSTBOUND:
+      case IO_SUB_BFLD_HALO:
+      case IO_SUB_BFLD_DISK:
+      case IO_SUB_SFR:
+      case IO_SUB_SFRINRAD:
+      case IO_SUB_SFRINHALFRAD:
+      case IO_SUB_SFRINMAXRAD:
+      case IO_FOFSUB_IDS:
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_FOF_CMFRAC_MEAN200:
+      case IO_FOF_CMFRAC_CRIT200:
+      case IO_FOF_CMFRAC_TOPHAT200:
+      case IO_FOF_CMFRAC_CRIT500:
+      case IO_FOF_EPOT_CRIT200:
+      case IO_FOF_EKIN_CRIT200:
+      case IO_FOF_ETHR_CRIT200:
+      case IO_FOF_EPOT_MEAN200:
+      case IO_FOF_EKIN_MEAN200:
+      case IO_FOF_ETHR_MEAN200:
+      case IO_FOF_EPOT_TOPHAT200:
+      case IO_FOF_EKIN_TOPHAT200:
+      case IO_FOF_ETHR_TOPHAT200:
+      case IO_FOF_EPOT_CRIT500:
+      case IO_FOF_EKIN_CRIT500:
+      case IO_FOF_ETHR_CRIT500:
+      case IO_FOF_EKIN:
+      case IO_FOF_ETHR:
+      case IO_FOF_EPOT:
+      case IO_SUB_EKIN:
+      case IO_SUB_ETHR:
+      case IO_SUB_EPOT:
+      case IO_SUB_CMFRAC:
+      case IO_SUB_CMFRACINHALFRAD:
+      case IO_SUB_CMFRACINRAD:
+      case IO_FOF_CMFRAC:
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+        values = 1; /* single value per entry */
+        break;
+
+      case IO_FOF_LENTYPE:
+      case IO_SUB_LENTYPE:
+      case IO_FOF_MASSTYPE:
+      case IO_SUB_MASSTYPE:
+      case IO_SUB_HALFMASSRADTYPE:
+      case IO_SUB_MASSINRADTYPE:
+      case IO_SUB_MASSINHALFRADTYPE:
+      case IO_SUB_MASSINMAXRADTYPE:
+      case IO_FOF_FUZZOFFTYPE:
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_FOF_CMFRACTYPE:
+      case IO_SUB_CMFRACTYPE:
+      case IO_SUB_CMFRACTYPEINHALFRAD:
+      case IO_SUB_CMFRACTYPEINRAD:
+      case IO_FOF_LENTYPE_MEAN200:
+      case IO_FOF_LENTYPE_CRIT200:
+      case IO_FOF_LENTYPE_CRIT500:
+      case IO_FOF_LENTYPE_TOPHAT200:
+      case IO_FOF_MASSTYPE_MEAN200:
+      case IO_FOF_MASSTYPE_CRIT200:
+      case IO_FOF_MASSTYPE_CRIT500:
+      case IO_FOF_MASSTYPE_TOPHAT200:
+      case IO_FOF_CMFRACTYPE_MEAN200:
+      case IO_FOF_CMFRACTYPE_CRIT200:
+      case IO_FOF_CMFRACTYPE_CRIT500:
+      case IO_FOF_CMFRACTYPE_TOPHAT200:
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+        values = NTYPES; /* NTYPES values per entry */
+        break;
+
+      case IO_FOF_POS:
+      case IO_FOF_CM:
+      case IO_FOF_VEL:
+      case IO_SUB_POS:
+      case IO_SUB_VEL:
+      case IO_SUB_CM:
+      case IO_SUB_SPIN:
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_SUB_J:
+      case IO_SUB_JDM:
+      case IO_SUB_JGAS:
+      case IO_SUB_JSTARS:
+      case IO_SUB_JINHALFRAD:
+      case IO_SUB_JDMINHALFRAD:
+      case IO_SUB_JGASINHALFRAD:
+      case IO_SUB_JSTARSINHALFRAD:
+      case IO_SUB_JINRAD:
+      case IO_SUB_JDMINRAD:
+      case IO_SUB_JGASINRAD:
+      case IO_SUB_JSTARSINRAD:
+      case IO_FOF_J_MEAN200:
+      case IO_FOF_JDM_MEAN200:
+      case IO_FOF_JGAS_MEAN200:
+      case IO_FOF_JSTARS_MEAN200:
+      case IO_FOF_J_CRIT200:
+      case IO_FOF_JDM_CRIT200:
+      case IO_FOF_JGAS_CRIT200:
+      case IO_FOF_JSTARS_CRIT200:
+      case IO_FOF_J_TOPHAT200:
+      case IO_FOF_JDM_TOPHAT200:
+      case IO_FOF_JGAS_TOPHAT200:
+      case IO_FOF_JSTARS_TOPHAT200:
+      case IO_FOF_J_CRIT500:
+      case IO_FOF_JDM_CRIT500:
+      case IO_FOF_JGAS_CRIT500:
+      case IO_FOF_JSTARS_CRIT500:
+      case IO_FOF_J:
+      case IO_FOF_JDM:
+      case IO_FOF_JGAS:
+      case IO_FOF_JSTARS:
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+        values = 3; /* three values per entry */
+        break;
+
+      case IO_FOF_LASTENTRY:
+        terminate("reached last entry in switch - should not get here");
+        break;
+    }
+  return values;
+}
+
+/*! \brief Returns the number of bytes per element of a given property.
+ *
+ *  \param[in] blocknr Number (identifier) of the field to be written.
+ *  \return Element size in bytes: a count of 1, 3 or NTYPES times the sizeof
+ *          int, MyOutputFloat, MyIDType or long long; 0 if no case matches.
+ */
+int fof_subfind_get_bytes_per_blockelement(enum fof_subfind_iofields blocknr)
+{
+  int bytes_per_blockelement = 0; /* stays 0 if no case below assigns it */
+
+  switch(blocknr)
+    {
+      case IO_FOF_LEN:
+      case IO_FOF_NSUBS:
+      case IO_FOF_FIRSTSUB:
+      case IO_SUB_LEN:
+      case IO_SUB_GRNR:
+      case IO_SUB_PARENT:
+        bytes_per_blockelement = sizeof(int);
+        break;
+
+      case IO_FOF_LENTYPE:
+      case IO_SUB_LENTYPE:
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_FOF_LENTYPE_MEAN200:
+      case IO_FOF_LENTYPE_CRIT200:
+      case IO_FOF_LENTYPE_CRIT500:
+      case IO_FOF_LENTYPE_TOPHAT200:
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+        bytes_per_blockelement = NTYPES * sizeof(int);
+        break;
+
+      case IO_FOF_MTOT:
+      case IO_FOF_SFR:
+      case IO_FOF_M_MEAN200:
+      case IO_FOF_R_MEAN200:
+      case IO_FOF_M_CRIT200:
+      case IO_FOF_R_CRIT200:
+      case IO_FOF_M_TOPHAT200:
+      case IO_FOF_R_TOPHAT200:
+      case IO_FOF_M_CRIT500:
+      case IO_FOF_R_CRIT500:
+      case IO_SUB_MTOT:
+      case IO_SUB_VELDISP:
+      case IO_SUB_VMAX:
+      case IO_SUB_VMAXRAD:
+      case IO_SUB_HALFMASSRAD:
+      case IO_SUB_MASSINRAD:
+      case IO_SUB_MASSINHALFRAD:
+      case IO_SUB_MASSINMAXRAD:
+      case IO_SUB_BFLD_HALO:
+      case IO_SUB_BFLD_DISK:
+      case IO_SUB_SFR:
+      case IO_SUB_SFRINRAD:
+      case IO_SUB_SFRINHALFRAD:
+      case IO_SUB_SFRINMAXRAD:
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_FOF_CMFRAC_MEAN200:
+      case IO_FOF_CMFRAC_CRIT200:
+      case IO_FOF_CMFRAC_TOPHAT200:
+      case IO_FOF_CMFRAC_CRIT500:
+      case IO_FOF_CMFRAC:
+      case IO_FOF_EKIN:
+      case IO_FOF_ETHR:
+      case IO_FOF_EPOT:
+      case IO_SUB_EKIN:
+      case IO_SUB_ETHR:
+      case IO_SUB_EPOT:
+      case IO_SUB_CMFRAC:
+      case IO_SUB_CMFRACINHALFRAD:
+      case IO_SUB_CMFRACINRAD:
+      case IO_FOF_EPOT_CRIT200:
+      case IO_FOF_EKIN_CRIT200:
+      case IO_FOF_ETHR_CRIT200:
+      case IO_FOF_EPOT_MEAN200:
+      case IO_FOF_EKIN_MEAN200:
+      case IO_FOF_ETHR_MEAN200:
+      case IO_FOF_EPOT_TOPHAT200:
+      case IO_FOF_EKIN_TOPHAT200:
+      case IO_FOF_ETHR_TOPHAT200:
+      case IO_FOF_EPOT_CRIT500:
+      case IO_FOF_EKIN_CRIT500:
+      case IO_FOF_ETHR_CRIT500:
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+        bytes_per_blockelement = sizeof(MyOutputFloat);
+        break;
+
+      case IO_FOF_POS:
+      case IO_FOF_CM:
+      case IO_FOF_VEL:
+      case IO_SUB_POS:
+      case IO_SUB_VEL:
+      case IO_SUB_CM:
+      case IO_SUB_SPIN:
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_SUB_J:
+      case IO_SUB_JDM:
+      case IO_SUB_JGAS:
+      case IO_SUB_JSTARS:
+      case IO_SUB_JINHALFRAD:
+      case IO_SUB_JDMINHALFRAD:
+      case IO_SUB_JGASINHALFRAD:
+      case IO_SUB_JSTARSINHALFRAD:
+      case IO_SUB_JINRAD:
+      case IO_SUB_JDMINRAD:
+      case IO_SUB_JGASINRAD:
+      case IO_SUB_JSTARSINRAD:
+      case IO_FOF_J_MEAN200:
+      case IO_FOF_JDM_MEAN200:
+      case IO_FOF_JGAS_MEAN200:
+      case IO_FOF_JSTARS_MEAN200:
+      case IO_FOF_J_CRIT200:
+      case IO_FOF_JDM_CRIT200:
+      case IO_FOF_JGAS_CRIT200:
+      case IO_FOF_JSTARS_CRIT200:
+      case IO_FOF_J_TOPHAT200:
+      case IO_FOF_JDM_TOPHAT200:
+      case IO_FOF_JGAS_TOPHAT200:
+      case IO_FOF_JSTARS_TOPHAT200:
+      case IO_FOF_J_CRIT500:
+      case IO_FOF_JDM_CRIT500:
+      case IO_FOF_JGAS_CRIT500:
+      case IO_FOF_JSTARS_CRIT500:
+      case IO_FOF_J:
+      case IO_FOF_JDM:
+      case IO_FOF_JGAS:
+      case IO_FOF_JSTARS:
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+        bytes_per_blockelement = 3 * sizeof(MyOutputFloat);
+        break;
+
+      case IO_FOF_MASSTYPE:
+      case IO_SUB_MASSTYPE:
+      case IO_SUB_HALFMASSRADTYPE:
+      case IO_SUB_MASSINRADTYPE:
+      case IO_SUB_MASSINHALFRADTYPE:
+      case IO_SUB_MASSINMAXRADTYPE:
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_FOF_MASSTYPE_MEAN200:
+      case IO_FOF_MASSTYPE_CRIT200:
+      case IO_FOF_MASSTYPE_CRIT500:
+      case IO_FOF_MASSTYPE_TOPHAT200:
+      case IO_FOF_CMFRACTYPE_MEAN200:
+      case IO_FOF_CMFRACTYPE_CRIT200:
+      case IO_FOF_CMFRACTYPE_CRIT500:
+      case IO_FOF_CMFRACTYPE_TOPHAT200:
+      case IO_FOF_CMFRACTYPE:
+      case IO_SUB_CMFRACTYPE:
+      case IO_SUB_CMFRACTYPEINHALFRAD:
+      case IO_SUB_CMFRACTYPEINRAD:
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+        bytes_per_blockelement = NTYPES * sizeof(MyOutputFloat);
+        break;
+
+      case IO_SUB_IDMOSTBOUND:
+      case IO_FOFSUB_IDS:
+        bytes_per_blockelement = sizeof(MyIDType);
+        break;
+
+      case IO_FOF_FUZZOFFTYPE:
+        bytes_per_blockelement = NTYPES * sizeof(long long); /* the only field sized with long long here */
+        break;
+
+      case IO_FOF_LASTENTRY:
+        terminate("reached last entry in switch - should not get here");
+        break;
+    }
+  return bytes_per_blockelement;
+}
+
+/*! \brief Maps an output field onto the key of its element datatype.
+ *
+ *  Every field handled by the switch belongs to exactly one of three
+ *  classes: native int, native MyOutputFloat or native long long. The
+ *  particle ID fields are long long only if LONGIDS is set and int
+ *  otherwise. Values that are not handled by the switch yield 0.
+ *
+ *  \param[in] blocknr Number (identifier) of the field to be written.
+ *
+ *  \return Key for datatype: 0: int, 1: (output)float, 2: long long.
+ */
+int fof_subfind_get_datatype(enum fof_subfind_iofields blocknr)
+{
+  enum
+  {
+    TYPEKEY_INT      = 0, /* native int */
+    TYPEKEY_FLOAT    = 1, /* native MyOutputFloat */
+    TYPEKEY_LONGLONG = 2  /* native long long */
+  };
+
+  switch(blocknr)
+    {
+      /* integer-valued fields */
+      case IO_FOF_LEN:
+      case IO_FOF_LENTYPE:
+      case IO_FOF_NSUBS:
+      case IO_FOF_FIRSTSUB:
+      case IO_SUB_LEN:
+      case IO_SUB_LENTYPE:
+      case IO_SUB_GRNR:
+      case IO_SUB_PARENT:
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_FOF_LENTYPE_MEAN200:
+      case IO_FOF_LENTYPE_CRIT200:
+      case IO_FOF_LENTYPE_CRIT500:
+      case IO_FOF_LENTYPE_TOPHAT200:
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+        return TYPEKEY_INT;
+
+      /* fields stored as MyOutputFloat */
+      case IO_FOF_MTOT:
+      case IO_FOF_POS:
+      case IO_FOF_CM:
+      case IO_FOF_VEL:
+      case IO_FOF_MASSTYPE:
+      case IO_FOF_SFR:
+      case IO_FOF_M_MEAN200:
+      case IO_FOF_R_MEAN200:
+      case IO_FOF_M_CRIT200:
+      case IO_FOF_R_CRIT200:
+      case IO_FOF_M_TOPHAT200:
+      case IO_FOF_R_TOPHAT200:
+      case IO_FOF_M_CRIT500:
+      case IO_FOF_R_CRIT500:
+      case IO_SUB_MTOT:
+      case IO_SUB_POS:
+      case IO_SUB_VEL:
+      case IO_SUB_MASSTYPE:
+      case IO_SUB_CM:
+      case IO_SUB_SPIN:
+      case IO_SUB_VELDISP:
+      case IO_SUB_VMAX:
+      case IO_SUB_VMAXRAD:
+      case IO_SUB_HALFMASSRAD:
+      case IO_SUB_HALFMASSRADTYPE:
+      case IO_SUB_MASSINRAD:
+      case IO_SUB_MASSINHALFRAD:
+      case IO_SUB_MASSINMAXRAD:
+      case IO_SUB_MASSINRADTYPE:
+      case IO_SUB_MASSINHALFRADTYPE:
+      case IO_SUB_MASSINMAXRADTYPE:
+      case IO_SUB_BFLD_HALO:
+      case IO_SUB_BFLD_DISK:
+      case IO_SUB_SFR:
+      case IO_SUB_SFRINRAD:
+      case IO_SUB_SFRINHALFRAD:
+      case IO_SUB_SFRINMAXRAD:
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_FOF_MASSTYPE_MEAN200:
+      case IO_FOF_MASSTYPE_CRIT200:
+      case IO_FOF_MASSTYPE_CRIT500:
+      case IO_FOF_MASSTYPE_TOPHAT200:
+      case IO_FOF_J_MEAN200:
+      case IO_FOF_JDM_MEAN200:
+      case IO_FOF_JGAS_MEAN200:
+      case IO_FOF_JSTARS_MEAN200:
+      case IO_FOF_CMFRAC_MEAN200:
+      case IO_FOF_CMFRACTYPE_MEAN200:
+      case IO_FOF_J_CRIT200:
+      case IO_FOF_JDM_CRIT200:
+      case IO_FOF_JGAS_CRIT200:
+      case IO_FOF_JSTARS_CRIT200:
+      case IO_FOF_CMFRAC_CRIT200:
+      case IO_FOF_CMFRACTYPE_CRIT200:
+      case IO_FOF_J_TOPHAT200:
+      case IO_FOF_JDM_TOPHAT200:
+      case IO_FOF_JGAS_TOPHAT200:
+      case IO_FOF_JSTARS_TOPHAT200:
+      case IO_FOF_CMFRAC_TOPHAT200:
+      case IO_FOF_CMFRACTYPE_TOPHAT200:
+      case IO_FOF_J_CRIT500:
+      case IO_FOF_JDM_CRIT500:
+      case IO_FOF_JGAS_CRIT500:
+      case IO_FOF_JSTARS_CRIT500:
+      case IO_FOF_CMFRAC_CRIT500:
+      case IO_FOF_CMFRACTYPE_CRIT500:
+      case IO_FOF_J:
+      case IO_FOF_JDM:
+      case IO_FOF_JGAS:
+      case IO_FOF_JSTARS:
+      case IO_FOF_CMFRAC:
+      case IO_FOF_CMFRACTYPE:
+      case IO_FOF_EKIN:
+      case IO_FOF_ETHR:
+      case IO_FOF_EPOT:
+      case IO_FOF_EPOT_CRIT200:
+      case IO_FOF_EKIN_CRIT200:
+      case IO_FOF_ETHR_CRIT200:
+      case IO_FOF_EPOT_MEAN200:
+      case IO_FOF_EKIN_MEAN200:
+      case IO_FOF_ETHR_MEAN200:
+      case IO_FOF_EPOT_TOPHAT200:
+      case IO_FOF_EKIN_TOPHAT200:
+      case IO_FOF_ETHR_TOPHAT200:
+      case IO_FOF_EPOT_CRIT500:
+      case IO_FOF_EKIN_CRIT500:
+      case IO_FOF_ETHR_CRIT500:
+      case IO_SUB_EKIN:
+      case IO_SUB_ETHR:
+      case IO_SUB_EPOT:
+      case IO_SUB_J:
+      case IO_SUB_JDM:
+      case IO_SUB_JGAS:
+      case IO_SUB_JSTARS:
+      case IO_SUB_JINHALFRAD:
+      case IO_SUB_JDMINHALFRAD:
+      case IO_SUB_JGASINHALFRAD:
+      case IO_SUB_JSTARSINHALFRAD:
+      case IO_SUB_JINRAD:
+      case IO_SUB_JDMINRAD:
+      case IO_SUB_JGASINRAD:
+      case IO_SUB_JSTARSINRAD:
+      case IO_SUB_CMFRAC:
+      case IO_SUB_CMFRACTYPE:
+      case IO_SUB_CMFRACINHALFRAD:
+      case IO_SUB_CMFRACTYPEINHALFRAD:
+      case IO_SUB_CMFRACINRAD:
+      case IO_SUB_CMFRACTYPEINRAD:
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+        return TYPEKEY_FLOAT;
+
+      /* particle IDs: width depends on LONGIDS */
+      case IO_SUB_IDMOSTBOUND:
+      case IO_FOFSUB_IDS:
+#ifdef LONGIDS
+        return TYPEKEY_LONGLONG;
+#else  /* #ifdef LONGIDS */
+        return TYPEKEY_INT;
+#endif /* #ifdef LONGIDS #else */
+
+      case IO_FOF_FUZZOFFTYPE:
+        return TYPEKEY_LONGLONG;
+
+      case IO_FOF_LASTENTRY:
+        terminate("should not be reached");
+        break;
+    }
+
+  /* same fallback value the key is initialised with for unhandled fields */
+  return TYPEKEY_INT;
+}
+
+/*! \brief Tells whether an output field exists in this code configuration.
+ *
+ *  The basic FoF group fields are always present. All other fields depend
+ *  on compile-time switches (USE_SFR, MHD, SUBFIND, FOF_STOREIDS), and
+ *  IO_FOF_FUZZOFFTYPE is never reported as present. Values that are not
+ *  handled by the switch are reported as not present.
+ *
+ *  \param[in] blocknr Number (identifier) of the field to be written.
+ *
+ *  \return 0: not present; 1: present.
+ */
+int fof_subfind_blockpresent(enum fof_subfind_iofields blocknr)
+{
+  switch(blocknr)
+    {
+      /* basic FoF group properties: unconditionally present */
+      case IO_FOF_LEN:
+      case IO_FOF_LENTYPE:
+      case IO_FOF_MTOT:
+      case IO_FOF_POS:
+      case IO_FOF_CM:
+      case IO_FOF_VEL:
+      case IO_FOF_MASSTYPE:
+        return 1;
+
+      /* star formation rates: require USE_SFR */
+      case IO_FOF_SFR:
+      case IO_SUB_SFR:
+      case IO_SUB_SFRINRAD:
+      case IO_SUB_SFRINHALFRAD:
+      case IO_SUB_SFRINMAXRAD:
+#ifdef USE_SFR
+        return 1;
+#else  /* #ifdef USE_SFR */
+        return 0;
+#endif /* #ifdef USE_SFR #else */
+
+      /* magnetic field properties: require MHD */
+      case IO_SUB_BFLD_HALO:
+      case IO_SUB_BFLD_DISK:
+#ifdef MHD
+        return 1;
+#else  /* #ifdef MHD */
+        return 0;
+#endif /* #ifdef MHD #else */
+
+      /* never written */
+      case IO_FOF_FUZZOFFTYPE:
+        return 0;
+
+      /* everything below requires SUBFIND */
+      case IO_FOF_M_MEAN200:
+      case IO_FOF_R_MEAN200:
+      case IO_FOF_M_CRIT200:
+      case IO_FOF_R_CRIT200:
+      case IO_FOF_M_TOPHAT200:
+      case IO_FOF_R_TOPHAT200:
+      case IO_FOF_M_CRIT500:
+      case IO_FOF_R_CRIT500:
+      case IO_FOF_NSUBS:
+      case IO_FOF_FIRSTSUB:
+      case IO_SUB_LEN:
+      case IO_SUB_LENTYPE:
+      case IO_SUB_MTOT:
+      case IO_SUB_POS:
+      case IO_SUB_VEL:
+      case IO_SUB_MASSTYPE:
+      case IO_SUB_CM:
+      case IO_SUB_SPIN:
+      case IO_SUB_VELDISP:
+      case IO_SUB_VMAX:
+      case IO_SUB_VMAXRAD:
+      case IO_SUB_HALFMASSRAD:
+      case IO_SUB_HALFMASSRADTYPE:
+      case IO_SUB_MASSINRAD:
+      case IO_SUB_MASSINHALFRAD:
+      case IO_SUB_MASSINMAXRAD:
+      case IO_SUB_MASSINRADTYPE:
+      case IO_SUB_MASSINHALFRADTYPE:
+      case IO_SUB_MASSINMAXRADTYPE:
+      case IO_SUB_IDMOSTBOUND:
+      case IO_SUB_GRNR:
+      case IO_SUB_PARENT:
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case IO_FOF_J_MEAN200:
+      case IO_FOF_JDM_MEAN200:
+      case IO_FOF_JGAS_MEAN200:
+      case IO_FOF_JSTARS_MEAN200:
+      case IO_FOF_CMFRAC_MEAN200:
+      case IO_FOF_CMFRACTYPE_MEAN200:
+      case IO_FOF_J_CRIT200:
+      case IO_FOF_JDM_CRIT200:
+      case IO_FOF_JGAS_CRIT200:
+      case IO_FOF_JSTARS_CRIT200:
+      case IO_FOF_CMFRAC_CRIT200:
+      case IO_FOF_CMFRACTYPE_CRIT200:
+      case IO_FOF_J_TOPHAT200:
+      case IO_FOF_JDM_TOPHAT200:
+      case IO_FOF_JGAS_TOPHAT200:
+      case IO_FOF_JSTARS_TOPHAT200:
+      case IO_FOF_CMFRAC_TOPHAT200:
+      case IO_FOF_CMFRACTYPE_TOPHAT200:
+      case IO_FOF_J_CRIT500:
+      case IO_FOF_JDM_CRIT500:
+      case IO_FOF_JGAS_CRIT500:
+      case IO_FOF_JSTARS_CRIT500:
+      case IO_FOF_CMFRAC_CRIT500:
+      case IO_FOF_CMFRACTYPE_CRIT500:
+      case IO_FOF_J:
+      case IO_FOF_JDM:
+      case IO_FOF_JGAS:
+      case IO_FOF_JSTARS:
+      case IO_FOF_CMFRAC:
+      case IO_FOF_CMFRACTYPE:
+      case IO_FOF_EKIN:
+      case IO_FOF_ETHR:
+      case IO_FOF_EPOT:
+      case IO_FOF_MASSTYPE_MEAN200:
+      case IO_FOF_MASSTYPE_CRIT200:
+      case IO_FOF_MASSTYPE_CRIT500:
+      case IO_FOF_MASSTYPE_TOPHAT200:
+      case IO_FOF_LENTYPE_MEAN200:
+      case IO_FOF_LENTYPE_CRIT200:
+      case IO_FOF_LENTYPE_CRIT500:
+      case IO_FOF_LENTYPE_TOPHAT200:
+      case IO_FOF_EPOT_CRIT200:
+      case IO_FOF_EKIN_CRIT200:
+      case IO_FOF_ETHR_CRIT200:
+      case IO_FOF_EPOT_MEAN200:
+      case IO_FOF_EKIN_MEAN200:
+      case IO_FOF_ETHR_MEAN200:
+      case IO_FOF_EPOT_TOPHAT200:
+      case IO_FOF_EKIN_TOPHAT200:
+      case IO_FOF_ETHR_TOPHAT200:
+      case IO_FOF_EPOT_CRIT500:
+      case IO_FOF_EKIN_CRIT500:
+      case IO_FOF_ETHR_CRIT500:
+      case IO_SUB_EKIN:
+      case IO_SUB_ETHR:
+      case IO_SUB_EPOT:
+      case IO_SUB_J:
+      case IO_SUB_JDM:
+      case IO_SUB_JGAS:
+      case IO_SUB_JSTARS:
+      case IO_SUB_JINHALFRAD:
+      case IO_SUB_JDMINHALFRAD:
+      case IO_SUB_JGASINHALFRAD:
+      case IO_SUB_JSTARSINHALFRAD:
+      case IO_SUB_JINRAD:
+      case IO_SUB_JDMINRAD:
+      case IO_SUB_JGASINRAD:
+      case IO_SUB_JSTARSINRAD:
+      case IO_SUB_CMFRAC:
+      case IO_SUB_CMFRACTYPE:
+      case IO_SUB_CMFRACINHALFRAD:
+      case IO_SUB_CMFRACTYPEINHALFRAD:
+      case IO_SUB_CMFRACINRAD:
+      case IO_SUB_CMFRACTYPEINRAD:
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+#ifdef SUBFIND
+        return 1;
+#else  /* #ifdef SUBFIND */
+        return 0;
+#endif /* #ifdef SUBFIND #else */
+
+      /* particle ID list: requires FOF_STOREIDS */
+      case IO_FOFSUB_IDS:
+#ifdef FOF_STOREIDS
+        return 1;
+#else  /* #ifdef FOF_STOREIDS */
+        return 0;
+#endif /* #ifdef FOF_STOREIDS #else */
+
+      case IO_FOF_LASTENTRY:
+        terminate("should not be reached");
+        break;
+    }
+
+  /* unhandled fields are reported as not present */
+  return 0;
+}
+
+/*! \brief Looks up the 4 letter IO label of an output field.
+ *
+ *  Exactly four characters are copied into label; no terminating null
+ *  character is written. If blocknr is not handled by the switch, label is
+ *  left untouched.
+ *
+ *  \param[in] blocknr Number (identifier) of the field to be written.
+ *  \param[out] label Buffer of at least 4 characters receiving the label.
+ *
+ *  \return void
+ */
+void fof_subfind_get_Tab_IO_Label(enum fof_subfind_iofields blocknr, char *label)
+{
+  const char *tag = NULL; /* stays NULL if no label is defined for blocknr */
+
+  switch(blocknr)
+    {
+      /* FoF group fields */
+      case IO_FOF_LEN: tag = "FLEN"; break;
+      case IO_FOF_MTOT: tag = "FMAS"; break;
+      case IO_FOF_POS: tag = "FPOS"; break;
+      case IO_FOF_CM: tag = "FGCM"; break;
+      case IO_FOF_VEL: tag = "FVEL"; break;
+      case IO_FOF_LENTYPE: tag = "FLTY"; break;
+      case IO_FOF_MASSTYPE: tag = "FMTY"; break;
+      case IO_FOF_SFR: tag = "FSFR"; break;
+      case IO_FOF_M_MEAN200: tag = "FMM2"; break;
+      case IO_FOF_R_MEAN200: tag = "FRM2"; break;
+      case IO_FOF_M_CRIT200: tag = "FMC2"; break;
+      case IO_FOF_R_CRIT200: tag = "FRC2"; break;
+      case IO_FOF_M_TOPHAT200: tag = "FMT2"; break;
+      case IO_FOF_R_TOPHAT200: tag = "FRT2"; break;
+      case IO_FOF_M_CRIT500: tag = "FMC5"; break;
+      case IO_FOF_R_CRIT500: tag = "FRC5"; break;
+      case IO_FOF_NSUBS: tag = "FNSH"; break;
+      case IO_FOF_FIRSTSUB: tag = "FFSH"; break;
+      case IO_FOF_FUZZOFFTYPE: tag = "FUOF"; break;
+
+      /* subhalo fields */
+      case IO_SUB_LEN: tag = "SLEN"; break;
+      case IO_SUB_MTOT: tag = "SMAS"; break;
+      case IO_SUB_POS: tag = "SPOS"; break;
+      case IO_SUB_VEL: tag = "SVEL"; break;
+      case IO_SUB_LENTYPE: tag = "SLTY"; break;
+      case IO_SUB_MASSTYPE: tag = "SMTY"; break;
+      case IO_SUB_CM: tag = "SCMP"; break;
+      case IO_SUB_SPIN: tag = "SSPI"; break;
+      case IO_SUB_VELDISP: tag = "SVDI"; break;
+      case IO_SUB_VMAX: tag = "SVMX"; break;
+      case IO_SUB_VMAXRAD: tag = "SVRX"; break;
+      case IO_SUB_HALFMASSRAD: tag = "SHMR"; break;
+      case IO_SUB_HALFMASSRADTYPE: tag = "SHMT"; break;
+      case IO_SUB_MASSINRAD: tag = "SMIR"; break;
+      case IO_SUB_MASSINHALFRAD: tag = "SMIH"; break;
+      case IO_SUB_MASSINMAXRAD: tag = "SMIM"; break;
+      case IO_SUB_MASSINRADTYPE: tag = "SMIT"; break;
+      case IO_SUB_MASSINHALFRADTYPE: tag = "SMHT"; break;
+      case IO_SUB_MASSINMAXRADTYPE: tag = "SMMT"; break;
+      case IO_SUB_IDMOSTBOUND: tag = "SIDM"; break;
+      case IO_SUB_GRNR: tag = "SGNR"; break;
+      case IO_SUB_PARENT: tag = "SPRT"; break;
+      case IO_SUB_BFLD_HALO: tag = "BFDH"; break;
+      case IO_SUB_BFLD_DISK: tag = "BFDD"; break;
+      case IO_SUB_SFR: tag = "SSFR"; break;
+      case IO_SUB_SFRINRAD: tag = "SSFI"; break;
+      case IO_SUB_SFRINHALFRAD: tag = "SSFH"; break;
+      case IO_SUB_SFRINMAXRAD: tag = "SSFM"; break;
+      case IO_FOFSUB_IDS: tag = "PIDS"; break;
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      /* extended FoF group fields */
+      case IO_FOF_J_MEAN200: tag = "FJM2"; break;
+      case IO_FOF_JDM_MEAN200: tag = "JDM2"; break;
+      case IO_FOF_JGAS_MEAN200: tag = "JGM2"; break;
+      case IO_FOF_JSTARS_MEAN200: tag = "JSM2"; break;
+      case IO_FOF_MASSTYPE_MEAN200: tag = "MTM2"; break;
+      case IO_FOF_LENTYPE_MEAN200: tag = "LTM2"; break;
+      case IO_FOF_CMFRAC_MEAN200: tag = "CFM2"; break;
+      case IO_FOF_CMFRACTYPE_MEAN200: tag = "FTM2"; break;
+      case IO_FOF_J_CRIT200: tag = "FJC2"; break;
+      case IO_FOF_JDM_CRIT200: tag = "JDC2"; break;
+      case IO_FOF_JGAS_CRIT200: tag = "JGC2"; break;
+      case IO_FOF_JSTARS_CRIT200: tag = "JSC2"; break;
+      case IO_FOF_MASSTYPE_CRIT200: tag = "MTC2"; break;
+      case IO_FOF_LENTYPE_CRIT200: tag = "LTC2"; break;
+      case IO_FOF_CMFRAC_CRIT200: tag = "CFC2"; break;
+      case IO_FOF_CMFRACTYPE_CRIT200: tag = "FTC2"; break;
+      case IO_FOF_J_TOPHAT200: tag = "FJT2"; break;
+      case IO_FOF_JDM_TOPHAT200: tag = "JDT2"; break;
+      case IO_FOF_JGAS_TOPHAT200: tag = "JGT2"; break;
+      case IO_FOF_JSTARS_TOPHAT200: tag = "JST2"; break;
+      case IO_FOF_MASSTYPE_TOPHAT200: tag = "MTT2"; break;
+      case IO_FOF_LENTYPE_TOPHAT200: tag = "LTT2"; break;
+      case IO_FOF_CMFRAC_TOPHAT200: tag = "CFT2"; break;
+      case IO_FOF_CMFRACTYPE_TOPHAT200: tag = "FTT2"; break;
+      case IO_FOF_J_CRIT500: tag = "FJC5"; break;
+      case IO_FOF_JDM_CRIT500: tag = "JDC5"; break;
+      case IO_FOF_JGAS_CRIT500: tag = "JGC5"; break;
+      case IO_FOF_JSTARS_CRIT500: tag = "JSC5"; break;
+      case IO_FOF_MASSTYPE_CRIT500: tag = "MTC5"; break;
+      case IO_FOF_LENTYPE_CRIT500: tag = "LTC5"; break;
+      case IO_FOF_CMFRAC_CRIT500: tag = "CFC5"; break;
+      case IO_FOF_CMFRACTYPE_CRIT500: tag = "FTC5"; break;
+      case IO_FOF_J: tag = "FOFJ"; break;
+      case IO_FOF_JDM: tag = "FOJD"; break;
+      case IO_FOF_JGAS: tag = "FOJG"; break;
+      case IO_FOF_JSTARS: tag = "FOJS"; break;
+      case IO_FOF_CMFRAC: tag = "FOCF"; break;
+      case IO_FOF_CMFRACTYPE: tag = "FOFT"; break;
+      case IO_FOF_EKIN: tag = "EKIN"; break;
+      case IO_FOF_ETHR: tag = "ETHR"; break;
+      case IO_FOF_EPOT: tag = "EPOT"; break;
+
+      case IO_FOF_EPOT_CRIT200: tag = "EPO1"; break;
+      case IO_FOF_EKIN_CRIT200: tag = "EKI1"; break;
+      case IO_FOF_ETHR_CRIT200: tag = "ETH1"; break;
+      case IO_FOF_EPOT_MEAN200: tag = "EPO2"; break;
+      case IO_FOF_EKIN_MEAN200: tag = "EKI2"; break;
+      case IO_FOF_ETHR_MEAN200: tag = "ETH2"; break;
+      case IO_FOF_EPOT_TOPHAT200: tag = "EPO3"; break;
+      case IO_FOF_EKIN_TOPHAT200: tag = "EKI3"; break;
+      case IO_FOF_ETHR_TOPHAT200: tag = "ETH3"; break;
+      case IO_FOF_EPOT_CRIT500: tag = "EPO4"; break;
+      case IO_FOF_EKIN_CRIT500: tag = "EKI4"; break;
+      case IO_FOF_ETHR_CRIT500: tag = "ETH4"; break;
+
+      /* extended subhalo fields */
+      case IO_SUB_EKIN: tag = "SEKN"; break;
+      case IO_SUB_ETHR: tag = "SETH"; break;
+      case IO_SUB_EPOT: tag = "SEPT"; break;
+      case IO_SUB_J: tag = "SUBJ"; break;
+      case IO_SUB_JDM: tag = "SJDM"; break;
+      case IO_SUB_JGAS: tag = "SJGS"; break;
+      case IO_SUB_JSTARS: tag = "SJST"; break;
+      case IO_SUB_JINHALFRAD: tag = "SJHR"; break;
+      case IO_SUB_JDMINHALFRAD: tag = "SJDH"; break;
+      case IO_SUB_JGASINHALFRAD: tag = "SJGH"; break;
+      case IO_SUB_JSTARSINHALFRAD: tag = "SJSH"; break;
+      case IO_SUB_JINRAD: tag = "SJMR"; break;
+      case IO_SUB_JDMINRAD: tag = "SJDR"; break;
+      case IO_SUB_JGASINRAD: tag = "SJGR"; break;
+      case IO_SUB_JSTARSINRAD: tag = "SJSR"; break;
+      case IO_SUB_CMFRAC: tag = "SCMF"; break;
+      case IO_SUB_CMFRACTYPE: tag = "SCMT"; break;
+      case IO_SUB_CMFRACINHALFRAD: tag = "SCMH"; break;
+      case IO_SUB_CMFRACTYPEINHALFRAD: tag = "SCTH"; break;
+      case IO_SUB_CMFRACINRAD: tag = "SCMR"; break;
+      case IO_SUB_CMFRACTYPEINRAD: tag = "SCTR"; break;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+      case IO_FOF_LASTENTRY:
+        terminate("should not be reached");
+        break;
+    }
+
+  /* copy exactly the four label characters, without a terminating null */
+  if(tag != NULL)
+    strncpy(label, tag, 4);
+}
+
+#ifdef HAVE_HDF5
+/*! \brief Writes a single scalar attribute to an hdf5 group.
+ *
+ *  Creates a scalar dataspace and the attribute, writes the value and
+ *  closes both handles again. The same datatype is used for the stored
+ *  attribute and for the memory buffer.
+ *
+ *  \param[in] handle Handle of the hdf5 group the attribute is attached to.
+ *  \param[in] name Name of the attribute (also passed to the wrappers).
+ *  \param[in] type_id hdf5 datatype of the attribute and of value.
+ *  \param[in] value Pointer to the value to be written.
+ *
+ *  \return void
+ */
+static void fof_subfind_write_scalar_attribute(hid_t handle, const char *name, hid_t type_id, const void *value)
+{
+  hid_t hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hid_t hdf5_attribute = my_H5Acreate(handle, name, type_id, hdf5_dataspace, H5P_DEFAULT);
+
+  my_H5Awrite(hdf5_attribute, type_id, value, name);
+  my_H5Aclose(hdf5_attribute, name);
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+}
+
+/*! \brief Function that handles writing hdf5 header.
+ *
+ *  Writes the members of catalogue_header as scalar attributes, followed by
+ *  the GIT_COMMIT and GIT_DATE strings. The string datatype created for the
+ *  latter two is released again before returning.
+ *
+ *  \param[in] handle Handle for header hdf5 group.
+ *
+ *  \return void
+ */
+void fof_subfind_write_header_attributes_in_hdf5(hid_t handle)
+{
+  /* group, subgroup and ID counts of this file and of the whole catalogue */
+  fof_subfind_write_scalar_attribute(handle, "Ngroups_ThisFile", H5T_NATIVE_INT, &catalogue_header.Ngroups);
+  fof_subfind_write_scalar_attribute(handle, "Nsubgroups_ThisFile", H5T_NATIVE_INT, &catalogue_header.Nsubgroups);
+  fof_subfind_write_scalar_attribute(handle, "Nids_ThisFile", H5T_NATIVE_INT, &catalogue_header.Nids);
+  fof_subfind_write_scalar_attribute(handle, "Ngroups_Total", H5T_NATIVE_INT, &catalogue_header.TotNgroups);
+  fof_subfind_write_scalar_attribute(handle, "Nsubgroups_Total", H5T_NATIVE_INT, &catalogue_header.TotNsubgroups);
+  fof_subfind_write_scalar_attribute(handle, "Nids_Total", H5T_NATIVE_INT64, &catalogue_header.TotNids);
+  fof_subfind_write_scalar_attribute(handle, "NumFiles", H5T_NATIVE_INT, &catalogue_header.num_files);
+
+  /* time and cosmology related header entries */
+  fof_subfind_write_scalar_attribute(handle, "Time", H5T_NATIVE_DOUBLE, &catalogue_header.time);
+  fof_subfind_write_scalar_attribute(handle, "Redshift", H5T_NATIVE_DOUBLE, &catalogue_header.redshift);
+  fof_subfind_write_scalar_attribute(handle, "HubbleParam", H5T_NATIVE_DOUBLE, &catalogue_header.HubbleParam);
+  fof_subfind_write_scalar_attribute(handle, "BoxSize", H5T_NATIVE_DOUBLE, &catalogue_header.BoxSize);
+  fof_subfind_write_scalar_attribute(handle, "Omega0", H5T_NATIVE_DOUBLE, &catalogue_header.Omega0);
+  fof_subfind_write_scalar_attribute(handle, "OmegaLambda", H5T_NATIVE_DOUBLE, &catalogue_header.OmegaLambda);
+
+  fof_subfind_write_scalar_attribute(handle, "FlagDoubleprecision", H5T_NATIVE_INT, &catalogue_header.flag_doubleprecision);
+
+  /* version strings: one string datatype, resized to the length of each string */
+  hid_t atype = my_H5Tcopy(H5T_C_S1);
+
+  my_H5Tset_size(atype, strlen(GIT_COMMIT));
+  fof_subfind_write_scalar_attribute(handle, "Git_commit", atype, GIT_COMMIT);
+
+  my_H5Tset_size(atype, strlen(GIT_DATE));
+  fof_subfind_write_scalar_attribute(handle, "Git_date", atype, GIT_DATE);
+
+  /* the copied datatype is an hdf5 identifier of its own and has to be released */
+  my_H5Tclose(atype);
+}
+#endif /* #ifdef HAVE_HDF5 */
+
+#endif /* #ifdef FOF */
diff --git a/src/amuse/community/arepo/src/fof/fof_nearest.c b/src/amuse/community/arepo/src/fof/fof_nearest.c
new file mode 100644
index 0000000000..c21badf579
--- /dev/null
+++ b/src/amuse/community/arepo/src/fof/fof_nearest.c
@@ -0,0 +1,473 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/fof/fof_nearest.c
+ * \date        05/2018
+ * \brief       Routine to find nearest primary link type particle to link
+ *              secondary link type to FoF groups.
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                double fof_find_nearest_dmparticle(MyIDType * vMinID, int
+ *                  *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask)
+ *                static int fof_find_nearest_dmparticle_evaluate(int target,
+ *                  int mode, int threadid)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <inttypes.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../subfind/subfind.h"
+#include "fof.h"
+
+#ifdef FOF
+
+static MyFloat *fof_nearest_distance; /* per-particle distance of the best neighbour found so far; values above 1.0e29 are treated as "none found yet" */
+static MyFloat *fof_nearest_hsml;     /* per-particle value sent as data_in.Hsml; presumably the current search radius - TODO confirm in the evaluate routine */
+
+static MyIDType *MinID;                           /* set from the vMinID argument of fof_find_nearest_dmparticle() */
+static int *Head, *Len, *Next, *Tail, *MinIDTask; /* set from the corresponding v* arguments of fof_find_nearest_dmparticle() */
+
+static int fof_find_nearest_dmparticle_evaluate(int target, int mode, int threadid); /* forward declaration; called by kernel_local() and kernel_imported() */
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ *
+ *         The members are filled by particle2in().
+ */
+typedef struct
+{
+  MyDouble Pos[3]; /* copy of P[i].Pos */
+  MyFloat Hsml;    /* copy of fof_nearest_hsml[i] */
+
+  int Firstnode; /* firstnode argument of particle2in(), stored unchanged */
+} data_in;
+
+static data_in *DataIn, *DataGet; /* not referenced directly in this part of the file; presumably managed by generic_comm_helpers2.h - TODO confirm */
+
+/*! \brief Copies the data of one local particle into a communication record
+ *         of type data_in. Needed by generic_comm_helpers2.
+ *
+ *  \param[out] in Record to be filled.
+ *  \param[in] i Index of the particle in the P and fof_nearest_hsml arrays.
+ *  \param[in] firstnode First node of communication; stored unchanged.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  int dim;
+
+  for(dim = 0; dim < 3; dim++)
+    in->Pos[dim] = P[i].Pos[dim];
+
+  in->Firstnode = firstnode;
+  in->Hsml      = fof_nearest_hsml[i];
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ *
+ *         The members are consumed by out2particle().
+ */
+typedef struct
+{
+  MyFloat Distance; /* compared against fof_nearest_distance[i]; a result is only accepted if it is smaller */
+  MyIDType MinID;   /* stored into MinID[i] when the result is accepted */
+  int MinIDTask;    /* stored into MinIDTask[i] when the result is accepted */
+#if defined(SUBFIND)
+  MyFloat DM_Hsml; /* stored into PS[i].Hsml when the result is accepted */
+#endif /* #if defined(SUBFIND) */
+} data_out;
+
+static data_out *DataResult, *DataOut; /* not referenced directly in this part of the file; presumably managed by generic_comm_helpers2.h - TODO confirm */
+
+/*! \brief Merges one result record into the per-particle result arrays.
+ *         Needed by generic_comm_helpers2.
+ *
+ *  The record is only taken over if its distance is strictly smaller than
+ *  the distance currently stored for particle i; otherwise nothing changes.
+ *
+ *  \param[in] out Result record to be merged.
+ *  \param[in] i Index of the particle in fof_nearest_distance, MinID,
+ *             MinIDTask (and PS if SUBFIND is set).
+ *  \param[in] mode Local or imported result; not used by this routine, both
+ *             kinds are handled identically.
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  /* keep what is stored unless the incoming result is strictly closer */
+  if(!(out->Distance < fof_nearest_distance[i]))
+    return;
+
+  MinIDTask[i]            = out->MinIDTask;
+  MinID[i]                = out->MinID;
+  fof_nearest_distance[i] = out->Distance;
+#if defined(SUBFIND)
+  PS[i].Hsml = out->DM_Hsml;
+#endif /* #if defined(SUBFIND) */
+}
+
+#include "../utils/generic_comm_helpers2.h"
+
+/*! \brief Processes the local particles of the current communication cycle.
+ *
+ *  Resets the export flags of the calling thread and then fetches particle
+ *  indices from NextParticle until either the thread's export space drops
+ *  below MinSpace or all NumPart particles have been handed out. The
+ *  *_evaluate function is called in MODE_LOCAL_PARTICLES for every particle
+ *  of a secondary link type for which no neighbour has been found yet.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  int threadid = get_thread_num();
+  int task, idx;
+
+  for(task = 0; task < NTask; task++)
+    Thread[threadid].Exportflag[task] = -1;
+
+  /* the space check has to come first: no particle index may be consumed
+   * once the export buffer of this thread is exhausted */
+  while(Thread[threadid].ExportSpace >= MinSpace)
+    {
+      idx = NextParticle++;
+
+      if(idx >= NumPart)
+        break;
+
+      if(!((1 << P[idx].Type) & (FOF_SECONDARY_LINK_TYPES)))
+        continue;
+
+      if(fof_nearest_distance[idx] > 1.0e29) /* we haven't found any neighbor yet */
+        fof_find_nearest_dmparticle_evaluate(idx, MODE_LOCAL_PARTICLES, threadid);
+    }
+}
+
+/*! \brief Processes the particles that were sent to this task.
+ *
+ *  Calls the *_evaluate function in MODE_IMPORTED_PARTICLES once for each
+ *  of the Nimport imported entries, in ascending index order.
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  int threadid = get_thread_num();
+  int idx;
+
+  for(idx = 0; idx < Nimport; idx++)
+    fof_find_nearest_dmparticle_evaluate(idx, MODE_IMPORTED_PARTICLES, threadid);
+}
+
+/*! \brief Finds nearest dark matter particle for secondary link types
+ *
+ *  \param[out] vMinID Pointer to MinID array.
+ *  \param[in] vHead Pointer to Head array.
+ *  \param[in] vLen Pointer to Len array.
+ *  \param[in] vNext Pointer to Next array.
+ *  \param[in] vTail Pointer to Tail array.
+ *  \param[out] vMinIDTask Pointer to MinIDTask array.
+ *
+ *  \return Time spent in this function.
+ */
+double fof_find_nearest_dmparticle(MyIDType *vMinID, int *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask)
+{
+  MinID     = vMinID;
+  Head      = vHead;
+  Len       = vLen;
+  Next      = vNext;
+  Tail      = vTail;
+  MinIDTask = vMinIDTask;
+  /* the arrays are published through variables declared outside this function so the kernels can reach them */
+  int i, n, npleft, iter;
+  long long ntot;
+  double tstart = second();
+
+  mpi_printf("FOF: Start finding nearest dm-particle (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+
+  fof_nearest_distance = (MyFloat *)mymalloc("fof_nearest_distance", sizeof(MyFloat) * NumPart);
+  fof_nearest_hsml     = (MyFloat *)mymalloc("fof_nearest_hsml", sizeof(MyFloat) * NumPart);
+  /* seed every secondary-link particle with "nothing found yet" (1.0e30) and an initial search radius */
+  for(n = 0; n < NumPart; n++)
+    {
+      if((1 << P[n].Type) & (FOF_SECONDARY_LINK_TYPES))
+        {
+          fof_nearest_distance[n] = 1.0e30;
+          if(P[n].Type == 0)
+#ifdef USE_AREPO_FOF_WITH_GADGET_FIX
+            fof_nearest_hsml[n] = SphP[n].Hsml;
+#else  /* #ifdef USE_AREPO_FOF_WITH_GADGET_FIX */
+            fof_nearest_hsml[n] = get_cell_radius(n);
+#endif /* #ifdef USE_AREPO_FOF_WITH_GADGET_FIX #else */
+          else
+            fof_nearest_hsml[n] = 0.1 * LinkL;
+        }
+    }
+
+  generic_set_MaxNexport();
+
+  iter = 0;
+  /* we will repeat the whole thing for those particles where we didn't find enough neighbours */
+  do
+    {
+      double t0 = second();
+
+      generic_comm_pattern(NumPart, kernel_local, kernel_imported);
+
+      /* do final operations on results */
+      for(i = 0, npleft = 0; i < NumPart; i++)
+        {
+          if((1 << P[i].Type) & (FOF_SECONDARY_LINK_TYPES))
+            {
+              if(fof_nearest_distance[i] > 1.0e29)
+                {
+                  if(fof_nearest_hsml[i] < 4 * LinkL) /* we only search out to a maximum distance */
+                    {
+                      /* need to redo this particle with a doubled search radius */
+                      npleft++;
+                      fof_nearest_hsml[i] *= 2.0;
+                      if(iter >= MAXITER - 10) /* close to giving up: log the stragglers, ID at full width */
+                        {
+                          printf("FOF: i=%d task=%d ID=%llu P[i].Type=%d Hsml=%g LinkL=%g nearest=%g pos=(%g|%g|%g)\n", i, ThisTask,
+                                 (unsigned long long)P[i].ID, P[i].Type, fof_nearest_hsml[i], LinkL, fof_nearest_distance[i], P[i].Pos[0],
+                                 P[i].Pos[1], P[i].Pos[2]);
+                          myflush(stdout);
+                        }
+                    }
+                  else
+                    {
+                      fof_nearest_distance[i] = 0; /* we do not continue to search for this particle */
+                    }
+                }
+            }
+        }
+
+      sumup_large_ints(1, &npleft, &ntot);
+
+      double t1 = second();
+      if(ntot > 0)
+        {
+          iter++;
+          /* iter was just incremented, so this progress line is printed on every repeat */
+          mpi_printf("FOF: fof-nearest iteration %d: need to repeat for %lld particles. (took = %g sec)\n", iter, ntot,
+                     timediff(t0, t1));
+
+          if(iter > MAXITER)
+            terminate("FOF: failed to converge in fof-nearest\n");
+        }
+    }
+  while(ntot > 0);
+
+  myfree(fof_nearest_hsml);
+  myfree(fof_nearest_distance);
+
+  mpi_printf("FOF: done finding nearest dm-particle\n");
+
+  double tend = second();
+  return timediff(tstart, tend);
+}
+
+/*! \brief Evaluate function for finding the nearest dark matter particle
+ *         for secondary link types.
+ *
+ *  \param[in] target Index of particle/cell.
+ *  \param[in] mode Flag if it operates on local or imported data.
+ *  \param[in] threadid ID of thread.
+ *
+ *  \return 0
+ */
+static int fof_find_nearest_dmparticle_evaluate(int target, int mode, int threadid)
+{
+  int k, no, index, numnodes, *firstnode;
+  double h, r2max, dist;
+  double dx, dy, dz, r2;
+  MyDouble *pos;
+  data_in local, *target_data;
+  data_out out;
+
+  double xtmp, ytmp, ztmp; /* not referenced directly below; presumably scratch for the FOF_NEAREST_LONG_* macros -- TODO confirm */
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0);
+      target_data = &local;
+
+      numnodes  = 1;
+      firstnode = NULL; /* never dereferenced in this mode: the walk below starts at Tree_MaxPart */
+    }
+  else
+    {
+      target_data = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos = target_data->Pos;
+  h   = target_data->Hsml;
+  /* index stays negative until a candidate is accepted; r2max is the best squared distance so far */
+  index = -1;
+  r2max = 1.0e30;
+
+  /* Now start the actual tree-walk computation for this particle */
+
+  for(k = 0; k < numnodes; k++)
+    {
+      if(mode == MODE_LOCAL_PARTICLES)
+        {
+          no = Tree_MaxPart; /* root node */
+        }
+      else
+        {
+          no = firstnode[k];
+          no = Nodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0)
+        {
+          if(no < Tree_MaxPart) /* single particle */
+            {
+              int p = no;
+              no    = Nextnode[no];
+
+              if(!((1 << P[p].Type) & (FOF_SECONDARY_LINK_TARGET_TYPES)))
+                continue;
+              /* per-axis rejection against the search radius h before forming r2 */
+              dist = h;
+              dx   = FOF_NEAREST_LONG_X(Tree_Pos_list[3 * p + 0] - pos[0]);
+              if(dx > dist)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(Tree_Pos_list[3 * p + 1] - pos[1]);
+              if(dy > dist)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(Tree_Pos_list[3 * p + 2] - pos[2]);
+              if(dz > dist)
+                continue;
+
+              r2 = dx * dx + dy * dy + dz * dz;
+              if(r2 < r2max && r2 < h * h) /* strictly closer than the best so far and strictly inside h */
+                {
+                  index = p;
+                  r2max = r2;
+                }
+            }
+          else if(no < Tree_MaxPart + Tree_MaxNodes) /* internal node */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  if(no <
+                     Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */
+                    break;
+                }
+
+              struct NODE *current = &Nodes[no];
+
+              no = current->u.d.sibling; /* in case the node can be discarded */
+              /* per-axis test with the search radius enlarged by half of current->len */
+              dist = h + 0.5 * current->len;
+              dx   = FOF_NEAREST_LONG_X(current->center[0] - pos[0]);
+              if(dx > dist)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(current->center[1] - pos[1]);
+              if(dy > dist)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(current->center[2] - pos[2]);
+              if(dz > dist)
+                continue;
+
+              /* now test against the minimal sphere enclosing everything */
+              dist += FACT1 * current->len;
+              if(dx * dx + dy * dy + dz * dz > dist * dist)
+                continue;
+
+              no = current->u.d.nextnode; /* ok, we need to open the node */
+            }
+          else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */
+            {
+              terminate("do not expect imported points here");
+            }
+          else /* pseudo particle */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("mode == MODE_IMPORTED_PARTICLES");
+
+              if(target >= 0)
+                tree_treefind_export_node_threads(no, target, threadid);
+
+              no = Nextnode[no - Tree_MaxNodes];
+            }
+        }
+    }
+  /* pack the answer: distance plus the MinID/MinIDTask stored for Head[index], or sentinel values if nothing was found */
+  if(index >= 0)
+    {
+      out.Distance  = sqrt(r2max);
+      out.MinID     = MinID[Head[index]];
+      out.MinIDTask = MinIDTask[Head[index]];
+#if defined(SUBFIND)
+      out.DM_Hsml = PS[index].Hsml;
+#endif /* #if defined(SUBFIND) */
+    }
+  else
+    {
+      out.Distance  = 2.0e30;
+      out.MinID     = 0;
+      out.MinIDTask = -1;
+#if defined(SUBFIND)
+      out.DM_Hsml = 0;
+#endif /* #if defined(SUBFIND) */
+    }
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+#endif /* #ifdef FOF */
diff --git a/src/amuse/community/arepo/src/fof/fof_sort_kernels.c b/src/amuse/community/arepo/src/fof/fof_sort_kernels.c
new file mode 100644
index 0000000000..e10627ca7f
--- /dev/null
+++ b/src/amuse/community/arepo/src/fof/fof_sort_kernels.c
@@ -0,0 +1,495 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/fof/fof_sort_kernels.c
+ * \date        05/2018
+ * \brief       Various sort kernels used by the parallel FoF group finder.
+ * \details     contains functions:
+ *                int fof_compare_local_sort_data_targetindex(const void *a,
+ *                  const void *b)
+ *                int fof_compare_aux_sort_Type(const void *a, const void *b)
+ *                int fof_compare_aux_sort_FileOrder(const void *a,
+ *                  const void *b)
+ *                int fof_compare_aux_sort_GrNr(const void *a, const void *b)
+ *                int fof_compare_aux_sort_OriginTask_OriginIndex(const void
+ *                  *a, const void *b)
+ *                int fof_compare_FOF_PList_MinID(const void *a, const void *b)
+ *                int fof_compare_FOF_GList_MinID(const void *a, const void *b)
+ *                int fof_compare_FOF_GList_MinIDTask(const void *a,
+ *                  const void *b)
+ *                int fof_compare_FOF_GList_MinIDTask_MinID(const void *a,
+ *                  const void *b)
+ *                int fof_compare_FOF_GList_LocCountTaskDiffMinID(const void
+ *                  *a, const void *b)
+ *                int fof_compare_FOF_GList_ExtCountMinID(const void *a,
+ *                  const void *b)
+ *                int fof_compare_Group_MinID(const void *a, const void *b)
+ *                int fof_compare_Group_GrNr(const void *a, const void *b)
+ *                int fof_compare_Group_MinIDTask(const void *a, const void *b)
+ *                int fof_compare_Group_MinIDTask_MinID(const void *a,
+ *                  const void *b)
+ *                int fof_compare_Group_Len(const void *a, const void *b)
+ *                int fof_compare_ID_list_GrNrID(const void *a, const void *b)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <inttypes.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../subfind/subfind.h"
+#include "fof.h"
+
+#ifdef FOF
+
+/*! \brief Comparison function for fof_local_sort_data objects.
+ *
+ *  Sorting kernel comparing element targetindex.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_local_sort_data_targetindex(const void *a, const void *b)
+{
+  const struct fof_local_sort_data *lhs = (const struct fof_local_sort_data *)a;
+  const struct fof_local_sort_data *rhs = (const struct fof_local_sort_data *)b;
+
+  if(lhs->targetindex < rhs->targetindex)
+    return -1;
+  /* not smaller: either larger (+1) or equal (0) */
+  return (lhs->targetindex > rhs->targetindex) ? +1 : 0;
+}
+
+/*! \brief Comparison function for data_aux_sort objects.
+ *
+ *  Sorting kernel comparing element Type.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_aux_sort_Type(const void *a, const void *b)
+{
+  const struct data_aux_sort *lhs = (const struct data_aux_sort *)a;
+  const struct data_aux_sort *rhs = (const struct data_aux_sort *)b;
+
+  if(lhs->Type < rhs->Type)
+    return -1;
+  /* not smaller: either larger (+1) or equal (0) */
+  return (lhs->Type > rhs->Type) ? +1 : 0;
+}
+
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
+/*! \brief Comparison function for data_aux_sort objects.
+ *
+ *  Sorting kernel comparing element FileOrder.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_aux_sort_FileOrder(const void *a, const void *b)
+{
+  const struct data_aux_sort *lhs = (const struct data_aux_sort *)a;
+  const struct data_aux_sort *rhs = (const struct data_aux_sort *)b;
+
+  if(lhs->FileOrder < rhs->FileOrder)
+    return -1;
+  /* not smaller: either larger (+1) or equal (0) */
+  return (lhs->FileOrder > rhs->FileOrder) ? +1 : 0;
+}
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
+
+/*! \brief Comparison function for data_aux_sort objects.
+ *
+ *  Sorting kernel comparing elements (most important first):
+ *   GrNr, SubNr and DM_BindingEnergy (both only with SUBFIND), ID
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_aux_sort_GrNr(const void *a, const void *b)
+{
+  const struct data_aux_sort *lhs = (const struct data_aux_sort *)a;
+  const struct data_aux_sort *rhs = (const struct data_aux_sort *)b;
+
+  /* primary key: GrNr */
+  if(lhs->GrNr < rhs->GrNr)
+    return -1;
+  if(lhs->GrNr > rhs->GrNr)
+    return +1;
+
+#ifdef SUBFIND
+  /* with SUBFIND: SubNr, then DM_BindingEnergy */
+  if(lhs->SubNr < rhs->SubNr)
+    return -1;
+  if(lhs->SubNr > rhs->SubNr)
+    return +1;
+
+  if(lhs->DM_BindingEnergy < rhs->DM_BindingEnergy)
+    return -1;
+  if(lhs->DM_BindingEnergy > rhs->DM_BindingEnergy)
+    return +1;
+#endif /* #ifdef SUBFIND */
+
+  /* final tie-breaker: ID */
+  if(lhs->ID < rhs->ID)
+    return -1;
+
+  return (lhs->ID > rhs->ID) ? +1 : 0;
+}
+
+/*! \brief Comparison function for data_aux_sort objects.
+ *
+ *  Sorting kernel comparing elements (most important first):
+ *   OriginTask, OriginIndex
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_aux_sort_OriginTask_OriginIndex(const void *a, const void *b)
+{
+  const struct data_aux_sort *lhs = (const struct data_aux_sort *)a;
+  const struct data_aux_sort *rhs = (const struct data_aux_sort *)b;
+
+  /* primary key: OriginTask */
+  if(lhs->OriginTask < rhs->OriginTask)
+    return -1;
+  if(lhs->OriginTask > rhs->OriginTask)
+    return +1;
+
+  /* tie-breaker: OriginIndex */
+  if(lhs->OriginIndex < rhs->OriginIndex)
+    return -1;
+  return (lhs->OriginIndex > rhs->OriginIndex) ? +1 : 0;
+}
+
+/*! \brief Comparison function for fof_particle_list objects.
+ *
+ *  Sorting kernel comparing element MinID.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_FOF_PList_MinID(const void *a, const void *b)
+{
+  const struct fof_particle_list *lhs = (const struct fof_particle_list *)a;
+  const struct fof_particle_list *rhs = (const struct fof_particle_list *)b;
+
+  if(lhs->MinID < rhs->MinID)
+    return -1;
+  /* not smaller: either larger (+1) or equal (0) */
+  return (lhs->MinID > rhs->MinID) ? +1 : 0;
+}
+
+/*! \brief Comparison function for fof_group_list objects.
+ *
+ *  Sorting kernel comparing element MinID.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_FOF_GList_MinID(const void *a, const void *b)
+{
+  const struct fof_group_list *lhs = (const struct fof_group_list *)a;
+  const struct fof_group_list *rhs = (const struct fof_group_list *)b;
+
+  if(lhs->MinID < rhs->MinID)
+    return -1;
+  /* not smaller: either larger (+1) or equal (0) */
+  return (lhs->MinID > rhs->MinID) ? +1 : 0;
+}
+
+/*! \brief Comparison function for fof_group_list objects.
+ *
+ *  Sorting kernel comparing element MinIDTask.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_FOF_GList_MinIDTask(const void *a, const void *b)
+{
+  const struct fof_group_list *lhs = (const struct fof_group_list *)a;
+  const struct fof_group_list *rhs = (const struct fof_group_list *)b;
+
+  if(lhs->MinIDTask < rhs->MinIDTask)
+    return -1;
+  /* not smaller: either larger (+1) or equal (0) */
+  return (lhs->MinIDTask > rhs->MinIDTask) ? +1 : 0;
+}
+
+/*! \brief Comparison function for fof_group_list objects.
+ *
+ *  Sorting kernel comparing elements (most important first):
+ *   MinIDTask, MinID.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_FOF_GList_MinIDTask_MinID(const void *a, const void *b)
+{
+  const struct fof_group_list *lhs = (const struct fof_group_list *)a;
+  const struct fof_group_list *rhs = (const struct fof_group_list *)b;
+
+  /* primary key: MinIDTask */
+  if(lhs->MinIDTask < rhs->MinIDTask)
+    return -1;
+  if(lhs->MinIDTask > rhs->MinIDTask)
+    return +1;
+
+  /* tie-breaker: MinID */
+  if(lhs->MinID < rhs->MinID)
+    return -1;
+  return (lhs->MinID > rhs->MinID) ? +1 : 0;
+}
+
+/*! \brief Comparison function for fof_group_list objects.
+ *
+ *  Sorting kernel comparing elements (most important first):
+ *   LocCount (larger first), MinID, |ExtCount - MinIDTask|.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b, except for LocCount where -1 if a > b
+ */
+int fof_compare_FOF_GList_LocCountTaskDiffMinID(const void *a, const void *b)
+{
+  const struct fof_group_list *lhs = (const struct fof_group_list *)a;
+  const struct fof_group_list *rhs = (const struct fof_group_list *)b;
+
+  /* larger LocCount sorts first */
+  if(lhs->LocCount > rhs->LocCount)
+    return -1;
+  if(lhs->LocCount < rhs->LocCount)
+    return +1;
+
+  /* then ascending MinID */
+  if(lhs->MinID < rhs->MinID)
+    return -1;
+  if(lhs->MinID > rhs->MinID)
+    return +1;
+
+  /* finally the smaller |ExtCount - MinIDTask| sorts first */
+  if(labs(lhs->ExtCount - lhs->MinIDTask) < labs(rhs->ExtCount - rhs->MinIDTask))
+    return -1;
+  if(labs(lhs->ExtCount - lhs->MinIDTask) > labs(rhs->ExtCount - rhs->MinIDTask))
+    return +1;
+  return 0;
+}
+
+/*! \brief Comparison function for fof_group_list objects.
+ *
+ *  Sorting kernel comparing elements (most important first):
+ *   ExtCount, MinID.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_FOF_GList_ExtCountMinID(const void *a, const void *b)
+{
+  const struct fof_group_list *lhs = (const struct fof_group_list *)a;
+  const struct fof_group_list *rhs = (const struct fof_group_list *)b;
+
+  /* primary key: ExtCount */
+  if(lhs->ExtCount < rhs->ExtCount)
+    return -1;
+  if(lhs->ExtCount > rhs->ExtCount)
+    return +1;
+
+  /* tie-breaker: MinID */
+  if(lhs->MinID < rhs->MinID)
+    return -1;
+  return (lhs->MinID > rhs->MinID) ? +1 : 0;
+}
+
+/*! \brief Comparison function for group_properties objects.
+ *
+ *  Sorting kernel comparing element MinID.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_Group_MinID(const void *a, const void *b)
+{
+  const struct group_properties *lhs = (const struct group_properties *)a;
+  const struct group_properties *rhs = (const struct group_properties *)b;
+
+  if(lhs->MinID < rhs->MinID)
+    return -1;
+  /* not smaller: either larger (+1) or equal (0) */
+  return (lhs->MinID > rhs->MinID) ? +1 : 0;
+}
+
+/*! \brief Comparison function for group_properties objects.
+ *
+ *  Sorting kernel comparing element GrNr.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_Group_GrNr(const void *a, const void *b)
+{
+  const struct group_properties *lhs = (const struct group_properties *)a;
+  const struct group_properties *rhs = (const struct group_properties *)b;
+
+  if(lhs->GrNr < rhs->GrNr)
+    return -1;
+  /* not smaller: either larger (+1) or equal (0) */
+  return (lhs->GrNr > rhs->GrNr) ? +1 : 0;
+}
+
+/*! \brief Comparison function for group_properties objects.
+ *
+ *  Sorting kernel comparing element MinIDTask.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_Group_MinIDTask(const void *a, const void *b)
+{
+  const struct group_properties *lhs = (const struct group_properties *)a;
+  const struct group_properties *rhs = (const struct group_properties *)b;
+
+  if(lhs->MinIDTask < rhs->MinIDTask)
+    return -1;
+  /* not smaller: either larger (+1) or equal (0) */
+  return (lhs->MinIDTask > rhs->MinIDTask) ? +1 : 0;
+}
+
+/*! \brief Comparison function for group_properties objects.
+ *
+ *  Sorting kernel comparing elements (most important first):
+ *   MinIDTask, MinID.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_Group_MinIDTask_MinID(const void *a, const void *b)
+{
+  const struct group_properties *lhs = (const struct group_properties *)a;
+  const struct group_properties *rhs = (const struct group_properties *)b;
+
+  /* primary key: MinIDTask */
+  if(lhs->MinIDTask < rhs->MinIDTask)
+    return -1;
+  if(lhs->MinIDTask > rhs->MinIDTask)
+    return +1;
+
+  /* tie-breaker: MinID */
+  if(lhs->MinID < rhs->MinID)
+    return -1;
+  return (lhs->MinID > rhs->MinID) ? +1 : 0;
+}
+
+/*! \brief Comparison function for group_properties objects.
+ *
+ *  Sorting kernel comparing element Len.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a > b.
+ */
+int fof_compare_Group_Len(const void *a, const void *b)
+{
+  const struct group_properties *lhs = (const struct group_properties *)a;
+  const struct group_properties *rhs = (const struct group_properties *)b;
+
+  if(lhs->Len > rhs->Len)
+    return -1;
+  /* descending order: a shorter group sorts after a longer one */
+  return (lhs->Len < rhs->Len) ? +1 : 0;
+}
+
+/*! \brief Comparison function for id_list objects.
+ *
+ *  Sorting kernel comparing elements (most important first):
+ *   GrNr, Type, ID.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+int fof_compare_ID_list_GrNrID(const void *a, const void *b)
+{
+  const struct id_list *lhs = (const struct id_list *)a;
+  const struct id_list *rhs = (const struct id_list *)b;
+
+  /* primary key: GrNr */
+  if(lhs->GrNr < rhs->GrNr)
+    return -1;
+  if(lhs->GrNr > rhs->GrNr)
+    return +1;
+
+  /* secondary key: Type */
+  if(lhs->Type < rhs->Type)
+    return -1;
+  if(lhs->Type > rhs->Type)
+    return +1;
+
+  /* final tie-breaker: ID */
+  if(lhs->ID < rhs->ID)
+    return -1;
+  return (lhs->ID > rhs->ID) ? +1 : 0;
+}
+
+#endif /* #ifdef FOF */
diff --git a/src/amuse/community/arepo/src/fof/fof_vars.c b/src/amuse/community/arepo/src/fof/fof_vars.c
new file mode 100644
index 0000000000..2df2856c66
--- /dev/null
+++ b/src/amuse/community/arepo/src/fof/fof_vars.c
@@ -0,0 +1,79 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/fof/fof_vars.c
+ * \date        05/2018
+ * \brief       Instances for the global variables used by FOF, which are
+ *              declared in fof.h
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <inttypes.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../subfind/subfind.h"
+#include "fof.h"
+
+#ifdef FOF
+
+int Ngroups, NgroupsExt, MaxNgroups, TotNgroups, Nsubgroups, TotNsubgroups;
+int Nids;
+long long TotNids;
+
+double LinkL = 0; /* NOTE(review): presumably the FoF linking length -- confirm; the only definition here with an explicit initializer */
+
+int fof_OldMaxPart;
+int fof_OldMaxPartSph;
+
+unsigned char *flag_node_inside_linkinglength;
+
+struct group_properties *Group; /* struct group_properties is the type handled by the fof_compare_Group_* kernels */
+
+struct fofdata_in *FoFDataIn, *FoFDataGet;
+
+struct fofdata_out *FoFDataResult, *FoFDataOut;
+
+struct fof_particle_list *FOF_PList; /* struct fof_particle_list is the type handled by fof_compare_FOF_PList_MinID() */
+
+struct fof_group_list *FOF_GList; /* struct fof_group_list is the type handled by the fof_compare_FOF_GList_* kernels */
+
+struct id_list *ID_list; /* struct id_list is the type handled by fof_compare_ID_list_GrNrID() */
+
+struct bit_flags *Flags;
+
+struct fof_subfind_header catalogue_header;
+
+#endif /* #ifdef FOF */
diff --git a/src/amuse/community/arepo/src/gitversion/version b/src/amuse/community/arepo/src/gitversion/version
new file mode 100644
index 0000000000..9cd3dc25eb
--- /dev/null
+++ b/src/amuse/community/arepo/src/gitversion/version
@@ -0,0 +1,7 @@
+#ifndef VERSION_H
+#define VERSION_H
+/* NOTE(review): "_DATE_" and "_COMMIT_" look like placeholders meant to be substituted by the build -- confirm */
+const char* GIT_DATE   = "_DATE_";
+const char* GIT_COMMIT = "_COMMIT_";
+
+#endif /* #ifndef VERSION_H */
diff --git a/src/amuse/community/arepo/src/gitversion/version.h b/src/amuse/community/arepo/src/gitversion/version.h
new file mode 100644
index 0000000000..7d33b0889a
--- /dev/null
+++ b/src/amuse/community/arepo/src/gitversion/version.h
@@ -0,0 +1,38 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gitversion/version.h
+ * \date        05/2018
+ * \brief       Header for git-version variables.
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 27.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef VERSION_H
+#define VERSION_H
+/* declarations only; the strings themselves are defined outside this header */
+extern const char* GIT_DATE;
+extern const char* GIT_COMMIT;
+
+#endif /* #ifndef VERSION_H */
diff --git a/src/amuse/community/arepo/src/gravity/accel.c b/src/amuse/community/arepo/src/gravity/accel.c
new file mode 100644
index 0000000000..493216c347
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/accel.c
@@ -0,0 +1,347 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/accel.c
+ * \date        05/2018
+ * \brief       Routines to carry out gravity force computation.
+ * \details     contains functions:
+ *                void compute_grav_accelerations(int timebin, int fullflag)
+ *                void gravity(int timebin, int fullflag)
+ *                void gravity_force_finalize(int timebin)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 03.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief Top-level driver for the gravitational acceleration computation.
+ *
+ *  Does nothing unless TimeBinsGravity.GlobalNActiveParticles is positive.
+ *  If All.TypeOfOpeningCriterion == 1, All.Ti_Current == 0 and
+ *  All.ErrTolTheta > 0, one extra gravity() pass (preceded by
+ *  long_range_force() when PMGRID is set) is made before the regular one.
+ *
+ *  \param[in] timebin Current timebin for which gravity is calculated
+ *             (positive integer); passed on unchanged to gravity().
+ *  \param[in] fullflag Flag whether this is a global timestep
+ *             (e.g. FLAG_FULL_TREE); passed on unchanged to gravity().
+ *
+ *  \return void
+ */
+void compute_grav_accelerations(int timebin, int fullflag)
+{
+  if(!(TimeBinsGravity.GlobalNActiveParticles > 0))
+    return; /* the global count of gravity-active particles is not positive */
+
+  /* For the first timestep, one gravity calculation is done up front with
+   * the Barnes & Hut criterion, so that the relative opening criterion can
+   * be used with consistent accuracy in the regular pass further down. */
+  int first_step_prepass = (All.TypeOfOpeningCriterion == 1) && (All.Ti_Current == 0) && (All.ErrTolTheta > 0);
+  if(first_step_prepass)
+    {
+#ifdef PMGRID
+      long_range_force();
+#endif /* #ifdef PMGRID */
+      gravity(timebin, fullflag);
+    }
+
+  gravity(timebin, fullflag); /* the regular (short-range) force computation */
+
+#ifdef FORCETEST
+  gravity_forcetest();
+#endif /* #ifdef FORCETEST */
+}
+
+/*! \brief Main routine for tree force calculation.
+ *
+ *  With SELFGRAVITY: builds a new force tree via construct_forcetree(), walks
+ *  it with gravity_tree() and post-processes with gravity_force_finalize();
+ *  optional paths use direct summation or a 1D spherical monopole instead.
+ *  Without SELFGRAVITY: zeroes GravAccel (and, with EVALPOTENTIAL, Potential)
+ *  of the active particles. The elapsed time is logged through mpi_printf().
+ *  \param[in] timebin Current timebin for which gravity is calculated.
+ *  \param[in] fullflag Flag whether this is a global timestep.
+ *
+ *  \return void
+ */
+void gravity(int timebin, int fullflag)
+{
+  double tstart = second(); /* start of the interval reported in the log line at the end */
+
+#if defined(SELFGRAVITY)
+  /* set new softening lengths on global steps to take into account possible cosmological time variation */
+  if(timebin == All.HighestOccupiedGravTimeBin)
+    set_softenings();
+
+#ifdef ALLOW_DIRECT_SUMMATION
+  if(TimeBinsGravity.GlobalNActiveParticles < DIRECT_SUMMATION_THRESHOLD)
+    {
+      gravity_direct(timebin);
+
+#ifndef ONEDIMS_SPHERICAL
+      gravity_force_finalize(timebin);
+#endif /* #ifndef ONEDIMS_SPHERICAL */
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+      calc_exact_gravity_for_particle_type();
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+#ifdef EXTERNALGRAVITY
+      gravity_external();
+#endif /* #ifdef EXTERNALGRAVITY */
+    }
+  else
+#endif /* #ifdef ALLOW_DIRECT_SUMMATION */
+    { /* else-branch of the direct-summation test above, or a plain block if ALLOW_DIRECT_SUMMATION is not set */
+#ifdef ONEDIMS_SPHERICAL
+      gravity_monopole_1d_spherical();
+#else /* #ifdef ONEDIMS_SPHERICAL */
+
+    if(TimeBinsGravity.GlobalNActiveParticles >= 10 * NTask)
+      construct_forcetree(0, 1, 0, timebin); /* mode 0 = all particles, optimized_domain_mapping = 1 */
+    else
+      construct_forcetree(0, 0, 0, timebin); /* mode 0 = all particles, optimized_domain_mapping = 0 */
+
+    gravity_tree(timebin);
+
+    gravity_force_finalize(timebin);
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+    calc_exact_gravity_for_particle_type();
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+#ifdef EXTERNALGRAVITY
+    gravity_external();
+#endif /* #ifdef EXTERNALGRAVITY */
+
+    /* note: we here moved 'gravity_force_finalize' in front of the non-standard physics;
+     * reminder: restart flag 18: post-processing calculation potential without running simulation
+     */
+    if(fullflag == FLAG_FULL_TREE && RestartFlag != 18)
+      calculate_non_standard_physics_with_valid_gravity_tree();
+
+    /* this is for runs which have the full tree at each time step; no HIERARCHICAL_GRAVITY */
+    calculate_non_standard_physics_with_valid_gravity_tree_always();
+
+    myfree(Father); /* Father and Nextnode are allocated in force_treebuild() */
+    myfree(Nextnode);
+    myfree(Tree_Points); /* allocated in force_treebuild_construct() */
+    force_treefree();
+#endif /* #ifdef ONEDIMS_SPHERICAL #else */
+    }
+
+#else /* defined(SELFGRAVITY) */
+
+  /* self-gravity is switched off: reset the accelerations (and potential) of all active particles */
+  int idx, i, j;
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+
+      if(i < 0) /* negative entries of the active list are skipped */
+        continue;
+
+#ifdef EVALPOTENTIAL
+      P[i].Potential = 0;
+#endif /* #ifdef EVALPOTENTIAL */
+
+      for(j = 0; j < 3; j++)
+        P[i].GravAccel[j] = 0;
+    }
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+  calc_exact_gravity_for_particle_type();
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+#ifdef EXTERNALGRAVITY
+  gravity_external();
+#endif /* #ifdef EXTERNALGRAVITY */
+
+#endif /* defined(SELFGRAVITY) #else */
+
+  double tend = second();
+  mpi_printf("GRAVITY: done for timebin %d,  %lld particles  (took %g sec)\n", timebin, TimeBinsGravity.GlobalNActiveParticles,
+             timediff(tstart, tend));
+}
+
+/*! \brief Adds individual gravity contribution and appropriate factors.
+ *
+ *  For every active particle: stores the magnitude of the (tree plus, with
+ *  PMGRID, mesh) acceleration in OldAcc, multiplies GravAccel and Potential
+ *  by All.G, and adds the terms needed e.g. for non-periodic cosmological runs.
+ *
+ *  \param[in] timebin Current timebin; only read under HIERARCHICAL_GRAVITY.
+ *
+ *  \return void
+ */
+void gravity_force_finalize(int timebin)
+{
+  int i, j, idx;
+  double ax, ay, az;
+
+  TIMER_START(CPU_TREE);
+
+  /* now add things for comoving integration */
+#ifdef GRAVITY_NOT_PERIODIC
+#ifndef PMGRID
+  if(All.ComovingIntegrationOn)
+    {
+      double fac = 0.5 * All.Hubble * All.Hubble * All.Omega0 / All.G; /* divided by All.G because GravAccel is multiplied by All.G further down */
+
+      for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+        {
+          i = TimeBinsGravity.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          for(j = 0; j < 3; j++)
+            P[i].GravAccel[j] += fac * P[i].Pos[j];
+        }
+    }
+#endif /* #ifndef PMGRID */
+#endif /* #ifdef GRAVITY_NOT_PERIODIC */
+
+#ifdef HIERARCHICAL_GRAVITY
+  if(timebin == All.HighestOccupiedGravTimeBin)
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+    {
+      mpi_printf("GRAVTREE: Setting OldAcc!\n");
+
+      for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+        {
+          i = TimeBinsGravity.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+#ifdef PMGRID
+          ax = P[i].GravAccel[0] + P[i].GravPM[0] / All.G; /* GravAccel gets its factor All.G only in the loop below, so GravPM is divided by All.G here */
+          ay = P[i].GravAccel[1] + P[i].GravPM[1] / All.G;
+          az = P[i].GravAccel[2] + P[i].GravPM[2] / All.G;
+#else  /* #ifdef PMGRID */
+        ax = P[i].GravAccel[0];
+        ay = P[i].GravAccel[1];
+        az = P[i].GravAccel[2];
+#endif /* #ifdef PMGRID #else */
+
+          P[i].OldAcc = sqrt(ax * ax + ay * ay + az * az); /* magnitude of the acceleration, still without the factor All.G */
+        }
+    }
+
+  /*  multiply by G */
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      for(j = 0; j < 3; j++)
+        P[i].GravAccel[j] *= All.G;
+
+#ifdef EVALPOTENTIAL
+
+#if defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC)
+      P[i].Potential += All.MassPMregions[0] * M_PI / (All.Asmth[0] * All.Asmth[0] * boxSize_X * boxSize_Y * boxSize_Z);
+#ifdef PLACEHIGHRESREGION
+      P[i].Potential += All.MassPMregions[1] * M_PI / (All.Asmth[1] * All.Asmth[1] * boxSize_X * boxSize_Y * boxSize_Z);
+#endif /* #ifdef PLACEHIGHRESREGION */
+#endif /* #if defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC) */
+
+      /* It's better to not remove the self-potential here to get a smooth potential field for co-spatial particles with varying mass
+       * or softening. For calculating the binding energy of a particle, the self-energy should then be removed as
+       *
+       *  P[i].Potential += P[i].Mass / (All.ForceSoftening[P[i].SofteningType] / 2.8);
+       */
+
+      P[i].Potential *= All.G;
+
+#ifdef PMGRID
+#ifndef FORCETEST_TESTFORCELAW
+      P[i].Potential += P[i].PM_Potential; /* add in long-range potential */
+#endif                                     /* #ifndef FORCETEST_TESTFORCELAW */
+#endif                                     /* #ifdef PMGRID */
+#endif                                     /* #ifdef EVALPOTENTIAL */
+      if(All.ComovingIntegrationOn)
+        { /* this branch has a body only when GRAVITY_NOT_PERIODIC is set */
+#ifdef GRAVITY_NOT_PERIODIC
+          double fac, r2;
+          int k;
+
+          fac = -0.5 * All.Omega0 * All.Hubble * All.Hubble;
+
+          for(k = 0, r2 = 0; k < 3; k++)
+            r2 += P[i].Pos[k] * P[i].Pos[k];
+
+#ifdef EVALPOTENTIAL
+          P[i].Potential += fac * r2;
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #ifdef GRAVITY_NOT_PERIODIC */
+        }
+      else
+        {
+          double fac, r2;
+          int k;
+
+          fac = -0.5 * All.OmegaLambda * All.Hubble * All.Hubble;
+
+          if(fac != 0) /* the position sum is skipped entirely when the OmegaLambda term vanishes */
+            {
+              for(k = 0, r2 = 0; k < 3; k++)
+                r2 += P[i].Pos[k] * P[i].Pos[k];
+#ifdef EVALPOTENTIAL
+              P[i].Potential += fac * r2;
+#endif /* #ifdef EVALPOTENTIAL */
+            }
+        }
+    }
+
+    /* Finally, the following factor allows a computation of a cosmological
+     * simulation with vacuum energy in physical coordinates
+     */
+#ifdef GRAVITY_NOT_PERIODIC
+#ifndef PMGRID
+  if(All.ComovingIntegrationOn == 0)
+    {
+      double fac = All.OmegaLambda * All.Hubble * All.Hubble; /* no division by All.G: GravAccel has already been multiplied by All.G above */
+
+      for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+        {
+          i = TimeBinsGravity.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          for(j = 0; j < 3; j++)
+            P[i].GravAccel[j] += fac * P[i].Pos[j];
+        }
+    }
+#endif /* #ifndef PMGRID */
+#endif /* #ifdef GRAVITY_NOT_PERIODIC */
+
+  TIMER_STOP(CPU_TREE);
+}
diff --git a/src/amuse/community/arepo/src/gravity/forcetree.c b/src/amuse/community/arepo/src/gravity/forcetree.c
new file mode 100644
index 0000000000..c659a75e97
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/forcetree.c
@@ -0,0 +1,1827 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/forcetree.c
+ * \date        05/2018
+ * \brief       Gravitational tree build.
+ * \details     This file contains the construction of the tree used for
+ *              calculating the gravitational force. The type of tree implemented
+ *              is a geometrical oct-tree, starting from a cube encompassing
+ *              all particles. This cube is automatically found in the domain
+ *              decomposition, which also splits up the global "top-level"
+ *              tree along node boundaries, moving the particles of different
+ *              parts of the tree to separate processors. In this version of
+ *              the code, the tree construction may be repeated every timestep
+ *              without a renewed domain decomposition. If particles are on
+ *              the "wrong" processor because a new domain decomposition has
+ *              not been carried out, they are sent as temporary points to the
+ *              right insertion processor according to the layout of the
+ *              top-level nodes. In addition, the mapping of the top-level
+ *              nodes to processors may be readjusted in order to improve
+ *              work-load balance for the current time step.
+ *              contains functions:
+ *                int construct_forcetree(int mode, int
+ *                  optimized_domain_mapping, int insert_only_primary,
+ *                  int timebin)
+ *                int force_treebuild(int npart, int optimized_domain_mapping,
+ *                  int insert_only_primary, int timebin)
+ *                int force_treebuild_construct(int npart, int
+ *                  optimized_domain_mapping, int insert_only_primary,
+ *                  int timebin)
+ *                int force_treebuild_insert_single_point(int i, unsigned
+ *                  long long *intpos, int th, unsigned char levels)
+ *                void force_assign_cost_values(void)
+ *                int force_create_empty_nodes(int no, int topnode, int bits,
+ *                  int x, int y, int z)
+ *                void force_insert_pseudo_particles(void)
+ *                void force_update_node_recursive(int no, int sib, int father,
+ *                  int *last)
+ *                void force_exchange_topleafdata(void)
+ *                void force_treeupdate_toplevel(int no, int topnode, int bits,
+ *                  int x, int y, int z)
+ *                void force_treeallocate(int maxpart, int maxindex)
+ *                void force_treefree(void)
+ *                void dump_particles(void)
+ *                int force_add_empty_nodes(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 17.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+
+static int *th_list; /* per particle: top-level leaf index determined in force_treebuild_construct() */
+static unsigned char *level_list; /* per particle: number of top-tree levels descended to reach that leaf */
+int NTreeInsert; /* number of points to insert; set at the start of force_treebuild() */
+
+#ifdef FOF
+#ifndef FOF_SECONDARY_LINK_TARGET_TYPES
+#define FOF_SECONDARY_LINK_TARGET_TYPES FOF_PRIMARY_LINK_TYPES
+#endif /* #ifndef FOF_SECONDARY_LINK_TARGET_TYPES */
+#endif /* #ifdef FOF */
+
+#ifdef HIERARCHICAL_GRAVITY
+#define INDEX(idx) (TimeBinsGravity.ActiveParticleList[idx]) /* loop counter -> particle index through the active list */
+#else /* #ifdef HIERARCHICAL_GRAVITY */
+#define INDEX(idx) (idx) /* loop counter is used directly as the particle index */
+#endif /* #ifdef HIERARCHICAL_GRAVITY #else */
+
+/*! \brief Builds the force tree, retrying until the build succeeds.
+ *
+ *  Each attempt (re)allocates the tree storage and calls force_treebuild();
+ *  a negative return value from it triggers another attempt. Two modes are
+ *  valid: 0 inserts all particles, 1 only the gas cells.
+ *
+ *  \param[in] mode 0: all particles (NumPart); 1: gas cells only (NumGas).
+ *  \param[in] optimized_domain_mapping Handed over to force_treebuild.
+ *  \param[in] insert_only_primary Handed over to force_treebuild.
+ *  \param[in] timebin Handed over to force_treebuild.
+ *
+ *  \return Number of nodes in tree.
+ */
+int construct_forcetree(int mode, int optimized_domain_mapping, int insert_only_primary, int timebin)
+{
+  int n_insert;
+  int build_result = 0; /* >= 0: node count of a finished tree, < 0: the last attempt failed */
+
+  for(;;)
+    {
+      /* force_treebuild() itself calls force_treefree() before it returns a
+       * negative value, so the storage must be allocated anew inside this
+       * loop. Exception: with insert_only_primary == 2 the tree is assumed to
+       * be allocated already (this happens only in fof.c), and hence the
+       * allocation is skipped on the first pass, but not on any retry.
+       */
+      int caller_allocated = (insert_only_primary == 2) && !(build_result < 0);
+
+      if(!caller_allocated)
+        force_treeallocate(NumPart, All.MaxPart); /* reallocate force tree structure */
+
+      /* number of particles handed to force_treebuild(); re-read on every
+       * pass rather than cached outside the loop */
+      if(mode == 0)
+        {
+          n_insert = NumPart; /* all particles */
+        }
+      else if(mode == 1)
+        {
+          n_insert = NumGas; /* only gas particles */
+        }
+      else
+        {
+          mpi_terminate("FORCETREE: construct_forcetree: invalid mode!\n");
+        }
+
+      build_result = force_treebuild(n_insert, optimized_domain_mapping, insert_only_primary, timebin);
+      if(build_result >= 0)
+        break;
+    }
+
+  return build_result;
+}
+
+/*! \brief Constructs the gravitational oct-tree and handles errors.
+ *
+ *  \param[in] npart Number of particles on local task.
+ *  \param[in] optimized_domain_mapping Specifies if mapping of the top-level
+ *             nodes to processors may be readjusted (forced to 0 with HIERARCHICAL_GRAVITY).
+ *  \param[in] insert_only_primary If this is set, only particles of the types
+ *             set in FOF_PRIMARY_LINK_TYPES are inserted.
+ *  \param[in] timebin Current timebin; needed for HIERARCHICAL_GRAVITY.
+ *  \return number of local+top nodes of the constructed tree, or -1 if the
+ *          build failed on any task (tree storage is then freed; call again).
+ */
+int force_treebuild(int npart, int optimized_domain_mapping, int insert_only_primary, int timebin)
+{
+  int i, flag;
+
+#ifdef HIERARCHICAL_GRAVITY
+  NTreeInsert              = TimeBinsGravity.NActiveParticles;
+  optimized_domain_mapping = 0;
+#else  /* #ifdef HIERARCHICAL_GRAVITY */
+  NTreeInsert = npart;
+#endif /* #ifdef HIERARCHICAL_GRAVITY #else */
+
+  TIMER_START(CPU_TREEBUILD);
+
+  long long loc_insert = NTreeInsert, tot_insert = 0; /* MPI_Reduce only fills tot_insert on rank 0; keep it defined elsewhere */
+  MPI_Reduce(&loc_insert, &tot_insert, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+
+  mpi_printf("FORCETREE: Tree construction.  (inserting %lld points)\n", tot_insert);
+
+  TIMER_STOPSTART(CPU_TREEBUILD, CPU_TREEBUILD_INSERT);
+
+  int flag_single = force_treebuild_construct(npart, optimized_domain_mapping, insert_only_primary, timebin);
+
+  TIMER_STOPSTART(CPU_TREEBUILD_INSERT, CPU_TREEBUILD);
+  /* the smallest status over all tasks decides: a failure on one task makes every task give up this attempt */
+  MPI_Allreduce(&flag_single, &flag, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+  if(flag < 0)
+    {
+      /* tree construction was not successful and needs to be repeated */
+      if(flag_single != -2) /* -2 is returned before Tree_Points has been allocated */
+        {
+          myfree(Tree_Points);
+        }
+
+      force_treefree();
+
+      if(flag == -3)
+        {
+          /* we need to do an extra domain decomposition to recover from an out-of-box condition for a particle,
+             which can happen if GRAVITY_NOT_PERIODIC is used */
+          ngb_treefree();
+          domain_free();
+
+          domain_Decomposition();
+
+          ngb_treeallocate();
+          ngb_treebuild(NumGas);
+        }
+      else
+        {
+          All.TreeAllocFactor *= 1.15;
+          mpi_printf("FORCETREE: Increasing TreeAllocFactor, new value=%g\n", All.TreeAllocFactor);
+
+          if(All.TreeAllocFactor > MAX_TREE_ALLOC_FACTOR)
+            {
+              char buf[500];
+              snprintf(buf, sizeof(buf), /* bounded write: the message can never overrun buf */
+                       "task %d: looks like a serious problem in tree construction, stopping with particle dump.  Tree_NumNodes=%d "
+                       "Tree_MaxNodes=%d  Tree_NumPartImported=%d NumPart=%d\n",
+                       ThisTask, Tree_NumNodes, Tree_MaxNodes, Tree_NumPartImported, NumPart);
+              dump_particles();
+              terminate("%s", buf); /* buf is data, not a format string */
+            }
+        }
+
+      TIMER_STOP(CPU_TREEBUILD); /* stop timer before returning */
+      return -1; /* stop right here with error code to invoke a new call of this function, possibly with changed values for npart */
+    }            /* if(flag < 0) */
+
+  Nextnode = (int *)mymalloc_movable(&Nextnode, "Nextnode", (Tree_MaxPart + NTopleaves + Tree_NumPartImported) * sizeof(int));
+  Father   = (int *)mymalloc_movable(&Father, "Father", (Tree_MaxPart + Tree_NumPartImported) * sizeof(int));
+
+  for(i = 0; i < Tree_MaxPart + Tree_NumPartImported; i++)
+    Father[i] = -1;
+
+  TIMER_STOPSTART(CPU_TREEBUILD, CPU_TREEBUILD_BRANCHES);
+
+  /* insert the pseudo particles that represent the mass distribution of other domains */
+  force_insert_pseudo_particles();
+
+  /* now compute the multipole moments recursively */
+  int last = -1;
+
+  force_update_node_recursive(Tree_MaxPart, -1, -1, &last);
+  /* close the chain: the element reported in 'last' gets -1 as its successor */
+  if(last >= Tree_MaxPart)
+    {
+      if(last >= Tree_MaxPart + Tree_MaxNodes) /* a pseudo-particle or imported particle */
+        Nextnode[last - Tree_MaxNodes] = -1;
+      else
+        Nodes[last].u.d.nextnode = -1;
+    }
+  else
+    Nextnode[last] = -1;
+
+  TIMER_STOPSTART(CPU_TREEBUILD_BRANCHES, CPU_TREEBUILD_TOPLEVEL);
+
+  force_exchange_topleafdata();
+
+  Tree_NextFreeNode = Tree_MaxPart + 1;
+  force_treeupdate_toplevel(Tree_MaxPart, 0, 1, 0, 0, 0);
+
+  TIMER_STOPSTART(CPU_TREEBUILD_TOPLEVEL, CPU_LOGS);
+
+#ifdef HIERARCHICAL_GRAVITY
+  if(timebin == All.HighestOccupiedGravTimeBin)
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+    {
+      double locdata[2] = {Tree_NumPartImported, Tree_NumNodes}, sumdata[2] = {0, 0}; /* sums arrive on rank 0 only */
+      MPI_Reduce(locdata, sumdata, 2, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+      double tot_imported = sumdata[0];
+      double tot_numnodes = sumdata[1];
+
+      mpi_printf(
+          "FORCETREE: Tree construction done.  <avg imported/local ratio>=%g <numnodes>=%g NTopnodes=%d NTopleaves=%d "
+          "tree-build-scalability=%g\n",
+          tot_imported / (All.TotNumPart + 1.0e-60), tot_numnodes / NTask, NTopnodes, NTopleaves,
+          ((double)((tot_numnodes - NTask * ((double)NTopnodes)) + NTopnodes)) / (tot_numnodes + 1.0e-60));
+    }
+#ifdef HIERARCHICAL_GRAVITY
+  else
+    mpi_printf("FORCETREE: Tree construction done.\n");
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+
+  TIMER_STOP(CPU_LOGS);
+
+  return Tree_NumNodes;
+}
+
+/*! \brief Constructs the gravitational oct-tree.
+ *
+ *  The index convention for accessing tree nodes is the following:
+ *  node index
+ *  [0...            Tree_MaxPart-1]  references single particles,
+ *  the indices
+ *  [Tree_MaxPart... Tree_MaxPart+Tree_MaxNodes-1]  references tree nodes
+ *  [Tree_MaxPart+Tree_MaxNodes...  Tree_MaxPart+Tree_MaxNodes+NTopleaves-1]
+ *     references "pseudo particles", i.e. mark branches on foreign CPUs
+ *  [Tree_MaxPart+Tree_MaxNodes+NTopleaves...
+ *     Tree_MaxPart+Tree_MaxNodes+NTopleaves+Tree_NumPartImported-1]
+ *     references imported points.
+ *
+ *  the pointer `Nodes' is shifted such that Nodes[Tree_MaxPart] gives the
+ *  first tree node (i.e. the root node).
+ *
+ *  \param[in] npart Number of particles on local task.
+ *  \param[in] optimized_domain_mapping Specifies if mapping of the top-level
+ *             nodes to processors may be readjusted.
+ *  \param[in] insert_only_primary If this is set, only particles of the types
+ *             set in FOF_PRIMARY_LINK_TYPES are inserted.
+ *  \param[in] timebin (unused).
+ *
+ *  \return if successful returns the number of local+top nodes of the
+ *             constructed tree;
+ *          -1 if the number of allocated tree nodes is too small;
+ *          -2 if the number of allocated tree nodes is even too small to fit
+ *             the top nodes;
+ *          -3 if a particle out of domain box condition was encountered.
+ */
+int force_treebuild_construct(int npart, int optimized_domain_mapping, int insert_only_primary, int timebin)
+{
+  int idx, i, j, no, flag = 0;
+  int ngrp, recvTask, count_ListNoData, *no_place = NULL;
+  unsigned long long *intposp;
+  MyDouble *posp;
+
+#ifdef DISABLE_OPTIMIZE_DOMAIN_MAPPING
+  optimized_domain_mapping = 0;
+#endif /* #ifdef DISABLE_OPTIMIZE_DOMAIN_MAPPING */
+
+#if !defined(GRAVITY_NOT_PERIODIC)
+  double boxsize[3];
+  boxsize[0] = boxSize_X;
+  boxsize[1] = boxSize_Y;
+  boxsize[2] = boxSize_Z;
+#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */
+
+  /* create an empty root node  */
+  Tree_NextFreeNode   = Tree_MaxPart;              /* index of first free node */
+  struct NODE *nfreep = &Nodes[Tree_NextFreeNode]; /* select first node        */
+
+  for(j = 0; j < 8; j++)
+    nfreep->u.suns[j] = -1;
+
+  nfreep->len = DomainLen;
+  for(j = 0; j < 3; j++)
+    nfreep->center[j] = DomainCenter[j];
+
+  Tree_NumNodes = 1;
+  Tree_NextFreeNode++;
+
+  /* create a set of empty nodes corresponding to the top-level domain
+   * grid. We need to generate these nodes first to make sure that we have a
+   * complete top-level tree which allows the easy insertion of the
+   * pseudo-particles at the right place
+   */
+  if(force_create_empty_nodes(Tree_MaxPart, 0, 1, 0, 0, 0) < 0)
+    return -2;
+
+  Tree_FirstNonTopLevelNode = Tree_NextFreeNode;
+
+  /* if a high-resolution region in a global tree is used, we need to generate
+   * an additional set of empty nodes to make sure that we have a complete
+   * top-level tree for the high-resolution inset
+   */
+
+  /* we first do a dummy allocation here that we'll resize later if needed, in which case the following arrays will have to be moved
+   * once. */
+  int guess_nimported = 1.2 * NumPart;
+
+  Tree_Points =
+      (struct treepoint_data *)mymalloc_movable(&Tree_Points, "Tree_Points", guess_nimported * sizeof(struct treepoint_data));
+
+  th_list    = (int *)mymalloc_movable(&th_list, "th_list", NumPart * sizeof(int));
+  level_list = (unsigned char *)mymalloc_movable(&level_list, "level_list", NumPart * sizeof(unsigned char));
+  Tree_IntPos_list =
+      (unsigned long long *)mymalloc_movable(&Tree_IntPos_list, "Tree_IntPos_list", 3 * NumPart * sizeof(unsigned long long));
+
+  if(NumPart < NTreeInsert)
+    {
+      terminate("ERROR: NumPart %d, NTreeInsert %d! This should not happen!", NumPart, NTreeInsert);
+    }
+
+  /* first check whether particles are still in domain box */
+  for(idx = 0; idx < NTreeInsert; idx++)
+    {
+      i = INDEX(idx);
+      if(i < 0)
+        continue;
+
+      if(P[i].Ti_Current != All.Ti_Current)
+        drift_particle(i, All.Ti_Current);
+
+      posp = &Tree_Pos_list[i * 3];
+
+      for(j = 0; j < 3; j++, posp++)
+        {
+#ifdef CELL_CENTER_GRAVITY
+          if(P[i].Type == 0)
+            *posp = SphP[i].Center[j];
+          else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+            *posp = P[i].Pos[j];
+
+#if !defined(GRAVITY_NOT_PERIODIC)
+          if(*posp < 0)
+            *posp += boxsize[j];
+          if(*posp >= boxsize[j])
+            *posp -= boxsize[j];
+#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */
+          if(*posp < DomainCorner[j] || *posp >= DomainCorner[j] + DomainLen)
+            {
+              flag = 1;
+              break;
+            }
+        }
+    }
+
+#if defined(GRAVITY_NOT_PERIODIC)
+  int flag_sum;
+  MPI_Allreduce(&flag, &flag_sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+  if(flag_sum)
+    {
+      mpi_printf(
+          "FORCETREE: Particle out of domain box condition was triggered. Need to do an (unplanned) new domain decomposition.\n");
+      myfree(Tree_IntPos_list);
+      myfree(level_list);
+      myfree(th_list);
+      return -3;
+    }
+#else  /* #if defined(GRAVITY_NOT_PERIODIC) */
+  if(flag)
+    {
+      char buf[1000];
+      sprintf(buf, "i=%d ID=%lld type=%d moved out of box. Pos[j=%d]=%g DomainCorner[%d]=%g DomainLen=%g", i, (long long)P[i].ID,
+              P[i].Type, j, P[i].Pos[j], j, DomainCorner[j], DomainLen);
+      terminate(buf);
+    }
+#endif /* #if defined(GRAVITY_NOT_PERIODIC) #else */
+
+#if defined(EVALPOTENTIAL) && defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC)
+  double mass_highres = 0, mass_lowres = 0;
+  for(int idx = 0; idx < NTreeInsert; idx++)
+    {
+      int i = INDEX(idx);
+      if(i < 0)
+        continue;
+
+#ifdef PLACEHIGHRESREGION
+      if(pmforce_is_particle_high_res(P[i].Type, &Tree_Pos_list[3 * i]))
+        mass_highres += P[i].Mass;
+      else
+#endif /* #ifdef PLACEHIGHRESREGION */
+        mass_lowres += P[i].Mass;
+    }
+  double mass_pmregions[2] = {mass_lowres, mass_highres};
+  MPI_Allreduce(mass_pmregions, All.MassPMregions, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+#endif /* #if defined(EVALPOTENTIAL) && defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC) */
+
+  /* now we determine for each point the insertion top-level node, and the task on which this lies */
+  if(optimized_domain_mapping)
+    {
+      TaskCost    = mymalloc("TaskCost", NTask * sizeof(double));
+      TaskCount   = mymalloc("TaskCount", NTask * sizeof(int));
+      DomainCost  = mymalloc("DomainCost", NTopleaves * sizeof(double));
+      DomainCount = mymalloc("DomainCount", NTopleaves * sizeof(int));
+      ListNoData  = mymalloc("ListNoData", NTopleaves * sizeof(struct no_list_data));
+      no_place    = mymalloc("no_place", NTopleaves * sizeof(int));
+
+      memset(no_place, -1, NTopleaves * sizeof(int));
+
+      for(j = 0; j < NTopleaves; j++)
+        DomainCost[j] = 0;
+      for(j = 0; j < NTopleaves; j++)
+        DomainCount[j] = 0;
+      for(j = 0; j < NTask; j++)
+        TaskCost[j] = 0;
+
+      for(j = 0; j < NTask; j++)
+        Send_count[j] = 0;
+
+      count_ListNoData = 0;
+    }
+
+  for(idx = 0; idx < NTreeInsert; idx++)
+    {
+      i = INDEX(idx);
+      if(i < 0)
+        continue;
+
+      posp = &Tree_Pos_list[i * 3];
+
+      unsigned long long xxb  = force_double_to_int(((*posp++ - DomainCorner[0]) * DomainInverseLen) + 1.0);
+      unsigned long long yyb  = force_double_to_int(((*posp++ - DomainCorner[1]) * DomainInverseLen) + 1.0);
+      unsigned long long zzb  = force_double_to_int(((*posp++ - DomainCorner[2]) * DomainInverseLen) + 1.0);
+      unsigned long long mask = ((unsigned long long)1) << (52 - 1);
+      unsigned char shiftx    = (52 - 1);
+      unsigned char shifty    = (52 - 2);
+      unsigned char shiftz    = (52 - 3);
+      unsigned char levels    = 0;
+
+      intposp    = &Tree_IntPos_list[i * 3];
+      *intposp++ = xxb;
+      *intposp++ = yyb;
+      *intposp++ = zzb;
+
+      no = 0;
+      while(TopNodes[no].Daughter >= 0) /* walk down top tree to find correct leaf */
+        {
+          unsigned char subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) |
+                                   ((unsigned char)((zzb & mask) >> (shiftz--))));
+
+          mask >>= 1;
+          levels++;
+
+          no = TopNodes[no].Daughter + TopNodes[no].MortonToPeanoSubnode[subnode];
+        }
+
+      no = TopNodes[no].Leaf;
+
+      th_list[i]    = no;
+      level_list[i] = levels;
+
+      if(optimized_domain_mapping)
+        {
+          /* find costs for all top leaves */
+
+          int bin = All.HighestActiveTimeBin;
+          double cost;
+
+          if(domain_bintolevel[bin] >= 0)
+            cost = MIN_FLOAT_NUMBER + P[i].GravCost[domain_bintolevel[bin]] * domain_grav_weight[bin];
+          else
+            {
+              if(domain_refbin[bin] >= 0)
+                cost = MIN_FLOAT_NUMBER + P[i].GravCost[domain_bintolevel[domain_refbin[bin]]] * domain_grav_weight[bin];
+              else
+                cost = 1.0;
+            }
+
+          int task = DomainTask[no];
+          TaskCost[task] += cost;
+
+          if(task == ThisTask)
+            {
+              DomainCost[no] += cost;
+              DomainCount[no]++;
+            }
+          else
+            {
+              int p = no_place[no];
+              if(p >= 0)
+                {
+                  ListNoData[p].domainCost += cost;
+                  ListNoData[p].domainCount++;
+                }
+              else
+                {
+                  Send_count[task]++;
+                  p                         = count_ListNoData++;
+                  no_place[no]              = p;
+                  ListNoData[p].task        = task;
+                  ListNoData[p].no          = no;
+                  ListNoData[p].domainCost  = cost;
+                  ListNoData[p].domainCount = 1;
+                }
+            }
+        }
+    }
+
+  if(optimized_domain_mapping)
+    {
+      /* if necessary, re-adjust the mapping of the top-level nodes to the processors */
+
+      if(All.Ti_Current > 0)
+        {
+          double current_balance, impact;
+          current_balance = force_get_current_balance(&impact);
+
+          mpi_printf("FORCETREE: current balance=  %g | %g\n", current_balance, impact);
+
+          if(All.HighestActiveTimeBin <
+             All.SmallestTimeBinWithDomainDecomposition) /* only do this for steps which did not do a domain decomposition */
+            {
+              if(impact > MAX_IMPACT_BEFORE_OPTIMIZATION)
+                {
+                  force_get_global_cost_for_leavenodes(count_ListNoData);
+                  force_optimize_domain_mapping();
+                }
+              else
+                {
+                  mpi_printf(
+                      "FORCETREE: we're not trying to optimize further because overall imbalance impact is only %g (threshold is "
+                      "%g)\n",
+                      impact, MAX_IMPACT_BEFORE_OPTIMIZATION);
+                  memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int));
+                }
+            }
+          else
+            {
+              mpi_printf("FORCETREE: we're not trying to optimize futher because we just did a domain decomposition\n");
+              memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int));
+            }
+        }
+      else
+        memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int));
+    }
+  else
+    memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int));
+
+  if(optimized_domain_mapping)
+    {
+      myfree(no_place);
+      myfree(ListNoData);
+      myfree(DomainCount);
+      myfree(DomainCost);
+      myfree(TaskCount);
+      myfree(TaskCost);
+    }
+
+  for(j = 0; j < NTask; j++)
+    {
+      Force_Send_count[j] = 0;
+    }
+
+  for(idx = 0; idx < NTreeInsert; idx++) /* make list of insertion top leaf and task for all particles */
+    {
+      i = INDEX(idx);
+      if(i < 0)
+        continue;
+
+      no         = th_list[i];
+      th_list[i] = DomainNodeIndex[no];
+
+      int task = DomainNewTask[no];
+
+      Tree_Task_list[i] = task;
+
+      if(task != ThisTask)
+        {
+          Force_Send_count[task]++;
+        }
+    }
+
+  MPI_Alltoall(Force_Send_count, 1, MPI_INT, Force_Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, Tree_NumPartImported = 0, Tree_NumPartExported = 0, Force_Recv_offset[0] = 0, Force_Send_offset[0] = 0; j < NTask; j++)
+    {
+      Tree_NumPartImported += Force_Recv_count[j];
+      Tree_NumPartExported += Force_Send_count[j];
+      if(j > 0)
+        {
+          Force_Send_offset[j] = Force_Send_offset[j - 1] + Force_Send_count[j - 1];
+          Force_Recv_offset[j] = Force_Recv_offset[j - 1] + Force_Recv_count[j - 1];
+        }
+    }
+
+  if(Tree_NumPartImported > guess_nimported)
+    {
+      printf("ThisTask=%d: Tree_NumPartImported=%d  NumPart=%d\n", ThisTask, Tree_NumPartImported, NumPart);
+      Tree_Points = (struct treepoint_data *)myrealloc_movable(Tree_Points, Tree_NumPartImported * sizeof(struct treepoint_data));
+    }
+
+  if(Tree_NumPartImported > 0.25 * NumPart)
+    {
+      Tree_MaxNodes = (int)(All.TreeAllocFactor * (NumPart + Tree_NumPartImported)) + NTopnodes;
+
+      Nodes += Tree_MaxPart;
+      Nodes = (struct NODE *)myrealloc_movable(Nodes, (Tree_MaxNodes + 1) * sizeof(struct NODE));
+      Nodes -= Tree_MaxPart;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+      ExtNodes += Tree_MaxPart;
+      ExtNodes = (struct ExtNODE *)myrealloc_movable(ExtNodes, (Tree_MaxNodes + 1) * sizeof(struct ExtNODE));
+      ExtNodes -= Tree_MaxPart;
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+    }
+
+  struct treepoint_data *export_Tree_Points =
+      (struct treepoint_data *)mymalloc("export_Tree_Points", Tree_NumPartExported * sizeof(struct treepoint_data));
+
+  for(j = 0; j < NTask; j++)
+    {
+      Force_Send_count[j] = 0;
+    }
+
+  for(idx = 0; idx < NTreeInsert; idx++) /* prepare particle data to be copied to other tasks */
+    {
+      i = INDEX(idx);
+      if(i < 0)
+        continue;
+
+      int task = Tree_Task_list[i];
+
+      if(task != ThisTask)
+        {
+          int n = Force_Send_offset[task] + Force_Send_count[task]++;
+
+          /* this point has to go to another task */
+          export_Tree_Points[n].Pos[0]        = Tree_Pos_list[3 * i + 0];
+          export_Tree_Points[n].Pos[1]        = Tree_Pos_list[3 * i + 1];
+          export_Tree_Points[n].Pos[2]        = Tree_Pos_list[3 * i + 2];
+          export_Tree_Points[n].IntPos[0]     = Tree_IntPos_list[3 * i + 0];
+          export_Tree_Points[n].IntPos[1]     = Tree_IntPos_list[3 * i + 1];
+          export_Tree_Points[n].IntPos[2]     = Tree_IntPos_list[3 * i + 2];
+          export_Tree_Points[n].Mass          = P[i].Mass;
+          export_Tree_Points[n].OldAcc        = P[i].OldAcc;
+          export_Tree_Points[n].SofteningType = P[i].SofteningType;
+          export_Tree_Points[n].index         = i;
+          export_Tree_Points[n].Type          = P[i].Type;
+          export_Tree_Points[n].th            = th_list[i];
+          export_Tree_Points[n].level         = level_list[i];
+#ifndef HIERARCHICAL_GRAVITY
+          if(TimeBinSynchronized[P[i].TimeBinGrav])
+            export_Tree_Points[n].ActiveFlag = 1;
+          else
+            export_Tree_Points[n].ActiveFlag = 0;
+#endif /* #ifndef HIERARCHICAL_GRAVITY */
+        }
+    }
+
+  /* exchange  data */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+      if(recvTask < NTask)
+        if(Force_Send_count[recvTask] > 0 || Force_Recv_count[recvTask] > 0)
+          MPI_Sendrecv(&export_Tree_Points[Force_Send_offset[recvTask]], Force_Send_count[recvTask] * sizeof(struct treepoint_data),
+                       MPI_BYTE, recvTask, TAG_DENS_A, &Tree_Points[Force_Recv_offset[recvTask]],
+                       Force_Recv_count[recvTask] * sizeof(struct treepoint_data), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD,
+                       MPI_STATUS_IGNORE);
+    }
+
+  myfree(export_Tree_Points);
+
+  Tree_ImportedNodeOffset = Tree_MaxPart + Tree_MaxNodes + NTopleaves;
+
+  int full_flag = 0;
+
+  /* now we insert all particles */
+  for(idx = 0; idx < NTreeInsert; idx++)
+    {
+      i = INDEX(idx);
+      if(i < 0)
+        continue;
+
+#ifdef NO_GAS_SELFGRAVITY
+      if(P[i].Type == 0)
+        continue;
+#endif /* #ifdef NO_GAS_SELFGRAVITY */
+#ifdef NO_SELFGRAVITY_TYPE
+      if(P[i].Type == NO_SELFGRAVITY_TYPE)
+        continue;
+#endif /* #ifdef NO_SELFGRAVITY_TYPE */
+#if defined(FOF) || defined(SUBFIND)
+      if(insert_only_primary == 1)
+        {
+          if(!((1 << P[i].Type) & (FOF_PRIMARY_LINK_TYPES)))
+            continue;
+        }
+      else if(insert_only_primary == 2)
+        {
+          if(!((1 << P[i].Type) & (FOF_SECONDARY_LINK_TARGET_TYPES)))
+            continue;
+        }
+#endif /* #if defined(FOF) || defined(SUBFIND) */
+      if(Tree_Task_list[i] == ThisTask)
+        {
+          if(force_treebuild_insert_single_point(i, &Tree_IntPos_list[3 * i], th_list[i], level_list[i]) < 0)
+            {
+              full_flag = 1;
+              break;
+            }
+        }
+    }
+
+  if(full_flag == 0) /* only continue if previous step was successful */
+    {
+      for(i = 0; i < Tree_NumPartImported; i++)
+        {
+#ifdef NO_GAS_SELFGRAVITY
+          if(Tree_Points[i].Type == 0)
+            continue;
+#endif /* #ifdef NO_GAS_SELFGRAVITY */
+#ifdef NO_SELFGRAVITY_TYPE
+          if(Tree_Points[i].Type == NO_SELFGRAVITY_TYPE)
+            continue;
+#endif /* #ifdef NO_SELFGRAVITY_TYPE */
+#if defined(FOF) || defined(SUBFIND)
+          if(insert_only_primary == 1)
+            {
+              if(!((1 << Tree_Points[i].Type) & (FOF_PRIMARY_LINK_TYPES)))
+                continue;
+            }
+          else if(insert_only_primary == 2)
+            {
+              if(!((1 << Tree_Points[i].Type) & (FOF_SECONDARY_LINK_TARGET_TYPES)))
+                continue;
+            }
+#endif /* #if defined(FOF) || defined(SUBFIND) */
+          if(force_treebuild_insert_single_point(i + Tree_ImportedNodeOffset, Tree_Points[i].IntPos, Tree_Points[i].th,
+                                                 Tree_Points[i].level) < 0)
+            {
+              full_flag = 1;
+              break;
+            }
+        }
+    }
+
+  myfree_movable(Tree_IntPos_list);
+  myfree_movable(level_list);
+  myfree_movable(th_list);
+
+  if(full_flag)
+    return -1;
+
+#ifdef ADDBACKGROUNDGRID
+  if(force_add_empty_nodes())
+    return -1;
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+  return Tree_NumNodes;
+}
+
+/*! \brief Inserts a single point (a local particle or an imported point) into the gravitational tree.
+ *         Starting at node th, the walk descends one octant per level until an empty daughter slot is found.
+ *  \param[in] i Index stored in the tree for the point (imported points are passed with Tree_ImportedNodeOffset added).
+ *  \param[in] intpos Integer representation of the point position (three entries: x, y, z).
+ *  \param[in] th Index of the node at which the descent starts.
+ *  \param[in] levels Tree level of node th; it selects the first coordinate bit that is examined.
+ *
+ *  \return 0 if successful;
+ *          -1 if too few nodes have been allocated in the Nodes array
+ */
+int force_treebuild_insert_single_point(int i, unsigned long long *intpos, int th, unsigned char levels)
+{
+  int j, parent = -1; /* parent: internal node the walk descended from last; stays -1 until the first descent */
+  unsigned char subnode       = 0; /* daughter slot (0..7) selected in the most recent step */
+  unsigned long long xxb      = intpos[0];
+  unsigned long long yyb      = intpos[1];
+  unsigned long long zzb      = intpos[2];
+  unsigned long long mask     = ((unsigned long long)1) << ((52 - 1) - levels); /* coordinate bit tested at the current level */
+  unsigned char shiftx        = (52 - 1) - levels; /* the three shifts move the tested x/y/z bit to bit 0/1/2 */
+  unsigned char shifty        = (52 - 2) - levels;
+  unsigned char shiftz        = (52 - 3) - levels;
+  signed long long centermask = (0xFFF0000000000000llu); /* NOTE(review): signed, presumably so that >>= fills with one-bits - confirm */
+  unsigned long long *intppos;
+  centermask >>= levels;
+
+  while(1)
+    {
+      if(th >= Tree_MaxPart && th < Tree_ImportedNodeOffset) /* we are dealing with an internal node */
+        {
+          subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) |
+                     ((unsigned char)((zzb & mask) >> (shiftz--)))); /* = x-bit + 2 * y-bit + 4 * z-bit */
+
+          centermask >>= 1;
+          mask >>= 1;
+          levels++;
+
+          if(levels > MAX_TREE_LEVEL)
+            {
+              /* seems like we're dealing with particles at identical (or extremely close)
+               * locations. Shift subnode index to allow tree construction. Note: Multipole moments
+               * of tree are still correct, but one should choose MAX_TREE_LEVEL large enough to have
+               *      DomainLen/2^MAX_TREE_LEVEL  < gravitational softening length
+               */
+              for(j = 0; j < 8; j++) /* look for a free slot at or after subnode */
+                {
+                  if(Nodes[th].u.suns[subnode] < 0)
+                    break;
+
+                  subnode++;
+                  if(subnode >= 8)
+                    subnode = 7; /* clamp to the last slot */
+                }
+            }
+
+          int nn = Nodes[th].u.suns[subnode];
+
+          if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */
+            {
+              parent = th;
+              th     = nn;
+            }
+          else
+            {
+              /* here we have found an empty slot where we can attach
+               * the new particle as a leaf.
+               */
+              Nodes[th].u.suns[subnode] = i;
+              break; /* done for this particle */
+            }
+        }
+      else
+        {
+          /* We try to insert into a leaf with a single particle.  Need
+           * to generate a new internal node at this point.
+           */
+          Nodes[parent].u.suns[subnode] = Tree_NextFreeNode; /* the new node takes the slot through which th was reached */
+          struct NODE *nfreep           = &Nodes[Tree_NextFreeNode];
+
+          double len = ((double)(mask << 1)) * DomainBigFac;
+          double cx  = ((double)((xxb & centermask) | mask)) * DomainBigFac + DomainCorner[0];
+          double cy  = ((double)((yyb & centermask) | mask)) * DomainBigFac + DomainCorner[1];
+          double cz  = ((double)((zzb & centermask) | mask)) * DomainBigFac + DomainCorner[2];
+
+          nfreep->len       = len;
+          nfreep->center[0] = cx;
+          nfreep->center[1] = cy;
+          nfreep->center[2] = cz;
+
+          for(j = 0; j < 8; j++)
+            nfreep->u.suns[j] = -1;
+
+          if(th >= Tree_ImportedNodeOffset)
+            intppos = Tree_Points[th - Tree_ImportedNodeOffset].IntPos; /* the displaced leaf is an imported point */
+          else
+            intppos = &Tree_IntPos_list[3 * th]; /* the displaced leaf is a local particle */
+
+          subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) |
+                     ((unsigned char)((intppos[2] & mask) >> shiftz))); /* octant of the displaced leaf; shifts are not decremented here */
+
+          nfreep->u.suns[subnode] = th;
+
+          th = Tree_NextFreeNode; /* resume trying to insert the new particle into the newly created internal node */
+          Tree_NumNodes++;
+          Tree_NextFreeNode++;
+
+          if(Tree_NumNodes >= Tree_MaxNodes) /* node storage used up; point i has not been attached at this stage */
+            {
+              return -1;
+            }
+        }
+    }
+
+  return 0;
+}
+
+/*! \brief Distributes the gravity costs of each node among the particles it
+ *         contains and stores the result in P[].GravCost[TakeLevel].
+ *         Does nothing if TakeLevel < 0; otherwise it issues collective MPI calls.
+ *  \return void
+ */
+void force_assign_cost_values(void)
+{
+  int idx, i, ngrp, recvTask;
+
+  if(TakeLevel >= 0)
+    {
+      int thread;
+
+      /* consolidate the cost measurements done by the different threads */
+      for(thread = 1; thread < NUM_THREADS; thread++)
+        for(i = 0; i < NumPart; i++)
+          Thread[0].P_CostCount[i] += Thread[thread].P_CostCount[i];
+
+      for(thread = 1; thread < NUM_THREADS; thread++)
+        for(i = 0; i < Tree_NumNodes; i++)
+          Thread[0].Node_CostCount[i + Tree_MaxPart] += Thread[thread].Node_CostCount[i + Tree_MaxPart];
+
+      for(thread = 1; thread < NUM_THREADS; thread++)
+        for(i = 0; i < Tree_NumPartImported; i++)
+          Thread[0].TreePoints_CostCount[i] += Thread[thread].TreePoints_CostCount[i];
+
+#ifdef VERBOSE
+      /* calculate some check sums to validate the total cost assignment */
+      double sumbefore = 0, sumbeforetot;
+      for(i = 0; i < NumPart; i++)
+        sumbefore += P[i].GravCost[TakeLevel];
+      MPI_Allreduce(&sumbefore, &sumbeforetot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+      double nodecost = 0, nodecosttot;
+      for(i = 0; i < Tree_NumNodes; i++)
+        nodecost += Thread[0].Node_CostCount[i + Tree_MaxPart];
+      MPI_Allreduce(&nodecost, &nodecosttot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+      double importedcost = 0, importedcosttot;
+      for(i = 0; i < Tree_NumPartImported; i++)
+        importedcost += Thread[0].TreePoints_CostCount[i];
+      MPI_Allreduce(&importedcost, &importedcosttot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+      double partcost = 0, partcosttot;
+      for(idx = 0; idx < NTreeInsert; idx++)
+        {
+          i = INDEX(idx);
+          if(i < 0)
+            continue;
+
+          {
+            int no = Father[i];
+
+            if(no >= 0) /* only particles that have a father node are counted */
+              partcost += Thread[0].P_CostCount[i];
+          }
+        }
+      MPI_Allreduce(&partcost, &partcosttot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+#endif /* #ifdef VERBOSE */
+
+      double *loc_cost  = mymalloc("loc_cost", NTopnodes * sizeof(double));
+      double *glob_cost = mymalloc("glob_cost", NTopnodes * sizeof(double));
+
+      for(i = 0; i < NTopnodes; i++) /* NOTE(review): presumably the first NTopnodes tree nodes are the top-level nodes - confirm */
+        loc_cost[i] = Thread[0].Node_CostCount[i + Tree_MaxPart];
+
+      MPI_Allreduce(loc_cost, glob_cost, NTopnodes, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); /* sum these counters over all tasks */
+
+      for(i = 0; i < NTopnodes; i++)
+        Thread[0].Node_CostCount[i + Tree_MaxPart] = glob_cost[i];
+
+      myfree(glob_cost);
+      myfree(loc_cost);
+
+      for(i = 0; i < NumPart; i++) /* clear the old values of all particles before new ones are assigned */
+        P[i].GravCost[TakeLevel] = 0;
+
+      /* distribute costs of parent nodes to particles */
+      for(idx = 0; idx < NTreeInsert; idx++)
+        {
+          i = INDEX(idx);
+          if(i < 0)
+            continue;
+
+          {
+            double sum = Thread[0].P_CostCount[i];
+
+            int no = Father[i];
+
+            while(no >= 0) /* walk up through all ancestors of the particle */
+              {
+                if(Nodes[no].u.d.mass > 0)
+                  sum += Thread[0].Node_CostCount[no] * (P[i].Mass / Nodes[no].u.d.mass); /* share proportional to the mass fraction */
+
+                no = Nodes[no].u.d.father;
+              }
+
+            P[i].GravCost[TakeLevel] = sum;
+          }
+        }
+
+      /* Now, if we moved points to other CPUs, we need to collect these cost values */
+      struct gravcost_data
+      {
+        float GravCost; /* cost accumulated for the point on the task that imported it */
+        int index;      /* copy of Tree_Points[].index, used below as index into P[] on the exporting task */
+      } * gdata_export, *gdata_import;
+
+      gdata_export = mymalloc("grav_data_export", Tree_NumPartExported * sizeof(struct gravcost_data));
+      gdata_import = mymalloc("grav_data_import", Tree_NumPartImported * sizeof(struct gravcost_data));
+
+      for(i = 0; i < Tree_NumPartImported; i++)
+        {
+          double sum = Thread[0].TreePoints_CostCount[i];
+
+          int no = Father[i + Tree_MaxPart]; /* Father[] entries of imported points start at offset Tree_MaxPart */
+
+          while(no >= 0)
+            {
+              if(Nodes[no].u.d.mass > 0)
+                sum += Thread[0].Node_CostCount[no] * Tree_Points[i].Mass / Nodes[no].u.d.mass;
+
+              no = Nodes[no].u.d.father;
+            }
+
+          gdata_import[i].GravCost = sum;
+          gdata_import[i].index    = Tree_Points[i].index;
+        }
+
+      /* exchange data; the imported results are sent (Recv counts/offsets) and received into the export buffer (Send counts/offsets) */
+      for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+        {
+          recvTask = ThisTask ^ ngrp; /* pairwise partner for this round */
+
+          if(recvTask < NTask)
+            {
+              if(Force_Send_count[recvTask] > 0 || Force_Recv_count[recvTask] > 0)
+                {
+                  MPI_Sendrecv(&gdata_import[Force_Recv_offset[recvTask]], Force_Recv_count[recvTask] * sizeof(struct gravcost_data),
+                               MPI_BYTE, recvTask, TAG_DENS_A, &gdata_export[Force_Send_offset[recvTask]],
+                               Force_Send_count[recvTask] * sizeof(struct gravcost_data), MPI_BYTE, recvTask, TAG_DENS_A,
+                               MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+                }
+            }
+        }
+
+      for(i = 0; i < Tree_NumPartExported; i++) /* store the returned costs with the local particles they belong to */
+        P[gdata_export[i].index].GravCost[TakeLevel] = gdata_export[i].GravCost;
+
+      myfree(gdata_import);
+      myfree(gdata_export);
+
+#ifdef VERBOSE
+      double sum = 0, sumtot;
+      for(i = 0; i < NumPart; i++)
+        sum += P[i].GravCost[TakeLevel];
+      MPI_Allreduce(&sum, &sumtot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+      mpi_printf(
+          "FORCETREE: Cost assignment for TakeLevel=%d, highest active-TimeBin=%d   yields cost=%g|%g (before %g)  nodecosttot=%g  "
+          "partcosttot=%g importedcosttot=%g\n",
+          TakeLevel, All.HighestActiveTimeBin, sumtot, nodecosttot + partcosttot + importedcosttot, sumbeforetot, nodecosttot,
+          partcosttot, importedcosttot);
+#else  /* #ifdef VERBOSE */
+      mpi_printf("FORCETREE: Cost assignment for TakeLevel=%d, highest active-TimeBin=%d\n", TakeLevel, All.HighestActiveTimeBin);
+#endif /* #ifdef VERBOSE #else */
+    }
+}
+
+/*! \brief Builds the skeleton of empty tree nodes that mirrors the top-level
+ *         tree of the domain decomposition.
+ *
+ *  Every daughter of a top-level node gets a tree node of its own, whether or
+ *  not particles end up inside it.  The top-level part of the tree is thus
+ *  always complete, and the pseudo-particles of other CPUs can be attached
+ *  to a node at a known level even for very sparse particle distributions.
+ *  The function recurses until the leaves of the top-level tree are reached.
+ *
+ * \param[in] no Tree node whose eight daughter nodes are to be created.
+ * \param[in] topnode Index of the entry in 'TopNodes' that corresponds to no.
+ * \param[in] bits The daughter level has 2^bits nodes per dimension.
+ * \param[in] x Integer x-coordinate of the parent node on its own level,
+ *            i.e. in the range [0,2^(bits-1) - 1].
+ * \param[in] y Integer y-coordinate of the parent node on its own level,
+ *            i.e. in the range [0,2^(bits-1) - 1].
+ * \param[in] z Integer z-coordinate of the parent node on its own level,
+ *            i.e. in the range [0,2^(bits-1) - 1].
+ *
+ * \return 0 on success;
+ *         -1 if the allocated node storage is exhausted before all nodes
+ *         have been created.
+ */
+int force_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z)
+{
+  if(TopNodes[topnode].Daughter < 0) /* leaf of the top-level tree: nothing to create */
+    return 0;
+  for(int ix = 0; ix < 2; ix++) /* visit the eight daughter octants */
+    for(int iy = 0; iy < 2; iy++)
+      for(int iz = 0; iz < 2; iz++)
+        {
+          if(Tree_NumNodes >= Tree_MaxNodes) /* node storage is used up */
+            {
+              if(All.TreeAllocFactor > MAX_TREE_ALLOC_FACTOR)
+                {
+                  char buf[500];
+                  sprintf(buf, "task %d: looks like a serious problem (NTopnodes=%d), stopping with particle dump.\n", ThisTask,
+                          NTopnodes);
+                  dump_particles();
+                  terminate(buf);
+                }
+              return -1;
+            }
+
+          /* offset of this octant among the daughters of the top-level node, taken from its Peano-Hilbert key */
+          const int sub      = 7 & peano_hilbert_key((x << 1) + ix, (y << 1) + iy, (z << 1) + iz, bits);
+          const int topchild = TopNodes[topnode].Daughter + sub;
+          const int created  = Tree_NextFreeNode;
+
+          Nodes[no].u.suns[ix + 2 * iy + 4 * iz] = created;
+
+          struct NODE *child   = &Nodes[created]; /* half the parent's side length, centre moved by a quarter of it */
+          const double quarter = 0.25 * Nodes[no].len;
+          child->len           = 0.5 * Nodes[no].len;
+          child->center[0]     = Nodes[no].center[0] + (2 * ix - 1) * quarter;
+          child->center[1]     = Nodes[no].center[1] + (2 * iy - 1) * quarter;
+          child->center[2]     = Nodes[no].center[2] + (2 * iz - 1) * quarter;
+
+          for(int s = 0; s < 8; s++)
+            child->u.suns[s] = -1;
+
+          if(TopNodes[topchild].Daughter == -1) /* top-level leaf: remember which tree node represents it */
+            DomainNodeIndex[TopNodes[topchild].Leaf] = created;
+
+          Tree_NextFreeNode++;
+          Tree_NumNodes++;
+
+          if(force_create_empty_nodes(created, topchild, bits + 1, 2 * x + ix, 2 * y + iy, 2 * z + iz) < 0)
+            return -1; /* the granddaughter nodes could not all be created */
+        }
+
+  return 0;
+}
+
+/*! \brief Hooks pseudo-particles into the tree.
+ *
+ *  For every top-level leaf that is assigned to another task, the first
+ *  daughter slot of the corresponding tree node is pointed at a
+ *  pseudo-particle index (Tree_MaxPart + Tree_MaxNodes + leaf number).
+ *  The pseudo-particle stands in for the mass distribution held by the
+ *  other CPU; its mass and position are filled in later on.
+ *
+ *  \return void
+ */
+void force_insert_pseudo_particles(void)
+{
+  for(int leaf = 0; leaf < NTopleaves; leaf++)
+    {
+      if(DomainNewTask[leaf] == ThisTask)
+        continue; /* top leaf is handled by this task: no placeholder needed */
+
+      Nodes[DomainNodeIndex[leaf]].u.suns[0] = Tree_MaxPart + Tree_MaxNodes + leaf;
+    }
+}
+
+/*! \brief Determines multipole moments.
+ *
+ *  For an internal node, this routine recursively visits all daughters and
+ *  stores total mass, centre of mass and maximum softening type in Nodes[],
+ *  together with the sibling/father links and the nextnode walk order.
+ *  For any other index only the walk-order link and, for particles and
+ *  imported points, the Father[] entry are set.
+ *  \param[in] no Node for which the moments shall be found.
+ *  \param[in] sib Sibling of node no.
+ *  \param[in] father Father node of node no.
+ *  \param[in, out] last Last node for which this function was called, or -1
+ *                  when called for root node.
+ *  \return void
+ */
+void force_update_node_recursive(int no, int sib, int father, int *last)
+{
+  int j, jj, p, pp, nextsib, suns[8];
+  double s[3], mass; /* s accumulates mass * position and is divided by mass at the end */
+  unsigned char maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+  double mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  unsigned char maxhydrosofttype;
+  unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+  if(no >= Tree_MaxPart && no < Tree_MaxPart + Tree_MaxNodes) /* internal node */
+    {
+      for(j = 0; j < 8; j++)
+        suns[j] = Nodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will
+                                          overwrite one element (union!) */
+      if(*last >= 0) /* make the previously visited element point to this node in the walk order */
+        {
+          if(*last >= Tree_MaxPart)
+            {
+              if(*last >= Tree_MaxPart + Tree_MaxNodes)
+                Nextnode[*last - Tree_MaxNodes] = no; /* a pseudo-particle or imported point */
+              else
+                Nodes[*last].u.d.nextnode = no; /* an internal node */
+            }
+          else
+            Nextnode[*last] = no; /* a local particle */
+        }
+
+      *last = no;
+
+      mass        = 0;
+      s[0]        = 0;
+      s[1]        = 0;
+      s[2]        = 0;
+      maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; /* NOTE(review): start value, presumably an extra All.ForceSoftening entry - confirm */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+      for(j = 0; j < NSOFTTYPES; j++)
+        mass_per_type[j] = 0;
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      maxhydrosofttype = NSOFTTYPES;
+      minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      for(j = 0; j < 8; j++)
+        {
+          if((p = suns[j]) >= 0)
+            {
+              /* check if we have a sibling on the same level */
+              for(jj = j + 1; jj < 8; jj++)
+                if((pp = suns[jj]) >= 0)
+                  break;
+
+              if(jj < 8) /* yes, we do */
+                nextsib = pp;
+              else
+                nextsib = sib; /* no: the daughter inherits the sibling of this node */
+
+              force_update_node_recursive(p, nextsib, no, last); /* daughter first, so that its moments can be read below */
+
+              if(p < Tree_MaxPart) /* a particle */
+                {
+                  MyDouble *pos = &Tree_Pos_list[3 * p];
+
+                  mass += P[p].Mass;
+                  s[0] += P[p].Mass * pos[0];
+                  s[1] += P[p].Mass * pos[1];
+                  s[2] += P[p].Mass * pos[2];
+
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[P[p].SofteningType])
+                    maxsofttype = P[p].SofteningType;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  mass_per_type[P[p].Type == 0 ? 0 : P[p].SofteningType] += P[p].Mass;
+
+                  if(P[p].Type == 0)
+                    {
+                      if(maxhydrosofttype < P[p].SofteningType)
+                        maxhydrosofttype = P[p].SofteningType;
+                      if(minhydrosofttype > P[p].SofteningType)
+                        minhydrosofttype = P[p].SofteningType;
+                    }
+#else  /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                  mass_per_type[P[p].SofteningType] += P[p].Mass;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+              else if(p < Tree_MaxPart + Tree_MaxNodes) /* an internal node  */
+                {
+                  mass += Nodes[p].u.d.mass;
+                  s[0] += Nodes[p].u.d.mass * Nodes[p].u.d.s[0];
+                  s[1] += Nodes[p].u.d.mass * Nodes[p].u.d.s[1];
+                  s[2] += Nodes[p].u.d.mass * Nodes[p].u.d.s[2];
+
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[Nodes[p].u.d.maxsofttype])
+                    maxsofttype = Nodes[p].u.d.maxsofttype;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+                  int k;
+                  for(k = 0; k < NSOFTTYPES; k++)
+                    mass_per_type[k] += ExtNodes[p].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  if(maxhydrosofttype < Nodes[p].u.d.maxhydrosofttype)
+                    maxhydrosofttype = Nodes[p].u.d.maxhydrosofttype;
+                  if(minhydrosofttype > Nodes[p].u.d.minhydrosofttype)
+                    minhydrosofttype = Nodes[p].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+              else if(p < Tree_MaxPart + Tree_MaxNodes + NTopleaves) /* a pseudo particle */
+                {
+                  /* nothing to be done here because the mass of the
+                   *  pseudo-particle is still zero. This will be changed
+                   * later.
+                   */
+                }
+              else
+                { /* an imported point */
+                  int n = p - (Tree_MaxPart + Tree_MaxNodes + NTopleaves); /* position of the point in Tree_Points[] */
+
+                  if(n >= Tree_NumPartImported)
+                    terminate("n >= Tree_NumPartImported");
+
+                  mass += Tree_Points[n].Mass;
+                  s[0] += Tree_Points[n].Mass * Tree_Points[n].Pos[0];
+                  s[1] += Tree_Points[n].Mass * Tree_Points[n].Pos[1];
+                  s[2] += Tree_Points[n].Mass * Tree_Points[n].Pos[2];
+
+                  /* Might not need the following routine */
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[Tree_Points[n].SofteningType])
+                    maxsofttype = Tree_Points[n].SofteningType;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  mass_per_type[Tree_Points[n].Type == 0 ? 0 : Tree_Points[n].SofteningType] += Tree_Points[n].Mass;
+
+                  if(Tree_Points[n].Type == 0)
+                    {
+                      if(maxhydrosofttype < Tree_Points[n].SofteningType)
+                        maxhydrosofttype = Tree_Points[n].SofteningType;
+                      if(minhydrosofttype > Tree_Points[n].SofteningType)
+                        minhydrosofttype = Tree_Points[n].SofteningType;
+                    }
+#else  /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                  mass_per_type[Tree_Points[n].SofteningType] += Tree_Points[n].Mass;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+            }
+        }
+
+      if(mass) /* turn the mass-weighted position sum into the centre of mass */
+        {
+          s[0] /= mass;
+          s[1] /= mass;
+          s[2] /= mass;
+        }
+      else
+        {
+          s[0] = Nodes[no].center[0]; /* massless node: use the geometric centre instead */
+          s[1] = Nodes[no].center[1];
+          s[2] = Nodes[no].center[2];
+        }
+
+      Nodes[no].u.d.mass        = mass;
+      Nodes[no].u.d.s[0]        = s[0];
+      Nodes[no].u.d.s[1]        = s[1];
+      Nodes[no].u.d.s[2]        = s[2];
+      Nodes[no].u.d.maxsofttype = maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+      int k;
+      for(k = 0; k < NSOFTTYPES; k++)
+        ExtNodes[no].mass_per_type[k] = mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      Nodes[no].u.d.maxhydrosofttype = maxhydrosofttype;
+      Nodes[no].u.d.minhydrosofttype = minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      Nodes[no].u.d.sibling = sib;
+      Nodes[no].u.d.father  = father;
+    }
+  else /* single particle, pseudo particle or imported point */
+    {
+      if(*last >= 0) /* same walk-order linking as in the internal-node branch */
+        {
+          if(*last >= Tree_MaxPart)
+            {
+              if(*last >= Tree_MaxPart + Tree_MaxNodes)
+                Nextnode[*last - Tree_MaxNodes] = no; /* a pseudo-particle or an imported point */
+              else
+                Nodes[*last].u.d.nextnode = no;
+            }
+          else
+            Nextnode[*last] = no;
+        }
+
+      *last = no;
+
+      if(no < Tree_MaxPart) /* only set it for single particles... */
+        Father[no] = father;
+      if(no >= Tree_MaxPart + Tree_MaxNodes + NTopleaves) /* ...or for imported points */
+        Father[no - Tree_MaxNodes - NTopleaves] = father; /* i.e. Father[Tree_MaxPart + index of the imported point] */
+    }
+}
+
+/*! \brief Communicates the values of the multipole moments of the
+ *         top-level tree-nodes of the domain grid.
+ *
+ *  Every task contributes the top leaves assigned to it; the gathered data of
+ *  the other tasks' leaves is then copied into the corresponding local Nodes.
+ *
+ *  \return void
+ */
+void force_exchange_topleafdata(void)
+{
+  struct DomainNODE /* per-leaf record; it is sent as raw bytes (MPI_BYTE) below */
+  {
+    MyDouble s[3];
+    MyDouble mass;
+#ifdef MULTIPLE_NODE_SOFTENING
+    MyDouble mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+    unsigned char maxhydrosofttype;
+    unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+    unsigned char maxsofttype;
+#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES)
+    int NodeGrNr; /* NOTE(review): neither written nor read in this function - confirm it is unused here */
+#endif /* #if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) */
+  };
+
+  struct DomainNODE *DomainMoment = (struct DomainNODE *)mymalloc("DomainMoment", NTopleaves * sizeof(struct DomainNODE));
+
+  /* share the pseudo-particle data across CPUs */
+  int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask);
+  int *recvoffset = (int *)mymalloc("recvoffset", sizeof(int) * NTask);
+  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
+  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);
+
+  for(int task = 0; task < NTask; task++)
+    recvcounts[task] = 0;
+
+  for(int n = 0; n < NTopleaves; n++)
+    recvcounts[DomainNewTask[n]]++; /* number of top leaves assigned to each task */
+
+  for(int task = 0; task < NTask; task++)
+    bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE);
+
+  recvoffset[0] = 0, byteoffset[0] = 0; /* recvoffset counts records, byteoffset counts bytes */
+  for(int task = 1; task < NTask; task++)
+    {
+      recvoffset[task] = recvoffset[task - 1] + recvcounts[task - 1];
+      byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+    }
+
+  struct DomainNODE *loc_DomainMoment =
+      (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE));
+
+  int idx = 0;
+  for(int n = 0; n < NTopleaves; n++)
+    {
+      if(DomainNewTask[n] == ThisTask)
+        {
+          int no = DomainNodeIndex[n];
+
+          /* read out the multipole moments from the local base cells */
+          loc_DomainMoment[idx].s[0]        = Nodes[no].u.d.s[0];
+          loc_DomainMoment[idx].s[1]        = Nodes[no].u.d.s[1];
+          loc_DomainMoment[idx].s[2]        = Nodes[no].u.d.s[2];
+          loc_DomainMoment[idx].mass        = Nodes[no].u.d.mass;
+          loc_DomainMoment[idx].maxsofttype = Nodes[no].u.d.maxsofttype;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+          for(int k = 0; k < NSOFTTYPES; k++)
+            loc_DomainMoment[idx].mass_per_type[k] = ExtNodes[no].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+          loc_DomainMoment[idx].maxhydrosofttype = Nodes[no].u.d.maxhydrosofttype;
+          loc_DomainMoment[idx].minhydrosofttype = Nodes[no].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+          idx++;
+        }
+    }
+
+  MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);
+
+  for(int task = 0; task < NTask; task++)
+    recvcounts[task] = 0; /* reused as a running per-task read position */
+
+  for(int n = 0; n < NTopleaves; n++)
+    {
+      int task = DomainNewTask[n];
+      if(task != ThisTask)
+        {
+          int no  = DomainNodeIndex[n];
+          int idx = recvoffset[task] + recvcounts[task]++; /* records of a task were packed in increasing n */
+
+          Nodes[no].u.d.s[0]        = DomainMoment[idx].s[0];
+          Nodes[no].u.d.s[1]        = DomainMoment[idx].s[1];
+          Nodes[no].u.d.s[2]        = DomainMoment[idx].s[2];
+          Nodes[no].u.d.mass        = DomainMoment[idx].mass;
+          Nodes[no].u.d.maxsofttype = DomainMoment[idx].maxsofttype;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+          for(int k = 0; k < NSOFTTYPES; k++)
+            ExtNodes[no].mass_per_type[k] = DomainMoment[idx].mass_per_type[k];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+          Nodes[no].u.d.maxhydrosofttype = DomainMoment[idx].maxhydrosofttype;
+          Nodes[no].u.d.minhydrosofttype = DomainMoment[idx].minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+        }
+    }
+
+  myfree(loc_DomainMoment);
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(recvoffset);
+  myfree(recvcounts);
+  myfree(DomainMoment);
+}
+
+/*! \brief Updates the top-level tree after the multipole moments of the
+ *         pseudo-particles have been updated.
+ *  Internal top-level nodes are recomputed bottom-up from their 8 daughter nodes.
+ *  \param[in] no Node to be updated.
+ *  \param[in] topnode Index of the node no in the 'TopNodes' array.
+ *  \param[in] bits 2^bits is the number of nodes per dimension at the level of
+ *             the daughter nodes of node no.
+ *  \param[in] x Position of the node no in the x direction, falls in the
+ *             range [0,2^(bits-1) - 1].
+ *  \param[in] y Position of the node no in the y direction, falls in the
+ *             range [0,2^(bits-1) - 1].
+ *  \param[in] z Position of the node no in the z direction, falls in the
+ *             range [0,2^(bits-1) - 1].
+ *
+ *  \return void
+ */
+void force_treeupdate_toplevel(int no, int topnode, int bits, int x, int y, int z)
+{
+  double s[3], mass;
+  unsigned char maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+  double mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  unsigned char maxhydrosofttype;
+  unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+  if(TopNodes[topnode].Daughter >= 0) /* nodes without daughters (Daughter < 0) are left untouched */
+    {
+      for(int i = 0; i < 2; i++)
+        for(int j = 0; j < 2; j++)
+          for(int k = 0; k < 2; k++)
+            {
+              int sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits);
+
+              Tree_NextFreeNode++; /* the daughter is addressed by the node index that was free before the increment */
+              force_treeupdate_toplevel(Tree_NextFreeNode - 1, TopNodes[topnode].Daughter + sub, bits + 1, 2 * x + i, 2 * y + j,
+                                        2 * z + k);
+            }
+
+      mass        = 0;
+      s[0]        = 0;
+      s[1]        = 0;
+      s[2]        = 0;
+      maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO;
+#ifdef MULTIPLE_NODE_SOFTENING
+      for(int j = 0; j < NSOFTTYPES; j++)
+        mass_per_type[j] = 0;
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      maxhydrosofttype = NSOFTTYPES;
+      minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      int p = Nodes[no].u.d.nextnode;
+
+      for(int j = 0; j < 8; j++) /* since we are dealing with top-level nodes, we know that there are 8 consecutive daughter nodes */
+        {
+          if(p >= Tree_MaxPart && p < Tree_MaxPart + Tree_MaxNodes) /* internal node */
+            {
+              mass += Nodes[p].u.d.mass;
+              s[0] += Nodes[p].u.d.mass * Nodes[p].u.d.s[0]; /* mass-weighted sum, normalized after the loop */
+              s[1] += Nodes[p].u.d.mass * Nodes[p].u.d.s[1];
+              s[2] += Nodes[p].u.d.mass * Nodes[p].u.d.s[2];
+
+              if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[Nodes[p].u.d.maxsofttype])
+                maxsofttype = Nodes[p].u.d.maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+              for(int k = 0; k < NSOFTTYPES; k++)
+                mass_per_type[k] += ExtNodes[p].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+              if(maxhydrosofttype < Nodes[p].u.d.maxhydrosofttype)
+                maxhydrosofttype = Nodes[p].u.d.maxhydrosofttype;
+              if(minhydrosofttype > Nodes[p].u.d.minhydrosofttype)
+                minhydrosofttype = Nodes[p].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+            }
+          else
+            terminate("may not happen");
+
+          p = Nodes[p].u.d.sibling;
+        }
+
+      if(mass)
+        {
+          s[0] /= mass;
+          s[1] /= mass;
+          s[2] /= mass;
+        }
+      else /* massless node: fall back to the geometric center */
+        {
+          s[0] = Nodes[no].center[0];
+          s[1] = Nodes[no].center[1];
+          s[2] = Nodes[no].center[2];
+        }
+
+      Nodes[no].u.d.s[0]        = s[0];
+      Nodes[no].u.d.s[1]        = s[1];
+      Nodes[no].u.d.s[2]        = s[2];
+      Nodes[no].u.d.mass        = mass;
+      Nodes[no].u.d.maxsofttype = maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+      for(int k = 0; k < NSOFTTYPES; k++)
+        ExtNodes[no].mass_per_type[k] = mass_per_type[k];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      Nodes[no].u.d.maxhydrosofttype = maxhydrosofttype;
+      Nodes[no].u.d.minhydrosofttype = minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+    }
+}
+
+/*! \brief Allocates the memory used for storage of the tree nodes.
+ *
+ *  Usually, the number of required nodes is of order 0.7*maxpart, but if this
+ *  is insufficient, the code will try to allocate more space.
+ *
+ *  \param[in] maxpart Number of particles on the current task.
+ *  \param[in] maxindex The Nodes pointer will be shifted such that the index
+ *             of the first element is maxindex.
+ *
+ *  \return void
+ */
+void force_treeallocate(int maxpart, int maxindex)
+{
+  if(Nodes) /* a non-NULL Nodes pointer marks an allocated tree */
+    terminate("already allocated");
+
+  Tree_MaxPart  = maxindex;
+  Tree_MaxNodes = (int)(All.TreeAllocFactor * maxpart) + NTopnodes; /* node budget scales with TreeAllocFactor */
+
+  DomainNewTask   = (int *)mymalloc_movable(&DomainNewTask, "DomainNewTask", NTopleaves * sizeof(int));
+  DomainNodeIndex = (int *)mymalloc_movable(&DomainNodeIndex, "DomainNodeIndex", NTopleaves * sizeof(int));
+  Tree_Task_list  = (int *)mymalloc_movable(&Tree_Task_list, "Tree_Task_list", maxpart * sizeof(int));
+  Tree_Pos_list   = (MyDouble *)mymalloc_movable(&Tree_Pos_list, "Tree_Pos_list", 3 * maxpart * sizeof(MyDouble));
+
+  Nodes = (struct NODE *)mymalloc_movable(&Nodes, "Nodes", (Tree_MaxNodes + 1) * sizeof(struct NODE));
+  Nodes -= Tree_MaxPart; /* shift so that Nodes[Tree_MaxPart] is the first allocated element */
+#ifdef MULTIPLE_NODE_SOFTENING
+  ExtNodes = (struct ExtNODE *)mymalloc_movable(&ExtNodes, "ExtNodes", (Tree_MaxNodes + 1) * sizeof(struct ExtNODE));
+  ExtNodes -= Tree_MaxPart; /* same index shift as for Nodes */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+}
+
+/*! \brief Frees the memory allocated for the tree by force_treeallocate().
+ *  The buffers are released in reverse order of their allocation and the
+ *  tree pointers are reset to NULL; terminates if no tree is allocated.
+ *  \return void
+ */
+void force_treefree(void)
+{
+  if(Nodes)
+    {
+#ifdef MULTIPLE_NODE_SOFTENING
+      myfree(ExtNodes + Tree_MaxPart); /* undo the index shift applied at allocation */
+      ExtNodes = NULL;
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+      myfree(Nodes + Tree_MaxPart); /* undo the index shift applied at allocation */
+      myfree(Tree_Pos_list);
+      myfree(Tree_Task_list);
+      myfree(DomainNodeIndex);
+      myfree(DomainNewTask);
+
+      Nodes           = NULL;
+      DomainNodeIndex = NULL;
+      DomainNewTask   = NULL;
+      Tree_Task_list  = NULL;
+      Tree_Pos_list   = NULL; /* freed above, so it must not stay dangling */
+      Nextnode        = NULL; /* not freed in this function, only reset */
+      Father          = NULL; /* not freed in this function, only reset */
+    }
+  else
+    terminate("trying to free the tree even though it's not allocated");
+}
+
+/*! \brief Dumps NumPart and the positions, velocities and IDs of all local
+ *  particles to the binary file particles<ThisTask>.dat. Meant as a debugging
+ *  aid that is called just before the run terminates after a failed tree
+ *  construction; if the file cannot be opened, nothing is written. \return void */
+void dump_particles(void)
+{
+  char buffer[200];
+  snprintf(buffer, sizeof(buffer), "particles%d.dat", ThisTask);
+  FILE *fd = fopen(buffer, "w");
+  if(!fd) /* only warn, so that the caller can still report the actual error */
+    {
+      printf("FORCETREE: Task %d could not open '%s' for the particle dump\n", ThisTask, buffer);
+      return;
+    }
+  my_fwrite(&NumPart, 1, sizeof(int), fd);
+  for(int i = 0; i < NumPart; i++)
+    my_fwrite(&P[i].Pos[0], 3, sizeof(MyDouble), fd);
+  for(int i = 0; i < NumPart; i++)
+    my_fwrite(&P[i].Vel[0], 3, sizeof(MyFloat), fd);
+  for(int i = 0; i < NumPart; i++)
+    my_fwrite(&P[i].ID, 1, sizeof(P[i].ID), fd); /* full ID width, also if IDs are wider than int */
+  fclose(fd);
+}
+
+#ifdef ADDBACKGROUNDGRID
+/*! \brief Pads partially filled tree nodes with empty daughter nodes.
+ *
+ *  Called during tree construction if ADDBACKGROUNDGRID is active. Every node
+ *  that already has at least one daughter receives a fresh node without
+ *  daughters in each of its vacant octants.
+ *
+ *  \return 0: default; 1: number of nodes >= max number of nodes.
+ */
+int force_add_empty_nodes(void)
+{
+  int added = 0;
+
+  /* Tree_NumNodes grows while we iterate, so the nodes appended here are
+   * visited as well; having no daughters, they are skipped below */
+  for(int parent = Tree_MaxPart; parent < Tree_MaxPart + Tree_NumNodes; parent++)
+    {
+      /* count the vacant octants of this node */
+      int nvacant = 0;
+      for(int slot = 0; slot < 8; slot++)
+        if(Nodes[parent].u.suns[slot] == -1)
+          nvacant++;
+
+      if(nvacant == 8) /* no daughters at all: nothing to pad */
+        continue;
+
+      for(int slot = 0; slot < 8; slot++)
+        {
+          if(Nodes[parent].u.suns[slot] != -1)
+            continue;
+
+          /* hook the next free node into the vacant octant */
+          Nodes[parent].u.suns[slot] = Tree_NextFreeNode;
+          struct NODE *child         = &Nodes[Tree_NextFreeNode];
+
+          /* half the parent's side length, center offset by a quarter of it;
+           * bits 0, 1, 2 of the octant index select the sign in x, y, z */
+          child->len   = 0.5 * Nodes[parent].len;
+          double shift = 0.25 * Nodes[parent].len;
+
+          child->center[0] = (slot & 1) ? Nodes[parent].center[0] + shift : Nodes[parent].center[0] - shift;
+          child->center[1] = (slot & 2) ? Nodes[parent].center[1] + shift : Nodes[parent].center[1] - shift;
+          child->center[2] = (slot & 4) ? Nodes[parent].center[2] + shift : Nodes[parent].center[2] - shift;
+
+          for(int oct = 0; oct < 8; oct++)
+            child->u.suns[oct] = -1;
+
+          Tree_NumNodes++;
+          Tree_NextFreeNode++;
+
+          if(Tree_NumNodes >= Tree_MaxNodes)
+            {
+              /* node storage exhausted: stop the run if the allocation factor
+               * is already large, otherwise report it to the caller */
+              if(All.TreeAllocFactor > 5.0)
+                {
+                  char buf[500];
+                  sprintf(
+                      buf,
+                      "task %d: looks like a serious problem, stopping with particle dump. Tree_NumNodes=%d Tree_MaxNodes=%d\n",
+                      ThisTask, Tree_NumNodes, Tree_MaxNodes);
+                  dump_particles();
+                  terminate(buf);
+                }
+              return 1;
+            }
+
+          added++;
+        }
+    }
+
+  printf("FORCETREE: Task %d has added %d empty nodes\n", ThisTask, added);
+  return 0;
+}
+#endif /* #ifdef ADDBACKGROUNDGRID */
diff --git a/src/amuse/community/arepo/src/gravity/forcetree.h b/src/amuse/community/arepo/src/gravity/forcetree.h
new file mode 100644
index 0000000000..0371e7e9f7
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/forcetree.h
@@ -0,0 +1,168 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/forcetree.h
+ * \date        05/2018
+ * \brief       Functions and data structures for forcetree.
+ * \details
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 28.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef FORCETREE_H
+#define FORCETREE_H
+
+#ifndef INLINE_FUNC
+#define INLINE_FUNC
+#endif /* #ifndef INLINE_FUNC */
+
+typedef struct /* input record handed to force_treeevaluate() */
+{
+  MyDouble Pos[3];
+  float OldAcc;
+  unsigned char Type;
+  unsigned char SofteningType; /* presumably an index into All.ForceSoftening[] - TODO confirm */
+
+  int Firstnode;
+} gravdata_in;
+
+typedef struct /* output record handed to force_treeevaluate() */
+{
+  MyFloat Acc[3];
+#ifdef EVALPOTENTIAL
+  MyFloat Potential; /* member exists only with EVALPOTENTIAL */
+#endif /* #ifdef EVALPOTENTIAL */
+#ifdef OUTPUTGRAVINTERACTIONS
+  int GravInteractions; /* member exists only with OUTPUTGRAVINTERACTIONS */
+#endif /* #ifdef OUTPUTGRAVINTERACTIONS */
+
+} gravdata_out;
+
+#ifdef LONG_X
+#define STRETCHX (LONG_X) /* STRETCH* take the value of LONG_* when set, otherwise 1 */
+#else /* #ifdef LONG_X */
+#define STRETCHX 1
+#endif /* #ifdef LONG_X #else */
+
+#ifdef LONG_Y
+#define STRETCHY (LONG_Y)
+#else /* #ifdef LONG_Y */
+#define STRETCHY 1
+#endif /* #ifdef LONG_Y #else */
+
+#ifdef LONG_Z
+#define STRETCHZ (LONG_Z)
+#else /* #ifdef LONG_Z */
+#define STRETCHZ 1
+#endif /* #ifdef LONG_Z #else */
+
+#define DBX 1
+#define DBY 1
+#define DBZ 1
+#define DBX_EXTRA 0
+#define DBY_EXTRA 0
+#define DBZ_EXTRA 0
+
+/*! length of look-up table for short-range force kernel in TreePM algorithm */
+#define NTAB 127
+
+#if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) /* NOTE(review): forcetree_ewald.c defines the tables only if additionally !PMGRID and !ONEDIMS_SPHERICAL */
+
+#define EN 64 /* Ewald table intervals per dimension before the DB* and STRETCH* factors are applied */
+
+#define ENX (DBX * STRETCHX * EN)
+#define ENY (DBY * STRETCHY * EN)
+#define ENZ (DBZ * STRETCHZ * EN)
+
+extern MyFloat Ewd_fcorrx[ENX + 1][ENY + 1][ENZ + 1];
+extern MyFloat Ewd_fcorry[ENX + 1][ENY + 1][ENZ + 1];
+extern MyFloat Ewd_fcorrz[ENX + 1][ENY + 1][ENZ + 1];
+extern MyFloat Ewd_potcorr[ENX + 1][ENY + 1][ENZ + 1];
+extern double Ewd_fac_intp;
+
+extern int NTreeInsert;
+
+#endif /* #if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) */
+
+#define MAX_TREE_LEVEL 30
+#define MAX_TREE_ALLOC_FACTOR 30.0
+
+#define TAKE_NSLOTS_IN_ONE_GO 32
+
+#define MAX_IMPACT_BEFORE_OPTIMIZATION 1.03
+
+#define BITFLAG_TOPLEVEL 0 /* the BITFLAG_* values are bit positions, see BITFLAG_MASK */
+#define BITFLAG_DEPENDS_ON_LOCAL_MASS 1
+#define BITFLAG_DEPENDS_ON_EXTERN_MASS 2
+#define BITFLAG_INTERNAL_TOPLEVEL 6
+#define BITFLAG_MULTIPLEPARTICLES 7
+#define BITFLAG_CONTAINS_GAS 10
+
+#define BITFLAG_MASK ((1 << BITFLAG_CONTAINS_GAS) + (1 << BITFLAG_MULTIPLEPARTICLES)) /* bits 10 and 7 set */
+
+static inline unsigned long long force_double_to_int(double d)
+{
+  union
+  {
+    double as_double;
+    unsigned long long as_bits;
+  } pun;
+  pun.as_double = d;                          /* view the bit pattern of d as an integer */
+  return pun.as_bits & ((1llu << 52) - 1llu); /* keep only the low 52 bits */
+}
+
+static inline double force_int_to_double(unsigned long long x)
+{
+  union
+  {
+    double as_double;
+    unsigned long long as_bits;
+  } pun;
+  pun.as_double = 1.0; /* start from the bit pattern of 1.0 ... */
+  pun.as_bits |= x;    /* ... and OR the caller's bits into it */
+  return pun.as_double;
+}
+
+int tree_treefind_export_node_threads(int no, int target, int thread_id);
+int construct_forcetree(int mode, int optimized_domain_mapping, int insert_only_primary, int timebin);
+int force_treebuild(int npart, int optimized_domain_mapping, int insert_only_primary, int timebin);
+int force_treebuild_construct(int npart, int optimized_domain_mapping, int insert_only_primary, int timebin);
+int force_treebuild_insert_single_point(int i, unsigned long long *intpos, int th, unsigned char level);
+int force_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z);
+void force_insert_pseudo_particles(void);
+void force_update_node_recursive(int no, int sib, int father, int *last);
+void force_exchange_topleafdata(void);
+void force_treeupdate_toplevel(int no, int topnode, int bits, int x, int y, int z);
+void force_treeallocate(int maxpart, int maxindex);
+void force_treefree(void);
+void dump_particles(void);
+int force_add_empty_nodes(void); /* defined in forcetree.c only if ADDBACKGROUNDGRID is set */
+void force_short_range_init(void);
+int force_treeevaluate(gravdata_in *in, gravdata_out *out, int target, int mode, int thread_id, int numnodes, int *firstnode,
+                       int measure_cost_flag);
+void force_assign_cost_values(void);
+void force_optimize_domain_mapping(void);
+double force_get_current_balance(double *impact);
+void force_get_global_cost_for_leavenodes(int nexport);
+void forcetest_ewald_init(void);
+
+#endif /* #ifndef FORCETREE_H */
diff --git a/src/amuse/community/arepo/src/gravity/forcetree_ewald.c b/src/amuse/community/arepo/src/gravity/forcetree_ewald.c
new file mode 100644
index 0000000000..f1b73fb5f6
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/forcetree_ewald.c
@@ -0,0 +1,529 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/forcetree_ewald.c
+ * \date        05/2018
+ * \brief       Code for Ewald correction (i.e. tree force with periodic
+ *              boundary conditions).
+ * \details     This file contains the computation of the Ewald correction
+ *              table.
+ *              contains functions:
+ *                void ewald_init(void)
+ *                void ewald_corr(double dx, double dy, double dz, double
+ *                  *fper)
+ *                double ewald_pot_corr(double dx, double dy, double dz)
+ *                double ewald_psi(double x, double y, double z)
+ *                void ewald_force(double x, double y, double z, double
+ *                  force[3])
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 20.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL)
+#include <gsl/gsl_sf_bessel.h>
+
+/* variables for Ewald correction lookup table; filled by ewald_init(), the force tables are interpolated in ewald_corr() */
+MyFloat Ewd_fcorrx[ENX + 1][ENY + 1][ENZ + 1];
+MyFloat Ewd_fcorry[ENX + 1][ENY + 1][ENZ + 1];
+MyFloat Ewd_fcorrz[ENX + 1][ENY + 1][ENZ + 1];
+MyFloat Ewd_potcorr[ENX + 1][ENY + 1][ENZ + 1];
+double Ewd_fac_intp; /* set to 2 * EN / All.BoxSize by ewald_init(); converts a distance into a table coordinate */
+
+/*! \brief Header stored in front of the Ewald tables in the table file;
+ *  ewald_init() checks it against ENX, ENY, ENZ, sizeof(MyFloat) and the type tag -1. */
+typedef struct
+{
+  int resx, resy, resz, varsize, ewaldtype;
+} ewald_header;
+
+/*! \brief This function initializes tables with the correction force and the
+ *  correction potential due to the periodic images of a point mass located
+ *  at the origin.
+ *
+ *  These corrections are obtained by Ewald summation. (See for example
+ *  Hernquist, Bouchet, Suto, ApJS, 1991, 75, 231) The correction fields
+ *  are used to obtain the full periodic force if periodic boundaries
+ *  combined with the pure tree algorithm are used. For the TreePM
+ *  algorithm, the Ewald correction is not used.
+ *
+ *  The correction terms are computed by ewald_psi() and ewald_force() and
+ *  stored in the arrays Ewd_fcorrx, Ewd_fcorry, Ewd_fcorrz and Ewd_potcorr.
+ *
+ *  The correction fields are stored on disk once they are computed. If a
+ *  corresponding file is found, they are loaded from disk to speed up the
+ *  initialization. The Ewald summation is done in
+ *  parallel, i.e. the processors share the work to compute the tables if
+ *  needed.
+ *
+ *  \return void
+ */
+void ewald_init(void)
+{
+  int recomputeflag = 0;
+  double force[3];
+  char buf[200];
+  FILE *fd;
+
+  mpi_printf("EWALD: initialize Ewald correction...\n");
+
+#ifdef LONG_X
+  if(LONG_X != (int)(LONG_X))
+    terminate("LONG_X must be an integer");
+#endif /* #ifdef LONG_X */
+
+#ifdef LONG_Y
+  if(LONG_Y != (int)(LONG_Y))
+    terminate("LONG_Y must be an integer");
+#endif /* #ifdef LONG_Y */
+
+#ifdef LONG_Z
+  if(LONG_Z != (int)(LONG_Z))
+    terminate("LONG_Z must be an integer");
+#endif /* #ifdef LONG_Z */
+
+  sprintf(buf, "ewald_table_%d_%d_%d.dat", ENX, ENY, ENZ);
+
+  if(ThisTask == 0) /* only task 0 touches the table file */
+    {
+      if((fd = fopen(buf, "r")))
+        {
+          mpi_printf("\nEWALD: reading Ewald tables from file `%s'\n", buf);
+
+          ewald_header tabh;
+          my_fread(&tabh, sizeof(ewald_header), 1, fd);
+
+          int ewaldtype = -1;
+
+          if(tabh.resx != ENX || tabh.resy != ENY || tabh.resz != ENZ || tabh.varsize != sizeof(MyFloat) ||
+             tabh.ewaldtype != ewaldtype)
+            {
+              mpi_printf("\nEWALD: something's wrong with this table file. Discarding it.\n");
+              recomputeflag = 1;
+            }
+          else
+            {
+              my_fread(Ewd_fcorrx, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+              my_fread(Ewd_fcorry, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+              my_fread(Ewd_fcorrz, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+              my_fread(Ewd_potcorr, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+
+              recomputeflag = 0;
+            }
+          fclose(fd);
+        }
+      else
+        recomputeflag = 1;
+    }
+
+  MPI_Bcast(&recomputeflag, 1, MPI_INT, 0, MPI_COMM_WORLD); /* all tasks follow the decision of task 0 */
+
+  if(recomputeflag)
+    {
+      mpi_printf("\nEWALD: No usable Ewald tables in file `%s' found. Recomputing them...\n", buf);
+
+      /* ok, let's recompute things. Actually, we do that in parallel. */
+      int size = (ENX + 1) * (ENY + 1) * (ENZ + 1);
+      int first, count;
+
+      subdivide_evenly(size, NTask, ThisTask, &first, &count);
+
+      for(int n = first; n < first + count; n++)
+        {
+          int i = n / ((ENY + 1) * (ENZ + 1)); /* unflatten n into the table indices (i, j, k) */
+          int j = (n - i * (ENY + 1) * (ENZ + 1)) / (ENZ + 1);
+          int k = (n - i * (ENY + 1) * (ENZ + 1) - j * (ENZ + 1));
+
+          if(ThisTask == 0)
+            {
+              if(count >= 20 && ((n - first) % (count / 20)) == 0) /* count / 20 is 0 for shares below 20: no remainder by zero */
+                {
+                  printf("%4.1f percent done\n", (n - first) / (count / 100.0));
+                  myflush(stdout);
+                }
+            }
+
+          double xx = 0.5 * DBX * STRETCHX * ((double)i) / ENX;
+          double yy = 0.5 * DBY * STRETCHY * ((double)j) / ENY;
+          double zz = 0.5 * DBZ * STRETCHZ * ((double)k) / ENZ;
+
+          Ewd_potcorr[i][j][k] = ewald_psi(xx, yy, zz);
+
+          ewald_force(xx, yy, zz, force);
+
+          Ewd_fcorrx[i][j][k] = force[0];
+          Ewd_fcorry[i][j][k] = force[1];
+          Ewd_fcorrz[i][j][k] = force[2];
+        }
+
+      int *recvcnts = (int *)mymalloc("recvcnts", NTask * sizeof(int));
+      int *recvoffs = (int *)mymalloc("recvoffs", NTask * sizeof(int));
+
+      for(int i = 0; i < NTask; i++)
+        {
+          int off, cnt;
+          subdivide_evenly(size, NTask, i, &off, &cnt);
+          recvcnts[i] = cnt * sizeof(MyFloat); /* counts and offsets are in bytes */
+          recvoffs[i] = off * sizeof(MyFloat);
+        }
+
+      MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(MyFloat), MPI_BYTE, Ewd_fcorrx, recvcnts, recvoffs, MPI_BYTE, MPI_COMM_WORLD);
+      MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(MyFloat), MPI_BYTE, Ewd_fcorry, recvcnts, recvoffs, MPI_BYTE, MPI_COMM_WORLD);
+      MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(MyFloat), MPI_BYTE, Ewd_fcorrz, recvcnts, recvoffs, MPI_BYTE, MPI_COMM_WORLD);
+      MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(MyFloat), MPI_BYTE, Ewd_potcorr, recvcnts, recvoffs, MPI_BYTE, MPI_COMM_WORLD);
+
+      myfree(recvoffs);
+      myfree(recvcnts);
+
+      mpi_printf("\nEWALD: writing Ewald tables to file `%s'\n", buf);
+      if(ThisTask == 0)
+        {
+          if((fd = fopen(buf, "w"))) /* a failed open is ignored: the tables are then simply not stored */
+            {
+              ewald_header tabh;
+              tabh.resx      = ENX;
+              tabh.resy      = ENY;
+              tabh.resz      = ENZ;
+              tabh.varsize   = sizeof(MyFloat);
+              tabh.ewaldtype = -1;
+
+              my_fwrite(&tabh, sizeof(ewald_header), 1, fd);
+              my_fwrite(Ewd_fcorrx, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+              my_fwrite(Ewd_fcorry, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+              my_fwrite(Ewd_fcorrz, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+              my_fwrite(Ewd_potcorr, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
+              fclose(fd);
+            }
+        }
+    }
+  else
+    {
+      /* here we got them from disk */
+      int len = (ENX + 1) * (ENY + 1) * (ENZ + 1) * sizeof(MyFloat);
+
+      MPI_Bcast(Ewd_fcorrx, len, MPI_BYTE, 0, MPI_COMM_WORLD);
+      MPI_Bcast(Ewd_fcorry, len, MPI_BYTE, 0, MPI_COMM_WORLD);
+      MPI_Bcast(Ewd_fcorrz, len, MPI_BYTE, 0, MPI_COMM_WORLD);
+      MPI_Bcast(Ewd_potcorr, len, MPI_BYTE, 0, MPI_COMM_WORLD);
+    }
+
+  /* now scale things to the boxsize that is actually used */
+  Ewd_fac_intp = 2 * EN / All.BoxSize;
+
+  for(int i = 0; i <= ENX; i++)
+    for(int j = 0; j <= ENY; j++)
+      for(int k = 0; k <= ENZ; k++)
+        {
+          Ewd_potcorr[i][j][k] /= All.BoxSize;
+          Ewd_fcorrx[i][j][k] /= All.BoxSize * All.BoxSize;
+          Ewd_fcorry[i][j][k] /= All.BoxSize * All.BoxSize;
+          Ewd_fcorrz[i][j][k] /= All.BoxSize * All.BoxSize;
+        }
+
+  mpi_printf("EWALD: Initialization of periodic boundaries finished.\n");
+}
+
+/*! \brief This function looks up the correction force due to the infinite
+ *  number of periodic particle/node images.
+ *
+ *  We here use trilinear interpolation to get it from the precomputed tables,
+ *  which contain one octant around the target particle at the origin. The
+ *  other octants are obtained from it by exploiting the symmetry properties.
+ *
+ *  \param[in] dx x component of the distance between the two particles.
+ *  \param[in] dy y component of the distance between the two particles.
+ *  \param[in] dz z component of the distance between the two particles.
+ *  \param[out] fper pointer to array containing the correction force.
+ *
+ *  \return void
+ */
+void ewald_corr(double dx, double dy, double dz, double *fper)
+{
+  int signx, signy, signz;
+  int i, j, k;
+  double u, v, w;
+  double f1, f2, f3, f4, f5, f6, f7, f8;
+
+  if(dx < 0) /* the tables are looked up with |dx|; signx carries the sign applied to the result */
+    {
+      dx    = -dx;
+      signx = +1;
+    }
+  else
+    signx = -1;
+  if(dy < 0)
+    {
+      dy    = -dy;
+      signy = +1;
+    }
+  else
+    signy = -1;
+  if(dz < 0)
+    {
+      dz    = -dz;
+      signz = +1;
+    }
+  else
+    signz = -1;
+  u = dx * Ewd_fac_intp; /* table coordinate along x */
+  i = (int)u;
+  if(i >= ENX)
+    i = ENX - 1; /* keeps i + 1 inside the table, which has ENX + 1 entries along x */
+  u -= i;        /* fractional position within the cell */
+  v = dy * Ewd_fac_intp;
+  j = (int)v;
+  if(j >= ENY)
+    j = ENY - 1;
+  v -= j;
+  w = dz * Ewd_fac_intp;
+  k = (int)w;
+  if(k >= ENZ)
+    k = ENZ - 1;
+  w -= k;
+  f1      = (1 - u) * (1 - v) * (1 - w); /* f1..f8: weights of the eight cell corners */
+  f2      = (1 - u) * (1 - v) * (w);
+  f3      = (1 - u) * (v) * (1 - w);
+  f4      = (1 - u) * (v) * (w);
+  f5      = (u) * (1 - v) * (1 - w);
+  f6      = (u) * (1 - v) * (w);
+  f7      = (u) * (v) * (1 - w);
+  f8      = (u) * (v) * (w);
+  fper[0] = signx * (Ewd_fcorrx[i][j][k] * f1 + Ewd_fcorrx[i][j][k + 1] * f2 + Ewd_fcorrx[i][j + 1][k] * f3 +
+                     Ewd_fcorrx[i][j + 1][k + 1] * f4 + Ewd_fcorrx[i + 1][j][k] * f5 + Ewd_fcorrx[i + 1][j][k + 1] * f6 +
+                     Ewd_fcorrx[i + 1][j + 1][k] * f7 + Ewd_fcorrx[i + 1][j + 1][k + 1] * f8);
+  fper[1] = signy * (Ewd_fcorry[i][j][k] * f1 + Ewd_fcorry[i][j][k + 1] * f2 + Ewd_fcorry[i][j + 1][k] * f3 +
+                     Ewd_fcorry[i][j + 1][k + 1] * f4 + Ewd_fcorry[i + 1][j][k] * f5 + Ewd_fcorry[i + 1][j][k + 1] * f6 +
+                     Ewd_fcorry[i + 1][j + 1][k] * f7 + Ewd_fcorry[i + 1][j + 1][k + 1] * f8);
+  fper[2] = signz * (Ewd_fcorrz[i][j][k] * f1 + Ewd_fcorrz[i][j][k + 1] * f2 + Ewd_fcorrz[i][j + 1][k] * f3 +
+                     Ewd_fcorrz[i][j + 1][k + 1] * f4 + Ewd_fcorrz[i + 1][j][k] * f5 + Ewd_fcorrz[i + 1][j][k + 1] * f6 +
+                     Ewd_fcorrz[i + 1][j + 1][k] * f7 + Ewd_fcorrz[i + 1][j + 1][k + 1] * f8);
+}
+
+/*! \brief This function looks up the correction potential due to the infinite
+ *  number of periodic particle/node images.
+ *
+ *  We here use tri-linear interpolation to get it from the precomputed
+ *  table, which contains one octant around the target particle at the
+ *  origin. The other octants are obtained from it by exploiting symmetry
+ *  properties.
+ *
+ *  \param[in] dx x component of the distance between the two particles.
+ *  \param[in] dy y component of the distance between the two particles.
+ *  \param[in] dz z component of the distance between the two particles.
+ *
+ *  \return The correction potential.
+ */
+double ewald_pot_corr(double dx, double dy, double dz)
+{
+  int i, j, k;    /* table indices of the lower corner of the interpolation cell */
+  double u, v, w; /* table coordinates; reduced below to the offset inside the cell */
+  double f1, f2, f3, f4, f5, f6, f7, f8; /* trilinear weights of the eight cell corners */
+
+  if(dx < 0)
+    dx = -dx; /* only the absolute separation enters; no sign factor is applied to the result */
+  if(dy < 0)
+    dy = -dy;
+  if(dz < 0)
+    dz = -dz;
+  u = dx * Ewd_fac_intp;
+  i = (int)u;
+  if(i >= ENX)
+    i = ENX - 1; /* clamp so that i + 1 does not exceed ENX */
+  u -= i;
+  v = dy * Ewd_fac_intp;
+  j = (int)v;
+  if(j >= ENY)
+    j = ENY - 1; /* clamp so that j + 1 does not exceed ENY */
+  v -= j;
+  w = dz * Ewd_fac_intp;
+  k = (int)w;
+  if(k >= ENZ)
+    k = ENZ - 1; /* clamp so that k + 1 does not exceed ENZ */
+  w -= k;
+  f1 = (1 - u) * (1 - v) * (1 - w);
+  f2 = (1 - u) * (1 - v) * (w);
+  f3 = (1 - u) * (v) * (1 - w);
+  f4 = (1 - u) * (v) * (w);
+  f5 = (u) * (1 - v) * (1 - w);
+  f6 = (u) * (1 - v) * (w);
+  f7 = (u) * (v) * (1 - w);
+  f8 = (u) * (v) * (w);
+  return Ewd_potcorr[i][j][k] * f1 + Ewd_potcorr[i][j][k + 1] * f2 + Ewd_potcorr[i][j + 1][k] * f3 +
+         Ewd_potcorr[i][j + 1][k + 1] * f4 + Ewd_potcorr[i + 1][j][k] * f5 + Ewd_potcorr[i + 1][j][k + 1] * f6 +
+         Ewd_potcorr[i + 1][j + 1][k] * f7 + Ewd_potcorr[i + 1][j + 1][k + 1] * f8;
+}
+
+/*! \brief This function computes the potential correction term by means of
+ *  Ewald summation over a lattice with periods STRETCHX, STRETCHY, STRETCHZ.
+ *
+ *  \param[in] x X distance for which the correction term should be computed.
+ *  \param[in] y Y distance for which the correction term should be computed.
+ *  \param[in] z Z distance for which the correction term should be computed.
+ *
+ *  \return The correction term (0 if x = y = z = 0).
+ */
+double ewald_psi(double x, double y, double z)
+{
+  static int printed = 0; /* the summation limits are logged only on the first call */
+
+  double r = sqrt(x * x + y * y + z * z);
+
+  if(r == 0)
+    return 0;
+
+  double lmin  = imin(imin(STRETCHX, STRETCHY), STRETCHZ); /* NOTE(review): imin looks like an integer minimum -- confirm STRETCH* are integral */
+  double alpha = 3.0 / lmin;
+
+  const int nmax = 4; /* the real-space sum covers lattice offsets -nmax..nmax per axis */
+
+  double sum1 = 0;
+  for(int nx = -nmax; nx <= nmax; nx++)
+    for(int ny = -nmax; ny <= nmax; ny++)
+      for(int nz = -nmax; nz <= nmax; nz++)
+        {
+          double dx = x - nx * STRETCHX;
+          double dy = y - ny * STRETCHY;
+          double dz = z - nz * STRETCHZ;
+          double r  = sqrt(dx * dx + dy * dy + dz * dz); /* distance to this image; shadows the outer r */
+          sum1 += erfc(alpha * r) / r;
+        }
+
+  double alpha2 = alpha * alpha;
+
+  int nxmax = (int)(2 * alpha * (STRETCHX / lmin) + 0.5);
+  int nymax = (int)(2 * alpha * (STRETCHY / lmin) + 0.5);
+  int nzmax = (int)(2 * alpha * (STRETCHZ / lmin) + 0.5);
+
+  if(printed == 0)
+    {
+      mpi_printf("EWALD: potential tab: nxmax=%d nymax=%d nzmax=%d\n", nxmax, nymax, nzmax);
+      printed = 1;
+    }
+
+  double sum2 = 0.0;
+  for(int nx = -nxmax; nx <= nxmax; nx++)
+    for(int ny = -nymax; ny <= nymax; ny++)
+      for(int nz = -nzmax; nz <= nzmax; nz++)
+        {
+          double kx = (2.0 * M_PI / (STRETCHX)) * nx;
+          double ky = (2.0 * M_PI / (STRETCHY)) * ny;
+          double kz = (2.0 * M_PI / (STRETCHZ)) * nz;
+          double k2 = kx * kx + ky * ky + kz * kz;
+          if(k2 > 0) /* the k = 0 term is excluded from the sum */
+            {
+              double kdotx = (x * kx + y * ky + z * kz);
+              sum2 += 4.0 * M_PI / (k2 * STRETCHX * STRETCHY * STRETCHZ) * exp(-k2 / (4.0 * alpha2)) * cos(kdotx);
+            }
+        }
+
+  double psi = /*-2.83729 + */ M_PI / (alpha * alpha * STRETCHX * STRETCHY * STRETCHZ) - sum1 - sum2 + 1.0 / r;
+
+  return psi;
+}
+
+/*! \brief This function computes the force correction term (difference
+ *  between full force of infinite lattice and nearest image) by Ewald
+ *  summation on a lattice with periods STRETCHX, STRETCHY and STRETCHZ.
+ *
+ *  \param[in] x X distance for which the correction term should be computed.
+ *  \param[in] y Y distance for which the correction term should be computed.
+ *  \param[in] z Z distance for which the correction term should be computed.
+ *  \param[out] force Array that will contain the correction force.
+ *
+ *  \return void
+ */
+void ewald_force(double x, double y, double z, double force[3])
+{
+  static int printed = 0; /* the summation limits are logged only on the first call */
+  for(int i = 0; i < 3; i++)
+    force[i] = 0;
+  double r2 = x * x + y * y + z * z;
+
+  if(r2 == 0)
+    return; /* zero separation: the correction is left at zero */
+
+  double lmin   = imin(imin(STRETCHX, STRETCHY), STRETCHZ);
+  double alpha  = 2.0 / lmin;
+  double alpha2 = alpha * alpha;
+
+  double r3inv = 1.0 / (r2 * sqrt(r2));
+
+  force[0] += r3inv * x;
+  force[1] += r3inv * y;
+  force[2] += r3inv * z;
+
+  const int nmax = 4; /* the real-space sum covers lattice offsets -nmax..nmax per axis */
+
+  for(int nx = -nmax; nx <= nmax; nx++)
+    for(int ny = -nmax; ny <= nmax; ny++)
+      for(int nz = -nmax; nz <= nmax; nz++)
+        {
+          double dx   = x - nx * STRETCHX;
+          double dy   = y - ny * STRETCHY;
+          double dz   = z - nz * STRETCHZ;
+          double dr2  = dx * dx + dy * dy + dz * dz;
+          double dr   = sqrt(dr2);
+          double val  = erfc(alpha * dr) + 2.0 * alpha * dr / sqrt(M_PI) * exp(-alpha2 * dr2);
+          double val2 = val / (dr2 * dr);
+
+          force[0] -= dx * val2;
+          force[1] -= dy * val2;
+          force[2] -= dz * val2;
+        }
+
+  int nxmax = (int)(2 * alpha * (STRETCHX / lmin) + 0.5);
+  int nymax = (int)(2 * alpha * (STRETCHY / lmin) + 0.5);
+  int nzmax = (int)(2 * alpha * (STRETCHZ / lmin) + 0.5);
+
+  if(printed == 0)
+    {
+      mpi_printf("EWALD: force tab: nxmax=%d nymax=%d nzmax=%d\n", nxmax, nymax, nzmax);
+      printed = 1;
+    }
+  /* reciprocal sum over k = 2 * pi * (hx / STRETCHX, hy / STRETCHY, hz / STRETCHZ), the wave vectors also used by ewald_psi() */
+  for(int hx = -nxmax; hx <= nxmax; hx++)
+    for(int hy = -nymax; hy <= nymax; hy++)
+      for(int hz = -nzmax; hz <= nzmax; hz++)
+        {
+          double kx = (2.0 * M_PI / (STRETCHX)) * hx, ky = (2.0 * M_PI / (STRETCHY)) * hy, kz = (2.0 * M_PI / (STRETCHZ)) * hz;
+          double k2 = kx * kx + ky * ky + kz * kz;
+          if(k2 > 0) /* the k = 0 term is excluded from the sum */
+            {
+              double kdotx = x * kx + y * ky + z * kz;
+              double val   = 4.0 * M_PI / (k2 * STRETCHX * STRETCHY * STRETCHZ) * exp(-k2 / (4.0 * alpha2)) * sin(kdotx);
+              force[0] -= kx * val; /* for STRETCHX = STRETCHY = STRETCHZ = 1 this equals hx * 2 / h^2 * exp(-pi^2 h^2 / alpha^2) * sin(2 pi h.x) */
+              force[1] -= ky * val;
+              force[2] -= kz * val;
+            }
+        }
+}
+
+#endif /* #if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */
diff --git a/src/amuse/community/arepo/src/gravity/forcetree_optimizebalance.c b/src/amuse/community/arepo/src/gravity/forcetree_optimizebalance.c
new file mode 100644
index 0000000000..3289af844c
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/forcetree_optimizebalance.c
@@ -0,0 +1,486 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/forcetree_optimizebalance.c
+ * \date        05/2018
+ * \brief       Optimizes the mapping of domain segments to tasks for the gravity
+ *              work-load balance, using red-black trees based on BSD macros.
+ * \details     contains functions:
+ *                int force_sort_load(const void *a, const void *b)
+ *                double force_get_current_balance(double *impact)
+ *                void force_get_global_cost_for_leavenodes(int nexport)
+ *                static int mydata_cmp(struct mydata *lhs, struct mydata *rhs)
+ *                void force_optimize_domain_mapping(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 20.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/bsd_tree.h"
+#include "../domain/domain.h"
+
+/*! \brief Node of the red-black priority trees used in force_optimize_domain_mapping().
+ */
+struct mydata
+{
+  double pri; /* priority, the primary sort key in mydata_cmp() */
+  int target; /* task index this node stands for; secondary sort key in mydata_cmp() */
+  RB_ENTRY(mydata) linkage; /* this creates the linkage pointers needed by the RB tree, using symbolic name 'linkage' */
+};
+
+/* prototype of the comparison function for tree elements; it is handed to the RB macros below */
+static int mydata_cmp(struct mydata *lhs, struct mydata *rhs);
+
+/* the following macro declares 'struct mytree', which is the header element needed as handle for a tree */
+RB_HEAD(mytree, mydata);
+
+/* the following macros declare appropriate function prototypes and functions needed for this type of tree */
+RB_PROTOTYPE_STATIC(mytree, mydata, linkage, mydata_cmp);
+RB_GENERATE_STATIC(mytree, mydata, linkage, mydata_cmp);
+
+/*! \brief Data structure that describes a force-segment, i.e. a contiguous range of top-level leaves.
+ */
+static struct force_segments_data
+{
+  int start, end, task; /* first and last (inclusive) top-leaf index of the segment, and the task it gets assigned to */
+  double work, cost, count, normalized_load; /* scaled cost and particle count; work and normalized_load each hold their sum */
+} * force_domainAssign;
+
+/*! \brief qsort() kernel that orders force segments by decreasing load.
+ *
+ *  Segments with a larger normalized_load are placed first.
+ *
+ *  \param[in] a Pointer to the first force_segments_data element.
+ *  \param[in] b Pointer to the second force_segments_data element.
+ *
+ *  \return -1 if a has the larger normalized_load, +1 if the smaller, else 0.
+ */
+int force_sort_load(const void *a, const void *b)
+{
+  const struct force_segments_data *seg_a = (const struct force_segments_data *)a;
+  const struct force_segments_data *seg_b = (const struct force_segments_data *)b;
+  double load_a                           = seg_a->normalized_load;
+  double load_b                           = seg_b->normalized_load;
+
+  /* descending order: the heavier segment compares as "smaller" */
+  return (load_a > load_b) ? -1 : ((load_a < load_b) ? +1 : 0);
+}
+
+static double oldmax, oldsum; /* maximum and sum of TaskCost[] as found by force_get_current_balance() */
+
+/*! \brief Determines the present work balance from the per-task costs.
+ *
+ *  \param[out] impact Impact factor of imbalance (1 if optimally balanced).
+ *
+ *  \return Domain balance = max(cost) / average(cost).
+ */
+double force_get_current_balance(double *impact)
+{
+#ifdef NO_MPI_IN_PLACE
+  /* no in-place reduction: sum up from a temporary copy of TaskCost */
+  double *inTaskCost = mymalloc("inTaskCost", NTask * sizeof(double));
+  memcpy(inTaskCost, TaskCost, NTask * sizeof(double));
+  MPI_Allreduce(inTaskCost, TaskCost, NTask, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  myfree(inTaskCost);
+#else  /* #ifdef NO_MPI_IN_PLACE */
+  MPI_Allreduce(MPI_IN_PLACE, TaskCost, NTask, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+#endif /* #ifdef NO_MPI_IN_PLACE #else */
+
+  oldmax = oldsum = 0;
+  for(int task = 0; task < NTask; task++)
+    {
+      oldsum += TaskCost[task];
+      oldmax = (oldmax < TaskCost[task]) ? TaskCost[task] : oldmax;
+    }
+
+  double average = oldsum / NTask;
+  *impact        = 1.0 + domain_grav_weight[All.HighestActiveTimeBin] * (oldmax - average) / All.TotGravCost;
+
+  return oldmax / average;
+}
+
+/*! \brief Adds exported cost/count contributions to DomainCost/DomainCount and shares the per-leaf values among all tasks.
+ *
+ *  \param[in] nexport Number of entries in ListNoData that are to be exported.
+ *
+ *  \return void
+ */
+void force_get_global_cost_for_leavenodes(int nexport)
+{
+  int i, j, n, nimport, idx, task, ngrp;
+
+  struct node_data
+  {
+    double domainCost;
+    int domainCount;
+    int no;
+  } * export_node_data, *import_node_data;
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); /* NOTE(review): Send_count is read here, so it is presumably filled by the caller -- confirm */
+
+  for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
+    {
+      nimport += Recv_count[j];
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  for(j = 0; j < NTask; j++)
+    Send_count[j] = 0; /* reused below as per-task fill counter while packing the export buffer */
+
+  export_node_data = mymalloc("export_node_data", nexport * sizeof(struct node_data));
+  import_node_data = mymalloc("import_node_data", nimport * sizeof(struct node_data));
+
+  for(i = 0; i < nexport; i++)
+    {
+      int task = ListNoData[i].task; /* shadows the function-scope variable 'task' */
+      int ind  = Send_offset[task] + Send_count[task]++;
+
+      export_node_data[ind].domainCost  = ListNoData[i].domainCost;
+      export_node_data[ind].domainCount = ListNoData[i].domainCount;
+      export_node_data[ind].no          = ListNoData[i].no;
+    }
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++) /* pairwise exchange with partner task ThisTask ^ ngrp */
+    {
+      int recvTask = ThisTask ^ ngrp;
+      if(recvTask < NTask)
+        if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+          MPI_Sendrecv(&export_node_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct node_data), MPI_BYTE, recvTask,
+                       TAG_DENS_B, &import_node_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct node_data), MPI_BYTE,
+                       recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+
+  for(i = 0; i < nimport; i++) /* accumulate the received contributions */
+    {
+      int no = import_node_data[i].no;
+      DomainCost[no] += import_node_data[i].domainCost;
+      DomainCount[no] += import_node_data[i].domainCount;
+    }
+
+  myfree(import_node_data);
+  myfree(export_node_data);
+
+  /* now share the cost data across all processors */
+  struct DomainNODE
+  {
+    double domainCost;
+    int domainCount;
+  } * DomainMoment, *loc_DomainMoment;
+
+  DomainMoment = (struct DomainNODE *)mymalloc("DomainMoment", NTopleaves * sizeof(struct DomainNODE));
+
+  /* share the cost data across CPUs */
+  int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask);
+  int *recvoffset = (int *)mymalloc("recvoffset", sizeof(int) * NTask);
+  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
+  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);
+
+  for(task = 0; task < NTask; task++)
+    recvcounts[task] = 0;
+
+  for(n = 0; n < NTopleaves; n++)
+    recvcounts[DomainTask[n]]++; /* number of top-level leaves that DomainTask maps to each task */
+
+  for(task = 0; task < NTask; task++)
+    bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE);
+
+  for(task = 1, recvoffset[0] = 0, byteoffset[0] = 0; task < NTask; task++)
+    {
+      recvoffset[task] = recvoffset[task - 1] + recvcounts[task - 1];
+      byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+    }
+
+  loc_DomainMoment = (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE));
+
+  for(n = 0, idx = 0; n < NTopleaves; n++) /* pack the entries of this task in increasing leaf order */
+    {
+      if(DomainTask[n] == ThisTask)
+        {
+          loc_DomainMoment[idx].domainCost  = DomainCost[n];
+          loc_DomainMoment[idx].domainCount = DomainCount[n];
+          idx++;
+        }
+    }
+
+  MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);
+
+  for(task = 0; task < NTask; task++)
+    recvcounts[task] = 0; /* reused as running read position within each task's slice of DomainMoment */
+
+  for(n = 0; n < NTopleaves; n++)
+    {
+      task = DomainTask[n];
+      if(task != ThisTask) /* entries of this task are the values that were sent and are left untouched */
+        {
+          idx = recvoffset[task] + recvcounts[task]++;
+
+          DomainCost[n]  = DomainMoment[idx].domainCost;
+          DomainCount[n] = DomainMoment[idx].domainCount;
+        }
+    }
+
+  myfree(loc_DomainMoment);
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(recvoffset);
+  myfree(recvcounts);
+  myfree(DomainMoment);
+}
+
+/*! \brief Ordering relation for the nodes of the priority trees.
+ *
+ *  Nodes are ordered by
+ *    - pri, and if that does not decide, by
+ *    - target
+ *
+ *  \param[in] lhs Left-hand mydata object.
+ *  \param[in] rhs Right-hand mydata object.
+ *
+ *  \return -1 if lhs sorts before rhs, 1 if it sorts after rhs, 0 otherwise.
+ */
+static int mydata_cmp(struct mydata *lhs, struct mydata *rhs)
+{
+  /* three-way comparison of the priorities: -1, 0 or 1 */
+  int by_pri = (lhs->pri > rhs->pri) - (lhs->pri < rhs->pri);
+
+  /* three-way comparison of the task indices, consulted only as tie-breaker */
+  int by_target = (lhs->target > rhs->target) - (lhs->target < rhs->target);
+
+  if(by_pri != 0)
+    return by_pri;
+
+  return by_target;
+}
+
+/*! \brief Optimization algorithm for the workload balance: cuts the top-level leaves into segments and assigns them to tasks.
+ *
+ *  \return void
+ */
+void force_optimize_domain_mapping(void)
+{
+  int i, j;
+
+  double fac_cost  = 0.5 / oldsum; /* NOTE(review): assumes oldsum > 0, i.e. force_get_current_balance() found non-zero cost -- confirm */
+  double fac_count = 0.5 / All.TotNumPart;
+
+  int ncpu              = NTask * All.MultipleDomains; /* number of segments to create */
+  int ndomain           = NTopleaves;
+  double workavg        = 1.0 / ncpu;
+  double workhalfnode   = 0.5 / NTopleaves;
+  double work_before    = 0; /* work of all segments completed so far */
+  double workavg_before = 0; /* target work of all segments completed so far */
+
+  int start = 0;
+
+  force_domainAssign = mymalloc("force_domainAssign", ncpu * sizeof(struct force_segments_data));
+
+  for(i = 0; i < ncpu; i++)
+    {
+      double work = 0, cost = 0, count = 0;
+      int end = start;
+
+      cost += fac_cost * DomainCost[end];
+      count += fac_count * DomainCount[end];
+      work += fac_cost * DomainCost[end] + fac_count * DomainCount[end];
+
+      while((work + work_before + (end + 1 < NTopleaves ? fac_cost * DomainCost[end + 1] + fac_count * DomainCount[end + 1] : 0) <
+             workavg + workavg_before + workhalfnode) ||
+            (i == ncpu - 1 && end < ndomain - 1))
+        {
+          if((ndomain - end) > (ncpu - i)) /* extend only while enough leaves remain for the segments still to come */
+            end++;
+          else
+            break;
+
+          cost += fac_cost * DomainCost[end];
+          count += fac_count * DomainCount[end];
+          work += fac_cost * DomainCost[end] + fac_count * DomainCount[end];
+        }
+
+      force_domainAssign[i].start = start;
+      force_domainAssign[i].end   = end;
+      force_domainAssign[i].work  = work;
+      force_domainAssign[i].cost  = cost;
+      force_domainAssign[i].count = count;
+
+      force_domainAssign[i].normalized_load = cost + count; /* note: they are already multiplied by fac_cost/fac_count */
+
+      work_before += work;
+      workavg_before += workavg;
+      start = end + 1;
+    }
+
+  qsort(force_domainAssign, ncpu, sizeof(struct force_segments_data), force_sort_load); /* largest normalized_load first */
+
+  /* create three priority trees, one for the cost load, one for the particle count, and one for the combined cost */
+  struct mytree queues[3]; /* 0=cost, 1=count, 2=combi */
+
+  struct mydata *ncost  = mymalloc("ncost", NTask * sizeof(struct mydata));
+  struct mydata *ncount = mymalloc("ncount", NTask * sizeof(struct mydata));
+  struct mydata *ncombi = mymalloc("ncombi", NTask * sizeof(struct mydata));
+
+  RB_INIT(&queues[0]);
+  RB_INIT(&queues[1]);
+  RB_INIT(&queues[2]);
+
+  /* fill in all the tasks into the trees. The priority will be the current cost/count, the field 'target' is used to label the task */
+  for(i = 0; i < NTask; i++)
+    {
+      ncost[i].pri    = 0;
+      ncost[i].target = i;
+      RB_INSERT(mytree, &queues[0], &ncost[i]);
+
+      ncount[i].pri    = 0;
+      ncount[i].target = i;
+      RB_INSERT(mytree, &queues[1], &ncount[i]);
+
+      ncombi[i].pri    = 0;
+      ncombi[i].target = i;
+      RB_INSERT(mytree, &queues[2], &ncombi[i]);
+    }
+
+  double max_load = 0; /* largest accumulated count of any task so far */
+  double max_cost = 0; /* largest accumulated cost of any task so far */
+
+  int n_lowest = MAX_FIRST_ELEMENTS_CONSIDERED;
+  if(n_lowest > NTask)
+    n_lowest = NTask;
+
+  int rep, *candidates = mymalloc("candidates", n_lowest * sizeof(int));
+  struct mydata *np;
+
+  for(i = 0; i < ncpu; i++)
+    {
+      /* pick the least work-loaded target from the queue, and the least particle-loaded, and then decide which choice
+         gives the smallest load overall */
+      double cost, load;
+      double bestwork = 1.0e30;
+      int q, target = -1; /* NOTE(review): target stays -1 if no candidate yields w < bestwork (e.g. NaN); it is used as index below -- confirm unreachable */
+
+      for(q = 0; q < 3; q++)
+        {
+          /* look up the n_lowest smallest elements from the tree */
+          for(np = RB_MIN(mytree, &queues[q]), rep = 0; np != NULL && rep < n_lowest; np = RB_NEXT(mytree, &queues[q], np), rep++)
+            candidates[rep] = np->target;
+
+          for(rep = 0; rep < n_lowest; rep++)
+            {
+              int t = candidates[rep];
+
+              cost = ncost[t].pri + force_domainAssign[i].cost;
+              load = ncount[t].pri + force_domainAssign[i].count;
+              if(cost < max_cost) /* values below the current maxima are raised to them before comparing */
+                cost = max_cost;
+              if(load < max_load)
+                load = max_load;
+              double w = cost + load;
+              if(w < bestwork)
+                {
+                  bestwork = w;
+                  target   = t;
+                }
+            }
+        }
+
+      force_domainAssign[i].task = target;
+
+      cost = ncost[target].pri + force_domainAssign[i].cost;
+      load = ncount[target].pri + force_domainAssign[i].count;
+
+      RB_REMOVE(mytree, &queues[0], &ncost[target]); /* remove, update the priority, re-insert: done for each of the three trees */
+      ncost[target].pri = cost;
+      RB_INSERT(mytree, &queues[0], &ncost[target]);
+
+      RB_REMOVE(mytree, &queues[1], &ncount[target]);
+      ncount[target].pri = load;
+      RB_INSERT(mytree, &queues[1], &ncount[target]);
+
+      RB_REMOVE(mytree, &queues[2], &ncombi[target]);
+      ncombi[target].pri = cost + load;
+      RB_INSERT(mytree, &queues[2], &ncombi[target]);
+
+      if(max_cost < cost)
+        max_cost = cost;
+
+      if(max_load < load)
+        max_load = load;
+    }
+
+  myfree(candidates);
+
+  /* free tree nodes again */
+  myfree(ncombi);
+  myfree(ncount);
+  myfree(ncost);
+
+  for(i = 0; i < ncpu; i++) /* write the chosen task to every top-level leaf of each segment */
+    for(j = force_domainAssign[i].start; j <= force_domainAssign[i].end; j++)
+      DomainNewTask[j] = force_domainAssign[i].task;
+
+  myfree(force_domainAssign);
+
+  for(i = 0; i < NTask; i++)
+    {
+      TaskCost[i]  = 0;
+      TaskCount[i] = 0;
+    }
+
+  for(i = 0; i < NTopleaves; i++) /* recompute the per-task totals for the new mapping */
+    {
+      TaskCost[DomainNewTask[i]] += DomainCost[i];
+      TaskCount[DomainNewTask[i]] += DomainCount[i];
+    }
+
+  double max, sum, maxload, sumload;
+  for(i = 0, max = sum = 0, maxload = sumload = 0; i < NTask; i++)
+    {
+      sum += TaskCost[i];
+      if(max < TaskCost[i])
+        max = TaskCost[i];
+      sumload += TaskCount[i];
+      if(maxload < TaskCount[i])
+        maxload = TaskCount[i];
+    }
+
+  mpi_printf("FORCETREE: Active-TimeBin=%d  [unoptimized work-balance=%g]  new work-balance=%g, new load-balance=%g\n",
+             All.HighestActiveTimeBin, oldmax / (oldsum / NTask), max / (sum / NTask), maxload / (sumload / NTask));
+
+  if((max / (sum / NTask) > oldmax / (oldsum / NTask)) || (maxload > All.MaxPart)) /* new mapping is rejected in either case */
+    {
+      mpi_printf(
+          "FORCETREE: The work-load is either worse than before or the memory-balance is not viable. We keep the old distribution.\n");
+      memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int));
+    }
+}
diff --git a/src/amuse/community/arepo/src/gravity/forcetree_walk.c b/src/amuse/community/arepo/src/gravity/forcetree_walk.c
new file mode 100644
index 0000000000..b773024cea
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/forcetree_walk.c
@@ -0,0 +1,709 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/forcetree_walk.c
+ * \date        05/2018
+ * \brief       Gravitational tree walk code.
+ * \details     This file contains the various gravitational tree walks.
+ *              contains functions:
+ *                void force_short_range_init(void)
+ *                int force_treeevaluate(gravdata_in * in, gravdata_out * out,
+ *                  int target, int mode, int thread_id, int numnodes, int
+ *                  *firstnode, int measure_cost_flag)
+ *                int tree_treefind_export_node_threads(int no, int i, int
+ *                  thread_id)
+ *                void force_evaluate_direct(int target, int result_idx,
+ *                  int nimport)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 16.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief Short-range lookup table for the force, filled by force_short_range_init().
+ *
+ *  Contains the factor needed for the short range
+ *  contribution of the tree to the gravity force.
+ */
+static float shortrange_table[NTAB + 1];
+
+/*! \brief Short-range lookup table for the potential, filled by force_short_range_init().
+ *
+ *  Contains the factor needed for the short range
+ *  contribution of the tree to the potential energy.
+ */
+static float shortrange_table_potential[NTAB + 1];
+
+/*! \brief Fills the short-range lookup tables.
+ *
+ *  Tabulates the factors built from the complementary error function that
+ *  are needed for the short-range part of the gravity force/potential in
+ *  case of the TreePM algorithm, at NTAB + 1 equidistant arguments.
+ *
+ *  \return void
+ */
+void force_short_range_init(void)
+{
+  const double du = (RCUT / 2.0) / NTAB; /* spacing of the tabulated argument u */
+
+  for(int bin = 0; bin <= NTAB; bin++)
+    {
+      const double u = du * bin;
+
+      /* -g'(r) - 1/r^2; the u = 0 entry is set to zero instead of evaluating 0/0 */
+      shortrange_table[bin] = (u > 0) ? (erfc(u) + 2.0 * u / sqrt(M_PI) * exp(-u * u) - 1.0) / (u * u) : 0;
+
+      shortrange_table_potential[bin] = -erfc(u); /* -r * g(r) */
+    }
+}
+
+/*! \brief This routine calculates the (short range) force contribution
+ *   for a given particle in case the Tree(PM) algorithm is used.
+ *
+ *  In the TreePM algorithm, the tree is walked only locally around the
+ *  target coordinate.  Tree nodes that fall outside a box of half
+ *  side-length Rcut= RCUT*ASMTH*MeshSize can be discarded. The short-range
+ *  potential is modified by a complementary error function, multiplied
+ *  with the Newtonian form. The resulting short-range suppression compared
+ *  to the Newtonian force is tabulated, because looking up from this table
+ *  is faster than recomputing the corresponding factor, despite the
+ *  memory-access penalty (which reduces cache performance) incurred by the
+ *  table.
+ *
+ *  Depending on the value of TypeOfOpeningCriterion, either the geometrical BH
+ *  cell-opening criterion, or the `relative' opening criterion is used.
+ *
+ *  \param[in] in Gravdata communicated into function.
+ *  \param[in, out] out Gravdata communicated from function.
+ *  \param[in] target Index of the particle to be processed.
+ *  \param[in] mode 0: process local particle (phase 1), 1: process imported
+ *             particle (phase 2).
+ *  \param[in] thread_id Id of this thread.
+ *  \param[in, out] firstnode First node involved in this algorithm.
+ *  \param[in] measure_cost_flag Whether the cost of the tree walk should be
+ *             measured.
+ *
+ *  \return Number of interactions processed for particle i.
+ */
+int force_treeevaluate(gravdata_in *in, gravdata_out *out, int target, int mode, int thread_id, int numnodes, int *firstnode,
+                       int measure_cost_flag)
+{
+  struct NODE *nop = NULL;
+#ifdef MULTIPLE_NODE_SOFTENING
+  struct ExtNODE *extnop = 0;
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+#if !defined(GRAVITY_NOT_PERIODIC)
+  double xtmp, ytmp, ztmp;
+#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */
+
+  double acc_x = 0;
+  double acc_y = 0;
+  double acc_z = 0;
+#ifdef EVALPOTENTIAL
+  double pot = 0.0;
+#endif /* #ifdef EVALPOTENTIAL */
+
+  int ninteractions = 0;
+
+  double pos_x = in->Pos[0];
+  double pos_y = in->Pos[1];
+  double pos_z = in->Pos[2];
+  double aold  = All.ErrTolForceAcc * in->OldAcc;
+  double h_i   = All.ForceSoftening[in->SofteningType];
+
+#ifdef PMGRID
+  double rcut  = All.Rcut[0];
+  double asmth = All.Asmth[0];
+#ifdef PLACEHIGHRESREGION
+  if(pmforce_is_particle_high_res(in->Type, in->Pos))
+    {
+      rcut  = All.Rcut[1];
+      asmth = All.Asmth[1];
+    }
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+  double rcut2     = rcut * rcut;
+  double asmthinv  = 0.5 / asmth;
+  double asmthinv2 = asmthinv * asmthinv;
+  double asmthfac  = asmthinv * (NTAB / (RCUT / 2.0));
+#endif /* #ifdef PMGRID */
+
+  for(int k = 0; k < numnodes; k++)
+    {
+      int no;
+
+      if(mode == 0)
+        no = Tree_MaxPart; /* root node */
+      else
+        {
+          no = firstnode[k];
+          no = Nodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0)
+        {
+          double dx, dy, dz, r2, mass, hmax;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+          int indi_flag1 = -1, indi_flag2 = 0;
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+          if(no < Tree_MaxPart) /* single particle */
+            {
+              dx = GRAVITY_NEAREST_X(Tree_Pos_list[3 * no + 0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(Tree_Pos_list[3 * no + 1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(Tree_Pos_list[3 * no + 2] - pos_z);
+              r2 = dx * dx + dy * dy + dz * dz;
+
+              mass = P[no].Mass;
+
+              if(measure_cost_flag)
+                Thread[thread_id].P_CostCount[no]++;
+
+              double h_j = All.ForceSoftening[P[no].SofteningType];
+
+              hmax = (h_j > h_i) ? h_j : h_i;
+
+              no = Nextnode[no];
+            }
+          else if(no < Tree_MaxPart + Tree_MaxNodes) /* we have an  internal node */
+            {
+              if(mode == 1)
+                {
+                  if(no <
+                     Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */
+                    {
+                      no = -1;
+                      continue;
+                    }
+                }
+
+              nop = &Nodes[no];
+
+              mass = nop->u.d.mass;
+              dx   = GRAVITY_NEAREST_X(nop->u.d.s[0] - pos_x);
+              dy   = GRAVITY_NEAREST_Y(nop->u.d.s[1] - pos_y);
+              dz   = GRAVITY_NEAREST_Z(nop->u.d.s[2] - pos_z);
+
+              r2 = dx * dx + dy * dy + dz * dz;
+
+#if defined(PMGRID)
+              if(r2 > rcut2)
+                {
+                  /* check whether we can stop walking along this branch */
+                  double eff_dist = rcut + 0.5 * nop->len;
+
+                  double dist = GRAVITY_NEAREST_X(nop->center[0] - pos_x);
+                  if(dist < -eff_dist || dist > eff_dist)
+                    {
+                      no = nop->u.d.sibling;
+                      continue;
+                    }
+
+                  dist = GRAVITY_NEAREST_Y(nop->center[1] - pos_y);
+                  if(dist < -eff_dist || dist > eff_dist)
+                    {
+                      no = nop->u.d.sibling;
+                      continue;
+                    }
+
+                  dist = GRAVITY_NEAREST_Z(nop->center[2] - pos_z);
+                  if(dist < -eff_dist || dist > eff_dist)
+                    {
+                      no = nop->u.d.sibling;
+                      continue;
+                    }
+                }
+#endif /* #if defined(PMGRID) */
+
+              if(All.ErrTolTheta) /* check Barnes-Hut opening criterion */
+                {
+                  if(nop->len * nop->len > r2 * All.ErrTolTheta * All.ErrTolTheta)
+                    {
+                      /* open cell */
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+                }
+              else /* check relative opening criterion */
+                {
+                  double len2 = nop->len * nop->len;
+
+                  if(len2 > r2 * (1.2 * 1.2)) /* add a worst case protection */
+                    {
+                      /* open cell */
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+
+                    // note that aold is strictly speaking |acceleration| / G
+#ifdef ACTIVATE_MINIMUM_OPENING_ANGLE
+                  if(mass * len2 > r2 * r2 * aold && len2 > r2 * (0.4 * 0.4))
+#else  /* #ifdef ACTIVATE_MINIMUM_OPENING_ANGLE */
+                  if(mass * len2 > r2 * r2 * aold)
+#endif /* #ifdef ACTIVATE_MINIMUM_OPENING_ANGLE #else */
+                    {
+                      /* open cell */
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+
+                  /* check in addition whether we lie inside or very close to the cell */
+                  if(fabs(GRAVITY_NEAREST_X(nop->center[0] - pos_x)) < 0.60 * nop->len)
+                    {
+                      if(fabs(GRAVITY_NEAREST_Y(nop->center[1] - pos_y)) < 0.60 * nop->len)
+                        {
+                          if(fabs(GRAVITY_NEAREST_Z(nop->center[2] - pos_z)) < 0.60 * nop->len)
+                            {
+                              no = nop->u.d.nextnode;
+                              continue;
+                            }
+                        }
+                    }
+                }
+
+              double h_j = All.ForceSoftening[nop->u.d.maxsofttype];
+
+              if(h_j > h_i)
+                {
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  if(nop->u.d.maxhydrosofttype != nop->u.d.minhydrosofttype)
+                    if(ExtNodes[no].mass_per_type[0] > 0)
+                      if(r2 < All.ForceSoftening[nop->u.d.maxhydrosofttype] * All.ForceSoftening[nop->u.d.maxhydrosofttype])
+                        {
+                          /* open cell */
+                          no = nop->u.d.nextnode;
+                          continue;
+                        }
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                  indi_flag1 = 0;
+                  indi_flag2 = NSOFTTYPES;
+#else  /* #ifdef MULTIPLE_NODE_SOFTENING */
+                  if(r2 < h_j * h_j)
+                    {
+                      /* open cell */
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING #else */
+                  hmax = h_j;
+                }
+              else
+                hmax = h_i;
+
+                /* ok, node can be used */
+#ifdef MULTIPLE_NODE_SOFTENING
+              extnop = &ExtNodes[no];
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+              if(measure_cost_flag && mass)
+                Thread[thread_id].Node_CostCount[no]++;
+
+              no = nop->u.d.sibling;
+            }
+          else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */
+            {
+              int n = no - Tree_ImportedNodeOffset;
+
+              dx = GRAVITY_NEAREST_X(Tree_Points[n].Pos[0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(Tree_Points[n].Pos[1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(Tree_Points[n].Pos[2] - pos_z);
+
+              r2 = dx * dx + dy * dy + dz * dz;
+
+              mass = Tree_Points[n].Mass;
+
+              if(measure_cost_flag)
+                Thread[thread_id].TreePoints_CostCount[n]++;
+
+              double h_j = All.ForceSoftening[Tree_Points[n].SofteningType];
+
+              hmax = (h_j > h_i) ? h_j : h_i;
+
+              no = Nextnode[no - Tree_MaxNodes];
+            }
+          else /* pseudo particle */
+            {
+              if(mode == 0)
+                {
+                  tree_treefind_export_node_threads(no, target, thread_id);
+                }
+
+              no = Nextnode[no - Tree_MaxNodes];
+              continue;
+            }
+
+          /* now evaluate the multipole moment */
+          if(mass)
+            {
+              double r = sqrt(r2);
+
+#ifdef PMGRID
+              double tabentry = asmthfac * r;
+              int tabindex    = (int)tabentry;
+
+              if(tabindex < NTAB)
+                {
+                  double tabweight    = tabentry - tabindex;
+                  double factor_force = (1.0 - tabweight) * shortrange_table[tabindex] + tabweight * shortrange_table[tabindex + 1];
+#ifdef EVALPOTENTIAL
+                  double factor_pot =
+                      (1.0 - tabweight) * shortrange_table_potential[tabindex] + tabweight * shortrange_table_potential[tabindex + 1];
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #ifdef PMGRID */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+                  for(int type = indi_flag1; type < indi_flag2; type++)
+                    {
+                      if(type >= 0)
+                        {
+                          mass = extnop->mass_per_type[type];
+                          double h_j;
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                          if(type == 0)
+                            h_j = All.ForceSoftening[nop->u.d.maxhydrosofttype];
+                          else
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                            h_j = All.ForceSoftening[type];
+
+                          hmax = (h_j > h_i) ? h_j : h_i;
+                        }
+
+                      if(mass)
+                        {
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                          double fac;
+#ifdef EVALPOTENTIAL
+                          double wp;
+#endif /* #ifdef EVALPOTENTIAL */
+
+                          if(r >= hmax)
+                            {
+                              double rinv  = 1.0 / r;
+                              double rinv3 = rinv * rinv * rinv;
+#ifdef PMGRID
+                              fac = rinv3 + rinv * factor_force * asmthinv2; /* fac  = -g'(r)/r */
+#ifdef EVALPOTENTIAL
+                              wp = rinv * factor_pot; /* wp   = -g(r)    */
+#endif                                                /* #ifdef EVALPOTENTIAL */
+#else                                                 /* #ifdef PMGRID */
+                  fac = rinv3;
+#ifdef EVALPOTENTIAL
+                  wp  = -rinv;
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #ifdef PMGRID #else */
+                            }
+                          else
+                            {
+                              double h_inv  = 1.0 / hmax;
+                              double h3_inv = h_inv * h_inv * h_inv;
+                              double u      = r * h_inv;
+
+                              if(u < 0.5)
+                                {
+                                  double u2 = u * u;
+                                  fac       = h3_inv * (SOFTFAC1 + u2 * (SOFTFAC2 * u + SOFTFAC3));
+#ifdef EVALPOTENTIAL
+                                  wp = h_inv * (SOFTFAC4 + u2 * (SOFTFAC5 + u2 * (SOFTFAC6 * u + SOFTFAC7)));
+#endif /* #ifdef EVALPOTENTIAL */
+                                }
+                              else
+                                {
+                                  double u2 = u * u;
+                                  double u3 = u2 * u;
+                                  fac       = h3_inv * (SOFTFAC8 + SOFTFAC9 * u + SOFTFAC10 * u2 + SOFTFAC11 * u3 + SOFTFAC12 / u3);
+#ifdef EVALPOTENTIAL
+                                  wp = h_inv * (SOFTFAC13 + SOFTFAC14 / u +
+                                                u2 * (SOFTFAC1 + u * (SOFTFAC15 + u * (SOFTFAC16 + SOFTFAC17 * u))));
+#endif /* #ifdef EVALPOTENTIAL */
+                                }
+
+#ifdef PMGRID
+                              if(r > 0)
+                                {
+                                  double rinv = 1.0 / r;
+                                  fac += rinv * factor_force * asmthinv2; /* fac  = -g'(r)/r */
+#ifdef EVALPOTENTIAL
+                                  wp += rinv * (factor_pot + 1.0); /* wp   = -g(r)    */
+#endif                                                             /* #ifdef EVALPOTENTIAL */
+                                }
+#endif /* #ifdef PMGRID */
+                            }
+
+#ifdef EVALPOTENTIAL
+                          pot += mass * wp;
+#endif /* #ifdef EVALPOTENTIAL */
+                          fac *= mass;
+
+                          acc_x += dx * fac;
+                          acc_y += dy * fac;
+                          acc_z += dz * fac;
+
+#if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL)
+                          double fcorr[3];
+                          ewald_corr(dx, dy, dz, fcorr);
+                          acc_x += mass * fcorr[0];
+                          acc_y += mass * fcorr[1];
+                          acc_z += mass * fcorr[2];
+#ifdef EVALPOTENTIAL
+                          pot += mass * ewald_pot_corr(dx, dy, dz);
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+                        }
+                    }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                  ninteractions++;
+#ifdef PMGRID
+                }
+#endif /* #ifdef PMGRID */
+            }
+        }
+    }
+
+  out->Acc[0] = acc_x;
+  out->Acc[1] = acc_y;
+  out->Acc[2] = acc_z;
+#ifdef EVALPOTENTIAL
+  out->Potential = pot;
+#endif /* #ifdef EVALPOTENTIAL */
+#ifdef NO_GRAVITY_TYPE
+  if(in->Type == NO_GRAVITY_TYPE)
+    {
+      out->Acc[0] = 0.0;
+      out->Acc[1] = 0.0;
+      out->Acc[2] = 0.0;
+#ifdef EVALPOTENTIAL
+      out->Potential = 0.0;
+#endif /* #ifdef EVALPOTENTIAL */
+    }
+#endif /* #ifdef NO_GRAVITY_TYPE */
+#ifdef OUTPUTGRAVINTERACTIONS
+  out->GravInteractions = ninteractions;
+#endif /* #ifdef OUTPUTGRAVINTERACTIONS */
+
+  return ninteractions;
+}
+
+/*! \brief Prepares node to be exported: registers particle i and pseudo-particle node no in the export lists of a thread.
+ *
+ *  \param[in] no Index of node; no - (Tree_MaxPart + Tree_MaxNodes) is used as index into DomainNewTask and DomainNodeIndex.
+ *  \param[in] i Index of particle; also written to Exportflag[task], and no new PartList entry is made while the flag holds i.
+ *  \param[in] thread_id ID of thread, selects the Thread[] entry whose export buffers are filled.
+ *
+ *  \return 0 (always; ExportSpace is only decremented here, it is not checked)
+ */
+int tree_treefind_export_node_threads(int no, int i, int thread_id)
+{
+  /* The task indicated by the pseudoparticle node */
+  int task = DomainNewTask[no - (Tree_MaxPart + Tree_MaxNodes)];
+
+  if(Thread[thread_id].Exportflag[task] != i) /* flag for this task does not hold i yet: add a particle entry */
+    {
+      Thread[thread_id].Exportflag[task]     = i;
+      int nexp                               = Thread[thread_id].Nexport++;
+      Thread[thread_id].PartList[nexp].Task  = task;
+      Thread[thread_id].PartList[nexp].Index = i;
+      Thread[thread_id].ExportSpace -= Thread[thread_id].ItemSize;
+    }
+
+  int nexp                      = Thread[thread_id].NexportNodes++;
+  nexp                          = -1 - nexp; /* node entries use indices -1, -2, ..., i.e. they are stored below the nodelist base */
+  struct datanodelist *nodelist = (struct datanodelist *)(((char *)Thread[thread_id].PartList) + Thread[thread_id].InitialSpace);
+  nodelist[nexp].Task           = task;
+  nodelist[nexp].Index          = i;
+  nodelist[nexp].Node           = DomainNodeIndex[no - (Tree_MaxPart + Tree_MaxNodes)];
+  Thread[thread_id].ExportSpace -= sizeof(struct datanodelist) + sizeof(int); /* NOTE(review): purpose of the extra sizeof(int) is not visible here -- confirm */
+  return 0;
+}
+
+#ifdef ALLOW_DIRECT_SUMMATION
+/*! \brief Kernel of direct summation force calculation: sums the pair contributions of all import entries on one target.
+ *
+ *  \param[in] target Index of particle in import array (DirectDataAll) for which the force is evaluated.
+ *  \param[in] result_idx Index in result array (DirectAccOut) that receives the summed acceleration (and potential).
+ *  \param[in] nimport number of imported particles; entries 0 .. nimport-1 are summed, none is skipped (also not j == target).
+ *
+ *  \return void (results go to DirectAccOut[result_idx]; no factor All.G is applied in this function)
+ */
+void force_evaluate_direct(int target, int result_idx, int nimport)
+{
+#if !defined(GRAVITY_NOT_PERIODIC)
+  double xtmp, ytmp, ztmp; /* NOTE(review): not referenced by name below; presumably scratch for the GRAVITY_NEAREST_* macros -- confirm */
+#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */
+
+  double acc_x = 0;
+  double acc_y = 0;
+  double acc_z = 0;
+#ifdef EVALPOTENTIAL
+  double pot = 0.0;
+#endif /* #ifdef EVALPOTENTIAL */
+
+  double pos_x = DirectDataAll[target].Pos[0];
+  double pos_y = DirectDataAll[target].Pos[1];
+  double pos_z = DirectDataAll[target].Pos[2];
+  double h_i   = All.ForceSoftening[DirectDataAll[target].SofteningType];
+
+#ifdef PMGRID
+  double asmth = All.Asmth[0];
+#if defined(PLACEHIGHRESREGION)
+  int ptype_i = DirectDataAll[target].Type;
+  if(pmforce_is_particle_high_res(ptype_i, DirectDataAll[target].Pos))
+    asmth = All.Asmth[1];
+#endif /* #if defined(PLACEHIGHRESREGION) */
+  double asmthinv  = 0.5 / asmth;
+  double asmthinv2 = asmthinv * asmthinv;
+  double asmthfac  = asmthinv * (NTAB / (RCUT / 2.0)); /* converts a distance r into a (fractional) index of the short-range tables */
+#endif /* #ifdef PMGRID */
+
+  for(int j = 0; j < nimport; j++)
+    {
+      double h_j = All.ForceSoftening[DirectDataAll[j].SofteningType];
+
+      double hmax = (h_j > h_i) ? h_j : h_i; /* the pair is softened with the larger of the two softening lengths */
+
+      double dx = GRAVITY_NEAREST_X(DirectDataAll[j].Pos[0] - pos_x);
+      double dy = GRAVITY_NEAREST_Y(DirectDataAll[j].Pos[1] - pos_y);
+      double dz = GRAVITY_NEAREST_Z(DirectDataAll[j].Pos[2] - pos_z);
+
+      double r2 = dx * dx + dy * dy + dz * dz;
+
+      double mass = DirectDataAll[j].Mass;
+
+      /* now evaluate the force component */
+
+      double r = sqrt(r2);
+
+#ifdef PMGRID
+      double tabentry = asmthfac * r;
+      int tabindex    = (int)tabentry;
+
+      if(tabindex < NTAB) /* pairs beyond the tabulated range contribute nothing; this block is closed near the end of the loop body */
+        {
+          double tabweight    = tabentry - tabindex;
+          double factor_force = (1.0 - tabweight) * shortrange_table[tabindex] + tabweight * shortrange_table[tabindex + 1];
+#ifdef EVALPOTENTIAL
+          double factor_pot =
+              (1.0 - tabweight) * shortrange_table_potential[tabindex] + tabweight * shortrange_table_potential[tabindex + 1];
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #ifdef PMGRID */
+
+          double fac;
+#ifdef EVALPOTENTIAL
+          double wp;
+#endif /* #ifdef EVALPOTENTIAL */
+
+          if(r >= hmax) /* unsoftened branch: 1/r^3 force factor, with PMGRID combined with the tabulated short-range factor */
+            {
+              double rinv  = 1.0 / r;
+              double rinv3 = rinv * rinv * rinv;
+#ifdef PMGRID
+              fac = rinv3 + rinv * factor_force * asmthinv2; /* fac  = -g'(r)/r */
+#ifdef EVALPOTENTIAL
+              wp = rinv * factor_pot; /* wp   = -g(r)    */
+#endif                                /* #ifdef EVALPOTENTIAL */
+#else                                 /* #ifdef PMGRID */
+          fac = rinv3;
+#ifdef EVALPOTENTIAL
+          wp  = -rinv;
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #ifdef PMGRID #else */
+            }
+          else /* r < hmax: softened kernel, a polynomial in u = r / hmax built from the SOFTFAC* constants */
+            {
+              double h_inv  = 1.0 / hmax;
+              double h3_inv = h_inv * h_inv * h_inv;
+              double u      = r * h_inv;
+
+              if(u < 0.5)
+                {
+                  double u2 = u * u;
+                  fac       = h3_inv * (SOFTFAC1 + u2 * (SOFTFAC2 * u + SOFTFAC3));
+#ifdef EVALPOTENTIAL
+                  wp = h_inv * (SOFTFAC4 + u2 * (SOFTFAC5 + u2 * (SOFTFAC6 * u + SOFTFAC7)));
+#endif /* #ifdef EVALPOTENTIAL */
+                }
+              else
+                {
+                  double u2 = u * u;
+                  double u3 = u2 * u;
+                  fac       = h3_inv * (SOFTFAC8 + SOFTFAC9 * u + SOFTFAC10 * u2 + SOFTFAC11 * u3 + SOFTFAC12 / u3);
+#ifdef EVALPOTENTIAL
+                  wp = h_inv * (SOFTFAC13 + SOFTFAC14 / u + u2 * (SOFTFAC1 + u * (SOFTFAC15 + u * (SOFTFAC16 + SOFTFAC17 * u))));
+#endif /* #ifdef EVALPOTENTIAL */
+                }
+#ifdef PMGRID
+              if(r > 0) /* the table correction divides by r, so it is skipped for coincident positions */
+                {
+                  double rinv = 1.0 / r;
+                  fac += rinv * factor_force * asmthinv2; /* fac  = -g'(r)/r */
+#ifdef EVALPOTENTIAL
+                  wp += rinv * (factor_pot + 1.0); /* wp   = -g(r)    */
+#endif                                             /* #ifdef EVALPOTENTIAL */
+                }
+#endif /* #ifdef PMGRID */
+            }
+
+#ifdef EVALPOTENTIAL
+          pot += mass * wp;
+#endif /* #ifdef EVALPOTENTIAL */
+          fac *= mass;
+
+          acc_x += dx * fac;
+          acc_y += dy * fac;
+          acc_z += dz * fac;
+
+#if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL)
+          {
+            double fcorr[3];
+            ewald_corr(dx, dy, dz, fcorr);
+            acc_x += mass * fcorr[0];
+            acc_y += mass * fcorr[1];
+            acc_z += mass * fcorr[2];
+#if defined(EVALPOTENTIAL)
+            pot += mass * ewald_pot_corr(dx, dy, dz);
+#endif /* #if defined(EVALPOTENTIAL) */
+          }
+#endif /* #if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */
+
+#ifdef PMGRID
+        }
+#endif /* #ifdef PMGRID */
+    }
+
+  DirectAccOut[result_idx].Acc[0] = acc_x;
+  DirectAccOut[result_idx].Acc[1] = acc_y;
+  DirectAccOut[result_idx].Acc[2] = acc_z;
+#ifdef EVALPOTENTIAL
+  DirectAccOut[result_idx].Potential = pot;
+#endif /* #ifdef EVALPOTENTIAL */
+}
+#endif /* #ifdef ALLOW_DIRECT_SUMMATION */
diff --git a/src/amuse/community/arepo/src/gravity/grav_external.c b/src/amuse/community/arepo/src/gravity/grav_external.c
new file mode 100644
index 0000000000..784341a47b
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/grav_external.c
@@ -0,0 +1,579 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/grav_external.c
+ * \date        05/2018
+ * \brief       Special gravity routines for external forces.
+ * \details     contains functions:
+ *                void gravity_external(void)
+ *                static void gravity_external_get_force( double pos[3],
+ *                  int type, MyIDType ID, double acc[3], double *pot, int
+ *                  *flag_set )
+ *                void gravity_monopole_1d_spherical()
+ *                double enclosed_mass(double R)
+ *                void calc_exact_gravity_for_particle_type(void)
+ *                void special_particle_create_list()
+ *                void special_particle_update_list()
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+
+#ifdef EXTERNALGRAVITY
+static void gravity_external_get_force(double pos[3], int type, MyIDType ID, double acc[3], double *pot, int *flag_set);
+
+/*! \brief Main routine to add contribution of external gravitational potential
+ *  to accelerations of all active gravity particles.
+ *
+ *  Function is called in gravity() (in accel.c). Function also estimates
+ *  the gradient of the external acceleration by central differences and stores
+ *  its norm in P[i].dGravAccel; it is needed for the timestep criterion.
+ *
+ *  \return void
+ */
+void gravity_external(void)
+{
+  mpi_printf("EXTERNALGRAVITY: execute\n");
+
+  TIMER_START(CPU_TREE);
+
+  for(int idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      int i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0) /* negative entries of the active list are skipped */
+        continue;
+
+      double *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+      if(P[i].Type == 0)
+        pos = SphP[i].Center;
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[i].Pos;
+
+      double acc[3], pot;
+      int flag_set = 0; /* stays 0 unless gravity_external_get_force() changes it; the version in this file never writes it */
+      gravity_external_get_force(pos, P[i].Type, P[i].ID, acc, &pot, &flag_set);
+
+      if(flag_set) /* overwrite mode: the external force replaces the acceleration stored so far */
+        {
+          for(int k = 0; k < NUMDIMS; k++)
+            P[i].GravAccel[k] = acc[k];
+          for(int k = NUMDIMS; k < 3; k++)
+            P[i].GravAccel[k] = 0;
+          P[i].ExtPotential = pot;
+        }
+      else /* additive mode: the external force is added to the acceleration stored so far */
+        {
+          for(int k = 0; k < NUMDIMS; k++)
+            P[i].GravAccel[k] += acc[k];
+#ifdef EVALPOTENTIAL
+          P[i].Potential += pot;
+#endif
+          P[i].ExtPotential += pot;
+        }
+
+      double dx; /* finite-difference step: 0.1 times the cell radius (Type 0) or 0.1 times the softening length (other types) */
+      if(P[i].Type == 0)
+        dx = 0.1 * get_cell_radius(i);
+      else
+        dx = 0.1 * All.ForceSoftening[P[i].SofteningType];
+
+      P[i].dGravAccel = 0;
+      for(int dim = 0; dim < NUMDIMS; dim++)
+        {
+          double accL[3], posL[3]; /* force at the position shifted by -dx along dim */
+          for(int k = 0; k < 3; k++)
+            posL[k] = pos[k];
+          posL[dim] -= dx;
+          gravity_external_get_force(posL, P[i].Type, P[i].ID, accL, &pot, &flag_set);
+
+          double accR[3], posR[3]; /* force at the position shifted by +dx along dim */
+          for(int k = 0; k < 3; k++)
+            posR[k] = pos[k];
+          posR[dim] += dx;
+          gravity_external_get_force(posR, P[i].Type, P[i].ID, accR, &pot, &flag_set);
+
+          for(int k = 0; k < NUMDIMS; k++)
+            {
+              double dGrav = accR[k] - accL[k];
+              P[i].dGravAccel += dGrav * dGrav; /* accumulates the squared differences over all (dim, k) pairs */
+            }
+        }
+      P[i].dGravAccel = sqrt(P[i].dGravAccel) / (2. * dx); /* root of the summed squares divided by the central-difference width 2*dx */
+    }
+
+  TIMER_STOP(CPU_TREE);
+}
+
+/*! \brief Calculates the force from the external potential given a position.
+ *
+ *  \param[in] pos Position at which force is to be evaluated.
+ *  \param[in] type (unused)
+ *  \param[in] ID (unused)
+ *  \param[out] acc Acceleration array; zeroed first, then every enabled model (EXTERNALGY, STATICISO, STATICNFW, STATICHQ) adds to it.
+ *  \param[out] pot Pointer to potential; reset to 0 first and only assigned by the EXTERNALGY model in this function.
+ *  \param[in] flag_set (unused)
+ *
+ *  \return void
+ */
+static void gravity_external_get_force(double pos[3], int type, MyIDType ID, double acc[3], double *pot, int *flag_set)
+{
+  for(int k = 0; k < 3; k++)
+    acc[k] = 0;
+
+  *pot = 0;
+
+#ifdef EXTERNALGY
+  acc[1] += EXTERNALGY; /* constant acceleration along y */
+  *pot = -(EXTERNALGY)*pos[1];
+#endif /* #ifdef EXTERNALGY */
+
+#ifdef STATICISO
+  {
+    double r, m;
+    double dx, dy, dz;
+
+    dx = pos[0] - boxHalf_X; /* offset from the point (boxHalf_X, boxHalf_Y, boxHalf_Z) */
+    dy = pos[1] - boxHalf_Y;
+    dz = pos[2] - boxHalf_Z;
+
+    r = sqrt(dx * dx + dy * dy + dz * dz);
+
+    if(r > ISO_R200) /* mass grows linearly with r up to ISO_R200 and is constant (ISO_M200) beyond */
+      m = ISO_M200;
+    else
+      m = ISO_M200 * r / ISO_R200;
+
+#ifdef ISO_FRACTION
+    m *= ISO_FRACTION;
+#endif /* #ifdef ISO_FRACTION */
+
+    if(r > 0) /* skipped at r = 0, where dx / r is undefined */
+      {
+        acc[0] += -All.G * m * dx / r / (r * r + ISO_Eps * ISO_Eps);
+        acc[1] += -All.G * m * dy / r / (r * r + ISO_Eps * ISO_Eps);
+        acc[2] += -All.G * m * dz / r / (r * r + ISO_Eps * ISO_Eps);
+      }
+  }
+#endif /* #ifdef STATICISO */
+
+#ifdef STATICNFW
+  {
+    double r, m;
+    double dx, dy, dz;
+
+    dx = pos[0] - boxHalf_X; /* offset from the point (boxHalf_X, boxHalf_Y, boxHalf_Z) */
+    dy = pos[1] - boxHalf_Y;
+    dz = pos[2] - boxHalf_Z;
+
+    r = sqrt(dx * dx + dy * dy + dz * dz);
+    m = enclosed_mass(r);
+#ifdef NFW_DARKFRACTION
+    m *= NFW_DARKFRACTION;
+#endif /* #ifdef NFW_DARKFRACTION */
+    if(r > 0) /* skipped at r = 0 to avoid dividing by zero */
+      {
+        acc[0] += -All.G * m * dx / (r * r * r);
+        acc[1] += -All.G * m * dy / (r * r * r);
+        acc[2] += -All.G * m * dz / (r * r * r);
+      }
+  }
+#endif /* #ifdef STATICNFW */
+
+#ifdef STATICHQ
+  {
+    double r, m, a;
+    double dx, dy, dz;
+
+    dx = pos[0] - boxHalf_X; /* offset from the point (boxHalf_X, boxHalf_Y, boxHalf_Z) */
+    dy = pos[1] - boxHalf_Y;
+    dz = pos[2] - boxHalf_Z;
+
+    r = sqrt(dx * dx + dy * dy + dz * dz);
+
+    a = pow(All.G * HQ_M200 / (100 * All.Hubble * All.Hubble), 1.0 / 3) / HQ_C * sqrt(2 * (log(1 + HQ_C) - HQ_C / (1 + HQ_C)));
+
+    m = HQ_M200 * pow(r / (r + a), 2); /* mass used for the force at radius r: HQ_M200 * (r / (r + a))^2 */
+#ifdef HQ_DARKFRACTION
+    m *= HQ_DARKFRACTION;
+#endif /* #ifdef HQ_DARKFRACTION */
+    if(r > 0) /* skipped at r = 0 to avoid dividing by zero */
+      {
+        acc[0] += -All.G * m * dx / (r * r * r);
+        acc[1] += -All.G * m * dy / (r * r * r);
+        acc[2] += -All.G * m * dz / (r * r * r);
+      }
+  }
+#endif /* #ifdef STATICHQ */
+}
+#endif /* #ifdef EXTERNALGRAVITY */
+
+#ifdef ONEDIMS_SPHERICAL
+/*! \brief One-dimensional gravity in the spherically symmetric case: radial acceleration from the mass summed up to each gas cell.
+ *
+ *  \return void (sets P[i].GravAccel, and with EVALPOTENTIAL also P[i].Potential, for all i < NumGas)
+ */
+void gravity_monopole_1d_spherical()
+{
+  printf("Doing 1D gravity...\n");
+
+  int i;
+  double msum = All.CoreMass; /* running sum: All.CoreMass plus the masses of cells 0 .. i-1 */
+
+  for(i = 0; i < NumGas; i++) /* NOTE(review): the running sum presumably relies on cells being ordered by radius -- confirm */
+    {
+      double r0; /* lower radius for dm: midpoint of P[i] and P[i-1] positions, or All.CoreRadius for the first cell */
+      if(i > 0)
+        r0 = 0.5 * (P[i].Pos[0] + P[i - 1].Pos[0]);
+      else
+        r0 = All.CoreRadius;
+      double dm  = 4. / 3. * M_PI * (SphP[i].Center[0] * SphP[i].Center[0] * SphP[i].Center[0] - r0 * r0 * r0) * SphP[i].Density;
+      double rad = SphP[i].Center[0]; /* force is evaluated at the cell centre; dm above is density times shell volume between r0 and rad */
+
+      P[i].GravAccel[0] = -(msum + dm) * All.G / (rad * rad);
+
+#ifdef EVALPOTENTIAL
+      P[i].Potential = -(msum + dm) * All.G / rad;
+#endif /* #ifdef EVALPOTENTIAL */
+
+      msum += P[i].Mass; /* the full cell mass enters the sum only after this cell's own force was set */
+
+      P[i].GravAccel[1] = 0;
+      P[i].GravAccel[2] = 0;
+    }
+
+  printf("... 1D gravity done.\n");
+}
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+#ifdef STATICNFW
+/*! \brief Auxiliary function for static NFW potential.
+ *
+ *  \param[in] R Radius from center of potential; values larger than Rs * NFW_C are replaced by Rs * NFW_C.
+ *
+ *  \return Enclosed mass (which causes the external potential); the same value for every R >= Rs * NFW_C.
+ */
+double enclosed_mass(double R)
+{
+  /* NFW_Eps is in units of Rs, i.e. the length NFW_Eps * Rs appears in the logarithms below */
+
+  if(R > Rs * NFW_C)
+    R = Rs * NFW_C; /* clamp the radius, so the returned mass stops growing beyond Rs * NFW_C */
+
+  return fac * 4 * M_PI * RhoCrit * Dc * /* fac, RhoCrit, Dc and Rs are not local to this function; they are defined elsewhere */
+         (-(Rs * Rs * Rs * (1 - NFW_Eps + log(Rs) - 2 * NFW_Eps * log(Rs) + NFW_Eps * NFW_Eps * log(NFW_Eps * Rs))) /
+              ((NFW_Eps - 1) * (NFW_Eps - 1)) +
+          (Rs * Rs * Rs *
+           (Rs - NFW_Eps * Rs - (2 * NFW_Eps - 1) * (R + Rs) * log(R + Rs) + NFW_Eps * NFW_Eps * (R + Rs) * log(R + NFW_Eps * Rs))) /
+              ((NFW_Eps - 1) * (NFW_Eps - 1) * (R + Rs)));
+}
+#endif /* #ifdef STATICNFW */
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+/*! \brief Routine that adds the softened force of every entry of PartSpecialListGlobal to each active particle by direct summation.
+ *
+ *  Called by gravity() (in accel.c). With EXACT_GRAVITY_REACTION the reaction on the special particles is summed over all tasks.
+ *
+ *  \return void
+ */
+void calc_exact_gravity_for_particle_type(void)
+{
+  int i, idx;
+#ifdef EXACT_GRAVITY_REACTION
+  double *accx, *accy, *accz; /* per-special-particle reaction accumulated on this task, indexed like PartSpecialListGlobal */
+  accx = (double *)mymalloc("accx", All.MaxPartSpecial * sizeof(double));
+  accy = (double *)mymalloc("accy", All.MaxPartSpecial * sizeof(double));
+  accz = (double *)mymalloc("accz", All.MaxPartSpecial * sizeof(double));
+#ifdef EVALPOTENTIAL
+  double *pot;
+  pot = (double *)mymalloc("pot", All.MaxPartSpecial * sizeof(double));
+#endif /* #ifdef EVALPOTENTIAL */
+  int n;
+  for(n = 0; n < All.MaxPartSpecial; n++)
+    {
+      accx[n] = accy[n] = accz[n] = 0.0;
+#ifdef EVALPOTENTIAL
+      pot[n] = 0.0;
+#endif /* #ifdef EVALPOTENTIAL */
+    }
+#endif /* #ifdef EXACT_GRAVITY_REACTION */
+
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      double fac, wp; /* wp is computed in every build but only read inside EVALPOTENTIAL sections */
+      double dx, dy, dz, r, r2;
+      double h, h_inv, h3_inv, u;
+      int k;
+
+      /* set softening to corresponding particle's softening length */
+      h = All.ForceSoftening[All.SofteningTypeOfPartType[EXACT_GRAVITY_FOR_PARTICLE_TYPE]];
+
+      for(k = 0; k < All.MaxPartSpecial; k++)
+        {
+          if(PartSpecialListGlobal[k].ID == P[i].ID) /* no force of a special particle on itself */
+            continue;
+
+          dx = P[i].Pos[0] - PartSpecialListGlobal[k].pos[0]; /* plain coordinate differences; no periodic wrapping is applied here */
+          dy = P[i].Pos[1] - PartSpecialListGlobal[k].pos[1];
+          dz = P[i].Pos[2] - PartSpecialListGlobal[k].pos[2];
+
+          r2 = dx * dx + dy * dy + dz * dz;
+          r  = sqrt(r2);
+
+          // using spline softening
+          if(r >= h)
+            {
+              fac = 1 / (r2 * r);
+              wp  = -1 / r;
+            }
+          else
+            {
+              h_inv  = 1.0 / h;
+              h3_inv = h_inv * h_inv * h_inv;
+              u      = r * h_inv;
+
+              if(u < 0.5)
+                {
+                  fac = h3_inv * (10.666666666667 + u * u * (32.0 * u - 38.4));
+                  wp  = h_inv * (-2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6)));
+                }
+              else
+                {
+                  fac = h3_inv *
+                        (21.333333333333 - 48.0 * u + 38.4 * u * u - 10.666666666667 * u * u * u - 0.066666666667 / (u * u * u));
+                  wp = h_inv * (-3.2 + 0.066666666667 / u + u * u * (10.666666666667 + u * (-16.0 + u * (9.6 - 2.133333333333 * u))));
+                }
+            }
+
+          P[i].GravAccel[0] -= All.G * PartSpecialListGlobal[k].mass * fac * dx;
+          P[i].GravAccel[1] -= All.G * PartSpecialListGlobal[k].mass * fac * dy;
+          P[i].GravAccel[2] -= All.G * PartSpecialListGlobal[k].mass * fac * dz;
+
+#ifdef EVALPOTENTIAL
+          P[i].Potential += All.G * PartSpecialListGlobal[k].mass * wp;
+#endif /* #ifdef EVALPOTENTIAL */
+#ifdef EXACT_GRAVITY_REACTION
+          /* avoid double counting: a reaction is only recorded for particles that are not of the special type themselves */
+          if(P[i].Type != EXACT_GRAVITY_FOR_PARTICLE_TYPE)
+            {
+              accx[k] += All.G * P[i].Mass * fac * dx; /* opposite sign to the force applied to P[i] above */
+              accy[k] += All.G * P[i].Mass * fac * dy;
+              accz[k] += All.G * P[i].Mass * fac * dz;
+#ifdef EVALPOTENTIAL
+              pot[k] += All.G * P[i].Mass * wp;
+#endif /* #ifdef EVALPOTENTIAL */
+            }
+#endif /* #ifdef EXACT_GRAVITY_REACTION */
+        }
+    }
+#ifdef EXACT_GRAVITY_REACTION
+  double *buf = (double *)mymalloc("buf", All.MaxPartSpecial * sizeof(double)); /* receive buffer, reused for every reduction below */
+
+  MPI_Allreduce(accx, buf, All.MaxPartSpecial, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); /* sum the local contributions of all tasks */
+  for(n = 0; n < All.MaxPartSpecial; n++)
+    accx[n] = buf[n];
+  MPI_Allreduce(accy, buf, All.MaxPartSpecial, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  for(n = 0; n < All.MaxPartSpecial; n++)
+    accy[n] = buf[n];
+  MPI_Allreduce(accz, buf, All.MaxPartSpecial, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  for(n = 0; n < All.MaxPartSpecial; n++)
+    accz[n] = buf[n];
+#ifdef EVALPOTENTIAL
+  MPI_Allreduce(pot, buf, All.MaxPartSpecial, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  for(n = 0; n < All.MaxPartSpecial; n++)
+    pot[n] = buf[n];
+#endif /* #ifdef EVALPOTENTIAL */
+  myfree(buf);
+
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+      for(n = 0; n < All.MaxPartSpecial; n++)
+        {
+          if(PartSpecialListGlobal[n].ID == P[i].ID) /* active local particle that is list entry n: apply the summed reaction */
+            {
+              P[i].GravAccel[0] += accx[n];
+              P[i].GravAccel[1] += accy[n];
+              P[i].GravAccel[2] += accz[n];
+#ifdef EVALPOTENTIAL
+              P[i].Potential += pot[n];
+#endif /* #ifdef EVALPOTENTIAL */
+            }
+        }
+    }
+
+#ifdef EVALPOTENTIAL
+  myfree(pot); /* buffers are released in reverse order of allocation; NOTE(review): presumably required by myfree -- confirm */
+#endif /* #ifdef EVALPOTENTIAL */
+  myfree(accz);
+  myfree(accy);
+  myfree(accx);
+#endif /* #ifdef EXACT_GRAVITY_REACTION */
+}
+
+/*! \brief Gathers the special particles (type EXACT_GRAVITY_FOR_PARTICLE_TYPE)
+ *  of all tasks into PartSpecialListGlobal for the direct summation.
+ *
+ *  Called in begrund2() (begrun.c), i.e. only at startup of the simulation.
+ *  Terminates if more than All.MaxPartSpecial particles would be stored.
+ *  \return void
+ */
+void special_particle_create_list()
+{
+  struct special_particle_data *SpecialPartList;
+  SpecialPartList =
+      (struct special_particle_data *)mymalloc("SpecialPartList", All.MaxPartSpecial * sizeof(struct special_particle_data));
+
+  int i, j, nsrc, nimport, ngrp;
+  for(i = 0, nsrc = 0; i < NumPart; i++)
+    {
+      if(P[i].Type == EXACT_GRAVITY_FOR_PARTICLE_TYPE)
+        {
+          if(nsrc >= All.MaxPartSpecial) /* SpecialPartList holds only All.MaxPartSpecial entries */
+            terminate("more than All.MaxPartSpecial=%d special particles on this task", (int)All.MaxPartSpecial);
+
+          SpecialPartList[nsrc].ID     = P[i].ID;
+          SpecialPartList[nsrc].pos[0] = P[i].Pos[0];
+          SpecialPartList[nsrc].pos[1] = P[i].Pos[1];
+          SpecialPartList[nsrc].pos[2] = P[i].Pos[2];
+          SpecialPartList[nsrc++].mass = P[i].Mass;
+        }
+    }
+
+  for(j = 0; j < NTask; j++)
+    Send_count[j] = nsrc;
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, nimport = 0; j < NTask; j++)
+    {
+      Send_offset[j] = 0;       /* every task gets the complete local list */
+      Recv_offset[j] = nimport; /* received blocks are stored back to back in task order */
+      nimport += Recv_count[j];
+    }
+
+  if(nimport > All.MaxPartSpecial) /* PartSpecialListGlobal is indexed with n < All.MaxPartSpecial elsewhere in this file */
+    terminate("nimport=%d special particles exceed All.MaxPartSpecial=%d", nimport, (int)All.MaxPartSpecial);
+
+  /* exchange particle data */
+  for(ngrp = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              /* get the particles */
+              MPI_Sendrecv(&SpecialPartList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct special_particle_data),
+                           MPI_BYTE, recvTask, TAG_DENS_A, &PartSpecialListGlobal[Recv_offset[recvTask]],
+                           Recv_count[recvTask] * sizeof(struct special_particle_data), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  myfree(SpecialPartList);
+}
+
+/*! \brief Refreshes PartSpecialListGlobal with the current data of the special
+ *  particles (type EXACT_GRAVITY_FOR_PARTICLE_TYPE) of all tasks.
+ *
+ *  Called in run() (run.c).
+ *  Terminates if more than All.MaxPartSpecial particles would be stored.
+ *  \return void
+ */
+void special_particle_update_list()
+{
+  struct special_particle_data *SpecialPartList;
+  SpecialPartList =
+      (struct special_particle_data *)mymalloc("SpecialPartList", All.MaxPartSpecial * sizeof(struct special_particle_data));
+
+  int i, j, nsrc, nimport, ngrp;
+  for(i = 0, nsrc = 0; i < NumPart; i++)
+    {
+      if(P[i].Type == EXACT_GRAVITY_FOR_PARTICLE_TYPE)
+        {
+          if(nsrc >= All.MaxPartSpecial) /* SpecialPartList holds only All.MaxPartSpecial entries */
+            terminate("more than All.MaxPartSpecial=%d special particles on this task", (int)All.MaxPartSpecial);
+
+          SpecialPartList[nsrc].ID     = P[i].ID;
+          SpecialPartList[nsrc].pos[0] = P[i].Pos[0];
+          SpecialPartList[nsrc].pos[1] = P[i].Pos[1];
+          SpecialPartList[nsrc].pos[2] = P[i].Pos[2];
+          SpecialPartList[nsrc++].mass = P[i].Mass;
+        }
+    }
+
+  for(j = 0; j < NTask; j++)
+    Send_count[j] = nsrc;
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, nimport = 0; j < NTask; j++)
+    {
+      Send_offset[j] = 0;       /* every task gets the complete local list */
+      Recv_offset[j] = nimport; /* received blocks are stored back to back in task order */
+      nimport += Recv_count[j];
+    }
+
+  if(nimport > All.MaxPartSpecial) /* PartSpecialListGlobal is indexed with n < All.MaxPartSpecial elsewhere in this file */
+    terminate("nimport=%d special particles exceed All.MaxPartSpecial=%d", nimport, (int)All.MaxPartSpecial);
+
+  /* exchange particle data */
+  for(ngrp = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              /* get the particles */
+              MPI_Sendrecv(&SpecialPartList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct special_particle_data),
+                           MPI_BYTE, recvTask, TAG_DENS_A, &PartSpecialListGlobal[Recv_offset[recvTask]],
+                           Recv_count[recvTask] * sizeof(struct special_particle_data), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  myfree(SpecialPartList);
+}
+#endif /* #ifdef  EXACT_GRAVITY_FOR_PARTICLE_TYPE */
diff --git a/src/amuse/community/arepo/src/gravity/grav_softening.c b/src/amuse/community/arepo/src/gravity/grav_softening.c
new file mode 100644
index 0000000000..4494f4df08
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/grav_softening.c
@@ -0,0 +1,215 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/grav_softening.c
+ * \date        05/2018
+ * \brief       Routines for setting the gravitational softening lengths.
+ * \details     contains functions:
+ *                void set_softenings(void)
+ *                int get_softeningtype_for_hydro_cell(int i)
+ *                double get_default_softening_of_particletype(int type)
+ *                int get_softening_type_from_mass(double mass)
+ *                double get_desired_softening_from_mass(double mass)
+ *                void init_individual_softenings(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+
+/*! \brief Sets the (comoving) softening length of all particle
+ *         types in the table All.SofteningTable[...].
+ *
+ *  For comoving integration the physical softening is bounded by the
+ *  Softening-MaxPhys values: where SofteningComoving * All.Time would exceed
+ *  SofteningMaxPhys, SofteningMaxPhys / All.Time is stored instead.
+ *
+ *  With ADAPTIVE_HYDRO_SOFTENING, NSOFTTYPES_HYDRO additional entries are
+ *  appended. Finally All.ForceSoftening[] is filled with 2.8 times the table
+ *  values, followed by one trailing entry that is set to zero.
+ *
+ *  \return void
+ */
+void set_softenings(void)
+{
+  int t;
+
+  for(t = 0; t < NSOFTTYPES; t++)
+    {
+      /* in comoving runs the physical softening (comoving value times All.Time) is capped */
+      int capped = All.ComovingIntegrationOn && All.SofteningComoving[t] * All.Time > All.SofteningMaxPhys[t];
+
+      All.SofteningTable[t] = capped ? All.SofteningMaxPhys[t] / All.Time : All.SofteningComoving[t];
+    }
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  /* geometric sequence of hydro softenings, stored behind the NSOFTTYPES regular entries */
+  for(t = 0; t < NSOFTTYPES_HYDRO; t++)
+    All.SofteningTable[NSOFTTYPES + t] = All.MinimumComovingHydroSoftening * pow(All.AdaptiveHydroSofteningSpacing, t);
+
+  if(All.AdaptiveHydroSofteningSpacing < 1)
+    terminate("All.AdaptiveHydroSofteningSpacing < 1");
+
+#ifdef MULTIPLE_NODE_SOFTENING
+  /* we check that type=0 has its own slot 0 in the softening types, so that only gas masses are stored there */
+  if(All.SofteningTypeOfPartType[0] != 0)
+    terminate("All.SofteningTypeOfPartType[0] != 0");
+
+  for(t = 1; t < NTYPES; t++)
+    if(All.SofteningTypeOfPartType[t] == All.SofteningTypeOfPartType[0])
+      terminate("i=%d: All.SofteningTypeOfPartType[i] == All.SofteningTypeOfPartType[0]", t);
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+
+  for(t = 0; t < NSOFTTYPES + NSOFTTYPES_HYDRO; t++)
+    All.ForceSoftening[t] = 2.8 * All.SofteningTable[t];
+
+  All.ForceSoftening[NSOFTTYPES + NSOFTTYPES_HYDRO] = 0; /* important - this entry is actually used */
+}
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+/*! \brief Finds the index of the softening table for a given cell depending
+ *         on its radius.
+ *
+ *  \param[in] i Index of cell in SphP array.
+ *  \return Index of corresponding softening in softening lookup-table, never
+ *          larger than NSOFTTYPES + NSOFTTYPES_HYDRO - 1.
+ */
+int get_softeningtype_for_hydro_cell(int i)
+{
+  double soft = All.GasSoftFactor * get_cell_radius(i);
+
+  if(soft <= All.ForceSoftening[NSOFTTYPES])
+    return NSOFTTYPES;
+
+  /* clamp before converting: a spacing of exactly 1 (accepted by set_softenings()) gives +inf here, and casting that to int is undefined */
+  double level = 0.5 + log(soft / All.ForceSoftening[NSOFTTYPES]) / log(All.AdaptiveHydroSofteningSpacing);
+  int k        = (level < NSOFTTYPES_HYDRO) ? (int)level : NSOFTTYPES_HYDRO - 1;
+
+  return NSOFTTYPES + k;
+}
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+
+/*! \brief Looks up the default softening length of a particle type.
+ *  \param[in] type Particle type, used as index into All.SofteningTypeOfPartType.
+ *  \return All.SofteningTable entry of the softening type assigned to 'type'. */
+double get_default_softening_of_particletype(int type)
+{
+  return All.SofteningTable[All.SofteningTypeOfPartType[type]];
+}
+
+#ifdef INDIVIDUAL_GRAVITY_SOFTENING
+/*! \brief Picks the softening type whose All.ForceSoftening value is closest,
+ *         in the logarithm, to the softening desired for the given mass.
+ *
+ *  \param[in] mass Mass of the particle.
+ *  \return Index in gravitational softening table; terminates if none fits.
+ */
+int get_softening_type_from_mass(double mass)
+{
+#if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING)
+  const int first_type = 1; /* slot 0 is not considered in this configuration */
+#else  /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) */
+  const int first_type = 0;
+#endif /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) #else */
+
+  int type, best_type = -1;
+  double eps       = get_desired_softening_from_mass(mass);
+  double ln_eps    = log(eps);
+  double best_dist = MAX_FLOAT_NUMBER;
+
+  for(type = first_type; type < NSOFTTYPES; type++)
+    {
+      if(!(All.ForceSoftening[type] > 0))
+        continue; /* only entries with a positive softening are candidates */
+
+      double dist = fabs(ln_eps - log(All.ForceSoftening[type]));
+      if(dist < best_dist)
+        {
+          best_dist = dist;
+          best_type = type;
+        }
+    }
+  if(best_type < 0)
+    terminate("min_type < 0  mass=%g  eps=%g   All.AvgType1Mass=%g  All.ForceSoftening[1]=%g", mass, eps, All.AvgType1Mass,
+              All.ForceSoftening[1]);
+
+  return best_type;
+}
+
+/*! \brief Mass dependent softening, based on All.SofteningComoving[1] and
+ *  the average type 1 particle mass All.AvgType1Mass.
+ *
+ *  \param[in] mass Particle mass.
+ *
+ *  \return 2.8 * All.SofteningComoving[1], times (mass / All.AvgType1Mass)^(1/3) for masses above All.AvgType1Mass.
+ */
+double get_desired_softening_from_mass(double mass)
+{
+  /* masses up to the type 1 average get the plain value, all others are scaled with the cube root of the mass ratio */
+  int heavier = !(mass <= All.AvgType1Mass);
+
+  return heavier ? 2.8 * All.SofteningComoving[1] * pow(mass / All.AvgType1Mass, 1.0 / 3) : 2.8 * All.SofteningComoving[1];
+}
+
+/*! \brief Initializes the mass dependent softening calculation.
+ *
+ *  The global average mass of Type 1 particles is stored in All.AvgType1Mass;
+ *  afterwards every local particle whose type bit is set in
+ *  INDIVIDUAL_GRAVITY_SOFTENING gets its softening type assigned from its mass.
+ *  \return void
+ */
+void init_individual_softenings(void)
+{
+  int i, ndm = 0;
+  long long ndmtot;
+  double mass = 0, masstot;
+
+  /* local number and mass of type 1 particles */
+  for(i = 0; i < NumPart; i++)
+    {
+      if(P[i].Type != 1)
+        continue;
+      ndm += 1;
+      mass += P[i].Mass;
+    }
+
+  sumup_large_ints(1, &ndm, &ndmtot);
+  MPI_Allreduce(&mass, &masstot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  All.AvgType1Mass = masstot / ndmtot;
+
+  mpi_printf("INIT: AvgType1Mass = %g\n", All.AvgType1Mass);
+
+  for(i = 0; i < NumPart; i++)
+    if((1 << P[i].Type) & (INDIVIDUAL_GRAVITY_SOFTENING))
+      P[i].SofteningType = get_softening_type_from_mass(P[i].Mass);
+}
+#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */
diff --git a/src/amuse/community/arepo/src/gravity/gravdirect.c b/src/amuse/community/arepo/src/gravity/gravdirect.c
new file mode 100644
index 0000000000..cbe7be7426
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/gravdirect.c
@@ -0,0 +1,259 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/gravdirect.c
+ * \date        05/2018
+ * \brief       Main driver routines for gravitational (short-range) force
+ *              computation through direct summation
+ * \details     Note that this is not the same thing as
+ *              EXACT_GRAVITY_FOR_PARTICLE_TYPE!
+ *              ALLOW_DIRECT_SUMMATION does direct summation for performance
+ *              reasons if there is only a small number of interactions to be
+ *              calculated and the overhead of a tree-construction would be
+ *              more expensive than the direct summation calculation, while
+ *              EXACT_GRAVITY_FOR_PARTICLE_TYPE always enforces a direct
+ *              summation for all particle pairs of a given type.
+ *              contains functions:
+ *                void gravity_direct(int timebin)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+
+#ifdef ALLOW_DIRECT_SUMMATION
+static int Nimport; /* sum over all tasks of the particle counts gathered in gravity_direct() */
+
+/*! \brief Computes the gravitational forces for all active particles through
+ *         direct summation.
+ *  Every task gathers all active particles, evaluates an even share of them
+ *  and sends the results back to the tasks that contributed the particles.
+ *  \param[in] timebin (unused)
+ *  \return void
+ */
+void gravity_direct(int timebin)
+{
+  int i, j, k, idx;
+
+  TIMER_START(CPU_TREEDIRECT);
+
+  if(TimeBinsGravity.GlobalNActiveParticles <= 1) /* no pair to sum over: zero the result of the local active particle, if any */
+    {
+      if(TimeBinsGravity.NActiveParticles > 0)
+        {
+          i = TimeBinsGravity.ActiveParticleList[0];
+          if(i >= 0)
+            {
+              for(k = 0; k < 3; k++)
+                P[i].GravAccel[k] = 0;
+
+#ifdef EVALPOTENTIAL
+              P[i].Potential = 0;
+#endif /* #ifdef EVALPOTENTIAL */
+            }
+        }
+
+      mpi_printf("Found only %lld particles to do direct summation -> SKIPPING IT\n", (long long)TimeBinsGravity.GlobalNActiveParticles);
+      TIMER_STOP(CPU_TREEDIRECT);
+      return;
+    }
+
+  mpi_printf("GRAVDIRECT: direct summation.  (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+
+  double tstart = second();
+
+  DirectDataIn = (struct directdata *)mymalloc("DirectDataIn", TimeBinsGravity.NActiveParticles * sizeof(struct directdata));
+
+  Nforces = 0;
+
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+#ifdef CELL_CENTER_GRAVITY
+      if(P[i].Type == 0)
+        {
+          for(k = 0; k < 3; k++)
+            DirectDataIn[Nforces].Pos[k] = SphP[i].Center[k];
+        }
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        {
+          for(k = 0; k < 3; k++)
+            DirectDataIn[Nforces].Pos[k] = P[i].Pos[k];
+        }
+
+      DirectDataIn[Nforces].Mass = P[i].Mass;
+
+      DirectDataIn[Nforces].Type          = P[i].Type;
+      DirectDataIn[Nforces].SofteningType = P[i].SofteningType;
+
+      Nforces++;
+    }
+
+  MPI_Allgather(&Nforces, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, Nimport = 0, Recv_offset[0] = 0; j < NTask; j++)
+    {
+      Nimport += Recv_count[j];
+
+      if(j > 0)
+        Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+    }
+
+  DirectDataAll = (struct directdata *)mymalloc("DirectDataAll", Nimport * sizeof(struct directdata));
+
+  for(j = 0; j < NTask; j++) /* counts and offsets in bytes, because the data is gathered as MPI_BYTE */
+    {
+      Send_count[j]  = Recv_count[j] * sizeof(struct directdata);
+      Send_offset[j] = Recv_offset[j] * sizeof(struct directdata);
+    }
+
+  MPI_Allgatherv(DirectDataIn, Nforces * sizeof(struct directdata), MPI_BYTE, DirectDataAll, Send_count, Send_offset, MPI_BYTE,
+                 MPI_COMM_WORLD);
+
+  /* subdivide the work evenly */
+  int first, count;
+  subdivide_evenly(Nimport, NTask, ThisTask, &first, &count);
+
+  DirectAccOut = (struct accdata *)mymalloc("DirectDataOut", count * sizeof(struct accdata));
+
+  /* now calculate the forces */
+  for(i = 0; i < count; i++)
+    force_evaluate_direct(i + first, i, Nimport);
+
+  /* now send the forces to the right places: the evenly subdivided ranges are matched against the per-task ranges of the gathered list */
+
+  DirectAccIn = (struct accdata *)mymalloc("DirectDataIn", Nforces * sizeof(struct accdata));
+
+  MPI_Request *requests = (MPI_Request *)mymalloc_movable(&requests, "requests", 2 * NTask * sizeof(MPI_Request));
+  int n_requests        = 0;
+
+  int recvTask = 0;
+  int sendTask = 0;
+  int send_first, send_count;
+  subdivide_evenly(Nimport, NTask, sendTask, &send_first, &send_count);
+
+  while(recvTask < NTask && sendTask < NTask) /* go through both lists */
+    {
+      while(send_first + send_count < Recv_offset[recvTask])
+        {
+          if(sendTask >= NTask - 1)
+            terminate("sendTask >= NTask  recvTask=%d sendTask=%d", recvTask, sendTask);
+
+          sendTask++;
+          subdivide_evenly(Nimport, NTask, sendTask, &send_first, &send_count);
+        }
+
+      while(Recv_offset[recvTask] + Recv_count[recvTask] < send_first)
+        {
+          if(recvTask >= NTask - 1)
+            terminate("recvTask >= NTask  recvTask=%d sendTask=%d", recvTask, sendTask);
+
+          recvTask++;
+        }
+
+      int start = imax(Recv_offset[recvTask], send_first); /* overlap [start, next) of the two current ranges */
+      int next  = imin(Recv_offset[recvTask] + Recv_count[recvTask], send_first + send_count);
+
+      if(next - start >= 1)
+        {
+          if(ThisTask == sendTask)
+            MPI_Isend(DirectAccOut + start - send_first, (next - start) * sizeof(struct accdata), MPI_BYTE, recvTask, TAG_PDATA_SPH,
+                      MPI_COMM_WORLD, &requests[n_requests++]);
+
+          if(ThisTask == recvTask)
+            MPI_Irecv(DirectAccIn + start - Recv_offset[recvTask], (next - start) * sizeof(struct accdata), MPI_BYTE, sendTask,
+                      TAG_PDATA_SPH, MPI_COMM_WORLD, &requests[n_requests++]);
+        }
+
+      if(next == Recv_offset[recvTask] + Recv_count[recvTask]) /* receiver range used up: advance it, otherwise advance the sender */
+        recvTask++;
+      else
+        {
+          sendTask++;
+          if(sendTask >= NTask)
+            break;
+
+          subdivide_evenly(Nimport, NTask, sendTask, &send_first, &send_count);
+        }
+    }
+
+  MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);
+  myfree(requests);
+
+  Nforces = 0; /* recount while copying the results back in the order in which DirectDataIn was filled */
+
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      for(k = 0; k < 3; k++)
+        P[i].GravAccel[k] = DirectAccIn[Nforces].Acc[k];
+
+#ifdef EVALPOTENTIAL
+      P[i].Potential = DirectAccIn[Nforces].Potential;
+#endif /* #ifdef EVALPOTENTIAL */
+      Nforces++;
+    }
+
+  myfree(DirectAccIn);
+  myfree(DirectAccOut);
+  myfree(DirectDataAll);
+  myfree(DirectDataIn);
+
+  mpi_printf("GRAVDIRECT: force is done.\n");
+
+  All.TotNumOfForces += TimeBinsGravity.GlobalNActiveParticles;
+
+  double tend = second();
+
+  double timedirect, sumt;
+  timedirect = tend - tstart;
+
+  MPI_Reduce(&timedirect, &sumt, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+
+  if(ThisTask == 0)
+    {
+      fprintf(FdTimings, "Nf=%9lld   active part/task: avg=%g   total-Nf=%lld\n", TimeBinsGravity.GlobalNActiveParticles,
+              ((double)TimeBinsGravity.GlobalNActiveParticles) / NTask, All.TotNumOfForces);
+      fprintf(FdTimings, "  (direct) part/sec:  %g   ia/sec: %g\n", TimeBinsGravity.GlobalNActiveParticles / (sumt + 1.0e-20),
+              TimeBinsGravity.GlobalNActiveParticles / (sumt + 1.0e-20) * TimeBinsGravity.GlobalNActiveParticles);
+      myflush(FdTimings);
+    }
+
+  TIMER_STOP(CPU_TREEDIRECT);
+}
+
+#endif /* #ifdef ALLOW_DIRECT_SUMMATION */
diff --git a/src/amuse/community/arepo/src/gravity/gravtree.c b/src/amuse/community/arepo/src/gravity/gravtree.c
new file mode 100644
index 0000000000..810aa9c3da
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/gravtree.c
@@ -0,0 +1,749 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/gravtree.c
+ * \date        05/2018
+ * \brief       Main driver routines for gravitational (short-range) force
+ *              computation.
+ * \details     This file contains the code for the gravitational force
+ *              computation by means of the tree algorithm. To this end, a tree
+ *              force is computed for all active local particles, and particles
+ *              are exported to other processors if needed, where they can
+ *              receive additional force contributions. If the TreePM algorithm
+ *               is enabled, the force computed will only be the short-range
+ *               part.
+ *               contains functions:
+ *                 static void particle2in(data_in * in, int i, int firstnode)
+ *                 static void out2particle(data_out * out, int i, int mode)
+ *                 static void gravity_primary_loop(void)
+ *                 void gravity_secondary_loop(void)
+ *                 void gravity_tree(int timebin)
+ *                 static int gravity_evaluate(int target, int mode, int
+ *                   threadid)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 20.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+
+static double ThreadsCosttotal[NUM_THREADS]; /*!< The total cost of the particles/nodes processed by each thread */
+static int ThreadFirstExec[NUM_THREADS]; /*!< Set while a thread has not yet run gravity_primary_loop() or gravity_secondary_loop() */
+static int MeasureCostFlag;              /*!< Whether the tree costs are measured for the current time step */
+
+static int gravity_evaluate(int target, int mode, int threadid); /* called by gravity_primary_loop() and gravity_secondary_loop() */
+
+typedef gravdata_in data_in; /* type of the structure filled by particle2in() */
+
+typedef gravdata_out data_out; /* type of the structure consumed by out2particle() */
+
+#ifdef DETAILEDTIMINGS
+static double tstart;       /* reference time passed to timediff() for the detailed timing output */
+static int current_timebin; /* time bin written to the detailed timing output */
+#endif /* #ifdef DETAILEDTIMINGS */
+
+/* local data structure for collecting particle/cell data that is sent to other processors if needed */
+static data_in *DataIn, *DataGet;
+static data_out *DataResult, *DataOut;
+
+/*! \brief Copies position, type, softening type and old acceleration of one
+ *         force target into the communication input structure. Needed by
+ *         generic_comm_helpers2.
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index below NumPart for a local particle in P (and SphP),
+ *               otherwise an imported point stored in Tree_Points.
+ *  \param[in] firstnode Value stored in in->Firstnode.
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  in->Firstnode = firstnode;
+
+  if(i >= NumPart)
+    {
+      /* imported point: Tree_Points is indexed relative to Tree_ImportedNodeOffset */
+      int p = i - Tree_ImportedNodeOffset;
+      in->Pos[0]        = Tree_Points[p].Pos[0];
+      in->Pos[1]        = Tree_Points[p].Pos[1];
+      in->Pos[2]        = Tree_Points[p].Pos[2];
+      in->Type          = Tree_Points[p].Type;
+      in->SofteningType = Tree_Points[p].SofteningType;
+      in->OldAcc        = Tree_Points[p].OldAcc;
+      return;
+    }
+
+  in->Type          = P[i].Type;
+  in->SofteningType = P[i].SofteningType;
+  in->OldAcc        = P[i].OldAcc;
+
+#ifdef CELL_CENTER_GRAVITY
+  if(P[i].Type == 0)
+    {
+      in->Pos[0] = SphP[i].Center[0];
+      in->Pos[1] = SphP[i].Center[1];
+      in->Pos[2] = SphP[i].Center[2];
+      return;
+    }
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+  in->Pos[0] = P[i].Pos[0];
+  in->Pos[1] = P[i].Pos[1];
+  in->Pos[2] = P[i].Pos[2];
+}
+
+/*! \brief Routine to store or combine result data. Needed by
+ *         generic_comm_helpers2.
+ *
+ *  \param[in] out Acceleration (and, if enabled, potential and interaction
+ *  count) to be written to P[] or to Tree_ResultsActiveImported[].
+ *  \param[in] i Index below NumPart for a local particle in P, otherwise an
+ *  imported point that is looked up through Tree_ResultIndexList.
+ *  \param[in] mode MODE_LOCAL_PARTICLES overwrites the stored values, any
+ *  other mode adds the values in 'out' to them.
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  int k;
+
+  /* MODE_LOCAL_PARTICLES stores the first result, every other mode adds to it */
+  if(i < NumPart) /* target is a local particle */
+    {
+      if(mode == MODE_LOCAL_PARTICLES) /* initial store */
+        {
+          for(k = 0; k < 3; k++)
+            P[i].GravAccel[k] = out->Acc[k];
+#ifdef EVALPOTENTIAL
+          P[i].Potential = out->Potential;
+#endif /* #ifdef EVALPOTENTIAL */
+#ifdef OUTPUTGRAVINTERACTIONS
+          P[i].GravInteractions = out->GravNinteractions;
+#endif /* #ifdef OUTPUTGRAVINTERACTIONS */
+        }
+      else /* combine */
+        {
+          for(k = 0; k < 3; k++)
+            P[i].GravAccel[k] += out->Acc[k];
+#ifdef EVALPOTENTIAL
+          P[i].Potential += out->Potential;
+#endif /* #ifdef EVALPOTENTIAL */
+#ifdef OUTPUTGRAVINTERACTIONS
+          P[i].GravInteractions += out->GravNinteractions;
+#endif /* #ifdef OUTPUTGRAVINTERACTIONS */
+        }
+
+      return;
+    }
+
+  /* target is an imported point; its result slot is found through Tree_ResultIndexList */
+  int idx = Tree_ResultIndexList[i - Tree_ImportedNodeOffset];
+
+  if(mode == MODE_LOCAL_PARTICLES) /* initial store */
+    {
+      for(k = 0; k < 3; k++)
+        Tree_ResultsActiveImported[idx].GravAccel[k] = out->Acc[k];
+#ifdef EVALPOTENTIAL
+      Tree_ResultsActiveImported[idx].Potential = out->Potential;
+#endif /* #ifdef EVALPOTENTIAL */
+#ifdef OUTPUTGRAVINTERACTIONS
+      Tree_ResultsActiveImported[idx].GravInteractions = out->GravNinteractions;
+#endif /* #ifdef OUTPUTGRAVINTERACTIONS */
+    }
+  else /* combine */
+    {
+      for(k = 0; k < 3; k++)
+        Tree_ResultsActiveImported[idx].GravAccel[k] += out->Acc[k];
+#ifdef EVALPOTENTIAL
+      Tree_ResultsActiveImported[idx].Potential += out->Potential;
+#endif /* #ifdef EVALPOTENTIAL */
+#ifdef OUTPUTGRAVINTERACTIONS
+      Tree_ResultsActiveImported[idx].GravInteractions += out->GravNinteractions;
+#endif /* #ifdef OUTPUTGRAVINTERACTIONS */
+    }
+}
+
+#include "../utils/generic_comm_helpers2.h"
+
+/*! \brief Primary loop of gravity calculation.
+ *
+ *  Gravitational interactions between local particles; see gravity_tree(..).
+ *  Equivalent to 'kernel_local'.
+ *
+ *  Targets are fetched from TargetList through the global NextParticle
+ *  counter and passed to gravity_evaluate() in MODE_LOCAL_PARTICLES. The loop
+ *  ends when all Nforces targets are handed out or when the export space of
+ *  the calling thread drops below MinSpace. The values returned by
+ *  gravity_evaluate() are accumulated in ThreadsCosttotal[].
+ *
+ *  \return void
+ */
+static void gravity_primary_loop(void)
+{
+  TIMER_STOPSTART(CPU_TREEBALSNDRCV, CPU_TREEWALK1);
+
+#ifdef DETAILEDTIMINGS
+  double t0 = second();
+#endif /* #ifdef DETAILEDTIMINGS */
+
+  /* do local particles */
+  int task, threadid = get_thread_num();
+  double cost = 0;
+
+  if(ThreadFirstExec[threadid])
+    {
+      ThreadFirstExec[threadid] = 0;
+
+      /* first call of this thread: clear its cost counters if costs are measured */
+      if(MeasureCostFlag)
+        {
+          memset(Thread[threadid].P_CostCount, 0, NumPart * sizeof(int));
+          memset(Thread[threadid].TreePoints_CostCount, 0, Tree_NumPartImported * sizeof(int));
+          memset(Thread[threadid].Node_CostCount + Tree_MaxPart, 0, Tree_NumNodes * sizeof(int));
+        }
+    }
+
+  for(task = 0; task < NTask; task++)
+    Thread[threadid].Exportflag[task] = -1;
+
+  /* hand out targets until the list is exhausted or the export space runs low */
+  while(!(Thread[threadid].ExportSpace < MinSpace))
+    {
+      int idx = NextParticle++;
+
+      if(idx >= Nforces)
+        break;
+
+      cost += gravity_evaluate(TargetList[idx], MODE_LOCAL_PARTICLES, threadid);
+    }
+
+  ThreadsCosttotal[threadid] += cost;
+
+#ifdef DETAILEDTIMINGS
+  double t1 = second();
+
+  fprintf(FdDetailed, "%d %d %d %d %g %g\n", All.NumCurrentTiStep, current_timebin, DETAILED_TIMING_GRAVWALK, MODE_LOCAL_PARTICLES,
+          timediff(tstart, t0), timediff(tstart, t1));
+#endif /* #ifdef DETAILEDTIMINGS */
+
+  TIMER_STOPSTART(CPU_TREEWALK1, CPU_TREEBALSNDRCV);
+}
+
+/*! \brief Secondary loop of gravity calculation.
+ *
+ *  Gravitational interactions between imported particles; see gravity_tree(.).
+ *  Equivalent to 'kernel_imported'.
+ *
+ *  All Nimport imported targets are passed, in index order, to
+ *  gravity_evaluate() in MODE_IMPORTED_PARTICLES. The values returned by
+ *  gravity_evaluate() are accumulated in ThreadsCosttotal[] for the calling
+ *  thread. With DETAILEDTIMINGS, start and end time of the loop are written
+ *  to FdDetailed.
+ *
+ *  \return void
+ */
+void gravity_secondary_loop(void)
+{
+  TIMER_STOPSTART(CPU_TREEBALSNDRCV, CPU_TREEWALK2);
+
+#ifdef DETAILEDTIMINGS
+  double t0 = second();
+#endif /* #ifdef DETAILEDTIMINGS */
+
+  /* now do the particles that were sent to us */
+  int n, threadid = get_thread_num();
+  double cost = 0;
+
+  if(ThreadFirstExec[threadid])
+    {
+      ThreadFirstExec[threadid] = 0;
+
+      /* first call of this thread: clear its cost counters if costs are measured */
+      if(MeasureCostFlag)
+        {
+          memset(Thread[threadid].P_CostCount, 0, NumPart * sizeof(int));
+          memset(Thread[threadid].TreePoints_CostCount, 0, Tree_NumPartImported * sizeof(int));
+          memset(Thread[threadid].Node_CostCount + Tree_MaxPart, 0, Tree_NumNodes * sizeof(int));
+        }
+    }
+
+  for(n = 0; n < Nimport; n++)
+    {
+      cost += gravity_evaluate(n, MODE_IMPORTED_PARTICLES, threadid);
+    }
+
+  /* add the cost of this call to the running total of the thread */
+  ThreadsCosttotal[threadid] += cost;
+
+#ifdef DETAILEDTIMINGS
+  double t1 = second();
+
+  fprintf(FdDetailed, "%d %d %d %d %g %g\n", All.NumCurrentTiStep, current_timebin, DETAILED_TIMING_GRAVWALK, MODE_IMPORTED_PARTICLES,
+          timediff(tstart, t0), timediff(tstart, t1));
+#endif /* #ifdef DETAILEDTIMINGS */
+
+  TIMER_STOPSTART(CPU_TREEWALK2, CPU_TREEBALSNDRCV);
+}
+
+/*! \brief This function computes the gravitational forces for all active
+ *         particles.
+ *
+ *  The tree walk is done in two phases: First the local part of the force tree
+ *  is processed (gravity_primary_loop() ). Whenever an external node is
+ *  encountered during the walk, this node is saved on a list. This node list
+ *  along with data about the particles is then exchanged among tasks.
+ *  In the second phase (gravity_secondary_loop() ) each task now continues
+ *  the tree walk for the imported particles. Finally the resulting partial
+ *  forces are sent back to the original task and are summed up there to
+ *  complete the tree force calculation.
+ *
+ *  If only the tree algorithm is used in a periodic box, the whole tree walk
+ *  is done twice. First a normal tree walk is done as described above, and
+ *  afterwards a second tree walk, which adds the needed Ewald corrections, is
+ *  performed.
+ *
+ *  Particles are only exported to other processors when really needed,
+ *  thereby allowing a good use of the communication buffer. Every particle is
+ *  sent at most once to a given processor together with the complete list of
+ *  relevant tree nodes to be checked on the other task.
+ *
+ *  Particles which drifted into the domain of another task are sent to this
+ *  task for the force computation. Afterwards the resulting force is sent
+ *  back to the originating task.
+ *
+ *  In order to improve the work load balancing during a domain decomposition,
+ *  the work done by each node/particle is measured. The work is measured for
+ *  the interaction partners (i.e. the nodes or particles) and not for the
+ *  particles themselves that require a force computation. This way, work done
+ *  for imported particles is accounted for at the task where the work was
+ *  actually incurred. The cost measurement is only done for the "GRAVCOSTLEVELS"
+ *  highest occupied time bins. The variable 'MeasureCostFlag' will state
+ *  whether a measurement is done at the present time step.
+ *
+ *  The particles requiring a force computation are split into chunks of size
+ *  'Nchunksize'. A set of every 'Nchunk' -th chunk is processed first.
+ *  Then the process is repeated, processing the next set of chunks. This way
+ *  the amount of exported particles is more balanced, as communication-heavy
+ *  regions are mixed with less communication-intensive regions.
+ *
+ * \param[in] timebin Time bin for which gravity should be calculated.
+ *
+ * \return void
+ */
+void gravity_tree(int timebin)
+{
+  int idx, i, j, k, ncount, iter = 0, maxiter;
+  struct detailed_timings
+  {
+    double all, tree1, tree2, tree, commwait;
+    double sumnexport, costtotal, numnodes;
+    ; /* NOTE(review): stray empty declaration, looks removable */
+  } timer, tisum, timax; /* all members are double: the struct is reduced below as an array of MPI_DOUBLE */
+  memset(&timer, 0, sizeof(struct detailed_timings));
+  double Costtotal;
+  int ngrp;
+  int recvTask;
+
+  TIMER_STORE;
+  TIMER_START(CPU_TREE);
+
+  /* allocate buffers to arrange communication */
+  mpi_printf("GRAVTREE: Begin tree force.  (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+
+  TIMER_STOPSTART(CPU_TREE, CPU_TREECOSTMEASURE);
+
+  for(i = 0; i < NUM_THREADS; i++)
+    {
+      ThreadsCosttotal[i] = 0;
+      ThreadFirstExec[i]  = 0;
+    }
+
+  /* find the level (if any) for which we measure gravity cost */
+  for(i = 0, TakeLevel = -1; i < GRAVCOSTLEVELS; i++)
+    {
+      if(All.LevelToTimeBin[i] == timebin)
+        {
+          TakeLevel = i;
+          break;
+        }
+    }
+
+  if(TakeLevel < 0) /* we have not found a matching slot */
+    {
+      /* first choice: a slot that is still unused (negative time bin) */
+      for(i = 0; i < GRAVCOSTLEVELS; i++)
+        {
+          if(All.LevelToTimeBin[i] < 0)
+            {
+              All.LevelToTimeBin[i]       = timebin;
+              TakeLevel                   = i;
+              All.LevelHasBeenMeasured[i] = 0;
+              break;
+            }
+        }
+
+      if(TakeLevel < 0)
+        {
+          if(All.HighestOccupiedGravTimeBin - timebin < GRAVCOSTLEVELS) /* we should have space */
+            {
+              /* clear levels that are out of range */
+              for(i = 0; i < GRAVCOSTLEVELS; i++)
+                {
+                  if(All.LevelToTimeBin[i] > All.HighestOccupiedGravTimeBin)
+                    {
+                      All.LevelToTimeBin[i]       = timebin;
+                      TakeLevel                   = i;
+                      All.LevelHasBeenMeasured[i] = 0;
+                      break;
+                    }
+                  if(All.LevelToTimeBin[i] < All.HighestOccupiedGravTimeBin - (GRAVCOSTLEVELS - 1))
+                    {
+                      All.LevelToTimeBin[i]       = timebin;
+                      TakeLevel                   = i;
+                      All.LevelHasBeenMeasured[i] = 0;
+                      break;
+                    }
+                }
+
+              if(TakeLevel < 0)
+                {
+                  if(timebin > All.HighestOccupiedGravTimeBin) /* take over the slot of the highest occupied bin */
+                    {
+                      for(i = 0; i < GRAVCOSTLEVELS; i++)
+                        {
+                          if(All.LevelToTimeBin[i] == All.HighestOccupiedGravTimeBin)
+                            {
+                              All.LevelToTimeBin[i]       = timebin;
+                              TakeLevel                   = i;
+                              All.LevelHasBeenMeasured[i] = 0;
+                              break;
+                            }
+                        }
+                    }
+                }
+
+              if(TakeLevel < 0) /* still no slot: print the slot table and abort */
+                {
+                  mpi_printf("All.HighestOccupiedGravTimeBin=%d   timebin=%d\n", All.HighestOccupiedGravTimeBin, timebin);
+                  for(i = 0; i < GRAVCOSTLEVELS; i++)
+                    {
+                      mpi_printf("All.LevelToTimeBin[i=%d]=%d\n", i, All.LevelToTimeBin[i]);
+                    }
+
+                  fflush(stdout);
+                  MPI_Barrier(MPI_COMM_WORLD);
+
+                  terminate("TakeLevel=%d < 0", TakeLevel);
+                }
+            }
+        }
+    }
+
+  MeasureCostFlag = 0;
+
+  if(TakeLevel >= 0)
+    if(All.LevelHasBeenMeasured[TakeLevel] == 0)
+      {
+        MeasureCostFlag = 1;
+
+        Thread[0].P_CostCount          = mymalloc("Thread_P_CostCount", NumPart * sizeof(int));
+        Thread[0].TreePoints_CostCount = mymalloc("Threads_TreePoints_CostCount", Tree_NumPartImported * sizeof(int));
+        Thread[0].Node_CostCount       = mymalloc("Threads_Node_CostCount", Tree_NumNodes * sizeof(int));
+
+        for(i = 1; i < NUM_THREADS; i++)
+          {
+            Thread[i].P_CostCount          = mymalloc("Threads_P_CostCount", NumPart * sizeof(int));
+            Thread[i].TreePoints_CostCount = mymalloc("Threads_TreePoints_CostCount", Tree_NumPartImported * sizeof(int));
+            Thread[i].Node_CostCount       = mymalloc("Threads_Node_CostCount", Tree_NumNodes * sizeof(int));
+          }
+
+        for(i = 0; i < NUM_THREADS; i++)
+          Thread[i].Node_CostCount -= Tree_MaxPart; /* shifted base pointer; the shift is undone before the buffers are freed */
+
+        for(i = 0; i < NUM_THREADS; i++)
+          ThreadFirstExec[i] = 1; /* tells the loops (and the fallback below) to zero the counters once */
+      }
+
+  TIMER_STOPSTART(CPU_TREECOSTMEASURE, CPU_TREE);
+
+  /* Create list of targets. We do this here to simplify the treatment of the two possible sources of points */
+
+  TargetList           = mymalloc("TargetList", (NumPart + Tree_NumPartImported) * sizeof(int));
+  Tree_ResultIndexList = mymalloc("Tree_ResultIndexList", Tree_NumPartImported * sizeof(int));
+
+  Nforces = 0;
+
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(Tree_Task_list[i] == ThisTask)
+        TargetList[Nforces++] = i;
+    }
+
+  for(i = 0, ncount = 0; i < Tree_NumPartImported; i++)
+#ifndef HIERARCHICAL_GRAVITY
+    if(Tree_Points[i].ActiveFlag)
+#endif /* #ifndef HIERARCHICAL_GRAVITY */
+      {
+        Tree_ResultIndexList[i] = ncount++;
+        TargetList[Nforces++]   = i + Tree_ImportedNodeOffset;
+      }
+
+  Tree_ResultsActiveImported = mymalloc("Tree_ResultsActiveImported", ncount * sizeof(struct resultsactiveimported_data));
+
+  permutate_chunks_in_list(Nforces, TargetList);
+
+  generic_set_MaxNexport();
+
+  /******************************************/
+  /* now execute the tree walk calculations */
+  /******************************************/
+
+  TIMER_STOPSTART(CPU_TREE, CPU_TREEBALSNDRCV);
+
+#ifdef DETAILEDTIMINGS
+  tstart          = second();
+  current_timebin = timebin;
+#endif /* #ifdef DETAILEDTIMINGS */
+
+  iter = generic_comm_pattern(Nforces, gravity_primary_loop, gravity_secondary_loop);
+
+  /* now communicate the forces in Tree_ResultsActiveImported */
+
+  TIMER_STOPSTART(CPU_TREEBALSNDRCV, CPU_TREESENDBACK);
+
+#ifdef DETAILEDTIMINGS
+  double tend = second();
+
+  fprintf(FdDetailed, "%d %d %d %d %g %g\n", All.NumCurrentTiStep, current_timebin, DETAILED_TIMING_GRAVWALK, MODE_FINISHED,
+          timediff(tstart, tend), timediff(tstart, tend));
+  fflush(FdDetailed);
+#endif /* #ifdef DETAILEDTIMINGS */
+
+  for(j = 0; j < NTask; j++)
+    Recv_count[j] = 0;
+
+  /* n runs over all imported points, k over the results that are sent back, i over the tasks the points came from */
+  int n;
+  for(i = 0, n = 0, k = 0; i < NTask; i++)
+    for(j = 0; j < Force_Recv_count[i]; j++, n++)
+      {
+#ifndef HIERARCHICAL_GRAVITY
+        if(Tree_Points[n].ActiveFlag)
+#endif /* #ifndef HIERARCHICAL_GRAVITY */
+          {
+            Tree_ResultsActiveImported[k].index = Tree_Points[n].index;
+            Recv_count[i]++;
+            k++;
+          }
+      }
+
+  MPI_Alltoall(Recv_count, 1, MPI_INT, Send_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, Nexport = 0, Nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
+    {
+      Nexport += Send_count[j];
+      Nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  struct resultsactiveimported_data *tmp_results = mymalloc("tmp_results", Nexport * sizeof(struct resultsactiveimported_data));
+  memset(tmp_results, -1, Nexport * sizeof(struct resultsactiveimported_data));
+
+  /* exchange data; note that results flow in the reverse direction here: the Recv_* counts describe what is sent */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              MPI_Sendrecv(&Tree_ResultsActiveImported[Recv_offset[recvTask]],
+                           Recv_count[recvTask] * sizeof(struct resultsactiveimported_data), MPI_BYTE, recvTask, TAG_FOF_A,
+                           &tmp_results[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct resultsactiveimported_data),
+                           MPI_BYTE, recvTask, TAG_FOF_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  /* store the accelerations that came back at the particle given by the transmitted index */
+  for(i = 0; i < Nexport; i++)
+    {
+      int target = tmp_results[i].index;
+
+      for(k = 0; k < 3; k++)
+        P[target].GravAccel[k] = tmp_results[i].GravAccel[k];
+    }
+
+  myfree(tmp_results);
+
+  myfree(Tree_ResultsActiveImported);
+  myfree(Tree_ResultIndexList);
+  myfree(TargetList);
+
+  TIMER_STOPSTART(CPU_TREESENDBACK, CPU_TREECOSTMEASURE);
+
+  /* assign node cost to particles */
+  if(MeasureCostFlag)
+    {
+      for(int threadid = 0; threadid < NUM_THREADS; threadid++)
+        if(ThreadFirstExec[threadid])
+          {
+            /* this could happen if neither the primary nor the secondary loop had anything to do */
+            ThreadFirstExec[threadid] = 0;
+            memset(Thread[threadid].P_CostCount, 0, NumPart * sizeof(int));
+            memset(Thread[threadid].TreePoints_CostCount, 0, Tree_NumPartImported * sizeof(int));
+            memset(Thread[threadid].Node_CostCount + Tree_MaxPart, 0, Tree_NumNodes * sizeof(int));
+          }
+
+      force_assign_cost_values();
+      domain_init_sum_cost();
+
+      All.LevelHasBeenMeasured[TakeLevel] = 1;
+
+      if(All.TypeOfOpeningCriterion == 1 && All.Ti_Current == 0)
+        All.LevelHasBeenMeasured[TakeLevel] = 0; /* leaves the level unmeasured, so it is measured again next time */
+
+      for(i = 0; i < NUM_THREADS; i++)
+        Thread[i].Node_CostCount += Tree_MaxPart; /* restore the pointers returned by mymalloc */
+
+      for(i = NUM_THREADS - 1; i >= 1; i--) /* buffers are released in exactly the reverse order of their allocation */
+        {
+          myfree(Thread[i].Node_CostCount);
+          myfree(Thread[i].TreePoints_CostCount);
+          myfree(Thread[i].P_CostCount);
+        }
+
+      myfree(Thread[0].Node_CostCount);
+      myfree(Thread[0].TreePoints_CostCount);
+      myfree(Thread[0].P_CostCount);
+    }
+
+  TIMER_STOPSTART(CPU_TREECOSTMEASURE, CPU_TREE);
+
+  if(All.TypeOfOpeningCriterion == 1)
+    All.ErrTolTheta = 0; /* This will switch to the relative opening criterion for the following force computations */
+
+  mpi_printf("GRAVTREE: tree-force is done.\n");
+
+  /*  gather some diagnostic information */
+
+  TIMER_STOPSTART(CPU_TREE, CPU_LOGS);
+
+  Costtotal = 0;
+  for(i = 0; i < NUM_THREADS; i++)
+    Costtotal += ThreadsCosttotal[i];
+
+  timer.tree1      = TIMER_DIFF(CPU_TREEWALK1);
+  timer.tree2      = TIMER_DIFF(CPU_TREEWALK2);
+  timer.tree       = timer.tree1 + timer.tree2;
+  timer.commwait   = TIMER_DIFF(CPU_TREEBALSNDRCV) + TIMER_DIFF(CPU_TREESENDBACK);
+  timer.all        = timer.tree + timer.commwait + TIMER_DIFF(CPU_TREE) + TIMER_DIFF(CPU_TREECOSTMEASURE);
+  timer.sumnexport = SumNexport;
+  timer.costtotal  = Costtotal;
+  timer.numnodes   = Tree_NumNodes;
+
+  /* all three reductions deliver to task 0 only; maxiter, tisum and timax are not set on the other tasks */
+  MPI_Reduce(&iter, &maxiter, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
+  MPI_Reduce((double *)&timer, (double *)&tisum, (int)(sizeof(struct detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_SUM, 0,
+             MPI_COMM_WORLD);
+  MPI_Reduce((double *)&timer, (double *)&timax, (int)(sizeof(struct detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_MAX, 0,
+             MPI_COMM_WORLD);
+
+  All.TotNumOfForces += TimeBinsGravity.GlobalNActiveParticles;
+
+  if(ThisTask == 0)
+    {
+      fprintf(FdTimings, "Nf=%9lld  timebin=%d  total-Nf=%lld\n", TimeBinsGravity.GlobalNActiveParticles, timebin, All.TotNumOfForces);
+
+      /* the 1e-20 terms in the denominators below guard against division by zero */
+      fprintf(FdTimings, "   work-load balance: %g (%g %g), rel1to2: %g\n", timax.tree / ((tisum.tree + 1e-20) / NTask),
+              timax.tree1 / ((tisum.tree1 + 1e-20) / NTask), timax.tree2 / ((tisum.tree2 + 1e-20) / NTask),
+              tisum.tree1 / (tisum.tree1 + tisum.tree2 + 1e-20));
+      fprintf(FdTimings, "   number of iterations:  max=%d, exported fraction: %g\n", maxiter,
+              tisum.sumnexport / (TimeBinsGravity.GlobalNActiveParticles + 1e-20));
+      fprintf(FdTimings, "   part/sec: raw=%g, effective=%g     ia/part: avg=%g\n",
+              TimeBinsGravity.GlobalNActiveParticles / (tisum.tree + 1.0e-20),
+              TimeBinsGravity.GlobalNActiveParticles / ((timax.tree + 1.0e-20) * NTask),
+              tisum.costtotal / (TimeBinsGravity.GlobalNActiveParticles + 1.0e-20));
+
+      fprintf(FdTimings, "   maximum number of nodes: %g, filled: %g\n", timax.numnodes, timax.numnodes / Tree_MaxNodes);
+
+      fprintf(FdTimings, "   avg times: all=%g  tree1=%g  tree2=%g  commwait=%g sec\n", tisum.all / NTask, tisum.tree1 / NTask,
+              tisum.tree2 / NTask, tisum.commwait / NTask);
+
+      myflush(FdTimings);
+    }
+
+  TIMER_STOP(CPU_LOGS);
+}
+
+/*! \brief Evaluate-function for the gravitational tree; wraps
+ *         force_treeevaluate.
+ *
+ *  \param[in] target Index of particle.
+ *  \param[in] mode Flag if local or imported particles should be considered.
+ *  \param[in] threadid ID of thread.
+ *
+ *  \return Number of interactions processed for this particle.
+ */
+static int gravity_evaluate(int target, int mode, int threadid)
+{
+  const int is_local = (mode == MODE_LOCAL_PARTICLES);
+  int nnodes, *nodelist;
+  data_in in_local, *input;
+  data_out out_local, *result;
+
+  if(!is_local)
+    {
+      /* imported particle: read from DataGet and write straight into DataResult */
+      input  = &DataGet[target];
+      result = &DataResult[target];
+      generic_get_numnodes(target, &nnodes, &nodelist);
+    }
+  else
+    {
+      /* local particle: input and result are staged in local variables */
+      particle2in(&in_local, target, 0);
+      input  = &in_local;
+      result = &out_local;
+      nnodes   = 1;
+      nodelist = NULL;
+    }
+
+  const int ninteractions = force_treeevaluate(input, result, target, mode, threadid, nnodes, nodelist, MeasureCostFlag);
+
+  /* local results still have to be stored; imported ones were already placed into DataResult[target] */
+  if(is_local)
+    out2particle(&out_local, target, MODE_LOCAL_PARTICLES);
+
+  return ninteractions;
+}
diff --git a/src/amuse/community/arepo/src/gravity/gravtree_forcetest.c b/src/amuse/community/arepo/src/gravity/gravtree_forcetest.c
new file mode 100644
index 0000000000..54e1c5c299
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/gravtree_forcetest.c
@@ -0,0 +1,1089 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/gravtree_forcetest.c
+ * \date        05/2018
+ * \brief       Test short range gravity evaluation.
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                void gravity_forcetest(void)
+ *                static void gravity_forcetest_evaluate(int target, int mode,
+ *                  int threadid)
+ *                void gravity_forcetest_testforcelaw(void)
+ *                static void ewald_other_images(double x, double y, double z,
+ *                  double alpha, double force[4])
+ *                static void ewald_correction_force(double x, double y,
+ *                  double z, double force[4])
+ *                void forcetest_ewald_init(void)
+ *                static void ewald_correction_force_table_lookup(double dx,
+ *                  double dy, double dz, double force[4])
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 20.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+
+#ifdef FORCETEST
+
+#if !defined(EVALPOTENTIAL) && defined(FORCETEST)
+#error "When you enable FORCETEST you should also switch on EVALPOTENTIAL"
+#endif /* #if !defined(EVALPOTENTIAL) && defined(FORCETEST) */
+
+static void gravity_forcetest_evaluate(int target, int mode, int threadid);
+static void ewald_correction_force(double x, double y, double z, double force[4]);
+static void ewald_other_images(double x, double y, double z, double alpha, double force[4]);
+static void ewald_correction_force_table_lookup(double x, double y, double z, double force[4]);
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3];             /* P[i].Pos, or SphP[i].Center for type-0 particles under CELL_CENTER_GRAVITY */
+  unsigned char Type;          /* copy of P[i].Type */
+  unsigned char SofteningType; /* copy of P[i].SofteningType */
+
+  int Firstnode; /* the 'firstnode' argument that was passed to particle2in */
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Packs particle/cell i into a data_in record (generic_comm_helpers2).
+ *
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index of particle in P and SphP arrays.
+ *  \param[in] firstnode First node of communication; stored in in->Firstnode.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  in->Firstnode     = firstnode;
+  in->SofteningType = P[i].SofteningType;
+  in->Type          = P[i].Type;
+
+#ifdef CELL_CENTER_GRAVITY
+  if(P[i].Type == 0)
+    {
+      in->Pos[0] = SphP[i].Center[0];
+      in->Pos[1] = SphP[i].Center[1];
+      in->Pos[2] = SphP[i].Center[2];
+    }
+  else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+    {
+      in->Pos[0] = P[i].Pos[0];
+      in->Pos[1] = P[i].Pos[1];
+      in->Pos[2] = P[i].Pos[2];
+    }
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyFloat Acc[3];    /* goes into P[i].GravAccelDirect (see out2particle) */
+  MyFloat Pot;       /* goes into P[i].PotentialDirect */
+  MyFloat DistToID1; /* goes into P[i].DistToID1; when combining, only values > 0 are taken over */
+#ifdef PMGRID
+  MyFloat AccLongRange[3];  /* goes into P[i].GravAccelLongRange */
+  MyFloat AccShortRange[3]; /* goes into P[i].GravAccelShortRange */
+  MyFloat PotLongRange;     /* goes into P[i].PotentialLongRange */
+  MyFloat PotShortRange;    /* goes into P[i].PotentialShortRange */
+#endif /* #ifdef PMGRID */
+} data_out;
+
+static data_out *DataResult, *DataOut;
+
+/*! \brief Stores or accumulates a result record; needed by
+ *         generic_comm_helpers2.
+ *
+ *  \param[in] out Result data to be transferred into the particle array P.
+ *  \param[in] i Index of particle in P and SphP arrays
+ *  \param[in] mode MODE_LOCAL_PARTICLES overwrites the fields of P[i]; any
+ *  other value adds the data to what is already stored there.
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  if(mode != MODE_LOCAL_PARTICLES)
+    {
+      /* combine: add the incoming values to the stored ones */
+      for(int k = 0; k < 3; k++)
+        {
+          P[i].GravAccelDirect[k] += out->Acc[k];
+#ifdef PMGRID
+          P[i].GravAccelLongRange[k] += out->AccLongRange[k];
+          P[i].GravAccelShortRange[k] += out->AccShortRange[k];
+#endif /* #ifdef PMGRID */
+        }
+      P[i].PotentialDirect += out->Pot;
+#ifdef PMGRID
+      P[i].PotentialLongRange += out->PotLongRange;
+      P[i].PotentialShortRange += out->PotShortRange;
+#endif /* #ifdef PMGRID */
+
+      /* the distance is taken over only if the incoming one is positive */
+      if(out->DistToID1 > 0)
+        P[i].DistToID1 = out->DistToID1;
+      return;
+    }
+
+  /* initial store: overwrite the particle fields */
+  for(int k = 0; k < 3; k++)
+    {
+      P[i].GravAccelDirect[k] = out->Acc[k];
+#ifdef PMGRID
+      P[i].GravAccelLongRange[k]  = out->AccLongRange[k];
+      P[i].GravAccelShortRange[k] = out->AccShortRange[k];
+#endif /* #ifdef PMGRID */
+    }
+  P[i].PotentialDirect = out->Pot;
+  P[i].DistToID1       = out->DistToID1;
+#ifdef PMGRID
+  P[i].PotentialLongRange  = out->PotLongRange;
+  P[i].PotentialShortRange = out->PotShortRange;
+#endif /* #ifdef PMGRID */
+}
+
+#include "../utils/generic_comm_helpers2.h"
+
+/*! \brief Routine that handles the local particles.
+ *
+ *  Resets the export flags of the calling thread and then consumes entries
+ *  of TimeBinsGravity.ActiveParticleList, continuing at NextParticle.
+ *  Negative list entries are skipped; a particle is handed to
+ *  gravity_forcetest_evaluate in MODE_LOCAL_PARTICLES only if its
+ *  TimeBinGrav is negative. The loop stops once the list is exhausted or
+ *  the ExportSpace of the thread has fallen below MinSpace; in the latter
+ *  case NextParticle is left untouched.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  const int tid = get_thread_num();
+
+  /* reset the per-task export markers of this thread */
+  for(int task = 0; task < NTask; task++)
+    Thread[tid].Exportflag[task] = -1;
+
+  /* keep going only while the export buffer has at least MinSpace left */
+  while(Thread[tid].ExportSpace >= MinSpace)
+    {
+      /* claim the next entry of the active list */
+      const int slot = NextParticle++;
+
+      if(slot >= TimeBinsGravity.NActiveParticles)
+        break;
+
+      const int p = TimeBinsGravity.ActiveParticleList[slot];
+
+      /* skip negative list entries; evaluate only particles with TimeBinGrav < 0 */
+      if(p >= 0 && P[p].TimeBinGrav < 0)
+        gravity_forcetest_evaluate(p, MODE_LOCAL_PARTICLES, tid);
+    }
+}
+
+/*! \brief Routine that handles the particles imported from other tasks.
+ *
+ *  Calls gravity_forcetest_evaluate in MODE_IMPORTED_PARTICLES for the
+ *  entries 0 .. Nimport-1, using the id returned by get_thread_num() as
+ *  thread id.
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  const int tid = get_thread_num();
+  int slot      = 0;
+
+  /*
+   * Walk over the received entries in ascending slot order;
+   * each of the Nimport entries is evaluated exactly once.
+   */
+  while(slot < Nimport)
+    {
+      gravity_forcetest_evaluate(slot, MODE_IMPORTED_PARTICLES, tid);
+
+      slot++;
+    }
+}
+
+/*! \brief Tests the gravitational forces of a subset of the active
+ *  particles.
+ *
+ *  An active particle is selected if get_random_number() < FORCETEST and is
+ *  marked by a negative TimeBinGrav; the reference values are appended to
+ *  'forcetest.txt' task by task, and the marks are removed afterwards.
+ *
+ *  \return void
+ */
+void gravity_forcetest(void)
+{
+  int nthis, nloc, ntot;
+  int idx, i, j;
+  double fac1;
+  char buf[200];
+
+  nloc = 0;
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(get_random_number() < FORCETEST)
+        {
+          P[i].TimeBinGrav = -P[i].TimeBinGrav - 1; /* Mark as selected; the same mapping applied again restores the value */
+          nloc++;
+        }
+    }
+
+  MPI_Allreduce(&nloc, &ntot, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  mpi_printf("FORCETEST: Testing forces of %d particles\n", ntot);
+
+  double t0 = second();
+
+  generic_set_MaxNexport();
+
+  generic_comm_pattern(TimeBinsGravity.NActiveParticles, kernel_local, kernel_imported);
+
+  double t1   = second();
+  double maxt = timediff(t0, t1); /* elapsed time on this task; no reduction over tasks is done */
+
+  /*  multiply by G */
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(P[i].TimeBinGrav < 0)
+        {
+          for(j = 0; j < 3; j++)
+            {
+              P[i].GravAccelDirect[j] *= All.G;
+#ifdef PMGRID
+              P[i].GravAccelLongRange[j] *= All.G;
+              P[i].GravAccelShortRange[j] *= All.G;
+#endif /* #ifdef PMGRID */
+            }
+
+          P[i].PotentialDirect *= All.G;
+#ifdef PMGRID
+          P[i].PotentialLongRange *= All.G;
+          P[i].PotentialShortRange *= All.G;
+#endif /* #ifdef PMGRID */
+        }
+    }
+
+  /* Finally, the following factor allows a computation of cosmological simulation
+     with vacuum energy in physical coordinates */
+
+  if(All.ComovingIntegrationOn == 0)
+    {
+      fac1 = All.OmegaLambda * All.Hubble * All.Hubble;
+
+      for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+        {
+          i = TimeBinsGravity.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          if(P[i].TimeBinGrav < 0)
+            for(j = 0; j < 3; j++)
+              P[i].GravAccelDirect[j] += fac1 * P[i].Pos[j];
+        }
+    }
+
+  /* now output the forces to a file; the tasks take turns, separated by barriers */
+
+  for(nthis = 0; nthis < NTask; nthis++)
+    {
+      if(nthis == ThisTask)
+        {
+          if((size_t)snprintf(buf, sizeof(buf), "%s%s", All.OutputDir, "forcetest.txt") >= sizeof(buf)) /* also true on error */
+            terminate("path of forcetest.txt in OutputDir '%s' does not fit into %d bytes\n", All.OutputDir, (int)sizeof(buf));
+          if(!(FdForceTest = fopen(buf, "a")))
+            terminate("error in opening file '%s'\n", buf);
+
+          for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+            {
+              i = TimeBinsGravity.ActiveParticleList[idx];
+              if(i < 0)
+                continue;
+
+              if(P[i].TimeBinGrav < 0)
+                {
+#ifdef PMGRID
+                  fprintf(FdForceTest,
+                          "%d %d %lld  %g  %g %g %g  %g  %15.10g %15.10g %15.10g  %15.10g %15.10g %15.10g  %15.10g %15.10g %15.10g  "
+                          "%15.10g %15.10g %15.10g  %15.10g %15.10g %15.10g  %15.10g %15.10g %15.10g %15.10g  %15.10g\n",
+                          P[i].Type, ThisTask, (long long)P[i].ID, All.Time, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2], P[i].DistToID1,
+                          P[i].GravAccelDirect[0], P[i].GravAccelDirect[1], P[i].GravAccelDirect[2], P[i].GravAccelShortRange[0],
+                          P[i].GravAccelShortRange[1], P[i].GravAccelShortRange[2], P[i].GravAccelLongRange[0],
+                          P[i].GravAccelLongRange[1], P[i].GravAccelLongRange[2], P[i].GravAccel[0], P[i].GravAccel[1],
+                          P[i].GravAccel[2], P[i].GravPM[0], P[i].GravPM[1], P[i].GravPM[2], P[i].PotentialDirect,
+                          P[i].PotentialShortRange, P[i].PotentialLongRange, P[i].Potential, P[i].PM_Potential);
+#else  /* #ifdef PMGRID */
+                  fprintf(FdForceTest,
+                          "%d %d %lld %g  %g %g %g %g  %15.10g %15.10g %15.10g  %15.10g %15.10g %15.10g  %15.10g %15.10g\n", P[i].Type,
+                          ThisTask, (long long)P[i].ID, All.Time, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2], P[i].DistToID1,
+                          P[i].GravAccelDirect[0], P[i].GravAccelDirect[1], P[i].GravAccelDirect[2], P[i].GravAccel[0],
+                          P[i].GravAccel[1], P[i].GravAccel[2], P[i].PotentialDirect, P[i].Potential);
+#endif /* #ifdef PMGRID #else */
+                }
+            }
+
+          fclose(FdForceTest);
+        }
+
+      MPI_Barrier(MPI_COMM_WORLD);
+    }
+
+  /* remove the selection marks again */
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(P[i].TimeBinGrav < 0)
+        P[i].TimeBinGrav = -P[i].TimeBinGrav - 1;
+    }
+
+  /* Now the force computation is finished */
+
+  if(ThisTask == 0)
+    {
+      double costtotal = (double)NumPart * ntot; /* convert before multiplying so that the product cannot overflow an int */
+
+      fprintf(FdTimings, "DIRECT Nf= %d    part/sec=%g | %g  ia/part=%g\n\n", ntot, ((double)ntot) / (NTask * maxt + 1.0e-20),
+              ntot / ((maxt + 1.0e-20) * NTask), ((double)(costtotal)) / (ntot + 1.0e-20));
+
+      myflush(FdTimings);
+    }
+}
+
+/*! \brief This function does the gravitational force computation with direct
+ *  summation for the specified particle.
+ *
+ *  This can be useful for debugging purposes, in particular for explicit
+ *  checks of the force accuracy reached with the tree. Depending on whether
+ *  or not a PMGRID is used, the code does a short-range tree-walk or a full
+ *  one.
+ *
+ *  In MODE_LOCAL_PARTICLES the particle is additionally registered for
+ *  export to every other task.
+ *
+ *  \param[in] target Index of the particle (local mode) or of the DataGet
+ *         entry (imported mode) to be processed.
+ *  \param[in] mode MODE_LOCAL_PARTICLES (phase 1) or imported (phase 2).
+ *  \param[in] threadid Id of this thread.
+ *
+ *  \return void
+ */
+static void gravity_forcetest_evaluate(int target, int mode, int threadid)
+{
+  int j;
+  double h_i, h_j, hmax, mass, dx, dy, dz, r, r2, fac, wp, fac_newton, wp_newton;
+  double pos_x, pos_y, pos_z;
+#ifdef PMGRID
+  double asmth = All.Asmth[0];
+#endif /* #ifdef PMGRID */
+#if !defined(GRAVITY_NOT_PERIODIC)
+  double xtmp, ytmp, ztmp;
+#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */
+
+  double acc_x     = 0;
+  double acc_y     = 0;
+  double acc_z     = 0;
+  double pot       = 0;
+  double disttoid1 = 0;
+
+  data_out out;
+  data_in local, *target_data;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0);
+      target_data = &local;
+
+      /* make sure that the particle is exported to all other tasks */
+      for(int task = 0; task < NTask; task++)
+        if(task != ThisTask)
+          {
+            if(Thread[threadid].Exportflag[task] != target)
+              {
+                Thread[threadid].Exportflag[task]     = target;
+                int nexp                              = Thread[threadid].Nexport++;
+                Thread[threadid].PartList[nexp].Task  = task;
+                Thread[threadid].PartList[nexp].Index = target;
+                Thread[threadid].ExportSpace -= Thread[threadid].ItemSize;
+              }
+
+            int nexp = Thread[threadid].NexportNodes++;
+            nexp     = -1 - nexp;
+            struct datanodelist *nodelist =
+                (struct datanodelist *)(((char *)Thread[threadid].PartList) + Thread[threadid].InitialSpace);
+            nodelist[nexp].Task  = task;
+            nodelist[nexp].Index = target;
+            nodelist[nexp].Node  = 0; /* the node doesn't matter here */
+            Thread[threadid].ExportSpace -= sizeof(struct datanodelist) + sizeof(int);
+          }
+    }
+  else
+    {
+      target_data = &DataGet[target];
+    }
+
+  pos_x = target_data->Pos[0];
+  pos_y = target_data->Pos[1];
+  pos_z = target_data->Pos[2];
+  h_i   = All.ForceSoftening[target_data->SofteningType];
+
+#ifdef PLACEHIGHRESREGION
+  if(pmforce_is_particle_high_res(target_data->Type, target_data->Pos))
+    asmth = All.Asmth[1];
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+  out.Pot = 0;
+#ifdef PMGRID
+  out.PotShortRange = 0;
+  out.PotLongRange  = 0;
+#endif /* #ifdef PMGRID */
+
+  for(int i = 0; i < 3; i++)
+    {
+      out.Acc[i] = 0;
+#ifdef PMGRID
+      out.AccShortRange[i] = 0;
+      out.AccLongRange[i]  = 0;
+#endif /* #ifdef PMGRID */
+    }
+
+  for(j = 0; j < NumPart; j++)
+    {
+      h_j = All.ForceSoftening[P[j].SofteningType];
+
+      if(h_j > h_i)
+        hmax = h_j;
+      else
+        hmax = h_i;
+
+#ifdef CELL_CENTER_GRAVITY
+      if(P[j].Type == 0)
+        {
+          dx = GRAVITY_NEAREST_X(SphP[j].Center[0] - pos_x);
+          dy = GRAVITY_NEAREST_Y(SphP[j].Center[1] - pos_y);
+          dz = GRAVITY_NEAREST_Z(SphP[j].Center[2] - pos_z);
+        }
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        {
+          dx = GRAVITY_NEAREST_X(P[j].Pos[0] - pos_x);
+          dy = GRAVITY_NEAREST_Y(P[j].Pos[1] - pos_y);
+          dz = GRAVITY_NEAREST_Z(P[j].Pos[2] - pos_z);
+        }
+
+      r2 = dx * dx + dy * dy + dz * dz;
+
+      mass = P[j].Mass;
+
+      /* now evaluate the multipole moment */
+
+      r = sqrt(r2);
+
+      if(P[j].ID == 1)
+        disttoid1 = r;
+
+      /* we compute 3 different forces:
+       * (1) The correct direct summation force, if needed with Ewald correction: ftrue
+       * In the case of PM:
+       * (2) The short range direct summation force with only the erfc cut-off (this is what the tree can at best deliver): fsr
+       * (3) The expected PM force based on the long-range part of the Ewald sum. This is equal to ftrue - fsr - fsfr_periodic_images
+       * */
+
+      if(r > 0)
+        {
+          fac_newton = mass / (r2 * r);
+          wp_newton  = -mass / r;
+        }
+      else
+        {
+          fac_newton = 0;
+          wp_newton  = 0;
+        }
+
+      if(r >= hmax)
+        {
+          fac = fac_newton;
+          wp  = wp_newton;
+        }
+      else
+        {
+          double h_inv  = 1.0 / hmax;
+          double h3_inv = h_inv * h_inv * h_inv;
+          double u      = r * h_inv;
+
+          if(u < 0.5)
+            {
+              double u2 = u * u;
+              fac       = mass * h3_inv * (SOFTFAC1 + u2 * (SOFTFAC2 * u + SOFTFAC3));
+              wp        = mass * h_inv * (SOFTFAC4 + u2 * (SOFTFAC5 + u2 * (SOFTFAC6 * u + SOFTFAC7)));
+            }
+          else
+            {
+              double u2 = u * u, u3 = u2 * u;
+              fac = mass * h3_inv * (SOFTFAC8 + SOFTFAC9 * u + SOFTFAC10 * u2 + SOFTFAC11 * u3 + SOFTFAC12 / u3);
+              wp  = mass * h_inv * (SOFTFAC13 + SOFTFAC14 / u + u2 * (SOFTFAC1 + u * (SOFTFAC15 + u * (SOFTFAC16 + SOFTFAC17 * u))));
+            }
+        }
+
+      double acc_newton_x = dx * fac;
+      double acc_newton_y = dy * fac;
+      double acc_newton_z = dz * fac;
+      double pot_newton   = wp;
+
+#ifdef PMGRID
+      double u = 0.5 / asmth * r;
+
+      double factor_force = (erfc(u) + 2.0 * u / sqrt(M_PI) * exp(-u * u) - 1.0);
+      double factor_pot   = erfc(u);
+
+      fac += fac_newton * factor_force;
+      wp += wp_newton * (factor_pot - 1.0);
+
+      double acc_short_x = dx * fac;
+      double acc_short_y = dy * fac;
+      double acc_short_z = dz * fac;
+      double pot_short   = wp + mass * M_PI / (asmth * asmth * boxSize_X * boxSize_Y * boxSize_Z);
+
+      out.AccShortRange[0] += acc_short_x;
+      out.AccShortRange[1] += acc_short_y;
+      out.AccShortRange[2] += acc_short_z;
+      out.PotShortRange += pot_short;
+#endif /* #ifdef PMGRID */
+
+#if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL)
+      double fcorr[4];
+
+#if !defined(FORCETEST_TESTFORCELAW)
+      ewald_correction_force_table_lookup(dx, dy, dz, fcorr);
+#else  /* #if !defined(FORCETEST_TESTFORCELAW) */
+      ewald_correction_force(dx, dy, dz, fcorr);
+#endif /* #if !defined(FORCETEST_TESTFORCELAW) #else */
+
+      acc_x = acc_newton_x + mass * fcorr[0];
+      acc_y = acc_newton_y + mass * fcorr[1];
+      acc_z = acc_newton_z + mass * fcorr[2];
+
+      pot = pot_newton + mass * fcorr[3];
+#else  /* #if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */
+      acc_x = acc_newton_x;
+      acc_y = acc_newton_y;
+      acc_z = acc_newton_z;
+      pot = pot_newton;
+#endif /* #if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) #else */
+
+      out.Acc[0] += acc_x;
+      out.Acc[1] += acc_y;
+      out.Acc[2] += acc_z;
+      out.Pot += pot;
+
+#ifdef PMGRID
+      double fimages[4] = {0, 0, 0, 0};
+#ifdef FORCETEST_TESTFORCELAW
+      ewald_other_images(dx, dy, dz, 0.5 / asmth, fimages);
+#endif /* #ifdef FORCETEST_TESTFORCELAW */
+      out.AccLongRange[0] += acc_x - acc_short_x - mass * fimages[0];
+      out.AccLongRange[1] += acc_y - acc_short_y - mass * fimages[1];
+      out.AccLongRange[2] += acc_z - acc_short_z - mass * fimages[2];
+      out.PotLongRange += pot - pot_short - mass * fimages[3];
+#endif /* #ifdef PMGRID */
+    }
+
+  out.DistToID1 = disttoid1;
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+}
+
+#ifdef FORCETEST_TESTFORCELAW
+/*! \brief Places particle with ID 1 randomly in box and calculates force on it.
+ *  This is repeated for Ncycles cycles; afterwards endrun() is called.
+ *  \return void
+ */
+void gravity_forcetest_testforcelaw(void)
+{
+  int Ncycles = 40;
+  double xyz[3], eps;
+
+  ngb_treefree();
+  mark_active_timebins();
+
+  for(int cycle = 0; cycle < Ncycles; cycle++)
+    {
+      mpi_printf("\nTEST-FORCE-LAW: cycle=%d|%d ----------------------------------\n\n", cycle, Ncycles);
+
+      double epsloc = 0, xyzloc[3] = {0, 0, 0};
+
+      /* set particle with ID=1 to new random coordinate in box */
+      for(int n = 0; n < NumPart; n++)
+        {
+          P[n].Type = 1; /* every local particle is given type 1 for this test */
+
+          if(P[n].ID == 1)
+            {
+              xyzloc[0] = All.BoxSize * STRETCHX * get_random_number();
+              xyzloc[1] = All.BoxSize * STRETCHY * get_random_number();
+              xyzloc[2] = All.BoxSize * STRETCHZ * get_random_number();
+
+#if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1)
+              for(int j = 0; j < 3; j++)
+                xyzloc[j] = 0.5 * (All.Xmintot[1][j] + All.Xmaxtot[1][j]);
+#endif /* #if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1) */
+
+#if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 2)
+              if(get_random_number() < 0.5)
+                {
+                  for(int j = 0; j < 3; j++)
+                    xyzloc[j] = All.Xmintot[1][j] + get_random_number() * (All.Xmaxtot[1][j] - All.Xmintot[1][j]);
+                }
+#endif /* #if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 2) */
+
+              for(int i = 0; i < 3; i++)
+                P[n].Pos[i] = xyzloc[i];
+
+              epsloc = All.ForceSoftening[P[n].SofteningType];
+            }
+        }
+      /* tasks without particle ID 1 contribute zeros, so the sums below hand its position and softening to all tasks */
+      MPI_Allreduce(xyzloc, xyz, 3, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+      MPI_Allreduce(&epsloc, &eps, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+      double rmin = 0.01 * eps;
+      double rmax =
+          sqrt(pow(0.5 * All.BoxSize * STRETCHX, 2) + pow(0.5 * All.BoxSize * STRETCHY, 2) + pow(0.5 * All.BoxSize * STRETCHZ, 2));
+      /* all other particles are placed at random offsets (r, theta, phi) around the position of particle ID 1 */
+      for(int n = 0; n < NumPart; n++)
+        {
+          if(P[n].ID != 1)
+            {
+              double r     = exp(log(rmin) + (log(rmax) - log(rmin)) * get_random_number()); /* interpolates in log(r) */
+              double theta = acos(2 * get_random_number() - 1);
+              double phi   = 2 * M_PI * get_random_number();
+
+              double dx = r * sin(theta) * cos(phi);
+              double dy = r * sin(theta) * sin(phi);
+              double dz = r * cos(theta);
+
+              double xtmp, ytmp, ztmp; /* NOTE(review): presumably scratch variables used by the WRAP_* macros -- confirm */
+              P[n].Pos[0] = WRAP_X(xyz[0] + dx);
+              P[n].Pos[1] = WRAP_Y(xyz[1] + dy);
+              P[n].Pos[2] = WRAP_Z(xyz[2] + dz);
+            }
+        }
+
+      domain_free();
+      domain_Decomposition(); /* do domain decomposition if needed */
+
+#ifdef PMGRID
+      long_range_force();
+#endif /* #ifdef PMGRID */
+
+      compute_grav_accelerations(All.HighestActiveTimeBin, FLAG_FULL_TREE);
+    }
+
+  endrun();
+}
+#endif /* #ifdef FORCETEST_TESTFORCELAW */
+
+/*! \brief Contribution of the periodic image boxes in tree-PM gravity.
+ *
+ *  Sums over the image boxes with offsets -4..4 in each dimension, leaving
+ *  out the central box. The point is first mirrored into the octant with
+ *  non-negative coordinates; the force components are multiplied by the
+ *  matching sign factors at the end. All four entries of the result are zero
+ *  if (x, y, z) is the origin.
+ *
+ *  \param[in] x X coordinate of point.
+ *  \param[in] y Y coordinate of point.
+ *  \param[in] z Z coordinate of point.
+ *  \param[in] alpha Cutoff for tree-PM.
+ *  \param[out] force Force vector in [0..2], potential part in [3].
+ *
+ *  \return void
+ */
+static void ewald_other_images(double x, double y, double z, double alpha, double force[4])
+{
+  force[0] = force[1] = force[2] = force[3] = 0;
+
+  const double dist2 = x * x + y * y + z * z;
+
+  if(dist2 == 0)
+    return;
+
+  /* sign factors are taken from the original coordinates, before mirroring */
+  const double sgn_x = (x < 0) ? +1 : -1;
+  const double sgn_y = (y < 0) ? +1 : -1;
+  const double sgn_z = (z < 0) ? +1 : -1;
+
+  if(x < 0)
+    x = -x;
+  if(y < 0)
+    y = -y;
+  if(z < 0)
+    z = -z;
+
+  const double alpha_sq = alpha * alpha;
+  const int n_images    = 4; /* image boxes taken along each direction */
+
+  /* accumulate the contributions of all images, one box at a time */
+  for(int nx = -n_images; nx <= n_images; nx++)
+    for(int ny = -n_images; ny <= n_images; ny++)
+      for(int nz = -n_images; nz <= n_images; nz++)
+        {
+          /* the central box is not an image */
+          if(nx == 0 && ny == 0 && nz == 0)
+            continue;
+
+          /* offset of the point from this image */
+          const double px = x - nx * STRETCHX * All.BoxSize;
+          const double py = y - ny * STRETCHY * All.BoxSize;
+          const double pz = z - nz * STRETCHZ * All.BoxSize;
+          const double d2 = px * px + py * py + pz * pz;
+          const double d  = sqrt(d2);
+
+          /* erfc(alpha * d) enters both the force and the potential part */
+          const double erfc_ad  = erfc(alpha * d);
+          const double shortrng = erfc_ad + 2.0 * alpha * d / sqrt(M_PI) * exp(-alpha_sq * d2);
+          const double kernel   = shortrng / (d2 * d);
+
+          force[0] -= px * kernel;
+          force[1] -= py * kernel;
+          force[2] -= pz * kernel;
+          force[3] -= erfc_ad / d;
+        }
+
+  /* undo the mirroring for the vector part; the potential part has no sign */
+  force[0] *= sgn_x;
+  force[1] *= sgn_y;
+  force[2] *= sgn_z;
+}
+
+/*! \brief Force due to periodic boundary conditions.
+ *  Evaluated by explicit summation; all entries are zero at the origin.
+ *  \param[in] x X coordinate of point.
+ *  \param[in] y Y coordinate of point.
+ *  \param[in] z Z coordinate of point.
+ *  \param[out] force Force vector in [0..2], potential part in [3].
+ */
+static void ewald_correction_force(double x, double y, double z, double force[4])
+{
+  double signx, signy, signz;
+
+  for(int i = 0; i < 4; i++)
+    force[i] = 0;
+
+  double r2 = x * x + y * y + z * z;
+
+  if(r2 == 0)
+    return;
+
+  if(x < 0)
+    {
+      x     = -x;
+      signx = +1;
+    }
+  else
+    signx = -1;
+  if(y < 0)
+    {
+      y     = -y;
+      signy = +1;
+    }
+  else
+    signy = -1;
+  if(z < 0)
+    {
+      z     = -z;
+      signz = +1;
+    }
+  else
+    signz = -1;
+
+  double lmin   = imin(imin(STRETCHX, STRETCHY), STRETCHZ); /* NOTE(review): imin() looks like an integer minimum; presumably relies on integer stretch factors -- confirm */
+  double alpha  = 2.0 / lmin / All.BoxSize;
+  double alpha2 = alpha * alpha;
+  double r      = sqrt(r2);
+  double r3inv  = 1.0 / (r2 * r);
+
+  force[0] += r3inv * x;
+  force[1] += r3inv * y;
+  force[2] += r3inv * z;
+  /* real-space part: sum over the boxes with offsets -nmax..nmax in each dimension (central box included) */
+  const int nmax = 6;
+
+  for(int nx = -nmax; nx <= nmax; nx++)
+    for(int ny = -nmax; ny <= nmax; ny++)
+      for(int nz = -nmax; nz <= nmax; nz++)
+        {
+          double dx   = x - nx * STRETCHX * All.BoxSize;
+          double dy   = y - ny * STRETCHY * All.BoxSize;
+          double dz   = z - nz * STRETCHZ * All.BoxSize;
+          double r2   = dx * dx + dy * dy + dz * dz;
+          double r    = sqrt(r2);
+          double val  = erfc(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2);
+          double val2 = val / (r2 * r);
+          double val3 = erfc(alpha * r) / r; /* for potential */
+
+          force[0] -= dx * val2;
+          force[1] -= dy * val2;
+          force[2] -= dz * val2;
+          force[3] -= val3;
+        }
+  /* reciprocal-space part: sum over the wave vectors with k2 > 0 up to nxmax, nymax, nzmax */
+  int nxmax = (int)(4 * alpha * All.BoxSize * (STRETCHX / lmin) + 0.5);
+  int nymax = (int)(4 * alpha * All.BoxSize * (STRETCHY / lmin) + 0.5);
+  int nzmax = (int)(4 * alpha * All.BoxSize * (STRETCHZ / lmin) + 0.5);
+
+  for(int nx = -nxmax; nx <= nxmax; nx++)
+    for(int ny = -nymax; ny <= nymax; ny++)
+      for(int nz = -nzmax; nz <= nzmax; nz++)
+        {
+          double kx = (2.0 * M_PI / (All.BoxSize * STRETCHX)) * nx;
+          double ky = (2.0 * M_PI / (All.BoxSize * STRETCHY)) * ny;
+          double kz = (2.0 * M_PI / (All.BoxSize * STRETCHZ)) * nz;
+          double k2 = kx * kx + ky * ky + kz * kz;
+
+          if(k2 > 0)
+            {
+              double kdotx = (x * kx + y * ky + z * kz);
+              double vv    = 4.0 * M_PI / (k2 * pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ) * exp(-k2 / (4.0 * alpha2));
+              double val   = vv * sin(kdotx);
+              double val2  = vv * cos(kdotx);
+              force[0] -= kx * val;
+              force[1] -= ky * val;
+              force[2] -= kz * val;
+              force[3] -= val2;
+            }
+        }
+  /* remaining terms of the potential entry, which do not depend on the summation indices */
+  force[3] += M_PI / (alpha2 * pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ) + 1.0 / r;
+
+  force[0] *= signx;
+  force[1] *= signy;
+  force[2] *= signz;
+}
+
+#if !defined(FORCETEST_TESTFORCELAW)
+
+#define TEW_N 128
+/* number of table intervals per dimension: TEW_N scaled by DBX/DBY/DBZ and the stretch factors */
+#define TEW_NX (DBX * STRETCHX * TEW_N)
+#define TEW_NY (DBY * STRETCHY * TEW_N)
+#define TEW_NZ (DBZ * STRETCHZ * TEW_N)
+/* Ewd_table[c][i][j][k]: entry c of the Ewald correction (0..2 force, 3 potential) at grid point (i, j, k) */
+static double Ewd_table[4][TEW_NX + 1][TEW_NY + 1][TEW_NZ + 1];
+static double Ewd_table_intp; /* converts a distance into table-index units; set in forcetest_ewald_init() */
+
+/*! \brief Initializes Ewald correction force test.
+ *
+ *  Fills Ewd_table in parallel (every task computes a share of the grid
+ *  points, then all shares are gathered everywhere) and sets Ewd_table_intp.
+ *  \return void
+ */
+void forcetest_ewald_init(void)
+{
+  double t0 = second();
+
+  mpi_printf("FORCETEST: initialize high-res Ewald lookup table...\n");
+
+#ifdef LONG_X
+  if(LONG_X != (int)(LONG_X))
+    terminate("LONG_X must be an integer");
+#endif /* #ifdef LONG_X */
+
+#ifdef LONG_Y
+  if(LONG_Y != (int)(LONG_Y))
+    terminate("LONG_Y must be an integer");
+#endif /* #ifdef LONG_Y */
+
+#ifdef LONG_Z
+  if(LONG_Z != (int)(LONG_Z))
+    terminate("LONG_Z must be an integer");
+#endif /* #ifdef LONG_Z */
+
+  /* ok, let's compute things. Actually, we do that in parallel: each task handles grid points [first, first + count) */
+  int size = (TEW_NX + 1) * (TEW_NY + 1) * (TEW_NZ + 1);
+  int first, count;
+
+  subdivide_evenly(size, NTask, ThisTask, &first, &count);
+  int progress_step = (count >= 20) ? count / 20 : 1; /* about 5% steps; never 0, so the modulo below is safe */
+
+  for(int n = first; n < first + count; n++)
+    {
+      int i = n / ((TEW_NY + 1) * (TEW_NZ + 1));
+      int j = (n - i * (TEW_NY + 1) * (TEW_NZ + 1)) / (TEW_NZ + 1);
+      int k = (n - i * (TEW_NY + 1) * (TEW_NZ + 1) - j * (TEW_NZ + 1));
+
+      if(ThisTask == 0 && ((n - first) % progress_step) == 0)
+        {
+          printf("%4.1f percent done\n", (n - first) / (count / 100.0));
+          myflush(stdout);
+        }
+
+      double xx = 0.5 * DBX * STRETCHX * ((double)i) / TEW_NX * All.BoxSize;
+      double yy = 0.5 * DBY * STRETCHY * ((double)j) / TEW_NY * All.BoxSize;
+      double zz = 0.5 * DBZ * STRETCHZ * ((double)k) / TEW_NZ * All.BoxSize;
+
+      double fcorr[4];
+      ewald_correction_force(xx, yy, zz, fcorr);
+
+      for(int rep = 0; rep < 4; rep++)
+        Ewd_table[rep][i][j][k] = fcorr[rep];
+    }
+
+  int *recvcnts = (int *)mymalloc("recvcnts", NTask * sizeof(int));
+  int *recvoffs = (int *)mymalloc("recvoffs", NTask * sizeof(int));
+
+  for(int i = 0; i < NTask; i++)
+    {
+      int off, cnt;
+      subdivide_evenly(size, NTask, i, &off, &cnt);
+      recvcnts[i] = cnt * sizeof(double);
+      recvoffs[i] = off * sizeof(double);
+    }
+
+  for(int rep = 0; rep < 4; rep++)
+    MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(double), MPI_BYTE, Ewd_table[rep], recvcnts, recvoffs, MPI_BYTE, MPI_COMM_WORLD);
+
+  myfree(recvoffs);
+  myfree(recvcnts);
+
+  /* now scale things to the boxsize that is actually used */
+  Ewd_table_intp = 2 * TEW_N / All.BoxSize;
+
+  double t1 = second();
+  mpi_printf("FORCETEST: Initialization of high-res Ewald table finished, took %g sec.\n", timediff(t0, t1));
+}
+
+/*! \brief Looks up Ewald force from tabulated values.
+ *
+ *  The correction is interpolated trilinearly from Ewd_table at the absolute
+ *  values of the offsets; the signs of the offsets are applied to the three
+ *  force components afterwards.
+ *
+ *  \param[in] dx X position.
+ *  \param[in] dy Y position.
+ *  \param[in] dz Z position.
+ *  \param[out] force Ewald force correction.
+ *
+ *  \return void
+ */
+static void ewald_correction_force_table_lookup(double dx, double dy, double dz, double force[4])
+{
+  /* remember the sign of each offset, then continue with absolute values */
+  const int sgn_x = (dx < 0) ? -1 : +1;
+  const int sgn_y = (dy < 0) ? -1 : +1;
+  const int sgn_z = (dz < 0) ? -1 : +1;
+
+  if(dx < 0)
+    dx = -dx;
+  if(dy < 0)
+    dy = -dy;
+  if(dz < 0)
+    dz = -dz;
+
+  /* position in units of table cells: the cell index is limited to the last
+     cell, the remainder is the interpolation coordinate within that cell */
+  double tx = dx * Ewd_table_intp;
+  double ty = dy * Ewd_table_intp;
+  double tz = dz * Ewd_table_intp;
+
+  int ix = (int)tx;
+  int iy = (int)ty;
+  int iz = (int)tz;
+
+  if(ix >= TEW_NX)
+    ix = TEW_NX - 1;
+  if(iy >= TEW_NY)
+    iy = TEW_NY - 1;
+  if(iz >= TEW_NZ)
+    iz = TEW_NZ - 1;
+
+  tx -= ix;
+  ty -= iy;
+  tz -= iz;
+
+  /* weights of the eight cell corners; the digits give the index offsets in x, y, z */
+  const double w000 = (1 - tx) * (1 - ty) * (1 - tz);
+  const double w001 = (1 - tx) * (1 - ty) * (tz);
+  const double w010 = (1 - tx) * (ty) * (1 - tz);
+  const double w011 = (1 - tx) * (ty) * (tz);
+  const double w100 = (tx) * (1 - ty) * (1 - tz);
+  const double w101 = (tx) * (1 - ty) * (tz);
+  const double w110 = (tx) * (ty) * (1 - tz);
+  const double w111 = (tx) * (ty) * (tz);
+
+  for(int c = 0; c < 4; c++)
+    {
+      /* weighted sum over the corners, one corner per line */
+      force[c] = Ewd_table[c][ix][iy][iz] * w000 +
+                 Ewd_table[c][ix][iy][iz + 1] * w001 +
+                 Ewd_table[c][ix][iy + 1][iz] * w010 +
+                 Ewd_table[c][ix][iy + 1][iz + 1] * w011 +
+                 Ewd_table[c][ix + 1][iy][iz] * w100 +
+                 Ewd_table[c][ix + 1][iy][iz + 1] * w101 +
+                 Ewd_table[c][ix + 1][iy + 1][iz] * w110 +
+                 Ewd_table[c][ix + 1][iy + 1][iz + 1] * w111;
+    }
+
+  /* only the vector part carries the sign of the offsets */
+  force[0] *= sgn_x;
+  force[1] *= sgn_y;
+  force[2] *= sgn_z;
+}
+
+#endif /* #if !defined(FORCETEST_TESTFORCELAW) */
+
+#endif /* #ifdef FORCETEST */
diff --git a/src/amuse/community/arepo/src/gravity/longrange.c b/src/amuse/community/arepo/src/gravity/longrange.c
new file mode 100644
index 0000000000..2fbd6a2e53
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/longrange.c
@@ -0,0 +1,199 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/longrange.c
+ * \date        05/2018
+ * \brief       Driver routines for computation of long-range gravitational
+ *              PM force
+ * \details     contains functions:
+ *                void long_range_init(void)
+ *                void long_range_init_regionsize(void)
+ *                void long_range_force(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef PMGRID
+/*! \brief Sets up the FFT-based PM solvers needed by the gravity setup.
+ *
+ *  Periodic solver(s) unless GRAVITY_NOT_PERIODIC is set; the non-periodic
+ *  solver if GRAVITY_NOT_PERIODIC or PLACEHIGHRESREGION is set.
+ *
+ *  \return void
+ */
+void long_range_init(void)
+{
+#if !defined(GRAVITY_NOT_PERIODIC)
+  pm_init_periodic();
+#if defined(TWODIMS)
+  pm2d_init_periodic();
+#endif /* #if defined(TWODIMS) */
+#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */
+#if defined(GRAVITY_NOT_PERIODIC) || defined(PLACEHIGHRESREGION)
+  pm_init_nonperiodic();
+#endif /* #if defined(GRAVITY_NOT_PERIODIC) || defined(PLACEHIGHRESREGION) */
+}
+
+/*! \brief Driver routine to determine the extent of the non-periodic or
+ *         high resolution region.
+ *
+ *  Only active for GRAVITY_NOT_PERIODIC or PLACEHIGHRESREGION. The region
+ *  is initialized by pm_init_regionsize(), which is skipped if RestartFlag
+ *  is 1. Afterwards the convolution kernels are computed by
+ *  pm_setup_nonperiodic_kernel().
+ *
+ *  \return void
+ */
+void long_range_init_regionsize(void)
+{
+  /* the periodic and the non-periodic configuration share the same sequence
+     of calls, so a single conditional section covers both */
+#if defined(GRAVITY_NOT_PERIODIC) || defined(PLACEHIGHRESREGION)
+  const int need_region_init = (RestartFlag != 1);
+
+  if(need_region_init)
+    pm_init_regionsize();
+
+  pm_setup_nonperiodic_kernel();
+#endif /* #if defined(GRAVITY_NOT_PERIODIC) || defined(PLACEHIGHRESREGION) */
+}
+
+/*! \brief This function computes the long-range PM force for all particles.
+ *
+ *  In case of a periodic grid the force is calculated by pmforce_periodic()
+ *  (pm2d_force_periodic() for TWODIMS), otherwise by pmforce_nonperiodic().
+ *  If a high resolution region is specified for the PM force,
+ *  pmforce_nonperiodic() calculates that force in both cases. If a particle
+ *  does not fit into the non-periodic region, the region is set up again and
+ *  the computation is retried once before the run is terminated.
+ *
+ *  \return void
+ */
+void long_range_force(void)
+{
+  int i;
+
+  TIMER_START(CPU_PM_GRAVITY);
+
+#ifdef GRAVITY_NOT_PERIODIC
+  int j;
+  double fac;
+#endif /* #ifdef GRAVITY_NOT_PERIODIC */
+
+  for(i = 0; i < NumPart; i++)
+    {
+      P[i].GravPM[0] = P[i].GravPM[1] = P[i].GravPM[2] = 0;
+#ifdef EVALPOTENTIAL
+      P[i].PM_Potential = 0;
+#endif /* #ifdef EVALPOTENTIAL */
+    }
+
+#ifndef SELFGRAVITY
+  TIMER_STOP(CPU_PM_GRAVITY); /* balance the TIMER_START above before leaving early */
+  return;
+#endif /* #ifndef SELFGRAVITY */
+
+#ifndef GRAVITY_NOT_PERIODIC
+
+#ifdef TWODIMS
+  pm2d_force_periodic(0);
+#else  /* #ifdef TWODIMS */
+  pmforce_periodic(0, NULL);
+#endif /* #ifdef TWODIMS #else */
+
+#ifdef PLACEHIGHRESREGION
+  i = pmforce_nonperiodic(1);
+
+  if(i == 1) /* this is returned if a particle lay outside allowed range */
+    {
+      pm_init_regionsize();
+      pm_setup_nonperiodic_kernel();
+      i = pmforce_nonperiodic(1); /* try again */
+    }
+  if(i == 1)
+    terminate("despite we tried to increase the region, we still don't fit all particles in it");
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+#else /* #ifndef GRAVITY_NOT_PERIODIC */
+  i = pmforce_nonperiodic(0);
+
+  if(i == 1) /* this is returned if a particle lay outside allowed range */
+    {
+      pm_init_regionsize();
+      pm_setup_nonperiodic_kernel();
+      i = pmforce_nonperiodic(0); /* try again */
+    }
+  if(i == 1)
+    terminate("despite we tried to increase the region, somehow we still don't fit all particles in it");
+#ifdef PLACEHIGHRESREGION
+  i = pmforce_nonperiodic(1);
+
+  if(i == 1) /* this is returned if a particle lay outside allowed range */
+    {
+      pm_init_regionsize();
+      pm_setup_nonperiodic_kernel();
+
+      /* try again: reset what the first pass stored (same reset as at the top), so that nothing is counted twice */
+      for(i = 0; i < NumPart; i++)
+        {
+          P[i].GravPM[0] = P[i].GravPM[1] = P[i].GravPM[2] = 0;
+#ifdef EVALPOTENTIAL
+          P[i].PM_Potential = 0;
+#endif /* #ifdef EVALPOTENTIAL */
+        }
+
+      /* two statements, because the evaluation order of the operands of '+' is unspecified in C */
+      i = pmforce_nonperiodic(0);
+      i += pmforce_nonperiodic(1);
+    }
+  if(i != 0)
+    terminate("despite we tried to increase the region, somehow we still don't fit all particles in it");
+#endif /* #ifdef PLACEHIGHRESREGION */
+#endif /* #ifndef GRAVITY_NOT_PERIODIC #else */
+
+#ifdef GRAVITY_NOT_PERIODIC
+  /* with comoving integration the factor is 0.5 * H^2 * Omega0; otherwise the factor OmegaLambda * H^2
+     allows a computation of cosmological simulation with vacuum energy in physical coordinates */
+  if(All.ComovingIntegrationOn)
+    fac = 0.5 * All.Hubble * All.Hubble * All.Omega0;
+  else
+    fac = All.OmegaLambda * All.Hubble * All.Hubble;
+
+  for(i = 0; i < NumPart; i++)
+    for(j = 0; j < 3; j++)
+      P[i].GravPM[j] += fac * P[i].Pos[j];
+#endif /* #ifdef GRAVITY_NOT_PERIODIC */
+
+  TIMER_STOP(CPU_PM_GRAVITY);
+
+  find_long_range_step_constraint();
+}
+#endif /* #ifdef PMGRID */
diff --git a/src/amuse/community/arepo/src/gravity/pm/pm_mpi_fft.c b/src/amuse/community/arepo/src/gravity/pm/pm_mpi_fft.c
new file mode 100644
index 0000000000..866ef06459
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/pm/pm_mpi_fft.c
@@ -0,0 +1,1771 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/pm/pm_mpi_fft.c
+ * \date        05/2018
+ * \brief       Home-made parallel FFT transforms as needed by the code.
+ * \details     We only use the one-dimensional FFTW3 routines, because the
+ *              MPI versions of FFTW3 allocate memory for themselves during the
+ *              transforms (which we want to strictly avoid), and because we
+ *              want to allow transforms that are so big that more than 2GB
+ *              may be transferred between processors.
+ *
+ *              contains functions:
+ *                void my_slab_based_fft_init(fft_plan * plan, int NgridX,
+ *                  int NgridY, int NgridZ)
+ *                void my_slab_transposeA(fft_plan * plan, fft_real * field,
+ *                  fft_real * scratch)
+ *                void my_slab_transposeB(fft_plan * plan, fft_real * field,
+ *                  fft_real * scratch)
+ *                static void my_slab_transpose(void *av, void *bv, int *sx,
+ *                  int *firstx, int *sy, int *firsty, int nx, int ny, int nz,
+ *                  int mode)
+ *                void my_slab_based_fft(fft_plan * plan, void *data,
+ *                  void *workspace, int forward)
+ *                void my_slab_based_fft_c2c(fft_plan * plan, void *data,
+ *                  void *workspace, int forward)
+ *                void my_column_based_fft_init(fft_plan * plan, int NgridX,
+ *                  int NgridY, int NgridZ)
+ *                void my_column_based_fft_init_c2c(fft_plan * plan,
+ *                  int NgridX, int NgridY, int NgridZ)
+ *                void my_fft_swap23(fft_plan * plan, fft_real * data,
+ *                  fft_real * out)
+ *                void my_fft_swap23back(fft_plan * plan, fft_real * data,
+ *                  fft_real * out)
+ *                void my_fft_swap13(fft_plan * plan, fft_real * data,
+ *                  fft_real * out)
+ *                void my_fft_swap13back(fft_plan * plan, fft_real * data,
+ *                  fft_real * out)
+ *                void my_column_based_fft(fft_plan * plan, void *data,
+ *                  void *workspace, int forward)
+ *                void my_column_based_fft_c2c(fft_plan * plan, void *data,
+ *                  void *workspace, int forward)
+ *                static void my_fft_column_remap(fft_complex * data,
+ *                  int Ndims[3], int in_firstcol, int in_ncol,
+ *                  fft_complex * out, int perm[3], int out_firstcol,
+ *                  int out_ncol, size_t * offset_send, size_t * offset_recv,
+ *                  size_t * count_send, size_t * count_recv,
+ *                  size_t just_count_flag)
+ *                static void my_fft_column_transpose(fft_real * data,
+ *                  int Ndims[3], int in_firstcol, int in_ncol, fft_real * out,
+ *                  int perm[3], int out_firstcol, int out_ncol,
+ *                  size_t * offset_send, size_t * offset_recv,
+ *                  size_t * count_send, size_t * count_recv,
+ *                  size_t just_count_flag)
+ *                static void my_fft_column_transpose_c(fft_complex * data,
+ *                  int Ndims[3], int in_firstcol, int in_ncol,
+ *                  fft_complex * out, int perm[3], int out_firstcol,
+ *                  int out_ncol, size_t * offset_send, size_t * offset_recv,
+ *                  size_t * count_send, size_t * count_recv,
+ *                  size_t just_count_flag)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 26.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#if defined(PMGRID)
+
+#ifndef FFT_COLUMN_BASED
+/*! \brief Initializes slab based FFT: sets up the x- and y-slab decomposition and stores it in the plan.
+ *
+ *  \param[out] plan FFT plan.
+ *  \param[in] NgridX Number of grid points in X direction.
+ *  \param[in] NgridY Number of grid points in Y direction.
+ *  \param[in] NgridZ Number of grid points in Z direction.
+ *
+ *  \return void
+ */
+void my_slab_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ)
+{
+  subdivide_evenly(NgridX, NTask, ThisTask, &plan->slabstart_x, &plan->nslab_x);
+  subdivide_evenly(NgridY, NTask, ThisTask, &plan->slabstart_y, &plan->nslab_y);
+  /* lookup table: x-slab index -> task that was assigned this slab by subdivide_evenly() */
+  plan->slab_to_task = (int *)mymalloc("slab_to_task", NgridX * sizeof(int));
+
+  for(int task = 0; task < NTask; task++)
+    {
+      int start, n;
+
+      subdivide_evenly(NgridX, NTask, task, &start, &n);
+
+      for(int i = start; i < start + n; i++)
+        plan->slab_to_task[i] = task;
+    }
+  /* largest number of x- and y-slabs held by any single task */
+  MPI_Allreduce(&plan->nslab_x, &plan->largest_x_slab, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  MPI_Allreduce(&plan->nslab_y, &plan->largest_y_slab, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  /* make the slab count and first-slab index of every task known on all tasks */
+  plan->slabs_x_per_task = (int *)mymalloc("slabs_x_per_task", NTask * sizeof(int));
+  MPI_Allgather(&plan->nslab_x, 1, MPI_INT, plan->slabs_x_per_task, 1, MPI_INT, MPI_COMM_WORLD);
+
+  plan->first_slab_x_of_task = (int *)mymalloc("first_slab_x_of_task", NTask * sizeof(int));
+  MPI_Allgather(&plan->slabstart_x, 1, MPI_INT, plan->first_slab_x_of_task, 1, MPI_INT, MPI_COMM_WORLD);
+
+  plan->slabs_y_per_task = (int *)mymalloc("slabs_y_per_task", NTask * sizeof(int));
+  MPI_Allgather(&plan->nslab_y, 1, MPI_INT, plan->slabs_y_per_task, 1, MPI_INT, MPI_COMM_WORLD);
+
+  plan->first_slab_y_of_task = (int *)mymalloc("first_slab_y_of_task", NTask * sizeof(int));
+  MPI_Allgather(&plan->slabstart_y, 1, MPI_INT, plan->first_slab_y_of_task, 1, MPI_INT, MPI_COMM_WORLD);
+
+  plan->NgridX = NgridX;
+  plan->NgridY = NgridY;
+  plan->NgridZ = NgridZ;
+
+  int Ngridz = NgridZ / 2 + 1; /* dimension needed in complex space */
+
+  plan->Ngridz = Ngridz;
+  plan->Ngrid2 = 2 * Ngridz; /* length of one z-row counted in fft_real values (row stride used by my_slab_transposeA/B) */
+}
+
+/*! \brief Transposes the array field.
+ *
+ *  The array field is transposed such that the data in x direction is local
+ *  to only one task. This is done, so the force in x-direction can be
+ *  obtained by finite differencing. However the array is not fully
+ *  transposed, i.e. the x-direction is not the fastest running array index.
+ *
+ *  \param[in] plan FFT plan.
+ *  \param[in, out] field The array to transpose.
+ *  \param[out] scratch Scratch space used during communication (same size as
+ *              field).
+ *
+ *  \return void
+ */
+void my_slab_transposeA(fft_plan *plan, fft_real *field, fft_real *scratch)
+{
+  int n, prod, task, flag_big = 0, flag_big_all = 0;
+  /* pack: copy the first NgridZ values of every (x,y) row (rows are Ngrid2 values apart in field) into scratch */
+  prod = NTask * plan->nslab_x;
+
+  for(n = 0; n < prod; n++)
+    {
+      int x    = n / NTask;
+      int task = n % NTask; /* shadows the function-scope 'task'; here it is the task owning the y-range copied below */
+
+      int y;
+
+      for(y = plan->first_slab_y_of_task[task]; y < plan->first_slab_y_of_task[task] + plan->slabs_y_per_task[task]; y++)
+        memcpy(scratch + ((size_t)plan->NgridZ) * (plan->first_slab_y_of_task[task] * plan->nslab_x +
+                                                   x * plan->slabs_y_per_task[task] + (y - plan->first_slab_y_of_task[task])),
+               field + ((size_t)plan->Ngrid2) * (plan->NgridY * x + y), plan->NgridZ * sizeof(fft_real));
+    }
+
+  size_t *scount = (size_t *)mymalloc("scount", NTask * sizeof(size_t));
+  size_t *rcount = (size_t *)mymalloc("rcount", NTask * sizeof(size_t));
+  size_t *soff   = (size_t *)mymalloc("soff", NTask * sizeof(size_t));
+  size_t *roff   = (size_t *)mymalloc("roff", NTask * sizeof(size_t));
+  /* counts and offsets are expressed in bytes here; the element size handed to myMPI_Alltoallv below is 1 */
+  for(task = 0; task < NTask; task++)
+    {
+      scount[task] = plan->nslab_x * plan->slabs_y_per_task[task] * (plan->NgridZ * sizeof(fft_real));
+      rcount[task] = plan->nslab_y * plan->slabs_x_per_task[task] * (plan->NgridZ * sizeof(fft_real));
+
+      soff[task] = plan->first_slab_y_of_task[task] * plan->nslab_x * (plan->NgridZ * sizeof(fft_real));
+      roff[task] = plan->first_slab_x_of_task[task] * plan->nslab_y * (plan->NgridZ * sizeof(fft_real));
+
+      if(scount[task] > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+        flag_big = 1;
+    }
+  /* the flag is raised on all tasks as soon as one send on any task exceeds the message size limit */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  myMPI_Alltoallv(scratch, scount, soff, field, rcount, roff, 1, flag_big_all, MPI_COMM_WORLD);
+  /* the buffers are released in the reverse order of their allocation */
+  myfree(roff);
+  myfree(soff);
+  myfree(rcount);
+  myfree(scount);
+}
+
+/*! \brief Undo the transposition of the array field.
+ *
+ *  The transposition of the array field is undone such that the data in
+ *  x direction is distributed among all tasks again. Thus the result of
+ *  force computation in x-direction is sent back to the original task.
+ *
+ *  \param[in] plan FFT plan.
+ *  \param[in, out] field The array to transpose.
+ *  \param[out] scratch Scratch space used during communication (same size as
+ *              field).
+ *
+ *  \return void
+ */
+void my_slab_transposeB(fft_plan *plan, fft_real *field, fft_real *scratch)
+{
+  int n, prod, task, flag_big = 0, flag_big_all = 0;
+  /* byte counts/offsets are the ones of my_slab_transposeA with the send and receive roles exchanged */
+  size_t *scount = (size_t *)mymalloc("scount", NTask * sizeof(size_t));
+  size_t *rcount = (size_t *)mymalloc("rcount", NTask * sizeof(size_t));
+  size_t *soff   = (size_t *)mymalloc("soff", NTask * sizeof(size_t));
+  size_t *roff   = (size_t *)mymalloc("roff", NTask * sizeof(size_t));
+
+  for(task = 0; task < NTask; task++)
+    {
+      rcount[task] = plan->nslab_x * plan->slabs_y_per_task[task] * (plan->NgridZ * sizeof(fft_real));
+      scount[task] = plan->nslab_y * plan->slabs_x_per_task[task] * (plan->NgridZ * sizeof(fft_real));
+
+      roff[task] = plan->first_slab_y_of_task[task] * plan->nslab_x * (plan->NgridZ * sizeof(fft_real));
+      soff[task] = plan->first_slab_x_of_task[task] * plan->nslab_y * (plan->NgridZ * sizeof(fft_real));
+
+      if(scount[task] > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+        flag_big = 1;
+    }
+  /* the flag is raised on all tasks as soon as one send on any task exceeds the message size limit */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  myMPI_Alltoallv(field, scount, soff, scratch, rcount, roff, 1, flag_big_all, MPI_COMM_WORLD);
+  /* the buffers are released in the reverse order of their allocation */
+  myfree(roff);
+  myfree(soff);
+  myfree(rcount);
+  myfree(scount);
+  /* unpack: copy every row of NgridZ values from scratch back into field, where rows are Ngrid2 values apart */
+  prod = NTask * plan->nslab_x;
+
+  for(n = 0; n < prod; n++)
+    {
+      int x    = n / NTask;
+      int task = n % NTask; /* shadows the function-scope 'task' */
+
+      int y;
+      for(y = plan->first_slab_y_of_task[task]; y < plan->first_slab_y_of_task[task] + plan->slabs_y_per_task[task]; y++)
+        memcpy(field + ((size_t)plan->Ngrid2) * (plan->NgridY * x + y),
+               scratch + ((size_t)plan->NgridZ) * (plan->first_slab_y_of_task[task] * plan->nslab_x +
+                                                   x * plan->slabs_y_per_task[task] + (y - plan->first_slab_y_of_task[task])),
+               plan->NgridZ * sizeof(fft_real));
+    }
+}
+
+/*! \brief Transpose a slab decomposed 3D field.
+ *
+ *  Given a slab-decomposed 3D field a[...] with total dimension
+ *  [nx x ny x nz], whose first dimension is split across the processors, this
+ *  routine outputs in b[] the transpose where then the second dimension is
+ *  split across the processors. sx[] gives for each MPI task how many slabs
+ *  it has, and firstx[] is the first slab for a given task. Likewise,
+ *  sy[]/firsty[] gives the same thing for the transposed order. Note, the
+ *  contents of the array a[] will be destroyed by the routine.
+ *
+ *  An element (x,y,z) is accessed in a[] with index
+ *  [([x - firstx] * ny + y) * nz + z] and in b[] as
+ *  [((y - firsty) * nx + x) * nz + z]
+ *
+ *  \param[in, out] av Pointer to array a.
+ *  \param[in, out] bv Pointer to array b.
+ *  \param[in] sx Array storing number of slabs in each task.
+ *  \param[in] firstx Array with first slab in each task.
+ *  \param[in] sy Array storing number of transposed slabs in each task.
+ *  \param[in] firsty Array storing first transposed slab in each task.
+ *  \param[in] nx Number of elements in x direction.
+ *  \param[in] ny Number of elements in y direction.
+ *  \param[in] nz Number of elements in z direction.
+ *  \param[in] mode If mode = 0, the transpose is carried out; any other value selects the reverse operation.
+ *
+ *  \return void
+ */
+static void my_slab_transpose(void *av, void *bv, int *sx, int *firstx, int *sy, int *firsty, int nx, int ny, int nz, int mode)
+{
+  char *a = (char *)av; /* byte pointers: all offsets below are scaled explicitly by nz * sizeof(fft_complex) */
+  char *b = (char *)bv;
+
+  size_t *scount = (size_t *)mymalloc("scount", NTask * sizeof(size_t));
+  size_t *rcount = (size_t *)mymalloc("rcount", NTask * sizeof(size_t));
+  size_t *soff   = (size_t *)mymalloc("soff", NTask * sizeof(size_t));
+  size_t *roff   = (size_t *)mymalloc("roff", NTask * sizeof(size_t));
+  int i, n, prod, flag_big = 0, flag_big_all = 0;
+  /* counts and offsets are in units of fft_complex elements; the element size is handed to myMPI_Alltoallv */
+  for(i = 0; i < NTask; i++)
+    {
+      scount[i] = sy[i] * sx[ThisTask] * ((size_t)nz);
+      rcount[i] = sy[ThisTask] * sx[i] * ((size_t)nz);
+      soff[i]   = firsty[i] * sx[ThisTask] * ((size_t)nz);
+      roff[i]   = sy[ThisTask] * firstx[i] * ((size_t)nz);
+
+      if(scount[i] * sizeof(fft_complex) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+        flag_big = 1;
+    }
+
+  /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
+   * transfer the data in chunks.
+   */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  if(mode == 0)
+    {
+      /* first pack the data into contiguous blocks */
+      prod = NTask * sx[ThisTask];
+      for(n = 0; n < prod; n++)
+        {
+          int k = n / NTask; /* local x-slab index */
+          int i = n % NTask; /* destination task; shadows the function-scope 'i' */
+          int j;
+
+          for(j = 0; j < sy[i]; j++)
+            memcpy(b + (k * sy[i] + j + firsty[i] * sx[ThisTask]) * (nz * sizeof(fft_complex)),
+                   a + (k * ny + (firsty[i] + j)) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
+        }
+
+      /* transfer the data */
+      myMPI_Alltoallv(b, scount, soff, a, rcount, roff, sizeof(fft_complex), flag_big_all, MPI_COMM_WORLD);
+
+      /* unpack the data into the right order */
+      prod = NTask * sy[ThisTask];
+      for(n = 0; n < prod; n++)
+        {
+          int j = n / NTask; /* local y-slab index */
+          int i = n % NTask; /* source task; shadows the function-scope 'i' */
+          int k;
+
+          for(k = 0; k < sx[i]; k++)
+            memcpy(b + (j * nx + k + firstx[i]) * (nz * sizeof(fft_complex)),
+                   a + ((k + firstx[i]) * sy[ThisTask] + j) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
+        }
+    }
+  else
+    {
+      /* first pack the data into contiguous blocks */
+      prod = NTask * sy[ThisTask];
+      for(n = 0; n < prod; n++)
+        {
+          int j = n / NTask;
+          int i = n % NTask;
+          int k;
+
+          for(k = 0; k < sx[i]; k++)
+            memcpy(b + ((k + firstx[i]) * sy[ThisTask] + j) * (nz * sizeof(fft_complex)),
+                   a + (j * nx + k + firstx[i]) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
+        }
+
+      /* transfer the data (send and receive tables swap roles compared to mode 0) */
+      myMPI_Alltoallv(b, rcount, roff, a, scount, soff, sizeof(fft_complex), flag_big_all, MPI_COMM_WORLD);
+
+      /* unpack the data into the right order */
+      prod = NTask * sx[ThisTask];
+      for(n = 0; n < prod; n++)
+        {
+          int k = n / NTask;
+          int i = n % NTask;
+          int j;
+
+          for(j = 0; j < sy[i]; j++)
+            memcpy(b + (k * ny + (firsty[i] + j)) * (nz * sizeof(fft_complex)),
+                   a + (k * sy[i] + j + firsty[i] * sx[ThisTask]) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
+        }
+    }
+  /* now the result is in b[]; the tables are released in the reverse order of their allocation */
+
+  myfree(roff);
+  myfree(soff);
+  myfree(rcount);
+  myfree(scount);
+}
+
+/*! \brief Performs a slab-based Fast Fourier transformation.
+ *
+ *  \param[in] plan FFT plan.
+ *  \param[in, out] data Array to be Fourier transformed.
+ *  \param[out] workspace Workspace to temporarily operate in.
+ *  \param[in] forward 1 selects the forward transformation; any other value (conventionally -1) the backward one.
+ *
+ *  \return void
+ */
+void my_slab_based_fft(fft_plan *plan, void *data, void *workspace, int forward)
+{
+  int n, prod;
+  int slabsx = plan->slabs_x_per_task[ThisTask];
+  int slabsy = plan->slabs_y_per_task[ThisTask];
+
+  int ngridx  = plan->NgridX;
+  int ngridy  = plan->NgridY;
+  int ngridz  = plan->Ngridz; /* note: the complex-space z-dimension (Ngridz), not NgridZ */
+  int ngridz2 = 2 * ngridz;
+  /* size_t copies so that the pointer offsets below are evaluated in size_t rather than int */
+  size_t ngridx_long  = ngridx;
+  size_t ngridy_long  = ngridy;
+  size_t ngridz_long  = ngridz;
+  size_t ngridz2_long = ngridz2;
+
+  fft_real *data_real       = (fft_real *)data;
+  fft_complex *data_complex = (fft_complex *)data, *workspace_complex = (fft_complex *)workspace;
+
+  if(forward == 1)
+    {
+      /* do the z-direction FFT, real to complex */
+      prod = slabsx * ngridy;
+      for(n = 0; n < prod; n++)
+        {
+          FFTW(execute_dft_r2c)(plan->forward_plan_zdir, data_real + n * ngridz2_long, workspace_complex + n * ngridz_long);
+        }
+
+      /* do the y-direction FFT, complex to complex */
+      prod = slabsx * ngridz;
+      for(n = 0; n < prod; n++)
+        {
+          int i = n / ngridz;
+          int j = n % ngridz;
+
+          FFTW(execute_dft)
+          (plan->forward_plan_ydir, workspace_complex + i * ngridz * ngridy_long + j, data_complex + i * ngridz * ngridy_long + j);
+        }
+
+      /* now our data resides in data_complex[] */
+
+      /* do the transpose */
+      my_slab_transpose(data_complex, workspace_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task,
+                        plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 0);
+
+      /* now the data is in workspace_complex[] */
+
+      /* finally, do the transform along the x-direction (we are in transposed order, x and y have interchanged) */
+      prod = slabsy * ngridz;
+      for(n = 0; n < prod; n++)
+        {
+          int i = n / ngridz;
+          int j = n % ngridz;
+
+          FFTW(execute_dft)
+          (plan->forward_plan_xdir, workspace_complex + i * ngridz * ngridx_long + j, data_complex + i * ngridz * ngridx_long + j);
+        }
+
+      /* now the result is in data_complex[] */
+    }
+  else
+    {
+      prod = slabsy * ngridz;
+      /* backward x-direction transform, from data_complex[] into workspace_complex[] */
+      for(n = 0; n < prod; n++)
+        {
+          int i = n / ngridz;
+          int j = n % ngridz;
+
+          FFTW(execute_dft)
+          (plan->backward_plan_xdir, data_complex + i * ngridz * ngridx_long + j, workspace_complex + i * ngridz * ngridx_long + j);
+        }
+      /* reverse transpose (mode = 1); afterwards the data is in data_complex[] */
+      my_slab_transpose(workspace_complex, data_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task,
+                        plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 1);
+
+      prod = slabsx * ngridz;
+      /* backward y-direction transform, from data_complex[] into workspace_complex[] */
+      for(n = 0; n < prod; n++)
+        {
+          int i = n / ngridz;
+          int j = n % ngridz;
+
+          FFTW(execute_dft)
+          (plan->backward_plan_ydir, data_complex + i * ngridz * ngridy_long + j, workspace_complex + i * ngridz * ngridy_long + j);
+        }
+
+      prod = slabsx * ngridy;
+      /* backward z-direction transform, complex to real */
+      for(n = 0; n < prod; n++)
+        {
+          FFTW(execute_dft_c2r)(plan->backward_plan_zdir, workspace_complex + n * ngridz_long, data_real + n * ngridz2_long);
+        }
+
+      /* now the result is in data[] */
+    }
+}
+
+/*! \brief Performs a slab-based complex to complex Fast Fourier
+ *         transformation.
+ *
+ *  \param[in] plan FFT plan.
+ *  \param[in, out] data Array to be Fourier transformed.
+ *  \param[out] workspace Workspace to temporarily operate in.
+ *  \param[in] forward 1 selects the forward transformation; any other value (conventionally -1) the backward one.
+ *
+ *  \return void
+ */
+void my_slab_based_fft_c2c(fft_plan *plan, void *data, void *workspace, int forward)
+{
+  int n, prod;
+  int slabsx = plan->slabs_x_per_task[ThisTask];
+  int slabsy = plan->slabs_y_per_task[ThisTask];
+
+  int ngridx = plan->NgridX;
+  int ngridy = plan->NgridY;
+  int ngridz = plan->NgridZ; /* full z-dimension: the complex transform has no reduced Ngridz */
+  /* size_t copies so that the pointer offsets below are evaluated in size_t rather than int */
+  size_t ngridx_long = ngridx;
+  size_t ngridy_long = ngridy;
+  size_t ngridz_long = ngridz;
+
+  fft_complex *data_start   = (fft_complex *)data;
+  fft_complex *data_complex = (fft_complex *)data, *workspace_complex = (fft_complex *)workspace;
+
+  if(forward == 1)
+    {
+      /* do the z-direction FFT, complex to complex (row offset in size_t, so n * ngridz cannot overflow int) */
+      prod = slabsx * ngridy;
+      for(n = 0; n < prod; n++)
+        {
+          FFTW(execute_dft)(plan->forward_plan_zdir, data_start + n * ngridz_long, workspace_complex + n * ngridz_long);
+        }
+
+      /* do the y-direction FFT, complex to complex */
+      prod = slabsx * ngridz;
+      for(n = 0; n < prod; n++)
+        {
+          int i = n / ngridz;
+          int j = n % ngridz;
+
+          FFTW(execute_dft)
+          (plan->forward_plan_ydir, workspace_complex + i * ngridz * ngridy_long + j, data_complex + i * ngridz * ngridy_long + j);
+        }
+
+      /* now our data resides in data_complex[] */
+
+      /* do the transpose */
+      my_slab_transpose(data_complex, workspace_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task,
+                        plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 0);
+
+      /* now the data is in workspace_complex[] */
+
+      /* finally, do the transform along the x-direction (we are in transposed order, x and y have interchanged) */
+      prod = slabsy * ngridz;
+      for(n = 0; n < prod; n++)
+        {
+          int i = n / ngridz;
+          int j = n % ngridz;
+
+          FFTW(execute_dft)
+          (plan->forward_plan_xdir, workspace_complex + i * ngridz * ngridx_long + j, data_complex + i * ngridz * ngridx_long + j);
+        }
+
+      /* now the result is in data_complex[] */
+    }
+  else
+    {
+      prod = slabsy * ngridz;
+      /* backward x-direction transform, from data_complex[] into workspace_complex[] */
+      for(n = 0; n < prod; n++)
+        {
+          int i = n / ngridz;
+          int j = n % ngridz;
+
+          FFTW(execute_dft)
+          (plan->backward_plan_xdir, data_complex + i * ngridz * ngridx_long + j, workspace_complex + i * ngridz * ngridx_long + j);
+        }
+      /* reverse transpose (mode = 1); afterwards the data is in data_complex[] */
+      my_slab_transpose(workspace_complex, data_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task,
+                        plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 1);
+
+      prod = slabsx * ngridz;
+      /* backward y-direction transform, from data_complex[] into workspace_complex[] */
+      for(n = 0; n < prod; n++)
+        {
+          int i = n / ngridz;
+          int j = n % ngridz;
+
+          FFTW(execute_dft)
+          (plan->backward_plan_ydir, data_complex + i * ngridz * ngridy_long + j, workspace_complex + i * ngridz * ngridy_long + j);
+        }
+
+      prod = slabsx * ngridy;
+      /* backward z-direction transform, complex to complex (row offset in size_t, as in the forward branch) */
+      for(n = 0; n < prod; n++)
+        {
+          FFTW(execute_dft)(plan->backward_plan_zdir, workspace_complex + n * ngridz_long, data_start + n * ngridz_long);
+        }
+
+      /* now the result is in data[] */
+    }
+}
+
+#else /* #ifndef FFT_COLUMN_BASED */
+
+static void my_fft_column_remap(fft_complex *data, int Ndims[3], int in_firstcol, int in_ncol, fft_complex *out, int perm[3],
+                                int out_firstcol, int out_ncol, size_t *offset_send, size_t *offset_recv, size_t *count_send,
+                                size_t *count_recv, size_t just_count_flag); /* forward declaration of a file-local helper */
+/* forward declaration: same parameter list as my_fft_column_remap, but with fft_real data and output buffers */
+static void my_fft_column_transpose(fft_real *data, int Ndims[3], /* global dimensions of data cube */
+                                    int in_firstcol, int in_ncol, /* first column and number of columns */
+                                    fft_real *out, int perm[3], int out_firstcol, int out_ncol, size_t *offset_send,
+                                    size_t *offset_recv, size_t *count_send, size_t *count_recv, size_t just_count_flag);
+/* forward declaration: same parameter list again, with fft_complex data and output buffers */
+static void my_fft_column_transpose_c(fft_complex *data, int Ndims[3], /* global dimensions of data cube */
+                                      int in_firstcol, int in_ncol,    /* first column and number of columns */
+                                      fft_complex *out, int perm[3], int out_firstcol, int out_ncol, size_t *offset_send,
+                                      size_t *offset_recv, size_t *count_send, size_t *count_recv, size_t just_count_flag);
+
+/*! \brief Initializes column based FFT: stores grid sizes, column ranges, buffer size and exchange tables in the plan.
+ *
+ *  \param[out] plan FFT plan.
+ *  \param[in] NgridX Number of grid points in X direction.
+ *  \param[in] NgridY Number of grid points in Y direction.
+ *  \param[in] NgridZ Number of grid points in Z direction.
+ *
+ *  \return void
+ */
+void my_column_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ)
+{
+  plan->NgridX = NgridX;
+  plan->NgridY = NgridY;
+  plan->NgridZ = NgridZ;
+
+  int Ngridz = NgridZ / 2 + 1; /* dimension needed in complex space */
+
+  plan->Ngridz = Ngridz;
+  plan->Ngrid2 = 2 * Ngridz;
+
+  int columns, avg, exc, tasklastsection, pivotcol;
+  /* split the NgridX * NgridY columns: tasks below tasklastsection get avg columns each, the remaining ones avg - 1 */
+  columns         = NgridX * NgridY;
+  avg             = (columns - 1) / NTask + 1; /* columns / NTask, rounded up */
+  exc             = NTask * avg - columns;     /* number of tasks that receive one column less */
+  tasklastsection = NTask - exc;
+  pivotcol        = tasklastsection * avg;     /* first column that belongs to a task holding avg - 1 columns */
+
+  plan->pivotcol        = pivotcol;
+  plan->avg             = avg;
+  plan->tasklastsection = tasklastsection;
+
+  if(ThisTask < tasklastsection)
+    {
+      plan->base_firstcol = ThisTask * avg;
+      plan->base_ncol     = avg;
+    }
+  else
+    {
+      plan->base_firstcol = ThisTask * avg - (ThisTask - tasklastsection);
+      plan->base_ncol     = avg - 1;
+    }
+
+  plan->base_lastcol = plan->base_firstcol + plan->base_ncol - 1;
+  /* even split (via subdivide_evenly) of the column sets used by the other data layouts */
+  subdivide_evenly(NgridX * Ngridz, NTask, ThisTask, &plan->transposed_firstcol, &plan->transposed_ncol);
+
+  subdivide_evenly(NgridY * Ngridz, NTask, ThisTask, &plan->second_transposed_firstcol, &plan->second_transposed_ncol);
+
+  subdivide_evenly(plan->NgridX * plan->Ngrid2, NTask, ThisTask, &plan->firstcol_XZ, &plan->ncol_XZ);
+
+  subdivide_evenly(plan->NgridY * plan->Ngrid2, NTask, ThisTask, &plan->firstcol_YZ, &plan->ncol_YZ);
+
+  plan->second_transposed_ncells = ((size_t)plan->NgridX) * plan->second_transposed_ncol;
+  /* max_datasize is the largest of the five local sizes below; the size_t casts keep the products out of int arithmetic */
+  plan->max_datasize = ((size_t)plan->Ngrid2) * plan->base_ncol;
+  plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t)plan->NgridY) * plan->transposed_ncol);
+  plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t)plan->NgridX) * plan->second_transposed_ncol);
+  plan->max_datasize = smax(plan->max_datasize, ((size_t)plan->ncol_XZ) * plan->NgridY);
+  plan->max_datasize = smax(plan->max_datasize, ((size_t)plan->ncol_YZ) * plan->NgridX);
+
+  plan->fftsize = plan->max_datasize;
+  /* send/receive offset tables, one size_t per task (mymalloc_clear presumably zero-fills; defined elsewhere) */
+  plan->offsets_send_A      = mymalloc_clear("offsets_send_A", NTask * sizeof(size_t));
+  plan->offsets_recv_A      = mymalloc_clear("offsets_recv_A", NTask * sizeof(size_t));
+  plan->offsets_send_B      = mymalloc_clear("offsets_send_B", NTask * sizeof(size_t));
+  plan->offsets_recv_B      = mymalloc_clear("offsets_recv_B", NTask * sizeof(size_t));
+  plan->offsets_send_C      = mymalloc_clear("offsets_send_C", NTask * sizeof(size_t));
+  plan->offsets_recv_C      = mymalloc_clear("offsets_recv_C", NTask * sizeof(size_t));
+  plan->offsets_send_D      = mymalloc_clear("offsets_send_D", NTask * sizeof(size_t));
+  plan->offsets_recv_D      = mymalloc_clear("offsets_recv_D", NTask * sizeof(size_t));
+  plan->offsets_send_13     = mymalloc_clear("offsets_send_13", NTask * sizeof(size_t));
+  plan->offsets_recv_13     = mymalloc_clear("offsets_recv_13", NTask * sizeof(size_t));
+  plan->offsets_send_23     = mymalloc_clear("offsets_send_23", NTask * sizeof(size_t));
+  plan->offsets_recv_23     = mymalloc_clear("offsets_recv_23", NTask * sizeof(size_t));
+  plan->offsets_send_13back = mymalloc_clear("offsets_send_13back", NTask * sizeof(size_t));
+  plan->offsets_recv_13back = mymalloc_clear("offsets_recv_13back", NTask * sizeof(size_t));
+  plan->offsets_send_23back = mymalloc_clear("offsets_send_23back", NTask * sizeof(size_t));
+  plan->offsets_recv_23back = mymalloc_clear("offsets_recv_23back", NTask * sizeof(size_t));
+  /* matching send/receive count tables, again one size_t per task */
+  plan->count_send_A      = mymalloc_clear("count_send_A", NTask * sizeof(size_t));
+  plan->count_recv_A      = mymalloc_clear("count_recv_A", NTask * sizeof(size_t));
+  plan->count_send_B      = mymalloc_clear("count_send_B", NTask * sizeof(size_t));
+  plan->count_recv_B      = mymalloc_clear("count_recv_B", NTask * sizeof(size_t));
+  plan->count_send_C      = mymalloc_clear("count_send_C", NTask * sizeof(size_t));
+  plan->count_recv_C      = mymalloc_clear("count_recv_C", NTask * sizeof(size_t));
+  plan->count_send_D      = mymalloc_clear("count_send_D", NTask * sizeof(size_t));
+  plan->count_recv_D      = mymalloc_clear("count_recv_D", NTask * sizeof(size_t));
+  plan->count_send_13     = mymalloc_clear("count_send_13", NTask * sizeof(size_t));
+  plan->count_recv_13     = mymalloc_clear("count_recv_13", NTask * sizeof(size_t));
+  plan->count_send_23     = mymalloc_clear("count_send_23", NTask * sizeof(size_t));
+  plan->count_recv_23     = mymalloc_clear("count_recv_23", NTask * sizeof(size_t));
+  plan->count_send_13back = mymalloc_clear("count_send_13back", NTask * sizeof(size_t));
+  plan->count_recv_13back = mymalloc_clear("count_recv_13back", NTask * sizeof(size_t));
+  plan->count_send_23back = mymalloc_clear("count_send_23back", NTask * sizeof(size_t));
+  plan->count_recv_23back = mymalloc_clear("count_recv_23back", NTask * sizeof(size_t));
+  /* every call below passes NULL buffers and just_count_flag = 1, presumably so that only the tables get filled */
+  int dimA[3]  = {plan->NgridX, plan->NgridY, plan->Ngridz};
+  int permA[3] = {0, 2, 1};
+
+  my_fft_column_remap(NULL, dimA, plan->base_firstcol, plan->base_ncol, NULL, permA, plan->transposed_firstcol, plan->transposed_ncol,
+                      plan->offsets_send_A, plan->offsets_recv_A, plan->count_send_A, plan->count_recv_A, 1);
+
+  int dimB[3]  = {plan->NgridX, plan->Ngridz, plan->NgridY};
+  int permB[3] = {2, 1, 0};
+
+  my_fft_column_remap(NULL, dimB, plan->transposed_firstcol, plan->transposed_ncol, NULL, permB, plan->second_transposed_firstcol,
+                      plan->second_transposed_ncol, plan->offsets_send_B, plan->offsets_recv_B, plan->count_send_B, plan->count_recv_B,
+                      1);
+
+  int dimC[3]  = {plan->NgridY, plan->Ngridz, plan->NgridX};
+  int permC[3] = {2, 1, 0};
+
+  my_fft_column_remap(NULL, dimC, plan->second_transposed_firstcol, plan->second_transposed_ncol, NULL, permC,
+                      plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_C, plan->offsets_recv_C, plan->count_send_C,
+                      plan->count_recv_C, 1);
+
+  int dimD[3]  = {plan->NgridX, plan->Ngridz, plan->NgridY};
+  int permD[3] = {0, 2, 1};
+
+  my_fft_column_remap(NULL, dimD, plan->transposed_firstcol, plan->transposed_ncol, NULL, permD, plan->base_firstcol, plan->base_ncol,
+                      plan->offsets_send_D, plan->offsets_recv_D, plan->count_send_D, plan->count_recv_D, 1);
+
+  int dim23[3]  = {plan->NgridX, plan->NgridY, plan->Ngrid2};
+  int perm23[3] = {0, 2, 1};
+
+  my_fft_column_transpose(NULL, dim23, plan->base_firstcol, plan->base_ncol, NULL, perm23, plan->firstcol_XZ, plan->ncol_XZ,
+                          plan->offsets_send_23, plan->offsets_recv_23, plan->count_send_23, plan->count_recv_23, 1);
+
+  int dim23back[3]  = {plan->NgridX, plan->Ngrid2, plan->NgridY};
+  int perm23back[3] = {0, 2, 1};
+
+  my_fft_column_transpose(NULL, dim23back, plan->firstcol_XZ, plan->ncol_XZ, NULL, perm23back, plan->base_firstcol, plan->base_ncol,
+                          plan->offsets_send_23back, plan->offsets_recv_23back, plan->count_send_23back, plan->count_recv_23back, 1);
+
+  int dim13[3]  = {plan->NgridX, plan->NgridY, plan->Ngrid2};
+  int perm13[3] = {2, 1, 0};
+
+  my_fft_column_transpose(NULL, dim13, plan->base_firstcol, plan->base_ncol, NULL, perm13, plan->firstcol_YZ, plan->ncol_YZ,
+                          plan->offsets_send_13, plan->offsets_recv_13, plan->count_send_13, plan->count_recv_13, 1);
+
+  int dim13back[3]  = {plan->Ngrid2, plan->NgridY, plan->NgridX};
+  int perm13back[3] = {2, 1, 0};
+
+  my_fft_column_transpose(NULL, dim13back, plan->firstcol_YZ, plan->ncol_YZ, NULL, perm13back, plan->base_firstcol, plan->base_ncol,
+                          plan->offsets_send_13back, plan->offsets_recv_13back, plan->count_send_13back, plan->count_recv_13back, 1);
+}
+
+/*! \brief Initializes a complex-to-complex column-based FFT plan: stores the grid size, splits the columns over the MPI tasks,
+ *         sizes the buffers, allocates the per-task exchange tables and fills them via calls made with NULL buffers.
+ *
+ *  \param[out] plan FFT plan; its grid sizes, column ranges, buffer sizes and count/offset tables are set here.
+ *  \param[in] NgridX Number of grid points in X direction.
+ *  \param[in] NgridY Number of grid points in Y direction.
+ *  \param[in] NgridZ Number of grid points in Z direction.
+ *  \return void
+ */
+void my_column_based_fft_init_c2c(fft_plan *plan, int NgridX, int NgridY, int NgridZ)
+{
+  plan->NgridX = NgridX;
+  plan->NgridY = NgridY;
+  plan->NgridZ = NgridZ;
+
+  int columns, avg, exc, tasklastsection, pivotcol;
+  /* base layout: the first 'tasklastsection' tasks hold 'avg' of the NgridX * NgridY columns, the others 'avg - 1' */
+  columns         = NgridX * NgridY;
+  avg             = (columns - 1) / NTask + 1;
+  exc             = NTask * avg - columns;
+  tasklastsection = NTask - exc;
+  pivotcol        = tasklastsection * avg;
+
+  plan->pivotcol        = pivotcol;
+  plan->avg             = avg;
+  plan->tasklastsection = tasklastsection;
+
+  if(ThisTask < tasklastsection)
+    {
+      plan->base_firstcol = ThisTask * avg;
+      plan->base_ncol     = avg;
+    }
+  else
+    {
+      plan->base_firstcol = ThisTask * avg - (ThisTask - tasklastsection);
+      plan->base_ncol     = avg - 1;
+    }
+
+  plan->base_lastcol = plan->base_firstcol + plan->base_ncol - 1;
+  /* column ranges of this task in the other layouts, obtained from subdivide_evenly() (defined outside this function) */
+  subdivide_evenly(NgridX * NgridZ, NTask, ThisTask, &plan->transposed_firstcol, &plan->transposed_ncol);
+
+  subdivide_evenly(NgridY * NgridZ, NTask, ThisTask, &plan->second_transposed_firstcol, &plan->second_transposed_ncol);
+
+  subdivide_evenly(plan->NgridX * plan->NgridZ, NTask, ThisTask, &plan->firstcol_XZ, &plan->ncol_XZ);
+
+  subdivide_evenly(plan->NgridY * plan->NgridZ, NTask, ThisTask, &plan->firstcol_YZ, &plan->ncol_YZ);
+
+  plan->second_transposed_ncells = ((size_t)plan->NgridX) * plan->second_transposed_ncol;
+  /* buffer size: the maximum over the expressions below, one per column layout held by this task */
+  plan->max_datasize = 2 * ((size_t)plan->NgridZ) * plan->base_ncol;
+  plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t)plan->NgridY) * plan->transposed_ncol);
+  plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t)plan->NgridX) * plan->second_transposed_ncol);
+  plan->max_datasize = smax(plan->max_datasize, ((size_t)plan->ncol_XZ) * plan->NgridY);
+  plan->max_datasize = smax(plan->max_datasize, ((size_t)plan->ncol_YZ) * plan->NgridX);
+
+  plan->fftsize = plan->max_datasize;
+  /* exchange tables: one size_t entry per MPI task for each remap (A-D) and each transpose (13, 23 and their reverses) */
+  plan->offsets_send_A      = mymalloc_clear("offsets_send_A", NTask * sizeof(size_t));
+  plan->offsets_recv_A      = mymalloc_clear("offsets_recv_A", NTask * sizeof(size_t));
+  plan->offsets_send_B      = mymalloc_clear("offsets_send_B", NTask * sizeof(size_t));
+  plan->offsets_recv_B      = mymalloc_clear("offsets_recv_B", NTask * sizeof(size_t));
+  plan->offsets_send_C      = mymalloc_clear("offsets_send_C", NTask * sizeof(size_t));
+  plan->offsets_recv_C      = mymalloc_clear("offsets_recv_C", NTask * sizeof(size_t));
+  plan->offsets_send_D      = mymalloc_clear("offsets_send_D", NTask * sizeof(size_t));
+  plan->offsets_recv_D      = mymalloc_clear("offsets_recv_D", NTask * sizeof(size_t));
+  plan->offsets_send_13     = mymalloc_clear("offsets_send_13", NTask * sizeof(size_t));
+  plan->offsets_recv_13     = mymalloc_clear("offsets_recv_13", NTask * sizeof(size_t));
+  plan->offsets_send_23     = mymalloc_clear("offsets_send_23", NTask * sizeof(size_t));
+  plan->offsets_recv_23     = mymalloc_clear("offsets_recv_23", NTask * sizeof(size_t));
+  plan->offsets_send_13back = mymalloc_clear("offsets_send_13back", NTask * sizeof(size_t));
+  plan->offsets_recv_13back = mymalloc_clear("offsets_recv_13back", NTask * sizeof(size_t));
+  plan->offsets_send_23back = mymalloc_clear("offsets_send_23back", NTask * sizeof(size_t));
+  plan->offsets_recv_23back = mymalloc_clear("offsets_recv_23back", NTask * sizeof(size_t));
+
+  plan->count_send_A      = mymalloc_clear("count_send_A", NTask * sizeof(size_t));
+  plan->count_recv_A      = mymalloc_clear("count_recv_A", NTask * sizeof(size_t));
+  plan->count_send_B      = mymalloc_clear("count_send_B", NTask * sizeof(size_t));
+  plan->count_recv_B      = mymalloc_clear("count_recv_B", NTask * sizeof(size_t));
+  plan->count_send_C      = mymalloc_clear("count_send_C", NTask * sizeof(size_t));
+  plan->count_recv_C      = mymalloc_clear("count_recv_C", NTask * sizeof(size_t));
+  plan->count_send_D      = mymalloc_clear("count_send_D", NTask * sizeof(size_t));
+  plan->count_recv_D      = mymalloc_clear("count_recv_D", NTask * sizeof(size_t));
+  plan->count_send_13     = mymalloc_clear("count_send_13", NTask * sizeof(size_t));
+  plan->count_recv_13     = mymalloc_clear("count_recv_13", NTask * sizeof(size_t));
+  plan->count_send_23     = mymalloc_clear("count_send_23", NTask * sizeof(size_t));
+  plan->count_recv_23     = mymalloc_clear("count_recv_23", NTask * sizeof(size_t));
+  plan->count_send_13back = mymalloc_clear("count_send_13back", NTask * sizeof(size_t));
+  plan->count_recv_13back = mymalloc_clear("count_recv_13back", NTask * sizeof(size_t));
+  plan->count_send_23back = mymalloc_clear("count_send_23back", NTask * sizeof(size_t));
+  plan->count_recv_23back = mymalloc_clear("count_recv_23back", NTask * sizeof(size_t));
+  /* fill the tables: every call below passes NULL buffers and 1 as its last argument */
+  int dimA[3]  = {plan->NgridX, plan->NgridY, plan->NgridZ};
+  int permA[3] = {0, 2, 1};
+
+  my_fft_column_remap(NULL, dimA, plan->base_firstcol, plan->base_ncol, NULL, permA, plan->transposed_firstcol, plan->transposed_ncol,
+                      plan->offsets_send_A, plan->offsets_recv_A, plan->count_send_A, plan->count_recv_A, 1);
+
+  int dimB[3]  = {plan->NgridX, plan->NgridZ, plan->NgridY};
+  int permB[3] = {2, 1, 0};
+
+  my_fft_column_remap(NULL, dimB, plan->transposed_firstcol, plan->transposed_ncol, NULL, permB, plan->second_transposed_firstcol,
+                      plan->second_transposed_ncol, plan->offsets_send_B, plan->offsets_recv_B, plan->count_send_B, plan->count_recv_B,
+                      1);
+
+  int dimC[3]  = {plan->NgridY, plan->NgridZ, plan->NgridX};
+  int permC[3] = {2, 1, 0};
+
+  my_fft_column_remap(NULL, dimC, plan->second_transposed_firstcol, plan->second_transposed_ncol, NULL, permC,
+                      plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_C, plan->offsets_recv_C, plan->count_send_C,
+                      plan->count_recv_C, 1);
+
+  int dimD[3]  = {plan->NgridX, plan->NgridZ, plan->NgridY};
+  int permD[3] = {0, 2, 1};
+
+  my_fft_column_remap(NULL, dimD, plan->transposed_firstcol, plan->transposed_ncol, NULL, permD, plan->base_firstcol, plan->base_ncol,
+                      plan->offsets_send_D, plan->offsets_recv_D, plan->count_send_D, plan->count_recv_D, 1);
+
+  int dim23[3]  = {plan->NgridX, plan->NgridY, plan->NgridZ};
+  int perm23[3] = {0, 2, 1};
+
+  my_fft_column_transpose_c(NULL, dim23, plan->base_firstcol, plan->base_ncol, NULL, perm23, plan->firstcol_XZ, plan->ncol_XZ,
+                            plan->offsets_send_23, plan->offsets_recv_23, plan->count_send_23, plan->count_recv_23, 1);
+
+  int dim23back[3]  = {plan->NgridX, plan->NgridZ, plan->NgridY};
+  int perm23back[3] = {0, 2, 1};
+
+  my_fft_column_transpose_c(NULL, dim23back, plan->firstcol_XZ, plan->ncol_XZ, NULL, perm23back, plan->base_firstcol, plan->base_ncol,
+                            plan->offsets_send_23back, plan->offsets_recv_23back, plan->count_send_23back, plan->count_recv_23back, 1);
+
+  int dim13[3]  = {plan->NgridX, plan->NgridY, plan->NgridZ};
+  int perm13[3] = {2, 1, 0};
+
+  my_fft_column_transpose_c(NULL, dim13, plan->base_firstcol, plan->base_ncol, NULL, perm13, plan->firstcol_YZ, plan->ncol_YZ,
+                            plan->offsets_send_13, plan->offsets_recv_13, plan->count_send_13, plan->count_recv_13, 1);
+
+  int dim13back[3]  = {plan->NgridZ, plan->NgridY, plan->NgridX};
+  int perm13back[3] = {2, 1, 0};
+
+  my_fft_column_transpose_c(NULL, dim13back, plan->firstcol_YZ, plan->ncol_YZ, NULL, perm13back, plan->base_firstcol, plan->base_ncol,
+                            plan->offsets_send_13back, plan->offsets_recv_13back, plan->count_send_13back, plan->count_recv_13back, 1);
+}
+
+/*! \brief YZ column transpose: moves real data from the plan's base column ranges to its XZ column ranges.
+ *
+ *  \param[in] plan FFT plan providing the grid sizes, the column ranges and the '23' exchange tables.
+ *  \param[in, out] data Array with the data to be swapped; the transpose routine also receives into it.
+ *  \param[out] out Array receiving the transposed data.
+ *
+ *  \return void
+ */
+void my_fft_swap23(fft_plan *plan, fft_real *data, fft_real *out)
+{
+  int extent[3] = {plan->NgridX, plan->NgridY, plan->Ngrid2};
+  int order[3]  = {0, 2, 1};
+
+  my_fft_column_transpose(data, extent, plan->base_firstcol, plan->base_ncol, out, order, plan->firstcol_XZ,
+                          plan->ncol_XZ, plan->offsets_send_23, plan->offsets_recv_23, plan->count_send_23, plan->count_recv_23, 0);
+}
+
+/*! \brief Reverse YZ column transpose: moves real data from the plan's XZ column ranges back to its base column ranges.
+ *
+ *  \param[in] plan FFT plan providing the grid sizes, the column ranges and the '23back' exchange tables.
+ *  \param[in, out] data Array with the data to be swapped; the transpose routine also receives into it.
+ *  \param[out] out Array receiving the transposed data.
+ *
+ *  \return void
+ */
+void my_fft_swap23back(fft_plan *plan, fft_real *data, fft_real *out)
+{
+  int extent[3] = {plan->NgridX, plan->Ngrid2, plan->NgridY};
+  int order[3]  = {0, 2, 1};
+
+  my_fft_column_transpose(data, extent, plan->firstcol_XZ, plan->ncol_XZ, out, order, plan->base_firstcol,
+                          plan->base_ncol, plan->offsets_send_23back, plan->offsets_recv_23back, plan->count_send_23back,
+                          plan->count_recv_23back, 0);
+}
+
+/*! \brief XZ column transpose: moves real data from the plan's base column ranges to its YZ column ranges.
+ *
+ *  \param[in] plan FFT plan providing the grid sizes, the column ranges and the '13' exchange tables.
+ *  \param[in, out] data Array with the data to be swapped; the transpose routine also receives into it.
+ *  \param[out] out Array receiving the transposed data.
+ *
+ *  \return void
+ */
+void my_fft_swap13(fft_plan *plan, fft_real *data, fft_real *out)
+{
+  int extent[3] = {plan->NgridX, plan->NgridY, plan->Ngrid2};
+  int order[3]  = {2, 1, 0};
+
+  my_fft_column_transpose(data, extent, plan->base_firstcol, plan->base_ncol, out, order, plan->firstcol_YZ,
+                          plan->ncol_YZ, plan->offsets_send_13, plan->offsets_recv_13, plan->count_send_13, plan->count_recv_13, 0);
+}
+
+/*! \brief Reverse XZ column transpose: moves real data from the plan's YZ column ranges back to its base column ranges.
+ *
+ *  \param[in] plan FFT plan providing the grid sizes, the column ranges and the '13back' exchange tables.
+ *  \param[in, out] data Array with the data to be swapped; the transpose routine also receives into it.
+ *  \param[out] out Array receiving the transposed data.
+ *
+ *  \return void
+ */
+void my_fft_swap13back(fft_plan *plan, fft_real *data, fft_real *out)
+{
+  int extent[3] = {plan->Ngrid2, plan->NgridY, plan->NgridX};
+  int order[3]  = {2, 1, 0};
+
+  my_fft_column_transpose(data, extent, plan->firstcol_YZ, plan->ncol_YZ, out, order, plan->base_firstcol,
+                          plan->base_ncol, plan->offsets_send_13back, plan->offsets_recv_13back, plan->count_send_13back,
+                          plan->count_recv_13back, 0);
+}
+
+/*! \brief Runs the column-based 3D FFT between real and complex data, one direction at a time with a remap in between.
+ *
+ *  \param[in] plan FFT plan holding the FFTW plans, the column ranges and the exchange tables A to D.
+ *  \param[in, out] data Array to be Fourier transformed; it is overwritten by the intermediate remaps.
+ *  \param[out] workspace Second buffer to operate in; the last transform of either direction writes its result here.
+ *  \param[in] forward Forward (1) Fourier transformation? Any other value runs the backward branch.
+ *
+ *  \return void
+ */
+void my_column_based_fft(fft_plan *plan, void *data, void *workspace, int forward)
+{
+  size_t col;
+  fft_real *real_data = data, *real_work = workspace;
+  fft_complex *cplx_data = data, *cplx_work = workspace;
+
+  if(forward != 1)
+    {
+      /* backward: start with the inverse x-direction FFT, 'data' -> 'workspace' */
+      for(col = 0; col < plan->second_transposed_ncol; col++)
+        FFTW(execute_dft)(plan->backward_plan_xdir, cplx_data + col * plan->NgridX, cplx_work + col * plan->NgridX);
+
+      int extentC[3] = {plan->NgridY, plan->Ngridz, plan->NgridX};
+      int orderC[3]  = {2, 1, 0};
+
+      my_fft_column_remap(cplx_work, extentC, plan->second_transposed_firstcol, plan->second_transposed_ncol, cplx_data,
+                          orderC, plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_C,
+                          plan->offsets_recv_C, plan->count_send_C, plan->count_recv_C, 0);
+
+      /* inverse y-direction FFT, 'data' -> 'workspace' */
+      for(col = 0; col < plan->transposed_ncol; col++)
+        FFTW(execute_dft)(plan->backward_plan_ydir, cplx_data + col * plan->NgridY, cplx_work + col * plan->NgridY);
+
+      int extentD[3] = {plan->NgridX, plan->Ngridz, plan->NgridY};
+      int orderD[3]  = {0, 2, 1};
+
+      my_fft_column_remap(cplx_work, extentD, plan->transposed_firstcol, plan->transposed_ncol, cplx_data, orderD,
+                          plan->base_firstcol, plan->base_ncol, plan->offsets_send_D, plan->offsets_recv_D,
+                          plan->count_send_D, plan->count_recv_D, 0);
+
+      /* finish with the complex-to-real inverse transform along z; the real result ends up in 'workspace' */
+      for(col = 0; col < plan->base_ncol; col++)
+        FFTW(execute_dft_c2r)(plan->backward_plan_zdir, cplx_data + col * plan->Ngridz, real_work + col * plan->Ngrid2);
+    }
+  else
+    {
+      /* forward: z-direction FFT first, real to complex, 'data' -> 'workspace' */
+      for(col = 0; col < plan->base_ncol; col++)
+        FFTW(execute_dft_r2c)(plan->forward_plan_zdir, real_data + col * plan->Ngrid2, cplx_work + col * plan->Ngridz);
+
+      int extentA[3] = {plan->NgridX, plan->NgridY, plan->Ngridz};
+      int orderA[3]  = {0, 2, 1};
+
+      my_fft_column_remap(cplx_work, extentA, plan->base_firstcol, plan->base_ncol, cplx_data, orderA,
+                          plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_A, plan->offsets_recv_A,
+                          plan->count_send_A, plan->count_recv_A, 0);
+
+      /* y-direction FFT, complex to complex, 'data' -> 'workspace' */
+      for(col = 0; col < plan->transposed_ncol; col++)
+        FFTW(execute_dft)(plan->forward_plan_ydir, cplx_data + col * plan->NgridY, cplx_work + col * plan->NgridY);
+
+      int extentB[3] = {plan->NgridX, plan->Ngridz, plan->NgridY};
+      int orderB[3]  = {2, 1, 0};
+
+      my_fft_column_remap(cplx_work, extentB, plan->transposed_firstcol, plan->transposed_ncol, cplx_data, orderB,
+                          plan->second_transposed_firstcol, plan->second_transposed_ncol, plan->offsets_send_B,
+                          plan->offsets_recv_B, plan->count_send_B, plan->count_recv_B, 0);
+
+      /* x-direction FFT, complex to complex, 'data' -> 'workspace' */
+      for(col = 0; col < plan->second_transposed_ncol; col++)
+        FFTW(execute_dft)(plan->forward_plan_xdir, cplx_data + col * plan->NgridX, cplx_work + col * plan->NgridX);
+
+      /* the transformed data is left in 'workspace' */
+    }
+}
+
+/*! \brief Runs the column-based complex-to-complex 3D FFT, one direction at a
+ *         time with a column remap between the directions.
+ *
+ *  \param[in] plan FFT plan holding the FFTW plans, the column ranges and the exchange tables A to D.
+ *  \param[in, out] data Array to be Fourier transformed; it is overwritten by the intermediate remaps.
+ *  \param[out] workspace Second buffer to operate in; the last transform of either direction writes its result here.
+ *  \param[in] forward Forward (1) Fourier transformation? Any other value runs the backward branch.
+ *
+ *  \return void
+ */
+void my_column_based_fft_c2c(fft_plan *plan, void *data, void *workspace, int forward)
+{
+  size_t col;
+  fft_complex *src = data, *dst = workspace;
+
+  if(forward != 1)
+    {
+      /* backward: start with the inverse x-direction FFT, 'data' -> 'workspace' */
+      for(col = 0; col < plan->second_transposed_ncol; col++)
+        FFTW(execute_dft)(plan->backward_plan_xdir, src + col * plan->NgridX, dst + col * plan->NgridX);
+
+      int extentC[3] = {plan->NgridY, plan->NgridZ, plan->NgridX};
+      int orderC[3]  = {2, 1, 0};
+
+      my_fft_column_remap(dst, extentC, plan->second_transposed_firstcol, plan->second_transposed_ncol, src, orderC,
+                          plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_C, plan->offsets_recv_C,
+                          plan->count_send_C, plan->count_recv_C, 0);
+
+      /* inverse y-direction FFT, 'data' -> 'workspace' */
+      for(col = 0; col < plan->transposed_ncol; col++)
+        FFTW(execute_dft)(plan->backward_plan_ydir, src + col * plan->NgridY, dst + col * plan->NgridY);
+
+      int extentD[3] = {plan->NgridX, plan->NgridZ, plan->NgridY};
+      int orderD[3]  = {0, 2, 1};
+
+      my_fft_column_remap(dst, extentD, plan->transposed_firstcol, plan->transposed_ncol, src, orderD,
+                          plan->base_firstcol, plan->base_ncol, plan->offsets_send_D, plan->offsets_recv_D,
+                          plan->count_send_D, plan->count_recv_D, 0);
+
+      /* finish with the complex-to-complex inverse transform along z; the result ends up in 'workspace' */
+      for(col = 0; col < plan->base_ncol; col++)
+        FFTW(execute_dft)(plan->backward_plan_zdir, src + col * plan->NgridZ, dst + col * plan->NgridZ);
+    }
+  else
+    {
+      /* forward: z-direction FFT first, complex to complex, 'data' -> 'workspace' */
+      for(col = 0; col < plan->base_ncol; col++)
+        FFTW(execute_dft)(plan->forward_plan_zdir, src + col * plan->NgridZ, dst + col * plan->NgridZ);
+
+      int extentA[3] = {plan->NgridX, plan->NgridY, plan->NgridZ};
+      int orderA[3]  = {0, 2, 1};
+
+      my_fft_column_remap(dst, extentA, plan->base_firstcol, plan->base_ncol, src, orderA,
+                          plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_A, plan->offsets_recv_A,
+                          plan->count_send_A, plan->count_recv_A, 0);
+
+      /* y-direction FFT, complex to complex, 'data' -> 'workspace' */
+      for(col = 0; col < plan->transposed_ncol; col++)
+        FFTW(execute_dft)(plan->forward_plan_ydir, src + col * plan->NgridY, dst + col * plan->NgridY);
+
+      int extentB[3] = {plan->NgridX, plan->NgridZ, plan->NgridY};
+      int orderB[3]  = {2, 1, 0};
+
+      my_fft_column_remap(dst, extentB, plan->transposed_firstcol, plan->transposed_ncol, src, orderB,
+                          plan->second_transposed_firstcol, plan->second_transposed_ncol, plan->offsets_send_B,
+                          plan->offsets_recv_B, plan->count_send_B, plan->count_recv_B, 0);
+
+      /* x-direction FFT, complex to complex, 'data' -> 'workspace' */
+      for(col = 0; col < plan->second_transposed_ncol; col++)
+        FFTW(execute_dft)(plan->forward_plan_xdir, src + col * plan->NgridX, dst + col * plan->NgridX);
+
+      /* the transformed data is left in 'workspace' */
+    }
+}
+
+/*! \brief Remaps column-based complex FFT data to a permuted column layout, or only sets up the exchange tables.
+ *
+ *  \param[in, out] data Cells of the local input columns; reused as the receive buffer, so it is overwritten.
+ *  \param[in] Ndims Global grid extent along each of the three input axes.
+ *  \param[in] in_firstcol First input column held by this task.
+ *  \param[in] in_ncol Number of input columns held by this task.
+ *  \param[out] out Staging area for the data to be sent, afterwards the remapped output columns.
+ *  \param[in] perm Permutation in dimensions: output axis k is input axis perm[k].
+ *  \param[in] out_firstcol First output column held by this task.
+ *  \param[in] out_ncol Number of output columns held by this task.
+ *  \param[in, out] offset_send Per-task send offsets; written when counting, read when transferring.
+ *  \param[in, out] offset_recv Per-task receive offsets; written when
+ *                  counting, read when transferring.
+ *  \param[out] count_send Number of elements sent to each MPI task;
+ *              zeroed and recounted on every call.
+ *  \param[in, out] count_recv Number of elements received from each MPI task;
+ *                  set when counting, read and then rebuilt when transferring.
+ *  \param[in] just_count_flag Nonzero: only fill the count and offset tables
+ *             (data and out are not accessed). Zero: move the data.
+ *
+ *  \return void
+ */
+static void my_fft_column_remap(fft_complex *data, int Ndims[3], int in_firstcol, int in_ncol, fft_complex *out, int perm[3],
+                                int out_firstcol, int out_ncol, size_t *offset_send, size_t *offset_recv, size_t *count_send,
+                                size_t *count_recv, size_t just_count_flag)
+{
+  int j, target, origin, ngrp, recvTask, perm_rev[3], xyz[3], uvw[3];
+  size_t nimport, nexport;
+
+  /* determine the inverse permutation: for three elements it is either perm itself or perm applied twice */
+  for(j = 0; j < 3; j++)
+    perm_rev[j] = perm[j];
+
+  if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2)) /* not yet the inverse */
+    {
+      for(j = 0; j < 3; j++)
+        perm_rev[j] = perm[perm[j]];
+
+      if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2))
+        terminate("bummer");
+    }
+  /* input decomposition: the first in_tasklastsection tasks hold in_avg columns each, the remaining tasks in_avg - 1 */
+  int in_colums          = Ndims[0] * Ndims[1];
+  int in_avg             = (in_colums - 1) / NTask + 1;
+  int in_exc             = NTask * in_avg - in_colums;
+  int in_tasklastsection = NTask - in_exc;
+  int in_pivotcol        = in_tasklastsection * in_avg;
+  /* the same decomposition for the output columns, which are indexed by the axes perm[0] and perm[1] */
+  int out_colums          = Ndims[perm[0]] * Ndims[perm[1]];
+  int out_avg             = (out_colums - 1) / NTask + 1;
+  int out_exc             = NTask * out_avg - out_colums;
+  int out_tasklastsection = NTask - out_exc;
+  int out_pivotcol        = out_tasklastsection * out_avg;
+
+  size_t i, ncells = ((size_t)in_ncol) * Ndims[2];
+  /* grid coordinates of the first cell of the first local input column */
+  xyz[0] = in_firstcol / Ndims[1];
+  xyz[1] = in_firstcol % Ndims[1];
+  xyz[2] = 0;
+
+  memset(count_send, 0, NTask * sizeof(size_t));
+
+  /* loop over all cells in input array and determine target processor */
+  for(i = 0; i < ncells; i++)
+    {
+      /* determine target task */
+      uvw[0] = xyz[perm[0]];
+      uvw[1] = xyz[perm[1]];
+      uvw[2] = xyz[perm[2]];
+
+      int newcol = Ndims[perm[1]] * uvw[0] + uvw[1];
+      if(newcol < out_pivotcol)
+        target = newcol / out_avg;
+      else
+        target = (newcol - out_pivotcol) / (out_avg - 1) + out_tasklastsection;
+
+      /* move data element to targettask (into its slot of the send staging area in 'out'), or only count it */
+
+      if(just_count_flag)
+        count_send[target]++;
+      else
+        {
+          size_t off  = offset_send[target] + count_send[target]++;
+          out[off][0] = data[i][0];
+          out[off][1] = data[i][1];
+        }
+      xyz[2]++;
+      if(xyz[2] == Ndims[2])
+        {
+          xyz[2] = 0;
+          xyz[1]++;
+          if(xyz[1] == Ndims[1])
+            {
+              xyz[1] = 0;
+              xyz[0]++;
+            }
+        }
+    }
+
+  if(just_count_flag)
+    {
+      MPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+      /* turn the counts into buffer offsets (prefix sums) and total them up */
+      for(j = 0, nimport = 0, nexport = 0, offset_send[0] = 0, offset_recv[0] = 0; j < NTask; j++)
+        {
+          nexport += count_send[j];
+          nimport += count_recv[j];
+
+          if(j > 0)
+            {
+              offset_send[j] = offset_send[j - 1] + count_send[j - 1];
+              offset_recv[j] = offset_recv[j - 1] + count_recv[j - 1];
+            }
+        }
+
+      if(nexport != ncells)
+        terminate("nexport=%lld != ncells=%lld", (long long)nexport, (long long)ncells);
+    }
+  else
+    {
+      nimport = 0;
+
+      /* exchange all the data pairwise: in round ngrp the partner is ThisTask XOR ngrp; 'data' is the receive buffer */
+      for(ngrp = 0; ngrp < (1 << PTask); ngrp++)
+        {
+          recvTask = ThisTask ^ ngrp;
+
+          if(recvTask < NTask)
+            {
+              if(count_send[recvTask] > 0 || count_recv[recvTask] > 0)
+                myMPI_Sendrecv(&out[offset_send[recvTask]], count_send[recvTask] * sizeof(fft_complex), MPI_BYTE, recvTask, TAG_DENS_A,
+                               &data[offset_recv[recvTask]], count_recv[recvTask] * sizeof(fft_complex), MPI_BYTE, recvTask,
+                               TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              nimport += count_recv[recvTask];
+            }
+        }
+
+      /* now loop over the new cell layout */
+      /* find enclosing rectangle around columns in new plane */
+
+      int first[3], last[3];
+
+      first[0] = out_firstcol / Ndims[perm[1]];
+      first[1] = out_firstcol % Ndims[perm[1]];
+      first[2] = 0;
+
+      last[0] = (out_firstcol + out_ncol - 1) / Ndims[perm[1]];
+      last[1] = (out_firstcol + out_ncol - 1) % Ndims[perm[1]];
+      last[2] = Ndims[perm[2]] - 1;
+      /* if the columns reach the end of the first row or beyond, use the full row width for the rectangle */
+      if(first[1] + out_ncol >= Ndims[perm[1]])
+        {
+          first[1] = 0;
+          last[1]  = Ndims[perm[1]] - 1;
+        }
+
+      /* now need to map this back to the old coordinates */
+
+      int xyz_first[3], xyz_last[3];
+
+      for(j = 0; j < 3; j++)
+        {
+          xyz_first[j] = first[perm_rev[j]];
+          xyz_last[j]  = last[perm_rev[j]];
+        }
+      /* count_recv is rebuilt below: it serves as read cursor into each origin task's received chunk */
+      memset(count_recv, 0, NTask * sizeof(size_t));
+
+      size_t count = 0;
+
+      /* traverse an enclosing box around the new cell layout in the old order */
+      for(xyz[0] = xyz_first[0]; xyz[0] <= xyz_last[0]; xyz[0]++)
+        for(xyz[1] = xyz_first[1]; xyz[1] <= xyz_last[1]; xyz[1]++)
+          for(xyz[2] = xyz_first[2]; xyz[2] <= xyz_last[2]; xyz[2]++)
+            {
+              /* check that the point is actually part of a column */
+              uvw[0] = xyz[perm[0]];
+              uvw[1] = xyz[perm[1]];
+              uvw[2] = xyz[perm[2]];
+
+              int col = uvw[0] * Ndims[perm[1]] + uvw[1];
+
+              if(col >= out_firstcol && col < out_firstcol + out_ncol)
+                {
+                  /* determine origin task */
+                  int newcol = Ndims[1] * xyz[0] + xyz[1];
+                  if(newcol < in_pivotcol)
+                    origin = newcol / in_avg;
+                  else
+                    origin = (newcol - in_pivotcol) / (in_avg - 1) + in_tasklastsection;
+
+                  size_t index = ((size_t)Ndims[perm[2]]) * (col - out_firstcol) + uvw[2];
+
+                  /* move data element from origin task */
+                  size_t off    = offset_recv[origin] + count_recv[origin]++;
+                  out[index][0] = data[off][0];
+                  out[index][1] = data[off][1];
+
+                  count++;
+                }
+            }
+
+      if(count != nimport)
+        {
+          int fi = out_firstcol % Ndims[perm[1]];
+          int la = (out_firstcol + out_ncol - 1) % Ndims[perm[1]];
+
+          terminate("count=%lld nimport=%lld   ncol=%d fi=%d la=%d first=%d last=%d\n", (long long)count, (long long)nimport, out_ncol,
+                    fi, la, first[1], last[1]);
+        }
+    }
+}
+
+/*! \brief Transposes column-based real FFT data to a permuted column layout, or only sets up the exchange tables.
+ *
+ *  \param[in, out] data Cells of the local input columns; reused as the receive buffer, so it is overwritten.
+ *  \param[in] Ndims Global grid extent along each of the three input axes.
+ *  \param[in] in_firstcol First input column held by this task.
+ *  \param[in] in_ncol Number of input columns held by this task.
+ *  \param[out] out Staging area for the data to be sent, afterwards the transposed output columns.
+ *  \param[in] perm Permutation in dimensions: output axis k is input axis perm[k].
+ *  \param[in] out_firstcol First output column held by this task.
+ *  \param[in] out_ncol Number of output columns held by this task.
+ *  \param[in, out] offset_send Per-task send offsets; written when counting, read when transferring.
+ *  \param[in, out] offset_recv Per-task receive offsets; written when
+ *                  counting, read when transferring.
+ *  \param[out] count_send Number of elements sent to each MPI task;
+ *              zeroed and recounted on every call.
+ *  \param[in, out] count_recv Number of elements received from each MPI task;
+ *                  set when counting, read and then rebuilt when transferring.
+ *  \param[in] just_count_flag Nonzero: only fill the count and offset tables
+ *             (data and out are not accessed). Zero: move the data.
+ *
+ *  \return void
+ */
+static void my_fft_column_transpose(fft_real *data, int Ndims[3], int in_firstcol, int in_ncol, fft_real *out, int perm[3],
+                                    int out_firstcol, int out_ncol, size_t *offset_send, size_t *offset_recv, size_t *count_send,
+                                    size_t *count_recv, size_t just_count_flag)
+{
+  int j, target, origin, ngrp, recvTask, perm_rev[3], xyz[3], uvw[3];
+  size_t nimport, nexport;
+
+  /* determine the inverse permutation: for three elements it is either perm itself or perm applied twice */
+  for(j = 0; j < 3; j++)
+    perm_rev[j] = perm[j];
+
+  if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2)) /* not yet the inverse */
+    {
+      for(j = 0; j < 3; j++)
+        perm_rev[j] = perm[perm[j]];
+
+      if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2))
+        terminate("bummer");
+    }
+  /* input decomposition: the first in_tasklastsection tasks hold in_avg columns each, the remaining tasks in_avg - 1 */
+  int in_colums          = Ndims[0] * Ndims[1];
+  int in_avg             = (in_colums - 1) / NTask + 1;
+  int in_exc             = NTask * in_avg - in_colums;
+  int in_tasklastsection = NTask - in_exc;
+  int in_pivotcol        = in_tasklastsection * in_avg;
+  /* the same decomposition for the output columns, which are indexed by the axes perm[0] and perm[1] */
+  int out_colums          = Ndims[perm[0]] * Ndims[perm[1]];
+  int out_avg             = (out_colums - 1) / NTask + 1;
+  int out_exc             = NTask * out_avg - out_colums;
+  int out_tasklastsection = NTask - out_exc;
+  int out_pivotcol        = out_tasklastsection * out_avg;
+
+  size_t i, ncells = ((size_t)in_ncol) * Ndims[2];
+  /* grid coordinates of the first cell of the first local input column */
+  xyz[0] = in_firstcol / Ndims[1];
+  xyz[1] = in_firstcol % Ndims[1];
+  xyz[2] = 0;
+
+  memset(count_send, 0, NTask * sizeof(size_t));
+
+  /* loop over all cells in input array and determine target processor */
+  for(i = 0; i < ncells; i++)
+    {
+      /* determine target task */
+      uvw[0] = xyz[perm[0]];
+      uvw[1] = xyz[perm[1]];
+      uvw[2] = xyz[perm[2]];
+
+      int newcol = Ndims[perm[1]] * uvw[0] + uvw[1];
+      if(newcol < out_pivotcol)
+        target = newcol / out_avg;
+      else
+        target = (newcol - out_pivotcol) / (out_avg - 1) + out_tasklastsection;
+
+      /* move data element to targettask (into its slot of the send staging area in 'out'), or only count it */
+
+      if(just_count_flag)
+        count_send[target]++;
+      else
+        {
+          size_t off = offset_send[target] + count_send[target]++;
+          out[off]   = data[i];
+        }
+      xyz[2]++;
+      if(xyz[2] == Ndims[2])
+        {
+          xyz[2] = 0;
+          xyz[1]++;
+          if(xyz[1] == Ndims[1])
+            {
+              xyz[1] = 0;
+              xyz[0]++;
+            }
+        }
+    }
+
+  if(just_count_flag)
+    {
+      MPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+      /* turn the counts into buffer offsets (prefix sums) and total them up */
+      for(j = 0, nimport = 0, nexport = 0, offset_send[0] = 0, offset_recv[0] = 0; j < NTask; j++)
+        {
+          nexport += count_send[j];
+          nimport += count_recv[j];
+
+          if(j > 0)
+            {
+              offset_send[j] = offset_send[j - 1] + count_send[j - 1];
+              offset_recv[j] = offset_recv[j - 1] + count_recv[j - 1];
+            }
+        }
+
+      if(nexport != ncells)
+        terminate("nexport=%lld != ncells=%lld", (long long)nexport, (long long)ncells);
+    }
+  else
+    {
+      nimport = 0;
+
+      /* exchange all the data pairwise: in round ngrp the partner is ThisTask XOR ngrp; 'data' is the receive buffer */
+      for(ngrp = 0; ngrp < (1 << PTask); ngrp++)
+        {
+          recvTask = ThisTask ^ ngrp;
+
+          if(recvTask < NTask)
+            {
+              if(count_send[recvTask] > 0 || count_recv[recvTask] > 0)
+                myMPI_Sendrecv(&out[offset_send[recvTask]], count_send[recvTask] * sizeof(fft_real), MPI_BYTE, recvTask, TAG_DENS_A,
+                               &data[offset_recv[recvTask]], count_recv[recvTask] * sizeof(fft_real), MPI_BYTE, recvTask, TAG_DENS_A,
+                               MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              nimport += count_recv[recvTask];
+            }
+        }
+
+      /* now loop over the new cell layout */
+      /* find enclosing rectangle around columns in new plane */
+
+      int first[3], last[3];
+
+      first[0] = out_firstcol / Ndims[perm[1]];
+      first[1] = out_firstcol % Ndims[perm[1]];
+      first[2] = 0;
+
+      last[0] = (out_firstcol + out_ncol - 1) / Ndims[perm[1]];
+      last[1] = (out_firstcol + out_ncol - 1) % Ndims[perm[1]];
+      last[2] = Ndims[perm[2]] - 1;
+      /* if the columns reach the end of the first row or beyond, use the full row width for the rectangle */
+      if(first[1] + out_ncol >= Ndims[perm[1]])
+        {
+          first[1] = 0;
+          last[1]  = Ndims[perm[1]] - 1;
+        }
+
+      /* now need to map this back to the old coordinates */
+
+      int xyz_first[3], xyz_last[3];
+
+      for(j = 0; j < 3; j++)
+        {
+          xyz_first[j] = first[perm_rev[j]];
+          xyz_last[j]  = last[perm_rev[j]];
+        }
+      /* count_recv is rebuilt below: it serves as read cursor into each origin task's received chunk */
+      memset(count_recv, 0, NTask * sizeof(size_t));
+
+      size_t count = 0;
+
+      /* traverse an enclosing box around the new cell layout in the old order */
+      for(xyz[0] = xyz_first[0]; xyz[0] <= xyz_last[0]; xyz[0]++)
+        for(xyz[1] = xyz_first[1]; xyz[1] <= xyz_last[1]; xyz[1]++)
+          for(xyz[2] = xyz_first[2]; xyz[2] <= xyz_last[2]; xyz[2]++)
+            {
+              /* check that the point is actually part of a column */
+              uvw[0] = xyz[perm[0]];
+              uvw[1] = xyz[perm[1]];
+              uvw[2] = xyz[perm[2]];
+
+              int col = uvw[0] * Ndims[perm[1]] + uvw[1];
+
+              if(col >= out_firstcol && col < out_firstcol + out_ncol)
+                {
+                  /* determine origin task */
+                  int newcol = Ndims[1] * xyz[0] + xyz[1];
+                  if(newcol < in_pivotcol)
+                    origin = newcol / in_avg;
+                  else
+                    origin = (newcol - in_pivotcol) / (in_avg - 1) + in_tasklastsection;
+
+                  size_t index = ((size_t)Ndims[perm[2]]) * (col - out_firstcol) + uvw[2];
+
+                  /* move data element from origin task */
+                  size_t off = offset_recv[origin] + count_recv[origin]++;
+                  out[index] = data[off];
+
+                  count++;
+                }
+            }
+
+      if(count != nimport)
+        {
+          int fi = out_firstcol % Ndims[perm[1]];
+          int la = (out_firstcol + out_ncol - 1) % Ndims[perm[1]];
+
+          terminate("count=%lld nimport=%lld   ncol=%d fi=%d la=%d first=%d last=%d\n", (long long)count, (long long)nimport, out_ncol,
+                    fi, la, first[1], last[1]);
+        }
+    }
+}
+
+/*! \brief Transposes column-based complex FFT data.
+ *
+ *  \param[in,out] data Data to be transposed; overwritten with the received elements in the transfer pass.
+ *  \param[in] Ndims Global extent of the data cube in each of the three dimensions.
+ *  \param[in] in_firstcol First column.
+ *  \param[in] in_ncol Number of columns.
+ *  \param[out] out Data output; also holds the packed send buffer during the transfer pass.
+ *  \param[in] perm Permutations in dimensions.
+ *  \param[in] out_firstcol First column in output data.
+ *  \param[in] out_ncol Number of columns in output data.
+ *  \param[in,out] offset_send Offset in array for send operation to MPI tasks (set in the counting pass).
+ *  \param[in,out] offset_recv Offset in array for receive operation from MPI
+ *              tasks (set in the counting pass).
+ *  \param[out] count_send Count how many elements have to be sent to each
+ *              MPI task.
+ *  \param[in,out] count_recv Count how many elements have to be received from
+ *              each MPI task (set in the counting pass, read in the transfer pass).
+ *  \param[in] just_count_flag Do element counting for communication instead
+ *             of data transfer.
+ *
+ *  \return void
+ */
+static void my_fft_column_transpose_c(fft_complex *data, int Ndims[3], int in_firstcol, int in_ncol, fft_complex *out, int perm[3],
+                                      int out_firstcol, int out_ncol, size_t *offset_send, size_t *offset_recv, size_t *count_send,
+                                      size_t *count_recv, size_t just_count_flag)
+{
+  int j, target, origin, ngrp, recvTask, perm_rev[3], xyz[3], uvw[3];
+  size_t nimport, nexport;
+
+  /* determine the inverse permutation: first try perm itself (self-inverse case) */
+  for(j = 0; j < 3; j++)
+    perm_rev[j] = perm[j];
+
+  if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2)) /* not yet the inverse */
+    {
+      for(j = 0; j < 3; j++)
+        perm_rev[j] = perm[perm[j]]; /* otherwise try perm applied twice */
+
+      if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2))
+        terminate("bummer");
+    }
+
+  int in_colums          = Ndims[0] * Ndims[1];
+  int in_avg             = (in_colums - 1) / NTask + 1; /* columns per task, rounded up */
+  int in_exc             = NTask * in_avg - in_colums;
+  int in_tasklastsection = NTask - in_exc;              /* first task of the section mapped with in_avg - 1 columns per task */
+  int in_pivotcol        = in_tasklastsection * in_avg; /* first column belonging to that section */
+
+  int out_colums          = Ndims[perm[0]] * Ndims[perm[1]]; /* same decomposition, applied to the permuted (output) layout */
+  int out_avg             = (out_colums - 1) / NTask + 1;
+  int out_exc             = NTask * out_avg - out_colums;
+  int out_tasklastsection = NTask - out_exc;
+  int out_pivotcol        = out_tasklastsection * out_avg;
+
+  size_t i, ncells = ((size_t)in_ncol) * Ndims[2]; /* number of local input cells */
+
+  xyz[0] = in_firstcol / Ndims[1];
+  xyz[1] = in_firstcol % Ndims[1];
+  xyz[2] = 0;
+
+  memset(count_send, 0, NTask * sizeof(size_t));
+
+  /* loop over all cells in input array and determine target processor */
+  for(i = 0; i < ncells; i++)
+    {
+      /* determine target task */
+      uvw[0] = xyz[perm[0]];
+      uvw[1] = xyz[perm[1]];
+      uvw[2] = xyz[perm[2]];
+
+      int newcol = Ndims[perm[1]] * uvw[0] + uvw[1];
+      if(newcol < out_pivotcol)
+        target = newcol / out_avg;
+      else
+        target = (newcol - out_pivotcol) / (out_avg - 1) + out_tasklastsection;
+
+      /* count the element for the target task, or copy it into that task's next send slot */
+
+      if(just_count_flag)
+        count_send[target]++;
+      else
+        {
+          size_t off  = offset_send[target] + count_send[target]++;
+          out[off][0] = data[i][0];
+          out[off][1] = data[i][1];
+        }
+      xyz[2]++; /* step to the next input cell: index 2 runs fastest, then index 1, then index 0 */
+      if(xyz[2] == Ndims[2])
+        {
+          xyz[2] = 0;
+          xyz[1]++;
+          if(xyz[1] == Ndims[1])
+            {
+              xyz[1] = 0;
+              xyz[0]++;
+            }
+        }
+    }
+
+  if(just_count_flag)
+    {
+      MPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+
+      for(j = 0, nimport = 0, nexport = 0, offset_send[0] = 0, offset_recv[0] = 0; j < NTask; j++)
+        {
+          nexport += count_send[j];
+          nimport += count_recv[j];
+
+          if(j > 0)
+            {
+              offset_send[j] = offset_send[j - 1] + count_send[j - 1];
+              offset_recv[j] = offset_recv[j - 1] + count_recv[j - 1];
+            }
+        }
+
+      if(nexport != ncells)
+        terminate("nexport=%lld != ncells=%lld", (long long)nexport, (long long)ncells);
+    }
+  else
+    {
+      nimport = 0;
+
+      /* exchange all the data pairwise: the partner in round ngrp is ThisTask ^ ngrp */
+      for(ngrp = 0; ngrp < (1 << PTask); ngrp++)
+        {
+          recvTask = ThisTask ^ ngrp;
+
+          if(recvTask < NTask)
+            {
+              if(count_send[recvTask] > 0 || count_recv[recvTask] > 0)
+                myMPI_Sendrecv(&out[offset_send[recvTask]], count_send[recvTask] * sizeof(fft_complex), MPI_BYTE, recvTask, TAG_DENS_A,
+                               &data[offset_recv[recvTask]], count_recv[recvTask] * sizeof(fft_complex), MPI_BYTE, recvTask,
+                               TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              nimport += count_recv[recvTask];
+            }
+        }
+
+      /* now loop over the new cell layout */
+      /* find enclosing rectangle around columns in new plane */
+
+      int first[3], last[3];
+
+      first[0] = out_firstcol / Ndims[perm[1]];
+      first[1] = out_firstcol % Ndims[perm[1]];
+      first[2] = 0;
+
+      last[0] = (out_firstcol + out_ncol - 1) / Ndims[perm[1]];
+      last[1] = (out_firstcol + out_ncol - 1) % Ndims[perm[1]];
+      last[2] = Ndims[perm[2]] - 1;
+
+      if(first[1] + out_ncol >= Ndims[perm[1]]) /* column range reaches the end of a row: scan the full extent of this index */
+        {
+          first[1] = 0;
+          last[1]  = Ndims[perm[1]] - 1;
+        }
+
+      /* now need to map this back to the old coordinates */
+
+      int xyz_first[3], xyz_last[3];
+
+      for(j = 0; j < 3; j++)
+        {
+          xyz_first[j] = first[perm_rev[j]];
+          xyz_last[j]  = last[perm_rev[j]];
+        }
+
+      memset(count_recv, 0, NTask * sizeof(size_t)); /* reused below as a per-origin read cursor into data */
+
+      size_t count = 0;
+
+      /* traverse an enclosing box around the new cell layout in the old order */
+      for(xyz[0] = xyz_first[0]; xyz[0] <= xyz_last[0]; xyz[0]++)
+        for(xyz[1] = xyz_first[1]; xyz[1] <= xyz_last[1]; xyz[1]++)
+          for(xyz[2] = xyz_first[2]; xyz[2] <= xyz_last[2]; xyz[2]++)
+            {
+              /* check that the point is actually part of a column */
+              uvw[0] = xyz[perm[0]];
+              uvw[1] = xyz[perm[1]];
+              uvw[2] = xyz[perm[2]];
+
+              int col = uvw[0] * Ndims[perm[1]] + uvw[1];
+
+              if(col >= out_firstcol && col < out_firstcol + out_ncol)
+                {
+                  /* determine origin task */
+                  int newcol = Ndims[1] * xyz[0] + xyz[1];
+                  if(newcol < in_pivotcol)
+                    origin = newcol / in_avg;
+                  else
+                    origin = (newcol - in_pivotcol) / (in_avg - 1) + in_tasklastsection;
+
+                  size_t index = ((size_t)Ndims[perm[2]]) * (col - out_firstcol) + uvw[2];
+
+                  /* move data element from origin task */
+                  size_t off    = offset_recv[origin] + count_recv[origin]++;
+                  out[index][0] = data[off][0];
+                  out[index][1] = data[off][1];
+
+                  count++;
+                }
+            }
+
+      if(count != nimport)
+        {
+          int fi = out_firstcol % Ndims[perm[1]];
+          int la = (out_firstcol + out_ncol - 1) % Ndims[perm[1]];
+
+          terminate("count=%lld nimport=%lld   ncol=%d fi=%d la=%d first=%d last=%d\n", (long long)count, (long long)nimport, out_ncol,
+                    fi, la, first[1], last[1]);
+        }
+    }
+}
+
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+#endif /* #if defined(PMGRID) */
diff --git a/src/amuse/community/arepo/src/gravity/pm/pm_nonperiodic.c b/src/amuse/community/arepo/src/gravity/pm/pm_nonperiodic.c
new file mode 100644
index 0000000000..7346af2849
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/pm/pm_nonperiodic.c
@@ -0,0 +1,2087 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/pm/pm_nonperiodic.c
+ * \date        05/2018
+ * \brief       Code for non-periodic FFT to compute long-range PM force.
+ * \details     contains functions:
+ *                void pm_init_regionsize(void)
+ *                void pm_init_nonperiodic(void)
+ *                int pmforce_is_particle_high_res(int type, MyDouble * Pos)
+ *                void pmforce_nonperiodic_zoom_optimized_prepare_density(int
+ *                  grnr)
+ *                void pmforce_nonperiodic_zoom_optimized_readout_forces_or_
+ *                  potential(int grnr, int dim)
+ *                void pmforce_nonperiodic_uniform_optimized_prepare_density(
+ *                  int grnr)
+ *                void pmforce_nonperiodic_uniform_optimized_readout_forces_or_
+ *                  potential(int grnr, int dim)
+ *                int pmforce_nonperiodic(int grnr)
+ *                void pm_setup_nonperiodic_kernel(void)
+ *                static int pm_periodic_compare_sortindex(const void *a,
+ *                  const void *b)
+ *                static void msort_pmperiodic_with_tmp(large_numpart_type * b,
+ *                  size_t n, large_numpart_type * t)
+ *                static void mysort_pmperiodic(void *b, size_t n, size_t s,
+ *                  int (*cmp) (const void *, const void *))
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 15.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#if defined(PMGRID) && (defined(PLACEHIGHRESREGION) || defined(GRAVITY_NOT_PERIODIC))
+
+#if defined(LONG_X) || defined(LONG_Y) || defined(LONG_Z)
+#error "LONG_X/Y/Z not supported for the non-periodic FFT gravity code"
+#endif /* #if defined(LONG_X) || defined(LONG_Y) || defined (LONG_Z) */
+
+#ifndef GRIDBOOST
+#define GRIDBOOST 2
+#endif /* #ifndef GRIDBOOST */
+
+#define GRID (GRIDBOOST * PMGRID) /* number of mesh cells per dimension handed to the FFT set-up in this file */
+#define GRIDz (GRID / 2 + 1)      /* complex elements per z-row (output distance of the real-to-complex plan) */
+#define GRID2 (2 * GRIDz)         /* real elements per z-row, i.e. room for GRIDz complex values */
+
+#if(GRID > 1024)
+typedef long long large_array_offset; /* use a larger data type in this case so that we can always address all cells of the 3D grid
+                                         with a single index */
+#else                                 /* #if (GRID > 1024) */
+typedef unsigned int large_array_offset;
+#endif                                /* #if (GRID > 1024) #else */
+
+#ifdef NUMPART_PER_TASK_LARGE
+typedef long long large_numpart_type; /* if there is a risk that the local particle number times 8 overflows a 32-bit integer, this
+                                         data type should be used */
+#else                                 /* #ifdef NUMPART_PER_TASK_LARGE */
+typedef int large_numpart_type;
+#endif                                /* #ifdef NUMPART_PER_TASK_LARGE */
+
+/* short-cut macros for accessing different 3D arrays */
+#define FI(x, y, z) (((large_array_offset)GRID2) * (GRID * (x) + (y)) + (z)) /* cell (x,y,z) of the full grid, z-rows of length GRID2 */
+#define FC(c, z) (((large_array_offset)GRID2) * ((c)-myplan.base_firstcol) + (z)) /* element z of column c, counted from base_firstcol */
+#define TI(x, y, z) (((large_array_offset)GRID) * ((x) + (y)*myplan.nslab_x) + (z)) /* x fastest within y (width nslab_x), z-rows of GRID */
+
+static fft_plan myplan; /*!< In this structure, various bookkeeping variables for the distributed FFTs are stored */
+
+/*! \var maxfftsize
+ *  \brief maximum size of the local fft grid among all tasks
+ */
+static size_t maxfftsize;
+
+/*! \var rhogrid
+ *  \brief This array holds the local part of the density field and
+ *  after the FFTs the local part of the potential
+ *
+ *  \var forcegrid
+ *  \brief This array will contain the force field
+ *
+ *  \var workspace
+ *  \brief Workspace array used during the FFTs
+ */
+static fft_real *rhogrid, *forcegrid, *workspace;
+
+/*! \brief Array containing the FFT of 'rhogrid'
+ *
+ *  This pointer points to the same array as 'rhogrid',
+ *  because in-place FFTs are used.
+ */
+static fft_complex *fft_of_rhogrid;
+
+static fft_real *kernel[2];           /*!< kernel buffers: [0] allocated for GRAVITY_NOT_PERIODIC, [1] for PLACEHIGHRESREGION */
+static fft_complex *fft_of_kernel[2]; /*!< complex-typed aliases of kernel[0] and kernel[1] (same storage) */
+
+/*! \brief Determine particle extent.
+ *
+ *  This function determines the spatial extent of all particles, and of
+ *  those types selected with PLACEHIGHRESREGION if this is used, and then
+ *  determines the boundaries of the non-periodic FFT-mesh that can be placed
+ *  on this region. Note that a sufficient buffer region at the rim of the
+ *  occupied part of the mesh needs to be reserved in order to allow a correct
+ *  finite differencing using a 4-point formula. In addition, to allow
+ *  non-periodic boundaries, the actual FFT mesh used is twice as large in
+ *  each dimension compared with GRID.
+ *
+ *  \return void
+ */
+void pm_init_regionsize(void)
+{
+  double meshinner[2], xmin[2][3], xmax[2][3]; /* first index: 0 = all particles, 1 = types selected by PLACEHIGHRESREGION */
+  int i, j;
+
+  /* find enclosing rectangle; start from an inverted (empty) bounding box */
+
+  for(j = 0; j < 3; j++)
+    {
+      xmin[0][j] = xmin[1][j] = 1.0e36;
+      xmax[0][j] = xmax[1][j] = -1.0e36;
+    }
+
+  for(i = 0; i < NumPart; i++)
+    for(j = 0; j < 3; j++)
+      {
+        if(P[i].Pos[j] > xmax[0][j])
+          xmax[0][j] = P[i].Pos[j];
+        if(P[i].Pos[j] < xmin[0][j])
+          xmin[0][j] = P[i].Pos[j];
+
+#ifdef PLACEHIGHRESREGION
+        if(((1 << P[i].Type) & (PLACEHIGHRESREGION)))
+          {
+            if(P[i].Pos[j] > xmax[1][j])
+              xmax[1][j] = P[i].Pos[j];
+            if(P[i].Pos[j] < xmin[1][j])
+              xmin[1][j] = P[i].Pos[j];
+          }
+#endif /* #ifdef PLACEHIGHRESREGION */
+      }
+
+  MPI_Allreduce(xmin, All.Xmintot, 6, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); /* 6 = 2 particle sets x 3 coordinates */
+  MPI_Allreduce(xmax, All.Xmaxtot, 6, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+
+  for(j = 0; j < 2; j++)
+    {
+      All.TotalMeshSize[j] = All.Xmaxtot[j][0] - All.Xmintot[j][0];
+      All.TotalMeshSize[j] = dmax(All.TotalMeshSize[j], All.Xmaxtot[j][1] - All.Xmintot[j][1]);
+      All.TotalMeshSize[j] = dmax(All.TotalMeshSize[j], All.Xmaxtot[j][2] - All.Xmintot[j][2]);
+#ifdef ENLARGEREGION
+      All.TotalMeshSize[j] *= ENLARGEREGION;
+#endif /* #ifdef ENLARGEREGION */
+
+      /* symmetrize the box onto the center */
+      for(i = 0; i < 3; i++)
+        {
+          All.Xmintot[j][i] = (All.Xmintot[j][i] + All.Xmaxtot[j][i]) / 2 - All.TotalMeshSize[j] / 2;
+          All.Xmaxtot[j][i] = All.Xmintot[j][i] + All.TotalMeshSize[j];
+        }
+    }
+
+  /* this will produce enough room for zero-padding and buffer region to
+     allow finite differencing of the potential  */
+
+  for(j = 0; j < 2; j++)
+    {
+      meshinner[j] = All.TotalMeshSize[j]; /* remember the extent of the occupied region before enlarging the mesh */
+      All.TotalMeshSize[j] *= 2.001 * (GRID) / ((double)(GRID - 2 - 8));
+    }
+
+  /* move lower left corner by two cells to allow finite differencing of the potential by a 4-point function */
+
+  for(j = 0; j < 2; j++)
+    for(i = 0; i < 3; i++)
+      {
+        All.Corner[j][i]      = All.Xmintot[j][i] - 2.0005 * All.TotalMeshSize[j] / GRID;
+        All.UpperCorner[j][i] = All.Corner[j][i] + (GRID / 2 - 1) * (All.TotalMeshSize[j] / GRID);
+      }
+
+#ifdef PLACEHIGHRESREGION
+  All.Asmth[1] = ASMTH * All.TotalMeshSize[1] / GRID;
+  All.Rcut[1]  = RCUT * All.Asmth[1];
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+#ifdef PLACEHIGHRESREGION
+  if(2 * All.TotalMeshSize[1] / GRID < All.Rcut[0]) /* two high-res cells are smaller than Rcut[0]: pad the mesh by Rcut[0] instead */
+    {
+      All.TotalMeshSize[1] = 2 * (meshinner[1] + 2 * All.Rcut[0]) * (GRID) / ((double)(GRID - 2));
+
+      for(i = 0; i < 3; i++)
+        {
+          All.Corner[1][i]      = All.Xmintot[1][i] - 1.0001 * All.Rcut[0];
+          All.UpperCorner[1][i] = All.Corner[1][i] + (GRID / 2 - 1) * (All.TotalMeshSize[1] / GRID);
+        }
+
+      if(2 * All.TotalMeshSize[1] / GRID > All.Rcut[0]) /* after the resize the condition flipped: add two extra cells of margin */
+        {
+          All.TotalMeshSize[1] = 2 * (meshinner[1] + 2 * All.Rcut[0]) * (GRID) / ((double)(GRID - 10));
+
+          for(i = 0; i < 3; i++)
+            {
+              All.Corner[1][i]      = All.Xmintot[1][i] - 1.0001 * (All.Rcut[0] + 2 * All.TotalMeshSize[1] / GRID);
+              All.UpperCorner[1][i] = All.Corner[1][i] + (GRID / 2 - 1) * (All.TotalMeshSize[1] / GRID);
+            }
+        }
+
+      All.Asmth[1] = ASMTH * All.TotalMeshSize[1] / GRID; /* mesh size changed, so recompute the high-res smoothing and cut-off scales */
+      All.Rcut[1]  = RCUT * All.Asmth[1];
+
+      mpi_printf("PM-NONPERIODIC: All.Asmth[0]=%g All.Asmth[1]=%g\n", All.Asmth[0], All.Asmth[1]);
+    }
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+#ifdef PLACEHIGHRESREGION
+  mpi_printf(
+      "PM-NONPERIODIC: Allowed region for isolated PM mesh (high-res): (%g|%g|%g)  -> (%g|%g|%g)   ext=%g  totmeshsize=%g  "
+      "meshsize=%g\n\n",
+      All.Xmintot[1][0], All.Xmintot[1][1], All.Xmintot[1][2], All.Xmaxtot[1][0], All.Xmaxtot[1][1], All.Xmaxtot[1][2], meshinner[1],
+      All.TotalMeshSize[1], All.TotalMeshSize[1] / GRID);
+#endif /* #ifdef PLACEHIGHRESREGION */
+}
+
+/*! \brief Initialization of the non-periodic PM routines.
+ *
+ *  The FFTW plans are created, the slab- or column-based FFT bookkeeping in
+ *  'myplan' is initialized, and the memory for the FFT kernel(s) is allocated.
+ *
+ *  \return void
+ */
+void pm_init_nonperiodic(void)
+{
+  /* Set up the FFTW-3 plan files. */
+  int ndim[1] = {GRID}; /* dimension of the 1D transforms */
+
+  /* temporarily allocate some arrays to make sure that out-of-place plans are created */
+  rhogrid   = (fft_real *)mymalloc("rhogrid", GRID2 * sizeof(fft_real));
+  forcegrid = (fft_real *)mymalloc("forcegrid", GRID2 * sizeof(fft_real));
+
+#ifdef DOUBLEPRECISION_FFTW
+  int alignflag = 0;
+#else  /* #ifdef DOUBLEPRECISION_FFTW */
+  /* for single precision, the start of our FFT columns is presently only guaranteed to be 8-byte aligned */
+  int alignflag = FFTW_UNALIGNED;
+#endif /* #ifdef DOUBLEPRECISION_FFTW #else */
+#ifndef FFT_COLUMN_BASED
+  int stride = GRIDz;
+#else  /* #ifndef FFT_COLUMN_BASED */
+  int stride    = 1;
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  myplan.forward_plan_zdir = FFTW(plan_many_dft_r2c)(1, ndim, 1, rhogrid, 0, 1, GRID2, (fft_complex *)forcegrid, 0, 1, GRIDz,
+                                                     FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myplan.forward_plan_xdir =
+      FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride,
+                          GRIDz * GRID, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myplan.forward_plan_ydir =
+      FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride,
+                          GRIDz * GRID, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myplan.backward_plan_zdir = FFTW(plan_many_dft_c2r)(1, ndim, 1, (fft_complex *)rhogrid, 0, 1, GRIDz, forcegrid, 0, 1, GRID2,
+                                                      FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myplan.backward_plan_xdir =
+      FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride,
+                          GRIDz * GRID, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myplan.backward_plan_ydir =
+      FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride,
+                          GRIDz * GRID, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myfree(forcegrid); /* release the temporary planning arrays again, in reverse order of their allocation */
+  myfree(rhogrid);
+
+#ifndef FFT_COLUMN_BASED
+
+  my_slab_based_fft_init(&myplan, GRID, GRID, GRID);
+
+  maxfftsize = myplan.largest_x_slab * GRID * ((size_t)GRID2);
+
+#else /* #ifndef FFT_COLUMN_BASED */
+
+  my_column_based_fft_init(&myplan, GRID, GRID, GRID);
+
+  maxfftsize = myplan.max_datasize;
+
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  /* now allocate memory to hold the FFT fields */
+
+  size_t bytes, bytes_tot = 0;
+
+#if defined(GRAVITY_NOT_PERIODIC)
+  kernel[0] = (fft_real *)mymalloc("kernel[0]", bytes = maxfftsize * sizeof(fft_real));
+  bytes_tot += bytes;
+  fft_of_kernel[0] = (fft_complex *)kernel[0]; /* complex-typed alias of the same buffer */
+#endif /* #if defined(GRAVITY_NOT_PERIODIC) */
+
+#if defined(PLACEHIGHRESREGION)
+  kernel[1] = (fft_real *)mymalloc("kernel[1]", bytes = maxfftsize * sizeof(fft_real));
+  bytes_tot += bytes;
+  fft_of_kernel[1] = (fft_complex *)kernel[1]; /* complex-typed alias of the same buffer */
+#endif /* #if defined(PLACEHIGHRESREGION) */
+
+  mpi_printf("\nPM-NONPERIODIC: Allocated %g MByte for FFT kernel(s).\n\n", bytes_tot / (1024.0 * 1024.0));
+}
+
+#ifdef PLACEHIGHRESREGION
+/*! \brief Is this a high res particle in high resolution region?
+ *
+ *  For cosmological zoom simulations.
+ *
+ *  \param[in] type Particle type.
+ *  \param[in] Pos Position of particle.
+ *
+ *  \return 1 if the type is selected by PLACEHIGHRESREGION or Pos lies inside the high-res region; 0 otherwise.
+ */
+int pmforce_is_particle_high_res(int type, MyDouble *Pos)
+{
+  int flag = 1;
+
+  if((1 << type) & (PLACEHIGHRESREGION)) /* types whose bit is set in the PLACEHIGHRESREGION mask always count as high res */
+    return 1;
+
+#if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1)
+  double r2 = 0; /* squared distance of Pos from the centre of the high-res box */
+  for(int j = 0; j < 3; j++)
+    r2 += pow(Pos[j] - 0.5 * (All.Xmintot[1][j] + All.Xmaxtot[1][j]), 2);
+
+  if(sqrt(r2) > 0.5 * (All.Xmaxtot[1][0] - All.Xmintot[1][0])) /* outside the sphere with half the box's x-extent as radius */
+    return 0;
+#else /* #if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1) */
+
+  for(int j = 0; j < 3; j++)
+    if(Pos[j] < All.Xmintot[1][j] || Pos[j] > All.Xmaxtot[1][j])
+      {
+        flag = 0; /* we are outside */
+        break;
+      }
+
+#endif /* #if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1) #else */
+
+  return flag;
+}
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+#ifdef PM_ZOOM_OPTIMIZED
+
+static void mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *));
+static int pm_periodic_compare_sortindex(const void *a, const void *b);
+
+/*! \brief This structure links the particles to the mesh cells, to which they
+ *         contribute their mass.
+ *
+ *  Each particle will have eight items of this structure in the 'part' array.
+ *  For each of the eight mesh cells the CIC assignment will contribute,
+ *  one item of this struct exists.
+ */
+static struct part_slab_data
+{
+  large_array_offset globalindex; /*!< index in the global density mesh */
+  large_numpart_type partindex; /*!< contains the local particle index shifted left by 3 bits, the lowest three bits encode to which
+                                   part of the CIC assignment this item belongs to */
+  large_array_offset localindex; /*!< index to a local copy of the corresponding mesh cell of the global density array (used during
+                                    local mass and force assignment) */
+} * part;                        /*!< array of part_slab_data linking the local particles to their mesh cells */
+
+static size_t *localfield_sendcount, *localfield_first, *localfield_offset, *localfield_recvcount; /*!< per-task counts and offsets */
+static large_array_offset *localfield_globalindex, *import_globalindex; /*!< global mesh index per unique local / imported field point */
+static fft_real *localfield_data, *import_data; /*!< value per unique local field point; import_data: presumably the receive buffer */
+static large_numpart_type num_on_grid;          /*!< number of used entries in 'part' (8 per particle placed on the mesh) */
+
+/*! \brief Prepares density field for nonperiodic FFTs.
+ *
+ *  \param[in] grnr (0, 1) 0 if full mesh, 1 if highres grid.
+ *
+ *  \return void
+ */
+void pmforce_nonperiodic_zoom_optimized_prepare_density(int grnr)
+{
+  large_numpart_type i;
+  int level, recvTask;
+  MPI_Status status;
+
+  double to_slab_fac = GRID / All.TotalMeshSize[grnr];
+
+  part                               = (struct part_slab_data *)mymalloc("part", 8 * (NumPart * sizeof(struct part_slab_data)));
+  large_numpart_type *part_sortindex = (large_numpart_type *)mymalloc("part_sortindex", 8 * (NumPart * sizeof(large_numpart_type)));
+
+  int ngrid = 0;
+
+  /* determine the cells each particle accesses */
+  for(i = 0; i < NumPart; i++)
+    {
+      MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+      if(P[i].Type == 0)
+        pos = SphP[i].Center;
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[i].Pos;
+
+      if(pos[0] < All.Corner[grnr][0] || pos[0] >= All.UpperCorner[grnr][0])
+        continue;
+      if(pos[1] < All.Corner[grnr][1] || pos[1] >= All.UpperCorner[grnr][1])
+        continue;
+      if(pos[2] < All.Corner[grnr][2] || pos[2] >= All.UpperCorner[grnr][2])
+        continue;
+
+      int slab_x = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0]));
+      int slab_y = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1]));
+      int slab_z = (int)(to_slab_fac * (pos[2] - All.Corner[grnr][2]));
+      int myngrid;
+
+      {
+        myngrid = ngrid;
+        ngrid += 1;
+      }
+
+      large_numpart_type index_on_grid = ((large_numpart_type)myngrid) * 8;
+
+      int xx, yy, zz;
+
+      for(xx = 0; xx < 2; xx++)
+        for(yy = 0; yy < 2; yy++)
+          for(zz = 0; zz < 2; zz++)
+            {
+              int slab_xx = slab_x + xx;
+              int slab_yy = slab_y + yy;
+              int slab_zz = slab_z + zz;
+
+              if(slab_xx >= GRID)
+                slab_xx -= GRID;
+              if(slab_yy >= GRID)
+                slab_yy -= GRID;
+              if(slab_zz >= GRID)
+                slab_zz -= GRID;
+
+              large_array_offset offset = FI(slab_xx, slab_yy, slab_zz);
+
+              part[index_on_grid].partindex   = (i << 3) + (xx << 2) + (yy << 1) + zz;
+              part[index_on_grid].globalindex = offset;
+              part_sortindex[index_on_grid]   = index_on_grid;
+              index_on_grid++;
+            }
+    }
+
+  /* note: num_on_grid will be  8 times larger than the particle number, but num_field_points will generally be much smaller */
+  num_on_grid = ((large_numpart_type)ngrid) * 8;
+
+  /* bring the part-field into the order of the accessed cells. This allows the removal of duplicates */
+  mysort_pmperiodic(part_sortindex, num_on_grid, sizeof(large_numpart_type), pm_periodic_compare_sortindex);
+
+  large_array_offset num_field_points;
+
+  if(num_on_grid > 0)
+    num_field_points = 1;
+  else
+    num_field_points = 0;
+
+  /* determine the number of unique field points */
+  for(i = 1; i < num_on_grid; i++)
+    {
+      if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex)
+        num_field_points++;
+    }
+
+  /* allocate the local field */
+  localfield_globalindex = (large_array_offset *)mymalloc_movable(&localfield_globalindex, "localfield_globalindex",
+                                                                  num_field_points * sizeof(large_array_offset));
+  localfield_data        = (fft_real *)mymalloc_movable(&localfield_data, "localfield_data", num_field_points * sizeof(fft_real));
+  localfield_first       = (size_t *)mymalloc_movable(&localfield_first, "localfield_first", NTask * sizeof(size_t));
+  localfield_sendcount   = (size_t *)mymalloc_movable(&localfield_sendcount, "localfield_sendcount", NTask * sizeof(size_t));
+  localfield_offset      = (size_t *)mymalloc_movable(&localfield_offset, "localfield_offset", NTask * sizeof(size_t));
+  localfield_recvcount   = (size_t *)mymalloc_movable(&localfield_recvcount, "localfield_recvcount", NTask * sizeof(size_t));
+
+  for(i = 0; i < NTask; i++)
+    {
+      localfield_first[i]     = 0;
+      localfield_sendcount[i] = 0;
+    }
+
+  /* establish the cross link between the part[ ]-array and the local list of
+   * mesh points. Also, count on which CPU the needed field points are stored.
+   */
+  for(i = 0, num_field_points = 0; i < num_on_grid; i++)
+    {
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex)
+          num_field_points++;
+
+      part[part_sortindex[i]].localindex = num_field_points;
+
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex == part[part_sortindex[i - 1]].globalindex)
+          continue;
+
+      localfield_globalindex[num_field_points] = part[part_sortindex[i]].globalindex;
+
+#ifndef FFT_COLUMN_BASED
+      int slab = part[part_sortindex[i]].globalindex / (GRID * GRID2);
+      int task = myplan.slab_to_task[slab];
+#else  /* #ifndef FFT_COLUMN_BASED */
+      int task, column = part[part_sortindex[i]].globalindex / (GRID2);
+
+      if(column < myplan.pivotcol)
+        task = column / myplan.avg;
+      else
+        task = (column - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+      if(localfield_sendcount[task] == 0)
+        localfield_first[task] = num_field_points;
+
+      localfield_sendcount[task]++;
+    }
+  num_field_points++;
+
+  for(i = 1, localfield_offset[0] = 0; i < NTask; i++)
+    localfield_offset[i] = localfield_offset[i - 1] + localfield_sendcount[i - 1];
+
+  myfree_movable(part_sortindex);
+  part_sortindex = NULL;
+
+  /* now bin the local particle data onto the mesh list */
+  for(i = 0; i < num_field_points; i++)
+    localfield_data[i] = 0;
+
+  for(i = 0; i < num_on_grid; i += 8)
+    {
+      int pindex = (part[i].partindex >> 3);
+
+      MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+      if(P[pindex].Type == 0)
+        pos = SphP[pindex].Center;
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[pindex].Pos;
+
+      int slab_x = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0]));
+      int slab_y = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1]));
+      int slab_z = (int)(to_slab_fac * (pos[2] - All.Corner[grnr][2]));
+
+      double dx = to_slab_fac * (pos[0] - All.Corner[grnr][0]) - slab_x;
+      double dy = to_slab_fac * (pos[1] - All.Corner[grnr][1]) - slab_y;
+      double dz = to_slab_fac * (pos[2] - All.Corner[grnr][2]) - slab_z;
+
+      double weight = P[pindex].Mass;
+
+      localfield_data[part[i + 0].localindex] += weight * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
+      localfield_data[part[i + 1].localindex] += weight * (1.0 - dx) * (1.0 - dy) * dz;
+      localfield_data[part[i + 2].localindex] += weight * (1.0 - dx) * dy * (1.0 - dz);
+      localfield_data[part[i + 3].localindex] += weight * (1.0 - dx) * dy * dz;
+      localfield_data[part[i + 4].localindex] += weight * (dx) * (1.0 - dy) * (1.0 - dz);
+      localfield_data[part[i + 5].localindex] += weight * (dx) * (1.0 - dy) * dz;
+      localfield_data[part[i + 6].localindex] += weight * (dx)*dy * (1.0 - dz);
+      localfield_data[part[i + 7].localindex] += weight * (dx)*dy * dz;
+    }
+
+  rhogrid = (fft_real *)mymalloc("rhogrid", maxfftsize * sizeof(fft_real));
+
+  /* clear local FFT-mesh density field */
+  large_array_offset ii;
+  for(ii = 0; ii < maxfftsize; ii++)
+    rhogrid[ii] = 0;
+
+  /* exchange data and add contributions to the local mesh-path */
+  MPI_Alltoall(localfield_sendcount, sizeof(size_t), MPI_BYTE, localfield_recvcount, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+
+  for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+    {
+      recvTask = ThisTask ^ level;
+
+      if(recvTask < NTask)
+        {
+          if(level > 0)
+            {
+              import_data = (fft_real *)mymalloc("import_data", localfield_recvcount[recvTask] * sizeof(fft_real));
+              import_globalindex =
+                  (large_array_offset *)mymalloc("import_globalindex", localfield_recvcount[recvTask] * sizeof(large_array_offset));
+
+              if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0)
+                {
+                  myMPI_Sendrecv(localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] * sizeof(fft_real),
+                                 MPI_BYTE, recvTask, TAG_NONPERIOD_A, import_data, localfield_recvcount[recvTask] * sizeof(fft_real),
+                                 MPI_BYTE, recvTask, TAG_NONPERIOD_A, MPI_COMM_WORLD, &status);
+
+                  myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                                 localfield_sendcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, TAG_NONPERIOD_B,
+                                 import_globalindex, localfield_recvcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                 TAG_NONPERIOD_B, MPI_COMM_WORLD, &status);
+                }
+            }
+          else
+            {
+              import_data        = localfield_data + localfield_offset[ThisTask];
+              import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+            }
+
+          /* note: here every element in rhogrid is only accessed once, so there should be no race condition */
+          for(i = 0; i < localfield_recvcount[recvTask]; i++)
+            {
+              /* determine offset in local FFT slab */
+#ifndef FFT_COLUMN_BASED
+              large_array_offset offset =
+                  import_globalindex[i] - myplan.first_slab_x_of_task[ThisTask] * GRID * ((large_array_offset)GRID2);
+#else  /* #ifndef FFT_COLUMN_BASED */
+              large_array_offset offset = import_globalindex[i] - myplan.base_firstcol * ((large_array_offset)GRID2);
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+              rhogrid[offset] += import_data[i];
+            }
+
+          if(level > 0)
+            {
+              myfree(import_globalindex);
+              myfree(import_data);
+            }
+        }
+    }
+}
+
+/*! \brief Reads out the force component corresponding to spatial dimension
+ *         'dim'.
+ *
+ *  If dim is negative, potential values are read out (from rhogrid instead of
+ *  forcegrid) and added to PM_Potential, which requires EVALPOTENTIAL.
+ *
+ *  \param[in] grnr Number of grid (0: base, 1 high-res)
+ *  \param[in] dim Dimension to be read out
+ *             (<0: potential,>=0 force component).
+ *
+ *  \return void
+ */
+void pmforce_nonperiodic_zoom_optimized_readout_forces_or_potential(int grnr, int dim)
+{
+#ifdef EVALPOTENTIAL
+  /* factor to get potential; applied to the interpolated value in the dim < 0 branch below */
+  double fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3);
+#endif /* #ifdef EVALPOTENTIAL */
+
+  large_numpart_type i;
+  int level, recvTask;
+  MPI_Status status;
+
+  fft_real *grid; /* mesh that is sampled: rhogrid for dim < 0, forcegrid otherwise */
+
+  if(dim < 0)
+    grid = rhogrid;
+  else
+    grid = forcegrid;
+
+  double to_slab_fac = GRID / All.TotalMeshSize[grnr]; /* scales an offset from All.Corner into mesh-cell units */
+
+  for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+    {
+      recvTask = ThisTask ^ level;
+
+      if(recvTask < NTask) /* partner ranks >= NTask are skipped */
+        {
+          if(level > 0)
+            {
+              import_data = (fft_real *)mymalloc("import_data", localfield_recvcount[recvTask] * sizeof(fft_real));
+              import_globalindex =
+                  (large_array_offset *)mymalloc("import_globalindex", localfield_recvcount[recvTask] * sizeof(large_array_offset));
+
+              if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0) /* swap mesh-cell index lists with the partner */
+                {
+                  myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                                 localfield_sendcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, TAG_NONPERIOD_C,
+                                 import_globalindex, localfield_recvcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                 TAG_NONPERIOD_C, MPI_COMM_WORLD, &status);
+                }
+            }
+          else /* level 0: use our own segment of the local lists in place, nothing is allocated */
+            {
+              import_data        = localfield_data + localfield_offset[ThisTask];
+              import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+            }
+
+          for(i = 0; i < localfield_recvcount[recvTask]; i++) /* look up every requested mesh cell in the local part of the grid */
+            {
+#ifndef FFT_COLUMN_BASED
+              large_array_offset offset =
+                  import_globalindex[i] - myplan.first_slab_x_of_task[ThisTask] * GRID * ((large_array_offset)GRID2);
+#else  /* #ifndef FFT_COLUMN_BASED */
+              large_array_offset offset = import_globalindex[i] - myplan.base_firstcol * ((large_array_offset)GRID2);
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+              import_data[i] = grid[offset];
+            }
+
+          if(level > 0) /* send the looked-up values back; the reply lands in our localfield_data segment for recvTask */
+            {
+              myMPI_Sendrecv(import_data, localfield_recvcount[recvTask] * sizeof(fft_real), MPI_BYTE, recvTask, TAG_NONPERIOD_A,
+                             localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] * sizeof(fft_real),
+                             MPI_BYTE, recvTask, TAG_NONPERIOD_A, MPI_COMM_WORLD, &status);
+
+              myfree(import_globalindex);
+              myfree(import_data);
+            }
+        }
+    }
+
+  /* read out the force/potential values, which all have been assembled in localfield_data */
+
+  int k, ngrid = (num_on_grid >> 3); /* part[] holds 8 consecutive entries per particle, hence num_on_grid / 8 particles */
+
+  for(k = 0; k < ngrid; k++)
+    {
+      large_numpart_type j = (((large_numpart_type)k) << 3); /* first of the 8 part[] entries of particle k; widened before the shift */
+
+      int i = (part[j].partindex >> 3); /* index into P[]; note that this i shadows the function-scope i */
+
+      MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+      if(P[i].Type == 0)
+        pos = SphP[i].Center;
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[i].Pos;
+
+#ifdef PLACEHIGHRESREGION
+      if(grnr == 1) /* on grid 1 only particles accepted by pmforce_is_particle_high_res() receive a value */
+        if(!(pmforce_is_particle_high_res(P[i].Type, pos)))
+          continue;
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+      int slab_x = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0]));
+      double dx  = to_slab_fac * (pos[0] - All.Corner[grnr][0]) - slab_x; /* fractional position inside the cell, x */
+
+      int slab_y = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1]));
+      double dy  = to_slab_fac * (pos[1] - All.Corner[grnr][1]) - slab_y; /* ... y */
+
+      int slab_z = (int)(to_slab_fac * (pos[2] - All.Corner[grnr][2]));
+      double dz  = to_slab_fac * (pos[2] - All.Corner[grnr][2]) - slab_z; /* ... z */
+
+      double value = +localfield_data[part[j + 0].localindex] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
+                     localfield_data[part[j + 1].localindex] * (1.0 - dx) * (1.0 - dy) * dz +
+                     localfield_data[part[j + 2].localindex] * (1.0 - dx) * dy * (1.0 - dz) +
+                     localfield_data[part[j + 3].localindex] * (1.0 - dx) * dy * dz +
+                     localfield_data[part[j + 4].localindex] * (dx) * (1.0 - dy) * (1.0 - dz) +
+                     localfield_data[part[j + 5].localindex] * (dx) * (1.0 - dy) * dz +
+                     localfield_data[part[j + 6].localindex] * (dx)*dy * (1.0 - dz) +
+                     localfield_data[part[j + 7].localindex] * (dx)*dy * dz; /* 8-corner interpolation, per-axis weights (1-d) and d */
+
+      if(dim < 0) /* without EVALPOTENTIAL this branch is empty and the interpolated value is discarded */
+        {
+#ifdef EVALPOTENTIAL
+          P[i].PM_Potential += value * fac;
+#endif /* #ifdef EVALPOTENTIAL */
+        }
+      else
+        P[i].GravPM[dim] += value;
+    }
+}
+
+#else /* #ifdef PM_ZOOM_OPTIMIZED */
+/* Here come the routines for a different communication algorithm that is better suited for homogeneously loaded boxes.
+ */
+
+/*! \brief Particle buffer structure: mass and position of one particle copy exchanged between tasks.
+ */
+static struct partbuf
+{
+  MyFloat Mass;
+  MyFloat Pos[3];
+} * partin, *partout; /* import buffer (nimport entries) and export buffer (nexport entries) */
+
+static size_t nimport, nexport; /* sums of Rcvpm_count[] and Sndpm_count[] over all tasks */
+
+static size_t *Sndpm_count, *Sndpm_offset; /* send counts and offsets into partout; one padded row of NTask entries per thread */
+static size_t *Rcvpm_count, *Rcvpm_offset; /* receive counts and offsets into partin; NTask entries each */
+
+/*! \brief Prepares density for pm calculation in algorithm optimized for
+ *         uniform densities: ships each particle to the task(s) owning the mesh
+ *         cells it touches and bins the masses into a newly allocated rhogrid.
+ *  \param[in] grnr Number of grid (0: base grid, 1: high res grid).
+ *
+ *  \return void
+ */
+void pmforce_nonperiodic_uniform_optimized_prepare_density(int grnr)
+{
+  int i, j;
+
+  double to_slab_fac = GRID / All.TotalMeshSize[grnr]; /* scales an offset from All.Corner into mesh-cell units */
+
+  /* We here enlarge NTask such that each thread gets his own cache line for send_count/send_offset.
+   * This should hopefully prevent a performance penalty from 'false sharing' for these variables
+   */
+  int multiNtask = roundup_to_multiple_of_cacheline_size(NTask * sizeof(size_t)) / sizeof(size_t);
+
+  Sndpm_count  = mymalloc("Sndpm_count", MaxThreads * multiNtask * sizeof(size_t));
+  Sndpm_offset = mymalloc("Sndpm_offset", MaxThreads * multiNtask * sizeof(size_t));
+  Rcvpm_count  = mymalloc("Rcvpm_count", NTask * sizeof(size_t));
+  Rcvpm_offset = mymalloc("Rcvpm_offset", NTask * sizeof(size_t)); /* note: these four tables and partin are not freed in this function */
+
+  /* determine the slabs/columns each particle accesses (first pass: only count the copies per target task) */
+  {
+    size_t *send_count = Sndpm_count + get_thread_num() * multiNtask;
+
+    /* each thread needs to do the loop to clear its send_count[] array */
+    for(j = 0; j < NTask; j++)
+      send_count[j] = 0;
+
+    for(i = 0; i < NumPart; i++)
+      {
+        MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+        if(P[i].Type == 0)
+          pos = SphP[i].Center;
+        else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+          pos = P[i].Pos;
+
+        if(pos[0] < All.Corner[grnr][0] || pos[0] >= All.UpperCorner[grnr][0]) /* particles outside [Corner, UpperCorner) are ignored */
+          continue;
+        if(pos[1] < All.Corner[grnr][1] || pos[1] >= All.UpperCorner[grnr][1])
+          continue;
+        if(pos[2] < All.Corner[grnr][2] || pos[2] >= All.UpperCorner[grnr][2])
+          continue;
+
+        int slab_x  = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0]));
+        int slab_xx = slab_x + 1;
+
+#ifndef FFT_COLUMN_BASED
+        int task0   = myplan.slab_to_task[slab_x];
+        int task1   = myplan.slab_to_task[slab_xx];
+
+        send_count[task0]++;
+        if(task0 != task1) /* the two x-slabs of this particle live on different tasks: one copy for each */
+          send_count[task1]++;
+#else  /* #ifndef FFT_COLUMN_BASED */
+        int slab_y  = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1]));
+        int slab_yy = slab_y + 1;
+
+        int column0 = slab_x * GRID + slab_y;
+        int column1 = slab_x * GRID + slab_yy;
+        int column2 = slab_xx * GRID + slab_y;
+        int column3 = slab_xx * GRID + slab_yy;
+
+        int task0, task1, task2, task3;
+
+        if(column0 < myplan.pivotcol)
+          task0 = column0 / myplan.avg;
+        else
+          task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column1 < myplan.pivotcol)
+          task1 = column1 / myplan.avg;
+        else
+          task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column2 < myplan.pivotcol)
+          task2 = column2 / myplan.avg;
+        else
+          task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column3 < myplan.pivotcol)
+          task3 = column3 / myplan.avg;
+        else
+          task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        send_count[task0]++; /* one copy per distinct task among the four columns */
+        if(task1 != task0)
+          send_count[task1]++;
+        if(task2 != task1 && task2 != task0)
+          send_count[task2]++;
+        if(task3 != task0 && task3 != task1 && task3 != task2)
+          send_count[task3]++;
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+      }
+  }
+
+  /* collect thread-specific offset table and collect the results from the other threads */
+  for(i = 0, Sndpm_offset[0] = 0; i < NTask; i++)
+    for(j = 0; j < MaxThreads; j++)
+      {
+        int ind_prev, ind = j * multiNtask + i;
+        if(ind > 0)
+          {
+            if(j == 0)
+              ind_prev = (MaxThreads - 1) * multiNtask + i - 1; /* predecessor: last thread's entry for the previous task */
+            else
+              ind_prev = ind - multiNtask; /* predecessor: previous thread's entry for the same task */
+
+            Sndpm_offset[ind] = Sndpm_offset[ind_prev] + Sndpm_count[ind_prev];
+          }
+      }
+
+  for(j = 1; j < MaxThreads; j++) /* fold the per-thread counts into the first NTask entries (totals per task) */
+    for(i = 0; i < NTask; i++)
+      Sndpm_count[i] += Sndpm_count[i + j * multiNtask];
+
+  MPI_Alltoall(Sndpm_count, sizeof(size_t), MPI_BYTE, Rcvpm_count, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+
+  for(j = 0, nimport = 0, nexport = 0, Rcvpm_offset[0] = 0, Sndpm_offset[0] = 0; j < NTask; j++)
+    {
+      nexport += Sndpm_count[j];
+      nimport += Rcvpm_count[j];
+
+      if(j > 0)
+        {
+          Sndpm_offset[j] = Sndpm_offset[j - 1] + Sndpm_count[j - 1];
+          Rcvpm_offset[j] = Rcvpm_offset[j - 1] + Rcvpm_count[j - 1];
+        }
+    }
+
+  /* allocate import and export buffer */
+  partin  = (struct partbuf *)mymalloc("partin", nimport * sizeof(struct partbuf));
+  partout = (struct partbuf *)mymalloc("partout", nexport * sizeof(struct partbuf));
+
+  {
+    size_t *send_count  = Sndpm_count + get_thread_num() * multiNtask;
+    size_t *send_offset = Sndpm_offset + get_thread_num() * multiNtask;
+
+    for(j = 0; j < NTask; j++)
+      send_count[j] = 0;
+
+    /* fill export buffer (second pass: same traversal as the counting pass, send_count[] is now the write cursor) */
+    for(i = 0; i < NumPart; i++)
+      {
+        MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+        if(P[i].Type == 0)
+          pos = SphP[i].Center;
+        else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+          pos = P[i].Pos;
+
+        if(pos[0] < All.Corner[grnr][0] || pos[0] >= All.UpperCorner[grnr][0])
+          continue;
+        if(pos[1] < All.Corner[grnr][1] || pos[1] >= All.UpperCorner[grnr][1])
+          continue;
+        if(pos[2] < All.Corner[grnr][2] || pos[2] >= All.UpperCorner[grnr][2])
+          continue;
+
+        int slab_x  = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0]));
+        int slab_xx = slab_x + 1;
+
+#ifndef FFT_COLUMN_BASED
+        int task0   = myplan.slab_to_task[slab_x];
+        int task1   = myplan.slab_to_task[slab_xx];
+
+        size_t ind0        = send_offset[task0] + send_count[task0]++;
+        partout[ind0].Mass = P[i].Mass;
+        for(j = 0; j < 3; j++)
+          partout[ind0].Pos[j] = pos[j];
+
+        if(task0 != task1)
+          {
+            size_t ind1        = send_offset[task1] + send_count[task1]++;
+            partout[ind1].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind1].Pos[j] = pos[j];
+          }
+#else  /* #ifndef FFT_COLUMN_BASED */
+        int slab_y  = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1]));
+        int slab_yy = slab_y + 1;
+
+        int column0 = slab_x * GRID + slab_y;
+        int column1 = slab_x * GRID + slab_yy;
+        int column2 = slab_xx * GRID + slab_y;
+        int column3 = slab_xx * GRID + slab_yy;
+
+        int task0, task1, task2, task3;
+
+        if(column0 < myplan.pivotcol)
+          task0 = column0 / myplan.avg;
+        else
+          task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column1 < myplan.pivotcol)
+          task1 = column1 / myplan.avg;
+        else
+          task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column2 < myplan.pivotcol)
+          task2 = column2 / myplan.avg;
+        else
+          task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column3 < myplan.pivotcol)
+          task3 = column3 / myplan.avg;
+        else
+          task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        size_t ind0        = send_offset[task0] + send_count[task0]++;
+        partout[ind0].Mass = P[i].Mass;
+        for(j = 0; j < 3; j++)
+          partout[ind0].Pos[j] = pos[j];
+
+        if(task1 != task0)
+          {
+            size_t ind1        = send_offset[task1] + send_count[task1]++;
+            partout[ind1].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind1].Pos[j] = pos[j];
+          }
+        if(task2 != task1 && task2 != task0)
+          {
+            size_t ind2        = send_offset[task2] + send_count[task2]++;
+            partout[ind2].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind2].Pos[j] = pos[j];
+          }
+        if(task3 != task0 && task3 != task1 && task3 != task2)
+          {
+            size_t ind3        = send_offset[task3] + send_count[task3]++;
+            partout[ind3].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind3].Pos[j] = pos[j];
+          }
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+      }
+  }
+
+  /* collect the send_count[] results from the other threads */
+  for(j = 1; j < MaxThreads; j++)
+    for(i = 0; i < NTask; i++)
+      Sndpm_count[i] += Sndpm_count[i + j * multiNtask];
+
+  int flag_big = 0, flag_big_all;
+  for(i = 0; i < NTask; i++)
+    if(Sndpm_count[i] * sizeof(struct partbuf) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+      flag_big = 1;
+
+  /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
+   * transfer the data in chunks.
+   */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  /* exchange particle data */
+  myMPI_Alltoallv(partout, Sndpm_count, Sndpm_offset, partin, Rcvpm_count, Rcvpm_offset, sizeof(struct partbuf), flag_big_all,
+                  MPI_COMM_WORLD);
+
+  myfree(partout); /* the export copies are no longer needed; partin stays allocated */
+
+  /* allocate density field */
+  rhogrid = (fft_real *)mymalloc("rhogrid", maxfftsize * sizeof(fft_real));
+
+  /* clear local FFT-mesh density field */
+  large_array_offset ii;
+
+  for(ii = 0; ii < maxfftsize; ii++)
+    rhogrid[ii] = 0;
+
+#ifndef FFT_COLUMN_BASED
+  /* bin particle data onto mesh, in multi-threaded fashion */
+  {
+    int tid = get_thread_num();
+
+    int first_y, count_y;
+    subdivide_evenly(GRID, MaxThreads, tid, &first_y, &count_y);
+    int last_y = first_y + count_y - 1;
+
+    for(size_t ip = 0; ip < nimport; ip++) /* size_t index: nimport is a size_t and may exceed INT_MAX */
+      {
+        int slab_y  = (int)(to_slab_fac * (partin[ip].Pos[1] - All.Corner[grnr][1]));
+        int slab_yy = slab_y + 1;
+        double dy   = to_slab_fac * (partin[ip].Pos[1] - All.Corner[grnr][1]) - slab_y;
+        int flag_slab_y, flag_slab_yy; /* set if the lower / upper y cell lies in this thread's range [first_y, last_y] */
+
+        if(slab_y >= first_y && slab_y <= last_y)
+          flag_slab_y = 1;
+        else
+          flag_slab_y = 0;
+
+        if(slab_yy >= first_y && slab_yy <= last_y)
+          flag_slab_yy = 1;
+        else
+          flag_slab_yy = 0;
+
+        if(flag_slab_y || flag_slab_yy)
+          {
+            double mass = partin[ip].Mass;
+
+            int slab_x  = (int)(to_slab_fac * (partin[ip].Pos[0] - All.Corner[grnr][0]));
+            int slab_z  = (int)(to_slab_fac * (partin[ip].Pos[2] - All.Corner[grnr][2]));
+            int slab_xx = slab_x + 1;
+            int slab_zz = slab_z + 1;
+
+            double dx = to_slab_fac * (partin[ip].Pos[0] - All.Corner[grnr][0]) - slab_x;
+            double dz = to_slab_fac * (partin[ip].Pos[2] - All.Corner[grnr][2]) - slab_z;
+
+            int flag_slab_x, flag_slab_xx; /* set if the lower / upper x slab is stored on this task */
+
+            if(myplan.slab_to_task[slab_x] == ThisTask)
+              {
+                slab_x -= myplan.first_slab_x_of_task[ThisTask]; /* make the slab index local to this task */
+                flag_slab_x = 1;
+              }
+            else
+              flag_slab_x = 0;
+
+            if(myplan.slab_to_task[slab_xx] == ThisTask)
+              {
+                slab_xx -= myplan.first_slab_x_of_task[ThisTask];
+                flag_slab_xx = 1;
+              }
+            else
+              flag_slab_xx = 0;
+
+            if(flag_slab_x)
+              {
+                if(flag_slab_y)
+                  {
+                    rhogrid[FI(slab_x, slab_y, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz));
+                    rhogrid[FI(slab_x, slab_y, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz));
+                  }
+
+                if(flag_slab_yy)
+                  {
+                    rhogrid[FI(slab_x, slab_yy, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz));
+                    rhogrid[FI(slab_x, slab_yy, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz));
+                  }
+              }
+
+            if(flag_slab_xx)
+              {
+                if(flag_slab_y)
+                  {
+                    rhogrid[FI(slab_xx, slab_y, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz));
+                    rhogrid[FI(slab_xx, slab_y, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz));
+                  }
+
+                if(flag_slab_yy)
+                  {
+                    rhogrid[FI(slab_xx, slab_yy, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz));
+                    rhogrid[FI(slab_xx, slab_yy, slab_zz)] += (mass * (dx) * (dy) * (dz));
+                  }
+              }
+          }
+      }
+  }
+
+#else /* #ifndef FFT_COLUMN_BASED */
+
+  struct data_cols
+  {
+    int col0, col1, col2, col3;
+    double dx, dy;
+  } * aux; /* per imported particle: its four (x,y) column numbers and fractional x/y offsets */
+
+  aux = mymalloc("aux", nimport * sizeof(struct data_cols));
+
+  for(size_t ip = 0; ip < nimport; ip++) /* size_t index: nimport is a size_t and may exceed INT_MAX */
+    {
+      int slab_x = (int)(to_slab_fac * (partin[ip].Pos[0] - All.Corner[grnr][0]));
+      int slab_xx = slab_x + 1;
+
+      int slab_y = (int)(to_slab_fac * (partin[ip].Pos[1] - All.Corner[grnr][1]));
+      int slab_yy = slab_y + 1;
+
+      aux[ip].dx = to_slab_fac * (partin[ip].Pos[0] - All.Corner[grnr][0]) - slab_x;
+      aux[ip].dy = to_slab_fac * (partin[ip].Pos[1] - All.Corner[grnr][1]) - slab_y;
+
+      aux[ip].col0 = slab_x * GRID + slab_y;
+      aux[ip].col1 = slab_x * GRID + slab_yy;
+      aux[ip].col2 = slab_xx * GRID + slab_y;
+      aux[ip].col3 = slab_xx * GRID + slab_yy;
+    }
+
+  {
+    int tid = get_thread_num();
+
+    int first_col, last_col, count_col;
+    subdivide_evenly(myplan.base_ncol, MaxThreads, tid, &first_col, &count_col);
+    last_col = first_col + count_col - 1;
+    first_col += myplan.base_firstcol; /* shift the thread's range from task-local to global column numbers */
+    last_col += myplan.base_firstcol;
+
+    for(size_t ip = 0; ip < nimport; ip++) /* size_t index, see above */
+      {
+        int flag0, flag1, flag2, flag3; /* set if the respective column lies in [first_col, last_col] */
+        int col0 = aux[ip].col0;
+        int col1 = aux[ip].col1;
+        int col2 = aux[ip].col2;
+        int col3 = aux[ip].col3;
+
+        if(col0 >= first_col && col0 <= last_col)
+          flag0 = 1;
+        else
+          flag0 = 0;
+
+        if(col1 >= first_col && col1 <= last_col)
+          flag1 = 1;
+        else
+          flag1 = 0;
+
+        if(col2 >= first_col && col2 <= last_col)
+          flag2 = 1;
+        else
+          flag2 = 0;
+
+        if(col3 >= first_col && col3 <= last_col)
+          flag3 = 1;
+        else
+          flag3 = 0;
+
+        if(flag0 || flag1 || flag2 || flag3)
+          {
+            double mass = partin[ip].Mass;
+
+            double dx = aux[ip].dx;
+            double dy = aux[ip].dy;
+
+            int slab_z = (int)(to_slab_fac * (partin[ip].Pos[2] - All.Corner[grnr][2]));
+            int slab_zz = slab_z + 1;
+
+            double dz = to_slab_fac * (partin[ip].Pos[2] - All.Corner[grnr][2]) - slab_z;
+
+            if(flag0)
+              {
+                rhogrid[FC(col0, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz));
+                rhogrid[FC(col0, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz));
+              }
+
+            if(flag1)
+              {
+                rhogrid[FC(col1, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz));
+                rhogrid[FC(col1, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz));
+              }
+
+            if(flag2)
+              {
+                rhogrid[FC(col2, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz));
+                rhogrid[FC(col2, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz));
+              }
+
+            if(flag3)
+              {
+                rhogrid[FC(col3, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz));
+                rhogrid[FC(col3, slab_zz)] += (mass * (dx) * (dy) * (dz));
+              }
+          }
+      }
+  }
+
+  myfree(aux);
+
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+}
+
+/*! \brief If dim<0, this function reads out the potential, otherwise
+ *         Cartesian force components, and adds the result to the particles.
+ *         Uses partin, nimport, nexport and the Sndpm/Rcvpm count and offset tables.
+ *  \param[in] grnr Grid number (0: base grid, 1: high res grid).
+ *  \param[in] dim Dimension of component to be read out (< 0: potential).
+ *
+ *  \return void
+ */
+void pmforce_nonperiodic_uniform_optimized_readout_forces_or_potential(int grnr, int dim)
+{
+#ifdef EVALPOTENTIAL
+  /* factor to get potential; applied in the dim < 0 branch at the end of the particle loop */
+  double fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3);
+#endif /* #ifdef EVALPOTENTIAL */
+
+  double to_slab_fac = GRID / All.TotalMeshSize[grnr]; /* scales an offset from All.Corner into mesh-cell units */
+
+  double *flistin  = (double *)mymalloc("flistin", nimport * sizeof(double));  /* one value per imported particle copy */
+  double *flistout = (double *)mymalloc("flistout", nexport * sizeof(double)); /* values coming back for the copies we exported */
+
+  fft_real *grid; /* mesh that is sampled: rhogrid for dim < 0, forcegrid otherwise */
+
+  if(dim < 0)
+    grid = rhogrid;
+  else
+    grid = forcegrid;
+
+  size_t i;
+  for(i = 0; i < nimport; i++)
+    {
+      flistin[i] = 0; /* accumulates only the corner contributions that are stored on this task */
+
+      int slab_x = (int)(to_slab_fac * (partin[i].Pos[0] - All.Corner[grnr][0]));
+      int slab_y = (int)(to_slab_fac * (partin[i].Pos[1] - All.Corner[grnr][1]));
+      int slab_z = (int)(to_slab_fac * (partin[i].Pos[2] - All.Corner[grnr][2]));
+
+      double dx = to_slab_fac * (partin[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
+      double dy = to_slab_fac * (partin[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
+      double dz = to_slab_fac * (partin[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
+
+      int slab_xx = slab_x + 1;
+      int slab_yy = slab_y + 1;
+      int slab_zz = slab_z + 1;
+
+#ifndef FFT_COLUMN_BASED
+      if(myplan.slab_to_task[slab_x] == ThisTask) /* lower x slab is local: add its four corners */
+        {
+          slab_x -= myplan.first_slab_x_of_task[ThisTask]; /* make the slab index local to this task */
+
+          flistin[i] += +grid[FI(slab_x, slab_y, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
+                        grid[FI(slab_x, slab_y, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz) +
+                        grid[FI(slab_x, slab_yy, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) +
+                        grid[FI(slab_x, slab_yy, slab_zz)] * (1.0 - dx) * (dy) * (dz);
+        }
+
+      if(myplan.slab_to_task[slab_xx] == ThisTask) /* upper x slab is local: add its four corners */
+        {
+          slab_xx -= myplan.first_slab_x_of_task[ThisTask];
+
+          flistin[i] += +grid[FI(slab_xx, slab_y, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) +
+                        grid[FI(slab_xx, slab_y, slab_zz)] * (dx) * (1.0 - dy) * (dz) +
+                        grid[FI(slab_xx, slab_yy, slab_z)] * (dx) * (dy) * (1.0 - dz) +
+                        grid[FI(slab_xx, slab_yy, slab_zz)] * (dx) * (dy) * (dz);
+        }
+#else  /* #ifndef FFT_COLUMN_BASED */
+      int column0 = slab_x * GRID + slab_y;
+      int column1 = slab_x * GRID + slab_yy;
+      int column2 = slab_xx * GRID + slab_y;
+      int column3 = slab_xx * GRID + slab_yy;
+
+      if(column0 >= myplan.base_firstcol && column0 <= myplan.base_lastcol) /* column is local: add its two z corners */
+        {
+          flistin[i] += +grid[FC(column0, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
+                        grid[FC(column0, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz);
+        }
+      if(column1 >= myplan.base_firstcol && column1 <= myplan.base_lastcol)
+        {
+          flistin[i] +=
+              +grid[FC(column1, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) + grid[FC(column1, slab_zz)] * (1.0 - dx) * (dy) * (dz);
+        }
+
+      if(column2 >= myplan.base_firstcol && column2 <= myplan.base_lastcol)
+        {
+          flistin[i] +=
+              +grid[FC(column2, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) + grid[FC(column2, slab_zz)] * (dx) * (1.0 - dy) * (dz);
+        }
+
+      if(column3 >= myplan.base_firstcol && column3 <= myplan.base_lastcol)
+        {
+          flistin[i] += +grid[FC(column3, slab_z)] * (dx) * (dy) * (1.0 - dz) + grid[FC(column3, slab_zz)] * (dx) * (dy) * (dz);
+        }
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+    }
+
+  /* exchange the potential component data */
+  int flag_big = 0, flag_big_all;
+  for(i = 0; i < NTask; i++)
+    if(Sndpm_count[i] * sizeof(double) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+      flag_big = 1;
+
+  /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
+   * transfer the data in chunks.
+   */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  /* exchange data: the Rcvpm tables describe what is sent (flistin), the Sndpm tables what is received (flistout) */
+  myMPI_Alltoallv(flistin, Rcvpm_count, Rcvpm_offset, flistout, Sndpm_count, Sndpm_offset, sizeof(double), flag_big_all,
+                  MPI_COMM_WORLD);
+
+  /* now assign them to the correct particles, walking the particles in order with one read cursor per target task */
+  int multiNtask = roundup_to_multiple_of_cacheline_size(NTask * sizeof(size_t)) / sizeof(size_t);
+
+  {
+    size_t *send_count  = Sndpm_count + get_thread_num() * multiNtask;
+    size_t *send_offset = Sndpm_offset + get_thread_num() * multiNtask;
+
+    int j;
+    for(j = 0; j < NTask; j++)
+      send_count[j] = 0; /* reused as the running read position into flistout for task j */
+
+    int i; /* note: shadows the size_t i declared at function scope */
+    for(i = 0; i < NumPart; i++)
+      {
+        MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+        if(P[i].Type == 0)
+          pos = SphP[i].Center;
+        else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+          pos = P[i].Pos;
+
+        if(pos[0] < All.Corner[grnr][0] || pos[0] >= All.UpperCorner[grnr][0]) /* particles outside [Corner, UpperCorner) are ignored */
+          continue;
+        if(pos[1] < All.Corner[grnr][1] || pos[1] >= All.UpperCorner[grnr][1])
+          continue;
+        if(pos[2] < All.Corner[grnr][2] || pos[2] >= All.UpperCorner[grnr][2])
+          continue;
+
+        int slab_x  = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0]));
+        int slab_xx = slab_x + 1;
+
+#ifndef FFT_COLUMN_BASED
+        int task0   = myplan.slab_to_task[slab_x];
+        int task1   = myplan.slab_to_task[slab_xx];
+
+        double value = flistout[send_offset[task0] + send_count[task0]++]; /* partial sum from the task owning the lower x slab */
+
+        if(task0 != task1)
+          value += flistout[send_offset[task1] + send_count[task1]++];
+#else  /* #ifndef FFT_COLUMN_BASED */
+        int slab_y = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1]));
+        int slab_yy = slab_y + 1;
+
+        int column0 = slab_x * GRID + slab_y;
+        int column1 = slab_x * GRID + slab_yy;
+        int column2 = slab_xx * GRID + slab_y;
+        int column3 = slab_xx * GRID + slab_yy;
+
+        int task0, task1, task2, task3;
+
+        if(column0 < myplan.pivotcol)
+          task0 = column0 / myplan.avg;
+        else
+          task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column1 < myplan.pivotcol)
+          task1 = column1 / myplan.avg;
+        else
+          task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column2 < myplan.pivotcol)
+          task2 = column2 / myplan.avg;
+        else
+          task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column3 < myplan.pivotcol)
+          task3 = column3 / myplan.avg;
+        else
+          task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        double value = flistout[send_offset[task0] + send_count[task0]++]; /* one partial sum per distinct task among the four columns */
+
+        if(task1 != task0)
+          value += flistout[send_offset[task1] + send_count[task1]++];
+
+        if(task2 != task1 && task2 != task0)
+          value += flistout[send_offset[task2] + send_count[task2]++];
+
+        if(task3 != task0 && task3 != task1 && task3 != task2)
+          value += flistout[send_offset[task3] + send_count[task3]++];
+#endif /* #ifndef FFT_COLUMN_BASED */
+
+#ifdef PLACEHIGHRESREGION
+        if(grnr == 1) /* tested only after the cursors advanced, so a skipped particle still consumes its flistout entries */
+          if(!(pmforce_is_particle_high_res(P[i].Type, pos)))
+            continue;
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+        if(dim < 0) /* without EVALPOTENTIAL this branch is empty and the value is discarded */
+          {
+#ifdef EVALPOTENTIAL
+            P[i].PM_Potential += value * fac;
+#endif /* #ifdef EVALPOTENTIAL */
+          }
+        else
+          P[i].GravPM[dim] += value;
+      }
+  }
+
+  int j;
+  /* restore total Sndpm_count (the loop above left per-thread cursor values in the table) */
+  for(j = 1; j < MaxThreads; j++)
+    for(i = 0; i < NTask; i++)
+      Sndpm_count[i] += Sndpm_count[i + j * multiNtask];
+
+  myfree(flistout);
+  myfree(flistin);
+}
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+
+/*! \brief Calculates the long-range non-periodic forces using the PM method.
+ *
+ *  The potential is Gaussian filtered with Asmth, given in mesh-cell units.
+ *  The potential is finite differenced using a 4-point finite differencing
+ *  formula to obtain the force fields, which are then interpolated to the
+ *  particle positions. We carry out a CIC charge assignment, and compute the
+ *  potential by Fourier transform methods. The CIC kernel is deconvolved.
+ *
+ *  \param[in] grnr Grid number (0: base grid, 1 high res grid).
+ *
+ *  \return 0 on success, 1 if at least one particle lies outside the PM mesh.
+ */
+int pmforce_nonperiodic(int grnr)
+{
+  int i, j, flag, flagsum, dim;
+
+  double tstart = second();
+
+  mpi_printf("PM-NONPERIODIC: Starting non-periodic PM calculation (grid=%d)  presently allocated=%g MB).\n", grnr,
+             AllocatedBytes / (1024.0 * 1024.0));
+
+#ifndef NUMPART_PER_TASK_LARGE
+  if((((long long)NumPart) << 3) >= (((long long)1) << 31))
+    terminate("We are dealing with a too large particle number per MPI rank - enabling NUMPART_PER_TASK_LARGE might help.");
+#endif /* #ifndef NUMPART_PER_TASK_LARGE */
+
+  double fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3); /* to get potential */
+  fac *= 1 / (2 * All.TotalMeshSize[grnr] / GRID);                                               /* for finite differencing */
+
+  /* first, check whether all particles lie in the allowed region */
+  for(i = 0, flag = 0; i < NumPart; i++)
+    {
+      MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+      if(P[i].Type == 0)
+        pos = SphP[i].Center;
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[i].Pos;
+
+#ifdef PLACEHIGHRESREGION
+      if(grnr == 0 || (grnr == 1 && pmforce_is_particle_high_res(P[i].Type, pos)))
+#endif /* #ifdef PLACEHIGHRESREGION */
+        {
+          for(j = 0; j < 3; j++)
+            {
+              if(pos[j] < All.Xmintot[grnr][j] || pos[j] > All.Xmaxtot[grnr][j])
+                {
+                  if(flag == 0)
+                    {
+                      printf("Particle Id=%llu on task=%d with coordinates (%g|%g|%g) lies outside PM mesh.\n",
+                             (unsigned long long)P[i].ID, ThisTask, pos[0], pos[1], pos[2]);
+                      myflush(stdout);
+                    }
+                  flag++;
+                  break;
+                }
+            }
+        }
+    }
+
+  MPI_Allreduce(&flag, &flagsum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+  if(flagsum > 0)
+    {
+      mpi_printf("PM-NONPERIODIC: In total %d particles were outside allowed range.\n", flagsum);
+      return 1; /* error - need to return because particles were outside allowed range */
+    }
+
+#ifdef PM_ZOOM_OPTIMIZED
+  pmforce_nonperiodic_zoom_optimized_prepare_density(grnr);
+#else  /* #ifdef PM_ZOOM_OPTIMIZED */
+  pmforce_nonperiodic_uniform_optimized_prepare_density(grnr);
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+
+  /* allocate the memory to hold the FFT fields */
+  forcegrid = (fft_real *)mymalloc("forcegrid", maxfftsize * sizeof(fft_real));
+
+  workspace = forcegrid;
+
+#ifndef FFT_COLUMN_BASED
+  fft_of_rhogrid = (fft_complex *)&rhogrid[0];
+#else  /* #ifndef FFT_COLUMN_BASED */
+  fft_of_rhogrid = (fft_complex *)&workspace[0];
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  /* Do the FFT of the density field */
+#ifndef FFT_COLUMN_BASED
+  my_slab_based_fft(&myplan, &rhogrid[0], &workspace[0], 1);
+#else  /* #ifndef FFT_COLUMN_BASED */
+  my_column_based_fft(&myplan, rhogrid, workspace, 1); /* result is in workspace, not in rhogrid ! */
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  /* multiply with kernel in Fourier space: */
+  /* complex product of the transformed density with the Fourier transform of the Green's function (kernel), */
+  /* which yields the potential in Fourier space; the product overwrites fft_of_rhogrid in place */
+
+#ifdef FFT_COLUMN_BASED
+  for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip++)
+    {
+#else  /* #ifdef FFT_COLUMN_BASED */
+  for(int x = 0; x < GRID; x++)
+    for(int y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++)
+      for(int z = 0; z < GRIDz; z++)
+        {
+#endif /* #ifdef FFT_COLUMN_BASED #else */
+
+#ifndef FFT_COLUMN_BASED
+      large_array_offset ip = ((large_array_offset)GRIDz) * (GRID * (y - myplan.slabstart_y) + x) + z;
+#endif /* #ifndef FFT_COLUMN_BASED */
+
+      double re = fft_of_rhogrid[ip][0] * fft_of_kernel[grnr][ip][0] - fft_of_rhogrid[ip][1] * fft_of_kernel[grnr][ip][1];
+      double im = fft_of_rhogrid[ip][0] * fft_of_kernel[grnr][ip][1] + fft_of_rhogrid[ip][1] * fft_of_kernel[grnr][ip][0];
+
+      fft_of_rhogrid[ip][0] = re;
+      fft_of_rhogrid[ip][1] = im;
+    }
+
+    /* Do the inverse FFT to get the potential */
+
+#ifndef FFT_COLUMN_BASED
+  my_slab_based_fft(&myplan, rhogrid, workspace, -1);
+#else  /* #ifndef FFT_COLUMN_BASED */
+  my_column_based_fft(&myplan, workspace, rhogrid, -1);
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  /* Now rhogrid holds the potential */
+
+#ifdef EVALPOTENTIAL
+#ifdef PM_ZOOM_OPTIMIZED
+  pmforce_nonperiodic_zoom_optimized_readout_forces_or_potential(grnr, -1);
+#else  /* #ifdef PM_ZOOM_OPTIMIZED */
+  pmforce_nonperiodic_uniform_optimized_readout_forces_or_potential(grnr, -1);
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+#endif /* #ifdef EVALPOTENTIAL */
+
+  /* get the force components by finite differencing of the potential for each dimension,
+   * and send the results back to the right CPUs
+   */
+  for(dim = 2; dim >= 0; dim--) /* Calculate each component of the force. */
+    {
+      /* we do the x component last, because for differencing the potential in the x-direction, we need to construct the transpose */
+#ifndef FFT_COLUMN_BASED
+      if(dim == 0)
+        my_slab_transposeA(&myplan, rhogrid, forcegrid); /* compute the transpose of the potential field for finite differencing */
+
+      for(int y = 2; y < GRID / 2 - 2; y++)
+        for(int x = 0; x < myplan.nslab_x; x++)
+          if(x + myplan.slabstart_x >= 2 && x + myplan.slabstart_x < GRID / 2 - 2)
+            for(int z = 2; z < GRID / 2 - 2; z++)
+              {
+                int yrr = y, yll = y, yr = y, yl = y;
+                int zrr = z, zll = z, zr = z, zl = z;
+
+                switch(dim)
+                  {
+                    case 0: /* note: for the x-direction, we difference the transposed direction (y) */
+                    case 1:
+                      yr  = y + 1;
+                      yl  = y - 1;
+                      yrr = y + 2;
+                      yll = y - 2;
+
+                      break;
+                    case 2:
+                      zr  = z + 1;
+                      zl  = z - 1;
+                      zrr = z + 2;
+                      zll = z - 2;
+
+                      break;
+                  }
+
+                if(dim == 0)
+                  forcegrid[TI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[TI(x, yl, zl)] - rhogrid[TI(x, yr, zr)]) -
+                                                  (1.0 / 6) * (rhogrid[TI(x, yll, zll)] - rhogrid[TI(x, yrr, zrr)]));
+                else
+                  forcegrid[FI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[FI(x, yl, zl)] - rhogrid[FI(x, yr, zr)]) -
+                                                  (1.0 / 6) * (rhogrid[FI(x, yll, zll)] - rhogrid[FI(x, yrr, zrr)]));
+              }
+
+      if(dim == 0)
+        my_slab_transposeB(&myplan, forcegrid, rhogrid); /* reverse the transpose from above */
+#else                                                    /* #ifndef FFT_COLUMN_BASED */
+      fft_real *scratch = NULL, *forcep, *potp;
+
+      if(dim != 2)
+        {
+          scratch = mymalloc("scratch", myplan.fftsize * sizeof(fft_real)); /* need a third field as scratch space */
+          memcpy(scratch, rhogrid, myplan.fftsize * sizeof(fft_real));
+
+          if(dim == 1)
+            my_fft_swap23(&myplan, scratch, forcegrid);
+          else
+            my_fft_swap13(&myplan, scratch, forcegrid);
+        }
+
+      int ncols;
+      if(dim == 2)
+        ncols = myplan.base_ncol;
+      else if(dim == 1)
+        ncols = myplan.ncol_XZ;
+      else
+        ncols = myplan.ncol_YZ;
+
+      large_array_offset i;
+
+      for(i = 0; i < ncols; i++)
+        {
+          if(dim != 2)
+            {
+              forcep = &scratch[GRID * i];
+              potp   = &forcegrid[GRID * i];
+            }
+          else
+            {
+              forcep = &forcegrid[GRID2 * i];
+              potp   = &rhogrid[GRID2 * i];
+            }
+
+          int z;
+          for(z = 2; z < GRID / 2 - 2; z++)
+            {
+              int zr  = z + 1;
+              int zl  = z - 1;
+              int zrr = z + 2;
+              int zll = z - 2;
+
+              forcep[z] = fac * ((4.0 / 3) * (potp[zl] - potp[zr]) - (1.0 / 6) * (potp[zll] - potp[zrr]));
+            }
+        }
+
+      if(dim != 2)
+        {
+          if(dim == 1)
+            my_fft_swap23back(&myplan, scratch, forcegrid);
+          else
+            my_fft_swap13back(&myplan, scratch, forcegrid);
+
+          myfree(scratch);
+        }
+#endif                                                   /* #ifndef FFT_COLUMN_BASED #else */
+
+#ifdef PM_ZOOM_OPTIMIZED
+      pmforce_nonperiodic_zoom_optimized_readout_forces_or_potential(grnr, dim);
+#else  /* #ifdef PM_ZOOM_OPTIMIZED */
+      pmforce_nonperiodic_uniform_optimized_readout_forces_or_potential(grnr, dim);
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+    }
+
+  /* free forcegrid from above; the other buffers were allocated outside this function (presumably by the prepare_density step) */
+  myfree(forcegrid);
+  myfree(rhogrid);
+
+#ifdef PM_ZOOM_OPTIMIZED
+  myfree(localfield_recvcount);
+  myfree(localfield_offset);
+  myfree(localfield_sendcount);
+  myfree(localfield_first);
+  myfree(localfield_data);
+  myfree(localfield_globalindex);
+  myfree(part);
+#else  /* #ifdef PM_ZOOM_OPTIMIZED */
+  myfree(partin);
+  myfree(Rcvpm_offset);
+  myfree(Rcvpm_count);
+  myfree(Sndpm_offset);
+  myfree(Sndpm_count);
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+
+  double tend = second();
+
+  mpi_printf("PM-NONPERIODIC: done.  (took %g seconds)\n", timediff(tstart, tend));
+
+  return 0;
+}
+
+/*! \brief Sets up the Green's function for the non-periodic potential in real
+ *         space, converts it to Fourier space by means of an FFT, and deconvolves it twice with the CIC kernel.
+ *
+ *  \return void
+ */
+void pm_setup_nonperiodic_kernel(void)
+{
+  int i, j, k, x, y, z;
+  double xx, yy, zz, r, u, fac;
+
+  mpi_printf("PM-NONPERIODIC: Setting up non-periodic PM kernel (GRID=%d)  presently allocated=%g MB).\n", (int)GRID,
+             AllocatedBytes / (1024.0 * 1024.0));
+
+  /* now set up kernel and its Fourier transform: kernel[0] if GRAVITY_NOT_PERIODIC, kernel[1] if PLACEHIGHRESREGION */
+
+#if defined(GRAVITY_NOT_PERIODIC)
+  for(i = 0; i < maxfftsize; i++) /* clear local field */
+    kernel[0][i] = 0;
+
+#ifndef FFT_COLUMN_BASED
+  for(i = myplan.slabstart_x; i < (myplan.slabstart_x + myplan.nslab_x); i++)
+    for(j = 0; j < GRID; j++)
+      {
+#else  /* #ifndef FFT_COLUMN_BASED */
+  int c;
+  for(c = myplan.base_firstcol; c < (myplan.base_firstcol + myplan.base_ncol); c++)
+    {
+      i = c / GRID;
+      j = c % GRID;
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+        for(k = 0; k < GRID; k++)
+          {
+            xx = ((double)i) / GRID;
+            yy = ((double)j) / GRID;
+            zz = ((double)k) / GRID;
+
+            if(xx >= 0.5)
+              xx -= 1.0;
+            if(yy >= 0.5)
+              yy -= 1.0;
+            if(zz >= 0.5)
+              zz -= 1.0;
+
+            r = sqrt(xx * xx + yy * yy + zz * zz);
+
+            u = 0.5 * r / (((double)ASMTH) / GRID);
+
+            fac = 1 - erfc(u);
+
+#ifndef FFT_COLUMN_BASED
+            size_t ip = FI(i - myplan.slabstart_x, j, k);
+#else  /* #ifndef FFT_COLUMN_BASED */
+          size_t ip = FC(c, k);
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+            if(r > 0)
+              kernel[0][ip] = -fac / r;
+            else
+              kernel[0][ip] = -1 / (sqrt(M_PI) * (((double)ASMTH) / GRID));
+          }
+      }
+
+  {
+    fft_real *workspc = (fft_real *)mymalloc("workspc", maxfftsize * sizeof(fft_real));
+    /* Do the FFT of the kernel */
+#ifndef FFT_COLUMN_BASED
+    my_slab_based_fft(&myplan, kernel[0], workspc, 1);
+#else  /* #ifndef FFT_COLUMN_BASED */
+    my_column_based_fft(&myplan, kernel[0], workspc, 1); /* result is in workspace, not in kernel */
+    memcpy(kernel[0], workspc, maxfftsize * sizeof(fft_real));
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+    myfree(workspc);
+  }
+
+#endif /* #if defined(GRAVITY_NOT_PERIODIC) */
+
+#if defined(PLACEHIGHRESREGION)
+
+  for(i = 0; i < maxfftsize; i++) /* clear local field */
+    kernel[1][i] = 0;
+
+#ifndef FFT_COLUMN_BASED
+  for(i = myplan.slabstart_x; i < (myplan.slabstart_x + myplan.nslab_x); i++)
+    for(j = 0; j < GRID; j++)
+      {
+#else  /* #ifndef FFT_COLUMN_BASED */
+  int c;
+  for(c = myplan.base_firstcol; c < (myplan.base_firstcol + myplan.base_ncol); c++)
+    {
+      i = c / GRID;
+      j = c % GRID;
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+        for(k = 0; k < GRID; k++)
+          {
+            xx = ((double)i) / GRID;
+            yy = ((double)j) / GRID;
+            zz = ((double)k) / GRID;
+
+            if(xx >= 0.5)
+              xx -= 1.0;
+            if(yy >= 0.5)
+              yy -= 1.0;
+            if(zz >= 0.5)
+              zz -= 1.0;
+
+            r = sqrt(xx * xx + yy * yy + zz * zz);
+
+            u = 0.5 * r / (((double)ASMTH) / GRID);
+
+            fac = erfc(u * All.Asmth[1] / All.Asmth[0]) - erfc(u);
+
+#ifndef FFT_COLUMN_BASED
+            size_t ip = FI(i - myplan.slabstart_x, j, k);
+#else  /* #ifndef FFT_COLUMN_BASED */
+          size_t ip = FC(c, k);
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+            if(r > 0)
+              kernel[1][ip] = -fac / r;
+            else
+              {
+                fac           = 1 - All.Asmth[1] / All.Asmth[0];
+                kernel[1][ip] = -fac / (sqrt(M_PI) * (((double)ASMTH) / GRID));
+              }
+          }
+      }
+
+  {
+    fft_real *workspc = (fft_real *)mymalloc("workspc", maxfftsize * sizeof(fft_real));
+    /* Do the FFT of the kernel */
+#ifndef FFT_COLUMN_BASED
+    my_slab_based_fft(&myplan, kernel[1], workspc, 1);
+#else  /* #ifndef FFT_COLUMN_BASED */
+    my_column_based_fft(&myplan, kernel[1], workspc, 1); /* result is in workspace, not in kernel */
+    memcpy(kernel[1], workspc, maxfftsize * sizeof(fft_real));
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+    myfree(workspc);
+  }
+
+#endif /* #if defined(PLACEHIGHRESREGION) */
+
+  /* deconvolve the Green's function twice with the CIC kernel, i.e. multiply by ff = 1 / (fx * fy * fz)^4 for k2 > 0 */
+#ifdef FFT_COLUMN_BASED
+
+  large_array_offset ip, ipcell;
+
+  for(ip = 0; ip < myplan.second_transposed_ncells; ip++)
+    {
+      ipcell = ip + myplan.transposed_firstcol * GRID;
+      y      = ipcell / (GRID * GRIDz);
+      int yr = ipcell % (GRID * GRIDz);
+      z      = yr / GRID;
+      x      = yr % GRID;
+#else  /* #ifdef FFT_COLUMN_BASED */
+  for(x = 0; x < GRID; x++)
+    for(y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++)
+      for(z = 0; z < GRIDz; z++)
+        {
+#endif /* #ifdef FFT_COLUMN_BASED #else */
+
+      double kx, ky, kz;
+
+      if(x > GRID / 2)
+        kx = x - GRID;
+      else
+        kx = x;
+      if(y > GRID / 2)
+        ky = y - GRID;
+      else
+        ky = y;
+      if(z > GRID / 2)
+        kz = z - GRID;
+      else
+        kz = z;
+
+      double k2 = kx * kx + ky * ky + kz * kz;
+
+      if(k2 > 0)
+        {
+          double fx = 1, fy = 1, fz = 1;
+
+          if(kx != 0)
+            {
+              fx = (M_PI * kx) / GRID;
+              fx = sin(fx) / fx;
+            }
+          if(ky != 0)
+            {
+              fy = (M_PI * ky) / GRID;
+              fy = sin(fy) / fy;
+            }
+          if(kz != 0)
+            {
+              fz = (M_PI * kz) / GRID;
+              fz = sin(fz) / fz;
+            }
+
+          double ff = 1 / (fx * fy * fz);
+          ff        = ff * ff * ff * ff;
+
+#ifndef FFT_COLUMN_BASED
+          large_array_offset ip = ((large_array_offset)GRIDz) * (GRID * (y - myplan.slabstart_y) + x) + z;
+#endif /* #ifndef FFT_COLUMN_BASED */
+#if defined(GRAVITY_NOT_PERIODIC)
+          fft_of_kernel[0][ip][0] *= ff;
+          fft_of_kernel[0][ip][1] *= ff;
+#endif /* #if defined(GRAVITY_NOT_PERIODIC) */
+#if defined(PLACEHIGHRESREGION)
+          fft_of_kernel[1][ip][0] *= ff;
+          fft_of_kernel[1][ip][1] *= ff;
+#endif /* #if defined(PLACEHIGHRESREGION) */
+        }
+    }
+
+  /* end deconvolution */
+}
+
+#ifdef PM_ZOOM_OPTIMIZED
+
+/*! \brief Sort function for 'part' array indices.
+ *
+ *  Orders two indices into the 'part' array by the globalindex of the
+ *  entries they refer to. The indices are read as large_numpart_type.
+ *
+ *  \param[in] a Pointer to the first index to be compared.
+ *  \param[in] b Pointer to the second index to be compared.
+ *
+ *  \return -1, 0 or +1 if a's globalindex is smaller than, equal to or larger than b's.
+ */
+static int pm_periodic_compare_sortindex(const void *a, const void *b)
+{
+  const large_numpart_type ia = *(const large_numpart_type *)a; /* same element type as used by the merge sort */
+  const large_numpart_type ib = *(const large_numpart_type *)b;
+
+  if(part[ia].globalindex < part[ib].globalindex)
+    return -1;
+
+  return (part[ia].globalindex > part[ib].globalindex) ? +1 : 0;
+}
+
+/*! \brief Recursive merge-sort kernel behind mysort_pmperiodic().
+ *
+ *  Orders the indices by ascending 'globalindex' of the 'part' entries they refer to; ties keep their order.
+ *
+ *  \param[in, out] b Index array to sort.
+ *  \param[in] n Number of elements to sort.
+ *  \param[out] t Scratch buffer holding at least n elements.
+ *
+ *  \return void
+ */
+static void msort_pmperiodic_with_tmp(large_numpart_type *b, size_t n, large_numpart_type *t)
+{
+  if(n < 2)
+    return; /* nothing to do for zero or one element */
+
+  size_t nleft               = n / 2;
+  size_t nright              = n - nleft;
+  large_numpart_type *left   = b;
+  large_numpart_type *right  = b + nleft;
+  large_numpart_type *merged = t;
+
+  /* sort the two halves on their own; both calls share the scratch buffer */
+  msort_pmperiodic_with_tmp(left, nleft, t);
+  msort_pmperiodic_with_tmp(right, nright, t);
+
+  /* merge into the scratch buffer until one of the halves is used up */
+  while(nleft > 0 && nright > 0)
+    {
+      if(part[*right].globalindex < part[*left].globalindex)
+        {
+          *merged++ = *right++;
+          nright--;
+        }
+      else
+        {
+          *merged++ = *left++;
+          nleft--;
+        }
+    }
+
+  /* append whatever remains of the first half */
+  if(nleft > 0)
+    memcpy(merged, left, nleft * sizeof(large_numpart_type));
+
+  /* a remainder of the second half already sits at its final position at the end of b,
+   * so only the n - nright merged elements are copied back */
+  memcpy(b, t, (n - nright) * sizeof(large_numpart_type));
+}
+
+/*! \brief Sorts the index array b of n entries by the globalindex field of the
+ *         referenced item in the 'part' array.
+ *
+ *  The signature mirrors qsort(), but s and cmp are not used: the entries are
+ *  always treated as large_numpart_type and compared by a built-in merge sort.
+ *  The scratch buffer is therefore sized from that type rather than from s.
+ *
+ *  \param[in, out] b The index array to sort (large_numpart_type entries).
+ *  \param[in] n Number of entries in array b.
+ *  \param[in] s Size of each entry (ignored; should be sizeof(large_numpart_type)).
+ *  \param[in] cmp Comparison function (ignored).
+ *
+ *  \return void
+ */
+static void mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *))
+{
+  (void)s;   /* the element size is fixed by the index type, see the allocation below */
+  (void)cmp; /* the merge sort compares 'globalindex' directly */
+
+  large_numpart_type *tmp = (large_numpart_type *)mymalloc("tmp", n * sizeof(large_numpart_type));
+  msort_pmperiodic_with_tmp((large_numpart_type *)b, n, tmp);
+
+  myfree(tmp);
+}
+#endif /* #ifdef PM_ZOOM_OPTIMIZED */
+
+#endif /* #if defined(PMGRID) && (defined(PLACEHIGHRESREGION) || defined(GRAVITY_NOT_PERIODIC)) */
diff --git a/src/amuse/community/arepo/src/gravity/pm/pm_periodic.c b/src/amuse/community/arepo/src/gravity/pm/pm_periodic.c
new file mode 100644
index 0000000000..319404f797
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/pm/pm_periodic.c
@@ -0,0 +1,2034 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/pm/pm_periodic.c
+ * \date        05/2018
+ * \brief       Routines for periodic PM-force computation.
+ * \details     These routines support two different strategies for doing the
+ *              particle data exchange to assemble the density field and to
+ *              read out the forces and potentials:
+ *
+ *              The default scheme sends the particle positions to the target
+ *              slabs, and bins them there. This works usually well for
+ *              homogeneously loaded boxes, but can be problematic for zoom-in
+ *              runs. In the latter case, PM_ZOOM_OPTIMIZED can be activated,
+ *              where the data is binned on the originating processor followed
+ *              by assembly of the binned density field.
+ *
+ *              In addition, the routines can be either used with a slab-based
+ *              FFT (as is traditionally done in FFTW), or with a column-based
+ *              FFT. The latter requires more communication and is hence
+ *              usually slower than the slab-based one. But if the number of
+ *              MPI ranks exceeds the number of cells per dimension, then the
+ *              column-based one can still scale and offers a balanced memory
+ *              consumption, whereas this is not the case for the slab-based
+ *              approach. To select the column-based FFT, the switch
+ *              FFT_COLUMN_BASED can be activated.
+ *
+ *              The switches PM_ZOOM_OPTIMIZED and FFT_COLUMN_BASED may also
+ *              be combined, such that there are 4 main modes of how the PM
+ *              routines may operate.
+ *
+ *              It is also possible to use non-cubical boxes, by means of
+ *              setting one or several of the LONG_X, LONG_Y, and LONG_Z
+ *              options in the config file. The values need to be integers,
+ *              and then BoxSize is stretched by that factor in the
+ *              corresponding dimension.
+ *
+ *              Much of the code is multi-threaded, so there should be some
+ *              speed-up if OpenMP is used with NUM_THREADS > 1, but the
+ *              benefit may be limited because the data transfer steps (which
+ *              weigh in quite heavily) are not accelerated by this.
+ *
+ *              If eight times the particle load per processor exceeds 2^31
+ *              ~ 2 billion, one should activate NUMPART_PER_TASK_LARGE. The
+ *              code will check this condition and terminate if it is
+ *              violated, so there should be little risk of accidentally
+ *              forgetting this.
+ *
+ *              contains functions:
+ *                void pm_init_periodic(void)
+ *                void pmforce_zoom_optimized_prepare_density(int mode, int
+ *                  *typelist)
+ *                void pmforce_zoom_optimized_readout_forces_or_potential(int
+ *                  dim)
+ *                static void pmforce_uniform_optimized_prepare_density(int
+ *                  mode)
+ *                static void pmforce_uniform_optimized_readout_forces_or_
+ *                  potential(int dim)
+ *                void pmforce_periodic(int mode, int *typelist)
+ *                static int pm_periodic_compare_sortindex(const void *a,
+ *                  const void *b)
+ *                static void msort_pmperiodic_with_tmp(large_numpart_type * b,
+ *                  size_t n, large_numpart_type * t)
+ *                static void mysort_pmperiodic(void *b, size_t n, size_t s,
+ *                  int (*cmp) (const void *, const void *))
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 15.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#if defined(PMGRID)
+
+#define GRIDX (PMGRID * STRETCHX * DBX + DBX_EXTRA)
+#define GRIDY (PMGRID * STRETCHY * DBY + DBY_EXTRA)
+#define GRIDZ (PMGRID * STRETCHZ * DBZ + DBZ_EXTRA)
+
+#define GRIDz (GRIDZ / 2 + 1) /* used in pm_init_periodic() as the distance between complex z-columns of the r2c/c2r plans */
+#define GRID2 (2 * GRIDz)     /* the same distance counted in real elements */
+
+#if(GRIDX > 1024) || (GRIDY > 1024) || (GRIDZ > 1024)
+typedef long long large_array_offset; /* use a larger data type in this case so that we can always address all cells of the 3D grid
+                                         with a single index */
+#else                                 /* #if (GRIDX > 1024) || (GRIDY > 1024) || (GRIDZ > 1024) */
+typedef unsigned int large_array_offset;
+#endif                                /* #if (GRIDX > 1024) || (GRIDY > 1024) || (GRIDZ > 1024) #else */
+
+#ifdef NUMPART_PER_TASK_LARGE
+typedef long long large_numpart_type; /* if there is a risk that the local particle number times 8 overflows a 32-bit integer, this
+                                         data type should be used */
+#else                                 /* #ifdef NUMPART_PER_TASK_LARGE */
+typedef int large_numpart_type;
+#endif                                /* #ifdef NUMPART_PER_TASK_LARGE #else */
+
+/* short-cut macros that map grid coordinates to the linear index used by the different 3D array layouts */
+#define FI(x, y, z) (((large_array_offset)GRID2) * (GRIDY * (x) + (y)) + (z))
+#define FC(c, z) (((large_array_offset)GRID2) * ((c)-myplan.base_firstcol) + (z))
+#ifndef FFT_COLUMN_BASED
+#define NI(x, y, z) (((large_array_offset)GRIDZ) * ((y) + (x)*myplan.nslab_y) + (z))
+#endif /* #ifndef FFT_COLUMN_BASED */
+
+/* parameters for power spectrum estimation; each default applies only if the macro is not already defined */
+#ifndef BINS_PS
+#define BINS_PS 2000 /* number of bins for power spectrum computation */
+#endif               /* #ifndef BINS_PS */
+#ifndef POWERSPEC_FOLDFAC
+#define POWERSPEC_FOLDFAC 16. /* folding factor to obtain an estimate of the power spectrum on very small scales */
+#endif                        /* #ifndef POWERSPEC_FOLDFAC */
+
+static fft_plan myplan; /*!< In this structure, various bookkeeping variables for the distributed FFTs are stored */
+
+/*! \var maxfftsize
+ *  \brief maximum size of the local fft grid among all tasks
+ */
+static size_t maxfftsize;
+
+/*! \var rhogrid
+ *  \brief This array holds the local part of the density field and,
+ *  after the FFTs, the local part of the potential
+ *
+ *  \var forcegrid
+ *  \brief This array will contain the force field
+ *
+ *  \var workspace
+ *  \brief Workspace array used during the FFTs
+ */
+static fft_real *rhogrid, *forcegrid, *workspace;
+
+/*! \brief Array containing the FFT of #rhogrid
+ *
+ *  This pointer points to the same array as #rhogrid,
+ *  because in-place FFTs are used.
+ */
+static fft_complex *fft_of_rhogrid;
+
+/* Variables for power spectrum calculation */
+static double power_spec_totmass, power_spec_totmass2;
+static long long power_spec_totnumpart;
+
+/*! \brief Creates the FFTW plans that are used for the FFTs of the periodic PM mesh later on.
+ *
+ *  Also sets All.Asmth[0] and All.Rcut[0], initializes the slab- or column-based FFT bookkeeping, and sets maxfftsize.
+ *
+ *  \return void
+ */
+void pm_init_periodic(void)
+{
+  /* the LONG_X, LONG_Y and LONG_Z factors have to be whole numbers */
+#ifdef LONG_X
+  if((int)(LONG_X) != LONG_X)
+    terminate("LONG_X must be an integer if used with PMGRID");
+#endif /* #ifdef LONG_X */
+
+#ifdef LONG_Y
+  if((int)(LONG_Y) != LONG_Y)
+    terminate("LONG_Y must be an integer if used with PMGRID");
+#endif /* #ifdef LONG_Y */
+
+#ifdef LONG_Z
+  if((int)(LONG_Z) != LONG_Z)
+    terminate("LONG_Z must be an integer if used with PMGRID");
+#endif /* #ifdef LONG_Z */
+
+  /* length scales for grid 0: Asmth is ASMTH times BoxSize / PMGRID, Rcut is RCUT times Asmth */
+  All.Asmth[0] = ASMTH * All.BoxSize / PMGRID;
+  All.Rcut[0]  = RCUT * All.Asmth[0];
+
+  /* lengths of the 1D transforms along each axis, in the array form taken by the FFTW-3 planner */
+  int len_x[1] = {GRIDX};
+  int len_y[1] = {GRIDY};
+  int len_z[1] = {GRIDZ};
+
+  /* planner flags; for single precision, the start of our FFT columns is presently only guaranteed to be 8-byte aligned */
+#ifdef DOUBLEPRECISION_FFTW
+  const unsigned int plan_flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT;
+#else  /* #ifdef DOUBLEPRECISION_FFTW */
+  const unsigned int plan_flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT | FFTW_UNALIGNED;
+#endif /* #ifdef DOUBLEPRECISION_FFTW #else */
+
+  /* element stride handed to the complex x- and y-transforms */
+#ifndef FFT_COLUMN_BASED
+  const int cstride = GRIDz;
+#else  /* #ifndef FFT_COLUMN_BASED */
+  const int cstride = 1;
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  /* temporarily allocate some arrays to make sure that out-of-place plans are created */
+  const int tmp_len = 2 * (imax(imax(GRIDX, GRIDY), GRIDZ) / 2 + 1);
+
+  rhogrid   = (fft_real *)mymalloc("rhogrid", tmp_len * sizeof(fft_real));
+  forcegrid = (fft_real *)mymalloc("forcegrid", tmp_len * sizeof(fft_real));
+
+  fft_complex *cplx_in  = (fft_complex *)rhogrid;
+  fft_complex *cplx_out = (fft_complex *)forcegrid;
+
+  /* forward plans: real-to-complex along z, complex-to-complex along y and x */
+  myplan.forward_plan_zdir = FFTW(plan_many_dft_r2c)(1, len_z, 1, rhogrid, 0, 1, GRID2, cplx_out, 0, 1, GRIDz, plan_flags);
+
+  myplan.forward_plan_ydir = FFTW(plan_many_dft)(1, len_y, 1, cplx_in, 0, cstride, GRIDz * GRIDY, cplx_out, 0, cstride,
+                                                 GRIDz * GRIDY, FFTW_FORWARD, plan_flags);
+
+  myplan.forward_plan_xdir = FFTW(plan_many_dft)(1, len_x, 1, cplx_in, 0, cstride, GRIDz * GRIDX, cplx_out, 0, cstride,
+                                                 GRIDz * GRIDX, FFTW_FORWARD, plan_flags);
+
+  /* backward plans: complex-to-complex along x and y, complex-to-real along z */
+  myplan.backward_plan_xdir = FFTW(plan_many_dft)(1, len_x, 1, cplx_in, 0, cstride, GRIDz * GRIDX, cplx_out, 0, cstride,
+                                                  GRIDz * GRIDX, FFTW_BACKWARD, plan_flags);
+
+  myplan.backward_plan_ydir = FFTW(plan_many_dft)(1, len_y, 1, cplx_in, 0, cstride, GRIDz * GRIDY, cplx_out, 0, cstride,
+                                                  GRIDz * GRIDY, FFTW_BACKWARD, plan_flags);
+
+  myplan.backward_plan_zdir = FFTW(plan_many_dft_c2r)(1, len_z, 1, cplx_in, 0, 1, GRIDz, forcegrid, 0, 1, GRID2, plan_flags);
+
+  /* the temporary arrays were only needed while planning */
+  myfree(forcegrid);
+  myfree(rhogrid);
+
+  /* set up the bookkeeping of the distributed FFT and record the size of the local field */
+#ifndef FFT_COLUMN_BASED
+  my_slab_based_fft_init(&myplan, GRIDX, GRIDY, GRIDZ);
+
+  maxfftsize = ((size_t)GRID2) * imax(myplan.largest_x_slab * GRIDY, myplan.largest_y_slab * GRIDX);
+#else  /* #ifndef FFT_COLUMN_BASED */
+  my_column_based_fft_init(&myplan, GRIDX, GRIDY, GRIDZ);
+
+  maxfftsize = myplan.max_datasize;
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+}
+
+/* Below, the two functions
+ *
+ *           pmforce_ ...... _prepare_density()
+ * and
+ *           pmforce_ ...... _readout_forces_or_potential(int dim)
+ *
+ * are defined in two different versions, one that works better for uniform
+ * simulations, the other for zoom-in runs. Only one of the two sets is used,
+ * depending on the setting of PM_ZOOM_OPTIMIZED.
+ */
+#ifdef PM_ZOOM_OPTIMIZED
+static void mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)); /* sort routine, called below on the part_sortindex array */
+static int pm_periodic_compare_sortindex(const void *a, const void *b); /* comparison callback handed to mysort_pmperiodic() below */
+
+/*! \brief This structure links the particles to the mesh cells, to which they
+ *         contribute their mass.
+ *
+ *  Each particle will have eight items of this structure in the #part array.
+ *  For each of the eight mesh cells the CIC assignment will contribute,
+ *  one item of this struct exists. The eight items of local particle i are
+ *  stored in the consecutive slots 8*i ... 8*i+7 of #part.
+ */
+static struct part_slab_data
+{
+  large_array_offset globalindex; /*!< index in the global density mesh */
+  large_numpart_type partindex; /*!< local particle index shifted left by three bits; the lowest three bits (x, y, z cell offsets) encode
+                                   to which of the eight cells of the CIC assignment this item belongs */
+  large_array_offset localindex; /*!< index into the local list of unique field points (localfield_data / localfield_globalindex) that
+                                    represents this mesh cell (used during local mass and force assignment) */
+} * part;                        /*!< array of part_slab_data linking the local particles to their mesh cells */
+
+static size_t *localfield_sendcount, *localfield_first, *localfield_offset, *localfield_recvcount; /* per-task tables with NTask entries:
+                                   sendcount = number of local field points whose mesh cell maps to that task, first = index of the
+                                   first such field point, offset = running sum of sendcount, recvcount = counts obtained from the
+                                   other tasks via MPI_Alltoall */
+static large_array_offset *localfield_globalindex, *import_globalindex; /* global mesh index of each unique local field point, and the
+                                   index list currently being processed for one partner task */
+static fft_real *localfield_data, *import_data; /* value attached to each unique local field point, and the value list currently being
+                                   processed for one partner task */
+
+/*! \brief Prepares density field for PM calculation in zoom-optimized
+ *         algorithm.
+ *
+ *  Every local particle is linked to the eight mesh cells it touches in the
+ *  CIC assignment (stored in the #part array). The touched cells are sorted
+ *  by global mesh index and reduced to a list of unique field points, onto
+ *  which the particle masses are binned locally. The field points are then
+ *  exchanged pairwise between the tasks and accumulated in rhogrid.
+ *
+ *  The arrays part, localfield_* and rhogrid are allocated here and are not
+ *  released by this function.
+ *
+ *  \param[in] mode Modes force calculation or power spectrum calculation.
+ *             A non-zero mode restricts the binning to the particle types
+ *             enabled in typelist; for modes 2 and 3 the position-to-cell
+ *             factor is scaled by POWERSPEC_FOLDFAC and POWERSPEC_FOLDFAC^2
+ *             and cell indices are wrapped with a modulo operation.
+ *  \param[in] typelist Which particles to include (only for power spectrum).
+ *             Indexed by particle type; only read if mode is non-zero.
+ *
+ *  \return void
+ */
+void pmforce_zoom_optimized_prepare_density(int mode, int *typelist)
+{
+  large_numpart_type i;
+  int level, recvTask;
+  MPI_Status status;
+
+  double to_slab_fac =
+      PMGRID / All.BoxSize; /* note: This is the same as GRIDX / (All.BoxSize * LONG_X), and similarly for each dimension */
+
+  if(mode == 2)
+    to_slab_fac *= POWERSPEC_FOLDFAC;
+  if(mode == 3)
+    to_slab_fac *= POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC;
+
+  part                               = (struct part_slab_data *)mymalloc("part", 8 * (NumPart * sizeof(struct part_slab_data)));
+  large_numpart_type *part_sortindex = (large_numpart_type *)mymalloc("part_sortindex", 8 * (NumPart * sizeof(large_numpart_type)));
+
+  /* determine the cells each particle accesses */
+  for(i = 0; i < NumPart; i++)
+    {
+      MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+      MyDouble posw[3], xtmp, ytmp, ztmp;
+      if(P[i].Type == 0)
+        {
+          posw[0] = WRAP_X(SphP[i].Center[0]);
+          posw[1] = WRAP_Y(SphP[i].Center[1]);
+          posw[2] = WRAP_Z(SphP[i].Center[2]);
+
+          pos = posw;
+        }
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[i].Pos;
+
+      int slab_x = (int)(to_slab_fac * pos[0]);
+      int slab_y = (int)(to_slab_fac * pos[1]);
+      int slab_z = (int)(to_slab_fac * pos[2]);
+
+      if(mode >= 2)
+        {
+          slab_x %= GRIDX;
+          slab_y %= GRIDY;
+          slab_z %= GRIDZ;
+        }
+      else
+        {
+          if(slab_x >= GRIDX)
+            slab_x -= GRIDX;
+          if(slab_y >= GRIDY)
+            slab_y -= GRIDY;
+          if(slab_z >= GRIDZ)
+            slab_z -= GRIDZ;
+        }
+
+      /* the eight entries of particle i occupy the slots 8*i ... 8*i+7 */
+      large_numpart_type index_on_grid = ((large_numpart_type)i) << 3;
+
+      for(int xx = 0; xx < 2; xx++)
+        for(int yy = 0; yy < 2; yy++)
+          for(int zz = 0; zz < 2; zz++)
+            {
+              int slab_xx = slab_x + xx;
+              int slab_yy = slab_y + yy;
+              int slab_zz = slab_z + zz;
+
+              if(slab_xx >= GRIDX)
+                slab_xx -= GRIDX;
+              if(slab_yy >= GRIDY)
+                slab_yy -= GRIDY;
+              if(slab_zz >= GRIDZ)
+                slab_zz -= GRIDZ;
+
+              large_array_offset offset = FI(slab_xx, slab_yy, slab_zz);
+
+              part[index_on_grid].partindex   = (i << 3) + (xx << 2) + (yy << 1) + zz;
+              part[index_on_grid].globalindex = offset;
+              part_sortindex[index_on_grid]   = index_on_grid;
+              index_on_grid++;
+            }
+    }
+
+  /* note: num_on_grid will be  8 times larger than the particle number, but num_field_points will generally be much smaller */
+
+  large_array_offset num_field_points;
+  large_numpart_type num_on_grid = ((large_numpart_type)NumPart) << 3;
+
+  /* bring the part-field into the order of the accessed cells. This allows the removal of duplicates */
+  mysort_pmperiodic(part_sortindex, num_on_grid, sizeof(large_numpart_type), pm_periodic_compare_sortindex);
+
+  if(num_on_grid > 0)
+    num_field_points = 1;
+  else
+    num_field_points = 0;
+
+  /* determine the number of unique field points */
+  for(i = 1; i < num_on_grid; i++)
+    {
+      if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex)
+        num_field_points++;
+    }
+
+  /* allocate the local field */
+  localfield_globalindex = (large_array_offset *)mymalloc_movable(&localfield_globalindex, "localfield_globalindex",
+                                                                  num_field_points * sizeof(large_array_offset));
+  localfield_data        = (fft_real *)mymalloc_movable(&localfield_data, "localfield_data", num_field_points * sizeof(fft_real));
+  localfield_first       = (size_t *)mymalloc_movable(&localfield_first, "localfield_first", NTask * sizeof(size_t));
+  localfield_sendcount   = (size_t *)mymalloc_movable(&localfield_sendcount, "localfield_sendcount", NTask * sizeof(size_t));
+  localfield_offset      = (size_t *)mymalloc_movable(&localfield_offset, "localfield_offset", NTask * sizeof(size_t));
+  localfield_recvcount   = (size_t *)mymalloc_movable(&localfield_recvcount, "localfield_recvcount", NTask * sizeof(size_t));
+
+  for(i = 0; i < NTask; i++)
+    {
+      localfield_first[i]     = 0;
+      localfield_sendcount[i] = 0;
+    }
+
+  /* establish the cross link between the part[ ]-array and the local list of
+   * mesh points. Also, count on which CPU the needed field points are stored.
+   */
+  for(i = 0, num_field_points = 0; i < num_on_grid; i++)
+    {
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex)
+          num_field_points++;
+
+      part[part_sortindex[i]].localindex = num_field_points;
+
+      /* only the first entry of a run of identical cells registers a new field point */
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex == part[part_sortindex[i - 1]].globalindex)
+          continue;
+
+      localfield_globalindex[num_field_points] = part[part_sortindex[i]].globalindex;
+
+#ifndef FFT_COLUMN_BASED
+      int slab = part[part_sortindex[i]].globalindex / (GRIDY * GRID2);
+      int task = myplan.slab_to_task[slab];
+#else  /* #ifndef FFT_COLUMN_BASED */
+      int task, column = part[part_sortindex[i]].globalindex / (GRID2);
+
+      if(column < myplan.pivotcol)
+        task = column / myplan.avg;
+      else
+        task = (column - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+      if(localfield_sendcount[task] == 0)
+        localfield_first[task] = num_field_points;
+
+      localfield_sendcount[task]++;
+    }
+
+  /* The loop above leaves num_field_points at the index of the last unique
+   * field point, so one is added to turn it back into a count. Without any
+   * entries there is no field point at all; the count must then stay zero so
+   * that the zero-length localfield arrays are not written to below.
+   */
+  if(num_on_grid > 0)
+    num_field_points++;
+
+  for(i = 1, localfield_offset[0] = 0; i < NTask; i++)
+    localfield_offset[i] = localfield_offset[i - 1] + localfield_sendcount[i - 1];
+
+  myfree_movable(part_sortindex);
+  part_sortindex = NULL;
+
+  /* now bin the local particle data onto the mesh list */
+  for(i = 0; i < num_field_points; i++)
+    localfield_data[i] = 0;
+
+  /* step through the particles; each one owns eight consecutive entries of part[] */
+  for(i = 0; i < num_on_grid; i += 8)
+    {
+      int pindex = (part[i].partindex >> 3);
+
+      MyDouble *pos;
+#ifdef CELL_CENTER_GRAVITY
+      MyDouble posw[3], xtmp, ytmp, ztmp;
+      if(P[pindex].Type == 0)
+        {
+          posw[0] = WRAP_X(SphP[pindex].Center[0]);
+          posw[1] = WRAP_Y(SphP[pindex].Center[1]);
+          posw[2] = WRAP_Z(SphP[pindex].Center[2]);
+
+          pos = posw;
+        }
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[pindex].Pos;
+
+      int slab_x = (int)(to_slab_fac * pos[0]);
+      int slab_y = (int)(to_slab_fac * pos[1]);
+      int slab_z = (int)(to_slab_fac * pos[2]);
+
+      /* fractional position inside the cell, used as CIC weights */
+      double dx = to_slab_fac * pos[0] - slab_x;
+      double dy = to_slab_fac * pos[1] - slab_y;
+      double dz = to_slab_fac * pos[2] - slab_z;
+
+      double weight = P[pindex].Mass;
+
+      if(mode) /* only for power spectrum calculation */
+        if(typelist[P[pindex].Type] == 0)
+          continue;
+
+      /* entry order matches the (xx, yy, zz) loop nesting used when part[] was filled */
+      localfield_data[part[i + 0].localindex] += weight * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
+      localfield_data[part[i + 1].localindex] += weight * (1.0 - dx) * (1.0 - dy) * dz;
+      localfield_data[part[i + 2].localindex] += weight * (1.0 - dx) * dy * (1.0 - dz);
+      localfield_data[part[i + 3].localindex] += weight * (1.0 - dx) * dy * dz;
+      localfield_data[part[i + 4].localindex] += weight * (dx) * (1.0 - dy) * (1.0 - dz);
+      localfield_data[part[i + 5].localindex] += weight * (dx) * (1.0 - dy) * dz;
+      localfield_data[part[i + 6].localindex] += weight * (dx)*dy * (1.0 - dz);
+      localfield_data[part[i + 7].localindex] += weight * (dx)*dy * dz;
+    }
+
+  rhogrid = (fft_real *)mymalloc("rhogrid", maxfftsize * sizeof(fft_real));
+
+  /* clear local FFT-mesh density field */
+  large_array_offset ii;
+  for(ii = 0; ii < maxfftsize; ii++)
+    rhogrid[ii] = 0;
+
+  /* exchange data and add contributions to the local part of the mesh */
+  MPI_Alltoall(localfield_sendcount, sizeof(size_t), MPI_BYTE, localfield_recvcount, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+
+  for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+    {
+      recvTask = ThisTask ^ level;
+
+      if(recvTask < NTask)
+        {
+          if(level > 0)
+            {
+              import_data = (fft_real *)mymalloc("import_data", localfield_recvcount[recvTask] * sizeof(fft_real));
+              import_globalindex =
+                  (large_array_offset *)mymalloc("import_globalindex", localfield_recvcount[recvTask] * sizeof(large_array_offset));
+
+              if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0)
+                {
+                  myMPI_Sendrecv(localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] * sizeof(fft_real),
+                                 MPI_BYTE, recvTask, TAG_NONPERIOD_A, import_data, localfield_recvcount[recvTask] * sizeof(fft_real),
+                                 MPI_BYTE, recvTask, TAG_NONPERIOD_A, MPI_COMM_WORLD, &status);
+
+                  myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                                 localfield_sendcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, TAG_NONPERIOD_B,
+                                 import_globalindex, localfield_recvcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                 TAG_NONPERIOD_B, MPI_COMM_WORLD, &status);
+                }
+            }
+          else
+            {
+              /* own contribution: use the local lists directly, no copy and no communication */
+              import_data        = localfield_data + localfield_offset[ThisTask];
+              import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+            }
+
+          /* note: here every element in rhogrid is only accessed once, so there should be no race condition */
+          for(i = 0; i < localfield_recvcount[recvTask]; i++)
+            {
+              /* determine offset in local FFT slab */
+#ifndef FFT_COLUMN_BASED
+              large_array_offset offset =
+                  import_globalindex[i] - myplan.first_slab_x_of_task[ThisTask] * GRIDY * ((large_array_offset)GRID2);
+#else  /* #ifndef FFT_COLUMN_BASED */
+              large_array_offset offset = import_globalindex[i] - myplan.base_firstcol * ((large_array_offset)GRID2);
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+              rhogrid[offset] += import_data[i];
+            }
+
+          if(level > 0)
+            {
+              myfree(import_globalindex);
+              myfree(import_data);
+            }
+        }
+    }
+}
+
+/*! \brief Function to read out the force component corresponding to spatial
+ *         dimension 'dim'.
+ *
+ *  First the needed mesh values are gathered into localfield_data: with each
+ *  partner task the global mesh indices of the requested field points are
+ *  exchanged, the values are looked up in the local grid and sent back.
+ *  Afterwards every particle interpolates its value from its eight field
+ *  points with CIC weights.
+ *
+ *  \param[in] dim Dimension to be read out; If dim is negative, potential
+ *             values are read out and assigned to particles.
+ *
+ *  \return void
+ */
+void pmforce_zoom_optimized_readout_forces_or_potential(int dim)
+{
+#ifdef EVALPOTENTIAL
+  /* to get potential */
+  const double fac = 4 * M_PI * All.G / (pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ);
+#endif /* #ifdef EVALPOTENTIAL */
+
+  MPI_Status status;
+
+  /* a negative dim reads from rhogrid, any other value from forcegrid */
+  fft_real *grid = (dim < 0) ? rhogrid : forcegrid;
+
+  const double to_slab_fac = PMGRID / All.BoxSize;
+
+  /* amount subtracted from a global mesh index to address the local grid */
+#ifndef FFT_COLUMN_BASED
+  const large_array_offset local_base = myplan.first_slab_x_of_task[ThisTask] * GRIDY * ((large_array_offset)GRID2);
+#else  /* #ifndef FFT_COLUMN_BASED */
+  const large_array_offset local_base = myplan.base_firstcol * ((large_array_offset)GRID2);
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  for(int level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+    {
+      const int partner = ThisTask ^ level;
+
+      if(partner >= NTask)
+        continue;
+
+      const size_t nsend = localfield_sendcount[partner];
+      const size_t nrecv = localfield_recvcount[partner];
+
+      if(level == 0)
+        {
+          /* own field points: operate in place on the local lists */
+          import_data        = localfield_data + localfield_offset[ThisTask];
+          import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+        }
+      else
+        {
+          import_data        = (fft_real *)mymalloc("import_data", nrecv * sizeof(fft_real));
+          import_globalindex = (large_array_offset *)mymalloc("import_globalindex", nrecv * sizeof(large_array_offset));
+
+          /* tell the partner which mesh cells we need, and learn which ones it needs from us */
+          if(nsend > 0 || nrecv > 0)
+            myMPI_Sendrecv(localfield_globalindex + localfield_offset[partner], nsend * sizeof(large_array_offset), MPI_BYTE,
+                           partner, TAG_NONPERIOD_C, import_globalindex, nrecv * sizeof(large_array_offset), MPI_BYTE, partner,
+                           TAG_NONPERIOD_C, MPI_COMM_WORLD, &status);
+        }
+
+      /* look up the requested values in the local grid */
+      for(large_numpart_type k = 0; k < nrecv; k++)
+        import_data[k] = grid[import_globalindex[k] - local_base];
+
+      if(level > 0)
+        {
+          /* return the values to the partner and receive ours into localfield_data */
+          myMPI_Sendrecv(import_data, nrecv * sizeof(fft_real), MPI_BYTE, partner, TAG_NONPERIOD_A,
+                         localfield_data + localfield_offset[partner], nsend * sizeof(fft_real), MPI_BYTE, partner,
+                         TAG_NONPERIOD_A, MPI_COMM_WORLD, &status);
+
+          myfree(import_globalindex);
+          myfree(import_data);
+        }
+    }
+
+  /* read out the force/potential values, which all have been assembled in localfield_data */
+  for(large_numpart_type p = 0; p < NumPart; p++)
+    {
+      /* the eight mesh-cell links of particle p start at slot 8*p of part[] */
+      const large_numpart_type j = (p << 3);
+
+      MyDouble *pos = P[p].Pos;
+
+#ifdef CELL_CENTER_GRAVITY
+      MyDouble posw[3], xtmp, ytmp, ztmp;
+      if(P[p].Type == 0)
+        {
+          posw[0] = WRAP_X(SphP[p].Center[0]);
+          posw[1] = WRAP_Y(SphP[p].Center[1]);
+          posw[2] = WRAP_Z(SphP[p].Center[2]);
+
+          pos = posw;
+        }
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+
+      /* position in units of mesh cells, split into integer cell and fractional part */
+      const double gx = to_slab_fac * pos[0];
+      const double gy = to_slab_fac * pos[1];
+      const double gz = to_slab_fac * pos[2];
+
+      const int slab_x = (int)gx;
+      const int slab_y = (int)gy;
+      const int slab_z = (int)gz;
+
+      /* CIC weights: index 1 is the fractional part, index 0 its complement */
+      const double wx1 = gx - slab_x, wx0 = 1.0 - wx1;
+      const double wy1 = gy - slab_y, wy0 = 1.0 - wy1;
+      const double wz1 = gz - slab_z, wz0 = 1.0 - wz1;
+
+      const double value = +localfield_data[part[j + 0].localindex] * wx0 * wy0 * wz0 +
+                           localfield_data[part[j + 1].localindex] * wx0 * wy0 * wz1 +
+                           localfield_data[part[j + 2].localindex] * wx0 * wy1 * wz0 +
+                           localfield_data[part[j + 3].localindex] * wx0 * wy1 * wz1 +
+                           localfield_data[part[j + 4].localindex] * wx1 * wy0 * wz0 +
+                           localfield_data[part[j + 5].localindex] * wx1 * wy0 * wz1 +
+                           localfield_data[part[j + 6].localindex] * wx1 * wy1 * wz0 +
+                           localfield_data[part[j + 7].localindex] * wx1 * wy1 * wz1;
+
+      if(dim >= 0)
+        P[p].GravPM[dim] += value;
+#ifdef EVALPOTENTIAL
+      else
+        P[p].PM_Potential += value * fac;
+#endif /* #ifdef EVALPOTENTIAL */
+    }
+}
+
+#else /* #ifdef PM_ZOOM_OPTIMIZED */
+
+/*
+ *  Here come the routines for a different communication algorithm that is
+ *  better suited for homogeneously loaded boxes.
+ */
+
+/*! \brief Structure for particle buffer.
+ *
+ *  A particle is exported as one record to every task it has to be sent to.
+ *  partout holds the records to be sent, partin the records received.
+ */
+static struct partbuf
+{
+  MyFloat Mass; /*!< particle mass, copied from P[].Mass */
+  MyFloat Pos[3]; /*!< position used for the mesh assignment (P[].Pos, or the wrapped cell center if CELL_CENTER_GRAVITY applies) */
+} * partin, *partout;
+
+static size_t nimport, nexport; /* total number of partbuf records to receive / to send (sums over Rcvpm_count / Sndpm_count) */
+
+static size_t *Sndpm_count, *Sndpm_offset; /* send counts and start offsets into partout per task; allocated with one padded row per thread */
+static size_t *Rcvpm_count, *Rcvpm_offset; /* receive counts and start offsets into partin per task (NTask entries) */
+
+/*! \brief Prepares density field for PM calculation in uniform box optimized
+ *         algorithm.
+ *
+ *  \param[in] mode Modes force calculation.
+ *
+ *  \return void
+ */
+static void pmforce_uniform_optimized_prepare_density(int mode)
+{
+  int i, j;
+
+  double to_slab_fac = PMGRID / All.BoxSize;
+
+  if(mode == 2)
+    to_slab_fac *= POWERSPEC_FOLDFAC;
+  if(mode == 3)
+    to_slab_fac *= POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC;
+
+  /* We here enlarge NTask such that each thread gets his own cache line for send_count/send_offset.
+   * This should hopefully prevent a performance penalty from 'false sharing' for these variables
+   */
+  int multiNtask = roundup_to_multiple_of_cacheline_size(NTask * sizeof(size_t)) / sizeof(size_t);
+
+  Sndpm_count  = (size_t *)mymalloc("Sndpm_count", MaxThreads * multiNtask * sizeof(size_t));
+  Sndpm_offset = (size_t *)mymalloc("Sndpm_offset", MaxThreads * multiNtask * sizeof(size_t));
+  Rcvpm_count  = (size_t *)mymalloc("Rcvpm_count", NTask * sizeof(size_t));
+  Rcvpm_offset = (size_t *)mymalloc("Rcvpm_offset", NTask * sizeof(size_t));
+
+  /* determine the slabs/columns each particles accesses */
+  {
+    size_t *send_count = Sndpm_count + get_thread_num() * multiNtask;
+
+    /* each thread needs to do the loop to clear its send_count[] array */
+    for(j = 0; j < NTask; j++)
+      send_count[j] = 0;
+
+    for(i = 0; i < NumPart; i++)
+      {
+        MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+        MyDouble posw[3], xtmp, ytmp, ztmp;
+        if(P[i].Type == 0)
+          {
+            posw[0] = WRAP_X(SphP[i].Center[0]);
+            posw[1] = WRAP_Y(SphP[i].Center[1]);
+            posw[2] = WRAP_Z(SphP[i].Center[2]);
+
+            pos = posw;
+          }
+        else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+          pos = P[i].Pos;
+
+        int slab_x  = (int)(to_slab_fac * pos[0]);
+        int slab_xx = slab_x + 1;
+
+        if(mode >= 2)
+          {
+            slab_x %= GRIDX;
+            slab_xx %= GRIDX;
+          }
+        else
+          {
+            if(slab_x >= GRIDX)
+              slab_x -= GRIDX;
+
+            if(slab_xx >= GRIDX)
+              slab_xx -= GRIDX;
+          }
+
+#ifndef FFT_COLUMN_BASED
+        int task0 = myplan.slab_to_task[slab_x];
+        int task1 = myplan.slab_to_task[slab_xx];
+
+        send_count[task0]++;
+        if(task0 != task1)
+          send_count[task1]++;
+#else  /* #ifndef FFT_COLUMN_BASED */
+        int slab_y  = (int)(to_slab_fac * pos[1]);
+        int slab_yy = slab_y + 1;
+
+        if(mode >= 2)
+          {
+            slab_y %= GRIDY;
+            slab_yy %= GRIDY;
+          }
+        else
+          {
+            if(slab_y >= GRIDY)
+              slab_y -= GRIDY;
+
+            if(slab_yy >= GRIDY)
+              slab_yy -= GRIDY;
+          }
+
+        int column0 = slab_x * GRIDY + slab_y;
+        int column1 = slab_x * GRIDY + slab_yy;
+        int column2 = slab_xx * GRIDY + slab_y;
+        int column3 = slab_xx * GRIDY + slab_yy;
+
+        int task0, task1, task2, task3;
+
+        if(column0 < myplan.pivotcol)
+          task0 = column0 / myplan.avg;
+        else
+          task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column1 < myplan.pivotcol)
+          task1 = column1 / myplan.avg;
+        else
+          task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column2 < myplan.pivotcol)
+          task2 = column2 / myplan.avg;
+        else
+          task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column3 < myplan.pivotcol)
+          task3 = column3 / myplan.avg;
+        else
+          task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        send_count[task0]++;
+        if(task1 != task0)
+          send_count[task1]++;
+        if(task2 != task1 && task2 != task0)
+          send_count[task2]++;
+        if(task3 != task0 && task3 != task1 && task3 != task2)
+          send_count[task3]++;
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+      }
+  }
+
+  /* collect thread-specific offset table and collect the results from the other threads */
+  for(i = 0, Sndpm_offset[0] = 0; i < NTask; i++)
+    for(j = 0; j < MaxThreads; j++)
+      {
+        int ind_prev, ind = j * multiNtask + i;
+        if(ind > 0)
+          {
+            if(j == 0)
+              ind_prev = (MaxThreads - 1) * multiNtask + i - 1;
+            else
+              ind_prev = ind - multiNtask;
+
+            Sndpm_offset[ind] = Sndpm_offset[ind_prev] + Sndpm_count[ind_prev];
+          }
+      }
+
+  for(j = 1; j < MaxThreads; j++)
+    for(i = 0; i < NTask; i++)
+      Sndpm_count[i] += Sndpm_count[i + j * multiNtask];
+
+  MPI_Alltoall(Sndpm_count, sizeof(size_t), MPI_BYTE, Rcvpm_count, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+
+  for(j = 0, nimport = 0, nexport = 0, Rcvpm_offset[0] = 0, Sndpm_offset[0] = 0; j < NTask; j++)
+    {
+      nexport += Sndpm_count[j];
+      nimport += Rcvpm_count[j];
+
+      if(j > 0)
+        {
+          Sndpm_offset[j] = Sndpm_offset[j - 1] + Sndpm_count[j - 1];
+          Rcvpm_offset[j] = Rcvpm_offset[j - 1] + Rcvpm_count[j - 1];
+        }
+    }
+
+  /* allocate import and export buffer */
+  partin  = (struct partbuf *)mymalloc("partin", nimport * sizeof(struct partbuf));
+  partout = (struct partbuf *)mymalloc("partout", nexport * sizeof(struct partbuf));
+
+  {
+    size_t *send_count  = Sndpm_count + get_thread_num() * multiNtask;
+    size_t *send_offset = Sndpm_offset + get_thread_num() * multiNtask;
+
+    for(j = 0; j < NTask; j++)
+      send_count[j] = 0;
+
+    /* fill export buffer */
+    for(i = 0; i < NumPart; i++)
+      {
+        MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+        MyDouble posw[3], xtmp, ytmp, ztmp;
+        if(P[i].Type == 0)
+          {
+            posw[0] = WRAP_X(SphP[i].Center[0]);
+            posw[1] = WRAP_Y(SphP[i].Center[1]);
+            posw[2] = WRAP_Z(SphP[i].Center[2]);
+
+            pos = posw;
+          }
+        else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+          pos = P[i].Pos;
+
+        int slab_x  = (int)(to_slab_fac * pos[0]);
+        int slab_xx = slab_x + 1;
+
+        if(mode >= 2)
+          {
+            slab_x %= GRIDX;
+            slab_xx %= GRIDX;
+          }
+        else
+          {
+            if(slab_x >= GRIDX)
+              slab_x -= GRIDX;
+
+            if(slab_xx >= GRIDX)
+              slab_xx -= GRIDX;
+          }
+
+#ifndef FFT_COLUMN_BASED
+        int task0 = myplan.slab_to_task[slab_x];
+        int task1 = myplan.slab_to_task[slab_xx];
+
+        size_t ind0        = send_offset[task0] + send_count[task0]++;
+        partout[ind0].Mass = P[i].Mass;
+        for(j = 0; j < 3; j++)
+          partout[ind0].Pos[j] = pos[j];
+
+        if(task0 != task1)
+          {
+            size_t ind1        = send_offset[task1] + send_count[task1]++;
+            partout[ind1].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind1].Pos[j] = pos[j];
+          }
+#else  /* #ifndef FFT_COLUMN_BASED */
+        int slab_y  = (int)(to_slab_fac * pos[1]);
+        int slab_yy = slab_y + 1;
+
+        if(mode >= 2)
+          {
+            slab_y %= GRIDY;
+            slab_yy %= GRIDY;
+          }
+        else
+          {
+            if(slab_y >= GRIDY)
+              slab_y -= GRIDY;
+
+            if(slab_yy >= GRIDY)
+              slab_yy -= GRIDY;
+          }
+
+        int column0 = slab_x * GRIDY + slab_y;
+        int column1 = slab_x * GRIDY + slab_yy;
+        int column2 = slab_xx * GRIDY + slab_y;
+        int column3 = slab_xx * GRIDY + slab_yy;
+
+        int task0, task1, task2, task3;
+
+        if(column0 < myplan.pivotcol)
+          task0 = column0 / myplan.avg;
+        else
+          task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column1 < myplan.pivotcol)
+          task1 = column1 / myplan.avg;
+        else
+          task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column2 < myplan.pivotcol)
+          task2 = column2 / myplan.avg;
+        else
+          task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column3 < myplan.pivotcol)
+          task3 = column3 / myplan.avg;
+        else
+          task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        size_t ind0        = send_offset[task0] + send_count[task0]++;
+        partout[ind0].Mass = P[i].Mass;
+        for(j = 0; j < 3; j++)
+          partout[ind0].Pos[j] = pos[j];
+
+        if(task1 != task0)
+          {
+            size_t ind1        = send_offset[task1] + send_count[task1]++;
+            partout[ind1].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind1].Pos[j] = pos[j];
+          }
+        if(task2 != task1 && task2 != task0)
+          {
+            size_t ind2        = send_offset[task2] + send_count[task2]++;
+            partout[ind2].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind2].Pos[j] = pos[j];
+          }
+        if(task3 != task0 && task3 != task1 && task3 != task2)
+          {
+            size_t ind3        = send_offset[task3] + send_count[task3]++;
+            partout[ind3].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind3].Pos[j] = pos[j];
+          }
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+      }
+  }
+
+  /* collect the send_count[] results from the other threads */
+  for(j = 1; j < MaxThreads; j++)
+    for(i = 0; i < NTask; i++)
+      Sndpm_count[i] += Sndpm_count[i + j * multiNtask];
+
+  int flag_big = 0, flag_big_all;
+  for(i = 0; i < NTask; i++)
+    if(Sndpm_count[i] * sizeof(struct partbuf) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+      flag_big = 1;
+
+  /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
+   * transfer the data in chunks.
+   */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  /* exchange particle data */
+  myMPI_Alltoallv(partout, Sndpm_count, Sndpm_offset, partin, Rcvpm_count, Rcvpm_offset, sizeof(struct partbuf), flag_big_all,
+                  MPI_COMM_WORLD);
+
+  myfree(partout);
+
+  /* allocate density field */
+  rhogrid = (fft_real *)mymalloc("rhogrid", maxfftsize * sizeof(fft_real));
+
+  /* clear local FFT-mesh density field */
+  large_array_offset ii;
+  for(ii = 0; ii < maxfftsize; ii++)
+    rhogrid[ii] = 0;
+
+#ifndef FFT_COLUMN_BASED
+  /* bin particle data onto mesh, in multi-threaded fashion */
+  {
+    int tid = get_thread_num();
+
+    int first_y, count_y;
+    subdivide_evenly(GRIDY, MaxThreads, tid, &first_y, &count_y);
+    int last_y = first_y + count_y - 1;
+
+    for(i = 0; i < nimport; i++)
+      {
+        int slab_y  = (int)(to_slab_fac * partin[i].Pos[1]);
+        int slab_yy = slab_y + 1;
+        double dy   = to_slab_fac * partin[i].Pos[1] - slab_y;
+
+        if(mode >= 2)
+          {
+            slab_y %= GRIDY;
+            slab_yy %= GRIDY;
+          }
+        else
+          {
+            if(slab_y >= GRIDY)
+              slab_y -= GRIDY;
+
+            if(slab_yy >= GRIDY)
+              slab_yy -= GRIDY;
+          }
+
+        int flag_slab_y, flag_slab_yy;
+
+        if(slab_y >= first_y && slab_y <= last_y)
+          flag_slab_y = 1;
+        else
+          flag_slab_y = 0;
+
+        if(slab_yy >= first_y && slab_yy <= last_y)
+          flag_slab_yy = 1;
+        else
+          flag_slab_yy = 0;
+
+        if(flag_slab_y || flag_slab_yy)
+          {
+            double mass = partin[i].Mass;
+
+            int slab_x  = (int)(to_slab_fac * partin[i].Pos[0]);
+            int slab_z  = (int)(to_slab_fac * partin[i].Pos[2]);
+            int slab_xx = slab_x + 1;
+            int slab_zz = slab_z + 1;
+
+            double dx = to_slab_fac * partin[i].Pos[0] - slab_x;
+            double dz = to_slab_fac * partin[i].Pos[2] - slab_z;
+
+            if(mode >= 2)
+              {
+                slab_x %= GRIDX;
+                slab_z %= GRIDZ;
+                slab_xx %= GRIDX;
+                slab_zz %= GRIDZ;
+              }
+            else
+              {
+                if(slab_x >= GRIDX)
+                  slab_x -= GRIDX;
+                if(slab_z >= GRIDZ)
+                  slab_z -= GRIDZ;
+
+                if(slab_xx >= GRIDX)
+                  slab_xx -= GRIDX;
+                if(slab_zz >= GRIDZ)
+                  slab_zz -= GRIDZ;
+              }
+
+            int flag_slab_x, flag_slab_xx;
+
+            if(myplan.slab_to_task[slab_x] == ThisTask)
+              {
+                slab_x -= myplan.first_slab_x_of_task[ThisTask];
+                flag_slab_x = 1;
+              }
+            else
+              flag_slab_x = 0;
+
+            if(myplan.slab_to_task[slab_xx] == ThisTask)
+              {
+                slab_xx -= myplan.first_slab_x_of_task[ThisTask];
+                flag_slab_xx = 1;
+              }
+            else
+              flag_slab_xx = 0;
+
+            if(flag_slab_x)
+              {
+                if(flag_slab_y)
+                  {
+                    rhogrid[FI(slab_x, slab_y, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz));
+                    rhogrid[FI(slab_x, slab_y, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz));
+                  }
+
+                if(flag_slab_yy)
+                  {
+                    rhogrid[FI(slab_x, slab_yy, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz));
+                    rhogrid[FI(slab_x, slab_yy, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz));
+                  }
+              }
+
+            if(flag_slab_xx)
+              {
+                if(flag_slab_y)
+                  {
+                    rhogrid[FI(slab_xx, slab_y, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz));
+                    rhogrid[FI(slab_xx, slab_y, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz));
+                  }
+
+                if(flag_slab_yy)
+                  {
+                    rhogrid[FI(slab_xx, slab_yy, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz));
+                    rhogrid[FI(slab_xx, slab_yy, slab_zz)] += (mass * (dx) * (dy) * (dz));
+                  }
+              }
+          }
+      }
+  }
+
+#else /* #ifndef FFT_COLUMN_BASED */
+
+  struct data_cols
+  {
+    int col0, col1, col2, col3;
+    double dx, dy;
+  } * aux;
+
+  aux = mymalloc("aux", nimport * sizeof(struct data_cols));
+
+  for(i = 0; i < nimport; i++)
+    {
+      int slab_x = (int)(to_slab_fac * partin[i].Pos[0]);
+      int slab_xx = slab_x + 1;
+
+      int slab_y = (int)(to_slab_fac * partin[i].Pos[1]);
+      int slab_yy = slab_y + 1;
+
+      aux[i].dx = to_slab_fac * partin[i].Pos[0] - slab_x;
+      aux[i].dy = to_slab_fac * partin[i].Pos[1] - slab_y;
+
+      if(mode >= 2)
+        {
+          slab_x %= GRIDX;
+          slab_xx %= GRIDX;
+          slab_y %= GRIDY;
+          slab_yy %= GRIDY;
+        }
+      else
+        {
+          if(slab_x >= GRIDX)
+            slab_x -= GRIDX;
+          if(slab_xx >= GRIDX)
+            slab_xx -= GRIDX;
+
+          if(slab_y >= GRIDY)
+            slab_y -= GRIDY;
+          if(slab_yy >= GRIDY)
+            slab_yy -= GRIDY;
+        }
+
+      aux[i].col0 = slab_x * GRIDY + slab_y;
+      aux[i].col1 = slab_x * GRIDY + slab_yy;
+      aux[i].col2 = slab_xx * GRIDY + slab_y;
+      aux[i].col3 = slab_xx * GRIDY + slab_yy;
+    }
+
+  {
+    int tid = get_thread_num();
+
+    int first_col, last_col, count_col;
+    subdivide_evenly(myplan.base_ncol, MaxThreads, tid, &first_col, &count_col);
+    last_col = first_col + count_col - 1;
+    first_col += myplan.base_firstcol;
+    last_col += myplan.base_firstcol;
+
+    for(i = 0; i < nimport; i++)
+      {
+        int flag0, flag1, flag2, flag3;
+        int col0 = aux[i].col0;
+        int col1 = aux[i].col1;
+        int col2 = aux[i].col2;
+        int col3 = aux[i].col3;
+
+        if(col0 >= first_col && col0 <= last_col)
+          flag0 = 1;
+        else
+          flag0 = 0;
+
+        if(col1 >= first_col && col1 <= last_col)
+          flag1 = 1;
+        else
+          flag1 = 0;
+
+        if(col2 >= first_col && col2 <= last_col)
+          flag2 = 1;
+        else
+          flag2 = 0;
+
+        if(col3 >= first_col && col3 <= last_col)
+          flag3 = 1;
+        else
+          flag3 = 0;
+
+        if(flag0 || flag1 || flag2 || flag3)
+          {
+            double mass = partin[i].Mass;
+
+            double dx = aux[i].dx;
+            double dy = aux[i].dy;
+
+            int slab_z = (int)(to_slab_fac * partin[i].Pos[2]);
+            int slab_zz = slab_z + 1;
+
+            double dz = to_slab_fac * partin[i].Pos[2] - slab_z;
+
+            if(mode >= 2)
+              {
+                slab_z %= GRIDZ;
+                slab_zz %= GRIDZ;
+              }
+            else
+              {
+                if(slab_z >= GRIDZ)
+                  slab_z -= GRIDZ;
+
+                if(slab_zz >= GRIDZ)
+                  slab_zz -= GRIDZ;
+              }
+
+            if(flag0)
+              {
+                rhogrid[FC(col0, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz));
+                rhogrid[FC(col0, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz));
+              }
+
+            if(flag1)
+              {
+                rhogrid[FC(col1, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz));
+                rhogrid[FC(col1, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz));
+              }
+
+            if(flag2)
+              {
+                rhogrid[FC(col2, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz));
+                rhogrid[FC(col2, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz));
+              }
+
+            if(flag3)
+              {
+                rhogrid[FC(col3, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz));
+                rhogrid[FC(col3, slab_zz)] += (mass * (dx) * (dy) * (dz));
+              }
+          }
+      }
+  }
+
+  myfree(aux);
+
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+}
+
+/*! \brief Reads out the force component along spatial dimension 'dim' (or
+ *         the potential) from the mesh and adds it to the local particles.
+ *
+ *  \param[in] dim Dimension to be read out; if dim is negative, potential
+ *             values are read out (only stored if EVALPOTENTIAL is set).
+ *
+ *  \return void
+ */
+static void pmforce_uniform_optimized_readout_forces_or_potential(int dim)
+{
+#ifdef EVALPOTENTIAL
+  double fac = 4 * M_PI * All.G / (pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ); /* to get potential  */
+#endif /* #ifdef EVALPOTENTIAL */
+
+  double to_slab_fac = PMGRID / All.BoxSize; /* scales a coordinate to mesh-cell units */
+
+  double *flistin  = (double *)mymalloc("flistin", nimport * sizeof(double));  /* one value per imported entry */
+  double *flistout = (double *)mymalloc("flistout", nexport * sizeof(double)); /* one value per exported entry */
+
+  fft_real *grid;
+
+  if(dim < 0) /* negative dim: read from rhogrid, otherwise from forcegrid */
+    grid = rhogrid;
+  else
+    grid = forcegrid;
+
+  size_t i;
+  for(i = 0; i < nimport; i++)
+    {
+      flistin[i] = 0;
+
+      int slab_x = (int)(to_slab_fac * partin[i].Pos[0]);
+      int slab_y = (int)(to_slab_fac * partin[i].Pos[1]);
+      int slab_z = (int)(to_slab_fac * partin[i].Pos[2]);
+
+      double dx = to_slab_fac * partin[i].Pos[0] - slab_x; /* fractional offsets inside the cell, */
+      double dy = to_slab_fac * partin[i].Pos[1] - slab_y; /* used as trilinear weights below     */
+      double dz = to_slab_fac * partin[i].Pos[2] - slab_z;
+
+      if(slab_x >= GRIDX) /* fold indices past the last cell back by one grid length */
+        slab_x -= GRIDX;
+      if(slab_y >= GRIDY)
+        slab_y -= GRIDY;
+      if(slab_z >= GRIDZ)
+        slab_z -= GRIDZ;
+
+      int slab_xx = slab_x + 1; /* upper neighbour cell in each dimension */
+      int slab_yy = slab_y + 1;
+      int slab_zz = slab_z + 1;
+
+      if(slab_xx >= GRIDX)
+        slab_xx -= GRIDX;
+      if(slab_yy >= GRIDY)
+        slab_yy -= GRIDY;
+      if(slab_zz >= GRIDZ)
+        slab_zz -= GRIDZ;
+
+#ifndef FFT_COLUMN_BASED
+      if(myplan.slab_to_task[slab_x] == ThisTask) /* lower x-slab is mapped to this task */
+        {
+          slab_x -= myplan.first_slab_x_of_task[ThisTask];
+
+          flistin[i] += grid[FI(slab_x, slab_y, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
+                        grid[FI(slab_x, slab_y, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz) +
+                        grid[FI(slab_x, slab_yy, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) +
+                        grid[FI(slab_x, slab_yy, slab_zz)] * (1.0 - dx) * (dy) * (dz);
+        }
+
+      if(myplan.slab_to_task[slab_xx] == ThisTask) /* upper x-slab is mapped to this task */
+        {
+          slab_xx -= myplan.first_slab_x_of_task[ThisTask];
+
+          flistin[i] += grid[FI(slab_xx, slab_y, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) +
+                        grid[FI(slab_xx, slab_y, slab_zz)] * (dx) * (1.0 - dy) * (dz) +
+                        grid[FI(slab_xx, slab_yy, slab_z)] * (dx) * (dy) * (1.0 - dz) +
+                        grid[FI(slab_xx, slab_yy, slab_zz)] * (dx) * (dy) * (dz);
+        }
+#else  /* #ifndef FFT_COLUMN_BASED */
+      int column0 = slab_x * GRIDY + slab_y; /* the four (x,y) columns surrounding the position */
+      int column1 = slab_x * GRIDY + slab_yy;
+      int column2 = slab_xx * GRIDY + slab_y;
+      int column3 = slab_xx * GRIDY + slab_yy;
+
+      if(column0 >= myplan.base_firstcol && column0 <= myplan.base_lastcol)
+        {
+          flistin[i] += grid[FC(column0, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
+                        grid[FC(column0, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz);
+        }
+      if(column1 >= myplan.base_firstcol && column1 <= myplan.base_lastcol)
+        {
+          flistin[i] +=
+              grid[FC(column1, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) + grid[FC(column1, slab_zz)] * (1.0 - dx) * (dy) * (dz);
+        }
+
+      if(column2 >= myplan.base_firstcol && column2 <= myplan.base_lastcol)
+        {
+          flistin[i] +=
+              grid[FC(column2, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) + grid[FC(column2, slab_zz)] * (dx) * (1.0 - dy) * (dz);
+        }
+
+      if(column3 >= myplan.base_firstcol && column3 <= myplan.base_lastcol)
+        {
+          flistin[i] += grid[FC(column3, slab_z)] * (dx) * (dy) * (1.0 - dz) + grid[FC(column3, slab_zz)] * (dx) * (dy) * (dz);
+        }
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+    }
+
+  /* exchange the interpolated values; first check whether any send size exceeds the message size limit */
+  int flag_big = 0, flag_big_all;
+  for(i = 0; i < NTask; i++)
+    if(Sndpm_count[i] * sizeof(double) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+      flag_big = 1;
+
+  /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
+   * transfer the data in chunks.
+   */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  /* exchange data: the receive counts of the particle import act as send counts here, and vice versa */
+  myMPI_Alltoallv(flistin, Rcvpm_count, Rcvpm_offset, flistout, Sndpm_count, Sndpm_offset, sizeof(double), flag_big_all,
+                  MPI_COMM_WORLD);
+
+  /* now assign them to the correct particles */
+  int multiNtask = roundup_to_multiple_of_cacheline_size(NTask * sizeof(size_t)) / sizeof(size_t);
+
+  {
+    size_t *send_count  = Sndpm_count + get_thread_num() * multiNtask; /* this thread's section of the count table */
+    size_t *send_offset = Sndpm_offset + get_thread_num() * multiNtask;
+
+    int j;
+    for(j = 0; j < NTask; j++)
+      send_count[j] = 0; /* reused below as a running read position per task */
+
+    int i; /* shadows the outer size_t i inside this block */
+    for(i = 0; i < NumPart; i++)
+      {
+        MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+        MyDouble posw[3], xtmp, ytmp, ztmp;
+        if(P[i].Type == 0)
+          {
+            posw[0] = WRAP_X(SphP[i].Center[0]);
+            posw[1] = WRAP_Y(SphP[i].Center[1]);
+            posw[2] = WRAP_Z(SphP[i].Center[2]);
+
+            pos = posw;
+          }
+        else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+          pos = P[i].Pos;
+
+        int slab_x  = (int)(to_slab_fac * pos[0]);
+        int slab_xx = slab_x + 1;
+
+        if(slab_x >= GRIDX)
+          slab_x -= GRIDX;
+
+        if(slab_xx >= GRIDX)
+          slab_xx -= GRIDX;
+
+#ifndef FFT_COLUMN_BASED
+        int task0 = myplan.slab_to_task[slab_x];
+        int task1 = myplan.slab_to_task[slab_xx];
+
+        double value = flistout[send_offset[task0] + send_count[task0]++]; /* next unread value from task0 */
+
+        if(task0 != task1) /* a second value exists only if the two slabs map to different tasks */
+          value += flistout[send_offset[task1] + send_count[task1]++];
+#else  /* #ifndef FFT_COLUMN_BASED */
+        int slab_y = (int)(to_slab_fac * pos[1]);
+        int slab_yy = slab_y + 1;
+
+        if(slab_y >= GRIDY)
+          slab_y -= GRIDY;
+
+        if(slab_yy >= GRIDY)
+          slab_yy -= GRIDY;
+
+        int column0 = slab_x * GRIDY + slab_y;
+        int column1 = slab_x * GRIDY + slab_yy;
+        int column2 = slab_xx * GRIDY + slab_y;
+        int column3 = slab_xx * GRIDY + slab_yy;
+
+        int task0, task1, task2, task3; /* task derived from each column via pivotcol/avg/tasklastsection */
+
+        if(column0 < myplan.pivotcol)
+          task0 = column0 / myplan.avg;
+        else
+          task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column1 < myplan.pivotcol)
+          task1 = column1 / myplan.avg;
+        else
+          task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column2 < myplan.pivotcol)
+          task2 = column2 / myplan.avg;
+        else
+          task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column3 < myplan.pivotcol)
+          task3 = column3 / myplan.avg;
+        else
+          task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        double value = flistout[send_offset[task0] + send_count[task0]++]; /* next unread value from task0 */
+
+        if(task1 != task0) /* each distinct task contributes exactly one value */
+          value += flistout[send_offset[task1] + send_count[task1]++];
+
+        if(task2 != task1 && task2 != task0)
+          value += flistout[send_offset[task2] + send_count[task2]++];
+
+        if(task3 != task0 && task3 != task1 && task3 != task2)
+          value += flistout[send_offset[task3] + send_count[task3]++];
+#endif /* #ifndef FFT_COLUMN_BASED */
+        if(dim < 0)
+          {
+#ifdef EVALPOTENTIAL
+            P[i].PM_Potential += value * fac;
+#endif /* #ifdef EVALPOTENTIAL */
+          }
+        else
+          P[i].GravPM[dim] += value;
+      }
+  }
+
+  int j;
+  /* restore total Sndpm_count by adding the sections of the other threads to the first one */
+  for(j = 1; j < MaxThreads; j++)
+    for(i = 0; i < NTask; i++)
+      Sndpm_count[i] += Sndpm_count[i + j * multiNtask];
+
+  myfree(flistout);
+  myfree(flistin);
+}
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+
+/*! \brief Calculates the long-range periodic force given the particle
+ *         positions using the PM method.
+ *
+ *  The force is Gaussian filtered with Asmth, given in
+ *  mesh-cell units. We carry out a CIC charge assignment, and compute the
+ *  potential by fast Fourier transform methods. The potential is
+ *  finite-differenced using a 4-point finite differencing formula, and the
+ *  forces are interpolated tri-linearly to the particle positions. The CIC
+ *  kernel is deconvolved.
+ *
+ *  \param[in] mode For mode=0, normal force calculation, mode=1, only density
+ *             field construction for a power spectrum calculation. In the
+ *             latter case, typelist flags the particle types that should be
+ *             included in the density field.
+ *  \param[in] typelist Flags of particle types included in power spectrum
+ *             calculation.
+ *
+ *  \return void
+ */
+void pmforce_periodic(int mode, int *typelist)
+{
+  int x, y, z, xx, yy, zz;
+
+  double tstart = second();
+
+  if(mode == 0)
+    mpi_printf("PM-PERIODIC: Starting periodic PM calculation.  (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+
+#ifndef NUMPART_PER_TASK_LARGE
+  if((((long long)NumPart) << 3) >= (((long long)1) << 31)) /* i.e. 8 * NumPart must stay below 2^31 */
+    terminate("We are dealing with a too large particle number per MPI rank - enabling NUMPART_PER_TASK_LARGE might help.");
+#endif /* #ifndef NUMPART_PER_TASK_LARGE */
+
+  double asmth2 = All.Asmth[0] * All.Asmth[0];
+  double d      = All.BoxSize / PMGRID; /* mesh cell size */
+  double dhalf  = 0.5 * d;
+
+  double fac = 4 * M_PI * All.G / (pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ); /* to get potential  */
+
+  fac *= 1 / (2 * d); /* for finite differencing */
+
+#ifdef PM_ZOOM_OPTIMIZED
+  pmforce_zoom_optimized_prepare_density(mode, typelist);
+#else  /* #ifdef PM_ZOOM_OPTIMIZED */
+  pmforce_uniform_optimized_prepare_density(mode);
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+
+  /* allocate the memory to hold the FFT fields */
+
+  forcegrid = (fft_real *)mymalloc("forcegrid", maxfftsize * sizeof(fft_real));
+
+  workspace = forcegrid; /* the FFT workspace shares the forcegrid buffer */
+
+#ifndef FFT_COLUMN_BASED
+  fft_of_rhogrid = (fft_complex *)&rhogrid[0];
+#else  /* #ifndef FFT_COLUMN_BASED */
+  fft_of_rhogrid = (fft_complex *)&workspace[0];
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  /* Do the FFT of the density field */
+#ifndef FFT_COLUMN_BASED
+  my_slab_based_fft(&myplan, &rhogrid[0], &workspace[0], 1);
+#else  /* #ifndef FFT_COLUMN_BASED */
+  my_column_based_fft(&myplan, rhogrid, workspace, 1); /* result is in workspace, not in rhogrid ! */
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  if(mode != 0)
+    {
+      /* used to measure powerspectrum; nothing further is computed in this branch */
+    }
+  else
+    {
+      /* multiply with Green's function in order to obtain the potential (or forces for spectral differencing) */
+
+      double kfacx = 2.0 * M_PI / (GRIDX * d);
+      double kfacy = 2.0 * M_PI / (GRIDY * d);
+      double kfacz = 2.0 * M_PI / (GRIDZ * d);
+
+#ifdef FFT_COLUMN_BASED
+      for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip++)
+        {
+          large_array_offset ipcell = ip + ((large_array_offset)myplan.second_transposed_firstcol) * GRIDX;
+          y                         = ipcell / (GRIDX * GRIDz);
+          int yr                    = ipcell % (GRIDX * GRIDz);
+          z                         = yr / GRIDX;
+          x                         = yr % GRIDX;
+#else  /* #ifdef FFT_COLUMN_BASED */
+      for(x = 0; x < GRIDX; x++)
+        for(y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++)
+          for(z = 0; z < GRIDz; z++)
+            {
+#endif /* #ifdef FFT_COLUMN_BASED #else */
+          if(x >= (GRIDX / 2)) /* indices in the upper half of the grid become negative mode numbers */
+            xx = x - GRIDX;
+          else
+            xx = x;
+          if(y >= (GRIDY / 2))
+            yy = y - GRIDY;
+          else
+            yy = y;
+          if(z >= (GRIDZ / 2))
+            zz = z - GRIDZ;
+          else
+            zz = z;
+
+          double kx = kfacx * xx;
+          double ky = kfacy * yy;
+          double kz = kfacz * zz;
+
+          double k2 = kx * kx + ky * ky + kz * kz;
+
+          if(k2 > 0) /* the k = 0 element is left untouched here and zeroed after the loop */
+            {
+              double smth = -exp(-k2 * asmth2) / k2; /* -1/k^2 kernel damped by the Gaussian filter */
+
+              /* do deconvolution */
+
+              double fx = 1, fy = 1, fz = 1;
+
+              if(xx != 0) /* sin(u)/u with u = k * d / 2; stays 1 for a vanishing mode number */
+                {
+                  fx = kx * dhalf;
+                  fx = sin(fx) / fx;
+                }
+              if(yy != 0)
+                {
+                  fy = ky * dhalf;
+                  fy = sin(fy) / fy;
+                }
+              if(zz != 0)
+                {
+                  fz = kz * dhalf;
+                  fz = sin(fz) / fz;
+                }
+
+              double ff     = 1 / (fx * fy * fz);
+              double deconv = ff * ff * ff * ff; /* fourth power of the inverse sinc product */
+
+              smth *= deconv; /* deconvolution */
+
+#ifndef FFT_COLUMN_BASED
+              large_array_offset ip = ((large_array_offset)GRIDz) * (GRIDX * (y - myplan.slabstart_y) + x) + z;
+#endif /* #ifndef FFT_COLUMN_BASED */
+
+              fft_of_rhogrid[ip][0] *= smth; /* real part */
+              fft_of_rhogrid[ip][1] *= smth; /* imaginary part */
+            }
+        }
+
+#ifdef FFT_COLUMN_BASED
+      if(myplan.second_transposed_firstcol == 0)
+        fft_of_rhogrid[0][0] = fft_of_rhogrid[0][1] = 0.0; /* zero the first element on the task that holds it */
+#else  /* #ifdef FFT_COLUMN_BASED */
+      if(myplan.slabstart_y == 0)
+        fft_of_rhogrid[0][0] = fft_of_rhogrid[0][1] = 0.0; /* zero the first element on the task that holds it */
+#endif /* #ifdef FFT_COLUMN_BASED #else */
+
+        /* Do the inverse FFT to get the potential/forces */
+
+#ifndef FFT_COLUMN_BASED
+      my_slab_based_fft(&myplan, &rhogrid[0], &workspace[0], -1);
+#else  /* #ifndef FFT_COLUMN_BASED */
+      my_column_based_fft(&myplan, workspace, rhogrid, -1);
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+      /* Now rhogrid holds the potential/forces */
+
+#ifdef EVALPOTENTIAL
+#ifdef PM_ZOOM_OPTIMIZED
+      pmforce_zoom_optimized_readout_forces_or_potential(-1);
+#else  /* #ifdef PM_ZOOM_OPTIMIZED */
+      pmforce_uniform_optimized_readout_forces_or_potential(-1);
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+#endif /* #ifdef EVALPOTENTIAL */
+
+      /* get the force components by finite differencing of the potential for each dimension,
+       * and send the results back to the right CPUs
+       */
+      for(int dim = 2; dim >= 0; dim--) /* Calculate each component of the force. */
+        {
+          /* we do the x component last, because for differencing the potential in the x-direction, we need to construct the transpose
+           */
+
+#ifndef FFT_COLUMN_BASED
+          if(dim == 0)
+            {
+              my_slab_transposeA(&myplan, rhogrid,
+                                 forcegrid); /* compute the transpose of the potential field for finite differencing */
+              /* note: for the x-direction, we difference the transposed field */
+
+              for(x = 0; x < GRIDX; x++)
+                for(y = 0; y < myplan.nslab_y; y++)
+                  for(z = 0; z < GRIDZ; z++)
+                    {
+                      int xrr = x + 2, xll = x - 2, xr = x + 1, xl = x - 1; /* periodic neighbours at +-1 and +-2 cells */
+                      if(xr >= GRIDX)
+                        xr -= GRIDX;
+                      if(xrr >= GRIDX)
+                        xrr -= GRIDX;
+                      if(xl < 0)
+                        xl += GRIDX;
+                      if(xll < 0)
+                        xll += GRIDX;
+
+                      forcegrid[NI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[NI(xl, y, z)] - rhogrid[NI(xr, y, z)]) -
+                                                      (1.0 / 6) * (rhogrid[NI(xll, y, z)] - rhogrid[NI(xrr, y, z)]));
+                    }
+
+              my_slab_transposeB(&myplan, forcegrid, rhogrid); /* reverse the transpose from above */
+            }
+          else
+            {
+              for(y = 0; y < GRIDY; y++)
+                for(x = 0; x < myplan.nslab_x; x++)
+                  for(z = 0; z < GRIDZ; z++)
+                    {
+                      if(dim == 1)
+                        {
+                          int yr = y + 1, yl = y - 1, yrr = y + 2, yll = y - 2; /* periodic neighbours at +-1 and +-2 cells */
+                          if(yr >= GRIDY)
+                            yr -= GRIDY;
+                          if(yrr >= GRIDY)
+                            yrr -= GRIDY;
+                          if(yl < 0)
+                            yl += GRIDY;
+                          if(yll < 0)
+                            yll += GRIDY;
+
+                          forcegrid[FI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[FI(x, yl, z)] - rhogrid[FI(x, yr, z)]) -
+                                                          (1.0 / 6) * (rhogrid[FI(x, yll, z)] - rhogrid[FI(x, yrr, z)]));
+                        }
+                      else if(dim == 2)
+                        {
+                          int zr = z + 1, zl = z - 1, zrr = z + 2, zll = z - 2; /* periodic neighbours at +-1 and +-2 cells */
+                          if(zr >= GRIDZ)
+                            zr -= GRIDZ;
+                          if(zrr >= GRIDZ)
+                            zrr -= GRIDZ;
+                          if(zl < 0)
+                            zl += GRIDZ;
+                          if(zll < 0)
+                            zll += GRIDZ;
+
+                          forcegrid[FI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[FI(x, y, zl)] - rhogrid[FI(x, y, zr)]) -
+                                                          (1.0 / 6) * (rhogrid[FI(x, y, zll)] - rhogrid[FI(x, y, zrr)]));
+                        }
+                    }
+            }
+
+#else  /* #ifndef FFT_COLUMN_BASED */
+
+          if(dim == 2)
+            {
+              for(large_array_offset i = 0; i < myplan.base_ncol; i++)
+                {
+                  fft_real *forcep = &forcegrid[GRID2 * i]; /* columns are GRID2 elements apart */
+                  fft_real *potp   = &rhogrid[GRID2 * i];
+
+                  for(int z = 0; z < GRIDZ; z++)
+                    {
+                      int zr  = z + 1;
+                      int zl  = z - 1;
+                      int zrr = z + 2;
+                      int zll = z - 2;
+
+                      if(zr >= GRIDZ)
+                        zr -= GRIDZ;
+                      if(zrr >= GRIDZ)
+                        zrr -= GRIDZ;
+                      if(zl < 0)
+                        zl += GRIDZ;
+                      if(zll < 0)
+                        zll += GRIDZ;
+
+                      forcep[z] = fac * ((4.0 / 3) * (potp[zl] - potp[zr]) - (1.0 / 6) * (potp[zll] - potp[zrr]));
+                    }
+                }
+            }
+          else if(dim == 1)
+            {
+              fft_real *scratch = mymalloc("scratch", myplan.fftsize * sizeof(fft_real)); /* need a third field as scratch space */
+              memcpy(scratch, rhogrid, myplan.fftsize * sizeof(fft_real)); /* work on a copy, rhogrid is still needed for dim 0 */
+
+              my_fft_swap23(&myplan, scratch, forcegrid);
+
+              for(large_array_offset i = 0; i < myplan.ncol_XZ; i++)
+                {
+                  fft_real *forcep = &scratch[GRIDY * i];   /* differenced values are written to scratch */
+                  fft_real *potp   = &forcegrid[GRIDY * i]; /* potential is read from forcegrid */
+
+                  for(int y = 0; y < GRIDY; y++)
+                    {
+                      int yr  = y + 1;
+                      int yl  = y - 1;
+                      int yrr = y + 2;
+                      int yll = y - 2;
+
+                      if(yr >= GRIDY)
+                        yr -= GRIDY;
+                      if(yrr >= GRIDY)
+                        yrr -= GRIDY;
+                      if(yl < 0)
+                        yl += GRIDY;
+                      if(yll < 0)
+                        yll += GRIDY;
+
+                      forcep[y] = fac * ((4.0 / 3) * (potp[yl] - potp[yr]) - (1.0 / 6) * (potp[yll] - potp[yrr]));
+                    }
+                }
+
+              my_fft_swap23back(&myplan, scratch, forcegrid);
+              myfree(scratch);
+            }
+          else if(dim == 0)
+            {
+              fft_real *scratch = mymalloc("scratch", myplan.fftsize * sizeof(fft_real)); /* need a third field as scratch space */
+              memcpy(scratch, rhogrid, myplan.fftsize * sizeof(fft_real));
+
+              my_fft_swap13(&myplan, scratch, forcegrid);
+
+              for(large_array_offset i = 0; i < myplan.ncol_YZ; i++)
+                {
+                  fft_real *forcep = &scratch[GRIDX * i];   /* differenced values are written to scratch */
+                  fft_real *potp   = &forcegrid[GRIDX * i]; /* potential is read from forcegrid */
+
+                  for(int x = 0; x < GRIDX; x++)
+                    {
+                      int xr  = x + 1;
+                      int xl  = x - 1;
+                      int xrr = x + 2;
+                      int xll = x - 2;
+
+                      if(xr >= GRIDX)
+                        xr -= GRIDX;
+                      if(xrr >= GRIDX)
+                        xrr -= GRIDX;
+                      if(xl < 0)
+                        xl += GRIDX;
+                      if(xll < 0)
+                        xll += GRIDX;
+
+                      forcep[x] = fac * ((4.0 / 3) * (potp[xl] - potp[xr]) - (1.0 / 6) * (potp[xll] - potp[xrr]));
+                    }
+                }
+
+              my_fft_swap13back(&myplan, scratch, forcegrid);
+              myfree(scratch);
+            }
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+#ifdef PM_ZOOM_OPTIMIZED
+          pmforce_zoom_optimized_readout_forces_or_potential(dim);
+#else  /* #ifdef PM_ZOOM_OPTIMIZED */
+          pmforce_uniform_optimized_readout_forces_or_potential(dim);
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+        }
+    }
+
+  /* free the grids and the exchange buffers; the latter are not allocated in this function */
+
+  myfree(forcegrid);
+  myfree(rhogrid);
+
+#ifdef PM_ZOOM_OPTIMIZED
+  myfree(localfield_recvcount);
+  myfree(localfield_offset);
+  myfree(localfield_sendcount);
+  myfree(localfield_first);
+  myfree(localfield_data);
+  myfree(localfield_globalindex);
+  myfree(part);
+#else  /* #ifdef PM_ZOOM_OPTIMIZED */
+  myfree(partin);
+  myfree(Rcvpm_offset);
+  myfree(Rcvpm_count);
+  myfree(Sndpm_offset);
+  myfree(Sndpm_count);
+#endif /* #ifdef PM_ZOOM_OPTIMIZED */
+
+  double tend = second();
+
+  if(mode == 0)
+    mpi_printf("PM-PERIODIC: done.  (took %g seconds)\n", timediff(tstart, tend));
+}
+
+#ifdef PM_ZOOM_OPTIMIZED
+
+/*! \brief Comparison kernel for sorting indices into the 'part' array.
+ *
+ *  Orders two entries of the index array by the 'globalindex' field of the
+ *  'part' elements they refer to. Index entries are large_numpart_type.
+ *
+ *  \param[in] a Pointer to the first index entry.
+ *  \param[in] b Pointer to the second index entry.
+ *
+ *  \return -1, +1 or 0 if a sorts before, after or equal to b.
+ */
+static int pm_periodic_compare_sortindex(const void *a, const void *b)
+{
+  const large_numpart_type ia = *(const large_numpart_type *)a; /* same element type as the merge sort kernel uses */
+  const large_numpart_type ib = *(const large_numpart_type *)b;
+
+  if(part[ia].globalindex < part[ib].globalindex)
+    return -1;
+
+  return (part[ia].globalindex > part[ib].globalindex) ? +1 : 0;
+}
+
+/*! \brief Recursive merge-sort kernel used by mysort_pmperiodic().
+ *
+ *  Sorts the index array in place by part[].globalindex; ties keep their order.
+ *
+ *  \param[in, out] b Index array to sort.
+ *  \param[in] n Number of elements to sort.
+ *  \param[out] t Scratch buffer holding at least n entries.
+ *
+ *  \return void
+ */
+static void msort_pmperiodic_with_tmp(large_numpart_type *b, size_t n, large_numpart_type *t)
+{
+  if(n <= 1)
+    return; /* zero or one entry is already sorted */
+
+  /* split into a lower and an upper half and sort each recursively */
+  size_t nleft  = n / 2;
+  size_t nright = n - nleft;
+  large_numpart_type *left  = b;
+  large_numpart_type *right = b + nleft;
+
+  msort_pmperiodic_with_tmp(left, nleft, t);
+  msort_pmperiodic_with_tmp(right, nright, t);
+
+  /* merge both halves into the scratch buffer until one of them runs dry */
+  large_numpart_type *out = t;
+
+  while(nleft > 0 && nright > 0)
+    {
+      if(part[*left].globalindex <= part[*right].globalindex)
+        {
+          *out++ = *left++;
+          nleft--;
+        }
+      else
+        {
+          *out++ = *right++;
+          nright--;
+        }
+    }
+
+  /* flush what remains of the lower half behind the merged entries */
+  if(nleft > 0)
+    memcpy(out, left, nleft * sizeof(large_numpart_type));
+
+  /* leftovers of the upper half already sit at the end of b, so they are not copied */
+  memcpy(b, t, (n - nright) * sizeof(large_numpart_type));
+}
+
+/*! \brief Sort the index array b of n entries by the globalindex of the
+ *         'part' elements they reference.
+ *
+ *  Allocates a scratch buffer and runs the merge sort kernel on b. The
+ *  kernel always moves entries of type large_numpart_type, so the scratch
+ *  buffer is sized from that type rather than from s.
+ *
+ *  \param[in, out] b The index array to sort (large_numpart_type entries).
+ *  \param[in] n Number of entries in array b.
+ *  \param[in] s Size of each entry; expected to be sizeof(large_numpart_type).
+ *  \param[in] cmp Comparison function (unused, the kernel compares inline).
+ */
+static void mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *))
+{
+  /* n entries of the type the kernel actually copies; a smaller s must not shrink the buffer */
+  const size_t size = n * sizeof(large_numpart_type);
+  large_numpart_type *tmp = (large_numpart_type *)mymalloc("tmp", size);
+
+  msort_pmperiodic_with_tmp((large_numpart_type *)b, n, tmp);
+
+  myfree(tmp);
+}
+#endif /* #ifdef PM_ZOOM_OPTIMIZED */
+
+#endif /* #if defined(PMGRID) */
diff --git a/src/amuse/community/arepo/src/gravity/pm/pm_periodic2d.c b/src/amuse/community/arepo/src/gravity/pm/pm_periodic2d.c
new file mode 100644
index 0000000000..6ace982b68
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/pm/pm_periodic2d.c
@@ -0,0 +1,905 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/pm/pm_periodic2d.c
+ * \date        05/2018
+ * \brief       Routines for periodic PM-force computation in 2d.
+ * \details     contains functions:
+ *                void pm2d_init_periodic(void)
+ *                void pm2d_init_periodic_allocate(void)
+ *                void pm2d_init_periodic_free(void)
+ *                void pm2d_force_periodic(int mode)
+ *                int pm2d_periodic_compare_sortindex(const void *a, const
+ *                  void *b)
+ *                static void pm2d_msort_pmperiodic_with_tmp(int *b, size_t n,
+ *                  int *t)
+ *                void pm2d_mysort_pmperiodic(void *b, size_t n, size_t s,
+ *                  int (*cmp) (const void *, const void *))
+ *                void pm2d_periodic_transposeA(fftw_real * field,
+ *                  fftw_real * scratch)
+ *                void pm2d_periodic_transposeB(fftw_real * field,
+ *                  fftw_real * scratch)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef PMGRID
+#ifndef GRAVITY_NOT_PERIODIC
+#ifdef TWODIMS
+
+#ifdef NOTYPEPREFIX_FFTW
+#include <rfftw_mpi.h>
+#else /* #ifdef NOTYPEPREFIX_FFTW */
+#ifdef DOUBLEPRECISION_FFTW
+#include <drfftw_mpi.h> /* double precision FFTW */
+#else                   /* #ifdef DOUBLEPRECISION_FFTW */
+#include <srfftw_mpi.h>
+#endif /* #ifdef DOUBLEPRECISION_FFTW #else */
+#endif /* #ifdef NOTYPEPREFIX_FFTW #else */
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#define PMGRID2 (2 * (PMGRID / 2 + 1))
+
+#if(PMGRID > 1024)
+typedef long long large_array_offset;
+#else  /* #if (PMGRID > 1024) */
+typedef unsigned int large_array_offset;
+#endif /* #if (PMGRID > 1024) #else */
+
+#define d_fftw_real fftw_real
+
+static rfftwnd_mpi_plan fft_forward_plan, fft_inverse_plan; /* created in pm2d_init_periodic() */
+
+static int slab_to_task[PMGRID];  /* per x-slab: rank written by the task holding that slab, summed over all tasks */
+static int *slabs_x_per_task;     /* nslab_x of every task (gathered in pm2d_init_periodic) */
+static int *first_slab_x_of_task; /* slabstart_x of every task (gathered in pm2d_init_periodic) */
+
+static int slabstart_x, nslab_x, slabstart_y, nslab_y, smallest_slab; /* local sizes from FFTW; smallest_slab = min of nslab_x */
+
+static int fftsize, maxfftsize; /* local FFT array size and its maximum over all tasks */
+
+static fftw_real *rhogrid, *forcegrid, *workspace; /* workspace is set to forcegrid on allocation */
+static d_fftw_real *d_rhogrid, *d_forcegrid, *d_workspace; /* the same buffers, cast to d_fftw_real */
+
+static fftw_complex *fft_of_rhogrid; /* rhogrid, cast to a complex array */
+
+static MyFloat to_slab_fac; /* PMGRID / All.BoxSize */
+
+void pm2d_periodic_transposeA(fftw_real *field, fftw_real *scratch);
+void pm2d_periodic_transposeB(fftw_real *field, fftw_real *scratch);
+int pm2d_periodic_compare_sortindex(const void *a, const void *b);
+
+/*! \brief Data for fft slab: one entry per (particle, accessed mesh cell) pair.
+ */
+static struct part_slab_data
+{
+  large_array_offset globalindex; /* PMGRID2 * slab_xx + slab_yy of the accessed mesh cell */
+  int partindex;                  /* (particle index << 2) + (xx << 1) + yy */
+  int localindex;                 /* position of the cell in the local list of unique mesh cells */
+} * part;
+
+static int *part_sortindex; /* indices into part[], brought into globalindex order before use */
+
+/*! \brief Creates the FFTW plans for the parallel 2d FFTs and initializes
+ *         the auxiliary variables describing the slab decomposition.
+ *
+ *  \return void
+ */
+void pm2d_init_periodic(void)
+{
+  int slab;
+  int slab_to_task_local[PMGRID];
+
+  /* smoothing scale, short-range cutoff and position-to-mesh conversion factor */
+  All.Asmth[0] = ASMTH * All.BoxSize / PMGRID;
+  All.Rcut[0]  = RCUT * All.Asmth[0];
+  to_slab_fac  = PMGRID / All.BoxSize;
+
+  /* create the in-place FFTW plans for both transform directions */
+  fft_forward_plan = rfftw2d_mpi_create_plan(MPI_COMM_WORLD, PMGRID, PMGRID, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
+  fft_inverse_plan = rfftw2d_mpi_create_plan(MPI_COMM_WORLD, PMGRID, PMGRID, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
+
+  /* work out the slab ranges held by this task */
+  rfftwnd_mpi_local_sizes(fft_forward_plan, &nslab_x, &slabstart_x, &nslab_y, &slabstart_y, &fftsize);
+
+  /* mark the local x-slabs with the own rank and leave all other entries zero;
+     the MPI_SUM reduction below then combines the arrays of all tasks */
+  memset(slab_to_task_local, 0, sizeof(slab_to_task_local));
+
+  for(slab = slabstart_x; slab < slabstart_x + nslab_x; slab++)
+    slab_to_task_local[slab] = ThisTask;
+
+  MPI_Allreduce(slab_to_task_local, slab_to_task, PMGRID, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+  MPI_Allreduce(&nslab_x, &smallest_slab, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+
+  /* every task learns the slab count and the first slab of all tasks */
+  slabs_x_per_task = (int *)mymalloc("slabs_per_task", NTask * sizeof(int));
+  MPI_Allgather(&nslab_x, 1, MPI_INT, slabs_x_per_task, 1, MPI_INT, MPI_COMM_WORLD);
+
+  first_slab_x_of_task = (int *)mymalloc("first_slab_of_task", NTask * sizeof(int));
+  MPI_Allgather(&slabstart_x, 1, MPI_INT, first_slab_x_of_task, 1, MPI_INT, MPI_COMM_WORLD);
+
+  /* largest local FFT array size over all tasks */
+  MPI_Allreduce(&fftsize, &maxfftsize, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  printf("maxfftsize=%d PMGRID=%d\n", maxfftsize, PMGRID);
+}
+
+/*! \brief Allocates memory for 2d PM algorithm.
+ *
+ *  This function allocates the memory needed to compute the long-range PM
+ *  force: one field to hold the density (and its FFT, and then the
+ *  real-space potential) and one to hold the force field obtained by finite
+ *  differencing, which doubles as workspace for the parallel FFT. In
+ *  addition, the per-particle mesh-cell list 'part' and its sort index are
+ *  allocated.
+ *
+ *  \return void
+ */
+void pm2d_init_periodic_allocate(void)
+{
+  /* byte sizes of the buffers: FFT fields, mesh-cell list, sort index */
+  const size_t grid_bytes  = maxfftsize * sizeof(d_fftw_real);
+  const size_t part_bytes  = 4 * NumPart * sizeof(struct part_slab_data);
+  const size_t index_bytes = 4 * NumPart * sizeof(int);
+
+  /* the two FFT fields */
+  rhogrid   = (fftw_real *)mymalloc("rhogrid", grid_bytes);
+  forcegrid = (fftw_real *)mymalloc("forcegrid", grid_bytes);
+
+  /* four mesh-cell entries per particle, plus one sort index per entry */
+  part           = (struct part_slab_data *)mymalloc("part", part_bytes);
+  part_sortindex = (int *)mymalloc("part_sortindex", index_bytes);
+
+  /* total of the four requests, summed in the order of allocation */
+  const double bytes_tot = (double)grid_bytes + (double)grid_bytes + (double)part_bytes + (double)index_bytes;
+
+  if(ThisTask == 0)
+    printf("Using %g MByte for periodic FFT computation. (presently allocated=%g MB)\n", bytes_tot / (1024.0 * 1024.0),
+           AllocatedBytes / (1024.0 * 1024.0));
+
+  /* the FFT workspace shares the force grid */
+  workspace = forcegrid;
+
+  /* typed views of the same buffers */
+  fft_of_rhogrid = (fftw_complex *)&rhogrid[0];
+  d_rhogrid      = (d_fftw_real *)rhogrid;
+  d_forcegrid    = (d_fftw_real *)forcegrid;
+  d_workspace    = (d_fftw_real *)workspace;
+}
+
+/*! \brief This routine frees the space allocated for the parallel FFT
+ *         algorithm.
+ *
+ *  \return void
+ */
+void pm2d_init_periodic_free(void)
+{
+  /* release the buffers in reverse order of their allocation in pm2d_init_periodic_allocate() */
+  myfree(part_sortindex);
+  myfree(part);
+  myfree(forcegrid);
+  myfree(rhogrid);
+}
+
+/*! \brief Long range periodic 2d gravity.
+ *
+ *  Calculates the long-range periodic force given the particle positions
+ *  using the PM method. The force is Gaussian filtered with Asmth, given in
+ *  mesh-cell units. We carry out a CIC charge assignment, and compute the
+ *  potential by Fourier transform methods. The potential is finite differenced
+ *  using a 4-point finite differencing formula, and the forces are
+ *  interpolated bi-linearly to the particle positions. The CIC kernel is
+ *  deconvolved. Note that the particle distribution is not in the slab
+ *  decomposition that is used for the FFT. Instead, overlapping patches
+ *  between local domains and FFT slabs are communicated as needed.
+ *
+ *  \param[in] mode 0: normal PM force (all NumPart particles); 1: mesh correction vector (first NumGas particles only).
+ *
+ *  \return void
+ */
+void pm2d_force_periodic(int mode)
+{
+  double k2, kx, ky, smth;
+  double dx, dy, weight;
+  double fx, fy, ff;
+  double asmth2, fac, acc_dim;
+  int i, j, N, slab, level, sendTask, recvTask, task;
+  int x, y, yl, yr, yll, yrr, ip, dim;
+  int slab_x, slab_y;
+  int slab_xx, slab_yy;
+  int num_on_grid, num_field_points, pindex, xx, yy;
+  MPI_Status status;
+  int *localfield_count, *localfield_first, *localfield_offset, *localfield_togo;
+  large_array_offset offset, *localfield_globalindex, *import_globalindex;
+  d_fftw_real *localfield_d_data, *import_d_data;
+  fftw_real *localfield_data, *import_data;
+
+  if(ThisTask == 0)
+    {
+      printf("Starting periodic PM-2d calculation.  (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+      myflush(stdout);
+    }
+
+  asmth2 = (2 * M_PI) * All.Asmth[0] / All.BoxSize;
+  asmth2 *= asmth2;
+
+  fac = All.G / (M_PI * All.BoxSize);    /* to get potential */
+  fac *= 1 / (2 * All.BoxSize / PMGRID); /* for finite differencing */
+
+  if(mode == 1)
+    {
+      fac *= 1.0 / (All.G) * All.BoxSize;
+    }
+  else
+    {
+      fac *= All.BoxSize;
+    }
+
+  pm2d_init_periodic_allocate();
+
+  if(mode == 0)
+    N = NumPart;
+  else
+    N = NumGas;
+
+  /* determine the cells each particles accesses */
+  for(i = 0, num_on_grid = 0; i < N; i++)
+    {
+      slab_x = (int)(to_slab_fac * P[i].Pos[0]);
+      slab_y = (int)(to_slab_fac * P[i].Pos[1]);
+
+      if(slab_x >= PMGRID)
+        slab_x = PMGRID - 1;
+      if(slab_y >= PMGRID)
+        slab_y = PMGRID - 1;
+
+      for(xx = 0; xx < 2; xx++)
+        for(yy = 0; yy < 2; yy++)
+          {
+            slab_xx = slab_x + xx;
+            slab_yy = slab_y + yy;
+
+            if(slab_xx >= PMGRID)
+              slab_xx -= PMGRID;
+            if(slab_yy >= PMGRID)
+              slab_yy -= PMGRID;
+
+            offset = ((large_array_offset)PMGRID2) * slab_xx + slab_yy; /* widen before multiplying */
+
+            part[num_on_grid].partindex   = (i << 2) + (xx << 1) + yy;
+            part[num_on_grid].globalindex = offset;
+            part_sortindex[num_on_grid]   = num_on_grid;
+            num_on_grid++;
+          }
+    }
+
+  /* note: num_on_grid will be  4 times larger than the particle number,
+     but num_field_points will generally be much smaller */
+
+  /* bring the part-field into the order of the accessed cells. This allows the removal of duplicates */
+  pm2d_mysort_pmperiodic(part_sortindex, num_on_grid, sizeof(int), pm2d_periodic_compare_sortindex);
+
+  /* determine the number of unique field points */
+  for(i = 0, num_field_points = 0; i < num_on_grid; i++)
+    {
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex == part[part_sortindex[i - 1]].globalindex)
+          continue;
+
+      num_field_points++;
+    }
+
+  /* allocate the local field */
+  localfield_globalindex = (large_array_offset *)mymalloc("localfield_globalindex", num_field_points * sizeof(large_array_offset));
+  localfield_d_data      = (d_fftw_real *)mymalloc("localfield_d_data", num_field_points * sizeof(d_fftw_real));
+  localfield_data        = (fftw_real *)localfield_d_data;
+  localfield_first       = (int *)mymalloc("localfield_first", NTask * sizeof(int));
+  localfield_count       = (int *)mymalloc("localfield_count", NTask * sizeof(int));
+  localfield_offset      = (int *)mymalloc("localfield_offset", NTask * sizeof(int));
+  localfield_togo        = (int *)mymalloc("localfield_togo", NTask * NTask * sizeof(int));
+
+  for(i = 0; i < NTask; i++)
+    {
+      localfield_first[i] = 0;
+      localfield_count[i] = 0;
+    }
+
+  /* establish the cross link between the part[] array and the local list of
+     mesh points. Also, count on which CPU how many of the needed field points are stored */
+  for(i = 0, num_field_points = 0; i < num_on_grid; i++)
+    {
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex)
+          num_field_points++;
+
+      part[part_sortindex[i]].localindex = num_field_points;
+
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex == part[part_sortindex[i - 1]].globalindex)
+          continue;
+
+      localfield_globalindex[num_field_points] = part[part_sortindex[i]].globalindex;
+
+      slab = part[part_sortindex[i]].globalindex / PMGRID2;
+      task = slab_to_task[slab];
+      if(localfield_count[task] == 0)
+        localfield_first[task] = num_field_points;
+      localfield_count[task]++;
+    }
+  num_field_points++;
+
+  for(i = 1, localfield_offset[0] = 0; i < NTask; i++)
+    localfield_offset[i] = localfield_offset[i - 1] + localfield_count[i - 1];
+
+  /* now bin the local particle data onto the mesh list */
+
+  for(i = 0; i < num_field_points; i++)
+    localfield_d_data[i] = 0;
+
+  for(i = 0; i < num_on_grid; i += 4)
+    {
+      pindex = (part[i].partindex >> 2);
+
+      slab_x = (int)(to_slab_fac * P[pindex].Pos[0]);
+      slab_y = (int)(to_slab_fac * P[pindex].Pos[1]);
+
+      dx = to_slab_fac * P[pindex].Pos[0] - slab_x;
+      dy = to_slab_fac * P[pindex].Pos[1] - slab_y;
+
+      weight = P[pindex].Mass;
+
+      localfield_d_data[part[i + 0].localindex] += weight * (1.0 - dx) * (1.0 - dy);
+      localfield_d_data[part[i + 1].localindex] += weight * (1.0 - dx) * dy;
+      localfield_d_data[part[i + 2].localindex] += weight * (dx) * (1.0 - dy);
+      localfield_d_data[part[i + 3].localindex] += weight * (dx)*dy;
+    }
+
+  /* clear local FFT-mesh density field */
+  for(i = 0; i < fftsize; i++)
+    d_rhogrid[i] = 0;
+
+  /* exchange data and add contributions to the local mesh-path */
+
+  MPI_Allgather(localfield_count, NTask, MPI_INT, localfield_togo, NTask, MPI_INT, MPI_COMM_WORLD);
+
+  for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+    {
+      sendTask = ThisTask;
+      recvTask = ThisTask ^ level;
+
+      if(recvTask < NTask)
+        {
+          if(level > 0)
+            {
+              import_d_data =
+                  (d_fftw_real *)mymalloc("import_d_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(d_fftw_real));
+              import_globalindex = (large_array_offset *)mymalloc(
+                  "import_globalindex", localfield_togo[recvTask * NTask + ThisTask] * sizeof(large_array_offset));
+
+              if(localfield_togo[sendTask * NTask + recvTask] > 0 || localfield_togo[recvTask * NTask + sendTask] > 0)
+                {
+                  MPI_Sendrecv(localfield_d_data + localfield_offset[recvTask],
+                               localfield_togo[sendTask * NTask + recvTask] * sizeof(d_fftw_real), MPI_BYTE, recvTask, TAG_NONPERIOD_A,
+                               import_d_data, localfield_togo[recvTask * NTask + sendTask] * sizeof(d_fftw_real), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_A, MPI_COMM_WORLD, &status);
+
+                  MPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                               localfield_togo[sendTask * NTask + recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_B, import_globalindex,
+                               localfield_togo[recvTask * NTask + sendTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_B, MPI_COMM_WORLD, &status);
+                }
+            }
+          else
+            {
+              import_d_data      = localfield_d_data + localfield_offset[ThisTask];
+              import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+            }
+
+          for(i = 0; i < localfield_togo[recvTask * NTask + sendTask]; i++)
+            {
+              /* determine offset in local FFT slab */
+              offset = import_globalindex[i] - first_slab_x_of_task[ThisTask] * ((large_array_offset)PMGRID2);
+
+              d_rhogrid[offset] += import_d_data[i];
+            }
+
+          if(level > 0)
+            {
+              myfree(import_globalindex);
+              myfree(import_d_data);
+            }
+        }
+    }
+
+  /* Do the FFT of the density field */
+
+  rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
+
+  /* multiply with Green's function for the potential */
+
+  for(y = slabstart_y; y < slabstart_y + nslab_y; y++)
+    for(x = 0; x < PMGRID; x++)
+      {
+        if(x > PMGRID / 2)
+          kx = x - PMGRID;
+        else
+          kx = x;
+        if(y > PMGRID / 2)
+          ky = y - PMGRID;
+        else
+          ky = y;
+
+        k2 = kx * kx + ky * ky;
+
+        if(k2 > 0)
+          {
+            smth = -exp(-k2 * asmth2) / k2;
+
+            /* do deconvolution */
+
+            fx = fy = 1;
+            if(kx != 0)
+              {
+                fx = (M_PI * kx) / PMGRID;
+                fx = sin(fx) / fx;
+              }
+            if(ky != 0)
+              {
+                fy = (M_PI * ky) / PMGRID;
+                fy = sin(fy) / fy;
+              }
+            ff = 1 / (fx * fy);
+            smth *= ff * ff * ff * ff;
+
+            /* end deconvolution */
+
+            ip = PMGRID * (y - slabstart_y) + x;
+            fft_of_rhogrid[ip].re *= smth;
+            fft_of_rhogrid[ip].im *= smth;
+          }
+      }
+
+  if(slabstart_y == 0)
+    fft_of_rhogrid[0].re = fft_of_rhogrid[0].im = 0.0;
+
+  /* Do the inverse FFT to get the potential */
+
+  rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
+
+#ifdef EVALPOTENTIAL /* now read out the potential */
+  if(mode == 0)
+    {
+      for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+        {
+          sendTask = ThisTask;
+          recvTask = ThisTask ^ level;
+
+          if(recvTask < NTask)
+            {
+              if(level > 0)
+                {
+                  import_data = (fftw_real *)mymalloc("import_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(fftw_real));
+                  import_globalindex = (large_array_offset *)mymalloc(
+                      "import_globalindex", localfield_togo[recvTask * NTask + ThisTask] * sizeof(large_array_offset));
+
+                  if(localfield_togo[sendTask * NTask + recvTask] > 0 || localfield_togo[recvTask * NTask + sendTask] > 0)
+                    {
+                      MPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                                   localfield_togo[sendTask * NTask + recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                   TAG_NONPERIOD_C, import_globalindex,
+                                   localfield_togo[recvTask * NTask + sendTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                   TAG_NONPERIOD_C, MPI_COMM_WORLD, &status);
+                    }
+                }
+              else
+                {
+                  import_data        = localfield_data + localfield_offset[ThisTask];
+                  import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+                }
+
+              for(i = 0; i < localfield_togo[recvTask * NTask + sendTask]; i++)
+                {
+                  offset         = import_globalindex[i] - first_slab_x_of_task[ThisTask] * ((large_array_offset)PMGRID2);
+                  import_data[i] = rhogrid[offset];
+                }
+
+              if(level > 0)
+                {
+                  MPI_Sendrecv(import_data, localfield_togo[recvTask * NTask + sendTask] * sizeof(fftw_real), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_A, localfield_data + localfield_offset[recvTask],
+                               localfield_togo[sendTask * NTask + recvTask] * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_NONPERIOD_A,
+                               MPI_COMM_WORLD, &status);
+
+                  myfree(import_globalindex);
+                  myfree(import_data);
+                }
+            }
+        }
+
+      /* read out the potential values, which all have been assembled in localfield_data */
+
+      double pot;
+
+      for(i = 0, j = 0; i < N; i++)
+        {
+          while(j < num_on_grid && (part[j].partindex >> 2) != i)
+            j++;
+
+          slab_x = (int)(to_slab_fac * P[i].Pos[0]);
+          dx     = to_slab_fac * P[i].Pos[0] - slab_x;
+
+          slab_y = (int)(to_slab_fac * P[i].Pos[1]);
+          dy     = to_slab_fac * P[i].Pos[1] - slab_y;
+
+          pot = +localfield_data[part[j + 0].localindex] * (1.0 - dx) * (1.0 - dy) +
+                localfield_data[part[j + 1].localindex] * (1.0 - dx) * dy + localfield_data[part[j + 2].localindex] * dx * (1.0 - dy) +
+                localfield_data[part[j + 3].localindex] * dx * dy;
+
+          P[i].PM_Potential += pot * fac * (2 * All.BoxSize / PMGRID);
+          /* compensate the finite differencing factor */
+        }
+    }
+#endif /* #ifdef EVALPOTENTIAL */
+
+  /* get the force components by finite differencing the potential for each dimension,
+     and send back the results to the right CPUs */
+
+  for(dim = 1; dim >= 0; dim--) /* Calculate each component of the force. */
+    { /* we do the x component last, because for differencing the potential in the x-direction, we need to construct the transpose */
+      if(dim == 0)
+        pm2d_periodic_transposeA(rhogrid, forcegrid); /* compute the transpose of the potential field */
+
+      for(xx = slabstart_x; xx < (slabstart_x + nslab_x); xx++)
+        for(y = 0; y < PMGRID; y++)
+          {
+            x = xx - slabstart_x;
+
+            yrr = yll = yr = yl = y;
+
+            yr  = y + 1;
+            yl  = y - 1;
+            yrr = y + 2;
+            yll = y - 2;
+            if(yr >= PMGRID)
+              yr -= PMGRID;
+            if(yrr >= PMGRID)
+              yrr -= PMGRID;
+            if(yl < 0)
+              yl += PMGRID;
+            if(yll < 0)
+              yll += PMGRID;
+
+            if(dim == 0)
+              {
+                forcegrid[x + y * nslab_x] = fac * ((4.0 / 3) * (rhogrid[(x + yl * nslab_x)] - rhogrid[(x + yr * nslab_x)]) -
+                                                    (1.0 / 6) * (rhogrid[(x + yll * nslab_x)] - rhogrid[(x + yrr * nslab_x)]));
+              }
+            else
+              {
+                forcegrid[PMGRID2 * x + y] = fac * ((4.0 / 3) * (rhogrid[PMGRID2 * x + yl] - rhogrid[PMGRID2 * x + yr]) -
+                                                    (1.0 / 6) * (rhogrid[PMGRID2 * x + yll] - rhogrid[PMGRID2 * x + yrr]));
+              }
+          }
+
+      if(dim == 0)
+        pm2d_periodic_transposeB(forcegrid, rhogrid); /* transpose the force field back, using rhogrid as scratch space */
+
+      /* send the force components to the right processors */
+
+      for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+        {
+          sendTask = ThisTask;
+          recvTask = ThisTask ^ level;
+
+          if(recvTask < NTask)
+            {
+              if(level > 0)
+                {
+                  import_data = (fftw_real *)mymalloc("import_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(fftw_real));
+                  import_globalindex = (large_array_offset *)mymalloc(
+                      "import_globalindex", localfield_togo[recvTask * NTask + ThisTask] * sizeof(large_array_offset));
+
+                  if(localfield_togo[sendTask * NTask + recvTask] > 0 || localfield_togo[recvTask * NTask + sendTask] > 0)
+                    {
+                      MPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                                   localfield_togo[sendTask * NTask + recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                   TAG_NONPERIOD_C, import_globalindex,
+                                   localfield_togo[recvTask * NTask + sendTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                   TAG_NONPERIOD_C, MPI_COMM_WORLD, &status);
+                    }
+                }
+              else
+                {
+                  import_data        = localfield_data + localfield_offset[ThisTask];
+                  import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+                }
+
+              for(i = 0; i < localfield_togo[recvTask * NTask + sendTask]; i++)
+                {
+                  /* determine offset in local FFT slab */
+                  offset         = import_globalindex[i] - first_slab_x_of_task[ThisTask] * ((large_array_offset)PMGRID2);
+                  import_data[i] = forcegrid[offset];
+                }
+
+              if(level > 0)
+                {
+                  MPI_Sendrecv(import_data, localfield_togo[recvTask * NTask + sendTask] * sizeof(fftw_real), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_A, localfield_data + localfield_offset[recvTask],
+                               localfield_togo[sendTask * NTask + recvTask] * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_NONPERIOD_A,
+                               MPI_COMM_WORLD, &status);
+
+                  myfree(import_globalindex);
+                  myfree(import_data);
+                }
+            }
+        }
+
+      /* read out the forces, which all have been assembled in localfield_data */
+
+      for(i = 0, j = 0; i < N; i++)
+        {
+          while(j < num_on_grid && (part[j].partindex >> 2) != i)
+            j++;
+
+          slab_x = (int)(to_slab_fac * P[i].Pos[0]);
+          dx     = to_slab_fac * P[i].Pos[0] - slab_x;
+
+          slab_y = (int)(to_slab_fac * P[i].Pos[1]);
+          dy     = to_slab_fac * P[i].Pos[1] - slab_y;
+
+          acc_dim = +localfield_data[part[j + 0].localindex] * (1.0 - dx) * (1.0 - dy) +
+                    localfield_data[part[j + 1].localindex] * (1.0 - dx) * dy +
+                    localfield_data[part[j + 2].localindex] * (dx) * (1.0 - dy) + localfield_data[part[j + 3].localindex] * (dx)*dy;
+
+          P[i].GravPM[dim] += acc_dim;
+        }
+    }
+
+  /* free locallist */
+  myfree(localfield_togo);
+  myfree(localfield_offset);
+  myfree(localfield_count);
+  myfree(localfield_first);
+  myfree(localfield_d_data);
+  myfree(localfield_globalindex);
+
+  pm2d_init_periodic_free();
+
+  mpi_printf("done PM-2d.\n");
+}
+
+/*! \brief Ordering function for indices into the 'part' array.
+ *
+ *  The two indices are ordered by the globalindex of the entries they select.
+ *
+ *  \param[in] a Pointer to the first index into the part array (int).
+ *  \param[in] b Pointer to the second index into the part array (int).
+ *
+ *  \return -1, 0 or +1 if part[*a].globalindex is smaller than, equal to or larger than part[*b].globalindex
+ */
+int pm2d_periodic_compare_sortindex(const void *a, const void *b)
+{
+  const large_array_offset lhs = part[*(const int *)a].globalindex;
+  const large_array_offset rhs = part[*(const int *)b].globalindex;
+
+  if(lhs != rhs)
+    return (lhs < rhs) ? -1 : +1;
+
+  return 0;
+}
+
+/*! \brief Recursive merge sort kernel of the 2d periodic PM algorithm.
+ *
+ *  \param[in, out] b Index array to be sorted by part[].globalindex.
+ *  \param[in] n Number of entries in b.
+ *  \param[in, out] t Scratch array with room for at least n entries.
+ *
+ *  \return void
+ */
+static void pm2d_msort_pmperiodic_with_tmp(int *b, size_t n, int *t)
+{
+  if(n <= 1)
+    return; /* zero or one entry is already sorted */
+
+  const size_t nleft  = n / 2;
+  const size_t nright = n - nleft;
+  int *left           = b;
+  int *right          = b + nleft;
+
+  /* sort both halves in place; the scratch buffer t is shared by all recursion levels */
+  pm2d_msort_pmperiodic_with_tmp(left, nleft, t);
+  pm2d_msort_pmperiodic_with_tmp(right, nright, t);
+
+  size_t il = 0, ir = 0, nout = 0;
+
+  /* merge into t until one half runs out; '<=' takes the left entry on ties */
+  while(il < nleft && ir < nright)
+    {
+      if(part[left[il]].globalindex <= part[right[ir]].globalindex)
+        {
+          t[nout++] = left[il];
+          il++;
+        }
+      else
+        {
+          t[nout++] = right[ir];
+          ir++;
+        }
+    }
+
+  /* leftover entries of the left half follow the merged ones */
+  if(il < nleft)
+    memcpy(t + nout, left + il, (nleft - il) * sizeof(int));
+
+  /* unmerged entries of the right half already sit at the end of b, so only the rest is copied back */
+  memcpy(b, t, (n - (nright - ir)) * sizeof(int));
+}
+
+/*! \brief Sorts an int index array by the globalindex of the referenced
+ *         entries of the 'part' array (2d periodic PM algorithm).
+ *
+ *  Allocates a scratch buffer of n * s bytes and hands the work over to
+ *  pm2d_msort_pmperiodic_with_tmp.
+ *
+ *  \param[in, out] b Index array to be sorted (treated as int array).
+ *  \param[in] n Number of elements in array b.
+ *  \param[in] s Size in bytes of one element of b (scratch buffer sizing).
+ *  \param[in] cmp Compare function (unused).
+ */
+void pm2d_mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *))
+{
+  int *keys    = (int *)b;
+  int *scratch = (int *)mymalloc("tmp", n * s);
+
+  /* the ordering criterion is hard-wired in the merge kernel; cmp is not consulted */
+  pm2d_msort_pmperiodic_with_tmp(keys, n, scratch);
+
+  myfree(scratch);
+}
+
+/*! \brief Transpose operation for 2d fft.
+ *
+ *  Used for transposing rhogrid: the local rows are packed per destination
+ *  task into scratch and then exchanged, with the result received in field.
+ *
+ *  \param[in, out] field Field that needs to be transposed.
+ *  \param[in, out] scratch Temporary data.
+ *  \return void
+ */
+void pm2d_periodic_transposeA(fftw_real *field, fftw_real *scratch)
+{
+  int x, y, task;
+
+  for(task = 0; task < NTask; task++)
+    for(x = 0; x < nslab_x; x++)
+      for(y = first_slab_x_of_task[task]; y < first_slab_x_of_task[task] + slabs_x_per_task[task]; y++)
+        {
+          scratch[(first_slab_x_of_task[task] * nslab_x + x * slabs_x_per_task[task] + (y - first_slab_x_of_task[task]))] =
+              field[PMGRID2 * x + y];
+        }
+
+#ifndef NO_ISEND_IRECV_IN_DOMAIN
+  MPI_Request *requests;
+  int nrequests = 0;
+
+  requests = (MPI_Request *)mymalloc("requests", 2 * NTask * sizeof(MPI_Request)); /* one send and one receive per task */
+
+  for(task = 0; task < NTask; task++)
+    {
+      MPI_Isend(scratch + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task,
+                TAG_KEY, MPI_COMM_WORLD, &requests[nrequests++]);
+
+      MPI_Irecv(field + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task,
+                TAG_KEY, MPI_COMM_WORLD, &requests[nrequests++]);
+    }
+
+  MPI_Waitall(nrequests, requests, MPI_STATUSES_IGNORE);
+  myfree(requests);
+#else  /* #ifndef NO_ISEND_IRECV_IN_DOMAIN */
+  int ngrp;
+
+  for(ngrp = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      task = ThisTask ^ ngrp;
+
+      if(task < NTask)
+        {
+          MPI_Sendrecv(scratch + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE,
+                       task, TAG_KEY, field + first_slab_x_of_task[task] * nslab_x,
+                       nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task, TAG_KEY, MPI_COMM_WORLD,
+                       MPI_STATUS_IGNORE);
+        }
+    }
+#endif /* #ifndef NO_ISEND_IRECV_IN_DOMAIN #else */
+}
+
+/*! \brief Transpose operation for 2d fft.
+ *
+ *  Used for forcegrid transpose: the per-task blocks of field are exchanged
+ *  into scratch and then unpacked row by row back into field.
+ *
+ *  \param[in, out] field Field that needs to be transposed.
+ *  \param[in, out] scratch Temporary data.
+ *  \return void
+ */
+void pm2d_periodic_transposeB(fftw_real *field, fftw_real *scratch)
+{
+  int x, y, task;
+
+#ifndef NO_ISEND_IRECV_IN_DOMAIN
+  MPI_Request *requests;
+  int nrequests = 0;
+
+  requests = (MPI_Request *)mymalloc("requests", 2 * NTask * sizeof(MPI_Request)); /* one send and one receive per task */
+
+  for(task = 0; task < NTask; task++)
+    {
+      MPI_Isend(field + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task,
+                TAG_KEY, MPI_COMM_WORLD, &requests[nrequests++]);
+
+      MPI_Irecv(scratch + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task,
+                TAG_KEY, MPI_COMM_WORLD, &requests[nrequests++]);
+    }
+
+  MPI_Waitall(nrequests, requests, MPI_STATUSES_IGNORE);
+  myfree(requests);
+
+#else  /* #ifndef NO_ISEND_IRECV_IN_DOMAIN */
+  int ngrp;
+
+  for(ngrp = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      task = ThisTask ^ ngrp;
+
+      if(task < NTask)
+        {
+          MPI_Sendrecv(field + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE,
+                       task, TAG_KEY, scratch + first_slab_x_of_task[task] * nslab_x,
+                       nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task, TAG_KEY, MPI_COMM_WORLD,
+                       MPI_STATUS_IGNORE);
+        }
+    }
+#endif /* #ifndef NO_ISEND_IRECV_IN_DOMAIN #else */
+
+  for(task = 0; task < NTask; task++)
+    for(x = 0; x < nslab_x; x++)
+      for(y = first_slab_x_of_task[task]; y < first_slab_x_of_task[task] + slabs_x_per_task[task]; y++)
+        {
+          field[PMGRID2 * x + y] =
+              scratch[(first_slab_x_of_task[task] * nslab_x + x * slabs_x_per_task[task] + (y - first_slab_x_of_task[task]))];
+        }
+}
+
+#endif /* #ifdef TWODIMS */
+#endif /* #ifndef GRAVITY_NOT_PERIODIC */
+#endif /* #ifdef PMGRID */
diff --git a/src/amuse/community/arepo/src/hydro/finite_volume_solver.c b/src/amuse/community/arepo/src/hydro/finite_volume_solver.c
new file mode 100644
index 0000000000..287fe14bb4
--- /dev/null
+++ b/src/amuse/community/arepo/src/hydro/finite_volume_solver.c
@@ -0,0 +1,1895 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/hydro/finite_volume_solver.c
+ * \date        05/2018
+ * \brief       Core algorithms of the finite-volume solver.
+ * \details     contains functions:
+ *                void compute_interface_fluxes(tessellation * T)
+ *                void backup_face_areas(tessellation * T)
+ *                void restore_face_areas(tessellation * T)
+ *                int face_get_state(tessellation * T, int p, int i, struct
+ *                  state *st)
+ *                void face_boundary_check_vertex(tessellation * T, int p,
+ *                  MyFloat * velx, MyFloat * vely, MyFloat * velz)
+ *                void face_boundary_check(point * p, double *velx, double
+ *                  *vely, double *velz)
+ *                int face_check_responsibility_of_this_task(tessellation * T,
+ *                  int p1, int p2, struct state *st_L, struct state *st_R)
+ *                double face_timestep(struct state *state_L, struct state
+ *                  *state_R, double *hubble_a, double *atime)
+ *                void state_convert_to_local_frame(struct state *st, double
+ *                  *vel_face, double hubble_a, double atime)
+ *                void face_do_time_extrapolation(struct state *delta,
+ *                  struct state *st, double atime)
+ *                void face_do_spatial_extrapolation(struct state *delta,
+ *                  struct state *st, struct state *st_other)
+ *                void face_do_spatial_extrapolation_single_quantity(double
+ *                  *delta, double st, double st_other, MySingle * grad,
+ *                  double *dx, double *r)
+ *                void face_add_extrapolations(struct state *st_face, struct
+ *                  state *delta_time, struct state *delta_space, struct
+ *                  fvs_stat *stat)
+ *                void face_add_extrapolation(struct state *st_face, struct
+ *                  state *delta, struct fvs_stat *stat)
+ *                void face_add_extrapolation_with_check(struct state *st_face,
+ *                  struct state *delta, struct fvs_stat *stat)
+ *                void face_turn_velocities(struct state *st, struct geometry
+ *                  *geom)
+ *                void solve_advection(struct state *st_L, struct state *st_R,
+ *                  struct state_face *st_face, struct geometry *geom,
+ *                  double *vel_face)
+ *                void face_turnback_velocities(struct state_face *st_face,
+ *                  struct geometry *geom)
+ *                void face_set_scalar_states_and_fluxes(struct state *st_L,
+ *                  struct state *st_R, struct state_face *st_face, struct
+ *                  fluxes *flux)
+ *                void flux_convert_to_lab_frame(struct state *st_L, struct
+ *                  state *st_R, double *vel_face, struct fluxes *flux)
+ *                void face_turn_momentum_flux(struct fluxes *flux, struct
+ *                  geometry *geom)
+ *                void face_get_fluxes(struct state *st_L, struct state *st_R,
+ *                  struct state_face *st_face, struct fluxes *flux, struct
+ *                  geometry *geom, double *vel_face)
+ *                void face_limit_fluxes(struct state *st_L, struct state
+ *                  *st_R, struct state *st_center_L, struct state
+ *                  *st_center_R, struct fluxes *flux, double dt, double
+ *                  *count, double *count_reduced)
+ *                void face_clear_fluxes(struct fluxes *flux)
+ *                void face_add_fluxes_advection(struct state_face *st_face,
+ *                  struct fluxes *flux, struct geometry *geom, double
+ *                  *vel_face)
+ *                int flux_list_data_compare(const void *a, const void *b)
+ *                void apply_flux_list(void)
+ *                void fvs_initialize_statistics(struct fvs_stat *stat)
+ *                void fvs_evaluate_statistics(struct fvs_stat *stat)
+ *                void apply_spherical_source_terms()
+ *                void add_spin_source_term_from_grid_movement()
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 17.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+
+/*! \brief One queued flux contribution for a cell owned by another task (filled in compute_interface_fluxes()).
+ */
+static struct flux_list_data
+{
+  int task, index; /* destination task (DP[].task) and cell index on that task (DP[].originalindex) */
+  double dM, dP[3]; /* signed mass and momentum increments (dir * flux) */
+#ifdef MHD
+  double dB[3]; /* signed increments of the conserved magnetic field */
+#endif /* #ifdef MHD */
+
+#ifndef ISOTHERM_EQS
+  double dEnergy; /* signed energy increment */
+#endif /* #ifndef ISOTHERM_EQS */
+#ifdef MAXSCALARS
+  double dConservedScalars[MAXSCALARS]; /* signed increments of the passive scalars; the first N_Scalar entries are set */
+#endif /* #ifdef MAXSCALARS */
+} * FluxList; /* buffer with room for MaxNflux entries, of which the first Nflux are in use */
+
+static int Nflux, MaxNflux; /* used entries in FluxList / currently allocated number of entries */
+
+struct primexch *PrimExch; /* cell data for points with DP[].task != ThisTask (indexed by DP[].index) */
+struct grad_data *GradExch; /* NOTE(review): presumably the gradients matching PrimExch - not referenced nearby, confirm */
+
+/*! state on a face determined by Riemann solver */
+struct state_face state_face;
+
+/*! flux through a face */
+struct fluxes fluxes;
+
+struct geometry geom; /* geometry of the face currently processed, filled by face_get_normals() */
+
+#ifdef ONEDIMS_SPHERICAL
+void apply_spherical_source_terms();
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+static void face_add_extrapolation_with_check(struct state *st_face, struct state *delta, struct fvs_stat *stat);
+static void fvs_initialize_statistics(struct fvs_stat *stat);
+static void fvs_evaluate_statistics(struct fvs_stat *stat);
+
+#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS
+void backup_face_areas(tessellation *T);
+void restore_face_areas(tessellation *T);
+#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */
+
+/*! \brief Main routine to compute fluxes across interfaces given a mesh T.
+ *
+ *  Adds these fluxes to the conserved variables of local cells; fluxes for cells on other tasks go through FluxList.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *
+ *  \return void
+ */
+void compute_interface_fluxes(tessellation *T)
+{
+#ifdef NOHYDRO
+  return;
+#endif /* #ifdef NOHYDRO */
+  TIMER_START(CPU_FLUXES);
+
+  int i, j;
+  double count = 0, count_reduced = 0, tot_count, tot_count_reduced;
+  double face_dt, hubble_a, atime;
+  struct fvs_stat stat;
+#ifdef MHD
+  double sqrtatime;
+#endif /* #ifdef MHD */
+
+#ifdef GODUNOV_STATS
+  FILE *fdstats;
+  char buf[1000];
+
+  snprintf(buf, sizeof(buf), "%s/godunov_stats_%d.txt", All.OutputDir, ThisTask); /* bounded: OutputDir length is not known here */
+  if(!(fdstats = fopen(buf, "w")))
+    terminate("error in opening file '%s'", buf);
+#endif /* #ifdef GODUNOV_STATS */
+
+#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS
+  backup_face_areas(T);
+#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */
+
+  fvs_initialize_statistics(&stat);
+
+  MaxNflux = T->Indi.AllocFacNflux;
+  Nflux    = 0;
+  FluxList = mymalloc_movable(&FluxList, "FluxList", MaxNflux * sizeof(struct flux_list_data));
+
+  face *VF  = T->VF;
+  point *DP = T->DP;
+
+  for(i = 0; i < T->Nvf; i++)
+    {
+      struct state state_L, state_center_L, delta_time_L, delta_space_L;
+      struct state state_R, state_center_R, delta_time_R, delta_space_R;
+
+      face_dt = 0; /* the default is that this face is not active */
+
+      /* calculate normal vectors */
+      if(face_get_normals(T, i, &geom))
+        continue;
+
+      /* get the values of the states at the center of the cells */
+      if(face_get_state(T, VF[i].p1, i, &state_center_L))
+        continue;
+
+      if(face_get_state(T, VF[i].p2, i, &state_center_R))
+        continue;
+
+      /* only treat faces where one of the two sides is active */
+      if(!TimeBinSynchronized[state_center_L.timeBin] && !TimeBinSynchronized[state_center_R.timeBin])
+        continue;
+
+      /* clarify whether the face should be done by this task (it may be present also on another task) */
+      if(face_check_responsibility_of_this_task(T, VF[i].p1, VF[i].p2, &state_center_L, &state_center_R))
+        continue;
+
+      /* calculate timestep of the face */
+      face_dt = face_timestep(&state_center_L, &state_center_R, &hubble_a, &atime);
+#ifdef MHD
+      sqrtatime = sqrt(atime);
+#endif /* #ifdef MHD */
+
+      if(!(face_dt > 0))
+        continue;
+
+      /* now estimate the velocity of the midpoint of the face based on the velocities of the generators of the mesh. */
+      double vel_face[3];
+
+      if(All.ComovingIntegrationOn)
+        for(j = 0; j < 3; j++)
+          {
+            state_center_L.velVertex[j] /= atime; /* convert vertex motion to peculiar velocity */
+            state_center_R.velVertex[j] /= atime;
+          }
+
+      /* rough motion of mid-point of edge */
+      vel_face[0] = 0.5 * (state_center_L.velVertex[0] + state_center_R.velVertex[0]);
+      vel_face[1] = 0.5 * (state_center_L.velVertex[1] + state_center_R.velVertex[1]);
+      vel_face[2] = 0.5 * (state_center_L.velVertex[2] + state_center_R.velVertex[2]);
+
+      double cx, cy, cz, facv;
+
+      cx = VF[i].cx - 0.5 * (DP[VF[i].p2].x + DP[VF[i].p1].x);
+      cy = VF[i].cy - 0.5 * (DP[VF[i].p2].y + DP[VF[i].p1].y);
+      cz = VF[i].cz - 0.5 * (DP[VF[i].p2].z + DP[VF[i].p1].z);
+
+      facv = (cx * (state_center_L.velVertex[0] - state_center_R.velVertex[0]) +
+              cy * (state_center_L.velVertex[1] - state_center_R.velVertex[1]) +
+              cz * (state_center_L.velVertex[2] - state_center_R.velVertex[2])) /
+             geom.nn;
+
+      /* put in a limiter for highly distorted cells */
+      double cc = sqrt(cx * cx + cy * cy + cz * cz);
+      if(cc > 0.9 * geom.nn)
+        facv *= (0.9 * geom.nn) / cc;
+
+      vel_face[0] += facv * geom.nx;
+      vel_face[1] += facv * geom.ny;
+      vel_face[2] += facv * geom.nz;
+
+#if defined(VORONOI_STATIC_MESH)
+      vel_face[0] = 0;
+      vel_face[1] = 0;
+      vel_face[2] = 0;
+#endif /* #if defined(VORONOI_STATIC_MESH) */
+
+#if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD)
+      double vel_face_turned[3];
+      /* for these riemann solvers, the riemann problem is not solved in the
+       * restframe of the face, instead the mesh motion is accounted for via
+       * an advection step.
+       */
+
+      /* turn the face velocity */
+      vel_face_turned[0] = vel_face[0] * geom.nx + vel_face[1] * geom.ny + vel_face[2] * geom.nz;
+      vel_face_turned[1] = vel_face[0] * geom.mx + vel_face[1] * geom.my + vel_face[2] * geom.mz;
+      vel_face_turned[2] = vel_face[0] * geom.px + vel_face[1] * geom.py + vel_face[2] * geom.pz;
+#endif /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) */
+
+      state_convert_to_local_frame(&state_center_L, vel_face, hubble_a, atime);
+      state_convert_to_local_frame(&state_center_R, vel_face, hubble_a, atime);
+
+      /* copy center state to state at interface, then add extrapolation terms */
+      state_L = state_center_L;
+      state_R = state_center_R;
+
+      face_do_time_extrapolation(&delta_time_L, &state_center_L, atime);
+      face_do_time_extrapolation(&delta_time_R, &state_center_R, atime);
+
+      face_do_spatial_extrapolation(&delta_space_L, &state_center_L, &state_center_R);
+      face_do_spatial_extrapolation(&delta_space_R, &state_center_R, &state_center_L);
+
+      face_add_extrapolations(&state_L, &delta_time_L, &delta_space_L, &stat);
+      face_add_extrapolations(&state_R, &delta_time_R, &delta_space_R, &stat);
+
+#ifdef MHD
+      if(All.ComovingIntegrationOn)
+        {
+          state_L.Bx /= sqrtatime;
+          state_L.By /= sqrtatime;
+          state_L.Bz /= sqrtatime;
+
+          state_R.Bx /= sqrtatime;
+          state_R.By /= sqrtatime;
+          state_R.Bz /= sqrtatime;
+        }
+#endif /* #ifdef MHD */
+
+#ifndef MESHRELAX
+#ifndef ISOTHERM_EQS
+      /* check for crazy values */
+      if(state_L.press < 0 || state_R.press < 0 || state_L.rho < 0 || state_R.rho < 0)
+        {
+          printf("i=%d press_L=%g press_R=%g rho_L=%g rho_R=%g\n", i, state_L.press, state_R.press, state_L.rho, state_R.rho);
+          printf("area=%g lx=%g ly=%g   rx=%g ry=%g\n", VF[i].area, state_L.dx, state_L.dy, state_R.dx, state_R.dy);
+          terminate("found crazy values");
+        }
+#else  /* #ifndef ISOTHERM_EQS */
+      if(state_L.press < 0 || state_R.press < 0 || state_L.rho < 0 || state_R.rho < 0)
+        {
+          printf("i=%d rho_L=%g rho_R=%g\n", i, state_L.rho, state_R.rho);
+          printf("area=%g lx=%g ly=%g   rx=%g ry=%g\n", VF[i].area, state_L.dx, state_L.dy, state_R.dx, state_R.dy);
+          terminate("found crazy values");
+        }
+#endif /* #ifndef ISOTHERM_EQS #else */
+#endif /* #ifndef MESHRELAX */
+
+      /* mirror velocity in case of reflecting boundaries */
+      face_boundary_check(&T->DP[VF[i].p1], &state_L.velx, &state_L.vely, &state_L.velz);
+      face_boundary_check(&T->DP[VF[i].p2], &state_R.velx, &state_R.vely, &state_R.velz);
+
+#ifdef MHD
+      /* mirror magnetic field in case of reflecting boundaries */
+      face_boundary_check(&T->DP[VF[i].p1], &state_L.Bx, &state_L.By, &state_L.Bz);
+      face_boundary_check(&T->DP[VF[i].p2], &state_R.Bx, &state_R.By, &state_R.Bz);
+#endif /* #ifdef MHD */
+
+      /* turn the velocities to get velx perpendicular and vely and velz in the plane of the face */
+      face_turn_velocities(&state_L, &geom);
+      face_turn_velocities(&state_R, &geom);
+
+#ifndef MESHRELAX
+
+      /* call Riemann solver */
+
+      double press;
+#ifdef RIEMANN_HLLC
+      press = godunov_flux_3d_hllc(&state_L, &state_R, &state_face, &fluxes);
+#else /* #ifdef RIEMANN_HLLC */
+#ifdef RIEMANN_HLLD
+      press = godunov_flux_3d_hlld(&state_L, &state_R, vel_face_turned, &state_face, &fluxes);
+#else  /* #ifdef RIEMANN_HLLD */
+      press = godunov_flux_3d(&state_L, &state_R, &state_face); /* exact ideal gas solver */
+#endif /* #ifdef RIEMANN_HLLD #else */
+#endif /* #ifdef RIEMANN_HLLC #else */
+
+      if(press < 0)
+        terminate("press < 0: ID_L: %d, ID_R: %d", VF[i].p1, VF[i].p2);
+
+#ifdef GODUNOV_STATS
+      get_mach_numbers(&state_L, &state_R, press);
+      if(state_L.rho > 1.0e-6 && state_R.rho > 1.0e-6) /* columns: left state, right state, face state, Mach L/R, area */
+        fprintf(fdstats, "%g %g %g   %g %g %g  %g %g %g  %g %g %g\n", state_L.rho, state_L.velx, state_L.press, state_R.rho,
+                state_R.velx, state_R.press, state_face.rho, state_face.velx, state_face.press, state_L.mach, state_R.mach,
+                VF[i].area);
+#endif /* GODUNOV_STATS */
+
+#endif /* #ifndef MESHRELAX */
+
+      /* turn the velocity field back */
+      face_turnback_velocities(&state_face, &geom);
+
+      /* add the face velocity again */
+      state_face.velx += vel_face[0];
+      state_face.vely += vel_face[1];
+      state_face.velz += vel_face[2];
+
+#ifndef MESHRELAX
+
+#if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD)
+      /* for non-exact Riemann solver, fluxes are already computed in the local frame, so convert to lab frame and turn momentum fluxes
+       * to the lab orientation  */
+      flux_convert_to_lab_frame(&state_L, &state_R, vel_face_turned, &fluxes);
+      face_turn_momentum_flux(&fluxes, &geom);
+
+#else /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) */
+
+      /* calculate fluxes for exact Riemann problem */
+      /* compute net flux with dot-product of outward normal and area of face */
+      /* multiplication with area and time-step comes later */
+
+      face_get_fluxes(&state_L, &state_R, &state_face, &fluxes, &geom, vel_face);
+
+#endif /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD)  #else */
+
+      /* set the face states and fluxes of those quantities that are passively advected */
+      face_set_scalar_states_and_fluxes(&state_L, &state_R, &state_face, &fluxes);
+
+      face_limit_fluxes(&state_L, &state_R, &state_center_L, &state_center_R, &fluxes, face_dt, &count, &count_reduced);
+
+      /* put in cosmological factors */
+      if(All.ComovingIntegrationOn)
+        {
+          fluxes.momentum[0] *= atime;
+          fluxes.momentum[1] *= atime;
+          fluxes.momentum[2] *= atime;
+          fluxes.energy *= atime * atime;
+#ifdef MHD
+          fluxes.B[0] *= sqrtatime;
+          fluxes.B[1] *= sqrtatime;
+          fluxes.B[2] *= sqrtatime;
+#ifdef MHD_POWELL
+          state_face.Bx *= sqrtatime;
+#endif /* #ifdef MHD_POWELL */
+#endif /* #ifdef MHD */
+        }
+
+#else /* #ifndef MESHRELAX */
+
+      /* just solve the advection equation instead of Riemann problem */
+
+      solve_advection(&state_L, &state_R, &state_face, &geom, vel_face);
+      face_clear_fluxes(&fluxes);
+      face_add_fluxes_advection(&state_face, &fluxes, &geom, vel_face);
+      face_set_scalar_states_and_fluxes(&state_L, &state_R, &state_face, &fluxes);
+
+#endif /* #ifndef MESHRELAX #else */
+
+#ifndef ISOTHERM_EQS
+      if(!gsl_finite(fluxes.energy))
+        {
+          printf("i=%d eFlux-Bummer: %g %g %g\n", i, fluxes.energy, state_face.press, state_face.rho);
+          printf("rho_L=%g velx_L=%g vely_L=%g velz_L=%g press_L=%g\n", state_L.rho, state_L.velx, state_L.vely, state_L.velz,
+                 state_L.press);
+          printf("rho_R=%g velx_R=%g vely_R=%g velz_R=%g press_R=%g\n", state_R.rho, state_R.velx, state_R.vely, state_R.velz,
+                 state_R.press);
+          print_particle_info(i); /* NOTE(review): i is the face index here - confirm this is the intended argument */
+          terminate("infinity encountered");
+        }
+#endif /* #ifndef ISOTHERM_EQS */
+
+      /* now apply the flux to update the conserved states of the cells */
+
+      if(face_dt > 0) /* selects active faces */
+        {
+          int k, p, q;
+          double dir;
+          double fac = face_dt * VF[i].area;
+#if defined(MAXSCALARS)
+          int m;
+#endif /* #if defined(MAXSCALARS) */
+
+          fac *= 0.5;
+
+#if defined(MHD_POWELL)
+          struct state *state_center, *delta_time;
+#endif /* #if defined(MHD_POWELL) */
+          for(k = 0; k < 2; k++)
+            {
+#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z)
+              int qother;
+#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */
+              if(k == 0)
+                {
+                  q   = VF[i].p1;
+                  p   = DP[q].index;
+                  dir = -fac;
+#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z)
+                  qother = VF[i].p2;
+#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */
+#if defined(MHD_POWELL)
+                  state_center = &state_center_L;
+                  delta_time   = &delta_time_L;
+#endif /* #if defined(MHD_POWELL) */
+                }
+              else
+                {
+                  q   = VF[i].p2;
+                  p   = DP[q].index;
+                  dir = +fac;
+#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z)
+                  qother = VF[i].p1;
+#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */
+#if defined(MHD_POWELL)
+                  state_center = &state_center_R;
+                  delta_time   = &delta_time_R;
+#endif /* #if defined(MHD_POWELL) */
+                }
+
+              if(DP[q].task == ThisTask)
+                {
+                  if(DP[q].index >= NumGas) /* this is a local ghost point */
+                    {
+                      if(DP[VF[i].p1].ID == DP[VF[i].p2].ID) /* this may happen for reflective points */
+                        continue;
+                      p -= NumGas;
+                    }
+
+                  /* note: this will be executed if P[p] is a local point, independent of active or not */
+                  P[p].Mass += dir * fluxes.mass;
+                  SphP[p].Momentum[0] += dir * fluxes.momentum[0];
+                  SphP[p].Momentum[1] += dir * fluxes.momentum[1];
+                  SphP[p].Momentum[2] += dir * fluxes.momentum[2];
+
+#ifdef MHD
+                  SphP[p].BConserved[0] += dir * fluxes.B[0];
+                  SphP[p].BConserved[1] += dir * fluxes.B[1];
+                  SphP[p].BConserved[2] += dir * fluxes.B[2];
+#if defined(MHD_POWELL)
+                  double Velx = state_center->velx + delta_time->velx + vel_face[0];
+                  double Vely = state_center->vely + delta_time->vely + vel_face[1];
+                  double Velz = state_center->velz + delta_time->velz + vel_face[2];
+
+                  if(All.ComovingIntegrationOn)
+                    {
+                      Velx += atime * hubble_a * state_center->dx;
+                      Vely += atime * hubble_a * state_center->dy;
+                      Velz += atime * hubble_a * state_center->dz;
+                    }
+
+                  double Bx = state_center->Bx + delta_time->Bx;
+                  double By = state_center->By + delta_time->By;
+                  double Bz = state_center->Bz + delta_time->Bz;
+
+                  SphP[p].BConserved[0] += dir * Velx * state_face.Bx;
+                  SphP[p].BConserved[1] += dir * Vely * state_face.Bx;
+                  SphP[p].BConserved[2] += dir * Velz * state_face.Bx;
+
+                  SphP[p].Momentum[0] += dir * Bx * state_face.Bx;
+                  SphP[p].Momentum[1] += dir * By * state_face.Bx;
+                  SphP[p].Momentum[2] += dir * Bz * state_face.Bx;
+
+                  SphP[p].Energy += dir * (Bx * Velx + By * Vely + Bz * Velz) * state_face.Bx * atime;
+
+                  {
+                    double dMomX = dir * Bx * state_face.Bx;
+                    double dMomY = dir * By * state_face.Bx;
+                    double dMomZ = dir * Bz * state_face.Bx;
+
+                    All.Powell_Momentum[0] += dMomX;
+                    All.Powell_Momentum[1] += dMomY;
+                    All.Powell_Momentum[2] += dMomZ;
+
+                    double dx = SphP[p].Center[0] - 0.5 * All.BoxSize;
+                    double dy = SphP[p].Center[1] - 0.5 * All.BoxSize;
+                    double dz = SphP[p].Center[2] - 0.5 * All.BoxSize;
+
+                    All.Powell_Angular_Momentum[0] += dy * dMomZ - dz * dMomY;
+                    All.Powell_Angular_Momentum[1] += dz * dMomX - dx * dMomZ;
+                    All.Powell_Angular_Momentum[2] += dx * dMomY - dy * dMomX;
+                    All.Powell_Energy += dir * (Bx * Velx + By * Vely + Bz * Velz) * state_face.Bx * atime;
+                  }
+#endif /* #if defined(MHD_POWELL) */
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+                  for(m = 0; m < N_Scalar; m++)
+                    {
+                      *(MyFloat *)(((char *)(&SphP[p])) + scalar_elements[m].offset_mass) += dir * fluxes.scalars[m];
+                    }
+#endif /* #ifdef MAXSCALARS */
+
+#if !defined(ISOTHERM_EQS)
+                  SphP[p].Energy += dir * fluxes.energy;
+#endif /* #if !defined(ISOTHERM_EQS)  */
+                }
+              else
+                {
+                  /* here we have a foreign ghost point */
+                  if(DP[q].originalindex < 0)
+                    terminate("should not happen");
+
+                  if(Nflux >= MaxNflux)
+                    {
+                      T->Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR;
+                      MaxNflux = T->Indi.AllocFacNflux;
+#ifdef VERBOSE
+                      printf("Task=%d: increase memory allocation, MaxNflux=%d Indi.AllocFacNflux=%g\n", ThisTask, MaxNflux,
+                             T->Indi.AllocFacNflux);
+#endif /* #ifdef VERBOSE */
+                      FluxList = myrealloc_movable(FluxList, MaxNflux * sizeof(struct flux_list_data));
+
+                      if(Nflux >= MaxNflux)
+                        terminate("Nflux >= MaxNflux");
+                    }
+
+                  FluxList[Nflux].task  = DP[q].task;
+                  FluxList[Nflux].index = DP[q].originalindex;
+
+                  FluxList[Nflux].dM = dir * fluxes.mass;
+
+                  FluxList[Nflux].dP[0] = dir * fluxes.momentum[0];
+                  FluxList[Nflux].dP[1] = dir * fluxes.momentum[1];
+                  FluxList[Nflux].dP[2] = dir * fluxes.momentum[2];
+
+#if !defined(ISOTHERM_EQS)
+                  FluxList[Nflux].dEnergy = dir * fluxes.energy;
+#endif /* #if !defined(ISOTHERM_EQS)  */
+
+#ifdef MHD
+                  FluxList[Nflux].dB[0] = dir * fluxes.B[0];
+                  FluxList[Nflux].dB[1] = dir * fluxes.B[1];
+                  FluxList[Nflux].dB[2] = dir * fluxes.B[2];
+#if defined(MHD_POWELL)
+                  double Velx = state_center->velx + delta_time->velx + vel_face[0];
+                  double Vely = state_center->vely + delta_time->vely + vel_face[1];
+                  double Velz = state_center->velz + delta_time->velz + vel_face[2];
+
+                  if(All.ComovingIntegrationOn)
+                    {
+                      Velx += atime * hubble_a * state_center->dx;
+                      Vely += atime * hubble_a * state_center->dy;
+                      Velz += atime * hubble_a * state_center->dz;
+                    }
+
+                  double Bx = state_center->Bx + delta_time->Bx;
+                  double By = state_center->By + delta_time->By;
+                  double Bz = state_center->Bz + delta_time->Bz;
+
+                  FluxList[Nflux].dB[0] += dir * Velx * state_face.Bx;
+                  FluxList[Nflux].dB[1] += dir * Vely * state_face.Bx;
+                  FluxList[Nflux].dB[2] += dir * Velz * state_face.Bx;
+
+                  FluxList[Nflux].dP[0] += dir * Bx * state_face.Bx;
+                  FluxList[Nflux].dP[1] += dir * By * state_face.Bx;
+                  FluxList[Nflux].dP[2] += dir * Bz * state_face.Bx;
+#ifndef ISOTHERM_EQS
+                  FluxList[Nflux].dEnergy += dir * (Bx * Velx + By * Vely + Bz * Velz) * state_face.Bx * atime;
+#endif /* #ifndef ISOTHERM_EQS */
+
+                  {
+                    double dMomX = dir * Bx * state_face.Bx;
+                    double dMomY = dir * By * state_face.Bx;
+                    double dMomZ = dir * Bz * state_face.Bx;
+
+                    All.Powell_Momentum[0] += dMomX;
+                    All.Powell_Momentum[1] += dMomY;
+                    All.Powell_Momentum[2] += dMomZ;
+
+                    double dx = PrimExch[p].Center[0] - 0.5 * All.BoxSize;
+                    double dy = PrimExch[p].Center[1] - 0.5 * All.BoxSize;
+                    double dz = PrimExch[p].Center[2] - 0.5 * All.BoxSize;
+
+                    All.Powell_Angular_Momentum[0] += dy * dMomZ - dz * dMomY;
+                    All.Powell_Angular_Momentum[1] += dz * dMomX - dx * dMomZ;
+                    All.Powell_Angular_Momentum[2] += dx * dMomY - dy * dMomX;
+                    All.Powell_Energy += dir * (Bx * Velx + By * Vely + Bz * Velz) * state_face.Bx * atime;
+                  }
+#endif /* #if defined(MHD_POWELL) */
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+                  for(m = 0; m < N_Scalar; m++)
+                    FluxList[Nflux].dConservedScalars[m] = dir * fluxes.scalars[m];
+#endif /* #ifdef MAXSCALARS */
+
+                  Nflux++;
+                }
+            }
+        }
+    }
+  /* end of big loop over all faces */
+
+  TIMER_STOPSTART(CPU_FLUXES, CPU_FLUXES_COMM);
+
+  /* now exchange the flux-list and apply it when needed */
+  apply_flux_list();
+
+  TIMER_STOPSTART(CPU_FLUXES_COMM, CPU_FLUXES);
+
+  myfree(FluxList);
+
+  double in[2] = {count, count_reduced}, out[2];
+  MPI_Reduce(in, out, 2, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  if(ThisTask == 0)
+    {
+      tot_count         = out[0];
+      tot_count_reduced = out[1];
+
+      printf("FLUX: exchanged fluxes over %g faces, with %g reduced (fraction %g), cumulative fraction %g\n", tot_count,
+             tot_count_reduced, tot_count_reduced / (tot_count + 1.0e-30), All.TotCountReducedFluxes / (All.TotCountFluxes + 1.0e-30));
+      All.TotCountReducedFluxes += tot_count_reduced;
+      All.TotCountFluxes += tot_count;
+    }
+
+  fvs_evaluate_statistics(&stat);
+
+#ifdef MESHRELAX
+  for(i = 0; i < NumGas; i++)
+    {
+      if(P[i].Mass < 0)
+        {
+          terminate("negative mass reached for cell=%d mass=%g", P[i].ID, P[i].Mass);
+          /* NOTE(review): the resets below look unreachable if terminate() never returns - confirm */
+          P[i].Mass           = 0;
+          SphP[i].Energy      = 0;
+          SphP[i].Momentum[0] = 0;
+          SphP[i].Momentum[1] = 0;
+          SphP[i].Momentum[2] = 0;
+        }
+    }
+#endif /* #ifdef MESHRELAX */
+
+#ifdef GODUNOV_STATS
+  endrun();
+#endif /* #ifdef GODUNOV_STATS */
+
+#ifdef ONEDIMS_SPHERICAL
+  apply_spherical_source_terms();
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+#if defined(MHD_POWELL) && defined(VERBOSE)
+  double Powell_Momentum[3];
+  double Powell_Angular_Momentum[3];
+  double Powell_Energy;
+
+  MPI_Reduce(All.Powell_Momentum, Powell_Momentum, 3, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(All.Powell_Angular_Momentum, Powell_Angular_Momentum, 3, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&All.Powell_Energy, &Powell_Energy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+
+  if(ThisTask == 0)
+    printf("MHD_POWELL: Total ST contribution: Mom=%g,%g,%g   AngMom=%g,%g,%g   Energy=%g\n", Powell_Momentum[0], Powell_Momentum[1],
+           Powell_Momentum[2], Powell_Angular_Momentum[0], Powell_Angular_Momentum[1], Powell_Angular_Momentum[2], Powell_Energy);
+#endif /* #if defined(MHD_POWELL) && defined(VERBOSE) */
+
+#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS
+  restore_face_areas(T);
+#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */
+
+  TIMER_STOP(CPU_FLUXES);
+}
+
+#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS
+/*! \brief Saves the current area of every Voronoi face into its area_backup field.
+ *
+ *  \param[in, out] T Pointer to tessellation; all T->Nvf entries of T->VF are updated.
+ *
+ *  \return void
+ */
+void backup_face_areas(tessellation *T)
+{
+  for(face *vf = T->VF, *vf_end = T->VF + T->Nvf; vf < vf_end; vf++)
+    vf->area_backup = vf->area;
+}
+
+/*! \brief Copies the area_backup field of every Voronoi face back into its area.
+ *
+ *  \param[in, out] T Pointer to tessellation; all T->Nvf entries of T->VF are updated.
+ *
+ *  \return void
+ */
+void restore_face_areas(tessellation *T)
+{
+  for(face *vf = T->VF, *vf_end = T->VF + T->Nvf; vf < vf_end; vf++)
+    vf->area = vf->area_backup;
+}
+#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */
+
+/*! \brief Gets value of hydrodynamical quantities at face.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] p Index in DP array.
+ *  \param[in] i Index in VF array.
+ *  \param[out] st State at face.
+ *
+ *  \return 0
+ */
+int face_get_state(tessellation *T, int p, int i, struct state *st)
+{
+  int particle;
+#if defined(MAXSCALARS)
+  int j;
+#endif /* #if defined(MAXSCALARS) */
+  double aBegin;
+
+  point *DP = T->DP;
+  face *VF  = T->VF;
+
+  particle = DP[p].index;
+
+  if(particle < 0)
+    return -1;
+
+  if(particle >= NumGas && DP[p].task == ThisTask)
+    particle -= NumGas;
+
+  /* interpolation vector for the left state */
+  if(DP[p].task == ThisTask)
+    {
+      st->dx = VF[i].cx - SphP[particle].Center[0];
+      st->dy = VF[i].cy - SphP[particle].Center[1];
+      st->dz = VF[i].cz - SphP[particle].Center[2];
+    }
+  else
+    {
+      st->dx = VF[i].cx - PrimExch[particle].Center[0];
+      st->dy = VF[i].cy - PrimExch[particle].Center[1];
+      st->dz = VF[i].cz - PrimExch[particle].Center[2];
+    }
+
+    /* correct for periodicity */
+#if !defined(REFLECTIVE_X) && !defined(ONEDIMS_SPHERICAL)
+  if(st->dx < -boxHalf_X)
+    st->dx += boxSize_X;
+  if(st->dx > boxHalf_X)
+    st->dx -= boxSize_X;
+#endif /* #if !defined(REFLECTIVE_X) && !defined(ONEDIMS_SPHERICAL) */
+#if !defined(REFLECTIVE_Y)
+  if(st->dy < -boxHalf_Y)
+    st->dy += boxSize_Y;
+  if(st->dy > boxHalf_Y)
+    st->dy -= boxSize_Y;
+#endif /* #if !defined(REFLECTIVE_Y) */
+#if !defined(REFLECTIVE_Z)
+  if(st->dz < -boxHalf_Z)
+    st->dz += boxSize_Z;
+  if(st->dz > boxHalf_Z)
+    st->dz -= boxSize_Z;
+#endif /* #if !defined(REFLECTIVE_Z) */
+
+#ifdef ONEDIMS_SPHERICAL
+  if(DP[p].task == ThisTask)
+    st->radius = SphP[particle].Center[0];
+  else
+    st->radius = PrimExch[particle].Center[0];
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+  if(DP[p].task == ThisTask)
+    {
+      st->velGas[0] = P[particle].Vel[0];
+      st->velGas[1] = P[particle].Vel[1];
+      st->velGas[2] = P[particle].Vel[2];
+
+      st->velVertex[0] = SphP[particle].VelVertex[0];
+      st->velVertex[1] = SphP[particle].VelVertex[1];
+      st->velVertex[2] = SphP[particle].VelVertex[2];
+
+      st->rho = SphP[particle].Density;
+
+      st->press = SphP[particle].Pressure;
+
+      st->grad = &SphP[particle].Grad;
+
+      st->timeBin = P[particle].TimeBinHydro;
+
+      st->volume = SphP[particle].Volume;
+
+#ifdef MHD
+      st->Bx = SphP[particle].B[0];
+      st->By = SphP[particle].B[1];
+      st->Bz = SphP[particle].B[2];
+#ifdef MHD_POWELL
+      st->divB = SphP[particle].DivB;
+#endif /* #ifdef MHD_POWELL */
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+      for(j = 0; j < N_Scalar; j++)
+        st->scalars[j] = *(MyFloat *)(((char *)(&SphP[particle])) + scalar_elements[j].offset);
+#endif /* #ifdef MAXSCALARS */
+
+      aBegin = SphP[particle].TimeLastPrimUpdate;
+
+      st->oldmass     = SphP[particle].OldMass;
+      st->surfacearea = SphP[particle].SurfaceArea;
+      st->activearea  = SphP[particle].ActiveArea;
+      st->csnd        = get_sound_speed(particle);
+      st->ID          = P[particle].ID;
+    }
+  else
+    {
+      st->velGas[0] = PrimExch[particle].VelGas[0];
+      st->velGas[1] = PrimExch[particle].VelGas[1];
+      st->velGas[2] = PrimExch[particle].VelGas[2];
+
+      st->velVertex[0] = PrimExch[particle].VelVertex[0];
+      st->velVertex[1] = PrimExch[particle].VelVertex[1];
+      st->velVertex[2] = PrimExch[particle].VelVertex[2];
+
+      st->rho = PrimExch[particle].Density;
+
+      st->press = PrimExch[particle].Pressure;
+
+      st->grad = &GradExch[particle];
+
+      st->timeBin = PrimExch[particle].TimeBinHydro; /* This is the hydro timestep */
+
+      st->volume = PrimExch[particle].Volume;
+
+#ifdef MHD
+      st->Bx = PrimExch[particle].B[0];
+      st->By = PrimExch[particle].B[1];
+      st->Bz = PrimExch[particle].B[2];
+#ifdef MHD_POWELL
+      st->divB = PrimExch[particle].DivB;
+#endif /* #ifdef MHD_POWELL */
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+      for(j = 0; j < N_Scalar; j++)
+        st->scalars[j] = PrimExch[particle].Scalars[j];
+#endif /* #ifdef MAXSCALARS */
+
+      aBegin = PrimExch[particle].TimeLastPrimUpdate;
+
+      st->oldmass     = PrimExch[particle].OldMass;
+      st->surfacearea = PrimExch[particle].SurfaceArea;
+      st->activearea  = PrimExch[particle].ActiveArea;
+      st->csnd        = PrimExch[particle].Csnd;
+      st->ID          = DP[p].ID;
+    }
+
+  st->dtExtrapolation = All.Time - aBegin;
+
+  /* check for reflecting or outflowing boundaries */
+  face_boundary_check_vertex(T, p, &st->velVertex[0], &st->velVertex[1], &st->velVertex[2]);
+
+  return 0;
+}
+
+/*! \brief Flips vertex velocity components at reflective boundaries.
+ *
+ *  For every direction that is compiled as reflective (REFLECTIVE_X/Y/Z) the
+ *  corresponding velocity component is multiplied by -1 if the image flags of
+ *  DP[p] contain one of the reflection flags of that direction. With
+ *  ONEDIMS_SPHERICAL the x component is additionally flipped for p == -1.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] p Index in DP array.
+ *  \param[in, out] velx Velocity in x coordinate.
+ *  \param[in, out] vely Velocity in y coordinate.
+ *  \param[in, out] velz Velocity in z coordinate.
+ *
+ *  \return void
+ */
+void face_boundary_check_vertex(tessellation *T, int p, MyFloat *velx, MyFloat *vely, MyFloat *velz)
+{
+#if defined(REFLECTIVE_X)
+  /* point is a mirror image across an x boundary */
+  if((T->DP[p].image_flags & REFL_X_FLAGS) != 0)
+    {
+      *velx = (*velx) * -1;
+    }
+#endif /* #if defined(REFLECTIVE_X) */
+
+#if defined(REFLECTIVE_Y)
+  /* point is a mirror image across a y boundary */
+  if((T->DP[p].image_flags & REFL_Y_FLAGS) != 0)
+    {
+      *vely = (*vely) * -1;
+    }
+#endif /* #if defined(REFLECTIVE_Y) */
+
+#if defined(REFLECTIVE_Z)
+  /* point is a mirror image across a z boundary */
+  if((T->DP[p].image_flags & REFL_Z_FLAGS) != 0)
+    {
+      *velz = (*velz) * -1;
+    }
+#endif /* #if defined(REFLECTIVE_Z) */
+
+#ifdef ONEDIMS_SPHERICAL
+  /* index -1 is treated as a reflecting point in the 1D spherical setup */
+  if(-1 == p)
+    {
+      *velx = (*velx) * -1;
+    }
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+}
+
+/*! \brief Flips velocity components at reflective (non-outflow) boundaries.
+ *
+ *  For every direction that is compiled as reflective (REFLECTIVE_X/Y/Z) the
+ *  corresponding velocity component is multiplied by -1 if the image flags of
+ *  the point contain a reflection flag of that direction and do not contain
+ *  the matching OUTFLOW flag. With ONEDIMS_SPHERICAL the x component is
+ *  additionally flipped if p is the point Mesh.DP[-1].
+ *
+ *  \param[in] p Pointer to point.
+ *  \param[in, out] velx Velocity in x direction.
+ *  \param[in, out] vely Velocity in y direction.
+ *  \param[in, out] velz Velocity in z direction.
+ *
+ *  \return void
+ */
+void face_boundary_check(point *p, double *velx, double *vely, double *velz)
+{
+#if defined(REFLECTIVE_X)
+  if(p->image_flags & REFL_X_FLAGS)
+    {
+      /* outflow boundaries keep the sign of the velocity */
+      if(!(p->image_flags & OUTFLOW_X))
+        *velx = (*velx) * -1;
+    }
+#endif /* #if defined(REFLECTIVE_X) */
+
+#if defined(REFLECTIVE_Y)
+  if(p->image_flags & REFL_Y_FLAGS)
+    {
+      /* outflow boundaries keep the sign of the velocity */
+      if(!(p->image_flags & OUTFLOW_Y))
+        *vely = (*vely) * -1;
+    }
+#endif /* #if defined(REFLECTIVE_Y) */
+
+#if defined(REFLECTIVE_Z)
+  if(p->image_flags & REFL_Z_FLAGS)
+    {
+      /* outflow boundaries keep the sign of the velocity */
+      if(!(p->image_flags & OUTFLOW_Z))
+        *velz = (*velz) * -1;
+    }
+#endif /* #if defined(REFLECTIVE_Z) */
+
+#ifdef ONEDIMS_SPHERICAL
+  /* the point stored at index -1 of the global mesh is treated as reflecting */
+  if(&Mesh.DP[-1] == p)
+    {
+      *velx = (*velx) * -1;
+    }
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+}
+
+/*! \brief Decides whether the local task has to compute the flux of a face.
+ *
+ *  The two points of the face are ordered by their ID into a "low" and a
+ *  "high" side. If the time bin of the low side is synchronized, the face is
+ *  handled by the task that holds the low point as a local gas cell;
+ *  otherwise, if the time bin of the high side is synchronized, by the task
+ *  that holds the high point as a local gas cell.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] p1 Index in DP array of point1 making up the face.
+ *  \param[in] p2 Index in DP array of point2 making up the face.
+ *  \param[in] st_L Left hand side state of the face (belongs to p1).
+ *  \param[in] st_R Right hand side state of the face (belongs to p2).
+ *
+ *  \return -1 if not local responsibility, 0 if it is.
+ */
+int face_check_responsibility_of_this_task(tessellation *T, int p1, int p2, struct state *st_L, struct state *st_R)
+{
+  point *DP = T->DP;
+
+  /* does p1 take the role of the "lower ID" side? */
+  int p1_is_low;
+
+  if(DP[p1].ID != DP[p2].ID)
+    {
+      p1_is_low = (DP[p1].ID < DP[p2].ID);
+    }
+  else
+    {
+      /* equality of the IDs should only occur for reflective boundaries;
+       * the tie is broken in favour of p1 if it is a local gas cell */
+      p1_is_low = (DP[p1].task == ThisTask && DP[p1].index < NumGas);
+    }
+
+  const int low_p          = p1_is_low ? p1 : p2;
+  const int high_p         = p1_is_low ? p2 : p1;
+  struct state *low_state  = p1_is_low ? st_L : st_R;
+  struct state *high_state = p1_is_low ? st_R : st_L;
+
+  /* pick the point whose locality decides about the responsibility */
+  int deciding_p;
+
+  if(TimeBinSynchronized[low_state->timeBin])
+    deciding_p = low_p; /* the one with the lower ID is active */
+  else if(TimeBinSynchronized[high_state->timeBin])
+    deciding_p = high_p; /* only the side with the higher ID is active */
+  else
+    return -1; /* neither side is active: skip this face on the local task */
+
+  /* we do the face only if the deciding point is a local gas cell */
+  if(DP[deciding_p].task == ThisTask && DP[deciding_p].index < NumGas)
+    return 0;
+
+  return -1;
+}
+
+/*! \brief Determines timestep of face.
+ *
+ *  The face uses the smaller of the two time bins of its adjacent states.
+ *  Besides returning the face timestep, the function sets dt_half of both
+ *  states (time from the start of the state's own time bin to the middle of
+ *  the face step) and, for comoving integration, converts dt_half and
+ *  dtExtrapolation of both states to the cosmological time variable.
+ *
+ *  \param[in, out] state_L Left hand side state of face.
+ *  \param[in, out] state_R Right hand side state of face.
+ *  \param[out] hubble_a Value of Hubble function at scalefactor
+ *              a(cosmological); 1.0 for non-comoving integration.
+ *  \param[out] atime Scalefactor (cosmological) at the middle of the face
+ *              step; 1.0 for non-comoving integration.
+ *
+ *  \return Face timestep.
+ */
+double face_timestep(struct state *state_L, struct state *state_R, double *hubble_a, double *atime)
+{
+  integertime ti_begin_L, ti_begin_R, ti_half_step, ti_mid;
+  short int timeBin;
+  double face_dt;
+
+  /* determine most recent start of the time bins */
+  ti_begin_L = (All.Ti_Current >> state_L->timeBin) << state_L->timeBin;
+  ti_begin_R = (All.Ti_Current >> state_R->timeBin) << state_R->timeBin;
+
+  /* take the minimum of the two */
+  timeBin = state_L->timeBin;
+  if(timeBin > state_R->timeBin)
+    timeBin = state_R->timeBin;
+
+  /* half of the face step in integer time units. For timeBin == 0 the
+   * expression 1 << (timeBin - 1) would shift by a negative count, which is
+   * undefined behaviour in C; the (truncated) half step is zero there. */
+  ti_half_step = (timeBin > 0) ? (((integertime)1) << (timeBin - 1)) : 0;
+
+  /* integer time at the middle of the face step */
+  ti_mid = All.Ti_Current + ti_half_step;
+
+  /* compute the half-step prediction times */
+  state_L->dt_half = (ti_mid - ti_begin_L) * All.Timebase_interval;
+  state_R->dt_half = (ti_mid - ti_begin_R) * All.Timebase_interval;
+
+  if(All.ComovingIntegrationOn)
+    {
+      /* calculate scale factor at middle of timestep */
+      *atime    = All.TimeBegin * exp(ti_mid * All.Timebase_interval);
+      *hubble_a = hubble_function(*atime);
+    }
+  else
+    *atime = *hubble_a = 1.0;
+
+  /* set the actual time-step for the face */
+  face_dt = (((integertime)1) << timeBin) * All.Timebase_interval;
+
+  if(All.ComovingIntegrationOn)
+    {
+      /* converts to delta_t */
+      state_L->dt_half /= *hubble_a;
+      state_R->dt_half /= *hubble_a;
+      face_dt /= *hubble_a;
+
+      face_dt /= *atime; /* we need dt/a, the (1/a) takes care of the gradient in the cosmological euler equations */
+
+      state_L->dtExtrapolation /= *hubble_a;
+      state_L->dtExtrapolation /= *atime;
+      state_R->dtExtrapolation /= *hubble_a;
+      state_R->dtExtrapolation /= *atime;
+    }
+
+  return face_dt;
+}
+
+/*! \brief Transforms the gas velocity of a state into the frame of the face.
+ *
+ *  Sets st->velx/vely/velz to the gas velocity minus the face velocity. For
+ *  comoving integration, velGas is first divided by atime (and stays
+ *  modified), and atime * hubble_a * (dx, dy, dz) is subtracted afterwards.
+ *
+ *  \param[in, out] st State to be converted to local frame.
+ *  \param[in] vel_face Face velocity.
+ *  \param[in] hubble_a Value of Hubble function at scalefactor
+ *             a (cosmological).
+ *  \param[in] atime Scalefactor (cosmological).
+ *
+ *  \return void
+ */
+void state_convert_to_local_frame(struct state *st, double *vel_face, double hubble_a, double atime)
+{
+  const int comoving = All.ComovingIntegrationOn;
+
+  if(comoving)
+    {
+      /* convert to peculiar velocity */
+      for(int k = 0; k < 3; k++)
+        st->velGas[k] /= atime;
+    }
+
+  /* velocity relative to the moving face */
+  st->velx = st->velGas[0] - vel_face[0];
+  st->vely = st->velGas[1] - vel_face[1];
+  st->velz = st->velGas[2] - vel_face[2];
+
+  if(!comoving)
+    return;
+
+  /* need to get the physical velocity relative to the face */
+  const double a_times_h = atime * hubble_a;
+
+  st->velx -= a_times_h * st->dx;
+  st->vely -= a_times_h * st->dy;
+  st->velz -= a_times_h * st->dz;
+}
+
+/*! \brief Extrapolates the state in time.
+ *
+ *  Computes the change of the primitive variables over the time
+ *  st->dtExtrapolation from the cell-centred values and their gradients
+ *  (st->grad). The result is written to delta; st itself is not modified.
+ *
+ *  Note: if st->rho <= 0 the function returns immediately and delta is NOT
+ *  written at all, so the caller must not use delta in that case.
+ *
+ *  \param[out] delta Change due to time extrapolation.
+ *  \param[in] st State to be extrapolated.
+ *  \param[in] atime Scalefactor at this time (cosmological).
+ *
+ *  \return void
+ */
+void face_do_time_extrapolation(struct state *delta, struct state *st, double atime)
+{
+  /* st is the state at the center of the cell */
+
+  /* the code still allows for empty cells but we are going to divide
+   * by rho, so ...
+   */
+  if(st->rho <= 0)
+    return;
+
+#if defined(MESHRELAX) || defined(DISABLE_TIME_EXTRAPOLATION)
+  /* time extrapolation is switched off: report a zero change; everything
+   * below this return is unreachable in this configuration */
+  (void)st;
+  (void)atime;
+  memset(delta, 0, sizeof(struct state));
+  return;
+#endif /* #if defined (MESHRELAX) || defined (DISABLE_TIME_EXTRAPOLATION) */
+
+  struct grad_data *grad = st->grad;
+
+  /* despite its name this is the full extrapolation interval stored in the state */
+  double dt_half = st->dtExtrapolation;
+
+  if(All.ComovingIntegrationOn)
+    dt_half /= atime;
+
+  /* density: -dt * (v . grad(rho) + rho * div(v)) */
+  delta->rho = -dt_half * (st->velx * grad->drho[0] + st->rho * grad->dvel[0][0] + st->vely * grad->drho[1] +
+                           st->rho * grad->dvel[1][1] + st->velz * grad->drho[2] + st->rho * grad->dvel[2][2]);
+
+  /* velocity components: -dt * (grad(P)/rho + (v . grad) v) */
+  delta->velx = -dt_half * (1.0 / st->rho * grad->dpress[0] + st->velx * grad->dvel[0][0] + st->vely * grad->dvel[0][1] +
+                            st->velz * grad->dvel[0][2]);
+
+  delta->vely = -dt_half * (1.0 / st->rho * grad->dpress[1] + st->velx * grad->dvel[1][0] + st->vely * grad->dvel[1][1] +
+                            st->velz * grad->dvel[1][2]);
+
+  delta->velz = -dt_half * (1.0 / st->rho * grad->dpress[2] + st->velx * grad->dvel[2][0] + st->vely * grad->dvel[2][1] +
+                            st->velz * grad->dvel[2][2]);
+
+  /* pressure: -dt * (GAMMA * P * div(v) + v . grad(P)) */
+  delta->press = -dt_half * (GAMMA * st->press * (grad->dvel[0][0] + grad->dvel[1][1] + grad->dvel[2][2]) +
+                             st->velx * grad->dpress[0] + st->vely * grad->dpress[1] + st->velz * grad->dpress[2]);
+
+#ifdef ONEDIMS_SPHERICAL
+  /* additional term 2 P / (rho r) for the 1D spherical setup */
+  delta->velx += dt_half * 2. * st->press / (st->rho * st->radius);
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+#ifdef MHD
+  /* magnetic contribution to the velocity change, built from B and the gradients of B */
+  delta->velx +=
+      -dt_half * (1.0 / st->rho *
+                  (st->By * grad->dB[1][0] + st->Bz * grad->dB[2][0] - st->By * grad->dB[0][1] - st->Bz * grad->dB[0][2]) / atime);
+
+  delta->vely +=
+      -dt_half * (1.0 / st->rho *
+                  (st->Bx * grad->dB[0][1] + st->Bz * grad->dB[2][1] - st->Bx * grad->dB[1][0] - st->Bz * grad->dB[1][2]) / atime);
+
+  delta->velz +=
+      -dt_half * (1.0 / st->rho *
+                  (st->Bx * grad->dB[0][2] + st->By * grad->dB[1][2] - st->Bx * grad->dB[2][0] - st->By * grad->dB[2][1]) / atime);
+
+  /* change of the magnetic field components, built from v, B and their gradients */
+  delta->Bx =
+      -dt_half * (-st->velx * grad->dB[1][1] - grad->dvel[0][1] * st->By + st->vely * grad->dB[0][1] + grad->dvel[1][1] * st->Bx +
+                  st->velz * grad->dB[0][2] + grad->dvel[2][2] * st->Bx - st->velx * grad->dB[2][2] - grad->dvel[0][2] * st->Bz);
+
+  delta->By =
+      -dt_half * (+st->velx * grad->dB[1][0] + grad->dvel[0][0] * st->By - st->vely * grad->dB[0][0] - grad->dvel[1][0] * st->Bx -
+                  st->vely * grad->dB[2][2] - grad->dvel[1][2] * st->Bz + st->velz * grad->dB[1][2] + grad->dvel[2][2] * st->By);
+
+  delta->Bz =
+      -dt_half * (-st->velz * grad->dB[0][0] - grad->dvel[2][0] * st->Bx + st->velx * grad->dB[2][0] + grad->dvel[0][0] * st->Bz +
+                  st->vely * grad->dB[2][1] + grad->dvel[1][1] * st->Bz - st->velz * grad->dB[1][1] - grad->dvel[2][1] * st->By);
+#endif /* #ifdef MHD */
+
+#if defined(MAXSCALARS)
+  /* scalars: -dt * (v . grad(scalar)) */
+  int k;
+  for(k = 0; k < N_Scalar; k++)
+    {
+      delta->scalars[k] =
+          -dt_half * (st->velx * grad->dscalars[k][0] + st->vely * grad->dscalars[k][1] + st->velz * grad->dscalars[k][2]);
+    }
+#endif /* #if defined(MAXSCALARS) */
+}
+
+/*! \brief Extrapolates the state in space.
+ *
+ *  Linear extrapolation from the cell to the face, using the gradients
+ *  stored in st->grad and the vector (st->dx, st->dy, st->dz). The result is
+ *  written to delta; st itself is not modified. With
+ *  DISABLE_SPATIAL_RECONSTRUCTION (always) or
+ *  NO_RECONSTRUCTION_AT_STRONG_SHOCKS (if the two pressures differ by more
+ *  than a factor of 100) delta is set to all zeros instead.
+ *
+ *  \param[out] delta Change due to spatial extrapolation.
+ *  \param[in] st State to be extrapolated.
+ *  \param[in] st_other state of other cell.
+ *
+ *  \return void
+ */
+void face_do_spatial_extrapolation(struct state *delta, struct state *st, struct state *st_other)
+{
+#ifdef DISABLE_SPATIAL_RECONSTRUCTION
+  /* reconstruction switched off: everything below is unreachable */
+  memset(delta, 0, sizeof(*delta));
+  return;
+#endif /* #ifdef DISABLE_SPATIAL_RECONSTRUCTION */
+
+#ifdef NO_RECONSTRUCTION_AT_STRONG_SHOCKS
+  /* pressure jump of more than a factor 100 across the face: no reconstruction */
+  if(dmax(st->press, st_other->press) > 100. * dmin(st->press, st_other->press))
+    {
+      memset(delta, 0, sizeof(*delta));
+      return;
+    }
+#endif /* #ifdef NO_RECONSTRUCTION_AT_STRONG_SHOCKS */
+
+  struct grad_data *g = st->grad;
+
+  /* vector along which this cell's gradients are applied */
+  double dx[3] = {st->dx, st->dy, st->dz};
+
+  /* difference of the interpolation vectors of the two cells */
+  double r[3] = {-st_other->dx + st->dx, -st_other->dy + st->dy, -st_other->dz + st->dz};
+
+  /* hydrodynamic primitives */
+  face_do_spatial_extrapolation_single_quantity(&delta->rho, st->rho, st_other->rho, g->drho, dx, r);
+  face_do_spatial_extrapolation_single_quantity(&delta->velx, st->velx, st_other->velx, g->dvel[0], dx, r);
+  face_do_spatial_extrapolation_single_quantity(&delta->vely, st->vely, st_other->vely, g->dvel[1], dx, r);
+  face_do_spatial_extrapolation_single_quantity(&delta->velz, st->velz, st_other->velz, g->dvel[2], dx, r);
+  face_do_spatial_extrapolation_single_quantity(&delta->press, st->press, st_other->press, g->dpress, dx, r);
+
+#ifdef MHD
+  /* magnetic field components */
+  face_do_spatial_extrapolation_single_quantity(&delta->Bx, st->Bx, st_other->Bx, g->dB[0], dx, r);
+  face_do_spatial_extrapolation_single_quantity(&delta->By, st->By, st_other->By, g->dB[1], dx, r);
+  face_do_spatial_extrapolation_single_quantity(&delta->Bz, st->Bz, st_other->Bz, g->dB[2], dx, r);
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+  /* passive scalars */
+  for(int s = 0; s < N_Scalar; s++)
+    face_do_spatial_extrapolation_single_quantity(&delta->scalars[s], st->scalars[s], st_other->scalars[s], g->dscalars[s], dx, r);
+#endif /* #ifdef MAXSCALARS */
+}
+
+/*! \brief Extrapolates a single quantity in space.
+ *
+ *  Writes the scalar product of the gradient with the vector dx to delta,
+ *  i.e. the first-order change of the quantity along dx.
+ *
+ *  \param[out] delta Change due to spatial extrapolation.
+ *  \param[in] st State to be extrapolated (unused).
+ *  \param[in] st_other state of other cell (unused).
+ *  \param[in] grad Gradient used for extrapolation (3 components).
+ *  \param[in] dx Vector along which to extrapolate (3 components).
+ *  \param[in] r (unused).
+ *
+ *  \return void
+ */
+void face_do_spatial_extrapolation_single_quantity(double *delta, double st, double st_other, MySingle *grad, double *dx, double *r)
+{
+  /* these arguments are part of the interface only */
+  (void)r;
+  (void)st_other;
+  (void)st;
+
+  /* accumulate grad . dx in x, y, z order */
+  double change = grad[0] * dx[0];
+  change += grad[1] * dx[1];
+  change += grad[2] * dx[2];
+
+  *delta = change;
+}
+
+/*! \brief Adds space and time extrapolation to state.
+ *
+ *  Both extrapolations are skipped (and the face is counted in
+ *  stat->count_disable_extrapolation) if the density of the state is not
+ *  positive, or if applying both changes would make density or pressure
+ *  negative.
+ *
+ *  \param[in, out] st_face State that is modified.
+ *  \param[in] delta_time Change of state due to time extrapolation.
+ *  \param[in] delta_space Change of state due to space extrapolation.
+ *  \param[in, out] stat Structure that counts face value statistics.
+ *
+ *  \return void
+ */
+void face_add_extrapolations(struct state *st_face, struct state *delta_time, struct state *delta_space, struct fvs_stat *stat)
+{
+  /* book the face as "extrapolation disabled" first; this is taken back
+   * further down once all checks have been passed */
+  stat->count_disable_extrapolation += 1;
+
+  int rejected = (st_face->rho <= 0);
+
+  /* the deltas are only looked at for cells with positive density */
+  if(!rejected)
+    {
+      rejected = (st_face->rho + delta_time->rho + delta_space->rho < 0) ||
+                 (st_face->press + delta_time->press + delta_space->press < 0);
+    }
+
+  if(rejected)
+    return;
+
+  stat->count_disable_extrapolation -= 1;
+
+#if !defined(MESHRELAX) && !defined(DISABLE_TIME_EXTRAPOLATION)
+  face_add_extrapolation(st_face, delta_time, stat);
+#endif /* #if !defined(MESHRELAX) && !defined(DISABLE_TIME_EXTRAPOLATION)  */
+
+#if !defined(DISABLE_SPATIAL_EXTRAPOLATION)
+  face_add_extrapolation(st_face, delta_space, stat);
+#endif /* #if !defined(DISABLE_SPATIAL_EXTRAPOLATION) */
+}
+
+/*! \brief Adds an extrapolation to state.
+ *
+ *  Unconditionally adds density, velocity and pressure of delta (plus
+ *  magnetic field and scalars, if compiled in) to st_face. No positivity
+ *  checks are done here.
+ *
+ *  \param[in, out] st_face State that is modified.
+ *  \param[in] delta Change of state due to extrapolation.
+ *  \param[in] stat (unused)
+ *
+ *  \return void
+ */
+void face_add_extrapolation(struct state *st_face, struct state *delta, struct fvs_stat *stat)
+{
+  (void)stat;
+
+  struct state *dst       = st_face;
+  const struct state *add = delta;
+
+  /* hydrodynamic primitives */
+  dst->rho += add->rho;
+  dst->press += add->press;
+  dst->velx += add->velx;
+  dst->vely += add->vely;
+  dst->velz += add->velz;
+
+#ifdef MHD
+#ifndef ONEDIMS
+  /* in one dimension, Bx has to be constant! */
+  dst->Bx += add->Bx;
+#endif /* #ifndef ONEDIMS */
+  dst->By += add->By;
+  dst->Bz += add->Bz;
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+  for(int s = 0; s < N_Scalar; s++)
+    {
+      dst->scalars[s] += add->scalars[s];
+    }
+#endif /* #ifdef MAXSCALARS */
+}
+
+/*! \brief Adds an extrapolation to state, guarding against negative values.
+ *
+ *  The extrapolation is skipped (and the face is counted in
+ *  stat->count_disable_extrapolation) if the density of the state is not
+ *  positive, or if applying delta would make density or pressure negative.
+ *
+ *  \param[in, out] st_face State that is modified.
+ *  \param[in] delta Change of state due to extrapolation.
+ *  \param[in, out] stat Structure that counts face value statistics.
+ *
+ *  \return void
+ */
+void face_add_extrapolation_with_check(struct state *st_face, struct state *delta, struct fvs_stat *stat)
+{
+  /* book the face as "extrapolation disabled" first; this is taken back
+   * further down once all checks have been passed */
+  stat->count_disable_extrapolation += 1;
+
+  int rejected = (st_face->rho <= 0);
+
+  /* delta is only looked at for cells with positive density */
+  if(!rejected)
+    {
+      rejected = (st_face->rho + delta->rho < 0) || (st_face->press + delta->press < 0);
+    }
+
+  if(rejected)
+    return;
+
+  stat->count_disable_extrapolation -= 1;
+
+  face_add_extrapolation(st_face, delta, stat);
+}
+
+/*! \brief Rotates velocities and magnetic field.
+ *
+ *  Applies the matrix whose rows are the vectors (nx, ny, nz), (mx, my, mz)
+ *  and (px, py, pz) of geom to the velocity (and, with MHD, to the magnetic
+ *  field) of the state.
+ *
+ *  \param[in, out] st State that contains velocities to be rotated.
+ *  \param[in] geom Geometry with a rotation matrix.
+ *
+ *  \return void
+ */
+void face_turn_velocities(struct state *st, struct geometry *geom)
+{
+  /* keep the unrotated components, they are all needed for each result */
+  const double v[3] = {st->velx, st->vely, st->velz};
+
+  st->velx = v[0] * geom->nx + v[1] * geom->ny + v[2] * geom->nz;
+  st->vely = v[0] * geom->mx + v[1] * geom->my + v[2] * geom->mz;
+  st->velz = v[0] * geom->px + v[1] * geom->py + v[2] * geom->pz;
+
+#ifdef MHD
+  const double b[3] = {st->Bx, st->By, st->Bz};
+
+  st->Bx = b[0] * geom->nx + b[1] * geom->ny + b[2] * geom->nz;
+  st->By = b[0] * geom->mx + b[1] * geom->my + b[2] * geom->mz;
+  st->Bz = b[0] * geom->px + b[1] * geom->py + b[2] * geom->pz;
+#endif /* #ifdef MHD */
+}
+
+/*! \brief Sets the state at the face to its upwind value.
+ *
+ *  The face velocity is projected onto the face normal; if the projection is
+ *  negative the left state is copied to the face, otherwise the right state.
+ *  Only density, velocity and pressure are copied.
+ *
+ *  \param[in] st_L Left hand side hydrodynamical state.
+ *  \param[in] st_R Right hand side hydrodynamical state.
+ *  \param[out] st_face State at face.
+ *  \param[in] geom Geometry structure that includes normal vector of face.
+ *  \param[in] vel_face Velocity vector of face.
+ *
+ *  \return void
+ */
+void solve_advection(struct state *st_L, struct state *st_R, struct state_face *st_face, struct geometry *geom, double *vel_face)
+{
+  /* face velocity along the face normal */
+  const double ev = vel_face[0] * geom->nx + vel_face[1] * geom->ny + vel_face[2] * geom->nz;
+
+  /* side from which the face state is taken */
+  const struct state *src = (ev < 0) ? st_L : st_R;
+
+  st_face->rho   = src->rho;
+  st_face->velx  = src->velx;
+  st_face->vely  = src->vely;
+  st_face->velz  = src->velz;
+  st_face->press = src->press;
+}
+
+/*! \brief Rotates velocities backwards.
+ *
+ *  Inverse operation to face_turn_velocities(...): applies the transpose of
+ *  the matrix with rows (nx, ny, nz), (mx, my, mz), (px, py, pz) to the
+ *  velocity of the face state.
+ *
+ *  \param[in, out] st_face Face state that contains velocities to be rotated.
+ *  \param[in] geom Geometry with a rotation matrix.
+ *
+ *  \return void
+ */
+void face_turnback_velocities(struct state_face *st_face, struct geometry *geom)
+{
+  /* keep the rotated-frame components, they are all needed for each result */
+  const double v[3] = {st_face->velx, st_face->vely, st_face->velz};
+
+  st_face->velx = v[0] * geom->nx + v[1] * geom->mx + v[2] * geom->px;
+  st_face->vely = v[0] * geom->ny + v[1] * geom->my + v[2] * geom->py;
+  st_face->velz = v[0] * geom->nz + v[1] * geom->mz + v[2] * geom->pz;
+}
+
+/*! \brief Sets the scalar states and computes the scalar fluxes from the
+ *         mass flux.
+ *
+ *  The face scalars point to those of the left state for positive mass flux
+ *  and to those of the right state otherwise. Each scalar flux is the face
+ *  scalar times the mass flux; fluxes of type SCALAR_TYPE_SPECIES and
+ *  SCALAR_TYPE_NORMALIZE are divided by the sum of all species scalars if
+ *  that sum is non-zero. Without MAXSCALARS the function does nothing.
+ *
+ *  \param[in] st_L Left hand side state.
+ *  \param[in] st_R Right hand side state.
+ *  \param[out] st_face Face state.
+ *  \param[in, out] flux Flux over face (mass is read, scalars are written).
+ *
+ *  \return void
+ */
+void face_set_scalar_states_and_fluxes(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux)
+{
+#if defined(MAXSCALARS)
+  int s;
+
+  /* upwind choice based on the sign of the mass flux */
+  st_face->scalars = (flux->mass > 0) ? st_L->scalars : st_R->scalars;
+
+  /* advect every scalar with the mass flux */
+  for(s = 0; s < N_Scalar; s++)
+    flux->scalars[s] = st_face->scalars[s] * flux->mass;
+
+  /* Normalize species here: sum up all species scalars at the face */
+  double species_sum = 0;
+
+  for(s = 0; s < N_Scalar; s++)
+    {
+      if(scalar_elements[s].type != SCALAR_TYPE_SPECIES)
+        continue;
+
+      species_sum += st_face->scalars[s];
+    }
+
+  if(species_sum == 0)
+    return;
+
+  const double inv_sum = 1.0 / species_sum;
+
+  for(s = 0; s < N_Scalar; s++)
+    {
+      const int to_normalize =
+          (scalar_elements[s].type == SCALAR_TYPE_SPECIES) || (scalar_elements[s].type == SCALAR_TYPE_NORMALIZE);
+
+      if(to_normalize)
+        flux->scalars[s] *= inv_sum;
+    }
+
+#endif /* #if defined(MAXSCALARS) */
+}
+
+#if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD)
+/*! \brief Converts flux from face frame to simulation box frame.
+ *
+ *  Adds the face velocity times the mass flux to the momentum flux and the
+ *  corresponding terms (old momentum flux times face velocity plus half the
+ *  mass flux times the squared face velocity) to the energy flux. With MHD,
+ *  the face velocity times the mean of the left and right Bx is subtracted
+ *  from the magnetic flux.
+ *
+ *  \param[in] st_L Left hand side state.
+ *  \param[in] st_R Right hand side state.
+ *  \param[in] vel_face Velocity vector of face.
+ *  \param[in, out] flux Flux vector across face.
+ *
+ *  \return void
+ */
+void flux_convert_to_lab_frame(struct state *st_L, struct state *st_R, double *vel_face, struct fluxes *flux)
+{
+  /* momentum flux in the face frame; needed for the energy flux below */
+  const double mom_face[3] = {flux->momentum[0], flux->momentum[1], flux->momentum[2]};
+
+  for(int k = 0; k < 3; k++)
+    flux->momentum[k] += vel_face[k] * flux->mass;
+
+  /* squared speed of the face */
+  const double vel_face_sq = vel_face[0] * vel_face[0] + vel_face[1] * vel_face[1] + vel_face[2] * vel_face[2];
+
+  flux->energy += mom_face[0] * vel_face[0] + mom_face[1] * vel_face[1] + mom_face[2] * vel_face[2] + 0.5 * flux->mass * vel_face_sq;
+
+#ifdef MHD
+  /* mean of the left and right Bx */
+  const double Bx_mean = 0.5 * (st_L->Bx + st_R->Bx);
+
+  for(int k = 0; k < 3; k++)
+    flux->B[k] -= vel_face[k] * Bx_mean;
+#endif /* #ifdef MHD */
+}
+#endif /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) */
+
+/*! \brief Rotates momentum flux and magnetic flux vector.
+ *
+ *  Applies the transpose of the matrix with rows (nx, ny, nz), (mx, my, mz),
+ *  (px, py, pz) of geom to flux->momentum and, with MHD, to flux->B.
+ *  According to the original author's note this is needed when the HLLC or
+ *  Rusanov Riemann solvers are used.
+ *
+ *  \param[in, out] flux Flux vector which is rotated.
+ *  \param[in] geom Geometry structure that holds rotation matrix.
+ *
+ *  \return void
+ */
+void face_turn_momentum_flux(struct fluxes *flux, struct geometry *geom)
+{
+  /* keep the unrotated components, they are all needed for each result */
+  const double mom[3] = {flux->momentum[0], flux->momentum[1], flux->momentum[2]};
+
+  flux->momentum[0] = mom[0] * geom->nx + mom[1] * geom->mx + mom[2] * geom->px;
+  flux->momentum[1] = mom[0] * geom->ny + mom[1] * geom->my + mom[2] * geom->py;
+  flux->momentum[2] = mom[0] * geom->nz + mom[1] * geom->mz + mom[2] * geom->pz;
+
+#ifdef MHD
+  const double b[3] = {flux->B[0], flux->B[1], flux->B[2]};
+
+  flux->B[0] = b[0] * geom->nx + b[1] * geom->mx + b[2] * geom->px;
+  flux->B[1] = b[0] * geom->ny + b[1] * geom->my + b[2] * geom->py;
+  flux->B[2] = b[0] * geom->nz + b[1] * geom->mz + b[2] * geom->pz;
+#endif /* #ifdef MHD */
+}
+
+/*! \brief Calculates the flux from face states.
+ *
+ *  Computes mass and momentum flux (and the energy flux unless ISOTHERM_EQS
+ *  is set, in which case flux->energy is not written) from the face state,
+ *  using the velocity of the gas relative to the face along the face normal.
+ *
+ *  \param[in] st_L (unused)
+ *  \param[in] st_R (unused)
+ *  \param[in] st_face State at face.
+ *  \param[out] flux Flux at face.
+ *  \param[in] geom Geometry structure containing normal vector of face.
+ *  \param[in] vel_face Velocity vector of face.
+ *
+ *  \return void
+ */
+void face_get_fluxes(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux, struct geometry *geom,
+                     double *vel_face)
+{
+  /* these arguments are part of the interface only */
+  (void)st_L;
+  (void)st_R;
+
+  /* calculate fluxes for ordinary Riemann solver */
+
+  /* gas velocity relative to the face, projected onto the face normal */
+  const double vrel_n =
+      (st_face->velx - vel_face[0]) * geom->nx + (st_face->vely - vel_face[1]) * geom->ny + (st_face->velz - vel_face[2]) * geom->nz;
+
+  flux->mass = st_face->rho * vrel_n;
+
+  /* advected momentum plus pressure force along the normal */
+  flux->momentum[0] = st_face->rho * st_face->velx * vrel_n + st_face->press * geom->nx;
+  flux->momentum[1] = st_face->rho * st_face->vely * vrel_n + st_face->press * geom->ny;
+  flux->momentum[2] = st_face->rho * st_face->velz * vrel_n + st_face->press * geom->nz;
+
+#ifndef ISOTHERM_EQS
+  /* advected kinetic + thermal energy, plus the pressure work term */
+  flux->energy =
+      (0.5 * st_face->rho * (st_face->velx * st_face->velx + st_face->vely * st_face->vely + st_face->velz * st_face->velz) +
+       st_face->press / GAMMA_MINUS1) *
+          vrel_n +
+      st_face->press * (st_face->velx * geom->nx + st_face->vely * geom->ny + st_face->velz * geom->nz);
+#endif /* #ifndef ISOTHERM_EQS */
+}
+
+/*! \brief Flux limiter.
+ *
+ *  Makes sure a cell cannot lose more mass than it contains: if the mass
+ *  leaving the upwind cell (|mass flux| * dt * active area) exceeds 90% of
+ *  its old mass, the mass, energy, momentum and scalar fluxes are scaled down
+ *  by a common factor so that exactly 90% would leave.
+ *
+ *  \param[in] st_L Left hand side hydrodynamical state.
+ *  \param[in] st_R Right hand side hydrodynamical state.
+ *  \param[in] st_center_L (unused)
+ *  \param[in] st_center_R (unused)
+ *  \param[in, out] flux Flux vector.
+ *  \param[in] dt Timestep.
+ *  \param[in, out] count Number of calls of this function.
+ *  \param[in, out] count_reduced Number of flux reductions caused by this
+ *                  function.
+ *
+ *  \return void
+ */
+void face_limit_fluxes(struct state *st_L, struct state *st_R, struct state *st_center_L, struct state *st_center_R,
+                       struct fluxes *flux, double dt, double *count, double *count_reduced)
+{
+  /* these arguments are part of the interface only */
+  (void)st_center_L;
+  (void)st_center_R;
+
+  *count += 1.0;
+
+  /* choose upwind mass to determine a stability bound on the maximum allowed mass exchange,
+     (we do this to prevent negative masses under all circumstances) */
+  const int flows_left_to_right = (flux->mass > 0);
+  const struct state *upwind    = flows_left_to_right ? st_L : st_R;
+  const struct state *downwind  = flows_left_to_right ? st_R : st_L;
+
+  const double upwind_mass             = upwind->oldmass;
+  const double upwind_activearea       = upwind->activearea;
+  const integertime upwind_timebin     = upwind->timeBin;
+  const integertime downstream_timebin = downwind->timeBin;
+
+  /* upwind cell sits on a larger time bin: stretch dt by the ratio of the bin lengths */
+  if(upwind_timebin > downstream_timebin)
+    dt *= pow(2, upwind_timebin - downstream_timebin);
+
+  const double mass_leaving = fabs(flux->mass * dt * upwind_activearea);
+  const double mass_allowed = 0.9 * upwind_mass;
+
+  if(!(mass_leaving > mass_allowed))
+    return; /* flux is within the bound, nothing to do */
+
+  const double reduc_fac = mass_allowed / mass_leaving;
+
+  *count_reduced += 1.0;
+
+  flux->mass *= reduc_fac;
+  flux->energy *= reduc_fac;
+  for(int k = 0; k < 3; k++)
+    flux->momentum[k] *= reduc_fac;
+
+    /* remark: do not reduce the magnetic field flux, as it is not coupled to the mass flux */
+#ifdef MAXSCALARS
+  for(int s = 0; s < N_Scalar; s++)
+    flux->scalars[s] *= reduc_fac;
+#endif /* #ifdef MAXSCALARS */
+}
+
+/*! \brief Set flux vector entries to zero.
+ *
+ *  Clears mass, momentum and energy flux and, with MHD, the magnetic flux.
+ *  Scalar fluxes are not touched.
+ *
+ *  \param[out] flux Flux vector.
+ *
+ *  \return void
+ */
+void face_clear_fluxes(struct fluxes *flux)
+{
+  int k;
+
+  flux->mass   = 0;
+  flux->energy = 0;
+
+  for(k = 0; k < 3; k++)
+    flux->momentum[k] = 0;
+
+#ifdef MHD
+  for(k = 0; k < 3; k++)
+    flux->B[k] = 0;
+#endif /* #ifdef MHD */
+}
+
+/*! \brief Adds flux due to advection to flux vector.
+ *
+ *  The advection factor is minus the face velocity projected onto the face
+ *  normal; mass, momentum and (kinetic plus thermal) energy of the face state
+ *  times this factor are added to the respective fluxes.
+ *
+ *  \param[in] st_face State at face.
+ *  \param[in, out] flux Flux vector.
+ *  \param[in] geom Geometry structure containing the face normal vector.
+ *  \param[in] vel_face Velocity vector of the face.
+ *
+ *  \return void
+ */
+void face_add_fluxes_advection(struct state_face *st_face, struct fluxes *flux, struct geometry *geom, double *vel_face)
+{
+  /* negative normal component of the face velocity */
+  const double adv = -vel_face[0] * geom->nx - vel_face[1] * geom->ny - vel_face[2] * geom->nz;
+
+  flux->mass += st_face->rho * adv;
+
+  flux->momentum[0] += st_face->rho * st_face->velx * adv;
+  flux->momentum[1] += st_face->rho * st_face->vely * adv;
+  flux->momentum[2] += st_face->rho * st_face->velz * adv;
+
+  /* kinetic part plus thermal part */
+  flux->energy +=
+      0.5 * st_face->rho * adv * (st_face->velx * st_face->velx + st_face->vely * st_face->vely + st_face->velz * st_face->velz) +
+      st_face->press / GAMMA_MINUS1 * adv;
+}
+
+/*! \brief Compares tasks of flux list data.
+ *
+ *  Sort kernel for flux list data; orders entries by ascending task.
+ *
+ *  \param[in] a First flux list data object.
+ *  \param[in] b Second flux list data object.
+ *
+ *  \return (-1,0,1) -1 if a->task < b->task, +1 if a->task > b->task,
+ *          0 if both tasks are equal.
+ */
+int flux_list_data_compare(const void *a, const void *b)
+{
+  const struct flux_list_data *lhs = (const struct flux_list_data *)a;
+  const struct flux_list_data *rhs = (const struct flux_list_data *)b;
+
+  if(lhs->task < rhs->task)
+    {
+      return -1;
+    }
+  else if(lhs->task > rhs->task)
+    {
+      return +1;
+    }
+
+  return 0;
+}
+
+/*! \brief Communicates flux list and applies fluxes to conserved hydro
+ *         variables.
+ *
+ *  \return void
+ */
+void apply_flux_list(void)
+{
+  int i, j, p, nimport, ngrp, recvTask;
+#if defined(MAXSCALARS)
+  int k;
+#endif /* #if defined(MAXSCALARS) */
+
+  /* now exchange the flux-list and apply it when needed */
+  /* ordering by destination task makes the entries for each task contiguous in FluxList */
+  mysort(FluxList, Nflux, sizeof(struct flux_list_data), flux_list_data_compare);
+
+  for(j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  for(i = 0; i < Nflux; i++)
+    Send_count[FluxList[i].task]++;
+  /* entries addressed to the local task itself are treated as a fatal error */
+  if(Send_count[ThisTask] > 0)
+    terminate("Send_count[ThisTask]");
+  /* let every task know how many entries it receives from each other task */
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+  /* nimport: total number of entries to receive; the offsets are prefix sums of the counts */
+  for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
+    {
+      nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  struct flux_list_data *FluxListGet = (struct flux_list_data *)mymalloc("FluxListGet", nimport * sizeof(struct flux_list_data));
+
+  /* exchange the flux entries; in round ngrp this task is paired with task ThisTask ^ ngrp */
+  for(ngrp = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              /* send our entries for recvTask and get its entries for us; counts are given in bytes */
+              MPI_Sendrecv(&FluxList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct flux_list_data), MPI_BYTE, recvTask,
+                           TAG_DENS_A, &FluxListGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct flux_list_data),
+                           MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  /* apply the fluxes */
+  /* the index field of a received entry selects the local P/SphP element to update */
+  for(i = 0; i < nimport; i++)
+    {
+      p = FluxListGet[i].index;
+
+      P[p].Mass += FluxListGet[i].dM;
+
+      SphP[p].Momentum[0] += FluxListGet[i].dP[0];
+      SphP[p].Momentum[1] += FluxListGet[i].dP[1];
+      SphP[p].Momentum[2] += FluxListGet[i].dP[2];
+#ifdef MHD
+      SphP[p].BConserved[0] += FluxListGet[i].dB[0];
+      SphP[p].BConserved[1] += FluxListGet[i].dB[1];
+      SphP[p].BConserved[2] += FluxListGet[i].dB[2];
+#endif /* #ifdef MHD */
+      /* scalar_elements[k].offset_mass is used as a byte offset into the SphP element */
+#ifdef MAXSCALARS
+      for(k = 0; k < N_Scalar; k++)
+        *(MyFloat *)(((char *)(&SphP[p])) + scalar_elements[k].offset_mass) += FluxListGet[i].dConservedScalars[k];
+#endif /* #ifdef MAXSCALARS */
+
+#ifndef ISOTHERM_EQS
+      SphP[p].Energy += FluxListGet[i].dEnergy;
+#endif /* #ifndef ISOTHERM_EQS */
+    }
+  myfree(FluxListGet);
+}
+
+/*! \brief Initializes statistics of finite volume solver.
+ *
+ *  \param[out] stat Statistics structure; count_disable_extrapolation is reset to 0.
+ *
+ *  \return void
+ */
+void fvs_initialize_statistics(struct fvs_stat *stat) { stat->count_disable_extrapolation = 0; }
+
+/*! \brief Sums the solver statistics over all tasks and reports the total.
+ *
+ *  \param[in] stat Finite volume solver statistics structure of this task.
+ *
+ *  \return void
+ */
+void fvs_evaluate_statistics(struct fvs_stat *stat)
+{
+#ifdef VERBOSE
+  int n_disabled_total = 0; /* receives the MPI_SUM over all tasks on root task 0 */
+  MPI_Reduce(&stat->count_disable_extrapolation, &n_disabled_total, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+  mpi_printf("FLUX: Disabled extrapolation for %d interfaces.\n", n_disabled_total);
+#endif /* #ifdef VERBOSE */
+}
+
+#ifdef ONEDIMS_SPHERICAL
+/*! \brief Applies source terms that occur due to spherical symmetry.
+ *
+ *  \return void
+ */
+void apply_spherical_source_terms()
+{
+  int idx, i;
+  /* loop over the entries of the active hydro particle list */
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0) /* negative list entries are skipped */
+        continue;
+
+      double Pressure         = SphP[i].Pressure;
+      double dt_Extrapolation = All.Time - SphP[i].TimeLastPrimUpdate;
+      struct grad_data *grad  = &SphP[i].Grad;
+      /* extrapolate the pressure from the last primitive update to All.Time using the stored gradients */
+      Pressure += -dt_Extrapolation * (GAMMA * Pressure * (grad->dvel[0][0] + grad->dvel[1][1] + grad->dvel[2][2]) +
+                                       P[i].Vel[0] * grad->dpress[0] + P[i].Vel[1] * grad->dpress[1] + P[i].Vel[2] * grad->dpress[2]);
+      /* dt: half of the step belonging to the hydro time bin (0 for time bin 0) */
+      double dt = 0.5 * (P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval;
+      SphP[i].Momentum[0] += dt * Pressure * (Mesh.VF[i + 1].area - Mesh.VF[i].area);
+    }
+}
+#endif /* #ifdef ONEDIMS_SPHERICAL */
diff --git a/src/amuse/community/arepo/src/hydro/gradients.c b/src/amuse/community/arepo/src/hydro/gradients.c
new file mode 100644
index 0000000000..191c13635c
--- /dev/null
+++ b/src/amuse/community/arepo/src/hydro/gradients.c
@@ -0,0 +1,149 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gradients.c
+ * \date        05/2018
+ * \brief       Routines to initialize gradient data.
+ * \details     contains functions:
+ *                void init_gradients()
+ *                void gradient_init(MyFloat * addr, MyFloat * addr_exch,
+ *                  MySingle * addr_grad, int type)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+
+int N_Grad = 0; /* number of gradient fields registered so far by gradient_init() */
+/* table of registered gradient fields; the G* pointers are set by gradient_init() for the matching types */
+struct grad_elements grad_elements[MAXGRADIENTS], *GDensity, *GVelx, *GVely, *GVelz, *GPressure, *GUtherm;
+
+/*! \brief Initializes all gradient fields.
+ *
+ *  Density, velocity, pressure and if needed magnetic fields and passive
+ *  scalars.
+ *
+ *  \return void
+ */
+void init_gradients()
+{
+#if defined(MAXSCALARS)
+  int k;
+#endif /* #if defined(MAXSCALARS) */
+  /* only addresses inside element 0 are passed: gradient_init() turns them into byte offsets */
+  gradient_init(&SphP[0].Density, &PrimExch[0].Density, SphP[0].Grad.drho, GRADIENT_TYPE_DENSITY);
+  /* the velocities are taken from P (not SphP), which gradient_init() handles via the VELX/VELY/VELZ types */
+  gradient_init(&P[0].Vel[0], &PrimExch[0].VelGas[0], SphP[0].Grad.dvel[0], GRADIENT_TYPE_VELX);
+  gradient_init(&P[0].Vel[1], &PrimExch[0].VelGas[1], SphP[0].Grad.dvel[1], GRADIENT_TYPE_VELY);
+  gradient_init(&P[0].Vel[2], &PrimExch[0].VelGas[2], SphP[0].Grad.dvel[2], GRADIENT_TYPE_VELZ);
+
+  gradient_init(&SphP[0].Pressure, &PrimExch[0].Pressure, SphP[0].Grad.dpress, GRADIENT_TYPE_PRESSURE);
+
+#ifdef MHD
+  gradient_init(&SphP[0].B[0], &PrimExch[0].B[0], SphP[0].Grad.dB[0], GRADIENT_TYPE_NORMAL);
+  gradient_init(&SphP[0].B[1], &PrimExch[0].B[1], SphP[0].Grad.dB[1], GRADIENT_TYPE_NORMAL);
+  gradient_init(&SphP[0].B[2], &PrimExch[0].B[2], SphP[0].Grad.dB[2], GRADIENT_TYPE_NORMAL);
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+  MyFloat *addr;
+  /* scalar_elements[k].offset is used as a byte offset of scalar k within SphP[0] */
+  for(k = 0; k < N_Scalar; k++)
+    {
+      addr = (MyFloat *)(((char *)(&SphP[0])) + scalar_elements[k].offset);
+      gradient_init(addr, &PrimExch[0].Scalars[k], SphP[0].Grad.dscalars[k], GRADIENT_TYPE_NORMAL);
+    }
+#endif /* #ifdef MAXSCALARS */
+
+  mpi_printf("INIT: %d/%d Gradients used.\n", N_Grad, MAXGRADIENTS);
+}
+
+/*! \brief Initialize a gradient field.
+ *
+ *  Each time this initialization routine is called, the global variable
+ *  N_Grad is incremented by 1.
+ *
+ *  \param[in] addr Pointer to element in SphP[0] struct (for Vel in P[0])
+ *  \param[in] addr_exch Pointer to element in PrimExch[0] struct
+ *  \param[in] addr_grad Pointer to element in SphP[0].Grad struct
+ *  \param[in] type Type of gradient
+ *
+ *  \return void
+ */
+void gradient_init(MyFloat *addr, MyFloat *addr_exch, MySingle *addr_grad, int type)
+{
+  /* '>=' instead of '==': never write past the table, even if N_Grad already exceeds the limit */
+  if(N_Grad >= MAXGRADIENTS)
+    {
+      mpi_printf("Failed to register gradient, maximum of %d already reached\n", MAXGRADIENTS);
+      terminate("MAXGRADIENTS reached");
+    }
+
+  grad_elements[N_Grad].type = type;
+  if((type == GRADIENT_TYPE_VELX) || (type == GRADIENT_TYPE_VELY) || (type == GRADIENT_TYPE_VELZ))
+    {
+      /* basic structure is P */
+      grad_elements[N_Grad].offset = ((char *)addr) - ((char *)&P[0]);
+    }
+  else
+    {
+      /* basic structure is SphP */
+      grad_elements[N_Grad].offset = ((char *)addr) - ((char *)&SphP[0]);
+    }
+
+  grad_elements[N_Grad].offset_exch = ((char *)addr_exch) - ((char *)&PrimExch[0]); /* byte offset within PrimExch[0] */
+  grad_elements[N_Grad].offset_grad = ((char *)addr_grad) - ((char *)&(SphP[0].Grad)); /* byte offset within SphP[0].Grad */
+
+  switch(type) /* remember the table entry of the types that have a global shortcut pointer */
+    {
+      case GRADIENT_TYPE_VELX:
+        GVelx = &grad_elements[N_Grad];
+        break;
+      case GRADIENT_TYPE_VELY:
+        GVely = &grad_elements[N_Grad];
+        break;
+      case GRADIENT_TYPE_VELZ:
+        GVelz = &grad_elements[N_Grad];
+        break;
+      case GRADIENT_TYPE_DENSITY:
+        GDensity = &grad_elements[N_Grad];
+        break;
+      case GRADIENT_TYPE_PRESSURE:
+        GPressure = &grad_elements[N_Grad];
+        break;
+      case GRADIENT_TYPE_UTHERM:
+        GUtherm = &grad_elements[N_Grad];
+        break;
+      default: /* all other types (e.g. GRADIENT_TYPE_NORMAL) have no shortcut pointer */
+        break;
+    }
+
+  N_Grad++;
+}
diff --git a/src/amuse/community/arepo/src/hydro/mhd.c b/src/amuse/community/arepo/src/hydro/mhd.c
new file mode 100644
index 0000000000..33eaf7eab5
--- /dev/null
+++ b/src/amuse/community/arepo/src/hydro/mhd.c
@@ -0,0 +1,99 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mhd.c
+ * \date        05/2018
+ * \brief       Source terms for MHD implementation needed for cosmological
+ *              MHD equations as well as Powell source terms.
+ * \details     contains functions:
+ *                void do_mhd_source_terms_first_half(void)
+ *                void do_mhd_source_terms_second_half(void)
+ *                void do_mhd_source_terms(void)
+ *                void do_mhd_powell_source_terms(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 04.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef MHD
+
+static void do_mhd_source_terms(void);
+
+/*! \brief First half of the MHD source terms.
+ *
+ *  Before hydrodynamics timestep.
+ *
+ *  \return void
+ */
+void do_mhd_source_terms_first_half(void)
+{
+  do_mhd_source_terms();        /* changes SphP[].Energy, and only if All.ComovingIntegrationOn is set */
+  update_primitive_variables(); /* NOTE(review): presumably re-derives the primitives from the conserved values - confirm */
+}
+
+/*! \brief Second half of the MHD source terms.
+ *
+ *  After hydrodynamics timestep.
+ *
+ *  \return void
+ */
+void do_mhd_source_terms_second_half(void)
+{
+  do_mhd_source_terms();        /* changes SphP[].Energy, and only if All.ComovingIntegrationOn is set */
+  update_primitive_variables(); /* NOTE(review): presumably re-derives the primitives from the conserved values - confirm */
+}
+
+/*! \brief Adds source terms of MHD equations in expanding spacetime (i.e.
+ *         in cosmological simulations) to energy.
+ *
+ *  \return void
+ */
+static void do_mhd_source_terms(void)
+{
+  TIMER_START(CPU_MHD);
+  /* file-local helper (see the static forward declaration); nothing to do in non-comoving runs */
+  if(All.ComovingIntegrationOn)
+    {
+      double atime    = All.Time;
+      double hubble_a = hubble_function(atime);
+      /* loop over the entries of the active hydro particle list, skipping negative ones */
+      int idx, i;
+      for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+        {
+          i = TimeBinsHydro.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+          /* the expressions are kept exactly as they were so that the floating point results do not change */
+          double dt_cell = 0.5 * (P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval /
+                           hubble_a; /* half the timestep of the cell */
+          SphP[i].Energy += dt_cell * 0.5 * (SphP[i].B[0] * SphP[i].B[0] + SphP[i].B[1] * SphP[i].B[1] + SphP[i].B[2] * SphP[i].B[2]) *
+                            SphP[i].Volume * atime * hubble_a;
+        }
+    }
+
+  TIMER_STOP(CPU_MHD);
+}
+
+#endif /* #ifdef MHD */
diff --git a/src/amuse/community/arepo/src/hydro/riemann.c b/src/amuse/community/arepo/src/hydro/riemann.c
new file mode 100644
index 0000000000..24f664352f
--- /dev/null
+++ b/src/amuse/community/arepo/src/hydro/riemann.c
@@ -0,0 +1,955 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/riemann.c
+ * \date        05/2018
+ * \brief       Exact, iterative Riemann solver; both adiabatic and isothermal.
+ * \details     contains functions:
+ *                double godunov_flux_3d(struct state *st_L, struct state
+ *                  *st_R, struct state_face *st_face)
+ *                void sample_solution_vacuum_left_3d(double S, struct state
+ *                  *st_R, struct state_face *st_face)
+ *                void sample_solution_vacuum_right_3d(double S, struct state
+ *                  *st_L, struct state_face *st_face)
+ *                void sample_solution_vacuum_generate_3d(double S, struct
+ *                  state *st_L, struct state *st_R, struct state_face
+ *                  *st_face)
+ *                void get_mach_numbers(struct state *st_L, struct state
+ *                  *st_R, double Press)
+ *                void sample_solution_3d(double S, struct state *st_L,
+ *                  struct state *st_R, double Press, double Vel, struct
+ *                  state_face *st_face)
+ *                int riemann(struct state *st_L, struct state *st_R, double
+ *                  *Press, double *Vel)
+ *                void pressure_function(double P, struct state *st, double *F,
+ *                  double *FD)
+ *                double guess_for_pressure(struct state *st_L,
+ *                  struct state *st_R)
+ *                void riemann_isotherm(struct state *st_L, struct state *st_R,
+ *                  double *Rho, double *Vel, double csnd)
+ *                void isothermal_function(double rhostar, double rho,
+ *                  double *F, double *FD)
+ *                void sample_solution_isothermal3d(double S, struct state
+ *                  *st_L, struct state *st_R, double Rho, double Vel,
+ *                  struct state_face *st_face, double csnd)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+
+#if !(defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD))
+
+#define GAMMA_G1 ((GAMMA - 1.0) / (2.0 * GAMMA))
+#define GAMMA_G2 ((GAMMA + 1.0) / (2.0 * GAMMA))
+#define GAMMA_G3 ((2.0 * GAMMA / (GAMMA - 1.0)))
+#define GAMMA_G4 (2.0 / (GAMMA - 1.0))
+#define GAMMA_G5 (2.0 / (GAMMA + 1.0))
+#define GAMMA_G6 ((GAMMA - 1.0) / (GAMMA + 1.0))
+#define GAMMA_G7 (0.5 * (GAMMA - 1.0))
+#define GAMMA_G8 (1.0 / GAMMA)
+#define GAMMA_G9 (GAMMA - 1.0)
+/* relative pressure change below which the Newton-Raphson loop in riemann() stops */
+#define TOL 1.0e-8
+
+/*! \brief Calculates face state from Riemann problem.
+ *
+ *  \param[in, out] st_L Left hand side state; csnd is set if both densities are > 0.
+ *  \param[in, out] st_R Right hand side state; csnd is set if both densities are > 0.
+ *  \param[out] st_face State at face.
+ *
+ *  \return Central pressure if riemann() yields a solution; 0 in all other cases.
+ */
+double godunov_flux_3d(struct state *st_L, struct state *st_R, struct state_face *st_face)
+{
+  double Vel;
+  /* every branch below samples the solution at S = x/t = 0 */
+#ifndef ISOTHERM_EQS
+  {
+    if(st_L->press == 0 && st_R->press == 0)
+      {
+        /* vacuum state */
+        st_face->velx  = 0;
+        st_face->rho   = 0;
+        st_face->press = 0;
+        st_face->vely  = 0;
+        st_face->velz  = 0;
+#ifdef MAXSCALARS
+        st_face->scalars = NULL;
+#endif /* #ifdef MAXSCALARS */
+        return 0;
+      }
+
+    if(st_L->rho > 0 && st_R->rho > 0)
+      {
+        st_L->csnd = sqrt(GAMMA * st_L->press / st_L->rho);
+        st_R->csnd = sqrt(GAMMA * st_R->press / st_R->rho);
+        /* Press is only assigned by riemann() when it returns non-zero */
+        double Press;
+
+        if(riemann(st_L, st_R, &Press, &Vel))
+          {
+            sample_solution_3d(0.0, /* S=x/t */
+                               st_L, st_R, Press, Vel, st_face);
+            return Press;
+          }
+        else
+          {
+            /* ICs lead to vacuum, need to sample vacuum solution */
+
+            sample_solution_vacuum_generate_3d(0.0, /* S=x/t */
+                                               st_L, st_R, st_face);
+            return 0;
+          }
+      }
+    else
+      {
+        if(st_L->rho == 0 && st_R->rho > 0)
+          {
+            sample_solution_vacuum_left_3d(0.0, /* S=x/t */
+                                           st_R, st_face);
+            return 0;
+          }
+        else if(st_R->rho == 0 && st_L->rho > 0)
+          {
+            sample_solution_vacuum_right_3d(0.0, /* S=x/t */
+                                            st_L, st_face);
+            return 0;
+          }
+        else if(st_R->rho == 0 && st_L->rho == 0)
+          {
+            /* vacuum state */
+            st_face->velx  = 0;
+            st_face->rho   = 0;
+            st_face->press = 0;
+            st_face->vely  = 0;
+            st_face->velz  = 0;
+#ifdef MAXSCALARS
+            st_face->scalars = NULL;
+#endif /* #ifdef MAXSCALARS */
+          }
+        else
+          {
+            terminate("one of the densities is negative\n");
+          }
+        return 0;
+      }
+  }
+
+#else  /* #ifndef ISOTHERM_EQS */
+  double Rho;
+  double csnd;
+  /* isothermal case: fixed sound speed, the face pressure follows from the face density */
+  csnd = All.IsoSoundSpeed;
+  riemann_isotherm(st_L, st_R, &Rho, &Vel, csnd);
+
+  sample_solution_isothermal3d(0.0, /* S=x/t */
+                               st_L, st_R, Rho, Vel, st_face, csnd);
+
+  st_face->press = st_face->rho * csnd * csnd;
+
+  return 0;
+#endif /* #ifndef ISOTHERM_EQS #else */
+}
+
+/*! \brief Samples the solution when the left hand side is a vacuum.
+ *
+ *  \param[in] S Position x / t.
+ *  \param[in] st_R Right hand side state.
+ *  \param[out] st_face State at face.
+ *
+ *  \return void
+ */
+void sample_solution_vacuum_left_3d(double S, struct state *st_R, struct state_face *st_face)
+{
+  /* boundary between the vacuum and the right rarefaction fan: sample
+   * points with S below this value lie in the vacuum */
+  const double front = st_R->velx - 2 * st_R->csnd / GAMMA_MINUS1;
+
+  /* transverse velocities (and scalars) always come from the right state */
+  st_face->vely = st_R->vely;
+  st_face->velz = st_R->velz;
+#ifdef MAXSCALARS
+  st_face->scalars = st_R->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+  /* negated '>=': a comparison involving NaN also ends up in the vacuum branch */
+  if(!(S >= front))
+    {
+      /* vacuum state */
+      st_face->velx  = front;
+      st_face->rho   = 0;
+      st_face->press = 0;
+      return;
+    }
+
+  if(S >= st_R->velx + st_R->csnd)
+    {
+      /* right data state */
+      st_face->rho   = st_R->rho;
+      st_face->velx  = st_R->velx;
+      st_face->press = st_R->press;
+      return;
+    }
+
+  /* rarefaction fan right state: density and pressure follow from the ratio
+   * of the sound speed inside the fan to the one of the right state */
+  const double csnd_fan = GAMMA_G5 * (st_R->csnd - GAMMA_G7 * (st_R->velx - S));
+  const double ratio    = csnd_fan / st_R->csnd;
+
+  st_face->velx  = GAMMA_G5 * (-st_R->csnd + GAMMA_G7 * st_R->velx + S);
+  st_face->rho   = st_R->rho * pow(ratio, GAMMA_G4);
+  st_face->press = st_R->press * pow(ratio, GAMMA_G3);
+}
+
+/*! \brief Samples the solution when the right hand side is a vacuum.
+ *
+ *  \param[in] S Position x / t.
+ *  \param[in] st_L Left hand side state.
+ *  \param[out] st_face State at face.
+ *
+ *  \return void
+ */
+void sample_solution_vacuum_right_3d(double S, struct state *st_L, struct state_face *st_face)
+{
+  /* boundary between the left rarefaction fan and the vacuum: sample
+   * points with S above this value lie in the vacuum */
+  const double front = st_L->velx + 2 * st_L->csnd / GAMMA_MINUS1;
+
+  /* transverse velocities (and scalars) always come from the left state */
+  st_face->vely = st_L->vely;
+  st_face->velz = st_L->velz;
+#ifdef MAXSCALARS
+  st_face->scalars = st_L->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+  /* negated '<=': a comparison involving NaN also ends up in the
+   * vacuum branch */
+  if(!(S <= front))
+    {
+      /* vacuum in between */
+      st_face->velx  = front;
+      st_face->rho   = 0;
+      st_face->press = 0;
+      return;
+    }
+
+  if(S <= st_L->velx - st_L->csnd)
+    {
+      /* left data state */
+      st_face->rho   = st_L->rho;
+      st_face->velx  = st_L->velx;
+      st_face->press = st_L->press;
+      return;
+    }
+
+  /* rarefaction fan left state: density and pressure follow from the ratio
+   * of the sound speed inside the fan to the one of the left state */
+  const double csnd_fan = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * (st_L->velx - S));
+  const double ratio    = csnd_fan / st_L->csnd;
+
+  st_face->velx  = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * st_L->velx + S);
+  st_face->rho   = st_L->rho * pow(ratio, GAMMA_G4);
+  st_face->press = st_L->press * pow(ratio, GAMMA_G3);
+}
+
+/*! \brief Sample solution for vacuum states.
+ *
+ *  \param[in] S Position x / t
+ *  \param[in] st_L Left hand side state.
+ *  \param[in] st_R Right hand side state.
+ *  \param[out] st_face State at face.
+ *
+ *  \return void
+ */
+void sample_solution_vacuum_generate_3d(double S, struct state *st_L, struct state *st_R, struct state_face *st_face)
+{
+  double Csnd;
+  /* Sl and Sr bound the vacuum region: S <= Sl is sampled from the left fan, S >= Sr from the right fan */
+  double Sl = st_L->velx + 2 * st_L->csnd / GAMMA_MINUS1;
+  double Sr = st_R->velx - 2 * st_R->csnd / GAMMA_MINUS1;
+
+  if(S <= Sl)
+    {
+      /* left fan */
+
+      st_face->vely = st_L->vely;
+      st_face->velz = st_L->velz;
+#ifdef MAXSCALARS
+      st_face->scalars = st_L->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+      double shl = st_L->velx - st_L->csnd;
+
+      if(S <= shl)
+        {
+          /* left data state */
+          st_face->rho   = st_L->rho;
+          st_face->velx  = st_L->velx;
+          st_face->press = st_L->press;
+        }
+      else
+        {
+          /* rarefaction fan left state */
+          st_face->velx  = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * st_L->velx + S);
+          Csnd           = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * (st_L->velx - S));
+          st_face->rho   = st_L->rho * pow(Csnd / st_L->csnd, GAMMA_G4);
+          st_face->press = st_L->press * pow(Csnd / st_L->csnd, GAMMA_G3);
+        }
+    }
+  else if(S >= Sr)
+    {
+      /* right fan */
+
+      double shr = st_R->velx + st_R->csnd;
+
+      st_face->vely = st_R->vely;
+      st_face->velz = st_R->velz;
+#ifdef MAXSCALARS
+      st_face->scalars = st_R->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+      if(S >= shr) /* right data state */
+        {
+          st_face->rho   = st_R->rho;
+          st_face->velx  = st_R->velx;
+          st_face->press = st_R->press;
+        }
+      else
+        {
+          /* rarefaction fan right state */
+          st_face->velx  = GAMMA_G5 * (-st_R->csnd + GAMMA_G7 * st_R->velx + S);
+          Csnd           = GAMMA_G5 * (st_R->csnd - GAMMA_G7 * (st_R->velx - S));
+          st_face->rho   = st_R->rho * pow(Csnd / st_R->csnd, GAMMA_G4);
+          st_face->press = st_R->press * pow(Csnd / st_R->csnd, GAMMA_G3);
+        }
+    }
+  else
+    {
+      /* vacuum in between */
+      st_face->velx  = S;
+      st_face->rho   = 0;
+      st_face->press = 0;
+      /* the transverse velocities are interpolated linearly in S between their values at Sl and Sr */
+      st_face->vely = st_L->vely + (st_R->vely - st_L->vely) * (S - Sl) / (Sr - Sl);
+      st_face->velz = st_L->velz + (st_R->velz - st_L->velz) * (S - Sl) / (Sr - Sl);
+
+#ifdef MAXSCALARS
+      st_face->scalars = NULL;
+#endif /* #ifdef MAXSCALARS */
+    }
+}
+
+/*! \brief Calculates Mach numbers of shocks from Riemann problem.
+ *
+ *  Mostly used for statistics; does nothing unless GODUNOV_STATS is defined.
+ *  A side that is connected by a fan instead of a shock gets Mach number 0.
+ *
+ *  \param[in, out] st_L Left hand side state; its mach field is set.
+ *  \param[in, out] st_R Right hand side state; its mach field is set.
+ *  \param[in] Press Central pressure
+ *
+ *  \return void
+ */
+void get_mach_numbers(struct state *st_L, struct state *st_R, double Press)
+{
+#if defined GODUNOV_STATS
+  if(Press <= st_L->press) /* left fan */
+    {
+      st_L->mach = 0;
+    }
+  else /* left shock */
+    {
+      double pml = Press / st_L->press;
+      st_L->mach = sqrt(GAMMA_G2 * pml + GAMMA_G1);
+    }
+
+  if(Press > st_R->press) /* right shock */
+    {
+      double pmr = Press / st_R->press;
+      st_R->mach = sqrt(GAMMA_G2 * pmr + GAMMA_G1);
+    }
+  else
+    {
+      st_R->mach = 0;
+    }
+#endif /* #if defined GODUNOV_STATS */
+}
+
+/*! \brief Samples 3d solution to Riemann problem.
+ *
+ *  \param[in] S Position x / t.
+ *  \param[in] st_L Left hand side state.
+ *  \param[in] st_R Right hand side state.
+ *  \param[in] Press Pressure in central region.
+ *  \param[in] Vel Velocity in central region.
+ *  \param[out] st_face State at face.
+ *
+ *  \return void
+ */
+void sample_solution_3d(double S, struct state *st_L, struct state *st_R, double Press, double Vel, struct state_face *st_face)
+{
+  double Csnd;
+  /* transverse velocities (and scalars) are taken from the side of the contact the sample point lies on */
+  if(S <= Vel) /* sample point is left of contact */
+    {
+      st_face->vely = st_L->vely;
+      st_face->velz = st_L->velz;
+#ifdef MAXSCALARS
+      st_face->scalars = st_L->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+      if(Press <= st_L->press) /* left fan */
+        {
+          double shl = st_L->velx - st_L->csnd;
+
+          if(S <= shl) /* left data state */
+            {
+              st_face->rho   = st_L->rho;
+              st_face->velx  = st_L->velx;
+              st_face->press = st_L->press;
+            }
+          else
+            {
+              double cml = st_L->csnd * pow(Press / st_L->press, GAMMA_G1);
+              double stl = Vel - cml;
+
+              if(S > stl) /* middle left state */
+                {
+                  st_face->rho   = st_L->rho * pow(Press / st_L->press, GAMMA_G8);
+                  st_face->velx  = Vel;
+                  st_face->press = Press;
+                }
+              else /* left state inside fan */
+                {
+                  st_face->velx  = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * st_L->velx + S);
+                  Csnd           = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * (st_L->velx - S));
+                  st_face->rho   = st_L->rho * pow(Csnd / st_L->csnd, GAMMA_G4);
+                  st_face->press = st_L->press * pow(Csnd / st_L->csnd, GAMMA_G3);
+                }
+            }
+        }
+      else /* left shock */
+        {
+          if(st_L->press > 0)
+            {
+              double pml = Press / st_L->press;
+              double sl  = st_L->velx - st_L->csnd * sqrt(GAMMA_G2 * pml + GAMMA_G1);
+
+              if(S <= sl) /* left data state */
+                {
+                  st_face->rho   = st_L->rho;
+                  st_face->velx  = st_L->velx;
+                  st_face->press = st_L->press;
+                }
+              else /* middle left state behind shock */
+                {
+                  st_face->rho   = st_L->rho * (pml + GAMMA_G6) / (pml * GAMMA_G6 + 1.0);
+                  st_face->velx  = Vel;
+                  st_face->press = Press;
+                }
+            }
+          else
+            {
+              st_face->rho   = st_L->rho / GAMMA_G6; /* limit of the density ratio above for pml -> infinity */
+              st_face->velx  = Vel;
+              st_face->press = Press;
+            }
+        }
+    }
+  else /* right of contact */
+    {
+      st_face->vely = st_R->vely;
+      st_face->velz = st_R->velz;
+#ifdef MAXSCALARS
+      st_face->scalars = st_R->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+      if(Press > st_R->press) /* right shock */
+        {
+          if(st_R->press > 0)
+            {
+              double pmr = Press / st_R->press;
+              double sr  = st_R->velx + st_R->csnd * sqrt(GAMMA_G2 * pmr + GAMMA_G1);
+
+              if(S >= sr) /* right data state */
+                {
+                  st_face->rho   = st_R->rho;
+                  st_face->velx  = st_R->velx;
+                  st_face->press = st_R->press;
+                }
+              else /* middle right state behind shock */
+                {
+                  st_face->rho   = st_R->rho * (pmr + GAMMA_G6) / (pmr * GAMMA_G6 + 1.0);
+                  st_face->velx  = Vel;
+                  st_face->press = Press;
+                }
+            }
+          else
+            {
+              st_face->rho   = st_R->rho / GAMMA_G6; /* limit of the density ratio above for pmr -> infinity */
+              st_face->velx  = Vel;
+              st_face->press = Press;
+            }
+        }
+      else /* right fan */
+        {
+          double shr = st_R->velx + st_R->csnd;
+
+          if(S >= shr) /* right data state */
+            {
+              st_face->rho   = st_R->rho;
+              st_face->velx  = st_R->velx;
+              st_face->press = st_R->press;
+            }
+          else
+            {
+              double cmr = st_R->csnd * pow(Press / st_R->press, GAMMA_G1);
+              double str = Vel + cmr;
+
+              if(S <= str) /* middle right state */
+                {
+                  st_face->rho   = st_R->rho * pow(Press / st_R->press, GAMMA_G8);
+                  st_face->velx  = Vel;
+                  st_face->press = Press;
+                }
+              else /* fan right state */
+                {
+                  st_face->velx  = GAMMA_G5 * (-st_R->csnd + GAMMA_G7 * st_R->velx + S);
+                  Csnd           = GAMMA_G5 * (st_R->csnd - GAMMA_G7 * (st_R->velx - S));
+                  st_face->rho   = st_R->rho * pow(Csnd / st_R->csnd, GAMMA_G4);
+                  st_face->press = st_R->press * pow(Csnd / st_R->csnd, GAMMA_G3);
+                }
+            }
+        }
+    }
+}
+
+/*! \brief Riemann-solver; i.e. iterative solver of central pressure of a
+ *         Riemann problem.
+ *
+ *  Solution via root-finding of pressure function.
+ *
+ *  \param[in] st_L Left hand side state.
+ *  \param[in] st_R Right hand side state.
+ *  \param[out] Press Central pressure; the start value comes from guess_for_pressure().
+ *  \param[out] Vel Velocity in central region.
+ *
+ *  \return 0: ICs lead to vacuum (outputs untouched), 1: solution adopted (also after a divergence warning).
+ */
+int riemann(struct state *st_L, struct state *st_R, double *Press, double *Vel)
+{
+  double F_L, FD_L, F_R, FD_R, pold;
+
+  double dVel = st_R->velx - st_L->velx;
+  /* the states separate too fast for a solution if dVel exceeds GAMMA_G4 * (csnd_L + csnd_R) */
+  double critVel = GAMMA_G4 * (st_L->csnd + st_R->csnd) - dVel;
+
+  if(critVel < 0)
+    {
+      /*
+         printf("ICs lead to vacuum. stopping. Csnd_L=%g Csnd_R=%g dVel=%g\n", Csnd_L, Csnd_R, dVel);
+       */
+      return 0;
+    }
+
+  double p = guess_for_pressure(st_L, st_R);
+
+  int iter = 0;
+
+  do /* newton-raphson scheme */
+    {
+      pold = p;
+      /* first Newton step of this pass; the step is halved once half of MAXITER passes are used up */
+      pressure_function(p, st_L, &F_L, &FD_L);
+      pressure_function(p, st_R, &F_R, &FD_R);
+
+      if(iter < MAXITER / 2)
+        p -= (F_L + F_R + dVel) / (FD_L + FD_R);
+      else
+        p -= 0.5 * (F_L + F_R + dVel) / (FD_L + FD_R);
+      /* never let the pressure drop below a tenth of its value at the start of the pass */
+      if(p < 0.1 * pold)
+        p = 0.1 * pold;
+      /* second Newton step of the same pass, limited against the same pold */
+      pressure_function(p, st_L, &F_L, &FD_L);
+      pressure_function(p, st_R, &F_R, &FD_R);
+
+      if(iter < MAXITER / 2)
+        p -= (F_L + F_R + dVel) / (FD_L + FD_R);
+      else
+        p -= 0.5 * (F_L + F_R + dVel) / (FD_L + FD_R);
+
+      if(p < 0.1 * pold)
+        p = 0.1 * pold;
+
+      iter++;
+    }
+  while(2 * fabs((p - pold) / (p + pold)) > TOL && iter < MAXITER);
+  /* iteration limit reached: warn and dump both input states, but still adopt the last iterate below */
+  if(iter >= MAXITER)
+    {
+      printf("Task=%d: Warning: ICs for riemann solver lead to divergence.\n", ThisTask);
+      printf("Rho_L=%g  Vel_L=%g  Press_L=%g  Csnd_L=%g\n", st_L->rho, st_L->velx, st_L->press, st_L->csnd);
+      printf("Rho_R=%g  Vel_R=%g  Press_R=%g  Csnd_R=%g\n", st_R->rho, st_R->velx, st_R->press, st_R->csnd);
+      printf("Adopted solution: Press=%g Vel=%g\n", p, 0.5 * (st_L->velx + st_R->velx + F_R - F_L));
+      /* raw binary dump of the eight state values; "w" mode overwrites riemann.dat on every call */
+      FILE *fd;
+
+      if((fd = fopen("riemann.dat", "w")))
+        {
+          fwrite(&st_L->rho, sizeof(double), 1, fd);
+          fwrite(&st_L->velx, sizeof(double), 1, fd);
+          fwrite(&st_L->press, sizeof(double), 1, fd);
+          fwrite(&st_L->csnd, sizeof(double), 1, fd);
+          fwrite(&st_R->rho, sizeof(double), 1, fd);
+          fwrite(&st_R->velx, sizeof(double), 1, fd);
+          fwrite(&st_R->press, sizeof(double), 1, fd);
+          fwrite(&st_R->csnd, sizeof(double), 1, fd);
+          fclose(fd);
+        }
+    }
+
+  /* prepare output values */
+  *Press = p;
+  *Vel   = 0.5 * (st_L->velx + st_R->velx + F_R - F_L); /* F_L, F_R stem from the last pressure_function() calls in the loop */
+
+  return 1;
+}
+
+/*! \brief Pressure function for root-finding, together with its derivative.
+ *
+ *  The rarefaction expression is used for P <= st->press, the shock
+ *  expression otherwise.
+ *
+ *  \param[in] P Trial pressure.
+ *  \param[in] st Hydrodynamic state (rho, press and csnd are read).
+ *  \param[out] F pressure function.
+ *  \param[out] FD derivative of pressure function.
+ */
+void pressure_function(double P, struct state *st, double *F, double *FD)
+{
+  if(P <= st->press) /* rarefaction wave */
+    {
+      double ratio = P / st->press;
+      *F           = GAMMA_G4 * st->csnd * (pow(ratio, GAMMA_G1) - 1.0);
+      *FD          = (1.0 / (st->rho * st->csnd)) * pow(ratio, -GAMMA_G2);
+      return;
+    }
+
+  /* shock wave */
+  double dP       = P - st->press;
+  double b_plus_p = GAMMA_G6 * st->press + P;
+  double root     = sqrt((GAMMA_G5 / st->rho) / b_plus_p);
+
+  *F  = dP * root;
+  *FD = (1.0 - 0.5 * dP / b_plus_p) * root;
+}
+
+/*! \brief Returns initial guess for central pressure of the Riemann problem.
+ *
+ *  This is used as the starting value for the root-finding iteration.
+ *
+ *  The candidates are tried in this order:
+ *   - smaller input pressure not positive: arithmetic mean of both pressures;
+ *   - linearised estimate pv inside [pmin, pmax] and pmax / pmin <= QMAX: pv;
+ *   - pv below pmin: two-rarefaction expression;
+ *   - otherwise: two-shock expression evaluated at pv, replaced by pmin when
+ *     it falls outside [pmin, pmax].
+ *
+ *  \note An accepted pv can never be negative, because that case requires
+ *        both pmin > 0 and pv >= pmin.
+ *
+ *  \param[in] st_L Left hand side state (rho, velx, press, csnd are read).
+ *  \param[in] st_R Right hand side state (rho, velx, press, csnd are read).
+ *
+ *  \return Guess for pressure in central region.
+ */
+double guess_for_pressure(struct state *st_L, struct state *st_R)
+{
+#define QMAX 2.0
+
+  /* quantities shared by several of the estimates below */
+  double dvel = st_R->velx - st_L->velx;
+  double csum = st_L->csnd + st_R->csnd;
+
+  /* linearised estimate from the two input states */
+  double pv =
+      0.5 * (st_L->press + st_R->press) - 0.125 * dvel * (st_L->rho + st_R->rho) * csum;
+
+  /* order the two input pressures */
+  int left_is_lower = st_L->press < st_R->press;
+  double pmin       = left_is_lower ? st_L->press : st_R->press;
+  double pmax       = left_is_lower ? st_R->press : st_L->press;
+
+  /* without a strictly positive lower pressure the ratio test below makes
+   * no sense; writing the test in negated form also sends a NaN pmin to
+   * this fallback, exactly like the else-branch of a plain pmin > 0 test */
+  if(!(pmin > 0))
+    {
+      return (pmin + pmax) / 2;
+    }
+
+  /* pv is taken as is when the input pressures are similar and pv lies
+   * between them; no sign check is needed here (see note above) */
+  if(pmax / pmin <= QMAX && (pmin <= pv && pv <= pmax))
+    {
+      return pv;
+    }
+
+  if(pv < pmin) /* use two-rarefaction solution */
+    {
+      double pnu = csum - GAMMA_G7 * dvel;
+      double pde = st_L->csnd / pow(st_L->press, GAMMA_G1) + st_R->csnd / pow(st_R->press, GAMMA_G1);
+
+      return pow(pnu / pde, GAMMA_G3);
+    }
+
+  /* two-shock approximation  */
+  double gel = sqrt((GAMMA_G5 / st_L->rho) / (GAMMA_G6 * st_L->press + pv));
+  double ger = sqrt((GAMMA_G5 / st_R->rho) / (GAMMA_G6 * st_R->press + pv));
+  double x   = (gel * st_L->press + ger * st_R->press - dvel) / (gel + ger);
+
+  /* an estimate outside the input pressure range is replaced by the lower bound */
+  if(x < pmin || x > pmax)
+    {
+      x = pmin;
+    }
+
+  return x;
+}
+
+/*! \brief Riemann-solver for isothermal gas.
+ *
+ *  \param[in] st_L Left hand side state.
+ *  \param[in] st_R Right hand side state.
+ *  \param[out] Rho Central density (only written; the start value is computed inside this function).
+ *  \param[out] Vel Velocity in central region.
+ *  \param[in] csnd Sound speed.
+ *
+ *  \return void; terminate() is called for non-positive input density or when MAXITER is reached.
+ */
+void riemann_isotherm(struct state *st_L, struct state *st_R, double *Rho, double *Vel, double csnd)
+{
+  double F_L, FD_L, F_R, FD_R, rhoold, drho;
+  double rho;
+
+  double dVel = (st_R->velx - st_L->velx) / csnd; /* velocity difference in units of the sound speed */
+
+  if(dVel > 0) /* start value for the iteration */
+    rho = sqrt(st_L->rho * st_R->rho * exp(-dVel));
+  else
+    rho = 0.5 * (st_L->rho + st_R->rho);
+
+  int iter = 0;
+
+  if(st_L->rho <= 0 || st_R->rho <= 0)
+    terminate("isothermal Riemann solver was called with zero or negative density\n");
+
+  do /* newton-raphson scheme */
+    {
+      isothermal_function(rho, st_L->rho, &F_L, &FD_L);
+      isothermal_function(rho, st_R->rho, &F_R, &FD_R);
+
+      rhoold = rho;
+      drho   = -0.5 * (F_L + F_R + dVel) / (FD_L + FD_R); /* Newton step damped by a factor 0.5 */
+
+      if(fabs(drho) > 0.25 * rho) /* limit the change to 25% of the current value, keeping the sign of drho */
+        drho = 0.25 * rho * fabs(drho) / drho;
+
+      rho += drho;
+
+      iter++;
+    }
+  while(2 * fabs(rho - rhoold) / (st_L->rho + st_R->rho) > TOL && iter < MAXITER); /* change relative to mean input density */
+
+  if(iter >= MAXITER)
+    {
+#ifndef LONGIDS
+      printf("ID_L=%u ID_R=%u  Rho_L=%g Rho_R=%g  Vel_L=%g Vel_R=%g\n", st_L->ID, st_R->ID, st_L->rho, st_R->rho, st_L->velx,
+             st_R->velx);
+#else  /* #ifndef LONGIDS */
+      printf("ID_L=%llu ID_R=%llu  Rho_L=%g Rho_R=%g  Vel_L=%g Vel_R=%g\n", st_L->ID, st_R->ID, st_L->rho, st_R->rho, st_L->velx,
+             st_R->velx);
+#endif /* #ifndef LONGIDS #else */
+      terminate("ICs for isothermal riemann solver lead to divergence. stopping.");
+      /*
+       *Rho = 0.5 * (Rho_L + Rho_R);
+       *Vel = 0.5 * (Vel_L + Vel_R);
+       return;
+       */
+    }
+
+  /* prepare output values */
+  *Rho = rho;
+  *Vel = 0.5 * (st_L->velx + st_R->velx + csnd * (F_R - F_L)); /* F_L, F_R stem from the last pass, i.e. from rhoold */
+}
+
+/*! \brief "Pressure" function for isothermal gas.
+ *
+ *  Needed for root-finding in riemann_isotherm.
+ *
+ *  \param[in] rhostar Central density.
+ *  \param[in] rho External density.
+ *  \param[out] F Isothermal function.
+ *  \param[out] FD Derivative of isothermal function with respect to rhostar.
+ *
+ *  \return void
+ */
+void isothermal_function(double rhostar, double rho, double *F, double *FD)
+{
+  if(rhostar <= rho) /* rarefaction wave */
+    {
+      *F  = log(rhostar / rho);
+      *FD = 1.0 / rhostar; /* d/d(rhostar) of log(rhostar / rho); this equals 1 / rho only at rhostar == rho */
+    }
+  else /* shock wave */
+    {
+      *F  = (rhostar - rho) / sqrt(rhostar * rho);
+      *FD = 0.5 / rhostar * (sqrt(rhostar / rho) + sqrt(rho / rhostar));
+    }
+}
+
+/*! \brief Samples 3d solution to Riemann problem with isothermal gas.
+ *
+ *  \param[in] S Position x / t.
+ *  \param[in] st_L Left hand side state.
+ *  \param[in] st_R Right hand side state.
+ *  \param[in] Rho Central density.
+ *  \param[in] Vel Velocity in central region.
+ *  \param[out] st_face State at face; rho, velx, vely, velz (and scalars with MAXSCALARS) are set, press is not.
+ *  \param[in] csnd Sound speed.
+ *
+ *  \return void
+ */
+void sample_solution_isothermal3d(double S, struct state *st_L, struct state *st_R, double Rho, double Vel, struct state_face *st_face,
+                                  double csnd)
+{
+  if(S <= Vel) /* sample point is left of contact */
+    {
+      st_face->vely = st_L->vely;
+      st_face->velz = st_L->velz;
+#ifdef MAXSCALARS
+      st_face->scalars = st_L->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+      if(Rho <= st_L->rho) /* left fan */
+        {
+          double shl = st_L->velx - csnd; /* speed of the fan edge on the side of the left input state */
+
+          if(S <= shl) /* left data state */
+            {
+              st_face->rho  = st_L->rho;
+              st_face->velx = st_L->velx;
+            }
+          else
+            {
+              double stl = Vel - csnd; /* speed of the fan edge on the side of the central region */
+
+              if(S > stl) /* middle left state */
+                {
+                  st_face->rho  = Rho;
+                  st_face->velx = Vel;
+                }
+              else /* left state inside fan */
+                {
+                  st_face->velx = S + csnd;
+                  st_face->rho  = st_L->rho * exp(-((S + csnd) - st_L->velx) / csnd);
+                }
+            }
+        }
+      else /* left shock */
+        {
+          double sl = (st_L->rho * st_L->velx - Rho * Vel) / (st_L->rho - Rho); /* jump in rho * velx over jump in rho */
+
+          if(S <= sl) /* left data state */
+            {
+              st_face->rho  = st_L->rho;
+              st_face->velx = st_L->velx;
+            }
+          else /* left state behind shock */
+            {
+              st_face->rho  = Rho;
+              st_face->velx = Vel;
+            }
+        }
+    }
+  else /* right of contact */
+    {
+      st_face->vely = st_R->vely;
+      st_face->velz = st_R->velz;
+#ifdef MAXSCALARS
+      st_face->scalars = st_R->scalars;
+#endif /* #ifdef MAXSCALARS */
+
+      if(Rho > st_R->rho) /* right shock */
+        {
+          double sr = (st_R->rho * st_R->velx - Rho * Vel) / (st_R->rho - Rho); /* jump in rho * velx over jump in rho */
+
+          if(S >= sr) /* right data state */
+            {
+              st_face->rho  = st_R->rho;
+              st_face->velx = st_R->velx;
+            }
+          else /* right state behind shock */
+            {
+              st_face->rho  = Rho;
+              st_face->velx = Vel;
+            }
+        }
+      else /* right fan */
+        {
+          double shr = st_R->velx + csnd; /* speed of the fan edge on the side of the right input state */
+
+          if(S >= shr) /* right data state */
+            {
+              st_face->rho  = st_R->rho;
+              st_face->velx = st_R->velx;
+            }
+          else
+            {
+              double str = Vel + csnd; /* speed of the fan edge on the side of the central region */
+
+              if(S <= str) /* middle right state */
+                {
+                  st_face->rho  = Rho;
+                  st_face->velx = Vel;
+                }
+              else /* fan right state */
+                {
+                  st_face->velx = S - csnd;
+                  st_face->rho  = st_R->rho * exp(((S - csnd) - st_R->velx) / csnd);
+                }
+            }
+        }
+    }
+}
+
+#endif /* #if !(defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD)) */
diff --git a/src/amuse/community/arepo/src/hydro/riemann_hllc.c b/src/amuse/community/arepo/src/hydro/riemann_hllc.c
new file mode 100644
index 0000000000..80fb519ceb
--- /dev/null
+++ b/src/amuse/community/arepo/src/hydro/riemann_hllc.c
@@ -0,0 +1,213 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/riemann_hllc.c
+ * \date        05/2018
+ * \brief       Routines for a HLLC Riemann solver.
+ * \details     contains functions:
+ *                static void hllc_get_fluxes_from_state(struct state *st,
+ *                  struct fluxes *flux)
+ *                static double get_hllc_star_fluxes(const struct state *st,
+ *                  const struct fluxes *flux, struct fluxes *hllc_flux,
+ *                  double S_star, double S)
+ *                double godunov_flux_3d_hllc(struct state *st_L, struct state
+ *                  *st_R, struct state_face *st_face, struct fluxes *flux)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+
+#if defined(RIEMANN_HLLC)
+
+#if defined(RIEMANN_HLLD)
+#error option RIEMANN_HLLC is incompatible with option RIEMANN_HLLD.
+/* Only one Riemann solver can be chosen among the above options. If none of them is selected,
+ * the exact Riemann solver will be used. */
+#endif /* #if defined(RIEMANN_HLLD) */
+    /*! \brief Calculates the flux from a state.
+     *
+     *  Mass, momentum and energy flux, with velx as the transport velocity.
+     *  The total energy density is stored in st->Energy as a side effect.
+     *
+     *  \param[in,out] st State; st->Energy is overwritten.
+     *  \param[out] flux Flux corresponding to the state.
+     */
+    static void
+    hllc_get_fluxes_from_state(struct state *st, struct fluxes *flux)
+{
+  double mass_flux = st->rho * st->velx;
+  double vel_sqr   = st->velx * st->velx + st->vely * st->vely + st->velz * st->velz;
+  st->Energy       = st->press / GAMMA_MINUS1 + 0.5 * st->rho * vel_sqr;
+  flux->mass        = mass_flux;
+  flux->momentum[0] = mass_flux * st->velx + st->press;
+  flux->momentum[1] = mass_flux * st->vely;
+  flux->momentum[2] = mass_flux * st->velz;
+  flux->energy      = (st->Energy + st->press) * st->velx;
+}
+
+/*! \brief Calculates a central flux in HLLC approximation.
+ *
+ *  Builds the star-region mass, momentum and energy densities on one side
+ *  and adds S times their difference to the corresponding densities of the
+ *  outer state onto the flux of that outer state.
+ *
+ *  st->Energy is read, so it has to be filled in beforehand (as done by
+ *  hllc_get_fluxes_from_state()).
+ *
+ *  \param[in] st State of the Riemann problem (either left or right).
+ *  \param[in] flux Flux of that outer state (either left or right).
+ *  \param[out] hllc_flux Flux in the star region (determined by this routine).
+ *  \param[in] S_star speed of characteristics in central region.
+ *  \param[in] S speed of characteristics in outside state (left or right).
+ *  \return Central density.
+ */
+static double get_hllc_star_fluxes(const struct state *st, const struct fluxes *flux, struct fluxes *hllc_flux, double S_star,
+                                   double S)
+{
+  double rel_vel  = S - st->velx;
+  double rho_star = st->rho * rel_vel / (S - S_star);
+  double e_star   = rho_star * (st->Energy / st->rho + (S_star - st->velx) * (S_star + st->press / (st->rho * rel_vel)));
+
+  hllc_flux->mass        = flux->mass + S * (rho_star - st->rho);
+  hllc_flux->momentum[0] = flux->momentum[0] + S * (rho_star * S_star - st->rho * st->velx);
+  hllc_flux->momentum[1] = flux->momentum[1] + S * (rho_star * st->vely - st->rho * st->vely);
+  hllc_flux->momentum[2] = flux->momentum[2] + S * (rho_star * st->velz - st->rho * st->velz);
+  hllc_flux->energy      = flux->energy + S * (e_star - st->Energy);
+
+  return rho_star;
+}
+
+/*! \brief Main routine for the hllc Riemann solver.
+ *
+ *  Called in finite_volume_solver.c
+ *
+ *  \param[in,out] st_L Left state of the Riemann problem; csnd and Energy are overwritten.
+ *  \param[in,out] st_R Right state of the Riemann problem; csnd and Energy are overwritten.
+ *  \param[out] st_face State at face.
+ *  \param[out] flux Flux through face.
+ *
+ *  \return Pressure at the face (st_face->press); 0 after the terminate() call for non-positive density.
+ */
+double godunov_flux_3d_hllc(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux)
+{
+  double S_L, S_R, S_star;
+  double Press_star, rho_star;
+  double rho_hat, csnd_hat;
+
+  if(st_L->rho > 0 && st_R->rho > 0)
+    {
+      struct fluxes flux_L, flux_R;
+
+      st_L->csnd = sqrt(GAMMA * st_L->press / st_L->rho);
+      st_R->csnd = sqrt(GAMMA * st_R->press / st_R->rho);
+
+      /* first estimate wave speeds */
+      S_L = dmin(st_L->velx - st_L->csnd, st_R->velx - st_R->csnd);
+      S_R = dmax(st_L->velx + st_L->csnd, st_R->velx + st_R->csnd);
+
+      rho_hat    = 0.5 * (st_L->rho + st_R->rho);   /* arithmetic means of density and sound speed */
+      csnd_hat   = 0.5 * (st_L->csnd + st_R->csnd);
+      Press_star = 0.5 * ((st_L->press + st_R->press) + (st_L->velx - st_R->velx) * (rho_hat * csnd_hat));
+      S_star     = 0.5 * ((st_L->velx + st_R->velx) + (st_L->press - st_R->press) / (rho_hat * csnd_hat));
+
+      /* compute fluxes for the left and right states */
+      hllc_get_fluxes_from_state(st_L, &flux_L);
+      hllc_get_fluxes_from_state(st_R, &flux_R);
+
+      if(S_L >= 0.0) /* F_hllc = F_L */
+        {
+          /* copy the fluxes from the left state */
+          flux->mass        = flux_L.mass;
+          flux->momentum[0] = flux_L.momentum[0];
+          flux->momentum[1] = flux_L.momentum[1];
+          flux->momentum[2] = flux_L.momentum[2];
+          flux->energy      = flux_L.energy;
+
+          /* set the primitive variables at the face */
+          st_face->rho   = st_L->rho;
+          st_face->velx  = st_L->velx;
+          st_face->vely  = st_L->vely;
+          st_face->velz  = st_L->velz;
+          st_face->press = st_L->press;
+        }
+      else if(S_R <= 0.0) /* F_hllc = F_R */
+        {
+          /* copy the fluxes from the right state */
+          flux->mass        = flux_R.mass;
+          flux->momentum[0] = flux_R.momentum[0];
+          flux->momentum[1] = flux_R.momentum[1];
+          flux->momentum[2] = flux_R.momentum[2];
+          flux->energy      = flux_R.energy;
+
+          /* set the primitive variables at the face */
+          st_face->rho   = st_R->rho;
+          st_face->velx  = st_R->velx;
+          st_face->vely  = st_R->vely;
+          st_face->velz  = st_R->velz;
+          st_face->press = st_R->press;
+        }
+      else if(S_L <= 0.0 && S_star >= 0.0) /* F_hllc = F*_L */
+        {
+          /* compute star flux */
+          rho_star = get_hllc_star_fluxes(st_L, &flux_L, flux, S_star, S_L);
+
+          /* set the primitive variables at the face */
+          st_face->rho   = rho_star;
+          st_face->velx  = S_star;
+          st_face->vely  = st_L->vely;
+          st_face->velz  = st_L->velz;
+          st_face->press = Press_star;
+        }
+      else /* F_hllc = F*_R */
+        {
+          /* compute star flux */
+          rho_star = get_hllc_star_fluxes(st_R, &flux_R, flux, S_star, S_R);
+
+          /* set the primitive variables at the face */
+          st_face->rho   = rho_star;
+          st_face->velx  = S_star;
+          st_face->vely  = st_R->vely;
+          st_face->velz  = st_R->velz;
+          st_face->press = Press_star;
+        }
+    }
+  else
+    {
+      printf("Left:  st_L->press=%g st_L->rho=%g  st_L->velx=%g\n", st_L->press, st_L->rho, st_L->velx);
+      printf("Right: st_R->press=%g st_R->rho=%g  st_R->velx=%g\n", st_R->press, st_R->rho, st_R->velx);
+      terminate("density is zero\n");
+      return 0; /* NOTE(review): presumably never reached because terminate() aborts -- confirm */
+    }
+
+  return st_face->press;
+}
+
+#endif /* #if defined(RIEMANN_HLLC) */
diff --git a/src/amuse/community/arepo/src/hydro/riemann_hlld.c b/src/amuse/community/arepo/src/hydro/riemann_hlld.c
new file mode 100644
index 0000000000..8770282bd4
--- /dev/null
+++ b/src/amuse/community/arepo/src/hydro/riemann_hlld.c
@@ -0,0 +1,567 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/riemann_hlld.c
+ * \date        05/2018
+ * \brief       Routines for a HLLD Riemann solver (to be used for MHD).
+ * \details     contains functions:
+ *                static inline int state_and_flux_valid(const struct state
+ *                  *st, const struct fluxes *flux)
+ *                double godunov_flux_3d_hlld(struct state *st_L, struct state
+ *                  *st_R, double *vel_face, struct state_face *st_face,
+ *                  struct fluxes *flux)
+ *                static double hlld_get_fast_wave(struct state *st)
+ *                static void hlld_get_fluxes_from_state(struct state *st,
+ *                  struct fluxes *flux, double *st_ptot)
+ *                static void hlld_get_star(struct state *st_star, struct
+ *                  state *st, double S, double S_M, double ptot, double
+ *                  ptot_star)
+ *                static void hlld_get_fluxes_star(struct state *st_A, struct
+ *                  state *st_A_star, struct fluxes *flux_A, double S_A,
+ *                  struct fluxes *flux)
+ *                static void hlld_get_starstar_L(struct state *st_star_L,
+ *                  struct state *st_star_R, struct state *st_starstar)
+ *                static void hlld_get_starstar_R(struct state *st_star_L,
+ *                  struct state *st_star_R, struct state *st_starstar)
+ *                static void hlld_get_starstar(struct state *st_star_L,
+ *                  struct state *st_star_R, struct state *st_starstar,
+ *                  struct state *st_star_A, double sign)
+ *                static void hlld_get_fluxes_starstar(struct state *st_A,
+ *                  struct state *st_A_star, struct state *st_A_starstar,
+ *                  struct fluxes *flux_A, double S_A, double S_A_star, struct
+ *                  fluxes *flux)
+ *                static void hll_get_star(struct state *st_star, struct
+ *                  fluxes *flux_L, struct fluxes *flux_R, struct state *st_L,
+ *                  struct state *st_R, double S_L, double S_R)
+ *                static void hll_get_flux(struct fluxes *flux, struct fluxes
+ *                  *flux_L, struct fluxes *flux_R, struct state *st_L,
+ *                  struct state *st_R, double S_L, double S_R)
+ *                static void lax_get_flux(struct fluxes *flux, struct fluxes
+ *                  *flux_L, struct fluxes *flux_R, struct state *st_L, struct
+ *                  state *st_R, double S)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+
+#if defined(RIEMANN_HLLD)
+
+static double hlld_get_fast_wave(struct state *st);
+static void hlld_get_fluxes_from_state(struct state *st_face, struct fluxes *flux, double *st_ptot);
+static void hlld_get_star(struct state *st_star, struct state *st, double S, double S_M, double ptot, double ptot_star);
+static void hlld_get_fluxes_star(struct state *st_A, struct state *st_A_star, struct fluxes *flux_A, double S_A, struct fluxes *flux);
+static void hlld_get_starstar_L(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar);
+static void hlld_get_starstar_R(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar);
+static void hlld_get_starstar(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar, struct state *st_star_A,
+                              double sign);
+static void hlld_get_fluxes_starstar(struct state *st_A, struct state *st_A_star, struct state *st_A_starstar, struct fluxes *flux_A,
+                                     double S_A, double S_A_star, struct fluxes *flux);
+static void hll_get_star(struct state *st_star, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R,
+                         double S_L, double S_R);
+static void hll_get_flux(struct fluxes *flux, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R,
+                         double S_L, double S_R);
+static void lax_get_flux(struct fluxes *flux, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R,
+                         double S);
+
+/*! \brief Check that the pressure is non-negative and finite and that the energy flux is finite.
+ *  \param[in] st State; only st->press is inspected.
+ *  \param[in] flux Flux; only flux->energy is inspected.
+ *  \return 1 if valid state and flux, 0 otherwise.
+ */
+static inline int state_and_flux_valid(const struct state *st, const struct fluxes *flux)
+{
+  if(!(st->press >= 0) || !gsl_finite(st->press))
+    return 0;
+  return gsl_finite(flux->energy) ? 1 : 0;
+}
+
+/*! \brief Main routine for the hlld Riemann solver.
+ *
+ *  Called in finite_volume_solver.c. Falls back to HLL and then to a Lax-Friedrichs flux if the result is not valid.
+ *
+ *  \param[in,out] st_L Left state of the Riemann problem; Bx is replaced by the left/right mean, Energy may be overwritten.
+ *  \param[in,out] st_R Right state of the Riemann problem; Bx is replaced by the left/right mean, Energy may be overwritten.
+ *  \param[in] vel_face Velocity at which the face is moving; only used for the diagnostic output.
+ *  \param[out] st_face State at face.
+ *  \param[out] flux Flux through face.
+ *
+ *  \return Pressure of the state adopted at the face; 0 after the terminate() call for non-positive density.
+ */
+double godunov_flux_3d_hlld(struct state *st_L, struct state *st_R, double *vel_face, struct state_face *st_face, struct fluxes *flux)
+{
+  struct state st_Lstar, st_Rstar, st_star;
+  struct state st_Lstarstar, st_Rstarstar;
+  struct state *st_middle;
+  double Bx;
+  double cf_L, cf_R;
+  double S, S_L, S_R, S_M, S_L_star, S_R_star;
+  double ptot_L, ptot_R;
+
+  S_R_star = S_L_star = S_M = 0.; /* defaults, so that the diagnostic printf at the end never reads unset values */
+
+  if(st_L->rho > 0 && st_R->rho > 0)
+    {
+      Bx         = 0.5 * (st_L->Bx + st_R->Bx); /* common normal field: mean of both sides */
+      flux->B[0] = 0.;
+
+      st_L->Bx    = Bx;
+      st_R->Bx    = Bx;
+      st_face->Bx = Bx;
+
+      /* get wave speeds first */
+      cf_L = hlld_get_fast_wave(st_L);
+      cf_R = hlld_get_fast_wave(st_R);
+
+      /* largest absolute signal speed; only handed to lax_get_flux() in the last fallback below */
+      S = dmax(dmax(fabs(st_L->velx - cf_L), fabs(st_R->velx - cf_R)), dmax(fabs(st_L->velx + cf_L), fabs(st_R->velx + cf_R)));
+
+      S_L = dmin(st_L->velx - cf_L, st_R->velx - cf_R);
+      S_R = dmax(st_L->velx + cf_L, st_R->velx + cf_R);
+
+      if(S_L >= 0)
+        {
+          st_middle = st_L;
+          hlld_get_fluxes_from_state(st_L, flux, NULL);
+        }
+      else if(S_R <= 0)
+        {
+          st_middle = st_R;
+          hlld_get_fluxes_from_state(st_R, flux, NULL);
+        }
+      else
+        {
+          // stars are needed
+          struct fluxes flux_R, flux_L;
+
+          hlld_get_fluxes_from_state(st_L, &flux_L, &ptot_L);
+          hlld_get_fluxes_from_state(st_R, &flux_R, &ptot_R);
+
+          S_M = ((S_R - st_R->velx) * st_R->rho * st_R->velx - (S_L - st_L->velx) * st_L->rho * st_L->velx - ptot_R + ptot_L) /
+                ((S_R - st_R->velx) * st_R->rho - (S_L - st_L->velx) * st_L->rho);
+
+          double ptot_star = ((S_R - st_R->velx) * st_R->rho * ptot_L - (S_L - st_L->velx) * st_L->rho * ptot_R +
+                              st_L->rho * st_R->rho * (S_R - st_R->velx) * (S_L - st_L->velx) * (st_R->velx - st_L->velx)) /
+                             ((S_R - st_R->velx) * st_R->rho - (S_L - st_L->velx) * st_L->rho);
+
+          hlld_get_star(&st_Lstar, st_L, S_L, S_M, ptot_L, ptot_star);
+          hlld_get_star(&st_Rstar, st_R, S_R, S_M, ptot_R, ptot_star);
+
+          S_L_star = S_M - fabs(st_L->Bx) / sqrt(st_Lstar.rho);
+          S_R_star = S_M + fabs(st_R->Bx) / sqrt(st_Rstar.rho);
+
+          if(S_L_star >= 0 || (Bx == 0 && S_M >= 0))  // we already know: S_L <= 0
+            {
+              st_middle = &st_Lstar;
+              hlld_get_fluxes_star(st_L, &st_Lstar, &flux_L, S_L, flux);
+            }
+          else if(S_R_star <= 0 || (Bx == 0))  // we already know: S_R >= 0
+            {
+              st_middle = &st_Rstar;
+              hlld_get_fluxes_star(st_R, &st_Rstar, &flux_R, S_R, flux);
+            }
+          else
+            {
+              // double stars are needed
+              if(S_M >= 0)  // we already know: S_L_star <= 0
+                {
+                  st_middle = &st_Lstarstar;
+                  hlld_get_starstar_L(&st_Lstar, &st_Rstar, &st_Lstarstar);
+                  hlld_get_fluxes_starstar(st_L, &st_Lstar, &st_Lstarstar, &flux_L, S_L, S_L_star, flux);
+                }
+              else  // we already know: S_R_star >= 0 and S_M <= 0
+                {
+                  st_middle = &st_Rstarstar;
+                  hlld_get_starstar_R(&st_Lstar, &st_Rstar, &st_Rstarstar);
+                  hlld_get_fluxes_starstar(st_R, &st_Rstar, &st_Rstarstar, &flux_R, S_R, S_R_star, flux);
+                }
+            }
+        }
+    }
+  else
+    {
+      printf("Left:  st_L->press=%g st_L->rho=%g  st_L->velx=%g\n", st_L->press, st_L->rho, st_L->velx);
+      printf("Right: st_R->press=%g st_R->rho=%g  st_R->velx=%g\n", st_R->press, st_R->rho, st_R->velx);
+      terminate("density is zero\n");
+      return 0; /* NOTE(review): presumably never reached because terminate() aborts -- confirm */
+    }
+
+  if(!state_and_flux_valid(st_middle, flux))
+    {
+      /* HLLD did not work => use HLL instead */
+      struct fluxes flux_R, flux_L;
+
+      hlld_get_fluxes_from_state(st_L, &flux_L, NULL);
+      hlld_get_fluxes_from_state(st_R, &flux_R, NULL);
+
+      hll_get_star(&st_star, &flux_L, &flux_R, st_L, st_R, S_L, S_R);
+      hll_get_flux(flux, &flux_L, &flux_R, st_L, st_R, S_L, S_R);
+
+      st_middle = &st_star;
+
+      if(!state_and_flux_valid(st_middle, flux))
+        {
+          /* HLL did not work, use lax-friedrich flux instead */
+          lax_get_flux(flux, &flux_L, &flux_R, st_L, st_R, S);
+
+          st_star.press = 0.5 * (st_L->press + st_R->press); /* only press is replaced; other st_star fields stay as they are */
+        }
+    }
+
+  st_face->rho   = st_middle->rho;
+  st_face->velx  = st_middle->velx;
+  st_face->vely  = st_middle->vely;
+  st_face->velz  = st_middle->velz;
+  st_face->press = st_middle->press;
+  st_face->By    = st_middle->By;
+  st_face->Bz    = st_middle->Bz;
+
+  if(!state_and_flux_valid(st_middle, flux)) /* still invalid after all fallbacks: print diagnostics, then return anyway */
+    {
+      printf("M: rho=%g, v=(%g,%g,%g), p=%g, B=(%g,%g,%g)\n", st_middle->rho, st_middle->velx + vel_face[0],
+             st_middle->vely + vel_face[1], st_middle->velz + vel_face[2], st_middle->press, st_middle->Bx, st_middle->By,
+             st_middle->Bz);
+      printf("S_L=%g, S_L_star=%g, S_M=%g, S_R_star=%g, S_R=%g, cf_L=%g, cf_R=%g\n", S_L, S_L_star, S_M, S_R_star, S_R, cf_L, cf_R);
+    }
+
+  return st_middle->press;
+}
+
+/*! \brief Calculates signal speed of the fast magnetosonic wave.
+ *
+ *  \param[in] st MHD state (rho, press, Bx, By, Bz are read).
+ *  \return Signal speed of fast wave.
+ */
+static double hlld_get_fast_wave(struct state *st)
+{
+  double gamma  = GAMMA;
+  double gPress = gamma * st->press;
+  double Bsqr   = st->Bx * st->Bx + st->By * st->By + st->Bz * st->Bz;
+  double gpb2   = gPress + Bsqr;
+  /* analytically >= 0, but round-off can push it slightly below zero (e.g. By = Bz = 0 and gPress close to Bx^2) */
+  double disc = gpb2 * gpb2 - 4. * gPress * st->Bx * st->Bx;
+  return sqrt(0.5 / st->rho * (gpb2 + sqrt(disc > 0 ? disc : 0)));
+}
+
+/*! \brief Calculates the flux from a state.
+ *
+ *  Mass, momentum, energy and transverse magnetic field (B[1], B[2]) flux,
+ *  with velx as the transport velocity. flux->B[0] is not touched here.
+ *  The total energy density is stored in st->Energy as a side effect.
+ *
+ *  \param[in,out] st State; st->Energy is overwritten.
+ *  \param[out] flux Flux corresponding to the state.
+ *  \param[out] st_ptot Total pressure; may be NULL if not needed.
+ *
+ *  \return void
+ */
+static void hlld_get_fluxes_from_state(struct state *st, struct fluxes *flux, double *st_ptot)
+{
+  const double cr_press  = 0.; /* additional pressure term, always zero here */
+  double adiabatic_index = GAMMA;
+  double gamma_minus1    = adiabatic_index - 1.;
+  double mass_flux       = st->rho * st->velx;
+  double Bsqr            = st->Bx * st->Bx + st->By * st->By + st->Bz * st->Bz;
+  double vel_sqr         = st->velx * st->velx + st->vely * st->vely + st->velz * st->velz;
+  double v_dot_B         = st->velx * st->Bx + st->vely * st->By + st->velz * st->Bz;
+  double etot            = st->press / gamma_minus1 + 0.5 * st->rho * vel_sqr + 0.5 * Bsqr;
+  double ptot            = st->press + 0.5 * Bsqr + cr_press;
+
+  flux->mass        = mass_flux;
+  flux->momentum[0] = mass_flux * st->velx + st->press + 0.5 * Bsqr - st->Bx * st->Bx + cr_press;
+  flux->momentum[1] = mass_flux * st->vely - st->Bx * st->By;
+  flux->momentum[2] = mass_flux * st->velz - st->Bx * st->Bz;
+  flux->B[1]        = st->By * st->velx - st->Bx * st->vely;
+  flux->B[2]        = st->Bz * st->velx - st->Bx * st->velz;
+  flux->energy      = (etot + ptot) * st->velx - st->Bx * v_dot_B;
+
+  st->Energy = etot;
+  if(st_ptot)
+    *st_ptot = ptot;
+}
+
+/*! \brief Calculates state in star region.
+ *
+ *  \param[out] st_star State in star region; filled in by this function.
+ *  \param[in] st Outer state of Riemann problem.
+ *  \param[in] S Velocity of characteristics.
+ *  \param[in] S_M Velocity of magnetic characteristics; becomes st_star->velx.
+ *  \param[in] ptot Total pressure of outer state.
+ *  \param[in] ptot_star Total pressure in star region.
+ *  \return void
+ */
+static void hlld_get_star(struct state *st_star, struct state *st, double S, double S_M, double ptot, double ptot_star)
+{
+  const double dS     = S - st->velx;
+  const double rho_dS = st->rho * dS;
+  const double denom  = rho_dS * (S - S_M) - st->Bx * st->Bx; /* shared by the transverse velocity and field updates */
+  const double numer  = rho_dS * dS - st->Bx * st->Bx;
+
+  st_star->rho  = rho_dS / (S - S_M);
+  st_star->velx = S_M;
+  st_star->vely = st->vely - st->Bx * st->By * (S_M - st->velx) / denom;
+  st_star->velz = st->velz - st->Bx * st->Bz * (S_M - st->velx) / denom;
+
+  st_star->Bx = st->Bx;
+  st_star->By = st->By * numer / denom;
+  st_star->Bz = st->Bz * numer / denom;
+
+  st_star->Energy = (dS * st->Energy - ptot * st->velx + ptot_star * S_M +
+                     st->Bx * (st->velx * st->Bx + st->vely * st->By + st->velz * st->Bz - st_star->velx * st->Bx -
+                               st_star->vely * st_star->By - st_star->velz * st_star->Bz)) / (S - S_M);
+  st_star->press = ptot_star - 0.5 * (st_star->Bx * st_star->Bx + st_star->By * st_star->By + st_star->Bz * st_star->Bz);
+}
+
+/*! \brief Calculates the flux in a star region from the adjacent outer state.
+ *
+ *  \param[in] st_A State of the Riemann problem.
+ *  \param[in] st_A_star State inside fast wave.
+ *  \param[in] flux_A Flux that is corrected by S_A times the jump st_A - st_A_star.
+ *  \param[in] S_A speed of characteristics.
+ *  \param[out] flux Flux through face.
+ *
+ *  \return void
+ */
+static void hlld_get_fluxes_star(struct state *st_A, struct state *st_A_star, struct fluxes *flux_A, double S_A, struct fluxes *flux)
+{
+  const double rho = st_A->rho, rho_star = st_A_star->rho;
+
+  flux->mass = flux_A->mass - S_A * (rho - rho_star);
+  flux->momentum[0] = flux_A->momentum[0] - S_A * (rho * st_A->velx - rho_star * st_A_star->velx);
+  flux->momentum[1] = flux_A->momentum[1] - S_A * (rho * st_A->vely - rho_star * st_A_star->vely);
+  flux->momentum[2] = flux_A->momentum[2] - S_A * (rho * st_A->velz - rho_star * st_A_star->velz);
+
+  flux->B[1] = flux_A->B[1] - S_A * (st_A->By - st_A_star->By);
+  flux->B[2] = flux_A->B[2] - S_A * (st_A->Bz - st_A_star->Bz);
+  flux->energy = flux_A->energy - S_A * (st_A->Energy - st_A_star->Energy);
+}
+
+/*! \brief Get state in starstar region, case S_M>=0.
+ *
+ *  \param[in] st_star_L State in left star region; also passed as st_star_A.
+ *  \param[in] st_star_R State in right star region.
+ *  \param[out] st_starstar State in starstar region.
+ *  \return void
+ */
+static void hlld_get_starstar_L(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar)
+{
+  const double sign = -1.0; /* value handed to the sign argument for the left side */
+  hlld_get_starstar(st_star_L, st_star_R, st_starstar, st_star_L, sign);
+}
+
+/*! \brief Get state in starstar region, case S_M<0.
+ *
+ *  \param[in] st_star_L State in left star region.
+ *  \param[in] st_star_R State in right star region; also passed as st_star_A.
+ *  \param[out] st_starstar State in starstar region.
+ *  \return void
+ */
+static void hlld_get_starstar_R(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar)
+{
+  const double sign = 1.0; /* value handed to the sign argument for the right side */
+  hlld_get_starstar(st_star_L, st_star_R, st_starstar, st_star_R, sign);
+}
+
+/*! \brief Get state in starstar region.
+ *
+ *  \param[in] st_star_L State in left star region.
+ *  \param[in] st_star_R State in right star region.
+ *  \param[out] st_starstar State in starstar region.
+ *  \param[in] st_star_A State where flow is coming from (depends on
+ *             directionality of the flow); supplies rho, Bx, press, Energy.
+ *  \param[in] sign Directionality of flow; factor of the energy correction.
+ *
+ *  \return void
+ */
+static void hlld_get_starstar(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar, struct state *st_star_A,
+                              double sign)
+{
+  const double sBx    = st_star_A->Bx < 0 ? -1.0 : 1.0; /* sign of Bx, with Bx == 0 counted as positive */
+  const double sqLrho = sqrt(st_star_L->rho);
+  const double sqRrho = sqrt(st_star_R->rho);
+  const double sqSum  = sqLrho + sqRrho;
+  const double sqProd = sqLrho * sqRrho;
+
+  st_starstar->rho = st_star_A->rho;
+
+  st_starstar->velx = st_star_L->velx; /* == st_star_R->velx == S_M */
+  st_starstar->vely =
+      ((sqLrho * st_star_L->vely) + (sqRrho * st_star_R->vely) + (st_star_R->By - st_star_L->By) * sBx) / sqSum;
+  st_starstar->velz =
+      ((sqLrho * st_star_L->velz) + (sqRrho * st_star_R->velz) + (st_star_R->Bz - st_star_L->Bz) * sBx) / sqSum;
+
+  st_starstar->Bx = st_star_A->Bx;
+  st_starstar->By =
+      ((sqLrho * st_star_R->By) + (sqRrho * st_star_L->By) + sqProd * (st_star_R->vely - st_star_L->vely) * sBx) / sqSum;
+  st_starstar->Bz =
+      ((sqLrho * st_star_R->Bz) + (sqRrho * st_star_L->Bz) + sqProd * (st_star_R->velz - st_star_L->velz) * sBx) / sqSum;
+
+  /* energy of st_star_A, corrected by the change of v.B between st_star_A and the starstar state */
+  st_starstar->Energy = st_star_A->Energy + sign * sqrt(st_star_A->rho) * sBx *
+                                                (st_star_A->velx * st_star_A->Bx + st_star_A->vely * st_star_A->By +
+                                                 st_star_A->velz * st_star_A->Bz - st_starstar->velx * st_star_A->Bx -
+                                                 st_starstar->vely * st_starstar->By - st_starstar->velz * st_starstar->Bz);
+
+  st_starstar->press = st_star_A->press;
+}
+
+/*! \brief Get fluxes in starstar region.
+ *
+ *  \param[in] st_A State in outside region.
+ *  \param[in] st_A_star State in star region.
+ *  \param[in] st_A_starstar State in starstar region.
+ *  \param[in] flux_A Flux corresponding to st_A.
+ *  \param[in] S_A Speed of characteristics in outside region.
+ *  \param[in] S_A_star Speed of characteristics in star region.
+ *  \param[out] flux Flux in starstar region (mass, momentum, B[1], B[2], energy).
+ *
+ *  \return void
+ */
+static void hlld_get_fluxes_starstar(struct state *st_A, struct state *st_A_star, struct state *st_A_starstar, struct fluxes *flux_A,
+                                     double S_A, double S_A_star, struct fluxes *flux)
+{
+  const double dS      = S_A_star - S_A;
+  const double w_outer = S_A * st_A->rho;
+  const double w_star  = dS * st_A_star->rho;
+  const double w_ss    = S_A_star * st_A_starstar->rho;
+
+  flux->mass = flux_A->mass + w_ss - w_star - w_outer;
+
+  flux->momentum[0] = flux_A->momentum[0] + w_ss * st_A_starstar->velx - w_star * st_A_star->velx - w_outer * st_A->velx;
+  flux->momentum[1] = flux_A->momentum[1] + w_ss * st_A_starstar->vely - w_star * st_A_star->vely - w_outer * st_A->vely;
+  flux->momentum[2] = flux_A->momentum[2] + w_ss * st_A_starstar->velz - w_star * st_A_star->velz - w_outer * st_A->velz;
+  flux->B[1] = flux_A->B[1] + S_A_star * st_A_starstar->By - dS * st_A_star->By - S_A * st_A->By;
+  flux->B[2] = flux_A->B[2] + S_A_star * st_A_starstar->Bz - dS * st_A_star->Bz - S_A * st_A->Bz;
+  flux->energy = flux_A->energy + S_A_star * st_A_starstar->Energy - dS * st_A_star->Energy - S_A * st_A->Energy;
+}
+
+/*! \brief Get state in star region.
+ *
+ *  \param[out] st_star State in star region; filled in by this function.
+ *  \param[in] flux_L Flux from the left state.
+ *  \param[in] flux_R Flux from the right state.
+ *  \param[in] st_L State at the left side of the Riemann problem.
+ *  \param[in] st_R State at the right side of the Riemann problem.
+ *  \param[in] S_L Speed of characteristics on the left side.
+ *  \param[in] S_R Speed of characteristics on the right side.
+ *
+ *  \return void
+ */
+static void hll_get_star(struct state *st_star, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R,
+                         double S_L, double S_R)
+{
+  const double gamma_minus1 = GAMMA - 1.;
+  const double fac          = 1.0 / (S_R - S_L);
+  const double w_L          = S_L * st_L->rho;
+  const double w_R          = S_R * st_R->rho;
+
+  st_star->rho = fac * (w_R - w_L - flux_R->mass + flux_L->mass);
+
+  st_star->velx =
+      fac * (w_R * st_R->velx - w_L * st_L->velx - flux_R->momentum[0] + flux_L->momentum[0]) / st_star->rho;
+  st_star->vely =
+      fac * (w_R * st_R->vely - w_L * st_L->vely - flux_R->momentum[1] + flux_L->momentum[1]) / st_star->rho;
+  st_star->velz =
+      fac * (w_R * st_R->velz - w_L * st_L->velz - flux_R->momentum[2] + flux_L->momentum[2]) / st_star->rho;
+
+  st_star->Energy = fac * (S_R * st_R->Energy - S_L * st_L->Energy - flux_R->energy + flux_L->energy);
+
+  st_star->Bx = st_R->Bx; /* == st_L->Bx */
+  st_star->By = fac * (S_R * st_R->By - S_L * st_L->By - flux_R->B[1] + flux_L->B[1]);
+  st_star->Bz = fac * (S_R * st_R->Bz - S_L * st_L->Bz - flux_R->B[2] + flux_L->B[2]);
+
+  /* pressure from the total energy minus its kinetic and magnetic parts */
+  const double e_kin =
+      0.5 * st_star->rho * (st_star->velx * st_star->velx + st_star->vely * st_star->vely + st_star->velz * st_star->velz);
+  const double e_mag = 0.5 * (st_star->Bx * st_star->Bx + st_star->By * st_star->By + st_star->Bz * st_star->Bz);
+  st_star->press = gamma_minus1 * (st_star->Energy - e_kin - e_mag);
+}
+
+/*! \brief Get interface flux from states.
+ *
+ *  \param[out] flux Flux through the interface (B[0] is not set here).
+ *  \param[in] flux_L Flux from left state.
+ *  \param[in] flux_R Flux from right state.
+ *  \param[in] st_L Left state.
+ *  \param[in] st_R Right state.
+ *  \param[in] S_L Speed of characteristics at left side.
+ *  \param[in] S_R Speed of characteristics at right side.
+ *
+ *  \return void
+ */
+static void hll_get_flux(struct fluxes *flux, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R,
+                         double S_L, double S_R)
+{
+  const double fac  = 1.0 / (S_R - S_L);
+  const double S_RL = S_R * S_L;
+
+  flux->mass = fac * (S_R * flux_L->mass - S_L * flux_R->mass + S_RL * (st_R->rho - st_L->rho));
+
+  flux->momentum[0] =
+      fac * (S_R * flux_L->momentum[0] - S_L * flux_R->momentum[0] + S_RL * (st_R->rho * st_R->velx - st_L->rho * st_L->velx));
+  flux->momentum[1] =
+      fac * (S_R * flux_L->momentum[1] - S_L * flux_R->momentum[1] + S_RL * (st_R->rho * st_R->vely - st_L->rho * st_L->vely));
+  flux->momentum[2] =
+      fac * (S_R * flux_L->momentum[2] - S_L * flux_R->momentum[2] + S_RL * (st_R->rho * st_R->velz - st_L->rho * st_L->velz));
+
+  flux->energy = fac * (S_R * flux_L->energy - S_L * flux_R->energy + S_RL * (st_R->Energy - st_L->Energy));
+  flux->B[1] = fac * (S_R * flux_L->B[1] - S_L * flux_R->B[1] + S_RL * (st_R->By - st_L->By));
+  flux->B[2] = fac * (S_R * flux_L->B[2] - S_L * flux_R->B[2] + S_RL * (st_R->Bz - st_L->Bz));
+}
+
+/*! \brief Get interface flux from states.
+ *
+ *  Lax-Friedrichs flux; used whenever the HLL flux estimate is invalid.
+ *  Each component is the mean of the left and right flux minus 0.5 * S
+ *  times the jump (right minus left) of the matching conserved quantity.
+ *
+ *  \param[out] flux Flux through the interface.
+ *  \param[in] flux_L Flux from left state.
+ *  \param[in] flux_R Flux from right state.
+ *  \param[in] st_L Left state.
+ *  \param[in] st_R Right state.
+ *  \param[in] S Speed that scales the jump term.
+ *  \return void
+ */
+static void lax_get_flux(struct fluxes *flux, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R,
+                         double S)
+{
+  const double half_S = 0.5 * S;
+
+  flux->mass = 0.5 * (flux_L->mass + flux_R->mass) - half_S * (st_R->rho - st_L->rho);
+
+  flux->momentum[0] = 0.5 * (flux_L->momentum[0] + flux_R->momentum[0]) - half_S * (st_R->rho * st_R->velx - st_L->rho * st_L->velx);
+  flux->momentum[1] = 0.5 * (flux_L->momentum[1] + flux_R->momentum[1]) - half_S * (st_R->rho * st_R->vely - st_L->rho * st_L->vely);
+  flux->momentum[2] = 0.5 * (flux_L->momentum[2] + flux_R->momentum[2]) - half_S * (st_R->rho * st_R->velz - st_L->rho * st_L->velz);
+  flux->energy = 0.5 * (flux_L->energy + flux_R->energy) - half_S * (st_R->Energy - st_L->Energy);
+  flux->B[1] = 0.5 * (flux_L->B[1] + flux_R->B[1]) - half_S * (st_R->By - st_L->By);
+  flux->B[2] = 0.5 * (flux_L->B[2] + flux_R->B[2]) - half_S * (st_R->Bz - st_L->Bz);
+}
+
+#endif /* #if defined(RIEMANN_HLLD) */
diff --git a/src/amuse/community/arepo/src/hydro/scalars.c b/src/amuse/community/arepo/src/hydro/scalars.c
new file mode 100644
index 0000000000..b28bb67b6f
--- /dev/null
+++ b/src/amuse/community/arepo/src/hydro/scalars.c
@@ -0,0 +1,107 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/hydro/scalars.c
+ * \date        05/2018
+ * \brief       Routines to initialize passive scalars which are advected with
+ *              the fluid.
+ * \details     contains functions:
+ *                void init_scalars()
+ *                int scalar_init(MyFloat * addr, MyFloat * addr_mass, int
+ *                  type)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+
+#ifdef MAXSCALARS
+int N_Scalar = 0; /* number of scalars registered so far through scalar_init() */
+struct scalar_elements scalar_elements[MAXSCALARS]; /* per scalar: type and byte offsets relative to SphP[0] */
+struct scalar_index ScalarIndex; /* holds indices returned by scalar_init(), e.g. HighResMass */
+#endif /* #ifdef MAXSCALARS */
+
+/*! \brief Main routine to initialize passive scalar quantities.
+ *
+ *  Registers the scalars via scalar_init(); a no-op unless MAXSCALARS is set.
+ *  \return void
+ */
+void init_scalars()
+{
+#ifdef MAXSCALARS
+
+#ifdef REFINEMENT_HIGH_RES_GAS
+  /* register HighResDensity / HighResMass as a passive scalar and keep its index */
+  ScalarIndex.HighResMass = scalar_init(&SphP[0].HighResDensity, &SphP[0].HighResMass, SCALAR_TYPE_PASSIVE);
+  if(ScalarIndex.HighResMass == -1)
+    terminate("ScalarIndex.HighResMass initialized incorrectly\n");
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+
+#ifdef PASSIVE_SCALARS
+  for(int k = 0; k < PASSIVE_SCALARS; ++k)
+    scalar_init(SphP[0].PScalars + k, SphP[0].PConservedScalars + k, SCALAR_TYPE_PASSIVE);
+#endif /* #ifdef PASSIVE_SCALARS */
+
+  mpi_printf("INIT: %d/%d Scalars used.\n", N_Scalar, MAXSCALARS);
+#endif /* #ifdef MAXSCALARS */
+}
+
+/*! \brief Initialize a specific scalar property.
+ *
+ *  \param[in] addr Pointer to (primitive) scalar in SphP[0] struct.
+ *  \param[in] addr_mass Pointer to conserved scalar quantity in SphP[0].
+ *  \param[in] type Type of scalar (e.g. SCALAR_TYPE_PASSIVE for passive
+ *             scalar)
+ *
+ *  \return Index of the new entry in scalar_elements; -1 without MAXSCALARS.
+ */
+int scalar_init(MyFloat *addr, MyFloat *addr_mass, int type)
+{
+#ifndef MAXSCALARS
+  return -1;
+#else  /* #ifndef MAXSCALARS */
+  if(N_Scalar == MAXSCALARS)
+    {
+      mpi_printf("Failed to register scalar, maximum of %d already reached\n", MAXSCALARS);
+      terminate("MAXSCALARS reached");
+    }
+
+  /* claim the next free slot; only offsets relative to SphP[0] are stored */
+  const int slot   = N_Scalar++;
+  char *const base = (char *)&SphP[0];
+
+  scalar_elements[slot].type        = type;
+  scalar_elements[slot].offset      = ((char *)addr) - base;
+  scalar_elements[slot].offset_mass = ((char *)addr_mass) - base;
+
+  return slot; /* note: gradients are initialized in init_gradients */
+#endif /* #ifndef MAXSCALARS #else */
+}
diff --git a/src/amuse/community/arepo/src/hydro/update_primitive_variables.c b/src/amuse/community/arepo/src/hydro/update_primitive_variables.c
new file mode 100644
index 0000000000..48a10cd4cf
--- /dev/null
+++ b/src/amuse/community/arepo/src/hydro/update_primitive_variables.c
@@ -0,0 +1,343 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/hydro/update_primitive_variables.c
+ * \date        05/2018
+ * \brief       Routines to recover the primitive hydrodynamical variables from
+ *              the conserved ones.
+ * \details     contains functions:
+ *                void update_primitive_variables(void)
+ *                void set_pressure_of_cell(int i)
+ *                void set_pressure_of_cell_internal(struct particle_data
+ *                  *localP, struct sph_particle_data *localSphP, int i)
+ *                void do_validity_checks(struct particle_data *localP, struct
+ *                  sph_particle_data *localSphP, int i, struct pv_update_data
+ *                  *pvd)
+ *                void update_primitive_variables_single(struct particle_data
+ *                  *localP, struct sph_particle_data *localSphP, int i,
+ *                  struct pv_update_data *pvd)
+ *                void update_internal_energy(struct particle_data *localP,
+ *                  struct sph_particle_data *localSphP, int i, struct
+ *                  pv_update_data *pvd)
+ *                double get_sound_speed(int p)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 11.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_linalg.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief Main routine to update the primitive hydrodynamics variables from
+ *         the conserved ones.
+ *
+ *  Note that the primitive variables are inconsistent with the (new)
+ *  conserved variables after the hydro integration up to the point this
+ *  function is called.
+ *
+ *  For every active hydro cell: validity check, primitive update, internal
+ *  energy update, pressure; finally OldMass and TimeLastPrimUpdate are set.
+ *  \return void
+ */
+void update_primitive_variables(void)
+{
+  TIMER_START(CPU_CELL_UPDATES);
+
+  /* cosmological factors handed to the per-cell routines; unity unless comoving */
+  struct pv_update_data pvd;
+  pvd.atime = pvd.hubble_a = pvd.a3inv = 1.0;
+
+  if(All.ComovingIntegrationOn)
+    {
+      pvd.atime    = All.Time;
+      pvd.hubble_a = hubble_function(All.Time);
+      pvd.a3inv    = 1 / (All.Time * All.Time * All.Time);
+    }
+
+  for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      const int cell = TimeBinsHydro.ActiveParticleList[idx];
+
+      if(cell >= 0) /* negative entries of the active list are skipped */
+        {
+          do_validity_checks(P, SphP, cell, &pvd);
+          update_primitive_variables_single(P, SphP, cell, &pvd);
+          update_internal_energy(P, SphP, cell, &pvd);
+
+          /* calculate the pressure from Density and Utherm (and composition) */
+          set_pressure_of_cell_internal(P, SphP, cell);
+
+          SphP[cell].OldMass            = P[cell].Mass;
+          SphP[cell].TimeLastPrimUpdate = All.Time;
+        }
+    }
+
+  TIMER_STOP(CPU_CELL_UPDATES);
+}
+
+/*! \brief Wrapper that sets the pressure of cell i in the global P and SphP arrays.
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *  \return void
+ */
+void set_pressure_of_cell(int i)
+{
+  set_pressure_of_cell_internal(P, SphP, i);
+}
+
+/*! \brief Function to calculate pressure from other hydrodynamics quantities.
+ *
+ *  With ISOTHERM_EQS: Density * IsoSoundSpeed^2; otherwise GAMMA_MINUS1 *
+ *  Density * Utherm if Utherm >= 0, else 0. With
+ *  ENFORCE_JEANS_STABILITY_OF_CELLS a lower bound is applied afterwards. This
+ *  is just the thermal pressure (i.e. not including magnetic fields).
+ *  \param[in] localP Pointer to particle data array.
+ *  \param[in,out] localSphP Pointer to cell data array; Pressure is written.
+ *  \param[in] i Index in localP and localSphP arrays.
+ *
+ *  \return void
+ */
+void set_pressure_of_cell_internal(struct particle_data *localP, struct sph_particle_data *localSphP, int i)
+{
+#ifdef ISOTHERM_EQS
+  localSphP[i].Pressure = localSphP[i].Density * All.IsoSoundSpeed * All.IsoSoundSpeed;
+#else  /* #ifdef ISOTHERM_EQS */
+
+  if(localSphP[i].Utherm >= 0)
+    localSphP[i].Pressure = GAMMA_MINUS1 * localSphP[i].Density * localSphP[i].Utherm;
+  else
+    localSphP[i].Pressure = 0; /* Utherm not >= 0: pressure is set to zero */
+#endif /* #ifdef ISOTHERM_EQS */
+
+#ifdef ENFORCE_JEANS_STABILITY_OF_CELLS
+#if defined(USE_SFR)
+  if(get_starformation_rate(i) == 0) /* with USE_SFR the bound below only applies to cells with zero star formation rate */
+#endif /* #if defined(USE_SFR) */
+    { /* raise Pressure to at least GAMMA_MINUS1 * Density * 2 * G * Mass / (cf_atime * cell_soft) */
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      double cell_soft = All.ForceSoftening[localP[i].SofteningType];
+#else  /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+    double cell_soft = All.GasSoftFactor * get_cell_radius(i);
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */
+
+      localSphP[i].Pressure =
+          dmax(localSphP[i].Pressure, GAMMA_MINUS1 * localSphP[i].Density * 2 * All.G * localP[i].Mass / (All.cf_atime * cell_soft));
+    }
+#endif /* #ifdef ENFORCE_JEANS_STABILITY_OF_CELLS */
+}
+
+/*! \brief Validity checks for a gas cell.
+ *
+ *  So far only a non-negative mass is enforced: if the mass of cell i is
+ *  below zero, its properties are printed and terminate() is called.
+ *
+ *  \param[in] localP Pointer to particle data array
+ *  \param[in] localSphP Pointer to cell data array (only read, for the message)
+ *  \param[in] i Index in localP and localSphP arrays
+ *  \param[in] pvd (unused)
+ *
+ *  \return void
+ */
+void do_validity_checks(struct particle_data *localP, struct sph_particle_data *localSphP, int i, struct pv_update_data *pvd)
+{
+  if(localP[i].Mass < 0)
+    {
+      /* print the ID as long long so that IDs wider than int are not truncated in the message */
+      printf("very bad...i=%d ID=%lld mass=%g oldMass=%g utherm=%g pos=%g|%g|%g\n", i, (long long)localP[i].ID, localP[i].Mass,
+             localSphP[i].OldMass, localSphP[i].Utherm, localP[i].Pos[0], localP[i].Pos[1], localP[i].Pos[2]);
+      terminate("stop");
+    }
+}
+
+/*! \brief Updates primitive variables in a specified cell.
+ *
+ *  Sets Density, Vel, the registered scalars and (with MHD) B from the
+ *  conserved quantities; for non-positive mass, Vel and scalars become 0.
+ *  \param[in,out] localP Pointer to particle data array; Vel is written.
+ *  \param[in,out] localSphP Pointer to cell data array.
+ *  \param[in] i Index of cell in localP and localSphP arrays.
+ *  \param[in] pvd additional data for the update; not used in this function.
+ *  \return void
+ */
+void update_primitive_variables_single(struct particle_data *localP, struct sph_particle_data *localSphP, int i,
+                                       struct pv_update_data *pvd)
+{
+  struct particle_data *p     = &localP[i];
+  struct sph_particle_data *c = &localSphP[i];
+
+  c->Density = p->Mass / c->Volume;
+  if(p->Mass > 0)
+    {
+      for(int d = 0; d < 3; d++)
+        p->Vel[d] = c->Momentum[d] / p->Mass;
+
+#ifdef MAXSCALARS
+      for(int k = 0; k < N_Scalar; k++)
+        {
+          MyFloat *prim = (MyFloat *)(((char *)c) + scalar_elements[k].offset);
+          MyFloat *cons = (MyFloat *)(((char *)c) + scalar_elements[k].offset_mass);
+          *prim         = *cons / p->Mass;
+        }
+#endif /* #ifdef MAXSCALARS */
+
+#ifdef MHD
+      for(int d = 0; d < 3; d++)
+        c->B[d] = c->BConserved[d] / c->Volume;
+#endif /* #ifdef MHD */
+    }
+  else /* mass is not positive */
+    {
+      for(int d = 0; d < 3; d++)
+        p->Vel[d] = 0;
+
+#ifdef MAXSCALARS
+      for(int k = 0; k < N_Scalar; k++)
+        *(MyFloat *)(((char *)c) + scalar_elements[k].offset) = 0;
+#endif /* #ifdef MAXSCALARS */
+    }
+}
+
+/*! \brief Updates the internal energy field in a specified cell
+ *
+ *  Recomputes Utherm from Energy, applies the floor All.MinEgySpec and the
+ *  low-density cap, and terminates on negative Utherm. Empty if ISOTHERM_EQS.
+ *  \param[in] localP Pointer to particle data array
+ *  \param[in,out] localSphP Pointer to cell data array
+ *  \param[in] i Index of cell in localP and localSphP arrays
+ *  \param[in] pvd additional data for the update (atime is used here)
+ *  \return void
+ */
+void update_internal_energy(struct particle_data *localP, struct sph_particle_data *localSphP, int i, struct pv_update_data *pvd)
+{
+#ifndef ISOTHERM_EQS
+  double ulimit;
+
+  if(localP[i].Mass > 0)
+    {
+#ifdef MESHRELAX
+      localSphP[i].Utherm = localSphP[i].Energy / localP[i].Mass;
+#else  /* #ifdef MESHRELAX */
+      localSphP[i].Utherm =
+          (localSphP[i].Energy / localP[i].Mass -
+           0.5 * (localP[i].Vel[0] * localP[i].Vel[0] + localP[i].Vel[1] * localP[i].Vel[1] + localP[i].Vel[2] * localP[i].Vel[2])) /
+          (pvd->atime * pvd->atime);
+#endif /* #ifdef MESHRELAX #else */
+
+#ifdef MHD
+      localSphP[i].Utherm -=
+          0.5 *
+          (localSphP[i].B[0] * localSphP[i].B[0] + localSphP[i].B[1] * localSphP[i].B[1] + localSphP[i].B[2] * localSphP[i].B[2]) /
+          localSphP[i].Density / pvd->atime;
+#endif /* #ifdef MHD */
+
+      ulimit = All.MinEgySpec; /* lower bound for the specific thermal energy */
+
+      if(localSphP[i].Utherm < ulimit)
+        {
+          EgyInjection -= localSphP[i].Energy; /* with the += below: EgyInjection grows by the energy added through the floor */
+
+          localSphP[i].Utherm = ulimit;
+
+#ifdef MESHRELAX
+          localSphP[i].Energy = localP[i].Mass * localSphP[i].Utherm;
+#else  /* #ifdef MESHRELAX */
+          localSphP[i].Energy =
+              pvd->atime * pvd->atime * localP[i].Mass * localSphP[i].Utherm +
+              0.5 * localP[i].Mass *
+                  (localP[i].Vel[0] * localP[i].Vel[0] + localP[i].Vel[1] * localP[i].Vel[1] + localP[i].Vel[2] * localP[i].Vel[2]);
+#endif /* #ifdef MESHRELAX */
+
+#ifdef MHD
+          localSphP[i].Energy +=
+              0.5 *
+              (localSphP[i].B[0] * localSphP[i].B[0] + localSphP[i].B[1] * localSphP[i].B[1] + localSphP[i].B[2] * localSphP[i].B[2]) *
+              localSphP[i].Volume * pvd->atime;
+#endif /* #ifdef MHD */
+
+          EgyInjection += localSphP[i].Energy;
+        }
+    }
+  else
+    localSphP[i].Utherm = 0; /* mass is not positive: thermal energy is set to zero */
+
+  if(localSphP[i].Density < All.LimitUBelowThisDensity && localSphP[i].Utherm > All.LimitUBelowCertainDensityToThisValue)
+    { /* NOTE(review): unlike the floor above, this cap ignores MESHRELAX and leaves EgyInjection untouched - confirm intended */
+      localSphP[i].Utherm = All.LimitUBelowCertainDensityToThisValue;
+      localSphP[i].Energy =
+          pvd->atime * pvd->atime * localP[i].Mass * localSphP[i].Utherm +
+          0.5 * localP[i].Mass *
+              (localP[i].Vel[0] * localP[i].Vel[0] + localP[i].Vel[1] * localP[i].Vel[1] + localP[i].Vel[2] * localP[i].Vel[2]);
+#ifdef MHD
+      localSphP[i].Energy +=
+          0.5 *
+          (localSphP[i].B[0] * localSphP[i].B[0] + localSphP[i].B[1] * localSphP[i].B[1] + localSphP[i].B[2] * localSphP[i].B[2]) *
+          localSphP[i].Volume * pvd->atime;
+#endif /* #ifdef MHD */
+    }
+
+  if(localSphP[i].Utherm < 0)
+    {
+      printf("negative utherm %g\n", localSphP[i].Utherm);
+      terminate("stop");
+    }
+
+#endif /* #ifndef ISOTHERM_EQS */
+}
+
+/*! \brief Calculates the sound speed of a specified cell
+ *
+ *  Isothermal or adiabatic sound speed; with MHD the magnetic term
+ *  Bsqr / Density is added unless the density is not positive.
+ *  \param[in] p Index of gas cell in the SphP array
+ *
+ *  \return Sound speed (with MHD: an upper bound to the signal velocity)
+ */
+double get_sound_speed(int p)
+{
+  double csnd;
+
+#ifdef ISOTHERM_EQS
+  csnd = All.IsoSoundSpeed;
+#else  /* #ifdef ISOTHERM_EQS */
+  if(SphP[p].Density > 0)
+    csnd = sqrt(GAMMA * SphP[p].Pressure / SphP[p].Density);
+  else
+    csnd = 0;
+#endif /* #ifdef ISOTHERM_EQS #else */
+
+#ifdef MHD
+  /* for MHD, this is an upper bound to the signal velocity
+     to do it more precisely, the magnet field in normal direction to the
+     interfaces has to be taken into account;
+     skipped for Density <= 0, where Bsqr / Density would be inf or NaN */
+  if(SphP[p].Density > 0)
+    {
+      double Bsqr = SphP[p].B[0] * SphP[p].B[0] + SphP[p].B[1] * SphP[p].B[1] + SphP[p].B[2] * SphP[p].B[2];
+      if(All.ComovingIntegrationOn)
+        Bsqr /= All.Time;
+      csnd = sqrt(csnd * csnd + Bsqr / SphP[p].Density);
+    }
+#endif /* #ifdef MHD */
+
+  return csnd;
+}
diff --git a/src/amuse/community/arepo/src/init/begrun.c b/src/amuse/community/arepo/src/init/begrun.c
new file mode 100644
index 0000000000..ad8a5222ca
--- /dev/null
+++ b/src/amuse/community/arepo/src/init/begrun.c
@@ -0,0 +1,344 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/init/begrun.c
+ * \date        05/2018
+ * \brief       Initial set-up of a simulation run
+ * \details     This file contains various functions to initialize a simulation
+ *              run. In particular, the parameter file is read in and parsed
+ *              and global variables are initialized to their proper values.
+ *              contains functions:
+ *                void hello(void)
+ *                void begrun0(void)
+ *                void begrun1(void)
+ *                void begrun2(void)
+ *                void set_units(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 03.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_rng.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../mesh/voronoi/voronoi.h"
+
+#ifdef HAVE_HDF5
+#include <hdf5.h>
+herr_t my_hdf5_error_handler(void *unused);
+#endif
+
+static void delete_end_file(void);
+
+/*! \brief Prints an ASCII-art welcome banner through mpi_printf().
+ *
+ *  \return void
+ */
+void hello(void)
+{
+  mpi_printf(
+      "\n   __    ____  ____  ____  _____\n  /__\\  (  _ \\( ___)(  _ \\(  _  )\n /(__)\\  )   / )__)  )___/ "
+      ")(_)(\n(__)(__)(_)\\_)(____)(__)  (_____)\n\n");
+}
+
+/*! \brief Prints version, MPI layout and (on task 0) the compile options.
+ *
+ *  \return void
+ */
+void begrun0(void)
+{
+  mpi_printf(
+      "\nThis is Arepo, version %s.\n\nRunning with %d MPI tasks.\n\nApparently we're using %d compute nodes (we have a minimum of %d "
+      "MPI tasks per node, and a maximum of %d)\n\nCode was compiled with settings:\n\n",
+      AREPO_VERSION, NTask, NumNodes, MinTasksPerNode, MaxTasksPerNode);
+
+  if(ThisTask != 0)
+    return;
+
+  output_compile_time_options();
+}
+
+/*! \brief Initial setup of the simulation.
+ *
+ *  First, the parameter file is read by read_parameter_file() and checked,
+ *  then routines for setting units, etc. are called. This function only does
+ *  the setup necessary to load the IC file. After the IC file has been loaded
+ *  and prepared by init(), setup continues with begrun2(). This splitting is
+ *  done so that we can return cleanly from operations that don't actually
+ *  start the simulation (converting snapshots, making projected images, etc.)
+ *
+ *  \return void
+ */
+void begrun1(void)
+{
+  read_parameter_file(ParameterFile); /* ... read in parameters for this run */
+
+  check_parameters(); /* consistency check of parameters */
+
+#ifdef HAVE_HDF5
+  H5Eset_auto(my_hdf5_error_handler, NULL); /* route HDF5 errors to our own handler */
+#endif /* #ifdef HAVE_HDF5 */
+
+  gsl_set_error_handler(my_gsl_error_handler); /* same for errors raised by GSL */
+
+#ifdef DEBUG
+  enable_core_dumps_and_fpu_exceptions();
+#endif /* #ifdef DEBUG */
+
+  mpi_printf("BEGRUN: Size of particle structure       %3d  [bytes]\n", (int)sizeof(struct particle_data));
+  mpi_printf("BEGRUN: Size of sph particle structure   %3d  [bytes]\n", (int)sizeof(struct sph_particle_data));
+  mpi_printf("BEGRUN: Size of gravity tree node        %3d  [bytes]\n", (int)sizeof(struct NODE));
+#ifdef MULTIPLE_NODE_SOFTENING
+  mpi_printf("BEGRUN: Size of auxiliary gravity node   %3d  [bytes]\n", (int)sizeof(struct ExtNODE));
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+  set_units(); /* unit conversion factors, G and Hubble in internal units */
+
+  if(RestartFlag == 1) /* this is needed here to allow domain decomposition right after restart */
+    if(All.ComovingIntegrationOn)
+      init_drift_table();
+
+  init_io_fields();
+
+  force_short_range_init();
+
+#if defined(FORCETEST) && !defined(FORCETEST_TESTFORCELAW)
+  forcetest_ewald_init();
+#endif /* #if defined (FORCETEST) && !defined(FORCETEST_TESTFORCELAW) */
+
+  /* set up random number generators (both of GSL type ranlxd1) */
+  random_generator     = gsl_rng_alloc(gsl_rng_ranlxd1);
+  random_generator_aux = gsl_rng_alloc(gsl_rng_ranlxd1);
+
+  /* individual start-up seed: a fixed offset plus the task number */
+  gsl_rng_set(random_generator, 42 + ThisTask);
+  gsl_rng_set(random_generator_aux, 31452 + ThisTask);
+
+  timebins_init(&TimeBinsHydro, "Hydro", &All.MaxPartSph);
+  timebins_init(&TimeBinsGravity, "Gravity", &All.MaxPart);
+
+#if defined(COOLING)
+  All.Time = All.TimeBegin; /* set before the cosmological factors and InitCool() are evaluated */
+  set_cosmo_factors_for_current_time();
+  InitCool();
+#endif /* #if defined(COOLING) */
+
+#if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL)
+  ewald_init();
+#endif /* #if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */
+
+#ifdef TILE_ICS
+  All.BoxSize *= All.TileICsFactor; /* must precede the boxSize/boxHalf assignments below */
+#endif /* #ifdef TILE_ICS */
+
+  boxSize = All.BoxSize;
+  boxHalf = 0.5 * All.BoxSize;
+#ifdef LONG_X
+  boxHalf_X = boxHalf * LONG_X;
+  boxSize_X = boxSize * LONG_X;
+#endif /* #ifdef LONG_X */
+#ifdef LONG_Y
+  boxHalf_Y = boxHalf * LONG_Y;
+  boxSize_Y = boxSize * LONG_Y;
+#endif /* #ifdef LONG_Y */
+#ifdef LONG_Z
+  boxHalf_Z = boxHalf * LONG_Z;
+  boxSize_Z = boxSize * LONG_Z;
+#endif /* #ifdef LONG_Z */
+
+  EgyInjection = 0;
+
+#ifdef PMGRID
+  if((RestartFlag != 3) && (RestartFlag != 6))
+    long_range_init();
+#endif /* #ifdef PMGRID */
+
+  if(RestartFlag <= 2) /* for RestartFlag > 2, begrun2() opens the log files instead */
+    open_logfiles();
+
+  All.TimeLastRestartFile = CPUThisRun;
+
+#ifdef REDUCE_FLUSH
+  All.FlushLast = CPUThisRun;
+#endif /* #ifdef REDUCE_FLUSH */
+
+  init_scalars();
+
+  init_gradients();
+}
+
+/*! \brief Late setup, after the IC file has been loaded but before run() is
+ *  called.
+ *
+ *  A stale "cont" file is removed and delete_end_file() is called, log files
+ *  are opened if begrun1() did not do so (RestartFlag > 2) and optional
+ *  modules are initialized. The next output time is determined by
+ *  find_next_outputtime() and the restart-file/flush timers are reset.
+ *  \return void
+ */
+void begrun2(void)
+{
+  char contfname[1000];
+  sprintf(contfname, "%scont", All.OutputDir); /* OutputDir is used as a plain prefix, no separator is added */
+  unlink(contfname); /* result not checked: the file may simply not exist */
+
+  delete_end_file();
+
+  if(RestartFlag > 2) /* counterpart of the RestartFlag <= 2 case handled in begrun1() */
+    open_logfiles();
+
+#if defined(USE_SFR)
+  sfr_init();
+#endif /* #if defined(USE_SFR) */
+
+#ifdef PMGRID
+  long_range_init_regionsize();
+#endif /* #ifdef PMGRID */
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+  special_particle_create_list();
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+  if(RestartFlag != 1) /* this needs to be done here because here All.TimeBegin has the correct value */
+    if(All.ComovingIntegrationOn)
+      init_drift_table();
+
+  {
+    if(RestartFlag == 2) /* NOTE(review): the +100 offset presumably skips the output just read in - confirm */
+      All.Ti_nextoutput = find_next_outputtime(All.Ti_Current + 100);
+    else
+      All.Ti_nextoutput = find_next_outputtime(All.Ti_Current);
+  }
+
+  All.TimeLastRestartFile = CPUThisRun;
+
+#ifdef REDUCE_FLUSH
+  All.FlushLast = CPUThisRun;
+#endif /* #ifdef REDUCE_FLUSH */
+
+#if defined(FORCETEST) && defined(FORCETEST_TESTFORCELAW)
+  gravity_forcetest_testforcelaw();
+#endif /* #if defined(FORCETEST) && defined(FORCETEST_TESTFORCELAW) */
+}
+
+/*! \brief Computes conversion factors between internal code units and the
+ *  cgs-system.
+ *
+ *  In addition constants like the gravitation constant are set, and
+ *  All.MinEgySpec is derived from All.MinGasTemp if it was left at zero.
+ *  \return void
+ */
+void set_units(void)
+{
+  double meanweight;
+
+#ifdef STATICNFW
+  double Mtot;
+#endif /* #ifdef STATICNFW */
+
+  All.UnitTime_in_s         = All.UnitLength_in_cm / All.UnitVelocity_in_cm_per_s;
+  All.UnitTime_in_Megayears = All.UnitTime_in_s / SEC_PER_MEGAYEAR;
+
+  if(All.GravityConstantInternal == 0) /* 0 means: convert the cgs constant GRAVITY to internal units */
+    All.G = GRAVITY / pow(All.UnitLength_in_cm, 3) * All.UnitMass_in_g * pow(All.UnitTime_in_s, 2);
+  else
+    All.G = All.GravityConstantInternal;
+
+  All.UnitDensity_in_cgs     = All.UnitMass_in_g / pow(All.UnitLength_in_cm, 3);
+  All.UnitPressure_in_cgs    = All.UnitMass_in_g / All.UnitLength_in_cm / pow(All.UnitTime_in_s, 2);
+  All.UnitCoolingRate_in_cgs = All.UnitPressure_in_cgs / All.UnitTime_in_s;
+  All.UnitEnergy_in_cgs      = All.UnitMass_in_g * pow(All.UnitLength_in_cm, 2) / pow(All.UnitTime_in_s, 2);
+
+  /* convert some physical input parameters to internal units */
+
+  All.Hubble = HUBBLE * All.UnitTime_in_s;
+
+  mpi_printf("BEGRUN: Hubble (internal units)   = %g\n", All.Hubble);
+  mpi_printf("BEGRUN: G (internal units)        = %g\n", All.G);
+  mpi_printf("BEGRUN: UnitMass_in_g             = %g\n", All.UnitMass_in_g);
+  mpi_printf("BEGRUN: UnitTime_in_s             = %g\n", All.UnitTime_in_s);
+  mpi_printf("BEGRUN: UnitVelocity_in_cm_per_s  = %g\n", All.UnitVelocity_in_cm_per_s);
+  mpi_printf("BEGRUN: UnitDensity_in_cgs        = %g\n", All.UnitDensity_in_cgs);
+  mpi_printf("BEGRUN: UnitEnergy_in_cgs         = %g\n", All.UnitEnergy_in_cgs);
+  mpi_printf("\n");
+
+  meanweight = 4.0 / (1 + 3 * HYDROGEN_MASSFRAC); /* note: assuming NEUTRAL GAS */
+
+  if(All.MinEgySpec == 0) /* only derived from MinGasTemp when no explicit value is set */
+    {
+      All.MinEgySpec = 1 / meanweight * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * All.MinGasTemp;
+      All.MinEgySpec *= All.UnitMass_in_g / All.UnitEnergy_in_cgs; /* cgs energy per gram -> internal units */
+
+      mpi_printf("BEGRUN: MinEgySpec set to %g based on MinGasTemp=%g\n", All.MinEgySpec, All.MinGasTemp);
+    }
+
+#if defined(USE_SFR)
+  set_units_sfr();
+#endif /* #if defined(USE_SFR) */
+
+#ifdef STATICNFW
+  R200    = pow(NFW_M200 * All.G / (100 * All.Hubble * All.Hubble), 1.0 / 3);
+  Rs      = R200 / NFW_C;
+  Dc      = 200.0 / 3 * NFW_C * NFW_C * NFW_C / (log(1 + NFW_C) - NFW_C / (1 + NFW_C));
+  RhoCrit = 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G);
+  V200    = 10 * All.Hubble * R200;
+  mpi_printf("V200= %g\n", V200);
+
+  fac  = 1.0; /* NOTE(review): presumably a normalisation read by enclosed_mass() - confirm */
+  Mtot = enclosed_mass(R200);
+  mpi_printf("M200= %g\n", Mtot);
+  fac  = V200 * V200 * V200 / (10 * All.G * All.Hubble) / Mtot; /* V200^3 / (10 G H) relative to the mass just measured */
+  Mtot = enclosed_mass(R200); /* evaluated again with the updated fac */
+  mpi_printf("M200= %g\n", Mtot);
+#endif /* #ifdef STATICNFW */
+}
+
+/*! \brief Removes a stale end-file from the output directory.
+ *
+ *  This is needed in case an already completed simulation is extended or
+ *  overwritten. Note that the end-file is completely passive. Nothing is
+ *  done for RestartFlag > 2, where no simulation is happening. The result
+ *  of unlink() is not checked.
+ *
+ *  \return void
+ */
+static void delete_end_file(void)
+{
+  char endfname[1000];
+
+  if(RestartFlag <= 2)
+    {
+      sprintf(endfname, "%send", All.OutputDir);
+      unlink(endfname);
+    }
+}
diff --git a/src/amuse/community/arepo/src/init/density.c b/src/amuse/community/arepo/src/init/density.c
new file mode 100644
index 0000000000..8be85e443b
--- /dev/null
+++ b/src/amuse/community/arepo/src/init/density.c
@@ -0,0 +1,635 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/init/density.c
+ * \date        05/2018
+ * \brief       SPH density computation and smoothing length determination.
+ * \details     This file contains the "first SPH loop", where the SPH
+ *              densities and smoothing lengths are calculated.
+ *              In Arepo, this is used in setup_smoothinglengths() (init.c) to
+ *              get an initial guess for MaxDelaunayRadius.
+ *              Note that the SPH density is NOT used in the subsequent
+ *              hydrodynamics calculation, but the density is either set by the
+ *              initial conditions explicitly (DENSITY_AS_MASS_IN_INPUT) or
+ *              calculated by the mass given in the initial conditions divided
+ *              by the volume of the cell calculated by the Voronoi
+ *              tessellation algorithm.
+ *              contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                void density(void)
+ *                static int density_evaluate(int target, int mode, int
+ *                  threadid)
+ *                int density_isactive(int n)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 04.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+
+static int density_evaluate(int target, int mode, int threadid);
+
+static MyFloat *NumNgb, *DhsmlDensityFactor;
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+static MyFloat *MinDist;
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3]; /* copy of P[i].Pos of the target particle */
+  MyFloat Hsml;    /* copy of SphP[i].Hsml, the search radius used in density_evaluate() */
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  MyIDType ID; /* copy of P[i].ID, lets density_evaluate() exclude the particle itself from MinDist */
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+
+  int Firstnode; /* the firstnode argument handed to particle2in() */
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Copies the data of one particle/cell into a data_in record.
+ *         Needed by generic_comm_helpers2.
+ *
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index of particle in P and SphP arrays.
+ *  \param[in] firstnode First node of communication.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  int axis;
+
+  for(axis = 0; axis < 3; axis++)
+    in->Pos[axis] = P[i].Pos[axis];
+  in->Hsml      = SphP[i].Hsml;
+  in->Firstnode = firstnode;
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  in->ID = P[i].ID;
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyFloat Rho;          /* sum of mass_j * wk over the neighbours inside h */
+  MyFloat DhsmlDensity; /* sum of -mass_j * (NUMDIMS * hinv * wk + u * dwk) over the same neighbours */
+  MyFloat Ngb;          /* kernel-weighted neighbour count (sum of NORM_COEFF * wk / hinv3) */
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  MyFloat MinDist; /* smallest distance to a neighbour carrying a different ID */
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+} data_out;
+
+static data_out *DataResult, *DataOut;
+
+/*! \brief Routine to store or combine result data. Needed by
+ *         generic_comm_helpers2.
+ *
+ *  \param[in] out Partial result to be transferred to NumNgb and, for gas
+ *  cells, to SphP[].Density, DhsmlDensityFactor (and MinDist).
+ *  \param[in] i Index of particle in P and SphP arrays
+ *  \param[in] mode MODE_LOCAL_PARTICLES overwrites the stored values; any
+ *  other mode adds to them (MinDist keeps the minimum).
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  const int first_store = (mode == MODE_LOCAL_PARTICLES);
+
+  if(first_store) /* initial store */
+    NumNgb[i] = out->Ngb;
+  else /* combine */
+    NumNgb[i] += out->Ngb;
+
+  if(P[i].Type != 0) /* the remaining fields are only kept for gas cells */
+    return;
+
+  if(first_store)
+    {
+      SphP[i].Density       = out->Rho;
+      DhsmlDensityFactor[i] = out->DhsmlDensity;
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+      MinDist[i] = out->MinDist;
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+      return;
+    }
+
+  SphP[i].Density += out->Rho;
+  DhsmlDensityFactor[i] += out->DhsmlDensity;
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  if(out->MinDist < MinDist[i])
+    MinDist[i] = out->MinDist;
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+}
+
+#include "../utils/generic_comm_helpers2.h"
+
+/*! \brief Routine that defines what to do with local particles.
+ *
+ *  Works through the active hydro list starting at NextParticle and calls
+ *  the *_evaluate function in MODE_LOCAL_PARTICLES for every entry that
+ *  density_isactive() accepts. The walk ends when the list is exhausted or
+ *  when Thread[].ExportSpace has dropped below MinSpace.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  int threadid = get_thread_num();
+  int task, idx, i;
+
+  /* reset the per-task export flags of this thread */
+  for(task = 0; task < NTask; task++)
+    Thread[threadid].Exportflag[task] = -1;
+
+  for(;;)
+    {
+      /* leave once ExportSpace has dropped below MinSpace */
+      if(Thread[threadid].ExportSpace < MinSpace)
+        break;
+
+      /* take the next position of the active list */
+      idx = NextParticle++;
+      if(idx >= TimeBinsHydro.NActiveParticles)
+        break;
+
+      /* negative entries are skipped, as is anything density_isactive() rejects */
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i >= 0 && density_isactive(i))
+        density_evaluate(i, MODE_LOCAL_PARTICLES, threadid);
+    }
+}
+
+/*! \brief Routine that defines what to do with imported particles.
+ *
+ *  Calls the *_evaluate function in MODE_IMPORTED_PARTICLES, once for each
+ *  of the Nimport entries and in ascending index order.
+ *
+ *  The thread id is queried a single time before the loop and handed to
+ *  every density_evaluate() call.
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  int threadid = get_thread_num();
+  int slot     = 0;
+
+  /* now do the particles that were sent to us */
+  while(slot < Nimport)
+    {
+      /* in this mode the first argument is a position in the import buffer */
+      density_evaluate(slot, MODE_IMPORTED_PARTICLES, threadid);
+      slot++;
+    }
+}
+
+
+static MyFloat *NumNgb, *DhsmlDensityFactor; /* repeats the file-scope declaration made before data_in (same objects) */
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+static MyFloat *MinDist; /* likewise a repeat of the earlier declaration */
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+
+/*! \brief Main function of SPH density calculation.
+ *
+ *  This function computes the local density for each active SPH particle and
+ *  the number of weighted neighbors in the current smoothing radius. If a
+ *  particle with its smoothing region is fully inside the local domain, it is
+ *  not exported to the other processors. The function also detects particles
+ *  that have a number of neighbors outside the allowed tolerance range. For
+ *  these particles, the smoothing length is adjusted accordingly (bracketed
+ *  by Left/Right), and the computation is called again.
+ *
+ *  \return void
+ */
+void density(void)
+{
+  MyFloat *Left, *Right;
+  int idx, i, npleft, iter = 0;
+  long long ntot;
+  double desnumngb, t0, t1;
+
+  CPU_Step[CPU_MISC] += measure_time();
+
+  NumNgb             = (MyFloat *)mymalloc("NumNgb", NumPart * sizeof(MyFloat));
+  DhsmlDensityFactor = (MyFloat *)mymalloc("DhsmlDensityFactor", NumPart * sizeof(MyFloat));
+  Left               = (MyFloat *)mymalloc("Left", NumPart * sizeof(MyFloat));
+  Right              = (MyFloat *)mymalloc("Right", NumPart * sizeof(MyFloat));
+
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  MinDist = (MyFloat *)mymalloc("MinDist", NumPart * sizeof(MyFloat));
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(density_isactive(i))
+        {
+          Left[i] = Right[i] = 0; /* 0 means: no bound known yet on this side */
+        }
+    }
+
+  generic_set_MaxNexport();
+
+  desnumngb = All.DesNumNgb;
+
+  /* we will repeat the whole thing for those particles where we didn't find enough neighbours */
+  do
+    {
+      t0 = second();
+
+      generic_comm_pattern(TimeBinsHydro.NActiveParticles, kernel_local, kernel_imported);
+
+      /* do final operations on results */
+      for(idx = 0, npleft = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+        {
+          i = TimeBinsHydro.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          if(density_isactive(i))
+            {
+              if(P[i].Type == 0)
+                {
+                  if(SphP[i].Density > 0)
+                    {
+                      DhsmlDensityFactor[i] *= SphP[i].Hsml / (NUMDIMS * SphP[i].Density);
+                      if(DhsmlDensityFactor[i] > -0.9) /* note: this would be -1 if only a single particle at zero lag is found */
+                        DhsmlDensityFactor[i] = 1 / (1 + DhsmlDensityFactor[i]);
+                      else
+                        DhsmlDensityFactor[i] = 1;
+                    }
+                }
+
+              if(NumNgb[i] < (desnumngb - All.MaxNumNgbDeviation) || NumNgb[i] > (desnumngb + All.MaxNumNgbDeviation))
+                {
+                  /* need to redo this particle */
+                  npleft++;
+
+                  if(Left[i] > 0 && Right[i] > 0)
+                    if((Right[i] - Left[i]) < 1.0e-3 * Left[i])
+                      {
+                        /* bracket is narrower than 0.1% of Left: accept the current Hsml */
+                        npleft--;
+                        P[i].TimeBinHydro = -P[i].TimeBinHydro - 1; /* Mark as inactive */
+                        continue;
+                      }
+
+                  if(NumNgb[i] < (desnumngb - All.MaxNumNgbDeviation))
+                    Left[i] = dmax(SphP[i].Hsml, Left[i]); /* too few neighbours: raise the lower bound */
+                  else
+                    {
+                      if(Right[i] != 0) /* too many neighbours: lower the upper bound */
+                        {
+                          if(SphP[i].Hsml < Right[i])
+                            Right[i] = SphP[i].Hsml;
+                        }
+                      else
+                        Right[i] = SphP[i].Hsml;
+                    }
+
+                  if(iter >= MAXITER - 10) /* report the particles that still fail close to the iteration limit */
+                    {
+                      printf("i=%d task=%d ID=%d Hsml=%g Left=%g Right=%g Ngbs=%g Right-Left=%g\n   pos=(%g|%g|%g)\n", i, ThisTask,
+                             (int)P[i].ID, SphP[i].Hsml, Left[i], Right[i], (float)NumNgb[i], Right[i] - Left[i], P[i].Pos[0],
+                             P[i].Pos[1], P[i].Pos[2]);
+                      myflush(stdout);
+                    }
+
+                  if(Right[i] > 0 && Left[i] > 0)
+                    SphP[i].Hsml = pow(0.5 * (pow(Left[i], 3) + pow(Right[i], 3)), 1.0 / 3); /* midpoint taken in h^3 */
+                  else
+                    {
+                      if(Right[i] == 0 && Left[i] == 0)
+                        terminate("should not occur");
+
+                      if(Right[i] == 0 && Left[i] > 0)
+                        {
+                          SphP[i].Hsml *= 1.26; /* 1.26^3 ~ 2: roughly doubles h^3 */
+                        }
+
+                      if(Right[i] > 0 && Left[i] == 0)
+                        {
+                          SphP[i].Hsml /= 1.26; /* roughly halves h^3 */
+                        }
+                    }
+                }
+              else
+                P[i].TimeBinHydro = -P[i].TimeBinHydro - 1; /* Mark as inactive */
+            }
+        }
+
+      sumup_large_ints(1, &npleft, &ntot);
+
+      t1 = second();
+
+      if(ntot > 0)
+        {
+          iter++;
+
+          if(iter > 0) /* always true here, iter has just been incremented */
+            mpi_printf("DENSITY: ngb iteration %3d: need to repeat for %12lld particles. (took %g sec)\n", iter, ntot,
+                       timediff(t0, t1));
+
+          if(iter > MAXITER)
+            terminate("failed to converge in neighbour iteration in density()\n");
+        }
+    }
+  while(ntot > 0);
+
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+
+#if defined(REFLECTIVE_X) && defined(REFLECTIVE_Y) && defined(REFLECTIVE_Z)
+
+  int count2    = 0;
+  int countall2 = 0;
+
+  for(i = 0; i < NumGas; i++)
+    {
+      /*
+       * If the distance to the border of a particle is too small,
+       * then the ghost particle will be too close to this particle.
+       * Therefore we shift the particle in this case into the direction of the box center.
+       */
+      if(distance_to_border(i) < 0.5 * 0.001 * SphP[i].Hsml)
+        {
+          count2++;
+
+          double dir[3];
+
+          dir[0] = boxSize_X * 0.5 - P[i].Pos[0];
+          dir[1] = boxSize_Y * 0.5 - P[i].Pos[1];
+          dir[2] = boxSize_Z * 0.5 - P[i].Pos[2];
+
+          double n = sqrt(dir[0] * dir[0] + dir[1] * dir[1] + dir[2] * dir[2]);
+          // note: it's not possible that the operand of sqrt is zero here.
+
+          dir[0] /= n;
+          dir[1] /= n;
+          dir[2] /= n;
+
+          P[i].Pos[0] += 0.05 * SphP[i].Hsml * dir[0];
+          P[i].Pos[1] += 0.05 * SphP[i].Hsml * dir[1];
+          P[i].Pos[2] += 0.05 * SphP[i].Hsml * dir[2];
+        }
+    }
+
+  MPI_Allreduce(&count2, &countall2, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+  mpi_printf("\nFOUND %d particles extremely close to the reflective boundary. Fixing this. \n\n", countall2);
+#endif /* #if defined(REFLECTIVE_X) && defined(REFLECTIVE_Y) && defined(REFLECTIVE_Z) */
+
+  int count = 0, countall;
+
+  for(i = 0; i < NumGas; i++)
+    if(MinDist[i] < 0.001 * SphP[i].Hsml)
+      count++;
+
+  MPI_Allreduce(&count, &countall, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  if(countall)
+    {
+      mpi_printf("\nFOUND %d SPH particles with an extremely close neighbor. Fixing this. \n\n", countall);
+
+      for(i = 0; i < NumGas; i++)
+        if(MinDist[i] < 0.001 * SphP[i].Hsml)
+          {
+            double theta = acos(2 * get_random_number() - 1); /* random direction ... */
+            double phi   = 2 * M_PI * get_random_number();
+
+            P[i].Pos[0] += 0.1 * SphP[i].Hsml * sin(theta) * cos(phi); /* ... displaced by 0.1 * Hsml */
+            P[i].Pos[1] += 0.1 * SphP[i].Hsml * sin(theta) * sin(phi);
+            P[i].Pos[2] += 0.1 * SphP[i].Hsml * cos(theta);
+          }
+    }
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  myfree(MinDist);
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+  myfree(Right); /* buffers are released in reverse order of their allocation */
+  myfree(Left);
+  myfree(DhsmlDensityFactor);
+  myfree(NumNgb);
+
+  /* mark as active again: applying the same mapping -x - 1 restores the original bin */
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(P[i].TimeBinHydro < 0)
+        P[i].TimeBinHydro = -P[i].TimeBinHydro - 1;
+    }
+
+  /* collect some timing information */
+  CPU_Step[CPU_INIT] += measure_time();
+}
+
+/*! \brief Inner function of the SPH density calculation
+ *
+ *  This function represents the core of the SPH density computation. The
+ *  target particle may either be local, or reside in the communication
+ *  buffer.
+ *
+ *  \param[in] target Index of particle in local data/import buffer.
+ *  \param[in] mode Mode in which function is called (local or imported data).
+ *  \param[in] threadid ID of local thread.
+ *
+ *  \return 0
+ */
+static int density_evaluate(int target, int mode, int threadid)
+{
+  int j, n;
+  int numngb, numnodes, *firstnode; /* numngb is counted below but not used otherwise */
+  double h, h2, hinv, hinv3, hinv4;
+  MyFloat rho;
+  double wk, dwk;
+  double dx, dy, dz, r, r2, u, mass_j;
+  MyFloat weighted_numngb;
+  MyFloat dhsmlrho;
+  MyDouble *pos;
+
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  MyFloat mindist = MAX_REAL_NUMBER;
+  MyIDType ID;
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+  data_in local, *target_data;
+  data_out out;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0); /* local target: build the input record on the stack */
+      target_data = &local;
+
+      numnodes  = 1;
+      firstnode = NULL;
+    }
+  else
+    {
+      target_data = &DataGet[target]; /* imported target: record was received into DataGet */
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos = target_data->Pos;
+  h   = target_data->Hsml;
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  ID = target_data->ID;
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+
+  h2   = h * h;
+  hinv = 1.0 / h;
+#ifndef TWODIMS
+  hinv3 = hinv * hinv * hinv;
+#else  /* #ifndef  TWODIMS */
+  hinv3 = hinv * hinv / boxSize_Z;
+#endif /* #ifndef  TWODIMS #else */
+  hinv4 = hinv3 * hinv;
+
+  numngb = 0;
+  rho = weighted_numngb = dhsmlrho = 0;
+
+  int nfound = ngb_treefind_variable_threads(pos, h, target, mode, threadid, numnodes, firstnode);
+
+  for(n = 0; n < nfound; n++)
+    {
+      j = Thread[threadid].Ngblist[n];
+
+      dx = pos[0] - P[j].Pos[0];
+      dy = pos[1] - P[j].Pos[1];
+      dz = pos[2] - P[j].Pos[2];
+
+/*  now find the closest image in the given box size  */
+#ifndef REFLECTIVE_X
+      if(dx > boxHalf_X)
+        dx -= boxSize_X;
+      if(dx < -boxHalf_X)
+        dx += boxSize_X;
+#endif /* #ifndef REFLECTIVE_X */
+
+#ifndef REFLECTIVE_Y
+      if(dy > boxHalf_Y)
+        dy -= boxSize_Y;
+      if(dy < -boxHalf_Y)
+        dy += boxSize_Y;
+#endif /* #ifndef REFLECTIVE_Y */
+
+#ifndef REFLECTIVE_Z
+      if(dz > boxHalf_Z)
+        dz -= boxSize_Z;
+      if(dz < -boxHalf_Z)
+        dz += boxSize_Z;
+#endif /* #ifndef REFLECTIVE_Z */
+      r2 = dx * dx + dy * dy + dz * dz;
+
+      if(r2 < h2) /* the tree search returns candidates; only those strictly inside h contribute */
+        {
+          numngb++;
+
+          r = sqrt(r2);
+
+          u = r * hinv;
+
+          if(u < 0.5) /* kernel value wk and its radial derivative dwk, piecewise in u = r / h */
+            {
+              wk  = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u);
+              dwk = hinv4 * u * (KERNEL_COEFF_3 * u - KERNEL_COEFF_4);
+            }
+          else
+            {
+              wk  = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u);
+              dwk = hinv4 * KERNEL_COEFF_6 * (1.0 - u) * (1.0 - u);
+            }
+
+          mass_j = P[j].Mass;
+
+          rho += FLT(mass_j * wk);
+
+          weighted_numngb += FLT(NORM_COEFF * wk / hinv3); /* 4.0/3 * PI = 4.188790204786 */
+
+          dhsmlrho += FLT(-mass_j * (NUMDIMS * hinv * wk + u * dwk));
+
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+          if(ID != P[j].ID && mindist > r) /* the ID test excludes the target particle itself */
+            mindist = r;
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+        }
+    }
+
+  out.Rho          = rho;
+  out.Ngb          = weighted_numngb;
+  out.DhsmlDensity = dhsmlrho;
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  out.MinDist = mindist;
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out; /* imported target: result goes into the DataResult buffer */
+
+  return 0;
+}
+
+/*! \brief Determines if a cell still takes part in the density iteration.
+ *
+ *  density() flags cells it is done with by giving them a negative
+ *  TimeBinHydro; such cells, and particles that are not gas (Type != 0),
+ *  are reported as not active.
+ *
+ *  \param[in] n Index of cell in P and SphP arrays.
+ *
+ *  \return 1: cell active; 0: cell not active or not a cell.
+ */
+int density_isactive(int n)
+{
+  /* negative time bins are the "switched off" marker */
+  int bin_is_valid = (P[n].TimeBinHydro >= 0);
+  /* only type 0 particles are gas cells */
+  int is_gas_cell = (P[n].Type == 0);
+
+  return bin_is_valid && is_gas_cell;
+}
diff --git a/src/amuse/community/arepo/src/init/init.c b/src/amuse/community/arepo/src/init/init.c
new file mode 100644
index 0000000000..934fef29da
--- /dev/null
+++ b/src/amuse/community/arepo/src/init/init.c
@@ -0,0 +1,835 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/init/init.c
+ * \date        05/2018
+ * \brief       Initialization of a simulation from initial conditions.
+ * \details     contains functions:
+ *                int init(void)
+ *                void check_omega(void)
+ *                void setup_smoothinglengths(void)
+ *                void test_id_uniqueness(void)
+ *                void calculate_maxid(void)
+ *                int compare_IDs(const void *a, const void *b)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 04.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_sf_gamma.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../mesh/voronoi/voronoi.h"
+
+/*! \brief Prepares the loaded initial conditions for the run.
+ *
+ *  It is only called if RestartFlag != 1. Various counters and variables are
+ *  initialized. Entries of the particle data structures not read from initial
+ *  conditions are initialized or converted and an initial domain decomposition
+ *  is performed. If gas cells are present, the initial SPH smoothing lengths
+ *  are determined.
+ *
+ *  \return status code: < 0 if finished without errors and the run can
+ *          start (the regular path returns -1); 0 if the code ends after
+ *          calling init() (returned here for RestartFlag 3, 14 and 17);
+ *          > 0 if an error occurred, terminate.
+ */
+int init(void)
+{
+  int i, j;
+  double mass;
+
+  assert(RestartFlag != 1);
+
+  if(All.ComovingIntegrationOn)
+    if(All.PeriodicBoundariesOn == 1)
+      {
+        if(RestartFlag < 3)
+          /* can't do this check when not all particles are loaded */
+          check_omega();
+        else
+          mpi_printf("INIT: Skipping Omega check since we are not doing a dynamical evolution (not all particles may be loaded)\n");
+      }
+
+#if defined(COOLING)
+  IonizeParams();
+#endif /* #if defined(COOLING) */
+
+  if(All.ComovingIntegrationOn)
+    {
+      All.Timebase_interval = (log(All.TimeMax) - log(All.TimeBegin)) / TIMEBASE;
+      All.Ti_Current        = 0;
+    }
+  else
+    {
+      All.Timebase_interval = (All.TimeMax - All.TimeBegin) / TIMEBASE;
+      All.Ti_Current        = 0;
+    }
+
+  set_cosmo_factors_for_current_time();
+
+  for(j = 0; j < 3; j++)
+    All.GlobalDisplacementVector[j] = 0;
+
+  All.NumCurrentTiStep  = 0; /* setup some counters */
+  All.SnapshotFileCount = 0;
+
+  if(RestartFlag == 2)
+    {
+      if(RestartSnapNum < 0)
+        All.SnapshotFileCount = atoi(All.InitCondFile + strlen(All.InitCondFile) - 3) + 1;
+      else
+        All.SnapshotFileCount = RestartSnapNum + 1;
+    }
+
+  All.TotNumOfForces     = 0;
+  All.TopNodeAllocFactor = 0.08;
+  All.TreeAllocFactor    = 0.7;
+  All.NgbTreeAllocFactor = 0.7;
+
+  if(NumPart < 1000)
+    All.TreeAllocFactor = 10.0;
+
+  DeRefMesh.Indi.AllocFacNdp = MIN_ALLOC_NUMBER;
+  DeRefMesh.Indi.AllocFacNdt = MIN_ALLOC_NUMBER;
+
+  Mesh.Indi.AllocFacNdp = 1.2 * NumGas + MIN_ALLOC_NUMBER;
+  Mesh.Indi.AllocFacNdt = 8.0 * NumGas + MIN_ALLOC_NUMBER;
+  Mesh.Indi.AllocFacNvf = 8.0 * NumGas + MIN_ALLOC_NUMBER;
+
+  Mesh.Indi.AllocFacNvc = 16.0 * NumGas + MIN_ALLOC_NUMBER;
+  Nvc                   = 0;
+
+  Mesh.Indi.AllocFacNinlist     = 1.2 * NumGas + MIN_ALLOC_NUMBER;
+  Mesh.Indi.AllocFacN_DP_Buffer = 0.2 * NumGas + MIN_ALLOC_NUMBER;
+  Mesh.Indi.AllocFacNflux       = 0.01 * NumGas + MIN_ALLOC_NUMBER;
+  Mesh.Indi.AllocFacNradinflux  = 0.01 * NumGas + MIN_ALLOC_NUMBER;
+
+#ifdef MHD_POWELL
+  for(j = 0; j < 3; j++)
+    {
+      All.Powell_Momentum[j]         = 0;
+      All.Powell_Angular_Momentum[j] = 0;
+    }
+  All.Powell_Energy = 0;
+#endif /* #ifdef MHD_POWELL */
+
+  All.TimeLastStatistics = All.TimeBegin - All.TimeBetStatistics;
+
+  set_softenings();
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  mpi_printf("INIT: Adaptive hydro softening, minimum gravitational softening for cells: %g\n", All.MinimumComovingHydroSoftening);
+  mpi_printf("INIT: Adaptive hydro softening, maximum gravitational softening for cells: %g\n",
+             All.MinimumComovingHydroSoftening * pow(All.AdaptiveHydroSofteningSpacing, NSOFTTYPES_HYDRO - 1));
+  mpi_printf("INIT: Adaptive hydro softening, number of softening values: %d\n", NSOFTTYPES_HYDRO);
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+
+#ifdef INDIVIDUAL_GRAVITY_SOFTENING
+  init_individual_softenings();
+#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */
+
+#ifdef SHIFT_BY_HALF_BOX
+  for(i = 0; i < NumPart; i++)
+    for(j = 0; j < 3; j++)
+      P[i].Pos[j] += 0.5 * All.BoxSize;
+#endif /* #ifdef SHIFT_BY_HALF_BOX */
+
+  for(i = 0; i < GRAVCOSTLEVELS; i++)
+    All.LevelToTimeBin[i] = -1;
+
+  for(i = 0; i < NumPart; i++)
+    for(j = 0; j < GRAVCOSTLEVELS; j++)
+      P[i].GravCost[j] = 0;
+
+  /* set unused coordinate values in 1d and 2d simulations to zero; this is needed for correct interfaces */
+  int nonzero_vel = 0;
+#ifdef ONEDIMS
+  for(i = 0; i < NumPart; i++)
+    {
+      P[i].Pos[1] = 0.0;
+      P[i].Pos[2] = 0.0;
+
+      if(P[i].Vel[1] != 0.0 || P[i].Vel[2] != 0.0)
+        {
+          nonzero_vel = 1;
+        }
+    }
+  if(nonzero_vel > 0)
+    {
+      warn("Initial y or z velocity nonzero in 1d simulation! Make sure you really want this!");
+    }
+#endif /* #ifdef ONEDIMS */
+
+#ifdef TWODIMS
+  for(i = 0; i < NumPart; i++)
+    {
+      P[i].Pos[2] = 0;
+
+      if(P[i].Vel[2] != 0.0)
+        {
+          nonzero_vel = 1;
+        }
+    }
+  if(nonzero_vel > 0)
+    {
+      warn("Initial z velocity nonzero in 2d simulation! Make sure you really want this!");
+    }
+#endif /* #ifdef TWODIMS */
+
+  if(All.ComovingIntegrationOn) /*  change to new velocity variable */
+    {
+      for(i = 0; i < NumPart; i++)
+        {
+          for(j = 0; j < 3; j++)
+            P[i].Vel[j] *= sqrt(All.Time) * All.Time; /* for dm/gas particles, p = a^2 xdot */
+        }
+    }
+
+  /* measure mean cell mass */
+  int num = 0;
+  long long glob_num;
+  double glob_mass;
+  mass = 0;
+
+  for(i = 0; i < NumGas; i++)
+#ifdef REFINEMENT_HIGH_RES_GAS
+    if(SphP[i].AllowRefinement != 0)
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+      {
+        num += 1;
+        mass += P[i].Mass;
+      }
+
+  sumup_large_ints(1, &num, &glob_num);
+  MPI_Allreduce(&mass, &glob_mass, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+#ifndef REFINEMENT_HIGH_RES_GAS
+  if(glob_num != All.TotNumGas)
+    terminate("glob_num(=%lld) != All.TotNumGas(=%lld)", glob_num, All.TotNumGas);
+#endif /* #ifndef REFINEMENT_HIGH_RES_GAS */
+
+  if(All.TotNumGas > 0 && (glob_num == 0 || glob_mass == 0))
+    terminate("All.TotNumGas(=%lld) > 0 && (glob_num(=%lld) == 0 || glob_mass(=%g) == 0)", All.TotNumGas, glob_num, glob_mass);
+
+  /* assign global variables that depend on the mean cell mass */
+#if defined(REFINEMENT)
+  if(All.ReferenceGasPartMass == 0)
+    {
+      if(!All.ComovingIntegrationOn)
+        terminate("In non-comoving runs, ReferenceGasPartMass must be set to a non-zero value");
+
+      All.ReferenceGasPartMass = glob_mass / glob_num;
+
+      mpi_printf("REFINEMENT: The mean cell mass, which is used as a reference, is %g\n", All.ReferenceGasPartMass);
+    }
+  else
+    mpi_printf("REFINEMENT: The given reference cell mass is %g\n", All.ReferenceGasPartMass);
+  All.TargetGasMass = All.TargetGasMassFactor * All.ReferenceGasPartMass;
+  mpi_printf("REFINEMENT: setting All.TargetGasMass=%g\n", All.TargetGasMass);
+#endif /* #if defined(REFINEMENT) */
+
+  for(i = 0; i < TIMEBINS; i++)
+    All.Ti_begstep[i] = 0;
+
+  for(i = 0; i < NumPart; i++) /*  start-up initialization */
+    {
+      for(j = 0; j < 3; j++)
+        P[i].GravAccel[j] = 0;
+
+#ifdef PMGRID
+      for(j = 0; j < 3; j++)
+        P[i].GravPM[j] = 0;
+#endif /* #ifdef PMGRID */
+      P[i].TimeBinHydro = 0;
+      P[i].TimeBinGrav  = 0;
+      P[i].OldAcc       = 0; /* NOTE(review): inherited comment read "Do not zero as masses are stored here", yet the field is zeroed -- confirm */
+
+#ifdef SELFGRAVITY
+#ifdef EVALPOTENTIAL
+      if(RestartFlag == 0)
+        P[i].Potential = 0;
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #ifdef SELFGRAVITY */
+
+#ifdef USE_SFR
+      if(RestartFlag == 0 && P[i].Type == 0)
+        SphP[i].Sfr = 0;
+#endif /* #ifdef USE_SFR */
+    }
+
+  for(i = 0; i < TIMEBINS; i++)
+    TimeBinSynchronized[i] = 1;
+
+  reconstruct_timebins();
+
+#ifdef PMGRID
+  All.PM_Ti_endstep = All.PM_Ti_begstep = 0;
+#endif /* #ifdef PMGRID */
+
+  for(i = 0; i < NumGas; i++) /* initialize sph_properties */
+    {
+      if(RestartFlag == 2 || RestartFlag == 3)
+        for(j = 0; j < 3; j++)
+          SphP[i].Center[j] = P[i].Pos[j];
+
+#if defined(CELL_CENTER_GRAVITY) && !defined(OUTPUT_CENTER_OF_MASS)
+      if(RestartFlag == 17 || RestartFlag == 18)
+        for(j = 0; j < 3; j++)
+          SphP[i].Center[j] = P[i].Pos[j];
+#endif /* #if defined(CELL_CENTER_GRAVITY) && !defined(OUTPUT_CENTER_OF_MASS) */
+
+      if(RestartFlag == 0)
+        {
+          for(j = 0; j < 3; j++)
+            SphP[i].Center[j] = P[i].Pos[j];
+
+          SphP[i].Hsml = 0;
+#if defined(COOLING)
+          SphP[i].Ne = 1.0;
+#endif /* #if defined(COOLING)  */
+        }
+    }
+
+#ifndef NODEREFINE_BACKGROUND_GRID
+  double mvol = 0;
+  if(All.TotNumGas)
+    {
+#ifdef TWODIMS
+      mvol = boxSize_X * boxSize_Y / All.TotNumGas;
+#else /* #ifdef TWODIMS */
+#ifdef ONEDIMS
+      mvol = boxSize_X / All.TotNumGas;
+#else  /* #ifdef ONEDIMS */
+      mvol = boxSize_X * boxSize_Y * boxSize_Z / All.TotNumGas;
+#endif /* #ifdef ONEDIMS #else */
+#endif /* #ifdef TWODIMS #else */
+    }
+
+  All.MeanVolume = mvol;
+#endif /* #ifndef NODEREFINE_BACKGROUND_GRID */
+
+  mpi_printf("INIT: MeanVolume=%g\n", All.MeanVolume);
+
+#ifndef NO_ID_UNIQUE_CHECK
+  test_id_uniqueness();
+#endif /* #ifndef NO_ID_UNIQUE_CHECK */
+
+#ifdef REFINEMENT_MERGE_CELLS
+  for(i = 0; i < NumPart; i++)
+    if(P[i].Type == 0 && P[i].ID == 0)
+      terminate("INIT: Cannot use ID==0 for gas in ICs with derefinement enabled.");
+#endif /* #ifdef REFINEMENT_MERGE_CELLS */
+
+  voronoi_init_connectivity(&Mesh);
+
+#ifdef ADDBACKGROUNDGRID
+  prepare_domain_backgroundgrid();
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+  domain_Decomposition(); /* do initial domain decomposition (gives equal numbers of particles) */
+
+  if(RestartFlag == 18) /* recalculation of potential */
+    {
+      mark_active_timebins();
+      open_logfiles();
+#if defined(USE_SFR)
+      sfr_init();
+#endif /* #if defined(USE_SFR) */
+      set_non_standard_physics_for_current_time();
+
+#ifdef PMGRID
+      long_range_init_regionsize();
+#endif /* #ifdef PMGRID */
+
+      compute_grav_accelerations(All.HighestActiveTimeBin, FLAG_FULL_TREE);
+
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) && defined(FOF)
+      PS = (struct subfind_data *)mymalloc_movable(&PS, "PS", All.MaxPart * sizeof(struct subfind_data));
+      fof_prepare_output_order(); /* sort by type and Fileorder */
+      fof_subfind_exchange(MPI_COMM_WORLD);
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) && defined(FOF) */
+
+      /* append in place: sprintf() with the destination also passed as a source argument is
+         undefined behaviour because the two objects overlap */
+      strcat(All.SnapshotFileBase, "_potupdated");
+      mpi_printf("Start writing file %s\nRestartSnapNum %d\n", All.SnapshotFileBase, RestartSnapNum);
+      savepositions(RestartSnapNum, 0);
+
+      endrun();
+    }
+
+  /* will build tree */
+  ngb_treeallocate();
+  ngb_treebuild(NumGas);
+
+  if(RestartFlag == 3)
+    {
+#ifdef FOF
+      fof_fof(RestartSnapNum);
+      DumpFlag = 1;
+      savepositions(RestartSnapNum, 0);
+#endif /* #ifdef FOF */
+      return (0);
+    }
+
+  All.Ti_Current = 0;
+
+  if(RestartFlag == 0 || RestartFlag == 2 || RestartFlag == 14 || RestartFlag == 17)
+    setup_smoothinglengths();
+
+#ifdef ADDBACKGROUNDGRID
+  // This return more clearly shows that this function terminates the run
+  return add_backgroundgrid();
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+  create_mesh();
+  mesh_setup_exchange();
+
+  if(RestartFlag == 14)
+    {
+      char tess_name[1024];
+      /* bounded write: tess_name is a fixed-size stack buffer */
+      snprintf(tess_name, sizeof(tess_name), "%s/tess_%03d", All.OutputDir, RestartSnapNum);
+      write_voronoi_mesh(&Mesh, tess_name, 0, NTask - 1);
+      return 0;
+    }
+
+  for(i = 0, mass = 0; i < NumGas; i++)
+    {
+      if(RestartFlag == 0)
+        {
+#ifdef READ_MASS_AS_DENSITY_IN_INPUT
+          P[i].Mass *= SphP[i].Volume;
+#endif /* #ifdef READ_MASS_AS_DENSITY_IN_INPUT */
+        }
+
+      SphP[i].Density = P[i].Mass / SphP[i].Volume;
+
+      if(SphP[i].Density < All.MinimumDensityOnStartUp)
+        {
+          SphP[i].Density = All.MinimumDensityOnStartUp;
+
+          P[i].Mass = SphP[i].Volume * SphP[i].Density;
+        }
+
+      SphP[i].Momentum[0] = P[i].Mass * P[i].Vel[0];
+      SphP[i].Momentum[1] = P[i].Mass * P[i].Vel[1];
+      SphP[i].Momentum[2] = P[i].Mass * P[i].Vel[2];
+
+#ifdef MHD
+#ifdef MHD_SEEDFIELD
+      if(RestartFlag == 0)
+        {
+          if(i == 0)
+            {
+              mpi_printf("MHD Seed field=%g, direction=%d\n", All.B_value, All.B_dir);
+            }
+
+          int k;
+          double bfac = 1. / (sqrt(All.UnitMass_in_g / All.UnitLength_in_cm) / (All.UnitTime_in_s / All.HubbleParam));
+
+          double B_value = All.B_value;
+
+          for(k = 0; k < 3; k++)
+            if(All.B_dir & (1 << k))
+              {
+                SphP[i].BConserved[k] = B_value * SphP[i].Volume * bfac;
+                SphP[i].B[k]          = SphP[i].BConserved[k] / SphP[i].Volume;
+              }
+            else
+              {
+                SphP[i].BConserved[k] = 0;
+                SphP[i].B[k]          = SphP[i].BConserved[k] / SphP[i].Volume;
+              }
+
+          if(i == 0)
+            {
+              mpi_printf("BConserved[0] = %g|%g|%g\n", SphP[i].BConserved[0], SphP[i].BConserved[1], SphP[i].BConserved[2]);
+              mpi_printf("Volume[0] %g bfac %g\n", SphP[i].Volume, bfac);
+            }
+          /* convert Gauss-cgs to heavyside - lorentz */
+          {
+            int kk;
+            for(kk = 0; kk < 3; kk++)
+              {
+                SphP[i].BConserved[kk] /= sqrt(4. * M_PI);
+                SphP[i].B[kk] /= sqrt(4. * M_PI);
+              }
+          }
+        }
+      else
+        {
+          SphP[i].BConserved[0] = SphP[i].B[0] * SphP[i].Volume;
+          SphP[i].BConserved[1] = SphP[i].B[1] * SphP[i].Volume;
+          SphP[i].BConserved[2] = SphP[i].B[2] * SphP[i].Volume;
+        }
+#else /* #ifdef MHD_SEEDFIELD */
+      SphP[i].BConserved[0] = SphP[i].B[0] * SphP[i].Volume;
+      SphP[i].BConserved[1] = SphP[i].B[1] * SphP[i].Volume;
+      SphP[i].BConserved[2] = SphP[i].B[2] * SphP[i].Volume;
+
+#endif /* #ifdef MHD_SEEDFIELD #else */
+#endif /* #ifdef MHD */
+
+      /* utherm has been loaded from IC file */
+#ifdef MESHRELAX
+      SphP[i].Energy = P[i].Mass * SphP[i].Utherm;
+#else  /* #ifdef MESHRELAX */
+      SphP[i].Energy = P[i].Mass * All.cf_atime * All.cf_atime * SphP[i].Utherm +
+                       0.5 * P[i].Mass * (P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]);
+#endif /* #ifdef MESHRELAX #else */
+
+#ifdef MHD
+      SphP[i].Energy += 0.5 * (SphP[i].B[0] * SphP[i].B[0] + SphP[i].B[1] * SphP[i].B[1] + SphP[i].B[2] * SphP[i].B[2]) *
+                        SphP[i].Volume * All.cf_atime;
+#endif /* #ifdef MHD */
+
+      for(j = 0; j < 3; j++)
+        SphP[i].VelVertex[j] = P[i].Vel[j];
+
+      mass += P[i].Mass;
+    }
+
+#ifdef PASSIVE_SCALARS
+  for(i = 0; i < NumGas; i++)
+    {
+      for(j = 0; j < PASSIVE_SCALARS; j++)
+        SphP[i].PConservedScalars[j] = SphP[i].PScalars[j] * P[i].Mass;
+    }
+
+#endif /* #ifdef PASSIVE_SCALARS */
+
+  if(RestartFlag == 17)
+    {
+      update_primitive_variables();
+      exchange_primitive_variables();
+      calculate_gradients();
+      exchange_primitive_variables_and_gradients();
+      DumpFlag = 1;
+      savepositions(RestartSnapNum + 1, 0);
+      return (0);
+    }
+
+  update_primitive_variables();
+
+#ifdef TREE_BASED_TIMESTEPS
+  tree_based_timesteps_setsoundspeeds();
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+  /* initialize star formation rate */
+#if defined(USE_SFR)
+  sfr_init();
+#endif /* #if defined(USE_SFR) */
+
+#if defined(USE_SFR)
+  for(i = 0; i < NumGas; i++)
+    SphP[i].Sfr = get_starformation_rate(i);
+#endif /* #if defined(USE_SFR) */
+
+  update_primitive_variables();
+
+  exchange_primitive_variables();
+
+  calculate_gradients();
+
+  exchange_primitive_variables_and_gradients();
+
+  free_mesh();
+
+  return -1;  // return -1 means we ran to completion, i.e. not an endrun code
+}
+
+/*! \brief This routine computes the mass content of the box and compares it
+ *         to the specified value of Omega-matter.
+ *
+ *  The total mass and the baryonic mass (type 0, plus type 4 if USE_SFR is
+ *  set) are summed over all tasks and turned into Omega and OmegaBaryon. A
+ *  relative deviation above 1.0e-1 from All.Omega0 or All.OmegaBaryon
+ *  terminates the run (not with TWODIMS); above 1.0e-3, task 0 prints a
+ *  warning. The baryon comparison is skipped when no task holds a baryonic
+ *  particle; whether baryons exist is decided globally so that all tasks
+ *  apply the same checks.
+ *
+ *  \return void
+ */
+void check_omega(void)
+{
+  double mass   = 0, masstot, omega;
+  double mass_b = 0, masstot_b, omega_b;
+  double dev, dev_b = 0;
+  int i, have_b = 0, have_b_any = 0;
+
+  /* accumulate this task's share of the total and of the baryonic mass */
+  for(i = 0; i < NumPart; i++)
+    {
+      mass += P[i].Mass;
+      if(P[i].Type == 0)
+        {
+          mass_b += P[i].Mass;
+          have_b = 1;
+        }
+#ifdef USE_SFR
+      if(P[i].Type == 4)
+        {
+          mass_b += P[i].Mass;
+          have_b = 1;
+        }
+#endif /* #ifdef USE_SFR */
+    }
+  MPI_Allreduce(&mass, &masstot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  MPI_Allreduce(&mass_b, &masstot_b, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  /* The baryon test must use a global answer: with a purely local particle
+   * count, tasks without baryonic particles would apply different checks
+   * than the others although omega and omega_b are global. MPI_MAX on a
+   * 0/1 flag cannot overflow, unlike summing particle counts in an int. */
+  MPI_Allreduce(&have_b, &have_b_any, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  omega   = masstot / (All.BoxSize * All.BoxSize * All.BoxSize) / (3 * All.Hubble * All.Hubble / (8 * M_PI * All.G));
+  omega_b = masstot_b / (All.BoxSize * All.BoxSize * All.BoxSize) / (3 * All.Hubble * All.Hubble / (8 * M_PI * All.G));
+
+  /* relative deviations from the parameter-file values; dev_b stays 0 without baryons */
+  dev = fabs((omega - All.Omega0) / omega);
+  if(have_b_any)
+    dev_b = fabs((omega_b - All.OmegaBaryon) / omega_b);
+  else if(All.OmegaBaryon != 0)
+    {
+      if(ThisTask == 0)
+        warn(
+            "We are running with no baryons, even though you have specified OmegaBaryon=%g in the parameterfile. Please make sure "
+            "you really want this.\n\n",
+            All.OmegaBaryon);
+    }
+
+  /* a relative deviation above 1.0e-1 stops the run (compiled out with TWODIMS) */
+  if(dev > 1.0e-1 || dev_b > 1.0e-1)
+    {
+#ifndef TWODIMS
+      mpi_terminate(
+          "\n\nI've found something odd!\nThe mass content accounts for Omega=%g and OmegaBaryon=%g,\nbut you specified Omega=%g "
+          "and OmegaBaryon=%g in the parameterfile.\n\nI better stop.\n",
+          omega, omega_b, All.Omega0, All.OmegaBaryon);
+#endif /* #ifndef TWODIMS */
+    }
+
+  /* a relative deviation above 1.0e-3 is reported by task 0 only */
+  if(dev > 1.0e-3 || dev_b > 1.0e-3)
+    if(ThisTask == 0)
+      warn(
+          "I've found something odd! The mass content accounts for Omega=%g and OmegaBaryon=%g, but you specified Omega=%g and "
+          "OmegaBaryon=%g in the parameterfile.",
+          omega, omega_b, All.Omega0, All.OmegaBaryon);
+}
+
+/*! \brief Provides every gas cell with a starting SPH smoothing length.
+ *
+ *  All gas masses are temporarily replaced by 1.0 and a force tree is built
+ *  with construct_forcetree(). Starting at its father node, each cell walks
+ *  up the tree until the node mass is no longer below 10 * All.DesNumNgb
+ *  cell masses or a node without father is reached; that node's mass and
+ *  length give the first guess for Hsml. density() is called afterwards
+ *  (the inherited description states that it iterates to the final
+ *  smoothing length), then the saved masses are restored and Hsml is
+ *  copied to MaxDelaunayRadius.
+ *
+ *  \return void
+ */
+void setup_smoothinglengths(void)
+{
+  int cell, node, parent;
+  double *orig_mass = mymalloc("save_masses", NumGas * sizeof(double));
+
+  /* remember the real masses; unit masses are used while the tree is queried */
+  for(cell = 0; cell < NumGas; cell++)
+    {
+#ifdef NO_GAS_SELFGRAVITY
+      /* This is needed otherwise the force tree will not be constructed for gas particles */
+      P[cell].Type = -1;
+#endif /* #ifdef NO_GAS_SELFGRAVITY */
+      orig_mass[cell] = P[cell].Mass;
+      P[cell].Mass    = 1.0;
+    }
+
+#ifdef HIERARCHICAL_GRAVITY
+  /* list every gas cell in TimeBinsGravity.ActiveParticleList */
+  TimeBinsGravity.NActiveParticles = 0;
+  for(cell = 0; cell < NumGas; cell++)
+    TimeBinsGravity.ActiveParticleList[TimeBinsGravity.NActiveParticles++] = cell;
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+
+  construct_forcetree(1, 1, 0, 0); /* build force tree with gas particles only */
+
+  for(cell = 0; cell < NumGas; cell++)
+    {
+      node = Father[cell];
+
+      if(node < 0)
+        terminate("i=%d no=%d\n", cell, node);
+
+      /* move to the father as long as the node is too light and a father exists */
+      while(10 * All.DesNumNgb * P[cell].Mass > Nodes[node].u.d.mass && (parent = Nodes[node].u.d.father) >= 0)
+        node = parent;
+
+#ifndef TWODIMS
+      SphP[cell].Hsml = pow(3.0 / (4 * M_PI) * All.DesNumNgb * P[cell].Mass / Nodes[node].u.d.mass, 1.0 / 3) * Nodes[node].len;
+#else  /* #ifndef TWODIMS */
+      SphP[cell].Hsml = pow(1.0 / (M_PI) * All.DesNumNgb * P[cell].Mass / Nodes[node].u.d.mass, 1.0 / 2) * Nodes[node].len;
+#endif /* #ifndef TWODIMS #else */
+#ifdef NO_GAS_SELFGRAVITY
+      /* Reset the original particle type */
+      P[cell].Type = 0;
+#endif /* #ifdef NO_GAS_SELFGRAVITY */
+    }
+
+  /* release the tree storage; the order of these calls is kept unchanged */
+  myfree(Father);
+  myfree(Nextnode);
+
+  myfree(Tree_Points);
+  force_treefree();
+
+  density();
+
+  /* put the real masses back and seed MaxDelaunayRadius with Hsml */
+  for(cell = 0; cell < NumGas; cell++)
+    {
+      P[cell].Mass                 = orig_mass[cell];
+      SphP[cell].MaxDelaunayRadius = SphP[cell].Hsml;
+    }
+
+  myfree(orig_mass);
+
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  /* redo the domain decomposition and rebuild the neighbour tree */
+  ngb_treefree();
+  domain_free();
+  domain_Decomposition();
+  ngb_treeallocate();
+  ngb_treebuild(NumGas);
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+}
+
+/*! \brief Verifies that no particle ID occurs more than once.
+ *
+ *  The local IDs are copied into a scratch array and handed to
+ *  parallel_sort(). Equal neighbours in the local array, or a local last ID
+ *  equal to the next task's first ID, abort the run via terminate().
+ *
+ *  \return void
+ */
+void test_id_uniqueness(void)
+{
+  int k;
+  double t_begin, t_end;
+  MyIDType *sorted_ids, *task_first_id;
+
+  mpi_printf("INIT: Testing ID uniqueness...\n");
+
+  if(NumPart == 0)
+    terminate("need at least one particle per cpu\n");
+
+  t_begin = second();
+
+  sorted_ids    = (MyIDType *)mymalloc("ids", NumPart * sizeof(MyIDType));
+  task_first_id = (MyIDType *)mymalloc("ids_first", NTask * sizeof(MyIDType));
+
+  for(k = 0; k < NumPart; k++)
+    sorted_ids[k] = P[k].ID;
+
+  parallel_sort(sorted_ids, NumPart, sizeof(MyIDType), compare_IDs);
+
+  /* compare each local entry with its predecessor */
+  for(k = 1; k < NumPart; k++)
+    if(sorted_ids[k - 1] == sorted_ids[k])
+      terminate("non-unique ID=%lld found on task=%d (i=%d NumPart=%d)\n", (long long)sorted_ids[k], ThisTask, k, NumPart);
+
+  /* every task publishes its first ID so the boundary to the next task can be compared */
+  MPI_Allgather(&sorted_ids[0], sizeof(MyIDType), MPI_BYTE, task_first_id, sizeof(MyIDType), MPI_BYTE, MPI_COMM_WORLD);
+
+  if(ThisTask < NTask - 1 && sorted_ids[NumPart - 1] == task_first_id[ThisTask + 1])
+    terminate("non-unique ID=%lld found on task=%d\n", (long long)sorted_ids[NumPart - 1], ThisTask);
+
+  /* released in reverse order of allocation */
+  myfree(task_first_id);
+  myfree(sorted_ids);
+
+  t_end = second();
+
+  mpi_printf("INIT: success.  took=%g sec\n", timediff(t_begin, t_end));
+}
+
+/*! \brief Determines the largest particle ID present on any task.
+ *
+ *  Each task finds its local maximum; the maxima are gathered and reduced
+ *  on every task. The result is stored in All.MaxID when
+ *  REFINEMENT_SPLIT_CELLS or USE_SFR is defined.
+ *
+ *  \return void
+ */
+void calculate_maxid(void)
+{
+  MyIDType maxid = 0, *task_max;
+  int k;
+
+  /* local maximum first */
+  for(k = 0; k < NumPart; k++)
+    maxid = (P[k].ID > maxid) ? P[k].ID : maxid;
+
+  task_max = mymalloc("tmp", NTask * sizeof(MyIDType));
+
+  MPI_Allgather(&maxid, sizeof(MyIDType), MPI_BYTE, task_max, sizeof(MyIDType), MPI_BYTE, MPI_COMM_WORLD);
+
+  /* reduce the gathered per-task maxima */
+  for(k = 0; k < NTask; k++)
+    if(task_max[k] > maxid)
+      maxid = task_max[k];
+
+#if defined(REFINEMENT_SPLIT_CELLS) || defined(USE_SFR)
+  All.MaxID = maxid;
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) || defined(USE_SFR) */
+
+  myfree(task_max);
+}
+
+/*! \brief Three-way comparison of two MyIDType values.
+ *
+ *  Passed to parallel_sort() by the ID uniqueness check.
+ *
+ *  \return (-1,0,1), -1 if a<b, 0 if a==b, 1 if a>b
+ */
+int compare_IDs(const void *a, const void *b)
+{
+  const MyIDType lhs = *(const MyIDType *)a;
+  const MyIDType rhs = *(const MyIDType *)b;
+
+  if(lhs < rhs)
+    return -1;
+
+  return (lhs > rhs) ? +1 : 0;
+}
diff --git a/src/amuse/community/arepo/src/io/global.c b/src/amuse/community/arepo/src/io/global.c
new file mode 100644
index 0000000000..e32ace4300
--- /dev/null
+++ b/src/amuse/community/arepo/src/io/global.c
@@ -0,0 +1,257 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/io/global.c
+ * \date        05/2018
+ * \brief       Routines to compute statistics of the global state of the
+ *              code.
+ * \details     contains functions:
+ *                void compute_statistics(void)
+ *                void energy_statistics(void)
+ *                void compute_global_quantities_of_system(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief Runs energy_statistics() once All.TimeBetStatistics has elapsed
+ *         and the highest active time bin is the highest occupied one.
+ *
+ *  \return void
+ */
+void compute_statistics(void)
+{
+  /* check whether we want a full energy statistics */
+  const int due      = (All.Time - All.TimeLastStatistics) >= All.TimeBetStatistics;
+  const int toplevel = All.HighestActiveTimeBin == All.HighestOccupiedTimeBin; /* allow only top-level synchronization points */
+
+  if(!due || !toplevel)
+    return;
+
+  TIMER_START(CPU_LOGS);
+  energy_statistics(); /* compute and output energy statistics */
+  All.TimeLastStatistics += All.TimeBetStatistics;
+  TIMER_STOP(CPU_LOGS);
+}
+
+
+/*! \brief Writes one line of global energy statistics to FdEnergy.
+ *
+ *  compute_global_quantities_of_system() is called first and EgyInjection
+ *  is summed onto task 0. Task 0 then prints, space separated: All.Time,
+ *  the total internal, potential and kinetic energy, the same three
+ *  energies for each of the particle types 0 to 5, the mass of each of
+ *  these types, and the summed energy injection.
+ *
+ *  \return void
+ */
+void energy_statistics(void)
+{
+  double egyinj_tot;
+  int type;
+
+  compute_global_quantities_of_system();
+
+  MPI_Reduce(&EgyInjection, &egyinj_tot, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+
+  if(ThisTask != 0)
+    return;
+
+  /* the pieces below add up to a single line of 29 space-separated values */
+  fprintf(FdEnergy, "%g %g %g %g", All.Time, SysState.EnergyInt, SysState.EnergyPot, SysState.EnergyKin);
+  for(type = 0; type < 6; type++)
+    fprintf(FdEnergy, " %g %g %g", SysState.EnergyIntComp[type], SysState.EnergyPotComp[type], SysState.EnergyKinComp[type]);
+  for(type = 0; type < 6; type++)
+    fprintf(FdEnergy, " %g", SysState.MassComp[type]);
+  fprintf(FdEnergy, " %g\n", egyinj_tot);
+
+  myflush(FdEnergy);
+}
+
+/*! \brief This routine computes various global properties of the particle
+ *         distribution and stores the result in the struct `SysState'.
+ *
+ *  Per-type local sums are reduced onto task 0, which derives totals and
+ *  vector norms (index 3) and broadcasts SysState to all tasks. Not all of
+ *  it is used (e.g. momentum); only the energies are logged now and then.
+ *
+ *  \return void
+ */
+void compute_global_quantities_of_system(void)
+{
+  int i, j, n;
+  struct state_of_system sys;
+  double egyspec, vel[3];
+  /* clear the per-type accumulators of this task */
+  for(n = 0; n < NTYPES; n++)
+    {
+      sys.MassComp[n] = sys.EnergyKinComp[n] = sys.EnergyPotComp[n] = sys.EnergyIntComp[n] = 0;
+
+      for(j = 0; j < 4; j++)
+        sys.CenterOfMassComp[n][j] = sys.MomentumComp[n][j] = sys.AngMomentumComp[n][j] = 0;
+    }
+  /* accumulate the contributions of all local particles */
+  for(i = 0; i < NumPart; i++)
+    {
+      sys.MassComp[P[i].Type] += P[i].Mass;
+
+#if defined(SELFGRAVITY)
+#ifdef EVALPOTENTIAL
+#ifndef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+      sys.EnergyPotComp[P[i].Type] +=
+          0.5 * P[i].Mass * (P[i].Potential + All.G * P[i].Mass / (All.ForceSoftening[P[i].SofteningType] / 2.8)) / All.cf_atime;
+#else  /* #ifndef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+      /* ignore self-contribution from gravity if exact gravity is used */
+      if(P[i].Type == EXACT_GRAVITY_FOR_PARTICLE_TYPE)
+        sys.EnergyPotComp[P[i].Type] += 0.5 * P[i].Mass * P[i].Potential / All.cf_atime;
+      else
+        sys.EnergyPotComp[P[i].Type] +=
+            0.5 * P[i].Mass * (P[i].Potential + All.G * P[i].Mass / (All.ForceSoftening[P[i].SofteningType] / 2.8)) / All.cf_atime;
+#endif /* #ifndef EXACT_GRAVITY_FOR_PARTICLE_TYPE #else */
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #if defined(SELFGRAVITY) */
+
+#if defined(EXTERNALGRAVITY)
+#if defined(SELFGRAVITY)
+      sys.EnergyPotComp[P[i].Type] += 0.5 * P[i].Mass * P[i].ExtPotential; /* note: ExtPotential already included on P[].p.Potential,
+                                                                              that's why only 0.5 is needed here to recover the rest */
+#else                                                                      /* #if defined(SELFGRAVITY) */
+      sys.EnergyPotComp[P[i].Type] += 1.0 * P[i].Mass * P[i].ExtPotential;
+#endif                                                                     /* #if defined(SELFGRAVITY) #else */
+#endif                                                                     /* #if defined(EXTERNALGRAVITY) */
+
+      if(P[i].Type == 0)
+        {
+          for(j = 0; j < 3; j++)
+            {
+              vel[j] = P[i].Vel[j];
+            }
+          /* NOTE(review): no All.cf_a2inv factor here, unlike the else-branch below -- confirm this is intended */
+          sys.EnergyKinComp[0] += 0.5 * P[i].Mass * (vel[0] * vel[0] + vel[1] * vel[1] + vel[2] * vel[2]);
+
+          egyspec = SphP[i].Utherm;
+
+          sys.EnergyIntComp[0] += P[i].Mass * egyspec;
+        }
+      else
+        {
+          for(j = 0; j < 3; j++)
+            {
+              vel[j] = P[i].Vel[j];
+            }
+          sys.EnergyKinComp[P[i].Type] += 0.5 * P[i].Mass * (vel[0] * vel[0] + vel[1] * vel[1] + vel[2] * vel[2]) * All.cf_a2inv;
+        }
+      /* mass-weighted velocity and position sums per type */
+      for(j = 0; j < 3; j++)
+        {
+          sys.MomentumComp[P[i].Type][j] += P[i].Mass * vel[j];
+          sys.CenterOfMassComp[P[i].Type][j] += P[i].Mass * P[i].Pos[j];
+        }
+      /* angular momentum per type: Mass * (Pos x vel) */
+      sys.AngMomentumComp[P[i].Type][0] += P[i].Mass * (P[i].Pos[1] * vel[2] - P[i].Pos[2] * vel[1]);
+      sys.AngMomentumComp[P[i].Type][1] += P[i].Mass * (P[i].Pos[2] * vel[0] - P[i].Pos[0] * vel[2]);
+      sys.AngMomentumComp[P[i].Type][2] += P[i].Mass * (P[i].Pos[0] * vel[1] - P[i].Pos[1] * vel[0]);
+    }
+
+  /* sum the stuff over all processors; the result is collected on task 0 */
+  MPI_Reduce(&sys.MassComp[0], &SysState.MassComp[0], NTYPES, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&sys.EnergyPotComp[0], &SysState.EnergyPotComp[0], NTYPES, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&sys.EnergyIntComp[0], &SysState.EnergyIntComp[0], NTYPES, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&sys.EnergyKinComp[0], &SysState.EnergyKinComp[0], NTYPES, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&sys.MomentumComp[0][0], &SysState.MomentumComp[0][0], NTYPES * 4, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&sys.AngMomentumComp[0][0], &SysState.AngMomentumComp[0][0], NTYPES * 4, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&sys.CenterOfMassComp[0][0], &SysState.CenterOfMassComp[0][0], NTYPES * 4, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  /* only task 0 received the reduced sums, so it alone derives the totals */
+  if(ThisTask == 0)
+    {
+      for(i = 0; i < NTYPES; i++)
+        SysState.EnergyTotComp[i] = SysState.EnergyKinComp[i] + SysState.EnergyPotComp[i] + SysState.EnergyIntComp[i];
+
+      SysState.Mass = SysState.EnergyKin = SysState.EnergyPot = SysState.EnergyInt = SysState.EnergyTot = 0;
+
+      for(j = 0; j < 3; j++)
+        SysState.Momentum[j] = SysState.AngMomentum[j] = SysState.CenterOfMass[j] = 0;
+
+      for(i = 0; i < NTYPES; i++)
+        {
+          SysState.Mass += SysState.MassComp[i];
+          SysState.EnergyKin += SysState.EnergyKinComp[i];
+          SysState.EnergyPot += SysState.EnergyPotComp[i];
+          SysState.EnergyInt += SysState.EnergyIntComp[i];
+          SysState.EnergyTot += SysState.EnergyTotComp[i];
+
+          for(j = 0; j < 3; j++)
+            {
+              SysState.Momentum[j] += SysState.MomentumComp[i][j];
+              SysState.AngMomentum[j] += SysState.AngMomentumComp[i][j];
+              SysState.CenterOfMass[j] += SysState.CenterOfMassComp[i][j];
+            }
+        }
+      /* turn the mass-weighted position sums into centers of mass (skipped for zero mass) */
+      for(i = 0; i < NTYPES; i++)
+        for(j = 0; j < 3; j++)
+          if(SysState.MassComp[i] > 0)
+            SysState.CenterOfMassComp[i][j] /= SysState.MassComp[i];
+
+      for(j = 0; j < 3; j++)
+        if(SysState.Mass > 0)
+          SysState.CenterOfMass[j] /= SysState.Mass;
+      /* store the Euclidean norm of each per-type vector in index 3 */
+      for(i = 0; i < NTYPES; i++)
+        {
+          SysState.CenterOfMassComp[i][3] = SysState.MomentumComp[i][3] = SysState.AngMomentumComp[i][3] = 0;
+          for(j = 0; j < 3; j++)
+            {
+              SysState.CenterOfMassComp[i][3] += SysState.CenterOfMassComp[i][j] * SysState.CenterOfMassComp[i][j];
+              SysState.MomentumComp[i][3] += SysState.MomentumComp[i][j] * SysState.MomentumComp[i][j];
+              SysState.AngMomentumComp[i][3] += SysState.AngMomentumComp[i][j] * SysState.AngMomentumComp[i][j];
+            }
+          SysState.CenterOfMassComp[i][3] = sqrt(SysState.CenterOfMassComp[i][3]);
+          SysState.MomentumComp[i][3]     = sqrt(SysState.MomentumComp[i][3]);
+          SysState.AngMomentumComp[i][3]  = sqrt(SysState.AngMomentumComp[i][3]);
+        }
+      /* same norms for the totals over all types */
+      SysState.CenterOfMass[3] = SysState.Momentum[3] = SysState.AngMomentum[3] = 0;
+
+      for(j = 0; j < 3; j++)
+        {
+          SysState.CenterOfMass[3] += SysState.CenterOfMass[j] * SysState.CenterOfMass[j];
+          SysState.Momentum[3] += SysState.Momentum[j] * SysState.Momentum[j];
+          SysState.AngMomentum[3] += SysState.AngMomentum[j] * SysState.AngMomentum[j];
+        }
+
+      SysState.CenterOfMass[3] = sqrt(SysState.CenterOfMass[3]);
+      SysState.Momentum[3]     = sqrt(SysState.Momentum[3]);
+      SysState.AngMomentum[3]  = sqrt(SysState.AngMomentum[3]);
+    }
+
+  /* give everyone the result, maybe they want to do something with it */
+  MPI_Bcast(&SysState, sizeof(struct state_of_system), MPI_BYTE, 0, MPI_COMM_WORLD);
+}
diff --git a/src/amuse/community/arepo/src/io/hdf5_util.c b/src/amuse/community/arepo/src/io/hdf5_util.c
new file mode 100644
index 0000000000..a613a36bdc
--- /dev/null
+++ b/src/amuse/community/arepo/src/io/hdf5_util.c
@@ -0,0 +1,881 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/io/hdf5_util.c
+ * \date        05/2018
+ * \brief       Contains the wrapper functions to the HDF5 library functions.
+ * \details     The wrapper functions explicitly check for error conditions
+ *              and terminate the run if such conditions occur.  The HDF5 error
+ *              handler is disabled in case of termination not to repeat the
+ *              error message of the handler again at the program exit.
+ *
+ * \par Major modifications and contributions:
+ * - 07.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef HAVE_HDF5
+#ifndef HDF5UTIL_H
+#define HDF5UTIL_H
+#include <hdf5.h>
+
+/*! \brief Creates an HDF5 file via H5Fcreate and aborts the run on failure.
+ *
+ *  Unless TOLERATE_WRITE_ERROR is defined, a negative identifier from
+ *  H5Fcreate terminates the run; otherwise it is handed back to the caller.
+ *  \param[in] fname Name of the file to create.
+ *  \param[in] flags Flags passed through to H5Fcreate.
+ *  \param[in] fcpl_id File creation property list identifier, used when
+ *             modifying default file meta-data. Use H5P_DEFAULT to specify
+ *             default file creation properties.
+ *  \param[in] fapl_id File access property list identifier. If parallel file
+ *             access is desired, this is a collective call according to the
+ *             communicator stored in the fapl_id. Use H5P_DEFAULT for default
+ *             file access properties.
+ *
+ *  \return File identifier as returned by H5Fcreate.
+ */
+hid_t my_H5Fcreate(const char *fname, unsigned int flags, hid_t fcpl_id, hid_t fapl_id)
+{
+  const hid_t new_file = H5Fcreate(fname, flags, fcpl_id, fapl_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(new_file < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* keep HDF5 from repeating the error at exit */
+      terminate("On Task %d, error detected in HDF5: unable to create file %s\n", ThisTask, fname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return new_file;
+}
+
+/*! \brief Creates an HDF5 group via H5Gcreate and aborts the run on failure.
+ *
+ *  The failure check is compiled out when TOLERATE_WRITE_ERROR is defined.
+ *
+ *  \param[in] loc_id File or group identifier.
+ *  \param[in] groupname Absolute or relative name of the new group.
+ *  \param[in] size_hint Optional parameter indicating the number of bytes to
+ *             reserve for the names that will appear in the group. A
+ *             conservative estimate could result in multiple system-level
+ *             I/O requests to read the group name heap; a liberal estimate
+ *             could result in a single large I/O request even when the group
+ *             has just a few names. HDF5 stores each name with a null
+ *             terminator.
+ *
+ *  \return Group identifier as returned by H5Gcreate.
+ */
+hid_t my_H5Gcreate(hid_t loc_id, const char *groupname, size_t size_hint)
+{
+  const hid_t new_group = H5Gcreate(loc_id, groupname, size_hint);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(new_group < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to create group %s\n", ThisTask, groupname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return new_group;
+}
+
+/*! \brief Creates a dataset via H5Dcreate and aborts the run on failure.
+ *
+ *  The failure check is compiled out when TOLERATE_WRITE_ERROR is defined.
+ *
+ *  \param[in] loc_id Identifier of the file or group within which to create
+ *             the dataset.
+ *  \param[in] datasetname The name of the dataset to create.
+ *  \param[in] type_id Identifier of the datatype to use when creating the
+ *             dataset.
+ *  \param[in] space_id Identifier of the dataspace to use when creating the
+ *             dataset.
+ *  \param[in] dcpl_id Dataset creation property list identifier.
+ *
+ *  \return Dataset identifier as returned by H5Dcreate.
+ */
+hid_t my_H5Dcreate(hid_t loc_id, const char *datasetname, hid_t type_id, hid_t space_id, hid_t dcpl_id)
+{
+  const hid_t new_dataset = H5Dcreate(loc_id, datasetname, type_id, space_id, dcpl_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(new_dataset < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, Error detected in HDF5: unable to create dataset %s\n", ThisTask, datasetname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return new_dataset;
+}
+
+/*! \brief Writes a dataset via H5Dwrite and aborts the run on failure.
+ *
+ *  With TOLERATE_WRITE_ERROR, nothing is written (0 is returned) once
+ *  WriteErrorFlag is set, and a failing H5Dwrite is reported to the caller.
+ *  \param[in] dataset_id Identifier of the dataset to write to.
+ *  \param[in] mem_type_id Identifier of the memory datatype.
+ *  \param[in] mem_space_id Identifier of the memory dataspace.
+ *  \param[in] file_space_id Identifier of the dataset's dataspace in the file.
+ *  \param[in] xfer_plist_id  Identifier of a transfer property list for this
+ *             I/O operation.
+ *  \param[in] buf Buffer with data to be written to the file.
+ *  \param[in] datasetname Name of dataset (for error message only)
+ *
+ *  \return Status of the write operation (0 if the write was skipped).
+ */
+herr_t my_H5Dwrite(hid_t dataset_id, hid_t mem_type_id, hid_t mem_space_id, hid_t file_space_id, hid_t xfer_plist_id, const void *buf,
+                   const char *datasetname)
+{
+#ifdef TOLERATE_WRITE_ERROR
+  if(WriteErrorFlag != 0) /* an earlier write already failed: skip this one */
+    return 0;
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+  const herr_t write_rc = H5Dwrite(dataset_id, mem_type_id, mem_space_id, file_space_id, xfer_plist_id, buf);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(write_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to write dataset %s\n", ThisTask, datasetname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return write_rc;
+}
+
+/*! \brief Creates an attribute via H5Acreate and aborts the run on failure.
+ *
+ *  \param[in] loc_id Identifier for the object to which the attribute is to be
+ *             attached. May be any HDF5 object identifier (group, dataset, or
+ *             committed datatype) or an HDF5 file identifier; if loc_id is a
+ *             file identifier, the attribute will be attached to that file's
+ *             root group.
+ *  \param[in] attr_name Name of attribute to create.
+ *  \param[in] type_id Identifier of datatype for attribute.
+ *  \param[in] space_id Identifier of dataspace for attribute.
+ *  \param[in] acpl_id Identifier of creation property list (specify
+ *             H5P_DEFAULT).
+ *
+ *  \return Attribute identifier; failure is fatal unless TOLERATE_WRITE_ERROR.
+ */
+hid_t my_H5Acreate(hid_t loc_id, const char *attr_name, hid_t type_id, hid_t space_id, hid_t acpl_id)
+{
+  const hid_t new_attr = H5Acreate(loc_id, attr_name, type_id, space_id, acpl_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(new_attr < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to create attribute %s\n", ThisTask, attr_name);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return new_attr;
+}
+
+/*! \brief Writes an attribute via H5Awrite and aborts the run on failure.
+ *         With TOLERATE_WRITE_ERROR it is a no-op once WriteErrorFlag is set.
+ *  \param[in] attr_id Identifier of an attribute to write.
+ *  \param[in] mem_type_id Identifier of the attribute datatype (in memory).
+ *  \param[in] buf Data to be written.
+ *  \param[in] attr_name Name of attribute (for error message only).
+ *
+ *  \return Status (non-negative if successful; 0 if the write was skipped).
+ */
+herr_t my_H5Awrite(hid_t attr_id, hid_t mem_type_id, const void *buf, const char *attr_name)
+{
+#ifdef TOLERATE_WRITE_ERROR
+  if(WriteErrorFlag != 0) /* an earlier write already failed: skip this one */
+    return 0;
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+  const herr_t write_rc = H5Awrite(attr_id, mem_type_id, buf);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(write_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to write attribute %s\n", ThisTask, attr_name);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return write_rc;
+}
+
+/*! \brief Creates a dataspace via H5Screate and aborts the run on failure.
+ *         The error text names the requested class (scalar or simple).
+ *  \param[in] type Type of dataspace to be created.
+ *
+ *  \return Dataspace identifier if successful.
+ */
+hid_t my_H5Screate(H5S_class_t type)
+{
+  const hid_t new_space = H5Screate(type);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(new_space < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      switch(type)
+        {
+          case H5S_SCALAR:
+            terminate("On Task %d, error detected in HDF5: unable to create a scalar dataspace\n", ThisTask);
+            break;
+          case H5S_SIMPLE:
+            terminate("On Task %d, error detected in HDF5: unable to create a simple dataspace\n", ThisTask);
+            break;
+          default:
+            terminate("On Task %d, error detected in HDF5: unknown dataspace type\n", ThisTask);
+            break;
+        }
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return new_space;
+}
+
+/*! \brief Creates a simple dataspace via H5Screate_simple; failure is fatal
+ *         unless the code is compiled with TOLERATE_WRITE_ERROR.
+ *  \param[in] rank Number of dimensions of dataspace.
+ *  \param[in] current_dims Array specifying the size of each dimension.
+ *  \param[in] maximum_dims Array specifying the maximum size of each
+ *             dimension.
+ *
+ *  \return Dataspace identifier if successful.
+ */
+hid_t my_H5Screate_simple(int rank, const hsize_t *current_dims, const hsize_t *maximum_dims)
+{
+  const hid_t new_space = H5Screate_simple(rank, current_dims, maximum_dims);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(new_space < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to create a simple dataspace\n", ThisTask);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return new_space;
+}
+
+/*! \brief Opens an HDF5 file via H5Fopen; a failure always terminates the run
+ *         (this check is not subject to TOLERATE_WRITE_ERROR).
+ *  \param[in] fname Name of the file to be opened.
+ *  \param[in] flags File access flags. Allowable values are:
+ *             H5F_ACC_RDWR -- Allow read and write access to file.
+ *             H5F_ACC_RDONLY -- Allow read-only access to file.
+ *  \param[in] fapl_id Identifier for the file access properties list. If
+ *             parallel file access is desired, this is a collective call
+ *             according to the communicator stored in the fapl_id. Use
+ *             H5P_DEFAULT for default file access properties.
+ *
+ *  \return File identifier if successful.
+ */
+hid_t my_H5Fopen(const char *fname, unsigned int flags, hid_t fapl_id)
+{
+  const hid_t opened_file = H5Fopen(fname, flags, fapl_id);
+
+  if(opened_file < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to open file %s\n", ThisTask, fname);
+    }
+
+  return opened_file;
+}
+
+/*! \brief Opens a group via H5Gopen and aborts the run on failure.
+ *         The failure check is compiled out with TOLERATE_WRITE_ERROR.
+ *  \param[in] loc_id File or group identifier within which the group is to be
+ *             opened.
+ *  \param[in] groupname Name of group.
+ *
+ *  \return Valid group identifier if successful.
+ */
+hid_t my_H5Gopen(hid_t loc_id, const char *groupname)
+{
+  const hid_t opened_group = H5Gopen(loc_id, groupname);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(opened_group < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to open group %s\n", ThisTask, groupname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return opened_group;
+}
+
+/*! \brief Opens a dataset via H5Dopen and aborts the run on failure.
+ *         The failure check is compiled out with TOLERATE_WRITE_ERROR.
+ *  \param[in] file_id Identifier of the file or group within which the
+ *             dataset to be accessed will be found.
+ *  \param[in] datasetname Name of the dataset to access.
+ *
+ *  \return Dataset identifier if successful.
+ */
+hid_t my_H5Dopen(hid_t file_id, const char *datasetname)
+{
+  const hid_t opened_dataset = H5Dopen(file_id, datasetname);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(opened_dataset < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to open dataset %s\n", ThisTask, datasetname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return opened_dataset;
+}
+
+/*! \brief Tries to open a dataset without treating failure as fatal.
+ *
+ *  In contrast to my_H5Dopen(), a failing H5Dopen does not terminate the
+ *  run, and the HDF5 error handler is muted for the duration of the call.
+ *  This is useful while reading an ICs file because in that case a
+ *  non-existing dataset is put to zero (see also read_ic.c).
+ *
+ *  \param[in] file_id Identifier of the file or group within which the
+ *             dataset to be accessed will be found.
+ *  \param[in] datasetname Name of the dataset to access.
+ *
+ *  \return Dataset identifier if successful; otherwise negative value.
+ */
+hid_t my_H5Dopen_if_existing(hid_t file_id, const char *datasetname)
+{
+  /* remember the active error handler, then switch it off */
+  H5E_auto_t saved_handler;
+  void *saved_data;
+  H5Eget_auto(&saved_handler, &saved_data);
+  H5Eset_auto(NULL, NULL);
+
+  const hid_t maybe_dataset = H5Dopen(file_id, datasetname);
+
+  /* put the previous error handler back in place */
+  H5Eset_auto(saved_handler, saved_data);
+
+  return maybe_dataset;
+}
+
+/*! \brief Opens an attribute by name via H5Aopen_name; failure is fatal
+ *         unless the code is compiled with TOLERATE_WRITE_ERROR.
+ *  \param[in] loc_id  Identifier of a group, dataset, or named datatype that
+ *             attribute is attached to.
+ *  \param[in] attr_name Attribute name.
+ *
+ *  \return Returns attribute identifier if successful.
+ */
+hid_t my_H5Aopen_name(hid_t loc_id, const char *attr_name)
+{
+  const hid_t opened_attr = H5Aopen_name(loc_id, attr_name);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(opened_attr < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to open attribute %s\n", ThisTask, attr_name);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return opened_attr;
+}
+
+/*! \brief Reads a dataset via H5Dread; a failure always terminates the run
+ *         (this check is not subject to TOLERATE_WRITE_ERROR).
+ *  \param[in] dataset_id Identifier of the dataset read from.
+ *  \param[in] mem_type_id Identifier of the memory datatype.
+ *  \param[in] mem_space_id Identifier of the memory dataspace.
+ *  \param[in] file_space_id Identifier of the dataset's dataspace in the file.
+ *  \param[in] xfer_plist_id Identifier of a transfer property list for this
+ *             I/O operation.
+ *  \param[out] buf Buffer to receive data read from file.
+ *  \param[in] datasetname Name of dataset (only for error message).
+ *
+ *  \return Returns a non-negative value if successful.
+ */
+herr_t my_H5Dread(hid_t dataset_id, hid_t mem_type_id, hid_t mem_space_id, hid_t file_space_id, hid_t xfer_plist_id, void *buf,
+                  const char *datasetname)
+{
+  const herr_t read_rc = H5Dread(dataset_id, mem_type_id, mem_space_id, file_space_id, xfer_plist_id, buf);
+  if(read_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to read dataset %s\n", ThisTask, datasetname);
+    }
+  return read_rc;
+}
+
+/*! \brief Wraps making a copy of a dataset's dataspace (H5Dget_space);
+ *         failure is fatal unless compiled with TOLERATE_WRITE_ERROR.
+ *  \param[in] dataset_id Identifier of the dataset to query.
+ *  \param[in] datasetname Name of the dataset (for error message only).
+ *
+ *  \return Dataspace identifier if successful.
+ */
+hid_t my_H5Dget_space(hid_t dataset_id, const char *datasetname)
+{
+  const hid_t space_copy = H5Dget_space(dataset_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(space_copy < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to determine space for dataset %s\n", ThisTask, datasetname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return space_copy;
+}
+
+/*! \brief Reads an attribute via H5Aread after verifying its element count.
+ *         Both the size mismatch and a failing read terminate the run.
+ *  \param[in] attr_id Identifier of an attribute to read.
+ *  \param[in] mem_type_id Identifier of the attribute datatype (in memory).
+ *  \param[out] buf Buffer for data to be read.
+ *  \param[in] attr_name Name of the attribute (for error messages only).
+ *  \param[in] size Expected number of elements in the attribute's dataspace.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Aread(hid_t attr_id, hid_t mem_type_id, void *buf, const char *attr_name, hssize_t size)
+{
+  hid_t hdf5_space   = H5Aget_space(attr_id);
+  hssize_t attr_size = H5Sget_simple_extent_npoints(hdf5_space);
+  H5Sclose(hdf5_space);
+  /* hssize_t is not guaranteed to be long long, hence the casts for the %lld conversions below */
+  if(attr_size != size)
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate(
+          "On Task %d, error detected in HDF5: mismatch in size for attribute %s, expected size = %lld, actual attribute size = "
+          "%lld\n",
+          ThisTask, attr_name, (long long)size, (long long)attr_size);
+    }
+
+  herr_t status = H5Aread(attr_id, mem_type_id, buf);
+  if(status < 0)
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: unable to read attribute %s\n", ThisTask, attr_name);
+    }
+  return status;
+}
+
+/*! \brief Wraps resetting the size of an existing dataspace to give a nice
+ *         error message (H5Sset_extent_simple).
+ *
+ *  \param[in] space_id Dataspace identifier.
+ *  \param[in] rank Rank, or dimensionality, of the dataspace.
+ *  \param[in] current_size Array containing current size of dataspace.
+ *  \param[in] maximum_size Array containing maximum size of dataspace.
+ *  \param[in] attr_name Name of attribute (only for error message).
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Sset_extent_simple(hid_t space_id, int rank, const hsize_t *current_size, const hsize_t *maximum_size,
+                               const char *attr_name)
+{
+  const herr_t extent_rc = H5Sset_extent_simple(space_id, rank, current_size, maximum_size);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(extent_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to set extent for attribute %s\n", ThisTask, attr_name);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return extent_rc;
+}
+
+/*! \brief Closes an attribute via H5Aclose; failure is fatal unless the code
+ *         is compiled with TOLERATE_WRITE_ERROR.
+ *  \param[in] attr_id Attribute to release access to.
+ *  \param[in] attr_name Name of the attribute (for error message only).
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Aclose(hid_t attr_id, const char *attr_name)
+{
+  const herr_t close_rc = H5Aclose(attr_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(close_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to close attribute %s\n", ThisTask, attr_name);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return close_rc;
+}
+
+/*! \brief Closes a dataset via H5Dclose; failure is fatal unless the code
+ *         is compiled with TOLERATE_WRITE_ERROR.
+ *  \param[in] dataset_id Identifier of the dataset to close access to.
+ *  \param[in] datasetname Name of the dataset (for error message only).
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Dclose(hid_t dataset_id, const char *datasetname)
+{
+  const herr_t close_rc = H5Dclose(dataset_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(close_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to close dataset %s\n", ThisTask, datasetname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return close_rc;
+}
+
+/*! \brief Closes a group via H5Gclose; failure is fatal unless the code
+ *         is compiled with TOLERATE_WRITE_ERROR.
+ *  \param[in] group_id Group identifier to release.
+ *  \param[in] groupname Name of the group (for error message only).
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Gclose(hid_t group_id, const char *groupname)
+{
+  const herr_t close_rc = H5Gclose(group_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(close_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to close group %s\n", ThisTask, groupname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return close_rc;
+}
+
+/*! \brief Closes a file via H5Fclose; failure is fatal unless the code
+ *         is compiled with TOLERATE_WRITE_ERROR.
+ *  \param[in] file_id Identifier of a file to terminate access to.
+ *  \param[in] fname File name (for error message only).
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Fclose(hid_t file_id, const char *fname)
+{
+  const herr_t close_rc = H5Fclose(file_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(close_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to close file %s\n", ThisTask, fname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+  return close_rc;
+}
+
+/*! \brief Releases a dataspace via H5Sclose; failure is fatal unless the
+ *         code is compiled with TOLERATE_WRITE_ERROR.
+ *
+ *  \param[in] dataspace_id Identifier of dataspace to release.
+ *  \param[in] type Class of the dataspace; only selects the error message.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Sclose(hid_t dataspace_id, H5S_class_t type)
+{
+  const herr_t close_rc = H5Sclose(dataspace_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(close_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      switch(type)
+        {
+          case H5S_SCALAR:
+            terminate("On Task %d, error detected in HDF5: unable to close a scalar dataspace\n", ThisTask);
+            break;
+          case H5S_SIMPLE:
+            terminate("On Task %d, error detected in HDF5: unable to close a simple dataspace\n", ThisTask);
+            break;
+          default:
+            terminate("On Task %d, error detected in HDF5: unknown dataspace type\n", ThisTask);
+            break;
+        }
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return close_rc;
+}
+
+/*! \brief Copies a datatype via H5Tcopy; failure is fatal unless the code
+ *         is compiled with TOLERATE_WRITE_ERROR.
+ *  \param[in] type_id Identifier of datatype to copy. Can be a datatype
+ *             identifier, a predefined datatype (defined in H5Tpublic.h), or
+ *             a dataset identifier.
+ *
+ *  \return Datatype identifier if successful.
+ */
+hid_t my_H5Tcopy(hid_t type_id)
+{
+  const hid_t copied_type = H5Tcopy(type_id);
+#ifndef TOLERATE_WRITE_ERROR
+  if(copied_type < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: could not properly copy datatype\n", ThisTask);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+  return copied_type;
+}
+
+/*! \brief Releases a datatype via H5Tclose; failure is fatal unless the
+ *         code is compiled with TOLERATE_WRITE_ERROR.
+ *  \param[in] type_id Identifier of datatype to release.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Tclose(hid_t type_id)
+{
+  const herr_t close_rc = H5Tclose(type_id);
+#ifndef TOLERATE_WRITE_ERROR
+  if(close_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: could not properly close datatype\n", ThisTask);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+  return close_rc;
+}
+
+/*! \brief Selects a hyperslab via H5Sselect_hyperslab; failure is fatal
+ *         unless the code is compiled with TOLERATE_WRITE_ERROR.
+ *  \param[in] space_id Identifier of dataspace selection to modify.
+ *  \param[in] op Operation to perform on current selection.
+ *  \param[in] start Offset of start of hyperslab.
+ *  \param[in] stride Hyperslab stride.
+ *  \param[in] count Number of blocks included in hyperslab.
+ *  \param[in] block Size of block in hyperslab.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Sselect_hyperslab(hid_t space_id, H5S_seloper_t op, const hsize_t *start, const hsize_t *stride, const hsize_t *count,
+                              const hsize_t *block)
+{
+  const herr_t select_rc = H5Sselect_hyperslab(space_id, op, start, stride, count, block);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(select_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: could not properly select the chosen hyperslab\n", ThisTask);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+  return select_rc;
+}
+
+/*! \brief Queries the size in bytes of a datatype via H5Tget_size. A result
+ *         of 0 counts as failure and is fatal unless TOLERATE_WRITE_ERROR.
+ *
+ *  \param[in] datatype_id Identifier of datatype to query.
+ *
+ *  \return The size of the datatype in bytes.
+ */
+size_t my_H5Tget_size(hid_t datatype_id)
+{
+  const size_t nbytes = H5Tget_size(datatype_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(nbytes == 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: unable to determine the size of the given datatype\n", ThisTask);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+  return nbytes;
+}
+
+/*! \brief Sets the size in bytes of a datatype via H5Tset_size; failure is
+ *         fatal unless the code is compiled with TOLERATE_WRITE_ERROR.
+ *
+ *  \param[in] datatype_id Identifier of datatype for which the size is being
+ *             changed.
+ *  \param[in] size New datatype size in bytes or H5T_VARIABLE.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Tset_size(hid_t datatype_id, size_t size)
+{
+  const herr_t resize_rc = H5Tset_size(datatype_id, size);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(resize_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: could not properly set the size of the given datatype\n", ThisTask);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return resize_rc;
+}
+
+#ifdef HDF5_FILTERS
+/*! \brief Checks via H5Pall_filters_avail whether all filters selected for
+ *         plist_id are available; a negative result terminates the run.
+ *
+ *  \param[in] plist_id Dataset or group creation property list identifier.
+ *
+ *  \return Positive value if all filters are available;
+ *          0 if at least one filter is not currently available.
+ */
+htri_t my_H5Pall_filters_avail(hid_t plist_id)
+{
+  const htri_t avail = H5Pall_filters_avail(plist_id);
+  if(avail < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: could not properly verify the availability of all filters\n", ThisTask);
+    }
+  return avail;
+}
+
+/*! \brief Creates a property list of the class given by class_id via
+ *         H5Pcreate; a failure always terminates the run.
+ *
+ *  \param[in] class_id The class of the property list to create.
+ *
+ *  \return Property list identifier if successful.
+ */
+hid_t my_H5Pcreate(hid_t class_id)
+{
+  const hid_t new_plist = H5Pcreate(class_id);
+  if(new_plist < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: could not create the property list associated to the given property class\n",
+                ThisTask);
+    }
+  return new_plist;
+}
+
+/*! \brief Closes a property list via H5Pclose; a failure always terminates
+ *         the run.
+ *  \param[in] plist Identifier of the property list to terminate access to.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Pclose(hid_t plist)
+{
+  const herr_t close_rc = H5Pclose(plist);
+  if(close_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: could not close the input property list\n", ThisTask);
+    }
+  return close_rc;
+}
+
+/*! \brief Sets the chunk size of a chunked dataset via H5Pset_chunk; a
+ *         failure always terminates the run.
+ *
+ *  \param[in] plist Dataset creation property list identifier.
+ *  \param[in] ndims The number of dimensions of each chunk.
+ *  \param[in] dim An array defining the size, in dataset elements, of each
+ *             chunk.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Pset_chunk(hid_t plist, int ndims, const hsize_t *dim)
+{
+  const herr_t set_rc = H5Pset_chunk(plist, ndims, dim);
+  if(set_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: could not set chunk size for the dataset\n", ThisTask);
+    }
+  return set_rc;
+}
+
+/*! \brief Enables the shuffle filter on a property list via H5Pset_shuffle;
+ *         a failure always terminates the run.
+ *
+ *  \param[in] plist_id Dataset creation property list identifier.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Pset_shuffle(hid_t plist_id)
+{
+  const herr_t set_rc = H5Pset_shuffle(plist_id);
+  if(set_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: could not set the shuffle filter in the properties list\n", ThisTask);
+    }
+  return set_rc;
+}
+
+/*! \brief Enables deflate (gzip) compression on a property list via
+ *         H5Pset_deflate; a failure always terminates the run.
+ *
+ *  \param[in] plist_id Dataset or group creation property list identifier.
+ *  \param[in] level Compression level.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Pset_deflate(hid_t plist_id, uint level)
+{
+  const herr_t set_rc = H5Pset_deflate(plist_id, level);
+  if(set_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: could not set the deflate compression in the properties list\n", ThisTask);
+    }
+  return set_rc;
+}
+
+/*! \brief Enables the Fletcher32 checksum on a property list via
+ *         H5Pset_fletcher32; a failure always terminates the run.
+ *
+ *  \param[in] plist_id Dataset or group creation property list identifier.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Pset_fletcher32(hid_t plist_id)
+{
+  const herr_t set_rc = H5Pset_fletcher32(plist_id);
+  if(set_rc < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* mute the HDF5 handler before aborting */
+      terminate("On Task %d, error detected in HDF5: could not set the Fletcher32 checksum in the properties list\n", ThisTask);
+    }
+  return set_rc;
+}
+#endif /* #ifdef HDF5_FILTERS */
+
+#endif /* #ifndef HDF5UTIL_H */
+#endif /* #ifdef HAVE_HDF5 */
diff --git a/src/amuse/community/arepo/src/io/io.c b/src/amuse/community/arepo/src/io/io.c
new file mode 100644
index 0000000000..f5d9a0c73f
--- /dev/null
+++ b/src/amuse/community/arepo/src/io/io.c
@@ -0,0 +1,2226 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/io/io.c
+ * \date        05/2018
+ * \brief       Routines for input and output of snapshot files to disk.
+ * \details     contains functions:
+ *                void init_field
+ *                void init_units
+ *                void init_snapshot_type
+ *                void write_error
+ *                void create_snapshot_if_desired(void)
+ *                void produce_dump(void)
+ *                void savepositions(int num, int subbox_flag)
+ *                void fill_write_buffer
+ *                int get_bytes_per_blockelement
+ *                int get_datatype_in_block(enum iofields blocknr, int mode)
+ *                int get_values_per_blockelement(enum iofields blocknr)
+ *                int get_particles_in_block(enum iofields blocknr, int
+ *                  *typelist)
+ *                int blockpresent(enum iofields blocknr, int write)
+ *                void get_Tab_IO_Label(enum iofields blocknr, char *label)
+ *                void get_dataset_name(enum iofields blocknr, char *buf)
+ *                void write_file(char *fname, int writeTask, int lastTask,
+ *                  int subbox_flag)
+ *                void write_header_attributes_in_hdf5(hid_t handle)
+ *                void write_parameters_attributes_in_hdf5(hid_t handle)
+ *                herr_t my_hdf5_error_handler(void *unused)
+ *                void write_dataset_attributes(hid_t hdf5_dataset, enum
+ *                  iofields blocknr)
+ *                void write_xdmf(char *fname)
+ *                size_t my_fwrite(void *ptr, size_t size, size_t nmemb,
+ *                  FILE * stream)
+ *                size_t my_fread(void *ptr, size_t size, size_t nmemb, FILE *
+ *                  stream)
+ *                void mpi_printf(const char *fmt, ...)
+ *                void mpi_fprintf(FILE * stream, const char *fmt, ...)
+ *                void mpi_printf_each(const char *fmt, ...)
+ *                FILE *open_file(char *fnam)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 07.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <errno.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/* needs to be included after allvars.h */
+#ifdef OUTPUT_XDMF
+#include <libgen.h> /* for basename() function */
+#endif /* #ifdef OUTPUT_XDMF */
+
+#include "../fof/fof.h"
+#include "../gitversion/version.h"
+#include "../mesh/voronoi/voronoi.h"
+
+#ifdef HAVE_HDF5
+#include <hdf5.h>
+void write_header_attributes_in_hdf5(hid_t handle);
+void write_parameters_attributes_in_hdf5(hid_t handle);
+void write_compile_time_options_in_hdf5(hid_t handle);
+void write_dataset_attributes(hid_t hdf5_dataset, enum iofields blocknr);
+#endif /* #ifdef HAVE_HDF5 */
+
+#ifdef TOLERATE_WRITE_ERROR
+static char alternative_fname[MAXLEN_PATH];
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+#ifdef OUTPUT_XDMF
+static void write_xdmf(char *fname);
+#endif /* #ifdef OUTPUT_XDMF */
+
+static int n_type[NTYPES]; /**< contains the local (for a single task) number of particles of each type in the snapshot file */
+static long long ntot_type_all[NTYPES]; /**< contains the global number of particles of each type in the snapshot file */
+static int subbox_dump = 0;
+
+/*! \brief Function for registering an output field.
+ *
+ *  Appends one entry to IO_Fields, enlarging the table in steps of five
+ *  entries when it is full. Don't forget to add the new IO_FLAG to allvars.h.
+ *
+ *  \param[in] field Specifies the field as an enumeration type iofields
+ *             (allvars.h), e.g. IO_POS.
+ *  \param[in] label The label of the dataset (4 characters).
+ *  \param[in] datasetname The name of the hdf5 dataset; a name that does not
+ *             fit into the table entry is truncated.
+ *  \param[in] type_in_memory The type of the field in the memory (use
+ *             MEM_NONE if specifying io_func).
+ *  \param[in] type_in_file_output The output type in the hdf5 file.
+ *  \param[in] type_in_file_input The input type in the hdf5 file (FILE_NONE disables input).
+ *  \param[in] values_per_block The number of values per field, e.g. 1 for
+ *             mass, 3 for velocities.
+ *  \param[in] array The array in which the value is stored. For an io_func
+ *             this influences the particle index, the default (A_NONE) is an
+ *             index into P/SphP, can be changed if required.
+ *  \param[in] pointer_to_field A Pointer to the field in one of the global
+ *             arrays, e.g. &SphP[0].Density, or &P[0].Vel[0].
+ *  \param[in] io_func Alternatively, if the value to output/input is not a
+ *             simple field, you can define a function which handles i/o.
+ *  \param[in] typelist_bitmask Specifies for which particle type the field is
+ *             present, e.g. 1+2+8 => field present for particle types 0,1,3
+ *             (or use ALL_TYPES, GAS_ONLY,...).
+ *
+ *  \return void
+ */
+void init_field(enum iofields field, const char *label, const char *datasetname, enum types_in_memory type_in_memory,
+                enum types_in_file type_in_file_output, enum types_in_file type_in_file_input, int values_per_block, enum arrays array,
+                void *pointer_to_field, void (*io_func)(int, int, void *, int), int typelist_bitmask)
+{
+  int alloc_step = 5;
+
+  if(Max_IO_Fields == 0)
+    {
+      IO_Fields     = (IO_Field *)mymalloc("IO_Fields", alloc_step * sizeof(IO_Field));
+      Max_IO_Fields = alloc_step;
+    }
+  else if(Max_IO_Fields == N_IO_Fields)
+    {
+      Max_IO_Fields = ((Max_IO_Fields / alloc_step) + 1) * alloc_step;
+      IO_Fields     = (IO_Field *)myrealloc(IO_Fields, Max_IO_Fields * sizeof(IO_Field));
+    }
+
+  IO_Field *entry = &IO_Fields[N_IO_Fields]; /* slot being filled; taken after a possible realloc */
+  entry->field    = field;
+  /* exactly 4 bytes are copied, so a 4-character label is stored without a terminating NUL */
+  strncpy(entry->label, label, 4);
+  /* strncpy() leaves the destination unterminated when the source fills it, so terminate explicitly */
+  strncpy(entry->datasetname, datasetname, sizeof(entry->datasetname) - 1);
+  entry->datasetname[sizeof(entry->datasetname) - 1] = '\0';
+  entry->type_in_memory      = type_in_memory;
+  entry->type_in_file_output = type_in_file_output;
+  entry->type_in_file_input  = type_in_file_input;
+  entry->values_per_block    = values_per_block;
+  entry->snap_type           = SN_FULL;
+  entry->typelist            = typelist_bitmask;
+  entry->array               = array;
+  entry->io_func             = io_func;
+
+  switch(array)
+    {
+      case A_SPHP:
+        entry->offset = (size_t)pointer_to_field - (size_t)SphP;
+        break;
+      case A_P:
+        entry->offset = (size_t)pointer_to_field - (size_t)P;
+        break;
+      case A_PS:
+        entry->offset = (size_t)pointer_to_field - (size_t)PS;
+        break;
+      default: /* A_NONE, or an array not handled above: never leave the offset uninitialized */
+        entry->offset = 0;
+        break;
+    }
+
+  // validate types
+  if(type_in_memory == MEM_INT &&
+     ((type_in_file_input != FILE_NONE && type_in_file_input != FILE_INT) || type_in_file_output != FILE_INT))
+    {
+      terminate("combination of datatypes not supported (field %s)", datasetname);
+    }
+
+  if(type_in_memory == MEM_MY_ID_TYPE &&
+     ((type_in_file_input != FILE_NONE && type_in_file_input != FILE_MY_ID_TYPE) || type_in_file_output != FILE_MY_ID_TYPE))
+    {
+      terminate("combination of datatypes not supported (field %s)", datasetname);
+    }
+
+  /* NOTE(review): MEM_MY_FLOAT and MEM_MY_DOUBLE are not covered by this check */
+  if((type_in_memory == MEM_FLOAT || type_in_memory == MEM_MY_SINGLE || type_in_memory == MEM_DOUBLE) &&
+     ((type_in_file_input != FILE_NONE && (type_in_file_input == FILE_MY_ID_TYPE || type_in_file_input == FILE_INT)) ||
+      type_in_file_output == FILE_INT || type_in_file_output == FILE_MY_ID_TYPE))
+    {
+      terminate("combination of datatypes not supported (field %s)", datasetname);
+    }
+
+  entry->a       = 0.;
+  entry->h       = 0.;
+  entry->L       = 0.;
+  entry->M       = 0.;
+  entry->V       = 0.;
+  entry->c       = 0.;
+  entry->hasunit = 0;
+
+  N_IO_Fields++;
+}
+
+/*! \brief Function for adding units to output field.
+ *
+ *  Only the first entry registered for the field with init_field is changed;
+ *  the call has no effect if there is no such entry.
+ *
+ *  \param[in] field The field, as enumeration type iofields, e.g. IO_POS.
+ *  \param[in] a the exponent of the cosmological a factor.
+ *  \param[in] h the exponent of the hubble parameter.
+ *  \param[in] L the length unit scaling.
+ *  \param[in] M the mass unit scaling.
+ *  \param[in] V the velocity unit scaling.
+ *  \param[in] c conversion factor to cgs units (zero indicates dimensionless
+ *             quantity, integer count, etc).
+ *
+ *  \return void
+ */
+void init_units(enum iofields field, double a, double h, double L, double M, double V, double c)
+{
+  int idx = 0;
+  while(idx < N_IO_Fields && IO_Fields[idx].field != field)
+    idx++;
+
+  if(idx >= N_IO_Fields)
+    return;
+
+  IO_Fields[idx].hasunit = 1;
+  IO_Fields[idx].a       = a;
+  IO_Fields[idx].h       = h;
+  IO_Fields[idx].L       = L;
+  IO_Fields[idx].M       = M;
+  IO_Fields[idx].V       = V;
+  IO_Fields[idx].c       = c;
+}
+
+/*! \brief Function for determining whether a field is dumped in snapshot.
+ *
+ *  This only works for fields registered with init_field, which initializes
+ *  the member snap_type to SN_FULL.
+ *
+ *  \param[in] field The field, as enumeration type iofields, e.g. IO_POS.
+ *  \param[in] type Snapshot types in which the field is present (e.g. SN_FULL).
+ *
+ *  \return void
+ */
+void init_snapshot_type(enum iofields field, enum sn_type type)
+{
+  int idx = 0;
+
+  /* no early exit: every table entry registered for this field is updated */
+  while(idx < N_IO_Fields)
+    {
+      if(IO_Fields[idx].field == field)
+        IO_Fields[idx].snap_type = type;
+      idx++;
+    }
+}
+
+#ifdef TOLERATE_WRITE_ERROR
+/*! \brief Print information about a write error.
+ *
+ *  Unless WriteErrorFlag is already set, prints the host name, the element
+ *  counts, the errno text seen on entry, the task and the call site, and then
+ *  sets WriteErrorFlag to 1 so that the failed write can be performed again.
+ *
+ *  \param[in] check Flag that indicates where the function was called [0 and 1
+ *             in my_fwrite(), 2 in my_hdf5_error_handler(), 3 in
+ *             hdf5_header_error_handler()].
+ *  \param[in] nwritten Number of elements actually written.
+ *  \param[in] nmemb Number of elements that should be written.
+ *
+ *  \return void
+ */
+void write_error(int check, size_t nwritten, size_t nmemb)
+{
+  const int saved_errno = errno; /* read first: the MPI call below may overwrite errno */
+  if(!WriteErrorFlag)
+    {
+      int len;
+      char hostname[MPI_MAX_PROCESSOR_NAME];
+      MPI_Get_processor_name(hostname, &len);
+      printf("TOLERATE_WRITE_ERROR: write failed node=%s  nwritten=%lld  nmemb=%lld  errno=%s  task=%d  check=%d\n", hostname,
+             (long long)nwritten, (long long)nmemb, strerror(saved_errno), ThisTask, check);
+      myflush(stdout);
+      WriteErrorFlag = 1;
+    }
+}
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+/*! \brief Checks if a snapshot should be saved.
+ *
+ *  With OUTPUT_EVERY_STEP the next output time is first set to the current
+ *  time. A dump is produced only at a top-level synchronization point and
+ *  once All.Ti_Current has reached a non-negative All.Ti_nextoutput; the
+ *  next output time is then looked up with find_next_outputtime().
+ *
+ *  \return void
+ */
+void create_snapshot_if_desired(void)
+{
+#ifdef OUTPUT_EVERY_STEP
+  All.Ti_nextoutput = All.Ti_Current;
+#endif /* #ifdef OUTPUT_EVERY_STEP */
+
+  /* allow only top-level synchronization points */
+  if(All.HighestActiveTimeBin != All.HighestOccupiedTimeBin)
+    return;
+  if(!(All.Ti_Current >= All.Ti_nextoutput && All.Ti_nextoutput >= 0))
+    return;
+
+  DumpFlag = DumpFlagNextSnap;
+  produce_dump();
+  All.Ti_nextoutput = find_next_outputtime(All.Ti_Current + 1);
+}
+
+/*! \brief A wrapper function used to create a snapshot.
+ *
+ *  Calls savepositions() with the current snapshot count and increments the
+ *  count; with UPDATE_GRADIENTS_FOR_OUTPUT the gradients are computed first.
+ *
+ *  \return void
+ */
+void produce_dump(void)
+{
+#ifdef UPDATE_GRADIENTS_FOR_OUTPUT
+  exchange_primitive_variables();
+  calculate_gradients();
+#endif /* #ifdef UPDATE_GRADIENTS_FOR_OUTPUT */
+
+  const int snapnum = All.SnapshotFileCount++; /* the counter is advanced before the snapshot is written */
+  savepositions(snapnum, 0);
+}
+
+/*! \brief Saves snapshot to disk.
+ *
+ *  Writes a snapshot of the particle distribution to one or several files
+ *  (NumFilesPerSnapshot), at most NumFilesWrittenInParallel of them at the
+ *  same time. Nothing is done if DumpFlag is zero. File names are built with
+ *  snprintf(), so an over-long path is truncated instead of overflowing.
+ *
+ *  \param[in] num The snapshot number.
+ *  \param[in] subbox_flag If greater than 0 instructs the code to output only
+ *             a subset of the whole domain.
+ *
+ *  \return void
+ */
+void savepositions(int num, int subbox_flag)
+{
+  char buf[500];
+  int n, filenr, gr, ngroups, masterTask, lastTask;
+  double t0, t1;
+
+  t0 = second();
+  CPU_Step[CPU_MISC] += measure_time();
+
+  if(DumpFlag)
+    {
+      subbox_dump = 0;
+
+      if(subbox_flag > 0)
+        {
+          mpi_printf("\nwriting small subbox #%d snapshot file #%d @ time %g ... \n", subbox_flag - 1, num, All.Time);
+          subbox_dump = 1;
+        }
+      else
+        mpi_printf("\nwriting snapshot file #%d @ time %g ... (DumpFlag=%d)\n", num, All.Time, DumpFlag);
+
+#ifdef FOF
+      if(RestartFlag != 3 && RestartFlag != 18 && subbox_flag == 0 && DumpFlag != 2)
+        {
+          {
+            mpi_printf("\nWe shall first compute a group catalogue for this snapshot file\n");
+
+            fof_fof(num);
+          }
+        }
+#endif /* #ifdef FOF */
+
+      if(DumpFlag != 4)
+        {
+          CommBuffer = mymalloc("CommBuffer", COMMBUFFERSIZE);
+
+          if(NTask < All.NumFilesPerSnapshot)
+            {
+              warn(
+                  "Number of processors must be larger or equal than All.NumFilesPerSnapshot! Reducing All.NumFilesPerSnapshot "
+                  "accordingly.\n");
+              All.NumFilesPerSnapshot = NTask;
+            }
+
+          if(All.SnapFormat < 1 || All.SnapFormat > 3)
+            terminate("Unsupported File-Format.  All.SnapFormat=%d\n", All.SnapFormat);
+
+#ifndef HAVE_HDF5
+          if(All.SnapFormat == 3)
+            {
+              mpi_terminate("Code wasn't compiled with HDF5 support enabled!\n");
+            }
+#endif /* #ifndef  HAVE_HDF5 */
+
+          /* determine global and local particle numbers */
+          for(n = 0; n < NTYPES; n++)
+            n_type[n] = 0;
+
+          for(n = 0; n < NumPart; n++)
+            {
+              n_type[P[n].Type]++;
+            }
+
+          sumup_large_ints(NTYPES, n_type, ntot_type_all);
+
+          /* assign processors to output files */
+          distribute_file(All.NumFilesPerSnapshot, 0, 0, NTask - 1, &filenr, &masterTask, &lastTask);
+
+          if(All.NumFilesPerSnapshot > 1)
+            {
+              if(ThisTask == 0)
+                {
+                  snprintf(buf, sizeof(buf), "%s/snapdir_%03d", All.OutputDir, num);
+                  mkdir(buf, 02755);
+
+#ifdef TOLERATE_WRITE_ERROR
+                  snprintf(alternative_fname, sizeof(alternative_fname), "%s/snapdir_%03d", AlternativeOutputDir, num);
+                  mkdir(alternative_fname, 02755);
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+                }
+
+              MPI_Barrier(MPI_COMM_WORLD);
+            }
+
+          if(All.NumFilesPerSnapshot > 1)
+            snprintf(buf, sizeof(buf), "%s/snapdir_%03d/%s_%03d.%d", All.OutputDir, num, All.SnapshotFileBase, num, filenr);
+          else
+            snprintf(buf, sizeof(buf), "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, num);
+
+#ifdef TOLERATE_WRITE_ERROR
+          if(All.NumFilesPerSnapshot > 1)
+            snprintf(alternative_fname, sizeof(alternative_fname), "%s/snapdir_%03d/%s_%03d.%d", AlternativeOutputDir, num, All.SnapshotFileBase, num, filenr);
+          else
+            snprintf(alternative_fname, sizeof(alternative_fname), "%s%s_%03d", AlternativeOutputDir, All.SnapshotFileBase, num);
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+          if(RestartFlag == 3)
+            {
+#ifndef FOF_STOREIDS
+              if(All.NumFilesPerSnapshot > 1)
+                snprintf(buf, sizeof(buf), "%s/snapdir_%03d/%s-groupordered_%03d.%d", All.OutputDir, num, All.SnapshotFileBase, num, filenr);
+              else
+                snprintf(buf, sizeof(buf), "%s%s-groupordered_%03d", All.OutputDir, All.SnapshotFileBase, num);
+#else  /* #ifndef FOF_STOREIDS */
+              if(All.NumFilesPerSnapshot > 1)
+                snprintf(buf, sizeof(buf), "%s/snapdir_%03d/%s-storeids_%03d.%d", All.OutputDir, num, All.SnapshotFileBase, num, filenr);
+              else
+                snprintf(buf, sizeof(buf), "%s%s-storeids_%03d", All.OutputDir, All.SnapshotFileBase, num);
+#endif /* #ifndef FOF_STOREIDS #else */
+            }
+
+#ifdef ADDBACKGROUNDGRID
+          if(All.NumFilesPerSnapshot > 1)
+            snprintf(buf, sizeof(buf), "%s-with-grid.%d", All.InitCondFile, filenr);
+          else
+            snprintf(buf, sizeof(buf), "%s-with-grid", All.InitCondFile);
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+          ngroups = All.NumFilesPerSnapshot / All.NumFilesWrittenInParallel;
+          if((All.NumFilesPerSnapshot % All.NumFilesWrittenInParallel))
+            ngroups++;
+
+          for(gr = 0; gr < ngroups; gr++)
+            {
+              if((filenr / All.NumFilesWrittenInParallel) == gr) /* ok, it's this processor's turn */
+                {
+                  if(ThisTask == masterTask && (filenr % All.NumFilesWrittenInParallel) == 0)
+                    printf("writing snapshot files group %d out of %d - files %d-%d (total of %d files): '%s'\n", gr + 1, ngroups,
+                           filenr, filenr + All.NumFilesWrittenInParallel - 1, All.NumFilesPerSnapshot, buf);
+                  write_file(buf, masterTask, lastTask, subbox_flag);
+#ifdef OUTPUT_XDMF
+                  if(All.SnapFormat == 3)
+                    {
+                      write_xdmf(buf);
+                    }
+#endif /* #ifdef OUTPUT_XDMF */
+                }
+              MPI_Barrier(MPI_COMM_WORLD);
+            }
+
+          myfree(CommBuffer);
+
+          t1 = second();
+          CPU_Step[CPU_SNAPSHOT] += measure_time();
+
+          mpi_printf("done with writing snapshot (took %g sec).\n", timediff(t0, t1));
+        }
+      else
+        {
+          mpi_printf("done with writing files: no dump of snapshot (DumpFlag = %d).\n", DumpFlag);
+        }  // if(DumpFlag !=4)
+
+#ifdef FOF
+      if(RestartFlag != 3 && RestartFlag != 6 && RestartFlag != 18 && subbox_flag == 0 && DumpFlag != 2)
+        {
+          {
+#ifndef FOF_STOREIDS
+            /* now revert from output order to the original order */
+            for(n = 0; n < NumPart; n++)
+              {
+                PS[n].TargetTask  = PS[n].OriginTask;
+                PS[n].TargetIndex = PS[n].OriginIndex;
+              }
+
+            fof_subfind_exchange(MPI_COMM_WORLD);
+
+            myfree(PS);
+
+            /* do resize because subfind may have increased these limits */
+            if(All.MaxPart != fof_OldMaxPart)
+              {
+                All.MaxPart = fof_OldMaxPart;
+                reallocate_memory_maxpart();
+              }
+            if(All.MaxPartSph != fof_OldMaxPartSph)
+              {
+                All.MaxPartSph = fof_OldMaxPartSph;
+                reallocate_memory_maxpartsph();
+              }
+
+            CPU_Step[CPU_FOF] += measure_time();
+#endif /* #ifndef FOF_STOREIDS */
+
+            /* recreate the mesh that we had free to reduce peak memory usage */
+            create_mesh();
+            mesh_setup_exchange();
+          }
+        }
+#endif /* #ifdef FOF */
+
+      All.Ti_lastoutput = All.Ti_Current;
+
+      CPU_Step[CPU_SNAPSHOT] += measure_time();
+    }
+}
+
+/*! \brief This function fills the write buffer with particle data.
+ *
+ *  \param[out] buffer Buffer to be filled.
+ *  \param[in] blocknr ID of the output block (i.e. position, velocities...).
+ *  \param[in, out] startindex Particle index at which the scan of P starts;
+ *             set on return to the index following the last scanned particle.
+ *  \param[in] pc Number of particles of the given type to put in the buffer.
+ *  \param[in] type Particle type.
+ *  \param[in] subbox_flag Not used by this function.
+ *
+ *  \return void
+ */
+void fill_write_buffer(void *buffer, enum iofields blocknr, int *startindex, int pc, int type, int subbox_flag)
+{
+  int n, k, pindex, f;
+  MyOutputFloat *fp;
+  MyIDType *ip;
+  int *intp;
+
+  /* find the table entry that was registered for this block */
+  int field = -1;
+
+  for(f = 0; f < N_IO_Fields; f++)
+    {
+      if(IO_Fields[f].field == blocknr)
+        {
+          field = f;
+          break;
+        }
+    }
+
+  if(field < 0)
+    terminate("IO field=%d not registered with init_field()", (int)blocknr);
+
+  set_cosmo_factors_for_current_time();
+
+  fp              = (MyOutputFloat *)buffer;
+  ip              = (MyIDType *)buffer;
+  intp            = (int *)buffer;
+  double *doublep = (double *)buffer;
+  float *floatp   = (float *)buffer;
+
+  pindex = *startindex;
+
+  for(n = 0; n < pc; pindex++) /* n counts the particles written, pindex walks over P */
+    {
+      /* (this version has no SUBBOX_SNAPSHOTS specialized output here) */
+
+      /* normal particle output: only particles of the requested type are written */
+      if(P[pindex].Type == type)
+        {
+          if(IO_Fields[field].io_func)
+            {
+              int particle;
+              switch(IO_Fields[field].array)
+                {
+                  case A_NONE:
+                  case A_SPHP:
+                  case A_P:
+                    particle = pindex;
+                    break;
+                  case A_PS:
+                    terminate("Not good, trying to read into PS[]?\n");
+                    break;
+                  default:
+                    terminate("ERROR in fill_write_buffer: Array not found!\n");
+                    break;
+                }
+
+              switch(IO_Fields[field].type_in_file_output)
+                {
+                  case FILE_NONE:
+                    terminate("error");
+                    break;
+                  case FILE_INT:
+                    IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, intp, 0);
+                    intp += IO_Fields[field].values_per_block;
+                    n++;
+                    break;
+                  case FILE_MY_ID_TYPE:
+                    IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, ip, 0);
+                    ip += IO_Fields[field].values_per_block;
+                    n++;
+                    break;
+                  case FILE_MY_IO_FLOAT:
+                    IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, fp, 0);
+                    fp += IO_Fields[field].values_per_block;
+                    n++;
+                    break;
+                  case FILE_DOUBLE:
+                    IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, doublep, 0);
+                    doublep += IO_Fields[field].values_per_block;
+                    n++;
+                    break;
+                  case FILE_FLOAT:
+                    IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, floatp, 0);
+                    floatp += IO_Fields[field].values_per_block;
+                    n++;
+                    break;
+                }
+            }
+          else
+            {
+              void *array_pos;
+
+              switch(IO_Fields[field].array)
+                {
+                  case A_NONE:
+                    array_pos = 0;
+                    break;
+
+                  case A_SPHP:
+                    array_pos = SphP + pindex;
+                    break;
+
+                  case A_P:
+                    array_pos = P + pindex;
+                    break;
+                  case A_PS:
+                    array_pos = PS + pindex;
+                    break;
+
+                  default:
+                    terminate("ERROR in fill_write_buffer: Array not found!\n");
+                    break;
+                }
+
+              for(k = 0; k < IO_Fields[field].values_per_block; k++)
+                {
+                  double value = 0.;
+
+                  switch(IO_Fields[field].type_in_memory)
+                    {
+                      case MEM_INT:
+                        *intp = *((int *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(int)));
+                        intp++;
+                        break;
+
+                      case MEM_MY_ID_TYPE:
+                        *ip = *((MyIDType *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyIDType)));
+                        ip++;
+                        break;
+
+                      case MEM_FLOAT:
+                        value = *((float *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(float)));
+                        break;
+
+                      case MEM_DOUBLE:
+                        value = *((double *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(double)));
+                        break;
+
+                      case MEM_MY_SINGLE:
+                        value = *((MySingle *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MySingle)));
+                        break;
+
+                      case MEM_MY_FLOAT:
+                        value = *((MyFloat *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyFloat)));
+                        break;
+
+                      case MEM_MY_DOUBLE:
+                        value = *((MyDouble *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyDouble)));
+                        break;
+
+                      case MEM_NONE:
+                        terminate("ERROR in fill_write_buffer: reached MEM_NONE with no io_func specified!\n");
+                        break;
+
+                      default:
+                        terminate("ERROR in fill_write_buffer: Type not found!\n");
+                        break;
+                    }
+
+                  switch(IO_Fields[field].type_in_file_output)
+                    {
+                      case FILE_MY_IO_FLOAT:
+                        *fp = value;
+                        fp++;
+                        break;
+
+                      case FILE_DOUBLE:
+                        *doublep = value;
+                        doublep++;
+                        break;
+
+                      case FILE_FLOAT:
+                        *floatp = value;
+                        floatp++;
+                        break;
+
+                      default: /* MEM_INT and MEM_MY_ID_TYPE values were already stored in the switch above */
+                        break;
+                    }
+                }
+
+              n++;
+            }  // end of the io_func / plain-field alternative
+        }      // end of the particle type test
+    }          // end of the loop over particles
+
+  *startindex = pindex;
+}
+
+/*! \brief This function tells the size in bytes of one data entry in each of
+ *         the blocks defined for the output file.
+ *
+ *  \param[in] blocknr ID of the output block (i.e. position, velocities...).
+ *  \param[in] mode Nonzero selects the input type of the block, zero its
+ *             output type; FILE_MY_IO_FLOAT is sized as MyInputFloat when
+ *             reading and as MyOutputFloat when writing.
+ *
+ *  \return Size of the data entry in bytes, 0 if the block is not registered.
+ */
+int get_bytes_per_blockelement(enum iofields blocknr, int mode)
+{
+  int nbytes = 0;
+  int idx    = 0;
+
+  /* locate the first table entry registered for this block */
+  while(idx < N_IO_Fields && IO_Fields[idx].field != blocknr)
+    idx++;
+
+  if(idx >= N_IO_Fields)
+    return nbytes;
+
+  if(mode)
+    {
+      switch(IO_Fields[idx].type_in_file_input)
+        {
+          case FILE_NONE:
+            terminate("error");
+            break;
+          case FILE_INT:
+            nbytes = IO_Fields[idx].values_per_block * sizeof(int);
+            break;
+          case FILE_MY_ID_TYPE:
+            nbytes = IO_Fields[idx].values_per_block * sizeof(MyIDType);
+            break;
+          case FILE_MY_IO_FLOAT:
+            nbytes = IO_Fields[idx].values_per_block * sizeof(MyInputFloat);
+            break;
+          case FILE_DOUBLE:
+            nbytes = IO_Fields[idx].values_per_block * sizeof(double);
+            break;
+          case FILE_FLOAT:
+            nbytes = IO_Fields[idx].values_per_block * sizeof(float);
+            break;
+        }
+    }
+  else
+    {
+      switch(IO_Fields[idx].type_in_file_output)
+        {
+          case FILE_NONE:
+            terminate("error");
+            break;
+          case FILE_INT:
+            nbytes = IO_Fields[idx].values_per_block * sizeof(int);
+            break;
+          case FILE_MY_ID_TYPE:
+            nbytes = IO_Fields[idx].values_per_block * sizeof(MyIDType);
+            break;
+          case FILE_MY_IO_FLOAT:
+            nbytes = IO_Fields[idx].values_per_block * sizeof(MyOutputFloat);
+            break;
+          case FILE_DOUBLE:
+            nbytes = IO_Fields[idx].values_per_block * sizeof(double);
+            break;
+          case FILE_FLOAT:
+            nbytes = IO_Fields[idx].values_per_block * sizeof(float);
+            break;
+        }
+    }
+
+  return nbytes;
+}
+
+/*! \brief This function determines the data type of one block as it is
+ *         stored in the file.
+ *
+ *  Calls terminate() if the block was not registered with init_field.
+ *
+ *  \param[in] blocknr ID of the output block (i.e. position, velocities...).
+ *  \param[in] mode For input mode > 0, for output mode = 0.
+ *
+ *  \return typekey, the types_in_file value registered for the chosen mode.
+ */
+int get_datatype_in_block(enum iofields blocknr, int mode)
+{
+  int f;
+
+  for(f = 0; f < N_IO_Fields; f++)
+    {
+      if(IO_Fields[f].field != blocknr)
+        continue;
+
+      /* input and output type of a field are registered separately */
+      if(mode)
+        return IO_Fields[f].type_in_file_input;
+
+      return IO_Fields[f].type_in_file_output;
+    }
+
+  terminate("IO field=%d not registered with init_field()", (int)blocknr);
+  /* only reached if terminate() returns; never hand back an indeterminate value */
+  return FILE_NONE;
+}
+
+/*! \brief Returns the number of values that make up one data entry of an
+ *         I/O block.
+ *
+ *  The value is taken from the values_per_block member of the first
+ *  IO_Fields entry whose field equals blocknr.
+ *
+ *  Used only if output in HDF5 format is enabled.
+ *
+ *  \param[in] blocknr ID of the output block (i.e. position, velocities...).
+ *
+ *  \return Number of elements of one data entry.
+ */
+int get_values_per_blockelement(enum iofields blocknr)
+{
+  int f;
+
+  for(f = 0; f < N_IO_Fields; f++)
+    {
+      if(IO_Fields[f].field == blocknr)
+        return IO_Fields[f].values_per_block;
+    }
+
+  /* no entry of IO_Fields matches blocknr; same diagnostic as the other
+   * table lookups in this file (there is no switch statement here) */
+  terminate("error invalid field");
+  return 0;
+}
+
+/*! \brief Counts the particles stored in an output block.
+ *
+ *  The count is derived from header.npart. For IO_MASS only particle types
+ *  whose All.MassTable entry is zero contribute; for every other block the
+ *  type bitmask of the matching IO_Fields entry decides. Each contributing
+ *  type is flagged in typelist.
+ *
+ *  \param[in] blocknr ID of the output block (i.e. position, velocities...).
+ *  \param[out] typelist Array with NTYPES entries; an entry is set to 1 if
+ *              particles of that type are present in the block, and to 0
+ *              otherwise.
+ *
+ *  \return The total number of particles in the block.
+ */
+int get_particles_in_block(enum iofields blocknr, int *typelist)
+{
+  int type, idx;
+  int total = 0;
+
+  if(blocknr == IO_MASS)
+    {
+      /* with masses: only types without a tabulated mass are stored */
+      for(type = 0; type < NTYPES; type++)
+        {
+          if(All.MassTable[type] == 0 && header.npart[type] > 0)
+            {
+              typelist[type] = 1;
+              total += header.npart[type];
+            }
+          else
+            typelist[type] = 0;
+        }
+
+      return total;
+    }
+  else if(blocknr == IO_LASTENTRY)
+    {
+      terminate("reached last entry in switch - strange.");
+    }
+  else
+    {
+      for(idx = 0; idx < N_IO_Fields; idx++)
+        {
+          if(IO_Fields[idx].field != blocknr)
+            continue;
+
+          /* bit 'type' of the field's typelist marks the types carrying this block */
+          for(type = 0; type < NTYPES; type++)
+            {
+              if((IO_Fields[idx].typelist & (1 << type)) && header.npart[type] > 0)
+                {
+                  typelist[type] = 1;
+                  total += header.npart[type];
+                }
+              else
+                typelist[type] = 0;
+            }
+
+          return total;
+        }
+    }
+
+  terminate("reached end of function - this should not happen");
+  return 0;
+}
+
+/*! \brief Decides whether a block takes part in the current read or write.
+ *
+ *  The rules for reading and writing differ, so the direction is selected
+ *  with the flag write (1=write, 0=read). In read mode a filter that depends
+ *  on compile-time options may accept or reject the block early; otherwise
+ *  the first IO_Fields entry matching blocknr decides.
+ *
+ *  \param[in] blocknr ID of the output block (i.e. position, velocities...).
+ *  \param[in] write If 0 the function is in read mode, otherwise it is in
+ *             write mode.
+ *
+ *  \return 1 if the block is present, 0 otherwise (including the case that
+ *          no IO_Fields entry matches blocknr).
+ */
+int blockpresent(enum iofields blocknr, int write)
+{
+  int idx;
+
+  if(!write)
+    {
+#ifdef PASSIVE_SCALARS
+      if(RestartFlag == 0 && blocknr == IO_PASS)
+        return 1;
+#endif /* #ifdef PASSIVE_SCALARS */
+      /* the two preprocessor-selected conditions below are nested: the
+       * 'return 0' is reached only if both of them hold */
+#if defined(MHD) && !defined(MHD_SEEDFIELD)
+      if(All.ICFormat != 3 && RestartFlag == 0 && (blocknr > IO_U && blocknr != IO_BFLD))
+#else  /* #if defined(MHD) && !defined(MHD_SEEDFIELD) */
+      if(All.ICFormat != 3 && RestartFlag == 0 && blocknr > IO_U)
+#endif /* #if defined(MHD) && !defined(MHD_SEEDFIELD) #else */
+#ifdef READ_LEGACY_ICS
+        if(RestartFlag == 0 && blocknr > IO_U && blocknr != IO_BFLD)
+#else  /* #ifdef  READ_LEGACY_ICS */
+        if(RestartFlag == 0)
+#endif /* #ifdef  READ_LEGACY_ICS #else */
+          return 0; /* ignore all other blocks in non-HDF5 initial conditions */
+    }
+
+  /* locate the first table entry that describes this block */
+  for(idx = 0; idx < N_IO_Fields; idx++)
+    if(IO_Fields[idx].field == blocknr)
+      break;
+
+  if(idx >= N_IO_Fields)
+    return 0; /* default: not present */
+
+  /* reading: present whenever the field has an input file type */
+  if(!write)
+    return (IO_Fields[idx].type_in_file_input != FILE_NONE) ? 1 : 0;
+
+  /* writing: fields without an output file type are never written */
+  if(IO_Fields[idx].type_in_file_output == FILE_NONE)
+    return 0;
+
+  /* subboxes: write all fields except those marked by SN_NO_SUBBOX or SN_MINI_ONLY
+     (must come first to ignore DumpFlag) */
+  if(subbox_dump)
+    return (IO_Fields[idx].snap_type != SN_NO_SUBBOX && IO_Fields[idx].snap_type != SN_MINI_ONLY) ? 1 : 0;
+
+  /* normal full snapshot (with or without groupcat): only skip fields marked by SN_MINI_ONLY */
+  if(DumpFlag == 1 || DumpFlag == 2)
+    return (IO_Fields[idx].snap_type != SN_MINI_ONLY) ? 1 : 0;
+
+  /* mini-snaps: write only those fields marked by either SN_MINI or SN_MINI_ONLY;
+     temporarily hard-coded that all BH fields (typelist == BHS_ONLY) are included as well */
+  if(DumpFlag == 3)
+    return (IO_Fields[idx].snap_type == SN_MINI || IO_Fields[idx].snap_type == SN_MINI_ONLY ||
+            IO_Fields[idx].typelist == BHS_ONLY)
+               ? 1
+               : 0;
+
+  return 0;
+}
+
+/*! \brief Looks up the short 4-character block name of a block number.
+ *
+ *   This is stored in front of each block for snapshot FileFormat=2.
+ *
+ *  \param[in] blocknr ID of the output block (i.e. position, velocities...).
+ *  \param[out] label Buffer of at least 4 characters receiving the block
+ *              name. Exactly 4 characters are written by strncpy(), so no
+ *              terminating null is stored if the name has 4 or more
+ *              characters.
+ *
+ *  \return void
+ */
+void get_Tab_IO_Label(enum iofields blocknr, char *label)
+{
+  int idx = 0;
+
+  /* advance to the first table entry describing this block */
+  while(idx < N_IO_Fields && IO_Fields[idx].field != blocknr)
+    idx++;
+
+  if(idx < N_IO_Fields)
+    {
+      strncpy(label, IO_Fields[idx].label, 4);
+      return;
+    }
+
+  terminate("error invalid field");
+}
+
+/*! \brief Looks up the dataset name that belongs to a block number.
+ *
+ *   This is needed to name the dataset if the output is written in HDF5
+ *   format.
+ *
+ *  \param[in] blocknr ID of the output block (i.e. position, velocities...).
+ *  \param[out] buf Buffer receiving the null-terminated dataset name. The
+ *              copy is unbounded (strcpy), so the caller has to provide a
+ *              buffer that is large enough for the stored name.
+ *
+ *  \return void
+ */
+void get_dataset_name(enum iofields blocknr, char *buf)
+{
+  int idx = 0;
+
+  /* advance to the first table entry describing this block */
+  while(idx < N_IO_Fields && IO_Fields[idx].field != blocknr)
+    idx++;
+
+  if(idx < N_IO_Fields)
+    {
+      strcpy(buf, IO_Fields[idx].datasetname);
+      return;
+    }
+
+  terminate("error invalid field");
+}
+
+/*! \brief Actually write the snapshot file to the disk.
+ *
+ *  This function writes a snapshot file containing the data from processors
+ *  'writeTask' to 'lastTask'. 'writeTask' is the one that actually writes.
+ *  Each snapshot file contains a header and cell/particle details. The
+ *  output fields for each particle type depend on included physics
+ *  and compile-time flags.
+ *
+ *  All tasks of the writing group have to call this function together: the
+ *  particle counts and the block data are exchanged with matching
+ *  point-to-point MPI calls between writeTask and the other group members.
+ *
+ *  With TOLERATE_WRITE_ERROR the whole procedure is repeated once with
+ *  alternative_fname as file name if WriteErrorFlag was raised during the
+ *  first attempt; a second failure terminates the run.
+ *
+ *  \param[in] fname String containing the file name. For SnapFormat 3 the
+ *             suffix ".hdf5" is appended.
+ *  \param[in] writeTask The rank of the task in a writing group which is
+ *             responsible for the output operations.
+ *  \param[in] lastTask The rank of the last task in a writing group.
+ *  \param[in] subbox_flag Forwarded to fill_write_buffer(); a non-zero value
+ *             also suppresses the per-block progress message. Per the
+ *             original documentation, a value greater than 0 instructs the
+ *             code to output only a subset of the whole domain.
+ *
+ *  \return void
+ */
+void write_file(char *fname, int writeTask, int lastTask, int subbox_flag)
+{
+  int type, bytes_per_blockelement, npart, nextblock, typelist[NTYPES];
+  int n_for_this_task, n, p, pc, offset = 0, task;
+  int blockmaxlen, ntot_type[NTYPES], nn[NTYPES];
+  enum iofields blocknr;
+  char label[8];
+  int bnr;
+  int blksize;
+  MPI_Status status;
+  FILE *fd  = 0;
+  int pcsum = 0;
+
+#ifdef HAVE_HDF5
+  hid_t hdf5_file = 0, hdf5_grp[NTYPES], hdf5_headergrp = 0, hdf5_dataspace_memory;
+  hid_t hdf5_datatype = 0, hdf5_dataspace_in_file = 0, hdf5_dataset = 0;
+  hsize_t dims[2], count[2], start[2];
+  int rank = 0;
+  char buf[500];
+#ifdef HDF5_FILTERS
+  hid_t hdf5_properties;
+#endif /* #ifdef HDF5_FILTERS */
+  hid_t hdf5_paramsgrp = 0;
+  hid_t hdf5_configgrp = 0;
+#endif /* #ifdef HAVE_HDF5 */
+
+/* writes the current value of blksize as one int to fd; used in front of and
+ * behind every record of SnapFormat 1 and 2 */
+#define SKIP                                 \
+  {                                          \
+    my_fwrite(&blksize, sizeof(int), 1, fd); \
+  }
+
+#ifdef TOLERATE_WRITE_ERROR
+  for(int try_io = 0; try_io < 2; try_io++)
+    {
+      WriteErrorFlag = 0;
+#ifdef HAVE_HDF5
+      {
+        /* H5Eget_current_stack() clears the current error stack by moving its
+         * content into a new handle; close that handle so it is not leaked */
+        hid_t hdf5_errstack = H5Eget_current_stack();
+        if(hdf5_errstack >= 0)
+          H5Eclose_stack(hdf5_errstack);
+      }
+#endif /* #ifdef HAVE_HDF5 */
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+      /* determine particle numbers of each type in file: writeTask sums the
+       * local counts (n_type) of all group members and sends the totals back */
+      if(ThisTask == writeTask)
+        {
+          for(n = 0; n < NTYPES; n++)
+            ntot_type[n] = n_type[n];
+
+          for(task = writeTask + 1; task <= lastTask; task++)
+            {
+              MPI_Recv(&nn[0], NTYPES, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status);
+              for(n = 0; n < NTYPES; n++)
+                ntot_type[n] += nn[n];
+            }
+
+          for(task = writeTask + 1; task <= lastTask; task++)
+            MPI_Send(&ntot_type[0], NTYPES, MPI_INT, task, TAG_N, MPI_COMM_WORLD);
+        }
+      else
+        {
+          MPI_Send(&n_type[0], NTYPES, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD);
+          MPI_Recv(&ntot_type[0], NTYPES, MPI_INT, writeTask, TAG_N, MPI_COMM_WORLD, &status);
+        }
+
+      /* fill file header; the global totals are split into their lower and
+       * upper 32 bits */
+      for(n = 0; n < NTYPES; n++)
+        {
+          header.npart[n]              = ntot_type[n];
+          header.npartTotal[n]         = (unsigned int)ntot_type_all[n];
+          header.npartTotalHighWord[n] = (unsigned int)(ntot_type_all[n] >> 32);
+        }
+
+      for(n = 0; n < NTYPES; n++)
+        header.mass[n] = All.MassTable[n];
+
+      header.time = All.Time;
+
+      if(All.ComovingIntegrationOn)
+        header.redshift = 1.0 / All.Time - 1;
+      else
+        header.redshift = 0;
+
+      header.flag_sfr        = 0;
+      header.flag_feedback   = 0;
+      header.flag_cooling    = 0;
+      header.flag_stellarage = 0;
+      header.flag_metals     = 0;
+
+      header.flag_tracer_field = 0;
+
+#ifdef COOLING
+      header.flag_cooling = 1;
+#endif /* #ifdef COOLING */
+
+#ifdef USE_SFR
+      header.flag_sfr      = 1;
+      header.flag_feedback = 1;
+#endif /* #ifdef USE_SFR */
+
+      header.num_files   = All.NumFilesPerSnapshot;
+      header.BoxSize     = All.BoxSize;
+      header.Omega0      = All.Omega0;
+      header.OmegaLambda = All.OmegaLambda;
+      header.HubbleParam = All.HubbleParam;
+
+#ifdef OUTPUT_IN_DOUBLEPRECISION
+      header.flag_doubleprecision = 1;
+#else  /* #ifdef OUTPUT_IN_DOUBLEPRECISION */
+      header.flag_doubleprecision = 0;
+#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION #else */
+
+      /* open file and write header */
+
+      if(ThisTask == writeTask)
+        {
+          if(All.SnapFormat == 3)
+            {
+#ifdef HAVE_HDF5
+              sprintf(buf, "%s.hdf5", fname);
+              hdf5_file = my_H5Fcreate(buf, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+
+              hdf5_headergrp = my_H5Gcreate(hdf5_file, "/Header", 0);
+
+              /* one group per particle type that is present in this file */
+              for(type = 0; type < NTYPES; type++)
+                {
+                  if(header.npart[type] > 0)
+                    {
+                      sprintf(buf, "/PartType%d", type);
+                      hdf5_grp[type] = my_H5Gcreate(hdf5_file, buf, 0);
+                    }
+                }
+
+              write_header_attributes_in_hdf5(hdf5_headergrp);
+
+              hdf5_paramsgrp = my_H5Gcreate(hdf5_file, "/Parameters", 0);
+              write_parameters_attributes_in_hdf5(hdf5_paramsgrp);
+
+              hdf5_configgrp = my_H5Gcreate(hdf5_file, "/Config", 0);
+              write_compile_time_options_in_hdf5(hdf5_configgrp);
+#endif /* #ifdef HAVE_HDF5 */
+            }
+          else
+            {
+              if(!(fd = fopen(fname, "w")))
+                {
+                  printf("can't open file `%s' for writing snapshot.\n", fname);
+                  terminate("file open error");
+                }
+
+              if(All.SnapFormat == 2)
+                {
+                  /* format 2 puts a small record with a 4-character label and
+                   * the size of the following record in front of each block */
+                  blksize = sizeof(int) + 4 * sizeof(char);
+                  SKIP;
+                  my_fwrite((void *)"HEAD", sizeof(char), 4, fd);
+                  nextblock = sizeof(header) + 2 * sizeof(int);
+                  my_fwrite(&nextblock, sizeof(int), 1, fd);
+                  SKIP;
+                }
+
+              blksize = sizeof(header);
+              SKIP;
+              my_fwrite(&header, sizeof(header), 1, fd);
+              SKIP;
+            }
+        }
+
+      /* walk through all block numbers up to IO_LASTENTRY (1000 is only a
+       * hard upper bound for the loop) */
+      for(bnr = 0; bnr < 1000; bnr++)
+        {
+          blocknr = (enum iofields)bnr;
+
+          if(blocknr == IO_LASTENTRY)
+            break;
+
+          if(blockpresent(blocknr, 1))
+            {
+              bytes_per_blockelement = get_bytes_per_blockelement(blocknr, 0);
+
+              /* number of block elements that fit into COMMBUFFERSIZE bytes */
+              blockmaxlen = (int)(COMMBUFFERSIZE / bytes_per_blockelement);
+
+              npart = get_particles_in_block(blocknr, &typelist[0]);
+
+              if(npart > 0)
+                {
+                  if(ThisTask == 0)
+                    {
+                      char buf[1000];
+
+                      get_dataset_name(blocknr, buf);
+                      if(subbox_flag == 0)
+                        printf("writing block %d (%s)...\n", blocknr, buf);
+                    }
+
+                  if(ThisTask == writeTask)
+                    {
+                      if(All.SnapFormat == 1 || All.SnapFormat == 2)
+                        {
+                          if(All.SnapFormat == 2)
+                            {
+                              blksize = sizeof(int) + 4 * sizeof(char);
+                              SKIP;
+                              get_Tab_IO_Label(blocknr, label);
+                              my_fwrite(label, sizeof(char), 4, fd);
+                              nextblock = npart * bytes_per_blockelement + 2 * sizeof(int);
+                              my_fwrite(&nextblock, sizeof(int), 1, fd);
+                              SKIP;
+                            }
+
+                          /* leading size marker of the data record; the
+                           * matching trailing marker follows after all types */
+                          blksize = npart * bytes_per_blockelement;
+                          SKIP;
+                        }
+                    }
+
+                  for(type = 0; type < NTYPES; type++)
+                    {
+                      if(typelist[type])
+                        {
+#ifdef HAVE_HDF5
+                          /* create the dataset for this block and particle type */
+                          if(ThisTask == writeTask && All.SnapFormat == 3 && header.npart[type] > 0)
+                            {
+                              switch(get_datatype_in_block(blocknr, 0))
+                                {
+                                  case FILE_INT:
+                                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT);
+                                    break;
+                                  case FILE_MY_IO_FLOAT:
+#ifdef OUTPUT_IN_DOUBLEPRECISION
+                                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE);
+#else  /* #ifdef OUTPUT_IN_DOUBLEPRECISION */
+                                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT);
+#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION #else */
+                                    break;
+                                  case FILE_MY_ID_TYPE:
+#ifdef LONGIDS
+                                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT64);
+#else  /* #ifdef LONGIDS */
+                                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT32);
+#endif /* #ifdef LONGIDS #else */
+                                    break;
+                                  case FILE_DOUBLE:
+                                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE);
+                                    break;
+                                  case FILE_FLOAT:
+                                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT);
+                                    break;
+                                }
+
+                              /* blocks with a single value per element become
+                               * one-dimensional datasets */
+                              dims[0] = header.npart[type];
+                              dims[1] = get_values_per_blockelement(blocknr);
+                              if(dims[1] == 1)
+                                rank = 1;
+                              else
+                                rank = 2;
+
+                              get_dataset_name(blocknr, buf);
+
+                              hdf5_dataspace_in_file = my_H5Screate_simple(rank, dims, NULL);
+#ifdef HDF5_FILTERS
+                              hdf5_properties = my_H5Pcreate(H5P_DATASET_CREATE);
+                              my_H5Pset_chunk(hdf5_properties, rank, dims); /* set chunk size */
+                              my_H5Pset_shuffle(hdf5_properties);           /* reshuffle bytes to get better compression ratio */
+                              my_H5Pset_deflate(hdf5_properties, 9);        /* gzip compression level 9 */
+                              my_H5Pset_fletcher32(hdf5_properties);        /* Fletcher32 checksum on dataset */
+
+                              if(my_H5Pall_filters_avail(hdf5_properties))
+                                hdf5_dataset =
+                                    my_H5Dcreate(hdf5_grp[type], buf, hdf5_datatype, hdf5_dataspace_in_file, hdf5_properties);
+                              else
+                                {
+                                  printf("HDF5_FILTERS: Warning selected filters not available! Writing data without filters! \n");
+                                  myflush(stdout);
+                                  hdf5_dataset = my_H5Dcreate(hdf5_grp[type], buf, hdf5_datatype, hdf5_dataspace_in_file, H5P_DEFAULT);
+                                }
+#else  /* #ifdef HDF5_FILTERS */
+                              hdf5_dataset = my_H5Dcreate(hdf5_grp[type], buf, hdf5_datatype, hdf5_dataspace_in_file, H5P_DEFAULT);
+#endif /* #ifdef HDF5_FILTERS #else */
+                              write_dataset_attributes(hdf5_dataset, blocknr);
+                            }
+#endif /* #ifdef HAVE_HDF5 */
+
+                          /* pcsum: elements of this type already written to the file;
+                           * remaining_space/bufferstart: free and used elements of
+                           * CommBuffer. Every task of the group keeps these counters
+                           * in step, not only writeTask. */
+                          pcsum               = 0;
+                          int remaining_space = blockmaxlen;
+                          int bufferstart     = 0;
+
+                          for(task = writeTask, offset = 0; task <= lastTask; task++)
+                            {
+                              /* the task whose turn it is announces its element
+                               * count to all other members of the group */
+                              if(task == ThisTask)
+                                {
+                                  n_for_this_task = n_type[type];
+
+                                  for(p = writeTask; p <= lastTask; p++)
+                                    if(p != ThisTask)
+                                      MPI_Send(&n_for_this_task, 1, MPI_INT, p, TAG_NFORTHISTASK, MPI_COMM_WORLD);
+                                }
+                              else
+                                MPI_Recv(&n_for_this_task, 1, MPI_INT, task, TAG_NFORTHISTASK, MPI_COMM_WORLD, &status);
+
+                              while(n_for_this_task > 0)
+                                {
+                                  /* chunk size: limited by the buffer capacity and
+                                   * by the space still free in the buffer */
+                                  pc = n_for_this_task;
+
+                                  if(pc > blockmaxlen)
+                                    pc = blockmaxlen;
+
+                                  if(pc > remaining_space)
+                                    pc = remaining_space;
+
+                                  void *buffer = (void *)((char *)CommBuffer + bufferstart * bytes_per_blockelement);
+
+                                  /* the owning task fills the chunk; unless it is
+                                   * writeTask itself it then ships it to writeTask */
+                                  if(ThisTask == task)
+                                    fill_write_buffer(buffer, blocknr, &offset, pc, type, subbox_flag);
+
+                                  if(ThisTask == writeTask && task != writeTask)
+                                    MPI_Recv(buffer, bytes_per_blockelement * pc, MPI_BYTE, task, TAG_PDATA, MPI_COMM_WORLD, &status);
+
+                                  if(ThisTask != writeTask && task == ThisTask)
+                                    MPI_Ssend(buffer, bytes_per_blockelement * pc, MPI_BYTE, writeTask, TAG_PDATA, MPI_COMM_WORLD);
+
+                                  remaining_space -= pc;
+                                  bufferstart += pc;
+
+                                  if(remaining_space == 0)
+                                    {
+                                      /* write stuff (number of elements equal to bufferstart) */
+                                      if(ThisTask == writeTask)
+                                        {
+                                          if(All.SnapFormat == 3)
+                                            {
+#ifdef HAVE_HDF5
+                                              start[0] = pcsum;
+                                              start[1] = 0;
+
+                                              count[0] = bufferstart;
+                                              count[1] = get_values_per_blockelement(blocknr);
+
+                                              my_H5Sselect_hyperslab(hdf5_dataspace_in_file, H5S_SELECT_SET, start, NULL, count, NULL);
+
+                                              dims[0]               = bufferstart;
+                                              dims[1]               = get_values_per_blockelement(blocknr);
+                                              hdf5_dataspace_memory = my_H5Screate_simple(rank, dims, NULL);
+
+                                              my_H5Dwrite(hdf5_dataset, hdf5_datatype, hdf5_dataspace_memory, hdf5_dataspace_in_file,
+                                                          H5P_DEFAULT, CommBuffer, buf);
+
+                                              my_H5Sclose(hdf5_dataspace_memory, H5S_SIMPLE);
+#endif /* #ifdef HAVE_HDF5 */
+                                            }
+                                          else
+                                            {
+                                              my_fwrite(CommBuffer, bytes_per_blockelement, bufferstart, fd);
+                                            }
+                                        }
+
+                                      pcsum += bufferstart;
+                                      remaining_space = blockmaxlen;
+                                      bufferstart     = 0;
+                                    }
+
+                                  n_for_this_task -= pc;
+                                }
+                            }
+
+                          if(bufferstart > 0)
+                            {
+                              /* write remaining stuff (number of elements equal to bufferstart) */
+                              if(ThisTask == writeTask)
+                                {
+                                  if(All.SnapFormat == 3)
+                                    {
+#ifdef HAVE_HDF5
+                                      start[0] = pcsum;
+                                      start[1] = 0;
+
+                                      count[0] = bufferstart;
+                                      count[1] = get_values_per_blockelement(blocknr);
+
+                                      my_H5Sselect_hyperslab(hdf5_dataspace_in_file, H5S_SELECT_SET, start, NULL, count, NULL);
+
+                                      dims[0]               = bufferstart;
+                                      dims[1]               = get_values_per_blockelement(blocknr);
+                                      hdf5_dataspace_memory = my_H5Screate_simple(rank, dims, NULL);
+
+                                      my_H5Dwrite(hdf5_dataset, hdf5_datatype, hdf5_dataspace_memory, hdf5_dataspace_in_file,
+                                                  H5P_DEFAULT, CommBuffer, buf);
+
+                                      my_H5Sclose(hdf5_dataspace_memory, H5S_SIMPLE);
+#endif /* #ifdef HAVE_HDF5 */
+                                    }
+                                  else
+                                    {
+                                      my_fwrite(CommBuffer, bytes_per_blockelement, bufferstart, fd);
+                                    }
+                                }
+
+                              pcsum += bufferstart;
+                              remaining_space = blockmaxlen;
+                              bufferstart     = 0;
+                            }
+
+#ifdef HAVE_HDF5
+                          /* release the per-dataset HDF5 handles created above */
+                          if(ThisTask == writeTask && All.SnapFormat == 3 && header.npart[type] > 0)
+                            {
+                              if(All.SnapFormat == 3)
+                                {
+                                  my_H5Dclose(hdf5_dataset, buf);
+#ifdef HDF5_FILTERS
+                                  my_H5Pclose(hdf5_properties);
+#endif /* #ifdef HDF5_FILTERS */
+                                  my_H5Sclose(hdf5_dataspace_in_file, H5S_SIMPLE);
+                                  my_H5Tclose(hdf5_datatype);
+                                }
+                            }
+#endif /* #ifdef HAVE_HDF5 */
+                        }
+                    }
+
+                  /* trailing size marker of the data record (blksize still
+                   * holds the value written in front of the data) */
+                  if(ThisTask == writeTask)
+                    {
+                      if(All.SnapFormat == 1 || All.SnapFormat == 2)
+                        SKIP;
+                    }
+                }
+
+#ifdef TOLERATE_WRITE_ERROR
+              /* writeTask tells the other group members whether a write error occurred */
+              if(ThisTask == writeTask)
+                {
+                  for(int p = writeTask; p <= lastTask; p++)
+                    if(p != ThisTask)
+                      MPI_Send(&WriteErrorFlag, 1, MPI_INT, p, TAG_KEY, MPI_COMM_WORLD);
+                }
+              else
+                MPI_Recv(&WriteErrorFlag, 1, MPI_INT, writeTask, TAG_KEY, MPI_COMM_WORLD, &status);
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+            }
+
+#ifdef TOLERATE_WRITE_ERROR
+          if(WriteErrorFlag) /* don't write further blocks in this case */
+            break;
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+        }
+
+      if(ThisTask == writeTask)
+        {
+          if(All.SnapFormat == 3)
+            {
+#ifdef HAVE_HDF5
+              for(type = NTYPES - 1; type >= 0; type--)
+                if(header.npart[type] > 0)
+                  {
+                    /* rebuild the group name: buf still holds the name of the
+                     * last dataset written, not the name of this group */
+                    sprintf(buf, "/PartType%d", type);
+                    my_H5Gclose(hdf5_grp[type], buf);
+                  }
+              my_H5Gclose(hdf5_headergrp, "/Header");
+              my_H5Gclose(hdf5_paramsgrp, "/Parameters");
+              my_H5Gclose(hdf5_configgrp, "/Config");
+
+              sprintf(buf, "%s.hdf5", fname);
+              my_H5Fclose(hdf5_file, buf);
+#endif /* #ifdef HAVE_HDF5 */
+            }
+          else
+            fclose(fd);
+        }
+
+#ifdef TOLERATE_WRITE_ERROR
+      if(WriteErrorFlag == 0)
+        break;
+
+      if(try_io == 0)
+        {
+          if(ThisTask == writeTask)
+            {
+              printf(
+                  "TOLERATE_WRITE_ERROR: Try to write to alternative file: masterTask=%d  lastTask=%d  try_io=%d "
+                  "alternative-filename='%s'\n",
+                  writeTask, lastTask, try_io, alternative_fname);
+              myflush(stdout);
+            }
+          fname = alternative_fname; /* try on a different output directory */
+        }
+      else
+        {
+          terminate("TOLERATE_WRITE_ERROR: Second try with alternative file failed too.\n");
+        }
+    }
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+}
+
+#ifdef HAVE_HDF5
+/*! \brief Write the fields contained in the header group of the HDF5 snapshot
+ *         file.
+ *
+ *  This function stores the fields of the structure io_header as attributes
+ *  belonging to the header group of the HDF5 file.
+ *
+ *  \param[in] handle A handle for the header group.
+ *
+ *  \return void
+ */
+void write_header_attributes_in_hdf5(hid_t handle)
+{
+  hsize_t adim[1] = {NTYPES};
+  hid_t hdf5_dataspace, hdf5_attribute;
+
+  hdf5_dataspace = my_H5Screate(H5S_SIMPLE);
+  my_H5Sset_extent_simple(hdf5_dataspace, 1, adim, NULL, "NumPart_ThisFile");
+  hdf5_attribute = my_H5Acreate(handle, "NumPart_ThisFile", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, header.npart, "NumPart_ThisFile");
+  my_H5Aclose(hdf5_attribute, "NumPart_ThisFile");
+  my_H5Sclose(hdf5_dataspace, H5S_SIMPLE);
+
+  hdf5_dataspace = my_H5Screate(H5S_SIMPLE);
+  my_H5Sset_extent_simple(hdf5_dataspace, 1, adim, NULL, "NumPart_Total");
+  hdf5_attribute = my_H5Acreate(handle, "NumPart_Total", H5T_NATIVE_UINT, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_UINT, header.npartTotal, "NumPart_Total");
+  my_H5Aclose(hdf5_attribute, "NumPart_Total");
+  my_H5Sclose(hdf5_dataspace, H5S_SIMPLE);
+
+  hdf5_dataspace = my_H5Screate(H5S_SIMPLE);
+  my_H5Sset_extent_simple(hdf5_dataspace, 1, adim, NULL, "NumPart_Total_HighWord");
+  hdf5_attribute = my_H5Acreate(handle, "NumPart_Total_HighWord", H5T_NATIVE_UINT, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_UINT, header.npartTotalHighWord, "NumPart_Total_HighWord");
+  my_H5Aclose(hdf5_attribute, "NumPart_Total_HighWord");
+  my_H5Sclose(hdf5_dataspace, H5S_SIMPLE);
+
+  hdf5_dataspace = my_H5Screate(H5S_SIMPLE);
+  my_H5Sset_extent_simple(hdf5_dataspace, 1, adim, NULL, "MassTable");
+  hdf5_attribute = my_H5Acreate(handle, "MassTable", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, header.mass, "MassTable");
+  my_H5Aclose(hdf5_attribute, "MassTable");
+  my_H5Sclose(hdf5_dataspace, H5S_SIMPLE);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "Time", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.time, "Time");
+  my_H5Aclose(hdf5_attribute, "Time");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "Redshift", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.redshift, "Redshift");
+  my_H5Aclose(hdf5_attribute, "Redshift");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "BoxSize", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.BoxSize, "BoxSize");
+  my_H5Aclose(hdf5_attribute, "BoxSize");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "NumFilesPerSnapshot", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.num_files, "NumFilesPerSnapshot");
+  my_H5Aclose(hdf5_attribute, "NumFilesPerSnapshot");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "Omega0", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.Omega0, "Omega0");
+  my_H5Aclose(hdf5_attribute, "Omega0");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "OmegaLambda", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.OmegaLambda, "OmegaLambda");
+  my_H5Aclose(hdf5_attribute, "OmegaLambda");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "OmegaBaryon", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &All.OmegaBaryon, "OmegaBaryon");
+  my_H5Aclose(hdf5_attribute, "OmegaBaryon");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "HubbleParam", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.HubbleParam, "HubbleParam");
+  my_H5Aclose(hdf5_attribute, "HubbleParam");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "Flag_Sfr", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_sfr, "Flag_Sfr");
+  my_H5Aclose(hdf5_attribute, "Flag_Sfr");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "Flag_Cooling", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_cooling, "Flag_Cooling");
+  my_H5Aclose(hdf5_attribute, "Flag_Cooling");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "Flag_StellarAge", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_stellarage, "Flag_StellarAge");
+  my_H5Aclose(hdf5_attribute, "Flag_StellarAge");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "Flag_Metals", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_metals, "Flag_Metals");
+  my_H5Aclose(hdf5_attribute, "Flag_Metals");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "Flag_Feedback", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_feedback, "Flag_Feedback");
+  my_H5Aclose(hdf5_attribute, "Flag_Feedback");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "Flag_DoublePrecision", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_doubleprecision, "Flag_DoublePrecision");
+  my_H5Aclose(hdf5_attribute, "Flag_DoublePrecision");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "Composition_vector_length", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.composition_vector_length, "Composition_vector_length");
+  my_H5Aclose(hdf5_attribute, "Composition_vector_length");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hid_t atype = my_H5Tcopy(H5T_C_S1);
+
+  my_H5Tset_size(atype, strlen(GIT_COMMIT));
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "Git_commit", atype, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, atype, GIT_COMMIT, "Git_commit");
+  my_H5Aclose(hdf5_attribute, "Git_commit");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  my_H5Tset_size(atype, strlen(GIT_DATE));
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "Git_date", atype, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, atype, GIT_DATE, "Git_date");
+  my_H5Aclose(hdf5_attribute, "Git_date");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "UnitLength_in_cm", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &All.UnitLength_in_cm, "UnitLength_in_cm");
+  my_H5Aclose(hdf5_attribute, "UnitLength_in_cm");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "UnitMass_in_g", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &All.UnitMass_in_g, "UnitMass_in_g");
+  my_H5Aclose(hdf5_attribute, "UnitMass_in_g");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+
+  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
+  hdf5_attribute = my_H5Acreate(handle, "UnitVelocity_in_cm_per_s", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
+  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &All.UnitVelocity_in_cm_per_s, "UnitVelocity_in_cm_per_s");
+  my_H5Aclose(hdf5_attribute, "UnitVelocity_in_cm_per_s");
+  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
+}
+
+/*! \brief Store the run-time parameters as attributes of an HDF5 group.
+ *
+ *  Walks over the All.NParameters entries of the Parameters,
+ *  ParametersType and ParametersValue tables and creates one scalar
+ *  attribute per entry, named after the parameter. The type code selects
+ *  the HDF5 datatype used for the attribute:
+ *    1 (REAL)   -> H5T_NATIVE_DOUBLE
+ *    2 (STRING) -> copy of H5T_C_S1 sized with MAXLEN_PARAM_VALUE
+ *    3 (INT)    -> H5T_NATIVE_INT
+ *  Entries with any other type code are skipped. The string datatype is
+ *  created once and closed again before returning.
+ *
+ *  \param[in] handle A handle for the parameter group.
+ *
+ *  \return void
+ */
+void write_parameters_attributes_in_hdf5(hid_t handle)
+{
+  /* string datatype shared by all STRING parameters */
+  hid_t string_type = my_H5Tcopy(H5T_C_S1);
+  my_H5Tset_size(string_type, MAXLEN_PARAM_VALUE);
+
+  for(int idx = 0; idx < All.NParameters; idx++)
+    {
+      /* map the type code of this parameter onto an HDF5 datatype */
+      const int type_code = ParametersType[idx];
+      hid_t value_type;
+
+      if(type_code == 1) /* REAL */
+        value_type = H5T_NATIVE_DOUBLE;
+      else if(type_code == 2) /* STRING */
+        value_type = string_type;
+      else if(type_code == 3) /* INT */
+        value_type = H5T_NATIVE_INT;
+      else
+        continue; /* unknown type code: no attribute is written */
+
+      /* the create/write/close sequence is the same for every datatype */
+      hid_t space = my_H5Screate(H5S_SCALAR);
+      hid_t attr  = my_H5Acreate(handle, Parameters[idx], value_type, space, H5P_DEFAULT);
+      my_H5Awrite(attr, value_type, ParametersValue[idx], Parameters[idx]);
+      my_H5Aclose(attr, Parameters[idx]);
+      my_H5Sclose(space, H5S_SCALAR);
+    }
+
+  my_H5Tclose(string_type);
+}
+
+/*! \brief A simple error handler for HDF5.
+ *
+ *  With TOLERATE_WRITE_ERROR, write_error() is called to report the problem
+ *  (unless FlagNyt is set) and 1 is returned. Without TOLERATE_WRITE_ERROR
+ *  nothing is done here and 0 is returned; what happens next is up to the
+ *  code that installed the handler (see also the HDF5 documentation).
+ *
+ *  \param[in] unused The parameter is not used, but it is necessary for
+ *             compatibility with the HDF5 library.
+ *
+ *  \return 1 if write errors are tolerated, 0 otherwise.
+ */
+herr_t my_hdf5_error_handler(void *unused)
+{
+  herr_t status = 0;
+#ifdef TOLERATE_WRITE_ERROR
+  if(!FlagNyt)
+    write_error(2, 0, 0);
+  status = 1;
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+  return status;
+}
+
+/*! \brief Attach scaling and unit attributes to an HDF5 dataset.
+ *
+ *  Looks up the IO_Fields entry that belongs to blocknr. If such an entry
+ *  exists and carries a unit (hasunit != 0), six scalar double attributes
+ *  are written to the dataset, in this order:
+ *    "a_scaling", "h_scaling"  taken from the entry (.a, .h) if
+ *                              All.ComovingIntegrationOn is set, else 0
+ *    "length_scaling"          entry member .L
+ *    "mass_scaling"            entry member .M
+ *    "velocity_scaling"        entry member .V
+ *    "to_cgs"                  entry member .c
+ *  Otherwise the function returns without writing anything.
+ *
+ *  Only for hdf5 output.
+ *
+ *  \param[in] hdf5_dataset Dataset identifier.
+ *  \param[in] blocknr Number of field which is written.
+ *
+ *  \return void
+ */
+void write_dataset_attributes(hid_t hdf5_dataset, enum iofields blocknr)
+{
+  /* index of the first IO_Fields entry registered for this block */
+  int ind = 0;
+  while(ind < N_IO_Fields && IO_Fields[ind].field != blocknr)
+    ind++;
+
+  /* unknown block: nothing to annotate */
+  if(ind >= N_IO_Fields)
+    return;
+
+  /* field registered without a unit: no attributes are written */
+  if(IO_Fields[ind].hasunit == 0)
+    return;
+
+  /* a- and h-scaling are only taken from the field entry for comoving
+   * integration; otherwise both attributes are written as zero */
+  double zero   = 0;
+  void *a_value = &zero;
+  void *h_value = &zero;
+  if(All.ComovingIntegrationOn)
+    {
+      a_value = &IO_Fields[ind].a;
+      h_value = &IO_Fields[ind].h;
+    }
+
+  /* every attribute gets its own scalar dataspace, closed right after use */
+  hid_t space, attr;
+
+  space = my_H5Screate(H5S_SCALAR);
+  attr  = my_H5Acreate(hdf5_dataset, "a_scaling", H5T_NATIVE_DOUBLE, space, H5P_DEFAULT);
+  my_H5Awrite(attr, H5T_NATIVE_DOUBLE, a_value, "a_scaling");
+  my_H5Aclose(attr, "a_scaling");
+  my_H5Sclose(space, H5S_SCALAR);
+
+  space = my_H5Screate(H5S_SCALAR);
+  attr  = my_H5Acreate(hdf5_dataset, "h_scaling", H5T_NATIVE_DOUBLE, space, H5P_DEFAULT);
+  my_H5Awrite(attr, H5T_NATIVE_DOUBLE, h_value, "h_scaling");
+  my_H5Aclose(attr, "h_scaling");
+  my_H5Sclose(space, H5S_SCALAR);
+
+  space = my_H5Screate(H5S_SCALAR);
+  attr  = my_H5Acreate(hdf5_dataset, "length_scaling", H5T_NATIVE_DOUBLE, space, H5P_DEFAULT);
+  my_H5Awrite(attr, H5T_NATIVE_DOUBLE, &IO_Fields[ind].L, "length_scaling");
+  my_H5Aclose(attr, "length_scaling");
+  my_H5Sclose(space, H5S_SCALAR);
+
+  space = my_H5Screate(H5S_SCALAR);
+  attr  = my_H5Acreate(hdf5_dataset, "mass_scaling", H5T_NATIVE_DOUBLE, space, H5P_DEFAULT);
+  my_H5Awrite(attr, H5T_NATIVE_DOUBLE, &IO_Fields[ind].M, "mass_scaling");
+  my_H5Aclose(attr, "mass_scaling");
+  my_H5Sclose(space, H5S_SCALAR);
+
+  space = my_H5Screate(H5S_SCALAR);
+  attr  = my_H5Acreate(hdf5_dataset, "velocity_scaling", H5T_NATIVE_DOUBLE, space, H5P_DEFAULT);
+  my_H5Awrite(attr, H5T_NATIVE_DOUBLE, &IO_Fields[ind].V, "velocity_scaling");
+  my_H5Aclose(attr, "velocity_scaling");
+  my_H5Sclose(space, H5S_SCALAR);
+
+  space = my_H5Screate(H5S_SCALAR);
+  attr  = my_H5Acreate(hdf5_dataset, "to_cgs", H5T_NATIVE_DOUBLE, space, H5P_DEFAULT);
+  my_H5Awrite(attr, H5T_NATIVE_DOUBLE, &IO_Fields[ind].c, "to_cgs");
+  my_H5Aclose(attr, "to_cgs");
+  my_H5Sclose(space, H5S_SCALAR);
+}
+#endif /* #ifdef HAVE_HDF5 */
+
+#ifdef OUTPUT_XDMF
+/*! \brief Outputs a xdmf file corresponding to this snapshot.
+ *
+ *  This xdmf file can be used to load the snapshot into programs like visit.
+ *  This option only works with output format 3 (hdf5).
+ *  The description goes to "<fname>.xmf" and points to the data file as
+ *  "./<basename of fname>.hdf5"; each particle type with particles gets a
+ *  grid whose geometry is /PartType<type>/Coordinates. If a name does not
+ *  fit into the local buffers or the xmf file cannot be opened, a message
+ *  is printed and no xmf file is written.
+ *
+ *  \param[in] fname Name of the snapshot.
+ *
+ *  \return void
+ */
+static void write_xdmf(char *fname)
+{
+  FILE *f;
+  char xmf_name[256], h5_path[256], dset_name[256];
+
+#ifdef OUTPUT_IN_DOUBLEPRECISION
+  int prec = 8;
+#else  /* #ifdef OUTPUT_IN_DOUBLEPRECISION */
+  int prec = 4;
+#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION */
+
+  /* hdf5 file path relative to xmf file, uses basename function of libgen.h,
+   * i.e. POSIX version of basename(); both names are length-checked */
+  int len_xmf = snprintf(xmf_name, sizeof(xmf_name), "%s.xmf", fname);
+  int len_h5  = snprintf(h5_path, sizeof(h5_path), "./%s.hdf5", basename(fname));
+
+  if(len_xmf < 0 || len_xmf >= (int)sizeof(xmf_name) || len_h5 < 0 || len_h5 >= (int)sizeof(h5_path))
+    {
+      printf("write_xdmf: file name `%s' is too long, no xmf file written.\n", fname);
+      return;
+    }
+
+  if(!(f = fopen(xmf_name, "w")))
+    {
+      printf("can't open file `%s' for writing.\n", xmf_name);
+      return;
+    }
+
+  fprintf(f, "<?xml version=\"1.0\" ?>\n");
+  fprintf(f, "<!DOCTYPE Xdmf SYSTEM \"Xdmf.dtd\" []>\n");
+  fprintf(f, "<Xdmf Version=\"2.0\">\n");
+  fprintf(f, " <Domain>");
+
+  for(int type = 0; type < NTYPES; type++)
+    {
+      int grid_open = 0; /* set once the <Grid> element of this type is started */
+
+      for(int bnr = 0; bnr < 1000; bnr++)
+        {
+          enum iofields field = (enum iofields)bnr;
+
+          if(field == IO_LASTENTRY)
+            break;
+
+          /* NOTE(review): presence is tested per block only, not per particle type */
+          if(!blockpresent(field, 1) || header.npart[type] <= 0)
+            continue;
+
+          if(field == IO_POS)
+            {
+              fprintf(f, "  <Grid Name=\"PartType%d\" GridType=\"Uniform\">\n", type);
+              fprintf(f, "   <Topology TopologyType=\"Polyvertex\" NumberOfElements=\"%d\"/>\n", header.npart[type]);
+              fprintf(f, "   <Geometry GeometryType=\"XYZ\">\n");
+              fprintf(f, "    <DataItem Dimensions=\"%d 3\" NumberType=\"Float\" Precision=\"%d\" Format=\"HDF\">\n",
+                      header.npart[type], prec);
+              /* coordinates of this particle type (was hard-wired to PartType0) */
+              fprintf(f, "     %s:/PartType%d/Coordinates\n", h5_path, type);
+              fprintf(f, "    </DataItem>\n");
+              fprintf(f, "   </Geometry>\n");
+              grid_open = 1;
+              continue;
+            }
+
+          int dim   = get_values_per_blockelement(field);
+          int dtype = get_datatype_in_block(field, 0);
+          get_dataset_name(field, dset_name);
+
+          /* only scalar or 3-vector blocks with datatype code 1 are listed */
+          if(dtype != 1 || (dim != 1 && dim != 3))
+            continue;
+
+          if(dim == 1)
+            {
+              fprintf(f, "   <Attribute Name=\"%s\" AttributeType=\"Scalar\" Center=\"Node\">\n", dset_name);
+              fprintf(f, "    <DataItem Dimensions=\"%d\" NumberType=\"Float\" Precision=\"%d\" Format=\"HDF\">\n",
+                      header.npart[type], prec);
+            }
+          else
+            {
+              fprintf(f, "   <Attribute Name=\"%s\" AttributeType=\"Vector\" Center=\"Node\">\n", dset_name);
+              fprintf(f, "    <DataItem Dimensions=\"%d 3\" NumberType=\"Float\" Precision=\"%d\" Format=\"HDF\">\n",
+                      header.npart[type], prec);
+            }
+
+          fprintf(f, "     %s:/PartType%d/%s\n", h5_path, type, dset_name);
+          fprintf(f, "    </DataItem>\n");
+          fprintf(f, "   </Attribute>\n");
+        }
+      if(grid_open)
+        fprintf(f, "  </Grid>\n");
+    }
+
+  fprintf(f, " </Domain>\n");
+  fprintf(f, "</Xdmf>");
+
+  fclose(f);
+}
+#endif /* #ifdef OUTPUT_XDMF */
+
+/*! \brief  A wrapper for the fwrite() function.
+ *
+ *  A short write terminates the run, or is handed to write_error() with
+ *  TOLERATE_WRITE_ERROR. If stream is null, no attempt at writing is done.
+ *
+ *  \param[in] ptr Pointer to the beginning of data to write.
+ *  \param[in] size Size in bytes of a single data element.
+ *  \param[in] nmemb Number of elements to be written.
+ *  \param[in] stream Pointer to the output stream.
+ *
+ *  \return Number of elements written to stream.
+ */
+size_t my_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream)
+{
+  size_t count = 0;
+
+#ifdef TOLERATE_WRITE_ERROR
+  if(WriteErrorFlag)
+    return 0;
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+  if(stream == NULL)
+    return 0;
+
+  if(size * nmemb > 0)
+    {
+      count = fwrite(ptr, size, nmemb, stream);
+
+      if(count != nmemb)
+        {
+#ifdef TOLERATE_WRITE_ERROR
+          write_error(0, count, nmemb);
+#else  /* #ifdef TOLERATE_WRITE_ERROR */
+          printf("I/O error (fwrite) on task=%d has occured: %s\n", ThisTask, strerror(errno));
+          myflush(stdout);
+          terminate("write error");
+#endif /* #ifdef TOLERATE_WRITE_ERROR #else */
+        }
+    }
+
+#ifdef TOLERATE_WRITE_ERROR
+  if(ferror(stream))
+    write_error(1, count, nmemb);
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+  return count;
+}
+
+/*! \brief  A wrapper for the fread() function.
+ *
+ *  Reads nmemb elements and checks that all of them were obtained. If
+ *  fewer elements are read, a message (end of file, or the strerror() text
+ *  for errno) is printed and the run is terminated. If stream is null or
+ *  the requested size is zero, no attempt at reading is done.
+ *
+ *  \param[out] ptr Pointer to the beginning of memory location where to
+ *              store data.
+ *  \param[in] size Size in bytes of a single data element.
+ *  \param[in] nmemb Number of elements to be read.
+ *  \param[in] stream Pointer to the input stream.
+ *
+ *  \return Number of elements read from stream.
+ */
+size_t my_fread(void *ptr, size_t size, size_t nmemb, FILE *stream)
+{
+  /* nothing to do without a stream or for a zero-sized request */
+  if(stream == NULL || size * nmemb == 0)
+    return 0;
+
+  size_t count = fread(ptr, size, nmemb, stream);
+
+  if(count != nmemb)
+    {
+      /* short read: tell a premature end of file apart from other errors */
+      if(!feof(stream))
+        printf("I/O error (fread) on task=%d has occured: %s\n", ThisTask, strerror(errno));
+      else
+        printf("I/O error (fread) on task=%d has occured: end of file\n", ThisTask);
+
+      myflush(stdout);
+      terminate("read error");
+    }
+
+  return count;
+}
+
+/*! \brief A wrapper for the printf() function.
+ *
+ *  Works like the standard printf() function, but the text is written to
+ *  the standard output (followed by myflush(stdout)) only on the task with
+ *  rank 0. All other tasks return without doing anything.
+ *
+ *  \param[in] fmt String that contains format arguments.
+ *
+ *  \return void
+ */
+void mpi_printf(const char *fmt, ...)
+{
+  if(ThisTask != 0)
+    return;
+
+  va_list args;
+  va_start(args, fmt);
+  vprintf(fmt, args);
+  myflush(stdout);
+  va_end(args);
+}
+
+/*! \brief A wrapper for the fprintf() function.
+ *
+ *  Works like the standard fprintf() function, but the text is written
+ *  to the given stream only on the task with rank 0.
+ *
+ *  \param[in] stream Output stream the text is written to.
+ *  \param[in] fmt String that contains format arguments.
+ *
+ *  \return void
+ */
+void mpi_fprintf(FILE *stream, const char *fmt, ...)
+{
+  if(ThisTask != 0)
+    return;
+
+  va_list args;
+  va_start(args, fmt);
+  vfprintf(stream, fmt, args);
+  myflush(stream);
+  va_end(args);
+}
+
+/*! \brief A function for printing debug information in parallel.
+ *
+ *  This function works like printf, however it takes care
+ *  that the output is contiguous in the stdout from task 0 to task NTask-1.
+ *  Run this debug function only in code parts which all tasks reach.
+ *  The formatted message is limited to the size of the internal buffer
+ *  (2048 bytes including the terminating null); longer text is truncated.
+ *
+ *  \param[in] fmt String that contains format arguments.
+ *
+ *  \return void
+ */
+void mpi_printf_each(const char *fmt, ...)
+{
+  char buffer[2048] = "";
+
+  /* bounded formatting: overlong messages are truncated instead of
+   * overflowing the buffer */
+  va_list l;
+  va_start(l, fmt);
+  vsnprintf(buffer, sizeof(buffer), fmt, l);
+  va_end(l);
+
+  if(ThisTask == 0)
+    {
+      // print own message
+      printf("%s", buffer);
+
+      // print message from other tasks, in rank order
+      for(int task = 1; task < NTask; task++)
+        {
+          MPI_Recv(buffer, (int)sizeof(buffer), MPI_CHAR, task, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+          printf("%s", buffer);
+        }
+    }
+  else
+    {
+      MPI_Send(buffer, (int)strlen(buffer) + 1, MPI_CHAR, 0, 0, MPI_COMM_WORLD);
+    }
+}
+
+/*! \brief Opens the requested file for writing and returns its stream.
+ *
+ *  The file is opened with fopen() in mode "w". If opening fails, a
+ *  message is printed and a null pointer is returned.
+ *
+ *  \param[in] fnam The file name.
+ *
+ *  \return The FILE pointer of the opened file, or null if opening
+ *          failed.
+ */
+FILE *open_file(char *fnam)
+{
+  FILE *fd = fopen(fnam, "w");
+
+  if(fd == NULL)
+    printf("can't open file `%s' for writing.\n", fnam);
+
+  return fd;
+}
diff --git a/src/amuse/community/arepo/src/io/io_fields.c b/src/amuse/community/arepo/src/io/io_fields.c
new file mode 100644
index 0000000000..d36231c570
--- /dev/null
+++ b/src/amuse/community/arepo/src/io/io_fields.c
@@ -0,0 +1,765 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/io/io_fields.c
+ * \date        05/2018
+ * \brief       User defined functions for output; needed for all
+ *              quantities that are not stored in a global array
+ * \details     contains functions:
+ *                static void io_func_task(int particle, int components,
+ *                  void *out_buffer, int mode)
+ *                static void io_func_timebin_hydro(int particle, int
+ *                  components, void *out_buffer, int mode)
+ *                static void io_func_timestep(int particle, int components,
+ *                  void *out_buffer, int mode)
+ *                static void io_func_softenings(int particle, int components,
+ *                  void *out_buffer, int mode)
+ *                void io_func_pos(int particle, int components, void *buffer,
+ *                  int mode)
+ *                static void io_func_vel(int particle, int components, void
+ *                  *buffer, int mode)
+ *                static void io_func_coolrate(int particle, int components,
+ *                  void *buffer, int mode)
+ *                static void io_func_ne(int particle, int components, void
+ *                  *buffer, int mode)
+ *                static void io_func_nh(int particle, int components, void
+ *                  *buffer, int mode)
+ *                static void io_func_curlvel(int particle, int components,
+ *                  void *out_buffer, int mode)
+ *                static void io_func_vorticity(int particle, int components,
+ *                  void *out_buffer, int mode)
+ *                static void io_func_cell_spin(int particle, int components,
+ *                  void *out_buffer, int mode)
+ *                static void io_func_bfield(int particle, int components,
+ *                  void *out_buffer, int mode)
+ *                void init_io_fields()
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 07.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <errno.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef OUTPUT_TASK
+/*! \brief Output of the task the particles are at.
+ *
+ *  \param[out] out_buffer File output buffer; receives ThisTask as one int.
+ *
+ *  The arguments particle, components and mode are unused.
+ */
+static void io_func_task(int particle, int components, void *out_buffer, int mode)
+{
+  *(int *)out_buffer = ThisTask;
+}
+#endif /* #ifdef OUTPUT_TASK */
+
+#ifdef OUTPUT_TIMEBIN_HYDRO
+/*! \brief Output function of the timebin corresponding to the hydrodynamic
+ *         timestep.
+ *
+ *  \param[in] particle Index of particle/cell.
+ *  \param[in] components (unused)
+ *  \param[out] out_buffer Receives P[particle].TimeBinHydro as one int.
+ *  \param[in] mode (unused)
+ *  \return void
+ */
+static void io_func_timebin_hydro(int particle, int components, void *out_buffer, int mode)
+{
+  int *timebin_out = (int *)out_buffer;
+  *timebin_out     = P[particle].TimeBinHydro;
+}
+#endif /* #ifdef OUTPUT_TIMEBIN_HYDRO */
+
+#ifdef OUTPUTTIMESTEP
+/*! \brief Output function of the hydrodynamic timestep.
+ *
+ *  \param[in] particle Index of particle/cell.
+ *  \param[in] components (unused)
+ *  \param[out] out_buffer File output buffer (one MyOutputFloat).
+ *  \param[in] mode (unused)
+ */
+static void io_func_timestep(int particle, int components, void *out_buffer, int mode)
+{
+  integertime ti_step = 0; /* time bin 0 is written as a zero timestep */
+  if(P[particle].TimeBinHydro)
+    ti_step = ((integertime)1) << P[particle].TimeBinHydro;
+  ((MyOutputFloat *)out_buffer)[0] = ti_step * All.Timebase_interval;
+}
+#endif /* #ifdef OUTPUTTIMESTEP */
+
+#ifdef OUTPUT_SOFTENINGS
+/*! \brief Output function of the force softening.
+ *  \param[in] particle Index of particle/cell.
+ *  \param[in] components (unused)
+ *  \param[out] out_buffer Receives All.ForceSoftening[] of its SofteningType.
+ *  \param[in] mode (unused)
+ *  \return void
+ */
+static void io_func_softenings(int particle, int components, void *out_buffer, int mode)
+{
+  const int soft_type = P[particle].SofteningType;
+  ((MyOutputFloat *)out_buffer)[0] = All.ForceSoftening[soft_type];
+}
+#endif /* #ifdef OUTPUT_SOFTENINGS */
+
+/*! \brief Box length along an axis (0: x, 1: y, 2: z): All.BoxSize, times
+ *         LONG_X, LONG_Y or LONG_Z where that option is defined.
+ */
+static double io_func_pos_boxsize(int axis)
+{
+  double boxSize = All.BoxSize;
+#ifdef LONG_X
+  if(axis == 0)
+    boxSize = All.BoxSize * LONG_X;
+#endif /* #ifdef LONG_X */
+#ifdef LONG_Y
+  if(axis == 1)
+    boxSize = All.BoxSize * LONG_Y;
+#endif /* #ifdef LONG_Y */
+#ifdef LONG_Z
+  if(axis == 2)
+    boxSize = All.BoxSize * LONG_Z;
+#endif /* #ifdef LONG_Z */
+  return boxSize;
+}
+
+/*! \brief IO function of the particle positions.
+ *
+ *  Output (mode 0): writes Pos minus All.GlobalDisplacementVector for the
+ *  three axes and wraps each value into [0, box length) by adding or
+ *  subtracting the box length. With GRAVITY_NOT_PERIODIC only particles
+ *  of type 0 are wrapped. The values are stored as double if
+ *  OUTPUT_COORDINATES_IN_DOUBLEPRECISION is defined and DumpFlag != 3, as
+ *  MyOutputFloat otherwise; the wrapping is done in the stored precision.
+ *
+ *  Input (any other mode): reads `components` values (double with
+ *  READ_COORDINATES_IN_DOUBLE, else MyInputFloat) and adds
+ *  All.GlobalDisplacementVector.
+ *
+ *  \param[in] particle Index of particle/cell.
+ *  \param[in] components Number of values read in input mode.
+ *  \param[in,out] buffer File IO buffer.
+ *  \param[in] mode Mode (0: output, otherwise: input).
+ *
+ *  \return void
+ */
+void io_func_pos(int particle, int components, void *buffer, int mode)
+{
+  if(mode != 0)
+    {
+#ifdef READ_COORDINATES_IN_DOUBLE
+      double *in_buffer = buffer;
+#else  /* #ifdef READ_COORDINATES_IN_DOUBLE */
+      MyInputFloat *in_buffer = buffer;
+#endif /* #ifdef READ_COORDINATES_IN_DOUBLE #else */
+
+      for(int c = 0; c < components; c++)
+        P[particle].Pos[c] = in_buffer[c] + All.GlobalDisplacementVector[c];
+      return;
+    }
+
+#ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION
+  if(DumpFlag != 3)
+    {
+      double *pos_out = buffer;
+
+      for(int axis = 0; axis < 3; axis++)
+        {
+          pos_out[axis] = P[particle].Pos[axis] - All.GlobalDisplacementVector[axis];
+
+#if defined(GRAVITY_NOT_PERIODIC)
+          if(P[particle].Type != 0)
+            continue;
+#endif /* #if defined(GRAVITY_NOT_PERIODIC) */
+          const double boxSize = io_func_pos_boxsize(axis);
+          while(pos_out[axis] < 0)
+            pos_out[axis] += boxSize;
+          while(pos_out[axis] >= boxSize)
+            pos_out[axis] -= boxSize;
+        }
+      return;
+    }
+#endif /* #ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION */
+
+  MyOutputFloat *pos_out = buffer;
+
+  for(int axis = 0; axis < 3; axis++)
+    {
+      pos_out[axis] = P[particle].Pos[axis] - All.GlobalDisplacementVector[axis];
+
+#if defined(GRAVITY_NOT_PERIODIC)
+      if(P[particle].Type != 0)
+        continue;
+#endif /* #if defined(GRAVITY_NOT_PERIODIC) */
+      const double boxSize = io_func_pos_boxsize(axis);
+      while(pos_out[axis] < 0)
+        pos_out[axis] += boxSize;
+      while(pos_out[axis] >= boxSize)
+        pos_out[axis] -= boxSize;
+    }
+}
+
+/*! \brief IO function for velocities.
+ *
+ *  Output values are P[].Vel times sqrt(All.cf_a3inv), so they differ from the
+ *  code-internal velocities by that factor; input values are stored as read.
+ *
+ *  \param[in] particle Index of particle/cell.
+ *  \param[in] components Number of entries in array.
+ *  \param[in,out] buffer File IO buffer.
+ *  \param[in] mode Mode 0: output, otherwise: input.
+ *
+ *  \return void
+ */
+static void io_func_vel(int particle, int components, void *buffer, int mode)
+{
+  if(mode != 0)
+    {
+      const MyInputFloat *vel_in = buffer;
+      for(int k = 0; k < components; k++)
+        P[particle].Vel[k] = vel_in[k];
+
+      return;
+    }
+
+  MyOutputFloat *vel_out = buffer;
+
+  for(int k = 0; k < components; k++)
+    {
+      vel_out[k] = P[particle].Vel[k];
+      vel_out[k] *= sqrt(All.cf_a3inv); /* we are dealing with p = a^2 * xdot */
+    }
+}
+
+#ifdef OUTPUTACCELERATION
+/*! \brief IO function for gravitational accelerations.
+ *
+ *  Output: GravAccel (plus GravPM with PMGRID), multiplied by All.cf_a2inv
+ *  unless RestartFlag == 6. Input: GravAccel is set from MyInputFloat values.
+ *
+ *  \param[in] particle Index of particle/cell.
+ *  \param[in] components (unused)
+ *  \param[in,out] out_buffer File IO buffer.
+ *  \param[in] mode Mode 0: output, otherwise: input.
+ *
+ *  \return void
+ */
+static void io_func_accel(int particle, int components, void *out_buffer, int mode)
+{
+  if(mode != 0)
+    {
+      /* read with the input precision type, as io_func_vel() does */
+      for(int k = 0; k < 3; k++)
+        P[particle].GravAccel[k] = ((MyInputFloat *)out_buffer)[k];
+      return;
+    }
+
+  MyOutputFloat *acc = (MyOutputFloat *)out_buffer;
+  const int scaled   = (RestartFlag != 6); /* RestartFlag 6: written without the All.cf_a2inv factor */
+  for(int k = 0; k < 3; k++)
+    {
+      if(scaled)
+        acc[k] = All.cf_a2inv * P[particle].GravAccel[k];
+      else
+        acc[k] = P[particle].GravAccel[k];
+#ifdef PMGRID
+      if(scaled)
+        acc[k] += All.cf_a2inv * P[particle].GravPM[k];
+      else
+        acc[k] += P[particle].GravPM[k];
+#endif /* #ifdef PMGRID */
+    }
+}
+#endif /* #ifdef OUTPUTACCELERATION */
+
+/* -- user defined functions: additional physics -- */
+#ifdef OUTPUTCOOLRATE
+/*! \brief Output function of cooling rate.
+ *
+ *  SphP[].Ne of the cell is passed through SetOutputGasState() and then
+ *  used to obtain the cooling time. The value written is Utherm / tcool,
+ *  or 0 if the cooling time is 0.
+ *
+ *  \param[in] particle Index of particle/cell.
+ *  \param[in] components (unused)
+ *  \param[out] buffer File output buffer.
+ *  \param[in] mode (unused)
+ *  \return void
+ */
+static void io_func_coolrate(int particle, int components, void *buffer, int mode)
+{
+  double ne = SphP[particle].Ne;
+  double nh0, coolrate;
+
+  SetOutputGasState(particle, &ne, &nh0, &coolrate);
+
+  /* get cooling time */
+  const double tcool = GetCoolingTime(SphP[particle].Utherm, SphP[particle].Density * All.cf_a3inv, &ne);
+
+  /* convert cooling time with current thermal energy to du/dt */
+  ((MyOutputFloat *)buffer)[0] = (tcool != 0) ? SphP[particle].Utherm / tcool : 0;
+}
+#endif /* #ifdef OUTPUTCOOLRATE */
+
+/* -- user defined functions: gas properties -- */
+#if defined(COOLING)
+/*! \brief IO function of the electron number density.
+ *
+ *  \param[in] particle Index of particle/cell.
+ *  \param[in] components (unused)
+ *  \param[in,out] buffer File IO buffer (one value).
+ *  \param[in] mode Mode 0: output, otherwise: input.
+ *  \return void
+ */
+static void io_func_ne(int particle, int components, void *buffer, int mode)
+{
+  if(mode != 0)
+    {
+      SphP[particle].Ne = ((MyInputFloat *)buffer)[0];
+      return;
+    }
+
+  // output path: start from the stored Ne of the cell
+  double ne = SphP[particle].Ne;
+
+#if defined(USE_SFR)
+  // keeps the previous behavior that Ne is updated prior to output only for Sfr>0 cells;
+  // if this is unwanted (or redundant) the if() condition should be removed
+  if(get_starformation_rate(particle) > 0)
+    {
+      double nh0, coolrate;
+      SetOutputGasState(particle, &ne, &nh0, &coolrate);
+    }
+#endif /* #if defined(USE_SFR) */
+
+  ((MyOutputFloat *)buffer)[0] = ne;
+}
+#endif /* #if defined(COOLING) */
+
+#if defined(COOLING)
+/*! \brief Output function for the neutral hydrogen abundance.
+ *
+ *  \param[in] particle Index of particle/cell.
+ *  \param[in] components (unused)
+ *  \param[out] buffer File output buffer.
+ *  \param[in] mode (unused)
+ *
+ *  \return void
+ */
+static void io_func_nh(int particle, int components, void *buffer, int mode)
+{
+  double ne_cell = SphP[particle].Ne; /* handed to SetOutputGasState() by pointer */
+  double nh0_cell, coolrate_cell;
+  MyOutputFloat *out = (MyOutputFloat *)buffer;
+
+  SetOutputGasState(particle, &ne_cell, &nh0_cell, &coolrate_cell);
+  out[0] = nh0_cell; /* only the neutral hydrogen value is written */
+}
+#endif /* #if defined(COOLING) */
+
+#ifdef USE_SFR
+/*! \brief IO function for star formation rate.
+ *
+ *  \param[in] particle Index of particle/cell.
+ *  \param[in] components (unused)
+ *  \param[in,out] buffer File IO buffer (written on output, read on input).
+ *  \param[in] mode Mode 0: output, otherwise: input.
+ *  \return void
+ */
+static void io_func_sfr(int particle, int components, void *buffer, int mode)
+{
+  if(mode == 0)
+    {
+      ((MyOutputFloat *)buffer)[0] = get_starformation_rate(particle);
+    }
+  else
+    {
+      /* input values are read as MyInputFloat, as in io_func_ne and io_func_bfield */
+      SphP[particle].Sfr = ((MyInputFloat *)buffer)[0];
+    }
+}
+#endif
+
+/* -- user defined functions: other -- */
+#if defined(OUTPUT_CURLVEL)
+/*! \brief Output function for curl of velocity field.
+ *
+ *  \param[in] particle Index of particle/cell.
+ *  \param[in] components (unused)
+ *  \param[out] out_buffer File IO buffer.
+ *  \param[in] mode Mode 0: output; nothing is done for any other mode.
+ *
+ *  \return void
+ */
+static void io_func_curlvel(int particle, int components, void *out_buffer, int mode)
+{
+  if(mode != 0) /* only the output direction is handled */
+    return;
+
+  ((MyOutputFloat *)out_buffer)[0] = SphP[particle].CurlVel;
+}
+#endif /* #if defined(OUTPUT_CURLVEL) */
+
+#ifdef OUTPUT_VORTICITY
+/*! \brief Output function of vorticity (computed from the velocity gradient).
+ *
+ *  \param[in] particle Index of particle/cell.
+ *  \param[in] components (unused)
+ *  \param[out] out_buffer File IO buffer.
+ *  \param[in] mode Mode 0: output; nothing is done for any other mode.
+ *
+ *  \return void
+ */
+static void io_func_vorticity(int particle, int components, void *out_buffer, int mode)
+{
+  if(mode != 0) /* only the output direction is handled */
+    return;
+
+  MyOutputFloat *curl = (MyOutputFloat *)out_buffer; /* receives the three components */
+  curl[0]             = SphP[particle].Grad.dvel[2][1] - SphP[particle].Grad.dvel[1][2];
+  curl[1]             = SphP[particle].Grad.dvel[0][2] - SphP[particle].Grad.dvel[2][0];
+  curl[2]             = SphP[particle].Grad.dvel[1][0] - SphP[particle].Grad.dvel[0][1];
+}
+#endif /* #ifdef OUTPUT_VORTICITY */
+
+#ifdef MHD
+/*! \brief IO function for magnetic field.
+ *
+ *  Note that the file values are in the Gauss unit system (in code units) while
+ *  the internal B-field is in the Heaviside-Lorentz system (factor sqrt(4 PI)!).
+ *
+ *  \param[in] particle Index of particle/cell.
+ *  \param[in] components (unused)
+ *  \param[in,out] out_buffer File IO buffer (written on output, read on input).
+ *  \param[in] mode Mode 0: output, otherwise: input.
+ *
+ *  \return void
+ */
+static void io_func_bfield(int particle, int components, void *out_buffer, int mode)
+{
+  const double gauss_per_hl = sqrt(4. * M_PI); /* factor between the two unit systems */
+  int dim;
+
+  if(mode != 0)
+    {
+      /* reading: the file holds Gauss values, memory wants Heaviside-Lorentz */
+      for(dim = 0; dim < 3; dim++)
+        SphP[particle].B[dim] = ((MyInputFloat *)out_buffer)[dim] / gauss_per_hl;
+      return;
+    }
+
+  /* writing: memory holds Heaviside-Lorentz values, the file wants Gauss */
+  for(dim = 0; dim < 3; dim++)
+    ((MyOutputFloat *)out_buffer)[dim] = SphP[particle].B[dim] * gauss_per_hl;
+}
+#endif /* #ifdef MHD */
+
+/*! \brief Function for field registering.
+ *
+ *  Each field is set up by init_field(), followed by init_units() and, for some, init_snapshot_type().
+ *  For init_field arguments read the description of init_field. Don't forget to add the new IO_FLAG to allvars.h.
+ *
+ *  \return void
+ */
+void init_io_fields()
+{
+  /* ALL TYPES */
+
+#ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION
+  enum types_in_file pos_out = FILE_DOUBLE;
+#else  /* #ifdef  OUTPUT_COORDINATES_IN_DOUBLEPRECISION */
+  enum types_in_file pos_out = FILE_MY_IO_FLOAT;
+#endif /* #ifdef  OUTPUT_COORDINATES_IN_DOUBLEPRECISION #else */
+#ifdef READ_COORDINATES_IN_DOUBLE
+  enum types_in_file pos_in = FILE_DOUBLE;
+#else  /* #ifdef  READ_COORDINATES_IN_DOUBLE */
+  enum types_in_file pos_in = FILE_MY_IO_FLOAT;
+#endif /* #ifdef  READ_COORDINATES_IN_DOUBLE #else */
+  init_field(IO_POS, "POS ", "Coordinates", MEM_MY_DOUBLE, pos_out, pos_in, 3, A_NONE, 0, io_func_pos, ALL_TYPES);
+  init_units(IO_POS, 1., -1., 1., 0., 0., All.UnitLength_in_cm);
+
+  init_field(IO_POS_MINI, "POS ", "Coordinates", MEM_MY_DOUBLE, FILE_MY_IO_FLOAT, FILE_NONE, 3, A_NONE, 0, io_func_pos, ALL_TYPES);
+  init_units(IO_POS_MINI, 1., -1., 1., 0., 0., All.UnitLength_in_cm);
+  init_snapshot_type(IO_POS_MINI, SN_MINI_ONLY); /* second IO tag output to mini-snaps always in single precision */
+
+  init_field(IO_VEL, "VEL ", "Velocities", MEM_MY_DOUBLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_NONE, 0, io_func_vel,
+             ALL_TYPES);                                                 /* particle velocities */
+  init_units(IO_VEL, 0.5, 0., 0., 0., 1., All.UnitVelocity_in_cm_per_s); /* sqrt(a)*km/s */
+  init_snapshot_type(IO_VEL, SN_MINI);
+
+  init_field(IO_ID, "ID  ", "ParticleIDs", MEM_MY_ID_TYPE, FILE_MY_ID_TYPE, FILE_MY_ID_TYPE, 1, A_P, &P[0].ID, 0, ALL_TYPES);
+  init_units(IO_ID, 0, 0, 0, 0, 0, 0);
+  init_snapshot_type(IO_ID, SN_MINI);
+
+  init_field(IO_MASS, "MASS", "Masses", MEM_MY_DOUBLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_P, &P[0].Mass, 0,
+             SET_IN_GET_PARTICLES_IN_BLOCK); /* particle mass */
+  init_units(IO_MASS, 0., -1., 0., 1., 0., All.UnitMass_in_g);
+  init_snapshot_type(IO_MASS, SN_MINI);
+
+#ifdef OUTPUTPOTENTIAL
+  init_field(IO_POT, "POT ", "Potential", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_P, &P[0].Potential, 0,
+             ALL_TYPES); /* gravitational potential */
+  init_units(IO_POT, -1.0, 0.0, 0.0, 0.0, 2.0, All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s); /* (km/s)^2/a */
+
+  init_field(IO_POT_MINI, "POT ", "Potential", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_P, &P[0].Potential, 0,
+             STARS_ONLY | BHS_ONLY);
+  init_units(IO_POT_MINI, -1.0, 0.0, 0.0, 0.0, 2.0, All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s);
+  init_snapshot_type(IO_POT_MINI, SN_MINI_ONLY); /* second IO tag output to mini-snaps for stars/BHs only */
+#endif                                           /* #ifdef OUTPUTPOTENTIAL */
+
+  /* GAS CELLS */
+
+  init_field(IO_U, "U   ", "InternalEnergy", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].Utherm, 0,
+             GAS_ONLY); /* internal energy */
+  init_units(IO_U, 0., 0., 0., 0., 2., All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s);
+  init_snapshot_type(IO_U, SN_MINI);
+
+  init_field(IO_RHO, "RHO ", "Density", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].Density, 0,
+             GAS_ONLY); /* particle density */
+  init_units(IO_RHO, -3., 2., -3., 1., 0., All.UnitDensity_in_cgs);
+  init_snapshot_type(IO_RHO, SN_MINI);
+
+#ifdef OUTPUT_PRESSURE
+  init_field(IO_PRESSURE, "PRES", "Pressure", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].Pressure, 0, GAS_ONLY);
+  init_units(IO_PRESSURE, -3.0, 2.0, -3.0, 1.0, 2.0,
+             All.UnitDensity_in_cgs * All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s);
+#endif /* #ifdef OUTPUT_PRESSURE */
+
+#ifdef OUTPUT_CSND
+  init_field(IO_CSND, "CSND", "SoundSpeed", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].Csnd, 0, GAS_ONLY);
+  init_units(IO_CSND, 0., 0., 0., 0., 1., All.UnitVelocity_in_cm_per_s);
+#endif /* #ifdef OUTPUT_CSND */
+
+#if defined(COOLING)
+  init_field(IO_NE, "NE  ", "ElectronAbundance", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_NONE, 0, io_func_ne,
+             GAS_ONLY);                /* electron abundance */
+  init_units(IO_NE, 0, 0, 0, 0, 0, 0); /* dimensionless fraction */
+  init_snapshot_type(IO_NE, SN_MINI);
+
+  init_field(IO_NH, "NH  ", "NeutralHydrogenAbundance", MEM_NONE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_NONE, 0, io_func_nh,
+             GAS_ONLY);                /* neutral hydrogen fraction */
+  init_units(IO_NH, 0, 0, 0, 0, 0, 0); /* dimensionless fraction */
+#endif                                 /* #if defined(COOLING) */
+
+#ifdef USE_SFR
+  init_field(IO_SFR, "SFR ", "StarFormationRate", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_NONE, 0, io_func_sfr,
+             GAS_ONLY);                                                    /* star formation rate */
+  init_units(IO_SFR, 0.0, 0.0, -1.0, 1.0, 1.0, SOLAR_MASS / SEC_PER_YEAR); /* Msun/yr */
+  init_snapshot_type(IO_SFR, SN_MINI);
+#endif /* #ifdef USE_SFR */
+
+#ifdef OUTPUT_DIVVEL
+  init_field(IO_DIVVEL, "DIVV", "VelocityDivergence", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].DivVel, 0,
+             GAS_ONLY);
+  init_units(IO_DIVVEL, 0.0, 1.0, -1.0, 0.0, 1.0, All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_DIVVEL */
+
+#if defined(OUTPUT_CURLVEL)
+  init_field(IO_CURLVEL, "ROTV", "VelocityCurl", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_NONE, 0, io_func_curlvel,
+             GAS_ONLY);
+  init_units(IO_CURLVEL, 0.0, 1.0, -1.0, 0.0, 1.0, All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm); /* NOTE(review): an input type is registered, but io_func_curlvel only acts for mode 0 - confirm */
+#endif /* #if defined(OUTPUT_CURLVEL) */
+
+#ifdef OUTPUT_COOLHEAT
+  init_field(IO_COOLHEAT, "COHE", "CoolingHeatingEnergy", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].CoolHeat, 0,
+             GAS_ONLY);
+  init_units(IO_COOLHEAT, 0.0, 0.0, -1.0, 1.0, 3.0, All.UnitEnergy_in_cgs / All.UnitTime_in_s);
+#endif /* #ifdef OUTPUT_COOLHEAT */
+
+#ifdef OUTPUT_SURFACE_AREA
+  init_field(IO_SAREA, "AREA", "SurfaceArea", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].SurfaceArea, 0,
+             GAS_ONLY);
+  init_units(IO_SAREA, 2.0, -2.0, 2.0, 0.0, 0.0, All.UnitLength_in_cm * All.UnitLength_in_cm);
+
+  init_field(IO_NFACES, "NFAC", "NumFacesCell", MEM_INT, FILE_INT, FILE_INT, 1, A_SPHP, &SphP[0].CountFaces, 0, GAS_ONLY);
+  init_units(IO_NFACES, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
+#endif /* #ifdef OUTPUT_SURFACE_AREA */
+
+#ifdef OUTPUTCOOLRATE
+  init_field(IO_COOLRATE, "COOR", "CoolingRate", MEM_NONE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_NONE, 0, io_func_coolrate, GAS_ONLY);
+  init_units(IO_COOLRATE, 0.0, 0.0, -1.0, 1.0, 3.0, 1.0);
+#endif /* #ifdef OUTPUTCOOLRATE */
+
+#ifdef OUTPUT_VORTICITY
+  init_field(IO_VORT, "VORT", "Vorticity", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_NONE, 0, io_func_vorticity, GAS_ONLY);
+  init_units(IO_VORT, 0.0, 1.0, -1.0, 0.0, 1.0, All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm); /* NOTE(review): an input type is registered, but io_func_vorticity only acts for mode 0 - confirm */
+#endif /* #ifdef OUTPUT_VORTICITY */
+
+  /* GAS CELLS GRADIENTS */
+
+#ifdef OUTPUT_PRESSURE_GRADIENT
+  init_field(IO_GRADP, "GRAP", "PressureGradient", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 3, A_SPHP, &SphP[0].Grad.dpress[0], 0,
+             GAS_ONLY);
+  init_units(IO_GRADP, -4.0, 3.0, -4.0, 1.0, 2.0,
+             All.UnitDensity_in_cgs * All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_PRESSURE_GRADIENT */
+
+#ifdef OUTPUT_DENSITY_GRADIENT
+  init_field(IO_GRADR, "GRAR", "DensityGradient", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 3, A_SPHP, &SphP[0].Grad.drho[0], 0,
+             GAS_ONLY);
+  init_units(IO_GRADR, -4., 3., -4., 1., 0., All.UnitDensity_in_cgs / All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_DENSITY_GRADIENT */
+
+#ifdef OUTPUT_VELOCITY_GRADIENT
+  init_field(IO_GRADV, "GRAV", "VelocityGradient", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 9, A_SPHP, &SphP[0].Grad.dvel[0][0], 0,
+             GAS_ONLY);
+  init_units(IO_GRADV, 0., 1., -1., 0., 1., All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm); /* vel / length */
+#endif                                                                                            /* #ifdef OUTPUT_VELOCITY_GRADIENT */
+
+#ifdef OUTPUT_BFIELD_GRADIENT
+  init_field(IO_GRADB, "GRAB", "BfieldGradient", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 9, A_SPHP, &SphP[0].Grad.dB[0][0], 0,
+             GAS_ONLY);
+  init_units(IO_GRADB, -3., 2., -2.5, 0.5, 1., pow(All.UnitPressure_in_cgs, 0.5) / All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_BFIELD_GRADIENT */
+
+  /* GAS CELLS (MESH PROPERTIES) */
+
+#ifdef OUTPUT_VOLUME
+  init_field(IO_VOL, "VOL ", "Volume", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].Volume, 0, GAS_ONLY);
+  init_units(IO_VOL, 3., -3., 3., 0., 0., All.UnitLength_in_cm * All.UnitLength_in_cm * All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_VOLUME */
+
+#ifdef OUTPUT_VERTEX_VELOCITY
+  init_field(IO_VERTEXVEL, "VEVE", "VertexVelocity", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_SPHP,
+             &SphP[0].VelVertex[0], 0, GAS_ONLY);
+  init_units(IO_VERTEXVEL, 1., 0., 0., 0., 1., All.UnitVelocity_in_cm_per_s);
+#endif /* #ifdef OUTPUT_VERTEX_VELOCITY */
+
+#ifdef OUTPUT_MESH_FACE_ANGLE
+  init_field(IO_FACEANGLE, "FACA", "MaxFaceAngle", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].MaxFaceAngle, 0,
+             GAS_ONLY);
+  init_units(IO_FACEANGLE, 0., 0., 0., 0., 0., 0.0);
+#endif /* #ifdef OUTPUT_MESH_FACE_ANGLE */
+
+#ifdef OUTPUT_CENTER_OF_MASS
+  init_field(IO_CM, "CMCE", "CenterOfMass", MEM_MY_DOUBLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_SPHP, &SphP[0].Center[0], 0,
+             GAS_ONLY);
+  init_units(IO_CM, 1., -1., 1., 0., 0., All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_CENTER_OF_MASS */
+
+  /* DIAGNOSTIC */
+
+#ifdef OUTPUT_TASK
+  init_field(IO_TASK, "TASK", "task", MEM_INT, FILE_INT, FILE_NONE, 1, A_NONE, 0, io_func_task, GAS_ONLY);
+  init_units(IO_TASK, 0., 0., 0., 0., 0., 0.0);
+#endif /* #ifdef OUTPUT_TASK */
+
+#ifdef OUTPUT_TIMEBIN_HYDRO
+  init_field(IO_TIMEBIN_HYDRO, "TBH", "TimebinHydro", MEM_NONE, FILE_INT, FILE_NONE, 1, A_NONE, 0, io_func_timebin_hydro, GAS_ONLY);
+  init_units(IO_TIMEBIN_HYDRO, 0., 0., 0., 0., 0., 0.0); /* NOTE(review): label "TBH" has 3 characters, every other label here has 4 - confirm */
+#endif /* #ifdef OUTPUT_TIMEBIN_HYDRO */
+
+#ifdef OUTPUTTIMESTEP
+  init_field(IO_TSTP, "TSTP", "TimeStep", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_NONE, 0, io_func_timestep, ALL_TYPES);
+  init_units(IO_TSTP, 0., -1., 1., 0., -1., All.UnitTime_in_s);
+#endif /* #ifdef OUTPUTTIMESTEP */
+
+#ifdef OUTPUTACCELERATION
+  init_field(IO_ACCEL, "ACCE", "Acceleration", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_NONE, 0, io_func_accel, ALL_TYPES);
+  init_units(IO_ACCEL, -1., 1., -1., 0., 2., All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUTACCELERATION */
+
+#ifdef OUTPUT_SOFTENINGS
+  init_field(IO_SOFTENING, "SOFT", "Softenings", MEM_NONE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_NONE, 0, io_func_softenings, ALL_TYPES);
+  init_units(IO_SOFTENING, 1., -1., 1., 0., 0., All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_SOFTENINGS */
+
+#ifdef OUTPUTGRAVINTERACTIONS
+  init_field(IO_GRAVITERACTIONS, "GINT", "GravityInteractions", MEM_INT, FILE_INT, FILE_NONE, 1, A_SPHP, &SphP[0].GravInteractions, 0,
+             ALL_TYPES);
+  init_units(IO_GRAVITERACTIONS, 0., 0., 0., 0., 0., 0.0); /* NOTE(review): the only A_SPHP field here that is not GAS_ONLY - confirm */
+#endif /* #ifdef OUTPUTGRAVINTERACTIONS */
+
+  /* MHD */
+
+#ifdef MHD
+  enum types_in_file mhd_read = FILE_MY_IO_FLOAT;
+#if defined(MHD_SEEDFIELD)
+  if(RestartFlag == 0)
+    mhd_read = FILE_NONE; /* magnetic field not expected in ICs */
+#endif                    /* #if defined(MHD_SEEDFIELD) */
+
+  init_field(IO_BFLD, "BFLD", "MagneticField", MEM_NONE, FILE_MY_IO_FLOAT, mhd_read, 3, A_NONE, 0, io_func_bfield,
+             GAS_ONLY); /* magnetic field  */
+  init_units(IO_BFLD, -2., 1., -1.5, 0.5, 1., pow(All.UnitPressure_in_cgs, 0.5));
+
+  init_field(IO_DIVB, "DIVB", "MagneticFieldDivergence", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].DivB, 0,
+             GAS_ONLY); /* divergence of magnetic field  */
+  init_units(IO_DIVB, -3., 2., -2.5, 0.5, 1., pow(All.UnitPressure_in_cgs, 0.5) / All.UnitLength_in_cm);
+#endif /* #ifdef MHD */
+
+  /* Scalars */
+
+#ifdef PASSIVE_SCALARS
+  init_field(IO_PASS, "PASS", "PassiveScalars", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, PASSIVE_SCALARS, A_SPHP,
+             &SphP[0].PScalars[0], 0, GAS_ONLY);
+  init_units(IO_PASS, 0., 0., 0., 0., 0., 0.0);
+#endif /* #ifdef PASSIVE_SCALARS */
+
+  /* OTHER */
+
+#ifdef SAVE_HSML_IN_SNAPSHOT
+  init_field(IO_SUBFINDDENSITY, "SFDE", "SubfindDensity", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_PS, &PS[0].SubfindDensity, 0,
+             ALL_TYPES);
+  init_units(IO_SUBFINDDENSITY, -3., 2., -3., 1., 0., All.UnitDensity_in_cgs);
+  init_snapshot_type(IO_SUBFINDDENSITY, SN_NO_SUBBOX);
+
+  init_field(IO_SUBFINDDMDENSITY, "SFDD", "SubfindDMDensity", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_PS,
+             &PS[0].SubfindDMDensity, 0, ALL_TYPES);
+  init_units(IO_SUBFINDDMDENSITY, -3., 2., -3., 1., 0., All.UnitDensity_in_cgs);
+  init_snapshot_type(IO_SUBFINDDMDENSITY, SN_NO_SUBBOX);
+
+  init_field(IO_SUBFINDHSML, "SFHS", "SubfindHsml", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_PS, &PS[0].SubfindHsml, 0,
+             ALL_TYPES);
+  init_units(IO_SUBFINDHSML, 1., -1., 1., 0., 0., All.UnitLength_in_cm);
+  init_snapshot_type(IO_SUBFINDHSML, SN_NO_SUBBOX);
+
+  init_field(IO_SUBFINDVELDISP, "SFVD", "SubfindVelDisp", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_PS, &PS[0].SubfindVelDisp, 0,
+             ALL_TYPES);
+  init_units(IO_SUBFINDVELDISP, 0.0, 0.0, 0.0, 0.0, 1.0, All.UnitVelocity_in_cm_per_s);
+  init_snapshot_type(IO_SUBFINDVELDISP, SN_NO_SUBBOX);
+#endif /* #ifdef SAVE_HSML_IN_SNAPSHOT */
+
+#if defined(REFINEMENT_HIGH_RES_GAS)
+  init_field(IO_HIGHRESMASS, "HRGM", "HighResGasMass", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].HighResMass, 0,
+             GAS_ONLY);
+  init_units(IO_HIGHRESMASS, 0, -1, 0, 1, 0, All.UnitMass_in_g);
+
+  init_field(IO_ALLOWREFINEMENT, "REF ", "AllowRefinement", MEM_INT, FILE_INT, FILE_INT, 1, A_SPHP, &SphP[0].AllowRefinement, 0,
+             GAS_ONLY);
+  init_units(IO_ALLOWREFINEMENT, 0, 0, 0, 0, 0, 0);
+#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */
+}
diff --git a/src/amuse/community/arepo/src/io/logs.c b/src/amuse/community/arepo/src/io/logs.c
new file mode 100644
index 0000000000..6354cf3609
--- /dev/null
+++ b/src/amuse/community/arepo/src/io/logs.c
@@ -0,0 +1,623 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/io/logs.c
+ * \date        05/2018
+ * \brief       Log-files handling.
+ * \details     contains functions:
+ *                void open_logfiles(void)
+ *                void close_logfiles(void)
+ *                void output_log_messages(void)
+ *                void init_cpu_log(void)
+ *                void write_cpu_log(void)
+ *                void put_symbol(char *string, double t0, double t1, char c)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 07.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <ctype.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+
+#define CPU_STRING_LEN 120
+
+/*! \brief Contains information about the used CPU timers, like their names,
+ * symbols etc.
+ */
+struct timer_d Timer_data[CPU_LAST + 1];
+
+enum timers TimerStack[TIMER_STACK_DEPTH];
+int TimerStackPos = 0; /* set to 0 again in init_cpu_log() */
+
+/*! \brief Opens files for logging.
+ *
+ *   This function opens various log-files that report on the status and
+ *   performance of the simulation. Upon restart, the code will append to
+ *   these files. Task 0 creates the output directory and opens the global
+ *   logs; the run is terminated if one of the files cannot be opened.
+ *   Other tasks only open their own detailed timings file (DETAILEDTIMINGS).
+ *
+ *   \return void
+ */
+void open_logfiles(void)
+{
+  char mode[2], buf[1000];
+
+  /* a fresh start (RestartFlag 0) truncates the logs, anything else appends */
+  if(RestartFlag == 0)
+    strcpy(mode, "w");
+  else
+    strcpy(mode, "a");
+
+  /* return value is not checked (presumably the directory may exist already) */
+  if(ThisTask == 0)
+    mkdir(All.OutputDir, 02755);
+
+  /* all tasks wait here, i.e. until task 0 has issued the mkdir above */
+  MPI_Barrier(MPI_COMM_WORLD);
+
+#ifdef DETAILEDTIMINGS
+  snprintf(buf, sizeof(buf), "%stimings_detailed_%d.txt", All.OutputDir, ThisTask);
+  if(!(FdDetailed = fopen(buf, mode)))
+    terminate("error in opening file '%s'\n", buf);
+#endif /* #ifdef DETAILEDTIMINGS */
+
+  if(ThisTask != 0) /* only the root processors writes to the log files */
+    return;
+
+  /* the path is handed to terminate() as an argument and never as the format
+   * string itself, so a '%' inside the path cannot be taken as a conversion */
+  snprintf(buf, sizeof(buf), "%s%s", All.OutputDir, "cpu.txt");
+  if(!(FdCPU = fopen(buf, mode)))
+    {
+      terminate("error in opening file '%s'\n", buf);
+    }
+
+  snprintf(buf, sizeof(buf), "%s%s", All.OutputDir, "info.txt");
+  if(!(FdInfo = fopen(buf, mode)))
+    {
+      terminate("error in opening file '%s'\n", buf);
+    }
+
+  snprintf(buf, sizeof(buf), "%s%s", All.OutputDir, "energy.txt");
+  if(!(FdEnergy = fopen(buf, mode)))
+    {
+      terminate("error in opening file '%s'\n", buf);
+    }
+
+  snprintf(buf, sizeof(buf), "%s%s", All.OutputDir, "timings.txt");
+  if(!(FdTimings = fopen(buf, mode)))
+    {
+      terminate("error in opening file '%s'\n", buf);
+    }
+
+  snprintf(buf, sizeof(buf), "%s%s", All.OutputDir, "balance.txt");
+  if(!(FdBalance = fopen(buf, mode)))
+    {
+      terminate("error in opening file '%s'\n", buf);
+    }
+
+  snprintf(buf, sizeof(buf), "%s%s", All.OutputDir, "timebins.txt");
+  if(!(FdTimebin = fopen(buf, mode)))
+    {
+      terminate("error in opening file '%s'\n", buf);
+    }
+
+  snprintf(buf, sizeof(buf), "%s%s", All.OutputDir, "domain.txt");
+  if(!(FdDomain = fopen(buf, mode)))
+    {
+      terminate("error in opening file '%s'\n", buf);
+    }
+
+  snprintf(buf, sizeof(buf), "%s%s", All.OutputDir, "memory.txt");
+  if(!(FdMemory = fopen(buf, mode)))
+    {
+      terminate("error in opening file '%s'\n", buf);
+    }
+
+#ifdef FORCETEST
+  snprintf(buf, sizeof(buf), "%s%s", All.OutputDir, "forcetest.txt");
+  if(!(FdForceTest = fopen(buf, mode)))
+    {
+      terminate("error in opening file '%s'\n", buf);
+    }
+  /* this file is only opened here and closed again right away */
+  fclose(FdForceTest);
+#endif /* #ifdef FORCETEST */
+
+#ifdef RESTART_DEBUG
+  snprintf(buf, sizeof(buf), "%s%s", All.OutputDir, "restartdebug.txt");
+  if(!(FdRestartTest = fopen(buf, mode)))
+    {
+      terminate("error in opening file '%s'\n", buf);
+    }
+#endif /* #ifdef RESTART_DEBUG */
+
+#ifdef OUTPUT_CPU_CSV
+  snprintf(buf, sizeof(buf), "%s%s", All.OutputDir, "cpu.csv");
+  if(!(FdCPUCSV = fopen(buf, mode)))
+    {
+      terminate("error in opening file '%s'\n", buf);
+    }
+#endif /* #ifdef OUTPUT_CPU_CSV */
+
+#ifdef USE_SFR
+  snprintf(buf, sizeof(buf), "%s%s", All.OutputDir, "sfr.txt");
+  if(!(FdSfr = fopen(buf, mode)))
+    {
+      terminate("error in opening file '%s'\n", buf);
+    }
+#endif /* #ifdef USE_SFR */
+
+  /* balance.txt gets a legend of the timer symbols, cpu.csv (if enabled) its
+   * column header */
+  int i = 0;
+  fprintf(FdBalance, "\n");
+
+#ifdef OUTPUT_CPU_CSV
+  fprintf(FdCPUCSV, "STEP, TIME, CPUS, MULTIPLEDOMAIN, HIGHESTTIMEBIN, ");
+#endif /* #ifdef OUTPUT_CPU_CSV */
+  for(; i < CPU_LAST; i++)
+    {
+      /* timers lacking one of the two symbols are left out of the legend */
+      if(Timer_data[i].symb != 0 && Timer_data[i].symbImbal != 0)
+        {
+          fprintf(FdBalance, "%-20s = '%c' / '%c'\n", Timer_data[i].longname, Timer_data[i].symb, Timer_data[i].symbImbal);
+        }
+#ifdef OUTPUT_CPU_CSV
+      /* three columns (suffix 1, 2, 3) per timer */
+      fprintf(FdCPUCSV, "%s1, %s2, %s3, ", Timer_data[i].shortname, Timer_data[i].shortname, Timer_data[i].shortname);
+#endif /* #ifdef OUTPUT_CPU_CSV */
+    }
+  fprintf(FdBalance, "\n");
+
+#ifdef OUTPUT_CPU_CSV
+  fprintf(FdCPUCSV, "\n");
+#endif /* #ifdef OUTPUT_CPU_CSV */
+}
+
+/*! \brief Closes the global log-files opened by task 0 in open_logfiles().
+ *
+ *  \return void
+ */
+void close_logfiles(void)
+{
+  if(ThisTask != 0) /* only the root processors writes to the log files */
+    return;
+
+  fclose(FdCPU);
+  fclose(FdInfo);
+  fclose(FdEnergy);
+  fclose(FdTimings);
+  fclose(FdBalance);
+  fclose(FdTimebin);
+  fclose(FdDomain); /* domain.txt and memory.txt are opened unconditionally as well */
+  fclose(FdMemory);
+#ifdef OUTPUT_CPU_CSV
+  fclose(FdCPUCSV);
+#endif /* #ifdef OUTPUT_CPU_CSV */
+#ifdef USE_SFR
+  fclose(FdSfr);
+#endif /* #ifdef USE_SFR */
+}
+
+/*! \brief Writes log messages in log-files.
+ *
+ *  At each time step this function writes on to two log-files (task 0 only).
+ *  In FdInfo, it just lists the timesteps that have been done, while in
+ *  FdTimebin it outputs information about the active and occupied time-bins.
+ *  Additionally, reports to memory log-files are written. All tasks take part
+ *  in the MPI_Reduce that sums up the per-bin counts.
+ *  \return void
+ */
+void output_log_messages(void)
+{
+  double z;
+  int i, j, write_logs = 1;
+  double sum, avg_CPU_TimeBin[TIMEBINS], frac_CPU_TimeBin[TIMEBINS];
+  int weight, corr_weight;
+  long long tot_cumulative_grav[TIMEBINS], tot_cumulative_sph[TIMEBINS];
+  long long tot_grav, tot_sph;
+
+  TIMER_START(CPU_LOGS);
+
+  if(write_logs)
+    report_detailed_memory_usage_of_largest_task();
+
+  long long count[4 * TIMEBINS], tot_count[4 * TIMEBINS];
+  long long *tot_count_grav = &tot_count[0], *tot_count_sph = &tot_count[TIMEBINS];
+  int nelem = 2 * TIMEBINS;
+
+  for(i = 0; i < TIMEBINS; i++)
+    {
+      count[i]            = TimeBinsGravity.TimeBinCount[i];
+      count[i + TIMEBINS] = TimeBinsHydro.TimeBinCount[i];
+    }
+
+  MPI_Reduce(count, tot_count, nelem, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+
+  if(ThisTask == 0)
+    {
+      if(All.ComovingIntegrationOn)
+        {
+          z = 1.0 / (All.Time) - 1;
+
+          if(write_logs)
+            fprintf(FdInfo,
+                    "\nSync-Point %d, TimeBin=%d, Time: %g, Redshift: %g, Systemstep: %g, Dloga: %g, Nsync-grv: %10llu, Nsync-hyd: "
+                    "%10llu\n",
+                    All.NumCurrentTiStep, All.HighestActiveTimeBin, All.Time, z, All.TimeStep,
+                    log(All.Time) - log(All.Time - All.TimeStep), All.GlobalNSynchronizedGravity, All.GlobalNSynchronizedHydro);
+
+          printf("\n\nSync-Point %d, Time: %g, Redshift: %g, Systemstep: %g, Dloga: %g, Nsync-grv: %10llu, Nsync-hyd: %10llu\n",
+                 All.NumCurrentTiStep, All.Time, z, All.TimeStep, log(All.Time) - log(All.Time - All.TimeStep),
+                 All.GlobalNSynchronizedGravity, All.GlobalNSynchronizedHydro);
+
+          if(write_logs)
+            fprintf(FdTimebin, "\nSync-Point %d, Time: %g, Redshift: %g, Systemstep: %g, Dloga: %g\n", All.NumCurrentTiStep, All.Time,
+                    z, All.TimeStep, log(All.Time) - log(All.Time - All.TimeStep));
+
+          myflush(FdInfo);
+        }
+      else
+        {
+          if(write_logs)
+            fprintf(FdInfo, "\nSync-Point %d, TimeBin=%d, Time: %g, Systemstep: %g, Nsync-grv: %10llu, Nsync-hyd: %10llu\n",
+                    All.NumCurrentTiStep, All.HighestActiveTimeBin, All.Time, All.TimeStep, All.GlobalNSynchronizedGravity,
+                    All.GlobalNSynchronizedHydro);
+
+          printf("\n\nSync-Point %d, Time: %g, Systemstep: %g, Nsync-grv: %10llu, Nsync-hyd: %10llu\n", All.NumCurrentTiStep, All.Time,
+                 All.TimeStep, All.GlobalNSynchronizedGravity, All.GlobalNSynchronizedHydro);
+
+          if(write_logs)
+            fprintf(FdTimebin, "\nSync-Point %d, Time: %g, Systemstep: %g\n", All.NumCurrentTiStep, All.Time, All.TimeStep);
+
+          myflush(FdInfo);
+        }
+
+      for(i = 1, tot_cumulative_grav[0] = tot_count_grav[0], tot_cumulative_sph[0] = tot_count_sph[0]; i < TIMEBINS; i++)
+        {
+          tot_cumulative_grav[i] = tot_count_grav[i] + tot_cumulative_grav[i - 1];
+          tot_cumulative_sph[i]  = tot_count_sph[i] + tot_cumulative_sph[i - 1];
+        }
+
+      for(i = 0; i < TIMEBINS; i++)
+        {
+          for(j = 0, sum = 0; j < All.CPU_TimeBinCountMeasurements[i]; j++)
+            sum += All.CPU_TimeBinMeasurements[i][j];
+          avg_CPU_TimeBin[i] = All.CPU_TimeBinCountMeasurements[i] ? sum / All.CPU_TimeBinCountMeasurements[i] : 0;
+
+          /* bins not reached by the weighting loop below are still printed, so give them a defined value */
+          frac_CPU_TimeBin[i] = 0;
+        }
+
+      for(i = All.HighestOccupiedTimeBin, weight = 1, sum = 0; i >= 0 && tot_count_grav[i] > 0; i--, weight *= 2)
+        {
+          if(weight > 1)
+            corr_weight = weight / 2;
+          else
+            corr_weight = weight;
+
+          frac_CPU_TimeBin[i] = corr_weight * avg_CPU_TimeBin[i];
+          sum += frac_CPU_TimeBin[i];
+        }
+
+      for(i = All.HighestOccupiedTimeBin; i >= 0 && tot_count_grav[i] > 0; i--)
+        {
+          if(sum)
+            frac_CPU_TimeBin[i] /= sum;
+        }
+
+      char tracerString[13];
+
+      sprintf(tracerString, "%s", "");
+
+      char dustString[13];
+      sprintf(dustString, "%s", "");
+      if(write_logs)
+        fprintf(FdTimebin,
+                "Occupied timebins: gravity      hydro     %s     %s     dt              cumul-grav   cumul-sph A D    avg-time  "
+                "cpu-frac\n",
+                tracerString, dustString);
+
+      for(i = TIMEBINS - 1, tot_grav = tot_sph = 0; i >= 0; i--)
+        {
+          int binUsed = 0;
+
+#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
+          if(tot_count_grav[i] > 0)
+            binUsed = 1;
+#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) \
+        */
+
+          if(tot_count_sph[i] > 0)
+            binUsed = 1;
+
+          sprintf(tracerString, "%s", "");
+
+          if(binUsed)
+            {
+              if(write_logs)
+                fprintf(FdTimebin, " %c  bin=%2d      %10llu  %10llu  %s  %s  %16.12f       %10llu  %10llu %c %c  %10.2f    %5.1f%%\n",
+                        TimeBinSynchronized[i] ? 'X' : ' ', i, tot_count_grav[i], tot_count_sph[i], tracerString, dustString,
+                        i > 0 ? (((integertime)1) << i) * All.Timebase_interval : 0.0, tot_cumulative_grav[i], tot_cumulative_sph[i],
+                        (i == All.HighestActiveTimeBin) ? '<' : ' ',
+                        (All.HighestActiveTimeBin >= All.SmallestTimeBinWithDomainDecomposition && i == All.HighestActiveTimeBin)
+                            ? '*'
+                            : ' ',
+                        avg_CPU_TimeBin[i], 100.0 * frac_CPU_TimeBin[i]);
+
+              if(TimeBinSynchronized[i])
+                {
+                  tot_grav += tot_count_grav[i];
+                  tot_sph += tot_count_sph[i];
+                }
+            }
+        }
+
+      if(write_logs)
+        {
+          fprintf(FdTimebin, "               ------------------------\n");
+        }
+
+      sprintf(tracerString, "%s", "");
+      sprintf(dustString, "%s", "");
+
+      if(write_logs)
+        {
+#ifdef PMGRID
+          if(All.PM_Ti_endstep == All.Ti_Current)
+            {
+              fprintf(FdTimebin, "PM-Step. Total: %10llu  %10llu  %s  %s\n", tot_grav, tot_sph, tracerString, dustString);
+            }
+          else
+#endif /* #ifdef PMGRID */
+            {
+              fprintf(FdTimebin, "Total active:   %10llu  %10llu  %s  %s\n", tot_grav, tot_sph, tracerString, dustString);
+            }
+
+          fprintf(FdTimebin, "\n");
+        }
+
+      myflush(FdTimebin);
+    }
+
+#ifdef RESTART_DEBUG
+  log_restart_debug();
+#endif /* #ifdef RESTART_DEBUG */
+
+  TIMER_STOP(CPU_LOGS);
+}
+
+/*! \brief Sets up the CPU-time bookkeeping used by the cpu/balance logs.
+ *
+ *  Expands timer.h with TIMER_STRUCT defined, gives every timer a nesting depth (0 if its parent index is negative, otherwise
+ *  the parent's depth plus one), zeroes the per-step and cumulative CPU counters, puts CPU_MISC at the bottom of the timer
+ *  stack, resets CPUThisRun and takes the reference times WallclockTime and StartOfRun from second().
+ *  NOTE(review): timer.h is expanded inside the function body and presumably fills Timer_data[] -- confirm in utils/timer.h.
+ *
+ *  \return void
+ */
+void init_cpu_log(void)
+{
+  int i = 0;
+
+#define TIMER_STRUCT
+#include "../utils/timer.h"
+
+  for(i = 0; i < CPU_LAST; i++)
+    {
+      /* timers with a negative parent index sit at depth 0, all others one level below their parent */
+      Timer_data[i].depth = (Timer_data[i].parent < 0) ? 0 : Timer_data[Timer_data[i].parent].depth + 1;
+
+      /* nothing has been accounted yet, neither for the current step nor in total */
+      CPU_Step[i]    = 0.;
+      All.CPU_Sum[i] = 0.;
+    }
+
+  TimerStack[0] = CPU_MISC;
+  TimerStackPos = 0;
+  CPUThisRun    = 0.;
+
+  WallclockTime = second();
+  StartOfRun    = second();
+}
+
+/*! \brief Write the FdBalance and FdCPU files (and FdCPUCSV if OUTPUT_CPU_CSV is defined).
+ *
+ *  All tasks take part in three MPI_Reduce calls on the per-step timers CPU_Step[]; only task 0 evaluates the results and
+ *  writes. FdBalance receives one line per step with a character bar that shows how the step time splits over the timers,
+ *  FdCPU a table with the per-step and the cumulative time of every timer. Task 0 also appends the maximum total step time
+ *  to All.CPU_TimeBinMeasurements of the highest active time bin, dropping the oldest entry once
+ *  NUMBER_OF_MEASUREMENTS_TO_RECORD entries are stored. Finally every task zeroes CPU_Step[] and updates CPUThisRun.
+ *
+ * \return void
+ */
+void write_cpu_log(void)
+{
+  int write_logs = 1; /* never changed in this function, so every "if(write_logs)" branch below is taken */
+  double max_CPU_Step[CPU_LAST], avg_CPU_Step[CPU_LAST], summed_CPU_Step[CPU_LAST];
+  double t0, t1, tsum;
+  double avg_total   = 0;
+  double local_total = 0;
+  double max_total   = 0;
+  int i;
+
+  TIMER_START(CPU_LOGS);
+
+  for(i = 0; i < CPU_LAST; i++) /* time of this task in the current step, summed over all timers */
+    {
+      local_total += CPU_Step[i];
+    }
+
+  MPI_Reduce(CPU_Step, max_CPU_Step, CPU_LAST, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); /* per-timer maximum over tasks */
+  MPI_Reduce(&local_total, &max_total, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); /* largest per-task total */
+  MPI_Reduce(CPU_Step, avg_CPU_Step, CPU_LAST, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); /* per-timer sum, divided by NTask below */
+
+  if(ThisTask == 0) /* the reduction results exist on the root task only */
+    {
+      /* sum up cpu items into groups: start from each timer's own time (still summed over tasks) ... */
+      for(i = 0; i < CPU_LAST; i++)
+        {
+          summed_CPU_Step[i] = avg_CPU_Step[i];
+        }
+      for(i = CPU_LAST - 1; i > CPU_ALL; i--) /* ... then, going backwards, add each timer's total to its parent */
+        {
+          if(Timer_data[i].parent >= 0) /* NOTE(review): presumably parents have smaller indices than their children - confirm */
+            {
+              summed_CPU_Step[Timer_data[i].parent] += summed_CPU_Step[i];
+            }
+        }
+
+      /* calc averages (sum over tasks -> mean per task), update the cumulative All.CPU_Sum */
+      for(i = 0; i < CPU_LAST; i++)
+        {
+          avg_CPU_Step[i] /= NTask;
+          avg_total += avg_CPU_Step[i];
+
+          summed_CPU_Step[i] /= NTask;
+          All.CPU_Sum[i] += summed_CPU_Step[i];
+        }
+
+      /* create balance.txt string: the whole bar is first filled with '-' */
+      char cpu_String[CPU_STRING_LEN + 1];
+      put_symbol(cpu_String, 0., 1.0, '-');
+
+      for(i = 1, tsum = 0.0; i < CPU_LAST; i++)
+        {
+          if(max_CPU_Step[i] > 0 && Timer_data[i].symb != 0 && Timer_data[i].symbImbal != 0)
+            {
+              t0 = tsum;
+              t1 = tsum + avg_CPU_Step[i] * (avg_CPU_Step[i] / max_CPU_Step[i]); /* share avg/max of the timer's mean time */
+              put_symbol(cpu_String, t0 / avg_total, t1 / avg_total, Timer_data[i].symb);
+              tsum += t1 - t0;
+
+              t0 = tsum;
+              t1 = tsum + avg_CPU_Step[i] * ((max_CPU_Step[i] - avg_CPU_Step[i]) / max_CPU_Step[i]); /* the remaining share */
+              put_symbol(cpu_String, t0 / avg_total, t1 / avg_total, Timer_data[i].symbImbal);
+              tsum += t1 - t0; /* both segments together advance tsum by avg_CPU_Step[i] */
+            }
+        }
+
+      if(write_logs)
+        {
+          fprintf(FdBalance, "Step=%7d  sec=%10.3f Nsync-grv=%10llu Nsync-hyd=%10llu  %s\n", All.NumCurrentTiStep, max_total,
+                  All.GlobalNSynchronizedGravity, All.GlobalNSynchronizedHydro, cpu_String);
+        }
+
+      myflush(FdBalance);
+
+      if(All.CPU_TimeBinCountMeasurements[All.HighestActiveTimeBin] == NUMBER_OF_MEASUREMENTS_TO_RECORD)
+        {
+          All.CPU_TimeBinCountMeasurements[All.HighestActiveTimeBin]--; /* history is full: drop the oldest sample ... */
+          memmove(&All.CPU_TimeBinMeasurements[All.HighestActiveTimeBin][0], &All.CPU_TimeBinMeasurements[All.HighestActiveTimeBin][1],
+                  (NUMBER_OF_MEASUREMENTS_TO_RECORD - 1) * sizeof(double)); /* ... by moving the newer ones down one slot */
+        }
+
+      All.CPU_TimeBinMeasurements[All.HighestActiveTimeBin][All.CPU_TimeBinCountMeasurements[All.HighestActiveTimeBin]++] = max_total;
+
+      if(write_logs)
+        {
+#ifdef OUTPUT_CPU_CSV
+          fprintf(FdCPUCSV, "%d, %g, %d, %d, %d, ", All.NumCurrentTiStep, All.Time, NTask, All.MultipleDomains,
+                  All.HighestActiveTimeBin);
+#endif /* #ifdef OUTPUT_CPU_CSV */
+          fprintf(FdCPU, "Step %d, Time: %g, CPUs: %d, MultiDomains: %d, HighestActiveTimeBin: %d\n", All.NumCurrentTiStep, All.Time,
+                  NTask, All.MultipleDomains, All.HighestActiveTimeBin);
+
+          fprintf(FdCPU, "                          diff               cumulative\n");
+
+          for(i = 0; i < CPU_LAST; i++) /* one row per timer; the "%*s" fields indent the row according to the timer's depth */
+            {
+              fprintf(FdCPU, "%*s%*s%10.2f  %5.1f%% %10.2f  %*s%5.1f%%\n", 2 * Timer_data[i].depth, "", -20 + 2 * Timer_data[i].depth,
+                      Timer_data[i].longname, summed_CPU_Step[i], summed_CPU_Step[i] / summed_CPU_Step[CPU_ALL] * 100., All.CPU_Sum[i],
+                      5 * Timer_data[i].depth, "", All.CPU_Sum[i] / All.CPU_Sum[CPU_ALL] * 100.);
+
+#ifdef OUTPUT_CPU_CSV
+              fprintf(FdCPUCSV, "%f, %f, %f, ", summed_CPU_Step[i], All.CPU_Sum[i], All.CPU_Sum[i] / All.CPU_Sum[CPU_ALL] * 100.);
+#endif /* #ifdef OUTPUT_CPU_CSV */
+            }
+
+          fprintf(FdCPU, "\n");
+        }
+
+      myflush(FdCPU);
+
+#ifdef OUTPUT_CPU_CSV
+      if(write_logs)
+        fprintf(FdCPUCSV, "\n"); /* ends the CSV record that was started above for this step */
+
+      myflush(FdCPUCSV);
+#endif /* #ifdef OUTPUT_CPU_CSV */
+    }
+
+  for(i = 0; i < CPU_LAST; i++) /* executed on every task: start the next step with empty per-step timers */
+    CPU_Step[i] = 0.;
+
+  CPUThisRun = timediff(StartOfRun, second());
+
+  TIMER_STOP(CPU_LOGS);
+}
+
+/*! \brief Paint one segment of the cpu balance string that represents the
+ *         cpu usage in a graphical way.
+ *
+ *  The fractions t0 and t1 are scaled by CPU_STRING_LEN (t0 is rounded to the
+ *  nearest position, t1 is truncated) and clamped to [0, CPU_STRING_LEN]; all
+ *  positions from the first to the last one, both included, are set to c.
+ *  Afterwards position CPU_STRING_LEN always holds the terminating 0, so
+ *  positions beyond CPU_STRING_LEN are never written.
+ *
+ *  \param[out] string String to fill; CPU_STRING_LEN + 1 characters are addressed.
+ *  \param[in] t0 Start of the segment as a fraction of the string length.
+ *  \param[in] t1 End of the segment as a fraction of the string length.
+ *  \param[in] c Symbol to be put on string.
+ *
+ *  \return void
+ */
+void put_symbol(char *string, double t0, double t1, char c)
+{
+  int pos  = (int)(t0 * CPU_STRING_LEN + 0.5);
+  int last = (int)(t1 * CPU_STRING_LEN);
+
+  /* keep the segment inside the string */
+  pos  = (pos < 0) ? 0 : pos;
+  last = (last > CPU_STRING_LEN) ? CPU_STRING_LEN : last;
+
+  /* inclusive fill; nothing is written when pos is already beyond last */
+  for(; pos <= last; pos++)
+    string[pos] = c;
+
+  /* position CPU_STRING_LEN may just have received c, it is the terminator in any case */
+  string[CPU_STRING_LEN] = '\0';
+}
diff --git a/src/amuse/community/arepo/src/io/parameters.c b/src/amuse/community/arepo/src/io/parameters.c
new file mode 100644
index 0000000000..059d422ceb
--- /dev/null
+++ b/src/amuse/community/arepo/src/io/parameters.c
@@ -0,0 +1,861 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/io/parameters.c
+ * \date        05/2018
+ * \brief       Parses the parameter file.
+ * \details     This file contains the routine to parse the parameter file.
+ *              Additionally the output list is also parsed.
+ *              contains functions:
+ *                void read_parameter_file(char *fname)
+ *                void check_parameters()
+ *                int read_outputlist(char *fname)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_rng.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief This function parses the parameter file.
+ *
+ *  Each parameter is defined by a keyword (`tag'), and can be either of type double, int, or character string. On task 0,
+ *  three arrays containing the name, type and address of each parameter are filled first. The routine then parses the file
+ *  line by line (lines with fewer than two tokens or starting with '%' are skipped), fills the referenced variables and
+ *  echoes the accepted values to stdout and to the file "<fname>-usedvalues", which is afterwards copied into the output
+ *  directory with system() unless NOCALLSOFSYSTEM is set. Every tag has to appear exactly once: missing tags, and (unless
+ *  ALLOWEXTRAPARAMS is set) unknown or repeated tags, raise the error flag, upon which all tasks call MPI_Finalize() and
+ *  exit. Otherwise the structure All and the tables Parameters, ParametersValue and ParametersType are broadcast from task 0.
+ *
+ *  \param[in] fname The file name of the parameter file
+ *
+ *  \return void
+ */
+void read_parameter_file(char *fname)
+{
+#define REAL 1
+#define STRING 2
+#define INT 3
+  /* buf1 and buf2 receive tokens scanned out of buf with "%s", hence they are made as large as buf itself */
+  FILE *fd, *fdout;
+  char buf[MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 200], buf1[MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 200],
+      buf2[MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 200], buf3[MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 400];
+  int i, j, nt = 0; /* nt is only counted up on task 0; initialised so that the other tasks never read an indeterminate value */
+  int id[MAX_PARAMETERS];
+  void *addr[MAX_PARAMETERS];
+  char tag[MAX_PARAMETERS][MAXLEN_PARAM_TAG];
+  int param_handled[MAX_PARAMETERS];
+  int errorFlag = 0;
+
+  All.StarformationOn = 0; /* defaults */
+
+  for(i = 0; i < MAX_PARAMETERS; i++)
+    {
+      param_handled[i] = 0;
+    }
+
+  if(sizeof(long long) != 8)
+    {
+      mpi_terminate("\nType `long long' is not 64 bit on this platform. Stopping.\n\n");
+    }
+
+  if(sizeof(int) != 4)
+    {
+      mpi_terminate("\nType `int' is not 32 bit on this platform. Stopping.\n\n");
+    }
+
+  if(sizeof(float) != 4)
+    {
+      mpi_terminate("\nType `float' is not 32 bit on this platform. Stopping.\n\n");
+    }
+
+  if(sizeof(double) != 8)
+    {
+      mpi_terminate("\nType `double' is not 64 bit on this platform. Stopping.\n\n");
+    }
+
+  if(ThisTask == 0) /* read parameter file on process 0 */
+    {
+      nt = 0; /* fill the lookup tables: tag[] = name, addr[] = destination, id[] = type code */
+
+      strcpy(tag[nt], "InitCondFile");
+      addr[nt] = All.InitCondFile;
+      id[nt++] = STRING;
+
+      strcpy(tag[nt], "OutputDir");
+      addr[nt] = All.OutputDir;
+      id[nt++] = STRING;
+
+#ifdef TOLERATE_WRITE_ERROR
+      strcpy(tag[nt], "AlternativeOutputDir");
+      addr[nt] = AlternativeOutputDir;
+      id[nt++] = STRING;
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+      strcpy(tag[nt], "SnapshotFileBase");
+      addr[nt] = All.SnapshotFileBase;
+      id[nt++] = STRING;
+
+      strcpy(tag[nt], "ResubmitCommand");
+      addr[nt] = All.ResubmitCommand;
+      id[nt++] = STRING;
+
+      strcpy(tag[nt], "OutputListFilename");
+      addr[nt] = All.OutputListFilename;
+      id[nt++] = STRING;
+
+      strcpy(tag[nt], "OutputListOn");
+      addr[nt] = &All.OutputListOn;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "Omega0");
+      addr[nt] = &All.Omega0;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "OmegaBaryon");
+      addr[nt] = &All.OmegaBaryon;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "OmegaLambda");
+      addr[nt] = &All.OmegaLambda;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "HubbleParam");
+      addr[nt] = &All.HubbleParam;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "BoxSize");
+      addr[nt] = &All.BoxSize;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "PeriodicBoundariesOn");
+      addr[nt] = &All.PeriodicBoundariesOn;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "MaxMemSize");
+      addr[nt] = &All.MaxMemSize;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "TimeOfFirstSnapshot");
+      addr[nt] = &All.TimeOfFirstSnapshot;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "CpuTimeBetRestartFile");
+      addr[nt] = &All.CpuTimeBetRestartFile;
+      id[nt++] = REAL;
+
+#ifdef REDUCE_FLUSH
+      strcpy(tag[nt], "FlushCpuTimeDiff");
+      addr[nt] = &All.FlushCpuTimeDiff;
+      id[nt++] = REAL;
+#endif /* #ifdef REDUCE_FLUSH */
+
+      strcpy(tag[nt], "TimeBetStatistics");
+      addr[nt] = &All.TimeBetStatistics;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "TimeBegin");
+      addr[nt] = &All.TimeBegin;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "TimeMax");
+      addr[nt] = &All.TimeMax;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "TimeBetSnapshot");
+      addr[nt] = &All.TimeBetSnapshot;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "UnitVelocity_in_cm_per_s");
+      addr[nt] = &All.UnitVelocity_in_cm_per_s;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "UnitLength_in_cm");
+      addr[nt] = &All.UnitLength_in_cm;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "UnitMass_in_g");
+      addr[nt] = &All.UnitMass_in_g;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "ErrTolIntAccuracy");
+      addr[nt] = &All.ErrTolIntAccuracy;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "ErrTolTheta");
+      addr[nt] = &All.ErrTolTheta;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "ErrTolForceAcc");
+      addr[nt] = &All.ErrTolForceAcc;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "MaxSizeTimestep");
+      addr[nt] = &All.MaxSizeTimestep;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "MinSizeTimestep");
+      addr[nt] = &All.MinSizeTimestep;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "CourantFac");
+      addr[nt] = &All.CourantFac;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "LimitUBelowThisDensity");
+      addr[nt] = &All.LimitUBelowThisDensity;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "LimitUBelowCertainDensityToThisValue");
+      addr[nt] = &All.LimitUBelowCertainDensityToThisValue;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "DesNumNgb");
+      addr[nt] = &All.DesNumNgb;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "MultipleDomains");
+      addr[nt] = &All.MultipleDomains;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "TopNodeFactor");
+      addr[nt] = &All.TopNodeFactor;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "ActivePartFracForNewDomainDecomp");
+      addr[nt] = &All.ActivePartFracForNewDomainDecomp;
+      id[nt++] = REAL;
+
+#ifdef SUBFIND
+      strcpy(tag[nt], "DesLinkNgb");
+      addr[nt] = &All.DesLinkNgb;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "ErrTolThetaSubfind");
+      addr[nt] = &All.ErrTolThetaSubfind;
+      id[nt++] = REAL;
+#endif /* #ifdef SUBFIND */
+
+#if defined(ISOTHERM_EQS)
+      strcpy(tag[nt], "IsoSoundSpeed");
+      addr[nt] = &All.IsoSoundSpeed;
+      id[nt++] = REAL;
+#endif /* #if defined(ISOTHERM_EQS) */
+
+      strcpy(tag[nt], "MaxNumNgbDeviation");
+      addr[nt] = &All.MaxNumNgbDeviation;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "ComovingIntegrationOn");
+      addr[nt] = &All.ComovingIntegrationOn;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "ICFormat");
+      addr[nt] = &All.ICFormat;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "SnapFormat");
+      addr[nt] = &All.SnapFormat;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "NumFilesPerSnapshot");
+      addr[nt] = &All.NumFilesPerSnapshot;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "NumFilesWrittenInParallel");
+      addr[nt] = &All.NumFilesWrittenInParallel;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "ResubmitOn");
+      addr[nt] = &All.ResubmitOn;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "CoolingOn");
+      addr[nt] = &All.CoolingOn;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "StarformationOn");
+      addr[nt] = &All.StarformationOn;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "TypeOfTimestepCriterion");
+      addr[nt] = &All.TypeOfTimestepCriterion;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "TypeOfOpeningCriterion");
+      addr[nt] = &All.TypeOfOpeningCriterion;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "TimeLimitCPU");
+      addr[nt] = &All.TimeLimitCPU;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "GasSoftFactor");
+      addr[nt] = &All.GasSoftFactor;
+      id[nt++] = REAL;
+
+      for(i = 0; i < NSOFTTYPES; i++) /* one tag per softening type: SofteningComovingType0, SofteningComovingType1, ... */
+        {
+          char buf[100];
+          sprintf(buf, "SofteningComovingType%d", i);
+          strcpy(tag[nt], buf);
+          addr[nt] = &All.SofteningComoving[i];
+          id[nt++] = REAL;
+        }
+
+      for(i = 0; i < NSOFTTYPES; i++)
+        {
+          char buf[100];
+          sprintf(buf, "SofteningMaxPhysType%d", i);
+          strcpy(tag[nt], buf);
+          addr[nt] = &All.SofteningMaxPhys[i];
+          id[nt++] = REAL;
+        }
+
+      for(i = 0; i < NTYPES; i++) /* one tag per particle type */
+        {
+          char buf[100];
+          sprintf(buf, "SofteningTypeOfPartType%d", i);
+          strcpy(tag[nt], buf);
+          addr[nt] = &All.SofteningTypeOfPartType[i];
+          id[nt++] = INT;
+        }
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      strcpy(tag[nt], "MinimumComovingHydroSoftening");
+      addr[nt] = &All.MinimumComovingHydroSoftening;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "AdaptiveHydroSofteningSpacing");
+      addr[nt] = &All.AdaptiveHydroSofteningSpacing;
+      id[nt++] = REAL;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+
+      strcpy(tag[nt], "GravityConstantInternal");
+      addr[nt] = &All.GravityConstantInternal;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "InitGasTemp");
+      addr[nt] = &All.InitGasTemp;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "MinGasTemp");
+      addr[nt] = &All.MinGasTemp;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "MinEgySpec");
+      addr[nt] = &All.MinEgySpec;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "MinimumDensityOnStartUp");
+      addr[nt] = &All.MinimumDensityOnStartUp;
+      id[nt++] = REAL;
+
+#ifdef NODEREFINE_BACKGROUND_GRID
+      strcpy(tag[nt], "MeanVolume");
+      addr[nt] = &All.MeanVolume;
+      id[nt++] = REAL;
+#endif /* #ifdef NODEREFINE_BACKGROUND_GRID */
+
+#ifndef VORONOI_STATIC_MESH
+#ifdef REGULARIZE_MESH_FACE_ANGLE
+      strcpy(tag[nt], "CellMaxAngleFactor");
+      addr[nt] = &All.CellMaxAngleFactor;
+      id[nt++] = REAL;
+#else  /* #ifdef REGULARIZE_MESH_FACE_ANGLE */
+      strcpy(tag[nt], "CellShapingFactor");
+      addr[nt] = &All.CellShapingFactor;
+      id[nt++] = REAL;
+#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */
+
+      strcpy(tag[nt], "CellShapingSpeed");
+      addr[nt] = &All.CellShapingSpeed;
+      id[nt++] = REAL;
+#endif /* #ifndef VORONOI_STATIC_MESH */
+
+#if defined(COOLING)
+      strcpy(tag[nt], "TreecoolFile");
+      addr[nt] = All.TreecoolFile;
+      id[nt++] = STRING;
+#endif /* #if defined(COOLING) */
+
+#if defined(REFINEMENT)
+      strcpy(tag[nt], "ReferenceGasPartMass");
+      addr[nt] = &All.ReferenceGasPartMass;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "TargetGasMassFactor");
+      addr[nt] = &All.TargetGasMassFactor;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "RefinementCriterion");
+      addr[nt] = &All.RefinementCriterion;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "DerefinementCriterion");
+      addr[nt] = &All.DerefinementCriterion;
+      id[nt++] = INT;
+#endif /* #if defined(REFINEMENT) */
+
+#ifdef USE_SFR
+      strcpy(tag[nt], "CritOverDensity");
+      addr[nt] = &All.CritOverDensity;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "TemperatureThresh");
+      addr[nt] = &All.TemperatureThresh;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "CritPhysDensity");
+      addr[nt] = &All.CritPhysDensity;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "FactorSN");
+      addr[nt] = &All.FactorSN;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "FactorEVP");
+      addr[nt] = &All.FactorEVP;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "TempSupernova");
+      addr[nt] = &All.TempSupernova;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "TempClouds");
+      addr[nt] = &All.TempClouds;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "MaxSfrTimescale");
+      addr[nt] = &All.MaxSfrTimescale;
+      id[nt++] = REAL;
+#endif /* #ifdef USE_SFR */
+
+#ifdef MHD_SEEDFIELD
+      strcpy(tag[nt], "MHDSeedDir");
+      addr[nt] = &All.B_dir;
+      id[nt++] = INT;
+
+      strcpy(tag[nt], "MHDSeedValue");
+      addr[nt] = &All.B_value;
+      id[nt++] = REAL;
+#endif /* #ifdef MHD_SEEDFIELD */
+
+#ifdef REFINEMENT_VOLUME_LIMIT
+      strcpy(tag[nt], "MaxVolumeDiff");
+      addr[nt] = &All.MaxVolumeDiff;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "MinVolume");
+      addr[nt] = &All.MinVolume;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "MaxVolume");
+      addr[nt] = &All.MaxVolume;
+      id[nt++] = REAL;
+#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */
+
+#ifdef TILE_ICS
+      strcpy(tag[nt], "TileICsFactor");
+      addr[nt] = &All.TileICsFactor;
+      id[nt++] = INT;
+#endif /* #ifdef TILE_ICS */
+
+#ifdef ADDBACKGROUNDGRID
+      strcpy(tag[nt], "GridSize");
+      addr[nt] = &All.GridSize;
+      id[nt++] = INT;
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+#ifdef ONEDIMS_SPHERICAL
+      strcpy(tag[nt], "CoreRadius");
+      addr[nt] = &All.CoreRadius;
+      id[nt++] = REAL;
+
+      strcpy(tag[nt], "CoreMass");
+      addr[nt] = &All.CoreMass;
+      id[nt++] = REAL;
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+      if((fd = fopen(fname, "r")))
+        {
+          snprintf(buf, sizeof(buf), "%s%s", fname, "-usedvalues");
+          if(!(fdout = fopen(buf, "w")))
+            {
+              printf("error opening file '%s' \n", buf);
+              fclose(fd); /* the parameter file is not read in this case, so do not leave it open */
+              errorFlag = 1;
+            }
+          else
+            {
+              printf("Obtaining parameters from file '%s':\n\n", fname);
+              while(fgets(buf, sizeof(buf), fd) != NULL) /* ends on EOF and on read errors; feof() alone would spin forever */
+                {
+                  /* buf1, buf2 and buf3 are each at least as large as buf, so "%s" cannot write past their ends */
+                  if(sscanf(buf, "%s%s%s", buf1, buf2, buf3) < 2)
+                    continue;
+
+                  if(buf1[0] == '%')
+                    continue;
+
+                  for(i = 0, j = -1; i < nt; i++) /* j: index of the matching tag, -1 = unknown tag, -2 = tag already seen */
+                    if(strcmp(buf1, tag[i]) == 0)
+                      {
+                        if(param_handled[i] == 0)
+                          {
+                            j                = i;
+                            param_handled[i] = 1;
+                            break;
+                          }
+                        else
+                          {
+                            j = -2;
+                            break;
+                          }
+                      }
+
+                  if(j >= 0)
+                    {
+                      switch(id[j])
+                        {
+                          case REAL:
+                            *((double *)addr[j]) = atof(buf2);
+                            sprintf(buf3, "%%-%ds%%g\n", MAXLEN_PARAM_TAG); /* yields the format "%-<MAXLEN_PARAM_TAG>s%g\n" */
+                            fprintf(fdout, buf3, buf1, *((double *)addr[j]));
+                            fprintf(stdout, "        ");
+                            fprintf(stdout, buf3, buf1, *((double *)addr[j]));
+                            break;
+                          case STRING:
+                            strcpy((char *)addr[j], buf2); /* NOTE(review): unbounded, the destination size is unknown here */
+                            sprintf(buf3, "%%-%ds%%s\n", MAXLEN_PARAM_TAG);
+                            fprintf(fdout, buf3, buf1, buf2);
+                            fprintf(stdout, "        ");
+                            fprintf(stdout, buf3, buf1, buf2);
+                            break;
+                          case INT:
+                            *((int *)addr[j]) = atoi(buf2);
+                            sprintf(buf3, "%%-%ds%%d\n", MAXLEN_PARAM_TAG);
+                            fprintf(fdout, buf3, buf1, *((int *)addr[j]));
+                            fprintf(stdout, "        ");
+                            fprintf(stdout, buf3, buf1, *((int *)addr[j]));
+                            break;
+                        }
+                    }
+                  else if(j == -2)
+                    {
+#ifdef ALLOWEXTRAPARAMS
+                      warn("Tag '%s' ignored from file %s !", buf1, fname);
+#else  /* #ifdef ALLOWEXTRAPARAMS */
+                      fprintf(stdout, "Error in file %s:   Tag '%s' multiply defined.\n", fname, buf1);
+                      errorFlag = 1;
+#endif /* #ifdef ALLOWEXTRAPARAMS #else */
+                    }
+                  else
+                    {
+#ifdef ALLOWEXTRAPARAMS
+                      warn("Tag '%s' ignored from file %s !", buf1, fname);
+#else  /* #ifdef ALLOWEXTRAPARAMS */
+                      fprintf(stdout, "Error in file %s:   Tag '%s' not allowed\n", fname, buf1);
+                      errorFlag = 1;
+#endif /* #ifdef ALLOWEXTRAPARAMS #else */
+                    }
+                }
+              fclose(fd);
+              fclose(fdout);
+              printf("\n");
+
+              i = strlen(All.OutputDir); /* make sure that a non-empty OutputDir ends with '/' */
+              if(i > 0)
+                if(All.OutputDir[i - 1] != '/')
+                  strcat(All.OutputDir, "/");
+
+              mkdir(All.OutputDir, 02755);
+              snprintf(buf1, sizeof(buf1), "%s%s", fname, "-usedvalues");
+              snprintf(buf2, sizeof(buf2), "%s%s", All.OutputDir, "parameters-usedvalues");
+              snprintf(buf3, sizeof(buf3), "cp %s %s", buf1, buf2);
+#ifndef NOCALLSOFSYSTEM
+              if(errorFlag == 0)
+                system(buf3);
+#endif /* #ifndef NOCALLSOFSYSTEM */
+            }
+        }
+      else
+        {
+          printf("Parameter file %s not found.\n", fname);
+          errorFlag = 1;
+        }
+
+      for(i = 0; i < nt; i++) /* every registered tag must have been assigned a value */
+        {
+          if(param_handled[i] != 1)
+            {
+              printf("Error. I miss a value for tag '%s' in parameter file '%s'.\n", tag[i], fname);
+              errorFlag = 1;
+            }
+        }
+
+      if(All.OutputListOn && errorFlag == 0)
+        errorFlag += read_outputlist(All.OutputListFilename);
+      else
+        All.OutputListLength = 0;
+    }
+
+  MPI_Bcast(&errorFlag, 1, MPI_INT, 0, MPI_COMM_WORLD); /* all tasks learn whether task 0 ran into an error */
+
+  if(errorFlag)
+    {
+      MPI_Finalize();
+      exit(errorFlag);
+    }
+
+  All.NParameters = nt; /* the task 0 value counts; the broadcast of All below replaces it on all other tasks */
+
+  /* now communicate the relevant parameters to the other processes */
+  MPI_Bcast(&All, sizeof(struct global_data_all_processes), MPI_BYTE, 0, MPI_COMM_WORLD);
+
+#ifdef TOLERATE_WRITE_ERROR
+  MPI_Bcast(AlternativeOutputDir, MAXLEN_PATH, MPI_BYTE, 0, MPI_COMM_WORLD);
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+#ifdef HOST_MEMORY_REPORTING
+  check_maxmemsize_setting();
+#endif /* #ifdef HOST_MEMORY_REPORTING */
+
+  mymalloc_init();
+  /* tables with name, value and type code of every parameter, one fixed-size slot per parameter */
+  Parameters      = (char(*)[MAXLEN_PARAM_TAG])mymalloc("Parameters", All.NParameters * MAXLEN_PARAM_TAG * sizeof(char));
+  ParametersValue = (char(*)[MAXLEN_PARAM_VALUE])mymalloc("ParametersValue", All.NParameters * MAXLEN_PARAM_VALUE * sizeof(char));
+  ParametersType  = mymalloc("ParamtersType", All.NParameters * sizeof(char));
+
+  if(ThisTask == 0)
+    {
+      for(i = 0; i < All.NParameters; i++)
+        {
+          strncpy(Parameters[i], tag[i], MAXLEN_PARAM_TAG);
+          ParametersType[i] = id[i];
+          void *tmp         = ParametersValue[i]; /* REAL and INT values are stored as raw double / int at the slot start */
+          switch(id[i])
+            {
+              case REAL:
+                *((double *)tmp) = *((double *)addr[i]);
+                break;
+              case STRING:
+                strncpy(tmp, addr[i], MAXLEN_PARAM_VALUE);
+                ParametersValue[i][MAXLEN_PARAM_VALUE - 1] = 0; /* strncpy() does not terminate a value that fills the slot */
+                break;
+              case INT:
+                *((int *)tmp) = *((int *)addr[i]);
+                break;
+            }
+        }
+    }
+
+  MPI_Bcast(Parameters, sizeof(char) * All.NParameters * MAXLEN_PARAM_TAG, MPI_BYTE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ParametersValue, sizeof(char) * All.NParameters * MAXLEN_PARAM_VALUE, MPI_BYTE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(ParametersType, sizeof(char) * All.NParameters, MPI_BYTE, 0, MPI_COMM_WORLD);
+
+#undef REAL
+#undef STRING
+#undef INT
+}
+
+/*! \brief This function checks the consistency of the input parameters.
+ *
+ *  Checked are: the run time against MaxSizeTimestep, the softening type of every particle type, NumFilesWrittenInParallel
+ *  (reduced to NTask if larger, set to NTask if 0), the run-time switches PeriodicBoundariesOn, CoolingOn and StarformationOn
+ *  against the compile-time options, TypeOfTimestepCriterion and the supported range of NTYPES. If you encounter some possible
+ *  misuse with an error message that is hard to interpret, place a check with a terminate statement and a clear explanation here.
+ *  \return void
+ */
+void check_parameters()
+{
+  int i, errorFlag = 0;
+
+  /* check whether time max is larger than max timestep */
+  if(All.TimeMax - All.TimeBegin <= All.MaxSizeTimestep)
+    {
+      printf("PARAMETERS: check_parameters: TimeBegin = %g, TimeMax = %g, MaxSizeTimestep = %g \n", All.TimeBegin, All.TimeMax,
+             All.MaxSizeTimestep);
+      terminate(
+          "check_parameters: Your total runtime is smaller than the maximum allowed timestep! Choose an appropriate value for "
+          "MaxSizeTimestep < TimeMax-TimeBegin! \n");
+    }
+
+  /* check softening types: each particle type must refer to one of the NSOFTTYPES softening classes */
+  for(i = 0; i < NTYPES; i++)
+    {
+      if(All.SofteningTypeOfPartType[i] >= NSOFTTYPES || All.SofteningTypeOfPartType[i] < 0)
+        {
+          mpi_printf("SofteningTypeOfPartType%d invalid (NSOFTTYPES=%d)\n", i, NSOFTTYPES);
+          errorFlag = 1;
+        }
+    }
+
+  if(errorFlag) /* all offending particle types have been reported above before the run is stopped */
+    mpi_terminate("Softening invalid!");
+
+  if(All.NumFilesWrittenInParallel > NTask)
+    {
+      if(ThisTask == 0)
+        warn("NOTICE: Reducing requested NumFilesWrittenInParallel=%d to %d\n", All.NumFilesWrittenInParallel, NTask);
+      All.NumFilesWrittenInParallel = NTask;
+    }
+
+  if(All.NumFilesWrittenInParallel == 0)
+    {
+      mpi_printf("NOTICE: All.NumFilesWrittenInParallel has been set to be equal to the number of processors\n");
+      All.NumFilesWrittenInParallel = NTask;
+    }
+
+#ifndef GRAVITY_NOT_PERIODIC
+  if(All.PeriodicBoundariesOn == 0)
+    {
+      mpi_terminate(
+          "Code was compiled with gravity periodic boundary conditions switched on.\nYou must set `PeriodicBoundariesOn=1', or "
+          "recompile the code.\n");
+    }
+#else  /* #ifndef GRAVITY_NOT_PERIODIC */
+  if(All.PeriodicBoundariesOn == 1)
+    {
+      mpi_terminate(
+          "Code was compiled with gravity periodic boundary conditions switched off.\nYou must set `PeriodicBoundariesOn=0', or "
+          "recompile the code.\n");
+    }
+#endif /* #ifndef GRAVITY_NOT_PERIODIC #else */
+
+#ifdef COOLING
+  if(All.CoolingOn == 0)
+    {
+      mpi_terminate("Code was compiled with cooling switched on.\nYou must set `CoolingOn=1', or recompile the code.\n");
+    }
+#else  /* #ifdef COOLING */
+  if(All.CoolingOn == 1)
+    {
+      mpi_terminate("Code was compiled with cooling switched off.\nYou must set `CoolingOn=0', or recompile the code.\n");
+    }
+#endif /* #ifdef COOLING #else */
+
+  if(All.TypeOfTimestepCriterion >= 3)
+    {
+      mpi_terminate("The specified timestep criterion\nis not valid\n");
+    }
+
+#if(NTYPES < 6)
+  mpi_terminate("NTYPES < 6 is not allowed.\n");
+#endif /* #if (NTYPES < 6) */
+
+#if(NTYPES > 15)
+  mpi_terminate("NTYPES > 15 is not supported yet.\n");
+#endif /* #if (NTYPES > 15) */
+
+#if(NTYPES > 8)
+  if(All.ICFormat == 1 || All.ICFormat == 2)
+    {
+      mpi_terminate("NTYPES>8 is not allowed with ICFormat=%d, since the header block is limited to 256 bytes.\n", All.ICFormat);
+    }
+#endif /* #if (NTYPES > 8) */
+
+#ifdef USE_SFR
+  if(All.StarformationOn == 0)
+    {
+      mpi_terminate("Code was compiled with star formation switched on.\nYou must set `StarformationOn=1', or recompile the code.\n");
+    }
+  if(All.CoolingOn == 0)
+    {
+      mpi_terminate(
+          "You try to use the code with star formation enabled,\nbut you did not switch on cooling.\nThis mode is not supported.\n");
+    }
+#else  /* #ifdef USE_SFR */
+  if(All.StarformationOn == 1)
+    {
+      mpi_terminate("Code was compiled with star formation switched off.\nYou must set `StarformationOn=0', or recompile the code.\n");
+    }
+#endif /* #ifdef USE_SFR #else */
+
+#if defined(ENFORCE_JEANS_STABILITY_OF_CELLS) && defined(USE_SFR)
+  if(ThisTask == 0)
+    warn("Code was compiled with ENFORCE_JEANS_STABILITY_OF_CELLS together with another EOS. Please make sure you really want this.");
+#endif /* #if defined(ENFORCE_JEANS_STABILITY_OF_CELLS) && defined(USE_SFR) */
+}
+
+/*! \brief This function reads a table with a list of desired output times.
+ *
+ *  The table does not have to be ordered in any way, but may not contain more than MAXLEN_OUTPUTLIST entries (the run is
+ *  terminated otherwise). A line holds a time, optionally followed by an integer flag (1 if absent); lines that do not start
+ *  with a number are ignored. The entries are stored in All.OutputListTimes / All.OutputListFlag / All.OutputListLength.
+ *
+ *  \param[in] fname The file name of the outputlist.
+ *  \return 0: success  1: unable to open file.
+ */
+int read_outputlist(char *fname)
+{
+  FILE *fd;
+  int count, flag;
+  double outtime;
+  char buf[512], msg[512];
+
+  if(!(fd = fopen(fname, "r")))
+    {
+      printf("can't read output list in file '%s'\n", fname);
+      return 1;
+    }
+
+  All.OutputListLength = 0;
+
+  while(fgets(buf, 500, fd) == buf)
+    {
+      /* parse into a local first: the list may already be full, in which case the table must not be touched any more */
+      count = sscanf(buf, " %lg %d ", &outtime, &flag);
+
+      if(count == 1)
+        flag = 1;
+
+      if(count == 1 || count == 2)
+        {
+          if(All.OutputListLength >= MAXLEN_OUTPUTLIST)
+            {
+              sprintf(msg, "\ntoo many entries in output-list. You should increase MAXLEN_OUTPUTLIST=%d.\n", (int)MAXLEN_OUTPUTLIST);
+              terminate(msg);
+            }
+
+          All.OutputListTimes[All.OutputListLength] = outtime;
+          All.OutputListFlag[All.OutputListLength] = flag;
+          All.OutputListLength++;
+        }
+    }
+
+  fclose(fd);
+
+  printf("\nBEGRUN: found %d times in output-list.\n", All.OutputListLength);
+
+  return 0;
+}
diff --git a/src/amuse/community/arepo/src/io/read_ic.c b/src/amuse/community/arepo/src/io/read_ic.c
new file mode 100644
index 0000000000..97481c91ad
--- /dev/null
+++ b/src/amuse/community/arepo/src/io/read_ic.c
@@ -0,0 +1,1900 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/io/read_ic.c
+ * \date        05/2018
+ * \brief       Contains the routines needed to load initial conditions.
+ * \details     contains functions:
+ *                void read_ic(const char *fname, int readTypes)
+ *                MyIDType determine_ids_offset(void)
+ *                void empty_read_buffer(enum iofields blocknr, int offset,
+ *                  int pc, int type)
+ *                void share_particle_number_in_file(const char *fname, int
+ *                  filenr, int readTask, int lastTask, int readTypes)
+ *                void read_file(const char *fname, int filenr, int readTask,
+ *                  int lastTask, int readTypes)
+ *                int find_files(const char *fname)
+ *                void distribute_file(int nfiles, int firstfile, int
+ *                  firsttask, int lasttask, int *filenr, int *master, int
+ *                  *last)
+ *                herr_t hdf5_header_error_handler(void *unused)
+ *                void read_header_attributes_in_hdf5(const char *fname)
+ *                void read_header_attributes(FILE * fd)
+ *                void swap_Nbyte(char *data, int n, int m)
+ *                void swap_header()
+ *                void tile_ics(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 08.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifndef IDS_OFFSET /* fixed ID offset returned by determine_ids_offset() unless OFFSET_FOR_NON_CONTIGUOUS_IDS is defined */
+#ifdef LONGIDS
+#define IDS_OFFSET 100000000000
+#else /* #ifdef LONGIDS */
+#define IDS_OFFSET 1000000000
+#endif /* #ifdef LONGIDS #else */
+#endif /* #ifndef IDS_OFFSET */
+
+#define SKIP                                 \
+  {                                          \
+    my_fread(&blksize1, sizeof(int), 1, fd); \
+  } /* my_fread() of one int-sized item from the caller's fd into the caller's local blksize1 */
+#define SKIP2                                \
+  {                                          \
+    my_fread(&blksize2, sizeof(int), 1, fd); \
+  } /* same as SKIP, but the value is stored in the caller's local blksize2 */
+
+void read_header_attributes(FILE *fd); /* forward declaration; the file header lists it as defined in this file */
+
+#ifdef HAVE_HDF5
+#include <hdf5.h>
+void read_header_attributes_in_hdf5(const char *fname);
+#endif /* #ifdef HAVE_HDF5 */
+
+int num_files; /* number of IC files; assigned from find_files() in read_ic() */
+
+int swap_file = 8; /* reassigned in share_particle_number_in_file() while the header is read; NOTE(review): 8 presumably means "no byte swap" - confirm in swap_Nbyte() */
+
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
+/*! \brief Struct containing information about the number of particles per
+ *         particle type.
+ */
+static struct ntypes_data
+{
+  int npart[NTYPES];
+} * ntype_in_files; /* one entry per IC file; allocated and zeroed in read_ic() */
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
+
+/*! \brief Reads initial conditions that are in one of the supported file
+ *         formats.
+ *
+ *  Snapshot files can be used as input files. However, when a
+ *  snapshot file is used as input, not all the information in the header is
+ *  used: THE STARTING TIME NEEDS TO BE SET IN THE PARAMETERFILE.
+ *  Alternatively, the code can be started with restartflag 2, then snapshots
+ *  from the code can be used as initial conditions-files without having to
+ *  change the parameter file. For gas particles, only the internal energy is
+ *  read, the density and mean molecular weight will be recomputed by the code.
+ *  When InitGasTemp>0 is given, the gas temperature will be initialized to
+ *  this value assuming a mean molecular weight either corresponding to
+ *  complete neutrality, or full ionization.
+ *
+ *  \param[in] fname File name of the ICs.
+ *  \param[in] readTypes A bitfield that determines what particle types to
+ *             read, only if the bit corresponding to a particle type is set,
+ *             the corresponding data is loaded, otherwise its particle number
+ *             is set to zero. (This is only implemented for HDF5 files.)
+ *
+ *  \return void
+ */
+void read_ic(const char *fname, int readTypes)
+{
+  int i, rep, rest_files, ngroups, gr, filenr, masterTask, lastTask, groupMaster;
+  double u_init, molecular_weight;
+  char buf[500];
+  double t0, t1;
+
+  if((All.ICFormat < 1) || (All.ICFormat > 4))
+    {
+      mpi_terminate("ICFormat=%d not supported.\n", All.ICFormat);
+    }
+
+  t0 = second();
+  CPU_Step[CPU_MISC] += measure_time();
+
+  num_files = find_files(fname);
+
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
+  ntype_in_files = mymalloc("ntype_in_files", num_files * sizeof(struct ntypes_data));
+  memset(ntype_in_files, 0, num_files * sizeof(struct ntypes_data));
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
+
+  All.TotNumPart = 0; /* share_particle_number_in_file() fills in the global totals only while this is still 0 */
+
+  /* we repeat reading the headers of the files two times. In the first iteration, only the
+   * particle numbers ending up on each processor are assembled, followed by memory allocation.
+   * In the second iteration, the data is actually read in.
+   */
+  for(rep = 0; rep < 2; rep++)
+    {
+      NumPart = 0;
+      NumGas  = 0;
+
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
+      if(rep == 1)
+        MPI_Allreduce(MPI_IN_PLACE, ntype_in_files, num_files * NTYPES, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
+
+      rest_files = num_files;
+      while(rest_files > NTask) /* more files than tasks: every task handles one file of its own per pass */
+        {
+          sprintf(buf, "%s.%d", fname, ThisTask + (rest_files - NTask));
+          if(All.ICFormat == 3)
+            sprintf(buf, "%s.%d.hdf5", fname, ThisTask + (rest_files - NTask));
+
+          ngroups = NTask / All.NumFilesWrittenInParallel;
+          if((NTask % All.NumFilesWrittenInParallel))
+            ngroups++; /* round up */
+          groupMaster = (ThisTask / ngroups) * ngroups;
+
+          for(gr = 0; gr < ngroups; gr++)
+            {
+              if(ThisTask == (groupMaster + gr)) /* ok, it's this processor's turn */
+                {
+                  if(rep == 0)
+                    share_particle_number_in_file(buf, ThisTask + (rest_files - NTask), ThisTask, ThisTask, readTypes);
+                  else
+                    read_file(buf, ThisTask + (rest_files - NTask), ThisTask, ThisTask, readTypes);
+                }
+              MPI_Barrier(MPI_COMM_WORLD);
+            }
+
+          rest_files -= NTask;
+        }
+
+      if(rest_files > 0) /* at most NTask files are left: each file is handled by the task range masterTask..lastTask */
+        {
+          distribute_file(rest_files, 0, 0, NTask - 1, &filenr, &masterTask, &lastTask);
+
+          if(num_files > 1)
+            {
+              sprintf(buf, "%s.%d", fname, filenr);
+              if(All.ICFormat == 3)
+                sprintf(buf, "%s.%d.hdf5", fname, filenr);
+            }
+          else
+            {
+              sprintf(buf, "%s", fname);
+              if(All.ICFormat == 3)
+                sprintf(buf, "%s.hdf5", fname);
+            }
+
+          ngroups = rest_files / All.NumFilesWrittenInParallel;
+          if((rest_files % All.NumFilesWrittenInParallel))
+            ngroups++; /* round up */
+
+          for(gr = 0; gr < ngroups; gr++)
+            {
+              if((filenr / All.NumFilesWrittenInParallel) == gr) /* ok, it's this processor's turn */
+                {
+                  if(rep == 0)
+                    share_particle_number_in_file(buf, filenr, masterTask, lastTask, readTypes);
+                  else
+                    read_file(buf, filenr, masterTask, lastTask, readTypes);
+                }
+              MPI_Barrier(MPI_COMM_WORLD);
+            }
+        }
+
+      /* now do the memory allocation */
+      if(rep == 0)
+        {
+          int max_load, max_sphload;
+          MPI_Allreduce(&NumPart, &max_load, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+          MPI_Allreduce(&NumGas, &max_sphload, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+#ifdef GENERATE_GAS_IN_ICS
+          if(max_sphload < max_load)
+            max_sphload = max_load;
+#endif /* #ifdef GENERATE_GAS_IN_ICS */
+
+          All.MaxPart    = max_load / (1.0 - 2 * ALLOC_TOLERANCE);
+          All.MaxPartSph = max_sphload / (1.0 - 2 * ALLOC_TOLERANCE);
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+          if(All.TotPartSpecial != 0)
+            All.MaxPartSpecial = (int)(All.TotPartSpecial);
+          else
+            terminate("Code compiled with option EXACT_GRAVITY_FOR_PARTICLE_TYPE but no particles of specified type found in ICs.");
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+          allocate_memory();
+
+          CommBuffer = mymalloc("CommBuffer", COMMBUFFERSIZE);
+        }
+    }
+
+  myfree(CommBuffer); /* was allocated at the end of the rep == 0 pass above */
+
+#ifdef TILE_ICS
+  tile_ics();
+#endif /* #ifdef TILE_ICS */
+
+  /* this makes sure that masses are initialized in the case that the mass-block
+     is empty for this particle type */
+  for(i = 0; i < NumPart; i++)
+    {
+      if(All.MassTable[P[i].Type] != 0)
+        P[i].Mass = All.MassTable[P[i].Type];
+    }
+
+    /* If we are reading in Gadget2 ICs, we need to compute the material
+       number from the ID  */
+#ifdef READ_LEGACY_ICS
+  if(header.flag_entropy_instead_u)
+    {
+      sprintf(buf, "\nProblem: Legacy ICs cannot contain entropy in the u field!\n");
+      terminate(buf);
+    }
+
+  for(i = 0; i < NumGas; i++)
+    {
+      int j; /* not used in this loop */
+
+      double mat;
+
+      modf(((double)(P[i].ID - EOS_ID_START)) / EOS_ID_SKIP, &mat); /* This stores the int part in variable mat and
+                                                                       discards the remainder */
+      int imat = mat;
+
+      SphP[i].Composition[imat] = 1.0;
+    }
+#endif /* #ifdef READ_LEGACY_ICS */
+
+#if defined(REFINEMENT) && defined(REFINEMENT_HIGH_RES_GAS)
+  if(RestartFlag == 0) /* All gas that is already present in the ICs is allowed to be (de-)refined */
+    {
+      for(i = 0; i < NumGas; i++)
+        {
+          if(All.ReferenceGasPartMass == 0 || P[i].Mass < 1.2 * All.ReferenceGasPartMass)
+            SphP[i].AllowRefinement = 1;
+        }
+    }
+#endif /* #if defined (REFINEMENT) && defined (REFINEMENT_HIGH_RES_GAS) */
+
+  for(i = 0; i < NumPart; i++)
+    P[i].SofteningType = All.SofteningTypeOfPartType[P[i].Type];
+
+#ifdef GENERATE_GAS_IN_ICS
+  int count;
+  double fac, d, a, b, rho;
+
+  if(RestartFlag == 0)
+    {
+      header.flag_entropy_instead_u = 0;
+
+      MyIDType ids_offset = determine_ids_offset();
+
+      for(i = 0, count = 0; i < NumPart; i++)
+#ifdef SPLIT_PARTICLE_TYPE
+        if((1 << P[i].Type) & (SPLIT_PARTICLE_TYPE))
+#else  /* #ifdef SPLIT_PARTICLE_TYPE */
+        if(P[i].Type == 1)
+#endif /* #ifdef SPLIT_PARTICLE_TYPE #else */
+          count++;
+
+      if(count)
+        {
+          domain_resize_storage(count, count, 0);
+
+          memmove(P + count, P, sizeof(struct particle_data) * NumPart); /* shift the existing particles up by count; the generated gas goes into P[0..count-1] */
+
+          NumPart += count;
+          NumGas += count;
+
+          if(NumGas > All.MaxPartSph)
+            terminate("Task=%d ends up getting more SPH particles (%d) than allowed (%d)\n", ThisTask, NumGas, All.MaxPartSph);
+
+#ifdef REFINEMENT_HIGH_RES_GAS
+          for(i = 0; i < NumGas - count; i++) /* make sure that AllowRefinement is shifted with the particles */
+            SphP[i + count].AllowRefinement = SphP[i].AllowRefinement;
+          for(i = 0; i < count; i++) /* by default, new cells are not allowed to be refined */
+            SphP[i].AllowRefinement = 0;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+
+          fac = All.OmegaBaryon / All.Omega0;
+          rho = All.Omega0 * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G);
+
+          int j; /* next free slot at the front of P[] for a generated gas particle */
+
+          for(i = count, j = 0; i < NumPart; i++)
+#ifdef SPLIT_PARTICLE_TYPE
+            if((1 << P[i].Type) & (SPLIT_PARTICLE_TYPE))
+#else  /* #ifdef SPLIT_PARTICLE_TYPE */
+            if(P[i].Type == 1)
+#endif /* #ifdef SPLIT_PARTICLE_TYPE #else */
+              {
+                d = pow(P[i].Mass / rho, 1.0 / 3);
+                a = 0.5 * All.OmegaBaryon / All.Omega0 * d;
+                b = 0.5 * (All.Omega0 - All.OmegaBaryon) / All.Omega0 * d;
+
+                P[j] = P[i];
+
+                P[j].Mass *= fac;
+                P[i].Mass *= (1 - fac);
+                P[j].Type = 0;
+                P[j].ID += ids_offset;
+                P[i].Pos[0] += a;
+                P[i].Pos[1] += a;
+                P[i].Pos[2] += a;
+                P[j].Pos[0] -= b;
+                P[j].Pos[1] -= b;
+                P[j].Pos[2] -= b;
+
+#ifdef REFINEMENT_HIGH_RES_GAS
+                if(P[i].Type == 1) /* also allow gas which is produced by splitting a high res DM particle to be (de-) refined */
+                  SphP[j].AllowRefinement = 2;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+
+                j++;
+              }
+
+          All.MassTable[0] = 0;
+
+#ifdef SPLIT_PARTICLE_TYPE
+          for(i = 1; i < NTYPES; i++)
+            if((1 << i) & (SPLIT_PARTICLE_TYPE))
+              All.MassTable[i] *= (1 - fac);
+#else  /* #ifdef SPLIT_PARTICLE_TYPE */
+          All.MassTable[1] *= (1 - fac);
+#endif /* #ifdef SPLIT_PARTICLE_TYPE #else */
+        }
+    }
+#endif /* #ifdef GENERATE_GAS_IN_ICS */
+
+#ifdef READ_DM_AS_GAS
+  {
+    domain_resize_storage(0, NumPart, 0);
+
+    if(NumGas > All.MaxPartSph)
+      terminate("Task=%d ends up getting more SPH particles (%d) than allowed (%d)\n", ThisTask, NumGas, All.MaxPartSph);
+
+    for(i = 0; i < NumPart; i++)
+      {
+        P[i].Type      = 0;
+        SphP[i].Utherm = 1.0;
+      }
+
+    All.MassTable[0] = 0;
+
+    header.npartTotal[0]         = header.npartTotal[1];
+    header.npartTotalHighWord[0] = header.npartTotalHighWord[1];
+    header.npart[0]              = header.npart[1];
+    header.npartTotal[1]         = 0;
+    header.npartTotalHighWord[1] = 0;
+    header.npart[1]              = 0;
+    NumGas                       = NumPart;
+    All.TotNumGas                = All.TotNumPart;
+    mpi_printf("READ_DM_AS_GAS: generated %lld gas particles from type %d\n",
+               header.npartTotal[0] + (((long long)header.npartTotalHighWord[0]) << 32), 0);
+  }
+#endif /* #ifdef READ_DM_AS_GAS */
+
+#ifdef USE_SFR
+  if(RestartFlag == 0)
+    {
+      if(All.MassTable[4] == 0 && All.MassTable[0] > 0)
+        {
+          All.MassTable[0] = 0;
+          All.MassTable[4] = 0;
+        }
+    }
+#endif
+
+  u_init = (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * All.InitGasTemp;
+  u_init *= All.UnitMass_in_g / All.UnitEnergy_in_cgs; /* unit conversion */
+
+  if(All.InitGasTemp > 1.0e4) /* assuming FULL ionization */
+    molecular_weight = 4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC));
+  else /* assuming NEUTRAL GAS */
+    molecular_weight = 4 / (1 + 3 * HYDROGEN_MASSFRAC);
+
+  u_init /= molecular_weight;
+
+  All.InitGasU = u_init;
+
+  header.mass[0]   = 0; /* to make sure that the variable masses are stored in output file */
+  All.MassTable[0] = 0;
+
+  if(RestartFlag == 0)
+    {
+#if defined(REFINEMENT_HIGH_RES_GAS)
+      for(i = 0; i < NumGas; i++)
+        if(SphP[i].AllowRefinement)
+          SphP[i].HighResMass = P[i].Mass;
+        else
+          SphP[i].HighResMass = 0;
+#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */
+
+      if(All.InitGasTemp > 0)
+        {
+          for(i = 0; i < NumGas; i++)
+            {
+              if(ThisTask == 0 && i == 0 && SphP[i].Utherm == 0)
+                printf("READIC: Initializing u from InitGasTemp!\n");
+
+              if(SphP[i].Utherm == 0)
+                SphP[i].Utherm = All.InitGasU;
+              /* Note: the conversion to entropy will be done in the function init(),
+                 after the densities have been computed */
+            }
+        }
+    }
+
+  for(i = 0; i < NumGas; i++)
+    {
+      SphP[i].Utherm = dmax(All.MinEgySpec, SphP[i].Utherm);
+      if(SphP[i].Density > 0)
+        SphP[i].Volume = P[i].Mass / SphP[i].Density;
+    }
+
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  t1 = second();
+  mpi_printf("READIC: reading done (took %g sec).\n", timediff(t0, t1));
+
+  /* verify number of particles */
+  int num = 0;
+  long long glob_num;
+  for(i = 0; i < NumPart; i++)
+    num += 1; /* num ends up equal to NumPart */
+  sumup_large_ints(1, &num, &glob_num);
+  if(glob_num != All.TotNumPart)
+    terminate("glob_num (=%lld) != All.TotNumPart (=%lld)", glob_num, All.TotNumPart);
+
+  mpi_printf("READIC: Total number of particles :  %lld\n\n", All.TotNumPart);
+
+  CPU_Step[CPU_SNAPSHOT] += measure_time();
+}
+
+/*! \brief This function computes a suitable offset for the particle IDs in
+ *         case gas should be generated in the ICs.
+ *
+ *  If the macro OFFSET_FOR_NON_CONTIGUOUS_IDS is not defined the code reverts
+ *  to a fixed offset defined at the beginning of the file.
+ *
+ *  \return Offset for the gas particles to be generated.
+ */
+MyIDType determine_ids_offset(void)
+{
+#ifdef OFFSET_FOR_NON_CONTIGUOUS_IDS
+  /* derive the offset from the data: double it, starting at 1, until it exceeds All.MaxID */
+  const int total_bits = CHAR_BIT * sizeof(MyIDType);
+  int needed_bits      = 1;
+  MyIDType offset      = 1;
+
+  if(All.MaxID == 0) /* MaxID not calculated yet */
+    calculate_maxid();
+
+  for(; offset <= All.MaxID && offset > 0; offset <<= 1)
+    needed_bits++;
+
+  All.MaxID = 0; /* reset to allow recomputing */
+
+  if(!(offset > 0)) /* the shifted value is no longer positive */
+    terminate("not enough memory to generate id offsets. Used %d bits out of %d\n", needed_bits, total_bits);
+
+#ifdef LONGIDS
+  mpi_printf("GENERATE_GAS_IN_ICS: determined id offset as %llu. Used %d bits out of %d\n", offset, needed_bits, total_bits);
+#else  /* #ifdef LONGIDS */
+  mpi_printf("GENERATE_GAS_IN_ICS: determined id offset as %u. Used %d bits out of %d\n", offset, needed_bits, total_bits);
+#endif /* #ifdef LONGIDS #else */
+
+#else  /* #ifdef OFFSET_FOR_NON_CONTIGUOUS_IDS */
+  /* fall back to the compile-time constant defined at the top of this file */
+  MyIDType offset = IDS_OFFSET;
+#endif /* #ifdef OFFSET_FOR_NON_CONTIGUOUS_IDS #else */
+
+  return offset;
+}
+
+/*! \brief Reads out the io buffer that was filled with particle data.
+ *
+ *  The data in the io buffer is put in the appropriate places of the particle
+ *  structures.
+ *
+ * \param[in] blocknr Data block present in io buffer.
+ * \param[in] offset Particle corresponding to the first element in io buffer.
+ * \param[in] pc Number of elements in the io buffer.
+ * \param[in] type If blocknr=IO_VEL P[offset + n].Type is set to type.
+ *
+ * \return void
+ */
+void empty_read_buffer(enum iofields blocknr, int offset, int pc, int type)
+{
+  /* one read cursor per element type that may be stored in the io buffer;
+     all of them start at the beginning of CommBuffer */
+  MyInputFloat *src_io = (MyInputFloat *)CommBuffer;
+  double *src_double   = (double *)CommBuffer;
+  MyIDType *src_id     = (MyIDType *)CommBuffer;
+  int *src_int         = (int *)CommBuffer;
+  float *src_float     = (float *)CommBuffer;
+  char *raw            = (char *)CommBuffer;
+
+  int elem;  /* index of the element within the io buffer */
+  int comp;  /* index of the value within one element */
+  int field; /* index into IO_Fields[] of the entry matching blocknr */
+  int probe;
+  int n_values;
+
+  /* swap_Nbyte() is called once for the whole buffer and is told the element
+     width: 8 bytes for datatype 2 (and for datatype 1 if
+     INPUT_IN_DOUBLEPRECISION is set), 4 bytes otherwise */
+  int dtype       = get_datatype_in_block(blocknr, 1);
+  int per_element = get_values_per_blockelement(blocknr);
+  int swap_width  = 4;
+
+  if(dtype == 2)
+    swap_width = 8;
+#ifdef INPUT_IN_DOUBLEPRECISION
+  else if(dtype == 1)
+    swap_width = 8;
+#endif /* #ifdef INPUT_IN_DOUBLEPRECISION */
+
+  /* total number of scalar values held in the buffer */
+  n_values = pc * per_element;
+  swap_Nbyte(raw, n_values, swap_width);
+
+  /* locate the IO_Fields entry that describes this block; the scan stops at
+     the first match */
+  field = -1;
+  for(probe = 0; probe < N_IO_Fields && field < 0; probe++)
+    {
+      if(IO_Fields[probe].field == blocknr)
+        field = probe;
+    }
+
+  if(field < 0)
+    terminate("error: field not found");
+
+  for(elem = 0; elem < pc; elem++)
+    {
+      if(!IO_Fields[field].io_func)
+        {
+          /* no custom reader: copy each value straight into the field's slot
+             inside the P[] or SphP[] entry of this particle */
+          void *array_pos;
+          size_t dest;
+
+          switch(IO_Fields[field].array)
+            {
+              case A_P:
+                array_pos = P + offset + elem;
+                break;
+              case A_SPHP:
+                array_pos = SphP + offset + elem;
+                break;
+              case A_NONE:
+                array_pos = 0;
+                break;
+              case A_PS:
+                terminate("Not good, trying to read into PS[]?\n");
+                break;
+              default:
+                terminate("ERROR in empty_read_buffer: Array not found!\n");
+                break;
+            }
+
+          dest = (size_t)array_pos + IO_Fields[field].offset;
+          for(comp = 0; comp < IO_Fields[field].values_per_block; comp++)
+            {
+              /* floating-point input is staged in a double; integer and ID
+                 fields bypass it and are copied from their own cursors below */
+              double value = 0;
+
+              switch(IO_Fields[field].type_in_file_input)
+                {
+                  case FILE_FLOAT:
+                    value = *src_float++;
+                    break;
+                  case FILE_DOUBLE:
+                    value = *src_double++;
+                    break;
+                  case FILE_MY_IO_FLOAT:
+                    value = *src_io++;
+                    break;
+                  default:
+                    break;
+                }
+
+              switch(IO_Fields[field].type_in_memory)
+                {
+                  case MEM_INT:
+                    *((int *)(dest + comp * sizeof(int))) = *src_int++;
+                    break;
+                  case MEM_MY_ID_TYPE:
+                    *((MyIDType *)(dest + comp * sizeof(MyIDType))) = *src_id++;
+                    break;
+                  case MEM_FLOAT:
+                    *((float *)(dest + comp * sizeof(float))) = value;
+                    break;
+                  case MEM_DOUBLE:
+                    *((double *)(dest + comp * sizeof(double))) = value;
+                    break;
+                  case MEM_MY_SINGLE:
+                    *((MySingle *)(dest + comp * sizeof(MySingle))) = value;
+                    break;
+                  case MEM_MY_FLOAT:
+                    *((MyFloat *)(dest + comp * sizeof(MyFloat))) = value;
+                    break;
+                  case MEM_MY_DOUBLE:
+                    *((MyDouble *)(dest + comp * sizeof(MyDouble))) = value;
+                    break;
+                  default:
+                    terminate("ERROR in empty_read_buffer: Type not found!\n");
+                    break;
+                }
+            }
+        }
+      else
+        {
+          /* the field has its own reader: it is handed the particle index, the values per element and the cursor */
+          int particle;
+
+          switch(IO_Fields[field].array)
+            {
+              case A_NONE:
+              case A_SPHP:
+              case A_P:
+                particle = offset + elem;
+                break;
+              case A_PS:
+                terminate("Not good, trying to read into PS[]?\n");
+                break;
+              default:
+                terminate("ERROR in empty_read_buffer: Array not found!\n");
+                break;
+            }
+
+          switch(IO_Fields[field].type_in_file_input)
+            {
+              case FILE_NONE:
+                terminate("error");
+                break;
+              case FILE_INT:
+                IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, src_int, 1);
+                src_int += IO_Fields[field].values_per_block;
+                break;
+              case FILE_MY_ID_TYPE:
+                IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, src_id, 1);
+                src_id += IO_Fields[field].values_per_block;
+                break;
+              case FILE_MY_IO_FLOAT:
+                IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, src_io, 1);
+                src_io += IO_Fields[field].values_per_block;
+                break;
+              case FILE_DOUBLE:
+                IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, src_double, 1);
+                src_double += IO_Fields[field].values_per_block;
+                break;
+              case FILE_FLOAT:
+                IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, src_float, 1);
+                src_float += IO_Fields[field].values_per_block;
+                break;
+            }
+        }
+    }
+
+  /* the particle type is assigned only when the velocity block is processed */
+  if(blocknr != IO_VEL)
+    return;
+  for(elem = 0; elem < pc; elem++)
+    P[offset + elem].Type = type;
+}
+
+/*! \brief Distributes the particle numbers in the file fname
+ *         to tasks 'readTask' to 'lastTask', and calculates the number of
+ *         particles each task gets.
+ *
+ *  \param[in] fname Filename to be read.
+ *  \param[in] readTask Task responsible for reading the file fname.
+ *  \param[in] lastTask Last task which gets data contained in the file.
+ *  \param[in] readTypes A bitfield that determines what particle types to
+ *             read, only if the bit corresponding to a particle type is set,
+ *             the corresponding data is loaded, otherwise its particle number
+ *             is set to zero. (This is only implemented for HDF5 files.)
+ *
+ *  \return void
+ */
+void share_particle_number_in_file(const char *fname, int filenr, int readTask, int lastTask, int readTypes)
+{
+  int i, n_in_file, n_for_this_task, ntask, task;
+  int blksize1, blksize2;
+  MPI_Status status;
+  FILE *fd = 0;
+  int type;
+  char label[4], buf[500];
+  int nextblock;
+#ifdef HAVE_HDF5
+  hid_t hdf5_file = 0, hdf5_grp[NTYPES];
+#endif /* #ifdef HAVE_HDF5 */
+
+  if(ThisTask == readTask)
+    {
+      if(All.ICFormat == 1 || All.ICFormat == 2)
+        {
+          if(!(fd = fopen(fname, "r")))
+            {
+              sprintf(buf, "can't open file `%s' for reading initial conditions.\n", fname);
+              terminate(buf);
+            }
+
+          if(All.ICFormat == 2)
+            {
+              SKIP;
+              swap_file = blksize1;
+              my_fread(&label, sizeof(char), 4, fd);
+              my_fread(&nextblock, sizeof(int), 1, fd);
+              swap_Nbyte((char *)&nextblock, 1, 4);
+              printf("Reading header => '%c%c%c%c' (%d byte)\n", label[0], label[1], label[2], label[3], nextblock);
+              SKIP2;
+            }
+
+          SKIP;
+          if(All.ICFormat == 1)
+            {
+              if(blksize1 != 256)
+                swap_file = 1;
+            }
+          read_header_attributes(fd);
+          SKIP2;
+          swap_Nbyte((char *)&blksize1, 1, 4);
+          swap_Nbyte((char *)&blksize2, 1, 4);
+
+          if(blksize1 != 256 || blksize2 != 256)
+            terminate("incorrect header format blocksize %d, %d\n", blksize1, blksize2);
+
+          swap_header();
+
+#ifdef COMBINETYPES
+          header.npartTotal[3] += header.npartTotal[4] + header.npartTotal[5];
+          header.npart[3] += header.npart[4] + header.npart[5];
+          header.npartTotal[4] = 0;
+          header.npartTotal[5] = 0;
+          header.npart[4]      = 0;
+          header.npart[5]      = 0;
+#endif /* #ifdef COMBINETYPES */
+        }
+
+#ifdef HAVE_HDF5
+      if(All.ICFormat == 3)
+        {
+          read_header_attributes_in_hdf5(fname);
+
+          hdf5_file = my_H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);
+          if(hdf5_file < 0)
+            terminate("cannot read initial conditions file %s", fname);
+
+          for(type = 0; type < NTYPES; type++)
+            {
+              if(header.npart[type] > 0 && (readTypes & (1 << type)))
+                {
+                  sprintf(buf, "/PartType%d", type);
+                  hdf5_grp[type] = my_H5Gopen(hdf5_file, buf);
+                }
+              if(!(readTypes & (1 << type)))
+                {
+                  // Override particle number in file. If we don't
+                  // read the type, both npart and npartTotal will be 0
+                  header.npartTotal[type]         = 0;
+                  header.npart[type]              = 0;
+                  header.npartTotalHighWord[type] = 0;
+                  header.mass[type]               = 0;
+                }
+            }
+        }
+#endif /* #ifdef HAVE_HDF5 */
+
+      for(task = readTask + 1; task <= lastTask; task++)
+        {
+          MPI_Ssend(&header, sizeof(header), MPI_BYTE, task, TAG_HEADER, MPI_COMM_WORLD);
+          MPI_Ssend(&swap_file, sizeof(swap_file), MPI_BYTE, task, TAG_KEY, MPI_COMM_WORLD);
+        }
+    }
+  else
+    {
+      MPI_Recv(&header, sizeof(header), MPI_BYTE, readTask, TAG_HEADER, MPI_COMM_WORLD, &status);
+      MPI_Recv(&swap_file, sizeof(swap_file), MPI_BYTE, readTask, TAG_KEY, MPI_COMM_WORLD, &status);
+    }
+
+  if(header.num_files != num_files)
+    warn("header.num_files=%d != num_files=%d", header.num_files, num_files);
+
+  if(All.TotNumPart == 0)
+    {
+      if(num_files == 1)
+        for(type = 0; type < NTYPES; type++)
+          {
+            if(header.npartTotal[type] != header.npart[type])
+              {
+                warn("header.npartTotal[%d]=%d != header.npart[%d]=%d, setting header.npartTotal[%d] = header.npart[%d]\n", type,
+                     header.npartTotal[type], type, header.npart[type], type, type);
+                header.npartTotal[type] = header.npart[type];
+              }
+#ifdef USE_SFR
+            header.npartTotalHighWord[type] = 0;
+#endif
+          }
+
+      All.TotNumGas = header.npartTotal[0] + (((long long)header.npartTotalHighWord[0]) << 32);
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+      All.TotPartSpecial = header.npartTotal[EXACT_GRAVITY_FOR_PARTICLE_TYPE] +
+                           (((long long)header.npartTotalHighWord[EXACT_GRAVITY_FOR_PARTICLE_TYPE]) << 32);
+      mpi_printf("Tot Special %d %d %d %d\n", All.TotPartSpecial, EXACT_GRAVITY_FOR_PARTICLE_TYPE, header.npart[4],
+                 header.npartTotal[4]);
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+      for(type = 0, All.TotNumPart = 0; type < NTYPES; type++)
+        {
+          All.TotNumPart += header.npartTotal[type];
+          All.TotNumPart += (((long long)header.npartTotalHighWord[type]) << 32);
+        }
+
+#ifdef GENERATE_GAS_IN_ICS
+      if(RestartFlag == 0)
+        {
+          if(All.TotNumGas > 0)
+            terminate("You specified GENERATE_GAS_IN_ICS but your ICs already contain gas! (namely %lld gas cells)\n", All.TotNumGas);
+
+#ifdef SPLIT_PARTICLE_TYPE
+          for(i = 0; i < NTYPES; i++)
+            if((1 << i) & (SPLIT_PARTICLE_TYPE))
+              {
+                All.TotNumGas += header.npartTotal[i] + (((long long)header.npartTotalHighWord[i]) << 32);
+                All.TotNumPart += header.npartTotal[i] + (((long long)header.npartTotalHighWord[i]) << 32);
+                mpi_printf("GENERATE_GAS_IN_ICS: generated %lld gas particles from type %d\n",
+                           header.npartTotal[i] + (((long long)header.npartTotalHighWord[i]) << 32), i);
+              }
+#else  /* #ifdef SPLIT_PARTICLE_TYPE */
+          All.TotNumGas += header.npartTotal[1] + (((long long)header.npartTotalHighWord[1]) << 32);
+          All.TotNumPart += header.npartTotal[1] + (((long long)header.npartTotalHighWord[1]) << 32);
+          mpi_printf("GENERATE_GAS_IN_ICS: generated %lld gas particles from type 1\n",
+                     header.npartTotal[1] + (((long long)header.npartTotalHighWord[1]) << 32));
+#endif /* #ifdef SPLIT_PARTICLE_TYPE #else */
+        }
+#endif /* #ifdef GENERATE_GAS_IN_ICS */
+
+#ifdef TILE_ICS
+      All.TotNumPart *= All.TileICsFactor * All.TileICsFactor * All.TileICsFactor;
+      All.TotNumGas *= All.TileICsFactor * All.TileICsFactor * All.TileICsFactor;
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+      All.TotPartSpecial *= All.TileICsFactor * All.TileICsFactor * All.TileICsFactor;
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+#endif /* #ifdef TILE_ICS */
+
+      for(i = 0; i < NTYPES; i++)
+        All.MassTable[i] = header.mass[i];
+
+      if(RestartFlag >= 2)
+        All.Time = All.TimeBegin = header.time;
+      else
+        All.Time = All.TimeBegin;
+
+      set_cosmo_factors_for_current_time();
+    }
+
+  if(ThisTask == readTask)
+    {
+      for(type = 0, n_in_file = 0; type < NTYPES; type++)
+        n_in_file += header.npart[type];
+
+      printf("READIC: Reading file `%s' on task=%d and distribute it to %d to %d (contains %d particles).\n", fname, ThisTask,
+             readTask, lastTask, n_in_file);
+
+      myflush(stdout);
+    }
+
+  for(type = 0; type < NTYPES; type++)
+    {
+      n_in_file       = header.npart[type];
+      ntask           = lastTask - readTask + 1;
+      n_for_this_task = n_in_file / ntask;
+      if((ThisTask - readTask) < (n_in_file % ntask))
+        n_for_this_task++;
+
+      NumPart += n_for_this_task;
+
+      if(type == 0)
+        NumGas += n_for_this_task;
+    }
+
+  if(ThisTask == readTask)
+    {
+      if(All.ICFormat == 1 || All.ICFormat == 2)
+        fclose(fd);
+#ifdef HAVE_HDF5
+      if(All.ICFormat == 3)
+        {
+          for(type = NTYPES - 1; type >= 0; type--)
+            if(header.npart[type] > 0)
+              {
+                sprintf(buf, "/PartType%d", type);
+                my_H5Gclose(hdf5_grp[type], buf);
+              }
+          my_H5Fclose(hdf5_file, fname);
+        }
+#endif /* #ifdef HAVE_HDF5 */
+
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
+      for(int type = 0; type < NTYPES; type++)
+        ntype_in_files[filenr].npart[type] = header.npart[type];
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
+    }
+}
+
+/*! \brief Reads a single snapshot file.
+ *
+ *  The file is opened and read by task 'readTask'; the particle data it
+ *  contains is distributed to tasks 'readTask' to 'lastTask'.
+ *
+ *  \param[in] fname Filename to be read.
+ *  \param[in] filenr Index of the file; progress output is printed for 0 only.
+ *  \param[in] readTask Task responsible for reading the file fname.
+ *  \param[in] lastTask Last task which gets data contained in the file.
+ *  \param[in] readTypes Bitfield that determines what particle types to read:
+ *             only if the bit corresponding to a particle type is set, the
+ *             corresponding data is loaded, otherwise its particle number is
+ *             set to zero. (This is only implemented for HDF5 files.)
+ *
+ *  \return void
+ */
+void read_file(const char *fname, int filenr, int readTask, int lastTask, int readTypes)
+{
+  int blockmaxlen;
+  int n_in_file, n_for_this_task, ntask, pc, offset = 0, task;
+  int blksize1, blksize2;
+  MPI_Status status;
+  FILE *fd = 0;
+  int nall;
+  int type, bnr;
+  char label[4], expected_label[4], buf[500];
+  int nstart, bytes_per_blockelement, npart, nextblock, typelist[NTYPES];
+  enum iofields blocknr;
+
+#ifdef HAVE_HDF5
+  int rank, pcsum;
+  hid_t hdf5_file     = 0, hdf5_grp[NTYPES], hdf5_dataspace_in_file;
+  hid_t hdf5_datatype = 0, hdf5_dataspace_in_memory, hdf5_dataset;
+  hsize_t dims[2], count[2], start[2];
+#endif /* #ifdef HAVE_HDF5 */
+
+  if(ThisTask == readTask)
+    {
+      if(All.ICFormat == 1 || All.ICFormat == 2)
+        {
+          if(!(fd = fopen(fname, "r")))
+            {
+              /* fname must never end up inside a format string, so hand it to terminate() as an argument */
+              terminate("can't open file `%s' for reading initial conditions.\n", fname);
+            }
+
+          if(All.ICFormat == 2)
+            {
+              SKIP;
+              swap_file = blksize1;
+              my_fread(&label, sizeof(char), 4, fd);
+              my_fread(&nextblock, sizeof(int), 1, fd);
+              swap_Nbyte((char *)&nextblock, 1, 4);
+              SKIP2;
+            }
+
+          SKIP;
+          if(All.ICFormat == 1)
+            {
+              if(blksize1 != 256)
+                swap_file = 1;
+            }
+          read_header_attributes(fd);
+          SKIP2;
+          swap_Nbyte((char *)&blksize1, 1, 4);
+          swap_Nbyte((char *)&blksize2, 1, 4);
+
+          swap_header();
+
+#ifdef COMBINETYPES
+          header.npartTotal[3] += header.npartTotal[4] + header.npartTotal[5];
+          header.npart[3] += header.npart[4] + header.npart[5];
+          header.npartTotal[4] = 0;
+          header.npartTotal[5] = 0;
+          header.npart[4]      = 0;
+          header.npart[5]      = 0;
+#endif /* #ifdef COMBINETYPES */
+        }
+
+#ifdef HAVE_HDF5
+      if(All.ICFormat == 3)
+        {
+          read_header_attributes_in_hdf5(fname);
+
+          hdf5_file = my_H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);
+          if(hdf5_file < 0)
+            terminate("cannot read initial conditions file %s", fname);
+
+          for(type = 0; type < NTYPES; type++)
+            {
+              if(header.npart[type] > 0 && (readTypes & (1 << type)))
+                {
+                  sprintf(buf, "/PartType%d", type);
+                  hdf5_grp[type] = my_H5Gopen(hdf5_file, buf);
+                }
+              if(!(readTypes & (1 << type)))
+                {
+                  // Override particle number in file. If we don't
+                  // read the type, both npart and npartTotal will be 0
+                  header.npartTotal[type]         = 0;
+                  header.npart[type]              = 0;
+                  header.npartTotalHighWord[type] = 0;
+                  header.mass[type]               = 0;
+                }
+            }
+        }
+#endif /* #ifdef HAVE_HDF5 */
+
+      for(task = readTask + 1; task <= lastTask; task++)
+        MPI_Ssend(&header, sizeof(header), MPI_BYTE, task, TAG_HEADER, MPI_COMM_WORLD);
+    }
+  else
+    MPI_Recv(&header, sizeof(header), MPI_BYTE, readTask, TAG_HEADER, MPI_COMM_WORLD, &status);
+
+#ifdef INPUT_IN_DOUBLEPRECISION
+  if(header.flag_doubleprecision == 0)
+    {
+      sprintf(buf, "\nProblem: Code compiled with INPUT_IN_DOUBLEPRECISION, but input files are in single precision!\n");
+      terminate(buf);
+    }
+#else  /* #ifdef INPUT_IN_DOUBLEPRECISION */
+  if(header.flag_doubleprecision)
+    {
+      sprintf(buf, "\nProblem: Code not compiled with INPUT_IN_DOUBLEPRECISION, but input files are in double precision!\n");
+      terminate(buf);
+    }
+#endif /* #ifdef INPUT_IN_DOUBLEPRECISION #else */
+
+  if(ThisTask == readTask)
+    {
+      if(filenr == 0)
+        mpi_printf(
+            "\nREADIC: filenr=%d, '%s' contains:\n"
+            "READIC: Type 0 (gas):   %8d  (tot=%15lld) masstab= %g\n"
+            "READIC: Type 1 (halo):  %8d  (tot=%15lld) masstab= %g\n"
+            "READIC: Type 2 (disk):  %8d  (tot=%15lld) masstab= %g\n"
+            "READIC: Type 3 (bulge): %8d  (tot=%15lld) masstab= %g\n"
+            "READIC: Type 4 (stars): %8d  (tot=%15lld) masstab= %g\n"
+            "READIC: Type 5 (bndry): %8d  (tot=%15lld) masstab= %g\n\n",
+            filenr, fname, header.npart[0], header.npartTotal[0] + (((long long)header.npartTotalHighWord[0]) << 32), All.MassTable[0],
+            header.npart[1], header.npartTotal[1] + (((long long)header.npartTotalHighWord[1]) << 32), All.MassTable[1],
+            header.npart[2], header.npartTotal[2] + (((long long)header.npartTotalHighWord[2]) << 32), All.MassTable[2],
+            header.npart[3], header.npartTotal[3] + (((long long)header.npartTotalHighWord[3]) << 32), All.MassTable[3],
+            header.npart[4], header.npartTotal[4] + (((long long)header.npartTotalHighWord[4]) << 32), All.MassTable[4],
+            header.npart[5], header.npartTotal[5] + (((long long)header.npartTotalHighWord[5]) << 32), All.MassTable[5]);
+    }
+
+  /* to collect the gas particles all at the beginning (in case several
+     snapshot files are read on the current CPU) we move the collisionless
+     particles such that a gap of the right size is created */
+
+  for(type = 0, nall = 0; type < NTYPES; type++)
+    {
+      n_in_file       = header.npart[type];
+      ntask           = lastTask - readTask + 1;
+      n_for_this_task = n_in_file / ntask;
+      if((ThisTask - readTask) < (n_in_file % ntask))
+        n_for_this_task++;
+
+      nall += n_for_this_task;
+    }
+
+  memmove(&P[NumGas + nall], &P[NumGas], (NumPart - NumGas) * sizeof(struct particle_data));
+  nstart = NumGas; /* start of the gap; empty_read_buffer() stores at nstart + offset */
+
+  for(bnr = 0; bnr < 1000; bnr++)
+    {
+      blocknr = (enum iofields)bnr;
+
+      if(blocknr == IO_LASTENTRY)
+        {
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
+          int pc = nstart;
+
+          for(int type = 0; type < NTYPES; type++)
+            {
+              int n_in_file = header.npart[type];
+
+              long long nprevious = 0;
+              for(int t = 0; t < type; t++)
+                nprevious += header.npartTotal[t] + (((long long)header.npartTotalHighWord[t]) << 32);
+
+              for(int nr = 0; nr < filenr; nr++)
+                nprevious += ntype_in_files[nr].npart[type];
+
+              for(int task = readTask; task <= lastTask; task++)
+                {
+                  int n_for_this_task = n_in_file / ntask;
+                  if((task - readTask) < (n_in_file % ntask))
+                    n_for_this_task++;
+
+                  if(ThisTask == task)
+                    {
+                      for(int i = 0; i < n_for_this_task; i++)
+                        P[pc++].FileOrder = nprevious++;
+                    }
+                  else
+                    nprevious += n_for_this_task;
+                }
+            }
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
+          break;
+        }
+
+      /* proceed reading this field only if we are expecting it */
+      if(blockpresent(blocknr, 0))
+        {
+          if(ThisTask == readTask)
+            {
+              get_dataset_name(blocknr, buf);
+              if(filenr == 0)
+                mpi_printf("READIC: reading block %d (%s)...\n", blocknr, buf);
+              myflush(stdout);
+            }
+
+          bytes_per_blockelement = get_bytes_per_blockelement(blocknr, 1);
+
+          blockmaxlen = (int)(COMMBUFFERSIZE / bytes_per_blockelement); /* elements per CommBuffer load */
+
+          npart = get_particles_in_block(blocknr, &typelist[0]);
+
+          if(npart > 0)
+            {
+              if(ThisTask == readTask)
+                {
+                  if(All.ICFormat == 2)
+                    {
+                      SKIP;
+                      my_fread(&label, sizeof(char), 4, fd);
+                      my_fread(&nextblock, sizeof(int), 1, fd);
+                      swap_Nbyte((char *)&nextblock, 1, 4);
+                      printf("Reading header => '%c%c%c%c' (%d byte)\n", label[0], label[1], label[2], label[3], nextblock);
+                      SKIP2;
+
+                      get_Tab_IO_Label(blocknr, expected_label);
+                      if(strncmp(label, expected_label, 4) != 0)
+                        {
+                          /* label was read from the file: pass it as an argument, never as part of the format string */
+                          terminate("incorrect block-structure!\nexpected '%c%c%c%c' but found '%c%c%c%c'\n", expected_label[0],
+                                    expected_label[1], expected_label[2], expected_label[3], label[0], label[1], label[2], label[3]);
+                        }
+                    }
+
+                  if(All.ICFormat == 1 || All.ICFormat == 2)
+                    SKIP;
+                }
+
+              for(type = 0, offset = 0; type < NTYPES; type++)
+                {
+                  n_in_file = header.npart[type];
+#ifdef HAVE_HDF5
+                  pcsum = 0;
+#endif /* #ifdef HAVE_HDF5 */
+                  if(typelist[type] == 0)
+                    {
+                      /* we are expecting (npart>0) this block, but not for this particle type */
+                      n_for_this_task = n_in_file / ntask;
+                      if((ThisTask - readTask) < (n_in_file % ntask))
+                        n_for_this_task++;
+
+                      offset += n_for_this_task;
+                    }
+                  else
+                    {
+                      /* we are expecting (npart>0) this block for this particle type, read or recv */
+                      for(task = readTask; task <= lastTask; task++)
+                        {
+                          n_for_this_task = n_in_file / ntask;
+                          if((task - readTask) < (n_in_file % ntask))
+                            n_for_this_task++;
+
+                          if(task == ThisTask)
+                            if(NumPart + n_for_this_task > All.MaxPart)
+                              terminate("too many particles. %d %d %d\n", NumPart, n_for_this_task, All.MaxPart);
+
+                          /* blocked load to fit in finite size of CommBuffer */
+                          do
+                            {
+                              pc = n_for_this_task;
+
+                              if(pc > blockmaxlen)
+                                pc = blockmaxlen;
+
+                              if(ThisTask == readTask)
+                                {
+                                  if(All.ICFormat == 1 || All.ICFormat == 2)
+                                    my_fread(CommBuffer, bytes_per_blockelement, pc, fd);
+#ifdef HAVE_HDF5
+                                  if(All.ICFormat == 3 && pc > 0)
+                                    {
+                                      /* configure HDF5 dataspaces and hyperslab selection */
+                                      dims[0] = header.npart[type];
+                                      dims[1] = get_values_per_blockelement(blocknr);
+                                      if(dims[1] == 1)
+                                        rank = 1;
+                                      else
+                                        rank = 2;
+
+                                      hdf5_dataspace_in_file = my_H5Screate_simple(rank, dims, NULL);
+
+                                      dims[0]                  = pc;
+                                      hdf5_dataspace_in_memory = my_H5Screate_simple(rank, dims, NULL);
+
+                                      start[0] = pcsum;
+                                      start[1] = 0;
+
+                                      count[0] = pc;
+                                      count[1] = get_values_per_blockelement(blocknr);
+                                      pcsum += pc;
+
+                                      my_H5Sselect_hyperslab(hdf5_dataspace_in_file, H5S_SELECT_SET, start, NULL, count, NULL);
+
+                                      switch(get_datatype_in_block(blocknr, 1))
+                                        {
+                                          case FILE_INT:
+                                            hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT);
+                                            break;
+                                          case FILE_MY_IO_FLOAT:
+#ifdef INPUT_IN_DOUBLEPRECISION
+                                            hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE);
+#else  /* #ifdef INPUT_IN_DOUBLEPRECISION */
+                                            hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT);
+#endif /* #ifdef INPUT_IN_DOUBLEPRECISION #else */
+                                            break;
+                                          case FILE_MY_ID_TYPE:
+#ifdef LONGIDS
+                                            hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT64);
+#else  /* #ifdef LONGIDS */
+                                            hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT32);
+#endif /* #ifdef LONGIDS #else */
+                                            break;
+                                          case FILE_DOUBLE:
+                                            hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE);
+                                            break;
+                                          case FILE_FLOAT:
+                                            hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT);
+                                            break;
+                                          default:
+                                            terminate("can't process this input type");
+                                            break;
+                                        }
+
+                                      /* test if HDF5 dataset is actually present */
+                                      get_dataset_name(blocknr, buf);
+
+                                      hdf5_dataset = my_H5Dopen_if_existing(hdf5_grp[type], buf);
+
+                                      if(hdf5_dataset < 0)
+                                        {
+                                          // no, pad with zeros
+                                          if((ThisTask == readTask) && (task == ThisTask))
+                                            mpi_printf("\tDataset %s not present for particle type %d, using zero.\n", buf, type);
+                                          memset(CommBuffer, 0, dims[0] * dims[1] * my_H5Tget_size(hdf5_datatype));
+                                        }
+                                      else
+                                        {
+                                          // yes, read into CommBuffer
+                                          my_H5Dread(hdf5_dataset, hdf5_datatype, hdf5_dataspace_in_memory, hdf5_dataspace_in_file,
+                                                     H5P_DEFAULT, CommBuffer, buf);
+                                          my_H5Dclose(hdf5_dataset, buf);
+                                        }
+                                      my_H5Tclose(hdf5_datatype);
+                                      my_H5Sclose(hdf5_dataspace_in_memory, H5S_SIMPLE);
+                                      my_H5Sclose(hdf5_dataspace_in_file, H5S_SIMPLE);
+
+                                    } /* All.ICFormat == 3 */
+#endif                                /* #ifdef HAVE_HDF5 */
+                                }
+
+                              if(ThisTask == readTask && task != readTask && pc > 0)
+                                MPI_Ssend(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, task, TAG_PDATA, MPI_COMM_WORLD);
+
+                              if(ThisTask != readTask && task == ThisTask && pc > 0)
+                                MPI_Recv(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, readTask, TAG_PDATA, MPI_COMM_WORLD,
+                                         &status);
+
+                              /* copy CommBuffer contents into actual particle data structs */
+                              if(ThisTask == task)
+                                {
+                                  empty_read_buffer(blocknr, nstart + offset, pc, type);
+
+                                  offset += pc;
+                                }
+
+                              n_for_this_task -= pc;
+                            } /* do */
+                          while(n_for_this_task > 0);
+
+                        } /* task loop */
+                    }     /* typelist[type] > 0 */
+                }         /* type loop */
+
+              if(ThisTask == readTask)
+                {
+                  if(All.ICFormat == 1 || All.ICFormat == 2)
+                    {
+                      SKIP2;
+                      swap_Nbyte((char *)&blksize1, 1, 4);
+                      swap_Nbyte((char *)&blksize2, 1, 4);
+                      if(blksize1 != blksize2)
+                        {
+                          sprintf(buf, "incorrect block-sizes detected!\n Task=%d   blocknr=%d  blksize1=%d  blksize2=%d\n", ThisTask,
+                                  blocknr, blksize1, blksize2);
+                          if(blocknr == IO_ID)
+                            {
+                              strcat(buf, "Possible mismatch of 32bit and 64bit ID's in IC file and AREPO compilation !\n");
+                            }
+                          terminate(buf);
+                        }
+                    }
+                }
+
+            } /* npart > 0 */
+        }     /* blockpresent */
+    }         /* blocknr loop */
+
+  for(type = 0; type < NTYPES; type++)
+    {
+      n_in_file = header.npart[type];
+
+      n_for_this_task = n_in_file / ntask;
+      if((ThisTask - readTask) < (n_in_file % ntask))
+        n_for_this_task++;
+
+      NumPart += n_for_this_task;
+
+      if(type == 0)
+        NumGas += n_for_this_task;
+    }
+
+  if(ThisTask == readTask)
+    {
+      if(All.ICFormat == 1 || All.ICFormat == 2)
+        fclose(fd);
+#ifdef HAVE_HDF5
+      if(All.ICFormat == 3)
+        {
+          for(type = NTYPES - 1; type >= 0; type--)
+            if(header.npart[type] > 0)
+              {
+                sprintf(buf, "/PartType%d", type);
+                my_H5Gclose(hdf5_grp[type], buf);
+              }
+          my_H5Fclose(hdf5_file, fname);
+        }
+#endif /* #ifdef HAVE_HDF5 */
+    }
+}
+
+/*! \brief Determines on how many files a given snapshot is distributed.
+ *
+ *  \param[in] fname File name of the snapshot as given in the parameter file.
+ *
+ *  \return Number of files; -1: could not find files.
+ */
+int find_files(const char *fname)
+{
+  FILE *fd;
+  char buf[200], buf1[200];
+  int dummy;
+
+  /* candidate names: "<fname>.0" (tried first) and "<fname>", each with ".hdf5" appended for format 3 */
+  const char *ext = (All.ICFormat == 3) ? ".hdf5" : "";
+  int len         = snprintf(buf, sizeof(buf), "%s.%d%s", fname, 0, ext);
+  int len1        = snprintf(buf1, sizeof(buf1), "%s%s", fname, ext);
+
+  /* snprintf never overruns buf/buf1; a truncated name would refer to a different file, so stop here */
+  if(len < 0 || len >= (int)sizeof(buf) || len1 < 0 || len1 >= (int)sizeof(buf1))
+    terminate("file name starting with '%.100s' is too long", fname);
+
+#ifndef HAVE_HDF5
+  if(All.ICFormat == 3)
+    {
+      mpi_terminate("Code wasn't compiled with HDF5 support enabled!\n");
+    }
+#endif /* #ifndef HAVE_HDF5 */
+
+  header.num_files = 0;
+
+  if(ThisTask == 0)
+    {
+      if((fd = fopen(buf, "r")))
+        {
+          if(All.ICFormat == 1 || All.ICFormat == 2)
+            {
+              if(All.ICFormat == 2)
+                {
+                  my_fread(&dummy, sizeof(dummy), 1, fd);
+                  swap_file = dummy;
+                  my_fread(&dummy, sizeof(dummy), 1, fd);
+                  my_fread(&dummy, sizeof(dummy), 1, fd);
+                  my_fread(&dummy, sizeof(dummy), 1, fd);
+                }
+
+              my_fread(&dummy, sizeof(dummy), 1, fd);
+              if(All.ICFormat == 1)
+                {
+                  if(dummy == 256)
+                    swap_file = 8;
+                  else
+                    swap_file = dummy;
+                }
+              read_header_attributes(fd);
+
+              swap_header();
+
+#ifdef COMBINETYPES
+              header.npartTotal[3] += header.npartTotal[4] + header.npartTotal[5];
+              header.npart[3] += header.npart[4] + header.npart[5];
+              header.npartTotal[4] = 0;
+              header.npartTotal[5] = 0;
+              header.npart[4]      = 0;
+              header.npart[5]      = 0;
+#endif /* #ifdef COMBINETYPES */
+
+              my_fread(&dummy, sizeof(dummy), 1, fd);
+            }
+          fclose(fd);
+
+#ifdef HAVE_HDF5
+          if(All.ICFormat == 3)
+            read_header_attributes_in_hdf5(buf);
+#endif /* #ifdef HAVE_HDF5 */
+        }
+    }
+
+  MPI_Bcast(&swap_file, sizeof(swap_file), MPI_BYTE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&header, sizeof(header), MPI_BYTE, 0, MPI_COMM_WORLD);
+
+  if(header.num_files < 0)
+    terminate("header.num_files < 0");
+  if(header.num_files > 100000)
+    terminate("header.num_files=%d read from %s does not make sense - header possibly corrupt.", header.num_files, buf);
+  if(header.num_files > 0)
+    return header.num_files;
+
+  if(ThisTask == 0)
+    {
+      if((fd = fopen(buf1, "r")))
+        {
+          if(All.ICFormat == 1 || All.ICFormat == 2)
+            {
+              if(All.ICFormat == 2)
+                {
+                  my_fread(&dummy, sizeof(dummy), 1, fd);
+                  swap_file = dummy;
+                  my_fread(&dummy, sizeof(dummy), 1, fd);
+                  my_fread(&dummy, sizeof(dummy), 1, fd);
+                  my_fread(&dummy, sizeof(dummy), 1, fd);
+                }
+
+              my_fread(&dummy, sizeof(dummy), 1, fd);
+              if(All.ICFormat == 1)
+                {
+                  if(dummy == 256)
+                    swap_file = 8;
+                  else
+                    swap_file = dummy;
+                }
+              read_header_attributes(fd);
+              swap_header();
+
+#ifdef COMBINETYPES
+              header.npartTotal[3] += header.npartTotal[4] + header.npartTotal[5];
+              header.npart[3] += header.npart[4] + header.npart[5];
+              header.npartTotal[4] = 0;
+              header.npartTotal[5] = 0;
+              header.npart[4]      = 0;
+              header.npart[5]      = 0;
+#endif /* #ifdef COMBINETYPES */
+
+              my_fread(&dummy, sizeof(dummy), 1, fd);
+            }
+          fclose(fd);
+
+#ifdef HAVE_HDF5
+          if(All.ICFormat == 3)
+            read_header_attributes_in_hdf5(buf1);
+#endif /* #ifdef HAVE_HDF5 */
+
+          header.num_files = 1; /* a file without the ".0" part is treated as a single-file snapshot */
+        }
+    }
+
+  MPI_Bcast(&swap_file, sizeof(swap_file), MPI_BYTE, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&header, sizeof(header), MPI_BYTE, 0, MPI_COMM_WORLD);
+
+  if(header.num_files > 0)
+    return header.num_files;
+
+  mpi_terminate("\nCan't find initial conditions file, neither as '%s'\nnor as '%s'\n", buf, buf1);
+  return -1;
+}
+
+/*! \brief Determines which file of a snapshot the calling task belongs to.
+ *
+ *  Each file is assigned a contiguous range of tasks; these tasks hold the
+ *  content of that file after the ICs have been read. The ranges are made as
+ *  equal in size as possible. There may be at most as many files as tasks.
+ *
+ *  \param[in] nfiles Number of files over which the snapshot is distributed.
+ *  \param[in] firstfile,firsttask,lasttask Not used by this function.
+ *  \param[out] filenr Number of the file to which this task belongs.
+ *  \param[out] master Number of the task responsible for reading that file.
+ *  \param[out] last Number of the last task belonging to the same file.
+ *
+ *  \return void
+ */
+void distribute_file(int nfiles, int firstfile, int firsttask, int lasttask, int *filenr, int *master, int *last)
+{
+  if(NTask % nfiles == 0)
+    {
+      /* the tasks divide evenly: every file is held by a block of equal size */
+      const int block_size = NTask / nfiles;
+      const int block      = ThisTask / block_size;
+      *master              = block * block_size;
+      *last                = (block + 1) * block_size - 1;
+      *filenr              = block;
+      return;
+    }
+
+  /* uneven case: walk through the files until the range containing ThisTask is found */
+  const double tasks_per_file = ((double)NTask) / nfiles;
+  int file                    = 0;
+  *last                       = -1;
+  while(file < nfiles)
+    {
+      *master = *last + 1;
+      *last   = (file + 1) * tasks_per_file;
+      if(*last >= NTask)
+        *last -= 1;
+      if(*master > *last)
+        terminate("last < master");
+      *filenr = file;
+      if(file + 1 == nfiles)
+        *last = NTask - 1; /* the final file always extends to the last task */
+
+      if(*master <= ThisTask && ThisTask <= *last)
+        return;
+      file++;
+    }
+}
+
+#ifdef HAVE_HDF5
+/*! \brief The error handler used during the loading of the hdf5 header.
+ *
+ *  \param[in] unused The parameter is not used, but it is necessary for
+ *             compatibility with the HDF5 library.
+ *  \return 1 if the write error is tolerated, otherwise the run is terminated.
+ */
+herr_t hdf5_header_error_handler(void *unused)
+{
+#ifdef TOLERATE_WRITE_ERROR
+  write_error(3, 0, 0);
+  return 1;
+#else
+  terminate("Failed to read HDF5 header attribute. Probably your file is corrupt.\n");
+  return 0;
+#endif
+}
+
+/*! \brief Fills the global 'header' from the "/Header" group of an HDF5
+ *         file (i.e. format 3).
+ *
+ *  \param[in] fname Name of the HDF5 file that is opened read-only.
+ *
+ *  \return void
+ */
+void read_header_attributes_in_hdf5(const char *fname)
+{
+  const hssize_t one      = 1;      /* element count passed for scalar attributes */
+  const hssize_t per_type = NTYPES; /* element count passed for per-type arrays */
+  hid_t attr;
+
+  const hid_t file = my_H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);
+  const hid_t grp  = my_H5Gopen(file, "/Header");
+
+  attr = my_H5Aopen_name(grp, "NumPart_ThisFile");
+  my_H5Aread(attr, H5T_NATIVE_INT, header.npart, "NumPart_ThisFile", per_type);
+  my_H5Aclose(attr, "NumPart_ThisFile");
+
+  attr = my_H5Aopen_name(grp, "NumPart_Total");
+  my_H5Aread(attr, H5T_NATIVE_UINT, header.npartTotal, "NumPart_Total", per_type);
+  my_H5Aclose(attr, "NumPart_Total");
+
+  attr = my_H5Aopen_name(grp, "NumPart_Total_HighWord");
+  my_H5Aread(attr, H5T_NATIVE_UINT, header.npartTotalHighWord, "NumPart_Total_HighWord", per_type);
+  my_H5Aclose(attr, "NumPart_Total_HighWord");
+
+  attr = my_H5Aopen_name(grp, "MassTable");
+  my_H5Aread(attr, H5T_NATIVE_DOUBLE, header.mass, "MassTable", per_type);
+  my_H5Aclose(attr, "MassTable");
+
+  attr = my_H5Aopen_name(grp, "Time");
+  my_H5Aread(attr, H5T_NATIVE_DOUBLE, &header.time, "Time", one);
+  my_H5Aclose(attr, "Time");
+
+  attr = my_H5Aopen_name(grp, "Redshift");
+  my_H5Aread(attr, H5T_NATIVE_DOUBLE, &header.redshift, "Redshift", one);
+  my_H5Aclose(attr, "Redshift");
+
+  attr = my_H5Aopen_name(grp, "BoxSize");
+  my_H5Aread(attr, H5T_NATIVE_DOUBLE, &header.BoxSize, "BoxSize", one);
+  my_H5Aclose(attr, "BoxSize");
+
+  attr = my_H5Aopen_name(grp, "NumFilesPerSnapshot");
+  my_H5Aread(attr, H5T_NATIVE_INT, &header.num_files, "NumFilesPerSnapshot", one);
+  my_H5Aclose(attr, "NumFilesPerSnapshot");
+
+  attr = my_H5Aopen_name(grp, "Omega0");
+  my_H5Aread(attr, H5T_NATIVE_DOUBLE, &header.Omega0, "Omega0", one);
+  my_H5Aclose(attr, "Omega0");
+
+  attr = my_H5Aopen_name(grp, "OmegaLambda");
+  my_H5Aread(attr, H5T_NATIVE_DOUBLE, &header.OmegaLambda, "OmegaLambda", one);
+  my_H5Aclose(attr, "OmegaLambda");
+
+  attr = my_H5Aopen_name(grp, "HubbleParam");
+  my_H5Aread(attr, H5T_NATIVE_DOUBLE, &header.HubbleParam, "HubbleParam", one);
+  my_H5Aclose(attr, "HubbleParam");
+
+  attr = my_H5Aopen_name(grp, "Flag_Sfr");
+  my_H5Aread(attr, H5T_NATIVE_INT, &header.flag_sfr, "Flag_Sfr", one);
+  my_H5Aclose(attr, "Flag_Sfr");
+
+  attr = my_H5Aopen_name(grp, "Flag_Cooling");
+  my_H5Aread(attr, H5T_NATIVE_INT, &header.flag_cooling, "Flag_Cooling", one);
+  my_H5Aclose(attr, "Flag_Cooling");
+
+  attr = my_H5Aopen_name(grp, "Flag_StellarAge");
+  my_H5Aread(attr, H5T_NATIVE_INT, &header.flag_stellarage, "Flag_StellarAge", one);
+  my_H5Aclose(attr, "Flag_StellarAge");
+
+  attr = my_H5Aopen_name(grp, "Flag_Metals");
+  my_H5Aread(attr, H5T_NATIVE_INT, &header.flag_metals, "Flag_Metals", one);
+  my_H5Aclose(attr, "Flag_Metals");
+
+  attr = my_H5Aopen_name(grp, "Flag_Feedback");
+  my_H5Aread(attr, H5T_NATIVE_INT, &header.flag_feedback, "Flag_Feedback", one);
+  my_H5Aclose(attr, "Flag_Feedback");
+
+  attr = my_H5Aopen_name(grp, "Flag_DoublePrecision");
+  my_H5Aread(attr, H5T_NATIVE_INT, &header.flag_doubleprecision, "Flag_DoublePrecision", one);
+  my_H5Aclose(attr, "Flag_DoublePrecision");
+
+  my_H5Gclose(grp, "/Header");
+  my_H5Fclose(file, fname);
+}
+#endif /* #ifdef HAVE_HDF5 */
+
+/*! \brief Loads the header of a binary (non-hdf5, i.e. format 1 or 2)
+ *         snapshot file into the global header structure.
+ *
+ *  If NTYPES_ICS is defined and RestartFlag is 0, the file is read through
+ *  header_ICs instead: its first NTYPES_ICS per-type entries and all scalar
+ *  fields are copied into header, and the per-type entries from NTYPES_ICS
+ *  up to NTYPES are set to zero. In every other case the bytes are read
+ *  straight into header.
+ *
+ * \param[in] *fd Pointer to snapshot file.
+ *
+ * \return void
+ */
+void read_header_attributes(FILE *fd)
+{
+#ifdef NTYPES_ICS
+  int t;
+
+  if(RestartFlag != 0)
+    {
+      /* no conversion needed: read directly into the full-size header */
+      my_fread(&header, sizeof(header), 1, fd);
+      return;
+    }
+
+  my_fread(&header_ICs, sizeof(header_ICs), 1, fd);
+
+  /* per-type arrays: copy what the IC header provides, zero the rest */
+  for(t = 0; t < NTYPES; t++)
+    {
+      if(t < NTYPES_ICS)
+        {
+          header.npart[t]              = header_ICs.npart[t];
+          header.mass[t]               = header_ICs.mass[t];
+          header.npartTotal[t]         = header_ICs.npartTotal[t];
+          header.npartTotalHighWord[t] = header_ICs.npartTotalHighWord[t];
+        }
+      else
+        {
+          header.npart[t]              = 0;
+          header.mass[t]               = 0;
+          header.npartTotal[t]         = 0;
+          header.npartTotalHighWord[t] = 0;
+        }
+    }
+
+  /* time and cosmology */
+  header.time        = header_ICs.time;
+  header.redshift    = header_ICs.redshift;
+  header.BoxSize     = header_ICs.BoxSize;
+  header.Omega0      = header_ICs.Omega0;
+  header.OmegaLambda = header_ICs.OmegaLambda;
+  header.HubbleParam = header_ICs.HubbleParam;
+
+  /* file layout */
+  header.num_files                 = header_ICs.num_files;
+  header.composition_vector_length = header_ICs.composition_vector_length;
+  header.lpt_scalingfactor         = header_ICs.lpt_scalingfactor;
+
+  /* flags */
+  header.flag_sfr               = header_ICs.flag_sfr;
+  header.flag_feedback          = header_ICs.flag_feedback;
+  header.flag_cooling           = header_ICs.flag_cooling;
+  header.flag_stellarage        = header_ICs.flag_stellarage;
+  header.flag_metals            = header_ICs.flag_metals;
+  header.flag_entropy_instead_u = header_ICs.flag_entropy_instead_u;
+  header.flag_doubleprecision   = header_ICs.flag_doubleprecision;
+  header.flag_lpt_ics           = header_ICs.flag_lpt_ics;
+  header.flag_tracer_field      = header_ICs.flag_tracer_field;
+#else  /* #ifdef NTYPES_ICS */
+  my_fread(&header, sizeof(header), 1, fd);
+#endif /* #ifdef NTYPES_ICS #else */
+}
+
+/*! \brief Swaps endianness of data.
+ *
+ *  Reverses the byte order of each of the n elements in place. Nothing is
+ *  done when the file-level variable swap_file equals 8, which this function
+ *  treats as "no swapping required".
+ *
+ * \param[in, out] data Pointer to the data.
+ * \param[in] n Number of elements to swap.
+ * \param[in] m Size of single element to swap: int, float = 4; double = 8.
+ *              Elements narrower than 2 bytes are left untouched.
+ *
+ * \return void
+ */
+void swap_Nbyte(char *data, int n, int m)
+{
+  int j;
+  char *lo, *hi, tmp;
+
+  /* swap_file == 8: nothing to do; m < 2: reversing would be a no-op */
+  if(swap_file == 8 || m < 2)
+    return;
+
+  for(j = 0; j < n; j++)
+    {
+      /* Reverse one element in place with two converging pointers. This
+       * needs no fixed-size scratch buffer, so any element width is safe,
+       * and the size_t offset avoids int overflow on very large blocks. */
+      lo = data + (size_t)j * (size_t)m;
+      hi = lo + (m - 1);
+
+      while(lo < hi)
+        {
+          tmp   = *lo;
+          *lo++ = *hi;
+          *hi-- = tmp;
+        }
+    }
+}
+
+/*! \brief Swaps the endianness of the snapshot header.
+ *
+ *  Calls swap_Nbyte() once per field of the global header. The second
+ *  argument is the element count (NTYPES for the per-type arrays, 1 for
+ *  scalars) and the third is the element width in bytes as passed to
+ *  swap_Nbyte() (4 or 8). The widths are hard-coded here and therefore have
+ *  to match the field types in the header declaration, which is not part of
+ *  this file section.
+ *
+ *  \return void
+ */
+void swap_header()
+{
+  swap_Nbyte((char *)&header.npart, NTYPES, 4);
+  swap_Nbyte((char *)&header.mass, NTYPES, 8);
+  swap_Nbyte((char *)&header.time, 1, 8);
+  swap_Nbyte((char *)&header.redshift, 1, 8);
+  swap_Nbyte((char *)&header.flag_sfr, 1, 4);
+  swap_Nbyte((char *)&header.flag_feedback, 1, 4);
+  swap_Nbyte((char *)&header.npartTotal, NTYPES, 4);
+  swap_Nbyte((char *)&header.flag_cooling, 1, 4);
+  swap_Nbyte((char *)&header.num_files, 1, 4);
+  swap_Nbyte((char *)&header.BoxSize, 1, 8);
+  swap_Nbyte((char *)&header.Omega0, 1, 8);
+  swap_Nbyte((char *)&header.OmegaLambda, 1, 8);
+  swap_Nbyte((char *)&header.HubbleParam, 1, 8);
+  swap_Nbyte((char *)&header.flag_stellarage, 1, 4);
+  swap_Nbyte((char *)&header.flag_metals, 1, 4);
+  swap_Nbyte((char *)&header.npartTotalHighWord, NTYPES, 4);
+  swap_Nbyte((char *)&header.flag_entropy_instead_u, 1, 4);
+  swap_Nbyte((char *)&header.flag_doubleprecision, 1, 4);
+  swap_Nbyte((char *)&header.flag_lpt_ics, 1, 4);
+  /* NOTE(review): width 4 assumes lpt_scalingfactor is a 4-byte type - confirm */
+  swap_Nbyte((char *)&header.lpt_scalingfactor, 1, 4);
+  swap_Nbyte((char *)&header.flag_tracer_field, 1, 4);
+  swap_Nbyte((char *)&header.composition_vector_length, 1, 4);
+}
+
+#ifdef TILE_ICS
+/*! \brief Duplicates ICs and lines TileICsFactor of them up in each dimension.
+ *
+ *  Every local particle is copied so that the original plus its copies form
+ *  a regular arrangement of TileICsFactor tiles along each tiled axis; copy
+ *  (ix, iy, iz) is shifted by BoxSize / TileICsFactor * (ix, iy, iz) and gets
+ *  an ID offset built from IDS_OFFSET. Gas particles stay contiguous at the
+ *  start of P[] (with SphP[] copied alongside); all other particles are moved
+ *  behind the tiled gas block first and then tiled there.
+ *
+ *  The y axis is only tiled when ONEDIMS is not defined and the z axis only
+ *  when neither ONEDIMS nor TWODIMS is defined. The number of copies per
+ *  particle is therefore TileICsFactor to the power of the number of tiled
+ *  axes, and that same number is used for the storage request, the memmove
+ *  target and the final particle counts, so no unfilled slots are counted
+ *  as particles in 1D/2D builds.
+ *
+ *  \return void
+ */
+void tile_ics(void)
+{
+  const int fac = All.TileICsFactor;
+
+  /* tiles per particle (original included); must match the loops below */
+#if defined(ONEDIMS)
+  const int ntiles = fac;
+#elif defined(TWODIMS)
+  const int ntiles = fac * fac;
+#else  /* #if defined(ONEDIMS) #elif defined(TWODIMS) */
+  const int ntiles = fac * fac * fac;
+#endif /* #if defined(ONEDIMS) #elif defined(TWODIMS) #else */
+
+  mpi_printf("TILE_ICS: tiling by a factor of %d...\n", All.TileICsFactor);
+
+  /* allocate memory for new particles */
+  domain_resize_storage(NumPart * (ntiles - 1), NumGas * (ntiles - 1), 0);
+
+  /* tile gas particles at the beginning of P[]; first move the non-gas
+   * particles out of the way, to just behind the future gas block */
+  int N_others = NumPart - NumGas;
+  memmove(&P[NumGas * ntiles], &P[NumGas], N_others * sizeof(struct particle_data));
+  int i, j, ix, iy = 0, iz = 0;
+  for(i = 0; i < NumGas; i++)
+    {
+      for(ix = 0; ix < fac; ix++)
+        {
+#ifndef ONEDIMS
+          for(iy = 0; iy < fac; iy++)
+#endif /* #ifndef ONEDIMS */
+            {
+#if !defined(TWODIMS) && !defined(ONEDIMS)
+              for(iz = 0; iz < fac; iz++)
+#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */
+                {
+                  /* tile (0,0,0) is the original particle itself */
+                  if(ix == 0 && iy == 0 && iz == 0)
+                    continue;
+                  j       = i + NumGas * ix + NumGas * fac * iy + NumGas * fac * fac * iz;
+                  P[j]    = P[i];
+                  P[j].ID = P[i].ID + IDS_OFFSET * ix + IDS_OFFSET * fac * iy + IDS_OFFSET * fac * fac * iz;
+                  P[j].Pos[0] += All.BoxSize / All.TileICsFactor * ix;
+                  P[j].Pos[1] += All.BoxSize / All.TileICsFactor * iy;
+                  P[j].Pos[2] += All.BoxSize / All.TileICsFactor * iz;
+                  SphP[j] = SphP[i];
+                }
+            }
+        }
+    }
+  /* tile the other particle types, which now start behind the gas block */
+  const int others_begin = NumGas * ntiles;
+  iy                     = 0;
+  iz                     = 0;
+  for(i = others_begin; i < others_begin + N_others; i++)
+    {
+      for(ix = 0; ix < fac; ix++)
+        {
+#ifndef ONEDIMS
+          for(iy = 0; iy < fac; iy++)
+#endif /* #ifndef ONEDIMS */
+            {
+#if !defined(TWODIMS) && !defined(ONEDIMS)
+              for(iz = 0; iz < fac; iz++)
+#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */
+                {
+                  if(ix == 0 && iy == 0 && iz == 0)
+                    continue;
+                  j       = i + N_others * ix + N_others * fac * iy + N_others * fac * fac * iz;
+                  P[j]    = P[i];
+                  P[j].ID = P[i].ID + IDS_OFFSET * ix + IDS_OFFSET * fac * iy + IDS_OFFSET * fac * fac * iz;
+                  P[j].Pos[0] += All.BoxSize / All.TileICsFactor * ix;
+                  P[j].Pos[1] += All.BoxSize / All.TileICsFactor * iy;
+                  P[j].Pos[2] += All.BoxSize / All.TileICsFactor * iz;
+                }
+            }
+        }
+    }
+
+  NumGas *= ntiles;
+  NumPart *= ntiles;
+}
+#endif /* #ifdef TILE_ICS */
diff --git a/src/amuse/community/arepo/src/io/restart.c b/src/amuse/community/arepo/src/io/restart.c
new file mode 100644
index 0000000000..9a3dff5bba
--- /dev/null
+++ b/src/amuse/community/arepo/src/io/restart.c
@@ -0,0 +1,1549 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/io/restart.c
+ * \date        05/2018
+ * \brief       Handling of the loading/writing of restart files.
+ * \details     contains functions:
+ *                void loadrestart(void)
+ *                void reread_params_after_loading_restart(void)
+ *                static int compare_seq_data(const void *a, const void *b)
+ *                static void create_restartfiles_dir()
+ *                static void get_restart_filename(char *buf, int task,
+ *                  int modus)
+ *                static void backup_restartfiles(int task)
+ *                static int get_file_to_check(int task)
+ *                static void check_restart_files(char *buf, struct check *ch,
+ *                  int *success)
+ *                static void send_work_request(int modus, int i)
+ *                static void polling(int modus)
+ *                static void work_files(int modus)
+ *                void restart(int modus)
+ *                static void write_or_read_this_processors_restart_file(int
+ *                  modus, char *buf, struct check *ch)
+ *                static int execute_write_or_read(int modus, char *buf,
+ *                  struct check *ch)
+ *                static void contents_restart_file(int modus)
+ *                void readjust_timebase(double TimeMax_old,
+ *                  double TimeMax_new)
+ *                void in(int *x, int modus)
+ *                void byten(void *x, size_t n, int modus)
+ *                void byten_nohash(void *x, size_t n, int modus)
+ *                void byten_hash(void *x, size_t n, int modus, int hash)
+ *                void allocate_iobuf(void)
+ *                void deallocate_iobuf(int modus)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef __USE_GNU
+#define _GNU_SOURCE /* needed for USE_DIRECT_IO_FOR_RESTARTS */
+#endif              /* #ifndef __USE_GNU */
+
+#include <fcntl.h>
+#include <gsl/gsl_rng.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../debug_md5/Md5.h"
+#include "../domain/domain.h"
+#include "../mesh/voronoi/voronoi.h"
+
+/* Operation modes passed around as the `modus` argument in this file. */
+#define MODUS_WRITE 0
+#define MODUS_READ 1
+#define MODUS_READCHECK 2
+#define MODUS_CHECK 3
+
+/*! \brief Data for scheduling restart file IO.
+ *
+ *  compare_seq_data() orders entries by rankinnode, then thisnode, then
+ *  thistask. send_work_request() uses seq[i].thistask as the MPI rank that
+ *  receives work item i.
+ */
+static struct seq_data
+{
+  int thistask;   /* MPI rank this entry refers to */
+  int rankinnode; /* primary sort key; presumably the rank's index within its node - TODO confirm */
+  int thisnode;   /* secondary sort key; presumably the index of the node - TODO confirm */
+} * seq;
+
+/*! \brief Metadata of restart files to be compared to when checking.
+ *
+ *  check_restart_files() reads byte_count bytes back from a file, hashes
+ *  them with MD5 and compares the digest with hash. Instances are exchanged
+ *  between tasks as raw bytes (MPI_BYTE, sizeof(struct check)).
+ */
+static struct check
+{
+  long long byte_count;   /* number of bytes covered by the hash */
+  unsigned char hash[16]; /* expected 16-byte MD5 digest */
+} * checks;
+
+/* Per-task flags, indexed by task number: polling() stores the result of a
+ * file check here and send_work_request() reads it. */
+static char *write_success;
+
+/* File descriptor of the file currently open; check_restart_files() sets it
+ * with open() and releases it with close(). */
+static int fdint;
+
+/* Forward declarations of the file-local I/O helpers (defined further down
+ * in this file). */
+static void in(int *x, int modus);
+static void byten(void *x, size_t n, int modus);
+static void byten_nohash(void *x, size_t n, int modus);
+static void byten_hash(void *x, size_t n, int modus, int hash);
+static void write_or_read_this_processors_restart_file(int modus, char *fname, struct check *ch);
+static int execute_write_or_read(int modus, char *buf, struct check *ch);
+static void contents_restart_file(int modus);
+
+/* 32 MiB; presumably the size of the I/O buffer - TODO confirm against
+ * allocate_iobuf(). */
+#define MAX_BLOCK_SIZE (32 * 1024 * 1024)
+
+/* Page size used by check_restart_files() to pad files to a whole number of
+ * pages when USE_DIRECT_IO_FOR_RESTARTS is set. */
+static int PageSize;
+/* I/O buffer and its fill/read positions; presumably managed by
+ * allocate_iobuf()/deallocate_iobuf() and the byten*() helpers - TODO confirm. */
+static char *iobuf_aligned, *io_buf;
+static size_t fillp, iop;
+void allocate_iobuf(void);
+void deallocate_iobuf(int modus);
+
+/* NOTE(review): not referenced in this part of the file; presumably the
+ * running byte total of the current restart file - confirm. */
+static long long byte_count;
+/* Scheduling counters used by polling() on task 0: work items handed out,
+ * work items reported complete, group size, and number of groups. */
+static int files_started;
+static int files_completed;
+static int files_concurrent;
+static int files_groups;
+
+/* MD5 state used when hashing a restart file (see check_restart_files()). */
+static MD5_CTX mysum;
+
+/* Copy of the global struct All taken by loadrestart() before the restart
+ * files overwrite All. */
+static struct global_data_all_processes all;
+
+/*! \brief This function loads the last restart file.
+ *
+ *  Some parameters of the parameter file might be changed between restarting.
+ *  This function ensures that only the allowed parameters change,
+ *  otherwise the old value from the restart file is taken.
+ *  If the end time of the simulation changed readjust_timebase() is called in
+ *  the end.
+ *
+ *  The three steps below are order dependent: the current contents of All
+ *  are stashed in the file-static copy `all`, then restart(MODUS_READ)
+ *  overwrites All, and finally reread_params_after_loading_restart() copies
+ *  the permitted values from `all` back into All.
+ *
+ *  \return void
+ */
+void loadrestart(void)
+{
+  /* save global variables. (will be read from restart file) */
+  all = All;
+
+  /* Read restart files.
+     Note: This also resets all variables in the struct `All'. */
+  restart(MODUS_READ);
+
+  /* However, during the run, some variables in the parameter
+     file are allowed to be changed, if desired. These are copied here. */
+  reread_params_after_loading_restart();
+}
+
+/*! \brief This function takes from the parameter file values that are allowed
+ *         to change after restart.
+ *
+ *  At this point `All` holds what was read from the restart files and the
+ *  file-static `all` holds the copy of All that loadrestart() saved before
+ *  reading them. For every parameter handled here the same pattern is
+ *  applied: task 0 issues a warning if the two values differ (printing the
+ *  value in All first, then the one in all), and afterwards every task
+ *  overwrites the value in All with the one from all.
+ *
+ *  TimeMax is the exception: it is not copied here; if it differs,
+ *  readjust_timebase() is called with the old and the new value instead.
+ *
+ *  \return void
+ */
+void reread_params_after_loading_restart(void)
+{
+  /* time stepping, run control and output cadence */
+  if(ThisTask == 0 && All.MinSizeTimestep != all.MinSizeTimestep)
+    warn("MinSizeTimestep modified from %g to %g while restarting at Time=%g", All.MinSizeTimestep, all.MinSizeTimestep, All.Time);
+  All.MinSizeTimestep = all.MinSizeTimestep;
+  if(ThisTask == 0 && All.MaxSizeTimestep != all.MaxSizeTimestep)
+    warn("MaxSizeTimestep modified from %g to %g while restarting at Time=%g", All.MaxSizeTimestep, all.MaxSizeTimestep, All.Time);
+  All.MaxSizeTimestep = all.MaxSizeTimestep;
+  if(ThisTask == 0 && All.TimeLimitCPU != all.TimeLimitCPU)
+    warn("TimeLimitCPU modified from %g to %g while restarting at Time=%g", All.TimeLimitCPU, all.TimeLimitCPU, All.Time);
+  All.TimeLimitCPU = all.TimeLimitCPU;
+  if(ThisTask == 0 && All.ResubmitOn != all.ResubmitOn)
+    warn("ResubmitOn modified from %d to %d while restarting at Time=%g", All.ResubmitOn, all.ResubmitOn, All.Time);
+  All.ResubmitOn = all.ResubmitOn;
+  if(ThisTask == 0 && All.TimeBetSnapshot != all.TimeBetSnapshot)
+    warn("TimeBetSnapshot modified from %g to %g while restarting at Time=%g", All.TimeBetSnapshot, all.TimeBetSnapshot, All.Time);
+  All.TimeBetSnapshot = all.TimeBetSnapshot;
+  if(ThisTask == 0 && All.TimeBetStatistics != all.TimeBetStatistics)
+    warn("TimeBetStatistics modified from %g to %g while restarting at Time=%g", All.TimeBetStatistics, all.TimeBetStatistics,
+         All.Time);
+  All.TimeBetStatistics = all.TimeBetStatistics;
+  if(ThisTask == 0 && All.CpuTimeBetRestartFile != all.CpuTimeBetRestartFile)
+    warn("CpuTimeBetRestartFile modified from %g to %g while restarting at Time=%g", All.CpuTimeBetRestartFile,
+         all.CpuTimeBetRestartFile, All.Time);
+  All.CpuTimeBetRestartFile = all.CpuTimeBetRestartFile;
+  if(ThisTask == 0 && All.ErrTolIntAccuracy != all.ErrTolIntAccuracy)
+    warn("ErrTolIntAccuracy modified from %g to %g while restarting at Time=%g", All.ErrTolIntAccuracy, all.ErrTolIntAccuracy,
+         All.Time);
+  All.ErrTolIntAccuracy = all.ErrTolIntAccuracy;
+  if(ThisTask == 0 && All.SnapFormat != all.SnapFormat)
+    warn("SnapFormat modified from %d to %d while restarting at Time=%g", All.SnapFormat, all.SnapFormat, All.Time);
+  All.SnapFormat = all.SnapFormat;
+
+  /* force accuracy, criteria and parallel output settings */
+  if(ThisTask == 0 && All.ErrTolForceAcc != all.ErrTolForceAcc)
+    warn("ErrTolForceAcc modified from %g to %g while restarting at Time=%g", All.ErrTolForceAcc, all.ErrTolForceAcc, All.Time);
+  All.ErrTolForceAcc = all.ErrTolForceAcc;
+  if(ThisTask == 0 && All.TypeOfTimestepCriterion != all.TypeOfTimestepCriterion)
+    warn("TypeOfTimestepCriterion modified from %d to %d while restarting at Time=%g", All.TypeOfTimestepCriterion,
+         all.TypeOfTimestepCriterion, All.Time);
+  All.TypeOfTimestepCriterion = all.TypeOfTimestepCriterion;
+  if(ThisTask == 0 && All.TypeOfOpeningCriterion != all.TypeOfOpeningCriterion)
+    warn("TypeOfOpeningCriterion modified from %d to %d while restarting at Time=%g", All.TypeOfOpeningCriterion,
+         all.TypeOfOpeningCriterion, All.Time);
+  All.TypeOfOpeningCriterion = all.TypeOfOpeningCriterion;
+  if(ThisTask == 0 && All.NumFilesWrittenInParallel != all.NumFilesWrittenInParallel)
+    warn("NumFilesWrittenInParallel modified from %d to %d while restarting at Time=%g", All.NumFilesWrittenInParallel,
+         all.NumFilesWrittenInParallel, All.Time);
+  All.NumFilesWrittenInParallel = all.NumFilesWrittenInParallel;
+  if(ThisTask == 0 && All.NumFilesPerSnapshot != all.NumFilesPerSnapshot)
+    warn("NumFilesPerSnapshot modified from %d to %d while restarting at Time=%g", All.NumFilesPerSnapshot, all.NumFilesPerSnapshot,
+         All.Time);
+  All.NumFilesPerSnapshot = all.NumFilesPerSnapshot;
+
+  /* density limits, domain decomposition and mesh regularisation */
+  if(ThisTask == 0 && All.LimitUBelowThisDensity != all.LimitUBelowThisDensity)
+    warn("LimitUBelowThisDensity modified from %g to %g while restarting at Time=%g", All.LimitUBelowThisDensity,
+         all.LimitUBelowThisDensity, All.Time);
+  All.LimitUBelowThisDensity = all.LimitUBelowThisDensity;
+  if(ThisTask == 0 && All.LimitUBelowCertainDensityToThisValue != all.LimitUBelowCertainDensityToThisValue)
+    warn("LimitUBelowCertainDensityToThisValue modified from %g to %g while restarting at Time=%g",
+         All.LimitUBelowCertainDensityToThisValue, all.LimitUBelowCertainDensityToThisValue, All.Time);
+  All.LimitUBelowCertainDensityToThisValue = all.LimitUBelowCertainDensityToThisValue;
+  if(ThisTask == 0 && All.MinimumDensityOnStartUp != all.MinimumDensityOnStartUp)
+    warn("MinimumDensityOnStartUp modified from %g to %g while restarting at Time=%g", All.MinimumDensityOnStartUp,
+         all.MinimumDensityOnStartUp, All.Time);
+  All.MinimumDensityOnStartUp = all.MinimumDensityOnStartUp;
+  if(ThisTask == 0 && All.MultipleDomains != all.MultipleDomains)
+    warn("MultipleDomains modified from %d to %d while restarting at Time=%g", All.MultipleDomains, all.MultipleDomains, All.Time);
+  All.MultipleDomains = all.MultipleDomains;
+  if(ThisTask == 0 && All.TopNodeFactor != all.TopNodeFactor)
+    warn("TopNodeFactor modified from %g to %g while restarting at Time=%g", All.TopNodeFactor, all.TopNodeFactor, All.Time);
+  All.TopNodeFactor = all.TopNodeFactor;
+  if(ThisTask == 0 && All.ActivePartFracForNewDomainDecomp != all.ActivePartFracForNewDomainDecomp)
+    warn("ActivePartFracForNewDomainDecomp modified from %g to %g while restarting at Time=%g", All.ActivePartFracForNewDomainDecomp,
+         all.ActivePartFracForNewDomainDecomp, All.Time);
+  All.ActivePartFracForNewDomainDecomp = all.ActivePartFracForNewDomainDecomp;
+  if(ThisTask == 0 && All.OutputListOn != all.OutputListOn)
+    warn("OutputListOn modified from %d to %d while restarting at Time=%g", All.OutputListOn, all.OutputListOn, All.Time);
+  All.OutputListOn = all.OutputListOn;
+  if(ThisTask == 0 && All.CourantFac != all.CourantFac)
+    warn("CourantFac modified from %g to %g while restarting at Time=%g", All.CourantFac, all.CourantFac, All.Time);
+  All.CourantFac = all.CourantFac;
+#ifdef REGULARIZE_MESH_FACE_ANGLE
+  if(ThisTask == 0 && All.CellMaxAngleFactor != all.CellMaxAngleFactor)
+    warn("CellMaxAngleFactor modified from %g to %g while restarting at Time=%g", All.CellMaxAngleFactor, all.CellMaxAngleFactor,
+         All.Time);
+  All.CellMaxAngleFactor = all.CellMaxAngleFactor;
+#else  /* #ifdef REGULARIZE_MESH_FACE_ANGLE */
+  if(ThisTask == 0 && All.CellShapingFactor != all.CellShapingFactor)
+    warn("CellShapingFactor modified from %g to %g while restarting at Time=%g", All.CellShapingFactor, all.CellShapingFactor,
+         All.Time);
+  All.CellShapingFactor = all.CellShapingFactor;
+#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */
+  if(ThisTask == 0 && All.CellShapingSpeed != all.CellShapingSpeed)
+    warn("CellShapingSpeed modified from %g to %g while restarting at Time=%g", All.CellShapingSpeed, all.CellShapingSpeed, All.Time);
+  All.CellShapingSpeed = all.CellShapingSpeed;
+
+  /* Output list. OutputListLength is overwritten first, so the memcmp and
+     memcpy calls below operate on the new length, not the one that came
+     from the restart file. */
+  if(ThisTask == 0 && All.OutputListLength != all.OutputListLength)
+    warn("OutputListLength modified from %d to %d while restarting at Time=%g", All.OutputListLength, all.OutputListLength, All.Time);
+  All.OutputListLength = all.OutputListLength;
+  if(ThisTask == 0 && memcmp(All.OutputListTimes, all.OutputListTimes, sizeof(double) * All.OutputListLength) != 0)
+    warn("OutputListTimes modified while restarting at Time=%g", All.Time);
+  memcpy(All.OutputListTimes, all.OutputListTimes, sizeof(double) * All.OutputListLength);
+  if(ThisTask == 0 && memcmp(All.OutputListFlag, all.OutputListFlag, sizeof(char) * All.OutputListLength) != 0)
+    warn("OutputListFlag modified while restarting at Time=%g", All.Time);
+  memcpy(All.OutputListFlag, all.OutputListFlag, sizeof(char) * All.OutputListLength);
+
+  /* String parameters. Source and destination are the same member of two
+     instances of the same struct type, so strcpy cannot overrun as long as
+     the source is NUL-terminated. */
+  if(ThisTask == 0 && strcmp(All.ResubmitCommand, all.ResubmitCommand) != 0)
+    warn("ResubmitCommand modified from %s to %s while restarting at Time=%g", All.ResubmitCommand, all.ResubmitCommand, All.Time);
+  strcpy(All.ResubmitCommand, all.ResubmitCommand);
+  if(ThisTask == 0 && strcmp(All.OutputListFilename, all.OutputListFilename) != 0)
+    warn("OutputListFilename modified from %s to %s while restarting at Time=%g", All.OutputListFilename, all.OutputListFilename,
+         All.Time);
+  strcpy(All.OutputListFilename, all.OutputListFilename);
+  if(ThisTask == 0 && strcmp(All.OutputDir, all.OutputDir) != 0)
+    warn("OutputDir modified from %s to %s while restarting at Time=%g", All.OutputDir, all.OutputDir, All.Time);
+  strcpy(All.OutputDir, all.OutputDir);
+  if(ThisTask == 0 && strcmp(All.SnapshotFileBase, all.SnapshotFileBase) != 0)
+    warn("SnapshotFileBase modified from %s to %s while restarting at Time=%g", All.SnapshotFileBase, all.SnapshotFileBase, All.Time);
+  strcpy(All.SnapshotFileBase, all.SnapshotFileBase);
+
+#ifdef MHD_SEEDFIELD
+  if(ThisTask == 0 && All.B_dir != all.B_dir)
+    warn("B_dir modified from %d to %d while restarting at Time=%g", All.B_dir, all.B_dir, All.Time);
+  All.B_dir = all.B_dir;
+  if(ThisTask == 0 && All.B_value != all.B_value)
+    warn("B_value modified from %g to %g while restarting at Time=%g", All.B_value, all.B_value, All.Time);
+  All.B_value = all.B_value;
+#endif /* #ifdef MHD_SEEDFIELD */
+
+  /* TimeMax: the comparison runs on every task (only the warning is limited
+     to task 0), so readjust_timebase() is called on all of them.
+     All.TimeMax is not assigned here; presumably readjust_timebase()
+     updates it - TODO confirm. */
+  if(All.TimeMax != all.TimeMax)
+    {
+      if(ThisTask == 0)
+        warn("TimeMax modified from %g to %g while restarting at Time=%g", All.TimeMax, all.TimeMax, All.Time);
+      readjust_timebase(All.TimeMax, all.TimeMax);
+    }
+}
+
+/*! \brief Sorting kernel for seq_data structure.
+ *
+ *  Lexicographic comparison on the key (top priority first)
+ *    rankinnode
+ *    thisnode
+ *    thistask
+ *
+ *  \param[in] a Pointer to the first struct seq_data.
+ *  \param[in] b Pointer to the second struct seq_data.
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */
+static int compare_seq_data(const void *a, const void *b)
+{
+  const struct seq_data *lhs = (const struct seq_data *)a;
+  const struct seq_data *rhs = (const struct seq_data *)b;
+
+  /* the first key that differs decides the ordering */
+  if(lhs->rankinnode != rhs->rankinnode)
+    return (lhs->rankinnode < rhs->rankinnode) ? -1 : +1;
+
+  if(lhs->thisnode != rhs->thisnode)
+    return (lhs->thisnode < rhs->thisnode) ? -1 : +1;
+
+  if(lhs->thistask != rhs->thistask)
+    return (lhs->thistask < rhs->thistask) ? -1 : +1;
+
+  return 0;
+}
+
+/*! \brief Creates the restart file directory with appropriate permissions.
+ *
+ *  The directory is `<OutputDir>/restartfiles`, or
+ *  `<OutputDir>/restartfiles_NNN` (NNN = All.RestartFileCount) when
+ *  MULTIPLE_RESTARTS is set. With TOLERATE_WRITE_ERROR an additional
+ *  `<AlternativeOutputDir>/restartfiles` is created. All paths are formatted
+ *  with snprintf so that an overlong output directory name cannot write past
+ *  the end of the MAXLEN_PATH buffer. The result of mkdir is deliberately
+ *  not checked: the directory normally exists already on every call but the
+ *  first.
+ *
+ *  \return void
+ */
+static void create_restartfiles_dir()
+{
+  char buf[MAXLEN_PATH];
+
+  /* The output starts with ", " / "." - presumably this finishes a status
+     line the caller has started (caller not visible here). */
+#ifdef MULTIPLE_RESTARTS
+  printf(", All.RestartFileCount=%03d", All.RestartFileCount);
+#endif /* #ifdef MULTIPLE_RESTARTS */
+  printf(".\n");
+
+#ifdef MULTIPLE_RESTARTS
+  snprintf(buf, sizeof(buf), "%s/restartfiles_%03d", All.OutputDir, All.RestartFileCount);
+#else  /* #ifdef MULTIPLE_RESTARTS */
+  snprintf(buf, sizeof(buf), "%s/restartfiles", All.OutputDir);
+#endif /* #ifdef MULTIPLE_RESTARTS #else */
+  /* rwxr-sr-x: setgid bit set so files inherit the directory's group */
+  mkdir(buf, 02755);
+
+#ifdef TOLERATE_WRITE_ERROR
+  snprintf(buf, sizeof(buf), "%s/restartfiles", AlternativeOutputDir);
+  mkdir(buf, 02755);
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+}
+
+/*! \brief Builds the path of the restart file belonging to a task.
+ *
+ *  The default result is `<OutputDir>/restartfiles/restart.<task>`. With
+ *  MULTIPLE_RESTARTS the directory carries a three-digit counter instead:
+ *  in write mode the current All.RestartFileCount is used and then
+ *  incremented (so every call in write mode advances the counter); in the
+ *  read and check modes the previous count (All.RestartFileCount - 1) is
+ *  used and the counter is left alone.
+ *
+ *  \param[out] buf Buffer to which filename is written.
+ *  \param[in] task Task for which restart file should be written.
+ *  \param[in] modus Read or write mode flag.
+ *
+ *  \return void
+ */
+static void get_restart_filename(char *buf, int task, int modus)
+{
+  sprintf(buf, "%s/restartfiles/%s.%d", All.OutputDir, "restart", task);
+
+#ifdef MULTIPLE_RESTARTS
+  switch(modus)
+    {
+      case MODUS_WRITE:
+        /* post-increment: this call consumes the current counter value */
+        sprintf(buf, "%s/restartfiles_%03d/%s.%d", All.OutputDir, All.RestartFileCount++, "restart", task);
+        break;
+
+      case MODUS_READ:
+      case MODUS_READCHECK:
+      case MODUS_CHECK:
+        sprintf(buf, "%s/restartfiles_%03d/%s.%d", All.OutputDir, All.RestartFileCount - 1, "restart", task);
+        break;
+
+      default:
+        /* any other mode keeps the default name set above */
+        break;
+    }
+#endif /* #ifdef MULTIPLE_RESTARTS */
+}
+
+/*! \brief Renames existing restartfiles to backup-restartfiles.
+ *
+ *  This way the code ensures that there are two sets of restart-files per
+ *  run.
+ *
+ *  If the restart file of `task` exists it is renamed to
+ *  `<OutputDir>/restartfiles/bak-restart.<task>`; with TOLERATE_WRITE_ERROR
+ *  the same is done below AlternativeOutputDir. The number of tasks that
+ *  found a file is summed over MPI_COMM_WORLD; a warning is issued if only
+ *  some of the NTask tasks had one. The function is collective (it calls
+ *  MPI_Allreduce), so every task has to call it.
+ *
+ *  \param[in] task Task for which restart file is renamed.
+ *
+ *  \return void
+ */
+static void backup_restartfiles(int task)
+{
+  char buf[MAXLEN_PATH];
+
+  FILE *fcheck = NULL;
+  char buf_bak[MAXLEN_PATH];
+
+  int bak_files_status = 0; /* 1 if this task found an existing file to rename */
+
+  mpi_printf("RESTART: Backup restart files...\n");
+  myflush(stdout);
+
+  get_restart_filename(buf, task, MODUS_READ);
+
+  /* source and backup name are both derived from `task`, so they always
+     refer to the same restart file */
+  snprintf(buf_bak, sizeof(buf_bak), "%s/restartfiles/bak-%s.%d", All.OutputDir, "restart", task);
+  if((fcheck = fopen(buf, "r")))
+    {
+      fclose(fcheck);
+
+      rename(buf, buf_bak);
+      bak_files_status = 1;
+    }
+#ifdef TOLERATE_WRITE_ERROR
+  char alternative_fname[MAXLEN_PATH];
+  snprintf(alternative_fname, sizeof(alternative_fname), "%s/restartfiles/%s.%d", AlternativeOutputDir, "restart", task);
+  snprintf(buf_bak, sizeof(buf_bak), "%s/restartfiles/bak-%s.%d", AlternativeOutputDir, "restart", task);
+
+  if((fcheck = fopen(alternative_fname, "r")))
+    {
+      fclose(fcheck);
+
+      rename(alternative_fname, buf_bak);
+      bak_files_status = 1;
+    }
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+  int bak_files_status_sum;
+  MPI_Allreduce(&bak_files_status, &bak_files_status_sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  /* report the global counts (the reduced sum), not this task's 0/1 flag */
+  if(bak_files_status_sum != NTask && bak_files_status_sum != 0)
+    warn("RESTART: some (%d) restart files were renamed to bak, but some (%d) weren't - something is very possibly wrong!",
+         bak_files_status_sum, NTask - bak_files_status_sum);
+  if(bak_files_status_sum == NTask)
+    mpi_printf("RESTART: done renaming pre-existing restart files to bak files.\n");
+  else if(bak_files_status_sum == 0)
+    mpi_printf("RESTART: no pre-existing restart files found.\n");
+
+  myflush(stdout);
+}
+
+/*! \brief Returns the index of file which is to be checked by local task.
+ *
+ *  The file assigned to a task is the one written by the task half of the
+ *  task range further on (wrapping around at NTask), so that a task does not
+ *  verify a file it has written itself. This is to ensure that the check is
+ *  actually read from disk (not from some buffer).
+ *
+ *  \param[in] task Local task.
+ *
+ *  \return File number.
+ */
+static int get_file_to_check(int task)
+{
+  const int shift = NTask / 2;
+
+  return (task + shift) % NTask;
+}
+
+/*! \brief Checks restart files via an md5sum.
+ *
+ *  This is to ensure that they have been written correctly to the file system.
+ *
+ *  The file is opened read-only, ch->byte_count bytes are passed through
+ *  byten() in MODUS_CHECK while an MD5 sum is accumulated in the file-static
+ *  mysum, and a further 16 bytes are then read with byten_nohash(). The
+ *  computed digest has to equal both ch->hash and those 16 bytes. On a
+ *  mismatch the file is renamed to `<name>-damaged` and terminate() is
+ *  called.
+ *
+ *  \param[in] buf Filename of restart file.
+ *  \param[in] ch Metadata about data to be checked.
+ *  \param[out] success Flag whether check was a success.
+ *
+ *  \return void
+ */
+static void check_restart_files(char *buf, struct check *ch, int *success)
+{
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+  /* Pad the file with zero bytes up to the next multiple of PageSize;
+     presumably required so that it can be read back with O_DIRECT below -
+     TODO confirm. */
+  struct stat st;
+  if(stat(buf, &st) == 0)
+    {
+      size_t size = st.st_size;
+      if(size % PageSize > 0)
+        {
+          FILE *fd = fopen(buf, "a");
+          if(fd)
+            {
+              size_t n = PageSize - (size % PageSize);
+              char *p  = calloc(n, 1);
+              if(p == NULL)
+                terminate("p == NULL");
+              printf("RESTART: Topping of restart file '%s' by %lld bytes\n", buf, (long long)n);
+              /* NOTE(review): the result of fwrite is not checked */
+              fwrite(p, n, 1, fd);
+              fclose(fd);
+              free(p);
+            }
+          else
+            terminate("can't increase length of restart file '%s'", buf);
+        }
+    }
+  else
+    terminate("Restart file '%s' not found.\n", buf);
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+  int oflag = O_RDONLY;
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+  oflag |= O_DIRECT;
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+
+  /* the descriptor is kept in the file-static fdint */
+  if((fdint = open(buf, oflag)) < 0)
+    terminate("Restart file '%s' not found.\n", buf);
+
+  allocate_iobuf();
+
+  MD5Init(&mysum);
+
+  /* consume the hashed part of the file in chunks of at most 32 MiB */
+  long long readLen = ch->byte_count;
+  while(readLen > 0)
+    {
+      int readChunk = 1024 * 1024 * 32;
+      if(readChunk > readLen)
+        readChunk = readLen;
+
+      /* NULL destination: presumably byten() only feeds the bytes into the
+         MD5 sum in MODUS_CHECK - its body is not visible here */
+      byten(NULL, readChunk, MODUS_CHECK);
+      readLen -= readChunk;
+    }
+
+  MD5Final(&mysum);
+
+  unsigned char has_hash[16], written_hash[16];
+
+  /* has_hash: digest just computed from the file contents */
+  for(int k = 0; k < 16; k++)
+    has_hash[k] = mysum.digest[k];
+
+  /* written_hash: the next 16 bytes of the file, read without hashing */
+  byten_nohash(written_hash, 16, MODUS_READ);
+
+  if(memcmp(has_hash, ch->hash, 16) != 0 || memcmp(has_hash, written_hash, 16) != 0)
+    {
+      /* 16 bytes -> 32 hex characters plus terminator; 48 leaves headroom */
+      char str_has[48], str_expected[48], str_written[48];
+      for(int i = 0; i < 16; i++)
+        {
+          sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+          sprintf(str_expected + 2 * i, "%02X", ch->hash[i]);
+          sprintf(str_written + 2 * i, "%02X", written_hash[i]);
+        }
+
+      str_has[32] = str_expected[32] = str_written[32] = 0;
+
+      /* move the bad file out of the way so it is not picked up again */
+      char newname[10000];
+      sprintf(newname, "%s-damaged", buf);
+      rename(buf, newname);
+
+      terminate("RESTART: file '%s' has MD5 hash of '%s', does not match expected hash '%s' or written hash '%s'.", newname, str_has,
+                str_expected, str_written);
+      /* NOTE(review): only reached if terminate() returns */
+      *success = 0;
+    }
+  else
+    {
+#ifdef VERBOSE
+      char str_has[48], str_expected[48], str_written[48];
+      for(int i = 0; i < 16; i++)
+        {
+          sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+          sprintf(str_expected + 2 * i, "%02X", ch->hash[i]);
+          sprintf(str_written + 2 * i, "%02X", written_hash[i]);
+        }
+
+      str_has[32] = str_expected[32] = str_written[32] = 0;
+
+      printf("RESTART: Task %d: file '%s' has MD5 hash of '%s', does match expected hash '%s' and written hash '%s'.\n", ThisTask, buf,
+             str_has, str_expected, str_written);
+#endif /* #ifdef VERBOSE */
+      *success = 1;
+    }
+  deallocate_iobuf(MODUS_CHECK);
+
+  close(fdint);
+}
+
+/*! \brief Tells the task in slot i of the seq array to start on its file and,
+ *         when checking, hands it the metadata to check against.
+ *
+ *  An int `type` is sent first (tag TAG_N): it is 1 if write_success[]
+ *  already flags the file concerned and 0 otherwise. In write mode the file
+ *  concerned is the receiving task's own one; in check mode it is the one
+ *  returned by get_file_to_check() for the receiving task. In any other mode
+ *  type is 0 and write_success[] is not consulted. In check mode with
+ *  type 0, the struct check entry for that file follows as a second message.
+ *  Both sends are synchronous (MPI_Ssend).
+ *
+ *  \param[in] modus Write or check mode.
+ *  \param[in] i Index in seq array.
+ *
+ *  \return void
+ */
+static void send_work_request(int modus, int i)
+{
+  const int dest     = seq[i].thistask;
+  const int checking = (modus == MODUS_CHECK);
+  /* index into write_success[] / checks[] of the file this request is about */
+  const int file_task = checking ? get_file_to_check(dest) : dest;
+
+  int type = 0;
+  if((modus == MODUS_WRITE || checking) && write_success[file_task])
+    type = 1;
+
+  MPI_Ssend(&type, 1, MPI_INT, dest, TAG_N, MPI_COMM_WORLD);
+
+  /* type == 0 in check mode means write_success[file_task] is not set */
+  if(checking && type == 0)
+    MPI_Ssend(&checks[file_task], sizeof(struct check), MPI_BYTE, dest, TAG_N, MPI_COMM_WORLD);
+}
+
+/*! \brief Lets task 0 process at most one pending completion message.
+ *
+ *  The probe is non-blocking. If a message with TAG_KEY is waiting, it is
+ *  received, files_completed is incremented, and the next entry of the seq
+ *  array that has not been started yet (if any) is sent a work request.
+ *  On tasks other than 0 the function does nothing.
+ *
+ *  \param[in] modus Read, write or check files.
+ *
+ *  \return void
+ */
+static void polling(int modus)
+{
+  if(ThisTask != 0)
+    return;
+
+  if(files_completed >= NTask)
+    return;
+
+  MPI_Status status;
+  int flag;
+
+  /* is a completion message waiting? */
+  MPI_Iprobe(MPI_ANY_SOURCE, TAG_KEY, MPI_COMM_WORLD, &flag, &status);
+
+  if(!flag)
+    return;
+
+  int source = status.MPI_SOURCE;
+
+  if(modus == MODUS_WRITE)
+    {
+      /* store what the writer sent in its slot of the checks array */
+      MPI_Recv(&checks[source], sizeof(struct check), MPI_BYTE, source, TAG_KEY, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+  else if(modus == MODUS_CHECK)
+    {
+      /* the reported value applies to the file this task was checking */
+      int success;
+      MPI_Recv(&success, 1, MPI_INT, source, TAG_KEY, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      write_success[get_file_to_check(source)] = success;
+    }
+  else
+    {
+      int dummy;
+      MPI_Recv(&dummy, 1, MPI_INT, source, TAG_KEY, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+
+  files_completed++;
+
+  if(files_started >= NTask)
+    return;
+
+  /* announce a new group whenever a multiple of files_concurrent is reached */
+  if((files_started % files_concurrent) == 0)
+    {
+      if(modus == MODUS_READ)
+        mpi_printf("RESTART: Loading restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1, files_groups);
+      else if(modus == MODUS_WRITE)
+        mpi_printf("RESTART: Writing restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1, files_groups);
+      else
+        mpi_printf("RESTART: Checking restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1, files_groups);
+    }
+
+  send_work_request(modus, files_started++);
+}
+
+/*! \brief Schedule the reading/writing/checking of restart files to ensure
+ *         only NumFilesWrittenInParallel are written in parallel.
+ *
+ *  \param[in] modus Read, write or check files.
+ *
+ *  \return void
+ */
+static void work_files(int modus)
+{
+  /* only task 0 needs the seq array: it is the root of the gather below */
+  if(ThisTask == 0)
+    if(!(seq = malloc(NTask * sizeof(struct seq_data))))
+      terminate("can't allocate seq_data");
+
+  struct seq_data seq_loc;
+  seq_loc.thistask   = ThisTask;
+  seq_loc.rankinnode = RankInThisNode;
+  seq_loc.thisnode   = ThisNode;
+
+  MPI_Gather(&seq_loc, sizeof(struct seq_data), MPI_BYTE, seq, sizeof(struct seq_data), MPI_BYTE, 0, MPI_COMM_WORLD);
+
+  if(ThisTask == 0)
+    {
+      /* the order of seq determines the order in which tasks are started;
+       * task 0 is required to come first after sorting */
+      qsort(seq, NTask, sizeof(struct seq_data), compare_seq_data);
+      if(seq[0].thistask != 0)
+        terminate("unexpected");
+
+      files_started   = 0;
+      files_completed = 0;
+
+      if((files_started % files_concurrent) == 0)
+        {
+          if(modus == MODUS_READ)
+            mpi_printf("RESTART: Loading restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                       files_groups);
+          else if(modus == MODUS_WRITE)
+            mpi_printf("RESTART: Writing restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                       files_groups);
+          else
+            mpi_printf("RESTART: Checking restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                       files_groups);
+        }
+
+      /* start the entries 1 .. NumFilesWrittenInParallel-1 of seq; entry 0
+       * (task 0 itself) is handled directly below */
+      for(int i = 1; i < All.NumFilesWrittenInParallel; i++)
+        {
+          files_started++;
+          send_work_request(modus, i);
+        }
+
+      /* account for task 0's own file */
+      files_started++;
+      /* task 0 skips its own work if the relevant file is already marked in write_success */
+      if(!((modus == MODUS_WRITE && write_success[ThisTask]) || (modus == MODUS_CHECK && write_success[get_file_to_check(ThisTask)])))
+        {
+          if(modus == MODUS_CHECK)
+            {
+              char buf[MAXLEN_PATH];
+              int task = get_file_to_check(ThisTask);
+              get_restart_filename(buf, task, modus);
+
+              int success;
+              check_restart_files(buf, &checks[task], &success);
+              write_success[task] = success;
+            }
+          else
+            {
+              char buf[MAXLEN_PATH];
+              get_restart_filename(buf, ThisTask, modus);
+              write_or_read_this_processors_restart_file(modus, buf, &checks[0]);
+            }
+        }
+      files_completed++;
+
+      /* task 0 is done, so one more task may be started */
+      if(files_started < NTask)
+        {
+          if((files_started % files_concurrent) == 0)
+            {
+              if(modus == MODUS_READ)
+                mpi_printf("RESTART: Loading restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                           files_groups);
+              else if(modus == MODUS_WRITE)
+                mpi_printf("RESTART: Writing restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                           files_groups);
+              else
+                mpi_printf("RESTART: Checking restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                           files_groups);
+            }
+
+          send_work_request(modus, files_started++);
+        }
+
+      /* keep servicing completion messages until every task has reported */
+      while(files_completed < NTask)
+        polling(modus);
+
+      free(seq);
+    }
+  else
+    {
+      int type;
+      MPI_Recv(&type, 1, MPI_INT, 0, TAG_N, MPI_COMM_WORLD, MPI_STATUS_IGNORE); /* wait until we are told to start */
+
+      /* type == 0: do the work; any other value: nothing to do, just report back */
+      if(type == 0)
+        {
+          if(modus == MODUS_CHECK)
+            {
+              /* task 0 sends the reference meta-data of the file to be checked */
+              struct check ch;
+              MPI_Recv(&ch, sizeof(struct check), MPI_BYTE, 0, TAG_N, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              char buf[MAXLEN_PATH];
+              get_restart_filename(buf, get_file_to_check(ThisTask), modus);
+
+              int success;
+              check_restart_files(buf, &ch, &success);
+              MPI_Ssend(&success, 1, MPI_INT, 0, TAG_KEY, MPI_COMM_WORLD);
+            }
+          else
+            {
+              char buf[MAXLEN_PATH];
+              get_restart_filename(buf, ThisTask, modus);
+              struct check ch;
+              write_or_read_this_processors_restart_file(modus, buf, &ch);
+
+              if(modus == MODUS_WRITE)
+                {
+                  /* report the meta-data of the file just written to task 0 */
+                  MPI_Ssend(&ch, sizeof(struct check), MPI_BYTE, 0, TAG_KEY, MPI_COMM_WORLD);
+                }
+              else
+                {
+                  int dummy = 0;
+                  MPI_Ssend(&dummy, 1, MPI_INT, 0, TAG_KEY, MPI_COMM_WORLD);
+                }
+            }
+        }
+      else
+        {
+          /* in check mode task 0 stores this value in write_success, so it must stay 1 */
+          int dummy = 1;
+          MPI_Ssend(&dummy, 1, MPI_INT, 0, TAG_KEY, MPI_COMM_WORLD);
+        }
+    }
+}
+
+/*! \brief This function reads or writes the restart files.
+ *
+ *  Each processor writes its own restart file, with the
+ *  I/O being done in parallel. To avoid congestion of the disks
+ *  you can tell the program to restrict the number of files
+ *  that are simultaneously written to NumFilesWrittenInParallel.
+ *
+ *  After writing, the files are checked and, where the check fails, written
+ *  again; the run is terminated if this has to be repeated too often.
+ *  After reading, All.Time of every task is compared with that of task 0.
+ *
+ *  \param[in] modus if modus==MODUS_READ  the restart()-routine reads,
+ *  if modus==MODUS_WRITE it writes a restart file.
+ *
+ *  \return void
+ */
+void restart(int modus)
+{
+  CPU_Step[CPU_MISC] += measure_time();
+  double t0  = second();
+  byte_count = 0;
+
+  PageSize = getpagesize();
+  mpi_printf("RESTART: PageSize = %d\n", PageSize);
+
+  if(modus == MODUS_READ)
+    mpi_printf("RESTART: Loading restart files...\n");
+
+  if(ThisTask == 0 && modus == MODUS_WRITE)
+    {
+      printf("RESTART: Writing restart files");
+      create_restartfiles_dir();
+    }
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  /* clamp the number of concurrently processed files to the range [1, NTask] */
+  if(NTask < All.NumFilesWrittenInParallel)
+    {
+      warn("Number of processors should be a smaller or equal than `NumFilesWrittenInParallel'. We're adjusting the latter.\n");
+      All.NumFilesWrittenInParallel = NTask;
+    }
+
+  if(All.NumFilesWrittenInParallel < 1)
+    All.NumFilesWrittenInParallel = 1;
+
+  /* number of groups = ceil(NTask / NumFilesWrittenInParallel) */
+  files_concurrent = All.NumFilesWrittenInParallel;
+  files_groups     = NTask / All.NumFilesWrittenInParallel;
+  if(NTask % All.NumFilesWrittenInParallel)
+    files_groups++;
+
+#ifndef MULTIPLE_RESTARTS
+  if(modus == MODUS_WRITE) /* write */
+    backup_restartfiles(ThisTask);
+#endif /* #ifndef MULTIPLE_RESTARTS */
+
+  /* task 0 keeps per-file meta-data and a per-file success flag while writing */
+  if(modus == MODUS_WRITE)
+    if(ThisTask == 0)
+      {
+        if(!(checks = malloc(NTask * sizeof(struct check))))
+          terminate("can't allocate checks");
+        if(!(write_success = malloc(NTask)))
+          terminate("can't allocate write_success");
+
+        for(int i = 0; i < NTask; i++)
+          {
+            checks[i].byte_count = 0;
+            write_success[i]     = 0;
+          }
+      }
+
+  work_files(modus);
+
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  if(modus == MODUS_WRITE)
+    {
+      int iter    = 0;
+      int success = 0;
+      while(!success)
+        {
+          work_files(MODUS_CHECK);
+
+          if(ThisTask == 0)
+            {
+              /* count the files that are not marked as successfully written */
+              int count = 0;
+              for(int i = 0; i < NTask; i++)
+                {
+                  if(!write_success[i])
+                    count++;
+                }
+
+              if(count == 0)
+                {
+                  printf("All restart files written successfully.\n");
+                  success = 1;
+                }
+              else
+                {
+                  printf("Need to repeat writing for %d restartfiles.\n", count);
+                }
+            }
+
+          /* all tasks have to agree on whether another round is needed */
+          MPI_Bcast(&success, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+          if(success)
+            break;
+
+          iter++;
+          if(iter > 4)
+            terminate("Too many iterations, fix your file system.");
+
+          work_files(MODUS_WRITE);
+        };
+
+      /* both arrays were allocated on task 0 only; release both there so
+       * that write_success does not leak on every restart dump */
+      if(ThisTask == 0)
+        {
+          free(checks);
+          free(write_success);
+          checks        = NULL;
+          write_success = NULL;
+        }
+    }
+
+  /* check whether the restarts are all at the same time */
+  if(modus == MODUS_READ) /* read */
+    {
+      struct global_data_all_processes all_task0;
+
+      if(ThisTask == 0)
+        all_task0 = All;
+
+      MPI_Bcast(&all_task0, sizeof(struct global_data_all_processes), MPI_BYTE, 0, MPI_COMM_WORLD);
+
+      if(all_task0.Time != All.Time)
+        terminate("The restart file on task=%d is not consistent with the one on task=0\n", ThisTask);
+    }
+
+  long long byte_count_all;
+  sumup_longs(1, &byte_count, &byte_count_all);
+
+  double t1 = second();
+
+  mpi_printf("RESTART: load/save took %g sec, corresponds to I/O rate of %g MB/sec\n", timediff(t0, t1),
+             byte_count_all / (1024.0 * 1024.0) / timediff(t0, t1));
+
+  CPU_Step[CPU_RESTART] += measure_time();
+  mpi_printf("RESTART: done.\n");
+}
+
+/*! \brief Reads or writes restart file.
+ *
+ *  A read is attempted exactly once. A write is repeated for as long as
+ *  execute_write_or_read() reports a failure through its return value.
+ *
+ *  \param[in] modus Flag for write or read.
+ *  \param[in] buf File name.
+ *  \param[in] ch Check metadata.
+ *
+ *  \return void
+ */
+static void write_or_read_this_processors_restart_file(int modus, char *buf, struct check *ch)
+{
+  if(modus == MODUS_READ)
+    {
+      execute_write_or_read(MODUS_READ, buf, ch);
+    }
+  else
+    {
+      int failed;
+
+      do
+        {
+          /* keep the status; it was previously discarded, which made the
+           * retry condition below dead code */
+          failed = execute_write_or_read(MODUS_WRITE, buf, ch);
+        }
+      while(failed > 0);
+    }
+}
+
+/*! \brief Reads or writes a restart file.
+ *
+ *  A single attempt which either is successful or fails. With
+ *  TOLERATE_WRITE_ERROR, a second attempt is made on a file below
+ *  AlternativeOutputDir if WriteErrorFlag was raised during the first one;
+ *  in that case buf is overwritten with the alternative file name.
+ *
+ *  \param[in] modus Flag for write or read (MODUS_WRITE, MODUS_READ or
+ *             MODUS_READCHECK).
+ *  \param[in, out] buf File name.
+ *  \param[in, out] ch Check metadata; in MODUS_WRITE, byte_count and hash of
+ *                  the written file are stored here.
+ *
+ *  \return 0: success, 1: failed.
+ */
+static int execute_write_or_read(int modus, char *buf, struct check *ch)
+{
+  int failed_flag = 0;
+
+#ifdef TOLERATE_WRITE_ERROR
+  for(int try_io = 0; try_io < 2; try_io++)
+    {
+      WriteErrorFlag = 0;
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+      if(modus == MODUS_READ || modus == MODUS_READCHECK)
+        {
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+          /* extend the file with zero bytes up to a multiple of PageSize */
+          struct stat st;
+          if(stat(buf, &st) == 0)
+            {
+              size_t size = st.st_size;
+              if(size % PageSize > 0)
+                {
+                  FILE *fd = fopen(buf, "a");
+                  if(fd)
+                    {
+                      size_t n = PageSize - (size % PageSize);
+                      char *p  = calloc(n, 1);
+                      if(p == NULL)
+                        terminate("p == NULL");
+                      printf("RESTART: Topping of restart file '%s' by %lld bytes\n", buf, (long long)n);
+                      fwrite(p, n, 1, fd);
+                      fclose(fd);
+                      free(p);
+                    }
+                  else
+                    terminate("can't increase length of restart file '%s'", buf);
+                }
+            }
+          else
+            terminate("Restart file '%s' not found.\n", buf);
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+          int oflag = O_RDONLY;
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+          oflag |= O_DIRECT;
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+          if((fdint = open(buf, oflag)) < 0)
+            terminate("Restart file '%s' not found.\n", buf);
+
+          allocate_iobuf();
+        }
+      else
+        {
+#ifdef TOLERATE_WRITE_ERROR
+          int try_open = 0;
+
+          while(try_open < IO_TRIALS)
+            {
+              int oflag = O_WRONLY | O_CREAT | O_TRUNC;
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+              oflag |= O_DIRECT;
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+              if((fdint = open(buf, oflag, S_IRUSR | S_IWUSR | S_IRGRP)) < 0)
+                {
+                  printf("Restart file '%s' cannot be opened. Trying again...\n", buf);
+                  myflush(stdout);
+
+                  try_open++;
+
+                  sleep(IO_SLEEP_TIME);
+                }
+              else
+                break;
+            }
+
+          if(try_open == IO_TRIALS)
+            terminate("Opening of restart file failed too often!");
+#else /* #ifdef TOLERATE_WRITE_ERROR */
+      int oflag = O_WRONLY | O_CREAT | O_TRUNC;
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+      oflag |= O_DIRECT;
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+      if((fdint = open(buf, oflag, S_IRUSR | S_IWUSR | S_IRGRP)) < 0)
+        terminate("Restart file '%s' cannot be opened.\n", buf);
+#endif /* #ifdef TOLERATE_WRITE_ERROR #else */
+          allocate_iobuf();
+        }
+
+      /* remember the byte counter at the start of THIS attempt, so that the
+       * size stored further down is that of a single file even if the write
+       * is repeated by the TOLERATE_WRITE_ERROR loop */
+      if(modus == MODUS_WRITE)
+        ch->byte_count = byte_count;
+
+      MD5Init(&mysum);
+
+      contents_restart_file(modus);
+
+      MD5Final(&mysum);
+
+      unsigned char has_hash[16];
+      /* static: the hash of the last written file is kept for a later MODUS_READCHECK call */
+      static unsigned char should_hash[16];
+
+      for(int k = 0; k < 16; k++)
+        has_hash[k] = mysum.digest[k];
+
+      if(modus == MODUS_READ)
+        {
+          /* read */
+          unsigned char written_hash[16];
+          byten_nohash(written_hash, 16, modus);
+          if(memcmp(has_hash, written_hash, 16) != 0)
+            {
+              char str_has[48], str_written[48];
+              for(int i = 0; i < 16; i++)
+                {
+                  sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+                  sprintf(str_written + 2 * i, "%02X", written_hash[i]);
+                }
+
+              str_has[32] = str_written[32] = 0;
+
+              terminate("RESTART: file '%s' does not match expected MD5 hash of '%s', found '%s' instead.", buf, str_has, str_written);
+            }
+        }
+      else if(modus == MODUS_READCHECK)
+        {
+          if(memcmp(should_hash, has_hash, 16) != 0)
+            {
+              char str_should[48], str_has[48];
+              for(int i = 0; i < 16; i++)
+                {
+                  sprintf(str_should + 2 * i, "%02X", should_hash[i]);
+                  sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+                }
+
+              str_should[32] = str_has[32] = 0;
+
+              failed_flag = 1;
+
+              terminate(
+                  "RESTART-READCHECK: file '%s' does not match expected MD5 hash of '%s' after read-back check, has '%s' instead.",
+                  buf, str_should, str_has);
+            }
+#ifdef VERBOSE
+          else
+            {
+              char str_should[48], str_has[48];
+              for(int i = 0; i < 16; i++)
+                {
+                  sprintf(str_should + 2 * i, "%02X", should_hash[i]);
+                  sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+                }
+
+              str_should[32] = str_has[32] = 0;
+
+              printf("RESTART-READCHECK: Task %d: file '%s' does match expected MD5 hash of '%s' after read-back check, has '%s'.\n",
+                     ThisTask, buf, str_should, str_has);
+            }
+#endif /* #ifdef VERBOSE */
+        }
+      else if(modus == MODUS_WRITE)
+        {
+          /* bytes passed to byten since the snapshot taken above */
+          ch->byte_count = byte_count - ch->byte_count;
+          for(int k = 0; k < 16; k++)
+            ch->hash[k] = has_hash[k];
+
+          /* write */
+          byten_nohash(has_hash, 16, modus);
+
+          for(int k = 0; k < 16; k++)
+            should_hash[k] = has_hash[k];
+        }
+      else
+        terminate("This should not happen - wrong modus!");
+
+      deallocate_iobuf(modus);
+
+      close(fdint);
+
+#ifdef TOLERATE_WRITE_ERROR
+      if(WriteErrorFlag == 0)
+        break;
+
+      if(try_io == 0)
+        {
+          char alternative_fname[MAXLEN_PATH];
+          /* bounded: the result is always terminated and fits into MAXLEN_PATH bytes */
+          snprintf(alternative_fname, MAXLEN_PATH, "%s/restartfiles/%s.%d", AlternativeOutputDir, "restart", ThisTask);
+
+          printf("TOLERATE_WRITE_ERROR: Try to write to alternative file: Task=%d try_io=%d alternative-filename='%s'\n", ThisTask,
+                 try_io, alternative_fname);
+          myflush(stdout);
+          strncpy(buf, alternative_fname, MAXLEN_PATH); /* try on a different output directory */
+        }
+      else
+        {
+          terminate("TOLERATE_WRITE_ERROR: Second try with alternative file failed too.\n");
+        }
+    }
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+  return failed_flag;
+}
+
+/*! \brief Defines contents of restart file.
+ *
+ *  Every item is passed through byten()/in(), so the sequence of calls below
+ *  fixes the layout of the file. polling() is called between the larger
+ *  pieces; on task 0 it services completion messages of other tasks.
+ *
+ *  \param[in] modus One of the MODUS_* constants; it is passed on unchanged
+ *             to byten()/in(), which decide whether data is read or written.
+ *
+ *  \return void
+ */
+static void contents_restart_file(int modus)
+{
+  /* common data  */
+  byten(&All, sizeof(struct global_data_all_processes), modus);
+
+  /* individual allocation factors for meshes */
+  byten(&Mesh.Indi, sizeof(struct individual_alloc_data), modus);
+  byten(&DeRefMesh.Indi, sizeof(struct individual_alloc_data), modus);
+
+  polling(modus);
+
+  if(modus == MODUS_READ) /* read */
+    allocate_memory();
+
+  /* the number of MPI ranks is stored so that a mismatch is detected on read */
+  int ntask = NTask;
+  in(&ntask, modus);
+
+  if(modus == MODUS_READ)
+    if(ntask != NTask)
+      terminate("The restart files were written for ntask=%d while you're using now %d MPI ranks\n", ntask, NTask);
+
+  in(&NumPart, modus);
+
+  /* Particle data  */
+  byten(&P[0], NumPart * sizeof(struct particle_data), modus);
+
+  polling(modus);
+
+  in(&NumGas, modus);
+
+  if(NumGas > 0)
+    {
+      /* Sph-Particle data  */
+      byten(&SphP[0], NumGas * sizeof(struct sph_particle_data), modus);
+    }
+
+  polling(modus);
+
+  in(&Nvc, modus);
+  in(&MaxNvc, modus);
+  in(&FirstUnusedConnection, modus);
+
+  /* DC can only be allocated once MaxNvc has been read */
+  if(modus == MODUS_READ) /* read */
+    DC = mymalloc_movable(&DC, "DC", MaxNvc * sizeof(connection));
+
+  byten(DC, MaxNvc * sizeof(connection), modus);
+
+  polling(modus);
+
+  /* write state of random number generators */
+  byten(gsl_rng_state(random_generator), gsl_rng_size(random_generator), modus);
+  byten(gsl_rng_state(random_generator_aux), gsl_rng_size(random_generator_aux), modus);
+
+  /* now store variables for time integration bookkeeping */
+  byten(TimeBinSynchronized, TIMEBINS * sizeof(int), modus);
+
+  /* the NActiveParticles counters come first because they size the lists below */
+  in(&TimeBinsHydro.NActiveParticles, modus);
+  in(&TimeBinsGravity.NActiveParticles, modus);
+  byten(&TimeBinsHydro.GlobalNActiveParticles, sizeof(long long), modus);
+  byten(&TimeBinsGravity.GlobalNActiveParticles, sizeof(long long), modus);
+  byten(TimeBinsHydro.ActiveParticleList, TimeBinsHydro.NActiveParticles * sizeof(int), modus);
+  byten(TimeBinsGravity.ActiveParticleList, TimeBinsGravity.NActiveParticles * sizeof(int), modus);
+  byten(TimeBinsHydro.NextInTimeBin, NumGas * sizeof(int), modus);
+  byten(TimeBinsGravity.NextInTimeBin, NumPart * sizeof(int), modus);
+  byten(TimeBinsHydro.PrevInTimeBin, NumGas * sizeof(int), modus);
+  byten(TimeBinsGravity.PrevInTimeBin, NumPart * sizeof(int), modus);
+  byten(TimeBinsHydro.TimeBinCount, TIMEBINS * sizeof(int), modus);
+  byten(TimeBinsGravity.TimeBinCount, TIMEBINS * sizeof(int), modus);
+  byten(TimeBinsHydro.FirstInTimeBin, TIMEBINS * sizeof(int), modus);
+  byten(TimeBinsGravity.FirstInTimeBin, TIMEBINS * sizeof(int), modus);
+  byten(TimeBinsHydro.LastInTimeBin, TIMEBINS * sizeof(int), modus);
+  byten(TimeBinsGravity.LastInTimeBin, TIMEBINS * sizeof(int), modus);
+
+#ifdef USE_SFR
+  byten(TimeBinSfr, TIMEBINS * sizeof(double), modus);
+#endif
+
+  polling(modus);
+
+  /* now store custom data for optional Config settings */
+#ifdef USE_SFR
+  in(&Stars_converted, modus);
+#endif
+
+  polling(modus);
+
+  /* now store relevant data for tree */
+
+  in(&NTopleaves, modus);
+  in(&NTopnodes, modus);
+
+  in(&Ngb_MaxPart, modus);
+  in(&Ngb_MaxNodes, modus);
+  in(&Ngb_NumNodes, modus);
+  in(&Ngb_MarkerValue, modus);
+  in(&Ngb_FirstNonTopLevelNode, modus);
+
+  polling(modus);
+
+  /* on read, the allocations are done only after the counters above are known */
+  if(modus == MODUS_READ) /* read */
+    {
+      domain_allocate();
+      ngb_treeallocate();
+    }
+
+  /* the neighbour tree arrays are only stored if All.TotNumGas > 0 */
+  if(All.TotNumGas > 0)
+    {
+#ifdef TREE_BASED_TIMESTEPS
+      byten(ExtNgb_Nodes + Ngb_MaxPart, Ngb_NumNodes * sizeof(struct ExtNgbNODE), modus);
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+      byten(Ngb_Nodes + Ngb_MaxPart, Ngb_NumNodes * sizeof(struct NgbNODE), modus);
+      byten(Ngb_DomainNodeIndex, NTopleaves * sizeof(int), modus);
+      byten(Ngb_Nextnode, (Ngb_MaxPart + NTopleaves) * sizeof(int), modus);
+      byten(Ngb_Father, Ngb_MaxPart * sizeof(int), modus);
+      byten(Ngb_Marker, (Ngb_MaxPart + NTopleaves) * sizeof(int), modus);
+    }
+
+  polling(modus);
+
+  /* domain decomposition data */
+  byten(TopNodes, NTopnodes * sizeof(struct topnode_data), modus);
+  byten(DomainTask, NTopleaves * sizeof(int), modus);
+  byten(DomainCorner, 3 * sizeof(double), modus);
+  byten(DomainCenter, 3 * sizeof(double), modus);
+  byten(&DomainLen, sizeof(double), modus);
+  byten(&DomainFac, sizeof(double), modus);
+  byten(&DomainInverseLen, sizeof(double), modus);
+  byten(&DomainBigFac, sizeof(double), modus);
+}
+
+/*! \brief Adjusts the timeline if the TimeMax variable is
+ *  increased between a restart.
+ *
+ *  The approach taken here is to reduce the resolution of the
+ *  integer timeline by factors of 2 until the new final time
+ *  can be reached within TIMEBASE. For every halving, the integer times are
+ *  divided by 2 and every positive particle time bin is lowered by one; the
+ *  run is terminated if a time bin would drop to zero this way.
+ *
+ *  \param[in] TimeMax_old old final time.
+ *  \param[in] TimeMax_new new final time (must be larger than old one).
+ *
+ *  \return void
+ */
+void readjust_timebase(double TimeMax_old, double TimeMax_new)
+{
+  int i;
+  long long ti_end;
+
+  if(sizeof(long long) != 8)
+    terminate("\nType 'long long' is not 64 bit on this platform\n\n");
+
+  mpi_printf("\nRESTART: All.TimeMax has been changed in the parameterfile\nNeed to adjust integer timeline\n\n\n");
+
+  if(TimeMax_new < TimeMax_old)
+    terminate("\nIt is not allowed to reduce All.TimeMax\n\n");
+
+  /* integer time that corresponds to the new final time at current resolution */
+  if(All.ComovingIntegrationOn)
+    ti_end = (long long)(log(TimeMax_new / All.TimeBegin) / All.Timebase_interval);
+  else
+    ti_end = (long long)((TimeMax_new - All.TimeBegin) / All.Timebase_interval);
+
+  while(ti_end > TIMEBASE)
+    {
+      All.Timebase_interval *= 2.0;
+
+      ti_end /= 2;
+      All.Ti_Current /= 2;
+      All.Previous_Ti_Current /= 2;
+
+#ifdef PMGRID
+      All.PM_Ti_begstep /= 2;
+      All.PM_Ti_endstep /= 2;
+#endif /* #ifdef PMGRID */
+
+      for(i = 0; i < NumPart; i++)
+        {
+          P[i].Ti_Current /= 2;
+
+          if(P[i].TimeBinGrav > 0)
+            {
+              P[i].TimeBinGrav--;
+              /* format and argument go straight to terminate(); no
+               * pre-formatted buffer is used as a format string */
+              if(P[i].TimeBinGrav <= 0)
+                terminate("Error in readjust_timebase(). Minimum Timebin for particle %d reached.\n", i);
+            }
+
+          /* the hydro time bin is only adjusted for particles of type 0 */
+          if(P[i].Type == 0)
+            if(P[i].TimeBinHydro > 0)
+              {
+                P[i].TimeBinHydro--;
+                if(P[i].TimeBinHydro <= 0)
+                  terminate("Error in readjust_timebase(). Minimum Timebin for particle %d reached.\n", i);
+              }
+        }
+    }
+
+  All.TimeMax = TimeMax_new;
+}
+
+/*! \brief Transfers a single int to or from the restart file.
+ *
+ *  Thin wrapper that forwards to byten() with a length of sizeof(int).
+ *
+ *  \param[in, out] x pointer to the integer.
+ *  \param[in] modus MODUS_* constant, handed on unchanged; it selects
+ *             whether the value is read or written.
+ *
+ *  \return void
+ */
+void in(int *x, int modus)
+{
+  byten(x, sizeof(int), modus);
+}
+
+/*! \brief Transfers n bytes to or from the restart file buffer and includes
+ *         them in the md5sum.
+ *
+ *  Thin wrapper that forwards to byten_hash() with hash=1.
+ *
+ * \param[in, out] x Pointer to the data.
+ * \param[in] n Number of bytes.
+ * \param[in] modus MODUS_* constant, handed on unchanged; it selects
+ *            whether the data is read or written.
+ *
+ *  \return void
+ */
+void byten(void *x, size_t n, int modus)
+{
+  byten_hash(x, n, modus, 1);
+}
+
+/*! \brief Transfers n bytes to or from the restart file buffer without
+ *         including them in the md5sum.
+ *
+ *  Thin wrapper that forwards to byten_hash() with hash=0. This should only
+ *  be done for the md5sum itself, but not for actual data.
+ *
+ *  \param[in, out] x Pointer to the data.
+ *  \param[in] n Number of bytes.
+ *  \param[in] modus MODUS_* constant, handed on unchanged; it selects
+ *             whether the data is read or written.
+ *
+ *  \return void
+ */
+void byten_nohash(void *x, size_t n, int modus)
+{
+  byten_hash(x, n, modus, 0);
+}
+
+/*! \brief Reads/writes n bytes to restart file buffer.
+ *
+ *  All traffic goes through the page-aligned buffer iobuf_aligned. byte_count
+ *  is increased by n in every mode.
+ *
+ * \param[in, out] x pointer to the data
+ * \param[in] n number of bytes
+ * \param[in] modus MODUS_READ, MODUS_READCHECK and MODUS_CHECK read from the
+ *            file; any other value writes. In MODUS_READCHECK and
+ *            MODUS_CHECK the data is read into a temporary buffer instead
+ *            of x, so the caller's memory is left untouched.
+ * \param[in] hash If nonzero, this part is considered in md5sum.
+ *
+ *
+ *  \return void
+ */
+void byten_hash(void *x, size_t n, int modus, int hash)
+{
+  byte_count += n;
+
+  if(n > 0)
+    {
+      /* n is counted down in the loops below; keep the full length for the hash */
+      size_t nin = n;
+
+      if(modus == MODUS_READ || modus == MODUS_READCHECK || modus == MODUS_CHECK) /* read */
+        {
+          /* check modes only need the bytes for the hash: read into scratch memory */
+          if(modus == MODUS_READCHECK || modus == MODUS_CHECK)
+            x = mymalloc("x", n);
+
+          unsigned char *ptr = x;
+
+          while(n > 0)
+            {
+              if(iop != fillp)
+                {
+                  /* unread bytes are available in the buffer: copy as many as needed */
+                  size_t nn = n;
+                  if(nn > (fillp - iop))
+                    nn = fillp - iop;
+
+                  memcpy(ptr, iobuf_aligned + iop, nn);
+
+                  n -= nn;
+                  ptr += nn;
+                  iop += nn;
+                }
+              else
+                {
+                  /* buffer is drained: start again at the front once its end is reached */
+                  if(iop == MAX_BLOCK_SIZE)
+                    {
+                      iop   = 0;
+                      fillp = 0;
+                    }
+
+                  /* read whole pages only, limited by the space left in the buffer */
+                  size_t nn = n;
+                  if(nn % PageSize > 0)
+                    nn = (nn / PageSize + 1) * PageSize;
+
+                  if(nn > MAX_BLOCK_SIZE - fillp)
+                    nn = MAX_BLOCK_SIZE - fillp;
+
+                  /* a short read is treated as an error */
+                  if(read(fdint, iobuf_aligned + fillp, nn) != nn)
+                    terminate("read error");
+
+                  fillp += nn;
+                }
+            }
+
+          if(hash) /* to prevent call if we write/load the checksum itself */
+            MD5UpdateLong(&mysum, x, nin);
+
+          if(modus == MODUS_READCHECK || modus == MODUS_CHECK)
+            myfree(x);
+        }
+      else /* write */
+        {
+          unsigned char *ptr = x;
+
+          while(n > 0)
+            {
+              if(iop < MAX_BLOCK_SIZE)
+                {
+                  /* room left in the buffer: copy as much as fits */
+                  size_t nn = n;
+                  if(nn > MAX_BLOCK_SIZE - iop)
+                    nn = MAX_BLOCK_SIZE - iop;
+                  memcpy(iobuf_aligned + iop, ptr, nn);
+
+                  n -= nn;
+                  ptr += nn;
+                  iop += nn;
+                }
+              else
+                {
+                  /* buffer is full: write it out and start filling it again */
+                  size_t nn = MAX_BLOCK_SIZE;
+                  if(write(fdint, iobuf_aligned, nn) != nn)
+                    terminate("write error");
+
+                  iop = 0;
+                }
+            }
+
+          if(hash) /* to prevent call if we write/load the checksum itself */
+            MD5UpdateLong(&mysum, x, nin);
+        }
+    }
+}
+
+/*! \brief Sets up the buffer through which restart files are read/written.
+ *
+ *  MAX_BLOCK_SIZE + PageSize bytes are allocated and iobuf_aligned is set to
+ *  the first address inside that allocation that is a multiple of PageSize.
+ *  The buffer positions iop and fillp are reset to zero.
+ *
+ *  \return void
+ */
+void allocate_iobuf(void)
+{
+  if((MAX_BLOCK_SIZE % PageSize) > 0)
+    terminate("MAX_BLOCK_SIZE must be a multiple of PageSize");
+
+  io_buf = malloc(MAX_BLOCK_SIZE + PageSize);
+  if(!io_buf)
+    terminate("cannot allocated IO buffer");
+
+  /* round the raw address up to the next multiple of PageSize */
+  size_t addr   = ((size_t)io_buf) + (PageSize - 1);
+  iobuf_aligned = (char *)((addr / PageSize) * PageSize);
+
+  iop   = 0;
+  fillp = 0;
+}
+
+/*! \brief Frees the IO buffer for reading/writing the restart-files.
+ *
+ *  Writes buffer before freeing it if in MODUS_WRITE. The amount written is
+ *  rounded up to a multiple of PageSize; the padding bytes are set to zero
+ *  so that no uninitialized buffer content ends up in the file.
+ *
+ *  \param[in] modus Read or write.
+ *
+ *  \return void
+ */
+void deallocate_iobuf(int modus)
+{
+  if(modus == MODUS_WRITE) /* write */
+    {
+      if(iop > 0)
+        {
+          if(iop % PageSize > 0)
+            {
+              /* iop never exceeds MAX_BLOCK_SIZE, which is a multiple of
+               * PageSize, so the padded length still fits into the buffer */
+              size_t pad = PageSize - (iop % PageSize);
+              memset(iobuf_aligned + iop, 0, pad);
+              iop += pad;
+            }
+
+          if(write(fdint, iobuf_aligned, iop) != iop)
+            terminate("write error");
+        }
+    }
+
+  free(io_buf);
+}
diff --git a/src/amuse/community/arepo/src/main/allvars.c b/src/amuse/community/arepo/src/main/allvars.c
new file mode 100644
index 0000000000..fedd5f6306
--- /dev/null
+++ b/src/amuse/community/arepo/src/main/allvars.c
@@ -0,0 +1,331 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/main/allvars.c
+ * \date        05/2018
+ * \brief       Contains all global variables.
+ * \details     This file contains the global variables used in Arepo.
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../main/allvars.h"
+
+struct data_nodelist *DataNodeList; /* to be deleted */
+
+MyDouble boxSize, boxHalf;
+
+/* The per-axis variants below are only defined when the matching LONG_X / LONG_Y / LONG_Z option is set. */
+#ifdef LONG_X
+MyDouble boxSize_X, boxHalf_X;
+#endif /* #ifdef LONG_X */
+
+#ifdef LONG_Y
+MyDouble boxSize_Y, boxHalf_Y;
+#endif /* #ifdef LONG_Y */
+
+#ifdef LONG_Z
+MyDouble boxSize_Z, boxHalf_Z;
+#endif /* #ifdef LONG_Z */
+
+#ifdef FIX_PATHSCALE_MPI_STATUS_IGNORE_BUG
+MPI_Status mpistat;
+#endif /* #ifdef FIX_PATHSCALE_MPI_STATUS_IGNORE_BUG */
+
+/*********************************************************/
+/*  Global variables                                     */
+/*********************************************************/
+
+int ThisTask; /*!< the number of the local processor  */
+int NTask;    /*!< number of processors */
+int PTask;    /*!< note: NTask = 2^PTask */
+
+int ThisNode;        /*!< the rank of the current compute node  */
+int NumNodes;        /*!< the number of compute nodes used  */
+int MinTasksPerNode; /*!< the minimum number of MPI tasks that is found on any of the nodes  */
+int MaxTasksPerNode; /*!< the maximum number of MPI tasks that is found on any of the nodes  */
+int TasksInThisNode; /*!< number of MPI tasks on  current compute node */
+int RankInThisNode;  /*!< rank of the MPI task on the current compute node */
+long long MemoryOnNode; /*!< NOTE(review): undocumented; presumably the memory of this compute node -- confirm meaning and unit */
+double CPUThisRun; /*!< Sums CPU time of current process */
+int MaxTopNodes;   /*!< Maximum number of nodes in the top-level tree used for domain decomposition */
+int RestartFlag;   /*!< taken from command line used to start code. 0 is normal start-up from
+                      initial conditions, 1 is resuming a run from a set of restart files, while 2
+                      marks a restart from a snapshot file. */
+int RestartSnapNum; /*!< NOTE(review): undocumented; presumably the snapshot number used for a restart from a snapshot -- confirm */
+int Argc;           /*!< NOTE(review): undocumented; presumably a copy of main()'s argc -- confirm */
+char **Argv;        /*!< NOTE(review): undocumented; presumably a copy of main()'s argv -- confirm */
+
+size_t AllocatedBytes;
+size_t FreeBytes;
+
+int Nforces;
+int *TargetList;
+struct thread_data Thread[NUM_THREADS];
+
+#ifdef IMPOSE_PINNING
+hwloc_cpuset_t cpuset_thread[NUM_THREADS];
+#endif /* #ifdef IMPOSE_PINNING */
+
+int *Exportflag,
+    *ThreadsExportflag[NUM_THREADS]; /*!< Buffer used for flagging whether a particle needs to be exported to another process */
+int *Exportnodecount;
+int *Exportindex;
+
+int *Send_offset, *Send_count, *Recv_count, *Recv_offset;
+int *Send_offset_nodes, *Send_count_nodes, *Recv_count_nodes, *Recv_offset_nodes;
+int *TasksThatSend, *TasksThatRecv, NSendTasks, NRecvTasks;
+struct send_recv_counts *Send, *Recv;
+
+int Mesh_nimport, Mesh_nexport, *Mesh_Send_offset, *Mesh_Send_count, *Mesh_Recv_count, *Mesh_Recv_offset;
+int Force_nimport, Force_nexport, *Force_Send_offset, *Force_Send_count, *Force_Recv_count, *Force_Recv_offset;
+
+int TakeLevel;
+int TagOffset;
+
+int TimeBinSynchronized[TIMEBINS];
+struct TimeBinData TimeBinsHydro, TimeBinsGravity;
+
+#ifdef USE_SFR
+double TimeBinSfr[TIMEBINS];
+#endif
+
+#ifdef SUBFIND
+int GrNr;
+int NumPartGroup;
+#endif /* #ifdef SUBFIND */
+
+char DumpFlag         = 1;
+char DumpFlagNextSnap = 1;
+
+int FlagNyt = 0; /*!< guard used by the terminate() macro: set to 1 once this task has printed its abort message and called MPI_Abort */
+
+double CPU_Step[CPU_LAST];
+double CPU_Step_Stored[CPU_LAST];
+
+double WallclockTime; /*!< This holds the last wallclock time measurement for timings measurements */
+double StartOfRun;    /*!< This stores the time of the start of the run for evaluating the elapsed time */
+
+double EgyInjection;
+
+int NumPart; /*!< number of particles on the LOCAL processor */
+int NumGas;  /*!< number of gas particles on the LOCAL processor  */
+
+gsl_rng *random_generator;     /*!< a random number generator  */
+gsl_rng *random_generator_aux; /*!< an auxiliary random number generator for use if one doesn't want to influence the main code's
+                                  random numbers  */
+
+#ifdef USE_SFR
+int Stars_converted; /*!< current number of star particles in gas particle block */
+#endif
+
+#ifdef TOLERATE_WRITE_ERROR
+int WriteErrorFlag;
+char AlternativeOutputDir[MAXLEN_PATH];
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+double TimeOfLastDomainConstruction; /*!< holds what it says */
+
+int *Ngblist; /*!< Buffer to hold indices of neighbours retrieved by the neighbour search
+                 routines */
+
+double DomainCorner[3], DomainCenter[3], DomainLen, DomainFac;
+double DomainInverseLen, DomainBigFac;
+int *DomainStartList, *DomainEndList;
+double *DomainCost, *TaskCost;
+int *DomainCount, *TaskCount;
+struct no_list_data *ListNoData;
+
+int domain_bintolevel[TIMEBINS];
+int domain_refbin[TIMEBINS];
+int domain_grav_weight[TIMEBINS];
+int domain_hydro_weight[TIMEBINS];
+int domain_to_be_balanced[TIMEBINS];
+
+int *DomainTask;
+int *DomainNewTask;
+int *DomainNodeIndex;
+
+peanokey *Key, *KeySorted;
+
+struct topnode_data *TopNodes;
+
+int NTopnodes, NTopleaves;
+
+/* variables for input/output , usually only used on process 0 */
+
+char ParameterFile[MAXLEN_PATH]; /*!< file name of parameterfile used for starting the simulation */
+
+FILE *FdInfo,   /*!< file handle for info.txt log-file. */
+    *FdEnergy,  /*!< file handle for energy.txt log-file. */
+    *FdTimings, /*!< file handle for timings.txt log-file. */
+    *FdDomain,  /*!< file handle for domain.txt log-file. */
+    *FdBalance, /*!< file handle for balance.txt log-file. */
+    *FdMemory,  /*!< file handle for memory.txt log-file. */
+    *FdTimebin, /*!< file handle for timebins.txt log-file. */
+    *FdCPU;     /*!< file handle for cpu.txt log-file. */
+
+#ifdef DETAILEDTIMINGS
+FILE *FdDetailed;
+#endif /* #ifdef DETAILEDTIMINGS */
+
+#ifdef OUTPUT_CPU_CSV
+FILE *FdCPUCSV;
+#endif /* #ifdef OUTPUT_CPU_CSV */
+
+#ifdef RESTART_DEBUG
+FILE *FdRestartTest;
+#endif /* #ifdef RESTART_DEBUG */
+
+#ifdef USE_SFR
+FILE *FdSfr; /*!< file handle for sfr.txt log-file. */
+#endif
+
+struct pair_data *Pairlist;
+
+#ifdef FORCETEST
+FILE *FdForceTest; /*!< file handle for forcetest.txt log-file. */
+#endif             /* #ifdef FORCETEST */
+
+int WriteMiscFiles = 1;
+
+void *CommBuffer; /*!< points to communication buffer, which is used at a few places */
+
+/*! This structure contains data which is the SAME for all tasks (mostly code parameters read from the
+ * parameter file).  Holding this data in a structure is convenient for writing/reading the restart file, and
+ * it allows the introduction of new global variables in a simple way. The only thing to do is to introduce
+ * them into this structure.
+ */
+struct global_data_all_processes All;
+
+/*! This structure holds all the information that is
+ * stored for each particle of the simulation.
+ */
+struct particle_data *P, /*!< holds particle data on local processor */
+    *DomainPartBuf;      /*!< buffer for particle data used in domain decomposition */
+
+struct subfind_data *PS;
+
+/* The following structure holds data that is stored for each SPH particle in addition to the collisionless
+ * variables.
+ */
+struct sph_particle_data *SphP, /*!< holds SPH particle data on local processor */
+    *DomainSphBuf;              /*!< buffer for SPH particle data in domain decomposition */
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+struct special_particle_data *PartSpecialListGlobal;
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+peanokey *DomainKeyBuf;
+
+/*! global state of system
+ */
+struct state_of_system SysState, SysStateAtStart, SysStateAtEnd;
+
+/*! Various structures for communication during the gravity computation.
+ */
+struct directdata *DirectDataIn, *DirectDataAll;
+struct accdata *DirectAccOut, *DirectAccIn;
+int ThreadsNexport[NUM_THREADS], ThreadsNexportNodes[NUM_THREADS];
+struct data_partlist *PartList, *ThreadsPartList[NUM_THREADS];
+struct datanodelist *NodeList, *ThreadsNodeList[NUM_THREADS];
+struct potdata_out *PotDataResult, /*!< holds the partial results computed for imported particles. Note: We use GravDataResult =
+                                      GravDataGet, such that the result replaces the imported data */
+    *PotDataOut; /*!< holds partial results received from other processors. This will overwrite the GravDataIn array. NOTE(review): no GravData* variable is defined in this file; these remarks may be stale -- confirm */
+
+/*! Header for the standard file format.
+ */
+struct io_header header; /*!< holds header for snapshot files */
+#ifdef NTYPES_ICS
+struct io_header_ICs header_ICs; /*!< holds header for IC files */
+#endif                           /* #ifdef NTYPES_ICS */
+char (*Parameters)[MAXLEN_PARAM_TAG];
+char (*ParametersValue)[MAXLEN_PARAM_VALUE];
+char *ParametersType;
+
+/*! Variables for gravitational tree
+ * ------------------
+ */
+int Tree_MaxPart;
+int Tree_NumNodes;
+int Tree_MaxNodes;
+int Tree_FirstNonTopLevelNode;
+int Tree_NumPartImported;
+int Tree_NumPartExported;
+int Tree_ImportedNodeOffset;
+int Tree_NextFreeNode;
+MyDouble *Tree_Pos_list;
+unsigned long long *Tree_IntPos_list;
+int *Tree_Task_list;
+int *Tree_ResultIndexList;
+
+struct treepoint_data *Tree_Points;
+struct resultsactiveimported_data *Tree_ResultsActiveImported;
+
+int *Nextnode; /*!< gives next node in tree walk  (nodes array) */
+int *Father;   /*!< gives parent node in tree (Prenodes array) */
+
+struct NODE *Nodes; /*!< points to the actual memory allocated for the nodes */
+                    /*!< this is a pointer used to access the nodes which is shifted such that Nodes[All.MaxPart]
+                       gives the first allocated node. NOTE(review): this contradicts the remark above -- confirm which applies */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+struct ExtNODE *ExtNodes;
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+float *Nodes_GravCost;
+
+/*! Variables for neighbor tree
+ * -----------------
+ */
+int Ngb_MaxPart;
+int Ngb_NumNodes;
+int Ngb_MaxNodes;
+int Ngb_FirstNonTopLevelNode;
+int Ngb_NextFreeNode;
+int *Ngb_Father;
+int *Ngb_Marker;
+int Ngb_MarkerValue;
+
+int *Ngb_DomainNodeIndex;
+int *DomainListOfLocalTopleaves;
+int *DomainNLocalTopleave;
+int *DomainFirstLocTopleave;
+int *Ngb_Nextnode;
+
+/*! The ngb-tree data structure
+ */
+struct NgbNODE *Ngb_Nodes;
+struct ExtNgbNODE *ExtNgb_Nodes;
+
+#ifdef STATICNFW
+double Rs, R200;
+double Dc;
+double RhoCrit, V200;
+double fac;
+#endif /* #ifdef STATICNFW */
+
+int MaxThreads = 1;
+
+IO_Field *IO_Fields;
+int N_IO_Fields   = 0;
+int Max_IO_Fields = 0;
diff --git a/src/amuse/community/arepo/src/main/allvars.h b/src/amuse/community/arepo/src/main/allvars.h
new file mode 100644
index 0000000000..2dc46e56b3
--- /dev/null
+++ b/src/amuse/community/arepo/src/main/allvars.h
@@ -0,0 +1,1924 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/main/allvars.h
+ * \date        05/2018
+ * \brief       All (global) variables.
+ * \details
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 30.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef ALLVARS_H
+#define ALLVARS_H
+
+#include <assert.h>
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_integration.h>
+#include <gsl/gsl_math.h>
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_spline.h>
+#include <mpi.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include "./arepoconfig.h"
+
+#ifdef IMPOSE_PINNING
+#include <hwloc.h>
+#endif /* #ifdef IMPOSE_PINNING */
+
+#include "../time_integration/timestep.h"
+#include "../utils/dtypes.h"
+#include "../utils/tags.h"
+
+#define AREPO_VERSION "Arepo public 1.0" /* code version string */
+
+/* default values for unspecified config options */
+
+#if defined(__linux__) && !defined(HOST_MEMORY_REPORTING)
+#define HOST_MEMORY_REPORTING
+#endif /* #if defined(__linux__) && !defined(HOST_MEMORY_REPORTING) */
+
+#ifndef LOAD_TYPES
+#define LOAD_TYPES 0xff
+#endif /* #ifndef LOAD_TYPES */
+
+#if defined(REFINEMENT_SPLIT_CELLS) || defined(REFINEMENT_MERGE_CELLS)
+#define REFINEMENT
+#else /* #if defined (REFINEMENT_SPLIT_CELLS) || defined (REFINEMENT_MERGE_CELLS) */
+#undef REFINEMENT
+#endif /* #if defined (REFINEMENT_SPLIT_CELLS) || defined (REFINEMENT_MERGE_CELLS) #else */
+
+#ifndef NTYPES
+#define NTYPES 6
+#endif /* #ifndef NTYPES */
+
+#ifndef NSOFTTYPES
+#define NSOFTTYPES NTYPES
+#endif /* #ifndef NSOFTTYPES */
+
+/* Gradients only need to be updated directly before output when at least one of the gradient
+ * output options below is enabled; if none of them is set, the macro stays undefined.
+ */
+#if defined(OUTPUT_PRESSURE_GRADIENT) || defined(OUTPUT_DENSITY_GRADIENT) || defined(OUTPUT_VELOCITY_GRADIENT) || \
+    defined(OUTPUT_BFIELD_GRADIENT) || defined(OUTPUT_DIVVEL) || defined(OUTPUT_CURLVEL) || defined(OUTPUT_VORTICITY)
+#define UPDATE_GRADIENTS_FOR_OUTPUT
+#endif /* #if defined(OUTPUT_PRESSURE_GRADIENT) || defined(OUTPUT_DENSITY_GRADIENT) || defined(OUTPUT_VELOCITY_GRADIENT) ||
+          defined(OUTPUT_BFIELD_GRADIENT) || defined(OUTPUT_DIVVEL) || defined(OUTPUT_CURLVEL) || defined(OUTPUT_VORTICITY)
+        */
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+#ifndef NSOFTTYPES_HYDRO
+#define NSOFTTYPES_HYDRO 64
+#endif /* #ifndef NSOFTTYPES_HYDRO */
+#else  /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#undef NSOFTTYPES_HYDRO
+#define NSOFTTYPES_HYDRO 0
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */
+
+#if defined(SAVE_HSML_IN_SNAPSHOT)
+#define SUBFIND_CALC_MORE
+#endif /* #if defined(SAVE_HSML_IN_SNAPSHOT) */
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+#define NO_SELFGRAVITY_TYPE \
+  EXACT_GRAVITY_FOR_PARTICLE_TYPE                        // exclude particle type from self-gravity (can be used with exact gravity)
+#define NO_GRAVITY_TYPE EXACT_GRAVITY_FOR_PARTICLE_TYPE  // disable computation of gravity on particle type
+#define EXACT_GRAVITY_REACTION                           // include reaction to other particle types when using exact gravity
+#endif                                                   /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+/* restrictions on config option combinations */
+#if(NSOFTTYPES + NSOFTTYPES_HYDRO) >= 254
+#error "(NSOFTTYPES + NSOFTTYPES_HYDRO) >= 254"
+#endif /* #if (NSOFTTYPES + NSOFTTYPES_HYDRO) >= 254 */
+
+#if NSOFTTYPES < 2
+#error "NSOFTTYPES < 2"
+#endif /* #if NSOFTTYPES < 2 */
+
+#if defined(HOST_MEMORY_REPORTING) && !defined(__linux__)
+#error "HOST_MEMORY_REPORTING only works under Linux."
+#endif /* #if defined(HOST_MEMORY_REPORTING) && !defined(__linux__) */
+
+#if defined(USE_DIRECT_IO_FOR_RESTARTS) && !defined(__linux__)
+#error "USE_DIRECT_IO_FOR_RESTARTS only works under Linux."
+#endif /* #if defined(USE_DIRECT_IO_FOR_RESTARTS) && !defined(__linux__) */
+
+#ifdef INDIVIDUAL_GRAVITY_SOFTENING
+#if !((INDIVIDUAL_GRAVITY_SOFTENING + 0) >= 1)
+#error "set INDIVIDUAL_GRAVITY_SOFTENING to a bitmask of particle types"
+#endif /* #if !((INDIVIDUAL_GRAVITY_SOFTENING+0) >= 1) */
+#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */
+
+#ifdef OUTPUTPOTENTIAL
+#ifndef EVALPOTENTIAL
+#error "the option OUTPUTPOTENTIAL requires EVALPOTENTIAL"
+#endif /* #ifndef EVALPOTENTIAL */
+#endif /* #ifdef OUTPUTPOTENTIAL */
+
+#if defined(CELL_CENTER_GRAVITY) && defined(SELFGRAVITY)
+#ifndef HIERARCHICAL_GRAVITY
+#error "the of option CELL_CENTER_GRAVITY requires HIERARCHICAL_GRAVITY"
+#endif /* #ifndef HIERARCHICAL_GRAVITY */
+#endif /* #if defined(CELL_CENTER_GRAVITY) && defined(SELFGRAVITY) */
+
+#ifdef MHD
+#ifndef RIEMANN_HLLD
+#error "the of option MHD requires RIEMANN_HLLD"
+#endif /* #ifndef RIEMANN_HLLD */
+#endif /* #ifdef MHD */
+
+/* optional additional headers based on config options */
+
+#include "../utils/timer.h"
+
+#if defined(COOLING)
+#include "../cooling/cooling_vars.h"
+#endif /* #if defined(COOLING) */
+
+#ifdef ADDBACKGROUNDGRID
+#include "../add_backgroundgrid/add_bggrid.h"
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+/* function mappings and macros */
+
+#ifdef MPI_HYPERCUBE_ALLGATHERV
+#define MPI_Allgatherv MPI_hypercube_Allgatherv
+#endif /* #ifdef MPI_HYPERCUBE_ALLGATHERV */
+
+#ifdef MPISENDRECV_CHECKSUM
+#define MPI_Sendrecv MPI_Check_Sendrecv
+#endif /* #ifdef MPISENDRECV_CHECKSUM */
+
+#define terminate(...)                                                                                                            \
+  { /* print location and message once per task, abort the MPI job, then exit(1) */                                               \
+    if(FlagNyt == 0) /* FlagNyt is set below, so the report and MPI_Abort are issued only once */                                 \
+      {                                                                                                                           \
+        char termbuf1[1000], termbuf2[1000]; /* filled with snprintf: overlong text is truncated, never overruns */               \
+        snprintf(termbuf1, sizeof(termbuf1), "TERMINATE: ******!!!!!******  Code termination on task=%d, function %s(), file %s, line %d", \
+                 ThisTask, __FUNCTION__, __FILE__, __LINE__);                                                                     \
+        snprintf(termbuf2, sizeof(termbuf2), __VA_ARGS__);                                                                        \
+        printf("%s: %s\n", termbuf1, termbuf2);                                                                                   \
+        fflush(stdout);                                                                                                           \
+        FlagNyt = 1;                                                                                                              \
+        MPI_Abort(MPI_COMM_WORLD, 1);                                                                                             \
+      }                                                                                                                           \
+    exit(1);                                                                                                                      \
+  }
+#define mpi_terminate(...)    \
+  {                           \
+    if(ThisTask == 0) /* only task 0 invokes terminate(); on all other tasks this macro does nothing */ \
+      terminate(__VA_ARGS__); \
+  }
+#define warn(...)                                                                                                            \
+  { /* print a located warning to stdout and append it to the file "WARNINGS"; execution continues */                        \
+    char termbuf1[1000], termbuf2[1000]; /* filled with snprintf: overlong text is truncated, never overruns */              \
+    snprintf(termbuf1, sizeof(termbuf1), "WARNING: Code warning on task=%d, function %s(), file %s, line %d", ThisTask,      \
+             __FUNCTION__, __FILE__, __LINE__);                                                                              \
+    snprintf(termbuf2, sizeof(termbuf2), __VA_ARGS__);                                                                       \
+    printf("%s: %s\n", termbuf1, termbuf2);                                                                                  \
+    myflush(stdout);                                                                                                         \
+    FILE *fd = fopen("WARNINGS", "a");                                                                                       \
+    if(fd != NULL) /* skip the log file if it cannot be opened; the warning has already been printed */                      \
+      { fprintf(fd, "%s: %s\n", termbuf1, termbuf2); fclose(fd); }                                                           \
+  }
+
+/* define an "assert" macro which outputs MPI task (we do NOT want to
+   call MPI_Abort, because then the assertion failure isn't caught in
+   the debugger); the report is truncated to fit termbuf instead of overrunning it */
+#define myassert(cond)                                                                                                              \
+  if(!(cond))                                                                                                                       \
+    {                                                                                                                               \
+      char termbuf[1000];                                                                                                           \
+      snprintf(termbuf, sizeof(termbuf), "Assertion failure!\n\ttask=%d, function %s(), file %s, line %d:\n\t%s\n", ThisTask,       \
+               __FUNCTION__, __FILE__, __LINE__, #cond);                                                                            \
+      printf("%s", termbuf);                                                                                                        \
+      myflush(stdout);                                                                                                              \
+      assert(0);                                                                                                                    \
+    }
+
+/* memory manager: wrappers that pass the call site (__FUNCTION__, __FILE__, __LINE__) on to the *_fullinfo routines; the *_g variants expect a 'callorigin' to be in scope where they are used */
+#define mymalloc(x, y) mymalloc_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__, 0, NULL)
+#define mymalloc_g(x, y) mymalloc_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__, 0, callorigin)
+#define mymalloc_clear(x, y) mymalloc_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__, 1, NULL) /* passes 1 instead of 0 -- presumably "zero the memory"; confirm */
+#define mymalloc_movable(x, y, z) mymalloc_movable_fullinfo(x, y, z, __FUNCTION__, __FILE__, __LINE__, NULL)
+#define mymalloc_movable_g(x, y, z) mymalloc_movable_fullinfo(x, y, z, __FUNCTION__, __FILE__, __LINE__, callorigin)
+#define myrealloc(x, y) myrealloc_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__)
+#define myrealloc_movable(x, y) myrealloc_movable_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__)
+#define myfree(x) myfree_fullinfo(x, __FUNCTION__, __FILE__, __LINE__)
+#define myfree_movable(x) myfree_movable_fullinfo(x, __FUNCTION__, __FILE__, __LINE__)
+
+#define MAX_FIRST_ELEMENTS_CONSIDERED \
+  5 /* This sets the number of lowest loaded tasks to be considered for assignment of next domain patch */
+
+#define NUMBER_OF_MEASUREMENTS_TO_RECORD 6
+
+#ifndef GRAVCOSTLEVELS
+#define GRAVCOSTLEVELS 6
+#endif /* #ifndef  GRAVCOSTLEVELS */
+
+#define MODE_LOCAL_NO_EXPORT -1
+#define MODE_LOCAL_PARTICLES 0
+#define MODE_IMPORTED_PARTICLES 1
+#define MODE_FINISHED 2
+
+#ifndef DIRECT_SUMMATION_THRESHOLD
+#define DIRECT_SUMMATION_THRESHOLD 3000
+#endif /* #ifndef DIRECT_SUMMATION_THRESHOLD */
+
+#define MODE_FIRST_HALFSTEP 0
+#define MODE_SECOND_HALFSTEP 1
+
+#define FLAG_PARTIAL_TREE 0
+#define FLAG_FULL_TREE 1
+
+#ifndef MPI_MESSAGE_SIZELIMIT_IN_MB
+#define MPI_MESSAGE_SIZELIMIT_IN_MB 200
+#endif /* #ifndef MPI_MESSAGE_SIZELIMIT_IN_MB */
+
+#define MPI_MESSAGE_SIZELIMIT_IN_BYTES ((MPI_MESSAGE_SIZELIMIT_IN_MB)*1024LL * 1024LL)
+
+#define COMMBUFFERSIZE (32 * 1024LL * 1024LL)
+
+#define NUM_THREADS 1 /* no OpenMP support in this code! */
+
+extern int Nforces;
+extern int *TargetList;
+
+extern struct thread_data /* this declaration both defines the struct type and declares the Thread array */
+{
+  int Nexport __attribute__((__aligned__(64))); /* to align on different cache lines */
+  int NexportNodes;
+  int Interactions;
+  int dummy; /* NOTE(review): undocumented; presumably padding -- confirm */
+  double Cost;
+
+  double Costtotal;  /*!< The total cost of the particles/nodes processed by each thread */
+  double Ewaldcount; /*!< The total cost for the Ewald correction per thread */
+  int FirstExec;     /*!< Keeps track, if a given thread executes the gravity_primary_loop() for the first time */
+
+  size_t ExportSpace;
+  size_t InitialSpace;
+  size_t ItemSize;
+
+  int *P_CostCount;
+  int *TreePoints_CostCount;
+  int *Node_CostCount;
+
+  struct data_partlist *PartList;
+
+  int *Ngblist;
+  double *R2list;
+  int *Exportflag;
+  int *toGoDM;
+  int *toGoSph;
+
+} Thread[NUM_THREADS]; /* NUM_THREADS is defined as 1 just above, so the array has a single entry */
+
+/* If we use a static Voronoi mesh with local timestepping and no rebuild of
+ * the static mesh, then we need to backup the face areas before calling
+ * compute_interface_fluxes(), because this function calls face_get_normals()
+ * which sets some face area to 0 under some circumstances */
+#if defined(VORONOI_STATIC_MESH) && !defined(FORCE_EQUAL_TIMESTEPS) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION)
+#define VORONOI_BACKUP_RESTORE_FACE_AREAS
+#else /* #if defined(VORONOI_STATIC_MESH) && !defined(FORCE_EQUAL_TIMESTEPS) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) \
+       */
+#undef VORONOI_BACKUP_RESTORE_FACE_AREAS
+#endif /* #if defined(VORONOI_STATIC_MESH) && !defined(FORCE_EQUAL_TIMESTEPS) && \
+          !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) #else */
+
+#ifdef IMPOSE_PINNING
+extern hwloc_cpuset_t cpuset_thread[NUM_THREADS];
+#endif /* #ifdef IMPOSE_PINNING */
+
+#ifdef ONEDIMS
+#define ALLOC_TOLERANCE 0.3
+#else /* #ifdef ONEDIMS */
+#define ALLOC_TOLERANCE 0.1
+#endif /* #ifdef ONEDIMS #else */
+#define ALLOC_STARBH_ROOM 0.02
+
+#ifdef TOLERATE_WRITE_ERROR
+#define IO_TRIALS 20
+#define IO_SLEEP_TIME 10
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+/* calculate appropriate value of MAXSCALARS */
+
+#if defined(REFINEMENT_HIGH_RES_GAS) || defined(PASSIVE_SCALARS)
+
+#ifdef REFINEMENT_HIGH_RES_GAS
+#define COUNT_REFINE 1
+#else /* #ifdef  REFINEMENT_HIGH_RES_GAS */
+#define COUNT_REFINE 0
+#endif /* #ifdef  REFINEMENT_HIGH_RES_GAS #else */
+
+#ifdef PASSIVE_SCALARS
+#define COUNT_PASSIVE_SCALARS PASSIVE_SCALARS
+#else /* #ifdef PASSIVE_SCALARS */
+#define COUNT_PASSIVE_SCALARS 0
+#endif /* #ifdef PASSIVE_SCALARS #else */
+
+#define MAXSCALARS (COUNT_REFINE + COUNT_PASSIVE_SCALARS)
+#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) ||  defined(PASSIVE_SCALARS)*/
+
+/* calculate appropriate value of MAXGRADIENTS */
+
+#define COUNT_GRAD_DEFAULT 5
+
+#ifdef MHD
+#define COUNT_GRAD_MHD 3
+#else /* #ifdef MHD */
+#define COUNT_GRAD_MHD 0
+#endif /* #ifdef MHD #else */
+
+#ifdef MAXSCALARS
+#define COUNT_GRAD_SCALARS MAXSCALARS
+#else /* #ifdef MAXSCALARS */
+#define COUNT_GRAD_SCALARS 0
+#endif /* #ifdef MAXSCALARS #else*/
+
+#define MAXGRADIENTS (COUNT_GRAD_DEFAULT + COUNT_GRAD_MHD + COUNT_GRAD_SCALARS)
+
+/*************************************/
+
+/*! For Peano-Hilbert order: peanokey is wide enough for 3 * BITS_PER_DIMENSION bits (see PEANOCELLS), peano1D for BITS_PER_DIMENSION bits.
+ *  Note: Maximum is 10 to fit in 32-bit integer, maximum is 21 to fit into 64-bit integer,
+ *  and 42 is the absolute maximum, for which 128-bit integers are needed.
+ *  NOTE(review): the 128-bit peanokey is plain (signed) __int128 whereas the 64-bit variant is unsigned -- confirm this is intended.
+ */
+#ifndef BITS_PER_DIMENSION
+#define BITS_PER_DIMENSION 42
+#endif /* #ifndef  BITS_PER_DIMENSION */
+#if(BITS_PER_DIMENSION <= 21)
+typedef unsigned long long peanokey;
+#else  /* #if (BITS_PER_DIMENSION <= 21) */
+typedef __int128 peanokey;
+#endif /* #if (BITS_PER_DIMENSION <= 21) #else */
+#if(BITS_PER_DIMENSION <= 31)
+typedef unsigned int peano1D;
+#else /* #if (BITS_PER_DIMENSION <= 31) */
+#if(BITS_PER_DIMENSION <= 42)
+typedef unsigned long long peano1D;
+#else /* #if (BITS_PER_DIMENSION <= 42) */
+#error "BITS_PER_DIMENSION can be at most 42"
+#endif /* #if (BITS_PER_DIMENSION <= 42) #else */
+#endif /* #if (BITS_PER_DIMENSION <= 31) #else */
+
+#define PEANOCELLS (((peanokey)1) << (3 * BITS_PER_DIMENSION))
+
+#define MAX_FLOAT_NUMBER 1e37
+#define MIN_FLOAT_NUMBER 1e-37
+#define MAX_DOUBLE_NUMBER 1e306
+#define MIN_DOUBLE_NUMBER 1e-306
+
+#ifdef DOUBLEPRECISION
+#if(DOUBLEPRECISION == 2)
+#define MAX_REAL_NUMBER MAX_FLOAT_NUMBER
+#define MIN_REAL_NUMBER MIN_FLOAT_NUMBER
+#else /* #if (DOUBLEPRECISION==2) */
+#define MAX_REAL_NUMBER MAX_DOUBLE_NUMBER
+#define MIN_REAL_NUMBER MIN_DOUBLE_NUMBER
+#endif /* #if (DOUBLEPRECISION==2) #else */
+#else  /* #ifdef DOUBLEPRECISION */
+#define MAX_REAL_NUMBER MAX_FLOAT_NUMBER
+#define MIN_REAL_NUMBER MIN_FLOAT_NUMBER
+#endif /* #ifdef DOUBLEPRECISION #else */
+
+#ifndef GAMMA
+#define GAMMA (5. / 3.) /*!< adiabatic index of simulated gas */
+#endif                  /* #ifndef  GAMMA */
+#define GAMMA_MINUS1 (GAMMA - 1.)
+#define GAMMA_PLUS1 (GAMMA + 1.)
+
+#define HYDROGEN_MASSFRAC 0.76 /*!< mass fraction of hydrogen, relevant only for radiative cooling */
+#define HE_ABUND ((1. / HYDROGEN_MASSFRAC - 1.) / 4.)
+
+/* ... often used physical constants (cgs units; NIST 2010) */
+
+#define GRAVITY 6.6738e-8
+#define SOLAR_MASS 1.989e33
+#define SOLAR_LUM 3.826e33
+#define SOLAR_EFF_TEMP 5.780e3
+#define RAD_CONST 7.5657e-15
+#define AVOGADRO 6.02214e23
+#define BOLTZMANN 1.38065e-16
+#define GAS_CONST 8.31446e7
+#define CLIGHT 2.99792458e10
+
+#define PLANCK 6.6260695e-27
+#define PARSEC 3.085678e18
+#define KILOPARSEC 3.085678e21
+#define MEGAPARSEC 3.085678e24
+#define ASTRONOMICAL_UNIT 1.49598e13
+#define PROTONMASS 1.67262178e-24
+#define ELECTRONMASS 9.1093829e-28
+#define THOMPSON 6.65245873e-25
+#define ELECTRONCHARGE 4.8032042e-10
+#define HUBBLE 3.2407789e-18      /* in h/sec */
+#define LYMAN_ALPHA 1215.6e-8     /* 1215.6 Angstroem */
+#define LYMAN_ALPHA_HeII 303.8e-8 /* 303.8 Angstroem */
+#define OSCILLATOR_STRENGTH 0.41615
+#define OSCILLATOR_STRENGTH_HeII 0.41615
+#define ELECTRONVOLT_IN_ERGS 1.60217656e-12
+
+#define SEC_PER_GIGAYEAR 3.15576e16
+#define SEC_PER_MEGAYEAR 3.15576e13
+#define SEC_PER_YEAR 3.15576e7
+
+#ifndef FOF_PRIMARY_LINK_TYPES
+#define FOF_PRIMARY_LINK_TYPES 2
+#endif /* #ifndef FOF_PRIMARY_LINK_TYPES */
+
+#ifndef FOF_SECONDARY_LINK_TYPES
+#define FOF_SECONDARY_LINK_TYPES 0
+#endif /* #ifndef FOF_SECONDARY_LINK_TYPES */
+
+#ifndef ASMTH
+/*! ASMTH gives the scale of the short-range/long-range force split in units
+ * of FFT-mesh cells
+ */
+#define ASMTH 1.25
+#endif /* #ifndef ASMTH */
+
+#ifndef RCUT
+/*! RCUT gives the maximum distance (in units of the scale used for the force
+ * split) out to which short-range forces are evaluated in the short-range
+ * tree walk.
+ */
+#define RCUT 4.5
+#endif /* #ifndef RCUT */
+
+#define MAXLEN_OUTPUTLIST 1100  /*!< maximum number of entries in output list */
+#define MAXLEN_PATH 256         /*!< maximum length of various filenames (full path) */
+#define MAXLEN_PARAM_TAG 50     /*!< maximum length of the tag of a parameter in the parameter file */
+#define MAXLEN_PARAM_VALUE 200  /*!< maximum length of the value of a parameter in the parameter file */
+#define MAX_PARAMETERS 300      /*!< maximum number of parameters in the parameter file */
+#define DRIFT_TABLE_LENGTH 1000 /*!< length of the lookup table used to hold the drift and kick factors */
+
+#define BASENUMBER 100
+#define HIGHRESMASSFAC 0.5
+
+#define MAXITER 300000 /*! Maximum number of iterations before process is terminated */
+
+#ifndef FOF_LINKLENGTH
+#define FOF_LINKLENGTH 0.2
+#endif /* #ifndef FOF_LINKLENGTH */
+
+#ifndef FOF_GROUP_MIN_LEN
+#define FOF_GROUP_MIN_LEN 32
+#endif /* #ifndef FOF_GROUP_MIN_LEN */
+
+typedef struct
+{
+  double r;
+  double mass;
+} sort_r2list;
+
+typedef struct
+{
+  MyFloat r2;
+  int index;
+} r2type;
+
+#include "../mesh/mesh.h"
+#include "../mesh/voronoi/voronoi.h"
+
+struct unbind_data
+{
+  int index;
+};
+
+#ifdef FIX_PATHSCALE_MPI_STATUS_IGNORE_BUG
+extern MPI_Status mpistat;
+#undef MPI_STATUS_IGNORE
+#define MPI_STATUS_IGNORE &mpistat
+#endif /* #ifdef FIX_PATHSCALE_MPI_STATUS_IGNORE_BUG */
+
+#define FLT(x) (x)
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif /* #ifndef M_PI */
+
+#define TO_MBYTE_FAC (1.0 / (1024.0 * 1024.0))
+
+#ifdef ONEDIMS
+#define NUMDIMS 1 /*!< For 1D-normalized kernel */
+#define KERNEL_COEFF_1 (4.0 / 3) /*!< Coefficients for SPH spline kernel and its derivative */
+#define KERNEL_COEFF_2 (8.0)
+#define KERNEL_COEFF_3 (24.0)
+#define KERNEL_COEFF_4 (16.0)
+#define KERNEL_COEFF_5 (8.0 / 3)
+#define KERNEL_COEFF_6 (-8.0)
+#define NORM_COEFF 2.0 /*!< Coefficient for kernel normalization. */
+#else /* #ifdef   ONEDIMS */
+#ifndef TWODIMS
+#define NUMDIMS 3                     /*!< For 3D-normalized kernel */
+#define KERNEL_COEFF_1 2.546479089470 /*!< Coefficients for SPH spline kernel and its derivative; this one is 8 / PI */
+#define KERNEL_COEFF_2 15.278874536822 /*!< 48 / PI */
+#define KERNEL_COEFF_3 45.836623610466 /*!< 144 / PI */
+#define KERNEL_COEFF_4 30.557749073644 /*!< 96 / PI */
+#define KERNEL_COEFF_5 5.092958178941 /*!< 16 / PI */
+#define KERNEL_COEFF_6 (-15.278874536822) /*!< -48 / PI */
+#define NORM_COEFF 4.188790204786                 /*!< Coefficient for kernel normalization. Note:  4.0/3 * PI = 4.188790204786 */
+#else                                             /* #ifndef  TWODIMS */
+#define NUMDIMS 2                                 /*!< For 2D-normalized kernel */
+#define KERNEL_COEFF_1 (5.0 / 7 * 2.546479089470) /*!< Coefficients for SPH spline kernel and its derivative; each is 5/7 of the 3D value */
+#define KERNEL_COEFF_2 (5.0 / 7 * 15.278874536822)
+#define KERNEL_COEFF_3 (5.0 / 7 * 45.836623610466)
+#define KERNEL_COEFF_4 (5.0 / 7 * 30.557749073644)
+#define KERNEL_COEFF_5 (5.0 / 7 * 5.092958178941)
+#define KERNEL_COEFF_6 (5.0 / 7 * (-15.278874536822))
+#define NORM_COEFF M_PI /*!< Coefficient for kernel normalization. */
+#endif                  /* #ifndef  TWODIMS #else */
+#endif                  /* #ifdef   ONEDIMS #else*/
+
+#define SOFTFAC1 10.666666666667 /*!< Coefficients for gravitational softening; this one is 32/3 */
+#define SOFTFAC2 32.0
+#define SOFTFAC3 (-38.4)
+#define SOFTFAC4 (-2.8)
+#define SOFTFAC5 5.333333333333 /*!< 16/3 */
+#define SOFTFAC6 6.4
+#define SOFTFAC7 (-9.6)
+#define SOFTFAC8 21.333333333333 /*!< 64/3 */
+#define SOFTFAC9 (-48.0)
+#define SOFTFAC10 38.4
+#define SOFTFAC11 (-10.666666666667) /*!< -32/3 */
+#define SOFTFAC12 (-0.066666666667) /*!< -1/15 */
+#define SOFTFAC13 (-3.2)
+#define SOFTFAC14 0.066666666667 /*!< 1/15 */
+#define SOFTFAC15 (-16.0)
+#define SOFTFAC16 9.6
+#define SOFTFAC17 (-2.133333333333) /*!< -32/15 */
+#define SOFTFAC18 128.0
+#define SOFTFAC19 (-115.2)
+#define SOFTFAC20 21.333333333333 /*!< 64/3 */
+#define SOFTFAC21 (-96.0)
+#define SOFTFAC22 115.2
+#define SOFTFAC23 (-42.666666666667) /*!< -128/3 */
+#define SOFTFAC24 0.1333333333333 /*!< 2/15 */
+
+extern MyDouble boxSize, boxHalf;
+#ifdef LONG_X
+extern MyDouble boxSize_X, boxHalf_X;
+#else /* #ifdef LONG_X */
+#define boxSize_X boxSize
+#define boxHalf_X boxHalf
+#endif /* #ifdef LONG_X #else */
+#ifdef LONG_Y
+extern MyDouble boxSize_Y, boxHalf_Y;
+#else /* #ifdef LONG_Y */
+#define boxSize_Y boxSize
+#define boxHalf_Y boxHalf
+#endif /* #ifdef LONG_Y #else */
+#ifdef LONG_Z
+extern MyDouble boxSize_Z, boxHalf_Z;
+#else /* #ifdef LONG_Z */
+#define boxSize_Z boxSize
+#define boxHalf_Z boxHalf
+#endif /* #ifdef LONG_Z #else */
+
+#if !defined(GRAVITY_NOT_PERIODIC) /* periodic gravity: shift a coordinate difference by one box length if it lies outside [-boxHalf, boxHalf]; the macros assign to xtmp/ytmp/ztmp, which must be declared in the expanding scope */
+#define GRAVITY_NEAREST_X(x) \
+  (xtmp = (x), (xtmp > boxHalf_X) ? (xtmp - boxSize_X) : ((xtmp < -boxHalf_X) ? (xtmp + boxSize_X) : (xtmp)))
+#define GRAVITY_NEAREST_Y(x) \
+  (ytmp = (x), (ytmp > boxHalf_Y) ? (ytmp - boxSize_Y) : ((ytmp < -boxHalf_Y) ? (ytmp + boxSize_Y) : (ytmp)))
+#define GRAVITY_NEAREST_Z(x) \
+  (ztmp = (x), (ztmp > boxHalf_Z) ? (ztmp - boxSize_Z) : ((ztmp < -boxHalf_Z) ? (ztmp + boxSize_Z) : (ztmp)))
+#else /* #if !defined(GRAVITY_NOT_PERIODIC) */
+#define GRAVITY_NEAREST_X(x) (x)
+#define GRAVITY_NEAREST_Y(x) (x)
+#define GRAVITY_NEAREST_Z(x) (x)
+#endif /* #if !defined(GRAVITY_NOT_PERIODIC) #else */
+
+#if !defined(GRAVITY_NOT_PERIODIC) /* periodic case: absolute separation along one axis, replaced by (boxSize - |x|) when it exceeds half the box; also assigns to xtmp/ytmp/ztmp */
+#define FOF_NEAREST_LONG_X(x) (xtmp = fabs(x), (xtmp > boxHalf_X) ? (boxSize_X - xtmp) : xtmp)
+#define FOF_NEAREST_LONG_Y(x) (ytmp = fabs(x), (ytmp > boxHalf_Y) ? (boxSize_Y - ytmp) : ytmp)
+#define FOF_NEAREST_LONG_Z(x) (ztmp = fabs(x), (ztmp > boxHalf_Z) ? (boxSize_Z - ztmp) : ztmp)
+#else /* #if !defined(GRAVITY_NOT_PERIODIC) */
+#define FOF_NEAREST_LONG_X(x) fabs(x)
+#define FOF_NEAREST_LONG_Y(x) fabs(x)
+#define FOF_NEAREST_LONG_Z(x) fabs(x)
+#endif /* #if !defined(GRAVITY_NOT_PERIODIC) #else */
+
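+/* periodicity of gas: unless REFLECTIVE_<axis> is defined, the macros below apply the periodic box along that axis, shifting by at most one box length; they assign to xtmp/ytmp/ztmp, which must be declared in the expanding scope */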
+#ifndef REFLECTIVE_X
+#define NGB_PERIODIC_LONG_X(x) (xtmp = fabs(x), (xtmp > boxHalf_X) ? (boxSize_X - xtmp) : xtmp)
+#define NEAREST_X(x) (xtmp = (x), (xtmp > boxHalf_X) ? (xtmp - boxSize_X) : ((xtmp < -boxHalf_X) ? (xtmp + boxSize_X) : (xtmp)))
+#define WRAP_X(x) (xtmp = (x), (xtmp > boxSize_X) ? (xtmp - boxSize_X) : ((xtmp < 0) ? (xtmp + boxSize_X) : (xtmp)))
+#else /* #ifndef REFLECTIVE_X */
+#define NGB_PERIODIC_LONG_X(x) fabs(x)
+#define NEAREST_X(x) (x)
+#define WRAP_X(x) (x)
+#endif /* #ifndef REFLECTIVE_X #else */
+
+#ifndef REFLECTIVE_Y
+#define NGB_PERIODIC_LONG_Y(x) (ytmp = fabs(x), (ytmp > boxHalf_Y) ? (boxSize_Y - ytmp) : ytmp)
+#define NEAREST_Y(x) (ytmp = (x), (ytmp > boxHalf_Y) ? (ytmp - boxSize_Y) : ((ytmp < -boxHalf_Y) ? (ytmp + boxSize_Y) : (ytmp)))
+#define WRAP_Y(x) (ytmp = (x), (ytmp > boxSize_Y) ? (ytmp - boxSize_Y) : ((ytmp < 0) ? (ytmp + boxSize_Y) : (ytmp)))
+#else /* #ifndef REFLECTIVE_Y */
+#define NGB_PERIODIC_LONG_Y(x) fabs(x)
+#define NEAREST_Y(x) (x)
+#define WRAP_Y(x) (x)
+#endif /* #ifndef REFLECTIVE_Y #else */
+
+#ifndef REFLECTIVE_Z
+#define NGB_PERIODIC_LONG_Z(x) (ztmp = fabs(x), (ztmp > boxHalf_Z) ? (boxSize_Z - ztmp) : ztmp)
+#define NEAREST_Z(x) (ztmp = (x), (ztmp > boxHalf_Z) ? (ztmp - boxSize_Z) : ((ztmp < -boxHalf_Z) ? (ztmp + boxSize_Z) : (ztmp)))
+#define WRAP_Z(x) (ztmp = (x), (ztmp > boxSize_Z) ? (ztmp - boxSize_Z) : ((ztmp < 0) ? (ztmp + boxSize_Z) : (ztmp)))
+#else /* #ifndef REFLECTIVE_Z */
+#define NGB_PERIODIC_LONG_Z(x) fabs(x)
+#define NEAREST_Z(x) (x)
+#define WRAP_Z(x) (x)
+#endif /* #ifndef REFLECTIVE_Z #else */
+
+#define FACT1 0.366025403785 /* FACT1 = 0.5 * (sqrt(3)-1) */
+#define FAC_TWO_TO_TWO_THIRDS 1.5874011
+
+/*********************************************************/
+/*  Global variables                                     */
+/*********************************************************/
+
+extern int TimeBinSynchronized[TIMEBINS];
+extern struct TimeBinData TimeBinsHydro, TimeBinsGravity;
+
+#ifdef USE_SFR
+extern double TimeBinSfr[TIMEBINS];
+#endif /* #ifdef USE_SFR */
+
+extern int ThisTask; /*!< the number of the local processor  */
+extern int NTask;    /*!< number of processors */
+extern int PTask;    /*!< note: NTask = 2^PTask */
+
+extern int ThisNode;        /*!< the rank of the current compute node  */
+extern int NumNodes;        /*!< the number of compute nodes used  */
+extern int MinTasksPerNode; /*!< the minimum number of MPI tasks that is found on any of the nodes  */
+extern int MaxTasksPerNode; /*!< the maximum number of MPI tasks that is found on any of the nodes  */
+extern int TasksInThisNode; /*!< number of MPI tasks on  current compute node */
+extern int RankInThisNode;  /*!< rank of the MPI task on the current compute node */
+extern long long MemoryOnNode;
+
+extern double CPUThisRun; /*!< Sums CPU time of current process */
+
+extern int MaxTopNodes; /*!< Maximum number of nodes in the top-level tree used for domain decomposition */
+
+extern int RestartFlag; /*!< taken from command line used to start code. 0 is normal start-up from
+                             initial conditions, 1 is resuming a run from a set of restart files, while 2
+                             marks a restart from a snapshot file. */
+extern int RestartSnapNum;
+extern int TakeLevel;
+extern int TagOffset;
+
+extern int Argc;
+extern char **Argv;
+
+extern double CPU_Step[CPU_LAST];
+extern double CPU_Step_Stored[CPU_LAST];
+
+extern double WallclockTime; /*!< This holds the last wallclock time measurement for timings measurements */
+extern double StartOfRun;    /*!< This stores the time of the start of the run for evaluating the elapsed time */
+
+extern size_t AllocatedBytes;
+extern size_t FreeBytes;
+
+extern char DumpFlag;
+extern char DumpFlagNextSnap;
+
+extern int FlagNyt;
+
+extern int NumPart; /*!< number of particles on the LOCAL processor */
+extern int NumGas;  /*!< number of gas particles on the LOCAL processor  */
+
+extern gsl_rng *random_generator;     /*!< a random number generator  */
+extern gsl_rng *random_generator_aux; /*!< an auxiliary random number generator for use if one doesn't want to influence the main
+                                         code's random numbers  */
+
+#ifdef USE_SFR
+extern int Stars_converted; /*!< current number of star particles in gas particle block */
+#endif                      /* #ifdef USE_SFR */
+
+#ifdef TOLERATE_WRITE_ERROR
+extern int WriteErrorFlag;
+extern char AlternativeOutputDir[MAXLEN_PATH];
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+extern double EgyInjection;
+
+extern double TimeOfLastDomainConstruction; /*!< holds what it says */
+
+extern double DomainCorner[3], DomainCenter[3], DomainLen, DomainFac;
+extern double DomainInverseLen, DomainBigFac;
+extern int *DomainStartList, *DomainEndList;
+extern double *DomainCost, *TaskCost;
+extern int *DomainCount, *TaskCount;
+extern struct no_list_data
+{
+  int task;
+  int no;
+  int domainCount;
+  double domainCost;
+} * ListNoData;
+
+extern int domain_bintolevel[TIMEBINS];
+extern int domain_refbin[TIMEBINS];
+extern int domain_grav_weight[TIMEBINS];
+extern int domain_hydro_weight[TIMEBINS];
+extern int domain_to_be_balanced[TIMEBINS];
+
+/*! Array of task numbers holding the respective top-level nodes. For
+    the topnodes entries, it is indexed by the Leaf member, for
+    pseudoparticles it is indexed by the node
+    number-MaxPart-MaxNodes.  */
+extern int *DomainTask;
+extern int *DomainNewTask;
+
+/*! Array of indices of the main tree nodes that are identical to the
+ *  top-level nodes. For the topnodes entries, it is indexed by the
+ *  Leaf member, for pseudoparticles it is indexed by the node
+ *  number-MaxPart-MaxNodes.
+ */
+extern int *DomainNodeIndex;
+
+extern peanokey *Key, *KeySorted;
+
+/*! The top node structure is an octree used for encoding the domain
+ *  decomposition. Its leaf nodes are the units into which the domain
+ *  is decomposed.
+ */
+extern struct topnode_data
+{
+  peanokey Size;
+  peanokey StartKey;
+  long long Count;
+  /*! The index of the first daughter node. The remaining 7 follow
+      sequentially, I think. */
+  int Daughter;
+  /*! The index of this topnode in the DomainTask etc arrays. Is this
+      only valid for topnodes that have daughter=-1, i.e. the actual
+      leaves? */
+  int Leaf;
+  unsigned char MortonToPeanoSubnode[8];
+} * TopNodes;
+
+extern int NTopnodes, NTopleaves;
+
+/*! Variables for gravitational tree */
+extern int Tree_MaxPart;
+extern int Tree_NumNodes;
+extern int Tree_MaxNodes;
+extern int Tree_FirstNonTopLevelNode;
+extern int Tree_NumPartImported;
+extern int Tree_NumPartExported;
+extern int Tree_ImportedNodeOffset;
+extern int Tree_NextFreeNode;
+
+extern int *Tree_ResultIndexList;
+extern int *Tree_Task_list;
+extern MyDouble *Tree_Pos_list;
+extern unsigned long long *Tree_IntPos_list;
+
+extern struct treepoint_data
+{
+  MyDouble Pos[3];
+  unsigned long long IntPos[3];
+  MyDouble Mass;
+  float OldAcc;
+  int index;
+  int th;
+  unsigned char level;
+  unsigned char Type;
+  unsigned char SofteningType : 7;
+#ifndef HIERARCHICAL_GRAVITY
+  unsigned char ActiveFlag : 1;
+#endif /* #ifndef HIERARCHICAL_GRAVITY */
+
+#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES)
+  MyFloat GroupRad;
+  int GrNr;
+#endif /* #if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) */
+} * Tree_Points;
+
+extern struct resultsactiveimported_data
+{
+  MyFloat GravAccel[3];
+#ifdef EVALPOTENTIAL
+  MyFloat Potential;
+#endif /* #ifdef EVALPOTENTIAL */
+  int index;
+} * Tree_ResultsActiveImported;
+
+extern char ParameterFile[MAXLEN_PATH]; /*!< file name of parameterfile used for starting the simulation */
+
+extern FILE *FdInfo, /*!< file handle for info.txt log-file. */
+    *FdEnergy,       /*!< file handle for energy.txt log-file. */
+    *FdTimings,      /*!< file handle for timings.txt log-file. */
+    *FdBalance,      /*!< file handle for balance.txt log-file. */
+    *FdTimebin,      /*!< file handle for timebins.txt log-file. */
+    *FdDomain,       /*!< file handle for domain.txt log-file. */
+    *FdMemory,       /*!< file handle for memory.txt log-file. */
+    *FdCPU;          /*!< file handle for cpu.txt log-file. */
+
+#ifdef DETAILEDTIMINGS
+extern FILE *FdDetailed;
+#endif /* #ifdef DETAILEDTIMINGS */
+
+#ifdef OUTPUT_CPU_CSV
+extern FILE *FdCPUCSV; /**< file handle for cpu.csv log-file. Used if the cpu log is printed in csv format as well. */
+#endif                 /* #ifdef OUTPUT_CPU_CSV */
+
+#ifdef RESTART_DEBUG
+extern FILE *FdRestartTest;
+#endif /* #ifdef RESTART_DEBUG */
+
+#ifdef USE_SFR
+extern FILE *FdSfr; /**< file handle for sfr.txt log-file. */
+#endif              /* #ifdef USE_SFR */
+
+#ifdef FORCETEST
+extern FILE *FdForceTest; /*!< file handle for forcetest.txt log-file. */
+#endif                    /* #ifdef FORCETEST */
+
+/*! Determines whether various dump files are written. Normally true,
+    set to false by Sunrise to avoid creating them. */
+extern int WriteMiscFiles;
+
+extern void *CommBuffer; /*!< points to communication buffer, which is used at a few places */
+
+/*! \brief Global simulation data.
+ *
+ *  Data which is the SAME for all tasks (mostly code parameters read
+ *  from the parameter file).  Holding this data in a structure is
+ *  convenient for writing/reading the restart file, and it allows the
+ *  introduction of new global variables in a simple way. The only
+ *  thing to do is to introduce them into this structure.
+ */
+extern struct global_data_all_processes
+{
+  long long TotNumPart; /*!<  total particle numbers (global value) */
+  long long TotNumGas;  /*!<  total gas particle number (global value) */
+
+  int MaxPart;    /*!< This gives the maximum number of particles that can be stored on one
+                     processor. */
+  int MaxPartSph; /*!< This gives the maximum number of SPH particles that can be stored on one
+                     processor. */
+
+#if defined(COOLING)
+  char TreecoolFile[MAXLEN_PATH];
+#endif /* #if defined(COOLING) */
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+  int TotPartSpecial, MaxPartSpecial;
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+#if defined(REFINEMENT)
+  double ReferenceGasPartMass;
+#endif /* #if defined(REFINEMENT) */
+
+#ifdef REFINEMENT
+  double TargetGasMass;
+  double TargetGasMassFactor;
+  int RefinementCriterion;
+  int DerefinementCriterion;
+#endif /* #ifdef REFINEMENT */
+
+  double TotGravCost;
+
+#ifdef INDIVIDUAL_GRAVITY_SOFTENING
+  double AvgType1Mass;
+#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */
+
+  double MeanVolume;
+
+  int MultipleDomains;
+  double TopNodeFactor;
+
+  int ICFormat; /*!< selects different versions of IC file-format */
+
+  int SnapFormat; /*!< selects different versions of snapshot file-formats */
+
+  int NumFilesPerSnapshot;       /*!< number of files in multi-file snapshot dumps */
+  int NumFilesWrittenInParallel; /*!< maximum number of files that may be written/read simultaneously when
+                                    writing/reading restart-files, or when writing snapshot files */
+
+  double TreeAllocFactor; /*!< Each processor allocates a number of nodes which is TreeAllocFactor times
+                             the maximum(!) number of particles.  Note: A typical local tree for N
+                             particles needs usually about ~0.65*N nodes. */
+
+  double TopNodeAllocFactor; /*!< Allocation factor for the top-level tree nodes. NOTE(review): the text previously here
+                                was a verbatim copy of the TreeAllocFactor description; presumably the number of
+                                top-level nodes scales with TopNodeAllocFactor -- confirm where MaxTopNodes is set. */
+
+  double NgbTreeAllocFactor; /*!< Each processor allocates a number of nodes for the neighbor search which is NgbTreeAllocFactor times
+                                 the maximum(!) number of gas particles.  Note: A typical local tree for N
+                                 particles needs usually about ~0.65*N nodes. */
+
+  int MaxMemSize; /*!< size of maximum memory consumption in MB */
+
+  /* some SPH parameters */
+
+  int DesNumNgb; /*!< Desired number of SPH neighbours */
+
+#ifdef SUBFIND
+  int DesLinkNgb;
+  double ErrTolThetaSubfind;
+#endif /* #ifdef SUBFIND */
+
+  double TotCountReducedFluxes;
+  double TotCountFluxes;
+
+  double DtDisplacement;
+
+  double MaxNumNgbDeviation; /*!< Maximum allowed deviation neighbour number */
+
+  double InitGasTemp; /*!< may be used to set the temperature in the IC's */
+  double InitGasU;    /*!< the same, but converted to thermal energy per unit mass */
+  double MinGasTemp;  /*!< may be used to set a floor for the gas temperature */
+  double MinEgySpec;  /*!< the minimum allowed temperature expressed as energy per unit mass; code will inject energy if a cell falls
+                         below this limit */
+
+  double MinimumDensityOnStartUp;
+
+  double GasSoftFactor;
+
+  double LimitUBelowThisDensity;
+  double LimitUBelowCertainDensityToThisValue;
+
+  /* some force counters  */
+  long long TotNumOfForces; /*!< counts total number of force computations  */
+
+#ifdef MULTIPLE_RESTARTS
+  int RestartFileCount;
+#endif /* #ifdef MULTIPLE_RESTARTS */
+
+  /* various cosmological factors that are only a function of the current scale factor, and in non-comoving runs are set to 1 */
+  double cf_atime, cf_a2inv, cf_a3inv, cf_afac1, cf_afac2, cf_afac3, cf_hubble_a, cf_time_hubble_a, cf_redshift;
+  /* Hubble rate at the current time, valid both for comoving and non-comoving integration */
+  double cf_H;
+  /* Hubble expansion rate, but in non-comoving integration set to zero */
+  double cf_Hrate;
+
+  /* system of units  */
+  double UnitTime_in_s,         /*!< factor to convert internal time unit to seconds/h */
+      UnitMass_in_g,            /*!< factor to convert internal mass unit to grams/h */
+      UnitVelocity_in_cm_per_s, /*!< factor to convert internal velocity unit to cm/sec */
+      UnitLength_in_cm,         /*!< factor to convert internal length unit to cm/h */
+      UnitPressure_in_cgs,      /*!< factor to convert internal pressure unit to cgs units (little 'h' still
+                                   around!) */
+      UnitDensity_in_cgs,       /*!< factor to convert internal mass density unit to g/cm^3*h^2 */
+      UnitCoolingRate_in_cgs,   /*!< factor to convert internal cooling rate to cgs units */
+      UnitEnergy_in_cgs,        /*!< factor to convert internal energy to cgs units */
+      UnitTime_in_Megayears,    /*!< factor to convert internal time to megayears/h */
+      GravityConstantInternal,  /*!< If set to zero in the parameterfile, the internal value of the
+                                   gravitational constant is set to the Newtonian value based on the system of
+                                   units specified. Otherwise the value provided is taken as internal gravity
+                                   constant G. */
+      G;                        /*!< Gravity-constant in internal units */
+
+  /* Cosmology */
+
+  double Hubble;   /*!< Hubble-constant in internal units */
+  double Omega0,   /*!< matter density in units of the critical density (at z=0) */
+      OmegaLambda, /*!< vacuum energy density relative to critical density (at z=0) */
+      OmegaBaryon, /*!< baryon density in units of the critical density (at z=0) */
+      HubbleParam; /*!< little `h', i.e. Hubble constant in units of 100 km/s/Mpc.  Only needed to get absolute
+                    * physical values for cooling physics
+                    */
+
+  double BoxSize; /*!< Boxsize in case periodic boundary conditions are used */
+
+  /* Code options */
+
+  int ComovingIntegrationOn;   /*!< flags that comoving integration is enabled */
+  int PeriodicBoundariesOn;    /*!< flags that periodic boundaries are enabled for gravity */
+  int ResubmitOn;              /*!< flags that automatic resubmission of job to queue system is enabled */
+  int TypeOfOpeningCriterion;  /*!< determines tree cell-opening criterion: 0 for Barnes-Hut, 1 for relative
+                                  criterion */
+  int TypeOfTimestepCriterion; /*!< gives type of timestep criterion (only 0 supported right now - unlike
+                                  gadget-1.1) */
+  int OutputListOn;            /*!< flags that output times are listed in a specified file */
+  int CoolingOn;               /*!< flags that cooling is enabled */
+  int StarformationOn;         /*!< flags that star formation is enabled */
+
+  int NParameters;
+
+  int LowestActiveTimeBin;
+  int HighestActiveTimeBin;
+  int LowestOccupiedTimeBin;
+  int HighestOccupiedTimeBin;
+  int LowestOccupiedGravTimeBin;
+  int HighestOccupiedGravTimeBin;
+  int HighestSynchronizedTimeBin;
+  int SmallestTimeBinWithDomainDecomposition;
+  double ActivePartFracForNewDomainDecomp;
+
+  /* parameters determining output frequency */
+
+  int SnapshotFileCount;     /*!< number of snapshot that is written next */
+  double TimeBetSnapshot,    /*!< simulation time interval between snapshot files */
+      TimeOfFirstSnapshot,   /*!< simulation time of first snapshot files */
+      CpuTimeBetRestartFile, /*!< cpu-time between regularly generated restart files */
+      TimeLastRestartFile,   /*!< cpu-time when last restart-file was written */
+      TimeBetStatistics,     /*!< simulation time interval between computations of energy statistics */
+      TimeLastStatistics;    /*!< simulation time when the energy statistics was computed the last time */
+  int NumCurrentTiStep;      /*!< counts the number of system steps taken up to this point */
+
+  /* Current time of the simulation, global step, and end of simulation */
+
+  double Time,   /*!< current time of the simulation */
+      TimeBegin, /*!< time of initial conditions of the simulation */
+      TimeStep,  /*!< difference between current times of previous and current timestep */
+      TimeMax;   /*!< marks the point of time until the simulation is to be evolved */
+
+  /* variables for organizing discrete timeline */
+
+  double Timebase_interval; /*!< factor to convert from floating point time interval to integer timeline */
+  integertime Ti_Current;   /*!< current time on integer timeline */
+  integertime Previous_Ti_Current;
+  integertime Ti_nextoutput; /*!< next output time on integer timeline */
+  integertime Ti_lastoutput;
+
+  integertime Ti_begstep[TIMEBINS]; /*!< marks start of current step of each timebin on integer timeline */
+
+#ifdef PMGRID
+  integertime PM_Ti_endstep, PM_Ti_begstep;
+  double Asmth[2], Rcut[2];
+  double Corner[2][3], UpperCorner[2][3], Xmintot[2][3], Xmaxtot[2][3];
+  double TotalMeshSize[2];
+#if defined(EVALPOTENTIAL) && defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC)
+  double MassPMregions[2];
+#endif /* #if defined(EVALPOTENTIAL) && defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC) */
+#endif /* #ifdef PMGRID */
+
+  long long GlobalNSynchronizedHydro;
+  long long GlobalNSynchronizedGravity;
+
+  int LevelToTimeBin[GRAVCOSTLEVELS];
+  int LevelHasBeenMeasured[GRAVCOSTLEVELS];
+
+  /* variables that keep track of cumulative CPU consumption */
+
+  double TimeLimitCPU;
+  double CPU_Sum[CPU_LAST]; /*!< sums wallclock time/CPU consumption in whole run */
+
+  /* tree code opening criterion */
+
+  double ErrTolTheta;    /*!< BH tree opening angle */
+  double ErrTolForceAcc; /*!< parameter for relative opening criterion in tree walk */
+
+  /* adjusts accuracy of time-integration */
+
+  double ErrTolIntAccuracy; /*!< accuracy tolerance parameter \f$ \eta \f$ for timestep criterion. The
+                               timestep is \f$ \Delta t = \sqrt{\frac{2 \eta eps}{a}} \f$ */
+
+  double MinSizeTimestep, /*!< minimum allowed timestep. Normally, the simulation terminates if the
+                             timestep determined by the timestep criteria falls below this limit. */
+      MaxSizeTimestep;    /*!< maximum allowed timestep */
+
+#ifdef TIMESTEP_OUTPUT_LIMIT
+  double TimestepOutputLimit;
+#endif /* #ifdef TIMESTEP_OUTPUT_LIMIT */
+
+#ifdef FORCE_EQUAL_TIMESTEPS
+  integertime GlobalTimeStep;
+#endif /* #ifdef FORCE_EQUAL_TIMESTEPS */
+
+  double IsoSoundSpeed;
+
+  double CourantFac; /*!< Hydrodynamics-Courant factor */
+
+#ifdef REGULARIZE_MESH_FACE_ANGLE
+  double CellMaxAngleFactor;
+#else  /* #ifdef REGULARIZE_MESH_FACE_ANGLE */
+  double CellShapingFactor;
+#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */
+  double CellShapingSpeed;
+
+  int CPU_TimeBinCountMeasurements[TIMEBINS];
+  double CPU_TimeBinMeasurements[TIMEBINS][NUMBER_OF_MEASUREMENTS_TO_RECORD];
+
+  /* gravitational and hydrodynamical softening lengths (given in terms of an `equivalent' Plummer softening
+   * length)
+   *
+   */
+
+  int SofteningTypeOfPartType[NTYPES];
+
+  double SofteningComoving[NSOFTTYPES]; /*!< comoving gravitational softening lengths for each softening type */
+  double SofteningMaxPhys[NSOFTTYPES];  /*!< maximum physical gravitational softening lengths for each softening type */
+
+  double
+      SofteningTable[NSOFTTYPES + NSOFTTYPES_HYDRO]; /*!< current (comoving) gravitational softening lengths for each softening type */
+  double ForceSoftening[NSOFTTYPES + NSOFTTYPES_HYDRO + 1]; /*!<  current (comoving) gravitational softening lengths, multiplied by a
+                                                               factor 2.8 - at that scale the force is Newtonian */
+
+  /*! If particle masses are all equal for one type, the corresponding entry in MassTable is set to this
+   *  value, allowing the size of the snapshot files to be reduced
+   */
+  double MassTable[NTYPES];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  double MinimumComovingHydroSoftening;
+  double AdaptiveHydroSofteningSpacing;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+
+  /* some filenames */
+  char InitCondFile[MAXLEN_PATH], OutputDir[MAXLEN_PATH], SnapshotFileBase[MAXLEN_PATH], ResubmitCommand[MAXLEN_PATH],
+      OutputListFilename[MAXLEN_PATH];
+
+  /*! table with desired output times */
+  double OutputListTimes[MAXLEN_OUTPUTLIST];
+  char OutputListFlag[MAXLEN_OUTPUTLIST];
+  int OutputListLength; /*!< number of times stored in table of desired output times */
+
+#ifdef USE_SFR /* enable Springel & Hernquist model */
+  double OverDensThresh;
+  double CritOverDensity;
+  double TemperatureThresh;
+  double CritPhysDensity;
+  double PhysDensThresh;
+  double EgySpecSN;
+  double EgySpecCold;
+  double FactorEVP;
+  double TempSupernova;
+  double TempClouds;
+  double MaxSfrTimescale;
+  double FactorSN;
+#endif /* #ifdef USE_SFR */
+
+#ifdef MHD_POWELL
+  double Powell_Momentum[3];
+  double Powell_Angular_Momentum[3];
+  double Powell_Energy;
+#endif /* #ifdef MHD_POWELL */
+
+#ifdef MHD_SEEDFIELD
+  int B_dir;      /* flags for direction: x = 1, y = 2, z = 4 */
+  double B_value; /* value for the chosen component(s) of the magnetic field */
+#endif            /* #ifdef MHD_SEEDFIELD */
+
+  MyIDType MaxID;
+
+#ifdef REFINEMENT_VOLUME_LIMIT
+  double MaxVolumeDiff;
+  double MinVolume;
+  double MaxVolume;
+#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */
+
+#ifdef REDUCE_FLUSH
+  double FlushCpuTimeDiff;
+  double FlushLast;
+#endif /* #ifdef REDUCE_FLUSH */
+
+#ifdef TILE_ICS
+  int TileICsFactor;
+#endif /* #ifdef TILE_ICS */
+
+#ifdef ADDBACKGROUNDGRID
+  int GridSize;
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+#ifdef ONEDIMS_SPHERICAL
+  double CoreMass;
+  double CoreRadius;
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+  double GlobalDisplacementVector[3];
+} All;
+
+/*****************************************************************************
+ ** particle data ************************************************************
+ ****************************************************************************/
+
+/*! \brief This structure holds all the information that is
+ *         stored for each particle of the simulation.
+ */
+extern struct particle_data
+{
+  MyDouble Pos[3];       /*!< particle position at its current time */
+  MyDouble Mass;         /*!< particle mass */
+  MyFloat Vel[3];        /*!< particle velocity at its current time */
+  MySingle GravAccel[3]; /*!< particle acceleration due to gravity */
+
+#ifdef EXTERNALGRAVITY
+  MySingle dGravAccel; /*!< norm of spatial derivatives tensor of gravity accelerations due to external force */
+#endif /* #ifdef EXTERNALGRAVITY */
+
+#ifdef PMGRID
+  MySingle GravPM[3]; /*!< particle acceleration due to long-range PM gravity force */
+#endif                /* #ifdef PMGRID */
+
+#ifdef FORCETEST
+  MyFloat GravAccelDirect[3]; /*!< particle acceleration calculated by direct summation */
+  MyFloat PotentialDirect;    /*!< potential computed with direct summation */
+  MyFloat DistToID1;
+#ifdef PMGRID
+  MyFloat GravAccelShortRange[3]; /*!< short range component of gravitational acceleration */
+  MyFloat GravAccelLongRange[3];  /*!< long range component of gravitational acceleration */
+  MyFloat PotentialShortRange;    /*!< potential due to short-range forces */
+  MyFloat PotentialLongRange;     /*!< potential due to long-range forces */
+#endif                            /* #ifdef PMGRID */
+#endif                            /* #ifdef FORCETEST  */
+
+#if defined(EVALPOTENTIAL) || defined(OUTPUTPOTENTIAL)
+  MySingle Potential; /*!< gravitational potential */
+#if defined(PMGRID)
+  MySingle PM_Potential; /*!< gravitational potential in Particle-Mesh */
+#endif                   /* #if defined(PMGRID) */
+#endif                   /* #if defined(EVALPOTENTIAL) || defined (OUTPUTPOTENTIAL) */
+
+#ifdef OUTPUTGRAVINTERACTIONS
+  int GravInteractions; /*!< number of gravitational interactions calculated */
+#endif                  /* #ifdef OUTPUTGRAVINTERACTIONS */
+
+#ifdef EXTERNALGRAVITY
+  MyFloat ExtPotential; /*!< value of external potential */
+#endif                  /* #ifdef EXTERNALGRAVITY */
+
+  MyIDType ID; /*!< unique ID of particle */
+
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
+  MyIDType FileOrder;
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
+
+  integertime Ti_Current; /*!< current time on integer timeline */
+
+  float OldAcc; /*!< magnitude of old gravitational force. Used in relative opening criterion */
+
+  float GravCost[GRAVCOSTLEVELS]; /*!< weight factors used for balancing the work-load */
+
+  unsigned char Type; /*!< flags particle type.  0=gas, 1=halo, 2=disk, 3=bulge, 4=stars, 5=bndry */
+  unsigned char SofteningType;
+  signed char TimeBinGrav;
+  signed char TimeBinHydro;
+} * P,              /*!< holds particle data on local processor */
+    *DomainPartBuf; /*!< buffer for particle data used in domain decomposition */
+
+/*****************************************************************************
+ ** (sub)halo data ***********************************************************
+ ****************************************************************************/
+
+extern struct subfind_data /*!< (sub)halo data record; the array is reached through the global pointer PS */
+{
+  int OriginIndex, OriginTask; /* NOTE(review): presumably index/task an entry originates from -- confirm against users of PS */
+  int TargetIndex, TargetTask; /* NOTE(review): presumably index/task an entry is destined for -- confirm */
+  int GrNr;                    /* NOTE(review): presumably the group number -- confirm */
+
+#ifdef SUBFIND /* all members up to the matching #endif exist only when SUBFIND is defined */
+  int SubNr;
+  int OldIndex;
+  int submark;
+  int originindex, origintask; /* separate members from OriginIndex/OriginTask above (names differ only in case) */
+  MyFloat Utherm;
+  MyFloat Density;
+  MyFloat Potential;
+  MyFloat Hsml;
+  MyFloat BindingEnergy;
+
+#ifdef CELL_CENTER_GRAVITY
+  MyDouble Center[3];
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+
+#ifdef SUBFIND_CALC_MORE
+  MyFloat SubfindHsml;
+  MyFloat SubfindDensity;   /* total matter density */
+  MyFloat SubfindDMDensity; /* dark matter density */
+  MyFloat SubfindVelDisp;   /* 3D DM velocity dispersion */
+#endif                      /* #ifdef SUBFIND_CALC_MORE */
+
+#endif /* #ifdef SUBFIND */
+} * PS;
+
+/*****************************************************************************
+ ** cell data ****************************************************************
+ ****************************************************************************/
+
+/*! \brief Holds data that is stored for each hydro mesh cell in addition to
+ *         the collisionless variables.
+ */
+extern struct sph_particle_data
+{
+  /* conserved variables */
+  MyFloat Energy;
+  MyFloat Momentum[3];
+  MyFloat Volume;
+  MyFloat OldMass;
+
+  /* primitive variables */
+  MyFloat Density;
+  MyFloat Pressure; /*!< current pressure */
+  MySingle Utherm;  /* stored as MySingle, unlike Density and Pressure (MyFloat) */
+
+#ifdef HIERARCHICAL_GRAVITY
+  MySingle FullGravAccel[3];
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+
+  /* variables for mesh  */
+  MyDouble Center[3];    /*!< center of mass of cell */
+  MySingle VelVertex[3]; /*!< current vertex velocity (primitive variable) */
+
+  MySingle MaxDelaunayRadius;
+  MySingle Hsml; /* auxiliary search radius for points around a delaunay triangle */
+  MySingle SurfaceArea;
+
+#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE)
+  MySingle MaxFaceAngle;
+#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */
+
+  MySingle ActiveArea;
+
+#if defined(OUTPUT_DIVVEL)
+  MyFloat DivVel; /*!< divergence of the velocity field */
+#endif            /* #if defined(OUTPUT_DIVVEL) */
+
+#if defined(REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED) || defined(OUTPUT_CURLVEL)
+  MySingle CurlVel; /*!< magnitude of the curl of the velocity field */
+#endif              /* #if defined(REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED) || defined(OUTPUT_CURLVEL) */
+
+#ifdef TREE_BASED_TIMESTEPS
+  MySingle CurrentMaxTiStep;
+  MySingle Csnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+#if defined(REFINEMENT_HIGH_RES_GAS)
+  MyFloat HighResMass;
+  MyFloat HighResDensity;
+#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */
+
+#ifdef MHD
+  MyFloat B[3];
+  MyFloat BConserved[3];
+  MyFloat DivB;
+  MyFloat CurlB[3];
+#endif /* #ifdef MHD */
+
+#ifdef PASSIVE_SCALARS
+  MyFloat PScalars[PASSIVE_SCALARS]; /* the value of the PASSIVE_SCALARS macro is used as the array length */
+  MyFloat PConservedScalars[PASSIVE_SCALARS];
+#endif /* #ifdef PASSIVE_SCALARS */
+
+#ifdef OUTPUT_SURFACE_AREA
+  int CountFaces;
+#endif /* #ifdef OUTPUT_SURFACE_AREA */
+
+#if defined(REFINEMENT_SPLIT_CELLS)
+  MySingle MinimumEdgeDistance;
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */
+
+#if defined(COOLING)
+  MyFloat Ne; /* electron fraction, expressed as local electron number
+                 density normalized to the hydrogen number density. Gives
+                 indirectly ionization state and mean molecular weight. */
+#endif        /* #if defined(COOLING) */
+
+#ifdef USE_SFR
+  MySingle Sfr;
+#endif /* #ifdef USE_SFR */
+
+#ifdef OUTPUT_COOLHEAT
+  MyFloat CoolHeat;
+#endif /* #ifdef OUTPUT_COOLHEAT */
+
+  struct grad_data Grad; /* struct grad_data is declared outside this section of the header */
+
+  int first_connection; /* NOTE(review): presumably first entry of this cell in a connection list -- confirm */
+  int last_connection;  /* NOTE(review): presumably last entry of this cell in a connection list -- confirm */
+
+#ifdef REFINEMENT_HIGH_RES_GAS /* the same option also adds HighResMass/HighResDensity further up */
+  int AllowRefinement;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+
+#ifdef REFINEMENT_SPLIT_CELLS /* the same option also adds MinimumEdgeDistance further up */
+  MySingle SepVector[3];
+#endif /* #ifdef REFINEMENT_SPLIT_CELLS */
+
+#ifdef REFINEMENT_VOLUME_LIMIT
+  MyFloat MinNgbVolume;
+#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */
+
+  double TimeLastPrimUpdate; /* NOTE(review): presumably time of the last primitive-variable update -- confirm */
+
+#ifdef ADDBACKGROUNDGRID
+  MyFloat Weight;
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+} * SphP,          /*!< holds SPH particle data on local processor */
+    *DomainSphBuf; /*!< buffer for SPH particle data in domain decomposition */
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+extern struct special_particle_data /*!< ID, position and mass record; declared only with EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+{
+  MyIDType ID;
+  double pos[3]; /* plain double here, not MyDouble */
+  double mass;
+} * PartSpecialListGlobal; /* NOTE(review): presumably the list of such particles gathered from all tasks -- confirm */
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+extern peanokey *DomainKeyBuf; /* NOTE(review): presumably a key buffer for domain decomposition, like DomainPartBuf/DomainSphBuf -- confirm */
+
+/*! \brief Global state of the system: totals plus per-type (NTYPES) breakdowns of mass, energies, momenta and center of mass.
+ *  NOTE(review): the vector quantities have 4 entries; the 4th presumably stores the magnitude -- confirm. */
+extern struct state_of_system
+{
+  double Mass, EnergyKin, EnergyPot, EnergyInt, EnergyTot, Momentum[4], AngMomentum[4], CenterOfMass[4], MassComp[NTYPES],
+      EnergyKinComp[NTYPES], EnergyPotComp[NTYPES], EnergyIntComp[NTYPES], EnergyTotComp[NTYPES], MomentumComp[NTYPES][4],
+      AngMomentumComp[NTYPES][4], CenterOfMassComp[NTYPES][4];
+} SysState, SysStateAtStart, SysStateAtEnd; /* three separate instances of the same record */
+
+/*! \brief Struct used for passing the parameters during the mesh cell search.
+ */
+typedef struct
+{
+  MyDouble Pos[3];
+  int Task;
+  union
+  {
+    int Index;
+    float hsmlguess;
+  } u; /* Index and hsmlguess share the same storage; only one of them is meaningful at a time */
+
+} mesh_search_data;
+
+/*! \brief Struct used for sending positions to other tasks during the
+ *         mesh cell search.
+ */
+typedef struct
+{
+  MyDouble Pos[3];
+  MyFloat Distance; /* MyFloat here, whereas mesh_search_response below stores its Distance as MyDouble */
+} mesh_search_request;
+
+/*! \brief Struct used for receiving the results from other tasks during the
+ *         mesh cell search.
+ */
+typedef struct
+{
+  MyDouble Distance; /* MyDouble here, whereas mesh_search_request above sends its Distance as MyFloat */
+  int Task;
+  int Index;
+} mesh_search_response;
+
+extern struct data_partlist /*!< (task, index) pair describing one exported item; array reached through PartList */
+{
+  int Task;  /*!< The task the item was exported to. */
+  int Index; /*!< The particle index of the item on the sending task. */
+} * PartList;
+
+extern struct datanodelist /*!< (task, index, node) triple for a node to be opened remotely; array reached through NodeList */
+{
+  int Task;  /*!< target process */
+  int Index; /*!< local index that wants to open this node */
+  int Node;  /*!< node to be opened on foreign process */
+} * NodeList;
+
+#define FAC_AVG_NODES_PER_EXPORT 4.0 /*!< default choice for estimated average number of exported nodes per exported particle */
+
+extern struct directdata /*!< record with position, mass, type and softening type; arrays DirectDataIn / DirectDataAll */
+{
+  MyDouble Pos[3];
+  MyDouble Mass;
+  unsigned char Type;
+  unsigned char SofteningType;
+} * DirectDataIn, *DirectDataAll; /* NOTE(review): presumably local input vs. data gathered from all tasks -- confirm */
+
+extern struct accdata /*!< acceleration record (plus potential when EVALPOTENTIAL is defined); arrays DirectAccOut / DirectAccIn */
+{
+  MyFloat Acc[3];
+#ifdef EVALPOTENTIAL
+  MyFloat Potential; /* present only with EVALPOTENTIAL, so the record size depends on that option */
+#endif /* #ifdef EVALPOTENTIAL */
+} * DirectAccOut, *DirectAccIn;
+
+#if defined(EVALPOTENTIAL) || defined(OUTPUTPOTENTIAL) || defined(SUBFIND)
+extern struct potdata_out
+{
+  MyFloat Potential;
+}
+    /*! \brief Holds the partial results computed for imported particles. Note:
+     *         We use GravDataResult = GravDataGet, such that the result replaces
+     *         the imported data. NOTE(review): the GravData* names do not match
+     *         the PotData* pointers declared here -- confirm what is meant. */
+    * PotDataResult,
+    /*! \brief Holds partial results received from other processors. This will
+     *         overwrite the GravDataIn array (NOTE(review): that name is not
+     *         declared in this section -- confirm). */
+    *PotDataOut;
+#endif /* #if defined (EVALPOTENTIAL) || defined (OUTPUTPOTENTIAL) || defined(SUBFIND) */
+
+/*! \brief Buffer of size NTask used for flagging whether a particle needs to
+ *         be exported to the other tasks.
+ */
+extern int *Exportflag;
+/*! \brief Buffer of size NTask used for counting how many nodes are to be
+ *         exported to the other tasks. NOTE(review): originally phrased as a question -- confirm.
+ */
+extern int *Exportnodecount;
+/*! \brief Buffer of size NTask used for holding the index into the
+ *         DataIndexTable.
+ */
+extern int *Exportindex;
+/*! \brief Array of NTask size of the offset into the send array where the
+ *         objects to be sent to the specified task start.
+ */
+extern int *Send_offset,
+    /*! \brief Array of NTask size of the number of objects to send to the
+     *  tasks.
+     */
+    *Send_count,
+    /*! \brief Array of NTask size of the number of objects to receive from the
+     *         tasks.
+     */
+    *Recv_count,
+    /*! \brief Array of NTask size of the offset into the receive array where the
+     *         objects from the specified task start.
+     */
+    *Recv_offset;
+
+extern int *TasksThatSend, *TasksThatRecv, NSendTasks, NRecvTasks; /* NOTE(review): presumably task lists and their lengths -- confirm */
+
+extern struct send_recv_counts /*!< pair of counters; arrays reached through Send and Recv */
+{
+  int Count;
+  int CountNodes;
+} * Send, *Recv;
+
+extern int *Send_offset_nodes, *Send_count_nodes, *Recv_count_nodes, *Recv_offset_nodes; /* node counterparts of Send_offset etc. by name -- NOTE(review): confirm */
+
+extern int Mesh_nimport, Mesh_nexport, *Mesh_Send_offset, *Mesh_Send_count, *Mesh_Recv_count, *Mesh_Recv_offset; /* Mesh_-prefixed set of counters and offset/count arrays */
+
+extern int Force_nimport, Force_nexport, *Force_Send_offset, *Force_Send_count, *Force_Recv_count, *Force_Recv_offset; /* Force_-prefixed set of counters and offset/count arrays */
+
+/*! \brief Header for the standard file format. NTYPES_INT_HEADER (defined just below) sizes the integer count arrays:
+ *         it is 8 when NTYPES is 7 or 8, otherwise NTYPES. A padding member (fill) is declared only for NTYPES 6 or 7. */
+#if(NTYPES == 7 || NTYPES == 8)
+#define NTYPES_INT_HEADER 8
+#else /* #if (NTYPES==7 || NTYPES==8) */
+#define NTYPES_INT_HEADER NTYPES
+#endif /* #if (NTYPES==7 || NTYPES==8) #else */
+extern struct io_header
+{
+  int npart[NTYPES_INT_HEADER];                       /*!< number of particles of each type in this file */
+  double mass[NTYPES];                                /*!< mass of particles of each type. If 0, then the masses are explicitly
+                                                         stored in the mass-block of the snapshot file, otherwise they are omitted */
+  double time;                                        /*!< time of snapshot file */
+  double redshift;                                    /*!< redshift of snapshot file */
+  int flag_sfr;                                       /*!< flags whether the simulation was including star formation */
+  int flag_feedback;                                  /*!< flags whether feedback was included (obsolete) */
+  unsigned int npartTotal[NTYPES_INT_HEADER];         /*!< total number of particles of each type in this snapshot. This can be
+                                         different from npart if one is dealing with a multi-file snapshot. */
+  int flag_cooling;                                   /*!< flags whether cooling was included  */
+  int num_files;                                      /*!< number of files in multi-file snapshot */
+  double BoxSize;                                     /*!< box-size of simulation in case periodic boundaries were used */
+  double Omega0;                                      /*!< matter density in units of critical density */
+  double OmegaLambda;                                 /*!< cosmological constant parameter */
+  double HubbleParam;                                 /*!< Hubble parameter in units of 100 km/sec/Mpc */
+  int flag_stellarage;                                /*!< flags whether the file contains formation times of star particles */
+  int flag_metals;                                    /*!< flags whether the file contains metallicity values for gas and star
+                                                         particles */
+  unsigned int npartTotalHighWord[NTYPES_INT_HEADER]; /*!< High word of the total number of particles of each type */
+  int flag_entropy_instead_u;                         /*!< flags that IC-file contains entropy instead of u */
+  int flag_doubleprecision;                           /*!< flags that snapshot contains double-precision instead of single precision */
+
+  int flag_lpt_ics;        /*!< flag to signal that IC file contains 2lpt initial conditions */
+  float lpt_scalingfactor; /*!< scaling factor for 2lpt initial conditions */
+
+  int flag_tracer_field; /*!< flags presence of a tracer field */
+
+  int composition_vector_length; /*!< specifies the length of the composition vector (0 if not present)  */
+
+#if(NTYPES == 6)
+  char fill[40];   /*!< fills to 256 Bytes */
+#elif(NTYPES == 7) /* #if (NTYPES==6) */
+  char fill[8]; /*!< fills to 256 Bytes */
+#endif             /* #elif (NTYPES==7); for any other NTYPES no fill member exists */
+} header;          /*!< holds header for snapshot files */
+
+/*! \brief Header for the ICs file format, if NTYPES does not match.
+ */
+#ifdef NTYPES_ICS
+extern struct io_header_ICs
+{
+  int npart[NTYPES_ICS];                       /*!< number of particles of each type in this file */
+  double mass[NTYPES_ICS];                     /*!< mass of particles of each type. If 0, then the masses are explicitly
+                                                  stored in the mass-block of the snapshot file, otherwise they are omitted */
+  double time;                                 /*!< time of snapshot file */
+  double redshift;                             /*!< redshift of snapshot file */
+  int flag_sfr;                                /*!< flags whether the simulation was including star formation */
+  int flag_feedback;                           /*!< flags whether feedback was included (obsolete) */
+  unsigned int npartTotal[NTYPES_ICS];         /*!< total number of particles of each type in this snapshot. This can be
+                                          different from npart if one is dealing with a multi-file snapshot. */
+  int flag_cooling;                            /*!< flags whether cooling was included  */
+  int num_files;                               /*!< number of files in multi-file snapshot */
+  double BoxSize;                              /*!< box-size of simulation in case periodic boundaries were used */
+  double Omega0;                               /*!< matter density in units of critical density */
+  double OmegaLambda;                          /*!< cosmological constant parameter */
+  double HubbleParam;                          /*!< Hubble parameter in units of 100 km/sec/Mpc */
+  int flag_stellarage;                         /*!< flags whether the file contains formation times of star particles */
+  int flag_metals;                             /*!< flags whether the file contains metallicity values for gas and star
+                                                  particles */
+  unsigned int npartTotalHighWord[NTYPES_ICS]; /*!< High word of the total number of particles of each type */
+  int flag_entropy_instead_u;                  /*!< flags that IC-file contains entropy instead of u */
+  int flag_doubleprecision;                    /*!< flags that snapshot contains double-precision instead of single precision */
+
+  int flag_lpt_ics;        /*!< flag to signal that IC file contains 2lpt initial conditions */
+  float lpt_scalingfactor; /*!< scaling factor for 2lpt initial conditions */
+
+  int flag_tracer_field; /*!< flags presence of a tracer field */
+
+  int composition_vector_length; /*!< specifies the length of the composition vector (0 if not present)  */
+
+#if(NTYPES_ICS == 6)
+  char fill[40]; /*!< fills to 256 Bytes */
+#else            /* #if (NTYPES_ICS==6) */
+  terminate("NTYPES_ICS != 6")
+#endif           /* #if (NTYPES_ICS==6) #else */
+} header_ICs;    /*!< holds header for IC files */
+#endif           /* #ifdef NTYPES_ICS */
+
+enum iofields /*!< identifiers of I/O fields; values are assigned implicitly from 0 in declaration order */
+{
+  IO_POS,
+  IO_VEL,
+  IO_ID,
+  IO_MASS,
+  IO_U,
+  IO_RHO,
+  IO_VORT,
+  IO_VOL,
+  IO_CM,
+  IO_VERTEXVEL,
+  IO_FACEANGLE,
+  IO_SAREA,
+  IO_NFACES,
+
+  IO_HIGHRESMASS,
+  IO_PRESSURE,
+  IO_CSND,
+  IO_NE,
+  IO_NH,
+  IO_SFR,
+
+  IO_POT,
+  IO_ACCEL,
+  IO_GRADP,
+  IO_GRADR,
+  IO_GRADV,
+  IO_GRADB,
+
+  IO_POT_MINI,
+  IO_POS_MINI,
+
+  IO_HI,
+  IO_TSTP,
+  IO_BFLD,
+  IO_DIVB,
+  IO_COOLRATE,
+  IO_ALLOWREFINEMENT,
+
+  IO_DIVVEL,
+  IO_CURLVEL,
+  IO_COOLHEAT,
+  IO_PASS,
+
+  IO_SUBFINDHSML,
+  IO_SUBFINDDENSITY,
+  IO_SUBFINDDMDENSITY,
+  IO_SUBFINDVELDISP,
+  IO_GROUPNR,
+
+  IO_SOFTENING,
+  IO_TASK,
+  IO_TIMEBIN_HYDRO,
+
+  IO_LASTENTRY /* This should be kept - it signals the end of the list; new entries must be added above it */
+};
+
+enum arrays /*!< array selector used by IO_Field.array; NOTE(review): names suggest the SphP, P and PS arrays -- confirm */
+{
+  A_NONE = 0,
+  A_SPHP = 1,
+  A_P    = 2,
+  A_PS   = 3
+};
+
+enum types_in_file /*!< type codes for data as stored in a file; every value is explicit, so listing order is irrelevant */
+{
+  FILE_NONE        = -1,
+  FILE_INT         = 0,
+  FILE_MY_IO_FLOAT = 1,
+  FILE_MY_ID_TYPE  = 2,
+  FILE_DOUBLE      = 3,
+  FILE_FLOAT       = 4
+};
+
+enum types_in_memory /*!< type codes for data as held in memory; numbered consecutively from 0 */
+{
+  MEM_INT        = 0,
+  MEM_MY_ID_TYPE = 1,
+  MEM_FLOAT      = 2,
+  MEM_DOUBLE     = 3,
+  MEM_MY_SINGLE  = 4,
+  MEM_MY_FLOAT   = 5,
+  MEM_MY_DOUBLE  = 6,
+  MEM_NONE       = 7
+};
+
+enum e_typelist /*!< particle-type selections written as bit masks (one bit per type, NTYPES bits in ALL_TYPES) */
+{
+  GAS_ONLY                      = (1 << 0),
+  STARS_ONLY                    = (1 << 4),
+  GAS_AND_STARS                 = (1 << 0) | (1 << 4),
+  BHS_ONLY                      = (1 << 5),
+  ALL_TYPES                     = ((1 << NTYPES) - 1),
+  SET_IN_GET_PARTICLES_IN_BLOCK = 0
+};
+
+enum sn_type /*!< snapshot kind used by IO_Field.snap_type; enumerators are numbered 0..3 in declaration order */
+{
+  SN_FULL,
+  SN_MINI,
+  SN_MINI_ONLY,
+  SN_NO_SUBBOX
+};
+
+typedef struct /*!< description of a single I/O field; entries are reached through IO_Fields */
+{
+  enum iofields field;
+  enum types_in_memory type_in_memory;
+  enum types_in_file type_in_file_input;
+  enum types_in_file type_in_file_output;
+  int values_per_block;
+  char label[4]; /* exactly 4 chars, so there may be no room for a terminating NUL -- NOTE(review): confirm how it is filled */
+  char datasetname[256];
+  void (*io_func)(int, int, void *, int); /* optional callback; NOTE(review): argument meaning is not visible here -- confirm */
+  int typelist; /* NOTE(review): presumably a mask built from enum e_typelist values -- confirm */
+  enum arrays array;
+  size_t offset; /* NOTE(review): presumably byte offset of the member inside the selected array's element -- confirm */
+  enum sn_type snap_type;
+
+  char hasunit; /* NOTE(review): presumably flags whether the unit fields below are set -- confirm */
+  double a;
+  double h;
+  double L;
+  double M;
+  double V;
+  double c;
+} IO_Field;
+
+extern IO_Field *IO_Fields; /* pointer to IO_Field entries */
+extern int N_IO_Fields;     /* NOTE(review): presumably number of entries in use -- confirm */
+extern int Max_IO_Fields;   /* NOTE(review): presumably allocated capacity of IO_Fields -- confirm */
+
+extern char (*Parameters)[MAXLEN_PARAM_TAG];        /* pointer to rows of MAXLEN_PARAM_TAG chars */
+extern char (*ParametersValue)[MAXLEN_PARAM_VALUE]; /* pointer to rows of MAXLEN_PARAM_VALUE chars */
+extern char *ParametersType;                        /* one char per entry; NOTE(review): encoding of the type is not visible here */
+
+/*! \brief The tree data structure.
+ *
+ *  Nodes points to the actual memory
+ *  allocated for the internal nodes, but is shifted such that
+ *  Nodes[All.MaxPart] gives the first allocated node. Note that node
+ *  numbers less than All.MaxPart are the leaf nodes that contain a
+ *  single particle, and node numbers >= MaxPart+MaxNodes are "pseudo
+ *  particles" that hang off the toplevel leaf nodes belonging to
+ *  other tasks. These are not represented by this structure. Instead,
+ *  the tree traversal for these are saved in the Nextnode, Prevnode
+ *  and Father arrays, indexed with the node number in the case of
+ *  real particles and by nodenumber-MaxNodes for pseudo
+ *  particles.
+ */
+extern struct NODE
+{
+  union
+  {
+    int suns[8]; /*!< temporary pointers to daughter nodes */
+    struct
+    {
+      MyDouble s[3]; /*!< center of mass of node */
+      MyDouble mass; /*!< mass of node */
+      /*! The next node in the tree walk in case the current node does
+       *  not need to be opened. This means that it traverses the 8
+       *  subnodes of a node in a breadth-first fashion, and then goes
+       *  to father->sibling.
+       */
+      int sibling;
+      /*! The next node in case the current node needs to be
+       *  opened. Applying nextnode repeatedly results in a pure
+       *  depth-first traversal of the tree.
+       */
+      int nextnode;
+      /*! The parent node of the node. (Is -1 for the root node.)
+       */
+      int father;
+#if(NSOFTTYPES > 1)
+      unsigned char maxsofttype; /**< hold the maximum gravitational softening of particles */
+#if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING)
+      unsigned char maxhydrosofttype;
+      unsigned char minhydrosofttype;
+#endif /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) */
+#endif /* #if (NSOFTTYPES > 1) */
+    } d; /* shares its storage with suns[]: writing d overwrites the temporary daughter pointers */
+  } u;
+
+  MyDouble center[3]; /*!< geometrical center of node */
+  MyFloat len;        /*!< sidelength of treenode */
+
+} * Nodes;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+extern struct ExtNODE /*!< extra per-node data, declared only with MULTIPLE_NODE_SOFTENING; array reached through ExtNodes */
+{
+  MyDouble mass_per_type[NSOFTTYPES]; /* one entry per softening type (NSOFTTYPES entries) */
+} * ExtNodes;
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+/*! Gives next node in tree walk for the "particle" nodes. Entries 0
+ *  -- MaxPart-1 are the real particles, and the "pseudoparticles" are
+ *  indexed by the node number-MaxNodes.
+ */
+extern int *Nextnode;
+
+/*! Gives previous node in tree walk for the leaf (particle)
+ *  nodes. Entries 0 -- MaxPart-1 are the real particles, and the
+ *  "pseudoparticles" are indexed by the node number-MaxNodes.
+ *  NOTE(review): this wording fits a "Prevnode" array; confirm that it really describes Father. */
+extern int *Father;
+
+/*! Variables for neighbor tree */
+extern int Ngb_MaxPart;
+extern int Ngb_NumNodes;
+extern int Ngb_MaxNodes;
+extern int Ngb_FirstNonTopLevelNode;
+extern int Ngb_NextFreeNode;
+extern int *Ngb_Father;
+extern int *Ngb_Marker;
+extern int Ngb_MarkerValue;
+
+extern int *Ngb_DomainNodeIndex;
+extern int *DomainListOfLocalTopleaves; /* Domain* names without the Ngb_ prefix are declared in this group as well */
+extern int *DomainNLocalTopleave;
+extern int *DomainFirstLocTopleave;
+extern int *Ngb_Nextnode; /* NOTE(review): presumably the neighbor-tree counterpart of Nextnode -- confirm */
+
+/*! The ngb-tree data structure
+ */
+extern struct NgbNODE
+{
+  union
+  {
+    int suns[8]; /*!< temporary pointers to daughter nodes */
+    struct
+    {
+      int sibling;
+      int nextnode;
+      MyNgbTreeFloat range_min[3];
+      MyNgbTreeFloat range_max[3];
+    } d; /* shares its storage with suns[] */
+  } u;
+
+  MyNgbTreeFloat vertex_vmin[3];
+  MyNgbTreeFloat vertex_vmax[3];
+
+  int father; /* kept outside the union here, whereas struct NODE stores father inside u.d */
+
+  integertime Ti_Current;
+
+} * Ngb_Nodes;
+
+extern struct ExtNgbNODE /*!< extra neighbor-tree node data; array reached through ExtNgb_Nodes */
+{
+  float vmin[3]; /* NOTE(review): presumably per-axis minimum velocity in the node -- confirm */
+  float vmax[3]; /* NOTE(review): presumably per-axis maximum velocity in the node -- confirm */
+  float MaxCsnd; /* NOTE(review): presumably maximum sound speed in the node -- confirm */
+} * ExtNgb_Nodes;
+
+#ifdef STATICNFW /* the globals below are declared only when STATICNFW is defined */
+extern double Rs, R200;
+extern double Dc;
+extern double RhoCrit, V200;
+extern double fac; /* very generic global name -- NOTE(review): meaning is not visible here, confirm at its definition */
+#endif /* #ifdef STATICNFW */
+
+extern int MaxThreads; /* NOTE(review): presumably the maximum number of threads per task -- confirm */
+
+#endif /* #define ALLVARS_H */
diff --git a/src/amuse/community/arepo/src/main/main.c b/src/amuse/community/arepo/src/main/main.c
new file mode 100644
index 0000000000..f1ae80be6a
--- /dev/null
+++ b/src/amuse/community/arepo/src/main/main.c
@@ -0,0 +1,296 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/main/main.c
+ * \date        05/2018
+ * \brief       Start of the program.
+ * \details     contains functions:
+ *                int main(int argc, char **argv)
+ *                void endrun()
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+// #ifdef HAVE_HDF5
+// #include <hdf5.h>
+// #endif /* #ifdef HAVE_HDF5 */
+
+/*! \brief The entry point of the program.
+ *
+ *  Initializes MPI, prints the welcome message and sets up the CPU-time
+ *  log. Command line parsing is disabled in this version: the parameter
+ *  file name is fixed to "param.txt", RestartFlag to 0 and RestartSnapNum
+ *  to -1. begrun1() sets up the run, read_ic() loads All.InitCondFile,
+ *  init() prepares the IC's and begrun2() finishes the initialization.
+ *  Finally run(), the main simulation loop, is started.
+ *
+ *  \param[in] argc Argument count; only forwarded to MPI_Init().
+ *  \param[in] argv Argument vector; only forwarded to MPI_Init().
+ *
+ *  \return status of exit; 0 for normal exit.
+ */
+int main(int argc, char **argv)
+{
+// #ifdef IMPOSE_PINNING
+//   detect_topology();
+//   get_core_set();
+// #endif /* #ifdef IMPOSE_PINNING */
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
+  MPI_Comm_size(MPI_COMM_WORLD, &NTask);
+
+  /* output a welcome message */
+  hello();
+
+  /* initialize CPU-time/Wallclock-time measurement */
+  init_cpu_log();
+
+  determine_compute_nodes();
+
+// #ifdef IMPOSE_PINNING
+//   /* pin the MPI ranks to the available core set */
+//   pin_to_core_set();
+//   report_pinning();
+// #endif /* #ifdef IMPOSE_PINNING */
+
+// #ifdef HOST_MEMORY_REPORTING
+//   mpi_report_committable_memory();
+// #endif /* #ifdef HOST_MEMORY_REPORTING */
+
+  for(PTask = 0; NTask > (1 << PTask); PTask++) /* PTask becomes the smallest integer with 2^PTask >= NTask */
+    ;
+
+  begrun0();
+
+  // if(argc < 2)
+  //   {
+  //     if(ThisTask == 0)
+  //       {
+  //         printf("\nParameters are missing. \n");
+  //         printf("Call with <ParameterFile> [<RestartFlag>] [<RestartSnapNum>] [<SpecialOptions>]\n");
+  //         printf("\n");
+  //         printf("   RestartFlag    Action\n");
+  //         printf("       0          Read initial conditions and start simulation\n");
+  //         printf("       1          Read restart files and resume simulation\n");
+  //         printf("       2          Restart from specified snapshot dump and resume simulation\n");
+  //         printf("       3          Run FOF and optionally SUBFIND: [<SubboxSnapNum> for SUBBOX_SNAPSHOTS]\n");
+  //         printf(
+  //             "       6          Convert snapshot file to different format [input=ICFormat  output=SnapFormat   NOTE: derived "
+  //             "quantities have round-off errors!\n");
+  //         printf("      14          Write out the Voronoi mesh: <SnapNum>\n");
+  //         printf("      17          Write out snapshot dump with measured gradients\n");
+  //         printf("      18          Recalculate gravitational potential values for specified snaphot dump: <snapnum>\n");
+  //         printf("\n");
+  //       }
+  //     endrun();
+  //   }
+
+  strcpy(ParameterFile, "param.txt"); /* command line parsing removed: the parameter file name is fixed (was argv[1]) */
+
+  // if(argc >= 3)
+  //   RestartFlag = atoi(argv[2]);
+  // else
+  RestartFlag = 0;
+
+  // if(argc >= 4)
+  //   RestartSnapNum = atoi(argv[3]);
+  // else
+  RestartSnapNum = -1; /* keep the "no snapshot number given" default instead of leaving the global unassigned */
+
+  // Do minimal validation of arguments here rather than in random places in the code
+  // if((RestartFlag == 3 || RestartFlag == 6 || RestartFlag == 14 || RestartFlag == 17 || RestartFlag == 18) && RestartSnapNum < 0)
+  //   {
+  //     mpi_printf("Need to give the snapshot number\n");
+  //     return (0);
+  //   }
+
+// #ifndef RECOMPUTE_POTENTIAL_IN_SNAPSHOT
+//   if(RestartFlag == 18)
+//     {
+//       mpi_printf("Need RECOMPUTE_POTENTIAL_IN_SNAPSHOT for this option\n");
+//       return (0);
+//     }
+// #endif /* #ifndef RECOMPUTE_POTENTIAL_IN_SNAPSHOT */
+
+// #ifdef RUNNING_SAFETY_FILE
+//   /* do not run if 'running' safety file exists */
+//   int runningflag = 0;
+//   if(ThisTask == 0)
+//     {
+//       FILE *fd;
+//       char runningfname[MAXLEN_PATH];
+
+//       sprintf(runningfname, "./running");
+//       if((fd = fopen(runningfname, "r"))) /* Is the running-file present? If yes, interrupt the run. */
+//         {
+//           fclose(fd);
+//           printf("running-file detected. stopping.\n");
+//           runningflag = 1;
+//         }
+//     }
+//   MPI_Bcast(&runningflag, 1, MPI_INT, 0, MPI_COMM_WORLD);
+//   if(runningflag)
+//     {
+//       MPI_Finalize(); /* do not call endrun() */
+//       return 0;
+//     }
+//   else
+//     {
+//       /* touch a running safety file */
+//       if(ThisTask == 0)
+//         {
+//           FILE *fd;
+//           char runningfname[MAXLEN_PATH];
+
+//           sprintf(runningfname, "./running");
+//           if((fd = fopen(runningfname, "w")))
+//             {
+//               fclose(fd);
+//               printf("touching a running-file: %s \n", runningfname);
+//             }
+//           else
+//             terminate("could not touch a running-file: %s\n", runningfname);
+//         }
+//     }
+// #endif /* #ifdef RUNNING_SAFETY_FILE */
+
+  begrun1(); /* set-up run  */
+
+  /* restart-file loading is disabled below; an IC file is always read */
+  // if(RestartFlag == 1)
+  //   loadrestart();
+  // else
+  //   {
+  /* Snapshot restarts are disabled too, so the file name is always All.InitCondFile. */
+  char fname[MAXLEN_PATH];
+
+  // if(RestartFlag >= 2 && RestartSnapNum >= 0)
+  //   {
+  //     if(All.NumFilesPerSnapshot > 1)
+  //       sprintf(fname, "%s/snapdir_%03d/%s_%03d", All.OutputDir, RestartSnapNum, All.SnapshotFileBase, RestartSnapNum);
+  //     else
+  //       sprintf(fname, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, RestartSnapNum);
+  //   }
+  // else
+  strcpy(fname, All.InitCondFile);
+
+  /* now we can load the file */
+
+#ifdef READ_DM_AS_GAS
+  read_ic(fname, (RestartFlag == 14) ? 0x02 : LOAD_TYPES);
+#else  /* #ifdef READ_DM_AS_GAS */
+  read_ic(fname, (RestartFlag == 14) ? 0x01 : LOAD_TYPES);
+#endif /* #ifdef READ_DM_AS_GAS #else */
+
+  /* If we are supposed to just convert the file, write and exit here. */
+  // if(RestartFlag == 6)
+  //   {
+  //     /* important for proper functioning of FOF+SUBFIND */
+  //     if(All.ComovingIntegrationOn) /* change to new velocity variable */
+  //       {
+  //         int i, j;
+  //         for(i = 0; i < NumPart; i++)
+  //           for(j = 0; j < 3; j++)
+  //             P[i].Vel[j] *= sqrt(All.Time) * All.Time;
+  //       }
+  //     set_softenings();
+  //     All.TopNodeAllocFactor = 0.08;
+  //     All.TreeAllocFactor    = 0.7;
+  //     All.NgbTreeAllocFactor = 0.7;
+
+  //     sprintf(All.SnapshotFileBase, "%s_converted", All.SnapshotFileBase);
+  //     mpi_printf("Start writing file %s\nRestartSnapNum %d\n", All.SnapshotFileBase, RestartSnapNum);
+  //     savepositions(RestartSnapNum, 0);
+  //     endrun();
+  //   }
+
+  /* init returns a status code, where a value of >=0 means that endrun() should be called. */
+  int status = init();
+
+  if(status >= 0)
+    {
+      if(status > 0)
+        mpi_printf("init() returned with %d\n", status);
+
+      endrun();
+    }
+  // }
+
+  begrun2();
+
+  run(); /* main simulation loop */
+
+  endrun(); /* clean up & finalize MPI */
+
+  return 0;
+}
+
+/*! \brief This function ends the simulations in case of no error.
+ *
+ *  This method has to be called by all processes. It should be used only
+ *  if the simulation ends without errors.
+ *  Otherwise terminate() should be used instead.
+ *
+ *  \return void; in practice the call never returns because it ends with exit(0).
+ */
+void endrun()
+{
+  mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second()));
+  mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n");
+  fflush(stdout); /* flush the messages above before the process exits below */
+
+#ifdef HAVE_HDF5
+  /*The hdf5 library will sometimes register an atexit() handler that calls its
+   * error handler. In AREPO this is set to my_hdf_error_handler, which calls
+   * MPI_Abort. Calling MPI_Abort after MPI_Finalize is not allowed.
+   * Hence unset the HDF error handler here
+   */
+  H5Eset_auto(NULL, NULL);
+#endif /* #ifdef HAVE_HDF5 */
+
+// #ifdef RUNNING_SAFETY_FILE
+//   if(All.Ti_Current < TIMEBASE) /* simulation has not reached the final time */
+//     {
+//       char running_fname[MAXLEN_PATH], running_done_fname[MAXLEN_PATH];
+//       sprintf(running_fname, "./running");
+//       sprintf(running_done_fname, "./running_done");
+//       rename(running_fname, running_done_fname);
+//       mpi_printf("moved ./running file to ./running_done, job can now restart.\n");
+//     }
+//   else
+//     mpi_printf("leaving ./running file in place since run is complete to prevent any restarts.\n");
+// #endif /* #ifdef RUNNING_SAFETY_FILE */
+
+  MPI_Finalize();
+  exit(0); /* terminates the process with status 0, so control never goes back to the caller */
+}
diff --git a/src/amuse/community/arepo/src/main/main_original.c b/src/amuse/community/arepo/src/main/main_original.c
new file mode 100644
index 0000000000..629e988526
--- /dev/null
+++ b/src/amuse/community/arepo/src/main/main_original.c
@@ -0,0 +1,299 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/main/main.c
+ * \date        05/2018
+ * \brief       Start of the program.
+ * \details     contains functions:
+ *                int main(int argc, char **argv)
+ *                void endrun()
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef HAVE_HDF5
+#include <hdf5.h>
+#endif /* #ifdef HAVE_HDF5 */
+
+/*! \brief The entry point of the program.
+ *
+ *  Initializes MPI, parses the command line (parameter file, restart flag,
+ *  snapshot number) and calls begrun1() to set up the simulation. Then either
+ *  restart files or IC's (possibly a snapshot) are loaded; in case of IC's
+ *  init() is called, which prepares the IC's for the run. A call to begrun2()
+ *  finishes the initialization. Finally, run() is started, the main simulation
+ *  loop. The early argument-validation exits finalize MPI before returning.
+ *
+ *  \param[in] argc Argument count from command line.
+ *  \param[in] argv Argument vector from command line.
+ *  \return status of exit; 0 for normal exit.
+ */
+int main(int argc, char **argv)
+{
+#ifdef IMPOSE_PINNING
+  detect_topology();
+  get_core_set();
+#endif /* #ifdef IMPOSE_PINNING */
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
+  MPI_Comm_size(MPI_COMM_WORLD, &NTask);
+
+  /* output a welcome message */
+  hello();
+
+  /* initialize CPU-time/Wallclock-time measurement */
+  init_cpu_log();
+
+  determine_compute_nodes();
+
+#ifdef IMPOSE_PINNING
+  /* pin the MPI ranks to the available core set */
+  pin_to_core_set();
+  report_pinning();
+#endif /* #ifdef IMPOSE_PINNING */
+
+#ifdef HOST_MEMORY_REPORTING
+  mpi_report_committable_memory();
+#endif /* #ifdef HOST_MEMORY_REPORTING */
+
+  Argc = argc;
+  Argv = argv;
+
+  for(PTask = 0; NTask > (1 << PTask); PTask++)
+    ;
+
+  begrun0();
+
+  if(argc < 2)
+    {
+      if(ThisTask == 0)
+        {
+          printf("\nParameters are missing. \n");
+          printf("Call with <ParameterFile> [<RestartFlag>] [<RestartSnapNum>] [<SpecialOptions>]\n");
+          printf("\n");
+          printf("   RestartFlag    Action\n");
+          printf("       0          Read initial conditions and start simulation\n");
+          printf("       1          Read restart files and resume simulation\n");
+          printf("       2          Restart from specified snapshot dump and resume simulation\n");
+          printf("       3          Run FOF and optionally SUBFIND: [<SubboxSnapNum> for SUBBOX_SNAPSHOTS]\n");
+          printf(
+              "       6          Convert snapshot file to different format [input=ICFormat  output=SnapFormat   NOTE: derived "
+              "quantities have round-off errors!\n");
+          printf("      14          Write out the Voronoi mesh: <SnapNum>\n");
+          printf("      17          Write out snapshot dump with measured gradients\n");
+          printf("      18          Recalculate gravitational potential values for specified snaphot dump: <snapnum>\n");
+          printf("\n");
+        }
+      endrun();
+    }
+
+  strcpy(ParameterFile, argv[1]);
+
+  if(argc >= 3)
+    RestartFlag = atoi(argv[2]);
+  else
+    RestartFlag = 0;
+
+  if(argc >= 4)
+    RestartSnapNum = atoi(argv[3]);
+  else
+    RestartSnapNum = -1;
+
+  // Do minimal validation of arguments here rather than in random places in the code
+  if((RestartFlag == 3 || RestartFlag == 6 || RestartFlag == 14 || RestartFlag == 17 || RestartFlag == 18) && RestartSnapNum < 0)
+    {
+      mpi_printf("Need to give the snapshot number\n");
+      MPI_Finalize(); /* MPI_Init() has run; not endrun(), which would move a ./running file this process never created */
+      return 0;
+    }
+
+#ifndef RECOMPUTE_POTENTIAL_IN_SNAPSHOT
+  if(RestartFlag == 18)
+    {
+      mpi_printf("Need RECOMPUTE_POTENTIAL_IN_SNAPSHOT for this option\n");
+      MPI_Finalize(); /* same as above: finalize MPI before leaving main() */
+      return 0;
+    }
+#endif /* #ifndef RECOMPUTE_POTENTIAL_IN_SNAPSHOT */
+
+#ifdef RUNNING_SAFETY_FILE
+  /* do not run if 'running' safety file exists */
+  int runningflag = 0;
+  if(ThisTask == 0)
+    {
+      FILE *fd;
+      char runningfname[MAXLEN_PATH];
+
+      sprintf(runningfname, "./running");
+      if((fd = fopen(runningfname, "r"))) /* Is the running-file present? If yes, interrupt the run. */
+        {
+          fclose(fd);
+          printf("running-file detected. stopping.\n");
+          runningflag = 1;
+        }
+    }
+  MPI_Bcast(&runningflag, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  if(runningflag)
+    {
+      MPI_Finalize(); /* do not call endrun() */
+      return 0;
+    }
+  else
+    {
+      /* touch a running safety file */
+      if(ThisTask == 0)
+        {
+          FILE *fd;
+          char runningfname[MAXLEN_PATH];
+
+          sprintf(runningfname, "./running");
+          if((fd = fopen(runningfname, "w")))
+            {
+              fclose(fd);
+              printf("touching a running-file: %s \n", runningfname);
+            }
+          else
+            terminate("could not touch a running-file: %s\n", runningfname);
+        }
+    }
+#endif /* #ifdef RUNNING_SAFETY_FILE */
+
+  begrun1(); /* set-up run  */
+
+  /* see if we are loading a restart file or an IC file */
+  if(RestartFlag == 1)
+    loadrestart();
+  else
+    {
+      /* We're reading an IC file. Is it a snapshot or really an IC? */
+      char fname[MAXLEN_PATH];
+
+      if(RestartFlag >= 2 && RestartSnapNum >= 0)
+        {
+          if(All.NumFilesPerSnapshot > 1)
+            sprintf(fname, "%s/snapdir_%03d/%s_%03d", All.OutputDir, RestartSnapNum, All.SnapshotFileBase, RestartSnapNum);
+          else
+            sprintf(fname, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, RestartSnapNum);
+        }
+      else
+        strcpy(fname, All.InitCondFile);
+
+      /* now we can load the file */
+#ifdef READ_DM_AS_GAS
+      read_ic(fname, (RestartFlag == 14) ? 0x02 : LOAD_TYPES);
+#else  /* #ifdef READ_DM_AS_GAS */
+      read_ic(fname, (RestartFlag == 14) ? 0x01 : LOAD_TYPES);
+#endif /* #ifdef READ_DM_AS_GAS #else */
+
+      /* If we are supposed to just convert the file, write and exit here. */
+      if(RestartFlag == 6)
+        {
+          /* important for proper functioning of FOF+SUBFIND */
+          if(All.ComovingIntegrationOn) /* change to new velocity variable */
+            {
+              int i, j;
+              for(i = 0; i < NumPart; i++)
+                for(j = 0; j < 3; j++)
+                  P[i].Vel[j] *= sqrt(All.Time) * All.Time;
+            }
+          set_softenings();
+          All.TopNodeAllocFactor = 0.08;
+          All.TreeAllocFactor    = 0.7;
+          All.NgbTreeAllocFactor = 0.7;
+          /* append in place: sprintf() with the same buffer as source and destination is undefined */
+          strcat(All.SnapshotFileBase, "_converted");
+          mpi_printf("Start writing file %s\nRestartSnapNum %d\n", All.SnapshotFileBase, RestartSnapNum);
+          savepositions(RestartSnapNum, 0);
+          endrun();
+        }
+
+      /* init returns a status code, where a value of >=0 means that endrun() should be called. */
+      int status = init();
+
+      if(status >= 0)
+        {
+          if(status > 0)
+            mpi_printf("init() returned with %d\n", status);
+
+          endrun();
+        }
+    }
+
+  begrun2();
+
+  run(); /* main simulation loop */
+
+  endrun(); /* clean up & finalize MPI */
+
+  return 0;
+}
+
+/*! \brief Ends the simulation when no error has occurred.
+ *
+ *  This method has to be called by all processes. It prints the elapsed run
+ *  time, flushes stdout, finalizes MPI and exits the process with status 0,
+ *  so it does not return. On errors terminate() should be used instead.
+ *
+ *  \return void
+ */
+void endrun()
+{
+  mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second()));
+  mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n");
+  fflush(stdout);
+
+#ifdef HAVE_HDF5
+  /* The hdf5 library will sometimes register an atexit() handler that calls its
+   * error handler. In AREPO this is set to my_hdf_error_handler, which calls
+   * MPI_Abort. Calling MPI_Abort after MPI_Finalize is not allowed, hence the
+   * HDF error handler is unset here.
+   */
+  H5Eset_auto(NULL, NULL);
+#endif /* #ifdef HAVE_HDF5 */
+
+#ifdef RUNNING_SAFETY_FILE
+  /* An unfinished run renames its safety file so that the job can be
+   * restarted; a finished run leaves the file where it is so that the job
+   * is not started again. */
+  if(All.Ti_Current < TIMEBASE) /* simulation has not reached the final time */
+    {
+      rename("./running", "./running_done");
+      mpi_printf("moved ./running file to ./running_done, job can now restart.\n");
+    }
+  else
+    mpi_printf("leaving ./running file in place since run is complete to prevent any restarts.\n");
+#endif /* #ifdef RUNNING_SAFETY_FILE */
+
+  MPI_Finalize();
+  exit(0);
+}
diff --git a/src/amuse/community/arepo/src/main/main_reduced.c b/src/amuse/community/arepo/src/main/main_reduced.c
new file mode 100644
index 0000000000..1e7eec7ba7
--- /dev/null
+++ b/src/amuse/community/arepo/src/main/main_reduced.c
@@ -0,0 +1,135 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/main/main.c
+ * \date        05/2018
+ * \brief       Start of the program.
+ * \details     contains functions:
+ *                int main(int argc, char **argv)
+ *                void endrun()
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief Entry point of the reduced AREPO driver.
+ *
+ *  Sets up MPI and the CPU-time/wallclock log, then runs the start-up
+ *  sequence without any command line parsing: the parameter file name is
+ *  fixed to "param.txt" and RestartFlag to 0. After begrun1() the file named
+ *  by All.InitCondFile is read and init() is called; begrun2() finishes the
+ *  initialization and run(), the main simulation loop, is started.
+ *  endrun() finalizes MPI and exits the process.
+ *
+ *  \param[in] argc Argument count from command line (only handed to MPI_Init).
+ *  \param[in] argv Argument vector from command line (only handed to MPI_Init).
+ *
+ *  \return status of exit; 0 for normal exit.
+ */
+int main(int argc, char **argv)
+{
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
+  MPI_Comm_size(MPI_COMM_WORLD, &NTask);
+
+  /* output a welcome message */
+  hello();
+
+  /* initialize CPU-time/Wallclock-time measurement */
+  init_cpu_log();
+
+  determine_compute_nodes();
+
+  /* PTask ends up as the smallest exponent for which 2^PTask >= NTask */
+  PTask = 0;
+  while(NTask > (1 << PTask))
+    PTask++;
+
+  begrun0();
+
+  /* Command line parsing is removed: "param.txt" replaces argv[1]. */
+  strcpy(ParameterFile, "param.txt");
+  RestartFlag = 0;
+
+  begrun1(); /* set-up run  */
+
+  /* load the initial conditions file named by All.InitCondFile */
+  char fname[MAXLEN_PATH];
+  strcpy(fname, All.InitCondFile);
+
+#ifdef READ_DM_AS_GAS
+  read_ic(fname, (RestartFlag == 14) ? 0x02 : LOAD_TYPES);
+#else  /* #ifdef READ_DM_AS_GAS */
+  read_ic(fname, (RestartFlag == 14) ? 0x01 : LOAD_TYPES);
+#endif /* #ifdef READ_DM_AS_GAS #else */
+
+  /* init returns a status code, where a value of >=0 means that endrun() should be called. */
+  int status = init();
+
+  if(status > 0)
+    mpi_printf("init() returned with %d\n", status);
+
+  if(status >= 0)
+    endrun();
+
+  begrun2();
+  run(); /* main simulation loop */
+  endrun(); /* clean up & finalize MPI */
+
+  return 0;
+}
+
+/*! \brief Ends the simulation when no error has occurred.
+ *
+ *  This method has to be called by all processes. It prints the elapsed run
+ *  time, flushes stdout, finalizes MPI and exits the process with status 0,
+ *  so it does not return. On errors terminate() should be used instead.
+ *
+ *  \return void
+ */
+void endrun()
+{
+  mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second()));
+  mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n");
+  fflush(stdout);
+
+#ifdef HAVE_HDF5
+  /* The hdf5 library will sometimes register an atexit() handler that calls its
+   * error handler. In AREPO this is set to my_hdf_error_handler, which calls
+   * MPI_Abort. Calling MPI_Abort after MPI_Finalize is not allowed, hence the
+   * HDF error handler is unset here.
+   */
+  H5Eset_auto(NULL, NULL);
+#endif /* #ifdef HAVE_HDF5 */
+
+  MPI_Finalize();
+  exit(0);
+}
diff --git a/src/amuse/community/arepo/src/main/proto.h b/src/amuse/community/arepo/src/main/proto.h
new file mode 100644
index 0000000000..15a346f1bc
--- /dev/null
+++ b/src/amuse/community/arepo/src/main/proto.h
@@ -0,0 +1,665 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/main/proto.h
+ * \date        05/2018
+ * \brief       Function declarations.
+ * \details     No particular order.
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 29.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef PROTO_H
+#define PROTO_H
+
+#include "../gravity/forcetree.h"
+#include "../main/allvars.h"
+#include "../utils/timer.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef IMPOSE_PINNING
+#ifndef __USE_GNU
+#define __USE_GNU
+#endif /* #ifndef __USE_GNU */
+#include <sched.h>
+#endif /* #ifdef IMPOSE_PINNING */
+
+#ifdef HAVE_HDF5
+#include <hdf5.h>
+#endif /* #ifdef HAVE_HDF5 */
+
+#if defined(COOLING)
+#include "../cooling/cooling_proto.h"
+#endif /* #if defined(COOLING) */
+
+void sfr_init();
+void sfr_create_star_particles(void);
+void ngb_finish_rangebounds_update(int nchanged, int *nodelist);
+void ngb_update_rangebounds(int i, int *nchanged, int *nodelist);
+int ngb_treefind_variable(MyDouble searchcenter[3], MyFloat hsml, int target, int *startnode, int mode, int *nexport,
+                          int *nsend_local);
+int ngb_treebuild(int npart);
+void ngb_treeallocate(void);
+void ngb_treefree(void);
+int ngb_treefind_export_node_threads(int no, int target, int thread_id, int image_flag);
+int ngb_treefind_variable_threads(MyDouble searchcenter[3], MyFloat hsml, int target, int mode, int thread_id, int numnodes,
+                                  int *firstnode);
+
+void drift_node(struct NgbNODE *current, integertime time1);
+void drift_all_particles(void);
+double get_desired_softening_from_mass(double mass);
+void log_restart_debug(void);
+int get_thread_num(void);
+void report_pinning(void);
+void detect_topology(void);
+void pin_to_core_set(void);
+void get_core_set(void);
+int derefine_should_this_cell_be_merged(int i, int flag);
+
+void gravity_external(void);
+void gravity(int timebin, int fullflag);
+int my_ffsll(peanokey i);
+void set_cosmo_factors_for_current_time(void);
+void calc_exact_gravity_for_particle_type(void);
+void calculate_non_standard_physics_with_valid_gravity_tree(void);
+void calculate_non_standard_physics_with_valid_gravity_tree_always(void);
+int get_softeningtype_for_hydro_cell(int i);
+void gravity_forcetest_testforcelaw(void);
+void *myfree_query_last_block(void);
+
+void subdivide_evenly(int N, int pieces, int index, int *first, int *count);
+void force_evaluate_direct(int target, int result_idx, int nimport);
+void gravity_direct(int timebin);
+double dabs(double a);
+double dmax(double a, double b);
+double dmin(double a, double b);
+double max_array(double *a, int num_elements);
+int imax(int a, int b);
+int imin(int a, int b);
+double mysort(void *base, size_t nel, size_t width, int (*compar)(const void *, const void *));
+
+int myflush(FILE *fstream);
+int flush_everything(void);
+void gravity_force_finalize(int timebin);
+void permutate_chunks_in_list(int ncount, int *list);
+double get_default_softening_of_particletype(int type);
+double get_random_number_aux(void);
+void sumup_large_ints_comm(int n, int *src, long long *res, MPI_Comm comm);
+void ngb_update_velocities(void);
+void hello(void);
+void find_long_range_step_constraint(void);
+
+void ngb_treemodifylength(int delta_NgbMaxPart);
+void domain_resize_storage(int count_get, int count_get_sph, int option_flag);
+void init_individual_softenings(void);
+void do_derefinements_and_refinements();
+void mark_active_timebins(void);
+void voronoi_test(void);
+void execute_resubmit_command(void);
+void output_compile_time_options(void);
+void init_io_fields();
+void produce_dump(void);
+
+void create_snapshot_if_desired(void);
+void output_log_messages(void);
+void mpi_report_committable_memory(void);
+long long report_comittable_memory(long long *MemTotal, long long *Committed_AS, long long *SwapTotal, long long *SwapFree);
+int check_for_interruption_of_run(void);
+void set_non_standard_physics_for_current_time(void);
+void calculate_non_standard_physics_prior_mesh_construction(void);
+void calculate_non_standard_physics_end_of_step(void);
+void compute_statistics(void);
+void face_limit_fluxes(struct state *st_L, struct state *st_R, struct state *st_center_L, struct state *st_center_R,
+                       struct fluxes *flux, double dt, double *count, double *count_reduced);
+
+double get_sound_speed(int p);
+void set_pressure_of_cell(int i);
+void gradient_init(MyFloat *addr, MyFloat *addr_exch, MySingle *addr_grad, int type);
+void limit_vel_gradient(double *d, MySingle *grad_vx, MySingle *grad_vy, MySingle *grad_vz, double csnd);
+void subfind_density_hsml_guess(void);
+void peano_hilbert_key_inverse(peanokey key, int bits, peano1D *x, peano1D *y, peano1D *z);
+void find_nearest_meshpoint_global(mesh_search_data *searchdata, int n, int hsmlguess, int verbose);
+void reorder_DP(void);
+void peano_hilbert_order_DP(void);
+void validate_vertex_velocities(void);
+
+double get_cell_radius(int i);
+double nearest_x(double d);
+double nearest_y(double d);
+double nearest_z(double d);
+int voronoi_get_connected_particles(tessellation *T);
+void voronoi_init_connectivity(tessellation *T);
+void voronoi_update_connectivity(tessellation *T);
+int compare_foreign_connection(const void *a, const void *b);
+void voronoi_remove_connection(int i);
+int pmforce_is_particle_high_res(int type, MyDouble *pos);
+
+void cooling_only(void);
+void report_VmRSS(void);
+void tree_based_timesteps_setsoundspeeds(void);
+void voronoi_update_ghost_velvertex(void);
+int should_this_cell_be_split(int i);
+int do_refinements(void);
+int should_this_cell_be_merged(int i, int flag);
+int do_derefinements(void);
+void move_collisionless_particle(int new_i, int old_i);
+void dump_memory_table(void);
+
+void report_detailed_memory_usage_of_largest_task(void);
+void calculate_vertex_velocity_divergence(void);
+void make_list_of_active_particles(void);
+void find_gravity_timesteps_and_do_gravity_step_first_half(void);
+void do_gravity_step_second_half(void);
+void voronoi_1D_reorder_gas(void);
+int voronoi_1D_compare_key(const void *a, const void *b);
+void voronoi_1D_order(void);
+void pm2d_init_periodic(void);
+void pm2d_init_periodic_allocate(void);
+
+void pm2d_init_periodic_free(void);
+void pm2d_force_periodic(int mode);
+int pm2d_periodic_compare_sortindex(const void *a, const void *b);
+void pm2d_mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *));
+int timestep_evaluate(int target, int mode, int threadid);
+void tree_based_timesteps(void);
+int MPI_Check_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbufreal, int recvcount,
+                       MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status);
+int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs,
+                             MPI_Datatype recvtype, MPI_Comm comm);
+double parallel_sort(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *));
+
+double parallel_sort_comm(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *), MPI_Comm comm);
+int compare_IDs(const void *a, const void *b);
+void test_id_uniqueness(void);
+void drift_particle(int i, integertime time1);
+void put_symbol(char *string, double t0, double t1, char c);
+void write_cpu_log(void);
+void *mymalloc_fullinfo(const char *varname, size_t n, const char *func, const char *file, int linenr, int clear_flag, char *origin);
+void *mymalloc_movable_fullinfo(void *ptr, const char *varname, size_t n, const char *func, const char *file, int line, char *origin);
+void *myrealloc_fullinfo(void *p, size_t n, const char *func, const char *file, int line);
+void *myrealloc_movable_fullinfo(void *p, size_t n, const char *func, const char *file, int line);
+
+void myfree_fullinfo(void *p, const char *func, const char *file, int line);
+void myfree_movable_fullinfo(void *p, const char *func, const char *file, int line);
+void mymalloc_init(void);
+void calculate_maxid(void);
+void determine_compute_nodes(void);
+double INLINE_FUNC hubble_function(double a);
+void fof_fof(int num);
+double fof_find_groups(MyIDType *vMinID, int *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask);
+void fof_compile_catalogue(void);
+void fof_save_groups(int num);
+
+double fof_periodic(double x);
+double fof_periodic_wrap(double x);
+double fof_find_nearest_dmparticle(MyIDType *vMinID, int *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask);
+void fof_compute_group_properties(int gr, int start, int len);
+int fof_compare_FOF_PList_MinID(const void *a, const void *b);
+int fof_compare_FOF_GList_MinID(const void *a, const void *b);
+int fof_compare_FOF_GList_MinIDTask(const void *a, const void *b);
+int fof_compare_FOF_GList_MinIDTask_MinID(const void *a, const void *b);
+int fof_compare_FOF_GList_LocCountTaskDiffMinID(const void *a, const void *b);
+int fof_compare_FOF_GList_ExtCountMinID(const void *a, const void *b);
+
+int fof_compare_Group_GrNr(const void *a, const void *b);
+int fof_compare_Group_MinIDTask(const void *a, const void *b);
+int fof_compare_Group_MinID(const void *a, const void *b);
+int fof_compare_ID_list_GrNrID(const void *a, const void *b);
+int fof_compare_Group_MinIDTask_MinID(const void *a, const void *b);
+int fof_compare_Group_Len(const void *a, const void *b);
+int fof_compare_aux_sort_Type(const void *a, const void *b);
+int fof_compare_aux_sort_GrNr(const void *a, const void *b);
+int fof_compare_aux_sort_OriginTask_OriginIndex(const void *a, const void *b);
+int fof_compare_aux_sort_FileOrder(const void *a, const void *b);
+
+int fof_compare_local_sort_data_targetindex(const void *a, const void *b);
+void fof_subfind_exchange(MPI_Comm Communicator);
+void fof_prepare_output_order(void);
+void fof_compute_group_properties(int gr, int start, int len);
+void fof_exchange_group_data(void);
+void fof_finish_group_properties(void);
+double fof_get_comoving_linking_length(void);
+void fof_assign_group_numbers(void);
+void fof_reorder_PS(int *Id, int Nstart, int N);
+void fof_subfind_write_file(char *fname, int writeTask, int lastTask);
+
+void fof_subfind_prepare_ID_list(void);
+int subfind_compare_procassign_GrNr(const void *a, const void *b);
+double subfind_so_potegy(double *egypot);
+void subfind_distlinklist_get_two_heads(long long ngb_index1, long long ngb_index2, long long *head, long long *head_attach);
+void fof_check_for_full_nodes_recursive(int no);
+int fof_return_a_particle_in_cell_recursive(int no);
+void subfind_loctree_copyExtent(void);
+int subfind_distlinklist_get_tail_set_tail_increaselen(long long index, long long *tail, long long newtail);
+void subfind_reorder_according_to_submp(void);
+int subfind_compare_submp_OldIndex(const void *a, const void *b);
+
+int subfind_compare_submp_GrNr_DM_Density(const void *a, const void *b);
+double subfind_exchange(void);
+void subfind_coll_domain_decomposition(void);
+void subfind_coll_domain_combine_topleaves_to_domains(int ncpu, int ndomain);
+void subfind_coll_domain_free(void);
+void subfind_coll_domain_allocate(void);
+int subfind_coll_domain_determineTopTree(void);
+void subfind(int num);
+double subfind_density(int mode);
+double subfind_overdensity(void);
+
+void subfind_save_final(int num);
+void subfind_process_group_collectively(int nsubgroups_cat);
+void subfind_coll_findExtent(void);
+void subfind_reorder_PS(int *Id, int Nstart, int N);
+void subfind_reorder_P(int *Id, int Nstart, int N);
+void subfind_distribute_particles(MPI_Comm Communicator);
+void subfind_coll_domain_walktoptree(int no);
+int subfind_compare_densities(const void *a, const void *b);
+int subfind_compare_binding_energy(const void *a, const void *b);
+int subfind_compare_dist_rotcurve(const void *a, const void *b);
+
+int subfind_compare_coll_candidates_rank(const void *a, const void *b);
+int subfind_compare_coll_candidates_boundlength(const void *a, const void *b);
+int subfind_compare_coll_candidates_nsubs(const void *a, const void *b);
+int subfind_compare_coll_candidates_subnr(const void *a, const void *b);
+void subfind_col_find_coll_candidates(int totgrouplen);
+void subfind_unbind_independent_ones(int count);
+void subfind_distribute_groups(void);
+void subfind_potential_compute(int num, struct unbind_data *d, int phase, double weakly_bound_limit);
+int subfind_col_unbind(struct unbind_data *d, int num, int *num_non_gas);
+void subfind_find_linkngb(void);
+
+int subfind_loctree_treebuild(int npart, struct unbind_data **mp);
+void subfind_loctree_update_node_recursive(int no, int sib, int father);
+double subfind_loctree_treeevaluate_potential(int target);
+void subfind_loctree_copyExtent(void);
+double subfind_locngb_treefind(MyDouble xyz[3], int desngb, double hguess);
+void subfind_loctree_findExtent(int npart, struct unbind_data *mp);
+int subfind_locngb_treefind_variable(MyDouble searchcenter[3], double hguess);
+size_t subfind_loctree_treeallocate(int maxnodes, int maxpart);
+void subfind_loctree_treefree(void);
+void subfind_find_nearesttwo(void);
+
+int subfind_process_group_serial(int gr, int offset, int nsubgroups_cat);
+int subfind_unbind(struct unbind_data *ud, int len, int *len_non_gas);
+int subfind_locngb_compare_key(const void *a, const void *b);
+int subfind_compare_serial_candidates_subnr(const void *a, const void *b);
+int subfind_compare_serial_candidates_rank(const void *a, const void *b);
+int subfind_compare_dens(const void *a, const void *b);
+int subfind_compare_serial_candidates_boundlength(const void *a, const void *b);
+int subfind_compare_dist_rotcurve(const void *a, const void *b);
+int subfind_compare_binding_energy(const void *a, const void *b);
+int subfind_compare_densities(const void *a, const void *b);
+
+int subfind_compare_ID_list(const void *a, const void *b);
+int subfind_compare_SubGroup_GrNr_SubNr(const void *a, const void *b);
+void subfind_poll_for_requests(void);
+long long subfind_distlinklist_setrank_and_get_next(long long index, long long *rank);
+long long subfind_distlinklist_get_rank(long long index);
+void subfind_distlinklist_set_next(long long index, long long next);
+void subfind_distlinklist_add_particle(long long index);
+void subfind_distlinklist_add_bound_particles(long long index, int nsub);
+void subfind_distlinklist_mark_particle(long long index, int target, int submark);
+long long subfind_distlinklist_get_next(long long index);
+
+long long subfind_distlinklist_get_head(long long index);
+void subfind_distlinklist_set_headandnext(long long index, long long head, long long next);
+void subfind_distlinklist_set_tailandlen(long long index, long long tail, int len);
+void subfind_distlinklist_get_tailandlen(long long index, long long *tail, int *len);
+void subfind_distlinklist_set_all(long long index, long long head, long long tail, int len, long long next);
+long long subfind_distlinklist_set_head_get_next(long long index, long long head);
+int subfind_compare_dist_rotcurve(const void *a, const void *b);
+void subfind_coll_treeallocate(int maxpart, int maxindex);
+void subfind_coll_treefree(void);
+void subfind_coll_treeupdate_toplevel(int no, int topnode, int bits, int x, int y, int z);
+
+void subfind_coll_exchange_topleafdata(void);
+void subfind_coll_update_node_recursive(int no, int sib, int father, int *last);
+void subfind_coll_insert_pseudo_particles(void);
+int subfind_coll_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z, unsigned long long xc, unsigned long long yc,
+                                    unsigned long long zc, unsigned long long ilen);
+int subfind_coll_treebuild_insert_single_point(int i, unsigned long long *intpos, int th, unsigned char levels);
+int subfind_coll_treebuild_construct(int npart, struct unbind_data *mp);
+int subfind_coll_treebuild(int npart, struct unbind_data *mp);
+double subfind_get_particle_balance(void);
+int subfind_fof_compare_ID(const void *a, const void *b);
+void write_file(char *fname, int readTask, int lastTask, int subbox_flag);
+
+void distribute_file(int nfiles, int firstfile, int firsttask, int lasttask, int *filenr, int *master, int *last);
+int get_values_per_blockelement(enum iofields blocknr);
+int get_datatype_in_block(enum iofields blocknr, int mode);
+void get_dataset_name(enum iofields blocknr, char *buf);
+int blockpresent(enum iofields blocknr, int write);
+void fill_write_buffer(void *buffer, enum iofields blocknr, int *pindex, int pc, int type, int subbox_flag);
+void empty_read_buffer(enum iofields blocknr, int offset, int pc, int type);
+int get_particles_in_block(enum iofields blocknr, int *typelist);
+int get_bytes_per_blockelement(enum iofields blocknr, int mode);
+void read_file(const char *fname, int filenr, int readTask, int lastTask, int);
+
+void get_Tab_IO_Label(enum iofields blocknr, char *label);
+void long_range_init_regionsize(void);
+int find_files(const char *fname);
+double get_random_number(void);
+int peano_compare_key(const void *a, const void *b);
+void mysort_domain(void *b, size_t n, size_t s);
+void mysort_peano(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *));
+int density_isactive(int n);
+size_t sizemax(size_t a, size_t b);
+void my_gsl_error_handler(const char *reason, const char *file, int line, int gsl_errno);
+
+void reconstruct_timebins(void);
+peanokey peano_hilbert_key(peano1D x, peano1D y, peano1D z, int bits);
+void enable_core_dumps_and_fpu_exceptions(void);
+void find_next_sync_point(void);
+void set_units_sfr(void);
+void gravity_forcetest(void);
+void allocate_memory(void);
+void begrun0(void);
+void begrun1(void);
+void begrun2(void);
+
+int init(void);
+void loadrestart(void);
+void reread_params_after_loading_restart(void);
+void check_omega(void);
+void close_logfiles(void);
+void compute_grav_accelerations(int timebin, int fullflag);
+void compute_global_quantities_of_system(void);
+void cooling_and_starformation(void);
+void density(void);
+void do_box_wrapping(void);
+
+void domain_Decomposition(void);
+double enclosed_mass(double R);
+void endrun(void);
+void energy_statistics(void);
+void ewald_corr(double dx, double dy, double dz, double *fper);
+void ewald_force(double x, double y, double z, double force[3]);
+int my_fls(int x);
+void ewald_init(void);
+double ewald_psi(double x, double y, double z);
+double ewald_pot_corr(double dx, double dy, double dz);
+
+integertime find_next_outputtime(integertime time);
+void minimum_large_ints(int n, long long *src, long long *res);
+double get_starformation_rate(int i);
+double calc_egyeff(int i, double gasdens, double *ne, double *x, double *tsfr, double *factorEVP);
+void gravity_tree(int timebin);
+void init_clouds(void);
+void integrate_sfr(void);
+size_t my_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream);
+size_t my_fread(void *ptr, size_t size, size_t nmemb, FILE *stream);
+void open_logfiles(void);
+
+void peano_hilbert_order(void);
+void predict(double time);
+void read_ic(const char *fname, int);
+void read_header_attributes(FILE *fd);
+MyIDType determine_ids_offset(void);
+int read_outputlist(char *fname);
+void read_parameter_file(char *fname);
+void check_parameters();
+void reorder_gas(int *Id);
+void reorder_particles(int *Id);
+
+void restart(int mod);
+void run(void);
+void savepositions(int num, int subbox_flag);
+void mpi_printf(const char *fmt, ...);
+void mpi_fprintf(FILE *stream, const char *fmt, ...);
+void mpi_printf_each(const char *fmt, ...);
+FILE *open_file(char *);
+double second(void);
+void set_softenings(void);
+void set_units(void);
+
+void setup_smoothinglengths(void);
+void sumup_large_ints(int n, int *src, long long *res);
+void sumup_longs(int n, long long *src, long long *res);
+void statistics(void);
+double timediff(double t0, double t1);
+void veldisp(void);
+double get_hydrokick_factor(integertime time0, integertime time1);
+double get_gravkick_factor(integertime time0, integertime time1);
+double drift_integ(double a, void *param);
+double gravkick_integ(double a, void *param);
+
+double hydrokick_integ(double a, void *param);
+void init_drift_table(void);
+double get_drift_factor(integertime time0, integertime time1);
+double measure_time(void);
+void long_range_init(void);
+void long_range_force(void);
+void pm_init_periodic(void);
+void pmforce_periodic(int mode, int *typelist);
+void pm_init_regionsize(void);
+void pm_init_nonperiodic(void);
+
+int pmforce_nonperiodic(int grnr);
+void readjust_timebase(double TimeMax_old, double TimeMax_new);
+void pm_setup_nonperiodic_kernel(void);
+void init_gradients();
+void init_scalars();
+void print_particle_info(int i);
+void print_state_info(struct state *st);
+void print_state_face_info(struct state_face *st);
+void face_set_scalar_states_and_fluxes(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux);
+void face_turn_momentum_flux(struct fluxes *flux, struct geometry *geom);
+
+void face_clear_fluxes(struct fluxes *flux);
+int face_check_responsibility_of_this_task(tessellation *T, int p1, int p2, struct state *st_L, struct state *st_R);
+int face_get_normals(tessellation *T, int i, struct geometry *geom);
+int face_get_state(tessellation *T, int p, int i, struct state *st);
+void face_boundary_check(point *p, double *velx, double *vely, double *velz);
+void face_boundary_check_vertex(tessellation *T, int p, MyFloat *velx, MyFloat *vely, MyFloat *velz);
+double face_timestep(struct state *state_L, struct state *state_R, double *hubble_a, double *atime);
+void state_convert_to_local_frame(struct state *st, double *vel_face, double hubble_a, double atime);
+void face_do_time_extrapolation(struct state *delta, struct state *st, double atime);
+void face_do_spatial_extrapolation(struct state *delta, struct state *st, struct state *st_other);
+
+void face_do_spatial_extrapolation_single_quantity(double *delta, double st, double st_other, MySingle *grad, double *dx, double *r);
+void face_add_extrapolations(struct state *st_face, struct state *delta_time, struct state *delta_space, struct fvs_stat *stat);
+void face_add_extrapolation(struct state *st_face, struct state *delta, struct fvs_stat *stat);
+void face_turn_velocities(struct state *st, struct geometry *geom);
+void solve_advection(struct state *st_L, struct state *st_R, struct state_face *st_face, struct geometry *geom, double *vel_face);
+void face_turnback_velocities(struct state_face *st_face, struct geometry *geom);
+void face_get_fluxes(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux, struct geometry *geom,
+                     double *vel_face);
+void face_add_fluxes_advection(struct state_face *st_face, struct fluxes *flux, struct geometry *geom, double *vel_face);
+double godunov_flux_3d(struct state *st_L, struct state *st_R, struct state_face *st_face);
+void sample_solution_vacuum_left_3d(double S, struct state *st_R, struct state_face *st_face);
+
+void sample_solution_vacuum_right_3d(double S, struct state *st_L, struct state_face *st_face);
+void sample_solution_vacuum_generate_3d(double S, struct state *st_L, struct state *st_R, struct state_face *st_face);
+void get_mach_numbers(struct state *st_L, struct state *st_R, double Press);
+void sample_solution_3d(double S, struct state *st_L, struct state *st_R, double Press, double Vel, struct state_face *st_face);
+int riemann(struct state *st_L, struct state *st_R, double *Press, double *Vel);
+void pressure_function(double P, struct state *st, double *F, double *FD);
+double guess_for_pressure(struct state *st_L, struct state *st_R);
+void riemann_isotherm(struct state *st_L, struct state *st_R, double *Rho, double *Vel, double csnd);
+void isothermal_function(double rhostar, double rho, double *F, double *FD);
+void sample_solution_isothermal3d(double S, struct state *st_L, struct state *st_R, double Rho, double Vel, struct state_face *st_face,
+                                  double csnd);
+
+void apply_flux_list(void);
+int flux_list_data_compare(const void *a, const void *b);
+void set_vertex_velocities(void);
+int scalar_init(MyFloat *addr, MyFloat *addr_mass, int type);
+void compute_interface_fluxes(tessellation *T);
+void update_primitive_variables(void);
+void set_pressure_of_cell_internal(struct particle_data *P, struct sph_particle_data *SphP, int i);
+void do_validity_checks(struct particle_data *P, struct sph_particle_data *SphP, int i, struct pv_update_data *pvd);
+void update_primitive_variables_single(struct particle_data *P, struct sph_particle_data *SphP, int i, struct pv_update_data *pvd);
+
+void update_internal_energy(struct particle_data *P, struct sph_particle_data *SphP, int i, struct pv_update_data *pvd);
+void mpi_exchange_buffers(void *send_buf, int *send_count, int *send_offset, void *recv_buf, int *recv_count, int *recv_offset,
+                          int item_size, int commtag, int include_self);
+int mpi_calculate_offsets(int *send_count, int *send_offset, int *recv_count, int *recv_offset, int send_identical);
+void *sort_based_on_mesh_search(mesh_search_data *search, void *data, int n_items, int item_size);
+void *sort_based_on_field(void *data, int field_offset, int n_items, int item_size);
+void mpi_distribute_items_from_search(mesh_search_data *search, void *data, int *n_items, int *max_n, int item_size, int commtag,
+                                      int task_offset, int cell_offset);
+void mpi_distribute_items_to_tasks(void *data, int task_offset, int *n_items, int *max_n, int item_size, int commtag);
+void tile_ics(void);
+void reallocate_memory_maxpart(void);
+void reallocate_memory_maxpartsph(void);
+
+void share_particle_number_in_file(const char *fname, int filenr, int readTask, int lastTask, int readTypes);
+int dump_memory_table_buffer(char *p);
+void calc_memory_checksum(void *base, size_t bytes);
+void allreduce_sparse_double_sum(double *loc, double *glob, int N);
+void allreduce_sparse_imin(int *loc, int *glob, int N);
+void myMPI_Alltoallv(void *sendb, size_t *sendcounts, size_t *sdispls, void *recvb, size_t *recvcounts, size_t *rdispls, int len,
+                     int big_flag, MPI_Comm comm);
+int myMPI_Sendrecv(void *sendb, size_t sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvb, size_t recvcount,
+                   MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status);
+size_t roundup_to_multiple_of_cacheline_size(size_t n);
+void init_cpu_log(void);
+
+void write_error(int check, size_t nwritten, size_t nmemb);
+size_t smax(size_t a, size_t b);
+void init_field(enum iofields field, const char *label, const char *datasetname, enum types_in_memory type_in_memory,
+                enum types_in_file type_in_file_output, enum types_in_file type_in_file_input, int values_per_block, enum arrays array,
+                void *pointer_to_field, void (*io_func)(int, int, void *, int), int typelist_bitmask);
+void init_units(enum iofields field, double a, double h, double L, double M, double V, double c);
+void init_snapshot_type(enum iofields field, enum sn_type type);
+
+void swap_Nbyte(char *data, int n, int m);
+void swap_header(void);
+
+#if defined(COOLING)
+void cool_cell(int i);
+#endif /* #if defined(COOLING) */
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+void special_particle_create_list();
+void special_particle_update_list();
+#endif /* #ifdef  EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+#ifdef HAVE_HDF5
+
+hid_t my_H5Fcreate(const char *fname, unsigned flags, hid_t fcpl_id, hid_t fapl_id);
+hid_t my_H5Gcreate(hid_t loc_id, const char *groupname, size_t size_hint);
+hid_t my_H5Dcreate(hid_t loc_id, const char *datasetname, hid_t type_id, hid_t space_id, hid_t dcpl_id);
+hid_t my_H5Acreate(hid_t loc_id, const char *attr_name, hid_t type_id, hid_t space_id, hid_t acpl_id);
+hid_t my_H5Screate(H5S_class_t type);
+hid_t my_H5Screate_simple(int rank, const hsize_t *current_dims, const hsize_t *maximum_dims);
+herr_t my_H5Dwrite(hid_t dataset_id, hid_t mem_type_id, hid_t mem_space_id, hid_t file_space_id, hid_t xfer_plist_id, const void *buf,
+                   const char *datasetname);
+herr_t my_H5Awrite(hid_t attr_id, hid_t mem_type_id, const void *buf, const char *attr_name);
+hid_t my_H5Fopen(const char *fname, unsigned int flags, hid_t fapl_id);
+hid_t my_H5Dopen(hid_t file_id, const char *datasetname);
+
+hid_t my_H5Dopen_if_existing(hid_t file_id, const char *datasetname);
+herr_t my_H5Dread(hid_t dataset_id, hid_t mem_type_id, hid_t mem_space_id, hid_t file_space_id, hid_t xfer_plist_id, void *buf,
+                  const char *datasetname);
+hid_t my_H5Gopen(hid_t loc_id, const char *groupname);
+hid_t my_H5Aopen_name(hid_t loc_id, const char *attr_name);
+herr_t my_H5Aread(hid_t attr_id, hid_t mem_type_id, void *buf, const char *attr_name, hssize_t size);
+herr_t my_H5Aclose(hid_t attr_id, const char *attr_name);
+herr_t my_H5Dclose(hid_t dataset_id, const char *datasetname);
+herr_t my_H5Gclose(hid_t group_id, const char *groupname);
+herr_t my_H5Fclose(hid_t file_id, const char *fname);
+herr_t my_H5Sclose(hid_t dataspace_id, H5S_class_t type);
+
+hid_t my_H5Tcopy(hid_t type_id);
+herr_t my_H5Tclose(hid_t type_id);
+herr_t my_H5Sselect_hyperslab(hid_t space_id, H5S_seloper_t op, const hsize_t *start, const hsize_t *stride, const hsize_t *count,
+                              const hsize_t *block);
+size_t my_H5Tget_size(hid_t datatype_id);
+herr_t my_H5Tset_size(hid_t datatype_id, size_t size);
+herr_t my_H5Sset_extent_simple(hid_t space_id, int rank, const hsize_t *current_size, const hsize_t *maximum_size,
+                               const char *attr_name);
+hid_t my_H5Dget_space(hid_t dataset_id, const char *datasetname);
+
+#ifdef HDF5_FILTERS
+htri_t my_H5Pall_filters_avail(hid_t plist_id);
+hid_t my_H5Pcreate(hid_t class_id);
+herr_t my_H5Pclose(hid_t plist);
+herr_t my_H5Pset_chunk(hid_t plist, int ndims, const hsize_t *dim);
+herr_t my_H5Pset_shuffle(hid_t plist_id);
+herr_t my_H5Pset_deflate(hid_t plist_id, uint level);
+herr_t my_H5Pset_fletcher32(hid_t plist_id);
+#endif /* #ifdef HDF5_FILTERS */
+
+#endif /* #ifdef HAVE_HDF5 */
+
+#ifdef HOST_MEMORY_REPORTING
+void check_maxmemsize_setting(void);
+#endif /* #ifdef HOST_MEMORY_REPORTING */
+
+#ifdef INDIVIDUAL_GRAVITY_SOFTENING
+int get_softening_type_from_mass(double mass);
+#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */
+
+#ifdef MHD
+void do_mhd_source_terms_first_half(void);
+void do_mhd_source_terms_second_half(void);
+#endif /* #ifdef MHD */
+
+#ifdef ONEDIMS_SPHERICAL
+void gravity_monopole_1d_spherical();
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+#if defined(PMGRID)
+void my_slab_based_fft(fft_plan *plan, void *data, void *workspace, int forward);
+void my_slab_based_fft_c2c(fft_plan *plan, void *data, void *workspace, int forward);
+void my_slab_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ);
+void my_slab_transposeA(fft_plan *plan, fft_real *field, fft_real *scratch);
+void my_slab_transposeB(fft_plan *plan, fft_real *field, fft_real *scratch);
+void my_column_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ);
+void my_column_based_fft_init_c2c(fft_plan *plan, int NgridX, int NgridY, int NgridZ);
+void my_column_based_fft(fft_plan *plan, void *data, void *workspace, int forward);
+void my_column_based_fft_c2c(fft_plan *plan, void *data, void *workspace, int forward);
+void my_fft_swap23(fft_plan *plan, fft_real *data, fft_real *out);
+
+void my_fft_swap13(fft_plan *plan, fft_real *data, fft_real *out);
+void my_fft_swap23back(fft_plan *plan, fft_real *data, fft_real *out);
+void my_fft_swap13back(fft_plan *plan, fft_real *data, fft_real *out);
+#endif /* #if defined(PMGRID) */
+
+#ifdef RIEMANN_HLLC
+double godunov_flux_3d_hllc(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux);
+#endif /* #ifdef RIEMANN_HLLC */
+
+#if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD)
+void flux_convert_to_lab_frame(struct state *st_L, struct state *st_R, double *vel_face, struct fluxes *flux);
+#endif /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) */
+
+#ifdef RIEMANN_HLLD
+double godunov_flux_3d_hlld(struct state *st_L, struct state *st_R, double *vel_face, struct state_face *st_face, struct fluxes *flux);
+#endif /* #ifdef RIEMANN_HLLD */
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+void subfind_fof_calc_am_collective(int snapnr, int ngroups_cat);
+int subfind_fof_calc_am_serial(int gr, int Offs, int snapnr, int ngroups_cat);
+void subfind_add_grp_props_calc_fof_angular_momentum(int num, int ngroups_cat);
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+#ifdef USE_SFR
+void convert_cell_into_star(int i, double birthtime);
+void spawn_star_from_cell(int igas, double birthtime, int istar, MyDouble mass_of_star);
+void make_star(int idx, int i, double prob, MyDouble mass_of_star, double *sum_mass_stars);
+#endif /* #ifdef USE_SFR */
+
+#endif /* #ifndef PROTO_H */
diff --git a/src/amuse/community/arepo/src/main/run.c b/src/amuse/community/arepo/src/main/run.c
new file mode 100644
index 0000000000..0bdca04354
--- /dev/null
+++ b/src/amuse/community/arepo/src/main/run.c
@@ -0,0 +1,660 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/main/run.c
+ * \date        05/2018
+ * \brief       The main simulation loop.
+ * \details     contains functions:
+ *                void run(void)
+ *                void do_second_order_source_terms_first_half(void)
+ *                void do_second_order_source_terms_second_half(void)
+ *                void set_non_standard_physics_for_current_time(void)
+ *                void calculate_non_standard_physics_with_valid_gravity_tree(void)
+ *                void calculate_non_standard_physics_with_valid_gravity_tree_always(void)
+ *                void calculate_non_standard_physics_prior_mesh_construction(void)
+ *                void calculate_non_standard_physics_end_of_step(void)
+ *                int check_for_interruption_of_run(void)
+ *                void create_end_file(void)
+ *                integertime find_next_outputtime(integertime ti_curr)
+ *                void execute_resubmit_command(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <ctype.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../mesh/voronoi/voronoi.h"
+
+static void do_second_order_source_terms_first_half(void);
+static void do_second_order_source_terms_second_half(void);
+static void create_end_file(void);
+
+/*! \brief Contains the main simulation loop that iterates over
+ *  single timesteps.
+ *
+ *  The loop terminates when the cpu-time limit is
+ *  reached, when a `stop' file is found in the output directory, or
+ *  when the simulation ends because we arrived at TimeMax.
+ *
+ *  If the simulation is not resumed from restart files (RestartFlag != 1), a
+ *  domain decomposition is performed, the gravitational forces are computed
+ *  and the Voronoi mesh is constructed.
+ *
+ *  The main loop is structured as follows:
+ *   - find new timesteps: find_timesteps_without_gravity()
+ *   - first gravitational half kick: find_gravity_timesteps_and_do_gravity_step_first_half()
+ *   - gradients are calculated: calculate_gradients()
+ *   - vertex velocities are assigned: set_vertex_velocities()
+ *   - computation of the hydro flux: compute_interface_fluxes() (first half)
+ *   - (de)refinement of hydro cells: do_derefinements_and_refinements()
+ *   - drifting particles to next sync point: find_next_sync_point()
+ *   (Afterwards the timebins are updated, so different particles might
+ *   now be active than before)
+ *   - (if needed) a new domain decomposition: domain_Decomposition()
+ *   - construction of the Voronoi mesh: create_mesh()
+ *   - computation of the hydro flux: compute_interface_fluxes() (second half)
+ *   - update of primitive variables: update_primitive_variables()
+ *   - computation of gravitational forces: in do_gravity_step_second_half()
+ *   - second gravitational half kick: do_gravity_step_second_half()
+ *
+ *  \return void
+ */
+void run(void)
+{
+  CPU_Step[CPU_MISC] += measure_time();
+
+  if(RestartFlag != 1) /* if we have restarted from restart files, no need to do the setup sequence */
+    {
+      mark_active_timebins();
+
+      output_log_messages();
+
+      set_non_standard_physics_for_current_time();
+
+      ngb_treefree();
+      domain_free();
+      domain_Decomposition(); /* do domain decomposition if needed */
+
+      ngb_treeallocate();
+      ngb_treebuild(NumGas);
+
+      calculate_non_standard_physics_prior_mesh_construction();
+
+      create_mesh();
+
+      mesh_setup_exchange();
+
+      update_primitive_variables();
+
+      calculate_non_standard_physics_end_of_step();
+
+      exchange_primitive_variables();
+
+      calculate_gradients();
+
+      set_vertex_velocities(); /* determine the speed of the mesh-generating vertices */
+
+      ngb_update_velocities(); /* update the neighbor tree with the new vertex and cell velocities */
+
+      do_second_order_source_terms_second_half();
+
+      do_gravity_step_second_half();
+    }
+
+#if defined(VORONOI_STATIC_MESH)
+  if(RestartFlag == 1) /* restarted run: rebuild the static mesh with all gas cells temporarily marked active */
+    {
+      int n_hydro_backup   = TimeBinsHydro.NActiveParticles;
+      int *time_bin_hydro  = (int *)malloc(NumGas * sizeof(int));
+      int *hydro_particles = (int *)malloc(n_hydro_backup * sizeof(int));
+      for(int j = 0; j < TimeBinsHydro.NActiveParticles; j++)
+        hydro_particles[j] = TimeBinsHydro.ActiveParticleList[j];
+
+      for(int j = 0; j < NumGas; j++)
+        {
+          time_bin_hydro[j]                   = P[j].TimeBinHydro;
+          P[j].TimeBinHydro                   = All.HighestActiveTimeBin;
+          TimeBinsHydro.ActiveParticleList[j] = j;
+        }
+      TimeBinsHydro.NActiveParticles = NumGas;
+
+      create_mesh();
+      mesh_setup_exchange();
+
+      for(int j = 0; j < NumGas; j++) /* restore the saved hydro time bins */
+        P[j].TimeBinHydro = time_bin_hydro[j];
+
+      TimeBinsHydro.NActiveParticles = n_hydro_backup; /* restore the saved list of active particles */
+      for(int j = 0; j < TimeBinsHydro.NActiveParticles; j++)
+        TimeBinsHydro.ActiveParticleList[j] = hydro_particles[j];
+
+      free(time_bin_hydro);
+      free(hydro_particles);
+    }
+#endif /* #if defined(VORONOI_STATIC_MESH) */
+
+  while(1) /* main loop */
+    {
+      if(RestartFlag !=
+         1) /* if we are starting from restart files, skip in the first iteration the parts until the restart files were written  */
+        {
+          compute_statistics();
+
+          flush_everything();
+
+          create_snapshot_if_desired();
+
+          if(All.Ti_Current >= TIMEBASE) /* we reached the final time */
+            {
+              mpi_printf("\nFinal time=%g reached. Simulation ends.\n", All.TimeMax);
+
+              if(All.Ti_lastoutput != All.Ti_Current) /* make a snapshot at the final time in case none has been produced at this time */
+                produce_dump(); /* this will be overwritten if All.TimeMax is increased and the run is continued */
+
+              create_end_file();  // create empty file called end in output directory
+
+              break; /* leave the main loop; the final restart file is written after it */
+            }
+
+          find_timesteps_without_gravity(); /* find-timesteps */
+
+          find_gravity_timesteps_and_do_gravity_step_first_half(); /* gravity half-step for hydrodynamics */
+                                                                   /* kicks collisionless particles by half a step */
+
+#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
+          update_timesteps_from_gravity();
+#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) \
+        */
+
+          do_second_order_source_terms_first_half();
+
+          exchange_primitive_variables();
+
+          /* let's reconstruct gradients for every cell using Green-Gauss gradient estimation */
+          calculate_gradients();
+
+          /* determine the speed of the mesh-generating vertices */
+          set_vertex_velocities();
+
+          /* update the neighbor tree with the new vertex and cell velocities */
+          ngb_update_velocities();
+
+          exchange_primitive_variables_and_gradients();
+
+          /* compute intercell flux with Riemann solver and update the cells with the fluxes */
+          compute_interface_fluxes(&Mesh);
+
+#ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT
+#ifndef VORONOI_STATIC_MESH
+          free_mesh_structures_not_needed_for_derefinement_refinement();
+#endif /* #ifndef VORONOI_STATIC_MESH */
+#endif /* #ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT */
+
+#ifdef REFINEMENT
+          do_derefinements_and_refinements();
+#endif /* #ifdef REFINEMENT */
+
+          write_cpu_log(); /* output some CPU usage log-info (accounts for everything needed up to completion of the current
+                              sync-point) */
+
+          find_next_sync_point(); /* find next synchronization time */
+
+          make_list_of_active_particles();
+
+          output_log_messages(); /* write some info to log-files */
+
+#if !defined(VORONOI_STATIC_MESH)
+#ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT
+          free_all_remaining_mesh_structures();
+#else  /* #ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT */
+          free_mesh();
+#endif /* #ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT #else */
+#endif /* #if !defined(VORONOI_STATIC_MESH) */
+          /* Check whether we should write a restart file.
+           * Note that at this place we do not need to store the mesh, nor the gravity tree.
+           */
+          if(check_for_interruption_of_run())
+            return; /* restart files were already written by check_for_interruption_of_run() */
+        }
+      else
+        RestartFlag = 0; /* from the second iteration on, the full loop body is executed */
+
+      set_non_standard_physics_for_current_time();
+
+#if defined(VORONOI_STATIC_MESH) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) /* may only be used if there is no gravity \
+                                                                                           */
+#else /* #if defined(VORONOI_STATIC_MESH) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) */
+
+      if(All.HighestActiveTimeBin >= All.SmallestTimeBinWithDomainDecomposition) /* only do this for sufficiently large steps */
+        {
+#ifdef VORONOI_STATIC_MESH
+          free_mesh();
+#endif /* #ifdef VORONOI_STATIC_MESH */
+
+          ngb_treefree();
+          domain_free();
+
+          drift_all_particles();
+
+          domain_Decomposition(); /* do new domain decomposition, will also make a new chained-list of synchronized particles */
+
+          ngb_treeallocate();
+          ngb_treebuild(NumGas);
+
+#if defined(VORONOI_STATIC_MESH)
+          create_mesh();
+          mesh_setup_exchange();
+#endif /* #if defined(VORONOI_STATIC_MESH) */
+        }
+#endif /* #if defined(VORONOI_STATIC_MESH) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) #else */
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+      special_particle_update_list();
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+      calculate_non_standard_physics_prior_mesh_construction();
+
+#if !defined(VORONOI_STATIC_MESH)
+      create_mesh();
+      mesh_setup_exchange();
+#endif /* #if !defined(VORONOI_STATIC_MESH) */
+
+      exchange_primitive_variables_and_gradients();
+
+      compute_interface_fluxes(&Mesh);
+
+      update_primitive_variables(); /* this effectively closes off the hydro step */
+
+      /* the masses and positions are updated, let's get new forces and potentials */
+
+      do_second_order_source_terms_second_half();
+
+      do_gravity_step_second_half(); /* this closes off the gravity half-step */
+
+      /* do any extra physics, Strang-split (update both primitive and conserved variables as needed ) */
+      calculate_non_standard_physics_end_of_step();
+    }
+
+  restart(0); /* write a restart file at final time - can be used to continue simulation beyond final time */
+
+  write_cpu_log(); /* output final cpu measurements */
+}
+
+/*! \brief Source terms before the hydrodynamics timestep (file-local helper of run()).
+ *         Calls do_mhd_source_terms_first_half() if MHD is defined, else does nothing.
+ *  \return void
+ */
+static void do_second_order_source_terms_first_half(void)
+{
+#ifdef MHD
+  do_mhd_source_terms_first_half();
+#endif /* #ifdef MHD */
+}
+
+/*! \brief Source terms after the hydrodynamics timestep (file-local helper of run()).
+ *
+ *  Calls do_mhd_source_terms_second_half() if MHD is defined, else does nothing.
+ *  If there are multiple source terms, the order of the second half source
+ *  terms should be applied inverse to the order of the source terms in
+ *  do_second_order_source_terms_first_half().
+ *  \return void
+ */
+static void do_second_order_source_terms_second_half(void)
+{
+#ifdef MHD
+  do_mhd_source_terms_second_half();
+#endif /* #ifdef MHD */
+}
+
+/*! \brief Updates time-dependent settings of the extra physics modules.
+ *
+ *  run() invokes this once in its setup sequence and once per iteration
+ *  of the main loop, each time before a possible domain decomposition.
+ *  With COOLING enabled it forwards to IonizeParams(); otherwise it is a no-op.
+ *
+ *  \return void
+ */
+void set_non_standard_physics_for_current_time(void)
+{
+#ifdef COOLING
+  IonizeParams(); /* UV background for the current time */
+#endif /* #ifdef COOLING */
+}
+
+/*! \brief Calls extra modules after the gravitational force is recomputed.
+ *
+ *  Only called if full gravity tree is present.
+ *  *** NOTICE *** if HIERARCHICAL_GRAVITY is adopted, this function is carried
+ *  out once per synchronization time, with in general only a partial tree that
+ *  does not necessarily contain all particles. The latter is the case only for
+ *  steps where the highest timesteps are active ("full timesteps").
+ *  The body is empty here, so calling it currently has no effect.
+ *  \return void
+ */
+void calculate_non_standard_physics_with_valid_gravity_tree(void) {}
+
+/*! \brief Calls extra modules after the gravitational force is recomputed.
+ *
+ *  This is for runs which have the full tree at each time step, i.e.
+ *  without HIERARCHICAL_GRAVITY. The body is empty here: currently a no-op.
+ *
+ *  \return void
+ */
+void calculate_non_standard_physics_with_valid_gravity_tree_always(void) {}
+
+/*! \brief Hook for extra physics that has to run before the Voronoi mesh is built.
+ *         Calls sfr_create_star_particles() if COOLING and USE_SFR are both set.
+ *  \return void
+ */
+void calculate_non_standard_physics_prior_mesh_construction(void)
+{
+#if defined(USE_SFR) && defined(COOLING)
+  sfr_create_star_particles();
+#endif /* #if defined(USE_SFR) && defined(COOLING) */
+}
+
+/*! \brief Hook for extra physics applied at the end of a step.
+ *
+ *  Inside the main loop, run() calls this after do_gravity_step_second_half().
+ *  With COOLING enabled it dispatches to cooling_and_starformation() when
+ *  USE_SFR is set as well, and to cooling_only() otherwise.
+ *  \return void
+ */
+void calculate_non_standard_physics_end_of_step(void)
+{
+#if defined(COOLING) && defined(USE_SFR)
+  /* cooling together with star formation */
+  cooling_and_starformation();
+#elif defined(COOLING)
+  /* cooling without star formation */
+  cooling_only();
+#endif /* #if defined(COOLING) && defined(USE_SFR) #elif defined(COOLING) */
+}
+
+/*! \brief Checks whether the run must be interrupted.
+ *
+ *  The run is interrupted either if a file named `stop' is present in
+ *  the output directory or if 85% of the CPU time limit is used up. This
+ *  routine also handles the regular writing of restart files. Restart files
+ *  are also written, without stopping, if a file named `restart' is present.
+ *
+ *  \return 1 if the run has to be interrupted, 0 otherwise.
+ */
+int check_for_interruption_of_run(void)
+{
+  /* Check whether we need to interrupt the run */
+  int stopflag = 0;
+  if(ThisTask == 0)
+    {
+      FILE *fd;
+      char stopfname[MAXLEN_PATH];
+
+      sprintf(stopfname, "%sstop", All.OutputDir);
+      if((fd = fopen(stopfname, "r"))) /* Is the stop-file present? If yes, interrupt the run. */
+        {
+          fclose(fd);
+          printf("stop-file detected. stopping.\n");
+          stopflag = 1;
+          unlink(stopfname);
+        }
+
+      sprintf(stopfname, "%srestart", All.OutputDir);
+      if((fd = fopen(stopfname, "r"))) /* Is the restart-file present? If yes, write a user-requested restart file. */
+        {
+          fclose(fd);
+          printf("restart-file detected. writing restart files.\n");
+          stopflag = 3;
+          unlink(stopfname);
+        }
+
+      if(CPUThisRun > 0.85 * All.TimeLimitCPU) /* are we running out of CPU-time ? If yes, interrupt run. */
+        {
+          printf("reaching time-limit. stopping.\n");
+          stopflag = 2;
+        }
+    }
+
+  MPI_Bcast(&stopflag, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+  if(stopflag)
+    {
+      restart(0); /* write restart file */
+
+      MPI_Barrier(MPI_COMM_WORLD);
+
+      if(stopflag == 3)
+        return 0;
+
+      if(stopflag == 2 && ThisTask == 0)
+        {
+          FILE *fd;
+          char contfname[MAXLEN_PATH];
+          sprintf(contfname, "%scont", All.OutputDir);
+          if((fd = fopen(contfname, "w")))
+            fclose(fd);
+
+          if(All.ResubmitOn)
+            execute_resubmit_command();
+        }
+      return 1;
+    }
+
+  /* is it time to write a regular restart-file? (for security) */
+  if(ThisTask == 0)
+    {
+      if((CPUThisRun - All.TimeLastRestartFile) >= All.CpuTimeBetRestartFile)
+        {
+          All.TimeLastRestartFile = CPUThisRun;
+          stopflag                = 3;
+        }
+      else
+        stopflag = 0;
+    }
+
+  MPI_Bcast(&stopflag, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+  if(stopflag == 3)
+    {
+      restart(0); /* write an occasional restart file */
+      stopflag = 0;
+    }
+  return 0;
+}
+
+/*! \brief Returns the next output time that is equal or larger than
+ *         ti_curr; also sets DumpFlagNextSnap for that output.
+ *
+ *  \param[in] ti_curr Current simulation time on the integer timeline.
+ *
+ *  \return Next output time (integer timeline); 2 * TIMEBASE if there is none.
+ */
+integertime find_next_outputtime(integertime ti_curr)
+{
+  int i, iter = 0;
+  integertime ti, ti_next;
+  double next, time;
+
+  DumpFlagNextSnap = 1; /* default; replaced by the list entry's flag when an output list entry is selected */
+  ti_next          = -1; /* -1 means: no output time found yet */
+
+  if(All.OutputListOn)
+    {
+      for(i = 0; i < All.OutputListLength; i++)
+        {
+          time = All.OutputListTimes[i];
+
+          if(time >= All.TimeBegin && time <= All.TimeMax)
+            {
+              if(All.ComovingIntegrationOn)
+                ti = (integertime)(log(time / All.TimeBegin) / All.Timebase_interval);
+              else
+                ti = (integertime)((time - All.TimeBegin) / All.Timebase_interval);
+
+#ifdef PROCESS_TIMES_OF_OUTPUTLIST
+              /* first, determine maximum output interval based on All.MaxSizeTimestep */
+              integertime timax = (integertime)(All.MaxSizeTimestep / All.Timebase_interval);
+
+              /* make it a power 2 subdivision */
+              integertime ti_min = TIMEBASE;
+              while(ti_min > timax)
+                ti_min >>= 1;
+              timax = ti_min;
+
+              double multiplier = ti / ((double)timax);
+
+              /* now round this to the nearest multiple of timax */
+              ti = ((integertime)(multiplier + 0.5)) * timax;
+#endif /* #ifdef PROCESS_TIMES_OF_OUTPUTLIST */
+              if(ti >= ti_curr)
+                {
+                  if(ti_next == -1) /* first admissible list entry */
+                    {
+                      ti_next          = ti;
+                      DumpFlagNextSnap = All.OutputListFlag[i];
+                    }
+
+                  if(ti_next > ti) /* an earlier admissible list entry replaces the current candidate */
+                    {
+                      ti_next          = ti;
+                      DumpFlagNextSnap = All.OutputListFlag[i];
+                    }
+                }
+            }
+        }
+    }
+  else
+    {
+      if(All.ComovingIntegrationOn) /* TimeBetSnapshot is applied as a multiplicative factor in this case (see below) */
+        {
+          if(All.TimeBetSnapshot <= 1.0)
+            terminate("TimeBetSnapshot > 1.0 required for your simulation.\n");
+        }
+      else
+        {
+          if(All.TimeBetSnapshot <= 0.0)
+            terminate("TimeBetSnapshot > 0.0 required for your simulation.\n");
+        }
+
+      time = All.TimeOfFirstSnapshot;
+      iter = 0;
+
+      while(time < All.TimeBegin) /* skip output times that lie before the start of the run */
+        {
+          if(All.ComovingIntegrationOn)
+            time *= All.TimeBetSnapshot;
+          else
+            time += All.TimeBetSnapshot;
+
+          iter++;
+
+          if(iter > 1000000) /* give up instead of looping forever */
+            terminate("Can't determine next output time.\n");
+        }
+
+      while(time <= All.TimeMax) /* advance until the first output time at or after ti_curr */
+        {
+          if(All.ComovingIntegrationOn)
+            ti = (integertime)(log(time / All.TimeBegin) / All.Timebase_interval);
+          else
+            ti = (integertime)((time - All.TimeBegin) / All.Timebase_interval);
+
+          if(ti >= ti_curr)
+            {
+              ti_next = ti;
+              break;
+            }
+
+          if(All.ComovingIntegrationOn)
+            time *= All.TimeBetSnapshot;
+          else
+            time += All.TimeBetSnapshot;
+
+          iter++;
+
+          if(iter > 1000000)
+            terminate("Can't determine next output time.\n");
+        }
+    }
+
+  if(ti_next == -1)
+    {
+      ti_next = 2 * TIMEBASE; /* this will prevent any further output */
+
+      mpi_printf("\nRUN: There is no valid time for a further snapshot file.\n");
+    }
+  else
+    {
+      if(All.ComovingIntegrationOn) /* convert the integer time back to a simulation time for reporting */
+        next = All.TimeBegin * exp(ti_next * All.Timebase_interval);
+      else
+        next = All.TimeBegin + ti_next * All.Timebase_interval;
+
+#ifdef TIMESTEP_OUTPUT_LIMIT
+      mpi_printf("\nRUN: Limiting timestep to %g to fulfill output frequency", 0.1 * (next - All.Time));
+      All.TimestepOutputLimit = 0.1 * (next - All.Time);
+#endif /* #ifdef TIMESTEP_OUTPUT_LIMIT */
+
+      mpi_printf("\nRUN: Setting next time for snapshot file to Time_next= %g  (DumpFlag=%d)\n\n", next, DumpFlagNextSnap);
+    }
+
+  return ti_next;
+}
+
+/*! \brief Creates an empty file called 'end' in the output directory.
+ *
+ *  The existence of this file can be used e.g. by analysis scripts to
+ *  verify that the simulation has run up to its final time and ended without
+ *  error. Note that the end-file is completely passive.
+ *
+ *  \return void
+ */
+static void create_end_file(void)
+{
+  FILE *fd;
+  char endfname[MAXLEN_PATH];
+  snprintf(endfname, sizeof(endfname), "%send", All.OutputDir); /* bounded: a long OutputDir cannot overrun the buffer */
+  if((fd = fopen(endfname, "w")))
+    fclose(fd);
+}
+
+/*! \brief Executes the resubmit command (no-op if NOCALLSOFSYSTEM is set).
+ *
+ *  \return void
+ */
+void execute_resubmit_command(void)
+{
+#ifndef NOCALLSOFSYSTEM
+  /* hand the command over directly: no fixed-size copy that could overflow */
+  if(system(All.ResubmitCommand) != 0)
+    printf("RUN: resubmit command returned a non-zero status.\n");
+#endif /* #ifndef NOCALLSOFSYSTEM */
+}
diff --git a/src/amuse/community/arepo/src/mesh/criterion_derefinement.c b/src/amuse/community/arepo/src/mesh/criterion_derefinement.c
new file mode 100644
index 0000000000..7108310fc2
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/criterion_derefinement.c
@@ -0,0 +1,181 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/criterion_derefinement.c
+ * \date        05/2018
+ * \brief       Criteria for the de-refinement of a cell.
+ * \details     Routines which are checking whether a cell should be
+ *              de-refined.
+ *              contains functions:
+ *                int derefine_should_this_cell_be_merged(int i, int flag)
+ *                static int derefine_criterion_default(int i)
+ *                static int derefine_criterion_jeans_ref(int i)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 04.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#if defined(REFINEMENT_MERGE_CELLS) && !defined(ONEDIMS)
+static int derefine_criterion_jeans_ref(int i);
+static int derefine_criterion_default(int i);
+static int jeans_derefinement_criteria(int i);
+
+/*! \brief Should this cell be dissolved?
+ *
+ *  This function signals whether a cell should be dissolved. This needs to be
+ *  adjusted according to the needs of the simulation in question. Cells whose
+ *  SphP[].Flag variable was set beforehand will also be dissolved.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *  \param[in] flag If nonzero, flag is returned, unless one of the optional
+ *             compile-time checks that precede it has already decided.
+ *
+ *  \return Flag if this cell should be dissolved.
+ */
+int derefine_should_this_cell_be_merged(int i, int flag)
+{
+#ifdef REFINEMENT_HIGH_RES_GAS
+  if(SphP[i].AllowRefinement == 0)
+    return 0;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+
+#ifdef NODEREFINE_BACKGROUND_GRID
+  /* Keep in mind that this is used in cosmological zoom simulations.
+   * I.e. this enforces no derefinement for cells in low-res region, while not
+   * affecting the high-res region.
+   */
+  if(SphP[i].Volume > 0.1 * All.MeanVolume)
+    return 0;
+#endif /* #ifdef NODEREFINE_BACKGROUND_GRID */
+
+#if defined(REFINEMENT_VOLUME_LIMIT)
+  double maxvolume = All.MaxVolume;
+  double minvolume = All.MinVolume;
+
+  if(SphP[i].Volume > 0.5 * maxvolume) /* merging must not push the cell towards the maximum volume */
+    return 0;
+
+  if(SphP[i].Volume < 0.5 * minvolume) /* cell is below half the minimum volume: always merge */
+    return 1;
+
+  if(All.MaxVolumeDiff > 0 && SphP[i].Volume > 0.3 * All.MaxVolumeDiff * SphP[i].MinNgbVolume)
+    return 0;
+#endif /* #if defined(REFINEMENT_VOLUME_LIMIT) */
+
+  if(flag) /* caller-supplied flag takes precedence over the run-time criterion below */
+    return flag;
+
+  switch(All.DerefinementCriterion)
+    {
+      case 0:
+        return 0;
+        break;
+
+      case 1:
+        return derefine_criterion_default(i);
+        break;
+
+      case 2:
+        return derefine_criterion_jeans_ref(i);
+        break;
+
+      default:
+        terminate("invalid derefinement criterion specified");
+        break;
+    }
+
+  return 0;
+}
+
+/*
+ * static functions; i.e. functions that are only called within this file
+ */
+
+/*! \brief Default de-refinement criterion.
+ *
+ *  Flags a cell whose mass has dropped below half the target gas mass.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return 1 if this cell should be dissolved, 0 otherwise.
+ */
+static int derefine_criterion_default(int i)
+{
+  int dissolve = 0; /* stays 0 unless splitting and merging are both compiled in */
+#if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS)
+  if(P[i].Mass < 0.5 * All.TargetGasMass)
+    dissolve = 1;
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS) */
+
+  return dissolve;
+}
+
+/*! \brief Wrapper for Jeans de-refinement criterion.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return Flag if this cell should be dissolved; always 0 without JEANS_REFINEMENT.
+ */
+static int derefine_criterion_jeans_ref(int i)
+{
+#ifdef JEANS_REFINEMENT
+  return jeans_derefinement_criteria(i);
+#endif /* #ifdef JEANS_REFINEMENT */
+  return 0; /* only reached when JEANS_REFINEMENT is not defined */
+}
+
+/*! \brief De-refinement criterion according to Jeans stability of a cell.
+ *
+ *  The cell can only be de-refined if the Jeans length is resolved by more
+ *  than 1.5 * JEANS_REFINEMENT cells. Otherwise, no de-refinement is possible
+ *  even if the cell has a low mass.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return Flag if this cell should be dissolved.
+ */
+static int jeans_derefinement_criteria(int i)
+{
+#ifdef JEANS_REFINEMENT
+  double jeans_number, jeans_length, sound_speed, dx;
+  sound_speed  = sqrt(GAMMA * SphP[i].Pressure / SphP[i].Density);
+  jeans_length = sqrt(M_PI / All.G / SphP[i].Density) * sound_speed;
+  dx           = 2.0 * get_cell_radius(i);
+  jeans_number = jeans_length / dx;
+
+  /* a low mass alone is not sufficient: the Jeans length must stay well resolved after merging */
+  if(jeans_number > 1.5 * JEANS_REFINEMENT && P[i].Mass < 0.5 * All.TargetGasMass)
+    return 1;
+  return 0;
+#else  /* #ifdef JEANS_REFINEMENT */
+  return P[i].Mass < 0.5 * All.TargetGasMass; /* no Jeans criterion compiled in: plain mass criterion */
+#endif /* #ifdef JEANS_REFINEMENT #else */
+}
+
+#endif /* #if defined(REFINEMENT_MERGE_CELLS) && !defined(ONEDIMS) */
diff --git a/src/amuse/community/arepo/src/mesh/criterion_refinement.c b/src/amuse/community/arepo/src/mesh/criterion_refinement.c
new file mode 100644
index 0000000000..5b0334972a
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/criterion_refinement.c
@@ -0,0 +1,267 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/criterion_refinement.c
+ * \date        05/2018
+ * \brief       Criteria for the refinement of a cell.
+ * \details     Routines which are checking whether a cell should be refined.
+ *              contains functions:
+ *                int should_this_cell_be_split(int i)
+ *                static int can_this_cell_be_split(int i)
+ *                static int refine_criterion_default(int i)
+ *                static int refine_criterion_jeans_ref(int i)
+ *                static int jeans_refinement_criteria(int i)
+ *                static int refine_criterion_volume(int i)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 04.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#if defined(REFINEMENT_SPLIT_CELLS) && !defined(ONEDIMS)
+static int can_this_cell_be_split(int i);
+static int refine_criterion_default(int i);
+static int refine_criterion_jeans_ref(int i);
+static int jeans_refinement_criteria(int i);
+
+#ifdef REFINEMENT_VOLUME_LIMIT
+static int refine_criterion_volume(int i);
+#endif
+
+#ifdef REFINEMENT_MERGE_CELLS
+extern char *FlagDoNotRefine; /* defined once in refinement.c; a second definition here fails to link with -fno-common */
+#endif /* #ifdef REFINEMENT_MERGE_CELLS */
+
+/*! \brief Should this cell be refined?
+ *
+ *  This function signals whether a cell needs further refinement. This needs
+ *  to be adjusted according to the needs of the simulation in question.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return Flag if this cell should be split.
+ */
+int should_this_cell_be_split(int i)
+{
+#ifdef REFINEMENT_MERGE_CELLS
+  if(FlagDoNotRefine[i]) /* per-cell veto set elsewhere */
+    return 0;
+#endif /* #ifdef REFINEMENT_MERGE_CELLS */
+
+  if(P[i].Mass == 0 && P[i].ID == 0) /* skip cells that have been swallowed or dissolved */
+    return 0;
+
+#if defined(REFINEMENT_VOLUME_LIMIT)
+  double maxvolume = All.MaxVolume;
+  double minvolume = All.MinVolume;
+
+  if(SphP[i].Volume > 2. * maxvolume) /* cell exceeds twice the maximum volume: split if its shape allows it */
+    if(can_this_cell_be_split(i))
+      return 1;
+
+  if(SphP[i].Volume < 2. * minvolume) /* splitting must not push the cell towards the minimum volume */
+    return 0;
+
+  if(refine_criterion_volume(i))
+    if(can_this_cell_be_split(i))
+      return 1;
+#endif /* #if defined(REFINEMENT_VOLUME_LIMIT) */
+
+  switch(All.RefinementCriterion) /* select the function that evaluates the refinement criterion */
+    {
+      case 0:
+        return 0;
+        break;
+
+      case 1:
+        return refine_criterion_default(i);
+        break;
+
+      case 2:
+        return refine_criterion_jeans_ref(i);
+        break;
+
+      default:
+        terminate("invalid refinement criterion specified");
+        break;
+    }
+
+  return 0;
+}
+
+/*
+ * static functions; i.e. functions that are only called within this file
+ */
+
+/*! \brief Is cell round enough to be refined?
+ *
+ *  Tells whether a cell that is a candidate for refinement is allowed to be
+ *  split. With REGULARIZE_MESH_FACE_ANGLE the maximum face angle has to be
+ *  below 1.5 * All.CellMaxAngleFactor; otherwise the distance between P[i].Pos
+ *  and SphP[i].Center has to be below 2 * All.CellShapingFactor cell radii.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return Flag if this cell is allowed to be refined.
+ */
+static int can_this_cell_be_split(int i)
+{
+#ifdef REGULARIZE_MESH_FACE_ANGLE
+  /* roundness measured by the largest face angle */
+  return SphP[i].MaxFaceAngle < 1.5 * All.CellMaxAngleFactor;
+
+#else  /* #ifdef REGULARIZE_MESH_FACE_ANGLE */
+  /* roundness measured by the offset between P[i].Pos and SphP[i].Center */
+  double off_x  = nearest_x(P[i].Pos[0] - SphP[i].Center[0]);
+  double off_y  = nearest_y(P[i].Pos[1] - SphP[i].Center[1]);
+  double off_z  = nearest_z(P[i].Pos[2] - SphP[i].Center[2]);
+  double offset = sqrt(off_x * off_x + off_y * off_y + off_z * off_z);
+  double radius = get_cell_radius(i);
+
+  /* only refine cells which are reasonably 'round' */
+  return offset < 2.0 * All.CellShapingFactor * radius;
+#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */
+}
+
+/*! \brief Default refinement criterion.
+ *
+ *  Flags a splittable cell whose mass exceeds twice the target gas mass.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return 1 if this cell should be refined, 0 otherwise.
+ */
+static int refine_criterion_default(int i)
+{
+#ifdef REFINEMENT_HIGH_RES_GAS
+  if(SphP[i].AllowRefinement == 0)
+    return 0;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+  if(!can_this_cell_be_split(i))
+    return 0; /* default is not to refine */
+  return P[i].Mass > 2.0 * All.TargetGasMass;
+}
+
+/*! \brief Target mass refinement criterion extended by a Jeans criterion.
+ *
+ *  A splittable cell is refined if it exceeds twice the target gas mass or,
+ *  with JEANS_REFINEMENT enabled, if jeans_refinement_criteria() requests it.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return Flag if this cell should be refined.
+ */
+static int refine_criterion_jeans_ref(int i)
+{
+#ifdef REFINEMENT_HIGH_RES_GAS
+  if(SphP[i].AllowRefinement == 0)
+    return 0;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+
+  if(!can_this_cell_be_split(i))
+    return 0;
+
+  if(P[i].Mass > 2.0 * All.TargetGasMass)
+    return 1;
+
+#ifdef JEANS_REFINEMENT
+  return jeans_refinement_criteria(i);
+#else  /* #ifdef JEANS_REFINEMENT */
+  return 0;
+#endif /* #ifdef JEANS_REFINEMENT #else */
+}
+
+/*! \brief Refinement criterion according to Jeans stability of a cell.
+ *
+ *  A splittable cell is refined if its Jeans length is covered by fewer than
+ *  JEANS_REFINEMENT cell diameters. Always 0 without JEANS_REFINEMENT.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return Flag if this cell should be refined.
+ */
+static int jeans_refinement_criteria(int i)
+{
+#ifdef JEANS_REFINEMENT
+  if(!can_this_cell_be_split(i))
+    return 0;
+
+  /* Jeans length of the cell from its sound speed and density */
+  double csnd    = sqrt(GAMMA * SphP[i].Pressure / SphP[i].Density);
+  double l_jeans = sqrt(M_PI / All.G / SphP[i].Density) * csnd;
+
+  /* number of cell diameters that fit into one Jeans length */
+  double diameter = 2.0 * get_cell_radius(i);
+  double n_jeans  = l_jeans / diameter;
+
+  /* refine if the Jeans length is covered by too few cells */
+  if(n_jeans < JEANS_REFINEMENT)
+    return 1;
+#endif /* #ifdef JEANS_REFINEMENT */
+
+  return 0;
+}
+
+#ifdef REFINEMENT_VOLUME_LIMIT
+/*! \brief Refinement criterion based on the minimum volume of a
+ *  neighboring cell.
+ *
+ *  This criterion is supposed to avoid sudden jumps in resolution which lead
+ *  to an inaccurate result. A cell whose volume exceeds All.MaxVolumeDiff
+ *  times SphP[].MinNgbVolume is flagged, provided it is reasonably round.
+ *  The criterion is inactive if All.MaxVolumeDiff is not positive.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return Flag if this cell should be refined.
+ */
+static int refine_criterion_volume(int i)
+{
+  /* nothing to do unless the cell is too large compared to SphP[i].MinNgbVolume */
+  if(!(All.MaxVolumeDiff > 0 && SphP[i].Volume > All.MaxVolumeDiff * SphP[i].MinNgbVolume))
+    return 0;
+
+#ifdef REGULARIZE_MESH_FACE_ANGLE
+  if(SphP[i].MaxFaceAngle < 1.5 * All.CellMaxAngleFactor)
+    return 1;
+#else  /* #ifdef REGULARIZE_MESH_FACE_ANGLE */
+  double off_x  = nearest_x(P[i].Pos[0] - SphP[i].Center[0]);
+  double off_y  = nearest_y(P[i].Pos[1] - SphP[i].Center[1]);
+  double off_z  = nearest_z(P[i].Pos[2] - SphP[i].Center[2]);
+  double offset = sqrt(off_x * off_x + off_y * off_y + off_z * off_z);
+  double radius = get_cell_radius(i);
+
+  if(offset < 2.0 * All.CellShapingFactor * radius) /* only refine cells which are reasonably 'round' */
+    return 1;
+#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */
+
+  return 0;
+}
+#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */
+
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) && !defined(ONEDIMS) */
diff --git a/src/amuse/community/arepo/src/mesh/mesh.h b/src/amuse/community/arepo/src/mesh/mesh.h
new file mode 100644
index 0000000000..654555ebf6
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/mesh.h
@@ -0,0 +1,268 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/mesh.h
+ * \date        05/2018
+ * \brief       Header for mesh structures.
+ * \details
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 29.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef MESH_H
+#define MESH_H
+
+#define SCALAR_TYPE_PASSIVE 0   /*!< only advection */
+#define SCALAR_TYPE_SPECIES 1   /*!< species are normalised to guarantee sum{species}=1 */
+#define SCALAR_TYPE_NORMALIZE 2 /*!< the same normalisation factor as for species is applied, but no contribution to sum{species} */
+
+#define REFL_X_FLAGS 115043766
+#define REFL_Y_FLAGS 132379128
+#define REFL_Z_FLAGS 134217216
+
+#define OUTFLOW_X (1 << 27)
+#define OUTFLOW_Y (1 << 28)
+#define OUTFLOW_Z (1 << 29)
+
+#if defined MAXSCALARS
+extern struct scalar_elements
+{
+  int type;           /*!< scalar type, determines whether a normalization is applied */
+  size_t offset;      /*!< offset of the primitive quantity in the SphP struct */
+  size_t offset_mass; /*!< offset of the conserved quantity in the SphP struct */
+} scalar_elements[MAXSCALARS];
+
+extern struct scalar_index
+{
+#ifdef REFINEMENT_HIGH_RES_GAS
+  int HighResMass;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+} ScalarIndex;
+
+extern int N_Scalar; /*!< number of registered scalars */
+#endif               /* #if defined MAXSCALARS */
+
+#define GRADIENT_TYPE_NORMAL 0
+#define GRADIENT_TYPE_VELX 1
+#define GRADIENT_TYPE_VELY 2
+#define GRADIENT_TYPE_VELZ 3
+#define GRADIENT_TYPE_DENSITY 4
+#define GRADIENT_TYPE_PRESSURE 5
+#define GRADIENT_TYPE_UTHERM 6
+#define GRADIENT_TYPE_AX 7
+#define GRADIENT_TYPE_AY 8
+#define GRADIENT_TYPE_AZ 9
+#define GRADIENT_TYPE_FLD 10
+#define GRADIENT_TYPE_RTF 11
+
+extern struct grad_elements
+{
+  int type;           /*!< gradient type, ensures special treatment for velocities and speed of sound */
+  size_t offset;      /*!< offset of the quantity in the SphP struct */
+  size_t offset_exch; /*!< offset of the quantity in the PrimExch struct */
+  size_t offset_grad; /*!< offset in the grad_data struct */
+  double *min_value, *max_value;
+  double value0, value1;
+} grad_elements[MAXGRADIENTS], *GDensity, *GVelx, *GVely, *GVelz, *GPressure, *GUtherm;
+
+extern int N_Grad; /*!< number of gradients to be calculated */
+
+extern struct grad_data
+{
+  MySingle drho[3];
+
+  MySingle dvel[3][3];
+  MySingle dpress[3];
+
+#ifdef MHD
+  MySingle dB[3][3];
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+  MySingle dscalars[MAXSCALARS][3];
+#endif /* #ifdef MAXSCALARS */
+} * GradExch;
+
+extern struct primexch
+{
+  double Volume;
+  MyFloat Density;
+
+  MyFloat VelGas[3];
+  MyFloat VelVertex[3];
+
+#ifdef MHD
+  MyFloat B[3];
+
+#ifdef MHD_POWELL
+  MyFloat DivB;
+#endif /* #ifdef MHD_POWELL */
+
+  MyFloat CurlB[3];
+#endif /* #ifdef MHD */
+  MyFloat Pressure;
+
+#ifdef MAXSCALARS
+  MyFloat Scalars[MAXSCALARS];
+#endif /* #ifdef MAXSCALARS */
+
+  double TimeLastPrimUpdate;
+
+  MyDouble Center[3];
+  MyFloat OldMass;
+  MySingle Csnd;
+  MySingle SurfaceArea;
+  MySingle ActiveArea;
+  /*  int task, index; */
+  short int TimeBinHydro;
+} * PrimExch;
+
+#ifdef REFINEMENT
+extern struct refdata
+{
+#ifdef REFINEMENT_VOLUME_LIMIT
+  double Volume;
+#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */
+  short int TimeBinHydro;
+} * RefExch;
+#endif /* #ifdef REFINEMENT */
+
+typedef struct face_data
+{
+  int p1, p2;
+#ifdef REFINEMENT_MERGE_CELLS
+  int t, nr; /* delaunay tetra and edge number that generated this face */
+#endif       /* #ifdef REFINEMENT_MERGE_CELLS */
+
+#ifdef OPTIMIZE_MEMORY_USAGE
+  MyFloat area;
+  MyFloat cx, cy, cz; /* center-of-mass of face */
+#else                 /* #ifdef OPTIMIZE_MEMORY_USAGE */
+  double area;
+  double cx, cy, cz; /* center-of-mass of face */
+#endif                /* #ifdef OPTIMIZE_MEMORY_USAGE #else */
+
+#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS
+  double area_backup;
+#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */
+#ifdef TETRA_INDEX_IN_FACE
+  int dt_index;
+#endif /* #ifdef TETRA_INDEX_IN_FACE */
+} face;
+
+/*! left or right state of a face */
+struct state
+{
+  double dx, dy, dz;
+  double dt_half;
+  short int timeBin;
+
+  double rho;
+  double velx, vely, velz;
+  double press;
+  double oldmass;
+  double surfacearea;
+  double activearea;
+  double volume;
+
+  MyFloat velGas[3];
+  MyFloat velVertex[3];
+  struct grad_data *grad;
+
+  double csnd;
+  double Energy;
+#ifdef MHD
+  double Bx, By, Bz;
+#ifdef MHD_POWELL
+  double divB;
+#endif /* #ifdef MHD_POWELL */
+  double CurlB[3];
+#endif /* #ifdef MHD */
+
+#if defined(GODUNOV_STATS)
+  double mach;
+#endif /* #if defined(GODUNOV_STATS) */
+
+#ifdef MAXSCALARS
+  double scalars[MAXSCALARS];
+#endif /* #ifdef MAXSCALARS */
+  MyIDType ID;
+
+#ifdef ONEDIMS_SPHERICAL
+  double radius;
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+  double dtExtrapolation;
+};
+
+/*! state on a face determined by riemann solver */
+extern struct state_face
+{
+  double rho;
+  double velx, vely, velz;
+  double press;
+#ifdef MHD
+  double Bx, By, Bz;
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+  double *scalars;
+#endif /* #ifdef MAXSCALARS */
+} state_face;
+
+/*! flux through a face */
+extern struct fluxes
+{
+  double mass;
+  double momentum[3];
+  double energy;
+
+#ifdef MHD
+  double B[3];
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+  double scalars[MAXSCALARS];
+#endif /* #ifdef MAXSCALARS */
+} fluxes, diffusionfluxes;
+
+extern struct geometry
+{
+  double nn;
+  double nx, ny, nz;
+  double mx, my, mz;
+  double px, py, pz;
+  double cx, cy, cz;
+} geom;
+
+struct pv_update_data
+{
+  double atime;
+  double hubble_a;
+  double a3inv;
+};
+/* kept inside the include guard so that including mesh.h twice does not redefine the struct */
+struct fvs_stat
+{
+  int count_disable_extrapolation;
+};
+#endif /* MESH_H */
diff --git a/src/amuse/community/arepo/src/mesh/refinement.c b/src/amuse/community/arepo/src/mesh/refinement.c
new file mode 100644
index 0000000000..20b2c4d5a2
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/refinement.c
@@ -0,0 +1,217 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/refinement.c
+ * \date        05/2018
+ * \brief       Driver routines that handle refinement and de-refinement.
+ * \details     contains functions:
+ *                void do_derefinements_and_refinements()
+ *                void refinement_prepare()
+ *                void refinement_cleanup()
+ *                void move_collisionless_particle(int new_i, int old_i)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../main/allvars.h"
+
+#ifdef REFINEMENT
+#include "../main/proto.h"
+
+#if defined(REFINEMENT_MERGE_CELLS) && defined(REFINEMENT_SPLIT_CELLS)
+char *FlagDoNotRefine; /* NumGas chars; allocated in refinement_prepare() and freed in refinement_cleanup() */
+#endif /* #if defined (REFINEMENT_MERGE_CELLS) && defined (REFINEMENT_SPLIT_CELLS) */
+
+static void refinement_prepare(); /* file-local; called first by do_derefinements_and_refinements() */
+static void refinement_cleanup(); /* file-local; called last by do_derefinements_and_refinements() */
+
+/*! \brief Runs one complete de-refinement / refinement pass.
+ *
+ *  Called in main run loop (run.c). Order of operations: prepare, merge
+ *  cells (if compiled in), split cells (if compiled in), clean up.
+ *  \return void
+ */
+void do_derefinements_and_refinements()
+{
+  refinement_prepare(); /* set up the data of this pass */
+
+#if defined(REFINEMENT_MERGE_CELLS)
+  do_derefinements(); /* merging comes first ... */
+#endif /* #if defined(REFINEMENT_MERGE_CELLS) */
+
+#if defined(REFINEMENT_SPLIT_CELLS)
+  do_refinements(); /* ... splitting second */
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */
+
+  refinement_cleanup(); /* release the data of this pass */
+}
+
+/*! \brief Prepares for refinement.
+ *
+ *  Allocates FlagDoNotRefine when both cell merging and cell splitting are
+ *  compiled in. With REFINEMENT_VOLUME_LIMIT, additionally records for every
+ *  active cell the smallest volume found along its connection list. The
+ *  whole routine is bracketed by the CPU_REFINE timer.
+ *
+ *  \return void
+ */
+void refinement_prepare()
+{
+  TIMER_START(CPU_REFINE);
+
+#ifdef REFINEMENT_VOLUME_LIMIT
+  int n, cell;
+#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */
+
+#if defined(REFINEMENT_MERGE_CELLS) && defined(REFINEMENT_SPLIT_CELLS)
+  FlagDoNotRefine = mymalloc_movable(&FlagDoNotRefine, "FlagDoNotRefine", NumGas * sizeof(char));
+#endif /* #if defined (REFINEMENT_MERGE_CELLS) && defined (REFINEMENT_SPLIT_CELLS) */
+
+#ifdef REFINEMENT_VOLUME_LIMIT
+  /* minimum neighbour volume for each active hydro cell */
+  for(n = 0; n < TimeBinsHydro.NActiveParticles; n++)
+    {
+      cell = TimeBinsHydro.ActiveParticleList[n];
+      if(cell < 0)
+        continue;
+
+      /* start from MAX_REAL_NUMBER and minimise */
+      SphP[cell].MinNgbVolume = MAX_REAL_NUMBER;
+
+      /* Walk the chain of connections of this cell. The walk ends after the
+       * entry equal to last_connection has been handled, or earlier if the
+       * chain runs out (negative index). */
+      int conn;
+      for(conn = SphP[cell].first_connection; conn >= 0; conn = DC[conn].next)
+        {
+          int dp  = DC[conn].dp_index;
+          int ngb = Mesh.DP[dp].index;
+
+          /* points with a negative index contribute nothing */
+          if(ngb >= 0)
+            {
+              /* indices >= NumGas that belong to this task are folded back by NumGas */
+              if(ngb >= NumGas && Mesh.DP[dp].task == ThisTask)
+                ngb -= NumGas;
+
+              /* connections on this task read the volume from SphP, all
+               * others from the exchange buffer that is selected at compile
+               * time */
+              int isLocal = (DC[conn].task == ThisTask);
+#ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT
+              double ngbVolume = isLocal ? SphP[ngb].Volume : PrimExch[ngb].Volume;
+#else  /* #ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT */
+              double ngbVolume = isLocal ? SphP[ngb].Volume : RefExch[ngb].Volume;
+#endif /* #ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT #else */
+
+              if(ngbVolume < SphP[cell].MinNgbVolume)
+                SphP[cell].MinNgbVolume = ngbVolume;
+            }
+
+          /* last_connection closes the list of this cell */
+          if(conn == SphP[cell].last_connection)
+            break;
+        }
+    }
+#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */
+
+  TIMER_STOP(CPU_REFINE);
+}
+
+/*! \brief Cleans up after refinement.
+ *  Releases FlagDoNotRefine, the buffer obtained in refinement_prepare().
+ *  \return void
+ */
+void refinement_cleanup()
+{
+#ifdef REFINEMENT_MERGE_CELLS
+#ifdef REFINEMENT_SPLIT_CELLS
+  myfree(FlagDoNotRefine);
+#endif /* #ifdef REFINEMENT_SPLIT_CELLS */
+#endif /* #ifdef REFINEMENT_MERGE_CELLS */
+}
+
+/*! \brief Moves collisionless particle from index old_i to new_i.
+ *
+ *  Needed when a new cell is introduced: cells have to sit at the beginning
+ *  of the P array, with all other particles located after the last gas cell.
+ *  The routine copies the P entry (the SphP array is not touched here) and
+ *  keeps the gravity time-bin data, i.e. the active list and the per-bin
+ *  linked list, consistent with the new index.
+ *
+ *  \param[in] new_i New index of particle in P.
+ *  \param[in] old_i Previous index of particle in P.
+ *
+ *  \return void
+ */
+void move_collisionless_particle(int new_i, int old_i)
+{
+  /* copy first; the tests below read the old slot, which the copy leaves unchanged */
+  P[new_i] = P[old_i];
+
+  /* massless entries that have ID 0 or are of type 4 get no time-bin update */
+  if(P[old_i].Mass == 0 && (P[old_i].ID == 0 || P[old_i].Type == 4))
+    return;
+
+  struct TimeBinData *tb = &TimeBinsGravity;
+  int bin                = P[old_i].TimeBinGrav;
+
+  if(TimeBinSynchronized[bin])
+    {
+      /* the bin is currently active, so the particle has to be appended to the
+         active list under its new index; this presumes that whatever now
+         occupies the old index in that list is active as well */
+      tb->ActiveParticleList[tb->NActiveParticles] = new_i;
+      tb->NActiveParticles += 1;
+    }
+
+  /* Re-link the doubly linked list of the time bin so that new_i takes the
+     place of old_i. Only the gravity time bin has to be touched here. */
+  int before = tb->PrevInTimeBin[old_i];
+  int after  = tb->NextInTimeBin[old_i];
+
+  tb->PrevInTimeBin[new_i] = before;
+  tb->NextInTimeBin[new_i] = after;
+
+  /* now redirect the neighbours (or the head/tail of the bin) to new_i */
+  if(before < 0)
+    {
+      /* no predecessor: old_i must have been the head of the bin */
+      if(tb->FirstInTimeBin[bin] != old_i)
+        terminate("strange");
+      tb->FirstInTimeBin[bin] = new_i;
+    }
+  else
+    tb->NextInTimeBin[before] = new_i;
+
+  if(after < 0)
+    {
+      /* no successor: old_i must have been the tail of the bin */
+      if(tb->LastInTimeBin[bin] != old_i)
+        terminate("strange");
+      tb->LastInTimeBin[bin] = new_i;
+    }
+  else
+    tb->PrevInTimeBin[after] = new_i;
+}
+
+#endif /* REFINEMENT */
diff --git a/src/amuse/community/arepo/src/mesh/set_vertex_velocities.c b/src/amuse/community/arepo/src/mesh/set_vertex_velocities.c
new file mode 100644
index 0000000000..9280b5fde6
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/set_vertex_velocities.c
@@ -0,0 +1,321 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/set_vertex_velocities.c
+ * \date        05/2018
+ * \brief       Algorithms that decide how individual cells are moving.
+ * \details     contains functions:
+ *                void set_vertex_velocities(void)
+ *                static void validate_vertex_velocities_1d()
+ *                void validate_vertex_velocities(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 08.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+
+#ifdef ONEDIMS_SPHERICAL
+static void validate_vertex_velocities_1d();
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+/*! \brief Sets velocities of individual mesh-generating points.
+ *
+ *  \return void
+ */
+void set_vertex_velocities(void)
+{
+  TIMER_START(CPU_SET_VERTEXVELS);
+
+  int idx, i, j;
+  double dt;
+
+#if defined(VORONOI_STATIC_MESH) || defined(NOHYDRO)
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) /* static mesh or no hydro: zero the vertex velocity of every active cell */
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      for(j = 0; j < 3; j++)
+        SphP[i].VelVertex[j] = 0;
+    }
+  TIMER_STOP(CPU_SET_VERTEXVELS);
+  return; /* in this configuration everything below is unreachable */
+#endif /* #if defined (VORONOI_STATIC_MESH) || defined (NOHYDRO) */
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) /* pass 1: base velocity plus half-step acceleration */
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0) /* negative entries of the active list are skipped */
+        continue;
+
+#ifdef MESHRELAX
+      for(j = 0; j < 3; j++)
+        SphP[i].VelVertex[j] = 0;
+#else  /* #ifdef MESHRELAX */
+      for(j = 0; j < 3; j++)
+        SphP[i].VelVertex[j] = P[i].Vel[j]; /* make cell velocity equal to fluid's velocity */
+#endif /* #ifdef MESHRELAX #else */
+
+      double acc[3];
+
+      /*  the actual time-step of particle */
+      integertime ti_step = P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0;
+      dt                  = ti_step * All.Timebase_interval;
+      dt /= All.cf_hubble_a; /* this gives the actual timestep: dt = dloga/ (adot/a) */
+
+      /* now let's add the gradient of the pressure force
+       * note that the gravity half-step was already included in P[i].Vel[j]
+       * prior to calling this function, thus it does not need to be accounted
+       * here explicitly.
+       */
+      if(SphP[i].Density > 0) /* avoids dividing by a zero (or negative) density */
+        {
+          acc[0] = -SphP[i].Grad.dpress[0] / SphP[i].Density;
+          acc[1] = -SphP[i].Grad.dpress[1] / SphP[i].Density;
+          acc[2] = -SphP[i].Grad.dpress[2] / SphP[i].Density;
+
+#ifdef MHD
+          /* we also add the acceleration due to the Lorentz force */
+          acc[0] += (SphP[i].CurlB[1] * SphP[i].B[2] - SphP[i].CurlB[2] * SphP[i].B[1]) / SphP[i].Density;
+          acc[1] += (SphP[i].CurlB[2] * SphP[i].B[0] - SphP[i].CurlB[0] * SphP[i].B[2]) / SphP[i].Density;
+          acc[2] += (SphP[i].CurlB[0] * SphP[i].B[1] - SphP[i].CurlB[1] * SphP[i].B[0]) / SphP[i].Density;
+
+#endif /* #ifdef MHD */
+
+          SphP[i].VelVertex[0] += 0.5 * dt * acc[0]; /* half-step kick: 0.5 * dt * acc */
+          SphP[i].VelVertex[1] += 0.5 * dt * acc[1];
+          SphP[i].VelVertex[2] += 0.5 * dt * acc[2];
+        }
+    }
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) /* pass 2: optional mesh regularization, then zero the unused dimensions */
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+#ifdef REGULARIZE_MESH_CM_DRIFT
+
+      double dx, dy, dz, d, fraction;
+
+      dx = nearest_x(P[i].Pos[0] - SphP[i].Center[0]); /* offset of the generator from SphP[i].Center, passed through nearest_x/y/z */
+      dy = nearest_y(P[i].Pos[1] - SphP[i].Center[1]);
+      dz = nearest_z(P[i].Pos[2] - SphP[i].Center[2]);
+
+      /*  the actual time-step of particle */
+      dt = (P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval;
+      dt /= All.cf_hubble_a; /* this is dt, the actual timestep  */
+
+      double cellrad = get_cell_radius(i);
+
+#if !defined(REGULARIZE_MESH_FACE_ANGLE)
+      /* if there is a density gradient, use a center that is displaced slightly in the direction of the gradient.
+       * This makes sure that the Lloyd scheme does not simply iterate towards cells of equal volume, instead
+       * we keep cells of roughly equal mass.
+       */
+      double dgrad = sqrt(SphP[i].Grad.drho[0] * SphP[i].Grad.drho[0] + SphP[i].Grad.drho[1] * SphP[i].Grad.drho[1] +
+                          SphP[i].Grad.drho[2] * SphP[i].Grad.drho[2]);
+
+      if(dgrad > 0) /* also protects the divisions by dgrad below */
+        {
+          double scale = SphP[i].Density / dgrad;
+          double tmp   = 3 * cellrad + scale;
+          double x     = (tmp - sqrt(tmp * tmp - 8 * cellrad * cellrad)) / 4;
+
+          if(x < 0.25 * cellrad) /* the displaced centre is used only while the shift stays below a quarter of the cell radius */
+            {
+              dx = nearest_x(P[i].Pos[0] - (SphP[i].Center[0] + x * SphP[i].Grad.drho[0] / dgrad));
+              dy = nearest_y(P[i].Pos[1] - (SphP[i].Center[1] + x * SphP[i].Grad.drho[1] / dgrad));
+              dz = nearest_z(P[i].Pos[2] - (SphP[i].Center[2] + x * SphP[i].Grad.drho[2] / dgrad));
+            }
+        }
+#endif /* #if !defined(REGULARIZE_MESH_FACE_ANGLE) */
+
+      d = sqrt(dx * dx + dy * dy + dz * dz);
+
+      fraction = 0; /* stays 0 (no correction) unless one of the thresholds below is exceeded */
+
+#if !defined(REGULARIZE_MESH_FACE_ANGLE)
+      if(d > 0.75 * All.CellShapingFactor * cellrad && dt > 0) /* linear ramp: 0 at 0.75 * factor * cellrad, CellShapingSpeed from factor * cellrad on */
+        {
+          if(d > All.CellShapingFactor * cellrad)
+            fraction = All.CellShapingSpeed;
+          else
+            fraction = All.CellShapingSpeed * (d - 0.75 * All.CellShapingFactor * cellrad) / (0.25 * All.CellShapingFactor * cellrad);
+        }
+#else  /* #if !defined(REGULARIZE_MESH_FACE_ANGLE) */
+      if(SphP[i].MaxFaceAngle > 0.75 * All.CellMaxAngleFactor && dt > 0) /* same ramp, driven by MaxFaceAngle instead of the offset d */
+        {
+          if(SphP[i].MaxFaceAngle > All.CellMaxAngleFactor)
+            fraction = All.CellShapingSpeed;
+          else
+            fraction = All.CellShapingSpeed * (SphP[i].MaxFaceAngle - 0.75 * All.CellMaxAngleFactor) / (0.25 * All.CellMaxAngleFactor);
+        }
+#endif /* #if !defined(REGULARIZE_MESH_FACE_ANGLE) #else */
+
+      if(d > 0 && fraction > 0) /* fraction is only set where dt > 0 was tested, so the d / dt below is safe */
+        {
+          double v;
+#ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED
+
+          v = All.cf_atime * get_sound_speed(i);
+
+#if defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)
+          /* calculate gravitational velocity scale */
+          double ax, ay, az, ac, vgrav;
+#ifdef HIERARCHICAL_GRAVITY
+          ax = SphP[i].FullGravAccel[0];
+          ay = SphP[i].FullGravAccel[1];
+          az = SphP[i].FullGravAccel[2];
+#else  /* #ifdef HIERARCHICAL_GRAVITY */
+          ax = P[i].GravAccel[0];
+          ay = P[i].GravAccel[1];
+          az = P[i].GravAccel[2];
+#endif /* #ifdef HIERARCHICAL_GRAVITY #else */
+#ifdef PMGRID
+          ax += P[i].GravPM[0];
+          ay += P[i].GravPM[1];
+          az += P[i].GravPM[2];
+#endif /* #ifdef PMGRID */
+          ac    = sqrt(ax * ax + ay * ay + az * az);
+          vgrav = 4 * sqrt(All.cf_atime * cellrad * ac);
+          if(v < vgrav) /* v = max(v, vgrav) */
+            v = vgrav;
+#endif /* #if defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE) */
+
+          double vcurl = cellrad * SphP[i].CurlVel;
+          if(v < vcurl) /* v = max(v, vcurl) */
+            v = vcurl;
+
+#else  /* #ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED */
+          v = All.cf_atime * All.cf_atime * d / dt; /* use fiducial velocity */
+
+          double vel  = sqrt(P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]);
+          double vmax = dmax(All.cf_atime * get_sound_speed(i), vel);
+          if(v > vmax) /* cap the fiducial velocity at vmax */
+            v = vmax;
+#endif /* #ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED #else */
+
+#ifdef REFINEMENT_SPLIT_CELLS
+          double proj = SphP[i].SepVector[0] * dx + SphP[i].SepVector[1] * dy + SphP[i].SepVector[2] * dz;
+
+          if(proj != 0) /* replace (dx, dy, dz) by proj * SepVector; NOTE(review): a true projection only if SepVector has unit length -- confirm */
+            {
+              dx = proj * SphP[i].SepVector[0];
+              dy = proj * SphP[i].SepVector[1];
+              dz = proj * SphP[i].SepVector[2];
+            }
+
+          SphP[i].SepVector[0] = 0; /* SepVector is reset after it has been used here */
+          SphP[i].SepVector[1] = 0;
+          SphP[i].SepVector[2] = 0;
+#endif /* #ifdef REFINEMENT_SPLIT_CELLS */
+
+          SphP[i].VelVertex[0] += fraction * v * (-dx / d); /* correction of magnitude fraction * v along -(dx, dy, dz) / d */
+          SphP[i].VelVertex[1] += fraction * v * (-dy / d);
+          SphP[i].VelVertex[2] += fraction * v * (-dz / d);
+        }
+#endif /* #ifdef REGULARIZE_MESH_CM_DRIFT */
+
+      for(j = NUMDIMS; j < 3; j++)
+        SphP[i].VelVertex[j] = 0; /* vertex velocities for unused dimensions set to zero */
+    }
+
+#ifdef OUTPUT_VERTEX_VELOCITY_DIVERGENCE
+  voronoi_exchange_primitive_variables();
+  calculate_vertex_velocity_divergence();
+#endif /* #ifdef OUTPUT_VERTEX_VELOCITY_DIVERGENCE */
+
+#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z)
+  validate_vertex_velocities(); /* zero components that would move a generator out of a reflective box */
+#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */
+
+#ifdef ONEDIMS_SPHERICAL
+  validate_vertex_velocities_1d(); /* inner-boundary check for cell 0 */
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+  TIMER_STOP(CPU_SET_VERTEXVELS);
+}
+
+#ifdef ONEDIMS_SPHERICAL
+/*! \brief Stops cell 0 from drifting below All.CoreRadius (1d spherical case).
+ *  \return void
+ */
+static void validate_vertex_velocities_1d()
+{
+  integertime ti_step = P[0].TimeBinHydro ? (((integertime)1) << P[0].TimeBinHydro) : 0;
+  double dt           = ti_step * All.Timebase_interval;
+  if(P[0].Pos[0] + dt * SphP[0].VelVertex[0] < All.CoreRadius)
+    SphP[0].VelVertex[0] = 0.;
+}
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z)
+/*! \brief Keeps mesh-generating points inside a reflective box.
+ *
+ *  For every active cell and every reflective axis, the vertex velocity
+ *  component is set to zero if the drifted position would fall outside
+ *  [0, boxSize) along that axis.
+ *  \return void
+ */
+void validate_vertex_velocities(void)
+{
+  int n;
+
+  for(n = 0; n < TimeBinsHydro.NActiveParticles; n++)
+    {
+      const int i = TimeBinsHydro.ActiveParticleList[n];
+      if(i < 0)
+        continue;
+
+      /* step of this cell on the integer timeline, then the matching drift factor */
+      integertime ti_step = P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0;
+      double dt_drift     = All.ComovingIntegrationOn ? get_drift_factor(All.Ti_Current, All.Ti_Current + ti_step)
+                                                      : ti_step * All.Timebase_interval;
+
+#if defined(REFLECTIVE_X)
+      /* x axis: no motion if the drift would leave [0, boxSize_X) */
+      if((P[i].Pos[0] + dt_drift * SphP[i].VelVertex[0]) < 0 || (P[i].Pos[0] + dt_drift * SphP[i].VelVertex[0]) >= boxSize_X)
+        SphP[i].VelVertex[0] = 0;
+#endif /* #if defined(REFLECTIVE_X) */
+#if defined(REFLECTIVE_Y)
+      /* y axis: no motion if the drift would leave [0, boxSize_Y) */
+      if((P[i].Pos[1] + dt_drift * SphP[i].VelVertex[1]) < 0 || (P[i].Pos[1] + dt_drift * SphP[i].VelVertex[1]) >= boxSize_Y)
+        SphP[i].VelVertex[1] = 0;
+#endif /* #if defined(REFLECTIVE_Y) */
+#if defined(REFLECTIVE_Z)
+      /* z axis: no motion if the drift would leave [0, boxSize_Z) */
+      if((P[i].Pos[2] + dt_drift * SphP[i].VelVertex[2]) < 0 || (P[i].Pos[2] + dt_drift * SphP[i].VelVertex[2]) >= boxSize_Z)
+        SphP[i].VelVertex[2] = 0;
+#endif /* #if defined(REFLECTIVE_Z) */
+    }
+}
+#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi.c
new file mode 100644
index 0000000000..cc6964c01b
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi.c
@@ -0,0 +1,1163 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi.c
+ * \date        05/2018
+ * \brief       Main file for Voronoi-mesh construction.
+ * \details     contains functions:
+ *                void create_mesh(void)
+ *                int voronoi_get_local_particles(void)
+ *                void free_mesh_structures_not_needed_for_derefinement_
+ *                  refinement(void)
+ *                void free_all_remaining_mesh_structures(void)
+ *                void free_mesh(void)
+ *                int compute_max_delaunay_radius(void)
+ *                void compute_voronoi_faces_and_volumes(void)
+ *                int area_list_data_compare(const void *a, const void *b)
+ *                void apply_area_list(void)
+ *                void derefine_refine_compute_volumes(double *vol)
+ *                double nearest_x(double d)
+ *                double nearest_y(double d)
+ *                double nearest_z(double d)
+ *                double get_cell_radius(int i)
+ *                void dump_points(tessellation * T)
+ *                int face_get_normals(tessellation * T, int i, struct
+ *                  geometry *geom)
+ *                double distance_to_border(int cell)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+tessellation Mesh, DeRefMesh; /* Mesh is the tessellation built by create_mesh(); NOTE(review): DeRefMesh presumably serves de-refinement -- confirm */
+
+unsigned char *Edge_visited;
+struct area_list_data *AreaList;
+int Narea, MaxNarea;
+
+int DPinfinity; /* marker for special infinity point */
+double CentralOffsetX, CentralOffsetY, CentralOffsetZ, ConversionFac;
+
+struct list_export_data *ListExports; /* allocated in create_mesh() with MaxNinlist entries */
+struct list_P_data *List_P;           /* allocated in create_mesh() with NumGas entries */
+int NumGasInMesh;                     /* reset to 0 in create_mesh() */
+int *List_InMesh;                     /* allocated in create_mesh() with NumGas entries */
+
+int CountInSphereTests, CountInSphereTestsExact;   /* this and the Count* variables below are zeroed in create_mesh() before points are inserted */
+int CountConvexEdgeTest, CountConvexEdgeTestExact;
+int Ninlist, MaxNinlist; /* create_mesh() sets Ninlist to 0 and MaxNinlist to Mesh.Indi.AllocFacNinlist */
+
+int CountFlips, Count_1_to_3_Flips2d, Count_2_to_4_Flips2d;
+int Count_1_to_4_Flips, Count_2_to_3_Flips, Count_3_to_2_Flips, Count_4_to_4_Flips;
+int Count_EdgeSplits, Count_FaceSplits;
+int Count_InTetra, Count_InTetraExact;
+int Largest_N_DP_Buffer;
+
+long long TotCountInSphereTests, TotCountInSphereTestsExact; /* the TotCount* variables receive the sumup_large_ints() results in create_mesh() when VERBOSE is defined */
+long long TotCountConvexEdgeTest, TotCountConvexEdgeTestExact;
+
+long long TotCountFlips, TotCount_1_to_3_Flips2d, TotCount_2_to_4_Flips2d;
+long long TotCount_1_to_4_Flips, TotCount_2_to_3_Flips, TotCount_3_to_2_Flips, TotCount_4_to_4_Flips;
+long long TotCount_EdgeSplits, TotCount_FaceSplits;
+long long TotCount_InTetra, TotCount_InTetraExact;
+
+/*! \brief Creates the Voronoi mesh.
+ *
+ *  Routine which is called in run.
+ *  It first creates an initial, giant tetrahedron and then successively
+ *  inserts particles (first local, then ghost particles), computes their
+ *  circumcircles and counts the undecided tetrahedra. This procedure is
+ *  repeated until all tetrahedra are decided. Then, the maximum Delaunay
+ *  radius is computed as well as the faces and volumes of the Voronoi cells.
+ *
+ *  \return void
+ */
+void create_mesh(void)
+{
+#ifdef CREATE_FULL_MESH
+  int k;
+
+  short int *buTimeBin = mymalloc_movable(&buTimeBin, "buTimeBin", NumPart * sizeof(short int));
+  static int buTimeBinActive[TIMEBINS];
+
+  for(k = 0; k < NumPart; k++)
+    {
+      buTimeBin[k]      = P[k].TimeBinHydro;
+      P[k].TimeBinHydro = 0;
+    }
+
+  for(k = 0; k < TIMEBINS; k++)
+    {
+      buTimeBinActive[k] = TimeBinSynchronized[k];
+
+      TimeBinSynchronized[k] = 1;
+    }
+
+  reconstruct_timebins();
+#endif /* #ifdef CREATE_FULL_MESH */
+
+  int tlast;
+  int idx, i, iter = 0, n, skip;
+  double tstart, tend;
+  long long ntot;
+
+  if(All.TotNumGas == 0)
+    return;
+
+  TIMER_START(CPU_MESH);
+
+  mpi_printf("VORONOI: create delaunay mesh\n");
+
+  Ngb_MarkerValue++;
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(P[i].Ti_Current != All.Ti_Current)
+        {
+          terminate("surprise! we don't expect this here anymore");
+          drift_particle(i, All.Ti_Current);
+        }
+
+      SphP[i].Hsml = 1.01 * SphP[i].MaxDelaunayRadius;
+    }
+
+  initialize_and_create_first_tetra(&Mesh);
+
+  CountInSphereTests = CountInSphereTestsExact = 0;
+  CountConvexEdgeTest = CountConvexEdgeTestExact = 0;
+  CountFlips = Count_1_to_3_Flips2d = Count_2_to_4_Flips2d = 0;
+  Count_1_to_4_Flips                                       = 0;
+  Count_2_to_3_Flips                                       = 0;
+  Count_3_to_2_Flips                                       = 0;
+  Count_4_to_4_Flips                                       = 0;
+  Count_EdgeSplits                                         = 0;
+  Count_FaceSplits                                         = 0;
+  Count_InTetra = Count_InTetraExact = 0;
+  Largest_N_DP_Buffer                = 0;
+
+  MaxNinlist  = Mesh.Indi.AllocFacNinlist;
+  ListExports = mymalloc_movable(&ListExports, "ListExports", MaxNinlist * sizeof(struct list_export_data));
+
+  NumGasInMesh = 0;
+  List_InMesh  = mymalloc_movable(&List_InMesh, "List_InMesh", NumGas * sizeof(int));
+
+  List_P = mymalloc_movable(&List_P, "List_P", NumGas * sizeof(struct list_P_data));
+
+  Mesh.DTC = mymalloc_movable(&Mesh.DTC, "DTC", Mesh.MaxNdt * sizeof(tetra_center));
+  Mesh.DTF = mymalloc_movable(&Mesh.DTF, "DTF", Mesh.MaxNdt * sizeof(char));
+  for(i = 0; i < Mesh.Ndt; i++)
+    Mesh.DTF[i] = 0;
+
+  Ninlist = 0;
+
+  tlast = 0;
+
+  do
+    {
+      skip = Mesh.Ndp;
+
+      TIMER_STOPSTART(CPU_MESH, CPU_MESH_FIND_DP);
+
+      tstart = second();
+
+      if(iter == 0)
+        {
+          MPI_Allreduce(&Nvc, &Largest_Nvc, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+          if(Largest_Nvc > 0)
+            n = voronoi_get_connected_particles(&Mesh);
+          else
+            n = voronoi_get_local_particles();
+        }
+      else
+        {
+          n = voronoi_ghost_search(&Mesh);
+        }
+
+      sumup_large_ints(1, &n, &ntot);
+
+      tend = second();
+
+      if(iter == 0)
+        mpi_printf("VORONOI: iter=%d: %llu local points, points/sec/task = %g, took %g secs\n", iter, ntot,
+                   ntot / (timediff(tstart, tend) + 1.0e-30) / NTask, timediff(tstart, tend));
+      else
+        {
+          if(ntot)
+            mpi_printf("VORONOI: iter=%d: %llu additional points, points/sec/task = %g, took %g secs\n", iter, ntot,
+                       ntot / (timediff(tstart, tend) + 1.0e-30) / NTask, timediff(tstart, tend));
+          else
+            mpi_printf("VORONOI: iter=%d: %llu additional points, took %g secs\n", iter, ntot, timediff(tstart, tend));
+        }
+
+      TIMER_STOPSTART(CPU_MESH_FIND_DP, CPU_MESH_INSERT);
+
+      for(i = 0; i < n; i++)
+        {
+#ifndef OPTIMIZE_MEMORY_USAGE
+          set_integers_for_point(&Mesh, skip + i);
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */
+          tlast = insert_point(&Mesh, skip + i, tlast);
+        }
+
+      TIMER_STOPSTART(CPU_MESH_INSERT, CPU_MESH_CELLCHECK);
+
+      compute_circumcircles(&Mesh);
+
+      if(iter > 0)
+        {
+          n = count_undecided_tetras(&Mesh);
+
+          sumup_large_ints(1, &n, &ntot);
+
+          if(ntot)
+            {
+              mpi_printf("VORONOI: still undecided %llu tetrahedras\n", ntot);
+
+#ifndef DOUBLE_STENCIL
+              for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+                {
+                  i = TimeBinsHydro.ActiveParticleList[idx];
+                  if(i < 0)
+                    continue;
+                  SphP[i].Hsml *= HSML_INCREASE_FACTOR;
+                }
+#else  /* #ifndef DOUBLE_STENCIL */
+              for(i = 0; i < Mesh.Ndp; i++)
+                Mesh.DP[i].Hsml *= HSML_INCREASE_FACTOR;
+#endif /* #ifndef DOUBLE_STENCIL #else */
+            }
+        }
+      else
+        {
+          ntot = 1;
+        }
+
+      TIMER_STOPSTART(CPU_MESH_CELLCHECK, CPU_MESH);
+
+      if(iter > MAX_VORONOI_ITERATIONS)
+        terminate("too many iterations\n");
+
+      iter++;
+    }
+  while(ntot > 0);
+
+#if(REFLECTIVE_X == 2) || (REFLECTIVE_Y == 2) || (REFLECTIVE_Z == 2)
+  for(i = 0; i < Mesh.Ndp; i++)
+    {
+#if(REFLECTIVE_X == 2)
+      Mesh.DP[i].image_flags |= OUTFLOW_X;
+#endif /* #if (REFLECTIVE_X == 2) */
+#if(REFLECTIVE_Y == 2)
+      Mesh.DP[i].image_flags |= OUTFLOW_Y;
+#endif /* #if (REFLECTIVE_Y == 2) */
+#if(REFLECTIVE_Z == 2)
+      Mesh.DP[i].image_flags |= OUTFLOW_Z;
+#endif /* #if (REFLECTIVE_Z == 2) */
+    }
+#endif /* #if (REFLECTIVE_X == 2) || (REFLECTIVE_Y == 2) || (REFLECTIVE_Z == 2) */
+
+  compute_max_delaunay_radius();
+
+  TIMER_STOPSTART(CPU_MESH, CPU_LOGS);
+
+#ifdef VERBOSE
+  long long TotNdp, TotNdt;
+
+  int in[15];
+  long long out[15];
+
+  in[0] = Mesh.Ndp;
+  in[1] = Mesh.Ndt;
+  in[2] = CountInSphereTests;
+  in[3] = CountInSphereTestsExact;
+  in[4] = CountFlips;
+  in[5] = Count_InTetra;
+  in[6] = Count_InTetraExact;
+#ifndef TWODIMS
+  in[7]  = Count_1_to_4_Flips;
+  in[8]  = Count_2_to_3_Flips;
+  in[9]  = Count_3_to_2_Flips;
+  in[10] = Count_4_to_4_Flips;
+  in[11] = Count_FaceSplits;
+  in[12] = Count_EdgeSplits;
+  in[13] = CountConvexEdgeTest;
+  in[14] = CountConvexEdgeTestExact;
+  n      = 15;
+#else  /* #ifndef TWODIMS */
+  in[7]                   = Count_1_to_3_Flips2d;
+  in[8]                   = Count_2_to_4_Flips2d;
+  n                       = 9;
+#endif /* #ifndef TWODIMS #else */
+
+  sumup_large_ints(n, in, out);
+
+  TotNdp                     = out[0];
+  TotNdt                     = out[1];
+  TotCountInSphereTests      = out[2];
+  TotCountInSphereTestsExact = out[3];
+  TotCountFlips              = out[4];
+  TotCount_InTetra           = out[5];
+  TotCount_InTetraExact      = out[6];
+#ifndef TWODIMS
+  TotCount_1_to_4_Flips       = out[7];
+  TotCount_2_to_3_Flips       = out[8];
+  TotCount_3_to_2_Flips       = out[9];
+  TotCount_4_to_4_Flips       = out[10];
+  TotCount_FaceSplits         = out[11];
+  TotCount_EdgeSplits         = out[12];
+  TotCountConvexEdgeTest      = out[13];
+  TotCountConvexEdgeTestExact = out[14];
+#else  /* #ifndef TWODIMS */
+  TotCount_1_to_3_Flips2d = out[7];
+  TotCount_2_to_4_Flips2d = out[8];
+#endif /* #ifndef TWODIMS #else */
+
+  if(ThisTask == 0)
+    {
+#ifndef TWODIMS
+      printf(
+          "VORONOI: Average D-Points=%llu  (NumGas=%llu)  D-Tetrahedra=%llu  InSphereTests=%llu  InSphereTestsExact=%llu  "
+          "Flips=%llu\n",
+          TotNdp / NTask, All.TotNumGas / NTask, TotNdt / NTask, TotCountInSphereTests / NTask, TotCountInSphereTestsExact / NTask,
+          TotCountFlips / NTask);
+      printf("VORONOI: 1_to_4_Flips=%llu  2_to_3_Flips=%llu  3_to_2_Flips=%llu  4_to_4_Flips=%llu  FaceSplits=%llu  EdgeSplits=%llu\n",
+             TotCount_1_to_4_Flips / NTask, TotCount_2_to_3_Flips / NTask, TotCount_3_to_2_Flips / NTask,
+             TotCount_4_to_4_Flips / NTask, TotCount_FaceSplits / NTask, TotCount_EdgeSplits / NTask);
+      printf("VORONOI: InTetra=%llu  InTetraExact=%llu  ConvexEdgeTest=%llu  ConvexEdgeTestExact=%llu\n", TotCount_InTetra,
+             TotCount_InTetraExact / NTask, TotCountConvexEdgeTest / NTask, TotCountConvexEdgeTestExact / NTask);
+#else  /* #ifndef TWODIMS */
+      printf(
+          "VORONOI: Average D-Points=%llu  (NumGas=%llu)  D-Triangles=%llu  InCircleTests=%llu InCircleTestsExact=%llu  Flips=%llu\n",
+          TotNdp / NTask, All.TotNumGas / NTask, TotNdt / NTask, TotCountInSphereTests / NTask, TotCountInSphereTestsExact / NTask,
+          TotCountFlips / NTask);
+      printf("VORONOI: 1_to_3_Flips=%llu  2_to_4_Flips=%llu  InTriangle=%llu  InTriangleExact=%llu\n", TotCount_1_to_3_Flips2d / NTask,
+             TotCount_2_to_4_Flips2d / NTask, TotCount_InTetra / NTask, TotCount_InTetraExact / NTask);
+#endif /* #ifndef TWODIMS #else */
+      printf("VORONOI: Total D-Points: %llu Ratio=%g\n", TotNdp, ((double)TotNdp) / All.TotNumGas);
+    }
+#endif /* #ifdef VERBOSE */
+
+  TIMER_STOPSTART(CPU_LOGS, CPU_MESH_GEOMETRY);
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      SphP[i].Volume      = 0;
+      SphP[i].SurfaceArea = 0;
+#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE)
+      SphP[i].MaxFaceAngle = 0;
+#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */
+#ifdef OUTPUT_SURFACE_AREA
+      SphP[i].CountFaces = 0;
+#endif /* #ifdef OUTPUT_SURFACE_AREA */
+    }
+
+  compute_voronoi_faces_and_volumes();
+
+  double vol, voltot;
+
+  vol = 0;
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      vol += SphP[i].Volume;
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      P[i].SofteningType = get_softeningtype_for_hydro_cell(i);
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+    }
+
+  MPI_Reduce(&vol, &voltot, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+
+  mpi_printf("VORONOI: Total volume of active cells = %g\n", voltot);
+
+  TIMER_STOP(CPU_MESH_GEOMETRY);
+
+  voronoi_update_connectivity(&Mesh);
+
+  myfree(Mesh.DTF);
+
+  if(All.HighestActiveTimeBin == All.HighestOccupiedTimeBin) /* only do this for full steps */
+    {
+      /* check whether we can reduce allocation factors */
+      while(Mesh.Ndp < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacNdp && Mesh.Indi.AllocFacNdp > MIN_ALLOC_NUMBER)
+        Mesh.Indi.AllocFacNdp /= ALLOC_INCREASE_FACTOR;
+
+      while(Mesh.Ndt < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacNdt && Mesh.Indi.AllocFacNdt > MIN_ALLOC_NUMBER)
+        Mesh.Indi.AllocFacNdt /= ALLOC_INCREASE_FACTOR;
+
+      while(Mesh.Nvf < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacNvf && Mesh.Indi.AllocFacNvf > MIN_ALLOC_NUMBER)
+        Mesh.Indi.AllocFacNvf /= ALLOC_INCREASE_FACTOR;
+
+      while(Ninlist < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacNinlist && Mesh.Indi.AllocFacNinlist > MIN_ALLOC_NUMBER)
+        Mesh.Indi.AllocFacNinlist /= ALLOC_INCREASE_FACTOR;
+
+      while(Largest_N_DP_Buffer < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacN_DP_Buffer &&
+            Mesh.Indi.AllocFacN_DP_Buffer > MIN_ALLOC_NUMBER)
+        Mesh.Indi.AllocFacN_DP_Buffer /= ALLOC_INCREASE_FACTOR;
+    }
+
+#ifdef CREATE_FULL_MESH
+  for(k = 0; k < TIMEBINS; k++)
+    TimeBinSynchronized[k] = buTimeBinActive[k];
+
+  for(k = 0; k < NumPart; k++)
+    P[k].TimeBinHydro = buTimeBin[k];
+
+  reconstruct_timebins();
+
+  myfree_movable(buTimeBin);
+#endif /* #if defined(CREATE_FULL_MESH) */
+}
+
+/*! \brief Inserts the local active gas cells into the mesh as Delaunay points.
+ *
+ *  Runs through all active particles; every type 0 particle, except those with
+ *  Mass == 0 and ID == 0, is appended to List_InMesh, ListExports and Mesh.DP.
+ *  Increases length of Mesh.DP and ListExports arrays if needed.
+ *  \return Number of points added to Mesh.DP by this call.
+ */
+int voronoi_get_local_particles(void)
+{
+  int p, idx, count = 0;
+
+  /* first, let's add all the primary active points */
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      p = TimeBinsHydro.ActiveParticleList[idx];
+
+      if(p < 0) /* negative list entries are skipped */
+        continue;
+
+      if(P[p].Type == 0) /* only type 0 (gas) particles are inserted */
+        {
+          Ngb_Marker[p] = Ngb_MarkerValue;
+
+          if((P[p].Mass == 0) && (P[p].ID == 0)) /* skip cells that have been swallowed or eliminated */
+            {
+              List_P[p].firstexport   = -1; /* -1: this cell has no entry in ListExports */
+              List_P[p].currentexport = -1;
+              continue;
+            }
+
+          if(Ninlist >= MaxNinlist) /* ListExports is full: enlarge it by ALLOC_INCREASE_FACTOR */
+            {
+              Mesh.Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+              MaxNinlist = Mesh.Indi.AllocFacNinlist;
+#ifdef VERBOSE
+              printf("VORONOI: Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist,
+                     Mesh.Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+              ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+              if(Ninlist >= MaxNinlist) /* the enlargement must have created at least one free slot */
+                terminate("Ninlist >= MaxNinlist");
+            }
+
+          List_InMesh[NumGasInMesh++] = p;
+
+          List_P[p].currentexport = List_P[p].firstexport = Ninlist++; /* new entry is first and current one of this cell */
+          ListExports[List_P[p].currentexport].image_bits = 1;
+          ListExports[List_P[p].currentexport].nextexport = -1; /* no further entry for this cell yet */
+          ListExports[List_P[p].currentexport].origin     = ThisTask;
+          ListExports[List_P[p].currentexport].index      = p;
+
+          if(Mesh.Ndp >= Mesh.MaxNdp) /* Mesh.DP is full: enlarge it by ALLOC_INCREASE_FACTOR */
+            {
+              Mesh.Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+              Mesh.MaxNdp = Mesh.Indi.AllocFacNdp;
+#ifdef VERBOSE
+              printf("VORONOI: Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, Mesh.MaxNdp,
+                     Mesh.Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+              Mesh.DP -= 5; /* the allocated block starts 5 elements before Mesh.DP[0] */
+              Mesh.DP = myrealloc_movable(Mesh.DP, (Mesh.MaxNdp + 5) * sizeof(point));
+              Mesh.DP += 5;
+
+              if(Mesh.Ndp >= Mesh.MaxNdp) /* the enlargement must have created at least one free slot */
+                terminate("Ndp >= MaxNdp");
+            }
+
+          SphP[p].ActiveArea = 0;
+
+          point *dp = &Mesh.DP[Mesh.Ndp]; /* next free Delaunay point */
+
+          dp->x             = P[p].Pos[0];
+          dp->y             = P[p].Pos[1];
+          dp->z             = P[p].Pos[2];
+          dp->ID            = P[p].ID;
+          dp->task          = ThisTask;
+          dp->index         = p;
+          dp->originalindex = -1;
+          dp->timebin       = P[p].TimeBinHydro;
+          dp->image_flags   = 1;
+#ifdef DOUBLE_STENCIL
+          dp->Hsml             = SphP[p].Hsml;
+          dp->first_connection = -1; /* -1: no connections attached yet */
+          dp->last_connection  = -1;
+#endif /* #ifdef DOUBLE_STENCIL */
+
+          Mesh.Ndp++;
+          count++;
+        }
+    }
+
+  return count;
+}
+
+#ifdef REFINEMENT
+struct refdata *RefExch; /*!< reduced copy of PrimExch, filled in free_mesh_structures_not_needed_for_derefinement_refinement() */
+
+/*! \brief Frees the mesh structures not needed during (de)refinement.
+ *
+ *  Frees GradExch and replaces PrimExch by the reduced copy RefExch. Used in
+ *  combination with free_all_remaining_mesh_structures() instead of a single
+ *  free_mesh() after the refinement; this saves some memory.
+ *
+ *  \return void
+ */
+void free_mesh_structures_not_needed_for_derefinement_refinement(void)
+{
+  if(All.TotNumGas == 0) /* no gas in the simulation: nothing is freed */
+    return;
+
+  int i;
+
+  myfree(GradExch);
+
+  RefExch = (struct refdata *)mymalloc_movable(&RefExch, "RefExch", Mesh_nimport * sizeof(struct refdata));
+
+  for(i = 0; i < Mesh_nimport; i++) /* only the fields copied here survive the release of PrimExch */
+    {
+#ifdef REFINEMENT_VOLUME_LIMIT
+      RefExch[i].Volume = PrimExch[i].Volume;
+#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */
+      RefExch[i].TimeBinHydro = PrimExch[i].TimeBinHydro;
+    }
+
+  myfree_movable(PrimExch);
+}
+
+/*! \brief Frees the mesh structures still allocated after (de)refinement.
+ *
+ *  Frees RefExch and the remaining mesh arrays. Used in combination with
+ *  free_mesh_structures_not_needed_for_derefinement_refinement() instead of a
+ *  single free_mesh() after the refinement; this saves some memory.
+ *
+ *  \return void
+ */
+void free_all_remaining_mesh_structures(void)
+{
+  if(All.TotNumGas == 0) /* no gas in the simulation: nothing is freed */
+    return;
+
+  myfree(RefExch);
+
+  myfree(Mesh.DTC); /* here we can free the centers of the Delaunay triangles again */
+  Mesh.DTC = NULL;
+  myfree(List_P);
+  myfree(List_InMesh);
+  myfree(ListExports);
+  myfree(Mesh.DT);
+  myfree(Mesh.DP - 5); /* Mesh.DP points 5 elements past the start of its allocation */
+  myfree(Mesh.VF);
+}
+#endif /* #ifdef REFINEMENT */
+
+/*! \brief Frees arrays associated with Voronoi-mesh (nothing is done if All.TotNumGas is 0).
+ *
+ *  \return void
+ */
+void free_mesh(void)
+{
+  if(All.TotNumGas == 0)
+    return;
+
+#if defined(DOUBLE_STENCIL) /* first hand the connections of all Delaunay points back to the free list */
+  mpi_printf("freeing double stencil connections...\n");
+  int i;
+  for(i = 0; i < Mesh.Ndp; i++)
+    if(Mesh.DP[i].first_connection >= 0)
+      {
+        if(Mesh.DP[i].flag_primary_triangle == 0) /* a point with connections must have this flag set */
+          terminate("Mesh.DP[i].flag_primary_triangle");
+
+        int q = Mesh.DP[i].first_connection;
+
+        if(q >= 0) /* always true here (first_connection >= 0 was tested above): add the connections to the free list */
+          {
+            while(q >= 0)
+              {
+                Nvc--;
+                DC[q].task = -1; /* mark that this is unused */
+
+                if(q == Mesh.DP[i].last_connection) /* reached the end of this point's connection list */
+                  break;
+
+                q = DC[q].next;
+              }
+
+            /* we add the new free spots at the beginning of the free list */
+            DC[Mesh.DP[i].last_connection].next = FirstUnusedConnection;
+            FirstUnusedConnection               = Mesh.DP[i].first_connection;
+
+            Mesh.DP[i].first_connection = -1;
+            Mesh.DP[i].last_connection  = -1;
+          }
+      }
+  mpi_printf("done with freeing double stencil connections.\n");
+#endif /* #if defined(DOUBLE_STENCIL) */
+
+  myfree_movable(GradExch);
+  myfree_movable(PrimExch);
+
+  myfree_movable(Mesh.DTC); /* here we can free the centers of the Delaunay triangles again */
+  Mesh.DTC = NULL;
+  myfree_movable(List_P);
+  myfree_movable(List_InMesh);
+  myfree_movable(ListExports);
+  myfree_movable(Mesh.DT);
+  myfree_movable(Mesh.DP - 5); /* Mesh.DP points 5 elements past the start of its allocation */
+  myfree_movable(Mesh.VF);
+}
+
+/*! \brief Determines the maximum Delaunay radius of all active cells.
+ *
+ *  For every non-deleted Delaunay tetrahedron, twice the distance between its
+ *  center and its first point is compared against MaxDelaunayRadius of each
+ *  of its points that is a local gas cell in a synchronized time bin; the
+ *  larger value is kept in the SphP structure. No-op if ONEDIMS is set.
+ *
+ *  \return 0 (unused).
+ */
+int compute_max_delaunay_radius(void)
+{
+#ifndef ONEDIMS
+  int n, t, corner;
+
+  /* start from zero for every active cell */
+  for(n = 0; n < TimeBinsHydro.NActiveParticles; n++)
+    {
+      int cell = TimeBinsHydro.ActiveParticleList[n];
+
+      if(cell >= 0)
+        SphP[cell].MaxDelaunayRadius = 0;
+    }
+
+  point *DP         = Mesh.DP;
+  tetra *DT         = Mesh.DT;
+  tetra_center *DTC = Mesh.DTC;
+
+  for(t = 0; t < Mesh.Ndt; t++)
+    {
+      if(DT[t].t[0] < 0) /* tetrahedron has been deleted */
+        continue;
+
+      /* offset of the first point from the stored center */
+      double dx = DP[DT[t].p[0]].x - DTC[t].cx;
+      double dy = DP[DT[t].p[0]].y - DTC[t].cy;
+      double dz = DP[DT[t].p[0]].z - DTC[t].cz;
+      double r  = 2 * sqrt(dx * dx + dy * dy + dz * dz);
+
+      for(corner = 0; corner < (DIMS + 1); corner++)
+        {
+          point *q = &DP[DT[t].p[corner]];
+
+          /* only local gas cells in synchronized time bins are updated */
+          if(q->task != ThisTask || q->index >= NumGas || q->index < 0)
+            continue;
+          if(!TimeBinSynchronized[P[q->index].TimeBinHydro])
+            continue;
+          if(r > SphP[q->index].MaxDelaunayRadius)
+            SphP[q->index].MaxDelaunayRadius = r;
+        }
+    }
+#endif /* #ifndef ONEDIMS */
+
+  return 0;
+}
+
+#ifndef ONEDIMS
+/*! \brief Computes interface areas, volumes and centers of the active cells.
+ *
+ *  Loops over the Delaunay tetrahedra and hands every edge that is not yet
+ *  flagged in Edge_visited to process_edge_faces_and_volumes(). Afterwards
+ *  the accumulated SphP[].Center values are divided by the cell volume and
+ *  the collected AreaList is applied through apply_area_list().
+ *
+ *  \return void
+ */
+void compute_voronoi_faces_and_volumes(void)
+{
+  int n, cell, tt, k;
+
+  /* clear the per-cell accumulators of all active cells */
+  for(n = 0; n < TimeBinsHydro.NActiveParticles; n++)
+    {
+      cell = TimeBinsHydro.ActiveParticleList[n];
+      if(cell < 0)
+        continue;
+
+      SphP[cell].Volume = 0;
+      for(k = 0; k < 3; k++)
+        SphP[cell].Center[k] = 0;
+#if defined(REFINEMENT_SPLIT_CELLS)
+      SphP[cell].MinimumEdgeDistance = MAX_FLOAT_NUMBER;
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */
+    }
+
+  /* one bit mask of visited edges per tetrahedron, initially empty */
+  Edge_visited = mymalloc_movable(&Edge_visited, "Edge_visited", Mesh.Ndt * sizeof(unsigned char));
+  for(tt = 0; tt < Mesh.Ndt; tt++)
+    Edge_visited[tt] = 0;
+
+  /* buffer for the entries consumed by apply_area_list() below */
+  Narea    = 0;
+  MaxNarea = Mesh.Indi.AllocFacNflux;
+  AreaList = mymalloc_movable(&AreaList, "AreaList", MaxNarea * sizeof(struct area_list_data));
+
+  for(tt = 0; tt < Mesh.Ndt; tt++)
+    {
+      int edge_bit, edge_nr;
+
+      if(Mesh.DT[tt].t[0] < 0) /* tetrahedron has been deleted */
+        continue;
+
+      /* step through the edges until Edge_visited[tt] equals EDGE_ALL */
+      for(edge_bit = 1, edge_nr = 0; Edge_visited[tt] != EDGE_ALL; edge_bit <<= 1, edge_nr++)
+        {
+          if((Edge_visited[tt] & edge_bit) == 0)
+            process_edge_faces_and_volumes(&Mesh, tt, edge_nr);
+        }
+    }
+
+  /* normalize the accumulated centers by the cell volume */
+  for(n = 0; n < TimeBinsHydro.NActiveParticles; n++)
+    {
+      cell = TimeBinsHydro.ActiveParticleList[n];
+      if(cell < 0)
+        continue;
+
+      if(SphP[cell].Volume == 0) /* no volume: leave the center untouched */
+        continue;
+
+      for(k = 0; k < 3; k++)
+        SphP[cell].Center[k] /= SphP[cell].Volume;
+    }
+
+  /* AreaList was allocated after Edge_visited and is freed first */
+  apply_area_list();
+  myfree(AreaList);
+
+  myfree(Edge_visited);
+}
+
+/*! \brief Comparison function for sorting area_list_data by task.
+ *
+ *  \param[in] a Pointer to first area_list_data structure.
+ *  \param[in] b Pointer to second area_list_data structure.
+ *
+ *  \return -1 if a.task < b.task, +1 if a.task > b.task, 0 if equal.
+ */
+int area_list_data_compare(const void *a, const void *b)
+{
+  const struct area_list_data *lhs = (const struct area_list_data *)a;
+  const struct area_list_data *rhs = (const struct area_list_data *)b;
+
+  if(lhs->task != rhs->task)
+    return (lhs->task < rhs->task) ? -1 : +1;
+
+  return 0;
+}
+
+/*! \brief Sends the AreaList entries to the tasks named in them and adds the
+ *         received areas to SphP[].ActiveArea of the addressed cells.
+ *
+ *  \return void
+ */
+void apply_area_list(void)
+{
+  int i, j, p, nimport, ngrp, recvTask;
+
+  /* sort the area-list by destination task, so that the entries for one task are contiguous */
+
+  mysort(AreaList, Narea, sizeof(struct area_list_data), area_list_data_compare);
+
+  for(j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  for(i = 0; i < Narea; i++) /* count the entries per destination task */
+    Send_count[AreaList[i].task]++;
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); /* learn how many entries each task sends to us */
+
+  for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) /* total import count and per-task offsets */
+    {
+      nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  struct area_list_data *AreaListGet = (struct area_list_data *)mymalloc("AreaListGet", nimport * sizeof(struct area_list_data));
+
+  /* pairwise exchange; ngrp starts at 1, so nothing is copied from ThisTask to itself.
+   * NOTE(review): assumes AreaList holds no entries with task == ThisTask -- confirm against the code filling AreaList */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              /* send our entries for recvTask and receive its entries for us (sizes are given in bytes) */
+              MPI_Sendrecv(&AreaList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct area_list_data), MPI_BYTE, recvTask,
+                           TAG_DENS_A, &AreaListGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct area_list_data),
+                           MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  /* apply the area */
+  for(i = 0; i < nimport; i++)
+    {
+      p = AreaListGet[i].index; /* used as index into SphP on this task */
+      SphP[p].ActiveArea += AreaListGet[i].darea;
+    }
+
+  myfree(AreaListGet);
+}
+
+/*! \brief Calculates volumes of all cells of the DeRefMesh tessellation.
+ *
+ *  \param[out] vol Volumes of cells; the first DeRefMesh.Ndp entries are
+ *                  zeroed here and then handed to derefine_refine_process_edge().
+ *
+ *  \return void
+ */
+void derefine_refine_compute_volumes(double *vol)
+{
+  int n, tt;
+
+  /* reset the output array */
+  for(n = 0; n < DeRefMesh.Ndp; n++)
+    vol[n] = 0;
+
+  /* one bit mask of visited edges per tetrahedron, initially empty */
+  Edge_visited = mymalloc_movable(&Edge_visited, "Edge_visited", DeRefMesh.Ndt * sizeof(unsigned char));
+
+  for(tt = 0; tt < DeRefMesh.Ndt; tt++)
+    Edge_visited[tt] = 0;
+
+  for(tt = 0; tt < DeRefMesh.Ndt; tt++)
+    {
+      int edge_bit, edge_nr;
+
+      if(DeRefMesh.DT[tt].t[0] < 0) /* tetrahedron has been deleted */
+        continue;
+
+      /* step through the edges until Edge_visited[tt] equals EDGE_ALL */
+      for(edge_bit = 1, edge_nr = 0; Edge_visited[tt] != EDGE_ALL; edge_bit <<= 1, edge_nr++)
+        {
+          if((Edge_visited[tt] & edge_bit) == 0)
+            derefine_refine_process_edge(&DeRefMesh, vol, tt, edge_nr);
+        }
+    }
+
+  myfree(Edge_visited);
+}
+
+#endif /* #ifndef ONEDIMS */
+
+/*! \brief Maps a separation in x direction onto its nearest periodic image.
+ *
+ *  \param[in] d Distance to be checked.
+ *
+ *  \return d shifted by one box length if it lies outside
+ *          [-boxHalf_X, boxHalf_X]; d unchanged if REFLECTIVE_X is defined.
+ */
+double nearest_x(double d)
+{
+#ifndef REFLECTIVE_X
+  /* the upward shift is applied first; its result is then tested again */
+  d = (d < -boxHalf_X) ? d + boxSize_X : d;
+  d = (d > boxHalf_X) ? d - boxSize_X : d;
+#endif /* #ifndef REFLECTIVE_X */
+  return d;
+}
+
+/*! \brief Maps a separation in y direction onto its nearest periodic image.
+ *
+ *  \param[in] d Distance to be checked.
+ *
+ *  \return d shifted by one box length if it lies outside
+ *          [-boxHalf_Y, boxHalf_Y]; d unchanged if REFLECTIVE_Y is defined.
+ */
+double nearest_y(double d)
+{
+#ifndef REFLECTIVE_Y
+  /* the upward shift is applied first; its result is then tested again */
+  d = (d < -boxHalf_Y) ? d + boxSize_Y : d;
+  d = (d > boxHalf_Y) ? d - boxSize_Y : d;
+#endif /* #ifndef REFLECTIVE_Y */
+  return d;
+}
+
+/*! \brief Maps a separation in z direction onto its nearest periodic image.
+ *
+ *  \param[in] d Distance to be checked.
+ *
+ *  \return d shifted by one box length if it lies outside
+ *          [-boxHalf_Z, boxHalf_Z]; d unchanged if REFLECTIVE_Z is defined.
+ */
+double nearest_z(double d)
+{
+#ifndef REFLECTIVE_Z
+  /* the upward shift is applied first; its result is then tested again */
+  d = (d < -boxHalf_Z) ? d + boxSize_Z : d;
+  d = (d > boxHalf_Z) ? d - boxSize_Z : d;
+#endif /* #ifndef REFLECTIVE_Z */
+  return d;
+}
+
+/*! \brief Gets "radius" of a cell.
+ *
+ *  Default (3D): radius of a sphere with the same volume as the Voronoi cell.
+ *  TWODIMS: radius of a circle whose area equals SphP[i].Volume.
+ *  ONEDIMS: half of SphP[i].Volume; with ONEDIMS_SPHERICAL half of the
+ *  difference Mesh.VF[i + 1].cx - Mesh.VF[i].cx.
+ *
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *
+ *  \return radius of cell i.
+ */
+double get_cell_radius(int i)
+{
+  double radius;
+
+#if defined(TWODIMS)
+  radius = sqrt(SphP[i].Volume / M_PI); /* circle of equal area */
+#elif defined(ONEDIMS) && defined(ONEDIMS_SPHERICAL)
+  radius = 0.5 * (Mesh.VF[i + 1].cx - Mesh.VF[i].cx);
+#elif defined(ONEDIMS)
+  radius = 0.5 * SphP[i].Volume;
+#else /* neither TWODIMS nor ONEDIMS */
+  radius = pow(SphP[i].Volume * 3.0 / (4.0 * M_PI), 1.0 / 3); /* sphere of equal volume */
+#endif /* #if defined(TWODIMS) #elif ... #else */
+
+  return radius;
+}
+
+/*! \brief Writes a file points_X.dat with Delaunay points.
+ *
+ *  X is the task number. The file contains T->Ndp as an int, followed by the
+ *  x, y, z position (three doubles) of every point in T->DP. The run is
+ *  terminated if the file cannot be opened.
+ *
+ *  \param[in] T Tessellation whose Delaunay point positions are written.
+ *
+ *  \return void
+ */
+void dump_points(tessellation *T)
+{
+  FILE *fd;
+  int i;
+  char buf[1000];
+
+  sprintf(buf, "points_%d.dat", ThisTask);
+  if(!(fd = fopen(buf, "w"))) /* never hand a NULL stream to my_fwrite()/fclose() */
+    terminate("dump_points: cannot open file '%s' for writing", buf);
+
+  my_fwrite(&T->Ndp, sizeof(int), 1, fd);
+  for(i = 0; i < T->Ndp; i++)
+    {
+      double xyz[3] = {T->DP[i].x, T->DP[i].y, T->DP[i].z};
+      my_fwrite(xyz, sizeof(double), 3, fd);
+    }
+  fclose(fd);
+}
+
+/*! \brief Calculates the normal and an orthonormal basis for a given interface.
+ *
+ *  \param[in] T Pointer to tessellation data.
+ *  \param[in] i Index of Voronoi-face in tessellation T.
+ *  \param[out] geom Pointer to structure to which normal data is written.
+ *
+ *  \return 0 if success, -1 if interface can be ignored.
+ */
+int face_get_normals(tessellation *T, int i, struct geometry *geom)
+{
+  int li, ri;
+  double surface, surface_l, surface_r;
+  int present_left, present_right;
+  double mm;
+
+  face *VF  = T->VF;
+  point *DP = T->DP;
+
+  li = DP[VF[i].p1].index; /* "left" side of the face: point p1 */
+  ri = DP[VF[i].p2].index; /* "right" side of the face: point p2 */
+
+  if(li < 0 || ri < 0) /* a negative index on either side: face is ignored */
+    return -1;
+
+  if(li >= NumGas && DP[VF[i].p1].task == ThisTask) /* local point with index >= NumGas: map it back by NumGas */
+    li -= NumGas;
+
+  if(ri >= NumGas && DP[VF[i].p2].task == ThisTask)
+    ri -= NumGas;
+
+  if(DP[VF[i].p1].task == ThisTask) /* local cells are read from SphP, cells of other tasks from PrimExch */
+    surface_l = SphP[li].SurfaceArea;
+  else
+    surface_l = PrimExch[li].SurfaceArea;
+
+  if(DP[VF[i].p2].task == ThisTask)
+    surface_r = SphP[ri].SurfaceArea;
+  else
+    surface_r = PrimExch[ri].SurfaceArea;
+
+  if(surface_r > surface_l) /* threshold: 1e-5 times the larger of the two surface areas */
+    surface = 1.0e-5 * surface_r;
+  else
+    surface = 1.0e-5 * surface_l;
+
+  present_left = present_right = 0;
+
+  /* a side is present if it is a local cell (index < NumGas) in a synchronized time bin and the face
+     area is not negligible compared to the surface of the larger cell; otherwise the face is skipped */
+  if(DP[VF[i].p1].task == ThisTask && DP[VF[i].p1].index < NumGas)
+    if(TimeBinSynchronized[P[DP[VF[i].p1].index].TimeBinHydro])
+      if(VF[i].area > surface)
+        present_left = 1;
+
+  if(DP[VF[i].p2].task == ThisTask && DP[VF[i].p2].index < NumGas)
+    if(TimeBinSynchronized[P[DP[VF[i].p2].index].TimeBinHydro])
+      if(VF[i].area > surface)
+        present_right = 1;
+
+  if(present_left == 0 && present_right == 0)
+    {
+#ifndef VORONOI_STATIC_MESH
+      VF[i].area = 0; /* the ignored face also gets its area reset */
+#endif /* #ifndef VORONOI_STATIC_MESH */
+      return -1;
+    }
+
+  /* center of face */
+  geom->cx = VF[i].cx;
+  geom->cy = VF[i].cy;
+  geom->cz = VF[i].cz;
+
+  /* normal vector pointing to "right" state */
+  geom->nx = DP[VF[i].p2].x - DP[VF[i].p1].x;
+  geom->ny = DP[VF[i].p2].y - DP[VF[i].p1].y;
+  geom->nz = DP[VF[i].p2].z - DP[VF[i].p1].z;
+
+  geom->nn = sqrt(geom->nx * geom->nx + geom->ny * geom->ny + geom->nz * geom->nz); /* distance between the two points */
+  geom->nx /= geom->nn;
+  geom->ny /= geom->nn;
+  geom->nz /= geom->nn;
+
+  /* need an orthonormal basis: m is chosen perpendicular to n */
+  if(geom->nx != 0 || geom->ny != 0)
+    {
+      geom->mx = -geom->ny;
+      geom->my = geom->nx;
+      geom->mz = 0;
+    }
+  else /* n is along z: use the x axis */
+    {
+      geom->mx = 1;
+      geom->my = 0;
+      geom->mz = 0;
+    }
+
+  mm = sqrt(geom->mx * geom->mx + geom->my * geom->my + geom->mz * geom->mz);
+  geom->mx /= mm;
+  geom->my /= mm;
+  geom->mz /= mm;
+
+  geom->px = geom->ny * geom->mz - geom->nz * geom->my; /* p = n x m completes the basis */
+  geom->py = geom->nz * geom->mx - geom->nx * geom->mz;
+  geom->pz = geom->nx * geom->my - geom->ny * geom->mx;
+
+  return 0;
+}
+
+/*! \brief Distance of a cell's position to the nearest box boundary.
+ *
+ *  Takes the minimum over x, y and z of the distances of P[cell].Pos to the
+ *  lower (0) and upper (boxSize) boundary of the computational box.
+ *
+ *  \param[in] cell Index of cell in the P array.
+ *
+ *  \return Distance to border.
+ */
+double distance_to_border(int cell)
+{
+  double upper, lower, nearest;
+
+  /* x direction */
+  upper = boxSize_X - P[cell].Pos[0];
+  assert(upper > 0);
+  lower   = P[cell].Pos[0];
+  nearest = fmin(upper, lower);
+
+  /* y direction */
+  upper = boxSize_Y - P[cell].Pos[1];
+  assert(upper > 0);
+  lower   = P[cell].Pos[1];
+  nearest = fmin(nearest, fmin(upper, lower));
+
+  /* z direction */
+  upper = boxSize_Z - P[cell].Pos[2];
+  assert(upper > 0);
+  lower   = P[cell].Pos[2];
+  nearest = fmin(nearest, fmin(upper, lower));
+
+  return nearest;
+}
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi.h b/src/amuse/community/arepo/src/mesh/voronoi/voronoi.h
new file mode 100644
index 0000000000..31aaae1ecb
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi.h
@@ -0,0 +1,379 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi.h
+ * \date        05/2018
+ * \brief       Header for Voronoi mesh-construction
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 29.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef HAVE_H_VORONOI
+#define HAVE_H_VORONOI
+
+#include <gmp.h>
+
+#define STACKSIZE_TETRA 10000
+#define MIN_ALLOC_NUMBER 1000
+#define ALLOC_INCREASE_FACTOR 1.1
+#define ALLOC_DECREASE_FACTOR 0.7
+#define MAX_VORONOI_ITERATIONS 500
+
+#define GENTLE_DEREFINE_FACTOR 1.2
+
+#define USEDBITS 52
+
+#if USEDBITS > 31
+typedef signed long long int IntegerMapType;
+void MY_mpz_set_si(mpz_t dest, signed long long int val);
+void MY_mpz_mul_si(mpz_t prod, mpz_t mult, signed long long int val);
+void MY_mpz_sub_ui(mpz_t prod, mpz_t mult, unsigned long long int val);
+#else /* #if USEDBITS > 31 */
+typedef signed long int IntegerMapType;
+#define MY_mpz_set_si mpz_set_si
+#define MY_mpz_mul_si mpz_mul_si
+#define MY_mpz_sub_ui mpz_sub_ui
+#endif /* #if USEDBITS > 31 #else */
+
+#define DOUBLE_to_VORONOIINT(y) ((IntegerMapType)(((*((long long *)&y)) & 0xFFFFFFFFFFFFFllu) >> (52 - USEDBITS)))
+
+/*! Returns the top USEDBITS bits of the lowest 52 bits of the pattern of d.
+ *  Prerequisites: sizeof(double) == sizeof(unsigned long long), and
+ *  doubles must be stored according to IEEE 754.
+ */
+static inline IntegerMapType double_to_voronoiint(double d)
+{
+  union
+  {
+    double d;
+    unsigned long long ull;
+  } pun = {d};
+  const unsigned long long mantissa = pun.ull & 0xFFFFFFFFFFFFFllu;
+  return mantissa >> (52 - USEDBITS);
+}
+
+/*! Returns x with the (52 - USEDBITS) lowest bits of its bit pattern cleared. */
+static inline double mask_voronoi_int(double x)
+{
+  union
+  {
+    double d;
+    unsigned long long ull;
+  } pun = {x};
+  pun.ull &= ~((1llu << (52 - USEDBITS)) - 1);
+  return pun.d;
+}
+
+#ifndef TWODIMS
+
+#define EDGE_0 1  /* points 0-1 */
+#define EDGE_1 2  /* points 0-2 */
+#define EDGE_2 4  /* points 0-3 */
+#define EDGE_3 8  /* points 1-2 */
+#define EDGE_4 16 /* points 1-3 */
+#define EDGE_5 32 /* points 2-3 */
+#define EDGE_ALL 63
+
+#else /* #ifndef TWODIMS */
+
+#define EDGE_0 1 /* points 1-2 */
+#define EDGE_1 2 /* points 0-2 */
+#define EDGE_2 4 /* points 0-1 */
+#define EDGE_ALL 7
+
+#endif /* #ifndef TWODIMS #else */
+
+#define HSML_INCREASE_FACTOR 1.3
+
+#ifdef TWODIMS /* will only be compiled in 2D case */
+#define DIMS 2
+#else /* #ifdef TWODIMS */
+#define DIMS 3
+#endif /*#ifdef TWODIMS #else */
+
+typedef struct
+{
+  double x, y, z;  // The 3-space position of the point
+  MyIDType ID;     // ID of the cell (P[].ID for a local cell)
+  int task;   // The MPI task owning this cell
+  int index;  // The hydro quantity index of the cell
+  int originalindex, timebin;  // originalindex is -1 for local cells; timebin is the hydro time bin (P[].TimeBinHydro)
+  unsigned int image_flags;    // set to 1 for a local cell
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  double xx, yy, zz;          // NOTE(review): presumably the coordinates rescaled for the exact predicates -- confirm
+  IntegerMapType ix, iy, iz;  // NOTE(review): presumably the integer-mapped coordinates -- confirm
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */
+
+#ifdef DOUBLE_STENCIL
+  MyFloat Hsml;                // SphP[].Hsml for a local cell
+  int first_connection;        // index into DC of the first connection of this point, -1 if none
+  int last_connection;         // index into DC of the last connection of this point, -1 if none
+  char flag_primary_triangle;  // free_mesh() requires this to be nonzero for points that have connections
+#endif /* #ifdef DOUBLE_STENCIL */
+} point;
+
+typedef struct tetra_data
+{
+  int p[DIMS + 1];           /*!< oriented tetrahedron points (indices into the DP array) */
+  int t[DIMS + 1];           /*!< adjacent tetrahedrons, always opposite to corresponding point */
+  unsigned char s[DIMS + 1]; /*!< gives the index of the point in the adjacent tetrahedron that
+                                lies opposite to the common face */
+
+  /* Note: a deleted tetrahedron has t[0] == -1; the mesh code tests for this with t[0] < 0 */
+} tetra;
+
+typedef struct tetra_center_data
+{
+#ifndef OPTIMIZE_MEMORY_USAGE
+  double cx, cy, cz; /*!< describes circumcircle center */
+#else                /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  MyFloat cx, cy, cz; /*!< describes circumcircle center, stored as MyFloat */
+#endif               /*#ifndef OPTIMIZE_MEMORY_USAGE */
+} tetra_center;
+
+typedef struct tri_data
+{
+  double p[DIMS + 1][DIMS]; /*!< DIMS + 1 points with DIMS coordinates each */
+  int owner;                /*!< NOTE(review): presumably the index of the point the element belongs to -- confirm */
+} triangle;
+
+extern unsigned char *Edge_visited;
+
+extern struct list_export_data
+{
+  unsigned int image_bits; /*!< set to 1 for the entry created for a local cell */
+  int origin, index;       /*!< task and index of the cell (ThisTask and its index in P for a local cell) */
+  int nextexport;          /*!< -1 if no further entry follows (as set for a fresh entry) */
+} * ListExports;
+
+extern int Ninlist, MaxNinlist;
+
+extern struct area_list_data
+{
+  int task, index; /*!< destination task and index into SphP on that task */
+  double darea;    /*!< amount added to SphP[index].ActiveArea by apply_area_list() */
+} * AreaList;
+
+extern int Narea, MaxNarea;
+
+extern int NumGasInMesh;
+extern int *List_InMesh;
+
+extern struct list_P_data
+{
+  int firstexport, currentexport; /*!< indices into ListExports; both are -1 if the cell has no entry */
+
+} * List_P;
+
+typedef struct connection_data
+{
+  int task;  /*!< -1 marks an unused connection */
+  int index;
+  int image_flags;
+  int next;  /*!< index of the next connection in DC (also used to chain the free list) */
+
+  int dp_index; /*!< this seems to be needed always the way voronoi_makeimage is implemented at the moment */
+  int vf_index; /*!< index to the corresponding face */
+#if defined(TETRA_INDEX_IN_FACE)
+  int dt_index;
+#endif /* #if defined(TETRA_INDEX_IN_FACE)*/
+  MyIDType ID;
+} connection;
+
+/*! This structure contains the points where a line segment intersects
+ *  the tetrahedron faces and the internal Voronoi faces. It is returned
+ *  by calc_voronoi_intersections().
+ */
+typedef struct intersection_list_data
+{
+  double s;       /*!< the distance from the entry point (fraction of whole segment) */
+  point p;        /*!< the intersection point */
+  int indA, indB; /*!< the indices of the tetra points (0-4) defining the face */
+} intersection_list;
+
+extern int CountInSphereTests, CountInSphereTestsExact;
+extern int CountConvexEdgeTest, CountConvexEdgeTestExact;
+extern int CountFlips, Count_1_to_3_Flips2d, Count_2_to_4_Flips2d;
+extern int Count_1_to_4_Flips, Count_2_to_3_Flips, Count_3_to_2_Flips, Count_4_to_4_Flips;
+extern int Count_EdgeSplits, Count_FaceSplits;
+extern int Count_InTetra, Count_InTetraExact;
+extern int Largest_N_DP_Buffer;
+
+extern int Ninlist, MaxNinlist;
+
+typedef struct individual_alloc_data
+{
+  double AllocFacNdp;         /*!< Mesh.MaxNdp is set from this value when Mesh.DP is enlarged */
+  double AllocFacNdt;         /*!< reduced on full steps while Mesh.Ndt stays below ALLOC_DECREASE_FACTOR times it */
+  double AllocFacNvf;         /*!< reduced on full steps while Mesh.Nvf stays below ALLOC_DECREASE_FACTOR times it */
+  double AllocFacNinlist;     /*!< MaxNinlist is set from this value when ListExports is enlarged */
+  double AllocFacN_DP_Buffer; /*!< reduced on full steps while Largest_N_DP_Buffer stays below ALLOC_DECREASE_FACTOR times it */
+  double AllocFacNflux;       /*!< MaxNarea (length of AreaList) is set from this value */
+  double AllocFacNradinflux;
+  double AllocFacNvc;         /*!< NOTE(review): presumably the allocation factor for the connections (MaxNvc) -- confirm */
+} mesh_alloc_facs;
+
+typedef struct tessellation_data
+{
+  int Ndp;    /*!< number of delaunay points */
+  int MaxNdp; /*!< maximum number of delaunay points */
+  point *DP;  /*!< delaunay points; the allocated block starts 5 elements before DP[0] */
+
+  int Ndt;           /*!< number of Delaunay tetrahedra */
+  int MaxNdt;        /*!< maximum number of Delaunay tetrahedra */
+  tetra *DT;         /*!< Delaunay tetrahedra */
+  tetra_center *DTC; /*!< circumcenters of delaunay tetrahedra */
+  char *DTF;         /*!< NOTE(review): presumably one flag byte per tetrahedron -- confirm */
+
+  int Nvf;    /*!< number of Voronoi faces */
+  int MaxNvf; /*!< maximum number of Voronoi faces */
+  face *VF;   /*!< Voronoi faces */
+
+  mesh_alloc_facs Indi; /*!< allocation factors of this tessellation */
+} tessellation;
+
+extern tessellation Mesh, DeRefMesh;
+
+extern int DPinfinity;
+
+extern int Nvc;    /* number of connections */
+extern int MaxNvc; /* maximum number of connections */
+extern int Largest_Nvc;
+extern connection *DC; /* Connections */
+extern int FirstUnusedConnection;
+
+extern double CentralOffsetX, CentralOffsetY, CentralOffsetZ, ConversionFac;
+
+/* Prototypes of the mesh routines; the definitions live outside this header.
+ * Several of them (e.g. write_voronoi_mesh, initialize_and_create_first_tetra,
+ * insert_point, compute_circumcircles, voronoi_ghost_search_alternative and
+ * compute_voronoi_faces_and_volumes) have dimension-specific implementations,
+ * see voronoi_1d.c and voronoi_1d_spherical.c. */
+int derefine_add_point_and_split_tri(int q, triangle *trilist, int n, int max_n, double vol);
+void derefine_refine_process_edge(tessellation *T, double *vol, int tt, int nr);
+void derefine_refine_compute_volumes(double *vol);
+int derefine_refine_get_triangles(tessellation *T, int tt, int nr, point *dtip, triangle *trilist, int ntri, int max_n_tri);
+void create_mesh(void);
+void mesh_setup_exchange(void);
+void free_mesh(void);
+void free_mesh_structures_not_needed_for_derefinement_refinement(void);
+void free_all_remaining_mesh_structures(void);
+void apply_area_list(void);
+int area_list_data_compare(const void *a, const void *b);
+void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask);
+void initialize_and_create_first_tetra(tessellation *T);
+void compute_voronoi_faces_and_volumes(void);
+void get_line_segments(int sphp_index, int dp_index, double *segments, unsigned int *nof_elements, unsigned int max_elements);
+double cross_section_plane_cell(int sphp_index, int dp_index, double *center, double *n);
+void intersections_plane_cell(int sphp_index, int dp_index, double *center, double *n, double *polygon, unsigned int *nof_elements);
+void intersection_plane_grid(double *center, double *n, const char *filename);
+void process_edge_faces_and_volumes(tessellation *T, int tt, int nr);
+int insert_point(tessellation *T, int pp, int ttstart);
+void make_an_edge_split(tessellation *T, int tt0, int edge_nr, int count, int pp, int *ttlist);
+void make_a_face_split(tessellation *T, int tt0, int face_nr, int pp, int tt1, int tt2, int qq1, int qq2);
+double calculate_tetra_volume(point *p0, point *p1, point *p2, point *p3);
+void make_a_4_to_4_flip(tessellation *T, int tt, int tip_index, int edge_nr);
+double get_tri_volume(int i, triangle *trilist);
+void make_a_1_to_4_flip(tessellation *T, int pp, int tt0, int tt1, int tt2, int tt3);
+void make_a_3_to_2_flip(tessellation *T, int tt0, int tt1, int tt2, int tip, int edge, int bottom);
+void make_a_2_to_3_flip(tessellation *T, int tt0, int tip, int tt1, int bottom, int qq, int tt2);
+int get_tetra(tessellation *T, point *p, int *moves, int ttstart, int *flag, int *edgeface_nr);
+int InTetra(tessellation *T, int tt, point *pp, int *edgeface_nr, int *nexttetra);
+double InSphere(point *p0, point *p1, point *p2, point *p3, point *p);
+void update_circumcircle(tessellation *T, int tt);
+int test_tetra_orientation(point *p0, point *p1, point *p2, point *p3);
+int voronoi_ghost_search_alternative(tessellation *T);
+void compute_circumcircles(tessellation *T);
+int compute_max_delaunay_radius(void);
+void check_for_min_distance(tessellation *T);
+void check_links(tessellation *T);
+void check_orientations(tessellation *T);
+void check_tetras(tessellation *T, int npoints);
+int voronoi_get_local_particles(void);
+int convex_edge_test(tessellation *T, int tt, int tip, int *edgenr);
+void calculate_gradients(void);
+void limit_gradient(double *d, double phi, double min_phi, double max_phi, MySingle *dphi);
+void exchange_primitive_variables(void);
+void exchange_primitive_variables_and_gradients(void);
+int compare_primexch(const void *a, const void *b);
+
+/* 2D voronoi routines (prototypes only; definitions are outside this header).
+ * NOTE(review): dump_points() and set_integers_for_pointer() are listed under
+ * this heading, but set_integers_for_pointer() is also what the inline
+ * set_integers_for_point() wrapper in this header calls - confirm whether they
+ * are really 2D-specific. */
+void check_edge_and_flip_if_needed(tessellation *T, int ip, int it);
+int get_triangle(tessellation *T, int pp, int *moves, int *degenerate_flag, int ttstart);
+double InCircle(point *p0, point *p1, point *p2, point *p);
+void make_a_1_to_3_flip(tessellation *T, int pp, int tt0, int tt1, int tt2);
+double test_triangle_orientation(tessellation *T, int pp0, int pp1, int pp2);
+void make_a_2_to_4_flip(tessellation *T, int pp, int tt0, int tt1, int tt2, int tt3, int i0, int j0);
+void dump_points(tessellation *T);
+void set_integers_for_pointer(point *p);
+
+#if !defined(ONEDIMS)
+#ifndef OPTIMIZE_MEMORY_USAGE
+/*! \brief Forwards one point of a tessellation to set_integers_for_pointer().
+ *
+ *  \param[in] T Pointer to tessellation; its DP array is indexed.
+ *  \param[in] pp Index of the point within T->DP.
+ *
+ *  \return void
+ */
+static inline void set_integers_for_point(tessellation *T, int pp) { set_integers_for_pointer(&T->DP[pp]); }
+#else  /* #ifndef OPTIMIZE_MEMORY_USAGE */
+/*! \brief Maps the coordinates of a point to their integer representation.
+ *
+ *  Each coordinate is shifted by the matching CentralOffset, scaled by
+ *  ConversionFac and offset by 1.0. The result is converted with
+ *  double_to_voronoiint() into ixyz[] and afterwards replaced in xyz[] by
+ *  its mask_voronoi_int() value.
+ *
+ *  \param[in] p Point whose x, y and z members are mapped.
+ *  \param[out] ixyz Receives the three integer coordinates.
+ *  \param[out] xyz Receives the three masked floating point coordinates.
+ *
+ *  \return void
+ */
+static inline void get_integers_for_point(point *p, IntegerMapType ixyz[], double xyz[])
+{
+  int axis;
+
+  /* shift, scale and offset every coordinate */
+  xyz[0] = (p->x - CentralOffsetX) * ConversionFac + 1.0;
+  xyz[1] = (p->y - CentralOffsetY) * ConversionFac + 1.0;
+  xyz[2] = (p->z - CentralOffsetZ) * ConversionFac + 1.0;
+
+  /* integer values are taken from the unmasked coordinates ... */
+  for(axis = 0; axis < 3; axis++)
+    ixyz[axis] = double_to_voronoiint(xyz[axis]);
+
+  /* ... and only then are the floating point values masked */
+  for(axis = 0; axis < 3; axis++)
+    xyz[axis] = mask_voronoi_int(xyz[axis]);
+}
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+
+#else  /* #if !defined(ONEDIMS) */
+/* ONEDIMS builds use an out-of-line version instead of the inline ones above;
+ * voronoi_1d.c and voronoi_1d_spherical.c each define it with an empty body. */
+void set_integers_for_point(tessellation *T, int pp);
+#endif /* #if !defined(ONEDIMS) #else */
+
+/*! \brief Quick comparison of a point with the infinity point.
+ *
+ *  Only the x coordinate is inspected.
+ *
+ *  \param[in] p Point to test.
+ *
+ *  \return 1 if p->x equals MAX_DOUBLE_NUMBER, 0 otherwise.
+ */
+static inline int isInfinity(point *p)
+{
+  if(p->x == MAX_DOUBLE_NUMBER)
+    return 1;
+
+  return 0;
+}
+
+/* Further prototypes (definitions are outside this header). The InCircle,
+ * InSphere, Orient2d and Orient3d families come in _Quick, _Errorbound and
+ * _Exact variants; the 2d variants address points by index into a
+ * tessellation, the 3d variants take point pointers directly. */
+int solve_linear_equations(double *m, double *res);
+void check_triangles(tessellation *T, int npoints);
+int InCircle_Quick(tessellation *T, int pp0, int pp1, int pp2, int pp);
+int InCircle_Errorbound(tessellation *T, int pp0, int pp1, int pp2, int pp);
+int InCircle_Exact(tessellation *T, int pp0, int pp1, int pp2, int pp);
+int Orient2d_Exact(tessellation *T, int pp0, int pp1, int pp2);
+int Orient2d_Quick(tessellation *T, int pp0, int pp1, int pp2);
+/* parameter spelled degenerate_flag, matching get_triangle() */
+int FindTriangle(tessellation *T, int tt, int pp, int *degenerate_flag, int *nexttetra);
+int InSphere_Exact(point *p0, point *p1, point *p2, point *p3, point *p);
+int InSphere_Quick(point *p0, point *p1, point *p2, point *p3, point *p);
+int InSphere_Errorbound(point *p0, point *p1, point *p2, point *p3, point *p);
+int Orient3d_Quick(point *p0, point *p1, point *p2, point *p3);
+int Orient3d(point *p0, point *p1, point *p2, point *p3);
+int Orient3d_Exact(point *p0, point *p1, point *p2, point *p3);
+int count_undecided_tetras(tessellation *T);
+int ngb_treefind_ghost_search(tessellation *T, MyDouble searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat maxdist, int target,
+                              int origin, int mode, int thread_id, int numnodes, int *firstnode);
+int voronoi_ghost_search_evaluate(tessellation *T, int target, int mode, int q, int thread_id);
+int voronoi_ghost_search(tessellation *T);
+double distance_to_border(int cell);
+
+#endif /* HAVE_H_VORONOI */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d.c
new file mode 100644
index 0000000000..54c325cd3b
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d.c
@@ -0,0 +1,363 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi_1d.c
+ * \date        05/2018
+ * \brief       Routines to build a 1d Voronoi mesh
+ * \details     Note that some of these routines have the same name as the ones
+ *              in voronoi_2d.c and voronoi_3d.c and just replace them in case
+ *              the Config-option ONEDIMS is active. This is also the reason
+ *              why some of these functions are empty but nonetheless have to
+ *              exist in this file.
+ *              contains functions:
+ *                void write_voronoi_mesh(tessellation * T, char *fname,
+ *                  int writeTask, int lastTask)
+ *                void initialize_and_create_first_tetra(tessellation * T)
+ *                void compute_circumcircles(tessellation * T)
+ *                void set_integers_for_point(tessellation * T, int pp)
+ *                int insert_point(tessellation * T, int pp, int ttstart)
+ *                int voronoi_ghost_search(tessellation * T)
+ *                int count_undecided_tetras(tessellation * T)
+ *                int voronoi_ghost_search_alternative(tessellation * T)
+ *                void compute_voronoi_faces_and_volumes(void)
+ *                void voronoi_1D_order(void)
+ *                int voronoi_1D_compare_key(const void *a, const void *b)
+ *                void voronoi_1D_reorder_gas(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gmp.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+#if defined(ONEDIMS) && !defined(ONEDIMS_SPHERICAL) /* will only be compiled in 1D case */
+
+/*! \brief Output of Voronoi mesh to file.
+ *
+ *  Not supported for 1d: the function unconditionally calls terminate() and
+ *  uses none of its arguments.
+ *
+ *  \param[in] T Pointer to tessellation (unused).
+ *  \param[in] fname File name (unused).
+ *  \param[in] writeTask Unused.
+ *  \param[in] lastTask Unused.
+ *
+ *  \return void
+ */
+void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask)
+{
+  terminate("write_voronoi_mesh not supported in 1d case!");
+}
+
+/*! \brief Initialises the 1d tessellation and allocates its arrays.
+ *
+ *  Stops the run if more than one MPI task is used or if there are no gas
+ *  cells. Otherwise the capacities MaxNdp, MaxNdt and MaxNvf are derived from
+ *  NumGas, the counters Ndp, Nvf and Ndt are reset to zero and the VF, DP and
+ *  DT arrays are allocated.
+ *
+ *  \param[out] T Pointer to tessellation structure which is set and its arrays
+ *              are allocated in this routine.
+ *
+ *  \return void
+ */
+void initialize_and_create_first_tetra(tessellation *T)
+{
+  char msg[200];
+
+  if(NTask > 1)
+    {
+      mpi_printf("1D code works only for 1 CPU\n");
+      endrun();
+    }
+
+  T->MaxNdp = NumGas + 4;
+  T->MaxNdt = 4 + T->MaxNdp * 2;
+  T->MaxNvf = T->MaxNdt;
+
+  if(NumGas == 0)
+    {
+      /* bounded formatting: the message can never write past the end of msg[] */
+      snprintf(msg, sizeof(msg), "NumGas=%d on Task=%d, but need at least one particle!\n", NumGas, ThisTask);
+      terminate(msg);
+    }
+
+  T->Ndp = 0;
+  T->Nvf = 0;
+  T->Ndt = 0;
+
+  T->VF = mymalloc_movable(&T->VF, "VF", T->MaxNvf * sizeof(face));
+
+  /* five extra entries are allocated in front of DP[0], so that small negative
+   * indices are valid; the 1d ghost search stores a ghost point in DP[-1] */
+  T->DP = mymalloc_movable(&T->DP, "DP", (T->MaxNdp + 5) * sizeof(point));
+  T->DP += 5;
+
+  T->DT = mymalloc_movable(&T->DT, "DT", T->MaxNdt * sizeof(tetra));
+}
+
+/*! \brief Computes circumcircles in 1d.
+ *
+ *  Not necessary in 1d, so the body is intentionally empty. However, this
+ *  function has to exist for the 1d code to work.
+ *
+ *  \param[in] T Pointer to tessellation structure (unused).
+ *
+ *  \return void
+ */
+void compute_circumcircles(tessellation *T) {}
+
+/*! \brief Empty function in 1d case.
+ *
+ *  Not necessary in 1d. However, this function has to exist for the 1d code
+ *  to work.
+ *
+ *  \param[in] T Pointer to tessellation (unused).
+ *  \param[in] pp Index of point (unused).
+ *
+ *  \return void
+ */
+void set_integers_for_point(tessellation *T, int pp) {}
+
+/*! \brief Empty function in 1d case.
+ *
+ *  Not necessary in 1d. However, this function has to exist for the 1d code
+ *  to work.
+ *
+ *  \param[in] T Pointer to tessellation (unused).
+ *  \param[in] pp Index of point (unused).
+ *  \param[in] ttstart Unused.
+ *
+ *  \return 0
+ */
+int insert_point(tessellation *T, int pp, int ttstart) { return 0; }
+
+/*! \brief Wrapper routine to search for ghost cells for boundary cells.
+ *
+ *  In 1d this only forwards to voronoi_ghost_search_alternative().
+ *
+ *  \param[out] T Pointer to tessellation.
+ *
+ *  \return Return value of voronoi_ghost_search_alternative(), which is
+ *          always 0 in this file.
+ */
+int voronoi_ghost_search(tessellation *T) { return voronoi_ghost_search_alternative(T); }
+
+/*! \brief Empty function in 1d case.
+ *
+ *  Not necessary in 1d. However, this function has to exist for the 1d code
+ *  to work.
+ *
+ *  \param[in] T Pointer to tessellation (unused).
+ *
+ *  \return 0
+ */
+int count_undecided_tetras(tessellation *T) { return 0; }
+
+/*! \brief Sets up the two ghost points that close the 1d mesh.
+ *
+ *  One ghost point is stored in front of the local cells (DP[-1]) and one
+ *  behind them (DP[NumGas]). With REFLECTIVE_X the first and the last cell
+ *  are mirrored about x = 0 and x = boxSize_X; otherwise the cell from the
+ *  opposite end of the array is used, shifted by one box length.
+ *
+ *  This routine assumes an x ordered cell array.
+ *
+ *  \param[in, out] T Pointer to tessellation; two entries of T->DP are
+ *                  written.
+ *
+ *  \return 0
+ */
+int voronoi_ghost_search_alternative(tessellation *T)
+{
+  double x_lo, x_hi;
+  int src_lo, src_hi;
+
+#if defined(REFLECTIVE_X)
+  src_lo = 0;
+  x_lo   = -P[0].Pos[0];
+
+  src_hi = NumGas - 1;
+  x_hi   = boxSize_X + (boxSize_X - P[NumGas - 1].Pos[0]);
+#else  /* #if defined(REFLECTIVE_X) */
+  src_lo = NumGas - 1;
+  x_lo   = P[NumGas - 1].Pos[0] - boxSize_X;
+
+  src_hi = 0;
+  x_hi   = P[0].Pos[0] + boxSize_X;
+#endif /* #if defined(REFLECTIVE_X) #else */
+
+  point *ghost_lo = &T->DP[-1];
+  point *ghost_hi = &T->DP[NumGas];
+
+  /* ghost in front of the first cell */
+  ghost_lo->x     = x_lo;
+  ghost_lo->y     = 0;
+  ghost_lo->z     = 0;
+  ghost_lo->task  = ThisTask;
+  ghost_lo->ID    = P[src_lo].ID;
+  ghost_lo->index = src_lo + NumGas; /* this is a mirrored local point */
+
+  /* ghost behind the last cell */
+  ghost_hi->x     = x_hi;
+  ghost_hi->y     = 0;
+  ghost_hi->z     = 0;
+  ghost_hi->task  = ThisTask;
+  ghost_hi->ID    = P[src_hi].ID;
+  ghost_hi->index = src_hi + NumGas; /* this is a mirrored local point */
+
+#if defined(REFLECTIVE_X)
+  ghost_lo->image_flags = REFL_X_FLAGS;
+  ghost_hi->image_flags = REFL_X_FLAGS;
+#if(REFLECTIVE_X == 2)
+  ghost_lo->image_flags |= OUTFLOW_X;
+  ghost_hi->image_flags |= OUTFLOW_X;
+#endif /* #if (REFLECTIVE_X == 2) */
+#endif /* #if defined(REFLECTIVE_X) */
+
+  return 0;
+}
+
+/*! \brief Computes faces, volumes and centers of the cells of the 1d mesh.
+ *
+ *  A face with unit area is placed halfway between every pair of
+ *  neighbouring points of Mesh.DP, the ghost points DP[-1] and DP[NumGas]
+ *  included. The volume of a cell is the distance between its two faces and
+ *  its center is their midpoint.
+ *
+ *  NOTE(review): unlike the 1d spherical version, SphP[].ActiveArea is not
+ *  set here - confirm that this is intended.
+ *
+ *  \return void
+ */
+void compute_voronoi_faces_and_volumes(void)
+{
+  tessellation *T = &Mesh;
+  point *DP       = T->DP;
+  face *VF        = T->VF;
+  int left, cell;
+
+  T->Nvf = 0;
+
+  /* face between point 'left' and point 'left + 1' */
+  for(left = -1; left < NumGas; left++)
+    {
+      face *f = &VF[T->Nvf];
+
+      f->p1   = left;
+      f->p2   = left + 1;
+      f->cx   = 0.5 * (DP[left].x + DP[left + 1].x);
+      f->cy   = 0;
+      f->cz   = 0;
+      f->area = 1;
+
+      T->Nvf++;
+    }
+
+  /* cell 'cell' is bounded by face 'cell' on the left and face 'cell + 1' on the right */
+  for(cell = 0; cell < NumGas; cell++)
+    {
+      const face *lo = &VF[cell];
+      const face *hi = &VF[cell + 1];
+
+      SphP[cell].Volume    = hi->cx - lo->cx;
+      SphP[cell].Center[0] = 0.5 * (hi->cx + lo->cx);
+      SphP[cell].Center[1] = 0;
+      SphP[cell].Center[2] = 0;
+
+      SphP[cell].SurfaceArea = 2.;
+    }
+}
+
+/*! \brief Sort record used to order the 1d cells by position.
+ */
+struct voronoi_1D_data
+{
+  double x;  /* position of the cell; filled from P[].Pos[0] in voronoi_1D_order() */
+  int index; /* index of the cell in P before sorting */
+};
+
+/* scratch arrays, allocated and freed inside voronoi_1D_order() */
+static struct voronoi_1D_data *mp; /* one sort record per gas cell */
+static int *Id;                    /* Id[i] is the slot that cell i has to move to */
+
+/*! \brief Brings the gas cells into ascending order of their x position.
+ *
+ *  A sort record is built for every gas cell and sorted by position. The
+ *  resulting rank of each cell is stored in Id[], which
+ *  voronoi_1D_reorder_gas() then uses to permute the P and SphP arrays.
+ *  Nothing but the two log messages happens if there are no gas cells.
+ *
+ *  \return void
+ */
+void voronoi_1D_order(void)
+{
+  mpi_printf("begin 1D order...\n");
+
+  if(NumGas != 0)
+    {
+      int cell, rank;
+
+      mp = (struct voronoi_1D_data *)mymalloc("mp", NumGas * sizeof(struct voronoi_1D_data));
+      Id = (int *)mymalloc("Id", NumGas * sizeof(int));
+
+      for(cell = 0; cell < NumGas; cell++)
+        {
+          mp[cell].x     = P[cell].Pos[0];
+          mp[cell].index = cell;
+        }
+
+      mysort(mp, NumGas, sizeof(struct voronoi_1D_data), voronoi_1D_compare_key);
+
+      /* the cell that was at mp[rank].index has to end up at slot 'rank' */
+      for(rank = 0; rank < NumGas; rank++)
+        Id[mp[rank].index] = rank;
+
+      voronoi_1D_reorder_gas();
+
+      /* released in reverse order of allocation */
+      myfree(Id);
+      myfree(mp);
+    }
+
+  mpi_printf("1D order done.\n");
+}
+
+/*! \brief Comparison function ordering voronoi_1D_data objects by their x.
+ *
+ *  \param[in] a Pointer to first voronoi_1D_data object.
+ *  \param[in] b Pointer to second voronoi_1D_data object.
+ *
+ *  \return -1 if a->x < b->x, +1 if a->x > b->x, 0 otherwise.
+ */
+int voronoi_1D_compare_key(const void *a, const void *b)
+{
+  const struct voronoi_1D_data *lhs = (const struct voronoi_1D_data *)a;
+  const struct voronoi_1D_data *rhs = (const struct voronoi_1D_data *)b;
+
+  /* each comparison evaluates to 0 or 1, so the difference is -1, 0 or +1 */
+  return (lhs->x > rhs->x) - (lhs->x < rhs->x);
+}
+
+/*! \brief Permutes P and SphP in place so that cell i ends up in slot Id[i].
+ *
+ *  The permutation is applied cycle by cycle: starting from a cell that is
+ *  not yet in its target slot, the cell is written to its destination, the
+ *  cell evicted from there is carried on to its own destination, and so on
+ *  until the cycle closes at the starting slot. Id[] is updated along the
+ *  way, so every settled slot satisfies Id[slot] == slot and is skipped by
+ *  the outer loop.
+ *
+ *  \return void
+ */
+void voronoi_1D_reorder_gas(void)
+{
+  int start;
+
+  for(start = 0; start < NumGas; start++)
+    {
+      if(Id[start] == start)
+        continue; /* already in place */
+
+      /* the cell that is currently being carried to its destination */
+      struct particle_data carriedP        = P[start];
+      struct sph_particle_data carriedSphP = SphP[start];
+      int carriedId                        = Id[start];
+      int slot                             = carriedId;
+
+      for(;;)
+        {
+          /* keep the occupant of the destination before it is overwritten */
+          struct particle_data evictedP        = P[slot];
+          struct sph_particle_data evictedSphP = SphP[slot];
+          int evictedId                        = Id[slot];
+
+          P[slot]    = carriedP;
+          SphP[slot] = carriedSphP;
+          Id[slot]   = carriedId;
+
+          if(slot == start)
+            break; /* cycle closed */
+
+          /* the evicted cell becomes the next one to be placed */
+          carriedP    = evictedP;
+          carriedSphP = evictedSphP;
+          carriedId   = evictedId;
+
+          slot = carriedId;
+        }
+    }
+}
+
+#endif /* #if defined (ONEDIMS) && !defined (ONEDIMS_SPHERICAL) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d_spherical.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d_spherical.c
new file mode 100644
index 0000000000..c0212da41d
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d_spherical.c
@@ -0,0 +1,339 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi_1d_spherical.c
+ * \date        05/2018
+ * \brief       Routines to build a 1d Voronoi mesh in spherical coordinates.
+ * \details     Note that some of these routines have the same name as the ones
+ *              in voronoi_2d.c and voronoi_3d.c and just replace them in case
+ *              the Config-option ONEDIMS is active. This is also the reason
+ *              why some of these functions are empty but nonetheless have to
+ *              exist in this file.
+ *              contains functions:
+ *                void write_voronoi_mesh(tessellation * T, char *fname,
+ *                  int writeTask, int lastTask)
+ *                void initialize_and_create_first_tetra(tessellation * T)
+ *                void compute_circumcircles(tessellation * T)
+ *                void set_integers_for_point(tessellation * T, int pp)
+ *                int insert_point(tessellation * T, int pp, int ttstart)
+ *                int voronoi_ghost_search(tessellation * T)
+ *                int count_undecided_tetras(tessellation * T)
+ *                int voronoi_ghost_search_alternative(tessellation * T)
+ *                void compute_voronoi_faces_and_volumes(void)
+ *                void voronoi_1D_order(void)
+ *                int voronoi_1D_compare_key(const void *a, const void *b)
+ *                void voronoi_1D_reorder_gas(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gmp.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+#if defined(ONEDIMS) && defined(ONEDIMS_SPHERICAL) /* will only be compiled in 1D spherical case */
+
+/*! \brief Output of Voronoi mesh to file.
+ *
+ *  Not supported for 1d spherical: the function unconditionally calls
+ *  terminate() and uses none of its arguments.
+ *
+ *  \param[in] T Pointer to tessellation (unused).
+ *  \param[in] fname File name (unused).
+ *  \param[in] writeTask Unused.
+ *  \param[in] lastTask Unused.
+ *
+ *  \return void
+ */
+void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask)
+{
+  terminate("write_voronoi_mesh not supported in 1d spherical case!");
+}
+
+/*! \brief Initialises the spherical 1d tessellation and allocates its arrays.
+ *
+ *  Stops the run if more than one MPI task is used or if there are no gas
+ *  cells. Otherwise the capacities MaxNdp, MaxNdt and MaxNvf are derived from
+ *  NumGas, the counters Ndp, Nvf and Ndt are reset to zero and the VF, DP and
+ *  DT arrays are allocated.
+ *
+ *  \param[out] T Pointer to tessellation structure which is set and its arrays
+ *              are allocated in this routine.
+ *
+ *  \return void
+ */
+void initialize_and_create_first_tetra(tessellation *T)
+{
+  char msg[200];
+
+  if(NTask > 1)
+    {
+      mpi_terminate("1D code works only for 1 CPU\n");
+    }
+
+  T->MaxNdp = NumGas + 4;
+  T->MaxNdt = 4 + T->MaxNdp * 2;
+  T->MaxNvf = T->MaxNdt;
+
+  if(NumGas == 0)
+    {
+      /* bounded formatting: the message can never write past the end of msg[] */
+      snprintf(msg, sizeof(msg), "NumGas=%d on Task=%d, but need at least one particle!\n", NumGas, ThisTask);
+      terminate(msg);
+    }
+
+  T->Ndp = 0;
+  T->Nvf = 0;
+  T->Ndt = 0;
+
+  T->VF = mymalloc("VF", T->MaxNvf * sizeof(face));
+
+  /* five extra entries are allocated in front of DP[0], so that small negative
+   * indices are valid; the 1d ghost search stores a ghost point in DP[-1] */
+  T->DP = mymalloc("DP", (T->MaxNdp + 5) * sizeof(point));
+  T->DP += 5;
+
+  T->DT = mymalloc("DT", T->MaxNdt * sizeof(tetra));
+}
+
+/*! \brief Computes circumcircles in 1d spherical coordinates.
+ *
+ *  Not necessary in 1d spherical, so the body is intentionally empty.
+ *  However, this function has to exist for the 1d spherical code to work.
+ *
+ *  \param[in] T Pointer to tessellation structure (unused).
+ *
+ *  \return void
+ */
+void compute_circumcircles(tessellation *T) {}
+
+/*! \brief Empty function in 1d spherical case.
+ *
+ *  Not necessary in 1d spherical. However, this function has to exist for the
+ *  1d spherical code to work.
+ *
+ *  \param[in] T Pointer to tessellation (unused).
+ *  \param[in] pp Index of point (unused).
+ *
+ *  \return void
+ */
+void set_integers_for_point(tessellation *T, int pp) {}
+
+/*! \brief Empty function in 1d spherical case.
+ *
+ *  Not necessary in 1d spherical. However, this function has to exist for
+ *  the 1d spherical code to work.
+ *
+ *  \param[in] T Pointer to tessellation (unused).
+ *  \param[in] pp Index of point (unused).
+ *  \param[in] ttstart Unused.
+ *
+ *  \return 0
+ */
+int insert_point(tessellation *T, int pp, int ttstart) { return 0; }
+
+/*! \brief Wrapper routine to search for ghost cells for boundary cells.
+ *
+ *  In 1d spherical this only forwards to voronoi_ghost_search_alternative().
+ *
+ *  \param[out] T Pointer to tessellation.
+ *
+ *  \return Return value of voronoi_ghost_search_alternative(), which is
+ *          always 0 in this file.
+ */
+int voronoi_ghost_search(tessellation *T) { return voronoi_ghost_search_alternative(T); }
+
+/*! \brief Empty function in 1d spherical case.
+ *
+ *  Not necessary in 1d spherical. However, this function has to exist for
+ *  the 1d spherical code to work.
+ *
+ *  \param[in] T Pointer to tessellation (unused).
+ *
+ *  \return 0
+ */
+int count_undecided_tetras(tessellation *T) { return 0; }
+
+/*! \brief Sets up the two ghost points that close the 1d spherical mesh.
+ *
+ *  The inner ghost (DP[-1]) is the first cell mirrored about All.CoreRadius,
+ *  the outer ghost (DP[NumGas]) is the last cell mirrored about boxSize_X.
+ *
+ *  This routine assumes a radius ordered cell array.
+ *
+ *  \param[in, out] T Pointer to tessellation; two entries of T->DP are
+ *                  written.
+ *
+ *  \return 0
+ */
+int voronoi_ghost_search_alternative(tessellation *T)
+{
+  const int last = NumGas - 1;
+  point *inner   = &T->DP[-1];
+  point *outer   = &T->DP[NumGas];
+
+  /* reflective inner boundaries */
+  inner->x     = 2. * All.CoreRadius - P[0].Pos[0];
+  inner->y     = 0;
+  inner->z     = 0;
+  inner->task  = ThisTask;
+  inner->ID    = P[0].ID;
+  inner->index = NumGas; /* this is a mirrored local point */
+
+  /* outflow outer boundaries */
+  outer->x     = boxSize_X + (boxSize_X - P[last].Pos[0]);
+  outer->y     = 0;
+  outer->z     = 0;
+  outer->task  = ThisTask;
+  outer->ID    = P[last].ID;
+  outer->index = last + NumGas; /* this is a mirrored local point */
+
+  return 0;
+}
+
+/*! \brief Computes faces, volumes and centers of the cells of the 1d
+ *         spherical mesh.
+ *
+ *  A face is placed halfway between every pair of neighbouring points of
+ *  Mesh.DP, the ghost points DP[-1] and DP[NumGas] included; its area is
+ *  that of a sphere with the face position as radius. The volume of a cell
+ *  is that of the shell between its two faces, its center is the midpoint of
+ *  the two face positions and its surface area is the sum of both face
+ *  areas.
+ *
+ *  \return void
+ */
+void compute_voronoi_faces_and_volumes(void)
+{
+  tessellation *T = &Mesh;
+  point *DP       = T->DP;
+  face *VF        = T->VF;
+  int left, cell;
+
+  T->Nvf = 0;
+
+  /* face between point 'left' and point 'left + 1' */
+  for(left = -1; left < NumGas; left++)
+    {
+      face *f = &VF[T->Nvf];
+
+      f->p1   = left;
+      f->p2   = left + 1;
+      f->cx   = 0.5 * (DP[left].x + DP[left + 1].x);
+      f->cy   = 0;
+      f->cz   = 0;
+      f->area = 4. * M_PI * f->cx * f->cx;
+
+      T->Nvf++;
+    }
+
+  /* cell 'cell' is bounded by face 'cell' (inner) and face 'cell + 1' (outer) */
+  for(cell = 0; cell < NumGas; cell++)
+    {
+      const face *lo = &VF[cell];
+      const face *hi = &VF[cell + 1];
+
+      SphP[cell].Volume    = 4.0 / 3.0 * M_PI * (hi->cx * hi->cx * hi->cx - lo->cx * lo->cx * lo->cx);
+      SphP[cell].Center[0] = 0.5 * (hi->cx + lo->cx);
+      SphP[cell].Center[1] = 0;
+      SphP[cell].Center[2] = 0;
+
+      SphP[cell].SurfaceArea = lo->area + hi->area;
+      SphP[cell].ActiveArea  = SphP[cell].SurfaceArea;
+    }
+}
+
+/*! \brief Sort record used to order the 1d spherical cells by radius.
+ */
+struct voronoi_1D_data
+{
+  double x;  /* radial position of the cell; filled from P[].Pos[0] in voronoi_1D_order() */
+  int index; /* index of the cell in P before sorting */
+};
+
+/* scratch arrays, allocated and freed inside voronoi_1D_order() */
+static struct voronoi_1D_data *mp; /* one sort record per gas cell */
+static int *Id;                    /* Id[i] is the slot that cell i has to move to */
+
+/*! \brief Brings the gas cells into ascending order of their position
+ *         (i.e. radius).
+ *
+ *  A sort record is built for every gas cell and sorted by position. The
+ *  resulting rank of each cell is stored in Id[], which
+ *  voronoi_1D_reorder_gas() then uses to permute the P and SphP arrays.
+ *  Nothing but the two log messages happens if there are no gas cells.
+ *
+ *  \return void
+ */
+void voronoi_1D_order(void)
+{
+  mpi_printf("begin 1D order...\n");
+
+  if(NumGas != 0)
+    {
+      int cell, rank;
+
+      mp = (struct voronoi_1D_data *)mymalloc("mp", NumGas * sizeof(struct voronoi_1D_data));
+      Id = (int *)mymalloc("Id", NumGas * sizeof(int));
+
+      for(cell = 0; cell < NumGas; cell++)
+        {
+          mp[cell].x     = P[cell].Pos[0];
+          mp[cell].index = cell;
+        }
+
+      mysort(mp, NumGas, sizeof(struct voronoi_1D_data), voronoi_1D_compare_key);
+
+      /* the cell that was at mp[rank].index has to end up at slot 'rank' */
+      for(rank = 0; rank < NumGas; rank++)
+        Id[mp[rank].index] = rank;
+
+      voronoi_1D_reorder_gas();
+
+      /* released in reverse order of allocation */
+      myfree(Id);
+      myfree(mp);
+    }
+
+  mpi_printf("1D order done.\n");
+}
+
+/*! \brief Comparison function ordering voronoi_1D_data objects by their x.
+ *
+ *  \param[in] a Pointer to first voronoi_1D_data object.
+ *  \param[in] b Pointer to second voronoi_1D_data object.
+ *
+ *  \return -1 if a->x < b->x, +1 if a->x > b->x, 0 otherwise.
+ */
+int voronoi_1D_compare_key(const void *a, const void *b)
+{
+  const double xa = ((const struct voronoi_1D_data *)a)->x;
+  const double xb = ((const struct voronoi_1D_data *)b)->x;
+
+  if(xa < xb)
+    return -1;
+
+  return (xa > xb) ? +1 : 0;
+}
+
+/*! \brief Permutes P and SphP in place so that cell i ends up in slot Id[i].
+ *
+ *  The permutation is resolved one cycle at a time. For every slot whose
+ *  cell is not yet in place, the cell is carried to its destination; the
+ *  cell found there is picked up and carried on, until the chain leads back
+ *  to the starting slot, which then receives the last carried cell. Id[] is
+ *  updated together with the cells, so settled slots have Id[slot] == slot.
+ *
+ *  \return void
+ */
+void voronoi_1D_reorder_gas(void)
+{
+  struct particle_data Pcarry, Ptmp;
+  struct sph_particle_data SphPcarry, SphPtmp;
+  int first, target, idcarry, idtmp;
+
+  for(first = 0; first < NumGas; first++)
+    {
+      if(Id[first] == first)
+        continue;
+
+      Pcarry    = P[first];
+      SphPcarry = SphP[first];
+      idcarry   = Id[first];
+      target    = idcarry;
+
+      /* walk along the cycle until it returns to its starting slot */
+      while(target != first)
+        {
+          /* pick up the current occupant of the target slot */
+          Ptmp    = P[target];
+          SphPtmp = SphP[target];
+          idtmp   = Id[target];
+
+          /* drop the carried cell there */
+          P[target]    = Pcarry;
+          SphP[target] = SphPcarry;
+          Id[target]   = idcarry;
+
+          Pcarry    = Ptmp;
+          SphPcarry = SphPtmp;
+          idcarry   = idtmp;
+
+          target = idcarry;
+        }
+
+      /* close the cycle: the last carried cell belongs into the starting slot */
+      P[first]    = Pcarry;
+      SphP[first] = SphPcarry;
+      Id[first]   = idcarry;
+    }
+}
+
+#endif /* #if defined (ONEDIMS) && defined (ONEDIMS_SPHERICAL) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_2d.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_2d.c
new file mode 100644
index 0000000000..7e9e519c13
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_2d.c
@@ -0,0 +1,2110 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi_2d.c
+ * \date        05/2018
+ * \brief       Routines to build a 2d Voronoi mesh.
+ * \details     Note that some of these routines have the same name as the ones
+ *              in voronoi_1d.c and voronoi_3d.c and just replace them in case
+ *              the Config-option TWODIMS is active. This is also the reason
+ *              why some of these functions are empty but nonetheless have to
+ *              exist in this file.
+ *              contains functions:
+ *                void initialize_and_create_first_tetra(tessellation * T)
+ *                int insert_point(tessellation * T, int pp, int ttstart)
+ *                void make_a_2_to_4_flip(tessellation * T, int pp, int tt0,
+ *                  int tt1, int tt2, int tt3, int i0, int j0)
+ *                void make_a_1_to_3_flip(tessellation * T, int pp, int tt0,
+ *                  int tt1, int tt2)
+ *                void check_edge_and_flip_if_needed(tessellation * T, int ip,
+ *                  int it)
+ *                int get_triangle(tessellation * T, int pp, int *moves, int
+ *                  *degenerate_flag, int ttstart)
+ *                static inline void add_row_2d(double *m, int r1, int r2,
+ *                  double fac)
+ *                int solve_linear_equations_2d(double *m, double *res)
+ *                int FindTriangle(tessellation * T, int tt, int pp,
+ *                  int *degnerate_flag, int *nexttetra)
+ *                int InCircle_Quick(tessellation * T, int pp0, int pp1,
+ *                  int pp2, int pp)
+ *                int InCircle_Errorbound(tessellation * T, int pp0, int pp1,
+ *                  int pp2, int pp)
+ *                int InCircle_Exact(tessellation * T, int pp0, int pp1,
+ *                  int pp2, int pp)
+ *                double test_triangle_orientation(tessellation * T, int pp0,
+ *                  int pp1, int pp2)
+ *                int Orient2d_Quick(tessellation * T, int pp0, int pp1,
+ *                  int pp2)
+ *                int Orient2d_Exact(tessellation * T, int pp0, int pp1,
+ *                  int pp2)
+ *                void process_edge_faces_and_volumes(tessellation * T, int tt,
+ *                  int nr)
+ *                int derefine_refine_get_triangles(tessellation * T, int tt,
+ *                  int nr, point * dtip, triangle * trilist, int ntri,
+ *                  int max_n_tri)
+ *                int derefine_add_point_and_split_tri(int q, triangle
+ *                  * trilist, int ntri, int max_ntri, double vol)
+ *                double get_tri_volume(int i, triangle * trilist)
+ *                void derefine_refine_process_edge(tessellation * T, double
+ *                  *vol, int tt, int nr)
+ *                void compute_circumcircles(tessellation * T)
+ *                void update_circumcircle(tessellation * T, int tt)
+ *                void set_integers_for_pointer(point * p)
+ *                void write_voronoi_mesh(tessellation * T, char *fname, int
+ *                  writeTask, int lastTask)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gmp.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+#if defined(TWODIMS) && !defined(ONEDIMS) /* will only be compiled in 2D case */
+
+#define INSIDE_EPS 1.0e-8
+#define GAUSS_EPS 1.0e-8
+
+/*! \brief Initializes the 2d tessellation and creates the all-enclosing triangle.
+ *         Also assigns DPinfinity, CentralOffsetX/Y and ConversionFac.
+ *  \param[out] T Pointer to tessellation structure which is set and whose VF,
+ *              DP and DT arrays are allocated in this routine.
+ *
+ *  \return void
+ */
+void initialize_and_create_first_tetra(tessellation *T)
+{
+  point *p;
+  int i, n;
+
+  T->MaxNdp = T->Indi.AllocFacNdp;
+  T->MaxNdt = T->Indi.AllocFacNdt;
+  T->MaxNvf = T->Indi.AllocFacNvf;
+
+  T->Ndp = 0;
+  T->Nvf = 0;
+  T->Ndt = 0;
+
+  T->VF = mymalloc_movable(&T->VF, "VF", T->MaxNvf * sizeof(face));
+
+  T->DP = mymalloc_movable(&T->DP, "DP", (T->MaxNdp + 5) * sizeof(point));
+  T->DP += 5; /* 5 extra leading slots: DP[-5]..DP[-1] are valid; DP[-4]..DP[-1] are filled below */
+
+  T->DT = mymalloc_movable(&T->DT, "DT", T->MaxNdt * sizeof(tetra));
+
+  /* construct all encompassing huge triangle */
+  double box, tetra_incircle, tetra_sidelength, tetra_height;
+
+  box = boxSize_X;
+  if(box < boxSize_Y)
+    box = boxSize_Y;
+
+  box *= 1.05;
+
+  tetra_incircle = 2.001 * (1 + sqrt(3)) / 3.0 * box; /* to give room for ghost particles needed for periodic/reflective
+                                                         boundary conditions, the incircle is twice as large, i.e.
+                                                         [-0.5*box, 1.5*box,-0.5*box, 1.5*box] should be inside triangle */
+  tetra_sidelength = tetra_incircle * sqrt(12);
+  tetra_height     = sqrt(3.0) / 2 * tetra_sidelength;
+
+  if(ThisTask == 0)
+    printf("side-length of enclosing triangle=%g tetra_height=%g box=%g\n", tetra_sidelength, tetra_height, box);
+
+  point *DP = T->DP;
+  tetra *DT = T->DT;
+
+  /* first, let's make the points */
+  DP[-3].x = 0.5 * tetra_sidelength;
+  DP[-3].y = -1.0 / 3 * tetra_height;
+  DP[-3].z = 0;
+
+  DP[-2].x = 0;
+  DP[-2].y = 2.0 / 3 * tetra_height;
+  DP[-2].z = 0;
+
+  DP[-1].x = -0.5 * tetra_sidelength;
+  DP[-1].y = -1.0 / 3 * tetra_height;
+  DP[-1].z = 0;
+
+  for(i = -3; i <= -1; i++)
+    {
+      DP[i].x += 0.5 * box;
+      DP[i].y += 1.0 / 3 * tetra_height - 0.5 * box;
+    }
+
+  for(i = -3, p = &DP[-3]; i < 0; i++, p++)
+    {
+      p->index   = -1;
+      p->task    = ThisTask;
+      p->timebin = 0;
+    }
+
+  /* we also define a neutral element at infinity */
+  DPinfinity = -4;
+
+  DP[DPinfinity].x       = MAX_DOUBLE_NUMBER;
+  DP[DPinfinity].y       = MAX_DOUBLE_NUMBER;
+  DP[DPinfinity].z       = MAX_DOUBLE_NUMBER;
+  DP[DPinfinity].index   = -1;
+  DP[DPinfinity].task    = ThisTask;
+  DP[DPinfinity].timebin = 0;
+
+  /* now let's make the big triangle */
+  DT[0].p[0] = -3;
+  DT[0].p[1] = -2;
+  DT[0].p[2] = -1;
+
+  /* On the outer faces, we attach tetrahedra with the neutral element as tip.
+   * This way we will be able to navigate nicely within the tessellation,
+   * and all tetrahedra have defined neighbouring tetrahedra.
+   */
+
+  for(i = 0; i < 3; i++)
+    {
+      n = i + 1; /* tetra index */
+
+      DT[0].t[i] = n;
+      DT[0].s[i] = 2;
+
+      DT[n].t[2] = 0;
+      DT[n].s[2] = i;
+      DT[n].p[2] = DPinfinity;
+    }
+
+  DT[1].p[0] = DT[0].p[2];
+  DT[1].p[1] = DT[0].p[1];
+
+  DT[2].p[0] = DT[0].p[0];
+  DT[2].p[1] = DT[0].p[2];
+
+  DT[3].p[0] = DT[0].p[1];
+  DT[3].p[1] = DT[0].p[0];
+
+  DT[1].t[0] = 3;
+  DT[3].t[1] = 1;
+  DT[1].s[0] = 1;
+  DT[3].s[1] = 0;
+
+  DT[1].t[1] = 2;
+  DT[2].t[0] = 1;
+  DT[1].s[1] = 0;
+  DT[2].s[0] = 1;
+
+  DT[2].t[1] = 3;
+  DT[3].t[0] = 2;
+  DT[2].s[1] = 0;
+  DT[3].s[0] = 1;
+
+  T->Ndt = 4; /* we'll start out with 4 triangles */
+
+  CentralOffsetX = 0.5 * box - 0.5000001 * tetra_sidelength;
+  CentralOffsetY = -0.5000001 * box;
+
+  ConversionFac = 1.0 / (1.001 * tetra_sidelength);
+
+  for(i = -3; i < 0; i++)
+    set_integers_for_point(T, i);
+}
+
+/*! \brief Enlarges the DT, DTC and DTF arrays if Ndt has outgrown MaxNdt.
+ *
+ *  Shared by both split cases of insert_point(). The arrays may be moved by
+ *  the reallocation, so T->DT has to be re-read by the caller afterwards.
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *  \return void
+ */
+static void grow_triangle_storage_if_needed(tessellation *T)
+{
+  if(T->Ndt <= T->MaxNdt)
+    return;
+
+  T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR;
+  T->MaxNdt = T->Indi.AllocFacNdt;
+#ifdef VERBOSE
+  printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, T->Indi.AllocFacNdt);
+#endif /* #ifdef VERBOSE */
+  T->DT  = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra));
+  T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center));
+  T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char));
+
+  if(T->Ndt > T->MaxNdt)
+    terminate("Ndt > MaxNdt");
+}
+
+/*! \brief Insert a point into mesh.
+ *
+ *  Finds the triangle that contains this point and splits it into three, or,
+ *  if the point lies on an edge, splits the two triangles sharing that edge
+ *  into two each. Afterwards edges are flipped (recursively) where needed
+ *  until a valid Delaunay mesh is restored.
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] pp Index of Delaunay point in DP array.
+ *  \param[in] ttstart Initial guess in which triangle it might be,
+ *             index in DT array.
+ *
+ *  \return Index (in DT) of the triangle found by get_triangle() for pp.
+ */
+int insert_point(tessellation *T, int pp, int ttstart)
+{
+  int tt0, tt1, tt2, tt3, ttetra_with_p;
+  int moves, degenerate_flag;
+
+  /* first, need to do a point location */
+  tt0 = get_triangle(T, pp, &moves, &degenerate_flag, ttstart);
+
+  ttetra_with_p = tt0;
+
+  if(degenerate_flag == 1) /* that's the normal split of a triangle into 3 */
+    {
+      /* we now need to split this triangle into three  */
+      tt1 = T->Ndt++;
+      tt2 = T->Ndt++;
+
+      grow_triangle_storage_if_needed(T);
+
+      T->DT[tt1] = T->DT[tt0];
+      T->DT[tt2] = T->DT[tt0];
+
+      make_a_1_to_3_flip(T, pp, tt0, tt1, tt2);
+
+      T->DTF[tt0] = 0;
+      T->DTF[tt1] = 0;
+      T->DTF[tt2] = 0;
+
+      check_edge_and_flip_if_needed(T, pp, tt0);
+      check_edge_and_flip_if_needed(T, pp, tt1);
+      check_edge_and_flip_if_needed(T, pp, tt2);
+    }
+  else
+    {
+      degenerate_flag -= 10; /* 10, 11, 12 -> slot 0, 1, 2 of the edge in tt0 that pp lies on */
+
+      tt1 = T->DT[tt0].t[degenerate_flag];
+
+      /* we now need to split this into two triangles */
+      tt2 = T->Ndt++;
+      tt3 = T->Ndt++;
+
+      grow_triangle_storage_if_needed(T);
+
+      T->DT[tt2] = T->DT[tt0];
+      T->DT[tt3] = T->DT[tt1];
+
+      make_a_2_to_4_flip(T, pp, tt0, tt1, tt2, tt3, degenerate_flag, T->DT[tt0].s[degenerate_flag]);
+
+      T->DTF[tt0] = 0;
+      T->DTF[tt1] = 0;
+      T->DTF[tt2] = 0;
+      T->DTF[tt3] = 0;
+
+      check_edge_and_flip_if_needed(T, pp, tt0);
+      check_edge_and_flip_if_needed(T, pp, tt1);
+      check_edge_and_flip_if_needed(T, pp, tt2);
+      check_edge_and_flip_if_needed(T, pp, tt3);
+    }
+
+  return ttetra_with_p;
+}
+
+/*! \brief Make a 2 to 4 flip needed if point is on edge of a Delaunay
+ *         triangle.
+ *
+ *  If a new point is at the edge of a Delaunay triangle, both adjacent
+ *  triangles need to be split into two. See Springel (2010) for a
+ *  detailed discussion.
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] pp Index of Delaunay point in DP array.
+ *  \param[in] tt0 Index in DT array of the triangle that has pp on its edge.
+ *  \param[in] tt1 Index in DT array of the triangle sharing that edge with tt0.
+ *  \param[in] tt2 Index in DT array of a new triangle (insert_point: copy of tt0).
+ *  \param[in] tt3 Index in DT array of a new triangle (insert_point: copy of tt1).
+ *  \param[in] i0 Slot (0..2) in tt0 of the corner opposite to the common edge;
+ *             tt0's neighbour entry t[i0] refers to tt1.
+ *  \param[in] j0 Slot (0..2) in tt1 of the corner opposite to the common edge;
+ *             tt1's neighbour entry t[j0] refers to tt0.
+ *
+ *  \return void
+ */
+void make_a_2_to_4_flip(tessellation *T, int pp, int tt0, int tt1, int tt2, int tt3, int i0, int j0)
+{
+  tetra *DT = T->DT;
+  tetra *t0 = &DT[tt0];
+  tetra *t1 = &DT[tt1];
+  tetra *t2 = &DT[tt2];
+  tetra *t3 = &DT[tt3];
+
+  int i1, i2, j1, j2;
+
+  CountFlips++;
+  Count_2_to_4_Flips2d++;
+  /* i1, i2 (j1, j2): the two slots that follow i0 (j0) in the cyclic order 0, 1, 2 */
+  i1 = i0 + 1;
+  i2 = i0 + 2;
+  j1 = j0 + 1;
+  j2 = j0 + 2;
+
+  if(i1 > 2)
+    i1 -= 3;
+  if(i2 > 2)
+    i2 -= 3;
+
+  if(j1 > 2)
+    j1 -= 3;
+  if(j2 > 2)
+    j2 -= 3;
+  /* pp becomes a corner of each of the four triangles */
+  t0->p[i1] = pp;
+  t1->p[j2] = pp;
+  t2->p[i2] = pp;
+  t3->p[j1] = pp;
+  /* pair up the four triangles across their shared edges (t: neighbour, s: opposite slot in it) */
+  t0->t[i0] = tt1;
+  t1->t[j0] = tt0;
+  t0->s[i0] = j0;
+  t1->s[j0] = i0;
+
+  t1->t[j1] = tt3;
+  t3->t[j2] = tt1;
+  t1->s[j1] = j2;
+  t3->s[j2] = j1;
+
+  t2->t[i1] = tt0;
+  t0->t[i2] = tt2;
+  t2->s[i1] = i2;
+  t0->s[i2] = i1;
+
+  t2->t[i0] = tt3;
+  t3->t[j0] = tt2;
+  t2->s[i0] = j0;
+  t3->s[j0] = i0;
+  /* let the outer neighbours point back to the triangle that is now adjacent to them */
+  DT[t0->t[i1]].t[t0->s[i1]] = tt0;
+  DT[t1->t[j2]].t[t1->s[j2]] = tt1;
+  DT[t2->t[i2]].t[t2->s[i2]] = tt2;
+  DT[t3->t[j1]].t[t3->s[j1]] = tt3;
+}
+
+/*! \brief Splits one Delaunay triangle into three around an interior point.
+ *
+ *  tt0, tt1 and tt2 are expected to hold the same entries on entry (insert_point
+ *  copies tt0 into the two new slots). Triangle ttK then gets pp as its corner K.
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] pp Index of Delaunay point in DP array.
+ *  \param[in] tt0 Index in DT array of the triangle that contains pp.
+ *  \param[in] tt1 Index in DT array of the first additional triangle.
+ *  \param[in] tt2 Index in DT array of the second additional triangle.
+ *
+ *  \return void
+ */
+void make_a_1_to_3_flip(tessellation *T, int pp, int tt0, int tt1, int tt2)
+{
+  tetra *tri = T->DT;
+  tetra *a   = &tri[tt0];
+  tetra *b   = &tri[tt1];
+  tetra *c   = &tri[tt2];
+
+  CountFlips++;
+  Count_1_to_3_Flips2d++;
+
+  a->p[0] = pp;
+  a->t[1] = tt1;
+  a->s[1] = 0;
+  a->t[2] = tt2;
+  a->s[2] = 0;
+
+  b->p[1] = pp;
+  b->t[0] = tt0;
+  b->s[0] = 1;
+  b->t[2] = tt2;
+  b->s[2] = 1;
+
+  c->p[2] = pp;
+  c->t[0] = tt0;
+  c->s[0] = 2;
+  c->t[1] = tt1;
+  c->s[1] = 2;
+
+  /* the untouched outer edge of each triangle: make its neighbour point back */
+  tri[a->t[0]].t[a->s[0]] = tt0;
+  tri[b->t[1]].t[b->s[1]] = tt1;
+  tri[c->t[2]].t[c->s[2]] = tt2;
+}
+
+/*! \brief Flips the edge opposite to corner ip of triangle it if needed.
+ *
+ *  The edge is flipped if the far corner of the neighbouring triangle lies in
+ *  the circumcircle; both triangles are then re-checked. See Springel (2010).
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] ip Index to Delaunay point, DP array.
+ *  \param[in] it Index in DT array of a triangle that has ip as a corner.
+ *
+ *  \return void
+ */
+void check_edge_and_flip_if_needed(tessellation *T, int ip, int it)
+{
+  tetra *DT = T->DT;
+  char *DTF = T->DTF;
+
+  tetra *t = &DT[it];
+
+  int tt, pp, t0, t2;
+  int pi, pi1, pi2;
+  int ni, ni1, ni2;
+  int st2, st0;
+
+  if(t->p[0] == ip)
+    pi = 0;
+  else if(t->p[1] == ip)
+    pi = 1;
+  else
+    pi = 2;
+
+  /* get the point that lies across the edge to obtain the quadrilateral */
+
+  tt = t->t[pi];
+  ni = t->s[pi];
+  pp = DT[tt].p[ni];
+
+  int ret, ret_exact;
+
+  ret = InCircle_Errorbound(T, t->p[0], t->p[1], t->p[2], pp);
+  CountInSphereTests++;
+
+  if(ret != 0)
+    ret_exact = ret;
+  else
+    {
+      ret_exact = InCircle_Exact(T, t->p[0], t->p[1], t->p[2], pp);
+      CountInSphereTestsExact++;
+    }
+
+  if(ret_exact > 0)
+    {
+      /* pp lies in the circumcircle of the triangle, the edge is not Delaunay. Need to do a flip */
+
+      CountFlips++;
+
+      ni1 = ni + 1;
+      if(ni1 > 2)
+        ni1 -= 3;
+      ni2 = ni + 2;
+      if(ni2 > 2)
+        ni2 -= 3;
+
+      pi1 = pi + 1;
+      if(pi1 > 2)
+        pi1 -= 3;
+      pi2 = pi + 2;
+      if(pi2 > 2)
+        pi2 -= 3;
+
+      t0 = DT[tt].t[ni1];
+      t2 = t->t[pi1];
+
+      st0 = DT[tt].s[ni1];
+      st2 = t->s[pi1];
+
+      /* change the points of the triangles */
+      t->p[pi2]     = pp;
+      DT[tt].p[ni2] = ip;
+
+      /* change the pointers to the neighbouring triangles, and fix
+         the adjacency relations */
+
+      t->t[pi1]     = tt;
+      DT[tt].t[ni1] = it;
+      t->s[pi1]     = ni1;
+      DT[tt].s[ni1] = pi1;
+
+      t->t[pi]      = t0;
+      DT[t0].t[st0] = it;
+      t->s[pi]      = st0;
+      DT[t0].s[st0] = pi;
+
+      DT[tt].t[ni]  = t2;
+      DT[t2].t[st2] = tt;
+      DT[tt].s[ni]  = st2;
+      DT[t2].s[st2] = ni;
+
+      DTF[tt] = 0;
+      DTF[it] = 0;
+
+      /* now we need to test also the two sides opposite of p */
+      check_edge_and_flip_if_needed(T, ip, it);
+      check_edge_and_flip_if_needed(T, ip, tt);
+    }
+}
+
+/*! \brief Locates the triangle that contains a Delaunay point by walking.
+ *
+ *  Beginning at ttstart, FindTriangle() is asked whether pp is in the current
+ *  triangle; as long as it is not, the walk continues with the neighbour that
+ *  FindTriangle() proposes. The run is aborted after too many steps.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] pp Index of Delaunay point in DP array.
+ *  \param[out] moves Number of steps taken from ttstart to the result.
+ *  \param[out] degenerate_flag Set by FindTriangle(): 1 inside, 10..12 on edge.
+ *  \param[in] ttstart Index in DT array of the triangle the walk starts from.
+ *
+ *  \return Index in DT array of the triangle that contains pp.
+ */
+int get_triangle(tessellation *T, int pp, int *moves, int *degenerate_flag, int ttstart)
+{
+  int current = ttstart; /* triangle that is tested next */
+  int neighbour;         /* filled in by FindTriangle() when pp is outside */
+  int nsteps = 0;
+
+  /* upper limit for the length of the walk */
+#define MAX_COUNT_MOVES 1000000
+
+  for(;;)
+    {
+      /* non-zero: pp is inside (1) or on an edge (10, 11, 12) of 'current' */
+      if(FindTriangle(T, current, pp, degenerate_flag, &neighbour) != 0)
+        break;
+
+      /* pp is outside: count the move and continue with the proposed neighbour */
+      nsteps++;
+
+      if(nsteps > MAX_COUNT_MOVES)
+        {
+          /* log the next few steps, then give up */
+          printf("ta=%d triangle=%d  xy=(%g|%g) ID=%d\n", ThisTask, (int)(current), T->DP[pp].x, T->DP[pp].y, T->DP[pp].ID);
+          if(nsteps > MAX_COUNT_MOVES + 10)
+            terminate("too many moves, problem to find triangle");
+        }
+
+      current = neighbour;
+    }
+
+  *moves = nsteps;
+
+  return current;
+}
+
+/*! \brief Add row in matrix equation.
+ *
+ *  Auxiliary function for solve_linear_equations_2d.
+ *
+ *  \param[in, out] m Matrix, stored row by row with 3 entries per row.
+ *  \param[in] r1 Index of row to be modified.
+ *  \param[in] r2 Index of row which is added to r1.
+ *  \param[in] fac Factor by which row r2 is multiplied before adding to r1.
+ *
+ *  \return void
+ */
+static inline void add_row_2d(double *m, int r1, int r2, double fac)
+{
+  int i;
+
+  for(i = 0; i < 3; i++)
+    m[r1 * 3 + i] += fac * m[r2 * 3 + i];
+}
+
+/*! \brief Solve system of linear equations for 2d Voronoi construction.
+ *
+ *  Solves the 2x2 system given as augmented matrix { a00, a01, b0, a10, a11, b1 }
+ *  by Gaussian elimination with row pivoting. Used by FindTriangle().
+ *  Both pivots are checked before they are used as divisors, so res is only
+ *  written when the function succeeds.
+ *
+ *  \param[in, out] m Augmented matrix (6 entries); modified by the elimination.
+ *  \param[out] res Solution vector (2 entries); untouched on failure.
+ *
+ *  \return 0 if success, -1 if the first pivot is below 1.0e-12 in magnitude or
+ *          the system is singular.
+ */
+int solve_linear_equations_2d(double *m, double *res)
+{
+  /* pivoting: eliminate with the row whose first coefficient is larger in magnitude */
+  int ix = (fabs(m[0]) > fabs(m[3])) ? 0 : 1;
+  int iy = 1 - ix;
+
+  /* reject a (nearly) vanishing pivot before it is used as a divisor */
+  if(fabs(m[ix * 3]) < 1.0e-12)
+    return -1;
+
+  add_row_2d(m, iy, ix, -m[iy * 3] / m[ix * 3]);
+
+  /* second pivot exactly zero: the two rows are linearly dependent, no unique solution */
+  if(m[iy * 3 + 1] == 0)
+    return -1;
+
+  res[1] = m[iy * 3 + 2] / m[iy * 3 + 1];
+  res[0] = (m[ix * 3 + 2] - res[1] * m[ix * 3 + 1]) / m[ix * 3];
+
+  return 0;
+}
+
+/*! \brief Does point lie in triangle?
+ *
+ *  Tests whether point pp lies in the triangle, on an edge, or outside. In the
+ *  latter case, a neighboring triangle is returned. First, a fast search is
+ *  performed and if this yields that point might be on an edge, a (more
+ *  expensive) exact determination is performed.
+ *
+ *  Only one of the two outputs is written per call: degnerate_flag when the
+ *  result is non-zero, nexttetra when the result is 0.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] tt Index of triangle in DT array.
+ *  \param[in] pp Index of Delaunay point in DP array.
+ *  \param[out] degnerate_flag Set to the return value (1, 10, 11 or 12) if pp is
+ *              inside the triangle or on one of its edges.
+ *  \param[out] nexttetra Index of neighboring triangle in direction of point.
+ *
+ *  \return 1: point inside triangle; 0 outside; 10,11,12: on edge.
+ */
+int FindTriangle(tessellation *T, int tt, int pp, int *degnerate_flag, int *nexttetra)
+{
+  tetra *DT = T->DT;
+  point *DP = T->DP;
+  tetra *t  = &DT[tt];
+  point *p  = &DP[pp];
+
+  int pp0, pp1, pp2;
+  point *p0, *p1, *p2;
+
+  pp0 = t->p[0];
+  pp1 = t->p[1];
+  pp2 = t->p[2];
+
+  p0 = &DP[pp0];
+  p1 = &DP[pp1];
+  p2 = &DP[pp2];
+
+  if(pp0 == DPinfinity || pp1 == DPinfinity || pp2 == DPinfinity)
+    {
+      char buf[1000];
+      snprintf(buf, sizeof(buf), "we are in a triangle with an infinity point. tetra=%d  p=(%g|%g)\n", (int)(tt), p->x, p->y);
+      terminate(buf);
+    }
+
+  Count_InTetra++;
+
+  /* edge vectors a = p1 - p0 and b = p2 - p0, and the offset q = p - p0 of the
+     tested point, all taken from the xx/yy members of the points */
+  double ax = p1->xx - p0->xx;
+  double ay = p1->yy - p0->yy;
+
+  double bx = p2->xx - p0->xx;
+  double by = p2->yy - p0->yy;
+
+  double qx = p->xx - p0->xx;
+  double qy = p->yy - p0->yy;
+
+  /* augmented 2x3 system for the coefficients x[] in q = x[0] * a + x[1] * b */
+  double mv_data[] = {ax, bx, qx, ay, by, qy};
+  double x[2];
+
+  int ivol, flag2, flag1, flag0;
+  int count_zeros = 0;
+
+  int status;
+
+  status = solve_linear_equations_2d(mv_data, x);
+
+  if(status < 0)
+    {
+      /* no usable floating point solution: verify the orientation of the triangle
+         itself here; the location of pp is then decided by the exact tests below */
+      ivol = Orient2d_Exact(T, t->p[0], t->p[1], t->p[2]);
+      if(ivol <= 0)
+        {
+          char buf[1000];
+          snprintf(buf, sizeof(buf), "flat or negatively oriented triangle found (ivol=%d)\n", ivol);
+          terminate(buf);
+        }
+    }
+
+  if(status >= 0)
+    {
+      if(x[0] > INSIDE_EPS && x[1] > INSIDE_EPS && (1 - (x[0] + x[1])) > INSIDE_EPS)
+        {
+          /* looks like we are safely inside the triangle */
+
+          *degnerate_flag = 1;
+          return 1;
+        }
+
+      if(x[0] < -INSIDE_EPS || x[1] < -INSIDE_EPS || (1 - (x[0] + x[1])) < -INSIDE_EPS)
+        {
+          /* looks like we are clearly outside the triangle.
+             Let's look for a good neighbouring triangle to continue the search */
+
+          /* note: in the (a,b) basis, the center-of-mass has coordinates (1/3, 1/3) */
+
+          double w, u;
+
+          /* follow the ray from the center (1/3, 1/3) through (x[0], x[1]) to the
+             line x[1] = 0: w is the ray parameter, u the x[0] value reached there.
+             A hit with 0 <= u <= 1 is on the edge p0-p1, i.e. opposite corner 2 */
+          if(fabs(x[1] - (1.0 / 3)) > INSIDE_EPS)
+            {
+              w = (1.0 / 3) / ((1.0 / 3) - x[1]);
+              if(w > 0)
+                {
+                  u = (1.0 / 3) + w * (x[0] - (1.0 / 3));
+                  if(u > -INSIDE_EPS && (1 - u) > -INSIDE_EPS)
+                    {
+                      *nexttetra = t->t[2];
+                      return 0;
+                    }
+                }
+            }
+
+          /* same construction for the line x[0] = 0, which holds the edge p0-p2
+             opposite corner 1; here u is the x[1] value reached on that line */
+          if(fabs(x[0] - (1.0 / 3)) > INSIDE_EPS)
+            {
+              w = (1.0 / 3) / ((1.0 / 3) - x[0]);
+              if(w > 0)
+                {
+                  u = (1.0 / 3) + w * (x[1] - (1.0 / 3));
+                  if(u > -INSIDE_EPS && (1 - u) > -INSIDE_EPS)
+                    {
+                      *nexttetra = t->t[1];
+                      return 0;
+                    }
+                }
+            }
+
+          /* neither of the two edges at p0 was selected: take the neighbour opposite corner 0 */
+          *nexttetra = t->t[0];
+          return 0;
+        }
+    }
+
+  /* here we need to decide whether we have a degenerate case, i.e.
+     whether we think the point lies on an edge of the triangle */
+
+  Count_InTetraExact++;
+
+  ivol = Orient2d_Exact(T, t->p[0], t->p[1], t->p[2]);
+
+  if(ivol <= 0)
+    {
+      char buf[1000];
+      snprintf(buf, sizeof(buf), "flat or negatively oriented triangle found (ivol=%d)\n", ivol);
+      terminate(buf);
+    }
+
+  /* flagK: exact orientation of pp relative to the edge opposite corner K; a
+     negative value is treated as "outside across that edge", zero as "on it" */
+  flag0 = Orient2d_Exact(T, pp1, pp2, pp);
+  flag1 = Orient2d_Exact(T, pp2, pp0, pp);
+  flag2 = Orient2d_Exact(T, pp0, pp1, pp);
+
+  if(flag0 == 0)
+    count_zeros++;
+
+  if(flag1 == 0)
+    count_zeros++;
+
+  if(flag2 == 0)
+    count_zeros++;
+
+  if(count_zeros >= 2)
+    {
+      printf("flags=%d %d %d\n", flag0, flag1, flag2);
+
+      printf("points: %d %d %d %d\n", (int)(pp0), (int)(pp1), (int)(pp2), (int)(pp));
+      printf("Ngas=%d\n", NumGas);
+      /* the corners of the all-enclosing triangle carry index = -1 and have no
+         entry in P[]: print ID=-1 for them instead of reading P[] out of bounds */
+      int k, idx[4] = {pp0, pp1, pp2, pp};
+      for(k = 0; k < 4; k++)
+        {
+          point *q = &DP[idx[k]];
+          int id   = (q->index >= 0 && NumGas > 0) ? (int)P[q->index % NumGas].ID : -1;
+          printf("xyz, p=%d: (%g|%g)  index=%d task=%d ID=%d  flags\n", idx[k], q->x, q->y, q->index, q->task, id);
+        }
+      terminate("too many zeros - (perhaps identical points inserted?)");
+    }
+
+  if(flag0 >= 0 && flag1 >= 0 && flag2 >= 0)
+    {
+      /* we have a point inside the triangle, but it may still be on one of the edges */
+
+      if(count_zeros == 0)
+        {
+          /* ok, we are inside */
+          *degnerate_flag = 1;
+          return 1;
+        }
+
+      if(count_zeros == 1) /* we lie on a face */
+        {
+          if(flag2 == 0)
+            {
+              *degnerate_flag = 12;
+              return 12; /* point lies on side A */
+            }
+          if(flag1 == 0)
+            {
+              *degnerate_flag = 11;
+              return 11; /* point lies on side C */
+            }
+
+          if(flag0 == 0)
+            {
+              *degnerate_flag = 10;
+              return 10; /* point lies on side B */
+            }
+        }
+    }
+
+  /* we are clearly outside, let's select the suitable neighbour */
+
+  if(flag0 < 0 && flag1 >= 0 && flag2 >= 0)
+    {
+      *nexttetra = t->t[0];
+      return 0;
+    }
+
+  if(flag0 >= 0 && flag1 < 0 && flag2 >= 0)
+    {
+      *nexttetra = t->t[1];
+      return 0;
+    }
+
+  if(flag0 >= 0 && flag1 >= 0 && flag2 < 0)
+    {
+      *nexttetra = t->t[2];
+      return 0;
+    }
+
+  /* there are apparently two negative values. Let's pick a random one */
+
+  int ind = -1;
+
+  /* Every negative flag names a candidate neighbour. The first candidate is
+     taken as is; each further candidate replaces the current pick with
+     probability 1/2. get_random_number() is therefore only drawn when a
+     candidate has already been picked (flag0 is tested first, so it never
+     needs a draw). */
+  if(flag0 < 0)
+    ind = 0;
+
+  if(flag1 < 0 && (ind < 0 || get_random_number() < 0.5))
+    ind = 1;
+
+  if(flag2 < 0 && (ind < 0 || get_random_number() < 0.5))
+    ind = 2;
+
+  *nexttetra = t->t[ind];
+  return 0;
+}
+
+/*! \brief Sign of the in-circle determinant of point pp with respect to the
+ *         triangle p0,p1,p2, evaluated in plain double precision.
+ *
+ *  The determinant is positive if pp lies inside the circumcircle of a
+ *  positively (counter-clockwise) oriented triangle p0,p1,p2 and negative if
+ *  it lies outside; check_edge_and_flip_if_needed() treats a positive result
+ *  of the related tests as "not Delaunay". No round-off safeguard is applied.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] pp0 Index in DP of first point in triangle.
+ *  \param[in] pp1 Index in DP of second point in triangle.
+ *  \param[in] pp2 Index in DP of third point in triangle.
+ *  \param[in] pp Index in DP of point to be checked.
+ *
+ *  \return +1: determinant positive (pp inside the circle for a positively
+ *          oriented triangle); 0: zero (on circle); -1: negative (outside),
+ *          or one of the four indices is DPinfinity.
+ */
+int InCircle_Quick(tessellation *T, int pp0, int pp1, int pp2, int pp)
+{
+  point *DP = T->DP;
+
+  if(pp0 == DPinfinity || pp1 == DPinfinity || pp2 == DPinfinity || pp == DPinfinity)
+    return -1;
+
+  point *q = &DP[pp];
+
+  /* corner positions relative to the tested point */
+  double ax = DP[pp0].xx - q->xx;
+  double ay = DP[pp0].yy - q->yy;
+  double bx = DP[pp1].xx - q->xx;
+  double by = DP[pp1].yy - q->yy;
+  double cx = DP[pp2].xx - q->xx;
+  double cy = DP[pp2].yy - q->yy;
+
+  /* 2x2 minors and squared lengths of the relative positions */
+  double ab = ax * by - bx * ay;
+  double bc = bx * cy - cx * by;
+  double ca = cx * ay - ax * cy;
+
+  double a2 = ax * ax + ay * ay;
+  double b2 = bx * bx + by * by;
+  double c2 = cx * cx + cy * cy;
+
+  double det = a2 * bc + b2 * ca + c2 * ab;
+
+  /* reduce to the sign: -1, 0 or +1 */
+  return (det > 0) - (det < 0);
+}
+
+/*! \brief Sign of the in-circle determinant of point pp with respect to the
+ *         triangle p0,p1,p2, with a safety margin against round-off.
+ *
+ *  The determinant is evaluated in double precision from the xx/yy members of
+ *  the points. A definite sign is only reported if its magnitude exceeds
+ *  1.0e-14 times the sum of the absolute values of all contributing terms;
+ *  otherwise 0 is returned (check_edge_and_flip_if_needed() then consults
+ *  InCircle_Exact()).
+ *
+ *  Sign convention: positive means pp lies inside the circumcircle of a
+ *  positively (counter-clockwise) oriented triangle p0,p1,p2.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] pp0 Index in DP of first point in triangle.
+ *  \param[in] pp1 Index in DP of second point in triangle.
+ *  \param[in] pp2 Index in DP of third point in triangle.
+ *  \param[in] pp Index in DP of point to be checked.
+ *
+ *  \return +1: pp inside the circle (positively oriented triangle);
+ *          0: undecided within the error bound; -1: pp outside the circle,
+ *          or one of the four indices is DPinfinity.
+ */
+int InCircle_Errorbound(tessellation *T, int pp0, int pp1, int pp2, int pp)
+{
+  point *DP = T->DP;
+
+  if(pp0 == DPinfinity || pp1 == DPinfinity || pp2 == DPinfinity || pp == DPinfinity)
+    return -1;
+
+  point *q = &DP[pp];
+
+  /* corner positions relative to the tested point */
+  double ax = DP[pp0].xx - q->xx;
+  double ay = DP[pp0].yy - q->yy;
+  double bx = DP[pp1].xx - q->xx;
+  double by = DP[pp1].yy - q->yy;
+  double cx = DP[pp2].xx - q->xx;
+  double cy = DP[pp2].yy - q->yy;
+
+  /* the six cross products are kept individually: they are needed again for the error bound */
+  double axby = ax * by;
+  double bxay = bx * ay;
+  double bxcy = bx * cy;
+  double cxby = cx * by;
+  double cxay = cx * ay;
+  double axcy = ax * cy;
+
+  double ca = cxay - axcy;
+  double ab = axby - bxay;
+  double bc = bxcy - cxby;
+
+  double a2 = ax * ax + ay * ay;
+  double b2 = bx * bx + by * by;
+  double c2 = cx * cx + cy * cy;
+
+  double det = a2 * bc + b2 * ca + c2 * ab;
+
+  /* same sum as det, but with every product entering with its absolute value */
+  double sizelimit = a2 * (fabs(bxcy) + fabs(cxby)) + b2 * (fabs(cxay) + fabs(axcy)) + c2 * (fabs(axby) + fabs(bxay));
+  double errbound  = 1.0e-14 * sizelimit;
+
+  /* only a value outside the band [-errbound, errbound] counts as decided */
+  if(det < -errbound)
+    return -1;
+
+  return (det > errbound) ? +1 : 0;
+}
+
+/*! \brief Tests whether point pp lies in the circumcircle around triangle
+ *  p0,p1,p2 using arbitrary precision operations.
+ *
+ *  This is the exact solution, but computationally very expensive, thus only
+ *  called for the unclear cases.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] pp0 Index in DP of first point in triangle.
+ *  \param[in] pp1 Index in DP of second point in triangle.
+ *  \param[in] pp2 Index in DP of third point in triangle.
+ *  \param[in] pp Index in DP of point to be checked.
+ *
+ *  \return +1: pp inside the circle (for a positively oriented triangle p0,p1,p2);
+ *          0: on the circle; -1: outside, or one of the indices is DPinfinity.
+ */
+int InCircle_Exact(tessellation *T, int pp0, int pp1, int pp2, int pp)
+{
+  point *DP = T->DP;
+  point *p0 = &DP[pp0];
+  point *p1 = &DP[pp1];
+  point *p2 = &DP[pp2];
+  point *p  = &DP[pp];
+
+  if(pp0 == DPinfinity || pp1 == DPinfinity || pp2 == DPinfinity || pp == DPinfinity)
+    return -1;
+
+  IntegerMapType ax, ay, bx, by, cx, cy;
+
+  ax = p0->ix - p->ix;
+  ay = p0->iy - p->iy;
+  bx = p1->ix - p->ix;
+  by = p1->iy - p->iy;
+  cx = p2->ix - p->ix;
+  cy = p2->iy - p->iy;
+
+  mpz_t axby, bxay, bxcy, cxby, cxay, axcy, tmp;
+
+  mpz_init(tmp);
+  /* products like axby = ax * by (MY_mpz_* presumably wrap mpz_set_si / mpz_mul_si -- TODO confirm) */
+  mpz_init(axby);
+  MY_mpz_set_si(tmp, ax);
+  MY_mpz_mul_si(axby, tmp, by);
+  mpz_init(bxay);
+  MY_mpz_set_si(tmp, bx);
+  MY_mpz_mul_si(bxay, tmp, ay);
+  mpz_init(bxcy);
+  MY_mpz_set_si(tmp, bx);
+  MY_mpz_mul_si(bxcy, tmp, cy);
+  mpz_init(cxby);
+  MY_mpz_set_si(tmp, cx);
+  MY_mpz_mul_si(cxby, tmp, by);
+  mpz_init(cxay);
+  MY_mpz_set_si(tmp, cx);
+  MY_mpz_mul_si(cxay, tmp, ay);
+  mpz_init(axcy);
+  MY_mpz_set_si(tmp, ax);
+  MY_mpz_mul_si(axcy, tmp, cy);
+
+  mpz_t ca, ab, bc;
+
+  mpz_init(ca);
+  mpz_init(ab);
+  mpz_init(bc);
+
+  mpz_sub(ca, cxay, axcy);
+  mpz_sub(ab, axby, bxay);
+  mpz_sub(bc, bxcy, cxby);
+
+  mpz_t AA, BB, a2, b2, c2;
+
+  mpz_init(AA);
+  mpz_init(BB);
+  mpz_init(a2);
+  mpz_init(b2);
+  mpz_init(c2);
+  /* squared lengths a2 = ax*ax + ay*ay (likewise b2, c2); AA and BB serve as scratch values */
+  MY_mpz_set_si(tmp, ax);
+  MY_mpz_mul_si(AA, tmp, ax);
+  MY_mpz_set_si(tmp, ay);
+  MY_mpz_mul_si(BB, tmp, ay);
+  mpz_add(a2, AA, BB);
+
+  MY_mpz_set_si(tmp, bx);
+  MY_mpz_mul_si(AA, tmp, bx);
+  MY_mpz_set_si(tmp, by);
+  MY_mpz_mul_si(BB, tmp, by);
+  mpz_add(b2, AA, BB);
+
+  MY_mpz_set_si(tmp, cx);
+  MY_mpz_mul_si(AA, tmp, cx);
+  MY_mpz_set_si(tmp, cy);
+  MY_mpz_mul_si(BB, tmp, cy);
+  mpz_add(c2, AA, BB);
+
+  /* now calculate the final result: AA = a2 * bc + b2 * ca + c2 * ab */
+
+  mpz_mul(AA, a2, bc);
+  mpz_mul(BB, b2, ca);
+  mpz_add(tmp, AA, BB);
+  mpz_mul(BB, c2, ab);
+  mpz_add(AA, BB, tmp);
+
+  int sign = mpz_sgn(AA);
+  /* release every mpz_t that was initialised above */
+  mpz_clear(c2);
+  mpz_clear(b2);
+  mpz_clear(a2);
+  mpz_clear(BB);
+  mpz_clear(AA);
+  mpz_clear(bc);
+  mpz_clear(ab);
+  mpz_clear(ca);
+  mpz_clear(axcy);
+  mpz_clear(cxay);
+  mpz_clear(cxby);
+  mpz_clear(bxcy);
+  mpz_clear(bxay);
+  mpz_clear(axby);
+  mpz_clear(tmp);
+
+  return sign;
+}
+
+/*! \brief Returns the orientation of the triangle.
+ *
+ *  The orientation is the determinant of the matrix built from the positions
+ *  of the three corner points a, b and c:
+ *  | ax, ay, 1 |
+ *  | bx, by, 1 |
+ *  | cx, cy, 1 |
+ *  It is evaluated in floating point from the x and y coordinates of the
+ *  points.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] pp0 Index in DP of first point in triangle.
+ *  \param[in] pp1 Index in DP of second point in triangle.
+ *  \param[in] pp2 Index in DP of third point in triangle.
+ *
+ *  \return Determinant of orientation matrix.
+ */
+double test_triangle_orientation(tessellation *T, int pp0, int pp1, int pp2)
+{
+  point *a = T->DP + pp0;
+  point *b = T->DP + pp1;
+  point *c = T->DP + pp2;
+
+  /* cross product of the edge vectors (b - a) and (c - a) */
+  return (b->x - a->x) * (c->y - a->y) - (b->y - a->y) * (c->x - a->x);
+}
+
+/*! \brief Check if triangle is positively or negatively oriented.
+ *
+ *  Floating point version that works on the xx and yy coordinates of the
+ *  points.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] pp0 Index in DP of first point in triangle.
+ *  \param[in] pp1 Index in DP of second point in triangle.
+ *  \param[in] pp2 Index in DP of third point in triangle.
+ *
+ *  \return -1 if negatively, 0 if degenerate (in a line) and 1 if positively
+ *          oriented.
+ */
+int Orient2d_Quick(tessellation *T, int pp0, int pp1, int pp2)
+{
+  point *a = T->DP + pp0;
+  point *b = T->DP + pp1;
+  point *c = T->DP + pp2;
+
+  /* cross product of the edge vectors (b - a) and (c - a) */
+  double det = (b->xx - a->xx) * (c->yy - a->yy) - (b->yy - a->yy) * (c->xx - a->xx);
+
+  /* reduce to the sign: +1, -1, or 0 */
+  return (det > 0) - (det < 0);
+}
+
+/*! \brief Check if triangle is positively or negatively oriented.
+ *
+ *  Works on the integer coordinates ix and iy of the points. If USEDBITS > 31,
+ *  arbitrary precision operations are used, which is computationally
+ *  expensive but guarantees the correct result; otherwise the determinant is
+ *  evaluated with signed long long arithmetic.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] pp0 Index in DP of first point in triangle.
+ *  \param[in] pp1 Index in DP of second point in triangle.
+ *  \param[in] pp2 Index in DP of third point in triangle.
+ *
+ *  \return -1 if negatively, 0 if degenerate (in a line) and 1 if positively
+ *          oriented.
+ */
+int Orient2d_Exact(tessellation *T, int pp0, int pp1, int pp2)
+{
+  point *a = T->DP + pp0;
+  point *b = T->DP + pp1;
+  point *c = T->DP + pp2;
+
+#if USEDBITS > 31
+  /* edge vectors (b - a) and (c - a) in integer coordinates */
+  IntegerMapType abx = (b->ix - a->ix);
+  IntegerMapType aby = (b->iy - a->iy);
+  IntegerMapType acx = (c->ix - a->ix);
+  IntegerMapType acy = (c->iy - a->iy);
+
+  mpz_t lhs, rhs, work;
+
+  mpz_init(work);
+  mpz_init(lhs);
+  mpz_init(rhs);
+
+  /* lhs = abx * acy */
+  MY_mpz_set_si(work, abx);
+  MY_mpz_mul_si(lhs, work, acy);
+
+  /* rhs = acx * aby */
+  MY_mpz_set_si(work, acx);
+  MY_mpz_mul_si(rhs, work, aby);
+
+  /* determinant; only its sign is returned */
+  mpz_sub(work, lhs, rhs);
+
+  int orientation = mpz_sgn(work);
+
+  mpz_clear(rhs);
+  mpz_clear(lhs);
+  mpz_clear(work);
+
+  return orientation;
+
+#else  /* #if USEDBITS > 31 */
+  /* edge vectors (b - a) and (c - a) in integer coordinates */
+  signed long long abx = (b->ix - a->ix);
+  signed long long aby = (b->iy - a->iy);
+  signed long long acx = (c->ix - a->ix);
+  signed long long acy = (c->iy - a->iy);
+
+  signed long long det = abx * acy - aby * acx;
+
+  /* reduce to the sign: +1, -1, or 0 */
+  return (det > 0) - (det < 0);
+#endif /* #if USEDBITS > 31 #else */
+}
+
+/* Edge nr of a triangle connects the two corners other than corner nr: it
+ * runs from corner edge_start[nr] to corner edge_end[nr]. */
+const int edge_start[3] = {1, 2, 0};
+const int edge_end[3]   = {2, 0, 1};
+
+/*! \brief Calculate cell volumes and face areas of mesh.
+ *
+ *  Processes edge nr of Delaunay triangle tt: a new face connecting the
+ *  centers (DTC entries) of triangle tt and of its neighbour across that edge
+ *  is appended to T->VF. The resulting volume, surface area and
+ *  volume-weighted center contributions are added to the two cells whose
+ *  points are joined by the edge, provided they are local gas cells in a
+ *  synchronized time bin. If this task is responsible for the face, its area
+ *  is additionally added to the ActiveArea of both cells, either directly or
+ *  via AreaList for points that belong to another task.
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] tt Index in DT array.
+ *  \param[in] nr Index of the edge within triangle tt (0, 1 or 2).
+ *
+ *  \return void
+ */
+void process_edge_faces_and_volumes(tessellation *T, int tt, int nr)
+{
+  int i, j, qq, p1, p2, k;
+  face *f;
+  double nx, ny;
+  double sx, sy;
+  double hx, hy;
+  double dvol, h;
+
+  /* make sure there is room for one more face, growing the list if needed */
+  if(T->Nvf + 1 >= T->MaxNvf)
+    {
+      T->Indi.AllocFacNvf *= ALLOC_INCREASE_FACTOR;
+      T->MaxNvf = T->Indi.AllocFacNvf;
+#ifdef VERBOSE
+      printf("Task=%d: increase memory allocation, MaxNvf=%d Indi.AllocFacNvf=%g\n", ThisTask, T->MaxNvf, T->Indi.AllocFacNvf);
+#endif /* #ifdef VERBOSE */
+      T->VF = myrealloc_movable(T->VF, T->MaxNvf * sizeof(face));
+
+      if(T->Nvf + 1 >= T->MaxNvf)
+        terminate("Nvf larger than MaxNvf");
+    }
+
+  /* fetch the array pointers only now, since VF may just have been reallocated */
+  tetra *DT         = T->DT;
+  point *DP         = T->DP;
+  face *VF          = T->VF;
+  tetra_center *DTC = T->DTC;
+
+  tetra *t = &DT[tt];
+
+  /* corner slots of the two end points of edge nr */
+  i = edge_start[nr];
+  j = edge_end[nr];
+
+  point *dpi = &DP[t->p[i]];
+  point *dpj = &DP[t->p[j]];
+
+  /* neighbouring triangle across edge nr */
+  qq = t->t[nr];
+
+  /* flag the edge as visited in this triangle and, under the index t->s[nr],
+   * in the neighbouring triangle */
+  Edge_visited[tt] |= (1 << nr);
+  Edge_visited[qq] |= (1 << (t->s[nr]));
+
+  p1 = t->p[i];
+  p2 = t->p[j];
+
+  /* append a new face to the list */
+  f = &VF[T->Nvf++];
+
+  f->p1 = p1;
+  f->p2 = p2;
+
+  /* face center: midpoint between the centers of the two triangles */
+  f->cx = 0.5 * (DTC[tt].cx + DTC[qq].cx);
+  f->cy = 0.5 * (DTC[tt].cy + DTC[qq].cy);
+  f->cz = 0;
+
+#ifdef TETRA_INDEX_IN_FACE
+  f->dt_index = tt;
+#endif /* #ifdef TETRA_INDEX_IN_FACE */
+
+#ifdef REFINEMENT_MERGE_CELLS
+  f->t  = tt;
+  f->nr = nr; /* delaunay tetra and edge number that generated this face */
+#endif        /* #ifdef REFINEMENT_MERGE_CELLS */
+
+  /* the face "area" is the distance between the two triangle centers */
+  nx = DTC[tt].cx - DTC[qq].cx;
+  ny = DTC[tt].cy - DTC[qq].cy;
+
+  f->area = sqrt(nx * nx + ny * ny);
+
+  /* h is half the distance between the two points joined by the edge */
+  hx = 0.5 * (dpi->x - dpj->x);
+  hy = 0.5 * (dpi->y - dpj->y);
+
+  /* dvol is the area of a triangle with base f->area and height h; the same
+   * value is credited to both adjacent cells below */
+  h    = sqrt(hx * hx + hy * hy);
+  dvol = 0.5 * f->area * h;
+
+#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE)
+  double angle = 0.5 * f->area / h;
+#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */
+
+  /* contribution to the cell of the first point, if it is a local gas cell */
+  if(dpi->task == ThisTask && dpi->index >= 0 && dpi->index < NumGas)
+    {
+      if(TimeBinSynchronized[P[dpi->index].TimeBinHydro])
+        {
+          SphP[dpi->index].Volume += dvol;
+          SphP[dpi->index].SurfaceArea += f->area;
+
+#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE)
+          if(SphP[dpi->index].MaxFaceAngle < angle)
+            SphP[dpi->index].MaxFaceAngle = angle;
+#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */
+
+#ifdef OUTPUT_SURFACE_AREA
+          if(f->area)
+            SphP[dpi->index].CountFaces++;
+#endif /* #ifdef OUTPUT_SURFACE_AREA */
+
+#if defined(REFINEMENT_SPLIT_CELLS)
+          if(SphP[dpi->index].MinimumEdgeDistance > h)
+            SphP[dpi->index].MinimumEdgeDistance = h;
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */
+          /* center-of-mass of the triangle spanned by the face and this point:
+           * two thirds of the face center plus one third of the point */
+          sx = (2.0 / 3) * f->cx + (1.0 / 3) * dpi->x;
+          sy = (2.0 / 3) * f->cy + (1.0 / 3) * dpi->y;
+
+          /* accumulated volume-weighted; not normalized in this function */
+          SphP[dpi->index].Center[0] += dvol * sx;
+          SphP[dpi->index].Center[1] += dvol * sy;
+        }
+    }
+
+  /* same contribution for the cell of the second point */
+  if(dpj->task == ThisTask && dpj->index >= 0 && dpj->index < NumGas)
+    {
+      if(TimeBinSynchronized[P[dpj->index].TimeBinHydro])
+        {
+          SphP[dpj->index].Volume += dvol;
+          SphP[dpj->index].SurfaceArea += f->area;
+
+#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE)
+          if(SphP[dpj->index].MaxFaceAngle < angle)
+            SphP[dpj->index].MaxFaceAngle = angle;
+#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */
+
+#ifdef OUTPUT_SURFACE_AREA
+          if(f->area)
+            SphP[dpj->index].CountFaces++;
+#endif /* #ifdef OUTPUT_SURFACE_AREA */
+
+#if defined(REFINEMENT_SPLIT_CELLS)
+          if(SphP[dpj->index].MinimumEdgeDistance > h)
+            SphP[dpj->index].MinimumEdgeDistance = h;
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */
+
+          /* center-of-mass of the triangle spanned by the face and this point */
+          sx = (2.0 / 3) * f->cx + (1.0 / 3) * dpj->x;
+          sy = (2.0 / 3) * f->cy + (1.0 / 3) * dpj->y;
+
+          SphP[dpj->index].Center[0] += dvol * sx;
+          SphP[dpj->index].Center[1] += dvol * sy;
+        }
+    }
+  /* order the two points by ID to decide which task handles this face */
+  int low_p, high_p;
+
+  if(DP[p1].ID < DP[p2].ID)
+    {
+      low_p  = p1;
+      high_p = p2;
+    }
+  else
+    {
+      low_p  = p2;
+      high_p = p1;
+    }
+
+  int this_task_responsible_flag = 0;
+
+  if(TimeBinSynchronized[DP[low_p].timebin]) /* the one with the lower ID is active */
+    {
+      /* we need to check whether the one with the lower ID is a local particle */
+      if(DP[low_p].task == ThisTask && DP[low_p].index >= 0 && DP[low_p].index < NumGas)
+        this_task_responsible_flag = 1;
+    }
+  else if(TimeBinSynchronized[DP[high_p].timebin]) /* only the side with the higher ID is active */
+    {
+      /* we need to check whether we hold the one with the higher ID, if yes, we'll do it */
+      if(DP[high_p].task == ThisTask && DP[high_p].index >= 0 && DP[high_p].index < NumGas)
+        this_task_responsible_flag = 1;
+    }
+
+  if(this_task_responsible_flag)
+    {
+      /* add the face area to the active area of both points (k = 0: p1, k = 1: p2) */
+      for(k = 0; k < 2; k++)
+        {
+          int p, q;
+
+          if(k == 0)
+            {
+              q = p1;
+              p = DP[q].index;
+            }
+          else
+            {
+              q = p2;
+              p = DP[q].index;
+            }
+
+          if(DP[q].task == ThisTask)
+            {
+              if(DP[q].index >= NumGas) /* this is a local ghost point */
+                p -= NumGas;
+
+              SphP[p].ActiveArea += f->area;
+            }
+          else
+            {
+              /* here we have a foreign ghost point */
+              if(DP[q].originalindex < 0)
+                terminate("should not happen");
+
+              /* grow the area list if it is full */
+              if(Narea >= MaxNarea)
+                {
+                  T->Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR;
+                  MaxNarea = T->Indi.AllocFacNflux;
+                  AreaList = myrealloc_movable(AreaList, MaxNarea * sizeof(struct area_list_data));
+
+                  if(Narea >= MaxNarea)
+                    terminate("Narea >= MaxNarea");
+                }
+
+              /* record the contribution together with the owning task and the
+               * index of the point on that task */
+              AreaList[Narea].task  = DP[q].task;
+              AreaList[Narea].index = DP[q].originalindex;
+              AreaList[Narea].darea = f->area;
+              Narea++;
+            }
+        }
+    }
+}
+
+/*! \brief Copies triangle information from DTC array to trilist.
+ *
+ *  The triangle stored at trilist[ntri] is spanned by the center of triangle
+ *  tt, the center of its neighbour across edge nr, and the point dtip. If
+ *  get_tri_volume() is negative for this ordering, the first two corners are
+ *  exchanged.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] tt Index of triangle in DT array.
+ *  \param[in] nr Index in DT[tt].t array (adjacent triangles).
+ *  \param[in] dtip Pointer to the point used as third corner.
+ *  \param[out] trilist Array of triangles.
+ *  \param[in] ntri Index in trilist array.
+ *  \param[in] max_n_tri Maximum index in trilist array.
+ *
+ *  \return Next index in trilist array.
+ */
+int derefine_refine_get_triangles(tessellation *T, int tt, int nr, point *dtip, triangle *trilist, int ntri, int max_n_tri)
+{
+  if(ntri >= max_n_tri)
+    terminate("ntri >= max_n_tri");
+
+  tetra_center *here  = &T->DTC[tt];
+  tetra_center *there = &T->DTC[T->DT[tt].t[nr]];
+  triangle *tri       = &trilist[ntri];
+
+  tri->p[0][0] = here->cx;
+  tri->p[0][1] = here->cy;
+
+  tri->p[1][0] = there->cx;
+  tri->p[1][1] = there->cy;
+
+  tri->p[2][0] = dtip->x;
+  tri->p[2][1] = dtip->y;
+
+  if(get_tri_volume(ntri, trilist) < 0)
+    {
+      /* wrong orientation: store the two centers in the opposite order */
+      tri->p[0][0] = there->cx;
+      tri->p[0][1] = there->cy;
+
+      tri->p[1][0] = here->cx;
+      tri->p[1][1] = here->cy;
+    }
+
+  return ntri + 1;
+}
+
+/*! \brief Add point and adjust triangles accordingly.
+ *
+ *  Every triangle in trilist is tested against the perpendicular bisector
+ *  between point q and the current owner of the triangle. Triangles lying
+ *  completely on the side of q are handed over to q, triangles completely on
+ *  the owner's side are left alone. A triangle crossed by the bisector is
+ *  cut: the original entry is shrunk to the part containing the single corner
+ *  that lies alone on its side, and the remaining quadrilateral is stored as
+ *  two new triangles appended to the end of the list.
+ *
+ *  \param[in] q Index of point in DP array.
+ *  \param[in, out] trilist Array of triangles.
+ *  \param[in] ntri Number of elements in trilist before splitting.
+ *  \param[in] max_ntri Maximum number of triangles allowed.
+ *  \param[in] vol (Unused)
+ *
+ *  \return Updated number of triangles.
+ */
+int derefine_add_point_and_split_tri(int q, triangle *trilist, int ntri, int max_ntri, double vol)
+{
+  double m[2], n[2], sc[3], *a;
+  double cut[2][2], ed[2];
+  int i, j, k, kk, l, nnew, flag[3], count, oldq;
+
+  /* only the first ntri triangles are examined; newly created ones are
+   * appended at index nnew and are not visited again in this call */
+  for(i = 0, nnew = ntri; i < ntri; i++)
+    {
+      if(trilist[i].owner < 0 || trilist[i].owner >= Mesh.Ndp)
+        {
+          char buf[1000];
+          sprintf(buf, "i=%d trilist[i].owner=%d\n", i, trilist[i].owner);
+          terminate(buf);
+        }
+
+      if(q < 0 || q >= Mesh.Ndp)
+        {
+          char buf[1000];
+          sprintf(buf, "i=%d q=%d\n", i, q);
+          terminate(buf);
+        }
+
+      /* midpoint */
+      m[0] = 0.5 * (Mesh.DP[q].x + Mesh.DP[trilist[i].owner].x);
+      m[1] = 0.5 * (Mesh.DP[q].y + Mesh.DP[trilist[i].owner].y);
+
+      /* vector from the current owner to q (normal of the bisector) */
+      n[0] = (Mesh.DP[q].x - Mesh.DP[trilist[i].owner].x);
+      n[1] = (Mesh.DP[q].y - Mesh.DP[trilist[i].owner].y);
+
+      if(q == trilist[i].owner)
+        terminate("q == trilist[i].owner");
+
+      for(k = 0, count = 0; k < 3; k++) /* determine the side of each point */
+        {
+          a = &trilist[i].p[k][0];
+
+          /* projection of (corner - midpoint) onto n; positive means the
+           * corner is on the side of q */
+          sc[k] = (a[0] - m[0]) * n[0] + (a[1] - m[1]) * n[1];
+
+          if(sc[k] > 0)
+            {
+              flag[k] = 1;
+              count++;
+            }
+          else
+            flag[k] = 0;
+        }
+
+      /* count = number of corners on the side of q */
+      switch(count)
+        {
+          case 0: /* the whole tetra is on the side of current owner - nothing to be done */
+            break;
+
+          case 3:                 /* the whole tetra is on the side of new point */
+            trilist[i].owner = q; /* change owner */
+            break;
+
+          case 1:
+          case 2:
+
+            /* the triangle is cut, which requires two additional entries */
+            if(nnew + 2 > max_ntri)
+              terminate("nnew + 2 > max_ntri");
+
+            /* both new entries start out as copies of the uncut triangle */
+            trilist[nnew]     = trilist[i];
+            trilist[nnew + 1] = trilist[i];
+
+            /* find the point index that is on the other side */
+            for(k = 0; k < 3; k++)
+              {
+                if(flag[k] == 1 && count == 1)
+                  break;
+                if(flag[k] == 0 && count == 2)
+                  break;
+              }
+
+            /* intersect the two edges that start at corner k with the bisector */
+            for(j = 0; j < 2; j++)
+              {
+                kk = k + j + 1;
+                if(kk > 2)
+                  kk -= 3;
+
+                double *b = trilist[i].p[k];
+                double *a = trilist[i].p[kk];
+
+                for(l = 0; l < 2; l++)
+                  ed[l] = a[l] - b[l];
+
+                /* fraction t along the edge from b to a at which the projection
+                 * onto n vanishes; an edge parallel to the bisector is cut in
+                 * the middle */
+                double prod = (ed[0] * n[0] + ed[1] * n[1]);
+                double t;
+                if(prod)
+                  t = -sc[k] / prod;
+                else
+                  t = 0.5;
+
+                /* keep the cut point within the edge */
+                if(t < 0)
+                  t = 0;
+                if(t > 1)
+                  t = 1;
+
+                for(l = 0; l < 2; l++)
+                  cut[j][l] = b[l] + t * ed[l];
+              }
+
+            /* shrink the original triangle to (corner k, cut[0], cut[1]) */
+            for(j = 0; j < 2; j++)
+              {
+                kk = k + j + 1;
+                if(kk > 2)
+                  kk -= 3;
+
+                for(l = 0; l < 2; l++)
+                  trilist[i].p[kk][l] = cut[j][l];
+              }
+
+            oldq = trilist[i].owner;
+
+            /* if corner k is the only one on the side of q, the shrunk
+             * triangle goes to q; otherwise it stays with the old owner */
+            if(count == 1)
+              trilist[i].owner = q;
+
+            /* modify the two new tetras */
+            kk = k + 1;
+            if(kk > 2)
+              kk -= 3;
+
+            /* the two new triangles together cover the rest of the original
+             * triangle: corner k is replaced by cut[0] in the first one; in
+             * the second one corner k becomes cut[1] and corner k + 1 becomes
+             * cut[0] */
+            for(l = 0; l < 2; l++)
+              {
+                trilist[nnew].p[k][l] = cut[0][l];
+
+                trilist[nnew + 1].p[k][l]  = cut[1][l];
+                trilist[nnew + 1].p[kk][l] = cut[0][l];
+              }
+
+            /* the new triangles get the owner the shrunk triangle did not get */
+            if(count == 1)
+              {
+                trilist[nnew].owner     = oldq;
+                trilist[nnew + 1].owner = oldq;
+              }
+            else
+              {
+                trilist[nnew].owner     = q;
+                trilist[nnew + 1].owner = q;
+              }
+            nnew += 2;
+            break;
+        }
+    }
+
+  return nnew;
+}
+
+/*! \brief Determines area of triangle (i.e. 2d Volume).
+ *
+ *  The result is half the cross product of the edge vectors (p1 - p0) and
+ *  (p2 - p0) and therefore carries a sign that depends on the order of the
+ *  corners.
+ *
+ *  \param i Index in trilist array.
+ *  \param trilist Array with triangles.
+ *
+ *  \return Signed area of triangle.
+ */
+double get_tri_volume(int i, triangle *trilist)
+{
+  triangle *tri = trilist + i;
+
+  /* z-component of (p1 - p0) x (p2 - p0) */
+  double cross = (tri->p[1][0] - tri->p[0][0]) * (tri->p[2][1] - tri->p[0][1]) - (tri->p[1][1] - tri->p[0][1]) * (tri->p[2][0] - tri->p[0][0]);
+
+  return 0.5 * cross;
+}
+
+/*! \brief Process edge for volume calculation.
+ *
+ *  Marks edge nr of triangle tt as visited (in tt and in the neighbouring
+ *  triangle) and adds the volume contribution of the corresponding face to
+ *  the vol entries of the two points joined by the edge.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in, out] vol Array of volumes, indexed by point index in DP; only
+ *                  entries with index in [0, DeRefMesh.Ndp) are updated.
+ *  \param[in] tt Index of triangle in DT array.
+ *  \param[in] nr Index in edge array.
+ *
+ *  \return void
+ */
+void derefine_refine_process_edge(tessellation *T, double *vol, int tt, int nr)
+{
+  tetra *tri        = &T->DT[tt];
+  tetra_center *ctr = T->DTC;
+
+  /* neighbouring triangle across the edge and the two end points of the edge */
+  int nb = tri->t[nr];
+  int pa = tri->p[edge_start[nr]];
+  int pb = tri->p[edge_end[nr]];
+
+  point *A = &T->DP[pa];
+  point *B = &T->DP[pb];
+
+  Edge_visited[tt] |= (1 << nr);
+  Edge_visited[nb] |= (1 << (tri->s[nr]));
+
+  /* length of the face: distance between the two triangle centers */
+  double cdx      = ctr[tt].cx - ctr[nb].cx;
+  double cdy      = ctr[tt].cy - ctr[nb].cy;
+  double face_len = sqrt(cdx * cdx + cdy * cdy);
+
+  /* half the distance between the two points */
+  double hx       = 0.5 * (A->x - B->x);
+  double hy       = 0.5 * (A->y - B->y);
+  double half_sep = sqrt(hx * hx + hy * hy);
+
+  /* area of the triangle with base face_len and height half_sep */
+  double dvol = 0.5 * face_len * half_sep;
+
+  if(pa >= 0 && pa < DeRefMesh.Ndp)
+    vol[pa] += dvol;
+
+  if(pb >= 0 && pb < DeRefMesh.Ndp)
+    vol[pb] += dvol;
+}
+
+/*! \brief Computes the circum-circle of all triangles in mesh.
+ *
+ *  Triangles whose lowest bit in T->DTF is already set are skipped; for all
+ *  others the bit is set, and the circum-circle is computed unless one of the
+ *  corners is the point at infinity.
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *
+ *  \return void
+ */
+void compute_circumcircles(tessellation *T)
+{
+  tetra *tri  = T->DT;
+  char *flags = T->DTF;
+  int idx;
+
+  for(idx = 0; idx < T->Ndt; idx++)
+    {
+      /* bit 0 marks triangles that have been handled before */
+      if(flags[idx] & 1)
+        continue;
+
+      flags[idx] |= 1;
+
+      /* only triangles without a corner at infinity get a circum-circle */
+      if(!(tri[idx].p[0] == DPinfinity || tri[idx].p[1] == DPinfinity || tri[idx].p[2] == DPinfinity))
+        update_circumcircle(T, idx);
+    }
+}
+
+/*! \brief Computes the circum-circle of triangle tt.
+ *
+ *  The center is obtained from a 2x2 linear system set up in the xx/yy
+ *  coordinates relative to the first corner, converted back with
+ *  ConversionFac and CentralOffsetX/Y, and stored in T->DTC[tt]. Nothing is
+ *  done if one of the corners is the point at infinity.
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] tt Index of triangle in DT array.
+ *
+ *  \return void
+ */
+void update_circumcircle(tessellation *T, int tt)
+{
+  tetra *tri = &T->DT[tt];
+
+  if(tri->p[0] == DPinfinity || tri->p[1] == DPinfinity || tri->p[2] == DPinfinity)
+    return;
+
+  point *a = &T->DP[tri->p[0]];
+  point *b = &T->DP[tri->p[1]];
+  point *c = &T->DP[tri->p[2]];
+
+  /* edge vectors from the first corner to the other two */
+  double ux = b->xx - a->xx;
+  double uy = b->yy - a->yy;
+
+  double vx = c->xx - a->xx;
+  double vy = c->yy - a->yy;
+
+  /* right-hand sides: half the squared edge lengths */
+  double uu = 0.5 * (ux * ux + uy * uy);
+  double vv = 0.5 * (vx * vx + vy * vy);
+
+  double system[] = {ux, uy, uu, vx, vy, vv};
+  double sol[2];
+
+  int rc = solve_linear_equations_2d(system, sol);
+
+  if(rc >= 0)
+    {
+      /* the solution is relative to the first corner */
+      double cx_int = sol[0] + a->xx;
+      double cy_int = sol[1] + a->yy;
+
+      /* convert back from the xx/yy coordinates */
+      tetra_center *ctr = &T->DTC[tt];
+
+      ctr->cx = (cx_int - 1.0) / ConversionFac + CentralOffsetX;
+      ctr->cy = (cy_int - 1.0) / ConversionFac + CentralOffsetY;
+      ctr->cz = 0;
+    }
+  else
+    {
+      terminate("trouble in circum-circle calculation\n");
+    }
+}
+
+/*! \brief Computes the integer coordinates from coordinates for a point.
+ *
+ *  The position (x, y) is first mapped to (xx, yy) using CentralOffsetX/Y and
+ *  ConversionFac; both values have to lie in the interval [1, 2), otherwise
+ *  the run is terminated. The integer coordinates (ix, iy) are then derived
+ *  from (xx, yy) with double_to_voronoiint(), and finally (xx, yy) are
+ *  replaced by their mask_voronoi_int() values.
+ *
+ *  \param[in, out] p Pointer to point.
+ *
+ *  \return void
+ */
+void set_integers_for_pointer(point *p)
+{
+  p->xx = (p->x - CentralOffsetX) * ConversionFac + 1.0;
+  p->yy = (p->y - CentralOffsetY) * ConversionFac + 1.0;
+
+  /* The test is written as a negated "inside" condition on purpose: every
+   * comparison involving NaN is false, so a NaN coordinate fails the inside
+   * test and is rejected here instead of reaching the integer conversion. */
+  if(!(p->xx >= 1.0 && p->xx < 2.0 && p->yy >= 1.0 && p->yy < 2.0))
+    {
+      printf("(%g, %g) (%g, %g)\n", p->x, p->y, p->xx, p->yy);
+      terminate("invalid coordinate range");
+    }
+
+  /* the integer coordinates must be taken from the unmasked values */
+  p->ix = double_to_voronoiint(p->xx);
+  p->iy = double_to_voronoiint(p->yy);
+
+  p->xx = mask_voronoi_int(p->xx);
+  p->yy = mask_voronoi_int(p->yy);
+}
+
+/*! \brief Outputs Voronoi mesh to file.
+ *
+ *  Outputs the Voronoi mesh data of the tasks writeTask to lastTask into the
+ *  file fname. Each task builds, for every local gas cell, the list of
+ *  Delaunay triangles around the cell's point. Task writeTask collects the
+ *  data of the other tasks and writes, in this order: the total number of gas
+ *  cells, of edge list entries and of triangles (three ints), the number of
+ *  edges per cell, the offset of each cell into the edge list, the edge list
+ *  itself (triangle indices), and the triangle centers as floats (DIMS values
+ *  per triangle). Offsets and triangle indices of the other tasks are shifted
+ *  by the totals of the preceding tasks so that they refer to the
+ *  concatenated arrays.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] fname File name of file the data is written in.
+ *  \param[in] writeTask Task that gathers information and writes data.
+ *  \param[in] lastTask Last task that is included in this dump.
+ *
+ *  \return void
+ */
+void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask)
+{
+  CPU_Step[CPU_MISC] += measure_time();
+
+  FILE *fd;
+  char msg[1000];
+  MPI_Status status;
+  int i, j, k, MaxNel, Nel;
+  int ngas_tot, nel_tot, ndt_tot, nel_before, ndt_before, task;
+  int *EdgeList, *Nedges, *NedgesOffset, *whichtetra;
+  int *ngas_list, *nel_list, *ndt_list, *tmp;
+  float *xyz_edges;
+  tetra *q, *qstart;
+
+  tetra_center *DTC = T->DTC;
+  tetra *DT         = T->DT;
+  point *DP         = T->DP;
+
+  MaxNel = 10 * NumGas; /* max edge list */
+  Nel    = 0;           /* length of edge list */
+
+  EdgeList     = mymalloc("EdgeList", MaxNel * sizeof(int));
+  Nedges       = mymalloc("Nedges", NumGas * sizeof(int));
+  NedgesOffset = mymalloc("NedgesOffset", NumGas * sizeof(int));
+  whichtetra   = mymalloc("whichtetra", NumGas * sizeof(int));
+  xyz_edges    = mymalloc("xyz_edges", T->Ndt * DIMS * sizeof(float));
+  ngas_list    = mymalloc("ngas_list", sizeof(int) * NTask);
+  nel_list     = mymalloc("nel_list", sizeof(int) * NTask);
+  ndt_list     = mymalloc("ndt_list", sizeof(int) * NTask);
+
+  /* triangle centers, converted to single precision for output */
+  for(i = 0; i < T->Ndt; i++)
+    {
+      xyz_edges[i * DIMS + 0] = DTC[i].cx;
+      xyz_edges[i * DIMS + 1] = DTC[i].cy;
+    }
+
+  for(i = 0; i < NumGas; i++)
+    {
+      Nedges[i]     = 0;
+      whichtetra[i] = -1;
+    }
+
+  /* remember for every local gas cell one triangle that has the cell's point
+   * as a corner; it serves as starting point for the walk below */
+  for(i = 0; i < T->Ndt; i++)
+    {
+      for(j = 0; j < DIMS + 1; j++)
+        if(DP[DT[i].p[j]].task == ThisTask && DP[DT[i].p[j]].index >= 0 && DP[DT[i].p[j]].index < NumGas)
+          whichtetra[DP[DT[i].p[j]].index] = i;
+    }
+
+  /* walk around each cell's point from triangle to triangle until the
+   * starting triangle is reached again, recording every triangle visited */
+  for(i = 0; i < NumGas; i++)
+    {
+      if(whichtetra[i] < 0)
+        continue;
+
+      qstart = q = &DT[whichtetra[i]];
+
+      do
+        {
+          Nedges[i]++;
+
+          if(Nel >= MaxNel)
+            terminate("Nel >= MaxNel");
+
+          EdgeList[Nel++] = q - DT;
+
+          /* find the corner of the current triangle that is point i */
+          for(j = 0; j < 3; j++)
+            if(DP[q->p[j]].task == ThisTask && DP[q->p[j]].index == i)
+              break;
+
+          /* continue with the neighbour opposite to the next corner */
+          k = j + 1;
+          if(k >= 3)
+            k -= 3;
+
+          q = &DT[q->t[k]];
+        }
+      while(q != qstart);
+    }
+
+  /* offset of each cell's first entry in the local edge list; the first
+   * element is only written if the array is not empty */
+  if(NumGas > 0)
+    NedgesOffset[0] = 0;
+  for(i = 1; i < NumGas; i++)
+    NedgesOffset[i] = NedgesOffset[i - 1] + Nedges[i - 1];
+
+  /* determine particle numbers and number of edges in file */
+
+  if(ThisTask == writeTask)
+    {
+      ngas_tot = NumGas;
+      nel_tot  = Nel;
+      ndt_tot  = T->Ndt;
+
+      for(task = writeTask + 1; task <= lastTask; task++)
+        {
+          MPI_Recv(&ngas_list[task], 1, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status);
+          MPI_Recv(&nel_list[task], 1, MPI_INT, task, TAG_LOCALN + 1, MPI_COMM_WORLD, &status);
+          MPI_Recv(&ndt_list[task], 1, MPI_INT, task, TAG_LOCALN + 2, MPI_COMM_WORLD, &status);
+
+          /* tell the task the totals of all preceding tasks, which it uses as
+           * offsets for its own data */
+          MPI_Send(&nel_tot, 1, MPI_INT, task, TAG_N, MPI_COMM_WORLD);
+          MPI_Send(&ndt_tot, 1, MPI_INT, task, TAG_N + 1, MPI_COMM_WORLD);
+
+          ngas_tot += ngas_list[task];
+          nel_tot += nel_list[task];
+          ndt_tot += ndt_list[task];
+        }
+
+      if(!(fd = fopen(fname, "w")))
+        {
+          /* bounded formatting: fname is supplied by the caller and may be
+           * longer than the message buffer */
+          snprintf(msg, sizeof(msg), "can't open file `%s' for writing snapshot.\n", fname);
+          terminate(msg);
+        }
+
+      my_fwrite(&ngas_tot, sizeof(int), 1, fd);
+      my_fwrite(&nel_tot, sizeof(int), 1, fd);
+      my_fwrite(&ndt_tot, sizeof(int), 1, fd);
+
+      /* number of edges per cell: own data first, then the other tasks */
+      my_fwrite(Nedges, sizeof(int), NumGas, fd);
+      for(task = writeTask + 1; task <= lastTask; task++)
+        {
+          tmp = mymalloc("tmp", sizeof(int) * ngas_list[task]);
+          MPI_Recv(tmp, ngas_list[task], MPI_INT, task, TAG_N + 2, MPI_COMM_WORLD, &status);
+          my_fwrite(tmp, sizeof(int), ngas_list[task], fd);
+          myfree(tmp);
+        }
+
+      /* offsets into the edge list */
+      my_fwrite(NedgesOffset, sizeof(int), NumGas, fd);
+      for(task = writeTask + 1; task <= lastTask; task++)
+        {
+          tmp = mymalloc("tmp", sizeof(int) * ngas_list[task]);
+          MPI_Recv(tmp, ngas_list[task], MPI_INT, task, TAG_N + 3, MPI_COMM_WORLD, &status);
+          my_fwrite(tmp, sizeof(int), ngas_list[task], fd);
+          myfree(tmp);
+        }
+
+      /* edge list (triangle indices) */
+      my_fwrite(EdgeList, sizeof(int), Nel, fd);
+      for(task = writeTask + 1; task <= lastTask; task++)
+        {
+          tmp = mymalloc("tmp", sizeof(int) * nel_list[task]);
+          MPI_Recv(tmp, nel_list[task], MPI_INT, task, TAG_N + 4, MPI_COMM_WORLD, &status);
+          my_fwrite(tmp, sizeof(int), nel_list[task], fd);
+          myfree(tmp);
+        }
+
+      /* triangle centers; transferred as raw bytes and written as floats */
+      my_fwrite(xyz_edges, sizeof(float), T->Ndt * DIMS, fd);
+      for(task = writeTask + 1; task <= lastTask; task++)
+        {
+          tmp = mymalloc("tmp", sizeof(float) * DIMS * ndt_list[task]);
+          MPI_Recv(tmp, sizeof(float) * DIMS * ndt_list[task], MPI_BYTE, task, TAG_N + 5, MPI_COMM_WORLD, &status);
+          my_fwrite(tmp, sizeof(float), DIMS * ndt_list[task], fd);
+          myfree(tmp);
+        }
+
+      fclose(fd);
+    }
+  else
+    {
+      MPI_Send(&NumGas, 1, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD);
+      MPI_Send(&Nel, 1, MPI_INT, writeTask, TAG_LOCALN + 1, MPI_COMM_WORLD);
+      MPI_Send(&T->Ndt, 1, MPI_INT, writeTask, TAG_LOCALN + 2, MPI_COMM_WORLD);
+
+      MPI_Recv(&nel_before, 1, MPI_INT, writeTask, TAG_N, MPI_COMM_WORLD, &status);
+      MPI_Recv(&ndt_before, 1, MPI_INT, writeTask, TAG_N + 1, MPI_COMM_WORLD, &status);
+
+      /* shift the local offsets and triangle indices so that they refer to
+       * the concatenated arrays in the file */
+      for(i = 0; i < NumGas; i++)
+        NedgesOffset[i] += nel_before;
+      for(i = 0; i < Nel; i++)
+        EdgeList[i] += ndt_before;
+
+      MPI_Send(Nedges, NumGas, MPI_INT, writeTask, TAG_N + 2, MPI_COMM_WORLD);
+      MPI_Send(NedgesOffset, NumGas, MPI_INT, writeTask, TAG_N + 3, MPI_COMM_WORLD);
+      MPI_Send(EdgeList, Nel, MPI_INT, writeTask, TAG_N + 4, MPI_COMM_WORLD);
+      MPI_Send(xyz_edges, sizeof(float) * DIMS * T->Ndt, MPI_BYTE, writeTask, TAG_N + 5, MPI_COMM_WORLD);
+    }
+
+  /* buffers are released in reverse order of their allocation */
+  myfree(ndt_list);
+  myfree(nel_list);
+  myfree(ngas_list);
+  myfree(xyz_edges);
+  myfree(whichtetra);
+  myfree(NedgesOffset);
+  myfree(Nedges);
+  myfree(EdgeList);
+
+  mpi_printf("wrote Voronoi mesh to file\n");
+
+  CPU_Step[CPU_MAKEIMAGES] += measure_time();
+}
+
+#endif /* #if defined(TWODIMS) && !defined(ONEDIMS) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_3d.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_3d.c
new file mode 100644
index 0000000000..f8cc3ad712
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_3d.c
@@ -0,0 +1,5111 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi_3d.c
+ * \date        05/2018
+ * \brief       Routines to build a 3d Voronoi mesh.
+ * \details     Note that some of these routines have the same name as the ones
+ *              in voronoi_1d.c and voronoi_2d.c and just replace them in case
+ *              neither the Config-option TWODIMS nor ONEDIMS is active.
+ *              contains functions:
+ *                void initialize_and_create_first_tetra(tessellation * T)
+ *                void get_line_segments(int sphp_index, int dp_index, double
+ *                  *segments, unsigned int *nof_elements, unsigned int
+ *                  max_elements)
+ *                void process_edge_faces_and_volumes(tessellation * T,
+ *                  int tt, int nr)
+ *                int derefine_refine_get_triangles(tessellation * T, int tt,
+ *                  int nr, point * dtip, triangle * trilist, int ntri,
+ *                  int max_n_tri)
+ *                double get_tri_volume(int i, triangle * trilist)
+ *                int derefine_add_point_and_split_tri(int q, triangle
+ *                  * trilist, int ntri, int max_ntri, double vol)
+ *                void derefine_refine_process_edge(tessellation * T,
+ *                  double *vol, int tt, int nr)
+ *                int insert_point(tessellation * T, int pp, int ttstart)
+ *                int convex_edge_test(tessellation * T, int tt, int tip,
+ *                  int *edgenr)
+ *                void make_a_face_split(tessellation * T, int tt0,
+ *                  int face_nr, int pp, int tt1, int tt2, int qq1, int qq2)
+ *                void make_an_edge_split(tessellation * T, int tt0,
+ *                  int edge_nr, int count, int pp, int *ttlist)
+ *                void make_a_4_to_4_flip(tessellation * T, int tt,
+ *                  int tip_index, int edge_nr)
+ *                void make_a_1_to_4_flip(tessellation * T, int pp, int tt0,
+ *                  int tt1, int tt2, int tt3)
+ *                void make_a_3_to_2_flip(tessellation * T, int tt0, int tt1,
+ *                  int tt2, int tip, int edge, int bottom)
+ *                void make_a_2_to_3_flip(tessellation * T, int tt0, int tip,
+ *                  int tt1, int bottom, int qq, int tt2)
+ *                int get_tetra(tessellation * T, point * p, int *moves,
+ *                  int ttstart, int *flag, int *edgeface_nr)
+ *                int InTetra(tessellation * T, int tt, point * p,
+ *                  int *edgeface_nr, int *nexttetra)
+ *                void compute_circumcircles(tessellation * T)
+ *                void calc_mpz_determinant(mpz_t det, mpz_t ax, mpz_t ay,
+ *                  mpz_t az, mpz_t bx, mpz_t by, mpz_t bz, mpz_t cx,
+ *                  mpz_t cy, mpz_t cz)
+ *                void get_circumcircle_exact(tessellation * T, int tt,
+ *                  double *x, double *y, double *z)
+ *                void update_circumcircle(tessellation * T, int tt)
+ *                int test_tetra_orientation(point * p0, point * p1,
+ *                  point * p2, point * p3)
+ *                double calculate_tetra_volume(point * p0, point * p1,
+ *                  point * p2, point * p3)
+ *                void add_row(double *m, int r1, int r2, double fac)
+ *                int solve_linear_equations(double *m, double *res)
+ *                void set_integers_for_pointer(point * p)
+ *                int InSphere_Exact(point * p0, point * p1, point * p2,
+ *                  point * p3, point * p)
+ *                int InSphere_Quick(point * p0, point * p1, point * p2,
+ *                  point * p3, point * p)
+ *                int InSphere_Errorbound(point * p0, point * p1, point * p2,
+ *                  point * p3, point * p)
+ *                int Orient3d_Exact(point * p0, point * p1, point * p2,
+ *                  point * p3)
+ *                int Orient3d_Quick(point * p0, point * p1, point * p2,
+ *                  point * p3)
+ *                int Orient3d(point * p0, point * p1, point * p2, point * p3)
+ *                int compare_face_sort(const void *a, const void *b)
+ *                void get_voronoi_face_vertex_indices(tessellation * T)
+ *                void get_voronoi_face_vertex_coordinates(tessellation * T)
+ *                void sort_faces_by_ID(void)
+ *                void write_voronoi_face_vertex_indices(tessellation * T,
+ *                  char *fname1, char *fname2, int writeTask, int lastTask)
+ *                void write_voronoi_face_vertex_coordinates(tessellation * T,
+ *                  char *fname, int writeTask, int lastTask)
+ *                void write_voronoi_mesh(tessellation * T, char *fname,
+ *                  int writeTask, int lastTask)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gmp.h>
+#include <gsl/gsl_linalg.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+#if !defined(TWODIMS) && !defined(ONEDIMS) /* will only be compiled in 3D case */
+
+#define INSIDE_EPS 1.0e-6
+#define GAUSS_EPS 1.0e-8
+
+const int access_triangles[4][3] = {{1, 3, 2}, {0, 2, 3}, {0, 3, 1}, {0, 1, 2}}; /* row k: the three local vertex indices other than k */
+/* tables indexed by local edge number 0..5 of a tetrahedron (local vertex indices 0..3) */
+const int edge_start[6]     = {0, 0, 0, 1, 1, 2}; /* first vertex of the edge */
+const int edge_end[6]       = {1, 2, 3, 2, 3, 3}; /* second vertex of the edge */
+const int edge_opposite[6]  = {3, 1, 2, 3, 0, 1}; /* one of the two vertices that are not on the edge */
+const int edge_nexttetra[6] = {2, 3, 1, 0, 2, 0}; /* the other one; used as index into tetra.t[] to step to the next tetra around the edge */
+
+/*! \brief Initializes 3d tessellation and creates the all-enclosing tetrahedron.
+ *
+ *  \param[out] T Pointer to tessellation structure; its counters are reset and
+ *  its VF, DP and DT arrays are allocated in this routine.
+ *
+ *  \return void
+ */
+void initialize_and_create_first_tetra(tessellation *T)
+{
+  point *p;
+  int i, n;
+
+  T->MaxNdp = T->Indi.AllocFacNdp;
+  T->MaxNdt = T->Indi.AllocFacNdt;
+  T->MaxNvf = T->Indi.AllocFacNvf;
+
+  T->Ndp = 0;
+  T->Ndt = 0;
+  T->Nvf = 0;
+
+  T->VF = mymalloc_movable(&T->VF, "VF", T->MaxNvf * sizeof(face));
+
+  T->DP = mymalloc_movable(&T->DP, "DP", (T->MaxNdp + 5) * sizeof(point));
+  T->DP += 5; /* indices -5..-1 now address the 5 extra entries: neutral point (-5) and the four tetra corners (-4..-1) */
+
+  T->DT = mymalloc_movable(&T->DT, "DT", T->MaxNdt * sizeof(tetra));
+
+  /* construct all encompassing huge tetrahedron */
+
+  double box, tetra_incircle, tetra_sidelength, tetra_height, tetra_face_height;
+
+  box = boxSize_X; /* box ends up as the largest of the three box extents */
+  if(box < boxSize_Y)
+    box = boxSize_Y;
+  if(box < boxSize_Z)
+    box = boxSize_Z;
+
+  tetra_incircle    = 1.5 * box;                         /* inradius r of the regular tetrahedron */
+  tetra_sidelength  = tetra_incircle * sqrt(24);         /* edge length a = sqrt(24) * r */
+  tetra_height      = sqrt(2.0 / 3) * tetra_sidelength;  /* height of the tetrahedron, sqrt(2/3) * a */
+  tetra_face_height = sqrt(3.0) / 2.0 * tetra_sidelength; /* height of one triangular face, sqrt(3)/2 * a */
+
+  point *DP = T->DP;
+  tetra *DT = T->DT;
+
+  /* first, let's make the points (regular tetrahedron with its centroid at the origin) */
+  DP[-4].x = 0.5 * tetra_sidelength;
+  DP[-4].y = -1.0 / 3 * tetra_face_height;
+  DP[-4].z = -0.25 * tetra_height;
+
+  DP[-3].x = 0;
+  DP[-3].y = 2.0 / 3 * tetra_face_height;
+  DP[-3].z = -0.25 * tetra_height;
+
+  DP[-2].x = -0.5 * tetra_sidelength;
+  DP[-2].y = -1.0 / 3 * tetra_face_height;
+  DP[-2].z = -0.25 * tetra_height;
+
+  DP[-1].x = 0;
+  DP[-1].y = 0;
+  DP[-1].z = 0.75 * tetra_height;
+
+  for(i = -4; i <= -1; i++) /* move the centroid to (box/2, box/2, box/2) */
+    {
+      DP[i].x += 0.5 * box;
+      DP[i].y += 0.5 * box;
+      DP[i].z += 0.5 * box;
+    }
+
+  for(i = -4, p = &DP[-4]; i < 0; i++, p++) /* tag the four corners: index -1, owned by this task, timebin 0 */
+    {
+      p->index   = -1;
+      p->task    = ThisTask;
+      p->timebin = 0;
+    }
+
+  /* we also define a neutral element at infinity */
+  DPinfinity = -5;
+
+  DP[DPinfinity].x       = MAX_DOUBLE_NUMBER;
+  DP[DPinfinity].y       = MAX_DOUBLE_NUMBER;
+  DP[DPinfinity].z       = MAX_DOUBLE_NUMBER;
+  DP[DPinfinity].index   = -1;
+  DP[DPinfinity].task    = ThisTask;
+  DP[DPinfinity].timebin = 0;
+
+  /* now let's make the big tetrahedron */
+  DT[0].p[0] = -4;
+  DT[0].p[1] = -3;
+  DT[0].p[2] = -2;
+  DT[0].p[3] = -1;
+
+  /* On the outer faces, we attach tetrahedra with the neutral element as tip.
+   * This way we will be able to navigate nicely within the tessellation,
+   * and all tetrahedra have defined neighbouring tetrahedra.
+   */
+
+  for(i = 0; i < 4; i++)
+    {
+      n = i + 1; /* tetra index */
+
+      DT[0].t[i] = n; /* neighbour of tetra 0 across the face opposite its vertex i */
+      DT[0].s[i] = 3; /* in that neighbour, vertex 3 (the neutral point) is the one not on the shared face */
+
+      DT[n].t[3] = 0; /* and back: across the face opposite vertex 3 of tetra n lies tetra 0 ... */
+      DT[n].s[3] = i; /* ... whose vertex i is not on the shared face */
+      DT[n].p[3] = DPinfinity;
+    }
+
+  DT[1].p[0] = DT[0].p[1]; /* tetra 1 sits on the face of tetra 0 that omits p[0] */
+  DT[1].p[1] = DT[0].p[2];
+  DT[1].p[2] = DT[0].p[3];
+
+  DT[2].p[0] = DT[0].p[0]; /* tetra 2 sits on the face of tetra 0 that omits p[1] */
+  DT[2].p[1] = DT[0].p[3];
+  DT[2].p[2] = DT[0].p[2];
+
+  DT[3].p[0] = DT[0].p[0]; /* tetra 3 sits on the face of tetra 0 that omits p[2] */
+  DT[3].p[1] = DT[0].p[1];
+  DT[3].p[2] = DT[0].p[3];
+
+  DT[4].p[0] = DT[0].p[0]; /* tetra 4 sits on the face of tetra 0 that omits p[3] */
+  DT[4].p[1] = DT[0].p[2];
+  DT[4].p[2] = DT[0].p[1];
+
+  DT[1].t[0] = 2; /* tetras 1 and 2 are mutual neighbours */
+  DT[2].t[0] = 1;
+  DT[1].s[0] = 0;
+  DT[2].s[0] = 0;
+
+  DT[1].t[1] = 3; /* tetras 1 and 3 are mutual neighbours */
+  DT[3].t[0] = 1;
+  DT[1].s[1] = 0;
+  DT[3].s[0] = 1;
+
+  DT[1].t[2] = 4; /* tetras 1 and 4 are mutual neighbours */
+  DT[4].t[0] = 1;
+  DT[1].s[2] = 0;
+  DT[4].s[0] = 2;
+
+  DT[2].t[2] = 3; /* tetras 2 and 3 are mutual neighbours */
+  DT[3].t[1] = 2;
+  DT[2].s[2] = 1;
+  DT[3].s[1] = 2;
+
+  DT[2].t[1] = 4; /* tetras 2 and 4 are mutual neighbours */
+  DT[4].t[2] = 2;
+  DT[2].s[1] = 2;
+  DT[4].s[2] = 1;
+
+  DT[3].t[2] = 4; /* tetras 3 and 4 are mutual neighbours */
+  DT[4].t[1] = 3;
+  DT[3].s[2] = 1;
+  DT[4].s[1] = 2;
+
+  T->Ndt = 5; /* we'll start out with 5 tetras */
+
+  CentralOffsetX = 0.5 * box - 0.5000001 * tetra_sidelength; /* offsets lie just below the smallest corner coordinate in x, y, z */
+  CentralOffsetY = 0.5 * box - (1.0000001 / 3) * tetra_face_height;
+  CentralOffsetZ = 0.5 * box - 0.25000001 * tetra_height;
+
+  ConversionFac = 1.0 / (1.001 * tetra_sidelength); /* NOTE(review): presumably scales (coordinate - offset) into [0,1) for set_integers_for_point - confirm */
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  for(i = -4; i < 0; i++)
+    set_integers_for_point(T, i);
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */
+}
+
+#ifdef TETRA_INDEX_IN_FACE
+/*! \brief Gets the line segments of a Voronoi cell.
+ *
+ *  Warning: The correspondence sphp_index == dp_index holds only for a global
+ *  timestep!
+ *
+ *  \param[in] sphp_index The index of the Voronoi cell.
+ *  \param[in] dp_index The index of the corresponding Delaunay point.
+ *  \param[out] segments The array in which the line segments are stored; each
+ *              segment takes 6 doubles (x, y, z of its two end points).
+ *  \param[out] nof_elements The number of doubles written to segments.
+ *  \param[in] max_elements The maximum size of the segments array.
+ *
+ *  \return void
+ */
+void get_line_segments(int sphp_index, int dp_index, double *segments, unsigned int *nof_elements, unsigned int max_elements)
+{
+  // index for segments array
+  unsigned int a = 0;
+
+  int edge      = SphP[sphp_index].first_connection;
+  int last_edge = SphP[sphp_index].last_connection;
+
+  // loop over all interfaces of the cell
+  while(1)
+    {
+      int dq_index = DC[edge].dp_index;
+
+      // one of the tetrahedra around the Delaunay connection
+      int tt   = DC[edge].dt_index;
+      tetra *t = &Mesh.DT[tt];
+
+      // find the local index of the edge
+      int nr = 6;
+      int e, dp_start_index, dp_end_index;
+
+      for(e = 0; e < 6; e++)
+        {
+          dp_start_index = t->p[edge_start[e]];
+          dp_end_index   = t->p[edge_end[e]];
+
+          if((dp_start_index == dp_index && dp_end_index == dq_index) || (dp_start_index == dq_index && dp_end_index == dp_index))
+            {
+              nr = e;
+              break;
+            }
+        }
+
+      if(nr == 6) // edge not found in tetra tt; checked unconditionally because nr indexes the 6-entry edge tables below
+        terminate("inconsistency");
+
+      // already set: t,tt,nr
+      int i, j, k, l, m, ii, jj, kk, ll, nn;
+      tetra *prev, *next;
+      tetra_center *prevc, *nextc;
+
+      i = edge_start[nr];
+      j = edge_end[nr];
+      k = edge_opposite[nr];
+      l = edge_nexttetra[nr];
+
+      prev  = t;
+      prevc = &Mesh.DTC[tt];
+
+      do  // walk once around the edge, from tetra to neighbouring tetra, until we are back at t
+        {
+          nn    = prev->t[l];
+          next  = &Mesh.DT[nn];
+          nextc = &Mesh.DTC[nn];
+
+          if(max_elements < 7 || a > max_elements - 7) // max_elements is unsigned: max_elements - 7 would wrap around below 7
+            {
+              terminate("termination in voronoi_3d.c get_line_segments: not enough memory!");
+            }
+
+          segments[a++] = prevc->cx;  // one segment: centre of prev tetra -> centre of next tetra
+          segments[a++] = prevc->cy;
+          segments[a++] = prevc->cz;
+          segments[a++] = nextc->cx;
+          segments[a++] = nextc->cy;
+          segments[a++] = nextc->cz;
+
+          for(m = 0, ll = ii = jj = -1; m < 4; m++)  // locate the edge's end points and prev's k-vertex in next
+            {
+              if(next->p[m] == prev->p[k])
+                ll = m;
+              if(next->p[m] == prev->p[i])
+                ii = m;
+              if(next->p[m] == prev->p[j])
+                jj = m;
+            }
+
+          if(ll < 0 || ii < 0 || jj < 0)
+            terminate("inconsistency");
+
+          kk = 6 - (ll + ii + jj);  // the remaining local index, since 0 + 1 + 2 + 3 = 6
+
+          prev  = next;
+          prevc = nextc;
+
+          i = ii;
+          l = ll;
+          j = jj;
+          k = kk;
+        }
+      while(next != t);
+
+      if(edge == last_edge)
+        {
+          break;
+        }
+
+      edge = DC[edge].next;
+
+    }  // end of while loop
+
+  *nof_elements = a;
+
+  return;
+}
+#endif /* #ifdef TETRA_INDEX_IN_FACE */
+
+/*! \brief Creates the face belonging to one Delaunay edge and adds its area and volume to the adjoining cells.
+ *
+ *  \param[in, out] T Pointer to tessellation; one new entry is appended to T->VF.
+ *  \param[in] tt Index in DT array of a tetrahedron that contains the edge.
+ *  \param[in] nr Local number (0..5) of the edge within tetrahedron tt.
+ *
+ *  \return void
+ */
+void process_edge_faces_and_volumes(tessellation *T, int tt, int nr)
+{
+  int i, j, k, l, m, ii, jj, kk, ll, nn, count, nr_next, p1, p2;
+  face *f;
+  tetra *prev, *next;
+  tetra_center *prevc, *nextc;
+  double ax, ay, az;
+  double bx, by, bz;
+  double cx, cy, cz;
+  double nx, ny, nz;
+  double sx, sy, sz;
+  double hhx, hhy, hhz;
+  double darea, dvol, h;
+
+  if(T->Nvf + 1 >= T->MaxNvf) /* grow the face array before appending the new face */
+    {
+      T->Indi.AllocFacNvf *= ALLOC_INCREASE_FACTOR;
+      T->MaxNvf = T->Indi.AllocFacNvf;
+#ifdef VERBOSE
+      printf("Task=%d: increase memory allocation, MaxNvf=%d Indi.AllocFacNvf=%g\n", ThisTask, T->MaxNvf, T->Indi.AllocFacNvf);
+#endif /* #ifdef VERBOSE */
+      T->VF = myrealloc_movable(T->VF, T->MaxNvf * sizeof(face));
+
+      if(T->Nvf + 1 >= T->MaxNvf)
+        terminate("Nvf larger than MaxNvf");
+    }
+
+  tetra *DT         = T->DT;
+  point *DP         = T->DP;
+  face *VF          = T->VF;
+  tetra_center *DTC = T->DTC;
+
+  tetra *t = &DT[tt];
+
+  i = edge_start[nr];
+  j = edge_end[nr];
+  k = edge_opposite[nr];
+  l = edge_nexttetra[nr];
+
+  Edge_visited[tt] |= (1 << nr); /* flag edge nr of tetra tt as processed */
+
+  p1 = t->p[i];
+  p2 = t->p[j];
+
+  f = &VF[T->Nvf++]; /* the new face, between points p1 and p2 */
+
+  f->area = 0;
+  f->p1   = p1;
+  f->p2   = p2;
+
+  f->cx = 0;
+  f->cy = 0;
+  f->cz = 0;
+
+#ifdef TETRA_INDEX_IN_FACE
+  f->dt_index = tt;
+#endif /* #ifdef TETRA_INDEX_IN_FACE */
+
+  hhx = 0.5 * (DP[p1].x - DP[p2].x);
+  hhy = 0.5 * (DP[p1].y - DP[p2].y);
+  hhz = 0.5 * (DP[p1].z - DP[p2].z);
+
+  h = sqrt(hhx * hhx + hhy * hhy + hhz * hhz); /* half the distance between p1 and p2 */
+
+  cx = DTC[tt].cx; /* centre of the start tetra: common corner of all triangles summed below */
+  cy = DTC[tt].cy;
+  cz = DTC[tt].cz;
+
+  count = 0;
+
+  prev  = t;
+  prevc = &DTC[tt];
+  do /* walk once around the edge, from tetra to neighbouring tetra, until we are back at t */
+    {
+      nn    = prev->t[l];
+      next  = &DT[nn];
+      nextc = &DTC[nn];
+
+      if(prev != t && next != t) /* triangle (c, prevc, nextc); steps that involve the start tetra itself add nothing */
+        {
+          ax = prevc->cx - cx;
+          ay = prevc->cy - cy;
+          az = prevc->cz - cz;
+
+          bx = nextc->cx - cx;
+          by = nextc->cy - cy;
+          bz = nextc->cz - cz;
+
+          nx = ay * bz - az * by; /* n = a x b */
+          ny = az * bx - ax * bz;
+          nz = ax * by - ay * bx;
+
+          sx = nextc->cx + prevc->cx + cx; /* sum of the triangle's corners (3 x its centroid) */
+          sy = nextc->cy + prevc->cy + cy;
+          sz = nextc->cz + prevc->cz + cz;
+
+          darea = 0.5 * sqrt(nx * nx + ny * ny + nz * nz); /* triangle area = |a x b| / 2 */
+          f->area += darea;
+
+          darea *= (1.0 / 3); /* so that darea * s = area * centroid */
+
+          f->cx += darea * sx;
+          f->cy += darea * sy;
+          f->cz += darea * sz;
+        }
+
+      for(m = 0, ll = ii = jj = -1; m < 4; m++) /* locate the edge's end points and prev's k-vertex in next */
+        {
+          if(next->p[m] == prev->p[k])
+            ll = m;
+          if(next->p[m] == prev->p[i])
+            ii = m;
+          if(next->p[m] == prev->p[j])
+            jj = m;
+        }
+
+      if(ll < 0 || ii < 0 || jj < 0)
+        terminate("inconsistency");
+
+      kk = 6 - (ll + ii + jj); /* the remaining local index, since 0 + 1 + 2 + 3 = 6 */
+
+      /* need to determine the edge number to be able to flag it */
+
+      for(nr_next = 0; nr_next < 6; nr_next++)
+        if((edge_start[nr_next] == ii && edge_end[nr_next] == jj) || (edge_start[nr_next] == jj && edge_end[nr_next] == ii))
+          {
+            if((Edge_visited[nn] & (1 << nr_next)) && next != t) /* only the start tetra may already carry the flag */
+              terminate("inconsistency");
+
+            Edge_visited[nn] |= (1 << nr_next);
+            break;
+          }
+
+      prev  = next;
+      prevc = nextc;
+      i     = ii;
+      l     = ll;
+      j     = jj;
+      k     = kk;
+
+      count++;
+
+      if(count > 1000) /* guard against walking forever on a corrupt mesh */
+        terminate("count is too large");
+    }
+  while(next != t);
+
+  i = edge_start[nr]; /* NOTE(review): i and j are not read again below */
+  j = edge_end[nr];
+
+  if(f->area) /* turn the area-weighted sum into the face centroid */
+    {
+      f->cx /= f->area;
+      f->cy /= f->area;
+      f->cz /= f->area;
+    }
+
+#ifdef REFINEMENT_MERGE_CELLS
+  f->t  = tt;
+  f->nr = nr; /* delaunay tetra and edge number that generated this face */
+#endif        /* #ifdef REFINEMENT_MERGE_CELLS */
+
+  dvol = (1.0 / 3) * f->area * h; /* volume of a pyramid with base area f->area and height h */
+
+#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE)
+  double angle = sqrt(f->area / M_PI) / h;
+#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */
+
+  if(DP[p1].task == ThisTask && DP[p1].index >= 0 && DP[p1].index < NumGas) /* p1 refers to one of the NumGas local cells */
+    {
+      if(TimeBinSynchronized[P[DP[p1].index].TimeBinHydro])
+        {
+          SphP[DP[p1].index].Volume += dvol;
+          SphP[DP[p1].index].SurfaceArea += f->area;
+
+#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE)
+          if(SphP[DP[p1].index].MaxFaceAngle < angle)
+            SphP[DP[p1].index].MaxFaceAngle = angle;
+#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */
+
+#ifdef OUTPUT_SURFACE_AREA
+          if(f->area)
+            SphP[DP[p1].index].CountFaces++;
+#endif /* #ifdef OUTPUT_SURFACE_AREA */
+
+#if defined(REFINEMENT_SPLIT_CELLS)
+          if(SphP[DP[p1].index].MinimumEdgeDistance > h)
+            SphP[DP[p1].index].MinimumEdgeDistance = h;
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */
+          /* let's now compute the center-of-mass of the pyramid with the face as base and p1 as apex */
+          sx = 0.75 * f->cx + 0.25 * DP[p1].x;
+          sy = 0.75 * f->cy + 0.25 * DP[p1].y;
+          sz = 0.75 * f->cz + 0.25 * DP[p1].z;
+
+          SphP[DP[p1].index].Center[0] += dvol * sx; /* volume-weighted sum; not normalised in this routine */
+          SphP[DP[p1].index].Center[1] += dvol * sy;
+          SphP[DP[p1].index].Center[2] += dvol * sz;
+        }
+    }
+
+  if(DP[p2].task == ThisTask && DP[p2].index >= 0 && DP[p2].index < NumGas) /* same bookkeeping for the cell of p2 */
+    {
+      if(TimeBinSynchronized[P[DP[p2].index].TimeBinHydro])
+        {
+          SphP[DP[p2].index].Volume += dvol;
+          SphP[DP[p2].index].SurfaceArea += f->area;
+
+#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE)
+          if(SphP[DP[p2].index].MaxFaceAngle < angle)
+            SphP[DP[p2].index].MaxFaceAngle = angle;
+#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */
+
+#ifdef OUTPUT_SURFACE_AREA
+          if(f->area)
+            SphP[DP[p2].index].CountFaces++;
+#endif /* #ifdef OUTPUT_SURFACE_AREA */
+#if defined(REFINEMENT_SPLIT_CELLS)
+          if(SphP[DP[p2].index].MinimumEdgeDistance > h)
+            SphP[DP[p2].index].MinimumEdgeDistance = h;
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */
+          /* let's now compute the center-of-mass of the pyramid with the face as base and p2 as apex */
+          sx = 0.75 * f->cx + 0.25 * DP[p2].x;
+          sy = 0.75 * f->cy + 0.25 * DP[p2].y;
+          sz = 0.75 * f->cz + 0.25 * DP[p2].z;
+
+          SphP[DP[p2].index].Center[0] += dvol * sx;
+          SphP[DP[p2].index].Center[1] += dvol * sy;
+          SphP[DP[p2].index].Center[2] += dvol * sz;
+        }
+    }
+
+  int low_p, high_p; /* the face's two points ordered by ID, used to pick the task that books the active area */
+
+  if(DP[p1].ID < DP[p2].ID)
+    {
+      low_p  = p1;
+      high_p = p2;
+    }
+  else
+    {
+      low_p  = p2;
+      high_p = p1;
+    }
+
+  int this_task_responsible_flag = 0;
+
+  if(TimeBinSynchronized[DP[low_p].timebin]) /* the one with the lower ID is active */
+    {
+      /* we need to check whether the one with the lower ID is a local particle */
+      if(DP[low_p].task == ThisTask && DP[low_p].index >= 0 && DP[low_p].index < NumGas)
+        this_task_responsible_flag = 1;
+    }
+  else if(TimeBinSynchronized[DP[high_p].timebin]) /* only the side with the higher ID is active */
+    {
+      /* we need to check whether we hold the one with the higher ID, if yes, we'll do it */
+      if(DP[high_p].task == ThisTask && DP[high_p].index >= 0 && DP[high_p].index < NumGas)
+        this_task_responsible_flag = 1;
+    }
+
+  if(this_task_responsible_flag)
+    {
+      for(k = 0; k < 2; k++) /* k = 0 handles p1, k = 1 handles p2 */
+        {
+          int p, q;
+
+          if(k == 0)
+            {
+              q = p1;
+              p = DP[q].index;
+            }
+          else
+            {
+              q = p2;
+              p = DP[q].index;
+            }
+
+          if(DP[q].task == ThisTask)
+            {
+              if(DP[q].index >= NumGas) /* this is a local ghost point */
+                p -= NumGas;
+
+              SphP[p].ActiveArea += f->area;
+            }
+          else
+            {
+              /* here we have a foreign ghost point */
+              if(DP[q].originalindex < 0)
+                terminate("should not happen");
+
+              if(Narea >= MaxNarea) /* grow AreaList before appending */
+                {
+                  T->Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR;
+                  MaxNarea = T->Indi.AllocFacNflux;
+                  AreaList = myrealloc_movable(AreaList, MaxNarea * sizeof(struct area_list_data));
+
+                  if(Narea >= MaxNarea)
+                    terminate("Narea >= MaxNarea");
+                }
+
+              AreaList[Narea].task  = DP[q].task; /* record the area for the owning task; this routine only queues it */
+              AreaList[Narea].index = DP[q].originalindex;
+              AreaList[Narea].darea = f->area;
+              Narea++;
+            }
+        }
+    }
+}
+
+/*! \brief Appends to 'trilist' the tetrahedra spanned by the face around one edge and the point dtip.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] tt Index of tetrahedron in T->DT array.
+ *  \param[in] nr Edge number (0..5) within tetrahedron tt; index into the edge_* tables.
+ *  \param[in] dtip Point used as the fourth corner (tip) of every appended tetrahedron.
+ *  \param[out] trilist Array to which the tetrahedra are appended.
+ *  \param[in] ntri Index of the first entry in trilist that should be filled.
+ *  \param[in] max_n_tri Number of entries trilist can hold; terminates when ntri reaches it.
+ *
+ *  \return New length of trilist data.
+ */
+int derefine_refine_get_triangles(tessellation *T, int tt, int nr, point *dtip, triangle *trilist, int ntri, int max_n_tri)
+{
+  tetra *DT         = T->DT;
+  tetra_center *DTC = T->DTC;
+
+  int i, j, k, l, m, ii, jj, kk, ll, nn, count;
+  tetra *prev, *next;
+  tetra_center *prevc, *nextc;
+  double cx, cy, cz;
+
+  tetra *t = &DT[tt];
+
+  i = edge_start[nr];
+  j = edge_end[nr];
+  k = edge_opposite[nr];
+  l = edge_nexttetra[nr];
+
+  cx = DTC[tt].cx; /* centre of the start tetra: corner 0 of every appended tetrahedron */
+  cy = DTC[tt].cy;
+  cz = DTC[tt].cz;
+
+  count = 0;
+
+  prev  = t;
+  prevc = &DTC[tt];
+  do /* walk once around the edge, from tetra to neighbouring tetra, until we are back at t */
+    {
+      nn    = prev->t[l];
+      next  = &DT[nn];
+      nextc = &DTC[nn];
+
+      if(prev != t && next != t) /* steps that involve the start tetra itself produce no entry */
+        {
+          if(ntri >= max_n_tri)
+            terminate("ntri >= max_n_tri");
+
+          trilist[ntri].p[0][0] = cx;
+          trilist[ntri].p[0][1] = cy;
+          trilist[ntri].p[0][2] = cz;
+
+          trilist[ntri].p[1][0] = prevc->cx;
+          trilist[ntri].p[1][1] = prevc->cy;
+          trilist[ntri].p[1][2] = prevc->cz;
+
+          trilist[ntri].p[2][0] = nextc->cx;
+          trilist[ntri].p[2][1] = nextc->cy;
+          trilist[ntri].p[2][2] = nextc->cz;
+
+          trilist[ntri].p[3][0] = dtip->x;
+          trilist[ntri].p[3][1] = dtip->y;
+          trilist[ntri].p[3][2] = dtip->z;
+
+          if(get_tri_volume(ntri, trilist) < 0) /* signed volume negative: corners 2 and 3 are in the wrong order */
+            {
+              /* swap two points to get proper orientation */
+              trilist[ntri].p[3][0] = nextc->cx;
+              trilist[ntri].p[3][1] = nextc->cy;
+              trilist[ntri].p[3][2] = nextc->cz;
+
+              trilist[ntri].p[2][0] = dtip->x;
+              trilist[ntri].p[2][1] = dtip->y;
+              trilist[ntri].p[2][2] = dtip->z;
+            }
+
+          ntri++;
+        }
+
+      for(m = 0, ll = ii = jj = -1; m < 4; m++) /* locate the edge's end points and prev's k-vertex in next */
+        {
+          if(next->p[m] == prev->p[k])
+            ll = m;
+          if(next->p[m] == prev->p[i])
+            ii = m;
+          if(next->p[m] == prev->p[j])
+            jj = m;
+        }
+
+      if(ll < 0 || ii < 0 || jj < 0)
+        terminate("inconsistency");
+
+      kk = 6 - (ll + ii + jj); /* the remaining local index, since 0 + 1 + 2 + 3 = 6 */
+
+      prev  = next;
+      prevc = nextc;
+      i     = ii;
+      l     = ll;
+      j     = jj;
+      k     = kk;
+
+      count++;
+
+      if(count > 1000) /* guard against walking forever on a corrupt mesh */
+        terminate("count is too large");
+    }
+  while(next != t);
+
+  return ntri;
+}
+
+/*! \brief Computes the signed volume of one tetrahedron stored in trilist.
+ *
+ *  \param[in] i Index of the tetrahedron in trilist.
+ *  \param[in] trilist Array of tetrahedra, each with four corner points p[0..3].
+ *
+ *  \return Signed volume ((p1 - p0) x (p2 - p0)) . (p3 - p0) / 6.
+ */
+double get_tri_volume(int i, triangle *trilist)
+{
+  triangle *tet = &trilist[i];
+  double *base = tet->p[0], *c1 = tet->p[1], *c2 = tet->p[2], *apex = tet->p[3];
+
+  /* edge vectors from the base corner; e3 points to the apex */
+  double e1[3] = {c1[0] - base[0], c1[1] - base[1], c1[2] - base[2]};
+  double e2[3] = {c2[0] - base[0], c2[1] - base[1], c2[2] - base[2]};
+  double e3[3] = {apex[0] - base[0], apex[1] - base[1], apex[2] - base[2]};
+
+  /* cross product e1 x e2 */
+  double cross[3] = {e1[1] * e2[2] - e1[2] * e2[1], e1[2] * e2[0] - e1[0] * e2[2], e1[0] * e2[1] - e1[1] * e2[0]};
+
+  return (cross[0] * e3[0] + cross[1] * e3[1] + cross[2] * e3[2]) / 6.0;
+}
+
+/*! \brief Add point and adjust tetrahedra accordingly.
+ *
+ *  \param[in] q Index of point in DP array.
+ *  \param[in, out] trilist Array of tetrahedra.
+ *  \param[in] ntri Number of elements in trilist before splitting.
+ *  \param[in] max_ntri Maximum number of tetrahedron allowed.
+ *  \param[in] vol Volume of tetrahedron to be split.
+ *
+ *  \return Updated number of triangles.
+ */
+int derefine_add_point_and_split_tri(int q, triangle *trilist, int ntri, int max_ntri, double vol)
+{
+#define MIN_VOL_FAC 1.0e-6
+  double m[3], n[3], sc[4], *a;
+  double cut[3][3], p[8][3], ed[3];
+  int i, j, k, l, nnew, flag[4], count, oldq;
+  double vvi, vlargest, vv[5];
+  int ilargest, nadd;
+
+  for(i = 0, nnew = ntri; i < ntri; i++)
+    {
+      if(q < 0 || q >= Mesh.Ndp)
+        {
+          char buf[1000];
+          sprintf(buf, "q=%d\n", q);
+          terminate(buf);
+        }
+
+      if(trilist[i].owner < 0 || trilist[i].owner >= Mesh.Ndp)
+        {
+          char buf[1000];
+          sprintf(buf, "trilist[i].owner=%d\n", trilist[i].owner);
+          terminate(buf);
+        }
+
+      /* midpoint */
+      m[0] = 0.5 * (Mesh.DP[q].x + Mesh.DP[trilist[i].owner].x);
+      m[1] = 0.5 * (Mesh.DP[q].y + Mesh.DP[trilist[i].owner].y);
+      m[2] = 0.5 * (Mesh.DP[q].z + Mesh.DP[trilist[i].owner].z);
+
+      n[0] = (Mesh.DP[q].x - Mesh.DP[trilist[i].owner].x);
+      n[1] = (Mesh.DP[q].y - Mesh.DP[trilist[i].owner].y);
+      n[2] = (Mesh.DP[q].z - Mesh.DP[trilist[i].owner].z);
+
+      if(q == trilist[i].owner)
+        terminate("q == trilist[i].owner");
+
+      for(k = 0, count = 0; k < 4; k++) /* determine the side of each point */
+        {
+          a = &trilist[i].p[k][0];
+
+          sc[k] = (a[0] - m[0]) * n[0] + (a[1] - m[1]) * n[1] + (a[2] - m[2]) * n[2];
+
+          if(sc[k] > 0)
+            {
+              flag[k] = 1;
+              count++;
+            }
+          else
+            flag[k] = 0;
+        }
+
+      switch(count)
+        {
+          case 0: /* the whole tetra is on the side of current owner - nothing to be done */
+            break;
+
+          case 4:                 /* the whole tetra is on the side of new point */
+            trilist[i].owner = q; /* change owner */
+            break;
+
+          case 1:
+          case 3:
+
+            /* we have one point on either side */
+            /* for count=1 the tip of the tetra is cut off and assigned to the new point. */
+            /* the rest is subdivided into three tetras */
+
+            if(nnew + 3 > max_ntri)
+              {
+                terminate("nnew + 3 > max_ntri");
+              }
+
+            trilist[nnew]     = trilist[i];
+            trilist[nnew + 1] = trilist[i];
+            trilist[nnew + 2] = trilist[i];
+
+            /* find the point index that is on the other side */
+            for(k = 0; k < 4; k++)
+              {
+                if(flag[k] == 1 && count == 1)
+                  break;
+                if(flag[k] == 0 && count == 3)
+                  break;
+              }
+
+            /* determine the cut-points on the corresponding edges */
+
+            for(j = 0; j < 3; j++)
+              {
+                double *b = trilist[i].p[k];
+                double *a = trilist[i].p[access_triangles[k][j]];
+
+                for(l = 0; l < 3; l++)
+                  ed[l] = a[l] - b[l];
+
+                double prod = (ed[0] * n[0] + ed[1] * n[1] + ed[2] * n[2]);
+                double t;
+
+                if(prod)
+                  t = -sc[k] / prod;
+                else
+                  t = 0.5;
+
+                if(t < 0)
+                  t = 0;
+                if(t > 1)
+                  t = 1;
+
+                for(l = 0; l < 3; l++)
+                  cut[j][l] = b[l] + t * ed[l];
+              }
+
+            /* modify the tetra that's assigned to the new point */
+            for(j = 0; j < 3; j++)
+              {
+                double *a = trilist[i].p[access_triangles[k][j]];
+                for(l = 0; l < 3; l++)
+                  a[l] = cut[j][l];
+              }
+
+            oldq = trilist[i].owner;
+
+            if(count == 1)
+              trilist[i].owner = q;
+
+            /* modify the three new tetras */
+
+            for(l = 0; l < 3; l++)
+              {
+                trilist[nnew].p[k][l] = cut[0][l];
+
+                trilist[nnew + 1].p[access_triangles[k][0]][l] = cut[0][l];
+                trilist[nnew + 1].p[k][l]                      = cut[2][l];
+
+                trilist[nnew + 2].p[access_triangles[k][0]][l] = cut[0][l];
+                trilist[nnew + 2].p[access_triangles[k][2]][l] = cut[2][l];
+                trilist[nnew + 2].p[k][l]                      = cut[1][l];
+              }
+
+            if(count == 1)
+              {
+                trilist[nnew].owner     = oldq;
+                trilist[nnew + 1].owner = oldq;
+                trilist[nnew + 2].owner = oldq;
+              }
+            else
+              {
+                trilist[nnew].owner     = q;
+                trilist[nnew + 1].owner = q;
+                trilist[nnew + 2].owner = q;
+              }
+
+            nadd = 3;
+
+            vvi = fabs(get_tri_volume(i, trilist));
+            for(l = 0; l < nadd; l++)
+              vv[l] = fabs(get_tri_volume(nnew + l, trilist));
+
+            /* determine largest */
+            ilargest = i;
+            vlargest = vvi;
+            for(l = 0; l < nadd; l++)
+              if(vv[l] > vlargest)
+                {
+                  vlargest = vv[l];
+                  ilargest = nnew + l;
+                }
+            if(i != ilargest)
+              {
+                /* swap the largest to location i */
+                triangle trisave  = trilist[i];
+                trilist[i]        = trilist[ilargest];
+                trilist[ilargest] = trisave;
+
+                vv[ilargest - nnew] = vvi;
+              }
+
+            for(l = 0; l < nadd; l++)
+              {
+                if(vv[l] < MIN_VOL_FAC * vol)
+                  {
+                    vv[l]             = vv[nadd - 1];
+                    trilist[nnew + l] = trilist[nnew + nadd - 1];
+                    l--;
+                    nadd--;
+                  }
+              }
+
+            nnew += nadd;
+            break;
+
+          case 2:
+            /* we have two points on either side */
+
+            if(nnew + 5 > max_ntri)
+              terminate("nnew + 5 > max_ntri");
+
+            int kfirst, ksecond, jfirst, jsecond;
+
+            if(flag[2] == 1 && flag[3] == 1)
+              {
+                kfirst  = 3;
+                ksecond = 2;
+                jfirst  = 0;
+                jsecond = 1;
+              }
+            else if(flag[1] == 1 && flag[3] == 1)
+              {
+                kfirst  = 3;
+                ksecond = 1;
+                jfirst  = 2;
+                jsecond = 0;
+              }
+            else if(flag[0] == 1 && flag[3] == 1)
+              {
+                kfirst  = 3;
+                ksecond = 0;
+                jfirst  = 1;
+                jsecond = 2;
+              }
+            else if(flag[1] == 1 && flag[2] == 1)
+              {
+                kfirst  = 1;
+                ksecond = 2;
+                jfirst  = 3;
+                jsecond = 0;
+              }
+            else if(flag[0] == 1 && flag[2] == 1)
+              {
+                kfirst  = 0;
+                ksecond = 2;
+                jfirst  = 1;
+                jsecond = 3;
+              }
+            else if(flag[0] == 1 && flag[1] == 1)
+              {
+                kfirst  = 0;
+                ksecond = 1;
+                jfirst  = 3;
+                jsecond = 2;
+              }
+            else
+              terminate("can't be");
+
+            int next = 0;
+
+            for(l = 0; l < 3; l++)
+              p[next][l] = trilist[i].p[kfirst][l];
+            next++;
+
+            /* determine cuts with the corresponding two edges */
+            {
+              double *b = trilist[i].p[kfirst];
+              double *a = trilist[i].p[jfirst];
+
+              for(l = 0; l < 3; l++)
+                ed[l] = a[l] - b[l];
+
+              double prod = (ed[0] * n[0] + ed[1] * n[1] + ed[2] * n[2]);
+              double t;
+
+              if(prod)
+                t = -sc[kfirst] / prod;
+              else
+                t = 0.5;
+
+              if(t < 0)
+                t = 0;
+              if(t > 1)
+                t = 1;
+
+              for(l = 0; l < 3; l++)
+                p[next][l] = b[l] + t * ed[l];
+              next++;
+
+              for(l = 0; l < 3; l++)
+                p[next][l] = a[l];
+              next++;
+            }
+
+            {
+              double *b = trilist[i].p[kfirst];
+              double *a = trilist[i].p[jsecond];
+
+              for(l = 0; l < 3; l++)
+                ed[l] = a[l] - b[l];
+
+              double prod = (ed[0] * n[0] + ed[1] * n[1] + ed[2] * n[2]);
+              double t;
+
+              if(prod)
+                t = -sc[kfirst] / prod;
+              else
+                t = 0.5;
+
+              if(t < 0)
+                t = 0;
+              if(t > 1)
+                t = 1;
+
+              for(l = 0; l < 3; l++)
+                p[next][l] = b[l] + t * ed[l];
+              next++;
+
+              for(l = 0; l < 3; l++)
+                p[next][l] = a[l];
+              next++;
+            }
+
+            for(l = 0; l < 3; l++)
+              p[next][l] = trilist[i].p[ksecond][l];
+            next++;
+
+            {
+              double *b = trilist[i].p[ksecond];
+              double *a = trilist[i].p[jfirst];
+
+              for(l = 0; l < 3; l++)
+                ed[l] = a[l] - b[l];
+
+              double prod = (ed[0] * n[0] + ed[1] * n[1] + ed[2] * n[2]);
+              double t;
+
+              if(prod)
+                t = -sc[ksecond] / prod;
+              else
+                t = 0.5;
+
+              if(t < 0)
+                t = 0;
+              if(t > 1)
+                t = 1;
+
+              for(l = 0; l < 3; l++)
+                p[next][l] = b[l] + t * ed[l];
+              next++;
+            }
+
+            {
+              double *b = trilist[i].p[ksecond];
+              double *a = trilist[i].p[jsecond];
+
+              for(l = 0; l < 3; l++)
+                ed[l] = a[l] - b[l];
+
+              double prod = (ed[0] * n[0] + ed[1] * n[1] + ed[2] * n[2]);
+              double t;
+
+              if(prod)
+                t = -sc[ksecond] / prod;
+              else
+                t = 0.5;
+
+              if(t < 0)
+                t = 0;
+              if(t > 1)
+                t = 1;
+
+              for(l = 0; l < 3; l++)
+                p[next][l] = b[l] + t * ed[l];
+              next++;
+            }
+
+            oldq = trilist[i].owner;
+
+            /* now let's initialize the new triangles */
+            for(l = 0; l < 3; l++)
+              {
+                /* first the ones that get to the new side */
+                trilist[i].p[0][l] = p[0][l];
+                trilist[i].p[1][l] = p[6][l];
+                trilist[i].p[2][l] = p[5][l];
+                trilist[i].p[3][l] = p[7][l];
+
+                trilist[nnew].p[0][l] = p[1][l];
+                trilist[nnew].p[1][l] = p[3][l];
+                trilist[nnew].p[2][l] = p[7][l];
+                trilist[nnew].p[3][l] = p[0][l];
+
+                trilist[nnew + 1].p[0][l] = p[1][l];
+                trilist[nnew + 1].p[1][l] = p[7][l];
+                trilist[nnew + 1].p[2][l] = p[6][l];
+                trilist[nnew + 1].p[3][l] = p[0][l];
+
+                /* now the ones that are on the old side */
+                trilist[nnew + 2].p[0][l] = p[1][l];
+                trilist[nnew + 2].p[1][l] = p[2][l];
+                trilist[nnew + 2].p[2][l] = p[6][l];
+                trilist[nnew + 2].p[3][l] = p[4][l];
+
+                trilist[nnew + 3].p[0][l] = p[3][l];
+                trilist[nnew + 3].p[1][l] = p[1][l];
+                trilist[nnew + 3].p[2][l] = p[6][l];
+                trilist[nnew + 3].p[3][l] = p[4][l];
+
+                trilist[nnew + 4].p[0][l] = p[3][l];
+                trilist[nnew + 4].p[1][l] = p[6][l];
+                trilist[nnew + 4].p[2][l] = p[7][l];
+                trilist[nnew + 4].p[3][l] = p[4][l];
+              }
+
+            trilist[i].owner        = q;
+            trilist[nnew].owner     = q;
+            trilist[nnew + 1].owner = q;
+
+            trilist[nnew + 2].owner = oldq;
+            trilist[nnew + 3].owner = oldq;
+            trilist[nnew + 4].owner = oldq;
+
+            nadd = 5;
+
+            vvi = fabs(get_tri_volume(i, trilist));
+            for(l = 0; l < nadd; l++)
+              vv[l] = fabs(get_tri_volume(nnew + l, trilist));
+
+            /* determine largest */
+            ilargest = i;
+            vlargest = vvi;
+            for(l = 0; l < nadd; l++)
+              if(vv[l] > vlargest)
+                {
+                  vlargest = vv[l];
+                  ilargest = nnew + l;
+                }
+            if(i != ilargest)
+              {
+                /* swap the largest to location i */
+                triangle trisave  = trilist[i];
+                trilist[i]        = trilist[ilargest];
+                trilist[ilargest] = trisave;
+
+                vv[ilargest - nnew] = vvi;
+              }
+
+            for(l = 0; l < nadd; l++)
+              {
+                if(vv[l] < MIN_VOL_FAC * vol)
+                  {
+                    vv[l]             = vv[nadd - 1];
+                    trilist[nnew + l] = trilist[nnew + nadd - 1];
+                    l--;
+                    nadd--;
+                  }
+              }
+
+            nnew += nadd;
+            break;
+        }
+    }
+
+  return nnew;
+}
+
+/*! \brief Processes edge for volume calculation.
+ *
+ *  Walks once around edge nr of tetrahedron tt, sums the areas of the triangles spanned by the centers (DTC) of the
+ *  surrounding tetrahedra, and adds area * h / 3 (h = half the edge length) to vol[] of both edge end points.
+ *  Every edge instance met on the way is flagged in Edge_visited.
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in, out] vol Volume array indexed by point; only entries with index in [0, DeRefMesh.Ndp) are updated.
+ *  \param[in] tt Index of the starting tetrahedron in DT array.
+ *  \param[in] nr Index in edge array, selects which edge of tt is processed.
+ *
+ *  \return void
+ */
+void derefine_refine_process_edge(tessellation *T, double *vol, int tt, int nr)
+{
+  tetra *DT         = T->DT;
+  point *DP         = T->DP;
+  tetra_center *DTC = T->DTC;
+
+  int i, j, k, l, m, ii, jj, kk, ll, nn, count, nr_next, p1, p2;
+  tetra *prev, *next;
+  tetra_center *prevc, *nextc;
+  double ax, ay, az;
+  double bx, by, bz;
+  double cx, cy, cz;
+  double nx, ny, nz;
+  double hhx, hhy, hhz;
+  double darea, dvol, h;
+
+  tetra *t = &DT[tt];
+
+  i = edge_start[nr];     /* local index of first edge point */
+  j = edge_end[nr];       /* local index of second edge point */
+  k = edge_opposite[nr];  /* local index of the point that will lie opposite to the crossed face in the next tetra */
+  l = edge_nexttetra[nr]; /* local index of the face across which the next tetra around the edge is reached */
+
+  Edge_visited[tt] |= (1 << nr);
+
+  p1 = t->p[i];
+  p2 = t->p[j];
+
+  double area = 0;
+
+  cx = DTC[tt].cx; /* center of the starting tetrahedron, used as the common corner of the triangle fan */
+  cy = DTC[tt].cy;
+  cz = DTC[tt].cz;
+
+  count = 0;
+
+  prev  = t;
+  prevc = &DTC[tt];
+  do
+    {
+      nn    = prev->t[l];
+      next  = &DT[nn];
+      nextc = &DTC[nn];
+
+      /* triangle (center of t, center of prev, center of next); it has zero area while prev or next is t itself */
+      if(prev != t && next != t)
+        {
+          ax = prevc->cx - cx;
+          ay = prevc->cy - cy;
+          az = prevc->cz - cz;
+
+          bx = nextc->cx - cx;
+          by = nextc->cy - cy;
+          bz = nextc->cz - cz;
+
+          nx = ay * bz - az * by; /* n = a x b */
+          ny = az * bx - ax * bz;
+          nz = ax * by - ay * bx;
+
+          darea = 0.5 * sqrt(nx * nx + ny * ny + nz * nz);
+          area += darea;
+        }
+
+      /* find the local indices in next of prev's points k, i and j */
+      for(m = 0, ll = ii = jj = -1; m < 4; m++)
+        {
+          if(next->p[m] == prev->p[k])
+            ll = m;
+          if(next->p[m] == prev->p[i])
+            ii = m;
+          if(next->p[m] == prev->p[j])
+            jj = m;
+        }
+
+      if(ll < 0 || ii < 0 || jj < 0)
+        terminate("inconsistency");
+
+      kk = 6 - (ll + ii + jj); /* the remaining local index, since 0 + 1 + 2 + 3 = 6 */
+
+      /* need to determine the edge number to be able to flag it */
+
+      for(nr_next = 0; nr_next < 6; nr_next++)
+        if((edge_start[nr_next] == ii && edge_end[nr_next] == jj) || (edge_start[nr_next] == jj && edge_end[nr_next] == ii))
+          {
+            if((Edge_visited[nn] & (1 << nr_next)) && next != t) /* only the starting tetra may already carry the flag */
+              terminate("inconsistency");
+
+            Edge_visited[nn] |= (1 << nr_next);
+            break;
+          }
+
+      prev  = next;
+      prevc = nextc;
+      i     = ii;
+      l     = ll;
+      j     = jj;
+      k     = kk;
+
+      count++;
+
+      if(count > 1000) /* safety net against walking around the edge forever */
+        terminate("count is too large");
+    }
+  while(next != t);
+
+  /* half of the vector from p2 to p1; its length h enters the volume as area * h / 3 */
+  hhx = 0.5 * (DP[p1].x - DP[p2].x);
+  hhy = 0.5 * (DP[p1].y - DP[p2].y);
+  hhz = 0.5 * (DP[p1].z - DP[p2].z);
+
+  h    = sqrt(hhx * hhx + hhy * hhy + hhz * hhz);
+  dvol = (1.0 / 3) * area * h;
+
+  if(p1 >= 0 && p1 < DeRefMesh.Ndp) /* points outside this index range get no volume contribution */
+    vol[p1] += dvol;
+
+  if(p2 >= 0 && p2 < DeRefMesh.Ndp)
+    vol[p2] += dvol;
+}
+
+/*! \brief Insert a point into mesh.
+ *
+ *  Locates the tetrahedron that contains the point and splits it (1-to-4 flip, face split or edge split, depending on
+ *  the flag returned by get_tetra()). The tetrahedra touched are put on a stack and checked with in-sphere tests;
+ *  illegal facets are removed by 2-3, 3-2 or 4-4 flips until the stack is empty. DT/DTC/DTF grow if Ndt > MaxNdt.
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] pp Index of Delaunay point in DP array.
+ *  \param[in] ttstart Initial guess in which tetrahedron it might be,
+ *             index in DT array.
+ *
+ * \return index to tetra that (currently) contains the point pp.
+ */
+int insert_point(tessellation *T, int pp, int ttstart)
+{
+  int tt0, tt1, tt2, tt3, tt4, tetra_with_p, tt;
+  int to_check[STACKSIZE_TETRA], freestack[STACKSIZE_TETRA]; /* tetrahedra still to test / slots freed by 3-2 flips */
+  int n_faces_to_check = 0, nfree_on_stack = 0, moves;
+  int tip_index, flag, edgeface_nr;
+  int non_convex, convex_edge = 0, i, j;
+
+  /* first, need to do a point location */
+  tt0 = get_tetra(T, &T->DP[pp], &moves, ttstart, &flag, &edgeface_nr);
+
+  tetra_with_p = tt0;
+
+  if(flag == 1) /* that's the normal split of a tetrahedron into 4 */
+    {
+      if(n_faces_to_check >= STACKSIZE_TETRA - 4) /* n_faces_to_check is still 0 at this point */
+        terminate("stacksize exceeded");
+
+      /* we now need to split this tetrahedron into four  */
+      if(nfree_on_stack)
+        tt1 = freestack[--nfree_on_stack];
+      else
+        tt1 = T->Ndt++;
+
+      if(nfree_on_stack)
+        tt2 = freestack[--nfree_on_stack];
+      else
+        tt2 = T->Ndt++;
+
+      if(nfree_on_stack)
+        tt3 = freestack[--nfree_on_stack];
+      else
+        tt3 = T->Ndt++;
+
+      if(T->Ndt > T->MaxNdt) /* grow the three per-tetrahedron arrays together */
+        {
+          T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR;
+          T->MaxNdt = T->Indi.AllocFacNdt;
+#ifdef VERBOSE
+          printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, T->Indi.AllocFacNdt);
+#endif /* #ifdef VERBOSE */
+          T->DT  = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra));
+          T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center));
+          T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char));
+
+          if(T->Ndt > T->MaxNdt)
+            terminate("Ndt > MaxNdt");
+        }
+
+      make_a_1_to_4_flip(T, pp, tt0, tt1, tt2, tt3);
+
+      /* now we have a triangulation again - need to check whether there are
+         facets that are not Delaunay */
+      /* let's initialize a stack with the facets that we need to check */
+
+      n_faces_to_check = 0;
+
+      to_check[n_faces_to_check++] = tt0;
+      to_check[n_faces_to_check++] = tt1;
+      to_check[n_faces_to_check++] = tt2;
+      to_check[n_faces_to_check++] = tt3;
+      char *DTF                    = T->DTF; /* fetched after the possible reallocation above */
+      DTF[tt0]                     = 0;
+      DTF[tt1]                     = 0;
+      DTF[tt2]                     = 0;
+      DTF[tt3]                     = 0;
+    }
+
+  if(flag == 2) /* the point is handled with a face split */
+    {
+      /* create four new tetra  */
+      if(nfree_on_stack)
+        tt1 = freestack[--nfree_on_stack];
+      else
+        tt1 = T->Ndt++;
+
+      if(nfree_on_stack)
+        tt2 = freestack[--nfree_on_stack];
+      else
+        tt2 = T->Ndt++;
+
+      if(nfree_on_stack)
+        tt3 = freestack[--nfree_on_stack];
+      else
+        tt3 = T->Ndt++;
+
+      if(nfree_on_stack)
+        tt4 = freestack[--nfree_on_stack];
+      else
+        tt4 = T->Ndt++;
+
+      if(T->Ndt > T->MaxNdt)
+        {
+          T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR;
+          T->MaxNdt = T->Indi.AllocFacNdt;
+#ifdef VERBOSE
+          printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, T->Indi.AllocFacNdt);
+#endif /* #ifdef VERBOSE */
+          T->DT  = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra));
+          T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center));
+          T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char));
+
+          if(T->Ndt > T->MaxNdt)
+            terminate("Ndt > MaxNdt");
+        }
+
+      n_faces_to_check = 0;
+
+      to_check[n_faces_to_check++] = tt0;
+      to_check[n_faces_to_check++] = T->DT[tt0].t[edgeface_nr]; /* neighbor across the face that gets split */
+      to_check[n_faces_to_check++] = tt1;
+      to_check[n_faces_to_check++] = tt2;
+      to_check[n_faces_to_check++] = tt3;
+      to_check[n_faces_to_check++] = tt4;
+
+      char *DTF                      = T->DTF;
+      DTF[tt0]                       = 0;
+      DTF[T->DT[tt0].t[edgeface_nr]] = 0;
+      DTF[tt1]                       = 0;
+      DTF[tt2]                       = 0;
+      DTF[tt3]                       = 0;
+      DTF[tt4]                       = 0;
+
+      make_a_face_split(T, tt0, edgeface_nr, pp, tt1, tt2, tt3, tt4);
+    }
+
+  if(flag == 3) /* here we need to split an edge */
+    {
+      int i, j, k, l, ii, jj, kk, ll, m, count;
+      int prev, next;
+
+      /* count how many triangles share the edge */
+      i = edge_start[edgeface_nr];
+      j = edge_end[edgeface_nr];
+      k = edge_opposite[edgeface_nr];
+      l = edge_nexttetra[edgeface_nr];
+
+      count            = 0;
+      n_faces_to_check = 0;
+
+      prev = tt0;
+      do
+        {
+          to_check[n_faces_to_check++] = prev;
+          T->DTF[prev]                 = 0;
+
+          tetra *DT = T->DT;
+          next      = DT[prev].t[l];
+
+          for(m = 0, ll = ii = jj = -1; m < 4; m++) /* find prev's points k, i, j in next */
+            {
+              if(DT[next].p[m] == DT[prev].p[k])
+                ll = m;
+              if(DT[next].p[m] == DT[prev].p[i])
+                ii = m;
+              if(DT[next].p[m] == DT[prev].p[j])
+                jj = m;
+            }
+
+          if(ll < 0 || ii < 0 || jj < 0)
+            terminate("inconsistency");
+
+          kk = 6 - (ll + ii + jj); /* the remaining local index, since 0 + 1 + 2 + 3 = 6 */
+
+          prev = next;
+          i    = ii;
+          l    = ll;
+          j    = jj;
+          k    = kk;
+
+          count++;
+
+          if(count > 1000)
+            terminate("count exceeded");
+        }
+      while(next != tt0);
+
+      int *ttlist = mymalloc_movable(&ttlist, "ttlist", count * sizeof(int)); /* one new tetra per tetra around the edge */
+
+      for(i = 0; i < count; i++)
+        {
+          if(nfree_on_stack)
+            ttlist[i] = freestack[--nfree_on_stack];
+          else
+            {
+              ttlist[i] = T->Ndt++;
+
+              if(T->Ndt > T->MaxNdt)
+                {
+                  T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR;
+                  T->MaxNdt = T->Indi.AllocFacNdt;
+#ifdef VERBOSE
+                  printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt,
+                         T->Indi.AllocFacNdt);
+#endif /* #ifdef VERBOSE */
+                  T->DT  = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra));
+                  T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center));
+                  T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char));
+
+                  if(T->Ndt > T->MaxNdt)
+                    terminate("Ndt > MaxNdt");
+                }
+            }
+
+          to_check[n_faces_to_check++] = ttlist[i];
+          T->DTF[ttlist[i]]            = 0;
+        }
+
+      make_an_edge_split(T, tt0, edgeface_nr, count, pp, ttlist);
+
+      myfree(ttlist);
+    }
+
+  int iter = 0;
+
+  while(n_faces_to_check)
+    {
+      iter++;
+      if(iter > 200000)
+        terminate("too many iterations");
+
+      tt = to_check[--n_faces_to_check]; /* this is the current tetra to look at.
+                                            The facet in question lies opposite to q */
+      if(T->DT[tt].t[0] < 0)             /* deleted? */
+        continue;
+
+      for(tip_index = 0; tip_index < 4; tip_index++)
+        if(T->DT[tt].p[tip_index] == pp)
+          break;
+
+      if(tip_index < 4) /* otherwise the facet has been removed in a 3-2 flip */
+        {
+          tetra *DT = T->DT; /* valid until the 2-3 branch reallocates; that branch reads T->DT directly afterwards */
+          point *DP = T->DP;
+          int qq    = DT[tt].t[tip_index];           /* tetrahedron that's opposite of ours and shares the facet */
+          int ppp   = DT[qq].p[DT[tt].s[tip_index]]; /* point that's opposite of the facet in the other tetrahedron */
+
+          int ret, ret_exact;
+
+          ret = InSphere_Errorbound(&DP[DT[qq].p[0]], &DP[DT[qq].p[1]], &DP[DT[qq].p[2]], &DP[DT[qq].p[3]], &DP[pp]);
+          CountInSphereTests++;
+
+          if(ret != 0)
+            ret_exact = ret;
+          else
+            {
+              // let's decide with exact integer arithmetic
+              ret_exact = InSphere_Exact(&DP[DT[qq].p[0]], &DP[DT[qq].p[1]], &DP[DT[qq].p[2]], &DP[DT[qq].p[3]], &DP[pp]);
+              CountInSphereTestsExact++;
+            }
+
+          if(ret_exact > 0) /* facet is illegal, because point lies inside */
+            {
+              /* let's see whether the point lies in the triangle, or on a side, or opposite of one convex edge */
+
+              non_convex = convex_edge_test(T, tt, tip_index, &convex_edge);
+
+              if(non_convex == 0) /* we can make a 2-3 flip */
+                {
+                  int ww;
+
+                  if(nfree_on_stack)
+                    ww = freestack[--nfree_on_stack];
+                  else
+                    ww = T->Ndt++;
+
+                  if(T->Ndt > T->MaxNdt)
+                    {
+                      T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR;
+                      T->MaxNdt = T->Indi.AllocFacNdt;
+#ifdef VERBOSE
+                      printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt,
+                             T->Indi.AllocFacNdt);
+#endif /* #ifdef VERBOSE */
+                      T->DT  = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra));
+                      T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center));
+                      T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char));
+
+                      if(T->Ndt > T->MaxNdt)
+                        terminate("Ndt > MaxNdt");
+                    }
+
+                  if(n_faces_to_check >= STACKSIZE_TETRA - 3)
+                    terminate("stacksize exceeded");
+
+                  make_a_2_to_3_flip(T, tt, tip_index, qq, T->DT[tt].s[tip_index], ppp, ww);
+
+                  to_check[n_faces_to_check++] = tt;
+                  to_check[n_faces_to_check++] = qq;
+                  to_check[n_faces_to_check++] = ww;
+                  T->DTF[tt]                   = 0;
+                  T->DTF[qq]                   = 0;
+                  T->DTF[ww]                   = 0;
+                }
+              else if(non_convex == 1) /* we might be able to make a 3-2 flip, or we deal with a convex edge on the outer hull */
+                {
+                  /* test whether the reflex edge is surrounded by exactly three tetrahedra */
+
+                  i = convex_edge + 2;
+                  if(i >= 3)
+                    i -= 3;
+                  i = access_triangles[tip_index][i];
+
+                  for(j = 0; j < 4; j++)
+                    if(DT[tt].p[i] == DT[qq].p[j])
+                      break;
+
+                  if(j >= 4)
+                    terminate("not found");
+
+                  if(DT[tt].t[i] == DT[qq].t[j]) /* this means there is exactly one tetrahedron between them, i.e. we have found the
+                                                    third partner for the flip */
+                    {
+                      int ww;
+
+                      ww = DT[tt].t[i];
+
+                      make_a_3_to_2_flip(T, tt, qq, ww, tip_index, convex_edge, DT[tt].s[tip_index]);
+
+                      DT[ww].t[0] = -1; /* mark as deleted */
+
+                      if(nfree_on_stack < STACKSIZE_TETRA)
+                        freestack[nfree_on_stack++] = ww;
+                      else
+                        terminate("stack full");
+
+                      tetra_with_p = tt;
+                      if(n_faces_to_check >= STACKSIZE_TETRA - 2)
+                        terminate("stack too full");
+
+                      to_check[n_faces_to_check++] = tt;
+                      to_check[n_faces_to_check++] = qq;
+                      T->DTF[tt]                   = 0;
+                      T->DTF[qq]                   = 0;
+                    }
+                  else
+                    {
+                      if(DT[DT[tt].t[i]].p[DT[tt].s[i]] == DPinfinity && DT[DT[qq].t[j]].p[DT[qq].s[j]] == DPinfinity)
+                        {
+                          printf("convex edge between points=%d %d on outer hull found\n",
+                                 (int)(DT[tt].p[access_triangles[tip_index][convex_edge]]),
+                                 (int)(DT[tt].p[access_triangles[tip_index][convex_edge < 2 ? convex_edge + 1 : 0]]));
+
+                          terminate("inconsistency"); /* this should not occur since we have embedded the points into a convex big
+                                                         triangle */
+                        }
+                    }
+                }
+              else if(non_convex == 2) /* we might be able to make a 4-4 flip */
+                {
+                  i = convex_edge + 2;
+                  if(i >= 3)
+                    i -= 3;
+                  i = access_triangles[tip_index][i]; /* this is the point opposite of edge (but not tip) */
+
+                  char *DTF = T->DTF;
+
+                  for(j = 0; j < 4; j++)
+                    if(DT[tt].p[i] == DT[qq].p[j])
+                      break;
+                  if(j >= 4) /* same guard as in the 3-2 branch; without it DT[qq].t[4] would be read below */
+                    terminate("not found");
+
+                  if(DT[DT[tt].t[i]].p[DT[tt].s[i]] == DT[DT[qq].t[j]].p[DT[qq].s[j]])
+                    {
+                      /* ok, so we really have 4 tetra. The opposite points match up */
+                      if(n_faces_to_check >= STACKSIZE_TETRA - 4) /* four entries are pushed below */
+                        terminate("stacksize exceeded");
+                      to_check[n_faces_to_check++] = tt;
+                      to_check[n_faces_to_check++] = qq;
+                      to_check[n_faces_to_check++] = DT[tt].t[i];
+                      to_check[n_faces_to_check++] = DT[qq].t[j];
+                      DTF[tt]                      = 0;
+                      DTF[qq]                      = 0;
+                      DTF[DT[tt].t[i]]             = 0;
+                      DTF[DT[qq].t[j]]             = 0;
+
+                      make_a_4_to_4_flip(T, tt, tip_index, convex_edge);
+                    }
+                }
+            }
+          else
+            tetra_with_p = tt; /* facet is legal; tt has pp as one of its corners */
+        }
+    }
+
+  return tetra_with_p;
+}
+
+/*! \brief Tests edges and detects if a flip is needed.
+ *
+ *  Looks at the face (p0, p1, p2) opposite to corner tip of tetrahedron tt and at the point p4 that lies opposite to
+ *  this face in the neighboring tetrahedron. Clear cases are decided in floating point, all others with Orient3d_Exact().
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] tt Index in DT array.
+ *  \param[in] tip Index of fourth point (tip of tetrahedron).
+ *  \param[out] edgenr Index of edge (0, 1 or 2); only written when 1 or 2 is returned.
+ *
+ *  \return 0 if a 2-3 flip should be fine, 1 if edge *edgenr is reflex (3-2 flip candidate), 2 if a 4-4 flip around
+ *          edge *edgenr may be possible, -1 otherwise (e.g. two reflex edges).
+ */
+int convex_edge_test(tessellation *T, int tt, int tip, int *edgenr)
+{
+  tetra *DT = T->DT;
+  point *DP = T->DP;
+  tetra *t  = &DT[tt];
+  int i0, i1, i2, i3;
+  int vol, flag0, flag1, flag2;
+
+  i0 = access_triangles[tip][0];
+  i1 = access_triangles[tip][1];
+  i2 = access_triangles[tip][2];
+  i3 = tip;
+
+  point *p0 = &DP[t->p[i0]];
+  point *p1 = &DP[t->p[i1]];
+  point *p2 = &DP[t->p[i2]];
+  point *p3 = &DP[t->p[i3]];
+  point *p4 = &DP[DT[t->t[i3]].p[t->s[i3]]]; /* point opposite to the shared face in the neighboring tetrahedron */
+
+  CountConvexEdgeTest++;
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  double ax = p1->xx - p0->xx;
+  double ay = p1->yy - p0->yy;
+  double az = p1->zz - p0->zz;
+
+  double bx = p2->xx - p0->xx;
+  double by = p2->yy - p0->yy;
+  double bz = p2->zz - p0->zz;
+
+  double cx = p3->xx - p0->xx;
+  double cy = p3->yy - p0->yy;
+  double cz = p3->zz - p0->zz;
+
+  double qx = p4->xx - p0->xx;
+  double qy = p4->yy - p0->yy;
+  double qz = p4->zz - p0->zz;
+#else  /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double ax, ay, az, bx, by, bz, cx, cy, cz, qx, qy, qz;
+  double pA_xyz[3], pB_xyz[3];
+  IntegerMapType pA_ixyz[3], pB_ixyz[3];
+
+  get_integers_for_point(p0, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  ax = pB_xyz[0] - pA_xyz[0];
+  ay = pB_xyz[1] - pA_xyz[1];
+  az = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  bx = pB_xyz[0] - pA_xyz[0];
+  by = pB_xyz[1] - pA_xyz[1];
+  bz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p3, pB_ixyz, pB_xyz);
+  cx = pB_xyz[0] - pA_xyz[0];
+  cy = pB_xyz[1] - pA_xyz[1];
+  cz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p4, pB_ixyz, pB_xyz);
+  qx = pB_xyz[0] - pA_xyz[0];
+  qy = pB_xyz[1] - pA_xyz[1];
+  qz = pB_xyz[2] - pA_xyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */
+
+  /* the three rows hold the x, y and z components of a, b, c and of the right hand side q */
+  double mv_data[] = {ax, bx, cx, qx, ay, by, cy, qy, az, bz, cz, qz};
+  double x[3];
+
+  int status;
+
+  status = solve_linear_equations(mv_data, x);
+
+  /* x now contains the coordinates of the point p4 expanded in the basis (a,b,c) */
+  /* the coordinates of point 3 in this basis are (0,0,1) */
+
+  /* a negative status skips the floating point shortcut (presumably the solve failed - confirm in the solver) */
+  if(status >= 0)
+    {
+      if(fabs(1.0 - x[2]) < INSIDE_EPS)
+        terminate("inconsistency");
+
+      double u, v, w;
+
+      /* (u, v) is where the line through p3 and p4 crosses the plane of (p0, p1, p2),
+         expressed in the basis (a, b) */
+      w = 1.0 / (1.0 - x[2]);
+
+      u = w * x[0];
+      v = w * x[1];
+
+      if(u > INSIDE_EPS && v > INSIDE_EPS && (1 - (u + v)) > INSIDE_EPS)
+        {
+          /* we have a point safely in the triangle: 2-3 flip should be fine */
+          return 0;
+        }
+
+      if(u > INSIDE_EPS && v < -INSIDE_EPS && (1 - (u + v)) > INSIDE_EPS)
+        {
+          /* edge 0 is clearly reflex,  3-2 flip allowed around edge 0 */
+          *edgenr = 0;
+          return 1;
+        }
+
+      if(u > INSIDE_EPS && v > INSIDE_EPS && (1 - (u + v)) < -INSIDE_EPS)
+        {
+          // printf("3-2 flip allowed since edge 1 is reflex\n");
+          *edgenr = 1;
+          return 1;
+        }
+
+      if(u < -INSIDE_EPS && v > INSIDE_EPS && (1 - (u + v)) > INSIDE_EPS)
+        {
+          // printf("3-2 flip allowed since edge 2 is reflex\n");
+          *edgenr = 2;
+          return 1;
+        }
+
+      if(u < -INSIDE_EPS && v < -INSIDE_EPS && (1 - (u + v)) > INSIDE_EPS)
+        return -1; /* two reflex edges */
+
+      if(u < -INSIDE_EPS && v > INSIDE_EPS && (1 - (u + v)) < -INSIDE_EPS)
+        return -1; /* two reflex edges */
+
+      if(u > INSIDE_EPS && v < -INSIDE_EPS && (1 - (u + v)) < -INSIDE_EPS)
+        return -1; /* two reflex edges */
+    }
+
+  CountConvexEdgeTestExact++;
+
+  /* Now we need to test in more detail if we are on one of the edges */
+
+  vol = Orient3d_Exact(p0, p1, p2, p3);
+
+  if(vol <= 0)
+    {
+      printf("flat or negatively tetrahedron found (vol=%d)\n", vol);
+      {
+        printf("p0=%d  %g %g %g\n", (int)(p0 - DP), p0->x, p0->y, p0->z);
+        printf("p1=%d  %g %g %g\n", (int)(p1 - DP), p1->x, p1->y, p1->z);
+        printf("p2=%d  %g %g %g\n", (int)(p2 - DP), p2->x, p2->y, p2->z);
+        printf("p3=%d  %g %g %g\n", (int)(p3 - DP), p3->x, p3->y, p3->z);
+        dump_points(T);
+        terminate("inconsistent tetrahedron");
+      }
+    }
+
+  /* orientation of p4 with respect to the three side faces of the tetrahedron that meet in p3 */
+  flag0 = Orient3d_Exact(p1, p3, p2, p4);
+  flag1 = Orient3d_Exact(p0, p2, p3, p4);
+  flag2 = Orient3d_Exact(p0, p3, p1, p4);
+
+  /* exactly one negative flag: that edge is reflex; a zero flag with no negative one: 4-4 flip candidate */
+  if(flag0 >= 0 && flag1 >= 0 && flag2 < 0)
+    {
+      //  printf("3-2 flip allowed since edge 0 is reflex\n");
+      *edgenr = 0;
+      return 1;
+    }
+
+  if(flag0 < 0 && flag1 >= 0 && flag2 >= 0)
+    {
+      // printf("3-2 flip allowed since edge 1 is reflex\n");
+      *edgenr = 1;
+      return 1;
+    }
+
+  if(flag0 >= 0 && flag1 < 0 && flag2 >= 0)
+    {
+      // printf("3-2 flip allowed since edge 2 is reflex\n");
+      *edgenr = 2;
+      return 1;
+    }
+
+  if(flag0 >= 0 && flag1 >= 0 && flag2 == 0)
+    {
+      // printf("4-4 flip around edge 0 may be possible\n");
+      *edgenr = 0;
+      return 2;
+    }
+
+  if(flag0 >= 0 && flag1 == 0 && flag2 >= 0)
+    {
+      // printf("4-4 flip around edge 2 may be possible\n");
+      *edgenr = 2;
+      return 2;
+    }
+
+  if(flag0 == 0 && flag1 >= 0 && flag2 >= 0)
+    {
+      // printf("4-4 flip around edge 1 may be possible\n");
+      *edgenr = 1;
+      return 2;
+    }
+
+  if(flag0 >= 0 && flag1 >= 0 && flag2 >= 0)
+    {
+      /* we seem to have a point in the triangle: 2-3 flip should be fine */
+      return 0;
+    }
+
+  return -1;
+}
+
+/*! \brief Splits the face shared by two tetrahedra at point pp (2 -> 6 tetrahedra).
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] tt0 Index in DT of the tetrahedron whose face is split.
+ *  \param[in] face_nr Local index (0..3) of that face in DT[tt0].
+ *  \param[in] pp Index of the point that becomes the new shared vertex.
+ *  \param[in] tt1 DT slot that is overwritten with a copy of DT[tt0].
+ *  \param[in] tt2 DT slot that is overwritten with a copy of DT[tt0].
+ *  \param[in] qq1 DT slot that is overwritten with a copy of the neighbour across the face.
+ *  \param[in] qq2 DT slot that is overwritten with a copy of the neighbour across the face.
+ *
+ *  \return void
+ */
+void make_a_face_split(tessellation *T, int tt0, int face_nr, int pp, int tt1, int tt2, int qq1, int qq2)
+{
+  tetra *DT = T->DT;
+  tetra *t0 = &DT[tt0];
+  tetra *t1 = &DT[tt1];
+  tetra *t2 = &DT[tt2];
+  int qq0   = t0->t[face_nr]; /* neighbour of DT[tt0] across the face being split */
+  tetra *q0 = &DT[qq0];
+  tetra *q1 = &DT[qq1];
+  tetra *q2 = &DT[qq2];
+
+  int m, i0 = -1, i1 = -1, i2 = -1, i3 = -1, j0 = -1, j1 = -1, j2 = -1, j3 = -1;
+
+  Count_FaceSplits++;
+  CountFlips++;
+
+  *t1 = *t0; /* the new tetrahedra start as copies and are patched below */
+  *t2 = *t0;
+
+  *q1 = *q0;
+  *q2 = *q0;
+
+  i3 = face_nr;
+  j3 = t0->s[face_nr]; /* entry of q0 that refers back to t0 */
+
+  switch(i3) /* i0, i1, i2 are the three local indices of t0 other than i3 */
+    {
+      case 3:
+        i0 = 0;
+        i1 = 1;
+        i2 = 2;
+        break;
+      case 2:
+        i0 = 0;
+        i1 = 3;
+        i2 = 1;
+        break;
+      case 1:
+        i0 = 0;
+        i1 = 2;
+        i2 = 3;
+        break;
+      case 0:
+        i0 = 1;
+        i1 = 3;
+        i2 = 2;
+        break;
+    }
+
+  for(m = 0; m < 4; m++) /* find the same three points in q0 */
+    {
+      if(q0->p[m] == t0->p[i0])
+        j0 = m;
+      if(q0->p[m] == t0->p[i1])
+        j2 = m; /* note the swap: i1 pairs with j2 ... */
+      if(q0->p[m] == t0->p[i2])
+        j1 = m; /* ... and i2 pairs with j1 */
+    }
+
+  if(i0 < 0 || i1 < 0 || i2 < 0 || i3 < 0 || j0 < 0 || j1 < 0 || j2 < 0 || j3 < 0) /* an index stayed at its -1 initial value */
+    terminate("inconsistency");
+
+  t0->p[i2] = pp; /* each of the three tetrahedra on the t side replaces a different face vertex by pp */
+  t1->p[i0] = pp;
+  t2->p[i1] = pp;
+
+  q0->p[j1] = pp; /* same on the q side */
+  q1->p[j0] = pp;
+  q2->p[j2] = pp;
+
+  t0->t[i0] = tt1; /* links between t0, t1 and t2 */
+  t1->t[i2] = tt0;
+  t0->s[i0] = i2;
+  t1->s[i2] = i0;
+
+  t1->t[i1] = tt2;
+  t2->t[i0] = tt1;
+  t1->s[i1] = i0;
+  t2->s[i0] = i1;
+
+  t2->t[i2] = tt0;
+  t0->t[i1] = tt2;
+  t2->s[i2] = i1;
+  t0->s[i1] = i2;
+
+  q0->t[j0] = qq1; /* links between q0, q1 and q2 */
+  q1->t[j1] = qq0;
+  q0->s[j0] = j1;
+  q1->s[j1] = j0;
+
+  q1->t[j2] = qq2;
+  q2->t[j0] = qq1;
+  q1->s[j2] = j0;
+  q2->s[j0] = j2;
+
+  q2->t[j1] = qq0;
+  q0->t[j2] = qq2;
+  q2->s[j1] = j2;
+  q0->s[j2] = j1;
+
+  t0->t[i3] = qq0; /* pair every t tetrahedron with its q counterpart via entries i3 / j3 */
+  q0->t[j3] = tt0;
+  t0->s[i3] = j3;
+  q0->s[j3] = i3;
+
+  t1->t[i3] = qq1;
+  q1->t[j3] = tt1;
+  t1->s[i3] = j3;
+  q1->s[j3] = i3;
+
+  t2->t[i3] = qq2;
+  q2->t[j3] = tt2;
+  t2->s[i3] = j3;
+  q2->s[j3] = i3;
+
+  DT[t0->t[i2]].t[t0->s[i2]] = tt0; /* neighbours inherited from the original t0: point them back at the tetrahedron that now holds the link */
+  DT[t1->t[i0]].t[t1->s[i0]] = tt1;
+  DT[t2->t[i1]].t[t2->s[i1]] = tt2;
+
+  DT[q0->t[j1]].t[q0->s[j1]] = qq0; /* same for the neighbours inherited from the original q0 */
+  DT[q1->t[j0]].t[q1->s[j0]] = qq1;
+  DT[q2->t[j2]].t[q2->s[j2]] = qq2;
+}
+
+/*! \brief Splits an edge at point pp by duplicating the tetrahedra around it.
+ *
+ *  \param[in, out] T Pointer to tessellation
+ *  \param[in] tt0 Index in DT array of a tetrahedron that contains the edge
+ *  \param[in] edge_nr Index of the edge in DT[tt0]; used to look up the edge_* tables
+ *  \param[in] count Number of elements in lists. NOTE(review): presumably the number of tetrahedra around the edge; the walk below does not check nr against it - confirm callers guarantee this.
+ *  \param[in] pp Index to point.
+ *  \param[in] ttlist List of indices in DT that receive the duplicates.
+ */
+void make_an_edge_split(tessellation *T, int tt0, int edge_nr, int count, int pp, int *ttlist)
+{
+  tetra *DT = T->DT;
+  tetra *t0 = &DT[tt0];
+  tetra *prev, *next;
+  tetra **tlist, **t_orig_list;
+  int *i_list, *j_list, *k_list, *l_list;
+  int i, j, k, l, ii, jj, kk, ll, m, nr, nrm, nrp;
+
+  Count_EdgeSplits++;
+  CountFlips++;
+
+  tlist       = mymalloc("tlist", count * sizeof(tetra *));
+  t_orig_list = mymalloc("t_orig_list", count * sizeof(tetra *));
+  i_list      = mymalloc("i_list", sizeof(int) * count);
+  j_list      = mymalloc("j_list", sizeof(int) * count);
+  k_list      = mymalloc("k_list", sizeof(int) * count);
+  l_list      = mymalloc("l_list", sizeof(int) * count);
+
+  for(i = 0; i < count; i++)
+    tlist[i] = &DT[ttlist[i]];
+
+  i = edge_start[edge_nr];
+  j = edge_end[edge_nr];
+  k = edge_opposite[edge_nr];
+  l = edge_nexttetra[edge_nr];
+
+  nr   = 0;
+  prev = t0;
+  do /* walk from tetrahedron to tetrahedron via entry l, recording each one with its local indices */
+    {
+      t_orig_list[nr] = prev;
+      i_list[nr]      = i;
+      j_list[nr]      = j;
+      k_list[nr]      = k;
+      l_list[nr]      = l;
+
+      next = &DT[prev->t[l]];
+
+      for(m = 0, ll = ii = jj = -1; m < 4; m++) /* express prev's k, i, j points in next's local numbering */
+        {
+          if(next->p[m] == prev->p[k])
+            ll = m;
+          if(next->p[m] == prev->p[i])
+            ii = m;
+          if(next->p[m] == prev->p[j])
+            jj = m;
+        }
+
+      if(ll < 0 || ii < 0 || jj < 0)
+        terminate("inconsistency");
+
+      kk = 6 - (ll + ii + jj); /* the four local indices 0..3 sum to 6, so this is the remaining one */
+
+      prev = next;
+      i    = ii;
+      l    = ll;
+      j    = jj;
+      k    = kk;
+
+      nr++;
+    }
+  while(next != t0); /* stop once the walk is back at the starting tetrahedron */
+
+  for(nr = 0; nr < count; nr++)
+    {
+      *tlist[nr] = *t_orig_list[nr]; /* duplicate, then give each half pp in place of one edge end */
+
+      t_orig_list[nr]->p[j_list[nr]] = pp;
+      tlist[nr]->p[i_list[nr]]       = pp;
+
+      t_orig_list[nr]->t[i_list[nr]] = tlist[nr] - DT; /* pointer difference = index in DT */
+      tlist[nr]->t[j_list[nr]]       = t_orig_list[nr] - DT;
+
+      t_orig_list[nr]->s[i_list[nr]] = j_list[nr];
+      tlist[nr]->s[j_list[nr]]       = i_list[nr];
+
+      DT[tlist[nr]->t[i_list[nr]]].t[tlist[nr]->s[i_list[nr]]] = tlist[nr] - DT; /* inherited neighbour now refers to the duplicate */
+
+      nrp = nr + 1; /* next list position, wrapping around */
+      if(nrp >= count)
+        nrp -= count;
+
+      nrm = nr - 1; /* previous list position, wrapping around */
+      if(nrm < 0)
+        nrm += count;
+
+      tlist[nr]->t[l_list[nr]] = tlist[nrp] - DT;
+      tlist[nr]->s[l_list[nr]] = k_list[nrp];
+
+      tlist[nr]->t[k_list[nr]] = tlist[nrm] - DT;
+      tlist[nr]->s[k_list[nr]] = l_list[nrm];
+    }
+
+  myfree(l_list); /* released in reverse order of allocation */
+  myfree(k_list);
+  myfree(j_list);
+  myfree(i_list);
+
+  myfree(t_orig_list);
+  myfree(tlist);
+}
+
+/*! \brief Make a 4 to 4 flip.
+ *
+ *  See Springel (2010) for discussion on flips. The four tetrahedra
+ *  involved (t, q, u, w below) keep their slots in DT; their vertices and
+ *  neighbour links are rewritten in place.
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] tt Index in DT array.
+ *  \param[in] tip_index Local index (0..3) in DT[tt] of the point making up
+ *             the tip of the tetrahedron.
+ *  \param[in] edge_nr Index of edge (column 0..2 of access_triangles[tip_index]).
+ *
+ *  \return void
+ */
+void make_a_4_to_4_flip(tessellation *T, int tt, int tip_index, int edge_nr)
+{
+  tetra *DT = T->DT;
+  //  printf("4-to-4 flip\n");
+  tetra *t = &DT[tt];
+  int i0, i1, i2, j;
+  int ww, qq, uu;
+  tetra *w, *q, *u;
+  tetra *t_top[4], *t_bottom[4];
+  int s_top[4], s_bottom[4];
+  int p[6];
+
+  Count_4_to_4_Flips++;
+  CountFlips++;
+
+  uu = 0;
+  u  = NULL;
+
+  for(j = 0; j < 4; j++)
+    {
+      t_top[j]    = NULL;
+      t_bottom[j] = NULL;
+      s_top[j]    = -1;
+      s_bottom[j] = -1;
+    }
+
+  /* i0, i1, i2: the three entries of access_triangles[tip_index], starting at edge_nr and wrapping around */
+  i0 = access_triangles[tip_index][edge_nr];
+  edge_nr += 1;
+  if(edge_nr >= 3)
+    edge_nr -= 3;
+  i1 = access_triangles[tip_index][edge_nr];
+  edge_nr += 1;
+  if(edge_nr >= 3)
+    edge_nr -= 3;
+  i2 = access_triangles[tip_index][edge_nr];
+
+  t_top[0] = &DT[t->t[i0]];
+  s_top[0] = t->s[i0];
+
+  t_top[1] = &DT[t->t[i1]];
+  s_top[1] = t->s[i1];
+
+  ww = t->t[i2]; /* w: neighbour of t via entry i2 */
+  w  = &DT[ww];
+  qq = t->t[tip_index]; /* q: neighbour of t via the tip entry */
+  q  = &DT[qq];
+
+  for(j = 0; j < 4; j++)
+    {
+      if(w->p[j] == t->p[i0])
+        {
+          t_top[3] = &DT[w->t[j]];
+          s_top[3] = w->s[j];
+        }
+
+      if(w->p[j] == t->p[i1])
+        {
+          t_top[2] = &DT[w->t[j]];
+          s_top[2] = w->s[j];
+        }
+
+      if(w->p[j] == t->p[tip_index])
+        {
+          uu = w->t[j]; /* u: neighbour of w via the entry holding t's tip point */
+          u  = &DT[uu];
+        }
+    }
+
+  /* u is only set if w contains the tip point of t. On an inconsistent
+   * mesh it is still NULL here, and the loop below would dereference it
+   * before the t_top/t_bottom check further down is ever reached, so
+   * bail out explicitly. */
+  if(u == NULL)
+    terminate("inconsistency");
+
+  for(j = 0; j < 4; j++)
+    {
+      if(u->p[j] == t->p[i0])
+        {
+          t_bottom[3] = &DT[u->t[j]];
+          s_bottom[3] = u->s[j];
+        }
+
+      if(u->p[j] == t->p[i1])
+        {
+          t_bottom[2] = &DT[u->t[j]];
+          s_bottom[2] = u->s[j];
+        }
+
+      if(q->p[j] == t->p[i0])
+        {
+          t_bottom[0] = &DT[q->t[j]];
+          s_bottom[0] = q->s[j];
+        }
+
+      if(q->p[j] == t->p[i1])
+        {
+          t_bottom[1] = &DT[q->t[j]];
+          s_bottom[1] = q->s[j];
+        }
+    }
+
+  /* the six points involved, read before any tetrahedron is modified */
+  p[0] = t->p[i1];
+  p[1] = t->p[i2];
+  p[2] = t->p[i0];
+  p[3] = DT[t->t[i2]].p[t->s[i2]];
+  p[4] = t->p[tip_index];
+  p[5] = DT[t->t[tip_index]].p[t->s[tip_index]];
+
+  for(j = 0; j < 4; j++) /* every outer neighbour must have been found (checked once; a verbatim duplicate of this loop was removed) */
+    {
+      if(t_top[j] == NULL || t_bottom[j] == NULL)
+        {
+          printf("bad!\n");
+          terminate("inconsistency");
+        }
+    }
+
+  t->p[0] = p[0];
+  t->p[1] = p[1];
+  t->p[2] = p[5];
+  t->p[3] = p[4];
+
+  q->p[0] = p[1];
+  q->p[1] = p[2];
+  q->p[2] = p[5];
+  q->p[3] = p[4];
+
+  u->p[0] = p[2];
+  u->p[1] = p[3];
+  u->p[2] = p[5];
+  u->p[3] = p[4];
+
+  w->p[0] = p[3];
+  w->p[1] = p[0];
+  w->p[2] = p[5];
+  w->p[3] = p[4];
+
+  /* entries 0 and 1 link the four tetrahedra in a ring: t - q - u - w - t */
+  t->t[0] = qq;
+  q->t[1] = tt;
+  t->s[0] = 1;
+  q->s[1] = 0;
+
+  q->t[0] = uu;
+  u->t[1] = qq;
+  q->s[0] = 1;
+  u->s[1] = 0;
+
+  u->t[0] = ww;
+  w->t[1] = uu;
+  u->s[0] = 1;
+  w->s[1] = 0;
+
+  w->t[0] = tt;
+  t->t[1] = ww;
+  w->s[0] = 1;
+  t->s[1] = 0;
+
+  /* entries 2 and 3 take the recorded top/bottom outer neighbours, whose links are updated to point back */
+  t->t[2]                = t_top[0] - DT;
+  t->s[2]                = s_top[0];
+  DT[t->t[2]].t[t->s[2]] = tt;
+  DT[t->t[2]].s[t->s[2]] = 2;
+
+  t->t[3]                = t_bottom[0] - DT;
+  t->s[3]                = s_bottom[0];
+  DT[t->t[3]].t[t->s[3]] = tt;
+  DT[t->t[3]].s[t->s[3]] = 3;
+
+  q->t[2]                = t_top[1] - DT;
+  q->s[2]                = s_top[1];
+  DT[q->t[2]].t[q->s[2]] = qq;
+  DT[q->t[2]].s[q->s[2]] = 2;
+
+  q->t[3]                = t_bottom[1] - DT;
+  q->s[3]                = s_bottom[1];
+  DT[q->t[3]].t[q->s[3]] = qq;
+  DT[q->t[3]].s[q->s[3]] = 3;
+
+  u->t[2]                = t_top[2] - DT;
+  u->s[2]                = s_top[2];
+  DT[u->t[2]].t[u->s[2]] = uu;
+  DT[u->t[2]].s[u->s[2]] = 2;
+
+  u->t[3]                = t_bottom[2] - DT;
+  u->s[3]                = s_bottom[2];
+  DT[u->t[3]].t[u->s[3]] = uu;
+  DT[u->t[3]].s[u->s[3]] = 3;
+
+  w->t[2]                = t_top[3] - DT;
+  w->s[2]                = s_top[3];
+  DT[w->t[2]].t[w->s[2]] = ww;
+  DT[w->t[2]].s[w->s[2]] = 2;
+
+  w->t[3]                = t_bottom[3] - DT;
+  w->s[3]                = s_bottom[3];
+  DT[w->t[3]].t[w->s[3]] = ww;
+  DT[w->t[3]].s[w->s[3]] = 3;
+}
+
+/*! \brief Make a 1 to 4 flip.
+ *
+ *  See Springel (2010) for discussion on flips.
+ *
+ *  The tetrahedron DT[tt0] is copied into the slots tt1, tt2 and tt3. In
+ *  piece n (n = 0..3, stored at tt0..tt3) vertex n is then replaced by pp.
+ *  Pieces a and b are linked to each other through entry b of piece a and
+ *  entry a of piece b. Entry n of piece n keeps the neighbour inherited
+ *  from the original tetrahedron, and that neighbour is re-pointed to
+ *  piece n.
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] pp Index of new point.
+ *  \param[in] tt0 Index in DT of the tetrahedron that is split (piece 0).
+ *  \param[in] tt1 Index in DT that receives piece 1.
+ *  \param[in] tt2 Index in DT that receives piece 2.
+ *  \param[in] tt3 Index in DT that receives piece 3.
+ *
+ *  \return void
+ */
+void make_a_1_to_4_flip(tessellation *T, int pp, int tt0, int tt1, int tt2, int tt3)
+{
+  tetra *DT   = T->DT;
+  int slot[4] = {tt0, tt1, tt2, tt3};
+  tetra *piece[4];
+  int a, b;
+
+  /* gather the four slots so that they can be handled uniformly */
+  for(a = 0; a < 4; a++)
+    {
+      piece[a] = &DT[slot[a]];
+    }
+
+  Count_1_to_4_Flips++;
+  CountFlips++;
+
+  /* every new piece starts out as a copy of the original tetrahedron */
+  for(a = 1; a < 4; a++)
+    {
+      *piece[a] = *piece[0];
+    }
+
+  /* piece a has the new point in place of vertex a */
+  for(a = 0; a < 4; a++)
+    {
+      piece[a]->p[a] = pp;
+    }
+
+  /* link every pair of pieces: entry b of piece a refers to piece b and
+   * entry a of piece b refers back to piece a */
+  for(a = 0; a < 4; a++)
+    {
+      for(b = a + 1; b < 4; b++)
+        {
+          piece[a]->t[b] = slot[b];
+          piece[a]->s[b] = a;
+
+          piece[b]->t[a] = slot[a];
+          piece[b]->s[a] = b;
+        }
+    }
+
+  /* entry a of piece a still holds the neighbour inherited from the
+   * original tetrahedron; make that neighbour refer to piece a */
+  for(a = 0; a < 4; a++)
+    {
+      DT[piece[a]->t[a]].t[piece[a]->s[a]] = slot[a];
+    }
+}
+
+/*! \brief Make a 3 to 2 flip.
+ *
+ *  See Springel (2010) for discussion on flips.
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] tt0 Index in DT of the first tetrahedron; rewritten in place.
+ *  \param[in] tt1 Index in DT of the second tetrahedron; rewritten in place.
+ *  \param[in] tt2 Index in DT of the third tetrahedron; only read here.
+ *  \param[in] tip Local index (0..3) in DT[tt0] of the tip vertex.
+ *  \param[in] edge Column (0..2) of access_triangles[tip] selecting the edge.
+ *  \param[in] bottom Local index in DT[tt1] of the bottom vertex.
+ *
+ *  \return void
+ */
+void make_a_3_to_2_flip(tessellation *T, int tt0, int tt1, int tt2, int tip, int edge, int bottom)
+{
+  tetra *DT = T->DT;
+  tetra *t0 = &DT[tt0];
+  tetra *t1 = &DT[tt1];
+  tetra *t2 = &DT[tt2];
+
+  int i, j, k, ii, jj, iii, jjj;
+  tetra qbak, tbak, wbak;
+
+  Count_3_to_2_Flips++;
+  CountFlips++; /* counted exactly once per flip */
+
+  tbak = *t0; /* work from saved copies, since t0 and t1 are overwritten below */
+  qbak = *t1;
+  wbak = *t2;
+
+  i = edge;
+  j = i + 1;
+  k = i + 2;
+  if(j >= 3)
+    j -= 3;
+  if(k >= 3)
+    k -= 3;
+
+  i = access_triangles[tip][i];
+  j = access_triangles[tip][j];
+  k = access_triangles[tip][k];
+
+  for(ii = 0; ii < 4; ii++) /* position of point tbak.p[i] in the second tetrahedron */
+    if(tbak.p[i] == qbak.p[ii])
+      break;
+
+  for(iii = 0; iii < 4; iii++) /* ... and in the third */
+    if(tbak.p[i] == wbak.p[iii])
+      break;
+
+  for(jj = 0; jj < 4; jj++) /* position of point tbak.p[j] in the second tetrahedron */
+    if(tbak.p[j] == qbak.p[jj])
+      break;
+
+  for(jjj = 0; jjj < 4; jjj++) /* ... and in the third */
+    if(tbak.p[j] == wbak.p[jjj])
+      break;
+
+  if(ii >= 4 || iii >= 4 || jj >= 4 || jjj >= 4) /* a search ran off the end: t[4]/s[4] below would be out of bounds */
+    terminate("inconsistency");
+
+  t0->p[0] = qbak.p[bottom];
+  t0->p[1] = tbak.p[k];
+  t0->p[2] = tbak.p[i];
+  t0->p[3] = tbak.p[tip];
+
+  t1->p[0] = qbak.p[bottom];
+  t1->p[1] = tbak.p[j];
+  t1->p[2] = tbak.p[k];
+  t1->p[3] = tbak.p[tip];
+
+  t0->t[2] = tt1; /* the two resulting tetrahedra are neighbours of each other */
+  t1->t[1] = tt0;
+  t0->s[2] = 1;
+  t1->s[1] = 2;
+
+  t0->t[0]                 = tbak.t[j]; /* remaining entries: take over an outer neighbour and update its link back */
+  t0->s[0]                 = tbak.s[j];
+  DT[t0->t[0]].s[t0->s[0]] = 0;
+  DT[t0->t[0]].t[t0->s[0]] = tt0;
+
+  t0->t[3]                 = qbak.t[jj];
+  t0->s[3]                 = qbak.s[jj];
+  DT[t0->t[3]].s[t0->s[3]] = 3;
+  DT[t0->t[3]].t[t0->s[3]] = tt0;
+
+  t0->t[1]                 = wbak.t[jjj];
+  t0->s[1]                 = wbak.s[jjj];
+  DT[t0->t[1]].s[t0->s[1]] = 1;
+  DT[t0->t[1]].t[t0->s[1]] = tt0;
+
+  t1->t[0]                 = tbak.t[i];
+  t1->s[0]                 = tbak.s[i];
+  DT[t1->t[0]].s[t1->s[0]] = 0;
+  DT[t1->t[0]].t[t1->s[0]] = tt1;
+
+  t1->t[3]                 = qbak.t[ii];
+  t1->s[3]                 = qbak.s[ii];
+  DT[t1->t[3]].s[t1->s[3]] = 3;
+  DT[t1->t[3]].t[t1->s[3]] = tt1;
+
+  t1->t[2]                 = wbak.t[iii];
+  t1->s[2]                 = wbak.s[iii];
+  DT[t1->t[2]].s[t1->s[2]] = 2;
+  DT[t1->t[2]].t[t1->s[2]] = tt1;
+}
+
+/*! \brief Make a 2 to 3 flip
+ *
+ *  See Springel (2010) for discussion on flips.
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] tt0 Index in DT of the first tetrahedron; rewritten in place.
+ *  \param[in] tip Local index (0..3) in DT[tt0] of the point making up the tip.
+ *  \param[in] tt1 Index in DT of the second tetrahedron; saved, then overwritten.
+ *  \param[in] bottom Local index in DT[tt1]; selects the access_triangles row used for it.
+ *  \param[in] qq Index of the point written into one vertex of each resulting tetrahedron.
+ *  \param[in] tt2 Index in DT of the slot that receives the third tetrahedron.
+ *
+ *  \return void
+ */
+void make_a_2_to_3_flip(tessellation *T, int tt0, int tip, int tt1, int bottom, int qq, int tt2)
+{
+  tetra *DT = T->DT;
+  tetra *t0 = &DT[tt0];
+  tetra *t1 = &DT[tt1];
+  tetra *t2 = &DT[tt2];
+  tetra qbak, tbak;
+  int k;
+
+  Count_2_to_3_Flips++;
+  CountFlips++; /* the total flip counter has to include 2-to-3 flips as well */
+
+  tbak = *t0;
+  qbak = *t1; /* to save info */
+
+  *t1 = *t0;
+  *t2 = *t0;
+
+  /* redefine points */
+  t0->p[access_triangles[tip][0]] = qq;
+  t1->p[access_triangles[tip][1]] = qq;
+  t2->p[access_triangles[tip][2]] = qq;
+
+  /* make neighbour connections */
+  t0->t[access_triangles[tip][1]] = tt1;
+  t1->t[access_triangles[tip][0]] = tt0;
+  t0->s[access_triangles[tip][1]] = access_triangles[tip][0];
+  t1->s[access_triangles[tip][0]] = access_triangles[tip][1];
+
+  t0->t[access_triangles[tip][2]] = tt2;
+  t2->t[access_triangles[tip][0]] = tt0;
+  t0->s[access_triangles[tip][2]] = access_triangles[tip][0];
+  t2->s[access_triangles[tip][0]] = access_triangles[tip][2];
+
+  t1->t[access_triangles[tip][2]] = tt2;
+  t2->t[access_triangles[tip][1]] = tt1;
+  t1->s[access_triangles[tip][2]] = access_triangles[tip][1];
+  t2->s[access_triangles[tip][1]] = access_triangles[tip][2];
+
+  /* these are the ones on the top */
+  DT[t0->t[access_triangles[tip][0]]].t[t0->s[access_triangles[tip][0]]] = tt0;
+  DT[t1->t[access_triangles[tip][1]]].t[t1->s[access_triangles[tip][1]]] = tt1;
+  DT[t2->t[access_triangles[tip][2]]].t[t2->s[access_triangles[tip][2]]] = tt2;
+
+  /* now the one at the bottom */
+
+  if(qbak.p[access_triangles[bottom][0]] == tbak.p[access_triangles[tip][0]])
+    k = 0;
+  else if(qbak.p[access_triangles[bottom][1]] == tbak.p[access_triangles[tip][0]])
+    k = 1;
+  else
+    k = 2; /* no explicit comparison: the third column is assumed to be the match */
+
+  t0->t[tip]                   = qbak.t[access_triangles[bottom][k]];
+  t0->s[tip]                   = qbak.s[access_triangles[bottom][k]];
+  DT[t0->t[tip]].t[t0->s[tip]] = tt0;
+  DT[t0->t[tip]].s[t0->s[tip]] = tip;
+
+  if(qbak.p[access_triangles[bottom][0]] == tbak.p[access_triangles[tip][1]])
+    k = 0;
+  else if(qbak.p[access_triangles[bottom][1]] == tbak.p[access_triangles[tip][1]])
+    k = 1;
+  else
+    k = 2;
+
+  t1->t[tip]                   = qbak.t[access_triangles[bottom][k]];
+  t1->s[tip]                   = qbak.s[access_triangles[bottom][k]];
+  DT[t1->t[tip]].t[t1->s[tip]] = tt1;
+  DT[t1->t[tip]].s[t1->s[tip]] = tip;
+
+  if(qbak.p[access_triangles[bottom][0]] == tbak.p[access_triangles[tip][2]])
+    k = 0;
+  else if(qbak.p[access_triangles[bottom][1]] == tbak.p[access_triangles[tip][2]])
+    k = 1;
+  else
+    k = 2;
+
+  t2->t[tip]                   = qbak.t[access_triangles[bottom][k]];
+  t2->s[tip]                   = qbak.s[access_triangles[bottom][k]];
+  DT[t2->t[tip]].t[t2->s[tip]] = tt2;
+  DT[t2->t[tip]].s[t2->s[tip]] = tip;
+}
+
+static int ErrorFlag = 0; /* set to 1 by get_tetra() once its walk exceeds MAX_COUNT_MOVES; while non-zero, InTetra() prints diagnostics */
+
+/*! \brief Gets tetrahedron.
+ *
+ *  Returns the index of the tetrahedron containing the point p. The walk
+ *  starts at DT[ttstart] and follows the neighbours suggested by InTetra().
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] p Point.
+ *  \param[out] moves The number of moves necessary to find tetrahedron.
+ *  \param[in] ttstart Index in DT of the tetrahedron the search starts from.
+ *  \param[out] flag The return value from InTetra, specifying whether
+ *              the point is inside or on the edge/face.
+ *  \param[out] edgeface_nr The edge/face number on the tetrahedron containing
+ *              the point, in case flag is >1.
+ *
+ *  \return Index of tetrahedron.
+ */
+int get_tetra(tessellation *T, point *p, int *moves, int ttstart, int *flag, int *edgeface_nr)
+{
+  int current = ttstart, nsteps = 0;
+  int neighbour, result;
+
+#define MAX_COUNT_MOVES 1000000
+
+  for(;;)
+    {
+      result = InTetra(T, current, p, edgeface_nr, &neighbour);
+      if(result != 0) /* anything but 0 ends the walk */
+        break;
+
+      nsteps++;
+
+      if(nsteps > MAX_COUNT_MOVES)
+        ErrorFlag = 1; /* from now on InTetra() prints diagnostics */
+      if(nsteps > MAX_COUNT_MOVES + 10)
+        terminate("too many moves"); /* give up after ten more moves */
+
+      current = neighbour;
+    }
+
+  *moves = nsteps;
+  *flag  = result;
+
+  return current;
+}
+
+/*! \brief Is point in tetrahedron?
+ *
+ *  Tests whether point p lies in the tetrahedron DT[tt]. The
+ *  return value is 0 if the point is outside, 1 if it's inside, 2 if
+ *  it's on a face, and 3 if it's on an edge. If it's either of the
+ *  last two, the edgeface_nr is set to the corresponding index of the
+ *  edge or face. If the point is outside, nexttetra is set to the
+ *  index of a neighboring tetrahedron in the direction of the
+ *  point, otherwise it's unmodified.
+ *
+ *  \param[in] T Tessellation.
+ *  \param[in] tt Index of tetrahedron in DT array.
+ *  \param[in] p Point.
+ *  \param[out] edgeface_nr The edge/face number on the tetrahedron containing
+ *              the point, in case the return value is >1.
+ *  \param[out] nexttetra Index of tetrahedron.
+ *
+ *  \return 0 (outside), 1 (inside), 2 (on a face) or 3 (on an edge).
+ *
+ */
+int InTetra(tessellation *T, int tt, point *p, int *edgeface_nr, int *nexttetra)
+{
+  tetra *DT = T->DT;
+  point *DP = T->DP;
+  tetra *t  = &DT[tt];
+
+  point *p0 = &DP[t->p[0]];
+  point *p1 = &DP[t->p[1]];
+  point *p2 = &DP[t->p[2]];
+  point *p3 = &DP[t->p[3]];
+
+  // test if we are in an "infinity tetra", i.e. one that has an
+  // infinity point as a vertex; these bound the tessellated volume.
+  // This is treated as a fatal error: the point and the tetra are
+  // printed and terminate() is called. No value is returned to the
+  // caller in this case.
+  if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
+    {
+#ifndef LONGIDS
+      printf("task=%d: we are in a tetraeder with an infinity point. tetra=%d, coordinates of point=(%g|%g|%g) ID=%d\n", ThisTask, tt,
+             p->x, p->y, p->z, p->ID);
+#else  /* #ifndef LONGIDS */
+      printf("task=%d: we are in a tetraeder with an infinity point. tetra=%d, coordinates of point=(%g|%g|%g) ID=%llu\n", ThisTask,
+             tt, p->x, p->y, p->z, p->ID);
+#endif /* #ifndef LONGIDS #else */
+      terminate("invalid tetrahedron");
+    }
+
+  Count_InTetra++;
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  double ax = p1->xx - p0->xx; /* a, b, c: edge vectors from p0 to p1, p2, p3 */
+  double ay = p1->yy - p0->yy;
+  double az = p1->zz - p0->zz;
+
+  double bx = p2->xx - p0->xx;
+  double by = p2->yy - p0->yy;
+  double bz = p2->zz - p0->zz;
+
+  double cx = p3->xx - p0->xx;
+  double cy = p3->yy - p0->yy;
+  double cz = p3->zz - p0->zz;
+
+  double qx = p->xx - p0->xx; /* q: vector from p0 to the query point */
+  double qy = p->yy - p0->yy;
+  double qz = p->zz - p0->zz;
+#else  /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double ax, ay, az, bx, by, bz, cx, cy, cz, qx, qy, qz;
+  double pA_xyz[3], pB_xyz[3];
+  IntegerMapType pA_ixyz[3], pB_ixyz[3];
+
+  get_integers_for_point(p0, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  ax = pB_xyz[0] - pA_xyz[0];
+  ay = pB_xyz[1] - pA_xyz[1];
+  az = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  bx = pB_xyz[0] - pA_xyz[0];
+  by = pB_xyz[1] - pA_xyz[1];
+  bz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p3, pB_ixyz, pB_xyz);
+  cx = pB_xyz[0] - pA_xyz[0];
+  cy = pB_xyz[1] - pA_xyz[1];
+  cz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p, pB_ixyz, pB_xyz);
+  qx = pB_xyz[0] - pA_xyz[0];
+  qy = pB_xyz[1] - pA_xyz[1];
+  qz = pB_xyz[2] - pA_xyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+
+  double mv_data[] = {ax, bx, cx, qx, ay, by, cy, qy, az, bz, cz, qz}; /* one row per coordinate: a, b, c components, then q */
+  double x[3];
+
+  int ivol, flag3, flag2, flag1, flag0;
+  int count_zeros = 0;
+
+  int status;
+
+  status = solve_linear_equations(mv_data, x);
+
+  if(status < 0) /* presumably signals that the solve failed - confirm against solve_linear_equations(); x is not used for decisions in that case */
+    {
+      ivol = Orient3d_Exact(p0, p1, p2, p3);
+      if(ivol <= 0)
+        {
+          printf("flat or negatively tetrahedron found (ivol=%d) tt=%d\n", ivol, tt);
+          terminate("invalid tetrahedron");
+        }
+    }
+
+  /* x now contains the coordinates of the point p expanded in the basis (a,b,c) */
+
+  if(ErrorFlag) /* diagnostics only; nothing computed in this block is used for the decision below */
+    {
+      ivol  = Orient3d_Exact(p0, p1, p2, p3);
+      flag3 = Orient3d_Exact(p0, p1, p2, p);
+      flag2 = Orient3d_Exact(p0, p3, p1, p);
+      flag1 = Orient3d_Exact(p0, p2, p3, p);
+      flag0 = Orient3d_Exact(p1, p3, p2, p);
+
+      printf("\n\nTetra=%d\n", (int)(t - DT));
+      printf("ivol=%d  flag0=%d %d %d %d\n", ivol, flag0, flag1, flag2, flag3);
+      printf("xx = %g %g %g   1-sum=%g\n", x[0], x[1], x[2], 1 - (x[0] + x[1] + x[2]));
+      printf("a= %g %g %g\n", ax, ay, az);
+      printf("b= %g %g %g\n", bx, by, bz);
+      printf("c= %g %g %g\n", cx, cy, cz);
+      printf("q= %g %g %g\n", qx, qy, qz);
+      printf("(axb)*c) = %g\n", (ay * bz - az * by) * cx + (az * bx - ax * bz) * cy + (ax * by - ay * bx) * cz);
+      printf("next tetras=%d %d %d %d\n", t->t[0], t->t[1], t->t[2], t->t[3]);
+    }
+
+  if(status >= 0)
+    {
+      if(x[0] > INSIDE_EPS && x[1] > INSIDE_EPS && x[2] > INSIDE_EPS && (1 - (x[0] + x[1] + x[2])) > INSIDE_EPS)
+        {
+          /* looks like we are safely inside the tetrahedron */
+
+          return 1; /* our point is really nicely inside the tetrahedron */
+        }
+
+      if(x[0] < -INSIDE_EPS || x[1] < -INSIDE_EPS || x[2] < -INSIDE_EPS || (1 - (x[0] + x[1] + x[2])) < -INSIDE_EPS)
+        {
+          /* looks like we are clearly outside the tetrahedron.
+             Let's look for a good neighbouring tetrahedron to continue the search */
+
+          /* note: in the (a,b,c) basis, the center-of-mass has coordinates (1/4, 1/4, 1/4) */
+
+          double w, u, v;
+
+          if(ErrorFlag) /* diagnostics only: prints the same quantities the three tests below compute */
+            {
+              w = 0.25 / (0.25 - x[2]);
+              u = 0.25 + w * (x[0] - 0.25);
+              v = 0.25 + w * (x[1] - 0.25);
+              printf("[3] w=%g u=%g v=%g    fabs(x[2] - 0.25)=%g\n", w, u, v, fabs(x[2] - 0.25));
+
+              w = 0.25 / (0.25 - x[1]);
+              u = 0.25 + w * (x[0] - 0.25);
+              v = 0.25 + w * (x[2] - 0.25);
+              printf("[3] w=%g u=%g v=%g    fabs(x[1] - 0.25)=%g\n", w, u, v, fabs(x[1] - 0.25));
+
+              w = 0.25 / (0.25 - x[0]);
+              u = 0.25 + w * (x[1] - 0.25);
+              v = 0.25 + w * (x[2] - 0.25);
+              printf("[3] w=%g u=%g v=%g    fabs(x[0] - 0.25)=%g\n", w, u, v, fabs(x[0] - 0.25));
+            }
+
+          if(fabs(x[2] - 0.25) > INSIDE_EPS)
+            {
+              w = 0.25 / (0.25 - x[2]); /* scale factor that moves the third coordinate from 0.25 to 0 along the line towards x */
+              if(w > 0)
+                {
+                  u = 0.25 + w * (x[0] - 0.25);
+                  v = 0.25 + w * (x[1] - 0.25);
+                  if(u > -INSIDE_EPS && v > -INSIDE_EPS && (1 - (u + v) > -INSIDE_EPS))
+                    {
+                      *nexttetra = t->t[3];
+                      return 0;
+                    }
+                }
+            }
+
+          if(fabs(x[1] - 0.25) > INSIDE_EPS)
+            {
+              w = 0.25 / (0.25 - x[1]);
+              if(w > 0)
+                {
+                  u = 0.25 + w * (x[0] - 0.25);
+                  v = 0.25 + w * (x[2] - 0.25);
+                  if(u > -INSIDE_EPS && v > -INSIDE_EPS && (1 - (u + v) > -INSIDE_EPS))
+                    {
+                      *nexttetra = t->t[2];
+                      return 0;
+                    }
+                }
+            }
+
+          if(fabs(x[0] - 0.25) > INSIDE_EPS)
+            {
+              w = 0.25 / (0.25 - x[0]);
+              if(w > 0)
+                {
+                  u = 0.25 + w * (x[1] - 0.25);
+                  v = 0.25 + w * (x[2] - 0.25);
+                  if(u > -INSIDE_EPS && v > -INSIDE_EPS && (1 - (u + v) > -INSIDE_EPS))
+                    {
+                      *nexttetra = t->t[1];
+                      return 0;
+                    }
+                }
+            }
+
+          *nexttetra = t->t[0]; /* none of the three tests above selected a neighbour */
+          return 0;
+        }
+    }
+
+  /* here we need to decide whether we have a degenerate case, i.e.
+     whether we think the point lies on a face or an edge of the tetrahedron */
+
+  if(ErrorFlag)
+    {
+      printf("doing exact test for tetra=%d\n", (int)(t - DT));
+    }
+
+  Count_InTetraExact++;
+
+  if((ivol = Orient3d_Exact(p0, p1, p2, p3)) <= 0)
+    {
+      printf("flat or negatively oriented tetrahedron found (vol=%d)\n", ivol);
+      terminate("invalid tetrahedron");
+    }
+
+  flag3 = Orient3d_Exact(p0, p1, p2, p); /* flagN: orientation test of p against the face that leaves out vertex N */
+  flag2 = Orient3d_Exact(p0, p3, p1, p);
+  flag1 = Orient3d_Exact(p0, p2, p3, p);
+  flag0 = Orient3d_Exact(p1, p3, p2, p);
+
+  if(flag0 == 0)
+    count_zeros++;
+
+  if(flag1 == 0)
+    count_zeros++;
+
+  if(flag2 == 0)
+    count_zeros++;
+
+  if(flag3 == 0)
+    count_zeros++;
+
+  if(count_zeros > 2) /* more than two zero flags is not handled: print everything and stop */
+    {
+      printf("task=%d flags=%d %d %d %d  (axb)*c = %g\n", ThisTask, flag0, flag1, flag2, flag3,
+             (ay * bz - az * by) * cx + (az * bx - ax * bz) * cy + (ax * by - ay * bx) * cz);
+
+      printf(
+          "task=%d pp0=%ld pp1=%ld pp2=%ld pp3=%ld p=%ld IDs=(%llu %llu %llu %llu %llu) pos_0=(%g|%g|%g) pos_1=(%g|%g|%g) "
+          "pos_2=(%g|%g|%g) pos_3=(%g|%g|%g) pos=(%g|%g|%g)\n",
+          ThisTask, p0 - DP, p1 - DP, p2 - DP, p3 - DP, p - DP, (long long)p0->ID, (long long)p1->ID, (long long)p2->ID,
+          (long long)p3->ID, (long long)p->ID, p0->x, p0->y, p0->z, p1->x, p1->y, p1->z, p2->x, p2->y, p2->z, p3->x, p3->y, p3->z,
+          p->x, p->y, p->z);
+
+#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z)
+      printf("task=%d imageflags=(%d %d %d %d %d)\n", ThisTask, p0->image_flags, p1->image_flags, p2->image_flags, p3->image_flags,
+             p->image_flags);
+#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */
+      terminate("strange zero count");
+    }
+
+  if(flag0 >= 0 && flag1 >= 0 && flag2 >= 0 && flag3 >= 0)
+    {
+      /* we have a point inside the tetra, but it may still be on one of the edges */
+
+      if(count_zeros == 0)
+        {
+          /* ok, let's split the tetra in 4, we are apparently well enough inside */
+          return 1;
+        }
+
+      if(count_zeros == 1) /* we lie on a face */
+        {
+          if(flag0 == 0)
+            {
+              *edgeface_nr = 0;
+              return 2;
+            }
+
+          if(flag1 == 0)
+            {
+              *edgeface_nr = 1;
+              return 2;
+            }
+
+          if(flag2 == 0)
+            {
+              *edgeface_nr = 2;
+              return 2;
+            }
+
+          if(flag3 == 0)
+            {
+              *edgeface_nr = 3;
+              return 2;
+            }
+        }
+
+      if(count_zeros == 2) /* we lie on an edge */
+        {
+          if(flag0 == 0 && flag1 == 0)
+            {
+              *edgeface_nr = 5;
+              return 3;
+            }
+
+          if(flag0 == 0 && flag2 == 0)
+            {
+              *edgeface_nr = 4;
+              return 3;
+            }
+
+          if(flag0 == 0 && flag3 == 0)
+            {
+              *edgeface_nr = 3;
+              return 3;
+            }
+
+          if(flag1 == 0 && flag2 == 0)
+            {
+              *edgeface_nr = 2;
+              return 3;
+            }
+
+          if(flag1 == 0 && flag3 == 0)
+            {
+              *edgeface_nr = 1;
+              return 3;
+            }
+
+          if(flag2 == 0 && flag3 == 0)
+            {
+              *edgeface_nr = 0;
+              return 3;
+            }
+        }
+    }
+
+  /* we seem to be lying clearly outside the tetrahedron */
+  /* Let's determine a suitable neighbour */
+
+  /* if there is a single negative value, let's pick this side */
+
+  if(flag0 < 0 && flag1 >= 0 && flag2 >= 0 && flag3 >= 0)
+    {
+      *nexttetra = t->t[0];
+      return 0;
+    }
+
+  if(flag0 >= 0 && flag1 < 0 && flag2 >= 0 && flag3 >= 0)
+    {
+      *nexttetra = t->t[1];
+      return 0;
+    }
+
+  if(flag0 >= 0 && flag1 >= 0 && flag2 < 0 && flag3 >= 0)
+    {
+      *nexttetra = t->t[2];
+      return 0;
+    }
+  if(flag0 >= 0 && flag1 >= 0 && flag2 >= 0 && flag3 < 0)
+    {
+      *nexttetra = t->t[3];
+      return 0;
+    }
+
+  /* there are at least two negative values. Let's pick a random one */
+
+  int ind = -1; /* first negative flag is taken as is; each later one replaces it when get_random_number() < 0.5 */
+
+  if(flag0 < 0)
+    {
+      if(ind < 0)
+        ind = 0;
+      else
+        {
+          if(get_random_number() < 0.5)
+            ind = 0;
+        }
+    }
+
+  if(flag1 < 0)
+    {
+      if(ind < 0)
+        ind = 1;
+      else
+        {
+          if(get_random_number() < 0.5)
+            ind = 1;
+        }
+    }
+
+  if(flag2 < 0)
+    {
+      if(ind < 0)
+        ind = 2;
+      else
+        {
+          if(get_random_number() < 0.5)
+            ind = 2;
+        }
+    }
+
+  if(flag3 < 0)
+    {
+      if(ind < 0)
+        ind = 3;
+      else
+        {
+          if(get_random_number() < 0.5)
+            ind = 3;
+        }
+    }
+
+  *nexttetra = t->t[ind];
+  return 0;
+}
+
+/*! \brief Computes the circum-circle of all tetrahedra in mesh.
+ *
+ *  Tetrahedra whose DTF bit 0 is already set are skipped. All others get
+ *  the bit set, and update_circumcircle() is called for those that are
+ *  not marked as deleted and have no DPinfinity vertex.
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *
+ *  \return void
+ */
+void compute_circumcircles(tessellation *T)
+{
+  tetra *tetras = T->DT;
+  char *done    = T->DTF;
+  int n, v, touches_infinity;
+
+  for(n = 0; n < T->Ndt; n++)
+    {
+      if(done[n] & 1)
+        continue;
+      done[n] |= 1;
+
+      if(tetras[n].t[0] < 0) /* deleted ? */
+        continue;
+
+      touches_infinity = 0;
+      for(v = 0; v < 4; v++)
+        if(tetras[n].p[v] == DPinfinity)
+          touches_infinity = 1;
+      if(!touches_infinity)
+        update_circumcircle(T, n);
+    }
+}
+
+/*! \brief Determinant calculation with arbitrary precision arithmetics.
+ *
+ *  Auxiliary function for exact circum-circle calculation. Evaluates
+ *    ax * (bz * cy - by * cz) + bx * (cz * ay - cy * az) + cx * (az * by - ay * bz),
+ *  which is the negative of the determinant of the matrix with the rows
+ *  (ax, ay, az), (bx, by, bz), (cx, cy, cz).
+ *
+ *  \param[out] det Result; has to be initialized by the caller.
+ *  \param[in] ax Row 1, column 1.
+ *  \param[in] ay Row 1, column 2.
+ *  \param[in] az Row 1, column 3.
+ *  \param[in] bx Row 2, column 1.
+ *  \param[in] by Row 2, column 2.
+ *  \param[in] bz Row 2, column 3.
+ *  \param[in] cx Row 3, column 1.
+ *  \param[in] cy Row 3, column 2.
+ *  \param[in] cz Row 3, column 3.
+ *  \return void
+ */
+void calc_mpz_determinant(mpz_t det, mpz_t ax, mpz_t ay, mpz_t az, mpz_t bx, mpz_t by, mpz_t bz, mpz_t cx, mpz_t cy, mpz_t cz)
+{
+  mpz_t prod[6];  /* pairwise products entering the 2x2 minors */
+  mpz_t minor[3]; /* the three 2x2 minors */
+  mpz_t term[3];  /* minors multiplied by ax, bx and cx */
+  mpz_t partial;  /* term[0] + term[1] */
+  int k;
+
+  for(k = 0; k < 6; k++)
+    mpz_init(prod[k]);
+
+  for(k = 0; k < 3; k++)
+    {
+      mpz_init(minor[k]);
+      mpz_init(term[k]);
+    }
+
+  mpz_init(partial);
+
+  mpz_mul(prod[0], bz, cy);
+  mpz_mul(prod[1], by, cz);
+  mpz_mul(prod[2], cz, ay);
+  mpz_mul(prod[3], cy, az);
+  mpz_mul(prod[4], az, by);
+  mpz_mul(prod[5], ay, bz);
+
+  /* minor[k] = prod[2k] - prod[2k + 1] */
+  for(k = 0; k < 3; k++)
+    mpz_sub(minor[k], prod[2 * k], prod[2 * k + 1]);
+
+  mpz_mul(term[0], minor[0], ax);
+  mpz_mul(term[1], minor[1], bx);
+  mpz_mul(term[2], minor[2], cx);
+
+  /* det is written by the very last arithmetic operation only */
+  mpz_add(partial, term[0], term[1]);
+  mpz_add(det, partial, term[2]);
+
+  mpz_clear(partial);
+  for(k = 0; k < 3; k++)
+    {
+      mpz_clear(term[k]);
+      mpz_clear(minor[k]);
+    }
+
+  for(k = 0; k < 6; k++)
+    mpz_clear(prod[k]);
+}
+
+/*! \brief Arbitrary precision calculation of the circum-circle center.
+ *         Works on the integer coordinates of the four corner points.
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] tt Index of the tetrahedron in the DT array.
+ *  \param[out] x X coordinate of circum-circle center.
+ *  \param[out] y Y coordinate of circum-circle center.
+ *  \param[out] z Z coordinate of circum-circle center.
+ *
+ *  \return void
+ */
+void get_circumcircle_exact(tessellation *T, int tt, double *x, double *y, double *z)
+{
+  tetra *DT = T->DT;
+  point *DP = T->DP;
+  tetra *t  = &DT[tt];
+
+  point *p0 = &DP[t->p[0]];
+  point *p1 = &DP[t->p[1]];
+  point *p2 = &DP[t->p[2]];
+  point *p3 = &DP[t->p[3]];
+
+  mpz_t det, detA, detB, detC;
+  mpz_t qx, qy, qz;
+  mpz_t a2, b2, c2, tmp, AA, BB, CC;
+  mpz_t ax, ay, az, bx, by, bz, cx, cy, cz;
+
+  mpz_init(det);
+  mpz_init(detA);
+  mpz_init(detB);
+  mpz_init(detC);
+  mpz_init(qx);
+  mpz_init(qy);
+  mpz_init(qz);
+
+  mpz_init(a2);
+  mpz_init(b2);
+  mpz_init(c2);
+  mpz_init(tmp);
+  mpz_init(AA);
+  mpz_init(BB);
+  mpz_init(CC);
+
+  mpz_init(ax);
+  mpz_init(ay);
+  mpz_init(az);
+  mpz_init(bx);
+  mpz_init(by);
+  mpz_init(bz);
+  mpz_init(cx);
+  mpz_init(cy);
+  mpz_init(cz);
+  /* a, b, c: integer coordinates of p1, p2, p3 relative to p0 */
+#ifndef OPTIMIZE_MEMORY_USAGE
+  MY_mpz_set_si(tmp, p1->ix);
+  MY_mpz_sub_ui(ax, tmp, p0->ix);
+  MY_mpz_set_si(tmp, p1->iy);
+  MY_mpz_sub_ui(ay, tmp, p0->iy);
+  MY_mpz_set_si(tmp, p1->iz);
+  MY_mpz_sub_ui(az, tmp, p0->iz);
+
+  MY_mpz_set_si(tmp, p2->ix);
+  MY_mpz_sub_ui(bx, tmp, p0->ix);
+  MY_mpz_set_si(tmp, p2->iy);
+  MY_mpz_sub_ui(by, tmp, p0->iy);
+  MY_mpz_set_si(tmp, p2->iz);
+  MY_mpz_sub_ui(bz, tmp, p0->iz);
+
+  MY_mpz_set_si(tmp, p3->ix);
+  MY_mpz_sub_ui(cx, tmp, p0->ix);
+  MY_mpz_set_si(tmp, p3->iy);
+  MY_mpz_sub_ui(cy, tmp, p0->iy);
+  MY_mpz_set_si(tmp, p3->iz);
+  MY_mpz_sub_ui(cz, tmp, p0->iz);
+#else  /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  IntegerMapType pA_ixyz[3], pB_ixyz[3];
+  double pA_xyz[3], pB_xyz[3];
+
+  get_integers_for_point(p0, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  MY_mpz_set_si(tmp, pB_ixyz[0]);
+  MY_mpz_sub_ui(ax, tmp, pA_ixyz[0]);
+  MY_mpz_set_si(tmp, pB_ixyz[1]);
+  MY_mpz_sub_ui(ay, tmp, pA_ixyz[1]);
+  MY_mpz_set_si(tmp, pB_ixyz[2]);
+  MY_mpz_sub_ui(az, tmp, pA_ixyz[2]);
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  MY_mpz_set_si(tmp, pB_ixyz[0]);
+  MY_mpz_sub_ui(bx, tmp, pA_ixyz[0]);
+  MY_mpz_set_si(tmp, pB_ixyz[1]);
+  MY_mpz_sub_ui(by, tmp, pA_ixyz[1]);
+  MY_mpz_set_si(tmp, pB_ixyz[2]);
+  MY_mpz_sub_ui(bz, tmp, pA_ixyz[2]);
+
+  get_integers_for_point(p3, pB_ixyz, pB_xyz);
+  MY_mpz_set_si(tmp, pB_ixyz[0]);
+  MY_mpz_sub_ui(cx, tmp, pA_ixyz[0]);
+  MY_mpz_set_si(tmp, pB_ixyz[1]);
+  MY_mpz_sub_ui(cy, tmp, pA_ixyz[1]);
+  MY_mpz_set_si(tmp, pB_ixyz[2]);
+  MY_mpz_sub_ui(cz, tmp, pA_ixyz[2]);
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+  /* a2, b2, c2: squared lengths of a, b, c */
+  mpz_set(tmp, ax);
+  mpz_mul(AA, tmp, ax);
+  mpz_set(tmp, ay);
+  mpz_mul(BB, tmp, ay);
+  mpz_set(tmp, az);
+  mpz_mul(CC, tmp, az);
+  mpz_add(tmp, AA, BB);
+  mpz_add(a2, tmp, CC);
+
+  mpz_set(tmp, bx);
+  mpz_mul(AA, tmp, bx);
+  mpz_set(tmp, by);
+  mpz_mul(BB, tmp, by);
+  mpz_set(tmp, bz);
+  mpz_mul(CC, tmp, bz);
+  mpz_add(tmp, AA, BB);
+  mpz_add(b2, tmp, CC);
+
+  mpz_set(tmp, cx);
+  mpz_mul(AA, tmp, cx);
+  mpz_set(tmp, cy);
+  mpz_mul(BB, tmp, cy);
+  mpz_set(tmp, cz);
+  mpz_mul(CC, tmp, cz);
+  mpz_add(tmp, AA, BB);
+  mpz_add(c2, tmp, CC);
+  /* det of (a, b, c); detA/detB/detC have the x/y/z column replaced by (a2, b2, c2) */
+  calc_mpz_determinant(det, ax, ay, az, bx, by, bz, cx, cy, cz);
+  calc_mpz_determinant(detA, a2, ay, az, b2, by, bz, c2, cy, cz);
+  calc_mpz_determinant(detB, ax, a2, az, bx, b2, bz, cx, c2, cz);
+  calc_mpz_determinant(detC, ax, ay, a2, bx, by, b2, cx, cy, c2);
+  /* q = quotient detX / det rounded up, then halved with truncation */
+  mpz_cdiv_q(tmp, detA, det);
+  mpz_tdiv_q_2exp(qx, tmp, 1);
+
+  mpz_cdiv_q(tmp, detB, det);
+  mpz_tdiv_q_2exp(qy, tmp, 1);
+
+  mpz_cdiv_q(tmp, detC, det);
+  mpz_tdiv_q_2exp(qz, tmp, 1);
+  /* q is relative to p0: add the integer coordinates of p0 again */
+#ifndef OPTIMIZE_MEMORY_USAGE
+  MY_mpz_set_si(tmp, p0->ix);
+  mpz_add(AA, qx, tmp);
+
+  MY_mpz_set_si(tmp, p0->iy);
+  mpz_add(BB, qy, tmp);
+
+  MY_mpz_set_si(tmp, p0->iz);
+  mpz_add(CC, qz, tmp);
+#else  /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  MY_mpz_set_si(tmp, pA_ixyz[0]);
+  mpz_add(AA, qx, tmp);
+
+  MY_mpz_set_si(tmp, pA_ixyz[1]);
+  mpz_add(BB, qy, tmp);
+
+  MY_mpz_set_si(tmp, pA_ixyz[2]);
+  mpz_add(CC, qz, tmp);
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+  double xx, yy, zz;
+
+  xx = mpz_get_d(AA);
+  yy = mpz_get_d(BB);
+  zz = mpz_get_d(CC);
+  /* integer units -> floating point (division by 2^USEDBITS) */
+  xx /= (1LLu << USEDBITS);
+  yy /= (1LLu << USEDBITS);
+  zz /= (1LLu << USEDBITS);
+  /* NOTE(review): no 1.0 is subtracted here, unlike in update_circumcircle; presumably the integers encode (xx - 1.0) - confirm */
+  xx = xx / ConversionFac + CentralOffsetX;
+  yy = yy / ConversionFac + CentralOffsetY;
+  zz = zz / ConversionFac + CentralOffsetZ;
+
+  *x = xx;
+  *y = yy;
+  *z = zz;
+  /* release all temporaries */
+  mpz_clear(det);
+  mpz_clear(detA);
+  mpz_clear(detB);
+  mpz_clear(detC);
+  mpz_clear(qx);
+  mpz_clear(qy);
+  mpz_clear(qz);
+
+  mpz_clear(a2);
+  mpz_clear(b2);
+  mpz_clear(c2);
+  mpz_clear(tmp);
+  mpz_clear(AA);
+  mpz_clear(BB);
+  mpz_clear(CC);
+
+  mpz_clear(ax);
+  mpz_clear(ay);
+  mpz_clear(az);
+  mpz_clear(bx);
+  mpz_clear(by);
+  mpz_clear(bz);
+  mpz_clear(cx);
+  mpz_clear(cy);
+  mpz_clear(cz);
+}
+
+/*! \brief Computes the circum-circle center of tetrahedron tt and stores it
+ *         in T->DTC[tt]; exact arithmetic is used if the linear solve fails.
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] tt Index of tetrahedron in DT array.
+ *
+ *  \return void
+ */
+void update_circumcircle(tessellation *T, int tt)
+{
+  tetra *DT         = T->DT;
+  tetra_center *DTC = T->DTC;
+  point *DP         = T->DP;
+  tetra *t          = &DT[tt];
+  tetra_center *tc  = &DTC[tt];
+
+  if(t->t[0] < 0) /* deleted ? */
+    return;
+
+  point *p0 = &DP[t->p[0]];
+  point *p1 = &DP[t->p[1]];
+  point *p2 = &DP[t->p[2]];
+  point *p3 = &DP[t->p[3]];
+
+  if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
+    return;
+  /* a, b, c: mapped coordinates of p1, p2, p3 relative to p0 */
+#ifndef OPTIMIZE_MEMORY_USAGE
+  double ax = p1->xx - p0->xx;
+  double ay = p1->yy - p0->yy;
+  double az = p1->zz - p0->zz;
+
+  double bx = p2->xx - p0->xx;
+  double by = p2->yy - p0->yy;
+  double bz = p2->zz - p0->zz;
+
+  double cx = p3->xx - p0->xx;
+  double cy = p3->yy - p0->yy;
+  double cz = p3->zz - p0->zz;
+#else  /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double ax, ay, az, bx, by, bz, cx, cy, cz;
+  double pA_xyz[3], pB_xyz[3];
+  IntegerMapType pA_ixyz[3], pB_ixyz[3];
+
+  get_integers_for_point(p0, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  ax = pB_xyz[0] - pA_xyz[0];
+  ay = pB_xyz[1] - pA_xyz[1];
+  az = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  bx = pB_xyz[0] - pA_xyz[0];
+  by = pB_xyz[1] - pA_xyz[1];
+  bz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p3, pB_ixyz, pB_xyz);
+  cx = pB_xyz[0] - pA_xyz[0];
+  cy = pB_xyz[1] - pA_xyz[1];
+  cz = pB_xyz[2] - pA_xyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+  /* linear system a.x = |a|^2 / 2 (same for b and c); x is the center relative to p0 */
+  double aa = 0.5 * (ax * ax + ay * ay + az * az);
+  double bb = 0.5 * (bx * bx + by * by + bz * bz);
+  double cc = 0.5 * (cx * cx + cy * cy + cz * cz);
+
+  double mv_data[] = {ax, ay, az, aa, bx, by, bz, bb, cx, cy, cz, cc};
+  double x[3];
+
+  int status = solve_linear_equations(mv_data, x);
+
+  if(status < 0)
+    {
+      int orientation = Orient3d_Exact(p0, p1, p2, p3); /* evaluated once, reused for the diagnostics */
+      if(orientation != 1)
+        {
+          printf("p0 = %g %g %g\n", p0->x, p0->y, p0->z);
+          printf("p1 = %g %g %g\n", p1->x, p1->y, p1->z);
+          printf("p2 = %g %g %g\n", p2->x, p2->y, p2->z);
+          printf("p3 = %g %g %g\n", p3->x, p3->y, p3->z);
+          printf("Orient-Test=%d\n", orientation);
+          printf("tetra-volume=%g  tetra=%d\n", calculate_tetra_volume(p0, p1, p2, p3), tt);
+          /* the stored center is left unchanged in this case */
+          return;
+        }
+
+      double xc, yc, zc;
+
+      get_circumcircle_exact(T, tt, &xc, &yc, &zc);
+
+      tc->cx = xc;
+      tc->cy = yc;
+      tc->cz = zc;
+    }
+  else
+    {
+#ifndef OPTIMIZE_MEMORY_USAGE
+      x[0] += p0->xx;
+      x[1] += p0->yy;
+      x[2] += p0->zz;
+#else  /* #ifndef OPTIMIZE_MEMORY_USAGE */
+      x[0] += pA_xyz[0];
+      x[1] += pA_xyz[1];
+      x[2] += pA_xyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+      /* invert the mapping xx = (x - CentralOffset) * ConversionFac + 1.0 */
+      tc->cx = (x[0] - 1.0) / ConversionFac + CentralOffsetX;
+      tc->cy = (x[1] - 1.0) / ConversionFac + CentralOffsetY;
+      tc->cz = (x[2] - 1.0) / ConversionFac + CentralOffsetZ;
+    }
+}
+
+/*! \brief Returns the orientation of the tetrahedron.
+ *
+ *  Plain double precision sign of ((p1 - p0) x (p2 - p0)) . (p3 - p0).
+ *
+ *  \param[in] p0 Point spanning the tetrahedron.
+ *  \param[in] p1 Point spanning the tetrahedron.
+ *  \param[in] p2 Point spanning the tetrahedron.
+ *  \param[in] p3 Point spanning the tetrahedron.
+ *
+ *  \return -1: negative orientation; +1: positive orientation, vanishing
+ *          triple product, or any of the points is an infinity point.
+ */
+int test_tetra_orientation(point *p0, point *p1, point *p2, point *p3)
+{
+  double nx, ny, nz; /* (p1 - p0) x (p2 - p0) */
+  double dx, dy, dz; /* p3 - p0 */
+
+  if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
+    return +1;
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  nx = (p1->yy - p0->yy) * (p2->zz - p0->zz) - (p1->zz - p0->zz) * (p2->yy - p0->yy);
+  ny = (p1->zz - p0->zz) * (p2->xx - p0->xx) - (p1->xx - p0->xx) * (p2->zz - p0->zz);
+  nz = (p1->xx - p0->xx) * (p2->yy - p0->yy) - (p1->yy - p0->yy) * (p2->xx - p0->xx);
+  dx = p3->xx - p0->xx;
+  dy = p3->yy - p0->yy;
+  dz = p3->zz - p0->zz;
+#else  /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  IntegerMapType p0_ixyz[3], p1_ixyz[3], p2_ixyz[3], p3_ixyz[3];
+  double p0_xyz[3], p1_xyz[3], p2_xyz[3], p3_xyz[3];
+
+  get_integers_for_point(p0, p0_ixyz, p0_xyz);
+  get_integers_for_point(p1, p1_ixyz, p1_xyz);
+  get_integers_for_point(p2, p2_ixyz, p2_xyz);
+  get_integers_for_point(p3, p3_ixyz, p3_xyz); /* fetched once; the former second call was redundant */
+
+  nx = (p1_xyz[1] - p0_xyz[1]) * (p2_xyz[2] - p0_xyz[2]) - (p1_xyz[2] - p0_xyz[2]) * (p2_xyz[1] - p0_xyz[1]);
+  ny = (p1_xyz[2] - p0_xyz[2]) * (p2_xyz[0] - p0_xyz[0]) - (p1_xyz[0] - p0_xyz[0]) * (p2_xyz[2] - p0_xyz[2]);
+  nz = (p1_xyz[0] - p0_xyz[0]) * (p2_xyz[1] - p0_xyz[1]) - (p1_xyz[1] - p0_xyz[1]) * (p2_xyz[0] - p0_xyz[0]);
+  dx = p3_xyz[0] - p0_xyz[0];
+  dy = p3_xyz[1] - p0_xyz[1];
+  dz = p3_xyz[2] - p0_xyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+  return (nx * dx + ny * dy + nz * dz >= 0) ? +1 : -1;
+}
+
+/*! \brief Calculate the (unnormalized, signed) volume of a tetrahedron.
+ *
+ *  The result is ((p1 - p0) x (p2 - p0)) . (p3 - p0), evaluated from the
+ *  x, y, z members; this is six times the signed volume.
+ *  \param[in] p0 Point spanning the tetrahedron.
+ *  \param[in] p1 Point spanning the tetrahedron.
+ *  \param[in] p2 Point spanning the tetrahedron.
+ *  \param[in] p3 Point spanning the tetrahedron.
+ *
+ *  \return Triple product as above; +1 if any point is an infinity point.
+ */
+double calculate_tetra_volume(point *p0, point *p1, point *p2, point *p3)
+{
+  if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
+    return +1;
+
+  double ux = p1->x - p0->x, uy = p1->y - p0->y, uz = p1->z - p0->z;
+  double vx = p2->x - p0->x, vy = p2->y - p0->y, vz = p2->z - p0->z;
+  double wx = p3->x - p0->x, wy = p3->y - p0->y, wz = p3->z - p0->z;
+
+  return (uy * vz - uz * vy) * wx + (uz * vx - ux * vz) * wy + (ux * vy - uy * vx) * wz;
+}
+
+/*! \brief Adds a multiple of one matrix row to another row.
+ *         Auxiliary function for solve_linear_equations.
+ *
+ *  \param[in, out] m Matrix, stored row by row with 4 entries per row.
+ *  \param[in] r1 Index of the row that is modified.
+ *  \param[in] r2 Index of the row that is added to row r1.
+ *  \param[in] fac Factor by which row r2 is multiplied before it is added.
+ *  \return void
+ */
+void add_row(double *m, int r1, int r2, double fac)
+{
+  double *target       = &m[r1 * 4];
+  const double *source = &m[r2 * 4];
+  int col;
+
+  for(col = 0; col < 4; col++)
+    target[col] += fac * source[col];
+}
+
+/*! \brief Solve system of linear equations for 3d Voronoi construction.
+ *
+ *  Gaussian elimination with partial pivoting on the augmented system,
+ *  followed by back substitution. Each pivot is compared with GAUSS_EPS
+ *  before it is used as a divisor.
+ *
+ *  \param[in, out] m Augmented matrix (3 rows of 4 entries); overwritten.
+ *  \param[out] res Solution (3 entries); only written on success.
+ *
+ *  \return 0 if success, <0 if a pivot is smaller than GAUSS_EPS.
+ */
+int solve_linear_equations(double *m, double *res)
+{
+  int ix, iy, iz, itmp;
+
+  /* ix: row with the largest first-column entry; iy, iz: the other rows */
+  if(fabs(m[4]) > fabs(m[0]))
+    {
+      ix = 1;
+      iy = 0;
+      iz = 2;
+    }
+  else
+    {
+      ix = 0;
+      iy = 1;
+      iz = 2;
+    }
+
+  if(fabs(m[8]) > fabs(m[ix * 4]))
+    {
+      ix = 2;
+      iy = 0;
+      iz = 1;
+    }
+
+  /* first pivot: reject it before the elimination divides by it */
+  if(fabs(m[ix * 4]) < GAUSS_EPS)
+    return -3;
+
+  add_row(m, iy, ix, -m[iy * 4] / m[ix * 4]);
+  add_row(m, iz, ix, -m[iz * 4] / m[ix * 4]);
+
+  if(fabs(m[iz * 4 + 1]) > fabs(m[iy * 4 + 1]))
+    {
+      /* swap iy/iz */
+      itmp = iy;
+      iy   = iz;
+      iz   = itmp;
+    }
+
+  if(fabs(m[iy * 4 + 1]) < GAUSS_EPS)
+    return -1;
+
+  add_row(m, iz, iy, -m[iz * 4 + 1] / m[iy * 4 + 1]);
+
+  /* last pivot: reject it before the back substitution divides by it */
+  if(fabs(m[iz * 4 + 2]) < GAUSS_EPS)
+    return -1;
+
+  res[2] = m[iz * 4 + 3] / m[iz * 4 + 2];
+  res[1] = (m[iy * 4 + 3] - res[2] * m[iy * 4 + 2]) / m[iy * 4 + 1];
+  res[0] = (m[ix * 4 + 3] - res[2] * m[ix * 4 + 2] - res[1] * m[ix * 4 + 1]) / m[ix * 4];
+
+  return 0;
+}
+
+/*! \brief Converts coordinates of point p to integer values.
+ *
+ *  \param[in, out] p Point; x, y, z are read, xx, yy, zz, ix, iy, iz are set.
+ *
+ *  \return void (terminate() is called if a mapped value is outside [1, 2))
+ */
+#ifndef OPTIMIZE_MEMORY_USAGE
+void set_integers_for_pointer(point *p)
+{
+  p->xx = (p->x - CentralOffsetX) * ConversionFac + 1.0;
+  p->yy = (p->y - CentralOffsetY) * ConversionFac + 1.0;
+  p->zz = (p->z - CentralOffsetZ) * ConversionFac + 1.0;
+
+  int outside = (p->xx < 1.0 || p->xx >= 2.0) || (p->yy < 1.0 || p->yy >= 2.0) || (p->zz < 1.0 || p->zz >= 2.0);
+  if(outside)
+    {
+      printf("(%g, %g, %g) (%g, %g, %g)\n", p->x, p->y, p->z, p->xx, p->yy, p->zz);
+      terminate("invalid coordinate range");
+    }
+
+  p->ix = double_to_voronoiint(p->xx);
+  p->iy = double_to_voronoiint(p->yy);
+  p->iz = double_to_voronoiint(p->zz);
+  p->xx = mask_voronoi_int(p->xx);
+  p->yy = mask_voronoi_int(p->yy);
+  p->zz = mask_voronoi_int(p->zz);
+}
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */
+
+/*! \brief Checks if point is within a sphere using arbitrary precision
+ *         operations.
+ *
+ *  \param p0 Point 1 of tetrahedron.
+ *  \param p1 Point 2 of tetrahedron.
+ *  \param p2 Point 3 of tetrahedron.
+ *  \param p3 Point 4 of tetrahedron.
+ *  \param p Point to be checked if it is in circumsphere.
+ *  \return Sign of the exact determinant (-1,0,1); -1: in sphere, 0: on
+ *          surface, 1: outside. Also -1 if one of p0..p3 is infinity.
+ */
+int InSphere_Exact(point *p0, point *p1, point *p2, point *p3, point *p)
+{
+  IntegerMapType ax, bx, cx, dx;
+  IntegerMapType ay, by, cy, dy;
+  IntegerMapType az, bz, cz, dz;
+
+  if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
+    return -1;
+  /* a, b, c, d: integer coordinates of p0, p1, p2, p3 relative to p */
+#ifndef OPTIMIZE_MEMORY_USAGE
+  ax = p0->ix - p->ix;
+  ay = p0->iy - p->iy;
+  az = p0->iz - p->iz;
+
+  bx = p1->ix - p->ix;
+  by = p1->iy - p->iy;
+  bz = p1->iz - p->iz;
+
+  cx = p2->ix - p->ix;
+  cy = p2->iy - p->iy;
+  cz = p2->iz - p->iz;
+
+  dx = p3->ix - p->ix;
+  dy = p3->iy - p->iy;
+  dz = p3->iz - p->iz;
+#else  /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double pA_xyz[3], pB_xyz[3];
+  IntegerMapType pA_ixyz[3], pB_ixyz[3];
+
+  get_integers_for_point(p, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p0, pB_ixyz, pB_xyz);
+  ax = pB_ixyz[0] - pA_ixyz[0];
+  ay = pB_ixyz[1] - pA_ixyz[1];
+  az = pB_ixyz[2] - pA_ixyz[2];
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  bx = pB_ixyz[0] - pA_ixyz[0];
+  by = pB_ixyz[1] - pA_ixyz[1];
+  bz = pB_ixyz[2] - pA_ixyz[2];
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  cx = pB_ixyz[0] - pA_ixyz[0];
+  cy = pB_ixyz[1] - pA_ixyz[1];
+  cz = pB_ixyz[2] - pA_ixyz[2];
+
+  get_integers_for_point(p3, pB_ixyz, pB_xyz);
+  dx = pB_ixyz[0] - pA_ixyz[0];
+  dy = pB_ixyz[1] - pA_ixyz[1];
+  dz = pB_ixyz[2] - pA_ixyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+  /* 2x2 minors of the x/y columns, e.g. ab = ax * by - bx * ay */
+  mpz_t ab, bc, cd, da, ac, bd;
+
+  mpz_init(ab);
+  mpz_init(bc);
+  mpz_init(cd);
+  mpz_init(da);
+  mpz_init(ac);
+  mpz_init(bd);
+
+  mpz_t tmp, AA, BB, CC;
+
+  mpz_init(tmp);
+  mpz_init(AA);
+  mpz_init(BB);
+  mpz_init(CC);
+
+  MY_mpz_set_si(tmp, ax);
+  MY_mpz_mul_si(AA, tmp, by);
+  MY_mpz_set_si(tmp, bx);
+  MY_mpz_mul_si(BB, tmp, ay);
+  mpz_sub(ab, AA, BB);
+
+  MY_mpz_set_si(tmp, bx);
+  MY_mpz_mul_si(AA, tmp, cy);
+  MY_mpz_set_si(tmp, cx);
+  MY_mpz_mul_si(BB, tmp, by);
+  mpz_sub(bc, AA, BB);
+
+  MY_mpz_set_si(tmp, cx);
+  MY_mpz_mul_si(AA, tmp, dy);
+  MY_mpz_set_si(tmp, dx);
+  MY_mpz_mul_si(BB, tmp, cy);
+  mpz_sub(cd, AA, BB);
+
+  MY_mpz_set_si(tmp, dx);
+  MY_mpz_mul_si(AA, tmp, ay);
+  MY_mpz_set_si(tmp, ax);
+  MY_mpz_mul_si(BB, tmp, dy);
+  mpz_sub(da, AA, BB);
+
+  MY_mpz_set_si(tmp, ax);
+  MY_mpz_mul_si(AA, tmp, cy);
+  MY_mpz_set_si(tmp, cx);
+  MY_mpz_mul_si(BB, tmp, ay);
+  mpz_sub(ac, AA, BB);
+
+  MY_mpz_set_si(tmp, bx);
+  MY_mpz_mul_si(AA, tmp, dy);
+  MY_mpz_set_si(tmp, dx);
+  MY_mpz_mul_si(BB, tmp, by);
+  mpz_sub(bd, AA, BB);
+  /* 3x3 minors: the 2x2 minors combined with the z column */
+  mpz_t abc, bcd, cda, dab;
+
+  mpz_init(abc);
+  mpz_init(bcd);
+  mpz_init(cda);
+  mpz_init(dab);
+
+  MY_mpz_mul_si(AA, bc, az);
+  MY_mpz_mul_si(BB, ac, -bz);
+  MY_mpz_mul_si(CC, ab, cz);
+  mpz_add(tmp, AA, BB);
+  mpz_add(abc, tmp, CC);
+
+  MY_mpz_mul_si(AA, cd, bz);
+  MY_mpz_mul_si(BB, bd, -cz);
+  MY_mpz_mul_si(CC, bc, dz);
+  mpz_add(tmp, AA, BB);
+  mpz_add(bcd, tmp, CC);
+
+  MY_mpz_mul_si(AA, da, cz);
+  MY_mpz_mul_si(BB, ac, dz);
+  MY_mpz_mul_si(CC, cd, az);
+  mpz_add(tmp, AA, BB);
+  mpz_add(cda, tmp, CC);
+
+  MY_mpz_mul_si(AA, ab, dz);
+  MY_mpz_mul_si(BB, bd, az);
+  MY_mpz_mul_si(CC, da, bz);
+  mpz_add(tmp, AA, BB);
+  mpz_add(dab, tmp, CC);
+  /* a2, b2, c2, d2: squared lengths of a, b, c, d */
+  mpz_t a2, b2, c2, d2;
+
+  mpz_init(a2);
+  mpz_init(b2);
+  mpz_init(c2);
+  mpz_init(d2);
+
+  MY_mpz_set_si(tmp, ax);
+  MY_mpz_mul_si(AA, tmp, ax);
+  MY_mpz_set_si(tmp, ay);
+  MY_mpz_mul_si(BB, tmp, ay);
+  MY_mpz_set_si(tmp, az);
+  MY_mpz_mul_si(CC, tmp, az);
+  mpz_add(tmp, AA, BB);
+  mpz_add(a2, tmp, CC);
+
+  MY_mpz_set_si(tmp, bx);
+  MY_mpz_mul_si(AA, tmp, bx);
+  MY_mpz_set_si(tmp, by);
+  MY_mpz_mul_si(BB, tmp, by);
+  MY_mpz_set_si(tmp, bz);
+  MY_mpz_mul_si(CC, tmp, bz);
+  mpz_add(tmp, AA, BB);
+  mpz_add(b2, tmp, CC);
+
+  MY_mpz_set_si(tmp, cx);
+  MY_mpz_mul_si(AA, tmp, cx);
+  MY_mpz_set_si(tmp, cy);
+  MY_mpz_mul_si(BB, tmp, cy);
+  MY_mpz_set_si(tmp, cz);
+  MY_mpz_mul_si(CC, tmp, cz);
+  mpz_add(tmp, AA, BB);
+  mpz_add(c2, tmp, CC);
+
+  MY_mpz_set_si(tmp, dx);
+  MY_mpz_mul_si(AA, tmp, dx);
+  MY_mpz_set_si(tmp, dy);
+  MY_mpz_mul_si(BB, tmp, dy);
+  MY_mpz_set_si(tmp, dz);
+  MY_mpz_mul_si(CC, tmp, dz);
+  mpz_add(tmp, AA, BB);
+  mpz_add(d2, tmp, CC);
+
+  /* now calculate final result: (c2 * dab - d2 * abc) + (a2 * bcd - b2 * cda) */
+
+  mpz_mul(AA, c2, dab);
+  mpz_mul(BB, d2, abc);
+  mpz_sub(tmp, AA, BB);
+
+  mpz_mul(AA, a2, bcd);
+  mpz_mul(BB, b2, cda);
+  mpz_sub(CC, AA, BB);
+
+  mpz_add(AA, tmp, CC);
+
+  /* AA now contains the result */
+
+  int sign = mpz_sgn(AA);
+
+  mpz_clear(d2);
+  mpz_clear(c2);
+  mpz_clear(b2);
+  mpz_clear(a2);
+  mpz_clear(dab);
+  mpz_clear(cda);
+  mpz_clear(bcd);
+  mpz_clear(abc);
+  mpz_clear(CC);
+  mpz_clear(BB);
+  mpz_clear(AA);
+  mpz_clear(tmp);
+  mpz_clear(bd);
+  mpz_clear(ac);
+  mpz_clear(da);
+  mpz_clear(cd);
+  mpz_clear(bc);
+  mpz_clear(ab);
+
+  return sign;
+}
+
+/*! \brief Checks if point is within a sphere.
+ *         Plain double precision evaluation without any error bound.
+ *  \param p0 Point 1 of tetrahedron.
+ *  \param p1 Point 2 of tetrahedron.
+ *  \param p2 Point 3 of tetrahedron.
+ *  \param p3 Point 4 of tetrahedron.
+ *  \param p Point to be checked if it is in circumsphere.
+ *
+ *  \return (-1,0,1); -1: in sphere (or p0..p3 contain infinity), 0: on surface, 1: outside.
+ */
+int InSphere_Quick(point *p0, point *p1, point *p2, point *p3, point *p)
+{
+  double ax, bx, cx, dx;
+  double ay, by, cy, dy;
+  double az, bz, cz, dz;
+  double a2, b2, c2, d2;
+  double ab, bc, cd, da, ac, bd;
+  double abc, bcd, cda, dab;
+  double x;
+
+  if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
+    return -1;
+  /* a, b, c, d: mapped coordinates of p0, p1, p2, p3 relative to p */
+#ifndef OPTIMIZE_MEMORY_USAGE
+  ax = p0->xx - p->xx;
+  ay = p0->yy - p->yy;
+  az = p0->zz - p->zz;
+
+  bx = p1->xx - p->xx;
+  by = p1->yy - p->yy;
+  bz = p1->zz - p->zz;
+
+  cx = p2->xx - p->xx;
+  cy = p2->yy - p->yy;
+  cz = p2->zz - p->zz;
+
+  dx = p3->xx - p->xx;
+  dy = p3->yy - p->yy;
+  dz = p3->zz - p->zz;
+#else  /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double pA_xyz[3], pB_xyz[3];
+  IntegerMapType pA_ixyz[3], pB_ixyz[3];
+
+  get_integers_for_point(p, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p0, pB_ixyz, pB_xyz);
+  ax = pB_xyz[0] - pA_xyz[0];
+  ay = pB_xyz[1] - pA_xyz[1];
+  az = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  bx = pB_xyz[0] - pA_xyz[0];
+  by = pB_xyz[1] - pA_xyz[1];
+  bz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  cx = pB_xyz[0] - pA_xyz[0];
+  cy = pB_xyz[1] - pA_xyz[1];
+  cz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p3, pB_ixyz, pB_xyz);
+  dx = pB_xyz[0] - pA_xyz[0];
+  dy = pB_xyz[1] - pA_xyz[1];
+  dz = pB_xyz[2] - pA_xyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+  /* 2x2 minors of the x/y columns */
+  ab = ax * by - bx * ay;
+  bc = bx * cy - cx * by;
+  cd = cx * dy - dx * cy;
+  da = dx * ay - ax * dy;
+  ac = ax * cy - cx * ay;
+  bd = bx * dy - dx * by;
+  /* 3x3 minors: the 2x2 minors combined with the z column */
+  abc = az * bc - bz * ac + cz * ab;
+  bcd = bz * cd - cz * bd + dz * bc;
+  cda = cz * da + dz * ac + az * cd;
+  dab = dz * ab + az * bd + bz * da;
+  /* squared lengths of a, b, c, d */
+  a2 = ax * ax + ay * ay + az * az;
+  b2 = bx * bx + by * by + bz * bz;
+  c2 = cx * cx + cy * cy + cz * cz;
+  d2 = dx * dx + dy * dy + dz * dz;
+
+  x = ((c2 * dab - d2 * abc) + (a2 * bcd - b2 * cda));
+
+  if(x < 0)
+    return -1;
+  if(x > 0)
+    return +1;
+
+  return 0;
+}
+
+/*! \brief Checks if point is within a sphere with some error margin.
+ *
+ *  \param p0 Point 1 of tetrahedron.
+ *  \param p1 Point 2 of tetrahedron.
+ *  \param p2 Point 3 of tetrahedron.
+ *  \param p3 Point 4 of tetrahedron.
+ *  \param p Point to be checked if it is in circumsphere.
+ *
+ *  \return (-1,0,1); -1: in sphere (or p0..p3 contain infinity),
+ *                    0: on surface (within error margin), +1: outside.
+ */
+int InSphere_Errorbound(point *p0, point *p1, point *p2, point *p3, point *p)
+{
+  double ax, bx, cx, dx;
+  double ay, by, cy, dy;
+  double az, bz, cz, dz;
+  double a2, b2, c2, d2;
+  double ab, bc, cd, da, ac, bd;
+  double abc, bcd, cda, dab;
+  double x;
+
+  if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
+    return -1;
+  /* a, b, c, d: mapped coordinates of p0, p1, p2, p3 relative to p */
+#ifndef OPTIMIZE_MEMORY_USAGE
+  ax = p0->xx - p->xx;
+  ay = p0->yy - p->yy;
+  az = p0->zz - p->zz;
+
+  bx = p1->xx - p->xx;
+  by = p1->yy - p->yy;
+  bz = p1->zz - p->zz;
+
+  cx = p2->xx - p->xx;
+  cy = p2->yy - p->yy;
+  cz = p2->zz - p->zz;
+
+  dx = p3->xx - p->xx;
+  dy = p3->yy - p->yy;
+  dz = p3->zz - p->zz;
+#else  /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double pA_xyz[3], pB_xyz[3];
+  IntegerMapType pA_ixyz[3], pB_ixyz[3];
+
+  get_integers_for_point(p, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p0, pB_ixyz, pB_xyz);
+  ax = pB_xyz[0] - pA_xyz[0];
+  ay = pB_xyz[1] - pA_xyz[1];
+  az = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  bx = pB_xyz[0] - pA_xyz[0];
+  by = pB_xyz[1] - pA_xyz[1];
+  bz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  cx = pB_xyz[0] - pA_xyz[0];
+  cy = pB_xyz[1] - pA_xyz[1];
+  cz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p3, pB_ixyz, pB_xyz);
+  dx = pB_xyz[0] - pA_xyz[0];
+  dy = pB_xyz[1] - pA_xyz[1];
+  dz = pB_xyz[2] - pA_xyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+  /* the individual products are kept: they are reused for the size estimate below */
+  double axby = ax * by;
+  double bxay = bx * ay;
+  double bxcy = bx * cy;
+  double cxby = cx * by;
+  double cxdy = cx * dy;
+  double dxcy = dx * cy;
+  double dxay = dx * ay;
+  double axdy = ax * dy;
+  double axcy = ax * cy;
+  double cxay = cx * ay;
+  double bxdy = bx * dy;
+  double dxby = dx * by;
+
+  ab = axby - bxay;
+  bc = bxcy - cxby;
+  cd = cxdy - dxcy;
+  da = dxay - axdy;
+  ac = axcy - cxay;
+  bd = bxdy - dxby;
+
+  abc = az * bc - bz * ac + cz * ab;
+  bcd = bz * cd - cz * bd + dz * bc;
+  cda = cz * da + dz * ac + az * cd;
+  dab = dz * ab + az * bd + bz * da;
+
+  a2 = ax * ax + ay * ay + az * az;
+  b2 = bx * bx + by * by + bz * bz;
+  c2 = cx * cx + cy * cy + cz * cz;
+  d2 = dx * dx + dy * dy + dz * dz;
+
+  x = ((c2 * dab - d2 * abc) + (a2 * bcd - b2 * cda));
+
+  /* calculate absolute maximum size: same expression with every term replaced by its absolute value */
+
+  ab = fabs(axby) + fabs(bxay);
+  bc = fabs(bxcy) + fabs(cxby);
+  cd = fabs(cxdy) + fabs(dxcy);
+  da = fabs(dxay) + fabs(axdy);
+  ac = fabs(axcy) + fabs(cxay);
+  bd = fabs(bxdy) + fabs(dxby);
+
+  az = fabs(az);
+  bz = fabs(bz);
+  cz = fabs(cz);
+  dz = fabs(dz);
+
+  abc = az * bc + bz * ac + cz * ab;
+  bcd = bz * cd + cz * bd + dz * bc;
+  cda = cz * da + dz * ac + az * cd;
+  dab = dz * ab + az * bd + bz * da;
+
+  double sizelimit = ((c2 * dab + d2 * abc) + (a2 * bcd + b2 * cda));
+  /* results within 1.0e-14 * sizelimit of zero are reported as undecided (0) */
+  double errbound = 1.0e-14 * sizelimit;
+
+  if(x < -errbound)
+    return -1;
+  else if(x > errbound)
+    return +1;
+
+  return 0;
+}
+
+/*! \brief Returns orientation of tetrahedron using arbitrary precision
+ *         integer operations.
+ *
+ *  \param[in] p0 First point of tetrahedron.
+ *  \param[in] p1 Second point of tetrahedron.
+ *  \param[in] p2 Third point of tetrahedron.
+ *  \param[in] p3 Fourth point of tetrahedron.
+ *
+ *  \return (-1,0,1) -1 if negatively oriented, 0 if degenerate and 1 if
+ *                   positively oriented.
+ */
+int Orient3d_Exact(point *p0, point *p1, point *p2, point *p3)
+{
+  IntegerMapType ax, bx, cx;
+  IntegerMapType ay, by, cy;
+  IntegerMapType az, bz, cz;
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  ax = p0->ix - p3->ix;
+  ay = p0->iy - p3->iy;
+  az = p0->iz - p3->iz;
+
+  bx = p1->ix - p3->ix;
+  by = p1->iy - p3->iy;
+  bz = p1->iz - p3->iz;
+
+  cx = p2->ix - p3->ix;
+  cy = p2->iy - p3->iy;
+  cz = p2->iz - p3->iz;
+#else  /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double pA_xyz[3], pB_xyz[3];
+  IntegerMapType pA_ixyz[3], pB_ixyz[3];
+
+  get_integers_for_point(p3, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p0, pB_ixyz, pB_xyz);
+  ax = pB_ixyz[0] - pA_ixyz[0];
+  ay = pB_ixyz[1] - pA_ixyz[1];
+  az = pB_ixyz[2] - pA_ixyz[2];
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  bx = pB_ixyz[0] - pA_ixyz[0];
+  by = pB_ixyz[1] - pA_ixyz[1];
+  bz = pB_ixyz[2] - pA_ixyz[2];
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  cx = pB_ixyz[0] - pA_ixyz[0];
+  cy = pB_ixyz[1] - pA_ixyz[1];
+  cz = pB_ixyz[2] - pA_ixyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+  /* pairwise products of the y/z differences; the first factor is copied into an mpz (bz2, by2, ...) */
+  mpz_t bz_cy, by_cz, cz_ay, cy_az, az_by, ay_bz;
+  mpz_t bz2, by2, cz2, cy2, az2, ay2;
+
+  mpz_init(bz_cy);
+  mpz_init(bz2);
+  MY_mpz_set_si(bz2, bz);
+  MY_mpz_mul_si(bz_cy, bz2, cy);
+
+  mpz_init(by_cz);
+  mpz_init(by2);
+  MY_mpz_set_si(by2, by);
+  MY_mpz_mul_si(by_cz, by2, cz);
+
+  mpz_init(cz_ay);
+  mpz_init(cz2);
+  MY_mpz_set_si(cz2, cz);
+  MY_mpz_mul_si(cz_ay, cz2, ay);
+
+  mpz_init(cy_az);
+  mpz_init(cy2);
+  MY_mpz_set_si(cy2, cy);
+  MY_mpz_mul_si(cy_az, cy2, az);
+
+  mpz_init(az_by);
+  mpz_init(az2);
+  MY_mpz_set_si(az2, az);
+  MY_mpz_mul_si(az_by, az2, by);
+
+  mpz_init(ay_bz);
+  mpz_init(ay2);
+  MY_mpz_set_si(ay2, ay);
+  MY_mpz_mul_si(ay_bz, ay2, bz);
+  /* 2x2 minors of the y/z columns */
+  mpz_t bzcy_bycz, czay_cyaz, azby_aybz;
+
+  mpz_init(bzcy_bycz);
+  mpz_init(czay_cyaz);
+  mpz_init(azby_aybz);
+
+  mpz_sub(bzcy_bycz, bz_cy, by_cz);
+  mpz_sub(czay_cyaz, cz_ay, cy_az);
+  mpz_sub(azby_aybz, az_by, ay_bz);
+  /* res = ax * (bz*cy - by*cz) + bx * (cz*ay - cy*az) + cx * (az*by - ay*bz) */
+  mpz_t a, b, c, ab, res;
+
+  mpz_init(a);
+  mpz_init(b);
+  mpz_init(c);
+
+  MY_mpz_mul_si(a, bzcy_bycz, ax);
+  MY_mpz_mul_si(b, czay_cyaz, bx);
+  MY_mpz_mul_si(c, azby_aybz, cx);
+
+  mpz_init(ab);
+  mpz_init(res);
+
+  mpz_add(ab, a, b);
+  mpz_add(res, ab, c);
+
+  int sign = mpz_sgn(res);
+
+  mpz_clear(res);
+  mpz_clear(ab);
+  mpz_clear(c);
+  mpz_clear(b);
+  mpz_clear(a);
+  mpz_clear(azby_aybz);
+  mpz_clear(czay_cyaz);
+  mpz_clear(bzcy_bycz);
+  mpz_clear(ay2);
+  mpz_clear(ay_bz);
+  mpz_clear(az2);
+  mpz_clear(az_by);
+  mpz_clear(cy2);
+  mpz_clear(cy_az);
+  mpz_clear(cz2);
+  mpz_clear(cz_ay);
+  mpz_clear(by2);
+  mpz_clear(by_cz);
+  mpz_clear(bz2);
+  mpz_clear(bz_cy);
+
+  return sign;
+}
+
+/*! \brief Returns orientation of tetrahedron.
+ *
+ *  Plain double precision evaluation of the determinant of the mapped
+ *  coordinates relative to p3; no error bound, no exact fallback.
+ *  \param[in] p0 First point of tetrahedron.
+ *  \param[in] p1 Second point of tetrahedron.
+ *  \param[in] p2 Third point of tetrahedron.
+ *  \param[in] p3 Fourth point of tetrahedron.
+ *
+ *  \return (-1,0,1) -1 if negatively oriented, 0 if degenerate and 1 if
+ *                   positively oriented.
+ */
+int Orient3d_Quick(point *p0, point *p1, point *p2, point *p3)
+{
+  double a[3], b[3], c[3]; /* p0 - p3, p1 - p3 and p2 - p3 */
+  double det;
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  a[0] = p0->xx - p3->xx;
+  a[1] = p0->yy - p3->yy;
+  a[2] = p0->zz - p3->zz;
+
+  b[0] = p1->xx - p3->xx;
+  b[1] = p1->yy - p3->yy;
+  b[2] = p1->zz - p3->zz;
+
+  c[0] = p2->xx - p3->xx;
+  c[1] = p2->yy - p3->yy;
+  c[2] = p2->zz - p3->zz;
+#else  /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double ref_xyz[3], cur_xyz[3];
+  IntegerMapType ref_ixyz[3], cur_ixyz[3];
+  int k;
+
+  get_integers_for_point(p3, ref_ixyz, ref_xyz);
+
+  get_integers_for_point(p0, cur_ixyz, cur_xyz);
+  for(k = 0; k < 3; k++)
+    a[k] = cur_xyz[k] - ref_xyz[k];
+
+  get_integers_for_point(p1, cur_ixyz, cur_xyz);
+  for(k = 0; k < 3; k++)
+    b[k] = cur_xyz[k] - ref_xyz[k];
+
+  get_integers_for_point(p2, cur_ixyz, cur_xyz);
+  for(k = 0; k < 3; k++)
+    c[k] = cur_xyz[k] - ref_xyz[k];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+
+  det = (a[0] * (b[2] * c[1] - b[1] * c[2]) + b[0] * (c[2] * a[1] - c[1] * a[2]) + c[0] * (a[2] * b[1] - a[1] * b[2]));
+
+  /* a determinant that is neither positive nor negative yields 0 */
+  if(det > 0)
+    return +1;
+  if(det < 0)
+    return -1;
+
+  return 0;
+}
+
+/*! \brief Returns orientation of tetrahedron.
+ *         Double precision with error bound; exact test if inconclusive.
+ *  \param[in] p0 First point of tetrahedron.
+ *  \param[in] p1 Second point of tetrahedron.
+ *  \param[in] p2 Third point of tetrahedron.
+ *  \param[in] p3 Fourth point of tetrahedron.
+ *
+ *  \return (-1,0,1) the orientation of the 4 points; 0 if they are degenerate
+ *          or if any of the points is an infinity point.
+ */
+int Orient3d(point *p0, point *p1, point *p2, point *p3)
+{
+  if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
+    return 0;
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  double ax = p0->xx - p3->xx;
+  double ay = p0->yy - p3->yy;
+  double az = p0->zz - p3->zz;
+
+  double bx = p1->xx - p3->xx;
+  double by = p1->yy - p3->yy;
+  double bz = p1->zz - p3->zz;
+
+  double cx = p2->xx - p3->xx;
+  double cy = p2->yy - p3->yy;
+  double cz = p2->zz - p3->zz;
+#else  /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  double ax, ay, az, bx, by, bz, cx, cy, cz;
+  double pA_xyz[3], pB_xyz[3];
+  IntegerMapType pA_ixyz[3], pB_ixyz[3];
+
+  get_integers_for_point(p3, pA_ixyz, pA_xyz);
+
+  get_integers_for_point(p0, pB_ixyz, pB_xyz);
+  ax = pB_xyz[0] - pA_xyz[0];
+  ay = pB_xyz[1] - pA_xyz[1];
+  az = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p1, pB_ixyz, pB_xyz);
+  bx = pB_xyz[0] - pA_xyz[0];
+  by = pB_xyz[1] - pA_xyz[1];
+  bz = pB_xyz[2] - pA_xyz[2];
+
+  get_integers_for_point(p2, pB_ixyz, pB_xyz);
+  cx = pB_xyz[0] - pA_xyz[0];
+  cy = pB_xyz[1] - pA_xyz[1];
+  cz = pB_xyz[2] - pA_xyz[2];
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+  /* the individual products are kept: they also enter the size estimate */
+  double bzcy = bz * cy;
+  double bycz = by * cz;
+  double czay = cz * ay;
+  double cyaz = cy * az;
+  double azby = az * by;
+  double aybz = ay * bz;
+
+  double x = ax * (bzcy - bycz) + bx * (czay - cyaz) + cx * (azby - aybz);
+
+  double sizelimit =
+      fabs(ax) * (fabs(bzcy) + fabs(bycz)) + fabs(bx) * (fabs(czay) + fabs(cyaz)) + fabs(cx) * (fabs(azby) + fabs(aybz));
+  /* tolerance: 1.0e-14 times the sum of the absolute values of all terms */
+  double errbound = 1.0e-14 * sizelimit;
+
+  if(x < -errbound)
+    return -1;
+  else if(x > errbound)
+    return +1;
+  /* inconclusive in double precision: decide with exact arithmetic */
+  return Orient3d_Exact(p0, p1, p2, p3);
+}
+
+/*! \brief Data structure for face sort
+ *
+ *  sort_faces_by_ID() creates two of these per face record, one for each of
+ *  the two cell IDs stored in the record; both refer to the same vertex
+ *  list. The array is sorted by ID and, on non-writing tasks, sent as raw
+ *  bytes via MPI.
+ */
+struct data_face_sort /* for sorting faces */
+{
+  MyIDType ID;     /* ID of corresponding cell */
+  float normal[3]; /* non-normalized normal vector (difference of the two Delaunay point positions) */
+  int start;       /* index into VertexEntries of the first vertex index of this face */
+  int len;         /* number of vertex indices stored for this face */
+};
+
+/* File-local working state of the Voronoi mesh output routines below. */
+static int *VertexEntries;       /* face record list; per face: ID of first cell, ID of second cell, vertex count, tetra indices */
+static float *VertexCoordinates; /* Voronoi vertex coordinates (circumsphere centers of delaunay tetras), 3 floats per vertex */
+static float *FaceNormals;       /* normal vector components, 6 floats per face (one 3-vector for each side) */
+static int Nvertices;            /* number of Voronoi vertices */
+static int Nnormals;             /* number of floats stored in FaceNormals */
+static int Nentries;             /* number of entries in Voronoi face vertex list (including IDs and face vertex count) */
+static int Nsort;                /* number of ID sorted faces */
+static int MaxEntries, MaxFaces; /* for allocation */
+static struct data_face_sort *FaceSort; /* per-side face list filled and sorted by sort_faces_by_ID() */
+
+/*! \brief  Face sorting kernel
+ *
+ *  Comparison function for qsort() that orders data_face_sort elements by
+ *  their ID member only.
+ *
+ *  \param[in] a First element.
+ *  \param[in] b Second element.
+ *
+ *  \return (-1,0,1), -1 if a->ID < b->ID, +1 if a->ID > b->ID, 0 otherwise.
+ */
+int compare_face_sort(const void *a, const void *b)
+{
+  const struct data_face_sort *fa = (const struct data_face_sort *)a;
+  const struct data_face_sort *fb = (const struct data_face_sort *)b;
+
+  if(fa->ID > fb->ID)
+    return +1;
+
+  return (fa->ID < fb->ID) ? -1 : 0;
+}
+
+/*! \brief Gathers faces in list.
+ *
+ *  Loops over the edges of all non-deleted tetrahedra. An edge that has not
+ *  been flagged in Edge_visited yet yields one face, unless one of its end
+ *  points has a negative index or neither end point belongs to this task
+ *  with an index in [0, NumGas). For such a face the ring of tetrahedra
+ *  around the edge is walked twice: the first pass counts the tetrahedra,
+ *  the second pass stores their indices and flags the edge as visited in
+ *  every tetrahedron of the ring.
+ *
+ *  Per face the following is appended:
+ *   - FaceNormals: 6 floats, the difference vector of the two Delaunay
+ *     points in both directions.
+ *   - VertexEntries: ID of first point, ID of second point, number of tetra
+ *     indices, followed by the tetra indices. The starting tetrahedron is
+ *     listed both first and last.
+ *
+ *  The run is terminated if VertexEntries (MaxEntries elements) or
+ *  FaceNormals (MaxFaces elements) cannot take the next face. This is
+ *  checked before the face is stored.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *
+ *  \return void
+ */
+void get_voronoi_face_vertex_indices(tessellation *T)
+{
+  int i, j, k, l, m, ii, jj, kk, ll, tetra_nr, edge_nr, next_tetra_nr, count, dp_1, dp_2;
+  tetra *prev, *next;
+  tetra *DT = T->DT;
+  point *DP = T->DP;
+  int bit, nr_next;
+
+  /* loop over tetras */
+  for(tetra_nr = 0; tetra_nr < Mesh.Ndt; tetra_nr++)
+    {
+      if(Mesh.DT[tetra_nr].t[0] < 0) /* skip deleted tetras */
+        continue;
+
+      /* edge flagging */
+      bit     = 1;
+      edge_nr = 0;
+
+      /* loop over edges */
+      while(Edge_visited[tetra_nr] != EDGE_ALL)
+        {
+          if((Edge_visited[tetra_nr] & bit) != 0)
+            {
+              bit <<= 1;
+              edge_nr++;
+              continue;
+            }
+
+          tetra *t = &DT[tetra_nr];
+
+          /* edge-point relation */
+          i = edge_start[edge_nr];
+          j = edge_end[edge_nr];
+          k = edge_opposite[edge_nr];
+          l = edge_nexttetra[edge_nr];
+
+          /* mark edge as visited */
+          Edge_visited[tetra_nr] |= (1 << edge_nr);
+
+          /* delaunay points on both side of face */
+          dp_1 = t->p[i];
+          dp_2 = t->p[j];
+
+          /* skip large tetra */
+          if(dp_1 < 0 || dp_2 < 0)
+            {
+              bit <<= 1;
+              edge_nr++;
+              continue;
+            }
+
+          /* skip ghost points (both local and foreign) */
+          if((DP[dp_1].task != ThisTask || DP[dp_1].index < 0 || DP[dp_1].index >= NumGas) &&
+             (DP[dp_2].task != ThisTask || DP[dp_2].index < 0 || DP[dp_2].index >= NumGas))
+            {
+              bit <<= 1;
+              edge_nr++;
+              continue;
+            }
+
+          /* count number of face vertices */
+          count = 0;
+          prev  = t;
+
+          do
+            {
+              count++;
+              next_tetra_nr = prev->t[l];
+              next          = &DT[next_tetra_nr];
+
+              /* locate the shared points in the neighbouring tetra */
+              for(m = 0, ll = ii = jj = -1; m < 4; m++)
+                {
+                  if(next->p[m] == prev->p[k])
+                    ll = m;
+                  if(next->p[m] == prev->p[i])
+                    ii = m;
+                  if(next->p[m] == prev->p[j])
+                    jj = m;
+                }
+
+              if(ll < 0 || ii < 0 || jj < 0)
+                terminate("inconsistency");
+
+              /* the four local indices sum to 0+1+2+3 = 6 */
+              kk = 6 - (ll + ii + jj);
+              i  = ii;
+              l  = ll;
+              j  = jj;
+              k  = kk;
+
+              prev = next;
+            }
+          while(next != t);
+
+          /* the starting tetra is stored in addition to the ring entries */
+          count++;
+
+          /* make sure the face fits before anything is stored:
+           * 3 header entries plus count tetra indices, and 6 normal components */
+          if(Nentries + 3 + count > MaxEntries)
+            terminate("Nentries > MaxEntries");
+
+          if(Nnormals + 6 > MaxFaces)
+            terminate("Nnormals > MaxFaces");
+
+          /* get face normals (from both sides) */
+          FaceNormals[Nnormals++] = (DP[dp_2].x - DP[dp_1].x);
+          FaceNormals[Nnormals++] = (DP[dp_2].y - DP[dp_1].y);
+          FaceNormals[Nnormals++] = (DP[dp_2].z - DP[dp_1].z);
+          FaceNormals[Nnormals++] = (DP[dp_1].x - DP[dp_2].x);
+          FaceNormals[Nnormals++] = (DP[dp_1].y - DP[dp_2].y);
+          FaceNormals[Nnormals++] = (DP[dp_1].z - DP[dp_2].z);
+
+          /* fill vertex entry list, first ID, count then tetra numbers */
+          VertexEntries[Nentries++] = (int)DP[dp_1].ID;
+          VertexEntries[Nentries++] = (int)DP[dp_2].ID;
+          VertexEntries[Nentries++] = (int)count;
+          VertexEntries[Nentries++] = (int)tetra_nr;
+
+          /* get tetra indices of face vertices */
+          prev = t;
+          do
+            {
+              next_tetra_nr = prev->t[l];
+              next          = &DT[next_tetra_nr];
+
+              VertexEntries[Nentries++] = (int)next_tetra_nr;
+
+              for(m = 0, ll = ii = jj = -1; m < 4; m++)
+                {
+                  if(next->p[m] == prev->p[k])
+                    ll = m;
+                  if(next->p[m] == prev->p[i])
+                    ii = m;
+                  if(next->p[m] == prev->p[j])
+                    jj = m;
+                }
+
+              if(ll < 0 || ii < 0 || jj < 0)
+                terminate("inconsistency");
+
+              kk = 6 - (ll + ii + jj);
+
+              /* flag edge */
+              for(nr_next = 0; nr_next < 6; nr_next++)
+                if((edge_start[nr_next] == ii && edge_end[nr_next] == jj) || (edge_start[nr_next] == jj && edge_end[nr_next] == ii))
+                  {
+                    /* only the starting tetra may already carry the flag */
+                    if((Edge_visited[next_tetra_nr] & (1 << nr_next)) && next != t)
+                      terminate("inconsistency");
+
+                    Edge_visited[next_tetra_nr] |= (1 << nr_next);
+                    break;
+                  }
+
+              i = ii;
+              l = ll;
+              j = jj;
+              k = kk;
+
+              prev = next;
+            }
+          while(next != t);
+
+          bit <<= 1;
+          edge_nr++;
+        }
+    }
+}
+
+/*! \brief Set Vertex coordinates in the respective array.
+ *
+ *  Copies the coordinates cx, cy, cz of every entry of the DTC array of the
+ *  tessellation to the designated array VertexCoordinates, appending after
+ *  the Nvertices vertices already stored and increasing Nvertices by one
+ *  per entry. No tetrahedron is skipped.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *
+ *  \return void
+ */
+void get_voronoi_face_vertex_coordinates(tessellation *T)
+{
+  int n;
+
+  for(n = 0; n < T->Ndt; n++, Nvertices++)
+    {
+      float *dst = &VertexCoordinates[3 * Nvertices];
+
+      dst[0] = T->DTC[n].cx;
+      dst[1] = T->DTC[n].cy;
+      dst[2] = T->DTC[n].cz;
+    }
+}
+
+/*! \brief Function calls qsort for sorting faces by ID.
+ *
+ *  Converts the face records in VertexEntries (ID of first cell, ID of
+ *  second cell, vertex count, vertex indices) into two FaceSort elements
+ *  each, one per cell ID. Both elements point to the same vertex list; the
+ *  normals are taken consecutively from FaceNormals. Sets Nsort and sorts
+ *  FaceSort with compare_face_sort as comparison function.
+ *
+ *  An empty list (Nentries == 0) yields Nsort == 0. The run is terminated
+ *  before FaceSort would be filled beyond MaxFaces elements.
+ *
+ *  Requires arrays FaceSort, VertexEntries and FaceNormals.
+ *
+ *  \return void
+ */
+void sort_faces_by_ID(void)
+{
+  int i = 0, j = 0, k = 0, side;
+
+  while(i < Nentries)
+    {
+      /* each record adds two elements; check the capacity before writing */
+      if(j + 2 > MaxFaces)
+        terminate("j > MaxFaces");
+
+      for(side = 0; side < 2; side++, j++)
+        {
+          FaceSort[j].ID        = VertexEntries[i + side];
+          FaceSort[j].start     = i + 3;
+          FaceSort[j].len       = VertexEntries[i + 2];
+          FaceSort[j].normal[0] = FaceNormals[k++];
+          FaceSort[j].normal[1] = FaceNormals[k++];
+          FaceSort[j].normal[2] = FaceNormals[k++];
+        }
+
+      /* advance over the 3 header entries and the vertex list */
+      i += 3 + VertexEntries[i + 2];
+    }
+
+  Nsort = j;
+
+  /* sort faces by ID */
+  qsort(FaceSort, Nsort, sizeof(struct data_face_sort), compare_face_sort);
+}
+
+/*! \brief Outputs Voronoi vertex indices to file.
+ *
+ *  Gathers the faces of this task, sorts them by cell ID and outputs the
+ *  face vertex indices and face normals of tasks writeTask to lastTask.
+ *  Only writeTask opens the files; the other tasks send their data to it.
+ *
+ *  Layout of fname1: total number of integers that follow, then for every
+ *  sorted face the cell ID, the number of vertex indices and the vertex
+ *  indices (all written as int).
+ *  Layout of fname2: total number of floats that follow (int), then 3
+ *  floats per sorted face.
+ *
+ *  Tasks other than writeTask add the number of vertices of all preceding
+ *  tasks to their entries before sending them.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] fname1 File name of file index data is written in.
+ *  \param[in] fname2 File name of file face data is written in.
+ *  \param[in] writeTask Task that gathers information and writes data.
+ *  \param[in] lastTask Last task that is included in this dump.
+ *
+ *  \return void
+ */
+void write_voronoi_face_vertex_indices(tessellation *T, char *fname1, char *fname2, int writeTask, int lastTask)
+{
+  FILE *fd1, *fd2;
+  MPI_Status status;
+  int nVertices_tot, nEntries_tot, nNormals_tot;
+  int nVertices_before, nEntries_raw, i, id, task, *tmp;
+  int *Nvertices_list, *Nentries_list, *Nnormals_list, *Nsort_list;
+  struct data_face_sort *tmp_sort;
+
+  VertexEntries = mymalloc("VertexEntries", MaxEntries * sizeof(int));
+  FaceNormals   = mymalloc("FaceNormals", MaxFaces * sizeof(float));
+
+  /* get faces */
+  get_voronoi_face_vertex_indices(T);
+
+  /* number of elements of VertexEntries that have actually been filled */
+  nEntries_raw = Nentries;
+
+  FaceSort = (struct data_face_sort *)mymalloc("face_sort", sizeof(struct data_face_sort) * MaxFaces);
+
+  /* sort faces */
+  sort_faces_by_ID();
+
+  /* from here on Nentries is the number of integers this task contributes
+   * to the index file: ID, length and vertex list of every sorted face */
+  Nentries = 0;
+  for(i = 0; i < Nsort; i++)
+    Nentries += FaceSort[i].len + 2;
+
+  /* I/O */
+  Nvertices_list = mymalloc("Nvertices_list", sizeof(int) * NTask);
+  Nentries_list  = mymalloc("Nentries_list", sizeof(int) * NTask);
+  Nsort_list     = mymalloc("Nsort_list", sizeof(int) * NTask);
+  Nnormals_list  = mymalloc("Nnormals_list", sizeof(int) * NTask);
+
+  if(ThisTask == writeTask)
+    {
+      nVertices_tot = Nvertices;
+      nEntries_tot  = Nentries;
+      nNormals_tot  = Nnormals;
+      for(task = writeTask + 1; task <= lastTask; task++)
+        {
+          MPI_Recv(&Nvertices_list[task], 1, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status);
+          MPI_Recv(&Nentries_list[task], 1, MPI_INT, task, TAG_LOCALN + 1, MPI_COMM_WORLD, &status);
+          MPI_Recv(&Nsort_list[task], 1, MPI_INT, task, TAG_LOCALN + 2, MPI_COMM_WORLD, &status);
+          MPI_Recv(&Nnormals_list[task], 1, MPI_INT, task, TAG_LOCALN + 3, MPI_COMM_WORLD, &status);
+          /* tell the task how many vertices precede its own ones */
+          MPI_Send(&nVertices_tot, 1, MPI_INT, task, TAG_N, MPI_COMM_WORLD);
+          nVertices_tot += Nvertices_list[task];
+          nEntries_tot += Nentries_list[task];
+          nNormals_tot += Nnormals_list[task];
+        }
+      if(!(fd1 = fopen(fname1, "w")))
+        terminate("I/O error");
+
+      if(!(fd2 = fopen(fname2, "w")))
+        terminate("I/O error");
+
+      my_fwrite(&nEntries_tot, sizeof(int), 1, fd1);
+      my_fwrite(&nNormals_tot, sizeof(int), 1, fd2);
+      for(i = 0; i < Nsort; i++)
+        {
+          /* convert the ID by value instead of writing its first bytes */
+          id = (int)FaceSort[i].ID;
+          my_fwrite(&id, sizeof(int), 1, fd1);
+          my_fwrite(&FaceSort[i].len, sizeof(int), 1, fd1);
+          my_fwrite(&VertexEntries[FaceSort[i].start], sizeof(int) * FaceSort[i].len, 1, fd1);
+          my_fwrite(FaceSort[i].normal, 3 * sizeof(float), 1, fd2);
+        }
+
+      for(task = writeTask + 1; task <= lastTask; task++)
+        {
+          tmp_sort = (struct data_face_sort *)mymalloc("tmp_sort", sizeof(struct data_face_sort) * Nsort_list[task]);
+          tmp      = mymalloc("tmp", sizeof(int) * Nentries_list[task]);
+          /* Nentries_list[task] is an upper bound: the message holds the
+           * sender's raw entry list, which is never longer than that */
+          MPI_Recv(tmp, Nentries_list[task], MPI_INT, task, TAG_N + 1, MPI_COMM_WORLD, &status);
+          MPI_Recv(tmp_sort, Nsort_list[task] * sizeof(struct data_face_sort), MPI_BYTE, task, TAG_N + 2, MPI_COMM_WORLD, &status);
+
+          for(i = 0; i < Nsort_list[task]; i++)
+            {
+              id = (int)tmp_sort[i].ID;
+              my_fwrite(&id, sizeof(int), 1, fd1);
+              my_fwrite(&tmp_sort[i].len, sizeof(int), 1, fd1);
+              my_fwrite(&tmp[tmp_sort[i].start], sizeof(int) * tmp_sort[i].len, 1, fd1);
+              my_fwrite(tmp_sort[i].normal, 3 * sizeof(float), 1, fd2);
+            }
+          myfree(tmp);
+          myfree(tmp_sort);
+        }
+      fclose(fd2);
+      fclose(fd1);
+    }
+  else
+    {
+      MPI_Send(&Nvertices, 1, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD);
+      MPI_Send(&Nentries, 1, MPI_INT, writeTask, TAG_LOCALN + 1, MPI_COMM_WORLD);
+      MPI_Send(&Nsort, 1, MPI_INT, writeTask, TAG_LOCALN + 2, MPI_COMM_WORLD);
+      MPI_Send(&Nnormals, 1, MPI_INT, writeTask, TAG_LOCALN + 3, MPI_COMM_WORLD);
+      MPI_Recv(&nVertices_before, 1, MPI_INT, writeTask, TAG_N, MPI_COMM_WORLD, &status);
+      /* only the filled part of VertexEntries is shifted and sent. The
+       * shift also hits the ID and count entries of each record, which the
+       * writing task does not read: it takes ID and len from FaceSort */
+      for(i = 0; i < nEntries_raw; i++)
+        if(VertexEntries[i] >= 0)
+          VertexEntries[i] += nVertices_before;
+      MPI_Send(VertexEntries, nEntries_raw, MPI_INT, writeTask, TAG_N + 1, MPI_COMM_WORLD);
+      MPI_Send(FaceSort, Nsort * sizeof(struct data_face_sort), MPI_BYTE, writeTask, TAG_N + 2, MPI_COMM_WORLD);
+    }
+
+  /* release in reverse order of allocation */
+  myfree(Nnormals_list);
+  myfree(Nsort_list);
+  myfree(Nentries_list);
+  myfree(Nvertices_list);
+  myfree(FaceSort);
+  myfree(FaceNormals);
+  myfree(VertexEntries);
+}
+
+/*! \brief Outputs Voronoi vertex coordinates to file.
+ *
+ *  Collects the vertex coordinates of this task and outputs those of tasks
+ *  writeTask to lastTask in file fname. Only writeTask opens the file; it
+ *  writes the total number of vertices (int), its own coordinates and then
+ *  the coordinates received from each following task, 3 floats per vertex.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] fname File name of file the data is written in.
+ *  \param[in] writeTask Task that gathers information and writes data.
+ *  \param[in] lastTask Last task that is included in this dump.
+ *
+ *  \return void
+ */
+void write_voronoi_face_vertex_coordinates(tessellation *T, char *fname, int writeTask, int lastTask)
+{
+  MPI_Status status;
+  FILE *out;
+  float *recvbuf;
+  int *counts;
+  int total, src;
+
+  VertexCoordinates = mymalloc("VertexCoordinates", MaxEntries * 3 * sizeof(float));
+
+  /* fill VertexCoordinates and Nvertices for this task */
+  get_voronoi_face_vertex_coordinates(T);
+
+  counts = mymalloc("Nvertices_list", sizeof(int) * NTask);
+
+  if(ThisTask != writeTask)
+    {
+      /* hand count and coordinates over to the writing task */
+      MPI_Send(&Nvertices, 1, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD);
+      MPI_Send(VertexCoordinates, 3 * Nvertices, MPI_FLOAT, writeTask, TAG_N + 1, MPI_COMM_WORLD);
+    }
+  else
+    {
+      /* the header needs the total, so collect all counts first */
+      total = Nvertices;
+      for(src = writeTask + 1; src <= lastTask; src++)
+        {
+          MPI_Recv(&counts[src], 1, MPI_INT, src, TAG_LOCALN, MPI_COMM_WORLD, &status);
+          total += counts[src];
+        }
+
+      out = fopen(fname, "w");
+      if(!out)
+        terminate("I/O error");
+
+      my_fwrite(&total, sizeof(int), 1, out);
+      my_fwrite(VertexCoordinates, sizeof(float), 3 * Nvertices, out);
+
+      for(src = writeTask + 1; src <= lastTask; src++)
+        {
+          recvbuf = mymalloc("tmp", 3 * sizeof(float) * counts[src]);
+          MPI_Recv(recvbuf, 3 * counts[src], MPI_FLOAT, src, TAG_N + 1, MPI_COMM_WORLD, &status);
+          my_fwrite(recvbuf, sizeof(float), 3 * counts[src], out);
+          myfree(recvbuf);
+        }
+
+      fclose(out);
+    }
+
+  myfree(counts);
+  myfree(VertexCoordinates);
+}
+
+/*! \brief Outputs Voronoi mesh to file.
+ *
+ *  Outputs the Voronoi mesh data from task writeTask to lastTask. fname is
+ *  used as base name of three files:
+ *    <fname>_coordinates.dat, <fname>_indices.dat and <fname>_normals.dat.
+ *  The run is terminated if one of these names does not fit into the
+ *  internal file name buffers.
+ *
+ *  Sets the allocation limits MaxEntries and MaxFaces from NumGas and
+ *  resets the counters Nvertices, Nentries and Nnormals.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] fname Base name of the files the data is written in.
+ *  \param[in] writeTask Task that gathers information and writes data.
+ *  \param[in] lastTask Last task that is included in this dump.
+ *
+ *  \return void
+ */
+void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask)
+{
+  char buf1[255], buf2[255];
+  int i, len;
+
+  MaxEntries = 1000 * NumGas;
+  MaxFaces   = 100 * NumGas;
+
+  /* coordinates */
+  Nvertices = 0;
+  len       = snprintf(buf1, sizeof(buf1), "%s_coordinates.dat", fname);
+  if(len < 0 || len >= (int)sizeof(buf1))
+    terminate("file name too long");
+
+  write_voronoi_face_vertex_coordinates(T, buf1, writeTask, lastTask);
+
+  /* indices */
+  Edge_visited = mymalloc_movable(&Edge_visited, "Edge_visited", Mesh.Ndt * sizeof(unsigned char));
+  for(i = 0; i < Mesh.Ndt; i++)
+    Edge_visited[i] = 0;
+
+  Nentries = 0;
+  Nnormals = 0;
+
+  len = snprintf(buf1, sizeof(buf1), "%s_indices.dat", fname);
+  if(len < 0 || len >= (int)sizeof(buf1))
+    terminate("file name too long");
+
+  len = snprintf(buf2, sizeof(buf2), "%s_normals.dat", fname);
+  if(len < 0 || len >= (int)sizeof(buf2))
+    terminate("file name too long");
+
+  write_voronoi_face_vertex_indices(T, buf1, buf2, writeTask, lastTask);
+  myfree(Edge_visited);
+}
+
+#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_check.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_check.c
new file mode 100644
index 0000000000..42c6f06b1f
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_check.c
@@ -0,0 +1,407 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi_check.c
+ * \date        05/2018
+ * \brief       Algorithms to check Voronoi mesh construction.
+ * \details     contains functions:
+ *                void check_for_min_distance(tessellation * T)
+ *                void check_links(tessellation * T)
+ *                void check_orientations(tessellation * T)
+ *                void check_tetras(tessellation * T, int npoints)
+ *                int points_compare(const void *a, const void *b)
+ *                void check_triangles(tessellation * T, int npoints)
+ *                void check_orientations(tessellation * T)
+ *                void check_links(tessellation * T)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 22.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+#if !defined(TWODIMS) && !defined(ONEDIMS) /* three-dimensional test code */
+
+int points_compare(const void *a, const void *b);
+
+/*! \brief Checks minimum distance between Delaunay points making sure it is
+ *         nonzero.
+ *
+ *  Compares every pair of the T->Ndp Delaunay points. The run is terminated
+ *  as soon as a pair with zero distance is found; otherwise the smallest
+ *  distance is printed. The index of the outer loop is printed for every
+ *  point.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *
+ *  \return void
+ */
+void check_for_min_distance(tessellation *T)
+{
+  point *DP = T->DP;
+  int i, j;
+  double dx, dy, dz, r2, r2min;
+  char msg[200];
+
+  for(i = 0, r2min = 1.0e30; i < T->Ndp; i++)
+    {
+      printf("i=%d\n", i);
+
+      for(j = i + 1; j < T->Ndp; j++)
+        {
+          dx = DP[i].x - DP[j].x;
+          dy = DP[i].y - DP[j].y;
+          dz = DP[i].z - DP[j].z;
+
+          r2 = dx * dx + dy * dy + dz * dz;
+          if(r2 < r2min)
+            r2min = r2;
+
+          /* r2min can only have become zero through the current pair */
+          if(r2min == 0)
+            {
+              snprintf(msg, sizeof(msg), "i=%d j=%d equal.  DP[i].index=%d DP[j].index=%d\n", i, j, DP[i].index, DP[j].index);
+              terminate(msg);
+            }
+        }
+    }
+
+  printf("min distance=%g\n", sqrt(r2min));
+}
+
+/*! \brief Checks if tessellation links are correct.
+ *
+ *  For every non-deleted tetrahedron and each of its four neighbours it is
+ *  checked that the neighbour links back to the tetrahedron (a mismatch is
+ *  only printed) and that both tetrahedra agree on the three points of the
+ *  face between them (a mismatch terminates the run).
+ *
+ *  \param[in] T Pointer to tessellation.
+ *
+ *  \return void
+ */
+void check_links(tessellation *T)
+{
+  tetra *DT = T->DT;
+  int pl[3], pr[3];
+  char msg[200];
+  int flag = 0;
+  int i, j, s, c;
+
+  for(i = 0; i < T->Ndt; i++)
+    {
+      tetra *t = &DT[i];
+
+      if(t->t[0] < 0) /* deleted ? */
+        continue;
+
+      /* back links: report only */
+      for(j = 0; j < 4; j++)
+        {
+          tetra *ngb = &DT[t->t[j]];
+
+          if(ngb->t[t->s[j]] != i)
+            printf("LINK for tetra=%d j=%d DT[i].s[j]=%d incorrect %d\n", i, j, t->s[j], (int)(ngb->t[t->s[j]]));
+        }
+
+      /* shared faces: both sides have to list the same three points */
+      for(j = 0; j < 4; j++)
+        {
+          tetra *ngb = &DT[t->t[j]];
+
+          c = 0;
+          for(s = 0; s < 4; s++)
+            if(s != j)
+              pl[c++] = t->p[s];
+
+          c = 0;
+          for(s = 0; s < 4; s++)
+            if(s != t->s[j])
+              pr[c++] = ngb->p[s];
+
+          /* sort the points so that they can be compared one by one */
+          mysort(pl, 3, sizeof(int), points_compare);
+          mysort(pr, 3, sizeof(int), points_compare);
+
+          for(s = 0; s < 3; s++)
+            if(pl[s] != pr[s])
+              {
+                sprintf(msg, "LINK for i=%d j=%d incorrect. points of triangles don't match up s=%d\n", i, j, s);
+                flag = 1;
+              }
+
+          if(flag)
+            terminate(msg);
+        }
+    }
+
+  printf("links ok\n");
+}
+
+/*! \brief Checks if orientations of tetrahedra are positive.
+ *
+ *  Deleted tetrahedra and tetrahedra with an infinity point are skipped.
+ *  The run is terminated if Orient3d_Exact() does not return a positive
+ *  value for a tetrahedron; otherwise the smallest tetrahedron volume is
+ *  printed.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *
+ *  \return void
+ */
+void check_orientations(tessellation *T)
+{
+  tetra *DT = T->DT;
+  point *DP = T->DP;
+  double vol, volmin = 1.0e30;
+  char msg[200];
+  int i, ivol;
+
+  for(i = 0; i < T->Ndt; i++)
+    {
+      tetra *t = &DT[i];
+
+      if(t->t[0] < 0) /* deleted ? */
+        continue;
+
+      point *p0 = &DP[t->p[0]];
+      point *p1 = &DP[t->p[1]];
+      point *p2 = &DP[t->p[2]];
+      point *p3 = &DP[t->p[3]];
+
+      if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
+        continue;
+
+      vol  = calculate_tetra_volume(p0, p1, p2, p3);
+      ivol = Orient3d_Exact(p0, p1, p2, p3);
+
+      if(ivol <= 0)
+        {
+          sprintf(msg, "Tetra %d is NEGATIVE (%d %d %d %d) oriented or FLAT: ivol=%d vol=%g\n", i, (int)(t->p[0]), (int)(t->p[1]),
+                  (int)(t->p[2]), (int)(t->p[3]), ivol, vol);
+          terminate(msg);
+        }
+
+      volmin = (vol < volmin) ? vol : volmin;
+    }
+
+  printf("orientations ok, volmin=%g\n", volmin);
+}
+
+/*! \brief Checks if tetrahedra are valid.
+ *
+ *  Deleted tetrahedra and tetrahedra with an infinity point are skipped.
+ *  For the others the run is terminated if test_tetra_orientation() is not
+ *  positive, or if for one of the first npoints points that is not a corner
+ *  of the tetrahedron InSphere_Errorbound() is non-negative and
+ *  InSphere_Exact() is positive. Progress is printed every 100 tetrahedra.
+ *
+ *  \param[in] T pointer to tessellation.
+ *  \param[in] npoints Number of points.
+ *
+ *  \return void
+ */
+void check_tetras(tessellation *T, int npoints)
+{
+  tetra *DT = T->DT;
+  point *DP = T->DP;
+  char msg[200];
+  int i, j, res, res_exact;
+
+  for(i = 0; i < T->Ndt; i++)
+    {
+      if((i % 100) == 0)
+        printf("check tetra i=%d/%d\n", i, T->Ndt);
+
+      tetra *t = &DT[i];
+
+      if(t->t[0] < 0) /* deleted ? */
+        continue;
+
+      point *p0 = &DP[t->p[0]];
+      point *p1 = &DP[t->p[1]];
+      point *p2 = &DP[t->p[2]];
+      point *p3 = &DP[t->p[3]];
+
+      if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
+        continue;
+
+      if(!(test_tetra_orientation(p0, p1, p2, p3) > 0))
+        {
+          sprintf(msg, "Tetra %d is NEGATIVE oriented\n", i);
+          terminate(msg);
+        }
+
+      for(j = 0; j < npoints; j++)
+        {
+          /* the corners of the tetra itself are not tested */
+          if(t->p[0] == j || t->p[1] == j || t->p[2] == j || t->p[3] == j)
+            continue;
+
+          res = InSphere_Errorbound(p0, p1, p2, p3, &DP[j]);
+          if(res < 0)
+            continue;
+
+          /* not clearly outside: let the exact test decide */
+          res_exact = InSphere_Exact(p0, p1, p2, p3, &DP[j]);
+          if(res_exact <= 0)
+            continue;
+
+          sprintf(msg, "ERROR tetra=%d: point=%d  in tetra with edges=%d|%d|%d|%d   res=%d|%d\n", i, j, (int)(t->p[0]),
+                  (int)(t->p[1]), (int)(t->p[2]), (int)(t->p[3]), res, res_exact);
+          terminate(msg);
+        }
+    }
+
+  printf("Tetrahedra OK\n");
+}
+
+/*! \brief Compare integer value of two variables.
+ *
+ *  Comparison function for sorting arrays of int in ascending order.
+ *
+ *  \param[in] a Pointer to first value.
+ *  \param[in] b Pointer to second value.
+ *
+ *  \return (-1,0,1) -1 if a < b, +1 if a > b, 0 if both are equal.
+ */
+int points_compare(const void *a, const void *b)
+{
+  const int lhs = *(const int *)a;
+  const int rhs = *(const int *)b;
+
+  /* each comparison yields 0 or 1, the difference is the sign */
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */
+
+#ifdef TWODIMS /* two-dimensional test code */
+
+/*! \brief Check 2d Voronoi mesh triangles.
+ *
+ *  Triangles with a corner equal to DPinfinity are skipped. For the others
+ *  the run is terminated if Orient2d_Exact() does not return 1, or if for
+ *  one of the first npoints points that is not a corner of the triangle
+ *  both InCircle_Quick() and InCircle_Exact() are positive.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] npoints Number of points.
+ *
+ *  \return void
+ */
+void check_triangles(tessellation *T, int npoints)
+{
+  tetra *DT = T->DT;
+  char msg[200];
+  int i, j, res, res_exact;
+
+  for(i = 0; i < T->Ndt; i++)
+    {
+      tetra *t = &DT[i];
+
+      if(t->p[0] == DPinfinity || t->p[1] == DPinfinity || t->p[2] == DPinfinity)
+        continue;
+
+      if(Orient2d_Exact(T, t->p[0], t->p[1], t->p[2]) != 1)
+        {
+          sprintf(msg, "Triangle %d is NEGATIVE oriented or FLAT\n", i);
+          terminate(msg);
+        }
+
+      for(j = 0; j < npoints; j++)
+        {
+          /* the corners of the triangle itself are not tested */
+          if(t->p[0] == j || t->p[1] == j || t->p[2] == j)
+            continue;
+
+          res = InCircle_Quick(T, t->p[0], t->p[1], t->p[2], j);
+          if(res <= 0)
+            continue;
+
+          /* confirm the quick result with the exact test */
+          res_exact = InCircle_Exact(T, t->p[0], t->p[1], t->p[2], j);
+          if(res_exact <= 0)
+            continue;
+
+          sprintf(msg, "ERROR: point=%d lies in triangle=%d with edges=%d|%d|%d   res=%d|%d\n", j, i, (int)(t->p[0]),
+                  (int)(t->p[1]), (int)(t->p[2]), res, res_exact);
+          terminate(msg);
+        }
+    }
+
+  printf("triangles ok\n");
+}
+
+/*! \brief Check the orientations of triangles in 2d Voronoi mesh.
+ *
+ *  Triangles with a corner equal to DPinfinity are skipped. The run is
+ *  terminated if Orient2d_Exact() does not return a positive value for a
+ *  triangle; otherwise the smallest value returned by
+ *  test_triangle_orientation() is printed.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *
+ *  \return void
+ */
+void check_orientations(tessellation *T)
+{
+  tetra *DT = T->DT;
+  double vol, volmin = 1.0e30;
+  char msg[200];
+  int i, ivol;
+
+  for(i = 0; i < T->Ndt; i++)
+    {
+      tetra *t = &DT[i];
+
+      if(t->p[0] == DPinfinity || t->p[1] == DPinfinity || t->p[2] == DPinfinity)
+        continue;
+
+      vol  = test_triangle_orientation(T, t->p[0], t->p[1], t->p[2]);
+      ivol = Orient2d_Exact(T, t->p[0], t->p[1], t->p[2]);
+
+      if(ivol <= 0)
+        {
+          /* the quick estimate is only evaluated for the error message */
+          double vol2 = Orient2d_Quick(T, t->p[0], t->p[1], t->p[2]);
+
+          sprintf(msg, "Triangle %d is NEGATIVE (%d %d %d) oriented or FLAT: ivol=%d vol=%g|%g\n", i, (int)(t->p[0]),
+                  (int)(t->p[1]), (int)(t->p[2]), ivol, vol, vol2);
+          terminate(msg);
+        }
+
+      volmin = (vol < volmin) ? vol : volmin;
+    }
+
+  printf("orientations ok, volmin=%g\n", volmin);
+}
+
+/*! \brief Check links in 2d Voronoi mesh.
+ *
+ *  For every triangle and each of its three neighbours it is checked that
+ *  the neighbour links back to the triangle; the run is terminated at the
+ *  first link that does not.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *
+ *  \return void
+ */
+void check_links(tessellation *T)
+{
+  tetra *DT = T->DT;
+  char msg[200];
+  int tri, side;
+
+  for(tri = 0; tri < T->Ndt; tri++)
+    for(side = 0; side < 3; side++)
+      {
+        tetra *ngb = &DT[DT[tri].t[side]];
+
+        if(ngb->t[DT[tri].s[side]] == tri)
+          continue;
+
+        sprintf(msg, "LINK for i=%d j=%d  incorrect\n", tri, side);
+        terminate(msg);
+      }
+}
+
+#endif /* #ifdef TWODIMS */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_derefinement.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_derefinement.c
new file mode 100644
index 0000000000..99afd85cc0
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_derefinement.c
@@ -0,0 +1,1088 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi_derefinement.c
+ * \date        05/2018
+ * \brief       Contains routines for de-refinement.
+ * \details     contains functions:
+ *                static void derefine_add_ngb(int edge, int i, int j, double
+ *                  area, int t, int nr)
+ *                int do_derefinements(void)
+ *                static void derefine_apply_probe_list(void)
+ *                static void derefine_apply_flux_list(void)
+ *                static int derefine_flux_list_data_compare(const void *a,
+ *                  const void *b)
+ *                static int derefine_probe_list_data_compare_task(const
+ *                  void *a, const void *b)
+ *                static int derefine_compare_seq_DP_ID(const void *a,
+ *                  const void *b)
+ *                static void derefine_exchange_flag(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 22.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+#if defined(REFINEMENT_MERGE_CELLS) && !defined(ONEDIMS)
+#define DEREF_SA_FAC 1.0e-4 /* faces not larger than this fraction of a cell's SurfaceArea are skipped when probing and distributing */
+
+int do_derefinements(void);
+static void derefine_add_ngb(int edge, int i, int j, double area, int tt, int nr);
+static int derefine_compare_seq_DP_ID(const void *a, const void *b);
+static int derefine_flux_list_data_compare(const void *a, const void *b);
+static void derefine_apply_flux_list(void);
+static void derefine_exchange_flag(void);
+static void derefine_apply_probe_list(void);
+static int derefine_probe_list_data_compare_task(const void *a, const void *b);
+
+/*! \brief Per-gas-cell derefinement state: status flag and index of the cell's Delaunay point.
+ */
+static struct derefine_particle_data
+{
+  int Flag;     /* 0: not (or no longer) flagged, 1: wants to be merged, 2: selected for merging, 3: rejected because of a neighbour */
+  int dp_index; /* index into Mesh.DP of this cell's Delaunay point, -1 while unset */
+} * deref_SphP;
+
+/*! \brief Data structure for communicating de-refinement flags (allocated with Mesh_nimport entries).
+ */
+static struct flagexch
+{
+  int Flag;    /* derefinement flag, copied into flag_DP for Delaunay points owned by another task */
+  MyIDType ID; /* particle ID, copied into seq_DP for Delaunay points owned by another task */
+} * FlagExch;
+
+/*! \brief Working copy of the derefinement flag, one entry per Delaunay point.
+ */
+static struct flag_delaunay_data
+{
+  int Flag; /* same values as derefine_particle_data.Flag; filled from deref_SphP (local point) or FlagExch (other task) */
+} * flag_DP;
+
+/*! \brief Structure defining auxiliary Delaunay data (for sorting), one entry per Delaunay point.
+ */
+static struct seq_delaunay_data
+{
+  MyFloat rnd;     /* value from get_random_number() (0 for local points with negative index) */
+  int rank, index; /* rank: position of the point in Mesh.DP / flag_DP; index: copy of Mesh.DP[rank].index */
+  MyIDType ID;     /* particle ID; entries with ID 0 are skipped when selecting cells */
+} * seq_DP;
+
+/*! \brief Structure defining probe list element: a query about a neighbour cell owned by another task.
+ */
+static struct probe_list_data
+{
+  int task, index; /* owning task of the probed point and its originalindex there */
+  int sendpart;    /* local cell that issued the probe */
+  int flag;        /* 0 when sent; set to 1 by the owner if the probed cell is synchronized and has Flag == 2 */
+} * ProbeList;
+
+/*! \brief Structure defining flux list element: conserved quantities handed to a cell owned by another task.
+ */
+static struct flux_list_data
+{
+  int task, index;  /* owning task of the receiving point and its originalindex there */
+  double dM, dP[3]; /* mass and momentum to add */
+#ifdef MHD
+  double dB[3]; /* BConserved to add */
+#endif /* #ifdef MHD */
+
+#ifndef ISOTHERM_EQS
+  double dEnergy; /* energy to add */
+#endif /* #ifndef ISOTHERM_EQS */
+
+#ifdef MAXSCALARS
+  double dConservedScalars[MAXSCALARS]; /* scalar masses to add; only the first N_Scalar entries are written */
+#endif /* #ifdef MAXSCALARS */
+} * FluxList;
+
+static int Nflux, MaxNflux; /* used and allocated number of entries of ProbeList / FluxList */
+
+static int *first_ngb, *last_ngb, first_free_ngb; /* per-point first/last entry in ngb (-1 if none), next unused ngb entry */
+
+/*! \brief Structure defining neighbour data: one entry of a Delaunay point's singly linked list of faces.
+ */
+static struct ngb_data
+{
+#ifdef OPTIMIZE_MEMORY_USAGE
+  MyFloat area; /* area of the face */
+#else  /* #ifdef OPTIMIZE_MEMORY_USAGE */
+  double area; /* area of the face */
+#endif /* #ifdef OPTIMIZE_MEMORY_USAGE #else */
+  int index;    /* Delaunay point on the other side of the face */
+  int edge;     /* index of the face in Mesh.VF */
+  int next_ngb; /* next entry of the same list, -1 at the end */
+  int t, nr; /* delaunay tetra and edge number that generated this face */
+} * ngb;
+
+static int n_tri, max_n_tri; /* used and maximum number of entries of trilist */
+static triangle *trilist;    /* scratch list of triangles, allocated per dissolved cell */
+
+#ifdef REFINEMENT_SPLIT_CELLS
+extern char *FlagDoNotRefine;
+#endif /* #ifdef REFINEMENT_SPLIT_CELLS */
+
+/*! \brief Appends one face entry to the neighbour list of Delaunay point i.
+ *
+ *  Nothing is stored if either point index is negative.
+ *
+ *  \param[in] edge Stored as element 'edge' of the new entry.
+ *  \param[in] i Point whose list (first_ngb/last_ngb) is extended.
+ *  \param[in] j Stored as element 'index' of the new entry.
+ *  \param[in] area Stored as element 'area' of the new entry.
+ *  \param[in] t Stored as element 't' of the new entry.
+ *  \param[in] nr Stored as element 'nr' of the new entry.
+ *
+ *  \return void
+ */
+static void derefine_add_ngb(int edge, int i, int j, double area, int t, int nr)
+{
+  if(i < 0 || j < 0)
+    return;
+
+  if(i >= Mesh.Ndp || j >= Mesh.Ndp)
+    terminate("i>= Ndp || j>= Ndp");
+
+  /* take the next unused entry of the ngb array */
+  const int slot = first_free_ngb++;
+
+  /* chain it behind the current tail, or let it start the list */
+  if(first_ngb[i] < 0)
+    first_ngb[i] = slot;
+  else
+    ngb[last_ngb[i]].next_ngb = slot;
+  last_ngb[i] = slot;
+
+  /* the new entry is the tail, hence it has no successor */
+  ngb[slot].next_ngb = -1;
+  ngb[slot].index    = j;
+  ngb[slot].edge     = edge;
+  ngb[slot].area     = area;
+  ngb[slot].t        = t;
+  ngb[slot].nr       = nr;
+}
+
+/*! \brief Flags the active cells that should be merged, thins the set so
+ *         that no two neighbouring cells go, and dissolves the survivors.
+ *
+ *  \return Global number of cells that initially asked to be derefined.
+ */
+int do_derefinements(void)
+{
+  int idx, i, j, k, count, countall;
+
+  TIMER_START(CPU_DEREFINE);
+
+  deref_SphP = mymalloc_movable(&deref_SphP, "deref_SphP", NumGas * sizeof(struct derefine_particle_data));
+
+  FlagExch = mymalloc_movable(&FlagExch, "FlagExch", Mesh_nimport * sizeof(struct flagexch));
+
+  /* first, check whether we have cells to derefine */
+  for(idx = 0, count = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+#ifdef REFINEMENT_SPLIT_CELLS
+      FlagDoNotRefine[i] = 0;
+#endif /* #ifdef REFINEMENT_SPLIT_CELLS */
+
+      if(i >= NumGas)
+        terminate("index of gas cell greater than NumGas");
+
+      deref_SphP[i].Flag     = 0;
+      deref_SphP[i].dp_index = -1;
+
+      if(derefine_should_this_cell_be_merged(i, deref_SphP[i].Flag))
+        {
+          deref_SphP[i].Flag = 1;
+          count++;
+        }
+    }
+
+  MPI_Allreduce(&count, &countall, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  mpi_printf("DEREFINE: Number of cells that want to be de-refined: %d\n", countall);
+
+  if(countall)
+    {
+      int max_assumed_ntri = 0;
+
+      /* tell the ghost cells whether they want to be derefined or not */
+      derefine_exchange_flag();
+
+      /* let's create an explicit list of the neighbors of each cell */
+
+      first_ngb = mymalloc("first_ngb", Mesh.Ndp * sizeof(int));
+      ngb       = mymalloc("ngb", 2 * Mesh.Nvf * sizeof(struct ngb_data));
+
+      last_ngb = mymalloc("last_ngb", Mesh.Ndp * sizeof(int));
+
+      for(i = 0; i < Mesh.Ndp; i++)
+        first_ngb[i] = last_ngb[i] = -1;
+
+      for(i = 0, first_free_ngb = 0; i < Mesh.Nvf; i++)
+        {
+          derefine_add_ngb(i, Mesh.VF[i].p1, Mesh.VF[i].p2, Mesh.VF[i].area, Mesh.VF[i].t, Mesh.VF[i].nr);
+          derefine_add_ngb(i, Mesh.VF[i].p2, Mesh.VF[i].p1, Mesh.VF[i].area, Mesh.VF[i].t, Mesh.VF[i].nr);
+        }
+
+      myfree(last_ngb);
+
+      /* we now make a list of the delaunay points that we can sort in a globally unique way */
+      flag_DP = mymalloc_movable(&flag_DP, "flag_DP", Mesh.Ndp * sizeof(struct flag_delaunay_data));
+      seq_DP  = mymalloc("seq_DP", Mesh.Ndp * sizeof(struct seq_delaunay_data));
+
+      for(i = 0; i < Mesh.Ndp; i++)
+        {
+          seq_DP[i].rank  = i;
+          seq_DP[i].index = Mesh.DP[i].index;
+
+          if(Mesh.DP[i].task == ThisTask)
+            {
+              int li = Mesh.DP[i].index;
+              if(li < 0)
+                {
+                  flag_DP[i].Flag = 0;
+                  seq_DP[i].ID    = 0;
+                  seq_DP[i].rnd   = 0;
+                }
+              else
+                {
+                  if(li < NumGas)
+                    if(deref_SphP[li].dp_index < 0)
+                      deref_SphP[li].dp_index = i; /* only guaranteed to be set for active cells */
+
+                  if(li >= NumGas)
+                    li -= NumGas;
+
+                  flag_DP[i].Flag = deref_SphP[li].Flag;
+                  seq_DP[i].ID    = P[li].ID;
+                  seq_DP[i].rnd   = get_random_number();
+                }
+            }
+          else
+            {
+              flag_DP[i].Flag = FlagExch[Mesh.DP[i].index].Flag;
+              seq_DP[i].ID    = FlagExch[Mesh.DP[i].index].ID;
+              seq_DP[i].rnd   = get_random_number();
+            }
+        }
+
+      /* sort with derefine_compare_seq_DP_ID: the random key rnd comes first, ID second */
+      mysort(seq_DP, Mesh.Ndp, sizeof(struct seq_delaunay_data), derefine_compare_seq_DP_ID);
+
+      /* now let's go through in sorted order. For each cell that is supposed to be derefined (Flag 1), check whether any
+       * synchronized neighbor was already selected (Flag 2) or rejected (Flag 3). If yes, reject it (Flag 3), else select it.
+       * Also, if there is a neighbour with the same ID, don't derefine it, because this must be a mirrored particle
+       */
+
+      for(i = 0; i < Mesh.Ndp; i++)
+        {
+          if(seq_DP[i].ID != 0)
+            {
+              j = seq_DP[i].rank;
+
+              if(flag_DP[j].Flag == 1) /* this cell is still eligible for derefinement */
+                {
+                  /* go through its neighbours and check whether one of them is already up for derefinement */
+
+                  int n = 0;
+                  k     = first_ngb[j];
+                  while(k >= 0)
+                    {
+                      /* we only need to consider neighboring cells if they are active */
+                      int q = ngb[k].index;
+
+                      if(q >= 0)
+                        {
+                          int timebin;
+
+                          if(Mesh.DP[q].task == ThisTask)
+                            {
+                              if(Mesh.DP[q].index < NumGas)
+                                timebin = P[Mesh.DP[q].index].TimeBinHydro;
+                              else
+                                timebin = P[Mesh.DP[q].index - NumGas].TimeBinHydro;
+                            }
+                          else
+                            {
+#ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT
+                              timebin = PrimExch[Mesh.DP[q].index].TimeBinHydro;
+#else  /* #ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT */
+                              timebin = RefExch[Mesh.DP[q].index].TimeBinHydro;
+#endif /* #ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT #else */
+                            }
+
+                          if(TimeBinSynchronized[timebin])
+                            {
+                              if(flag_DP[q].Flag == 2 || flag_DP[q].Flag == 3)
+                                n++;
+
+                              if(Mesh.DP[q].ID == seq_DP[i].ID) /* same ID, so we have a mirrored particle */
+                                n++;
+                            }
+                        }
+
+                      k = ngb[k].next_ngb;
+                    }
+
+                  if(n == 0) /* ok, none have been found. This means this cell is allowed to be derefined */
+                    flag_DP[j].Flag = 2;
+                  else
+                    flag_DP[j].Flag = 3;
+                }
+            }
+        }
+
+      myfree(seq_DP);
+
+      /* copy the derefinement flags back to the cell structure */
+      for(i = 0; i < Mesh.Ndp; i++)
+        if(Mesh.DP[i].task == ThisTask && Mesh.DP[i].index >= 0 && Mesh.DP[i].index < NumGas)
+          deref_SphP[Mesh.DP[i].index].Flag = flag_DP[i].Flag;
+
+      myfree(flag_DP);
+
+      /* now let's count again how many cells we would like to derefine */
+
+      for(idx = 0, count = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+        {
+          i = TimeBinsHydro.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          if(deref_SphP[i].Flag == 2)
+            count++;
+        }
+
+      int in[2], out[2];
+      in[0] = count;
+
+      /* now we carry out an auxiliary check to make sure that we really
+         avoid de-refining two neighboring cells.  If such a pair is
+         found, both cells will not be derefined. */
+
+      MaxNflux  = Mesh.Indi.AllocFacNflux;
+      Nflux     = 0;
+      ProbeList = mymalloc_movable(&ProbeList, "ProbeList", MaxNflux * sizeof(struct probe_list_data));
+
+      count = 0; /* from here on count tallies the cells that are excluded again */
+
+      for(idx = 0, count = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+        {
+          i = TimeBinsHydro.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          if(deref_SphP[i].Flag == 2)
+            {
+              j = deref_SphP[i].dp_index; /* this is the delaunay point of this cell */
+              if(j < 0)
+                terminate("j < 0");
+
+              k = first_ngb[j];
+
+              int flag = 0;
+
+              while(k >= 0)
+                {
+                  if(ngb[k].area > DEREF_SA_FAC * SphP[i].SurfaceArea)
+                    {
+                      int q = ngb[k].index;
+
+                      if(Mesh.DP[q].task == ThisTask)
+                        {
+                          int p = Mesh.DP[q].index;
+
+                          if(p < 0)
+                            terminate("p < 0");
+
+                          if(p >= NumGas) /* this is a local ghost point */
+                            p -= NumGas;
+
+                          if(TimeBinSynchronized[P[p].TimeBinHydro])
+                            if(deref_SphP[p].Flag == 2)
+                              flag++;
+                        }
+                      else
+                        {
+                          /* here we have a foreign ghost point */
+                          if(Nflux >= MaxNflux)
+                            {
+                              Mesh.Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR;
+                              MaxNflux = Mesh.Indi.AllocFacNflux;
+#ifdef VERBOSE
+                              printf("Task=%d: increase memory allocation, MaxNflux=%d Indi.AllocFacNflux=%g\n", ThisTask, MaxNflux,
+                                     Mesh.Indi.AllocFacNflux);
+#endif /* #ifdef VERBOSE */
+                              ProbeList = myrealloc_movable(ProbeList, MaxNflux * sizeof(struct probe_list_data));
+
+                              if(Nflux >= MaxNflux)
+                                terminate("Nflux >= MaxNflux");
+                            }
+
+                          ProbeList[Nflux].task     = Mesh.DP[q].task;
+                          ProbeList[Nflux].index    = Mesh.DP[q].originalindex;
+                          ProbeList[Nflux].sendpart = i;
+                          ProbeList[Nflux].flag     = 0;
+
+                          Nflux++;
+                        }
+                    }
+                  k = ngb[k].next_ngb;
+                }
+
+              if(flag)
+                {
+                  /* oops. It looks like a neighboring point is also about to be dissolved. We hence do not
+                     dissolve the current point
+                   */
+                  deref_SphP[i].Flag = 0;
+                  count++;
+                }
+            }
+        }
+
+      /* now let's probe on other tasks */
+
+      derefine_apply_probe_list();
+
+      for(i = 0; i < Nflux; i++)
+        {
+          if(ProbeList[i].flag)
+            if(deref_SphP[ProbeList[i].sendpart].Flag == 2)
+              {
+                deref_SphP[ProbeList[i].sendpart].Flag = 0;
+                count++;
+              }
+        }
+
+      myfree(ProbeList);
+
+      in[1] = count;
+      MPI_Reduce(in, out, 2, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+      mpi_printf("DEREFINE: Number of cells that we could de-refine: %d, number of cells we exclude from this set:  %d\n", out[0],
+                 out[1]);
+
+      /* we now distribute the conserved quantities of the cell among the neighbours */
+
+      MaxNflux = Mesh.Indi.AllocFacNflux;
+      Nflux    = 0;
+      FluxList = mymalloc_movable(&FluxList, "FluxList", MaxNflux * sizeof(struct flux_list_data));
+
+      for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+        {
+          i = TimeBinsHydro.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          if(deref_SphP[i].Flag == 2)
+            {
+              j = deref_SphP[i].dp_index; /* this is the delaunay point of this cell */
+              if(j < 0)
+                terminate("j < 0");
+
+              max_n_tri = 300000; /* fixed capacity of the scratch triangle list allocated below */
+              n_tri     = 0;
+
+              trilist = mymalloc("trilist", max_n_tri * sizeof(triangle));
+
+              /* get a list of all the triangles that make up the Voronoi cell of j */
+              k = first_ngb[j];
+              while(k >= 0)
+                {
+                  n_tri = derefine_refine_get_triangles(&Mesh, ngb[k].t, ngb[k].nr, &Mesh.DP[j], trilist, n_tri, max_n_tri);
+
+                  k = ngb[k].next_ngb;
+                }
+
+              /* assign the first point as owner to all tetras */
+              k     = first_ngb[j];
+              int q = ngb[k].index;
+              int t;
+              for(t = 0; t < n_tri; t++)
+                trilist[t].owner = q;
+
+              double vol = 0;
+              for(k = 0; k < n_tri; k++)
+                vol += get_tri_volume(k, trilist);
+
+              /* now consider all the other points and split the triangles if needed */
+              k = first_ngb[j];
+              k = ngb[k].next_ngb;
+              while(k >= 0)
+                {
+                  int q = ngb[k].index;
+                  n_tri = derefine_add_point_and_split_tri(q, trilist, n_tri, max_n_tri, vol);
+                  k     = ngb[k].next_ngb;
+                }
+
+              if(n_tri > max_assumed_ntri)
+                max_assumed_ntri = n_tri;
+
+              double *volume = mymalloc("volume", Mesh.Ndp * sizeof(double));
+
+              /* clear the volume entries of the neighbors */
+              k = first_ngb[j];
+              while(k >= 0)
+                {
+                  int q     = ngb[k].index;
+                  volume[q] = 0;
+                  k         = ngb[k].next_ngb;
+                }
+
+              /* now assign the volume of the triangles to the neighbors */
+              for(k = 0; k < n_tri; k++)
+                {
+                  if(trilist[k].owner < 0 || trilist[k].owner >= Mesh.Ndp)
+                    terminate("strange owner");
+
+                  volume[trilist[k].owner] += get_tri_volume(k, trilist);
+                }
+
+              /* first, sum up the volume assigned to all neighbours that share a non-negligible face with this cell */
+              double voltot = 0;
+              k             = first_ngb[j];
+              while(k >= 0)
+                {
+                  if(ngb[k].area > DEREF_SA_FAC * SphP[i].SurfaceArea)
+                    {
+                      int q = ngb[k].index;
+                      voltot += volume[q];
+                    }
+                  k = ngb[k].next_ngb;
+                }
+
+              /* now, distribute conserved quantities proportional to the gained volume */
+              double facsum = 0;
+              k             = first_ngb[j];
+              while(k >= 0)
+                {
+                  if(ngb[k].area > DEREF_SA_FAC * SphP[i].SurfaceArea)
+                    {
+                      int q = ngb[k].index;
+
+                      double fac = volume[q] / voltot;
+
+                      if(fac < 0)
+                        {
+                          warn("strange: fac=%g\n", fac);
+                          fac = 0;
+                          // terminate("strange");
+                        }
+                      facsum += fac;
+
+                      if(Mesh.DP[q].task == ThisTask)
+                        {
+                          int p = Mesh.DP[q].index;
+
+                          if(p < 0)
+                            terminate("p < 0");
+
+                          if(p >= NumGas) /* this is a local ghost point */
+                            p -= NumGas;
+                          P[p].Mass += fac * P[i].Mass;
+                          SphP[p].Momentum[0] += fac * SphP[i].Momentum[0];
+                          SphP[p].Momentum[1] += fac * SphP[i].Momentum[1];
+                          SphP[p].Momentum[2] += fac * SphP[i].Momentum[2];
+
+#ifdef MHD
+                          SphP[p].BConserved[0] += fac * SphP[i].BConserved[0];
+                          SphP[p].BConserved[1] += fac * SphP[i].BConserved[1];
+                          SphP[p].BConserved[2] += fac * SphP[i].BConserved[2];
+#endif /* #ifdef MHD */
+
+#ifndef ISOTHERM_EQS
+                          SphP[p].Energy += fac * SphP[i].Energy;
+#endif /* #ifndef ISOTHERM_EQS */
+
+#ifdef MAXSCALARS
+                          for(int s = 0; s < N_Scalar; s++)
+                            *(MyFloat *)(((char *)(&SphP[p])) + scalar_elements[s].offset_mass) +=
+                                fac * (*(MyFloat *)(((char *)(&SphP[i])) + scalar_elements[s].offset_mass));
+#endif /* #ifdef MAXSCALARS */
+
+#ifdef REFINEMENT_SPLIT_CELLS
+                          FlagDoNotRefine[p] = 1;
+#endif /* #ifdef REFINEMENT_SPLIT_CELLS */
+                        }
+                      else
+                        {
+                          /* here we have a foreign ghost point */
+                          if(Mesh.DP[q].originalindex < 0)
+                            {
+                              char buf[1000];
+                              sprintf(buf, "---> task=%d  q=%d j=%d Ndp=%d\n", ThisTask, q, j, Mesh.Ndp);
+                              terminate(buf);
+                            }
+
+                          if(Nflux >= MaxNflux)
+                            {
+                              Mesh.Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR;
+                              MaxNflux = Mesh.Indi.AllocFacNflux;
+#ifdef VERBOSE
+                              printf("Task=%d: increase memory allocation, MaxNflux=%d Indi.AllocFacNflux=%g\n", ThisTask, MaxNflux,
+                                     Mesh.Indi.AllocFacNflux);
+#endif /* #ifdef VERBOSE */
+                              FluxList = myrealloc_movable(FluxList, MaxNflux * sizeof(struct flux_list_data));
+
+                              if(Nflux >= MaxNflux)
+                                terminate("Nflux >= MaxNflux");
+                            }
+
+                          FluxList[Nflux].task  = Mesh.DP[q].task;
+                          FluxList[Nflux].index = Mesh.DP[q].originalindex;
+                          FluxList[Nflux].dM    = fac * P[i].Mass;
+                          FluxList[Nflux].dP[0] = fac * SphP[i].Momentum[0];
+                          FluxList[Nflux].dP[1] = fac * SphP[i].Momentum[1];
+                          FluxList[Nflux].dP[2] = fac * SphP[i].Momentum[2];
+#ifdef MHD
+                          FluxList[Nflux].dB[0] = fac * SphP[i].BConserved[0];
+                          FluxList[Nflux].dB[1] = fac * SphP[i].BConserved[1];
+                          FluxList[Nflux].dB[2] = fac * SphP[i].BConserved[2];
+#endif /* #ifdef MHD */
+
+#ifndef ISOTHERM_EQS
+                          FluxList[Nflux].dEnergy = fac * SphP[i].Energy;
+#endif /* #ifndef ISOTHERM_EQS */
+
+#ifdef MAXSCALARS
+                          for(int s = 0; s < N_Scalar; s++)
+                            FluxList[Nflux].dConservedScalars[s] =
+                                fac * (*(MyFloat *)(((char *)(&SphP[i])) + scalar_elements[s].offset_mass));
+#endif /* #ifdef MAXSCALARS */
+                          Nflux++;
+                        }
+                    }
+
+                  k = ngb[k].next_ngb;
+                }
+
+              if(fabs(facsum - 1) > 1.0e-3) /* the distributed fractions have to sum to one */
+                {
+                  char buf[1000];
+                  sprintf(buf, "facsum=%g\n", facsum);
+                  terminate(buf);
+                }
+
+              myfree(volume);
+              myfree(trilist);
+
+              /* we set the dissolved cell to zero mass and zero ID. It will be eliminated from the list
+               * of cells in the next domain decomposition
+               */
+              P[i].Mass   = 0;
+              P[i].ID     = 0;
+              P[i].Vel[0] = 0;
+              P[i].Vel[1] = 0;
+              P[i].Vel[2] = 0;
+
+              SphP[i].VelVertex[0] = 0;
+              SphP[i].VelVertex[1] = 0;
+              SphP[i].VelVertex[2] = 0;
+
+              timebin_remove_particle(&TimeBinsHydro, idx, P[i].TimeBinHydro);
+
+              voronoi_remove_connection(i);
+            }
+        }
+
+      /* now let's apply the flux-list */
+      derefine_apply_flux_list();
+      myfree(FluxList);
+
+      myfree(ngb);
+      myfree(first_ngb);
+
+#ifdef VERBOSE
+      MPI_Reduce(&max_assumed_ntri, &n_tri, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
+      if(ThisTask == 0)
+        printf("DEREFINE: maximum assumed n_tri = %d\n", n_tri);
+#endif /* #ifdef VERBOSE */
+    }
+
+  myfree(FlagExch);
+  myfree(deref_SphP);
+
+  /* remove removed cells from list of active gravity cells */
+  timebin_cleanup_list_of_active_particles(&TimeBinsGravity);
+
+  TIMER_STOP(CPU_DEREFINE);
+
+  return countall;
+}
+
+/*! \brief Sends the collected probes to the tasks owning the probed cells
+ *         and brings back, per probe, whether that cell is being dissolved.
+ *
+ *  On return ProbeList is sorted by task and every entry's 'flag' holds the
+ *  answer of the owning task.
+ *
+ *  \return void
+ */
+static void derefine_apply_probe_list(void)
+{
+  int task, n, level, partner;
+  int nget = 0;
+
+  /* order the probes by destination so that each task's share is contiguous */
+  mysort(ProbeList, Nflux, sizeof(struct probe_list_data), derefine_probe_list_data_compare_task);
+
+  for(task = 0; task < NTask; task++)
+    Send_count[task] = 0;
+
+  for(n = 0; n < Nflux; n++)
+    Send_count[ProbeList[n].task] += 1;
+
+  /* a probe addressed to this very task would be a logic error */
+  if(Send_count[ThisTask] > 0)
+    terminate("Send_count[ThisTask]");
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  /* total number of incoming probes */
+  for(task = 0; task < NTask; task++)
+    nget += Recv_count[task];
+
+  /* buffer offsets are the running sums of the counts */
+  Recv_offset[0] = 0;
+  Send_offset[0] = 0;
+  for(task = 1; task < NTask; task++)
+    {
+      Send_offset[task] = Send_offset[task - 1] + Send_count[task - 1];
+      Recv_offset[task] = Recv_offset[task - 1] + Recv_count[task - 1];
+    }
+
+  struct probe_list_data *incoming = (struct probe_list_data *)mymalloc("ProbeListGet", nget * sizeof(struct probe_list_data));
+
+  /* pairwise exchange of the probes */
+  for(level = 1; level < (1 << PTask); level++)
+    {
+      partner = ThisTask ^ level;
+
+      if(partner >= NTask)
+        continue;
+
+      if(Send_count[partner] <= 0 && Recv_count[partner] <= 0)
+        continue;
+
+      MPI_Sendrecv(&ProbeList[Send_offset[partner]], Send_count[partner] * sizeof(struct probe_list_data), MPI_BYTE, partner,
+                   TAG_DENS_A, &incoming[Recv_offset[partner]], Recv_count[partner] * sizeof(struct probe_list_data), MPI_BYTE,
+                   partner, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+
+  /* answer the probes: flag those that hit a synchronized cell with Flag == 2 */
+  for(n = 0; n < nget; n++)
+    {
+      const int cell = incoming[n].index;
+
+      if(TimeBinSynchronized[P[cell].TimeBinHydro] && deref_SphP[cell].Flag == 2)
+        incoming[n].flag = 1;
+    }
+
+  /* return the answers along the same pairs, with send and receive roles swapped */
+  for(level = 1; level < (1 << PTask); level++)
+    {
+      partner = ThisTask ^ level;
+
+      if(partner >= NTask)
+        continue;
+
+      if(Send_count[partner] <= 0 && Recv_count[partner] <= 0)
+        continue;
+
+      MPI_Sendrecv(&incoming[Recv_offset[partner]], Recv_count[partner] * sizeof(struct probe_list_data), MPI_BYTE, partner,
+                   TAG_DENS_A, &ProbeList[Send_offset[partner]], Send_count[partner] * sizeof(struct probe_list_data), MPI_BYTE,
+                   partner, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+
+  myfree(incoming);
+}
+
+/*! \brief Ships the queued conserved-quantity transfers to the tasks that own
+ *         the receiving cells and adds them to those cells.
+ *
+ *  Terminates if a transfer is addressed to a cell whose ID is already zero.
+ *
+ *  \return void
+ */
+static void derefine_apply_flux_list(void)
+{
+  int i, j, p, nimport, ngrp, recvTask;
+
+  /* sort the flux-list by destination task so that each task's share is contiguous */
+  mysort(FluxList, Nflux, sizeof(struct flux_list_data), derefine_flux_list_data_compare);
+
+  for(j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  for(i = 0; i < Nflux; i++)
+    Send_count[FluxList[i].task]++;
+
+  if(Send_count[ThisTask] > 0)
+    terminate("Send_count[ThisTask]");
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
+    {
+      nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  struct flux_list_data *FluxListGet = (struct flux_list_data *)mymalloc("FluxListGet", nimport * sizeof(struct flux_list_data));
+
+  /* exchange the flux entries pairwise */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              MPI_Sendrecv(&FluxList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct flux_list_data), MPI_BYTE, recvTask,
+                           TAG_DENS_A, &FluxListGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct flux_list_data),
+                           MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  /* apply the received fluxes to the local cells */
+  for(i = 0; i < nimport; i++)
+    {
+      p = FluxListGet[i].index;
+
+      if(P[p].ID == 0)
+        {
+          /* build the message in buf first: terminate() is handed buf, so it must not be left unset */
+          char buf[1000];
+#ifndef LONGIDS
+          snprintf(buf, sizeof(buf), "On task=%d flux to ID=%d, but this is already deleted (index p=%d)\n", ThisTask, P[p].ID, p);
+#else  /* #ifndef LONGIDS */
+          snprintf(buf, sizeof(buf), "On task=%d flux to ID=%llu, but this is already deleted (index p=%d)\n", ThisTask, P[p].ID, p);
+#endif /* #ifndef LONGIDS #else */
+          terminate(buf);
+        }
+
+      P[p].Mass += FluxListGet[i].dM;
+      SphP[p].Momentum[0] += FluxListGet[i].dP[0];
+      SphP[p].Momentum[1] += FluxListGet[i].dP[1];
+      SphP[p].Momentum[2] += FluxListGet[i].dP[2];
+#ifdef MHD
+      SphP[p].BConserved[0] += FluxListGet[i].dB[0];
+      SphP[p].BConserved[1] += FluxListGet[i].dB[1];
+      SphP[p].BConserved[2] += FluxListGet[i].dB[2];
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+      for(int k = 0; k < N_Scalar; k++)
+        *(MyFloat *)(((char *)(&SphP[p])) + scalar_elements[k].offset_mass) += FluxListGet[i].dConservedScalars[k];
+#endif /* #ifdef MAXSCALARS */
+
+#ifndef ISOTHERM_EQS
+      SphP[p].Energy += FluxListGet[i].dEnergy;
+#endif /* #ifndef ISOTHERM_EQS */
+
+#ifdef REFINEMENT_SPLIT_CELLS
+      FlagDoNotRefine[p] = 1; /* a cell that just received material is marked in FlagDoNotRefine */
+#endif /* #ifdef REFINEMENT_SPLIT_CELLS */
+    }
+
+  myfree(FluxListGet);
+}
+
+/*! \brief Orders flux list entries by destination task (sort comparator).
+ *
+ *  \param[in] a Pointer to first flux list data object.
+ *  \param[in] b Pointer to second flux list data object.
+ *
+ *  \return (-1,0,1); -1 if a->task < b->task, +1 if a->task > b->task.
+ */
+static int derefine_flux_list_data_compare(const void *a, const void *b)
+{
+  const struct flux_list_data *lhs = (const struct flux_list_data *)a;
+  const struct flux_list_data *rhs = (const struct flux_list_data *)b;
+
+  if(lhs->task != rhs->task)
+    return (lhs->task < rhs->task) ? -1 : +1;
+
+  return 0;
+}
+
+/*! \brief Orders two probe list entries by their task field.
+ *
+ *  \param[in] a Pointer to the first struct probe_list_data.
+ *  \param[in] b Pointer to the second struct probe_list_data.
+ *
+ *  \return -1 if a->task < b->task, +1 if a->task > b->task, 0 otherwise.
+ */
+static int derefine_probe_list_data_compare_task(const void *a, const void *b)
+{
+  const struct probe_list_data *first  = (const struct probe_list_data *)a;
+  const struct probe_list_data *second = (const struct probe_list_data *)b;
+
+  if(first->task < second->task)
+    return -1;
+
+  return (first->task > second->task) ? +1 : 0;
+}
+
+/*! \brief Lexicographic comparison of two seq delaunay data elements.
+ *
+ *  Keys, in decreasing order of significance:
+ *    rnd
+ *    ID
+ *    index
+ *    rank
+ *
+ *  \param[in] a Pointer to the first struct seq_delaunay_data.
+ *  \param[in] b Pointer to the second struct seq_delaunay_data.
+ *
+ *  \return -1 if a orders before b, +1 if a orders after b, 0 if no key distinguishes them.
+ */
+static int derefine_compare_seq_DP_ID(const void *a, const void *b)
+{
+  const struct seq_delaunay_data *lhs = (const struct seq_delaunay_data *)a;
+  const struct seq_delaunay_data *rhs = (const struct seq_delaunay_data *)b;
+  int cmp;
+
+  /* every key is compared with (x > y) - (x < y), which evaluates to -1, 0
+   * or +1; a later key is consulted only if all earlier ones gave 0
+   */
+
+  /* most significant key: rnd */
+  cmp = (lhs->rnd > rhs->rnd) - (lhs->rnd < rhs->rnd);
+  if(cmp != 0)
+    return cmp;
+
+  /* then: ID */
+  cmp = (lhs->ID > rhs->ID) - (lhs->ID < rhs->ID);
+  if(cmp != 0)
+    return cmp;
+
+  /* then: index */
+  cmp = (lhs->index > rhs->index) - (lhs->index < rhs->index);
+  if(cmp != 0)
+    return cmp;
+
+  /* least significant key: rank */
+  return (lhs->rank > rhs->rank) - (lhs->rank < rhs->rank);
+}
+
+/*! \brief Exchanges de-refinement flags of exported mesh cells between tasks.
+ *
+ *  For each export-list entry of a local mesh cell whose origin is another task, the cell ID and its flag
+ *  are sent to that task; the (Flag, ID) pairs received from other tasks are stored in FlagExch.
+ *
+ *  \return void
+ */
+static void derefine_exchange_flag(void)
+{
+  int i, j, p, listp, task, off, place;
+  int ngrp, recvTask;
+
+  struct exchange_data
+  {
+    int Flag;
+    MyIDType ID;
+  } * tmpExch, *tmpRecv;
+
+  tmpExch = (struct exchange_data *)mymalloc("tmpExch", Mesh_nexport * sizeof(struct exchange_data));
+
+  /* prepare data for export */
+  for(j = 0; j < NTask; j++)
+    Mesh_Send_count[j] = 0;
+
+  for(i = 0; i < NumGasInMesh; i++)
+    {
+      p = List_InMesh[i];
+
+      /* walk the chain of export entries of this cell */
+      for(listp = List_P[p].firstexport; listp >= 0; listp = ListExports[listp].nextexport)
+        {
+          if((task = ListExports[listp].origin) == ThisTask)
+            continue; /* entry belongs to this task, nothing to send */
+
+          place = ListExports[listp].index;
+          off   = Mesh_Send_offset[task] + Mesh_Send_count[task]++;
+
+          /* tmpExch holds Mesh_nexport entries; same kind of check as on the import side below */
+          if(off >= Mesh_nexport)
+            terminate("number of exported mesh points greater than Mesh_nexport");
+          tmpExch[off].Flag = 0;
+          tmpExch[off].ID   = P[place].ID;
+
+          /* the flag is passed on only for gas cells in a synchronized time bin, unless Mass == 0 and ID == 0 */
+          if(P[place].Type == 0 && TimeBinSynchronized[P[place].TimeBinHydro] && !(P[place].Mass == 0 && P[place].ID == 0))
+            tmpExch[off].Flag = deref_SphP[place].Flag;
+        }
+    }
+
+  /* exchange data */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Mesh_Send_count[recvTask] > 0 || Mesh_Recv_count[recvTask] > 0)
+            {
+              tmpRecv = (struct exchange_data *)mymalloc("tmpRecv", Mesh_Recv_count[recvTask] * sizeof(struct exchange_data));
+
+              /* get the values */
+              MPI_Sendrecv(&tmpExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * sizeof(struct exchange_data), MPI_BYTE,
+                           recvTask, TAG_DENS_A, tmpRecv, Mesh_Recv_count[recvTask] * sizeof(struct exchange_data), MPI_BYTE, recvTask,
+                           TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              for(i = 0; i < Mesh_Recv_count[recvTask]; i++)
+                {
+                  if(Mesh_Recv_offset[recvTask] + i >= Mesh_nimport)
+                    terminate("number of imported mesh points greater than Mesh_nimport");
+                  FlagExch[Mesh_Recv_offset[recvTask] + i].Flag = tmpRecv[i].Flag;
+                  FlagExch[Mesh_Recv_offset[recvTask] + i].ID   = tmpRecv[i].ID;
+                }
+
+              myfree(tmpRecv);
+            }
+        }
+    }
+
+  myfree(tmpExch);
+}
+
+#endif /* #if defined(REFINEMENT_MERGE_CELLS) && !defined(ONEDIMS) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_dynamic_update.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_dynamic_update.c
new file mode 100644
index 0000000000..7640029045
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_dynamic_update.c
@@ -0,0 +1,1037 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi_dynamic_update.c
+ * \date        05/2018
+ * \brief       Algorithms for Voronoi dynamic update.
+ * \details     contains functions:
+ *                int voronoi_get_connected_particles(tessellation * T)
+ *                void voronoi_init_connectivity(tessellation * T)
+ *                void voronoi_update_connectivity(tessellation * T)
+ *                void voronoi_remove_connection(int i)
+ *                int compare_foreign_connection(const void *a, const void *b)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 22.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+int Nvc;    /* number of connections currently in use */
+int MaxNvc; /* number of entries allocated in DC */
+int Largest_Nvc; /* not referenced in this part of the file -- presumably a high-water mark of Nvc; TODO confirm */
+connection *DC; /* Connections; unused entries carry task == -1 and are chained through .next */
+
+/*! Data structure for non-local connection: a request for a connected cell that is stored on another task.
+ */
+struct foreign_connection
+{
+  int task; /* task that holds the requested cell */
+  int origin; /* task that issued the request */
+  int index; /* index of the requested cell in P[] on `task` */
+  int image_flags; /* requested image(s) of the cell, see the MASK_* bit layout below */
+} * ForeignDC, *ImportedDC; /* ForeignDC: requests compiled by this task; ImportedDC: requests received from other tasks */
+/* image_flags bit layout: bit n = ix + 3 * iy + 9 * iz with per-axis digit 0 = unshifted, 1 = "shift right", 2 = "shift left" */
+#define MASK_X_SHIFT_RIGHT 38347922 /* all bits n with ix == 1 */
+#define MASK_X_SHIFT_LEFT 76695844 /* all bits n with ix == 2 */
+#define MASK_Y_SHIFT_RIGHT 14708792 /* all bits n with iy == 1 */
+#define MASK_Y_SHIFT_LEFT 117670336 /* all bits n with iy == 2 */
+#define MASK_Z_SHIFT_RIGHT 261632 /* all bits n with iz == 1 (bits 9..17) */
+#define MASK_Z_SHIFT_LEFT 133955584 /* all bits n with iz == 2 (bits 18..26) */
+#define MASK ((1 << 27) - 1) /* the 27 image bits */
+
+int FirstUnusedConnection; /* head of the chain of unused DC entries */
+
+/*! \brief Gets the active cells and the cells connected to them as Delaunay points of a mesh.
+ *  Adds the active local gas cells first, then local and foreign (requested via MPI) cells found on their connection lists.
+ *  \param[in, out] T Pointer to tessellation; points are appended to T->DP (enlarged on demand) and T->Ndp is advanced.
+ *
+ *  \return Number of points added to T->DP by this call (local points, local images and imported points).
+ */
+int voronoi_get_connected_particles(tessellation *T)
+{
+  int idx, i, j, p, q, count = 0, duplicates, image_flags, listp, nexport, nimport, origin;
+  int ngrp, recvTask;
+
+  CPU_Step[CPU_MISC] += measure_time();
+
+  /* first, let's add all the primary active points */
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      p = TimeBinsHydro.ActiveParticleList[idx];
+      if(p < 0) /* negative list entries are skipped */
+        continue;
+
+      if(P[p].Type == 0)
+        {
+          Ngb_Marker[p] = Ngb_MarkerValue; /* List_P[p] is (re)initialised below for the current marker value */
+
+          if(P[p].Mass == 0 && P[p].ID == 0) /* skip cells that have been swallowed or eliminated */
+            {
+              List_P[p].firstexport   = -1;
+              List_P[p].currentexport = -1;
+              continue;
+            }
+
+          if(Ninlist >= MaxNinlist) /* ListExports is full: enlarge it */
+            {
+              T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+              MaxNinlist = T->Indi.AllocFacNinlist;
+#ifdef VERBOSE
+              printf("VORONOI: Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist,
+                     T->Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+              ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+              if(Ninlist >= MaxNinlist)
+                terminate("Ninlist >= MaxNinlist");
+            }
+
+          List_InMesh[NumGasInMesh++] = p;
+
+          List_P[p].currentexport = List_P[p].firstexport = Ninlist++;
+          ListExports[List_P[p].currentexport].image_bits = 1; /* bit 0: the unshifted point itself */
+          ListExports[List_P[p].currentexport].nextexport = -1;
+          ListExports[List_P[p].currentexport].origin     = ThisTask;
+          ListExports[List_P[p].currentexport].index      = p;
+
+          if(T->Ndp >= T->MaxNdp) /* T->DP is full: enlarge it */
+            {
+              T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+              T->MaxNdp = T->Indi.AllocFacNdp;
+#ifdef VERBOSE
+              printf("VORONOI: Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp,
+                     T->Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+              T->DP -= 5; /* T->DP points 5 elements past the start of its allocation: undo the offset for the realloc */
+              T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point));
+              T->DP += 5; /* and re-apply it afterwards */
+
+              if(T->Ndp >= T->MaxNdp)
+                terminate("Ndp >= MaxNdp");
+            }
+
+          SphP[p].ActiveArea = 0;
+
+          point *dp = &T->DP[T->Ndp];
+
+          dp->x             = P[p].Pos[0];
+          dp->y             = P[p].Pos[1];
+          dp->z             = P[p].Pos[2];
+          dp->ID            = P[p].ID;
+          dp->task          = ThisTask;
+          dp->index         = p;
+          dp->originalindex = -1;
+          dp->timebin       = P[p].TimeBinHydro;
+          dp->image_flags   = 1;
+#ifdef DOUBLE_STENCIL
+          dp->Hsml             = SphP[p].Hsml;
+          dp->first_connection = -1;
+          dp->last_connection  = -1;
+#endif /* #ifdef DOUBLE_STENCIL */
+          T->Ndp++;
+          count++;
+        }
+    }
+
+  /* now, we go through the connection list and see whether we have any additional points to add */
+  int count_foreign = 0; /* number of connections that point to cells on other tasks (duplicates included) */
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      q = SphP[i].first_connection;
+
+      while(q >= 0)
+        {
+          if(q < 0 || q >= MaxNvc) /* NOTE(review): q < 0 cannot hold inside while(q >= 0) */
+            {
+              char buf[1000];
+              sprintf(buf, "strange connectivity q=%d Nvc=%d", q, MaxNvc); /* NOTE(review): prints MaxNvc under the label "Nvc" */
+              terminate(buf);
+            }
+
+          if(DC[q].task >= 0 && DC[q].task < NTask) /* ignore entries without a valid task (unused entries carry task == -1) */
+            {
+              if(ThisTask == DC[q].task) /* this one is local */
+                {
+                  p = DC[q].index; /* particle index */
+
+                  if(P[p].Type == 0)
+                    {
+                      if(!(P[p].Mass == 0 && P[p].ID == 0)) /* skip cells that have been swallowed or dissolved */
+                        {
+                          if(P[p].Ti_Current != All.Ti_Current)
+                            {
+                              drift_particle(p, All.Ti_Current);
+                            }
+
+                          if(p < 0 || p >= NumGas) /* NOTE(review): checked only after P[p] has already been accessed above */
+                            {
+                              char buf[1000];
+                              sprintf(buf, "strange p=%d (Ngas=%d) for q=%d Nvc=%d", p, NumGas, q, Nvc);
+                              terminate(buf);
+                            }
+
+                          image_flags = (DC[q].image_flags & MASK); /* keep only the 27 image bits */
+
+                          if(Ngb_Marker[p] != Ngb_MarkerValue) /* not yet marked with the current marker value: reset its export list */
+                            {
+                              Ngb_Marker[p]           = Ngb_MarkerValue;
+                              List_P[p].firstexport   = -1;
+                              List_P[p].currentexport = -1;
+                            }
+
+                          listp = List_P[p].firstexport; /* NOTE(review): listp is not read before it is next assigned */
+
+                          /* now we need to check whether this particle has already been made part of the list */
+                          if(List_P[p].firstexport >= 0)
+                            {
+                              if(ListExports[List_P[p].currentexport].origin != ThisTask)
+                                terminate("can't be");
+                            }
+                          else
+                            {
+                              /* this one apparently hasn't been added at all yet */
+                              if(Ninlist >= MaxNinlist)
+                                {
+                                  T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+                                  MaxNinlist = T->Indi.AllocFacNinlist;
+#ifdef VERBOSE
+                                  printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask,
+                                         MaxNinlist, T->Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+                                  ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+                                  if(Ninlist >= MaxNinlist)
+                                    terminate("Ninlist >= MaxNinlist");
+                                }
+
+                              List_InMesh[NumGasInMesh++] = p;
+
+                              List_P[p].currentexport = List_P[p].firstexport = Ninlist++;
+                              ListExports[List_P[p].currentexport].image_bits = 0;
+                              ListExports[List_P[p].currentexport].nextexport = -1;
+                              ListExports[List_P[p].currentexport].origin     = ThisTask;
+                              ListExports[List_P[p].currentexport].index      = p;
+                            }
+
+                          if(!(ListExports[List_P[p].currentexport].image_bits & image_flags)) /* requested image not yet in list */
+                            {
+                              ListExports[List_P[p].currentexport].image_bits |= image_flags;
+
+                              if(T->Ndp >= T->MaxNdp)
+                                {
+                                  T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+                                  T->MaxNdp = T->Indi.AllocFacNdp;
+#ifdef VERBOSE
+                                  printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp,
+                                         T->Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+                                  T->DP -= 5;
+                                  T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point));
+                                  T->DP += 5;
+
+                                  if(T->Ndp >= T->MaxNdp)
+                                    terminate("Ndp >= MaxNdp");
+                                }
+
+                              SphP[p].ActiveArea = 0;
+
+                              MyDouble x = P[p].Pos[0];
+                              MyDouble y = P[p].Pos[1];
+                              MyDouble z = P[p].Pos[2];
+
+                              /* for each coordinate there are three possibilities (no shift, shift right, shift left). The image
+                               * number is encoded in base three, i.e. x*3^0 + y*3^1 + z*3^2, and selects one bit of image_flags */
+
+#ifndef REFLECTIVE_X
+                              if((image_flags & MASK_X_SHIFT_RIGHT))
+                                x += boxSize_X;
+                              else if((image_flags & MASK_X_SHIFT_LEFT))
+                                x -= boxSize_X;
+#else  /* #ifndef REFLECTIVE_X */
+                              if((image_flags & MASK_X_SHIFT_RIGHT))
+                                x = -x;
+                              else if((image_flags & MASK_X_SHIFT_LEFT))
+                                x = 2 * boxSize_X - x;
+#endif /* #ifndef REFLECTIVE_X #else */
+#ifndef REFLECTIVE_Y
+                              if((image_flags & MASK_Y_SHIFT_RIGHT))
+                                y += boxSize_Y;
+                              else if((image_flags & MASK_Y_SHIFT_LEFT))
+                                y -= boxSize_Y;
+#else  /* #ifndef REFLECTIVE_Y */
+                              if((image_flags & MASK_Y_SHIFT_RIGHT))
+                                y = -y;
+                              else if((image_flags & MASK_Y_SHIFT_LEFT))
+                                y = 2 * boxSize_Y - y;
+#endif /* #ifndef REFLECTIVE_Y #else */
+#ifndef REFLECTIVE_Z
+                              if((image_flags & MASK_Z_SHIFT_RIGHT))
+                                z += boxSize_Z;
+                              else if((image_flags & MASK_Z_SHIFT_LEFT))
+                                z -= boxSize_Z;
+#else  /* #ifndef REFLECTIVE_Z */
+                              if((image_flags & MASK_Z_SHIFT_RIGHT))
+                                z = -z;
+                              else if((image_flags & MASK_Z_SHIFT_LEFT))
+                                z = 2 * boxSize_Z - z;
+#endif /* #ifndef REFLECTIVE_Z #else */
+
+                              point *dp = &T->DP[T->Ndp];
+
+                              dp->x = x;
+                              dp->y = y;
+                              dp->z = z;
+
+                              dp->task = ThisTask;
+                              dp->ID   = P[p].ID;
+                              if(image_flags != 1)
+                                dp->index = p + NumGas; /* this is a replicated/mirrored local point */
+                              else
+                                dp->index = p; /* this is actually a local point that wasn't made part of the mesh yet */
+                              dp->originalindex = p;
+                              dp->timebin       = P[p].TimeBinHydro;
+
+                              dp->image_flags = image_flags;
+#ifdef DOUBLE_STENCIL
+                              dp->Hsml             = SphP[p].Hsml;
+                              dp->first_connection = -1;
+                              dp->last_connection  = -1;
+#endif /* #ifdef DOUBLE_STENCIL */
+                              T->Ndp++;
+                              count++;
+                            }
+                        }
+                    }
+                }
+              else
+                {
+                  /* here we have a foreign neighbor that we want */
+                  count_foreign++;
+                }
+            }
+
+          if(q == SphP[i].last_connection) /* reached the end of this cell's chain */
+            break;
+
+          q = DC[q].next;
+        }
+    }
+
+  /* we now compile a list of the foreign neighbors we want in the mesh */
+
+  ForeignDC = mymalloc_movable(&ForeignDC, "ForeignDC", count_foreign * sizeof(struct foreign_connection));
+
+  int count_foreign_bak = count_foreign; /* remembered to cross-check the second pass below */
+
+  count_foreign = 0;
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      q = SphP[i].first_connection;
+
+      while(q >= 0)
+        {
+          if(DC[q].task >= 0 && DC[q].task < NTask)
+            {
+              if(ThisTask != DC[q].task) /* this one is not local */
+                {
+                  p = DC[q].index; /* NOTE(review): p is not read before it is next assigned */
+
+                  ForeignDC[count_foreign].task        = DC[q].task;
+                  ForeignDC[count_foreign].origin      = ThisTask;
+                  ForeignDC[count_foreign].index       = DC[q].index;
+                  ForeignDC[count_foreign].image_flags = (DC[q].image_flags & MASK);
+
+                  /* here we have a foreign neighbor that we want */
+                  count_foreign++;
+                }
+            }
+
+          if(q == SphP[i].last_connection)
+            break;
+
+          q = DC[q].next;
+        }
+    }
+
+  if(count_foreign_bak != count_foreign) /* both passes over the connections must find the same number */
+    terminate("bad");
+
+  /* we sort this list by tasks, and then eliminate duplicates */
+  mysort(ForeignDC, count_foreign, sizeof(struct foreign_connection), compare_foreign_connection);
+
+  for(j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  for(i = 0, j = -1, duplicates = 0; i < count_foreign; i++) /* j: index of the last entry kept so far */
+    {
+      if(j >= 0)
+        if(memcmp(&ForeignDC[i], &ForeignDC[j], sizeof(struct foreign_connection)) == 0)
+          {
+            duplicates++;
+            continue;
+          }
+
+      j++;
+
+      ForeignDC[j] = ForeignDC[i];
+      Send_count[ForeignDC[j].task]++;
+    }
+
+  count_foreign -= duplicates;
+  /* tell every task how many requests it is going to receive from this task */
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+  /* totals and per-task offsets (running sums of the counts) */
+  for(j = 0, nimport = 0, nexport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
+    {
+      nexport += Send_count[j];
+      nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  if(nexport != count_foreign)
+    {
+      char buf[1000];
+      sprintf(buf, "nexport=%d  count_foreign=%d\n", nexport, count_foreign);
+      terminate(buf);
+    }
+
+  if(Send_count[ThisTask] != 0) /* only connections with DC[q].task != ThisTask were put into ForeignDC */
+    terminate("bad");
+
+  ImportedDC = mymalloc_movable(&ImportedDC, "ImportedDC", nimport * sizeof(struct foreign_connection));
+
+  /* get the point requests */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp; /* exchange partner for this round */
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              MPI_Sendrecv(&ForeignDC[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct foreign_connection), MPI_BYTE,
+                           recvTask, TAG_DENS_B, &ImportedDC[Recv_offset[recvTask]],
+                           Recv_count[recvTask] * sizeof(struct foreign_connection), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  point *DP_Buffer = (point *)mymalloc_movable(&DP_Buffer, "DP_Buffer", nimport * sizeof(point));
+
+  /* now we prepare the points requested by the other tasks; Recv_count is recounted since requests may be dropped */
+  for(j = 0; j < NTask; j++)
+    Recv_count[j] = 0;
+
+  for(i = 0; i < nimport; i++)
+    {
+      p           = ImportedDC[i].index; /* NOTE(review): index received from another task, used without a range check */
+      origin      = ImportedDC[i].origin;
+      image_flags = ImportedDC[i].image_flags;
+
+      /* it could happen that the requested point has been refined or was turned into a star, so
+       * we will not necessarily find all the points requested.
+       */
+      if(P[p].Type != 0)
+        continue;
+
+      if(P[p].Mass == 0 && P[p].ID == 0)
+        continue; /* skip cells that have been swallowed or dissolved */
+
+      if(P[p].Ti_Current != All.Ti_Current)
+        {
+          drift_particle(p, All.Ti_Current);
+        }
+
+      /* mark the points in the export lists */
+
+      if(Ngb_Marker[p] != Ngb_MarkerValue)
+        {
+          Ngb_Marker[p]           = Ngb_MarkerValue;
+          List_P[p].firstexport   = -1;
+          List_P[p].currentexport = -1;
+        }
+
+      if(List_P[p].firstexport >= 0) /* cell already has export entries: find or append the one for this origin */
+        {
+          if(ListExports[List_P[p].currentexport].origin != origin)
+            {
+              listp = List_P[p].firstexport;
+              while(listp >= 0)
+                {
+                  if(ListExports[listp].origin == origin)
+                    {
+                      List_P[p].currentexport = listp;
+                      break;
+                    }
+
+                  if(ListExports[listp].nextexport < 0) /* end of chain, no entry for this origin found: append a new one */
+                    {
+                      if(Ninlist >= MaxNinlist)
+                        {
+                          T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+                          MaxNinlist = T->Indi.AllocFacNinlist;
+#ifdef VERBOSE
+                          printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist,
+                                 T->Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+                          ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+                          if(Ninlist >= MaxNinlist)
+                            terminate("Ninlist >= MaxNinlist");
+                        }
+
+                      List_P[p].currentexport                         = Ninlist++;
+                      ListExports[List_P[p].currentexport].image_bits = 0;
+                      ListExports[List_P[p].currentexport].nextexport = -1;
+                      ListExports[List_P[p].currentexport].origin     = origin;
+                      ListExports[List_P[p].currentexport].index      = p;
+                      ListExports[listp].nextexport                   = List_P[p].currentexport;
+                      break;
+                    }
+                  listp = ListExports[listp].nextexport;
+                }
+            }
+        }
+      else
+        {
+          /* here we have a local particle that hasn't been made part of the mesh */
+
+          if(Ninlist >= MaxNinlist)
+            {
+              T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+              MaxNinlist = T->Indi.AllocFacNinlist;
+#ifdef VERBOSE
+              printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist,
+                     T->Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+              ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+              if(Ninlist >= MaxNinlist)
+                terminate("Ninlist >= MaxNinlist");
+            }
+
+          List_InMesh[NumGasInMesh++] = p;
+
+          List_P[p].currentexport = List_P[p].firstexport = Ninlist++;
+          ListExports[List_P[p].currentexport].image_bits = 0;
+          ListExports[List_P[p].currentexport].nextexport = -1;
+          ListExports[List_P[p].currentexport].origin     = origin;
+          ListExports[List_P[p].currentexport].index      = p;
+        }
+
+      ListExports[List_P[p].currentexport].image_bits |= image_flags;
+
+      MyDouble x = P[p].Pos[0];
+      MyDouble y = P[p].Pos[1];
+      MyDouble z = P[p].Pos[2];
+
+      /* for each coordinate there are three possibilities (no shift, shift right, shift left). The image number is encoded in
+       * base three, i.e. x*3^0 + y*3^1 + z*3^2, and selects one bit of image_flags */
+#ifndef REFLECTIVE_X
+      if((image_flags & MASK_X_SHIFT_RIGHT))
+        x += boxSize_X;
+      else if((image_flags & MASK_X_SHIFT_LEFT))
+        x -= boxSize_X;
+#else  /* #ifndef REFLECTIVE_X */
+      if((image_flags & MASK_X_SHIFT_RIGHT))
+        x = -x;
+      else if((image_flags & MASK_X_SHIFT_LEFT))
+        x = 2 * boxSize_X - x;
+#endif /* #ifndef REFLECTIVE_X #else */
+
+#ifndef REFLECTIVE_Y
+      if((image_flags & MASK_Y_SHIFT_RIGHT))
+        y += boxSize_Y;
+      else if((image_flags & MASK_Y_SHIFT_LEFT))
+        y -= boxSize_Y;
+#else  /* #ifndef REFLECTIVE_Y */
+      if((image_flags & MASK_Y_SHIFT_RIGHT))
+        y = -y;
+      else if((image_flags & MASK_Y_SHIFT_LEFT))
+        y = 2 * boxSize_Y - y;
+#endif /* #ifndef REFLECTIVE_Y #else */
+
+#ifndef REFLECTIVE_Z
+      if((image_flags & MASK_Z_SHIFT_RIGHT))
+        z += boxSize_Z;
+      else if((image_flags & MASK_Z_SHIFT_LEFT))
+        z -= boxSize_Z;
+#else  /* #ifndef REFLECTIVE_Z */
+      if((image_flags & MASK_Z_SHIFT_RIGHT))
+        z = -z;
+      else if((image_flags & MASK_Z_SHIFT_LEFT))
+        z = 2 * boxSize_Z - z;
+#endif /* #ifndef REFLECTIVE_Z #else */
+
+      int k = Recv_offset[origin] + Recv_count[origin]++; /* next free DP_Buffer slot in the section of this origin */
+
+      SphP[p].ActiveArea = 0;
+
+      DP_Buffer[k].x             = x;
+      DP_Buffer[k].y             = y;
+      DP_Buffer[k].z             = z;
+      DP_Buffer[k].ID            = P[p].ID;
+      DP_Buffer[k].task          = ThisTask;
+      DP_Buffer[k].index         = p;
+      DP_Buffer[k].originalindex = p;
+      DP_Buffer[k].timebin       = P[p].TimeBinHydro;
+
+      DP_Buffer[k].image_flags = image_flags;
+#ifdef DOUBLE_STENCIL
+      DP_Buffer[k].Hsml             = SphP[p].Hsml;
+      DP_Buffer[k].first_connection = -1;
+      DP_Buffer[k].last_connection  = -1;
+#endif /* #ifdef DOUBLE_STENCIL */
+    }
+
+  /* because we may have dropped some of the points because they were turned
+   * into stars we need to redetermine the send-offsets and counts
+   */
+  /* from here on the roles are swapped: Recv_count = points this task sends back, Send_count = points it receives */
+  MPI_Alltoall(Recv_count, 1, MPI_INT, Send_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, nimport = 0, nexport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
+    {
+      nexport += Send_count[j];
+      nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          /* note: the Recv_offsets stay at this point */
+        }
+    }
+
+  /* now get the additional Delaunay points from the other processors */
+
+  while(nexport + T->Ndp > T->MaxNdp) /* nexport: number of points about to be received into T->DP */
+    {
+      T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+      T->MaxNdp = T->Indi.AllocFacNdp;
+#ifdef VERBOSE
+      printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp, T->Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+      T->DP -= 5;
+      T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point));
+      T->DP += 5;
+    }
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              /* get the Delaunay points */
+
+              MPI_Sendrecv(&DP_Buffer[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(point), MPI_BYTE, recvTask, TAG_DENS_B,
+                           &T->DP[T->Ndp + Send_offset[recvTask]], Send_count[recvTask] * sizeof(point), MPI_BYTE, recvTask,
+                           TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  T->Ndp += nexport;
+  count += nexport;
+
+  myfree(DP_Buffer);
+  myfree(ImportedDC);
+  myfree(ForeignDC);
+
+  mpi_printf("VORONOI: done with connected particles\n");
+
+  CPU_Step[CPU_MESH_DYNAMIC] += measure_time();
+
+  /* at this point, it might make sense to sort the Delaunay point again
+   * according to Peano-Hilbert, in an extended region that allows for the
+   * ghost regions
+   */
+
+  peano_hilbert_order_DP();
+
+  CPU_Step[CPU_PEANO] += measure_time();
+
+  return count;
+}
+
+/*! \brief Allocates the connection array and resets all connection lists.
+ *
+ *  \param[in] T Pointer to tessellation; T->Indi.AllocFacNvc sets the number of entries.
+ *
+ *  \return void
+ */
+void voronoi_init_connectivity(tessellation *T)
+{
+  int slot, cell, last;
+
+  mpi_printf("VORONOI: init connectivity\n");
+
+  MaxNvc = T->Indi.AllocFacNvc;
+  DC     = mymalloc_movable(&DC, "DC", MaxNvc * sizeof(connection));
+
+  Nvc = 0;
+
+  /* unused entries of DC are kept in a chained list (task == -1 marks an unused
+   * entry); initially the chain starts at entry 0 and runs through the whole array */
+  FirstUnusedConnection = 0;
+  for(slot = 0, last = MaxNvc - 1; slot < last; slot++)
+    {
+      DC[slot].task = -1;
+      DC[slot].next = slot + 1;
+    }
+  DC[last].task = -1;
+  DC[last].next = -1; /* the final entry ends the chain */
+
+  /* none of the NumGas cells has any connections yet */
+  for(cell = 0; cell < NumGas; cell++)
+    SphP[cell].first_connection = SphP[cell].last_connection = -1;
+
+  mpi_printf("VORONOI: done with init of connectivity\n");
+}
+
+/*! \brief Rebuilds the connection lists of active cells from the faces in T->VF.
+ *
+ *  \param[in,out] T Pointer to tessellation (T->Indi.AllocFacNvc is increased whenever DC has to grow).
+ *
+ *  \return void
+ */
+void voronoi_update_connectivity(tessellation *T)
+{
+  int idx, i, k, q, p_task, p_index, q_task, q_index, q_dp_index, q_image_flags;
+  MyIDType p_ID;
+
+  CPU_Step[CPU_MISC] += measure_time();
+
+  /* let's clear the connection lists of active particles */
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(i >= NumGas)
+        terminate("i >= NumGas");
+
+      q = SphP[i].first_connection;
+
+      if(q >= 0) /* we have connections, let's add them to the free list */
+        {
+          while(q >= 0)
+            {
+              Nvc--;
+              DC[q].task = -1; /* mark that this is unused */
+
+              if(q == SphP[i].last_connection) /* stop at the tail of this cell's chain */
+                break;
+
+              q = DC[q].next;
+            }
+
+          /* we add the new free spots at the beginning of the free list */
+          DC[SphP[i].last_connection].next = FirstUnusedConnection;
+          FirstUnusedConnection            = SphP[i].first_connection;
+
+          SphP[i].first_connection = -1;
+          SphP[i].last_connection  = -1;
+        }
+    }
+
+  for(i = 0; i < T->Nvf; i++) /* loop over all faces */
+    {
+      for(k = 0; k < 2; k++) /* k == 0: p1 is the owning side (p) and p2 the neighbour (q); k == 1: roles swapped */
+        {
+          point *DP = T->DP;
+          face *VF  = T->VF;
+
+          if(k == 0)
+            {
+              p_task        = DP[VF[i].p1].task;
+              p_index       = DP[VF[i].p1].index;
+              p_ID          = DP[VF[i].p1].ID;
+              q_task        = DP[VF[i].p2].task;
+              q_index       = DP[VF[i].p2].index;
+              q_dp_index    = VF[i].p2;
+              q_image_flags = (DP[VF[i].p2].image_flags & MASK);
+            }
+          else
+            {
+              p_task        = DP[VF[i].p2].task;
+              p_index       = DP[VF[i].p2].index;
+              p_ID          = DP[VF[i].p2].ID;
+              q_task        = DP[VF[i].p1].task;
+              q_index       = DP[VF[i].p1].index;
+              q_dp_index    = VF[i].p1;
+              q_image_flags = (DP[VF[i].p1].image_flags & MASK);
+            }
+
+          if(p_task == ThisTask && p_index >= 0 && p_index < NumGas) /* the owning side is a local gas cell */
+            {
+              if(TimeBinSynchronized[P[p_index].TimeBinHydro])
+                {
+                  if(P[p_index].Type != 0)
+                    continue;
+
+                  if(P[p_index].Mass == 0 && P[p_index].ID == 0)
+                    continue; /* skip cells that have been swallowed or dissolved */
+
+                  /* need to add the connection to the other point to this particle */
+
+                  if(FirstUnusedConnection < 0 || Nvc == MaxNvc) /* no free slot left: DC has to grow */
+                    {
+                      if(!(FirstUnusedConnection < 0 && Nvc == MaxNvc)) /* the two conditions are expected to coincide */
+                        {
+                          char buf[1000];
+                          sprintf(buf, "strange: FirstUnusedConnection=%d Nvc=%d MaxNvc=%d\n", FirstUnusedConnection, Nvc, MaxNvc);
+                          terminate(buf);
+                        }
+
+                      int n, old_MaxNvc = MaxNvc;
+                      T->Indi.AllocFacNvc *= ALLOC_INCREASE_FACTOR;
+                      MaxNvc = T->Indi.AllocFacNvc;
+#ifdef VERBOSE
+                      printf("Task=%d: increase memory allocation, MaxNvc=%d Indi.AllocFacNvc=%g\n", ThisTask, MaxNvc,
+                             T->Indi.AllocFacNvc);
+#endif /* #ifdef VERBOSE */
+                      DC = myrealloc_movable(DC, MaxNvc * sizeof(connection));
+                      DP = T->DP; /* refresh the local aliases after the reallocation */
+                      VF = T->VF; /* NOTE(review): presumably the movable realloc may relocate these blocks - confirm */
+
+                      FirstUnusedConnection = old_MaxNvc; /* the newly added slots form the new free list */
+                      for(n = old_MaxNvc; n < MaxNvc - 1; n++)
+                        {
+                          DC[n].next = n + 1;
+                          DC[n].task = -1;
+                        }
+                      DC[MaxNvc - 1].next = -1;
+                      DC[MaxNvc - 1].task = -1;
+                    }
+
+                  if(SphP[p_index].last_connection >= 0) /* append behind the cell's current tail */
+                    {
+                      DC[SphP[p_index].last_connection].next = FirstUnusedConnection;
+                      SphP[p_index].last_connection          = FirstUnusedConnection;
+                    }
+                  else /* first connection of this cell */
+                    {
+                      SphP[p_index].last_connection  = FirstUnusedConnection;
+                      SphP[p_index].first_connection = FirstUnusedConnection;
+                    }
+
+                  FirstUnusedConnection = DC[FirstUnusedConnection].next; /* take the slot just handed out off the free list */
+                  Nvc++;
+
+                  DC[SphP[p_index].last_connection].task        = q_task;
+                  DC[SphP[p_index].last_connection].image_flags = q_image_flags;
+                  DC[SphP[p_index].last_connection].ID          = p_ID;
+
+                  if(q_task == ThisTask && q_index >= NumGas) /* local neighbour index beyond the gas range: store it relative to NumGas */
+                    DC[SphP[p_index].last_connection].index = q_index - NumGas;
+                  else
+                    DC[SphP[p_index].last_connection].index = q_index;
+
+                  DC[SphP[p_index].last_connection].dp_index = q_dp_index;
+#ifdef TETRA_INDEX_IN_FACE
+                  DC[SphP[p_index].last_connection].dt_index = VF[i].dt_index;
+#endif                                                            /* #ifdef TETRA_INDEX_IN_FACE */
+                  DC[SphP[p_index].last_connection].vf_index = i; /* index to the corresponding face */
+
+                  if(SphP[p_index].last_connection >= MaxNvc)
+                    {
+                      terminate("this is wrong");
+                    }
+                }
+            }
+
+#ifdef DOUBLE_STENCIL
+          int index; /* Delaunay point on the owning side of the face for this k */
+          if(k == 0)
+            index = VF[i].p1;
+          else
+            index = VF[i].p2;
+
+          if(!(p_task == ThisTask && p_index >= 0 && p_index < NumGas) && DP[index].flag_primary_triangle > 0 && index >= 0)
+            {
+              /* need to add the connection to the other point to this particle */
+
+              if(FirstUnusedConnection < 0 || Nvc == MaxNvc) /* no free slot left: DC has to grow */
+                {
+                  if(!(FirstUnusedConnection < 0 && Nvc == MaxNvc))
+                    {
+                      char buf[1000];
+                      sprintf(buf, "strange: FirstUnusedConnection=%d Nvc=%d MaxNvc=%d\n", FirstUnusedConnection, Nvc, MaxNvc);
+                      terminate(buf);
+                    }
+
+                  int n, old_MaxNvc = MaxNvc;
+                  T->Indi.AllocFacNvc *= ALLOC_INCREASE_FACTOR;
+                  MaxNvc = T->Indi.AllocFacNvc;
+#ifdef VERBOSE
+                  printf("Task=%d: increase memory allocation, MaxNvc=%d Indi.AllocFacNvc=%g\n", ThisTask, MaxNvc,
+                         T->Indi.AllocFacNvc);
+#endif /* #ifdef VERBOSE */
+                  DC = myrealloc_movable(DC, MaxNvc * sizeof(connection));
+                  DP = T->DP; /* refresh the local aliases after the reallocation */
+                  VF = T->VF;
+
+                  FirstUnusedConnection = old_MaxNvc; /* the newly added slots form the new free list */
+                  for(n = old_MaxNvc; n < MaxNvc - 1; n++)
+                    {
+                      DC[n].next = n + 1;
+                      DC[n].task = -1;
+                    }
+                  DC[MaxNvc - 1].next = -1;
+                  DC[MaxNvc - 1].task = -1;
+                }
+
+              if(DP[index].last_connection >= 0) /* here the chain is anchored in the Delaunay point, not in SphP */
+                {
+                  DC[DP[index].last_connection].next = FirstUnusedConnection;
+                  DP[index].last_connection          = FirstUnusedConnection;
+                }
+              else
+                {
+                  DP[index].last_connection  = FirstUnusedConnection;
+                  DP[index].first_connection = FirstUnusedConnection;
+                }
+
+              FirstUnusedConnection = DC[FirstUnusedConnection].next;
+              Nvc++;
+
+              DC[DP[index].last_connection].task        = q_task;
+              DC[DP[index].last_connection].image_flags = q_image_flags;
+              DC[DP[index].last_connection].ID          = p_ID;
+
+              if(q_task == ThisTask && q_index >= NumGas)
+                DC[DP[index].last_connection].index = q_index - NumGas;
+              else
+                DC[DP[index].last_connection].index = q_index;
+
+              DC[DP[index].last_connection].dp_index = q_dp_index;
+
+              DC[DP[index].last_connection].vf_index = i; /* index to the corresponding face */
+
+              if(DP[index].last_connection >= MaxNvc)
+                {
+                  terminate("this is wrong");
+                }
+            }
+#endif /* #ifdef DOUBLE_STENCIL */
+        }
+    }
+
+  mpi_printf("VORONOI: done with updating connectivity.\n");
+
+  CPU_Step[CPU_MESH_DYNAMIC] += measure_time();
+}
+
+/*! \brief Returns all connections of a cell to the free list.
+ *
+ *  \param[in] i Index of cell in SphP.
+ *
+ *  \return void
+ */
+void voronoi_remove_connection(int i)
+{
+  const int head = SphP[i].first_connection;
+  const int tail = SphP[i].last_connection;
+  int cur;
+
+  if(head < 0) /* the cell owns no connections, nothing to release */
+    return;
+
+  /* walk the chain from head to tail and flag every entry as unused */
+  for(cur = head; cur >= 0; cur = DC[cur].next)
+    {
+      Nvc--;
+      DC[cur].task = -1;
+      if(cur == tail)
+        break;
+    }
+
+  /* splice the released chain in front of the free list */
+  DC[tail].next         = FirstUnusedConnection;
+  FirstUnusedConnection = head;
+  SphP[i].first_connection = -1;
+  SphP[i].last_connection  = -1;
+}
+
+/*! \brief Ordering function for foreign connection objects.
+ *
+ *  Sort keys (highest priority first):
+ *      task
+ *      index
+ *      image_flags
+ *
+ *  \param[in] a First object (struct foreign_connection).
+ *  \param[in] b Second object (struct foreign_connection).
+ *
+ *  \return (-1,0,1); -1: a < b, +1: a > b, 0: all keys equal.
+ */
+int compare_foreign_connection(const void *a, const void *b)
+{
+  const struct foreign_connection *lhs = (const struct foreign_connection *)a;
+  const struct foreign_connection *rhs = (const struct foreign_connection *)b;
+  int order;
+  /* key 1: task; (x > y) - (x < y) evaluates to -1, 0 or +1 */
+  order = (lhs->task > rhs->task) - (lhs->task < rhs->task);
+  if(order != 0)
+    return order;
+
+  /* key 2: index */
+  order = (lhs->index > rhs->index) - (lhs->index < rhs->index);
+  if(order != 0)
+    return order;
+
+  /* key 3: image_flags */
+  order = (lhs->image_flags > rhs->image_flags) - (lhs->image_flags < rhs->image_flags);
+  if(order != 0)
+    return order;
+
+  return 0;
+}
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_exchange.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_exchange.c
new file mode 100644
index 0000000000..9b2f79684e
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_exchange.c
@@ -0,0 +1,531 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi_exchange.c
+ * \date        05/2018
+ * \brief       Algorithms that handle communication of Voronoi mesh data
+ *              between MPI tasks.
+ * \details     contains functions:
+ *                void mesh_setup_exchange(void)
+ *                void exchange_primitive_variables(void)
+ *                void exchange_primitive_variables_and_gradients(void)
+ *                int compare_primexch(const void *a, const void *b)
+ *                void voronoi_update_ghost_velvertex(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 22.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+/*! \brief Auxiliary data structure for communication of primitive variables.
+ *         rank stores the entry's position in its source array; task and index are the sort keys of compare_primexch.
+ */
+struct data_primexch_compare
+{
+  int rank, task, index;
+} * SortPrimExch, *SortPrimExch2;
+
+/*! \brief Prepares exchange of primitive variables.
+ *         Builds send/receive counts and offsets, remaps Mesh.DP[].index of foreign points, allocates PrimExch and GradExch.
+ *  \return void
+ */
+void mesh_setup_exchange(void)
+{
+  if(All.TotNumGas == 0)
+    return;
+
+  TIMER_START(CPU_MESH_EXCHANGE);
+
+  int listp;
+  struct indexexch
+  {
+    int task, index;
+  } * tmpIndexExch, *IndexExch;
+  int i, j, p, task, off, count;
+  int ngrp, recvTask, place;
+
+  for(j = 0; j < NTask; j++) /* pass 1: count the entries destined for each task */
+    Mesh_Send_count[j] = 0;
+
+  for(i = 0; i < NumGasInMesh; i++)
+    {
+      p = List_InMesh[i];
+
+      listp = List_P[p].firstexport;
+      while(listp >= 0)
+        {
+          if(ListExports[listp].origin != ThisTask) /* only entries that originate from another task are sent */
+            {
+              Mesh_Send_count[ListExports[listp].origin]++;
+            }
+          listp = ListExports[listp].nextexport;
+        }
+    }
+
+  MPI_Alltoall(Mesh_Send_count, 1, MPI_INT, Mesh_Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, Mesh_nimport = 0, Mesh_nexport = 0, Mesh_Recv_offset[0] = 0, Mesh_Send_offset[0] = 0; j < NTask; j++)
+    {
+      Mesh_nimport += Mesh_Recv_count[j];
+      Mesh_nexport += Mesh_Send_count[j];
+
+      if(j > 0) /* offsets are the running sums of the counts of all lower-numbered tasks */
+        {
+          Mesh_Send_offset[j] = Mesh_Send_offset[j - 1] + Mesh_Send_count[j - 1];
+          Mesh_Recv_offset[j] = Mesh_Recv_offset[j - 1] + Mesh_Recv_count[j - 1];
+        }
+    }
+
+  IndexExch    = (struct indexexch *)mymalloc("IndexExch", Mesh_nimport * sizeof(struct indexexch));
+  tmpIndexExch = (struct indexexch *)mymalloc("tmpIndexExch", Mesh_nexport * sizeof(struct indexexch));
+
+  /* pass 2: prepare data for export; Mesh_Send_count is reset and reused as the per-task fill position */
+  for(j = 0; j < NTask; j++)
+    Mesh_Send_count[j] = 0;
+
+  for(i = 0; i < NumGasInMesh; i++)
+    {
+      p = List_InMesh[i];
+
+      listp = List_P[p].firstexport;
+      while(listp >= 0)
+        {
+          if((task = ListExports[listp].origin) != ThisTask)
+            {
+              place = ListExports[listp].index;
+              off   = Mesh_Send_offset[task] + Mesh_Send_count[task]++;
+
+              tmpIndexExch[off].task  = ThisTask;
+              tmpIndexExch[off].index = place;
+            }
+          listp = ListExports[listp].nextexport;
+        }
+    }
+
+  /* exchange data */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Mesh_Send_count[recvTask] > 0 || Mesh_Recv_count[recvTask] > 0)
+            {
+              /* get the particles */
+              MPI_Sendrecv(&tmpIndexExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * sizeof(struct indexexch), MPI_BYTE,
+                           recvTask, TAG_DENS_A, &IndexExch[Mesh_Recv_offset[recvTask]],
+                           Mesh_Recv_count[recvTask] * sizeof(struct indexexch), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  myfree(tmpIndexExch);
+
+  /* now we need to associate the imported data with the points stored in the DP[] array */
+
+  SortPrimExch = (struct data_primexch_compare *)mymalloc("SortPrimExch", Mesh_nimport * sizeof(struct data_primexch_compare));
+
+  for(i = 0; i < Mesh_nimport; i++)
+    {
+      SortPrimExch[i].rank  = i; /* position of the entry in the receive buffer */
+      SortPrimExch[i].task  = IndexExch[i].task;
+      SortPrimExch[i].index = IndexExch[i].index;
+    }
+
+  /* let's sort the data according to task and index */
+  mysort(SortPrimExch, Mesh_nimport, sizeof(struct data_primexch_compare), compare_primexch);
+
+  SortPrimExch2 = (struct data_primexch_compare *)mymalloc("SortPrimExch2", Mesh.Ndp * sizeof(struct data_primexch_compare));
+
+  for(i = 0, count = 0; i < Mesh.Ndp; i++)
+    {
+      if(Mesh.DP[i].task != ThisTask) /* collect only the points that belong to other tasks */
+        {
+          SortPrimExch2[count].rank  = i; /* position of the point in Mesh.DP */
+          SortPrimExch2[count].task  = Mesh.DP[i].task;
+          SortPrimExch2[count].index = Mesh.DP[i].index;
+          count++;
+        }
+    }
+
+  /* let's sort according to task and index */
+  mysort(SortPrimExch2, count, sizeof(struct data_primexch_compare), compare_primexch);
+
+  /* count can be larger than nimport because a foreign particle can appear
+     multiple times on the local domain, due to periodicity */
+
+  for(i = 0, j = 0; i < count; i++)
+    {
+      if(SortPrimExch2[i].task != SortPrimExch[j].task || SortPrimExch2[i].index != SortPrimExch[j].index)
+        j++; /* key changed: advance by one imported entry */
+
+      if(j >= Mesh_nimport)
+        terminate("j >= Mesh_nimport");
+
+      Mesh.DP[SortPrimExch2[i].rank].index =
+          SortPrimExch[j].rank; /* note: this change is now permanent and available for next exchange */
+    }
+
+  myfree(SortPrimExch2);
+  myfree(SortPrimExch);
+  myfree(IndexExch);
+
+  /* allocate structures needed to exchange the actual information for ghost cells */
+  PrimExch = (struct primexch *)mymalloc_movable(&PrimExch, "PrimExch", Mesh_nimport * sizeof(struct primexch));
+  GradExch = (struct grad_data *)mymalloc_movable(&GradExch, "GradExch", Mesh_nimport * sizeof(struct grad_data));
+
+  TIMER_STOP(CPU_MESH_EXCHANGE);
+}
+
+/*! \brief Sends the primitive variables of exported cells to other MPI tasks.
+ *
+ *  This routine is called before gradient calculation, afterwards,
+ *  exchange_primitive_variables_and_gradients is called.
+ *
+ *  \return void
+ */
+void exchange_primitive_variables(void)
+{
+  struct primexch *sendBuf, *dst;
+  int n, comp, listp;
+  int dest, cell;
+  int level, partner;
+
+  if(All.TotNumGas == 0)
+    return;
+
+  TIMER_START(CPU_MESH_EXCHANGE);
+
+  /* staging buffer with one record per exported entry */
+  sendBuf = (struct primexch *)mymalloc("tmpPrimExch", Mesh_nexport * sizeof(struct primexch));
+
+  /* the send counters double as per-task fill positions and are rebuilt while packing */
+  for(dest = 0; dest < NTask; dest++)
+    Mesh_Send_count[dest] = 0;
+
+  /* pack one record for every export entry that originates from another task */
+  for(n = 0; n < NumGasInMesh; n++)
+    {
+      for(listp = List_P[List_InMesh[n]].firstexport; listp >= 0; listp = ListExports[listp].nextexport)
+        {
+          dest = ListExports[listp].origin;
+          if(dest == ThisTask)
+            continue;
+
+          cell = ListExports[listp].index;
+          dst  = &sendBuf[Mesh_Send_offset[dest] + Mesh_Send_count[dest]++];
+
+          /* basic cell state */
+          dst->Volume   = SphP[cell].Volume;
+          dst->Density  = SphP[cell].Density;
+          dst->Pressure = SphP[cell].Pressure;
+
+#ifdef MHD
+          for(comp = 0; comp < 3; comp++)
+            dst->B[comp] = SphP[cell].B[comp];
+#ifdef MHD_POWELL
+          dst->DivB = SphP[cell].DivB;
+#endif /* #ifdef MHD_POWELL */
+#endif /* #ifdef MHD */
+
+          /* mass, area and time-step bookkeeping */
+          dst->OldMass            = SphP[cell].OldMass;
+          dst->SurfaceArea        = SphP[cell].SurfaceArea;
+          dst->ActiveArea         = SphP[cell].ActiveArea;
+          dst->TimeBinHydro       = P[cell].TimeBinHydro;
+          dst->TimeLastPrimUpdate = SphP[cell].TimeLastPrimUpdate;
+
+#ifdef MAXSCALARS
+          /* scalars are read from the SphP entry through the byte offsets in scalar_elements */
+          for(comp = 0; comp < N_Scalar; comp++)
+            dst->Scalars[comp] = *(MyFloat *)(((char *)(&SphP[cell])) + scalar_elements[comp].offset);
+#endif /* #ifdef MAXSCALARS */
+
+          /* gas velocity and cell centre, component by component */
+          for(comp = 0; comp < 3; comp++)
+            {
+              dst->VelGas[comp] = P[cell].Vel[comp];
+              dst->Center[comp] = SphP[cell].Center[comp];
+            }
+
+          dst->Csnd = get_sound_speed(cell);
+        }
+    }
+
+  /* pairwise exchange: in round 'level' this task talks to task (ThisTask XOR level) */
+  for(level = 1; level < (1 << PTask); level++)
+    {
+      partner = ThisTask ^ level;
+
+      if(partner >= NTask)
+        continue;
+
+      /* nothing to send and nothing to receive for this pair */
+      if(Mesh_Send_count[partner] <= 0 && Mesh_Recv_count[partner] <= 0)
+        continue;
+
+      MPI_Sendrecv(&sendBuf[Mesh_Send_offset[partner]], Mesh_Send_count[partner] * sizeof(struct primexch), MPI_BYTE,
+                   partner, TAG_DENS_A, &PrimExch[Mesh_Recv_offset[partner]],
+                   Mesh_Recv_count[partner] * sizeof(struct primexch), MPI_BYTE, partner, TAG_DENS_A, MPI_COMM_WORLD,
+                   MPI_STATUS_IGNORE);
+    }
+
+  myfree(sendBuf);
+
+  TIMER_STOP(CPU_MESH_EXCHANGE);
+}
+
+/*! \brief Sends primitive variables and gradients of exported cells to other MPI tasks.
+ *
+ *  This routine is called after gradient calculation. It also lowers SphP[].OldMass to P[].Mass where needed.
+ *
+ *  \return void
+ */
+void exchange_primitive_variables_and_gradients(void)
+{
+  struct primexch *primSend, *dst;
+  struct grad_data *gradSend;
+  int n, comp, listp, gas, dest, cell, slot;
+  int level, partner;
+
+  if(All.TotNumGas == 0)
+    return;
+
+  TIMER_START(CPU_MESH_EXCHANGE);
+
+  /* staging buffers: one primitive record and one gradient record per exported entry */
+  primSend = (struct primexch *)mymalloc("tmpPrimExch", Mesh_nexport * sizeof(struct primexch));
+  gradSend = (struct grad_data *)mymalloc("tmpGradExch", Mesh_nexport * sizeof(struct grad_data));
+
+  /* the send counters double as per-task fill positions and are rebuilt while packing */
+  for(dest = 0; dest < NTask; dest++)
+    Mesh_Send_count[dest] = 0;
+
+  for(n = 0; n < NumGasInMesh; n++)
+    {
+      gas = List_InMesh[n];
+
+      /* in case previous steps already lowered the Mass, update OldMass to yield together with metallicity vector conservative
+       * estimate of metal mass of each species contained in cell */
+      if(P[gas].Mass < SphP[gas].OldMass)
+        SphP[gas].OldMass = P[gas].Mass;
+
+      for(listp = List_P[gas].firstexport; listp >= 0; listp = ListExports[listp].nextexport)
+        {
+          dest = ListExports[listp].origin;
+          if(dest == ThisTask)
+            continue;
+
+          cell = ListExports[listp].index;
+          slot = Mesh_Send_offset[dest] + Mesh_Send_count[dest]++;
+          dst  = &primSend[slot];
+
+          /* basic cell state */
+          dst->Volume   = SphP[cell].Volume;
+          dst->Density  = SphP[cell].Density;
+          dst->Pressure = SphP[cell].Pressure;
+
+#ifdef MHD
+          for(comp = 0; comp < 3; comp++)
+            dst->B[comp] = SphP[cell].B[comp];
+#ifdef MHD_POWELL
+          dst->DivB = SphP[cell].DivB;
+#endif /* #ifdef MHD_POWELL */
+#endif /* #ifdef MHD */
+
+          /* mass, area and time-step bookkeeping */
+          dst->OldMass            = SphP[cell].OldMass;
+          dst->SurfaceArea        = SphP[cell].SurfaceArea;
+          dst->ActiveArea         = SphP[cell].ActiveArea;
+          dst->TimeBinHydro       = P[cell].TimeBinHydro;
+          dst->TimeLastPrimUpdate = SphP[cell].TimeLastPrimUpdate;
+
+#ifdef MAXSCALARS
+          /* scalars are read from the SphP entry through the byte offsets in scalar_elements */
+          for(comp = 0; comp < N_Scalar; comp++)
+            dst->Scalars[comp] = *(MyFloat *)(((char *)(&SphP[cell])) + scalar_elements[comp].offset);
+#endif /* #ifdef MAXSCALARS */
+
+          for(comp = 0; comp < 3; comp++)
+            {
+              dst->VelGas[comp]    = P[cell].Vel[comp];
+              dst->Center[comp]    = SphP[cell].Center[comp];
+              dst->VelVertex[comp] = SphP[cell].VelVertex[comp];
+            }
+
+          gradSend[slot] = SphP[cell].Grad;
+
+          dst->Csnd = get_sound_speed(cell);
+        }
+    }
+
+  /* pairwise exchange: in round 'level' this task talks to task (ThisTask XOR level) */
+  for(level = 1; level < (1 << PTask); level++)
+    {
+      partner = ThisTask ^ level;
+
+      if(partner >= NTask)
+        continue;
+
+      /* nothing to send and nothing to receive for this pair */
+      if(Mesh_Send_count[partner] <= 0 && Mesh_Recv_count[partner] <= 0)
+        continue;
+
+      MPI_Sendrecv(&primSend[Mesh_Send_offset[partner]], Mesh_Send_count[partner] * sizeof(struct primexch), MPI_BYTE,
+                   partner, TAG_DENS_A, &PrimExch[Mesh_Recv_offset[partner]],
+                   Mesh_Recv_count[partner] * sizeof(struct primexch), MPI_BYTE, partner, TAG_DENS_A, MPI_COMM_WORLD,
+                   MPI_STATUS_IGNORE);
+
+      /* the gradients travel in a second message with the same counts and offsets, under their own tag */
+      MPI_Sendrecv(&gradSend[Mesh_Send_offset[partner]], Mesh_Send_count[partner] * sizeof(struct grad_data), MPI_BYTE,
+                   partner, TAG_HYDRO_A, &GradExch[Mesh_Recv_offset[partner]],
+                   Mesh_Recv_count[partner] * sizeof(struct grad_data), MPI_BYTE, partner, TAG_HYDRO_A, MPI_COMM_WORLD,
+                   MPI_STATUS_IGNORE);
+    }
+
+  myfree(gradSend);
+  myfree(primSend);
+
+  TIMER_STOP(CPU_MESH_EXCHANGE);
+
+  /* note: because the sequence is the same as before, we don't have to do the sorts again */
+}
+
+/*! \brief Ordering function for data_primexch_compare objects.
+ *
+ *  Sort keys (most important first):
+ *      task
+ *      index
+ *
+ *  \param[in] a Pointer to first data primexch compare object.
+ *  \param[in] b Pointer to second data primexch compare object.
+ *
+ *  \return (-1,0,1); -1 if a < b, +1 if a > b, 0 if both keys are equal.
+ */
+int compare_primexch(const void *a, const void *b)
+{
+  const struct data_primexch_compare *lhs = (const struct data_primexch_compare *)a;
+  const struct data_primexch_compare *rhs = (const struct data_primexch_compare *)b;
+  int order;
+
+  /* key 1: task; (x > y) - (x < y) evaluates to -1, 0 or +1 */
+  order = (lhs->task > rhs->task) - (lhs->task < rhs->task);
+  if(order != 0)
+    return order;
+  /* key 2: index */
+  order = (lhs->index > rhs->index) - (lhs->index < rhs->index);
+  if(order != 0)
+    return order;
+  return 0;
+}
+
+/*! \brief Communicates vertex velocities of exported cells across MPI tasks.
+ *
+ *  \return void
+ */
+#ifdef OUTPUT_VERTEX_VELOCITY_DIVERGENCE
+void voronoi_update_ghost_velvertex(void)
+{
+  CPU_Step[CPU_MISC] += measure_time();
+
+  int listp;
+  int i, j, p, task, off;
+  int ngrp, recvTask, place;
+  struct velvertex_data
+  {
+    MyFloat VelVertex[3];
+  } * tmpVelVertexExch, *tmpVelVertexRecv;
+
+  tmpVelVertexExch = (struct velvertex_data *)mymalloc("tmpVelVertexExch", Mesh_nexport * sizeof(struct velvertex_data));
+
+  /* prepare data for export */
+  for(j = 0; j < NTask; j++)
+    Mesh_Send_count[j] = 0;
+
+  /* pack VelVertex once for every export entry that originates from another task */
+  for(i = 0; i < NumGasInMesh; i++)
+    {
+      p = List_InMesh[i];
+
+      listp = List_P[p].firstexport;
+      while(listp >= 0)
+        {
+          if((task = ListExports[listp].origin) != ThisTask)
+            {
+              place = ListExports[listp].index;
+              off   = Mesh_Send_offset[task] + Mesh_Send_count[task]++;
+
+              for(j = 0; j < 3; j++)
+                tmpVelVertexExch[off].VelVertex[j] = SphP[place].VelVertex[j];
+            }
+          listp = ListExports[listp].nextexport;
+        }
+    }
+
+  /* exchange data */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Mesh_Send_count[recvTask] > 0 || Mesh_Recv_count[recvTask] > 0)
+            {
+              /* scratch buffer for the records that recvTask sends to this task */
+              tmpVelVertexRecv =
+                  (struct velvertex_data *)mymalloc("tmpVelVertexRecv", Mesh_Recv_count[recvTask] * sizeof(struct velvertex_data));
+
+              /* get the values */
+              MPI_Sendrecv(&tmpVelVertexExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * sizeof(struct velvertex_data),
+                           MPI_BYTE, recvTask, TAG_DENS_A, tmpVelVertexRecv, Mesh_Recv_count[recvTask] * sizeof(struct velvertex_data),
+                           MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              /* store the RECEIVED values in PrimExch at the receive offset of recvTask; reading the send buffer
+               * here would copy this task's own cells and could run past its Mesh_nexport entries */
+              for(i = 0; i < Mesh_Recv_count[recvTask]; i++)
+                {
+                  for(j = 0; j < 3; j++)
+                    PrimExch[Mesh_Recv_offset[recvTask] + i].VelVertex[j] = tmpVelVertexRecv[i].VelVertex[j];
+                }
+
+              myfree(tmpVelVertexRecv);
+            }
+        }
+    }
+
+  myfree(tmpVelVertexExch);
+
+  CPU_Step[CPU_SET_VERTEXVELS] += measure_time();
+}
+#endif /* #ifdef OUTPUT_VERTEX_VELOCITY_DIVERGENCE */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_ghost_search.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_ghost_search.c
new file mode 100644
index 0000000000..6c147b7901
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_ghost_search.c
@@ -0,0 +1,1773 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi_ghost_search.c
+ * \date        05/2018
+ * \brief       Algorithms to search for (ghost) cells from other domains.
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                int voronoi_ghost_search(tessellation * TT)
+ *                static void voronoi_pick_up_additional_DP_points(void)
+ *                int voronoi_ghost_search_evaluate(tessellation * T,
+ *                  int target, int mode, int q, int thread_id)
+ *                int ngb_treefind_ghost_search(tessellation * T, MyDouble
+ *                  searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat
+ *                  maxdist, int target, int origin, int *startnode, int
+ *                  bitflags, int mode, int *nexport, int *nsend_local)
+ *                int ngb_treefind_ghost_search(tessellation * T, MyDouble
+ *                  searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat
+ *                  maxdist, int target, int origin, int mode, int thread_id,
+ *                  int numnodes, int *firstnode)
+ *                int count_undecided_tetras(tessellation * T)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+#if !defined(ONEDIMS)
+
+static void voronoi_pick_up_additional_DP_points(void);
+
+static tessellation *T;
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3];    /* search center; particle2in() copies DTC[i].cx/cy/cz here */
+  MyDouble RefPos[3]; /* reference point; particle2in() copies DP[q].x/y/z here */
+  MyFloat MaxDist;    /* Hsml associated with the reference point; handed to the tree search as maxdist */
+  int Origin;         /* rank of the requesting task (set to ThisTask) */
+
+  int Firstnode; /* value of the firstnode argument of particle2in() */
+
+#ifdef EXTENDED_GHOST_SEARCH
+  unsigned char BitFlagList[NODELISTLENGTH]; /* NOTE(review): not filled by particle2in() -- confirm who sets it */
+#endif /* #ifdef EXTENDED_GHOST_SEARCH */
+} data_in;
+
+static data_in *DataGet, *DataIn; /* DataGet is read by voronoi_ghost_search_evaluate() in imported mode */
+
+/*! \brief Routine that fills the relevant particle/cell data into the input
+ *         structure defined above. Needed by generic_comm_helpers2.
+ *
+ *  The reference point is the first vertex of tetrahedron i that qualifies
+ *  as search origin; the run is terminated if no vertex qualifies.
+ *
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index of the tetrahedron in the DT and DTC arrays of T.
+ *  \param[in] firstnode First node of communication.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  point *DP         = T->DP;
+  tetra *DT         = T->DT;
+  tetra_center *DTC = T->DTC;
+  int k, q = -1;
+
+  for(k = 0; k < (NUMDIMS + 1); k++)
+    {
+      const int dp = DT[i].p[k];
+#ifndef DOUBLE_STENCIL
+      if(DP[dp].task == ThisTask && DP[dp].index >= 0 && DP[dp].index < NumGas &&
+         TimeBinSynchronized[P[DP[dp].index].TimeBinHydro])
+#else  /* #ifndef DOUBLE_STENCIL */
+      if(dp >= 0 && DP[dp].flag_primary_triangle) /* check the index before it addresses DP */
+#endif /* #ifndef DOUBLE_STENCIL #else */
+        {
+          q = dp;
+          break;
+        }
+    }
+
+  if(q == -1)
+    terminate("q=-1");
+
+  in->Pos[0] = DTC[i].cx;
+  in->Pos[1] = DTC[i].cy;
+  in->Pos[2] = DTC[i].cz;
+
+  in->RefPos[0] = DP[q].x;
+  in->RefPos[1] = DP[q].y;
+  in->RefPos[2] = DP[q].z;
+
+  in->Origin    = ThisTask;
+  in->Firstnode = firstnode;
+
+  /* search limit; has to agree with the local branch of voronoi_ghost_search_evaluate() */
+#ifndef DOUBLE_STENCIL
+  in->MaxDist = SphP[DP[q].index].Hsml;
+#else  /* #ifndef DOUBLE_STENCIL */
+  in->MaxDist = DP[q].Hsml; /* q need not be a local cell here, so SphP cannot be indexed with it */
+#endif /* #ifndef DOUBLE_STENCIL #else */
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  int Count; /* number of points found: return value of ngb_treefind_ghost_search() */
+} data_out;
+
+static data_out *DataResult, *DataOut; /* DataResult[target] is written by voronoi_ghost_search_evaluate() in imported mode */
+
+/*! \brief Merges a search result into the tetrahedron flags. Needed by
+ *         generic_comm_helpers2.
+ *
+ *  If the search found at least one point, the flag bit with value 2 of
+ *  T->DTF[i] is cleared again.
+ *
+ *  \param[in] out Search result (number of points found).
+ *  \param[in] i Index in the DTF array of the tessellation T.
+ *  \param[in] mode Only MODE_LOCAL_PARTICLES and MODE_IMPORTED_PARTICLES have an effect.
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  if((mode == MODE_LOCAL_PARTICLES || mode == MODE_IMPORTED_PARTICLES) && out->Count != 0)
+    T->DTF[i] &= ~2;
+}
+
+#include "../../utils/generic_comm_helpers2.h"
+
+#ifdef EXTENDED_GHOST_SEARCH
+/*! Data structure for extended ghost search: one image flag per exported node-list entry.
+ *  NOTE(review): the extended tree search also writes a NodeList member that is not declared here -- confirm. */
+static struct data_nodelist_special
+{
+  unsigned char BitFlagList[NODELISTLENGTH]; /* receives image_flag for each node-list entry */
+} * DataNodeListSpecial;
+#endif /* #ifdef EXTENDED_GHOST_SEARCH */
+
+static point *DP_Buffer;                /* points to be sent to other tasks in voronoi_pick_up_additional_DP_points() */
+static int MaxN_DP_Buffer, N_DP_Buffer; /* allocated size of DP_Buffer / presumably its fill level -- confirm */
+static int NadditionalPoints;           /* points imported so far; returned by voronoi_ghost_search() */
+static int *send_count_new;             /* per task: number of DP_Buffer entries to send in the current pass */
+
+/*! \brief Work loop over the locally stored Delaunay tetrahedra.
+ *
+ *  Resets the export flags of the calling thread, then fetches tetrahedron
+ *  indices from the global counter NextParticle and hands every tetrahedron
+ *  that is not yet flagged as tested (flag value 2 in T->DTF) to the
+ *  *_evaluate function in MODE_LOCAL_PARTICLES. The loop ends when all
+ *  T->Ndt tetrahedra have been fetched or when the export space of the
+ *  calling thread has dropped below MinSpace.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  const int thread_id = get_thread_num();
+  int task, idt, k, q;
+
+  /* reset the export flags of this thread for all tasks */
+  for(task = 0; task < NTask; task++)
+    Thread[thread_id].Exportflag[task] = -1;
+
+  for(;;)
+    {
+      /* stop as soon as the export space of this thread is below MinSpace */
+      if(Thread[thread_id].ExportSpace < MinSpace)
+        break;
+
+      /* fetch the next tetrahedron index */
+      idt = NextParticle++;
+
+      if(idt >= T->Ndt)
+        break;
+
+      if(T->DTF[idt] & 2) /* flagged as tested ok, nothing to do */
+        continue;
+
+      /* mark it as tested now; the flag has to be cleared again if a particle is found */
+      T->DTF[idt] |= 2;
+
+      point *DP = T->DP;
+      tetra *DT = T->DT;
+
+      /* tetrahedra with t[0] < 0 (deleted?) and those with a vertex at DPinfinity are skipped */
+      if(DT[idt].t[0] < 0)
+        continue;
+
+      if(DT[idt].p[0] == DPinfinity || DT[idt].p[1] == DPinfinity || DT[idt].p[2] == DPinfinity)
+        continue;
+
+#ifndef TWODIMS
+      if(DT[idt].p[3] == DPinfinity)
+        continue;
+#endif /* #ifndef TWODIMS */
+
+      /* pick the first vertex that qualifies as reference point */
+      q = -1;
+      for(k = 0; k < (NUMDIMS + 1); k++)
+        {
+          const int dp = DT[idt].p[k];
+
+#ifndef DOUBLE_STENCIL
+          /* wanted: a local gas cell in a synchronized time bin */
+          if(DP[dp].task != ThisTask || DP[dp].index < 0 || DP[dp].index >= NumGas)
+            continue;
+
+          if(!TimeBinSynchronized[P[DP[dp].index].TimeBinHydro])
+            continue;
+#else  /* #ifndef DOUBLE_STENCIL */
+          /* double stencil: wanted is a point that belongs to a primary triangle */
+          if(!(DP[dp].flag_primary_triangle && dp >= 0))
+            continue;
+#endif /* #ifndef DOUBLE_STENCIL #else */
+
+          q = dp;
+          break;
+        }
+
+      /* no vertex qualified: there is no need to test this triangle */
+      if(k == (NUMDIMS + 1))
+        continue;
+
+      /* consistency check on the selected vertex */
+      if(q == -1)
+        terminate("q==-1");
+
+      /* run the search for this tetrahedron with q as reference point */
+      voronoi_ghost_search_evaluate(T, idt, MODE_LOCAL_PARTICLES, q, thread_id);
+    }
+}
+
+/*! \brief Work loop over the requests received from other tasks.
+ *
+ *  Every one of the Nimport imported entries is passed to the *_evaluate
+ *  function in MODE_IMPORTED_PARTICLES. The reference point index q is not
+ *  read by the evaluate function in this mode, so 0 is passed for it.
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  const int threadid = get_thread_num();
+  int next           = 0;
+
+  /* now do the particles that were sent to us */
+  while(next < Nimport)
+    {
+      /* entry 'next' of the import buffer holds the request data */
+      const int target = next;
+
+      voronoi_ghost_search_evaluate(T, target, MODE_IMPORTED_PARTICLES, 0, threadid);
+
+      next = target + 1;
+    }
+}
+
+/*! \brief Main routine to perform ghost search.
+ *
+ *  Repeats the cycle of local search, data exchange and pick-up of the new
+ *  points until every task reports that it has fetched all its tetrahedra.
+ *
+ *  \param[in, out] TT Pointer to tessellation.
+ *
+ *  \return Number of additional points.
+ */
+int voronoi_ghost_search(tessellation *TT)
+{
+  int task, this_done, tasks_done;
+
+  T                 = TT;
+  NadditionalPoints = 0;
+
+  /* buffers for the per-task send counts and for the points to be sent */
+  send_count_new = (int *)mymalloc_movable(&send_count_new, "send_count_new", NTask * sizeof(int));
+
+  MaxN_DP_Buffer = T->Indi.AllocFacN_DP_Buffer;
+  DP_Buffer      = (point *)mymalloc_movable(&DP_Buffer, "DP_Buffer", MaxN_DP_Buffer * sizeof(point));
+
+#ifdef DOUBLE_STENCIL
+  {
+    point *DP = T->DP;
+    tetra *DT = T->DT;
+    int idp, idt, k;
+
+    for(idp = 0; idp < T->Ndp; idp++)
+      DP[idp].flag_primary_triangle = 0;
+
+    for(idt = 0; idt < T->Ndt; idt++)
+      {
+        int has_local = 0;
+
+        for(k = 0; k < (NUMDIMS + 1) && !has_local; k++)
+          {
+            const int dp = DT[idt].p[k];
+            has_local    = DP[dp].task == ThisTask && DP[dp].index >= 0 && DP[dp].index < NumGas &&
+                        TimeBinSynchronized[P[DP[dp].index].TimeBinHydro];
+          }
+
+        if(!has_local)
+          continue;
+
+        /* the triangle has a local point: flag all of its points */
+        for(k = 0; k < (NUMDIMS + 1); k++)
+          DP[DT[idt].p[k]].flag_primary_triangle = 1;
+      }
+  }
+#endif /* #ifdef DOUBLE_STENCIL */
+
+  generic_set_MaxNexport();
+  NextParticle = 0;
+
+  do
+    {
+      /* nothing is queued for any task at the start of a pass */
+      N_DP_Buffer = 0;
+      for(task = 0; task < NTask; task++)
+        send_count_new[task] = 0;
+
+      /* set up the communication buffers for this pass */
+      generic_alloc_partlist_nodelist_ngblist_threadbufs();
+
+      kernel_local();
+
+      /* bookkeeping and data exchange; imported requests go through kernel_imported */
+      generic_exchange(kernel_imported);
+
+      generic_free_partlist_nodelist_ngblist_threadbufs();
+
+      voronoi_pick_up_additional_DP_points();
+
+      this_done = (NextParticle >= T->Ndt) ? 1 : 0;
+
+      MPI_Allreduce(&this_done, &tasks_done, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+    }
+  while(tasks_done < NTask);
+
+  myfree(DP_Buffer);
+  myfree(send_count_new);
+
+#ifdef EXTENDED_GHOST_SEARCH
+  myfree(DataNodeListSpecial);
+#endif /* #ifdef EXTENDED_GHOST_SEARCH */
+
+  return NadditionalPoints;
+}
+
+/*! \brief Gets additional Delaunay points.
+ *
+ *  Sends the points collected in DP_Buffer to the other tasks and appends
+ *  the points received from them to the DP array of the tessellation T.
+ *
+ *  \return void
+ */
+static void voronoi_pick_up_additional_DP_points(void)
+{
+  int ngrp, slot, task, partner, nimport;
+
+  /* The data blocks stored in DP_Buffer are not ordered according to processor rank, but rather in a permuted way
+   * (ThisTask ^ ngrp). We need to take this into account when calculating the offsets into the send buffer.
+   */
+  for(ngrp = 0, slot = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      partner = ThisTask ^ ngrp;
+      if(partner >= NTask)
+        continue;
+      Send_count[slot++] = send_count_new[partner];
+    }
+
+  Recv_offset[0] = 0;
+  for(slot = 1; slot < NTask; slot++)
+    Recv_offset[slot] = Recv_offset[slot - 1] + Send_count[slot - 1];
+
+  for(ngrp = 0, slot = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      partner = ThisTask ^ ngrp;
+      if(partner >= NTask)
+        continue;
+      Send_offset[partner] = Recv_offset[slot++];
+    }
+
+  memcpy(Send_count, send_count_new, NTask * sizeof(int));
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  nimport        = Recv_count[0];
+  Recv_offset[0] = 0;
+  for(task = 1; task < NTask; task++)
+    {
+      Recv_offset[task] = Recv_offset[task - 1] + Recv_count[task - 1];
+      nimport += Recv_count[task];
+    }
+
+  /* enlarge the point array until the incoming points fit */
+  while(nimport + T->Ndp > T->MaxNdp)
+    {
+      T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+      T->MaxNdp = T->Indi.AllocFacNdp;
+#ifdef VERBOSE
+      printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp, T->Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+      T->DP -= 5; /* the allocation includes 5 entries in front of T->DP */
+      T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point));
+      T->DP += 5;
+
+      if(NumGas == 0 && nimport + T->Ndp > T->MaxNdp)
+        terminate("nimport + Ndp > MaxNdp");
+    }
+
+  /* get the delaunay points */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      partner = ThisTask ^ ngrp;
+      /* skip ranks that do not exist and partners without anything to send or receive */
+      if(partner >= NTask || (Send_count[partner] <= 0 && Recv_count[partner] <= 0))
+        continue;
+
+      MPI_Sendrecv(&DP_Buffer[Send_offset[partner]], Send_count[partner] * sizeof(point), MPI_BYTE, partner, TAG_DENS_B,
+                   &T->DP[T->Ndp + Recv_offset[partner]], Recv_count[partner] * sizeof(point), MPI_BYTE, partner,
+                   TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+
+  T->Ndp += nimport;
+  NadditionalPoints += nimport;
+
+  if(Largest_N_DP_Buffer < N_DP_Buffer)
+    Largest_N_DP_Buffer = N_DP_Buffer;
+}
+
+/*! \brief Evaluate function for voronoi_ghost_search.
+ *
+ *  Called in both mode local particles and then in mode imported particles.
+ *  The tree search is centred on pos with a radius slightly larger than the
+ *  distance between pos and the reference point.
+ *
+ *  \param[in, out] T Pointer to tessellation.
+ *  \param[in] target Index in the DTC and DTF arrays (local mode) or in the
+ *             DataGet and DataResult buffers (imported mode).
+ *  \param[in] mode Mode of call (local/imported).
+ *  \param[in] q Index in DP array; only read in local mode.
+ *  \param[in] thread_id Thread_id, needed for ngb_treefind_ghost_search.
+ *
+ *  \return 0
+ */
+int voronoi_ghost_search_evaluate(tessellation *T, int target, int mode, int q, int thread_id)
+{
+  const int is_local = (mode == MODE_LOCAL_PARTICLES);
+  int origin, numnodes, *firstnode;
+  int k;
+  double h, dx, dy, dz, maxdist;
+  MyDouble pos[3], refpos[3];
+  data_out out;
+
+  if(!is_local)
+    {
+      /* the values are copied instead of keeping a pointer into DataGet, because DataGet may be moved in a
+       * realloc operation */
+      for(k = 0; k < 3; k++)
+        {
+          pos[k]    = DataGet[target].Pos[k];
+          refpos[k] = DataGet[target].RefPos[k];
+        }
+      maxdist = DataGet[target].MaxDist;
+      origin  = DataGet[target].Origin;
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+  else
+    {
+      pos[0] = T->DTC[target].cx;
+      pos[1] = T->DTC[target].cy;
+      pos[2] = T->DTC[target].cz;
+
+      refpos[0] = T->DP[q].x;
+      refpos[1] = T->DP[q].y;
+      refpos[2] = T->DP[q].z;
+#ifndef DOUBLE_STENCIL
+      maxdist = SphP[T->DP[q].index].Hsml;
+#else  /* #ifndef DOUBLE_STENCIL */
+      maxdist = T->DP[q].Hsml;
+#endif /* #ifndef DOUBLE_STENCIL #else */
+      origin = ThisTask;
+
+      numnodes  = 1;
+      firstnode = NULL;
+    }
+
+  dx = refpos[0] - pos[0];
+  dy = refpos[1] - pos[1];
+  dz = refpos[2] - pos[2];
+  h  = 1.0001 * sqrt(dx * dx + dy * dy + dz * dz);
+
+  /* since we restrict the search radius, we are not guaranteed to search the full circumcircle of the triangle */
+  if(is_local && maxdist < 2 * h)
+    T->DTF[target] &= ~2;
+
+  out.Count = ngb_treefind_ghost_search(T, pos, refpos, h, maxdist, target, origin, mode, thread_id, numnodes, firstnode);
+
+  if(is_local)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+#ifdef EXTENDED_GHOST_SEARCH /* this allowes for mirrored images in a full 3x3 grid in terms of the principal domain */
+/*! \brief Tree-search algorithm for ghost cells in EXTENDED_GHOST_SEARCH mode.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *  \param[in] searchcenter[3] Postion of the search center.
+ *  \param[in] refpos[3] Reference position.
+ *  \param[in] hsml Search radius.
+ *  \param[in] maxdist Maximum distance.
+ *  \param[in] target Index in DTF array.
+ *  \param[in] origin Original task.
+ *  \param[in] startnode Startnode.
+ *  \param[in] bitflags Bitflags for ghost search.
+ *  \param[in] mode Mode.
+ *  \param[in, out] nexport Number of exported particles.
+ *  \param[out] nsend_local Array with number of particles to be sent.
+ *
+ *  \return Number of points found.
+ */
+int ngb_treefind_ghost_search(tessellation *T, MyDouble searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat maxdist, int target,
+                              int origin, int *startnode, int bitflags, int mode, int *nexport, int *nsend_local)
+{
+  int i, numngb, no, p, task, nexport_save, ndp_save, nadditionalpoints_save;
+  int image_flag;
+  struct NgbNODE *current;
+  MyDouble dx, dy, dz, hsml2, maxdist2;
+  int listp;
+  double dx_ref, dy_ref, dz_ref, mindistance, thisdistance;
+  double min_x = 0, min_y = 0, min_z = 0;
+  int min_p = 0, min_imageflag = 0;
+  MyFloat search_min[3], search_max[3], newcenter[3], newrefpos[3];
+  MyFloat refsearch_min[3], refsearch_max[3];
+
+  nadditionalpoints_save = NadditionalPoints;
+  ndp_save               = T->Ndp;
+  nexport_save           = *nexport;
+
+  numngb      = 0;
+  mindistance = 1.0e70;
+
+  int repx, repy, repz = 0;
+  int repx_A, repy_A, repz_A;
+  int repx_B, repy_B, repz_B;
+  int xbits;
+  int ybits;
+  int zbits;
+  int count;
+
+  if(mode == 0)
+    {
+      repx_A = -1;
+      repx_B = 1;
+      repy_A = -1;
+      repy_B = 1;
+      repz_A = -1;
+      repz_B = 1;
+      xbits = ybits = zbits = 0;
+    }
+  else
+    {
+      zbits = (bitflags / 9);
+      ybits = (bitflags - zbits * 9) / 3;
+      xbits = bitflags - zbits * 9 - ybits * 3;
+
+      if(xbits == 1)
+        repx_A = repx_B = -1;
+      else if(xbits == 2)
+        repx_A = repx_B = 1;
+      else
+        repx_A = repx_B = 0;
+
+      if(ybits == 1)
+        repy_A = repy_B = -1;
+      else if(ybits == 2)
+        repy_A = repy_B = 1;
+      else
+        repy_A = repy_B = 0;
+
+      if(zbits == 1)
+        repz_A = repz_B = -1;
+      else if(zbits == 2)
+        repz_A = repz_B = 1;
+      else
+        repz_A = repz_B = 0;
+    }
+
+  hsml2    = hsml * hsml;
+  maxdist2 = maxdist * maxdist;
+
+  for(repx = repx_A; repx <= repx_B; repx++)
+    for(repy = repy_A; repy <= repy_B; repy++)
+#if !defined(TWODIMS)
+      for(repz = repz_A; repz <= repz_B; repz++)
+#endif /* #if !defined(TWODIMS) */
+        {
+          image_flag = 0; /* for each coordinate there are three possibilities.
+                             We encodee them to basis three, i.e. x*3^0 + y*3^1 + z*3^2
+                           */
+          if(repx == 0)
+            {
+              newcenter[0] = searchcenter[0];
+              newrefpos[0] = refpos[0];
+            }
+          else if(repx == -1)
+            {
+#ifndef REFLECTIVE_X
+              newcenter[0] = searchcenter[0] - boxSize_X;
+              newrefpos[0] = refpos[0] - boxSize_X;
+#else  /* #ifndef REFLECTIVE_X */
+            newcenter[0] = -searchcenter[0];
+            newrefpos[0] = -refpos[0];
+#endif /* #ifndef REFLECTIVE_X #else */
+              image_flag += 1;
+            }
+          else /* repx == 1 */
+            {
+#ifndef REFLECTIVE_X
+              newcenter[0] = searchcenter[0] + boxSize_X;
+              newrefpos[0] = refpos[0] + boxSize_X;
+#else  /* #ifndef REFLECTIVE_X */
+            newcenter[0] = -searchcenter[0] + 2 * boxSize_X;
+            newrefpos[0] = -refpos[0] + 2 * boxSize_X;
+#endif /* #ifndef REFLECTIVE_X #else */
+              image_flag += 2;
+            }
+
+          if(repy == 0)
+            {
+              newcenter[1] = searchcenter[1];
+              newrefpos[1] = refpos[1];
+            }
+          else if(repy == -1)
+            {
+#ifndef REFLECTIVE_Y
+              newcenter[1] = searchcenter[1] - boxSize_Y;
+              newrefpos[1] = refpos[1] - boxSize_Y;
+#else  /* #ifndef REFLECTIVE_Y */
+            newcenter[1] = -searchcenter[1];
+            newrefpos[1] = -refpos[1];
+#endif /* #ifndef REFLECTIVE_Y #else */
+              image_flag += 1 * 3;
+            }
+          else /*  repy == 1 */
+            {
+#ifndef REFLECTIVE_Y
+              newcenter[1] = searchcenter[1] + boxSize_Y;
+              newrefpos[1] = refpos[1] + boxSize_Y;
+#else  /* #ifndef REFLECTIVE_Y */
+            newcenter[1] = -searchcenter[1] + 2 * boxSize_Y;
+            newrefpos[1] = -refpos[1] + 2 * boxSize_Y;
+#endif /* #ifndef REFLECTIVE_Y #else */
+              image_flag += 2 * 3;
+            }
+
+          if(repz == 0)
+            {
+              newcenter[2] = searchcenter[2];
+              newrefpos[2] = refpos[2];
+            }
+#if !defined(TWODIMS)
+          else if(repz == -1)
+            {
+#ifndef REFLECTIVE_Z
+              newcenter[2] = searchcenter[2] - boxSize_Z;
+              newrefpos[2] = refpos[2] - boxSize_Z;
+#else  /* #ifndef REFLECTIVE_Z */
+              newcenter[2] = -searchcenter[2];
+              newrefpos[2] = -refpos[2];
+#endif /* #ifndef REFLECTIVE_Z #else */
+              image_flag += 1 * 9;
+            }
+          else /* repz == 1 */
+            {
+#ifndef REFLECTIVE_Z
+              newcenter[2] = searchcenter[1] + boxSize_Z;
+              newrefpos[2] = refpos[1] + boxSize_Z;
+#else  /* #ifndef REFLECTIVE_Z */
+              newcenter[2] = -searchcenter[2] + 2 * boxSize_Z;
+              newrefpos[2] = -refpos[2] + 2 * boxSize_Z;
+#endif /* #ifndef REFLECTIVE_Z #else */
+              image_flag += 2 * 9;
+            }
+#endif /* #if !defined(TWODIMS) */
+
+          for(i = 0; i < 3; i++)
+            {
+              search_min[i]    = newcenter[i] - hsml;
+              search_max[i]    = newcenter[i] + hsml;
+              refsearch_min[i] = newrefpos[i] - maxdist;
+              refsearch_max[i] = newrefpos[i] + maxdist;
+            }
+
+          if(mode == 1)
+            if(bitflags != image_flag)
+              {
+                printf("bitflags=%d image_flag=%d xbits=%d ybits=%d zbits=%d  \n", bitflags, image_flag, xbits, ybits, zbits);
+                terminate("problem");
+              }
+
+          no    = *startnode;
+          count = 0;
+
+          while(no >= 0)
+            {
+              count++;
+              if(no < Ngb_MaxPart) /* single particle */
+                {
+                  p  = no;
+                  no = Ngb_Nextnode[no];
+
+                  if(P[p].Type > 0)
+                    continue;
+
+                  if(P[p].Mass == 0 && P[p].ID == 0)
+                    continue; /* skip cells that have been swallowed or dissolved */
+
+                  dx = P[p].Pos[0] - newcenter[0];
+                  dy = P[p].Pos[1] - newcenter[1];
+                  dz = P[p].Pos[2] - newcenter[2];
+
+                  if(dx * dx + dy * dy + dz * dz > hsml2)
+                    continue;
+
+                  dx_ref = P[p].Pos[0] - newrefpos[0];
+                  dy_ref = P[p].Pos[1] - newrefpos[1];
+                  dz_ref = P[p].Pos[2] - newrefpos[2];
+
+                  if((thisdistance = dx_ref * dx_ref + dy_ref * dy_ref + dz_ref * dz_ref) > maxdist2)
+                    continue;
+
+                  /* now we need to check whether this particle has already been sent to
+                     the requesting cpu for this particular image shift */
+
+                  if(thisdistance >= mindistance)
+                    continue;
+
+                  if(Ngb_Marker[p] != Ngb_MarkerValue)
+                    {
+                      Ngb_Marker[p]           = Ngb_MarkerValue;
+                      List_P[p].firstexport   = -1;
+                      List_P[p].currentexport = -1;
+                    }
+
+                  if(List_P[p].firstexport >= 0)
+                    {
+                      if(ListExports[List_P[p].currentexport].origin != origin)
+                        {
+                          listp = List_P[p].firstexport;
+                          while(listp >= 0)
+                            {
+                              if(ListExports[listp].origin == origin)
+                                {
+                                  List_P[p].currentexport = listp;
+                                  break;
+                                }
+
+                              listp = ListExports[listp].nextexport;
+                            }
+
+                          if(listp >= 0)
+                            if((ListExports[listp].image_bits & (1 << image_flag))) /* already in list */
+                              continue;
+                        }
+                      else
+                        {
+                          if((ListExports[List_P[p].currentexport].image_bits & (1 << image_flag))) /* already in list */
+                            continue;
+                        }
+                    }
+
+                  /* here we have found a new closest particle that has not been inserted yet */
+
+                  numngb        = 1;
+                  mindistance   = thisdistance;
+                  min_p         = p;
+                  min_imageflag = image_flag;
+
+                  /* determine the point coordinates in min_x, min_y, min_z */
+                  if(repx == 0)
+                    min_x = P[p].Pos[0];
+                  else if(repx == -1)
+                    {
+#ifndef REFLECTIVE_X
+                      min_x = P[p].Pos[0] + boxSize_X;
+#else  /* #ifndef REFLECTIVE_X */
+                    min_x = -P[p].Pos[0];
+#endif /* #ifndef REFLECTIVE_X #else */
+                    }
+                  else if(repx == 1)
+                    {
+#ifndef REFLECTIVE_X
+                      min_x = P[p].Pos[0] - boxSize_X;
+#else  /* #ifndef REFLECTIVE_X */
+                    min_x = -P[p].Pos[0] + 2 * boxSize_X;
+#endif /* #ifndef REFLECTIVE_X #else */
+                    }
+
+                  if(repy == 0)
+                    min_y = P[p].Pos[1];
+                  else if(repy == -1)
+                    {
+#ifndef REFLECTIVE_Y
+                      min_y = P[p].Pos[1] + boxSize_Y;
+#else  /* #ifndef REFLECTIVE_Y */
+                    min_y = -P[p].Pos[1];
+#endif /* #ifndef REFLECTIVE_Y #else */
+                    }
+                  else if(repy == 1)
+                    {
+#ifndef REFLECTIVE_Y
+                      min_y = P[p].Pos[1] - boxSize_Y;
+#else  /* #ifndef REFLECTIVE_Y */
+                    min_y = -P[p].Pos[1] + 2 * boxSize_Y;
+#endif /* #ifndef REFLECTIVE_Y #else */
+                    }
+
+                  if(repz == 0)
+                    min_z = P[p].Pos[2];
+#if !defined(TWODIMS)
+                  else if(repz == -1)
+                    {
+#ifndef REFLECTIVE_Z
+                      min_z = P[p].Pos[2] + boxSize_Z;
+#else  /* #ifndef REFLECTIVE_Z */
+                      min_z = -P[p].Pos[2];
+#endif /* #ifndef REFLECTIVE_Z #else */
+                    }
+                  else if(repz == 1)
+                    {
+#ifndef REFLECTIVE_Z
+                      min_z = P[p].Pos[2] - boxSize_Z;
+#else  /* #ifndef REFLECTIVE_Z */
+                      min_z = -P[p].Pos[2] + 2 * boxSize_Z;
+#endif /* #ifndef REFLECTIVE_Z #else */
+                    }
+#endif /* #if !defined(TWODIMS) */
+                }
+              else if(no < Ngb_MaxPart + Ngb_MaxNodes) /* internal node */
+                {
+                  if(mode == 1)
+                    {
+                      if(no < Ngb_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the
+                                                           branch */
+                        {
+                          break;
+                        }
+                    }
+
+                  current = &Ngb_Nodes[no];
+                  no      = current->u.d.sibling; /* in case the node can be discarded */
+
+                  if(search_min[0] > current->u.d.range_max[0])
+                    continue;
+                  if(search_max[0] < current->u.d.range_min[0])
+                    continue;
+                  if(refsearch_min[0] > current->u.d.range_max[0])
+                    continue;
+                  if(refsearch_max[0] < current->u.d.range_min[0])
+                    continue;
+
+                  if(search_min[1] > current->u.d.range_max[1])
+                    continue;
+                  if(search_max[1] < current->u.d.range_min[1])
+                    continue;
+                  if(refsearch_min[1] > current->u.d.range_max[1])
+                    continue;
+                  if(refsearch_max[1] < current->u.d.range_min[1])
+                    continue;
+
+                  if(search_min[2] > current->u.d.range_max[2])
+                    continue;
+                  if(search_max[2] < current->u.d.range_min[2])
+                    continue;
+                  if(refsearch_min[2] > current->u.d.range_max[2])
+                    continue;
+                  if(refsearch_max[2] < current->u.d.range_min[2])
+                    continue;
+
+                  no = current->u.d.nextnode; /* ok, we need to open the node */
+                }
+              else /* pseudo particle */
+                {
+                  if(mode == 1)
+                    terminate("mode == 1");
+
+                  if(target >= 0) /* if no target is given, export will not occur */
+                    {
+                      if(Exportflag[task = DomainTask[no - (Ngb_MaxPart + Ngb_MaxNodes)]] != target)
+                        {
+                          Exportflag[task]      = target;
+                          Exportnodecount[task] = NODELISTLENGTH;
+                        }
+
+                      if(Exportnodecount[task] == NODELISTLENGTH)
+                        {
+                          if(*nexport >= All.BunchSize)
+                            {
+                              T->Ndp            = ndp_save;
+                              NadditionalPoints = nadditionalpoints_save;
+                              *nexport          = nexport_save;
+                              if(nexport_save == 0)
+                                terminate(
+                                    "nexport_save == 0"); /* in this case, the buffer is too small to process even a single particle */
+                              for(task = 0; task < NTask; task++)
+                                nsend_local[task] = 0;
+                              for(no = 0; no < nexport_save; no++)
+                                nsend_local[DataIndexTable[no].Task]++;
+                              return -1;
+                            }
+                          Exportnodecount[task]             = 0;
+                          Exportindex[task]                 = *nexport;
+                          DataIndexTable[*nexport].Task     = task;
+                          DataIndexTable[*nexport].Index    = target;
+                          DataIndexTable[*nexport].IndexGet = *nexport;
+                          *nexport                          = *nexport + 1;
+                          nsend_local[task]++;
+                        }
+
+                      DataNodeListSpecial[Exportindex[task]].BitFlagList[Exportnodecount[task]] = image_flag;
+                      DataNodeListSpecial[Exportindex[task]].NodeList[Exportnodecount[task]++] =
+                          Ngb_DomainNodeIndex[no - (Ngb_MaxPart + Ngb_MaxNodes)];
+
+                      if(Exportnodecount[task] < NODELISTLENGTH)
+                        DataNodeListSpecial[Exportindex[task]].NodeList[Exportnodecount[task]] = -1;
+                    }
+
+                  no = Ngb_Nextnode[no - Ngb_MaxNodes];
+                  continue;
+                }
+            }
+        }
+
+  *startnode = -1;
+
+  if(numngb)
+    {
+      p = min_p;
+
+      image_flag = min_imageflag;
+
+      if(Ngb_Marker[p] != Ngb_MarkerValue)
+        {
+          Ngb_Marker[p]           = Ngb_MarkerValue;
+          List_P[p].firstexport   = -1;
+          List_P[p].currentexport = -1;
+        }
+
+      if(List_P[p].firstexport >= 0)
+        {
+          if(ListExports[List_P[p].currentexport].origin != origin)
+            {
+              listp = List_P[p].firstexport;
+              while(listp >= 0)
+                {
+                  if(ListExports[listp].origin == origin)
+                    {
+                      List_P[p].currentexport = listp;
+                      break;
+                    }
+
+                  if(ListExports[listp].nextexport < 0)
+                    {
+                      if(Ninlist >= MaxNinlist)
+                        {
+                          T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+                          MaxNinlist = T->Indi.AllocFacNinlist;
+#ifdef VERBOSE
+                          printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist,
+                                 T->Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+                          ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+                          if(Ninlist >= MaxNinlist)
+                            terminate("Ninlist >= MaxNinlist");
+                        }
+
+                      List_P[p].currentexport                         = Ninlist++;
+                      ListExports[List_P[p].currentexport].image_bits = 0;
+                      ListExports[List_P[p].currentexport].nextexport = -1;
+                      ListExports[List_P[p].currentexport].origin     = origin;
+                      ListExports[List_P[p].currentexport].index      = p;
+                      ListExports[listp].nextexport                   = List_P[p].currentexport;
+                      break;
+                    }
+                  listp = ListExports[listp].nextexport;
+                }
+            }
+        }
+      else
+        {
+          /* here we have a local particle that hasn't been made part of the mesh */
+
+          if(Ninlist >= MaxNinlist)
+            {
+              T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+              MaxNinlist = T->Indi.AllocFacNinlist;
+#ifdef VERBOSE
+              printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist,
+                     T->Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+              ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+              if(Ninlist >= MaxNinlist)
+                terminate("Ninlist >= MaxNinlist");
+            }
+
+          List_InMesh[NumGasInMesh++] = p;
+
+          List_P[p].currentexport = List_P[p].firstexport = Ninlist++;
+          ListExports[List_P[p].currentexport].image_bits = 0;
+          ListExports[List_P[p].currentexport].nextexport = -1;
+          ListExports[List_P[p].currentexport].origin     = origin;
+          ListExports[List_P[p].currentexport].index      = p;
+        }
+
+      if((ListExports[List_P[p].currentexport].image_bits & (1 << image_flag)))
+        terminate("this should not happen");
+
+      ListExports[List_P[p].currentexport].image_bits |= (1 << image_flag);
+
+      /* add the particle to the ones that need to be exported */
+
+      if(origin == ThisTask)
+        {
+          if(mode == 1)
+            terminate("mode==1: how can this be?");
+
+          if(T->Ndp >= T->MaxNdp)
+            {
+              T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+              T->MaxNdp = T->Indi.AllocFacNdp;
+#ifdef VERBOSE
+              printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp, T->Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+              T->DP -= 5;
+              T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point));
+              T->DP += 5;
+
+              if(T->Ndp >= T->MaxNdp)
+                terminate("Ndp >= MaxNdp");
+            }
+
+          SphP[p].ActiveArea = 0;
+
+          point *dp = &T->DP[T->Ndp];
+          dp->x     = min_x;
+          dp->y     = min_y;
+          dp->z     = min_z;
+          dp->task  = ThisTask;
+          dp->ID    = P[p].ID;
+          if(image_flag)
+            dp->index = p + NumGas; /* this is a replicated/mirrored local point */
+          else
+            dp->index = p; /* this is actually a local point that wasn't made part of the mesh yet */
+          dp->originalindex = p;
+          dp->timebin       = P[p].TimeBinHydro;
+          dp->image_flags   = (1 << image_flag);
+
+#ifdef DOUBLE_STENCIL
+          dp->Hsml             = SphP[p].Hsml;
+          dp->first_connection = -1;
+          dp->last_connection  = -1;
+#endif /* #ifdef DOUBLE_STENCIL */
+          T->Ndp++;
+          NadditionalPoints++;
+        }
+      else
+        {
+          if(mode == 0)
+            terminate("mode == 0: how can this be?");
+
+          if(N_DP_Buffer >= MaxN_DP_Buffer)
+            {
+              T->Indi.AllocFacN_DP_Buffer *= ALLOC_INCREASE_FACTOR;
+              MaxN_DP_Buffer = T->Indi.AllocFacN_DP_Buffer;
+#ifdef VERBOSE
+              printf("Task=%d: increase memory allocation, MaxN_DP_Buffer=%d Indi.AllocFacN_DP_Buffer=%g\n", ThisTask, MaxN_DP_Buffer,
+                     T->Indi.AllocFacN_DP_Buffer);
+#endif /* #ifdef VERBOSE */
+              DP_Buffer = (point *)myrealloc_movable(DP_Buffer, MaxN_DP_Buffer * sizeof(point));
+
+              if(N_DP_Buffer >= MaxN_DP_Buffer)
+                terminate("(N_DP_Buffer >= MaxN_DP_Buffer");
+            }
+
+          SphP[p].ActiveArea = 0;
+
+          DP_Buffer[N_DP_Buffer].x             = min_x;
+          DP_Buffer[N_DP_Buffer].y             = min_y;
+          DP_Buffer[N_DP_Buffer].z             = min_z;
+          DP_Buffer[N_DP_Buffer].ID            = P[p].ID;
+          DP_Buffer[N_DP_Buffer].task          = ThisTask;
+          DP_Buffer[N_DP_Buffer].index         = p;
+          DP_Buffer[N_DP_Buffer].originalindex = p;
+          DP_Buffer[N_DP_Buffer].timebin       = P[p].TimeBinHydro;
+          DP_Buffer[N_DP_Buffer].image_flags   = (1 << image_flag);
+#ifdef DOUBLE_STENCIL
+          DP_Buffer[N_DP_Buffer].Hsml             = SphP[p].Hsml;
+          DP_Buffer[N_DP_Buffer].first_connection = -1;
+          DP_Buffer[N_DP_Buffer].last_connection  = -1;
+#endif /* #ifdef DOUBLE_STENCIL */
+          send_count_new[origin]++;
+          N_DP_Buffer++;
+        }
+    }
+
+  return numngb;
+}
+
+#else /* #ifdef EXTENDED_GHOST_SEARCH */
+
+/*! \brief Tree-search algorithm for ghost cells without EXTENDED_GHOST_SEARCH.
+ *  \details At most one point is added per call: the closest cell (or periodic/mirrored image) not yet exported to origin.
+ *  \param[in] T Pointer to tessellation (its DP array may be reallocated and extended).
+ *  \param[in] searchcenter[3] Position of the search center.
+ *  \param[in] refpos[3] Reference position; squared distances to it decide which candidate is closest.
+ *  \param[in] hsml Search radius around searchcenter.
+ *  \param[in] maxdist Maximum distance from refpos.
+ *  \param[in] target Index in DTF array; if negative, pseudo particles are not exported.
+ *  \param[in] origin Original task; the point goes to T->DP if it equals ThisTask, else to DP_Buffer.
+ *  \param[in] mode Mode (local/imported); local searches start at the root node, others at firstnode[k].
+ *  \param[in] thread_id ID of this thread (forwarded to ngb_treefind_export_node_threads()).
+ *  \param[in] numnodes Number of nodes, i.e. number of iterations of the outer start-node loop.
+ *  \param[in] firstnode Index of first node (indexed with k < numnodes, read only in non-local mode).
+ *
+ *  \return Number of points found (0 or 1).
+ */
+int ngb_treefind_ghost_search(tessellation *T, MyDouble searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat maxdist, int target,
+                              int origin, int mode, int thread_id, int numnodes, int *firstnode)
+{
+  int i, k, numngb, no, p;
+  int image_flag = 0;
+  struct NgbNODE *current;
+  MyDouble x, y, z, dx, dy, dz;
+  int listp;
+  double dx_ref, dy_ref, dz_ref, mindistance, thisdistance, maxdistSquared, hsmlSquared;
+  double min_x = 0, min_y = 0, min_z = 0; /* (shifted/mirrored) position of the best candidate so far */
+  int min_p = 0, min_imageflag = 0;       /* particle index and image flag of the best candidate so far */
+  double offx, offy, offz;
+  MyFloat search_min[3], search_max[3], search_max_Lsub[3], search_min_Ladd[3];
+  MyFloat refsearch_min[3], refsearch_max[3], refsearch_max_Lsub[3], refsearch_min_Ladd[3];
+  /* bounding boxes of the hsml-sphere around searchcenter and of the maxdist-sphere around refpos */
+  for(i = 0; i < 3; i++)
+    {
+      search_min[i] = searchcenter[i] - hsml;
+      search_max[i] = searchcenter[i] + hsml;
+      refsearch_min[i] = refpos[i] - maxdist;
+      refsearch_max[i] = refpos[i] + maxdist;
+    }
+  /* box edges shifted by one box length (periodic case) or mirrored at 0 / boxSize (reflective case), used for node culling */
+#if !defined(REFLECTIVE_X)
+  search_max_Lsub[0] = search_max[0] - boxSize_X;
+  search_min_Ladd[0] = search_min[0] + boxSize_X;
+  refsearch_max_Lsub[0] = refsearch_max[0] - boxSize_X;
+  refsearch_min_Ladd[0] = refsearch_min[0] + boxSize_X;
+#else  /* #if !defined(REFLECTIVE_X) */
+  search_max_Lsub[0]    = 2 * boxSize_X - search_max[0];
+  search_min_Ladd[0]    = -search_min[0];
+  refsearch_max_Lsub[0] = 2 * boxSize_X - refsearch_max[0];
+  refsearch_min_Ladd[0] = -refsearch_min[0];
+#endif /* #if !defined(REFLECTIVE_X) #else */
+
+#if !defined(REFLECTIVE_Y)
+  search_max_Lsub[1] = search_max[1] - boxSize_Y;
+  search_min_Ladd[1] = search_min[1] + boxSize_Y;
+  refsearch_max_Lsub[1] = refsearch_max[1] - boxSize_Y;
+  refsearch_min_Ladd[1] = refsearch_min[1] + boxSize_Y;
+#else  /* #if !defined(REFLECTIVE_Y) */
+  search_max_Lsub[1]    = 2 * boxSize_Y - search_max[1];
+  search_min_Ladd[1]    = -search_min[1];
+  refsearch_max_Lsub[1] = 2 * boxSize_Y - refsearch_max[1];
+  refsearch_min_Ladd[1] = -refsearch_min[1];
+#endif /* #if !defined(REFLECTIVE_Y) #else */
+
+#if !defined(REFLECTIVE_Z)
+  search_max_Lsub[2] = search_max[2] - boxSize_Z;
+  search_min_Ladd[2] = search_min[2] + boxSize_Z;
+  refsearch_max_Lsub[2] = refsearch_max[2] - boxSize_Z;
+  refsearch_min_Ladd[2] = refsearch_min[2] + boxSize_Z;
+#else  /* #if !defined(REFLECTIVE_Z) */
+  search_max_Lsub[2]    = 2 * boxSize_Z - search_max[2];
+  search_min_Ladd[2]    = -search_min[2];
+  refsearch_max_Lsub[2] = 2 * boxSize_Z - refsearch_max[2];
+  refsearch_min_Ladd[2] = -refsearch_min[2];
+#endif /* #if !defined(REFLECTIVE_Z) #else */
+
+  numngb = 0;
+  mindistance = 1.0e70; /* squared distance to refpos of the best candidate; huge start value = nothing found yet */
+  int count;
+
+  count = 0;
+
+  maxdistSquared = maxdist * maxdist;
+  hsmlSquared = hsml * hsml;
+
+  numngb = 0; /* NOTE(review): redundant, numngb was already zeroed a few lines above */
+
+  for(k = 0; k < numnodes; k++) /* one tree walk per start node */
+    {
+      if(mode == MODE_LOCAL_PARTICLES)
+        {
+          no = Ngb_MaxPart; /* root node */
+
+#ifdef EXTENDED_GHOST_SEARCH
+          bitflags = 0;
+#endif /* #ifdef EXTENDED_GHOST_SEARCH (never active here: this function sits in the #else branch of that macro) */
+        }
+      else
+        {
+          no = firstnode[k];
+
+#ifdef EXTENDED_GHOST_SEARCH
+          bitflags = first_bitflag[k];
+#endif /* #ifdef EXTENDED_GHOST_SEARCH (never active here, see above) */
+          no = Ngb_Nodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0)
+        {
+          count++; /* NOTE(review): count is only incremented, its value is never read in this function */
+          if(no < Ngb_MaxPart) /* single particle */
+            {
+              p = no;
+              no = Ngb_Nextnode[no]; /* advance now, so that every 'continue' below moves on to the next tree entry */
+
+              if(P[p].Type > 0)
+                continue; /* particles with a positive type are never candidates */
+
+              if(P[p].Mass == 0 && P[p].ID == 0)
+                continue; /* skip cells that have been swallowed or eliminated */
+
+              if(P[p].Ti_Current != All.Ti_Current) /* particle is not at the current time yet */
+                {
+                  drift_particle(p, All.Ti_Current);
+                }
+
+              offx = offy = offz = 0;
+
+              image_flag = 0; /* for each coordinates there are three possibilities. We
+                                 encode them to basis three, i.e. x*3^0 + y*3^1 + z*3^2 */
+              /* periodic axes: shift p by one box length if that brings it within half a box of refpos */
+#if !defined(REFLECTIVE_X)
+              if(P[p].Pos[0] - refpos[0] < -boxHalf_X)
+                {
+                  offx = boxSize_X;
+                  image_flag += 1;
+                }
+              else if(P[p].Pos[0] - refpos[0] > boxHalf_X)
+                {
+                  offx = -boxSize_X;
+                  image_flag += 2;
+                }
+#endif /* #if !defined(REFLECTIVE_X) */
+
+#if !defined(REFLECTIVE_Y)
+              if(P[p].Pos[1] - refpos[1] < -boxHalf_Y)
+                {
+                  offy = boxSize_Y;
+                  image_flag += 1 * 3;
+                }
+              else if(P[p].Pos[1] - refpos[1] > boxHalf_Y)
+                {
+                  offy = -boxSize_Y;
+                  image_flag += 2 * 3;
+                }
+#endif /* #if !defined(REFLECTIVE_Y) */
+
+#if !defined(REFLECTIVE_Z) && !defined(TWODIMS)
+              if(P[p].Pos[2] - refpos[2] < -boxHalf_Z)
+                {
+                  offz = boxSize_Z;
+                  image_flag += 1 * 9;
+                }
+              else if(P[p].Pos[2] - refpos[2] > boxHalf_Z)
+                {
+                  offz = -boxSize_Z;
+                  image_flag += 2 * 9;
+                }
+#endif /* #if !defined(REFLECTIVE_Z) && !defined(TWODIMS) */
+
+              int image_flag_periodic_bnds = image_flag; /* periodic part only; each reflective image adds its own digits below */
+
+#if defined(REFLECTIVE_X)
+              int repx;
+              for(repx = -1; repx <= 1; repx++, offx = 0) /* 0 = original point, -1 / +1 = the two mirror images in x */
+#endif /* #if defined(REFLECTIVE_X) */
+                {
+#if defined(REFLECTIVE_Y)
+                  int repy;
+                  for(repy = -1; repy <= 1; repy++, offy = 0)
+#endif /* #if defined(REFLECTIVE_Y) */
+                    {
+#if defined(REFLECTIVE_Z) && !defined(TWODIMS)
+                      int repz;
+                      for(repz = -1; repz <= 1; repz++, offz = 0)
+#endif /* #if defined(REFLECTIVE_Z) && !defined(TWODIMS) */
+                        {
+                          image_flag = image_flag_periodic_bnds;
+
+                          x = P[p].Pos[0];
+                          y = P[p].Pos[1];
+                          z = P[p].Pos[2];
+
+#if defined(REFLECTIVE_X)
+                          if(repx == 1)
+                            {
+                              offx = 2 * boxSize_X;
+                              image_flag += 2;
+                            }
+                          else if(repx == -1)
+                            {
+                              image_flag += 1;
+                            }
+                          if(repx != 0)
+                            x = -x;
+#endif /* #if defined(REFLECTIVE_X) */
+
+#if defined(REFLECTIVE_Y)
+                          if(repy == 1)
+                            {
+                              offy = 2 * boxSize_Y;
+                              image_flag += 2 * 3;
+                            }
+                          else if(repy == -1)
+                            {
+                              image_flag += 1 * 3;
+                            }
+                          if(repy != 0)
+                            y = -y;
+#endif /* #if  defined(REFLECTIVE_Y) */
+
+#if defined(REFLECTIVE_Z) && !defined(TWODIMS)
+                          if(repz == 1)
+                            {
+                              offz = 2 * boxSize_Z;
+                              image_flag += 2 * 9;
+                            }
+                          else if(repz == -1)
+                            {
+                              image_flag += 1 * 9;
+                            }
+                          if(repz != 0)
+                            z = -z;
+#endif /* #if  defined(REFLECTIVE_Z) && !defined(TWODIMS) */
+
+                          x += offx;
+                          y += offy;
+                          z += offz;
+
+                          dx_ref = x - refpos[0];
+                          dy_ref = y - refpos[1];
+                          dz_ref = z - refpos[2];
+
+                          if((thisdistance = dx_ref * dx_ref + dy_ref * dy_ref + dz_ref * dz_ref) > maxdistSquared)
+                            continue; /* farther from refpos than maxdist (or than the best hit accepted so far) */
+
+                          dx = x - searchcenter[0];
+                          dy = y - searchcenter[1];
+                          dz = z - searchcenter[2];
+
+                          if(dx * dx + dy * dy + dz * dz > hsmlSquared)
+                            continue; /* outside the hsml sphere around searchcenter */
+
+                          /* now we need to check whether this particle has already been sent to
+                             the requesting cpu for this particular image shift */
+
+                          if(thisdistance >= mindistance)
+                            continue; /* not closer to refpos than the current best candidate */
+
+                          if(Ngb_Marker[p] != Ngb_MarkerValue) /* p not yet marked with the current marker value: reset its export list */
+                            {
+                              Ngb_Marker[p] = Ngb_MarkerValue;
+                              List_P[p].firstexport = -1;
+                              List_P[p].currentexport = -1;
+                            }
+
+                          if(List_P[p].firstexport >= 0)
+                            {
+                              if(ListExports[List_P[p].currentexport].origin != origin) /* cached entry belongs to another task */
+                                {
+                                  listp = List_P[p].firstexport;
+                                  while(listp >= 0)
+                                    {
+                                      if(ListExports[listp].origin == origin)
+                                        {
+                                          List_P[p].currentexport = listp;
+                                          break;
+                                        }
+
+                                      listp = ListExports[listp].nextexport;
+                                    }
+
+                                  if(listp >= 0)
+                                    if((ListExports[listp].image_bits & (1 << image_flag))) /* already in list */
+                                      continue;
+                                }
+                              else
+                                {
+                                  if((ListExports[List_P[p].currentexport].image_bits & (1 << image_flag))) /* already in list */
+                                    continue;
+                                }
+                            }
+
+                          /* here we have found a new closest particle that has not been inserted yet */
+
+                          numngb = 1;
+                          mindistance = thisdistance;
+                          min_p = p;
+                          min_imageflag = image_flag;
+                          min_x = x;
+                          min_y = y;
+                          min_z = z;
+
+                          maxdistSquared = thisdistance; /* later candidates are rejected unless at most this far from refpos */
+                        }
+                    }
+                }
+            }
+          else if(no < Ngb_MaxPart + Ngb_MaxNodes) /* internal node */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  if(no <
+                     Ngb_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */
+                    break;
+                }
+
+              current = &Ngb_Nodes[no];
+              no = current->u.d.sibling; /* in case the node can be discarded */
+
+              if(current->Ti_Current != All.Ti_Current) /* node is not at the current time yet */
+                {
+                  drift_node(current, All.Ti_Current);
+                }
+              /* discard the node if, along some axis, neither the hsml box nor its shifted/mirrored copy reaches the node's range */
+#if !defined(REFLECTIVE_X)
+              if(search_min[0] > current->u.d.range_max[0] && search_max_Lsub[0] < current->u.d.range_min[0])
+                continue;
+              if(search_min_Ladd[0] > current->u.d.range_max[0] && search_max[0] < current->u.d.range_min[0])
+                continue;
+#else  /* #if !defined(REFLECTIVE_X) */
+              if(search_min[0] > current->u.d.range_max[0] && search_max_Lsub[0] > current->u.d.range_max[0])
+                continue;
+              if(search_min_Ladd[0] < current->u.d.range_min[0] && search_max[0] < current->u.d.range_min[0])
+                continue;
+#endif /* #if !defined(REFLECTIVE_X) #else */
+
+#if !defined(REFLECTIVE_Y)
+              if(search_min[1] > current->u.d.range_max[1] && search_max_Lsub[1] < current->u.d.range_min[1])
+                continue;
+              if(search_min_Ladd[1] > current->u.d.range_max[1] && search_max[1] < current->u.d.range_min[1])
+                continue;
+#else  /* #if !defined(REFLECTIVE_Y) */
+              if(search_min[1] > current->u.d.range_max[1] && search_max_Lsub[1] > current->u.d.range_max[1])
+                continue;
+              if(search_min_Ladd[1] < current->u.d.range_min[1] && search_max[1] < current->u.d.range_min[1])
+                continue;
+#endif /* #if !defined(REFLECTIVE_Y) #else */
+
+#if !defined(REFLECTIVE_Z)
+              if(search_min[2] > current->u.d.range_max[2] && search_max_Lsub[2] < current->u.d.range_min[2])
+                continue;
+              if(search_min_Ladd[2] > current->u.d.range_max[2] && search_max[2] < current->u.d.range_min[2])
+                continue;
+#else  /* #if !defined(REFLECTIVE_Z) */
+              if(search_min[2] > current->u.d.range_max[2] && search_max_Lsub[2] > current->u.d.range_max[2])
+                continue;
+              if(search_min_Ladd[2] < current->u.d.range_min[2] && search_max[2] < current->u.d.range_min[2])
+                continue;
+#endif /* #if !defined(REFLECTIVE_Z) #else */
+
+                /* now deal with the search region of the reference point */
+
+#if !defined(REFLECTIVE_X)
+              if(refsearch_min[0] > current->u.d.range_max[0] && refsearch_max_Lsub[0] < current->u.d.range_min[0])
+                continue;
+              if(refsearch_min_Ladd[0] > current->u.d.range_max[0] && refsearch_max[0] < current->u.d.range_min[0])
+                continue;
+#else  /* #if !defined(REFLECTIVE_X) */
+              if(refsearch_min[0] > current->u.d.range_max[0] && refsearch_max_Lsub[0] > current->u.d.range_max[0])
+                continue;
+              if(refsearch_min_Ladd[0] < current->u.d.range_min[0] && refsearch_max[0] < current->u.d.range_min[0])
+                continue;
+#endif /* #if !defined(REFLECTIVE_X) #else */
+
+#if !defined(REFLECTIVE_Y)
+              if(refsearch_min[1] > current->u.d.range_max[1] && refsearch_max_Lsub[1] < current->u.d.range_min[1])
+                continue;
+              if(refsearch_min_Ladd[1] > current->u.d.range_max[1] && refsearch_max[1] < current->u.d.range_min[1])
+                continue;
+#else  /* #if !defined(REFLECTIVE_Y) */
+              if(refsearch_min[1] > current->u.d.range_max[1] && refsearch_max_Lsub[1] > current->u.d.range_max[1])
+                continue;
+              if(refsearch_min_Ladd[1] < current->u.d.range_min[1] && refsearch_max[1] < current->u.d.range_min[1])
+                continue;
+#endif /* #if !defined(REFLECTIVE_Y) #else */
+
+#if !defined(REFLECTIVE_Z)
+              if(refsearch_min[2] > current->u.d.range_max[2] && refsearch_max_Lsub[2] < current->u.d.range_min[2])
+                continue;
+              if(refsearch_min_Ladd[2] > current->u.d.range_max[2] && refsearch_max[2] < current->u.d.range_min[2])
+                continue;
+#else  /* #if !defined(REFLECTIVE_Z) */
+              if(refsearch_min[2] > current->u.d.range_max[2] && refsearch_max_Lsub[2] > current->u.d.range_max[2])
+                continue;
+              if(refsearch_min_Ladd[2] < current->u.d.range_min[2] && refsearch_max[2] < current->u.d.range_min[2])
+                continue;
+#endif /* #if !defined(REFLECTIVE_Z) #else */
+
+              no = current->u.d.nextnode; /* ok, we need to open the node */
+            }
+          else /* pseudo particle */
+            {
+              if(mode == 1) /* NOTE(review): presumably 1 == MODE_IMPORTED_PARTICLES, making the next check redundant - confirm */
+                terminate("mode == 1");
+
+              if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("mode == MODE_IMPORTED_PARTICLES should not occur here");
+
+              if(target >= 0) /* if no target is given, export will not occur */
+                ngb_treefind_export_node_threads(no, target, thread_id, image_flag); /* image_flag: value left by the last particle visited, or 0 */
+
+              no = Ngb_Nextnode[no - Ngb_MaxNodes];
+              continue;
+            }
+        }
+    }
+
+  if(numngb) /* a candidate was selected: record it in the export list for origin and append it as a new point */
+    {
+      p = min_p;
+
+      image_flag = min_imageflag;
+
+      if(Ngb_Marker[p] != Ngb_MarkerValue)
+        {
+          Ngb_Marker[p] = Ngb_MarkerValue;
+          List_P[p].firstexport = -1;
+          List_P[p].currentexport = -1;
+        }
+
+      if(List_P[p].firstexport >= 0)
+        {
+          if(ListExports[List_P[p].currentexport].origin != origin)
+            {
+              listp = List_P[p].firstexport;
+              while(listp >= 0) /* find the entry for origin, or append a fresh one at the end of p's chain */
+                {
+                  if(ListExports[listp].origin == origin)
+                    {
+                      List_P[p].currentexport = listp;
+                      break;
+                    }
+
+                  if(ListExports[listp].nextexport < 0)
+                    {
+                      if(Ninlist >= MaxNinlist) /* ListExports is full: enlarge it */
+                        {
+                          T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+                          MaxNinlist = T->Indi.AllocFacNinlist;
+#ifdef VERBOSE
+                          printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist,
+                                 T->Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+                          ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+                          if(Ninlist >= MaxNinlist)
+                            terminate("Ninlist >= MaxNinlist");
+                        }
+
+                      List_P[p].currentexport = Ninlist++;
+                      ListExports[List_P[p].currentexport].image_bits = 0;
+                      ListExports[List_P[p].currentexport].nextexport = -1;
+                      ListExports[List_P[p].currentexport].origin = origin;
+                      ListExports[List_P[p].currentexport].index = p;
+                      ListExports[listp].nextexport = List_P[p].currentexport;
+                      break;
+                    }
+                  listp = ListExports[listp].nextexport;
+                }
+            }
+        }
+      else
+        {
+          /* here we have a local particle that hasn't been made part of the mesh */
+
+          if(Ninlist >= MaxNinlist) /* ListExports is full: enlarge it */
+            {
+              T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+              MaxNinlist = T->Indi.AllocFacNinlist;
+#ifdef VERBOSE
+              printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist,
+                     T->Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+              ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+              if(Ninlist >= MaxNinlist)
+                terminate("Ninlist >= MaxNinlist");
+            }
+
+          List_InMesh[NumGasInMesh++] = p;
+
+          List_P[p].currentexport = List_P[p].firstexport = Ninlist++;
+          ListExports[List_P[p].currentexport].image_bits = 0;
+          ListExports[List_P[p].currentexport].nextexport = -1;
+          ListExports[List_P[p].currentexport].origin = origin;
+          ListExports[List_P[p].currentexport].index = p;
+        }
+
+      if((ListExports[List_P[p].currentexport].image_bits & (1 << image_flag))) /* the tree walk above skips images whose bit is set */
+        terminate("this should not happen");
+
+      ListExports[List_P[p].currentexport].image_bits |= (1 << image_flag); /* mark this image of p as handed out to origin */
+
+      /* add the particle to the ones that need to be exported */
+
+      if(P[p].Ti_Current != All.Ti_Current)
+        terminate("surprise! we don't expect this here anymore");
+
+      if(origin == ThisTask) /* request from this task: append the point directly to T->DP */
+        {
+          if(mode == 1)
+            terminate("mode==1: how can this be?");
+
+          if(T->Ndp >= T->MaxNdp)
+            {
+              T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+              T->MaxNdp = T->Indi.AllocFacNdp;
+#ifdef VERBOSE
+              printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp, T->Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+              T->DP -= 5; /* T->DP points 5 entries past the start of its allocation; undo that offset for the realloc */
+              T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point));
+              T->DP += 5;
+
+              if(T->Ndp >= T->MaxNdp)
+                terminate("Ndp >= MaxNdp");
+            }
+
+          SphP[p].ActiveArea = 0;
+
+          point *dp = &T->DP[T->Ndp];
+          dp->x = min_x;
+          dp->y = min_y;
+          dp->z = min_z;
+          dp->task = ThisTask;
+          dp->ID = P[p].ID;
+          if(image_flag)
+            dp->index = p + NumGas; /* this is a replicated/mirrored local point */
+          else
+            dp->index = p; /* this is actually a local point that wasn't made part of the mesh yet */
+          dp->originalindex = p;
+          dp->timebin = P[p].TimeBinHydro;
+          dp->image_flags = (1 << image_flag);
+#ifdef DOUBLE_STENCIL
+          dp->Hsml = SphP[p].Hsml;
+          dp->first_connection = -1;
+          dp->last_connection = -1;
+#endif /* #ifdef DOUBLE_STENCIL */
+          T->Ndp++;
+          NadditionalPoints++;
+        }
+      else /* request from another task: stage the point in DP_Buffer */
+        {
+          if(mode == 0)
+            terminate("mode == 0: how can this be?");
+
+          if(N_DP_Buffer >= MaxN_DP_Buffer) /* DP_Buffer is full: enlarge it */
+            {
+              T->Indi.AllocFacN_DP_Buffer *= ALLOC_INCREASE_FACTOR;
+              MaxN_DP_Buffer = T->Indi.AllocFacN_DP_Buffer;
+#ifdef VERBOSE
+              printf("Task=%d: increase memory allocation, MaxN_DP_Buffer=%d Indi.AllocFacN_DP_Buffer=%g\n", ThisTask, MaxN_DP_Buffer,
+                     T->Indi.AllocFacN_DP_Buffer);
+#endif /* #ifdef VERBOSE */
+              DP_Buffer = (point *)myrealloc_movable(DP_Buffer, MaxN_DP_Buffer * sizeof(point));
+
+              if(N_DP_Buffer >= MaxN_DP_Buffer)
+                terminate("(N_DP_Buffer >= MaxN_DP_Buffer");
+            }
+
+          SphP[p].ActiveArea = 0;
+
+          DP_Buffer[N_DP_Buffer].x = min_x;
+          DP_Buffer[N_DP_Buffer].y = min_y;
+          DP_Buffer[N_DP_Buffer].z = min_z;
+          DP_Buffer[N_DP_Buffer].ID = P[p].ID;
+          DP_Buffer[N_DP_Buffer].task = ThisTask;
+          DP_Buffer[N_DP_Buffer].index = p;
+          DP_Buffer[N_DP_Buffer].originalindex = p;
+          DP_Buffer[N_DP_Buffer].timebin = P[p].TimeBinHydro;
+          DP_Buffer[N_DP_Buffer].image_flags = (1 << image_flag);
+#ifdef DOUBLE_STENCIL
+          DP_Buffer[N_DP_Buffer].Hsml = SphP[p].Hsml;
+          DP_Buffer[N_DP_Buffer].first_connection = -1;
+          DP_Buffer[N_DP_Buffer].last_connection = -1;
+#endif /* #ifdef DOUBLE_STENCIL */
+          send_count_new[origin]++; /* count the buffered point for task origin */
+          N_DP_Buffer++;
+        }
+    }
+
+  return numngb;
+}
+
+#endif /* #ifdef EXTENDED_GHOST_SEARCH #else */
+
+/*! \brief Counts the tetrahedra that are still flagged as undecided.
+ *
+ *  \param[in] T Pointer to tessellation; its DTF array is scanned over Ndt entries.
+ *
+ *  \return (Local) number of entries whose DTF flag has bit value 2 cleared.
+ */
+int count_undecided_tetras(tessellation *T)
+{
+  int idx, undecided = 0;
+
+  /* a tetrahedron is counted as long as bit value 2 of its DTF entry is not set */
+  for(idx = 0; idx < T->Ndt; idx++)
+    undecided += ((T->DTF[idx] & 2) == 0);
+
+  return undecided;
+}
+
+#endif /* #if !defined(ONEDIMS) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_lsf.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_lsf.c
new file mode 100644
index 0000000000..4323ab0a6d
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_lsf.c
@@ -0,0 +1,944 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi_gradients_lsf.c
+ * \date        05/2018
+ * \brief       Least square fit gradient calculation.
+ * \details     Described in Pakmor et al (2016).
+ *              contains functions:
+ *                static void inline add_row(double X[NUMDIMS][NUMDIMS],
+ *                  double y[NUMDIMS], int source_row, double fac,
+ *                  int target_row)
+ *                static void solve_matrix_problem(double X[NUMDIMS][NUMDIMS],
+ *                  double y[NUMDIMS], double grad[NUMDIMS])
+ *                void calculate_gradients(void)
+ *                void compute_divergences()
+ *                void correct_for_reflective_boundaries(double *ValueOther,
+ *                  double Value, int type, unsigned int *image_flags)
+ *                void limit_gradients(void)
+ *                void limit_vel_gradient(double *d, MySingle * grad_vx,
+ *                  MySingle * grad_vy, MySingle * grad_vz, double csnd)
+ *                void limit_gradient(double *d, double phi, double min_phi,
+ *                  double max_phi, MySingle * dphi)
+ *                double boundaryX(double dx)
+ *                double boundaryY(double dy)
+ *                double boundaryZ(double dz)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 23.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#if !defined(ONEDIMS)
+
+static double *minvalues, *maxvalues; /* neighbour extrema per (cell, quantity); allocated and freed in calculate_gradients() */
+
+static void limit_gradients(); /* slope-limits the freshly computed gradients using minvalues/maxvalues */
+static void correct_for_reflective_boundaries(double *ValueOther, double Value, int type, unsigned int *image_flags);
+
+static double boundaryX(double dx); /* boundaryX/Y/Z: wrap a separation periodically unless REFLECTIVE_X/Y/Z is set */
+static double boundaryY(double dy);
+static double boundaryZ(double dz);
+
+#if defined(OUTPUT_DIVVEL) || defined(MHD)
+static void compute_divergences(); /* only compiled when a velocity or B-field divergence is needed */
+#endif /* #if defined(OUTPUT_DIVVEL) || defined(MHD) */
+
+/*! \brief Elementary row operation on the linear system X*grad = y.
+ *
+ *  Performs row[target_row] += fac * row[source_row] on the matrix and on
+ *  the right-hand side; auxiliary routine to solve_matrix_problem.
+ *
+ *  \param[in, out] X Matrix.
+ *  \param[in, out] y Vector.
+ *  \param[in] source_row Index of the row that is scaled and added.
+ *  \param[in] fac Multiplier applied to the source row.
+ *  \param[in] target_row Index of the row that receives the sum.
+ *
+ *  \return void
+ */
+static void inline add_row(double X[NUMDIMS][NUMDIMS], double y[NUMDIMS], int source_row, double fac, int target_row)
+{
+  double *dst       = X[target_row];
+  const double *src = X[source_row];
+  y[target_row] += fac * y[source_row];
+  for(int col = 0; col < NUMDIMS; col++)
+    dst[col] += fac * src[col];
+}
+
+/*! \brief Solves the small linear system X*grad = y by Gaussian elimination.
+ *
+ *   X is known to be symmetric here, so pivoting is done on the diagonal:
+ *   the row with the largest absolute diagonal entry is eliminated first.
+ *
+ *  \param[in, out] X Matrix (overwritten during elimination).
+ *  \param[in, out] y Vector (overwritten during elimination).
+ *  \param[out] grad Gradient.
+ *
+ */
+static void solve_matrix_problem(double X[NUMDIMS][NUMDIMS], double y[NUMDIMS], double grad[NUMDIMS])
+{
+#if NUMDIMS == 2
+  /* the row with the larger diagonal entry becomes the pivot row */
+  int piv, oth;
+
+  if(fabs(X[0][0]) > fabs(X[1][1]))
+    piv = 0;
+  else
+    piv = 1;
+  oth = 1 - piv;
+
+  /* remove the pivot column from the other row */
+  add_row(X, y, piv, -X[oth][piv] / X[piv][piv], oth);
+
+  /* back substitution */
+  grad[oth] = y[oth] / X[oth][oth];
+  grad[piv] = (y[piv] - X[piv][oth] * grad[oth]) / X[piv][piv];
+
+#else /* #if NUMDIMS == 2 */
+
+  const double diag0 = fabs(X[0][0]);
+  const double diag1 = fabs(X[1][1]);
+  const double diag2 = fabs(X[2][2]);
+
+  /* first pivot: the row with the largest diagonal entry */
+  int first, second, third;
+
+  if(diag2 > diag1 && diag2 > diag0)
+    {
+      first  = 2;
+      second = 0;
+      third  = 1;
+    }
+  else
+    {
+      first  = (diag1 > diag0) ? 1 : 0;
+      second = 1 - first;
+      third  = 2;
+    }
+
+  /* remove the first pivot column from both remaining rows */
+  add_row(X, y, first, -X[second][first] / X[first][first], second);
+  add_row(X, y, first, -X[third][first] / X[first][first], third);
+
+  /* second pivot: the larger of the two remaining diagonal entries */
+  if(fabs(X[second][second]) < fabs(X[third][third]))
+    {
+      const int tmp = second;
+      second        = third;
+      third         = tmp;
+    }
+
+  add_row(X, y, second, -X[third][second] / X[second][second], third);
+
+  /* back substitution, starting with the row eliminated last */
+  grad[third]  = y[third] / X[third][third];
+  grad[second] = (y[second] - X[second][third] * grad[third]) / X[second][second];
+  grad[first]  = (y[first] - X[first][second] * grad[second] - X[first][third] * grad[third]) / X[first][first];
+
+#endif /* #if NUMDIMS == 2 #else */
+}
+
+/*! \brief Loop through all active cells and calculate gradients.
+ *         Fits a face-area-weighted least-squares gradient per cell and quantity, stores it in SphP[i].Grad, then limits it.
+ *  \return void
+ */
+void calculate_gradients(void)
+{
+  TIMER_START(CPU_GRADIENTS);
+
+  mpi_printf("VORONOI: Calculating Gradients...\n");
+
+  minvalues = mymalloc("gradmin", NumGas * N_Grad * sizeof(double)); /* smallest neighbour value per (cell, quantity) */
+  maxvalues = mymalloc("gradmax", NumGas * N_Grad * sizeof(double)); /* largest neighbour value per (cell, quantity) */
+
+  struct matrix_vec_data
+  {
+    double X[NUMDIMS][NUMDIMS]; /* input matrix */
+    double y[NUMDIMS];          /* input vector */
+    double grad[NUMDIMS];       /* output */
+  } * mdata;
+
+  mdata = mymalloc("mdata", N_Grad * sizeof(struct matrix_vec_data)); /* one linear system per gradient quantity */
+
+  double *Value = mymalloc("Value", N_Grad * sizeof(double)); /* the current cell's own value of each quantity */
+
+  for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      int i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      for(int k = 0; k < N_Grad; k++)
+        {
+          minvalues[i * N_Grad + k] = +MAX_REAL_NUMBER;
+          maxvalues[i * N_Grad + k] = -MAX_REAL_NUMBER;
+
+          if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) ||
+             (grad_elements[k].type == GRADIENT_TYPE_VELZ))
+            {
+              Value[k] = *(MyFloat *)(((char *)(&P[i])) + grad_elements[k].offset) / All.cf_atime; /* velocities are read from P and divided by cf_atime */
+            }
+          else
+            Value[k] = *(MyFloat *)(((char *)(&SphP[i])) + grad_elements[k].offset); /* all other quantities are read from SphP */
+        }
+
+      MyDouble *Center = SphP[i].Center;
+
+      /* reset matrix and vector to 0 */
+      memset(mdata, 0, N_Grad * sizeof(struct matrix_vec_data));
+
+#ifdef REFLECTIVE_X
+      int OutFlowX = 0;
+#endif /* #ifdef REFLECTIVE_X */
+#ifdef REFLECTIVE_Y
+      int OutFlowY = 0;
+#endif /* #ifdef REFLECTIVE_Y */
+#ifdef REFLECTIVE_Z
+      int OutFlowZ = 0;
+#endif /* #ifdef REFLECTIVE_Z */
+
+      int q = SphP[i].first_connection; /* head of this cell's list of face connections */
+
+      while(q >= 0)
+        {
+          int dp       = DC[q].dp_index;
+          int vf       = DC[q].vf_index;
+          int particle = Mesh.DP[dp].index;
+
+          if(particle < 0)
+            {
+              /* cell has been removed */
+              q = DC[q].next;
+              continue;
+            }
+
+          if(Mesh.VF[vf].area > 1e-10 * SphP[i].SurfaceArea) /* faces with negligible area do not enter the fit */
+            {
+              MyDouble *CenterOther, Mirror[3];
+
+              if(particle >= NumGas && Mesh.DP[dp].task == ThisTask)
+                particle -= NumGas; /* NOTE(review): local indices >= NumGas presumably denote local ghost points - confirm */
+
+#ifdef REFLECTIVE_X
+              if((Mesh.DP[dp].image_flags & REFL_X_FLAGS) && (Mesh.DP[dp].image_flags & OUTFLOW_X))
+                OutFlowX = 1;
+#endif /* #ifdef REFLECTIVE_X */
+#ifdef REFLECTIVE_Y
+              if((Mesh.DP[dp].image_flags & REFL_Y_FLAGS) && (Mesh.DP[dp].image_flags & OUTFLOW_Y))
+                OutFlowY = 1;
+#endif /* #ifdef REFLECTIVE_Y */
+#ifdef REFLECTIVE_Z
+              if((Mesh.DP[dp].image_flags & REFL_Z_FLAGS) && (Mesh.DP[dp].image_flags & OUTFLOW_Z))
+                OutFlowZ = 1;
+#endif /* #ifdef REFLECTIVE_Z */
+
+              if(Mesh.DP[dp].task == ThisTask)
+                {
+#ifndef VORONOI_STATIC_MESH
+                  if(P[particle].Ti_Current != All.Ti_Current)
+                    terminate("surprise! we don't expect this here anymore");
+#endif /* #ifndef VORONOI_STATIC_MESH */
+
+                  if(P[particle].ID == P[i].ID)
+                    {
+                      /* mirrored cell, we have to mirror the Center */
+
+                      /* calculate normal vector of the interface */
+                      double nx = Mesh.DP[dp].x - P[i].Pos[0];
+                      double ny = Mesh.DP[dp].y - P[i].Pos[1];
+                      double nz = Mesh.DP[dp].z - P[i].Pos[2];
+
+                      /* perpendicular on the surface */
+                      double nn = sqrt(nx * nx + ny * ny + nz * nz);
+                      nx /= nn;
+                      ny /= nn;
+                      nz /= nn;
+                      double fx = (Center[0] - Mesh.VF[vf].cx);
+                      double fy = (Center[1] - Mesh.VF[vf].cy);
+                      double fz = (Center[2] - Mesh.VF[vf].cz);
+                      double ff = (fx * nx + fy * ny + fz * nz); /* offset of Center from the face centre along the normal */
+
+                      double px = Center[0] - ff * nx; /* (px, py, pz): Center shifted by -ff along the normal */
+                      double py = Center[1] - ff * ny;
+                      double pz = Center[2] - ff * nz;
+
+                      Mirror[0]   = 2. * px - Center[0]; /* reflection of Center through (px, py, pz) */
+                      Mirror[1]   = 2. * py - Center[1];
+                      Mirror[2]   = 2. * pz - Center[2];
+                      CenterOther = Mirror;
+                    }
+                  else
+                    CenterOther = SphP[particle].Center;
+                }
+              else
+                CenterOther = PrimExch[particle].Center; /* neighbour on another task: use the exchanged copy */
+
+              double norm[3];
+              norm[0] = boundaryX(CenterOther[0] - Center[0]);
+              norm[1] = boundaryY(CenterOther[1] - Center[1]);
+              norm[2] = boundaryZ(CenterOther[2] - Center[2]);
+
+              double dist    = sqrt(norm[0] * norm[0] + norm[1] * norm[1] + norm[2] * norm[2]);
+              double distinv = 1.0 / dist;
+              norm[0] *= distinv;
+              norm[1] *= distinv;
+              norm[2] *= distinv;
+
+              double weight = Mesh.VF[vf].area; /* each neighbour is weighted by the area of the shared face */
+
+              for(int k = 0; k < N_Grad; k++)
+                {
+                  double ValueOther;
+
+                  if(Mesh.DP[dp].task == ThisTask)
+                    {
+                      if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) ||
+                         (grad_elements[k].type == GRADIENT_TYPE_VELZ))
+                        {
+                          ValueOther = *(MyFloat *)(((char *)(&P[particle])) + grad_elements[k].offset);
+                        }
+                      else
+                        ValueOther = *(MyFloat *)(((char *)(&SphP[particle])) + grad_elements[k].offset);
+                    }
+                  else
+                    {
+                      ValueOther = *(MyFloat *)(((char *)(&PrimExch[particle])) + grad_elements[k].offset_exch);
+                    }
+
+                  if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) ||
+                     (grad_elements[k].type == GRADIENT_TYPE_VELZ))
+                    {
+                      ValueOther /= All.cf_atime;
+
+#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z)
+                      correct_for_reflective_boundaries(&ValueOther, Value[k], grad_elements[k].type, &Mesh.DP[dp].image_flags);
+#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */
+                      if(grad_elements[k].type == GRADIENT_TYPE_VELX)
+                        ValueOther += norm[0] * dist * All.cf_atime * All.cf_Hrate; /* norm[0] * dist is the x separation of the two centres */
+                      else if(grad_elements[k].type == GRADIENT_TYPE_VELY)
+                        ValueOther += norm[1] * dist * All.cf_atime * All.cf_Hrate;
+                      else if(grad_elements[k].type == GRADIENT_TYPE_VELZ)
+                        ValueOther += norm[2] * dist * All.cf_atime * All.cf_Hrate;
+                    }
+
+                  double fac = weight * (ValueOther - Value[k]) / dist; /* weighted finite-difference slope towards the neighbour */
+
+                  for(int ia = 0; ia < NUMDIMS; ia++)
+                    {
+                      mdata[k].y[ia] += fac * norm[ia];
+
+                      for(int ib = 0; ib < NUMDIMS; ib++)
+                        mdata[k].X[ia][ib] += weight * norm[ia] * norm[ib];
+                    }
+
+                  if(ValueOther < minvalues[i * N_Grad + k]) /* neighbour extrema are consumed later by limit_gradients() */
+                    minvalues[i * N_Grad + k] = ValueOther;
+
+                  if(ValueOther > maxvalues[i * N_Grad + k])
+                    maxvalues[i * N_Grad + k] = ValueOther;
+                }
+            }
+
+          if(q == SphP[i].last_connection) /* reached the end of this cell's connection list */
+            break;
+
+          q = DC[q].next;
+        }
+
+      for(int k = 0; k < N_Grad; k++)
+        {
+          solve_matrix_problem(mdata[k].X, mdata[k].y, mdata[k].grad);
+
+          MySingle *data = (MySingle *)(((char *)(&(SphP[i].Grad))) + grad_elements[k].offset_grad);
+          for(int j = 0; j < NUMDIMS; j++)
+            data[j] = mdata[k].grad[j];
+          for(int j = NUMDIMS; j < 3; j++) /* components beyond NUMDIMS are set to zero */
+            data[j] = 0.;
+
+#ifdef REFLECTIVE_X
+          if(OutFlowX) /* an outflow image was seen: drop the gradient component along that axis */
+            data[0] = 0;
+#endif /* #ifdef REFLECTIVE_X */
+#ifdef REFLECTIVE_Y
+          if(OutFlowY)
+            data[1] = 0;
+#endif /* #ifdef REFLECTIVE_Y */
+#ifdef REFLECTIVE_Z
+          if(OutFlowZ)
+            data[2] = 0;
+#endif /* #ifdef REFLECTIVE_Z */
+        }
+    }
+
+  myfree(Value);
+  myfree(mdata);
+
+#ifdef MHD
+  for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      int i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      SphP[i].CurlB[0] = SphP[i].Grad.dB[2][1] - SphP[i].Grad.dB[1][2];
+      SphP[i].CurlB[1] = SphP[i].Grad.dB[0][2] - SphP[i].Grad.dB[2][0];
+      SphP[i].CurlB[2] = SphP[i].Grad.dB[1][0] - SphP[i].Grad.dB[0][1];
+    }
+#endif /* #ifdef MHD */
+
+  limit_gradients(); /* reads minvalues/maxvalues, so it has to run before they are freed below */
+
+#ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED
+  /* compute magnitude of curl */
+  for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      int i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+      double curlx = SphP[i].Grad.dvel[2][1] - SphP[i].Grad.dvel[1][2];
+      double curly = SphP[i].Grad.dvel[0][2] - SphP[i].Grad.dvel[2][0];
+      double curlz = SphP[i].Grad.dvel[1][0] - SphP[i].Grad.dvel[0][1];
+
+      SphP[i].CurlVel = sqrt(curlx * curlx + curly * curly + curlz * curlz);
+    }
+#endif /* #ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED */
+
+  myfree(maxvalues);
+  myfree(minvalues);
+
+#if defined(OUTPUT_DIVVEL) || defined(MHD)
+  compute_divergences();
+#endif /* #if defined(OUTPUT_DIVVEL) || defined(MHD) */
+
+  TIMER_STOP(CPU_GRADIENTS);
+}
+
+#if defined(OUTPUT_DIVVEL) || defined(MHD)
+/*! \brief Computes divergences by applying Gauss' theorem.
+ *
+ *  Loops through all active cells, sums area * (face value . unit vector
+ *  towards the neighbour) over the cell's faces and divides by the volume.
+ *
+ *  \return void
+ */
+void compute_divergences()
+{
+  mpi_printf("VORONOI: Computing divergences... \n");
+
+  exchange_primitive_variables_and_gradients();
+
+  for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      int i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+#if defined(OUTPUT_DIVVEL)
+      SphP[i].DivVel = 0;
+#endif /* #if defined(OUTPUT_DIVVEL) */
+#ifdef MHD
+      SphP[i].DivB = 0;
+#endif /* #ifdef MHD */
+
+      MyDouble *CenterOther, Mirror[3];
+#if defined(OUTPUT_DIVVEL)
+      MyFloat *VelOther;
+#endif /* #if defined(OUTPUT_DIVVEL) */
+#ifdef MHD
+      MyFloat *BOther, B[3];
+      struct grad_data *GradOther;
+#endif /* #ifdef MHD */
+
+      int q = SphP[i].first_connection; /* head of this cell's list of face connections */
+      while(q >= 0)
+        {
+          int dp       = DC[q].dp_index;
+          int vf       = DC[q].vf_index;
+          int particle = Mesh.DP[dp].index;
+
+          if(particle < 0)
+            {
+              /* cell has been removed */
+              q = DC[q].next;
+              continue;
+            }
+
+          if(Mesh.VF[vf].area > 1e-10 * SphP[i].SurfaceArea) /* faces with negligible area are skipped */
+            {
+#ifdef MHD
+              double dx = boundaryX(Mesh.VF[vf].cx - SphP[i].Center[0]);
+              double dy = boundaryY(Mesh.VF[vf].cy - SphP[i].Center[1]);
+              double dz = boundaryZ(Mesh.VF[vf].cz - SphP[i].Center[2]);
+
+              for(int j = 0; j < 3; j++) /* linear extrapolation of this cell's B to the face centre */
+                B[j] = SphP[i].B[j] + SphP[i].Grad.dB[j][0] * dx + SphP[i].Grad.dB[j][1] * dy + SphP[i].Grad.dB[j][2] * dz;
+#endif /* #ifdef MHD */
+
+              if(particle >= NumGas && Mesh.DP[dp].task == ThisTask)
+                particle -= NumGas; /* NOTE(review): local indices >= NumGas presumably denote local ghost points - confirm */
+
+              if(Mesh.DP[dp].task == ThisTask)
+                {
+                  if(P[particle].ID == P[i].ID)
+                    {
+                      /* mirrored cell, we have to mirror the Center */
+                      /* calculate normal vector of the interface */
+                      double nx = Mesh.DP[dp].x - P[i].Pos[0];
+                      double ny = Mesh.DP[dp].y - P[i].Pos[1];
+                      double nz = Mesh.DP[dp].z - P[i].Pos[2];
+                      /* perpendicular on the surface */
+                      double nn = sqrt(nx * nx + ny * ny + nz * nz);
+                      nx /= nn;
+                      ny /= nn;
+                      nz /= nn;
+                      double fx   = (SphP[i].Center[0] - Mesh.VF[vf].cx);
+                      double fy   = (SphP[i].Center[1] - Mesh.VF[vf].cy);
+                      double fz   = (SphP[i].Center[2] - Mesh.VF[vf].cz);
+                      double ff   = (fx * nx + fy * ny + fz * nz); /* offset of Center from the face centre along the normal */
+                      double px   = SphP[i].Center[0] - ff * nx;
+                      double py   = SphP[i].Center[1] - ff * ny;
+                      double pz   = SphP[i].Center[2] - ff * nz;
+                      Mirror[0]   = 2. * px - SphP[i].Center[0]; /* reflection of Center through (px, py, pz) */
+                      Mirror[1]   = 2. * py - SphP[i].Center[1];
+                      Mirror[2]   = 2. * pz - SphP[i].Center[2];
+                      CenterOther = Mirror;
+                    }
+                  else
+                    CenterOther = SphP[particle].Center;
+
+#if defined(OUTPUT_DIVVEL)
+                  VelOther = P[particle].Vel;
+#endif /* #if defined(OUTPUT_DIVVEL) */
+#ifdef MHD
+                  GradOther = &SphP[particle].Grad;
+                  BOther    = SphP[particle].B;
+#endif /* #ifdef MHD */
+                }
+              else
+                {
+                  CenterOther = PrimExch[particle].Center; /* neighbour on another task: use the exchanged copies */
+#if defined(OUTPUT_DIVVEL)
+                  VelOther = PrimExch[particle].VelGas;
+#endif /* #if defined(OUTPUT_DIVVEL) */
+#ifdef MHD
+                  GradOther = &GradExch[particle];
+                  BOther    = PrimExch[particle].B;
+#endif /* #ifdef MHD */
+                }
+
+#ifdef MHD
+              dx = boundaryX(Mesh.VF[vf].cx - CenterOther[0]);
+              dy = boundaryY(Mesh.VF[vf].cy - CenterOther[1]);
+              dz = boundaryZ(Mesh.VF[vf].cz - CenterOther[2]);
+
+              for(int j = 0; j < 3; j++) /* face value: mean of the extrapolations from both cells */
+                B[j] = 0.5 * (B[j] + BOther[j] + GradOther->dB[j][0] * dx + GradOther->dB[j][1] * dy + GradOther->dB[j][2] * dz);
+#endif /* #ifdef MHD */
+
+              double norm[3];
+              norm[0] = boundaryX(CenterOther[0] - SphP[i].Center[0]);
+              norm[1] = boundaryY(CenterOther[1] - SphP[i].Center[1]);
+              norm[2] = boundaryZ(CenterOther[2] - SphP[i].Center[2]);
+
+              double dist = sqrt(norm[0] * norm[0] + norm[1] * norm[1] + norm[2] * norm[2]);
+              norm[0] /= dist;
+              norm[1] /= dist;
+              norm[2] /= dist;
+
+#if defined(OUTPUT_DIVVEL)
+              double Vel[3];
+              for(int j = 0; j < 3; j++)
+                Vel[j] = 0.5 * (P[i].Vel[j] + VelOther[j]); /* face velocity: plain average of both cells */
+              double nVel = Vel[0] * norm[0] + Vel[1] * norm[1] + Vel[2] * norm[2];
+              SphP[i].DivVel += Mesh.VF[vf].area * nVel;
+#endif /* #if defined(OUTPUT_DIVVEL) */
+#ifdef MHD
+              double nB = B[0] * norm[0] + B[1] * norm[1] + B[2] * norm[2];
+              SphP[i].DivB += Mesh.VF[vf].area * nB;
+#endif /* #ifdef MHD */
+            }
+
+          if(q == SphP[i].last_connection) /* reached the end of this cell's connection list */
+            break;
+
+          q = DC[q].next;
+        }
+
+#if defined(OUTPUT_DIVVEL)
+      SphP[i].DivVel /= SphP[i].Volume; /* surface sum divided by the cell volume */
+#endif /* #if defined(OUTPUT_DIVVEL) */
+#ifdef MHD
+      SphP[i].DivB /= SphP[i].Volume;
+#endif /* #ifdef MHD */
+    }
+}
+#endif /* #if defined(OUTPUT_DIVVEL) || defined(MHD) */
+
+/*! \brief Adjusts a neighbour's velocity component across a reflective or
+ *         outflow boundary before it enters the gradient calculation.
+ *
+ *  A reflected image gets its component negated; an outflow image gets Value.
+ *
+ *  \param[in, out] ValueOther Value of other cell.
+ *  \param[in] Value Value of this cell.
+ *  \param[in] type Type of gradient (x,y,z direction).
+ *  \param[in] image_flags Flag that signals boundary interface.
+ *  \return void
+ */
+void correct_for_reflective_boundaries(double *ValueOther, double Value, int type, unsigned int *image_flags)
+{
+#ifdef REFLECTIVE_X
+  if(type == GRADIENT_TYPE_VELX && (*image_flags & REFL_X_FLAGS))
+    {
+      if(*image_flags & OUTFLOW_X)
+        *ValueOther = Value;
+      else
+        *ValueOther *= -1;
+    }
+#endif /* #ifdef REFLECTIVE_X */
+
+#ifdef REFLECTIVE_Y
+  if(type == GRADIENT_TYPE_VELY && (*image_flags & REFL_Y_FLAGS))
+    {
+      if(*image_flags & OUTFLOW_Y)
+        *ValueOther = Value;
+      else
+        *ValueOther *= -1;
+    }
+#endif /* #ifdef REFLECTIVE_Y */
+
+#ifdef REFLECTIVE_Z
+  if(type == GRADIENT_TYPE_VELZ && (*image_flags & REFL_Z_FLAGS))
+    {
+      if(*image_flags & OUTFLOW_Z)
+        *ValueOther = Value;
+      else
+        *ValueOther *= -1;
+    }
+#endif /* #ifdef REFLECTIVE_Z */
+}
+
+/*! \brief Loops through mesh and limits associated gradients.
+ *         Pass 1 applies limit_gradient() per quantity; pass 2 (unless disabled) applies limit_vel_gradient().
+ *  \return void
+ */
+void limit_gradients(void)
+{
+  mpi_printf("VORONOI: Limiting gradients...\n");
+
+  point *DP = Mesh.DP;
+  face *VF  = Mesh.VF;
+
+  for(int i = 0; i < Mesh.Nvf; i++)
+    {
+      if(DP[VF[i].p1].index < 0 || DP[VF[i].p2].index < 0) /* skip faces with an endpoint whose index is negative */
+        continue;
+      for(int j = 0; j < 2; j++) /* visit both points that share this face */
+        {
+          point *p;
+          if(j == 0)
+            {
+              p = &DP[VF[i].p1];
+            }
+          else
+            {
+              p = &DP[VF[i].p2];
+            }
+
+          if(p->task == ThisTask && p->index >= 0 && p->index < NumGas) /* point on this task with index in [0, NumGas) */
+            {
+              int q = p->index;
+              if(TimeBinSynchronized[P[q].TimeBinHydro])
+                {
+                  double d[3]; /* vector from the cell's Center to the face centre */
+                  d[0] = VF[i].cx - SphP[q].Center[0];
+                  d[1] = VF[i].cy - SphP[q].Center[1];
+                  d[2] = VF[i].cz - SphP[q].Center[2];
+#if !defined(REFLECTIVE_X)
+                  double xtmp; /* NOTE(review): not referenced directly; presumably scratch for the NEAREST_X macro - confirm */
+                  d[0] = NEAREST_X(d[0]);
+#endif /* #if !defined(REFLECTIVE_X) */
+#if !defined(REFLECTIVE_Y)
+                  double ytmp;
+                  d[1] = NEAREST_Y(d[1]);
+#endif /* #if !defined(REFLECTIVE_Y) */
+#if !defined(REFLECTIVE_Z)
+                  double ztmp;
+                  d[2] = NEAREST_Z(d[2]);
+#endif /* #if !defined(REFLECTIVE_Z) */
+                  double value;
+                  MySingle *data;
+                  if(VF[i].area > 1.0e-10 * SphP[q].SurfaceArea) /* faces with negligible area do not constrain the gradient */
+                    {
+                      for(int k = 0; k < N_Grad; k++)
+                        {
+                          if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) ||
+                             (grad_elements[k].type == GRADIENT_TYPE_VELZ))
+                            {
+                              value = *(MyFloat *)(((char *)(&P[q])) + grad_elements[k].offset);
+                              value /= All.cf_atime;
+                            }
+                          else
+                            value = *(MyFloat *)(((char *)(&SphP[q])) + grad_elements[k].offset);
+
+                          data = (MySingle *)(((char *)(&(SphP[q].Grad))) + grad_elements[k].offset_grad);
+
+                          if(grad_elements[k].type != GRADIENT_TYPE_RTF) /* quantities of type GRADIENT_TYPE_RTF are not limited */
+                            limit_gradient(d, value, minvalues[q * N_Grad + k], maxvalues[q * N_Grad + k], data);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+#ifndef DISABLE_VELOCITY_CSND_SLOPE_LIMITING
+  for(int i = 0; i < Mesh.Nvf; i++)
+    {
+      if(DP[VF[i].p1].index < 0 || DP[VF[i].p2].index < 0) /* skip faces with an endpoint whose index is negative */
+        continue;
+      for(int j = 0; j < 2; j++) /* visit both points that share this face */
+        {
+          point *p;
+
+          if(j == 0)
+            {
+              p = &DP[VF[i].p1];
+            }
+          else
+            {
+              p = &DP[VF[i].p2];
+            }
+
+          if(p->task == ThisTask && p->index >= 0 && p->index < NumGas) /* point on this task with index in [0, NumGas) */
+            {
+              int q = p->index;
+              if(TimeBinSynchronized[P[q].TimeBinHydro])
+                {
+                  double d[3]; /* vector from the cell's Center to the face centre */
+                  d[0] = VF[i].cx - SphP[q].Center[0];
+                  d[1] = VF[i].cy - SphP[q].Center[1];
+                  d[2] = VF[i].cz - SphP[q].Center[2];
+#if !defined(REFLECTIVE_X)
+                  double xtmp;
+                  d[0] = NEAREST_X(d[0]);
+#endif /* #if !defined(REFLECTIVE_X) */
+#if !defined(REFLECTIVE_Y)
+                  double ytmp;
+                  d[1] = NEAREST_Y(d[1]);
+#endif /* #if !defined(REFLECTIVE_Y) */
+#if !defined(REFLECTIVE_Z)
+                  double ztmp;
+                  d[2] = NEAREST_Z(d[2]);
+#endif /* #if !defined(REFLECTIVE_Z) */
+                  double value;   /* NOTE(review): never used in this second pass */
+                  MySingle *data; /* NOTE(review): never used in this second pass */
+
+                  if(VF[i].area > 1.0e-10 * SphP[q].SurfaceArea)
+                    {
+                      /* let's now limit the overall size of the velocity gradient */
+                      MySingle *grad_vx = (MySingle *)(((char *)(&(SphP[q].Grad))) + GVelx->offset_grad);
+                      MySingle *grad_vy = (MySingle *)(((char *)(&(SphP[q].Grad))) + GVely->offset_grad);
+                      MySingle *grad_vz = (MySingle *)(((char *)(&(SphP[q].Grad))) + GVelz->offset_grad);
+                      limit_vel_gradient(d, grad_vx, grad_vy, grad_vz, get_sound_speed(q));
+                    }
+                }
+            }
+        }
+    }
+#endif /* #ifndef DISABLE_VELOCITY_CSND_SLOPE_LIMITING */
+}
+
+/*! \brief Limits velocity gradient.
+ *
+ *  Caps the change of every velocity component along d at
+ *  VEL_GRADIENT_LIMIT_FAC times the sound speed: a component gradient
+ *  whose projection onto d is larger in magnitude is scaled down
+ *  uniformly so that it just meets the cap.
+ *
+ *  For comoving integration the term All.cf_atime * All.cf_Hrate is taken
+ *  off the diagonal entries before the check and added back afterwards.
+ *
+ *  \param[in] d Direction vector.
+ *  \param[in, out] grad_vx X-velocity gradient.
+ *  \param[in, out] grad_vy Y-velocity gradient.
+ *  \param[in, out] grad_vz Z-velocity gradient.
+ *  \param[in] csnd sound speed.
+ *
+ *  \return void
+ */
+void limit_vel_gradient(double *d, MySingle *grad_vx, MySingle *grad_vy, MySingle *grad_vz, double csnd)
+{
+#define VEL_GRADIENT_LIMIT_FAC 1.0
+  MySingle *rows[3] = {grad_vx, grad_vy, grad_vz}; /* rows[c] is the gradient of velocity component c */
+  double jump[3];
+
+  /* take the Hubble-flow term off the diagonal before limiting */
+  if(All.ComovingIntegrationOn)
+    {
+      for(int c = 0; c < 3; c++)
+        rows[c][c] -= All.cf_atime * All.cf_Hrate;
+    }
+
+  /* magnitude of the change of each velocity component along d */
+  for(int c = 0; c < 3; c++)
+    {
+      jump[c] = fabs(rows[c][0] * d[0] + rows[c][1] * d[1] + rows[c][2] * d[2]);
+    }
+
+  /* scale down every gradient whose change exceeds the cap */
+  for(int c = 0; c < 3; c++)
+    {
+      if(jump[c] > VEL_GRADIENT_LIMIT_FAC * csnd)
+        {
+          const double scale = VEL_GRADIENT_LIMIT_FAC * csnd / jump[c];
+
+          for(int k = 0; k < 3; k++)
+            {
+              rows[c][k] *= scale;
+            }
+        }
+    }
+
+  /* put the Hubble-flow term back onto the diagonal */
+  if(All.ComovingIntegrationOn)
+    {
+      for(int c = 0; c < 3; c++)
+        rows[c][c] += All.cf_atime * All.cf_Hrate;
+    }
+}
+
+/*! \brief Limits gradients.
+ *
+ *  Slope limiter: when phi + dphi.d leaves [min_phi, max_phi], dphi is
+ *  scaled by a factor in [0, 1] so the extrapolation stops at the bound.
+ *
+ *  \param[in] d Direction vector.
+ *  \param[in] phi Value.
+ *  \param[in] min_phi Lower bound for value+gradient*dx.
+ *  \param[in] max_phi Upper bound for value+gradient*dx.
+ *  \param[in, out] dphi Gradient.
+ *  \return void
+ */
+void limit_gradient(double *d, double phi, double min_phi, double max_phi, MySingle *dphi)
+{
+  double dp = dphi[0] * d[0] + dphi[1] * d[1] + dphi[2] * d[2]; /* extrapolated change of phi along d */
+
+  if(dp > 0)
+    {
+      if(phi + dp > max_phi) /* overshoots the upper bound */
+        {
+          double fac;
+
+          if(max_phi > phi)
+            fac = (max_phi - phi) / dp;
+          else
+            fac = 0; /* phi itself is not below the bound: flatten the gradient */
+          if(fac < 0 || fac > 1)
+            terminate("fac=%g\ndp=%g max_phi=%g phi=%g", fac, dp, max_phi, phi);
+          dphi[0] *= fac;
+          dphi[1] *= fac;
+          dphi[2] *= fac;
+        }
+    }
+  else if(dp < 0)
+    {
+      if(phi + dp < min_phi) /* undershoots the lower bound */
+        {
+          double fac;
+
+          if(min_phi < phi)
+            fac = (min_phi - phi) / dp;
+          else
+            fac = 0; /* phi itself is not above the bound: flatten the gradient */
+          if(fac < 0 || fac > 1) /* report the bound that was actually violated here, i.e. min_phi */
+            terminate("fac=%g\ndp=%g min_phi=%g phi=%g", fac, dp, min_phi, phi);
+          dphi[0] *= fac;
+          dphi[1] *= fac;
+          dphi[2] *= fac;
+        }
+    }
+}
+
+/*! \brief Separation in x direction, wrapped to the nearest periodic image.
+ *
+ *  No wrapping is compiled in when REFLECTIVE_X is defined.
+ *
+ *  \param[in] dx Separation in x direction before periodic wrapping.
+ *
+ *  \return Separation in x direction after periodic wrapping.
+ */
+double boundaryX(double dx)
+{
+  double wrapped = dx;
+#if !defined(REFLECTIVE_X)
+  if(wrapped < -boxHalf_X)
+    wrapped = wrapped + boxSize_X;
+  if(wrapped > boxHalf_X)
+    wrapped = wrapped - boxSize_X;
+#endif /* #if !defined(REFLECTIVE_X) */
+  return wrapped;
+}
+
+/*! \brief Separation in y direction, wrapped to the nearest periodic image.
+ *
+ *  No wrapping is compiled in when REFLECTIVE_Y is defined.
+ *
+ *  \param[in] dy Separation in y direction before periodic wrapping.
+ *
+ *  \return Separation in y direction after periodic wrapping.
+ */
+double boundaryY(double dy)
+{
+  double wrapped = dy;
+#if !defined(REFLECTIVE_Y)
+  if(wrapped < -boxHalf_Y)
+    wrapped = wrapped + boxSize_Y;
+  if(wrapped > boxHalf_Y)
+    wrapped = wrapped - boxSize_Y;
+#endif /* #if !defined(REFLECTIVE_Y) */
+  return wrapped;
+}
+
+/*! \brief Separation in z direction, wrapped to the nearest periodic image.
+ *
+ *  No wrapping is compiled in when REFLECTIVE_Z is defined.
+ *
+ *  \param[in] dz Separation in z direction before periodic wrapping.
+ *
+ *  \return Separation in z direction after periodic wrapping.
+ */
+double boundaryZ(double dz)
+{
+  double wrapped = dz;
+#if !defined(REFLECTIVE_Z)
+  if(wrapped < -boxHalf_Z)
+    wrapped = wrapped + boxSize_Z;
+  if(wrapped > boxHalf_Z)
+    wrapped = wrapped - boxSize_Z;
+#endif /* #if !defined(REFLECTIVE_Z) */
+  return wrapped;
+}
+
+#endif /* #if !defined(ONEDIMS) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_onedims.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_onedims.c
new file mode 100644
index 0000000000..d3e770a0da
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_onedims.c
@@ -0,0 +1,204 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi_gradients_onedims.c
+ * \date        05/2018
+ * \brief       Algorithms to calculate the gradients in 1d simulations.
+ * \details     contains functions:
+ *                double getValue(int i, int k)
+ *                void calculate_gradients(void)
+ *                void compute_divvel()
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 23.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#if defined(ONEDIMS)
+
+#ifdef OUTPUT_DIVVEL
+static void compute_divvel();
+#endif /* #ifdef OUTPUT_DIVVEL */
+
+/*! \brief Reads the quantity selected by a gradient element for one cell.
+ *
+ *  \param[in] i Index of cell in P and SphP array.
+ *  \param[in] k Index in grad_elements array; velocity types read P, others SphP.
+ *
+ *  \return Value stored at grad_elements[k].offset within the selected struct.
+ */
+double getValue(int i, int k)
+{
+  char *base = (char *)(&SphP[i]);
+  if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) ||
+     (grad_elements[k].type == GRADIENT_TYPE_VELZ))
+    base = (char *)(&P[i]);
+  return *(MyFloat *)(base + grad_elements[k].offset);
+}
+
+/*! \brief Calculates gradients in a 1d simulation from both neighbor cells;
+ *  each is limited so extrapolation to PosL/PosR stays in [ValueMin, ValueMax].
+ *  \return void
+ */
+void calculate_gradients(void)
+{
+  CPU_Step[CPU_MISC] += measure_time();
+
+  printf("Calculating 1D gradients...\n");
+
+  int idx, i, k;
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      for(k = 0; k < N_Grad; k++)
+        {
+          double Value = getValue(i, k);
+          double Pos   = P[i].Pos[0];
+
+#if defined(ONEDIMS_SPHERICAL) || defined(REFLECTIVE_X)
+          if(i == 0 || i == NumGas - 1)
+            {
+              MySingle *data = (MySingle *)(((char *)(&(SphP[i].Grad))) + grad_elements[k].offset_grad);
+              memset(data, 0, 3 * sizeof(MySingle)); /* first and last cell get a zero gradient */
+              continue;
+            }
+#endif /* #if defined (ONEDIMS_SPHERICAL) || defined (REFLECTIVE_X) */
+          /* if we get here, we have periodic boundary conditions or are not at the boundaries */
+          double ValueL, ValueR;
+
+          if(i == 0)
+            ValueL = getValue(NumGas - 1, k);
+          else
+            ValueL = getValue(i - 1, k);
+
+          if(i == NumGas - 1)
+            ValueR = getValue(0, k);
+          else
+            ValueR = getValue(i + 1, k);
+          /* NOTE(review): reads DP[-1] for i == 0 and DP[NumGas] for i == NumGas - 1; presumably ghost points -- confirm */
+          double PosL = Mesh.DP[i - 1].x;
+          double PosR = Mesh.DP[i + 1].x;
+          /* difference quotient across the left and right neighbor */
+          double grad = (ValueL - ValueR) / (PosL - PosR);
+
+          MySingle *data = (MySingle *)(((char *)(&(SphP[i].Grad))) + grad_elements[k].offset_grad);
+          data[0]        = grad; /* unlimited value; replaced by the limited one below */
+          data[1]        = 0;
+          data[2]        = 0;
+
+          double ValueMin = dmin(ValueL, ValueR);
+          double ValueMax = dmax(ValueL, ValueR);
+          /* limiter: reduce grad whenever the value extrapolated to PosL or PosR leaves [ValueMin, ValueMax] */
+          if(Value + grad * (PosL - Pos) < ValueMin)
+            {
+              if(ValueMin < Value)
+                grad = (ValueMin - Value) / (PosL - Pos);
+              else
+                grad = 0.;
+            }
+
+          if(Value + grad * (PosL - Pos) > ValueMax)
+            {
+              if(ValueMax > Value)
+                grad = (ValueMax - Value) / (PosL - Pos);
+              else
+                grad = 0.;
+            }
+
+          if(Value + grad * (PosR - Pos) < ValueMin)
+            {
+              if(ValueMin < Value)
+                grad = (ValueMin - Value) / (PosR - Pos);
+              else
+                grad = 0.;
+            }
+
+          if(Value + grad * (PosR - Pos) > ValueMax)
+            {
+              if(ValueMax > Value)
+                grad = (ValueMax - Value) / (PosR - Pos);
+              else
+                grad = 0.;
+            }
+
+          data[0] = grad;
+        }
+    }
+
+#ifdef OUTPUT_DIVVEL
+  compute_divvel();
+#endif /* #ifdef OUTPUT_DIVVEL */
+
+  CPU_Step[CPU_GRADIENTS] += measure_time();
+}
+
+#ifdef OUTPUT_DIVVEL
+/*! \brief Computes the velocity divergence of all active cells (1d runs).
+ *
+ *  Uses Gauss' theorem: half the difference of the area-weighted x-velocities
+ *  of the right and left neighbor, divided by the cell volume. For the first
+ *  and last cell the missing neighbor is the cell itself if ONEDIMS_SPHERICAL
+ *  or REFLECTIVE_X is defined, otherwise the cell at the opposite end.
+ *
+ *  \return void
+ */
+void compute_divvel()
+{
+  face *VF = Mesh.VF;
+  int idx;
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      const int i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      /* indices of the cells providing the left and right velocity */
+      int left  = i - 1;
+      int right = i + 1;
+
+#if defined(ONEDIMS_SPHERICAL) || defined(REFLECTIVE_X)
+      if(i == 0)
+        left = i;
+      if(i == NumGas - 1)
+        right = i;
+#else  /* #if defined (ONEDIMS_SPHERICAL) || defined (REFLECTIVE_X) */
+      if(i == 0)
+        left = NumGas - 1;
+      if(i == NumGas - 1)
+        right = 0;
+#endif /* #if defined (ONEDIMS_SPHERICAL) || defined (REFLECTIVE_X) #else */
+
+      const double VelxL = P[left].Vel[0];
+      const double VelxR = P[right].Vel[0];
+
+      /* NOTE(review): VF[i - 1] is also read for i == 0; confirm VF[-1] is a valid face */
+      SphP[i].DivVel = 0.5 * (VF[i].area * VelxR - VF[i - 1].area * VelxL) / SphP[i].Volume;
+    }
+}
+#endif /* #ifdef OUTPUT_DIVVEL */
+
+#endif /* #if defined(ONEDIMS) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_refinement.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_refinement.c
new file mode 100644
index 0000000000..8077b9b0a0
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_refinement.c
@@ -0,0 +1,425 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi_refinement.c
+ * \date        05/2018
+ * \brief       Contains routines for refinement.
+ * \details     contains functions:
+ *                static void refine_add_ngb(int i, int j)
+ *                int do_refinements(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 23.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#ifdef REFINEMENT_SPLIT_CELLS
+
+static int *ref_SphP_dp_index;
+static int *first_ngb, *last_ngb, first_free_ngb;
+
+/*! \brief Node of the per-point neighbor lists filled by refine_add_ngb():
+ *         index is the stored neighbor, next_ngb the next node or -1 at the end.
+ */
+static struct ngb_data
+{
+  int index;
+  int next_ngb;
+} * ngb;
+
+/*! \brief Appends point j to the linked neighbor list of point i.
+ *
+ *  Does nothing if either index is negative; terminates if either index is
+ *  not below Mesh.Ndp.
+ *
+ *  \param[in] i Index of the point whose neighbor list is extended.
+ *  \param[in] j Index stored in the newly appended list node.
+ *
+ *  \return void
+ */
+static void refine_add_ngb(int i, int j)
+{
+  if(i < 0 || j < 0)
+    return;
+
+  if(i >= Mesh.Ndp || j >= Mesh.Ndp)
+    terminate("i>= Ndp || j>= Ndp");
+
+  /* claim the next unused node of the ngb pool */
+  const int node = first_free_ngb++;
+
+  if(first_ngb[i] < 0)
+    first_ngb[i] = node; /* list was empty: node becomes its head */
+  else
+    ngb[last_ngb[i]].next_ngb = node; /* chain behind the current tail */
+  last_ngb[i] = node;
+
+  /* the new node carries j and terminates the list */
+  ngb[node].index    = j;
+  ngb[node].next_ngb = -1;
+}
+
+/*! \brief Loops through active cells and refines cells if needed.
+ *
+ *  A split direction is drawn at random; the new mesh-generating point is
+ *  placed 0.025 cell radii away from the original one along that direction,
+ *  and conserved quantities are divided by the volume ratio of the two cells.
+ *
+ *  \return Number of cells to be refined, summed over all tasks (countall).
+ */
+int do_refinements(void)
+{
+  char buf[1000];
+  int idx, i, j, k, count, countall;
+  double rad, fac;
+  MyIDType newid = 0;
+
+  TIMER_START(CPU_REFINE);
+
+  ref_SphP_dp_index = mymalloc_movable(&ref_SphP_dp_index, "ref_SphP_dp_index", NumGas * sizeof(int));
+
+  int NActiveParticles = TimeBinsHydro.NActiveParticles; /* save this since refinement is going to change it */
+  for(idx = 0, count = 0; idx < NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(should_this_cell_be_split(i))
+        {
+          ref_SphP_dp_index[i] = -1;
+          count++;
+        }
+    }
+
+  MPI_Allreduce(&count, &countall, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+  mpi_printf("REFINE: want to refine %d cells\n", countall);
+
+  if(countall)
+    {
+      domain_resize_storage(count, count, 2);
+
+      if(NumPart + count >= All.MaxPart)
+        {
+          snprintf(buf, sizeof(buf), "On Task=%d with NumPart=%d we try to produce %d cells. Sorry, no space left...(All.MaxPart=%d)\n", ThisTask,
+                   NumPart, count, All.MaxPart);
+          terminate("%s", buf); /* pass the message as data, never as the format string */
+        }
+
+      if(NumGas + count >= All.MaxPartSph)
+        {
+          snprintf(buf, sizeof(buf), "On Task=%d with NumGas=%d we try to produce %d cells. Sorry, no space left...(All.MaxPartSph=%d)\n", ThisTask,
+                   NumGas, count, All.MaxPartSph);
+          terminate("%s", buf); /* pass the message as data, never as the format string */
+        }
+
+      if(All.MaxID == 0) /* MaxID not calculated yet */
+        calculate_maxid();
+
+      int *list = mymalloc("list", NTask * sizeof(int));
+
+      MPI_Allgather(&count, 1, MPI_INT, list, 1, MPI_INT, MPI_COMM_WORLD);
+
+      newid = All.MaxID + 1;
+
+      for(i = 0; i < ThisTask; i++)
+        newid += list[i];
+
+      All.MaxID += countall;
+
+      myfree(list);
+
+      Ngb_MarkerValue++;
+      int nchanged  = 0;
+      int *nodelist = (int *)mymalloc("nodelist", NTopleaves * sizeof(int));
+
+      /*  create explicit list of neighbors */
+
+      first_ngb = mymalloc("first_ngb", Mesh.Ndp * sizeof(int));
+      ngb       = mymalloc("ngbs", 2 * Mesh.Nvf * sizeof(struct ngb_data));
+      last_ngb  = mymalloc("last_ngb", Mesh.Ndp * sizeof(int));
+
+      for(i = 0; i < Mesh.Ndp; i++)
+        {
+          first_ngb[i] = last_ngb[i] = -1;
+
+          if(Mesh.DP[i].task == ThisTask)
+            {
+              int li = Mesh.DP[i].index;
+              if(li >= 0 && li < NumGas)
+                if(ref_SphP_dp_index[li] < 0)
+                  ref_SphP_dp_index[li] = i; /* only guaranteed to be set for active cells */
+            }
+        }
+
+      for(i = 0, first_free_ngb = 0; i < Mesh.Nvf; i++)
+        {
+          refine_add_ngb(Mesh.VF[i].p1, Mesh.VF[i].p2);
+          refine_add_ngb(Mesh.VF[i].p2, Mesh.VF[i].p1);
+        }
+
+      myfree(last_ngb);
+
+      NActiveParticles = TimeBinsHydro.NActiveParticles; /* re-read into the outer variable instead of shadowing it */
+      for(idx = 0, count = 0; idx < NActiveParticles; idx++)
+        {
+          i = TimeBinsHydro.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          if(should_this_cell_be_split(i))
+            {
+              int addToGravList = TimeBinSynchronized[P[i].TimeBinGrav];
+              if(NumPart > NumGas)
+                {
+                  move_collisionless_particle(NumPart + count, NumGas + count);
+                  if(TimeBinSynchronized[P[NumPart + count].TimeBinGrav] && P[i].Mass > 0)
+                    addToGravList = 0;
+
+                  /* there is already an entry in the list of active particles for
+                     gravity that points to the index that we will use for our new cell */
+                }
+
+              /* now split the gas cell */
+
+              j = NumGas + count;
+
+              P[j]    = P[i];
+              SphP[j] = SphP[i];
+
+              P[j].ID = newid++;
+
+              rad = get_cell_radius(i);
+
+              double dir[3];
+#ifdef TWODIMS
+              double phi = 2 * M_PI * get_random_number();
+
+              dir[0] = cos(phi);
+              dir[1] = sin(phi);
+              dir[2] = 0;
+#else  /* #ifdef TWODIMS */
+              double theta = acos(2 * get_random_number() - 1);
+              double phi   = 2 * M_PI * get_random_number();
+
+              dir[0] = sin(theta) * cos(phi);
+              dir[1] = sin(theta) * sin(phi);
+              dir[2] = cos(theta);
+#endif /* #ifdef TWODIMS */
+              fac = 0.025 * rad;
+
+              P[j].Pos[0] = P[i].Pos[0] + fac * dir[0];
+              P[j].Pos[1] = P[i].Pos[1] + fac * dir[1];
+              P[j].Pos[2] = P[i].Pos[2] + fac * dir[2];
+
+              SphP[j].SepVector[0] = SphP[i].SepVector[0] = dir[0];
+              SphP[j].SepVector[1] = SphP[i].SepVector[1] = dir[1];
+              SphP[j].SepVector[2] = SphP[i].SepVector[2] = dir[2];
+
+              /**** create the voronoi cell of i as an auxiliary mesh */
+
+              int jj = ref_SphP_dp_index[i]; /* this is the delaunay point of this cell */
+              if(jj < 0)
+                terminate("jj < 0");
+
+              initialize_and_create_first_tetra(&DeRefMesh);
+
+              DeRefMesh.DTC = mymalloc_movable(&DeRefMesh.DTC, "DeRefDTC", DeRefMesh.MaxNdt * sizeof(tetra_center));
+              DeRefMesh.DTF = mymalloc_movable(&DeRefMesh.DTF, "DeRefDTF", DeRefMesh.MaxNdt * sizeof(char));
+              for(k = 0; k < DeRefMesh.Ndt; k++)
+                DeRefMesh.DTF[k] = 0;
+
+              int tlast = 0;
+
+              k = first_ngb[jj];
+              while(k >= 0)
+                {
+                  int q = ngb[k].index;
+
+                  if(DeRefMesh.Ndp + 2 >= DeRefMesh.MaxNdp)
+                    {
+                      DeRefMesh.Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+                      DeRefMesh.MaxNdp = DeRefMesh.Indi.AllocFacNdp;
+#ifdef VERBOSE
+                      printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, DeRefMesh.MaxNdp,
+                             DeRefMesh.Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+                      DeRefMesh.DP -= 5;
+                      DeRefMesh.DP = myrealloc_movable(DeRefMesh.DP, (DeRefMesh.MaxNdp + 5) * sizeof(point));
+                      DeRefMesh.DP += 5;
+                    }
+
+                  DeRefMesh.DP[DeRefMesh.Ndp] = Mesh.DP[q];
+
+                  double r =
+                      sqrt(pow(DeRefMesh.DP[DeRefMesh.Ndp].x - P[i].Pos[0], 2) + pow(DeRefMesh.DP[DeRefMesh.Ndp].y - P[i].Pos[1], 2) +
+                           pow(DeRefMesh.DP[DeRefMesh.Ndp].z - P[i].Pos[2], 2));
+
+                  if(r < 2 * fac)
+                    terminate("We are trying to split a heavily distorted cell... We better stop. Check your refinement criterion.");
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+                  set_integers_for_point(&DeRefMesh, DeRefMesh.Ndp);
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */
+                  tlast = insert_point(&DeRefMesh, DeRefMesh.Ndp, tlast);
+
+                  DeRefMesh.Ndp++;
+                  k = ngb[k].next_ngb;
+                }
+
+              /* now add also the point jj itself (the one that is to be split) */
+
+              DeRefMesh.DP[DeRefMesh.Ndp] = Mesh.DP[jj];
+#ifndef OPTIMIZE_MEMORY_USAGE
+              set_integers_for_point(&DeRefMesh, DeRefMesh.Ndp);
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */
+              tlast = insert_point(&DeRefMesh, DeRefMesh.Ndp, tlast);
+              DeRefMesh.Ndp++;
+
+              /* and finally, add the newly generated point */
+
+              DeRefMesh.DP[DeRefMesh.Ndp].x  = P[j].Pos[0];
+              DeRefMesh.DP[DeRefMesh.Ndp].y  = P[j].Pos[1];
+              DeRefMesh.DP[DeRefMesh.Ndp].z  = P[j].Pos[2];
+              DeRefMesh.DP[DeRefMesh.Ndp].ID = P[j].ID;
+#ifndef OPTIMIZE_MEMORY_USAGE
+              set_integers_for_point(&DeRefMesh, DeRefMesh.Ndp);
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */
+              tlast = insert_point(&DeRefMesh, DeRefMesh.Ndp, tlast);
+              DeRefMesh.Ndp++;
+
+              /* compute circumcircles */
+              compute_circumcircles(&DeRefMesh);
+
+              double *Volume = mymalloc("Volume", DeRefMesh.Ndp * sizeof(double));
+
+              derefine_refine_compute_volumes(Volume);
+
+              double voli = Volume[DeRefMesh.Ndp - 2]; /* inserted second to last: the original point jj */
+              double volj = Volume[DeRefMesh.Ndp - 1]; /* inserted last: the newly generated point */
+
+              myfree(Volume);
+
+              myfree(DeRefMesh.DTF);
+              myfree(DeRefMesh.DTC);
+              DeRefMesh.DTC = NULL;
+
+              myfree(DeRefMesh.DT);
+              myfree(DeRefMesh.DP - 5);
+              myfree(DeRefMesh.VF);
+
+              /* now split the conserved variables according to the volume ratio of the split */
+
+              double faci = voli / (voli + volj);
+              double facj = volj / (voli + volj);
+
+              P[i].Mass *= faci;
+              P[j].Mass *= facj;
+              SphP[i].OldMass *= faci;
+              SphP[j].OldMass *= facj;
+
+              SphP[i].Energy *= faci;
+              SphP[j].Energy *= facj;
+
+#ifdef MHD
+              for(k = 0; k < 3; k++)
+                {
+                  SphP[i].B[k] = SphP[i].BConserved[k] / (voli + volj);
+                  SphP[j].B[k] =
+                      SphP[i].B[k] + SphP[i].Grad.dB[k][0] * (P[j].Pos[0] - P[i].Pos[0]) +
+                      SphP[i].Grad.dB[k][1] * (P[j].Pos[1] - P[i].Pos[1]) +
+                      SphP[i].Grad.dB[k][2] * (P[j].Pos[2] - P[i].Pos[2]); /* extrapolate B to the position of the new cell */
+
+                  /* update conserved variables */
+                  SphP[i].BConserved[k] = SphP[i].B[k] * voli;
+                  SphP[j].BConserved[k] = SphP[j].B[k] * volj;
+                }
+#endif /* #ifdef MHD */
+
+              for(k = 0; k < 3; k++)
+                {
+                  SphP[i].Momentum[k] *= faci;
+                  SphP[j].Momentum[k] *= facj;
+                }
+
+#ifdef USE_SFR
+              SphP[i].Sfr *= faci;
+              SphP[j].Sfr *= facj;
+#endif /* #ifdef USE_SFR */
+
+#ifdef MAXSCALARS
+              for(int s = 0; s < N_Scalar;
+                  s++) /* Note, the changes in MATERIALS, HIGHRESGASMASS, etc., are treated as part of the Scalars */
+                {
+                  *(MyFloat *)(((char *)(&SphP[i])) + scalar_elements[s].offset_mass) *= faci;
+                  *(MyFloat *)(((char *)(&SphP[j])) + scalar_elements[s].offset_mass) *= facj;
+                }
+#endif /* #ifdef MAXSCALARS */
+
+#ifdef REFINEMENT_HIGH_RES_GAS
+              /* the change in the SphP[].HighResMass is treated as part of the Scalars loop above */
+              SphP[i].AllowRefinement += 2; /* increment the refinement "generation" of both cells */
+              SphP[j].AllowRefinement += 2;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+
+              /* add the new particle into the neighbour tree */
+              int no          = Ngb_Nextnode[i];
+              Ngb_Nextnode[i] = j;
+              Ngb_Nextnode[j] = no;
+              Ngb_Father[j]   = Ngb_Father[i];
+
+              ngb_update_rangebounds(j, &nchanged, nodelist);
+
+              /* now add the new particle into the link-lists for the time integration */
+
+              timebin_add_particle(&TimeBinsHydro, j, i, P[i].TimeBinHydro, 1);
+              timebin_add_particle(&TimeBinsGravity, j, i, P[i].TimeBinGrav, addToGravList);
+
+              SphP[j].first_connection = -1;
+              SphP[j].last_connection  = -1;
+
+              count++;
+            }
+        }
+
+      NumPart += count;
+      NumGas += count;
+      All.TotNumPart += countall;
+      All.TotNumGas += countall;
+
+      myfree(ngb);
+      myfree(first_ngb);
+
+      ngb_finish_rangebounds_update(nchanged, nodelist);
+
+      myfree(nodelist);
+    }
+
+  myfree(ref_SphP_dp_index);
+
+  TIMER_STOP(CPU_REFINE);
+
+  return countall;
+}
+
+#endif /* REFINEMENT_SPLIT_CELLS */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_utils.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_utils.c
new file mode 100644
index 0000000000..94ce562398
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_utils.c
@@ -0,0 +1,501 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi_utils.c
+ * \date        05/2018
+ * \brief       Utilities for 3d Voronoi mesh
+ * \details     contains functions:
+ *                double cross_section_plane_cell(int sphp_index, int dp_index, double *center, double *n)
+ *                void intersections_plane_cell(int sphp_index, int dp_index, double *center, double *n, double *polygon, unsigned int
+ * *nof_polygon_elements) void intersection_plane_grid(double *center, double *n, const char *filename) static double
+ * polygon_area(double *polygon, unsigned int nof_elements) static int qs_partition(double *A, int p, int r, double *B) static void
+ * qs_sort(double *A, int p, int r, double *B) static double calc_phi(double x, double y) static void rotate_z(double *vec, const
+ * double alpha)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 23.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#if !defined(TWODIMS) && !defined(ONEDIMS)
+
+// helper functions for cross_section_plane_cell and intersections_plane_cell:
+static int qs_partition(double *A, int p, int r, double *B);
+static void qs_sort(double *A, int p, int r, double *B);
+static double calc_phi(double x, double y);
+static void rotate_z(double *vec, const double alpha);
+static void rotate_y(double *vec, const double alpha);
+
+#ifdef TETRA_INDEX_IN_FACE
+static double polygon_area(double *polygon, unsigned int nof_elements);
+static const unsigned int max_poly_elements = 128;
+
+/*! \brief Calculates the cross section between a plane and a Voronoi cell(3D).
+ *
+ *  The polygon cut out of the cell is obtained from
+ *  intersections_plane_cell() and its area from polygon_area(); the result
+ *  is 0 when fewer than 6 polygon coordinates are reported.
+ *
+ *  \param[in] sphp_index The hydro index of the cell.
+ *  \param[in] dp_index The delaunay point index of the cell.
+ *  \param[in] center A point in the plane.
+ *  \param[in] n A vector starting at center and normal to the plane.
+ *
+ *  \return The cross section between the plane and the cell.
+ */
+double cross_section_plane_cell(int sphp_index, int dp_index, double *center, double *n)
+{
+  unsigned int n_coords = 0;
+  double outline[max_poly_elements];
+
+  intersections_plane_cell(sphp_index, dp_index, center, n, outline, &n_coords);
+
+  /* three vertices need six coordinates; anything less encloses no area */
+  if(n_coords >= 6)
+    return polygon_area(outline, n_coords);
+
+  return 0;
+}
+
+/*! \brief Calculates the intersections between a plane and a cell.
+ *
+ *  \param[in] sphp_index The hydro index of the cell.
+ *  \param[in] dp_index The Delaunay point index of the cell.
+ *  \param[in] center A point in the plane.
+ *  \param[in] n A vector starting at center and normal to the plane.
+ *  \param[out] polygon Closed polygon x1,y1,x2,y2,...; needs max_poly_elements doubles.
+ *  \param[out] nof_polygon_elements The number of stored elements in the
+ *              polygon array; not written if fewer than 3 points are found.
+ *
+ *  \return void
+ */
+void intersections_plane_cell(int sphp_index, int dp_index, double *center, double *n, double *polygon,
+                              unsigned int *nof_polygon_elements)
+{
+  // memory for the line segments
+  unsigned int line_segments_max = 2000;
+  double *ls                     = (double *)mymalloc("line_segments", line_segments_max * sizeof(double));
+
+  // get the line segments
+  unsigned int nof_elements = 0;
+  get_line_segments(sphp_index, dp_index, ls, &nof_elements, line_segments_max);
+  assert(nof_elements % 6 == 0);  // 6 doubles represent one line segment
+
+  // start the calculation
+  unsigned int i;
+  double phi;
+
+  if(n[0] == 0 && n[1] == 0)
+    {
+      phi = 0;
+    }
+  else
+    {
+      phi = calc_phi(n[0], n[1]);
+    }
+
+  double r = sqrt(n[0] * n[0] + n[1] * n[1] + n[2] * n[2]);
+  assert(r > 0);
+  double theta = acos(n[2] / r);
+
+  double lambda;  // z1 + lambda * (z2 - z1) = 0
+
+  unsigned int max_phi_elms = max_poly_elements / 2;
+  double phi_values[max_phi_elms];  // phi coordinates of the points of the polygon
+  unsigned int p = 0;               // number of points of the polygon
+
+  // balance point of the polygon
+  double bal_p_x = 0;
+  double bal_p_y = 0;
+
+  for(i = 0; i < nof_elements; i += 6)
+    {
+      // transform line segment to the center frame
+      ls[i] -= center[0];      // x1
+      ls[i + 1] -= center[1];  // y1
+      ls[i + 2] -= center[2];  // z1
+      ls[i + 3] -= center[0];  // x2
+      ls[i + 4] -= center[1];  // y2
+      ls[i + 5] -= center[2];  // z2
+
+      // rotate line segment such that the cross secting plane is in the x-y plane / the normal vector of the plane is on the z-axis
+      rotate_z(&ls[i], -phi);
+      rotate_y(&ls[i], -theta);
+
+      rotate_z(&ls[i + 3], -phi);
+      rotate_y(&ls[i + 3], -theta);
+
+      if(ls[i + 2] == ls[i + 5])  // same z-coords
+        {
+          if(ls[i + 2] != 0)  // no intersection
+            {
+              lambda = -1;
+            }
+          else
+            {
+              lambda = 0;  // take first point as intersection
+            }
+        }
+      else
+        {
+          lambda = ls[i + 2] / (ls[i + 2] - ls[i + 5]);
+        }
+
+      if(lambda >= 0 && lambda <= 1)  // line segment intersects plane
+        {
+          if(p + 1 >= max_phi_elms)  // keep one slot free for the point that closes the polygon below
+            {
+              terminate("termination in voronoi_utils.c: intersections_plane_cell: not enough memory!\n");
+            }
+
+          polygon[2 * p]     = ls[i] + lambda * (ls[i + 3] - ls[i]);          // x coordinate of the intersection
+          polygon[2 * p + 1] = ls[i + 1] + lambda * (ls[i + 4] - ls[i + 1]);  // y coordinate of the intersection
+
+          bal_p_x += polygon[2 * p];
+          bal_p_y += polygon[2 * p + 1];
+
+          p++;
+        }
+    }
+
+  // free memory
+  myfree(ls);
+
+  // polygon has to contain at least 3 points
+  if(p < 3)
+    {
+      return;
+    }
+
+  // switch frame to balance point of the polygon
+  bal_p_x /= p;
+  bal_p_y /= p;
+
+  for(i = 0; i < p; i++)
+    {
+      polygon[2 * i] -= bal_p_x;
+      polygon[2 * i + 1] -= bal_p_y;
+
+      // calculate the phi values
+      phi_values[i] = calc_phi(polygon[2 * i], polygon[2 * i + 1]);
+    }
+
+  // sort polygon
+  qs_sort(phi_values, 0, p - 1, polygon);
+
+  // close polygon (p < max_phi_elms is guaranteed by the check in the loop above)
+  polygon[2 * p]     = polygon[0];
+  polygon[2 * p + 1] = polygon[1];
+  phi_values[p]      = phi_values[0];
+  p++;
+
+  // transform back
+  for(i = 0; i < p; i++)
+    {
+      polygon[2 * i] += bal_p_x;
+      polygon[2 * i + 1] += bal_p_y;
+    }
+
+  *nof_polygon_elements = 2 * p;
+}
+
+/*! \brief Write out the intersections between a plane and the grid
+ *         (for plotting).
+ *
+ *  Binary output:
+ *  int: Number of elements in the first array.
+ *  int: Number of elements in the second array.
+ *  int[]: Array, which stores the number of intersections for each intersected
+ *         cell.
+ *         The j-th entry gives the number of elements in the intersections
+ *         array which correspond to the j-th intersected cell.
+ *  double[]: intersections array, all intersections are stored in the
+ *            order x1,y1,x2,y2,x3,y3,...
+ *
+ *  The intersections are given in a coordinate system where n is the z-axis
+ *  and which has its origin at center.
+ *  Terminates if NTask != 1, if polygon storage runs out or if the file cannot be opened.
+ *  \param[in] center A point in the plane.
+ *  \param[in] n A vector starting at center and normal to the plane.
+ *  \param[in] filename Name of the binary output file (opened with mode "wb").
+ *
+ *  \return void
+ */
+void intersection_plane_grid(double *center, double *n, const char *filename)
+{
+  if(NTask != 1)
+    {
+      terminate("termination in voronoi_utils.c: intersection_plane_grid: not yet parallelized!\n");
+    }
+
+  double phi;
+
+  if(n[0] == 0 && n[1] == 0)
+    {
+      phi = 0;
+    }
+  else
+    {
+      phi = calc_phi(n[0], n[1]);
+    }
+
+  double r = sqrt(n[0] * n[0] + n[1] * n[1] + n[2] * n[2]);
+  assert(r > 0);
+  double theta = acos(n[2] / r);
+
+  double xaxis[3] = {1, 0, 0};
+  double yaxis[3] = {0, 1, 0};
+  double zaxis[3] = {0, 0, 1};
+
+  rotate_y(xaxis, theta);
+  rotate_z(xaxis, phi);
+
+  rotate_y(yaxis, theta);
+  rotate_z(yaxis, phi);
+
+  rotate_y(zaxis, theta);
+  rotate_z(zaxis, phi);
+
+  printf("normal vector: (%f, %f, %f)\n", n[0], n[1], n[2]);
+  printf("Coordinate system of output data: \n");
+  printf("center: (%f, %f, %f)\n", center[0], center[1], center[2]);
+  printf("x-axis: (%f, %f, %f)\n", xaxis[0], xaxis[1], xaxis[2]);
+  printf("y-axis: (%f, %f, %f)\n", yaxis[0], yaxis[1], yaxis[2]);
+  printf("z-axis: (%f, %f, %f)\n", zaxis[0], zaxis[1], zaxis[2]);
+
+  const int cells_max_elms = NumGas;
+  int *nof_intersections   = (int *)mymalloc("number of intersections", cells_max_elms * sizeof(int));
+  unsigned int l           = 0;
+
+  // a single cell may emit a full polygon plus its closing point: always keep that much room free
+  const size_t polygon_slack     = max_poly_elements + 2;
+  const size_t polygons_max_elms = (size_t)NumGas * 5 + polygon_slack;
+  double *polygons               = (double *)mymalloc("polygons", polygons_max_elms * sizeof(double));
+  unsigned int j                 = 0;
+
+  unsigned int nof_polygon_elements = 0;
+  unsigned int k                    = 0;
+
+  for(k = 0; k < NumGas; k++)
+    {
+      // check before the call, because the callee writes without knowing the remaining space
+      if(j + polygon_slack > polygons_max_elms)
+        terminate("termination in voronoi_utils.c: intersection_plane_grid: not enough memory for the polygons!\n");
+
+      nof_polygon_elements = 0;
+      intersections_plane_cell(k, k, center, n, &polygons[j], &nof_polygon_elements);
+
+      if(nof_polygon_elements != 0)
+        {
+          nof_intersections[l] = (int)nof_polygon_elements;
+          l++;
+
+          j += nof_polygon_elements;
+        }
+    }
+
+  // binary output
+  FILE *pFile = fopen(filename, "wb");
+  if(pFile == NULL)
+    terminate("termination in voronoi_utils.c: intersection_plane_grid: cannot open file '%s' for writing!\n", filename);
+
+  fwrite(&l, sizeof(int), 1, pFile);  // number of intersected cells
+  fwrite(&j, sizeof(int), 1, pFile);  // number of elements in polygons array
+  fwrite(nof_intersections, sizeof(int), l, pFile);
+  fwrite(polygons, sizeof(double), j, pFile);
+
+  fclose(pFile);
+
+  myfree(polygons);
+  myfree(nof_intersections);
+}
+
+/*! \brief Area enclosed by a closed, planar polygon.
+ *
+ *  Evaluates A = 0.5 * sum_{i=0}^{n-1} (x_i * y_{i+1} - x_{i+1} * y_i).
+ *
+ *  \param[in] polygon Flat coordinate list x1, y1, x2, y2, ...; the vertices
+ *             must be in counterclockwise order and the first vertex must
+ *             be repeated at the end (x_n == x_0 && y_n == y_0).
+ *  \param[in] nof_elements Number of doubles in polygon (at least 8).
+ *
+ *  \return Area of the polygon; asserted to be non-negative.
+ */
+static double polygon_area(double *polygon, unsigned int nof_elements)
+{
+  assert(nof_elements >= 8);
+
+  double twice_area = 0;
+  unsigned int i    = 0;
+
+  while(i < nof_elements - 2)
+    {
+      const double *v = &polygon[i]; /* v[0], v[1]: current vertex; v[2], v[3]: its successor */
+      twice_area += v[0] * v[3] - v[2] * v[1];
+      i += 2;
+    }
+
+  const double result = 0.5 * twice_area;
+  assert(result >= 0);
+
+  return result;
+}
+
+#endif /* #ifdef TETRA_INDEX_IN_FACE */
+
+/*! \brief Partition step of qs_sort; A[r] serves as the pivot.
+ *
+ *  \param[in, out] A Keys to partition (usually the angle phi), range [p, r].
+ *  \param[in] p First index of the range.
+ *  \param[in] r Last index of the range (inclusive).
+ *  \param[in, out] B Pairs (B[2i], B[2i+1]) that are permuted along with A[i].
+ *
+ *  \return Final index of the pivot.
+ */
+static int qs_partition(double *A, int p, int r, double *B)
+{
+  const double pivot = A[r];
+  double key, bx, by;
+  int store = p; /* first slot not yet known to hold a key <= pivot */
+  int scan;
+
+  /* single left-to-right pass; keys left of store are <= pivot */
+  for(scan = p; scan < r; scan++)
+    {
+      if(!(A[scan] <= pivot))
+        continue;
+
+      /* exchange the keys at store and scan */
+      key      = A[store];
+      A[store] = A[scan];
+      A[scan]  = key;
+
+      /* exchange the coordinate pairs that belong to those keys */
+      bx               = B[2 * store];
+      by               = B[2 * store + 1];
+      B[2 * store]     = B[2 * scan];
+      B[2 * store + 1] = B[2 * scan + 1];
+      B[2 * scan]      = bx;
+      B[2 * scan + 1]  = by;
+
+      store++;
+    }
+
+  /* move the pivot key from r to its final place */
+  key      = A[store];
+  A[store] = A[r];
+  A[r]     = key;
+
+  /* ... and take its coordinate pair along */
+  bx = B[2 * store];
+  by = B[2 * store + 1];
+  B[2 * store]     = B[2 * r];
+  B[2 * store + 1] = B[2 * r + 1];
+
+  B[2 * r]     = bx;
+  B[2 * r + 1] = by;
+
+  return store;
+}
+
+/*! \brief Recursive quicksort of A[p..r] that keeps B in step with A.
+ *
+ *  \param[in, out] A Keys to sort in ascending order, usually the angle phi.
+ *  \param[in] p First index of the range to sort.
+ *  \param[in] r Last index of the range to sort (inclusive).
+ *  \param[in, out] B Coordinate pairs reordered together with A;
+ *                  usually the polygon.
+ *
+ *  \return void
+ */
+static void qs_sort(double *A, int p, int r, double *B)
+{
+  if(p >= r)
+    return; /* fewer than two elements: nothing to do */
+
+  const int pivot_pos = qs_partition(A, p, r, B);
+
+  /* the pivot is in place; sort what lies on either side of it */
+  qs_sort(A, p, pivot_pos - 1, B);
+  qs_sort(A, pivot_pos + 1, r, B);
+}
+
+/*! \brief Polar angle of the point (x, y), mapped to non-negative values.
+ *
+ *  Converts 2d Cartesian coordinates to the angle returned by atan2;
+ *  negative results are shifted up by 2*pi.
+ *
+ *  \param[in] x X coordinate.
+ *  \param[in] y Y coordinate.
+ *
+ *  \return Phi (polar angle); 0 if x and y are both zero.
+ */
+static double calc_phi(double x, double y)
+{
+  /* the origin has no direction: report it and fall back to 0 */
+  if(x == 0 && y == 0)
+    {
+      fprintf(stderr, "ERROR in calc_phi: both arguments are zero\n");
+      return 0;
+    }
+
+  double angle = atan2(y, x); /* in [-pi, pi] */
+
+  /* angles below zero are moved up by one full turn,
+     everything else is returned as it is */
+  if(angle < 0)
+    angle += 2 * M_PI;
+
+  return angle;
+}
+
+/*! \brief In-place rotation of a 3d vector about the z axis.
+ *
+ *  \param[in, out] vec Vector (x, y, z); x and y are replaced, z is untouched.
+ *  \param[in] alpha Rotation angle in radians.
+ *
+ *  \return void
+ */
+static void rotate_z(double *vec, const double alpha)
+{
+  const double c = cos(alpha), s = sin(alpha);
+  const double x = vec[0], y = vec[1]; /* snapshot: both outputs need the old values */
+  vec[0] = c * x - s * y;
+  vec[1] = s * x + c * y;
+}
+
+/*! \brief In-place rotation of a 3d vector about the y axis.
+ *
+ *  \param[in, out] vec Vector (x, y, z); x and z are replaced, y is untouched.
+ *  \param[in] alpha Rotation angle in radians.
+ *
+ *  \return void
+ */
+static void rotate_y(double *vec, const double alpha)
+{
+  const double c = cos(alpha), s = sin(alpha);
+  const double x = vec[0], z = vec[2]; /* snapshot: both outputs need the old values */
+
+  vec[0] = c * x + s * z;
+  vec[2] = -s * x + c * z;
+}
+
+#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */
diff --git a/src/amuse/community/arepo/src/mpi_utils/checksummed_sendrecv.c b/src/amuse/community/arepo/src/mpi_utils/checksummed_sendrecv.c
new file mode 100644
index 0000000000..3fd92c29e6
--- /dev/null
+++ b/src/amuse/community/arepo/src/mpi_utils/checksummed_sendrecv.c
@@ -0,0 +1,321 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mpi_utils/checksummed_sendrecv.c
+ * \date        05/2018
+ * \brief       MPI send-receive communication with checksum to verify
+ *              communication.
+ * \details     contains functions:
+ *                int MPI_Check_Sendrecv(void *sendbuf, int sendcount,
+ *                  MPI_Datatype sendtype, int dest, int sendtag,
+ *                  void *recvbufreal, int recvcount, MPI_Datatype recvtype,
+ *                  int source, int recvtag, MPI_Comm comm,
+ *                  MPI_Status * status)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef MPISENDRECV_CHECKSUM
+
+#undef MPI_Sendrecv
+
+/*! \brief MPI_Sendrecv with built-in check if message arrived properly.
+ *
+ *  \param[in] sendbuf Initial address of send buffer.
+ *  \param[in] sendcount Number of elements in send buffer.
+ *  \param[in] sendtype Type of elements in send buffer.
+ *  \param[in] dest Rank of destination; has to be equal to source.
+ *  \param[in] sendtag Send tag.
+ *  \param[out] recvbufreal Initial address of receive buffer.
+ *  \param[in] recvcount Number of elements in receive buffer.
+ *  \param[in] recvtype Type of elements in receive buffer.
+ *  \param[in] source Rank of source; has to be equal to dest.
+ *  \param[in] recvtag Receive tag.
+ *  \param[in] comm Communicator of the MPI_Sendrecv calls; checksum and error-flag traffic uses MPI_COMM_WORLD.
+ *  \param[out] status Status object; this refers to receive operation.
+ *
+ *  \return 0 (transfers that stay corrupt after 10 retries end in terminate()).
+ */
+int MPI_Check_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbufreal, int recvcount,
+                       MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status)
+{
+  int checksumtag = 1000, errtag = 2000;
+  int i, iter                    = 0, err_flag, err_flag_imported, size_sendtype, size_recvtype;
+  long long sendCheckSum, recvCheckSum, importedCheckSum;
+  unsigned char *p, *buf, *recvbuf;
+  char msg[500];
+
+  if(dest != source)
+    terminate("destination task different from source task");
+
+  MPI_Type_size(sendtype, &size_sendtype);
+  MPI_Type_size(recvtype, &size_recvtype);
+
+  if(dest == ThisTask)
+    {
+      memcpy(recvbufreal, sendbuf, recvcount * size_recvtype);
+      return 0;
+    }
+
+  if(!(buf = mymalloc("buf", recvcount * size_recvtype + 1024))) /* mymalloc takes a label and a size */
+    terminate("not enough memory to allocate the buffer buf");
+
+  for(i = 0, p = buf; i < recvcount * size_recvtype + 1024; i++) /* 255 everywhere, so stray writes show up */
+    *p++ = 255;
+
+  recvbuf = buf + 512; /* payload area, framed by 512 guard bytes on each side */
+
+  MPI_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, status);
+
+  for(i = 0, p = buf; i < 512; i++, p++)
+    {
+      if(*p != 255)
+        {
+          sprintf(msg, "MPI-ERROR: Task=%d/%s: Recv occured before recv buffer. message-size=%d from %d, i=%d c=%d\n", ThisTask,
+                  getenv("HOST"), recvcount, dest, i, *p);
+          terminate(msg);
+        }
+    }
+
+  for(i = 0, p = recvbuf + recvcount * size_recvtype; i < 512; i++, p++)
+    {
+      if(*p != 255)
+        {
+          sprintf(msg, "MPI-ERROR: Task=%d/%s: Recv occured after recv buffer. message-size=%d from %d, i=%d c=%d\n", ThisTask,
+                  getenv("HOST"), recvcount, dest, i, *p);
+          terminate(msg);
+        }
+    }
+
+  for(i = 0, p = sendbuf, sendCheckSum = 0; i < sendcount * size_sendtype; i++, p++)
+    sendCheckSum += *p;
+
+  importedCheckSum = 0;
+
+  if(dest > ThisTask) /* the higher rank receives first, so the synchronous sends cannot block each other */
+    {
+      if(sendcount > 0)
+        MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD);
+      if(recvcount > 0)
+        MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status);
+    }
+  else
+    {
+      if(recvcount > 0)
+        MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status);
+      if(sendcount > 0)
+        MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD);
+    }
+
+  checksumtag++;
+
+  for(i = 0, p = recvbuf, recvCheckSum = 0; i < recvcount * size_recvtype; i++, p++)
+    recvCheckSum += *p;
+
+  err_flag = err_flag_imported = 0;
+
+  if(recvCheckSum != importedCheckSum)
+    {
+      printf(
+          "MPI-ERROR: Receive error on task=%d/%s from task=%d, message size=%d, sendcount=%d checksums= %d %d  %d %d. Try to fix "
+          "it...\n",
+          ThisTask, getenv("HOST"), source, recvcount, sendcount, (int)(recvCheckSum >> 32), (int)recvCheckSum,
+          (int)(importedCheckSum >> 32), (int)importedCheckSum);
+      myflush(stdout);
+
+      err_flag = 1;
+    }
+
+  if(dest > ThisTask)
+    {
+      MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD);
+      MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status);
+    }
+  else
+    {
+      MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status);
+      MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD);
+    }
+  errtag++;
+
+  if(err_flag > 0 || err_flag_imported > 0)
+    {
+      printf("Task=%d is on %s, wants to send %d and has checksum=%d %d of send data\n", ThisTask, getenv("HOST"), sendcount,
+             (int)(sendCheckSum >> 32), (int)sendCheckSum);
+      myflush(stdout);
+
+      do
+        {
+          sendtag++;
+          recvtag++;
+
+          for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++)
+            *p = 0;
+
+          if((iter & 1) == 0)
+            {
+              if(dest > ThisTask)
+                {
+                  if(sendcount > 0)
+                    MPI_Ssend(sendbuf, sendcount, sendtype, dest, sendtag, MPI_COMM_WORLD);
+                  if(recvcount > 0)
+                    MPI_Recv(recvbuf, recvcount, recvtype, dest, recvtag, MPI_COMM_WORLD, status);
+                }
+              else
+                {
+                  if(recvcount > 0)
+                    MPI_Recv(recvbuf, recvcount, recvtype, dest, recvtag, MPI_COMM_WORLD, status);
+                  if(sendcount > 0)
+                    MPI_Ssend(sendbuf, sendcount, sendtype, dest, sendtag, MPI_COMM_WORLD);
+                }
+            }
+          else
+            {
+              if(iter > 5)
+                {
+                  printf("we're trying to send each byte now on task=%d (iter=%d)\n", ThisTask, iter);
+                  myflush(stdout);
+                  if(dest > ThisTask)
+                    {
+                      for(i = 0, p = sendbuf; i < sendcount * size_sendtype; i++, p++)
+                        MPI_Ssend(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD);
+                      for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++)
+                        MPI_Recv(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD, status);
+                    }
+                  else
+                    {
+                      for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++)
+                        MPI_Recv(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD, status);
+                      for(i = 0, p = sendbuf; i < sendcount * size_sendtype; i++, p++)
+                        MPI_Ssend(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD);
+                    }
+                }
+              else
+                {
+                  MPI_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm,
+                               status);
+                }
+            }
+
+          importedCheckSum = 0;
+
+          for(i = 0, p = sendbuf, sendCheckSum = 0; i < sendcount * size_sendtype; i++, p++)
+            sendCheckSum += *p;
+
+          printf("Task=%d gas send_checksum=%d %d\n", ThisTask, (int)(sendCheckSum >> 32), (int)sendCheckSum);
+          myflush(stdout);
+
+          if(dest > ThisTask)
+            {
+              if(sendcount > 0)
+                MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD);
+              if(recvcount > 0)
+                MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status);
+            }
+          else
+            {
+              if(recvcount > 0)
+                MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status);
+              if(sendcount > 0)
+                MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD);
+            }
+          /* sum every received byte, as in the first attempt; the sender sums sendcount * size_sendtype bytes */
+          for(i = 0, p = recvbuf, recvCheckSum = 0; i < recvcount * size_recvtype; i++, p++)
+            recvCheckSum += *p;
+
+          err_flag = err_flag_imported = 0;
+
+          if(recvCheckSum != importedCheckSum)
+            {
+              printf(
+                  "MPI-ERROR: Again (iter=%d) a receive error on task=%d/%s from task=%d, message size=%d, checksums= %d %d  %d %d. "
+                  "Try to fix it...\n",
+                  iter, ThisTask, getenv("HOST"), source, recvcount, (int)(recvCheckSum >> 32), (int)recvCheckSum,
+                  (int)(importedCheckSum >> 32), (int)importedCheckSum);
+              myflush(stdout);
+              err_flag = 1;
+            }
+
+          if(dest > ThisTask)
+            {
+              MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD);
+              MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status);
+            }
+          else
+            {
+              MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status);
+              MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD);
+            }
+
+          if(err_flag == 0 && err_flag_imported == 0)
+            break;
+
+          errtag++;
+          checksumtag++;
+          iter++;
+        }
+      while(iter < 10);
+
+      if(iter >= 10)
+        {
+          char fname[1000]; /* own name, so that the receive buffer 'buf' is not shadowed */
+          int length = sendcount * size_sendtype;
+          FILE *fd;
+
+          sprintf(fname, "send_data_%d.dat", ThisTask);
+          if((fd = fopen(fname, "w")) != NULL) /* a dump that cannot be written must not hide the error below */
+            {
+              fwrite(&length, 1, sizeof(int), fd);
+              fwrite(sendbuf, sendcount, size_sendtype, fd);
+              fclose(fd);
+            }
+          sprintf(fname, "recv_data_%d.dat", ThisTask);
+          length = recvcount * size_recvtype;
+          if((fd = fopen(fname, "w")) != NULL)
+            {
+              fwrite(&length, 1, sizeof(int), fd);
+              fwrite(recvbuf, recvcount, size_recvtype, fd);
+              fclose(fd);
+            }
+          sprintf(msg, "MPI-ERROR: Even 10 trials proved to be insufficient on task=%d/%s. Stopping\n", ThisTask, getenv("HOST"));
+          terminate(msg);
+        }
+    }
+
+  memcpy(recvbufreal, recvbuf, recvcount * size_recvtype);
+  myfree(buf);
+
+  return 0;
+}
+
+#endif /* #ifdef MPISENDRECV_CHECKSUM */
diff --git a/src/amuse/community/arepo/src/mpi_utils/hypercube_allgatherv.c b/src/amuse/community/arepo/src/mpi_utils/hypercube_allgatherv.c
new file mode 100644
index 0000000000..e421807148
--- /dev/null
+++ b/src/amuse/community/arepo/src/mpi_utils/hypercube_allgatherv.c
@@ -0,0 +1,94 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mpi_utils/hypercube_allgatherv.c
+ * \date        05/2018
+ * \brief       Home-made MPI_Allgatherv routine.
+ * \details     contains functions:
+ *                int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount,
+ *                  MPI_Datatype sendtype, void *recvbuf, int *recvcount,
+ *                  int *displs, MPI_Datatype recvtype, MPI_Comm comm)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef MPI_HYPERCUBE_ALLGATHERV
+
+#define TAG 100
+/*! \brief Allgatherv routine based on MPI_Sendrecv calls.
+ *
+ *  \param[in] sendbuf Starting address of send buffer.
+ *  \param[in] sendcount Number of elements in send buffer.
+ *  \param[in] sendtype Data type of send buffer elements.
+ *  \param[out] recvbuf Address of receive buffer.
+ *  \param[in] recvcount Integer array (of length group size) containing the
+ *             number of elements that are to be received from each process.
+ *  \param[in] displs Integer array (of length group size). Entry i specifies
+ *             the displacement (relative to recvbuf, counted in elements of
+ *             recvtype) at which to place the incoming data from process i.
+ *  \param[in] recvtype Data type of receive buffer elements.
+ *  \param[in] comm Communicator.
+ *
+ *  \return 0
+ */
+int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs,
+                             MPI_Datatype recvtype, MPI_Comm comm)
+{
+  int ntask, thistask, ptask, ngrp, size_sendtype, size_recvtype;
+  MPI_Status status;
+  char *recv_bytes = (char *)recvbuf; /* byte view: arithmetic on void * is a compiler extension, not standard C */
+
+  MPI_Comm_rank(comm, &thistask);
+  MPI_Comm_size(comm, &ntask);
+
+  MPI_Type_size(sendtype, &size_sendtype);
+  MPI_Type_size(recvtype, &size_recvtype);
+
+  for(ptask = 0; ntask > (1 << ptask); ptask++) /* smallest ptask with 2^ptask >= ntask */
+    ;
+
+  for(ngrp = 1; ngrp < (1 << ptask); ngrp++)
+    {
+      int recvtask = thistask ^ ngrp; /* partner of this round; ranks >= ntask do not exist */
+      if(recvtask < ntask)
+        MPI_Sendrecv(sendbuf, sendcount, sendtype, recvtask, TAG, recv_bytes + (size_t)displs[recvtask] * size_recvtype,
+                     recvcount[recvtask], recvtype, recvtask, TAG, comm, &status);
+    }
+
+  if(sendbuf != recv_bytes + (size_t)displs[thistask] * size_recvtype) /* own part: copy unless already in place */
+    memcpy(recv_bytes + (size_t)displs[thistask] * size_recvtype, sendbuf, (size_t)sendcount * size_sendtype);
+
+  return 0;
+}
+
+#endif /* #ifdef MPI_HYPERCUBE_ALLGATHERV */
diff --git a/src/amuse/community/arepo/src/mpi_utils/mpi_util.c b/src/amuse/community/arepo/src/mpi_utils/mpi_util.c
new file mode 100644
index 0000000000..e5098c7396
--- /dev/null
+++ b/src/amuse/community/arepo/src/mpi_utils/mpi_util.c
@@ -0,0 +1,375 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mpi_utils/mpi_util.c
+ * \date        05/2018
+ * \brief       Custom made auxiliary MPI functions.
+ * \details     contains functions:
+ *                void mpi_exchange_buffers(void *send_buf, int *send_count,
+ *                  int *send_offset, void *recv_buf, int *recv_count,
+ *                  int *recv_offset, int item_size, int commtag,
+ *                  int include_self)
+ *                int mpi_calculate_offsets(int *send_count, int *send_offset,
+ *                  int *recv_count, int *recv_offset, int send_identical)
+ *                int mesh_search_compare_task(const void *a, const void *b)
+ *                int intpointer_compare(const void *a, const void *b)
+ *                void *sort_based_on_mesh_search(mesh_search_data * search,
+ *                  void *data, int n_items, int item_size)
+ *                void *sort_based_on_field(void *data, int field_offset,
+ *                  int n_items, int item_size)
+ *                void mpi_distribute_items_from_search(mesh_search_data *
+ *                  search, void *data, int *n_items, int *max_n, int
+ *                  item_size, int commtag, int task_offset, int cell_offset)
+ *                void mpi_distribute_items_to_tasks(void *data,
+ *                  int task_offset, int *n_items, int *max_n, int item_size,
+ *                  int commtag)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <mpi.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+static char *SaveData2;
+
+/*! \brief Pairwise exchange of per-task buffer sections with all other MPI
+ *         tasks (and optionally with the calling task itself).
+ *
+ *  All count and offset arrays are indexed by task and need NTask entries.
+ *
+ *  \param[in] send_buf Start of the outgoing data.
+ *  \param[in] send_count Per-task number of items to send.
+ *  \param[in] send_offset Per-task item offset into send_buf.
+ *  \param[out] recv_buf Start of the storage for incoming data.
+ *  \param[in] recv_count Per-task number of items to receive.
+ *  \param[in] recv_offset Per-task item offset into recv_buf.
+ *  \param[in] item_size Size of one item in bytes.
+ *  \param[in] commtag Tag used for both the send and the receive side.
+ *  \param[in] include_self If nonzero, the section addressed to ThisTask is
+ *             exchanged as well (a send/receive of the task with itself).
+ *
+ *  \return void
+ */
+void mpi_exchange_buffers(void *send_buf, int *send_count, int *send_offset, void *recv_buf, int *recv_count, int *recv_offset,
+                          int item_size, int commtag, int include_self)
+{
+  char *const out = (char *)send_buf;
+  char *const in  = (char *)recv_buf;
+  int level;
+
+  /* level 0 pairs a task with itself (ThisTask ^ 0); it is only visited when include_self is set */
+  for(level = include_self ? 0 : 1; level < (1 << PTask); level++)
+    {
+      const int partner = ThisTask ^ level;
+
+      /* skip partners that do not exist and pairs with nothing to transfer */
+      if(partner >= NTask || (send_count[partner] <= 0 && recv_count[partner] <= 0))
+        continue;
+
+      /* counts and offsets are given in items; the transfer itself is done in bytes */
+      MPI_Sendrecv(out + (size_t)send_offset[partner] * item_size, (size_t)send_count[partner] * item_size, MPI_BYTE, partner,
+                   commtag, in + (size_t)recv_offset[partner] * item_size, (size_t)recv_count[partner] * item_size, MPI_BYTE,
+                   partner, commtag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+}
+
+/*! \brief Derives receive counts and buffer offsets from the send counts.
+ *
+ *  The send counts of all tasks are exchanged with MPI_Alltoall to obtain
+ *  recv_count. The offsets are running sums of the counts, starting at 0.
+ *  With send_identical set, every task is sent the same data, so all
+ *  entries of send_offset stay 0.
+ *
+ *  All arrays should be allocated with NTask size.
+ *
+ *  \param[in] send_count Number of elements to be sent to each task.
+ *  \param[out] send_offset Offset into the send buffer for each task.
+ *  \param[out] recv_count Number of elements received from each task.
+ *  \param[out] recv_offset Offset into the receive buffer for each task.
+ *  \param[in] send_identical Nonzero if all tasks get the same send data.
+ *
+ *  \return Total number of elements to be received (sum of recv_count).
+ */
+int mpi_calculate_offsets(int *send_count, int *send_offset, int *recv_count, int *recv_offset, int send_identical)
+{
+  int task, total = 0;
+
+  /* every task learns how many elements each other task will send to it */
+  MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  send_offset[0] = 0;
+  recv_offset[0] = 0;
+  for(task = 1; task < NTask; task++)
+    {
+      const int step    = send_identical ? 0 : send_count[task - 1];
+      send_offset[task] = send_offset[task - 1] + step;
+      recv_offset[task] = recv_offset[task - 1] + recv_count[task - 1];
+    }
+
+  for(task = 0; task < NTask; task++)
+    total += recv_count[task];
+
+  return total;
+}
+
+/*! \brief Comparator that orders pointers to mesh_search_data by Task.
+ *
+ *  \param[in] a Address of the first pointer.
+ *  \param[in] b Address of the second pointer.
+ *
+ *  \return -1, 0 or +1 if the Task of a is smaller, equal or larger.
+ */
+int mesh_search_compare_task(const void *a, const void *b)
+{
+  /* each argument addresses one array element,
+     and that element is itself a pointer to the search entry */
+  const mesh_search_data *lhs = *(mesh_search_data **)a;
+  const mesh_search_data *rhs = *(mesh_search_data **)b;
+
+  /* (larger) - (smaller) evaluates to -1, 0 or +1 */
+  return (lhs->Task > rhs->Task) - (lhs->Task < rhs->Task);
+}
+
+/*! \brief Comparator that orders int pointers by the values they point to.
+ *         Each argument is the address of one int pointer.
+ *
+ *  \param[in] a Address of the first int pointer.
+ *  \param[in] b Address of the second int pointer.
+ *
+ *  \return -1, 0 or +1 if the target of a is smaller, equal or larger.
+ */
+int intpointer_compare(const void *a, const void *b)
+{
+  const int lhs = **(int **)a;
+  const int rhs = **(int **)b;
+
+  if(lhs == rhs)
+    return 0;
+
+  return (lhs < rhs) ? -1 : +1;
+}
+
+/*! \brief  Builds a copy of an opaque item array, ordered the way the search
+ *  array is ordered after sorting it by task. The copy is allocated
+ *  here and has to be freed by the caller.
+ *
+ *  An array of pointers to the search entries is sorted with
+ *  mesh_search_compare_task; the position of each pointer target within
+ *  search is the original index of the item to copy next. The search
+ *  array and the input data are left unchanged. The address of the copy
+ *  is also stored in the file-level variable SaveData2.
+ *
+ *  \param[in] search Array with sorting criterion, one entry per item.
+ *  \param[in] data Items to be reordered.
+ *  \param[in] n_items Number of items.
+ *  \param[in] item_size Size of a single item in bytes.
+ *
+ *  \return Pointer to the reordered copy.
+ */
+void *sort_based_on_mesh_search(mesh_search_data *search, void *data, int n_items, int item_size)
+{
+  const char *src = (const char *)data;
+  int k;
+
+  /* destination copy; its address is mirrored in SaveData2 */
+  char *sorted = mymalloc_movable(&SaveData2, "data2", (size_t)n_items * item_size);
+  SaveData2    = sorted;
+
+  /* after sorting, order[k] points at the search entry that belongs at position k */
+  mesh_search_data **order = mymalloc("perm", n_items * sizeof(*order));
+
+  for(k = 0; k < n_items; ++k)
+    order[k] = search + k;
+
+  mysort(order, n_items, sizeof(*order), mesh_search_compare_task);
+
+  /* the distance of order[k] from search is the original index of the item */
+  for(k = 0; k < n_items; ++k)
+    {
+      const size_t from = order[k] - search;
+      memcpy(sorted + item_size * (size_t)k, src + item_size * from, item_size);
+    }
+
+  myfree(order);
+
+  return (void *)sorted;
+}
+
+/*! \brief  Builds a copy of an opaque item array, ordered by increasing
+ *  value of an int field that sits at a given byte offset inside each
+ *  item (typically the field naming the task). The copy is allocated
+ *  here and has to be freed by the caller.
+ *
+ *  An array of pointers to the int fields is sorted with
+ *  intpointer_compare; the byte distance of each pointer from the field
+ *  of the first item, divided by item_size, is the original index of the
+ *  item to copy next. The input data is left unchanged. The address of
+ *  the copy is also stored in the file-level variable SaveData2.
+ *
+ *  \param[in] data Items to be reordered.
+ *  \param[in] field_offset Byte offset of the int sort field in an item.
+ *  \param[in] n_items Number of items.
+ *  \param[in] item_size Size of a single item in bytes.
+ *
+ *  \return Pointer to the reordered copy.
+ */
+void *sort_based_on_field(void *data, int field_offset, int n_items, int item_size)
+{
+  const char *first_key = (char *)data + field_offset; /* sort field of item 0 */
+  int i;
+
+  /* destination copy; its address is mirrored in SaveData2 */
+  char *sorted = mymalloc_movable(&SaveData2, "data2", (size_t)n_items * item_size);
+  SaveData2    = sorted;
+
+  /* perm[i] starts out pointing at the sort field of item i */
+  int **perm = mymalloc("perm", n_items * sizeof(*perm));
+
+  for(i = 0; i < n_items; ++i)
+    perm[i] = (int *)((char *)data + field_offset + (size_t)i * item_size);
+
+  mysort(perm, n_items, sizeof(*perm), intpointer_compare);
+
+  /* recover each item's original index from where its sort field lives */
+  for(i = 0; i < n_items; ++i)
+    {
+      const size_t from = ((char *)perm[i] - first_key) / item_size;
+      myassert(((char *)perm[i] - ((char *)data + field_offset)) % item_size == 0);
+      memcpy(sorted + item_size * (size_t)i, (char *)data + item_size * from, item_size);
+    }
+
+  myfree(perm);
+
+  return (void *)sorted;
+}
+
+/*! \brief Sends every item of data to the task named by the matching
+ *         mesh_search entry and fills data with the items received.
+ *
+ *  On return *n_items is the number of received items; data is grown with
+ *  myrealloc_movable (and *max_n raised) if they do not fit.
+ *
+ *  Before sending, search[i].Task and search[i].u.Index are stored inside
+ *  item i at task_offset / cell_offset whenever that offset is nonnegative.
+ *
+ *  \param[in] search One entry per item; Task selects the destination.
+ *  \param[in, out] data Item array; holds the received items afterwards.
+ *  \param[in, out] n_items Number of items in data.
+ *  \param[in, out] max_n Number of items data has room for.
+ *  \param[in] item_size Size of one item in bytes.
+ *  \param[in] commtag Communication tag.
+ *  \param[in] task_offset Byte offset of an int task field, or negative.
+ *  \param[in] cell_offset Byte offset of an int index field, or negative.
+ *
+ *  \return void
+ */
+void mpi_distribute_items_from_search(mesh_search_data *search, void *data, int *n_items, int *max_n, int item_size, int commtag,
+                                      int task_offset, int cell_offset)
+{
+  int i;
+
+  for(i = 0; i < NTask; i++)
+    Send_count[i] = 0;
+
+  for(i = 0; i < *n_items; i++)
+    {
+      const int task = search[i].Task;
+      myassert(task >= 0 && task < NTask);
+      char *item = (char *)data + (size_t)i * item_size;
+
+      Send_count[task]++;
+      if(task_offset >= 0) /* optionally record the destination task inside the item */
+        *(int *)(item + task_offset) = task;
+      if(cell_offset >= 0) /* optionally record the index from the search entry as well */
+        *(int *)(item + cell_offset) = search[i].u.Index;
+    }
+
+  /* the sorted copy is picked up from SaveData2 further down, after the possible reallocation */
+  sort_based_on_mesh_search(search, data, *n_items, item_size);
+
+  const int nimport = mpi_calculate_offsets(Send_count, Send_offset, Recv_count, Recv_offset, 0);
+  if(nimport > *max_n)
+    {
+      data   = myrealloc_movable(data, (size_t)nimport * item_size);
+      *max_n = nimport;
+    }
+
+  void *sorted = SaveData2;
+
+  mpi_exchange_buffers(sorted, Send_count, Send_offset, data, Recv_count, Recv_offset, item_size, commtag, 1);
+
+  myfree_movable(sorted);
+
+  *n_items = nimport;
+}
+
+/*! \brief Sends every item of an opaque array to the task stored in the
+ *  item's own int task field (located at a given byte offset) and fills
+ *  data with the items received. On return *n_items is the number of
+ *  received items; data is grown with myrealloc_movable, and *max_n
+ *  raised, if they do not fit.
+ *
+ *  \param[in, out] data Item array; holds the received items afterwards.
+ *  \param[in] task_offset Byte offset of the int task field in an item.
+ *  \param[in, out] n_items Number of items in data.
+ *  \param[in, out] max_n Number of items data has room for.
+ *  \param[in] item_size Size of one item in bytes.
+ *  \param[in] commtag Communication tag.
+ *
+ *  \return void
+ */
+void mpi_distribute_items_to_tasks(void *data, int task_offset, int *n_items, int *max_n, int item_size, int commtag)
+{
+  int i;
+  char *const task_field = (char *)data + task_offset; /* task field of item 0 */
+
+  for(i = 0; i < NTask; i++)
+    Send_count[i] = 0;
+  for(i = 0; i < *n_items; i++)
+    {
+      const int task = *(int *)(task_field + (size_t)i * item_size);
+      myassert(task >= 0 && task < NTask);
+      Send_count[task]++;
+    }
+
+  sort_based_on_field(data, task_offset, *n_items, item_size); /* result is picked up from SaveData2 below */
+
+  const int nimport = mpi_calculate_offsets(Send_count, Send_offset, Recv_count, Recv_offset, 0);
+
+  if(nimport > *max_n)
+    {
+      data   = myrealloc_movable(data, (size_t)nimport * item_size);
+      *max_n = nimport;
+    }
+
+  void *sorted = SaveData2;
+
+  mpi_exchange_buffers(sorted, Send_count, Send_offset, data, Recv_count, Recv_offset, item_size, commtag, 1);
+
+  myfree_movable(sorted);
+
+  *n_items = nimport;
+}
diff --git a/src/amuse/community/arepo/src/mpi_utils/myIBarrier.c b/src/amuse/community/arepo/src/mpi_utils/myIBarrier.c
new file mode 100644
index 0000000000..7133759b85
--- /dev/null
+++ b/src/amuse/community/arepo/src/mpi_utils/myIBarrier.c
@@ -0,0 +1,175 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mpi_utils/myIBarrier.c
+ * \date        05/2018
+ * \brief       Home-made MPI_Ibarrier routine.
+ * \details     Non-blocking version of MPI_Barrier; Once reaching this point,
+ *              a process notifies this to other tasks.
+ *              contains functions:
+ *                void myIBarrier(MPI_Comm comm, struct sMyIBarrier *barrier)
+ *                void myIBarrierTest(struct sMyIBarrier *barrier, int *flag,
+ *                  MPI_Status * unused)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 04.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifdef MYIBARRIER
+
+#include <strings.h>
+
+#include "myIBarrier.h"
+
+/*! \brief Non-blocking MPI barrier; notifies other tasks once it is called.
+ *         Initializes all fields of \p barrier and allocates its LevelDone array.
+ *  \param[in] comm MPI communicator.
+ *  \param[in, out] barrier Object containing information about the barrier.
+ *
+ *  \return void
+ */
+void myIBarrier(MPI_Comm comm, struct sMyIBarrier *barrier)
+{
+  barrier->comm = comm;
+  MPI_Comm_rank(comm, &barrier->rank);
+  MPI_Comm_size(comm, &barrier->nTasks);
+  /* depth of the binary tree over all ranks; the former fls(rank - 1) gave zero levels on rank 1 */
+  barrier->nLevels   = fls(barrier->nTasks - 1);
+  barrier->LevelDone = mymalloc("myIBarrier", barrier->nLevels);
+  memset(barrier->LevelDone, 0, barrier->nLevels);
+
+  /* find messages we would expect from nonexisting tasks */
+  for(int lvl = 0; lvl < barrier->nLevels; lvl++)
+    if(((barrier->rank & (1 << lvl)) == 0) && (barrier->rank + (1 << lvl) >= barrier->nTasks))
+      barrier->LevelDone[lvl] = 1;
+
+  /* find out if we have to send or wait */
+  int level = 0;
+  while(level < barrier->nLevels)
+    {
+      if(barrier->rank & (1 << level))
+        {
+          /* send our result; blocking MPI_Send because the payload is a local that goes out of scope */
+          int target  = barrier->rank - (1 << level);
+          int payload = barrier->nLevels; /* NOTE(review): sends nLevels, not the current level - confirm protocol */
+          MPI_Send(&payload, 1, MPI_INT, target, MPI_TAG_IBARRIER, barrier->comm);
+          break;
+        }
+      else
+        {
+          /* check if there is something to receive in which case we have to wait, otherwise go down one level */
+          if(barrier->rank + (1 << level) < barrier->nTasks)
+            {
+              barrier->LevelDone[level] = 1;
+              break;
+            }
+          else
+            level++;
+        }
+    }
+}
+
+/*! \brief Test function for myIBarrier.
+ *         Probes for one pending barrier message and processes it if there is one.
+ *  \param[in, out] barrier Object containing information about the barrier.
+ *  \param[out] flag Set to 1 if this call finds the barrier resolved, else to 0.
+ *  \param[in] unused Unused MPI_Status.
+ *
+ *  \return void
+ */
+void myIBarrierTest(struct sMyIBarrier *barrier, int *flag, MPI_Status *unused)
+{
+  *flag = 0; /* store through the pointer; assigning to 'flag' itself never reached the caller */
+
+  int rflag;
+  MPI_Status status;
+
+  MPI_Iprobe(MPI_ANY_SOURCE, MPI_TAG_IBARRIER, barrier->comm, &rflag, &status);
+
+  if(rflag)
+    {
+      int source = status.MPI_SOURCE;
+
+      int level;
+      MPI_Recv(&level, 1, MPI_INT, source, MPI_TAG_IBARRIER, barrier->comm, MPI_STATUS_IGNORE);
+
+      if(source > barrier->rank)
+        {
+          /* we got another result, so lets check if we can send out further */
+          while((level < barrier->nLevels) && barrier->LevelDone[level])
+            level++;
+
+          if(level == barrier->nLevels)
+            {
+              if(barrier->rank != 0)
+                terminate("fail");
+              /* ok, the barrier resolved, tell everyone */
+              /* '==' binds tighter than '&', hence the parentheses; MPI_Send since 'level' doubles as loop counter */
+              for(level = 0; level < barrier->nLevels; level++)
+                {
+                  if((barrier->rank & (1 << level)) == 0)
+                    {
+                      int target = barrier->rank + (1 << level);
+                      if(target < barrier->nTasks)
+                        MPI_Send(&level, 1, MPI_INT, target, MPI_TAG_IBARRIER, barrier->comm);
+                    }
+                  else
+                    break;
+                }
+
+              *flag = 1;
+            }
+          else
+            {
+              if(barrier->rank & (1 << level))
+                {
+                  /* we need to send our result */
+                  int target  = barrier->rank - (1 << level);
+                  int payload = barrier->nLevels; /* NOTE(review): sends nLevels, not the level - confirm protocol */
+                  MPI_Send(&payload, 1, MPI_INT, target, MPI_TAG_IBARRIER, barrier->comm);
+                }
+              else
+                {
+                  barrier->LevelDone[level] = 1;
+                }
+            }
+        }
+      else
+        {
+          for(; level < barrier->nLevels; level++)
+            {
+              if((barrier->rank & (1 << level)) == 0)
+                {
+                  int target = barrier->rank + (1 << level);
+                  if(target < barrier->nTasks)
+                    MPI_Send(&level, 1, MPI_INT, target, MPI_TAG_IBARRIER, barrier->comm);
+                }
+              else
+                break;
+            }
+
+          *flag = 1;
+        }
+    }
+}
+
+#endif /* #ifdef MYIBARRIER */
diff --git a/src/amuse/community/arepo/src/mpi_utils/myIBarrier.h b/src/amuse/community/arepo/src/mpi_utils/myIBarrier.h
new file mode 100644
index 0000000000..461f8626c9
--- /dev/null
+++ b/src/amuse/community/arepo/src/mpi_utils/myIBarrier.h
@@ -0,0 +1,51 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mpi_utils/myIBarrier.h
+ * \date        05/2018
+ * \brief       Header for myIBarrier functions.
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 27.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef MYIBARRIER_H
+#define MYIBARRIER_H
+
+#ifdef MYIBARRIER
+#define MPI_TAG_IBARRIER 0x666
+
+struct sMyIBarrier
+{
+  MPI_Comm comm;   /*!< communicator the barrier runs on; stored by myIBarrier() */
+  int rank;        /*!< rank of this task in comm (from MPI_Comm_rank) */
+  int nTasks;      /*!< number of tasks in comm (from MPI_Comm_size) */
+  int nLevels;     /*!< number of levels; also the byte length of LevelDone */
+  char *LevelDone; /*!< one flag byte per level, allocated and zeroed by myIBarrier() */
+};
+
+void myIBarrier(MPI_Comm comm, struct sMyIBarrier *barrier);
+void myIBarrierTest(struct sMyIBarrier *barrier, int *flag, MPI_Status *unused);
+#endif /* #ifdef MYIBARRIER */
+
+#endif /* #ifndef MYIBARRIER_H */
diff --git a/src/amuse/community/arepo/src/mpi_utils/myalltoall.c b/src/amuse/community/arepo/src/mpi_utils/myalltoall.c
new file mode 100644
index 0000000000..dcbb889c91
--- /dev/null
+++ b/src/amuse/community/arepo/src/mpi_utils/myalltoall.c
@@ -0,0 +1,122 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mpi_utils/myalltoall.c
+ * \date        05/2018
+ * \brief       Specialized all-to-all MPI communication functions.
+ * \details     contains functions:
+ *                void myMPI_Alltoallv(void *sendb, size_t * sendcounts,
+ *                  size_t * sdispls, void *recvb, size_t * recvcounts,
+ *                  size_t * rdispls, int len, int big_flag, MPI_Comm comm)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief All-to-all exchange of variable-sized blocks that also works when
+ *         individual messages are too large for a plain MPI_Alltoallv.
+ *
+ *  Counts and displacements are given in elements of len bytes each; all
+ *  data is transferred as MPI_BYTE.
+ *
+ *  \param[in] sendb Starting address of send buffer.
+ *  \param[in] sendcounts Array with one entry per task in comm: number of
+ *             elements to send to that task.
+ *  \param[in] sdispls Array with one entry per task in comm: displacement
+ *             (relative to sendb) of the data destined for that task.
+ *  \param[out] recvb Starting address of receive buffer.
+ *  \param[in] recvcounts Array with one entry per task in comm: number of
+ *             elements to receive from that task.
+ *  \param[in] rdispls Array with one entry per task in comm: displacement
+ *             (relative to recvb) at which to place data from that task.
+ *  \param[in] len Size of a single element in bytes.
+ *  \param[in] big_flag If 0, one MPI_Alltoallv call is used; otherwise the
+ *             data is exchanged pair-wise through myMPI_Sendrecv.
+ *  \param[in] comm MPI communicator.
+ *
+ *  \return void
+ */
+void myMPI_Alltoallv(void *sendb, size_t *sendcounts, size_t *sdispls, void *recvb, size_t *recvcounts, size_t *rdispls, int len,
+                     int big_flag, MPI_Comm comm)
+{
+  char *src = (char *)sendb;
+  char *dst = (char *)recvb;
+
+  int ntask;
+  MPI_Comm_size(comm, &ntask);
+
+  if(big_flag != 0)
+    {
+      /* here we definitely have some large messages: use the pair-wise
+       * protocol, in which task t talks to task t ^ ngrp in round ngrp
+       */
+      int thistask;
+      MPI_Comm_rank(comm, &thistask);
+
+      for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
+        {
+          int target = thistask ^ ngrp;
+
+          if(target >= ntask)
+            continue; /* no such partner in this round */
+          if(sendcounts[target] == 0 && recvcounts[target] == 0)
+            continue; /* nothing to exchange with this partner */
+
+          myMPI_Sendrecv(src + sdispls[target] * len, sendcounts[target] * len, MPI_BYTE, target, TAG_PDATA + ngrp,
+                         dst + rdispls[target] * len, recvcounts[target] * len, MPI_BYTE, target, TAG_PDATA + ngrp, comm,
+                         MPI_STATUS_IGNORE);
+        }
+      return;
+    }
+
+  /* small messages: convert element counts/offsets to int byte values and use MPI_Alltoallv directly */
+  int *send_nbytes = (int *)mymalloc("scount", ntask * sizeof(int));
+  int *recv_nbytes = (int *)mymalloc("rcount", ntask * sizeof(int));
+  int *send_start  = (int *)mymalloc("soff", ntask * sizeof(int));
+  int *recv_start  = (int *)mymalloc("roff", ntask * sizeof(int));
+
+  for(int task = 0; task < ntask; task++)
+    {
+      send_nbytes[task] = sendcounts[task] * len;
+      recv_nbytes[task] = recvcounts[task] * len;
+      send_start[task]  = sdispls[task] * len;
+      recv_start[task]  = rdispls[task] * len;
+    }
+
+  MPI_Alltoallv(src, send_nbytes, send_start, MPI_BYTE, dst, recv_nbytes, recv_start, MPI_BYTE, comm);
+
+  /* release in reverse order of allocation */
+  myfree(recv_start);
+  myfree(send_start);
+  myfree(recv_nbytes);
+  myfree(send_nbytes);
+}
diff --git a/src/amuse/community/arepo/src/mpi_utils/pinning.c b/src/amuse/community/arepo/src/mpi_utils/pinning.c
new file mode 100644
index 0000000000..f7a6dbb04e
--- /dev/null
+++ b/src/amuse/community/arepo/src/mpi_utils/pinning.c
@@ -0,0 +1,292 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mpi_utils/pinning.c
+ * \date        05/2018
+ * \brief       Routines to pin MPI threads to cores.
+ * \details     contains functions:
+ *                void get_core_set(void)
+ *                void detect_topology(void)
+ *                void pin_to_core_set(void)
+ *                void report_pinning(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 08.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef IMPOSE_PINNING
+#include <hwloc.h>
+#include <hwloc/bitmap.h>
+
+#define MAX_CORES 4096
+
+static int flag_pinning_error = 0; /*!< set to 1 by pin_to_core_set() when it refrains from pinning */
+
+static hwloc_cpuset_t cpuset, cpuset_after_MPI_init; /*!< bindings read in get_core_set() / pin_to_core_set() */
+static hwloc_topology_t topology;                    /*!< initialised and loaded in detect_topology() */
+static int topodepth;                                /*!< result of hwloc_topology_get_depth() */
+static int sockets;                                  /*!< HWLOC_OBJ_SOCKET objects found, -1 if depth unknown */
+static int cores;                                    /*!< HWLOC_OBJ_CORE objects found, -1 if depth unknown */
+static int pus;                                      /*!< HWLOC_OBJ_PU objects found, -1 if depth unknown */
+static int hyperthreads_per_core;                    /*!< pus / cores, computed in pin_to_core_set() */
+
+/*! \brief Queries the current CPU binding of this process and keeps it in cpuset.
+ *  \return void
+ */
+void get_core_set(void)
+{
+  hwloc_cpuset_t binding = hwloc_bitmap_alloc();
+  hwloc_get_proc_cpubind(topology, getpid(), binding, 0);
+  cpuset = binding;
+}
+
+/*! \brief Counts the objects of one type in the loaded hwloc topology.
+ *
+ *  \param[in] type hwloc object type to look up.
+ *
+ *  \return Number of objects at the depth of that type, or -1 if the depth
+ *          is reported as HWLOC_TYPE_DEPTH_UNKNOWN.
+ */
+static int pinning_count_objects(hwloc_obj_type_t type)
+{
+  unsigned type_depth = hwloc_get_type_depth(topology, type);
+
+  if(type_depth == HWLOC_TYPE_DEPTH_UNKNOWN)
+    return -1;
+
+  return hwloc_get_nbobjs_by_depth(topology, type_depth);
+}
+
+/*! \brief Loads the hardware topology through hwloc and records its depth
+ *         together with the number of sockets, cores and processing units.
+ *
+ *  The results are stored in the file-level variables topology, topodepth,
+ *  sockets, cores and pus; a count of -1 means the type depth is unknown.
+ *
+ *  \return void
+ */
+void detect_topology(void)
+{
+  /* set up the topology object and run the detection */
+  hwloc_topology_init(&topology);
+  hwloc_topology_load(topology);
+
+  /* overall depth, kept in case it is needed later */
+  topodepth = hwloc_topology_get_depth(topology);
+
+  /* one count per object type of interest */
+  sockets = pinning_count_objects(HWLOC_OBJ_SOCKET);
+  cores   = pinning_count_objects(HWLOC_OBJ_CORE);
+  pus     = pinning_count_objects(HWLOC_OBJ_PU);
+}
+
+/*! \brief Pins the MPI ranks to the available core set.
+ *
+ *  The logical cores available on the node are divided evenly among its MPI
+ *  tasks; on any refusal to pin, flag_pinning_error is set and nothing is bound.
+ *
+ *  \return void
+ */
+void pin_to_core_set(void)
+{
+  int i, thread;
+  char buf[MAX_CORES + 1];
+
+  mpi_printf("\n\n");
+  mpi_printf("PINNING: We have %d sockets, %d physical cores and %d logical cores on the first MPI-task's node.\n", sockets, cores,
+             pus);
+  if(cores <= 0 || sockets <= 0 || pus <= 0)
+    {
+      mpi_printf("PINNING: The topology cannot be recognized. We refrain from any pinning attempt.\n");
+      flag_pinning_error = 1;
+      return;
+    }
+
+  hyperthreads_per_core = pus / cores;
+
+  if(hyperthreads_per_core < 1)
+    terminate("Need at least one logical thread per physical core\n");
+
+  if(pus > cores)
+    mpi_printf("PINNING: Looks like %d hyperthreads per physical core are in principle possible.\n", hyperthreads_per_core);
+
+  cpuset_after_MPI_init = hwloc_bitmap_alloc();
+  hwloc_get_proc_cpubind(topology, getpid(), cpuset_after_MPI_init, 0);
+
+  if(!hwloc_bitmap_isequal(cpuset, cpuset_after_MPI_init))
+    mpi_printf("PINNING: Apparently, the MPI library set some pinning itself. We'll override this.\n");
+
+  int id, available_pus = 0;
+
+  for(id = hwloc_bitmap_first(cpuset); id != -1; id = hwloc_bitmap_next(cpuset, id))
+    available_pus++;
+
+  mpi_printf("PINNING: Looks like %d logical cores are available\n", available_pus);
+
+  if(available_pus == pus)
+    mpi_printf("PINNING: Looks like all available logical cores are at our disposal.\n");
+  else
+    {
+      if(available_pus >= 1)
+        {
+          mpi_printf("PINNING: Looks like allready before start of the code, a tight binding was imposed.\n");
+#ifdef IMPOSE_PINNING_OVERRIDE_MODE
+          for(id = 0; id < pus; id++)
+            hwloc_bitmap_set(cpuset, id);
+          available_pus = pus;
+          mpi_printf("PINNING: We are overridung this and make all %d available to us.\n", available_pus);
+#else  /* #ifdef IMPOSE_PINNING_OVERRIDE_MODE */
+          mpi_printf(
+              "PINNING: We refrain from any pinning attempt ourselves. (This can be changed by setting IMPOSE_PINNING_OVERRIDE_MODE.)\n");
+          flag_pinning_error = 1;
+          return;
+#endif /* #ifdef IMPOSE_PINNING_OVERRIDE_MODE #else */
+        }
+    }
+
+  for(i = 0; i < pus && i < MAX_CORES; i++)
+    if(hwloc_bitmap_isset(cpuset, i))
+      buf[i] = '1';
+    else
+      buf[i] = '-';
+  /* terminate at the last written slot: buf[pus] would lie outside buf when pus > MAX_CORES */
+  buf[i] = 0;
+
+  mpi_printf("PINNING: Available logical cores on first node:  %s\n", buf);
+
+  int pus_per_task = available_pus / TasksInThisNode;
+
+  mpi_printf("PINNING: %d logical cores are available per MPI Task.\n", pus_per_task);
+
+  if(pus_per_task <= 0)
+    terminate("Need at least one logical core per MPI task for pinning to make sense.  available_pus=%d TasksInThisNode=%d\n",
+              available_pus, TasksInThisNode);
+
+  int depth, cid, cores_before, id_this, id_found, count;
+  hwloc_obj_t obj;
+  hwloc_cpuset_t cpuset_core;
+
+  /* go through all logical cores in sequence of proximity */
+  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU);
+
+  for(cid = 0, cores_before = 0; cores_before < RankInThisNode * pus_per_task && cid < pus; cid++)
+    {
+      obj = hwloc_get_obj_by_depth(topology, depth, cid);
+
+      cpuset_core = hwloc_bitmap_dup(obj->cpuset);
+      if(hwloc_bitmap_isincluded(cpuset_core, cpuset))
+        {
+          cores_before++;
+        }
+      hwloc_bitmap_free(cpuset_core);
+    }
+
+  int pus_per_thread, skip;
+
+  if(pus_per_task > NUM_THREADS)
+    pus_per_thread = pus_per_task / NUM_THREADS;
+  else
+    pus_per_thread = 1;
+
+  /* cid should now be the logical index of the first PU for this MPI task */
+  for(thread = 0, id_this = id_found = cid, count = 0; thread < NUM_THREADS; thread++)
+    {
+      /* NOTE(review): cpuset_thread and NUM_THREADS are not declared in this file - confirm they come from the headers */
+      obj                   = hwloc_get_obj_by_depth(topology, depth, id_found);
+      cpuset_thread[thread] = hwloc_bitmap_dup(obj->cpuset);
+
+      for(skip = 0; skip < pus_per_thread; skip++)
+        {
+          id_this++;
+          count++;
+
+          id_found = -1;
+          if(count >= pus_per_task)
+            {
+              id_this = cid;
+              count   = 0;
+            }
+          do
+            {
+              obj         = hwloc_get_obj_by_depth(topology, depth, id_this);
+              cpuset_core = hwloc_bitmap_dup(obj->cpuset);
+              if(hwloc_bitmap_isincluded(cpuset_core, cpuset))
+                {
+                  id_found = id_this;
+                }
+              else
+                {
+                  id_this++;
+                  if(id_this >= pus)
+                    terminate("id_this >= pus");
+                }
+              hwloc_bitmap_free(cpuset_core);
+            }
+          while(id_found < 0);
+        }
+    }
+
+  hwloc_set_proc_cpubind(topology, getpid(), cpuset_thread[0], HWLOC_CPUBIND_PROCESS);
+}
+
+/*! \brief Prints the cores each task of node 0 is bound to, one task at a time.
+ *         Does nothing if pin_to_core_set() flagged that no pinning took place.
+ *  \return void
+ */
+void report_pinning(void)
+{
+  int i;
+  char buf[MAX_CORES + 1];
+
+  if(flag_pinning_error)
+    return;
+
+  hwloc_get_cpubind(topology, cpuset, 0);
+
+  for(i = 0; i < pus && i < MAX_CORES; i++)
+    if(hwloc_bitmap_isset(cpuset, i))
+      buf[i] = '1';
+    else
+      buf[i] = '-';
+  buf[i] = 0; /* i is the number of characters written; buf[pus] lies outside buf when pus > MAX_CORES */
+  /* every task passes NTask barriers; task i prints in round i if it is on node 0 */
+  for(i = 0; i < NTask; i++)
+    {
+      if(ThisTask == i && ThisNode == 0)
+        printf("PINNING: Node=%4d: Task=%04d:                   %s\n", ThisNode, ThisTask, buf);
+      fflush(stdout);
+      MPI_Barrier(MPI_COMM_WORLD);
+    }
+}
+#endif /* #ifdef IMPOSE_PINNING */
diff --git a/src/amuse/community/arepo/src/mpi_utils/sizelimited_sendrecv.c b/src/amuse/community/arepo/src/mpi_utils/sizelimited_sendrecv.c
new file mode 100644
index 0000000000..6614f4ed03
--- /dev/null
+++ b/src/amuse/community/arepo/src/mpi_utils/sizelimited_sendrecv.c
@@ -0,0 +1,116 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mpi_utils/sizelimited_sendrecv.c
+ * \date        05/2018
+ * \brief       MPI_Sendrecv operations split into chunks of maximum size.
+ * \details     If the number of elements in the MPI_Sendrecv is larger than
+ *              count_limit, the function will split up the communication into
+ *              multiple chunks communicated by the usual MPI_Sendrecv routine.
+ *              contains functions:
+ *                int myMPI_Sendrecv(void *sendb, size_t sendcount,
+ *                  MPI_Datatype sendtype, int dest, int sendtag, void *recvb,
+ *                  size_t recvcount, MPI_Datatype recvtype, int source,
+ *                  int recvtag, MPI_Comm comm, MPI_Status * status)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief Sendrecv wrapper that limits the number of elements transferred
+ *         by a single MPI_Sendrecv call.
+ *
+ *  Longer messages are split into several MPI_Sendrecv calls, issued until
+ *  both the send and the receive part have been communicated completely.
+ *
+ *  \param[in] sendb Initial address of send buffer.
+ *  \param[in] sendcount Number of elements in send buffer.
+ *  \param[in] sendtype Type of elements in send buffer (handle).
+ *  \param[in] dest Rank of destination; must be equal to source.
+ *  \param[in] sendtag Send tag.
+ *  \param[out] recvb Initial address of receive buffer.
+ *  \param[in] recvcount Number of elements in receive buffer.
+ *  \param[in] recvtype Type of elements in receive buffer (handle).
+ *  \param[in] source Rank of source; must be equal to dest.
+ *  \param[in] recvtag Receive tag.
+ *  \param[in] comm MPI communicator.
+ *  \param[out] status Status of the last MPI_Sendrecv call that was issued.
+ *
+ *  \return 0
+ */
+int myMPI_Sendrecv(void *sendb, size_t sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvb, size_t recvcount,
+                   MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status)
+{
+  int size_sendtype, size_recvtype, send_now, recv_now;
+  char *sendbuf = (char *)sendb;
+  char *recvbuf = (char *)recvb;
+
+  if(dest != source)
+    terminate("dest != source");
+
+  MPI_Type_size(sendtype, &size_sendtype);
+  MPI_Type_size(recvtype, &size_recvtype);
+
+  if(dest == ThisTask)
+    {
+      /* NOTE(review): copies recvcount elements, and ThisTask is not queried from comm - confirm for sub-communicators */
+      memcpy(recvbuf, sendbuf, recvcount * size_recvtype);
+      return 0;
+    }
+
+  /* per-call element limits; the receive limit uses the receive type so that it matches the sender's chunking */
+  size_t send_limit = MPI_MESSAGE_SIZELIMIT_IN_BYTES / size_sendtype;
+  size_t recv_limit = MPI_MESSAGE_SIZELIMIT_IN_BYTES / size_recvtype;
+
+  while(sendcount > 0 || recvcount > 0)
+    {
+      if(sendcount > send_limit)
+        send_now = send_limit;
+      else
+        send_now = sendcount;
+
+      if(recvcount > recv_limit)
+        recv_now = recv_limit;
+      else
+        recv_now = recvcount;
+
+      MPI_Sendrecv(sendbuf, send_now, sendtype, dest, sendtag, recvbuf, recv_now, recvtype, source, recvtag, comm, status);
+
+      sendcount -= send_now;
+      recvcount -= recv_now;
+
+      sendbuf += (size_t)send_now * size_sendtype;
+      recvbuf += (size_t)recv_now * size_recvtype;
+    }
+
+  return 0;
+}
diff --git a/src/amuse/community/arepo/src/ngbtree/ngbtree.c b/src/amuse/community/arepo/src/ngbtree/ngbtree.c
new file mode 100644
index 0000000000..ea0ec2e8cb
--- /dev/null
+++ b/src/amuse/community/arepo/src/ngbtree/ngbtree.c
@@ -0,0 +1,1394 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/ngbtree/ngbtree.c
+ * \date        05/2018
+ * \brief       Construct neighbor tree.
+ * \details     This file contains the neighbor tree construction. This is a
+ *              tree structure that includes all gas cells, but no other
+ *              particle types.
+ *              contains functions:
+ *                int ngb_treebuild(int npart)
+ *                static inline unsigned long long ngb_double_to_int(double d)
+ *                int ngb_treebuild_construct(int npart)
+ *                int ngb_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z)
+ *                void ngb_update_node_recursive(int no, int sib, int father, int *last, int mode)
+ *                void ngb_record_topnode_siblings(int no, int sib)
+ *                void ngb_exchange_topleafdata(void)
+ *                void drift_node(struct NgbNODE *current, integertime time1)
+ *                void ngb_update_velocities(void)
+ *                void ngb_update_vbounds(int i, int *nchanged, int *nodelist)
+ *                void ngb_finish_vounds_update(int nchanged, int *nodelist)
+ *                void ngb_update_rangebounds(int i, int *nchanged, int *nodelist)
+ *                void ngb_finish_rangebounds_update(int nchanged, int *nodelist)
+ *                void ngb_treemodifylength(int delta_NgbMaxPart)
+ *                void ngb_treeallocate(void)
+ *                void ngb_treefree(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../gravity/forcetree.h"
+
+static void ngb_record_topnode_siblings(int no, int sib); /* forward declarations of file-local helpers */
+static int ngb_treebuild_construct(int npart);
+static void ngb_update_node_recursive(int no, int sib, int father, int *last, int mode);
+static void ngb_exchange_topleafdata(void);
+static int ngb_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z);
+static void ngb_update_vbounds(int i, int *nchanged, int *nodelist);
+static void ngb_finish_vounds_update(int nchanged, int *nodelist);
+
+static int *Ngb_Node_Tmp_Sibling; /* sibling scratch array indexed by node number; allocated in ngb_treebuild_construct(), freed in ngb_treebuild() */
+
+/*! \brief Driver routine that builds the neighbor oct-tree.
+ *
+ *  Retries ngb_treebuild_construct() with a 15% larger All.NgbTreeAllocFactor
+ *  until no task reports -1, then finalizes the local top-leaves and the
+ *  top-level nodes. Does not build a tree if All.TotNumGas == 0.
+ *
+ *  \param[in] npart Number of particles in tree.
+ *
+ *  \return Number of nodes in the tree (Ngb_NumNodes), or 0 if no gas exists.
+ */
+int ngb_treebuild(int npart)
+{
+  if(All.TotNumGas == 0)
+    return 0;
+
+  TIMER_START(CPU_NGBTREEBUILD);
+
+  mpi_printf("NGBTREE: Ngb-tree construction.  (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+
+  double t0 = second();
+
+  int flag;
+  do
+    {
+      int flag_single = ngb_treebuild_construct(npart);
+
+      MPI_Allreduce(&flag_single, &flag, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); /* -1 on any task makes all tasks retry */
+      if(flag == -1)
+        {
+          myfree(Ngb_Node_Tmp_Sibling + Ngb_MaxPart); /* undo the -Ngb_MaxPart shift applied at allocation */
+          ngb_treefree();
+
+          All.NgbTreeAllocFactor *= 1.15;
+          mpi_printf("Increasing NgbTreeAllocFactor, new value=%g\n", All.NgbTreeAllocFactor);
+
+          ngb_treeallocate();
+        }
+    }
+  while(flag == -1);
+
+  int ntopleaves = DomainNLocalTopleave[ThisTask];
+  int *list      = DomainListOfLocalTopleaves + DomainFirstLocTopleave[ThisTask];
+
+  for(int i = 0; i < ntopleaves; i++) /* compute the node data of every local top-leaf branch */
+    {
+      int last = -1;
+      int no   = Ngb_DomainNodeIndex[list[i]];
+
+      if(no < Ngb_MaxPart || no >= Ngb_MaxPart + Ngb_MaxNodes)
+        terminate("i=%d no=%d  task=%d \n", i, no, DomainTask[list[i]]);
+
+      ngb_update_node_recursive(no, Ngb_Node_Tmp_Sibling[no], no, &last, 0);
+
+      /* if there was no particle in the node, we need to initialize nextnode of the node */
+      if(no == last)
+        Ngb_Nodes[no].u.d.nextnode = -1;
+
+      Ngb_Nodes[no].u.d.sibling = last; /* we temporarily store this here and will later restore this sibling pointer,
+                                           which is anyway equal to Ngb_Node_Tmp_Sibling[index] */
+    }
+
+  ngb_exchange_topleafdata();
+
+  /* now put in "pseudo" particles as nextnode in non-local topleaves */
+  for(int i = 0; i < NTopleaves; i++)
+    {
+      if(DomainTask[i] != ThisTask)
+        {
+          int index                     = Ngb_DomainNodeIndex[i];
+          Ngb_Nodes[index].u.d.nextnode = Ngb_MaxPart + Ngb_MaxNodes + i;
+        }
+    }
+
+  /* now update the top-level tree nodes */
+  int last = -1;
+  ngb_update_node_recursive(Ngb_MaxPart, -1, -1, &last, 1);
+
+  if(last >= Ngb_MaxPart) /* terminate the nextnode chain at the last visited element */
+    {
+      if(last >= Ngb_MaxPart + Ngb_MaxNodes) /* a pseudo-particle */
+        Ngb_Nextnode[last - Ngb_MaxNodes] = -1;
+      else
+        Ngb_Nodes[last].u.d.nextnode = -1;
+    }
+  else
+    Ngb_Nextnode[last] = -1;
+
+  TIMER_STOPSTART(CPU_NGBTREEBUILD, CPU_LOGS);
+
+  double numnodes = Ngb_NumNodes, tot_numnodes = 0; /* MPI_Reduce only writes the sum on task 0, but it is read on every task below */
+  MPI_Reduce(&numnodes, &tot_numnodes, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+
+  double t1 = second();
+  mpi_printf("NGBTREE: Ngb-tree construction done. took %g sec  <numnodes>=%g  NTopnodes=%d NTopleaves=%d\n", timediff(t0, t1),
+             tot_numnodes / NTask, NTopnodes, NTopleaves);
+
+  myfree(Ngb_Node_Tmp_Sibling + Ngb_MaxPart);
+
+  Ngb_MarkerValue = 0;
+  memset(Ngb_Marker, 0, (Ngb_MaxPart + Ngb_NumNodes) * sizeof(int));
+
+  TIMER_STOP(CPU_LOGS);
+
+  return Ngb_NumNodes;
+}
+
+/*! \brief Extracts the low 52 bits of a double's bit pattern as an integer.
+ *
+ *  The callers pass values offset by +1.0; presumably these lie in [1,2), so
+ *  that the masked bits act as a fixed-point coordinate -- TODO confirm.
+ *
+ *  \param[in] d Double precision coordinate that is to be converted.
+ *
+ *  \return Bit pattern of d masked with 0xFFFFFFFFFFFFF.
+ */
+static inline unsigned long long ngb_double_to_int(double d)
+{
+  unsigned long long bits = 0;
+
+  memcpy(&bits, &d, sizeof(d)); /* reinterpret the bytes of d without converting the value */
+  return bits & 0xFFFFFFFFFFFFFllu;
+}
+
+/*! \brief Constructs the neighbor oct-tree.
+ *
+ *  The index convention for accessing tree nodes is the following:
+ *
+ *  0...npart-1                                 reference single particles.
+ *  Ngb_MaxPart.... Ngb_MaxPart+Ngb_NumNodes-1  reference tree nodes.
+ *  Ngb_MaxPart + Ngb_MaxNodes....              reference "pseudo particles",
+ *     i.e. the marker that indicates a top-node lying on another CPU.
+ *
+ *  `Ngb_Nodes_base' points to the first tree node; `Ngb_Nodes' is shifted so
+ *  that Ngb_Nodes[Ngb_MaxPart] gives it. Ngb_Node_Tmp_Sibling is allocated
+ *  here and stays allocated on return (0 or -1); the caller must free it.
+ *
+ *  \param[in] npart Number of particles involved.
+ *
+ *  \return status: 0 (default); -1: too many nodes. Terminates instead if
+ *          All.NgbTreeAllocFactor already exceeds MAX_TREE_ALLOC_FACTOR.
+ */
+int ngb_treebuild_construct(int npart)
+{
+  /* create an empty root node  */
+  Ngb_NextFreeNode = Ngb_MaxPart; /* index of first free node */
+
+  for(int i = 0; i < 8; i++)
+    Ngb_Nodes[Ngb_NextFreeNode].u.suns[i] = -1;
+
+  Ngb_NumNodes = 1;
+  Ngb_NextFreeNode++;
+
+  /* Allocate the sibling scratch array before anything can fail: ngb_treebuild()
+   * frees it whenever a task reports -1, so it has to exist on every -1 return. */
+  Ngb_Node_Tmp_Sibling = (int *)mymalloc("Ngb_Node_Tmp_Sibling", (Ngb_MaxNodes + 1) * sizeof(int));
+  Ngb_Node_Tmp_Sibling -= Ngb_MaxPart; /* shift so that it can be indexed by node number */
+
+  /* create a set of empty nodes corresponding to the top-level domain
+   * grid. We need to generate these nodes first to make sure that we have a
+   * complete top-level tree which allows the easy insertion of the
+   * pseudo-particles at the right place */
+  if(ngb_create_empty_nodes(Ngb_MaxPart, 0, 1, 0, 0, 0) < 0)
+    return -1;
+
+  Ngb_FirstNonTopLevelNode = Ngb_NextFreeNode;
+  ngb_record_topnode_siblings(Ngb_MaxPart, -1);
+
+  unsigned long long *ngbTree_IntPos_list =
+      (unsigned long long *)mymalloc("ngbTree_IntPos_list", 3 * npart * sizeof(unsigned long long));
+
+  /* now we insert all particles */
+  {
+    int out_of_space = 0;
+
+    int threadid = get_thread_num();
+    int start, end, size;
+
+    int first_empty_slot = Ngb_NextFreeNode + threadid * TAKE_NSLOTS_IN_ONE_GO;
+    int count_empty_slot = TAKE_NSLOTS_IN_ONE_GO;
+
+    if(threadid == 0)
+      Ngb_NextFreeNode += NUM_THREADS * TAKE_NSLOTS_IN_ONE_GO;
+
+    size  = (npart - 1) / NUM_THREADS + 1;
+    start = threadid * size;
+    end   = (threadid + 1) * size - 1;
+    if(end >= npart)
+      end = npart - 1;
+
+    for(int i = start; i <= end && out_of_space == 0; i++)
+      {
+        unsigned long long xxb  = ngb_double_to_int(((P[i].Pos[0] - DomainCorner[0]) * DomainInverseLen) + 1.0);
+        unsigned long long yyb  = ngb_double_to_int(((P[i].Pos[1] - DomainCorner[1]) * DomainInverseLen) + 1.0);
+        unsigned long long zzb  = ngb_double_to_int(((P[i].Pos[2] - DomainCorner[2]) * DomainInverseLen) + 1.0);
+        unsigned long long mask = ((unsigned long long)1) << (52 - 1);
+        unsigned char shiftx    = (52 - 1);
+        unsigned char shifty    = (52 - 2);
+        unsigned char shiftz    = (52 - 3);
+        unsigned char levels    = 0;
+
+        ngbTree_IntPos_list[3 * i + 0] = xxb;
+        ngbTree_IntPos_list[3 * i + 1] = yyb;
+        ngbTree_IntPos_list[3 * i + 2] = zzb;
+
+        int no = 0;
+        while(TopNodes[no].Daughter >= 0) /* walk down top tree to find correct leaf */
+          {
+            unsigned char subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) |
+                                     ((unsigned char)((zzb & mask) >> (shiftz--))));
+
+            mask >>= 1;
+            levels++;
+
+            no = TopNodes[no].Daughter + TopNodes[no].MortonToPeanoSubnode[subnode];
+          }
+
+        no = TopNodes[no].Leaf;
+
+        if(DomainTask[no] != ThisTask)
+          terminate("STOP!  ID=%lld of type=%d is inserted into task=%d, but should be on task=%d no=%d\n", (long long)P[i].ID,
+                    P[i].Type, ThisTask, DomainTask[no], no);
+
+        int th = Ngb_DomainNodeIndex[no];
+
+        /* th is the node of the local top-leaf; descend from there until a free daughter slot is found */
+
+        int parent            = -1; /* note: will not be used below before it is changed */
+        unsigned char subnode = 0;
+
+        while(1)
+          {
+            if(th >= Ngb_MaxPart) /* we are dealing with an internal node */
+              {
+                subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) |
+                           ((unsigned char)((zzb & mask) >> (shiftz--))));
+
+                /* advance to the next (finer) tree level */
+                mask >>= 1;
+                levels++;
+
+                if(levels > MAX_TREE_LEVEL)
+                  {
+                    /* seems like we're dealing with particles at identical (or extremely close)
+                     * locations. Shift subnode index to allow tree construction. Note: Multipole moments
+                     * of tree are still correct, but one should choose MAX_TREE_LEVEL large enough to have
+                     *      DomainLen/2^MAX_TREE_LEVEL  < gravitational softening length
+                     */
+                    for(int j = 0; j < 8; j++)
+                      {
+                        if(Ngb_Nodes[th].u.suns[subnode] < 0)
+                          break;
+
+                        subnode++;
+                        if(subnode >= 8)
+                          subnode = 7;
+                      }
+                  }
+
+                int nn = Ngb_Nodes[th].u.suns[subnode];
+
+                if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */
+                  {
+                    parent = th;
+                    th     = nn;
+                  }
+                else
+                  {
+                    /* here we have found an empty slot where we can attach
+                     * the new particle as a leaf.
+                     */
+                    Ngb_Nodes[th].u.suns[subnode] = i;
+                    break; /* done for this particle */
+                  }
+              }
+            else
+              {
+                /* We try to insert into a leaf with a single particle.  Need
+                 * to generate a new internal node at this point.
+                 * Then resume trying to insert the new particle at
+                 * the newly created internal node
+                 */
+                int thold = th;
+
+                if(count_empty_slot)
+                  {
+                    th = first_empty_slot + (TAKE_NSLOTS_IN_ONE_GO - count_empty_slot);
+                    count_empty_slot--;
+                  }
+                else
+                  {
+                    {
+                      th = Ngb_NextFreeNode;
+                      Ngb_NextFreeNode += TAKE_NSLOTS_IN_ONE_GO;
+                    }
+
+                    first_empty_slot = th;
+                    count_empty_slot = (TAKE_NSLOTS_IN_ONE_GO - 1);
+
+                    if(first_empty_slot + TAKE_NSLOTS_IN_ONE_GO - Ngb_MaxPart >= Ngb_MaxNodes)
+                      {
+                        out_of_space = 1;
+                        break;
+                      }
+                  }
+
+                Ngb_Nodes[parent].u.suns[subnode] = th;
+                struct NgbNODE *nfreep            = &Ngb_Nodes[th];
+
+                for(int j = 0; j < 8; j++)
+                  nfreep->u.suns[j] = -1;
+
+                unsigned long long *intppos = &ngbTree_IntPos_list[3 * thold];
+
+                subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) |
+                           ((unsigned char)((intppos[2] & mask) >> shiftz)));
+
+                nfreep->u.suns[subnode] = thold;
+              }
+          }
+      }
+  }
+
+  myfree(ngbTree_IntPos_list);
+
+  if((Ngb_NumNodes = Ngb_NextFreeNode - Ngb_MaxPart) >= Ngb_MaxNodes)
+    {
+      if(All.NgbTreeAllocFactor > MAX_TREE_ALLOC_FACTOR)
+        {
+          dump_particles();
+          terminate("task %d: out of space for neighbor tree, stopping with particle dump.\n", ThisTask);
+        }
+      else
+        return -1;
+    }
+
+  return 0;
+}
+
+/*! \brief Creates the empty ngb-tree nodes below one top-level node.
+ *
+ *  Recursively mirrors the top-level tree of the domain decomposition
+ *  (TopNodes) with empty nodes in Ngb_Nodes, so that the top-level tree is
+ *  always "complete": pseudo-particles of other CPUs can then be attached
+ *  to a node at the right level even where no local particle exists. For
+ *  every top-leaf the created node is recorded in Ngb_DomainNodeIndex.
+ *
+ *  \param[in] no Index of the parent node in the Ngb_Nodes array.
+ *  \param[in] topnode Index in TopNodes that corresponds to no.
+ *  \param[in] bits Bit count handed to peano_hilbert_key() for this level.
+ *  \param[in] x Integer coordinate X.
+ *  \param[in] y Integer coordinate Y.
+ *  \param[in] z Integer coordinate Z.
+ *
+ *  \return Status: 0 success; -1 when Ngb_MaxNodes nodes are already in
+ *          use (more node storage is needed).
+ */
+int ngb_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z)
+{
+  if(TopNodes[topnode].Daughter < 0)
+    return 0; /* a top-level leaf has no daughters to create */
+
+  for(int oct = 0; oct < 8; oct++)
+    {
+      /* same visiting order as nested i/j/k loops with k running fastest */
+      int i = (oct >> 2) & 1;
+      int j = (oct >> 1) & 1;
+      int k = oct & 1;
+
+      if(Ngb_NumNodes >= Ngb_MaxNodes)
+        {
+          if(All.NgbTreeAllocFactor > MAX_TREE_ALLOC_FACTOR)
+            {
+              dump_particles();
+              terminate("task %d: looks like a serious problem (NTopnodes=%d), stopping with particle dump.\n", ThisTask,
+                        NTopnodes);
+            }
+          return -1;
+        }
+
+      int sub      = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits);
+      int daughter = TopNodes[topnode].Daughter + sub; /* index of the matching entry in TopNodes */
+      int newnode  = Ngb_NextFreeNode++;
+      Ngb_NumNodes++;
+
+      Ngb_Nodes[no].u.suns[i + 2 * j + 4 * k] = newnode;
+
+      for(int n = 0; n < 8; n++)
+        Ngb_Nodes[newnode].u.suns[n] = -1;
+
+      if(TopNodes[daughter].Daughter == -1)
+        Ngb_DomainNodeIndex[TopNodes[daughter].Leaf] = newnode;
+
+      if(ngb_create_empty_nodes(newnode, daughter, bits + 1, 2 * x + i, 2 * y + j, 2 * z + k) < 0)
+        return -1;
+    }
+
+  return 0;
+}
+
+/*! \brief Determine node ranges.
+ *
+ *  This routine determines the position and vertex-velocity ranges of a
+ *  given internal node and all its subnodes using a recursive computation.
+ *  It also sets the nextnode/sibling/father links of the visited elements
+ *  in the sequence of this tree-walk.
+ *
+ *  \param[in] no Index of node.
+ *  \param[in] sib Sibling node of no.
+ *  \param[in] father Parent node of no.
+ *  \param[in, out] last Pointer to last node for which this function was
+ *                  called.
+ *  \param[in] mode 0: process a leaf branch; 1: process top-level nodes.
+ *
+ *  \return void
+ */
+void ngb_update_node_recursive(int no, int sib, int father, int *last, int mode)
+{
+  int j, jj, k, p, pp, nextsib, suns[8];
+  MyNgbTreeFloat range_min[3];
+  MyNgbTreeFloat range_max[3];
+  MyNgbTreeFloat vertex_vmin[3];
+  MyNgbTreeFloat vertex_vmax[3];
+#ifdef TREE_BASED_TIMESTEPS
+  MyNgbTreeFloat vmin[3], vmax[3], maxcsnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+  if(no >= Ngb_MaxPart && no < Ngb_MaxPart + Ngb_MaxNodes) /* internal node */
+    {
+      if(*last >= 0) /* make the previously visited element point to this node as its nextnode */
+        {
+          if(*last >= Ngb_MaxPart)
+            {
+              if(*last == no)
+                terminate("as");
+
+              if(*last >= Ngb_MaxPart + Ngb_MaxNodes) /* a pseudo-particle */
+                Ngb_Nextnode[*last - Ngb_MaxNodes] = no;
+              else
+                Ngb_Nodes[*last].u.d.nextnode = no;
+            }
+          else
+            Ngb_Nextnode[*last] = no;
+        }
+
+      *last = no;
+
+      int not_interal_top_level = 0;
+
+      if(mode == 1)
+        {
+          if(!(no >= Ngb_MaxPart && no < Ngb_FirstNonTopLevelNode))
+            terminate("can't be");
+
+          if(Ngb_Node_Tmp_Sibling[no] != -2) /* -2 is the marker stored for internal top-level nodes */
+            not_interal_top_level = 1;
+        }
+
+      if(not_interal_top_level)
+        {
+          p = Ngb_Nodes[no].u.d.nextnode;
+
+          if(p >= Ngb_MaxPart + Ngb_MaxNodes &&
+             p < Ngb_MaxPart + Ngb_MaxNodes + NTopleaves) /* a pseudo-particle, i.e. we are dealing with a non-local top-leaf */
+            ngb_update_node_recursive(p, sib, no, last, mode);
+          else
+            {
+              /* this is local toplevel node */
+              *last = Ngb_Nodes[no].u.d.sibling;
+            }
+
+          if(Ngb_Node_Tmp_Sibling[no] != sib)
+            terminate("Ngb_Node_Tmp_Sibling[no] != sib");
+
+          /* restore the sibling pointer for local toplevel nodes (we had temporarily stored the last element in this branch) */
+          Ngb_Nodes[no].u.d.sibling = sib;
+          Ngb_Nodes[no].father      = father;
+        }
+      else
+        {
+          for(j = 0; j < 8; j++)
+            suns[j] = Ngb_Nodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will
+                                                  overwrite one element (union!) */
+
+#ifdef TREE_BASED_TIMESTEPS
+          maxcsnd = 0;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+          for(k = 0; k < 3; k++) /* start from an empty (inverted) bounding range */
+            {
+              range_min[k] = MAX_NGBRANGE_NUMBER;
+              range_max[k] = -MAX_NGBRANGE_NUMBER;
+
+              vertex_vmin[k] = MAX_NGBRANGE_NUMBER;
+              vertex_vmax[k] = -MAX_NGBRANGE_NUMBER;
+
+#ifdef TREE_BASED_TIMESTEPS
+              vmin[k] = MAX_NGBRANGE_NUMBER;
+              vmax[k] = -MAX_NGBRANGE_NUMBER;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+            }
+
+          for(j = 0; j < 8; j++)
+            {
+              if((p = suns[j]) >= 0)
+                {
+                  /* check if we have a sibling on the same level */
+                  for(jj = j + 1; jj < 8; jj++)
+                    if((pp = suns[jj]) >= 0)
+                      break;
+
+                  if(jj < 8) /* yes, we do */
+                    nextsib = pp;
+                  else
+                    nextsib = sib;
+
+                  ngb_update_node_recursive(p, nextsib, no, last, mode);
+
+                  if(p >= Ngb_MaxPart) /* an internal node or pseudo particle */
+                    {
+                      if(p >= Ngb_MaxPart + Ngb_MaxNodes) /* a pseudo particle */
+                        {
+                          /* nothing to be done here because the mass of the
+                           * pseudo-particle is still zero. This will be changed
+                           * later.
+                           */
+                        }
+                      else
+                        {
+#ifdef TREE_BASED_TIMESTEPS
+                          if(maxcsnd < ExtNgb_Nodes[p].MaxCsnd)
+                            maxcsnd = ExtNgb_Nodes[p].MaxCsnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+                          for(k = 0; k < 3; k++) /* enlarge the ranges by those of the daughter node */
+                            {
+                              if(range_min[k] > Ngb_Nodes[p].u.d.range_min[k])
+                                range_min[k] = Ngb_Nodes[p].u.d.range_min[k];
+
+                              if(range_max[k] < Ngb_Nodes[p].u.d.range_max[k])
+                                range_max[k] = Ngb_Nodes[p].u.d.range_max[k];
+
+                              if(vertex_vmin[k] > Ngb_Nodes[p].vertex_vmin[k])
+                                vertex_vmin[k] = Ngb_Nodes[p].vertex_vmin[k];
+
+                              if(vertex_vmax[k] < Ngb_Nodes[p].vertex_vmax[k])
+                                vertex_vmax[k] = Ngb_Nodes[p].vertex_vmax[k];
+
+#ifdef TREE_BASED_TIMESTEPS
+                              if(vmin[k] > ExtNgb_Nodes[p].vmin[k])
+                                vmin[k] = ExtNgb_Nodes[p].vmin[k];
+
+                              if(vmax[k] < ExtNgb_Nodes[p].vmax[k])
+                                vmax[k] = ExtNgb_Nodes[p].vmax[k];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+                            }
+                        }
+                    }
+                  else /* a particle */
+                    {
+#ifdef TREE_BASED_TIMESTEPS
+                      if(maxcsnd < SphP[p].Csnd)
+                        maxcsnd = SphP[p].Csnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+                      for(k = 0; k < 3; k++) /* enlarge the ranges by the particle's position and velocities */
+                        {
+                          if(range_min[k] > P[p].Pos[k])
+                            range_min[k] = P[p].Pos[k];
+
+                          if(range_max[k] < P[p].Pos[k])
+                            range_max[k] = P[p].Pos[k];
+
+                          if(P[p].Type == 0)
+                            {
+                              if(vertex_vmin[k] > SphP[p].VelVertex[k])
+                                vertex_vmin[k] = SphP[p].VelVertex[k];
+
+                              if(vertex_vmax[k] < SphP[p].VelVertex[k])
+                                vertex_vmax[k] = SphP[p].VelVertex[k];
+                            }
+
+#ifdef TREE_BASED_TIMESTEPS
+                          if(vmin[k] > P[p].Vel[k])
+                            vmin[k] = P[p].Vel[k];
+
+                          if(vmax[k] < P[p].Vel[k])
+                            vmax[k] = P[p].Vel[k];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+                        }
+                    }
+                }
+            }
+
+#ifdef TREE_BASED_TIMESTEPS
+          ExtNgb_Nodes[no].MaxCsnd = maxcsnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+          for(k = 0; k < 3; k++) /* store the accumulated ranges in the node */
+            {
+              Ngb_Nodes[no].u.d.range_min[k] = range_min[k];
+              Ngb_Nodes[no].u.d.range_max[k] = range_max[k];
+              Ngb_Nodes[no].vertex_vmin[k]   = vertex_vmin[k];
+              Ngb_Nodes[no].vertex_vmax[k]   = vertex_vmax[k];
+#ifdef TREE_BASED_TIMESTEPS
+              ExtNgb_Nodes[no].vmin[k] = vmin[k];
+              ExtNgb_Nodes[no].vmax[k] = vmax[k];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+            }
+
+          Ngb_Nodes[no].u.d.sibling = sib;
+          Ngb_Nodes[no].father      = father;
+
+          Ngb_Nodes[no].Ti_Current = All.Ti_Current;
+        }
+    }
+  else /* single particle or pseudo particle */
+    {
+      if(*last >= 0) /* make the previously visited element point to this one as its nextnode */
+        {
+          if(*last >= Ngb_MaxPart)
+            {
+              if(*last >= Ngb_MaxPart + Ngb_MaxNodes) /* a pseudo-particle */
+                Ngb_Nextnode[*last - Ngb_MaxNodes] = no;
+              else
+                Ngb_Nodes[*last].u.d.nextnode = no;
+            }
+          else
+            {
+              Ngb_Nextnode[*last] = no;
+            }
+        }
+      if(no < Ngb_MaxPart) /* only set it for single particles... */
+        {
+          if(father < Ngb_MaxPart)
+            terminate("no=%d father=%d\n", no, father);
+
+          Ngb_Father[no] = father;
+        }
+
+      *last = no;
+    }
+}
+
+/*! \brief Records sibling information in Ngb_Node_Tmp_Sibling for node no and
+ *         its subnodes: leaves store sib, internal nodes the marker -2.
+ *
+ *  \param[in] no Index of node.
+ *  \param[in] sib Index of sibling.
+ *  \return void
+ */
+void ngb_record_topnode_siblings(int no, int sib)
+{
+  /* note: when this routine is called, only toplevel tree nodes are present */
+
+  if(Ngb_Nodes[no].u.suns[0] < 0)
+    {
+      /* a top-level leaf node: remember its sibling */
+      Ngb_Node_Tmp_Sibling[no] = sib;
+      return;
+    }
+
+  /* marker value to designate internal nodes in the top-level tree */
+  Ngb_Node_Tmp_Sibling[no] = -2;
+
+  for(int j = 0; j < 8; j++)
+    {
+      int daughter = Ngb_Nodes[no].u.suns[j];
+
+      /* the sibling of a daughter is the next daughter slot;
+       * the last daughter inherits the sibling of this node */
+      int nextsib = (j < 7) ? Ngb_Nodes[no].u.suns[j + 1] : sib;
+
+      /* recurse; this only writes entries of Ngb_Node_Tmp_Sibling */
+      ngb_record_topnode_siblings(daughter, nextsib);
+    }
+}
+
+/*! \brief Gathers the node ranges of all top-leaves from their owning tasks.
+ *
+ *  \return void
+ */
+void ngb_exchange_topleafdata(void)
+{
+  struct DomainNODE
+  {
+    MyNgbTreeFloat range_min[3];
+    MyNgbTreeFloat range_max[3];
+    MyNgbTreeFloat vertex_vmin[3];
+    MyNgbTreeFloat vertex_vmax[3];
+#ifdef TREE_BASED_TIMESTEPS
+    MyNgbTreeFloat MaxCsnd, vmin[3], vmax[3];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+  };
+
+  struct DomainNODE *DomainMoment = (struct DomainNODE *)mymalloc("DomainMoment", NTopleaves * sizeof(struct DomainNODE));
+
+  /* per-task record counts and start offsets, in records and in bytes */
+  int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask);
+  int *recvoffset = (int *)mymalloc("recvoffset", sizeof(int) * NTask);
+  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
+  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);
+
+  /* count the top-leaves owned by each task */
+  memset(recvcounts, 0, sizeof(int) * NTask);
+  for(int n = 0; n < NTopleaves; n++)
+    recvcounts[DomainTask[n]]++;
+
+  /* running sums give the position of each task's first record */
+  for(int task = 0, nrec = 0, nbytes = 0; task < NTask; task++)
+    {
+      bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE);
+      recvoffset[task] = nrec;
+      byteoffset[task] = nbytes;
+
+      nrec += recvcounts[task];
+      nbytes += bytecounts[task];
+    }
+
+  struct DomainNODE *loc_DomainMoment =
+      (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE));
+
+  /* pack the records of the local top-leaves, in order of increasing leaf index */
+  int nlocal = 0;
+  for(int n = 0; n < NTopleaves; n++)
+    {
+      if(DomainTask[n] != ThisTask)
+        continue;
+
+      int no                 = Ngb_DomainNodeIndex[n];
+      struct DomainNODE *out = &loc_DomainMoment[nlocal++];
+
+#ifdef TREE_BASED_TIMESTEPS
+      out->MaxCsnd = ExtNgb_Nodes[no].MaxCsnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+      for(int k = 0; k < 3; k++)
+        {
+          out->range_min[k]   = Ngb_Nodes[no].u.d.range_min[k];
+          out->range_max[k]   = Ngb_Nodes[no].u.d.range_max[k];
+          out->vertex_vmin[k] = Ngb_Nodes[no].vertex_vmin[k];
+          out->vertex_vmax[k] = Ngb_Nodes[no].vertex_vmax[k];
+#ifdef TREE_BASED_TIMESTEPS
+          out->vmin[k] = ExtNgb_Nodes[no].vmin[k];
+          out->vmax[k] = ExtNgb_Nodes[no].vmax[k];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+        }
+    }
+
+  MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);
+
+  /* unpack the records of all non-local top-leaves; recvcounts is reused as a per-task read cursor */
+  memset(recvcounts, 0, sizeof(int) * NTask);
+  for(int n = 0; n < NTopleaves; n++)
+    {
+      int task = DomainTask[n];
+      if(task == ThisTask)
+        continue;
+
+      int no                      = Ngb_DomainNodeIndex[n];
+      const struct DomainNODE *in = &DomainMoment[recvoffset[task] + recvcounts[task]++];
+
+#ifdef TREE_BASED_TIMESTEPS
+      ExtNgb_Nodes[no].MaxCsnd = in->MaxCsnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+      for(int k = 0; k < 3; k++)
+        {
+          Ngb_Nodes[no].u.d.range_min[k] = in->range_min[k];
+          Ngb_Nodes[no].u.d.range_max[k] = in->range_max[k];
+          Ngb_Nodes[no].vertex_vmin[k]   = in->vertex_vmin[k];
+          Ngb_Nodes[no].vertex_vmax[k]   = in->vertex_vmax[k];
+#ifdef TREE_BASED_TIMESTEPS
+          ExtNgb_Nodes[no].vmin[k] = in->vmin[k];
+          ExtNgb_Nodes[no].vmax[k] = in->vmax[k];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+        }
+      Ngb_Nodes[no].Ti_Current = All.Ti_Current;
+    }
+
+  /* release in reverse order of allocation */
+  myfree(loc_DomainMoment);
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(recvoffset);
+  myfree(recvcounts);
+  myfree(DomainMoment);
+}
+
+/*! \brief Moves the position range of a node forward to time time1.
+ *
+ *  \param[in, out] current Node whose range and time stamp are updated.
+ *  \param[in] time1 Time to be drifted to.
+ *
+ *  \return void
+ */
+void drift_node(struct NgbNODE *current, integertime time1)
+{
+  double dt_drift;
+
+  /* get_drift_factor() if All.ComovingIntegrationOn is set, else time difference times All.Timebase_interval */
+  dt_drift = All.ComovingIntegrationOn ? get_drift_factor(current->Ti_Current, time1)
+                                       : (time1 - current->Ti_Current) * All.Timebase_interval;
+
+  /* the lower bound moves with vertex_vmin, the upper bound with vertex_vmax */
+  for(int axis = 0; axis < 3; axis++)
+    {
+      current->u.d.range_min[axis] += current->vertex_vmin[axis] * dt_drift;
+      current->u.d.range_max[axis] += current->vertex_vmax[axis] * dt_drift;
+    }
+
+  current->Ti_Current = time1; /* the node now refers to time1 */
+}
+
+/*! \brief Updates velocity information in ngb node data.
+ *
+ *  \return void
+ */
+void ngb_update_velocities(void)
+{
+  TIMER_START(CPU_NGBTREEUPDATEVEL);
+
+  Ngb_MarkerValue++;
+
+  int nchanged  = 0;
+  int *nodelist = (int *)mymalloc("nodelist", NTopleaves * sizeof(int));
+
+  /* pass 1: gas cells in the list of hydrodynamically active particles */
+  for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      int target = TimeBinsHydro.ActiveParticleList[idx];
+      if(target < 0) /* negative list entries are skipped */
+        continue;
+
+      if(P[target].Type == 0)
+        ngb_update_vbounds(target, &nchanged, nodelist);
+    }
+
+  /* pass 2: gas cells linked into the gravity time bins up to the highest synchronized one */
+  for(int timebin = All.HighestSynchronizedTimeBin; timebin >= 0; timebin--)
+    for(int target = TimeBinsGravity.FirstInTimeBin[timebin]; target >= 0; target = TimeBinsGravity.NextInTimeBin[target])
+      if(P[target].Type == 0) /* the loop condition already guarantees target >= 0 */
+        ngb_update_vbounds(target, &nchanged, nodelist);
+
+  ngb_finish_vounds_update(nchanged, nodelist);
+  myfree(nodelist);
+
+  TIMER_STOP(CPU_NGBTREEUPDATEVEL);
+}
+
+/*! \brief Updates vmin and vmax in ngb nodes.
+ *
+ *  Inverse tree walk: starts at Ngb_Father[i] and follows the father links upwards.
+ *
+ *  \param[in] i Index of particle.
+ *  \param[in, out] nchanged Number of changed top level nodes.
+ *  \param[in, out] nodelist Top level nodes that were changed; new entries are stored at index *nchanged.
+ *
+ *  \return void
+ */
+void ngb_update_vbounds(int i, int *nchanged, int *nodelist)
+{
+  int no = Ngb_Father[i];
+
+  while(no >= 0)
+    {
+      if(Ngb_Nodes[no].Ti_Current != All.Ti_Current) /* node lags behind: drift it to the current time first */
+        drift_node(&Ngb_Nodes[no], All.Ti_Current);
+
+      int flag_changed = 0; /* becomes 1 as soon as one bound of this node is widened */
+
+      for(int j = 0; j < 3; j++)
+        {
+          if(Ngb_Nodes[no].vertex_vmin[j] > SphP[i].VelVertex[j])
+            {
+              Ngb_Nodes[no].vertex_vmin[j] = SphP[i].VelVertex[j];
+              flag_changed                 = 1;
+            }
+
+          if(Ngb_Nodes[no].vertex_vmax[j] < SphP[i].VelVertex[j])
+            {
+              Ngb_Nodes[no].vertex_vmax[j] = SphP[i].VelVertex[j];
+              flag_changed                 = 1;
+            }
+
+#ifdef TREE_BASED_TIMESTEPS
+          if(ExtNgb_Nodes[no].vmin[j] > P[i].Vel[j]) /* same widening for the bounds on P[i].Vel */
+            {
+              ExtNgb_Nodes[no].vmin[j] = P[i].Vel[j];
+              flag_changed             = 1;
+            }
+
+          if(ExtNgb_Nodes[no].vmax[j] < P[i].Vel[j])
+            {
+              ExtNgb_Nodes[no].vmax[j] = P[i].Vel[j];
+              flag_changed             = 1;
+            }
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+        }
+
+      if(flag_changed == 0) /* no bound of this node changed: stop the upward walk */
+        break;
+
+      if(no < Ngb_FirstNonTopLevelNode) /* top-level tree-node reached */
+        {
+          if(Ngb_Marker[no] != Ngb_MarkerValue) /* not yet listed for the current marker value */
+            {
+              Ngb_Marker[no]      = Ngb_MarkerValue;
+              nodelist[*nchanged] = no;
+              *nchanged           = *nchanged + 1;
+            }
+          break; /* the walk above listed top-level nodes is done in ngb_finish_vounds_update() */
+        }
+
+      no = Ngb_Nodes[no].father;
+    }
+}
+
+/*! \brief Finalizes velocity bounds update.
+ *
+ *  Exchanges changed information in top level nodes to all tasks.
+ *
+ *  \param[in] nchanged Number of changed top level nodes.
+ *  \param[in] nodelist List of changed top level nodes.
+ *
+ *  \return void
+ */
+void ngb_finish_vounds_update(int nchanged, int *nodelist)
+{
+  struct DomainNODE /* record that is exchanged for every changed top level node */
+  {
+    int node; /* index of the node in Ngb_Nodes */
+    MyNgbTreeFloat vertex_vmin[3];
+    MyNgbTreeFloat vertex_vmax[3];
+#ifdef TREE_BASED_TIMESTEPS
+    MyNgbTreeFloat vmin[3];
+    MyNgbTreeFloat vmax[3];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+  };
+
+  /* share the pseudo-particle data across CPUs */
+  int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask);
+  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
+  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);
+
+  MPI_Allgather(&nchanged, 1, MPI_INT, recvcounts, 1, MPI_INT, MPI_COMM_WORLD); /* recvcounts[task] = nchanged of that task */
+
+  for(int task = 0; task < NTask; task++) /* the records are transferred as raw bytes (MPI_BYTE) */
+    bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE);
+
+  byteoffset[0] = 0; /* running sum of bytecounts gives each task's position in the receive buffer */
+  for(int task = 1; task < NTask; task++)
+    byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+
+  struct DomainNODE *loc_DomainMoment =
+      (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE));
+
+  for(int i = 0; i < nchanged; i++) /* fill the send buffer from the locally changed nodes */
+    {
+      int no                   = nodelist[i];
+      loc_DomainMoment[i].node = no;
+
+      for(int j = 0; j < 3; j++)
+        {
+          loc_DomainMoment[i].vertex_vmin[j] = Ngb_Nodes[no].vertex_vmin[j];
+          loc_DomainMoment[i].vertex_vmax[j] = Ngb_Nodes[no].vertex_vmax[j];
+#ifdef TREE_BASED_TIMESTEPS
+          loc_DomainMoment[i].vmin[j] = ExtNgb_Nodes[no].vmin[j];
+          loc_DomainMoment[i].vmax[j] = ExtNgb_Nodes[no].vmax[j];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+        }
+    }
+
+  int tot_nchanged = 0; /* number of records summed over all tasks */
+  for(int task = 0; task < NTask; task++)
+    tot_nchanged += recvcounts[task];
+
+  struct DomainNODE *tot_DomainMoment = (struct DomainNODE *)mymalloc("tot_DomainMoment", tot_nchanged * sizeof(struct DomainNODE));
+
+  MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, tot_DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);
+
+  for(int i = 0; i < tot_nchanged; i++) /* apply every received record, including the ones sent by this task */
+    {
+      int no = tot_DomainMoment[i].node;
+
+      if(Ngb_Nodes[no].Ti_Current != All.Ti_Current)
+        drift_node(&Ngb_Nodes[no], All.Ti_Current);
+
+      for(int j = 0; j < 3; j++) /* the listed node itself takes over the received bounds unconditionally */
+        {
+          Ngb_Nodes[no].vertex_vmin[j] = tot_DomainMoment[i].vertex_vmin[j];
+          Ngb_Nodes[no].vertex_vmax[j] = tot_DomainMoment[i].vertex_vmax[j];
+#ifdef TREE_BASED_TIMESTEPS
+          ExtNgb_Nodes[no].vmin[j] = tot_DomainMoment[i].vmin[j];
+          ExtNgb_Nodes[no].vmax[j] = tot_DomainMoment[i].vmax[j];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+        }
+
+      no = Ngb_Nodes[no].father;
+
+      while(no >= 0) /* nodes above it are only widened, and the walk stops at the first unchanged one */
+        {
+          if(Ngb_Nodes[no].Ti_Current != All.Ti_Current)
+            drift_node(&Ngb_Nodes[no], All.Ti_Current);
+
+          int flag_changed = 0;
+
+          for(int j = 0; j < 3; j++)
+            {
+              if(Ngb_Nodes[no].vertex_vmin[j] > tot_DomainMoment[i].vertex_vmin[j])
+                {
+                  Ngb_Nodes[no].vertex_vmin[j] = tot_DomainMoment[i].vertex_vmin[j];
+                  flag_changed                 = 1;
+                }
+
+              if(Ngb_Nodes[no].vertex_vmax[j] < tot_DomainMoment[i].vertex_vmax[j])
+                {
+                  Ngb_Nodes[no].vertex_vmax[j] = tot_DomainMoment[i].vertex_vmax[j];
+                  flag_changed                 = 1;
+                }
+#ifdef TREE_BASED_TIMESTEPS
+              if(ExtNgb_Nodes[no].vmin[j] > tot_DomainMoment[i].vmin[j])
+                {
+                  ExtNgb_Nodes[no].vmin[j] = tot_DomainMoment[i].vmin[j];
+                  flag_changed             = 1;
+                }
+
+              if(ExtNgb_Nodes[no].vmax[j] < tot_DomainMoment[i].vmax[j])
+                {
+                  ExtNgb_Nodes[no].vmax[j] = tot_DomainMoment[i].vmax[j];
+                  flag_changed             = 1;
+                }
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+            }
+
+          if(flag_changed == 0)
+            break;
+
+          no = Ngb_Nodes[no].father;
+        }
+    }
+
+  myfree(tot_DomainMoment); /* buffers are released in reverse order of their allocation */
+  myfree(loc_DomainMoment);
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(recvcounts);
+}
+
+/*! \brief Updates min and max position in ngb nodes.
+ *
+ *  Inverse tree walk: starts at Ngb_Father[i] and follows the father links upwards.
+ *
+ *  \param[in] i Index of particle.
+ *  \param[in, out] nchanged Number of changed top level nodes.
+ *  \param[in, out] nodelist Top level nodes that were changed; new entries are stored at index *nchanged.
+ *
+ *  \return void
+ */
+void ngb_update_rangebounds(int i, int *nchanged, int *nodelist)
+{
+  int no = Ngb_Father[i];
+
+  while(no >= 0)
+    {
+      if(Ngb_Nodes[no].Ti_Current != All.Ti_Current) /* node lags behind: drift it to the current time first */
+        drift_node(&Ngb_Nodes[no], All.Ti_Current);
+
+      int flag_changed = 0; /* becomes 1 as soon as the node's range is enlarged */
+
+      for(int j = 0; j < 3; j++)
+        {
+          if(Ngb_Nodes[no].u.d.range_min[j] > P[i].Pos[j])
+            {
+              Ngb_Nodes[no].u.d.range_min[j] = P[i].Pos[j];
+              flag_changed                   = 1;
+            }
+
+          if(Ngb_Nodes[no].u.d.range_max[j] < P[i].Pos[j])
+            {
+              Ngb_Nodes[no].u.d.range_max[j] = P[i].Pos[j];
+              flag_changed                   = 1;
+            }
+        }
+
+      if(flag_changed == 0) /* the range of this node did not change: stop the upward walk */
+        break;
+
+      if(no < Ngb_FirstNonTopLevelNode) /* top-level tree-node reached */
+        {
+          if(Ngb_Marker[no] != Ngb_MarkerValue) /* not yet listed for the current marker value */
+            {
+              Ngb_Marker[no]      = Ngb_MarkerValue;
+              nodelist[*nchanged] = no;
+              *nchanged           = *nchanged + 1;
+            }
+          break; /* the walk above listed top-level nodes is done in ngb_finish_rangebounds_update() */
+        }
+
+      no = Ngb_Nodes[no].father;
+    }
+}
+
+/*! \brief Finalizes position bounds update.
+ *
+ *  Exchanges changed information in top level nodes to all tasks.
+ *
+ *  \param[in] nchanged Number of changed top level nodes.
+ *  \param[in] nodelist List of changed top level nodes.
+ *
+ *  \return void
+ */
+void ngb_finish_rangebounds_update(int nchanged, int *nodelist)
+{
+  struct DomainNODE /* record that is exchanged for every changed top level node */
+  {
+    int node; /* index of the node in Ngb_Nodes */
+    MyNgbTreeFloat range_min[3];
+    MyNgbTreeFloat range_max[3];
+  };
+
+  /* share the pseudo-particle data across CPUs */
+  int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask);
+  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
+  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);
+
+  MPI_Allgather(&nchanged, 1, MPI_INT, recvcounts, 1, MPI_INT, MPI_COMM_WORLD); /* recvcounts[task] = nchanged of that task */
+
+  for(int task = 0; task < NTask; task++) /* the records are transferred as raw bytes (MPI_BYTE) */
+    bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE);
+
+  byteoffset[0] = 0; /* running sum of bytecounts gives each task's position in the receive buffer */
+  for(int task = 1; task < NTask; task++)
+    byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+
+  struct DomainNODE *loc_DomainMoment =
+      (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE));
+
+  for(int i = 0; i < nchanged; i++) /* fill the send buffer from the locally changed nodes */
+    {
+      int no                   = nodelist[i];
+      loc_DomainMoment[i].node = no;
+
+      for(int j = 0; j < 3; j++)
+        {
+          loc_DomainMoment[i].range_min[j] = Ngb_Nodes[no].u.d.range_min[j];
+          loc_DomainMoment[i].range_max[j] = Ngb_Nodes[no].u.d.range_max[j];
+        }
+    }
+
+  int tot_nchanged = 0; /* number of records summed over all tasks */
+  for(int task = 0; task < NTask; task++)
+    tot_nchanged += recvcounts[task];
+
+  struct DomainNODE *tot_DomainMoment = (struct DomainNODE *)mymalloc("tot_DomainMoment", tot_nchanged * sizeof(struct DomainNODE));
+
+  MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, tot_DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);
+
+  for(int i = 0; i < tot_nchanged; i++) /* apply every received record, including the ones sent by this task */
+    {
+      int no = tot_DomainMoment[i].node;
+
+      if(Ngb_Nodes[no].Ti_Current != All.Ti_Current)
+        drift_node(&Ngb_Nodes[no], All.Ti_Current);
+
+      for(int j = 0; j < 3; j++) /* the listed node itself takes over the received range unconditionally */
+        {
+          Ngb_Nodes[no].u.d.range_min[j] = tot_DomainMoment[i].range_min[j];
+          Ngb_Nodes[no].u.d.range_max[j] = tot_DomainMoment[i].range_max[j];
+        }
+
+      no = Ngb_Nodes[no].father;
+
+      while(no >= 0) /* nodes above it are only enlarged, and the walk stops at the first unchanged one */
+        {
+          if(Ngb_Nodes[no].Ti_Current != All.Ti_Current)
+            drift_node(&Ngb_Nodes[no], All.Ti_Current);
+
+          int flag_changed = 0;
+
+          for(int j = 0; j < 3; j++)
+            {
+              if(Ngb_Nodes[no].u.d.range_min[j] > tot_DomainMoment[i].range_min[j])
+                {
+                  Ngb_Nodes[no].u.d.range_min[j] = tot_DomainMoment[i].range_min[j];
+                  flag_changed                   = 1;
+                }
+
+              if(Ngb_Nodes[no].u.d.range_max[j] < tot_DomainMoment[i].range_max[j])
+                {
+                  Ngb_Nodes[no].u.d.range_max[j] = tot_DomainMoment[i].range_max[j];
+                  flag_changed                   = 1;
+                }
+            }
+
+          if(flag_changed == 0)
+            break;
+
+          no = Ngb_Nodes[no].father;
+        }
+    }
+
+  myfree(tot_DomainMoment); /* buffers are released in reverse order of their allocation */
+  myfree(loc_DomainMoment);
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(recvcounts);
+}
+
+/*! \brief Adjust ngb-tree structures due to a change in number of gas cells.
+ *
+ *  \param[in] delta_NgbMaxPart Difference in number of cells; added to Ngb_MaxPart and to all stored indices >= Ngb_MaxPart.
+ *
+ *  \return void
+ */
+void ngb_treemodifylength(int delta_NgbMaxPart)
+{
+  mpi_printf("ALLOCATE: Need to adjust NgbTree because Ngb_MaxPart needs to grow by %d\n", delta_NgbMaxPart);
+
+  for(int i = 0; i < Ngb_MaxPart + NTopleaves; i++) /* check for particles and pseudo particles */
+    if(Ngb_Nextnode[i] >= Ngb_MaxPart)              /* internal node or pseudo particle */
+      Ngb_Nextnode[i] += delta_NgbMaxPart;
+
+  for(int i = 0; i < Ngb_MaxPart; i++)
+    if(Ngb_Father[i] >= Ngb_MaxPart) /* internal node or pseudo particle */
+      Ngb_Father[i] += delta_NgbMaxPart;
+
+  for(int i = 0; i < Ngb_MaxNodes; i++) /* the same shift for the links stored inside the nodes */
+    {
+      if(Ngb_Nodes[i + Ngb_MaxPart].u.d.nextnode >= Ngb_MaxPart) /* internal node or pseudo particle */
+        Ngb_Nodes[i + Ngb_MaxPart].u.d.nextnode += delta_NgbMaxPart;
+
+      if(Ngb_Nodes[i + Ngb_MaxPart].u.d.sibling >= Ngb_MaxPart) /* internal node or pseudo particle */
+        Ngb_Nodes[i + Ngb_MaxPart].u.d.sibling += delta_NgbMaxPart;
+
+      if(Ngb_Nodes[i + Ngb_MaxPart].father >= Ngb_MaxPart)
+        Ngb_Nodes[i + Ngb_MaxPart].father += delta_NgbMaxPart;
+    }
+
+  for(int i = 0; i < NTopleaves; i++) /* shifted unconditionally, unlike the index arrays above */
+    Ngb_DomainNodeIndex[i] += delta_NgbMaxPart;
+
+  Ngb_Nextnode = (int *)myrealloc_movable(Ngb_Nextnode, (Ngb_MaxPart + delta_NgbMaxPart + NTopleaves) * sizeof(int));
+  /* move the NTopleaves trailing entries behind the enlarged particle section */
+  memmove(&Ngb_Nextnode[Ngb_MaxPart + delta_NgbMaxPart], &Ngb_Nextnode[Ngb_MaxPart], NTopleaves * sizeof(int));
+
+  Ngb_MaxPart += delta_NgbMaxPart; /* from here on Ngb_MaxPart holds the new value */
+
+  Ngb_FirstNonTopLevelNode += delta_NgbMaxPart;
+
+  Ngb_Nodes -= delta_NgbMaxPart; /* pointer is kept offset by -Ngb_MaxPart (see ngb_treeallocate); keep that in step */
+
+#ifdef TREE_BASED_TIMESTEPS
+  ExtNgb_Nodes -= delta_NgbMaxPart;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+  Ngb_Father = (int *)myrealloc_movable(Ngb_Father, Ngb_MaxPart * sizeof(int));
+
+  Ngb_Marker = (int *)myrealloc_movable(Ngb_Marker, (Ngb_MaxNodes + Ngb_MaxPart) * sizeof(int));
+  memmove(Ngb_Marker + Ngb_MaxPart, Ngb_Marker + Ngb_MaxPart - delta_NgbMaxPart, Ngb_MaxNodes * sizeof(int)); /* move node markers up */
+  memset(Ngb_Marker + Ngb_MaxPart - delta_NgbMaxPart, -1, delta_NgbMaxPart * sizeof(int)); /* all bytes 0xFF in the opened gap */
+}
+
+/*! \brief Allocates arrays for neighbor tree.
+ *
+ *  \return void
+ */
+void ngb_treeallocate(void)
+{
+  if(Ngb_MaxPart == 0) /* sizes not chosen yet; ngb_treefree() resets both values to 0 */
+    {
+      Ngb_MaxPart  = All.MaxPartSph;
+      Ngb_MaxNodes = (int)(All.NgbTreeAllocFactor * (All.MaxPartSph + BASENUMBER)) + NTopnodes;
+    }
+
+  if(All.TotNumGas == 0) /* nothing is allocated in this case; ngb_treefree() has the matching early return */
+    return;
+
+  if(Ngb_Nodes)
+    terminate("already allocated");
+
+  Ngb_DomainNodeIndex = (int *)mymalloc_movable(&Ngb_DomainNodeIndex, "Ngb_DomainNodeIndex", NTopleaves * sizeof(int));
+
+  Ngb_Nodes = (struct NgbNODE *)mymalloc_movable(&Ngb_Nodes, "Ngb_Nodes", (Ngb_MaxNodes + 1) * sizeof(struct NgbNODE));
+  Ngb_Nodes -= Ngb_MaxPart; /* shift the base so that index Ngb_MaxPart addresses the first allocated node */
+
+#ifdef TREE_BASED_TIMESTEPS
+  ExtNgb_Nodes = (struct ExtNgbNODE *)mymalloc_movable(&ExtNgb_Nodes, "ExtNgb_Nodes", (Ngb_MaxNodes + 1) * sizeof(struct ExtNgbNODE));
+  ExtNgb_Nodes -= Ngb_MaxPart; /* same index shift as for Ngb_Nodes */
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+  Ngb_Nextnode = (int *)mymalloc_movable(&Ngb_Nextnode, "Ngb_Nextnode", (Ngb_MaxPart + NTopleaves) * sizeof(int));
+  Ngb_Father   = (int *)mymalloc_movable(&Ngb_Father, "Ngb_Father", Ngb_MaxPart * sizeof(int));
+
+  Ngb_Marker = (int *)mymalloc_movable(&Ngb_Marker, "Ngb_Marker", (Ngb_MaxNodes + Ngb_MaxPart) * sizeof(int));
+}
+
+/*! \brief This function frees the memory allocated for the neighbor tree.
+ *
+ *  \return void
+ */
+void ngb_treefree(void)
+{
+  if(All.TotNumGas == 0) /* ngb_treeallocate() allocates nothing in this case */
+    return;
+
+  if(Ngb_Nodes)
+    {
+      /* NOTE(review): arrays are freed in reverse order of their allocation in ngb_treeallocate() - presumably required by the allocator; confirm */
+      myfree_movable(Ngb_Marker);
+      myfree_movable(Ngb_Father);
+      myfree_movable(Ngb_Nextnode);
+#ifdef TREE_BASED_TIMESTEPS
+      myfree_movable(ExtNgb_Nodes + Ngb_MaxPart); /* undo the -Ngb_MaxPart shift of the base pointer */
+      ExtNgb_Nodes = NULL;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+      myfree_movable(Ngb_Nodes + Ngb_MaxPart); /* undo the -Ngb_MaxPart shift of the base pointer */
+      myfree_movable(Ngb_DomainNodeIndex);
+
+      Ngb_Marker          = NULL;
+      Ngb_Father          = NULL;
+      Ngb_Nodes           = NULL;
+      Ngb_DomainNodeIndex = NULL;
+      Ngb_Nextnode        = NULL;
+      Ngb_MaxPart         = 0; /* makes the next ngb_treeallocate() choose the sizes again */
+      Ngb_MaxNodes        = 0;
+    }
+  else
+    terminate("trying to free the tree even though it's not allocated");
+}
diff --git a/src/amuse/community/arepo/src/ngbtree/ngbtree_search.c b/src/amuse/community/arepo/src/ngbtree/ngbtree_search.c
new file mode 100644
index 0000000000..e777a7c29c
--- /dev/null
+++ b/src/amuse/community/arepo/src/ngbtree/ngbtree_search.c
@@ -0,0 +1,376 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/ngbtree/ngbtree_search.c
+ * \date        05/2018
+ * \brief       This file contains a search routine on the neighbor tree.
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                void find_nearest_meshpoint_global(mesh_search_data *
+ *                  searchdata_input, int nn, int hsmlguess, int verbose)
+ *                int ngbsearch_primary_cell_evaluate(int target, int mode,
+ *                  int threadid)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/* temporary per-request arrays; the first two are allocated and freed in find_nearest_meshpoint_global() */
+static MyDouble *ngbsearch_nearest_dist; /* smallest distance found so far, initialized to MAX_REAL_NUMBER */
+static MyDouble *ngbsearch_hsml;         /* current search radius, doubled after each pass without a hit */
+static mesh_search_data *searchdata;     /* set to the caller's searchdata_input array */
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble pos[3];   /* search position, copied from searchdata[i].Pos */
+  MyDouble hsml;     /* current search radius */
+  MyDouble distance; /* nearest neighbor distance found so far */
+
+  int Firstnode; /* value of the firstnode argument given to particle2in() */
+} data_in;
+
+static data_in *DataIn, *DataGet; /* DataGet[] is read for imported requests in ngbsearch_primary_cell_evaluate() */
+
+/*! \brief Packs one search request into the communication input structure
+ *         defined above. Needed by generic_comm_helpers2.
+ *
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index of the request in searchdata and the ngbsearch_* arrays.
+ *  \param[in] firstnode First node of communication.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  /* search position */
+  for(int axis = 0; axis < 3; axis++)
+    in->pos[axis] = searchdata[i].Pos[axis];
+
+  /* best distance found so far and current search radius */
+  in->distance  = ngbsearch_nearest_dist[i];
+  in->hsml      = ngbsearch_hsml[i];
+  in->Firstnode = firstnode;
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Distance; /* distance to closest cell on task; MAX_REAL_NUMBER if none was found */
+  int Task;          /* ThisTask of the evaluating task, or -1 if none was found */
+  int Index;         /* index into P of the closest cell, or -1 if none was found */
+} data_out;
+
+static data_out *DataResult, *DataOut; /* DataResult[] is written for imported requests in ngbsearch_primary_cell_evaluate() */
+
+/*! \brief Stores a search result for request i, or merges it with the one
+ *         already stored. Needed by generic_comm_helpers2.
+ *
+ *  \param[in] out Result to be taken over into searchdata and
+ *  ngbsearch_nearest_dist.
+ *  \param[in] i Index of the request in searchdata.
+ *  \param[in] mode MODE_LOCAL_PARTICLES for a result of the local search;
+ *  any other value for a result that was communicated from another task.
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  /* Decide whether the incoming result replaces what is stored for request i:
+   *  - initial store (local search): only if a cell was found, i.e. Index >= 0;
+   *  - combine (result from another task): only if that cell is strictly closer.
+   */
+  int accept;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    accept = out->Index >= 0;
+  else
+    accept = out->Distance < ngbsearch_nearest_dist[i];
+
+  /* otherwise the stored result stays untouched */
+  if(!accept)
+    return;
+
+  /* take over distance and location of the accepted candidate */
+  ngbsearch_nearest_dist[i] = out->Distance;
+  searchdata[i].Task        = out->Task;
+  searchdata[i].u.Index     = out->Index;
+}
+
+#include "../utils/generic_comm_helpers2.h"
+
+static int ngbsearch_primary_cell_evaluate(int target, int mode, int threadid);
+static int n;
+
+/*! \brief Work loop over the local search requests.
+ *
+ *  Evaluates every still unresolved request in MODE_LOCAL_PARTICLES.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  const int tid = get_thread_num();
+
+  /* reset the per-task export flags of this thread */
+  for(int task = 0; task < NTask; task++)
+    Thread[tid].Exportflag[task] = -1;
+
+  /* Take requests one at a time. The loop ends when the export space of this
+   * thread drops below MinSpace or when all n requests have been handed out.
+   */
+  while(Thread[tid].ExportSpace >= MinSpace)
+    {
+      /* NextParticle advances for every request taken, evaluated or not */
+      const int req = NextParticle++;
+
+      if(req >= n)
+        break;
+
+      /* requests that already carry a result (Task != -1) are skipped */
+      if(searchdata[req].Task != -1)
+        continue;
+
+      ngbsearch_primary_cell_evaluate(req, MODE_LOCAL_PARTICLES, tid);
+    }
+}
+
+/*! \brief Work loop over the imported search requests.
+ *
+ *  Evaluates all Nimport requests in MODE_IMPORTED_PARTICLES.
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  const int tid = get_thread_num();
+
+  int slot = 0;
+
+  /* Walk the import buffer front to back. Every slot is evaluated in
+   * MODE_IMPORTED_PARTICLES, in which ngbsearch_primary_cell_evaluate() reads
+   * the request from DataGet[slot] and writes the answer to DataResult[slot].
+   * Nimport is compared anew before each evaluation.
+   */
+  while(slot < Nimport)
+    {
+      ngbsearch_primary_cell_evaluate(slot, MODE_IMPORTED_PARTICLES, tid);
+      slot++;
+    }
+}
+
+/*! \brief Searches the cells at the positions in searchdata.
+ *
+ *  This function searches the cells which are at the positions specified in
+ *  searchdata. The Pos field must be set. After the search is performed the
+ *  Task and Index field contain the task/index of the cell at position Pos.
+ *  If hsmlguess is non-zero, the initial search radius is read from the
+ *  Index/hsmlguess union in searchdata.
+ *
+ *  \param[in, out] searchdata_input Contains the search positions, after
+ *             function call the fields Task and Index are set.
+ *  \param[in] nn Number of items in searchdata.
+ *  \param[in] hsmlguess Guess for initial search radius;
+ *             non-zero: from searchdata; else from MeanVolume of cells.
+ *  \param[in] verbose More output.
+ *
+ *  \return void
+ */
+void find_nearest_meshpoint_global(mesh_search_data *searchdata_input, int nn, int hsmlguess, int verbose)
+{
+  int i;
+  n                      = nn; /* file-scope copy, read by kernel_local() */
+  ngbsearch_nearest_dist = mymalloc("ngbsearch_nearest_dist", n * sizeof(MyDouble));
+  ngbsearch_hsml         = mymalloc("ngbsearch_hsml", n * sizeof(MyDouble));
+  searchdata             = searchdata_input; /* results are written directly into the caller's array */
+
+  for(i = 0; i < n; i++)
+    {
+      ngbsearch_nearest_dist[i] = MAX_REAL_NUMBER;
+
+      if(hsmlguess) /* any non-zero value selects the radius supplied in the union */
+        ngbsearch_hsml[i] = searchdata[i].u.hsmlguess;
+      else
+        ngbsearch_hsml[i] = 1e-6 * pow(All.MeanVolume, 1.0 / 3);
+
+      searchdata[i].Task = -1;  // None found yet
+    }
+
+  generic_set_MaxNexport();
+
+  int ntot, iter = 0; /* ntot: unresolved requests summed over all tasks; iter: number of repeats so far */
+
+  /* we will repeat the whole thing for those points where we did not find a nearest neighbor */
+  do
+    {
+      generic_comm_pattern(n, kernel_local, kernel_imported);
+
+      int npleft = 0; /* unresolved requests on this task */
+
+      /* do final operations on results */
+      for(i = 0; i < n; i++)
+        {
+          if(searchdata[i].Task == -1)
+            {
+              npleft++;
+              ngbsearch_hsml[i] *= 2.0; /* retry with twice the search radius */
+
+              if(iter >= MAXITER - 10) /* print diagnostics once the iteration count gets close to MAXITER */
+                {
+                  printf("i=%d task=%d hsml=%g nearest dist=%g pos=(%g|%g|%g)\n", i, ThisTask, ngbsearch_hsml[i],
+                         ngbsearch_nearest_dist[i], searchdata[i].Pos[0], searchdata[i].Pos[1], searchdata[i].Pos[2]);
+                  myflush(stdout);
+                }
+              if(iter > MAXITER)
+                terminate("NGBSEARCH: iter > MAXITER");
+            }
+        }
+
+      /* sum up the left overs */
+      MPI_Allreduce(&npleft, &ntot, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+      if(ntot > 0) /* ok, we need to repeat for a few particles */
+        {
+          iter++;
+          if(iter > 0 && ThisTask == 0 && verbose) /* iter was just incremented from a value >= 0 */
+            {
+              printf("NGBSEARCH: iteration %d: need to repeat for %d points.\n", iter, ntot);
+              myflush(stdout);
+            }
+
+          if(iter > MAXITER)
+            terminate("NGBSEARCH: failed to converge in tracer particles\n");
+        }
+    }
+  while(ntot > 0); /* ntot comes from MPI_Allreduce, so all tasks see the same value here */
+
+  myfree(ngbsearch_hsml); /* released in reverse order of allocation */
+  myfree(ngbsearch_nearest_dist);
+}
+
+/*! \brief Performs the neighbor search.
+ *
+ *  \param[in] target the index of the particle to process (local mode: in
+ *             searchdata, imported mode: in DataGet/DataResult).
+ *  \param[in] mode either MODE_LOCAL_PARTICLES (handle local particles) or
+ *             another value (handle particles sent to us).
+ *  \param[in] threadid Id of thread.
+ *
+ *  \return 0
+ */
+int ngbsearch_primary_cell_evaluate(int target, int mode, int threadid)
+{
+  int j, n; /* note: this local n hides the file-scope n inside this function */
+  int numnodes, *firstnode;
+  MyDouble h, distmax;
+  MyDouble dx, dy, dz, r;
+  MyDouble *pos;
+  data_in local, *target_data;
+  data_out out;
+
+  int index = -1; /* stays -1 if no acceptable cell is found */
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0); /* build the request from the local search arrays */
+      target_data = &local;
+
+      numnodes  = 1;
+      firstnode = NULL;
+    }
+  else
+    {
+      target_data = &DataGet[target]; /* request as received in the import buffer */
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos     = target_data->pos;
+  h       = target_data->hsml;
+  distmax = target_data->distance;
+
+  int numngb = ngb_treefind_variable_threads(pos, h, target, mode, threadid, numnodes, firstnode);
+
+  for(n = 0; n < numngb; n++) /* candidates are read from Thread[threadid].Ngblist */
+    {
+      j = Thread[threadid].Ngblist[n];
+
+      dx = pos[0] - P[j].Pos[0];
+      dy = pos[1] - P[j].Pos[1];
+      dz = pos[2] - P[j].Pos[2];
+      /* fold each separation component into [-boxHalf, boxHalf] */
+      if(dx > boxHalf_X)
+        dx -= boxSize_X;
+      if(dx < -boxHalf_X)
+        dx += boxSize_X;
+      if(dy > boxHalf_Y)
+        dy -= boxSize_Y;
+      if(dy < -boxHalf_Y)
+        dy += boxSize_Y;
+      if(dz > boxHalf_Z)
+        dz -= boxSize_Z;
+      if(dz < -boxHalf_Z)
+        dz += boxSize_Z;
+
+      r = sqrt(dx * dx + dy * dy + dz * dz);
+      if(r < distmax && r < h && P[j].ID != 0 && P[j].Mass > 0) /* strictly closer, inside the radius, ID != 0 and positive mass */
+        {
+          distmax = r;
+          index   = j;
+        }
+    }
+
+  out.Distance = distmax;
+  out.Task     = ThisTask;
+  out.Index    = index;
+
+  if(index < 0) /* nothing accepted: report "not found" */
+    {
+      out.Distance = MAX_REAL_NUMBER;
+      out.Task     = -1;
+      out.Index    = -1;
+    }
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out; /* result is left in the buffer for the communication layer */
+
+  return 0;
+}
diff --git a/src/amuse/community/arepo/src/ngbtree/ngbtree_walk.c b/src/amuse/community/arepo/src/ngbtree/ngbtree_walk.c
new file mode 100644
index 0000000000..c682ce157d
--- /dev/null
+++ b/src/amuse/community/arepo/src/ngbtree/ngbtree_walk.c
@@ -0,0 +1,225 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/ngbtree/ngbtree_walk.c
+ * \date        05/2018
+ * \brief       Routines to walk the ngb tree.
+ * \details     contains functions:
+ *                int ngb_treefind_variable_threads(MyDouble searchcenter[3],
+ *                  MyFloat hsml, int target, int mode, int thread_id, int
+ *                  numnodes, int *firstnode)
+ *                int ngb_treefind_export_node_threads(int no, int target, int
+ *                  thread_id, int image_flag)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 16.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief Finds all cells around searchcenter in region with radius hsml.
+ *
+ *  This function returns the number of neighbors with distance <= hsml, and
+ *  stores the particle indices in the per-thread buffer Thread[thread_id].Ngblist.
+ *  The tree traversal starts at the root node, or at the nodes in firstnode.
+ *  Keep in mind that this is usually called within an *_evaluate function
+ *  within the generic communication pattern. This means that first, the local
+ *  (bound to this task) search is performed and the local neighbors written
+ *  to the array, then communication happens and afterwards, the function is
+ *  called again in imported mode, finding particles on other tasks.
+ *
+ *  \param[in] searchcenter Center of the neighbor search.
+ *  \param[in] hsml Radius of the search.
+ *  \param[in] target Index of the particle around which the search is
+ *             performed; needed for parallel search. If < 0, only local search
+ *             is performed.
+ *  \param[in] mode Mode for local or imported particle search.
+ *  \param[in] thread_id ID of thread (always 0 in our case).
+ *  \param[in] numnodes Number of nodes on this task (1 for mode local;
+ *             for mode imported: given by generic_get_numnodes(...) ).
+ *  \param[in] firstnode Node to start with (in case of mode imported).
+ *
+ *  \return The number of neighbors found.
+ */
+int ngb_treefind_variable_threads(MyDouble searchcenter[3], MyFloat hsml, int target, int mode, int thread_id, int numnodes,
+                                  int *firstnode)
+{
+  MyDouble search_min[3], search_max[3], search_max_Lsub[3], search_min_Ladd[3];
+
+  for(int i = 0; i < 3; i++) /* axis-aligned search box around searchcenter, padded to 1.001 * hsml per side */
+    {
+      search_min[i] = searchcenter[i] - 1.001 * hsml;
+      search_max[i] = searchcenter[i] + 1.001 * hsml;
+    }
+  /* box edges shifted by one box length; used further down to test nodes against the shifted search box */
+  search_max_Lsub[0] = search_max[0] - boxSize_X;
+  search_max_Lsub[1] = search_max[1] - boxSize_Y;
+  search_max_Lsub[2] = search_max[2] - boxSize_Z;
+
+  search_min_Ladd[0] = search_min[0] + boxSize_X;
+  search_min_Ladd[1] = search_min[1] + boxSize_Y;
+  search_min_Ladd[2] = search_min[2] + boxSize_Z;
+
+  int numngb = 0;          /* number of entries written to R2list / Ngblist so far */
+  double xtmp, ytmp, ztmp; /* not referenced by name below; presumably scratch for the NGB_PERIODIC_LONG_* macros -- TODO confirm */
+  double hsml2 = hsml * hsml; /* squared search radius for the final distance test */
+
+  for(int k = 0; k < numnodes; k++) /* one walk per start node */
+    {
+      int no;
+
+      if(mode == MODE_LOCAL_PARTICLES)
+        {
+          no = Ngb_MaxPart; /* root node */
+        }
+      else
+        {
+          no = firstnode[k];
+          no = Ngb_Nodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0) /* a negative index ends the walk */
+        {
+          if(no < Ngb_MaxPart) /* single particle */
+            {
+              int p = no;
+              no    = Ngb_Nextnode[no]; /* advance first, so every 'continue' below resumes with the next entry */
+
+              if(P[p].Type > 0) /* only particles of type 0 are accepted */
+                continue;
+
+              if(P[p].Ti_Current != All.Ti_Current) /* bring the particle to the current time before testing it */
+                {
+                  drift_particle(p, All.Ti_Current);
+                }
+              /* per-axis rejection; assumes the macros yield a non-negative separation -- TODO confirm */
+              double dx = NGB_PERIODIC_LONG_X(P[p].Pos[0] - searchcenter[0]);
+              if(dx > hsml)
+                continue;
+              double dy = NGB_PERIODIC_LONG_Y(P[p].Pos[1] - searchcenter[1]);
+              if(dy > hsml)
+                continue;
+              double dz = NGB_PERIODIC_LONG_Z(P[p].Pos[2] - searchcenter[2]);
+              if(dz > hsml)
+                continue;
+
+              double r2 = dx * dx + dy * dy + dz * dz;
+              if(r2 > hsml2) /* outside the search sphere */
+                continue;
+              /* NOTE(review): no bound check on numngb is visible here; list capacity must be ensured elsewhere */
+              Thread[thread_id].R2list[numngb]    = r2;
+              Thread[thread_id].Ngblist[numngb++] = p;
+            }
+          else if(no < Ngb_MaxPart + Ngb_MaxNodes) /* internal node */
+            {
+              struct NgbNODE *current = &Ngb_Nodes[no];
+
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  if(no <
+                     Ngb_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */
+                    break;
+                }
+
+              no = current->u.d.sibling; /* in case the node can be discarded */
+
+              if(current->Ti_Current != All.Ti_Current) /* bring the node to the current time before testing it */
+                {
+                  drift_node(current, All.Ti_Current);
+                }
+              /* per axis: skip the node when its range misses the search box and the shifted copy of it */
+              if(search_min[0] > current->u.d.range_max[0] && search_max_Lsub[0] < current->u.d.range_min[0])
+                continue;
+              if(search_min_Ladd[0] > current->u.d.range_max[0] && search_max[0] < current->u.d.range_min[0])
+                continue;
+
+              if(search_min[1] > current->u.d.range_max[1] && search_max_Lsub[1] < current->u.d.range_min[1])
+                continue;
+              if(search_min_Ladd[1] > current->u.d.range_max[1] && search_max[1] < current->u.d.range_min[1])
+                continue;
+
+              if(search_min[2] > current->u.d.range_max[2] && search_max_Lsub[2] < current->u.d.range_min[2])
+                continue;
+              if(search_min_Ladd[2] > current->u.d.range_max[2] && search_max[2] < current->u.d.range_min[2])
+                continue;
+
+              no = current->u.d.nextnode; /* ok, we need to open the node */
+            }
+          else /* pseudo particle */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("mode == MODE_IMPORTED_PARTICLES should not occur here");
+
+              if(target >= 0) /* if no target is given, export will not occur */
+                if(ngb_treefind_export_node_threads(no, target, thread_id, 0))
+                  return -1; /* a non-zero result from the export routine aborts the search */
+
+              no = Ngb_Nextnode[no - Ngb_MaxNodes]; /* successor stored for this pseudo particle */
+              continue;
+            }
+        }
+    }
+  return numngb;
+}
+
+/*! \brief Prepares export of ngb-tree node.
+ *
+ *  \param[in] no Pseudoparticle node to be exported.
+ *  \param[in] target (Local) index to identify what it refers to.
+ *  \param[in] thread_id ID of thread (0 in our case).
+ *  \param[in] image_flag Bit flag used in EXTENDED_GHOST_SEARCH.
+ *
+ *  \return 0
+ */
+int ngb_treefind_export_node_threads(int no, int target, int thread_id, int image_flag)
+{
+  int domain_idx = no - (Ngb_MaxPart + Ngb_MaxNodes); /* slot of this pseudo particle in DomainTask / Ngb_DomainNodeIndex */
+  int dest_task  = DomainTask[domain_idx];            /* the task indicated by the pseudoparticle node */
+
+  if(Thread[thread_id].Exportflag[dest_task] != target) /* add a particle entry unless this target is already flagged */
+    {
+      int part_slot = Thread[thread_id].Nexport++;
+      Thread[thread_id].Exportflag[dest_task]     = target;
+      Thread[thread_id].PartList[part_slot].Task  = dest_task;
+      Thread[thread_id].PartList[part_slot].Index = target;
+      Thread[thread_id].ExportSpace -= Thread[thread_id].ItemSize;
+    }
+
+  /* node entries use negative indices (-1, -2, ...) counted from the address PartList + InitialSpace */
+  struct datanodelist *node_entries = (struct datanodelist *)(((char *)Thread[thread_id].PartList) + Thread[thread_id].InitialSpace);
+  int node_slot                     = -1 - Thread[thread_id].NexportNodes++;
+  node_entries[node_slot].Task      = dest_task;
+  node_entries[node_slot].Index     = target;
+  node_entries[node_slot].Node      = Ngb_DomainNodeIndex[domain_idx];
+#ifdef EXTENDED_GHOST_SEARCH
+  node_entries[node_slot].BitFlags = image_flag;
+#endif /* #ifdef EXTENDED_GHOST_SEARCH */
+  Thread[thread_id].ExportSpace -= sizeof(struct datanodelist) + sizeof(int);
+  return 0;
+}
diff --git a/src/amuse/community/arepo/src/star_formation/sfr_eEOS.c b/src/amuse/community/arepo/src/star_formation/sfr_eEOS.c
new file mode 100644
index 0000000000..7e9fbef498
--- /dev/null
+++ b/src/amuse/community/arepo/src/star_formation/sfr_eEOS.c
@@ -0,0 +1,539 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/star_formation/sfr_eEOS.c
+ * \date        05/2018
+ * \brief       Star formation rate routines for the effective multi-phase
+ *              model.
+ * \details     contains functions:
+ *                void cooling_and_starformation(void)
+ *                double get_starformation_rate(int i)
+ *                void init_clouds(void)
+ *                void integrate_sfr(void)
+ *                void set_units_sfr(void)
+ *                double calc_egyeff(int i, double gasdens, double *ne,
+ *                  double *x, double *tsfr, double *factorEVP)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../gravity/forcetree.h"
+
+#ifdef USE_SFR
+
+/*! \brief Main driver for star formation and gas cooling.
+ *
+ *  This function loops over all the active gas cells. If a given cell
+ *  meets the criteria for star formation to be active the multi-phase
+ *  model is activated, the properties of the cell are updated according to
+ *  the latter and the star formation rate computed. In the other case, the
+ *  standard isochoric cooling is applied to the gas cell by calling the
+ *  function cool_cell() and the star formation rate is set to 0.
+ *
+ *  \return void
+ */
+void cooling_and_starformation(void)
+{
+  TIMER_START(CPU_COOLINGSFR);
+
+  int idx, i, bin, flag;
+  double dt, dtime, ne = 1;
+  double unew, du;
+  double cloudmass;
+  double factorEVP, dens;
+  double tsfr;
+  double egyeff, x;
+
+  double eos_dens_threshold = All.PhysDensThresh;
+
+  /* note: assuming FULL ionization */
+  double u_to_temp_fac =
+      (4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC))) * PROTONMASS / BOLTZMANN * GAMMA_MINUS1 * All.UnitEnergy_in_cgs / All.UnitMass_in_g;
+
+  /* clear the SFR stored in the active timebins */
+  for(bin = 0; bin < TIMEBINS; bin++)
+    if(TimeBinSynchronized[bin])
+      TimeBinSfr[bin] = 0;
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0) /* negative entries of the active list are skipped */
+        continue;
+
+      if(P[i].Mass == 0 && P[i].ID == 0)
+        continue; /* skip cells that have been swallowed or eliminated */
+
+      dens = SphP[i].Density;
+      /* dt is zero for time bin 0, otherwise 2^TimeBinHydro * Timebase_interval; dtime rescales it by cf_atime / cf_time_hubble_a */
+      dt    = (P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval;
+      dtime = All.cf_atime * dt / All.cf_time_hubble_a;
+
+      /* apply the temperature floor */
+
+      unew = dmax(All.MinEgySpec, SphP[i].Utherm);
+
+      if(unew < 0)
+        terminate("Invalid Temperature: Task=%d i=%d unew=%g\n", ThisTask, i, unew);
+
+      du = unew - SphP[i].Utherm; /* amount by which the floor raises Utherm (zero if already above it) */
+      SphP[i].Utherm += du;
+      SphP[i].Energy += All.cf_atime * All.cf_atime * du * P[i].Mass; /* keep Energy consistent with the change in Utherm */
+
+      egyeff = 0.; /* stays zero for cells below the density threshold */
+      /* calculate the effective equation of state for gas above the density threshold */
+      if(dens * All.cf_a3inv >= eos_dens_threshold)
+        {
+          ne     = SphP[i].Ne;
+          egyeff = calc_egyeff(i, dens * All.cf_a3inv, &ne, &x, &tsfr, &factorEVP); /* also sets x and tsfr used in the flag == 0 branch */
+        }
+
+      /* do cooling, except for gas above the EOS density threshold that is colder than the eEOS */
+      if(dens * All.cf_a3inv < eos_dens_threshold || (dens * All.cf_a3inv >= eos_dens_threshold && SphP[i].Utherm > egyeff))
+        {
+          cool_cell(i); /* defined elsewhere; presumably updates SphP[i].Utherm, which is re-read below -- TODO confirm */
+        }
+
+      /* check whether conditions for star formation are fulfilled.
+       * flag=1  normal cooling (no star formation)
+       * flag=0  star formation
+       */
+
+      flag = 1; /* default is normal cooling */
+
+      /* enable star formation if gas is above SF density threshold */
+      if(dens * All.cf_a3inv >= eos_dens_threshold)
+        if(SphP[i].Utherm <= egyeff || u_to_temp_fac * SphP[i].Utherm <= All.TemperatureThresh)
+          flag = 0;
+      /* with comoving integration, dens (without the cf_a3inv factor) must also reach OverDensThresh */
+      if(All.ComovingIntegrationOn)
+        if(dens < All.OverDensThresh)
+          flag = 1;
+
+      if(P[i].Mass == 0) /* tracer particles don't form stars */
+        flag = 1;
+
+      if(flag == 1)
+        SphP[i].Sfr = 0;
+
+      /* active star formation */
+      if(flag == 0)
+        {
+          SphP[i].Ne = (HYDROGEN_MASSFRAC + 1) / 2 / HYDROGEN_MASSFRAC; /* note: assuming FULL ionization */
+
+          cloudmass = x * P[i].Mass; /* x was set by calc_egyeff above: flag == 0 implies the threshold branch ran */
+
+          if(tsfr < dtime) /* the star formation time scale is not allowed to be shorter than dtime */
+            tsfr = dtime;
+
+          if(dt > 0)
+            {
+              if(P[i].TimeBinHydro) /* upon start-up, we need to protect against dt==0 */
+                {
+                  unew = SphP[i].Utherm;
+
+                  // put (cold) star forming cells on the effective equation of state
+                  if(SphP[i].Utherm < egyeff)
+                    {
+                      unew = egyeff;
+                    }
+
+                  du = unew - SphP[i].Utherm;
+                  if(unew < All.MinEgySpec) /* never end up below the minimum specific energy */
+                    du = All.MinEgySpec - SphP[i].Utherm;
+
+                  SphP[i].Utherm += du;
+                  SphP[i].Energy += All.cf_atime * All.cf_atime * du * P[i].Mass;
+
+#ifdef OUTPUT_COOLHEAT
+                  if(dtime > 0)
+                    SphP[i].CoolHeat = du * P[i].Mass / dtime;
+#endif /* #ifdef OUTPUT_COOLHEAT */
+
+                  set_pressure_of_cell(i);
+                }
+            }
+          /* rate scaled by (UnitMass_in_g / SOLAR_MASS) / (UnitTime_in_s / SEC_PER_YEAR) */
+          SphP[i].Sfr = (1 - All.FactorSN) * cloudmass / tsfr * (All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR);
+
+          TimeBinSfr[P[i].TimeBinHydro] += SphP[i].Sfr; /* accumulate the rate in the cell's hydro time bin */
+        }
+    } /* end of main loop over active particles */
+
+  TIMER_STOP(CPU_COOLINGSFR);
+}
+
+/*! \brief Return the star formation rate associated with the gas cell i.
+ *
+ *  \param[in] i the index of the gas cell.
+ *
+ *  \return star formation rate in solar masses / yr.
+ */
+double get_starformation_rate(int i)
+{
+  /* for RestartFlag == 3 the rate stored with the cell is handed back untouched */
+  if(RestartFlag == 3)
+    return SphP[i].Sfr;
+
+  double tsfr, factorEVP, ne, x;
+
+  /* factor applied to Utherm before it is compared with All.TemperatureThresh;
+   * note: assuming FULL ionization */
+  const double u_to_temp_fac =
+      (4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC))) * PROTONMASS / BOLTZMANN * GAMMA_MINUS1 * All.UnitEnergy_in_cgs / All.UnitMass_in_g;
+
+  /* density multiplied by cf_a3inv, the quantity tested against All.PhysDensThresh */
+  const double physdens = SphP[i].Density * All.cf_a3inv;
+
+  /* no star formation unless the density threshold is reached */
+  if(!(physdens >= All.PhysDensThresh))
+    return 0;
+
+  /* effective energy of the multi-phase model; the call also fills in x and tsfr */
+  ne                  = SphP[i].Ne;
+  const double egyeff = calc_egyeff(i, physdens, &ne, &x, &tsfr, &factorEVP);
+
+  /* star forming only if Utherm is at most 1% above egyeff, or if the scaled Utherm
+   * does not exceed the temperature threshold */
+  const int on_eos = (SphP[i].Utherm <= 1.01 * egyeff);
+  const int cold   = (u_to_temp_fac * SphP[i].Utherm <= All.TemperatureThresh);
+  if(!on_eos && !cold)
+    return 0;
+
+  /* with comoving integration, cells below All.OverDensThresh are excluded as well */
+  if(All.ComovingIntegrationOn && SphP[i].Density < All.OverDensThresh)
+    return 0;
+
+  /* rate from the cold cloud mass and the star formation time scale */
+  const double cloudmass = x * P[i].Mass;
+  double rateOfSF        = (1 - All.FactorSN) * cloudmass / tsfr;
+
+  /* convert to solar masses per yr */
+  rateOfSF *= (All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR);
+
+  return rateOfSF;
+}
+
+/*! \brief Initialize the parameters of effective multi-phase model.
+ *
+ *   In particular this function computes the value of PhysDensThresh, that is
+ *   the physical density threshold above which star formation is active, if
+ *   its value was set to 0 in the parameter file.
+ *
+ *   \return void
+ */
+void init_clouds(void)
+{
+  double A0, dens, tcool, ne, coolrate, egyhot, x, u4, meanweight;
+  double tsfr, peff, fac, neff, egyeff, factorEVP, sigma, thresholdStarburst;
+
+  if(All.PhysDensThresh == 0) /* everything below runs only while no threshold has been set */
+    {
+      A0 = All.FactorEVP;
+
+      egyhot = All.EgySpecSN / A0;
+
+      meanweight = 4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC)); /* note: assuming FULL ionization */
+      u4         = 1 / meanweight * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * 1.0e4;
+      u4 *= All.UnitMass_in_g / All.UnitEnergy_in_cgs;
+
+      /* choose a high reference density to avoid that we pick up a compton cooling contribution;
+       * the same value, 1.0e10 times 3 * Hubble^2 / (8 * pi * G), applies with and without
+       * All.ComovingIntegrationOn, so the assignment is unconditional (it used to be
+       * duplicated in both arms of an if/else) */
+      dens = 1.0e10 * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G);
+
+      if(All.ComovingIntegrationOn)
+        {
+          All.Time = 1.0; /* to be guaranteed to get z=0 rate */
+          set_cosmo_factors_for_current_time();
+          IonizeParams();
+        }
+
+      ne = 1.0;
+      SetZeroIonization();
+
+      tcool = GetCoolingTime(egyhot, dens, &ne);
+
+      coolrate = egyhot / tcool / dens;
+
+      x = (egyhot - u4) / (egyhot - All.EgySpecCold);
+      /* threshold density derived from x, the supernova/cold energies, MaxSfrTimescale and coolrate */
+      All.PhysDensThresh =
+          x / pow(1 - x, 2) * (All.FactorSN * All.EgySpecSN - (1 - All.FactorSN) * All.EgySpecCold) / (All.MaxSfrTimescale * coolrate);
+
+      mpi_printf(
+          "USE_SFR: A0=%g   PhysDensThresh=%g (int units) %g h^2 cm^-3   expected fraction of cold gas at threshold=%g   tcool=%g   "
+          "dens=%g   egyhot=%g\n",
+          A0, All.PhysDensThresh, All.PhysDensThresh / (PROTONMASS / HYDROGEN_MASSFRAC / All.UnitDensity_in_cgs), x, tcool, dens,
+          egyhot);
+
+      dens = All.PhysDensThresh;
+      /* raise dens in 2.5% steps until neff = d ln(peff) / d ln(dens) over one step is no longer above 4/3 */
+      do
+        {
+          ne     = 0.5;
+          egyeff = calc_egyeff(-1, dens, &ne, &x, &tsfr, &factorEVP);
+          peff   = GAMMA_MINUS1 * dens * egyeff;
+
+          fac = 1 / (log(dens * 1.025) - log(dens));
+          dens *= 1.025;
+
+          neff = -log(peff) * fac; /* contribution of the lower density */
+
+          ne     = 0.5;
+          egyeff = calc_egyeff(-1, dens, &ne, &x, &tsfr, &factorEVP);
+          peff   = GAMMA_MINUS1 * dens * egyeff;
+
+          neff += log(peff) * fac; /* contribution of the higher density */
+        }
+      while(neff > 4.0 / 3);
+
+      thresholdStarburst = dens;
+
+      mpi_printf("USE_SFR: run-away sets in for dens=%g   dynamic range for quiescent star formation=%g\n", thresholdStarburst,
+                 thresholdStarburst / All.PhysDensThresh);
+
+      integrate_sfr();
+
+      if(ThisTask == 0)
+        {
+          sigma = 10.0 / All.Hubble * 1.0e-10 / pow(1.0e-3, 2);
+
+          printf("USE_SFR: isotherm sheet central density=%g   z0=%g\n", M_PI * All.G * sigma * sigma / (2 * GAMMA_MINUS1) / u4,
+                 GAMMA_MINUS1 * u4 / (2 * M_PI * All.G * sigma));
+          myflush(stdout);
+        }
+
+      mpi_printf("USE_SFR: SNII energy=%g [internal units] = %g [erg/M_sun] = %g [1e51 erg/Msun]\n", All.FactorSN * All.EgySpecSN,
+                 All.FactorSN * All.EgySpecSN / (1 - All.FactorSN) / (All.UnitMass_in_g / All.UnitEnergy_in_cgs) * SOLAR_MASS,
+                 All.FactorSN * All.EgySpecSN / (1 - All.FactorSN) / (All.UnitMass_in_g / All.UnitEnergy_in_cgs) * SOLAR_MASS / 1e51);
+
+      if(All.ComovingIntegrationOn) /* undo the temporary switch to All.Time = 1.0 made above */
+        {
+          All.Time = All.TimeBegin;
+          set_cosmo_factors_for_current_time();
+          IonizeParams();
+        }
+    }
+}
+
+/*! \brief Compute the effective equation of state for the gas and
+ *         the integrated SFR per unit area.
+ *
+ *  This function computes the effective equation of state for the gas and
+ *  the integrated SFR per unit area. It saves the results into two files:
+ *  eos.txt for the equation of state and sfrrate.txt for the integrated SFR.
+ *  In the latter case, the SFR is determined by integrating along the vertical
+ *  direction the gas density of an infinite self-gravitating isothermal sheet.
+ *  The integrated gas density is saved as well, so effectively sfrrate.txt
+ *  contains the Kennicutt-Schmidt law of the star formation model.
+ *
+ *  \return void
+ */
+void integrate_sfr(void)
+{
+  double rho0, rho, rho2, q, dz, gam, sigma = 0, sigma_u4, sigmasfr = 0, ne, P1;
+  double x = 0, P, P2, x2, tsfr2, factorEVP2, drho, dq;
+  double meanweight, u4, tsfr, factorEVP, egyeff, egyeff2;
+  FILE *fd;
+
+  double eos_dens_threshold = All.PhysDensThresh;
+
+  meanweight = 4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC)); /* note: assuming FULL ionization */
+  u4         = 1 / meanweight * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * 1.0e4;
+  u4 *= All.UnitMass_in_g / All.UnitEnergy_in_cgs;
+
+  if(All.ComovingIntegrationOn)
+    {
+      All.Time = 1.0; /* to be guaranteed to get z=0 rate */
+      set_cosmo_factors_for_current_time();
+      IonizeParams();
+    }
+
+  /* fd is NULL when no output is requested on this task, or when the file cannot be opened */
+  fd = (WriteMiscFiles && (ThisTask == 0)) ? fopen("eos.txt", "w") : NULL;
+  if(WriteMiscFiles && (ThisTask == 0) && fd == NULL)
+    printf("USE_SFR: could not open 'eos.txt' for writing, table is not written\n");
+
+  for(rho = eos_dens_threshold; rho <= 1000 * eos_dens_threshold; rho *= 1.1) /* threshold to 1000x threshold, 10% steps */
+    {
+      ne     = 1.0;
+      egyeff = calc_egyeff(-1, rho, &ne, &x, &tsfr, &factorEVP);
+
+      P = GAMMA_MINUS1 * rho * egyeff;
+
+      if(fd != NULL) /* never write through a stream that was not opened */
+        {
+          fprintf(fd, "%g %g %g\n", rho, P, x);
+        }
+    }
+
+  if(fd != NULL)
+    fclose(fd);
+
+  /* same convention as above: fd is NULL when nothing can or should be written */
+  fd = (WriteMiscFiles && (ThisTask == 0)) ? fopen("sfrrate.txt", "w") : NULL;
+  if(WriteMiscFiles && (ThisTask == 0) && fd == NULL)
+    printf("USE_SFR: could not open 'sfrrate.txt' for writing, table is not written\n");
+
+  for(rho0 = eos_dens_threshold; rho0 <= 10000 * eos_dens_threshold; rho0 *= 1.02) /* start densities, 2% steps */
+    {
+      rho = rho0;
+      q   = 0;
+      dz  = 0.001;
+
+      sigma = sigmasfr = sigma_u4 = 0;
+
+      while(rho > 0.0001 * rho0) /* step in dz until the density has dropped to 1e-4 of its starting value */
+        {
+          if(rho > All.PhysDensThresh)
+            {
+              ne     = 1.0;
+              egyeff = calc_egyeff(-1, rho, &ne, &x, &tsfr, &factorEVP);
+
+              P = P1 = GAMMA_MINUS1 * rho * egyeff;
+
+              rho2 = 1.1 * rho;
+
+              egyeff2 = calc_egyeff(-1, rho2, &ne, &x2, &tsfr2, &factorEVP2);
+
+              P2 = GAMMA_MINUS1 * rho2 * egyeff2;
+
+              gam = log(P2 / P1) / log(rho2 / rho); /* logarithmic slope of P between rho and 1.1 * rho */
+            }
+          else
+            {
+              tsfr = 0; /* marks "no star formation" for the sigmasfr update below */
+
+              P   = GAMMA_MINUS1 * rho * u4;
+              gam = 1.0;
+
+              sigma_u4 += rho * dz;
+            }
+
+          drho = q;
+          dq   = -(gam - 2) / rho * q * q - 4 * M_PI * All.G / (gam * P) * rho * rho * rho;
+
+          sigma += rho * dz;
+          if(tsfr > 0)
+            {
+              sigmasfr += (1 - All.FactorSN) * rho * x / tsfr * dz;
+            }
+
+          rho += drho * dz; /* explicit Euler step for rho and its derivative q */
+          q += dq * dz;
+        }
+
+      sigma *= 2; /* to include the other side */
+      sigmasfr *= 2;
+      sigma_u4 *= 2;
+
+      sigma *= All.HubbleParam * (All.UnitMass_in_g / SOLAR_MASS) * PARSEC * PARSEC / (All.UnitLength_in_cm * All.UnitLength_in_cm);
+      sigmasfr *= All.HubbleParam * All.HubbleParam * (All.UnitMass_in_g / SOLAR_MASS) * (SEC_PER_YEAR / All.UnitTime_in_s) * 1.0e6 *
+                  PARSEC * PARSEC / (All.UnitLength_in_cm * All.UnitLength_in_cm);
+      sigma_u4 *= All.HubbleParam * (All.UnitMass_in_g / SOLAR_MASS) * PARSEC * PARSEC / (All.UnitLength_in_cm * All.UnitLength_in_cm);
+
+      if(fd != NULL) /* never write through a stream that was not opened */
+        {
+          fprintf(fd, "%g %g %g %g\n", rho0, sigma, sigmasfr, sigma_u4);
+        }
+    }
+
+  if(All.ComovingIntegrationOn) /* undo the temporary switch to All.Time = 1.0 made above */
+    {
+      All.Time = All.TimeBegin;
+      set_cosmo_factors_for_current_time();
+      IonizeParams();
+    }
+
+  if(fd != NULL)
+    fclose(fd);
+}
+
+/*! \brief Set the appropriate units for the parameters of the multi-phase
+ *         model.
+ *
+ *  \return void
+ */
+void set_units_sfr(void)
+{
+  /* common factor UnitMass_in_g / UnitEnergy_in_cgs applied to both specific energies below */
+  const double cgs_to_code = All.UnitMass_in_g / All.UnitEnergy_in_cgs;
+  const double mu_neutral  = 4 / (1 + 3 * HYDROGEN_MASSFRAC);       /* note: assuming NEUTRAL GAS */
+  const double mu_ionized  = 4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC)); /* note: assuming FULL ionization */
+
+  All.PhysDensThresh = All.CritPhysDensity * PROTONMASS / HYDROGEN_MASSFRAC / All.UnitDensity_in_cgs;
+  All.OverDensThresh = All.CritOverDensity * All.OmegaBaryon * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G);
+
+  /* specific energy belonging to the cloud temperature All.TempClouds */
+  All.EgySpecCold = 1 / mu_neutral * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * All.TempClouds;
+  All.EgySpecCold *= cgs_to_code;
+
+  /* specific energy belonging to the supernova temperature All.TempSupernova */
+  All.EgySpecSN = 1 / mu_ionized * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * All.TempSupernova;
+  All.EgySpecSN *= cgs_to_code;
+}
+
+/*! \brief Calculate the effective energy of the multi-phase model.
+ *
+ *  \param[in] i (unused)
+ *  \param[in] gasdens gas density.
+ *  \param[in, out] ne Fractional electron density.
+ *  \param[out] x Fraction cold gas within model.
+ *  \param[out] tsfr Star formation timescale.
+ *  \param[out] factorEVP Supernova evaporation factor for given density.
+ */
+double calc_egyeff(int i, double gasdens, double *ne, double *x, double *tsfr, double *factorEVP)
+{
+  /* densities below All.PhysDensThresh are evaluated at the threshold itself
+   * (the argument i is not used by this routine) */
+  const double dens = dmax(gasdens, All.PhysDensThresh);
+
+  /* star formation time scale and evaporation factor at this density */
+  *tsfr      = sqrt(All.PhysDensThresh / dens) * All.MaxSfrTimescale;
+  *factorEVP = pow(dens / All.PhysDensThresh, -0.8) * All.FactorEVP;
+
+  /* specific energy assigned to the hot phase, and the cooling time evaluated for it */
+  const double u_hot  = All.EgySpecSN / (1 + *factorEVP) + All.EgySpecCold;
+  const double t_cool = GetCoolingTime(u_hot, dens, ne);
+
+  /* dimensionless ratio that enters the expression for the cold fraction x */
+  const double ratio =
+      *tsfr / t_cool * u_hot / (All.FactorSN * All.EgySpecSN - (1 - All.FactorSN) * All.EgySpecCold);
+
+  *x = 1 + 1 / (2 * ratio) - sqrt(1 / ratio + 1 / (4 * ratio * ratio));
+
+  /* hot and cold specific energies weighted by (1 - x) and x */
+  return u_hot * (1 - *x) + All.EgySpecCold * (*x);
+}
+
+#endif /* #ifdef USE_SFR */
diff --git a/src/amuse/community/arepo/src/star_formation/starformation.c b/src/amuse/community/arepo/src/star_formation/starformation.c
new file mode 100644
index 0000000000..9ce94a96e5
--- /dev/null
+++ b/src/amuse/community/arepo/src/star_formation/starformation.c
@@ -0,0 +1,437 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/star_formation/starformation.c
+ * \date        05/2018
+ * \brief       Generic creation routines for star particles.
+ * \details     Star formation rates are calculated in sfr_eEOS for the
+ *              multiphase model.
+ *              contains functions:
+ *                void sfr_init()
+ *                void sfr_create_star_particles(void)
+ *                void convert_cell_into_star(int i, double birthtime)
+ *                void spawn_star_from_cell(int igas, double birthtime, int
+ *                  istar, MyDouble mass_of_star)
+ *                void make_star(int idx, int i, double prob, MyDouble
+ *                  mass_of_star, double *sum_mass_stars)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 07.06.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../gravity/forcetree.h"
+
+#ifdef USE_SFR
+
+static int stars_spawned;           /*!< local number of star particles spawned in the time step */
+static int tot_stars_spawned;       /*!< global number of star particles spawned in the time step */
+static int stars_converted;         /*!< local number of gas cells converted into stars in the time step */
+static int tot_stars_converted;     /*!< global number of gas cells converted into stars in the time step */
+static int altogether_spawned;      /*!< local number of star+wind particles spawned in the time step */
+static int tot_altogether_spawned;  /*!< global number of star+wind particles spawned in the time step */
+static double cum_mass_stars = 0.0; /*!< running total of the stellar mass created over all calls; only updated on task 0 */
+
+static int sfr_init_called = 0; /*!< set to 1 by the first call of sfr_init() */
+
+/*! \brief One-time set-up of the star formation module; only the first
+ *         call runs init_clouds(), any later call is a no-op.
+ *
+ *  \return void
+ */
+void sfr_init()
+{
+  if(!sfr_init_called)
+    {
+      sfr_init_called = 1; /* the flag is raised before the work is done */
+      init_clouds();
+    }
+}
+
+/*! \brief This routine creates star particles according to their
+ *         respective rates.
+ *
+ *  This function loops over all the active gas cells. If in a given cell the
+ *  SFR is greater than zero, the probability of forming a star is computed
+ *  and the corresponding particle is created stochastically according to the
+ *  model in Springel & Hernquist (2003, MNRAS). It also saves information
+ *  about the formed stellar mass and the star formation rate in the file
+ *  FdSfr (written on task 0 only).
+ *
+ *  \return void
+ */
+void sfr_create_star_particles(void)
+{
+  TIMER_START(CPU_COOLINGSFR);
+
+  int idx, i, bin;
+  double dt, dtime;
+  MyDouble mass_of_star;
+  double sum_sm, total_sm, rate, sum_mass_stars, total_sum_mass_stars;
+  double p = 0, pall = 0, prob, p_decide;
+  double rate_in_msunperyear;
+  double sfrrate, totsfrrate;
+
+  stars_spawned = stars_converted = 0;
+  sum_sm = sum_mass_stars = 0;
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i >= 0)
+        {
+          if(P[i].Mass == 0 && P[i].ID == 0)
+            continue; /* skip cells that have been swallowed or eliminated */
+
+#ifdef SFR_KEEP_CELLS
+          if(P[i].Mass < 0.3 * All.TargetGasMass)
+            continue;
+#endif /* #ifdef SFR_KEEP_CELLS */
+
+          dt = (P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval;
+
+          /* the actual time-step, i.e. dt scaled by All.cf_atime / All.cf_time_hubble_a */
+
+          dtime = All.cf_atime * dt / All.cf_time_hubble_a;
+
+          mass_of_star = 0;
+          prob         = 0;
+          p            = 0;
+          pall         = 0;
+
+          if(SphP[i].Sfr > 0)
+            {
+              p    = SphP[i].Sfr / ((All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR)) * dtime / P[i].Mass;
+              pall = p; /* star formation is the only process here, so pall equals p */
+              sum_sm += P[i].Mass * (1 - exp(-p));
+
+#if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS)
+
+              if(P[i].Mass < 2.0 * All.TargetGasMass)
+#ifdef SFR_KEEP_CELLS
+                mass_of_star = 0.9 * P[i].Mass;
+#else  /* #ifdef SFR_KEEP_CELLS */
+                mass_of_star = P[i].Mass;
+#endif /* #ifdef SFR_KEEP_CELLS */
+              else
+                mass_of_star = All.TargetGasMass;
+
+#ifdef REFINEMENT_HIGH_RES_GAS
+              if(SphP[i].HighResMass < HIGHRESMASSFAC * P[i].Mass)
+                {
+                  /* this cell does not appear to be in the high-res region.
+                     If we form a star, then it is given the mass of the cell,
+                     and later we give the star the SofteningType=3 particle to give it large softening */
+#ifdef SFR_KEEP_CELLS
+                  mass_of_star = 0.9 * P[i].Mass;
+#else  /* #ifdef SFR_KEEP_CELLS */
+                  mass_of_star = P[i].Mass;
+#endif /* #ifdef SFR_KEEP_CELLS #else */
+                }
+
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+
+#else  /* #if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS) */
+              mass_of_star = P[i].Mass;
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS) #else */
+
+#ifdef SFR_KEEP_CELLS
+              if(P[i].Mass < 0.5 * All.TargetGasMass)
+                continue; /* do not make stars from cells that should be derefined */
+#endif                    /* #ifdef SFR_KEEP_CELLS */
+
+              prob = P[i].Mass / mass_of_star * (1 - exp(-pall));
+            }
+
+          if(prob == 0)
+            continue;
+
+          if(prob < 0)
+            terminate("prob < 0");
+
+          if(prob > 1)
+            {
+              printf(
+                  "SFR: Warning, need to make a heavier star than desired. Task=%d prob=%g P[i].Mass=%g mass_of_star=%g "
+                  "mass_of_star_new=%g p=%g pall=%g\n",
+                  ThisTask, prob, P[i].Mass, mass_of_star, P[i].Mass * (1 - exp(-pall)), p, pall);
+              mass_of_star = P[i].Mass * (1 - exp(-pall));
+              prob         = 1.0;
+            }
+
+          /* decide what process to consider (currently available: make a star or kick to wind) */
+          p_decide = get_random_number();
+
+          if(p_decide < p / pall) /* ok, it is decided to consider star formation */
+            make_star(idx, i, prob, mass_of_star, &sum_mass_stars);
+        }
+    } /* end of main loop over active gas particles */
+
+  int in[4], out[4], cnt = 2; /* only the first cnt entries of in[] and out[] are used */
+  in[0] = stars_spawned;
+  in[1] = stars_converted;
+
+  MPI_Allreduce(in, out, cnt, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  tot_stars_spawned   = out[0];
+  tot_stars_converted = out[1];
+
+  if(tot_stars_spawned > 0 || tot_stars_converted > 0)
+    mpi_printf("SFR: spawned %d stars, converted %d gas particles into stars\n", tot_stars_spawned, tot_stars_converted);
+
+  tot_altogether_spawned = tot_stars_spawned;
+  altogether_spawned     = stars_spawned;
+
+  if(tot_altogether_spawned)
+    {
+      /* need to assign new unique IDs to the spawned stars */
+
+      int *list;
+
+      if(All.MaxID == 0) /* MaxID not calculated yet */
+        calculate_maxid();
+
+      list = mymalloc("list", NTask * sizeof(int));
+
+      MPI_Allgather(&altogether_spawned, 1, MPI_INT, list, 1, MPI_INT, MPI_COMM_WORLD);
+
+      MyIDType newid = All.MaxID + 1;
+
+      for(i = 0; i < ThisTask; i++) /* step over the IDs used by the lower-ranked tasks */
+        newid += list[i];
+
+      myfree(list);
+
+      for(i = 0; i < altogether_spawned; i++)
+        {
+          P[NumPart + i].ID = newid;
+
+          newid++;
+        }
+
+      All.MaxID += tot_altogether_spawned;
+    }
+
+  /* Note: New tree construction can be avoided because of  `force_add_star_to_tree()' */
+  if(tot_stars_spawned > 0 || tot_stars_converted > 0)
+    {
+      All.TotNumPart += tot_stars_spawned;
+      All.TotNumGas -= tot_stars_converted;
+      NumPart += stars_spawned;
+    }
+
+  for(bin = 0, sfrrate = 0; bin < TIMEBINS; bin++) /* local sum of TimeBinSfr over the occupied hydro time bins */
+    if(TimeBinsHydro.TimeBinCount[bin])
+      sfrrate += TimeBinSfr[bin];
+
+  double din[3] = {sfrrate, sum_sm, sum_mass_stars}, dout[3];
+
+  MPI_Reduce(din, dout, 3, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); /* dout[] is only meaningful on task 0 */
+
+  if(ThisTask == 0)
+    {
+      totsfrrate           = dout[0];
+      total_sm             = dout[1];
+      total_sum_mass_stars = dout[2];
+
+      if(All.TimeStep > 0)
+        rate = total_sm / (All.TimeStep / All.cf_time_hubble_a);
+      else
+        rate = 0;
+
+      /* compute the cumulative mass of stars */
+      cum_mass_stars += total_sum_mass_stars;
+
+      /* convert to solar masses per yr */
+      rate_in_msunperyear = rate * (All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR);
+
+      fprintf(FdSfr, "%14e %14e %14e %14e %14e %14e\n", All.Time, total_sm, totsfrrate, rate_in_msunperyear, total_sum_mass_stars,
+              cum_mass_stars);
+      myflush(FdSfr);
+    }
+
+  TIMER_STOP(CPU_COOLINGSFR);
+}
+
+/*! \brief Convert a cell into a star.
+ *
+ *  Turns gas cell i into a star particle in place: its type becomes 4,
+ *  its softening type is chosen again, its star formation rate is taken
+ *  out of TimeBinSfr and voronoi_remove_connection() is called for it.
+ *  Removal from the hydro time bin list is not done here.
+ *
+ *  \param[in] i Index of the gas cell to be converted.
+ *  \param[in] birthtime Time of birth (in code units) of the stellar particle (unused).
+ *
+ *  \return void
+ */
+void convert_cell_into_star(int i, double birthtime)
+{
+  const int star_type = 4; /* value stored in P[].Type for the new star */
+  (void)birthtime;         /* not used by this routine */
+
+  P[i].Type          = star_type;
+  P[i].SofteningType = All.SofteningTypeOfPartType[star_type];
+
+#if defined(REFINEMENT_HIGH_RES_GAS)
+  /* a cell that does not appear to be in the high-res region gets the
+     softening of particle type 3, i.e. a large softening */
+  if(SphP[i].HighResMass < HIGHRESMASSFAC * P[i].Mass)
+    P[i].SofteningType = All.SofteningTypeOfPartType[3];
+#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */
+
+#ifdef INDIVIDUAL_GRAVITY_SOFTENING
+  if(((1 << star_type) & (INDIVIDUAL_GRAVITY_SOFTENING)))
+    P[i].SofteningType = get_softening_type_from_mass(P[i].Mass);
+#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */
+
+  /* the cell no longer contributes to the star formation rate of its hydro time bin */
+  TimeBinSfr[P[i].TimeBinHydro] -= SphP[i].Sfr;
+
+  voronoi_remove_connection(i);
+}
+
+/*! \brief Spawn a star particle from a gas cell.
+ *
+ *  This function spawns a star particle from an active star-forming
+ *  cell. The particle information of the gas cell is copied to the
+ *  location istar and the fields necessary for the creation of the star
+ *  particle are initialized. The conserved variables of the gas cell
+ *  are then scaled by the fraction of mass that stays in the cell
+ *  to ensure conservation.
+ *
+ *  \param[in] igas Index of the gas cell from which the star is spawned.
+ *  \param[in] birthtime Time of birth (in code units) of the stellar particle (not referenced in the body).
+ *  \param[in] istar Index of the spawned stellar particle.
+ *  \param[in] mass_of_star The mass of the spawned stellar particle.
+ *
+ *  \return void
+ */
+void spawn_star_from_cell(int igas, double birthtime, int istar, MyDouble mass_of_star)
+{
+  P[istar]               = P[igas]; /* start from a full copy of the parent cell's particle entry */
+  P[istar].Type          = 4;
+  P[istar].SofteningType = All.SofteningTypeOfPartType[P[istar].Type];
+  P[istar].Mass          = mass_of_star;
+
+#if defined(REFINEMENT_HIGH_RES_GAS)
+  if(SphP[igas].HighResMass < HIGHRESMASSFAC * P[igas].Mass)
+    {
+      /* this cell does not appear to be in the high-res region.
+         We give the star the SofteningType=3 particle to give it large softening */
+      P[istar].SofteningType = All.SofteningTypeOfPartType[3];
+    }
+#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */
+
+#ifdef INDIVIDUAL_GRAVITY_SOFTENING
+  if(((1 << P[istar].Type) & (INDIVIDUAL_GRAVITY_SOFTENING)))
+    P[istar].SofteningType = get_softening_type_from_mass(P[istar].Mass);
+#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */
+
+  timebin_add_particle(&TimeBinsGravity, istar, igas, P[istar].TimeBinGrav, TimeBinSynchronized[P[istar].TimeBinGrav]);
+
+  /* now change the conserved quantities in the cell in proportion; fac is the mass fraction that stays in the cell */
+  double fac = (P[igas].Mass - P[istar].Mass) / P[igas].Mass;
+
+#ifdef MHD
+  double Emag = 0.5 * (SphP[igas].B[0] * SphP[igas].B[0] + SphP[igas].B[1] * SphP[igas].B[1] + SphP[igas].B[2] * SphP[igas].B[2]) *
+                SphP[igas].Volume * All.cf_atime;
+  SphP[igas].Energy -= Emag; /* take Emag out first so that it is not scaled by fac below */
+#endif /* #ifdef MHD */
+
+  P[igas].Mass *= fac;
+  SphP[igas].Energy *= fac;
+  SphP[igas].Momentum[0] *= fac;
+  SphP[igas].Momentum[1] *= fac;
+  SphP[igas].Momentum[2] *= fac;
+
+#ifdef MHD
+  SphP[igas].Energy += Emag; /* put the unscaled Emag back */
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+  for(int s = 0; s < N_Scalar; s++) /* Note, the changes in MATERIALS, HIGHRESGASMASS, etc., are treated as part of the Scalars */
+    *(MyFloat *)(((char *)(&SphP[igas])) + scalar_elements[s].offset_mass) *= fac;
+#endif /* #ifdef MAXSCALARS */
+
+  return;
+}
+
+/*! \brief Make a star particle from a gas cell.
+ *
+ *  A random number is drawn and compared with prob. On success the gas
+ *  cell itself is converted into a star particle if mass_of_star equals
+ *  the cell mass; otherwise a new star particle of mass mass_of_star is
+ *  spawned from it. Terminates if mass_of_star exceeds the cell mass.
+ *
+ *  \param[in] idx Index of the gas cell in the hydro list of active cells.
+ *  \param[in] i Index of the gas cell.
+ *  \param[in] prob Probability of making a star.
+ *  \param[in] mass_of_star Desired mass of the star particle.
+ *  \param[in, out] sum_mass_stars Holds the mass of all the stars created at the
+ *             current time-step (for the local task)
+ *
+ *  \return void
+ */
+void make_star(int idx, int i, double prob, MyDouble mass_of_star, double *sum_mass_stars)
+{
+  if(mass_of_star > P[i].Mass)
+    terminate("mass_of_star > P[i].Mass");
+
+  if(get_random_number() < prob)
+    {
+      if(mass_of_star == P[i].Mass)
+        {
+          /* here we turn the gas particle itself into a star particle */
+          Stars_converted++;
+          stars_converted++;
+
+          *sum_mass_stars += P[i].Mass;
+
+          convert_cell_into_star(i, All.Time);
+          timebin_remove_particle(&TimeBinsHydro, idx, P[i].TimeBinHydro);
+        }
+      else
+        {
+          /* in this case we spawn a new star particle, only reducing the mass in the cell by mass_of_star */
+          altogether_spawned = stars_spawned; /* stars already spawned on this task in the current call */
+          if(NumPart + altogether_spawned >= All.MaxPart)
+            terminate("NumPart=%d spawn %d particles no space left (All.MaxPart=%d)\n", NumPart, altogether_spawned, All.MaxPart);
+
+          int j = NumPart + altogether_spawned; /* index of new star */
+
+          spawn_star_from_cell(i, All.Time, j, mass_of_star);
+
+          *sum_mass_stars += mass_of_star;
+          stars_spawned++;
+        }
+    }
+}
+
+#endif /* #ifdef USE_SFR */
diff --git a/src/amuse/community/arepo/src/subfind/subfind.c b/src/amuse/community/arepo/src/subfind/subfind.c
new file mode 100644
index 0000000000..4759ae416a
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind.c
@@ -0,0 +1,577 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind.c
+ * \date        05/2018
+ * \brief       Main routines of the subfind sub-halo finder.
+ * \details     contains functions:
+ *                double subfind_get_particle_balance(void)
+ *                void subfind(int num)
+ *                void subfind_reorder_according_to_submp(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 11.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_rng.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../fof/fof.h"
+
+#ifdef SUBFIND
+#include "subfind.h"
+
+/*! \brief Measures how evenly the particles are spread over the tasks.
+ *
+ *  \return Largest per-task particle count divided by the mean count per task.
+ */
+double subfind_get_particle_balance(void)
+{
+  int heaviest_load;
+  long long n_all_tasks;
+  MPI_Allreduce(&NumPart, &heaviest_load, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  sumup_large_ints(1, &NumPart, &n_all_tasks);
+  return heaviest_load / (n_all_tasks / (double)NTask);
+}
+
+/*! \brief Main subfind algorithm.
+ *
+ *  \param[in] num Index of this snapshot output.
+ *
+ *  \return void
+ */
+void subfind(int num)
+{
+  double t0, t1, tstart, tend, cputime;
+  int i, gr, nlocid, offset;
+
+  TIMER_START(CPU_SUBFIND);
+
+  tstart = second();
+
+  mpi_printf("\nSUBFIND: We now execute a parallel version of SUBFIND.\n");
+
+  /* let's determine the local dark matter densities */
+
+  TIMER_STOP(CPU_SUBFIND);
+  construct_forcetree(0, 0, 1, All.HighestOccupiedTimeBin); /* build forcetree with all particles */
+  TIMER_START(CPU_SUBFIND);
+
+  cputime = subfind_density(FIND_SMOOTHING_LENGTHS);
+  mpi_printf("SUBFIND: iteration to correct primary neighbor count took %g sec\n", cputime);
+
+  /* free the tree storage again */
+  myfree(Father);
+  myfree(Nextnode);
+  myfree(Tree_Points);
+  force_treefree();
+
+  TIMER_STOP(CPU_SUBFIND);
+  construct_forcetree(0, 0, 0, All.HighestOccupiedTimeBin); /* build forcetree with all particles */
+  TIMER_START(CPU_SUBFIND);
+
+  cputime = subfind_density(FIND_TOTAL_DENSITIES);
+  mpi_printf("SUBFIND: density() took %g sec\n", cputime);
+
+  /* free the tree storage again */
+  myfree(Father);
+  myfree(Nextnode);
+  myfree(Tree_Points);
+  force_treefree();
+
+  for(i = 0; i < NumPart; i++)
+    if(P[i].Type == 0)
+      {
+#ifdef CELL_CENTER_GRAVITY
+        for(int j = 0; j < 3; j++)
+          PS[i].Center[j] = SphP[i].Center[j];
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        PS[i].Utherm = SphP[i].Utherm;
+      }
+    else
+      PS[i].Utherm = 0;
+
+  SubTreeAllocFactor = All.TreeAllocFactor;
+
+  /* Count, how many groups are above this limit, and how many processors we need for them */
+  int ncount = 0, nprocs = 0;
+  int seriallen = 0;
+  long long sum_seriallen;
+
+  double GroupSize = 0.6;
+
+  do
+    {
+      ncount    = 0;
+      nprocs    = 0;
+      seriallen = 0;
+
+      /* Let's set a fiducial size for the maximum group size before we select the collective subfind algorithm */
+      MaxSerialGroupLen = (int)(GroupSize * All.TotNumPart / NTask);
+
+      for(i = 0; i < Ngroups; i++)
+        if(Group[i].Len > MaxSerialGroupLen)
+          {
+            ncount++;
+            nprocs += ((Group[i].Len - 1) / MaxSerialGroupLen) + 1;
+          }
+        else
+          seriallen += Group[i].Len;
+
+      MPI_Allreduce(&ncount, &Ncollective, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+      MPI_Allreduce(&nprocs, &NprocsCollective, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+      sumup_large_ints(1, &seriallen, &sum_seriallen);
+
+      GroupSize += 0.05;
+    }
+  while(NprocsCollective > 0 && NprocsCollective >= NTask - 1);
+
+  if(GroupSize > 0.65)
+    {
+      mpi_printf("Increased GroupSize to %g.\n", GroupSize);
+    }
+
+  MPI_Allreduce(&ncount, &Ncollective, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+  MPI_Allreduce(&nprocs, &NprocsCollective, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+  sumup_large_ints(1, &seriallen, &sum_seriallen);
+
+  mpi_printf("SUBFIND: Number of FOF halos treated with collective SubFind code = %d\n", Ncollective);
+  mpi_printf("SUBFIND: Number of processors used in different partitions for the collective SubFind code = %d\n", NprocsCollective);
+  mpi_printf("SUBFIND: (The adopted size-limit for the collective algorithm was %d particles.)\n", MaxSerialGroupLen);
+  mpi_printf("SUBFIND: The other %d FOF halos are treated in parallel with serial code\n", TotNgroups - Ncollective);
+
+  /* set up a global table that informs about the processor assignment of the groups that are treated collectively */
+  ProcAssign                             = mymalloc_movable(&ProcAssign, "ProcAssign", Ncollective * sizeof(struct proc_assign_data));
+  struct proc_assign_data *locProcAssign = mymalloc("locProcAssign", ncount * sizeof(struct proc_assign_data));
+
+  for(i = 0, ncount = 0; i < Ngroups; i++)
+    if(Group[i].Len > MaxSerialGroupLen)
+      {
+        locProcAssign[ncount].GrNr = Group[i].GrNr;
+        locProcAssign[ncount].Len  = Group[i].Len;
+        ncount++;
+      }
+
+  /* gather the information on the collective groups accross all CPUs */
+  int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask);
+  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
+  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);
+
+  MPI_Allgather(&ncount, 1, MPI_INT, recvcounts, 1, MPI_INT, MPI_COMM_WORLD);
+
+  int task;
+  for(task = 0; task < NTask; task++)
+    bytecounts[task] = recvcounts[task] * sizeof(struct proc_assign_data);
+
+  for(task = 1, byteoffset[0] = 0; task < NTask; task++)
+    byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+
+  MPI_Allgatherv(locProcAssign, bytecounts[ThisTask], MPI_BYTE, ProcAssign, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);
+
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(recvcounts);
+  myfree(locProcAssign);
+
+  /* make sure, the table is sorted in ascending group-number order */
+  qsort(ProcAssign, Ncollective, sizeof(struct proc_assign_data), subfind_compare_procassign_GrNr);
+
+  /* assign the processor sets for the collective groups and set disjoint color-flag to later split the processors into different
+   * communicators */
+  for(i = 0, nprocs = 0, CommSplitColor = Ncollective; i < Ncollective; i++)
+    {
+      ProcAssign[i].FirstTask = nprocs;
+      ProcAssign[i].NTask     = ((ProcAssign[i].Len - 1) / MaxSerialGroupLen) + 1;
+      nprocs += ProcAssign[i].NTask;
+
+      if(ThisTask >= ProcAssign[i].FirstTask && ThisTask < (ProcAssign[i].FirstTask + ProcAssign[i].NTask))
+        CommSplitColor = i;
+    }
+
+  /* Now assign a target task for the group. For collective groups, the target task is the master in the CPU set, whereas
+   * the serial ones are distributed in a round-robin fashion to the remaining CPUs
+   */
+  for(i = 0; i < Ngroups; i++)
+    {
+      if(Group[i].Len > MaxSerialGroupLen) /* we have a collective group */
+        {
+          if(Group[i].GrNr >= Ncollective || Group[i].GrNr < 0)
+            terminate("odd");
+          Group[i].TargetTask = ProcAssign[Group[i].GrNr].FirstTask;
+        }
+      else
+        Group[i].TargetTask = ((Group[i].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective;
+    }
+
+  /* distribute the groups */
+  subfind_distribute_groups();
+  qsort(Group, Ngroups, sizeof(struct group_properties), fof_compare_Group_GrNr);
+
+  /* assign target CPUs for the particles in groups */
+  /* the particles not in groups will be distributed such that a uniform particle load results */
+  t0                  = second();
+  int *count_loc_task = mymalloc_clear("count_loc_task", NTask * sizeof(int));
+  int *count_task     = mymalloc("count_task", NTask * sizeof(int));
+  int *count_free     = mymalloc("count_free", NTask * sizeof(int));
+  int count_loc_free  = 0;
+
+  for(i = 0; i < NumPart; i++)
+    {
+      if(PS[i].GrNr < TotNgroups) /* particle is in a group */
+        {
+          if(PS[i].GrNr < Ncollective) /* we are in a collective group */
+            PS[i].TargetTask = ProcAssign[PS[i].GrNr].FirstTask + (i % ProcAssign[PS[i].GrNr].NTask);
+          else
+            PS[i].TargetTask = ((PS[i].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective;
+
+          count_loc_task[PS[i].TargetTask]++;
+        }
+      else
+        count_loc_free++;
+
+      PS[i].TargetIndex = 0; /* unimportant here */
+    }
+
+  MPI_Allgather(&count_loc_free, 1, MPI_INT, count_free, 1, MPI_INT, MPI_COMM_WORLD);
+  MPI_Allreduce(count_loc_task, count_task, NTask, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  long long sum = 0;
+  for(i = 0; i < NTask; i++)
+    sum += count_task[i] + count_free[i];
+
+  int maxload = (sum + NTask - 1) / NTask;
+  for(i = 0; i < NTask; i++)
+    {
+      count_task[i] = maxload - count_task[i]; /* this is the amount that can fit on this task */
+      if(count_task[i] < 0)
+        count_task[i] = 0;
+    }
+
+  int current_task = 0;
+
+  for(i = 0; i < ThisTask; i++)
+    {
+      while(count_free[i] > 0 && current_task < NTask)
+        {
+          if(count_free[i] < count_task[current_task])
+            {
+              count_task[current_task] -= count_free[i];
+              count_free[i] = 0;
+            }
+          else
+            {
+              count_free[i] -= count_task[current_task];
+              count_task[current_task] = 0;
+              current_task++;
+            }
+        }
+    }
+
+  for(i = 0; i < NumPart; i++)
+    {
+      if(PS[i].GrNr >=
+         TotNgroups) /* particle not in a group. Can in principle stay but we move it such that a good load balance is obtained. */
+        {
+          while(count_task[current_task] == 0 && current_task < NTask - 1)
+            current_task++;
+
+          PS[i].TargetTask = current_task; /* particle not in any group, move it here so that uniform load is achieved */
+          count_task[current_task]--;
+        }
+    }
+
+  myfree(count_free);
+  myfree(count_task);
+  myfree(count_loc_task);
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  int ngroups_cat = 42;     // dummy. not used for any calculation but fct needs to receive a value and we want to keep fct universal.
+#endif                      /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+  int nsubgroups_cat = 42;  // dummy. not used for any calculation but fct needs to receive a value and we want to keep fct universal.
+
+  double balance = subfind_get_particle_balance();
+  mpi_printf("SUBFIND: particle balance=%g\n", balance);
+
+  /* distribute particles such that groups are completely on the CPU(s) that do the corresponding group(s) */
+  fof_subfind_exchange(MPI_COMM_WORLD);
+  t1 = second();
+  mpi_printf("SUBFIND: subfind_exchange() took %g sec\n", timediff(t0, t1));
+
+  balance = subfind_get_particle_balance();
+  mpi_printf("SUBFIND: particle balance for processing=%g\n", balance);
+
+  /* lets estimate the maximum number of substructures we need to store on the local CPU */
+  if(ThisTask < NprocsCollective)
+    {
+      MaxNsubgroups = (ProcAssign[CommSplitColor].Len / ProcAssign[CommSplitColor].NTask) / All.DesLinkNgb;
+    }
+  else
+    {
+      for(i = 0, nlocid = 0; i < Ngroups; i++)
+        nlocid += Group[i].Len;
+
+      MaxNsubgroups = nlocid / All.DesLinkNgb; /* should be a quite conservative upper limit */
+    }
+
+  Nsubgroups = 0;
+  SubGroup = (struct subgroup_properties *)mymalloc_movable(&SubGroup, "SubGroup", MaxNsubgroups * sizeof(struct subgroup_properties));
+
+  /* we can now split the communicator to give each collectively treated group its own processor set */
+  MPI_Comm_split(MPI_COMM_WORLD, CommSplitColor, ThisTask, &SubComm);
+  MPI_Comm_size(SubComm, &SubNTask);
+  MPI_Comm_rank(SubComm, &SubThisTask);
+  SubTagOffset = TagOffset;
+
+  /* here the execution paths for collective groups and serial groups branch. The collective CPUs work in small sets that each
+   * deal with one large group. The serial CPUs each deal with several halos by themselves
+   */
+  if(CommSplitColor < Ncollective) /* we are one of the CPUs that does a collective group */
+    {
+      /* we now apply a collective version of subfind to the group split across the processors belonging to communicator SubComm
+       * The relevant group is the one stored in Group[0] on SubThisTask==0.
+       */
+      subfind_process_group_collectively(nsubgroups_cat);
+    }
+  else
+    {
+      /* now let us sort according to GrNr and Density. This step will temporarily break the association with SphP[] and other arrays!
+       */
+      submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart);
+      for(i = 0; i < NumPart; i++)
+        {
+          PS[i].SubNr         = TotNgroups + 1; /* set a default that is larger than reasonable group number */
+          PS[i].OldIndex      = i;
+          submp[i].index      = i;
+          submp[i].GrNr       = PS[i].GrNr;
+          submp[i].DM_Density = PS[i].Density;
+        }
+      qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_GrNr_DM_Density);
+      subfind_reorder_according_to_submp();
+      myfree(submp);
+
+      /* now we have the particles in each group consecutively */
+      if(SubThisTask == 0)
+        printf(
+            "SUBFIND-SERIAL: Start to do %d small groups (cumulative length %lld) with serial subfind algorithm on %d processors "
+            "(root-node=%d)\n",
+            TotNgroups - Ncollective, sum_seriallen, SubNTask, ThisTask);
+
+      /* we now apply a serial version of subfind to the local groups */
+      t0 = second();
+      for(gr = 0, offset = 0; gr < Ngroups; gr++)
+        {
+          if(((Group[gr].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective == ThisTask)
+            offset = subfind_process_group_serial(gr, offset, nsubgroups_cat);
+          else
+            terminate("how come that we have this group number?");
+        }
+
+      MPI_Barrier(SubComm);
+      t1 = second();
+      if(SubThisTask == 0)
+        printf("SUBFIND-SERIAL: processing of serial groups took %g sec\n", timediff(t0, t1));
+
+      /* undo local rearrangement that made groups consecutive. After that, the association of SphP[] will be correct again */
+      submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart);
+      for(i = 0; i < NumPart; i++)
+        {
+          submp[i].index    = i;
+          submp[i].OldIndex = PS[i].OldIndex;
+        }
+      qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_OldIndex);
+      subfind_reorder_according_to_submp();
+      myfree(submp);
+    }
+
+  /* free the communicator */
+  MPI_Comm_free(&SubComm);
+
+  /* make common allocation on all tasks */
+  int max_load, max_loadsph, load;
+
+  /* for resize */
+  load = All.MaxPart;
+  MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  load = All.MaxPartSph;
+  MPI_Allreduce(&load, &max_loadsph, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  /* do resize */
+  All.MaxPart = max_load;
+  reallocate_memory_maxpart();
+  PS = (struct subfind_data *)myrealloc_movable(PS, All.MaxPart * sizeof(struct subfind_data));
+
+  All.MaxPartSph = max_loadsph;
+  reallocate_memory_maxpartsph();
+
+  /* distribute particles back to original CPU */
+  t0 = second();
+  for(i = 0; i < NumPart; i++)
+    {
+      PS[i].TargetTask  = PS[i].OriginTask;
+      PS[i].TargetIndex = PS[i].OriginIndex;
+    }
+
+  fof_subfind_exchange(MPI_COMM_WORLD);
+  t1 = second();
+  if(ThisTask == 0)
+    printf("SUBFIND: subfind_exchange() (for return to original CPU)  took %g sec\n", timediff(t0, t1));
+
+  TIMER_STOP(CPU_SUBFIND);
+  construct_forcetree(0, 0, 0, All.HighestOccupiedTimeBin); /* build forcetree with all particles */
+  TIMER_START(CPU_SUBFIND);
+
+  /* compute spherical overdensities for FOF groups */
+  cputime = subfind_overdensity();
+  mpi_printf("SUBFIND: determining spherical overdensity masses took %g sec\n", cputime);
+
+  myfree(Father);
+  myfree(Nextnode);
+  myfree(Tree_Points);
+  force_treefree();
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  subfind_add_grp_props_calc_fof_angular_momentum(num, ngroups_cat);
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  MPI_Allreduce(&Nsubgroups, &TotNsubgroups, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  /* sort the groups according to group/subgroup-number */
+  t0 = second();
+  parallel_sort(Group, Ngroups, sizeof(struct group_properties), fof_compare_Group_GrNr);
+  parallel_sort(SubGroup, Nsubgroups, sizeof(struct subgroup_properties), subfind_compare_SubGroup_GrNr_SubNr);
+  t1 = second();
+  mpi_printf("SUBFIND: assembled and ordered groups and subgroups (took %g sec)\n", timediff(t0, t1));
+
+  /* determine largest subgroup and total particle/cell count in substructures */
+  int lenmax, glob_lenmax, totlen;
+  long long totsublength;
+  for(i = 0, totlen = 0, lenmax = 0; i < Nsubgroups; i++)
+    {
+      totlen += SubGroup[i].Len;
+
+      if(SubGroup[i].Len > lenmax)
+        lenmax = SubGroup[i].Len;
+    }
+  sumup_large_ints(1, &totlen, &totsublength);
+  MPI_Reduce(&lenmax, &glob_lenmax, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
+
+  /* set binding energy of fuzz to zero, was overwritten with Hsml before; needed for proper snapshot sorting of fuzz */
+  for(i = 0; i < NumPart; i++)
+    if(PS[i].SubNr == TotNgroups + 1)
+      PS[i].BindingEnergy = 0;
+
+  TIMER_STOP(CPU_SUBFIND);
+  TIMER_START(CPU_SNAPSHOT);
+
+  /* now final output of catalogue */
+  subfind_save_final(num);
+
+  TIMER_STOP(CPU_SNAPSHOT);
+  TIMER_START(CPU_SUBFIND);
+
+  tend = second();
+
+  if(ThisTask == 0)
+    {
+      printf("SUBFIND: Finished with SUBFIND.  (total time=%g sec)\n", timediff(tstart, tend));
+      printf("SUBFIND: Total number of subhalos with at least %d particles: %d\n", All.DesLinkNgb, TotNsubgroups);
+      if(TotNsubgroups > 0)
+        {
+          printf("SUBFIND: Largest subhalo has %d particles/cells.\n", glob_lenmax);
+          printf("SUBFIND: Total number of particles/cells in subhalos: %lld\n", totsublength);
+        }
+    }
+
+  myfree_movable(SubGroup);
+  myfree_movable(ProcAssign);
+
+  TIMER_STOP(CPU_SUBFIND);
+}
+
+/*! \brief Applies the permutation stored in submp to the P and PS arrays.
+ *
+ *  The element found at index submp[i].index ends up at index i. The cycles
+ *  of the permutation are followed in place, holding one element in reserve.
+ *  Only P and PS are permuted here; no other particle array is touched by
+ *  this routine.
+ *
+ *  \return void
+ */
+void subfind_reorder_according_to_submp(void)
+{
+  /* Id[old] is the slot the element currently stored at 'old' must move to */
+  int *Id = (int *)mymalloc("Id", sizeof(int) * (NumPart));
+
+  for(int k = 0; k < NumPart; k++)
+    Id[submp[k].index] = k;
+
+  /* walk every cycle of the permutation exactly once */
+  for(int start = 0; start < NumPart; start++)
+    {
+      if(Id[start] == start)
+        continue; /* already sitting in its final slot */
+
+      /* pick up the first element of this cycle */
+      struct particle_data Pcarry = P[start];
+      struct subfind_data PScarry = PS[start];
+      int idcarry                 = Id[start];
+      int dest                    = idcarry;
+
+      for(;;)
+        {
+          /* rescue whatever occupies the destination slot ... */
+          struct particle_data Pevicted = P[dest];
+          struct subfind_data PSevicted = PS[dest];
+          int idevicted                 = Id[dest];
+
+          /* ... before the carried element is dropped there */
+          P[dest]  = Pcarry;
+          PS[dest] = PScarry;
+          Id[dest] = idcarry;
+
+          if(dest == start)
+            break; /* cycle closed */
+
+          /* the evicted element travels to its own destination next */
+          Pcarry  = Pevicted;
+          PScarry = PSevicted;
+          idcarry = idevicted;
+          dest    = idcarry;
+        }
+    }
+
+  myfree(Id);
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind.h b/src/amuse/community/arepo/src/subfind/subfind.h
new file mode 100644
index 0000000000..d229af8490
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind.h
@@ -0,0 +1,213 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind.h
+ * \date        05/2018
+ * \brief       Header for subfind algorithm.
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 27.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef SUBFIND_H
+#define SUBFIND_H
+
+#include "../domain/domain.h"
+#include "../main/allvars.h"
+
+#define FIND_SMOOTHING_LENGTHS 0
+#define FIND_TOTAL_DENSITIES 1
+#define SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER 10000
+#define SUBFIND_GAL_RADIUS_FAC 2.0 /* for subfind metal calculation */
+
+#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES)
+extern int *NodeGrNr;
+#endif /* #if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) */
+
+extern int GrNr;         /* group number the collective routines work on; particles are selected with PS[].GrNr == GrNr */
+extern int NumPartGroup; /* number of local particles that belong to group GrNr */
+
+extern struct topnode_data *SubTopNodes;           /* top tree of the collective domain decomposition (copied from Sub_LocTopNodes) */
+extern struct local_topnode_data *Sub_LocTopNodes; /* working version of that top tree while it is being built */
+
+extern int *SubDomainTask; /* for every top-level leaf: the task within SubComm the leaf is assigned to */
+extern int *SubDomainNodeIndex;
+extern int *SubNextnode;
+extern int SubNTopleaves; /* number of leaves of the top tree */
+extern int SubNTopnodes;  /* number of nodes of the top tree */
+
+extern int SubTree_MaxPart;
+extern int SubTree_NumNodes;
+extern int SubTree_MaxNodes;
+extern int SubTree_FirstNonTopLevelNode;
+extern int SubTree_NumPartImported;
+extern int SubTree_NumPartExported;
+extern int SubTree_ImportedNodeOffset;
+extern int SubTree_NextFreeNode;
+extern MyDouble *SubTree_Pos_list;
+extern struct NODE *SubNodes;
+extern struct ExtNODE *SubExtNodes;
+
+extern double SubTreeAllocFactor;
+
+extern int *SubTree_ResultIndexList;
+extern int *SubTree_Task_list;
+extern unsigned long long *SubTree_IntPos_list;
+
+extern double SubDomainCorner[3], SubDomainCenter[3], SubDomainLen, SubDomainFac; /* all set by subfind_coll_findExtent() */
+extern double SubDomainInverseLen, SubDomainBigFac;                               /* all set by subfind_coll_findExtent() */
+
+extern MyDouble GrCM[3];
+
+extern int Ncollective;      /* NOTE(review): presumably the number of groups that are processed collectively - confirm */
+extern int NprocsCollective; /* NOTE(review): presumably the number of tasks set aside for collective groups - confirm */
+extern int MaxNsubgroups;
+extern int MaxNgbs;
+extern int MaxSerialGroupLen;
+extern r2type *R2list;
+
+extern int CommSplitColor;
+extern MPI_Comm SubComm; /* communicator of the subfind routines; released with MPI_Comm_free() at the end of the subfind run */
+
+extern int SubNTask, SubThisTask; /* NOTE(review): presumably size of and rank within SubComm - confirm */
+extern int SubTagOffset;
+
+extern struct proc_assign_data /* NOTE(review): the fields look like a group -> task-range assignment; confirm where ProcAssign is filled */
+{
+  int GrNr;
+  int Len;
+  int FirstTask;
+  int NTask;
+} * ProcAssign; /* released with myfree_movable() at the end of the subfind run */
+
+extern struct subgroup_properties /* catalogue entry of one subgroup; the local entries live in the SubGroup array */
+{
+  int Len; /* particle/cell count of the subgroup; the run summary reports its maximum and its sum */
+  int LenType[NTYPES];
+  int GrNr;  /* group number; first key when the catalogue is sorted with subfind_compare_SubGroup_GrNr_SubNr */
+  int SubNr; /* subgroup number; second key of that sort */
+  int SubParent;
+  MyIDType SubMostBoundID;
+  MyFloat Mass;
+  MyFloat MassType[NTYPES];
+  MyFloat SubVelDisp;
+  MyFloat SubVmax;
+  MyFloat SubVmaxRad;
+  MyFloat SubHalfMassRad;
+  MyFloat SubHalfMassRadType[NTYPES];
+  MyFloat SubMassInRad;
+  MyFloat SubMassInRadType[NTYPES];
+  MyFloat SubMassInHalfRad;
+  MyFloat SubMassInHalfRadType[NTYPES];
+  MyFloat SubMassInMaxRad;
+  MyFloat SubMassInMaxRadType[NTYPES];
+  MyFloat Pos[3];
+  MyFloat CM[3];
+  MyFloat Vel[3];
+  MyFloat Spin[3];
+
+#ifdef MHD
+  MyFloat Bfld_Halo, Bfld_Disk;
+#endif /* #ifdef MHD */
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  MyFloat Ekin, Epot, Ethr;
+  MyFloat J[3], Jdm[3], Jgas[3], Jstars[3], CMFrac, CMFracType[NTYPES];
+  MyFloat J_inRad[3], Jdm_inRad[3], Jgas_inRad[3], Jstars_inRad[3], CMFrac_inRad, CMFracType_inRad[NTYPES];
+  MyFloat J_inHalfRad[3], Jdm_inHalfRad[3], Jgas_inHalfRad[3], Jstars_inHalfRad[3], CMFrac_inHalfRad, CMFracType_inHalfRad[NTYPES];
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+#ifdef USE_SFR
+  MyFloat Sfr, SfrInRad, SfrInHalfRad, SfrInMaxRad, GasMassSfr;
+#endif /* #ifdef USE_SFR */
+} * SubGroup; /* released with myfree_movable() at the end of the subfind run */
+
+extern struct nearest_r2_data /* element type behind the global pointer R2Loc */
+{
+  double dist[2]; /* two distance values per entry */
+} * R2Loc;
+
+extern struct nearest_ngb_data /* element type behind the global pointer NgbLoc */
+{
+  long long index[2]; /* two indices per entry, stored as 64-bit integers */
+  int count;          /* NOTE(review): presumably how many of the index[] slots are in use - confirm */
+} * NgbLoc;
+
+extern int NumPaux; /* NOTE(review): presumably the number of entries in Paux - confirm */
+
+extern struct paux_data /* element type behind the global pointer Paux */
+{
+  int TaskOfGr;
+  int LocGrIndex;
+  unsigned char Type;
+  unsigned char SofteningType;
+  MyDouble Pos[3];
+  MyDouble Mass;
+} * Paux;
+
+extern struct submp_data /* sort record that drives subfind_reorder_according_to_submp() */
+{
+  int index;          /* position of the particle in P[]/PS[] at the time the record was filled */
+  int GrNr;           /* copy of PS[].GrNr, used as sort key when particles of a group are made consecutive */
+  int OldIndex;       /* copy of PS[].OldIndex, used as sort key when the original order is restored */
+  MyFloat DM_Density; /* copy of PS[].Density, used as sort key together with GrNr */
+} * submp;
+
+extern struct cand_dat /* element type behind the global pointer candidates; all members are plain int */
+{
+  int head;
+  int len;
+  int nsub;
+  int rank, subnr, parent;
+  int bound_length;
+} * candidates;
+
+extern struct coll_cand_dat /* element type behind the global pointer coll_candidates */
+{
+  long long head; /* 64-bit here, unlike the int member of the same name in struct cand_dat */
+  long long rank; /* 64-bit here, unlike the int member of the same name in struct cand_dat */
+  int len;
+  int nsub;
+  int subnr, parent;
+  int bound_length;
+} * coll_candidates;
+
+typedef struct /* result record handed to subfind_ngb_treefind_density() through its sub_dm_data argument */
+{
+  double rho;
+#ifdef SUBFIND_CALC_MORE
+  double vx, vy, vz;
+  double v2;
+#endif
+} SubDMData;
+
+void subfind_determine_sub_halo_properties(struct unbind_data *d, int num, struct subgroup_properties *subgroup, int grnr, int subnr,
+                                           int parallel_flag, int nsubgroups_cat);
+int subfind_ngb_treefind_density(MyDouble searchcenter[3], double hsml, int target, int *startnode, int mode, int *exportflag,
+                                 int *exportnodecount, int *exportindex, SubDMData *sub_dm_data);
+int subfind_treefind_collective_export_node_threads(int no, int i, int thread_id);
+void subfind_domain_do_local_refine(int n, int *list); /* defined in subfind_coll_domain.c: splits the n listed top nodes */
+void assign_group_numbers_based_on_catalogue(int ngroups_cat, int nsubgroups_cat);
+int subfind_compare_rlist_mhd(const void *a, const void *b); /* (const void *, const void *) comparator signature as taken by qsort() */
+
+#endif /* #ifndef SUBFIND_H */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_coll_domain.c b/src/amuse/community/arepo/src/subfind/subfind_coll_domain.c
new file mode 100644
index 0000000000..9abd20009d
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_coll_domain.c
@@ -0,0 +1,620 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_coll_domain.c
+ * \date        05/2018
+ * \brief       Domain decomposition for collective subfind algorithm.
+ * \details     contains functions:
+ *                static int mydata_cmp(struct mydata *lhs, struct mydata *rhs)
+ *                void subfind_coll_domain_decomposition(void)
+ *                void subfind_coll_findExtent(void)
+ *                int subfind_coll_domain_determineTopTree(void)
+ *                void subfind_domain_do_local_refine(int n, int *list)
+ *                void subfind_coll_domain_walktoptree(int no)
+ *                void subfind_coll_domain_combine_topleaves_to_domains(int ncpu, int ndomain)
+ *                void subfind_coll_domain_allocate(void)
+ *                void subfind_coll_domain_free(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 15.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef SUBFIND
+#include "../domain/bsd_tree.h"
+#include "../domain/domain.h"
+#include "subfind.h"
+
+/*! \brief Element of the RB tree that queues top nodes as candidates for refinement (ordered by mydata_cmp).
+ */
+struct mydata
+{
+  double workload;   /* work measure of the top node: 1.0 for the root, else the SubComm-wide sum of fac_work*Cost + fac_load*Count */
+  int topnode_index; /* index of the node in Sub_LocTopNodes[]; the nload[] array is addressed with the same index */
+
+  RB_ENTRY(mydata) linkage; /* this creates the linkage pointers needed by the RB tree, using symbolic name 'linkage' */
+};
+
+/*! \brief Ordering relation for struct mydata elements of the RB tree.
+ *
+ *  The workload is the primary key and sorts in descending order; equal
+ *  workloads are ordered by ascending topnode_index.
+ *
+ *  \param[in] lhs First object to compare.
+ *  \param[in] rhs Second object to compare.
+ *
+ *  \return -1 if lhs sorts before rhs, 1 if lhs sorts after rhs, and 0 if
+ *          workload and topnode_index both agree.
+ */
+static int mydata_cmp(struct mydata *lhs, struct mydata *rhs)
+{
+  /* primary key: the heavier element comes first */
+  if(lhs->workload > rhs->workload)
+    return -1;
+  if(lhs->workload < rhs->workload)
+    return 1;
+
+  /* neither workload is larger: the smaller node index comes first */
+  if(lhs->topnode_index != rhs->topnode_index)
+    return (lhs->topnode_index < rhs->topnode_index) ? -1 : 1;
+  return 0;
+}
+
+/* the following macro declares 'struct mytree', which is the header element
+ * needed as handle for a tree
+ */
+RB_HEAD(mytree, mydata);
+
+static struct mydata *nload;     /* storage of the tree elements, allocated in subfind_coll_domain_determineTopTree(), one slot per top node */
+static struct mytree queue_load; /* the tree itself: top nodes queued for possible refinement, ordered by mydata_cmp */
+
+/* the following macros declare appropriate function prototypes and functions
+ * needed for this type of tree
+ */
+RB_PROTOTYPE_STATIC(mytree, mydata, linkage, mydata_cmp);
+RB_GENERATE_STATIC(mytree, mydata, linkage, mydata_cmp);
+
+/*! \brief Performs domain decomposition for subfind collective: builds a top tree over the particles of
+ *         group GrNr, assigns its leaves to the tasks of SubComm and ships the particles to their new task.
+ *  \return void
+ */
+void subfind_coll_domain_decomposition(void)
+{
+  int i;
+  int col_grouplen, col_partcount; /* members of group GrNr resp. all particles, summed over the tasks of SubComm */
+
+  subfind_coll_domain_allocate(); /* reserves SubTopNodes and SubDomainTask */
+  subfind_coll_findExtent();      /* sets SubDomainCorner, SubDomainLen and the related globals */
+
+  Key             = (peanokey *)mymalloc_movable(&Key, "Key", (sizeof(peanokey) * NumPart));
+  Sub_LocTopNodes = (struct local_topnode_data *)mymalloc_movable(&Sub_LocTopNodes, "Sub_LocTopNodes",
+                                                                  (MaxTopNodes * sizeof(struct local_topnode_data)));
+
+  MPI_Allreduce(&NumPartGroup, &col_grouplen, 1, MPI_INT, MPI_SUM, SubComm);
+  MPI_Allreduce(&NumPart, &col_partcount, 1, MPI_INT, MPI_SUM, SubComm);
+
+  fac_work = 0.5 / col_grouplen;  /* weight of a top node's Cost in its workload */
+  fac_load = 0.5 / col_partcount; /* weight of a top node's Count in its workload */
+
+  subfind_coll_domain_determineTopTree(); /* fills Key[] for the group members and builds Sub_LocTopNodes[] */
+
+  /* find the split of the top-level tree */
+  subfind_coll_domain_combine_topleaves_to_domains(SubNTask, SubNTopleaves);
+
+  /* determine the particles that need to be exported, and to which CPU they need to be sent */
+  for(i = 0; i < NumPart; i++)
+    {
+      if(PS[i].GrNr == GrNr)
+        {
+          int no = 0; /* descend from the root; the quotient below selects the daughter whose key range contains Key[i] */
+          while(Sub_LocTopNodes[no].Daughter >= 0)
+            no = Sub_LocTopNodes[no].Daughter + (Key[i] - Sub_LocTopNodes[no].StartKey) / (Sub_LocTopNodes[no].Size >> 3);
+
+          no = Sub_LocTopNodes[no].Leaf; /* from here on 'no' is a leaf number, not a node index */
+
+          int task = SubDomainTask[no];
+
+          PS[i].TargetTask = task;
+        }
+      else
+        PS[i].TargetTask = SubThisTask; /* particles outside the group stay where they are */
+
+      PS[i].TargetIndex = 0; /* unimportant here */
+    }
+
+  fof_subfind_exchange(SubComm);
+
+  /* note that the domain decomposition leads to an invalid value of NumPartGroup. This will however be redetermined in the main
+   * routine of the collective subfind, after the domain decomposition has been done.
+   */
+
+  /* copy what we need for the topnodes */
+  for(i = 0; i < SubNTopnodes; i++)
+    {
+      SubTopNodes[i].StartKey = Sub_LocTopNodes[i].StartKey;
+      SubTopNodes[i].Size     = Sub_LocTopNodes[i].Size;
+      SubTopNodes[i].Daughter = Sub_LocTopNodes[i].Daughter;
+      SubTopNodes[i].Leaf     = Sub_LocTopNodes[i].Leaf;
+
+      int j;
+      int bits   = my_ffsll(SubTopNodes[i].Size);
+      int blocks = (bits - 1) / 3 - 1; /* shift applied below so that bit 0 of xb/yb/zb tells the daughters of this node apart */
+
+      for(j = 0; j < 8; j++)
+        {
+          peano1D xb, yb, zb;
+          peano_hilbert_key_inverse(SubTopNodes[i].StartKey + j * (SubTopNodes[i].Size >> 3), BITS_PER_DIMENSION, &xb, &yb, &zb);
+          xb >>= blocks;
+          yb >>= blocks;
+          zb >>= blocks;
+          int idx = (xb & 1) | ((yb & 1) << 1) | ((zb & 1) << 2); /* octant number: bit 0 from x, bit 1 from y, bit 2 from z */
+          if(idx < 0 || idx > 7) /* idx is assembled from three single bits, so this is purely a sanity check */
+            terminate("j=%d  idx=%d", j, idx);
+
+          SubTopNodes[i].MortonToPeanoSubnode[idx] = j; /* octant idx is the j-th daughter in key order */
+        }
+    }
+
+  myfree(Sub_LocTopNodes);
+  myfree(Key);
+  /* trim both tables from MaxTopNodes entries to the number of nodes resp. leaves actually present */
+  SubTopNodes   = (struct topnode_data *)myrealloc_movable(SubTopNodes, SubNTopnodes * sizeof(struct topnode_data));
+  SubDomainTask = (int *)myrealloc_movable(SubDomainTask, SubNTopleaves * sizeof(int));
+}
+
+/*! \brief Determines the bounding cube of group GrNr across all tasks of SubComm and stores it in the
+ *         SubDomain* globals (Len, InverseLen, Fac, BigFac, Center, Corner).
+ *  \return void
+ */
+void subfind_coll_findExtent(void)
+{
+  int i, j;
+  double len, xmin[3], xmax[3], xmin_glob[3], xmax_glob[3];
+
+  /* determine extension */
+  for(i = 0; i < 3; i++)
+    {
+      xmin[i] = MAX_REAL_NUMBER;
+      xmax[i] = -MAX_REAL_NUMBER;
+    }
+
+  for(i = 0; i < NumPart; i++)
+    {
+      if(PS[i].GrNr == GrNr) /* only members of the current group contribute */
+        {
+          for(j = 0; j < 3; j++)
+            {
+#ifdef CELL_CENTER_GRAVITY
+              if(P[i].Type == 0) /* type-0 particles are located by PS[].Center instead of P[].Pos */
+                {
+                  if(xmin[j] > PS[i].Center[j])
+                    xmin[j] = PS[i].Center[j];
+
+                  if(xmax[j] < PS[i].Center[j])
+                    xmax[j] = PS[i].Center[j];
+                }
+              else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+                {
+                  if(xmin[j] > P[i].Pos[j])
+                    xmin[j] = P[i].Pos[j];
+
+                  if(xmax[j] < P[i].Pos[j])
+                    xmax[j] = P[i].Pos[j];
+                }
+            }
+        }
+    }
+
+  MPI_Allreduce(xmin, xmin_glob, 3, MPI_DOUBLE, MPI_MIN, SubComm);
+  MPI_Allreduce(xmax, xmax_glob, 3, MPI_DOUBLE, MPI_MAX, SubComm);
+
+  len = 0; /* becomes the largest of the three global extents */
+  for(j = 0; j < 3; j++)
+    if(xmax_glob[j] - xmin_glob[j] > len)
+      len = xmax_glob[j] - xmin_glob[j];
+
+  len *= 1.001; /* the cube is made 0.1 per cent larger than the largest extent */
+
+  SubDomainLen        = len;
+  SubDomainInverseLen = 1.0 / SubDomainLen;
+  SubDomainFac        = 1.0 / len * (((peanokey)1) << (BITS_PER_DIMENSION));
+  SubDomainBigFac     = (SubDomainLen / (((long long)1) << 52));
+
+  for(j = 0; j < 3; j++)
+    {
+      SubDomainCenter[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]);
+      SubDomainCorner[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]) - 0.5 * len; /* lower corner of the cube centred on the bounding box */
+    }
+}
+
+/*! \brief Builds the top-level tree over the particles of the current group.
+ *
+ *  Computes the Peano-Hilbert keys of all local group members, then refines the tree in rounds until no node qualifies.
+ *  \return always 0; error conditions end in terminate()
+ */
+int subfind_coll_domain_determineTopTree(void)
+{
+  int i, count;
+
+  mp = (struct domain_peano_hilbert_data *)mymalloc("mp", sizeof(struct domain_peano_hilbert_data) * NumPartGroup);
+  /* collect key and particle index of every local member of group GrNr */
+  for(i = 0, count = 0; i < NumPart; i++)
+    {
+      if(PS[i].GrNr == GrNr)
+        {
+          if(count >= NumPartGroup) /* mp[] has room for NumPartGroup entries only: stop before writing past its end */
+            terminate("more than NumPartGroup=%d particles with GrNr=%d", NumPartGroup, GrNr);
+
+          peano1D xb, yb, zb;
+
+#ifdef CELL_CENTER_GRAVITY
+          if(P[i].Type == 0)
+            {
+              xb = domain_double_to_int(((PS[i].Center[0] - SubDomainCorner[0]) * SubDomainInverseLen) + 1.0);
+              yb = domain_double_to_int(((PS[i].Center[1] - SubDomainCorner[1]) * SubDomainInverseLen) + 1.0);
+              zb = domain_double_to_int(((PS[i].Center[2] - SubDomainCorner[2]) * SubDomainInverseLen) + 1.0);
+            }
+          else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+            {
+              xb = domain_double_to_int(((P[i].Pos[0] - SubDomainCorner[0]) * SubDomainInverseLen) + 1.0);
+              yb = domain_double_to_int(((P[i].Pos[1] - SubDomainCorner[1]) * SubDomainInverseLen) + 1.0);
+              zb = domain_double_to_int(((P[i].Pos[2] - SubDomainCorner[2]) * SubDomainInverseLen) + 1.0);
+            }
+
+          mp[count].key = Key[i] = peano_hilbert_key(xb, yb, zb, BITS_PER_DIMENSION);
+          mp[count].index        = i;
+          count++;
+        }
+    }
+
+  if(count != NumPartGroup)
+    terminate("count=%d != NumPartGroup=%d", count, NumPartGroup);
+
+  mysort_domain(mp, count, sizeof(struct domain_peano_hilbert_data));
+
+  SubNTopnodes                = 1; /* the tree starts out as a single root node that is also the only leaf */
+  SubNTopleaves               = 1;
+  Sub_LocTopNodes[0].Daughter = -1;
+  Sub_LocTopNodes[0].Parent   = -1;
+  Sub_LocTopNodes[0].Size     = PEANOCELLS;
+  Sub_LocTopNodes[0].StartKey = 0;
+  Sub_LocTopNodes[0].PIndex   = 0;
+  Sub_LocTopNodes[0].Cost     = NumPartGroup;
+  Sub_LocTopNodes[0].Count    = NumPartGroup;
+
+  int limitNTopNodes = 2 * imax(1 + (NTask / 7 + 1) * 8, All.TopNodeFactor * SubNTask);
+
+  if(limitNTopNodes > MaxTopNodes)
+    terminate("limitNTopNodes > MaxTopNodes");
+
+  RB_INIT(&queue_load);
+  nload     = mymalloc("nload", limitNTopNodes * sizeof(struct mydata));
+  int *list = mymalloc("list", limitNTopNodes * sizeof(int));
+
+  double limit = 1.0 / (All.TopNodeFactor * SubNTask); /* workload above which a node is split */
+
+  /* insert the root node */
+  nload[0].workload      = 1.0;
+  nload[0].topnode_index = 0;
+  RB_INSERT(mytree, &queue_load, &nload[0]);
+
+  do
+    {
+      count = 0; /* from here on: number of nodes selected for splitting in this round */
+
+      double first_workload = 0;
+      /* workload of the first queue entry that can still be split (Size >= 8); stays 0 if there is none */
+      for(struct mydata *nfirst = RB_MIN(mytree, &queue_load); nfirst != NULL; nfirst = RB_NEXT(mytree, &queue_load, nfirst))
+        {
+          if(Sub_LocTopNodes[nfirst->topnode_index].Size >= 8)
+            {
+              first_workload = nfirst->workload;
+              break;
+            }
+        }
+
+      for(struct mydata *np = RB_MIN(mytree, &queue_load); np != NULL; np = RB_NEXT(mytree, &queue_load, np))
+        {
+          if(np->workload < 0.125 * first_workload) /* the queue is ordered by descending workload, later entries are smaller still */
+            break;
+
+          if(SubNTopnodes + 8 * (count + 1) >= limitNTopNodes) /* one more split could exhaust the node budget */
+            break;
+
+          if(np->workload > limit || (SubNTopleaves < SubNTask && count == 0)) /* too heavy, or still fewer leaves than tasks */
+            {
+              if(Sub_LocTopNodes[np->topnode_index].Size >= 8)
+                {
+                  list[count] = np->topnode_index;
+                  count++;
+                }
+            }
+        }
+
+      if(count > 0)
+        {
+          /* split the selected nodes; their daughters replace them in queue_load */
+          subfind_domain_do_local_refine(count, list);
+        }
+    }
+  while(count > 0);
+
+  myfree(list);
+  myfree(nload);
+  myfree(mp);
+
+  /* recount the top leaves from scratch; the walk also numbers them consecutively */
+  SubNTopleaves = 0;
+  subfind_coll_domain_walktoptree(0);
+
+  if(SubNTopleaves < SubNTask)
+    terminate("SubNTopleaves = %d < SubNTask = %d", SubNTopleaves, SubNTask);
+
+  return 0;
+}
+
+/*! \brief Refines top-tree locally: splits each listed top node into 8 daughters and queues the daughters.
+ *
+ *  \param[in] n Number of top nodes to be split.
+ *  \param[in] list Array with the Sub_LocTopNodes indices of the nodes to be split.
+ *
+ *  \return void
+ */
+void subfind_domain_do_local_refine(int n, int *list)
+{
+  double *worktotlist = mymalloc("worktotlist", 8 * n * sizeof(double)); /* daughter workloads summed over SubComm */
+  double *worklist    = mymalloc("worklist", 8 * n * sizeof(double));    /* daughter workloads of this task only */
+
+  /* create the new nodes */
+  for(int k = 0; k < n; k++)
+    {
+      int i = list[k];
+
+      Sub_LocTopNodes[i].Daughter = SubNTopnodes; /* the 8 daughters occupy consecutive slots at the end of the node array */
+      SubNTopnodes += 8;
+      SubNTopleaves += 7; /* one leaf turns into eight */
+
+      for(int j = 0; j < 8; j++)
+        {
+          int sub = Sub_LocTopNodes[i].Daughter + j;
+
+          Sub_LocTopNodes[sub].Daughter = -1;
+          Sub_LocTopNodes[sub].Parent   = i;
+          Sub_LocTopNodes[sub].Size     = (Sub_LocTopNodes[i].Size >> 3);
+          Sub_LocTopNodes[sub].StartKey = Sub_LocTopNodes[i].StartKey + j * Sub_LocTopNodes[sub].Size;
+          Sub_LocTopNodes[sub].PIndex   = Sub_LocTopNodes[i].PIndex;
+          Sub_LocTopNodes[sub].Cost     = 0;
+          Sub_LocTopNodes[sub].Count    = 0;
+        }
+
+      int sub = Sub_LocTopNodes[i].Daughter;
+      /* hand the parent's range of mp[] entries to the daughters; j counts how far 'sub' has advanced past the first one */
+      for(int p = Sub_LocTopNodes[i].PIndex, j = 0; p < Sub_LocTopNodes[i].PIndex + Sub_LocTopNodes[i].Count; p++)
+        {
+          if(PS[mp[p].index].GrNr != GrNr)
+            terminate("Houston, we have a problem.");
+
+          if(j < 7)
+            while(mp[p].key >= Sub_LocTopNodes[sub + 1].StartKey) /* key lies beyond the current daughter: move on */
+              {
+                j++;
+                sub++;
+                Sub_LocTopNodes[sub].PIndex = p; /* first mp[] entry that can belong to this daughter */
+                if(j >= 7)
+                  break;
+              }
+
+          Sub_LocTopNodes[sub].Count++;
+          Sub_LocTopNodes[sub].Cost++;
+        }
+
+      for(int j = 0; j < 8; j++)
+        {
+          sub                 = Sub_LocTopNodes[i].Daughter + j;
+          worklist[k * 8 + j] = fac_work * Sub_LocTopNodes[sub].Cost + fac_load * Sub_LocTopNodes[sub].Count;
+        }
+    }
+
+  MPI_Allreduce(worklist, worktotlist, 8 * n, MPI_DOUBLE, MPI_SUM, SubComm);
+  /* the split nodes leave the queue ... */
+  for(int k = 0; k < n; k++)
+    {
+      int i = list[k];
+      RB_REMOVE(mytree, &queue_load, &nload[i]);
+    }
+  /* ... and their daughters enter it; l runs over worktotlist in the order in which worklist was filled */
+  for(int k = 0, l = 0; k < n; k++)
+    {
+      int i = list[k];
+
+      for(int j = 0; j < 8; j++, l++)
+        {
+          int sub = Sub_LocTopNodes[i].Daughter + j;
+
+          /* insert the node */
+          nload[sub].workload      = worktotlist[l];
+          nload[sub].topnode_index = sub;
+          RB_INSERT(mytree, &queue_load, &nload[sub]);
+        }
+    }
+
+  myfree(worklist);
+  myfree(worktotlist);
+}
+
+/*! \brief Numbers the leaves of the top tree below a given node.
+ *
+ *  \param[in] no Index in Sub_LocTopNodes of the subtree root to visit.
+ *
+ *  \return void
+ */
+void subfind_coll_domain_walktoptree(int no)
+{
+  if(Sub_LocTopNodes[no].Daughter != -1)
+    {
+      /* internal node: visit its eight daughters in storage order */
+      for(int oct = 0; oct < 8; oct++)
+        subfind_coll_domain_walktoptree(Sub_LocTopNodes[no].Daughter + oct);
+
+      return;
+    }
+
+  /* leaf: it receives the next free leaf number, taken from SubNTopleaves */
+  Sub_LocTopNodes[no].Leaf = SubNTopleaves;
+  SubNTopleaves++;
+}
+
+/*! \brief Uses the cumulative cost function (which weights work-load and
+ *         memory-load equally) to subdivide the list of top-level leaf
+ *         nodes into consecutive pieces, one per task, and records the
+ *         owner of every leaf in SubDomainTask[].
+ *  \param[in] ncpu Number of tasks.
+ *  \param[in] ndomain Number of top-level leaves (the caller in this file passes SubNTopleaves).
+ *
+ *  \return void
+ */
+void subfind_coll_domain_combine_topleaves_to_domains(int ncpu, int ndomain)
+{
+  int i, j, start, end, n, no;
+  double work, workavg, work_before, workavg_before, workhalfnode;
+  float *domainWork, *local_domainWork;
+  int *domainCount, *local_domainCount;
+
+  /* sum the costs for each top leaf */
+  /* each allocation carries its own label, so that allocator diagnostics can tell the global and the local arrays apart */
+  domainWork  = (float *)mymalloc("domainWork", SubNTopleaves * sizeof(float));
+  domainCount = (int *)mymalloc("domainCount", SubNTopleaves * sizeof(int));
+
+  local_domainWork  = (float *)mymalloc("local_domainWork", SubNTopleaves * sizeof(float));
+  local_domainCount = (int *)mymalloc("local_domainCount", SubNTopleaves * sizeof(int));
+
+  for(i = 0; i < SubNTopleaves; i++)
+    {
+      local_domainWork[i]  = 0;
+      local_domainCount[i] = 0;
+    }
+
+  /* find for each particle its top-leaf, and then add the associated cost with it */
+  for(n = 0; n < NumPart; n++)
+    {
+      if(PS[n].GrNr == GrNr)
+        {
+          no = 0; /* descend from the root to the leaf whose key range contains Key[n] */
+          while(Sub_LocTopNodes[no].Daughter >= 0)
+            no = Sub_LocTopNodes[no].Daughter + (Key[n] - Sub_LocTopNodes[no].StartKey) / (Sub_LocTopNodes[no].Size >> 3);
+
+          no = Sub_LocTopNodes[no].Leaf; /* from here on 'no' is a leaf number */
+
+          local_domainCount[no] += 1;
+          local_domainWork[no] += 1;
+        }
+    }
+
+  MPI_Allreduce(local_domainWork, domainWork, SubNTopleaves, MPI_FLOAT, MPI_SUM, SubComm);
+  MPI_Allreduce(local_domainCount, domainCount, SubNTopleaves, MPI_INT, MPI_SUM, SubComm);
+
+  myfree(local_domainCount);
+  myfree(local_domainWork);
+
+  /* now combine the top leaves to form the individual domains */
+
+  workhalfnode = 0.5 / ndomain; /* tolerance of half an average leaf */
+  workavg      = 1.0 / ncpu;    /* target share of one task */
+  work_before = workavg_before = 0;
+
+  start = 0;
+
+  for(i = 0; i < ncpu; i++)
+    {
+      work = 0;
+      end  = start; /* task i always receives leaf 'start'; the loop below decides how many more follow */
+
+      work += fac_work * domainWork[end] + fac_load * domainCount[end];
+
+      while((work + work_before + (end + 1 < ndomain ? fac_work * domainWork[end + 1] + fac_load * domainCount[end + 1] : 0) <
+             workavg + workavg_before + workhalfnode) ||
+            (i == ncpu - 1 && end < ndomain - 1)) /* the last task takes all leaves that remain */
+        {
+          if((ndomain - end) > (ncpu - i)) /* extend only while every later task can still get a leaf of its own */
+            end++;
+          else
+            break;
+
+          work += fac_work * domainWork[end] + fac_load * domainCount[end];
+        }
+
+      for(j = start; j <= end; j++)
+        SubDomainTask[j] = i;
+
+      work_before += work;
+      workavg_before += workavg;
+      start = end + 1;
+    }
+
+  myfree(domainCount);
+  myfree(domainWork);
+}
+
+/*! \brief Reserves the top-node table and the leaf-to-task table of the
+ *         collective subfind domain decomposition, MaxTopNodes entries each.
+ *
+ *  \return void
+ */
+void subfind_coll_domain_allocate(void)
+{
+  MaxTopNodes = (int)(All.TopNodeAllocFactor * All.MaxPart + 1);
+
+  if(SubDomainTask != NULL) /* tables of an earlier decomposition are still around */
+    terminate("subfind collective domain storage already allocated");
+
+  SubTopNodes   = (struct topnode_data *)mymalloc_movable(&SubTopNodes, "SubTopNodes", (MaxTopNodes * sizeof(*SubTopNodes)));
+  SubDomainTask = (int *)mymalloc_movable(&SubDomainTask, "SubDomainTask", (MaxTopNodes * sizeof(*SubDomainTask)));
+}
+
+/*! \brief Releases the tables reserved by subfind_coll_domain_allocate().
+ *
+ *  \return void
+ */
+void subfind_coll_domain_free(void)
+{
+  if(SubDomainTask == NULL)
+    terminate("subfind collective domain storage not allocated");
+
+  /* release in reverse order of allocation and clear each pointer right away */
+  myfree(SubDomainTask);
+  SubDomainTask = NULL;
+  myfree(SubTopNodes);
+  SubTopNodes = NULL;
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_coll_tree.c b/src/amuse/community/arepo/src/subfind/subfind_coll_tree.c
new file mode 100644
index 0000000000..96d7db4b07
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_coll_tree.c
@@ -0,0 +1,992 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_coll_tree.c
+ * \date        05/2018
+ * \brief       Functions for tree-construction for subfind collective.
+ * \details     contains functions:
+ *                int subfind_coll_treebuild(int npart, struct unbind_data *mp)
+ *                int subfind_coll_treebuild_construct(int npart, struct
+ *                  unbind_data *mp)
+ *                int subfind_coll_treebuild_insert_single_point(int i,
+ *                  unsigned long long *intpos, int th, unsigned char levels)
+ *                int subfind_coll_create_empty_nodes(int no, int topnode,
+ *                  int bits, int x, int y, int z, unsigned long long xc,
+ *                  unsigned long long yc, unsigned long long zc,
+ *                  unsigned long long ilen)
+ *                void subfind_coll_insert_pseudo_particles(void)
+ *                void subfind_coll_update_node_recursive(int no, int sib,
+ *                  int father, int *last)
+ *                void subfind_coll_exchange_topleafdata(void)
+ *                void subfind_coll_treeupdate_toplevel(int no, int topnode,
+ *                  int bits, int x, int y, int z)
+ *                void subfind_coll_treeallocate(int maxpart, int maxindex)
+ *                void subfind_coll_treefree(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 04.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef SUBFIND
+#include "../gravity/forcetree.h"
+#include "subfind.h"
+
+/*! \brief Builds the collective subfind tree. The construction is repeated
+ *         with enlarged node storage until it succeeds on every task of
+ *         SubComm; afterwards the moments of the tree nodes are computed.
+ *
+ *  \param[in] npart Number of particles.
+ *  \param[in] mp Unbind data.
+ *
+ *  \return Number of nodes in tree.
+ */
+int subfind_coll_treebuild(int npart, struct unbind_data *mp)
+{
+  int status_local, status_global;
+
+  for(;;)
+    {
+      status_local = subfind_coll_treebuild_construct(npart, mp);
+
+      /* the minimum turns negative as soon as one task ran out of tree nodes */
+      MPI_Allreduce(&status_local, &status_global, 1, MPI_INT, MPI_MIN, SubComm);
+
+      if(status_global >= 0)
+        break;
+
+      /* release the tree, raise the allocation factor and allocate afresh */
+      subfind_coll_treefree();
+      SubTreeAllocFactor *= 1.15;
+
+      printf("SUBFIND-COLLECTIVE, root-task=%d: Increasing TreeAllocFactor, new value=%g\n", ThisTask, SubTreeAllocFactor);
+      fflush(stdout);
+
+      subfind_coll_treeallocate(NumPart, All.MaxPart);
+    }
+
+  /* hook in the pseudo particles that stand for the mass held by other domains */
+  subfind_coll_insert_pseudo_particles();
+
+  /* recursive pass that computes the moments and chains the visited elements */
+  int tail = -1;
+
+  subfind_coll_update_node_recursive(SubTree_MaxPart, -1, -1, &tail);
+
+  /* close the nextnode chain at the element that was visited last */
+  if(tail < SubTree_MaxPart)
+    SubNextnode[tail] = -1;
+  else if(tail < SubTree_MaxPart + SubTree_MaxNodes)
+    SubNodes[tail].u.d.nextnode = -1;
+  else /* a pseudo-particle or imported particle */
+    SubNextnode[tail - SubTree_MaxNodes] = -1;
+
+  subfind_coll_exchange_topleafdata();
+
+  /* the top-level update hands out node indices again, starting behind the root */
+  SubTree_NextFreeNode = SubTree_MaxPart + 1;
+  subfind_coll_treeupdate_toplevel(SubTree_MaxPart, 0, 1, 0, 0, 0);
+
+  return SubTree_NumNodes;
+}
+
+/*! \brief Constructs the collective subfind oct-tree.
+ *
+ *  The index convention for accessing tree nodes is the following:
+ *  node index
+ *  [0...SubTree_MaxPart-1]   references single particles, the indices
+ *  [SubTree_MaxPart...SubTree_MaxPart+SubTree_MaxNodes-1] references tree
+ *  nodes.
+ *  [SubTree_MaxPart+SubTree_MaxNodes...
+ *  SubTree_MaxPart+SubTree_MaxNodes+SubNTopleaves-1] references "pseudo
+ *  particles", i.e. mark branches on foreign CPUs. Indices from
+ *  SubTree_ImportedNodeOffset = SubTree_MaxPart+SubTree_MaxNodes+SubNTopleaves
+ *  onwards would denote imported points; the insertion routine rejects them.
+ *
+ *  The root tree node is SubNodes[SubTree_MaxPart]; it is created first, then
+ *  the empty top-level nodes, then the particles are inserted one by one.
+ *
+ *  \param[in] npart Number of particles.
+ *  \param[in] mp Unbind data; if non-NULL, mp[i].index is the particle index.
+ *
+ *  \return Number of nodes, or -1 if the tree storage was exhausted.
+ */
+int subfind_coll_treebuild_construct(int npart, struct unbind_data *mp)
+{
+  int i, j, k, no, flag_full = 0;
+  unsigned long long *intposp;
+  MyDouble *posp;
+  unsigned long long ibaselen = ((unsigned long long)1) << 52;
+
+  /* create an empty root node  */
+  SubTree_NextFreeNode = SubTree_MaxPart;                 /* index of first free node */
+  struct NODE *nfreep  = &SubNodes[SubTree_NextFreeNode]; /* select first node        */
+
+  for(j = 0; j < 8; j++)
+    nfreep->u.suns[j] = -1;
+
+  nfreep->len = SubDomainLen;
+  for(j = 0; j < 3; j++)
+    nfreep->center[j] = SubDomainCenter[j];
+
+  SubTree_NumNodes = 1;
+  SubTree_NextFreeNode++;
+
+  /* create a set of empty nodes corresponding to the top-level domain
+   * grid. We need to generate these nodes first to make sure that we have a
+   * complete top-level tree which allows the easy insertion of the
+   * pseudo-particles at the right place
+   */
+  if(subfind_coll_create_empty_nodes(SubTree_MaxPart, 0, 1, 0, 0, 0, 0, 0, 0, ibaselen) < 0)
+    return -1;
+
+  SubTree_FirstNonTopLevelNode = SubTree_NextFreeNode;
+
+  /* scratch array with three integer coordinates per particle; it is filled
+   * in the insertion loop below, read back by
+   * subfind_coll_treebuild_insert_single_point(), and released again at the
+   * end of this function
+   */
+
+  SubTree_IntPos_list =
+      (unsigned long long *)mymalloc_movable(&SubTree_IntPos_list, "SubTree_IntPos_list", 3 * NumPart * sizeof(unsigned long long));
+
+  SubTree_ImportedNodeOffset = SubTree_MaxPart + SubTree_MaxNodes + SubNTopleaves;
+
+  /* copy each particle position into SubTree_Pos_list and verify that it lies inside the sub-domain box */
+  for(i = 0; i < npart; i++)
+    {
+      for(j = 0; j < 3; j++)
+        {
+          if(mp)
+            k = mp[i].index;
+          else
+            k = i;
+
+#ifdef CELL_CENTER_GRAVITY
+          if(P[k].Type == 0)
+            posp = &PS[k].Center[j];
+          else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+            posp = &P[k].Pos[j];
+
+          if(*posp < SubDomainCorner[j] || *posp >= SubDomainCorner[j] + SubDomainLen)
+            {
+              terminate("out of box i=%d j=%d coord=%g SubDomainCorner=(%g|%g|%g) SubDomainLen=%g", i, j, *posp, SubDomainCorner[0],
+                        SubDomainCorner[1], SubDomainCorner[2], SubDomainLen);
+            }
+
+          SubTree_Pos_list[3 * k + j] = *posp;
+        }
+    }
+
+  for(i = 0; i < npart; i++) /* find the top-level leaf of each particle and insert it below that leaf */
+    {
+      if(mp)
+        k = mp[i].index;
+      else
+        k = i;
+
+      posp = &SubTree_Pos_list[3 * k];
+
+      unsigned long long xxb  = force_double_to_int(((*posp++ - SubDomainCorner[0]) * SubDomainInverseLen) + 1.0);
+      unsigned long long yyb  = force_double_to_int(((*posp++ - SubDomainCorner[1]) * SubDomainInverseLen) + 1.0);
+      unsigned long long zzb  = force_double_to_int(((*posp++ - SubDomainCorner[2]) * SubDomainInverseLen) + 1.0);
+      unsigned long long mask = ((unsigned long long)1) << (52 - 1);
+      unsigned char shiftx    = (52 - 1);
+      unsigned char shifty    = (52 - 2);
+      unsigned char shiftz    = (52 - 3);
+      unsigned char levels    = 0;
+
+      intposp = &SubTree_IntPos_list[3 * k];
+
+      *intposp++ = xxb;
+      *intposp++ = yyb;
+      *intposp++ = zzb;
+
+      no = 0; /* start at top node 0 and descend until a top node without daughters is reached */
+      while(SubTopNodes[no].Daughter >= 0)
+        {
+          unsigned char subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) |
+                                   ((unsigned char)((zzb & mask) >> (shiftz--))));
+
+          mask >>= 1;
+          levels++;
+
+          no = SubTopNodes[no].Daughter + SubTopNodes[no].MortonToPeanoSubnode[subnode];
+        }
+
+      no = SubTopNodes[no].Leaf;
+
+      if(no >= SubTree_ImportedNodeOffset)
+        terminate("i=%d: no=%d SubTree_ImportedNodeOffset=%d", i, no, SubTree_ImportedNodeOffset);
+
+      if(subfind_coll_treebuild_insert_single_point(k, &SubTree_IntPos_list[3 * k], SubDomainNodeIndex[no], levels) < 0)
+        {
+          flag_full = 1; /* no free tree node left: abandon this attempt */
+          break;
+        }
+    }
+
+  myfree_movable(SubTree_IntPos_list);
+
+  if(flag_full)
+    return -1;
+
+  return SubTree_NumNodes;
+}
+
+/*! \brief Inserts single point in tree.
+ *
+ *  \param[in] i Index of particle.
+ *  \param[in] intpos Integer position (three components).
+ *  \param[in] th Index in SubNodes at which the descent starts.
+ *  \param[in] levels Tree level that corresponds to node th.
+ *
+ *  \return 0 on success; -1 if the number of nodes reached SubTree_MaxNodes.
+ */
+int subfind_coll_treebuild_insert_single_point(int i, unsigned long long *intpos, int th, unsigned char levels)
+{
+  int j, parent = -1;
+  unsigned char subnode       = 0;
+  unsigned long long xxb      = intpos[0];
+  unsigned long long yyb      = intpos[1];
+  unsigned long long zzb      = intpos[2];
+  unsigned long long mask     = ((unsigned long long)1) << ((52 - 1) - levels);
+  unsigned char shiftx        = (52 - 1) - levels;
+  unsigned char shifty        = (52 - 2) - levels;
+  unsigned char shiftz        = (52 - 3) - levels;
+  signed long long centermask = (0xFFF0000000000000llu); /* NOTE(review): signed, presumably so that >>= shifts in one-bits - confirm */
+  unsigned long long *intppos;
+  centermask >>= levels;
+
+  while(1)
+    {
+      if(th >= SubTree_MaxPart && th < SubTree_ImportedNodeOffset) /* we are dealing with an internal node */
+        {
+          subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) |
+                     ((unsigned char)((zzb & mask) >> (shiftz--))));
+
+          centermask >>= 1;
+          mask >>= 1;
+          levels++;
+
+          if(levels > MAX_TREE_LEVEL)
+            {
+              /* seems like we're dealing with particles at identical (or extremely close)
+               * locations. Shift subnode index to allow tree construction. Note: Multipole moments
+               * of tree are still correct, but one should choose MAX_TREE_LEVEL large enough to have
+               *      DomainLen/2^MAX_TREE_LEVEL  < gravitational softening length
+               */
+              for(j = 0; j < 8; j++)
+                {
+                  if(SubNodes[th].u.suns[subnode] < 0)
+                    break;
+
+                  subnode++;
+                  if(subnode >= 8)
+                    subnode = 7;
+                }
+            }
+
+          int nn = SubNodes[th].u.suns[subnode];
+
+          if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */
+            {
+              parent = th;
+              th     = nn;
+            }
+          else
+            {
+              /* here we have found an empty slot where we can attach
+               * the new particle as a leaf.
+               */
+              SubNodes[th].u.suns[subnode] = i;
+              break; /* done for this particle */
+            }
+        }
+      else
+        {
+          /* We try to insert into a leaf with a single particle.  Need
+           * to generate a new internal node at this point.
+           */
+          SubNodes[parent].u.suns[subnode] = SubTree_NextFreeNode;
+          struct NODE *nfreep              = &SubNodes[SubTree_NextFreeNode];
+
+          /* side length and center of the new internal node: */
+          double len = ((double)(mask << 1)) * SubDomainBigFac;
+          double cx  = ((double)((xxb & centermask) | mask)) * SubDomainBigFac + SubDomainCorner[0];
+          double cy  = ((double)((yyb & centermask) | mask)) * SubDomainBigFac + SubDomainCorner[1];
+          double cz  = ((double)((zzb & centermask) | mask)) * SubDomainBigFac + SubDomainCorner[2];
+
+          nfreep->len       = len;
+          nfreep->center[0] = cx;
+          nfreep->center[1] = cy;
+          nfreep->center[2] = cz;
+
+          for(j = 0; j < 8; j++)
+            nfreep->u.suns[j] = -1;
+
+          if(th >= SubTree_ImportedNodeOffset)
+            {
+              terminate("unexpected here: th=%d SubTree_ImportedNodeOffset=%d", th, SubTree_ImportedNodeOffset);
+            }
+          else
+            intppos = &SubTree_IntPos_list[3 * th]; /* integer position of the particle that occupied the slot */
+
+          subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) |
+                     ((unsigned char)((intppos[2] & mask) >> shiftz)));
+
+          nfreep->u.suns[subnode] = th;
+
+          th = SubTree_NextFreeNode; /* resume trying to insert the new particle into the newly created internal node */
+          SubTree_NumNodes++;
+          SubTree_NextFreeNode++;
+
+          if(SubTree_NumNodes >= SubTree_MaxNodes)
+            {
+              if(SubTreeAllocFactor > MAX_TREE_ALLOC_FACTOR)
+                {
+                  char buf[500];
+                  sprintf(buf,
+                          "task %d: looks like a serious problem for particle %d, stopping with particle dump.  SubTree_NumNodes=%d "
+                          "SubTree_MaxNodes=%d  0=%d NumPart=%d\n",
+                          SubThisTask, i, SubTree_NumNodes, SubTree_MaxNodes, 0, NumPart);
+                  dump_particles();
+                  terminate(buf);
+                }
+
+              return -1;
+            }
+        }
+    }
+
+  return 0;
+}
+
+/*! \brief Recursively creates the empty tree nodes that mirror the top-level
+ *         tree of the domain grid. For every top node that has daughters,
+ *         eight daughter tree nodes are appended; the tree node created
+ *         for a top-level leaf is recorded in SubDomainNodeIndex. Having the
+ *         complete top-level tree in place allows the pseudo-particles of
+ *         other CPUs to be attached even where no local particles exist.
+ *
+ *  \param[in] no Index of the tree node that receives the daughters.
+ *  \param[in] topnode Index of the corresponding entry in SubTopNodes.
+ *  \param[in] bits Number of bits passed to peano_hilbert_key().
+ *  \param[in] x Integer x position.
+ *  \param[in] y Integer y position.
+ *  \param[in] z Integer z position.
+ *  \param[in] xc Integer x coordinate of the lower corner of node 'no'.
+ *  \param[in] yc Integer y coordinate of the lower corner of node 'no'.
+ *  \param[in] zc Integer z coordinate of the lower corner of node 'no'.
+ *  \param[in] ilen Integer side length of node 'no'.
+ *
+ *  \return 0: success; -1: no free tree node left (SubTree_MaxNodes reached).
+ */
+int subfind_coll_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z, unsigned long long xc, unsigned long long yc,
+                                    unsigned long long zc, unsigned long long ilen)
+{
+  int ix, iy, iz, d, slot;
+  unsigned long long ox, oy, oz, half;
+
+  ilen >>= 1; /* from here on the side length of a daughter cell */
+
+  if(SubTopNodes[topnode].Daughter < 0) /* top node without daughters: nothing to create */
+    return 0;
+  for(ix = 0; ix < 2; ix++)
+    for(iy = 0; iy < 2; iy++)
+      for(iz = 0; iz < 2; iz++)
+        {
+          if(SubTree_NumNodes >= SubTree_MaxNodes)
+            {
+              if(SubTreeAllocFactor > MAX_TREE_ALLOC_FACTOR)
+                {
+                  char buf[500];
+                  sprintf(buf, "task %d: looks like a serious problem (NTopnodes=%d), stopping with particle dump.\n", SubThisTask,
+                          NTopnodes);
+                  dump_particles();
+                  terminate(buf);
+                }
+              return -1;
+            }
+
+          const int sub       = 7 & peano_hilbert_key((x << 1) + ix, (y << 1) + iy, (z << 1) + iz, bits);
+          const int child_top = SubTopNodes[topnode].Daughter + sub;
+          const int fresh     = SubTree_NextFreeNode;
+
+          slot                      = ix + 2 * iy + 4 * iz;
+          SubNodes[no].u.suns[slot] = fresh;
+
+          ox   = xc + ix * ilen;
+          oy   = yc + iy * ilen;
+          oz   = zc + iz * ilen;
+          half = ilen >> 1;
+
+          double side = ((double)ilen) * SubDomainBigFac;
+          double mx   = ((double)(ox + half)) * SubDomainBigFac + SubDomainCorner[0];
+          double my   = ((double)(oy + half)) * SubDomainBigFac + SubDomainCorner[1];
+          double mz   = ((double)(oz + half)) * SubDomainBigFac + SubDomainCorner[2];
+
+          struct NODE *node = &SubNodes[fresh];
+          node->len         = side;
+          node->center[0]   = mx;
+          node->center[1]   = my;
+          node->center[2]   = mz;
+          for(d = 0; d < 8; d++)
+            node->u.suns[d] = -1;
+
+          if(SubTopNodes[child_top].Daughter == -1) /* daughter is a top-level leaf: remember its tree node */
+            SubDomainNodeIndex[SubTopNodes[child_top].Leaf] = fresh;
+
+          SubTree_NextFreeNode++;
+          SubTree_NumNodes++;
+
+          if(subfind_coll_create_empty_nodes(fresh, child_top, bits + 1, 2 * x + ix, 2 * y + iy, 2 * z + iz, ox, oy, oz,
+                                             ilen) < 0)
+            return -1;
+        }
+
+  return 0;
+}
+
+/*! \brief Hooks a pseudo-particle into every top-level leaf whose
+ *         SubDomainTask entry differs from SubThisTask. The pseudo-particle
+ *         of topleaf i has the index SubTree_MaxPart + SubTree_MaxNodes + i
+ *         and is stored in daughter slot 0 of the leaf's tree node. Only the
+ *         link is created here; no moments are assigned in this function.
+ *
+ *  \return void
+ */
+void subfind_coll_insert_pseudo_particles(void)
+{
+  int leaf;
+  const int first_pseudo = SubTree_MaxPart + SubTree_MaxNodes; /* index of the pseudo-particle of topleaf 0 */
+  for(leaf = 0; leaf < SubNTopleaves; leaf++)
+    {
+      if(SubDomainTask[leaf] == SubThisTask)
+        continue; /* locally held leaf: nothing to insert */
+
+      SubNodes[SubDomainNodeIndex[leaf]].u.suns[0] = first_pseudo + leaf;
+    }
+}
+
+/*! \brief Determines mass, center of mass and maximum softening type for a
+ *         given internal node and all its subnodes by recursion, and links
+ *         every visited element to its successor in the order of this walk.
+ *
+ *  \param[in] no Index of node, particle or pseudo-particle to process.
+ *  \param[in] sib Index of sibling; stored in the node for internal nodes.
+ *  \param[in] father Index of parent node; stored for internal nodes.
+ *  \param[in, out] last Index visited last (-1: none yet); set to no.
+ *
+ *  \return void
+ */
+void subfind_coll_update_node_recursive(int no, int sib, int father, int *last)
+{
+  int j, jj, p, pp, nextsib, suns[8];
+  double s[3], mass;
+  unsigned char maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+  double mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  unsigned char maxhydrosofttype;
+  unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+  if(no >= SubTree_MaxPart && no < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */
+    {
+      for(j = 0; j < 8; j++)
+        suns[j] = SubNodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will
+                                             overwrite one element (union!) */
+      if(*last >= 0) /* make the element visited before this one point to 'no' */
+        {
+          if(*last >= SubTree_MaxPart)
+            {
+              if(*last >= SubTree_MaxPart + SubTree_MaxNodes)
+                SubNextnode[*last - SubTree_MaxNodes] = no; /* a pseudo-particle or imported point */
+              else
+                SubNodes[*last].u.d.nextnode = no;
+            }
+          else
+            SubNextnode[*last] = no;
+        }
+
+      *last = no;
+
+      mass        = 0;
+      s[0]        = 0;
+      s[1]        = 0;
+      s[2]        = 0;
+      maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; /* start value; replaced by any daughter whose softening compares larger */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+      for(j = 0; j < NSOFTTYPES; j++)
+        mass_per_type[j] = 0;
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      maxhydrosofttype = NSOFTTYPES;
+      minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      for(j = 0; j < 8; j++)
+        {
+          if((p = suns[j]) >= 0)
+            {
+              /* check if we have a sibling on the same level */
+              for(jj = j + 1; jj < 8; jj++)
+                if((pp = suns[jj]) >= 0)
+                  break;
+
+              if(jj < 8) /* yes, we do */
+                nextsib = pp;
+              else
+                nextsib = sib;
+
+              subfind_coll_update_node_recursive(p, nextsib, no, last);
+
+              if(p < SubTree_MaxPart) /* a particle */
+                {
+                  MyDouble *pos = &SubTree_Pos_list[3 * p];
+
+                  mass += P[p].Mass;
+                  s[0] += P[p].Mass * pos[0];
+                  s[1] += P[p].Mass * pos[1];
+                  s[2] += P[p].Mass * pos[2];
+
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[P[p].SofteningType])
+                    maxsofttype = P[p].SofteningType;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  mass_per_type[P[p].Type == 0 ? 0 : P[p].SofteningType] += P[p].Mass;
+
+                  if(P[p].Type == 0)
+                    {
+                      if(maxhydrosofttype < P[p].SofteningType)
+                        maxhydrosofttype = P[p].SofteningType;
+                      if(minhydrosofttype > P[p].SofteningType)
+                        minhydrosofttype = P[p].SofteningType;
+                    }
+#else  /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                  mass_per_type[P[p].SofteningType] += P[p].Mass;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+              else if(p < SubTree_MaxPart + SubTree_MaxNodes) /* an internal node  */
+                {
+                  mass += SubNodes[p].u.d.mass;
+                  s[0] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[0];
+                  s[1] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[1];
+                  s[2] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[2];
+
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[SubNodes[p].u.d.maxsofttype])
+                    maxsofttype = SubNodes[p].u.d.maxsofttype;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+                  int k;
+                  for(k = 0; k < NSOFTTYPES; k++)
+                    mass_per_type[k] += SubExtNodes[p].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  if(maxhydrosofttype < SubNodes[p].u.d.maxhydrosofttype)
+                    maxhydrosofttype = SubNodes[p].u.d.maxhydrosofttype;
+                  if(minhydrosofttype > SubNodes[p].u.d.minhydrosofttype)
+                    minhydrosofttype = SubNodes[p].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+              else if(p < SubTree_MaxPart + SubTree_MaxNodes + SubNTopleaves) /* a pseudo particle */
+                {
+                  /* nothing to be done here because the mass of the
+                   *  pseudo-particle is still zero. This will be changed
+                   * later.
+                   */
+                }
+              else
+                {
+                  /* an imported point */
+                  terminate("should not occur here");
+                }
+            }
+        }
+
+      if(mass) /* non-zero mass: turn the mass-weighted sums into the center of mass */
+        {
+          s[0] /= mass;
+          s[1] /= mass;
+          s[2] /= mass;
+        }
+      else /* no mass collected: use the geometric center of the node instead */
+        {
+          s[0] = SubNodes[no].center[0];
+          s[1] = SubNodes[no].center[1];
+          s[2] = SubNodes[no].center[2];
+        }
+
+      SubNodes[no].u.d.mass        = mass;
+      SubNodes[no].u.d.s[0]        = s[0];
+      SubNodes[no].u.d.s[1]        = s[1];
+      SubNodes[no].u.d.s[2]        = s[2];
+      SubNodes[no].u.d.maxsofttype = maxsofttype;
+      SubNodes[no].u.d.sibling     = sib;
+      SubNodes[no].u.d.father      = father;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+      int k;
+      for(k = 0; k < NSOFTTYPES; k++)
+        SubExtNodes[no].mass_per_type[k] = mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      SubNodes[no].u.d.maxhydrosofttype = maxhydrosofttype;
+      SubNodes[no].u.d.minhydrosofttype = minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+    }
+  else /* single particle or pseudo particle */
+    {
+      if(*last >= 0) /* same linking step as for internal nodes; no moments are computed here */
+        {
+          if(*last >= SubTree_MaxPart)
+            {
+              if(*last >= SubTree_MaxPart + SubTree_MaxNodes)
+                SubNextnode[*last - SubTree_MaxNodes] = no; /* a pseudo-particle or an imported point */
+              else
+                SubNodes[*last].u.d.nextnode = no;
+            }
+          else
+            SubNextnode[*last] = no;
+        }
+
+      *last = no;
+    }
+}
+
+/*! \brief Gathers the moments (center of mass, mass, softening data) of all
+ *         top-level leaf nodes from the tasks that hold them, using SubComm,
+ *         and stores the received values in the tree nodes of foreign leaves.
+ *
+ *  \return void
+ */
+void subfind_coll_exchange_topleafdata(void)
+{
+  int n, no, idx, task;
+  int *recvcounts, *recvoffset, *bytecounts, *byteoffset;
+  struct DomainNODE /* per-topleaf record that is sent as raw bytes */
+  {
+    MyFloat s[3];
+    MyFloat mass;
+#ifdef MULTIPLE_NODE_SOFTENING
+    MyDouble mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+    unsigned char maxhydrosofttype;
+    unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+    unsigned char maxsofttype;
+  } * DomainMoment, *loc_DomainMoment;
+
+  DomainMoment = (struct DomainNODE *)mymalloc("DomainMoment", SubNTopleaves * sizeof(struct DomainNODE));
+
+  /* share the pseudo-particle data across CPUs */
+  recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * SubNTask);
+  recvoffset = (int *)mymalloc("recvoffset", sizeof(int) * SubNTask);
+  bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * SubNTask);
+  byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * SubNTask);
+
+  for(task = 0; task < SubNTask; task++)
+    recvcounts[task] = 0;
+
+  for(n = 0; n < SubNTopleaves; n++) /* count the topleaves assigned to each task */
+    {
+      if(SubDomainTask[n] < 0 || SubDomainTask[n] >= SubNTask)
+        terminate("n=%d|%d: SubDomainTask[n]=%d", n, SubNTopleaves, SubDomainTask[n]);
+
+      recvcounts[SubDomainTask[n]]++;
+    }
+
+  for(task = 0; task < SubNTask; task++)
+    bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE);
+
+  for(task = 1, recvoffset[0] = 0, byteoffset[0] = 0; task < SubNTask; task++)
+    {
+      recvoffset[task] = recvoffset[task - 1] + recvcounts[task - 1];
+      byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+    }
+
+  loc_DomainMoment = (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[SubThisTask] * sizeof(struct DomainNODE));
+
+  for(n = 0, idx = 0; n < SubNTopleaves; n++)
+    {
+      if(SubDomainTask[n] == SubThisTask)
+        {
+          no = SubDomainNodeIndex[n];
+
+          /* read out the multipole moments from the local base cells */
+          loc_DomainMoment[idx].s[0]        = SubNodes[no].u.d.s[0];
+          loc_DomainMoment[idx].s[1]        = SubNodes[no].u.d.s[1];
+          loc_DomainMoment[idx].s[2]        = SubNodes[no].u.d.s[2];
+          loc_DomainMoment[idx].mass        = SubNodes[no].u.d.mass;
+          loc_DomainMoment[idx].maxsofttype = SubNodes[no].u.d.maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+          int k;
+          for(k = 0; k < NSOFTTYPES; k++)
+            loc_DomainMoment[idx].mass_per_type[k] = SubExtNodes[no].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+          loc_DomainMoment[idx].maxhydrosofttype = SubNodes[no].u.d.maxhydrosofttype;
+          loc_DomainMoment[idx].minhydrosofttype = SubNodes[no].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+          idx++;
+        }
+    }
+  /* each task contributes the records of its own topleaves; the records of task t start at byteoffset[t] */
+  MPI_Allgatherv(loc_DomainMoment, bytecounts[SubThisTask], MPI_BYTE, DomainMoment, bytecounts, byteoffset, MPI_BYTE, SubComm);
+
+  for(task = 0; task < SubNTask; task++)
+    recvcounts[task] = 0; /* reused below as a per-task read position */
+
+  for(n = 0; n < SubNTopleaves; n++)
+    {
+      task = SubDomainTask[n];
+      if(task != SubThisTask)
+        {
+          no  = SubDomainNodeIndex[n];
+          idx = recvoffset[task] + recvcounts[task]++;
+
+          SubNodes[no].u.d.s[0]        = DomainMoment[idx].s[0];
+          SubNodes[no].u.d.s[1]        = DomainMoment[idx].s[1];
+          SubNodes[no].u.d.s[2]        = DomainMoment[idx].s[2];
+          SubNodes[no].u.d.mass        = DomainMoment[idx].mass;
+          SubNodes[no].u.d.maxsofttype = DomainMoment[idx].maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+          int k;
+          for(k = 0; k < NSOFTTYPES; k++)
+            SubExtNodes[no].mass_per_type[k] = DomainMoment[idx].mass_per_type[k];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+          SubNodes[no].u.d.maxhydrosofttype = DomainMoment[idx].maxhydrosofttype;
+          SubNodes[no].u.d.minhydrosofttype = DomainMoment[idx].minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+        }
+    }
+  /* the buffers are released in the reverse order of their allocation */
+  myfree(loc_DomainMoment);
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(recvoffset);
+  myfree(recvcounts);
+  myfree(DomainMoment);
+}
+
+/*! \brief This function updates the top-level tree after the multipole
+ *         moments of the pseudo-particles have been updated.
+ *
+ *  \param[in] no Index of the node in SubNodes that is updated.
+ *  \param[in] topnode Index of the matching entry in SubTopNodes.
+ *  \param[in] bits Number of bits used (passed to peano_hilbert_key).
+ *  \param[in] x Integer x position (doubled at each recursion level).
+ *  \param[in] y Integer y position (doubled at each recursion level).
+ *  \param[in] z Integer z position (doubled at each recursion level).
+ *
+ *  \return void
+ */
+void subfind_coll_treeupdate_toplevel(int no, int topnode, int bits, int x, int y, int z)
+{
+  int i, j, k, sub;
+  int p;
+  double s[3], mass;
+  unsigned char maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+  double mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  unsigned char maxhydrosofttype;
+  unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+  if(SubTopNodes[topnode].Daughter >= 0) /* nothing is done for top nodes without daughters */
+    {
+      for(i = 0; i < 2; i++)
+        for(j = 0; j < 2; j++)
+          for(k = 0; k < 2; k++)
+            {
+              sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits);
+              /* descend first, so that the daughters are updated before they are summed below */
+              SubTree_NextFreeNode++;
+              subfind_coll_treeupdate_toplevel(SubTree_NextFreeNode - 1, SubTopNodes[topnode].Daughter + sub, bits + 1, 2 * x + i,
+                                               2 * y + j, 2 * z + k);
+            }
+
+      mass        = 0;
+      s[0]        = 0;
+      s[1]        = 0;
+      s[2]        = 0;
+      maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; /* start value; presumably the entry with the smallest softening - confirm */
+#ifdef MULTIPLE_NODE_SOFTENING
+      for(j = 0; j < NSOFTTYPES; j++)
+        mass_per_type[j] = 0;
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      maxhydrosofttype = NSOFTTYPES;
+      minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      p = SubNodes[no].u.d.nextnode; /* first daughter; the others are reached through the sibling links */
+
+      for(j = 0; j < 8; j++) /* since we are dealing with top-level nodes, we know that there are 8 consecutive daughter nodes */
+        {
+          if(p >= SubTree_MaxPart && p < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */
+            {
+              mass += SubNodes[p].u.d.mass;
+              s[0] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[0]; /* mass-weighted sum, normalised further below */
+              s[1] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[1];
+              s[2] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[2];
+              if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[SubNodes[p].u.d.maxsofttype])
+                maxsofttype = SubNodes[p].u.d.maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+              /* add up the per-type masses; the function-scope k is free again at this point */
+              for(k = 0; k < NSOFTTYPES; k++)
+                mass_per_type[k] += SubExtNodes[p].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+              if(maxhydrosofttype < SubNodes[p].u.d.maxhydrosofttype)
+                maxhydrosofttype = SubNodes[p].u.d.maxhydrosofttype;
+              if(minhydrosofttype > SubNodes[p].u.d.minhydrosofttype)
+                minhydrosofttype = SubNodes[p].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+            }
+          else
+            terminate("may not happen");
+
+          p = SubNodes[p].u.d.sibling;
+        }
+
+      if(mass)
+        {
+          s[0] /= mass; /* centre of mass of the daughters */
+          s[1] /= mass;
+          s[2] /= mass;
+        }
+      else
+        {
+          s[0] = SubNodes[no].center[0]; /* massless node: fall back to the node centre */
+          s[1] = SubNodes[no].center[1];
+          s[2] = SubNodes[no].center[2];
+        }
+
+      SubNodes[no].u.d.s[0]        = s[0];
+      SubNodes[no].u.d.s[1]        = s[1];
+      SubNodes[no].u.d.s[2]        = s[2];
+      SubNodes[no].u.d.mass        = mass;
+      SubNodes[no].u.d.maxsofttype = maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+      /* store the per-type masses; the function-scope k is reused, no local re-declaration */
+      for(k = 0; k < NSOFTTYPES; k++)
+        SubExtNodes[no].mass_per_type[k] = mass_per_type[k];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      SubNodes[no].u.d.maxhydrosofttype = maxhydrosofttype;
+      SubNodes[no].u.d.minhydrosofttype = minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+    }
+}
+
+/*! \brief Allocates tree arrays.
+ *
+ *  This function allocates the memory used for storage of the tree nodes.
+ *  Usually, the number of required nodes is of order 0.7*maxpart, but if this
+ *  is insufficient, the code will try to allocate more space.
+ *
+ *  \param[in] maxpart Particle count used to size the node and position arrays.
+ *  \param[in] maxindex Maximum number of particles (first index used by nodes).
+ *
+ *  \return void
+ */
+void subfind_coll_treeallocate(int maxpart, int maxindex)
+{
+  if(SubNodes)
+    terminate("already allocated");
+
+  SubTree_MaxPart  = maxindex; /* indices below this value denote particles in the tree walk */
+  SubTree_MaxNodes = (int)(SubTreeAllocFactor * maxpart) + SubNTopnodes;
+  /* NOTE: subfind_coll_treefree() releases these blocks in exactly the reverse order; keep both in sync */
+  SubDomainNodeIndex = (int *)mymalloc_movable(&SubDomainNodeIndex, "SubDomainNodeIndex", SubNTopleaves * sizeof(int));
+
+  SubTree_Pos_list = (MyDouble *)mymalloc_movable(&SubTree_Pos_list, "SubTree_Pos_list", 3 * maxpart * sizeof(MyDouble));
+
+  SubNodes = (struct NODE *)mymalloc_movable(&SubNodes, "SubNodes", (SubTree_MaxNodes + 1) * sizeof(struct NODE));
+  SubNodes -= SubTree_MaxPart; /* shift the base so that SubNodes[SubTree_MaxPart] is the first allocated element */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+  SubExtNodes = (struct ExtNODE *)mymalloc_movable(&SubExtNodes, "SubExtNodes", (SubTree_MaxNodes + 1) * sizeof(struct ExtNODE));
+  SubExtNodes -= SubTree_MaxPart; /* same index shift as for SubNodes */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+  /* one entry per particle index plus one per top-level leaf */
+  SubNextnode = (int *)mymalloc_movable(&SubNextnode, "SubNextnode", (SubTree_MaxPart + SubNTopleaves) * sizeof(int));
+}
+
+/*! \brief Releases the tree arrays.
+ *
+ *  Counterpart of subfind_coll_treeallocate(): the blocks are freed in the
+ *  reverse order of their allocation and every pointer is reset to NULL.
+ *
+ *  \return void
+ */
+void subfind_coll_treefree(void)
+{
+  if(SubNodes != NULL)
+    {
+      /* each pointer is cleared right after its block has been released */
+      myfree(SubNextnode);
+      SubNextnode = NULL;
+#ifdef MULTIPLE_NODE_SOFTENING
+      myfree(SubExtNodes + SubTree_MaxPart);
+      SubExtNodes = NULL;
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+      myfree(SubNodes + SubTree_MaxPart);
+      SubNodes = NULL;
+
+      myfree(SubTree_Pos_list);
+      SubTree_Pos_list = NULL;
+
+      myfree(SubDomainNodeIndex);
+      SubDomainNodeIndex = NULL;
+    }
+  else
+    terminate("trying to free the tree even though it's not allocated");
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_coll_treewalk.c b/src/amuse/community/arepo/src/subfind/subfind_coll_treewalk.c
new file mode 100644
index 0000000000..1a7cbd67c7
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_coll_treewalk.c
@@ -0,0 +1,460 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_coll_treewalk.c
+ * \date        05/2018
+ * \brief       Algorithm for collective tree walk; computes gravitational
+ *              binding energy.
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                void subfind_potential_compute(int num, struct unbind_data
+ *                  *darg, int phasearg, double weakly_bound_limit_arg)
+ *                static int subfind_force_treeevaluate_potential(int target,
+ *                  int mode, int threadid)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 15.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef SUBFIND
+#include "../fof/fof.h"
+#include "subfind.h"
+
+static int subfind_force_treeevaluate_potential(int target, int mode, int threadid);
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3];             /* position at which the potential is evaluated */
+  unsigned char SofteningType; /* used as index into All.ForceSoftening */
+
+  int Firstnode; /* set by particle2in() from its firstnode argument */
+} data_in;
+
+static data_in *DataIn, *DataGet; /* DataGet[target] is read by the evaluate function in MODE_IMPORTED_PARTICLES */
+
+/*! \brief Routine that fills the relevant particle/cell data into the input
+ *         structure defined above. Needed by generic_comm_helpers2.
+ *
+ *  The position is copied from P[i].Pos. If CELL_CENTER_GRAVITY is set,
+ *  particles of type 0 use PS[i].Center instead.
+ *
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index of particle in P and PS arrays.
+ *  \param[in] firstnode First node of communication; stored in
+ *             in->Firstnode without further interpretation.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  in->Firstnode     = firstnode;
+  in->SofteningType = P[i].SofteningType;
+
+  for(int dim = 0; dim < 3; dim++)
+    {
+#ifdef CELL_CENTER_GRAVITY
+      if(P[i].Type == 0)
+        in->Pos[dim] = PS[i].Center[dim];
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        in->Pos[dim] = P[i].Pos[dim];
+    }
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyFloat Potential; /* sum accumulated by the evaluate function, before the All.G / atime scaling */
+} data_out;
+
+static data_out *DataResult, *DataOut; /* DataResult[target] is written by the evaluate function in MODE_IMPORTED_PARTICLES */
+
+/*! \brief Routine to store or combine result data. Needed by
+ *         generic_comm_helpers2.
+ *
+ *  \param[in] out Result whose Potential is transferred to PS[i].Potential.
+ *  \param[in] i Index of particle in the PS array.
+ *  \param[in] mode MODE_LOCAL_PARTICLES overwrites the stored value; any
+ *             other mode adds to it (results that were communicated from
+ *             other tasks).
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  if(mode != MODE_LOCAL_PARTICLES)
+    {
+      /* combine: add the contribution to what is already stored */
+      PS[i].Potential += out->Potential;
+      return;
+    }
+
+  PS[i].Potential = out->Potential; /* initial store */
+}
+
+#define USE_SUBCOMM_COMMUNICATOR
+#include "../utils/generic_comm_helpers2.h"
+
+static int Num;                   /* number of entries in d; set by subfind_potential_compute() */
+static struct unbind_data *d;     /* list whose .index fields select the particles to process */
+static int phase;                 /* if 1, kernel_local() skips particles with BindingEnergy <= weakly_bound_limit */
+static double weakly_bound_limit; /* threshold used together with phase == 1 */
+
+/*! \brief Routine that defines what to do with local particles.
+ *
+ *  Pulls indices from the counter NextParticle and calls the *_evaluate
+ *  function in MODE_LOCAL_PARTICLES for each of them. The loop ends once
+ *  all Num entries of d are consumed or once the export space of the
+ *  calling thread drops below MinSpace.
+ *
+ *  In phase 1, entries whose BindingEnergy is not above weakly_bound_limit
+ *  are skipped.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  int threadid = get_thread_num();
+
+  /* mark every task (0 .. SubNTask-1) with -1 in the export flags of this thread */
+  for(int task = 0; task < SubNTask; task++)
+    Thread[threadid].Exportflag[task] = -1;
+
+  /* the counter is only advanced while there is enough export space left */
+  while(Thread[threadid].ExportSpace >= MinSpace)
+    {
+      int idx = NextParticle++;
+      if(idx >= Num)
+        break;
+
+      int target = d[idx].index;
+
+      /* phase 1: leave out particles at or below the binding energy limit */
+      if(phase == 1 && PS[target].BindingEnergy <= weakly_bound_limit)
+        continue;
+
+      subfind_force_treeevaluate_potential(target, MODE_LOCAL_PARTICLES, threadid);
+    }
+}
+
+/*! \brief Routine that defines what to do with imported particles.
+ *
+ *  Calls the *_evaluate function in MODE_IMPORTED_PARTICLES once for every
+ *  index from 0 to Nimport - 1, in ascending order.
+ *
+ *  In this mode the evaluate function reads its input from DataGet[index]
+ *  and writes its result to DataResult[index]; the particle arrays are not
+ *  updated here.
+ *
+ *  No filtering by phase or binding energy is applied to imported items.
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  const int threadid = get_thread_num();
+
+  /* now do the particles that were sent to us: the loop index is the
+   * slot in the import buffer, not an index into P */
+  for(int slot = 0; slot < Nimport; slot++)
+    {
+      subfind_force_treeevaluate_potential(slot, MODE_IMPORTED_PARTICLES, threadid);
+    }
+}
+
+/*! \brief Computes potential energy.
+ *
+ *  Runs the generic communication pattern with kernel_local() and
+ *  kernel_imported(), then multiplies the resulting PS[].Potential of every
+ *  processed entry by All.G / atime, where atime is All.Time if
+ *  All.ComovingIntegrationOn is set and 1 otherwise.
+ *
+ *  \param[in] num Number of elements.
+ *  \param[in] darg Unbind data.
+ *  \param[in] phasearg Which phase are we in? 1:ignore weakly bound particles.
+ *  \param[in] weakly_bound_limit_arg Minimum binding energy between two
+ *             particles that is accounted for.
+ *
+ *  \return void
+ */
+void subfind_potential_compute(int num, struct unbind_data *darg, int phasearg, double weakly_bound_limit_arg)
+{
+  generic_set_MaxNexport();
+
+  /* hand the arguments to the kernels through the file-scope variables */
+  weakly_bound_limit = weakly_bound_limit_arg;
+  phase              = phasearg;
+  d                  = darg;
+  Num                = num;
+
+  generic_comm_pattern(Num, kernel_local, kernel_imported);
+
+  const double atime = All.ComovingIntegrationOn ? All.Time : 1;
+
+  for(int n = 0; n < num; n++)
+    {
+      if(phase == 1 && PS[d[n].index].BindingEnergy <= weakly_bound_limit)
+        continue;
+
+      PS[d[n].index].Potential *= All.G / atime;
+    }
+}
+
+/*! \brief Evaluate function of potential calculation: walks the tree and
+ *         sums the (softened) -mass/r contributions for one target.
+ *
+ *  \param[in] target Index into P/PS (local mode) or DataGet (imported mode).
+ *  \param[in] mode MODE_LOCAL_PARTICLES or MODE_IMPORTED_PARTICLES.
+ *  \param[in] threadid ID of thread; forwarded to the node export routine.
+ *  \return 0; the sum is stored via out2particle() or in DataResult[target].
+ */
+static int subfind_force_treeevaluate_potential(int target, int mode, int threadid)
+{
+  struct NODE *nop = 0;
+  int no, numnodes, *firstnode, k;
+  double r2, dx, dy, dz, mass, r, u, h_i, h_j, hmax, h_inv, wp;
+  double pos_x, pos_y, pos_z;
+#ifdef MULTIPLE_NODE_SOFTENING
+  struct ExtNODE *extnop = 0;
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+#if !defined(GRAVITY_NOT_PERIODIC)
+  double xtmp, ytmp, ztmp; /* presumably scratch variables of the GRAVITY_NEAREST_* macros - confirm */
+#endif
+
+  data_in local, *in;
+  data_out out;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0); /* build the input record from the local particle */
+      in = &local;
+
+      numnodes  = 1; /* a single walk that starts at the root node */
+      firstnode = NULL;
+    }
+  else
+    {
+      in = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode); /* firstnode[0..numnodes-1] are the start nodes used below */
+    }
+
+  pos_x = in->Pos[0];
+  pos_y = in->Pos[1];
+  pos_z = in->Pos[2];
+  h_i   = All.ForceSoftening[in->SofteningType]; /* softening length of the target */
+
+  double pot = 0;
+
+  for(k = 0; k < numnodes; k++)
+    {
+      if(mode == MODE_LOCAL_PARTICLES)
+        no = SubTree_MaxPart; /* root node */
+      else
+        {
+          no = firstnode[k];
+          no = SubNodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0)
+        {
+#ifdef MULTIPLE_NODE_SOFTENING
+          int indi_flag1 = -1, indi_flag2 = 0;
+#endif                             /* #ifdef MULTIPLE_NODE_SOFTENING */
+          if(no < SubTree_MaxPart) /* single particle */
+            {
+              dx = GRAVITY_NEAREST_X(SubTree_Pos_list[3 * no + 0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(SubTree_Pos_list[3 * no + 1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(SubTree_Pos_list[3 * no + 2] - pos_z);
+              r2 = dx * dx + dy * dy + dz * dz;
+
+              mass = P[no].Mass;
+
+              h_j = All.ForceSoftening[P[no].SofteningType];
+
+              if(h_j > h_i) /* the larger of the two softening lengths is used */
+                hmax = h_j;
+              else
+                hmax = h_i;
+
+              no = SubNextnode[no]; /* successor of this particle in the walk */
+            }
+          else if(no < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  if(no < SubTree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the
+                                                           branch */
+                    break;
+                }
+
+              nop  = &SubNodes[no];
+              mass = nop->u.d.mass;
+
+              dx = GRAVITY_NEAREST_X(nop->u.d.s[0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(nop->u.d.s[1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(nop->u.d.s[2] - pos_z);
+
+              r2 = dx * dx + dy * dy + dz * dz;
+
+              /* check Barnes-Hut opening criterion */
+              if(nop->len * nop->len > r2 * All.ErrTolThetaSubfind * All.ErrTolThetaSubfind)
+                {
+                  /* open cell */
+                  if(mass) /* a massless node is not opened; it falls through and adds nothing */
+                    {
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+                }
+
+              h_j = All.ForceSoftening[nop->u.d.maxsofttype];
+
+              if(h_j > h_i)
+                {
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  if(nop->u.d.maxhydrosofttype != nop->u.d.minhydrosofttype)
+                    if(SubExtNodes[no].mass_per_type[0] > 0)
+                      if(r2 < All.ForceSoftening[nop->u.d.maxhydrosofttype] * All.ForceSoftening[nop->u.d.maxhydrosofttype])
+                        {
+                          /* open cell */
+                          no = nop->u.d.nextnode;
+                          continue;
+                        }
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                  indi_flag1 = 0;          /* evaluate the node once per softening type ... */
+                  indi_flag2 = NSOFTTYPES; /* ... using the per-type masses of the node */
+#else  /* #ifdef MULTIPLE_NODE_SOFTENING */
+                  if(r2 < h_j * h_j)
+                    {
+                      /* open cell */
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING #else */
+                  hmax = h_j;
+                }
+              else
+                hmax = h_i;
+
+                /* node can be used */
+#ifdef MULTIPLE_NODE_SOFTENING
+              extnop = &SubExtNodes[no];
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+              no = nop->u.d.sibling; /* node is accepted as a whole: skip its subtree */
+            }
+          else if(no >= SubTree_ImportedNodeOffset) /* point from imported nodelist */
+            {
+              terminate("this is not expected here");
+            }
+          else /* remaining index range: a node that has to be exported (local mode only) */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("mode == MODE_IMPORTED_PARTICLES");
+
+              subfind_treefind_collective_export_node_threads(no, target, threadid);
+
+              no = SubNextnode[no - SubTree_MaxNodes]; /* these entries follow the SubTree_MaxPart particle entries */
+              continue;
+            }
+
+          /* now evaluate the potential contribution */
+          r = sqrt(r2);
+
+#ifdef MULTIPLE_NODE_SOFTENING
+          int type;
+          for(type = indi_flag1; type < indi_flag2; type++) /* default -1..0: one pass with mass and hmax as set above */
+            {
+              if(type >= 0)
+                {
+                  mass = extnop->mass_per_type[type];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  if(type == 0)
+                    h_j = All.ForceSoftening[nop->u.d.maxhydrosofttype];
+                  else
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                    h_j = All.ForceSoftening[type];
+
+                  if(h_j > h_i)
+                    hmax = h_j;
+                  else
+                    hmax = h_i;
+                }
+
+              if(mass)
+                {
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+                  if(r >= hmax) /* outside the softening length: plain -mass/r */
+                    pot += FLT(-mass / r);
+                  else /* inside: piecewise polynomial in u = r / hmax */
+                    {
+                      h_inv = 1.0 / hmax;
+
+                      u = r * h_inv;
+                      if(u < 0.5)
+                        wp = -2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6));
+                      else
+                        wp = -3.2 + 0.066666666667 / u + u * u * (10.666666666667 + u * (-16.0 + u * (9.6 - 2.133333333333 * u)));
+
+                      pot += FLT(mass * h_inv * wp);
+                    }
+#ifdef MULTIPLE_NODE_SOFTENING
+                }
+            }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+        }
+    }
+
+  out.Potential = pot; /* not yet multiplied by All.G / atime; subfind_potential_compute() does that */
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_collective.c b/src/amuse/community/arepo/src/subfind/subfind_collective.c
new file mode 100644
index 0000000000..02c702b6de
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_collective.c
@@ -0,0 +1,2417 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_collective.c
+ * \date        05/2018
+ * \brief       Subfind algorithm running collectively on all tasks.
+ * \details     contains functions:
+ *                void subfind_process_group_collectively(int nsubgroups_cat)
+ *                void subfind_fof_calc_am_collective(int snapnr, int
+ *                  ngroups_cat)
+ *                void subfind_col_find_coll_candidates(int totgrouplen)
+ *                void subfind_unbind_independent_ones(int count_cand)
+ *                int subfind_col_unbind(struct unbind_data *d, int num, int
+ *                  *num_non_gas)
+ *                void subfind_poll_for_requests(void)
+ *                long long subfind_distlinklist_setrank_and_get_next(
+ *                  long long index, long long *rank)
+ *                void subfind_distlinklist_set_next(long long index,
+ *                  long long next)
+ *                void subfind_distlinklist_add_particle(long long index)
+ *                void subfind_distlinklist_mark_particle(long long index,
+ *                  int target, int submark)
+ *                void subfind_distlinklist_add_bound_particles(
+ *                  long long index, int nsub)
+ *                long long subfind_distlinklist_get_next(long long index)
+ *                long long subfind_distlinklist_get_rank(long long index)
+ *                long long subfind_distlinklist_get_head(long long index)
+ *                void subfind_distlinklist_get_two_heads(long long ngb_index1,
+ *                  long long ngb_index2, long long *head, long long
+ *                  *head_attach)
+ *                void subfind_distlinklist_set_headandnext(long long index,
+ *                  long long head, long long next)
+ *                int subfind_distlinklist_get_tail_set_tail_increaselen(
+ *                  long long index, long long *tail, long long newtail)
+ *                void subfind_distlinklist_set_tailandlen(long long index,
+ *                  long long tail, int len)
+ *                void subfind_distlinklist_get_tailandlen(long long index,
+ *                  long long *tail, int *len)
+ *                void subfind_distlinklist_set_all(long long index,
+ *                  long long head, long long tail, int len, long long next)
+ *                int subfind_compare_densities(const void *a, const void *b)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 15.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef SUBFIND
+#include "../fof/fof.h"
+#include "subfind.h"
+
+#define TAG_POLLING_DONE 201
+#define TAG_SET_ALL 202
+#define TAG_GET_NGB_INDICES 204
+#define TAG_GET_TAILANDLEN 205
+#define TAG_GET_TAILANDLEN_DATA 206
+#define TAG_SET_TAILANDLEN 207
+#define TAG_SET_HEADANDNEXT 209
+#define TAG_SETHEADGETNEXT_DATA 210
+#define TAG_SET_NEXT 211
+#define TAG_SETHEADGETNEXT 213
+#define TAG_GET_NEXT 215
+#define TAG_GET_NEXT_DATA 216
+#define TAG_GET_HEAD 217
+#define TAG_GET_HEAD_DATA 218
+#define TAG_ADD_PARTICLE 219
+#define TAG_ADDBOUND 220
+#define TAG_NID 222
+#define TAG_NID_DATA 223
+#define TAG_SETRANK 224
+#define TAG_SETRANK_OUT 226
+#define TAG_GET_RANK 227
+#define TAG_GET_RANK_DATA 228
+#define TAG_MARK_PARTICLE 229
+#define TAG_SET_NEWTAIL 230
+#define TAG_GET_OLDTAIL 231
+#define TAG_GET_TWOHEADS 232
+#define TAG_GET_TWOHEADS_DATA 233
+
+#define MASK ((((long long)1) << 32) - 1)
+#define HIGHBIT (1 << 30)
+
+static long long *Head, *Next, *Tail; /* distributed link list; NumPartGroup entries each, initialised to -1 */
+static int *Len;                      /* NumPartGroup entries, initialised to 0 */
+static int LocalLen;
+static int count_cand, max_coll_candidates; /* entries in use / entries allocated in coll_candidates */
+
+static struct unbind_data *ud; /* NOTE(review): hidden by a local 'ud' in the SUBFIND_EXTENDED_PROPERTIES block below */
+
+/*! \brief Data structure for sorting density data (array sd, one entry per local group particle).
+ */
+static struct sort_density_data
+{
+  MyFloat density; /* copied from PS[i].Density */
+  int ngbcount;    /* copied from NgbLoc[i].count */
+  long long index; /* this will store the task in the upper word: (SubThisTask << 32) + local index */
+  long long ngb_index1, ngb_index2; /* copied from NgbLoc[i].index[0] and NgbLoc[i].index[1] */
+} * sd;
+
+/*! \brief Processes a group collectively on all MPI tasks.
+ *
+ *  \param[in] nsubgroups_cat (unused)
+ *
+ *  \return void
+ */
+void subfind_process_group_collectively(int nsubgroups_cat)
+{
+  int totgrouplen1, totgrouplen2;
+
+  /* make a sanity check: We should have exactly 1 group, stored on the root of the processor subset */
+  if(SubThisTask == 0)
+    {
+      if(Ngroups != 1)
+        terminate("Ngroups=%d != 1  SubNTask=%d SubThisTask=%d", Ngroups, SubNTask, SubThisTask);
+    }
+  else
+    {
+      if(Ngroups != 0)
+        terminate("Ngroups=%d != 0  SubNTask=%d SubThisTask=%d", Ngroups, SubNTask, SubThisTask);
+    }
+
+  if(SubThisTask == 0)
+    {
+      printf("SUBFIND-COLLECTIVE, root-task=%d: Collectively doing halo %d of length  %d  on  %d  processors.\n", ThisTask,
+             Group[0].GrNr, Group[0].Len, SubNTask);
+
+      GrNr         = Group[0].GrNr;
+      totgrouplen2 = Group[0].Len;
+      for(int j = 0; j < 3; j++)
+        GrCM[j] = Group[0].CM[j];
+    }
+
+  /* tell everybody in the set the group number, the center of mass, and the grouplen */
+  MPI_Bcast(&GrNr, 1, MPI_INT, 0, SubComm);
+  MPI_Bcast(&GrCM[0], 3 * sizeof(MyDouble), MPI_BYTE, 0, SubComm);
+  MPI_Bcast(&totgrouplen2, 1, MPI_INT, 0, SubComm);
+
+  NumPartGroup = 0;
+  for(int i = 0; i < NumPart; i++)
+    if(PS[i].GrNr == GrNr)
+      NumPartGroup++;
+
+  MPI_Allreduce(&NumPartGroup, &totgrouplen1, 1, MPI_INT, MPI_SUM, SubComm);
+
+  /* sanity check that we actually have all the right particles on the processor subset */
+  if(totgrouplen1 != totgrouplen2)
+    terminate("totgrouplen1=%d != totgrouplen2=%d", totgrouplen1, totgrouplen2); /* inconsistency */
+
+  /* do a domain decomposition just for this halo */
+  subfind_coll_domain_decomposition();
+
+  /* copy over the domain dimensions to serial tree code, as this may be used in the collective unbinding */
+  subfind_loctree_copyExtent();
+
+  /* now let us sort according to GrNr and Density. This step will temporarily break the association with SphP[] and other arrays! */
+  submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart);
+  for(int i = 0; i < NumPart; i++)
+    {
+      PS[i].SubNr         = TotNgroups + 1; /* set a default that is larger than reasonable group number */
+      PS[i].OldIndex      = i;
+      submp[i].index      = i;
+      submp[i].GrNr       = PS[i].GrNr;
+      submp[i].DM_Density = PS[i].Density;
+    }
+  qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_GrNr_DM_Density);
+  subfind_reorder_according_to_submp();
+  myfree(submp);
+
+  /* note: now we have the particles of the group at the beginning, but SPH particles are not aligned.
+     They can however be accessed via SphP[PS[i].OldIndex] */
+
+  /* re-determine the number of local group particles, which has changed due to domain decomposition */
+  NumPartGroup = 0;
+  for(int i = 0; i < NumPart; i++)
+    if(PS[i].GrNr == GrNr)
+      NumPartGroup++;
+
+  /* allocate some storage for the halo */
+  subfind_coll_treeallocate(NumPart, All.MaxPart);
+
+  /* construct a tree for the halo */
+  subfind_coll_treebuild(NumPartGroup, NULL);
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  // calculate binding energy of full fof group
+  {
+    struct unbind_data *ud = (struct unbind_data *)mymalloc_movable(&ud, "ud", NumPartGroup * sizeof(struct unbind_data));
+
+    NumPartGroup = 0;
+    for(int i = 0; i < NumPart; i++)
+      if(PS[i].GrNr == GrNr)
+        ud[NumPartGroup++].index = i;
+
+    subfind_potential_compute(NumPartGroup, ud, 0, 0);
+
+    double binding_energy_local = 0, binding_energy_global;
+
+    for(int i = 0; i < NumPartGroup; i++)
+      binding_energy_local += 0.5 * P[ud[i].index].Mass * PS[ud[i].index].Potential;
+
+    MPI_Allreduce(&binding_energy_local, &binding_energy_global, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+    Group[0].Epot = binding_energy_global;
+
+    myfree(ud);
+    ud = NULL;
+  }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  long long p;
+  int len;
+  int ncand, parent, totcand, nremaining;
+  int max_loc_length, max_length;
+  int count, countall, *countlist, *offset;
+  int i, j, k, nr, grindex = 0, nsubs, subnr;
+  int count_leaves, tot_count_leaves, master;
+  struct coll_cand_dat *tmp_coll_candidates = 0;
+  double t0, t1, tt0, tt1;
+
+  /* determine the radius that encloses a certain number of link particles */
+  subfind_find_linkngb();
+
+  sd = (struct sort_density_data *)mymalloc_movable(&sd, "sd", NumPartGroup * sizeof(struct sort_density_data));
+
+  /* determine the indices of the nearest two denser neighbours within the link region */
+  NgbLoc = (struct nearest_ngb_data *)mymalloc("NgbLoc", NumPartGroup * sizeof(struct nearest_ngb_data));
+  R2Loc  = (struct nearest_r2_data *)mymalloc("R2Loc", NumPartGroup * sizeof(struct nearest_r2_data));
+
+  subfind_find_nearesttwo();
+
+  for(i = 0; i < NumPartGroup; i++)
+    {
+      sd[i].density    = PS[i].Density;
+      sd[i].ngbcount   = NgbLoc[i].count;
+      sd[i].index      = (((long long)SubThisTask) << 32) + i;
+      sd[i].ngb_index1 = NgbLoc[i].index[0];
+      sd[i].ngb_index2 = NgbLoc[i].index[1];
+    }
+  myfree(R2Loc);
+  myfree(NgbLoc);
+
+  if(SubThisTask == 0)
+    {
+      printf("SUBFIND-COLLECTIVE, root-task=%d: before parallel sort of 'sd'.\n", ThisTask);
+      fflush(stdout);
+    }
+
+  /* sort the densities */
+  parallel_sort_comm(sd, NumPartGroup, sizeof(struct sort_density_data), subfind_compare_densities, SubComm);
+
+  if(SubThisTask == 0)
+    {
+      printf("SUBFIND-COLLECTIVE, root-task=%d: parallel sort of 'sd' done.\n", ThisTask);
+      fflush(stdout);
+    }
+
+  /* allocate and initialize distributed link list */
+  Head = (long long *)mymalloc_movable(&Head, "Head", NumPartGroup * sizeof(long long));
+  Next = (long long *)mymalloc_movable(&Next, "Next", NumPartGroup * sizeof(long long));
+  Tail = (long long *)mymalloc_movable(&Tail, "Tail", NumPartGroup * sizeof(long long));
+  Len  = (int *)mymalloc_movable(&Len, "Len", NumPartGroup * sizeof(int));
+
+  for(i = 0; i < NumPartGroup; i++)
+    {
+      Head[i] = Next[i] = Tail[i] = -1;
+      Len[i]                      = 0;
+    }
+
+  /* allocate a list to store subhalo coll_candidates */
+  max_coll_candidates = imax((NumPartGroup / 50), 200);
+  coll_candidates     = (struct coll_cand_dat *)mymalloc_movable(&coll_candidates, "coll_candidates",
+                                                             max_coll_candidates * sizeof(struct coll_cand_dat));
+  count_cand          = 0;
+
+  subfind_col_find_coll_candidates(totgrouplen1);
+
+  /* establish total number of coll_candidates */
+  MPI_Allreduce(&count_cand, &totcand, 1, MPI_INT, MPI_SUM, SubComm);
+  if(SubThisTask == 0)
+    {
+      printf("SUBFIND-COLLECTIVE, root-task=%d: total number of subhalo coll_candidates=%d\n", ThisTask, totcand);
+      fflush(stdout);
+    }
+
+  nremaining = totcand;
+
+  for(i = 0; i < NumPartGroup; i++)
+    Tail[i] = -1;
+
+  for(i = 0; i < count_cand; i++)
+    coll_candidates[i].parent = 0;
+
+  do
+    {
+      /* Let's see which coll_candidates can be unbound independent from each other.
+         We identify them with those coll_candidates that have no embedded other candidate */
+      t0 = second();
+      if(SubThisTask == 0)
+        tmp_coll_candidates = (struct coll_cand_dat *)mymalloc("tmp_coll_candidates", totcand * sizeof(struct coll_cand_dat));
+
+      count = count_cand;
+      count *= sizeof(struct coll_cand_dat);
+
+      countlist = (int *)mymalloc("countlist", SubNTask * sizeof(int));
+      offset    = (int *)mymalloc("offset", SubNTask * sizeof(int));
+
+      MPI_Allgather(&count, 1, MPI_INT, countlist, 1, MPI_INT, SubComm);
+
+      for(i = 1, offset[0] = 0; i < SubNTask; i++)
+        offset[i] = offset[i - 1] + countlist[i - 1];
+
+      MPI_Gatherv(coll_candidates, countlist[SubThisTask], MPI_BYTE, tmp_coll_candidates, countlist, offset, MPI_BYTE, 0, SubComm);
+
+      if(SubThisTask == 0)
+        {
+          for(k = 0; k < totcand; k++)
+            {
+              tmp_coll_candidates[k].nsub  = k;
+              tmp_coll_candidates[k].subnr = k;
+            }
+
+          qsort(tmp_coll_candidates, totcand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_rank);
+          for(k = 0; k < totcand; k++)
+            {
+              if(tmp_coll_candidates[k].parent >= 0)
+                {
+                  tmp_coll_candidates[k].parent = 0;
+
+                  for(j = k + 1; j < totcand; j++)
+                    {
+                      if(tmp_coll_candidates[j].rank > tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len)
+                        break;
+
+                      if(tmp_coll_candidates[j].parent < 0) /* ignore these */
+                        continue;
+
+                      if(tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len >=
+                         tmp_coll_candidates[j].rank + tmp_coll_candidates[j].len)
+                        {
+                          tmp_coll_candidates[k].parent++; /* we here count the number of subhalos that are enclosed */
+                        }
+                      else
+                        {
+                          terminate("k=%d|%d has rank=%d and len=%d.  j=%d has rank=%d and len=%d\n", k, totcand,
+                                    (int)tmp_coll_candidates[k].rank, (int)tmp_coll_candidates[k].len, j,
+                                    (int)tmp_coll_candidates[j].rank, (int)tmp_coll_candidates[j].len);
+                        }
+                    }
+                }
+            }
+
+          qsort(tmp_coll_candidates, totcand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_subnr);
+        }
+
+      MPI_Scatterv(tmp_coll_candidates, countlist, offset, MPI_BYTE, coll_candidates, countlist[SubThisTask], MPI_BYTE, 0, SubComm);
+
+      myfree(offset);
+      myfree(countlist);
+
+      if(SubThisTask == 0)
+        myfree(tmp_coll_candidates);
+
+      for(i = 0, count_leaves = 0, max_loc_length = 0; i < count_cand; i++)
+        if(coll_candidates[i].parent == 0)
+          {
+            if(coll_candidates[i].len > max_loc_length)
+              max_loc_length = coll_candidates[i].len;
+
+            if(coll_candidates[i].len > 0.20 * All.TotNumPart / NTask) /* seems large, let's rather do it collectively */
+              {
+                coll_candidates[i].parent++; /* this will ensure that it is not considered in this round */
+              }
+            else
+              {
+                count_leaves++;
+              }
+          }
+
+      MPI_Allreduce(&count_leaves, &tot_count_leaves, 1, MPI_INT, MPI_SUM, SubComm);
+      MPI_Allreduce(&max_loc_length, &max_length, 1, MPI_INT, MPI_MAX, SubComm);
+
+      t1 = second();
+      if(SubThisTask == 0)
+        printf(
+            "SUBFIND-COLLECTIVE, root-task=%d: number of subhalo coll_candidates that can be done independently=%d. (Largest size %d, "
+            "finding took %g sec)\n",
+            ThisTask, tot_count_leaves, max_length, timediff(t0, t1));
+
+      if(tot_count_leaves <= 0) /* if there are none left, we break and do the reset collectively */
+        {
+          if(SubThisTask == 0)
+            printf("SUBFIND-COLLECTIVE, root-task=%d: too few, I do the rest of %d collectively\n", ThisTask, nremaining);
+          break;
+        }
+
+      nremaining -= tot_count_leaves;
+
+      for(i = 0; i < NumPart; i++)
+        {
+          PS[i].origintask = PS[i].TargetTask = SubThisTask;
+          PS[i].originindex                   = i;
+          PS[i].submark                       = HIGHBIT;
+          if(i < NumPartGroup)
+            if(Tail[i] >= 0) /* this means this particle is already bound to a substructure */
+              PS[i].origintask |= HIGHBIT;
+        }
+
+      /* we now mark the particles that are in subhalo coll_candidates that can be processed independently in parallel */
+      nsubs = 0;
+      t0    = second();
+      for(master = 0; master < SubNTask; master++)
+        {
+          ncand = count_cand;
+
+          MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, master, SubComm);
+
+          for(k = 0; k < ncand; k++)
+            {
+              if(SubThisTask == master)
+                {
+                  len    = coll_candidates[k].len;
+                  parent = coll_candidates[k].parent; /* this is here actually the daughter count */
+                }
+
+              MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm);
+              MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm);
+              MPI_Barrier(SubComm);
+
+              if(parent == 0)
+                {
+                  if(SubThisTask != master)
+                    subfind_poll_for_requests();
+                  else
+                    {
+                      for(i = 0, p = coll_candidates[k].head; i < coll_candidates[k].len; i++)
+                        {
+                          subfind_distlinklist_mark_particle(p, master, nsubs);
+
+                          if(p < 0)
+                            terminate("Bummer i=%d \n", i);
+
+                          p = subfind_distlinklist_get_next(p);
+                        }
+
+                      /* now tell the others to stop polling */
+                      for(i = 0; i < SubNTask; i++)
+                        if(i != SubThisTask)
+                          MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
+                    }
+
+                  MPI_Barrier(SubComm);
+                }
+
+              nsubs++;
+            }
+        }
+      t1 = second();
+      if(SubThisTask == 0)
+        {
+          printf("SUBFIND-COLLECTIVE, root-task=%d: particles are marked (took %g)\n", ThisTask, timediff(t0, t1));
+          fflush(stdout);
+        }
+
+      for(i = 0; i < NumPart; i++)
+        PS[i].TargetIndex = PS[i].submark; /* this will make sure that the particles are grouped by submark on the target task */
+
+      t0 = second();
+      subfind_distribute_particles(SubComm); /* assemble the particles on individual processors */
+      t1 = second();
+      if(SubThisTask == 0)
+        {
+          printf("SUBFIND-COLLECTIVE, root-task=%d: distribution of independent ones took %g sec\n", ThisTask, timediff(t0, t1));
+          fflush(stdout);
+        }
+
+      MPI_Barrier(SubComm);
+      t0 = second();
+
+      subfind_unbind_independent_ones(count_cand);
+
+      MPI_Barrier(SubComm);
+      t1 = second();
+
+      if(SubThisTask == 0)
+        {
+          printf("SUBFIND-COLLECTIVE, root-task=%d: unbinding of independent ones took %g sec\n", ThisTask, timediff(t0, t1));
+          fflush(stdout);
+        }
+
+      for(i = 0; i < NumPart; i++)
+        {
+          PS[i].origintask &= (HIGHBIT - 1); /* clear high bit if set */
+          PS[i].TargetTask  = PS[i].origintask;
+          PS[i].TargetIndex = PS[i].originindex;
+        }
+
+      t0 = second();
+      subfind_distribute_particles(SubComm); /* bring them back to their original processor */
+
+      t1 = second();
+      if(SubThisTask == 0)
+        {
+          printf("SUBFIND-COLLECTIVE, root-task=%d: bringing the independent ones back took %g sec\n", ThisTask, timediff(t0, t1));
+          fflush(stdout);
+        }
+
+      /* now mark the bound particles */
+      for(i = 0; i < NumPartGroup; i++)
+        if(PS[i].submark >= 0 && PS[i].submark < nsubs)
+          Tail[i] = PS[i].submark; /* we use this to flag bound parts of substructures */
+
+      for(i = 0; i < count_cand; i++)
+        if(coll_candidates[i].parent == 0)
+          coll_candidates[i].parent = -1;
+    }
+  while(tot_count_leaves > 0);
+
+  /**** now we do the collective unbinding of the subhalo coll_candidates that contain other subhalo coll_candidates ****/
+  ud = (struct unbind_data *)mymalloc_movable(&ud, "ud", NumPartGroup * sizeof(struct unbind_data));
+
+  t0 = second();
+  for(master = 0, nr = 0; master < SubNTask; master++)
+    {
+      ncand = count_cand;
+
+      MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, master, SubComm);
+
+      for(k = 0; k < ncand; k++)
+        {
+          if(SubThisTask == master)
+            {
+              len    = coll_candidates[k].len;
+              nsubs  = coll_candidates[k].nsub;
+              parent = coll_candidates[k].parent; /* this is here actually the daughter count */
+            }
+
+          MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm);
+          MPI_Barrier(SubComm);
+
+          if(parent >= 0)
+            {
+              MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm);
+              MPI_Bcast(&nsubs, sizeof(nsubs), MPI_BYTE, master, SubComm);
+
+              if(SubThisTask == 0)
+                {
+                  printf("SUBFIND-COLLECTIVE, root-task=%d: collective unbinding of nr=%d (%d) of length=%d\n", ThisTask, nr,
+                         nremaining, (int)len);
+                  fflush(stdout);
+                }
+
+              nr++;
+
+              LocalLen = 0;
+
+              tt0 = second();
+
+              if(SubThisTask != master)
+                subfind_poll_for_requests();
+              else
+                {
+                  for(i = 0, p = coll_candidates[k].head; i < coll_candidates[k].len; i++)
+                    {
+                      subfind_distlinklist_add_particle(p);
+                      if(p < 0)
+                        terminate("Bummer i=%d \n", i);
+
+                      p = subfind_distlinklist_get_next(p);
+                    }
+
+                  /* now tell the others to stop polling */
+                  for(i = 0; i < SubNTask; i++)
+                    if(i != SubThisTask)
+                      MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
+                }
+
+              int LocalNonGasLen;
+
+              LocalLen = subfind_col_unbind(ud, LocalLen, &LocalNonGasLen);
+
+              tt1 = second();
+              if(SubThisTask == 0)
+                {
+                  printf("SUBFIND-COLLECTIVE, root-task=%d: took %g sec\n", ThisTask, timediff(tt0, tt1));
+                  fflush(stdout);
+                }
+
+              MPI_Allreduce(&LocalLen, &len, 1, MPI_INT, MPI_SUM, SubComm);
+
+              if(len >= All.DesLinkNgb)
+                {
+                  /* ok, we found a substructure */
+
+                  for(i = 0; i < LocalLen; i++)
+                    Tail[ud[i].index] = nsubs; /* we use this to flag the substructures */
+
+                  if(SubThisTask == master)
+                    {
+                      coll_candidates[k].bound_length = len;
+                    }
+                }
+              else
+                {
+                  if(SubThisTask == master)
+                    {
+                      coll_candidates[k].bound_length = 0;
+                    }
+                }
+            }
+        }
+    }
+  t1 = second();
+
+  if(SubThisTask == 0)
+    {
+      printf("SUBFIND-COLLECTIVE, root-task=%d: the collective unbinding of remaining halos took %g sec\n", ThisTask,
+             timediff(t0, t1));
+      fflush(stdout);
+    }
+
+  for(k = 0, count = 0; k < count_cand; k++)
+    if(coll_candidates[k].bound_length >= All.DesLinkNgb)
+      {
+        if(coll_candidates[k].len < All.DesLinkNgb)
+          terminate("coll_candidates[k=%d].len=%d bound=%d\n", k, coll_candidates[k].len, coll_candidates[k].bound_length);
+
+        count++;
+      }
+
+  MPI_Allreduce(&count, &countall, 1, MPI_INT, MPI_SUM, SubComm);
+
+  if(SubThisTask == 0)
+    {
+      printf("SUBFIND-COLLECTIVE, root-task=%d: found %d bound substructures in FoF group of length %d\n", ThisTask, countall,
+             totgrouplen1);
+      fflush(stdout);
+    }
+
+  /* now determine the parent subhalo for each candidate */
+  t0 = second();
+  parallel_sort_comm(coll_candidates, count_cand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_boundlength, SubComm);
+
+  if(SubThisTask == 0)
+    tmp_coll_candidates = (struct coll_cand_dat *)mymalloc("tmp_coll_candidates", totcand * sizeof(struct coll_cand_dat));
+
+  count = count_cand;
+  count *= sizeof(struct coll_cand_dat);
+
+  countlist = (int *)mymalloc("countlist", SubNTask * sizeof(int));
+  offset    = (int *)mymalloc("offset", SubNTask * sizeof(int));
+
+  MPI_Allgather(&count, 1, MPI_INT, countlist, 1, MPI_INT, SubComm);
+
+  for(i = 1, offset[0] = 0; i < SubNTask; i++)
+    offset[i] = offset[i - 1] + countlist[i - 1];
+
+  MPI_Gatherv(coll_candidates, countlist[SubThisTask], MPI_BYTE, tmp_coll_candidates, countlist, offset, MPI_BYTE, 0, SubComm);
+
+  if(SubThisTask == 0)
+    {
+      for(k = 0; k < totcand; k++)
+        {
+          tmp_coll_candidates[k].subnr  = k;
+          tmp_coll_candidates[k].parent = 0;
+        }
+
+      qsort(tmp_coll_candidates, totcand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_rank);
+
+      for(k = 0; k < totcand; k++)
+        {
+          for(j = k + 1; j < totcand; j++)
+            {
+              if(tmp_coll_candidates[j].rank > tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len)
+                break;
+
+              if(tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len >= tmp_coll_candidates[j].rank + tmp_coll_candidates[j].len)
+                {
+                  if(tmp_coll_candidates[k].bound_length >= All.DesLinkNgb)
+                    tmp_coll_candidates[j].parent = tmp_coll_candidates[k].subnr;
+                }
+              else
+                {
+                  terminate("k=%d|%d has rank=%d and len=%d.  j=%d has rank=%d and len=%d bound=%d\n", k, countall,
+                            (int)tmp_coll_candidates[k].rank, (int)tmp_coll_candidates[k].len,
+                            (int)tmp_coll_candidates[k].bound_length, (int)tmp_coll_candidates[j].rank,
+                            (int)tmp_coll_candidates[j].len, (int)tmp_coll_candidates[j].bound_length);
+                }
+            }
+        }
+
+      qsort(tmp_coll_candidates, totcand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_subnr);
+    }
+
+  MPI_Scatterv(tmp_coll_candidates, countlist, offset, MPI_BYTE, coll_candidates, countlist[SubThisTask], MPI_BYTE, 0, SubComm);
+
+  myfree(offset);
+  myfree(countlist);
+
+  if(SubThisTask == 0)
+    myfree(tmp_coll_candidates);
+
+  t1 = second();
+  if(SubThisTask == 0)
+    {
+      printf("SUBFIND-COLLECTIVE, root-task=%d: determination of parent subhalo took %g sec (presently allocated %g MB)\n", ThisTask,
+             timediff(t0, t1), AllocatedBytes / (1024.0 * 1024.0));
+      fflush(stdout);
+    }
+
+  /* Now let's save some properties of the substructures */
+  if(SubThisTask == 0)
+    Group[0].Nsubs = countall;
+
+  t0 = second();
+  for(master = 0, subnr = 0; master < SubNTask; master++)
+    {
+      ncand = count_cand;
+      MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, master, SubComm);
+
+      for(k = 0; k < ncand; k++)
+        {
+          if(SubThisTask == master)
+            {
+              len    = coll_candidates[k].bound_length;
+              nsubs  = coll_candidates[k].nsub;
+              parent = coll_candidates[k].parent;
+            }
+
+          MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm);
+          MPI_Barrier(SubComm);
+
+          if(len > 0)
+            {
+              MPI_Bcast(&nsubs, sizeof(nsubs), MPI_BYTE, master, SubComm);
+              MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm);
+
+              LocalLen = 0;
+
+              if(SubThisTask != master)
+                subfind_poll_for_requests();
+              else
+                {
+                  for(i = 0, p = coll_candidates[k].head; i < coll_candidates[k].len; i++)
+                    {
+                      subfind_distlinklist_add_bound_particles(p, nsubs);
+                      p = subfind_distlinklist_get_next(p);
+                    }
+
+                  /* now tell the others to stop polling */
+                  for(i = 0; i < SubNTask; i++)
+                    if(i != SubThisTask)
+                      MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
+                }
+
+              MPI_Barrier(SubComm);
+
+              if(SubThisTask == 0)
+                {
+                  if(Nsubgroups >= MaxNsubgroups)
+                    terminate("Nsubgroups=%d >= MaxNsubgroups=%d", Nsubgroups, MaxNsubgroups);
+                }
+
+              tt0 = second();
+              subfind_determine_sub_halo_properties(ud, LocalLen, &SubGroup[Nsubgroups], GrNr, subnr, 1, nsubgroups_cat);
+              tt1 = second();
+
+              /* we have filled into ud the binding energy and the particle ID return */
+
+              if(SubThisTask == 0)
+                {
+                  if(Nsubgroups >= MaxNsubgroups)
+                    terminate("Nsubgroups >= MaxNsubgroups");
+
+                  if(subnr == 0)
+                    {
+                      for(j = 0; j < 3; j++)
+                        Group[grindex].Pos[j] = SubGroup[Nsubgroups].Pos[j];
+                    }
+
+                  SubGroup[Nsubgroups].GrNr      = GrNr;
+                  SubGroup[Nsubgroups].SubNr     = subnr;
+                  SubGroup[Nsubgroups].SubParent = parent;
+
+                  Nsubgroups++;
+                }
+
+              /* Let's now assign the subgroup number */
+              for(i = 0; i < LocalLen; i++)
+                PS[ud[i].index].SubNr = subnr;
+
+              subnr++;
+            }
+        }
+    }
+
+  t1 = second();
+  if(SubThisTask == 0)
+    {
+      printf("SUBFIND-COLLECTIVE, root-task=%d: determining substructure properties took %g sec (presently allocated %g MB)\n",
+             ThisTask, timediff(t0, t1), AllocatedBytes / (1024.0 * 1024.0));
+      fflush(stdout);
+    }
+
+  myfree(ud);
+  ud = NULL;
+  myfree(coll_candidates);
+  myfree(Len);
+  myfree(Tail);
+  myfree(Next);
+  myfree(Head);
+  myfree(sd);
+
+  subfind_coll_treefree();
+  subfind_coll_domain_free();
+
+  /* undo local rearrangement that made group consecutive. After that, the association of SphP[] will be correct again */
+  submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart);
+  for(int i = 0; i < NumPart; i++)
+    {
+      submp[i].index    = i;
+      submp[i].OldIndex = PS[i].OldIndex;
+    }
+  qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_OldIndex);
+  subfind_reorder_according_to_submp();
+  myfree(submp);
+}
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+/*! \brief Computes velocity and angular momentum of a particle relative to its group.
+ *
+ *  The offset from gr_pos is wrapped with fof_periodic() and scaled by All.cf_atime; the
+ *  relative velocity adds All.cf_Hrate times that offset.
+ *
+ *  \param[in] index Index of the particle in P[].
+ *  \param[in] gr_pos Reference position of the group.
+ *  \param[in] gr_vel Reference velocity of the group.
+ *  \param[out] Vel_tot Velocity of the particle relative to the group.
+ *  \param[out] jpart Angular momentum mass * (offset x Vel_tot) of the particle.
+ *
+ *  \return void
+ */
+static void subfind_fof_am_kinematics(long long index, const double *gr_pos, const double *gr_vel, double *Vel_tot, double *jpart)
+{
+  double Pos_pbc[3];
+  int i;
+
+  for(i = 0; i < 3; i++)
+    {
+      Pos_pbc[i] = P[index].Pos[i] - gr_pos[i];
+      Pos_pbc[i] = fof_periodic(Pos_pbc[i]);
+      Pos_pbc[i] = Pos_pbc[i] * All.cf_atime; /* convert to physical length */
+      Vel_tot[i] = P[index].Vel[i] / All.cf_atime - gr_vel[i] / All.cf_atime + All.cf_Hrate * Pos_pbc[i];
+    }
+
+  jpart[0] = P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]);
+  jpart[1] = P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]);
+  jpart[2] = P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]);
+}
+
+/*! \brief Calculates angular momentum collectively on all MPI tasks.
+ *
+ *  Sums energies and angular momenta over the particles of the single group held by SubComm,
+ *  derives the counter-rotating fractions and stores everything in Group[0] on the root task.
+ *  A fraction whose denominator is zero is stored as 0 rather than NaN.
+ *
+ *  \param[in] snapnr (unused)
+ *  \param[in] ngroups_cat (unused)
+ *
+ *  \return void
+ */
+void subfind_fof_calc_am_collective(int snapnr, int ngroups_cat)
+{
+  int len, totgrouplen1, totgrouplen2;
+  long long index;
+
+  int grindex = 0, i, k, ptype;
+  double Vel_tot[3], gr_pos[3], gr_vel[3];
+  double gr_Jtot[3], gr_Jdm[3], gr_Jgas[3], gr_Jstars[3], jpart[3];
+  double gr_CMFrac, gr_CMFracType[NTYPES];
+  int gr_len_dm;
+  double gr_mass, gr_mass_gas, gr_mass_stars;  // gr_mass_dm,
+  double gr_Ekin, gr_Ethr;
+
+  /* make a sanity check: We should have exactly 1 group, stored on the root of the processor subset */
+  if(SubThisTask == 0)
+    {
+      if(Ngroups != 1)
+        terminate("Ngroups=%d != 1  SubNTask=%d SubThisTask=%d", Ngroups, SubNTask, SubThisTask);
+    }
+  else
+    {
+      if(Ngroups != 0)
+        terminate("Ngroups=%d != 0  SubNTask=%d SubThisTask=%d", Ngroups, SubNTask, SubThisTask);
+    }
+
+  if(SubThisTask == 0)
+    {
+      printf("SUBFIND-COLLECTIVE, root-task=%d: Collectively doing AM of halo %d of length %d on %d processors.\n", ThisTask,
+             Group[0].GrNr, Group[0].Len, SubNTask);
+
+      totgrouplen2 = Group[0].Len;
+    }
+
+  /* tell everybody in the set the group number and the grouplen */
+  MPI_Bcast(&GrNr, 1, MPI_INT, 0, SubComm);
+  MPI_Bcast(&totgrouplen2, 1, MPI_INT, 0, SubComm);
+
+  for(i = 0, NumPartGroup = 0; i < NumPart; i++)
+    if(PS[i].GrNr == GrNr)
+      NumPartGroup++;
+
+  MPI_Allreduce(&NumPartGroup, &totgrouplen1, 1, MPI_INT, MPI_SUM, SubComm);
+
+  /* sanity check that we actually have all the right particles on the processor subset */
+  if(totgrouplen1 != totgrouplen2)
+    terminate("totgrouplen1 != totgrouplen2"); /* inconsistency */
+
+  /* do a domain decomposition just for this halo */
+  subfind_coll_domain_decomposition();
+
+  /* copy over the domain dimensions to serial tree code, as this may be used in the collective unbinding */
+  subfind_loctree_copyExtent();
+
+  /* now let us sort according to GrNr and Density. This step will temporarily break the association with SphP[] and other arrays! */
+  submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart);
+  for(i = 0; i < NumPart; i++)
+    {
+      PS[i].OldIndex      = i;
+      submp[i].index      = i;
+      submp[i].GrNr       = PS[i].GrNr;
+      submp[i].DM_Density = PS[i].Density;
+    }
+  qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_GrNr_DM_Density);
+  subfind_reorder_according_to_submp();
+  myfree(submp);
+
+  /* note: now we have the particles of the group at the beginning, but SPH particles are not aligned.
+     They can however be accessed via SphP[PS[i].OldIndex] */
+
+  /* re-determine the number of local group particles, which has changed due to domain decomposition */
+  for(i = 0, NumPartGroup = 0; i < NumPart; i++)
+    if(PS[i].GrNr == GrNr)
+      NumPartGroup++;
+
+  ud  = (struct unbind_data *)mymalloc("ud", NumPartGroup * sizeof(struct unbind_data));
+  len = NumPartGroup;
+
+  // pick my particles
+  for(i = 0; i < len; i++)
+    ud[i].index = i;
+
+  // initialize
+  gr_CMFrac = 0;
+  gr_Ekin   = 0;
+  gr_Ethr   = 0;
+  for(k = 0; k < 3; k++)
+    {
+      gr_Jtot[k]   = 0;
+      gr_Jdm[k]    = 0;
+      gr_Jgas[k]   = 0;
+      gr_Jstars[k] = 0;
+    }
+  for(k = 0; k < NTYPES; k++)
+    {
+      gr_CMFracType[k] = 0;
+    }
+
+  if(SubThisTask == 0)
+    {
+      for(k = 0; k < 3; k++)
+        {
+          gr_pos[k] = Group[grindex].Pos[k];
+          gr_vel[k] = Group[grindex].Vel[k];
+        }
+    }
+
+  // send group properties stored only on root task to all participating tasks
+  MPI_Bcast(gr_pos, 3, MPI_DOUBLE, 0, SubComm);
+  MPI_Bcast(gr_vel, 3, MPI_DOUBLE, 0, SubComm);
+
+  // first pass: accumulate energies and angular momenta of the local group particles
+  for(k = 0; k < len; k++)
+    {
+      index = ud[k].index;
+      ptype = P[index].Type;
+
+      subfind_fof_am_kinematics(index, gr_pos, gr_vel, Vel_tot, jpart);
+
+      gr_Ekin += (P[index].Mass / 2) * (Vel_tot[0] * Vel_tot[0] + Vel_tot[1] * Vel_tot[1] + Vel_tot[2] * Vel_tot[2]);
+      if(ptype == 0)
+        gr_Ethr += P[index].Mass * SphP[PS[index].OldIndex].Utherm;
+
+      for(i = 0; i < 3; i++)
+        {
+          gr_Jtot[i] += jpart[i];
+          if(ptype == 1)  // dm illustris
+            gr_Jdm[i] += jpart[i];
+          if(ptype == 0)  // gas (incl. winds)
+            gr_Jgas[i] += jpart[i];
+          if(ptype == 4)  // stars
+            gr_Jstars[i] += jpart[i];
+        }
+    }
+
+  MPI_Allreduce(MPI_IN_PLACE, gr_Jtot, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+  MPI_Allreduce(MPI_IN_PLACE, gr_Jdm, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+  MPI_Allreduce(MPI_IN_PLACE, gr_Jgas, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+  MPI_Allreduce(MPI_IN_PLACE, gr_Jstars, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+  MPI_Allreduce(MPI_IN_PLACE, &gr_Ekin, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+  MPI_Allreduce(MPI_IN_PLACE, &gr_Ethr, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+
+  // save the properties
+  if(SubThisTask == 0)
+    {
+      Group[grindex].Ekin = gr_Ekin;
+      Group[grindex].Ethr = gr_Ethr;
+      for(i = 0; i < 3; i++)
+        {
+          Group[grindex].J[i]      = gr_Jtot[i];
+          Group[grindex].JDM[i]    = gr_Jdm[i];
+          Group[grindex].JGas[i]   = gr_Jgas[i];
+          Group[grindex].JStars[i] = gr_Jstars[i];
+        }
+    }
+
+  // calculate counter-rotating fractions (second pass: uses the globally summed angular momenta)
+  gr_len_dm = 0;
+  gr_mass = gr_mass_gas = gr_mass_stars = 0;
+
+  for(k = 0; k < len; k++)
+    {
+      index = ud[k].index;
+      ptype = P[index].Type;
+
+      subfind_fof_am_kinematics(index, gr_pos, gr_vel, Vel_tot, jpart);
+
+      gr_mass += P[index].Mass;
+      if((gr_Jtot[0] * jpart[0] + gr_Jtot[1] * jpart[1] + gr_Jtot[2] * jpart[2]) < 0.)
+        gr_CMFrac += P[index].Mass;  // / gr_mass;
+
+      if(ptype == 1)  // dm illustris
+        {
+          gr_len_dm++;
+          if((gr_Jdm[0] * jpart[0] + gr_Jdm[1] * jpart[1] + gr_Jdm[2] * jpart[2]) < 0.)
+            gr_CMFracType[1]++;  //= P[index].Mass / gr_mass_dm;
+        }
+      if(ptype == 0)  // gas (incl. winds)
+        {
+          gr_mass_gas += P[index].Mass;
+          if((gr_Jgas[0] * jpart[0] + gr_Jgas[1] * jpart[1] + gr_Jgas[2] * jpart[2]) < 0.)
+            gr_CMFracType[0] += P[index].Mass;  // / gr_mass_gas;
+        }
+      if(ptype == 4)  // stars
+        {
+          gr_mass_stars += P[index].Mass;
+          if((gr_Jstars[0] * jpart[0] + gr_Jstars[1] * jpart[1] + gr_Jstars[2] * jpart[2]) < 0.)
+            gr_CMFracType[4] += P[index].Mass;  // / gr_mass_stars;
+        }
+    }
+
+  MPI_Allreduce(MPI_IN_PLACE, &gr_mass, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+  MPI_Allreduce(MPI_IN_PLACE, &gr_len_dm, 1, MPI_INT, MPI_SUM, SubComm);
+  MPI_Allreduce(MPI_IN_PLACE, &gr_mass_gas, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+  MPI_Allreduce(MPI_IN_PLACE, &gr_mass_stars, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+  MPI_Allreduce(MPI_IN_PLACE, &gr_CMFrac, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+  MPI_Allreduce(MPI_IN_PLACE, gr_CMFracType, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm);
+
+  // save the properties; an empty component (zero denominator) is stored as 0 instead of the NaN from 0/0
+  if(SubThisTask == 0)
+    {
+      gr_CMFrac        = (gr_mass > 0) ? gr_CMFrac / gr_mass : 0;
+      gr_CMFracType[1] = (gr_len_dm > 0) ? gr_CMFracType[1] / gr_len_dm : 0;
+      gr_CMFracType[0] = (gr_mass_gas > 0) ? gr_CMFracType[0] / gr_mass_gas : 0;
+      gr_CMFracType[4] = (gr_mass_stars > 0) ? gr_CMFracType[4] / gr_mass_stars : 0;
+
+      Group[grindex].CMFrac = gr_CMFrac;
+      for(i = 0; i < NTYPES; i++)
+        Group[grindex].CMFracType[i] = gr_CMFracType[i];
+    }
+
+  myfree(ud);
+
+  if(SubThisTask == 0)
+    printf("SUBFIND-COLLECTIVE: root-task = %d AM done.\n", ThisTask);
+
+  subfind_coll_domain_free();
+
+  /* undo local rearrangement that made group consecutive. After that, the association of SphP[] will be correct again */
+  submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart);
+  for(i = 0; i < NumPart; i++)
+    {
+      submp[i].index    = i;
+      submp[i].OldIndex = PS[i].OldIndex;
+    }
+  qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_OldIndex);
+  subfind_reorder_according_to_submp();
+  myfree(submp);
+}
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+/*! \brief Finds candidates for subfind collective.
+ *
+ *  \param[in] totgrouplen Length of group.
+ *
+ *  \return void
+ */
+void subfind_col_find_coll_candidates(int totgrouplen)
+{
+  int ngbcount, retcode, len_attach;
+  int i, k, len, master;
+  long long prev, tail, tail_attach, tmp, next, index;
+  long long p, ss, head, head_attach, ngb_index1, ngb_index2, rank;
+  double t0, t1, tt0, tt1;
+
+  if(SubThisTask == 0)
+    {
+      printf("SUBFIND-COLLECTIVE, root-task=%d: building distributed linked list. (presently allocated %g MB)\n", ThisTask,
+             AllocatedBytes / (1024.0 * 1024.0));
+      fflush(stdout);
+    }
+
+  /* now find the subhalo coll_candidates by building up link lists from high density to low density */
+  t0 = second();
+  for(master = 0; master < SubNTask; master++)
+    {
+      tt0 = second();
+      if(SubThisTask != master)
+        subfind_poll_for_requests();
+      else
+        {
+          for(k = 0; k < NumPartGroup; k++)
+            {
+              ngbcount   = sd[k].ngbcount;
+              ngb_index1 = sd[k].ngb_index1;
+              ngb_index2 = sd[k].ngb_index2;
+
+              switch(ngbcount) /* treat the different possible cases */
+                {
+                  case 0: /* this appears to be a lonely maximum -> new group */
+                    subfind_distlinklist_set_all(sd[k].index, sd[k].index, sd[k].index, 1, -1);
+                    break;
+
+                  case 1: /* the particle is attached to exactly one group */
+                    head = subfind_distlinklist_get_head(ngb_index1);
+
+                    if(head == -1)
+                      terminate("We have a problem!  head=%d/%d for k=%d on task=%d\n", (int)(head >> 32), (int)head, k, SubThisTask);
+
+                    retcode = subfind_distlinklist_get_tail_set_tail_increaselen(head, &tail, sd[k].index);
+
+                    if(!(retcode & 1))
+                      subfind_distlinklist_set_headandnext(sd[k].index, head, -1);
+                    if(!(retcode & 2))
+                      subfind_distlinklist_set_next(tail, sd[k].index);
+                    break;
+
+                  case 2: /* the particle merges two groups together */
+                    if((ngb_index1 >> 32) == (ngb_index2 >> 32))
+                      {
+                        subfind_distlinklist_get_two_heads(ngb_index1, ngb_index2, &head, &head_attach);
+                      }
+                    else
+                      {
+                        head        = subfind_distlinklist_get_head(ngb_index1);
+                        head_attach = subfind_distlinklist_get_head(ngb_index2);
+                      }
+
+                    if(head == -1 || head_attach == -1)
+                      terminate("We have a problem!  head=%d/%d head_attach=%d/%d for k=%d on task=%d\n", (int)(head >> 32), (int)head,
+                                (int)(head_attach >> 32), (int)head_attach, k, SubThisTask);
+
+                    if(head != head_attach)
+                      {
+                        subfind_distlinklist_get_tailandlen(head, &tail, &len);
+                        subfind_distlinklist_get_tailandlen(head_attach, &tail_attach, &len_attach);
+
+                        if(len_attach > len ||
+                           (len_attach == len &&
+                            head_attach < head)) /* other group is longer, swap them. for equal length, take the larger head value */
+                          {
+                            tmp         = head;
+                            head        = head_attach;
+                            head_attach = tmp;
+                            tmp         = tail;
+                            tail        = tail_attach;
+                            tail_attach = tmp;
+                            tmp         = len;
+                            len         = len_attach;
+                            len_attach  = tmp;
+                          }
+
+                        /* only in case the attached group is long enough we bother to register it
+                           as a subhalo candidate */
+
+                        if(len_attach >= All.DesLinkNgb)
+                          {
+                            if(count_cand < max_coll_candidates)
+                              {
+                                coll_candidates[count_cand].len  = len_attach;
+                                coll_candidates[count_cand].head = head_attach;
+                                count_cand++;
+                              }
+                            else
+                              terminate("Task %d: count=%d, max=%d, npartgroup=%d\n", SubThisTask, count_cand, max_coll_candidates,
+                                        NumPartGroup);
+                          }
+
+                        /* now join the two groups */
+                        subfind_distlinklist_set_tailandlen(head, tail_attach, len + len_attach);
+                        subfind_distlinklist_set_next(tail, head_attach);
+
+                        ss = head_attach;
+                        do
+                          {
+                            ss = subfind_distlinklist_set_head_get_next(ss, head);
+                          }
+                        while(ss >= 0);
+                      }
+
+                    /* finally, attach the particle to 'head' */
+                    retcode = subfind_distlinklist_get_tail_set_tail_increaselen(head, &tail, sd[k].index);
+
+                    if(!(retcode & 1))
+                      subfind_distlinklist_set_headandnext(sd[k].index, head, -1);
+                    if(!(retcode & 2))
+                      subfind_distlinklist_set_next(tail, sd[k].index);
+                    break;
+                }
+            }
+
+          fflush(stdout);
+
+          /* now tell the others to stop polling */
+          for(k = 0; k < SubNTask; k++)
+            if(k != SubThisTask)
+              MPI_Send(&k, 1, MPI_INT, k, TAG_POLLING_DONE, SubComm);
+        }
+
+      MPI_Barrier(SubComm);
+      tt1 = second();
+      if(SubThisTask == 0)
+        {
+          printf("SUBFIND-COLLECTIVE, root-task=%d: ma=%d/%d took %g sec\n", ThisTask, master, SubNTask, timediff(tt0, tt1));
+          fflush(stdout);
+        }
+    }
+  t1 = second();
+  if(SubThisTask == 0)
+    printf("SUBFIND-COLLECTIVE, root-task=%d: identification of primary coll_candidates took %g sec\n", ThisTask, timediff(t0, t1));
+
+  /* add the full thing as a subhalo candidate */
+  t0 = second();
+  for(master = 0, head = -1, prev = -1; master < SubNTask; master++)
+    {
+      if(SubThisTask != master)
+        subfind_poll_for_requests();
+      else
+        {
+          for(i = 0; i < NumPartGroup; i++)
+            {
+              index = (((long long)SubThisTask) << 32) + i;
+
+              if(Head[i] == index)
+                {
+                  subfind_distlinklist_get_tailandlen(Head[i], &tail, &len);
+                  next = subfind_distlinklist_get_next(tail);
+                  if(next == -1)
+                    {
+                      if(prev < 0)
+                        head = index;
+
+                      if(prev >= 0)
+                        subfind_distlinklist_set_next(prev, index);
+
+                      prev = tail;
+                    }
+                }
+            }
+
+          /* now tell the others to stop polling */
+          for(k = 0; k < SubNTask; k++)
+            if(k != SubThisTask)
+              MPI_Send(&k, 1, MPI_INT, k, TAG_POLLING_DONE, SubComm);
+        }
+
+      MPI_Barrier(SubComm);
+      MPI_Bcast(&head, sizeof(head), MPI_BYTE, master, SubComm);
+      MPI_Bcast(&prev, sizeof(prev), MPI_BYTE, master, SubComm);
+    }
+
+  if(SubThisTask == SubNTask - 1)
+    {
+      if(count_cand < max_coll_candidates)
+        {
+          coll_candidates[count_cand].len  = totgrouplen;
+          coll_candidates[count_cand].head = head;
+          count_cand++;
+        }
+      else
+        terminate("count_cand=%d >= max_coll_candidates=%d", count_cand, max_coll_candidates);
+    }
+  t1 = second();
+  if(SubThisTask == 0)
+    printf("SUBFIND-COLLECTIVE, root-task=%d: adding background as candidate took %g sec\n", ThisTask, timediff(t0, t1));
+
+  /* go through the whole chain once to establish a rank order. For the rank we use Len[] */
+  t0 = second();
+
+  master = (head >> 32);
+
+  if(SubThisTask != master)
+    subfind_poll_for_requests();
+  else
+    {
+      p    = head;
+      rank = 0;
+
+      while(p >= 0)
+        {
+          p = subfind_distlinklist_setrank_and_get_next(p, &rank);
+        }
+
+      /* now tell the others to stop polling */
+      for(i = 0; i < SubNTask; i++)
+        if(i != master)
+          MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
+    }
+
+  MPI_Barrier(SubComm);
+  MPI_Bcast(&rank, sizeof(rank), MPI_BYTE, master, SubComm); /* just for testing */
+
+  /* for each candidate, we now pull out the rank of its head */
+  for(master = 0; master < SubNTask; master++)
+    {
+      if(SubThisTask != master)
+        subfind_poll_for_requests();
+      else
+        {
+          for(k = 0; k < count_cand; k++)
+            coll_candidates[k].rank = subfind_distlinklist_get_rank(coll_candidates[k].head);
+
+          /* now tell the others to stop polling */
+          for(i = 0; i < SubNTask; i++)
+            if(i != SubThisTask)
+              MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
+        }
+    }
+  MPI_Barrier(SubComm);
+
+  t1 = second();
+  if(SubThisTask == 0)
+    printf("SUBFIND-COLLECTIVE, root-task=%d: establishing of rank order took %g sec  (p=%d, grouplen=%d) presently allocated %g MB\n",
+           ThisTask, timediff(t0, t1), (int)rank, totgrouplen, AllocatedBytes / (1024.0 * 1024.0));
+
+  if(((int)rank) != totgrouplen)
+    terminate("mismatch\n");
+}
+
+/*! \brief Runs the serial unbinding on every candidate that has no parent.
+ *
+ *  The candidate list is sorted with subfind_compare_coll_candidates_nsubs
+ *  and PS[] is then swept with one running particle index that is never
+ *  rewound between candidates (this presumes PS[] is ordered by submark --
+ *  to be confirmed against the caller).
+ *
+ *  \param[in] count_cand Number of subgroup candidates.
+ *
+ *  \return void
+ */
+void subfind_unbind_independent_ones(int count_cand)
+{
+  int cand, p, m, mark, nlist, len_non_gas;
+
+  ud = (struct unbind_data *)mymalloc("ud", NumPart * sizeof(struct unbind_data));
+
+  subfind_loctree_treeallocate(All.TreeAllocFactor * NumPart, NumPart);
+
+  qsort(coll_candidates, count_cand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_nsubs);
+
+  p = 0; /* running particle index, shared by all candidates */
+
+  for(cand = 0; cand < count_cand; cand++)
+    {
+      /* only candidates without a parent are treated here */
+      if(coll_candidates[cand].parent != 0)
+        continue;
+
+      /* skip particles whose submark lies below this candidate's nsub */
+      while(PS[p].submark < coll_candidates[cand].nsub)
+        {
+          p++;
+          if(p >= NumPart)
+            terminate("i >= NumPart");
+        }
+
+      /* nothing to do if the particle we stopped at carries no usable mark */
+      if(PS[p].submark < 0 || PS[p].submark >= HIGHBIT)
+        continue;
+
+      mark  = PS[p].submark;
+      nlist = 0;
+
+      if(mark != coll_candidates[cand].nsub)
+        {
+          terminate("TASK=%d i=%d k=%d nsubs=%d coll_candidates[k].nsub=%d\n", SubThisTask, p, cand, mark,
+                    coll_candidates[cand].nsub);
+        }
+
+      /* gather the consecutive run of particles carrying this mark; each one is
+         provisionally reset to HIGHBIT, and those without the HIGHBIT flag in
+         origintask are entered into ud[] */
+      for(; p < NumPart && PS[p].submark == mark; p++)
+        {
+          PS[p].submark = HIGHBIT;
+          if((PS[p].origintask & HIGHBIT) == 0)
+            {
+              ud[nlist].index = p;
+              nlist++;
+            }
+        }
+
+      /* call the serial unbind function */
+      nlist = subfind_unbind(ud, nlist, &len_non_gas);
+
+      if(nlist >= All.DesLinkNgb)
+        {
+          /* ok, we found a substructure */
+          coll_candidates[cand].bound_length = nlist;
+
+          /* restore the mark on the surviving particles to flag the substructure */
+          for(m = 0; m < nlist; m++)
+            PS[ud[m].index].submark = mark;
+        }
+      else
+        coll_candidates[cand].bound_length = 0;
+    }
+
+  subfind_loctree_treefree();
+
+  myfree(ud);
+}
+
+/*! \brief Iteratively removes unbound particles from a candidate that is spread over the tasks of SubComm.
+ *
+ *  \param[in, out] d Unbind data (local index list); removed entries are overwritten in place by the last entry.
+ *  \param[in] num Number of local entries in d on entry.
+ *  \param[in, out] num_non_gas Incremented for every retained particle with Type != 0 (never reset here, see NOTE below).
+ *
+ *  \return Number of local entries left in d when the iteration stops.
+ */
+int subfind_col_unbind(struct unbind_data *d, int num, int *num_non_gas)
+{
+  int iter = 0; /* pass counter; incremented at the end of each pass but not otherwise read in this function */
+  int i, j, p, part_index, minindex, task;
+  int unbound, totunbound, numleft, mincpu;
+  int *npart, *offset, *nbu_count, count_bound_unbound, phaseflag;
+  double s[3], dx[3], ddxx, v[3], dv[3], sloc[3], vloc[3], pos[3];
+  double vel_to_phys, atime;
+  MyFloat minpot, *potlist;
+  double boxsize, xtmp; /* xtmp is not referenced directly below; presumably scratch for GRAVITY_NEAREST_X -- confirm */
+  double mass, massloc;
+  double *bnd_energy, energy_limit, energy_limit_local, weakly_bound_limit_local, weakly_bound_limit = 0;
+
+  if(SubThisTask == 0)
+    {
+      printf("SUBFIND-COLLECTIVE, root-task=%d: beginning of subfind_col_unbind()\n", ThisTask);
+      fflush(stdout);
+    }
+
+  boxsize = All.BoxSize;
+
+  vel_to_phys = 1.0 / All.cf_atime; /* factor applied to velocity differences in the energy loop below */
+  atime       = All.cf_atime;       /* factor applied to coordinate differences in the energy loop below */
+
+  phaseflag = 0; /* this means we will recompute the potential for all particles */
+
+  do /* one pass = potential, centre/bulk velocity, binding energies, removal of unbound entries */
+    {
+      subfind_coll_treebuild(num, d);
+
+      /* let's compute the potential energy; weakly_bound_limit is 0 on the first pass and afterwards the value reduced in the previous pass */
+
+      subfind_potential_compute(num, d, phaseflag, weakly_bound_limit);
+
+      if(phaseflag == 0) /* only in this phase is the position of the potential minimum (pos[]) determined */
+        {
+          potlist = (MyFloat *)mymalloc("potlist", SubNTask * sizeof(MyFloat));
+
+          for(i = 0, minindex = -1, minpot = 1.0e30; i < num; i++) /* local minimum; 1.0e30 is the "nothing found" sentinel */
+            {
+              if(gsl_isnan(PS[d[i].index].Potential))
+                terminate("pot is nan");
+
+              if(PS[d[i].index].Potential < minpot || minindex == -1) /* minindex == -1 forces the first entry to be taken */
+                {
+                  minpot   = PS[d[i].index].Potential;
+                  minindex = d[i].index;
+                }
+            }
+
+          MPI_Allgather(&minpot, sizeof(MyFloat), MPI_BYTE, potlist, sizeof(MyFloat), MPI_BYTE, SubComm); /* every task's local minimum */
+
+          for(i = 0, mincpu = -1, minpot = 1.0e30; i < SubNTask; i++) /* strict '<': tasks that kept the sentinel cannot win */
+            if(potlist[i] < minpot)
+              {
+                mincpu = i;
+                minpot = potlist[i];
+              }
+
+          if(mincpu < 0)
+            terminate("mincpu < 0");
+
+          myfree(potlist);
+
+          if(SubThisTask == mincpu) /* only the task owning the global minimum fills pos[] */
+            {
+#ifdef CELL_CENTER_GRAVITY
+              if(P[minindex].Type == 0)
+                {
+                  for(j = 0; j < 3; j++)
+                    pos[j] = PS[minindex].Center[j];
+                }
+              else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+                {
+                  for(j = 0; j < 3; j++)
+                    pos[j] = P[minindex].Pos[j];
+                }
+            }
+
+          MPI_Bcast(&pos[0], 3, MPI_DOUBLE, mincpu, SubComm);
+          /* pos[] now holds the position of minimum potential */
+          /* we take that as the center */
+        }
+
+      /* let's get bulk velocity and the center-of-mass (mass-weighted sums, offsets taken relative to pos[]) */
+
+      for(j = 0; j < 3; j++)
+        sloc[j] = vloc[j] = 0;
+
+      for(i = 0, massloc = 0; i < num; i++)
+        {
+          part_index = d[i].index;
+
+          for(j = 0; j < 3; j++)
+            {
+#ifdef CELL_CENTER_GRAVITY
+              if(P[part_index].Type == 0)
+                ddxx = GRAVITY_NEAREST_X(PS[part_index].Center[j] - pos[j]);
+              else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+                ddxx = GRAVITY_NEAREST_X(P[part_index].Pos[j] - pos[j]); /* presumably the periodic nearest-image offset -- confirm */
+
+              sloc[j] += P[part_index].Mass * ddxx;
+              vloc[j] += P[part_index].Mass * P[part_index].Vel[j];
+            }
+          massloc += P[part_index].Mass;
+        }
+
+      MPI_Allreduce(sloc, s, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(vloc, v, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(&massloc, &mass, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+
+      for(j = 0; j < 3; j++)
+        {
+          s[j] /= mass; /* center of mass */
+          v[j] /= mass;
+
+          s[j] += pos[j]; /* the sums were taken relative to pos[], so shift back */
+
+          while(s[j] < 0) /* bring s[j] into [0, boxsize) */
+            s[j] += boxsize;
+          while(s[j] >= boxsize)
+            s[j] -= boxsize;
+        }
+
+      bnd_energy = (double *)mymalloc("bnd_energy", num * sizeof(double));
+
+      for(i = 0; i < num; i++) /* binding energy of every local entry relative to s[] and v[] */
+        {
+          part_index = d[i].index;
+
+          for(j = 0; j < 3; j++)
+            {
+              dv[j] = vel_to_phys * (P[part_index].Vel[j] - v[j]);
+
+#ifdef CELL_CENTER_GRAVITY
+              if(P[part_index].Type == 0)
+                dx[j] = atime * GRAVITY_NEAREST_X(PS[part_index].Center[j] - s[j]);
+              else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+                dx[j] = atime * GRAVITY_NEAREST_X(P[part_index].Pos[j] - s[j]);
+
+              dv[j] += All.cf_Hrate * dx[j]; /* presumably the Hubble-flow contribution -- confirm */
+            }
+
+          PS[part_index].BindingEnergy = PS[part_index].Potential + 0.5 * (dv[0] * dv[0] + dv[1] * dv[1] + dv[2] * dv[2]);
+          PS[part_index].BindingEnergy += All.G / All.cf_atime * P[part_index].Mass /
+                                          (All.ForceSoftening[P[part_index].SofteningType] / 2.8); /* add self-energy */
+
+          if(P[part_index].Type == 0) /* Type 0 entries additionally get their Utherm added */
+            PS[part_index].BindingEnergy += PS[part_index].Utherm;
+
+          bnd_energy[i] = PS[part_index].BindingEnergy;
+        }
+
+      parallel_sort_comm(bnd_energy, num, sizeof(double), subfind_compare_binding_energy, SubComm); /* presumably sorts across all of SubComm -- confirm */
+
+      npart     = (int *)mymalloc("npart", SubNTask * sizeof(int));
+      nbu_count = (int *)mymalloc("nbu_count", SubNTask * sizeof(int));
+      offset    = (int *)mymalloc("offset", SubNTask * sizeof(int));
+
+      MPI_Allgather(&num, 1, MPI_INT, npart, 1, MPI_INT, SubComm);
+      MPI_Allreduce(&num, &numleft, 1, MPI_INT, MPI_SUM, SubComm);
+
+      for(i = 1, offset[0] = 0; i < SubNTask; i++) /* exclusive prefix sum of npart[]; not read again in this function */
+        offset[i] = offset[i - 1] + npart[i - 1];
+
+      j = (int)(0.25 * numleft); /* index of limiting energy value */
+
+      task = 0;
+      while(j >= npart[task]) /* turn the global index j into (task, local index j) */
+        {
+          j -= npart[task];
+          task++;
+        }
+
+      if(SubThisTask == task)
+        energy_limit_local = bnd_energy[j];
+      else
+        energy_limit_local = 1.0e30; /* neutral element for the MPI_MIN reduction below */
+
+      MPI_Allreduce(&energy_limit_local, &energy_limit, 1, MPI_DOUBLE, MPI_MIN, SubComm);
+
+      for(i = 0, count_bound_unbound = 0; i < num; i++) /* local balance: +1 for each energy > 0, -1 otherwise */
+        {
+          if(bnd_energy[i] > 0)
+            count_bound_unbound++;
+          else
+            count_bound_unbound--;
+        }
+
+      MPI_Allgather(&count_bound_unbound, 1, MPI_INT, nbu_count, 1, MPI_INT, SubComm);
+
+      for(i = 0, count_bound_unbound = 0; i < SubThisTask; i++) /* start from the summed balance of all lower-numbered tasks */
+        count_bound_unbound += nbu_count[i];
+
+      for(i = 0; i < num - 1; i++) /* stop at the first local entry where the running balance is <= 0 */
+        {
+          if(bnd_energy[i] > 0)
+            count_bound_unbound++;
+          else
+            count_bound_unbound--;
+          if(count_bound_unbound <= 0)
+            break;
+        }
+
+      if(num > 0 && count_bound_unbound <= 0)
+        weakly_bound_limit_local = bnd_energy[i];
+      else
+        weakly_bound_limit_local = -1.0e30; /* neutral element for the MPI_MAX reduction below */
+
+      MPI_Allreduce(&weakly_bound_limit_local, &weakly_bound_limit, 1, MPI_DOUBLE, MPI_MAX, SubComm); /* handed to subfind_potential_compute in the next pass */
+
+      for(i = 0, unbound = 0; i < num; i++)
+        {
+          p = d[i].index;
+
+          if(PS[p].BindingEnergy > 0 && PS[p].BindingEnergy > energy_limit) /* positive energy and above the limiting value: remove */
+            {
+              unbound++;
+
+              d[i] = d[num - 1]; /* overwrite with the last entry ... */
+              num--;
+              i--; /* ... and examine the moved entry in the next iteration */
+            }
+          else if(P[p].Type != 0)
+            (*num_non_gas)++; /* NOTE(review): never reset in this function, so it accumulates over passes and on top of the caller's value -- confirm intended */
+        }
+
+      myfree(offset);
+      myfree(nbu_count);
+      myfree(npart);
+      myfree(bnd_energy);
+
+      MPI_Allreduce(&unbound, &totunbound, 1, MPI_INT, MPI_SUM, SubComm);
+      MPI_Allreduce(&num, &numleft, 1, MPI_INT, MPI_SUM, SubComm);
+
+      if(phaseflag == 0)
+        {
+          if(totunbound > 0) /* something was removed in a full pass: continue with phaseflag 1 */
+            phaseflag = 1;
+        }
+      else
+        {
+          if(totunbound == 0)
+            {
+              phaseflag  = 0; /* this will make us repeat everything once more for all particles */
+              totunbound = 1; /* keeps the loop condition below true for that extra pass */
+            }
+        }
+
+      iter++;
+    }
+  while(totunbound > 0 && numleft >= All.DesLinkNgb); /* stop when nothing was removed or fewer than DesLinkNgb entries remain in total */
+
+  return num;
+}
+
+/*! \brief Serves linked-list requests from other tasks until told to stop.
+ *
+ *  Blocks in MPI_Probe on SubComm, dispatches on the tag of the pending
+ *  message, applies the request to the local Head/Tail/Len/Next/PS/ud data
+ *  and, for the "get" style tags, sends a reply to the requesting task.
+ *  The loop ends after a TAG_POLLING_DONE message has been received; an
+ *  unknown tag terminates the run.
+ *
+ *  All indices received here are local array indices; the requesting side
+ *  strips the task number from the global index before sending.
+ *
+ *  \return void
+ */
+void subfind_poll_for_requests(void)
+{
+  int index, nsub, source, tag, ibuf[3], target, submark, task;
+  long long head, next, rank, buf[5];
+  long long oldtail, newtail;
+  int task_newtail, i_newtail, task_oldtail, i_oldtail;
+  MPI_Status status;
+
+  do
+    {
+      /* wait for any message, then receive it in the branch that knows its layout */
+      MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, SubComm, &status);
+
+      source = status.MPI_SOURCE;
+      tag    = status.MPI_TAG;
+
+      /* MPI_Get_count(&status, MPI_BYTE, &count); */
+      switch(tag)
+        {
+          case TAG_GET_TWOHEADS:
+            /* request: two local indices; reply: their Head values */
+            MPI_Recv(ibuf, 2, MPI_INT, source, TAG_GET_TWOHEADS, SubComm, MPI_STATUS_IGNORE);
+            buf[0] = Head[ibuf[0]];
+            buf[1] = Head[ibuf[1]];
+            MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_GET_TWOHEADS_DATA, SubComm);
+            break;
+          case TAG_SET_NEWTAIL:
+            /* remote counterpart of the local branch of subfind_distlinklist_get_tail_set_tail_increaselen() */
+            MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SET_NEWTAIL, SubComm, MPI_STATUS_IGNORE);
+            index       = buf[0];
+            newtail     = buf[1];
+            oldtail     = Tail[index]; /* return old tail */
+            Tail[index] = newtail;
+            Len[index]++;
+
+            /* if the new tail lives on this task, initialise its Head/Next right away */
+            task_newtail = (newtail >> 32);
+            if(task_newtail == SubThisTask)
+              {
+                i_newtail       = (newtail & MASK);
+                Head[i_newtail] = (((long long)SubThisTask) << 32) + index;
+                Next[i_newtail] = -1;
+              }
+            /* if the old tail lives on this task, link it to the new tail right away */
+            task_oldtail = (oldtail >> 32);
+            if(task_oldtail == SubThisTask)
+              {
+                i_oldtail       = (oldtail & MASK);
+                Next[i_oldtail] = newtail;
+              }
+
+            buf[0] = oldtail;
+            MPI_Send(buf, 1 * sizeof(long long), MPI_BYTE, source, TAG_GET_OLDTAIL, SubComm);
+            break;
+          case TAG_SET_ALL:
+            /* request: index, head, tail, len, next; no reply */
+            MPI_Recv(buf, 5 * sizeof(long long), MPI_BYTE, source, TAG_SET_ALL, SubComm, MPI_STATUS_IGNORE);
+            index       = buf[0];
+            Head[index] = buf[1];
+            Tail[index] = buf[2];
+            Len[index]  = buf[3];
+            Next[index] = buf[4];
+            break;
+          case TAG_GET_TAILANDLEN:
+            /* request: index; reply: Tail and Len of that entry */
+            MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
+            buf[0] = Tail[index];
+            buf[1] = Len[index];
+            MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_GET_TAILANDLEN_DATA, SubComm);
+            break;
+          case TAG_SET_TAILANDLEN:
+            /* request: index, tail, len; no reply */
+            MPI_Recv(buf, 3 * sizeof(long long), MPI_BYTE, source, TAG_SET_TAILANDLEN, SubComm, MPI_STATUS_IGNORE);
+            index       = buf[0];
+            Tail[index] = buf[1];
+            Len[index]  = buf[2];
+            break;
+          case TAG_SET_HEADANDNEXT:
+            /* request: index, head, next; no reply */
+            MPI_Recv(buf, 3 * sizeof(long long), MPI_BYTE, source, TAG_SET_HEADANDNEXT, SubComm, MPI_STATUS_IGNORE);
+            index       = buf[0];
+            Head[index] = buf[1];
+            Next[index] = buf[2];
+            break;
+          case TAG_SET_NEXT:
+            /* request: index, next; no reply */
+            MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SET_NEXT, SubComm, MPI_STATUS_IGNORE);
+            index       = buf[0];
+            Next[index] = buf[1];
+            break;
+          case TAG_SETHEADGETNEXT:
+            /* request: index, head. Head is set on the entry and on all directly
+               following entries that are stored on this task; the reply is the
+               first Next value that is negative or points to another task */
+            MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SETHEADGETNEXT, SubComm, MPI_STATUS_IGNORE);
+            index = buf[0];
+            head  = buf[1];
+            do
+              {
+                Head[index] = head;
+                next        = Next[index];
+                task        = (next >> 32);
+                index       = (next & MASK);
+              }
+            while(next >= 0 && task == SubThisTask);
+            MPI_Send(&next, 1 * sizeof(long long), MPI_BYTE, source, TAG_SETHEADGETNEXT_DATA, SubComm);
+            break;
+          case TAG_GET_NEXT:
+            /* request: index; reply: Next of that entry */
+            MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
+            MPI_Send(&Next[index], 1 * sizeof(long long), MPI_BYTE, source, TAG_GET_NEXT_DATA, SubComm);
+            break;
+          case TAG_GET_HEAD:
+            /* request: index; reply: Head of that entry */
+            MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
+            MPI_Send(&Head[index], 1 * sizeof(long long), MPI_BYTE, source, TAG_GET_HEAD_DATA, SubComm);
+            break;
+          case TAG_ADD_PARTICLE:
+            MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
+            if(Tail[index] < 0) /* consider only particles not already in substructures */
+              {
+                /* validate before touching ud[], and hand the format string to
+                   terminate() directly, as the TAG_MARK_PARTICLE branch does */
+                if(index >= NumPartGroup)
+                  terminate("What: index=%d NumPartGroup=%d\n", index, NumPartGroup);
+
+                ud[LocalLen].index = index;
+                LocalLen++;
+              }
+            break;
+          case TAG_MARK_PARTICLE:
+            /* request: index, target task, submark; no reply */
+            MPI_Recv(ibuf, 3, MPI_INT, source, TAG_MARK_PARTICLE, SubComm, MPI_STATUS_IGNORE);
+            index   = ibuf[0];
+            target  = ibuf[1];
+            submark = ibuf[2];
+
+            /* only particles whose submark is still HIGHBIT may be marked */
+            if(PS[index].submark != HIGHBIT)
+              terminate("TasK=%d i=%d P[i].submark=%d?\n", SubThisTask, index, PS[index].submark);
+
+            PS[index].TargetTask = target;
+            PS[index].submark    = submark;
+            break;
+          case TAG_ADDBOUND:
+            /* request: index, nsub; no reply */
+            MPI_Recv(ibuf, 2, MPI_INT, source, TAG_ADDBOUND, SubComm, &status);
+            index = ibuf[0];
+            nsub  = ibuf[1];
+            if(Tail[index] == nsub) /* consider only particles in this substructure */
+              {
+                ud[LocalLen].index = index;
+                LocalLen++;
+              }
+            break;
+          case TAG_SETRANK:
+            /* request: index, rank. Consecutive ranks are written into Len[] along
+               the chain for as long as it stays on this task; the reply carries the
+               Next value at which the walk stopped and the next rank to hand out */
+            MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SETRANK, SubComm, MPI_STATUS_IGNORE);
+            index = buf[0];
+            rank  = buf[1];
+            do
+              {
+                Len[index] = rank++;
+                next       = Next[index];
+                if(next < 0)
+                  break;
+                index = (next & MASK);
+              }
+            while((next >> 32) == SubThisTask);
+            buf[0] = next;
+            buf[1] = rank;
+            MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SETRANK_OUT, SubComm);
+            break;
+          case TAG_GET_RANK:
+            /* request: index; reply: the rank stored in Len[] */
+            MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
+            rank = Len[index];
+            MPI_Send(&rank, 1 * sizeof(long long), MPI_BYTE, source, TAG_GET_RANK_DATA, SubComm);
+            break;
+
+          case TAG_POLLING_DONE:
+            /* consume the message; its payload is not used, the tag ends the loop */
+            MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
+            break;
+
+          default:
+            terminate("tag not present in the switch");
+            break;
+        }
+    }
+  while(tag != TAG_POLLING_DONE);
+}
+
+/*! \brief Stores a rank in Len[] of a list entry and returns the entry that follows.
+ *
+ *  For an entry stored on this task, exactly one rank is written and *rank
+ *  is advanced by one. For an entry on another task the request is sent
+ *  with TAG_SETRANK and both the returned entry and the updated rank are
+ *  taken from the TAG_SETRANK_OUT reply.
+ *
+ *  \param[in] index Global list index (task number above bit 31, local index selected with MASK).
+ *  \param[in, out] rank Rank to assign; on return the next rank to hand out.
+ *
+ *  \return Next entry, as stored locally or as reported by the owning task.
+ */
+long long subfind_distlinklist_setrank_and_get_next(long long index, long long *rank)
+{
+  const int owner = (index >> 32);
+  const int slot  = (index & MASK);
+  long long msg[2];
+
+  if(owner == SubThisTask)
+    {
+      /* entry is local: write the rank and advance the counter */
+      Len[slot] = *rank;
+      *rank += 1;
+      return Next[slot];
+    }
+
+  /* entry is remote: let the owning task do the work */
+  msg[0] = slot;
+  msg[1] = *rank;
+
+  MPI_Send(msg, 2 * sizeof(long long), MPI_BYTE, owner, TAG_SETRANK, SubComm);
+  MPI_Recv(msg, 2 * sizeof(long long), MPI_BYTE, owner, TAG_SETRANK_OUT, SubComm, MPI_STATUS_IGNORE);
+
+  *rank = msg[1];
+  return msg[0];
+}
+
+/*! \brief Overwrites the Head value of a list entry and reports what follows it.
+ *
+ *  A local entry is updated directly. For a remote entry a
+ *  TAG_SETHEADGETNEXT request is sent and the value from the
+ *  TAG_SETHEADGETNEXT_DATA reply is returned.
+ *
+ *  \param[in] index Global list index (task number above bit 31, local index selected with MASK).
+ *  \param[in] head Head value to be stored.
+ *
+ *  \return Next value, as stored locally or as reported by the owning task.
+ */
+long long subfind_distlinklist_set_head_get_next(long long index, long long head)
+{
+  const int owner = (index >> 32);
+  const int slot  = (index & MASK);
+  long long request[2];
+  long long successor;
+
+  if(owner == SubThisTask)
+    {
+      Head[slot] = head;
+      return Next[slot];
+    }
+
+  request[0] = slot;
+  request[1] = head;
+  MPI_Send(request, 2 * sizeof(long long), MPI_BYTE, owner, TAG_SETHEADGETNEXT, SubComm);
+  MPI_Recv(&successor, 1 * sizeof(long long), MPI_BYTE, owner, TAG_SETHEADGETNEXT_DATA, SubComm, MPI_STATUS_IGNORE);
+
+  return successor;
+}
+
+/*! \brief Stores a Next value for one entry of the global linked list.
+ *
+ *  Writes Next[] directly if the entry is on this task; otherwise sends a
+ *  TAG_SET_NEXT message to the owning task and does not wait for a reply.
+ *
+ *  \param[in] index Global list index (task number above bit 31, local index selected with MASK).
+ *  \param[in] next Next value to be stored.
+ *
+ *  \return void
+ */
+void subfind_distlinklist_set_next(long long index, long long next)
+{
+  const int owner = (index >> 32);
+  const int slot  = (index & MASK);
+  long long payload[2];
+
+  if(owner != SubThisTask)
+    {
+      payload[0] = slot;
+      payload[1] = next;
+      MPI_Send(payload, 2 * sizeof(long long), MPI_BYTE, owner, TAG_SET_NEXT, SubComm);
+      return;
+    }
+
+  Next[slot] = next;
+}
+
+/*! \brief Adds particle to 'ud' list if not already in substructure.
+ *
+ *  If the entry is stored on this task and its Tail value is negative, its
+ *  local index is appended to ud[] and LocalLen is increased. If the entry
+ *  belongs to another task, the local index is sent there with
+ *  TAG_ADD_PARTICLE and no reply is awaited.
+ *
+ *  The run is terminated if a local index to be appended is not below
+ *  NumPartGroup.
+ *
+ *  \param[in] index Index in global linked list (task number above bit 31, local index selected with MASK).
+ *
+ *  \return void
+ */
+void subfind_distlinklist_add_particle(long long index)
+{
+  int task, i;
+
+  task = (index >> 32);
+  i    = (index & MASK);
+
+  if(SubThisTask == task)
+    {
+      if(Tail[i] < 0) /* consider only particles not already in substructures */
+        {
+          /* validate before touching ud[], and pass format and arguments to
+             terminate() directly instead of a pre-formatted buffer, which
+             would be interpreted as a format string */
+          if(i >= NumPartGroup)
+            terminate("What: index=%d NumPartGroup=%d\n", i, NumPartGroup);
+
+          ud[LocalLen].index = i;
+          LocalLen++;
+        }
+    }
+  else
+    {
+      MPI_Send(&i, 1, MPI_INT, task, TAG_ADD_PARTICLE, SubComm);
+    }
+}
+
+/*! \brief Assigns TargetTask and submark of one particle in the 'PS' array.
+ *
+ *  A particle on this task must currently carry submark == HIGHBIT,
+ *  otherwise the run is terminated. For a particle on another task the
+ *  three values are sent with TAG_MARK_PARTICLE; no reply is awaited.
+ *
+ *  \param[in] index Global list index (task number above bit 31, local index selected with MASK).
+ *  \param[in] target Value for the TargetTask field.
+ *  \param[in] submark Value for the submark field.
+ *
+ *  \return void
+ */
+void subfind_distlinklist_mark_particle(long long index, int target, int submark)
+{
+  const int owner = (index >> 32);
+  const int slot  = (index & MASK);
+  int request[3];
+
+  if(owner != SubThisTask)
+    {
+      request[0] = slot;
+      request[1] = target;
+      request[2] = submark;
+      MPI_Send(request, 3, MPI_INT, owner, TAG_MARK_PARTICLE, SubComm);
+      return;
+    }
+
+  if(PS[slot].submark != HIGHBIT)
+    terminate("Tas=%d i=%d P[i].submark=%d?\n", SubThisTask, slot, PS[slot].submark);
+
+  PS[slot].TargetTask = target;
+  PS[slot].submark    = submark;
+}
+
+/*! \brief Appends a particle to the 'ud' array if its Tail value equals nsub.
+ *
+ *  A particle on this task is tested and appended directly (LocalLen is
+ *  increased). For a particle on another task the local index and nsub
+ *  are sent with TAG_ADDBOUND; no reply is awaited.
+ *
+ *  \param[in] index Global list index (task number above bit 31, local index selected with MASK).
+ *  \param[in] nsub Value the particle's Tail entry has to match for the
+ *             particle to be added.
+ *
+ *  \return void
+ */
+void subfind_distlinklist_add_bound_particles(long long index, int nsub)
+{
+  const int owner = (index >> 32);
+  const int slot  = (index & MASK);
+  int request[2];
+
+  if(owner == SubThisTask)
+    {
+      /* only entries whose Tail matches nsub are collected */
+      if(Tail[slot] != nsub)
+        return;
+
+      ud[LocalLen].index = slot;
+      LocalLen++;
+      return;
+    }
+
+  request[0] = slot;
+  request[1] = nsub;
+  MPI_Send(request, 2, MPI_INT, owner, TAG_ADDBOUND, SubComm);
+}
+
+/*! \brief Looks up the Next value of one entry of the global linked list.
+ *
+ *  Reads Next[] directly if the entry is on this task; otherwise asks the
+ *  owning task with TAG_GET_NEXT and waits for the TAG_GET_NEXT_DATA reply.
+ *
+ *  \param[in] index Global list index (task number above bit 31, local index selected with MASK).
+ *
+ *  \return Next value of the entry.
+ */
+long long subfind_distlinklist_get_next(long long index)
+{
+  const int owner = (index >> 32);
+  int slot        = (index & MASK);
+  long long reply;
+
+  if(owner == SubThisTask)
+    return Next[slot];
+
+  MPI_Send(&slot, 1, MPI_INT, owner, TAG_GET_NEXT, SubComm);
+  MPI_Recv(&reply, 1 * sizeof(long long), MPI_BYTE, owner, TAG_GET_NEXT_DATA, SubComm, MPI_STATUS_IGNORE);
+
+  return reply;
+}
+
+/*! \brief Looks up the rank of one entry of the global linked list.
+ *
+ *  The rank is read from Len[] if the entry is on this task; otherwise the
+ *  owning task is asked with TAG_GET_RANK and the TAG_GET_RANK_DATA reply
+ *  is returned.
+ *
+ *  \param[in] index Global list index (task number above bit 31, local index selected with MASK).
+ *
+ *  \return Rank value.
+ */
+long long subfind_distlinklist_get_rank(long long index)
+{
+  const int owner = (index >> 32);
+  int slot        = (index & MASK);
+  long long reply;
+
+  if(owner == SubThisTask)
+    return Len[slot];
+
+  MPI_Send(&slot, 1, MPI_INT, owner, TAG_GET_RANK, SubComm);
+  MPI_Recv(&reply, 1 * sizeof(long long), MPI_BYTE, owner, TAG_GET_RANK_DATA, SubComm, MPI_STATUS_IGNORE);
+
+  return reply;
+}
+
+/*! \brief Looks up the Head value of one entry of the global linked list.
+ *
+ *  Reads Head[] directly if the entry is on this task; otherwise asks the
+ *  owning task with TAG_GET_HEAD and waits for the TAG_GET_HEAD_DATA reply.
+ *
+ *  \param[in] index Global list index (task number above bit 31, local index selected with MASK).
+ *
+ *  \return Head value.
+ */
+long long subfind_distlinklist_get_head(long long index)
+{
+  const int owner = (index >> 32);
+  int slot        = (index & MASK);
+  long long reply;
+
+  if(owner == SubThisTask)
+    return Head[slot];
+
+  MPI_Send(&slot, 1, MPI_INT, owner, TAG_GET_HEAD, SubComm);
+  MPI_Recv(&reply, 1 * sizeof(long long), MPI_BYTE, owner, TAG_GET_HEAD_DATA, SubComm, MPI_STATUS_IGNORE);
+
+  return reply;
+}
+
+/*! \brief Fetches the Head values of two list entries in one step.
+ *
+ *  The owning task is derived from ngb_index1 only; the local index of
+ *  ngb_index2 is looked up on that same task, so both entries are expected
+ *  to be stored on the same task. A remote lookup uses one
+ *  TAG_GET_TWOHEADS request answered by TAG_GET_TWOHEADS_DATA.
+ *
+ *  \param[in] ngb_index1 Global list index of the first entry.
+ *  \param[in] ngb_index2 Global list index of the second entry.
+ *  \param[out] head Head value of the first entry.
+ *  \param[out] head_attach Head value of the second entry.
+ *
+ *  \return void
+ */
+void subfind_distlinklist_get_two_heads(long long ngb_index1, long long ngb_index2, long long *head, long long *head_attach)
+{
+  const int owner  = (ngb_index1 >> 32);
+  const int slot_a = (ngb_index1 & MASK);
+  const int slot_b = (ngb_index2 & MASK);
+  int query[2];
+  long long reply[2];
+
+  if(owner == SubThisTask)
+    {
+      *head        = Head[slot_a];
+      *head_attach = Head[slot_b];
+      return;
+    }
+
+  query[0] = slot_a;
+  query[1] = slot_b;
+  MPI_Send(query, 2, MPI_INT, owner, TAG_GET_TWOHEADS, SubComm);
+  MPI_Recv(reply, 2 * sizeof(long long), MPI_BYTE, owner, TAG_GET_TWOHEADS_DATA, SubComm, MPI_STATUS_IGNORE);
+
+  *head        = reply[0];
+  *head_attach = reply[1];
+}
+
+/*! \brief Stores Head and Next for one entry of the global linked list.
+ *
+ *  Writes both arrays directly if the entry is on this task; otherwise
+ *  sends a TAG_SET_HEADANDNEXT message to the owning task and does not
+ *  wait for a reply.
+ *
+ *  \param[in] index Global list index (task number above bit 31, local index selected with MASK).
+ *  \param[in] head Value for Head.
+ *  \param[in] next Value for Next.
+ *
+ *  \return void
+ */
+void subfind_distlinklist_set_headandnext(long long index, long long head, long long next)
+{
+  const int owner = (index >> 32);
+  const int slot  = (index & MASK);
+  long long payload[3];
+
+  if(owner != SubThisTask)
+    {
+      payload[0] = slot;
+      payload[1] = head;
+      payload[2] = next;
+      MPI_Send(payload, 3 * sizeof(long long), MPI_BYTE, owner, TAG_SET_HEADANDNEXT, SubComm);
+      return;
+    }
+
+  Head[slot] = head;
+  Next[slot] = next;
+}
+
+/*! \brief Replaces the tail of a group, increments its length and returns the previous tail.
+ *
+ *  The task that owns 'index' (this task, or the remote task serving the
+ *  TAG_SET_NEWTAIL request) also completes those list updates that concern
+ *  entries stored on that same task. The return code tells the caller
+ *  which updates are already done:
+ *   - bit 0 (value 1): the new tail is stored on the owning task, its Head
+ *     and Next have been set;
+ *   - bit 1 (value 2): the old tail is stored on the owning task, its Next
+ *     has been pointed to the new tail.
+ *
+ *  \param[in] index Global list index of the group entry whose Tail/Len are updated.
+ *  \param[out] tail Old value for tail.
+ *  \param[in] newtail New value for tail.
+ *
+ *  \return return code (bit mask as described above)
+ */
+int subfind_distlinklist_get_tail_set_tail_increaselen(long long index, long long *tail, long long newtail)
+{
+  const int owner = (index >> 32);
+  const int slot  = (index & MASK);
+  int owner_new, slot_new, owner_old, slot_old;
+  int flags = 0;
+  long long previous;
+  long long request[2];
+
+  if(owner != SubThisTask)
+    {
+      /* remote group entry: the owning task performs the update and reports the old tail */
+      request[0] = slot;
+      request[1] = newtail;
+      MPI_Send(request, 2 * sizeof(long long), MPI_BYTE, owner, TAG_SET_NEWTAIL, SubComm);
+      MPI_Recv(&previous, 1 * sizeof(long long), MPI_BYTE, owner, TAG_GET_OLDTAIL, SubComm, MPI_STATUS_IGNORE);
+      *tail = previous;
+
+      /* whatever is stored on the owning task has been handled there */
+      if((newtail >> 32) == owner)
+        flags |= 1;
+      if((previous >> 32) == owner)
+        flags |= 2;
+
+      return flags;
+    }
+
+  /* local group entry */
+  previous   = Tail[slot];
+  Tail[slot] = newtail;
+  Len[slot]++;
+  *tail = previous;
+
+  owner_new = (newtail >> 32);
+  if(owner_new == SubThisTask)
+    {
+      slot_new       = (newtail & MASK);
+      Head[slot_new] = index;
+      Next[slot_new] = -1;
+      flags |= 1;
+    }
+
+  owner_old = (previous >> 32);
+  if(owner_old == SubThisTask)
+    {
+      slot_old       = (previous & MASK);
+      Next[slot_old] = newtail;
+      flags |= 2;
+    }
+
+  return flags;
+}
+
+/*! \brief Writes the Tail and Len entries of one element of the global list.
+ *
+ *  \param[in] index Global index; the owning task is held in bits 32 and above.
+ *  \param[in] tail Value to be set in 'Tail'.
+ *  \param[in] len Value to be set in 'Len'.
+ *
+ *  \return void
+ */
+void subfind_distlinklist_set_tailandlen(long long index, long long tail, int len)
+{
+  int owner = (index >> 32);
+  int slot  = (index & MASK);
+
+  if(SubThisTask != owner)
+    {
+      /* element lives on another task: send {slot, tail, len}; no reply is awaited here */
+      long long msg[3];
+
+      msg[0] = slot;
+      msg[1] = tail;
+      msg[2] = len;
+      MPI_Send(msg, 3 * sizeof(long long), MPI_BYTE, owner, TAG_SET_TAILANDLEN, SubComm);
+      return;
+    }
+
+  /* element is stored on this task: update the arrays directly */
+  Tail[slot] = tail;
+  Len[slot]  = len;
+}
+
+/*! \brief Reads the Tail and Len entries of one element of the global list.
+ *
+ *  \param[in] index Global index; the owning task is held in bits 32 and above.
+ *  \param[out] tail 'Tail' value.
+ *  \param[out] len 'Len' value.
+ *
+ *  \return void
+ */
+void subfind_distlinklist_get_tailandlen(long long index, long long *tail, int *len)
+{
+  int owner = (index >> 32);
+  int slot  = (index & MASK);
+
+  if(SubThisTask == owner)
+    {
+      /* element is stored on this task: read the arrays directly */
+      *tail = Tail[slot];
+      *len  = Len[slot];
+      return;
+    }
+
+  /* remote element: request it by its local slot and unpack the {tail, len} reply */
+  long long reply[2];
+
+  MPI_Send(&slot, 1, MPI_INT, owner, TAG_GET_TAILANDLEN, SubComm);
+  MPI_Recv(reply, 2 * sizeof(long long), MPI_BYTE, owner, TAG_GET_TAILANDLEN_DATA, SubComm, MPI_STATUS_IGNORE);
+  *tail = reply[0];
+  *len  = reply[1];
+}
+
+/*! \brief Writes Head, Tail, Len and Next of one element of the global list.
+ *
+ *  \param[in] index Global index; the owning task is held in bits 32 and above.
+ *  \param[in] head Value for 'Head'.
+ *  \param[in] tail Value for 'Tail'.
+ *  \param[in] len Value for 'Len'.
+ *  \param[in] next Value for 'Next'.
+ *
+ *  \return void
+ */
+void subfind_distlinklist_set_all(long long index, long long head, long long tail, int len, long long next)
+{
+  int owner = (index >> 32);
+  int slot  = (index & MASK);
+
+  if(SubThisTask != owner)
+    {
+      /* remote element: pack everything into one message; no reply is awaited here */
+      long long msg[5];
+
+      msg[0] = slot;
+      msg[1] = head;
+      msg[2] = tail;
+      msg[3] = len;
+      msg[4] = next;
+      MPI_Send(msg, 5 * sizeof(long long), MPI_BYTE, owner, TAG_SET_ALL, SubComm);
+      return;
+    }
+
+  /* element is stored on this task: update all four arrays directly */
+  Head[slot] = head;
+  Tail[slot] = tail;
+  Len[slot]  = len;
+  Next[slot] = next;
+}
+
+/*! \brief Comparison function of sort_density_data objects.
+ *
+ *  Orders elements by descending density.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return -1 if a is denser than b, +1 if b is denser than a, 0 otherwise.
+ */
+int subfind_compare_densities(const void *a, const void *b) /* largest density first */
+{
+  const struct sort_density_data *lhs = (const struct sort_density_data *)a;
+  const struct sort_density_data *rhs = (const struct sort_density_data *)b;
+
+  if(lhs->density > rhs->density)
+    return -1;
+
+  return (lhs->density < rhs->density) ? +1 : 0;
+}
+
+#endif
diff --git a/src/amuse/community/arepo/src/subfind/subfind_density.c b/src/amuse/community/arepo/src/subfind/subfind_density.c
new file mode 100644
index 0000000000..0b61aa9d97
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_density.c
@@ -0,0 +1,662 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_density.c
+ * \date        05/2018
+ * \brief       Smoothing length and density calculation for particles.
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                double subfind_density(int mode)
+ *                static int subfind_density_evaluate(int target, int mode,
+ *                  int threadid)
+ *                void subfind_density_hsml_guess(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 15.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef SUBFIND
+
+#include "../fof/fof.h"
+#include "subfind.h"
+
+static char *Todo; /* per-particle flag: non-zero while the particle still has to be evaluated */
+static int *DM_NumNgb; /* per-particle neighbour count returned by the evaluate function */
+#ifdef SUBFIND_CALC_MORE
+static MyFloat *Vx, *Vy, *Vz; /* per-particle neighbour velocity sums (divided by DM_NumNgb in subfind_density) */
+#endif /* #ifdef SUBFIND_CALC_MORE */
+
+static int subfind_density_evaluate(int target, int mode, int threadid);
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3]; /* position used as centre of the neighbour search */
+  MyFloat Hsml;    /* search radius, copied from PS[i].Hsml in particle2in */
+
+  int Firstnode; /* the firstnode argument passed to particle2in */
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Copies the data of one local particle/cell into the input
+ *         structure defined above. Needed by generic_comm_helpers2.
+ *
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index of particle in P and SphP arrays.
+ *  \param[in] firstnode First node of communication.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  int k;
+
+  in->Firstnode = firstnode;
+  in->Hsml      = PS[i].Hsml;
+
+#ifdef CELL_CENTER_GRAVITY
+  if(P[i].Type == 0)
+    {
+      /* type-0 entries use SphP[i].Center instead of P[i].Pos */
+      for(k = 0; k < 3; k++)
+        in->Pos[k] = SphP[i].Center[k];
+      return;
+    }
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+
+  for(k = 0; k < 3; k++)
+    in->Pos[k] = P[i].Pos[k];
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  int Ngb;     /* number of neighbours of FOF_PRIMARY_LINK_TYPES found */
+  MyFloat Rho; /* kernel-weighted mass sum */
+#ifdef SUBFIND_CALC_MORE
+  MyFloat VelDisp, Vx, Vy, Vz, RhoDM; /* v^2 sum, velocity component sums, kernel-weighted mass sum of primary types */
+#endif /* #ifdef SUBFIND_CALC_MORE */
+} data_out;
+
+static data_out *DataResult, *DataOut;
+
+/*! \brief Stores a result for particle i, or adds it to what is already
+ *         stored. Needed by generic_comm_helpers2.
+ *
+ *  \param[in] out Result to be transferred to DM_NumNgb, PS and, with
+ *  SUBFIND_CALC_MORE, the Vx/Vy/Vz work arrays.
+ *  \param[in] i Index of particle in the PS array.
+ *  \param[in] mode MODE_LOCAL_PARTICLES overwrites the stored values; any
+ *  other mode adds the values in out to them.
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  if(mode != MODE_LOCAL_PARTICLES)
+    {
+      /* partial result from elsewhere: accumulate onto what is already stored */
+      DM_NumNgb[i] += out->Ngb;
+      PS[i].Density += out->Rho;
+#ifdef SUBFIND_CALC_MORE
+      Vx[i] += out->Vx;
+      Vy[i] += out->Vy;
+      Vz[i] += out->Vz;
+      PS[i].SubfindVelDisp += out->VelDisp;
+      PS[i].SubfindDMDensity += out->RhoDM;
+#endif /* #ifdef SUBFIND_CALC_MORE */
+      return;
+    }
+  /* local result: overwrite whatever was stored before */
+  DM_NumNgb[i]  = out->Ngb;
+  PS[i].Density = out->Rho;
+#ifdef SUBFIND_CALC_MORE
+  Vx[i]                  = out->Vx;
+  Vy[i]                  = out->Vy;
+  Vz[i]                  = out->Vz;
+  PS[i].SubfindVelDisp   = out->VelDisp;
+  PS[i].SubfindDMDensity = out->RhoDM;
+#endif /* #ifdef SUBFIND_CALC_MORE */
+}
+
+#include "../utils/generic_comm_helpers2.h"
+
+/*! \brief Routine that defines what to do with local particles.
+ *
+ *  Runs the *_evaluate function in MODE_LOCAL_PARTICLES on flagged particles.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  const int threadid = get_thread_num();
+  int task, idx;
+
+  /* set this thread's export flag to -1 for every task */
+  for(task = 0; task < NTask; task++)
+    Thread[threadid].Exportflag[task] = -1;
+
+  /* take particles until ExportSpace drops below MinSpace or none are left */
+  for(;;)
+    {
+      if(Thread[threadid].ExportSpace < MinSpace)
+        return;
+
+      idx = NextParticle++;
+
+      if(idx >= NumPart)
+        return;
+
+      if(!Todo[idx])
+        continue;
+      subfind_density_evaluate(idx, MODE_LOCAL_PARTICLES, threadid);
+    }
+}
+
+/*! \brief Routine that defines what to do with imported particles.
+ *
+ *  Runs the *_evaluate function in MODE_IMPORTED_PARTICLES on each of them.
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  int slot;
+  int threadid;
+
+  threadid = get_thread_num();
+
+  /*
+   * Process every received entry exactly once, in ascending order.
+   * In MODE_IMPORTED_PARTICLES the index handed to the evaluate function
+   * addresses the import buffers (DataGet / DataResult) rather than the
+   * local particle arrays.
+   */
+  for(slot = 0; slot < Nimport; slot++)
+    {
+      /* the return value is not used */
+      subfind_density_evaluate(slot, MODE_IMPORTED_PARTICLES, threadid);
+    }
+}
+
+/*! \brief Calculates smoothing length or density via neighbor search.
+ *
+ *  \param[in] mode Mode of the function: FIND_SMOOTHING_LENGTHS, or to
+ *             calculate densities.
+ *
+ *  \return Time spent in this routine.
+ */
+double subfind_density(int mode)
+{
+  long long ntot;
+  int i, npleft, iter;
+  MyFloat *Left, *Right; /* per-particle lower/upper bracket for Hsml; 0 means "not set yet" */
+  double t0, t1, tstart, tend;
+
+  if(mode == FIND_SMOOTHING_LENGTHS)
+    mpi_printf("SUBFIND: finding smoothing length for all particles\n");
+  else
+    mpi_printf("SUBFIND: finding total densities around all particles\n");
+
+  tstart = second();
+
+  int HsmlFlag = 0;
+
+#ifdef SUBFIND_CALC_MORE
+  HsmlFlag = 1; /* in this case, calculate densities for all particles, not only those in groups */
+#endif          /* #ifdef SUBFIND_CALC_MORE */
+
+  DM_NumNgb = (int *)mymalloc_movable(&DM_NumNgb, "DM_NumNgb", sizeof(int) * NumPart);
+  Left      = (MyFloat *)mymalloc_movable(&Left, "Left", sizeof(MyFloat) * NumPart);
+  Right     = (MyFloat *)mymalloc_movable(&Right, "Right", sizeof(MyFloat) * NumPart);
+  Todo      = (char *)mymalloc_movable(&Todo, "Todo", sizeof(char) * NumPart);
+
+#ifdef SUBFIND_CALC_MORE
+  Vx = (MyFloat *)mymalloc("Vx", sizeof(MyFloat) * NumPart);
+  Vy = (MyFloat *)mymalloc("Vy", sizeof(MyFloat) * NumPart);
+  Vz = (MyFloat *)mymalloc("Vz", sizeof(MyFloat) * NumPart);
+#endif /* #ifdef SUBFIND_CALC_MORE */
+
+  generic_set_MaxNexport();
+
+  for(i = 0; i < NumPart; i++)
+    {
+      Left[i] = Right[i] = 0;
+      DM_NumNgb[i]       = 0;
+      Todo[i]            = 1;
+      if((PS[i].GrNr >= TotNgroups) && (HsmlFlag == 0))  // particle not in groups
+        Todo[i] = 0;
+
+#ifdef REFINEMENT_HIGH_RES_GAS
+      if((PS[i].GrNr >= TotNgroups) && (P[i].Type == 4 || P[i].Type == 5))  // particle of type 4 or 5 but not in group
+        Todo[i] = 0;
+
+      if(P[i].Type != 0 && P[i].Type != 1 && P[i].Type != 4 && P[i].Type != 5)
+        Todo[i] = 0;
+      if(P[i].Type == 0)
+        if(SphP[i].AllowRefinement == 0)
+          Todo[i] = 0;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+
+      PS[i].Density = 0;
+#ifdef SUBFIND_CALC_MORE
+      PS[i].SubfindHsml      = 0;
+      PS[i].SubfindDensity   = 0;
+      PS[i].SubfindDMDensity = 0;
+      PS[i].SubfindVelDisp   = 0;
+#endif /* #ifdef SUBFIND_CALC_MORE */
+    }
+
+  iter = 0;
+
+  /* we will repeat the whole thing for those particles where we didn't find enough neighbours */
+  do
+    {
+      t0 = second();
+
+      generic_comm_pattern(NumPart, kernel_local, kernel_imported);
+
+      /* do final operations on results */
+      for(i = 0, npleft = 0; i < NumPart; i++)
+        {
+          /* now check whether we had enough neighbours */
+
+          if(Todo[i] && mode == FIND_SMOOTHING_LENGTHS)
+            {
+              if(abs(DM_NumNgb[i] - All.DesNumNgb) > All.MaxNumNgbDeviation &&
+                 ((Right[i] - Left[i]) > 1.0e-4 * Left[i] || Left[i] == 0 || Right[i] == 0))
+                {
+                  /* need to redo this particle */
+                  npleft++;
+
+                  if(DM_NumNgb[i] < All.DesNumNgb)
+                    Left[i] = (MyFloat)dmax(PS[i].Hsml, Left[i]); /* too few neighbours: raise the lower bracket */
+                  else
+                    {
+                      if(Right[i] != 0)
+                        {
+                          if(PS[i].Hsml < Right[i])
+                            Right[i] = PS[i].Hsml; /* too many neighbours: lower the upper bracket */
+                        }
+                      else
+                        Right[i] = PS[i].Hsml;
+                    }
+
+                  if(iter >= MAXITER - 10) /* close to giving up: print diagnostics; ID is printed at full width */
+                    {
+                      printf("SUBFIND: i=%d task=%d ID=%llu Hsml=%g Left=%g Right=%g Ngbs=%g Right-Left=%g\n   pos=(%g|%g|%g)\n", i,
+                             ThisTask, (unsigned long long)P[i].ID, PS[i].Hsml, Left[i], Right[i], (double)DM_NumNgb[i],
+                             Right[i] - Left[i], P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]);
+                      myflush(stdout);
+                    }
+
+                  if(Right[i] > 0 && Left[i] > 0)
+                    PS[i].Hsml = (MyFloat)pow(0.5 * (pow(Left[i], 3) + pow(Right[i], 3)), 1.0 / 3); /* bisect in Hsml^3 */
+                  else
+                    {
+                      if(Right[i] == 0 && Left[i] == 0)
+                        terminate("can't occur");
+
+                      if(Right[i] == 0 && Left[i] > 0)
+                        PS[i].Hsml *= 1.26; /* no upper bracket yet: grow (1.26^3 is about 2) */
+
+                      if(Right[i] > 0 && Left[i] == 0)
+                        PS[i].Hsml /= 1.26; /* no lower bracket yet: shrink */
+                    }
+                }
+              else
+                Todo[i] = 0;
+            }
+        }
+
+      sumup_large_ints(1, &npleft, &ntot);
+
+      t1 = second();
+
+      if(ntot > 0 && mode == FIND_SMOOTHING_LENGTHS)
+        {
+          iter++;
+
+          if(iter > 0)
+            mpi_printf("SUBFIND: ngb iteration %2d: need to repeat for %15lld particles. (took %g sec)\n", iter, ntot,
+                       timediff(t0, t1));
+
+          if(iter > MAXITER)
+            terminate("failed to converge in neighbour iteration in density()\n");
+        }
+    }
+  while(ntot > 0);
+
+#ifdef SUBFIND_CALC_MORE
+  double vel_to_phys;
+
+  vel_to_phys = 1.0 / All.cf_atime;
+
+  for(i = 0; i < NumPart; i++)
+    {
+      /* no neighbours (e.g. particle never evaluated): velocity sums are unset, keep SubfindVelDisp at 0 instead of dividing by 0 */
+      if(DM_NumNgb[i] <= 0)
+        continue;
+      Vx[i] /= DM_NumNgb[i];
+      Vy[i] /= DM_NumNgb[i];
+      Vz[i] /= DM_NumNgb[i];
+      PS[i].SubfindVelDisp /= DM_NumNgb[i];
+      PS[i].SubfindVelDisp = vel_to_phys * sqrt(PS[i].SubfindVelDisp - Vx[i] * Vx[i] - Vy[i] * Vy[i] - Vz[i] * Vz[i]);
+    }
+  myfree_movable(Vz);
+  myfree_movable(Vy);
+  myfree_movable(Vx);
+#endif /* #ifdef SUBFIND_CALC_MORE */
+  myfree_movable(Todo);
+  myfree_movable(Right);
+  myfree_movable(Left);
+  myfree_movable(DM_NumNgb);
+
+#ifdef SUBFIND_CALC_MORE
+  for(i = 0; i < NumPart; i++)
+    {
+      PS[i].SubfindHsml    = PS[i].Hsml;
+      PS[i].SubfindDensity = PS[i].Density;
+    }
+#endif /* #ifdef SUBFIND_CALC_MORE */
+
+  tend = second();
+  return timediff(tstart, tend);
+}
+
+/*! \brief Evaluate function of subfind density calculation.
+ *
+ *  \param[in] target Index into P/PS (local mode) or into DataGet (imported).
+ *  \param[in] mode Local or imported particles?
+ *  \param[in] threadid ID of thread.
+ *
+ *  \return 0
+ */
+static int subfind_density_evaluate(int target, int mode, int threadid)
+{
+  int k, numnodes, *firstnode, type;
+  double hsml;
+  double rhosum = 0;
+  MyDouble *pos;
+  int numngb = 0, no, p;
+  struct NODE *current;
+  double dx, dy, dz, r2, mass;
+  double h2, hinv, hinv3, r, u, wk;
+  MyDouble xtmp, ytmp, ztmp;
+#ifdef SUBFIND_CALC_MORE
+  double vxsum = 0, vysum = 0, vzsum = 0, v2sum = 0, rhodmsum = 0;
+#endif /* #ifdef SUBFIND_CALC_MORE */
+
+  data_in local, *target_data;
+  data_out out;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0);
+      target_data = &local;
+
+      numnodes  = 1; /* a local search does a single walk, started at the root node below */
+      firstnode = NULL;
+    }
+  else
+    {
+      target_data = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos  = target_data->Pos;
+  hsml = target_data->Hsml;
+
+  h2    = hsml * hsml;
+  hinv  = 1.0 / hsml;
+  hinv3 = hinv * hinv * hinv;
+
+  for(k = 0; k < numnodes; k++)
+    {
+      if(mode == MODE_LOCAL_PARTICLES)
+        {
+          no = Tree_MaxPart; /* root node */
+        }
+      else
+        {
+          no = firstnode[k];
+          no = Nodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0)
+        {
+          if(no < Tree_MaxPart) /* single particle */
+            {
+              p  = no;
+              no = Nextnode[no];
+
+              dx = FOF_NEAREST_LONG_X(Tree_Pos_list[3 * p + 0] - pos[0]);
+              if(dx > hsml)
+                continue; /* per-axis rejection before the full distance test */
+              dy = FOF_NEAREST_LONG_Y(Tree_Pos_list[3 * p + 1] - pos[1]);
+              if(dy > hsml)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(Tree_Pos_list[3 * p + 2] - pos[2]);
+              if(dz > hsml)
+                continue;
+
+              if((r2 = (dx * dx + dy * dy + dz * dz)) > hsml * hsml)
+                continue; /* outside the search sphere */
+
+              mass = P[p].Mass;
+              type = P[p].Type;
+            }
+          else if(no < Tree_MaxPart + Tree_MaxNodes) /* internal node */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  if(no <
+                     Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */
+                    break;
+                }
+
+              current = &Nodes[no];
+
+              no = current->u.d.sibling; /* in case the node can be discarded */
+
+              double dist = hsml + 0.5 * current->len;
+
+              dx = (MyFloat)FOF_NEAREST_LONG_X(current->center[0] - pos[0]);
+              if(dx > dist)
+                continue;
+              dy = (MyFloat)FOF_NEAREST_LONG_Y(current->center[1] - pos[1]);
+              if(dy > dist)
+                continue;
+              dz = (MyFloat)FOF_NEAREST_LONG_Z(current->center[2] - pos[2]);
+              if(dz > dist)
+                continue;
+              /* now test against the minimal sphere enclosing everything */
+              dist += FACT1 * current->len;
+              if(dx * dx + dy * dy + dz * dz > dist * dist)
+                continue;
+
+              no = current->u.d.nextnode; /* ok, we need to open the node */
+              continue;
+            }
+          else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */
+            {
+              int n = no - Tree_ImportedNodeOffset;
+              no    = Nextnode[no - Tree_MaxNodes];
+
+              dx = FOF_NEAREST_LONG_X(Tree_Points[n].Pos[0] - pos[0]);
+              if(dx > hsml)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(Tree_Points[n].Pos[1] - pos[1]);
+              if(dy > hsml)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(Tree_Points[n].Pos[2] - pos[2]);
+              if(dz > hsml)
+                continue;
+
+              if((r2 = (dx * dx + dy * dy + dz * dz)) > hsml * hsml)
+                continue;
+
+              mass = Tree_Points[n].Mass;
+              type = Tree_Points[n].Type;
+
+              p = -1; /* mass and type came from Tree_Points; the SUBFIND_CALC_MORE code below terminates on p < 0 */
+            }
+          else /* pseudo particle */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("can't be");
+
+              if(target >= 0) /* if no target is given, export will not occur */
+                tree_treefind_export_node_threads(no, target, threadid);
+
+              no = Nextnode[no - Tree_MaxNodes];
+              continue;
+            }
+
+          if((1 << type) & (FOF_PRIMARY_LINK_TYPES)) /* only primary link types enter the neighbour count */
+            {
+              numngb++;
+
+#ifdef SUBFIND_CALC_MORE
+              if(p < 0)
+                terminate("this should not occur");
+
+              vxsum += P[p].Vel[0];
+              vysum += P[p].Vel[1];
+              vzsum += P[p].Vel[2];
+              v2sum += P[p].Vel[0] * P[p].Vel[0] + P[p].Vel[1] * P[p].Vel[1] + P[p].Vel[2] * P[p].Vel[2];
+#endif /* #ifdef SUBFIND_CALC_MORE */
+            }
+
+          if(((1 << type) & (FOF_PRIMARY_LINK_TYPES)) || ((1 << type) & (FOF_SECONDARY_LINK_TYPES)))
+            if(r2 < h2)
+              {
+                r = sqrt(r2);
+
+                u = r * hinv; /* distance in units of hsml; the kernel below has two pieces, split at u = 0.5 */
+
+                if(u < 0.5)
+                  wk = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u);
+                else
+                  wk = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u);
+
+                rhosum += mass * wk;
+
+#ifdef SUBFIND_CALC_MORE
+                if((1 << type) & (FOF_PRIMARY_LINK_TYPES))
+                  rhodmsum += mass * wk;
+#endif /* #ifdef SUBFIND_CALC_MORE */
+              }
+        }
+    }
+
+  out.Ngb = numngb;
+  out.Rho = rhosum;
+#ifdef SUBFIND_CALC_MORE
+  out.Vx      = vxsum;
+  out.Vy      = vysum;
+  out.Vz      = vzsum;
+  out.VelDisp = v2sum; /* raw sum of v^2; it is normalised later in subfind_density */
+  out.RhoDM   = rhodmsum;
+#endif /* #ifdef SUBFIND_CALC_MORE */
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+/*! \brief Sets Hsml to an initial guess to reduce the number of iterations
+ *         needed to get the final smoothing length (Hsml).
+ *
+ *  \return void
+ */
+void subfind_density_hsml_guess(void)
+{
+  int i;
+  double hsml_prev = 0; /* most recent guess computed for a primary-link-type particle; 0 until the first one */
+
+  for(i = 0; i < NumPart; i++)
+    {
+      int no, p;
+
+      if((1 << P[i].Type) & (FOF_PRIMARY_LINK_TYPES))
+        {
+          no = Father[i]; /* start from the particle's father node */
+
+          /* NOTE(review): climbs only while the node is too light AND has len == 0 -- confirm that '&&' is intended */
+          while(8 * All.DesNumNgb * P[i].Mass > Nodes[no].u.d.mass && Nodes[no].len == 0)
+            {
+              p = Nodes[no].u.d.father;
+
+              if(p < 0)
+                break;
+
+              no = p;
+            }
+          PS[i].Hsml = hsml_prev = (pow(3.0 / (4 * M_PI) * All.DesNumNgb * P[i].Mass / Nodes[no].u.d.mass, 1.0 / 3) * Nodes[no].len);
+
+          if(PS[i].Hsml == 0)
+            {
+              printf("Hsml=0 task=%d i=%d no=%d Nodes[no].len=%g Nodes[no].u.d.mass=%g P[i].Mass=%g type=%d ID=%llu  pos=(%g|%g|%g)\n",
+                     ThisTask, i, no, Nodes[no].len, Nodes[no].u.d.mass, P[i].Mass, P[i].Type, (unsigned long long)P[i].ID,
+                     P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]); /* %llu requires an unsigned long long argument */
+              terminate("zero hsml guess\n");
+            }
+        }
+      else /* other types reuse the most recent guess, or fall back to the softening table entry */
+        {
+          if(hsml_prev)
+            PS[i].Hsml = hsml_prev;
+          else
+            PS[i].Hsml = All.SofteningTable[P[i].SofteningType];
+        }
+    }
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_distribute.c b/src/amuse/community/arepo/src/subfind/subfind_distribute.c
new file mode 100644
index 0000000000..80b492193c
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_distribute.c
@@ -0,0 +1,421 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_distribute.c
+ * \date        05/2018
+ * \brief       Moves groups and particles across MPI tasks from their
+ *              simulation ordering to a subfind ordering.
+ * \details     contains functions:
+ *                void subfind_distribute_groups(void)
+ *                void subfind_distribute_particles(MPI_Comm Communicator)
+ *                void subfind_reorder_P(int *Id, int Nstart, int N)
+ *                void subfind_reorder_PS(int *Id, int Nstart, int N)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 15.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../fof/fof.h"
+#include "subfind.h"
+
+#ifdef SUBFIND
+static struct group_properties *send_Group;
+
+/*! \brief Moves each group to the MPI task given by Group[i].TargetTask.
+ *
+ *  \return void
+ */
+void subfind_distribute_groups(void)
+{
+  int i, nexport, nimport, target, ngrp, recvTask;
+
+  /* count how many we have of each task */
+  for(i = 0; i < NTask; i++)
+    Send_count[i] = 0;
+
+  for(i = 0; i < Ngroups; i++)
+    {
+      target = Group[i].TargetTask;
+
+      if(target < 0 || target >= NTask)
+        terminate("target < 0 || target >= NTask");
+
+      if(target != ThisTask)
+        Send_count[target]++;
+    }
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(i = 0, nexport = 0, nimport = 0, Recv_offset[0] = Send_offset[0] = 0; i < NTask; i++)
+    {
+      nimport += Recv_count[i];
+      nexport += Send_count[i];
+
+      if(i > 0)
+        {
+          Send_offset[i] = Send_offset[i - 1] + Send_count[i - 1];
+          Recv_offset[i] = Recv_offset[i - 1] + Recv_count[i - 1];
+        }
+    }
+
+  send_Group = (struct group_properties *)mymalloc_movable(&send_Group, "send_Group", nexport * sizeof(struct group_properties));
+
+  for(i = 0; i < NTask; i++)
+    Send_count[i] = 0; /* reused below as a per-task fill counter for send_Group */
+
+  for(i = 0; i < Ngroups; i++)
+    {
+      target = Group[i].TargetTask;
+
+      if(target != ThisTask)
+        {
+          send_Group[Send_offset[target] + Send_count[target]] = Group[i];
+          Send_count[target]++;
+
+          Group[i] = Group[Ngroups - 1]; /* fill the hole with the last remaining group */
+          Ngroups--;
+          i--; /* look at slot i again, it now holds a different group */
+        }
+    }
+
+  if(Ngroups + nimport > MaxNgroups)
+    {
+#ifdef VERBOSE
+      printf("SUBFIND: Task=%d: (Ngroups=%d) + (nimport=%d) > (MaxNgroups=%d). Will increase MaxNgroups.\n", ThisTask, Ngroups,
+             nimport, MaxNgroups);
+#endif /* #ifdef VERBOSE */
+      MaxNgroups = Ngroups + nimport;
+      Group      = (struct group_properties *)myrealloc_movable(Group, sizeof(struct group_properties) * MaxNgroups);
+    }
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp; /* exchange partner for this round */
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              /* get the group info */
+              MPI_Sendrecv(&send_Group[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct group_properties), MPI_BYTE,
+                           recvTask, TAG_DENS_A, &Group[Ngroups + Recv_offset[recvTask]],
+                           Recv_count[recvTask] * sizeof(struct group_properties), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  Ngroups += nimport; /* received groups were written behind the groups that stayed local */
+
+  myfree_movable(send_Group);
+}
+
+static struct particle_data *partBuf;
+static struct subfind_data *subBuf;
+
+/*! \brief Distributes particles on MPI tasks.
+ *
+ *  This function redistributes the particles in P[] and PS[] according to what
+ *  is stored in PS[].TargetTask, and PS[].TargetIndex. NOTE: The associated
+ *  SphP[] is not moved, i.e. the association is broken until the particles are
+ *  moved back into the original order!
+ *
+ *  \param[in] Communicator MPI communicator.
+ *
+ *  \return void
+ */
+void subfind_distribute_particles(MPI_Comm Communicator)
+{
+  int nimport, nexport; /* total number of particles this task receives / sends */
+  int i, j, n, ngrp, target;
+  int max_load, load; /* particle count after the exchange: maximum over Communicator / local value */
+  int CommThisTask, CommNTask; /* rank within and size of Communicator */
+
+  MPI_Comm_size(Communicator, &CommNTask);
+  MPI_Comm_rank(Communicator, &CommThisTask);
+
+  for(n = 0; n < CommNTask; n++)
+    Send_count[n] = 0;
+
+  for(n = 0; n < NumPart; n++) /* pass 1: count the particles leaving for each destination task */
+    {
+      target = PS[n].TargetTask;
+
+      if(target != CommThisTask)
+        {
+          if(target < 0 || target >= CommNTask) /* destination is not a rank of Communicator */
+            terminate("n=%d targettask=%d", n, target);
+
+          Send_count[target]++;
+        }
+    }
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator); /* Recv_count[j]: particles task j sends to us */
+
+  for(j = 0, nimport = 0, nexport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < CommNTask; j++)
+    {
+      nexport += Send_count[j];
+      nimport += Recv_count[j];
+
+      if(j > 0) /* running sums give the start of each task's section in the send buffers / receive area */
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  /* particle count after the exchange; the maximum over all tasks drives the resize decision below */
+  load = (NumPart + nimport - nexport);
+  MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, Communicator);
+  /* staging buffers holding the outgoing P[] and PS[] entries, grouped by destination task */
+  partBuf = (struct particle_data *)mymalloc_movable(&partBuf, "partBuf", nexport * sizeof(struct particle_data));
+  subBuf  = (struct subfind_data *)mymalloc_movable(&subBuf, "subBuf", nexport * sizeof(struct subfind_data));
+
+  for(i = 0; i < CommNTask; i++) /* Send_count is reused as per-task fill counter during pass 2 */
+    Send_count[i] = 0;
+
+  for(n = 0; n < NumPart; n++) /* pass 2: move the outgoing particles into the send buffers */
+    {
+      target = PS[n].TargetTask;
+
+      if(target != CommThisTask)
+        {
+          partBuf[Send_offset[target] + Send_count[target]] = P[n];
+          subBuf[Send_offset[target] + Send_count[target]]  = PS[n];
+
+          P[n]  = P[NumPart - 1]; /* fill the hole with the last local particle */
+          PS[n] = PS[NumPart - 1];
+
+          Send_count[target]++;
+          NumPart--;
+          n--; /* re-examine slot n, which now holds the particle moved in from the end */
+        }
+    }
+
+  /* enlarge the particle storage if the fullest task would exceed the tolerated fill level of All.MaxPart */
+  if(max_load > (1.0 - ALLOC_TOLERANCE) * All.MaxPart)
+    {
+      All.MaxPart = max_load / (1.0 - 2 * ALLOC_TOLERANCE);
+      reallocate_memory_maxpart(); /* NOTE(review): presumably resizes P and related arrays; PS is resized explicitly below */
+      PS = (struct subfind_data *)myrealloc_movable(PS, All.MaxPart * sizeof(struct subfind_data));
+    }
+
+  for(i = 0; i < CommNTask; i++)
+    Recv_offset[i] += NumPart; /* imported particles are appended behind the particles that stay local */
+
+#ifndef NO_ISEND_IRECV_IN_DOMAIN
+  /* non-blocking variant: at most 2 receives and 2 sends are posted per partner task */
+  MPI_Request *requests = (MPI_Request *)mymalloc("requests", 8 * CommNTask * sizeof(MPI_Request));
+  int n_requests        = 0;
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++) /* post all receives; partner rank = CommThisTask XOR ngrp */
+    {
+      target = CommThisTask ^ ngrp;
+
+      if(target < CommNTask) /* skip XOR results that are not ranks of Communicator */
+        {
+          if(Recv_count[target] > 0)
+            {
+              MPI_Irecv(P + Recv_offset[target], Recv_count[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
+                        Communicator, &requests[n_requests++]);
+              MPI_Irecv(PS + Recv_offset[target], Recv_count[target] * sizeof(struct subfind_data), MPI_BYTE, target, TAG_KEY,
+                        Communicator, &requests[n_requests++]);
+            }
+        }
+    }
+
+  MPI_Barrier(Communicator); /* not really necessary, but this will guarantee that all receives are
+                                posted before the sends, which helps the stability of MPI on
+                                bluegene, and perhaps some mpich1-clusters */
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++) /* now start the matching sends */
+    {
+      target = CommThisTask ^ ngrp;
+
+      if(target < CommNTask)
+        {
+          if(Send_count[target] > 0)
+            {
+              MPI_Isend(partBuf + Send_offset[target], Send_count[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
+                        Communicator, &requests[n_requests++]);
+              MPI_Isend(subBuf + Send_offset[target], Send_count[target] * sizeof(struct subfind_data), MPI_BYTE, target, TAG_KEY,
+                        Communicator, &requests[n_requests++]);
+            }
+        }
+    }
+
+  MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE); /* all transfers are complete after this call */
+  myfree(requests);
+
+#else  /* #ifndef NO_ISEND_IRECV_IN_DOMAIN */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++) /* blocking variant: one pairwise exchange per partner */
+    {
+      target = CommThisTask ^ ngrp;
+
+      if(target < CommNTask)
+        {
+          if(Send_count[target] > 0 || Recv_count[target] > 0)
+            {
+              MPI_Sendrecv(partBuf + Send_offset[target], Send_count[target] * sizeof(struct particle_data), MPI_BYTE, target,
+                           TAG_PDATA, P + Recv_offset[target], Recv_count[target] * sizeof(struct particle_data), MPI_BYTE, target,
+                           TAG_PDATA, Communicator, MPI_STATUS_IGNORE);
+
+              MPI_Sendrecv(subBuf + Send_offset[target], Send_count[target] * sizeof(struct subfind_data), MPI_BYTE, target, TAG_KEY,
+                           PS + Recv_offset[target], Recv_count[target] * sizeof(struct subfind_data), MPI_BYTE, target, TAG_KEY,
+                           Communicator, MPI_STATUS_IGNORE);
+            }
+        }
+    }
+#endif /* #ifndef NO_ISEND_IRECV_IN_DOMAIN #else */
+
+  NumPart += nimport; /* the received particles now count as local */
+  myfree_movable(subBuf);
+  myfree_movable(partBuf);
+
+  /* finally, let's also address the desired local order according to PS[].TargetIndex */
+
+  struct fof_local_sort_data *mp; /* (current index, target index) pairs that get sorted */
+  int *Id;                        /* Id[k]: slot the particle currently at k has to move to */
+
+  mp = (struct fof_local_sort_data *)mymalloc("mp", sizeof(struct fof_local_sort_data) * (NumPart));
+  Id = (int *)mymalloc("Id", sizeof(int) * (NumPart));
+
+  for(i = 0; i < NumPart; i++)
+    {
+      mp[i].index       = i;
+      mp[i].targetindex = PS[i].TargetIndex;
+    }
+
+  qsort(mp, NumPart, sizeof(struct fof_local_sort_data), fof_compare_local_sort_data_targetindex);
+
+  for(i = 0; i < NumPart; i++)
+    Id[mp[i].index] = i; /* the particle currently at mp[i].index belongs into slot i */
+
+  subfind_reorder_P(Id, 0, NumPart);
+
+  for(i = 0; i < NumPart; i++)
+    Id[mp[i].index] = i; /* rebuild Id: subfind_reorder_P() writes to it while permuting */
+
+  subfind_reorder_PS(Id, 0, NumPart);
+
+  myfree(Id);
+  myfree(mp);
+}
+
+/*! \brief Applies a permutation to the P array in place.
+ *
+ * \param[in,out] Id Id[i] is the slot that P[i] has to be moved to; every
+ *                entry visited is rewritten to its own index on the way.
+ * \param[in] Nstart First index (in Id and P) to process.
+ * \param[in] N Last index to process + 1.
+ *  \return void
+ */
+void subfind_reorder_P(int *Id, int Nstart, int N)
+{
+  struct particle_data carried, displaced;
+  int carried_id, displaced_id, slot, start;
+
+  /* every permutation cycle is rotated as soon as its first member is met */
+  for(start = Nstart; start < N; start++)
+    {
+      if(Id[start] == start) /* already in place */
+        continue;
+
+      /* pick up the element that opens this cycle */
+      carried    = P[start];
+      carried_id = Id[start];
+      slot       = carried_id;
+
+      for(;;)
+        {
+          /* remember the occupant of the destination, then drop the carried element there */
+          displaced    = P[slot];
+          displaced_id = Id[slot];
+
+          P[slot]  = carried;
+          Id[slot] = carried_id;
+
+          if(slot == start) /* back at the opening slot: cycle closed */
+            break;
+
+          /* carry on with the element that was just displaced */
+          carried    = displaced;
+          carried_id = displaced_id;
+          slot       = carried_id;
+        }
+    }
+}
+
+/*! \brief Applies a permutation to the PS array in place.
+ *
+ * \param[in,out] Id Id[i] is the slot that PS[i] has to be moved to; every
+ *                entry visited is rewritten to its own index on the way.
+ * \param[in] Nstart First index (in Id and PS) to process.
+ * \param[in] N Last index to process + 1.
+ *  \return void
+ */
+void subfind_reorder_PS(int *Id, int Nstart, int N)
+{
+  struct subfind_data carried, displaced;
+  int carried_id, displaced_id, slot, start;
+
+  /* every permutation cycle is rotated as soon as its first member is met */
+  for(start = Nstart; start < N; start++)
+    {
+      if(Id[start] == start) /* already in place */
+        continue;
+
+      /* pick up the element that opens this cycle */
+      carried    = PS[start];
+      carried_id = Id[start];
+      slot       = carried_id;
+
+      for(;;)
+        {
+          /* remember the occupant of the destination, then drop the carried element there */
+          displaced    = PS[slot];
+          displaced_id = Id[slot];
+
+          PS[slot] = carried;
+          Id[slot] = carried_id;
+
+          if(slot == start) /* back at the opening slot: cycle closed */
+            break;
+
+          /* carry on with the element that was just displaced */
+          carried    = displaced;
+          carried_id = displaced_id;
+          slot       = carried_id;
+        }
+    }
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_findlinkngb.c b/src/amuse/community/arepo/src/subfind/subfind_findlinkngb.c
new file mode 100644
index 0000000000..8faaba4542
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_findlinkngb.c
@@ -0,0 +1,539 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_findlinkngb.c
+ * \date        05/2018
+ * \brief       Algorithm to find smoothing lengths of particles to get a
+ *              desired number of neighbours.
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                void subfind_find_linkngb(void)
+ *                static int subfind_linkngb_evaluate(int target, int mode,
+ *                  int threadid)
+ *                int subfind_treefind_collective_export_node_threads(int no,
+ *                  int i, int thread_id)
+ *                static int subfind_ngb_compare_dist(const void *a, const
+ *                  void *b)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 15.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef SUBFIND
+#include "subfind.h"
+
+static int subfind_ngb_compare_dist(const void *a, const void *b);
+static int subfind_linkngb_evaluate(int target, int mode, int threadid);
+
+static int *DM_NumNgb;        /* per-particle neighbour count collected in the current iteration */
+static double *Dist2list;     /* scratch: squared distances of the neighbours found for one target */
+static int *Ngblist;          /* scratch: tree indices of the neighbours found for one target */
+static MyFloat *Left, *Right; /* per-particle lower / upper bound of the Hsml bracket (0 = not set yet) */
+static char *Todo;            /* per-particle flag: non-zero while Hsml still has to be iterated */
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3]; /* centre of the neighbour search, filled by particle2in() */
+  MyFloat DM_Hsml; /* current search radius, copied from PS[].Hsml */
+
+  int Firstnode; /* set from the firstnode argument of particle2in(); presumably consumed by generic_comm_helpers2 — TODO confirm */
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Packs the data of one local particle into the communication input
+ *         structure defined above. Needed by generic_comm_helpers2.
+ *
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index of particle in the P and PS arrays.
+ *  \param[in] firstnode First node of communication.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  int axis;
+
+  /* fields that are filled the same way for every particle */
+  in->Firstnode = firstnode;
+  in->DM_Hsml   = PS[i].Hsml;
+
+#ifdef CELL_CENTER_GRAVITY
+  if(P[i].Type == 0) /* type-0 particles are searched around PS[].Center */
+    {
+      for(axis = 0; axis < 3; axis++)
+        in->Pos[axis] = PS[i].Center[axis];
+      return;
+    }
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+
+  /* everything else is searched around the particle position */
+  for(axis = 0; axis < 3; axis++)
+    in->Pos[axis] = P[i].Pos[axis];
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  int Ngb; /* neighbour count reported by subfind_linkngb_evaluate() for one target */
+} data_out;
+
+static data_out *DataResult, *DataOut;
+
+/*! \brief Stores a neighbour count in DM_NumNgb or adds it to the value
+ *         already there. Needed by generic_comm_helpers2.
+ *
+ *  \param[in] out Result to be transferred; only out->Ngb is read.
+ *  \param[in] i Index of the particle the result belongs to.
+ *  \param[in] mode MODE_LOCAL_PARTICLES overwrites DM_NumNgb[i] (first,
+ *  local contribution); any other mode accumulates onto it (information that
+ *  was communicated from other tasks and has to be added locally).
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  const int found = out->Ngb;
+
+  if(mode != MODE_LOCAL_PARTICLES)
+    {
+      DM_NumNgb[i] += found; /* combine */
+      return;
+    }
+  DM_NumNgb[i] = found; /* initial store */
+}
+
+#define USE_SUBCOMM_COMMUNICATOR
+#include "../utils/generic_comm_helpers2.h"
+
+/*! \brief Work loop over the local particles.
+ *
+ *  Calls the *_evaluate function in MODE_LOCAL_PARTICLES for flagged particles.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  const int tid = get_thread_num();
+  int task, idx;
+
+  /* reset the export markers of this thread: -1 matches no particle index,
+   * so no task counts as already exported to */
+  for(task = 0; task < SubNTask; task++)
+    Thread[tid].Exportflag[task] = -1;
+
+  /* keep going until the remaining export space of this thread drops below
+   * MinSpace or the particle list is exhausted */
+  while(!(Thread[tid].ExportSpace < MinSpace))
+    {
+      idx = NextParticle++; /* claim the next particle */
+      if(idx >= NumPartGroup)
+        break;
+
+      /* particles that need no further iteration are skipped */
+      if(!Todo[idx])
+        continue;
+
+      subfind_linkngb_evaluate(idx, MODE_LOCAL_PARTICLES, tid);
+    }
+}
+
+/*! \brief Work loop over the imported particles.
+ *
+ *  Calls the *_evaluate function in MODE_IMPORTED_PARTICLES once for each of
+ *  the Nimport entries, in ascending order. The index handed over is used by
+ *  the evaluate function as position in DataGet (input) and DataResult
+ *  (output).
+ *  Unlike in the loop over local particles, no Todo filter is applied.
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  const int tid = get_thread_num();
+  int slot      = 0;
+
+  /* now do the particles that were sent to us; Nimport is re-read before
+   * every evaluation */
+  while(slot < Nimport)
+    {
+      subfind_linkngb_evaluate(slot, MODE_IMPORTED_PARTICLES, tid);
+
+      slot++;
+    }
+}
+
+/*! \brief Iterative search for the smoothing length PS[].Hsml of each group
+ *         particle so that it encloses All.DesLinkNgb neighbours. Hsml is
+ *         bracketed (Left/Right) and bisected; gives up after MAXITER rounds.
+ *  \return void
+ */
+void subfind_find_linkngb(void)
+{
+  long long ntot; /* number of unfinished particles as combined over SubComm */
+  int i, npleft, iter = 0;
+  double t0, t1;
+
+  if(SubThisTask == 0)
+    printf("SUBFIND-COLLECTIVE, root-task=%d: Start find_linkngb. (%d particles on root-task)\n", ThisTask, NumPartGroup);
+
+  /* scratch lists that subfind_linkngb_evaluate() fills with the neighbours of one target */
+
+  Ngblist   = (int *)mymalloc("Ngblist", NumPartGroup * sizeof(int));
+  Dist2list = (double *)mymalloc("Dist2list", NumPartGroup * sizeof(double));
+
+  generic_set_MaxNexport();
+
+  Left      = (MyFloat *)mymalloc("Left", sizeof(MyFloat) * NumPartGroup);
+  Right     = (MyFloat *)mymalloc("Right", sizeof(MyFloat) * NumPartGroup);
+  Todo      = (char *)mymalloc("Todo", sizeof(char) * NumPartGroup);
+  DM_NumNgb = (int *)mymalloc_movable(&DM_NumNgb, "DM_NumNgb", sizeof(int) * NumPartGroup);
+
+  for(i = 0; i < NumPartGroup; i++) /* no bracket known yet (0 = unset); every particle is still to do */
+    {
+      Left[i] = Right[i] = 0;
+      Todo[i]            = 1;
+    }
+
+  /* we will repeat the whole thing for those particles where we didn't find enough neighbours */
+  do
+    {
+      t0 = second();
+
+      generic_comm_pattern(NumPartGroup, kernel_local, kernel_imported);
+
+      /* do final operations on results */
+      for(i = 0, npleft = 0; i < NumPartGroup; i++)
+        {
+          /* done once the count matches, or once both bounds are set and the bracket is narrower than 1e-6 * Left */
+          if(Todo[i])
+            {
+              if(DM_NumNgb[i] != All.DesLinkNgb && ((Right[i] - Left[i]) > 1.0e-6 * Left[i] || Left[i] == 0 || Right[i] == 0))
+                {
+                  /* need to redo this particle */
+                  npleft++;
+
+                  if(DM_NumNgb[i] < All.DesLinkNgb) /* too few neighbours: raise the lower bound */
+                    Left[i] = dmax(PS[i].Hsml, Left[i]);
+                  else /* too many neighbours: lower the upper bound */
+                    {
+                      if(Right[i] != 0)
+                        {
+                          if(PS[i].Hsml < Right[i])
+                            Right[i] = PS[i].Hsml;
+                        }
+                      else
+                        Right[i] = PS[i].Hsml;
+                    }
+
+                  if(iter >= MAXITER - 10) /* close to giving up: report the particles that still fail */
+                    {
+                      printf("i=%d task=%d ID=%llu DM_Hsml=%g Left=%g Right=%g Right-Left=%g\n   pos=(%g|%g|%g)\n", i, ThisTask,
+                             (unsigned long long)P[i].ID, PS[i].Hsml, Left[i], Right[i], (double)(Right[i] - Left[i]), P[i].Pos[0],
+                             P[i].Pos[1], P[i].Pos[2]); /* ID is printed at full width instead of being cast to int */
+                      fflush(stdout);
+                    }
+
+                  if(Right[i] > 0 && Left[i] > 0) /* both bounds known: bisect in Hsml^3 */
+                    PS[i].Hsml = pow(0.5 * (pow(Left[i], 3) + pow(Right[i], 3)), 1.0 / 3);
+                  else
+                    {
+                      if(Right[i] == 0 && Left[i] == 0)
+                        terminate("can't occur");
+
+                      if(Right[i] == 0 && Left[i] > 0) /* only a lower bound so far: grow (1.26^3 ~ 2) */
+                        PS[i].Hsml *= 1.26;
+
+                      if(Right[i] > 0 && Left[i] == 0) /* only an upper bound so far: shrink by the same factor */
+                        PS[i].Hsml /= 1.26;
+                    }
+                }
+              else
+                Todo[i] = 0;
+            }
+        }
+
+      sumup_large_ints_comm(1, &npleft, &ntot, SubComm); /* presumably sums npleft over SubComm into ntot — TODO confirm */
+
+      t1 = second();
+
+      if(ntot > 0)
+        {
+          iter++;
+
+          if(SubThisTask == 0) /* iter is at least 1 here, so no extra check on it is needed */
+            {
+              printf("SUBFIND-COLLECTIVE, root-task=%d: find linkngb iteration %d, need to repeat for %lld particles. (took %g sec)\n",
+                     ThisTask, iter, ntot, timediff(t0, t1));
+              fflush(stdout);
+            }
+
+          if(iter > MAXITER)
+            terminate("failed to converge in neighbour iteration in subfind_find_linkngb()\n");
+        }
+    }
+  while(ntot > 0);
+
+  myfree(DM_NumNgb); /* buffers are released in reverse order of their allocation */
+  myfree(Todo);
+  myfree(Right);
+  myfree(Left);
+
+  myfree(Dist2list);
+  myfree(Ngblist);
+
+  if(SubThisTask == 0)
+    printf("SUBFIND-COLLECTIVE, root-task=%d: Done with find_linkngb\n", ThisTask);
+}
+
+/*! \brief Evaluate function for the neighbor search algorithm.
+ *
+ *  \param[in] target Index of the particle (local mode) or of the DataGet entry (imported mode).
+ *  \param[in] mode Local or imported particles?
+ *  \param[in] threadid ID of thread.
+ *
+ *  \return 0
+ */
+static int subfind_linkngb_evaluate(int target, int mode, int threadid)
+{
+  int no, numnodes, *firstnode, numngb;
+  double hsml;
+  MyDouble *pos;
+  int i, k, p, exported = 0;
+  struct NODE *current;
+  double dx, dy, dz, dist, r2;
+  MyDouble xtmp, ytmp, ztmp; /* not referenced directly below — presumably scratch for the FOF_NEAREST_LONG_* macros; TODO confirm */
+
+  data_in local, *in;
+  data_out out;
+
+  if(mode == MODE_LOCAL_PARTICLES) /* build the input record from the local particle */
+    {
+      particle2in(&local, target, 0);
+      in = &local;
+
+      numnodes  = 1; /* a single walk, started at the root node below */
+      firstnode = NULL;
+    }
+  else /* imported: the input record was received into DataGet */
+    {
+      in = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos  = in->Pos;
+  hsml = in->DM_Hsml;
+
+  numngb = 0;
+
+  for(k = 0; k < numnodes; k++) /* one tree walk per start node */
+    {
+      if(mode == MODE_LOCAL_PARTICLES)
+        {
+          no = SubTree_MaxPart; /* root node */
+        }
+      else
+        {
+          no = firstnode[k];
+          no = SubNodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0) /* a negative index ends the walk */
+        {
+          if(no < SubTree_MaxPart) /* single particle */
+            {
+              p  = no;
+              no = SubNextnode[no]; /* advance first, so that 'continue' moves on to the next entry */
+
+              dist = hsml;
+              dx   = FOF_NEAREST_LONG_X(SubTree_Pos_list[3 * p + 0] - pos[0]); /* per-axis rejection before the full distance test */
+              if(dx > dist)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(SubTree_Pos_list[3 * p + 1] - pos[1]);
+              if(dy > dist)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(SubTree_Pos_list[3 * p + 2] - pos[2]);
+              if(dz > dist)
+                continue;
+              if((r2 = (dx * dx + dy * dy + dz * dz)) > dist * dist)
+                continue;
+
+              Dist2list[numngb] = r2; /* inside the search sphere: record squared distance and tree index */
+              Ngblist[numngb++] = p;
+            }
+          else if(no < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */
+            {
+              if(mode == 1) /* NOTE(review): literal 1 presumably equals MODE_IMPORTED_PARTICLES — confirm */
+                {
+                  if(no < SubTree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the
+                                                           branch */
+                    break;
+                }
+
+              current = &SubNodes[no];
+
+              no = current->u.d.sibling; /* in case the node can be discarded */
+
+              dist = hsml + 0.5 * current->len; /* search radius plus half the node length */
+              dx   = FOF_NEAREST_LONG_X(current->center[0] - pos[0]);
+              if(dx > dist)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(current->center[1] - pos[1]);
+              if(dy > dist)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(current->center[2] - pos[2]);
+              if(dz > dist)
+                continue;
+              /* now test against the minimal sphere enclosing everything */
+              dist += FACT1 * current->len;
+              if(dx * dx + dy * dy + dz * dz > dist * dist)
+                continue;
+
+              no = current->u.d.nextnode; /* ok, we need to open the node */
+            }
+          else
+            { /* pseudo particle */
+              if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("mode == MODE_IMPORTED_PARTICLES");
+
+              if(target >= 0) /* if no target is given, export will not occur */
+                {
+                  exported = 1;
+
+                  if(mode == MODE_LOCAL_PARTICLES)
+                    subfind_treefind_collective_export_node_threads(no, target, threadid);
+                }
+
+              no = SubNextnode[no - SubTree_MaxNodes];
+            }
+        }
+    }
+
+  if(mode == MODE_LOCAL_PARTICLES) /* local particle */
+    if(exported == 0)              /* completely local */
+      if(numngb >= All.DesLinkNgb) /* enough candidates seen: Hsml can be set exactly */
+        {
+          R2list = (r2type *)mymalloc("R2list", sizeof(r2type) * numngb);
+          for(i = 0; i < numngb; i++)
+            {
+              R2list[i].index = Ngblist[i];
+              R2list[i].r2    = Dist2list[i];
+            }
+
+          qsort(R2list, numngb, sizeof(r2type), subfind_ngb_compare_dist); /* ascending in r2 */
+
+          PS[target].Hsml = sqrt(R2list[All.DesLinkNgb - 1].r2); /* distance to the All.DesLinkNgb-th nearest entry */
+          numngb          = All.DesLinkNgb;
+
+          for(i = 0; i < numngb; i++) /* keep only the nearest entries, now sorted, in the scratch lists */
+            {
+              Ngblist[i]   = R2list[i].index;
+              Dist2list[i] = R2list[i].r2;
+            }
+
+          myfree(R2list);
+        }
+
+  out.Ngb = numngb;
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+/*! \brief Prepares node export.
+ *
+ *  \param[in] no Index of the pseudo-particle node that was hit in the tree walk.
+ *  \param[in] i Index of particle.
+ *  \param[in] thread_id Export thread.
+ *
+ *  \return 0
+ */
+int subfind_treefind_collective_export_node_threads(int no, int i, int thread_id)
+{
+  /* The task indicated by the pseudoparticle node */
+  int task = SubDomainTask[no - (SubTree_MaxPart + SubTree_MaxNodes)];
+
+  if(Thread[thread_id].Exportflag[task] != i) /* particle i is not yet registered for export to this task */
+    {
+      Thread[thread_id].Exportflag[task]     = i;
+      int nexp                               = Thread[thread_id].Nexport++; /* new entry at the end of the particle list */
+      Thread[thread_id].PartList[nexp].Task  = task;
+      Thread[thread_id].PartList[nexp].Index = i;
+      Thread[thread_id].ExportSpace -= Thread[thread_id].ItemSize;
+    }
+
+  int nexp                      = Thread[thread_id].NexportNodes++; /* a node entry is added on every call */
+  nexp                          = -1 - nexp; /* node entries use negative indices, i.e. they are stored downwards from nodelist */
+  struct datanodelist *nodelist = (struct datanodelist *)(((char *)Thread[thread_id].PartList) + Thread[thread_id].InitialSpace);
+  nodelist[nexp].Task           = task;
+  nodelist[nexp].Index          = i;
+  nodelist[nexp].Node           = SubDomainNodeIndex[no - (SubTree_MaxPart + SubTree_MaxNodes)];
+  Thread[thread_id].ExportSpace -= sizeof(struct datanodelist) + sizeof(int); /* NOTE(review): purpose of the extra sizeof(int) is not visible here — confirm */
+  return 0;
+}
+
+/*! \brief Comparison function for r2type objects (used with qsort).
+ *
+ *  Orders by the element r2 only, ascending.
+ *
+ *  \param[in] a First object.
+ *  \param[in] b Second object.
+ *
+ *  \return (-1,0,1) -1 if a < b, +1 if a > b, 0 otherwise.
+ */
+static int subfind_ngb_compare_dist(const void *a, const void *b)
+{
+  const r2type *lhs = (const r2type *)a;
+  const r2type *rhs = (const r2type *)b;
+
+  if(lhs->r2 < rhs->r2)
+    return -1;
+
+  return (lhs->r2 > rhs->r2) ? +1 : 0;
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_io.c b/src/amuse/community/arepo/src/subfind/subfind_io.c
new file mode 100644
index 0000000000..be760976b1
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_io.c
@@ -0,0 +1,156 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_io.c
+ * \date        05/2018
+ * \brief       Main output routine for subfind.
+ * \details     contains functions:
+ *                void subfind_save_final(int num)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 14.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_rng.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../fof/fof.h"
+
+#ifdef SUBFIND
+#include "subfind.h"
+
+/*! \brief Saves subfind group catalogue to disk.
+ *
+ *  Assigns the global Group[].FirstSub indices, distributes the tasks over the
+ *  output files and calls the FoF I/O routine fof_subfind_write_file().
+ *  \param[in] num Index of this snapshot output.
+ *
+ *  \return void
+ */
+void subfind_save_final(int num)
+{
+  int i, filenr, gr, ngrps, masterTask, lastTask, totsubs;
+  char buf[1000]; /* output path; only written through snprintf() bounded by sizeof(buf) */
+  double t0, t1;
+
+  /* prepare list of ids with assigned group numbers */
+#ifdef FOF_STOREIDS
+  fof_subfind_prepare_ID_list();
+#endif /* #ifdef FOF_STOREIDS */
+
+  t0 = second();
+
+  /* fill in the FirstSub-values: first as running sum of Nsubs over the local groups */
+  for(i = 0, totsubs = 0; i < Ngroups; i++)
+    {
+      if(i > 0)
+        Group[i].FirstSub = Group[i - 1].FirstSub + Group[i - 1].Nsubs;
+      else
+        Group[i].FirstSub = 0;
+      totsubs += Group[i].Nsubs;
+    }
+
+  MPI_Allgather(&totsubs, 1, MPI_INT, Send_count, 1, MPI_INT, MPI_COMM_WORLD); /* Send_count[t]: subhalo total of task t */
+  for(i = 1, Send_offset[0] = 0; i < NTask; i++)
+    Send_offset[i] = Send_offset[i - 1] + Send_count[i - 1];
+
+  for(i = 0; i < Ngroups; i++)
+    {
+      if(Group[i].Nsubs > 0)
+        Group[i].FirstSub += Send_offset[ThisTask]; /* shift by the subhalos of all lower-ranked tasks */
+      else
+        Group[i].FirstSub = -1; /* group without subhalos */
+    }
+
+  CommBuffer = mymalloc("CommBuffer", COMMBUFFERSIZE);
+
+  if(NTask < All.NumFilesPerSnapshot)
+    {
+      warn(
+          "Number of processors must be larger or equal than All.NumFilesPerSnapshot! Reducing All.NumFilesPerSnapshot "
+          "accordingly.\n");
+      All.NumFilesPerSnapshot = NTask;
+    }
+
+  if(All.SnapFormat < 1 || All.SnapFormat > 3) /* NOTE(review): only reported here, execution continues */
+    mpi_printf("Unsupported File-Format All.SnapFormat=%d \n", All.SnapFormat);
+
+#ifndef HAVE_HDF5
+  if(All.SnapFormat == 3)
+    {
+      mpi_terminate("Code wasn't compiled with HDF5 support enabled!\n");
+    }
+#endif /* #ifndef  HAVE_HDF5 */
+
+  /* assign processors to output files */
+  distribute_file(All.NumFilesPerSnapshot, 0, 0, NTask - 1, &filenr, &masterTask, &lastTask);
+
+  if(All.NumFilesPerSnapshot > 1) /* multi-file output goes into its own directory, created by task 0 */
+    {
+      if(ThisTask == 0)
+        {
+          snprintf(buf, sizeof(buf), "%s/groups_%03d", All.OutputDir, num);
+          mkdir(buf, 02755);
+        }
+      MPI_Barrier(MPI_COMM_WORLD); /* no task proceeds before task 0 is past the mkdir */
+    }
+
+  if(All.NumFilesPerSnapshot > 1) /* an over-long All.OutputDir now truncates the name instead of overflowing buf */
+    snprintf(buf, sizeof(buf), "%s/groups_%03d/%s_%03d.%d", All.OutputDir, num, "fof_subhalo_tab", num, filenr);
+  else
+    snprintf(buf, sizeof(buf), "%s%s_%03d", All.OutputDir, "fof_subhalo_tab", num);
+
+  ngrps = All.NumFilesPerSnapshot / All.NumFilesWrittenInParallel; /* number of write rounds, rounded up below */
+  if((All.NumFilesPerSnapshot % All.NumFilesWrittenInParallel))
+    ngrps++;
+
+  for(gr = 0; gr < ngrps; gr++)
+    {
+      if((filenr / All.NumFilesWrittenInParallel) == gr) /* ok, it's this processor's turn */
+        fof_subfind_write_file(buf, masterTask, lastTask);
+
+      MPI_Barrier(MPI_COMM_WORLD);
+    }
+
+  myfree(CommBuffer);
+
+#ifdef FOF_STOREIDS
+  myfree(ID_list);
+#endif /* #ifdef FOF_STOREIDS */
+
+  t1 = second();
+
+  mpi_printf("SUBFIND: Subgroup catalogues saved. took = %g sec\n", timediff(t0, t1));
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_loctree.c b/src/amuse/community/arepo/src/subfind/subfind_loctree.c
new file mode 100644
index 0000000000..9b3f26255c
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_loctree.c
@@ -0,0 +1,930 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_loctree.c
+ * \date        05/2018
+ * \brief       Algorithms for local tree in subfind.
+ * \details     contains functions:
+ *                void subfind_loctree_findExtent(int npart, struct unbind_data *mp)
+ *                void subfind_loctree_copyExtent(void)
+ *                int subfind_loctree_treebuild(int npart, struct unbind_data **udp)
+ *                void subfind_loctree_update_node_recursive(int no, int sib, int father)
+ *                double subfind_loctree_treeevaluate_potential(int target)
+ *                int subfind_locngb_compare_key(const void *a, const void *b)
+ *                double subfind_locngb_treefind(MyDouble xyz[3], int desngb, double hguess)
+ *                int subfind_locngb_treefind_variable(MyDouble searchcenter[3], double hguess)
+ *                size_t subfind_loctree_treeallocate(int maxnodes, int maxpart)
+ *                void subfind_loctree_treefree(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 14.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../gravity/forcetree.h"
+#include "subfind.h"
+
+#ifdef SUBFIND
+static double RootLen, RootFac, RootBigFac, RootInverseLen, RootCenter[3], RootCorner[3]; /*!< geometry of the root cell; set by subfind_loctree_findExtent() or subfind_loctree_copyExtent() */
+static int LocMaxPart;     /*!< particle capacity passed to subfind_loctree_treeallocate(); tree indices >= LocMaxPart denote internal nodes */
+static int MaxNodes, last; /*!< MaxNodes: current capacity of the node array; last: previously visited entry while the walk order is threaded */
+static int *LocNextNode;   /*!< for each particle index, the next entry of the tree walk (-1 ends the walk) */
+
+static unsigned long long *LocTree_IntPos_list; /*!< three integer coordinates per particle; allocated and freed inside subfind_loctree_treebuild() */
+
+/*! \brief Node structure for subfind tree.
+ */
+static struct LocNODE
+{
+  union
+  {
+    int suns[8]; /*!< daughter slots used while the tree is built: -1 = empty, otherwise a particle or node index; shares storage with d (union!) */
+    struct
+    {
+      MyDouble s[3]; /*!< center of mass of node */
+      MyDouble mass; /*!< mass of node */
+      unsigned char maxsofttype; /*!< softening type with the largest All.ForceSoftening value found below this node */
+#if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING)
+      unsigned char maxhydrosofttype; /*!< largest SofteningType among the Type 0 particles below this node */
+      unsigned char minhydrosofttype; /*!< smallest SofteningType among the Type 0 particles below this node */
+#endif              /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) */
+      int sibling;  /*!< this gives the next node in the walk in case the current node can be used */
+      int nextnode; /*!< this gives the next node in case the current node needs to be opened */
+    } d;
+  } u;
+
+  MyDouble center[3]; /*!< geometrical center of node */
+  MyFloat len;        /*!< sidelength of treenode */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+  MyDouble mass_per_type[NSOFTTYPES]; /*!< mass below this node, accumulated separately per softening type */
+#endif             /* #ifdef MULTIPLE_NODE_SOFTENING */
+} * LocNodes_base, /*!< points to the actual memory allocated for the nodes */
+    *LocNodes;     /*!< this is a pointer used to access the nodes which is shifted such that LocNodes[LocMaxPart]
+                      gives the first allocated node */
+
+/*! \brief Calculates min/max coordinate of particles in unbind data.
+ *
+ *  \param[in] npart Number of local particles (in unbind_data).
+ *  \param[in] mp Pointer to unbind data.
+ *
+ *  \return void
+ */
+void subfind_loctree_findExtent(int npart, struct unbind_data *mp)
+{
+  int idx, axis, n;
+  double extent, lo[3], hi[3];
+
+  /* start from an inverted (empty) bounding box so that the first particle sets both bounds */
+  for(axis = 0; axis < 3; axis++)
+    {
+      lo[axis] = MAX_REAL_NUMBER;
+      hi[axis] = -MAX_REAL_NUMBER;
+    }
+
+  /* grow the box until it encloses every particle listed in mp[] */
+  for(n = 0; n < npart; n++)
+    {
+      const MyDouble *pos;
+
+      if(!mp)
+        terminate("what?");
+
+      idx = mp[n].index;
+
+      /* Type 0 particles contribute PS[].Center instead of P[].Pos when CELL_CENTER_GRAVITY is set */
+#ifdef CELL_CENTER_GRAVITY
+      if(P[idx].Type == 0)
+        pos = &PS[idx].Center[0];
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = &P[idx].Pos[0];
+
+      for(axis = 0; axis < 3; axis++)
+        {
+          if(pos[axis] < lo[axis])
+            lo[axis] = pos[axis];
+
+          if(pos[axis] > hi[axis])
+            hi[axis] = pos[axis];
+        }
+    }
+
+  /* the root cell is a cube: take the longest box edge and pad it by 0.1% */
+  extent = 0;
+  for(axis = 0; axis < 3; axis++)
+    {
+      double span = hi[axis] - lo[axis];
+      if(span > extent)
+        extent = span;
+    }
+  extent *= 1.001;
+
+  RootLen        = extent;
+  RootInverseLen = 1.0 / RootLen;
+  RootFac        = 1.0 / extent * (((peanokey)1) << (BITS_PER_DIMENSION));
+  RootBigFac     = (RootLen / (((long long)1) << 52));
+
+  /* center the cube on the midpoint of the bounding box; the corner is its lower edge */
+  for(axis = 0; axis < 3; axis++)
+    {
+      double mid = 0.5 * (lo[axis] + hi[axis]);
+      RootCenter[axis] = mid;
+      RootCorner[axis] = mid - 0.5 * extent;
+    }
+}
+
+/*! \brief Copy extent information from SubDomain to Root.
+ *
+ *  This is called from the collective subfind code.
+ *
+ *  \return void
+ */
+void subfind_loctree_copyExtent(void)
+{
+  int axis;
+  /* take over the scalar size factors first, then the per-axis center and corner */
+  RootLen        = SubDomainLen;
+  RootInverseLen = SubDomainInverseLen;
+  RootFac        = SubDomainFac;
+  RootBigFac     = SubDomainBigFac;
+  for(axis = 0; axis < 3; axis++)
+    RootCenter[axis] = SubDomainCenter[axis];
+  for(axis = 0; axis < 3; axis++)
+    RootCorner[axis] = SubDomainCorner[axis];
+}
+
+/*! \brief Construct the subfind tree.
+ *
+ *  \param[in] npart Number of particles involved.
+ *  \param[in] udp Unbind data.
+ *
+ *  \return Number of nodes.
+ */
+int subfind_loctree_treebuild(int npart, struct unbind_data **udp)
+{
+  int i, j, k, subnode = 0, parent = -1, numnodes;
+  int nfree, th, nn;
+  struct LocNODE *nfreep;
+  struct unbind_data *mp;
+
+  /* select first node: node indices start at LocMaxPart, smaller indices denote particles */
+  nfree  = LocMaxPart;
+  nfreep = &LocNodes[nfree];
+
+  mp = *udp;
+
+  /* create an empty root node that spans the extent stored in RootLen/RootCenter */
+  nfreep->len = (MyFloat)RootLen;
+  for(i = 0; i < 3; i++)
+    nfreep->center[i] = (MyFloat)RootCenter[i]; /* NOTE(review): center[] is MyDouble but the value passes through a MyFloat cast -- confirm intended */
+
+  for(i = 0; i < 8; i++)
+    nfreep->u.suns[i] = -1;
+
+  numnodes = 1;
+  nfreep++;
+  nfree++;
+
+  /* insert all particles; their integer coordinates are cached in a list indexed by particle index (3 entries each) */
+
+  LocTree_IntPos_list =
+      (unsigned long long *)mymalloc_movable(&LocTree_IntPos_list, "LocTree_IntPos_list", 3 * NumPart * sizeof(unsigned long long));
+
+  for(k = 0; k < npart; k++)
+    {
+      if(mp)
+        i = mp[k].index;
+      else
+        terminate("what?");
+
+      MyDouble *posp;
+
+#ifdef CELL_CENTER_GRAVITY
+      if(P[i].Type == 0)
+        posp = &PS[i].Center[0];
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        posp = &P[i].Pos[0];
+
+      unsigned long long xxb      = force_double_to_int(((posp[0] - RootCorner[0]) * RootInverseLen) + 1.0);
+      unsigned long long yyb      = force_double_to_int(((posp[1] - RootCorner[1]) * RootInverseLen) + 1.0);
+      unsigned long long zzb      = force_double_to_int(((posp[2] - RootCorner[2]) * RootInverseLen) + 1.0);
+      unsigned long long mask     = ((unsigned long long)1) << (52 - 1); /* bit examined at the current level; moves down one bit per level */
+      unsigned char shiftx        = (52 - 1); /* the shifts place the x, y and z bit at bit 2, 1 and 0 of subnode */
+      unsigned char shifty        = (52 - 2);
+      unsigned char shiftz        = (52 - 3);
+      signed long long centermask = (0xFFF0000000000000llu); /* signed on purpose: the right shifts below rely on sign extension to widen the mask */
+      unsigned char levels        = 0;
+
+      unsigned long long *intposp = &LocTree_IntPos_list[3 * i];
+
+      *intposp++ = xxb;
+      *intposp++ = yyb;
+      *intposp++ = zzb;
+
+      th = LocMaxPart; /* start the descent at the root node */
+
+      while(1)
+        {
+          if(th >= LocMaxPart) /* we are dealing with an internal node */
+            {
+              subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) |
+                         ((unsigned char)((zzb & mask) >> (shiftz--))));
+
+              centermask >>= 1;
+              mask >>= 1;
+              levels++;
+
+              if(levels > MAX_TREE_LEVEL)
+                {
+                  /* seems like we're dealing with particles at identical (or extremely close)
+                   * locations. Shift subnode index to allow tree construction. Note: Multipole moments
+                   * of tree are still correct, but one should choose MAX_TREE_LEVEL large enough to have
+                   *      DomainLen/2^MAX_TREE_LEVEL  < gravitational softening length
+                   */
+                  for(j = 0; j < 8; j++)
+                    {
+                      if(LocNodes[th].u.suns[subnode] < 0)
+                        break;
+
+                      subnode++;
+                      if(subnode >= 8)
+                        subnode = 7;
+                    }
+                }
+
+              nn = LocNodes[th].u.suns[subnode];
+
+              if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */
+                {
+                  parent = th; /* note: subnode can still be used in the next step of the walk */
+                  th     = nn;
+                }
+              else
+                {
+                  /* here we have found an empty slot where we can
+                   * attach the new particle as a leaf
+                   */
+                  LocNodes[th].u.suns[subnode] = i;
+                  break; /* done for this particle */
+                }
+            }
+          else
+            {
+              /* we try to insert into a leaf with a single particle
+               * need to generate a new internal node at this point
+               */
+              LocNodes[parent].u.suns[subnode] = nfree; /* parent/subnode still name the slot that held particle th */
+
+              /* side length and center of the new node, derived from the integer coordinates of the particle being inserted */
+              double len = ((double)(mask << 1)) * RootBigFac;
+              double cx  = ((double)((xxb & centermask) | mask)) * RootBigFac + RootCorner[0];
+              double cy  = ((double)((yyb & centermask) | mask)) * RootBigFac + RootCorner[1];
+              double cz  = ((double)((zzb & centermask) | mask)) * RootBigFac + RootCorner[2];
+
+              nfreep->len       = len;
+              nfreep->center[0] = cx;
+              nfreep->center[1] = cy;
+              nfreep->center[2] = cz;
+
+              nfreep->u.suns[0] = -1;
+              nfreep->u.suns[1] = -1;
+              nfreep->u.suns[2] = -1;
+              nfreep->u.suns[3] = -1;
+              nfreep->u.suns[4] = -1;
+              nfreep->u.suns[5] = -1;
+              nfreep->u.suns[6] = -1;
+              nfreep->u.suns[7] = -1;
+
+              unsigned long long *intppos = &LocTree_IntPos_list[3 * th]; /* cached integer position of the particle that occupied the slot */
+
+              subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) |
+                         ((unsigned char)((intppos[2] & mask) >> shiftz)));
+
+              nfreep->u.suns[subnode] = th;
+
+              th = nfree; /* resume trying to insert the new particle at
+                             the newly created internal node */
+
+              numnodes++;
+              nfree++;
+              nfreep++;
+
+              if(numnodes >= MaxNodes)
+                {
+                  MaxNodes *= 1.2; /* grow by 20%; NOTE(review): the result is truncated to int, so a very small MaxNodes (e.g. 4) does not grow */
+
+                  LocNodes_base = (struct LocNODE *)myrealloc_movable(LocNodes_base, (MaxNodes + 1) * sizeof(struct LocNODE));
+                  LocNodes      = LocNodes_base - LocMaxPart;
+                  nfreep        = &LocNodes[nfree];
+                  mp            = *udp; /* re-read the caller's pointer; presumably the movable reallocation may relocate that block -- confirm */
+
+                  if(numnodes > MaxNodes)
+                    {
+                      char buf[1000];
+
+                      sprintf(buf, "maximum number %d of tree-nodes reached., for particle %d  %g %g %g", MaxNodes, i, P[i].Pos[0],
+                              P[i].Pos[1], P[i].Pos[2]);
+                      terminate(buf);
+                    }
+                }
+            }
+        }
+    }
+
+  myfree(LocTree_IntPos_list);
+
+  /* now compute the multipole moments recursively; this also threads the walk order through nextnode/LocNextNode */
+  last = -1;
+  subfind_loctree_update_node_recursive(LocMaxPart, -1, -1);
+
+  if(last >= LocMaxPart) /* terminate the threaded walk after the last visited entry */
+    LocNodes[last].u.d.nextnode = -1;
+  else
+    LocNextNode[last] = -1;
+
+  return numnodes;
+}
+
+/*! \brief Compute multipole moments.
+ *
+ *  This routine computes the multipole moments for a given internal node and
+ *  all its subnodes using a recursive computation.
+ *
+ *  \param[in] no Node that we are in.
+ *  \param[in] sib Sibling of the node.
+ *  \param[in] father Parent node.
+ *
+ *  \return void
+ */
+void subfind_loctree_update_node_recursive(int no, int sib, int father)
+{
+  int j, jj, p, pp = 0, nextsib, suns[8];
+  unsigned char maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+  double mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  unsigned char maxhydrosofttype;
+  unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+  double mass;
+  double s[3];
+
+  if(no >= LocMaxPart) /* internal node */
+    {
+      for(j = 0; j < 8; j++)
+        suns[j] = LocNodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will
+                                             overwrite one element (union!) */
+      if(last >= 0) /* thread the walk: the previously visited entry continues with this node */
+        {
+          if(last >= LocMaxPart)
+            LocNodes[last].u.d.nextnode = no;
+          else
+            LocNextNode[last] = no;
+        }
+
+      last = no;
+
+      mass        = 0;
+      s[0]        = 0;
+      s[1]        = 0;
+      s[2]        = 0;
+      maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; /* start value; NOTE(review): presumably All.ForceSoftening has an extra slot at this index -- confirm */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+      for(j = 0; j < NSOFTTYPES; j++)
+        mass_per_type[j] = 0;
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      maxhydrosofttype = NSOFTTYPES;
+      minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      for(j = 0; j < 8; j++)
+        {
+          if((p = suns[j]) >= 0)
+            {
+              /* check if we have a sibling on the same level */
+              for(jj = j + 1; jj < 8; jj++)
+                if((pp = suns[jj]) >= 0)
+                  break;
+
+              if(jj < 8) /* yes, we do */
+                nextsib = pp;
+              else
+                nextsib = sib; /* no: the daughter inherits this node's own sibling */
+
+              subfind_loctree_update_node_recursive(p, nextsib, no);
+
+              if(p >= LocMaxPart) /* an internal node  */
+                {
+                  mass += LocNodes[p].u.d.mass; /* add the daughter node's mass and mass-weighted center of mass */
+                  s[0] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[0];
+                  s[1] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[1];
+                  s[2] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[2];
+
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[LocNodes[p].u.d.maxsofttype])
+                    maxsofttype = LocNodes[p].u.d.maxsofttype;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+                  int k;
+                  for(k = 0; k < NSOFTTYPES; k++)
+                    mass_per_type[k] += LocNodes[p].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  if(maxhydrosofttype < LocNodes[p].u.d.maxhydrosofttype)
+                    maxhydrosofttype = LocNodes[p].u.d.maxhydrosofttype;
+                  if(minhydrosofttype > LocNodes[p].u.d.minhydrosofttype)
+                    minhydrosofttype = LocNodes[p].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+              else /* a particle */
+                {
+                  mass += P[p].Mass;
+#ifdef CELL_CENTER_GRAVITY
+                  if(P[p].Type == 0)
+                    {
+                      s[0] += P[p].Mass * PS[p].Center[0];
+                      s[1] += P[p].Mass * PS[p].Center[1];
+                      s[2] += P[p].Mass * PS[p].Center[2];
+                    }
+                  else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+                    {
+                      s[0] += P[p].Mass * P[p].Pos[0];
+                      s[1] += P[p].Mass * P[p].Pos[1];
+                      s[2] += P[p].Mass * P[p].Pos[2];
+                    }
+
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[P[p].SofteningType])
+                    maxsofttype = P[p].SofteningType;
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  mass_per_type[P[p].Type == 0 ? 0 : P[p].SofteningType] += P[p].Mass; /* all Type 0 mass is collected in slot 0 */
+
+                  if(P[p].Type == 0)
+                    {
+                      if(maxhydrosofttype < P[p].SofteningType)
+                        maxhydrosofttype = P[p].SofteningType;
+                      if(minhydrosofttype > P[p].SofteningType)
+                        minhydrosofttype = P[p].SofteningType;
+                    }
+#else  /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                  mass_per_type[P[p].SofteningType] += P[p].Mass;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+            }
+        }
+
+      if(mass > 0) /* turn the mass-weighted sum into the center of mass */
+        {
+          s[0] /= mass;
+          s[1] /= mass;
+          s[2] /= mass;
+        }
+      else /* massless node: fall back to the geometrical center */
+        {
+          s[0] = LocNodes[no].center[0];
+          s[1] = LocNodes[no].center[1];
+          s[2] = LocNodes[no].center[2];
+        }
+
+      LocNodes[no].u.d.s[0]        = (MyFloat)s[0]; /* NOTE(review): u.d.s[] and u.d.mass are MyDouble but the values pass through a MyFloat cast -- confirm intended */
+      LocNodes[no].u.d.s[1]        = (MyFloat)s[1];
+      LocNodes[no].u.d.s[2]        = (MyFloat)s[2];
+      LocNodes[no].u.d.mass        = (MyFloat)mass;
+      LocNodes[no].u.d.maxsofttype = maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+      int k;
+      for(k = 0; k < NSOFTTYPES; k++)
+        LocNodes[no].mass_per_type[k] = mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      LocNodes[no].u.d.maxhydrosofttype = maxhydrosofttype;
+      LocNodes[no].u.d.minhydrosofttype = minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      LocNodes[no].u.d.sibling = sib;
+    }
+  else /* single particle or pseudo particle */
+    {
+      if(last >= 0) /* only the walk order is updated for a leaf */
+        {
+          if(last >= LocMaxPart)
+            LocNodes[last].u.d.nextnode = no;
+          else
+            LocNextNode[last] = no;
+        }
+
+      last = no;
+    }
+}
+
+/*! \brief Evaluates the potential by walking the subfind local tree.
+ *
+ *  \param[in] target Index of the particle.
+ *
+ *  \return Gravitational potential.
+ */
+double subfind_loctree_treeevaluate_potential(int target)
+{
+  struct LocNODE *nop = 0;
+  int no;
+  double r2, dx, dy, dz, mass, r, u, h_i, h_j, hmax, h_inv, wp;
+  double pot, pos_x, pos_y, pos_z, xtmp, ytmp, ztmp; /* xtmp/ytmp/ztmp: scratch variables, presumably used inside the GRAVITY_NEAREST_* macros */
+
+#ifdef CELL_CENTER_GRAVITY
+  if(P[target].Type == 0)
+    {
+      pos_x = PS[target].Center[0];
+      pos_y = PS[target].Center[1];
+      pos_z = PS[target].Center[2];
+    }
+  else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+    {
+      pos_x = P[target].Pos[0];
+      pos_y = P[target].Pos[1];
+      pos_z = P[target].Pos[2];
+    }
+
+  h_i = All.ForceSoftening[P[target].SofteningType]; /* softening length of the target particle */
+
+  pot = 0;
+
+  no = LocMaxPart; /* the walk starts at the root node */
+
+  while(no >= 0)
+    {
+#ifdef MULTIPLE_NODE_SOFTENING
+      int indi_flag1 = -1, indi_flag2 = 0; /* default: the type loop below runs once with type = -1, i.e. with the mass/hmax set in this pass */
+#endif                    /* #ifdef MULTIPLE_NODE_SOFTENING */
+      if(no < LocMaxPart) /* single particle */
+        {
+#ifdef CELL_CENTER_GRAVITY
+          if(P[no].Type == 0)
+            {
+              dx = GRAVITY_NEAREST_X(PS[no].Center[0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(PS[no].Center[1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(PS[no].Center[2] - pos_z);
+            }
+          else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+            {
+              dx = GRAVITY_NEAREST_X(P[no].Pos[0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(P[no].Pos[1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(P[no].Pos[2] - pos_z);
+            }
+
+          r2 = dx * dx + dy * dy + dz * dz;
+
+          mass = P[no].Mass;
+
+          h_j = All.ForceSoftening[P[no].SofteningType];
+
+          if(h_j > h_i) /* the pair is softened with the larger of the two lengths */
+            hmax = h_j;
+          else
+            hmax = h_i;
+
+          no = LocNextNode[no];
+        }
+      else
+        {
+          nop  = &LocNodes[no];
+          mass = nop->u.d.mass;
+
+          dx = GRAVITY_NEAREST_X(nop->u.d.s[0] - pos_x);
+          dy = GRAVITY_NEAREST_Y(nop->u.d.s[1] - pos_y);
+          dz = GRAVITY_NEAREST_Z(nop->u.d.s[2] - pos_z);
+
+          r2 = dx * dx + dy * dy + dz * dz;
+
+          /* check Barnes-Hut opening criterion */
+          if(nop->len * nop->len > r2 * All.ErrTolThetaSubfind * All.ErrTolThetaSubfind)
+            {
+              /* open cell */
+              if(mass) /* a massless node is not opened; it falls through and is used with mass 0 */
+                {
+                  no = nop->u.d.nextnode;
+                  continue;
+                }
+            }
+
+          h_j = All.ForceSoftening[nop->u.d.maxsofttype];
+
+          if(h_j > h_i)
+            {
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+              if(nop->u.d.maxhydrosofttype != nop->u.d.minhydrosofttype)
+                if(LocNodes[no].mass_per_type[0] > 0)
+                  if(r2 < All.ForceSoftening[nop->u.d.maxhydrosofttype] * All.ForceSoftening[nop->u.d.maxhydrosofttype])
+                    {
+                      /* open cell */
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+              indi_flag1 = 0; /* evaluate the node separately for each softening type 0 .. NSOFTTYPES-1 */
+              indi_flag2 = NSOFTTYPES;
+#else  /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+              if(r2 < h_j * h_j)
+                {
+                  /* open cell */
+                  no = nop->u.d.nextnode;
+                  continue;
+                }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING #else */
+              hmax = h_j;
+            }
+          else
+            hmax = h_i;
+
+          no = nop->u.d.sibling; /* node can be used */
+        }
+
+      r = sqrt(r2);
+#ifdef MULTIPLE_NODE_SOFTENING
+      int type;
+      for(type = indi_flag1; type < indi_flag2; type++)
+        {
+          if(type >= 0) /* per-type pass: take this type's share of the node mass and its softening */
+            {
+              mass = nop->mass_per_type[type];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+              if(type == 0)
+                h_j = All.ForceSoftening[nop->u.d.maxhydrosofttype];
+              else
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                h_j = All.ForceSoftening[type];
+
+              if(h_j > h_i)
+                hmax = h_j;
+              else
+                hmax = h_i;
+            }
+
+          if(mass)
+            {
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+              if(r >= hmax) /* outside the softening length: point-mass term */
+                pot -= mass / r;
+              else
+                {
+                  h_inv = 1.0 / hmax;
+
+                  u = r * h_inv;
+
+                  if(u < 0.5) /* softened term: piecewise polynomial in u = r / hmax */
+                    wp = -2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6));
+                  else
+                    wp = -3.2 + 0.066666666667 / u + u * u * (10.666666666667 + u * (-16.0 + u * (9.6 - 2.133333333333 * u)));
+
+                  pot += mass * h_inv * wp;
+#ifdef MULTIPLE_NODE_SOFTENING
+                }
+            }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+        }
+    }
+
+  return pot;
+}
+
+/*! \brief Comparison function for r2type objects.
+ *
+ *  Compares element r2.
+ *
+ *  \param[in] a First object to compare.
+ *  \param[in] b Second object to compare.
+ *
+ *  \return (-1,0,1), -1 if a->r2 < b->r2.
+ */
+int subfind_locngb_compare_key(const void *a, const void *b)
+{
+  const r2type *lhs = (const r2type *)a;
+  const r2type *rhs = (const r2type *)b;
+
+  /* qsort() comparator: ascending in r2; keys that are neither smaller nor larger compare as 0 */
+  if(lhs->r2 < rhs->r2)
+    return -1;
+  return (lhs->r2 > rhs->r2) ? +1 : 0;
+}
+
+/*! \brief Iterates on smoothing length of  neighbor search to get a desired
+ *         number of neighbors.
+ *
+ *  \param[in] xyz Search center of neighbor search.
+ *  \param[in] desngb Desired number of neighbors.
+ *  \param[in] hguess Initial guess of smoothing length.
+ *
+ *  \return Distance of the outermost particle to search center.
+ */
+double subfind_locngb_treefind(MyDouble xyz[3], int desngb, double hguess)
+{
+  int numngb;
+  double h2max;
+
+  if(hguess == 0)
+    terminate("hguess needed");
+
+  /* R2list[desngb - 1] is read below; a non-positive request would index in front of the array */
+  if(desngb < 1)
+    terminate("desngb must be positive");
+
+  /* Enlarge the search radius by a factor of 1.26 per pass (1.26^3 ~ 2, i.e. the search
+   * volume roughly doubles) until the search returns at least desngb particles.
+   * NOTE(review): the loop cannot end if fewer than desngb particles are reachable in the
+   * tree; presumably callers guarantee enough particles -- confirm.
+   */
+  numngb = subfind_locngb_treefind_variable(xyz, hguess);
+  while(numngb < desngb)
+    {
+      hguess *= 1.26;
+      numngb = subfind_locngb_treefind_variable(xyz, hguess);
+    }
+
+  /* order the hits by squared distance; entry desngb - 1 is then the desngb-th nearest one */
+  qsort(R2list, numngb, sizeof(r2type), subfind_locngb_compare_key);
+  h2max = R2list[desngb - 1].r2;
+
+  return sqrt(h2max);
+}
+
+/*! \brief (Local) tree-search in subfind tree.
+ *
+ *  Adds the particles found within hguess of the search center to R2list.
+ *
+ *  \param[in] searchcenter Center around which particles are searched.
+ *  \param[in] hguess Distance up to which particles are searched.
+ *
+ *  \return Number of neighbors found.
+ */
+int subfind_locngb_treefind_variable(MyDouble searchcenter[3], double hguess)
+{
+  int found = 0;          /* number of entries written to R2list so far */
+  int no    = LocMaxPart; /* the walk starts at the root node */
+  int p;
+  const MyDouble *pos;
+  double dx, dy, dz, r2;
+  double xtmp, ytmp, ztmp; /* scratch variables, presumably used inside the GRAVITY_NEAREST_* macros */
+  const double h2 = hguess * hguess;
+
+  /* The tree is walked as a threaded list: indices below LocMaxPart are particles, whose
+   * successor is stored in LocNextNode[]; all other indices are internal nodes, which are
+   * either opened (u.d.nextnode) or skipped together with their subtree (u.d.sibling).
+   * A negative index ends the walk.
+   */
+  while(no >= 0)
+    {
+      if(no >= LocMaxPart)
+        {
+          /* internal node: compare its cube (center +/- len/2) with the search box */
+          const struct LocNODE *node = &LocNodes[no];
+          const double half          = 0.5 * node->len;
+          int outside;
+
+          dx = GRAVITY_NEAREST_X(node->center[0] - searchcenter[0]);
+          dy = GRAVITY_NEAREST_Y(node->center[1] - searchcenter[1]);
+          dz = GRAVITY_NEAREST_Z(node->center[2] - searchcenter[2]);
+
+          outside = (dx + half < -hguess) || (dx - half > hguess);
+          outside = outside || (dy + half < -hguess) || (dy - half > hguess);
+          outside = outside || (dz + half < -hguess) || (dz - half > hguess);
+
+          if(outside)
+            no = node->u.d.sibling; /* no overlap: the whole subtree can be discarded */
+          else
+            no = node->u.d.nextnode; /* overlap: the node has to be opened */
+
+          continue;
+        }
+
+      /* leaf, i.e. a single particle: advance the walk first so that the early exits
+       * below continue with the next entry
+       */
+      p  = no;
+      no = LocNextNode[p];
+
+      /* Type 0 particles are located at PS[].Center when CELL_CENTER_GRAVITY is set */
+#ifdef CELL_CENTER_GRAVITY
+      if(P[p].Type == 0)
+        pos = &PS[p].Center[0];
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = &P[p].Pos[0];
+
+      dx = GRAVITY_NEAREST_X(pos[0] - searchcenter[0]);
+      dy = GRAVITY_NEAREST_Y(pos[1] - searchcenter[1]);
+      dz = GRAVITY_NEAREST_Z(pos[2] - searchcenter[2]);
+
+      /* cheap per-axis rejection against the cube that encloses the search sphere */
+      if(dx < -hguess || dx > hguess)
+        continue;
+      if(dy < -hguess || dy > hguess)
+        continue;
+      if(dz < -hguess || dz > hguess)
+        continue;
+
+      /* exact test against the search sphere; hits are appended to the global R2list.
+       * NOTE(review): no bound check on R2list here -- it is sized for LocMaxPart entries in
+       * subfind_loctree_treeallocate(), which presumably bounds the number of leaves.
+       */
+      r2 = dx * dx + dy * dy + dz * dz;
+
+      if(r2 <= h2)
+        {
+          R2list[found].r2    = r2;
+          R2list[found].index = p;
+          found++;
+        }
+    }
+
+  return found;
+}
+
+/*! \brief Allocates memory used for storage of the tree
+ *         and auxiliary arrays for tree-walk and link-lists.
+ *
+ *  \param[in] maxnodes Maximum number of nodes.
+ *  \param[in] maxpart Maximum number of particles.
+ *
+ *  \return Size of allocated memory in bytes.
+ */
+size_t subfind_loctree_treeallocate(int maxnodes, int maxpart)
+{
+  size_t next_bytes, r2_bytes, node_bytes;
+
+  if(LocNextNode)
+    terminate("loctree already allocated");
+
+  LocMaxPart = maxpart;
+  MaxNodes   = maxnodes;
+
+  /* sizes of the three arrays; the node array holds one entry more than MaxNodes */
+  next_bytes = maxpart * sizeof(int);
+  r2_bytes   = maxpart * sizeof(r2type);
+  node_bytes = (MaxNodes + 1) * sizeof(struct LocNODE);
+
+  LocNextNode   = (int *)mymalloc("LocNextNode", next_bytes);
+  R2list        = (r2type *)mymalloc("R2list", r2_bytes);
+  LocNodes_base = (struct LocNODE *)mymalloc_movable(&LocNodes_base, "LocNodes_base", node_bytes);
+  LocNodes      = LocNodes_base - LocMaxPart; /* shifted so that LocNodes[LocMaxPart] is the first node */
+
+  return next_bytes + r2_bytes + node_bytes;
+}
+
+/*! \brief Frees the memory allocated for subfind_loctree.
+ *
+ *  \return void
+ */
+void subfind_loctree_treefree(void)
+{
+  /* release in the reverse order of subfind_loctree_treeallocate(), clearing each pointer */
+  myfree(LocNodes_base);
+  LocNodes_base = NULL;
+  myfree(R2list);
+  R2list = NULL;
+  myfree(LocNextNode);
+  LocNextNode = NULL;
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_nearesttwo.c b/src/amuse/community/arepo/src/subfind/subfind_nearesttwo.c
new file mode 100644
index 0000000000..23e8bf95f3
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_nearesttwo.c
@@ -0,0 +1,475 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_nearesttwo.c
+ * \date        05/2018
+ * \brief       Neighbor finding of particles in group.
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                void subfind_find_nearesttwo(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 14.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef SUBFIND
+#include "subfind.h"
+
+static int subfind_nearesttwo_evaluate(int target, int mode, int threadid); /* defined at the end of this file; called by both kernels */
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3];    /* search position: P[].Pos, or PS[].Center for type-0 particles if CELL_CENTER_GRAVITY is set */
+  MyIDType ID;        /* P[].ID of the target; used by the search to exclude the target itself */
+  MyFloat Hsml;       /* PS[].Hsml of the target; the search radius before the 1.00001 safety factor */
+  MyFloat Density;    /* PS[].Density of the target; only neighbours with a higher density are kept */
+  MyFloat Dist[2];    /* R2Loc[].dist of the neighbours stored so far (first Count entries valid) */
+  int Count;          /* NgbLoc[].count, i.e. how many entries of Dist and Index are filled */
+  long long Index[2]; /* NgbLoc[].index of the neighbours stored so far (first Count entries valid) */
+
+  int Firstnode; /* value of the firstnode argument of particle2in() */
+} data_in;
+
+static data_in *DataIn, *DataGet; /* DataGet[] is read by subfind_nearesttwo_evaluate() for imported targets */
+
+/*! \brief Copies everything a neighbour search needs to know about one
+ *         particle into a data_in record. Needed by generic_comm_helpers2.
+ *
+ *  The search position is the cell centre PS[i].Center for type-0 particles
+ *  when CELL_CENTER_GRAVITY is enabled, and P[i].Pos otherwise. Only the
+ *  first NgbLoc[i].count entries of Dist and Index are written.
+ *
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index of particle in the P, PS, NgbLoc and R2Loc arrays.
+ *  \param[in] firstnode First node of communication.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  const int nstored = NgbLoc[i].count;
+
+  in->Firstnode = firstnode;
+  in->ID        = P[i].ID;
+  in->Hsml      = PS[i].Hsml;
+  in->Density   = PS[i].Density;
+  in->Count     = nstored;
+
+#ifdef CELL_CENTER_GRAVITY
+  if(P[i].Type == 0)
+    for(int axis = 0; axis < 3; axis++)
+      in->Pos[axis] = PS[i].Center[axis];
+  else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+    for(int axis = 0; axis < 3; axis++)
+      in->Pos[axis] = P[i].Pos[axis];
+
+  /* carry over the neighbours stored so far for this particle */
+  for(int n = 0; n < nstored; n++)
+    {
+      in->Dist[n]  = R2Loc[i].dist[n];
+      in->Index[n] = NgbLoc[i].index[n];
+    }
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyFloat Dist[2];    /* squared distances of the neighbours found (first Count entries valid) */
+  long long Index[2]; /* per neighbour: (SubThisTask << 32) + local particle index, as built by the search */
+  int Count;          /* number of neighbours found, at most 2 */
+} data_out;
+
+static data_out *DataResult, *DataOut; /* DataResult[] is written by subfind_nearesttwo_evaluate() for imported targets */
+
+/*! \brief Stores a search result for particle i, or merges it into the
+ *         result already stored there. Needed by generic_comm_helpers2.
+ *
+ *  In MODE_LOCAL_PARTICLES the entries of NgbLoc[i] and R2Loc[i] are simply
+ *  overwritten. In any other mode every candidate in out is merged into the
+ *  (at most two) stored neighbours: candidates that are already stored are
+ *  ignored, a free slot is filled if there is one, and otherwise the stored
+ *  neighbour with the larger distance is replaced if the candidate is
+ *  strictly closer.
+ *
+ *  \param[in] out Result data to be moved to NgbLoc and R2Loc.
+ *  \param[in] i Index of particle in the NgbLoc and R2Loc arrays.
+ *  \param[in] mode Mode of function: MODE_LOCAL_PARTICLES for the initial
+ *             store, anything else to combine with the stored result.
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  if(mode == MODE_LOCAL_PARTICLES) /* initial store */
+    {
+      NgbLoc[i].count = out->Count;
+
+      for(int n = 0; n < out->Count; n++)
+        {
+          R2Loc[i].dist[n]   = out->Dist[n];
+          NgbLoc[i].index[n] = out->Index[n];
+        }
+
+      return;
+    }
+
+  /* combine */
+  for(int n = 0; n < out->Count; n++)
+    {
+      const int have = NgbLoc[i].count;
+      int slot;
+
+      /* skip candidates that are already stored */
+      if(have >= 1 && NgbLoc[i].index[0] == out->Index[n])
+        continue;
+      if(have == 2 && NgbLoc[i].index[1] == out->Index[n])
+        continue;
+
+      if(have < 2)
+        {
+          slot            = have; /* append to the free slot */
+          NgbLoc[i].count = have + 1;
+        }
+      else
+        {
+          /* candidate must beat the more distant stored neighbour */
+          slot = (R2Loc[i].dist[0] > R2Loc[i].dist[1]) ? 0 : 1;
+          if(out->Dist[n] >= R2Loc[i].dist[slot])
+            continue;
+        }
+
+      R2Loc[i].dist[slot]   = out->Dist[n];
+      NgbLoc[i].index[slot] = out->Index[n];
+
+      if(NgbLoc[i].count == 2 && NgbLoc[i].index[0] == NgbLoc[i].index[1])
+        terminate("this is not supposed to happen");
+    }
+}
+
+#define USE_SUBCOMM_COMMUNICATOR /* defined before the include below; presumably switches the helpers to SubComm -- TODO confirm in the header */
+#include "../utils/generic_comm_helpers2.h"
+/* scratch lists: allocated and freed in subfind_find_nearesttwo(), filled by subfind_nearesttwo_evaluate() */
+static double *Dist2list; /* squared distance of each candidate neighbour */
+static int *Ngblist;      /* tree/particle index of each candidate neighbour */
+
+/*! \brief Routine that defines what to do with local particles.
+ *
+ *  Clears the export flags of the calling thread and then feeds particles,
+ *  taken one at a time from the counter NextParticle, to the *_evaluate
+ *  function in MODE_LOCAL_PARTICLES. The loop ends when the thread's
+ *  ExportSpace drops below MinSpace or when all NumPartGroup particles
+ *  have been handed out.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  const int threadid = get_thread_num();
+
+  for(int task = 0; task < SubNTask; task++)
+    Thread[threadid].Exportflag[task] = -1;
+
+  /* the space check comes first, so NextParticle is not advanced when
+   * there is not enough export space left */
+  while(!(Thread[threadid].ExportSpace < MinSpace))
+    {
+      const int target = NextParticle++;
+
+      if(target >= NumPartGroup)
+        break;
+
+      subfind_nearesttwo_evaluate(target, MODE_LOCAL_PARTICLES, threadid);
+    }
+}
+
+/*! \brief Routine that defines what to do with imported particles.
+ *
+ *  Calls the *_evaluate function in MODE_IMPORTED_PARTICLES once for each
+ *  of the Nimport entries, in ascending order. This is the counterpart of
+ *  kernel_local() for the particles that were sent to this task.
+ *
+ *  The thread id is obtained once through get_thread_num() and handed to
+ *  every call. The return value of the evaluate function is ignored.
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  const int threadid = get_thread_num();
+  int slot;
+
+  /* now do the particles that were sent to us */
+  for(slot = 0; slot < Nimport; slot++)
+    {
+      /* in this mode the target is the position of the entry in DataGet[],
+       * and the result goes to the same position of DataResult[] */
+      subfind_nearesttwo_evaluate(slot, MODE_IMPORTED_PARTICLES, threadid);
+    }
+}
+
+/*! \brief Neighbour finding for each particle in group.
+ *
+ *  Allocates the scratch lists of the tree walk, zeroes all NgbLoc counts
+ *  and runs generic_comm_pattern() with the two kernels of this file.
+ *
+ *  \return void
+ */
+void subfind_find_nearesttwo(void)
+{
+  const int report = (SubThisTask == 0);
+  if(report)
+    printf("SUBFIND-COLLECTIVE, root-task=%d: Start finding nearest two.\n", ThisTask);
+
+  /* scratch lists with NumPartGroup entries each */
+  Ngblist   = (int *)mymalloc("Ngblist", NumPartGroup * sizeof(*Ngblist));
+  Dist2list = (double *)mymalloc("Dist2list", NumPartGroup * sizeof(*Dist2list));
+  generic_set_MaxNexport();
+
+  for(int part = 0; part < NumPartGroup; part++)
+    NgbLoc[part].count = 0;
+  generic_comm_pattern(NumPartGroup, kernel_local, kernel_imported);
+
+  myfree(Dist2list); /* released in reverse order of allocation */
+  myfree(Ngblist);
+  if(report)
+    printf("SUBFIND-COLLECTIVE, root-task=%d: Done with nearest two.\n", ThisTask);
+}
+
+/*! \brief Finds, for one target, the (up to) two nearest denser neighbours
+ *         in the tree of this task.
+ *
+ *  Works on local targets as well as on imported ones. All particles within
+ *  1.00001 * Hsml of the target position are collected by a tree walk; of
+ *  these, the ones with a different ID and a higher PS[].Density compete for
+ *  the two result slots. The neighbours handed in with the target are only
+ *  used for a consistency check, the result is built from scratch.
+ *
+ *  \param[in] target Index of particle/cell (local mode) or of the entry in
+ *             DataGet/DataResult (imported mode).
+ *  \param[in] mode Flag if it operates on local or imported data.
+ *  \param[in] threadid ID of thread.
+ *
+ * \return 0
+ */
+static int subfind_nearesttwo_evaluate(int target, int mode, int threadid)
+{
+  int j, k, n, no, count;
+  MyIDType ID;
+  long long index[2];
+  double dist[2];
+  int numngb, numnodes, *firstnode;
+  double hsml;
+  double density;
+  MyDouble *pos;
+  struct NODE *current;
+  double dx, dy, dz, disthsml, r2;
+  MyDouble xtmp, ytmp, ztmp; /* not used directly; presumably scratch for the FOF_NEAREST_LONG_* macros -- keep */
+
+  data_in local, *in;
+  data_out out;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0);
+      in = &local;
+
+      numnodes  = 1;
+      firstnode = NULL;
+    }
+  else
+    {
+      in = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  ID      = in->ID;
+  density = in->Density;
+  pos     = in->Pos;
+  hsml    = in->Hsml;
+  count   = in->Count;
+  for(k = 0; k < count; k++)
+    {
+      dist[k]  = in->Dist[k];
+      index[k] = in->Index[k];
+    }
+
+  /* sanity check on the neighbours that came in with the target */
+  if(count == 2 && index[0] == index[1])
+    terminate("task=%d/%d target=%d mode=%d  index_0=%lld  index_1=%lld\n", SubThisTask, ThisTask, target, mode, index[0],
+              index[1]);
+
+  numngb = 0;
+  count  = 0; /* the result is rebuilt from scratch below */
+
+  hsml *= 1.00001; /* prevents that the most distant neighbour on the edge of the search region may not be found.
+                    * (needed for consistency with serial algorithm) */
+
+  for(k = 0; k < numnodes; k++)
+    {
+      if(mode == MODE_LOCAL_PARTICLES)
+        {
+          no = SubTree_MaxPart; /* root node */
+        }
+      else
+        {
+          no = firstnode[k];
+          no = SubNodes[no].u.d.nextnode; /* open it */
+        }
+      while(no >= 0)
+        {
+          if(no < SubTree_MaxPart) /* single particle */
+            {
+              int p = no;
+              no    = SubNextnode[no];
+
+              disthsml = hsml;
+              dx       = FOF_NEAREST_LONG_X(SubTree_Pos_list[3 * p + 0] - pos[0]);
+              if(dx > disthsml)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(SubTree_Pos_list[3 * p + 1] - pos[1]);
+              if(dy > disthsml)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(SubTree_Pos_list[3 * p + 2] - pos[2]);
+              if(dz > disthsml)
+                continue;
+              if((r2 = (dx * dx + dy * dy + dz * dz)) > disthsml * disthsml)
+                continue;
+
+              Dist2list[numngb] = r2;
+              Ngblist[numngb++] = p;
+            }
+          else if(no < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */
+            {
+              /* an imported walk that reaches a top-level node again is done with the branch */
+              if(mode == MODE_IMPORTED_PARTICLES && no < SubTree_FirstNonTopLevelNode)
+                break;
+
+              current = &SubNodes[no];
+
+              no = current->u.d.sibling; /* in case the node can be discarded */
+
+              disthsml = hsml + 0.5 * current->len;
+
+              dx = FOF_NEAREST_LONG_X(current->center[0] - pos[0]);
+              if(dx > disthsml)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(current->center[1] - pos[1]);
+              if(dy > disthsml)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(current->center[2] - pos[2]);
+              if(dz > disthsml)
+                continue;
+              /* now test against the minimal sphere enclosing everything */
+              disthsml += FACT1 * current->len;
+              if(dx * dx + dy * dy + dz * dz > disthsml * disthsml)
+                continue;
+
+              no = current->u.d.nextnode; /* ok, we need to open the node */
+            }
+          else if(no >= SubTree_ImportedNodeOffset) /* point from imported nodelist */
+            {
+              terminate("do not expect imported points here");
+            }
+          else /* pseudo particle */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("mode == MODE_IMPORTED_PARTICLES");
+
+              if(target >= 0) /* note: if no target is given, export will not occur */
+                subfind_treefind_collective_export_node_threads(no, target, threadid);
+
+              no = SubNextnode[no - SubTree_MaxNodes];
+            }
+        }
+    }
+
+  for(n = 0; n < numngb; n++)
+    {
+      j  = Ngblist[n];
+      r2 = Dist2list[n];
+
+      if(P[j].ID != ID) /* exclude the self-particle */
+        {
+          if(PS[j].Density > density) /* we only look at neighbours that are denser */
+            {
+              if(count < 2)
+                {
+                  dist[count]  = r2;
+                  index[count] = (((long long)SubThisTask) << 32) + j;
+                  count++;
+                }
+              else
+                {
+                  if(dist[0] > dist[1])
+                    k = 0;
+                  else
+                    k = 1;
+
+                  if(r2 < dist[k])
+                    {
+                      dist[k]  = r2;
+                      index[k] = (((long long)SubThisTask) << 32) + j;
+                    }
+                }
+            }
+        }
+    }
+
+  out.Count = count;
+  for(k = 0; k < count; k++)
+    {
+      out.Dist[k]  = dist[k];
+      out.Index[k] = index[k];
+    }
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_properties.c b/src/amuse/community/arepo/src/subfind/subfind_properties.c
new file mode 100644
index 0000000000..5d2756cbdf
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_properties.c
@@ -0,0 +1,1195 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_properties.c
+ * \date        05/2018
+ * \brief       Calculation of the subgroup properties.
+ * \details     contains functions:
+ *                void subfind_determine_sub_halo_properties(struct
+ *                  unbind_data *d, int num, struct subgroup_properties
+ *                  *subgroup, int grnr, int subnr, int parallel_flag, int
+ *                  nsubgroups_cat)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 14.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef SUBFIND
+#include "../fof/fof.h"
+#include "subfind.h"
+
+/*! \brief Calculates subhalo properties.
+ *
+ *
+ *  \param[in] d Unbind data.
+ *  \param[in] num Length of d.
+ *  \param[out] subgroup Data for subgroup properties.
+ *  \param[in] grnr Index in GroupCat.
+ *  \param[in] subnr Index of Subhalo in this group.
+ *  \param[in] parallel_flag If set, the code calculates the properties for a
+ *             subhalo distributed onto several processors.
+ *  \param[in] nsubgroups_cat (unused)
+ *
+ *  \return void
+ */
+void subfind_determine_sub_halo_properties(struct unbind_data *d, int num, struct subgroup_properties *subgroup, int grnr, int subnr,
+                                           int parallel_flag, int nsubgroups_cat)
+{
+  int i, j, p, len_type[NTYPES], len_type_loc[NTYPES], totlen;
+  double s[3], v[3], pos[3], vel[3], spin[3], cm[3], veldisp, max, vel_to_phys, H_of_a, minpot;
+#ifdef MHD
+  double bfld_halo, bfld_disk, bfld_vol_halo, bfld_vol_disk;
+#endif /* #ifdef MHD */
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  double Ekin = 0, Epot = 0, Ethr = 0, Jdm[3], Jgas[3], Jstars[3], CMFrac, CMFracType[NTYPES];
+  double Jdm_inHalfRad[3], Jgas_inHalfRad[3], Jstars_inHalfRad[3], CMFrac_inHalfRad, CMFracType_inHalfRad[NTYPES];
+  double Jdm_inRad[3], Jgas_inRad[3], Jstars_inRad[3], CMFrac_inRad, CMFracType_inRad[NTYPES];
+  double jpart[3], Jtot[3], Jtot_inRad[3], Jtot_inHalfRad[3];
+  double sinrad[3], sinhalfrad[3], vinrad[3], vinhalfrad[3];
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+  double lx, ly, lz, dv[3], dx[3], disp, rr_tmp, disp_tmp, halfmassrad = 0, halfmassradtype[NTYPES];
+  double boxsize, ddxx, vmax, vmaxrad, maxrad;
+  double mass, massinrad, massinhalfrad, massinmaxrad;
+  double mass_tab[NTYPES], massinrad_tab[NTYPES], massinhalfrad_tab[NTYPES], massinmaxrad_tab[NTYPES];
+  double xtmp;
+
+  sort_r2list *rr_list = 0;
+  int minindex;
+  MyIDType mostboundid;
+
+#ifdef USE_SFR
+  double sfr = 0, sfrinrad = 0, sfrinhalfrad = 0, sfrinmaxrad = 0, gasMassSfr = 0;
+#endif /* #ifdef USE_SFR */
+
+  boxsize = All.BoxSize;
+
+  vel_to_phys = 1.0 / All.cf_atime;
+
+  if(All.ComovingIntegrationOn)
+    H_of_a = hubble_function(All.Time);
+  else
+    H_of_a = 0;
+
+  mass = massinrad = massinhalfrad = massinmaxrad = 0;
+  for(j = 0; j < NTYPES; j++)
+    {
+      len_type[j] = 0;
+      mass_tab[j] = halfmassradtype[j] = massinrad_tab[j] = massinhalfrad_tab[j] = massinmaxrad_tab[j] = 0;
+    }
+
+  for(i = 0, minindex = -1, minpot = 1.0e30; i < num; i++)
+    {
+      p = d[i].index;
+      if(PS[p].Potential < minpot || minindex == -1)
+        {
+          minpot   = PS[p].Potential;
+          minindex = p;
+        }
+
+      len_type[P[p].Type]++;
+
+#ifdef USE_SFR
+      if(P[p].Type == 0)
+        sfr += SphP[PS[p].OldIndex].Sfr; /* note: the SphP[] array has not been reordered */
+#endif                                   /* #ifdef USE_SFR */
+    }
+
+  for(j = 0; j < NTYPES; j++)
+    len_type_loc[j] = len_type[j];
+
+  if(parallel_flag)
+    {
+      int len_typetot[NTYPES];
+      MPI_Allreduce(len_type, len_typetot, NTYPES, MPI_INT, MPI_SUM, SubComm);
+      for(j = 0; j < NTYPES; j++)
+        len_type[j] = len_typetot[j];
+
+      double *minpotlist = mymalloc("minpotlist", SubNTask * sizeof(double));
+      MPI_Allgather(&minpot, 1, MPI_DOUBLE, minpotlist, 1, MPI_DOUBLE, SubComm);
+      int mincpu;
+
+      for(i = 0, mincpu = -1, minpot = 1.0e30; i < SubNTask; i++)
+        if(minpotlist[i] < minpot)
+          {
+            mincpu = i;
+            minpot = minpotlist[mincpu];
+          }
+
+      myfree(minpotlist);
+
+      if(mincpu < 0)
+        terminate("mincpu < 0");
+
+      if(SubThisTask == mincpu)
+        for(j = 0; j < 3; j++)
+          {
+#ifdef CELL_CENTER_GRAVITY
+            if(P[minindex].Type == 0)
+              pos[j] = SphP[PS[minindex].OldIndex].Center[j];
+            else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+              pos[j] = P[minindex].Pos[j];
+          }
+
+      MPI_Bcast(pos, 3, MPI_DOUBLE, mincpu, SubComm);
+
+#ifdef USE_SFR
+      double sfrtot;
+      MPI_Allreduce(&sfr, &sfrtot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      sfr = sfrtot;
+#endif /* #ifdef USE_SFR */
+    }
+  else
+    {
+      if(minindex == -1)
+        terminate("minindex == -1");
+
+      for(j = 0; j < 3; j++)
+        {
+#ifdef CELL_CENTER_GRAVITY
+          if(P[minindex].Type == 0)
+            pos[j] = SphP[PS[minindex].OldIndex].Center[j];
+          else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+            pos[j] = P[minindex].Pos[j];
+        }
+    }
+
+  /* pos[] now holds the position of minimum potential */
+  /* we'll take it that as the center */
+
+  /* determine the particle ID with the smallest binding energy */
+  for(i = 0, minindex = -1, minpot = 1.0e30; i < num; i++)
+    {
+      p = d[i].index;
+      if(PS[p].BindingEnergy < minpot || minindex == -1)
+        {
+          minpot   = PS[p].BindingEnergy;
+          minindex = p;
+        }
+    }
+
+  if(parallel_flag)
+    {
+      double *minpotlist = mymalloc("minpotlist", SubNTask * sizeof(double));
+      MPI_Allgather(&minpot, 1, MPI_DOUBLE, minpotlist, 1, MPI_DOUBLE, SubComm);
+      int mincpu;
+
+      for(i = 0, mincpu = -1, minpot = 1.0e30; i < SubNTask; i++)
+        if(minpotlist[i] < minpot)
+          {
+            mincpu = i;
+            minpot = minpotlist[mincpu];
+          }
+
+      myfree(minpotlist);
+
+      if(mincpu < 0)
+        terminate("mincpu < 0");
+
+      if(SubThisTask == mincpu)
+        {
+          mostboundid = P[minindex].ID;
+        }
+
+      MPI_Bcast(&mostboundid, sizeof(mostboundid), MPI_BYTE, mincpu, SubComm);
+    }
+  else
+    {
+      if(minindex == -1)
+        terminate("minindex == -1");
+
+      mostboundid = P[minindex].ID;
+    }
+
+  /* let's get bulk velocity and the center-of-mass */
+  /* here we still take all particles */
+
+  for(j = 0; j < 3; j++)
+    s[j] = v[j] = 0;
+
+  for(i = 0; i < num; i++)
+    {
+      p = d[i].index;
+      for(j = 0; j < 3; j++)
+        {
+          ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);
+          s[j] += P[p].Mass * ddxx;
+          v[j] += P[p].Mass * P[p].Vel[j];
+        }
+      mass += P[p].Mass;
+
+      int ptype = P[p].Type;
+      mass_tab[ptype] += P[p].Mass;
+    }
+
+  if(parallel_flag)
+    {
+      double stot[3], vtot[3], masstot, mass_tabtot[NTYPES];
+
+      MPI_Allreduce(s, stot, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(&mass, &masstot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(v, vtot, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(mass_tab, mass_tabtot, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm);
+
+      mass = masstot;
+      for(j = 0; j < 3; j++)
+        {
+          s[j] = stot[j];
+          v[j] = vtot[j];
+        }
+
+      for(j = 0; j < NTYPES; j++)
+        mass_tab[j] = mass_tabtot[j];
+    }
+
+  for(j = 0; j < 3; j++)
+    {
+      s[j] /= mass; /* center of mass */
+      v[j] /= mass;
+      vel[j] = vel_to_phys * v[j];
+    }
+
+  for(j = 0; j < 3; j++)
+    {
+      s[j] += pos[j];
+
+      while(s[j] < 0)
+        s[j] += boxsize;
+      while(s[j] >= boxsize)
+        s[j] -= boxsize;
+      cm[j] = s[j];  // this is in comoving coordinates
+    }
+
+  disp = lx = ly = lz = 0;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  Jtot[0] = Jtot[1] = Jtot[2] = 0;
+  Jdm[0] = Jdm[1] = Jdm[2] = 0;
+  Jgas[0] = Jgas[1] = Jgas[2] = 0;
+  Jstars[0] = Jstars[1] = Jstars[2] = 0;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  rr_list = mymalloc("rr_list", sizeof(sort_r2list) * (num + 1));
+
+  for(i = 0; i < num; i++)
+    {
+      p = d[i].index;
+
+      for(j = 0, rr_tmp = 0, disp_tmp = 0; j < 3; j++)
+        {
+          ddxx  = GRAVITY_NEAREST_X(P[p].Pos[j] - s[j]);
+          dx[j] = All.cf_atime * ddxx;
+          dv[j] = vel_to_phys * (P[p].Vel[j] - v[j]);
+          dv[j] += H_of_a * dx[j];
+
+          disp_tmp += P[p].Mass * dv[j] * dv[j];
+          /* for rotation curve computation, take minimum of potential as center */
+          ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);
+          ddxx = All.cf_atime * ddxx;
+          rr_tmp += ddxx * ddxx;
+        }
+
+      lx += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+      ly += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+      lz += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      for(j = 0; j < 3; j++)  // hubble drifts in velocity now with respect to pot min which we consider as the centre of rotation
+        {
+          ddxx  = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);
+          dx[j] = All.cf_atime * ddxx;
+          dv[j] = vel_to_phys * (P[p].Vel[j] - v[j]);
+          dv[j] += H_of_a * dx[j];
+        }
+
+      int ptype = P[p].Type;
+
+      Ekin += (P[p].Mass / 2) * (dv[0] * dv[0] + dv[1] * dv[1] + dv[2] * dv[2]);
+      Epot += (P[p].Mass / 2) * PS[p].Potential;
+      if(P[p].Type == 0)
+        Ethr += P[p].Mass * SphP[PS[p].OldIndex].Utherm;
+
+      Jtot[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+      Jtot[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+      Jtot[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+
+      if(ptype == 1)  // dm illustris
+        {
+          Jdm[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+          Jdm[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+          Jdm[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+        }
+      if(ptype == 0)  // gas (incl. winds!)
+        {
+          Jgas[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+          Jgas[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+          Jgas[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+        }
+      if(ptype == 4)  // stars (previously: StarP[P[p].AuxDataID].BirthTime)
+        {
+          Jstars[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+          Jstars[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+          Jstars[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+        }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+      rr_tmp = sqrt(rr_tmp);
+
+      rr_list[i].mass = P[p].Mass;
+      rr_list[i].r    = rr_tmp;
+      disp += disp_tmp;
+    }
+
+  if(parallel_flag)
+    {
+      double spintot[3], disptot;
+      spin[0] = lx;
+      spin[1] = ly;
+      spin[2] = lz;
+      MPI_Allreduce(spin, spintot, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(&disp, &disptot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      disp = disptot;
+      lx   = spintot[0];
+      ly   = spintot[1];
+      lz   = spintot[2];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      MPI_Allreduce(MPI_IN_PLACE, &Ekin, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, &Epot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, &Ethr, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, Jtot, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, Jdm, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, Jgas, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, Jstars, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+    }
+
+  spin[0] = lx / mass;
+  spin[1] = ly / mass;
+  spin[2] = lz / mass;
+
+  veldisp = sqrt(disp / (3 * mass)); /* convert to 1d velocity dispersion */
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  // counter rotating mass fractions
+  CMFrac = 0;
+  for(i = 0; i < NTYPES; i++)
+    CMFracType[i] = 0;
+
+  for(i = 0; i < num; i++)
+    {
+      /* identify particle type */
+      p = d[i].index;
+
+      /* calculate particle radius */
+      for(j = 0; j < 3; j++)
+        {
+          ddxx  = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);  // counter-rotating mass calc with respect to pot min
+          dx[j] = All.cf_atime * ddxx;
+          dv[j] = vel_to_phys * (P[p].Vel[j] - v[j]);
+          dv[j] += H_of_a * dx[j];
+        }
+
+      int ptype = P[p].Type;
+
+      jpart[0] = P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+      jpart[1] = P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+      jpart[2] = P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+
+      if((Jtot[0] * jpart[0] + Jtot[1] * jpart[1] + Jtot[2] * jpart[2]) < 0.)
+        CMFrac += P[p].Mass / mass;
+
+      if(ptype == 1)  // dm illustris
+        if((Jdm[0] * jpart[0] + Jdm[1] * jpart[1] + Jdm[2] * jpart[2]) < 0.)
+          CMFracType[1] += P[p].Mass / mass_tab[1];
+      if(ptype == 0)  // gas (incl. winds!)
+        if((Jgas[0] * jpart[0] + Jgas[1] * jpart[1] + Jgas[2] * jpart[2]) < 0.)
+          CMFracType[0] += P[p].Mass / mass_tab[0];
+      if(ptype == 4)  // stars
+        if((Jstars[0] * jpart[0] + Jstars[1] * jpart[1] + Jstars[2] * jpart[2]) < 0.)
+          CMFracType[4] += P[p].Mass / mass_tab[4];
+    }
+
+  if(parallel_flag)
+    {
+      MPI_Allreduce(MPI_IN_PLACE, &CMFrac, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, CMFracType, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm);
+    }
+
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  if(parallel_flag)
+    parallel_sort_comm(rr_list, num, sizeof(sort_r2list), subfind_compare_dist_rotcurve, SubComm);
+  else
+    mysort(rr_list, num, sizeof(sort_r2list), subfind_compare_dist_rotcurve);
+
+  /* calculate cumulative mass */
+  for(i = 1; i < num; i++)
+    rr_list[i].mass += rr_list[i - 1].mass;
+
+  if(parallel_flag)
+    {
+      double mass_part = 0;
+      if(num)
+        mass_part = rr_list[num - 1].mass;
+      double *masslist = mymalloc("masslist", SubNTask * sizeof(double));
+      MPI_Allgather(&mass_part, 1, MPI_DOUBLE, masslist, 1, MPI_DOUBLE, SubComm);
+
+      double massbefore = 0;
+      for(i = 0; i < SubThisTask; i++)
+        massbefore += masslist[i];
+
+      for(i = 0; i < num; i++)
+        rr_list[i].mass += massbefore;
+
+      myfree(masslist);
+
+      /* now calculate rotation curve maximum and half mass radius */
+
+      double halfmassrad_loc  = 0;
+      sort_r2list *rr_lowlist = mymalloc("rr_lowlist", SubNTask * sizeof(sort_r2list));
+      sort_r2list low_element;
+      if(num > 0)
+        low_element = rr_list[0];
+      else
+        {
+          low_element.mass = 0;
+          low_element.r    = 0;
+        }
+      MPI_Allgather(&low_element, sizeof(sort_r2list), MPI_BYTE, rr_lowlist, sizeof(sort_r2list), MPI_BYTE, SubComm);
+
+      rr_list[num].mass = 0;
+      rr_list[num].r    = 0;
+
+      for(j = SubThisTask + 1; j < SubNTask; j++)
+        if(rr_lowlist[j].mass > 0)
+          {
+            rr_list[num] = rr_lowlist[j];
+            break;
+          }
+
+      myfree(rr_lowlist);
+
+      int *numlist = mymalloc("numlist", SubNTask * sizeof(int));
+      MPI_Allgather(&num, 1, MPI_INT, numlist, 1, MPI_INT, SubComm);
+
+      int nbefore = 0;
+      for(i = 0; i < SubThisTask; i++)
+        nbefore += numlist[i];
+
+      for(i = num - 1, max = 0, maxrad = 0; i >= 0; i--)
+        {
+          if((i + nbefore) > 5 && rr_list[i].mass > max * rr_list[i].r)
+            {
+              max    = rr_list[i].mass / rr_list[i].r;
+              maxrad = rr_list[i].r;
+            }
+
+          if(rr_list[i].mass < 0.5 * mass && rr_list[i + 1].mass >= 0.5 * mass)
+            halfmassrad_loc = 0.5 * (rr_list[i].r + rr_list[i + 1].r);
+        }
+
+      myfree(numlist);
+
+      MPI_Allreduce(&halfmassrad_loc, &halfmassrad, 1, MPI_DOUBLE, MPI_MAX, SubComm);
+      double *maxlist    = mymalloc("maxlist", SubNTask * sizeof(double));
+      double *maxradlist = mymalloc("maxradlist", SubNTask * sizeof(double));
+      MPI_Allgather(&max, 1, MPI_DOUBLE, maxlist, 1, MPI_DOUBLE, SubComm);
+      MPI_Allgather(&maxrad, 1, MPI_DOUBLE, maxradlist, 1, MPI_DOUBLE, SubComm);
+      for(i = 0, max = maxrad = 0; i < SubNTask; i++)
+        {
+          if(maxlist[i] > max)
+            {
+              max    = maxlist[i];
+              maxrad = maxradlist[i];
+            }
+        }
+      myfree(maxradlist);
+      myfree(maxlist);
+    }
+  else
+    {
+      for(i = num - 1, max = 0, maxrad = 0; i >= 0; i--)
+        {
+          if(i > 5 && rr_list[i].mass > max * rr_list[i].r)
+            {
+              max    = rr_list[i].mass / rr_list[i].r;
+              maxrad = rr_list[i].r;
+            }
+
+          if(i < num - 1)
+            if(rr_list[i].mass < 0.5 * mass && rr_list[i + 1].mass >= 0.5 * mass)
+              halfmassrad = 0.5 * (rr_list[i].r + rr_list[i + 1].r);
+        }
+    }
+
+  halfmassrad /= All.cf_atime;
+  vmax    = sqrt(All.G * max);
+  vmaxrad = maxrad / All.cf_atime;
+
+  myfree(rr_list);
+
+  /* half mass radii for different types */
+  /* need to recalculate len_type_loc first, because of special particle treatment in GFM */
+  for(j = 0; j < NTYPES; j++)
+    len_type_loc[j] = 0;
+
+  for(i = 0; i < num; i++)
+    {
+      p         = d[i].index;
+      int ptype = P[p].Type;
+
+      len_type_loc[ptype]++;
+    }
+
+  int itmp, type;
+  for(type = 0; type < NTYPES; type++)
+    {
+      rr_list = mymalloc("rr_list", sizeof(sort_r2list) * (len_type_loc[type] + 1));
+      itmp    = 0;
+      for(i = 0; i < num; i++)
+        {
+          p = d[i].index;
+
+          int ptype = P[p].Type;
+
+          if(ptype == type)
+            {
+              for(j = 0, rr_tmp = 0; j < 3; j++)
+                {
+                  ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);
+                  rr_tmp += ddxx * ddxx;
+                }
+
+              rr_tmp = sqrt(rr_tmp);
+
+              rr_list[itmp].mass = P[p].Mass;
+              rr_list[itmp].r    = rr_tmp;
+              itmp++;
+            }
+        }
+
+      if(itmp != len_type_loc[type])
+        terminate("should not occur: %d %d", itmp, len_type_loc[type]);
+
+      if(parallel_flag)
+        parallel_sort_comm(rr_list, len_type_loc[type], sizeof(sort_r2list), subfind_compare_dist_rotcurve, SubComm);
+      else
+        mysort(rr_list, len_type_loc[type], sizeof(sort_r2list), subfind_compare_dist_rotcurve);
+
+      /* calculate cumulative mass */
+      for(i = 1; i < len_type_loc[type]; i++)
+        rr_list[i].mass = rr_list[i - 1].mass + rr_list[i].mass;
+
+      if(parallel_flag)
+        {
+          double mass_part = 0;
+          if(len_type_loc[type])
+            mass_part = rr_list[len_type_loc[type] - 1].mass;
+          double *masslist = mymalloc("masslist", SubNTask * sizeof(double));
+          MPI_Allgather(&mass_part, 1, MPI_DOUBLE, masslist, 1, MPI_DOUBLE, SubComm);
+
+          double massbefore = 0;
+          for(i = 0; i < SubThisTask; i++)
+            massbefore += masslist[i];
+
+          for(i = 0; i < len_type_loc[type]; i++)
+            rr_list[i].mass += massbefore;
+
+          myfree(masslist);
+        }
+
+      /* now calculate half mass radii */
+      if(parallel_flag)
+        {
+          double halfmassrad_loc  = 0;
+          sort_r2list *rr_lowlist = mymalloc("rr_lowlist", SubNTask * sizeof(sort_r2list));
+          sort_r2list low_element;
+          if(len_type_loc[type] > 0)
+            low_element = rr_list[0];
+          else
+            {
+              low_element.mass = 0;
+              low_element.r    = 0;
+            }
+
+          MPI_Allgather(&low_element, sizeof(sort_r2list), MPI_BYTE, rr_lowlist, sizeof(sort_r2list), MPI_BYTE, SubComm);
+
+          rr_list[len_type_loc[type]].mass = 0;
+          rr_list[len_type_loc[type]].r    = 0;
+          for(j = SubThisTask + 1; j < SubNTask; j++)
+            if(rr_lowlist[j].mass > 0)
+              {
+                rr_list[len_type_loc[type]] = rr_lowlist[j];
+                break;
+              }
+
+          myfree(rr_lowlist);
+
+          for(i = len_type_loc[type] - 1; i >= 0; i--)
+            {
+              if(rr_list[i].mass < 0.5 * mass_tab[type] && rr_list[i + 1].mass >= 0.5 * mass_tab[type])
+                halfmassrad_loc = 0.5 * (rr_list[i].r + rr_list[i + 1].r);
+            }
+
+          MPI_Allreduce(&halfmassrad_loc, &halfmassradtype[type], 1, MPI_DOUBLE, MPI_MAX, SubComm);
+        }
+      else
+        {
+          for(i = len_type_loc[type] - 1; i >= 0; i--)
+            {
+              if(i < len_type_loc[type] - 1)
+                if(rr_list[i].mass < 0.5 * mass_tab[type] && rr_list[i + 1].mass >= 0.5 * mass_tab[type])
+                  halfmassradtype[type] = 0.5 * (rr_list[i].r + rr_list[i + 1].r);
+            }
+        }
+
+      myfree(rr_list);
+    }
+
+    /* properties of 'central galaxies', defined in several ways as particles within some radius:
+       either (stellar half mass radius) or SUBFIND_GAL_RADIUS_FAC*(stellar half mass radius) or (radius of Vmax) */
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  // centre of mass /velocity of particles in half/ stellar mass rad
+  sinrad[0] = sinrad[1] = sinrad[2] = 0;
+  sinhalfrad[0] = sinhalfrad[1] = sinhalfrad[2] = 0;
+  vinrad[0] = vinrad[1] = vinrad[2] = 0;
+  vinhalfrad[0] = vinhalfrad[1] = vinhalfrad[2] = 0;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  for(i = 0; i < num; i++)
+    {
+      /* identify particle type */
+      p         = d[i].index;
+      int ptype = P[p].Type;
+
+      /* calculate particle radius */
+      for(j = 0, rr_tmp = 0; j < 3; j++)
+        {
+          ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);
+          rr_tmp += ddxx * ddxx;
+        }
+      rr_tmp = sqrt(rr_tmp);
+
+      /* properties inside SUBFIND_GAL_RADIUS_FAC*(stellar half mass radius) */
+      if(rr_tmp < SUBFIND_GAL_RADIUS_FAC * halfmassradtype[4])
+        {
+          massinrad += P[p].Mass;
+          massinrad_tab[ptype] += P[p].Mass;
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+          for(j = 0; j < 3; j++)
+            {
+              ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);  // comoving (as it should be.)
+              sinrad[j] += P[p].Mass * ddxx;
+              vinrad[j] += P[p].Mass * P[p].Vel[j];
+            }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+          if(ptype == 0)
+            {
+              if(P[p].Type == 0)
+                {
+#ifdef USE_SFR
+                  sfrinrad += SphP[PS[p].OldIndex].Sfr; /* note: the SphP[] array has not been reordered */
+#endif                                                  /* #ifdef USE_SFR */
+                }
+            }
+        }
+
+      /* properties inside (stellar half mass radius) */
+      if(rr_tmp < 1.0 * halfmassradtype[4])
+        {
+          massinhalfrad += P[p].Mass;
+          massinhalfrad_tab[ptype] += P[p].Mass;
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+          for(j = 0; j < 3; j++)
+            {
+              ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);  // comoving (as it should be.)
+              sinhalfrad[j] += P[p].Mass * ddxx;
+              vinhalfrad[j] += P[p].Mass * P[p].Vel[j];
+            }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+          if(ptype == 0)
+            {
+              if(P[p].Type == 0)
+                {
+#ifdef USE_SFR
+                  sfrinhalfrad += SphP[PS[p].OldIndex].Sfr; /* note: the SphP[] array has not been reordered */
+#endif                                                      /* #ifdef USE_SFR */
+                }
+            }
+        }
+
+      /* properties inside (radius of Vmax) */
+      if(rr_tmp < 1.0 * vmaxrad)
+        {
+          massinmaxrad += P[p].Mass;
+          massinmaxrad_tab[ptype] += P[p].Mass;
+
+          if(ptype == 0)
+            {
+              if(P[p].Type == 0)
+                {
+#ifdef USE_SFR
+                  sfrinmaxrad += SphP[PS[p].OldIndex].Sfr; /* note: the SphP[] array has not been reordered */
+#endif                                                     /* #ifdef USE_SFR */
+                }
+            }
+        }
+    }
+
+    /* properties of star forming gas */
+#ifdef USE_SFR
+  for(i = 0; i < num; i++)
+    {
+      p = d[i].index;
+
+      if(P[p].Type == 0)
+        {
+          if(SphP[PS[p].OldIndex].Sfr > 0)
+            {
+              gasMassSfr += P[p].Mass;
+            }
+        }
+    }
+#endif /* #ifdef USE_SFR */
+
+#ifdef MHD
+  bfld_halo = bfld_disk = bfld_vol_halo = bfld_vol_disk = 0;
+
+  for(i = 0; i < num; i++)
+    {
+      p = d[i].index;
+
+      if(P[p].Type == 0)
+        {
+          double bfld2 = (SphP[PS[p].OldIndex].B[0] * SphP[PS[p].OldIndex].B[0]) +
+                         (SphP[PS[p].OldIndex].B[1] * SphP[PS[p].OldIndex].B[1]) +
+                         (SphP[PS[p].OldIndex].B[2] * SphP[PS[p].OldIndex].B[2]);
+          double vol = SphP[PS[p].OldIndex].Volume;
+
+          bfld_halo += bfld2 * vol;
+          bfld_vol_halo += vol;
+
+          /* calculate particle radius */
+          for(j = 0, rr_tmp = 0; j < 3; j++)
+            {
+              ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);
+              rr_tmp += ddxx * ddxx;
+            }
+          rr_tmp = sqrt(rr_tmp);
+
+          if(rr_tmp < SUBFIND_GAL_RADIUS_FAC * halfmassradtype[4])
+            {
+              bfld_disk += bfld2 * vol;
+              bfld_vol_disk += vol;
+            }
+        }
+    }
+#endif /* #ifdef MHD */
+
+  if(parallel_flag)
+    {
+      double massinradtot, massinrad_tabtot[NTYPES];
+      MPI_Allreduce(&massinrad, &massinradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(massinrad_tab, massinrad_tabtot, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm);
+      massinrad = massinradtot;
+      for(j = 0; j < NTYPES; j++)
+        massinrad_tab[j] = massinrad_tabtot[j];
+
+      double massinhalfradtot, massinhalfrad_tabtot[NTYPES];
+      MPI_Allreduce(&massinhalfrad, &massinhalfradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(massinhalfrad_tab, massinhalfrad_tabtot, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm);
+      massinhalfrad = massinhalfradtot;
+      for(j = 0; j < NTYPES; j++)
+        massinhalfrad_tab[j] = massinhalfrad_tabtot[j];
+
+      double massinmaxradtot, massinmaxrad_tabtot[NTYPES];
+      MPI_Allreduce(&massinmaxrad, &massinmaxradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(massinmaxrad_tab, massinmaxrad_tabtot, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm);
+      massinmaxrad = massinmaxradtot;
+      for(j = 0; j < NTYPES; j++)
+        massinmaxrad_tab[j] = massinmaxrad_tabtot[j];
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      MPI_Allreduce(MPI_IN_PLACE, sinrad, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, vinrad, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, sinhalfrad, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, vinhalfrad, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+#ifdef MHD
+      double bfld_halo_tot, bfld_disk_tot, bfld_vol_halo_tot, bfld_vol_disk_tot;
+      MPI_Allreduce(&bfld_halo, &bfld_halo_tot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(&bfld_vol_halo, &bfld_vol_halo_tot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(&bfld_disk, &bfld_disk_tot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(&bfld_vol_disk, &bfld_vol_disk_tot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+
+      bfld_halo     = bfld_halo_tot;
+      bfld_vol_halo = bfld_vol_halo_tot;
+      bfld_disk     = bfld_disk_tot;
+      bfld_vol_disk = bfld_vol_disk_tot;
+#endif /* #ifdef MHD */
+
+#ifdef USE_SFR
+      double sfrinradtot;
+      MPI_Allreduce(&sfrinrad, &sfrinradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      sfrinrad = sfrinradtot;
+
+      double sfrinhalfradtot;
+      MPI_Allreduce(&sfrinhalfrad, &sfrinhalfradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      sfrinhalfrad = sfrinhalfradtot;
+
+      double sfrinmaxradtot;
+      MPI_Allreduce(&sfrinmaxrad, &sfrinmaxradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      sfrinmaxrad = sfrinmaxradtot;
+
+      double gasMassSfrtot;
+      MPI_Allreduce(&gasMassSfr, &gasMassSfrtot, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      gasMassSfr = gasMassSfrtot;
+#endif /* #ifdef USE_SFR */
+    }
+
+  if(parallel_flag)
+    MPI_Allreduce(&num, &totlen, 1, MPI_INT, MPI_SUM, SubComm);
+  else
+    totlen = num;
+
+#ifdef MHD
+  if(bfld_vol_halo > 0.)
+    bfld_halo = sqrt(bfld_halo / bfld_vol_halo);
+  if(bfld_vol_disk > 0.)
+    bfld_disk = sqrt(bfld_disk / bfld_vol_disk);
+#endif /* #ifdef MHD */
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  // finish centre of mass of spheres
+  for(j = 0; j < 3; j++)
+    {
+      if(massinrad > 0)
+        {
+          sinrad[j] /= massinrad;
+          sinrad[j] += pos[j];
+
+          while(sinrad[j] < 0)
+            sinrad[j] += boxsize;
+          while(sinrad[j] >= boxsize)
+            sinrad[j] -= boxsize;
+
+          vinrad[j] /= massinrad;  // this is comoving (as it should be.)
+        }
+
+      if(massinhalfrad > 0)
+        {
+          sinhalfrad[j] /= massinhalfrad;
+          sinhalfrad[j] += pos[j];
+
+          while(sinhalfrad[j] < 0)
+            sinhalfrad[j] += boxsize;
+          while(sinhalfrad[j] >= boxsize)
+            sinhalfrad[j] -= boxsize;
+
+          vinhalfrad[j] /= massinhalfrad;
+        }
+    }
+
+  Jtot_inHalfRad[0] = Jtot_inHalfRad[1] = Jtot_inHalfRad[2] = 0;
+  Jdm_inHalfRad[0] = Jdm_inHalfRad[1] = Jdm_inHalfRad[2] = 0;
+  Jgas_inHalfRad[0] = Jgas_inHalfRad[1] = Jgas_inHalfRad[2] = 0;
+  Jstars_inHalfRad[0] = Jstars_inHalfRad[1] = Jstars_inHalfRad[2] = 0;
+  Jtot_inRad[0] = Jtot_inRad[1] = Jtot_inRad[2] = 0;
+  Jdm_inRad[0] = Jdm_inRad[1] = Jdm_inRad[2] = 0;
+  Jgas_inRad[0] = Jgas_inRad[1] = Jgas_inRad[2] = 0;
+  Jstars_inRad[0] = Jstars_inRad[1] = Jstars_inRad[2] = 0;
+
+  for(i = 0; i < num; i++)
+    {
+      /* identify particle type */
+      p = d[i].index;
+
+      /* calculate particle radius */
+      for(j = 0, rr_tmp = 0; j < 3; j++)
+        {
+          ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);
+          rr_tmp += ddxx * ddxx;
+        }
+      rr_tmp = sqrt(rr_tmp);
+
+      int ptype = P[p].Type;
+
+      /* properties inside SUBFIND_GAL_RADIUS_FAC*(stellar half mass radius) */
+      if((massinrad > 0) && (rr_tmp < SUBFIND_GAL_RADIUS_FAC * halfmassradtype[4]))
+        {
+          for(j = 0; j < 3; j++)
+            {
+              ddxx  = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);
+              dx[j] = All.cf_atime * ddxx;
+              dv[j] = vel_to_phys * (P[p].Vel[j] - vinrad[j]);
+              dv[j] += H_of_a * dx[j];
+            }
+
+          Jtot_inRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+          Jtot_inRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+          Jtot_inRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+
+          if(ptype == 1)  // dm illustris
+            {
+              Jdm_inRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+              Jdm_inRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+              Jdm_inRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+            }
+          if(ptype == 0)  // gas
+            {
+              Jgas_inRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+              Jgas_inRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+              Jgas_inRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+            }
+          if(ptype == 4)  // stars
+            {
+              Jstars_inRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+              Jstars_inRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+              Jstars_inRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+            }
+        }
+
+      /* properties inside (stellar half mass radius) */
+      if((massinhalfrad > 0) && (rr_tmp < 1.0 * halfmassradtype[4]))
+        {
+          for(j = 0; j < 3; j++)
+            {
+              ddxx  = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);
+              dx[j] = All.cf_atime * ddxx;
+              dv[j] = vel_to_phys * (P[p].Vel[j] - vinhalfrad[j]);
+              dv[j] += H_of_a * dx[j];
+            }
+
+          Jtot_inHalfRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+          Jtot_inHalfRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+          Jtot_inHalfRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+
+          if(ptype == 1)  // dm illustris
+            {
+              Jdm_inHalfRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+              Jdm_inHalfRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+              Jdm_inHalfRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+            }
+          if(ptype == 0)  // gas
+            {
+              Jgas_inHalfRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+              Jgas_inHalfRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+              Jgas_inHalfRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+            }
+          if(ptype == 4)  // stars
+            {
+              Jstars_inHalfRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+              Jstars_inHalfRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+              Jstars_inHalfRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+            }
+        }
+    }
+
+  if(parallel_flag)
+    {
+      MPI_Allreduce(MPI_IN_PLACE, Jtot_inRad, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, Jdm_inRad, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, Jgas_inRad, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, Jstars_inRad, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, Jtot_inHalfRad, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, Jdm_inHalfRad, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, Jgas_inHalfRad, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, Jstars_inHalfRad, 3, MPI_DOUBLE, MPI_SUM, SubComm);
+    }
+
+  // counter rotating mass fractions
+  CMFrac_inHalfRad = CMFrac_inRad = 0;
+  for(i = 0; i < NTYPES; i++)
+    CMFracType_inHalfRad[i] = CMFracType_inRad[i] = 0;
+
+  for(i = 0; i < num; i++)
+    {
+      /* identify particle type */
+      p = d[i].index;
+
+      /* calculate particle radius */
+      for(j = 0, rr_tmp = 0; j < 3; j++)
+        {
+          ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);  // counter-rotating mass calc with respect to pot min
+          rr_tmp += ddxx * ddxx;
+        }
+      rr_tmp = sqrt(rr_tmp);
+
+      int ptype = P[p].Type;
+
+      /* properties inside SUBFIND_GAL_RADIUS_FAC*(stellar half mass radius) */
+      if((massinrad > 0) && (rr_tmp < SUBFIND_GAL_RADIUS_FAC * halfmassradtype[4]))
+        {
+          for(j = 0; j < 3; j++)
+            {
+              ddxx  = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);
+              dx[j] = All.cf_atime * ddxx;
+              dv[j] = vel_to_phys * (P[p].Vel[j] - vinrad[j]);
+              dv[j] += H_of_a * dx[j];
+            }
+
+          jpart[0] = P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+          jpart[1] = P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+          jpart[2] = P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+
+          if((Jtot_inRad[0] * jpart[0] + Jtot_inRad[1] * jpart[1] + Jtot_inRad[2] * jpart[2]) < 0.)
+            CMFrac_inRad += P[p].Mass / massinrad;
+
+          if(ptype == 1)  // dm illustris
+            if((Jdm_inRad[0] * jpart[0] + Jdm_inRad[1] * jpart[1] + Jdm_inRad[2] * jpart[2]) < 0.)
+              CMFracType_inRad[1] += P[p].Mass / massinrad_tab[1];
+          if(ptype == 0)  // gas (incl. winds!)
+            if((Jgas_inRad[0] * jpart[0] + Jgas_inRad[1] * jpart[1] + Jgas_inRad[2] * jpart[2]) < 0.)
+              CMFracType_inRad[0] += P[p].Mass / massinrad_tab[0];
+          if(ptype == 4)  // stars
+            if((Jstars_inRad[0] * jpart[0] + Jstars_inRad[1] * jpart[1] + Jstars_inRad[2] * jpart[2]) < 0.)
+              CMFracType_inRad[4] += P[p].Mass / massinrad_tab[4];
+        }
+
+      /* properties inside (stellar half mass radius) */
+      if((massinhalfrad > 0) && (rr_tmp < 1.0 * halfmassradtype[4]))
+        {
+          for(j = 0; j < 3; j++)
+            {
+              ddxx  = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);
+              dx[j] = All.cf_atime * ddxx;
+              dv[j] = vel_to_phys * (P[p].Vel[j] - vinhalfrad[j]);
+              dv[j] += H_of_a * dx[j];
+            }
+
+          jpart[0] = P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]);
+          jpart[1] = P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]);
+          jpart[2] = P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]);
+
+          if((Jtot_inHalfRad[0] * jpart[0] + Jtot_inHalfRad[1] * jpart[1] + Jtot_inHalfRad[2] * jpart[2]) < 0.)
+            CMFrac_inHalfRad += P[p].Mass / massinhalfrad;
+
+          if(ptype == 1)  // dm illustris
+            if((Jdm_inHalfRad[0] * jpart[0] + Jdm_inHalfRad[1] * jpart[1] + Jdm_inHalfRad[2] * jpart[2]) < 0.)
+              CMFracType_inHalfRad[1] += P[p].Mass / massinhalfrad_tab[1];
+          if(ptype == 0)  // gas (incl. winds!)
+            if((Jgas_inHalfRad[0] * jpart[0] + Jgas_inHalfRad[1] * jpart[1] + Jgas_inHalfRad[2] * jpart[2]) < 0.)
+              CMFracType_inHalfRad[0] += P[p].Mass / massinhalfrad_tab[0];
+          if(ptype == 4)  // stars
+            if((Jstars_inHalfRad[0] * jpart[0] + Jstars_inHalfRad[1] * jpart[1] + Jstars_inHalfRad[2] * jpart[2]) < 0.)
+              CMFracType_inHalfRad[4] += P[p].Mass / massinhalfrad_tab[4];
+        }
+    }
+
+  if(parallel_flag)
+    {
+      MPI_Allreduce(MPI_IN_PLACE, &CMFrac_inRad, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, &CMFrac_inHalfRad, 1, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, CMFracType_inRad, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm);
+      MPI_Allreduce(MPI_IN_PLACE, CMFracType_inHalfRad, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm);
+    }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  /* now store the calculated properties in the subgroup structure */
+  if(parallel_flag == 0 || SubThisTask == 0)
+    {
+      subgroup->Len              = totlen;
+      subgroup->Mass             = mass;
+      subgroup->SubMassInRad     = massinrad;
+      subgroup->SubMassInHalfRad = massinhalfrad;
+      subgroup->SubMassInMaxRad  = massinmaxrad;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      subgroup->Ekin             = Ekin;
+      subgroup->Epot             = Epot;
+      subgroup->Ethr             = Ethr;
+      subgroup->CMFrac           = CMFrac;
+      subgroup->CMFrac_inHalfRad = CMFrac_inHalfRad;
+      subgroup->CMFrac_inRad     = CMFrac_inRad;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+#ifdef MHD
+      subgroup->Bfld_Halo = bfld_halo;
+      subgroup->Bfld_Disk = bfld_disk;
+#endif /* #ifdef MHD */
+
+      for(j = 0; j < 6; j++)
+        {
+          subgroup->MassType[j]             = mass_tab[j];
+          subgroup->LenType[j]              = len_type[j];
+          subgroup->SubHalfMassRadType[j]   = halfmassradtype[j];
+          subgroup->SubMassInRadType[j]     = massinrad_tab[j];
+          subgroup->SubMassInHalfRadType[j] = massinhalfrad_tab[j];
+          subgroup->SubMassInMaxRadType[j]  = massinmaxrad_tab[j];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+          subgroup->CMFracType[j]           = CMFracType[j];
+          subgroup->CMFracType_inHalfRad[j] = CMFracType_inHalfRad[j];
+          subgroup->CMFracType_inRad[j]     = CMFracType_inRad[j];
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+        }
+      for(j = 0; j < 3; j++)
+        {
+          subgroup->Pos[j]  = pos[j];
+          subgroup->Vel[j]  = vel[j];
+          subgroup->CM[j]   = cm[j];
+          subgroup->Spin[j] = spin[j];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+          subgroup->J[j]                = Jtot[j];
+          subgroup->Jdm[j]              = Jdm[j];
+          subgroup->Jgas[j]             = Jgas[j];
+          subgroup->Jstars[j]           = Jstars[j];
+          subgroup->J_inHalfRad[j]      = Jtot_inHalfRad[j];
+          subgroup->Jdm_inHalfRad[j]    = Jdm_inHalfRad[j];
+          subgroup->Jgas_inHalfRad[j]   = Jgas_inHalfRad[j];
+          subgroup->Jstars_inHalfRad[j] = Jstars_inHalfRad[j];
+          subgroup->J_inRad[j]          = Jtot_inRad[j];
+          subgroup->Jdm_inRad[j]        = Jdm_inRad[j];
+          subgroup->Jgas_inRad[j]       = Jgas_inRad[j];
+          subgroup->Jstars_inRad[j]     = Jstars_inRad[j];
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+        }
+
+      subgroup->SubMostBoundID = mostboundid;
+      subgroup->SubVelDisp     = veldisp;
+      subgroup->SubVmax        = vmax;
+      subgroup->SubVmaxRad     = vmaxrad;
+      subgroup->SubHalfMassRad = halfmassrad;
+
+#ifdef USE_SFR
+      subgroup->Sfr          = sfr;
+      subgroup->SfrInRad     = sfrinrad;
+      subgroup->SfrInHalfRad = sfrinhalfrad;
+      subgroup->SfrInMaxRad  = sfrinmaxrad;
+      subgroup->GasMassSfr   = gasMassSfr;
+#endif /* #ifdef USE_SFR */
+    }
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_reprocess.c b/src/amuse/community/arepo/src/subfind/subfind_reprocess.c
new file mode 100644
index 0000000000..c189d86001
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_reprocess.c
@@ -0,0 +1,240 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_reprocess.c
+ * \date        05/2018
+ * \brief       Routines to calculate additional group properties.
+ * \details     contains functions:
+ *                void subfind_add_grp_props_calc_fof_angular_momentum(int num,
+ *                  int ngroups_cat)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 14.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <inttypes.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../fof/fof.h"
+#include "subfind.h"
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+/*! \brief Angular momentum calculation for FOF groups.
+ *  \details Assigns a target task to every particle, exchanges particles, runs the collective or serial AM routine, then sends particles back.
+ *  \param[in] num Index of snapshot (only forwarded to the collective/serial AM routines).
+ *  \param[in] ngroups_cat Number of groups in group file (only forwarded to the collective/serial AM routines).
+ *
+ *  \return void
+ */
+void subfind_add_grp_props_calc_fof_angular_momentum(int num, int ngroups_cat)
+{
+  mpi_printf("FOF: Begin Angular Momentum Calculation for FOF Groups.\n");
+
+  /* assign target CPUs for the particles in groups */
+  /* the particles not in groups will be distributed such that a uniform particle load results */
+  double t0           = second();
+  int *count_loc_task = mymalloc_clear("count_loc_task", NTask * sizeof(int)); /* local grouped particles per target task */
+  int *count_task     = mymalloc("count_task", NTask * sizeof(int));           /* global sum of the above, later reused as free capacity */
+  int *count_free     = mymalloc("count_free", NTask * sizeof(int));           /* number of ungrouped particles held by each task */
+  int count_loc_free  = 0;                                                     /* number of ungrouped particles on this task */
+  /* first pass: choose the target task of every grouped particle and count grouped/ungrouped local particles */
+  for(int i = 0; i < NumPart; i++)
+    {
+      if(PS[i].GrNr < 0)
+        terminate("PS[i].GrNr=%d", PS[i].GrNr);
+
+      if(PS[i].GrNr < TotNgroups) /* particle is in a group */
+        {
+          if(PS[i].GrNr < Ncollective) /* we are in a collective group: spread its particles over the tasks assigned to it */
+            PS[i].TargetTask = ProcAssign[PS[i].GrNr].FirstTask + (i % ProcAssign[PS[i].GrNr].NTask);
+          else /* remaining groups are mapped round-robin onto the tasks from NprocsCollective upwards */
+            PS[i].TargetTask = ((PS[i].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective;
+
+          if(PS[i].TargetTask < 0 || PS[i].TargetTask >= NTask)
+            terminate("PS[i].TargetTask=%d PS[i].GrNr=%d", PS[i].TargetTask, PS[i].GrNr);
+
+          count_loc_task[PS[i].TargetTask]++;
+        }
+      else
+        count_loc_free++;
+
+      PS[i].TargetIndex = 0; /* unimportant here */
+    }
+  /* make the per-task counts of ungrouped particles and the global per-target-task group counts known everywhere */
+  MPI_Allgather(&count_loc_free, 1, MPI_INT, count_free, 1, MPI_INT, MPI_COMM_WORLD);
+  MPI_Allreduce(count_loc_task, count_task, NTask, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+  /* total number of particles (grouped plus ungrouped) over all tasks */
+  long long sum = 0;
+  for(int i = 0; i < NTask; i++)
+    sum += count_task[i] + count_free[i];
+  /* maxload is the average particle number per task, rounded up */
+  int maxload = (sum + NTask - 1) / NTask;
+  for(int i = 0; i < NTask; i++)
+    {
+      count_task[i] = maxload - count_task[i]; /* this is the amount that can fit on this task */
+      if(count_task[i] < 0)
+        count_task[i] = 0;
+    }
+  /* replay how the ungrouped particles of all lower-ranked tasks consume capacity, to find where this task starts */
+  int current_task = 0;
+
+  for(int i = 0; i < ThisTask; i++)
+    {
+      while(count_free[i] > 0 && current_task < NTask)
+        {
+          if(count_free[i] < count_task[current_task])
+            {
+              count_task[current_task] -= count_free[i];
+              count_free[i] = 0;
+            }
+          else
+            {
+              count_free[i] -= count_task[current_task];
+              count_task[current_task] = 0;
+              current_task++;
+            }
+        }
+    }
+  /* second pass: hand the remaining capacity, starting at current_task, to the local ungrouped particles */
+  for(int i = 0; i < NumPart; i++)
+    {
+      if(PS[i].GrNr >=
+         TotNgroups) /* particle not in a group. Can in principle stay but we move it such that a good load balance is obtained. */
+        {
+          while(count_task[current_task] == 0 && current_task < NTask - 1) /* skip tasks without capacity, but stop at the last task */
+            current_task++;
+
+          PS[i].TargetTask = current_task; /* particle not in any group, move it here so that uniform load is achieved */
+          count_task[current_task]--;
+        }
+    }
+  /* release the work arrays in reverse order of their allocation */
+  myfree(count_free);
+  myfree(count_task);
+  myfree(count_loc_task);
+
+  double balance = subfind_get_particle_balance();
+  mpi_printf("SUBFIND: particle balance=%g\n", balance);
+
+  /* distribute particles such that groups are completely on the CPU(s) that do the corresponding group(s) */
+  fof_subfind_exchange(MPI_COMM_WORLD);
+  double t1 = second(); /* t0 was taken at function entry, so the reported time also covers the target assignment above */
+  mpi_printf("SUBFIND: subfind_exchange() took %g sec\n", timediff(t0, t1));
+
+  balance = subfind_get_particle_balance();
+  mpi_printf("SUBFIND: particle balance for AM processing=%g\n", balance);
+
+  /* we can now split the communicator to give each collectively treated group its own processor set */
+  MPI_Comm_split(MPI_COMM_WORLD, CommSplitColor, ThisTask, &SubComm);
+  MPI_Comm_size(SubComm, &SubNTask);
+  MPI_Comm_rank(SubComm, &SubThisTask);
+  SubTagOffset = TagOffset;
+
+  /* here the execution paths for collective groups and serial groups branch. The collective CPUs work in small sets that each
+   * deal with one large group. The serial CPUs each deal with several halos by themselves
+   */
+  if(CommSplitColor < Ncollective) /* we are one of the CPUs that does a collective group */
+    {
+      /* we now apply a collective version of subfind to the group split across the processors belonging to communicator SubComm
+       * The relevant group is the one stored in Group[0] on SubThisTask==0.
+       */
+      subfind_fof_calc_am_collective(num, ngroups_cat);
+    }
+  else
+    {
+      /* now let us sort according to GrNr and Density. This step will temporarily break the association with SphP[] and other arrays!
+       */
+      submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart);
+      for(int i = 0; i < NumPart; i++)
+        {
+          PS[i].OldIndex      = i; /* remembered so that the reordering can be undone further below */
+          submp[i].index      = i;
+          submp[i].GrNr       = PS[i].GrNr;
+          submp[i].DM_Density = PS[i].Density;
+        }
+      qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_GrNr_DM_Density);
+      subfind_reorder_according_to_submp();
+      myfree(submp);
+
+      /* now we have the particles in each group consecutively */
+      if(SubThisTask == 0)
+        printf("SUBFIND-SERIAL: Start to do AM for %d small groups with serial subfind algorithm on %d processors (root-node=%d)\n",
+               TotNgroups - Ncollective, SubNTask, ThisTask);
+
+      /* we now apply a serial version of subfind to the local groups */
+      /* each call returns the offset handed to the next group; a local group that does not map to this task is a fatal error */
+      t0 = second();
+      for(int gr = 0, offset = 0; gr < Ngroups; gr++)
+        {
+          if(((Group[gr].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective == ThisTask)
+            offset = subfind_fof_calc_am_serial(gr, offset, num, ngroups_cat);
+          else
+            terminate("how come that we have this group number?");
+        }
+      /* wait for all tasks of SubComm before taking the end time */
+      MPI_Barrier(SubComm);
+      t1 = second();
+      if(SubThisTask == 0)
+        printf("SUBFIND-SERIAL: processing AM of serial groups took %g sec\n", timediff(t0, t1));
+
+      /* undo local rearrangement that made groups consecutive. After that, the association of SphP[] will be correct again */
+      submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart);
+      for(int i = 0; i < NumPart; i++)
+        {
+          submp[i].index    = i;
+          submp[i].OldIndex = PS[i].OldIndex;
+        }
+      qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_OldIndex);
+      subfind_reorder_according_to_submp();
+      myfree(submp);
+    }
+
+  /* free the communicator */
+  MPI_Comm_free(&SubComm);
+
+  /* distribute particles back to original CPU */
+  t0 = second();
+  for(int i = 0; i < NumPart; i++)
+    {
+      PS[i].TargetTask  = PS[i].OriginTask;
+      PS[i].TargetIndex = PS[i].OriginIndex;
+    }
+  /* second exchange: this time the targets are the stored origin task and origin index of each particle */
+  fof_subfind_exchange(MPI_COMM_WORLD);
+  t1 = second();
+  if(ThisTask == 0)
+    printf("SUBFIND: subfind_exchange() (for return to original CPU after AM)  took %g sec\n", timediff(t0, t1));
+
+  mpi_printf("FOF: Angular Momentum Calculation for FOF Groups finished successfully.\n");
+}
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_serial.c b/src/amuse/community/arepo/src/subfind/subfind_serial.c
new file mode 100644
index 0000000000..acc996ed02
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_serial.c
@@ -0,0 +1,807 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_serial.c
+ * \date        05/2018
+ * \brief       Processes the local groups in serial mode.
+ * \details     contains functions:
+ *                int subfind_process_group_serial(int gr, int Offs, int
+ *                  nsubgroups_cat)
+ *                int subfind_unbind(struct unbind_data *ud, int len, int
+ *                  *len_non_gas)
+ *                int subfind_fof_calc_am_serial(int gr, int Offs, int snapnr,
+ *                  int ngroups_cat)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 14.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef SUBFIND
+#include "../fof/fof.h"
+#include "subfind.h"
+
+static int *Head, *Next, *Tail, *Len; /* work arrays of subfind_process_group_serial(), allocated per group and indexed by particle index */
+
+/*! \brief Subhalo finding on each core individually.
+ *
+ *  \param[in] gr Group index.
+ *  \param[in] Offs Offset.
+ *  \param[in] nsubgroups_cat Passed on to subfind_determine_sub_halo_properties().
+ *
+ *  \return New offset.
+ */
+int subfind_process_group_serial(int gr, int Offs, int nsubgroups_cat)
+{
+  int i, j, k, p, count_cand, count, len, len_non_gas, N, nsubs, part_index, subnr, totlen;
+  static struct unbind_data *ud;
+  /* advance Offs to the first particle that carries the group number of group gr */
+  while(PS[Offs].GrNr != Group[gr].GrNr)
+    {
+      Offs++;
+      if(Offs >= NumPart)
+        {
+          char buf[1000];
+          sprintf(buf, "don't find a particle for groupnr=%d\n", Group[gr].GrNr);
+
+          for(int i = 0; i < NumPart; i++)
+            printf("task=%d i=%d PS[i].GrNr=%d\n", ThisTask, i, PS[i].GrNr);
+
+          terminate(buf);
+        }
+    }
+
+  N    = Group[gr].Len;
+  GrNr = Group[gr].GrNr;
+
+  subfind_loctree_treeallocate((int)(All.TreeAllocFactor * N) + NTopnodes, NumPart);
+  /* the N particles starting at Offs must all belong to this group; PS[Offs - 1] is only reported if it exists */
+  for(int i = 0; i < N; i++)
+    if(PS[Offs + i].GrNr != Group[gr].GrNr)
+      terminate("task=%d, gr=%d: don't have the number of particles for GrNr=%d i=%d group-len:N=%d found=%d before=%d\n", ThisTask,
+                gr, Group[gr].GrNr, i, N, PS[Offs + i].GrNr, Offs > 0 ? PS[Offs - 1].GrNr : -1);
+
+  candidates = (struct cand_dat *)mymalloc_movable(&candidates, "candidates", N * sizeof(struct cand_dat));
+
+  Head = (int *)mymalloc_movable(&Head, "Head", N * sizeof(int));
+  Next = (int *)mymalloc_movable(&Next, "Next", N * sizeof(int));
+  Tail = (int *)mymalloc_movable(&Tail, "Tail", N * sizeof(int));
+  Len  = (int *)mymalloc_movable(&Len, "Len", N * sizeof(int));
+  ud   = (struct unbind_data *)mymalloc_movable(&ud, "ud", N * sizeof(struct unbind_data));
+  /* shift the work arrays so that they can be indexed with the particle index Offs ... Offs + N - 1 */
+  Head -= Offs;
+  Next -= Offs;
+  Tail -= Offs;
+  Len -= Offs;
+
+  for(int i = 0; i < N; i++)
+    ud[i].index = Offs + i;
+
+  subfind_loctree_findExtent(N, ud);
+
+  subfind_loctree_treebuild(N, &ud); /* build tree for all particles of this group */
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  // compute the binding energy of FOF group
+  double Epot = 0;
+  for(int i = 0; i < N; i++)
+    {
+      int p      = ud[i].index;
+      double pot = subfind_loctree_treeevaluate_potential(p);
+
+      // note: add self-energy
+      pot += P[p].Mass / (All.ForceSoftening[P[p].SofteningType] / 2.8);  // (P[p].Soft / 2.8);
+
+      // multiply with G, scale by scale factor
+      pot *= All.G / All.cf_atime;
+
+      Epot += (P[p].Mass / 2) * pot;
+    }
+  Group[gr].Epot = Epot;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  for(int i = Offs; i < Offs + N; i++)
+    Head[i] = Next[i] = Tail[i] = -1;
+
+  /* note: particles are already ordered in the order of decreasing density */
+
+  int ss, ngbs, ndiff, head = 0, head_attach;
+  int listofdifferent[2], prev;
+  int ngb_index, rank;
+  int desngb = All.DesLinkNgb;
+
+  for(i = 0, count_cand = 0; i < N; i++)
+    {
+      part_index = Offs + i;
+
+      MyDouble *pos;
+#ifdef CELL_CENTER_GRAVITY
+      if(P[part_index].Type == 0)
+        pos = PS[part_index].Center;
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[part_index].Pos;
+
+      subfind_locngb_treefind(pos, desngb, PS[part_index].Hsml);
+
+      /* note: returned neighbours are already sorted by distance */
+
+      for(k = 0, ndiff = 0, ngbs = 0; k < desngb && ngbs < 2 && ndiff < 2; k++)
+        {
+          ngb_index = R2list[k].index;
+
+          if(ngb_index != part_index) /* to exclude the particle itself */
+            {
+              /* we only look at neighbours that are denser */
+              if(PS[ngb_index].Density > PS[part_index].Density)
+                {
+                  ngbs++;
+
+                  if(Head[ngb_index] >= 0) /* neighbor is attached to a group */
+                    {
+                      if(ndiff == 1)
+                        if(listofdifferent[0] == Head[ngb_index])
+                          continue;
+
+                      /* a new group has been found */
+                      listofdifferent[ndiff++] = Head[ngb_index];
+                    }
+                  else
+                    terminate(
+                        "this may not occur: ThisTask=%d gr=%d k=%d i=%d part_index=%d ngb_index = %d  head[ngb_index]=%d "
+                        "P[part_index].DM_Density=%g %g GrNrs= %d %d \n",
+                        ThisTask, gr, k, i, part_index, ngb_index, Head[ngb_index], PS[part_index].Density, PS[ngb_index].Density,
+                        PS[part_index].GrNr, PS[ngb_index].GrNr);
+                }
+            }
+        }
+
+      switch(ndiff) /* treat the different possible cases */
+        {
+          case 0: /* this appears to be a lonely maximum -> new group */
+            head             = part_index;
+            Head[part_index] = Tail[part_index] = part_index;
+            Len[part_index]                     = 1;
+            Next[part_index]                    = -1;
+            break;
+
+          case 1: /* the particle is attached to exactly one group */
+            head             = listofdifferent[0];
+            Head[part_index] = head;
+            Next[Tail[head]] = part_index;
+            Tail[head]       = part_index;
+            Len[head]++;
+            Next[part_index] = -1;
+            break;
+
+          case 2: /* the particle merges two groups together */
+            head        = listofdifferent[0];
+            head_attach = listofdifferent[1];
+            if(Len[head_attach] > Len[head] ||
+               (Len[head_attach] == Len[head] &&
+                head_attach < head)) /* other group is longer, swap them. for equal length, the smaller head index stays the main group */
+              {
+                head        = listofdifferent[1];
+                head_attach = listofdifferent[0];
+              }
+
+            /* only in case the attached group is long enough we bother to register it
+               as a subhalo candidate */
+
+            if(Len[head_attach] >= All.DesLinkNgb)
+              {
+                candidates[count_cand].len  = Len[head_attach];
+                candidates[count_cand].head = Head[head_attach];
+                count_cand++;
+              }
+
+            /* now join the two groups */
+            Next[Tail[head]] = head_attach;
+            Tail[head]       = Tail[head_attach];
+            Len[head] += Len[head_attach];
+
+            ss = head_attach;
+            do
+              {
+                Head[ss] = head;
+              }
+            while((ss = Next[ss]) >= 0);
+
+            /* finally, attach the particle */
+            Head[part_index] = head;
+            Next[Tail[head]] = part_index;
+            Tail[head]       = part_index;
+            Len[head]++;
+            Next[part_index] = -1;
+            break;
+
+          default:
+            terminate("can't be!");
+            break;
+        }
+    }
+
+  /* add the full thing as a subhalo candidate */
+  for(i = 0, prev = -1; i < N; i++)
+    {
+      if(Head[Offs + i] == Offs + i)
+        if(Next[Tail[Offs + i]] == -1)
+          {
+            if(prev < 0)
+              head = Offs + i;
+            if(prev >= 0)
+              Next[prev] = Offs + i;
+
+            prev = Tail[Offs + i];
+          }
+    }
+
+  candidates[count_cand].len  = N;
+  candidates[count_cand].head = head;
+  count_cand++;
+
+  /* go through them once and assign the rank */
+  for(i = 0, p = head, rank = 0; i < N; i++)
+    {
+      Len[p] = rank++;
+      p      = Next[p];
+    }
+
+  /* for each candidate, we now pull out the rank of its head */
+  for(k = 0; k < count_cand; k++)
+    candidates[k].rank = Len[candidates[k].head];
+
+  for(i = Offs; i < Offs + N; i++)
+    Tail[i] = -1;
+  /* unbind every candidate; Tail[] is reused to flag particles that already belong to a substructure */
+  for(k = 0, nsubs = 0; k < count_cand; k++)
+    {
+      for(i = 0, p = candidates[k].head, len = 0; i < candidates[k].len; i++, p = Next[p])
+        if(Tail[p] < 0)
+          ud[len++].index = p;
+
+      if(len >= All.DesLinkNgb)
+        len = subfind_unbind(ud, len, &len_non_gas);
+
+      if(len >= All.DesLinkNgb)
+        {
+          /* ok, we found a substructure */
+
+          for(i = 0; i < len; i++)
+            Tail[ud[i].index] = nsubs; /* we use this to flag the substructures */
+
+          candidates[k].nsub         = nsubs;
+          candidates[k].bound_length = len;
+          nsubs++;
+        }
+      else
+        {
+          candidates[k].nsub         = -1;
+          candidates[k].bound_length = 0;
+        }
+    }
+
+#ifdef VERBOSE
+  printf("\nGroupLen=%d  (gr=%d)\n", N, gr);
+  printf("Number of substructures: %d (before unbinding: %d)\n", nsubs, count_cand);
+#endif /* #ifdef VERBOSE */
+
+  mysort(candidates, count_cand, sizeof(struct cand_dat), subfind_compare_serial_candidates_boundlength);
+
+  /* now we determine the parent subhalo for each candidate */
+  for(k = 0; k < count_cand; k++)
+    {
+      candidates[k].subnr  = k;
+      candidates[k].parent = 0;
+    }
+
+  mysort(candidates, count_cand, sizeof(struct cand_dat), subfind_compare_serial_candidates_rank);
+
+  for(k = 0; k < count_cand; k++)
+    {
+      for(j = k + 1; j < count_cand; j++)
+        {
+          if(candidates[j].rank > candidates[k].rank + candidates[k].len)
+            break;
+
+          if(candidates[k].rank + candidates[k].len >= candidates[j].rank + candidates[j].len)
+            {
+              if(candidates[k].bound_length >= All.DesLinkNgb)
+                candidates[j].parent = candidates[k].subnr;
+            }
+          else
+            {
+              char buf[1000];
+              sprintf(buf, "k=%d|%d has rank=%d and len=%d.  j=%d has rank=%d and len=%d bound=%d\n", k, count_cand,
+                      (int)candidates[k].rank, (int)candidates[k].len, j, (int)candidates[j].rank,
+                      (int)candidates[j].len, (int)candidates[j].bound_length);
+              terminate(buf);
+            }
+        }
+    }
+
+  mysort(candidates, count_cand, sizeof(struct cand_dat), subfind_compare_serial_candidates_subnr);
+
+  /* now determine the properties */
+  Group[gr].Nsubs  = nsubs;
+  Group[gr].Pos[0] = Group[gr].CM[0];
+  Group[gr].Pos[1] = Group[gr].CM[1];
+  Group[gr].Pos[2] = Group[gr].CM[2];
+
+  for(k = 0, subnr = 0, totlen = 0; k < nsubs; k++)
+    {
+      len = candidates[k].bound_length;
+
+#ifdef VERBOSE
+      printf("subnr=%d  SubLen=%d\n", subnr, len);
+#endif /* #ifdef VERBOSE */
+
+      totlen += len;
+
+      for(i = 0, p = candidates[k].head, count = 0; i < candidates[k].len; i++)
+        {
+          if(Tail[p] == candidates[k].nsub)
+            ud[count++].index = p;
+
+          p = Next[p];
+        }
+
+      if(count != len)
+        terminate("count=%d != len=%d  k=%d subnr=%d  nsubs=%d", count, len, k, subnr, nsubs);
+      /* SubGroup[Nsubgroups] is written below, so stop at equality (assumes SubGroup holds MaxNsubgroups entries) */
+      if(Nsubgroups >= MaxNsubgroups)
+        terminate("Nsubgroups = %d >= MaxNsubgroups = %d", Nsubgroups, MaxNsubgroups);
+
+      subfind_determine_sub_halo_properties(ud, len, &SubGroup[Nsubgroups], GrNr, subnr, 0, nsubgroups_cat);
+
+      SubGroup[Nsubgroups].SubParent = candidates[k].parent;
+      SubGroup[Nsubgroups].SubNr     = subnr;
+      SubGroup[Nsubgroups].GrNr      = Group[gr].GrNr;
+
+      if(subnr == 0)
+        {
+          for(j = 0; j < 3; j++)
+            Group[gr].Pos[j] = SubGroup[Nsubgroups].Pos[j];
+        }
+
+      Nsubgroups++;
+
+      /* Let's now assign the subgroup number */
+
+      for(i = 0; i < len; i++)
+        PS[ud[i].index].SubNr = subnr;
+
+      subnr++;
+    }
+
+#ifdef VERBOSE
+  printf("Fuzz=%d\n", N - totlen);
+#endif /* #ifdef VERBOSE */
+
+  myfree(ud);
+  myfree(Len + Offs);
+  myfree(Tail + Offs);
+  myfree(Next + Offs);
+  myfree(Head + Offs);
+
+  myfree(candidates);
+
+  subfind_loctree_treefree();
+
+  return Offs;
+}
+
+/*! \brief Unbinding algorithm.
+ *
+ *  \param[in, out] ud Unbind data.
+ *  \param[in] len length of ud array.
+ *  \param[out] len_non_gas Number of particles which are not gas cells.
+ *
+ *  \return Length of array minus the unbound particles.
+ */
+int subfind_unbind(struct unbind_data *ud, int len, int *len_non_gas)
+{
+  double *bnd_energy, energy_limit, weakly_bound_limit = 0;
+  int i, j, p, minindex, unbound, phaseflag, iter = 0;
+  double ddxx, s[3], dx[3], v[3], dv[3], pos[3];
+  double vel_to_phys, H_of_a, atime, pot, minpot = 0;
+  double boxsize, xtmp;
+  double TotMass;
+
+  boxsize = All.BoxSize;
+
+  if(All.ComovingIntegrationOn)
+    {
+      vel_to_phys = 1.0 / All.Time;
+      H_of_a      = hubble_function(All.Time);
+      atime       = All.Time;
+    }
+  else
+    {
+      vel_to_phys = atime = 1;
+      H_of_a              = 0;
+    }
+
+  bnd_energy = (double *)mymalloc("bnd_energy", len * sizeof(double));
+
+  phaseflag = 0; /* this means we will recompute the potential for all particles */
+  /* each pass: rebuild the tree, update potentials and binding energies, then drop unbound particles */
+  do
+    {
+      subfind_loctree_treebuild(len, &ud);
+
+      /* let's compute the potential  */
+
+      if(phaseflag == 0) /* redo it for all the particles */
+        {
+          for(i = 0, minindex = -1, minpot = 1.0e30; i < len; i++)
+            {
+              p = ud[i].index;
+
+              pot = subfind_loctree_treeevaluate_potential(p);
+
+              PS[p].Potential = All.G / All.cf_atime * pot;
+
+              if(PS[p].Potential < minpot || minindex == -1)
+                {
+                  minpot   = PS[p].Potential;
+                  minindex = p;
+                }
+            }
+
+#ifdef CELL_CENTER_GRAVITY
+          if(P[minindex].Type == 0)
+            {
+              for(j = 0; j < 3; j++)
+                pos[j] = PS[minindex].Center[j]; /* position of minimum potential */
+            }
+          else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+            {
+              for(j = 0; j < 3; j++)
+                pos[j] = P[minindex].Pos[j]; /* position of minimum potential */
+            }
+        }
+      else
+        {
+          /* we only repeat for those close to the unbinding threshold */
+          for(i = 0; i < len; i++)
+            {
+              p = ud[i].index;
+
+              if(PS[p].BindingEnergy >= weakly_bound_limit)
+                {
+                  pot = subfind_loctree_treeevaluate_potential(p);
+                  /* store the fresh value, scaled exactly as in the full pass above (do not rescale the old one) */
+                  PS[p].Potential = All.G / All.cf_atime * pot;
+                }
+            }
+        }
+
+      /* let's get bulk velocity and the center-of-mass */
+
+      v[0] = v[1] = v[2] = 0;
+      s[0] = s[1] = s[2] = 0;
+
+      for(i = 0, TotMass = 0; i < len; i++)
+        {
+          p = ud[i].index;
+
+          for(j = 0; j < 3; j++)
+            {
+#ifdef CELL_CENTER_GRAVITY
+              if(P[p].Type == 0)
+                ddxx = GRAVITY_NEAREST_X(PS[p].Center[j] - pos[j]);
+              else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+                ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]);
+              s[j] += P[p].Mass * ddxx;
+              v[j] += P[p].Mass * P[p].Vel[j];
+            }
+          TotMass += P[p].Mass;
+        }
+
+      for(j = 0; j < 3; j++)
+        {
+          v[j] /= TotMass;
+          s[j] /= TotMass; /* center-of-mass */
+
+          s[j] += pos[j];
+
+          while(s[j] < 0)
+            s[j] += boxsize;
+          while(s[j] >= boxsize)
+            s[j] -= boxsize;
+        }
+
+      for(i = 0; i < len; i++)
+        {
+          p = ud[i].index;
+
+          for(j = 0; j < 3; j++)
+            {
+              dv[j] = vel_to_phys * (P[p].Vel[j] - v[j]);
+#ifdef CELL_CENTER_GRAVITY
+              if(P[p].Type == 0)
+                dx[j] = atime * GRAVITY_NEAREST_X(PS[p].Center[j] - s[j]);
+              else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+                dx[j] = atime * GRAVITY_NEAREST_X(P[p].Pos[j] - s[j]);
+
+              dv[j] += H_of_a * dx[j];
+            }
+
+          PS[p].BindingEnergy = PS[p].Potential + 0.5 * (dv[0] * dv[0] + dv[1] * dv[1] + dv[2] * dv[2]);
+          PS[p].BindingEnergy +=
+              All.G / All.cf_atime * P[p].Mass / (All.ForceSoftening[P[p].SofteningType] / 2.8); /* note: add self-energy */
+
+          if(P[p].Type == 0)
+            PS[p].BindingEnergy += PS[p].Utherm;
+
+          bnd_energy[i] = PS[p].BindingEnergy;
+        }
+
+      mysort(bnd_energy, len, sizeof(double), subfind_compare_binding_energy); /* largest comes first! */
+
+      energy_limit = bnd_energy[(int)(0.25 * len)];
+      /* scan the sorted energies: +1 for each positive entry, -1 otherwise, and stop once the tally is <= 0 */
+      for(i = 0, unbound = 0; i < len - 1; i++)
+        {
+          if(bnd_energy[i] > 0)
+            unbound++;
+          else
+            unbound--;
+
+          if(unbound <= 0)
+            break;
+        }
+      weakly_bound_limit = bnd_energy[i];
+
+      /* now omit unbound particles,  but at most 1/4 of the original size */
+
+      for(i = 0, unbound = 0, *len_non_gas = 0; i < len; i++)
+        {
+          p = ud[i].index;
+          if(PS[p].BindingEnergy > 0 && PS[p].BindingEnergy > energy_limit)
+            {
+              unbound++;
+              ud[i] = ud[len - 1];
+              i--;
+              len--;
+            }
+          else if(P[p].Type != 0)
+            (*len_non_gas)++;
+        }
+
+      if(len < All.DesLinkNgb)
+        break;
+
+      if(phaseflag == 0)
+        {
+          if(unbound > 0)
+            phaseflag = 1;
+        }
+      else
+        {
+          if(unbound == 0)
+            {
+              phaseflag = 0; /* this will make us repeat everything once more for all particles */
+              unbound   = 1;
+            }
+        }
+
+      if(iter++ > MAXITER)
+        terminate("iter > MAXITER = %d", MAXITER);
+    }
+  while(unbound > 0);
+
+  myfree(bnd_energy);
+
+  return (len);
+}
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+/*! \brief Serial version of angular momentum calculation.
+ *
+ *  \param[in] gr Group index.
+ *  \param[in] Offs Offset of group (first index in PS).
+ *  \param[in] snapnr (unused)
+ *  \param[in] ngroups_cat (unused)
+ */
+int subfind_fof_calc_am_serial(int gr, int Offs, int snapnr, int ngroups_cat)
+{
+  long long index;
+  int len, i, k;
+  double Pos_pbc[3], Vel_tot[3], gr_Jtot[3], gr_Jdm[3], gr_Jgas[3], gr_Jstars[3], jpart[3];
+  double gr_CMFrac, gr_CMFracType[NTYPES], gr_Ekin, gr_Ethr;
+  int gr_len_dm;
+  double gr_mass, gr_mass_gas, gr_mass_stars;
+  int ptype;
+  /* advance Offs to the first particle that carries the group number of group gr */
+  while(PS[Offs].GrNr != Group[gr].GrNr)
+    {
+      Offs++;
+      if(Offs >= NumPart)
+        {
+          char buf[1000];
+          sprintf(buf, "don't find a particle for groupnr=%d\n", Group[gr].GrNr);
+
+          for(i = 0; i < NumPart; i++)
+            printf("task=%d i=%d PS[i].GrNr=%d\n", ThisTask, i, PS[i].GrNr);
+
+          terminate(buf);
+        }
+    }
+
+  len = Group[gr].Len;
+
+  struct unbind_data *ud = (struct unbind_data *)mymalloc("ud", len * sizeof(struct unbind_data));
+
+  // get all fof particles
+  for(i = 0; i < len; i++)
+    ud[i].index = Offs + i;
+
+  // initialize
+  gr_CMFrac = 0;
+  gr_Ekin   = 0;
+  gr_Ethr   = 0;
+
+  for(k = 0; k < 3; k++)
+    {
+      gr_Jtot[k]   = 0;
+      gr_Jdm[k]    = 0;
+      gr_Jgas[k]   = 0;
+      gr_Jstars[k] = 0;
+    }
+  for(k = 0; k < NTYPES; k++)
+    {
+      gr_CMFracType[k] = 0;
+    }
+
+  // calc angular momentum for dm, gas, stars
+  for(k = 0; k < len; k++)
+    {
+      index = ud[k].index;
+      ptype = P[index].Type;
+
+      for(i = 0; i < 3; i++)
+        Pos_pbc[i] = P[index].Pos[i] - Group[gr].Pos[i];
+
+      for(i = 0; i < 3; i++)
+        Pos_pbc[i] = fof_periodic(Pos_pbc[i]);
+
+      for(i = 0; i < 3; i++)
+        Pos_pbc[i] = Pos_pbc[i] * All.cf_atime;  // units: phys kpc/h
+
+      for(i = 0; i < 3; i++)
+        Vel_tot[i] = P[index].Vel[i] / All.cf_atime - Group[gr].Vel[i] / All.cf_atime + All.cf_Hrate * Pos_pbc[i];
+
+      gr_Ekin += (P[index].Mass / 2) * (Vel_tot[0] * Vel_tot[0] + Vel_tot[1] * Vel_tot[1] + Vel_tot[2] * Vel_tot[2]);
+      if(ptype == 0)
+        gr_Ethr += P[index].Mass * SphP[PS[index].OldIndex].Utherm;
+
+      gr_Jtot[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]);
+      gr_Jtot[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]);
+      gr_Jtot[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]);
+
+      if(ptype == 1)  // dm illustris
+        {
+          gr_Jdm[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]);
+          gr_Jdm[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]);
+          gr_Jdm[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]);
+        }
+      if(ptype == 0)  // gas (incl. winds)
+        {
+          gr_Jgas[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]);
+          gr_Jgas[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]);
+          gr_Jgas[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]);
+        }
+      if(ptype == 4)  // stars
+        {
+          gr_Jstars[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]);
+          gr_Jstars[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]);
+          gr_Jstars[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]);
+        }
+    }
+
+  Group[gr].Ekin = gr_Ekin;
+  Group[gr].Ethr = gr_Ethr;
+  for(i = 0; i < 3; i++)
+    {
+      Group[gr].J[i]      = gr_Jtot[i];
+      Group[gr].JDM[i]    = gr_Jdm[i];
+      Group[gr].JGas[i]   = gr_Jgas[i];
+      Group[gr].JStars[i] = gr_Jstars[i];
+    }
+
+  // calc counter-rotating fractions
+  gr_len_dm = 0;
+  gr_mass = gr_mass_gas = gr_mass_stars = 0;
+
+  for(k = 0; k < len; k++)
+    {
+      index = ud[k].index;
+      ptype = P[index].Type;
+
+      for(i = 0; i < 3; i++)
+        Pos_pbc[i] = P[index].Pos[i] - Group[gr].Pos[i];
+
+      for(i = 0; i < 3; i++)
+        Pos_pbc[i] = fof_periodic(Pos_pbc[i]);
+
+      for(i = 0; i < 3; i++)
+        Pos_pbc[i] = Pos_pbc[i] * All.cf_atime;  // units: phys kpc/h
+
+      for(i = 0; i < 3; i++)
+        Vel_tot[i] = P[index].Vel[i] / All.cf_atime - Group[gr].Vel[i] / All.cf_atime + All.cf_Hrate * Pos_pbc[i];
+
+      jpart[0] = P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]);
+      jpart[1] = P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]);
+      jpart[2] = P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]);
+
+      gr_mass += P[index].Mass;
+      if((gr_Jtot[0] * jpart[0] + gr_Jtot[1] * jpart[1] + gr_Jtot[2] * jpart[2]) < 0.)
+        gr_CMFrac += P[index].Mass;  // / Group[gr].Mass;
+
+      if(ptype == 1)  // dm illustris
+        {
+          gr_len_dm++;
+          if((gr_Jdm[0] * jpart[0] + gr_Jdm[1] * jpart[1] + gr_Jdm[2] * jpart[2]) < 0.)
+            gr_CMFracType[1]++;
+        }
+      if(ptype == 0)  // gas (incl. winds)
+        {
+          gr_mass_gas += P[index].Mass;
+          if((gr_Jgas[0] * jpart[0] + gr_Jgas[1] * jpart[1] + gr_Jgas[2] * jpart[2]) < 0.)
+            gr_CMFracType[0] += P[index].Mass;  // / Group[gr].MassType[0];
+        }
+      if(ptype == 4)  // stars
+        {
+          gr_mass_stars += P[index].Mass;
+          if((gr_Jstars[0] * jpart[0] + gr_Jstars[1] * jpart[1] + gr_Jstars[2] * jpart[2]) < 0.)
+            gr_CMFracType[4] += P[index].Mass;  // / Group[gr].MassType[4];
+        }
+    }
+  // normalise (dm by particle count, the rest by mass); an absent component gives 0 instead of 0/0 = NaN
+  gr_CMFrac        = gr_mass > 0 ? gr_CMFrac / gr_mass : 0;                     // Group[gr].Mass;
+  gr_CMFracType[1] = gr_len_dm > 0 ? gr_CMFracType[1] / gr_len_dm : 0;
+  gr_CMFracType[0] = gr_mass_gas > 0 ? gr_CMFracType[0] / gr_mass_gas : 0;      // Group[gr].MassType[0];
+  gr_CMFracType[4] = gr_mass_stars > 0 ? gr_CMFracType[4] / gr_mass_stars : 0;  // Group[gr].MassType[4];
+
+  Group[gr].CMFrac = gr_CMFrac;
+  for(i = 0; i < NTYPES; i++)
+    Group[gr].CMFracType[i] = gr_CMFracType[i];
+
+  myfree(ud);
+  return Offs;
+}
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_so.c b/src/amuse/community/arepo/src/subfind/subfind_so.c
new file mode 100644
index 0000000000..5f3774b6ea
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_so.c
@@ -0,0 +1,964 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_so.c
+ * \date        05/2018
+ * \brief       Spherical overdensity algorithm for subfind.
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                double subfind_overdensity(void)
+ *                static int subfind_overdensity_evaluate(int target, int mode,
+ *                  int threadid)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 14.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef SUBFIND
+
+#include "../fof/fof.h"
+#include "subfind.h"
+
+static double *R200, *M200; /* per-group values, indexed like Group[]; M200[i] receives out->Mass in out2particle() */
+
+static char *Todo;            /* NOTE(review): not used in the part of the file visible here */
+static MyFloat *Left, *Right; /* NOTE(review): not used in the part of the file visible here */
+static int mainstep;          /* selects which result fields out2particle() stores or accumulates (0, 1 or 2) */
+
+static int subfind_overdensity_evaluate(int target, int mode, int threadid); /* forward declaration */
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+/*! \brief Structure for angular momentum properties.
+ */
+static struct Angular_Momentum
+{
+  double Pmom[3];            /* set/accumulated by out2particle() when mainstep == 0 */
+  double MassType[NTYPES];   /* per particle type; mainstep == 0 */
+  double Jtot[3];            /* mainstep == 1 */
+  double Jdm[3];             /* mainstep == 1 */
+  double Jgas[3];            /* mainstep == 1 */
+  double Jstars[3];          /* mainstep == 1 */
+  int LenType[NTYPES];       /* per particle type; mainstep == 0 */
+  double CMFrac;             /* mainstep == 2 */
+  double CMFracType[NTYPES]; /* per particle type; mainstep == 2 */
+  double Ekin;               /* mainstep == 1 */
+  double Epot;               /* not touched by out2particle() */
+  double Ethr;               /* mainstep == 1 */
+  double N200;               /* mainstep == 0 */
+} * AngMom;                  /* array indexed by the same index i as Group[] (see particle2in()) */
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3]; /* copy of Group[i].Pos */
+  double R200;     /* copy of R200[i] */
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  double M200;                      /* copy of M200[i] */
+  int GrNr;                         /* copy of Group[i].GrNr */
+  int TaskOfGr;                     /* ThisTask of the task that filled this entry */
+  int LocGrIndex;                   /* index i of the group on that task */
+  struct Angular_Momentum AngMomIn; /* copy of AngMom[i] */
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  int Firstnode; /* firstnode argument handed to particle2in() */
+} data_in;
+
+static data_in *DataIn, *DataGet; /* pointers required by generic_comm_helpers2 */
+
+/*! \brief Routine that fills the relevant group data into the input
+ *         structure defined above. Needed by generic_comm_helpers2.
+ *
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index of particle in group arrays.
+ *  \param[in] firstnode First node of communication.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  /* bookkeeping value passed in by the communication helper */
+  in->Firstnode = firstnode;
+  /* position and R200 value of group i */
+  for(int k = 0; k < 3; k++)
+    in->Pos[k] = Group[i].Pos[k];
+  in->R200 = R200[i];
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  in->AngMomIn   = AngMom[i];
+  in->M200       = M200[i];
+  in->LocGrIndex = i;
+  in->TaskOfGr   = ThisTask;
+  in->GrNr       = Group[i].GrNr;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  double Mass; /* stored in / added to M200[i] by out2particle() when mainstep == 0 */
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  struct Angular_Momentum AngMomOut; /* partial results; out2particle() picks fields depending on mainstep */
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+} data_out;
+
+static data_out *DataResult, *DataOut; /* pointers required by generic_comm_helpers2 */
+
+/*! \brief Routine to store or combine result data. Needed by
+ *         generic_comm_helpers2.
+ *
+ *  \param[in] out Data to be moved to appropriate variables in global
+ *  particle and group data arrays (AngMom,...)
+ *  \param[in] i Index of particle in group arrays
+ *  \param[in] mode Mode of function: local particles or information that was
+ *  communicated from other tasks and has to be added locally?
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  if(mode == MODE_LOCAL_PARTICLES) /* first result for this group: overwrite, then leave */
+    {
+      switch(mainstep)
+        {
+          case 0:
+            M200[i] = out->Mass;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+            for(int k = 0; k < 3; k++)
+              AngMom[i].Pmom[k] = out->AngMomOut.Pmom[k];
+            for(int k = 0; k < NTYPES; k++)
+              {
+                AngMom[i].MassType[k] = out->AngMomOut.MassType[k];
+                AngMom[i].LenType[k]  = out->AngMomOut.LenType[k];
+              }
+            AngMom[i].N200 = out->AngMomOut.N200;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+            break;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+          case 1:
+            for(int k = 0; k < 3; k++)
+              {
+                AngMom[i].Jtot[k]   = out->AngMomOut.Jtot[k];
+                AngMom[i].Jdm[k]    = out->AngMomOut.Jdm[k];
+                AngMom[i].Jgas[k]   = out->AngMomOut.Jgas[k];
+                AngMom[i].Jstars[k] = out->AngMomOut.Jstars[k];
+              }
+            AngMom[i].Ekin = out->AngMomOut.Ekin;
+            AngMom[i].Ethr = out->AngMomOut.Ethr;
+            break;
+          case 2:
+            AngMom[i].CMFrac = out->AngMomOut.CMFrac;
+            for(int k = 0; k < NTYPES; k++)
+              AngMom[i].CMFracType[k] = out->AngMomOut.CMFracType[k];
+            break;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+        }
+      return;
+    }
+  switch(mainstep) /* any other mode: add the contribution to what is already stored */
+    {
+      case 0:
+        M200[i] += out->Mass;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+        for(int k = 0; k < 3; k++)
+          AngMom[i].Pmom[k] += out->AngMomOut.Pmom[k];
+        for(int k = 0; k < NTYPES; k++)
+          {
+            AngMom[i].MassType[k] += out->AngMomOut.MassType[k];
+            AngMom[i].LenType[k] += out->AngMomOut.LenType[k];
+          }
+        AngMom[i].N200 += out->AngMomOut.N200;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+        break;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      case 1:
+        for(int k = 0; k < 3; k++)
+          {
+            AngMom[i].Jtot[k] += out->AngMomOut.Jtot[k];
+            AngMom[i].Jdm[k] += out->AngMomOut.Jdm[k];
+            AngMom[i].Jgas[k] += out->AngMomOut.Jgas[k];
+            AngMom[i].Jstars[k] += out->AngMomOut.Jstars[k];
+          }
+        AngMom[i].Ekin += out->AngMomOut.Ekin;
+        AngMom[i].Ethr += out->AngMomOut.Ethr;
+        break;
+      case 2:
+        AngMom[i].CMFrac += out->AngMomOut.CMFrac;
+        for(int k = 0; k < NTYPES; k++)
+          AngMom[i].CMFracType[k] += out->AngMomOut.CMFracType[k];
+        break;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+    }
+}
+
+#include "../utils/generic_comm_helpers2.h"
+
+/*! \brief Routine that defines what to do with local particles.
+ *
+ *  Takes group indices from the NextParticle counter and calls the
+ *  *_evaluate function in MODE_LOCAL_PARTICLES for every group whose Todo
+ *  flag is set, using the midpoint of its [Left, Right] bracket as radius.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  const int tid = get_thread_num();
+
+  /* reset this thread's export flag for every task to -1 */
+  for(int task = 0; task < NTask; task++)
+    Thread[tid].Exportflag[task] = -1;
+
+  for(;;)
+    {
+      /* leave once the remaining export space drops below MinSpace */
+      if(Thread[tid].ExportSpace < MinSpace)
+        break;
+
+      const int grp = NextParticle++; /* take the next group index */
+      if(grp >= Ngroups)
+        break;
+
+      if(!Todo[grp])
+        continue;
+
+      /* evaluate at the midpoint of the current radius bracket */
+      R200[grp] = 0.5 * (Left[grp] + Right[grp]);
+      subfind_overdensity_evaluate(grp, MODE_LOCAL_PARTICLES, tid);
+    }
+}
+
+/*! \brief Routine that defines what to do with imported particles.
+ *
+ *  Visits the import slots 0 ... Nimport-1 in ascending order and calls the
+ *  *_evaluate function in MODE_IMPORTED_PARTICLES for each of them. In that
+ *  mode the evaluate function takes its input from DataGet[slot] and leaves
+ *  its result in DataResult[slot].
+ *
+ *  subfind_overdensity() passes this routine to generic_comm_pattern()
+ *  together with kernel_local().
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  const int tid = get_thread_num();
+  int slot      = 0;
+
+  /* now do the particles that were sent to us */
+  while(slot < Nimport)
+    {
+      /* 'slot' indexes the import buffers, not the local group arrays */
+      subfind_overdensity_evaluate(slot, MODE_IMPORTED_PARTICLES, tid);
+      slot++;
+    }
+}
+
+/*! \brief Main routine executing the spherical overdensity algorithm.
+ *         Bisects a radius per group for four thresholds; results go to Group[].
+ *  \return Time needed for calculation.
+ */
+double subfind_overdensity(void)
+{
+  long long ntot;
+  int i, npleft, rep, iter;
+  double t0, t1, overdensity, Deltas[4], rhoback, z, omegaz, x, DeltaMean200, DeltaCrit200, DeltaCrit500, DeltaTopHat;
+  double tstart = second();
+
+  Left  = (MyFloat *)mymalloc("Left", sizeof(MyFloat) * Ngroups);  /* lower end of the radius bracket, per group */
+  Right = (MyFloat *)mymalloc("Right", sizeof(MyFloat) * Ngroups); /* upper end of the radius bracket, per group */
+  R200  = (double *)mymalloc("R200", sizeof(double) * Ngroups);    /* radius currently being evaluated */
+  M200  = (double *)mymalloc("M200", sizeof(double) * Ngroups);    /* mass returned for that radius */
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  AngMom = (struct Angular_Momentum *)mymalloc("AngMom", sizeof(struct Angular_Momentum) * Ngroups);
+  Paux   = (struct paux_data *)mymalloc("Paux", sizeof(struct paux_data) * NumPart);
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  Todo = mymalloc("Todo", sizeof(char) * Ngroups); /* per-group flag: still needs to be evaluated */
+
+  if(All.ComovingIntegrationOn)
+    z = 1 / All.Time - 1;
+  else
+    z = 0;
+
+  rhoback = 3 * All.Omega0 * All.Hubble * All.Hubble / (8 * M_PI * All.G); /* reference density for the overdensity ratios below */
+
+  omegaz =
+      All.Omega0 * pow(1 + z, 3) / (All.Omega0 * pow(1 + z, 3) + (1 - All.Omega0 - All.OmegaLambda) * pow(1 + z, 2) + All.OmegaLambda);
+
+  DeltaMean200 = 200.0;
+  DeltaCrit200 = 200.0 / omegaz;
+  DeltaCrit500 = 500.0 / omegaz;
+
+  x           = omegaz - 1;
+  DeltaTopHat = 18 * M_PI * M_PI + 82 * x - 39 * x * x;
+  DeltaTopHat /= omegaz;
+
+  Deltas[0] = DeltaMean200; /* standard fixed overdensity with respect to background */
+  Deltas[1] = DeltaTopHat;  /* tophat overdensity with respect to background */
+  Deltas[2] = DeltaCrit200; /* overdensity of 200 relative to critical, expressed relative to background density */
+  Deltas[3] = DeltaCrit500; /* overdensity of 500 relative to critical, expressed relative to background density */
+
+  generic_set_MaxNexport();
+
+  for(rep = 0; rep < 4; rep++) /* repeat for all four overdensity values */
+    {
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      int mainstepmax = 3;
+#else  /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+      int mainstepmax = 1;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES #else */
+      for(mainstep = 0; mainstep < mainstepmax; mainstep++) /* step 0 finds the radius; steps 1 and 2 exist only with extended properties */
+        {
+          for(i = 0; i < Ngroups; i++)
+            {
+              if(Group[i].Nsubs > 0)
+                {
+                  if(mainstep == 0)
+                    {
+                      double rguess = pow(All.G * Group[i].Mass / (100 * All.Hubble * All.Hubble), 1.0 / 3);
+
+                      Right[i] = 3 * rguess; /* initial bracket is [0, 3 * rguess] */
+                      Left[i]  = 0;
+                    }
+                  Todo[i] = 1;
+                }
+              else
+                {
+                  Todo[i] = 0;
+                }
+            }
+
+          iter = 0;
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+          if(mainstep == 1)
+            NumPaux = 0; /* the evaluate function refills Paux[] during step 1 */
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+          /* we will repeat the whole thing for those groups where we didn't converge to a SO radius yet */
+          do
+            {
+              t0 = second();
+
+              generic_comm_pattern(Ngroups, kernel_local, kernel_imported);
+
+              if(mainstep == 0)
+                {
+                  /* do final operations on results */
+                  for(i = 0, npleft = 0; i < Ngroups; i++)
+                    {
+                      if(Todo[i])
+                        {
+                          overdensity = M200[i] / (4.0 * M_PI / 3.0 * R200[i] * R200[i] * R200[i]) / rhoback; /* mean enclosed density in units of rhoback */
+
+                          if((Right[i] - Left[i]) > 1.0e-4 * Left[i]) /* bracket still wider than 1e-4 of Left */
+                            {
+                              /* need to redo this group */
+                              npleft++;
+
+                              if(overdensity > Deltas[rep]) /* too dense at R200: raise the lower bound */
+                                Left[i] = R200[i];
+                              else
+                                Right[i] = R200[i];
+
+                              if(iter >= MAXITER - 10) /* report the groups that are about to hit the iteration limit */
+                                {
+                                  printf("gr=%d task=%d  R200=%g Left=%g Right=%g Menclosed=%g Right-Left=%g\n   pos=(%g|%g|%g)\n", i,
+                                         ThisTask, R200[i], Left[i], Right[i], M200[i], Right[i] - Left[i], Group[i].Pos[0],
+                                         Group[i].Pos[1], Group[i].Pos[2]);
+                                  myflush(stdout);
+                                }
+                            }
+                          else
+                            Todo[i] = 0;
+                        }
+                    }
+                }
+              else /* steps > 0 need a single pass: clear all flags, npleft stays 0 */
+                for(i = 0, npleft = 0; i < Ngroups; i++)
+                  Todo[i] = 0;
+
+              sumup_large_ints(1, &npleft, &ntot); /* presumably ntot = npleft summed over all tasks - TODO confirm */
+
+              t1 = second();
+
+              if(ntot > 0)
+                {
+                  iter++;
+
+                  if(iter > 0) /* always true here: iter was incremented just above */
+                    mpi_printf("SUBFIND: SO iteration %2d: need to repeat for %12lld halo centers. (took %g sec)\n", iter, ntot,
+                               timediff(t0, t1));
+
+                  if(iter > MAXITER)
+                    terminate("failed to converge in SO iteration");
+                }
+            }
+          while(ntot > 0);
+        } /* end of mainstep loop */
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      double *egypot = mymalloc("egypot", Ngroups * sizeof(double));
+
+      subfind_so_potegy(egypot); /* presumably fills egypot[] per group - defined outside this function */
+
+      for(i = 0; i < Ngroups; i++)
+        {
+          double rate;
+
+          /* work out sampling rate */
+          if(AngMom[i].N200 < SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER)
+            rate = 1.0;
+          else
+            rate = (SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER / AngMom[i].N200);
+
+          AngMom[i].Epot = egypot[i] / (rate * rate); /* same rate as used for filling Paux[] in the evaluate function */
+        }
+
+      myfree(egypot);
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+      for(i = 0; i < Ngroups; i++)
+        {
+          if(Group[i].Nsubs > 0)
+            {
+              overdensity = M200[i] / (4.0 * M_PI / 3.0 * R200[i] * R200[i] * R200[i]) / rhoback;
+
+              if((overdensity - Deltas[rep]) > 0.1 * Deltas[rep]) /* more than 10% above the target: discard */
+                {
+                  R200[i] = M200[i] = 0;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                  memset(&AngMom[i], 0, sizeof(struct Angular_Momentum));
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                }
+              else if(M200[i] < 5 * Group[i].Mass / Group[i].Len) /* M200 below 5 * Mass/Len of the group: discard */
+                {
+                  R200[i] = M200[i] = 0;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                  memset(&AngMom[i], 0, sizeof(struct Angular_Momentum));
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                }
+            }
+          else /* Group[i].Nsubs <= 0: store zeros */
+            {
+              R200[i] = M200[i] = 0;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+              memset(&AngMom[i], 0, sizeof(struct Angular_Momentum));
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+            }
+
+          switch(rep) /* copy into the Group[] fields that belong to threshold 'rep' (same order as Deltas[]) */
+            {
+              case 0:
+                Group[i].M_Mean200 = M200[i];
+                Group[i].R_Mean200 = R200[i];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                Group[i].Ekin_Mean200   = AngMom[i].Ekin;
+                Group[i].Ethr_Mean200   = AngMom[i].Ethr;
+                Group[i].Epot_Mean200   = AngMom[i].Epot;
+                Group[i].CMFrac_Mean200 = AngMom[i].CMFrac;
+                for(int k = 0; k < NTYPES; k++)
+                  {
+                    Group[i].MassType_Mean200[k]   = AngMom[i].MassType[k];
+                    Group[i].LenType_Mean200[k]    = AngMom[i].LenType[k];
+                    Group[i].CMFracType_Mean200[k] = AngMom[i].CMFracType[k];
+                  }
+                for(int k = 0; k < 3; k++)
+                  {
+                    Group[i].J_Mean200[k]      = AngMom[i].Jtot[k];
+                    Group[i].JDM_Mean200[k]    = AngMom[i].Jdm[k];
+                    Group[i].JGas_Mean200[k]   = AngMom[i].Jgas[k];
+                    Group[i].JStars_Mean200[k] = AngMom[i].Jstars[k];
+                  }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                break;
+              case 1:
+                Group[i].M_TopHat200 = M200[i];
+                Group[i].R_TopHat200 = R200[i];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                Group[i].Ekin_TopHat200   = AngMom[i].Ekin;
+                Group[i].Ethr_TopHat200   = AngMom[i].Ethr;
+                Group[i].Epot_TopHat200   = AngMom[i].Epot;
+                Group[i].CMFrac_TopHat200 = AngMom[i].CMFrac;
+                for(int k = 0; k < NTYPES; k++)
+                  {
+                    Group[i].MassType_TopHat200[k]   = AngMom[i].MassType[k];
+                    Group[i].LenType_TopHat200[k]    = AngMom[i].LenType[k];
+                    Group[i].CMFracType_TopHat200[k] = AngMom[i].CMFracType[k];
+                  }
+                for(int k = 0; k < 3; k++)
+                  {
+                    Group[i].J_TopHat200[k]      = AngMom[i].Jtot[k];
+                    Group[i].JDM_TopHat200[k]    = AngMom[i].Jdm[k];
+                    Group[i].JGas_TopHat200[k]   = AngMom[i].Jgas[k];
+                    Group[i].JStars_TopHat200[k] = AngMom[i].Jstars[k];
+                  }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                break;
+              case 2:
+                Group[i].M_Crit200 = M200[i];
+                Group[i].R_Crit200 = R200[i];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                Group[i].Ekin_Crit200   = AngMom[i].Ekin;
+                Group[i].Ethr_Crit200   = AngMom[i].Ethr;
+                Group[i].Epot_Crit200   = AngMom[i].Epot;
+                Group[i].CMFrac_Crit200 = AngMom[i].CMFrac;
+                for(int k = 0; k < NTYPES; k++)
+                  {
+                    Group[i].MassType_Crit200[k]   = AngMom[i].MassType[k];
+                    Group[i].LenType_Crit200[k]    = AngMom[i].LenType[k];
+                    Group[i].CMFracType_Crit200[k] = AngMom[i].CMFracType[k];
+                  }
+                for(int k = 0; k < 3; k++)
+                  {
+                    Group[i].J_Crit200[k]      = AngMom[i].Jtot[k];
+                    Group[i].JDM_Crit200[k]    = AngMom[i].Jdm[k];
+                    Group[i].JGas_Crit200[k]   = AngMom[i].Jgas[k];
+                    Group[i].JStars_Crit200[k] = AngMom[i].Jstars[k];
+                  }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                break;
+              case 3:
+                Group[i].M_Crit500 = M200[i];
+                Group[i].R_Crit500 = R200[i];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                Group[i].Ekin_Crit500   = AngMom[i].Ekin;
+                Group[i].Ethr_Crit500   = AngMom[i].Ethr;
+                Group[i].Epot_Crit500   = AngMom[i].Epot;
+                Group[i].CMFrac_Crit500 = AngMom[i].CMFrac;
+                for(int k = 0; k < NTYPES; k++)
+                  {
+                    Group[i].MassType_Crit500[k]   = AngMom[i].MassType[k];
+                    Group[i].LenType_Crit500[k]    = AngMom[i].LenType[k];
+                    Group[i].CMFracType_Crit500[k] = AngMom[i].CMFracType[k];
+                  }
+                for(int k = 0; k < 3; k++)
+                  {
+                    Group[i].J_Crit500[k]      = AngMom[i].Jtot[k];
+                    Group[i].JDM_Crit500[k]    = AngMom[i].Jdm[k];
+                    Group[i].JGas_Crit500[k]   = AngMom[i].Jgas[k];
+                    Group[i].JStars_Crit500[k] = AngMom[i].Jstars[k];
+                  }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                break;
+            }
+        }
+    }
+
+  myfree(Todo); /* buffers are released in reverse order of their allocation above */
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  myfree(Paux);
+  myfree(AngMom);
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+  myfree(M200);
+  myfree(R200);
+  myfree(Right);
+  myfree(Left);
+
+  double tend = second();
+  return timediff(tstart, tend);
+}
+
+/*! \brief Evaluate function of subfind_overdensity: walks the tree and sums
+ *         what lies within the search radius, as selected by 'mainstep'.
+ *  \param[in] target Index of group (local mode) or of the import slot.
+ *  \param[in] mode Flag if it operates on local or imported data.
+ *  \param[in] threadid ID of thread.
+ *
+ *  \return 0
+ */
+static int subfind_overdensity_evaluate(int target, int mode, int threadid)
+{
+  int k, p, no, numnodes, *firstnode;
+  double hsml, mass;
+  MyDouble *pos;
+  struct NODE *current;
+  MyDouble dx, dy, dz, dist, r2;
+#define FACT2 0.86602540 /* approximately sqrt(3)/2 */
+  MyDouble xtmp, ytmp, ztmp;
+
+  data_in local, *in;
+  data_out out;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0);
+      in = &local;
+
+      numnodes  = 1;
+      firstnode = NULL;
+    }
+  else
+    {
+      in = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos  = in->Pos;
+  hsml = in->R200; /* search radius */
+  mass = 0;
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  double Pmom[3], Mtot = 0, Jtot[3], Jdm[3], Jgas[3], Jstars[3], CMFrac = 0, N200 = 0;
+  double ekin = 0, etherm = 0;
+  double MassType[NTYPES], CMFracType[NTYPES];
+  int LenType[NTYPES];
+
+  for(int i = 0; i < 3; i++)
+    {
+      Pmom[i]   = 0;
+      Jtot[i]   = 0;
+      Jdm[i]    = 0;
+      Jgas[i]   = 0;
+      Jstars[i] = 0;
+    }
+  for(int i = 0; i < NTYPES; i++)
+    {
+      MassType[i]   = 0;
+      LenType[i]    = 0;
+      CMFracType[i] = 0;
+    }
+
+  if(mainstep == 1) /* step 1 reuses the totals that step 0 stored for this group */
+    {
+      Mtot = in->M200;
+      N200 = in->AngMomIn.N200;
+      for(int i = 0; i < 3; i++)
+        Pmom[i] = in->AngMomIn.Pmom[i];
+    }
+  else if(mainstep == 2) /* step 2 additionally needs the angular momenta from step 1 */
+    {
+      Mtot = in->M200;
+      for(int i = 0; i < 3; i++)
+        {
+          Pmom[i]   = in->AngMomIn.Pmom[i];
+          Jtot[i]   = in->AngMomIn.Jtot[i];
+          Jdm[i]    = in->AngMomIn.Jdm[i];
+          Jgas[i]   = in->AngMomIn.Jgas[i];
+          Jstars[i] = in->AngMomIn.Jstars[i];
+        }
+      for(int i = 0; i < NTYPES; i++)
+        MassType[i] = in->AngMomIn.MassType[i];
+    }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  for(k = 0; k < numnodes; k++)
+    {
+      if(mode == MODE_LOCAL_PARTICLES)
+        {
+          no = Tree_MaxPart; /* root node */
+        }
+      else
+        {
+          no = firstnode[k];
+          no = Nodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0)
+        {
+          if(no < Tree_MaxPart) /* single particle */
+            {
+              p  = no;
+              no = Nextnode[no];
+
+              dist = hsml;
+              dx   = FOF_NEAREST_LONG_X(Tree_Pos_list[3 * p + 0] - pos[0]);
+              if(dx > dist)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(Tree_Pos_list[3 * p + 1] - pos[1]);
+              if(dy > dist)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(Tree_Pos_list[3 * p + 2] - pos[2]);
+              if(dz > dist)
+                continue;
+              if(dx * dx + dy * dy + dz * dz > dist * dist)
+                continue;
+
+              if(mainstep == 0)
+                mass += P[p].Mass;
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+              if(mainstep == 0)
+                {
+                  for(int i = 0; i < 3; i++)
+                    Pmom[i] += P[p].Mass * P[p].Vel[i] / All.cf_atime;  // units: 10^10 M_sol/h km/s
+
+                  for(int i = 0; i < NTYPES; i++)
+                    if(P[p].Type == i)
+                      {
+                        MassType[i] += P[p].Mass;
+
+                        LenType[i]++;
+                      }
+
+                  N200 += 1.0;
+                }
+              else if(mainstep == 1)
+                {
+                  double rate;
+                  /* work out sampling rate */
+                  if(N200 < SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER)
+                    rate = 1.0;
+                  else
+                    rate = (SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER / N200);
+
+                  if(get_random_number_aux() < rate) /* keep this particle in Paux[] with probability 'rate' */
+                    {
+                      if(NumPaux >= NumPart)
+                        terminate("NumPaux >= NumPart");
+
+                      Paux[NumPaux].Pos[0]        = NEAREST_X(P[p].Pos[0] - pos[0]);
+                      Paux[NumPaux].Pos[1]        = NEAREST_Y(P[p].Pos[1] - pos[1]);
+                      Paux[NumPaux].Pos[2]        = NEAREST_Z(P[p].Pos[2] - pos[2]);
+                      Paux[NumPaux].Mass          = P[p].Mass;
+                      Paux[NumPaux].TaskOfGr      = in->TaskOfGr;
+                      Paux[NumPaux].LocGrIndex    = in->LocGrIndex;
+                      Paux[NumPaux].Type          = P[p].Type;
+                      Paux[NumPaux].SofteningType = P[p].SofteningType;
+                      NumPaux++;
+                    }
+
+                  int ptype = P[p].Type;
+
+                  double Pos_pbc[3], Vel_centre[3], Vel_tot[3];
+                  Pos_pbc[0] = NEAREST_X(P[p].Pos[0] - pos[0]) * All.cf_atime;
+                  Pos_pbc[1] = NEAREST_Y(P[p].Pos[1] - pos[1]) * All.cf_atime;
+                  Pos_pbc[2] = NEAREST_Z(P[p].Pos[2] - pos[2]) * All.cf_atime;
+
+                  for(int i = 0; i < 3; i++)
+                    Vel_centre[i] = (Pmom[i] / Mtot);  // units: km/s
+
+                  for(int i = 0; i < 3; i++)
+                    Vel_tot[i] = P[p].Vel[i] / All.cf_atime - Vel_centre[i] + All.cf_Hrate * Pos_pbc[i];
+
+                  ekin += 0.5 * P[p].Mass * (Vel_tot[0] * Vel_tot[0] + Vel_tot[1] * Vel_tot[1] + Vel_tot[2] * Vel_tot[2]);
+
+                  Jtot[0] += P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]);
+                  Jtot[1] += P[p].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]);
+                  Jtot[2] += P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]);
+
+                  if(ptype == 1)  // dm illustris
+                    {
+                      Jdm[0] += P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]);
+                      Jdm[1] += P[p].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]);
+                      Jdm[2] += P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]);
+                    }
+                  if(ptype == 0)  // gas
+                    {
+                      etherm += P[p].Mass * PS[p].Utherm;
+
+                      Jgas[0] += P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]);
+                      Jgas[1] += P[p].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]);
+                      Jgas[2] += P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]);
+                    }
+                  if(ptype == 4)  // stars
+                    {
+                      Jstars[0] += P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]);
+                      Jstars[1] += P[p].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]);
+                      Jstars[2] += P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]);
+                    }
+                }
+              else if(mainstep == 2)
+                {
+                  int ptype = P[p].Type;
+
+                  double Pos_pbc[3], Vel_centre[3], Vel_tot[3], jpart[3]; /* no local Jtot here: the test below needs the group total loaded above */
+                  Pos_pbc[0] = NEAREST_X(P[p].Pos[0] - pos[0]) * All.cf_atime;
+                  Pos_pbc[1] = NEAREST_Y(P[p].Pos[1] - pos[1]) * All.cf_atime;
+                  Pos_pbc[2] = NEAREST_Z(P[p].Pos[2] - pos[2]) * All.cf_atime;
+
+                  for(int i = 0; i < 3; i++)
+                    Vel_centre[i] = (Pmom[i] / Mtot);
+
+                  for(int i = 0; i < 3; i++)
+                    Vel_tot[i] = P[p].Vel[i] / All.cf_atime - Vel_centre[i] + All.cf_Hrate * Pos_pbc[i];
+
+                  jpart[0] = P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]);
+                  jpart[1] = P[p].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]);
+                  jpart[2] = P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]);
+
+                  if((Jtot[0] * jpart[0] + Jtot[1] * jpart[1] + Jtot[2] * jpart[2]) < 0.) /* particle counter-rotates w.r.t. the total */
+                    CMFrac += P[p].Mass / Mtot;
+
+                  if(ptype == 1)  // dm
+                    if((Jdm[0] * jpart[0] + Jdm[1] * jpart[1] + Jdm[2] * jpart[2]) < 0.)
+                      CMFracType[1] += P[p].Mass / MassType[1];
+
+                  if(ptype == 0)  // gas
+                    if((Jgas[0] * jpart[0] + Jgas[1] * jpart[1] + Jgas[2] * jpart[2]) < 0.)
+                      CMFracType[0] += P[p].Mass / MassType[0];
+
+                  if(ptype == 4)  // stars
+                    if((Jstars[0] * jpart[0] + Jstars[1] * jpart[1] + Jstars[2] * jpart[2]) < 0.)
+                      CMFracType[4] += P[p].Mass / MassType[4];
+                }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+            }
+          else if(no < Tree_MaxPart + Tree_MaxNodes) /* internal node */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  if(no <
+                     Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */
+                    break;
+                }
+
+              current = &Nodes[no];
+
+              no = current->u.d.sibling; /* in case the node can be discarded */
+
+              dist = hsml + 0.5 * current->len;
+              dx   = FOF_NEAREST_LONG_X(current->center[0] - pos[0]);
+              if(dx > dist)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(current->center[1] - pos[1]);
+              if(dy > dist)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(current->center[2] - pos[2]);
+              if(dz > dist)
+                continue;
+              /* now test against the minimal sphere enclosing everything */
+              dist += FACT1 * current->len;
+              if((r2 = (dx * dx + dy * dy + dz * dz)) > dist * dist)
+                continue;
+
+#ifndef SUBFIND_EXTENDED_PROPERTIES
+              if(no >= Tree_FirstNonTopLevelNode) /* only do this for fully local nodes; NOTE(review): 'no' already holds the sibling index here - confirm intended */
+                {
+                  /* test whether the node is contained within the sphere, which gives  short-cut if we only need the mass */
+                  dist = hsml - FACT2 * current->len;
+                  if(dist > 0)
+                    if(r2 < dist * dist)
+                      {
+                        mass += current->u.d.mass;
+                        continue;
+                      }
+                }
+#endif /* #ifndef SUBFIND_EXTENDED_PROPERTIES */
+
+              no = current->u.d.nextnode; /* ok, we need to open the node */
+            }
+          else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */
+            {
+              int n = no - Tree_ImportedNodeOffset;
+              no    = Nextnode[no - Tree_MaxNodes];
+
+              dist = hsml;
+              dx   = FOF_NEAREST_LONG_X(Tree_Points[n].Pos[0] - pos[0]);
+              if(dx > dist)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(Tree_Points[n].Pos[1] - pos[1]);
+              if(dy > dist)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(Tree_Points[n].Pos[2] - pos[2]);
+              if(dz > dist)
+                continue;
+              if(dx * dx + dy * dy + dz * dz > dist * dist)
+                continue;
+
+              mass += Tree_Points[n].Mass; /* NOTE(review): imported points add mass only, and in every mainstep - confirm */
+            }
+          else /* pseudo particle */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("mode == MODE_IMPORTED_PARTICLES");
+
+              if(mode == MODE_LOCAL_PARTICLES)
+                tree_treefind_export_node_threads(no, target, threadid);
+
+              no = Nextnode[no - Tree_MaxNodes];
+            }
+        }
+    }
+
+  out.Mass = mass;
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  if(mainstep == 0) /* only the members belonging to the current mainstep are filled in */
+    {
+      for(int k = 0; k < 3; k++)
+        out.AngMomOut.Pmom[k] = Pmom[k];
+      for(int k = 0; k < NTYPES; k++)
+        {
+          out.AngMomOut.MassType[k] = MassType[k];
+          out.AngMomOut.LenType[k]  = LenType[k];
+        }
+
+      out.AngMomOut.N200 = N200;
+    }
+  else if(mainstep == 1)
+    {
+      for(int k = 0; k < 3; k++)
+        {
+          out.AngMomOut.Jtot[k]   = Jtot[k];
+          out.AngMomOut.Jdm[k]    = Jdm[k];
+          out.AngMomOut.Jgas[k]   = Jgas[k];
+          out.AngMomOut.Jstars[k] = Jstars[k];
+        }
+
+      out.AngMomOut.Ekin = ekin;
+      out.AngMomOut.Ethr = etherm;
+    }
+  else if(mainstep == 2)
+    {
+      out.AngMomOut.CMFrac = CMFrac;
+      for(int k = 0; k < NTYPES; k++)
+        out.AngMomOut.CMFracType[k] = CMFracType[k];
+    }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_so_potegy.c b/src/amuse/community/arepo/src/subfind/subfind_so_potegy.c
new file mode 100644
index 0000000000..823cb62eb2
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_so_potegy.c
@@ -0,0 +1,853 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_so_potegy.c
+ * \date        05/2018
+ * \brief       Calculates the potential energy of spherical overdensity groups.
+ * \details     contains functions:
+ *                static void subfind_so_potegy_loctree_findExtent(int npart,
+ *                  int start)
+ *                static int subfind_so_potegy_loctree_treebuild(int npart,
+ *                  int start)
+ *                static void subfind_so_potegy_loctree_update_node_recursive(
+ *                  int no, int sib, int father)
+ *                double subfind_so_potegy_loctree_treeevaluate_potential(int
+ *                  target)
+ *                static size_t subfind_so_potegy_loctree_treeallocate(int
+ *                  maxnodes, int maxpart)
+ *                static void subfind_so_potegy_loctree_treefree(void)
+ *                static int subfind_compare_Paux_LocGrIndex(const void *a,
+ *                  const void *b)
+ *                double subfind_so_potegy(double *egypot)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 14.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES)
+
+#include "../fof/fof.h"
+#include "subfind.h"
+
+static double RootLen, RootFac, RootBigFac, RootInverseLen, RootCenter[3], RootCorner[3]; /* root-node geometry, set in subfind_so_potegy_loctree_findExtent() */
+static int LocMaxPart; /* particle capacity of the local tree; indices >= LocMaxPart denote nodes, smaller indices denote particles */
+static int MaxNodes, last; /* node capacity (may grow during the tree build); element visited last while the walk order is threaded */
+static int *LocNextNode; /* per particle index: the element that follows it in the tree walk (-1 ends the walk) */
+static unsigned long long *LocTree_IntPos_list; /* three integer coordinates per particle; only allocated while the tree is built */
+static struct paux_data *LocPaux; /* particles received by this task; sorted by LocGrIndex in subfind_so_potegy() */
+
+static void subfind_so_potegy_loctree_update_node_recursive(int no, int sib, int father); /* forward declaration, needed by the tree build */
+
+/*! \brief Node of the local tree; 'u.suns' is valid while the tree is built, 'u.d' after the node update pass.
+ */
+static struct LocNODE
+{
+  union
+  {
+    int suns[8]; /*!< temporary indices of the daughter nodes/particles (-1 marks an empty slot) */
+    struct
+    {
+      MyDouble s[3]; /*!< center of mass of node */
+      MyDouble mass; /*!< mass of node */
+      unsigned char maxsofttype; /*!< softening type with the largest All.ForceSoftening found inside the node */
+#if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING)
+      unsigned char maxhydrosofttype; /*!< largest softening type index among the Type == 0 particles of the node */
+      unsigned char minhydrosofttype; /*!< smallest softening type index among the Type == 0 particles of the node */
+#endif              /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) */
+      int sibling;  /*!< this gives the next node in the walk in case the current node can be used */
+      int nextnode; /*!< this gives the next node in case the current node needs to be opened */
+    } d;
+  } u;
+
+  MyDouble center[3]; /*!< geometrical center of node */
+  MyFloat len;        /*!< sidelength of treenode */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+  MyDouble mass_per_type[NSOFTTYPES]; /*!< mass of the node accumulated separately per softening-type slot */
+#endif
+} * LocNodes_base, /*!< points to the actual memory allocated for the nodes */
+    *LocNodes;     /*!< this is a pointer used to access the nodes which is shifted such that LocNodes[LocMaxPart]
+                      gives the first allocated node, i.e. LocNodes_base[0] */
+
+/*! \brief Computes the bounding cube of a group's particles.
+ *
+ *  The result is stored in the file-scope 'Root*' variables, which describe
+ *  the root node of the local tree.
+ *  \param[in] npart Number of particles of the group.
+ *  \param[in] start Index of the group's first particle in LocPaux.
+ *
+ *  \return void
+ */
+static void subfind_so_potegy_loctree_findExtent(int npart, int start)
+{
+  double lower[3], upper[3], extent = 0;
+
+  /* start from an inverted box so that the first particle sets both bounds */
+  for(int dim = 0; dim < 3; dim++)
+    {
+      lower[dim] = MAX_REAL_NUMBER;
+      upper[dim] = -MAX_REAL_NUMBER;
+    }
+
+  /* grow the box over the particles LocPaux[start .. start + npart - 1] */
+  for(int done = 0; done < npart; done++)
+    for(int dim = 0; dim < 3; dim++)
+      {
+        if(LocPaux[start + done].Pos[dim] < lower[dim])
+          lower[dim] = LocPaux[start + done].Pos[dim];
+
+        if(LocPaux[start + done].Pos[dim] > upper[dim])
+          upper[dim] = LocPaux[start + done].Pos[dim];
+      }
+
+  /* the longest box edge, padded by 0.1 percent, becomes the root side length */
+  for(int dim = 0; dim < 3; dim++)
+    {
+      double edge = upper[dim] - lower[dim];
+      if(edge > extent)
+        extent = edge;
+    }
+
+  extent *= 1.001;
+
+  RootLen        = extent;
+  RootInverseLen = 1.0 / RootLen;
+  RootFac        = 1.0 / extent * (((peanokey)1) << (BITS_PER_DIMENSION));
+  RootBigFac     = (RootLen / (((long long)1) << 52));
+
+  for(int dim = 0; dim < 3; dim++)
+    {
+      RootCenter[dim] = 0.5 * (lower[dim] + upper[dim]);
+      RootCorner[dim] = 0.5 * (lower[dim] + upper[dim]) - 0.5 * extent;
+    }
+}
+
+/*! \brief Builds the local tree for the particles LocPaux[start .. start + npart - 1].
+ *         Leaves store absolute LocPaux indices; every index >= LocMaxPart denotes a node.
+ *  \param[in] npart Number of particles.
+ *  \param[in] start Index of the first particle in LocPaux.
+ *
+ *  \return Number of nodes in tree.
+ */
+static int subfind_so_potegy_loctree_treebuild(int npart, int start)
+{
+  int subnode = 0, parent = -1, numnodes;
+  int nfree, th, nn;
+  struct LocNODE *nfreep;
+
+  /* select first node: node indices start at LocMaxPart, so LocNodes[LocMaxPart] is the root */
+  nfree  = LocMaxPart;
+  nfreep = &LocNodes[nfree];
+
+  /* create an empty root node from the geometry stored in the Root* variables */
+  nfreep->len = (MyFloat)RootLen;
+  for(int i = 0; i < 3; i++)
+    nfreep->center[i] = (MyFloat)RootCenter[i];
+
+  for(int i = 0; i < 8; i++)
+    nfreep->u.suns[i] = -1;
+
+  numnodes = 1;
+  nfreep++;
+  nfree++;
+
+  /* insert all particles */
+
+  LocTree_IntPos_list =
+      (unsigned long long *)mymalloc_movable(&LocTree_IntPos_list, "LocTree_IntPos_list", 3 * LocMaxPart * sizeof(unsigned long long));
+
+  for(int k = 0; k < npart; k++)
+    {
+      int i = start + k; /* absolute index into LocPaux; also used as the leaf index inside the tree */
+
+      MyDouble *posp;
+
+      posp = &LocPaux[i].Pos[0];
+      /* position relative to the root corner in units of the root length, shifted by 1.0, then passed to force_double_to_int() */
+      unsigned long long xxb      = force_double_to_int(((posp[0] - RootCorner[0]) * RootInverseLen) + 1.0);
+      unsigned long long yyb      = force_double_to_int(((posp[1] - RootCorner[1]) * RootInverseLen) + 1.0);
+      unsigned long long zzb      = force_double_to_int(((posp[2] - RootCorner[2]) * RootInverseLen) + 1.0);
+      unsigned long long mask     = ((unsigned long long)1) << (52 - 1); /* bit 51 decides the octant at the first level */
+      unsigned char shiftx        = (52 - 1); /* moves the x bit to bit 0 of the octant index */
+      unsigned char shifty        = (52 - 2); /* moves the y bit to bit 1 of the octant index */
+      unsigned char shiftz        = (52 - 3); /* moves the z bit to bit 2 of the octant index */
+      signed long long centermask = (0xFFF0000000000000llu); /* NOTE(review): signed, so '>>= 1' below presumably sign-extends - confirm */
+      unsigned char levels        = 0;
+
+      unsigned long long *intposp = &LocTree_IntPos_list[3 * i]; /* slot is addressed by the absolute index i */
+
+      *intposp++ = xxb;
+      *intposp++ = yyb;
+      *intposp++ = zzb;
+
+      th = LocMaxPart; /* start the descent at the root */
+
+      while(1)
+        {
+          if(th >= LocMaxPart) /* we are dealing with an internal node */
+            {
+              subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) |
+                         ((unsigned char)((zzb & mask) >> (shiftz--)))); /* 3-bit octant index; the shifts then move on one level */
+
+              centermask >>= 1;
+              mask >>= 1;
+              levels++;
+
+              if(levels > MAX_TREE_LEVEL)
+                {
+                  /* seems like we're dealing with particles at identical (or extremely close)
+                   * locations. Shift subnode index to allow tree construction. Note: Multipole moments
+                   * of tree are still correct, but one should choose MAX_TREE_LEVEL large enough to have
+                   *      DomainLen/2^MAX_TREE_LEVEL  < gravitational softening length
+                   */
+                  for(int j = 0; j < 8; j++)
+                    {
+                      if(LocNodes[th].u.suns[subnode] < 0)
+                        break;
+
+                      subnode++;
+                      if(subnode >= 8)
+                        subnode = 7; /* clamp: the search never wraps around to lower slots */
+                    }
+                }
+
+              nn = LocNodes[th].u.suns[subnode];
+
+              if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */
+                {
+                  parent = th; /* note: subnode can still be used in the next step of the walk */
+                  th     = nn;
+                }
+              else
+                {
+                  /* here we have found an empty slot where we can
+                   * attach the new particle as a leaf
+                   */
+                  LocNodes[th].u.suns[subnode] = i;
+                  break; /* done for this particle */
+                }
+            }
+          else
+            {
+              /* we try to insert into a leaf with a single particle
+               * need to generate a new internal node at this point
+               */
+              LocNodes[parent].u.suns[subnode] = nfree; /* the new node takes over the slot held by particle th */
+
+              /* geometry of the new node (side length and center), derived from the integer coordinates */
+              double len = ((double)(mask << 1)) * RootBigFac;
+              double cx  = ((double)((xxb & centermask) | mask)) * RootBigFac + RootCorner[0];
+              double cy  = ((double)((yyb & centermask) | mask)) * RootBigFac + RootCorner[1];
+              double cz  = ((double)((zzb & centermask) | mask)) * RootBigFac + RootCorner[2];
+
+              nfreep->len       = len;
+              nfreep->center[0] = cx;
+              nfreep->center[1] = cy;
+              nfreep->center[2] = cz;
+
+              nfreep->u.suns[0] = -1;
+              nfreep->u.suns[1] = -1;
+              nfreep->u.suns[2] = -1;
+              nfreep->u.suns[3] = -1;
+              nfreep->u.suns[4] = -1;
+              nfreep->u.suns[5] = -1;
+              nfreep->u.suns[6] = -1;
+              nfreep->u.suns[7] = -1;
+
+              unsigned long long *intppos = &LocTree_IntPos_list[3 * th]; /* stored integer coordinates of the resident particle */
+
+              subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) |
+                         ((unsigned char)((intppos[2] & mask) >> shiftz))); /* octant of the resident particle in the new node */
+
+              nfreep->u.suns[subnode] = th;
+
+              th = nfree; /* resume trying to insert the new particle at
+                             the newly created internal node */
+
+              numnodes++;
+              nfree++;
+              nfreep++;
+
+              if(numnodes >= MaxNodes)
+                {
+                  MaxNodes *= 1.2; /* grow the node capacity by 20 percent (result is truncated to int) */
+
+                  LocNodes_base = (struct LocNODE *)myrealloc_movable(LocNodes_base, (MaxNodes + 1) * sizeof(struct LocNODE));
+                  LocNodes      = LocNodes_base - LocMaxPart; /* re-derive the shifted alias from the new base pointer */
+                  nfreep        = &LocNodes[nfree]; /* the old nfreep pointed into the previous allocation */
+
+                  if(numnodes > MaxNodes) /* the capacity did not grow enough to hold the nodes already in use */
+                    {
+                      char buf[1000];
+
+                      sprintf(buf, "maximum number %d of tree-nodes reached., for particle %d  %g %g %g", MaxNodes, i,
+                              LocPaux[i].Pos[0], LocPaux[i].Pos[1], LocPaux[i].Pos[2]);
+                      terminate(buf);
+                    }
+                }
+            }
+        }
+    }
+
+  myfree(LocTree_IntPos_list);
+
+  /* now compute the multipole moments recursively */
+  last = -1;
+  subfind_so_potegy_loctree_update_node_recursive(LocMaxPart, -1, -1);
+
+  if(last >= LocMaxPart) /* terminate the walk order at the element visited last */
+    LocNodes[last].u.d.nextnode = -1;
+  else
+    LocNextNode[last] = -1;
+
+  return numnodes;
+}
+
+/*! \brief Walk the tree and update node data recursively.
+ *
+ *  This routine computes the mass, center of mass and softening information
+ *  for a given internal node and all its subnodes using a recursive
+ *  computation. Note that this switches the information stored in
+ *  LocNodes[no].u from suns to d! It also threads the walk order via 'last'.
+ *
+ *  \param[in] no Index of a node (>= LocMaxPart) or of a particle (< LocMaxPart).
+ *  \param[in] sib Index of the element that follows 'no' when 'no' is not opened.
+ *  \param[in] father Parent index (not referenced in the body).
+ *
+ *  \return void
+ */
+static void subfind_so_potegy_loctree_update_node_recursive(int no, int sib, int father)
+{
+  int j, jj, p, pp = 0, nextsib, suns[8];
+  unsigned char maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+  double mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  unsigned char maxhydrosofttype;
+  unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+  double mass;
+  double s[3];
+
+  if(no >= LocMaxPart) /* internal node */
+    {
+      for(j = 0; j < 8; j++)
+        suns[j] = LocNodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will
+                                             overwrite one element (union!) */
+      if(last >= 0) /* link the previously visited element to this node */
+        {
+          if(last >= LocMaxPart)
+            LocNodes[last].u.d.nextnode = no;
+          else
+            LocNextNode[last] = no;
+        }
+
+      last = no;
+
+      mass        = 0;
+      s[0]        = 0;
+      s[1]        = 0;
+      s[2]        = 0;
+      maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; /* start value; NOTE(review): presumably an extra table entry with the smallest softening - confirm */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+      for(j = 0; j < NSOFTTYPES; j++)
+        mass_per_type[j] = 0;
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      maxhydrosofttype = NSOFTTYPES;
+      minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      for(j = 0; j < 8; j++)
+        {
+          if((p = suns[j]) >= 0)
+            {
+              /* check if we have a sibling on the same level */
+              for(jj = j + 1; jj < 8; jj++)
+                if((pp = suns[jj]) >= 0)
+                  break;
+
+              if(jj < 8) /* yes, we do */
+                nextsib = pp;
+              else
+                nextsib = sib; /* last daughter: continue with the sibling of this node */
+
+              subfind_so_potegy_loctree_update_node_recursive(p, nextsib, no);
+
+              if(p >= LocMaxPart) /* an internal node  */
+                {
+                  mass += LocNodes[p].u.d.mass; /* accumulate mass and mass-weighted position of the daughter node */
+                  s[0] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[0];
+                  s[1] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[1];
+                  s[2] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[2];
+
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[LocNodes[p].u.d.maxsofttype])
+                    maxsofttype = LocNodes[p].u.d.maxsofttype;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+                  int k;
+                  for(k = 0; k < NSOFTTYPES; k++)
+                    mass_per_type[k] += LocNodes[p].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  if(maxhydrosofttype < LocNodes[p].u.d.maxhydrosofttype)
+                    maxhydrosofttype = LocNodes[p].u.d.maxhydrosofttype;
+                  if(minhydrosofttype > LocNodes[p].u.d.minhydrosofttype)
+                    minhydrosofttype = LocNodes[p].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+              else /* a particle */
+                {
+                  mass += LocPaux[p].Mass;
+
+                  s[0] += LocPaux[p].Mass * LocPaux[p].Pos[0];
+                  s[1] += LocPaux[p].Mass * LocPaux[p].Pos[1];
+                  s[2] += LocPaux[p].Mass * LocPaux[p].Pos[2];
+
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[LocPaux[p].SofteningType])
+                    maxsofttype = LocPaux[p].SofteningType;
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  mass_per_type[LocPaux[p].Type == 0 ? 0 : LocPaux[p].SofteningType] += LocPaux[p].Mass; /* Type == 0 particles all go into slot 0 */
+
+                  if(LocPaux[p].Type == 0)
+                    {
+                      if(maxhydrosofttype < LocPaux[p].SofteningType)
+                        maxhydrosofttype = LocPaux[p].SofteningType;
+                      if(minhydrosofttype > LocPaux[p].SofteningType)
+                        minhydrosofttype = LocPaux[p].SofteningType;
+                    }
+#else  /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                  mass_per_type[LocPaux[p].SofteningType] += LocPaux[p].Mass;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+            }
+        }
+
+      if(mass > 0) /* turn the mass-weighted position sum into the center of mass */
+        {
+          s[0] /= mass;
+          s[1] /= mass;
+          s[2] /= mass;
+        }
+      else /* massless node: fall back to the geometrical center */
+        {
+          s[0] = LocNodes[no].center[0];
+          s[1] = LocNodes[no].center[1];
+          s[2] = LocNodes[no].center[2];
+        }
+
+      LocNodes[no].u.d.s[0]        = (MyFloat)s[0]; /* NOTE(review): cast to MyFloat although u.d.s is declared MyDouble - confirm intended */
+      LocNodes[no].u.d.s[1]        = (MyFloat)s[1];
+      LocNodes[no].u.d.s[2]        = (MyFloat)s[2];
+      LocNodes[no].u.d.mass        = (MyFloat)mass;
+      LocNodes[no].u.d.maxsofttype = maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+      int k;
+      for(k = 0; k < NSOFTTYPES; k++)
+        LocNodes[no].mass_per_type[k] = mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      LocNodes[no].u.d.maxhydrosofttype = maxhydrosofttype;
+      LocNodes[no].u.d.minhydrosofttype = minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      LocNodes[no].u.d.sibling = sib;
+    }
+  else /* single particle or pseudo particle */
+    {
+      if(last >= 0) /* link the previously visited element to this particle */
+        {
+          if(last >= LocMaxPart)
+            LocNodes[last].u.d.nextnode = no;
+          else
+            LocNextNode[last] = no;
+        }
+
+      last = no;
+    }
+}
+
+/*! \brief Calculates the gravitational potential at the position of a single particle by walking the local tree.
+ *         The sum includes the contribution of the target itself and carries no factor of G.
+ *  \param[in] target Target particle index (in LocPaux).
+ *
+ *  \return Gravitational potential.
+ */
+double subfind_so_potegy_loctree_treeevaluate_potential(int target)
+{
+  struct LocNODE *nop = 0;
+  int no;
+  double r2, dx, dy, dz, mass, r, u, h_i, h_j, hmax, h_inv, wp;
+  double pot, pos_x, pos_y, pos_z, xtmp, ytmp, ztmp; /* NOTE(review): xtmp/ytmp/ztmp are presumably scratch variables of the GRAVITY_NEAREST_* macros - confirm */
+
+  pos_x = LocPaux[target].Pos[0];
+  pos_y = LocPaux[target].Pos[1];
+  pos_z = LocPaux[target].Pos[2];
+
+  h_i = All.ForceSoftening[LocPaux[target].SofteningType];
+
+  pot = 0;
+
+  no = LocMaxPart; /* start the walk at the root node */
+
+  while(no >= 0) /* -1 marks the end of the walk */
+    {
+#ifdef MULTIPLE_NODE_SOFTENING
+      int indi_flag1 = -1, indi_flag2 = 0; /* default: the loop over 'type' below runs once with type == -1 */
+#endif                    /* #ifdef MULTIPLE_NODE_SOFTENING */
+      if(no < LocMaxPart) /* single particle */
+        {
+          dx = GRAVITY_NEAREST_X(LocPaux[no].Pos[0] - pos_x);
+          dy = GRAVITY_NEAREST_Y(LocPaux[no].Pos[1] - pos_y);
+          dz = GRAVITY_NEAREST_Z(LocPaux[no].Pos[2] - pos_z);
+
+          r2 = dx * dx + dy * dy + dz * dz;
+
+          mass = LocPaux[no].Mass;
+
+          h_j = All.ForceSoftening[LocPaux[no].SofteningType];
+
+          if(h_j > h_i) /* use the larger of the two softening lengths */
+            hmax = h_j;
+          else
+            hmax = h_i;
+
+          no = LocNextNode[no]; /* advance to the element that follows this particle */
+        }
+      else
+        {
+          nop  = &LocNodes[no];
+          mass = nop->u.d.mass;
+
+          dx = GRAVITY_NEAREST_X(nop->u.d.s[0] - pos_x);
+          dy = GRAVITY_NEAREST_Y(nop->u.d.s[1] - pos_y);
+          dz = GRAVITY_NEAREST_Z(nop->u.d.s[2] - pos_z);
+
+          r2 = dx * dx + dy * dy + dz * dz;
+
+          /* check Barnes-Hut opening criterion: node side length against distance times All.ErrTolThetaSubfind */
+          if(nop->len * nop->len > r2 * All.ErrTolThetaSubfind * All.ErrTolThetaSubfind)
+            {
+              /* open cell */
+              if(mass) /* nodes with zero mass are not opened */
+                {
+                  no = nop->u.d.nextnode;
+                  continue;
+                }
+            }
+
+          h_j = All.ForceSoftening[nop->u.d.maxsofttype];
+
+          if(h_j > h_i)
+            {
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+              if(nop->u.d.maxhydrosofttype != nop->u.d.minhydrosofttype)
+                if(LocNodes[no].mass_per_type[0] > 0)
+                  if(r2 < All.ForceSoftening[nop->u.d.maxhydrosofttype] * All.ForceSoftening[nop->u.d.maxhydrosofttype])
+                    {
+                      /* open cell */
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+              indi_flag1 = 0; /* evaluate the node separately for each softening-type slot */
+              indi_flag2 = NSOFTTYPES;
+#else  /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+              if(r2 < h_j * h_j) /* target lies within the largest softening length found in the node */
+                {
+                  /* open cell */
+                  no = nop->u.d.nextnode;
+                  continue;
+                }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING #else */
+              hmax = h_j;
+            }
+          else
+            hmax = h_i;
+
+          no = nop->u.d.sibling; /* node can be used */
+        }
+
+      r = sqrt(r2);
+#ifdef MULTIPLE_NODE_SOFTENING
+      int type;
+      for(type = indi_flag1; type < indi_flag2; type++)
+        {
+          if(type >= 0) /* per-type pass: take mass and softening of this slot ('nop' still points to the node used) */
+            {
+              mass = nop->mass_per_type[type];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+              if(type == 0)
+                h_j = All.ForceSoftening[nop->u.d.maxhydrosofttype];
+              else
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                h_j = All.ForceSoftening[type];
+
+              if(h_j > h_i)
+                hmax = h_j;
+              else
+                hmax = h_i;
+            }
+
+          if(mass)
+            {
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+              if(r >= hmax) /* outside the softening length: point-mass potential */
+                pot -= mass / r;
+              else
+                {
+                  h_inv = 1.0 / hmax;
+
+                  u = r * h_inv;
+
+                  if(u < 0.5) /* softened potential: wp is a piecewise polynomial in u = r / hmax, with wp = -2.8 at u = 0 */
+                    wp = -2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6));
+                  else
+                    wp = -3.2 + 0.066666666667 / u + u * u * (10.666666666667 + u * (-16.0 + u * (9.6 - 2.133333333333 * u)));
+
+                  pot += mass * h_inv * wp;
+#ifdef MULTIPLE_NODE_SOFTENING
+                }
+            }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+        }
+    }
+
+  return pot;
+}
+
+/*! \brief Allocates the node storage of the local tree together with the
+ *         per-particle auxiliary arrays LocNextNode and R2list.
+ *
+ *  \param[in] maxnodes Initial node capacity (stored in MaxNodes).
+ *  \param[in] maxpart Particle capacity (stored in LocMaxPart).
+ *
+ *  \return Number of allocated bytes.
+ */
+static size_t subfind_so_potegy_loctree_treeallocate(int maxnodes, int maxpart)
+{
+  if(LocNextNode)
+    terminate("loctree already allocated");
+
+  MaxNodes   = maxnodes;
+  LocMaxPart = maxpart;
+
+  /* sizes of the three buffers, computed up front */
+  const size_t nextnode_bytes = maxpart * sizeof(int);
+  const size_t r2list_bytes   = maxpart * sizeof(r2type);
+  const size_t nodes_bytes    = (MaxNodes + 1) * sizeof(struct LocNODE);
+
+  /* keep this allocation order: subfind_so_potegy_loctree_treefree() releases in the reverse order */
+  LocNextNode   = (int *)mymalloc("LocNextNode", nextnode_bytes);
+  R2list        = (r2type *)mymalloc("R2list", r2list_bytes);
+  LocNodes_base = (struct LocNODE *)mymalloc_movable(&LocNodes_base, "LocNodes_base", nodes_bytes);
+  /* shifted alias: LocNodes[LocMaxPart] refers to LocNodes_base[0] */
+  LocNodes = LocNodes_base - LocMaxPart;
+
+  return nextnode_bytes + r2list_bytes + nodes_bytes;
+}
+
+/*! \brief Releases the buffers obtained in subfind_so_potegy_loctree_treeallocate().
+ *
+ *  \return void
+ */
+static void subfind_so_potegy_loctree_treefree(void)
+{
+  /* release in reverse order of allocation, clearing each pointer right away */
+  myfree(LocNodes_base);
+  LocNodes_base = NULL;
+  myfree(R2list);
+  R2list = NULL;
+  myfree(LocNextNode);
+  LocNextNode = NULL;
+}
+
+/*! \brief qsort() kernel that orders paux_data objects by LocGrIndex.
+ *
+ *  Only the field LocGrIndex is compared.
+ *
+ *  \param[in] a First object to be compared.
+ *  \param[in] b Second object to be compared.
+ *
+ *  \return -1 if a < b, +1 if a > b, 0 otherwise.
+ */
+static int subfind_compare_Paux_LocGrIndex(const void *a, const void *b)
+{
+  const struct paux_data *lhs = (const struct paux_data *)a;
+  const struct paux_data *rhs = (const struct paux_data *)b;
+
+  /* three-way comparison assembled from the two relational results */
+  int is_less    = lhs->LocGrIndex < rhs->LocGrIndex;
+  int is_greater = lhs->LocGrIndex > rhs->LocGrIndex;
+  return is_greater - is_less;
+}
+
+/*! \brief Calculates potential energy of spherical overdensity groups.
+ *         Each Paux entry is sent to the task named by its TaskOfGr field; one local tree is built per group.
+ *  \param[out] egypot Array with potential energies in each group (one entry per local group is written).
+ *
+ *  \return Time this routine took.
+ */
+double subfind_so_potegy(double *egypot)
+{
+  double t0 = second();
+  mpi_printf("SUBFIND: Starting SO potential energy computation\n");
+
+  size_t *count_send  = (size_t *)mymalloc_movable(&count_send, "count_send", NTask * sizeof(size_t));
+  size_t *offset_send = (size_t *)mymalloc_movable(&offset_send, "offset_send", NTask * sizeof(size_t));
+  size_t *count_recv  = (size_t *)mymalloc_movable(&count_recv, "count_recv", NTask * sizeof(size_t));
+  size_t *offset_recv = (size_t *)mymalloc_movable(&offset_recv, "offset_recv", NTask * sizeof(size_t));
+
+  for(int i = 0; i < NTask; i++)
+    count_send[i] = 0;
+  /* count the Paux entries destined for each task */
+  for(int i = 0; i < NumPaux; i++)
+    count_send[Paux[i].TaskOfGr]++;
+
+  MPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+
+  offset_send[0] = offset_recv[0] = 0;
+
+  for(int i = 1; i < NTask; i++)
+    {
+      offset_send[i] = offset_send[i - 1] + count_send[i - 1];
+      offset_recv[i] = offset_recv[i - 1] + count_recv[i - 1];
+    }
+
+  struct paux_data *PauxTmp = (struct paux_data *)mymalloc_movable(&PauxTmp, "PauxTmp", NumPaux * sizeof(struct paux_data));
+
+  for(int i = 0; i < NTask; i++)
+    count_send[i] = 0;
+  /* bucket the entries by destination task; count_send is reused as the per-task fill counter */
+  for(int i = 0; i < NumPaux; i++)
+    {
+      int task     = Paux[i].TaskOfGr;
+      int loc      = offset_send[task] + count_send[task]++;
+      PauxTmp[loc] = Paux[i];
+    }
+
+  int NumPauxRecv = 0;
+
+  for(int i = 0; i < NTask; i++)
+    NumPauxRecv += count_recv[i];
+
+  LocPaux = (struct paux_data *)mymalloc_movable(&LocPaux, "LocPaux", NumPauxRecv * sizeof(struct paux_data));
+
+  myMPI_Alltoallv(PauxTmp, count_send, offset_send, LocPaux, count_recv, offset_recv, sizeof(struct paux_data), 1, MPI_COMM_WORLD);
+
+  myfree_movable(PauxTmp);
+  /* make the members of each group contiguous in LocPaux */
+  qsort(LocPaux, NumPauxRecv, sizeof(struct paux_data), subfind_compare_Paux_LocGrIndex);
+
+  int *group_len = (int *)mymalloc("group_len", Ngroups * sizeof(int));
+  int *group_off = (int *)mymalloc("group_off", Ngroups * sizeof(int));
+
+  for(int i = 0; i < Ngroups; i++)
+    group_len[i] = 0;
+
+  for(int i = 0; i < NumPauxRecv; i++)
+    {
+      int j = LocPaux[i].LocGrIndex;
+      if(j < 0 || j >= Ngroups)
+        terminate("j=%d Ngroups=%d", j, Ngroups);
+
+      group_len[j]++;
+    }
+  /* group_off[] is the exclusive prefix sum of group_len[] */
+  if(Ngroups > 0) /* a task may host no group at all; do not touch the empty array then */
+    group_off[0] = 0;
+  for(int i = 1; i < Ngroups; i++)
+    group_off[i] = group_off[i - 1] + group_len[i - 1];
+
+  /* The local tree stores particles by their absolute LocPaux index and treats every index >= LocMaxPart
+   * as a node, so the capacity must cover all NumPauxRecv entries (which also bounds every group_len[i]). */
+  int MaxAllocPart = NumPart;
+  if(NumPauxRecv > MaxAllocPart)
+    MaxAllocPart = NumPauxRecv;
+
+  subfind_so_potegy_loctree_treeallocate((int)(All.TreeAllocFactor * MaxAllocPart) + NTopnodes, MaxAllocPart);
+
+  /* now do the actual potential calculation, one local tree per group */
+  for(int i = 0; i < Ngroups; i++)
+    {
+      subfind_so_potegy_loctree_findExtent(group_len[i], group_off[i]);
+      subfind_so_potegy_loctree_treebuild(group_len[i], group_off[i]);
+
+      egypot[i] = 0;
+
+      for(int j = 0; j < group_len[i]; j++)
+        {
+          int target = group_off[i] + j;
+
+          double pot = subfind_so_potegy_loctree_treeevaluate_potential(target);
+
+          /* remove self-potential */
+          pot += LocPaux[target].Mass / (All.ForceSoftening[LocPaux[target].SofteningType] / 2.8);
+
+          pot *= All.G / All.cf_atime;
+
+          egypot[i] += 0.5 * pot * LocPaux[target].Mass; /* factor 0.5: every pair is visited from both of its members */
+        }
+    }
+
+  subfind_so_potegy_loctree_treefree();
+
+  myfree(group_off);
+  myfree(group_len);
+
+  myfree(LocPaux);
+
+  myfree(offset_recv);
+  myfree(count_recv);
+  myfree(offset_send);
+  myfree(count_send);
+
+  double t1 = second();
+  mpi_printf("SUBFIND: SO potential energy computation took %g sec\n", timediff(t0, t1));
+
+  return timediff(t0, t1);
+}
+
+#endif /* #if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_sort_kernels.c b/src/amuse/community/arepo/src/subfind/subfind_sort_kernels.c
new file mode 100644
index 0000000000..5787cb3441
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_sort_kernels.c
@@ -0,0 +1,442 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_sort_kernels.c
+ * \date        05/2018
+ * \brief       Comparison functions that serve as sorting kernels for various
+ *              different structs used in subfind.
+ * \details     contains functions:
+ *                int subfind_compare_procassign_GrNr(const void *a,
+ *                  const void *b)
+ *                int subfind_compare_submp_GrNr_DM_Density(const void *a,
+ *                  const void *b)
+ *                int subfind_compare_submp_OldIndex(const void *a,
+ *                  const void *b)
+ *                int subfind_compare_ID_list(const void *a, const void *b)
+ *                int subfind_compare_SubGroup_GrNr_SubNr(const void *a, const
+ *                  void *b)
+ *                int subfind_compare_dist_rotcurve(const void *a, const void
+ *                  *b)
+ *                int subfind_compare_rlist_mhd(const void *a, const void *b)
+ *                int subfind_compare_binding_energy(const void *a, const void
+ *                  *b)
+ *                int subfind_compare_serial_candidates_boundlength(const void
+ *                  *a, const void *b)
+ *                int subfind_compare_serial_candidates_rank(const void *a,
+ *                  const void *b)
+ *                int subfind_compare_serial_candidates_subnr(const void *a,
+ *                  const void *b)
+ *                int subfind_compare_coll_candidates_subnr(const void *a,
+ *                  const void *b)
+ *                int subfind_compare_coll_candidates_nsubs(const void *a,
+ *                  const void *b)
+ *                int subfind_compare_coll_candidates_boundlength(const void
+ *                  *a, const void *b)
+ *                int subfind_compare_coll_candidates_rank(const void *a,
+ *                  const void *b)
+ *                int subfind_fof_compare_ID(const void *a, const void *b)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 11.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_rng.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../fof/fof.h"
+#include "subfind.h"
+
+#ifdef SUBFIND
+
+/*! \brief Sorting kernel that orders proc_assign_data entries by GrNr.
+ *
+ *  Only the GrNr member is inspected; the order is ascending.
+ *
+ *  \param[in] a Pointer to the first proc_assign_data entry.
+ *  \param[in] b Pointer to the second proc_assign_data entry.
+ *
+ *  \return -1 if a's GrNr is smaller, +1 if it is larger, 0 otherwise.
+ */
+int subfind_compare_procassign_GrNr(const void *a, const void *b)
+{
+  const struct proc_assign_data *lhs = (const struct proc_assign_data *)a;
+  const struct proc_assign_data *rhs = (const struct proc_assign_data *)b;
+
+  if(lhs->GrNr < rhs->GrNr)
+    return -1;
+
+  return (lhs->GrNr > rhs->GrNr) ? +1 : 0;
+}
+
+/*! \brief Sorting kernel for submp_data entries.
+ *
+ *  Keys, most significant first: GrNr in ascending order, then DM_Density
+ *  in descending order (the larger DM_Density sorts first).
+ *
+ *  \param[in] a Pointer to the first submp_data entry.
+ *  \param[in] b Pointer to the second submp_data entry.
+ *
+ *  \return -1 if a sorts before b, +1 if a sorts after b, 0 otherwise.
+ */
+int subfind_compare_submp_GrNr_DM_Density(const void *a, const void *b)
+{
+  const struct submp_data *lhs = (const struct submp_data *)a;
+  const struct submp_data *rhs = (const struct submp_data *)b;
+
+  /* primary key: GrNr, ascending */
+  if(lhs->GrNr < rhs->GrNr)
+    return -1;
+  if(lhs->GrNr > rhs->GrNr)
+    return +1;
+
+  /* secondary key: DM_Density, descending */
+  if(lhs->DM_Density > rhs->DM_Density)
+    return -1;
+  return (lhs->DM_Density < rhs->DM_Density) ? +1 : 0;
+}
+
+/*! \brief Sorting kernel that orders submp_data entries by OldIndex.
+ *
+ *  Only the OldIndex member is inspected; the order is ascending.
+ *
+ *  \param[in] a Pointer to the first submp_data entry.
+ *  \param[in] b Pointer to the second submp_data entry.
+ *
+ *  \return -1 if a's OldIndex is smaller, +1 if it is larger, 0 otherwise.
+ */
+int subfind_compare_submp_OldIndex(const void *a, const void *b)
+{
+  const struct submp_data *lhs = (const struct submp_data *)a;
+  const struct submp_data *rhs = (const struct submp_data *)b;
+
+  /* (x > y) - (x < y) evaluates to -1, 0 or +1 */
+  int cmp = (lhs->OldIndex > rhs->OldIndex) - (lhs->OldIndex < rhs->OldIndex);
+
+  return cmp;
+}
+
+/*! \brief Sorting kernel for id_list entries.
+ *
+ *  Keys, most significant first: GrNr, SubNr, Type, BindingEgy; every key
+ *  is compared in ascending order.
+ *
+ *  \param[in] a Pointer to the first id_list entry.
+ *  \param[in] b Pointer to the second id_list entry.
+ *
+ *  \return -1 if a sorts before b, +1 if a sorts after b, 0 otherwise.
+ */
+int subfind_compare_ID_list(const void *a, const void *b)
+{
+  const struct id_list *lhs = (const struct id_list *)a;
+  const struct id_list *rhs = (const struct id_list *)b;
+
+  /* key 1: GrNr */
+  if(lhs->GrNr < rhs->GrNr)
+    return -1;
+  if(lhs->GrNr > rhs->GrNr)
+    return +1;
+
+  /* key 2: SubNr */
+  if(lhs->SubNr < rhs->SubNr)
+    return -1;
+  if(lhs->SubNr > rhs->SubNr)
+    return +1;
+
+  /* key 3: Type */
+  if(lhs->Type < rhs->Type)
+    return -1;
+  if(lhs->Type > rhs->Type)
+    return +1;
+
+  /* key 4: BindingEgy */
+  if(lhs->BindingEgy < rhs->BindingEgy)
+    return -1;
+  return (lhs->BindingEgy > rhs->BindingEgy) ? +1 : 0;
+}
+
+/*! \brief Sorting kernel for subgroup_properties entries.
+ *
+ *  Keys, most significant first: GrNr, then SubNr; both are compared in
+ *  ascending order.
+ *
+ *  \param[in] a Pointer to the first subgroup_properties entry.
+ *  \param[in] b Pointer to the second subgroup_properties entry.
+ *
+ *  \return -1 if a sorts before b, +1 if a sorts after b, 0 otherwise.
+ */
+int subfind_compare_SubGroup_GrNr_SubNr(const void *a, const void *b)
+{
+  const struct subgroup_properties *lhs = (const struct subgroup_properties *)a;
+  const struct subgroup_properties *rhs = (const struct subgroup_properties *)b;
+
+  /* primary key: GrNr */
+  if(lhs->GrNr < rhs->GrNr)
+    return -1;
+  if(lhs->GrNr > rhs->GrNr)
+    return +1;
+
+  /* secondary key: SubNr */
+  if(lhs->SubNr < rhs->SubNr)
+    return -1;
+  return (lhs->SubNr > rhs->SubNr) ? +1 : 0;
+}
+
+/*! \brief Sorting kernel that orders sort_r2list entries by r.
+ *
+ *  Only the r member is inspected; the order is ascending.
+ *
+ *  \param[in] a Pointer to the first sort_r2list entry.
+ *  \param[in] b Pointer to the second sort_r2list entry.
+ *
+ *  \return -1 if a's r is smaller, +1 if it is larger, 0 otherwise.
+ */
+int subfind_compare_dist_rotcurve(const void *a, const void *b)
+{
+  const sort_r2list *lhs = (const sort_r2list *)a;
+  const sort_r2list *rhs = (const sort_r2list *)b;
+
+  if(lhs->r < rhs->r)
+    return -1;
+
+  return (lhs->r > rhs->r) ? +1 : 0;
+}
+
+/*! \brief Sorting kernel that orders double values from largest to smallest.
+ *
+ *  Note the inverted sense: the larger value sorts first (descending order).
+ *
+ *  \param[in] a Pointer to the first double.
+ *  \param[in] b Pointer to the second double.
+ *
+ *  \return (-1,0,1), -1 if a > b, +1 if a < b, 0 otherwise.
+ */
+int subfind_compare_binding_energy(const void *a, const void *b)
+{
+  const double lhs = *(const double *)a;
+  const double rhs = *(const double *)b;
+
+  if(lhs > rhs) /* larger value first */
+    return -1;
+
+  return (lhs < rhs) ? +1 : 0;
+}
+
+/*! \brief Sorting kernel for cand_dat entries.
+ *
+ *  Keys, most significant first: bound_length in descending order (the
+ *  larger bound_length sorts first), then rank in ascending order.
+ *
+ *  \param[in] a Pointer to the first cand_dat entry.
+ *  \param[in] b Pointer to the second cand_dat entry.
+ *
+ *  \return -1 if a sorts before b, +1 if a sorts after b, 0 otherwise.
+ */
+int subfind_compare_serial_candidates_boundlength(const void *a, const void *b)
+{
+  const struct cand_dat *lhs = (const struct cand_dat *)a;
+  const struct cand_dat *rhs = (const struct cand_dat *)b;
+
+  /* primary key: bound_length, descending */
+  if(lhs->bound_length > rhs->bound_length)
+    return -1;
+  if(lhs->bound_length < rhs->bound_length)
+    return +1;
+
+  /* secondary key: rank, ascending */
+  if(lhs->rank < rhs->rank)
+    return -1;
+  return (lhs->rank > rhs->rank) ? +1 : 0;
+}
+
+/*! \brief Sorting kernel for cand_dat entries.
+ *
+ *  Keys, most significant first: rank in ascending order, then len in
+ *  descending order (the larger len sorts first).
+ *
+ *  \param[in] a Pointer to the first cand_dat entry.
+ *  \param[in] b Pointer to the second cand_dat entry.
+ *
+ *  \return -1 if a sorts before b, +1 if a sorts after b, 0 otherwise.
+ */
+int subfind_compare_serial_candidates_rank(const void *a, const void *b)
+{
+  const struct cand_dat *lhs = (const struct cand_dat *)a;
+  const struct cand_dat *rhs = (const struct cand_dat *)b;
+
+  /* primary key: rank, ascending */
+  if(lhs->rank < rhs->rank)
+    return -1;
+  if(lhs->rank > rhs->rank)
+    return +1;
+
+  /* secondary key: len, descending */
+  if(lhs->len > rhs->len)
+    return -1;
+  return (lhs->len < rhs->len) ? +1 : 0;
+}
+
+/*! \brief Sorting kernel that orders cand_dat entries by subnr.
+ *
+ *  Only the subnr member is inspected; the order is ascending.
+ *
+ *  \param[in] a Pointer to the first cand_dat entry.
+ *  \param[in] b Pointer to the second cand_dat entry.
+ *
+ *  \return -1 if a's subnr is smaller, +1 if it is larger, 0 otherwise.
+ */
+int subfind_compare_serial_candidates_subnr(const void *a, const void *b)
+{
+  const struct cand_dat *lhs = (const struct cand_dat *)a;
+  const struct cand_dat *rhs = (const struct cand_dat *)b;
+
+  if(lhs->subnr < rhs->subnr)
+    return -1;
+
+  return (lhs->subnr > rhs->subnr) ? +1 : 0;
+}
+
+/*! \brief Sorting kernel that orders coll_cand_dat entries by subnr.
+ *
+ *  Only the subnr member is inspected; the order is ascending.
+ *
+ *  \param[in] a Pointer to the first coll_cand_dat entry.
+ *  \param[in] b Pointer to the second coll_cand_dat entry.
+ *
+ *  \return -1 if a's subnr is smaller, +1 if it is larger, 0 otherwise.
+ */
+int subfind_compare_coll_candidates_subnr(const void *a, const void *b)
+{
+  const struct coll_cand_dat *lhs = (const struct coll_cand_dat *)a;
+  const struct coll_cand_dat *rhs = (const struct coll_cand_dat *)b;
+
+  if(lhs->subnr < rhs->subnr)
+    return -1;
+
+  return (lhs->subnr > rhs->subnr) ? +1 : 0;
+}
+
+/*! \brief Sorting kernel that orders coll_cand_dat entries by nsub.
+ *
+ *  Only the nsub member is inspected; the order is ascending.
+ *
+ *  \param[in] a Pointer to the first coll_cand_dat entry.
+ *  \param[in] b Pointer to the second coll_cand_dat entry.
+ *
+ *  \return -1 if a's nsub is smaller, +1 if it is larger, 0 otherwise.
+ */
+int subfind_compare_coll_candidates_nsubs(const void *a, const void *b)
+{
+  const struct coll_cand_dat *lhs = (const struct coll_cand_dat *)a;
+  const struct coll_cand_dat *rhs = (const struct coll_cand_dat *)b;
+
+  if(lhs->nsub < rhs->nsub)
+    return -1;
+
+  return (lhs->nsub > rhs->nsub) ? +1 : 0;
+}
+
+/*! \brief Sorting kernel for coll_cand_dat entries.
+ *
+ *  Keys, most significant first: bound_length in descending order (the
+ *  larger bound_length sorts first), then rank in ascending order.
+ *
+ *  \param[in] a Pointer to the first coll_cand_dat entry.
+ *  \param[in] b Pointer to the second coll_cand_dat entry.
+ *
+ *  \return -1 if a sorts before b, +1 if a sorts after b, 0 otherwise.
+ */
+int subfind_compare_coll_candidates_boundlength(const void *a, const void *b)
+{
+  const struct coll_cand_dat *lhs = (const struct coll_cand_dat *)a;
+  const struct coll_cand_dat *rhs = (const struct coll_cand_dat *)b;
+
+  /* primary key: bound_length, descending */
+  if(lhs->bound_length > rhs->bound_length)
+    return -1;
+  if(lhs->bound_length < rhs->bound_length)
+    return +1;
+
+  /* secondary key: rank, ascending */
+  if(lhs->rank < rhs->rank)
+    return -1;
+  return (lhs->rank > rhs->rank) ? +1 : 0;
+}
+
+/*! \brief Sorting kernel for coll_cand_dat entries.
+ *
+ *  Keys, most significant first: rank in ascending order, then len in
+ *  descending order (the larger len sorts first).
+ *
+ *  \param[in] a Pointer to the first coll_cand_dat entry.
+ *  \param[in] b Pointer to the second coll_cand_dat entry.
+ *
+ *  \return -1 if a sorts before b, +1 if a sorts after b, 0 otherwise.
+ */
+int subfind_compare_coll_candidates_rank(const void *a, const void *b)
+{
+  const struct coll_cand_dat *lhs = (const struct coll_cand_dat *)a;
+  const struct coll_cand_dat *rhs = (const struct coll_cand_dat *)b;
+
+  /* primary key: rank, ascending */
+  if(lhs->rank < rhs->rank)
+    return -1;
+  if(lhs->rank > rhs->rank)
+    return +1;
+
+  /* secondary key: len, descending */
+  if(lhs->len > rhs->len)
+    return -1;
+  return (lhs->len < rhs->len) ? +1 : 0;
+}
+
+/*! \brief Sorting kernel that orders MyIDType values in ascending order.
+ *
+ *  The two values are compared directly with < and >.
+ *
+ *  \param[in] a Pointer to the first MyIDType value.
+ *  \param[in] b Pointer to the second MyIDType value.
+ *
+ *  \return -1 if a's value is smaller, +1 if it is larger, 0 otherwise.
+ */
+int subfind_fof_compare_ID(const void *a, const void *b)
+{
+  const MyIDType lhs = *(const MyIDType *)a;
+  const MyIDType rhs = *(const MyIDType *)b;
+
+  if(lhs < rhs)
+    return -1;
+
+  return (lhs > rhs) ? +1 : 0;
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_vars.c b/src/amuse/community/arepo/src/subfind/subfind_vars.c
new file mode 100644
index 0000000000..37d25a8cd5
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_vars.c
@@ -0,0 +1,102 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/subfind/subfind_vars.c
+ * \date        05/2018
+ * \brief       Variables for the subfind algorithm.
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 14.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../main/allvars.h"
+
+#ifdef SUBFIND
+
+#include "../domain/domain.h"
+#include "../fof/fof.h"
+#include "subfind.h"
+
+double SubDomainCorner[3], SubDomainCenter[3], SubDomainLen, SubDomainFac; /* definitions of the subfind globals start here */
+double SubDomainInverseLen, SubDomainBigFac;
+
+MyDouble GrCM[3];
+
+int GrNr;
+int NumPartGroup;
+
+MPI_Comm SubComm; /* NOTE(review): presumably paired with CommSplitColor/SubNTask/SubThisTask below -- confirm at point of use */
+int CommSplitColor;
+int SubNTask, SubThisTask;
+int SubTagOffset;
+
+struct topnode_data *SubTopNodes;
+struct local_topnode_data *Sub_LocTopNodes;
+
+double SubTreeAllocFactor;
+
+#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) /* SUBFIND is already guaranteed by the enclosing #ifdef SUBFIND */
+int *NodeGrNr;
+#endif /* #if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) */
+
+int *SubDomainTask;
+int *SubDomainNodeIndex;
+int *SubNextnode;
+int SubNTopleaves;
+int SubNTopnodes;
+
+int SubTree_MaxPart;
+int SubTree_NumNodes;
+int SubTree_MaxNodes;
+int SubTree_FirstNonTopLevelNode;
+int SubTree_NumPartImported;
+int SubTree_NumPartExported;
+int SubTree_ImportedNodeOffset;
+int SubTree_NextFreeNode;
+struct NODE *SubNodes;
+struct ExtNODE *SubExtNodes;
+int *SubTree_ResultIndexList;
+int *SubTree_Task_list;
+unsigned long long *SubTree_IntPos_list;
+MyDouble *SubTree_Pos_list;
+
+int Ncollective;
+int NprocsCollective;
+int MaxNsubgroups = 0; /* the only definition in this list with an explicit initializer */
+int MaxNgbs;
+int MaxSerialGroupLen;
+
+r2type *R2list;
+
+int NumPaux;
+
+struct paux_data *Paux; /* the pointers below are only defined here; no storage is allocated in this file */
+struct proc_assign_data *ProcAssign;
+struct subgroup_properties *SubGroup;
+struct nearest_r2_data *R2Loc;
+struct nearest_ngb_data *NgbLoc;
+struct submp_data *submp;
+struct cand_dat *candidates;
+struct coll_cand_dat *coll_candidates;
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/time_integration/darkenergy.c b/src/amuse/community/arepo/src/time_integration/darkenergy.c
new file mode 100644
index 0000000000..c04f181e9f
--- /dev/null
+++ b/src/amuse/community/arepo/src/time_integration/darkenergy.c
@@ -0,0 +1,74 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/time_integration/darkenergy.c
+ * \date        05/2018
+ * \brief       Contains the hubble function for a LCDM cosmology.
+ * \details     Using Dark Energy instead of a cosmological constant can be
+ *              achieved by replacing Lambda by Lambda * a^(-3*(1+w)) in the
+ *              Hubble function. w = -1 gives back a standard cosmological
+ *              constant! Also w = -1/3 gives Lambda / a^2, which then cancels
+ *              within the Hubble function and is then equal to the dynamics
+ *              of a universe with Lambda = 0 !
+ *
+ *              For a time varying w one has to replace Lambda * a^(-3*(1+w))
+ *              by Lambda * exp(Integral(a,1,3*(1+w)/a))
+ *
+ *              Dark Energy does not alter the power spectrum of initial
+ *              conditions. To get the same cluster for various values or
+ *              functions of w, one has to assign a new redshift to the
+ *              initial conditions to match the linear growth factors, so
+ *              g(z=0)/g(z_ini) == g_w(z=0)/g_w(z_ini^new). Also the initial
+ *              velocity field has to be scaled by
+ *(Hubble_w(z_ini^new)*Omega_w(z_ini^new)^0.6)/(Hubble(z_ini)*Omega(z_ini)^0.6)
+ *              where _w means the corresponding functions including the terms
+ *              for Dark Energy.
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 04.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief Hubble function of the LCDM background cosmology.
+ *
+ *  Evaluates All.Hubble * sqrt(Omega0/a^3 + (1-Omega0-OmegaLambda)/a^2 + OmegaLambda).
+ *
+ *  \param[in] a Scalefactor.
+ *
+ *  \return Hubble parameter in internal units.
+ */
+double INLINE_FUNC hubble_function(double a)
+{
+  const double a2        = a * a;
+  const double matter    = All.Omega0 / (a2 * a);
+  const double curvature = (1 - All.Omega0 - All.OmegaLambda) / a2;
+
+  /* sum the terms in the same order as the formula above, then scale the root by All.Hubble */
+  return All.Hubble * sqrt(matter + curvature + All.OmegaLambda);
+}
diff --git a/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c b/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c
new file mode 100644
index 0000000000..88b7f89a34
--- /dev/null
+++ b/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c
@@ -0,0 +1,484 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/time_integration/do_gravity_hydro.c
+ * \date        05/2018
+ * \brief       Contains the two half step kick operators.
+ * \details     This file contains the functions applying the gravitational
+ *              acceleration to the particles (both gas and gravity only).
+ *              The functions
+ *              find_gravity_timesteps_and_do_gravity_step_first_half and
+ *              do_gravity_step_second_half are directly called in the main
+ *              time-evolution loop in run.c.
+ *              contains functions:
+ *                static inline void kick_particle(int i, double dt_gravkick,
+ *                  MySingle * Grav)
+ *                void find_gravity_timesteps_and_do_gravity_step_first_half(
+ *                  void)
+ *                void do_gravity_step_second_half(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 04.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+
+/*! \brief Adds a gravitational velocity kick to a single particle.
+ *
+ *  The velocity changes by Grav * dt_gravkick. For cells with P[i].Type == 0
+ *  the momentum and the energy stored in SphP[i] are updated consistently.
+ *
+ *  \param[in] i Index of particle in P and SphP arrays.
+ *  \param[in] dt_gravkick Timestep of gravity kick operation.
+ *  \param[in] Grav Gravitational acceleration of particle (3 components).
+ *
+ *  \return void
+ */
+static inline void kick_particle(int i, double dt_gravkick, MySingle* Grav)
+{
+  if(P[i].Type != 0)
+    {
+      /* collisionless particle: only the velocity changes */
+      for(int k = 0; k < 3; k++)
+        P[i].Vel[k] += Grav[k] * dt_gravkick;
+      return;
+    }
+
+  /* gas cell: take out the old kinetic energy, kick, then put the new one back */
+  SphP[i].Energy -= 0.5 * P[i].Mass * (P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]);
+  for(int k = 0; k < 3; k++)
+    {
+      const double dv = Grav[k] * dt_gravkick;
+
+      P[i].Vel[k] += dv;
+      SphP[i].Momentum[k] += P[i].Mass * dv;
+    }
+  SphP[i].Energy += 0.5 * P[i].Mass * (P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]);
+}
+
+/*! \brief Performs the first half step kick operator.
+ *
+ *  Counterpart of do_gravity_step_second_half(). On a PM step (PMGRID) the
+ *  long-range kick from P[].GravPM is applied first, then the kick from
+ *  P[].GravAccel. Gravity time bins of the active particles are updated on
+ *  the way (not in the FORCE_EQUAL_TIMESTEPS branch). Momentum and energy of
+ *  gas cells are kept consistent by kick_particle().
+ *
+ *  \return void
+ */
+void find_gravity_timesteps_and_do_gravity_step_first_half(void)
+{
+#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
+
+  TIMER_START(CPU_DRIFTS);
+
+  int idx, i;
+  integertime ti_step, tstart, tend;
+  double dt_gravkick;
+
+#ifdef PMGRID
+  if(All.PM_Ti_endstep == All.Ti_Current) /* need to do long-range kick */
+    {
+      ti_step = get_timestep_pm();
+
+      All.PM_Ti_begstep = All.PM_Ti_endstep;
+      All.PM_Ti_endstep = All.PM_Ti_begstep + ti_step;
+
+      tstart = All.PM_Ti_begstep;
+      tend   = tstart + ti_step / 2; /* midpoint of the new PM step */
+
+      if(All.ComovingIntegrationOn)
+        dt_gravkick = get_gravkick_factor(tstart, tend);
+      else
+        dt_gravkick = (tend - tstart) * All.Timebase_interval;
+
+      for(i = 0; i < NumPart; i++) /* the PM kick goes to every local particle */
+        kick_particle(i, dt_gravkick, P[i].GravPM);
+    }
+#endif /* #ifdef PMGRID */
+
+#ifdef HIERARCHICAL_GRAVITY
+  /* First, move all active particles to the highest allowed timestep for this synchronization time.
+   * They will then cascade down to smaller timesteps as needed.
+   */
+
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      int i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+      int bin    = All.HighestSynchronizedTimeBin;
+      int binold = P[i].TimeBinGrav;
+
+      timebin_move_particle(&TimeBinsGravity, i, binold, bin);
+      P[i].TimeBinGrav = bin;
+    }
+
+  long long Previous_GlobalNActiveGravity = TimeBinsGravity.GlobalNActiveParticles;
+
+  double dt_gravsum = 0; /* running sum of the applied kick factors (only written in this function) */
+
+  int bin_highest_occupied = 0;
+  int timebin;
+  /* go over all timebins */
+
+  for(timebin = All.HighestSynchronizedTimeBin; timebin >= 0; timebin--)
+    {
+      TimeBinsGravity.NActiveParticles = 0;
+      timebin_add_particles_of_timebin_to_list_of_active_particles(&TimeBinsGravity, timebin);
+      sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);
+
+      if(TimeBinsGravity.GlobalNActiveParticles == 0) /* we are done at this point */
+        break;
+
+      /* calculate gravity for all active particles */
+      if(TimeBinsGravity.GlobalNActiveParticles != Previous_GlobalNActiveGravity)
+        {
+          TIMER_STOP(CPU_DRIFTS);
+
+          compute_grav_accelerations(timebin, FLAG_PARTIAL_TREE);
+
+          TIMER_START(CPU_DRIFTS);
+        }
+
+      int nfine = 0; /* local count of particles whose step is too large for this bin */
+      for(int k = 0; k < TimeBinsGravity.NActiveParticles; k++)
+        {
+          int target = TimeBinsGravity.ActiveParticleList[k];
+          if(target < 0) /* skip negative list entries, like every other loop over this list */
+            continue;
+          if(test_if_grav_timestep_is_too_large(target, P[target].TimeBinGrav))
+            nfine++;
+        }
+
+      long long nfine_tot;
+      sumup_large_ints(1, &nfine, &nfine_tot);
+
+      int push_down_flag = 0;
+      if(nfine_tot > 0.33 * TimeBinsGravity.GlobalNActiveParticles) /* more than 33% flagged: push the whole bin down */
+        push_down_flag = 1;
+
+      for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+        {
+          int i = TimeBinsGravity.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+          int binold = P[i].TimeBinGrav;
+
+          if(push_down_flag || test_if_grav_timestep_is_too_large(i, binold))
+            {
+              int bin = binold - 1;
+              if(bin == 0)
+                {
+                  print_particle_info(i);
+                  terminate("timestep too small");
+                }
+
+              timebin_move_particle(&TimeBinsGravity, i, binold, bin);
+              P[i].TimeBinGrav = bin;
+            }
+          else if(binold > bin_highest_occupied)
+            bin_highest_occupied = binold;
+        }
+
+      if(All.HighestOccupiedTimeBin == 0)
+        {
+          MPI_Allreduce(&bin_highest_occupied, &All.HighestOccupiedTimeBin, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+          if(All.HighestOccupiedTimeBin > 0)
+            {
+              mpi_printf("KICKS: Special Start-up Fix: All.HighestOccupiedGravTimeBin=%d\n", All.HighestOccupiedTimeBin);
+
+              for(i = 0; i < GRAVCOSTLEVELS; i++)
+                {
+                  if(All.LevelToTimeBin[i] == 0)
+                    All.LevelToTimeBin[i] = All.HighestOccupiedTimeBin;
+                }
+            }
+        }
+
+      if(TimeBinsGravity.GlobalNActiveParticles)
+        {
+          ti_step = timebin ? (((integertime)1) << timebin) : 0;
+          tstart  = All.Ti_begstep[timebin]; /* beginning of step */
+          tend    = tstart + ti_step / 2;    /* midpoint of step */
+
+          if(All.ComovingIntegrationOn)
+            dt_gravkick = get_gravkick_factor(tstart, tend);
+          else
+            dt_gravkick = (tend - tstart) * All.Timebase_interval;
+
+          if(timebin < All.HighestSynchronizedTimeBin) /* subtract the half-step kick factor of the next higher bin */
+            {
+              ti_step = (timebin + 1) ? (((integertime)1) << (timebin + 1)) : 0;
+
+              tstart = All.Ti_begstep[timebin + 1]; /* beginning of step */
+              tend   = tstart + ti_step / 2;        /* midpoint of step */
+
+              if(All.ComovingIntegrationOn)
+                dt_gravkick -= get_gravkick_factor(tstart, tend);
+              else
+                dt_gravkick -= (tend - tstart) * All.Timebase_interval;
+            }
+
+          dt_gravsum += dt_gravkick;
+
+          mpi_printf("KICKS: 1st gravity for hierarchical timebin=%d:  %lld particles\n", timebin,
+                     TimeBinsGravity.GlobalNActiveParticles);
+
+          for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+            {
+              int i = TimeBinsGravity.ActiveParticleList[idx];
+              if(i < 0)
+                continue;
+
+              kick_particle(i, dt_gravkick, P[i].GravAccel);
+            }
+          Previous_GlobalNActiveGravity = TimeBinsGravity.GlobalNActiveParticles;
+        }
+    }
+
+  /* reconstruct list of active particles because it is used for other things too (i.e. wind particles) */
+  timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin);
+  sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);
+#else /* #ifdef HIERARCHICAL_GRAVITY */
+
+#ifdef FORCE_EQUAL_TIMESTEPS
+  // gravity timebin is already set, and not anymore 0 as All.HighestActiveTimeBin, but all particles should receive a first half kick
+  // in the 0-th timestep
+  if(All.NumCurrentTiStep == 0)
+    timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, TIMEBINS);
+  else
+#endif /* #ifdef FORCE_EQUAL_TIMESTEPS */
+    timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin);
+  sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);
+
+  mpi_printf("KICKS: 1st gravity for highest active timebin=%d:  particles %lld\n", All.HighestActiveTimeBin,
+             TimeBinsGravity.GlobalNActiveParticles);
+
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+#ifndef FORCE_EQUAL_TIMESTEPS
+      int binold = P[i].TimeBinGrav;
+      int bin    = -1;
+
+      ti_step = get_timestep_gravity(i);
+      timebins_get_bin_and_do_validity_checks(ti_step, &bin, P[i].TimeBinGrav);
+
+      if(P[i].Type == 0) /* a Type 0 cell never gets a gravity bin above its hydro bin */
+        {
+          int bin_hydro = P[i].TimeBinHydro;
+          if(bin_hydro < bin)
+            bin = bin_hydro;
+        }
+
+      ti_step = bin ? (((integertime)1) << bin) : 0;
+
+      timebin_move_particle(&TimeBinsGravity, i, binold, bin);
+      P[i].TimeBinGrav = bin;
+#else  /* #ifndef FORCE_EQUAL_TIMESTEPS */
+      int bin = P[i].TimeBinGrav;
+      ti_step = bin ? (((integertime)1) << bin) : 0;
+#endif /* #ifndef FORCE_EQUAL_TIMESTEPS #else */
+
+      tstart = All.Ti_begstep[bin];  /* beginning of step */
+      tend   = tstart + ti_step / 2; /* midpoint of step */
+
+      if(All.ComovingIntegrationOn)
+        dt_gravkick = get_gravkick_factor(tstart, tend);
+      else
+        dt_gravkick = (tend - tstart) * All.Timebase_interval;
+
+      kick_particle(i, dt_gravkick, P[i].GravAccel);
+    }
+#endif /* #ifdef HIERARCHICAL_GRAVITY #else */
+
+  TIMER_STOP(CPU_DRIFTS);
+#endif /* #if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) */
+}
+
+/*! \brief Performs the second half step kick operator.
+ *
+ * Counterpart of do_gravity_step_first_half(): accelerations are recomputed
+ * with compute_grav_accelerations() and the kicks for the second half
+ * (midpoint to end) of the gravity steps are applied to the listed active
+ * particles via kick_particle(). With PMGRID, long_range_force() is called on
+ * PM steps and all particles then get the long-range kick from P[i].GravPM.
+ */
+void do_gravity_step_second_half(void)
+{
+#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
+  TIMER_START(CPU_DRIFTS);
+  int idx;
+  char fullmark[8]; /* "(*)" if the highest active timebin is also the highest occupied one, else "" */
+
+  if(All.HighestActiveTimeBin == All.HighestOccupiedTimeBin)
+    sprintf(fullmark, "(*)");
+  else
+    fullmark[0] = 0;
+
+  if(ThisTask == 0) /* only task 0 writes the step header to FdTimings */
+    fprintf(FdTimings, "\nStep%s: %d, t: %g, dt: %g, highest active timebin: %d  (lowest active: %d, highest occupied: %d)\n",
+            fullmark, All.NumCurrentTiStep, All.Time, All.TimeStep, All.HighestActiveTimeBin, All.LowestActiveTimeBin,
+            All.HighestOccupiedTimeBin);
+
+  double dt_gravkick;
+#ifdef PMGRID
+  if(All.PM_Ti_endstep == All.Ti_Current) /* need to do long-range kick */
+    {
+      TIMER_STOP(CPU_DRIFTS);
+      long_range_force();
+      TIMER_START(CPU_DRIFTS);
+    }
+#endif /* #ifdef PMGRID */
+#ifdef HIERARCHICAL_GRAVITY
+  /* go over all timebins in ascending order (NOTE(review): "inverse" presumably refers to the first half kick), ending with the cumulative force */
+  for(int timebin = 0; timebin <= All.HighestActiveTimeBin; timebin++)
+    {
+      if(TimeBinSynchronized[timebin])
+        {
+          /* need to make all timebins below the current one active */
+          timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, timebin);
+          sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);
+
+          if(TimeBinsGravity.GlobalNActiveParticles)
+            {
+              TIMER_STOP(CPU_DRIFTS);
+
+              compute_grav_accelerations(timebin, (timebin == All.HighestActiveTimeBin) ? FLAG_FULL_TREE : FLAG_PARTIAL_TREE);
+
+              TIMER_START(CPU_DRIFTS);
+
+              mpi_printf("KICKS: 2nd gravity for hierarchical timebin=%d:  particles %lld\n", timebin,
+                         TimeBinsGravity.GlobalNActiveParticles);
+
+              integertime ti_step = timebin ? (((integertime)1) << timebin) : 0;
+
+              integertime tend = All.Ti_begstep[timebin]; /* end of step (Note: All.Ti_begstep[] has already been advanced for the next
+                                                             step at this point)   */
+              integertime tstart = tend - ti_step / 2;    /* midpoint of step */
+
+              if(All.ComovingIntegrationOn)
+                dt_gravkick = get_gravkick_factor(tstart, tend);
+              else
+                dt_gravkick = (tend - tstart) * All.Timebase_interval;
+
+              if(timebin < All.HighestActiveTimeBin) /* take off the half-step factor of the next higher timebin */
+                {
+                  ti_step = (timebin + 1) ? (((integertime)1) << (timebin + 1)) : 0;
+
+                  tend = All.Ti_begstep[timebin + 1]; /* end of step (Note: All.Ti_begstep[] has already been advanced for the next
+                                                         step at this point)   */
+                  tstart = tend - ti_step / 2;        /* midpoint of step */
+
+                  if(All.ComovingIntegrationOn)
+                    dt_gravkick -= get_gravkick_factor(tstart, tend);
+                  else
+                    dt_gravkick -= (tend - tstart) * All.Timebase_interval;
+                }
+
+              for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+                {
+                  int i = TimeBinsGravity.ActiveParticleList[idx];
+                  if(i < 0)
+                    continue;
+
+                  kick_particle(i, dt_gravkick, P[i].GravAccel);
+
+                  if(P[i].Type == 0)
+                    {
+                      if(All.HighestOccupiedTimeBin == timebin) /* keep a copy of the acceleration obtained for this bin */
+                        for(int j = 0; j < 3; j++)
+                          SphP[i].FullGravAccel[j] = P[i].GravAccel[j];
+                    }
+                }
+            }
+        }
+    }
+
+#else  /* #ifdef HIERARCHICAL_GRAVITY */
+  timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin);
+  sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);
+
+  if(TimeBinsGravity.GlobalNActiveParticles)
+    {
+      TIMER_STOP(CPU_DRIFTS);
+
+      /* calculate gravity for all active particles */
+      compute_grav_accelerations(All.HighestActiveTimeBin, FLAG_FULL_TREE);
+
+      TIMER_START(CPU_DRIFTS);
+
+      mpi_printf("KICKS: 2nd gravity for highest active timebin=%d:  particles %lld\n", All.HighestActiveTimeBin,
+                 TimeBinsGravity.GlobalNActiveParticles);
+
+      for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+        {
+          int i = TimeBinsGravity.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          integertime ti_step = P[i].TimeBinGrav ? (((integertime)1) << P[i].TimeBinGrav) : 0;
+          integertime tend    = All.Ti_begstep[P[i].TimeBinGrav];
+          integertime tstart  = tend - ti_step / 2; /* midpoint of step */
+
+          if(All.ComovingIntegrationOn)
+            dt_gravkick = get_gravkick_factor(tstart, tend);
+          else
+            dt_gravkick = (tend - tstart) * All.Timebase_interval;
+
+          kick_particle(i, dt_gravkick, P[i].GravAccel);
+        }
+    }
+#endif /* #ifdef HIERARCHICAL_GRAVITY #else */
+
+#ifdef PMGRID
+  if(All.PM_Ti_endstep == All.Ti_Current) /* need to do long-range kick */
+    {
+      integertime ti_step = All.PM_Ti_endstep - All.PM_Ti_begstep; /* length of the PM step */
+      integertime tstart  = All.PM_Ti_begstep + ti_step / 2;       /* midpoint of the PM step */
+      integertime tend    = tstart + ti_step / 2;                  /* half a PM step after the midpoint */
+
+      if(All.ComovingIntegrationOn)
+        dt_gravkick = get_gravkick_factor(tstart, tend);
+      else
+        dt_gravkick = (tend - tstart) * All.Timebase_interval;
+
+      for(int i = 0; i < NumPart; i++) /* all NumPart particles, not only the active ones */
+        kick_particle(i, dt_gravkick, P[i].GravPM);
+    }
+#endif /* #ifdef PMGRID */
+
+  TIMER_STOP(CPU_DRIFTS);
+#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY)|| defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) */
+}
diff --git a/src/amuse/community/arepo/src/time_integration/driftfac.c b/src/amuse/community/arepo/src/time_integration/driftfac.c
new file mode 100644
index 0000000000..eae2e438aa
--- /dev/null
+++ b/src/amuse/community/arepo/src/time_integration/driftfac.c
@@ -0,0 +1,307 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/time_integration/driftfac.c
+ * \date        05/2018
+ * \brief       Methods for drift and kick pre-factors needed for
+ *              simulations in a cosmologically expanding box.
+ * \details     contains functions:
+ *                double drift_integ(double a, void *param)
+ *                double gravkick_integ(double a, void *param)
+ *                double hydrokick_integ(double a, void *param)
+ *                void init_drift_table(void)
+ *                double get_drift_factor(integertime time0, integertime time1)
+ *                double get_gravkick_factor(integertime time0, integertime
+ *                  time1)
+ *                double get_hydrokick_factor(integertime time0, integertime
+ *                  time1)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_integration.h>
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! table of cosmological drift factors; entry i is the integral up to the (i+1)-th log-spaced node */
+static double DriftTable[DRIFT_TABLE_LENGTH];
+
+/*! table of cosmological kick factors for gravitational forces (same layout as DriftTable) */
+static double GravKickTable[DRIFT_TABLE_LENGTH];
+
+/*! table of cosmological kick factors for hydrodynamical forces (same layout as DriftTable) */
+static double HydroKickTable[DRIFT_TABLE_LENGTH];
+
+static double logTimeBegin; /*!< log(All.TimeBegin), set by init_drift_table() */
+static double logTimeMax;   /*!< log(All.TimeMax), set by init_drift_table() */
+
+/*! \brief Integrand of the cosmological drift factor.
+ *
+ *  Used as a gsl_function callback when the drift table is filled.
+ *
+ *  \param[in] a Scale factor at which the integrand is evaluated.
+ *  \param[in] param Not used.
+ *
+ *  \return 1 / (H(a) * a^3), with H(a) = hubble_function(a).
+ */
+double drift_integ(double a, void *param)
+{
+  /* expansion rate at scale factor a */
+  const double hubble = hubble_function(a);
+
+  /* multiplied left to right: ((H * a) * a) * a */
+  return 1 / (hubble * a * a * a);
+}
+
+/*! \brief Integrand of the cosmological kick factor for gravity.
+ *
+ *  Used as a gsl_function callback when the gravity kick table is filled.
+ *
+ *  \param[in] a Scale factor at which the integrand is evaluated.
+ *  \param[in] param Not used.
+ *
+ *  \return 1 / (H(a) * a^2), with H(a) = hubble_function(a).
+ */
+double gravkick_integ(double a, void *param)
+{
+  /* expansion rate at scale factor a */
+  const double hubble = hubble_function(a);
+
+  /* multiplied left to right: (H * a) * a */
+  return 1 / (hubble * a * a);
+}
+
+/*! \brief Integrand of the cosmological kick factor for hydrodynamics.
+ *
+ *  Used as a gsl_function callback when the hydro kick table is filled.
+ *
+ *  \param[in] a Scale factor at which the integrand is evaluated.
+ *  \param[in] param Not used.
+ *
+ *  \return 1 / (H(a) * a^(3 * GAMMA_MINUS1) * a), with H(a) = hubble_function(a).
+ */
+double hydrokick_integ(double a, void *param)
+{
+  const double hubble  = hubble_function(a);
+  const double a_power = pow(a, 3 * GAMMA_MINUS1);
+
+  /* multiplied left to right: (H * a^(3 * GAMMA_MINUS1)) * a */
+  return 1 / (hubble * a_power * a);
+}
+
+/*! \brief Fills the lookup tables of cosmological drift and kick pre-factors.
+ *
+ *  For every table entry, drift_integ(), gravkick_integ() and
+ *  hydrokick_integ() are integrated numerically with gsl_integration_qag()
+ *  from exp(logTimeBegin) up to that entry's logarithmically spaced node.
+ *
+ *  \return void
+ */
+void init_drift_table(void)
+{
+#define WORKSIZE 100000
+  gsl_function F;
+  double value, err_estimate;
+
+  logTimeBegin = log(All.TimeBegin);
+  logTimeMax   = log(All.TimeMax);
+
+  gsl_integration_workspace *ws = gsl_integration_workspace_alloc(WORKSIZE);
+
+  for(int entry = 0; entry < DRIFT_TABLE_LENGTH; entry++)
+    {
+      /* integration limits in scale factor; all three integrals of an entry share them */
+      const double a_lo = exp(logTimeBegin);
+      const double a_hi = exp(logTimeBegin + ((logTimeMax - logTimeBegin) / DRIFT_TABLE_LENGTH) * (entry + 1));
+
+      F.function = &drift_integ;
+      gsl_integration_qag(&F, a_lo, a_hi, 0, 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS41, ws, &value, &err_estimate);
+      DriftTable[entry] = value;
+
+      F.function = &gravkick_integ;
+      gsl_integration_qag(&F, a_lo, a_hi, 0, 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS41, ws, &value, &err_estimate);
+      GravKickTable[entry] = value;
+
+      F.function = &hydrokick_integ;
+      gsl_integration_qag(&F, a_lo, a_hi, 0, 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS41, ws, &value, &err_estimate);
+      HydroKickTable[entry] = value;
+    }
+
+  gsl_integration_workspace_free(ws);
+}
+
+/*! \brief Returns the cosmological drift pre-factor between two integer
+ *         times by linear interpolation in the table built by
+ *         init_drift_table(). The last (time0, time1) result is cached.
+ *
+ *  \param[in] time0 Start time.
+ *  \param[in] time1 End time.
+ *
+ *   \return \f[ \int_{a_0}^{a_1} \frac{{\rm d}a}{H(a)\,a^3} \f].
+ */
+double get_drift_factor(integertime time0, integertime time1)
+{
+  double a1, a2, df1, df2, u1, u2;
+  int i1, i2;
+  static integertime last_time0 = -1, last_time1 = -1;
+  static double last_value;
+
+  if(time0 == last_time0 && time1 == last_time1)
+    return last_value;
+
+  /* note: will only be called for cosmological integration */
+
+  a1 = logTimeBegin + time0 * All.Timebase_interval;
+  a2 = logTimeBegin + time1 * All.Timebase_interval;
+
+  u1 = (a1 - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH; /* position in table units */
+  i1 = (int)u1;
+  if(i1 >= DRIFT_TABLE_LENGTH)
+    i1 = DRIFT_TABLE_LENGTH - 1;
+
+  if(i1 < 1) /* u1 below the first node: the integral is 0 at u = 0, so use the line through the origin */
+    df1 = u1 * DriftTable[0];
+  else /* DriftTable[k] is the integral up to u = k + 1; interpolate between the nodes at u = i1 and u = i1 + 1 */
+    df1 = DriftTable[i1 - 1] + (DriftTable[i1] - DriftTable[i1 - 1]) * (u1 - i1);
+
+  u2 = (a2 - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH; /* position in table units */
+  i2 = (int)u2;
+  if(i2 >= DRIFT_TABLE_LENGTH)
+    i2 = DRIFT_TABLE_LENGTH - 1;
+
+  if(i2 < 1) /* same rule as for u1; u in [1,2) must use the tabulated nodes to stay continuous at u = 2 */
+    df2 = u2 * DriftTable[0];
+  else
+    df2 = DriftTable[i2 - 1] + (DriftTable[i2] - DriftTable[i2 - 1]) * (u2 - i2);
+
+  last_time0 = time0;
+  last_time1 = time1;
+
+  return last_value = (df2 - df1);
+}
+
+/*! \brief Returns the cosmological pre-factor for a gravitational kick
+ *         between two integer times by linear interpolation in the table
+ *         built by init_drift_table(). The last result is cached.
+ *
+ *  \param[in] time0 Start time.
+ *  \param[in] time1 End time.
+ *
+ *   \return Gravkick factor, \f[ \int_{a_0}^{a_1} \frac{{\rm d}a}{H(a)\,a^2} \f].
+ */
+double get_gravkick_factor(integertime time0, integertime time1)
+{
+  double a1, a2, df1, df2, u1, u2;
+  int i1, i2;
+  static integertime last_time0 = -1, last_time1 = -1;
+  static double last_value;
+
+  if(time0 == last_time0 && time1 == last_time1)
+    return last_value;
+
+  /* note: will only be called for cosmological integration */
+
+  a1 = logTimeBegin + time0 * All.Timebase_interval;
+  a2 = logTimeBegin + time1 * All.Timebase_interval;
+
+  u1 = (a1 - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH; /* position in table units */
+  i1 = (int)u1;
+  if(i1 >= DRIFT_TABLE_LENGTH)
+    i1 = DRIFT_TABLE_LENGTH - 1;
+
+  if(i1 < 1) /* u1 below the first node: the integral is 0 at u = 0, so use the line through the origin */
+    df1 = u1 * GravKickTable[0];
+  else /* GravKickTable[k] is the integral up to u = k + 1; interpolate between the nodes at u = i1 and u = i1 + 1 */
+    df1 = GravKickTable[i1 - 1] + (GravKickTable[i1] - GravKickTable[i1 - 1]) * (u1 - i1);
+
+  u2 = (a2 - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH; /* position in table units */
+  i2 = (int)u2;
+  if(i2 >= DRIFT_TABLE_LENGTH)
+    i2 = DRIFT_TABLE_LENGTH - 1;
+
+  if(i2 < 1) /* same rule as for u1; u in [1,2) must use the tabulated nodes to stay continuous at u = 2 */
+    df2 = u2 * GravKickTable[0];
+  else
+    df2 = GravKickTable[i2 - 1] + (GravKickTable[i2] - GravKickTable[i2 - 1]) * (u2 - i2);
+
+  last_time0 = time0;
+  last_time1 = time1;
+
+  return last_value = (df2 - df1);
+}
+
+/*! \brief Returns the cosmological pre-factor for a hydrodynamical kick
+ *         between two integer times by linear interpolation in the table
+ *         built by init_drift_table(). The last result is cached.
+ *
+ *  \param[in] time0 Start time
+ *  \param[in] time1 End time
+ *
+ *   \return Hydro kick factor (integral of hydrokick_integ() over the interval).
+ */
+double get_hydrokick_factor(integertime time0, integertime time1)
+{
+  double a1, a2, df1, df2, u1, u2;
+  int i1, i2;
+  static integertime last_time0 = -1, last_time1 = -1;
+  static double last_value;
+
+  if(time0 == last_time0 && time1 == last_time1)
+    return last_value;
+
+  /* note: will only be called for cosmological integration */
+
+  a1 = logTimeBegin + time0 * All.Timebase_interval;
+  a2 = logTimeBegin + time1 * All.Timebase_interval;
+
+  u1 = (a1 - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH; /* position in table units */
+  i1 = (int)u1;
+  if(i1 >= DRIFT_TABLE_LENGTH)
+    i1 = DRIFT_TABLE_LENGTH - 1;
+
+  if(i1 < 1) /* u1 below the first node: the integral is 0 at u = 0, so use the line through the origin */
+    df1 = u1 * HydroKickTable[0];
+  else /* HydroKickTable[k] is the integral up to u = k + 1; interpolate between the nodes at u = i1 and u = i1 + 1 */
+    df1 = HydroKickTable[i1 - 1] + (HydroKickTable[i1] - HydroKickTable[i1 - 1]) * (u1 - i1);
+
+  u2 = (a2 - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH; /* position in table units */
+  i2 = (int)u2;
+  if(i2 >= DRIFT_TABLE_LENGTH)
+    i2 = DRIFT_TABLE_LENGTH - 1;
+
+  if(i2 < 1) /* same rule as for u1; u in [1,2) must use the tabulated nodes to stay continuous at u = 2 */
+    df2 = u2 * HydroKickTable[0];
+  else
+    df2 = HydroKickTable[i2 - 1] + (HydroKickTable[i2] - HydroKickTable[i2 - 1]) * (u2 - i2);
+
+  last_time0 = time0;
+  last_time1 = time1;
+
+  return last_value = (df2 - df1);
+}
diff --git a/src/amuse/community/arepo/src/time_integration/predict.c b/src/amuse/community/arepo/src/time_integration/predict.c
new file mode 100644
index 0000000000..f377af5b5e
--- /dev/null
+++ b/src/amuse/community/arepo/src/time_integration/predict.c
@@ -0,0 +1,506 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/time_integration/predict.c
+ * \date        05/2018
+ * \brief       Routines to find the next sync point, manage the list
+ *              of active timebins/active particles and to drift particles.
+ * \details     contains functions:
+ *                void reconstruct_timebins(void)
+ *                void find_next_sync_point(void)
+ *                void mark_active_timebins(void)
+ *                void drift_all_particles(void)
+ *                void drift_particle(int i, integertime time1)
+ *                static int int_compare(const void *a, const void *b)
+ *                void make_list_of_active_particles(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 08.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief This function (re)builds the time bin lists.
+ *
+ *  It recounts the particles in every timebin and rebuilds the doubly linked
+ *  per-bin lists of TimeBinsHydro (Type 0 entries among the first NumGas) and
+ *  TimeBinsGravity (all NumPart entries), skipping entries with ID == 0 and
+ *  Mass == 0, and then calls make_list_of_active_particles().
+ *
+ *  The lists are stored in 'FirstInTimeBin', 'LastInTimeBin', 'PrevInTimeBin'
+ *  and 'NextInTimeBin' of each struct; the per-bin counters are 'TimeBinCount'.
+ *  With USE_SFR, TimeBinSfr[] accumulates SphP[i].Sfr per hydro timebin.
+ *
+ *  \return void
+ */
+void reconstruct_timebins(void)
+{
+  TIMER_START(CPU_TIMELINE);
+
+  int i, bin;
+
+  for(bin = 0; bin < TIMEBINS; bin++)
+    {
+      TimeBinsHydro.TimeBinCount[bin]   = 0;
+      TimeBinsHydro.FirstInTimeBin[bin] = -1;
+      TimeBinsHydro.LastInTimeBin[bin]  = -1;
+
+      TimeBinsGravity.TimeBinCount[bin]   = 0;
+      TimeBinsGravity.FirstInTimeBin[bin] = -1;
+      TimeBinsGravity.LastInTimeBin[bin]  = -1;
+
+#ifdef USE_SFR
+      TimeBinSfr[bin] = 0;
+#endif
+    }
+
+  for(i = 0; i < NumGas; i++)
+    {
+      if(P[i].ID == 0 && P[i].Mass == 0) /* entries with zero ID and zero mass are left out of the lists */
+        continue;
+
+      if(P[i].Type != 0)
+        continue;
+
+      bin = P[i].TimeBinHydro;
+
+      if(TimeBinsHydro.TimeBinCount[bin] > 0) /* bin already has entries: append i at the tail */
+        {
+          TimeBinsHydro.PrevInTimeBin[i]                                = TimeBinsHydro.LastInTimeBin[bin];
+          TimeBinsHydro.NextInTimeBin[i]                                = -1;
+          TimeBinsHydro.NextInTimeBin[TimeBinsHydro.LastInTimeBin[bin]] = i;
+          TimeBinsHydro.LastInTimeBin[bin]                              = i;
+        }
+      else /* first entry of this bin: i is both head and tail */
+        {
+          TimeBinsHydro.FirstInTimeBin[bin] = TimeBinsHydro.LastInTimeBin[bin] = i;
+          TimeBinsHydro.PrevInTimeBin[i] = TimeBinsHydro.NextInTimeBin[i] = -1;
+        }
+      TimeBinsHydro.TimeBinCount[bin]++;
+
+#ifdef USE_SFR
+      TimeBinSfr[bin] += SphP[i].Sfr;
+#endif
+    }
+
+  for(i = 0; i < NumPart; i++)
+    {
+      if(P[i].ID == 0 && P[i].Mass == 0) /* entries with zero ID and zero mass are left out of the lists */
+        continue;
+
+      bin = P[i].TimeBinGrav;
+
+      if(TimeBinsGravity.TimeBinCount[bin] > 0) /* bin already has entries: append i at the tail */
+        {
+          TimeBinsGravity.PrevInTimeBin[i]                                  = TimeBinsGravity.LastInTimeBin[bin];
+          TimeBinsGravity.NextInTimeBin[i]                                  = -1;
+          TimeBinsGravity.NextInTimeBin[TimeBinsGravity.LastInTimeBin[bin]] = i;
+          TimeBinsGravity.LastInTimeBin[bin]                                = i;
+        }
+      else /* first entry of this bin: i is both head and tail */
+        {
+          TimeBinsGravity.FirstInTimeBin[bin] = TimeBinsGravity.LastInTimeBin[bin] = i;
+          TimeBinsGravity.PrevInTimeBin[i] = TimeBinsGravity.NextInTimeBin[i] = -1;
+        }
+      TimeBinsGravity.TimeBinCount[bin]++;
+    }
+
+  make_list_of_active_particles();
+
+  TIMER_STOP(CPU_TIMELINE);
+}
+
+/*! \brief Finds the next synchronization point, i.e. the earliest next kick
+ *         time of any occupied timebin across all MPI tasks, advances
+ *         All.Ti_Current / All.Time / All.TimeStep to it and marks the timebins.
+ *
+ *  \return void
+ */
+void find_next_sync_point(void)
+{
+  int n;
+  integertime ti_next_kick, ti_next_kick_global, ti_next_for_bin, dt_bin;
+  double timeold;
+
+  TIMER_START(CPU_DRIFTS);
+
+  timeold = All.Time;
+
+  All.NumCurrentTiStep++;
+
+  /* find the next kick time */
+  ti_next_kick = TIMEBASE; /* starting value; lowered below by every occupied timebin */
+
+  for(n = 0; n < TIMEBINS; n++)
+    {
+      int active = TimeBinsHydro.TimeBinCount[n];
+
+#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
+      active += TimeBinsGravity.TimeBinCount[n];
+#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) \
+        */
+      if(active)
+        {
+          if(n > 0)
+            {
+              dt_bin          = (((integertime)1) << n);
+              ti_next_for_bin = (All.Ti_Current / dt_bin) * dt_bin + dt_bin; /* next kick time for this timebin */
+            }
+          else
+            {
+              dt_bin          = 0;
+              ti_next_for_bin = All.Ti_Current; /* bin 0: next kick time is the current time itself */
+            }
+
+          if(ti_next_for_bin < ti_next_kick)
+            ti_next_kick = ti_next_for_bin;
+        }
+    }
+
+#ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME
+  minimum_large_ints(1, &ti_next_kick, &ti_next_kick_global);
+#else  /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME */
+  MPI_Allreduce(&ti_next_kick, &ti_next_kick_global, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); /* NOTE(review): MPI_INT assumes integertime is int here - confirm */
+#endif /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME #else */
+
+  All.Previous_Ti_Current = All.Ti_Current;
+  All.Ti_Current          = ti_next_kick_global;
+
+  if(All.ComovingIntegrationOn)
+    All.Time = All.TimeBegin * exp(All.Ti_Current * All.Timebase_interval);
+  else
+    All.Time = All.TimeBegin + All.Ti_Current * All.Timebase_interval;
+
+  set_cosmo_factors_for_current_time();
+
+  All.TimeStep = All.Time - timeold;
+
+  mark_active_timebins();
+
+  TIMER_STOP(CPU_DRIFTS);
+}
+
+/*! \brief Flags the timebins synchronized at All.Ti_Current and stores the global bin extrema and counts in All.
+ *
+ *  \return void
+ */
+void mark_active_timebins(void)
+{
+  int n;
+  int lowest_active_bin = TIMEBINS, highest_active_bin = 0;
+  int lowest_occupied_bin = TIMEBINS, highest_occupied_bin = 0;
+  int lowest_occupied_gravity_bin = TIMEBINS, highest_occupied_gravity_bin = 0;
+  int highest_synchronized_bin = 0;
+  int nsynchronized_gravity = 0, nsynchronized_hydro = 0;
+  integertime dt_bin;
+
+  /* mark the bins that will be synchronized/active */
+
+  for(n = 0; n < TIMEBINS; n++)
+    {
+      if(TimeBinsGravity.TimeBinCount[n])
+        {
+          if(highest_occupied_gravity_bin < n)
+            highest_occupied_gravity_bin = n;
+
+          if(lowest_occupied_gravity_bin > n)
+            lowest_occupied_gravity_bin = n;
+        }
+
+      int active = TimeBinsHydro.TimeBinCount[n] + TimeBinsGravity.TimeBinCount[n];
+
+      if(active)
+        {
+          if(highest_occupied_bin < n)
+            highest_occupied_bin = n;
+
+          if(lowest_occupied_bin > n)
+            lowest_occupied_bin = n;
+        }
+
+      dt_bin = (((integertime)1) << n);
+
+      if((All.Ti_Current % dt_bin) == 0) /* current time is a multiple of this bin's step */
+        {
+          TimeBinSynchronized[n] = 1;
+          All.Ti_begstep[n]      = All.Ti_Current; /* a new step of this bin begins now */
+
+          nsynchronized_gravity += TimeBinsGravity.TimeBinCount[n];
+          nsynchronized_hydro += TimeBinsHydro.TimeBinCount[n];
+
+          if(highest_synchronized_bin < n)
+            highest_synchronized_bin = n;
+
+          if(active) /* synchronized and occupied on this task */
+            {
+              if(highest_active_bin < n)
+                highest_active_bin = n;
+
+              if(lowest_active_bin > n)
+                lowest_active_bin = n;
+            }
+        }
+      else
+        TimeBinSynchronized[n] = 0;
+    }
+
+  int lowest_in[3], lowest_out[3];
+  lowest_in[0] = lowest_occupied_bin;
+  lowest_in[1] = lowest_occupied_gravity_bin;
+  lowest_in[2] = lowest_active_bin;
+  MPI_Allreduce(lowest_in, lowest_out, 3, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+  All.LowestOccupiedTimeBin     = lowest_out[0];
+  All.LowestOccupiedGravTimeBin = lowest_out[1];
+  All.LowestActiveTimeBin       = lowest_out[2];
+
+  int highest_in[4], highest_out[4];
+  highest_in[0] = highest_occupied_bin;
+  highest_in[1] = highest_occupied_gravity_bin;
+  highest_in[2] = highest_active_bin;
+  highest_in[3] = highest_synchronized_bin;
+  MPI_Allreduce(highest_in, highest_out, 4, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  All.HighestOccupiedTimeBin     = highest_out[0];
+  All.HighestOccupiedGravTimeBin = highest_out[1];
+  All.HighestActiveTimeBin       = highest_out[2];
+  All.HighestSynchronizedTimeBin = highest_out[3];
+
+  /* note: the lowest synchronized bin is not reduced: bin 0 has dt_bin == 1 and is synchronized at every Ti_Current */
+
+  int input_ints[2 + 2 * TIMEBINS]; /* [0]: synchronized hydro, [1]: synchronized gravity, then per-bin gravity and hydro counts */
+  long long output_longs[2 + 2 * TIMEBINS];
+
+  input_ints[0] = nsynchronized_hydro;
+  input_ints[1] = nsynchronized_gravity;
+  memcpy(input_ints + 2, TimeBinsGravity.TimeBinCount, TIMEBINS * sizeof(int));
+  memcpy(input_ints + 2 + TIMEBINS, TimeBinsHydro.TimeBinCount, TIMEBINS * sizeof(int));
+
+  sumup_large_ints(2 + 2 * TIMEBINS, input_ints, output_longs);
+
+  All.GlobalNSynchronizedHydro   = output_longs[0];
+  All.GlobalNSynchronizedGravity = output_longs[1];
+  long long *tot_count_grav      = output_longs + 2;
+  long long *tot_count_sph       = output_longs + 2 + TIMEBINS;
+
+  long long tot_grav = 0, tot_sph = 0;
+
+  for(n = 0; n < TIMEBINS; n++)
+    {
+      tot_grav += tot_count_grav[n];
+      tot_sph += tot_count_sph[n];
+
+      if(n > 0) /* turn the per-bin counts into cumulative counts up to bin n */
+        {
+          tot_count_grav[n] += tot_count_grav[n - 1];
+          tot_count_sph[n] += tot_count_sph[n - 1];
+        }
+    }
+
+  All.SmallestTimeBinWithDomainDecomposition = All.HighestOccupiedTimeBin;
+
+  for(n = All.HighestOccupiedTimeBin; n >= All.LowestOccupiedTimeBin; n--) /* ends at the lowest bin passing the test below */
+    {
+      if(tot_count_grav[n] > All.ActivePartFracForNewDomainDecomp * tot_grav ||
+         tot_count_sph[n] > All.ActivePartFracForNewDomainDecomp * tot_sph)
+        All.SmallestTimeBinWithDomainDecomposition = n;
+    }
+}
+
+/*! \brief Drifts all NumPart particles/cells to All.Ti_Current.
+ *
+ *  \return void
+ */
+void drift_all_particles(void)
+{
+  TIMER_START(CPU_DRIFTS);
+
+  /* indices 0 .. NumPart-1, visited in ascending order */
+  int p = 0;
+  while(p < NumPart)
+    drift_particle(p++, All.Ti_Current);
+
+  TIMER_STOP(CPU_DRIFTS);
+}
+
+/*! \brief This function drifts particle/cell i from P[i].Ti_Current to time1.
+ *         Type 0 cells move with SphP[i].VelVertex, other types with P[i].Vel.
+ * \param[in] i Particle/cell index.
+ * \param[in] time1 Time to which particles get drifted.
+ *
+ * \return void
+ */
+void drift_particle(int i, integertime time1)
+{
+  int j;
+
+  if(i < 0)
+    terminate("i=%d  NumPart=%d", i, NumPart);
+
+  integertime time0 = P[i].Ti_Current;
+
+  if(time1 == time0) /* already at the requested time */
+    return;
+
+  if(time1 < time0)
+    terminate("no prediction into past allowed: time0=%lld time1=%lld\n", (long long)time0, (long long)time1);
+
+  double dt_drift;
+
+  if(All.ComovingIntegrationOn)
+    dt_drift = get_drift_factor(time0, time1);
+  else
+    dt_drift = (time1 - time0) * All.Timebase_interval;
+
+  if(P[i].Type == 0)
+    {
+      for(j = 0; j < 3; j++)
+        {
+          P[i].Pos[j] += SphP[i].VelVertex[j] * dt_drift;
+        }
+    }
+  else
+    {
+#ifndef MESHRELAX /* with MESHRELAX, non-gas particles are not moved */
+      for(j = 0; j < 3; j++)
+        P[i].Pos[j] += P[i].Vel[j] * dt_drift;
+
+#if defined(REFLECTIVE_X)
+      if(P[i].Pos[0] < 0 || P[i].Pos[0] > boxSize_X)
+        {
+          P[i].Pos[0] = 2 * (P[i].Pos[0] > boxSize_X ? 1 : 0) * boxSize_X - P[i].Pos[0]; /* mirror at the crossed wall */
+          P[i].Vel[0] *= -1;
+        }
+#endif /* #if defined(REFLECTIVE_X) */
+#if defined(REFLECTIVE_Y)
+      if(P[i].Pos[1] < 0 || P[i].Pos[1] > boxSize_Y)
+        {
+          P[i].Pos[1] = 2 * (P[i].Pos[1] > boxSize_Y ? 1 : 0) * boxSize_Y - P[i].Pos[1]; /* mirror at the crossed wall */
+          P[i].Vel[1] *= -1;
+        }
+#endif /* #if defined(REFLECTIVE_Y) */
+#if defined(REFLECTIVE_Z)
+      if(P[i].Pos[2] < 0 || P[i].Pos[2] > boxSize_Z)
+        {
+          P[i].Pos[2] = 2 * (P[i].Pos[2] > boxSize_Z ? 1 : 0) * boxSize_Z - P[i].Pos[2]; /* mirror at the crossed wall */
+          P[i].Vel[2] *= -1;
+        }
+#endif /* #if defined(REFLECTIVE_Z) */
+
+#endif /* #ifndef MESHRELAX */
+    }
+
+  P[i].Ti_Current = time1;
+}
+
+/*! \brief Three-way comparison of two int values, usable as a sort callback.
+ *
+ *  \param[in] a Pointer to the first int.
+ *  \param[in] b Pointer to the second int.
+ *
+ *  \return -1 if *a < *b, +1 if *a > *b, 0 if both are equal.
+ */
+static int int_compare(const void *a, const void *b)
+{
+  const int lhs = *((int *)a);
+  const int rhs = *((int *)b);
+
+  if(lhs != rhs)
+    return (lhs < rhs) ? -1 : +1;
+
+  return 0;
+}
+
+/*! \brief This function builds the linear list of active particles.
+ *
+ *  For every synchronized timebin, its entries are drifted to All.Ti_Current
+ *  if needed and appended to ActiveParticleList of the TimeBinData structs.
+ *  Both lists are then sorted by index and their global sizes are summed up.
+ *  \return void
+ */
+void make_list_of_active_particles(void)
+{
+  TIMER_START(CPU_DRIFTS);
+
+  int i, n;
+  /* collect the gas cells of all synchronized time bins into the linear hydro list */
+  TimeBinsHydro.NActiveParticles = 0;
+
+  for(n = 0; n < TIMEBINS; n++)
+    {
+      if(TimeBinSynchronized[n])
+        {
+          for(i = TimeBinsHydro.FirstInTimeBin[n]; i >= 0; i = TimeBinsHydro.NextInTimeBin[i])
+            if((P[i].Type == 0) && !((P[i].ID == 0) && (P[i].Mass == 0)))
+              {
+                if(P[i].Ti_Current != All.Ti_Current)
+                  drift_particle(i, All.Ti_Current);
+
+                TimeBinsHydro.ActiveParticleList[TimeBinsHydro.NActiveParticles] = i;
+                TimeBinsHydro.NActiveParticles++;
+              }
+        }
+    }
+
+  TimeBinsGravity.NActiveParticles = 0;
+
+  for(n = 0; n < TIMEBINS; n++)
+    {
+      if(TimeBinSynchronized[n])
+        {
+          for(i = TimeBinsGravity.FirstInTimeBin[n]; i >= 0; i = TimeBinsGravity.NextInTimeBin[i])
+            {
+              if(!((P[i].ID == 0) && (P[i].Mass == 0))) /* entries with zero ID and zero mass are left out */
+                {
+                  if(P[i].Ti_Current != All.Ti_Current)
+                    drift_particle(i, All.Ti_Current);
+
+                  TimeBinsGravity.ActiveParticleList[TimeBinsGravity.NActiveParticles] = i;
+                  TimeBinsGravity.NActiveParticles++;
+                }
+            }
+        }
+    }
+
+  /* sort both lists for better memory efficiency */
+  mysort(TimeBinsHydro.ActiveParticleList, TimeBinsHydro.NActiveParticles, sizeof(int), int_compare);
+  mysort(TimeBinsGravity.ActiveParticleList, TimeBinsGravity.NActiveParticles, sizeof(int), int_compare);
+
+  int in[6]; /* only the first n = 2 entries are used below */
+  long long out[6];
+
+  n     = 2;
+  in[0] = TimeBinsGravity.NActiveParticles;
+  in[1] = TimeBinsHydro.NActiveParticles;
+
+  sumup_large_ints(n, in, out);
+
+  TimeBinsGravity.GlobalNActiveParticles = out[0];
+  TimeBinsHydro.GlobalNActiveParticles   = out[1];
+
+  TIMER_STOP(CPU_DRIFTS);
+}
diff --git a/src/amuse/community/arepo/src/time_integration/timestep.c b/src/amuse/community/arepo/src/time_integration/timestep.c
new file mode 100644
index 0000000000..4224b3cc8f
--- /dev/null
+++ b/src/amuse/community/arepo/src/time_integration/timestep.c
@@ -0,0 +1,980 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/time_integration/timestep.c
+ * \date        05/2018
+ * \brief       Routines for 'kicking' particles in
+ *              momentum space and assigning new timesteps.
+ * \details     contains functions:
+ *                void set_cosmo_factors_for_current_time(void)
+ *                void find_timesteps_without_gravity(void)
+ *                void update_timesteps_from_gravity(void)
+ *                integertime get_timestep_pm(void)
+ *                integertime get_timestep_gravity(int p)
+ *                integertime get_timestep_hydro(int p)
+ *                void validate_timestep(double dt, integertime ti_step, int p)
+ *                int test_if_grav_timestep_is_too_large(int p, int bin)
+ *                void find_long_range_step_constraint(void)
+ *                int get_timestep_bin(integertime ti_step)
+ *                double get_time_difference_in_Gyr(double a0, double a1)
+ *                void timebins_init(struct TimeBinData *tbData, const char
+ *                  *name, int *MaxPart)
+ *                void timebins_allocate(struct TimeBinData *tbData)
+ *                void timebins_reallocate(struct TimeBinData *tbData)
+ *                void timebins_get_bin_and_do_validity_checks(integertime
+ *                  ti_step, int *bin_new, int bin_old)
+ *                void timebin_move_particle(struct TimeBinData *tbData, int p,
+ *                  int timeBin_old, int timeBin_new)
+ *                void timebin_remove_particle(struct TimeBinData *tbData,
+ *                  int idx, int bin)
+ *                void timebin_add_particle(struct TimeBinData *tbData, int
+ *                  i_new, int i_old, int timeBin, int
+ *                  addToListOfActiveParticles)
+ *                void timebin_cleanup_list_of_active_particles(struct
+ *                  TimeBinData *tbData)
+ *                void timebin_move_sfr(int p, int timeBin_old, int
+ *                  timeBin_new)
+ *                void timebin_make_list_of_active_particles_up_to_timebin(
+ *                  struct TimeBinData *tbData, int timebin)
+ *                void timebin_add_particles_of_timebin_to_list_of_active_
+ *                  particles(struct TimeBinData *tbData, int timebin)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 11.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief Sets various cosmological factors for the current simulation time.
+ *
+ *  \return void
+ */
+void set_cosmo_factors_for_current_time(void)
+{
+  if(!All.ComovingIntegrationOn)
+    {
+      /* non-comoving run: every scale-factor dependent conversion factor is trivial */
+      All.cf_atime = All.cf_a2inv = All.cf_a3inv = 1;
+      All.cf_afac1 = All.cf_afac2 = All.cf_afac3 = 1;
+      All.cf_hubble_a = All.cf_time_hubble_a = 1;
+      All.cf_H                               = All.Hubble;
+      All.cf_Hrate                           = 0;
+      All.cf_redshift                        = 0;
+      return;
+    }
+
+  /* comoving integration: All.Time holds the scale factor */
+  const double a = All.Time;
+
+  All.cf_atime         = a;
+  All.cf_a2inv         = 1 / (a * a);
+  All.cf_a3inv         = 1 / (a * a * a);
+  All.cf_afac1         = pow(a, 3 * GAMMA_MINUS1);
+  All.cf_afac2         = 1 / pow(a, 3 * GAMMA - 2);
+  All.cf_afac3         = pow(a, 3 * (1 - GAMMA) / 2.0);
+  All.cf_Hrate         = hubble_function(a);
+  All.cf_H             = All.cf_Hrate;
+  All.cf_hubble_a      = All.cf_H;
+  All.cf_time_hubble_a = a * All.cf_hubble_a;
+  All.cf_redshift      = 1 / a - 1;
+}
+
+/*! \brief Finds hydrodynamic timesteps for all active gas cells.
+ *
+ *  Validates the timestep and moves particles to appropriate timebin/ linked
+ *  list of particles. With FORCE_EQUAL_TIMESTEPS, one global minimum step is
+ *  assigned to all active particles (gravity and hydro lists) instead.
+ *  \return void
+ */
+void find_timesteps_without_gravity(void)
+{
+#ifdef TREE_BASED_TIMESTEPS
+  tree_based_timesteps();
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+  TIMER_START(CPU_TIMELINE);
+
+  int idx, i, bin, binold;
+  integertime ti_step;
+
+#ifdef FORCE_EQUAL_TIMESTEPS
+  integertime globTimeStep = TIMEBASE; /* running minimum over all local timestep constraints */
+
+#ifdef PMGRID
+  globTimeStep = get_timestep_pm();
+#endif /* #ifdef PMGRID */
+
+#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue; /* negative entries are skipped (timebin_remove_particle() stores -1) */
+
+      ti_step = get_timestep_gravity(i);
+      if(ti_step < globTimeStep)
+        globTimeStep = ti_step;
+    }
+#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) \
+        */
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      ti_step = get_timestep_hydro(i);
+      if(ti_step < globTimeStep)
+        globTimeStep = ti_step;
+    }
+  /* reduce the local minimum to the minimum over all MPI tasks */
+#ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME
+  minimum_large_ints(1, &globTimeStep, &All.GlobalTimeStep);
+#else  /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME */
+  MPI_Allreduce(&globTimeStep, &All.GlobalTimeStep, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+#endif /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME #else */
+  /* move every active particle (gravity list first, then hydro list) into the bin of the global step */
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      timebins_get_bin_and_do_validity_checks(All.GlobalTimeStep, &bin, P[i].TimeBinGrav);
+      binold = P[i].TimeBinGrav;
+      timebin_move_particle(&TimeBinsGravity, i, binold, bin);
+      P[i].TimeBinGrav = bin;
+    }
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      timebins_get_bin_and_do_validity_checks(All.GlobalTimeStep, &bin, P[i].TimeBinHydro);
+      binold = P[i].TimeBinHydro;
+      timebin_move_particle(&TimeBinsHydro, i, binold, bin);
+      P[i].TimeBinHydro = bin;
+    }
+
+#else  /* #ifdef FORCE_EQUAL_TIMESTEPS */
+  /* Calculate and assign hydro timesteps */
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+
+      if(i < 0)
+        continue; /* negative entries are skipped (timebin_remove_particle() stores -1) */
+
+      ti_step = get_timestep_hydro(i); /* step of this cell on the integer timeline */
+
+      binold = P[i].TimeBinHydro;
+
+      timebins_get_bin_and_do_validity_checks(ti_step, &bin, binold); /* power-of-two bin for this step */
+
+      timebin_move_particle(&TimeBinsHydro, i, binold, bin); /* no-op if the bin did not change */
+
+      P[i].TimeBinHydro = bin;
+    }
+#endif /* #ifdef FORCE_EQUAL_TIMESTEPS #else */
+
+  TIMER_STOP(CPU_TIMELINE);
+}
+
+/*! \brief Moves particles to lower timestep bin if required by gravity
+ *         timestep criterion.
+ *
+ *  \return void
+ */
+void update_timesteps_from_gravity(void)
+{
+#ifdef FORCE_EQUAL_TIMESTEPS
+  return; /* don't need to do this */
+#endif    /* #ifdef FORCE_EQUAL_TIMESTEPS */
+
+#if !((defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE))) || defined(MESHRELAX)
+  return;
+#endif /* #if !((defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE))) || defined(MESHRELAX) \
+        */
+
+  TIMER_START(CPU_TIMELINE);
+
+  int n, cell;
+
+  /* walk over all hydrodynamically active cells */
+  for(n = 0; n < TimeBinsHydro.NActiveParticles; n++)
+    {
+      cell = TimeBinsHydro.ActiveParticleList[n];
+
+      /* skip cleared entries and cells whose gravity bin is not the finer one */
+      if(cell < 0 || P[cell].TimeBinGrav >= P[cell].TimeBinHydro)
+        continue;
+
+      /* gravity asks for a smaller step: adopt the gravity timebin for hydro as well */
+      timebin_move_particle(&TimeBinsHydro, cell, P[cell].TimeBinHydro, P[cell].TimeBinGrav);
+      P[cell].TimeBinHydro = P[cell].TimeBinGrav;
+    }
+
+  TIMER_STOP(CPU_TIMELINE);
+}
+
+#ifdef PMGRID
+/*! \brief Returns particle-mesh timestep as an integer-time variable.
+ *
+ *  \return Integer timestep of particle-mesh algorithm.
+ */
+integertime get_timestep_pm(void)
+{
+  const double ti_limit       = All.DtDisplacement / All.Timebase_interval;
+  const integertime ti_pm_old = All.PM_Ti_endstep - All.PM_Ti_begstep;
+  integertime ti_new;
+
+  /* largest power-of-two step that does not exceed the displacement constraint */
+  for(ti_new = TIMEBASE; ti_new > ti_limit; ti_new >>= 1)
+    ;
+
+  if(ti_new > ti_pm_old) /* PM-timestep wants to increase */
+    {
+      int bin     = get_timestep_bin(ti_new);
+      int bin_min = get_timestep_bin(ti_pm_old);
+
+      while(TimeBinSynchronized[bin] == 0 && bin > bin_min) /* make sure the new step is synchronized */
+        bin--;
+      ti_new = bin ? (((integertime)1) << bin) : 0;
+    }
+
+  return (All.Ti_Current == TIMEBASE) ? 0 : ti_new; /* at the end of the timeline we finish the last timestep */
+}
+#endif /* #ifdef PMGRID */
+
+/*! \brief Returns gravity timestep as an integer-time variable.
+ *
+ *  \param[in] p Index of particle in P array.
+ *
+ *  \return Integer timestep limited due to gravitational acceleration.
+ */
+integertime get_timestep_gravity(int p)
+{
+  double dt;
+  integertime ti_step;
+
+  double ax, ay, az, ac; /* acceleration components and their magnitude */
+  {
+    /* calculate total acceleration */
+    ax = All.cf_a2inv * P[p].GravAccel[0];
+    ay = All.cf_a2inv * P[p].GravAccel[1];
+    az = All.cf_a2inv * P[p].GravAccel[2];
+
+#if defined(PMGRID) && !defined(NO_PMFORCE_IN_SHORT_RANGE_TIMESTEP)
+    ax += All.cf_a2inv * P[p].GravPM[0];
+    ay += All.cf_a2inv * P[p].GravPM[1];
+    az += All.cf_a2inv * P[p].GravPM[2];
+#endif /* #if defined(PMGRID) && !defined(NO_PMFORCE_IN_SHORT_RANGE_TIMESTEP) */
+
+    ac = sqrt(ax * ax + ay * ay + az * az); /* this is now the physical acceleration */
+
+    if(ac == 0)
+      ac = 1.0e-30; /* avoid a division by zero in the criterion below */
+
+    switch(All.TypeOfTimestepCriterion)
+      {
+        case 0:
+          /* only type 0 implemented at the moment -> remove type ? */
+          dt = sqrt(2 * All.ErrTolIntAccuracy * All.cf_atime * All.ForceSoftening[P[p].SofteningType] / 2.8 / ac);
+          break;
+        default:
+          terminate("Undefined timestep criterion");
+          break;
+      }
+
+#ifdef EXTERNALGRAVITY
+    double dt_ext = sqrt(All.ErrTolIntAccuracy / P[p].dGravAccel); /* additional limit derived from dGravAccel */
+    if(dt_ext < dt)
+      dt = dt_ext;
+#endif
+  }
+
+  dt *= All.cf_hubble_a; /* cf_hubble_a is 1 for non-comoving runs (see set_cosmo_factors_for_current_time) */
+
+  if(P[p].Mass == 0 && P[p].ID == 0)
+    dt = All.MaxSizeTimestep; /* this particle has been swallowed or eliminated */
+
+  if(dt >= All.MaxSizeTimestep)
+    dt = All.MaxSizeTimestep;
+
+  if(dt < All.MinSizeTimestep)
+    {
+#ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP
+      dt = All.MinSizeTimestep;
+#else  /* #ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP */
+      print_particle_info(p);
+      terminate("Timestep dt=%g below All.MinSizeTimestep=%g", dt, All.MinSizeTimestep);
+#endif /* #ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP #else */
+    }
+
+#ifdef PMGRID
+  if(dt >= All.DtDisplacement) /* never exceed the long-range (PM) step constraint */
+    dt = All.DtDisplacement;
+#endif /* #ifdef PMGRID */
+
+  ti_step = (integertime)(dt / All.Timebase_interval); /* truncate to units of the integer timeline */
+
+  validate_timestep(dt, ti_step, p); /* terminates the run if ti_step is outside (0, TIMEBASE) */
+
+  return ti_step;
+}
+
+/*! \brief Returns hydrodynamics timestep as an integer-time variable.
+ *
+ *  \param[in] p Index of particle in P and SphP array.
+ *
+ *  \return Integer timestep limited due to CFL condition.
+ */
+integertime get_timestep_hydro(int p)
+{
+  double dt = 0, dt_courant = 0;
+  integertime ti_step;
+
+  assert(P[p].Type == 0); /* must only be called for particles of type 0 */
+
+  double csnd = get_sound_speed(p);
+
+#if defined(VORONOI_STATIC_MESH)
+  csnd += sqrt(P[p].Vel[0] * P[p].Vel[0] + P[p].Vel[1] * P[p].Vel[1] + P[p].Vel[2] * P[p].Vel[2]) / All.cf_atime;
+#endif /* #if defined(VORONOI_STATIC_MESH) */
+
+  double rad = get_cell_radius(p);
+
+  if(csnd <= 0)
+    csnd = 1.0e-30; /* avoid a division by zero below */
+
+  dt_courant = rad / csnd; /* cell radius divided by the signal speed */
+
+#ifdef TREE_BASED_TIMESTEPS
+  if(dt_courant > SphP[p].CurrentMaxTiStep)
+    dt_courant = SphP[p].CurrentMaxTiStep;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+  dt_courant *= All.CourantFac;
+
+  if(All.ComovingIntegrationOn)
+    dt_courant *= All.Time;
+
+  dt = dt_courant;
+
+#if defined(USE_SFR)
+
+  if(P[p].Type == 0) /* to protect using a particle that has been turned into a star */
+    {
+      double sfr = get_starformation_rate(p);
+
+      double dt_sfr = 0.1 * P[p].Mass / (sfr / ((All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR)));
+      if(dt_sfr < dt)
+        dt = dt_sfr;
+    }
+#endif /* #if defined(USE_SFR) */
+
+#ifdef MHD_POWELL_LIMIT_TIMESTEP
+  double b         = sqrt(SphP[p].B[0] * SphP[p].B[0] + SphP[p].B[1] * SphP[p].B[1] + SphP[p].B[2] * SphP[p].B[2]);
+  double bmin      = sqrt(2 * 0.01 * SphP[p].Utherm * SphP[p].Density * All.cf_atime);
+  double v         = sqrt(P[p].Vel[0] * P[p].Vel[0] + P[p].Vel[1] * P[p].Vel[1] + P[p].Vel[2] * P[p].Vel[2]) / All.cf_atime;
+  double dt_powell = 0.5 * (b + bmin) / (fabs(SphP[p].DivB / All.cf_atime * v));
+
+  if(dt_powell < dt)
+    dt = dt_powell;
+#endif /* #ifdef MHD_POWELL_LIMIT_TIMESTEP */
+
+  /* convert the physical timestep to dloga if needed. Note: If comoving integration has not been selected,
+     All.cf_hubble_a=1.
+   */
+
+  dt *= All.cf_hubble_a;
+
+  if(dt >= All.MaxSizeTimestep)
+    dt = All.MaxSizeTimestep;
+
+#ifdef TIMESTEP_OUTPUT_LIMIT
+  if(dt >= All.TimestepOutputLimit)
+    dt = All.TimestepOutputLimit;
+#endif /* #ifdef TIMESTEP_OUTPUT_LIMIT */
+
+  if(dt < All.MinSizeTimestep)
+    {
+#ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP
+      dt = All.MinSizeTimestep;
+#else  /* #ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP */
+      print_particle_info(p);
+      terminate("Timestep dt=%g below All.MinSizeTimestep=%g", dt, All.MinSizeTimestep);
+#endif /* #ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP #else */
+    }
+
+#ifdef PMGRID
+  if(dt >= All.DtDisplacement) /* never exceed the long-range (PM) step constraint */
+    dt = All.DtDisplacement;
+#endif /* #ifdef PMGRID */
+
+  ti_step = (integertime)(dt / All.Timebase_interval); /* truncate to units of the integer timeline */
+
+  validate_timestep(dt, ti_step, p); /* terminates the run if ti_step is outside (0, TIMEBASE) */
+
+  return ti_step;
+}
+
+/*! \brief Checks if timestep is a valid one; terminates the run otherwise.
+ *  \param[in] dt Timestep as floating point number (diagnostic output only).
+ *  \param[in] ti_step Timestep on the integer timeline; valid in (0, TIMEBASE).
+ *  \param[in] p Index of particle in P array.
+ *  \return void
+ */
+void validate_timestep(double dt, integertime ti_step, int p)
+{
+  if(!(ti_step > 0 && ti_step < TIMEBASE))
+    {
+      printf(
+          "\nError: An invalid timestep was assigned on the integer timeline!\n"
+          "We better stop.\n"
+          "Task=%d Part-ID=%lld type=%d ",
+          ThisTask, (long long)P[p].ID, P[p].Type);
+      /* print ti_step as long long: a cast to int would truncate it when integertime is a 64 bit type */
+      printf("tibase=%g dt=%g ti_step=%lld, xyz=(%g|%g|%g) vel=(%g|%g|%g) tree=(%g|%g|%g) mass=%g\n\n", All.Timebase_interval, dt,
+             (long long)ti_step, P[p].Pos[0], P[p].Pos[1], P[p].Pos[2], P[p].Vel[0], P[p].Vel[1], P[p].Vel[2], P[p].GravAccel[0],
+             P[p].GravAccel[1], P[p].GravAccel[2], P[p].Mass);
+
+      print_particle_info(p);
+      myflush(stdout);
+      terminate("integer timestep outside of allowed range");
+    }
+  /* a step of size 1 is still inside the valid range, but is reported */
+  if(ti_step == 1)
+    {
+      printf("Time-step of integer size 1 found for particle i=%d, pos=(%g|%g|%g), ID=%lld, dt=%g\n", p, P[p].Pos[0], P[p].Pos[1],
+             P[p].Pos[2], (long long)P[p].ID, dt);
+      print_particle_info(p);
+    }
+}
+
+/*! \brief Checks if timestep according to its present timebin is too large
+ *         compared to the requirements from gravity and hydrodynamics
+ *
+ *  I.e. does the cell need to be moved to a finer timebin?
+ *
+ *  \param[in] p Index of particle/cell.
+ *  \param[in] bin Timebin to compare to.
+ *
+ *  \return 0: not too large; 1: too large.
+ */
+int test_if_grav_timestep_is_too_large(int p, int bin)
+{
+  integertime ti_step_bin = bin ? (((integertime)1) << bin) : 0; /* step that belongs to the given bin */
+  integertime ti_step     = get_timestep_gravity(p);             /* step required by gravity */
+
+  /* A gas cell is additionally limited by its hydro timestep. Cells flagged as
+   * deleted carry ID == 0 AND Mass == 0 (the same test is used when the lists
+   * of active particles are built); every other cell has to be considered,
+   * also one that merely has ID == 0 or Mass == 0. */
+  if(P[p].Type == 0 && !((P[p].ID == 0) && (P[p].Mass == 0)))
+    {
+      int bin_hydro             = P[p].TimeBinHydro;
+      integertime ti_step_hydro = bin_hydro ? (((integertime)1) << bin_hydro) : 0;
+
+      if(ti_step_hydro < ti_step)
+        ti_step = ti_step_hydro;
+    }
+
+  if(ti_step < ti_step_bin)
+    return 1; /* the bin's step exceeds what gravity/hydro allow */
+  return 0;
+}
+
+#ifdef PMGRID
+/*! \brief Sets the global timestep for the long-range force calculation.
+ *
+ *  Evaluates timestep constraints due to long range force acceleration of all
+ *  simulation particles and finds its global minimum.
+ *
+ *  \return void
+ */
+void find_long_range_step_constraint(void)
+{
+  int p;
+  double ax, ay, az, ac;
+  double dt, dtmin = MAX_DOUBLE_NUMBER; /* dtmin: running minimum over the local particles */
+
+  for(p = 0; p < NumPart; p++)
+    {
+      if(P[p].Type == 0)
+        continue; /* particles of type 0 do not enter this constraint */
+
+#ifdef PM_TIMESTEP_BASED_ON_TYPES
+      if(((1 << P[p].Type) & (PM_TIMESTEP_BASED_ON_TYPES))) /* only types selected in the bitmask */
+#endif /* #ifdef PM_TIMESTEP_BASED_ON_TYPES */
+        {
+          /* calculate acceleration */
+          ax = All.cf_a2inv * P[p].GravPM[0];
+          ay = All.cf_a2inv * P[p].GravPM[1];
+          az = All.cf_a2inv * P[p].GravPM[2];
+
+          ac = sqrt(ax * ax + ay * ay + az * az); /* this is now the physical acceleration */
+
+          if(ac < MIN_FLOAT_NUMBER)
+            ac = MIN_FLOAT_NUMBER; /* keep the division below well defined */
+
+          dt = sqrt(2.0 * All.ErrTolIntAccuracy * All.cf_atime * All.ForceSoftening[P[p].SofteningType] / (2.8 * ac));
+
+          dt *= All.cf_hubble_a;
+
+          if(dt < dtmin)
+            dtmin = dt;
+        }
+    }
+
+  dtmin *= 2.0; /* move it one timebin higher to prevent being too conservative */
+
+  MPI_Allreduce(&dtmin, &All.DtDisplacement, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); /* minimum over all tasks */
+
+  mpi_printf("TIMESTEPS: displacement time constraint: %g  (%g)\n", All.DtDisplacement, All.MaxSizeTimestep);
+
+  if(All.DtDisplacement > All.MaxSizeTimestep) /* clamp to the allowed timestep range */
+    All.DtDisplacement = All.MaxSizeTimestep;
+
+  if(All.DtDisplacement < All.MinSizeTimestep)
+    All.DtDisplacement = All.MinSizeTimestep;
+}
+#endif /* #ifdef PMGRID */
+
+/*! \brief Converts an integer time to a time bin.
+ *
+ *  \param[in] ti_step Timestep as integertime variable.
+ *
+ *  \return Associated time-bin.
+ */
+int get_timestep_bin(integertime ti_step)
+{
+  int bin;
+
+  /* a vanishing step is mapped to bin 0 */
+  if(ti_step == 0)
+    return 0;
+
+  if(ti_step == 1)
+    terminate("time-step of integer size 1 not allowed\n");
+
+  /* count how often the step can be halved until nothing is left; starting
+   * from -1 this gives the position of the highest set bit of a positive step */
+  for(bin = -1; ti_step; ti_step >>= 1)
+    bin++;
+
+  return bin;
+}
+
+/*! \brief Calculates time difference in Gyr between two time integration unit
+ *         values.
+ *
+ *  If simulation non-cosmological, a0 and a1 are proper time in code units,
+ *  for cosmological simulation a0 and a1 are scalefactors.
+ *
+ *  \param[in] a0 First time or scalefactor.
+ *  \param[in] a1 Second time or scalefactor.
+ *
+ *  \return Time difference in Gyr.
+ */
+double get_time_difference_in_Gyr(double a0, double a1)
+{
+  double time_diff;
+
+  if(!All.ComovingIntegrationOn)
+    {
+      /* non-cosmological run: a0 and a1 are times in code units */
+      time_diff = (a1 - a0) * All.UnitTime_in_s / All.HubbleParam; /* now in seconds */
+      time_diff /= SEC_PER_MEGAYEAR * 1000;                        /* now in gigayears */
+
+      return time_diff;
+    }
+
+  /* cosmological run: a0 and a1 are scale factors */
+  if(All.OmegaLambda + All.Omega0 != 1)
+    printf("only implemented for flat cosmology so far.");
+
+  const double prefac      = 2.0 / (3.0 * sqrt(All.OmegaLambda));
+  const double scalefac[2] = {a0, a1};
+  double age[2];
+  int k;
+
+  for(k = 0; k < 2; k++)
+    {
+      double term1 = sqrt(All.OmegaLambda / All.Omega0) * pow(scalefac[k], 1.5);
+      double term2 = sqrt(1 + All.OmegaLambda / All.Omega0 * pow(scalefac[k], 3));
+
+      age[k] = prefac * log(term1 + term2);
+    }
+
+  /* the difference of the two ages is converted with the Hubble constant */
+  time_diff = (age[1] - age[0]) / (HUBBLE * All.HubbleParam); /* now in seconds */
+  time_diff /= SEC_PER_MEGAYEAR * 1000;                        /* now in gigayears */
+
+  return time_diff;
+}
+
+/*! \brief Initializes time bin data.
+ *
+ *  Does not allocate anything!
+ *
+ *  \param[out] tbData Time bin data to be initialized.
+ *  \param[in] name Name stored in time bin data.
+ *  \param[in] MaxPart Maximum number of particles in time bin data.
+ *
+ *  \return void
+ */
+void timebins_init(struct TimeBinData *tbData, const char *name, int *MaxPart)
+{
+  int bin;
+
+  /* the particle lists are not allocated here, only marked as absent */
+  tbData->ActiveParticleList = 0;
+  tbData->NextInTimeBin      = 0;
+  tbData->PrevInTimeBin      = 0;
+  tbData->NActiveParticles   = 0;
+  tbData->MaxPart            = MaxPart;
+
+  /* every timebin starts out with an empty linked list */
+  for(bin = 0; bin < TIMEBINS; bin++)
+    tbData->FirstInTimeBin[bin] = tbData->LastInTimeBin[bin] = -1;
+
+  /* keep at most 99 characters of the name and make sure it is terminated */
+  strncpy(tbData->Name, name, 99);
+  tbData->Name[99] = 0;
+}
+
+/*! \brief Allocates linked lists in time bin data.
+ *
+ *  With tbData->MaxPart elements.
+ *
+ *  \param[in, out] tbData Pointer to time bin data to be allocated.
+ *
+ *  \return void
+ */
+void timebins_allocate(struct TimeBinData *tbData)
+{
+  const size_t nbytes = *(tbData->MaxPart) * sizeof(int); /* size of each of the three lists */
+  char label[200];
+
+  label[sizeof(label) - 1] = 0;
+
+  snprintf(label, sizeof(label) - 1, "NextActiveParticle%s", tbData->Name);
+  tbData->ActiveParticleList = (int *)mymalloc_movable(&tbData->ActiveParticleList, label, nbytes);
+  snprintf(label, sizeof(label) - 1, "NextInTimeBin%s", tbData->Name);
+  tbData->NextInTimeBin = (int *)mymalloc_movable(&tbData->NextInTimeBin, label, nbytes);
+  snprintf(label, sizeof(label) - 1, "PrevInTimeBin%s", tbData->Name);
+  tbData->PrevInTimeBin = (int *)mymalloc_movable(&tbData->PrevInTimeBin, label, nbytes);
+}
+
+/*! \brief Re-allocates linked lists in time bin data.
+ *
+ *  All three lists are resized to hold *tbData->MaxPart elements.
+ *
+ *  \param[in, out] tbData Pointer to time bin data to be re-allocated.
+ *  \return void
+ */
+void timebins_reallocate(struct TimeBinData *tbData)
+{
+  const size_t nbytes        = *(tbData->MaxPart) * sizeof(int);
+  tbData->ActiveParticleList = (int *)myrealloc_movable(tbData->ActiveParticleList, nbytes);
+  tbData->NextInTimeBin      = (int *)myrealloc_movable(tbData->NextInTimeBin, nbytes);
+  tbData->PrevInTimeBin      = (int *)myrealloc_movable(tbData->PrevInTimeBin, nbytes);
+}
+
+/*! \brief Gets timebin and checks if bin is valid.
+ *
+ *  Checks for example if old bin is synchronized with the bin it should be
+ *  moved to.
+ *
+ *  \param[in] ti_step Timestep in integertime.
+ *  \param[out] bin_new New time bin.
+ *  \param[in] bin_old Old time bin.
+ *
+ *  \return void
+ */
+void timebins_get_bin_and_do_validity_checks(integertime ti_step, int *bin_new, int bin_old)
+{
+  /* make it a power 2 subdivision: the largest power of two (or 0) that does
+   * not exceed the requested step */
+  integertime ti_min = TIMEBASE;
+  while(ti_min > ti_step)
+    ti_min >>= 1;
+  ti_step = ti_min;
+
+  /* Get the timestep bin: 0 for ti_step == 0, otherwise log2(ti_step); the
+   * run is terminated for ti_step == 1. get_timestep_bin() takes its argument
+   * by value, so ti_step keeps the power-of-two step, which the
+   * end-of-timeline check below relies on. */
+  int bin = get_timestep_bin(ti_step);
+
+  if(bin > bin_old) /* timestep wants to increase */
+    {
+      /* make sure the new step is synchronized; never go below the old bin */
+      while(TimeBinSynchronized[bin] == 0 && bin > bin_old)
+        bin--;
+
+      /* the step has to match the bin that was finally chosen */
+      ti_step = bin ? (((integertime)1) << bin) : 0;
+    }
+
+  /* we here finish the last timestep. */
+  if(All.Ti_Current >= TIMEBASE)
+    {
+      ti_step = 0;
+      bin     = 0;
+    }
+
+  /* Check that we don't run beyond the end of the timeline. ti_step holds the
+   * step belonging to 'bin' on every path at this point (it is not consumed
+   * by the bin computation above), so the comparison is meaningful also when
+   * the bin stays the same or decreases. */
+  if((TIMEBASE - All.Ti_Current) < ti_step)
+    {
+      /* should not happen */
+      terminate("we are beyond the end of the timeline");
+    }
+
+  *bin_new = bin;
+}
+
+/*! \brief Move particle from one time bin to another.
+ *
+ *  \param[in, out] tbData Time bin data structure to operate on.
+ *  \param[in] p Index of the particle to be moved.
+ *  \param[in] timeBin_old Old time bin of particle to be moved.
+ *  \param[in] timeBin_new New time bin of particle to be moved.
+ *
+ *  \return void
+ */
+void timebin_move_particle(struct TimeBinData *tbData, int p, int timeBin_old, int timeBin_new)
+{
+  if(timeBin_old == timeBin_new)
+    return; /* nothing to do */
+
+  tbData->TimeBinCount[timeBin_old]--;
+  /* remember the neighbours of p in the list of the old timebin */
+  int prev = tbData->PrevInTimeBin[p];
+  int next = tbData->NextInTimeBin[p];
+  /* unlink p: fix head/tail of the old bin and bridge the gap between the neighbours */
+  if(tbData->FirstInTimeBin[timeBin_old] == p)
+    tbData->FirstInTimeBin[timeBin_old] = next;
+  if(tbData->LastInTimeBin[timeBin_old] == p)
+    tbData->LastInTimeBin[timeBin_old] = prev;
+  if(prev >= 0)
+    tbData->NextInTimeBin[prev] = next;
+  if(next >= 0)
+    tbData->PrevInTimeBin[next] = prev;
+  /* append p at the end of the list of the new timebin */
+  if(tbData->TimeBinCount[timeBin_new] > 0)
+    {
+      tbData->PrevInTimeBin[p]                                  = tbData->LastInTimeBin[timeBin_new];
+      tbData->NextInTimeBin[tbData->LastInTimeBin[timeBin_new]] = p;
+      tbData->NextInTimeBin[p]                                  = -1;
+      tbData->LastInTimeBin[timeBin_new]                        = p;
+    }
+  else /* the new timebin is empty: p becomes its only element */
+    {
+      tbData->FirstInTimeBin[timeBin_new] = tbData->LastInTimeBin[timeBin_new] = p;
+      tbData->PrevInTimeBin[p] = tbData->NextInTimeBin[p] = -1;
+    }
+
+  tbData->TimeBinCount[timeBin_new]++;
+
+#ifdef USE_SFR
+  if((P[p].Type == 0) && (tbData == &TimeBinsHydro)) /* keep the per-bin star formation rate sums in step */
+    timebin_move_sfr(p, timeBin_old, timeBin_new);
+#endif /* #ifdef USE_SFR */
+}
+
+/*! \brief Removes a particle from time bin structure.
+ *
+ *  Can only be done with active particles.
+ *
+ *  \param[in, out] tbData Time bin structure to be operated on.
+ *  \param[in] idx Index of particle in ActiveParticleList.
+ *  \param[in] bin Timebin in which particle is currently. If left -1, function
+ *             will determine bin by itself.
+ *
+ *  \return void
+ */
+void timebin_remove_particle(struct TimeBinData *tbData, int idx, int bin)
+{
+  int p = tbData->ActiveParticleList[idx];
+
+  /* slot already cleared: there is nothing to unlink, P[p] must not be read
+   * and the bin count must not be decremented a second time */
+  if(p < 0)
+    return;
+
+  tbData->ActiveParticleList[idx] = -1; /* mark the slot as removed */
+
+  if(bin == -1) /* caller did not specify the bin: look it up in the particle data */
+    bin = (tbData == &TimeBinsGravity) ? P[p].TimeBinGrav : P[p].TimeBinHydro;
+
+  tbData->TimeBinCount[bin]--;
+
+  /* unlink p from the doubly linked list of its timebin: first bridge the
+   * gap between its neighbours, then fix head/tail of the bin if needed */
+  int prev = tbData->PrevInTimeBin[p];
+  int next = tbData->NextInTimeBin[p];
+
+  if(prev >= 0)
+    tbData->NextInTimeBin[prev] = next;
+  if(next >= 0)
+    tbData->PrevInTimeBin[next] = prev;
+
+  if(tbData->FirstInTimeBin[bin] == p)
+    tbData->FirstInTimeBin[bin] = next;
+  if(tbData->LastInTimeBin[bin] == p)
+    tbData->LastInTimeBin[bin] = prev;
+}
+
+/*! \brief Inserts a particle into the timebin struct behind another already
+ *         existing particle.
+ *
+ *  \param[in, out] tbData Time bin structure to be operated on.
+ *  \param[in] i_new New index in linked lists of time bin data.
+ *  \param[in] i_old Old index in linked lists of time bin data; if negative,
+ *             the particle is appended behind the last one of the timebin.
+ *  \param[in] timeBin Time bin to which it should be added.
+ *  \param[in] addToListOfActiveParticles Flag if particle should be added as
+ *             an active particle.
+ *  \return void
+ */
+void timebin_add_particle(struct TimeBinData *tbData, int i_new, int i_old, int timeBin, int addToListOfActiveParticles)
+{
+  tbData->TimeBinCount[timeBin]++;
+
+  if(i_old < 0)
+    {
+      /* if we don't have an existing particle to add it after, let's take the last one in this timebin */
+      i_old = tbData->LastInTimeBin[timeBin];
+
+      if(i_old < 0)
+        {
+          /* the timebin is empty at the moment, so just add the new particle */
+          tbData->FirstInTimeBin[timeBin] = i_new;
+          tbData->LastInTimeBin[timeBin]  = i_new;
+          tbData->NextInTimeBin[i_new]    = -1;
+          tbData->PrevInTimeBin[i_new]    = -1;
+        }
+    }
+
+  if(i_old >= 0)
+    {
+      /* splice i_new in directly behind i_old; otherwise we added it already */
+      tbData->PrevInTimeBin[i_new] = i_old;
+      tbData->NextInTimeBin[i_new] = tbData->NextInTimeBin[i_old];
+      if(tbData->NextInTimeBin[i_old] >= 0)
+        tbData->PrevInTimeBin[tbData->NextInTimeBin[i_old]] = i_new;
+      tbData->NextInTimeBin[i_old] = i_new;
+      if(tbData->LastInTimeBin[timeBin] == i_old)
+        tbData->LastInTimeBin[timeBin] = i_new; /* i_new is the new tail of the bin */
+    }
+
+  if(addToListOfActiveParticles)
+    {
+      tbData->ActiveParticleList[tbData->NActiveParticles] = i_new;
+      tbData->NActiveParticles++;
+    }
+}
+
+/*! \brief Removes active particles that have ID and Mass 0, i.e. that were
+ *         flagged as deleted from time bin data structure.
+ *
+ *  \param[in, out] tbData Time bin data structure to be operated on.
+ *
+ *  \return void
+ */
+void timebin_cleanup_list_of_active_particles(struct TimeBinData *tbData)
+{
+  int idx;
+
+  for(idx = 0; idx < tbData->NActiveParticles; idx++)
+    {
+      const int p = tbData->ActiveParticleList[idx];
+
+      /* only valid entries that carry the deletion flag (ID and Mass both 0) are removed */
+      if(p >= 0 && P[p].ID == 0 && P[p].Mass == 0)
+        timebin_remove_particle(tbData, idx, -1);
+    }
+}
+
+#ifdef USE_SFR
+/*! \brief Transfers the star formation rate of a gas cell between the
+ *         per-timebin sums in TimeBinSfr when the cell changes timebin.
+ *
+ *  \param[in] p Index of cell in SphP array.
+ *  \param[in] timeBin_old Time bin the cell leaves.
+ *  \param[in] timeBin_new Time bin the cell enters.
+ *  \return void
+ */
+void timebin_move_sfr(int p, int timeBin_old, int timeBin_new)
+{
+  TimeBinSfr[timeBin_old] = TimeBinSfr[timeBin_old] - SphP[p].Sfr;
+  TimeBinSfr[timeBin_new] = TimeBinSfr[timeBin_new] + SphP[p].Sfr;
+}
+#endif /* #ifdef USE_SFR */
+
+/*! \brief Creates list of active particles up to a specified timebin.
+ *
+ *  \param[in, out] tbData Time bin data to be operated on.
+ *  \param[in] timebin Highest timebin whose particles are included.
+ *
+ *  \return void
+ */
+void timebin_make_list_of_active_particles_up_to_timebin(struct TimeBinData *tbData, int timebin)
+{
+  int tbin                 = timebin;
+  tbData->NActiveParticles = 0; /* start from an empty list */
+  while(tbin >= 0)              /* from the given bin down to bin 0 */
+    timebin_add_particles_of_timebin_to_list_of_active_particles(tbData, tbin--);
+}
+
+/*! \brief Add particles of a specific timebin to active particle list.
+ *
+ *  \param[in, out] tbData Time bin data to be operated on.
+ *  \param[in] timebin Time bin which should be included.
+ *
+ *  \return void
+ */
+void timebin_add_particles_of_timebin_to_list_of_active_particles(struct TimeBinData *tbData, int timebin)
+{
+  int p = tbData->FirstInTimeBin[timebin];
+  while(p >= 0) /* follow the linked list of this timebin to its end */
+    {
+      if(P[p].ID != 0 || P[p].Mass != 0) /* entries with ID and Mass both 0 are left out */
+        tbData->ActiveParticleList[tbData->NActiveParticles++] = p;
+      p = tbData->NextInTimeBin[p];
+    }
+}
diff --git a/src/amuse/community/arepo/src/time_integration/timestep.h b/src/amuse/community/arepo/src/time_integration/timestep.h
new file mode 100644
index 0000000000..c110178a31
--- /dev/null
+++ b/src/amuse/community/arepo/src/time_integration/timestep.h
@@ -0,0 +1,88 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/time_integration/timestep.h
+ * \date        05/2018
+ * \brief       Header for timestep criteria.
+ * \details
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 29.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef TIMESTEP_H
+#define TIMESTEP_H
+
+#include "../main/allvars.h"
+
+#ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME
+typedef long long integertime;
+#define TIMEBINS 60
+#define TIMEBASE                                                                                           \
+  (((long long)1) << TIMEBINS) /* The simulated timespan is mapped onto the integer interval [0,TIMEBASE], \
+                                *  where TIMEBASE needs to be a power of 2. */
+#else                          /* #ifdef   ENLARGE_DYNAMIC_RANGE_IN_TIME */
+typedef int integertime;
+#define TIMEBINS 29
+#define TIMEBASE (1 << TIMEBINS)
+#endif /* #ifdef   ENLARGE_DYNAMIC_RANGE_IN_TIME #else */
+
+/*! \brief Per-timebin linked lists of particles plus the active particle list.
+ */
+struct TimeBinData
+{
+  int NActiveParticles; /* number of entries currently stored in ActiveParticleList */
+  long long GlobalNActiveParticles; /* presumably NActiveParticles summed over all tasks -- TODO confirm */
+  int *ActiveParticleList; /* particle indices; readers of the list skip negative entries */
+  int TimeBinCount[TIMEBINS]; /* presumably the number of particles in each time bin -- TODO confirm */
+
+  int FirstInTimeBin[TIMEBINS]; /* first particle of each bin's list; a negative value means there is nothing to traverse */
+  int LastInTimeBin[TIMEBINS]; /* presumably the last particle of each bin's list -- TODO confirm */
+  int *NextInTimeBin; /* successor of a particle within its bin; a negative value ends the list */
+  int *PrevInTimeBin; /* presumably the predecessor of a particle within its bin -- TODO confirm */
+  char Name[100]; /* presumably the name handed to timebins_init() -- TODO confirm */
+  int *MaxPart; /* presumably the pointer handed to timebins_init() -- TODO confirm */
+};
+
+void find_timesteps_without_gravity(void);
+void update_timesteps_from_gravity(void);
+integertime get_timestep_gravity(int p);
+integertime get_timestep_hydro(int p);
+integertime get_timestep_pm(void);
+int test_if_grav_timestep_is_too_large(int p, int bin);
+void validate_timestep(double dt, integertime ti_step, int p);
+int get_timestep_bin(integertime ti_step);
+double get_time_difference_in_Gyr(double a0, double a1);
+
+/* TimeBinData handling: set-up, (re)allocation and maintenance of the per-bin particle lists */
+void timebins_init(struct TimeBinData *tbData, const char *name, int *MaxPart);
+void timebins_allocate(struct TimeBinData *tbData);
+void timebins_reallocate(struct TimeBinData *tbData);
+void timebins_get_bin_and_do_validity_checks(integertime ti_step, int *bin_new, int bin_old);
+void timebin_move_particle(struct TimeBinData *tbData, int p, int timeBin_old, int timeBin_new);
+void timebin_add_particle(struct TimeBinData *tbData, int i_new, int i_old, int timeBin, int addToListOfActiveParticles);
+void timebin_remove_particle(struct TimeBinData *tbData, int idx, int bin);
+void timebin_cleanup_list_of_active_particles(struct TimeBinData *tbData);
+void timebin_move_sfr(int p, int timeBin_old, int timeBin_new); /* NOTE(review): the definition is compiled only if USE_SFR is defined */
+void timebin_make_list_of_active_particles_up_to_timebin(struct TimeBinData *tbData, int timebin);
+void timebin_add_particles_of_timebin_to_list_of_active_particles(struct TimeBinData *tbData, int timebin);
+
+#endif /* #ifndef TIMESTEP_H */
diff --git a/src/amuse/community/arepo/src/time_integration/timestep_treebased.c b/src/amuse/community/arepo/src/time_integration/timestep_treebased.c
new file mode 100644
index 0000000000..1e58a240cb
--- /dev/null
+++ b/src/amuse/community/arepo/src/time_integration/timestep_treebased.c
@@ -0,0 +1,494 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/time_integration/timestep_treebased.c
+ * \date        05/2018
+ * \brief       Algorithm to compute non-local time-step criterion.
+ * \details     This is necessary for local time-stepping if material that
+ *              would require a short time-step is arriving in cells that
+ *              would formally be integrated at a large time-step.
+ *              contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                void tree_based_timesteps(void)
+ *                int timestep_evaluate(int target, int mode, int threadid)
+ *                void tree_based_timesteps_setsoundspeeds(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 11.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef TREE_BASED_TIMESTEPS
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3]; /* copy of P[i].Pos, filled by particle2in() */
+  MyFloat Vel[3]; /* copy of P[i].Vel */
+  MyFloat Csnd; /* copy of SphP[i].Csnd */
+  MyFloat cellrad; /* value of get_cell_radius(i) */
+  MyFloat CurrentMaxTiStep; /* copy of SphP[i].CurrentMaxTiStep */
+
+  int Firstnode; /* this is needed as part of the communication algorithm */
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Fills the communication input structure for one cell. Needed by
+ *         generic_comm_helpers2.
+ *
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index of cell in P and SphP arrays.
+ *  \param[in] firstnode First node of communication, stored in in->Firstnode.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  int dim;
+
+  in->Firstnode        = firstnode;
+  in->Csnd             = SphP[i].Csnd;
+  in->cellrad          = get_cell_radius(i);
+  in->CurrentMaxTiStep = SphP[i].CurrentMaxTiStep;
+
+  /* copy position and velocity component by component */
+  for(dim = 2; dim >= 0; dim--)
+    {
+      in->Pos[dim] = P[i].Pos[dim];
+      in->Vel[dim] = P[i].Vel[dim];
+    }
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyFloat CurrentMaxTiStep; /* limit found by timestep_evaluate(); stored or merged into SphP by out2particle() */
+} data_out;
+
+static data_out *DataResult, *DataOut;
+
+/*! \brief Stores a tree-walk result in SphP, or merges it with the value
+ *         already stored there. Needed by generic_comm_helpers2.
+ *
+ *  \param[in] out Result whose CurrentMaxTiStep is written to SphP[i].
+ *  \param[in] i Index of cell in SphP array.
+ *  \param[in] mode MODE_LOCAL_PARTICLES overwrites the stored value; any
+ *             other mode (results communicated from other tasks) keeps the
+ *             smaller of the stored and the incoming value.
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  /* Local results are stored unconditionally; in every other mode the result
+   * only replaces the stored value if the stored value is larger. */
+  int store = (mode == MODE_LOCAL_PARTICLES);
+
+  if(!store)
+    store = (SphP[i].CurrentMaxTiStep > out->CurrentMaxTiStep);
+
+  if(store)
+    SphP[i].CurrentMaxTiStep = out->CurrentMaxTiStep;
+}
+
+#include "../utils/generic_comm_helpers2.h"
+
+/*! \brief Routine that defines what to do with local particles.
+ *
+ *  Resets the export flags of the calling thread and then processes
+ *  entries of TimeBinsHydro.ActiveParticleList, taking the next index
+ *  from NextParticle, by calling timestep_evaluate() in
+ *  MODE_LOCAL_PARTICLES. The loop ends when the list is exhausted or
+ *  when the thread's ExportSpace falls below MinSpace.
+ *  Passed to generic_comm_pattern() by tree_based_timesteps().
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  int task, slot, cell;
+  int threadid = get_thread_num();
+
+  /* reset the export flags of this thread to -1 for every task */
+  for(task = 0; task < NTask; task++)
+    Thread[threadid].Exportflag[task] = -1;
+
+  /* keep going until ExportSpace of this thread drops below MinSpace */
+  while(!(Thread[threadid].ExportSpace < MinSpace))
+    {
+      /* fetch the next slot of the active list from the NextParticle counter */
+      slot = NextParticle++;
+
+      if(slot >= TimeBinsHydro.NActiveParticles)
+        break;
+
+      cell = TimeBinsHydro.ActiveParticleList[slot];
+
+      /* negative entries and cells with Mass == 0 and ID == 0 are skipped */
+      if(cell < 0 || (P[cell].Mass == 0 && P[cell].ID == 0))
+        continue;
+
+      timestep_evaluate(cell, MODE_LOCAL_PARTICLES, threadid);
+    }
+}
+
+/*! \brief Routine that defines what to do with imported particles.
+ *
+ *  Calls timestep_evaluate() in MODE_IMPORTED_PARTICLES once for every
+ *  index from 0 to Nimport - 1, in ascending order. In that mode the index
+ *  selects an entry of the DataGet buffer and the result is written to the
+ *  matching entry of DataResult (see timestep_evaluate()).
+ *
+ *  Passed to generic_comm_pattern() as the kernel for imported data by
+ *  tree_based_timesteps().
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  int slot;
+  int threadid = get_thread_num();
+
+  /* now do the particles that were sent to us */
+  for(slot = 0; slot < Nimport; slot++)
+    {
+      /* slot indexes the import buffers DataGet / DataResult */
+      timestep_evaluate(slot, MODE_IMPORTED_PARTICLES, threadid);
+    }
+}
+
+/*! \brief Main driver of the tree-based timestep computation.
+ *
+ *  Initializes Csnd / CurrentMaxTiStep, runs the tree walk and logs its
+ *  duration. Called in find_timesteps_without_gravity() (timestep.c).
+ *  \return void
+ */
+void tree_based_timesteps(void)
+{
+  double t_start, t_end;
+
+  /* charge the time returned by measure_time() to CPU_MISC */
+  CPU_Step[CPU_MISC] += measure_time();
+
+  /* set Csnd and the starting CurrentMaxTiStep of the active cells */
+  tree_based_timesteps_setsoundspeeds();
+  generic_set_MaxNexport();
+
+  t_start = second();
+  generic_comm_pattern(TimeBinsHydro.NActiveParticles, kernel_local, kernel_imported);
+  t_end = second();
+
+  mpi_printf("TIMESTEPS: timestep-treewalk: sec=%g\n", timediff(t_start, t_end));
+  CPU_Step[CPU_TREE_TIMESTEPS] += measure_time();
+}
+
+/*! \brief The 'core' of the tree-based timestep computation.
+ *
+ *  Walks the Ngb tree for one target cell and lowers CurrentMaxTiStep to the
+ *  smallest (dist + cellrad) / vsig found among the visited Type 0 particles.
+ *
+ *  \param[in] target Index into P/SphP (local mode) or into DataGet (imported).
+ *  \param[in] mode MODE_LOCAL_PARTICLES or MODE_IMPORTED_PARTICLES.
+ *  \param[in] threadid ID of thread, passed on when a node is exported.
+ *
+ *  \return cost, i.e. number of tree entries (particles and nodes) visited.
+ */
+int timestep_evaluate(int target, int mode, int threadid)
+{
+  int k, cost = 0, numnodes, *firstnode;
+  MyDouble *pos;
+  MyFloat *vel;
+  double dxp, dxm, dyp, dym, dzp, dzm, pos_m[3], pos_p[3];
+  int no, p;
+  struct NgbNODE *current;
+  double dx, dy, dz, dist, csnd, cellrad, xtmp, ytmp, ztmp;
+
+  data_out out;
+  data_in local, *target_data;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0);
+      target_data = &local;
+
+      numnodes  = 1;
+      firstnode = NULL;
+    }
+  else
+    {
+      target_data = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos     = target_data->Pos;
+  vel     = target_data->Vel;
+  csnd    = target_data->Csnd;
+  cellrad = target_data->cellrad;
+
+  out.CurrentMaxTiStep = target_data->CurrentMaxTiStep;
+
+  pos_m[0] = pos[0] - boxSize_X;
+  pos_p[0] = pos[0] + boxSize_X;
+  pos_m[1] = pos[1] - boxSize_Y;
+  pos_p[1] = pos[1] + boxSize_Y;
+  pos_m[2] = pos[2] - boxSize_Z;
+  pos_p[2] = pos[2] + boxSize_Z;
+
+  double atimeinv;
+  if(All.ComovingIntegrationOn)
+    atimeinv = 1 / All.Time;
+  else
+    atimeinv = 1.0;
+
+  /* Now start the actual tree-walk computation for this particle */
+
+  for(k = 0; k < numnodes; k++)
+    {
+      if(mode == MODE_LOCAL_PARTICLES)
+        {
+          no = Ngb_MaxPart; /* root node */
+        }
+      else
+        {
+          no = firstnode[k];
+          no = Ngb_Nodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0)
+        {
+          cost++;
+
+          if(no < Ngb_MaxPart) /* single particle */
+            {
+              p  = no;
+              no = Ngb_Nextnode[no];
+
+              if(P[p].Type > 0)
+                continue; /* only Type 0 particles contribute */
+
+              if(P[p].Mass == 0 && P[p].ID == 0) /* skip eliminated cells */
+                continue;
+
+              if(P[p].Ti_Current != All.Ti_Current)
+                {
+                  drift_particle(p, All.Ti_Current);
+                }
+
+              dx = NEAREST_X(P[p].Pos[0] - pos[0]);
+              dy = NEAREST_Y(P[p].Pos[1] - pos[1]);
+              dz = NEAREST_Z(P[p].Pos[2] - pos[2]);
+
+              dist = sqrt(dx * dx + dy * dy + dz * dz);
+
+              if(dist > 0)
+                {
+                  double vsig = csnd + SphP[p].Csnd -
+                                ((P[p].Vel[0] - vel[0]) * dx + (P[p].Vel[1] - vel[1]) * dy + (P[p].Vel[2] - vel[2]) * dz) / dist;
+
+                  if(vsig > 0)
+                    {
+                      dist += cellrad; /* take one cell radius as minimum distance in order to protect against unreasonably small steps
+                                          if two mesh-generating points are extremely close */
+                      if(out.CurrentMaxTiStep > dist / vsig)
+                        out.CurrentMaxTiStep = dist / vsig;
+                    }
+                }
+            }
+          else if(no < Ngb_MaxPart + Ngb_MaxNodes) /* internal node */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  if(no <
+                     Ngb_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */
+                    break;
+                }
+
+              current = &Ngb_Nodes[no];
+
+              if(current->Ti_Current != All.Ti_Current)
+                {
+                  drift_node(current, All.Ti_Current);
+                }
+
+              if(pos[0] > current->u.d.range_max[0] && pos_m[0] < current->u.d.range_min[0])
+                {
+                  dxp = pos[0] - current->u.d.range_max[0];
+                  dxm = pos_m[0] - current->u.d.range_min[0]; /* negative */
+                }
+              else if(pos_p[0] > current->u.d.range_max[0] && pos[0] < current->u.d.range_min[0])
+                {
+                  dxp = pos_p[0] - current->u.d.range_max[0];
+                  dxm = pos[0] - current->u.d.range_min[0]; /* negative */
+                }
+              else
+                dxp = dxm = 0;
+
+              if(pos[1] > current->u.d.range_max[1] && pos_m[1] < current->u.d.range_min[1])
+                {
+                  dyp = pos[1] - current->u.d.range_max[1];
+                  dym = pos_m[1] - current->u.d.range_min[1]; /* negative */
+                }
+              else if(pos_p[1] > current->u.d.range_max[1] && pos[1] < current->u.d.range_min[1])
+                {
+                  dyp = pos_p[1] - current->u.d.range_max[1];
+                  dym = pos[1] - current->u.d.range_min[1]; /* negative */
+                }
+              else
+                dyp = dym = 0;
+
+              if(pos[2] > current->u.d.range_max[2] && pos_m[2] < current->u.d.range_min[2])
+                {
+                  dzp = pos[2] - current->u.d.range_max[2];
+                  dzm = pos_m[2] - current->u.d.range_min[2]; /* negative */
+                }
+              else if(pos_p[2] > current->u.d.range_max[2] && pos[2] < current->u.d.range_min[2])
+                {
+                  dzp = pos_p[2] - current->u.d.range_max[2];
+                  dzm = pos[2] - current->u.d.range_min[2]; /* negative */
+                }
+              else
+                dzp = dzm = 0;
+
+              double vsig = csnd + ExtNgb_Nodes[no].MaxCsnd;
+
+              int flag = 0; /* counts the axes (0..3) that pass the distance test below */
+
+              if(dxp + cellrad < out.CurrentMaxTiStep * (vsig + (ExtNgb_Nodes[no].vmax[0] * atimeinv - vel[0])))
+                flag++;
+              else if(-dxm + cellrad < out.CurrentMaxTiStep * (vsig - (ExtNgb_Nodes[no].vmin[0] * atimeinv - vel[0])))
+                flag++;
+
+              if(dyp + cellrad < out.CurrentMaxTiStep * (vsig + (ExtNgb_Nodes[no].vmax[1] * atimeinv - vel[1])))
+                flag++;
+              else if(-dym + cellrad < out.CurrentMaxTiStep * (vsig - (ExtNgb_Nodes[no].vmin[1] * atimeinv - vel[1])))
+                flag++;
+
+              if(dzp + cellrad < out.CurrentMaxTiStep * (vsig + (ExtNgb_Nodes[no].vmax[2] * atimeinv - vel[2])))
+                flag++;
+              else if(-dzm + cellrad < out.CurrentMaxTiStep * (vsig - (ExtNgb_Nodes[no].vmin[2] * atimeinv - vel[2])))
+                flag++;
+
+              if(flag >= 3)
+                {
+                  /* all three axes passed the test: need to open */
+                  no = current->u.d.nextnode;
+                  continue;
+                }
+
+              /* in this case the node can be discarded */
+              no = current->u.d.sibling;
+              continue;
+            }
+          else /* pseudo particle */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("mode == 1");
+
+              if(target >= 0) /* if no target is given, export will not occur */
+                ngb_treefind_export_node_threads(no, target, threadid, 0);
+
+              no = Ngb_Nextnode[no - Ngb_MaxNodes];
+              continue;
+            }
+        }
+    }
+
+  /* Now collect the result at the right place: SphP for local targets, DataResult for imported ones */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return cost;
+}
+
+/*! \brief Sets local sound speed and time-step limits from local conditions.
+ *
+ *  For every active hydro cell this stores SphP[i].Csnd (floored at 1.0e-30)
+ *  and initializes SphP[i].CurrentMaxTiStep before the tree walk lowers it.
+ *  \return void
+ */
+void tree_based_timesteps_setsoundspeeds(void)
+{
+  int idx, i;
+  double rad, csnd;
+  double hubble_a, atime;
+
+  if(All.ComovingIntegrationOn)
+    {
+      hubble_a = hubble_function(All.Time);
+      atime    = All.Time;
+    }
+  else
+    hubble_a = atime = 1; /* both factors drop out of the cap applied below */
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue; /* skip negative list entries */
+
+      csnd = get_sound_speed(i);
+
+      if(csnd <= 1.0e-30) /* floor, keeps rad / csnd below finite */
+        csnd = 1.0e-30;
+
+      SphP[i].Csnd = csnd; /* stored before the velocity term is added to the local csnd */
+
+      rad = get_cell_radius(i);
+
+#ifdef VORONOI_STATIC_MESH
+      csnd += sqrt(P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]) / All.cf_atime;
+#else  /* #ifdef VORONOI_STATIC_MESH */
+      csnd += sqrt((P[i].Vel[0] - SphP[i].VelVertex[0]) * (P[i].Vel[0] - SphP[i].VelVertex[0]) +
+                   (P[i].Vel[1] - SphP[i].VelVertex[1]) * (P[i].Vel[1] - SphP[i].VelVertex[1]) +
+                   (P[i].Vel[2] - SphP[i].VelVertex[2]) * (P[i].Vel[2] - SphP[i].VelVertex[2])) /
+              All.cf_atime;
+#endif /* #ifdef VORONOI_STATIC_MESH #else */
+
+      SphP[i].CurrentMaxTiStep = rad / csnd;
+
+      /* note: for cosmological integration, CurrentMaxTiStep stores  1/a times the maximum allowed physical timestep */
+
+      if(SphP[i].CurrentMaxTiStep >= All.MaxSizeTimestep / (atime * hubble_a) / All.CourantFac)
+        SphP[i].CurrentMaxTiStep = All.MaxSizeTimestep / (atime * hubble_a) / All.CourantFac;
+    }
+}
+
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
diff --git a/src/amuse/community/arepo/src/utils/allocate.c b/src/amuse/community/arepo/src/utils/allocate.c
new file mode 100644
index 0000000000..234ec06041
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/allocate.c
@@ -0,0 +1,133 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/utils/allocate.c
+ * \date        05/2018
+ * \brief       Functions to allocate and reallocate global arrays.
+ * \details     contains functions
+ *                void allocate_memory(void)
+ *                void reallocate_memory_maxpart(void)
+ *                void reallocate_memory_maxpartsph(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 03.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief Allocates memory for global arrays.
+ *
+ *  This routine allocates memory for
+ *  - communication buffers (export flags, send/receive counts and offsets),
+ *  - particle storage, both the collisionless and the cells (SPH particles),
+ *  - the time bin lists TimeBinsHydro and TimeBinsGravity.
+ *
+ *  \return void
+ */
+void allocate_memory(void)
+{
+  int NTaskTimesThreads;
+
+  NTaskTimesThreads = MaxThreads * NTask;
+
+  Exportflag      = (int *)mymalloc("Exportflag", NTaskTimesThreads * sizeof(int));
+  Exportindex     = (int *)mymalloc("Exportindex", NTaskTimesThreads * sizeof(int));
+  Exportnodecount = (int *)mymalloc("Exportnodecount", NTaskTimesThreads * sizeof(int));
+
+  Send = (struct send_recv_counts *)mymalloc("Send", sizeof(struct send_recv_counts) * NTask);
+  Recv = (struct send_recv_counts *)mymalloc("Recv", sizeof(struct send_recv_counts) * NTask);
+
+  TasksThatSend = (int *)mymalloc("TasksThatSend", sizeof(int) * NTask);
+  TasksThatRecv = (int *)mymalloc("TasksThatRecv", sizeof(int) * NTask);
+
+  Send_count  = (int *)mymalloc("Send_count", sizeof(int) * NTaskTimesThreads);
+  Send_offset = (int *)mymalloc("Send_offset", sizeof(int) * NTaskTimesThreads);
+  Recv_count  = (int *)mymalloc("Recv_count", sizeof(int) * NTask);
+  Recv_offset = (int *)mymalloc("Recv_offset", sizeof(int) * NTask);
+
+  Send_count_nodes  = (int *)mymalloc("Send_count_nodes", sizeof(int) * NTask);
+  Send_offset_nodes = (int *)mymalloc("Send_offset_nodes", sizeof(int) * NTask);
+  Recv_count_nodes  = (int *)mymalloc("Recv_count_nodes", sizeof(int) * NTask);
+  Recv_offset_nodes = (int *)mymalloc("Recv_offset_nodes", sizeof(int) * NTask);
+
+  Mesh_Send_count  = (int *)mymalloc("Mesh_Send_count", sizeof(int) * NTask);
+  Mesh_Send_offset = (int *)mymalloc("Mesh_Send_offset", sizeof(int) * NTask);
+  Mesh_Recv_count  = (int *)mymalloc("Mesh_Recv_count", sizeof(int) * NTask);
+  Mesh_Recv_offset = (int *)mymalloc("Mesh_Recv_offset", sizeof(int) * NTask);
+
+  Force_Send_count  = (int *)mymalloc("Force_Send_count", sizeof(int) * NTask);
+  Force_Send_offset = (int *)mymalloc("Force_Send_offset", sizeof(int) * NTask);
+  Force_Recv_count  = (int *)mymalloc("Force_Recv_count", sizeof(int) * NTask);
+  Force_Recv_offset = (int *)mymalloc("Force_Recv_offset", sizeof(int) * NTask);
+
+  mpi_printf("ALLOCATE: initial allocation for MaxPart = %d\n", All.MaxPart);
+  P = (struct particle_data *)mymalloc_movable(&P, "P", All.MaxPart * sizeof(struct particle_data));
+
+  mpi_printf("ALLOCATE: initial allocation for MaxPartSph = %d\n", All.MaxPartSph);
+  SphP = (struct sph_particle_data *)mymalloc_movable(&SphP, "SphP", All.MaxPartSph * sizeof(struct sph_particle_data));
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+  PartSpecialListGlobal = (struct special_particle_data *)mymalloc_movable(&PartSpecialListGlobal, "PartSpecialListGlobal",
+                                                                           All.MaxPartSpecial * sizeof(struct special_particle_data));
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+  timebins_allocate(&TimeBinsHydro);
+  timebins_allocate(&TimeBinsGravity);
+
+  /* zero the complete P and SphP arrays (MaxPart and MaxPartSph entries) */
+  memset(P, 0, All.MaxPart * sizeof(struct particle_data));
+  memset(SphP, 0, All.MaxPartSph * sizeof(struct sph_particle_data));
+}
+
+/*! \brief Resizes the particle storage to the current All.MaxPart.
+ *
+ *  Reallocates the P array and the TimeBinsGravity lists.
+ *
+ *  \return void
+ */
+void reallocate_memory_maxpart(void)
+{
+  size_t nbytes = All.MaxPart * sizeof(struct particle_data);
+  mpi_printf("ALLOCATE: Changing to MaxPart = %d\n", All.MaxPart);
+  P = (struct particle_data *)myrealloc_movable(P, nbytes);
+  timebins_reallocate(&TimeBinsGravity);
+}
+
+/*! \brief Resizes the cell storage to the current All.MaxPartSph.
+ *
+ *  Reallocates the SphP array and the TimeBinsHydro lists.
+ *
+ *  \return void
+ */
+void reallocate_memory_maxpartsph(void)
+{
+  size_t nbytes = All.MaxPartSph * sizeof(struct sph_particle_data);
+  mpi_printf("ALLOCATE: Changing to MaxPartSph = %d\n", All.MaxPartSph);
+  SphP = (struct sph_particle_data *)myrealloc_movable(SphP, nbytes);
+  timebins_reallocate(&TimeBinsHydro);
+}
diff --git a/src/amuse/community/arepo/src/utils/debug.c b/src/amuse/community/arepo/src/utils/debug.c
new file mode 100644
index 0000000000..c425ce7d40
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/debug.c
@@ -0,0 +1,148 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/utils/debug.c
+ * \date        05/2018
+ * \brief       Print relevant information about a particle / face for
+ *              debugging.
+ * \details     The functions contained in this file are mostly called when a
+ *              condition, that causes the abort of the run, is met. In that
+ *              case, the information about the state of the particle / face
+ *              which triggered that condition is printed to the standard
+ *              output.
+ *              contains functions:
+ *                void print_particle_info(int i)
+ *                void print_particle_info_from_ID(MyIDType ID)
+ *                void print_state_info(struct state *st)
+ *                void print_state_face_info(struct state_face *st)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 03.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief Prints particle data and, for Type == 0, cell data to stdout.
+ *         NOTE(review): EKin divides by P[i].Mass; Mass == 0 prints inf/nan.
+ *
+ *  \param[in] i Index into P (also used for SphP when P[i].Type == 0).
+ *  \return void
+ */
+void print_particle_info(int i)
+{
+  printf("Task=%d, ID=%llu, Type=%d, TimeBinGrav=%d, TimeBinHydro=%d, Mass=%g, pos=%g|%g|%g, vel=%g|%g|%g\n", ThisTask,
+         (unsigned long long)P[i].ID, P[i].Type, P[i].TimeBinGrav, P[i].TimeBinHydro, P[i].Mass, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2],
+         P[i].Vel[0], P[i].Vel[1], P[i].Vel[2]);
+#ifdef PMGRID
+  printf("GravAccel=%g|%g|%g, GravPM=%g|%g|%g, Soft=%g, SoftType=%d, OldAcc=%g\n", P[i].GravAccel[0], P[i].GravAccel[1],
+         P[i].GravAccel[2], P[i].GravPM[0], P[i].GravPM[1], P[i].GravPM[2], All.ForceSoftening[P[i].SofteningType], P[i].SofteningType,
+         P[i].OldAcc);
+#else  /* #ifdef PMGRID */
+  printf("GravAccel=%g|%g|%g, Soft=%g, SoftType=%d, OldAcc=%g\n", P[i].GravAccel[0], P[i].GravAccel[1], P[i].GravAccel[2],
+         All.ForceSoftening[P[i].SofteningType], P[i].SofteningType, P[i].OldAcc);
+#endif /* #ifdef PMGRID #else */
+
+  if(P[i].Type == 0) /* cell quantities from SphP are printed for Type 0 only */
+    {
+      printf("Vol=%g, rad=%g, rho=%g, p=%g,u=%g, velVertex=%g|%g|%g, csnd=%g\n", SphP[i].Volume, get_cell_radius(i), SphP[i].Density,
+             SphP[i].Pressure, SphP[i].Utherm, SphP[i].VelVertex[0], SphP[i].VelVertex[1], SphP[i].VelVertex[2], get_sound_speed(i));
+      printf("Center-Pos=%g|%g|%g\n", SphP[i].Center[0] - P[i].Pos[0], SphP[i].Center[1] - P[i].Pos[1],
+             SphP[i].Center[2] - P[i].Pos[2]);
+#ifndef MHD
+      printf("Mom=%g|%g|%g, Energy=%g, EInt=%g, EKin=%g\n", SphP[i].Momentum[0], SphP[i].Momentum[1], SphP[i].Momentum[2],
+             SphP[i].Energy, SphP[i].Utherm * P[i].Mass,
+             0.5 * P[i].Mass *
+                 ((SphP[i].Momentum[0] / P[i].Mass) * (SphP[i].Momentum[0] / P[i].Mass) +
+                  (SphP[i].Momentum[1] / P[i].Mass) * (SphP[i].Momentum[1] / P[i].Mass) +
+                  (SphP[i].Momentum[2] / P[i].Mass) * (SphP[i].Momentum[2] / P[i].Mass)));
+#else  /* #ifndef MHD */
+      printf("Mom=%g|%g|%g, Energy=%g, EInt=%g, EKin=%g, EB=%g\n", SphP[i].Momentum[0], SphP[i].Momentum[1], SphP[i].Momentum[2],
+             SphP[i].Energy, SphP[i].Utherm * P[i].Mass,
+             0.5 * P[i].Mass *
+                 ((SphP[i].Momentum[0] / P[i].Mass) * (SphP[i].Momentum[0] / P[i].Mass) +
+                  (SphP[i].Momentum[1] / P[i].Mass) * (SphP[i].Momentum[1] / P[i].Mass) +
+                  (SphP[i].Momentum[2] / P[i].Mass) * (SphP[i].Momentum[2] / P[i].Mass)),
+             0.5 * SphP[i].Volume * (SphP[i].B[0] * SphP[i].B[0] + SphP[i].B[1] * SphP[i].B[1] + SphP[i].B[2] * SphP[i].B[2]));
+#endif /* #ifndef MHD #else */
+
+#ifdef MHD
+      double err = pow(SphP[i].Volume, 1. / 3.) * fabs(SphP[i].DivB) /
+                   sqrt(SphP[i].B[0] * SphP[i].B[0] + SphP[i].B[1] * SphP[i].B[1] + SphP[i].B[2] * SphP[i].B[2]);
+      printf("B=%g|%g|%g, divb=%g, err=%g\n", SphP[i].B[0], SphP[i].B[1], SphP[i].B[2], SphP[i].DivB, err);
+#endif /* #ifdef MHD */
+
+#ifdef TREE_BASED_TIMESTEPS
+      printf("ID=%llu SphP[p].CurrentMaxTiStep=%g\n", (unsigned long long)P[i].ID, SphP[i].CurrentMaxTiStep);
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+    }
+}
+
+/*! \brief Prints the information of every particle / cell in
+ *         P[0 .. NumPart - 1] whose ID matches the given one.
+ *
+ *  \param[in] ID particle / cell ID to look for.
+ *  \return void
+ */
+void print_particle_info_from_ID(MyIDType ID)
+{
+  int idx = -1;
+  while(++idx < NumPart)
+    if(ID == P[idx].ID)
+      print_particle_info(idx);
+}
+
+/*! \brief Prints the left or right state of a face to standard output.
+ *
+ *  \param[in] st Structure containing the left or right state of a face.
+ *
+ *  \return void
+ */
+void print_state_info(struct state *st)
+{
+  const struct state *s = st; /* the state is only read, never modified */
+  printf("Task=%d, ID=%llu rho=%g, p=%g, vel=%g|%g|%g, velVertex=%g|%g|%g\n", ThisTask, (unsigned long long)s->ID, s->rho,
+         s->press, s->velx, s->vely, s->velz, s->velVertex[0], s->velVertex[1], s->velVertex[2]);
+  printf("dx=%g, dy=%g, dz=%g, dt_half=%g\n", s->dx, s->dy, s->dz, s->dt_half);
+  printf("timeBin=%d, volume=%g, activearea=%g, surfacearea=%g, csnd=%g\n", s->timeBin, s->volume, s->activearea,
+         s->surfacearea, s->csnd);
+#ifdef MHD
+  printf("B=%g|%g|%g\n", s->Bx, s->By, s->Bz);
+#endif /* #ifdef MHD */
+}
+
+/*! \brief Prints the state of a face, as determined by the Riemann solver,
+ *         to standard output.
+ *
+ *  \param[in] st State of a face after the solution of the Riemann problem.
+ *
+ *  \return void
+ */
+void print_state_face_info(struct state_face *st)
+{
+  printf("rho=%g, p=%g, vel=%g|%g|%g\n",
+         st->rho, st->press, st->velx, st->vely, st->velz);
+}
diff --git a/src/amuse/community/arepo/src/utils/dtypes.h b/src/amuse/community/arepo/src/utils/dtypes.h
new file mode 100644
index 0000000000..816412b529
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/dtypes.h
@@ -0,0 +1,195 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/utils/dtypes.h
+ * \date        05/2018
+ * \brief       Definition of intrinsic datatypes.
+ * \details
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 28.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef DTYPES_H
+#define DTYPES_H
+
+#if !defined(FFTW) /* build FFTW(x) only if it is not defined already */
+#define CONCAT(prefix, name) prefix##name
+#ifndef DOUBLEPRECISION_FFTW /* single precision: FFTW(x) becomes fftwf_x */
+#define FFTW(x) CONCAT(fftwf_, x)
+#else /* #ifndef DOUBLEPRECISION_FFTW; double precision: FFTW(x) becomes fftw_x */
+#define FFTW(x) CONCAT(fftw_, x)
+#endif /* #ifndef DOUBLEPRECISION_FFTW #else */
+#endif /* #if !defined(FFTW) */
+
+#ifdef LONGIDS /* IDs are unsigned long long instead of unsigned int */
+typedef unsigned long long MyIDType;
+#define MPI_MYIDTYPE MPI_UNSIGNED_LONG_LONG
+#else /* #ifdef LONGIDS */
+typedef unsigned int MyIDType;
+#define MPI_MYIDTYPE MPI_UNSIGNED
+#endif /* #ifdef LONGIDS #else */
+
+/* Floating point types selected by DOUBLEPRECISION, with the matching MPI datatypes
+ * (MPI_MYFLOAT always describes MyFloat, MPI_MYDOUBLE always describes MyDouble):
+ *   undefined -> everything single precision (the default)
+ *   2         -> mixed precision: only MyDouble is double
+ *   3         -> mixed precision: only MySingle stays float */
+#if !defined(DOUBLEPRECISION)
+typedef float MySingle;
+typedef float MyFloat;
+typedef float MyDouble;
+#define MPI_MYFLOAT MPI_FLOAT
+#define MPI_MYDOUBLE MPI_FLOAT
+#elif(DOUBLEPRECISION == 2) /* mixed precision */
+typedef float MySingle;
+typedef float MyFloat;
+typedef double MyDouble;
+#define MPI_MYFLOAT MPI_FLOAT
+#define MPI_MYDOUBLE MPI_DOUBLE
+#elif(DOUBLEPRECISION == 3) /* mixed precision, fewer single precision variables */
+typedef float MySingle;
+typedef double MyFloat;
+typedef double MyDouble;
+#define MPI_MYFLOAT MPI_DOUBLE /* MyFloat is double here, so it must not be communicated as MPI_FLOAT */
+#define MPI_MYDOUBLE MPI_DOUBLE
+#else /* any other value of DOUBLEPRECISION: everything double-precision */
+typedef double MySingle;
+typedef double MyFloat;
+typedef double MyDouble;
+#define MPI_MYFLOAT MPI_DOUBLE
+#define MPI_MYDOUBLE MPI_DOUBLE
+#endif /* #if !defined(DOUBLEPRECISION) #elif ... #else */
+
+#ifndef OUTPUT_IN_DOUBLEPRECISION /* float unless OUTPUT_IN_DOUBLEPRECISION is set */
+typedef float MyOutputFloat;
+#else  /* #ifndef OUTPUT_IN_DOUBLEPRECISION */
+typedef double MyOutputFloat;
+#endif /* #ifndef OUTPUT_IN_DOUBLEPRECISION #else */
+
+#ifndef INPUT_IN_DOUBLEPRECISION /* float unless INPUT_IN_DOUBLEPRECISION is set */
+typedef float MyInputFloat;
+#else  /* #ifndef INPUT_IN_DOUBLEPRECISION */
+typedef double MyInputFloat;
+#endif /* #ifndef INPUT_IN_DOUBLEPRECISION #else */
+
+#ifdef NGB_TREE_DOUBLEPRECISION /* the range limit follows the chosen type */
+typedef double MyNgbTreeFloat;
+#define MAX_NGBRANGE_NUMBER MAX_DOUBLE_NUMBER
+#else /* #ifdef NGB_TREE_DOUBLEPRECISION */
+typedef float MyNgbTreeFloat;
+#define MAX_NGBRANGE_NUMBER MAX_FLOAT_NUMBER
+#endif /* #ifdef NGB_TREE_DOUBLEPRECISION #else */
+
+#if defined(PMGRID)
+#include <fftw3.h>
+
+#ifndef DOUBLEPRECISION_FFTW /* single precision FFTW types unless requested otherwise */
+typedef float fft_real;
+typedef fftwf_complex fft_complex;
+#else  /* #ifndef DOUBLEPRECISION_FFTW */
+typedef double fft_real;
+typedef fftw_complex fft_complex;
+#endif /* #ifndef DOUBLEPRECISION_FFTW #else */
+typedef ptrdiff_t fft_ptrdiff_t;
+
+typedef struct /* bookkeeping of one FFT setup: grid sizes, FFTW plans and the slab- or column-based work distribution */
+{
+  int NgridX, NgridY, NgridZ; /* NOTE(review): presumably the grid dimensions in x, y and z -- confirm */
+  int Ngridz, Ngrid2;         /* NOTE(review): meaning not visible in this header -- confirm in the FFT code */
+
+  FFTW(plan) forward_plan_zdir; /* FFTW(plan) is fftw_plan or fftwf_plan, depending on DOUBLEPRECISION_FFTW */
+  FFTW(plan) forward_plan_xdir;
+  FFTW(plan) forward_plan_ydir;
+
+  FFTW(plan) backward_plan_zdir;
+  FFTW(plan) backward_plan_ydir;
+  FFTW(plan) backward_plan_xdir;
+
+#ifndef FFT_COLUMN_BASED
+
+  int *slab_to_task; /*!< Maps a slab index to the task responsible for the slab */
+  int *slabs_x_per_task; /* NOTE(review): presumably the x-direction counterpart of slabs_y_per_task -- confirm */
+  int *first_slab_x_of_task; /*!< Array containing the index of the first slab of each task */
+  int *slabs_y_per_task;     /*!< Array containing the number of slabs each task is responsible for */
+  int *first_slab_y_of_task; /*!< Array containing the index of the first slab of each task */
+
+  int nslab_x, slabstart_x, nslab_y, slabstart_y; /* NOTE(review): presumably number and first index of the local slabs -- confirm */
+  int largest_x_slab; /*!< size of the largest slab in x direction */
+  int largest_y_slab; /*!< size of the largest slab in y direction */
+
+#else /* #ifndef FFT_COLUMN_BASED */
+
+  size_t max_datasize;
+  size_t fftsize;
+
+  int base_firstcol, base_ncol, base_lastcol;
+  int transposed_firstcol, transposed_ncol;
+  int second_transposed_firstcol, second_transposed_ncol;
+  size_t second_transposed_ncells;
+
+  int firstcol_XZ, ncol_XZ;
+  int firstcol_YZ, ncol_YZ;
+
+  int pivotcol; /* to go from column number to task */
+  int avg;
+  int tasklastsection;
+
+  size_t *offsets_send_A; /* one send and one recv offset array per exchange: A, B, C, D, 13, 23, 13back, 23back */
+  size_t *offsets_recv_A;
+  size_t *offsets_send_B;
+  size_t *offsets_recv_B;
+  size_t *offsets_send_C;
+  size_t *offsets_recv_C;
+  size_t *offsets_send_D;
+  size_t *offsets_recv_D;
+  size_t *offsets_send_13;
+  size_t *offsets_recv_13;
+  size_t *offsets_send_23;
+  size_t *offsets_recv_23;
+  size_t *offsets_send_13back;
+  size_t *offsets_recv_13back;
+  size_t *offsets_send_23back;
+  size_t *offsets_recv_23back;
+
+  size_t *count_send_A; /* send and recv count arrays, named like the offset arrays above */
+  size_t *count_recv_A;
+  size_t *count_send_B;
+  size_t *count_recv_B;
+  size_t *count_send_C;
+  size_t *count_recv_C;
+  size_t *count_send_D;
+  size_t *count_recv_D;
+  size_t *count_send_13;
+  size_t *count_recv_13;
+  size_t *count_send_23;
+  size_t *count_recv_23;
+  size_t *count_send_13back;
+  size_t *count_recv_13back;
+  size_t *count_send_23back;
+  size_t *count_recv_23back;
+
+#endif /* #ifndef FFT_COLUMN_BASED */
+} fft_plan;
+
+#endif /* #if defined(PMGRID) */
+
+#endif /* #ifndef DTYPES_H */
diff --git a/src/amuse/community/arepo/src/utils/generic_comm_helpers2.h b/src/amuse/community/arepo/src/utils/generic_comm_helpers2.h
new file mode 100644
index 0000000000..a159b17045
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/generic_comm_helpers2.h
@@ -0,0 +1,724 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/utils/generic_comm_helpers2.h
+ * \date        05/2018
+ * \brief       Generic 'template' MPI communication structure used in many
+ *              parts of the code.
+ * \details     Usage:
+ *                see e.g. src/init/density.c
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 04.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef USE_SUBCOMM_COMMUNICATOR /* default: communicate over MPI_COMM_WORLD */
+#define MYCOMMUNICATOR MPI_COMM_WORLD
+#define MyThisTask ThisTask
+#define MyNTask NTask
+#else /* #ifndef USE_SUBCOMM_COMMUNICATOR */
+#define MYCOMMUNICATOR SubComm
+#define MyThisTask SubThisTask
+#define MyNTask SubNTask
+#endif /* #ifndef USE_SUBCOMM_COMMUNICATOR #else */
+
+#define EXTRA_SPACE 16384 /* slack in bytes that is added to the computed import storage */
+
+typedef struct data_partlist data_partlist;
+typedef struct datanodelist datanodelist;
+
+/* file-local bookkeeping shared by the generic communication routines of this header */
+static size_t ExportSpace, MinSpace;  /* export buffer size per thread / minimum size required, both in bytes */
+static int NextParticle;              /* progress index: all local work is done once it reaches nactive */
+static int Nexport, NexportNodes;     /* exported particles / nodes, summed over all threads */
+static int Nimport, NimportNodes;     /* imported particles / nodes of the current phase */
+static long long SumNexport;          /* Nexport accumulated over all exchanges; book-keeping variable */
+static int *NodeDataIn, *NodeDataGet; /* node lists to be sent / node lists received */
+
+/* "file|line|" string written by generic_set_info() */
+static char callorigin[1000];
+
+#if defined(USE_DSDE) /* prototype of the MPI_Ibarrier based variant defined at the end of this file */
+static void generic_prepare_import_counts_ibarrier(void);
+#endif /* #if defined(USE_DSDE) */
+
+#if defined(USE_INLINED_IBARRIER) /* prototype of the variant with the home-grown barrier */
+static void generic_prepare_import_counts_inlined_ibarrier(void);
+#endif /* #if defined(USE_INLINED_IBARRIER) */
+
+#define generic_set_MaxNexport(...) /* any arguments are accepted but ignored */ \
+  { /* forwards the call site (function, file, line) to generic_set_info() */    \
+    generic_set_info(__FUNCTION__, __FILE__, __LINE__); \
+  }
+
+/*! \brief Determines how much buffer space may be used for exports, based on
+ *         the memory that is locally still free, and how much memory may at
+ *         most be needed to process a single particle. Terminates the run
+ *         if not even a single particle can be processed safely.
+ *
+ *  \param[in] func Name of the calling function (only used for VERBOSE output).
+ *  \param[in] file,line Call site; stored in callorigin as "file|line|".
+ */
+static void generic_set_info(const char *func, const char *file, int line)
+{
+  ExportSpace = 0.3 * (FreeBytes); /* we just grab at most 30% of the still available memory here */
+  ExportSpace /= NUM_THREADS;
+  /* account for the neighbor list buffer of every thread; clamped at zero, as an unsigned wrap-around would defeat the check below */
+  ExportSpace -= (ExportSpace > NumPart * sizeof(int)) ? NumPart * sizeof(int) : ExportSpace;
+
+  /* make the size a multiple both of data_partlist and datanodelist */
+  ExportSpace /= (sizeof(data_partlist) * sizeof(datanodelist));
+  ExportSpace *= (sizeof(data_partlist) * sizeof(datanodelist));
+
+  MinSpace =
+      (MyNTask - 1) * (sizeof(data_partlist) + sizeof(data_in) + sizeof(data_out)) + NTopleaves * (sizeof(datanodelist) + sizeof(int));
+  sprintf(callorigin, "%s|%d|", file, line);
+
+#ifdef VERBOSE
+  mpi_printf("GENERIC: function %s(), file %s, line %d: MinSpace = %g MB  NTopleaves = %d  ExportSpace = %g MB sizeof(data_in)=%d "
+             "sizeof(data_out)=%d\n",
+             func, file, line, MinSpace / (1024.0 * 1024.0), NTopleaves, ExportSpace / (1024.0 * 1024.0), (int)sizeof(data_in), (int)sizeof(data_out));
+#endif /* #ifdef VERBOSE */
+
+  if(ExportSpace < MinSpace)
+    terminate("Bummer. Can't even safely process a single particle for the available memory. FreeBytes=%lld  ExportSpace=%lld  "
+              "MinSpace=%lld  MyNTask=%d  NTopleaves=%d",
+              (long long)FreeBytes, (long long)ExportSpace, (long long)MinSpace, MyNTask, NTopleaves);
+}
+
+/*! \brief Allocates, for every thread, the buffers needed for a loop over
+ *         the remaining local particles. PartList[] and NodeList[] share
+ *         one buffer of "ExportSpace" bytes: PartList is filled in from the
+ *         beginning, NodeList from the end. As the relative share of the two
+ *         is not known a priori, the available space is used optimally in
+ *         this way.
+ */
+static void generic_alloc_partlist_nodelist_ngblist_threadbufs(void)
+{
+  /* each thread gets an Exportflag section whose length is MyNTask rounded up to a multiple of 16 */
+  const int flags_per_thread = (((MyNTask - 1) / 16) + 1) * 16;
+
+  for(int thr = 0; thr < NUM_THREADS; thr++)
+    {
+      Thread[thr].ExportSpace  = ExportSpace;
+      Thread[thr].InitialSpace = ExportSpace;
+      Thread[thr].ItemSize     = (sizeof(data_partlist) + sizeof(data_in) + sizeof(data_out));
+      Thread[thr].Nexport      = 0;
+      Thread[thr].NexportNodes = 0;
+
+      /* the NodeList array is not allocated separately: it grows backwards from the end of the PartList buffer */
+      Thread[thr].PartList   = (struct data_partlist *)mymalloc_movable_g(&Thread[thr].PartList, "PartList", ExportSpace);
+      Thread[thr].Ngblist    = (int *)mymalloc_movable_g(&Thread[thr].Ngblist, "Ngblist", NumPart * sizeof(int));
+      Thread[thr].R2list     = (double *)mymalloc_movable_g(&Thread[thr].R2list, "R2list", NumPart * sizeof(double));
+      Thread[thr].Exportflag = Exportflag + thr * flags_per_thread;
+    }
+}
+
+/*! \brief The matching deallocation routine; walks the threads from last to first. */
+static void generic_free_partlist_nodelist_ngblist_threadbufs(void)
+{
+  int thr = NUM_THREADS;
+  while(thr-- > 0)
+    {
+      myfree(Thread[thr].R2list);
+      myfree(Thread[thr].Ngblist);
+      myfree(Thread[thr].PartList);
+      Thread[thr].R2list   = NULL;
+      Thread[thr].Ngblist  = NULL;
+      Thread[thr].PartList = NULL;
+    }
+}
+
+/*! \brief Counts, for every destination task, the particles and nodes that the
+ *         threads have queued for export, and updates Nexport, NexportNodes and SumNexport.
+ */
+static void generic_prepare_export_counts(void)
+{
+  Nexport      = 0;
+  NexportNodes = 0;
+  for(int task = 0; task < MyNTask; task++)
+    Send[task].Count = Send[task].CountNodes = 0;
+
+  for(int thr = 0; thr < NUM_THREADS; thr++)
+    {
+      /* the node list is stored backwards, starting at the end of the PartList buffer */
+      struct datanodelist *nodelist_end = (struct datanodelist *)(((char *)Thread[thr].PartList) + Thread[thr].InitialSpace);
+
+      for(int n = 0; n < Thread[thr].Nexport; n++)
+        Send[Thread[thr].PartList[n].Task].Count++;
+
+      for(int n = 1; n <= Thread[thr].NexportNodes; n++)
+        Send[nodelist_end[-n].Task].CountNodes++;
+
+      Nexport += Thread[thr].Nexport;
+      NexportNodes += Thread[thr].NexportNodes;
+    }
+
+  SumNexport += Nexport;
+}
+
+/*! \brief Establishes the Recv counts from the Send counts (effectively a big
+ *         transpose), using the communication scheme selected at compile
+ *         time.
+ */
+static void generic_prepare_import_counts(void)
+{
+  /* The standard approach is an all-to-all communication. For very large processor counts this
+   * becomes inefficient in principle, since mostly zeros need to be communicated. Two optional,
+   * experimental routines use a sparse communication pattern instead; they are selected with
+   * USE_DSDE or USE_INLINED_IBARRIER (USE_DSDE takes precedence if both are defined).
+   */
+#if defined(USE_DSDE)
+  generic_prepare_import_counts_ibarrier();
+#elif defined(USE_INLINED_IBARRIER)
+  generic_prepare_import_counts_inlined_ibarrier();
+#else /* the default */
+  MPI_Alltoall(Send, sizeof(struct send_recv_counts), MPI_BYTE, Recv, sizeof(struct send_recv_counts), MPI_BYTE, MYCOMMUNICATOR);
+#endif /* #if defined(USE_DSDE) #elif defined(USE_INLINED_IBARRIER) #else */
+}
+
+/*! \brief Initializes the offset tables needed for the communication: each
+ *         entry is the sum of the send counts of all lower-numbered tasks. */
+static void generic_prepare_export_offsets(void)
+{
+  Send_offset[0]       = 0;
+  Send_offset_nodes[0] = 0;
+
+  for(int prev = 0; prev + 1 < MyNTask; prev++)
+    {
+      Send_offset[prev + 1]       = Send[prev].Count + Send_offset[prev];
+      Send_offset_nodes[prev + 1] = Send[prev].CountNodes + Send_offset_nodes[prev];
+    }
+}
+
+/*! \brief Organizes the particle and node data for export in contiguous
+ *         memory regions (DataIn[] and NodeDataIn[]), ordered by destination task.
+ */
+static void generic_prepare_particle_data_for_export(void)
+{
+  /* per task: number of nodes stored so far, handed to particle2in() with every exported particle */
+  int *rel_node_index = (int *)mymalloc_g("rel_node_index", MyNTask * sizeof(int));
+
+  for(int task = 0; task < MyNTask; task++)
+    {
+      rel_node_index[task]  = 0;
+      Send[task].Count      = 0;
+      Send[task].CountNodes = 0;
+    }
+
+  for(int thr = 0; thr < NUM_THREADS; thr++)
+    {
+      /* the node list is stored backwards, starting at the end of the PartList buffer */
+      struct datanodelist *nodelist_end = (struct datanodelist *)(((char *)Thread[thr].PartList) + Thread[thr].InitialSpace);
+      int nodes_done                    = 0;
+
+      for(int n = 0; n < Thread[thr].Nexport; n++)
+        {
+          const int target   = Thread[thr].PartList[n].Index;
+          const int parttask = Thread[thr].PartList[n].Task;
+          const int partoff  = Send_offset[parttask] + Send[parttask].Count++;
+
+          particle2in(&DataIn[partoff], target, rel_node_index[parttask]);
+
+          /* nodes are only appended after the last consecutive entry of the same particle */
+          if(n < Thread[thr].Nexport - 1 && Thread[thr].PartList[n].Index == Thread[thr].PartList[n + 1].Index)
+            continue;
+
+          while(nodes_done < Thread[thr].NexportNodes && Thread[thr].PartList[n].Index == nodelist_end[-1 - nodes_done].Index)
+            {
+              const int nodetask = nodelist_end[-1 - nodes_done].Task;
+              const int nodeoff  = Send_offset_nodes[nodetask] + Send[nodetask].CountNodes++;
+              NodeDataIn[nodeoff] = nodelist_end[-1 - nodes_done].Node;
+              rel_node_index[nodetask]++;
+              nodes_done++;
+            }
+        }
+    }
+
+  myfree(rel_node_index);
+}
+
+/*! \brief Driver routine that passes every result obtained for a particle on a
+ *         remote processor to the supplied out2particle() routine.
+ */
+static void generic_add_results_to_local(void)
+{
+  /* Send[].Count is reused as the running position within each task's section of DataOut[] */
+  for(int task = 0; task < MyNTask; task++)
+    Send[task].Count = 0;
+
+  for(int thr = 0; thr < NUM_THREADS; thr++)
+    {
+      for(int n = 0; n < Thread[thr].Nexport; n++)
+        {
+          const int task   = Thread[thr].PartList[n].Task;
+          const int slot   = Send_offset[task] + Send[task].Count++;
+          const int target = Thread[thr].PartList[n].Index;
+          out2particle(&DataOut[slot], target, MODE_IMPORTED_PARTICLES);
+        }
+    }
+}
+
+/*! \brief Called in the actual tree walk routine to find the number of
+ *         entries and the start of the node-list section that needs to be
+ *         processed for an imported particle.
+ *
+ *  \param[in] target Index of the imported particle in DataGet[].
+ *  \param[out] numnodes Number of nodes in the section.
+ *  \param[out] firstnode Pointer to the first node of the section in NodeDataGet[].
+ */
+static void generic_get_numnodes(int target, int *numnodes, int **firstnode)
+{
+  *firstnode = &NodeDataGet[DataGet[target].Firstnode];
+  *numnodes  = ((target == Nimport - 1) ? NimportNodes : DataGet[target + 1].Firstnode) - DataGet[target].Firstnode;
+}
+
+/*! \brief Calculates how much space needs to be allocated to safely process a
+ *         given number of imported particles and nodes.
+ *
+ *  \param[in] nimport Number of imported particles.
+ *  \param[in] nimportnodes Number of imported nodes.
+ *
+ *  \return Required storage in bytes, including EXTRA_SPACE bytes of slack.
+ */
+static size_t generic_calc_import_storage(int nimport, int nimportnodes)
+{
+  return nimport * sizeof(data_in) + nimportnodes * sizeof(int) + nimport * sizeof(data_out) + EXTRA_SPACE;
+}
+
+/*! \brief Exchanges particle and node data, runs kernel() on the imported points and sends the results back,
+ *         in several phases if the import buffers would otherwise not fit into the free memory (FreeBytes).
+ */
+static void generic_multiple_phases(void (*kernel)(void))
+{
+  int ncycles; /* number of exchange rounds handled in the current phase */
+
+  for(int ngrpstart = 1; ngrpstart < (1 << PTask); ngrpstart += ncycles)
+    {
+      /* now decide how many cycles we can process in this iteration */
+      ncycles = (1 << PTask) - ngrpstart;
+
+      do
+        {
+          Nimport      = 0;
+          NimportNodes = 0;
+
+          for(int ngrp = ngrpstart; ngrp < ngrpstart + ncycles; ngrp++)
+            {
+              int recvTask = MyThisTask ^ ngrp; /* partner task of this round */
+
+              if(recvTask < MyNTask)
+                {
+                  if(Recv[recvTask].Count > 0)
+                    {
+                      Nimport += Recv[recvTask].Count;
+                      NimportNodes += Recv[recvTask].CountNodes;
+                    }
+                }
+            }
+
+          int flag = 0, flagall;
+
+          if(generic_calc_import_storage(Nimport, NimportNodes) > FreeBytes)
+            flag = 1;
+
+          MPI_Allreduce(&flag, &flagall, 1, MPI_INT, MPI_MAX, MYCOMMUNICATOR);
+
+          if(flagall)
+            ncycles /= 2; /* at least one task is short of memory: retry with half as many rounds */
+          else
+            break;
+        }
+      while(ncycles > 0);
+
+      if(ncycles == 0)
+        terminate(
+            "Seems like we can't even do one cycle: ncycles=%d  ngrpstart=%d  Nimport=%d  NimportNodes=%d  FreeBytes=%lld  needed "
+            "storage=%lld",
+            ncycles, ngrpstart, Nimport, NimportNodes, (long long)FreeBytes,
+            (long long)generic_calc_import_storage(Nimport, NimportNodes));
+
+      if(ngrpstart == 1 && ncycles != ((1 << PTask) - ngrpstart) && MyThisTask == 0)
+        warn("need multiple import/export phases to avoid memory overflow");
+
+      /* now allocate the import and results buffers */
+
+      DataGet     = (data_in *)mymalloc_movable_g(&DataGet, "DataGet", Nimport * sizeof(data_in));
+      NodeDataGet = (int *)mymalloc_movable_g(&NodeDataGet, "NodeDataGet", NimportNodes * sizeof(int));
+      DataResult  = (data_out *)mymalloc_movable_g(&DataResult, "DataResult", Nimport * sizeof(data_out));
+
+      Nimport      = 0;
+      NimportNodes = 0;
+
+      /* exchange particle data */
+      for(int ngrp = ngrpstart; ngrp < ngrpstart + ncycles; ngrp++)
+        {
+          int recvTask = MyThisTask ^ ngrp;
+
+          if(recvTask < MyNTask)
+            {
+              if(Send[recvTask].Count > 0 || Recv[recvTask].Count > 0)
+                {
+                  size_t len = sizeof(data_in);
+
+                  /* get the particles */
+                  MPI_Sendrecv(&DataIn[Send_offset[recvTask]], Send[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_A,
+                               &DataGet[Nimport], Recv[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_A, MYCOMMUNICATOR,
+                               MPI_STATUS_IGNORE);
+
+                  /* get the nodes */
+                  MPI_Sendrecv(&NodeDataIn[Send_offset_nodes[recvTask]], Send[recvTask].CountNodes, MPI_INT, recvTask, TAG_GRAV_B,
+                               &NodeDataGet[NimportNodes], Recv[recvTask].CountNodes, MPI_INT, recvTask, TAG_GRAV_B, MYCOMMUNICATOR,
+                               MPI_STATUS_IGNORE);
+
+                  for(int k = 0; k < Recv[recvTask].Count; k++)
+                    DataGet[Nimport + k].Firstnode += NimportNodes; /* shift by the number of nodes imported so far */
+
+                  Nimport += Recv[recvTask].Count;
+                  NimportNodes += Recv[recvTask].CountNodes;
+                }
+            }
+        }
+
+      /* now do the actual work for the imported points */
+      kernel();
+
+      /* send the results */
+      Nimport      = 0; /* restarted: now the read position within DataResult[] */
+      NimportNodes = 0;
+
+      for(int ngrp = ngrpstart; ngrp < ngrpstart + ncycles; ngrp++)
+        {
+          int recvTask = MyThisTask ^ ngrp;
+          if(recvTask < MyNTask)
+            {
+              if(Send[recvTask].Count > 0 || Recv[recvTask].Count > 0)
+                {
+                  size_t len = sizeof(data_out);
+
+                  /* exchange the results */
+                  MPI_Sendrecv(&DataResult[Nimport], Recv[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_B,
+                               &DataOut[Send_offset[recvTask]], Send[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_B,
+                               MYCOMMUNICATOR, MPI_STATUS_IGNORE);
+
+                  Nimport += Recv[recvTask].Count;
+                  NimportNodes += Recv[recvTask].CountNodes;
+                }
+            }
+        }
+
+      myfree(DataResult); /* freed in reverse order of allocation */
+      myfree(NodeDataGet);
+      myfree(DataGet);
+    }
+}
+
+/*! \brief Deals with the communication step: exports the queued particles,
+ *         has the imported particles processed by kernel(), and finally
+ *         adds the returned results to the local particles. If there is not
+ *         enough memory to hold all incoming data at once, the data is
+ *         processed in multiple stages (see generic_multiple_phases()).
+ */
+static void generic_exchange(void (*kernel)(void))
+{
+  /* set up Sendcount table */
+  generic_prepare_export_counts();
+
+  /* do the all-to-all exchange so that we have the Recvcount table as well */
+  generic_prepare_import_counts();
+
+  /* prepare offsets in export tables */
+  generic_prepare_export_offsets();
+
+  /* allocate particle data buffers */
+  DataIn     = (data_in *)mymalloc_movable_g(&DataIn, "DataIn", Nexport * sizeof(data_in));
+  NodeDataIn = (int *)mymalloc_movable_g(&NodeDataIn, "NodeDataIn", NexportNodes * sizeof(int));
+  DataOut    = (data_out *)mymalloc_movable_g(&DataOut, "DataOut", Nexport * sizeof(data_out));
+
+  /* prepare particle data for export */
+  generic_prepare_particle_data_for_export();
+
+  /* export particles and process them, if needed in several installments */
+  generic_multiple_phases(kernel);
+
+  /* add the results to the local particles */
+  generic_add_results_to_local();
+
+  myfree(DataOut); /* freed in reverse order of allocation */
+  myfree(NodeDataIn);
+  myfree(DataIn);
+}
+
+/*! \brief Implements a repeated loop over the local particles in the list,
+ *         processing them with the local kernel function until we are done
+ *         or the export buffer is full. Then the data is exchanged and the
+ *         imported particles are processed with the provided kernel. This
+ *         is repeated, if needed, until all processors are done.
+ *
+ *  \param[in] nactive Number of local particles to be processed.
+ *  \param[in] kernel_loc Kernel working on the local particles.
+ *  \param[in] kernel_imp Kernel working on the imported particles.
+ *  \return Number of iterations that were needed.
+ */
+static int generic_comm_pattern(int nactive, void (*kernel_loc)(void), void (*kernel_imp)(void))
+{
+  int niter = 0, ntasks_done;
+
+  SumNexport   = 0; /* can be queried as a book-keeping variable */
+  NextParticle = 0; /* first particle index for this task */
+
+  do
+    {
+      niter++;
+
+      /* allocate buffers to arrange communication */
+      generic_alloc_partlist_nodelist_ngblist_threadbufs();
+
+      /* process the local particles */
+      kernel_loc();
+
+      /* do all necessary bookkeeping, data exchange, and processing of imported particles */
+      generic_exchange(kernel_imp);
+
+      /* free the rest of the buffers */
+      generic_free_partlist_nodelist_ngblist_threadbufs();
+
+      /* check whether we are done: count the tasks that have worked through all their particles */
+      int done_here = (NextParticle >= nactive) ? 1 : 0;
+      MPI_Allreduce(&done_here, &ntasks_done, 1, MPI_INT, MPI_SUM, MYCOMMUNICATOR);
+    }
+  while(ntasks_done < MyNTask);
+
+  return niter;
+}
+
+/*! \brief Same as generic_comm_pattern(), but the indices of the particles to
+ *         be processed are handed to the local kernel explicitly.
+ *
+ *  \param[in] nactive Number of particles to be processed.
+ *  \param[in] indices Indices of the particles to be processed.
+ *  \param[in] kernel_loc,kernel_imp Kernels for local / imported particles.
+ *  \return Number of iterations that were needed.
+ */
+static int generic_comm_pattern_for_given_particles(int nactive, int indices[], void (*kernel_loc)(int, int *),
+                                                    void (*kernel_imp)(void))
+{
+  int niter = 0, ntasks_done;
+
+  SumNexport   = 0; /* can be queried as a book-keeping variable */
+  NextParticle = 0; /* first particle index for this task */
+
+  do
+    {
+      niter++;
+
+      /* allocate buffers to arrange communication */
+      generic_alloc_partlist_nodelist_ngblist_threadbufs();
+
+      /* process the given local particles */
+      kernel_loc(nactive, indices);
+
+      /* do all necessary bookkeeping, data exchange, and processing of imported particles */
+      generic_exchange(kernel_imp);
+
+      /* free the rest of the buffers */
+      generic_free_partlist_nodelist_ngblist_threadbufs();
+
+      /* check whether we are done: count the tasks that have worked through all their particles */
+      int done_here = (NextParticle >= nactive) ? 1 : 0;
+      MPI_Allreduce(&done_here, &ntasks_done, 1, MPI_INT, MPI_SUM, MYCOMMUNICATOR);
+    }
+  while(ntasks_done < MyNTask);
+
+  return niter;
+}
+
+#ifdef USE_INLINED_IBARRIER
+/*! \brief Can replace
+ *         MPI_Alltoall(Send, sizeof(struct send_recv_counts), MPI_BYTE, Recv,
+ *           sizeof(struct send_recv_counts), MPI_BYTE, MYCOMMUNICATOR);
+ *         with a sparse communication pattern that effectively involves a
+ *         home-grown non-blocking barrier to establish that we can stop
+ *         listening.
+ */
+static void generic_prepare_import_counts_inlined_ibarrier(void)
+{
+  int nLevels         = my_fls(MyNTask - 1); /* number of stages of the home-grown barrier */
+  int received_levels = 0, sent_levels = 0;  /* bit masks of the stages received / sent so far */
+
+  int *stagelist = (int *)mymalloc("stagelist", nLevels * sizeof(int));
+  for(int j = 0; j < nLevels; j++)
+    stagelist[j] = j;
+
+  MPI_Request *level_requests = (MPI_Request *)mymalloc("level_requests", nLevels * sizeof(MPI_Request));
+
+  MPI_Request *requests = (MPI_Request *)mymalloc("requests", MyNTask * sizeof(MPI_Request));
+  int n_requests        = 0;
+
+  for(int j = 0; j < MyNTask; j++)
+    {
+      if(Send[j].Count > 0) /* only non-empty counts are sent; synchronous mode, so completion implies a matching receive */
+        MPI_Issend(&Send[j], sizeof(struct send_recv_counts), MPI_BYTE, j, TAG_N, MYCOMMUNICATOR, &requests[n_requests++]);
+
+      Recv[j].Count      = 0;
+      Recv[j].CountNodes = 0;
+    }
+
+  int barrier_active = 0; /* set once all of our own sends have completed */
+
+  while(1)
+    {
+      int flag;
+      MPI_Status status;
+
+      MPI_Iprobe(MPI_ANY_SOURCE, TAG_N, MYCOMMUNICATOR, &flag, &status);
+
+      if(flag)
+        {
+          int source = status.MPI_SOURCE;
+          int tag    = status.MPI_TAG;
+
+          MPI_Recv(&Recv[source], sizeof(struct send_recv_counts), MPI_BYTE, source, tag, MYCOMMUNICATOR, MPI_STATUS_IGNORE);
+        }
+
+      MPI_Iprobe(MPI_ANY_SOURCE, TAG_BARRIER, MYCOMMUNICATOR, &flag, &status);
+
+      if(flag)
+        {
+          int source = status.MPI_SOURCE;
+          int tag    = status.MPI_TAG;
+
+          int stage;
+          MPI_Recv(&stage, 1, MPI_INT, source, tag, MYCOMMUNICATOR, MPI_STATUS_IGNORE);
+          received_levels |= (1 << stage);
+        }
+
+      if(barrier_active)
+        {
+          for(int stage = 0; stage < nLevels; stage++)
+            if(!(sent_levels & (1 << stage)))
+              {
+                int mask = ((1 << stage) - 1); /* all stages below this one */
+
+                if((mask & received_levels) == mask)
+                  {
+                    sent_levels |= (1 << stage);
+
+                    int target = (MyThisTask + (1 << stage)) % MyNTask;
+
+                    MPI_Issend(&stagelist[stage], 1, MPI_INT, target, TAG_BARRIER, MYCOMMUNICATOR, &level_requests[stage]);
+                  }
+              }
+
+          if(received_levels == ((1 << nLevels) - 1) && sent_levels == ((1 << nLevels) - 1)) /* every stage received and sent */
+            break;
+        }
+      else
+        {
+          MPI_Testall(n_requests, requests, &flag, MPI_STATUSES_IGNORE);
+
+          if(flag)
+            barrier_active = 1;
+        }
+    }
+
+  MPI_Waitall(nLevels, level_requests, MPI_STATUSES_IGNORE); /* as we are going to free stagelist */
+
+  myfree(requests);
+  myfree(level_requests);
+  myfree(stagelist);
+}
+#endif /* #ifdef USE_INLINED_IBARRIER */
+
+#ifdef USE_DSDE
+/*! \brief Can replace
+ *         MPI_Alltoall(Send, sizeof(struct send_recv_counts), MPI_BYTE, Recv,
+ *           sizeof(struct send_recv_counts), MPI_BYTE, MYCOMMUNICATOR);
+ *         with a sparse communication pattern that involves a non-blocking
+ *         barrier (requires MPI-3.0).
+ */
+static void generic_prepare_import_counts_ibarrier(void)
+{
+  MPI_Request barrier_request;
+  MPI_Request *requests = (MPI_Request *)mymalloc_movable(&requests, "requests", MyNTask * sizeof(MPI_Request));
+  int n_requests        = 0;
+
+  for(int j = 0; j < MyNTask; j++)
+    {
+      if(Send[j].Count > 0) /* only non-empty counts are sent; synchronous mode, so completion implies a matching receive */
+        MPI_Issend(&Send[j], sizeof(struct send_recv_counts), MPI_BYTE, j, TAG_N, MYCOMMUNICATOR, &requests[n_requests++]);
+
+      Recv[j].Count      = 0;
+      Recv[j].CountNodes = 0;
+    }
+
+  int barrier_active = 0; /* set once all of our own sends have completed and the barrier was entered */
+
+  while(1)
+    {
+      int flag;
+      MPI_Status status;
+
+      MPI_Iprobe(MPI_ANY_SOURCE, TAG_N, MYCOMMUNICATOR, &flag, &status);
+
+      if(flag)
+        {
+          int source = status.MPI_SOURCE;
+          int tag    = status.MPI_TAG;
+
+          int count;
+          MPI_Get_count(&status, MPI_BYTE, &count);
+
+          if(tag == TAG_N && source != MyThisTask)
+            {
+              if(count != (int)sizeof(struct send_recv_counts)) /* a message must hold exactly one counts record */
+                terminate("count=%d\n", count);
+
+              MPI_Recv(&Recv[source], sizeof(struct send_recv_counts), MPI_BYTE, source, tag, MYCOMMUNICATOR, MPI_STATUS_IGNORE);
+            }
+        }
+
+      if(barrier_active)
+        {
+          int flag2;
+
+          MPI_Test(&barrier_request, &flag2, &status);
+
+          if(flag2 != 0) /* barrier completed on all tasks: nothing more can arrive */
+            break;
+        }
+      else
+        {
+          MPI_Testall(n_requests, requests, &flag, MPI_STATUSES_IGNORE);
+
+          if(flag)
+            {
+              barrier_active = 1;
+
+              MPI_Ibarrier(MYCOMMUNICATOR, &barrier_request);
+            }
+        }
+    }
+
+  myfree(requests);
+}
+#endif /* #ifdef USE_DSDE */
diff --git a/src/amuse/community/arepo/src/utils/mpz_extension.c b/src/amuse/community/arepo/src/utils/mpz_extension.c
new file mode 100644
index 0000000000..87ba79dc9a
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/mpz_extension.c
@@ -0,0 +1,119 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/utils/mpz_extension.c
+ * \date        05/2018
+ * \brief       Auxiliary functions to facilitate usage of mpz functions.
+ * \details     Integer arithmetic used by Voronoi mesh construction.
+ *              contains functions:
+ *                void MY_mpz_set_si(mpz_t dest, signed long long int val)
+ *                void MY_mpz_mul_si(mpz_t prod, mpz_t mult, signed long long
+ *                  int val)
+ *                void MY_mpz_sub_ui(mpz_t prod, mpz_t mult,
+ *                  unsigned long long int val)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 20.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gmp.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+
+#if USEDBITS > 31
+
+/*! \brief Assigns a signed long long int value to an mpz variable.
+ *
+ *  The value is split into an upper (signed) and a lower (unsigned) 32-bit
+ *  part, which are recombined as upper * 2^32 + lower.
+ *
+ *  \param[out] dest mpz variable receiving the value.
+ *  \param[in] val Value to assign (signed long long int).
+ *  \return void
+ */
+void MY_mpz_set_si(mpz_t dest, signed long long int val)
+{
+  const signed long int hi32   = (signed long int)(val >> 32);
+  const unsigned long int lo32 = (unsigned long int)(val & 0xffffffffL);
+
+  mpz_t upper, shifted;
+
+  mpz_init(upper);
+  mpz_init(shifted);
+
+  mpz_set_si(upper, hi32);          /* upper = val >> 32 */
+  mpz_mul_2exp(shifted, upper, 32); /* shifted = upper * 2^32 */
+  mpz_add_ui(dest, shifted, lo32);  /* dest = shifted + lower 32 bits */
+
+  mpz_clear(shifted);
+  mpz_clear(upper);
+}
+
+/*! \brief Computes the product of an mpz value and a signed long long int.
+ *
+ *  \param[out] prod Receives mult * val.
+ *  \param[in] mult First factor (mpz_t).
+ *  \param[in] val Second factor (signed long long int).
+ *
+ *  \return void
+ */
+void MY_mpz_mul_si(mpz_t prod, mpz_t mult, signed long long int val)
+{
+  mpz_t factor;
+
+  /* turn val into an mpz temporary with the long long aware helper */
+  mpz_init(factor);
+  MY_mpz_set_si(factor, val);
+
+  mpz_mul(prod, mult, factor);
+
+  mpz_clear(factor);
+}
+
+/*! \brief Subtracts an unsigned long long int from an mpz value.
+ *
+ *  \param[out] prod Result of the subtraction, mult - val.
+ *  \param[in] mult Minuend (mpz_t).
+ *  \param[in] val Subtrahend (unsigned long long int).
+ *
+ *  \return void
+ */
+void MY_mpz_sub_ui(mpz_t prod, mpz_t mult, unsigned long long int val)
+{
+  mpz_t tmp;
+
+  /* build tmp from two unsigned 32-bit halves: passing val through the signed
+   * MY_mpz_set_si() would turn values >= 2^63 into negative numbers */
+  mpz_init_set_ui(tmp, (unsigned long int)(val >> 32));
+  mpz_mul_2exp(tmp, tmp, 32);
+  mpz_add_ui(tmp, tmp, (unsigned long int)(val & 0xffffffffUL));
+  mpz_sub(prod, mult, tmp);
+  mpz_clear(tmp);
+}
+
+#endif
diff --git a/src/amuse/community/arepo/src/utils/mymalloc.c b/src/amuse/community/arepo/src/utils/mymalloc.c
new file mode 100644
index 0000000000..f3173883e6
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/mymalloc.c
@@ -0,0 +1,792 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/utils/mymalloc.c
+ * \date        05/2018
+ * \brief       Manager for dynamic memory allocation.
+ * \details     This module handles the dynamic memory allocation for AREPO.
+ *              To avoid memory allocation/deallocation overhead a big chunk of
+ *              memory (which will be the maximum amount of dynamically
+ *              allocatable memory) is allocated upon initialization. This
+ *              chunk is then filled by the memory blocks as in a stack
+ *              structure. Block sizes are rounded up to a multiple of 64
+ *              bytes. Memory blocks come in two flavours: movable and
+ *              non-movable. In non-movable blocks the starting address is
+ *              fixed once the block is allocated and cannot be changed.
+ *              Due to the stack structure of the dynamic memory, this implies
+ *              that the last (non-movable) block allocated must be the first
+ *              block to be deallocated. If this condition is not met, an abort
+ *              condition is triggered. If more flexibility is needed, movable
+ *              memory blocks can be used. In this case, the starting address
+ *              of the block is again fixed upon allocation but the block can
+ *              be shifted (therefore its initial address changes) according
+ *              to needs. For a movable block to be successfully shifted it is
+ *              required that all the subsequent allocated blocks are movable.
+ *              Again, an abort condition is triggered if this condition is
+ *              not met. Movable blocks can be deallocated in any order
+ *              provided that the condition just described holds. The gap
+ *              resulting from the deallocation of a block that is not in
+ *              the last position will be automatically filled by shifting all
+ *              the blocks coming after the deallocated block.
+ *
+ *              contains functions:
+ *                static void *hmalloc(size_t size) (HUGEPAGES)
+ *                static void *hmalloc(size_t size)
+ *                void mymalloc_init(void)
+ *                void report_memory_usage(int rank, char *tabbuf)
+ *                void report_detailed_memory_usage_of_largest_task(void)
+ *                void dump_memory_table(void)
+ *                int dump_memory_table_buffer(char *p)
+ *                void *mymalloc_fullinfo
+ *                void *mymalloc_movable_fullinfo
+ *                size_t roundup_to_multiple_of_cacheline_size(size_t n)
+ *                void myfree_fullinfo(void *p, const char *func, const char
+ *                  *file, int line)
+ *                void *myfree_query_last_block(void)
+ *                void myfree_movable_fullinfo(void *p, const char *func,
+ *                  const char *file, int line)
+ *                void *myrealloc_fullinfo(void *p, size_t n, const char
+ *                  *func, const char *file, int line)
+ *                void *myrealloc_movable_fullinfo(void *p, size_t n,
+ *                  const char *func, const char *file, int line)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 07.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_math.h>
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#define CACHELINESIZE 64 /* allocation sizes are rounded up to a multiple of this many bytes */
+
+#define MAXBLOCKS 5000 /* number of entries in each bookkeeping table, i.e. the maximum number of blocks */
+#define MAXCHARS 40 /* bytes reserved per block in each of the name tables (VarName, FunctionName, ...) */
+
+static size_t AllocatedBytesGeneric; /*!< Sum of the sizes of all current blocks that have GenericFlag set. */
+
+static size_t HighMarkBytes; /*!< Largest value of AllocatedBytes recorded so far. */
+static size_t HighMarkBytesWithoutGeneric; /*!< Largest value of AllocatedBytes - AllocatedBytesGeneric recorded so far. */
+
+static double OldGlobHighMarkMB; /*!< Global high mark (in Mbytes) at the time it was last reported. */
+static double OldGlobHighMarkMBWithoutGeneric; /*!< Same as OldGlobHighMarkMB, but not counting generic blocks. */
+
+static size_t TotBytes; /*!< The total dimension (in bytes) of dynamic memory available to the current task. */
+static void *Base;      /*!< Base pointer (initial memory address) of the stack. */
+
+static unsigned long Nblocks; /*!< The current number of allocated memory blocks. */
+
+static void **Table;         /*!< Table containing the initial addresses of the allocated memory blocks. */
+static size_t *BlockSize;    /*!< Array containing the size (in bytes) of all the allocated memory blocks. */
+static char *MovableFlag;    /*!< Identifies whether a block is movable. */
+static char *GenericFlag;    /*!< Set for blocks that were allocated with a non-NULL call origin (generic allocation routines). */
+static void ***BasePointers; /*!< For movable blocks: address of the pointer variable that is updated when the block is shifted */
+static char *VarName;        /*!< The name of the variable with which the block has been allocated. */
+static char *FunctionName;   /*!< The function name that has allocated the memory block. */
+static char *ParentFileName; /*!< The call origin passed by the generic routines (empty string if none was given) */
+static char *FileName;       /*!< The file name where the function that has allocated the block is called. */
+static int *LineNumber;      /*!< The line number in FileName where the function that allocated the block has been called. */
+static char *HighMarkTabBuf; /*!< This is a buffer that holds the log-file output corresponding to the largest memory use that has
+                                occurred on this task */
+static char *HighMarkTabBufWithoutGeneric; /*!< This is a buffer that holds the log-file output corresponding to the largest memory use
+                                              without generic blocks that has occurred on this task */
+
+#ifdef HUGEPAGES
+#include <hugetlbfs.h>
+/*! \brief Allocates memory backed by huge pages, falling back to malloc().
+ *
+ *  \param[in] size Number of bytes to allocate.
+ *
+ *  \return Pointer to the allocated region, which has been filled with zeros.
+ */
+static void *hmalloc(size_t size)
+{
+  void *region = get_hugepage_region(size, GHR_STRICT);
+
+  if(region == NULL)
+    {
+      /* no huge page region was obtained: warn and retry with malloc() */
+      warn("Failed to get_hugepage_region of size %g\n", size / (1024.0 * 1024));
+
+      if((region = malloc(size)) == NULL)
+        terminate("Failed to allocate memory of size %g\n", size / (1024.0 * 1024));
+    }
+
+  /* every byte is written twice; the region ends up zero-filled */
+  memset(region, 255, size);
+  memset(region, 0, size);
+
+  return region;
+}
+#else  /* #ifdef HUGEPAGES */
+
+/*! \brief Allocation function wrapper used when HUGEPAGES is not defined.
+ *
+ *  \param[in] size Number of bytes to allocate.
+ *
+ *  \return The result of malloc(size); it is not checked for NULL here.
+ */
+static void *hmalloc(size_t size) { return malloc(size); }
+#endif /* #ifdef HUGEPAGES #else */
+
+/*! \brief Initializes the memory manager.
+ *
+ *  Allocates the bookkeeping tables (MAXBLOCKS entries each) and the memory
+ *  stack of All.MaxMemSize Mbytes, and resets the usage counters of the
+ *  module. The run is terminated if the stack cannot be allocated.
+ *
+ *  \return void
+ */
+void mymalloc_init(void)
+{
+  const size_t name_bytes   = MAXBLOCKS * MAXCHARS * sizeof(char);
+  const size_t tabbuf_bytes = (100 + 4 * MAXCHARS) * (MAXBLOCKS + 10);
+
+  Table        = (void **)hmalloc(MAXBLOCKS * sizeof(void *));
+  BasePointers = (void ***)hmalloc(MAXBLOCKS * sizeof(void **));
+  BlockSize    = (size_t *)hmalloc(MAXBLOCKS * sizeof(size_t));
+  LineNumber   = (int *)hmalloc(MAXBLOCKS * sizeof(int));
+  MovableFlag  = (char *)hmalloc(MAXBLOCKS * sizeof(char));
+  GenericFlag  = (char *)hmalloc(MAXBLOCKS * sizeof(char));
+
+  /* name tables: MAXCHARS bytes per block, zero-filled so that every entry starts out as an empty string */
+  VarName        = (char *)memset(hmalloc(name_bytes), 0, name_bytes);
+  FunctionName   = (char *)memset(hmalloc(name_bytes), 0, name_bytes);
+  ParentFileName = (char *)memset(hmalloc(name_bytes), 0, name_bytes);
+  FileName       = (char *)memset(hmalloc(name_bytes), 0, name_bytes);
+
+  HighMarkTabBuf               = (char *)hmalloc(tabbuf_bytes);
+  HighMarkTabBufWithoutGeneric = (char *)hmalloc(tabbuf_bytes);
+
+  /* the memory stack itself */
+  const size_t n = roundup_to_multiple_of_cacheline_size(All.MaxMemSize * ((size_t)1024 * 1024));
+
+  Base = hmalloc(n);
+  if(Base == NULL)
+    terminate("Failed to allocate memory for `Base' (%d Mbytes).\n", All.MaxMemSize);
+
+  TotBytes = FreeBytes = n;
+
+  /* nothing is allocated yet */
+  AllocatedBytes = 0;
+  Nblocks        = 0;
+  HighMarkBytes = HighMarkBytesWithoutGeneric = 0;
+  OldGlobHighMarkMB = OldGlobHighMarkMBWithoutGeneric = 0;
+}
+
+/*! \brief Writes the memory report of one task to FdMemory on task 0.
+ *
+ *  \param[in] rank Task whose report is written; if it is not task 0, it sends the text to task 0.
+ *  \param[in] tabbuf Memory table text that is appended to the summary line.
+ *
+ *  \return void
+ */
+void report_memory_usage(int rank, char *tabbuf)
+{
+  if(ThisTask == rank)
+    {
+      /* tabbuf may list more blocks than are allocated now, so its length must enter the buffer size */
+      char *buf = mymalloc("buf", strlen(tabbuf) + (100 + 4 * MAXCHARS) * (Nblocks + 10));
+      int cc    = 0;
+      cc += sprintf(buf + cc, "\nMEMORY:  Largest Allocation = %g Mbyte  |  Largest Allocation Without Generic = %g Mbyte\n\n",
+                    OldGlobHighMarkMB, OldGlobHighMarkMBWithoutGeneric);
+      cc += sprintf(buf + cc, "%s", tabbuf);
+      if(ThisTask == 0)
+        {
+          if(RestartFlag <= 2)
+            {
+              fprintf(FdMemory, "%s", buf);
+              fflush(FdMemory);
+            }
+        }
+      else
+        {
+          MPI_Send(&cc, 1, MPI_INT, 0, TAG_N, MPI_COMM_WORLD);
+          MPI_Send(buf, cc + 1, MPI_BYTE, 0, TAG_PDATA, MPI_COMM_WORLD);
+        }
+      myfree(buf);
+    }
+
+  if(ThisTask == 0 && rank > 0)
+    {
+      int cc;
+      MPI_Recv(&cc, 1, MPI_INT, rank, TAG_N, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      char *buf = mymalloc("buf", cc + 1);
+      MPI_Recv(buf, cc + 1, MPI_BYTE, rank, TAG_PDATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      if(RestartFlag <= 2)
+        {
+          fprintf(FdMemory, "%s", buf);
+          fflush(FdMemory);
+        }
+      myfree(buf);
+    }
+}
+
+/*! \brief Reports the memory table of the task with the largest high mark.
+ *         Done when a global maximum reaches 1.05 times the last reported one.
+ *  \return void
+ */
+void report_detailed_memory_usage_of_largest_task(void)
+{
+  int flag = 0;
+
+  struct
+  {
+    double mem;
+    int rank;
+  } local, global;
+
+  local.mem  = HighMarkBytes / (1024.0 * 1024.0);
+  local.rank = ThisTask;
+
+  MPI_Allreduce(&local, &global, 1, MPI_DOUBLE_INT, MPI_MAXLOC, MPI_COMM_WORLD);
+  const int rank_total = global.rank; /* keep it: `global` is overwritten by the second reduction */
+  if(global.mem >= 1.05 * OldGlobHighMarkMB)
+    {
+      OldGlobHighMarkMB = global.mem;
+      flag |= 1;
+    }
+
+  local.mem  = HighMarkBytesWithoutGeneric / (1024.0 * 1024.0);
+  local.rank = ThisTask;
+
+  MPI_Allreduce(&local, &global, 1, MPI_DOUBLE_INT, MPI_MAXLOC, MPI_COMM_WORLD);
+  const int rank_without_generic = global.rank;
+  if(global.mem >= 1.05 * OldGlobHighMarkMBWithoutGeneric)
+    {
+      OldGlobHighMarkMBWithoutGeneric = global.mem;
+      flag |= 2;
+    }
+
+  if(flag & 2)
+    report_memory_usage(rank_without_generic, HighMarkTabBufWithoutGeneric);
+
+  if(flag & 1)
+    report_memory_usage(rank_total, HighMarkTabBuf); /* was global.rank, i.e. the rank found by the second reduction */
+}
+
+/*! \brief Prints the table of allocated memory blocks to standard output.
+ *  Rows get the same byte budget as in HighMarkTabBuf; no-op if malloc() fails.
+ */
+void dump_memory_table(void)
+{
+  char *buf = malloc((100 + 4 * MAXCHARS) * (Nblocks + 10));
+  if(buf == NULL)
+    return;
+  dump_memory_table_buffer(buf);
+  printf("%s", buf);
+  free(buf);
+}
+
+/*! \brief Writes the table of allocated memory blocks into a text buffer.
+ *
+ *  \param[out] p Destination buffer; its size is not checked here.
+ *
+ *  \return The number of characters written to p.
+ */
+int dump_memory_table_buffer(char *p)
+{
+  const double MB   = 1024.0 * 1024.0;
+  char *out         = p; /* write cursor */
+  size_t cumulative = 0; /* running sum of the block sizes in bytes */
+
+  out += sprintf(out, "-------------------------- Allocated Memory Blocks---- ( Step %8d )------------------\n", All.NumCurrentTiStep);
+  out += sprintf(out, "Task    Nr F                  Variable      MBytes   Cumulative  Function|File|Linenumber\n");
+  out += sprintf(out, "------------------------------------------------------------------------------------------\n");
+  for(int blk = 0; blk < Nblocks; blk++)
+    {
+      const int name_offset = blk * MAXCHARS;
+      cumulative += BlockSize[blk];
+      out += sprintf(out, "%4d %5d %d %40s  %10.4f   %10.4f  %s%s()|%s|%d\n", ThisTask, blk, MovableFlag[blk], VarName + name_offset,
+                     BlockSize[blk] / MB, cumulative / MB, ParentFileName + name_offset, FunctionName + name_offset,
+                     FileName + name_offset, LineNumber[blk]);
+    }
+  out += sprintf(out, "------------------------------------------------------------------------------------------\n");
+
+  return (int)(out - p);
+}
+
+/*! \brief Allocates a non-movable memory block on top of the stack and
+ *         records its bookkeeping information.
+ *
+ *  \param[in] varname Name under which the block is listed in the memory table.
+ *  \param[in] n Requested size in bytes (rounded up to the cacheline size).
+ *  \param[in] func Name of the calling function (usually __FUNCTION__).
+ *  \param[in] file File of the calling function (usually __FILE__).
+ *  \param[in] line Line of the call in file (usually __LINE__).
+ *  \param[in] clear_flag If non-zero, the new block is filled with zeros.
+ *  \param[in] callorigin If non-NULL, stored as the call origin and the block
+ *             is accounted as a generic allocation.
+ *
+ *  \return A pointer to the beginning of the allocated memory block.
+ *          Terminates the run if no block slot or not enough memory is left.
+ */
+void *mymalloc_fullinfo(const char *varname, size_t n, const char *func, const char *file, int line, int clear_flag, char *callorigin)
+{
+  /* round the request up to whole cachelines; a zero-byte request still gets one */
+  n = roundup_to_multiple_of_cacheline_size(n);
+  if(n < CACHELINESIZE)
+    n = CACHELINESIZE;
+
+  if(Nblocks >= MAXBLOCKS)
+    terminate("Task=%d: No blocks left in mymalloc_fullinfo() at %s()/%s/line %d. MAXBLOCKS=%d\n", ThisTask, func, file, line,
+              MAXBLOCKS);
+
+  if(n > FreeBytes)
+    {
+      dump_memory_table();
+      terminate(
+          "\nTask=%d: Not enough memory in mymalloc_fullinfo() to allocate %g MB for variable '%s' at %s()/%s/line %d (FreeBytes=%g "
+          "MB).\n",
+          ThisTask, n / (1024.0 * 1024.0), varname, func, file, line, FreeBytes / (1024.0 * 1024.0));
+    }
+
+  const unsigned long slot = Nblocks;                       /* table index of the new block */
+  const size_t name_offset = slot * MAXCHARS;               /* start of this block's entry in the name tables */
+  void *const block        = Base + (TotBytes - FreeBytes); /* first unused byte of the stack */
+  Table[slot]       = block;
+  BlockSize[slot]   = n;
+  MovableFlag[slot] = 0;
+  LineNumber[slot]  = line;
+  strncpy(VarName + name_offset, varname, MAXCHARS - 1);
+  strncpy(FunctionName + name_offset, func, MAXCHARS - 1);
+  strncpy(FileName + name_offset, file, MAXCHARS - 1);
+  if(callorigin != NULL)
+    {
+      strncpy(ParentFileName + name_offset, callorigin, MAXCHARS - 1);
+      GenericFlag[slot] = 1;
+      AllocatedBytesGeneric += n;
+    }
+  else
+    {
+      memset(ParentFileName + name_offset, 0, MAXCHARS);
+      GenericFlag[slot] = 0;
+    }
+
+  FreeBytes -= n;
+  AllocatedBytes += n;
+  Nblocks = slot + 1;
+
+  if(AllocatedBytes - AllocatedBytesGeneric > HighMarkBytesWithoutGeneric)
+    {
+      HighMarkBytesWithoutGeneric = AllocatedBytes - AllocatedBytesGeneric;
+      dump_memory_table_buffer(HighMarkTabBufWithoutGeneric);
+    }
+  if(AllocatedBytes > HighMarkBytes)
+    {
+      HighMarkBytes = AllocatedBytes;
+      dump_memory_table_buffer(HighMarkTabBuf);
+    }
+
+  if(clear_flag)
+    memset(block, 0, n);
+
+  return block;
+}
+
+/*! \brief Allocates a movable memory block and stores its bookkeeping data.
+ *
+ *  \param[in] ptr Address of the caller's pointer variable; it is recorded in
+ *             BasePointers so the variable can be updated when the block moves.
+ *  \param[in] varname Name under which the block is listed in the memory table.
+ *  \param[in] n Requested size in bytes (rounded up to the cacheline size).
+ *  \param[in] func Name of the calling function (usually __FUNCTION__).
+ *  \param[in] file File of the calling function (usually __FILE__).
+ *  \param[in] line Line of the call in file (usually __LINE__).
+ *  \param[in] callorigin If non-NULL, stored as the call origin and the block
+ *             is accounted as a generic allocation.
+ *
+ *  \return A pointer to the beginning of the allocated memory block.
+ */
+void *mymalloc_movable_fullinfo(void *ptr, const char *varname, size_t n, const char *func, const char *file, int line,
+                                char *callorigin)
+{
+  if((n % CACHELINESIZE) > 0)
+    n = (n / CACHELINESIZE + 1) * CACHELINESIZE;
+
+  if(n < CACHELINESIZE)
+    n = CACHELINESIZE;
+
+  if(Nblocks >= MAXBLOCKS)
+    terminate("Task=%d: No blocks left in mymalloc_movable_fullinfo() at %s()/%s/line %d. MAXBLOCKS=%d\n", ThisTask, func, file, line,
+              MAXBLOCKS);
+
+  if(n > FreeBytes)
+    {
+      dump_memory_table();
+      terminate(
+          "\nTask=%d: Not enough memory in mymalloc_movable_fullinfo() to allocate %g MB for variable '%s' at %s()/%s/line %d (FreeBytes=%g "
+          "MB).\n",
+          ThisTask, n / (1024.0 * 1024.0), varname, func, file, line, FreeBytes / (1024.0 * 1024.0));
+    }
+  Table[Nblocks] = Base + (TotBytes - FreeBytes);
+  FreeBytes -= n;
+  /* record the names and the origin of the allocation for the memory table */
+  strncpy(VarName + Nblocks * MAXCHARS, varname, MAXCHARS - 1);
+  if(callorigin)
+    {
+      strncpy(ParentFileName + Nblocks * MAXCHARS, callorigin, MAXCHARS - 1);
+      GenericFlag[Nblocks] = 1;
+      AllocatedBytesGeneric += n;
+    }
+  else
+    {
+      memset(ParentFileName + Nblocks * MAXCHARS, 0, MAXCHARS);
+      GenericFlag[Nblocks] = 0;
+    }
+  strncpy(FunctionName + Nblocks * MAXCHARS, func, MAXCHARS - 1);
+  strncpy(FileName + Nblocks * MAXCHARS, file, MAXCHARS - 1);
+  LineNumber[Nblocks] = line;
+
+  AllocatedBytes += n;
+  BlockSize[Nblocks]    = n;
+  MovableFlag[Nblocks]  = 1;
+  BasePointers[Nblocks] = ptr;
+
+  Nblocks += 1;
+  /* store a snapshot of the table whenever a usage counter reaches a new maximum */
+  if(AllocatedBytes - AllocatedBytesGeneric > HighMarkBytesWithoutGeneric)
+    {
+      HighMarkBytesWithoutGeneric = AllocatedBytes - AllocatedBytesGeneric;
+      dump_memory_table_buffer(HighMarkTabBufWithoutGeneric);
+    }
+
+  if(AllocatedBytes > HighMarkBytes)
+    {
+      HighMarkBytes = AllocatedBytes;
+      dump_memory_table_buffer(HighMarkTabBuf);
+    }
+
+  return Table[Nblocks - 1];
+}
+
+/*! \brief Rounds a size up to the next multiple of the cacheline size.
+ *
+ *  \param[in] n Size in bytes.
+ *
+ *  \return n if it is already a multiple of CACHELINESIZE, otherwise the next larger multiple.
+ */
+size_t roundup_to_multiple_of_cacheline_size(size_t n)
+{
+  const size_t remainder = n % CACHELINESIZE;
+  if(remainder == 0)
+    return n;
+  return n - remainder + CACHELINESIZE; /* equals (n / CACHELINESIZE + 1) * CACHELINESIZE */
+}
+
+/*! \brief Deallocates a non-movable memory block.
+ *
+ *  Only the most recently allocated block can be released this way. If p
+ *  is any other pointer, the memory table is printed and the run is
+ *  terminated.
+ *
+ *  \param[in] p Pointer to the memory block to be deallocated.
+ *  \param[in] func Name of the calling function (usually __FUNCTION__).
+ *  \param[in] file File of the calling function (usually __FILE__).
+ *  \param[in] line Line of the call in file (usually __LINE__).
+ *
+ *  \return void
+ */
+void myfree_fullinfo(void *p, const char *func, const char *file, int line)
+{
+  if(Nblocks == 0)
+    terminate("no allocated blocks that could be freed");
+
+  const unsigned long top = Nblocks - 1; /* index of the most recently allocated block */
+  if(Table[top] != p)
+    {
+      dump_memory_table();
+      terminate("Task=%d: Wrong call of myfree() at %s()/%s/line %d: not the last allocated block!\n", ThisTask, func, file, line);
+    }
+
+  const size_t released = BlockSize[top];
+  FreeBytes += released;
+  AllocatedBytes -= released;
+  if(GenericFlag[top])
+    AllocatedBytesGeneric -= released;
+  Nblocks = top;
+}
+
+/*! \brief Returns the most recently allocated block without freeing it.
+ *
+ *  \return Start address of the last entry in the block table.
+ */
+void *myfree_query_last_block(void)
+{
+  if(!Nblocks)
+    terminate("no allocated blocks that could be returned");
+
+  return Table[Nblocks - 1];
+}
+
+/*! \brief Deallocates a movable memory block.
+ *
+ *  The block may lie anywhere in the stack, provided that every block
+ *  allocated after it is movable: those blocks are shifted down to fill the
+ *  gap and the pointer variables registered for them are updated.
+ *  The run is terminated if p is not the start of an allocated block or if
+ *  a non-movable block follows it.
+ *
+ *  \param[in] p Pointer to the memory block to be deallocated.
+ *  \param[in] func Name of the calling function (usually __FUNCTION__).
+ *  \param[in] file File of the calling function (usually __FILE__).
+ *  \param[in] line Line of the call in file (usually __LINE__).
+ *
+ *  \return void
+ */
+void myfree_movable_fullinfo(void *p, const char *func, const char *file, int line)
+{
+  int i;
+
+  if(Nblocks == 0)
+    terminate("no allocated blocks that could be freed");
+
+  /* first, let's find the block */
+  int nr;
+
+  for(nr = Nblocks - 1; nr >= 0; nr--)
+    if(p == Table[nr])
+      break;
+
+  if(nr < 0)
+    {
+      dump_memory_table();
+      terminate("Task=%d: Wrong call of myfree_movable() from %s()/%s/line %d - this block has not been allocated!\n", ThisTask, func,
+                file, line);
+    }
+
+  if(nr < Nblocks - 1) /* the block is not the last allocated block */
+    {
+      /* check that all subsequent blocks are actually movable */
+      for(i = nr + 1; i < Nblocks; i++)
+        if(MovableFlag[i] == 0)
+          {
+            dump_memory_table();
+            myflush(stdout);
+            terminate(
+                "Task=%d: Wrong call of myfree_movable() from %s()/%s/line %d - behind block=%d there are subsequent non-movable "
+                "allocated blocks\n",
+                ThisTask, func, file, line, nr);
+          }
+    }
+  /* take the block out of the usage counters */
+  if(GenericFlag[nr])
+    AllocatedBytesGeneric -= BlockSize[nr];
+
+  AllocatedBytes -= BlockSize[nr];
+  FreeBytes += BlockSize[nr];
+  /* all later blocks move down by the size of the freed block; length is their total size */
+  ptrdiff_t offset = -BlockSize[nr];
+  size_t length    = 0;
+
+  for(i = nr + 1; i < Nblocks; i++)
+    length += BlockSize[i];
+
+  if(nr < Nblocks - 1)
+    memmove(Table[nr + 1] + offset, Table[nr + 1], length);
+  /* adjust the table entries and the registered pointer variables to the new addresses */
+  for(i = nr + 1; i < Nblocks; i++)
+    {
+      Table[i] += offset;
+      *BasePointers[i] = *BasePointers[i] + offset;
+    }
+  /* close the gap in the bookkeeping tables by moving every later entry one slot down */
+  for(i = nr + 1; i < Nblocks; i++)
+    {
+      Table[i - 1]        = Table[i];
+      BasePointers[i - 1] = BasePointers[i];
+      BlockSize[i - 1]    = BlockSize[i];
+      MovableFlag[i - 1]  = MovableFlag[i];
+      GenericFlag[i - 1]  = GenericFlag[i];
+
+      strncpy(VarName + (i - 1) * MAXCHARS, VarName + i * MAXCHARS, MAXCHARS - 1);
+      strncpy(FunctionName + (i - 1) * MAXCHARS, FunctionName + i * MAXCHARS, MAXCHARS - 1);
+      strncpy(ParentFileName + (i - 1) * MAXCHARS, ParentFileName + i * MAXCHARS, MAXCHARS - 1);
+      strncpy(FileName + (i - 1) * MAXCHARS, FileName + i * MAXCHARS, MAXCHARS - 1);
+      LineNumber[i - 1] = LineNumber[i];
+    }
+
+  Nblocks -= 1;
+}
+
+/*! \brief Reallocates an existing non-movable memory block.
+ *
+ *  The block must be the last allocated one, so it can change its size in
+ *  place on top of the stack. For a block flagged as generic, the generic
+ *  byte counter is updated with the new size as well.
+ *
+ *  \param[in] p Pointer to the existing memory block to be reallocated.
+ *  \param[in] n The new size in bytes (rounded up to the cacheline size).
+ *  \param[in] func Name of the calling function (usually __FUNCTION__).
+ *  \param[in] file File of the calling function (usually __FILE__).
+ *  \param[in] line Line of the call in file (usually __LINE__).
+ *
+ *  \return A pointer to the beginning of the reallocated memory block.
+ *          Terminates the run if p is not the last block or memory runs out.
+ */
+void *myrealloc_fullinfo(void *p, size_t n, const char *func, const char *file, int line)
+{
+  if((n % CACHELINESIZE) > 0)
+    n = (n / CACHELINESIZE + 1) * CACHELINESIZE;
+
+  if(n < CACHELINESIZE)
+    n = CACHELINESIZE;
+
+  if(Nblocks == 0)
+    terminate("no allocated blocks that could be reallocated");
+
+  if(p != Table[Nblocks - 1])
+    {
+      dump_memory_table();
+      terminate("Task=%d: Wrong call of myrealloc() at %s()/%s/line %d - not the last allocated block!\n", ThisTask, func, file, line);
+    }
+  /* first take the old size out of all counters, then book the new size */
+  if(GenericFlag[Nblocks - 1])
+    AllocatedBytesGeneric -= BlockSize[Nblocks - 1];
+
+  AllocatedBytes -= BlockSize[Nblocks - 1];
+  FreeBytes += BlockSize[Nblocks - 1];
+
+  if(n > FreeBytes)
+    {
+      dump_memory_table();
+      terminate("Task=%d: Not enough memory in myremalloc(n=%g MB) at %s()/%s/line %d. previous=%g FreeBytes=%g MB\n", ThisTask,
+                n / (1024.0 * 1024.0), func, file, line, BlockSize[Nblocks - 1] / (1024.0 * 1024.0), FreeBytes / (1024.0 * 1024.0));
+    }
+  Table[Nblocks - 1] = Base + (TotBytes - FreeBytes);
+  FreeBytes -= n;
+  AllocatedBytes += n;
+  BlockSize[Nblocks - 1] = n;
+  if(GenericFlag[Nblocks - 1])
+    AllocatedBytesGeneric += n; /* was missing: the old size was subtracted above, but the new one never added */
+  if(AllocatedBytes > HighMarkBytes)
+    {
+      HighMarkBytes = AllocatedBytes;
+      dump_memory_table_buffer(HighMarkTabBuf);
+    }
+
+  return Table[Nblocks - 1];
+}
+
+/*! \brief Reallocates an existing movable memory block.
+ *
+ *  All blocks allocated after the given one must be movable; they are
+ *  shifted by the change in size and the pointer variables registered for
+ *  them are updated. The table entry of the block itself is not changed.
+ *  Blocks flagged as generic are rejected (the run is terminated).
+ *
+ *  \param[in] p Pointer to the existing memory block to be reallocated.
+ *  \param[in] n The new size in bytes (rounded up to the cacheline size).
+ *  \param[in] func Name of the calling function (usually __FUNCTION__).
+ *  \param[in] file File of the calling function (usually __FILE__).
+ *  \param[in] line Line of the call in file (usually __LINE__).
+ *
+ *  \return A pointer to the beginning of the reallocated memory block,
+ *          i.e. its entry in Table.
+ */
+void *myrealloc_movable_fullinfo(void *p, size_t n, const char *func, const char *file, int line)
+{
+  int i;
+
+  if((n % CACHELINESIZE) > 0)
+    n = (n / CACHELINESIZE + 1) * CACHELINESIZE;
+
+  if(n < CACHELINESIZE)
+    n = CACHELINESIZE;
+
+  if(Nblocks == 0)
+    terminate("no allocated blocks that could be reallocated");
+
+  /* first, let's find the block */
+  int nr;
+
+  for(nr = Nblocks - 1; nr >= 0; nr--)
+    if(p == Table[nr])
+      break;
+
+  if(nr < 0)
+    {
+      dump_memory_table();
+      terminate("Task=%d: Wrong call of myrealloc_movable() from %s()/%s/line %d - this block has not been allocated!\n", ThisTask,
+                func, file, line);
+    }
+
+  if(nr < Nblocks - 1) /* the block is not the last allocated block */
+    {
+      /* check that all subsequent blocks are actually movable */
+      for(i = nr + 1; i < Nblocks; i++)
+        if(MovableFlag[i] == 0)
+          {
+            dump_memory_table();
+            terminate(
+                "Task=%d: Wrong call of myrealloc_movable() from %s()/%s/line %d - behind block=%d there are subsequent non-movable "
+                "allocated blocks\n",
+                ThisTask, func, file, line, nr);
+          }
+    }
+
+  if(GenericFlag[nr])
+    terminate("unexpected");
+  /* give the old size back before checking whether the new size fits */
+  AllocatedBytes -= BlockSize[nr];
+  FreeBytes += BlockSize[nr];
+
+  if(n > FreeBytes)
+    {
+      dump_memory_table();
+      terminate("Task=%d: at %s()/%s/line %d: Not enough memory in myremalloc_movable(n=%g MB). previous=%g FreeBytes=%g MB\n",
+                ThisTask, func, file, line, n / (1024.0 * 1024.0), BlockSize[nr] / (1024.0 * 1024.0), FreeBytes / (1024.0 * 1024.0));
+    }
+  /* later blocks are shifted by the change in size (up when growing, down when shrinking) */
+  ptrdiff_t offset = n - BlockSize[nr];
+  size_t length    = 0;
+
+  for(i = nr + 1; i < Nblocks; i++)
+    length += BlockSize[i];
+
+  if(nr < Nblocks - 1)
+    memmove(Table[nr + 1] + offset, Table[nr + 1], length);
+  /* adjust the table entries and the registered pointer variables to the new addresses */
+  for(i = nr + 1; i < Nblocks; i++)
+    {
+      Table[i] += offset;
+
+      *BasePointers[i] = *BasePointers[i] + offset;
+    }
+
+  FreeBytes -= n;
+  AllocatedBytes += n;
+  BlockSize[nr] = n;
+  /* NOTE(review): only HighMarkBytes is refreshed here, HighMarkBytesWithoutGeneric is not */
+  if(AllocatedBytes > HighMarkBytes)
+    {
+      HighMarkBytes = AllocatedBytes;
+      dump_memory_table_buffer(HighMarkTabBuf);
+    }
+
+  return Table[nr];
+}
diff --git a/src/amuse/community/arepo/src/utils/parallel_sort.c b/src/amuse/community/arepo/src/utils/parallel_sort.c
new file mode 100644
index 0000000000..f825a9f220
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/parallel_sort.c
@@ -0,0 +1,743 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/utils/parallel_sort.c
+ * \date        05/2018
+ * \brief       MPI parallel sorting routine.
+ * \details     contains functions:
+ *                int parallel_sort_indirect_compare(const void *a,
+ *                  const void *b)
+ *                double parallel_sort(void *base, size_t nmemb, size_t size,
+ *                  int (*compar) (const void *, const void *))
+ *                double parallel_sort_comm(void *base, size_t nmemb, size_t
+ *                  size, int (*compar) (const void *, const void *),
+ *                  MPI_Comm comm)
+ *                static void get_local_rank(char *element, size_t
+ *                  tie_braking_rank, char *base, size_t nmemb, size_t size,
+ *                  size_t noffs_thistask, long long left, long long right,
+ *                  size_t * loc, int (*compar) (const void *, const void *))
+ *                static void check_local_rank(char *element, size_t
+ *                  tie_braking_rank, char *base, size_t nmemb, size_t size,
+ *                  size_t noffs_thistask, long long left, long long right,
+ *                  size_t loc, int (*compar) (const void *, const void *))
+ *                static void serial_sort(char *base, size_t nmemb, size_t
+ *                  size, int (*compar) (const void *, const void *))
+ *                static void msort_serial_with_tmp(char *base, size_t n,
+ *                  size_t s, int (*compar) (const void *, const void *),
+ *                  char *t)
+ *                void parallel_sort_test_order(char *base, size_t nmemb,
+ *                  size_t size, int (*compar) (const void *, const void *))
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_rng.h>
+#include <math.h>
+#include <mpi.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#define TRANSFER_SIZE_LIMIT 1000000000
+#define MAX_ITER_PARALLEL_SORT 500
+
+/* Note: For gcc-4.1.2, I found that the compiler produces incorrect code for this routine if optimization level O1 or higher is used.
+ *       In gcc-4.3.4, this problem is absent.
+ */
+
+#define TAG_TRANSFER 100
+
+static void serial_sort(char *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *));
+static void msort_serial_with_tmp(char *base, size_t n, size_t s, int (*compar)(const void *, const void *), char *t);
+static void get_local_rank(char *element, size_t tie_braking_rank, char *base, size_t nmemb, size_t size, size_t noffs_thistask,
+                           long long left, long long right, size_t *loc, int (*compar)(const void *, const void *));
+
+static int (*comparfunc)(const void *, const void *);
+static char *median_element_list;
+static size_t element_size;
+
+/*! \brief Indirect comparison: compares the two entries of median_element_list that are selected by the int indices a and b.
+ *
+ *  \param[in] a Pointer to an int index into median_element_list.
+ *  \param[in] b Pointer to an int index into median_element_list.
+ *
+ *  \return Whatever the file-level comparfunc returns for the two indexed elements (each element_size bytes wide).
+ */
+int parallel_sort_indirect_compare(const void *a, const void *b)
+{
+  return (*comparfunc)(median_element_list + *((int *)a) * element_size, median_element_list + *((int *)b) * element_size);
+}
+
+/*! \brief Main function to perform a parallel sort.
+ *
+ *   Thin wrapper that forwards all arguments to parallel_sort_comm() with MPI_COMM_WORLD as communicator.
+ *
+ *  \param[in, out] base Array to be sorted.
+ *  \param[in] nmemb Number of entries in array.
+ *  \param[in] size Size in bytes of an element in array to be sorted.
+ *  \param[in] compar Comparison function.
+ *
+ *  \return Return value of parallel_sort_comm(), i.e. the time it took to sort the array.
+ */
+double parallel_sort(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *))
+{
+  return parallel_sort_comm(base, nmemb, size, compar, MPI_COMM_WORLD);
+}
+
+/*! \brief Parallel sort over the tasks of comm; every task keeps its nmemb element count, only the contents are redistributed.
+ *
+ *  \param[in, out] base Local array to be sorted; on return it holds this task's slice of the globally sorted sequence.
+ *  \param[in] nmemb Number of local entries; nmemb * size must be smaller than 2 GB (MPI counts are int), else terminate().
+ *  \param[in] size Size in bytes of one element of the array.
+ *  \param[in] compar Comparison function (qsort-style: negative, zero or positive).
+ *  \param[in] comm MPI communicator; it is split so that only tasks with nmemb > 0 take part in the exchange.
+ *
+ *  \return Elapsed time, computed as timediff() of two second() readings taken at entry and exit.
+ */
+double parallel_sort_comm(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *), MPI_Comm comm)
+{
+  int i, j, ranks_not_found, Local_ThisTask, Local_NTask, Local_PTask, Color, new_max_loc;
+  size_t tie_braking_rank, new_tie_braking_rank, rank;
+  MPI_Comm MPI_CommLocal;
+
+  double ta = second();
+
+  /* do a serial sort of the local data up front */
+  serial_sort((char *)base, nmemb, size, compar);
+
+  /* we create a communicator that contains just those tasks with nmemb > 0. This makes
+   *  it easier to deal with CPUs that do not hold any data.
+   */
+  if(nmemb)
+    Color = 1;
+  else
+    Color = 0;
+
+  MPI_Comm_split(comm, Color, ThisTask, &MPI_CommLocal);
+  MPI_Comm_rank(MPI_CommLocal, &Local_ThisTask);
+  MPI_Comm_size(MPI_CommLocal, &Local_NTask);
+
+  if(Local_NTask > 1 && Color == 1)
+    {
+      for(Local_PTask = 0; Local_NTask > (1 << Local_PTask); Local_PTask++)
+        ;
+
+      size_t *nlist = (size_t *)mymalloc("nlist", Local_NTask * sizeof(size_t));
+      size_t *noffs = (size_t *)mymalloc("noffs", Local_NTask * sizeof(size_t));
+
+      MPI_Allgather(&nmemb, sizeof(size_t), MPI_BYTE, nlist, sizeof(size_t), MPI_BYTE, MPI_CommLocal);
+
+      for(i = 1, noffs[0] = 0; i < Local_NTask; i++)
+        noffs[i] = noffs[i - 1] + nlist[i - 1];
+
+      char *element_guess              = mymalloc("element_guess", Local_NTask * size);
+      size_t *element_tie_braking_rank = mymalloc("element_tie_braking_rank", Local_NTask * sizeof(size_t));
+      size_t *desired_glob_rank        = mymalloc("desired_glob_rank", Local_NTask * sizeof(size_t));
+      size_t *current_glob_rank        = mymalloc("current_glob_rank", Local_NTask * sizeof(size_t));
+      size_t *current_loc_rank         = mymalloc("current_loc_rank", Local_NTask * sizeof(size_t));
+      long long *range_left            = mymalloc("range_left", Local_NTask * sizeof(long long));
+      long long *range_right           = mymalloc("range_right", Local_NTask * sizeof(long long));
+      int *max_loc                     = mymalloc("max_loc", Local_NTask * sizeof(int));
+
+      size_t *list                         = mymalloc("list", Local_NTask * sizeof(size_t));
+      size_t *range_len_list               = mymalloc("range_len_list", Local_NTask * sizeof(long long));
+      char *median_element                 = mymalloc("median_element", size);
+      median_element_list                  = mymalloc("median_element_list", Local_NTask * size);
+      size_t *tie_braking_rank_list        = mymalloc("tie_braking_rank_list", Local_NTask * sizeof(size_t));
+      int *index_list                      = mymalloc("index_list", Local_NTask * sizeof(int));
+      int *max_loc_list                    = mymalloc("max_loc_list", Local_NTask * sizeof(int));
+      size_t *source_range_len_list        = mymalloc("source_range_len_list", Local_NTask * sizeof(long long));
+      size_t *source_tie_braking_rank_list = mymalloc("source_tie_braking_rank_list", Local_NTask * sizeof(long long));
+      char *source_median_element_list     = mymalloc("source_median_element_list", Local_NTask * size);
+      char *new_element_guess              = mymalloc("new_element_guess", size);
+
+      for(i = 0; i < Local_NTask - 1; i++)
+        {
+          desired_glob_rank[i] = noffs[i + 1];
+          current_glob_rank[i] = 0;
+          range_left[i]        = 0;     /* first element that it can be */
+          range_right[i]       = nmemb; /* first element that it can not be */
+        }
+
+      /* now we determine the first split element guess, which is the same for all divisions in the first iteration */
+
+      /* find the median of each processor, and then take the median among those values.
+       * This should work reasonably well even for extremely skewed distributions
+       */
+      long long range_len = range_right[0] - range_left[0];
+
+      if(range_len >= 1)
+        {
+          long long mid = (range_left[0] + range_right[0]) / 2;
+          memcpy(median_element, (char *)base + mid * size, size);
+          tie_braking_rank = mid + noffs[Local_ThisTask];
+        }
+
+      MPI_Gather(&range_len, sizeof(long long), MPI_BYTE, range_len_list, sizeof(long long), MPI_BYTE, 0, MPI_CommLocal);
+      MPI_Gather(median_element, size, MPI_BYTE, median_element_list, size, MPI_BYTE, 0, MPI_CommLocal);
+      MPI_Gather(&tie_braking_rank, sizeof(size_t), MPI_BYTE, tie_braking_rank_list, sizeof(size_t), MPI_BYTE, 0, MPI_CommLocal);
+
+      if(Local_ThisTask == 0)
+        {
+          for(j = 0; j < Local_NTask; j++)
+            max_loc_list[j] = j;
+
+          /* eliminate the elements that are undefined because the corresponding CPU has zero range left */
+          int nleft = Local_NTask;
+
+          for(j = 0; j < nleft; j++)
+            {
+              if(range_len_list[j] < 1)
+                {
+                  range_len_list[j] = range_len_list[nleft - 1];
+                  if(range_len_list[nleft - 1] >= 1 && j != (nleft - 1))
+                    {
+                      memcpy(median_element_list + j * size, median_element_list + (nleft - 1) * size, size);
+                      memcpy(tie_braking_rank_list + j, tie_braking_rank_list + (nleft - 1), sizeof(size_t));
+                      max_loc_list[j] = max_loc_list[nleft - 1];
+                    }
+
+                  nleft--;
+                  j--;
+                }
+            }
+
+          /* do a serial sort of the remaining elements (indirectly, so that we have the order of the tie-breaking list as well) */
+          comparfunc   = compar;
+          element_size = size;
+          for(j = 0; j < nleft; j++)
+            index_list[j] = j;
+          qsort(index_list, nleft, sizeof(int), parallel_sort_indirect_compare);
+
+          /* now select the median of the medians */
+          int mid = nleft / 2;
+          memcpy(&element_guess[0], median_element_list + index_list[mid] * size, size);
+          element_tie_braking_rank[0] = tie_braking_rank_list[index_list[mid]];
+          max_loc[0]                  = max_loc_list[index_list[mid]];
+        }
+
+      MPI_Bcast(element_guess, size, MPI_BYTE, 0, MPI_CommLocal);
+      MPI_Bcast(&element_tie_braking_rank[0], sizeof(size_t), MPI_BYTE, 0, MPI_CommLocal);
+      MPI_Bcast(&max_loc[0], 1, MPI_INT, 0, MPI_CommLocal);
+
+      for(i = 1; i < Local_NTask - 1; i++)
+        {
+          memcpy(element_guess + i * size, element_guess, size);
+          element_tie_braking_rank[i] = element_tie_braking_rank[0];
+          max_loc[i]                  = max_loc[0];
+        }
+
+      int iter = 0;
+
+      do
+        {
+          for(i = 0; i < Local_NTask - 1; i++)
+            {
+              if(current_glob_rank[i] != desired_glob_rank[i])
+                {
+                  get_local_rank(element_guess + i * size, element_tie_braking_rank[i], (char *)base, nmemb, size,
+                                 noffs[Local_ThisTask], range_left[i], range_right[i], &current_loc_rank[i], compar);
+                }
+            }
+
+          /* now compute the global ranks by summing the local ranks */
+          /* Note: the last element in current_loc_rank is not defined. It will be summed by the last processor, and stored in the last
+           * element of current_glob_rank */
+          MPI_Alltoall(current_loc_rank, sizeof(size_t), MPI_BYTE, list, sizeof(size_t), MPI_BYTE, MPI_CommLocal);
+          for(j = 0, rank = 0; j < Local_NTask; j++)
+            rank += list[j];
+          MPI_Allgather(&rank, sizeof(size_t), MPI_BYTE, current_glob_rank, sizeof(size_t), MPI_BYTE, MPI_CommLocal);
+
+          for(i = 0, ranks_not_found = 0; i < Local_NTask - 1; i++)
+            {
+              if(current_glob_rank[i] != desired_glob_rank[i]) /* here we're not yet done */
+                {
+                  ranks_not_found++;
+
+                  if(current_glob_rank[i] < desired_glob_rank[i])
+                    {
+                      range_left[i] = current_loc_rank[i];
+
+                      if(Local_ThisTask == max_loc[i])
+                        range_left[i]++;
+                    }
+
+                  if(current_glob_rank[i] > desired_glob_rank[i])
+                    range_right[i] = current_loc_rank[i];
+                }
+            }
+
+          /* now we need to determine new element guesses */
+          for(i = 0; i < Local_NTask - 1; i++)
+            {
+              if(current_glob_rank[i] != desired_glob_rank[i]) /* here we're not yet done */
+                {
+                  /* find the median of each processor, and then take the median among those values.
+                   * This should work reasonably well even for extremely skewed distributions
+                   */
+                  source_range_len_list[i] = range_right[i] - range_left[i];
+
+                  if(source_range_len_list[i] >= 1)
+                    {
+                      long long middle = (range_left[i] + range_right[i]) / 2;
+                      memcpy(source_median_element_list + i * size, (char *)base + middle * size, size);
+                      source_tie_braking_rank_list[i] = middle + noffs[Local_ThisTask];
+                    }
+                }
+            }
+
+          MPI_Alltoall(source_range_len_list, sizeof(long long), MPI_BYTE, range_len_list, sizeof(long long), MPI_BYTE, MPI_CommLocal);
+          MPI_Alltoall(source_median_element_list, size, MPI_BYTE, median_element_list, size, MPI_BYTE, MPI_CommLocal);
+          MPI_Alltoall(source_tie_braking_rank_list, sizeof(size_t), MPI_BYTE, tie_braking_rank_list, sizeof(size_t), MPI_BYTE,
+                       MPI_CommLocal);
+
+          if(Local_ThisTask < Local_NTask - 1)
+            {
+              if(current_glob_rank[Local_ThisTask] !=
+                 desired_glob_rank[Local_ThisTask]) /* in this case we're not yet done for this split point */
+                {
+                  for(j = 0; j < Local_NTask; j++)
+                    max_loc_list[j] = j;
+
+                  /* eliminate the elements that are undefined because the corresponding CPU has zero range left */
+                  int nleft = Local_NTask;
+
+                  for(j = 0; j < nleft; j++)
+                    {
+                      if(range_len_list[j] < 1)
+                        {
+                          range_len_list[j] = range_len_list[nleft - 1];
+                          if(range_len_list[nleft - 1] >= 1 && j != (nleft - 1))
+                            {
+                              memcpy(median_element_list + j * size, median_element_list + (nleft - 1) * size, size);
+                              memcpy(tie_braking_rank_list + j, tie_braking_rank_list + (nleft - 1), sizeof(size_t));
+                              max_loc_list[j] = max_loc_list[nleft - 1];
+                            }
+
+                          nleft--;
+                          j--;
+                        }
+                    }
+
+                  if((iter & 1))
+                    {
+                      size_t max_range, maxj; /* size_t like range_len_list[]: an int max_range would truncate large range lengths */
+
+                      for(j = 0, maxj = 0, max_range = 0; j < nleft; j++)
+                        if(range_len_list[j] > max_range)
+                          {
+                            max_range = range_len_list[j];
+                            maxj      = j;
+                          }
+
+                      /* now select the median element from the task which has the largest range */
+                      memcpy(new_element_guess, median_element_list + maxj * size, size);
+                      new_tie_braking_rank = tie_braking_rank_list[maxj];
+                      new_max_loc          = max_loc_list[maxj];
+                    }
+                  else
+                    {
+                      /* do a serial sort of the remaining elements (indirectly, so that we have the order of the tie-breaking list too)
+                       */
+                      comparfunc   = compar;
+                      element_size = size;
+                      for(j = 0; j < nleft; j++)
+                        index_list[j] = j;
+                      qsort(index_list, nleft, sizeof(int), parallel_sort_indirect_compare);
+
+                      /* now select the median of the medians */
+                      int mid = nleft / 2;
+                      memcpy(new_element_guess, median_element_list + index_list[mid] * size, size);
+                      new_tie_braking_rank = tie_braking_rank_list[index_list[mid]];
+                      new_max_loc          = max_loc_list[index_list[mid]];
+                    }
+                }
+              else
+                {
+                  /* in order to preserve existing guesses */
+                  memcpy(new_element_guess, element_guess + Local_ThisTask * size, size);
+                  new_tie_braking_rank = element_tie_braking_rank[Local_ThisTask];
+                  new_max_loc          = max_loc[Local_ThisTask];
+                }
+            }
+
+          MPI_Allgather(new_element_guess, size, MPI_BYTE, element_guess, size, MPI_BYTE, MPI_CommLocal);
+          MPI_Allgather(&new_tie_braking_rank, sizeof(size_t), MPI_BYTE, element_tie_braking_rank, sizeof(size_t), MPI_BYTE,
+                        MPI_CommLocal);
+          MPI_Allgather(&new_max_loc, 1, MPI_INT, max_loc, 1, MPI_INT, MPI_CommLocal);
+
+          iter++;
+
+          if(iter > (MAX_ITER_PARALLEL_SORT - 100) && Local_ThisTask == 0)
+            {
+              printf("PSORT: iter=%d: ranks_not_found=%d  Local_NTask=%d\n", iter, ranks_not_found, Local_NTask);
+              myflush(stdout);
+              if(iter > MAX_ITER_PARALLEL_SORT)
+                terminate("can't find the split points. That's odd");
+            }
+        }
+      while(ranks_not_found);
+
+      myfree(new_element_guess);
+      myfree(source_median_element_list);
+      myfree(source_tie_braking_rank_list);
+      myfree(source_range_len_list);
+      myfree(max_loc_list);
+      myfree(index_list);
+      myfree(tie_braking_rank_list);
+      myfree(median_element_list);
+      myfree(median_element);
+
+      /* At this point we have found all the elements corresponding to the desired split points */
+      /* we can now go ahead and determine how many elements of the local CPU have to go to each other CPU */
+
+      if(nmemb * size >= (1LL << 31))
+        terminate("currently, local data must be smaller than 2 GB");
+      /* note: to lift this limitation, the send/recv count arrays have to be made 64-bit,
+       * and the MPI data exchange through MPI_Alltoallv has to be modified such that buffers > 2 GB become possible.
+       * '>=' above: a byte count of exactly 2^31 already overflows the int counts/offsets used below */
+
+      int *send_count  = mymalloc("send_count", Local_NTask * sizeof(int));
+      int *recv_count  = mymalloc("recv_count", Local_NTask * sizeof(int));
+      int *send_offset = mymalloc("send_offset", Local_NTask * sizeof(int));
+      int *recv_offset = mymalloc("recv_offset", Local_NTask * sizeof(int));
+
+      for(i = 0; i < Local_NTask; i++)
+        send_count[i] = 0;
+
+      int target = 0;
+
+      for(i = 0; i < nmemb; i++)
+        {
+          while(target < Local_NTask - 1)
+            {
+              int cmp = compar((char *)base + i * size, element_guess + target * size);
+              if(cmp == 0)
+                {
+                  if(i + noffs[Local_ThisTask] < element_tie_braking_rank[target])
+                    cmp = -1;
+                  else if(i + noffs[Local_ThisTask] > element_tie_braking_rank[target])
+                    cmp = +1;
+                }
+              if(cmp >= 0)
+                target++;
+              else
+                break;
+            }
+          send_count[target]++;
+        }
+
+      MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_CommLocal);
+
+      size_t nimport;
+
+      for(j = 0, nimport = 0, recv_offset[0] = 0, send_offset[0] = 0; j < Local_NTask; j++)
+        {
+          nimport += recv_count[j];
+
+          if(j > 0)
+            {
+              send_offset[j] = send_offset[j - 1] + send_count[j - 1];
+              recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1];
+            }
+        }
+
+      if(nimport != nmemb)
+        terminate("nimport != nmemb");
+
+      for(j = 0; j < Local_NTask; j++)
+        {
+          send_count[j] *= size;
+          recv_count[j] *= size;
+
+          send_offset[j] *= size;
+          recv_offset[j] *= size;
+        }
+
+      char *basetmp = mymalloc("basetmp", nmemb * size);
+
+      /* exchange the data */
+      MPI_Alltoallv(base, send_count, send_offset, MPI_BYTE, basetmp, recv_count, recv_offset, MPI_BYTE, MPI_CommLocal);
+
+      memcpy(base, basetmp, nmemb * size);
+      myfree(basetmp);
+
+      serial_sort((char *)base, nmemb, size, compar);
+
+      myfree(recv_offset);
+      myfree(send_offset);
+      myfree(recv_count);
+      myfree(send_count);
+
+      myfree(range_len_list);
+      myfree(list);
+      myfree(max_loc);
+      myfree(range_right);
+      myfree(range_left);
+      myfree(current_loc_rank);
+      myfree(current_glob_rank);
+      myfree(desired_glob_rank);
+      myfree(element_tie_braking_rank);
+      myfree(element_guess);
+      myfree(noffs);
+      myfree(nlist);
+    }
+
+  MPI_Comm_free(&MPI_CommLocal);
+
+  double tb = second();
+  return timediff(ta, tb);
+}
+
+/*! \brief Get the local rank of an element, i.e. the index within [left, right] of the local array where it belongs.
+ *
+ *  \param[in] element Element of which we want the rank.
+ *  \param[in] tie_braking_rank The initial global rank of this element (needed
+ *             for breaking ties between elements that compare equal).
+ *  \param[in] base Base address of local data.
+ *  \param[in] nmemb Number of elements in array.
+ *  \param[in] size Size of one element in bytes.
+ *  \param[in] noffs_thistask Cumulative length of data on lower tasks.
+ *  \param[in] left First local index that may hold the element (inclusive bound of the search range).
+ *  \param[in] right First local index that can not hold the element (exclusive bound of the search range).
+ *  \param[out] loc Local rank of the element.
+ *  \param[in] compar User-specified comparison function.
+ *
+ *  \return void
+ */
+static void get_local_rank(char *element, size_t tie_braking_rank, char *base, size_t nmemb, size_t size, size_t noffs_thistask,
+                           long long left, long long right, size_t *loc, int (*compar)(const void *, const void *))
+{
+  if(right < left)
+    terminate("right < left");
+
+  if(left == 0 && right == nmemb + 1) /* NOTE(review): parallel_sort_comm() starts with right = nmemb, so this may never trigger */
+    {
+      if(compar(base + (nmemb - 1) * size, element) < 0)
+        {
+          *loc = nmemb;
+          return;
+        }
+      else if(compar(base, element) > 0)
+        {
+          *loc = 0;
+          return;
+        }
+    }
+
+  if(right == left) /* looks like we already converged to the proper rank */
+    {
+      *loc = left;
+    }
+  else
+    {
+      if(compar(base + (right - 1) * size, element) < 0) /* the last element is smaller, hence all elements are on the left */
+        *loc = (right - 1) + 1;
+      else if(compar(base + left * size, element) > 0) /* the first element is already larger, hence no element is on the left */
+        *loc = left;
+      else
+        {
+          while(right > left) /* binary search; every path below either shrinks [left, right) or leaves via break */
+            {
+              long long mid = ((right - 1) + left) / 2;
+
+              int cmp = compar(base + mid * size, element);
+              if(cmp == 0) /* equal under compar: order by initial global rank instead */
+                {
+                  if(mid + noffs_thistask < tie_braking_rank)
+                    cmp = -1;
+                  else if(mid + noffs_thistask > tie_braking_rank)
+                    cmp = +1;
+                }
+
+              if(cmp == 0) /* element has exactly been found */
+                {
+                  *loc = mid;
+                  break;
+                }
+
+              if((right - 1) == left) /* only one candidate was left and it is not the element: element is not on this CPU */
+                {
+                  if(cmp < 0)
+                    *loc = mid + 1;
+                  else
+                    *loc = mid;
+                  break;
+                }
+
+              if(cmp < 0)
+                {
+                  left = mid + 1;
+                }
+              else
+                {
+                  if((right - 1) == left + 1) /* two candidates left, so mid must equal left here */
+                    {
+                      if(mid != left)
+                        terminate("Can't be: -->left=%lld  right=%lld\n", left, right);
+
+                      *loc = left;
+                      break;
+                    }
+
+                  right = mid;
+                }
+            }
+        }
+    }
+}
+
+/*! \brief Wrapper for serial sorting algorithm.
+ *
+ *  Allocates a scratch buffer of nmemb * size bytes, runs msort_serial_with_tmp() on the array and frees the buffer again.
+ *
+ *  \param[in, out] base Array to be sorted.
+ *  \param[in] nmemb Number of elements in array.
+ *  \param[in] size Size of each element in bytes.
+ *  \param[in] compar Comparison function.
+ *
+ *  \return void
+ */
+static void serial_sort(char *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *))
+{
+  size_t storage = nmemb * size; /* scratch space as large as the array itself */
+  char *tmp      = (char *)mymalloc("tmp", storage);
+
+  msort_serial_with_tmp(base, nmemb, size, compar, tmp);
+
+  myfree(tmp);
+}
+
+/*! \brief Merge sort algorithm (serial, recursive); the two halves are sorted in place and then merged through t.
+ *
+ *  \param[in, out] base Array to be sorted.
+ *  \param[in] n Number of elements.
+ *  \param[in] s Size of each element in bytes.
+ *  \param[in] compar Comparison function.
+ *  \param[in, out] t Scratch buffer for the merge step; at most n * s bytes of it are written.
+ *
+ *  \return void
+ */
+static void msort_serial_with_tmp(char *base, size_t n, size_t s, int (*compar)(const void *, const void *), char *t)
+{
+  char *tmp;
+  char *b1, *b2;
+  size_t n1, n2;
+
+  if(n <= 1) /* zero or one element: nothing to sort */
+    return;
+
+  n1 = n / 2;
+  n2 = n - n1;
+  b1 = base;
+  b2 = base + n1 * s;
+
+  msort_serial_with_tmp(b1, n1, s, compar, t); /* both recursive calls reuse the same scratch buffer */
+  msort_serial_with_tmp(b2, n2, s, compar, t);
+
+  tmp = t;
+
+  while(n1 > 0 && n2 > 0)
+    {
+      if(compar(b1, b2) < 0) /* strict '<': when the two compare equal, the element of the second half is emitted first */
+        {
+          --n1;
+          memcpy(tmp, b1, s);
+          tmp += s;
+          b1 += s;
+        }
+      else
+        {
+          --n2;
+          memcpy(tmp, b2, s);
+          tmp += s;
+          b2 += s;
+        }
+    }
+
+  if(n1 > 0)
+    memcpy(tmp, b1, n1 * s); /* flush what is left of the first half */
+
+  memcpy(base, t, (n - n2) * s); /* the n2 leftover elements of the second half already sit at the end of base */
+}
+
+/*! \brief Checks the ordering across task boundaries after a parallel sort over MPI_COMM_WORLD.
+ *
+ *  \param[in] base Local array to be checked.
+ *  \param[in] nmemb Number of elements in the local array.
+ *  \param[in] size Size of each element in bytes.
+ *  \param[in] compar Comparison function.
+ *
+ *  \return void; calls terminate("wrong order") if the received boundary element compares greater than a local element.
+ */
+void parallel_sort_test_order(char *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *))
+{
+  int i, recv, send;
+  size_t *nlist;
+
+  nlist = (size_t *)mymalloc("nlist", NTask * sizeof(size_t));
+
+  MPI_Allgather(&nmemb, sizeof(size_t), MPI_BYTE, nlist, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+
+  for(i = 0, recv = -1; i < ThisTask && nmemb > 0; i++) /* recv = closest lower task holding data, or -1 */
+    if(nlist[i] > 0)
+      recv = i;
+
+  for(i = ThisTask + 1, send = -1; nmemb > 0 && i < NTask; i++) /* send = closest higher task holding data, or -1 */
+    if(nlist[i] > 0)
+      {
+        send = i;
+        break;
+      }
+
+  char *element = mymalloc("element", size);
+
+  MPI_Request requests[2];
+  int nreq = 0;
+
+  if(send >= 0) /* pass our last local element on to the next task that holds data */
+    MPI_Isend(base + (nmemb - 1) * size, size, MPI_BYTE, send, TAG_TRANSFER, MPI_COMM_WORLD, &requests[nreq++]);
+
+  if(recv >= 0) /* fetch the last element of the previous task that holds data */
+    MPI_Irecv(element, size, MPI_BYTE, recv, TAG_TRANSFER, MPI_COMM_WORLD, &requests[nreq++]);
+
+  MPI_Waitall(nreq, requests, MPI_STATUSES_IGNORE);
+
+  if(recv >= 0)
+    {
+      for(i = 0; i < nmemb; i++) /* only the received element is compared; the local order itself is not verified here */
+        {
+          if(compar(element, base + i * size) > 0)
+            terminate("wrong order");
+        }
+    }
+
+  myfree(element);
+  myfree(nlist);
+}
diff --git a/src/amuse/community/arepo/src/utils/predicates.c b/src/amuse/community/arepo/src/utils/predicates.c
new file mode 100644
index 0000000000..bd06b00166
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/predicates.c
@@ -0,0 +1,4292 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/utils/predicates.c
+ * \date        05/2018
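+ * \brief       Exact floating-point arithmetic and robust geometric predicates.
+ * \details     J. R. Shewchuk's public domain predicates.c; his header follows.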
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ */
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Routines for Arbitrary Precision Floating-point Arithmetic               */
+/*  and Fast Robust Geometric Predicates                                     */
+/*  (predicates.c)                                                           */
+/*                                                                           */
+/*  May 18, 1996                                                             */
+/*                                                                           */
+/*  Placed in the public domain by                                           */
+/*  Jonathan Richard Shewchuk                                                */
+/*  School of Computer Science                                               */
+/*  Carnegie Mellon University                                               */
+/*  5000 Forbes Avenue                                                       */
+/*  Pittsburgh, Pennsylvania  15213-3891                                     */
+/*  jrs@cs.cmu.edu                                                           */
+/*                                                                           */
+/*  This file contains C implementation of algorithms for exact addition     */
+/*    and multiplication of floating-point numbers, and predicates for       */
+/*    robustly performing the orientation and incircle tests used in         */
+/*    computational geometry.  The algorithms and underlying theory are      */
+/*    described in Jonathan Richard Shewchuk.  "Adaptive Precision Floating- */
+/*    Point Arithmetic and Fast Robust Geometric Predicates."  Technical     */
+/*    Report CMU-CS-96-140, School of Computer Science, Carnegie Mellon      */
+/*    University, Pittsburgh, Pennsylvania, May 1996.  (Submitted to         */
+/*    Discrete & Computational Geometry.)                                    */
+/*                                                                           */
+/*  This file, the paper listed above, and other information are available   */
+/*    from the Web page http://www.cs.cmu.edu/~quake/robust.html .           */
+/*                                                                           */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Using this code:                                                         */
+/*                                                                           */
+/*  First, read the short or long version of the paper (from the Web page    */
+/*    above).                                                                */
+/*                                                                           */
+/*  Be sure to call exactinit() once, before calling any of the arithmetic   */
+/*    functions or geometric predicates.  Also be sure to turn on the        */
+/*    optimizer when compiling this file.                                    */
+/*                                                                           */
+/*                                                                           */
+/*  Several geometric predicates are defined.  Their parameters are all      */
+/*    points.  Each point is an array of two or three floating-point         */
+/*    numbers.  The geometric predicates, described in the papers, are       */
+/*                                                                           */
+/*    orient2d(pa, pb, pc)                                                   */
+/*    orient2dfast(pa, pb, pc)                                               */
+/*    orient3d(pa, pb, pc, pd)                                               */
+/*    orient3dfast(pa, pb, pc, pd)                                           */
+/*    incircle(pa, pb, pc, pd)                                               */
+/*    incirclefast(pa, pb, pc, pd)                                           */
+/*    insphere(pa, pb, pc, pd, pe)                                           */
+/*    inspherefast(pa, pb, pc, pd, pe)                                       */
+/*                                                                           */
+/*  Those with suffix "fast" are approximate, non-robust versions.  Those    */
+/*    without the suffix are adaptive precision, robust versions.  There     */
+/*    are also versions with the suffixes "exact" and "slow", which are      */
+/*    non-adaptive, exact arithmetic versions, which I use only for timings  */
+/*    in my arithmetic papers.                                               */
+/*                                                                           */
+/*                                                                           */
+/*  An expansion is represented by an array of floating-point numbers,       */
+/*    sorted from smallest to largest magnitude (possibly with interspersed  */
+/*    zeros).  The length of each expansion is stored as a separate integer, */
+/*    and each arithmetic function returns an integer which is the length    */
+/*    of the expansion it created.                                           */
+/*                                                                           */
+/*  Several arithmetic functions are defined.  Their parameters are          */
+/*                                                                           */
+/*    e, f           Input expansions                                        */
+/*    elen, flen     Lengths of input expansions (must be >= 1)              */
+/*    h              Output expansion                                        */
+/*    b              Input scalar                                            */
+/*                                                                           */
+/*  The arithmetic functions are                                             */
+/*                                                                           */
+/*    grow_expansion(elen, e, b, h)                                          */
+/*    grow_expansion_zeroelim(elen, e, b, h)                                 */
+/*    expansion_sum(elen, e, flen, f, h)                                     */
+/*    expansion_sum_zeroelim1(elen, e, flen, f, h)                           */
+/*    expansion_sum_zeroelim2(elen, e, flen, f, h)                           */
+/*    fast_expansion_sum(elen, e, flen, f, h)                                */
+/*    fast_expansion_sum_zeroelim(elen, e, flen, f, h)                       */
+/*    linear_expansion_sum(elen, e, flen, f, h)                              */
+/*    linear_expansion_sum_zeroelim(elen, e, flen, f, h)                     */
+/*    scale_expansion(elen, e, b, h)                                         */
+/*    scale_expansion_zeroelim(elen, e, b, h)                                */
+/*    compress(elen, e, h)                                                   */
+/*                                                                           */
+/*  All of these are described in the long version of the paper; some are    */
+/*    described in the short version.  All return an integer that is the     */
+/*    length of h.  Those with suffix _zeroelim perform zero elimination,    */
+/*    and are recommended over their counterparts.  The procedure            */
+/*    fast_expansion_sum_zeroelim() (or linear_expansion_sum_zeroelim() on   */
+/*    processors that do not use the round-to-even tiebreaking rule) is      */
+/*    recommended over expansion_sum_zeroelim().  Each procedure has a       */
+/*    little note next to it (in the code below) that tells you whether or   */
+/*    not the output expansion may be the same array as one of the input     */
+/*    expansions.                                                            */
+/*                                                                           */
+/*                                                                           */
+/*  If you look around below, you'll also find macros for a bunch of         */
+/*    simple unrolled arithmetic operations, and procedures for printing     */
+/*    expansions (commented out because they don't work with all C           */
+/*    compilers) and for generating random floating-point numbers whose      */
+/*    significand bits are all random.  Most of the macros have undocumented */
+/*    requirements that certain of their parameters should not be the same   */
+/*    variable; for safety, better to make sure all the parameters are       */
+/*    distinct variables.  Feel free to send email to jrs@cs.cmu.edu if you  */
+/*    have questions.                                                        */
+/*                                                                           */
+/*****************************************************************************/
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+/* On some machines, the exact arithmetic routines might be defeated by the  */
+/*   use of internal extended precision floating-point registers.  Sometimes */
+/*   this problem can be fixed by defining certain values to be volatile,    */
+/*   thus forcing them to be stored to memory and rounded off.  This isn't   */
+/*   a great solution, though, as it slows the arithmetic down.              */
+/*                                                                           */
+/* To try this out, write "#define INEXACT volatile" below.  Normally,       */
+/*   however, INEXACT should be defined to be nothing.  ("#define INEXACT".) */
+
+#define INEXACT /* Nothing */
+/* #define INEXACT volatile */
+
+#define REAL double /* float or double */
+#define REALPRINT doubleprint
+#define REALRAND doublerand
+#define NARROWRAND narrowdoublerand
+#define UNIFORMRAND uniformdoublerand
+
+/* Which of the following two methods of finding the absolute values is      */
+/*   fastest is compiler-dependent.  A few compilers can inline and optimize */
+/*   the fabs() call; but most will incur the overhead of a function call,   */
+/*   which is disastrously slow.  A faster way on IEEE machines might be to  */
+/*   mask the appropriate bit, but that's difficult to do in C.              */
+
+#define Absolute(a) ((a) >= 0.0 ? (a) : -(a))
+/* #define Absolute(a)  fabs(a) */
+
+/* Many of the operations are broken up into two pieces, a main part that    */
+/*   performs an approximate operation, and a "tail" that computes the       */
+/*   roundoff error of that operation.                                       */
+/*                                                                           */
+/* The operations Fast_Two_Sum(), Fast_Two_Diff(), Two_Sum(), Two_Diff(),    */
+/*   Split(), and Two_Product() are all implemented as described in the      */
+/*   reference.  Each macro expands to several bare statements (no braces,   */
+/*   no trailing semicolon) that assign to variables of the calling routine  */
+/*   (`bvirt', `avirt', `bround', `around', `c', `abig', `ahi', `alo',       */
+/*   `bhi', `blo', `err1'..`err3'); arguments are pasted in unparenthesized. */
+/*   Variables that hold possibly rounded results (`bvirt', `c', `abig',     */
+/*   `_i'..`_n', and `x' or the highest numbered `x_') must be `INEXACT'.    */
+
+#define Fast_Two_Sum_Tail(a, b, x, y) \
+  bvirt = x - a;                      \
+  y     = b - bvirt
+
+#define Fast_Two_Sum(a, b, x, y) \
+  x = (REAL)(a + b);             \
+  Fast_Two_Sum_Tail(a, b, x, y)
+
+#define Fast_Two_Diff_Tail(a, b, x, y) \
+  bvirt = a - x;                       \
+  y     = bvirt - b
+
+#define Fast_Two_Diff(a, b, x, y) \
+  x = (REAL)(a - b);              \
+  Fast_Two_Diff_Tail(a, b, x, y)
+
+#define Two_Sum_Tail(a, b, x, y) \
+  bvirt  = (REAL)(x - a);        \
+  avirt  = x - bvirt;            \
+  bround = b - bvirt;            \
+  around = a - avirt;            \
+  y      = around + bround
+
+#define Two_Sum(a, b, x, y) \
+  x = (REAL)(a + b);        \
+  Two_Sum_Tail(a, b, x, y)
+
+#define Two_Diff_Tail(a, b, x, y) \
+  bvirt  = (REAL)(a - x);         \
+  avirt  = x + bvirt;             \
+  bround = bvirt - b;             \
+  around = a - avirt;             \
+  y      = around + bround
+
+#define Two_Diff(a, b, x, y) \
+  x = (REAL)(a - b);         \
+  Two_Diff_Tail(a, b, x, y)
+
+#define Split(a, ahi, alo)     \
+  c    = (REAL)(splitter * a); \
+  abig = (REAL)(c - a);        \
+  ahi  = c - abig;             \
+  alo  = a - ahi
+
+#define Two_Product_Tail(a, b, x, y) \
+  Split(a, ahi, alo);                \
+  Split(b, bhi, blo);                \
+  err1 = x - (ahi * bhi);            \
+  err2 = err1 - (alo * bhi);         \
+  err3 = err2 - (ahi * blo);         \
+  y    = (alo * blo) - err3
+
+#define Two_Product(a, b, x, y) \
+  x = (REAL)(a * b);            \
+  Two_Product_Tail(a, b, x, y)
+
+/* Two_Product_Presplit() is Two_Product() where the second input `b' has    */
+/*   already been split into `bhi' and `blo'; only `a' is split here.        */
+
+#define Two_Product_Presplit(a, b, bhi, blo, x, y) \
+  x = (REAL)(a * b);                               \
+  Split(a, ahi, alo);                              \
+  err1 = x - (ahi * bhi);                          \
+  err2 = err1 - (alo * bhi);                       \
+  err3 = err2 - (ahi * blo);                       \
+  y    = (alo * blo) - err3
+
+/* Two_Product_2Presplit() is Two_Product() where both of the inputs have    */
+/*   already been split by the caller, so it contains no Split() at all.     */
+
+#define Two_Product_2Presplit(a, ahi, alo, b, bhi, blo, x, y) \
+  x    = (REAL)(a * b);                                       \
+  err1 = x - (ahi * bhi);                                     \
+  err2 = err1 - (alo * bhi);                                  \
+  err3 = err2 - (ahi * blo);                                  \
+  y    = (alo * blo) - err3
+
+/* Square() needs a single Split() of `a', one fewer than Two_Product().     */
+
+#define Square_Tail(a, x, y)         \
+  Split(a, ahi, alo);                \
+  err1 = x - (ahi * ahi);            \
+  err3 = err1 - ((ahi + ahi) * alo); \
+  y    = (alo * alo) - err3
+
+#define Square(a, x, y) \
+  x = (REAL)(a * a);    \
+  Square_Tail(a, x, y)
+
+/* Macros for summing fixed-length expansions (unrolled Expansion_Sum()).    */
+/*   Four_Four_Sum/Eight_Four_Sum name their first two `b' inputs b4 and b3. */
+
+#define Two_One_Sum(a1, a0, b, x2, x1, x0) \
+  Two_Sum(a0, b, _i, x0);                  \
+  Two_Sum(a1, _i, x2, x1)
+
+#define Two_One_Diff(a1, a0, b, x2, x1, x0) \
+  Two_Diff(a0, b, _i, x0);                  \
+  Two_Sum(a1, _i, x2, x1)
+
+#define Two_Two_Sum(a1, a0, b1, b0, x3, x2, x1, x0) \
+  Two_One_Sum(a1, a0, b0, _j, _0, x0);              \
+  Two_One_Sum(_j, _0, b1, x3, x2, x1)
+
+#define Two_Two_Diff(a1, a0, b1, b0, x3, x2, x1, x0) \
+  Two_One_Diff(a1, a0, b0, _j, _0, x0);              \
+  Two_One_Diff(_j, _0, b1, x3, x2, x1)
+
+#define Four_One_Sum(a3, a2, a1, a0, b, x4, x3, x2, x1, x0) \
+  Two_One_Sum(a1, a0, b, _j, x1, x0);                       \
+  Two_One_Sum(a3, a2, _j, x4, x3, x2)
+
+#define Four_Two_Sum(a3, a2, a1, a0, b1, b0, x5, x4, x3, x2, x1, x0) \
+  Four_One_Sum(a3, a2, a1, a0, b0, _k, _2, _1, _0, x0);              \
+  Four_One_Sum(_k, _2, _1, _0, b1, x5, x4, x3, x2, x1)
+
+#define Four_Four_Sum(a3, a2, a1, a0, b4, b3, b1, b0, x7, x6, x5, x4, x3, x2, x1, x0) \
+  Four_Two_Sum(a3, a2, a1, a0, b1, b0, _l, _2, _1, _0, x1, x0);                       \
+  Four_Two_Sum(_l, _2, _1, _0, b4, b3, x7, x6, x5, x4, x3, x2)
+
+#define Eight_One_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b, x8, x7, x6, x5, x4, x3, x2, x1, x0) \
+  Four_One_Sum(a3, a2, a1, a0, b, _j, x3, x2, x1, x0);                                       \
+  Four_One_Sum(a7, a6, a5, a4, _j, x8, x7, x6, x5, x4)
+
+#define Eight_Two_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b1, b0, x9, x8, x7, x6, x5, x4, x3, x2, x1, x0) \
+  Eight_One_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b0, _k, _6, _5, _4, _3, _2, _1, _0, x0);              \
+  Eight_One_Sum(_k, _6, _5, _4, _3, _2, _1, _0, b1, x9, x8, x7, x6, x5, x4, x3, x2, x1)
+
+#define Eight_Four_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b4, b3, b1, b0, x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1, x0) \
+  Eight_Two_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b1, b0, _l, _6, _5, _4, _3, _2, _1, _0, x1, x0);                         \
+  Eight_Two_Sum(_l, _6, _5, _4, _3, _2, _1, _0, b4, b3, x11, x10, x9, x8, x7, x6, x5, x4, x3, x2)
+
+/* Macros for multiplying fixed-length expansions; each input is Split once. */
+
+#define Two_One_Product(a1, a0, b, x3, x2, x1, x0) \
+  Split(b, bhi, blo);                              \
+  Two_Product_Presplit(a0, b, bhi, blo, _i, x0);   \
+  Two_Product_Presplit(a1, b, bhi, blo, _j, _0);   \
+  Two_Sum(_i, _0, _k, x1);                         \
+  Fast_Two_Sum(_j, _k, x3, x2)
+
+#define Four_One_Product(a3, a2, a1, a0, b, x7, x6, x5, x4, x3, x2, x1, x0) \
+  Split(b, bhi, blo);                                                       \
+  Two_Product_Presplit(a0, b, bhi, blo, _i, x0);                            \
+  Two_Product_Presplit(a1, b, bhi, blo, _j, _0);                            \
+  Two_Sum(_i, _0, _k, x1);                                                  \
+  Fast_Two_Sum(_j, _k, _i, x2);                                             \
+  Two_Product_Presplit(a2, b, bhi, blo, _j, _0);                            \
+  Two_Sum(_i, _0, _k, x3);                                                  \
+  Fast_Two_Sum(_j, _k, _i, x4);                                             \
+  Two_Product_Presplit(a3, b, bhi, blo, _j, _0);                            \
+  Two_Sum(_i, _0, _k, x5);                                                  \
+  Fast_Two_Sum(_j, _k, x7, x6)
+
+#define Two_Two_Product(a1, a0, b1, b0, x7, x6, x5, x4, x3, x2, x1, x0) \
+  Split(a0, a0hi, a0lo);                                                \
+  Split(b0, bhi, blo);                                                  \
+  Two_Product_2Presplit(a0, a0hi, a0lo, b0, bhi, blo, _i, x0);          \
+  Split(a1, a1hi, a1lo);                                                \
+  Two_Product_2Presplit(a1, a1hi, a1lo, b0, bhi, blo, _j, _0);          \
+  Two_Sum(_i, _0, _k, _1);                                              \
+  Fast_Two_Sum(_j, _k, _l, _2);                                         \
+  Split(b1, bhi, blo);                                                  \
+  Two_Product_2Presplit(a0, a0hi, a0lo, b1, bhi, blo, _i, _0);          \
+  Two_Sum(_1, _0, _k, x1);                                              \
+  Two_Sum(_2, _k, _j, _1);                                              \
+  Two_Sum(_l, _j, _m, _2);                                              \
+  Two_Product_2Presplit(a1, a1hi, a1lo, b1, bhi, blo, _j, _0);          \
+  Two_Sum(_i, _0, _n, _0);                                              \
+  Two_Sum(_1, _0, _i, x2);                                              \
+  Two_Sum(_2, _i, _k, _1);                                              \
+  Two_Sum(_m, _k, _l, _2);                                              \
+  Two_Sum(_j, _n, _k, _0);                                              \
+  Two_Sum(_1, _0, _j, x3);                                              \
+  Two_Sum(_2, _j, _i, _1);                                              \
+  Two_Sum(_l, _i, _m, _2);                                              \
+  Two_Sum(_1, _k, _i, x4);                                              \
+  Two_Sum(_2, _i, _k, x5);                                              \
+  Two_Sum(_m, _k, x7, x6)
+
+/* Two_Square() squares a length-two expansion (a1, a0) faster than the full */
+/*   Two_Two_Product(); the result x5..x0 is guaranteed to have no more than */
+/*   six (rather than eight) components.                                     */
+
+#define Two_Square(a1, a0, x5, x4, x3, x2, x1, x0) \
+  Square(a0, _j, x0);                              \
+  _0 = a0 + a0;                                    \
+  Two_Product(a1, _0, _k, _1);                     \
+  Two_One_Sum(_k, _1, _j, _l, _2, x1);             \
+  Square(a1, _j, _1);                              \
+  Two_Two_Sum(_j, _1, _l, _2, x5, x4, x3, x2)
+
+REAL splitter; /* = 2^ceiling(p / 2) + 1.  Used to split floats in half. */
+REAL epsilon;  /* = 2^(-p).  Used to estimate roundoff errors. */
+
+/* A set of coefficients used to calculate maximum roundoff errors.          */
+REAL resulterrbound;
+REAL ccwerrboundA, ccwerrboundB, ccwerrboundC;
+REAL o3derrboundA, o3derrboundB, o3derrboundC;
+REAL iccerrboundA, iccerrboundB, iccerrboundC;
+REAL isperrboundA, isperrboundB, isperrboundC;
+
+/*****************************************************************************/
+/*                                                                           */
+/*  doubleprint()   Print the bit representation of a double.                */
+/*                                                                           */
+/*  Useful for debugging exact arithmetic routines.                          */
+/*                                                                           */
+/*****************************************************************************/
+
+/*
+void doubleprint(number)
+double number;
+{
+  unsigned long long no;
+  unsigned long long sign, expo;
+  int exponent;
+  int i, bottomi;
+
+  no = *(unsigned long long *) &number;
+  sign = no & 0x8000000000000000ll;
+  expo = (no >> 52) & 0x7ffll;
+  exponent = (int) expo;
+  exponent = exponent - 1023;
+  if (sign) {
+    printf("-");
+  } else {
+    printf(" ");
+  }
+  if (exponent == -1023) {
+    printf(
+      "0.0000000000000000000000000000000000000000000000000000_     (   )");
+  } else {
+    printf("1.");
+    bottomi = -1;
+    for (i = 0; i < 52; i++) {
+      if (no & 0x0008000000000000ll) {
+        printf("1");
+        bottomi = i;
+      } else {
+        printf("0");
+      }
+      no <<= 1;
+    }
+    printf("_%d  (%d)", exponent, exponent - 1 - bottomi);
+  }
+}
+*/
+
+/*****************************************************************************/
+/*                                                                           */
+/*  floatprint()   Print the bit representation of a float.                  */
+/*                                                                           */
+/*  Useful for debugging exact arithmetic routines.                          */
+/*                                                                           */
+/*****************************************************************************/
+
+/*
+void floatprint(number)
+float number;
+{
+  unsigned no;
+  unsigned sign, expo;
+  int exponent;
+  int i, bottomi;
+
+  no = *(unsigned *) &number;
+  sign = no & 0x80000000;
+  expo = (no >> 23) & 0xff;
+  exponent = (int) expo;
+  exponent = exponent - 127;
+  if (sign) {
+    printf("-");
+  } else {
+    printf(" ");
+  }
+  if (exponent == -127) {
+    printf("0.00000000000000000000000_     (   )");
+  } else {
+    printf("1.");
+    bottomi = -1;
+    for (i = 0; i < 23; i++) {
+      if (no & 0x00400000) {
+        printf("1");
+        bottomi = i;
+      } else {
+        printf("0");
+      }
+      no <<= 1;
+    }
+    printf("_%3d  (%3d)", exponent, exponent - 1 - bottomi);
+  }
+}
+*/
+
+/*****************************************************************************/
+/*                                                                           */
+/*  expansion_print()   Print the bit representation of an expansion.        */
+/*                                                                           */
+/*  Useful for debugging exact arithmetic routines.                          */
+/*                                                                           */
+/*****************************************************************************/
+
+/*
+void expansion_print(elen, e)
+int elen;
+REAL *e;
+{
+  int i;
+
+  for (i = elen - 1; i >= 0; i--) {
+    REALPRINT(e[i]);
+    if (i > 0) {
+      printf(" +\n");
+    } else {
+      printf("\n");
+    }
+  }
+}
+*/
+
+/*****************************************************************************/
+/*                                                                           */
+/*  doublerand()   Generate a double with random 53-bit significand and a    */
+/*                 random exponent in [0, 511].                              */
+/*                                                                           */
+/*****************************************************************************/
+
+double doublerand()
+{
+  /* draw order: high significand part, low significand part, exponent bits */
+  long hi   = random();
+  long lo   = random();
+  long bits = random();
+
+  double value  = (double)(hi - 1073741824) * 8388608.0 + (double)(lo >> 8);
+  double factor = 2;
+  long mask     = 512;
+
+  while(mask <= 131072) /* bits 512 ... 131072 of the third draw */
+    {
+      if(bits & mask)
+        value *= factor;
+      mask *= 2;
+      factor = factor * factor; /* 2, 4, 16, 256, ... */
+    }
+  return value;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  narrowdoublerand()   Generate a double with random 53-bit significand    */
+/*                       and a random exponent in [0, 7].                    */
+/*                                                                           */
+/*****************************************************************************/
+
+double narrowdoublerand()
+{
+  /* three draws, consumed in this order: significand (high, low), exponent bits */
+  long hi   = random();
+  long lo   = random();
+  long bits = random();
+
+  double value  = (double)(hi - 1073741824) * 8388608.0 + (double)(lo >> 8);
+  double factor = 2;
+  long mask     = 512;
+
+  while(mask <= 2048) /* only bits 512, 1024 and 2048 of the third draw are used */
+    {
+      if(bits & mask)
+        value *= factor;
+      mask *= 2;
+      factor = factor * factor; /* 2, 4, 16 */
+    }
+  return value;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  uniformdoublerand()   Generate a double with random 53-bit significand.  */
+/*                                                                           */
+/*****************************************************************************/
+
+double uniformdoublerand()
+{
+  /* first draw: high part of the significand, second draw: low part */
+  long hi = random();
+  long lo = random();
+
+  /* (hi - 2^30) * 2^23 + (lo >> 8) */
+  double value = (double)(hi - 1073741824) * 8388608.0 + (double)(lo >> 8);
+  return value;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  floatrand()   Generate a float with random 24-bit significand and a      */
+/*                random exponent in [0, 63].                                */
+/*                                                                           */
+/*****************************************************************************/
+
+float floatrand()
+{
+  /* draw order: significand first, then the exponent bits */
+  long sig  = random();
+  long bits = random();
+
+  float value  = (float)((sig - 1073741824) >> 6);
+  float factor = 2;
+  long mask    = 512;
+
+  while(mask <= 16384) /* bits 512 ... 16384 of the second draw */
+    {
+      if(bits & mask)
+        value *= factor;
+      mask *= 2;
+      factor = factor * factor; /* 2, 4, 16, 256, ... */
+    }
+  return value;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  narrowfloatrand()   Generate a float with random 24-bit significand and  */
+/*                      a random exponent in [0, 7].                         */
+/*                                                                           */
+/*****************************************************************************/
+
+float narrowfloatrand()
+{
+  /* two draws, consumed in this order: significand, exponent bits */
+  long sig  = random();
+  long bits = random();
+
+  float value  = (float)((sig - 1073741824) >> 6);
+  float factor = 2;
+  long mask    = 512;
+
+  while(mask <= 2048) /* only bits 512, 1024 and 2048 of the second draw are used */
+    {
+      if(bits & mask)
+        value *= factor;
+      mask *= 2;
+      factor = factor * factor; /* 2, 4, 16 */
+    }
+  return value;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  uniformfloatrand()   Generate a float with random 24-bit significand.    */
+/*                                                                           */
+/*****************************************************************************/
+
+float uniformfloatrand()
+{
+  long sig = random();
+
+  /* (sig - 2^30) >> 6, converted to float */
+  float value = (float)((sig - 1073741824) >> 6);
+
+  return value;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  exactinit()   Initialize the variables used for exact arithmetic.        */
+/*                                                                           */
+/*  `epsilon' is the largest power of two such that 1.0 + epsilon = 1.0 in   */
+/*  floating-point arithmetic.  `epsilon' bounds the relative roundoff       */
+/*  error.  It is used for floating-point error analysis.                    */
+/*                                                                           */
+/*  `splitter' is used to split floating-point numbers into two half-        */
+/*  length significands for exact multiplication.                            */
+/*                                                                           */
+/*  I imagine that a highly optimizing compiler might be too smart for its   */
+/*  own good, and somehow cause this routine to fail, if it pretends that    */
+/*  floating-point arithmetic is too much like real arithmetic.              */
+/*                                                                           */
+/*  Don't change this routine unless you fully understand it.                */
+/*                                                                           */
+/*****************************************************************************/
+
+void exactinit()
+{
+  REAL half;
+  REAL check, lastcheck;
+  int every_other;
+
+  every_other = 1; /* `splitter' is doubled on the 1st, 3rd, 5th, ... halving of `epsilon' */
+  half        = 0.5;
+  epsilon     = 1.0;
+  splitter    = 1.0;
+  check       = 1.0;
+  /* Repeatedly divide `epsilon' by two until it is too small to add to    */
+  /*   one without causing roundoff.  (Also check if the sum is equal to   */
+  /*   the previous sum, for machines that round up instead of using exact */
+  /*   rounding.  Not that this library will work on such machines anyway.)*/
+  do
+    {
+      lastcheck = check;
+      epsilon *= half;
+      if(every_other)
+        {
+          splitter *= 2.0;
+        }
+      every_other = !every_other;
+      check       = 1.0 + epsilon;
+    }
+  while((check != 1.0) && (check != lastcheck));
+  splitter += 1.0; /* for IEEE doubles with round-to-even this should be 2^27 + 1 (epsilon = 2^-53) -- TODO confirm */
+
+  /* Error bounds for orientation and incircle tests; each is a fixed polynomial in `epsilon'. */
+  resulterrbound = (3.0 + 8.0 * epsilon) * epsilon;
+  ccwerrboundA   = (3.0 + 16.0 * epsilon) * epsilon;
+  ccwerrboundB   = (2.0 + 12.0 * epsilon) * epsilon;
+  ccwerrboundC   = (9.0 + 64.0 * epsilon) * epsilon * epsilon;
+  o3derrboundA   = (7.0 + 56.0 * epsilon) * epsilon;
+  o3derrboundB   = (3.0 + 28.0 * epsilon) * epsilon;
+  o3derrboundC   = (26.0 + 288.0 * epsilon) * epsilon * epsilon;
+  iccerrboundA   = (10.0 + 96.0 * epsilon) * epsilon;
+  iccerrboundB   = (4.0 + 48.0 * epsilon) * epsilon;
+  iccerrboundC   = (44.0 + 576.0 * epsilon) * epsilon * epsilon;
+  isperrboundA   = (16.0 + 224.0 * epsilon) * epsilon;
+  isperrboundB   = (5.0 + 72.0 * epsilon) * epsilon;
+  isperrboundC   = (71.0 + 1408.0 * epsilon) * epsilon * epsilon;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  grow_expansion()   Add a scalar to an expansion.                         */
+/*                                                                           */
+/*  Sets h = e + b.  See the long version of my paper for details.           */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), maintains the strongly nonoverlapping and nonadjacent    */
+/*  properties as well.  (That is, if e has one of these properties, so      */
+/*  will h.)                                                                 */
+/*                                                                           */
+/*****************************************************************************/
+
+int grow_expansion(elen, e, b, h) /* Sets h = e + b; e and h can be the same. */
+    int elen; /* number of components in e */
+REAL *e; /* input expansion */
+REAL b; /* scalar to add */
+REAL *h; /* output; always receives elen + 1 components */
+{
+  REAL Q; /* running sum carried from one component to the next */
+  INEXACT REAL Qnew;
+  int eindex;
+  REAL enow;
+  INEXACT REAL bvirt; /* bvirt, avirt, bround, around are not named in this body; */
+  REAL avirt, bround, around; /*   presumably scratch for the Two_Sum macro -- confirm */
+
+  Q = b;
+  for(eindex = 0; eindex < elen; eindex++)
+    {
+      enow = e[eindex]; /* read before h[eindex] is written, so e == h is safe */
+      Two_Sum(Q, enow, Qnew, h[eindex]); /* macro defined elsewhere; presumably Qnew = rounded sum, h[eindex] = roundoff -- confirm */
+      Q = Qnew;
+    }
+  h[eindex] = Q; /* final carry is stored at index elen */
+  return eindex + 1; /* == elen + 1 */
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  grow_expansion_zeroelim()   Add a scalar to an expansion, eliminating    */
+/*                              zero components from the output expansion.   */
+/*                                                                           */
+/*  Sets h = e + b.  See the long version of my paper for details.           */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), maintains the strongly nonoverlapping and nonadjacent    */
+/*  properties as well.  (That is, if e has one of these properties, so      */
+/*  will h.)                                                                 */
+/*                                                                           */
+/*****************************************************************************/
+
+int grow_expansion_zeroelim(elen, e, b, h) /* Sets h = e + b, skipping zero components; e and h can be the same. */
+    int elen; /* number of components in e */
+REAL *e; /* input expansion */
+REAL b; /* scalar to add */
+REAL *h; /* output; receives between 1 and elen + 1 components */
+{
+  REAL Q, hh; /* Q: running sum; hh: fourth Two_Sum output, stored only when nonzero */
+  INEXACT REAL Qnew;
+  int eindex, hindex; /* read position in e / write position in h */
+  REAL enow;
+  INEXACT REAL bvirt; /* bvirt, avirt, bround, around are not named in this body; */
+  REAL avirt, bround, around; /*   presumably scratch for the Two_Sum macro -- confirm */
+
+  hindex = 0;
+  Q      = b;
+  for(eindex = 0; eindex < elen; eindex++)
+    {
+      enow = e[eindex];
+      Two_Sum(Q, enow, Qnew, hh); /* macro defined elsewhere; presumably Qnew = rounded sum, hh = roundoff -- confirm */
+      Q = Qnew;
+      if(hh != 0.0)
+        {
+          h[hindex++] = hh; /* hindex <= eindex here and e[eindex] is already read, so e == h is safe */
+        }
+    }
+  if((Q != 0.0) || (hindex == 0)) /* always emit at least one component, even when it is zero */
+    {
+      h[hindex++] = Q;
+    }
+  return hindex; /* number of components written to h */
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  expansion_sum()   Sum two expansions.                                    */
+/*                                                                           */
+/*  Sets h = e + f.  See the long version of my paper for details.           */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), maintains the nonadjacent property as well.  (That is,   */
+/*  if e has one of these properties, so will h.)  Does NOT maintain the     */
+/*  strongly nonoverlapping property.                                        */
+/*                                                                           */
+/*****************************************************************************/
+
+int expansion_sum(elen, e, flen, f, h)
+    /* Sets h = e + f.  e and h can be the same, but f and h cannot. */
+    int elen; /* number of components in e */
+REAL *e;
+int flen; /* number of components in f; f[0] is read unconditionally, so at least 1 */
+REAL *f;
+REAL *h; /* output; always receives elen + flen components */
+{
+  REAL Q; /* running sum carried from one component to the next */
+  INEXACT REAL Qnew;
+  int findex, hindex, hlast; /* hlast: index of the last component currently held in h */
+  REAL hnow;
+  INEXACT REAL bvirt; /* bvirt, avirt, bround, around are not named in this body; */
+  REAL avirt, bround, around; /*   presumably scratch for the Two_Sum macro -- confirm */
+
+  Q = f[0];
+  for(hindex = 0; hindex < elen; hindex++) /* first pass: add f[0] through every component of e */
+    {
+      hnow = e[hindex];
+      Two_Sum(Q, hnow, Qnew, h[hindex]); /* macro defined elsewhere; presumably Qnew = rounded sum, h[hindex] = roundoff -- confirm */
+      Q = Qnew;
+    }
+  h[hindex] = Q;
+  hlast     = hindex; /* h now holds elen + 1 components */
+  for(findex = 1; findex < flen; findex++) /* each further f component grows h by one */
+    {
+      Q = f[findex];
+      for(hindex = findex; hindex <= hlast; hindex++) /* starts at findex: h[0 .. findex-1] are left untouched */
+        {
+          hnow = h[hindex];
+          Two_Sum(Q, hnow, Qnew, h[hindex]);
+          Q = Qnew;
+        }
+      h[++hlast] = Q;
+    }
+  return hlast + 1; /* == elen + flen */
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  expansion_sum_zeroelim1()   Sum two expansions, eliminating zero         */
+/*                              components from the output expansion.        */
+/*                                                                           */
+/*  Sets h = e + f.  See the long version of my paper for details.           */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), maintains the nonadjacent property as well.  (That is,   */
+/*  if e has one of these properties, so will h.)  Does NOT maintain the     */
+/*  strongly nonoverlapping property.                                        */
+/*                                                                           */
+/*****************************************************************************/
+
+int expansion_sum_zeroelim1(elen, e, flen, f, h)
+    /* Sets h = e + f, then removes zero components.  e and h can be the same, but f and h cannot. */
+    int elen; /* number of components in e */
+REAL *e;
+int flen; /* number of components in f; f[0] is read unconditionally, so at least 1 */
+REAL *f;
+REAL *h; /* output; needs room for elen + flen components before compaction */
+{
+  REAL Q; /* running sum carried from one component to the next */
+  INEXACT REAL Qnew;
+  int index, findex, hindex, hlast; /* hlast: index of the last component currently held in h */
+  REAL hnow;
+  INEXACT REAL bvirt; /* bvirt, avirt, bround, around are not named in this body; */
+  REAL avirt, bround, around; /*   presumably scratch for the Two_Sum macro -- confirm */
+
+  Q = f[0];
+  for(hindex = 0; hindex < elen; hindex++) /* first pass: add f[0] through every component of e */
+    {
+      hnow = e[hindex];
+      Two_Sum(Q, hnow, Qnew, h[hindex]); /* macro defined elsewhere; presumably Qnew = rounded sum, h[hindex] = roundoff -- confirm */
+      Q = Qnew;
+    }
+  h[hindex] = Q;
+  hlast     = hindex;
+  for(findex = 1; findex < flen; findex++) /* each further f component grows h by one */
+    {
+      Q = f[findex];
+      for(hindex = findex; hindex <= hlast; hindex++)
+        {
+          hnow = h[hindex];
+          Two_Sum(Q, hnow, Qnew, h[hindex]);
+          Q = Qnew;
+        }
+      h[++hlast] = Q;
+    }
+  hindex = -1; /* from here on: index of the last nonzero component kept so far */
+  for(index = 0; index <= hlast; index++) /* compact h in place, preserving order */
+    {
+      hnow = h[index];
+      if(hnow != 0.0)
+        {
+          h[++hindex] = hnow;
+        }
+    }
+  if(hindex == -1)
+    {
+      return 1; /* every component was zero: report length 1; h[0] still holds a zero */
+    }
+  else
+    {
+      return hindex + 1;
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  expansion_sum_zeroelim2()   Sum two expansions, eliminating zero         */
+/*                              components from the output expansion.        */
+/*                                                                           */
+/*  Sets h = e + f.  See the long version of my paper for details.           */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), maintains the nonadjacent property as well.  (That is,   */
+/*  if e has one of these properties, so will h.)  Does NOT maintain the     */
+/*  strongly nonoverlapping property.                                        */
+/*                                                                           */
+/*****************************************************************************/
+
+int expansion_sum_zeroelim2(elen, e, flen, f, h)
+    /* Sets h = e + f, skipping zero components as it goes.  e and h can be the same, but f and h cannot. */
+    int elen; /* number of components in e */
+REAL *e;
+int flen; /* number of components in f; f[0] is read unconditionally, so at least 1 */
+REAL *f;
+REAL *h; /* output; receives between 1 and elen + flen components */
+{
+  REAL Q, hh; /* Q: running sum; hh: fourth Two_Sum output, stored only when nonzero */
+  INEXACT REAL Qnew;
+  int eindex, findex, hindex, hlast; /* hlast: index of the last component currently held in h */
+  REAL enow;
+  INEXACT REAL bvirt; /* bvirt, avirt, bround, around are not named in this body; */
+  REAL avirt, bround, around; /*   presumably scratch for the Two_Sum macro -- confirm */
+
+  hindex = 0;
+  Q      = f[0];
+  for(eindex = 0; eindex < elen; eindex++) /* first pass: add f[0] through every component of e */
+    {
+      enow = e[eindex];
+      Two_Sum(Q, enow, Qnew, hh); /* macro defined elsewhere; presumably Qnew = rounded sum, hh = roundoff -- confirm */
+      Q = Qnew;
+      if(hh != 0.0)
+        {
+          h[hindex++] = hh;
+        }
+    }
+  h[hindex] = Q; /* stored unconditionally, even when zero */
+  hlast     = hindex;
+  for(findex = 1; findex < flen; findex++)
+    {
+      hindex = 0; /* every pass re-reads h from index 0 and rewrites it in place */
+      Q      = f[findex];
+      for(eindex = 0; eindex <= hlast; eindex++)
+        {
+          enow = h[eindex]; /* hindex <= eindex, so this read is never of a slot written in this pass */
+          Two_Sum(Q, enow, Qnew, hh);
+          Q = Qnew;
+          if(hh != 0)
+            {
+              h[hindex++] = hh;
+            }
+        }
+      h[hindex] = Q;
+      hlast     = hindex;
+    }
+  return hlast + 1; /* number of components left in h */
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  fast_expansion_sum()   Sum two expansions.                               */
+/*                                                                           */
+/*  Sets h = e + f.  See the long version of my paper for details.           */
+/*                                                                           */
+/*  If round-to-even is used (as with IEEE 754), maintains the strongly      */
+/*  nonoverlapping property.  (That is, if e is strongly nonoverlapping, h   */
+/*  will be also.)  Does NOT maintain the nonoverlapping or nonadjacent      */
+/*  properties.                                                              */
+/*                                                                           */
+/*****************************************************************************/
+
+int fast_expansion_sum(elen, e, flen, f, h) /* Sets h = e + f; h cannot be e or f. */
+    int elen; /* number of components in e; at least 1, because e[0] is always read */
+REAL *e;
+int flen; /* number of components in f; at least 1, because f[0] is always read */
+REAL *f;
+REAL *h; /* output; always receives elen + flen components */
+{
+  REAL Q; /* running sum carried from one merge step to the next */
+  INEXACT REAL Qnew;
+  INEXACT REAL bvirt; /* bvirt, avirt, bround, around are not named in this body; */
+  REAL avirt, bround, around; /*   presumably scratch for the Two_Sum / Fast_Two_Sum macros -- confirm */
+  int eindex, findex, hindex;
+  REAL enow, fnow; /* next unconsumed component of e / f; meaningful only while the index is in range */
+
+  enow   = e[0];
+  fnow   = f[0];
+  eindex = findex = 0;
+  if((fnow > enow) == (fnow > -enow)) /* true iff fnow > |enow| or fnow <= -|enow|: take e's component first */
+    {
+      Q    = enow;
+      if(++eindex < elen) enow = e[eindex]; /* advance, but never load e[elen] (one past the end) */
+    }
+  else
+    {
+      Q    = fnow;
+      if(++findex < flen) fnow = f[findex]; /* advance, but never load f[flen] (one past the end) */
+    }
+  hindex = 0;
+  if((eindex < elen) && (findex < flen))
+    {
+      if((fnow > enow) == (fnow > -enow))
+        {
+          Fast_Two_Sum(enow, Q, Qnew, h[0]); /* macro defined elsewhere; presumably Qnew = rounded sum, h[0] = roundoff -- confirm */
+          if(++eindex < elen) enow = e[eindex];
+        }
+      else
+        {
+          Fast_Two_Sum(fnow, Q, Qnew, h[0]);
+          if(++findex < flen) fnow = f[findex];
+        }
+      Q      = Qnew;
+      hindex = 1;
+      while((eindex < elen) && (findex < flen)) /* merge while both inputs still have components */
+        {
+          if((fnow > enow) == (fnow > -enow))
+            {
+              Two_Sum(Q, enow, Qnew, h[hindex]);
+              if(++eindex < elen) enow = e[eindex];
+            }
+          else
+            {
+              Two_Sum(Q, fnow, Qnew, h[hindex]);
+              if(++findex < flen) fnow = f[findex];
+            }
+          Q = Qnew;
+          hindex++;
+        }
+    }
+  while(eindex < elen) /* drain whatever is left of e */
+    {
+      Two_Sum(Q, enow, Qnew, h[hindex]);
+      if(++eindex < elen) enow = e[eindex];
+      Q    = Qnew;
+      hindex++;
+    }
+  while(findex < flen) /* drain whatever is left of f */
+    {
+      Two_Sum(Q, fnow, Qnew, h[hindex]);
+      if(++findex < flen) fnow = f[findex];
+      Q    = Qnew;
+      hindex++;
+    }
+  h[hindex] = Q; /* final carry is stored last */
+  return hindex + 1; /* == elen + flen */
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  fast_expansion_sum_zeroelim()   Sum two expansions, eliminating zero     */
+/*                                  components from the output expansion.    */
+/*                                                                           */
+/*  Sets h = e + f.  See the long version of my paper for details.           */
+/*                                                                           */
+/*  If round-to-even is used (as with IEEE 754), maintains the strongly      */
+/*  nonoverlapping property.  (That is, if e is strongly nonoverlapping, h   */
+/*  will be also.)  Does NOT maintain the nonoverlapping or nonadjacent      */
+/*  properties.                                                              */
+/*                                                                           */
+/*****************************************************************************/
+
+int fast_expansion_sum_zeroelim(elen, e, flen, f, h) /* Sets h = e + f, skipping zero components; h cannot be e or f. */
+    int elen; /* number of components in e; at least 1, because e[0] is always read */
+REAL *e;
+int flen; /* number of components in f; at least 1, because f[0] is always read */
+REAL *f;
+REAL *h; /* output; receives between 1 and elen + flen components */
+{
+  REAL Q; /* running sum carried from one merge step to the next */
+  INEXACT REAL Qnew;
+  INEXACT REAL hh; /* low-order output of each sum macro; stored only when nonzero */
+  INEXACT REAL bvirt; /* bvirt, avirt, bround, around are not named in this body; */
+  REAL avirt, bround, around; /*   presumably scratch for the Two_Sum / Fast_Two_Sum macros -- confirm */
+  int eindex, findex, hindex;
+  REAL enow, fnow; /* next unconsumed component of e / f; meaningful only while the index is in range */
+
+  enow   = e[0];
+  fnow   = f[0];
+  eindex = findex = 0;
+  if((fnow > enow) == (fnow > -enow)) /* true iff fnow > |enow| or fnow <= -|enow|: take e's component first */
+    {
+      Q    = enow;
+      if(++eindex < elen) enow = e[eindex]; /* advance, but never load e[elen] (one past the end) */
+    }
+  else
+    {
+      Q    = fnow;
+      if(++findex < flen) fnow = f[findex]; /* advance, but never load f[flen] (one past the end) */
+    }
+  hindex = 0;
+  if((eindex < elen) && (findex < flen))
+    {
+      if((fnow > enow) == (fnow > -enow))
+        {
+          Fast_Two_Sum(enow, Q, Qnew, hh); /* macro defined elsewhere; presumably Qnew = rounded sum, hh = roundoff -- confirm */
+          if(++eindex < elen) enow = e[eindex];
+        }
+      else
+        {
+          Fast_Two_Sum(fnow, Q, Qnew, hh);
+          if(++findex < flen) fnow = f[findex];
+        }
+      Q = Qnew;
+      if(hh != 0.0)
+        {
+          h[hindex++] = hh;
+        }
+      while((eindex < elen) && (findex < flen)) /* merge while both inputs still have components */
+        {
+          if((fnow > enow) == (fnow > -enow))
+            {
+              Two_Sum(Q, enow, Qnew, hh);
+              if(++eindex < elen) enow = e[eindex];
+            }
+          else
+            {
+              Two_Sum(Q, fnow, Qnew, hh);
+              if(++findex < flen) fnow = f[findex];
+            }
+          Q = Qnew;
+          if(hh != 0.0)
+            {
+              h[hindex++] = hh;
+            }
+        }
+    }
+  while(eindex < elen) /* drain whatever is left of e */
+    {
+      Two_Sum(Q, enow, Qnew, hh);
+      if(++eindex < elen) enow = e[eindex];
+      Q    = Qnew;
+      if(hh != 0.0)
+        {
+          h[hindex++] = hh;
+        }
+    }
+  while(findex < flen) /* drain whatever is left of f */
+    {
+      Two_Sum(Q, fnow, Qnew, hh);
+      if(++findex < flen) fnow = f[findex];
+      Q    = Qnew;
+      if(hh != 0.0)
+        {
+          h[hindex++] = hh;
+        }
+    }
+  if((Q != 0.0) || (hindex == 0)) /* always emit at least one component, even when it is zero */
+    {
+      h[hindex++] = Q;
+    }
+  return hindex; /* number of components written to h */
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  linear_expansion_sum()   Sum two expansions.                             */
+/*                                                                           */
+/*  Sets h = e + f.  See either version of my paper for details.             */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  (That is, if e is                */
+/*  nonoverlapping, h will be also.)                                         */
+/*                                                                           */
+/*****************************************************************************/
+
+int linear_expansion_sum(elen, e, flen, f, h) /* Sets h = e + f; h cannot be e or f. */
+    int elen; /* number of components in e; at least 1, because e[0] is always read */
+REAL *e;
+int flen; /* number of components in f; at least 1, because f[0] is always read */
+REAL *f;
+REAL *h; /* output; always receives elen + flen components */
+{
+  REAL Q, q; /* two-word running sum: Q is the high word, q the low word */
+  INEXACT REAL Qnew;
+  INEXACT REAL R;
+  INEXACT REAL bvirt; /* bvirt, avirt, bround, around are not named in this body; */
+  REAL avirt, bround, around; /*   presumably scratch for the Two_Sum / Fast_Two_Sum macros -- confirm */
+  int eindex, findex, hindex;
+  REAL enow, fnow; /* next unconsumed component of e / f; meaningful only while the index is in range */
+  REAL g0; /* the first component consumed */
+
+  enow   = e[0];
+  fnow   = f[0];
+  eindex = findex = 0;
+  if((fnow > enow) == (fnow > -enow)) /* true iff fnow > |enow| or fnow <= -|enow|: take e's component first */
+    {
+      g0   = enow;
+      if(++eindex < elen) enow = e[eindex]; /* advance, but never load e[elen] (one past the end) */
+    }
+  else
+    {
+      g0   = fnow;
+      if(++findex < flen) fnow = f[findex]; /* advance, but never load f[flen] (one past the end) */
+    }
+  if((eindex < elen) && ((findex >= flen) || ((fnow > enow) == (fnow > -enow)))) /* short-circuits keep exhausted inputs out of the comparison */
+    {
+      Fast_Two_Sum(enow, g0, Qnew, q); /* macro defined elsewhere; presumably Qnew = rounded sum, q = roundoff -- confirm */
+      if(++eindex < elen) enow = e[eindex];
+    }
+  else
+    {
+      Fast_Two_Sum(fnow, g0, Qnew, q);
+      if(++findex < flen) fnow = f[findex];
+    }
+  Q = Qnew;
+  for(hindex = 0; hindex < elen + flen - 2; hindex++) /* two components are already consumed; one output per remaining component */
+    {
+      if((eindex < elen) && ((findex >= flen) || ((fnow > enow) == (fnow > -enow))))
+        {
+          Fast_Two_Sum(enow, q, R, h[hindex]);
+          if(++eindex < elen) enow = e[eindex];
+        }
+      else
+        {
+          Fast_Two_Sum(fnow, q, R, h[hindex]);
+          if(++findex < flen) fnow = f[findex];
+        }
+      Two_Sum(Q, R, Qnew, q);
+      Q = Qnew;
+    }
+  h[hindex]     = q; /* the two words of the running sum are stored last */
+  h[hindex + 1] = Q;
+  return hindex + 2; /* == elen + flen */
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  linear_expansion_sum_zeroelim()   Sum two expansions, eliminating zero   */
+/*                                    components from the output expansion.  */
+/*                                                                           */
+/*  Sets h = e + f.  See either version of my paper for details.             */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  (That is, if e is                */
+/*  nonoverlapping, h will be also.)                                         */
+/*                                                                           */
+/*****************************************************************************/
+
+int linear_expansion_sum_zeroelim(elen, e, flen, f, h) /* Sets h = e + f, skipping zero components; h cannot be e or f. */
+    int elen; /* number of components in e; at least 1, because e[0] is always read */
+REAL *e;
+int flen; /* number of components in f; at least 1, because f[0] is always read */
+REAL *f;
+REAL *h; /* output; receives between 1 and elen + flen components */
+{
+  REAL Q, q, hh; /* Q/q: high and low word of the running sum; hh: candidate output, stored only when nonzero */
+  INEXACT REAL Qnew;
+  INEXACT REAL R;
+  INEXACT REAL bvirt; /* bvirt, avirt, bround, around are not named in this body; */
+  REAL avirt, bround, around; /*   presumably scratch for the Two_Sum / Fast_Two_Sum macros -- confirm */
+  int eindex, findex, hindex;
+  int count; /* number of input components consumed so far */
+  REAL enow, fnow; /* next unconsumed component of e / f; meaningful only while the index is in range */
+  REAL g0; /* the first component consumed */
+
+  enow   = e[0];
+  fnow   = f[0];
+  eindex = findex = 0;
+  hindex          = 0;
+  if((fnow > enow) == (fnow > -enow)) /* true iff fnow > |enow| or fnow <= -|enow|: take e's component first */
+    {
+      g0   = enow;
+      if(++eindex < elen) enow = e[eindex]; /* advance, but never load e[elen] (one past the end) */
+    }
+  else
+    {
+      g0   = fnow;
+      if(++findex < flen) fnow = f[findex]; /* advance, but never load f[flen] (one past the end) */
+    }
+  if((eindex < elen) && ((findex >= flen) || ((fnow > enow) == (fnow > -enow)))) /* short-circuits keep exhausted inputs out of the comparison */
+    {
+      Fast_Two_Sum(enow, g0, Qnew, q); /* macro defined elsewhere; presumably Qnew = rounded sum, q = roundoff -- confirm */
+      if(++eindex < elen) enow = e[eindex];
+    }
+  else
+    {
+      Fast_Two_Sum(fnow, g0, Qnew, q);
+      if(++findex < flen) fnow = f[findex];
+    }
+  Q = Qnew;
+  for(count = 2; count < elen + flen; count++) /* two components are already consumed */
+    {
+      if((eindex < elen) && ((findex >= flen) || ((fnow > enow) == (fnow > -enow))))
+        {
+          Fast_Two_Sum(enow, q, R, hh);
+          if(++eindex < elen) enow = e[eindex];
+        }
+      else
+        {
+          Fast_Two_Sum(fnow, q, R, hh);
+          if(++findex < flen) fnow = f[findex];
+        }
+      Two_Sum(Q, R, Qnew, q);
+      Q = Qnew;
+      if(hh != 0)
+        {
+          h[hindex++] = hh;
+        }
+    }
+  if(q != 0)
+    {
+      h[hindex++] = q;
+    }
+  if((Q != 0.0) || (hindex == 0)) /* always emit at least one component, even when it is zero */
+    {
+      h[hindex++] = Q;
+    }
+  return hindex; /* number of components written to h */
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  scale_expansion()   Multiply an expansion by a scalar.                   */
+/*                                                                           */
+/*  Sets h = be.  See either version of my paper for details.                */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), maintains the strongly nonoverlapping and nonadjacent    */
+/*  properties as well.  (That is, if e has one of these properties, so      */
+/*  will h.)                                                                 */
+/*                                                                           */
+/*****************************************************************************/
+
+int scale_expansion(elen, e, b, h) /* Sets h = be; e and h cannot be the same. */
+    int elen; /* number of components in e; at least 1, because e[0] is always read */
+REAL *e; /* input expansion */
+REAL b; /* scalar multiplier */
+REAL *h; /* output; always receives 2 * elen components */
+{
+  INEXACT REAL Q; /* running carry between components */
+  INEXACT REAL sum;
+  INEXACT REAL product1; /* product1 / product0: the two outputs of Two_Product_Presplit for enow * b */
+  REAL product0;
+  int eindex, hindex;
+  REAL enow;
+  INEXACT REAL bvirt; /* bvirt .. err3 (apart from bhi, blo) are not named in this body; */
+  REAL avirt, bround, around; /*   presumably scratch for the Split / Two_Product_Presplit / */
+  INEXACT REAL c; /*   Two_Sum macros, which are defined elsewhere -- confirm */
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo; /* bhi, blo: filled by Split(b, ...) once and reused for every product */
+  REAL err1, err2, err3;
+
+  Split(b, bhi, blo);
+  Two_Product_Presplit(e[0], b, bhi, blo, Q, h[0]);
+  hindex = 1;
+  for(eindex = 1; eindex < elen; eindex++) /* each further component of e contributes two outputs */
+    {
+      enow = e[eindex];
+      Two_Product_Presplit(enow, b, bhi, blo, product1, product0);
+      Two_Sum(Q, product0, sum, h[hindex]);
+      hindex++;
+      Two_Sum(product1, sum, Q, h[hindex]);
+      hindex++;
+    }
+  h[hindex] = Q; /* hindex == 2 * elen - 1 here */
+  return elen + elen;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  scale_expansion_zeroelim()   Multiply an expansion by a scalar,          */
+/*                               eliminating zero components from the        */
+/*                               output expansion.                           */
+/*                                                                           */
+/*  Sets h = be.  See either version of my paper for details.                */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), maintains the strongly nonoverlapping and nonadjacent    */
+/*  properties as well.  (That is, if e has one of these properties, so      */
+/*  will h.)                                                                 */
+/*                                                                           */
+/*****************************************************************************/
+
+int scale_expansion_zeroelim(elen, e, b, h) /* Sets h = be, skipping zero components; e and h cannot be the same. */
+    int elen; /* number of components in e; at least 1, because e[0] is always read */
+REAL *e; /* input expansion */
+REAL b; /* scalar multiplier */
+REAL *h; /* output; receives between 1 and 2 * elen components */
+{
+  INEXACT REAL Q, sum; /* Q: running carry between components */
+  REAL hh; /* candidate output component; stored only when nonzero */
+  INEXACT REAL product1; /* product1 / product0: the two outputs of Two_Product_Presplit for enow * b */
+  REAL product0;
+  int eindex, hindex;
+  REAL enow;
+  INEXACT REAL bvirt; /* bvirt .. err3 (apart from bhi, blo) are not named in this body; */
+  REAL avirt, bround, around; /*   presumably scratch for the Split / Two_Product_Presplit / */
+  INEXACT REAL c; /*   Two_Sum / Fast_Two_Sum macros, which are defined elsewhere -- confirm */
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo; /* bhi, blo: filled by Split(b, ...) once and reused for every product */
+  REAL err1, err2, err3;
+
+  Split(b, bhi, blo);
+  Two_Product_Presplit(e[0], b, bhi, blo, Q, hh);
+  hindex = 0;
+  if(hh != 0)
+    {
+      h[hindex++] = hh;
+    }
+  for(eindex = 1; eindex < elen; eindex++) /* each further component of e yields up to two outputs */
+    {
+      enow = e[eindex];
+      Two_Product_Presplit(enow, b, bhi, blo, product1, product0);
+      Two_Sum(Q, product0, sum, hh);
+      if(hh != 0)
+        {
+          h[hindex++] = hh;
+        }
+      Fast_Two_Sum(product1, sum, Q, hh); /* note: Fast_Two_Sum here, whereas scale_expansion() uses Two_Sum at this step */
+      if(hh != 0)
+        {
+          h[hindex++] = hh;
+        }
+    }
+  if((Q != 0.0) || (hindex == 0)) /* always emit at least one component, even when it is zero */
+    {
+      h[hindex++] = Q;
+    }
+  return hindex; /* number of components written to h */
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  compress()   Compress an expansion.                                      */
+/*                                                                           */
+/*  See the long version of my paper for details.                            */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), then any nonoverlapping expansion is converted to a      */
+/*  nonadjacent expansion.                                                   */
+/*                                                                           */
+/*****************************************************************************/
+
+int compress(elen, e, h) /* Compresses expansion e into h; e and h may be the same. */
+    int elen; /* number of components in e; at least 1, because e[elen - 1] is always read */
+REAL *e; /* input expansion */
+REAL *h; /* output; receives between 1 and elen components */
+{
+  REAL Q, q; /* Q: running sum; q: second output of each Fast_Two_Sum */
+  INEXACT REAL Qnew;
+  int eindex, hindex;
+  INEXACT REAL bvirt; /* not named in this body; presumably scratch for the Fast_Two_Sum macro -- confirm */
+  REAL enow, hnow;
+  int top, bottom; /* bottom: next free slot of the downward pass; top: next free slot of the upward pass */
+
+  bottom = elen - 1;
+  Q      = e[bottom];
+  for(eindex = elen - 2; eindex >= 0; eindex--) /* downward pass: from the last component of e towards e[0] */
+    {
+      enow = e[eindex];
+      Fast_Two_Sum(Q, enow, Qnew, q); /* macro defined elsewhere; presumably Qnew = rounded sum, q = roundoff -- confirm */
+      if(q != 0)
+        {
+          h[bottom--] = Qnew; /* bottom > eindex here, so with e == h no unread component is overwritten */
+          Q           = q;
+        }
+      else
+        {
+          Q = Qnew;
+        }
+    }
+  top = 0;
+  for(hindex = bottom + 1; hindex < elen; hindex++) /* upward pass over the slots the downward pass filled */
+    {
+      hnow = h[hindex];
+      Fast_Two_Sum(hnow, Q, Qnew, q);
+      if(q != 0)
+        {
+          h[top++] = q; /* top <= hindex here, so this never clobbers a slot still to be read */
+        }
+      Q = Qnew;
+    }
+  h[top] = Q;
+  return top + 1; /* number of components written to h */
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  estimate()   Produce a one-word estimate of an expansion's value.        */
+/*                                                                           */
+/*  See either version of my paper for details.                              */
+/*                                                                           */
+/*****************************************************************************/
+
+REAL estimate(elen, e) int elen;
+REAL *e;
+{
+  /* Plain floating-point sum of the components, taken in index order. */
+  REAL total = e[0];
+  int k      = 1;
+
+  while(k < elen)
+    {
+      total += e[k++];
+    }
+  return total;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  orient2dfast()   Approximate 2D orientation test.  Nonrobust.            */
+/*  orient2dexact()   Exact 2D orientation test.  Robust.                    */
+/*  orient2dslow()   Another exact 2D orientation test.  Robust.             */
+/*  orient2d()   Adaptive exact 2D orientation test.  Robust.                */
+/*                                                                           */
+/*               Return a positive value if the points pa, pb, and pc occur  */
+/*               in counterclockwise order; a negative value if they occur   */
+/*               in clockwise order; and zero if they are collinear.  The    */
+/*               result is also a rough approximation of twice the signed    */
+/*               area of the triangle defined by the three points.           */
+/*                                                                           */
+/*  Only the first and last routine should be used; the middle two are for   */
+/*  timings.                                                                 */
+/*                                                                           */
+/*  The last three use exact arithmetic to ensure a correct answer.  The     */
+/*  result returned is the determinant of a matrix.  In orient2d() only,     */
+/*  this determinant is computed adaptively, in the sense that exact         */
+/*  arithmetic is used only to the degree it is needed to ensure that the    */
+/*  returned value has the correct sign.  Hence, orient2d() is usually quite */
+/*  fast, but will run more slowly when the input points are collinear or    */
+/*  nearly so.                                                               */
+/*                                                                           */
+/*****************************************************************************/
+
+REAL orient2dfast(pa, pb, pc) REAL *pa;
+REAL *pb;
+REAL *pc;
+{
+  /* Coordinates of pa and pb relative to pc, then the 2x2 determinant. */
+  const REAL ax = pa[0] - pc[0];
+  const REAL ay = pa[1] - pc[1];
+  const REAL bx = pb[0] - pc[0];
+  const REAL by = pb[1] - pc[1];
+
+  return ax * by - ay * bx;
+}
+
+REAL orient2dexact(pa, pb, pc) REAL *pa; /* pa, pb, pc: coordinate arrays; only indices 0 and 1 are read */
+REAL *pb;
+REAL *pc;
+{
+  INEXACT REAL axby1, axcy1, bxcy1, bxay1, cxay1, cxby1; /* ...1 / ...0: the two outputs of each Two_Product */
+  REAL axby0, axcy0, bxcy0, bxay0, cxay0, cxby0;
+  REAL aterms[4], bterms[4], cterms[4]; /* one four-component expansion per pair of products */
+  INEXACT REAL aterms3, bterms3, cterms3;
+  REAL v[8], w[12]; /* v = aterms + bterms (at most 4 + 4 components); w = v + cterms (at most 8 + 4) */
+  int vlength, wlength;
+
+  INEXACT REAL bvirt; /* bvirt .. _0 are not named in this body; presumably scratch for the */
+  REAL avirt, bround, around; /*   Two_Product and Two_Two_Diff macros, defined elsewhere -- confirm */
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j;
+  REAL _0;
+
+  Two_Product(pa[0], pb[1], axby1, axby0);
+  Two_Product(pa[0], pc[1], axcy1, axcy0);
+  Two_Two_Diff(axby1, axby0, axcy1, axcy0, aterms3, aterms[2], aterms[1], aterms[0]); /* presumably aterms = ax*by - ax*cy -- confirm against the macro */
+  aterms[3] = aterms3;
+
+  Two_Product(pb[0], pc[1], bxcy1, bxcy0);
+  Two_Product(pb[0], pa[1], bxay1, bxay0);
+  Two_Two_Diff(bxcy1, bxcy0, bxay1, bxay0, bterms3, bterms[2], bterms[1], bterms[0]); /* presumably bterms = bx*cy - bx*ay -- confirm */
+  bterms[3] = bterms3;
+
+  Two_Product(pc[0], pa[1], cxay1, cxay0);
+  Two_Product(pc[0], pb[1], cxby1, cxby0);
+  Two_Two_Diff(cxay1, cxay0, cxby1, cxby0, cterms3, cterms[2], cterms[1], cterms[0]); /* presumably cterms = cx*ay - cx*by -- confirm */
+  cterms[3] = cterms3;
+
+  vlength = fast_expansion_sum_zeroelim(4, aterms, 4, bterms, v);
+  wlength = fast_expansion_sum_zeroelim(vlength, v, 4, cterms, w);
+
+  return w[wlength - 1]; /* last component of the final expansion */
+}
+
+REAL orient2dslow(pa, pb, pc) REAL *pa; /* pa, pb, pc: coordinate arrays; only indices 0 and 1 are read */
+REAL *pb;
+REAL *pc;
+{
+  INEXACT REAL acx, acy, bcx, bcy; /* first Two_Diff output for each coordinate difference against pc */
+  REAL acxtail, acytail; /* second Two_Diff output for the same differences */
+  REAL bcxtail, bcytail;
+  REAL negate, negatetail; /* -acy and -acytail, so the second product enters the sum negated */
+  REAL axby[8], bxay[8]; /* eight-component outputs of the two Two_Two_Product calls */
+  INEXACT REAL axby7, bxay7;
+  REAL deter[16]; /* axby + bxay: at most 8 + 8 components */
+  int deterlen;
+
+  INEXACT REAL bvirt; /* bvirt .. _2 are not named in this body; presumably scratch for the */
+  REAL avirt, bround, around; /*   Two_Diff and Two_Two_Product macros, defined elsewhere -- confirm */
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL a0hi, a0lo, a1hi, a1lo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j, _k, _l, _m, _n;
+  REAL _0, _1, _2;
+
+  Two_Diff(pa[0], pc[0], acx, acxtail);
+  Two_Diff(pa[1], pc[1], acy, acytail);
+  Two_Diff(pb[0], pc[0], bcx, bcxtail);
+  Two_Diff(pb[1], pc[1], bcy, bcytail);
+
+  Two_Two_Product(acx, acxtail, bcy, bcytail, axby7, axby[6], axby[5], axby[4], axby[3], axby[2], axby[1], axby[0]);
+  axby[7]    = axby7;
+  negate     = -acy;
+  negatetail = -acytail;
+  Two_Two_Product(bcx, bcxtail, negate, negatetail, bxay7, bxay[6], bxay[5], bxay[4], bxay[3], bxay[2], bxay[1], bxay[0]);
+  bxay[7] = bxay7;
+
+  deterlen = fast_expansion_sum_zeroelim(8, axby, 8, bxay, deter);
+
+  return deter[deterlen - 1]; /* last component of the final expansion */
+}
+
+REAL orient2dadapt(pa, pb, pc, detsum) REAL *pa; /* pa, pb, pc: coordinate arrays; only indices 0 and 1 are read */
+REAL *pb;
+REAL *pc;
+REAL detsum; /* caller-supplied magnitude that scales both error bounds used below */
+{
+  INEXACT REAL acx, acy, bcx, bcy; /* rounded coordinate differences against pc */
+  REAL acxtail, acytail, bcxtail, bcytail; /* filled by Two_Diff_Tail; presumably the roundoff of those differences -- confirm */
+  INEXACT REAL detleft, detright;
+  REAL detlefttail, detrighttail;
+  REAL det, errbound;
+  REAL B[4], C1[8], C2[12], D[16]; /* each later sum adds a 4-component u: at most 4 -> 8 -> 12 -> 16 components */
+  INEXACT REAL B3;
+  int C1length, C2length, Dlength;
+  REAL u[4]; /* reused for each of the three correction terms */
+  INEXACT REAL u3;
+  INEXACT REAL s1, t1;
+  REAL s0, t0;
+
+  INEXACT REAL bvirt; /* bvirt .. _0 are not named in this body; presumably scratch for the */
+  REAL avirt, bround, around; /*   Two_Product, Two_Two_Diff and Two_Diff_Tail macros -- confirm */
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j;
+  REAL _0;
+
+  acx = (REAL)(pa[0] - pc[0]);
+  bcx = (REAL)(pb[0] - pc[0]);
+  acy = (REAL)(pa[1] - pc[1]);
+  bcy = (REAL)(pb[1] - pc[1]);
+
+  Two_Product(acx, bcy, detleft, detlefttail);
+  Two_Product(acy, bcx, detright, detrighttail);
+
+  Two_Two_Diff(detleft, detlefttail, detright, detrighttail, B3, B[2], B[1], B[0]); /* presumably B = acx*bcy - acy*bcx as an expansion -- confirm */
+  B[3] = B3;
+
+  det      = estimate(4, B); /* first check: one-word estimate of B */
+  errbound = ccwerrboundB * detsum;
+  if((det >= errbound) || (-det >= errbound)) /* |det| >= errbound */
+    {
+      return det;
+    }
+
+  Two_Diff_Tail(pa[0], pc[0], acx, acxtail); /* macro defined elsewhere */
+  Two_Diff_Tail(pb[0], pc[0], bcx, bcxtail);
+  Two_Diff_Tail(pa[1], pc[1], acy, acytail);
+  Two_Diff_Tail(pb[1], pc[1], bcy, bcytail);
+
+  if((acxtail == 0.0) && (acytail == 0.0) && (bcxtail == 0.0) && (bcytail == 0.0)) /* all four tails are zero: no correction terms to add */
+    {
+      return det;
+    }
+
+  errbound = ccwerrboundC * detsum + resulterrbound * Absolute(det); /* second check: computed from det before the correction below is applied */
+  det += (acx * bcytail + bcy * acxtail) - (acy * bcxtail + bcx * acytail); /* ordinary floating-point correction using the tails */
+  if((det >= errbound) || (-det >= errbound))
+    {
+      return det;
+    }
+
+  Two_Product(acxtail, bcy, s1, s0); /* final stage: add three correction expansions to B */
+  Two_Product(acytail, bcx, t1, t0);
+  Two_Two_Diff(s1, s0, t1, t0, u3, u[2], u[1], u[0]);
+  u[3]     = u3;
+  C1length = fast_expansion_sum_zeroelim(4, B, 4, u, C1);
+
+  Two_Product(acx, bcytail, s1, s0);
+  Two_Product(acy, bcxtail, t1, t0);
+  Two_Two_Diff(s1, s0, t1, t0, u3, u[2], u[1], u[0]);
+  u[3]     = u3;
+  C2length = fast_expansion_sum_zeroelim(C1length, C1, 4, u, C2);
+
+  Two_Product(acxtail, bcytail, s1, s0);
+  Two_Product(acytail, bcxtail, t1, t0);
+  Two_Two_Diff(s1, s0, t1, t0, u3, u[2], u[1], u[0]);
+  u[3]    = u3;
+  Dlength = fast_expansion_sum_zeroelim(C2length, C2, 4, u, D);
+
+  return (D[Dlength - 1]); /* last component of the final expansion */
+}
+
+REAL orient2d(pa, pb, pc) REAL *pa; /* first point (pa[0], pa[1]) */
+REAL *pb;                           /* second point (pb[0], pb[1]) */
+REAL *pc;                           /* third point; subtracted from pa and pb */
+{
+  /*
+   * Adaptive 2D orientation test.  Evaluates the 2x2 determinant
+   *   (pa[0]-pc[0])*(pb[1]-pc[1]) - (pa[1]-pc[1])*(pb[0]-pc[0])
+   * in ordinary floating point and returns it directly whenever the
+   * quick checks below accept it; otherwise the decision is handed to
+   * orient2dadapt() together with the magnitude sum of the two products.
+   */
+  REAL lhs, rhs;      /* the two products forming the determinant */
+  REAL approx;        /* lhs - rhs */
+  REAL magsum, bound; /* |lhs| + |rhs| and the error bound derived from it */
+
+  lhs    = (pa[0] - pc[0]) * (pb[1] - pc[1]);
+  rhs    = (pa[1] - pc[1]) * (pb[0] - pc[0]);
+  approx = lhs - rhs;
+
+  if(lhs > 0.0)
+    {
+      /* A positive lhs minus a non-positive rhs: returned as is. */
+      if(rhs <= 0.0)
+        return approx;
+      magsum = lhs + rhs;
+    }
+  else if(lhs < 0.0)
+    {
+      /* Mirror case: a negative lhs minus a non-negative rhs. */
+      if(rhs >= 0.0)
+        return approx;
+      magsum = -lhs - rhs;
+    }
+  else
+    {
+      /* lhs is neither positive nor negative: returned as is. */
+      return approx;
+    }
+
+  /* Hand over to the adaptive routine unless |approx| reaches the A-level bound. */
+  bound = ccwerrboundA * magsum;
+  if(!((approx >= bound) || (-approx >= bound)))
+    return orient2dadapt(pa, pb, pc, magsum);
+
+  return approx;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  orient3dfast()   Approximate 3D orientation test.  Nonrobust.            */
+/*  orient3dexact()   Exact 3D orientation test.  Robust.                    */
+/*  orient3dslow()   Another exact 3D orientation test.  Robust.             */
+/*  orient3d()   Adaptive exact 3D orientation test.  Robust.                */
+/*                                                                           */
+/*               Return a positive value if the point pd lies below the      */
+/*               plane passing through pa, pb, and pc; "below" is defined so */
+/*               that pa, pb, and pc appear in counterclockwise order when   */
+/*               viewed from above the plane.  Returns a negative value if   */
+/*               pd lies above the plane.  Returns zero if the points are    */
+/*               coplanar.  The result is also a rough approximation of six  */
+/*               times the signed volume of the tetrahedron defined by the   */
+/*               four points.                                                */
+/*                                                                           */
+/*  Only the first and last routine should be used; the middle two are for   */
+/*  timings.                                                                 */
+/*                                                                           */
+/*  The last three use exact arithmetic to ensure a correct answer.  The     */
+/*  result returned is the determinant of a matrix.  In orient3d() only,     */
+/*  this determinant is computed adaptively, in the sense that exact         */
+/*  arithmetic is used only to the degree it is needed to ensure that the    */
+/*  returned value has the correct sign.  Hence, orient3d() is usually quite */
+/*  fast, but will run more slowly when the input points are coplanar or     */
+/*  nearly so.                                                               */
+/*                                                                           */
+/*****************************************************************************/
+
+REAL orient3dfast(pa, pb, pc, pd) REAL *pa; /* first point (x, y, z) */
+REAL *pb;                                   /* second point */
+REAL *pc;                                   /* third point */
+REAL *pd;                                   /* fourth point; subtracted from the other three */
+{
+  /* Plain floating-point 3x3 determinant of the rows pa-pd, pb-pd, pc-pd. */
+  /* No error control is applied; the section banner labels it nonrobust.  */
+  REAL a[3], b[3], c[3]; /* pa, pb, pc with pd subtracted component-wise */
+  int k;
+
+  for(k = 0; k < 3; k++)
+    {
+      a[k] = pa[k] - pd[k];
+      b[k] = pb[k] - pd[k];
+      c[k] = pc[k] - pd[k];
+    }
+
+  /* Indices 0, 1, 2 are x, y, z; the operand order is kept fixed so rounding is unchanged. */
+  /* Each row's x component multiplies the y/z minor of the other two rows. */
+  return a[0] * (b[1] * c[2] - b[2] * c[1]) + b[0] * (c[1] * a[2] - c[2] * a[1]) + c[0] * (a[1] * b[2] - a[2] * b[1]);
+}
+
+REAL orient3dexact(pa, pb, pc, pd) REAL *pa; /* pa: first point; elements [0], [1], [2] are read */
+REAL *pb;                                    /* pb: second point; elements [0], [1], [2] are read */
+REAL *pc;                                    /* pc: third point; elements [0], [1], [2] are read */
+REAL *pd;                                    /* pd: fourth point; elements [0], [1], [2] are read */
+{ /* Builds the determinant from the raw coordinates (no differences are taken) and returns the last component of the result. */
+  INEXACT REAL axby1, bxcy1, cxdy1, dxay1, axcy1, bxdy1;
+  INEXACT REAL bxay1, cxby1, dxcy1, axdy1, cxay1, dxby1;
+  REAL axby0, bxcy0, cxdy0, dxay0, axcy0, bxdy0;
+  REAL bxay0, cxby0, dxcy0, axdy0, cxay0, dxby0;
+  REAL ab[4], bc[4], cd[4], da[4], ac[4], bd[4]; /* x/y 2x2 minors for each pair of points */
+  REAL temp8[8];
+  int templen;
+  REAL abc[12], bcd[12], cda[12], dab[12]; /* sums of three minors, one per triple of points */
+  int abclen, bcdlen, cdalen, dablen;
+  REAL adet[24], bdet[24], cdet[24], ddet[24]; /* triple sums scaled by the remaining point's z */
+  int alen, blen, clen, dlen;
+  REAL abdet[48], cddet[48];
+  int ablen, cdlen;
+  REAL deter[96];
+  int deterlen;
+  int i;
+  /* The names below are not referenced by any statement in this body; presumably the arithmetic macros expand to them - confirm. */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j;
+  REAL _0;
+  /* ab = pa[0]*pb[1] - pb[0]*pa[1]  (NOTE(review): Two_Product / Two_Two_Diff are defined elsewhere - confirm they are exact) */
+  Two_Product(pa[0], pb[1], axby1, axby0);
+  Two_Product(pb[0], pa[1], bxay1, bxay0);
+  Two_Two_Diff(axby1, axby0, bxay1, bxay0, ab[3], ab[2], ab[1], ab[0]);
+  /* bc = pb[0]*pc[1] - pc[0]*pb[1] */
+  Two_Product(pb[0], pc[1], bxcy1, bxcy0);
+  Two_Product(pc[0], pb[1], cxby1, cxby0);
+  Two_Two_Diff(bxcy1, bxcy0, cxby1, cxby0, bc[3], bc[2], bc[1], bc[0]);
+  /* cd = pc[0]*pd[1] - pd[0]*pc[1] */
+  Two_Product(pc[0], pd[1], cxdy1, cxdy0);
+  Two_Product(pd[0], pc[1], dxcy1, dxcy0);
+  Two_Two_Diff(cxdy1, cxdy0, dxcy1, dxcy0, cd[3], cd[2], cd[1], cd[0]);
+  /* da = pd[0]*pa[1] - pa[0]*pd[1] */
+  Two_Product(pd[0], pa[1], dxay1, dxay0);
+  Two_Product(pa[0], pd[1], axdy1, axdy0);
+  Two_Two_Diff(dxay1, dxay0, axdy1, axdy0, da[3], da[2], da[1], da[0]);
+  /* ac = pa[0]*pc[1] - pc[0]*pa[1] */
+  Two_Product(pa[0], pc[1], axcy1, axcy0);
+  Two_Product(pc[0], pa[1], cxay1, cxay0);
+  Two_Two_Diff(axcy1, axcy0, cxay1, cxay0, ac[3], ac[2], ac[1], ac[0]);
+  /* bd = pb[0]*pd[1] - pd[0]*pb[1] */
+  Two_Product(pb[0], pd[1], bxdy1, bxdy0);
+  Two_Product(pd[0], pb[1], dxby1, dxby0);
+  Two_Two_Diff(bxdy1, bxdy0, dxby1, dxby0, bd[3], bd[2], bd[1], bd[0]);
+  /* cda = cd + da + ac and dab = da + ab + bd; bd and ac are then negated in place, giving abc = ab + bc - ac and bcd = bc + cd - bd. */
+  templen = fast_expansion_sum_zeroelim(4, cd, 4, da, temp8);
+  cdalen  = fast_expansion_sum_zeroelim(templen, temp8, 4, ac, cda);
+  templen = fast_expansion_sum_zeroelim(4, da, 4, ab, temp8);
+  dablen  = fast_expansion_sum_zeroelim(templen, temp8, 4, bd, dab);
+  for(i = 0; i < 4; i++)
+    {
+      bd[i] = -bd[i];
+      ac[i] = -ac[i];
+    }
+  templen = fast_expansion_sum_zeroelim(4, ab, 4, bc, temp8);
+  abclen  = fast_expansion_sum_zeroelim(templen, temp8, 4, ac, abc);
+  templen = fast_expansion_sum_zeroelim(4, bc, 4, cd, temp8);
+  bcdlen  = fast_expansion_sum_zeroelim(templen, temp8, 4, bd, bcd);
+  /* Scale each triple sum by the z coordinate of the point it omits; the sign alternates (+pa, -pb, +pc, -pd). */
+  alen = scale_expansion_zeroelim(bcdlen, bcd, pa[2], adet);
+  blen = scale_expansion_zeroelim(cdalen, cda, -pb[2], bdet);
+  clen = scale_expansion_zeroelim(dablen, dab, pc[2], cdet);
+  dlen = scale_expansion_zeroelim(abclen, abc, -pd[2], ddet);
+  /* deter = (adet + bdet) + (cdet + ddet) */
+  ablen    = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet);
+  cdlen    = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet);
+  deterlen = fast_expansion_sum_zeroelim(ablen, abdet, cdlen, cddet, deter);
+  /* The last stored component is returned; presumably it is the most significant one and carries the sign - confirm. */
+  return deter[deterlen - 1];
+}
+
+REAL orient3dslow(pa, pb, pc, pd) REAL *pa; /* pa: first point; elements [0], [1], [2] are read */
+REAL *pb;                                   /* pb: second point; elements [0], [1], [2] are read */
+REAL *pc;                                   /* pc: third point; elements [0], [1], [2] are read */
+REAL *pd;                                   /* pd: fourth point; subtracted from the other three */
+{ /* Builds the 3x3 determinant of pa-pd, pb-pd, pc-pd from head + tail differences and returns the last component of the result. */
+  INEXACT REAL adx, ady, adz, bdx, bdy, bdz, cdx, cdy, cdz; /* head parts of the nine differences */
+  REAL adxtail, adytail, adztail;                           /* tail outputs paired with the heads above */
+  REAL bdxtail, bdytail, bdztail;
+  REAL cdxtail, cdytail, cdztail;
+  REAL negate, negatetail; /* sign-flipped copy of a y head/tail pair */
+  INEXACT REAL axby7, bxcy7, axcy7, bxay7, cxby7, cxay7;
+  REAL axby[8], bxcy[8], axcy[8], bxay[8], cxby[8], cxay[8];
+  REAL temp16[16], temp32[32], temp32t[32];
+  int temp16len, temp32len, temp32tlen;
+  REAL adet[64], bdet[64], cdet[64];
+  int alen, blen, clen;
+  REAL abdet[128];
+  int ablen;
+  REAL deter[192];
+  int deterlen;
+  /* The names below are not referenced by any statement in this body; presumably the arithmetic macros expand to them - confirm. */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL a0hi, a0lo, a1hi, a1lo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j, _k, _l, _m, _n;
+  REAL _0, _1, _2;
+  /* NOTE(review): Two_Diff is defined elsewhere; presumably it yields the rounded difference plus its roundoff tail - confirm. */
+  Two_Diff(pa[0], pd[0], adx, adxtail);
+  Two_Diff(pa[1], pd[1], ady, adytail);
+  Two_Diff(pa[2], pd[2], adz, adztail);
+  Two_Diff(pb[0], pd[0], bdx, bdxtail);
+  Two_Diff(pb[1], pd[1], bdy, bdytail);
+  Two_Diff(pb[2], pd[2], bdz, bdztail);
+  Two_Diff(pc[0], pd[0], cdx, cdxtail);
+  Two_Diff(pc[1], pd[1], cdy, cdytail);
+  Two_Diff(pc[2], pd[2], cdz, cdztail);
+  /* Six x*y products of head + tail pairs; bxay, cxby and axcy take a negated y operand so that each pair below adds up to a minor. */
+  Two_Two_Product(adx, adxtail, bdy, bdytail, axby7, axby[6], axby[5], axby[4], axby[3], axby[2], axby[1], axby[0]);
+  axby[7]    = axby7;
+  negate     = -ady;
+  negatetail = -adytail;
+  Two_Two_Product(bdx, bdxtail, negate, negatetail, bxay7, bxay[6], bxay[5], bxay[4], bxay[3], bxay[2], bxay[1], bxay[0]);
+  bxay[7] = bxay7;
+  Two_Two_Product(bdx, bdxtail, cdy, cdytail, bxcy7, bxcy[6], bxcy[5], bxcy[4], bxcy[3], bxcy[2], bxcy[1], bxcy[0]);
+  bxcy[7]    = bxcy7;
+  negate     = -bdy;
+  negatetail = -bdytail;
+  Two_Two_Product(cdx, cdxtail, negate, negatetail, cxby7, cxby[6], cxby[5], cxby[4], cxby[3], cxby[2], cxby[1], cxby[0]);
+  cxby[7] = cxby7;
+  Two_Two_Product(cdx, cdxtail, ady, adytail, cxay7, cxay[6], cxay[5], cxay[4], cxay[3], cxay[2], cxay[1], cxay[0]);
+  cxay[7]    = cxay7;
+  negate     = -cdy;
+  negatetail = -cdytail;
+  Two_Two_Product(adx, adxtail, negate, negatetail, axcy7, axcy[6], axcy[5], axcy[4], axcy[3], axcy[2], axcy[1], axcy[0]);
+  axcy[7] = axcy7;
+  /* adet = (bxcy + cxby) * adz + (bxcy + cxby) * adztail */
+  temp16len  = fast_expansion_sum_zeroelim(8, bxcy, 8, cxby, temp16);
+  temp32len  = scale_expansion_zeroelim(temp16len, temp16, adz, temp32);
+  temp32tlen = scale_expansion_zeroelim(temp16len, temp16, adztail, temp32t);
+  alen       = fast_expansion_sum_zeroelim(temp32len, temp32, temp32tlen, temp32t, adet);
+  /* bdet = (cxay + axcy) * bdz + (cxay + axcy) * bdztail */
+  temp16len  = fast_expansion_sum_zeroelim(8, cxay, 8, axcy, temp16);
+  temp32len  = scale_expansion_zeroelim(temp16len, temp16, bdz, temp32);
+  temp32tlen = scale_expansion_zeroelim(temp16len, temp16, bdztail, temp32t);
+  blen       = fast_expansion_sum_zeroelim(temp32len, temp32, temp32tlen, temp32t, bdet);
+  /* cdet = (axby + bxay) * cdz + (axby + bxay) * cdztail */
+  temp16len  = fast_expansion_sum_zeroelim(8, axby, 8, bxay, temp16);
+  temp32len  = scale_expansion_zeroelim(temp16len, temp16, cdz, temp32);
+  temp32tlen = scale_expansion_zeroelim(temp16len, temp16, cdztail, temp32t);
+  clen       = fast_expansion_sum_zeroelim(temp32len, temp32, temp32tlen, temp32t, cdet);
+  /* deter = adet + bdet + cdet */
+  ablen    = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet);
+  deterlen = fast_expansion_sum_zeroelim(ablen, abdet, clen, cdet, deter);
+  /* The last stored component is returned; presumably it is the most significant one and carries the sign - confirm. */
+  return deter[deterlen - 1];
+}
+
+REAL orient3dadapt(pa, pb, pc, pd, permanent) REAL *pa; /* pa: first point; elements [0], [1], [2] are read */
+REAL *pb;                                               /* pb: second point; elements [0], [1], [2] are read */
+REAL *pc;                                               /* pc: third point; elements [0], [1], [2] are read */
+REAL *pd;                                               /* pd: fourth point; subtracted from the other three */
+REAL permanent;                                         /* permanent: caller-supplied magnitude that scales the error bounds below */
+{ /* Staged evaluation of the 3x3 determinant of pa-pd, pb-pd, pc-pd; returns at the first stage whose result clears its bound. */
+  INEXACT REAL adx, bdx, cdx, ady, bdy, cdy, adz, bdz, cdz;
+  REAL det, errbound;
+  /* First-stage work: x/y minors of the head differences (bc, ca, ab) and their products with the z heads. */
+  INEXACT REAL bdxcdy1, cdxbdy1, cdxady1, adxcdy1, adxbdy1, bdxady1;
+  REAL bdxcdy0, cdxbdy0, cdxady0, adxcdy0, adxbdy0, bdxady0;
+  REAL bc[4], ca[4], ab[4];
+  INEXACT REAL bc3, ca3, ab3;
+  REAL adet[8], bdet[8], cdet[8];
+  int alen, blen, clen;
+  REAL abdet[16];
+  int ablen;
+  REAL *finnow, *finother, *finswap; /* current accumulator, spare buffer, and swap temporary */
+  REAL fin1[192], fin2[192];
+  int finlength;
+  /* Later-stage work: tails of the differences and the expansions built from them. */
+  REAL adxtail, bdxtail, cdxtail;
+  REAL adytail, bdytail, cdytail;
+  REAL adztail, bdztail, cdztail;
+  INEXACT REAL at_blarge, at_clarge;
+  INEXACT REAL bt_clarge, bt_alarge;
+  INEXACT REAL ct_alarge, ct_blarge;
+  REAL at_b[4], at_c[4], bt_c[4], bt_a[4], ct_a[4], ct_b[4];
+  int at_blen, at_clen, bt_clen, bt_alen, ct_alen, ct_blen;
+  INEXACT REAL bdxt_cdy1, cdxt_bdy1, cdxt_ady1;
+  INEXACT REAL adxt_cdy1, adxt_bdy1, bdxt_ady1;
+  REAL bdxt_cdy0, cdxt_bdy0, cdxt_ady0;
+  REAL adxt_cdy0, adxt_bdy0, bdxt_ady0;
+  INEXACT REAL bdyt_cdx1, cdyt_bdx1, cdyt_adx1;
+  INEXACT REAL adyt_cdx1, adyt_bdx1, bdyt_adx1;
+  REAL bdyt_cdx0, cdyt_bdx0, cdyt_adx0;
+  REAL adyt_cdx0, adyt_bdx0, bdyt_adx0;
+  REAL bct[8], cat[8], abt[8];
+  int bctlen, catlen, abtlen;
+  INEXACT REAL bdxt_cdyt1, cdxt_bdyt1, cdxt_adyt1;
+  INEXACT REAL adxt_cdyt1, adxt_bdyt1, bdxt_adyt1;
+  REAL bdxt_cdyt0, cdxt_bdyt0, cdxt_adyt0;
+  REAL adxt_cdyt0, adxt_bdyt0, bdxt_adyt0;
+  REAL u[4], v[12], w[16]; /* scratch expansions for individual correction terms */
+  INEXACT REAL u3;
+  int vlength, wlength;
+  REAL negate;
+  /* The names below are not referenced by any statement in this body; presumably the arithmetic macros expand to them - confirm. */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j, _k;
+  REAL _0;
+  /* Stage 1: ordinary (rounded) coordinate differences; tails are not yet considered. */
+  adx = (REAL)(pa[0] - pd[0]);
+  bdx = (REAL)(pb[0] - pd[0]);
+  cdx = (REAL)(pc[0] - pd[0]);
+  ady = (REAL)(pa[1] - pd[1]);
+  bdy = (REAL)(pb[1] - pd[1]);
+  cdy = (REAL)(pc[1] - pd[1]);
+  adz = (REAL)(pa[2] - pd[2]);
+  bdz = (REAL)(pb[2] - pd[2]);
+  cdz = (REAL)(pc[2] - pd[2]);
+  /* bc = bdx*cdy - cdx*bdy; adet = bc * adz  (NOTE(review): Two_Product / Two_Two_Diff are defined elsewhere - confirm exactness) */
+  Two_Product(bdx, cdy, bdxcdy1, bdxcdy0);
+  Two_Product(cdx, bdy, cdxbdy1, cdxbdy0);
+  Two_Two_Diff(bdxcdy1, bdxcdy0, cdxbdy1, cdxbdy0, bc3, bc[2], bc[1], bc[0]);
+  bc[3] = bc3;
+  alen  = scale_expansion_zeroelim(4, bc, adz, adet);
+  /* ca = cdx*ady - adx*cdy; bdet = ca * bdz */
+  Two_Product(cdx, ady, cdxady1, cdxady0);
+  Two_Product(adx, cdy, adxcdy1, adxcdy0);
+  Two_Two_Diff(cdxady1, cdxady0, adxcdy1, adxcdy0, ca3, ca[2], ca[1], ca[0]);
+  ca[3] = ca3;
+  blen  = scale_expansion_zeroelim(4, ca, bdz, bdet);
+  /* ab = adx*bdy - bdx*ady; cdet = ab * cdz */
+  Two_Product(adx, bdy, adxbdy1, adxbdy0);
+  Two_Product(bdx, ady, bdxady1, bdxady0);
+  Two_Two_Diff(adxbdy1, adxbdy0, bdxady1, bdxady0, ab3, ab[2], ab[1], ab[0]);
+  ab[3] = ab3;
+  clen  = scale_expansion_zeroelim(4, ab, cdz, cdet);
+  /* fin1 = adet + bdet + cdet */
+  ablen     = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet);
+  finlength = fast_expansion_sum_zeroelim(ablen, abdet, clen, cdet, fin1);
+  /* Accept the estimate of fin1 when its magnitude reaches the B-level bound scaled by permanent. */
+  det      = estimate(finlength, fin1);
+  errbound = o3derrboundB * permanent;
+  if((det >= errbound) || (-det >= errbound))
+    {
+      return det;
+    }
+  /* Stage 2: obtain the tails that go with the nine differences (Two_Diff_Tail is defined elsewhere - confirm semantics). */
+  Two_Diff_Tail(pa[0], pd[0], adx, adxtail);
+  Two_Diff_Tail(pb[0], pd[0], bdx, bdxtail);
+  Two_Diff_Tail(pc[0], pd[0], cdx, cdxtail);
+  Two_Diff_Tail(pa[1], pd[1], ady, adytail);
+  Two_Diff_Tail(pb[1], pd[1], bdy, bdytail);
+  Two_Diff_Tail(pc[1], pd[1], cdy, cdytail);
+  Two_Diff_Tail(pa[2], pd[2], adz, adztail);
+  Two_Diff_Tail(pb[2], pd[2], bdz, bdztail);
+  Two_Diff_Tail(pc[2], pd[2], cdz, cdztail);
+  /* With all nine tails zero there is nothing further to add, so det is returned unchanged. */
+  if((adxtail == 0.0) && (bdxtail == 0.0) && (cdxtail == 0.0) && (adytail == 0.0) && (bdytail == 0.0) && (cdytail == 0.0) &&
+     (adztail == 0.0) && (bdztail == 0.0) && (cdztail == 0.0))
+    {
+      return det;
+    }
+  /* Stage 3: add the terms containing exactly one tail factor in plain floating point and retest against the C-level bound. */
+  errbound = o3derrboundC * permanent + resulterrbound * Absolute(det);
+  det += (adz * ((bdx * cdytail + cdy * bdxtail) - (bdy * cdxtail + cdx * bdytail)) + adztail * (bdx * cdy - bdy * cdx)) +
+         (bdz * ((cdx * adytail + ady * cdxtail) - (cdy * adxtail + adx * cdytail)) + bdztail * (cdx * ady - cdy * adx)) +
+         (cdz * ((adx * bdytail + bdy * adxtail) - (ady * bdxtail + bdx * adytail)) + cdztail * (adx * bdy - ady * bdx));
+  if((det >= errbound) || (-det >= errbound))
+    {
+      return det;
+    }
+  /* Stage 4: keep adding terms to the stage-1 sum; finnow and finother alternate between fin1 and fin2 after every addition. */
+  finnow   = fin1;
+  finother = fin2;
+  /* at_b = adxtail*bdy - adytail*bdx and at_c = adytail*cdx - adxtail*cdy; shorter forms are used when a tail is zero. */
+  if(adxtail == 0.0)
+    {
+      if(adytail == 0.0)
+        {
+          at_b[0] = 0.0;
+          at_blen = 1;
+          at_c[0] = 0.0;
+          at_clen = 1;
+        }
+      else
+        {
+          negate = -adytail;
+          Two_Product(negate, bdx, at_blarge, at_b[0]);
+          at_b[1] = at_blarge;
+          at_blen = 2;
+          Two_Product(adytail, cdx, at_clarge, at_c[0]);
+          at_c[1] = at_clarge;
+          at_clen = 2;
+        }
+    }
+  else
+    {
+      if(adytail == 0.0)
+        {
+          Two_Product(adxtail, bdy, at_blarge, at_b[0]);
+          at_b[1] = at_blarge;
+          at_blen = 2;
+          negate  = -adxtail;
+          Two_Product(negate, cdy, at_clarge, at_c[0]);
+          at_c[1] = at_clarge;
+          at_clen = 2;
+        }
+      else
+        {
+          Two_Product(adxtail, bdy, adxt_bdy1, adxt_bdy0);
+          Two_Product(adytail, bdx, adyt_bdx1, adyt_bdx0);
+          Two_Two_Diff(adxt_bdy1, adxt_bdy0, adyt_bdx1, adyt_bdx0, at_blarge, at_b[2], at_b[1], at_b[0]);
+          at_b[3] = at_blarge;
+          at_blen = 4;
+          Two_Product(adytail, cdx, adyt_cdx1, adyt_cdx0);
+          Two_Product(adxtail, cdy, adxt_cdy1, adxt_cdy0);
+          Two_Two_Diff(adyt_cdx1, adyt_cdx0, adxt_cdy1, adxt_cdy0, at_clarge, at_c[2], at_c[1], at_c[0]);
+          at_c[3] = at_clarge;
+          at_clen = 4;
+        }
+    }
+  if(bdxtail == 0.0) /* bt_c = bdxtail*cdy - bdytail*cdx and bt_a = bdytail*adx - bdxtail*ady, same case split as above */
+    {
+      if(bdytail == 0.0)
+        {
+          bt_c[0] = 0.0;
+          bt_clen = 1;
+          bt_a[0] = 0.0;
+          bt_alen = 1;
+        }
+      else
+        {
+          negate = -bdytail;
+          Two_Product(negate, cdx, bt_clarge, bt_c[0]);
+          bt_c[1] = bt_clarge;
+          bt_clen = 2;
+          Two_Product(bdytail, adx, bt_alarge, bt_a[0]);
+          bt_a[1] = bt_alarge;
+          bt_alen = 2;
+        }
+    }
+  else
+    {
+      if(bdytail == 0.0)
+        {
+          Two_Product(bdxtail, cdy, bt_clarge, bt_c[0]);
+          bt_c[1] = bt_clarge;
+          bt_clen = 2;
+          negate  = -bdxtail;
+          Two_Product(negate, ady, bt_alarge, bt_a[0]);
+          bt_a[1] = bt_alarge;
+          bt_alen = 2;
+        }
+      else
+        {
+          Two_Product(bdxtail, cdy, bdxt_cdy1, bdxt_cdy0);
+          Two_Product(bdytail, cdx, bdyt_cdx1, bdyt_cdx0);
+          Two_Two_Diff(bdxt_cdy1, bdxt_cdy0, bdyt_cdx1, bdyt_cdx0, bt_clarge, bt_c[2], bt_c[1], bt_c[0]);
+          bt_c[3] = bt_clarge;
+          bt_clen = 4;
+          Two_Product(bdytail, adx, bdyt_adx1, bdyt_adx0);
+          Two_Product(bdxtail, ady, bdxt_ady1, bdxt_ady0);
+          Two_Two_Diff(bdyt_adx1, bdyt_adx0, bdxt_ady1, bdxt_ady0, bt_alarge, bt_a[2], bt_a[1], bt_a[0]);
+          bt_a[3] = bt_alarge;
+          bt_alen = 4;
+        }
+    }
+  if(cdxtail == 0.0) /* ct_a = cdxtail*ady - cdytail*adx and ct_b = cdytail*bdx - cdxtail*bdy, same case split as above */
+    {
+      if(cdytail == 0.0)
+        {
+          ct_a[0] = 0.0;
+          ct_alen = 1;
+          ct_b[0] = 0.0;
+          ct_blen = 1;
+        }
+      else
+        {
+          negate = -cdytail;
+          Two_Product(negate, adx, ct_alarge, ct_a[0]);
+          ct_a[1] = ct_alarge;
+          ct_alen = 2;
+          Two_Product(cdytail, bdx, ct_blarge, ct_b[0]);
+          ct_b[1] = ct_blarge;
+          ct_blen = 2;
+        }
+    }
+  else
+    {
+      if(cdytail == 0.0)
+        {
+          Two_Product(cdxtail, ady, ct_alarge, ct_a[0]);
+          ct_a[1] = ct_alarge;
+          ct_alen = 2;
+          negate  = -cdxtail;
+          Two_Product(negate, bdy, ct_blarge, ct_b[0]);
+          ct_b[1] = ct_blarge;
+          ct_blen = 2;
+        }
+      else
+        {
+          Two_Product(cdxtail, ady, cdxt_ady1, cdxt_ady0);
+          Two_Product(cdytail, adx, cdyt_adx1, cdyt_adx0);
+          Two_Two_Diff(cdxt_ady1, cdxt_ady0, cdyt_adx1, cdyt_adx0, ct_alarge, ct_a[2], ct_a[1], ct_a[0]);
+          ct_a[3] = ct_alarge;
+          ct_alen = 4;
+          Two_Product(cdytail, bdx, cdyt_bdx1, cdyt_bdx0);
+          Two_Product(cdxtail, bdy, cdxt_bdy1, cdxt_bdy0);
+          Two_Two_Diff(cdyt_bdx1, cdyt_bdx0, cdxt_bdy1, cdxt_bdy0, ct_blarge, ct_b[2], ct_b[1], ct_b[0]);
+          ct_b[3] = ct_blarge;
+          ct_blen = 4;
+        }
+    }
+  /* bct = bt_c + ct_b; accumulator += bct * adz */
+  bctlen    = fast_expansion_sum_zeroelim(bt_clen, bt_c, ct_blen, ct_b, bct);
+  wlength   = scale_expansion_zeroelim(bctlen, bct, adz, w);
+  finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother);
+  finswap   = finnow;
+  finnow    = finother;
+  finother  = finswap;
+  /* cat = ct_a + at_c; accumulator += cat * bdz */
+  catlen    = fast_expansion_sum_zeroelim(ct_alen, ct_a, at_clen, at_c, cat);
+  wlength   = scale_expansion_zeroelim(catlen, cat, bdz, w);
+  finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother);
+  finswap   = finnow;
+  finnow    = finother;
+  finother  = finswap;
+  /* abt = at_b + bt_a; accumulator += abt * cdz */
+  abtlen    = fast_expansion_sum_zeroelim(at_blen, at_b, bt_alen, bt_a, abt);
+  wlength   = scale_expansion_zeroelim(abtlen, abt, cdz, w);
+  finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother);
+  finswap   = finnow;
+  finnow    = finother;
+  finother  = finswap;
+  /* For each nonzero z tail: accumulator += (matching head minor bc, ca or ab) * that z tail. */
+  if(adztail != 0.0)
+    {
+      vlength   = scale_expansion_zeroelim(4, bc, adztail, v);
+      finlength = fast_expansion_sum_zeroelim(finlength, finnow, vlength, v, finother);
+      finswap   = finnow;
+      finnow    = finother;
+      finother  = finswap;
+    }
+  if(bdztail != 0.0)
+    {
+      vlength   = scale_expansion_zeroelim(4, ca, bdztail, v);
+      finlength = fast_expansion_sum_zeroelim(finlength, finnow, vlength, v, finother);
+      finswap   = finnow;
+      finnow    = finother;
+      finother  = finswap;
+    }
+  if(cdztail != 0.0)
+    {
+      vlength   = scale_expansion_zeroelim(4, ab, cdztail, v);
+      finlength = fast_expansion_sum_zeroelim(finlength, finnow, vlength, v, finother);
+      finswap   = finnow;
+      finnow    = finother;
+      finother  = finswap;
+    }
+  /* Products of one x tail and one y tail, times the third point's z head and, when nonzero, its z tail; zero tails are skipped. */
+  if(adxtail != 0.0)
+    {
+      if(bdytail != 0.0)
+        {
+          Two_Product(adxtail, bdytail, adxt_bdyt1, adxt_bdyt0);
+          Two_One_Product(adxt_bdyt1, adxt_bdyt0, cdz, u3, u[2], u[1], u[0]);
+          u[3]      = u3;
+          finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother);
+          finswap   = finnow;
+          finnow    = finother;
+          finother  = finswap;
+          if(cdztail != 0.0)
+            {
+              Two_One_Product(adxt_bdyt1, adxt_bdyt0, cdztail, u3, u[2], u[1], u[0]);
+              u[3]      = u3;
+              finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother);
+              finswap   = finnow;
+              finnow    = finother;
+              finother  = finswap;
+            }
+        }
+      if(cdytail != 0.0)
+        {
+          negate = -adxtail;
+          Two_Product(negate, cdytail, adxt_cdyt1, adxt_cdyt0);
+          Two_One_Product(adxt_cdyt1, adxt_cdyt0, bdz, u3, u[2], u[1], u[0]);
+          u[3]      = u3;
+          finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother);
+          finswap   = finnow;
+          finnow    = finother;
+          finother  = finswap;
+          if(bdztail != 0.0)
+            {
+              Two_One_Product(adxt_cdyt1, adxt_cdyt0, bdztail, u3, u[2], u[1], u[0]);
+              u[3]      = u3;
+              finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother);
+              finswap   = finnow;
+              finnow    = finother;
+              finother  = finswap;
+            }
+        }
+    }
+  if(bdxtail != 0.0)
+    {
+      if(cdytail != 0.0)
+        {
+          Two_Product(bdxtail, cdytail, bdxt_cdyt1, bdxt_cdyt0);
+          Two_One_Product(bdxt_cdyt1, bdxt_cdyt0, adz, u3, u[2], u[1], u[0]);
+          u[3]      = u3;
+          finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother);
+          finswap   = finnow;
+          finnow    = finother;
+          finother  = finswap;
+          if(adztail != 0.0)
+            {
+              Two_One_Product(bdxt_cdyt1, bdxt_cdyt0, adztail, u3, u[2], u[1], u[0]);
+              u[3]      = u3;
+              finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother);
+              finswap   = finnow;
+              finnow    = finother;
+              finother  = finswap;
+            }
+        }
+      if(adytail != 0.0)
+        {
+          negate = -bdxtail;
+          Two_Product(negate, adytail, bdxt_adyt1, bdxt_adyt0);
+          Two_One_Product(bdxt_adyt1, bdxt_adyt0, cdz, u3, u[2], u[1], u[0]);
+          u[3]      = u3;
+          finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother);
+          finswap   = finnow;
+          finnow    = finother;
+          finother  = finswap;
+          if(cdztail != 0.0)
+            {
+              Two_One_Product(bdxt_adyt1, bdxt_adyt0, cdztail, u3, u[2], u[1], u[0]);
+              u[3]      = u3;
+              finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother);
+              finswap   = finnow;
+              finnow    = finother;
+              finother  = finswap;
+            }
+        }
+    }
+  if(cdxtail != 0.0)
+    {
+      if(adytail != 0.0)
+        {
+          Two_Product(cdxtail, adytail, cdxt_adyt1, cdxt_adyt0);
+          Two_One_Product(cdxt_adyt1, cdxt_adyt0, bdz, u3, u[2], u[1], u[0]);
+          u[3]      = u3;
+          finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother);
+          finswap   = finnow;
+          finnow    = finother;
+          finother  = finswap;
+          if(bdztail != 0.0)
+            {
+              Two_One_Product(cdxt_adyt1, cdxt_adyt0, bdztail, u3, u[2], u[1], u[0]);
+              u[3]      = u3;
+              finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother);
+              finswap   = finnow;
+              finnow    = finother;
+              finother  = finswap;
+            }
+        }
+      if(bdytail != 0.0)
+        {
+          negate = -cdxtail;
+          Two_Product(negate, bdytail, cdxt_bdyt1, cdxt_bdyt0);
+          Two_One_Product(cdxt_bdyt1, cdxt_bdyt0, adz, u3, u[2], u[1], u[0]);
+          u[3]      = u3;
+          finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother);
+          finswap   = finnow;
+          finnow    = finother;
+          finother  = finswap;
+          if(adztail != 0.0)
+            {
+              Two_One_Product(cdxt_bdyt1, cdxt_bdyt0, adztail, u3, u[2], u[1], u[0]);
+              u[3]      = u3;
+              finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother);
+              finswap   = finnow;
+              finnow    = finother;
+              finother  = finswap;
+            }
+        }
+    }
+  /* For each nonzero z tail: accumulator += (bct, cat or abt) * that z tail. */
+  if(adztail != 0.0)
+    {
+      wlength   = scale_expansion_zeroelim(bctlen, bct, adztail, w);
+      finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother);
+      finswap   = finnow;
+      finnow    = finother;
+      finother  = finswap;
+    }
+  if(bdztail != 0.0)
+    {
+      wlength   = scale_expansion_zeroelim(catlen, cat, bdztail, w);
+      finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother);
+      finswap   = finnow;
+      finnow    = finother;
+      finother  = finswap;
+    }
+  if(cdztail != 0.0)
+    {
+      wlength   = scale_expansion_zeroelim(abtlen, abt, cdztail, w);
+      finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother);
+      finswap   = finnow;
+      finnow    = finother;
+      finother  = finswap;
+    }
+  /* The last stored component of the current accumulator is returned; presumably the most significant one - confirm. */
+  return finnow[finlength - 1];
+}
+
+REAL orient3d(pa, pb, pc, pd) REAL *pa; /* first point (x, y, z) */
+REAL *pb;                               /* second point */
+REAL *pc;                               /* third point */
+REAL *pd;                               /* fourth point; subtracted from the other three */
+{
+  /* Adaptive 3D orientation test: a plain floating-point determinant is returned when it */
+  /* exceeds the A-level error bound; otherwise orient3dadapt() decides.                  */
+  REAL a[3], b[3], c[3];                   /* pa, pb, pc with pd subtracted component-wise */
+  REAL bxcy, cxby, cxay, axcy, axby, bxay; /* pairwise x*y products */
+  REAL approx, permanent, bound;
+  int k;
+
+  for(k = 0; k < 3; k++)
+    {
+      a[k] = pa[k] - pd[k];
+      b[k] = pb[k] - pd[k];
+      c[k] = pc[k] - pd[k];
+    }
+
+  bxcy = b[0] * c[1];
+  cxby = c[0] * b[1];
+  cxay = c[0] * a[1];
+  axcy = a[0] * c[1];
+  axby = a[0] * b[1];
+  bxay = b[0] * a[1];
+
+  approx = a[2] * (bxcy - cxby) + b[2] * (cxay - axcy) + c[2] * (axby - bxay);
+
+  /* Same expression with every product replaced by its magnitude and every difference by a sum. */
+  permanent = (Absolute(bxcy) + Absolute(cxby)) * Absolute(a[2]) + (Absolute(cxay) + Absolute(axcy)) * Absolute(b[2]) +
+              (Absolute(axby) + Absolute(bxay)) * Absolute(c[2]);
+
+  /* Hand over to the adaptive routine unless |approx| strictly exceeds the bound. */
+  bound = o3derrboundA * permanent;
+  if(!((approx > bound) || (-approx > bound)))
+    {
+      return orient3dadapt(pa, pb, pc, pd, permanent);
+    }
+
+  return approx;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  incirclefast()   Approximate 2D incircle test.  Nonrobust.               */
+/*  incircleexact()   Exact 2D incircle test.  Robust.                       */
+/*  incircleslow()   Another exact 2D incircle test.  Robust.                */
+/*  incircle()   Adaptive exact 2D incircle test.  Robust.                   */
+/*                                                                           */
+/*               Return a positive value if the point pd lies inside the     */
+/*               circle passing through pa, pb, and pc; a negative value if  */
+/*               it lies outside; and zero if the four points are cocircular.*/
+/*               The points pa, pb, and pc must be in counterclockwise       */
+/*               order, or the sign of the result will be reversed.          */
+/*                                                                           */
+/*  Only the first and last routine should be used; the middle two are for   */
+/*  timings.                                                                 */
+/*                                                                           */
+/*  The last three use exact arithmetic to ensure a correct answer.  The     */
+/*  result returned is the determinant of a matrix.  In incircle() only,     */
+/*  this determinant is computed adaptively, in the sense that exact         */
+/*  arithmetic is used only to the degree it is needed to ensure that the    */
+/*  returned value has the correct sign.  Hence, incircle() is usually quite */
+/*  fast, but will run more slowly when the input points are cocircular or   */
+/*  nearly so.                                                               */
+/*                                                                           */
+/*****************************************************************************/
+
+REAL incirclefast(pa, pb, pc, pd) REAL *pa; /* first point (x, y) */
+REAL *pb;                                   /* second point */
+REAL *pc;                                   /* third point */
+REAL *pd;                                   /* query point; subtracted from the other three */
+{
+  /* Plain floating-point incircle determinant; no error control (the section banner labels it nonrobust). */
+  REAL a[2], b[2], c[2];                    /* pa, pb, pc with pd subtracted component-wise */
+  REAL ab, bc, ca, alift, blift, clift;     /* 2x2 minors and squared lengths of a, b, c */
+  int k;
+
+  for(k = 0; k < 2; k++)
+    {
+      a[k] = pa[k] - pd[k];
+      b[k] = pb[k] - pd[k];
+      c[k] = pc[k] - pd[k];
+    }
+
+  ab    = a[0] * b[1] - b[0] * a[1];
+  bc    = b[0] * c[1] - c[0] * b[1];
+  ca    = c[0] * a[1] - a[0] * c[1];
+  alift = a[0] * a[0] + a[1] * a[1];
+  blift = b[0] * b[0] + b[1] * b[1];
+  clift = c[0] * c[0] + c[1] * c[1];
+  return alift * bc + blift * ca + clift * ab;
+}
+
+REAL incircleexact(pa, pb, pc, pd) REAL *pa; /* Incircle determinant of pa, pb, pc, pd, built entirely from the   */
+REAL *pb;                                    /* Two_Product / Two_Two_Diff macros and the array helpers           */
+REAL *pc;                                    /* (fast_expansion_sum_zeroelim, scale_expansion_zeroelim); no early */
+REAL *pd;                                    /* return.  Each point is read at [0] and [1] only.  Returns         */
+{                                            /* deter[deterlen - 1], the last component of the final array.       */
+  INEXACT REAL axby1, bxcy1, cxdy1, dxay1, axcy1, bxdy1;
+  INEXACT REAL bxay1, cxby1, dxcy1, axdy1, cxay1, dxby1;
+  REAL axby0, bxcy0, cxdy0, dxay0, axcy0, bxdy0;
+  REAL bxay0, cxby0, dxcy0, axdy0, cxay0, dxby0;
+  REAL ab[4], bc[4], cd[4], da[4], ac[4], bd[4];
+  REAL temp8[8];
+  int templen;
+  REAL abc[12], bcd[12], cda[12], dab[12];
+  int abclen, bcdlen, cdalen, dablen;
+  REAL det24x[24], det24y[24], det48x[48], det48y[48];
+  int xlen, ylen;
+  REAL adet[96], bdet[96], cdet[96], ddet[96];
+  int alen, blen, clen, dlen;
+  REAL abdet[192], cddet[192];
+  int ablen, cdlen;
+  REAL deter[384];
+  int deterlen;
+  int i;
+  /* Not named anywhere below; presumably used inside the expansions of Two_Product / Two_Two_Diff -- confirm. */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j;
+  REAL _0;
+  /* One stanza per point pair: two Two_Product results go into Two_Two_Diff, filling ab, bc, cd, da, ac, bd. */
+  Two_Product(pa[0], pb[1], axby1, axby0);
+  Two_Product(pb[0], pa[1], bxay1, bxay0);
+  Two_Two_Diff(axby1, axby0, bxay1, bxay0, ab[3], ab[2], ab[1], ab[0]);
+
+  Two_Product(pb[0], pc[1], bxcy1, bxcy0);
+  Two_Product(pc[0], pb[1], cxby1, cxby0);
+  Two_Two_Diff(bxcy1, bxcy0, cxby1, cxby0, bc[3], bc[2], bc[1], bc[0]);
+
+  Two_Product(pc[0], pd[1], cxdy1, cxdy0);
+  Two_Product(pd[0], pc[1], dxcy1, dxcy0);
+  Two_Two_Diff(cxdy1, cxdy0, dxcy1, dxcy0, cd[3], cd[2], cd[1], cd[0]);
+
+  Two_Product(pd[0], pa[1], dxay1, dxay0);
+  Two_Product(pa[0], pd[1], axdy1, axdy0);
+  Two_Two_Diff(dxay1, dxay0, axdy1, axdy0, da[3], da[2], da[1], da[0]);
+
+  Two_Product(pa[0], pc[1], axcy1, axcy0);
+  Two_Product(pc[0], pa[1], cxay1, cxay0);
+  Two_Two_Diff(axcy1, axcy0, cxay1, cxay0, ac[3], ac[2], ac[1], ac[0]);
+
+  Two_Product(pb[0], pd[1], bxdy1, bxdy0);
+  Two_Product(pd[0], pb[1], dxby1, dxby0);
+  Two_Two_Diff(bxdy1, bxdy0, dxby1, dxby0, bd[3], bd[2], bd[1], bd[0]);
+  /* Merge the pair arrays three at a time: cda <- cd, da, ac and dab <- da, ab, bd (temp8 holds the partial). */
+  templen = fast_expansion_sum_zeroelim(4, cd, 4, da, temp8);
+  cdalen  = fast_expansion_sum_zeroelim(templen, temp8, 4, ac, cda);
+  templen = fast_expansion_sum_zeroelim(4, da, 4, ab, temp8);
+  dablen  = fast_expansion_sum_zeroelim(templen, temp8, 4, bd, dab);
+  for(i = 0; i < 4; i++) /* negate bd and ac in place: cda and dab (above) consumed them as computed, */
+    {                    /* while abc and bcd (below) consume the negated values                      */
+      bd[i] = -bd[i];
+      ac[i] = -ac[i];
+    }
+  templen = fast_expansion_sum_zeroelim(4, ab, 4, bc, temp8);
+  abclen  = fast_expansion_sum_zeroelim(templen, temp8, 4, ac, abc);
+  templen = fast_expansion_sum_zeroelim(4, bc, 4, cd, temp8);
+  bcdlen  = fast_expansion_sum_zeroelim(templen, temp8, 4, bd, bcd);
+  /* adet <- bcd scaled twice by pa[0], merged with bcd scaled twice by pa[1]; det24x/y and det48x/y are scratch. */
+  xlen = scale_expansion_zeroelim(bcdlen, bcd, pa[0], det24x);
+  xlen = scale_expansion_zeroelim(xlen, det24x, pa[0], det48x);
+  ylen = scale_expansion_zeroelim(bcdlen, bcd, pa[1], det24y);
+  ylen = scale_expansion_zeroelim(ylen, det24y, pa[1], det48y);
+  alen = fast_expansion_sum_zeroelim(xlen, det48x, ylen, det48y, adet);
+  /* bdet <- same pattern on cda with pb, but the second scale factor is negated (-pb[0], -pb[1]). */
+  xlen = scale_expansion_zeroelim(cdalen, cda, pb[0], det24x);
+  xlen = scale_expansion_zeroelim(xlen, det24x, -pb[0], det48x);
+  ylen = scale_expansion_zeroelim(cdalen, cda, pb[1], det24y);
+  ylen = scale_expansion_zeroelim(ylen, det24y, -pb[1], det48y);
+  blen = fast_expansion_sum_zeroelim(xlen, det48x, ylen, det48y, bdet);
+  /* cdet <- same pattern on dab with pc, no negation. */
+  xlen = scale_expansion_zeroelim(dablen, dab, pc[0], det24x);
+  xlen = scale_expansion_zeroelim(xlen, det24x, pc[0], det48x);
+  ylen = scale_expansion_zeroelim(dablen, dab, pc[1], det24y);
+  ylen = scale_expansion_zeroelim(ylen, det24y, pc[1], det48y);
+  clen = fast_expansion_sum_zeroelim(xlen, det48x, ylen, det48y, cdet);
+  /* ddet <- same pattern on abc with pd, second scale factor negated (-pd[0], -pd[1]). */
+  xlen = scale_expansion_zeroelim(abclen, abc, pd[0], det24x);
+  xlen = scale_expansion_zeroelim(xlen, det24x, -pd[0], det48x);
+  ylen = scale_expansion_zeroelim(abclen, abc, pd[1], det24y);
+  ylen = scale_expansion_zeroelim(ylen, det24y, -pd[1], det48y);
+  dlen = fast_expansion_sum_zeroelim(xlen, det48x, ylen, det48y, ddet);
+  /* Merge pairwise: abdet <- adet, bdet; cddet <- cdet, ddet; deter <- abdet, cddet. */
+  ablen    = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet);
+  cdlen    = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet);
+  deterlen = fast_expansion_sum_zeroelim(ablen, abdet, cdlen, cddet, deter);
+  /* NOTE(review): presumably the largest-magnitude component of deter -- confirm against the helper's contract. */
+  return deter[deterlen - 1];
+}
+
+REAL incircleslow(pa, pb, pc, pd) REAL *pa; /* Incircle determinant in which every coordinate difference is     */
+REAL *pb;                                   /* produced by Two_Diff as a head/tail pair (adx, adxtail, ...) and */
+REAL *pc;                                   /* both parts enter every later product; no early return.  Each     */
+REAL *pd;                                   /* point is read at [0] and [1] only.  Returns deter[deterlen - 1], */
+{                                           /* the last component of the final array.                           */
+  INEXACT REAL adx, bdx, cdx, ady, bdy, cdy;
+  REAL adxtail, bdxtail, cdxtail;
+  REAL adytail, bdytail, cdytail;
+  REAL negate, negatetail;
+  INEXACT REAL axby7, bxcy7, axcy7, bxay7, cxby7, cxay7;
+  REAL axby[8], bxcy[8], axcy[8], bxay[8], cxby[8], cxay[8];
+  REAL temp16[16];
+  int temp16len;
+  REAL detx[32], detxx[64], detxt[32], detxxt[64], detxtxt[64];
+  int xlen, xxlen, xtlen, xxtlen, xtxtlen;
+  REAL x1[128], x2[192];
+  int x1len, x2len;
+  REAL dety[32], detyy[64], detyt[32], detyyt[64], detytyt[64];
+  int ylen, yylen, ytlen, yytlen, ytytlen;
+  REAL y1[128], y2[192];
+  int y1len, y2len;
+  REAL adet[384], bdet[384], cdet[384], abdet[768], deter[1152];
+  int alen, blen, clen, ablen, deterlen;
+  int i;
+  /* Not named anywhere below; presumably used inside the expansions of Two_Diff / Two_Two_Product -- confirm. */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL a0hi, a0lo, a1hi, a1lo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j, _k, _l, _m, _n;
+  REAL _0, _1, _2;
+  /* Offsets of a, b, c from d; each Two_Diff call fills a head (adx, ...) and a tail (adxtail, ...). */
+  Two_Diff(pa[0], pd[0], adx, adxtail);
+  Two_Diff(pa[1], pd[1], ady, adytail);
+  Two_Diff(pb[0], pd[0], bdx, bdxtail);
+  Two_Diff(pb[1], pd[1], bdy, bdytail);
+  Two_Diff(pc[0], pd[0], cdx, cdxtail);
+  Two_Diff(pc[1], pd[1], cdy, cdytail);
+  /* Six 8-element products of offset pairs; bxay, cxby and axcy are built from a negated second operand. */
+  Two_Two_Product(adx, adxtail, bdy, bdytail, axby7, axby[6], axby[5], axby[4], axby[3], axby[2], axby[1], axby[0]);
+  axby[7]    = axby7;
+  negate     = -ady;
+  negatetail = -adytail;
+  Two_Two_Product(bdx, bdxtail, negate, negatetail, bxay7, bxay[6], bxay[5], bxay[4], bxay[3], bxay[2], bxay[1], bxay[0]);
+  bxay[7] = bxay7;
+  Two_Two_Product(bdx, bdxtail, cdy, cdytail, bxcy7, bxcy[6], bxcy[5], bxcy[4], bxcy[3], bxcy[2], bxcy[1], bxcy[0]);
+  bxcy[7]    = bxcy7;
+  negate     = -bdy;
+  negatetail = -bdytail;
+  Two_Two_Product(cdx, cdxtail, negate, negatetail, cxby7, cxby[6], cxby[5], cxby[4], cxby[3], cxby[2], cxby[1], cxby[0]);
+  cxby[7] = cxby7;
+  Two_Two_Product(cdx, cdxtail, ady, adytail, cxay7, cxay[6], cxay[5], cxay[4], cxay[3], cxay[2], cxay[1], cxay[0]);
+  cxay[7]    = cxay7;
+  negate     = -cdy;
+  negatetail = -cdytail;
+  Two_Two_Product(adx, adxtail, negate, negatetail, axcy7, axcy[6], axcy[5], axcy[4], axcy[3], axcy[2], axcy[1], axcy[0]);
+  axcy[7] = axcy7;
+  /* Term for a: merge bxcy with cxby (built above from the negated bdy) into temp16. */
+  temp16len = fast_expansion_sum_zeroelim(8, bxcy, 8, cxby, temp16);
+  /* x side: temp16 scaled by adx*adx, by adxtail*adx (then doubled in place) and by adxtail*adxtail, merged in x2. */
+  xlen   = scale_expansion_zeroelim(temp16len, temp16, adx, detx);
+  xxlen  = scale_expansion_zeroelim(xlen, detx, adx, detxx);
+  xtlen  = scale_expansion_zeroelim(temp16len, temp16, adxtail, detxt);
+  xxtlen = scale_expansion_zeroelim(xtlen, detxt, adx, detxxt);
+  for(i = 0; i < xxtlen; i++)
+    {
+      detxxt[i] *= 2.0;
+    }
+  xtxtlen = scale_expansion_zeroelim(xtlen, detxt, adxtail, detxtxt);
+  x1len   = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1);
+  x2len   = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2);
+  /* y side: the same sequence with ady / adytail, merged into y2. */
+  ylen   = scale_expansion_zeroelim(temp16len, temp16, ady, dety);
+  yylen  = scale_expansion_zeroelim(ylen, dety, ady, detyy);
+  ytlen  = scale_expansion_zeroelim(temp16len, temp16, adytail, detyt);
+  yytlen = scale_expansion_zeroelim(ytlen, detyt, ady, detyyt);
+  for(i = 0; i < yytlen; i++)
+    {
+      detyyt[i] *= 2.0;
+    }
+  ytytlen = scale_expansion_zeroelim(ytlen, detyt, adytail, detytyt);
+  y1len   = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1);
+  y2len   = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2);
+  /* adet <- x side merged with y side. */
+  alen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, adet);
+  /* Term for b: temp16 <- cxay, axcy; same x/y sequence with bdx, bdxtail, bdy, bdytail, reusing the scratch arrays. */
+  temp16len = fast_expansion_sum_zeroelim(8, cxay, 8, axcy, temp16);
+
+  xlen   = scale_expansion_zeroelim(temp16len, temp16, bdx, detx);
+  xxlen  = scale_expansion_zeroelim(xlen, detx, bdx, detxx);
+  xtlen  = scale_expansion_zeroelim(temp16len, temp16, bdxtail, detxt);
+  xxtlen = scale_expansion_zeroelim(xtlen, detxt, bdx, detxxt);
+  for(i = 0; i < xxtlen; i++)
+    {
+      detxxt[i] *= 2.0;
+    }
+  xtxtlen = scale_expansion_zeroelim(xtlen, detxt, bdxtail, detxtxt);
+  x1len   = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1);
+  x2len   = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2);
+
+  ylen   = scale_expansion_zeroelim(temp16len, temp16, bdy, dety);
+  yylen  = scale_expansion_zeroelim(ylen, dety, bdy, detyy);
+  ytlen  = scale_expansion_zeroelim(temp16len, temp16, bdytail, detyt);
+  yytlen = scale_expansion_zeroelim(ytlen, detyt, bdy, detyyt);
+  for(i = 0; i < yytlen; i++)
+    {
+      detyyt[i] *= 2.0;
+    }
+  ytytlen = scale_expansion_zeroelim(ytlen, detyt, bdytail, detytyt);
+  y1len   = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1);
+  y2len   = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2);
+
+  blen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, bdet);
+  /* Term for c: temp16 <- axby, bxay; same x/y sequence with cdx, cdxtail, cdy, cdytail, reusing the scratch arrays. */
+  temp16len = fast_expansion_sum_zeroelim(8, axby, 8, bxay, temp16);
+
+  xlen   = scale_expansion_zeroelim(temp16len, temp16, cdx, detx);
+  xxlen  = scale_expansion_zeroelim(xlen, detx, cdx, detxx);
+  xtlen  = scale_expansion_zeroelim(temp16len, temp16, cdxtail, detxt);
+  xxtlen = scale_expansion_zeroelim(xtlen, detxt, cdx, detxxt);
+  for(i = 0; i < xxtlen; i++)
+    {
+      detxxt[i] *= 2.0;
+    }
+  xtxtlen = scale_expansion_zeroelim(xtlen, detxt, cdxtail, detxtxt);
+  x1len   = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1);
+  x2len   = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2);
+
+  ylen   = scale_expansion_zeroelim(temp16len, temp16, cdy, dety);
+  yylen  = scale_expansion_zeroelim(ylen, dety, cdy, detyy);
+  ytlen  = scale_expansion_zeroelim(temp16len, temp16, cdytail, detyt);
+  yytlen = scale_expansion_zeroelim(ytlen, detyt, cdy, detyyt);
+  for(i = 0; i < yytlen; i++)
+    {
+      detyyt[i] *= 2.0;
+    }
+  ytytlen = scale_expansion_zeroelim(ytlen, detyt, cdytail, detytyt);
+  y1len   = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1);
+  y2len   = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2);
+
+  clen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, cdet);
+  /* Merge the three per-point terms: abdet <- adet, bdet; deter <- abdet, cdet. */
+  ablen    = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet);
+  deterlen = fast_expansion_sum_zeroelim(ablen, abdet, clen, cdet, deter);
+  /* NOTE(review): presumably the largest-magnitude component of deter -- confirm against the helper's contract. */
+  return deter[deterlen - 1];
+}
+
+REAL incircleadapt(pa, pb, pc, pd, permanent) REAL *pa;
+REAL *pb;
+REAL *pc;
+REAL *pd;
+REAL permanent;
+{
+  INEXACT REAL adx, bdx, cdx, ady, bdy, cdy;
+  REAL det, errbound;
+
+  INEXACT REAL bdxcdy1, cdxbdy1, cdxady1, adxcdy1, adxbdy1, bdxady1;
+  REAL bdxcdy0, cdxbdy0, cdxady0, adxcdy0, adxbdy0, bdxady0;
+  REAL bc[4], ca[4], ab[4];
+  INEXACT REAL bc3, ca3, ab3;
+  REAL axbc[8], axxbc[16], aybc[8], ayybc[16], adet[32];
+  int axbclen, axxbclen, aybclen, ayybclen, alen;
+  REAL bxca[8], bxxca[16], byca[8], byyca[16], bdet[32];
+  int bxcalen, bxxcalen, bycalen, byycalen, blen;
+  REAL cxab[8], cxxab[16], cyab[8], cyyab[16], cdet[32];
+  int cxablen, cxxablen, cyablen, cyyablen, clen;
+  REAL abdet[64];
+  int ablen;
+  REAL fin1[1152], fin2[1152];
+  REAL *finnow, *finother, *finswap;
+  int finlength;
+
+  REAL adxtail, bdxtail, cdxtail, adytail, bdytail, cdytail;
+  INEXACT REAL adxadx1, adyady1, bdxbdx1, bdybdy1, cdxcdx1, cdycdy1;
+  REAL adxadx0, adyady0, bdxbdx0, bdybdy0, cdxcdx0, cdycdy0;
+  REAL aa[4], bb[4], cc[4];
+  INEXACT REAL aa3, bb3, cc3;
+  INEXACT REAL ti1, tj1;
+  REAL ti0, tj0;
+  REAL u[4], v[4];
+  INEXACT REAL u3, v3;
+  REAL temp8[8], temp16a[16], temp16b[16], temp16c[16];
+  REAL temp32a[32], temp32b[32], temp48[48], temp64[64];
+  int temp8len, temp16alen, temp16blen, temp16clen;
+  int temp32alen, temp32blen, temp48len, temp64len;
+  REAL axtbb[8], axtcc[8], aytbb[8], aytcc[8];
+  int axtbblen, axtcclen, aytbblen, aytcclen;
+  REAL bxtaa[8], bxtcc[8], bytaa[8], bytcc[8];
+  int bxtaalen, bxtcclen, bytaalen, bytcclen;
+  REAL cxtaa[8], cxtbb[8], cytaa[8], cytbb[8];
+  int cxtaalen, cxtbblen, cytaalen, cytbblen;
+  REAL axtbc[8], aytbc[8], bxtca[8], bytca[8], cxtab[8], cytab[8];
+  int axtbclen = 0, aytbclen = 0, bxtcalen = 0, bytcalen = 0, cxtablen = 0, cytablen = 0;
+  REAL axtbct[16], aytbct[16], bxtcat[16], bytcat[16], cxtabt[16], cytabt[16];
+  int axtbctlen, aytbctlen, bxtcatlen, bytcatlen, cxtabtlen, cytabtlen;
+  REAL axtbctt[8], aytbctt[8], bxtcatt[8];
+  REAL bytcatt[8], cxtabtt[8], cytabtt[8];
+  int axtbcttlen, aytbcttlen, bxtcattlen, bytcattlen, cxtabttlen, cytabttlen;
+  REAL abt[8], bct[8], cat[8];
+  int abtlen, bctlen, catlen;
+  REAL abtt[4], bctt[4], catt[4];
+  int abttlen, bcttlen, cattlen;
+  INEXACT REAL abtt3, bctt3, catt3;
+  REAL negate;
+
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j;
+  REAL _0;
+
+  adx = (REAL)(pa[0] - pd[0]);
+  bdx = (REAL)(pb[0] - pd[0]);
+  cdx = (REAL)(pc[0] - pd[0]);
+  ady = (REAL)(pa[1] - pd[1]);
+  bdy = (REAL)(pb[1] - pd[1]);
+  cdy = (REAL)(pc[1] - pd[1]);
+
+  Two_Product(bdx, cdy, bdxcdy1, bdxcdy0);
+  Two_Product(cdx, bdy, cdxbdy1, cdxbdy0);
+  Two_Two_Diff(bdxcdy1, bdxcdy0, cdxbdy1, cdxbdy0, bc3, bc[2], bc[1], bc[0]);
+  bc[3]    = bc3;
+  axbclen  = scale_expansion_zeroelim(4, bc, adx, axbc);
+  axxbclen = scale_expansion_zeroelim(axbclen, axbc, adx, axxbc);
+  aybclen  = scale_expansion_zeroelim(4, bc, ady, aybc);
+  ayybclen = scale_expansion_zeroelim(aybclen, aybc, ady, ayybc);
+  alen     = fast_expansion_sum_zeroelim(axxbclen, axxbc, ayybclen, ayybc, adet);
+
+  Two_Product(cdx, ady, cdxady1, cdxady0);
+  Two_Product(adx, cdy, adxcdy1, adxcdy0);
+  Two_Two_Diff(cdxady1, cdxady0, adxcdy1, adxcdy0, ca3, ca[2], ca[1], ca[0]);
+  ca[3]    = ca3;
+  bxcalen  = scale_expansion_zeroelim(4, ca, bdx, bxca);
+  bxxcalen = scale_expansion_zeroelim(bxcalen, bxca, bdx, bxxca);
+  bycalen  = scale_expansion_zeroelim(4, ca, bdy, byca);
+  byycalen = scale_expansion_zeroelim(bycalen, byca, bdy, byyca);
+  blen     = fast_expansion_sum_zeroelim(bxxcalen, bxxca, byycalen, byyca, bdet);
+
+  Two_Product(adx, bdy, adxbdy1, adxbdy0);
+  Two_Product(bdx, ady, bdxady1, bdxady0);
+  Two_Two_Diff(adxbdy1, adxbdy0, bdxady1, bdxady0, ab3, ab[2], ab[1], ab[0]);
+  ab[3]    = ab3;
+  cxablen  = scale_expansion_zeroelim(4, ab, cdx, cxab);
+  cxxablen = scale_expansion_zeroelim(cxablen, cxab, cdx, cxxab);
+  cyablen  = scale_expansion_zeroelim(4, ab, cdy, cyab);
+  cyyablen = scale_expansion_zeroelim(cyablen, cyab, cdy, cyyab);
+  clen     = fast_expansion_sum_zeroelim(cxxablen, cxxab, cyyablen, cyyab, cdet);
+
+  ablen     = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet);
+  finlength = fast_expansion_sum_zeroelim(ablen, abdet, clen, cdet, fin1);
+
+  det      = estimate(finlength, fin1);
+  errbound = iccerrboundB * permanent;
+  if((det >= errbound) || (-det >= errbound))
+    {
+      return det;
+    }
+
+  Two_Diff_Tail(pa[0], pd[0], adx, adxtail);
+  Two_Diff_Tail(pa[1], pd[1], ady, adytail);
+  Two_Diff_Tail(pb[0], pd[0], bdx, bdxtail);
+  Two_Diff_Tail(pb[1], pd[1], bdy, bdytail);
+  Two_Diff_Tail(pc[0], pd[0], cdx, cdxtail);
+  Two_Diff_Tail(pc[1], pd[1], cdy, cdytail);
+  if((adxtail == 0.0) && (bdxtail == 0.0) && (cdxtail == 0.0) && (adytail == 0.0) && (bdytail == 0.0) && (cdytail == 0.0))
+    {
+      return det;
+    }
+
+  errbound = iccerrboundC * permanent + resulterrbound * Absolute(det);
+  det += ((adx * adx + ady * ady) * ((bdx * cdytail + cdy * bdxtail) - (bdy * cdxtail + cdx * bdytail)) +
+          2.0 * (adx * adxtail + ady * adytail) * (bdx * cdy - bdy * cdx)) +
+         ((bdx * bdx + bdy * bdy) * ((cdx * adytail + ady * cdxtail) - (cdy * adxtail + adx * cdytail)) +
+          2.0 * (bdx * bdxtail + bdy * bdytail) * (cdx * ady - cdy * adx)) +
+         ((cdx * cdx + cdy * cdy) * ((adx * bdytail + bdy * adxtail) - (ady * bdxtail + bdx * adytail)) +
+          2.0 * (cdx * cdxtail + cdy * cdytail) * (adx * bdy - ady * bdx));
+  if((det >= errbound) || (-det >= errbound))
+    {
+      return det;
+    }
+
+  finnow   = fin1;
+  finother = fin2;
+
+  if((bdxtail != 0.0) || (bdytail != 0.0) || (cdxtail != 0.0) || (cdytail != 0.0))
+    {
+      Square(adx, adxadx1, adxadx0);
+      Square(ady, adyady1, adyady0);
+      Two_Two_Sum(adxadx1, adxadx0, adyady1, adyady0, aa3, aa[2], aa[1], aa[0]);
+      aa[3] = aa3;
+    }
+  if((cdxtail != 0.0) || (cdytail != 0.0) || (adxtail != 0.0) || (adytail != 0.0))
+    {
+      Square(bdx, bdxbdx1, bdxbdx0);
+      Square(bdy, bdybdy1, bdybdy0);
+      Two_Two_Sum(bdxbdx1, bdxbdx0, bdybdy1, bdybdy0, bb3, bb[2], bb[1], bb[0]);
+      bb[3] = bb3;
+    }
+  if((adxtail != 0.0) || (adytail != 0.0) || (bdxtail != 0.0) || (bdytail != 0.0))
+    {
+      Square(cdx, cdxcdx1, cdxcdx0);
+      Square(cdy, cdycdy1, cdycdy0);
+      Two_Two_Sum(cdxcdx1, cdxcdx0, cdycdy1, cdycdy0, cc3, cc[2], cc[1], cc[0]);
+      cc[3] = cc3;
+    }
+
+  if(adxtail != 0.0)
+    {
+      axtbclen   = scale_expansion_zeroelim(4, bc, adxtail, axtbc);
+      temp16alen = scale_expansion_zeroelim(axtbclen, axtbc, 2.0 * adx, temp16a);
+
+      axtcclen   = scale_expansion_zeroelim(4, cc, adxtail, axtcc);
+      temp16blen = scale_expansion_zeroelim(axtcclen, axtcc, bdy, temp16b);
+
+      axtbblen   = scale_expansion_zeroelim(4, bb, adxtail, axtbb);
+      temp16clen = scale_expansion_zeroelim(axtbblen, axtbb, -cdy, temp16c);
+
+      temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a);
+      temp48len  = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48);
+      finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother);
+      finswap    = finnow;
+      finnow     = finother;
+      finother   = finswap;
+    }
+  if(adytail != 0.0)
+    {
+      aytbclen   = scale_expansion_zeroelim(4, bc, adytail, aytbc);
+      temp16alen = scale_expansion_zeroelim(aytbclen, aytbc, 2.0 * ady, temp16a);
+
+      aytbblen   = scale_expansion_zeroelim(4, bb, adytail, aytbb);
+      temp16blen = scale_expansion_zeroelim(aytbblen, aytbb, cdx, temp16b);
+
+      aytcclen   = scale_expansion_zeroelim(4, cc, adytail, aytcc);
+      temp16clen = scale_expansion_zeroelim(aytcclen, aytcc, -bdx, temp16c);
+
+      temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a);
+      temp48len  = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48);
+      finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother);
+      finswap    = finnow;
+      finnow     = finother;
+      finother   = finswap;
+    }
+  if(bdxtail != 0.0)
+    {
+      bxtcalen   = scale_expansion_zeroelim(4, ca, bdxtail, bxtca);
+      temp16alen = scale_expansion_zeroelim(bxtcalen, bxtca, 2.0 * bdx, temp16a);
+
+      bxtaalen   = scale_expansion_zeroelim(4, aa, bdxtail, bxtaa);
+      temp16blen = scale_expansion_zeroelim(bxtaalen, bxtaa, cdy, temp16b);
+
+      bxtcclen   = scale_expansion_zeroelim(4, cc, bdxtail, bxtcc);
+      temp16clen = scale_expansion_zeroelim(bxtcclen, bxtcc, -ady, temp16c);
+
+      temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a);
+      temp48len  = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48);
+      finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother);
+      finswap    = finnow;
+      finnow     = finother;
+      finother   = finswap;
+    }
+  if(bdytail != 0.0)
+    {
+      bytcalen   = scale_expansion_zeroelim(4, ca, bdytail, bytca);
+      temp16alen = scale_expansion_zeroelim(bytcalen, bytca, 2.0 * bdy, temp16a);
+
+      bytcclen   = scale_expansion_zeroelim(4, cc, bdytail, bytcc);
+      temp16blen = scale_expansion_zeroelim(bytcclen, bytcc, adx, temp16b);
+
+      bytaalen   = scale_expansion_zeroelim(4, aa, bdytail, bytaa);
+      temp16clen = scale_expansion_zeroelim(bytaalen, bytaa, -cdx, temp16c);
+
+      temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a);
+      temp48len  = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48);
+      finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother);
+      finswap    = finnow;
+      finnow     = finother;
+      finother   = finswap;
+    }
+  if(cdxtail != 0.0)
+    {
+      cxtablen   = scale_expansion_zeroelim(4, ab, cdxtail, cxtab);
+      temp16alen = scale_expansion_zeroelim(cxtablen, cxtab, 2.0 * cdx, temp16a);
+
+      cxtbblen   = scale_expansion_zeroelim(4, bb, cdxtail, cxtbb);
+      temp16blen = scale_expansion_zeroelim(cxtbblen, cxtbb, ady, temp16b);
+
+      cxtaalen   = scale_expansion_zeroelim(4, aa, cdxtail, cxtaa);
+      temp16clen = scale_expansion_zeroelim(cxtaalen, cxtaa, -bdy, temp16c);
+
+      temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a);
+      temp48len  = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48);
+      finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother);
+      finswap    = finnow;
+      finnow     = finother;
+      finother   = finswap;
+    }
+  if(cdytail != 0.0)
+    {
+      cytablen   = scale_expansion_zeroelim(4, ab, cdytail, cytab);
+      temp16alen = scale_expansion_zeroelim(cytablen, cytab, 2.0 * cdy, temp16a);
+
+      cytaalen   = scale_expansion_zeroelim(4, aa, cdytail, cytaa);
+      temp16blen = scale_expansion_zeroelim(cytaalen, cytaa, bdx, temp16b);
+
+      cytbblen   = scale_expansion_zeroelim(4, bb, cdytail, cytbb);
+      temp16clen = scale_expansion_zeroelim(cytbblen, cytbb, -adx, temp16c);
+
+      temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a);
+      temp48len  = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48);
+      finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother);
+      finswap    = finnow;
+      finnow     = finother;
+      finother   = finswap;
+    }
+
+  if((adxtail != 0.0) || (adytail != 0.0))
+    {
+      if((bdxtail != 0.0) || (bdytail != 0.0) || (cdxtail != 0.0) || (cdytail != 0.0))
+        {
+          Two_Product(bdxtail, cdy, ti1, ti0);
+          Two_Product(bdx, cdytail, tj1, tj0);
+          Two_Two_Sum(ti1, ti0, tj1, tj0, u3, u[2], u[1], u[0]);
+          u[3]   = u3;
+          negate = -bdy;
+          Two_Product(cdxtail, negate, ti1, ti0);
+          negate = -bdytail;
+          Two_Product(cdx, negate, tj1, tj0);
+          Two_Two_Sum(ti1, ti0, tj1, tj0, v3, v[2], v[1], v[0]);
+          v[3]   = v3;
+          bctlen = fast_expansion_sum_zeroelim(4, u, 4, v, bct);
+
+          Two_Product(bdxtail, cdytail, ti1, ti0);
+          Two_Product(cdxtail, bdytail, tj1, tj0);
+          Two_Two_Diff(ti1, ti0, tj1, tj0, bctt3, bctt[2], bctt[1], bctt[0]);
+          bctt[3] = bctt3;
+          bcttlen = 4;
+        }
+      else
+        {
+          bct[0]  = 0.0;
+          bctlen  = 1;
+          bctt[0] = 0.0;
+          bcttlen = 1;
+        }
+
+      if(adxtail != 0.0)
+        {
+          temp16alen = scale_expansion_zeroelim(axtbclen, axtbc, adxtail, temp16a);
+          axtbctlen  = scale_expansion_zeroelim(bctlen, bct, adxtail, axtbct);
+          temp32alen = scale_expansion_zeroelim(axtbctlen, axtbct, 2.0 * adx, temp32a);
+          temp48len  = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48);
+          finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother);
+          finswap    = finnow;
+          finnow     = finother;
+          finother   = finswap;
+          if(bdytail != 0.0)
+            {
+              temp8len   = scale_expansion_zeroelim(4, cc, adxtail, temp8);
+              temp16alen = scale_expansion_zeroelim(temp8len, temp8, bdytail, temp16a);
+              finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother);
+              finswap    = finnow;
+              finnow     = finother;
+              finother   = finswap;
+            }
+          if(cdytail != 0.0)
+            {
+              temp8len   = scale_expansion_zeroelim(4, bb, -adxtail, temp8);
+              temp16alen = scale_expansion_zeroelim(temp8len, temp8, cdytail, temp16a);
+              finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother);
+              finswap    = finnow;
+              finnow     = finother;
+              finother   = finswap;
+            }
+
+          temp32alen = scale_expansion_zeroelim(axtbctlen, axtbct, adxtail, temp32a);
+          axtbcttlen = scale_expansion_zeroelim(bcttlen, bctt, adxtail, axtbctt);
+          temp16alen = scale_expansion_zeroelim(axtbcttlen, axtbctt, 2.0 * adx, temp16a);
+          temp16blen = scale_expansion_zeroelim(axtbcttlen, axtbctt, adxtail, temp16b);
+          temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b);
+          temp64len  = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64);
+          finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother);
+          finswap    = finnow;
+          finnow     = finother;
+          finother   = finswap;
+        }
+      if(adytail != 0.0)
+        {
+          temp16alen = scale_expansion_zeroelim(aytbclen, aytbc, adytail, temp16a);
+          aytbctlen  = scale_expansion_zeroelim(bctlen, bct, adytail, aytbct);
+          temp32alen = scale_expansion_zeroelim(aytbctlen, aytbct, 2.0 * ady, temp32a);
+          temp48len  = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48);
+          finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother);
+          finswap    = finnow;
+          finnow     = finother;
+          finother   = finswap;
+
+          temp32alen = scale_expansion_zeroelim(aytbctlen, aytbct, adytail, temp32a);
+          aytbcttlen = scale_expansion_zeroelim(bcttlen, bctt, adytail, aytbctt);
+          temp16alen = scale_expansion_zeroelim(aytbcttlen, aytbctt, 2.0 * ady, temp16a);
+          temp16blen = scale_expansion_zeroelim(aytbcttlen, aytbctt, adytail, temp16b);
+          temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b);
+          temp64len  = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64);
+          finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother);
+          finswap    = finnow;
+          finnow     = finother;
+          finother   = finswap;
+        }
+    }
+  if((bdxtail != 0.0) || (bdytail != 0.0))
+    {
+      if((cdxtail != 0.0) || (cdytail != 0.0) || (adxtail != 0.0) || (adytail != 0.0))
+        {
+          Two_Product(cdxtail, ady, ti1, ti0);
+          Two_Product(cdx, adytail, tj1, tj0);
+          Two_Two_Sum(ti1, ti0, tj1, tj0, u3, u[2], u[1], u[0]);
+          u[3]   = u3;
+          negate = -cdy;
+          Two_Product(adxtail, negate, ti1, ti0);
+          negate = -cdytail;
+          Two_Product(adx, negate, tj1, tj0);
+          Two_Two_Sum(ti1, ti0, tj1, tj0, v3, v[2], v[1], v[0]);
+          v[3]   = v3;
+          catlen = fast_expansion_sum_zeroelim(4, u, 4, v, cat);
+
+          Two_Product(cdxtail, adytail, ti1, ti0);
+          Two_Product(adxtail, cdytail, tj1, tj0);
+          Two_Two_Diff(ti1, ti0, tj1, tj0, catt3, catt[2], catt[1], catt[0]);
+          catt[3] = catt3;
+          cattlen = 4;
+        }
+      else
+        {
+          cat[0]  = 0.0;
+          catlen  = 1;
+          catt[0] = 0.0;
+          cattlen = 1;
+        }
+
+      if(bdxtail != 0.0)
+        {
+          temp16alen = scale_expansion_zeroelim(bxtcalen, bxtca, bdxtail, temp16a);
+          bxtcatlen  = scale_expansion_zeroelim(catlen, cat, bdxtail, bxtcat);
+          temp32alen = scale_expansion_zeroelim(bxtcatlen, bxtcat, 2.0 * bdx, temp32a);
+          temp48len  = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48);
+          finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother);
+          finswap    = finnow;
+          finnow     = finother;
+          finother   = finswap;
+          if(cdytail != 0.0)
+            {
+              temp8len   = scale_expansion_zeroelim(4, aa, bdxtail, temp8);
+              temp16alen = scale_expansion_zeroelim(temp8len, temp8, cdytail, temp16a);
+              finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother);
+              finswap    = finnow;
+              finnow     = finother;
+              finother   = finswap;
+            }
+          if(adytail != 0.0)
+            {
+              temp8len   = scale_expansion_zeroelim(4, cc, -bdxtail, temp8);
+              temp16alen = scale_expansion_zeroelim(temp8len, temp8, adytail, temp16a);
+              finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother);
+              finswap    = finnow;
+              finnow     = finother;
+              finother   = finswap;
+            }
+
+          temp32alen = scale_expansion_zeroelim(bxtcatlen, bxtcat, bdxtail, temp32a);
+          bxtcattlen = scale_expansion_zeroelim(cattlen, catt, bdxtail, bxtcatt);
+          temp16alen = scale_expansion_zeroelim(bxtcattlen, bxtcatt, 2.0 * bdx, temp16a);
+          temp16blen = scale_expansion_zeroelim(bxtcattlen, bxtcatt, bdxtail, temp16b);
+          temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b);
+          temp64len  = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64);
+          finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother);
+          finswap    = finnow;
+          finnow     = finother;
+          finother   = finswap;
+        }
+      if(bdytail != 0.0)
+        {
+          temp16alen = scale_expansion_zeroelim(bytcalen, bytca, bdytail, temp16a);
+          bytcatlen  = scale_expansion_zeroelim(catlen, cat, bdytail, bytcat);
+          temp32alen = scale_expansion_zeroelim(bytcatlen, bytcat, 2.0 * bdy, temp32a);
+          temp48len  = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48);
+          finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother);
+          finswap    = finnow;
+          finnow     = finother;
+          finother   = finswap;
+
+          temp32alen = scale_expansion_zeroelim(bytcatlen, bytcat, bdytail, temp32a);
+          bytcattlen = scale_expansion_zeroelim(cattlen, catt, bdytail, bytcatt);
+          temp16alen = scale_expansion_zeroelim(bytcattlen, bytcatt, 2.0 * bdy, temp16a);
+          temp16blen = scale_expansion_zeroelim(bytcattlen, bytcatt, bdytail, temp16b);
+          temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b);
+          temp64len  = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64);
+          finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother);
+          finswap    = finnow;
+          finnow     = finother;
+          finother   = finswap;
+        }
+    }
+  if((cdxtail != 0.0) || (cdytail != 0.0))
+    {
+      if((adxtail != 0.0) || (adytail != 0.0) || (bdxtail != 0.0) || (bdytail != 0.0))
+        {
+          Two_Product(adxtail, bdy, ti1, ti0);
+          Two_Product(adx, bdytail, tj1, tj0);
+          Two_Two_Sum(ti1, ti0, tj1, tj0, u3, u[2], u[1], u[0]);
+          u[3]   = u3;
+          negate = -ady;
+          Two_Product(bdxtail, negate, ti1, ti0);
+          negate = -adytail;
+          Two_Product(bdx, negate, tj1, tj0);
+          Two_Two_Sum(ti1, ti0, tj1, tj0, v3, v[2], v[1], v[0]);
+          v[3]   = v3;
+          abtlen = fast_expansion_sum_zeroelim(4, u, 4, v, abt);
+
+          Two_Product(adxtail, bdytail, ti1, ti0);
+          Two_Product(bdxtail, adytail, tj1, tj0);
+          Two_Two_Diff(ti1, ti0, tj1, tj0, abtt3, abtt[2], abtt[1], abtt[0]);
+          abtt[3] = abtt3;
+          abttlen = 4;
+        }
+      else
+        {
+          abt[0]  = 0.0;
+          abtlen  = 1;
+          abtt[0] = 0.0;
+          abttlen = 1;
+        }
+
+      if(cdxtail != 0.0)
+        {
+          temp16alen = scale_expansion_zeroelim(cxtablen, cxtab, cdxtail, temp16a);
+          cxtabtlen  = scale_expansion_zeroelim(abtlen, abt, cdxtail, cxtabt);
+          temp32alen = scale_expansion_zeroelim(cxtabtlen, cxtabt, 2.0 * cdx, temp32a);
+          temp48len  = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48);
+          finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother);
+          finswap    = finnow;
+          finnow     = finother;
+          finother   = finswap;
+          if(adytail != 0.0)
+            {
+              temp8len   = scale_expansion_zeroelim(4, bb, cdxtail, temp8);
+              temp16alen = scale_expansion_zeroelim(temp8len, temp8, adytail, temp16a);
+              finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother);
+              finswap    = finnow;
+              finnow     = finother;
+              finother   = finswap;
+            }
+          if(bdytail != 0.0)
+            {
+              temp8len   = scale_expansion_zeroelim(4, aa, -cdxtail, temp8);
+              temp16alen = scale_expansion_zeroelim(temp8len, temp8, bdytail, temp16a);
+              finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother);
+              finswap    = finnow;
+              finnow     = finother;
+              finother   = finswap;
+            }
+
+          temp32alen = scale_expansion_zeroelim(cxtabtlen, cxtabt, cdxtail, temp32a);
+          cxtabttlen = scale_expansion_zeroelim(abttlen, abtt, cdxtail, cxtabtt);
+          temp16alen = scale_expansion_zeroelim(cxtabttlen, cxtabtt, 2.0 * cdx, temp16a);
+          temp16blen = scale_expansion_zeroelim(cxtabttlen, cxtabtt, cdxtail, temp16b);
+          temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b);
+          temp64len  = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64);
+          finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother);
+          finswap    = finnow;
+          finnow     = finother;
+          finother   = finswap;
+        }
+      if(cdytail != 0.0)
+        {
+          temp16alen = scale_expansion_zeroelim(cytablen, cytab, cdytail, temp16a);
+          cytabtlen  = scale_expansion_zeroelim(abtlen, abt, cdytail, cytabt);
+          temp32alen = scale_expansion_zeroelim(cytabtlen, cytabt, 2.0 * cdy, temp32a);
+          temp48len  = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48);
+          finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother);
+          finswap    = finnow;
+          finnow     = finother;
+          finother   = finswap;
+
+          temp32alen = scale_expansion_zeroelim(cytabtlen, cytabt, cdytail, temp32a);
+          cytabttlen = scale_expansion_zeroelim(abttlen, abtt, cdytail, cytabtt);
+          temp16alen = scale_expansion_zeroelim(cytabttlen, cytabtt, 2.0 * cdy, temp16a);
+          temp16blen = scale_expansion_zeroelim(cytabttlen, cytabtt, cdytail, temp16b);
+          temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b);
+          temp64len  = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64);
+          finlength  = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother);
+          finswap    = finnow;
+          finnow     = finother;
+          finother   = finswap;
+        }
+    }
+
+  return finnow[finlength - 1];
+}
+
+/* incircle(): evaluates the 3x3 incircle determinant of pa, pb, pc (taken relative to pd) in plain REAL
+ * arithmetic.  The value is returned directly when its magnitude exceeds iccerrboundA * permanent;
+ * otherwise the result of incircleadapt() is returned.  Each point holds x at index 0 and y at index 1. */
+REAL incircle(REAL *pa, REAL *pb, REAL *pc, REAL *pd)
+{
+  REAL adx, bdx, cdx, ady, bdy, cdy;
+  REAL bdxcdy, cdxbdy, cdxady, adxcdy, adxbdy, bdxady;
+  REAL alift, blift, clift;
+  REAL det;
+  REAL permanent, errbound;
+  /* coordinates of pa, pb, pc relative to pd */
+  adx = pa[0] - pd[0];
+  bdx = pb[0] - pd[0];
+  cdx = pc[0] - pd[0];
+  ady = pa[1] - pd[1];
+  bdy = pb[1] - pd[1];
+  cdy = pc[1] - pd[1];
+
+  bdxcdy = bdx * cdy;
+  cdxbdy = cdx * bdy;
+  alift  = adx * adx + ady * ady;
+
+  cdxady = cdx * ady;
+  adxcdy = adx * cdy;
+  blift  = bdx * bdx + bdy * bdy;
+
+  adxbdy = adx * bdy;
+  bdxady = bdx * ady;
+  clift  = cdx * cdx + cdy * cdy;
+  /* determinant of the rows (dx, dy, lift), expanded along the lift (squared-length) column */
+  det = alift * (bdxcdy - cdxbdy) + blift * (cdxady - adxcdy) + clift * (adxbdy - bdxady);
+  /* same sum with every product replaced by its absolute value; it scales the error bound below */
+  permanent = (Absolute(bdxcdy) + Absolute(cdxbdy)) * alift + (Absolute(cdxady) + Absolute(adxcdy)) * blift +
+              (Absolute(adxbdy) + Absolute(bdxady)) * clift;
+  errbound = iccerrboundA * permanent;
+  if((det > errbound) || (-det > errbound))
+    {
+      return det;
+    }
+
+  return incircleadapt(pa, pb, pc, pd, permanent);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  inspherefast()   Approximate 3D insphere test.  Nonrobust.               */
+/*  insphereexact()   Exact 3D insphere test.  Robust.                       */
+/*  insphereslow()   Another exact 3D insphere test.  Robust.                */
+/*  insphere()   Adaptive exact 3D insphere test.  Robust.                   */
+/*                                                                           */
+/*               Return a positive value if the point pe lies inside the     */
+/*               sphere passing through pa, pb, pc, and pd; a negative value */
+/*               if it lies outside; and zero if the five points are         */
+/*               cospherical.  The points pa, pb, pc, and pd must be ordered */
+/*               so that they have a positive orientation (as defined by     */
+/*               orient3d()), or the sign of the result will be reversed.    */
+/*                                                                           */
+/*  Only the first and last routine should be used; the middle two are for   */
+/*  timings.                                                                 */
+/*                                                                           */
+/*  The last three use exact arithmetic to ensure a correct answer.  The     */
+/*  result returned is the determinant of a matrix.  In insphere() only,     */
+/*  this determinant is computed adaptively, in the sense that exact         */
+/*  arithmetic is used only to the degree it is needed to ensure that the    */
+/*  returned value has the correct sign.  Hence, insphere() is usually quite */
+/*  fast, but will run more slowly when the input points are cospherical or  */
+/*  nearly so.                                                               */
+/*                                                                           */
+/*****************************************************************************/
+
+/* inspherefast(): single-pass evaluation of the insphere determinant in plain REAL arithmetic, with no
+ * error bound and no exact fallback.  pa..pd and pe are 3D points (x, y, z at indices 0, 1, 2); all
+ * coordinates are first taken relative to pe.  Returns
+ * (dlift * abc - clift * dab) + (blift * cda - alift * bcd), evaluated in exactly that order. */
+REAL inspherefast(REAL *pa, REAL *pb, REAL *pc, REAL *pd, REAL *pe)
+{
+  REAL aex, bex, cex, dex;
+  REAL aey, bey, cey, dey;
+  REAL aez, bez, cez, dez;
+  REAL alift, blift, clift, dlift;
+  REAL ab, bc, cd, da, ac, bd;
+  REAL abc, bcd, cda, dab;
+  /* offsets of pa..pd from pe */
+  aex = pa[0] - pe[0];
+  bex = pb[0] - pe[0];
+  cex = pc[0] - pe[0];
+  dex = pd[0] - pe[0];
+  aey = pa[1] - pe[1];
+  bey = pb[1] - pe[1];
+  cey = pc[1] - pe[1];
+  dey = pd[1] - pe[1];
+  aez = pa[2] - pe[2];
+  bez = pb[2] - pe[2];
+  cez = pc[2] - pe[2];
+  dez = pd[2] - pe[2];
+  /* 2x2 minors of the x/y offsets, one per pair of points */
+  ab = aex * bey - bex * aey;
+  bc = bex * cey - cex * bey;
+  cd = cex * dey - dex * cey;
+  da = dex * aey - aex * dey;
+
+  ac = aex * cey - cex * aey;
+  bd = bex * dey - dex * bey;
+  /* combine the x/y minors with the z offsets, three points at a time */
+  abc = aez * bc - bez * ac + cez * ab;
+  bcd = bez * cd - cez * bd + dez * bc;
+  cda = cez * da + dez * ac + aez * cd;
+  dab = dez * ab + aez * bd + bez * da;
+  /* squared distance of each point from pe */
+  alift = aex * aex + aey * aey + aez * aez;
+  blift = bex * bex + bey * bey + bez * bez;
+  clift = cex * cex + cey * cey + cez * cez;
+  dlift = dex * dex + dey * dey + dez * dez;
+
+  return (dlift * abc - clift * dab) + (blift * cda - alift * bcd);
+}
+
+/* insphereexact(): insphere determinant of the five 3D points pa..pe (x, y, z at indices 0, 1, 2),
+ * accumulated entirely in multi-component expansions via Two_Product, Two_Two_Diff,
+ * scale_expansion_zeroelim and fast_expansion_sum_zeroelim.  Works on the raw coordinates (no
+ * translation by pe) and returns the last component of the final expansion, deter[deterlen - 1]. */
+REAL insphereexact(REAL *pa, REAL *pb, REAL *pc, REAL *pd, REAL *pe)
+{
+  INEXACT REAL axby1, bxcy1, cxdy1, dxey1, exay1;
+  INEXACT REAL bxay1, cxby1, dxcy1, exdy1, axey1;
+  INEXACT REAL axcy1, bxdy1, cxey1, dxay1, exby1;
+  INEXACT REAL cxay1, dxby1, excy1, axdy1, bxey1;
+  REAL axby0, bxcy0, cxdy0, dxey0, exay0;
+  REAL bxay0, cxby0, dxcy0, exdy0, axey0;
+  REAL axcy0, bxdy0, cxey0, dxay0, exby0;
+  REAL cxay0, dxby0, excy0, axdy0, bxey0;
+  REAL ab[4], bc[4], cd[4], de[4], ea[4];
+  REAL ac[4], bd[4], ce[4], da[4], eb[4];
+  REAL temp8a[8], temp8b[8], temp16[16];
+  int temp8alen, temp8blen, temp16len;
+  REAL abc[24], bcd[24], cde[24], dea[24], eab[24];
+  REAL abd[24], bce[24], cda[24], deb[24], eac[24];
+  int abclen, bcdlen, cdelen, dealen, eablen;
+  int abdlen, bcelen, cdalen, deblen, eaclen;
+  REAL temp48a[48], temp48b[48];
+  int temp48alen, temp48blen;
+  REAL abcd[96], bcde[96], cdea[96], deab[96], eabc[96];
+  int abcdlen, bcdelen, cdealen, deablen, eabclen;
+  REAL temp192[192];
+  REAL det384x[384], det384y[384], det384z[384];
+  int xlen, ylen, zlen;
+  REAL detxy[768];
+  int xylen;
+  REAL adet[1152], bdet[1152], cdet[1152], ddet[1152], edet[1152];
+  int alen, blen, clen, dlen, elen;
+  REAL abdet[2304], cddet[2304], cdedet[3456];
+  int ablen, cdlen;
+  REAL deter[5760];
+  int deterlen;
+  int i;
+
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j;
+  REAL _0;
+  /* x/y cross terms for all ten point pairs, each stored as a 4-component expansion */
+  Two_Product(pa[0], pb[1], axby1, axby0);
+  Two_Product(pb[0], pa[1], bxay1, bxay0);
+  Two_Two_Diff(axby1, axby0, bxay1, bxay0, ab[3], ab[2], ab[1], ab[0]);
+
+  Two_Product(pb[0], pc[1], bxcy1, bxcy0);
+  Two_Product(pc[0], pb[1], cxby1, cxby0);
+  Two_Two_Diff(bxcy1, bxcy0, cxby1, cxby0, bc[3], bc[2], bc[1], bc[0]);
+
+  Two_Product(pc[0], pd[1], cxdy1, cxdy0);
+  Two_Product(pd[0], pc[1], dxcy1, dxcy0);
+  Two_Two_Diff(cxdy1, cxdy0, dxcy1, dxcy0, cd[3], cd[2], cd[1], cd[0]);
+
+  Two_Product(pd[0], pe[1], dxey1, dxey0);
+  Two_Product(pe[0], pd[1], exdy1, exdy0);
+  Two_Two_Diff(dxey1, dxey0, exdy1, exdy0, de[3], de[2], de[1], de[0]);
+
+  Two_Product(pe[0], pa[1], exay1, exay0);
+  Two_Product(pa[0], pe[1], axey1, axey0);
+  Two_Two_Diff(exay1, exay0, axey1, axey0, ea[3], ea[2], ea[1], ea[0]);
+
+  Two_Product(pa[0], pc[1], axcy1, axcy0);
+  Two_Product(pc[0], pa[1], cxay1, cxay0);
+  Two_Two_Diff(axcy1, axcy0, cxay1, cxay0, ac[3], ac[2], ac[1], ac[0]);
+
+  Two_Product(pb[0], pd[1], bxdy1, bxdy0);
+  Two_Product(pd[0], pb[1], dxby1, dxby0);
+  Two_Two_Diff(bxdy1, bxdy0, dxby1, dxby0, bd[3], bd[2], bd[1], bd[0]);
+
+  Two_Product(pc[0], pe[1], cxey1, cxey0);
+  Two_Product(pe[0], pc[1], excy1, excy0);
+  Two_Two_Diff(cxey1, cxey0, excy1, excy0, ce[3], ce[2], ce[1], ce[0]);
+
+  Two_Product(pd[0], pa[1], dxay1, dxay0);
+  Two_Product(pa[0], pd[1], axdy1, axdy0);
+  Two_Two_Diff(dxay1, dxay0, axdy1, axdy0, da[3], da[2], da[1], da[0]);
+
+  Two_Product(pe[0], pb[1], exby1, exby0);
+  Two_Product(pb[0], pe[1], bxey1, bxey0);
+  Two_Two_Diff(exby1, exby0, bxey1, bxey0, eb[3], eb[2], eb[1], eb[0]);
+  /* ten three-point terms (arrays of 24): pair terms scaled by z coordinates, then summed */
+  temp8alen = scale_expansion_zeroelim(4, bc, pa[2], temp8a);
+  temp8blen = scale_expansion_zeroelim(4, ac, -pb[2], temp8b);
+  temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16);
+  temp8alen = scale_expansion_zeroelim(4, ab, pc[2], temp8a);
+  abclen    = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, abc);
+
+  temp8alen = scale_expansion_zeroelim(4, cd, pb[2], temp8a);
+  temp8blen = scale_expansion_zeroelim(4, bd, -pc[2], temp8b);
+  temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16);
+  temp8alen = scale_expansion_zeroelim(4, bc, pd[2], temp8a);
+  bcdlen    = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, bcd);
+
+  temp8alen = scale_expansion_zeroelim(4, de, pc[2], temp8a);
+  temp8blen = scale_expansion_zeroelim(4, ce, -pd[2], temp8b);
+  temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16);
+  temp8alen = scale_expansion_zeroelim(4, cd, pe[2], temp8a);
+  cdelen    = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, cde);
+
+  temp8alen = scale_expansion_zeroelim(4, ea, pd[2], temp8a);
+  temp8blen = scale_expansion_zeroelim(4, da, -pe[2], temp8b);
+  temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16);
+  temp8alen = scale_expansion_zeroelim(4, de, pa[2], temp8a);
+  dealen    = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, dea);
+
+  temp8alen = scale_expansion_zeroelim(4, ab, pe[2], temp8a);
+  temp8blen = scale_expansion_zeroelim(4, eb, -pa[2], temp8b);
+  temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16);
+  temp8alen = scale_expansion_zeroelim(4, ea, pb[2], temp8a);
+  eablen    = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, eab);
+
+  temp8alen = scale_expansion_zeroelim(4, bd, pa[2], temp8a);
+  temp8blen = scale_expansion_zeroelim(4, da, pb[2], temp8b);
+  temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16);
+  temp8alen = scale_expansion_zeroelim(4, ab, pd[2], temp8a);
+  abdlen    = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, abd);
+
+  temp8alen = scale_expansion_zeroelim(4, ce, pb[2], temp8a);
+  temp8blen = scale_expansion_zeroelim(4, eb, pc[2], temp8b);
+  temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16);
+  temp8alen = scale_expansion_zeroelim(4, bc, pe[2], temp8a);
+  bcelen    = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, bce);
+
+  temp8alen = scale_expansion_zeroelim(4, da, pc[2], temp8a);
+  temp8blen = scale_expansion_zeroelim(4, ac, pd[2], temp8b);
+  temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16);
+  temp8alen = scale_expansion_zeroelim(4, cd, pa[2], temp8a);
+  cdalen    = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, cda);
+
+  temp8alen = scale_expansion_zeroelim(4, eb, pd[2], temp8a);
+  temp8blen = scale_expansion_zeroelim(4, bd, pe[2], temp8b);
+  temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16);
+  temp8alen = scale_expansion_zeroelim(4, de, pb[2], temp8a);
+  deblen    = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, deb);
+
+  temp8alen = scale_expansion_zeroelim(4, ac, pe[2], temp8a);
+  temp8blen = scale_expansion_zeroelim(4, ce, pa[2], temp8b);
+  temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16);
+  temp8alen = scale_expansion_zeroelim(4, ea, pc[2], temp8a);
+  eaclen    = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, eac);
+  /* per point: a four-point term is scaled twice by each of that point's x, y, z and the three are summed */
+  temp48alen = fast_expansion_sum_zeroelim(cdelen, cde, bcelen, bce, temp48a);
+  temp48blen = fast_expansion_sum_zeroelim(deblen, deb, bcdlen, bcd, temp48b);
+  for(i = 0; i < temp48blen; i++)
+    {
+      temp48b[i] = -temp48b[i];
+    }
+  bcdelen = fast_expansion_sum_zeroelim(temp48alen, temp48a, temp48blen, temp48b, bcde);
+  xlen    = scale_expansion_zeroelim(bcdelen, bcde, pa[0], temp192);
+  xlen    = scale_expansion_zeroelim(xlen, temp192, pa[0], det384x);
+  ylen    = scale_expansion_zeroelim(bcdelen, bcde, pa[1], temp192);
+  ylen    = scale_expansion_zeroelim(ylen, temp192, pa[1], det384y);
+  zlen    = scale_expansion_zeroelim(bcdelen, bcde, pa[2], temp192);
+  zlen    = scale_expansion_zeroelim(zlen, temp192, pa[2], det384z);
+  xylen   = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy);
+  alen    = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, adet);
+
+  temp48alen = fast_expansion_sum_zeroelim(dealen, dea, cdalen, cda, temp48a);
+  temp48blen = fast_expansion_sum_zeroelim(eaclen, eac, cdelen, cde, temp48b);
+  for(i = 0; i < temp48blen; i++)
+    {
+      temp48b[i] = -temp48b[i];
+    }
+  cdealen = fast_expansion_sum_zeroelim(temp48alen, temp48a, temp48blen, temp48b, cdea);
+  xlen    = scale_expansion_zeroelim(cdealen, cdea, pb[0], temp192);
+  xlen    = scale_expansion_zeroelim(xlen, temp192, pb[0], det384x);
+  ylen    = scale_expansion_zeroelim(cdealen, cdea, pb[1], temp192);
+  ylen    = scale_expansion_zeroelim(ylen, temp192, pb[1], det384y);
+  zlen    = scale_expansion_zeroelim(cdealen, cdea, pb[2], temp192);
+  zlen    = scale_expansion_zeroelim(zlen, temp192, pb[2], det384z);
+  xylen   = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy);
+  blen    = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, bdet);
+
+  temp48alen = fast_expansion_sum_zeroelim(eablen, eab, deblen, deb, temp48a);
+  temp48blen = fast_expansion_sum_zeroelim(abdlen, abd, dealen, dea, temp48b);
+  for(i = 0; i < temp48blen; i++)
+    {
+      temp48b[i] = -temp48b[i];
+    }
+  deablen = fast_expansion_sum_zeroelim(temp48alen, temp48a, temp48blen, temp48b, deab);
+  xlen    = scale_expansion_zeroelim(deablen, deab, pc[0], temp192);
+  xlen    = scale_expansion_zeroelim(xlen, temp192, pc[0], det384x);
+  ylen    = scale_expansion_zeroelim(deablen, deab, pc[1], temp192);
+  ylen    = scale_expansion_zeroelim(ylen, temp192, pc[1], det384y);
+  zlen    = scale_expansion_zeroelim(deablen, deab, pc[2], temp192);
+  zlen    = scale_expansion_zeroelim(zlen, temp192, pc[2], det384z);
+  xylen   = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy);
+  clen    = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, cdet);
+
+  temp48alen = fast_expansion_sum_zeroelim(abclen, abc, eaclen, eac, temp48a);
+  temp48blen = fast_expansion_sum_zeroelim(bcelen, bce, eablen, eab, temp48b);
+  for(i = 0; i < temp48blen; i++)
+    {
+      temp48b[i] = -temp48b[i];
+    }
+  eabclen = fast_expansion_sum_zeroelim(temp48alen, temp48a, temp48blen, temp48b, eabc);
+  xlen    = scale_expansion_zeroelim(eabclen, eabc, pd[0], temp192);
+  xlen    = scale_expansion_zeroelim(xlen, temp192, pd[0], det384x);
+  ylen    = scale_expansion_zeroelim(eabclen, eabc, pd[1], temp192);
+  ylen    = scale_expansion_zeroelim(ylen, temp192, pd[1], det384y);
+  zlen    = scale_expansion_zeroelim(eabclen, eabc, pd[2], temp192);
+  zlen    = scale_expansion_zeroelim(zlen, temp192, pd[2], det384z);
+  xylen   = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy);
+  dlen    = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, ddet);
+
+  temp48alen = fast_expansion_sum_zeroelim(bcdlen, bcd, abdlen, abd, temp48a);
+  temp48blen = fast_expansion_sum_zeroelim(cdalen, cda, abclen, abc, temp48b);
+  for(i = 0; i < temp48blen; i++)
+    {
+      temp48b[i] = -temp48b[i];
+    }
+  abcdlen = fast_expansion_sum_zeroelim(temp48alen, temp48a, temp48blen, temp48b, abcd);
+  xlen    = scale_expansion_zeroelim(abcdlen, abcd, pe[0], temp192);
+  xlen    = scale_expansion_zeroelim(xlen, temp192, pe[0], det384x);
+  ylen    = scale_expansion_zeroelim(abcdlen, abcd, pe[1], temp192);
+  ylen    = scale_expansion_zeroelim(ylen, temp192, pe[1], det384y);
+  zlen    = scale_expansion_zeroelim(abcdlen, abcd, pe[2], temp192);
+  zlen    = scale_expansion_zeroelim(zlen, temp192, pe[2], det384z);
+  xylen   = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy);
+  elen    = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, edet);
+  /* add the five per-point terms; cdelen is reused here as the length of cdedet */
+  ablen    = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet);
+  cdlen    = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet);
+  cdelen   = fast_expansion_sum_zeroelim(cdlen, cddet, elen, edet, cdedet);
+  deterlen = fast_expansion_sum_zeroelim(ablen, abdet, cdelen, cdedet, deter);
+
+  return deter[deterlen - 1];
+}
+
+REAL insphereslow(pa, pb, pc, pd, pe) REAL *pa;
+REAL *pb;
+REAL *pc;
+REAL *pd;
+REAL *pe;
+{
+  INEXACT REAL aex, bex, cex, dex, aey, bey, cey, dey, aez, bez, cez, dez;
+  REAL aextail, bextail, cextail, dextail;
+  REAL aeytail, beytail, ceytail, deytail;
+  REAL aeztail, beztail, ceztail, deztail;
+  REAL negate, negatetail;
+  INEXACT REAL axby7, bxcy7, cxdy7, dxay7, axcy7, bxdy7;
+  INEXACT REAL bxay7, cxby7, dxcy7, axdy7, cxay7, dxby7;
+  REAL axby[8], bxcy[8], cxdy[8], dxay[8], axcy[8], bxdy[8];
+  REAL bxay[8], cxby[8], dxcy[8], axdy[8], cxay[8], dxby[8];
+  REAL ab[16], bc[16], cd[16], da[16], ac[16], bd[16];
+  int ablen, bclen, cdlen, dalen, aclen, bdlen;
+  REAL temp32a[32], temp32b[32], temp64a[64], temp64b[64], temp64c[64];
+  int temp32alen, temp32blen, temp64alen, temp64blen, temp64clen;
+  REAL temp128[128], temp192[192];
+  int temp128len, temp192len;
+  REAL detx[384], detxx[768], detxt[384], detxxt[768], detxtxt[768];
+  int xlen, xxlen, xtlen, xxtlen, xtxtlen;
+  REAL x1[1536], x2[2304];
+  int x1len, x2len;
+  REAL dety[384], detyy[768], detyt[384], detyyt[768], detytyt[768];
+  int ylen, yylen, ytlen, yytlen, ytytlen;
+  REAL y1[1536], y2[2304];
+  int y1len, y2len;
+  REAL detz[384], detzz[768], detzt[384], detzzt[768], detztzt[768];
+  int zlen, zzlen, ztlen, zztlen, ztztlen;
+  REAL z1[1536], z2[2304];
+  int z1len, z2len;
+  REAL detxy[4608];
+  int xylen;
+  REAL adet[6912], bdet[6912], cdet[6912], ddet[6912];
+  int alen, blen, clen, dlen;
+  REAL abdet[13824], cddet[13824], deter[27648];
+  int deterlen;
+  int i;
+
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL a0hi, a0lo, a1hi, a1lo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j, _k, _l, _m, _n;
+  REAL _0, _1, _2;
+
+  Two_Diff(pa[0], pe[0], aex, aextail);
+  Two_Diff(pa[1], pe[1], aey, aeytail);
+  Two_Diff(pa[2], pe[2], aez, aeztail);
+  Two_Diff(pb[0], pe[0], bex, bextail);
+  Two_Diff(pb[1], pe[1], bey, beytail);
+  Two_Diff(pb[2], pe[2], bez, beztail);
+  Two_Diff(pc[0], pe[0], cex, cextail);
+  Two_Diff(pc[1], pe[1], cey, ceytail);
+  Two_Diff(pc[2], pe[2], cez, ceztail);
+  Two_Diff(pd[0], pe[0], dex, dextail);
+  Two_Diff(pd[1], pe[1], dey, deytail);
+  Two_Diff(pd[2], pe[2], dez, deztail);
+
+  Two_Two_Product(aex, aextail, bey, beytail, axby7, axby[6], axby[5], axby[4], axby[3], axby[2], axby[1], axby[0]);
+  axby[7]    = axby7;
+  negate     = -aey;
+  negatetail = -aeytail;
+  Two_Two_Product(bex, bextail, negate, negatetail, bxay7, bxay[6], bxay[5], bxay[4], bxay[3], bxay[2], bxay[1], bxay[0]);
+  bxay[7] = bxay7;
+  ablen   = fast_expansion_sum_zeroelim(8, axby, 8, bxay, ab);
+  Two_Two_Product(bex, bextail, cey, ceytail, bxcy7, bxcy[6], bxcy[5], bxcy[4], bxcy[3], bxcy[2], bxcy[1], bxcy[0]);
+  bxcy[7]    = bxcy7;
+  negate     = -bey;
+  negatetail = -beytail;
+  Two_Two_Product(cex, cextail, negate, negatetail, cxby7, cxby[6], cxby[5], cxby[4], cxby[3], cxby[2], cxby[1], cxby[0]);
+  cxby[7] = cxby7;
+  bclen   = fast_expansion_sum_zeroelim(8, bxcy, 8, cxby, bc);
+  Two_Two_Product(cex, cextail, dey, deytail, cxdy7, cxdy[6], cxdy[5], cxdy[4], cxdy[3], cxdy[2], cxdy[1], cxdy[0]);
+  cxdy[7]    = cxdy7;
+  negate     = -cey;
+  negatetail = -ceytail;
+  Two_Two_Product(dex, dextail, negate, negatetail, dxcy7, dxcy[6], dxcy[5], dxcy[4], dxcy[3], dxcy[2], dxcy[1], dxcy[0]);
+  dxcy[7] = dxcy7;
+  cdlen   = fast_expansion_sum_zeroelim(8, cxdy, 8, dxcy, cd);
+  Two_Two_Product(dex, dextail, aey, aeytail, dxay7, dxay[6], dxay[5], dxay[4], dxay[3], dxay[2], dxay[1], dxay[0]);
+  dxay[7]    = dxay7;
+  negate     = -dey;
+  negatetail = -deytail;
+  Two_Two_Product(aex, aextail, negate, negatetail, axdy7, axdy[6], axdy[5], axdy[4], axdy[3], axdy[2], axdy[1], axdy[0]);
+  axdy[7] = axdy7;
+  dalen   = fast_expansion_sum_zeroelim(8, dxay, 8, axdy, da);
+  Two_Two_Product(aex, aextail, cey, ceytail, axcy7, axcy[6], axcy[5], axcy[4], axcy[3], axcy[2], axcy[1], axcy[0]);
+  axcy[7]    = axcy7;
+  negate     = -aey;
+  negatetail = -aeytail;
+  Two_Two_Product(cex, cextail, negate, negatetail, cxay7, cxay[6], cxay[5], cxay[4], cxay[3], cxay[2], cxay[1], cxay[0]);
+  cxay[7] = cxay7;
+  aclen   = fast_expansion_sum_zeroelim(8, axcy, 8, cxay, ac);
+  Two_Two_Product(bex, bextail, dey, deytail, bxdy7, bxdy[6], bxdy[5], bxdy[4], bxdy[3], bxdy[2], bxdy[1], bxdy[0]);
+  bxdy[7]    = bxdy7;
+  negate     = -bey;
+  negatetail = -beytail;
+  Two_Two_Product(dex, dextail, negate, negatetail, dxby7, dxby[6], dxby[5], dxby[4], dxby[3], dxby[2], dxby[1], dxby[0]);
+  dxby[7] = dxby7;
+  bdlen   = fast_expansion_sum_zeroelim(8, bxdy, 8, dxby, bd);
+
+  temp32alen = scale_expansion_zeroelim(cdlen, cd, -bez, temp32a);
+  temp32blen = scale_expansion_zeroelim(cdlen, cd, -beztail, temp32b);
+  temp64alen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64a);
+  temp32alen = scale_expansion_zeroelim(bdlen, bd, cez, temp32a);
+  temp32blen = scale_expansion_zeroelim(bdlen, bd, ceztail, temp32b);
+  temp64blen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64b);
+  temp32alen = scale_expansion_zeroelim(bclen, bc, -dez, temp32a);
+  temp32blen = scale_expansion_zeroelim(bclen, bc, -deztail, temp32b);
+  temp64clen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64c);
+  temp128len = fast_expansion_sum_zeroelim(temp64alen, temp64a, temp64blen, temp64b, temp128);
+  temp192len = fast_expansion_sum_zeroelim(temp64clen, temp64c, temp128len, temp128, temp192);
+  xlen       = scale_expansion_zeroelim(temp192len, temp192, aex, detx);
+  xxlen      = scale_expansion_zeroelim(xlen, detx, aex, detxx);
+  xtlen      = scale_expansion_zeroelim(temp192len, temp192, aextail, detxt);
+  xxtlen     = scale_expansion_zeroelim(xtlen, detxt, aex, detxxt);
+  for(i = 0; i < xxtlen; i++)
+    {
+      detxxt[i] *= 2.0;
+    }
+  xtxtlen = scale_expansion_zeroelim(xtlen, detxt, aextail, detxtxt);
+  x1len   = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1);
+  x2len   = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2);
+  ylen    = scale_expansion_zeroelim(temp192len, temp192, aey, dety);
+  yylen   = scale_expansion_zeroelim(ylen, dety, aey, detyy);
+  ytlen   = scale_expansion_zeroelim(temp192len, temp192, aeytail, detyt);
+  yytlen  = scale_expansion_zeroelim(ytlen, detyt, aey, detyyt);
+  for(i = 0; i < yytlen; i++)
+    {
+      detyyt[i] *= 2.0;
+    }
+  ytytlen = scale_expansion_zeroelim(ytlen, detyt, aeytail, detytyt);
+  y1len   = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1);
+  y2len   = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2);
+  zlen    = scale_expansion_zeroelim(temp192len, temp192, aez, detz);
+  zzlen   = scale_expansion_zeroelim(zlen, detz, aez, detzz);
+  ztlen   = scale_expansion_zeroelim(temp192len, temp192, aeztail, detzt);
+  zztlen  = scale_expansion_zeroelim(ztlen, detzt, aez, detzzt);
+  for(i = 0; i < zztlen; i++)
+    {
+      detzzt[i] *= 2.0;
+    }
+  ztztlen = scale_expansion_zeroelim(ztlen, detzt, aeztail, detztzt);
+  z1len   = fast_expansion_sum_zeroelim(zzlen, detzz, zztlen, detzzt, z1);
+  z2len   = fast_expansion_sum_zeroelim(z1len, z1, ztztlen, detztzt, z2);
+  xylen   = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, detxy);
+  alen    = fast_expansion_sum_zeroelim(z2len, z2, xylen, detxy, adet);
+
+  temp32alen = scale_expansion_zeroelim(dalen, da, cez, temp32a);
+  temp32blen = scale_expansion_zeroelim(dalen, da, ceztail, temp32b);
+  temp64alen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64a);
+  temp32alen = scale_expansion_zeroelim(aclen, ac, dez, temp32a);
+  temp32blen = scale_expansion_zeroelim(aclen, ac, deztail, temp32b);
+  temp64blen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64b);
+  temp32alen = scale_expansion_zeroelim(cdlen, cd, aez, temp32a);
+  temp32blen = scale_expansion_zeroelim(cdlen, cd, aeztail, temp32b);
+  temp64clen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64c);
+  temp128len = fast_expansion_sum_zeroelim(temp64alen, temp64a, temp64blen, temp64b, temp128);
+  temp192len = fast_expansion_sum_zeroelim(temp64clen, temp64c, temp128len, temp128, temp192);
+  xlen       = scale_expansion_zeroelim(temp192len, temp192, bex, detx);
+  xxlen      = scale_expansion_zeroelim(xlen, detx, bex, detxx);
+  xtlen      = scale_expansion_zeroelim(temp192len, temp192, bextail, detxt);
+  xxtlen     = scale_expansion_zeroelim(xtlen, detxt, bex, detxxt);
+  for(i = 0; i < xxtlen; i++)
+    {
+      detxxt[i] *= 2.0;
+    }
+  xtxtlen = scale_expansion_zeroelim(xtlen, detxt, bextail, detxtxt);
+  x1len   = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1);
+  x2len   = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2);
+  ylen    = scale_expansion_zeroelim(temp192len, temp192, bey, dety);
+  yylen   = scale_expansion_zeroelim(ylen, dety, bey, detyy);
+  ytlen   = scale_expansion_zeroelim(temp192len, temp192, beytail, detyt);
+  yytlen  = scale_expansion_zeroelim(ytlen, detyt, bey, detyyt);
+  for(i = 0; i < yytlen; i++)
+    {
+      detyyt[i] *= 2.0;
+    }
+  ytytlen = scale_expansion_zeroelim(ytlen, detyt, beytail, detytyt);
+  y1len   = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1);
+  y2len   = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2);
+  zlen    = scale_expansion_zeroelim(temp192len, temp192, bez, detz);
+  zzlen   = scale_expansion_zeroelim(zlen, detz, bez, detzz);
+  ztlen   = scale_expansion_zeroelim(temp192len, temp192, beztail, detzt);
+  zztlen  = scale_expansion_zeroelim(ztlen, detzt, bez, detzzt);
+  for(i = 0; i < zztlen; i++)
+    {
+      detzzt[i] *= 2.0;
+    }
+  ztztlen = scale_expansion_zeroelim(ztlen, detzt, beztail, detztzt);
+  z1len   = fast_expansion_sum_zeroelim(zzlen, detzz, zztlen, detzzt, z1);
+  z2len   = fast_expansion_sum_zeroelim(z1len, z1, ztztlen, detztzt, z2);
+  xylen   = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, detxy);
+  blen    = fast_expansion_sum_zeroelim(z2len, z2, xylen, detxy, bdet);
+
+  temp32alen = scale_expansion_zeroelim(ablen, ab, -dez, temp32a);
+  temp32blen = scale_expansion_zeroelim(ablen, ab, -deztail, temp32b);
+  temp64alen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64a);
+  temp32alen = scale_expansion_zeroelim(bdlen, bd, -aez, temp32a);
+  temp32blen = scale_expansion_zeroelim(bdlen, bd, -aeztail, temp32b);
+  temp64blen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64b);
+  temp32alen = scale_expansion_zeroelim(dalen, da, -bez, temp32a);
+  temp32blen = scale_expansion_zeroelim(dalen, da, -beztail, temp32b);
+  temp64clen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64c);
+  temp128len = fast_expansion_sum_zeroelim(temp64alen, temp64a, temp64blen, temp64b, temp128);
+  temp192len = fast_expansion_sum_zeroelim(temp64clen, temp64c, temp128len, temp128, temp192);
+  xlen       = scale_expansion_zeroelim(temp192len, temp192, cex, detx);
+  xxlen      = scale_expansion_zeroelim(xlen, detx, cex, detxx);
+  xtlen      = scale_expansion_zeroelim(temp192len, temp192, cextail, detxt);
+  xxtlen     = scale_expansion_zeroelim(xtlen, detxt, cex, detxxt);
+  for(i = 0; i < xxtlen; i++)
+    {
+      detxxt[i] *= 2.0;
+    }
+  xtxtlen = scale_expansion_zeroelim(xtlen, detxt, cextail, detxtxt);
+  x1len   = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1);
+  x2len   = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2);
+  ylen    = scale_expansion_zeroelim(temp192len, temp192, cey, dety);
+  yylen   = scale_expansion_zeroelim(ylen, dety, cey, detyy);
+  ytlen   = scale_expansion_zeroelim(temp192len, temp192, ceytail, detyt);
+  yytlen  = scale_expansion_zeroelim(ytlen, detyt, cey, detyyt);
+  for(i = 0; i < yytlen; i++)
+    {
+      detyyt[i] *= 2.0;
+    }
+  ytytlen = scale_expansion_zeroelim(ytlen, detyt, ceytail, detytyt);
+  y1len   = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1);
+  y2len   = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2);
+  zlen    = scale_expansion_zeroelim(temp192len, temp192, cez, detz);
+  zzlen   = scale_expansion_zeroelim(zlen, detz, cez, detzz);
+  ztlen   = scale_expansion_zeroelim(temp192len, temp192, ceztail, detzt);
+  zztlen  = scale_expansion_zeroelim(ztlen, detzt, cez, detzzt);
+  for(i = 0; i < zztlen; i++)
+    {
+      detzzt[i] *= 2.0;
+    }
+  ztztlen = scale_expansion_zeroelim(ztlen, detzt, ceztail, detztzt);
+  z1len   = fast_expansion_sum_zeroelim(zzlen, detzz, zztlen, detzzt, z1);
+  z2len   = fast_expansion_sum_zeroelim(z1len, z1, ztztlen, detztzt, z2);
+  xylen   = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, detxy);
+  clen    = fast_expansion_sum_zeroelim(z2len, z2, xylen, detxy, cdet);
+
+  temp32alen = scale_expansion_zeroelim(bclen, bc, aez, temp32a);
+  temp32blen = scale_expansion_zeroelim(bclen, bc, aeztail, temp32b);
+  temp64alen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64a);
+  temp32alen = scale_expansion_zeroelim(aclen, ac, -bez, temp32a);
+  temp32blen = scale_expansion_zeroelim(aclen, ac, -beztail, temp32b);
+  temp64blen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64b);
+  temp32alen = scale_expansion_zeroelim(ablen, ab, cez, temp32a);
+  temp32blen = scale_expansion_zeroelim(ablen, ab, ceztail, temp32b);
+  temp64clen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64c);
+  temp128len = fast_expansion_sum_zeroelim(temp64alen, temp64a, temp64blen, temp64b, temp128);
+  temp192len = fast_expansion_sum_zeroelim(temp64clen, temp64c, temp128len, temp128, temp192);
+  xlen       = scale_expansion_zeroelim(temp192len, temp192, dex, detx);
+  xxlen      = scale_expansion_zeroelim(xlen, detx, dex, detxx);
+  xtlen      = scale_expansion_zeroelim(temp192len, temp192, dextail, detxt);
+  xxtlen     = scale_expansion_zeroelim(xtlen, detxt, dex, detxxt);
+  for(i = 0; i < xxtlen; i++)
+    {
+      detxxt[i] *= 2.0;
+    }
+  xtxtlen = scale_expansion_zeroelim(xtlen, detxt, dextail, detxtxt);
+  x1len   = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1);
+  x2len   = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2);
+  ylen    = scale_expansion_zeroelim(temp192len, temp192, dey, dety);
+  yylen   = scale_expansion_zeroelim(ylen, dety, dey, detyy);
+  ytlen   = scale_expansion_zeroelim(temp192len, temp192, deytail, detyt);
+  yytlen  = scale_expansion_zeroelim(ytlen, detyt, dey, detyyt);
+  for(i = 0; i < yytlen; i++)
+    {
+      detyyt[i] *= 2.0;
+    }
+  ytytlen = scale_expansion_zeroelim(ytlen, detyt, deytail, detytyt);
+  y1len   = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1);
+  y2len   = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2);
+  zlen    = scale_expansion_zeroelim(temp192len, temp192, dez, detz);
+  zzlen   = scale_expansion_zeroelim(zlen, detz, dez, detzz);
+  ztlen   = scale_expansion_zeroelim(temp192len, temp192, deztail, detzt);
+  zztlen  = scale_expansion_zeroelim(ztlen, detzt, dez, detzzt);
+  for(i = 0; i < zztlen; i++)
+    {
+      detzzt[i] *= 2.0;
+    }
+  ztztlen = scale_expansion_zeroelim(ztlen, detzt, deztail, detztzt);
+  z1len   = fast_expansion_sum_zeroelim(zzlen, detzz, zztlen, detzzt, z1);
+  z2len   = fast_expansion_sum_zeroelim(z1len, z1, ztztlen, detztzt, z2);
+  xylen   = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, detxy);
+  dlen    = fast_expansion_sum_zeroelim(z2len, z2, xylen, detxy, ddet);
+
+  ablen    = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet);
+  cdlen    = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet);
+  deterlen = fast_expansion_sum_zeroelim(ablen, abdet, cdlen, cddet, deter);
+
+  return deter[deterlen - 1];
+}
+
+REAL insphereadapt(pa, pb, pc, pd, pe, permanent) REAL *pa; /* staged evaluation of the insphere determinant; pa..pe are read at [0..2] */
+REAL *pb;
+REAL *pc;
+REAL *pd;
+REAL *pe;       /* reference point: every coordinate difference below is taken relative to pe */
+REAL permanent; /* multiplied by isperrboundB and isperrboundC below to form the error bounds */
+{
+  INEXACT REAL aex, bex, cex, dex, aey, bey, cey, dey, aez, bez, cez, dez;
+  REAL det, errbound;
+  /* Output pairs (...1, ...0) of Two_Product for each x/y coordinate product -- presumably rounded product and roundoff */
+  INEXACT REAL aexbey1, bexaey1, bexcey1, cexbey1;
+  INEXACT REAL cexdey1, dexcey1, dexaey1, aexdey1;
+  INEXACT REAL aexcey1, cexaey1, bexdey1, dexbey1;
+  REAL aexbey0, bexaey0, bexcey0, cexbey0;
+  REAL cexdey0, dexcey0, dexaey0, aexdey0;
+  REAL aexcey0, cexaey0, bexdey0, dexbey0;
+  REAL ab[4], bc[4], cd[4], da[4], ac[4], bd[4];
+  INEXACT REAL ab3, bc3, cd3, da3, ac3, bd3;
+  REAL abeps, bceps, cdeps, daeps, aceps, bdeps;
+  REAL temp8a[8], temp8b[8], temp8c[8], temp16[16], temp24[24], temp48[48];
+  int temp8alen, temp8blen, temp8clen, temp16len, temp24len, temp48len;
+  REAL xdet[96], ydet[96], zdet[96], xydet[192];
+  int xlen, ylen, zlen, xylen;
+  REAL adet[288], bdet[288], cdet[288], ddet[288];
+  int alen, blen, clen, dlen;
+  REAL abdet[576], cddet[576];
+  int ablen, cdlen;
+  REAL fin1[1152];
+  int finlength;
+
+  REAL aextail, bextail, cextail, dextail;
+  REAL aeytail, beytail, ceytail, deytail;
+  REAL aeztail, beztail, ceztail, deztail;
+  /* Not referenced by name in this body; presumably scratch variables of the Two_* macros -- TODO confirm */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j;
+  REAL _0;
+  /* Floating-point coordinate differences of pa..pd relative to pe */
+  aex = (REAL)(pa[0] - pe[0]);
+  bex = (REAL)(pb[0] - pe[0]);
+  cex = (REAL)(pc[0] - pe[0]);
+  dex = (REAL)(pd[0] - pe[0]);
+  aey = (REAL)(pa[1] - pe[1]);
+  bey = (REAL)(pb[1] - pe[1]);
+  cey = (REAL)(pc[1] - pe[1]);
+  dey = (REAL)(pd[1] - pe[1]);
+  aez = (REAL)(pa[2] - pe[2]);
+  bez = (REAL)(pb[2] - pe[2]);
+  cez = (REAL)(pc[2] - pe[2]);
+  dez = (REAL)(pd[2] - pe[2]);
+  /* ab, bc, cd, da, ac, bd: four-component results for the 2x2 x/y minors, e.g. ab from aex*bey and bex*aey */
+  Two_Product(aex, bey, aexbey1, aexbey0);
+  Two_Product(bex, aey, bexaey1, bexaey0);
+  Two_Two_Diff(aexbey1, aexbey0, bexaey1, bexaey0, ab3, ab[2], ab[1], ab[0]);
+  ab[3] = ab3;
+
+  Two_Product(bex, cey, bexcey1, bexcey0);
+  Two_Product(cex, bey, cexbey1, cexbey0);
+  Two_Two_Diff(bexcey1, bexcey0, cexbey1, cexbey0, bc3, bc[2], bc[1], bc[0]);
+  bc[3] = bc3;
+
+  Two_Product(cex, dey, cexdey1, cexdey0);
+  Two_Product(dex, cey, dexcey1, dexcey0);
+  Two_Two_Diff(cexdey1, cexdey0, dexcey1, dexcey0, cd3, cd[2], cd[1], cd[0]);
+  cd[3] = cd3;
+
+  Two_Product(dex, aey, dexaey1, dexaey0);
+  Two_Product(aex, dey, aexdey1, aexdey0);
+  Two_Two_Diff(dexaey1, dexaey0, aexdey1, aexdey0, da3, da[2], da[1], da[0]);
+  da[3] = da3;
+
+  Two_Product(aex, cey, aexcey1, aexcey0);
+  Two_Product(cex, aey, cexaey1, cexaey0);
+  Two_Two_Diff(aexcey1, aexcey0, cexaey1, cexaey0, ac3, ac[2], ac[1], ac[0]);
+  ac[3] = ac3;
+
+  Two_Product(bex, dey, bexdey1, bexdey0);
+  Two_Product(dex, bey, dexbey1, dexbey0);
+  Two_Two_Diff(bexdey1, bexdey0, dexbey1, dexbey0, bd3, bd[2], bd[1], bd[0]);
+  bd[3] = bd3;
+  /* adet: (bez*cd - cez*bd + dez*bc) scaled by -(aex*aex + aey*aey + aez*aez) */
+  temp8alen = scale_expansion_zeroelim(4, cd, bez, temp8a);
+  temp8blen = scale_expansion_zeroelim(4, bd, -cez, temp8b);
+  temp8clen = scale_expansion_zeroelim(4, bc, dez, temp8c);
+  temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16);
+  temp24len = fast_expansion_sum_zeroelim(temp8clen, temp8c, temp16len, temp16, temp24);
+  temp48len = scale_expansion_zeroelim(temp24len, temp24, aex, temp48);
+  xlen      = scale_expansion_zeroelim(temp48len, temp48, -aex, xdet);
+  temp48len = scale_expansion_zeroelim(temp24len, temp24, aey, temp48);
+  ylen      = scale_expansion_zeroelim(temp48len, temp48, -aey, ydet);
+  temp48len = scale_expansion_zeroelim(temp24len, temp24, aez, temp48);
+  zlen      = scale_expansion_zeroelim(temp48len, temp48, -aez, zdet);
+  xylen     = fast_expansion_sum_zeroelim(xlen, xdet, ylen, ydet, xydet);
+  alen      = fast_expansion_sum_zeroelim(xylen, xydet, zlen, zdet, adet);
+  /* bdet: (cez*da + dez*ac + aez*cd) scaled by +(bex*bex + bey*bey + bez*bez) */
+  temp8alen = scale_expansion_zeroelim(4, da, cez, temp8a);
+  temp8blen = scale_expansion_zeroelim(4, ac, dez, temp8b);
+  temp8clen = scale_expansion_zeroelim(4, cd, aez, temp8c);
+  temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16);
+  temp24len = fast_expansion_sum_zeroelim(temp8clen, temp8c, temp16len, temp16, temp24);
+  temp48len = scale_expansion_zeroelim(temp24len, temp24, bex, temp48);
+  xlen      = scale_expansion_zeroelim(temp48len, temp48, bex, xdet);
+  temp48len = scale_expansion_zeroelim(temp24len, temp24, bey, temp48);
+  ylen      = scale_expansion_zeroelim(temp48len, temp48, bey, ydet);
+  temp48len = scale_expansion_zeroelim(temp24len, temp24, bez, temp48);
+  zlen      = scale_expansion_zeroelim(temp48len, temp48, bez, zdet);
+  xylen     = fast_expansion_sum_zeroelim(xlen, xdet, ylen, ydet, xydet);
+  blen      = fast_expansion_sum_zeroelim(xylen, xydet, zlen, zdet, bdet);
+  /* cdet: (dez*ab + aez*bd + bez*da) scaled by -(cex*cex + cey*cey + cez*cez) */
+  temp8alen = scale_expansion_zeroelim(4, ab, dez, temp8a);
+  temp8blen = scale_expansion_zeroelim(4, bd, aez, temp8b);
+  temp8clen = scale_expansion_zeroelim(4, da, bez, temp8c);
+  temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16);
+  temp24len = fast_expansion_sum_zeroelim(temp8clen, temp8c, temp16len, temp16, temp24);
+  temp48len = scale_expansion_zeroelim(temp24len, temp24, cex, temp48);
+  xlen      = scale_expansion_zeroelim(temp48len, temp48, -cex, xdet);
+  temp48len = scale_expansion_zeroelim(temp24len, temp24, cey, temp48);
+  ylen      = scale_expansion_zeroelim(temp48len, temp48, -cey, ydet);
+  temp48len = scale_expansion_zeroelim(temp24len, temp24, cez, temp48);
+  zlen      = scale_expansion_zeroelim(temp48len, temp48, -cez, zdet);
+  xylen     = fast_expansion_sum_zeroelim(xlen, xdet, ylen, ydet, xydet);
+  clen      = fast_expansion_sum_zeroelim(xylen, xydet, zlen, zdet, cdet);
+  /* ddet: (aez*bc - bez*ac + cez*ab) scaled by +(dex*dex + dey*dey + dez*dez) */
+  temp8alen = scale_expansion_zeroelim(4, bc, aez, temp8a);
+  temp8blen = scale_expansion_zeroelim(4, ac, -bez, temp8b);
+  temp8clen = scale_expansion_zeroelim(4, ab, cez, temp8c);
+  temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16);
+  temp24len = fast_expansion_sum_zeroelim(temp8clen, temp8c, temp16len, temp16, temp24);
+  temp48len = scale_expansion_zeroelim(temp24len, temp24, dex, temp48);
+  xlen      = scale_expansion_zeroelim(temp48len, temp48, dex, xdet);
+  temp48len = scale_expansion_zeroelim(temp24len, temp24, dey, temp48);
+  ylen      = scale_expansion_zeroelim(temp48len, temp48, dey, ydet);
+  temp48len = scale_expansion_zeroelim(temp24len, temp24, dez, temp48);
+  zlen      = scale_expansion_zeroelim(temp48len, temp48, dez, zdet);
+  xylen     = fast_expansion_sum_zeroelim(xlen, xdet, ylen, ydet, xydet);
+  dlen      = fast_expansion_sum_zeroelim(xylen, xydet, zlen, zdet, ddet);
+  /* Add the four terms up into fin1 */
+  ablen     = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet);
+  cdlen     = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet);
+  finlength = fast_expansion_sum_zeroelim(ablen, abdet, cdlen, cddet, fin1);
+  /* First exit: return the estimate of fin1 if its magnitude reaches isperrboundB * permanent */
+  det      = estimate(finlength, fin1);
+  errbound = isperrboundB * permanent;
+  if((det >= errbound) || (-det >= errbound))
+    {
+      return det;
+    }
+  /* Tails of the twelve coordinate differences; if all of them are zero, det is returned unchanged */
+  Two_Diff_Tail(pa[0], pe[0], aex, aextail);
+  Two_Diff_Tail(pa[1], pe[1], aey, aeytail);
+  Two_Diff_Tail(pa[2], pe[2], aez, aeztail);
+  Two_Diff_Tail(pb[0], pe[0], bex, bextail);
+  Two_Diff_Tail(pb[1], pe[1], bey, beytail);
+  Two_Diff_Tail(pb[2], pe[2], bez, beztail);
+  Two_Diff_Tail(pc[0], pe[0], cex, cextail);
+  Two_Diff_Tail(pc[1], pe[1], cey, ceytail);
+  Two_Diff_Tail(pc[2], pe[2], cez, ceztail);
+  Two_Diff_Tail(pd[0], pe[0], dex, dextail);
+  Two_Diff_Tail(pd[1], pe[1], dey, deytail);
+  Two_Diff_Tail(pd[2], pe[2], dez, deztail);
+  if((aextail == 0.0) && (aeytail == 0.0) && (aeztail == 0.0) && (bextail == 0.0) && (beytail == 0.0) && (beztail == 0.0) &&
+     (cextail == 0.0) && (ceytail == 0.0) && (ceztail == 0.0) && (dextail == 0.0) && (deytail == 0.0) && (deztail == 0.0))
+    {
+      return det;
+    }
+  /* Second exit: add a correction in which every term carries exactly one tail factor, then retest */
+  errbound = isperrboundC * permanent + resulterrbound * Absolute(det);
+  abeps    = (aex * beytail + bey * aextail) - (aey * bextail + bex * aeytail);
+  bceps    = (bex * ceytail + cey * bextail) - (bey * cextail + cex * beytail);
+  cdeps    = (cex * deytail + dey * cextail) - (cey * dextail + dex * ceytail);
+  daeps    = (dex * aeytail + aey * dextail) - (dey * aextail + aex * deytail);
+  aceps    = (aex * ceytail + cey * aextail) - (aey * cextail + cex * aeytail);
+  bdeps    = (bex * deytail + dey * bextail) - (bey * dextail + dex * beytail);
+  det += (((bex * bex + bey * bey + bez * bez) *
+               ((cez * daeps + dez * aceps + aez * cdeps) + (ceztail * da3 + deztail * ac3 + aeztail * cd3)) +
+           (dex * dex + dey * dey + dez * dez) *
+               ((aez * bceps - bez * aceps + cez * abeps) + (aeztail * bc3 - beztail * ac3 + ceztail * ab3))) -
+          ((aex * aex + aey * aey + aez * aez) *
+               ((bez * cdeps - cez * bdeps + dez * bceps) + (beztail * cd3 - ceztail * bd3 + deztail * bc3)) +
+           (cex * cex + cey * cey + cez * cez) *
+               ((dez * abeps + aez * bdeps + bez * daeps) + (deztail * ab3 + aeztail * bd3 + beztail * da3)))) +
+         2.0 * (((bex * bextail + bey * beytail + bez * beztail) * (cez * da3 + dez * ac3 + aez * cd3) +
+                 (dex * dextail + dey * deytail + dez * deztail) * (aez * bc3 - bez * ac3 + cez * ab3)) -
+                ((aex * aextail + aey * aeytail + aez * aeztail) * (bez * cd3 - cez * bd3 + dez * bc3) +
+                 (cex * cextail + cey * ceytail + cez * ceztail) * (dez * ab3 + aez * bd3 + bez * da3)));
+  if((det >= errbound) || (-det >= errbound))
+    {
+      return det;
+    }
+  /* Still undecided: hand the original points to insphereexact */
+  return insphereexact(pa, pb, pc, pd, pe);
+}
+
+REAL insphere(pa, pb, pc, pd, pe) REAL *pa; /* insphere determinant: plain floating-point attempt first, insphereadapt otherwise */
+REAL *pb;
+REAL *pc;
+REAL *pd;
+REAL *pe; /* reference point: all differences below are relative to pe; each point is read at [0..2] */
+{
+  REAL aex, bex, cex, dex;
+  REAL aey, bey, cey, dey;
+  REAL aez, bez, cez, dez;
+  REAL aexbey, bexaey, bexcey, cexbey, cexdey, dexcey, dexaey, aexdey;
+  REAL aexcey, cexaey, bexdey, dexbey;
+  REAL alift, blift, clift, dlift;
+  REAL ab, bc, cd, da, ac, bd;
+  REAL abc, bcd, cda, dab;
+  REAL aezplus, bezplus, cezplus, dezplus;
+  REAL aexbeyplus, bexaeyplus, bexceyplus, cexbeyplus;
+  REAL cexdeyplus, dexceyplus, dexaeyplus, aexdeyplus;
+  REAL aexceyplus, cexaeyplus, bexdeyplus, dexbeyplus;
+  REAL det;
+  REAL permanent, errbound;
+  /* Coordinates of pa..pd relative to pe */
+  aex = pa[0] - pe[0];
+  bex = pb[0] - pe[0];
+  cex = pc[0] - pe[0];
+  dex = pd[0] - pe[0];
+  aey = pa[1] - pe[1];
+  bey = pb[1] - pe[1];
+  cey = pc[1] - pe[1];
+  dey = pd[1] - pe[1];
+  aez = pa[2] - pe[2];
+  bez = pb[2] - pe[2];
+  cez = pc[2] - pe[2];
+  dez = pd[2] - pe[2];
+  /* 2x2 minors in x/y; the individual products are kept because permanent needs their absolute values */
+  aexbey = aex * bey;
+  bexaey = bex * aey;
+  ab     = aexbey - bexaey;
+  bexcey = bex * cey;
+  cexbey = cex * bey;
+  bc     = bexcey - cexbey;
+  cexdey = cex * dey;
+  dexcey = dex * cey;
+  cd     = cexdey - dexcey;
+  dexaey = dex * aey;
+  aexdey = aex * dey;
+  da     = dexaey - aexdey;
+
+  aexcey = aex * cey;
+  cexaey = cex * aey;
+  ac     = aexcey - cexaey;
+  bexdey = bex * dey;
+  dexbey = dex * bey;
+  bd     = bexdey - dexbey;
+  /* 3x3 minors built from the 2x2 minors and the z differences */
+  abc = aez * bc - bez * ac + cez * ab;
+  bcd = bez * cd - cez * bd + dez * bc;
+  cda = cez * da + dez * ac + aez * cd;
+  dab = dez * ab + aez * bd + bez * da;
+  /* Squared lengths of the difference vectors */
+  alift = aex * aex + aey * aey + aez * aez;
+  blift = bex * bex + bey * bey + bez * bez;
+  clift = cex * cex + cey * cey + cez * cez;
+  dlift = dex * dex + dey * dey + dez * dez;
+
+  det = (dlift * abc - clift * dab) + (blift * cda - alift * bcd);
+  /* permanent: the terms of det summed with absolute values of every product and z difference */
+  aezplus    = Absolute(aez);
+  bezplus    = Absolute(bez);
+  cezplus    = Absolute(cez);
+  dezplus    = Absolute(dez);
+  aexbeyplus = Absolute(aexbey);
+  bexaeyplus = Absolute(bexaey);
+  bexceyplus = Absolute(bexcey);
+  cexbeyplus = Absolute(cexbey);
+  cexdeyplus = Absolute(cexdey);
+  dexceyplus = Absolute(dexcey);
+  dexaeyplus = Absolute(dexaey);
+  aexdeyplus = Absolute(aexdey);
+  aexceyplus = Absolute(aexcey);
+  cexaeyplus = Absolute(cexaey);
+  bexdeyplus = Absolute(bexdey);
+  dexbeyplus = Absolute(dexbey);
+  permanent =
+      ((cexdeyplus + dexceyplus) * bezplus + (dexbeyplus + bexdeyplus) * cezplus + (bexceyplus + cexbeyplus) * dezplus) * alift +
+      ((dexaeyplus + aexdeyplus) * cezplus + (aexceyplus + cexaeyplus) * dezplus + (cexdeyplus + dexceyplus) * aezplus) * blift +
+      ((aexbeyplus + bexaeyplus) * dezplus + (bexdeyplus + dexbeyplus) * aezplus + (dexaeyplus + aexdeyplus) * bezplus) * clift +
+      ((bexceyplus + cexbeyplus) * aezplus + (cexaeyplus + aexceyplus) * bezplus + (aexbeyplus + bexaeyplus) * cezplus) * dlift;
+  errbound = isperrboundA * permanent;
+  if((det > errbound) || (-det > errbound))
+    {
+      return det;
+    }
+  /* |det| does not exceed errbound: defer to insphereadapt, passing permanent along */
+  return insphereadapt(pa, pb, pc, pd, pe, permanent);
+}
diff --git a/src/amuse/community/arepo/src/utils/system.c b/src/amuse/community/arepo/src/utils/system.c
new file mode 100644
index 0000000000..027974da55
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/system.c
@@ -0,0 +1,1300 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/utils/system.c
+ * \date        05/2018
+ * \brief       Small functions for interaction with operating system and
+ *              libraries and other auxiliary functions.
+ * \details     contains functions:
+ *                void subdivide_evenly(int N, int pieces, int index, int *first, int *count)
+ *                void permutate_chunks_in_list(int ncount, int *list)
+ *                int get_thread_num(void)
+ *                int system_compare_hostname(const void *a, const void *b)
+ *                int system_compare_first_task(const void *a, const void *b)
+ *                int system_compare_task(const void *a, const void *b)
+ *                void determine_compute_nodes(void)
+ *                void allreduce_sparse_double_sum(double *loc, double *glob, int N)
+ *                void allreduce_sparse_imin(int *loc, int *glob, int N)
+ *                double mysort(void *base, size_t nel, size_t width, int (*compar) (const void *, const void *))
+ *                double dabs(double a)
+ *                double dmax(double a, double b)
+ *                size_t smax(size_t a, size_t b)
+ *                double dmin(double a, double b)
+ *                double max_array(double *a, int num_elements)
+ *                int imax(int a, int b)
+ *                int imin(int a, int b)
+ *                int myflush(FILE * fstream)
+ *                int flush_everything(void)
+ *                void enable_core_dumps_and_fpu_exceptions(void)
+ *                void my_gsl_error_handler(const char *reason, const char *file, int line, int gsl_errno)
+ *                double get_random_number(void)
+ *                double get_random_number_aux(void)
+ *                double second(void)
+ *                double measure_time(void)
+ *                double timediff(double t0, double t1)
+ *                void minimum_large_ints(int n, long long *src, long long *res)
+ *                void sumup_large_ints_comm(int n, int *src, long long *res, MPI_Comm comm)
+ *                void sumup_large_ints(int n, int *src, long long *res)
+ *                void sumup_longs(int n, long long *src, long long *res)
+ *                size_t sizemax(size_t a, size_t b)
+ *                void report_VmRSS(void)
+ *                long long report_comittable_memory(long long *MemTotal, long long *Committed_AS, long long *SwapTotal, long long *SwapFree)
+ *                void check_maxmemsize_setting(void); void mpi_report_committable_memory(void); int my_ffsll(peanokey i); int my_fls(int x)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 11.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include <gsl/gsl_rng.h>
+#include <math.h>
+#include <mpi.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief  Computes start index and length of one piece when N elements are
+ *          divided into 'pieces' contiguous chunks of nearly equal size.
+ *
+ *  If N is not a multiple of pieces, the leading pieces are one element longer.
+ *
+ * \param[in] N Number of elements.
+ * \param[in] pieces Number of chunks.
+ * \param[in] index Index of piece that is needed as output.
+ * \param[out] first Index of first element of piece number 'index'.
+ * \param[out] count Number of elements of piece number 'index'.
+ *
+ * \return void
+ */
+void subdivide_evenly(int N, int pieces, int index, int *first, int *count)
+{
+  int big     = (N - 1) / pieces + 1;        /* size of the larger pieces */
+  int n_big   = pieces - (pieces * big - N); /* number of pieces that have size 'big' */
+  int n_small = index - n_big;               /* smaller pieces in front of 'index', if non-negative */
+  int start = index * big, len = big;
+
+  if(n_small >= 0)
+    {
+      start -= n_small; /* each smaller piece in front shifts the start back by one */
+      len--;
+    }
+  *first = start;
+  *count = len;
+}
+
+/*! \brief Reorders a list chunk-wise.
+ *
+ *  Lists with more than WALK_N_PIECES * WALK_N_SIZE entries are read in
+ *  WALK_N_PIECES passes: pass p collects the chunks (of WALK_N_SIZE entries)
+ *  number p, p + WALK_N_PIECES, p + 2 * WALK_N_PIECES, ... of the input.
+ *  Shorter lists are copied in a single pass and keep their order.
+ *
+ *  The result is assembled in a temporary array obtained from mymalloc() and
+ *  copied back over the input afterwards. WALK_N_PIECES and WALK_N_SIZE stay
+ *  defined after the function body.
+ *
+ *  \param[in] ncount Number of elements in list.
+ *  \param[in, out] list List to be permuted.
+ *
+ *  \return void
+ */
+void permutate_chunks_in_list(int ncount, int *list)
+{
+#define WALK_N_PIECES 32 /*!< Number of sets, the chunks are divided into */
+#define WALK_N_SIZE 500  /*!< Number of particles per chunk */
+
+  /* only lists longer than one full round of chunks are read in several passes */
+  int chunked = (ncount > WALK_N_PIECES * WALK_N_SIZE);
+  int nsets   = chunked ? WALK_N_PIECES : 1;    /* number of passes over the list */
+  int setlen  = chunked ? WALK_N_SIZE : ncount; /* length of one chunk */
+
+  int pass = 0; /* pass currently being read */
+  int src  = 0; /* next position to read from list */
+  int dst;      /* next position to write in the scratch copy */
+
+  int *reordered = (int *)mymalloc("chunked_TargetList", ncount * sizeof(int));
+
+  for(dst = 0; dst < ncount; dst++)
+    {
+      /* copy the next entry */
+      reordered[dst] = list[src];
+
+      /* the read position is only advanced while it lies inside the list */
+      if(src >= ncount)
+        continue;
+
+      src++;
+
+      /* end of a chunk reached: skip the chunks that belong to the other passes */
+      if((src % setlen) == 0)
+        src += (nsets - 1) * setlen;
+
+      /* ran off the end of the list: start the next pass, if there is one */
+      if(src >= ncount && ++pass < nsets)
+        src = pass * setlen;
+    }
+
+  /* write the reordered sequence back into the caller's array */
+  for(dst = 0; dst < ncount; dst++)
+    list[dst] = reordered[dst];
+
+  myfree(reordered);
+}
+
+/*! \brief Returns thread number; the value is always 0. Unused.
+ *  \return 0
+ */
+int get_thread_num(void)
+{
+  return 0;
+}
+
+/*! \brief Per-task record used to work out how the MPI tasks are spread over compute nodes.
+ */
+static struct node_data
+{
+  int task, this_node, first_task_in_this_node; /* task number; running index of its node; task of the node's first entry */
+  int first_index, rank_in_node, tasks_in_node; /* list index of the node's first entry; rank inside the node; tasks on the node */
+  char name[MPI_MAX_PROCESSOR_NAME];            /* filled by MPI_Get_processor_name() */
+} loc_node, *list_of_nodes;                     /* record of this task; list holding one record per task */
+
+/*! \brief Compares first nodename and then task of node_data objects.
+ *
+ *  Sorting kernel for qsort(). Entries with the same name and the same task
+ *  compare equal, so that an element always compares equal to itself.
+ *
+ *  \param[in] a First element to compare (struct node_data).
+ *  \param[in] b Second element to compare (struct node_data).
+ *
+ *  \return Negative, zero or positive if a sorts before, equal to or after b.
+ */
+int system_compare_hostname(const void *a, const void *b)
+{
+  const struct node_data *na = (const struct node_data *)a;
+  const struct node_data *nb = (const struct node_data *)b;
+
+  int cmp = strcmp(na->name, nb->name);
+
+  /* same host name: fall back to the task number; yields 0 for identical tasks */
+  if(cmp == 0)
+    cmp = (na->task > nb->task) - (na->task < nb->task);
+
+  return cmp;
+}
+
+/*! \brief Orders node_data objects by first_task_in_this_node and, for equal
+ *         values, by task.
+ *
+ *  Sorting kernel.
+ *
+ *  \param[in] a First element to compare.
+ *  \param[in] b Second element to compare.
+ *
+ *  \return -1 if a sorts before b, +1 if a sorts after b, 0 if both keys are
+ *          equal.
+ */
+int system_compare_first_task(const void *a, const void *b)
+{
+  const struct node_data *lhs = (const struct node_data *)a;
+  const struct node_data *rhs = (const struct node_data *)b;
+
+  /* primary key: first task of the node the entry belongs to */
+  int order = (lhs->first_task_in_this_node > rhs->first_task_in_this_node) -
+              (lhs->first_task_in_this_node < rhs->first_task_in_this_node);
+
+  /* secondary key: the task number itself */
+  if(order == 0)
+    order = (lhs->task > rhs->task) - (lhs->task < rhs->task);
+
+  return order;
+}
+
+/*! \brief Orders node_data objects by their task number.
+ *
+ *  Sorting kernel.
+ *
+ *  \param[in] a First element to compare.
+ *  \param[in] b Second element to compare.
+ *
+ *  \return (-1,0,1);  -1 if a->task < b->task
+ */
+int system_compare_task(const void *a, const void *b)
+{
+  int task_a = ((const struct node_data *)a)->task;
+  int task_b = ((const struct node_data *)b)->task;
+
+  if(task_a == task_b)
+    return 0;
+
+  return (task_a < task_b) ? -1 : +1;
+}
+
+/*! \brief Determines the compute nodes the simulation is running on.
+ *
+ *  Reports this to file uses-machines.txt; terminates if allocation fails.
+ *
+ *  \return void
+ */
+void determine_compute_nodes(void)
+{
+  int len, nodes, i, no, rank, first_index;
+
+  MPI_Get_processor_name(loc_node.name, &len);
+  loc_node.task = ThisTask;
+
+  /* Note: Internal memory allocation routines are not yet available when this function is called */
+  if(!(list_of_nodes = malloc(sizeof(struct node_data) * NTask)))
+    terminate("can't allocate memory for list of compute nodes");
+
+  MPI_Allgather(&loc_node, sizeof(struct node_data), MPI_BYTE, list_of_nodes, sizeof(struct node_data), MPI_BYTE, MPI_COMM_WORLD);
+
+  if(ThisTask == 0)
+    {
+      FILE *fd;
+      if(!(fd = fopen("uses-machines.txt", "w")))
+        terminate("can't write file with used machines");
+      for(i = 0; i < NTask; i++)
+        fprintf(fd, "%5d  %s\n", list_of_nodes[i].task, list_of_nodes[i].name);
+      fclose(fd);
+    }
+
+  /* group the entries by host name, lowest task first within each host */
+  qsort(list_of_nodes, NTask, sizeof(struct node_data), system_compare_hostname);
+  list_of_nodes[0].first_task_in_this_node = list_of_nodes[0].task;
+
+  for(i = 1, nodes = 1; i < NTask; i++)
+    {
+      if(strcmp(list_of_nodes[i].name, list_of_nodes[i - 1].name) != 0)
+        {
+          list_of_nodes[i].first_task_in_this_node = list_of_nodes[i].task;
+          nodes++;
+        }
+      else
+        list_of_nodes[i].first_task_in_this_node = list_of_nodes[i - 1].first_task_in_this_node;
+    }
+
+  /* order the entries by the first task of their node, then by task */
+  qsort(list_of_nodes, NTask, sizeof(struct node_data), system_compare_first_task);
+
+  for(i = 0; i < NTask; i++)
+    list_of_nodes[i].tasks_in_node = 0;
+
+  for(i = 0, no = 0, rank = 0, first_index = 0; i < NTask; i++)
+    {
+      if(i > 0 && list_of_nodes[i].first_task_in_this_node != list_of_nodes[i - 1].first_task_in_this_node)
+        {
+          no++;
+          rank        = 0;
+          first_index = i;
+        }
+      list_of_nodes[i].first_index  = first_index;
+      list_of_nodes[i].this_node    = no;
+      list_of_nodes[i].rank_in_node = rank++;
+      list_of_nodes[first_index].tasks_in_node++; /* the count is collected on the node's first entry */
+    }
+
+  int max_count = 0;
+  int min_count = (1 << 30);
+
+  for(i = 0; i < NTask; i++)
+    {
+      list_of_nodes[i].tasks_in_node = list_of_nodes[list_of_nodes[i].first_index].tasks_in_node;
+      if(list_of_nodes[i].tasks_in_node > max_count)
+        max_count = list_of_nodes[i].tasks_in_node;
+      if(list_of_nodes[i].tasks_in_node < min_count)
+        min_count = list_of_nodes[i].tasks_in_node;
+    }
+
+  /* restore task order so that entry ThisTask is the record of this task */
+  qsort(list_of_nodes, NTask, sizeof(struct node_data), system_compare_task);
+
+  TasksInThisNode = list_of_nodes[ThisTask].tasks_in_node;
+  RankInThisNode  = list_of_nodes[ThisTask].rank_in_node;
+  ThisNode        = list_of_nodes[ThisTask].this_node;
+
+  NumNodes        = nodes;
+  MinTasksPerNode = min_count;
+  MaxTasksPerNode = max_count;
+
+  free(list_of_nodes);
+}
+
+/*! \brief Home-made Allreduce function for double variables with sum reduction
+ *         operation, optimized for sparse vectors.
+ *
+ *  Only non-zero entries of loc are sent; each task sums one contiguous block
+ *  of indices and the blocks are then gathered into glob on every task.
+ *
+ *  \param[in] loc Local array (N entries are read).
+ *  \param[out] glob Global (result) array (N entries are written).
+ *  \param[in] N number of elements in array.
+ *
+ *  \return void
+ */
+void allreduce_sparse_double_sum(double *loc, double *glob, int N)
+{
+  int i, j, n, loc_first_n, nimport, nexport, task, ngrp;
+
+  int *send_count  = mymalloc("send_count", sizeof(int) * NTask);
+  int *recv_count  = mymalloc("recv_count", sizeof(int) * NTask);
+  int *send_offset = mymalloc("send_offset", sizeof(int) * NTask);
+  int *recv_offset = mymalloc("recv_offset", sizeof(int) * NTask);
+  int *blocksize   = mymalloc("blocksize", sizeof(int) * NTask);
+  /* block decomposition of the N indices: the first rmd tasks own blk + 1 entries, the others blk */
+  int blk     = N / NTask;
+  int rmd     = N - blk * NTask; /* remainder */
+  int pivot_n = rmd * (blk + 1); /* first index that falls into a block of size blk */
+
+  for(task = 0, loc_first_n = 0; task < NTask; task++)
+    {
+      if(task < rmd)
+        blocksize[task] = blk + 1;
+      else
+        blocksize[task] = blk;
+
+      if(task < ThisTask)
+        loc_first_n += blocksize[task]; /* ends up as the first index owned by this task */
+    }
+  /* accumulator for the block owned by this task, starting from zero */
+  double *loc_data = mymalloc("loc_data", blocksize[ThisTask] * sizeof(double));
+  memset(loc_data, 0, blocksize[ThisTask] * sizeof(double));
+
+  for(j = 0; j < NTask; j++)
+    send_count[j] = 0;
+
+  /* find for each non-zero element the processor where it should go for being summed */
+  for(n = 0; n < N; n++)
+    {
+      if(loc[n] != 0)
+        {
+          if(n < pivot_n)
+            task = n / (blk + 1);
+          else
+            task = rmd + (n - pivot_n) / blk; /* note: if blk=0, then this case can not occur */
+
+          send_count[task]++;
+        }
+    }
+  /* exchange the counts: recv_count[j] becomes the number of entries task j sends to this task */
+  MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+  /* totals, and offsets of each task's segment in the export/import buffers */
+  for(j = 0, nexport = 0, nimport = 0, recv_offset[0] = 0, send_offset[0] = 0; j < NTask; j++)
+    {
+      nexport += send_count[j];
+      nimport += recv_count[j];
+      if(j > 0)
+        {
+          send_offset[j] = send_offset[j - 1] + send_count[j - 1];
+          recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1];
+        }
+    }
+
+  struct ind_data
+  {
+    int n;      /* global index of the entry */
+    double val; /* value of loc at that index */
+  } * export_data, *import_data;
+
+  export_data = mymalloc("export_data", nexport * sizeof(struct ind_data));
+  import_data = mymalloc("import_data", nimport * sizeof(struct ind_data));
+  /* reset the counters; the second pass reuses them as fill positions within each segment */
+  for(j = 0; j < NTask; j++)
+    send_count[j] = 0;
+
+  for(n = 0; n < N; n++)
+    {
+      if(loc[n] != 0)
+        {
+          if(n < pivot_n)
+            task = n / (blk + 1);
+          else
+            task = rmd + (n - pivot_n) / blk; /* note: if blk=0, then this case can not occur */
+
+          int index              = send_offset[task] + send_count[task]++;
+          export_data[index].n   = n;
+          export_data[index].val = loc[n];
+        }
+    }
+  /* pairwise exchange: in round ngrp this task trades its segment with task ThisTask ^ ngrp */
+  for(ngrp = 0; ngrp < (1 << PTask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */
+    {
+      int recvTask = ThisTask ^ ngrp;
+      if(recvTask < NTask)
+        if(send_count[recvTask] > 0 || recv_count[recvTask] > 0)
+          MPI_Sendrecv(&export_data[send_offset[recvTask]], send_count[recvTask] * sizeof(struct ind_data), MPI_BYTE, recvTask,
+                       TAG_DENS_B, &import_data[recv_offset[recvTask]], recv_count[recvTask] * sizeof(struct ind_data), MPI_BYTE,
+                       recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+  /* add every received value to its slot in the local block */
+  for(i = 0; i < nimport; i++)
+    {
+      int j = import_data[i].n - loc_first_n; /* position inside the local block; shadows the outer j */
+
+      if(j < 0 || j >= blocksize[ThisTask])
+        terminate("j=%d < 0 || j>= blocksize[ThisTask]=%d", j, blocksize[ThisTask]);
+
+      loc_data[j] += import_data[i].val;
+    }
+
+  myfree(import_data);
+  myfree(export_data);
+
+  /* now gather the summed blocks of all processors into glob */
+  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
+  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);
+
+  for(task = 0; task < NTask; task++)
+    bytecounts[task] = blocksize[task] * sizeof(double);
+
+  for(task = 1, byteoffset[0] = 0; task < NTask; task++)
+    byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+
+  MPI_Allgatherv(loc_data, bytecounts[ThisTask], MPI_BYTE, glob, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);
+
+  myfree(byteoffset);
+  myfree(bytecounts);
+
+  myfree(loc_data);
+  myfree(blocksize);
+  myfree(recv_offset);
+  myfree(send_offset);
+  myfree(recv_count);
+  myfree(send_count);
+}
+
+/*! \brief Home-made Allreduce function for int variables with minimum as a
+ *         reduction operation.
+ *
+ *  Only non-zero entries of loc are communicated, which can be faster than a
+ *  brute-force MPI_Allreduce. Elements for which no task holds a non-zero
+ *  value end up as INT_MAX in glob.
+ *
+ *  \param[in] loc Local array.
+ *  \param[out] glob Global (result) array.
+ *  \param[in] N number of elements in array.
+ *  \return void
+ */
+void allreduce_sparse_imin(int *loc, int *glob, int N)
+{
+  int i, j, n, loc_first_n, nimport, nexport, task, ngrp;
+
+  int *send_count  = mymalloc("send_count", sizeof(int) * NTask);
+  int *recv_count  = mymalloc("recv_count", sizeof(int) * NTask);
+  int *send_offset = mymalloc("send_offset", sizeof(int) * NTask);
+  int *recv_offset = mymalloc("recv_offset", sizeof(int) * NTask);
+  int *blocksize   = mymalloc("blocksize", sizeof(int) * NTask);
+
+  int blk     = N / NTask;
+  int rmd     = N - blk * NTask; /* remainder */
+  int pivot_n = rmd * (blk + 1);
+
+  for(task = 0, loc_first_n = 0; task < NTask; task++)
+    {
+      if(task < rmd)
+        blocksize[task] = blk + 1;
+      else
+        blocksize[task] = blk;
+
+      if(task < ThisTask)
+        loc_first_n += blocksize[task];
+    }
+
+  int *loc_data = mymalloc("loc_data", blocksize[ThisTask] * sizeof(int));
+  for(i = 0; i < blocksize[ThisTask]; i++)
+    {
+      loc_data[i] = INT_MAX; /* neutral element of the minimum reduction */
+    }
+
+  for(j = 0; j < NTask; j++)
+    send_count[j] = 0;
+
+  /* count, per destination task, the non-zero elements that task has to reduce */
+  for(n = 0; n < N; n++)
+    {
+      if(loc[n] != 0)
+        {
+          if(n < pivot_n)
+            task = n / (blk + 1);
+          else
+            task = rmd + (n - pivot_n) / blk; /* note: if blk=0, then this case can not occur */
+
+          send_count[task]++;
+        }
+    }
+
+  MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, nexport = 0, nimport = 0, recv_offset[0] = 0, send_offset[0] = 0; j < NTask; j++)
+    {
+      nexport += send_count[j];
+      nimport += recv_count[j];
+      if(j > 0)
+        {
+          send_offset[j] = send_offset[j - 1] + send_count[j - 1];
+          recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1];
+        }
+    }
+
+  struct ind_data
+  {
+    int n;
+    int val;
+  } * export_data, *import_data;
+
+  export_data = mymalloc("export_data", nexport * sizeof(struct ind_data));
+  import_data = mymalloc("import_data", nimport * sizeof(struct ind_data));
+
+  for(j = 0; j < NTask; j++)
+    send_count[j] = 0;
+
+  for(n = 0; n < N; n++)
+    {
+      if(loc[n] != 0)
+        {
+          if(n < pivot_n)
+            task = n / (blk + 1);
+          else
+            task = rmd + (n - pivot_n) / blk; /* note: if blk=0, then this case can not occur */
+
+          int index              = send_offset[task] + send_count[task]++;
+          export_data[index].n   = n;
+          export_data[index].val = loc[n];
+        }
+    }
+
+  for(ngrp = 0; ngrp < (1 << PTask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */
+    {
+      int recvTask = ThisTask ^ ngrp;
+      if(recvTask < NTask)
+        if(send_count[recvTask] > 0 || recv_count[recvTask] > 0)
+          MPI_Sendrecv(&export_data[send_offset[recvTask]], send_count[recvTask] * sizeof(struct ind_data), MPI_BYTE, recvTask,
+                       TAG_DENS_B, &import_data[recv_offset[recvTask]], recv_count[recvTask] * sizeof(struct ind_data), MPI_BYTE,
+                       recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+
+  for(i = 0; i < nimport; i++)
+    {
+      int j = import_data[i].n - loc_first_n;
+
+      if(j < 0 || j >= blocksize[ThisTask])
+        terminate("j=%d < 0 || j>= blocksize[ThisTask]=%d", j, blocksize[ThisTask]);
+
+      loc_data[j] = imin(loc_data[j], import_data[i].val);
+    }
+
+  myfree(import_data);
+  myfree(export_data);
+
+  /* now gather the reduced block of every task, so that all processors hold the full result */
+  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
+  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);
+
+  for(task = 0; task < NTask; task++)
+    bytecounts[task] = blocksize[task] * sizeof(int);
+
+  for(task = 1, byteoffset[0] = 0; task < NTask; task++)
+    byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+
+  MPI_Allgatherv(loc_data, bytecounts[ThisTask], MPI_BYTE, glob, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);
+
+  myfree(byteoffset);
+  myfree(bytecounts);
+
+  myfree(loc_data);
+  myfree(blocksize);
+  myfree(recv_offset);
+  myfree(send_offset);
+  myfree(recv_count);
+  myfree(send_count);
+}
+
+/*! \brief Sorts an array with the C library qsort() and times the call.
+ *
+ *  \param[in, out] base Array that is sorted in place.
+ *  \param[in] nel Number of array elements.
+ *  \param[in] width Size of one element in bytes.
+ *  \param[in] compar Comparison callback handed on to qsort().
+ *
+ *  \return Time spent in qsort(), i.e. timediff() of two second() readings.
+ */
+double mysort(void *base, size_t nel, size_t width, int (*compar)(const void *, const void *))
+{
+  /* time stamp taken right before the sort starts */
+  const double t_begin = second();
+
+  qsort(base, nel, width, compar);
+
+  /* time stamp taken right after the sort has finished */
+  const double t_end = second();
+
+  return timediff(t_begin, t_end);
+}
+
+/*! \brief Absolute value of a double variable.
+ *
+ *  \param[in] a Double variable.
+ *
+ *  \return -a if a < 0, otherwise a unchanged.
+ */
+double dabs(double a)
+{
+  /* every input that is not below zero is handed back as it is */
+  const double magnitude = (a < 0) ? -a : a;
+
+  return magnitude;
+}
+
+/*! \brief Larger one of two double variables.
+ *
+ *  \param[in] a First variable.
+ *  \param[in] b Second variable.
+ *
+ *  \return a if a > b, otherwise b.
+ */
+double dmax(double a, double b)
+{
+  /* b is the result whenever a is not strictly larger */
+  const double larger = (a > b) ? a : b;
+
+  return larger;
+}
+
+/*! \brief Larger one of two size_t type variables.
+ *
+ *  \param[in] a First variable.
+ *  \param[in] b Second variable.
+ *
+ *  \return a if a > b, otherwise b.
+ */
+size_t smax(size_t a, size_t b)
+{
+  if(a <= b)
+    return b; /* covers the tie as well, where both values coincide */
+
+  return a;
+}
+
+/*! \brief Smaller one of two double variables.
+ *
+ *  \param[in] a First variable.
+ *  \param[in] b Second variable.
+ *
+ *  \return a if a < b, otherwise b.
+ */
+double dmin(double a, double b)
+{
+  /* b is the result whenever a is not strictly smaller */
+  const double smaller = (a < b) ? a : b;
+
+  return smaller;
+}
+
+/*! \brief Largest value in an array of double variables.
+ *
+ *  \param[in] a Array of double variables.
+ *  \param[in] num_elements Number of elements in array.
+ *
+ *  \return Largest element, or -DBL_MAX if no element exceeds that value.
+ */
+double max_array(double *a, int num_elements)
+{
+  double largest = -DBL_MAX; /* also the result for num_elements <= 0 */
+  const double *cur;
+
+  /* the guard keeps the pointer arithmetic away from non-positive counts */
+  if(num_elements > 0)
+    for(cur = a; cur != a + num_elements; cur++)
+      if(*cur > largest)
+        largest = *cur;
+
+  return largest;
+}
+
+/*! \brief Larger one of two integers.
+ *
+ *  \param[in] a First integer variable.
+ *  \param[in] b Second integer variable.
+ *
+ *  \return a if a > b, otherwise b.
+ */
+int imax(int a, int b)
+{
+  if(a <= b)
+    return b; /* b wins, also when both values are equal */
+
+  return a;
+}
+
+/*! \brief Smaller one of two integers.
+ *
+ *  \param[in] a First integer variable.
+ *  \param[in] b Second integer variable.
+ *
+ *  \return a if a < b, otherwise b.
+ */
+int imin(int a, int b)
+{
+  /* b is the result whenever a is not strictly smaller */
+  const int smaller = (a < b) ? a : b;
+
+  return smaller;
+}
+
+/*! \brief Flush (i.e. empty buffer) of a file output stream.
+ *
+ *  \param[in] fstream Pointer to file output.
+ *
+ *  \return Status: 0 if REDUCE_FLUSH is defined, else the result of fflush().
+ */
+int myflush(FILE *fstream)
+{
+#ifdef REDUCE_FLUSH
+  /* do nothing */
+  return 0;
+#else  /* #ifdef REDUCE_FLUSH */
+  return fflush(fstream);
+#endif /* #ifdef REDUCE_FLUSH #else */
+}
+
+/*! \brief Flush for all global log-files.
+ *
+ *  No-op unless REDUCE_FLUSH is set; then task 0 flushes at fixed intervals.
+ *
+ *  \return status (0: did nothing, 1 did flush)
+ */
+int flush_everything(void)
+{
+#ifndef REDUCE_FLUSH
+  return 0; /* without REDUCE_FLUSH this function never flushes anything */
+#else  /* #ifndef REDUCE_FLUSH */
+  if(ThisTask == 0)
+    {
+      if((CPUThisRun - All.FlushLast) < All.FlushCpuTimeDiff)
+        {
+          return 0; /* interval since the last flush has not passed yet */
+        }
+      else
+        {
+          All.FlushLast = CPUThisRun;
+        }
+    }
+  else
+    {
+      return 0; /* only task 0 flushes the log files */
+    }
+#endif /* #ifndef REDUCE_FLUSH #else */
+
+  mpi_printf("Flushing...\n");
+
+  fflush(FdDomain);
+  fflush(FdMemory);
+  fflush(FdTimings);
+  fflush(FdInfo);
+  fflush(FdTimebin);
+  fflush(FdBalance);
+  fflush(FdCPU);
+  fflush(FdEnergy);
+
+#ifdef OUTPUT_CPU_CSV
+  fflush(FdCPUCSV);
+#endif /* #ifdef OUTPUT_CPU_CSV */
+
+#ifdef USE_SFR
+  fflush(FdSfr);
+#endif /* #ifdef USE_SFR */
+
+  return 1;
+}
+
+#ifdef DEBUG
+#include <fenv.h>
+/*! \brief Enables core dumps and, optionally, FPU exceptions for debugging.
+ *
+ *  \return void
+ */
+void enable_core_dumps_and_fpu_exceptions(void)
+{
+#ifdef DEBUG_ENABLE_FPU_EXCEPTIONS
+  /* enable floating point exceptions */
+
+  extern int feenableexcept(int __excepts);
+  feenableexcept(FE_DIVBYZERO | FE_INVALID);
+
+  /* Note: FPU exceptions appear not to work properly
+   * when the Intel C-Compiler for Linux is used */
+#endif /* #ifdef DEBUG_ENABLE_FPU_EXCEPTIONS */
+
+  /* raise the core-dump size to the hard limit: asking for RLIM_INFINITY is
+   * rejected by setrlimit() whenever the hard limit is finite */
+  struct rlimit rlim;
+  if(getrlimit(RLIMIT_CORE, &rlim) == 0)
+    {
+      rlim.rlim_cur = rlim.rlim_max;
+      setrlimit(RLIMIT_CORE, &rlim);
+    }
+
+  /* MPICH catches SIGSEGV, SIGBUS and SIGFPE; restore the default handlers
+   * so that these signals generate a core file again */
+  signal(SIGSEGV, SIG_DFL);
+  signal(SIGBUS, SIG_DFL);
+  signal(SIGFPE, SIG_DFL);
+  signal(SIGINT, SIG_DFL);
+}
+#endif /* #ifdef DEBUG */
+
+/*! \brief Wrapper for error handling; terminates code.
+ *
+ *  \param[in] reason Error message.
+ *  \param[in] file File in which error occurred.
+ *  \param[in] line Line in which error occurred.
+ *  \param[in] gsl_errno Error code.
+ *
+ *  \return void
+ */
+void my_gsl_error_handler(const char *reason, const char *file, int line, int gsl_errno)
+{
+  terminate("GSL has reported an error: reason='%s', error handler called from file '%s', line %d, with error code %d", reason, file,
+            line, gsl_errno);
+}
+
+/*! \brief Returns a random number from standard random number generator.
+ *
+ *  \return Random number [0,1), drawn via gsl_rng_uniform(random_generator).
+ */
+double get_random_number(void) { return gsl_rng_uniform(random_generator); }
+
+/*! \brief Returns a random number from auxiliary random number generator.
+ *
+ *  \return Random number [0,1), drawn via gsl_rng_uniform(random_generator_aux).
+ */
+double get_random_number_aux(void) { return gsl_rng_uniform(random_generator_aux); }
+
+/*! \brief Wall-clock time in seconds.
+ *
+ *  \return The current value of MPI_Wtime() as a floating-point value.
+ */
+double second(void) { return MPI_Wtime(); }
+
+/*! \brief Timing routine based on the global WallclockTime reference.
+ *
+ *  Strategy: call this at end of functions to account for time in this
+ *  function, and before another (nontrivial) function is called.
+ *
+ *  \return Time passed since last call of this function.
+ */
+double measure_time(void)
+{
+  const double now     = second();
+  const double elapsed = now - WallclockTime;
+
+  /* the current reading becomes the reference for the next call */
+  WallclockTime = now;
+
+  return elapsed;
+}
+
+/*! \brief Time difference between two readings.
+ *
+ *  Returns t1 - t0 for two measurements obtained with second().
+ *  A negative difference is treated as an overflow of the tick
+ *  counter on 32bit systems and corrected (or clamped to 0 with WALLCLOCK).
+ *
+ *  \param[in] t0 First time.
+ *  \param[in] t1 Second time.
+ *
+ *  \return Time difference.
+ */
+double timediff(double t0, double t1)
+{
+  const double delta = t1 - t0;
+
+  /* regular case: the second reading is not earlier than the first one */
+  if(!(delta < 0))
+    return delta;
+
+    /* a negative difference is taken as an overflow of the tick counter
+     * (for systems with 32bit tick counter); WALLCLOCK reports it as zero,
+     * otherwise one full counter period is added */
+#ifdef WALLCLOCK
+  return 0;
+#else  /* #ifdef WALLCLOCK */
+  return t1 + pow(2, 32) / CLOCKS_PER_SEC - t0;
+#endif /* #ifdef WALLCLOCK #else */
+}
+
+/*! \brief Global minimum of long long variables.
+ *
+ *  \param[in] n Length of array.
+ *  \param[in] src Source array.
+ *  \param[out] res Result array; may be identical to src.
+ *
+ *  \return void
+ */
+void minimum_large_ints(int n, long long *src, long long *res)
+{
+  if(src == res)
+    {
+      /* aliased buffers: let MPI reduce in place instead of copying src
+       * into a variable-length stack array, which could overflow the stack
+       * for large n and is undefined for n <= 0 */
+      MPI_Allreduce(MPI_IN_PLACE, res, n, MPI_LONG_LONG_INT, MPI_MIN, MPI_COMM_WORLD);
+    }
+  else
+    MPI_Allreduce(src, res, n, MPI_LONG_LONG_INT, MPI_MIN, MPI_COMM_WORLD);
+}
+
+/*! \brief Global sum of an array of int variables into a long long.
+ *
+ *  The local values are widened to long long before the reduction.
+ *  Can be used with arbitrary MPI communicator.
+ *
+ *  \param[in] n Length of array.
+ *  \param[in] src Source array.
+ *  \param[out] res Result array.
+ *  \param[in] comm MPI communicator.
+ *  \return void
+ */
+void sumup_large_ints_comm(int n, int *src, long long *res, MPI_Comm comm)
+{
+  long long widened[n]; /* 64bit copies of the local values, see loop below */
+  int k = n;
+
+  while(k-- > 0) /* element order is irrelevant for a plain copy */
+    widened[k] = src[k];
+  MPI_Allreduce(widened, res, n, MPI_LONG_LONG_INT, MPI_SUM, comm);
+}
+
+/*! \brief Global sum of an array of int variables into a long long.
+ *
+ *  To prevent overflow when summing up; wrapper function for
+ *  sumup_large_ints_comm using MPI_COMM_WORLD.
+ *
+ *  \param[in] n Length of array.
+ *  \param[in] src Source array.
+ *  \param[out] res Result array.
+ *
+ *  \return void
+ */
+void sumup_large_ints(int n, int *src, long long *res) { sumup_large_ints_comm(n, src, res, MPI_COMM_WORLD); }
+
+/*! \brief Global sum of an array of long long variables.
+ *
+ *  Reduces in place (MPI_IN_PLACE), if source and result array are identical.
+ *
+ *  \param[in] n Length of array.
+ *  \param[in] src Source array.
+ *  \param[out] res Result array; may be identical to src.
+ *
+ *  \return void
+ */
+void sumup_longs(int n, long long *src, long long *res)
+{
+  if(src == res)
+    {
+      /* aliased buffers: let MPI reduce in place instead of copying src
+       * into a variable-length stack array, which could overflow the stack
+       * for large n and is undefined for n <= 0 */
+      MPI_Allreduce(MPI_IN_PLACE, res, n, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
+    }
+  else
+    MPI_Allreduce(src, res, n, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
+}
+
+/*! \brief Picks the larger of two elements of type size_t.
+ *
+ *  \param[in] a First element.
+ *  \param[in] b Second element.
+ *
+ *  \return b if a < b, otherwise a (so a is returned for equal values).
+ */
+size_t sizemax(size_t a, size_t b)
+{
+  /* a is kept unless b is strictly larger */
+  const size_t larger = (a < b) ? b : a;
+
+  return larger;
+}
+
+/*! \brief Reads from process info file of linux system.
+ *
+ *  Opens /proc/<pid>/status of the calling process and prints every line
+ *  that starts with "VmRSS" or "VmSize" to stdout, prefixed with the task
+ *  number. Nothing happens if the file cannot be opened.
+ *
+ *  \return void
+ */
+void report_VmRSS(void)
+{
+  char buf[1024]; /* holds the path first, afterwards one line at a time */
+  FILE *fd;
+
+  /* status file of the calling process */
+  sprintf(buf, "/proc/%d/status", getpid());
+
+  fd = fopen(buf, "r");
+  if(!fd)
+    return; /* without the status file there is nothing to report */
+
+  /* read line by line until fgets() signals end of file or an error */
+  while(fgets(buf, 500, fd) == buf)
+    {
+      /* only these two entries of the status file are printed */
+      const int is_rss  = strncmp(buf, "VmRSS", 5) == 0;
+      const int is_size = strncmp(buf, "VmSize", 6) == 0;
+
+      if(is_rss || is_size)
+        printf("ThisTask=%d: %s", ThisTask, buf);
+    }
+
+  fclose(fd);
+}
+
+/*! \brief Reads from memory info file of Linux system.
+ *
+ *  All four outputs are set to 0 first, so values that cannot be read from
+ *  /proc/meminfo (file missing, key absent) are reported as 0.
+ *
+ *  \param[out] MemTotal Value of the "MemTotal" entry.
+ *  \param[out] Committed_AS Value of the "Committed_AS" entry.
+ *  \param[out] SwapTotal Value of the "SwapTotal" entry.
+ *  \param[out] SwapFree Value of the "SwapFree" entry.
+ *  \return Committable memory, i.e. MemTotal - Committed_AS.
+ */
+long long report_comittable_memory(long long *MemTotal, long long *Committed_AS, long long *SwapTotal, long long *SwapFree)
+{
+  FILE *fd;
+  char buf[1024];
+
+  /* defined results even if the file or one of its entries is unavailable */
+  *MemTotal = *Committed_AS = *SwapTotal = *SwapFree = 0;
+
+  if((fd = fopen("/proc/meminfo", "r")))
+    {
+      while(fgets(buf, 500, fd) == buf)
+        {
+          /* each offset skips the key plus two characters; atoll() skips blanks */
+          if(strncmp(buf, "MemTotal", 8) == 0)
+            *MemTotal = atoll(buf + 10);
+          if(strncmp(buf, "Committed_AS", 12) == 0)
+            *Committed_AS = atoll(buf + 14);
+          if(strncmp(buf, "SwapTotal", 9) == 0)
+            *SwapTotal = atoll(buf + 11);
+          if(strncmp(buf, "SwapFree", 8) == 0)
+            *SwapFree = atoll(buf + 10);
+        }
+      fclose(fd);
+    }
+
+  return (*MemTotal - *Committed_AS);
+}
+
+/*! \brief Checks if parameter max memsize is smaller than available memory.
+ *
+ *  \return void
+ */
+void check_maxmemsize_setting(void)
+{
+  int errflag = 0, errflag_tot; /* local flag and its maximum over all tasks */
+
+  if(All.MaxMemSize > (MemoryOnNode / 1024.0 / TasksInThisNode) && RankInThisNode == 0)
+    {
+      printf("On node '%s', we have %d MPI ranks and at most %g MB available. This is not enough space for MaxMemSize = %g MB\n",
+             loc_node.name, TasksInThisNode, MemoryOnNode / 1024.0, (double)All.MaxMemSize);
+      errflag = 1;
+      fflush(stdout);
+    }
+
+  MPI_Allreduce(&errflag, &errflag_tot, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+#ifndef __OSX__ /* with __OSX__ defined the run continues despite the warning */
+  if(errflag_tot)
+    mpi_terminate("Not enough memory error!");
+#endif /* #ifndef __OSX__ */
+}
+
+/*! \brief Gathers memory information from all tasks; task 0 prints it to stdout.
+ *
+ *  Part of HOST_MEMORY_REPORTING, printed at startup. Also sets MemoryOnNode.
+ *
+ *  \return void
+ */
+void mpi_report_committable_memory(void)
+{
+  long long *sizelist, maxsize[6], minsize[6];
+  double avgsize[6];
+  int i, imem, mintask[6], maxtask[6];
+  long long Mem[6];
+  char label[512];
+
+  Mem[0] = report_comittable_memory(&Mem[1], &Mem[2], &Mem[3], &Mem[4]);
+  Mem[5] = Mem[1] - Mem[0]; /* MemTotal minus the committable memory */
+
+  MemoryOnNode = Mem[1]; /* MemTotal as reported by report_comittable_memory() */
+
+  for(imem = 0; imem < 6; imem++)
+    {
+      sizelist = (long long *)malloc(NTask * sizeof(long long)); /* NOTE(review): result is not checked for NULL */
+      MPI_Allgather(&Mem[imem], sizeof(long long), MPI_BYTE, sizelist, sizeof(long long), MPI_BYTE, MPI_COMM_WORLD);
+
+      for(i = 1, mintask[imem] = 0, maxtask[imem] = 0, maxsize[imem] = minsize[imem] = sizelist[0], avgsize[imem] = sizelist[0];
+          i < NTask; i++)
+        {
+          if(sizelist[i] > maxsize[imem])
+            {
+              maxsize[imem] = sizelist[i];
+              maxtask[imem] = i;
+            }
+          if(sizelist[i] < minsize[imem])
+            {
+              minsize[imem] = sizelist[i];
+              mintask[imem] = i;
+            }
+          avgsize[imem] += sizelist[i]; /* holds the sum; divided by NTask when printed */
+        }
+
+      free(sizelist);
+    }
+
+  if(ThisTask == 0)
+    {
+      printf(
+          "\n-------------------------------------------------------------------------------------------------------------------------"
+          "\n");
+      for(imem = 0; imem < 6; imem++)
+        {
+          switch(imem)
+            {
+              case 0:
+                sprintf(label, "AvailMem");
+                break;
+              case 1:
+                sprintf(label, "Total Mem");
+                break;
+              case 2:
+                sprintf(label, "Committed_AS");
+                break;
+              case 3:
+                sprintf(label, "SwapTotal");
+                break;
+              case 4:
+                sprintf(label, "SwapFree");
+                break;
+              case 5:
+                sprintf(label, "AllocMem");
+                break;
+            }
+          printf("%s:\t Largest = %10.2f Mb (on task=%4d), Smallest = %10.2f Mb (on task=%4d), Average = %10.2f Mb\n", label,
+                 maxsize[imem] / (1024.0), maxtask[imem], minsize[imem] / (1024.0), mintask[imem], avgsize[imem] / (1024.0 * NTask));
+        }
+      printf(
+          "-------------------------------------------------------------------------------------------------------------------------"
+          "\n");
+    }
+
+  char name[MPI_MAX_PROCESSOR_NAME];
+
+  if(ThisTask == maxtask[2]) /* the task with the largest Committed_AS value */
+    {
+      int len;
+      MPI_Get_processor_name(name, &len);
+    }
+
+  MPI_Bcast(name, MPI_MAX_PROCESSOR_NAME, MPI_BYTE, maxtask[2], MPI_COMM_WORLD);
+
+  if(ThisTask == 0)
+    {
+      printf("Task=%d has the maximum commited memory and is host: %s\n", maxtask[2], name);
+      printf(
+          "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+          "\n");
+    }
+
+  fflush(stdout);
+}
+
+/*! \brief Find the first bit set in the argument.
+ *
+ *  \param[in] i Peanokey variable.
+ *
+ *  \return First bit set (type int).
+ */
+int my_ffsll(peanokey i)
+{
+  int res = 0;
+
+  while(i > 0xffffffff) /* value does not fit into 32 bits yet */
+    {
+      res += 32;
+      i >>= 32; /* NOTE(review): set bits in the dropped low word are never inspected - confirm intended */
+    }
+
+  return res + ffs(i); /* ffs() numbers bits starting from 1 */
+}
+
+/*! \brief Finds last (most significant) bit set in x.
+ *
+ *  The following function appears in the linux kernel.
+ *
+ *  \param[in] x Integer input.
+ *  \return 1-based position of the highest set bit; 0 if x is 0.
+ */
+int my_fls(int x)
+{
+  unsigned int v = (unsigned int)x; /* shifting a signed int into the sign bit is undefined */
+  int r          = 32;
+
+  if(!v)
+    return 0;
+  if(!(v & 0xffff0000u))
+    {
+      v <<= 16;
+      r -= 16;
+    }
+  if(!(v & 0xff000000u))
+    {
+      v <<= 8;
+      r -= 8;
+    }
+  if(!(v & 0xf0000000u))
+    {
+      v <<= 4;
+      r -= 4;
+    }
+  if(!(v & 0xc0000000u))
+    {
+      v <<= 2;
+      r -= 2;
+    }
+  if(!(v & 0x80000000u))
+    {
+      v <<= 1;
+      r -= 1;
+    }
+  return r;
+}
diff --git a/src/amuse/community/arepo/src/utils/tags.h b/src/amuse/community/arepo/src/utils/tags.h
new file mode 100644
index 0000000000..e26bbaa4a5
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/tags.h
@@ -0,0 +1,50 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/utils/tags.h
+ * \date        05/2018
+ * \brief       Tag defines.
+ * \details     Choice of numbers for historic reasons.
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 28.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#define TAG_N 10 /*!< First of the tags used for labelling MPI messages (this note covers the whole list) */
+#define TAG_HEADER 11
+#define TAG_PDATA 12
+#define TAG_SPHDATA 13
+#define TAG_KEY 14
+#define TAG_GRAV_B 19
+#define TAG_HYDRO_A 22
+#define TAG_HYDRO_B 23
+#define TAG_NFORTHISTASK 24
+#define TAG_NONPERIOD_A 29
+#define TAG_NONPERIOD_B 30
+#define TAG_NONPERIOD_C 31
+#define TAG_DENS_A 35
+#define TAG_DENS_B 36
+#define TAG_LOCALN 37
+#define TAG_FOF_A 45
+#define TAG_PDATA_SPH 70
+#define TAG_KEY_SPH 71
+#define TAG_BARRIER 85
+#define TAG_NODE_DATA 105
diff --git a/src/amuse/community/arepo/src/utils/timer.h b/src/amuse/community/arepo/src/utils/timer.h
new file mode 100644
index 0000000000..a622d1e8e5
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/timer.h
@@ -0,0 +1,251 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/utils/timer.h
+ * \date        05/2018
+ * \brief       Timer macros for Arepo.
+ * \details
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 28.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#if !defined(TIMER_H) || defined(TIMER_STRUCT) /* processed again on purpose when TIMER_STRUCT is set */
+#define TIMER_H
+
+#define DETAILED_TIMING_GRAVWALK 0
+#define DETAILED_TIMING_STELLARDENSITY 1
+
+#define TIMER_INSTRUMENT_START(counter) /* instrumentation hook, expands to nothing */
+#define TIMER_INSTRUMENT_STOP(counter)  /* instrumentation hook, expands to nothing */
+#define TIMER_INSTRUMENT_CREATE(name, descr) ;
+
+#ifdef TIMER_STRUCT
+#undef TIMER_CREATE
+/*! \def TIMER_CREATE(name, desc, par, symba, symbb)
+ *  \brief Fills the Timer_data entry of a CPU timer (TIMER_STRUCT mode).
+ *
+ *  \param name name used in the code to reference this timer
+ *  \param desc description string used in output files
+ *  \param par parent of this timer to build a tree-like hierarchy of timers
+ *  \param symba character used for active time in balance.txt
+ *  \param symbb character used for imbalance in balance.txt
+ *
+ *  Note: strncpy() leaves names of 40 or more characters unterminated.
+ */
+#define TIMER_CREATE(name, desc, par, symba, symbb) \
+  Timer_data[name].parent = par;                    \
+  strncpy(Timer_data[name].shortname, #name, 40);   \
+  strncpy(Timer_data[name].longname, (desc), 40);   \
+  Timer_data[name].symb      = (symba);             \
+  Timer_data[name].symbImbal = (symbb);             \
+  TIMER_INSTRUMENT_CREATE(name, desc)
+
+#else /* #ifdef TIMER_STRUCT */
+
+#define TIMER_STACK_DEPTH 30
+#define TIMER_CREATE(name, desc, parent, symba, symbb) name, /* default mode: only the enumerator is emitted */
+
+/*! \def  TIMER_START(counter)
+ *  \brief Starts the timer counter.
+ *
+ *  Books the time since the last measurement on the currently running timer,
+ *  then pushes counter onto TimerStack; terminates if counter is already
+ *  running or the stack is full. Use it instead of accessing CPU_Step.
+ *  \param[in] counter Name of the timer to start.
+ */
+#define TIMER_START_INTERNAL(counter)                                             \
+  {                                                                               \
+    TIMER_INSTRUMENT_START(counter);                                              \
+    CPU_Step[TimerStack[TimerStackPos]] += measure_time();                        \
+    int itimer;                                                                   \
+    for(itimer = 0; itimer <= TimerStackPos; itimer++)                            \
+      if(counter == TimerStack[itimer])                                           \
+        {                                                                         \
+          printf("Try to start timer %d, but it is already running.\n", counter); \
+          terminate("fail")                                                       \
+        };                                                                        \
+    if(++TimerStackPos >= TIMER_STACK_DEPTH)                                      \
+      {                                                                           \
+        terminate("Run out of timer stack space, increase TIMER_STACK_DEPTH");    \
+      }                                                                           \
+    else                                                                          \
+      {                                                                           \
+        TimerStack[TimerStackPos] = (counter);                                    \
+      }                                                                           \
+  }
+
+#define TIMER_START(counter) TIMER_START_INTERNAL(counter)
+
+/*! \def TIMER_STOP(counter)
+ *  \brief Stops the timer counter
+ *
+ *  Terminates unless counter is the timer on top of TimerStack; otherwise
+ *  books the elapsed time on it and pops it. Popping the last remaining
+ *  timer terminates as well. Use it instead of accessing CPU_Step.
+ *  \param[in] counter Name of the timer to stop.
+ */
+#define TIMER_STOP_INTERNAL(counter)                                                \
+  {                                                                                 \
+    if(TimerStack[TimerStackPos] != (counter))                                      \
+      {                                                                             \
+        terminate("Wrong use of TIMER_STOP, you must stop the timer started last"); \
+      }                                                                             \
+    CPU_Step[TimerStack[TimerStackPos--]] += measure_time();                        \
+    if(TimerStackPos < 0)                                                           \
+      {                                                                             \
+        terminate("Do not stop the out CPU_MISC timer");                            \
+      }                                                                             \
+    TIMER_INSTRUMENT_STOP(counter);                                                 \
+  }
+
+#define TIMER_STOP(counter) TIMER_STOP_INTERNAL(counter)
+
+/*! \def TIMER_STOPSTART(stop, start)
+ *  \brief Stops the timer 'stop' and starts the timer 'start'
+ *
+ *  Expands to TIMER_STOP_INTERNAL(stop) followed by
+ *  TIMER_START_INTERNAL(start) inside one block.
+ *
+ *  \param[in] stop Name of the timer to stop
+ *  \param[in] start Name of the timer to start
+ */
+#define TIMER_STOPSTART(stop, start) \
+  {                                  \
+    TIMER_STOP_INTERNAL(stop);       \
+    TIMER_START_INTERNAL(start);     \
+  }
+
+/*! \def TIMER_ADD(counter, amount)
+ *  \brief Adds amount to the timer counter (expansion ends with a semicolon).
+ *
+ *  \param[in] counter Name of the timer to add to.
+ *  \param[in] amount Amount to add to timer counter.
+ */
+#define TIMER_ADD(counter, amount) CPU_Step[counter] += (amount);
+
+/*! \def TIMER_DIFF(counter)
+ *  \brief Returns amount elapsed for the timer since last save with
+ *         TIMER_STORE.
+ *
+ *  \param[in] counter Name of the timer to query.
+ */
+#define TIMER_DIFF(counter) (CPU_Step[counter] - CPU_Step_Stored[counter])
+
+/*! \def TIMER_STORE
+ *  \brief Copies CPU_Step to CPU_Step_Stored (sizeof(CPU_Step) bytes), such
+ *         that differences with respect to this reference can be calculated.
+ */
+#define TIMER_STORE memcpy(CPU_Step_Stored, CPU_Step, sizeof(CPU_Step));
+
+enum timers
+{
+  CPU_NONE = -2,                /*!< used for counters without a parent */
+  CPU_ROOT = -1,                /*!< root node of the tree */
+#endif /* #ifdef TIMER_STRUCT #else */
+
+/* possible characters to use for marking the parts:
+ *
+ *   abdefghijklmnopqrstuvABCDEFGHHIJKLMNOPQRSTUV
+ *   0123456789
+ *   -:.*=[]^&;~/_$()?+"<>@#!|\
+ */
+
+/* add your counter here; in the default mode the list position defines the enum value of the timer */
+
+TIMER_CREATE(CPU_ALL, "total", CPU_ROOT, '-', '-') /*!< root timer, everything should be below this timer */
+TIMER_CREATE(CPU_TREE, "treegrav", CPU_ALL, 'a', ')')
+TIMER_CREATE(CPU_TREEBUILD, "treebuild", CPU_TREE, 'b', '(')
+TIMER_CREATE(CPU_TREEBUILD_INSERT, "insert", CPU_TREEBUILD, 'c', '*')
+TIMER_CREATE(CPU_TREEBUILD_BRANCHES, "branches", CPU_TREEBUILD, 'd', '&')
+TIMER_CREATE(CPU_TREEBUILD_TOPLEVEL, "toplevel", CPU_TREEBUILD, 'e', '^')
+TIMER_CREATE(CPU_TREECOSTMEASURE, "treecostm", CPU_TREE, 'f', '%')
+TIMER_CREATE(CPU_TREEWALK, "treewalk", CPU_TREE, 'g', '$')
+TIMER_CREATE(CPU_TREEWALK1, "treewalk1", CPU_TREEWALK, 'h', '#')
+TIMER_CREATE(CPU_TREEWALK2, "treewalk2", CPU_TREEWALK, 'i', '@')
+TIMER_CREATE(CPU_TREEBALSNDRCV, "treebalsndrcv", CPU_TREE, 'j', '!')
+TIMER_CREATE(CPU_TREESENDBACK, "treeback", CPU_TREE, 'm', '7')
+TIMER_CREATE(CPU_TREEDIRECT, "treedirect", CPU_TREE, 'r', '2')
+#ifdef PMGRID
+TIMER_CREATE(CPU_PM_GRAVITY, "pm_grav", CPU_ALL, 's', '1')
+#endif /* #ifdef PMGRID */
+TIMER_CREATE(CPU_NGBTREEBUILD, "ngbtreebuild", CPU_ALL, 't', 'Z')
+TIMER_CREATE(CPU_NGBTREEUPDATEVEL, "ngbtreevelupdate", CPU_ALL, 'u', 'Y')
+TIMER_CREATE(CPU_MESH, "voronoi", CPU_ALL, 'v', 'X')
+TIMER_CREATE(CPU_MESH_INSERT, "insert", CPU_MESH, 'w', 'W')
+TIMER_CREATE(CPU_MESH_FIND_DP, "findpoints", CPU_MESH, 'x', 'V')
+TIMER_CREATE(CPU_MESH_CELLCHECK, "cellcheck", CPU_MESH, 'y', 'U')
+TIMER_CREATE(CPU_MESH_GEOMETRY, "geometry", CPU_MESH, 'z', 'T')
+TIMER_CREATE(CPU_MESH_EXCHANGE, "exchange", CPU_MESH, 'A', 'S')
+TIMER_CREATE(CPU_MESH_DYNAMIC, "dynamic", CPU_MESH, 'B', 'R')
+TIMER_CREATE(CPU_HYDRO, "hydro", CPU_ALL, 'C', 'Q')
+TIMER_CREATE(CPU_GRADIENTS, "gradients", CPU_HYDRO, 'D', 'P')
+TIMER_CREATE(CPU_FLUXES, "fluxes", CPU_HYDRO, 'F', 'N')
+TIMER_CREATE(CPU_FLUXES_COMM, "fluxcomm", CPU_HYDRO, 'H', 'L')
+TIMER_CREATE(CPU_CELL_UPDATES, "updates", CPU_HYDRO, 'J', 'j')
+TIMER_CREATE(CPU_SET_VERTEXVELS, "vertex vel", CPU_HYDRO, 'K', 'I')
+TIMER_CREATE(CPU_MHD, "mhd", CPU_HYDRO, '4', 'p')
+TIMER_CREATE(CPU_DOMAIN, "domain", CPU_ALL, 'U', 'y')
+TIMER_CREATE(CPU_PEANO, "peano", CPU_ALL, 'V', 'x')
+TIMER_CREATE(CPU_DRIFTS, "drift/kicks", CPU_ALL, 'W', 'w')
+TIMER_CREATE(CPU_TIMELINE, "timeline", CPU_ALL, 'X', 'v')
+#ifdef TREE_BASED_TIMESTEPS
+TIMER_CREATE(CPU_TREE_TIMESTEPS, "treetimesteps", CPU_ALL, 'Y', 'u')
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+TIMER_CREATE(CPU_SNAPSHOT, "i/o", CPU_ALL, 'Z', 't')
+TIMER_CREATE(CPU_LOGS, "logs", CPU_ALL, '1', 's')
+TIMER_CREATE(CPU_COOLINGSFR, "sfrcool", CPU_ALL, '2', 'r')
+#ifdef FOF
+TIMER_CREATE(CPU_FOF, "fof", CPU_ALL, '#', 'h')
+#endif /* #ifdef FOF */
+#ifdef SUBFIND
+TIMER_CREATE(CPU_SUBFIND, "subfind", CPU_ALL, '$', 'g')
+#endif /* #ifdef SUBFIND */
+TIMER_CREATE(CPU_REFINE, "refine", CPU_ALL, '%', 'f')
+TIMER_CREATE(CPU_DEREFINE, "mesh_derefine", CPU_ALL, '^', 'e')
+TIMER_CREATE(CPU_MAKEIMAGES, "images", CPU_ALL, '&', 'd')
+TIMER_CREATE(CPU_INIT, "initializ.", CPU_ALL, '*', 'c')
+TIMER_CREATE(CPU_RESTART, "restart", CPU_ALL, '(', 'b')
+TIMER_CREATE(CPU_MISC, "misc", CPU_ALL, ')', 'a')
+TIMER_CREATE(CPU_LAST, "LAST", CPU_NONE, ' ', ' ') /*!<last item, do not use! */
+#ifndef TIMER_STRUCT
+}
+;
+
+extern enum timers TimerStack[TIMER_STACK_DEPTH];
+extern int TimerStackPos;
+
+/*! \brief struct containing the information of a CPU timer
+ *         (Timer_data holds CPU_LAST + 1 of these).
+ */
+struct timer_d
+{
+  int parent;         /*!< id of the parent timer */
+  char shortname[40]; /*!< string containing the internal name of the timer */
+  char longname[40];  /*!< name of the timer */
+  char symb;          /*!< symbol used in balance.txt for the active part */
+  char symbImbal;     /*!< symbol used in balance.txt for imbalance */
+  char depth;         /*!< depth in the tree-like structure of this timer */
+};
+extern struct timer_d Timer_data[CPU_LAST + 1];
+#else /* #ifndef TIMER_STRUCT */
+#undef TIMER_STRUCT /* consumed: undefined again once the timer list has been expanded */
+#endif /* #ifndef TIMER_STRUCT #else */
+#endif /* #if !defined(TIMER_H) || defined(TIMER_STRUCT) */

From 5dc196f8bc62248cf370d0bf399eab24b545fcaf Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Fri, 18 Mar 2022 17:27:57 +0000
Subject: [PATCH 03/51] remove '#include ./arepoconfig.h' from allvars.h

---
 src/amuse/community/arepo/src/main/allvars.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/amuse/community/arepo/src/main/allvars.h b/src/amuse/community/arepo/src/main/allvars.h
index 2dc46e56b3..409165f7b4 100644
--- a/src/amuse/community/arepo/src/main/allvars.h
+++ b/src/amuse/community/arepo/src/main/allvars.h
@@ -41,8 +41,6 @@
 #include <stddef.h>
 #include <stdio.h>
 
-#include "./arepoconfig.h"
-
 #ifdef IMPOSE_PINNING
 #include <hwloc.h>
 #endif /* #ifdef IMPOSE_PINNING */

From b221aec6cffe53d3cc7c10acf490434774c5e65c Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Fri, 18 Mar 2022 17:32:56 +0000
Subject: [PATCH 04/51] update Makefile to include arepo source code

---
 src/amuse/community/arepo/src/Makefile | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile
index 66f18374a1..4b7835bd0a 100644
--- a/src/amuse/community/arepo/src/Makefile
+++ b/src/amuse/community/arepo/src/Makefile
@@ -1,20 +1,27 @@
-CFLAGS   += -Wall -g
+GSL_INCL  = -I/opt/Homebrew/include  # Need to make GSL_INCL generalisable.
+
+CFLAGS   += -Wall -g $(GSL_INCL)
 CXXFLAGS += $(CFLAGS) 
 LDFLAGS  += -lm $(MUSE_LD_FLAGS)
 
 CODELIB = libarepo.a
 
-CODEOBJS = test.o
+SUBDIR = add_backgroundgrid cooling debug_md5 domain fof gitversion gravity \
+	hydro init io main mesh mpi_utils ngbtree star_formation subfind \
+	time_integration utils
+SRCS = $(foreach fd, $(SUBDIR), $(wildcard $(fd)/*.c))
+
+CODEOBJS = test.o $(SRCS:c=o)
 
 AR = ar ruv
 RANLIB = ranlib
 RM = rm
 
-all: $(CODELIB) 
-
+all: $(CODELIB)
 
 clean:
 	$(RM) -f *.o *.a
+	$(RM) -f $(SRCS:.c=.o)
 
 distclean: clean
 

From 3b250368dcdb819f97d7bb81e4330a1b4fc98c09 Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Fri, 18 Mar 2022 17:41:06 +0000
Subject: [PATCH 05/51] add TODO to Makefile

---
 src/amuse/community/arepo/src/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile
index 4b7835bd0a..67b321843b 100644
--- a/src/amuse/community/arepo/src/Makefile
+++ b/src/amuse/community/arepo/src/Makefile
@@ -1,4 +1,4 @@
-GSL_INCL  = -I/opt/Homebrew/include  # Need to make GSL_INCL generalisable.
+GSL_INCL  = -I/opt/Homebrew/include  # TODO: Need to make GSL_INCL generalisable.
 
 CFLAGS   += -Wall -g $(GSL_INCL)
 CXXFLAGS += $(CFLAGS) 

From d56c2b3d3fd7961f63d307cd010bb2c2076aafe7 Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Mon, 28 Mar 2022 13:20:31 +0100
Subject: [PATCH 06/51] rename classes

---
 src/amuse/community/arepo/interface.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
index 64130dc384..7be0a7665f 100644
--- a/src/amuse/community/arepo/interface.py
+++ b/src/amuse/community/arepo/interface.py
@@ -1,6 +1,6 @@
 from amuse.community import *
 
-class arepoInterface(CodeInterface):
+class ArepoInterface(CodeInterface):
     
     include_headers = ['worker_code.h']
     
@@ -17,8 +17,8 @@ def echo_int():
         return function
         
     
-class arepo(InCodeComponentImplementation):
+class Arepo(InCodeComponentImplementation):
 
     def __init__(self, **options):
-        InCodeComponentImplementation.__init__(self,  arepoInterface(**options), **options)
+        InCodeComponentImplementation.__init__(self,  ArepoInterface(**options), **options)
     

From 469b04d20fe1ba2dccf3e6fcaa70bd9db90f3fc9 Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Mon, 28 Mar 2022 13:29:16 +0100
Subject: [PATCH 07/51] inherit from GravitationalDynamicsInterface

---
 src/amuse/community/arepo/interface.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
index 7be0a7665f..a52600e5cf 100644
--- a/src/amuse/community/arepo/interface.py
+++ b/src/amuse/community/arepo/interface.py
@@ -1,6 +1,7 @@
 from amuse.community import *
+from amuse.community.interface.gd import GravitationalDynamicsInterface
 
-class ArepoInterface(CodeInterface):
+class ArepoInterface(CodeInterface, GravitationalDynamicsInterface):
     
     include_headers = ['worker_code.h']
     

From a0a927cf913fa0ae534ebd0c3f94ee41337690cd Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Mon, 28 Mar 2022 13:51:12 +0100
Subject: [PATCH 08/51] inherit from LiteratureReferencesMixIn

---
 src/amuse/community/arepo/interface.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
index a52600e5cf..a9248e400c 100644
--- a/src/amuse/community/arepo/interface.py
+++ b/src/amuse/community/arepo/interface.py
@@ -1,12 +1,16 @@
 from amuse.community import *
 from amuse.community.interface.gd import GravitationalDynamicsInterface
 
-class ArepoInterface(CodeInterface, GravitationalDynamicsInterface):
+class ArepoInterface(
+    CodeInterface,
+    GravitationalDynamicsInterface,
+    LiteratureReferencesMixIn):
     
     include_headers = ['worker_code.h']
     
     def __init__(self, **keyword_arguments):
         CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments)
+        LiteratureReferencesMixIn.__init__(self)
     
     @legacy_function
     def echo_int():

From 016c75ec2063a553cd2cc73b8b939f0984eb81ee Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Mon, 28 Mar 2022 13:55:52 +0100
Subject: [PATCH 09/51] remove wildcard import

---
 src/amuse/community/arepo/interface.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
index a9248e400c..8f11ab923f 100644
--- a/src/amuse/community/arepo/interface.py
+++ b/src/amuse/community/arepo/interface.py
@@ -1,4 +1,8 @@
-from amuse.community import *
+from amuse.community import CodeInterface
+from amuse.community import LegacyFunctionSpecification
+from amuse.community import legacy_function
+from amuse.community import LiteratureReferencesMixIn
+
 from amuse.community.interface.gd import GravitationalDynamicsInterface
 
 class ArepoInterface(

From fae9ec3d41595dc867d330687388190cd647d0ec Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Mon, 28 Mar 2022 14:23:25 +0100
Subject: [PATCH 10/51] inherit from GravitationalDynamics

---
 src/amuse/community/arepo/interface.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
index 8f11ab923f..6601714257 100644
--- a/src/amuse/community/arepo/interface.py
+++ b/src/amuse/community/arepo/interface.py
@@ -4,6 +4,7 @@
 from amuse.community import LiteratureReferencesMixIn
 
 from amuse.community.interface.gd import GravitationalDynamicsInterface
+from amuse.community.interface.gd import GravitationalDynamics
 
 class ArepoInterface(
     CodeInterface,
@@ -26,8 +27,8 @@ def echo_int():
         return function
         
     
-class Arepo(InCodeComponentImplementation):
+class Arepo(GravitationalDynamics):
 
     def __init__(self, **options):
-        InCodeComponentImplementation.__init__(self,  ArepoInterface(**options), **options)
+        GravitationalDynamics.__init__(self,  ArepoInterface(**options), **options)
     

From e8a31134927ecb0225583195694b3655607d073f Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Mon, 28 Mar 2022 14:29:11 +0100
Subject: [PATCH 11/51] minor reformatting

---
 src/amuse/community/arepo/interface.py | 29 +++++++++++++-------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
index 6601714257..89e089fc2a 100644
--- a/src/amuse/community/arepo/interface.py
+++ b/src/amuse/community/arepo/interface.py
@@ -6,29 +6,30 @@
 from amuse.community.interface.gd import GravitationalDynamicsInterface
 from amuse.community.interface.gd import GravitationalDynamics
 
+
 class ArepoInterface(
     CodeInterface,
     GravitationalDynamicsInterface,
-    LiteratureReferencesMixIn):
-    
-    include_headers = ['worker_code.h']
-    
+    LiteratureReferencesMixIn
+):
+
+    include_headers = ["worker_code.h"]
+
     def __init__(self, **keyword_arguments):
         CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments)
         LiteratureReferencesMixIn.__init__(self)
-    
+
     @legacy_function
     def echo_int():
-        function = LegacyFunctionSpecification()  
-        function.addParameter('int_in', dtype='int32', direction=function.IN)
-        function.addParameter('int_out', dtype='int32', direction=function.OUT)
-        function.result_type = 'int32'
+        function = LegacyFunctionSpecification()
+        function.addParameter("int_in", dtype="int32", direction=function.IN)
+        function.addParameter("int_out", dtype="int32", direction=function.OUT)
+        function.result_type = "int32"
         function.can_handle_array = True
         return function
-        
-    
-class Arepo(GravitationalDynamics):
 
-    def __init__(self, **options):
-        GravitationalDynamics.__init__(self,  ArepoInterface(**options), **options)
+
+class Arepo(GravitationalDynamics):
     
+    def __init__(self, **options):
+        GravitationalDynamics.__init__(self, ArepoInterface(**options), **options)

From a3a020a27a8d1f4564d54f0ce4cf4b987aaf7b95 Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Mon, 28 Mar 2022 15:00:09 +0100
Subject: [PATCH 12/51] add ArepoInterface docstring

---
 src/amuse/community/arepo/interface.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
index 89e089fc2a..34d46a07a9 100644
--- a/src/amuse/community/arepo/interface.py
+++ b/src/amuse/community/arepo/interface.py
@@ -12,6 +12,16 @@ class ArepoInterface(
     GravitationalDynamicsInterface,
     LiteratureReferencesMixIn
 ):
+    """
+    Arepo is a cosmological magnetohydrodynamical moving-mesh simulation code,
+    descended from GADGET.
+
+    References:
+        .. [#] Springel, V., 2010, MNRAS, 401, 791 (Arepo) [2010MNRAS.401..791S]
+        .. [#] Pakmor, R., Bauer, A., Springel, V., 2011, MNRAS, 418, 1392 (Magnetohydrodynamics Module) [2011MNRAS.418.1392P]
+        .. [#] Pakmor, R. et al., 2016, MNRAS, 455, 1134 (Gradient Estimation) [2016MNRAS.455.1134P]
+        .. [#] Weinberger, R., Springel, V., Pakmor, R., 2020, ApJS, 248, 32 (Public Code Release) [2020ApJS..248...32W]
+    """
 
     include_headers = ["worker_code.h"]
 

From 1d668dd6b713caf0a5bd43c44674d42d785e9863 Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Mon, 28 Mar 2022 15:08:46 +0100
Subject: [PATCH 13/51] add TODO re CodeWithDataDirectories

---
 src/amuse/community/arepo/interface.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
index 34d46a07a9..989190ebd8 100644
--- a/src/amuse/community/arepo/interface.py
+++ b/src/amuse/community/arepo/interface.py
@@ -28,6 +28,7 @@ class ArepoInterface(
     def __init__(self, **keyword_arguments):
         CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments)
         LiteratureReferencesMixIn.__init__(self)
+        # TODO: Determine whether need to inherit from CodeWithDataDirectories.
 
     @legacy_function
     def echo_int():

From aa1c5982c557cee7cac87e4fa5d0541bb6bc7221 Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Mon, 28 Mar 2022 16:32:49 +0100
Subject: [PATCH 14/51] add set_parameters()

---
 src/amuse/community/arepo/interface.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
index 989190ebd8..ac122b9f23 100644
--- a/src/amuse/community/arepo/interface.py
+++ b/src/amuse/community/arepo/interface.py
@@ -31,12 +31,10 @@ def __init__(self, **keyword_arguments):
         # TODO: Determine whether need to inherit from CodeWithDataDirectories.
 
     @legacy_function
-    def echo_int():
+    def set_parameters():
         function = LegacyFunctionSpecification()
-        function.addParameter("int_in", dtype="int32", direction=function.IN)
-        function.addParameter("int_out", dtype="int32", direction=function.OUT)
+        function.addParameter("param_file", dtype="string", direction=function.IN)
         function.result_type = "int32"
-        function.can_handle_array = True
         return function
 
 

From e312e0185138fb4e99230588540c7ad513e37b20 Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Mon, 28 Mar 2022 17:24:54 +0100
Subject: [PATCH 15/51] change GSL_INCL to GSL_FLAGS

---
 src/amuse/community/arepo/src/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile
index 67b321843b..4f99b34024 100644
--- a/src/amuse/community/arepo/src/Makefile
+++ b/src/amuse/community/arepo/src/Makefile
@@ -1,6 +1,6 @@
-GSL_INCL  = -I/opt/Homebrew/include  # TODO: Need to make GSL_INCL generalisable.
+GSL_FLAGS  = -I/opt/Homebrew/include  # TODO: Need to make GSL_FLAGS generalisable.
 
-CFLAGS   += -Wall -g $(GSL_INCL)
+CFLAGS   += -Wall -g $(GSL_FLAGS)
 CXXFLAGS += $(CFLAGS) 
 LDFLAGS  += -lm $(MUSE_LD_FLAGS)
 

From d1c5ccaeb08ad872d4c39076aa72d4f56babb9df Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Mon, 28 Mar 2022 17:57:32 +0100
Subject: [PATCH 16/51] add AMUSE_DIR conditional and include config.mk

---
 src/amuse/community/arepo/src/Makefile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile
index 4f99b34024..8b3de12905 100644
--- a/src/amuse/community/arepo/src/Makefile
+++ b/src/amuse/community/arepo/src/Makefile
@@ -1,3 +1,8 @@
+ifeq ($(origin AMUSE_DIR), undefined)
+	AMUSE_DIR := $(shell amusifier --get-amuse-dir)
+endif
+-include $(AMUSE_DIR)/config.mk
+
 GSL_FLAGS  = -I/opt/Homebrew/include  # TODO: Need to make GSL_FLAGS generalisable.
 
 CFLAGS   += -Wall -g $(GSL_FLAGS)

From c1a672f8d1162cfe12a99814b4a8577dafaf43fd Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Thu, 31 Mar 2022 10:02:33 +0100
Subject: [PATCH 17/51] generate interface.cc

---
 src/amuse/community/arepo/interface.cc | 175 +++++++++++++++++++++++--
 1 file changed, 166 insertions(+), 9 deletions(-)

diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
index a590e82689..160b99ab25 100644
--- a/src/amuse/community/arepo/interface.cc
+++ b/src/amuse/community/arepo/interface.cc
@@ -1,11 +1,168 @@
-extern int echo(int input);
-
-/*
- * Interface code
- */
- 
-int echo_int(int input, int * output){
-    *output = echo(input);
-    return 0;
+#include "worker_code.h"
+
+int get_mass(int index_of_the_particle, double * mass){
+  return 0;
+}
+
+int commit_particles(){
+  return 0;
+}
+
+int get_time(double * time){
+  return 0;
+}
+
+int set_mass(int index_of_the_particle, double mass){
+  return 0;
+}
+
+int get_index_of_first_particle(int * index_of_the_particle){
+  return 0;
+}
+
+int get_total_radius(double * radius){
+  return 0;
+}
+
+int new_particle(int * index_of_the_particle, double mass, double x, 
+  double y, double z, double vx, double vy, double vz, double radius){
+  return 0;
+}
+
+int get_total_mass(double * mass){
+  return 0;
+}
+
+int evolve_model(double time){
+  return 0;
+}
+
+int set_eps2(double epsilon_squared){
+  return 0;
+}
+
+int get_begin_time(double * time){
+  return 0;
+}
+
+int get_eps2(double * epsilon_squared){
+  return 0;
+}
+
+int get_index_of_next_particle(int index_of_the_particle, 
+  int * index_of_the_next_particle){
+  return 0;
+}
+
+int delete_particle(int index_of_the_particle){
+  return 0;
+}
+
+int get_potential(int index_of_the_particle, double * potential){
+  return 0;
+}
+
+int synchronize_model(){
+  return 0;
+}
+
+int set_state(int index_of_the_particle, double mass, double x, double y, 
+  double z, double vx, double vy, double vz, double radius){
+  return 0;
+}
+
+int get_state(int index_of_the_particle, double * mass, double * x, 
+  double * y, double * z, double * vx, double * vy, double * vz, 
+  double * radius){
+  return 0;
+}
+
+int get_time_step(double * time_step){
+  return 0;
+}
+
+int recommit_particles(){
+  return 0;
+}
+
+int get_kinetic_energy(double * kinetic_energy){
+  return 0;
+}
+
+int get_number_of_particles(int * number_of_particles){
+  return 0;
+}
+
+int set_acceleration(int index_of_the_particle, double ax, double ay, 
+  double az){
+  return 0;
+}
+
+int get_center_of_mass_position(double * x, double * y, double * z){
+  return 0;
+}
+
+int get_center_of_mass_velocity(double * vx, double * vy, double * vz){
+  return 0;
+}
+
+int get_radius(int index_of_the_particle, double * radius){
+  return 0;
+}
+
+int set_begin_time(double time){
+  return 0;
+}
+
+int set_radius(int index_of_the_particle, double radius){
+  return 0;
+}
+
+int cleanup_code(){
+  return 0;
+}
+
+int recommit_parameters(){
+  return 0;
+}
+
+int initialize_code(){
+  return 0;
+}
+
+int get_potential_energy(double * potential_energy){
+  return 0;
+}
+
+int get_velocity(int index_of_the_particle, double * vx, double * vy, 
+  double * vz){
+  return 0;
+}
+
+int get_position(int index_of_the_particle, double * x, double * y, 
+  double * z){
+  return 0;
+}
+
+int set_position(int index_of_the_particle, double x, double y, double z){
+  return 0;
+}
+
+int get_acceleration(int index_of_the_particle, double * ax, double * ay, 
+  double * az){
+  return 0;
+}
+
+int commit_parameters(){
+  return 0;
+}
+
+int set_parameters(char * param_file){
+  return 0;
+}
+
+int set_velocity(int index_of_the_particle, double vx, double vy, 
+  double vz){
+  return 0;
 }
 

From 12a022d97abb63a30468db039011c3b53843e47f Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Thu, 31 Mar 2022 10:08:44 +0100
Subject: [PATCH 18/51] update interface name in Makefile

---
 src/amuse/community/arepo/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile
index 7c392db261..567a479296 100644
--- a/src/amuse/community/arepo/Makefile
+++ b/src/amuse/community/arepo/Makefile
@@ -30,10 +30,10 @@ $(CODELIB):
 	make -C src all
 
 worker_code.cc: interface.py
-	$(CODE_GENERATOR) --type=c interface.py arepoInterface -o $@
+	$(CODE_GENERATOR) --type=c interface.py ArepoInterface -o $@
 
 worker_code.h: interface.py
-	$(CODE_GENERATOR) --type=H interface.py arepoInterface -o $@
+	$(CODE_GENERATOR) --type=H interface.py ArepoInterface -o $@
 
 arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS)
 	$(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@

From fb1f673a05290800331631c390c5473eb2b99ab9 Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Thu, 31 Mar 2022 10:20:13 +0100
Subject: [PATCH 19/51] update interface name in test_arepo.py

---
 src/amuse/community/arepo/test_arepo.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/amuse/community/arepo/test_arepo.py b/src/amuse/community/arepo/test_arepo.py
index 8cdeabb474..d4a7b22b38 100644
--- a/src/amuse/community/arepo/test_arepo.py
+++ b/src/amuse/community/arepo/test_arepo.py
@@ -1,12 +1,12 @@
 from amuse.test.amusetest import TestWithMPI
 
-from .interface import arepoInterface
+from .interface import ArepoInterface
 from .interface import arepo
 
-class arepoInterfaceTests(TestWithMPI):
+class ArepoInterfaceTests(TestWithMPI):
     
     def test1(self):
-        instance = arepoInterface()
+        instance = ArepoInterface()
         result,error = instance.echo_int(12)
         self.assertEquals(error, 0)
         self.assertEquals(result, 12)

From 4d050da72c6cd0310ed4293f972cb2d030e183c6 Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Thu, 31 Mar 2022 10:29:58 +0100
Subject: [PATCH 20/51] update arepo to Arepo

---
 src/amuse/community/arepo/test_arepo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amuse/community/arepo/test_arepo.py b/src/amuse/community/arepo/test_arepo.py
index d4a7b22b38..a54108fd43 100644
--- a/src/amuse/community/arepo/test_arepo.py
+++ b/src/amuse/community/arepo/test_arepo.py
@@ -1,7 +1,7 @@
 from amuse.test.amusetest import TestWithMPI
 
 from .interface import ArepoInterface
-from .interface import arepo
+from .interface import Arepo
 
 class ArepoInterfaceTests(TestWithMPI):
     

From 4ff74a9cce4c78b08d5669c2562e8c011e182edf Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Fri, 1 Apr 2022 15:33:26 +0100
Subject: [PATCH 21/51] add code from arepo main.c to initialize_code() and
 cleanup_code()

---
 src/amuse/community/arepo/interface.cc | 79 +++++++++++++++++++++++---
 1 file changed, 71 insertions(+), 8 deletions(-)

diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
index 160b99ab25..b6656bc04a 100644
--- a/src/amuse/community/arepo/interface.cc
+++ b/src/amuse/community/arepo/interface.cc
@@ -1,5 +1,76 @@
 #include "worker_code.h"
 
+#include "src/main/allvars.h"
+#include "src/main/proto.h"
+
+int initialize_code(){
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
+  MPI_Comm_size(MPI_COMM_WORLD, &NTask);
+
+  /* output a welcome message */
+  hello();
+
+  /* initialize CPU-time/Wallclock-time measurement */
+  init_cpu_log();
+
+  determine_compute_nodes();
+
+  for(PTask = 0; NTask > (1 << PTask); PTask++)
+    ;
+
+  begrun0();
+
+  strcpy(ParameterFile, "param.txt");  /* Removing command line parsing. argv[1] replaced with "param.txt". */
+  RestartFlag = 0;
+
+  begrun1(); /* set-up run  */
+
+  char fname[MAXLEN_PATH];
+  strcpy(fname, All.InitCondFile);
+
+  /* now we can load the file */
+
+#ifdef READ_DM_AS_GAS
+      read_ic(fname, (RestartFlag == 14) ? 0x02 : LOAD_TYPES);
+#else  /* #ifdef READ_DM_AS_GAS */
+      read_ic(fname, (RestartFlag == 14) ? 0x01 : LOAD_TYPES);
+#endif /* #ifdef READ_DM_AS_GAS #else */
+
+  /* init returns a status code, where a value of >=0 means that endrun() should be called. */
+  int status = init();
+
+  if(status >= 0)
+    {
+      if(status > 0)
+        mpi_printf("init() returned with %d\n", status);
+
+      cleanup_code();
+    }
+
+  begrun2();
+  return 0;
+}
+
+int cleanup_code(){  /* print the elapsed run time, finalize MPI, then exit the process */
+  mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second()));
+  mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n");
+  fflush(stdout);  /* push the messages above out before the process ends */
+
+#ifdef HAVE_HDF5
+  /* The HDF5 library will sometimes register an atexit() handler that calls its
+   * error handler. In AREPO this is set to my_hdf_error_handler, which calls
+   * MPI_Abort. Calling MPI_Abort after MPI_Finalize is not allowed.
+   * Hence unset the HDF5 error handler here.
+   */
+  H5Eset_auto(NULL, NULL);
+#endif /* #ifdef HAVE_HDF5 */
+
+  MPI_Finalize();
+  exit(0);   /* NOTE(review): ends the whole process here; presumably the AMUSE worker never gets to send its reply for this call - confirm this is intended */
+  return 0;  /* not reached, because exit(0) above does not return */
+}
+
 int get_mass(int index_of_the_particle, double * mass){
   return 0;
 }
@@ -118,18 +189,10 @@ int set_radius(int index_of_the_particle, double radius){
   return 0;
 }
 
-int cleanup_code(){
-  return 0;
-}
-
 int recommit_parameters(){
   return 0;
 }
 
-int initialize_code(){
-  return 0;
-}
-
 int get_potential_energy(double * potential_energy){
   return 0;
 }

From 7efa65153bf353a56a0601d1decca50a881adb6f Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Fri, 1 Apr 2022 15:44:25 +0100
Subject: [PATCH 22/51] comment out set_parameters() and add TODO

---
 src/amuse/community/arepo/interface.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
index ac122b9f23..fef740d365 100644
--- a/src/amuse/community/arepo/interface.py
+++ b/src/amuse/community/arepo/interface.py
@@ -30,12 +30,15 @@ def __init__(self, **keyword_arguments):
         LiteratureReferencesMixIn.__init__(self)
         # TODO: Determine whether need to inherit from CodeWithDataDirectories.
 
-    @legacy_function
-    def set_parameters():
-        function = LegacyFunctionSpecification()
-        function.addParameter("param_file", dtype="string", direction=function.IN)
-        function.result_type = "int32"
-        return function
+    # TODO: Pass parameter file to initialize_code(), and undo hardcoding of parameter file within the function.
+    # This function has been kept as a template for future functions.
+    
+    # @legacy_function
+    # def set_parameters():
+    #     function = LegacyFunctionSpecification()
+    #     function.addParameter("param_file", dtype="string", direction=function.IN)
+    #     function.result_type = "int32"
+    #     return function
 
 
 class Arepo(GravitationalDynamics):

From 2c1f16add06379e9d8d62c67f9026eed1678f775 Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Fri, 1 Apr 2022 15:50:26 +0100
Subject: [PATCH 23/51] add TODO to test_arepo.py

---
 src/amuse/community/arepo/test_arepo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amuse/community/arepo/test_arepo.py b/src/amuse/community/arepo/test_arepo.py
index a54108fd43..6aea82105f 100644
--- a/src/amuse/community/arepo/test_arepo.py
+++ b/src/amuse/community/arepo/test_arepo.py
@@ -7,7 +7,7 @@ class ArepoInterfaceTests(TestWithMPI):
     
     def test1(self):
         instance = ArepoInterface()
-        result,error = instance.echo_int(12)
+        result,error = instance.echo_int(12)  # TODO: Update test and add more...
         self.assertEquals(error, 0)
         self.assertEquals(result, 12)
         instance.stop()

From 5daa7e494ca52a8039fbf98dd11edc4594f8eec6 Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Fri, 1 Apr 2022 16:04:20 +0100
Subject: [PATCH 24/51] add TODO to Makefile

---
 src/amuse/community/arepo/src/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile
index 8b3de12905..27c1b505b9 100644
--- a/src/amuse/community/arepo/src/Makefile
+++ b/src/amuse/community/arepo/src/Makefile
@@ -1,3 +1,4 @@
+# TODO: Determine whether this is needed as included in arepo/Makefile.
 ifeq ($(origin AMUSE_DIR), undefined)
 	AMUSE_DIR := $(shell amusifier --get-amuse-dir)
 endif

From ed27e203529c185d23c275b8afb141f1a8f14f25 Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Fri, 1 Apr 2022 17:02:21 +0100
Subject: [PATCH 25/51] remove test.cc as not needed

---
 src/amuse/community/arepo/src/test.cc | 6 ------
 1 file changed, 6 deletions(-)
 delete mode 100644 src/amuse/community/arepo/src/test.cc

diff --git a/src/amuse/community/arepo/src/test.cc b/src/amuse/community/arepo/src/test.cc
deleted file mode 100644
index c30eeef8cb..0000000000
--- a/src/amuse/community/arepo/src/test.cc
+++ /dev/null
@@ -1,6 +0,0 @@
-/*
- * Example function for a code
- */
-int echo(int input){
-    return input;
-}

From 5b4065610e3b21d8f0b06e3dfccf533c9b947867 Mon Sep 17 00:00:00 2001
From: Felicity Guest <F.L.Guest@exeter.ac.uk>
Date: Fri, 1 Apr 2022 17:27:11 +0100
Subject: [PATCH 26/51] add initialize_code() and define_methods() to Arepo

---
 src/amuse/community/arepo/interface.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
index fef740d365..0e1ae7e60b 100644
--- a/src/amuse/community/arepo/interface.py
+++ b/src/amuse/community/arepo/interface.py
@@ -30,9 +30,7 @@ def __init__(self, **keyword_arguments):
         LiteratureReferencesMixIn.__init__(self)
         # TODO: Determine whether need to inherit from CodeWithDataDirectories.
 
-    # TODO: Pass parameter file to initialize_code(), and undo hardcoding of parameter file within the function.
-    # This function has been kept as a template for future functions.
-    
+    # This function has been kept as a basic template for future functions.
     # @legacy_function
     # def set_parameters():
     #     function = LegacyFunctionSpecification()
@@ -45,3 +43,20 @@ class Arepo(GravitationalDynamics):
     
     def __init__(self, **options):
         GravitationalDynamics.__init__(self, ArepoInterface(**options), **options)
+
+    def initialize_code(self):
+        result = self.overridden().initialize_code()
+
+        # TODO: Pass parameter file to initialize_code(), and undo hardcoding of parameter file within the function.
+        # Could be done in the way in which Gadget2 sets the gadget_output_directory.
+        #self.parameters.gadget_output_directory = self.get_output_directory()
+        
+        return result
+
+    def define_methods(self, builder):
+        """Register the inherited method definitions, then the Arepo-specific "run"."""
+        # An override that skips this call replaces every definition made by the parent.
+        GravitationalDynamics.define_methods(self, builder)
+        # TODO: Determine how to link this to Arepo's run() - the main simulation loop.
+        # The trailing comma makes the return spec a real one-element tuple.
+        builder.add_method("run", (), (builder.ERROR_CODE,))

From deb6a8b49ecafd0435f3982f240cdfaf9a11dfff Mon Sep 17 00:00:00 2001
From: Matthew West <m.t.west@exeter.ac.uk>
Date: Thu, 12 May 2022 13:46:02 +0100
Subject: [PATCH 27/51] add default parameters to arepo's interface.cc

---
 src/amuse/community/arepo/interface.cc | 133 ++++++++++++++++++++++---
 1 file changed, 121 insertions(+), 12 deletions(-)

diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
index b6656bc04a..10fa51a8af 100644
--- a/src/amuse/community/arepo/interface.cc
+++ b/src/amuse/community/arepo/interface.cc
@@ -3,6 +3,117 @@
 #include "src/main/allvars.h"
 #include "src/main/proto.h"
 
+// general interface functions:
+
+void set_default_parameters(){
+  // Relevant files
+  strcpy(All.InitCondFile, "./snap_010");
+  strcpy(All.OutputDir,   "./output");
+  strcpy(All.SnapshotFileBase, "snap");
+  strcpy(All.OutputListFilename, "./output_list.txt");
+
+  // File formats
+  All.ICFormat = 1;
+  All.SnapFormat = 3;
+
+  // CPU-time limits
+  All.TimeLimitCPU = 93000;
+  All.CpuTimeBetRestartFile = 12000;
+  All.ResubmitOn = 0;
+  strcpy(All.ResubmitCommand, "my-scriptfile");
+
+  // Memory allocation
+  All.MaxMemSize = 2500;
+
+  // Characteristics of run
+  All.TimeBegin = 0.0;
+  All.TimeMax = 1.0;
+
+  // Basic code options that set simulation type
+  All.ComovingIntegrationOn = 0;
+  All.PeriodicBoundariesOn = 0;
+  All.CoolingOn = 0;
+  All.StarformationOn = 0;
+
+  // Cosmological parameters
+  All.Omega0 = 0.0;
+  All.OmegaLambda = 0.0;
+  All.OmegaBaryon = 0.0;
+  All.HubbleParam = 1.0;
+  All.BoxSize = 100000.0;
+
+  // Output frequency and output parameters
+  All.OutputListOn = 1;
+  All.TimeBetSnapshot = 0.0;
+  All.TimeOfFirstSnapshot = 0.0;
+  All.TimeBetStatistics = 0.01;
+  All.NumFilesPerSnapshot = 1;
+  All.NumFilesWrittenInParallel = 1;
+
+  // Integration timing accuracy
+  All.TypeOfTimestepCriterion = 0;
+  All.ErrTolIntAccuracy = 0.012;
+  All.CourantFac = 0.3;
+  All.MaxSizeTimestep = 0.05;
+  All.MinSizeTimestep = 2.0e-9;
+
+  // Treatment of empty space and temp limits
+  All.InitGasTemp = 244.8095;
+  All.MinGasTemp = 5.0;
+  All.MinimumDensityOnStartUp = 1.0e-20;
+  All.LimitUBelowThisDensity = 0.0;
+  All.LimitUBelowCertainDensityToThisValue = 0.0;
+  All.MinEgySpec = 0.0;
+
+  // Tree algorithm, force accuracy, domain update frequency
+  All.TypeOfOpeningCriterion = 1;
+  All.ErrTolTheta = 0.7;
+  All.ErrTolForceAcc = 0.0025;
+  All.MultipleDomains = 8;
+  All.TopNodeFactor = 2.5;
+  All.ActivePartFracForNewDomainDecomp = 0.01;
+
+  // Initial density estimates
+  All.DesNumNgb = 64;
+  All.MaxNumNgbDeviation = 4;
+
+  // System of Units
+  All.UnitLength_in_cm = 3.085678e21;
+  All.UnitMass_in_g = 1.989e43;
+  All.UnitVelocity_in_cm_per_s = 1e5;
+
+  // Gravitational softening lengths
+  All.SofteningComovingType0 = 1.0;
+  All.SofteningComovingType1 = 1.0;
+
+  All.SofteningMaxPhysType0 = 1.0;
+  All.SofteningMaxPhysType1 = 1.0;
+
+  All.GasSoftFactor = 2.5;
+
+  All.SofteningTypeOfPartType0 = 0;
+  All.SofteningTypeOfPartType1 = 1;
+  All.SofteningTypeOfPartType2 = 1;
+  All.SofteningTypeOfPartType3 = 1;
+  All.SofteningTypeOfPartType4 = 1;
+  All.SofteningTypeOfPartType5 = 1;
+
+  All.MinimumComovingHydroSoftening = 1.0;
+  All.AdaptiveHydroSofteningSpacing = 1.2;
+
+  // Mesh regularization options
+  All.CellShapingSpeed = 0.5;
+  All.CellShapingFactor = 1.0;
+
+  // parameters that are fixed for AMUSE:
+  All.PartAllocFactor = 1.5; // Memory allocation parameter
+  All.TreeAllocFactor = 0.8; // Memory allocation parameter
+  All.BufferSize = 25;       // Memory allocation parameter
+  All.ResubmitOn = 0;              // Keep this turned off!
+  All.OutputListOn = 0;            // Keep this turned off
+  All.GravityConstantInternal = 0; // Keep this turned off
+}
+
 int initialize_code(){
   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
@@ -21,7 +132,6 @@ int initialize_code(){
 
   begrun0();
 
-  strcpy(ParameterFile, "param.txt");  /* Removing command line parsing. argv[1] replaced with "param.txt". */
   RestartFlag = 0;
 
   begrun1(); /* set-up run  */
@@ -95,7 +205,7 @@ int get_total_radius(double * radius){
   return 0;
 }
 
-int new_particle(int * index_of_the_particle, double mass, double x, 
+int new_particle(int * index_of_the_particle, double mass, double x,
   double y, double z, double vx, double vy, double vz, double radius){
   return 0;
 }
@@ -120,7 +230,7 @@ int get_eps2(double * epsilon_squared){
   return 0;
 }
 
-int get_index_of_next_particle(int index_of_the_particle, 
+int get_index_of_next_particle(int index_of_the_particle,
   int * index_of_the_next_particle){
   return 0;
 }
@@ -137,13 +247,13 @@ int synchronize_model(){
   return 0;
 }
 
-int set_state(int index_of_the_particle, double mass, double x, double y, 
+int set_state(int index_of_the_particle, double mass, double x, double y,
   double z, double vx, double vy, double vz, double radius){
   return 0;
 }
 
-int get_state(int index_of_the_particle, double * mass, double * x, 
-  double * y, double * z, double * vx, double * vy, double * vz, 
+int get_state(int index_of_the_particle, double * mass, double * x,
+  double * y, double * z, double * vx, double * vy, double * vz,
   double * radius){
   return 0;
 }
@@ -164,7 +274,7 @@ int get_number_of_particles(int * number_of_particles){
   return 0;
 }
 
-int set_acceleration(int index_of_the_particle, double ax, double ay, 
+int set_acceleration(int index_of_the_particle, double ax, double ay,
   double az){
   return 0;
 }
@@ -197,12 +307,12 @@ int get_potential_energy(double * potential_energy){
   return 0;
 }
 
-int get_velocity(int index_of_the_particle, double * vx, double * vy, 
+int get_velocity(int index_of_the_particle, double * vx, double * vy,
   double * vz){
   return 0;
 }
 
-int get_position(int index_of_the_particle, double * x, double * y, 
+int get_position(int index_of_the_particle, double * x, double * y,
   double * z){
   return 0;
 }
@@ -211,7 +321,7 @@ int set_position(int index_of_the_particle, double x, double y, double z){
   return 0;
 }
 
-int get_acceleration(int index_of_the_particle, double * ax, double * ay, 
+int get_acceleration(int index_of_the_particle, double * ax, double * ay,
   double * az){
   return 0;
 }
@@ -224,8 +334,7 @@ int set_parameters(char * param_file){
   return 0;
 }
 
-int set_velocity(int index_of_the_particle, double vx, double vy, 
+int set_velocity(int index_of_the_particle, double vx, double vy,
   double vz){
   return 0;
 }
-

From fc35f9d8bce838975587490108467f4cfce7ff1f Mon Sep 17 00:00:00 2001
From: Matthew West <m.t.west@exeter.ac.uk>
Date: Fri, 13 May 2022 10:57:33 +0100
Subject: [PATCH 28/51] Comment out read_parameter_file in begrun, add run_sim
 in interface.cc, add cleanup_code to interface.py, add set_default_parameters
 to initialize_code

---
 src/amuse/community/arepo/interface.cc      |  7 +++++++
 src/amuse/community/arepo/interface.py      | 14 ++++++++------
 src/amuse/community/arepo/src/init/begrun.c |  2 +-
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
index 10fa51a8af..6a1b9641b1 100644
--- a/src/amuse/community/arepo/interface.cc
+++ b/src/amuse/community/arepo/interface.cc
@@ -134,6 +134,7 @@ int initialize_code(){
 
   RestartFlag = 0;
 
+  set_default_parameters();
   begrun1(); /* set-up run  */
 
   char fname[MAXLEN_PATH];
@@ -162,6 +163,12 @@ int initialize_code(){
   return 0;
 }
 
+int run_sim() {
+  /* This run command is for the Arepo simulation */
+  run();
+  return 0;
+}
+
 int cleanup_code(){
   mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second()));
   mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n");
diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
index 0e1ae7e60b..1f228321b9 100644
--- a/src/amuse/community/arepo/interface.py
+++ b/src/amuse/community/arepo/interface.py
@@ -40,23 +40,25 @@ def __init__(self, **keyword_arguments):
 
 
 class Arepo(GravitationalDynamics):
-    
+
     def __init__(self, **options):
         GravitationalDynamics.__init__(self, ArepoInterface(**options), **options)
 
     def initialize_code(self):
         result = self.overridden().initialize_code()
 
-        # TODO: Pass parameter file to initialize_code(), and undo hardcoding of parameter file within the function.
-        # Could be done in the way in which Gadget2 sets the gadget_output_directory.
-        #self.parameters.gadget_output_directory = self.get_output_directory()
-        
         return result
 
     def define_methods(self, builder):
         # TODO: Determine how to link this to Arepo's run() - the main simulation loop.
         builder.add_method(
-            "run",
+            "run_sim",
+            (),
+            (builder.ERROR_CODE)
+        )
+        # When simulation is finished, shutdown HDF5 & MPI, and exit(0)
+        builder.add_method(
+            "cleanup_code",
             (),
             (builder.ERROR_CODE)
         )
diff --git a/src/amuse/community/arepo/src/init/begrun.c b/src/amuse/community/arepo/src/init/begrun.c
index ad8a5222ca..5db8ba6351 100644
--- a/src/amuse/community/arepo/src/init/begrun.c
+++ b/src/amuse/community/arepo/src/init/begrun.c
@@ -101,7 +101,7 @@ void begrun0(void)
  */
 void begrun1(void)
 {
-  read_parameter_file(ParameterFile); /* ... read in parameters for this run */
+  /* read_parameter_file(ParameterFile);  ... read in parameters for this run */
 
   check_parameters(); /* consistency check of parameters */
 

From 201535d042be63baf5a2bcdb01795d9d13d40424 Mon Sep 17 00:00:00 2001
From: Steven Rieder <steven@rieder.nl>
Date: Wed, 18 May 2022 12:59:52 +0100
Subject: [PATCH 29/51] src/Makefile: default MPICC/MPICXX, use MPICC as CC, drop test.o

---
 src/amuse/community/arepo/src/Makefile | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile
index 27c1b505b9..197fa00cef 100644
--- a/src/amuse/community/arepo/src/Makefile
+++ b/src/amuse/community/arepo/src/Makefile
@@ -1,11 +1,14 @@
+MPICXX ?= mpicxx
+MPICC ?= mpicc
 # TODO: Determine whether this is needed as included in arepo/Makefile.
 ifeq ($(origin AMUSE_DIR), undefined)
 	AMUSE_DIR := $(shell amusifier --get-amuse-dir)
 endif
 -include $(AMUSE_DIR)/config.mk
 
-GSL_FLAGS  = -I/opt/Homebrew/include  # TODO: Need to make GSL_FLAGS generalisable.
+CC       =  $(MPICC)  # sets the C-compiler
 
+# GSL_FLAGS  = -I/opt/Homebrew/include  # TODO: Need to make GSL_FLAGS generalisable.
 CFLAGS   += -Wall -g $(GSL_FLAGS)
 CXXFLAGS += $(CFLAGS) 
 LDFLAGS  += -lm $(MUSE_LD_FLAGS)
@@ -17,7 +20,7 @@ SUBDIR = add_backgroundgrid cooling debug_md5 domain fof gitversion gravity \
 	time_integration utils
 SRCS = $(foreach fd, $(SUBDIR), $(wildcard $(fd)/*.c))
 
-CODEOBJS = test.o $(SRCS:c=o)
+CODEOBJS = $(SRCS:c=o)
 
 AR = ar ruv
 RANLIB = ranlib

From fbcce03f21c342f67be4c9f8da5c855fd16aa549 Mon Sep 17 00:00:00 2001
From: Matthew West <m.t.west@exeter.ac.uk>
Date: Wed, 18 May 2022 13:13:22 +0100
Subject: [PATCH 30/51] set CXX to $(MPICXX) in Arepo Makefile

---
 src/amuse/community/arepo/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile
index 567a479296..46f0668422 100644
--- a/src/amuse/community/arepo/Makefile
+++ b/src/amuse/community/arepo/Makefile
@@ -6,7 +6,7 @@ endif
 -include $(AMUSE_DIR)/config.mk
 
 MPICXX   ?= mpicxx
-
+CXX = $(MPICXX)
 CFLAGS   += -Wall -g
 CXXFLAGS += $(CFLAGS) 
 LDFLAGS  += -lm $(MUSE_LD_FLAGS)

From 5cf22a118f03c6b8df78c3b454d445edab234f1d Mon Sep 17 00:00:00 2001
From: Matthew West <m.t.west@exeter.ac.uk>
Date: Wed, 18 May 2022 13:50:04 +0100
Subject: [PATCH 31/51] set_default values to match current arepo config data
 structures

---
 src/amuse/community/arepo/interface.cc | 33 +++++++++++++-------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
index 6a1b9641b1..44fe3205d7 100644
--- a/src/amuse/community/arepo/interface.cc
+++ b/src/amuse/community/arepo/interface.cc
@@ -83,38 +83,39 @@ void set_default_parameters(){
   All.UnitVelocity_in_cm_per_s = 1e5;
 
   // Gravitational softening lengths
-  All.SofteningComovingType0 = 1.0;
-  All.SofteningComovingType1 = 1.0;
-
-  All.SofteningMaxPhysType0 = 1.0;
-  All.SofteningMaxPhysType1 = 1.0;
-
+  All.SofteningComoving[0] = 1.0;
+  All.SofteningComoving[1] = 1.0;
+  All.SofteningMaxPhys[0] = 1.0;
+  All.SofteningMaxPhys[1] = 1.0;
   All.GasSoftFactor = 2.5;
 
-  All.SofteningTypeOfPartType0 = 0;
-  All.SofteningTypeOfPartType1 = 1;
-  All.SofteningTypeOfPartType2 = 1;
-  All.SofteningTypeOfPartType3 = 1;
-  All.SofteningTypeOfPartType4 = 1;
-  All.SofteningTypeOfPartType5 = 1;
 
-  All.MinimumComovingHydroSoftening = 1.0;
-  All.AdaptiveHydroSofteningSpacing = 1.2;
+  All.SofteningTypeOfPartType[0] = 0;
+  All.SofteningTypeOfPartType[1] = 1;
+  All.SofteningTypeOfPartType[2] = 1;
+  All.SofteningTypeOfPartType[3] = 1;
+  All.SofteningTypeOfPartType[4] = 1;
+  All.SofteningTypeOfPartType[5] = 1;
+  #ifdef ADAPTIVE_HYDRO_SOFTENING
+    All.MinimumComovingHydroSoftening = 1.0;
+    All.AdaptiveHydroSofteningSpacing = 1.2;
+  #endif
 
   // Mesh regularization options
   All.CellShapingSpeed = 0.5;
   All.CellShapingFactor = 1.0;
 
   // parameters that are fixed for AMUSE:
-  All.PartAllocFactor = 1.5; // Memory allocation parameter
   All.TreeAllocFactor = 0.8; // Memory allocation parameter
-  All.BufferSize = 25;       // Memory allocation parameter
   All.ResubmitOn = 0;              // Keep this turned off!
   All.OutputListOn = 0;            // Keep this turned off
   All.GravityConstantInternal = 0; // Keep this turned off
 }
 
 int initialize_code(){
+  int argc = 0;
+  char **argv=NULL;
+
   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
   MPI_Comm_size(MPI_COMM_WORLD, &NTask);

From af0f37408b87f53079b6e268a72ac54eb28c6ad3 Mon Sep 17 00:00:00 2001
From: Matthew West <m.t.west@exeter.ac.uk>
Date: Thu, 19 May 2022 12:14:44 +0100
Subject: [PATCH 32/51] add #include of mpi.h and run.c; replace mpi_printf
 with plain printf

---
 src/amuse/community/arepo/interface.cc | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
index 44fe3205d7..a212d90b24 100644
--- a/src/amuse/community/arepo/interface.cc
+++ b/src/amuse/community/arepo/interface.cc
@@ -1,7 +1,12 @@
+#ifndef NOMPI
+#include <mpi.h>
+#endif
+
 #include "worker_code.h"
 
 #include "src/main/allvars.h"
 #include "src/main/proto.h"
+#include "src/main/run.c"
 
 // general interface functions:
 
@@ -155,7 +160,7 @@ int initialize_code(){
   if(status >= 0)
     {
       if(status > 0)
-        mpi_printf("init() returned with %d\n", status);
+        printf("init() returned with %d\n", status);
 
       cleanup_code();
     }
@@ -171,8 +176,8 @@ int run_sim() {
 }
 
 int cleanup_code(){
-  mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second()));
-  mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n");
+  printf("Code run for %f seconds!\n", timediff(StartOfRun, second()));
+  printf("endrun called, calling MPI_Finalize()\nbye!\n\n");
   fflush(stdout);
 
 #ifdef HAVE_HDF5

From 3ecbaa3ae7a7db1b990cd479a4d158c037210f51 Mon Sep 17 00:00:00 2001
From: Steven Rieder <steven@rieder.nl>
Date: Thu, 19 May 2022 13:17:54 +0100
Subject: [PATCH 33/51] include GSL headers

---
 src/amuse/community/arepo/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile
index 46f0668422..3e67a00625 100644
--- a/src/amuse/community/arepo/Makefile
+++ b/src/amuse/community/arepo/Makefile
@@ -39,4 +39,4 @@ arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS)
 	$(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@
 
 .cc.o: $<
-	$(CXX) $(CXXFLAGS) -c -o $@ $< 
+	$(CXX) $(CXXFLAGS) $(GSL_FLAGS) -c -o $@ $< 

From 5ab0a4b3d6845b83631547015f8e0c26defdfb44 Mon Sep 17 00:00:00 2001
From: Steven Rieder <steven@rieder.nl>
Date: Thu, 19 May 2022 13:39:28 +0100
Subject: [PATCH 34/51] add GSL_FLAGS to CXXFLAGS

---
 src/amuse/community/arepo/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile
index 3e67a00625..b055eb3c18 100644
--- a/src/amuse/community/arepo/Makefile
+++ b/src/amuse/community/arepo/Makefile
@@ -8,7 +8,7 @@ endif
 MPICXX   ?= mpicxx
 CXX = $(MPICXX)
 CFLAGS   += -Wall -g
-CXXFLAGS += $(CFLAGS) 
+CXXFLAGS += $(CFLAGS) $(GSL_FLAGS)
 LDFLAGS  += -lm $(MUSE_LD_FLAGS)
 
 OBJS = interface.o
@@ -38,5 +38,5 @@ worker_code.h: interface.py
 arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS)
 	$(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@
 
-.cc.o: $<
-	$(CXX) $(CXXFLAGS) $(GSL_FLAGS) -c -o $@ $< 
+# .cc.o: $<
+# 	$(CXX) $(CXXFLAGS) $(GSL_FLAGS) -c -o $@ $< 

From dfac304c57102167c571f4f9cb4289b95e713b09 Mon Sep 17 00:00:00 2001
From: Steven Rieder <steven@rieder.nl>
Date: Thu, 19 May 2022 13:39:50 +0100
Subject: [PATCH 35/51] remove #include of src/main/run.c from interface.cc

---
 src/amuse/community/arepo/interface.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
index a212d90b24..9815df3021 100644
--- a/src/amuse/community/arepo/interface.cc
+++ b/src/amuse/community/arepo/interface.cc
@@ -6,7 +6,6 @@
 
 #include "src/main/allvars.h"
 #include "src/main/proto.h"
-#include "src/main/run.c"
 
 // general interface functions:
 

From d1da5b8a5ff4fa9339b93516ff125a1139aa8237 Mon Sep 17 00:00:00 2001
From: Steven Rieder <steven@rieder.nl>
Date: Thu, 19 May 2022 16:03:16 +0100
Subject: [PATCH 36/51] arepo is C not C++

---
 src/amuse/community/arepo/interface.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
index 9815df3021..7374928af6 100644
--- a/src/amuse/community/arepo/interface.cc
+++ b/src/amuse/community/arepo/interface.cc
@@ -7,6 +7,10 @@
 #include "src/main/allvars.h"
 #include "src/main/proto.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 // general interface functions:
 
 void set_default_parameters(){
@@ -350,3 +354,6 @@ int set_velocity(int index_of_the_particle, double vx, double vy,
   double vz){
   return 0;
 }
+#ifdef __cplusplus
+}
+#endif

From 2f9296793592efb38bc7f1f810c338f48cc3e518 Mon Sep 17 00:00:00 2001
From: Steven Rieder <steven@rieder.nl>
Date: Thu, 19 May 2022 16:09:34 +0100
Subject: [PATCH 37/51] re-add .cc.o rule to Arepo Makefile, compiling with MPICXX

---
 src/amuse/community/arepo/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile
index b055eb3c18..934b56a210 100644
--- a/src/amuse/community/arepo/Makefile
+++ b/src/amuse/community/arepo/Makefile
@@ -38,5 +38,5 @@ worker_code.h: interface.py
 arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS)
 	$(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@
 
-# .cc.o: $<
-# 	$(CXX) $(CXXFLAGS) $(GSL_FLAGS) -c -o $@ $< 
+.cc.o: $<
+	$(MPICXX) $(CXXFLAGS) $(CODELIB) -c -o $@ $< 

From 980aae5ca43042e6d9f00167a33e2965f25311d8 Mon Sep 17 00:00:00 2001
From: Matthew West <m.t.west@exeter.ac.uk>
Date: Thu, 19 May 2022 18:15:59 +0100
Subject: [PATCH 38/51] create arepo interface.h and move the allvars.h &
 proto.h includes and the extern "C" guard there

---
 src/amuse/community/arepo/interface.cc | 17 ++---------------
 src/amuse/community/arepo/interface.h  | 10 ++++++++++
 src/amuse/community/arepo/interface.py |  2 +-
 3 files changed, 13 insertions(+), 16 deletions(-)
 create mode 100644 src/amuse/community/arepo/interface.h

diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
index 7374928af6..525745f074 100644
--- a/src/amuse/community/arepo/interface.cc
+++ b/src/amuse/community/arepo/interface.cc
@@ -4,12 +4,6 @@
 
 #include "worker_code.h"
 
-#include "src/main/allvars.h"
-#include "src/main/proto.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
 
 // general interface functions:
 
@@ -136,12 +130,6 @@ int initialize_code(){
 
   determine_compute_nodes();
 
-  for(PTask = 0; NTask > (1 << PTask); PTask++)
-    ;
-
-  begrun0();
-
-  RestartFlag = 0;
 
   set_default_parameters();
   begrun1(); /* set-up run  */
@@ -354,6 +342,5 @@ int set_velocity(int index_of_the_particle, double vx, double vy,
   double vz){
   return 0;
 }
-#ifdef __cplusplus
-}
-#endif
+
+
diff --git a/src/amuse/community/arepo/interface.h b/src/amuse/community/arepo/interface.h
new file mode 100644
index 0000000000..2e1d880db7
--- /dev/null
+++ b/src/amuse/community/arepo/interface.h
@@ -0,0 +1,10 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "src/allvars.h"
+#include "src/proto.h"
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
index 1f228321b9..c1da97b511 100644
--- a/src/amuse/community/arepo/interface.py
+++ b/src/amuse/community/arepo/interface.py
@@ -23,7 +23,7 @@ class ArepoInterface(
         .. [#] Weinberger, R., Springel, V., Pakmor, R., 2020, ApJS, 248, 32 (Public Code Release) [2020ApJS..248...32W]
     """
 
-    include_headers = ["worker_code.h"]
+    include_headers = ["worker_code.h", "inteface.h"]
 
     def __init__(self, **keyword_arguments):
         CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments)

From b683887a8d3f0fc8514359762e56431aa90a863f Mon Sep 17 00:00:00 2001
From: Steven Rieder <steven@rieder.nl>
Date: Thu, 19 May 2022 21:00:57 +0100
Subject: [PATCH 39/51] fix typo in include_headers (inteface.h -> interface.h)

---
 src/amuse/community/arepo/interface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py
index c1da97b511..a8e770a79d 100644
--- a/src/amuse/community/arepo/interface.py
+++ b/src/amuse/community/arepo/interface.py
@@ -23,7 +23,7 @@ class ArepoInterface(
         .. [#] Weinberger, R., Springel, V., Pakmor, R., 2020, ApJS, 248, 32 (Public Code Release) [2020ApJS..248...32W]
     """
 
-    include_headers = ["worker_code.h", "inteface.h"]
+    include_headers = ["worker_code.h", "interface.h"]
 
     def __init__(self, **keyword_arguments):
         CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments)

From 3e84f77e24339f50883f4c52bd7d4fa8510377ec Mon Sep 17 00:00:00 2001
From: Steven Rieder <steven@rieder.nl>
Date: Thu, 19 May 2022 21:01:47 +0100
Subject: [PATCH 40/51] fix include paths in interface.h (src/ -> src/main/)

---
 src/amuse/community/arepo/interface.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amuse/community/arepo/interface.h b/src/amuse/community/arepo/interface.h
index 2e1d880db7..890b78b765 100644
--- a/src/amuse/community/arepo/interface.h
+++ b/src/amuse/community/arepo/interface.h
@@ -2,8 +2,8 @@
 extern "C" {
 #endif
 
-#include "src/allvars.h"
-#include "src/proto.h"
+#include "src/main/allvars.h"
+#include "src/main/proto.h"
 
 #ifdef __cplusplus
 }

From 1cee0565eed561b659bee84980248b91751358d4 Mon Sep 17 00:00:00 2001
From: Matthew West <m.t.west@exeter.ac.uk>
Date: Fri, 20 May 2022 09:57:07 +0100
Subject: [PATCH 41/51] add #include statement for new interface.h header

---
 src/amuse/community/arepo/interface.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
index 525745f074..ad3cfba8c9 100644
--- a/src/amuse/community/arepo/interface.cc
+++ b/src/amuse/community/arepo/interface.cc
@@ -3,7 +3,7 @@
 #endif
 
 #include "worker_code.h"
-
+#include "interface.h"
 
 // general interface functions:
 

From 719aaa3e6ee211460959a806fdbcc54cca54d2cc Mon Sep 17 00:00:00 2001
From: ipelupessy <i.pelupessy@esciencecenter.nl>
Date: Fri, 20 May 2022 11:05:14 +0200
Subject: [PATCH 42/51] some fixes for c<->cpp

---
 src/amuse/community/arepo/interface.cc | 7 ++++++-
 src/amuse/community/arepo/interface.h  | 5 ++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
index 525745f074..ebaed03a03 100644
--- a/src/amuse/community/arepo/interface.cc
+++ b/src/amuse/community/arepo/interface.cc
@@ -1,12 +1,17 @@
+#include <cstdio>
+#include <cstring>
+
 #ifndef NOMPI
 #include <mpi.h>
 #endif
 
+#include "interface.h"
 #include "worker_code.h"
 
-
 // general interface functions:
 
+using namespace std;
+
 void set_default_parameters(){
   // Relevant files
   strcpy(All.InitCondFile, "./snap_010");
diff --git a/src/amuse/community/arepo/interface.h b/src/amuse/community/arepo/interface.h
index 890b78b765..7bcd4497c6 100644
--- a/src/amuse/community/arepo/interface.h
+++ b/src/amuse/community/arepo/interface.h
@@ -1,10 +1,13 @@
 #ifdef __cplusplus
 extern "C" {
+#define ___cplusplus
+#undef __cplusplus
 #endif
 
 #include "src/main/allvars.h"
 #include "src/main/proto.h"
 
-#ifdef __cplusplus
+#ifdef ___cplusplus
 }
+#define __cplusplus
 #endif

From 81223f643360fa8850efab638ab13fbebbbe7647 Mon Sep 17 00:00:00 2001
From: ipelupessy <i.pelupessy@esciencecenter.nl>
Date: Fri, 20 May 2022 12:59:47 +0200
Subject: [PATCH 43/51] fixes to build, note the source changes

---
 src/amuse/community/arepo/Makefile          | 2 +-
 src/amuse/community/arepo/src/Makefile      | 3 ++-
 src/amuse/community/arepo/src/init/begrun.c | 2 +-
 src/amuse/community/arepo/src/main/main.c   | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile
index 934b56a210..271a4671ee 100644
--- a/src/amuse/community/arepo/Makefile
+++ b/src/amuse/community/arepo/Makefile
@@ -36,7 +36,7 @@ worker_code.h: interface.py
 	$(CODE_GENERATOR) --type=H interface.py ArepoInterface -o $@
 
 arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS)
-	$(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@
+	$(MPICXX) $(CXXFLAGS) $(GSL_FLAGS) $(GMP_LIBS) $(GSL_LIBS) $< $(OBJS) $(CODELIB) -o $@
 
 .cc.o: $<
 	$(MPICXX) $(CXXFLAGS) $(CODELIB) -c -o $@ $< 
diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile
index 197fa00cef..e852b39c53 100644
--- a/src/amuse/community/arepo/src/Makefile
+++ b/src/amuse/community/arepo/src/Makefile
@@ -16,11 +16,12 @@ LDFLAGS  += -lm $(MUSE_LD_FLAGS)
 CODELIB = libarepo.a
 
 SUBDIR = add_backgroundgrid cooling debug_md5 domain fof gitversion gravity \
-	hydro init io main mesh mpi_utils ngbtree star_formation subfind \
+	hydro init io mesh mesh/voronoi mpi_utils ngbtree star_formation subfind \
 	time_integration utils
 SRCS = $(foreach fd, $(SUBDIR), $(wildcard $(fd)/*.c))
 
 CODEOBJS = $(SRCS:c=o)
+CODEOBJS += main/allvars.o main/run.o main/main.o
 
 AR = ar ruv
 RANLIB = ranlib
diff --git a/src/amuse/community/arepo/src/init/begrun.c b/src/amuse/community/arepo/src/init/begrun.c
index 5db8ba6351..a70748cfd3 100644
--- a/src/amuse/community/arepo/src/init/begrun.c
+++ b/src/amuse/community/arepo/src/init/begrun.c
@@ -84,7 +84,7 @@ void begrun0(void)
 
   if(ThisTask == 0)
     {
-      output_compile_time_options();
+//      output_compile_time_options();
     }
 }
 
diff --git a/src/amuse/community/arepo/src/main/main.c b/src/amuse/community/arepo/src/main/main.c
index f1ae80be6a..0b0824b5b1 100644
--- a/src/amuse/community/arepo/src/main/main.c
+++ b/src/amuse/community/arepo/src/main/main.c
@@ -58,7 +58,7 @@
  *
  *  \return status of exit; 0 for normal exit.
  */
-int main(int argc, char **argv)
+int no_main(int argc, char **argv)
 {
 // #ifdef IMPOSE_PINNING
 //   detect_topology();

From 1e0c6b97ab41d2894819a35ff618405553f3bdda Mon Sep 17 00:00:00 2001
From: ipelupessy <i.pelupessy@esciencecenter.nl>
Date: Fri, 20 May 2022 13:06:37 +0200
Subject: [PATCH 44/51] fix build to detect source changes

---
 src/amuse/community/arepo/Makefile     | 4 +++-
 src/amuse/community/arepo/src/Makefile | 5 ++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile
index 271a4671ee..14eef8f6a0 100644
--- a/src/amuse/community/arepo/Makefile
+++ b/src/amuse/community/arepo/Makefile
@@ -26,7 +26,7 @@ clean:
 distclean: clean
 	make -C src distclean
 
-$(CODELIB):
+$(CODELIB): .FORCE
 	make -C src all
 
 worker_code.cc: interface.py
@@ -40,3 +40,5 @@ arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS)
 
 .cc.o: $<
 	$(MPICXX) $(CXXFLAGS) $(CODELIB) -c -o $@ $< 
+
+.FORCE:
diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile
index e852b39c53..23d0d99c52 100644
--- a/src/amuse/community/arepo/src/Makefile
+++ b/src/amuse/community/arepo/src/Makefile
@@ -41,4 +41,7 @@ $(CODELIB): $(CODEOBJS)
 	$(RANLIB) $@
 
 .cc.o: $<
-	$(CXX) $(CXXFLAGS) -c -o $@ $< 
+	$(MPICXX) $(CXXFLAGS) -c -o $@ $< 
+
+.c.o: $<
+	$(MPICC) $(CXXFLAGS) -c -o $@ $< 

From edc21b73b48e8bdb7a0a0fa93327c2a2495bd600 Mon Sep 17 00:00:00 2001
From: Matthew West <m.t.west@exeter.ac.uk>
Date: Fri, 20 May 2022 13:51:22 +0100
Subject: [PATCH 45/51] move GSL_LIBS and GMP_LIBS to the end of the flags for
 Arepo make file

---
 src/amuse/community/arepo/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile
index 14eef8f6a0..4fa877bb9a 100644
--- a/src/amuse/community/arepo/Makefile
+++ b/src/amuse/community/arepo/Makefile
@@ -36,7 +36,7 @@ worker_code.h: interface.py
 	$(CODE_GENERATOR) --type=H interface.py ArepoInterface -o $@
 
 arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS)
-	$(MPICXX) $(CXXFLAGS) $(GSL_FLAGS) $(GMP_LIBS) $(GSL_LIBS) $< $(OBJS) $(CODELIB) -o $@
+	$(MPICXX) $(CXXFLAGS) $(GSL_FLAGS) $< $(OBJS) $(CODELIB) -o $@ $(GMP_LIBS) $(GSL_LIBS)
 
 .cc.o: $<
 	$(MPICXX) $(CXXFLAGS) $(CODELIB) -c -o $@ $< 

From 73d53b70e02442a5a01e7e86cdc16075753c6d58 Mon Sep 17 00:00:00 2001
From: Matthew West <m.t.west@exeter.ac.uk>
Date: Mon, 23 May 2022 08:56:56 +0100
Subject: [PATCH 46/51] remove extra MPI_Init call from initialize_code

---
 src/amuse/community/arepo/interface.cc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
index a8e949cb4c..db8497b93a 100644
--- a/src/amuse/community/arepo/interface.cc
+++ b/src/amuse/community/arepo/interface.cc
@@ -121,10 +121,7 @@ void set_default_parameters(){
 }
 
 int initialize_code(){
-  int argc = 0;
-  char **argv=NULL;
 
-  MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
   MPI_Comm_size(MPI_COMM_WORLD, &NTask);
 

From ff2ea53cdbf05e7cce479363ff746bedf57e89b0 Mon Sep 17 00:00:00 2001
From: Steven Rieder <steven@rieder.nl>
Date: Wed, 25 May 2022 11:04:23 +0200
Subject: [PATCH 47/51] Default to non-periodic gravity

---
 src/amuse/community/arepo/src/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile
index 23d0d99c52..c0ec73a619 100644
--- a/src/amuse/community/arepo/src/Makefile
+++ b/src/amuse/community/arepo/src/Makefile
@@ -23,6 +23,10 @@ SRCS = $(foreach fd, $(SUBDIR), $(wildcard $(fd)/*.c))
 CODEOBJS = $(SRCS:c=o)
 CODEOBJS += main/allvars.o main/run.o main/main.o
 
+AREPOFLAGS += -DGRAVITY_NOT_PERIODIC  # no periodic boundaries by default
+
+CXXFLAGS += $(AREPOFLAGS)
+
 AR = ar ruv
 RANLIB = ranlib
 RM = rm

From 285b495bd60348d5c4a98cc1230433022e767e08 Mon Sep 17 00:00:00 2001
From: "Stephen P. Cook" <s.cook4@exeter.ac.uk>
Date: Wed, 28 Sep 2022 11:25:39 +0000
Subject: [PATCH 48/51] Add missing calls to memory management helpers

Add basic arepo test.

Co-authored-by: Steven Rieder <rieder@users.noreply.github.com>
---
 src/amuse/community/arepo/__init__.py       | 3 ++-
 src/amuse/community/arepo/interface.cc      | 3 ++-
 src/amuse/community/arepo/src/init/begrun.c | 6 ++++++
 src/amuse/community/arepo/test_simple.py    | 5 +++++
 4 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 src/amuse/community/arepo/test_simple.py

diff --git a/src/amuse/community/arepo/__init__.py b/src/amuse/community/arepo/__init__.py
index abe3ba85b6..b08b6187b7 100644
--- a/src/amuse/community/arepo/__init__.py
+++ b/src/amuse/community/arepo/__init__.py
@@ -1 +1,2 @@
-# generated file
\ No newline at end of file
+# generated file
+from .interface import Arepo
diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc
index db8497b93a..e05dd6a0dc 100644
--- a/src/amuse/community/arepo/interface.cc
+++ b/src/amuse/community/arepo/interface.cc
@@ -132,7 +132,8 @@ int initialize_code(){
   init_cpu_log();
 
   determine_compute_nodes();
-
+  // Needed to check available memory
+  mpi_report_committable_memory();
 
   set_default_parameters();
   begrun1(); /* set-up run  */
diff --git a/src/amuse/community/arepo/src/init/begrun.c b/src/amuse/community/arepo/src/init/begrun.c
index a70748cfd3..f6944d7306 100644
--- a/src/amuse/community/arepo/src/init/begrun.c
+++ b/src/amuse/community/arepo/src/init/begrun.c
@@ -103,6 +103,12 @@ void begrun1(void)
 {
   /* read_parameter_file(ParameterFile);  ... read in parameters for this run */
 
+#ifdef HOST_MEMORY_REPORTING
+  check_maxmemsize_setting();
+#endif /* #ifdef HOST_MEMORY_REPORTING */
+
+  mymalloc_init(); /* Added from read_parameter_file */
+
   check_parameters(); /* consistency check of parameters */
 
 #ifdef HAVE_HDF5
diff --git a/src/amuse/community/arepo/test_simple.py b/src/amuse/community/arepo/test_simple.py
new file mode 100644
index 0000000000..1951ac6edd
--- /dev/null
+++ b/src/amuse/community/arepo/test_simple.py
@@ -0,0 +1,5 @@
+from amuse.community.arepo import Arepo
+
+# Check code runs without errors
+x = Arepo(redirection="none")
+x.initialize_code()

From 62cd3459792301bf40106312c6934fbb4938b41b Mon Sep 17 00:00:00 2001
From: Volker Springel <vspringel@mpa-garching.mpg.de>
Date: Mon, 10 Jan 2022 21:21:32 +0100
Subject: [PATCH 49/51] small bug fix: in case HIERARCHICAL_GRAVITY is not
 used, and the maximum used timestep sizes increases during a step, it could
 happen that for particles on the maximum timestep one gravity half-step is
 not applied (because HighestActiveTimeBin increases)

---
 .../community/arepo/src/time_integration/do_gravity_hydro.c   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c b/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c
index 88b7f89a34..40a06ac282 100644
--- a/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c
+++ b/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c
@@ -265,7 +265,7 @@ void find_gravity_timesteps_and_do_gravity_step_first_half(void)
     }
 
   /* reconstruct list of active particles because it is used for other things too (i.e. wind particles) */
-  timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin);
+  timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestSynchronizedTimeBin);
   sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);
 #else /* #ifdef HIERARCHICAL_GRAVITY */
 
@@ -276,7 +276,7 @@ void find_gravity_timesteps_and_do_gravity_step_first_half(void)
     timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, TIMEBINS);
   else
 #endif /* #ifdef FORCE_EQUAL_TIMESTEPS */
-    timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin);
+    timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestSynchronizedTimeBin);
   sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);
 
   mpi_printf("KICKS: 1st gravity for highest active timebin=%d:  particles %lld\n", All.HighestActiveTimeBin,

From 33d3f94feb37d69d4115bf09d8b663dd3abd4708 Mon Sep 17 00:00:00 2001
From: Volker Springel <vspringel@mpa-garching.mpg.de>
Date: Thu, 2 Jun 2022 13:26:13 +0200
Subject: [PATCH 50/51] removed non-standard uint in favor of 'unsigned int'

---
 src/amuse/community/arepo/src/io/hdf5_util.c | 2 +-
 src/amuse/community/arepo/src/main/proto.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amuse/community/arepo/src/io/hdf5_util.c b/src/amuse/community/arepo/src/io/hdf5_util.c
index a613a36bdc..a690bd71f3 100644
--- a/src/amuse/community/arepo/src/io/hdf5_util.c
+++ b/src/amuse/community/arepo/src/io/hdf5_util.c
@@ -847,7 +847,7 @@ herr_t my_H5Pset_shuffle(hid_t plist_id)
  *
  *  \return Non-negative value if successful.
  */
-herr_t my_H5Pset_deflate(hid_t plist_id, uint level)
+herr_t my_H5Pset_deflate(hid_t plist_id, unsigned int level)
 {
   herr_t status = H5Pset_deflate(plist_id, level);
   if(status < 0)
diff --git a/src/amuse/community/arepo/src/main/proto.h b/src/amuse/community/arepo/src/main/proto.h
index 15a346f1bc..61bdaad467 100644
--- a/src/amuse/community/arepo/src/main/proto.h
+++ b/src/amuse/community/arepo/src/main/proto.h
@@ -598,7 +598,7 @@ hid_t my_H5Pcreate(hid_t class_id);
 herr_t my_H5Pclose(hid_t plist);
 herr_t my_H5Pset_chunk(hid_t plist, int ndims, const hsize_t *dim);
 herr_t my_H5Pset_shuffle(hid_t plist_id);
-herr_t my_H5Pset_deflate(hid_t plist_id, uint level);
+herr_t my_H5Pset_deflate(hid_t plist_id, unsigned int level);
 herr_t my_H5Pset_fletcher32(hid_t plist_id);
 #endif /* #ifdef HDF5_FILTERS */
 

From 0193040db0bc011ee7552f088720ef19c3818081 Mon Sep 17 00:00:00 2001
From: Volker Springel <vspringel@mpa-garching.mpg.de>
Date: Thu, 2 Jun 2022 14:03:04 +0200
Subject: [PATCH 51/51] disabled a superfluous call of get_starformation_rate()

---
 src/amuse/community/arepo/src/cooling/cooling.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/amuse/community/arepo/src/cooling/cooling.c b/src/amuse/community/arepo/src/cooling/cooling.c
index 7e7cebbc98..3baf82d3a3 100644
--- a/src/amuse/community/arepo/src/cooling/cooling.c
+++ b/src/amuse/community/arepo/src/cooling/cooling.c
@@ -477,9 +477,9 @@ void SetOutputGasState(int i, double *ne_guess, double *nH0, double *coolrate)
   double u   = dmax(All.MinEgySpec, SphP[i].Utherm);
 
   /* update GasState as appropriate given compile-time options and cell properties */
-#if defined(USE_SFR)
-  sfr = get_starformation_rate(i);
-#endif /* #if defined(USE_SFR) */
+  //  #if defined(USE_SFR)
+  //  sfr = get_starformation_rate(i);  // call is superfluous at this place
+  // #endif
 
   /* update DoCool */
   DoCool.u_old_input    = u;