From 4ed2054c4ca94086585fe32ea48b8885b06a453f Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 18 Mar 2022 14:49:07 +0000 Subject: [PATCH 01/51] add arepo directory --- src/amuse/community/arepo/Makefile | 42 +++++++++++++++++++++++++ src/amuse/community/arepo/__init__.py | 1 + src/amuse/community/arepo/interface.cc | 11 +++++++ src/amuse/community/arepo/interface.py | 24 ++++++++++++++ src/amuse/community/arepo/src/Makefile | 27 ++++++++++++++++ src/amuse/community/arepo/src/test.cc | 6 ++++ src/amuse/community/arepo/test_arepo.py | 14 +++++++++ 7 files changed, 125 insertions(+) create mode 100644 src/amuse/community/arepo/Makefile create mode 100644 src/amuse/community/arepo/__init__.py create mode 100644 src/amuse/community/arepo/interface.cc create mode 100644 src/amuse/community/arepo/interface.py create mode 100644 src/amuse/community/arepo/src/Makefile create mode 100644 src/amuse/community/arepo/src/test.cc create mode 100644 src/amuse/community/arepo/test_arepo.py diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile new file mode 100644 index 0000000000..7c392db261 --- /dev/null +++ b/src/amuse/community/arepo/Makefile @@ -0,0 +1,42 @@ +# standard amuse configuration include +# config.mk will be made after ./configure has run +ifeq ($(origin AMUSE_DIR), undefined) + AMUSE_DIR := $(shell amusifier --get-amuse-dir) +endif +-include $(AMUSE_DIR)/config.mk + +MPICXX ?= mpicxx + +CFLAGS += -Wall -g +CXXFLAGS += $(CFLAGS) +LDFLAGS += -lm $(MUSE_LD_FLAGS) + +OBJS = interface.o + +CODELIB = src/libarepo.a + +all: arepo_worker + +clean: + $(RM) -rf __pycache__ + $(RM) -f *.so *.o *.pyc worker_code.cc worker_code.h + $(RM) *~ arepo_worker worker_code.cc + make -C src clean + +distclean: clean + make -C src distclean + +$(CODELIB): + make -C src all + +worker_code.cc: interface.py + $(CODE_GENERATOR) --type=c interface.py arepoInterface -o $@ + +worker_code.h: interface.py + $(CODE_GENERATOR) --type=H interface.py 
arepoInterface -o $@ + +arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) + $(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@ + +.cc.o: $< + $(CXX) $(CXXFLAGS) -c -o $@ $< diff --git a/src/amuse/community/arepo/__init__.py b/src/amuse/community/arepo/__init__.py new file mode 100644 index 0000000000..abe3ba85b6 --- /dev/null +++ b/src/amuse/community/arepo/__init__.py @@ -0,0 +1 @@ +# generated file \ No newline at end of file diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc new file mode 100644 index 0000000000..a590e82689 --- /dev/null +++ b/src/amuse/community/arepo/interface.cc @@ -0,0 +1,11 @@ +extern int echo(int input); + +/* + * Interface code + */ + +int echo_int(int input, int * output){ + *output = echo(input); + return 0; +} + diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py new file mode 100644 index 0000000000..64130dc384 --- /dev/null +++ b/src/amuse/community/arepo/interface.py @@ -0,0 +1,24 @@ +from amuse.community import * + +class arepoInterface(CodeInterface): + + include_headers = ['worker_code.h'] + + def __init__(self, **keyword_arguments): + CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments) + + @legacy_function + def echo_int(): + function = LegacyFunctionSpecification() + function.addParameter('int_in', dtype='int32', direction=function.IN) + function.addParameter('int_out', dtype='int32', direction=function.OUT) + function.result_type = 'int32' + function.can_handle_array = True + return function + + +class arepo(InCodeComponentImplementation): + + def __init__(self, **options): + InCodeComponentImplementation.__init__(self, arepoInterface(**options), **options) + diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile new file mode 100644 index 0000000000..66f18374a1 --- /dev/null +++ b/src/amuse/community/arepo/src/Makefile @@ -0,0 +1,27 @@ +CFLAGS += -Wall -g 
+CXXFLAGS += $(CFLAGS) +LDFLAGS += -lm $(MUSE_LD_FLAGS) + +CODELIB = libarepo.a + +CODEOBJS = test.o + +AR = ar ruv +RANLIB = ranlib +RM = rm + +all: $(CODELIB) + + +clean: + $(RM) -f *.o *.a + +distclean: clean + +$(CODELIB): $(CODEOBJS) + $(RM) -f $@ + $(AR) $@ $(CODEOBJS) + $(RANLIB) $@ + +.cc.o: $< + $(CXX) $(CXXFLAGS) -c -o $@ $< diff --git a/src/amuse/community/arepo/src/test.cc b/src/amuse/community/arepo/src/test.cc new file mode 100644 index 0000000000..c30eeef8cb --- /dev/null +++ b/src/amuse/community/arepo/src/test.cc @@ -0,0 +1,6 @@ +/* + * Example function for a code + */ +int echo(int input){ + return input; +} diff --git a/src/amuse/community/arepo/test_arepo.py b/src/amuse/community/arepo/test_arepo.py new file mode 100644 index 0000000000..8cdeabb474 --- /dev/null +++ b/src/amuse/community/arepo/test_arepo.py @@ -0,0 +1,14 @@ +from amuse.test.amusetest import TestWithMPI + +from .interface import arepoInterface +from .interface import arepo + +class arepoInterfaceTests(TestWithMPI): + + def test1(self): + instance = arepoInterface() + result,error = instance.echo_int(12) + self.assertEquals(error, 0) + self.assertEquals(result, 12) + instance.stop() + From 5507dc490d9f8760e9edb0a9dae7b4141881dd86 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 18 Mar 2022 14:57:05 +0000 Subject: [PATCH 02/51] add arepo source code --- .../arepo/src/add_backgroundgrid/add_bggrid.c | 492 ++ .../arepo/src/add_backgroundgrid/add_bggrid.h | 58 + .../src/add_backgroundgrid/calc_weights.c | 301 + .../arepo/src/add_backgroundgrid/distribute.c | 328 ++ .../community/arepo/src/cooling/cooling.c | 870 +++ .../arepo/src/cooling/cooling_proto.h | 49 + .../arepo/src/cooling/cooling_vars.h | 80 + src/amuse/community/arepo/src/debug_md5/Md5.c | 472 ++ src/amuse/community/arepo/src/debug_md5/Md5.h | 92 + .../arepo/src/debug_md5/calc_checksum.c | 121 + .../community/arepo/src/domain/bsd_tree.h | 865 +++ src/amuse/community/arepo/src/domain/domain.c | 633 ++ 
src/amuse/community/arepo/src/domain/domain.h | 156 + .../arepo/src/domain/domain_DC_update.c | 699 +++ .../arepo/src/domain/domain_balance.c | 1154 ++++ .../community/arepo/src/domain/domain_box.c | 336 ++ .../arepo/src/domain/domain_counttogo.c | 84 + .../arepo/src/domain/domain_exchange.c | 399 ++ .../arepo/src/domain/domain_rearrange.c | 129 + .../arepo/src/domain/domain_sort_kernels.c | 158 + .../arepo/src/domain/domain_toplevel.c | 393 ++ .../community/arepo/src/domain/domain_vars.c | 117 + src/amuse/community/arepo/src/domain/peano.c | 569 ++ src/amuse/community/arepo/src/fof/fof.c | 967 ++++ src/amuse/community/arepo/src/fof/fof.h | 319 + .../community/arepo/src/fof/fof_distribute.c | 420 ++ .../community/arepo/src/fof/fof_findgroups.c | 720 +++ src/amuse/community/arepo/src/fof/fof_io.c | 3151 ++++++++++ .../community/arepo/src/fof/fof_nearest.c | 473 ++ .../arepo/src/fof/fof_sort_kernels.c | 495 ++ src/amuse/community/arepo/src/fof/fof_vars.c | 79 + .../community/arepo/src/gitversion/version | 7 + .../community/arepo/src/gitversion/version.h | 38 + src/amuse/community/arepo/src/gravity/accel.c | 347 ++ .../community/arepo/src/gravity/forcetree.c | 1827 ++++++ .../community/arepo/src/gravity/forcetree.h | 168 + .../arepo/src/gravity/forcetree_ewald.c | 529 ++ .../src/gravity/forcetree_optimizebalance.c | 486 ++ .../arepo/src/gravity/forcetree_walk.c | 709 +++ .../arepo/src/gravity/grav_external.c | 579 ++ .../arepo/src/gravity/grav_softening.c | 215 + .../community/arepo/src/gravity/gravdirect.c | 259 + .../community/arepo/src/gravity/gravtree.c | 749 +++ .../arepo/src/gravity/gravtree_forcetest.c | 1089 ++++ .../community/arepo/src/gravity/longrange.c | 199 + .../arepo/src/gravity/pm/pm_mpi_fft.c | 1771 ++++++ .../arepo/src/gravity/pm/pm_nonperiodic.c | 2087 +++++++ .../arepo/src/gravity/pm/pm_periodic.c | 2034 +++++++ .../arepo/src/gravity/pm/pm_periodic2d.c | 905 +++ .../arepo/src/hydro/finite_volume_solver.c | 1895 ++++++ 
.../community/arepo/src/hydro/gradients.c | 149 + src/amuse/community/arepo/src/hydro/mhd.c | 99 + src/amuse/community/arepo/src/hydro/riemann.c | 955 +++ .../community/arepo/src/hydro/riemann_hllc.c | 213 + .../community/arepo/src/hydro/riemann_hlld.c | 567 ++ src/amuse/community/arepo/src/hydro/scalars.c | 107 + .../src/hydro/update_primitive_variables.c | 343 ++ src/amuse/community/arepo/src/init/begrun.c | 344 ++ src/amuse/community/arepo/src/init/density.c | 635 ++ src/amuse/community/arepo/src/init/init.c | 835 +++ src/amuse/community/arepo/src/io/global.c | 257 + src/amuse/community/arepo/src/io/hdf5_util.c | 881 +++ src/amuse/community/arepo/src/io/io.c | 2226 +++++++ src/amuse/community/arepo/src/io/io_fields.c | 765 +++ src/amuse/community/arepo/src/io/logs.c | 623 ++ src/amuse/community/arepo/src/io/parameters.c | 861 +++ src/amuse/community/arepo/src/io/read_ic.c | 1900 ++++++ src/amuse/community/arepo/src/io/restart.c | 1549 +++++ src/amuse/community/arepo/src/main/allvars.c | 331 ++ src/amuse/community/arepo/src/main/allvars.h | 1924 +++++++ src/amuse/community/arepo/src/main/main.c | 296 + .../community/arepo/src/main/main_original.c | 299 + .../community/arepo/src/main/main_reduced.c | 135 + src/amuse/community/arepo/src/main/proto.h | 665 +++ src/amuse/community/arepo/src/main/run.c | 660 +++ .../arepo/src/mesh/criterion_derefinement.c | 181 + .../arepo/src/mesh/criterion_refinement.c | 267 + src/amuse/community/arepo/src/mesh/mesh.h | 268 + .../community/arepo/src/mesh/refinement.c | 217 + .../arepo/src/mesh/set_vertex_velocities.c | 321 ++ .../arepo/src/mesh/voronoi/voronoi.c | 1163 ++++ .../arepo/src/mesh/voronoi/voronoi.h | 379 ++ .../arepo/src/mesh/voronoi/voronoi_1d.c | 363 ++ .../src/mesh/voronoi/voronoi_1d_spherical.c | 339 ++ .../arepo/src/mesh/voronoi/voronoi_2d.c | 2110 +++++++ .../arepo/src/mesh/voronoi/voronoi_3d.c | 5111 +++++++++++++++++ .../arepo/src/mesh/voronoi/voronoi_check.c | 407 ++ .../src/mesh/voronoi/voronoi_derefinement.c | 
1088 ++++ .../src/mesh/voronoi/voronoi_dynamic_update.c | 1037 ++++ .../arepo/src/mesh/voronoi/voronoi_exchange.c | 531 ++ .../src/mesh/voronoi/voronoi_ghost_search.c | 1773 ++++++ .../src/mesh/voronoi/voronoi_gradients_lsf.c | 944 +++ .../mesh/voronoi/voronoi_gradients_onedims.c | 204 + .../src/mesh/voronoi/voronoi_refinement.c | 425 ++ .../arepo/src/mesh/voronoi/voronoi_utils.c | 501 ++ .../src/mpi_utils/checksummed_sendrecv.c | 321 ++ .../src/mpi_utils/hypercube_allgatherv.c | 94 + .../community/arepo/src/mpi_utils/mpi_util.c | 375 ++ .../arepo/src/mpi_utils/myIBarrier.c | 175 + .../arepo/src/mpi_utils/myIBarrier.h | 51 + .../arepo/src/mpi_utils/myalltoall.c | 122 + .../community/arepo/src/mpi_utils/pinning.c | 292 + .../src/mpi_utils/sizelimited_sendrecv.c | 116 + .../community/arepo/src/ngbtree/ngbtree.c | 1394 +++++ .../arepo/src/ngbtree/ngbtree_search.c | 376 ++ .../arepo/src/ngbtree/ngbtree_walk.c | 225 + .../arepo/src/star_formation/sfr_eEOS.c | 539 ++ .../arepo/src/star_formation/starformation.c | 437 ++ .../community/arepo/src/subfind/subfind.c | 577 ++ .../community/arepo/src/subfind/subfind.h | 213 + .../arepo/src/subfind/subfind_coll_domain.c | 620 ++ .../arepo/src/subfind/subfind_coll_tree.c | 992 ++++ .../arepo/src/subfind/subfind_coll_treewalk.c | 460 ++ .../arepo/src/subfind/subfind_collective.c | 2417 ++++++++ .../arepo/src/subfind/subfind_density.c | 662 +++ .../arepo/src/subfind/subfind_distribute.c | 421 ++ .../arepo/src/subfind/subfind_findlinkngb.c | 539 ++ .../community/arepo/src/subfind/subfind_io.c | 156 + .../arepo/src/subfind/subfind_loctree.c | 930 +++ .../arepo/src/subfind/subfind_nearesttwo.c | 475 ++ .../arepo/src/subfind/subfind_properties.c | 1195 ++++ .../arepo/src/subfind/subfind_reprocess.c | 240 + .../arepo/src/subfind/subfind_serial.c | 807 +++ .../community/arepo/src/subfind/subfind_so.c | 964 ++++ .../arepo/src/subfind/subfind_so_potegy.c | 853 +++ .../arepo/src/subfind/subfind_sort_kernels.c | 442 ++ 
.../arepo/src/subfind/subfind_vars.c | 102 + .../arepo/src/time_integration/darkenergy.c | 74 + .../src/time_integration/do_gravity_hydro.c | 484 ++ .../arepo/src/time_integration/driftfac.c | 307 + .../arepo/src/time_integration/predict.c | 506 ++ .../arepo/src/time_integration/timestep.c | 980 ++++ .../arepo/src/time_integration/timestep.h | 88 + .../src/time_integration/timestep_treebased.c | 494 ++ .../community/arepo/src/utils/allocate.c | 133 + src/amuse/community/arepo/src/utils/debug.c | 148 + src/amuse/community/arepo/src/utils/dtypes.h | 195 + .../arepo/src/utils/generic_comm_helpers2.h | 724 +++ .../community/arepo/src/utils/mpz_extension.c | 119 + .../community/arepo/src/utils/mymalloc.c | 792 +++ .../community/arepo/src/utils/parallel_sort.c | 743 +++ .../community/arepo/src/utils/predicates.c | 4292 ++++++++++++++ src/amuse/community/arepo/src/utils/system.c | 1300 +++++ src/amuse/community/arepo/src/utils/tags.h | 50 + src/amuse/community/arepo/src/utils/timer.h | 251 + 145 files changed, 95582 insertions(+) create mode 100644 src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.c create mode 100644 src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.h create mode 100644 src/amuse/community/arepo/src/add_backgroundgrid/calc_weights.c create mode 100644 src/amuse/community/arepo/src/add_backgroundgrid/distribute.c create mode 100644 src/amuse/community/arepo/src/cooling/cooling.c create mode 100644 src/amuse/community/arepo/src/cooling/cooling_proto.h create mode 100644 src/amuse/community/arepo/src/cooling/cooling_vars.h create mode 100644 src/amuse/community/arepo/src/debug_md5/Md5.c create mode 100644 src/amuse/community/arepo/src/debug_md5/Md5.h create mode 100644 src/amuse/community/arepo/src/debug_md5/calc_checksum.c create mode 100644 src/amuse/community/arepo/src/domain/bsd_tree.h create mode 100644 src/amuse/community/arepo/src/domain/domain.c create mode 100644 src/amuse/community/arepo/src/domain/domain.h create mode 100644 
src/amuse/community/arepo/src/domain/domain_DC_update.c create mode 100644 src/amuse/community/arepo/src/domain/domain_balance.c create mode 100644 src/amuse/community/arepo/src/domain/domain_box.c create mode 100644 src/amuse/community/arepo/src/domain/domain_counttogo.c create mode 100644 src/amuse/community/arepo/src/domain/domain_exchange.c create mode 100644 src/amuse/community/arepo/src/domain/domain_rearrange.c create mode 100644 src/amuse/community/arepo/src/domain/domain_sort_kernels.c create mode 100644 src/amuse/community/arepo/src/domain/domain_toplevel.c create mode 100644 src/amuse/community/arepo/src/domain/domain_vars.c create mode 100644 src/amuse/community/arepo/src/domain/peano.c create mode 100644 src/amuse/community/arepo/src/fof/fof.c create mode 100644 src/amuse/community/arepo/src/fof/fof.h create mode 100644 src/amuse/community/arepo/src/fof/fof_distribute.c create mode 100644 src/amuse/community/arepo/src/fof/fof_findgroups.c create mode 100644 src/amuse/community/arepo/src/fof/fof_io.c create mode 100644 src/amuse/community/arepo/src/fof/fof_nearest.c create mode 100644 src/amuse/community/arepo/src/fof/fof_sort_kernels.c create mode 100644 src/amuse/community/arepo/src/fof/fof_vars.c create mode 100644 src/amuse/community/arepo/src/gitversion/version create mode 100644 src/amuse/community/arepo/src/gitversion/version.h create mode 100644 src/amuse/community/arepo/src/gravity/accel.c create mode 100644 src/amuse/community/arepo/src/gravity/forcetree.c create mode 100644 src/amuse/community/arepo/src/gravity/forcetree.h create mode 100644 src/amuse/community/arepo/src/gravity/forcetree_ewald.c create mode 100644 src/amuse/community/arepo/src/gravity/forcetree_optimizebalance.c create mode 100644 src/amuse/community/arepo/src/gravity/forcetree_walk.c create mode 100644 src/amuse/community/arepo/src/gravity/grav_external.c create mode 100644 src/amuse/community/arepo/src/gravity/grav_softening.c create mode 100644 
src/amuse/community/arepo/src/gravity/gravdirect.c create mode 100644 src/amuse/community/arepo/src/gravity/gravtree.c create mode 100644 src/amuse/community/arepo/src/gravity/gravtree_forcetest.c create mode 100644 src/amuse/community/arepo/src/gravity/longrange.c create mode 100644 src/amuse/community/arepo/src/gravity/pm/pm_mpi_fft.c create mode 100644 src/amuse/community/arepo/src/gravity/pm/pm_nonperiodic.c create mode 100644 src/amuse/community/arepo/src/gravity/pm/pm_periodic.c create mode 100644 src/amuse/community/arepo/src/gravity/pm/pm_periodic2d.c create mode 100644 src/amuse/community/arepo/src/hydro/finite_volume_solver.c create mode 100644 src/amuse/community/arepo/src/hydro/gradients.c create mode 100644 src/amuse/community/arepo/src/hydro/mhd.c create mode 100644 src/amuse/community/arepo/src/hydro/riemann.c create mode 100644 src/amuse/community/arepo/src/hydro/riemann_hllc.c create mode 100644 src/amuse/community/arepo/src/hydro/riemann_hlld.c create mode 100644 src/amuse/community/arepo/src/hydro/scalars.c create mode 100644 src/amuse/community/arepo/src/hydro/update_primitive_variables.c create mode 100644 src/amuse/community/arepo/src/init/begrun.c create mode 100644 src/amuse/community/arepo/src/init/density.c create mode 100644 src/amuse/community/arepo/src/init/init.c create mode 100644 src/amuse/community/arepo/src/io/global.c create mode 100644 src/amuse/community/arepo/src/io/hdf5_util.c create mode 100644 src/amuse/community/arepo/src/io/io.c create mode 100644 src/amuse/community/arepo/src/io/io_fields.c create mode 100644 src/amuse/community/arepo/src/io/logs.c create mode 100644 src/amuse/community/arepo/src/io/parameters.c create mode 100644 src/amuse/community/arepo/src/io/read_ic.c create mode 100644 src/amuse/community/arepo/src/io/restart.c create mode 100644 src/amuse/community/arepo/src/main/allvars.c create mode 100644 src/amuse/community/arepo/src/main/allvars.h create mode 100644 src/amuse/community/arepo/src/main/main.c 
create mode 100644 src/amuse/community/arepo/src/main/main_original.c create mode 100644 src/amuse/community/arepo/src/main/main_reduced.c create mode 100644 src/amuse/community/arepo/src/main/proto.h create mode 100644 src/amuse/community/arepo/src/main/run.c create mode 100644 src/amuse/community/arepo/src/mesh/criterion_derefinement.c create mode 100644 src/amuse/community/arepo/src/mesh/criterion_refinement.c create mode 100644 src/amuse/community/arepo/src/mesh/mesh.h create mode 100644 src/amuse/community/arepo/src/mesh/refinement.c create mode 100644 src/amuse/community/arepo/src/mesh/set_vertex_velocities.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi.h create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d_spherical.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_2d.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_3d.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_check.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_derefinement.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_dynamic_update.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_exchange.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_ghost_search.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_lsf.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_onedims.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_refinement.c create mode 100644 src/amuse/community/arepo/src/mesh/voronoi/voronoi_utils.c create mode 100644 src/amuse/community/arepo/src/mpi_utils/checksummed_sendrecv.c create mode 100644 src/amuse/community/arepo/src/mpi_utils/hypercube_allgatherv.c 
create mode 100644 src/amuse/community/arepo/src/mpi_utils/mpi_util.c create mode 100644 src/amuse/community/arepo/src/mpi_utils/myIBarrier.c create mode 100644 src/amuse/community/arepo/src/mpi_utils/myIBarrier.h create mode 100644 src/amuse/community/arepo/src/mpi_utils/myalltoall.c create mode 100644 src/amuse/community/arepo/src/mpi_utils/pinning.c create mode 100644 src/amuse/community/arepo/src/mpi_utils/sizelimited_sendrecv.c create mode 100644 src/amuse/community/arepo/src/ngbtree/ngbtree.c create mode 100644 src/amuse/community/arepo/src/ngbtree/ngbtree_search.c create mode 100644 src/amuse/community/arepo/src/ngbtree/ngbtree_walk.c create mode 100644 src/amuse/community/arepo/src/star_formation/sfr_eEOS.c create mode 100644 src/amuse/community/arepo/src/star_formation/starformation.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind.h create mode 100644 src/amuse/community/arepo/src/subfind/subfind_coll_domain.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_coll_tree.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_coll_treewalk.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_collective.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_density.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_distribute.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_findlinkngb.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_io.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_loctree.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_nearesttwo.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_properties.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_reprocess.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_serial.c create mode 100644 
src/amuse/community/arepo/src/subfind/subfind_so.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_so_potegy.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_sort_kernels.c create mode 100644 src/amuse/community/arepo/src/subfind/subfind_vars.c create mode 100644 src/amuse/community/arepo/src/time_integration/darkenergy.c create mode 100644 src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c create mode 100644 src/amuse/community/arepo/src/time_integration/driftfac.c create mode 100644 src/amuse/community/arepo/src/time_integration/predict.c create mode 100644 src/amuse/community/arepo/src/time_integration/timestep.c create mode 100644 src/amuse/community/arepo/src/time_integration/timestep.h create mode 100644 src/amuse/community/arepo/src/time_integration/timestep_treebased.c create mode 100644 src/amuse/community/arepo/src/utils/allocate.c create mode 100644 src/amuse/community/arepo/src/utils/debug.c create mode 100644 src/amuse/community/arepo/src/utils/dtypes.h create mode 100644 src/amuse/community/arepo/src/utils/generic_comm_helpers2.h create mode 100644 src/amuse/community/arepo/src/utils/mpz_extension.c create mode 100644 src/amuse/community/arepo/src/utils/mymalloc.c create mode 100644 src/amuse/community/arepo/src/utils/parallel_sort.c create mode 100644 src/amuse/community/arepo/src/utils/predicates.c create mode 100644 src/amuse/community/arepo/src/utils/system.c create mode 100644 src/amuse/community/arepo/src/utils/tags.h create mode 100644 src/amuse/community/arepo/src/utils/timer.h diff --git a/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.c b/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.c new file mode 100644 index 0000000000..ea94880120 --- /dev/null +++ b/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.c @@ -0,0 +1,492 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/add_backgroundgrid/add_bggrid.c + * \date 05/2018 + * \brief Re-gridding of ICs to ensure that the entire computational + * domain contains gas cells. + * \details Can be used to convert SPH ICs to Arepo ICs. + * contains functions: + * int add_backgroundgrid(void) + * void modify_boxsize(double new_val) + * void prepare_domain_backgroundgrid(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "add_bggrid.h" + +#ifdef ADDBACKGROUNDGRID + +static void modify_boxsize(double new_val); + +MyIDType IDNew; + +/*! \brief Re-gridding of ICs onto oct-tree nodes. + * + * If this is active, no simulation is performed. 
+ * + * \return void + */ +int add_backgroundgrid(void) +{ + int i, no, numnodes; + long long ngas_count_all_old; + double vol, voltot, mgas, mtot; + int flag_all, flag = 0; + + mpi_printf("\n\nADD BACKGROUND GRID: Adding background grid to IC file\n\n"); + + for(i = 0, mgas = 0; i < NumGas; i++) + if(P[i].Type == 0) + mgas += P[i].Mass; + + MPI_Allreduce(&mgas, &mtot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + mpi_printf("ADD BACKGROUND GRID: Total gas mass before remap=%g\n", mtot); + + ngas_count_all_old = All.TotNumGas; + + ngb_treefree(); + + domain_free(); + + domain_Decomposition(); /* do new domain decomposition, will also make a new chained-list of synchronized particles */ + + numnodes = construct_forcetree(1, 1, 0, 0); /* build tree only with gas cells */ + + for(i = Tree_MaxPart, vol = 0; i < numnodes + Tree_MaxPart; i++) + { + if(Nodes[i].u.d.sibling == Nodes[i].u.d.nextnode) /* node is a leave */ + { + vol += Nodes[i].len * Nodes[i].len * Nodes[i].len; + } + } + + for(i = 0; i < NumGas; i++) + { + no = Father[i]; + vol += Nodes[no].len * Nodes[no].len * Nodes[no].len / 8; + } + + MPI_Allreduce(&vol, &voltot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + mpi_printf("\nADD BACKGROUND GRID: voltot=%g %g\n", voltot, pow(DomainLen, 3)); + + int count_leaves = 0, count_leaves_all; + + for(i = Tree_MaxPart, vol = 0; i < numnodes + Tree_MaxPart; i++) + { + if(Nodes[i].u.d.sibling == Nodes[i].u.d.nextnode) /* node is a leave */ + { + if(Nodes[i].center[0] > 0 && Nodes[i].center[0] < All.BoxSize) + if(Nodes[i].center[1] > 0 && Nodes[i].center[1] < All.BoxSize) + if(Nodes[i].center[2] > 0 && Nodes[i].center[2] < All.BoxSize) + count_leaves++; + } + } + + MPI_Allreduce(&count_leaves, &count_leaves_all, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + mpi_printf("ADD BACKGROUND GRID: count_leaves_all=%d\n\n", count_leaves_all); + + if((NumGas + count_leaves >= All.MaxPartSph) || (NumPart + count_leaves >= All.MaxPart)) + flag = 1; + + MPI_Allreduce(&flag, &flag_all, 1, 
MPI_INT, MPI_MAX, MPI_COMM_WORLD); + + /*Increase storage for newly added gas particles */ + if(flag_all) + domain_resize_storage(count_leaves, count_leaves, 0); + + /* determine maximum ID */ + MyIDType maxid, newid, *tmp; + int *list; + + for(i = 0, maxid = 0; i < NumPart; i++) + if(P[i].ID > maxid) + maxid = P[i].ID; + + tmp = mymalloc("tmp", NTask * sizeof(MyIDType)); + + MPI_Allgather(&maxid, sizeof(MyIDType), MPI_BYTE, tmp, sizeof(MyIDType), MPI_BYTE, MPI_COMM_WORLD); + + for(i = 0; i < NTask; i++) + if(tmp[i] > maxid) + maxid = tmp[i]; + + myfree(tmp); + // maxid is now the total maximum ID number of all particles + + list = mymalloc("list", NTask * sizeof(int)); + + MPI_Allgather(&count_leaves, 1, MPI_INT, list, 1, MPI_INT, MPI_COMM_WORLD); + + newid = maxid + 1; + + for(i = 0; i < ThisTask; i++) + newid += list[i]; + + myfree(list); + + // newid is now the maxid+total of count_leaves over all previous tasks + + IDNew = maxid + 1; /* old gas particles will have IDs below this */ + + // move all particle and sph particle data down the arrays by + // count_leaves. + + memmove(P + count_leaves, P, sizeof(struct particle_data) * NumPart); + memmove(SphP + count_leaves, SphP, sizeof(struct sph_particle_data) * NumGas); + + NumPart += count_leaves; + NumGas += count_leaves; + + // this is the same loop as determined count_leaves above, so + // it will be applied count_leaves times again. 
+ count_leaves = 0; + for(i = Tree_MaxPart, vol = 0; i < numnodes + Tree_MaxPart; i++) + { + if(Nodes[i].u.d.sibling == Nodes[i].u.d.nextnode) /* node is a leave */ + { + if(Nodes[i].center[0] > 0 && Nodes[i].center[0] < All.BoxSize) + if(Nodes[i].center[1] > 0 && Nodes[i].center[1] < All.BoxSize) + if(Nodes[i].center[2] > 0 && Nodes[i].center[2] < All.BoxSize) + { + P[count_leaves].Pos[0] = Nodes[i].center[0]; + P[count_leaves].Pos[1] = Nodes[i].center[1]; + P[count_leaves].Pos[2] = Nodes[i].center[2]; + P[count_leaves].Vel[0] = 0; + P[count_leaves].Vel[1] = 0; + P[count_leaves].Vel[2] = 0; + + P[count_leaves].Mass = 0; + P[count_leaves].TimeBinHydro = 0; + P[count_leaves].TimeBinGrav = 0; + + P[count_leaves].Ti_Current = All.Ti_Current; + +#ifdef MHD + SphP[count_leaves].B[0] = 0; + SphP[count_leaves].B[1] = 0; + SphP[count_leaves].B[2] = 0; + SphP[count_leaves].DivB = 0; +#endif /* #ifdef MHD */ + + P[count_leaves].Type = 0; + P[count_leaves].SofteningType = All.SofteningTypeOfPartType[0]; + + // this puts the new ID at the right spot + P[count_leaves].ID = newid++; + + SphP[count_leaves].Volume = Nodes[i].len * Nodes[i].len * Nodes[i].len; + SphP[count_leaves].Utherm = 0; + SphP[count_leaves].Energy = 0; + SphP[count_leaves].Momentum[0] = 0; + SphP[count_leaves].Momentum[1] = 0; + SphP[count_leaves].Momentum[2] = 0; + + count_leaves++; + } + } + } + + /* Delete the force tree */ + myfree(Father); + myfree(Nextnode); + myfree(Tree_Points); + force_treefree(); + + calculate_weights(); + distribute_particles(); + + int count_elim = 0, count_elim_all; + + for(i = 0; i < NumGas; i++) + if(P[i].Type == 0) + { + if(P[i].ID <= maxid) + { + // remove particle i by swapping in the last sph particle + // and then swap the last particle to that spot + P[i] = P[NumGas - 1]; + P[NumGas - 1] = P[NumPart - 1]; + + SphP[i] = SphP[NumGas - 1]; + + NumPart--; + NumGas--; + i--; + + count_elim++; + } + else + { + if(P[i].Mass > 0) + { + SphP[i].Utherm = SphP[i].Energy / P[i].Mass; 
+ P[i].Vel[0] = SphP[i].Momentum[0] / P[i].Mass; + P[i].Vel[1] = SphP[i].Momentum[1] / P[i].Mass; + P[i].Vel[2] = SphP[i].Momentum[2] / P[i].Mass; + } + } + } + + MPI_Allreduce(&count_elim, &count_elim_all, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + sumup_large_ints(1, &NumPart, &All.TotNumPart); + sumup_large_ints(1, &NumGas, &All.TotNumGas); + + mpi_printf("\nADD BACKGROUND GRID: count_elim_all=%d IDNew=%d\n", count_elim_all, IDNew); + mpi_printf("ADD BACKGROUND GRID: added particles=%d (task 0: NumGas=%d)\n", count_leaves_all - count_elim_all, NumGas); + mpi_printf("ADD BACKGROUND GRID: new particle number=%d\n", All.TotNumPart); + mpi_printf("ADD BACKGROUND GRID: new gas particle number=%d\n\n", All.TotNumGas); + + for(i = 0, mgas = 0; i < NumGas; i++) + if(P[i].Type == 0) + mgas += P[i].Mass; + + MPI_Allreduce(&mgas, &mtot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + mpi_printf("ADD BACKGROUND GRID: Total gas mass after remap=%g\n", mtot); + + savepositions(0, 0); + + mpi_printf("\nADD BACKGROUND GRID: GridSize = %d\n", All.GridSize); + mpi_printf( + "ADD BACKGROUND GRID: Suggested value for MeanVolume = %g\nADD BACKGROUND GRID: Suggested value for ReferenceGasPartMass = %g\n", + pow(All.BoxSize / All.GridSize, 3), mtot / ngas_count_all_old); + mpi_printf("ADD BACKGROUND GRID: Suggested value for BoxSize = %g\n", All.BoxSize); + mpi_printf("ADD BACKGROUND GRID: Done!\n\n"); + + return 0; +} + +/*! \brief Changes the box size to a new value. + * + * LONG_X, LONG_Y and LONG_Z are still active as specified in Config file. + * + * \param[in] new_val New box size. 
+ * + * \return void + */ +void modify_boxsize(double new_val) +{ + All.BoxSize = new_val; + + boxSize = All.BoxSize; + boxHalf = 0.5 * All.BoxSize; +#ifdef LONG_X + boxHalf_X = boxHalf * LONG_X; + boxSize_X = boxSize * LONG_X; +#endif /* #ifdef LONG_X */ +#ifdef LONG_Y + boxHalf_Y = boxHalf * LONG_Y; + boxSize_Y = boxSize * LONG_Y; +#endif /* #ifdef LONG_Y */ +#ifdef LONG_Z + boxHalf_Z = boxHalf * LONG_Z; + boxSize_Z = boxSize * LONG_Z; +#endif /* #ifdef LONG_Z */ +} + +/*! \brief Prepares computational box; makes sure simulation volume is large + * enough. + * + * \return void + */ +void prepare_domain_backgroundgrid(void) +{ + int i, j, shift_half_box = 0, min_topleave_num = 0, set_grid_size_flag = 0; + unsigned int size, bit_num; + double len, xmin[3], xmax[3], xmin_glob[3], xmax_glob[3]; + double len_gas, xmin_gas[3], xmax_gas[3], xmin_gas_glob[3], xmax_gas_glob[3]; + double min_box_size, max_box_size; + + mpi_printf("\n\nADD BACKGROUND GRID: preparing domain for first domain decomposition\n"); + + /* Checking GridSize limits */ + if(All.GridSize < 0) + terminate("GridSize = %d is less than zero. This is not allowed.", All.GridSize); + + if(All.GridSize > ADDBACKGROUNDGRIDMAX) + terminate("GridSize = %d is exceeding the max grid size = %d", All.GridSize, ADDBACKGROUNDGRIDMAX); + + if(All.GridSize > 0) + set_grid_size_flag = 1; + + /* Now checking it is a power of two. If not assign the closest value (is this required?) 
*/ + bit_num = 0; + size = ADDBACKGROUNDGRIDMAX; + while(((size & 1) == 0) && size > 1) + { + size >>= 1; + bit_num++; + } + + for(j = 1; j < bit_num; j++) + { + size = All.GridSize; + size >>= (bit_num - j); + if((size & 1) == 1) + break; + } + + mpi_printf("ADD BACKGROUND GRID: original value of GridSize = %d\n", All.GridSize); + + All.GridSize = (size << (bit_num - j - 1)); + + if(All.GridSize < 1) + All.GridSize = 1; + + mpi_printf("ADD BACKGROUND GRID: closest power of two corresponding to GridSize = %d is taken as initial guess\n", 2 * All.GridSize); + + /* determine local extension */ + for(j = 0; j < 3; j++) + { + xmin[j] = MAX_REAL_NUMBER; + xmax[j] = -MAX_REAL_NUMBER; + xmin_gas[j] = MAX_REAL_NUMBER; + xmax_gas[j] = -MAX_REAL_NUMBER; + } + + for(i = 0; i < NumPart; i++) + { + for(j = 0; j < 3; j++) + { + if(xmin[j] > P[i].Pos[j]) + xmin[j] = P[i].Pos[j]; + + if(xmax[j] < P[i].Pos[j]) + xmax[j] = P[i].Pos[j]; + } + } + + for(i = 0; i < NumGas; i++) + { + for(j = 0; j < 3; j++) + { + if(xmin_gas[j] > P[i].Pos[j]) + xmin_gas[j] = P[i].Pos[j]; + + if(xmax_gas[j] < P[i].Pos[j]) + xmax_gas[j] = P[i].Pos[j]; + } + } + + MPI_Allreduce(xmin, xmin_glob, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(xmax, xmax_glob, 3, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(xmin_gas, xmin_gas_glob, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(xmax_gas, xmax_gas_glob, 3, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + + mpi_printf("ADD BACKGROUND GRID: Min and max coordinates.\n"); + mpi_printf("ADD BACKGROUND GRID: xmin|ymin|zmin=% g|% g|% g.\n", xmin_glob[0], xmin_glob[1], xmin_glob[2]); + mpi_printf("ADD BACKGROUND GRID: xmax|ymax|zmax=% g|% g|% g.\n", xmax_glob[0], xmax_glob[1], xmax_glob[2]); + mpi_printf("ADD BACKGROUND GRID: xmin_gas|ymin_gas|zmin_gas=% g|% g|% g.\n", xmin_gas_glob[0], xmin_gas_glob[1], xmin_gas_glob[2]); + mpi_printf("ADD BACKGROUND GRID: xmax_gas|ymax_gas|zmax_gas=% g|% g|% g.\n", xmax_gas_glob[0], xmax_gas_glob[1], 
xmax_gas_glob[2]); + + len = 0; + len_gas = 0; + for(j = 0; j < 3; j++) + { + if(xmax_glob[j] - xmin_glob[j] > len) + len = xmax_glob[j] - xmin_glob[j]; + + if(xmax_gas_glob[j] - xmin_gas_glob[j] > len_gas) + len_gas = xmax_gas_glob[j] - xmin_gas_glob[j]; + + if(xmin_glob[j] < 0) + shift_half_box = 1; + } + + max_box_size = FACTOR_MAX_BOX_SIZE * len_gas; + min_box_size = FACTOR_MIN_BOX_SIZE * len_gas; + + if(All.BoxSize < min_box_size) + { + mpi_printf("ADD BACKGROUND GRID: Need to increase the BoxSize. Old value = %g, new value = %g\n", All.BoxSize, min_box_size); + modify_boxsize(min_box_size); + } + if(All.BoxSize > max_box_size) + { + mpi_printf("ADD BACKGROUND GRID: Need to decrease the BoxSize. Old value = %g, new value = %g\n", All.BoxSize, max_box_size); + modify_boxsize(max_box_size); + } + + mpi_printf("ADD BACKGROUND GRID: Domain extent %g, BoxSize = %g, ratio = %g\n", len, All.BoxSize, len / All.BoxSize); + mpi_printf("ADD BACKGROUND GRID: Gas extent %g, BoxSize = %g, ratio = %g\n", len_gas, All.BoxSize, len_gas / All.BoxSize); + + /* the terminate condition must be checked properly */ + if(!set_grid_size_flag) + { + while(min_topleave_num < NTask && (All.BoxSize / len_gas) > All.GridSize && All.GridSize < ADDBACKGROUNDGRIDMAX) + { + All.GridSize <<= 1; + min_topleave_num = (int)pow(len_gas * All.GridSize / All.BoxSize, 3.0); + mpi_printf("ADD BACKGROUND GRID: GridSize=%3d, min_topleave_num=%6d, NTask=%6d, BoxSize/GridSize=%g, len_gas/GridSize=%g\n", + All.GridSize, min_topleave_num, NTask, All.BoxSize / All.GridSize, len_gas / All.BoxSize); + } + } + else + { + All.GridSize <<= 1; + min_topleave_num = (int)pow(len_gas * All.GridSize / All.BoxSize, 3.0); + mpi_printf("ADD BACKGROUND GRID: GridSize=%3d, min_topleave_num=%6d, NTask=%6d, BoxSize/GridSize=%g, len_gas/GridSize=%g\n", + All.GridSize, min_topleave_num, NTask, All.BoxSize / All.GridSize, len_gas / All.BoxSize); + } + + if(min_topleave_num < NTask) + { + char buf[500]; + sprintf(buf, + 
"min_topleave_num=%d < NTask=%d, MaxGridSize=%d. Try either to run with less task or to set the BoxSize to a smaller " + "value\n", + min_topleave_num, NTask, ADDBACKGROUNDGRIDMAX); + terminate(buf); + } + + if(len_gas / All.BoxSize > All.GridSize) + { + char buf[500]; + sprintf(buf, "len_gas/BoxSize=%g > GridSize=%d, MaxGridSize=%d. GridSize should be increased if possible\n", + len_gas / All.BoxSize, All.GridSize, ADDBACKGROUNDGRIDMAX); + terminate(buf); + } + + if(shift_half_box) + { + mpi_printf("ADD BACKGROUND GRID: Need to shift particles by half box size\n\n"); + for(i = 0; i < NumPart; i++) + { + P[i].Pos[0] += 0.5 * All.BoxSize; + P[i].Pos[1] += 0.5 * All.BoxSize; + P[i].Pos[2] += 0.5 * All.BoxSize; + } + } +} + +#endif /* #ifdef ADDBACKGROUNDGRID */ diff --git a/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.h b/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.h new file mode 100644 index 0000000000..47c81c199b --- /dev/null +++ b/src/amuse/community/arepo/src/add_backgroundgrid/add_bggrid.h @@ -0,0 +1,58 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/add_backgroundgrid/add_bggrid.h + * \date 05/2018 + * \brief Re-gridding of ICs to ensure that the entire computational + * domain contains gas cells. + * \details Can be used to convert SPH ICs to Arepo ICs. + * Interface functions: + * int add_backgroundgrid(void); + * void prepare_domain_backgroundgrid(void); + * Functions of this module called in: + * init() (init.c) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef ADD_BGGRID_H +#define ADD_BGGRID_H + +#include "../main/allvars.h" + +#ifdef ADDBACKGROUNDGRID + +#define ADDBACKGROUNDGRIDMAX 256 +#define FACTOR_MAX_BOX_SIZE 15.0 +#define FACTOR_MIN_BOX_SIZE 2.0 + +extern MyIDType IDNew; + +int add_backgroundgrid(void); +void prepare_domain_backgroundgrid(void); +void calculate_weights(); +void distribute_particles(); + +#endif /* #ifdef ADDBACKGROUNDGRID */ + +#endif /* ADD_BGGRID_H */ diff --git a/src/amuse/community/arepo/src/add_backgroundgrid/calc_weights.c b/src/amuse/community/arepo/src/add_backgroundgrid/calc_weights.c new file mode 100644 index 0000000000..8e0f2ea04e --- /dev/null +++ b/src/amuse/community/arepo/src/add_backgroundgrid/calc_weights.c @@ -0,0 +1,301 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/add_backgroundgrid/calc_weights.c + * \date 05/2018 + * \brief Routine that calculates the cumulative weights of neighboring + * cells. + * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void calculate_weights() + * int find_cells_evaluate(int target, int mode, int thread_id) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "add_bggrid.h" + +#ifdef ADDBACKGROUNDGRID + +static int find_cells_evaluate(int target, int mode, int thread_id); + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Pos[3]; + MyFloat Hsml; + + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First note of communication. 
+ * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + in->Pos[0] = P[i].Pos[0]; + in->Pos[1] = P[i].Pos[1]; + in->Pos[2] = P[i].Pos[2]; + + in->Hsml = SphP[i].Hsml; + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + MyFloat Weight; +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? + * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + SphP[i].Weight = out->Weight; + } + else /* combine */ + { + SphP[i].Weight += out->Weight; + } +} + +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int idx; + { + int j, threadid = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + idx = NextParticle++; + + if(idx >= TimeBinsGravity.NActiveParticles) + break; + + int i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + find_cells_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. 
+ *
+ * \return void
+ */
+static void kernel_imported(void)
+{
+  const int tid = get_thread_num();
+
+  /* now do the particles that were sent to us, in import order */
+  for(int n = 0; n < Nimport; n++)
+    find_cells_evaluate(n, MODE_IMPORTED_PARTICLES, tid);
+}
+
+/*! \brief Computes the kernel weight sum (SphP[].Weight) of every gas cell
+ *         that carries mass.
+ *
+ *  Redoes the domain decomposition, rebuilds the neighbour tree for the
+ *  NumGas gas cells, puts all gas cells with P[].Mass > 0 into
+ *  TimeBinsGravity.ActiveParticleList and runs the generic communication
+ *  pattern with kernel_local / kernel_imported over that list.
+ *
+ *  \return void
+ */
+void calculate_weights()
+{
+  domain_free();
+  domain_Decomposition(); /* do new domain decomposition, will also make a new chained-list of synchronized particles */
+
+  ngb_treeallocate();
+  ngb_treebuild(NumGas);
+
+  mpi_printf("ADD BACKGROUND GRID: distribution of fluid quantities in a SPH-like fashion\n");
+  mpi_printf("ADD BACKGROUND GRID: finding the normalization factors\n");
+
+  /* gather the indices of all gas cells with positive mass */
+  int nactive = 0;
+
+  for(int cell = 0; cell < NumGas; cell++)
+    if(P[cell].Mass > 0)
+      TimeBinsGravity.ActiveParticleList[nactive++] = cell;
+
+  TimeBinsGravity.NActiveParticles = nactive;
+
+  generic_set_MaxNexport();
+
+  generic_comm_pattern(TimeBinsGravity.NActiveParticles, kernel_local, kernel_imported);
+
+  mpi_printf("ADD BACKGROUND GRID: done\n");
+}
+
+/*!
\brief Finds the newly added cells around a particle/cell and sums up their
+ *         kernel weights in an SPH fashion.
+ *
+ *  Only neighbours with P[].ID >= IDNew and a distance r < h (h = Hsml of
+ *  the target) contribute; each adds wk * SphP[].Volume, where wk is the
+ *  kernel value built from the KERNEL_COEFF_* constants. The sum is written
+ *  to SphP[target].Weight via out2particle() for local targets and to
+ *  DataResult[target] for imported ones.
+ *
+ *  \param[in] target Index of particle/cell (into P/SphP for local mode, into
+ *             DataGet for imported mode)
+ *  \param[in] mode Flag if it operates on local or imported data
+ *  \param[in] thread_id ID of thread
+ *
+ *  \return 0
+ */
+int find_cells_evaluate(int target, int mode, int thread_id)
+{
+  int j, n, numnodes, *firstnode;
+  double h, h2, hinv, hinv3;
+  MyDouble dx, dy, dz, r;
+  MyDouble *pos;
+  /* NOTE(review): xtmp/ytmp/ztmp are not referenced directly; presumably they
+   * are scratch variables used inside the NGB_PERIODIC_LONG_* macros, so do
+   * not remove them without checking the macro definitions */
+  double xtmp, ytmp, ztmp;
+
+  double weight = 0;
+
+  data_in local, *target_data;
+  data_out out;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      /* local target: build the input record from P/SphP */
+      particle2in(&local, target, 0);
+      target_data = &local;
+
+      numnodes  = 1;
+      firstnode = NULL;
+    }
+  else
+    {
+      /* imported target: the record was received from another task */
+      target_data = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos  = target_data->Pos;
+  h    = target_data->Hsml;
+  h2   = h * h;
+  hinv = 1.0 / h;
+#ifndef TWODIMS
+  hinv3 = hinv * hinv * hinv;
+#else  /* #ifndef TWODIMS */
+  hinv3 = hinv * hinv / boxSize_Z;
+#endif /* #ifndef TWODIMS #else */
+
+  int nfound = ngb_treefind_variable_threads(pos, h, target, mode, thread_id, numnodes, firstnode);
+
+  for(n = 0; n < nfound; n++)
+    {
+      j = Thread[thread_id].Ngblist[n];
+
+      /* only cells whose ID is at least IDNew receive weight */
+      if(P[j].ID >= IDNew)
+        {
+          dx = NGB_PERIODIC_LONG_X(pos[0] - P[j].Pos[0]);
+          dy = NGB_PERIODIC_LONG_Y(pos[1] - P[j].Pos[1]);
+          dz = NGB_PERIODIC_LONG_Z(pos[2] - P[j].Pos[2]);
+
+          double r2 = dx * dx + dy * dy + dz * dz;
+
+          if(r2 < h2)
+            {
+              r = sqrt(r2);
+
+              /* u = r / h lies in [0, 1) here; the kernel is piecewise in u */
+              double u = r * hinv;
+              double wk;
+              if(u < 0.5)
+                wk = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u);
+              else
+                wk = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u);
+
+              weight += wk * SphP[j].Volume;
+            }
+        }
+    }
+
+  out.Weight = weight;
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+#endif /* #ifdef ADDBACKGROUNDGRID */
diff --git a/src/amuse/community/arepo/src/add_backgroundgrid/distribute.c
b/src/amuse/community/arepo/src/add_backgroundgrid/distribute.c new file mode 100644 index 0000000000..aad7d150c5 --- /dev/null +++ b/src/amuse/community/arepo/src/add_backgroundgrid/distribute.c @@ -0,0 +1,328 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/add_backgroundgrid/distribute.c + * \date 05/2018 + * \brief Distributes the cell properties in an SPH kernel weighted + * fashion to neighboring cells. + * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void distribute_particles(void) + * int find_cells_evaluate(int target, int mode, int thread_id) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "add_bggrid.h" + +#ifdef ADDBACKGROUNDGRID + +static int find_cells_evaluate(int target, int mode, int thread_id); + +/*! 
\brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Pos[3]; + MyFloat Hsml; + MyFloat Weight; + MyFloat Mass; + MyFloat InternalEnergy; + MyFloat Momentum[3]; +#ifdef MHD + MyFloat B[3]; +#endif /* #ifdef MHD */ + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First note of communication. + * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + in->Pos[0] = P[i].Pos[0]; + in->Pos[1] = P[i].Pos[1]; + in->Pos[2] = P[i].Pos[2]; + + in->Hsml = SphP[i].Hsml; + + in->Weight = SphP[i].Weight; + in->Mass = P[i].Mass; + in->InternalEnergy = SphP[i].Utherm * P[i].Mass; + + int k; + for(k = 0; k < 3; k++) + in->Momentum[k] = P[i].Vel[k] * P[i].Mass; + +#ifdef MHD + for(k = 0; k < 3; k++) + in->B[k] = SphP[i].B[k]; +#endif /* #ifdef MHD */ + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + char nothing; +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? 
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) { return; } + +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int idx; + { + int j, threadid = get_thread_num(); + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + idx = NextParticle++; + + if(idx >= TimeBinsGravity.NActiveParticles) + break; + + int i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + find_cells_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + find_cells_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief Main function to distribute hydro quantities over a kernel average. 
+ * + * \return void + */ +void distribute_particles(void) +{ + mpi_printf("ADD BACKGROUND GRID: distributing the fluid quantities\n"); + + generic_set_MaxNexport(); + + generic_comm_pattern(TimeBinsGravity.NActiveParticles, kernel_local, kernel_imported); + +#ifdef MHD + /* now divide the B field in each cell by the weight (sum of the wk's, + which we stored in SphP.divB */ + for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + int i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(P[i].ID >= IDNew) + { + int j; + if(SphP[i].DivB > 0) + for(j = 0; j < 3; j++) + SphP[i].B[j] /= SphP[i].DivB; + } + } +#endif /* #ifdef MHD */ + + mpi_printf("ADD BACKGROUND GRID: done\n"); +} + +/*! \brief Distributes imported properties on neighbouring cells. + * + * \param[in] target Index of particle/cell. + * \param[in] mode Flag if it operates on local or imported data. + * \param[in] threadid ID of thread. + * + * \return 0 + */ +int find_cells_evaluate(int target, int mode, int thread_id) +{ + int j, n, numnodes, *firstnode; + double h, h2, hinv, hinv3; + MyDouble dx, dy, dz, r; + MyDouble *pos; + double xtmp, ytmp, ztmp; + + data_in local, *target_data; + data_out out; + out.nothing = 0; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + target_data = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + target_data = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = target_data->Pos; + h = target_data->Hsml; + h2 = h * h; + hinv = 1.0 / h; +#ifndef TWODIMS + hinv3 = hinv * hinv * hinv; +#else /* #ifndef TWODIMS */ + hinv3 = hinv * hinv / boxSize_Z; +#endif /* #ifndef TWODIMS #else */ + + int nfound = ngb_treefind_variable_threads(pos, h, target, mode, thread_id, numnodes, firstnode); + + double wsum = 0; + + for(n = 0; n < nfound; n++) + { + j = Thread[thread_id].Ngblist[n]; + + if(P[j].ID < IDNew) + continue; + + dx = NGB_PERIODIC_LONG_X(pos[0] - P[j].Pos[0]); + dy 
= NGB_PERIODIC_LONG_Y(pos[1] - P[j].Pos[1]); + dz = NGB_PERIODIC_LONG_Z(pos[2] - P[j].Pos[2]); + + double r2 = dx * dx + dy * dy + dz * dz; + + if(r2 < h2) + { + r = sqrt(r2); + + double u = r * hinv; + double wk; + if(u < 0.5) + wk = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u); + else + wk = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u); + + double weight = SphP[j].Volume * wk / target_data->Weight; + + wsum += weight; + + P[j].Mass += target_data->Mass * weight; + SphP[j].Energy += target_data->InternalEnergy * weight; + + int k; + for(k = 0; k < 3; k++) + SphP[j].Momentum[k] += target_data->Momentum[k] * weight; + +#ifdef MHD + for(k = 0; k < 3; k++) + SphP[j].B[k] += target_data->B[k] * weight; + SphP[j].DivB += wk; +#endif /* #ifdef MHD */ + } + } + + if(wsum > 1.01) + { + printf("wsum=%g, Weight=%g, target=%d\n", wsum, target_data->Weight, target); + terminate("bla"); + } + + /* Now collect the result at the right place */ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return 0; +} + +#endif /* #ifdef ADDBACKGROUNDGRID */ diff --git a/src/amuse/community/arepo/src/cooling/cooling.c b/src/amuse/community/arepo/src/cooling/cooling.c new file mode 100644 index 0000000000..7e7cebbc98 --- /dev/null +++ b/src/amuse/community/arepo/src/cooling/cooling.c @@ -0,0 +1,870 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/cooling/cooling.c + * \date 05/2018 + * \brief Module for gas radiative cooling + * \details contains functions: + * double DoCooling(double u_old, double rho, double dt, double + * *ne_guess) + * double GetCoolingTime(double u_old, double rho, double + * *ne_guess) + * double convert_u_to_temp(double u, double rho, double + * *ne_guess) + * void find_abundances_and_rates(double logT, double rho, + * double *ne_guess) + * double CoolingRateFromU(double u, double rho, double + * *ne_guess) + * void SetOutputGasState(int i, double *ne_guess, double *nH0, + * double *coolrate) + * double CoolingRate(double logT, double rho, double *nelec) + * void MakeRateTable(void) + * void ReadIonizeParams(char *fname, int which) + * void IonizeParamsUVB(void) + * void SetZeroIonization(void) + * void IonizeParams(void) + * void InitCool(void) + * void cooling_only(void) + * void cool_cell(int i) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef COOLING + +static double Tmin = 0.0; /*!< min temperature in log10 */ +static double Tmax = 9.0; /*!< max temperature in log10 */ +static double deltaT; /*!< log10 of temperature spacing in the interpolation tables */ +static GasState gs; /*!< gas state */ +static RateTable *RateT; /*!< tabulated rates */ +static PhotoTable *PhotoTUVB; /*!< photo-ionization/heating rate table for UV background */ +static PhotoCurrent pc; /*!< 
current interpolated photo rates */ +static int NheattabUVB; /*!< length of UVB photo table */ +static DoCoolData DoCool; /*!< cooling data */ + +/*! \brief Computes the new internal energy per unit mass. + * + * The function solves for the new internal energy per unit mass of the gas + * by integrating the equation for the internal energy with an implicit + * Euler scheme. The root of resulting non linear equation, + * which gives tnew internal energy, is found with the bisection method. + * Arguments are passed in code units. + * + * \param[in] u_old the initial (before cooling is applied) internal energy + * per unit mass of the gas cell. + * \param[in] rho the proper density of the gas cell. + * \param[in] dt the duration of the time step. + * \param[in] ne_guess electron number density relative to hydrogen number + * density (for molecular weight computation). + * + * \return The new internal energy per unit mass of the gas cell. + */ +double DoCooling(double u_old, double rho, double dt, double *ne_guess) +{ + double u, du; + double u_lower, u_upper; + double ratefact; + double LambdaNet; + + int iter = 0; + + DoCool.u_old_input = u_old; + DoCool.rho_input = rho; + DoCool.dt_input = dt; + DoCool.ne_guess_input = *ne_guess; + + if(!gsl_finite(u_old)) + terminate("invalid input: u_old=%g\n", u_old); + + if(u_old < 0 || rho < 0) + terminate("invalid input: task=%d u_old=%g rho=%g dt=%g All.MinEgySpec=%g\n", ThisTask, u_old, rho, dt, All.MinEgySpec); + + rho *= All.UnitDensity_in_cgs * All.HubbleParam * All.HubbleParam; /* convert to physical cgs units */ + u_old *= All.UnitPressure_in_cgs / All.UnitDensity_in_cgs; + dt *= All.UnitTime_in_s / All.HubbleParam; + + gs.nHcgs = gs.XH * rho / PROTONMASS; /* hydrogen number dens in cgs units */ + ratefact = gs.nHcgs * gs.nHcgs / rho; + + u = u_old; + u_lower = u; + u_upper = u; + + LambdaNet = CoolingRateFromU(u, rho, ne_guess); + + /* bracketing */ + if(u - u_old - ratefact * LambdaNet * dt < 0) /* heating */ + { + 
u_upper *= sqrt(1.1); + u_lower /= sqrt(1.1); + while(u_upper - u_old - ratefact * CoolingRateFromU(u_upper, rho, ne_guess) * dt < 0) + { + u_upper *= 1.1; + u_lower *= 1.1; + } + } + + if(u - u_old - ratefact * LambdaNet * dt > 0) + { + u_lower /= sqrt(1.1); + u_upper *= sqrt(1.1); + while(u_lower - u_old - ratefact * CoolingRateFromU(u_lower, rho, ne_guess) * dt > 0) + { + u_upper /= 1.1; + u_lower /= 1.1; + } + } + + do + { + u = 0.5 * (u_lower + u_upper); + + LambdaNet = CoolingRateFromU(u, rho, ne_guess); + + if(u - u_old - ratefact * LambdaNet * dt > 0) + { + u_upper = u; + } + else + { + u_lower = u; + } + + du = u_upper - u_lower; + + iter++; + + if(iter >= (MAXITER - 10)) + printf("u= %g\n", u); + } + while(fabs(du / u) > 1.0e-6 && iter < MAXITER); + + if(iter >= MAXITER) + terminate( + "failed to converge in DoCooling(): DoCool.u_old_input=%g\nDoCool.rho_input= %g\nDoCool.dt_input= %g\nDoCool.ne_guess_input= " + "%g\n", + DoCool.u_old_input, DoCool.rho_input, DoCool.dt_input, DoCool.ne_guess_input); + + u *= All.UnitDensity_in_cgs / All.UnitPressure_in_cgs; /* to internal units */ + + return u; +} + +/*! \brief Returns the cooling time. + * + * If we actually have heating, a cooling time of 0 is returned. + * + * \param[in] u_old The initial (before cooling is applied) internal energy + * per unit mass of the gas cell. + * \param[in] rho The proper density of the gas cell. + * \param[in] ne_guess Electron number density relative to hydrogen number + * density (for molecular weight computation). + * + * \return Cooling time; 0 if heating. 
+ */
+double GetCoolingTime(double u_old, double rho, double *ne_guess)
+{
+  /* keep the inputs (still in code units) for diagnostics */
+  DoCool.u_old_input    = u_old;
+  DoCool.rho_input      = rho;
+  DoCool.ne_guess_input = *ne_guess;
+
+  /* convert to physical cgs units */
+  rho *= All.UnitDensity_in_cgs * All.HubbleParam * All.HubbleParam;
+  u_old *= All.UnitPressure_in_cgs / All.UnitDensity_in_cgs;
+
+  gs.nHcgs = gs.XH * rho / PROTONMASS; /* hydrogen number dens in cgs units */
+
+  const double ratefact  = gs.nHcgs * gs.nHcgs / rho;
+  const double LambdaNet = CoolingRateFromU(u_old, rho, ne_guess);
+
+  /* a non-negative net rate means heating (e.g. by the UV background):
+   * report a cooling time of zero */
+  if(LambdaNet >= 0)
+    return 0;
+
+  const double coolingtime_cgs = u_old / (-ratefact * LambdaNet);
+
+  /* back to internal units */
+  return coolingtime_cgs * (All.HubbleParam / All.UnitTime_in_s);
+}
+
+/*! \brief Compute gas temperature from internal energy per unit mass.
+ *
+ * This function determines the electron fraction, and hence the mean
+ * molecular weight. With it arrives at a self-consistent temperature.
+ * Element abundances and the rates for the emission are also computed.
+ *
+ * \param[in] u internal energy per unit mass.
+ * \param[in] rho gas density.
+ * \param[in, out] ne_guess electron number density relative to hydrogen
+ * number density
+ *
+ * \return The gas temperature.
+ */
+double convert_u_to_temp(double u, double rho, double *ne_guess)
+{
+  /* u and rho are never modified in here, so only the incoming electron
+   * abundance has to be saved for the failure report */
+  const double ne_start = *ne_guess;
+
+  double damping = 0; /* running maximum of the damping term, never decreases */
+  double mu, temp, temp_prev;
+  int iter = 0;
+
+  /* first temperature estimate from the caller-supplied electron abundance */
+  mu   = (1 + 4 * gs.yhelium) / (1 + gs.yhelium + *ne_guess);
+  temp = GAMMA_MINUS1 / BOLTZMANN * u * PROTONMASS * mu;
+
+  do
+    {
+      const double ne_prev = *ne_guess;
+
+      temp_prev = temp;
+
+      /* updates *ne_guess (and the abundances in gs) for the current temperature */
+      find_abundances_and_rates(log10(temp_prev), rho, ne_guess);
+
+      mu = (1 + 4 * gs.yhelium) / (1 + gs.yhelium + *ne_guess);
+
+      /* temperature implied by the updated mean molecular weight */
+      const double temp_target = GAMMA_MINUS1 / BOLTZMANN * u * PROTONMASS * mu;
+
+      const double sensitivity =
+          temp_target / (1 + gs.yhelium + *ne_guess) * fabs((*ne_guess - ne_prev) / (temp_target - temp_prev + 1.0));
+
+      damping = dmax(damping, sensitivity);
+
+      /* damped step from the previous temperature towards the target */
+      temp = temp_prev + (temp_target - temp_prev) / (1 + damping);
+      iter++;
+
+      /* trace the last few iterations before giving up */
+      if(iter > (MAXITER - 10))
+        printf("-> temp= %g ne=%g\n", temp, *ne_guess);
+    }
+  while(fabs(temp - temp_prev) > 1.0e-3 * temp && iter < MAXITER);
+
+  if(iter >= MAXITER)
+    {
+      printf("failed to converge in convert_u_to_temp()\n");
+      printf("u_input= %g\nrho_input=%g\n ne_input=%g\n", u, rho, ne_start);
+      printf("DoCool.u_old_input=%g\nDoCool.rho_input= %g\nDoCool.dt_input= %g\nDoCool.ne_guess_input= %g\n", DoCool.u_old_input,
+             DoCool.rho_input, DoCool.dt_input, DoCool.ne_guess_input);
+      terminate("convergence failure");
+    }
+
+  /* publish the mean molecular weight that belongs to the returned temperature */
+  gs.mu = mu;
+
+  return temp;
+}
+
+/*! \brief Computes the actual abundance ratios.
+ *
+ *  The chemical composition of the gas is primordial (no metals are present).
+ *
+ *  \param[in] logT log10 of gas temperature.
+ *  \param[in] rho Gas density.
+ *  \param[in, out] ne_guess Electron number density relative to hydrogen
+ *                  number density.
+ * + * \return void + */ +void find_abundances_and_rates(double logT, double rho, double *ne_guess) +{ + double neold, nenew; + int j, niter; + double flow, fhi, t; + + double logT_input, rho_input, ne_input; + + logT_input = logT; + rho_input = rho; + ne_input = *ne_guess; + + if(!gsl_finite(logT)) + terminate("logT=%g\n", logT); + + if(logT <= Tmin) /* everything neutral */ + { + gs.nH0 = 1.0; + gs.nHe0 = gs.yhelium; + gs.nHp = 0; + gs.nHep = 0; + gs.nHepp = 0; + gs.ne = 0; + *ne_guess = 0; + return; + } + + if(logT >= Tmax) /* everything is ionized */ + { + gs.nH0 = 0; + gs.nHe0 = 0; + gs.nHp = 1.0; + gs.nHep = 0; + gs.nHepp = gs.yhelium; + gs.ne = gs.nHp + 2.0 * gs.nHepp; + *ne_guess = gs.ne; /* note: in units of the hydrogen number density */ + return; + } + + t = (logT - Tmin) / deltaT; + j = (int)t; + fhi = t - j; + flow = 1 - fhi; + + if(*ne_guess == 0) + *ne_guess = 1.0; + + gs.nHcgs = gs.XH * rho / PROTONMASS; /* hydrogen number dens in cgs units */ + + gs.ne = *ne_guess; + neold = gs.ne; + niter = 0; + gs.necgs = gs.ne * gs.nHcgs; + + /* evaluate number densities iteratively (cf KWH eqns 33-38) in units of nH */ + do + { + niter++; + + gs.aHp = flow * RateT[j].AlphaHp + fhi * RateT[j + 1].AlphaHp; + gs.aHep = flow * RateT[j].AlphaHep + fhi * RateT[j + 1].AlphaHep; + gs.aHepp = flow * RateT[j].AlphaHepp + fhi * RateT[j + 1].AlphaHepp; + gs.ad = flow * RateT[j].Alphad + fhi * RateT[j + 1].Alphad; + gs.geH0 = flow * RateT[j].GammaeH0 + fhi * RateT[j + 1].GammaeH0; + gs.geHe0 = flow * RateT[j].GammaeHe0 + fhi * RateT[j + 1].GammaeHe0; + gs.geHep = flow * RateT[j].GammaeHep + fhi * RateT[j + 1].GammaeHep; + + if(gs.necgs <= 1.e-25 || pc.J_UV == 0) + { + gs.gJH0ne = gs.gJHe0ne = gs.gJHepne = 0; + } + else + { + gs.gJH0ne = pc.gJH0 / gs.necgs; + gs.gJHe0ne = pc.gJHe0 / gs.necgs; + gs.gJHepne = pc.gJHep / gs.necgs; + } + + gs.nH0 = gs.aHp / (gs.aHp + gs.geH0 + gs.gJH0ne); /* eqn (33) */ + gs.nHp = 1.0 - gs.nH0; /* eqn (34) */ + + if((gs.gJHe0ne + gs.geHe0) <= 
SMALLNUM) /* no ionization at all */ + { + gs.nHep = 0.0; + gs.nHepp = 0.0; + gs.nHe0 = gs.yhelium; + } + else + { + gs.nHep = + gs.yhelium / (1.0 + (gs.aHep + gs.ad) / (gs.geHe0 + gs.gJHe0ne) + (gs.geHep + gs.gJHepne) / gs.aHepp); /* eqn (35) */ + gs.nHe0 = gs.nHep * (gs.aHep + gs.ad) / (gs.geHe0 + gs.gJHe0ne); /* eqn (36) */ + gs.nHepp = gs.nHep * (gs.geHep + gs.gJHepne) / gs.aHepp; /* eqn (37) */ + } + + neold = gs.ne; + + gs.ne = gs.nHp + gs.nHep + 2 * gs.nHepp; /* eqn (38) */ + gs.necgs = gs.ne * gs.nHcgs; + + if(pc.J_UV == 0) + break; + + nenew = 0.5 * (gs.ne + neold); + gs.ne = nenew; + gs.necgs = gs.ne * gs.nHcgs; + + if(fabs(gs.ne - neold) < 1.0e-4) + break; + + if(niter > (MAXITER - 10)) + printf("ne= %g niter=%d\n", gs.ne, niter); + } + while(niter < MAXITER); + + if(niter >= MAXITER) + { + printf("gs.aHp = %le\n", gs.aHp); + char buff[1000]; + sprintf(buff, "%s/cooling_task%d.dat", All.OutputDir, ThisTask); + FILE *fp = fopen(buff, "w"); + fwrite(&All.Time, sizeof(double), 1, fp); + fwrite(&logT_input, sizeof(double), 1, fp); + fwrite(&rho_input, sizeof(double), 1, fp); + fwrite(&ne_input, sizeof(double), 1, fp); + fclose(fp); + terminate( + "no convergence reached in find_abundances_and_rates(): logT_input= %g rho_input= %g ne_input= %g " + "DoCool.u_old_input=%g\nDoCool.rho_input= %g\nDoCool.dt_input= %g\nDoCool.ne_guess_input= %g\n", + logT_input, rho_input, ne_input, DoCool.u_old_input, DoCool.rho_input, DoCool.dt_input, DoCool.ne_guess_input); + } + gs.bH0 = flow * RateT[j].BetaH0 + fhi * RateT[j + 1].BetaH0; + gs.bHep = flow * RateT[j].BetaHep + fhi * RateT[j + 1].BetaHep; + gs.bff = flow * RateT[j].Betaff + fhi * RateT[j + 1].Betaff; + + *ne_guess = gs.ne; +} + +/*! \brief Get cooling rate from gas internal energy. + * + * This function first computes the self-consistent temperature + * and abundance ratios, and then it calculates + * (heating rate-cooling rate)/n_h^2 in cgs units. + * + * \param[in] u Gas internal energy per unit mass. 
+ * \param[in] rho Gas density. + * \param[in, out] ne_guess Electron number density relative to hydrogen + * number density. + * + * \return Cooling rate. + */ +double CoolingRateFromU(double u, double rho, double *ne_guess) +{ + double temp; + + temp = convert_u_to_temp(u, rho, ne_guess); + + return CoolingRate(log10(temp), rho, ne_guess); +} + +/*! \brief This function computes the self-consistent temperature and + * abundance ratios. + * + * Used only in io_fields.c for calculating output fields. + * + * \param[in] i index into SphP for gas cell to consider. + * \param[in, out] ne_guess pointer to electron number density relative to + * hydrogen number density (modified). + * \param[out] nH0 Pointer to the neutral hydrogen fraction (set to value in + * the GasState struct). + * \param[out] coolrate Pointer to cooling rate (set to value from + * CoolingRateFromU). + * + * \return void + */ +void SetOutputGasState(int i, double *ne_guess, double *nH0, double *coolrate) +{ + double sfr = 0; + double rho = SphP[i].Density * All.cf_a3inv; + double u = dmax(All.MinEgySpec, SphP[i].Utherm); + + /* update GasState as appropriate given compile-time options and cell properties */ +#if defined(USE_SFR) + sfr = get_starformation_rate(i); +#endif /* #if defined(USE_SFR) */ + + /* update DoCool */ + DoCool.u_old_input = u; + DoCool.rho_input = rho; + DoCool.ne_guess_input = *ne_guess; + + /* convert to physical cgs units */ + rho *= All.UnitDensity_in_cgs * All.HubbleParam * All.HubbleParam; + u *= All.UnitPressure_in_cgs / All.UnitDensity_in_cgs; + + /* calculate cooling rate (and so ne_guess and all of gs including nH0, nHeII) */ + *coolrate = CoolingRateFromU(u, rho, ne_guess); + + *nH0 = gs.nH0; +} + +/*! \brief Calculate (heating rate-cooling rate)/n_h^2 in cgs units. + * + * \param[in] logT log10 of gas temperature. + * \param[in] rho Gas density. + * \param[in, out] nelec Electron number density relative to hydrogen number + * density. 
+ * + * \return (heating rate-cooling rate)/n_h^2. + */ +double CoolingRate(double logT, double rho, double *nelec) +{ + double Lambda, Heat; + double LambdaExc, LambdaIon, LambdaRec, LambdaFF, LambdaCmptn = 0.0; + double LambdaExcH0, LambdaExcHep, LambdaIonH0, LambdaIonHe0, LambdaIonHep; + double LambdaRecHp, LambdaRecHep, LambdaRecHepp, LambdaRecHepd; + double redshift; + double T; + double LambdaPrim = 0.0, LambdaMet = 0.0, LambdaDust = 0.0, LambdaMol = 0.0; + + if(logT <= Tmin) + logT = Tmin + 0.5 * deltaT; /* floor at Tmin */ + + gs.nHcgs = gs.XH * rho / PROTONMASS; /* hydrogen number dens in cgs units */ + + if(logT < Tmax) + { + find_abundances_and_rates(logT, rho, nelec); + + /* Compute cooling and heating rate (cf KWH Table 1) in units of nH**2 */ + T = pow(10.0, logT); + + LambdaExcH0 = gs.bH0 * gs.ne * gs.nH0; + LambdaExcHep = gs.bHep * gs.ne * gs.nHep; + LambdaExc = LambdaExcH0 + LambdaExcHep; /* excitation */ + LambdaIonH0 = 2.18e-11 * gs.geH0 * gs.ne * gs.nH0; + LambdaIonHe0 = 3.94e-11 * gs.geHe0 * gs.ne * gs.nHe0; + LambdaIonHep = 8.72e-11 * gs.geHep * gs.ne * gs.nHep; + LambdaIon = LambdaIonH0 + LambdaIonHe0 + LambdaIonHep; /* ionization */ + LambdaRecHp = 1.036e-16 * T * gs.ne * (gs.aHp * gs.nHp); + LambdaRecHep = 1.036e-16 * T * gs.ne * (gs.aHep * gs.nHep); + LambdaRecHepp = 1.036e-16 * T * gs.ne * (gs.aHepp * gs.nHepp); + LambdaRecHepd = 6.526e-11 * gs.ad * gs.ne * gs.nHep; + LambdaRec = LambdaRecHp + LambdaRecHep + LambdaRecHepp + LambdaRecHepd; + LambdaFF = gs.bff * (gs.nHp + gs.nHep + 4 * gs.nHepp) * gs.ne; + LambdaPrim = LambdaExc + LambdaIon + LambdaRec + LambdaFF; + + if(All.ComovingIntegrationOn) + { + redshift = 1 / All.Time - 1; + LambdaCmptn = 5.65e-36 * gs.ne * (T - 2.73 * (1. + redshift)) * pow(1. + redshift, 4.) 
/ gs.nHcgs; + } + else + LambdaCmptn = 0; + + Lambda = LambdaPrim + LambdaMet + LambdaDust + LambdaCmptn + LambdaMol; + + Heat = 0; + if(pc.J_UV != 0) + Heat += (gs.nH0 * pc.epsH0 + gs.nHe0 * pc.epsHe0 + gs.nHep * pc.epsHep) / gs.nHcgs; + } + else /* here we're outside of tabulated rates, T>Tmax K */ + { + /* at high T (fully ionized); only free-free and Compton cooling are present. Assumes no heating. */ + Heat = 0; + + LambdaExcH0 = LambdaExcHep = LambdaIonH0 = LambdaIonHe0 = LambdaIonHep = LambdaRecHp = LambdaRecHep = LambdaRecHepp = + LambdaRecHepd = 0; + + /* very hot: H and He both fully ionized */ + gs.nHp = 1.0; + gs.nHep = 0; + gs.nHepp = gs.yhelium; + gs.ne = gs.nHp + 2.0 * gs.nHepp; + *nelec = gs.ne; /* note: in units of the hydrogen number density */ + + T = pow(10.0, logT); + LambdaFF = 1.42e-27 * sqrt(T) * (1.1 + 0.34 * exp(-(5.5 - logT) * (5.5 - logT) / 3)) * (gs.nHp + 4 * gs.nHepp) * gs.ne; + + if(All.ComovingIntegrationOn) + { + redshift = 1 / All.Time - 1; + /* add inverse Compton cooling off the microwave background */ + LambdaCmptn = 5.65e-36 * gs.ne * (T - 2.73 * (1. + redshift)) * pow(1. + redshift, 4.) / gs.nHcgs; + } + else + LambdaCmptn = 0; + + Lambda = LambdaFF + LambdaCmptn; + } + + return (Heat - Lambda); +} + +/*! \brief Make cooling rates interpolation table. + * + * Set up interpolation tables in T for cooling rates given in + * KWH, ApJS, 105, 19. + * + * \return void + */ +void MakeRateTable(void) +{ + int i; + double T; + double Tfact; + + gs.yhelium = (1 - gs.XH) / (4 * gs.XH); + gs.mhboltz = PROTONMASS / BOLTZMANN; + if(All.MinGasTemp > 0.0) + Tmin = log10(0.1 * All.MinGasTemp); + else + Tmin = 1.0; + deltaT = (Tmax - Tmin) / NCOOLTAB; + gs.ethmin = pow(10.0, Tmin) * (1. + gs.yhelium) / ((1. + 4. 
* gs.yhelium) * gs.mhboltz * GAMMA_MINUS1); + /* minimum internal energy for neutral gas */ + + for(i = 0; i <= NCOOLTAB; i++) + { + RateT[i].BetaH0 = RateT[i].BetaHep = RateT[i].Betaff = RateT[i].AlphaHp = RateT[i].AlphaHep = RateT[i].AlphaHepp = + RateT[i].Alphad = RateT[i].GammaeH0 = RateT[i].GammaeHe0 = RateT[i].GammaeHep = 0; + + T = pow(10.0, Tmin + deltaT * i); + Tfact = 1.0 / (1 + sqrt(T / 1.0e5)); + + /* collisional excitation */ + /* Cen 1992 */ + if(118348 / T < 70) + RateT[i].BetaH0 = 7.5e-19 * exp(-118348 / T) * Tfact; + if(473638 / T < 70) + RateT[i].BetaHep = 5.54e-17 * pow(T, -0.397) * exp(-473638 / T) * Tfact; + + /* free-free */ + RateT[i].Betaff = 1.43e-27 * sqrt(T) * (1.1 + 0.34 * exp(-(5.5 - log10(T)) * (5.5 - log10(T)) / 3)); + + /* recombination */ + /* Cen 1992 */ + /* Hydrogen II */ + RateT[i].AlphaHp = 8.4e-11 * pow(T / 1000, -0.2) / (1. + pow(T / 1.0e6, 0.7)) / sqrt(T); + /* Helium II */ + RateT[i].AlphaHep = 1.5e-10 * pow(T, -0.6353); + /* Helium III */ + RateT[i].AlphaHepp = 4. * RateT[i].AlphaHp; + + /* Cen 1992 */ + /* dielectric recombination */ + if(470000 / T < 70) + RateT[i].Alphad = 1.9e-3 * pow(T, -1.5) * exp(-470000 / T) * (1. + 0.3 * exp(-94000 / T)); + + /* collisional ionization */ + /* Cen 1992 */ + /* Hydrogen */ + if(157809.1 / T < 70) + RateT[i].GammaeH0 = 5.85e-11 * sqrt(T) * exp(-157809.1 / T) * Tfact; + /* Helium */ + if(285335.4 / T < 70) + RateT[i].GammaeHe0 = 2.38e-11 * sqrt(T) * exp(-285335.4 / T) * Tfact; + /* Hellium II */ + if(631515.0 / T < 70) + RateT[i].GammaeHep = 5.68e-12 * sqrt(T) * exp(-631515.0 / T) * Tfact; + } +} + +/*! \brief Read table input for ionizing parameters. + * + * \param[in] fname Name of file that contains the tabulated parameters. + * \param[in] which Flag used to identify the type of the ionizing background + * (0 = UV background, 1 = AGN background, 2=RADCOOL). 
+ * + * \return void + */ +void ReadIonizeParams(char *fname, int which) +{ + int iter, i; + FILE *fdcool; + float dummy; + + if(which == 0) + { + NheattabUVB = 0; + + for(iter = 0, i = 0; iter < 2; iter++) + { + if(!(fdcool = fopen(fname, "r"))) + terminate("COOLING: cannot read ionization table in file `%s'\n", fname); + if(iter == 0) + while(fscanf(fdcool, "%g %g %g %g %g %g %g", &dummy, &dummy, &dummy, &dummy, &dummy, &dummy, &dummy) != EOF) + NheattabUVB++; + if(iter == 1) + while(fscanf(fdcool, "%g %g %g %g %g %g %g", &PhotoTUVB[i].variable, &PhotoTUVB[i].gH0, &PhotoTUVB[i].gHe, + &PhotoTUVB[i].gHep, &PhotoTUVB[i].eH0, &PhotoTUVB[i].eHe, &PhotoTUVB[i].eHep) != EOF) + i++; + fclose(fdcool); + + if(iter == 0) + { + PhotoTUVB = (PhotoTable *)mymalloc("PhotoT", NheattabUVB * sizeof(PhotoTable)); + mpi_printf("COOLING: read ionization table with %d entries in file `%s'.\n", NheattabUVB, fname); + } + } + /* ignore zeros at end of treecool file */ + for(i = 0; i < NheattabUVB; ++i) + if(PhotoTUVB[i].gH0 == 0.0) + break; + + NheattabUVB = i; + mpi_printf("COOLING: using %d ionization table entries from file `%s'.\n", NheattabUVB, fname); + } +} + +/*! \brief Set the ionization parameters for the UV background. 
+ * + * \return void + */ +void IonizeParamsUVB(void) +{ + int i, ilow; + double logz, dzlow, dzhi; + double redshift; + + if(All.ComovingIntegrationOn) + redshift = 1 / All.Time - 1; + else + { + redshift = 0.0; + } + + logz = log10(redshift + 1.0); + ilow = 0; + for(i = 0; i < NheattabUVB; i++) + { + if(PhotoTUVB[i].variable < logz) + ilow = i; + else + break; + } + + dzlow = logz - PhotoTUVB[ilow].variable; + dzhi = PhotoTUVB[ilow + 1].variable - logz; + + if(NheattabUVB == 0 || logz > PhotoTUVB[NheattabUVB - 1].variable || PhotoTUVB[ilow].gH0 == 0 || PhotoTUVB[ilow + 1].gH0 == 0) + { + SetZeroIonization(); + return; + } + else + pc.J_UV = 1; + + pc.gJH0 = pow(10., (dzhi * log10(PhotoTUVB[ilow].gH0) + dzlow * log10(PhotoTUVB[ilow + 1].gH0)) / (dzlow + dzhi)); + pc.gJHe0 = pow(10., (dzhi * log10(PhotoTUVB[ilow].gHe) + dzlow * log10(PhotoTUVB[ilow + 1].gHe)) / (dzlow + dzhi)); + pc.gJHep = pow(10., (dzhi * log10(PhotoTUVB[ilow].gHep) + dzlow * log10(PhotoTUVB[ilow + 1].gHep)) / (dzlow + dzhi)); + pc.epsH0 = pow(10., (dzhi * log10(PhotoTUVB[ilow].eH0) + dzlow * log10(PhotoTUVB[ilow + 1].eH0)) / (dzlow + dzhi)); + pc.epsHe0 = pow(10., (dzhi * log10(PhotoTUVB[ilow].eHe) + dzlow * log10(PhotoTUVB[ilow + 1].eHe)) / (dzlow + dzhi)); + pc.epsHep = pow(10., (dzhi * log10(PhotoTUVB[ilow].eHep) + dzlow * log10(PhotoTUVB[ilow + 1].eHep)) / (dzlow + dzhi)); + + return; +} + +/*! \brief Reset the ionization parameters. + * + * \return void + */ +void SetZeroIonization(void) { memset(&pc, 0, sizeof(PhotoCurrent)); } + +/*! \brief Wrapper function to set the ionizing background. + * + * \return void + */ +void IonizeParams(void) { IonizeParamsUVB(); } + +/*! \brief Initialize the cooling module. + * + * This function initializes the cooling module. In particular, + * it allocates the memory for the cooling rate and ionization tables + * and initializes them. 
+ * + * \return void + */ +void InitCool(void) +{ + /* set default hydrogen mass fraction */ + gs.XH = HYDROGEN_MASSFRAC; + + /* zero photo-ionization/heating rates */ + SetZeroIonization(); + + /* allocate and construct rate table */ + RateT = (RateTable *)mymalloc("RateT", (NCOOLTAB + 1) * sizeof(RateTable)); + ; + MakeRateTable(); + + /* read photo tables */ + ReadIonizeParams(All.TreecoolFile, 0); + + mpi_printf("GFM_COOLING: time, time begin = %le\t%le\n", All.Time, All.TimeBegin); + All.Time = All.TimeBegin; + set_cosmo_factors_for_current_time(); + + IonizeParams(); +} + +/*! \brief Apply the isochoric cooling to all the active gas cells. + * + * \return void + */ +void cooling_only(void) /* normal cooling routine when star formation is disabled */ +{ + int idx, i; + + CPU_Step[CPU_MISC] += measure_time(); + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i >= 0) + { + if(P[i].Mass == 0 && P[i].ID == 0) + continue; /* skip cells that have been swallowed or eliminated */ + + cool_cell(i); + } + } + CPU_Step[CPU_COOLINGSFR] += measure_time(); +} + +/*! \brief Apply the isochoric cooling to a given gas cell. + * + * This function applies the normal isochoric cooling to a single gas cell. + * Once the cooling has been applied according to one of the cooling models + * implemented, the internal energy per unit mass, the total energy and the + * pressure of the cell are updated. + * + * \param[in] i Index of the gas cell to which cooling is applied. + * + * \return void + */ +void cool_cell(int i) +{ + double dt, dtime, ne = 1; + double unew, dens, dtcool; + + dens = SphP[i].Density; + + dt = (P[i].TimeBinHydro ? 
(((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval; + + dtime = All.cf_atime * dt / All.cf_time_hubble_a; + + dtcool = dtime; + + ne = SphP[i].Ne; /* electron abundance (gives ionization state and mean molecular weight) */ + unew = DoCooling(dmax(All.MinEgySpec, SphP[i].Utherm), dens * All.cf_a3inv, dtcool, &ne); + SphP[i].Ne = ne; + + if(unew < 0) + terminate("invalid temperature: Thistask=%d i=%d unew=%g\n", ThisTask, i, unew); + + double du = unew - SphP[i].Utherm; + + if(unew < All.MinEgySpec) + du = All.MinEgySpec - SphP[i].Utherm; + + SphP[i].Utherm += du; + SphP[i].Energy += All.cf_atime * All.cf_atime * du * P[i].Mass; + +#ifdef OUTPUT_COOLHEAT + if(dtime > 0) + SphP[i].CoolHeat = du * P[i].Mass / dtime; +#endif /* #ifdef OUTPUT_COOLHEAT */ + + set_pressure_of_cell(i); +} + +#endif /* #ifdef COOLING */ diff --git a/src/amuse/community/arepo/src/cooling/cooling_proto.h b/src/amuse/community/arepo/src/cooling/cooling_proto.h new file mode 100644 index 0000000000..cbd304a838 --- /dev/null +++ b/src/amuse/community/arepo/src/cooling/cooling_proto.h @@ -0,0 +1,49 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/cooling/cooling_proto.h + * \date 05/2018 + * \brief Header for cooling functions. + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 27.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef INLINE_FUNC +#define INLINE_FUNC +#endif /* #ifndef INLINE_FUNC */ + +void SetOutputGasState(int i, double *ne_guess, double *nH0, double *coolrate); + +double convert_u_to_temp(double u, double rho, double *ne_guess); +double CoolingRate(double logT, double rho, double *nelec); +double CoolingRateFromU(double u, double rho, double *ne_guess); +double DoCooling(double u_old, double rho, double dt, double *ne_guess); +double GetCoolingTime(double u_old, double rho, double *ne_guess); + +void find_abundances_and_rates(double logT, double rho, double *ne_guess); +void InitCool(void); +void IonizeParamsUVB(void); +void IonizeParams(void); +void ReadIonizeParams(char *fname, int which); +void SetZeroIonization(void); diff --git a/src/amuse/community/arepo/src/cooling/cooling_vars.h b/src/amuse/community/arepo/src/cooling/cooling_vars.h new file mode 100644 index 0000000000..22a737522d --- /dev/null +++ b/src/amuse/community/arepo/src/cooling/cooling_vars.h @@ -0,0 +1,80 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/cooling/cooling_vars.h + * \date 05/2018 + * \brief Variables for cooling. + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 27.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#define NCOOLTAB 2000 +#define SMALLNUM 1.0e-60 +#define COOLLIM 0.1 +#define HEATLIM 20.0 +#define eV_to_K 11606.0 +#define eV_to_erg 1.60184e-12 +#define MAX_TABLESIZE 250 /* Max # of lines in TREECOOL */ + +/* data for gas state */ +typedef struct +{ + double ne, necgs, nHcgs; + double bH0, bHep, bff, aHp, aHep, aHepp, ad, geH0, geHe0, geHep; + double gJH0ne, gJHe0ne, gJHepne; + double nH0, nHp, nHep, nHe0, nHepp; + double XH, yhelium; + double mhboltz; + double ethmin; /* minimum internal energy for neutral gas */ + double mu; +} GasState; + +/* tabulated rates */ +typedef struct +{ + double BetaH0, BetaHep, Betaff; + double AlphaHp, AlphaHep, Alphad, AlphaHepp; + double GammaeH0, GammaeHe0, GammaeHep; +} RateTable; + +/* photo-ionization/heating rate table */ +typedef struct +{ + float variable; /* logz for UVB */ + float gH0, gHe, gHep; /* photo-ionization rates */ + float eH0, eHe, eHep; /* photo-heating rates */ +} PhotoTable; + +/* current interpolated photo-ionization/heating rates */ +typedef struct +{ + char J_UV; + double gJH0, gJHep, gJHe0, epsH0, epsHep, epsHe0; +} PhotoCurrent; + +/* cooling data */ +typedef struct +{ + double u_old_input, rho_input, dt_input, ne_guess_input; +} DoCoolData; diff --git a/src/amuse/community/arepo/src/debug_md5/Md5.c b/src/amuse/community/arepo/src/debug_md5/Md5.c new file mode 100644 index 0000000000..5ac2d223fe --- /dev/null +++ b/src/amuse/community/arepo/src/debug_md5/Md5.c @@ -0,0 +1,472 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/debug_md5/Md5.c + * \date MM/YYYY + * \brief + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + */ + +#include "../main/allvars.h" +#include "../main/proto.h" + +/* + ********************************************************************** + ** md5.c ** + ** RSA Data Security, Inc. MD5 Message Digest Algorithm ** + ** Created: 2/17/90 RLR ** + ** Revised: 1/91 SRD,AJ,BSK,JT Reference C Version ** + ********************************************************************** + */ + +/* + ********************************************************************** + ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. ** + ** ** + ** License to copy and use this software is granted provided that ** + ** it is identified as the "RSA Data Security, Inc. MD5 Message ** + ** Digest Algorithm" in all material mentioning or referencing this ** + ** software or this function. ** + ** ** + ** License is also granted to make and use derivative works ** + ** provided that such works are identified as "derived from the RSA ** + ** Data Security, Inc. 
MD5 Message Digest Algorithm" in all ** + ** material mentioning or referencing the derived work. ** + ** ** + ** RSA Data Security, Inc. makes no representations concerning ** + ** either the merchantability of this software or the suitability ** + ** of this software for any particular purpose. It is provided "as ** + ** is" without express or implied warranty of any kind. ** + ** ** + ** These notices must be retained in any copies of any part of this ** + ** documentation and/or software. ** + ********************************************************************** + */ + +/* -- include the following line if the md5.h header file is separate -- */ +#include "Md5.h" + +/* forward declaration */ +static void Transform(); +static void MD5Update(MD5_CTX *mdContext, unsigned char *inBuf, unsigned int inLen); + +static unsigned char PADDING[64] = {0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + +/* F, G and H are basic MD5 functions: selection, majority, parity */ +#define F(x, y, z) (((x) & (y)) | ((~x) & (z))) +#define G(x, y, z) (((x) & (z)) | ((y) & (~z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) +#define I(x, y, z) ((y) ^ ((x) | (~z))) + +/* ROTATE_LEFT rotates x left n bits */ +#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) + +/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4 */ +/* Rotation is separate from addition to prevent recomputation */ +#define FF(a, b, c, d, x, s, ac) \ + { \ + (a) += F((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = ROTATE_LEFT((a), (s)); \ + (a) += (b); \ + } +#define GG(a, b, c, d, x, s, ac) \ + { \ + (a) += G((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = 
ROTATE_LEFT((a), (s)); \ + (a) += (b); \ + } +#define HH(a, b, c, d, x, s, ac) \ + { \ + (a) += H((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = ROTATE_LEFT((a), (s)); \ + (a) += (b); \ + } +#define II(a, b, c, d, x, s, ac) \ + { \ + (a) += I((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = ROTATE_LEFT((a), (s)); \ + (a) += (b); \ + } + +void MD5Init(MD5_CTX *mdContext) +{ + mdContext->i[0] = mdContext->i[1] = (UINT4)0; + + /* Load magic initialization constants. + */ + mdContext->buf[0] = (UINT4)0x67452301; + mdContext->buf[1] = (UINT4)0xefcdab89; + mdContext->buf[2] = (UINT4)0x98badcfe; + mdContext->buf[3] = (UINT4)0x10325476; +} + +void MD5UpdateLong(MD5_CTX *mdContext, unsigned char *inBuf, unsigned long long inLenLong) +{ + while(inLenLong > 0) + { + unsigned int inLen = 0x10000000; + if(inLen > inLenLong) + inLen = inLenLong; + MD5Update(mdContext, inBuf, inLen); + inBuf += inLen; + inLenLong -= inLen; + } +} + +void MD5Update(MD5_CTX *mdContext, unsigned char *inBuf, unsigned int inLen) +{ + UINT4 in[16]; + int mdi; + unsigned int i, ii; + + /* compute number of bytes mod 64 */ + mdi = (int)((mdContext->i[0] >> 3) & 0x3F); + + /* update number of bits */ + if((mdContext->i[0] + ((UINT4)inLen << 3)) < mdContext->i[0]) + mdContext->i[1]++; + mdContext->i[0] += ((UINT4)inLen << 3); + mdContext->i[1] += ((UINT4)inLen >> 29); + + while(inLen--) + { + /* add new character to buffer, increment mdi */ + mdContext->in[mdi++] = *inBuf++; + + /* transform if necessary */ + if(mdi == 0x40) + { + for(i = 0, ii = 0; i < 16; i++, ii += 4) + in[i] = (((UINT4)mdContext->in[ii + 3]) << 24) | (((UINT4)mdContext->in[ii + 2]) << 16) | + (((UINT4)mdContext->in[ii + 1]) << 8) | ((UINT4)mdContext->in[ii]); + Transform(mdContext->buf, in); + mdi = 0; + } + } +} + +void MD5Final(MD5_CTX *mdContext) +{ + UINT4 in[16]; + int mdi; + unsigned int i, ii; + unsigned int padLen; + + /* save number of bits */ + in[14] = mdContext->i[0]; + in[15] = mdContext->i[1]; + + /* compute number of bytes 
mod 64 */ + mdi = (int)((mdContext->i[0] >> 3) & 0x3F); + + /* pad out to 56 mod 64 */ + padLen = (mdi < 56) ? (56 - mdi) : (120 - mdi); + MD5Update(mdContext, PADDING, padLen); + + /* append length in bits and transform */ + for(i = 0, ii = 0; i < 14; i++, ii += 4) + in[i] = (((UINT4)mdContext->in[ii + 3]) << 24) | (((UINT4)mdContext->in[ii + 2]) << 16) | (((UINT4)mdContext->in[ii + 1]) << 8) | + ((UINT4)mdContext->in[ii]); + Transform(mdContext->buf, in); + + /* store buffer in digest */ + for(i = 0, ii = 0; i < 4; i++, ii += 4) + { + mdContext->digest[ii] = (unsigned char)(mdContext->buf[i] & 0xFF); + mdContext->digest[ii + 1] = (unsigned char)((mdContext->buf[i] >> 8) & 0xFF); + mdContext->digest[ii + 2] = (unsigned char)((mdContext->buf[i] >> 16) & 0xFF); + mdContext->digest[ii + 3] = (unsigned char)((mdContext->buf[i] >> 24) & 0xFF); + } +} + +/* Basic MD5 step. Transform buf based on in. + */ +static void Transform(buf, in) UINT4 *buf; +UINT4 *in; +{ + UINT4 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; + + /* Round 1 */ +#define S11 7 +#define S12 12 +#define S13 17 +#define S14 22 + FF(a, b, c, d, in[0], S11, 3614090360); /* 1 */ + FF(d, a, b, c, in[1], S12, 3905402710); /* 2 */ + FF(c, d, a, b, in[2], S13, 606105819); /* 3 */ + FF(b, c, d, a, in[3], S14, 3250441966); /* 4 */ + FF(a, b, c, d, in[4], S11, 4118548399); /* 5 */ + FF(d, a, b, c, in[5], S12, 1200080426); /* 6 */ + FF(c, d, a, b, in[6], S13, 2821735955); /* 7 */ + FF(b, c, d, a, in[7], S14, 4249261313); /* 8 */ + FF(a, b, c, d, in[8], S11, 1770035416); /* 9 */ + FF(d, a, b, c, in[9], S12, 2336552879); /* 10 */ + FF(c, d, a, b, in[10], S13, 4294925233); /* 11 */ + FF(b, c, d, a, in[11], S14, 2304563134); /* 12 */ + FF(a, b, c, d, in[12], S11, 1804603682); /* 13 */ + FF(d, a, b, c, in[13], S12, 4254626195); /* 14 */ + FF(c, d, a, b, in[14], S13, 2792965006); /* 15 */ + FF(b, c, d, a, in[15], S14, 1236535329); /* 16 */ + + /* Round 2 */ +#define S21 5 +#define S22 9 +#define S23 14 +#define S24 
20 + GG(a, b, c, d, in[1], S21, 4129170786); /* 17 */ + GG(d, a, b, c, in[6], S22, 3225465664); /* 18 */ + GG(c, d, a, b, in[11], S23, 643717713); /* 19 */ + GG(b, c, d, a, in[0], S24, 3921069994); /* 20 */ + GG(a, b, c, d, in[5], S21, 3593408605); /* 21 */ + GG(d, a, b, c, in[10], S22, 38016083); /* 22 */ + GG(c, d, a, b, in[15], S23, 3634488961); /* 23 */ + GG(b, c, d, a, in[4], S24, 3889429448); /* 24 */ + GG(a, b, c, d, in[9], S21, 568446438); /* 25 */ + GG(d, a, b, c, in[14], S22, 3275163606); /* 26 */ + GG(c, d, a, b, in[3], S23, 4107603335); /* 27 */ + GG(b, c, d, a, in[8], S24, 1163531501); /* 28 */ + GG(a, b, c, d, in[13], S21, 2850285829); /* 29 */ + GG(d, a, b, c, in[2], S22, 4243563512); /* 30 */ + GG(c, d, a, b, in[7], S23, 1735328473); /* 31 */ + GG(b, c, d, a, in[12], S24, 2368359562); /* 32 */ + + /* Round 3 */ +#define S31 4 +#define S32 11 +#define S33 16 +#define S34 23 + HH(a, b, c, d, in[5], S31, 4294588738); /* 33 */ + HH(d, a, b, c, in[8], S32, 2272392833); /* 34 */ + HH(c, d, a, b, in[11], S33, 1839030562); /* 35 */ + HH(b, c, d, a, in[14], S34, 4259657740); /* 36 */ + HH(a, b, c, d, in[1], S31, 2763975236); /* 37 */ + HH(d, a, b, c, in[4], S32, 1272893353); /* 38 */ + HH(c, d, a, b, in[7], S33, 4139469664); /* 39 */ + HH(b, c, d, a, in[10], S34, 3200236656); /* 40 */ + HH(a, b, c, d, in[13], S31, 681279174); /* 41 */ + HH(d, a, b, c, in[0], S32, 3936430074); /* 42 */ + HH(c, d, a, b, in[3], S33, 3572445317); /* 43 */ + HH(b, c, d, a, in[6], S34, 76029189); /* 44 */ + HH(a, b, c, d, in[9], S31, 3654602809); /* 45 */ + HH(d, a, b, c, in[12], S32, 3873151461); /* 46 */ + HH(c, d, a, b, in[15], S33, 530742520); /* 47 */ + HH(b, c, d, a, in[2], S34, 3299628645); /* 48 */ + + /* Round 4 */ +#define S41 6 +#define S42 10 +#define S43 15 +#define S44 21 + II(a, b, c, d, in[0], S41, 4096336452); /* 49 */ + II(d, a, b, c, in[7], S42, 1126891415); /* 50 */ + II(c, d, a, b, in[14], S43, 2878612391); /* 51 */ + II(b, c, d, a, in[5], S44, 4237533241); /* 
52 */ + II(a, b, c, d, in[12], S41, 1700485571); /* 53 */ + II(d, a, b, c, in[3], S42, 2399980690); /* 54 */ + II(c, d, a, b, in[10], S43, 4293915773); /* 55 */ + II(b, c, d, a, in[1], S44, 2240044497); /* 56 */ + II(a, b, c, d, in[8], S41, 1873313359); /* 57 */ + II(d, a, b, c, in[15], S42, 4264355552); /* 58 */ + II(c, d, a, b, in[6], S43, 2734768916); /* 59 */ + II(b, c, d, a, in[13], S44, 1309151649); /* 60 */ + II(a, b, c, d, in[4], S41, 4149444226); /* 61 */ + II(d, a, b, c, in[11], S42, 3174756917); /* 62 */ + II(c, d, a, b, in[2], S43, 718787259); /* 63 */ + II(b, c, d, a, in[9], S44, 3951481745); /* 64 */ + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} + +/* + ********************************************************************** + ** End of md5.c ** + ******************************* (cut) ******************************** + */ + +/* + ********************************************************************** + ** md5driver.c -- sample routines to test ** + ** RSA Data Security, Inc. MD5 message digest algorithm. ** + ** Created: 2/16/90 RLR ** + ** Updated: 1/91 SRD ** + ********************************************************************** + */ + +/* + ********************************************************************** + ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. ** + ** ** + ** RSA Data Security, Inc. makes no representations concerning ** + ** either the merchantability of this software or the suitability ** + ** of this software for any particular purpose. It is provided "as ** + ** is" without express or implied warranty of any kind. ** + ** ** + ** These notices must be retained in any copies of any part of this ** + ** documentation and/or software. 
** + ********************************************************************** + */ + +#include +#include +#include +#include +/* -- include the following file if the file md5.h is separate -- */ +/* #include "md5.h" */ + +/* Prints message digest buffer in mdContext as 32 hexadecimal digits. + Order is from low-order byte to high-order byte of digest. + Each byte is printed with high-order hexadecimal digit first. + */ +static void MDPrint(mdContext) MD5_CTX *mdContext; +{ + int i; + + for(i = 0; i < 16; i++) + printf("%02x", mdContext->digest[i]); +} + +/* size of test block */ +#define TEST_BLOCK_SIZE 1000 + +/* number of blocks to process */ +#define TEST_BLOCKS 10000 + +/* number of test bytes = TEST_BLOCK_SIZE * TEST_BLOCKS */ +static long TEST_BYTES = (long)TEST_BLOCK_SIZE * (long)TEST_BLOCKS; + +/* A time trial routine, to measure the speed of MD5. + Measures wall time required to digest TEST_BLOCKS * TEST_BLOCK_SIZE + characters. + */ +static void MDTimeTrial() +{ + MD5_CTX mdContext; + time_t endTime, startTime; + unsigned char data[TEST_BLOCK_SIZE]; + unsigned int i; + + /* initialize test data */ + for(i = 0; i < TEST_BLOCK_SIZE; i++) + data[i] = (unsigned char)(i & 0xFF); + + /* start timer */ + printf("MD5 time trial. Processing %ld characters...\n", TEST_BYTES); + time(&startTime); + + /* digest data in TEST_BLOCK_SIZE byte blocks */ + MD5Init(&mdContext); + for(i = TEST_BLOCKS; i > 0; i--) + MD5Update(&mdContext, data, TEST_BLOCK_SIZE); + MD5Final(&mdContext); + + /* stop timer, get time difference */ + time(&endTime); + MDPrint(&mdContext); + printf(" is digest of test input.\n"); + printf("Seconds to process test input: %ld\n", (long)(endTime - startTime)); + printf("Characters processed per second: %ld\n", TEST_BYTES / (endTime - startTime)); +} + +/* Computes the message digest for string inString. + Prints out message digest, a space, the string (in quotes) and a + carriage return. 
+ */ +static void MDString(inString) char *inString; +{ + MD5_CTX mdContext; + unsigned int len = strlen(inString); + + MD5Init(&mdContext); + MD5Update(&mdContext, (unsigned char *)inString, len); + MD5Final(&mdContext); + MDPrint(&mdContext); + printf(" \"%s\"\n\n", inString); +} + +/* Computes the message digest for a specified file. + Prints out message digest, a space, the file name, and a carriage + return. + */ +static void MDFile(filename) char *filename; +{ + FILE *inFile = fopen(filename, "rb"); + MD5_CTX mdContext; + int bytes; + unsigned char data[1024]; + + if(inFile == NULL) + { + printf("%s can't be opened.\n", filename); + return; + } + + MD5Init(&mdContext); + while((bytes = fread(data, 1, 1024, inFile)) != 0) + MD5Update(&mdContext, data, bytes); + MD5Final(&mdContext); + MDPrint(&mdContext); + printf(" %s\n", filename); + fclose(inFile); +} + +/* Writes the message digest of the data from stdin onto stdout, + followed by a carriage return. + */ +static void MDFilter() +{ + MD5_CTX mdContext; + int bytes; + unsigned char data[16]; + + MD5Init(&mdContext); + while((bytes = fread(data, 1, 16, stdin)) != 0) + MD5Update(&mdContext, data, bytes); + MD5Final(&mdContext); + MDPrint(&mdContext); + printf("\n"); +} + +/* Runs a standard suite of test data. + */ +static void MDTestSuite() +{ + printf("MD5 test suite results:\n\n"); + MDString(""); + MDString("a"); + MDString("abc"); + MDString("message digest"); + MDString("abcdefghijklmnopqrstuvwxyz"); + MDString("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"); + MDString( + "1234567890123456789012345678901234567890\ +1234567890123456789012345678901234567890"); + /* Contents of file foo are "abc" */ + MDFile("foo"); +} diff --git a/src/amuse/community/arepo/src/debug_md5/Md5.h b/src/amuse/community/arepo/src/debug_md5/Md5.h new file mode 100644 index 0000000000..df809ed5d2 --- /dev/null +++ b/src/amuse/community/arepo/src/debug_md5/Md5.h @@ -0,0 +1,92 @@ +/*! 
+ * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <http://www.gnu.org/licenses/>. + * + * \file src/debug_md5/Md5.h + * \date 05/2018 + * \brief Header for implementation of MD5 checksums. + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 27.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +/* + ********************************************************************** + ** md5.h -- Header file for implementation of MD5 ** + ** RSA Data Security, Inc. MD5 Message Digest Algorithm ** + ** Created: 2/17/90 RLR ** + ** Revised: 12/27/90 SRD,AJ,BSK,JT Reference C version ** + ** Revised (for MD5): RLR 4/27/91 ** + ** -- G modified to have y&~z instead of y&z ** + ** -- FF, GG, HH modified to add in last register done ** + ** -- Access pattern: round 2 works mod 5, round 3 works mod 3 ** + ** -- distinct additive constant for each step ** + ** -- round 4 added, working mod 7 ** + ********************************************************************** + */ + +/* + ********************************************************************** + ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. 
**
 **                                                                  **
 ** License to copy and use this software is granted provided that   **
 ** it is identified as the "RSA Data Security, Inc. MD5 Message     **
 ** Digest Algorithm" in all material mentioning or referencing this **
 ** software or this function.                                       **
 **                                                                  **
 ** License is also granted to make and use derivative works         **
 ** provided that such works are identified as "derived from the RSA **
 ** Data Security, Inc. MD5 Message Digest Algorithm" in all         **
 ** material mentioning or referencing the derived work.             **
 **                                                                  **
 ** RSA Data Security, Inc. makes no representations concerning      **
 ** either the merchantability of this software or the suitability   **
 ** of this software for any particular purpose. It is provided "as  **
 ** is" without express or implied warranty of any kind.             **
 **                                                                  **
 ** These notices must be retained in any copies of any part of this **
 ** documentation and/or software.                                   **
 **********************************************************************
 */

/* typedef a 32 bit type */
/* NOTE(review): `unsigned long int` is only guaranteed to be at least 32
   bits and is 64 bits wide on LP64 platforms, so UINT4 is not a 32 bit
   type there.  Confirm whether Md5.c depends on 32-bit wrap-around before
   treating the digests as standard MD5; changing the typedef would change
   the layout of MD5_CTX and the printed checksums. */
typedef unsigned long int UINT4;

/* Data structure for MD5 (Message Digest) computation */
/* One context holds the running state of a single digest computation; the
   result is read from `digest` once MD5Final has been called. */
typedef struct
{
  UINT4 i[2];               /* number of _bits_ handled mod 2^64 */
  UINT4 buf[4];             /* scratch buffer */
  unsigned char in[64];     /* input buffer */
  unsigned char digest[16]; /* actual digest after MD5Final call */
} MD5_CTX;

/* Public entry points.  The definitions live in Md5.c; the descriptions
   below follow the parameter names and the comment on `digest` above.
   Presumably used in the order MD5Init -> MD5UpdateLong -> MD5Final --
   TODO confirm against Md5.c. */

/* Finishes the computation; afterwards mdContext->digest holds the digest. */
void MD5Final(MD5_CTX *mdContext);
/* The variant taking an `unsigned int` length is not exported by this
   header; the prototype is kept commented out. */
// void MD5Update(MD5_CTX * mdContext, unsigned char *inBuf, unsigned int inLen);
/* Feeds inLenLong bytes starting at inBuf into the context (length given
   as unsigned long long). */
void MD5UpdateLong(MD5_CTX *mdContext, unsigned char *inBuf, unsigned long long inLenLong);
/* Prepares mdContext for a new digest computation. */
void MD5Init(MD5_CTX *mdContext);

/*
 **********************************************************************
 ** End of md5.h                                                     **
 ******************************* (cut) ********************************
 */
diff --git a/src/amuse/community/arepo/src/debug_md5/calc_checksum.c b/src/amuse/community/arepo/src/debug_md5/calc_checksum.c new file mode 100644 index 0000000000..3f710ef036 --- /dev/null +++
b/src/amuse/community/arepo/src/debug_md5/calc_checksum.c @@ -0,0 +1,121 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/debug_md5/calc_checksum.c + * \date 05/2018 + * \brief Functions to calculate an MD5 checksum from a dataset. + * \details contains functions: + * void calc_memory_checksum(void *base, size_t bytes) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "Md5.h" + +/*! \brief Calculates a md5 checksum (on all MPI tasks) and prints it. + * + * \param[in] base Pointer to start of data. + * \param[in] bytes Number of bytes to be checked. 
+ * + * \return void + */ +void calc_memory_checksum(void *base, size_t bytes) +{ + MD5_CTX sum; + union + { + unsigned char digest[16]; + int val[4]; + } u, uglob; + + MD5Init(&sum); + MD5UpdateLong(&sum, base, bytes); + MD5Final(&sum); + + int i; + + for(i = 0; i < 16; i++) + u.digest[i] = sum.digest[i]; + + MPI_Allreduce(u.val, uglob.val, 4, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + printf("Step=%d MD5=", All.NumCurrentTiStep); + for(i = 0; i < 16; i++) + printf("%02x", uglob.digest[i]); + printf("\n"); + } +} + +#ifdef RESTART_DEBUG +/*! \brief Calculates md5 checksums of main data structures of a restart file. + * + * \return void + */ +void log_restart_debug(void) +{ + MD5_CTX sum; + union + { + unsigned char digest[16]; + int val[4]; + } u, uglob_P, uglob_SphP; + int i; + + MD5Init(&sum); + MD5UpdateLong(&sum, (void *)P, NumPart * sizeof(struct particle_data)); + MD5Final(&sum); + + for(i = 0; i < 16; i++) + u.digest[i] = sum.digest[i]; + + MPI_Allreduce(u.val, uglob_P.val, 4, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + MD5Init(&sum); + MD5UpdateLong(&sum, (void *)SphP, NumGas * sizeof(struct sph_particle_data)); + MD5Final(&sum); + + for(i = 0; i < 16; i++) + u.digest[i] = sum.digest[i]; + + MPI_Allreduce(u.val, uglob_SphP.val, 4, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + fprintf(FdRestartTest, "\n"); + fprintf(FdRestartTest, "Step=%8d P[] ", All.NumCurrentTiStep); + for(i = 0; i < 16; i++) + fprintf(FdRestartTest, "%02x", uglob_P.digest[i]); + fprintf(FdRestartTest, "\n"); + fprintf(FdRestartTest, " SphP[] "); + for(i = 0; i < 16; i++) + fprintf(FdRestartTest, "%02x", uglob_SphP.digest[i]); + fprintf(FdRestartTest, "\n"); + fflush(FdRestartTest); + } +} +#endif diff --git a/src/amuse/community/arepo/src/domain/bsd_tree.h b/src/amuse/community/arepo/src/domain/bsd_tree.h new file mode 100644 index 0000000000..c8f763abdf --- /dev/null +++ b/src/amuse/community/arepo/src/domain/bsd_tree.h @@ -0,0 +1,865 @@ +/*! 
+ * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <http://www.gnu.org/licenses/>. + * + * \file src/domain/bsd_tree.h + * \date 05/2018 + * \brief BSD tree. + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 29.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +/* $NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $ */ +/* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */ +/* $FreeBSD: src/sys/sys/tree.h,v 1.9.4.2 2012/11/17 11:37:26 svnexp Exp $ */ + +/*- + * Copyright 2002 Niels Provos <provos@citi.umich.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_TREE_H_ +#define _SYS_TREE_H_ + +#include + +/* + * This file defines data structures for different types of trees: + * splay trees and red-black trees. + * + * A splay tree is a self-organizing data structure. Every operation + * on the tree causes a splay to happen. The splay moves the requested + * node to the root of the tree and partly rebalances it. + * + * This has the benefit that request locality causes faster lookups as + * the requested nodes move to the top of the tree. On the other hand, + * every lookup causes memory writes. + * + * The Balance Theorem bounds the total access time for m operations + * and n inserts on an initially empty tree as O((m + n)lg n). The + * amortized cost for a sequence of m accesses to a splay tree is O(lg n); + * + * A red-black tree is a binary search tree with the node color as an + * extra attribute. It fulfills a set of conditions: + * - every search path from the root to a leaf consists of the + * same number of black nodes, + * - each red node (except for the root) has a black parent, + * - each leaf node is black. + * + * Every operation on a red-black tree is bounded as O(lg n). + * The maximum height of a red-black tree is 2lg (n+1). 
 */

/* Declares `struct name`, the head of a splay tree of `struct type`
 * nodes; it holds only the root pointer. */
#define SPLAY_HEAD(name, type) \
  struct name \
  { \
    struct type *sph_root; /* root of the tree */ \
  }

/* Static initializer for an empty head (root pointer NULL); the argument
 * is not used by the expansion. */
#define SPLAY_INITIALIZER(root) \
  { \
    NULL \
  }

/* Run-time counterpart of SPLAY_INITIALIZER: empties the head `root`. */
#define SPLAY_INIT(root) \
  do \
    { \
      (root)->sph_root = NULL; \
    } \
  while(/*CONSTCOND*/ 0)

/* Link structure (left/right child pointers) that the accessors below
 * reach as `(elm)->field`, i.e. a member named `field` of `struct type`. */
#define SPLAY_ENTRY(type) \
  struct \
  { \
    struct type *spe_left;  /* left element */ \
    struct type *spe_right; /* right element */ \
  }

/* Accessors; all of them expand to lvalues. */
#define SPLAY_LEFT(elm, field) (elm)->field.spe_left
#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right
#define SPLAY_ROOT(head) (head)->sph_root
#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL)

/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp holds SPLAY_{RIGHT,LEFT} */
#define SPLAY_ROTATE_RIGHT(head, tmp, field) \
  do \
    { \
      SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \
      SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
      (head)->sph_root = tmp; \
    } \
  while(/*CONSTCOND*/ 0)

#define SPLAY_ROTATE_LEFT(head, tmp, field) \
  do \
    { \
      SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \
      SPLAY_LEFT(tmp, field) = (head)->sph_root; \
      (head)->sph_root = tmp; \
    } \
  while(/*CONSTCOND*/ 0)

/* SPLAY_LINKLEFT / SPLAY_LINKRIGHT / SPLAY_ASSEMBLE are the building
 * blocks of name##_SPLAY and name##_SPLAY_MINMAX generated further below:
 * the LINK macros hang the current root below `tmp`, advance `tmp` to it
 * and step the root one level down; ASSEMBLE joins the two partial trees
 * collected in the temporary `node` back onto the final root. */
#define SPLAY_LINKLEFT(head, tmp, field) \
  do \
    { \
      SPLAY_LEFT(tmp, field) = (head)->sph_root; \
      tmp = (head)->sph_root; \
      (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \
    } \
  while(/*CONSTCOND*/ 0)

#define SPLAY_LINKRIGHT(head, tmp, field) \
  do \
    { \
      SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
      tmp = (head)->sph_root; \
      (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \
    } \
  while(/*CONSTCOND*/ 0)

#define SPLAY_ASSEMBLE(head, node, left, right, field) \
  do \
    { \
      SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \
      SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field); \
      SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \
      SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \
    } \
  while(/*CONSTCOND*/ 0)

/* Generates prototypes and inline functions */
/* `cmp` is called as (cmp)(a, b) on two node pointers and its result is
 * compared against 0.  The prototypes declared here are defined by
 * SPLAY_GENERATE; FIND, NEXT and MIN_MAX are emitted as static inline
 * functions directly. */

#define SPLAY_PROTOTYPE(name, type, field, cmp) \
  void name##_SPLAY(struct name *, struct type *); \
  void name##_SPLAY_MINMAX(struct name *, int); \
  struct type *name##_SPLAY_INSERT(struct name *, struct type *); \
  struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \
  \
  /* Finds the node with the same key as elm */ \
  static __inline struct type *name##_SPLAY_FIND(struct name *head, struct type *elm) \
  { \
    if(SPLAY_EMPTY(head)) \
      return (NULL); \
    name##_SPLAY(head, elm); \
    if((cmp)(elm, (head)->sph_root) == 0) \
      return (head->sph_root); \
    return (NULL); \
  } \
  \
  static __inline struct type *name##_SPLAY_NEXT(struct name *head, struct type *elm) \
  { \
    name##_SPLAY(head, elm); \
    if(SPLAY_RIGHT(elm, field) != NULL) \
      { \
        elm = SPLAY_RIGHT(elm, field); \
        while(SPLAY_LEFT(elm, field) != NULL) \
          { \
            elm = SPLAY_LEFT(elm, field); \
          } \
      } \
    else \
      elm = NULL; \
    return (elm); \
  } \
  \
  static __inline struct type *name##_SPLAY_MIN_MAX(struct name *head, int val) \
  { \
    name##_SPLAY_MINMAX(head, val); \
    return (SPLAY_ROOT(head)); \
  }

/* Main splay operation.
 * Moves node close to the key of elm to top
 */
/* Emits the definitions declared by SPLAY_PROTOTYPE:
 *  - name##_SPLAY_INSERT returns NULL after making elm the new root, or
 *    the already present node when (cmp) reports an equal key (elm is then
 *    not inserted);
 *  - name##_SPLAY_REMOVE returns elm when (cmp) reports 0 against the
 *    root after splaying, NULL otherwise (also for an empty tree);
 *  - name##_SPLAY is the splay step itself; it dereferences the root
 *    without a NULL check, and the generated callers above test
 *    SPLAY_EMPTY first;
 *  - name##_SPLAY_MINMAX splays towards the leftmost (__comp < 0) or
 *    rightmost (__comp > 0) node. */
#define SPLAY_GENERATE(name, type, field, cmp) \
  struct type *name##_SPLAY_INSERT(struct name *head, struct type *elm) \
  { \
    if(SPLAY_EMPTY(head)) \
      { \
        SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \
      } \
    else \
      { \
        int __comp; \
        name##_SPLAY(head, elm); \
        __comp = (cmp)(elm, (head)->sph_root); \
        if(__comp < 0) \
          { \
            SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field); \
            SPLAY_RIGHT(elm, field) = (head)->sph_root; \
            SPLAY_LEFT((head)->sph_root, field) = NULL; \
          } \
        else if(__comp > 0) \
          { \
            SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field); \
            SPLAY_LEFT(elm, field) = (head)->sph_root; \
            SPLAY_RIGHT((head)->sph_root, field) = NULL; \
          } \
        else \
          return ((head)->sph_root); \
      } \
    (head)->sph_root = (elm); \
    return (NULL); \
  } \
  \
  struct type *name##_SPLAY_REMOVE(struct name *head, struct type *elm) \
  { \
    struct type *__tmp; \
    if(SPLAY_EMPTY(head)) \
      return (NULL); \
    name##_SPLAY(head, elm); \
    if((cmp)(elm, (head)->sph_root) == 0) \
      { \
        if(SPLAY_LEFT((head)->sph_root, field) == NULL) \
          { \
            (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \
          } \
        else \
          { \
            __tmp = SPLAY_RIGHT((head)->sph_root, field); \
            (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \
            name##_SPLAY(head, elm); \
            SPLAY_RIGHT((head)->sph_root, field) = __tmp; \
          } \
        return (elm); \
      } \
    return (NULL); \
  } \
  \
  void name##_SPLAY(struct name *head, struct type *elm) \
  { \
    struct type __node, *__left, *__right, *__tmp; \
    int __comp; \
    \
    SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL; \
    __left = __right = &__node; \
    \
    while((__comp = (cmp)(elm, (head)->sph_root)) != 0) \
      { \
        if(__comp < 0) \
          { \
            __tmp = SPLAY_LEFT((head)->sph_root, field); \
            if(__tmp == NULL) \
              break; \
            if((cmp)(elm, __tmp) < 0) \
              { \
                SPLAY_ROTATE_RIGHT(head, __tmp, field); \
                if(SPLAY_LEFT((head)->sph_root, field) == NULL) \
                  break; \
              } \
            SPLAY_LINKLEFT(head, __right, field); \
          } \
        else if(__comp > 0) \
          { \
            __tmp = SPLAY_RIGHT((head)->sph_root, field); \
            if(__tmp == NULL) \
              break; \
            if((cmp)(elm, __tmp) > 0) \
              { \
                SPLAY_ROTATE_LEFT(head, __tmp, field); \
                if(SPLAY_RIGHT((head)->sph_root, field) == NULL) \
                  break; \
              } \
            SPLAY_LINKRIGHT(head, __left, field); \
          } \
      } \
    SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
  } \
  \
  /* Splay with either the minimum or the maximum element \
   * Used to find minimum or maximum element in tree. \
   */ \
  void name##_SPLAY_MINMAX(struct name *head, int __comp) \
  { \
    struct type __node, *__left, *__right, *__tmp; \
    \
    SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL; \
    __left = __right = &__node; \
    \
    while(1) \
      { \
        if(__comp < 0) \
          { \
            __tmp = SPLAY_LEFT((head)->sph_root, field); \
            if(__tmp == NULL) \
              break; \
            if(__comp < 0) \
              { \
                SPLAY_ROTATE_RIGHT(head, __tmp, field); \
                if(SPLAY_LEFT((head)->sph_root, field) == NULL) \
                  break; \
              } \
            SPLAY_LINKLEFT(head, __right, field); \
          } \
        else if(__comp > 0) \
          { \
            __tmp = SPLAY_RIGHT((head)->sph_root, field); \
            if(__tmp == NULL) \
              break; \
            if(__comp > 0) \
              { \
                SPLAY_ROTATE_LEFT(head, __tmp, field); \
                if(SPLAY_RIGHT((head)->sph_root, field) == NULL) \
                  break; \
              } \
            SPLAY_LINKRIGHT(head, __left, field); \
          } \
      } \
    SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
  }

/* Direction arguments for name##_SPLAY_MINMAX / name##_SPLAY_MIN_MAX. */
#define SPLAY_NEGINF -1
#define SPLAY_INF 1

/* User-facing wrappers around the generated functions.  SPLAY_MIN and
 * SPLAY_MAX check for an empty tree themselves and then yield NULL. */
#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y)
#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y)
#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y)
#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y)
#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL : name##_SPLAY_MIN_MAX(x, SPLAY_INF))

/* Iterates x from SPLAY_MIN via SPLAY_NEXT until NULL.  Each step calls
 * the splay routine and therefore modifies the tree structure. */
#define SPLAY_FOREACH(x, name, head) for((x) = SPLAY_MIN(name, head); (x) != NULL; (x) = SPLAY_NEXT(name, head, x))

/* Macros that define a red-black tree */
/* Declares `struct name`, the head of a red-black tree of `struct type`
 * nodes; it holds only the root pointer. */
#define RB_HEAD(name, type) \
  struct name \
  { \
    struct type *rbh_root; /* root of the tree */ \
  }

/* Static initializer for an empty head; the argument is not used. */
#define RB_INITIALIZER(root) \
  { \
    NULL \
  }

/* Run-time counterpart of RB_INITIALIZER: empties the head `root`. */
#define RB_INIT(root) \
  do \
    { \
      (root)->rbh_root = NULL; \
    } \
  while(/*CONSTCOND*/ 0)

/* Node colors stored in rbe_color, and the link structure that the
 * accessors below reach as `(elm)->field`. */
#define RB_BLACK 0
#define RB_RED 1
#define RB_ENTRY(type) \
  struct \
  { \
    struct type *rbe_left;   /* left element */ \
    struct type *rbe_right;  /* right element */ \
    struct type *rbe_parent; /* parent element */ \
    int rbe_color;           /* node color */ \
  }

/* Accessors; all of them expand to lvalues. */
#define RB_LEFT(elm, field) (elm)->field.rbe_left
#define RB_RIGHT(elm, field) (elm)->field.rbe_right
#define RB_PARENT(elm, field) (elm)->field.rbe_parent
#define RB_COLOR(elm, field) (elm)->field.rbe_color
#define RB_ROOT(head) (head)->rbh_root
#define RB_EMPTY(head) (RB_ROOT(head) == NULL)

/* Resets the links of elm: parent set to `parent`, no children, red. */
#define RB_SET(elm, parent, field) \
  do \
    { \
      RB_PARENT(elm, field) = parent; \
      RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \
      RB_COLOR(elm, field) = RB_RED; \
    } \
  while(/*CONSTCOND*/ 0)

/* Colors the first node black and the second one red. */
#define RB_SET_BLACKRED(black, red, field) \
  do \
    { \
      RB_COLOR(black, field) = RB_BLACK; \
      RB_COLOR(red, field) = RB_RED; \
    } \
  while(/*CONSTCOND*/ 0)

/* Hook invoked by the rotations below on nodes whose links changed.
 * Defaults to a no-op; a user may define RB_AUGMENT before including
 * this header to override it. */
#ifndef RB_AUGMENT
#define RB_AUGMENT(x) \
  do \
    { \
    } \
  while(0)
#endif

/* Rotations around elm.  `tmp` is a scratch variable supplied by the
 * caller; it is assigned elm's right (ROTATE_LEFT) or left (ROTATE_RIGHT)
 * child, which takes elm's place.  The root pointer of `head` is updated
 * when elm had no parent. */
#define RB_ROTATE_LEFT(head, elm, tmp, field) \
  do \
    { \
      (tmp) = RB_RIGHT(elm, field); \
      if((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL) \
        { \
          RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \
        } \
      RB_AUGMENT(elm); \
      if((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) \
        { \
          if((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
            RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
          else \
            RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
        } \
      else \
        (head)->rbh_root = (tmp); \
      RB_LEFT(tmp, field) = (elm); \
      RB_PARENT(elm, field) = (tmp); \
      RB_AUGMENT(tmp); \
      if((RB_PARENT(tmp, field))) \
        RB_AUGMENT(RB_PARENT(tmp, field)); \
    } \
  while(/*CONSTCOND*/ 0)

#define RB_ROTATE_RIGHT(head, elm, tmp, field) \
  do \
    { \
      (tmp) = RB_LEFT(elm, field); \
      if((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL) \
        { \
          RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \
        } \
      RB_AUGMENT(elm); \
      if((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) \
        { \
          if((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
            RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
          else \
            RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
        } \
      else \
        (head)->rbh_root = (tmp); \
      RB_RIGHT(tmp, field) = (elm); \
      RB_PARENT(elm, field) = (tmp); \
      RB_AUGMENT(tmp); \
      if((RB_PARENT(tmp, field))) \
        RB_AUGMENT(RB_PARENT(tmp, field)); \
    } \
  while(/*CONSTCOND*/ 0)

/* Generates prototypes and inline functions */
/* RB_PROTOTYPE declares the functions with external linkage,
 * RB_PROTOTYPE_STATIC declares them `static`; both forward to
 * RB_PROTOTYPE_INTERNAL with the storage class passed as `attr`. */
#define RB_PROTOTYPE(name, type, field, cmp) RB_PROTOTYPE_INTERNAL(name, type, field, cmp, )
#define RB_PROTOTYPE_STATIC(name, type, field, cmp) RB_PROTOTYPE_INTERNAL(name, type, field, cmp, static)
#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) \
  attr void name##_RB_INSERT_COLOR(struct name *, struct type *); \
  attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *); \
  attr struct type *name##_RB_REMOVE(struct name *, struct type *); \
  attr struct type *name##_RB_INSERT(struct name *, struct type *); \
  attr struct type *name##_RB_FIND(struct name *, struct type *); \
  attr struct type *name##_RB_NFIND(struct name *, struct type *); \
  attr struct type *name##_RB_NEXT(struct type *); \
  attr struct type *name##_RB_PREV(struct type *); \
  attr struct type *name##_RB_MINMAX(struct name *, int);

/* Main rb operation.
+ * Moves node close to the key of elm to top + */ +#define RB_GENERATE(name, type, field, cmp) RB_GENERATE_INTERNAL(name, type, field, cmp, ) +#define RB_GENERATE_STATIC(name, type, field, cmp) RB_GENERATE_INTERNAL(name, type, field, cmp, static) +#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr) \ + attr void name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \ + { \ + struct type *parent, *gparent, *tmp; \ + while((parent = RB_PARENT(elm, field)) != NULL && RB_COLOR(parent, field) == RB_RED) \ + { \ + gparent = RB_PARENT(parent, field); \ + if(parent == RB_LEFT(gparent, field)) \ + { \ + tmp = RB_RIGHT(gparent, field); \ + if(tmp && RB_COLOR(tmp, field) == RB_RED) \ + { \ + RB_COLOR(tmp, field) = RB_BLACK; \ + RB_SET_BLACKRED(parent, gparent, field); \ + elm = gparent; \ + continue; \ + } \ + if(RB_RIGHT(parent, field) == elm) \ + { \ + RB_ROTATE_LEFT(head, parent, tmp, field); \ + tmp = parent; \ + parent = elm; \ + elm = tmp; \ + } \ + RB_SET_BLACKRED(parent, gparent, field); \ + RB_ROTATE_RIGHT(head, gparent, tmp, field); \ + } \ + else \ + { \ + tmp = RB_LEFT(gparent, field); \ + if(tmp && RB_COLOR(tmp, field) == RB_RED) \ + { \ + RB_COLOR(tmp, field) = RB_BLACK; \ + RB_SET_BLACKRED(parent, gparent, field); \ + elm = gparent; \ + continue; \ + } \ + if(RB_LEFT(parent, field) == elm) \ + { \ + RB_ROTATE_RIGHT(head, parent, tmp, field); \ + tmp = parent; \ + parent = elm; \ + elm = tmp; \ + } \ + RB_SET_BLACKRED(parent, gparent, field); \ + RB_ROTATE_LEFT(head, gparent, tmp, field); \ + } \ + } \ + RB_COLOR(head->rbh_root, field) = RB_BLACK; \ + } \ + \ + attr void name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \ + { \ + struct type *tmp; \ + while((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && elm != RB_ROOT(head)) \ + { \ + if(RB_LEFT(parent, field) == elm) \ + { \ + tmp = RB_RIGHT(parent, field); \ + if(RB_COLOR(tmp, field) == RB_RED) \ + { \ + RB_SET_BLACKRED(tmp, parent, field); \ + 
RB_ROTATE_LEFT(head, parent, tmp, field); \ + tmp = RB_RIGHT(parent, field); \ + } \ + if((RB_LEFT(tmp, field) == NULL || RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) && \ + (RB_RIGHT(tmp, field) == NULL || RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) \ + { \ + RB_COLOR(tmp, field) = RB_RED; \ + elm = parent; \ + parent = RB_PARENT(elm, field); \ + } \ + else \ + { \ + if(RB_RIGHT(tmp, field) == NULL || RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) \ + { \ + struct type *oleft; \ + if((oleft = RB_LEFT(tmp, field)) != NULL) \ + RB_COLOR(oleft, field) = RB_BLACK; \ + RB_COLOR(tmp, field) = RB_RED; \ + RB_ROTATE_RIGHT(head, tmp, oleft, field); \ + tmp = RB_RIGHT(parent, field); \ + } \ + RB_COLOR(tmp, field) = RB_COLOR(parent, field); \ + RB_COLOR(parent, field) = RB_BLACK; \ + if(RB_RIGHT(tmp, field)) \ + RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK; \ + RB_ROTATE_LEFT(head, parent, tmp, field); \ + elm = RB_ROOT(head); \ + break; \ + } \ + } \ + else \ + { \ + tmp = RB_LEFT(parent, field); \ + if(RB_COLOR(tmp, field) == RB_RED) \ + { \ + RB_SET_BLACKRED(tmp, parent, field); \ + RB_ROTATE_RIGHT(head, parent, tmp, field); \ + tmp = RB_LEFT(parent, field); \ + } \ + if((RB_LEFT(tmp, field) == NULL || RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) && \ + (RB_RIGHT(tmp, field) == NULL || RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) \ + { \ + RB_COLOR(tmp, field) = RB_RED; \ + elm = parent; \ + parent = RB_PARENT(elm, field); \ + } \ + else \ + { \ + if(RB_LEFT(tmp, field) == NULL || RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) \ + { \ + struct type *oright; \ + if((oright = RB_RIGHT(tmp, field)) != NULL) \ + RB_COLOR(oright, field) = RB_BLACK; \ + RB_COLOR(tmp, field) = RB_RED; \ + RB_ROTATE_LEFT(head, tmp, oright, field); \ + tmp = RB_LEFT(parent, field); \ + } \ + RB_COLOR(tmp, field) = RB_COLOR(parent, field); \ + RB_COLOR(parent, field) = RB_BLACK; \ + if(RB_LEFT(tmp, field)) \ + RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK; \ + 
RB_ROTATE_RIGHT(head, parent, tmp, field); \ + elm = RB_ROOT(head); \ + break; \ + } \ + } \ + } \ + if(elm) \ + RB_COLOR(elm, field) = RB_BLACK; \ + } \ + \ + attr struct type *name##_RB_REMOVE(struct name *head, struct type *elm) \ + { \ + struct type *child, *parent, *old = elm; \ + int color; \ + if(RB_LEFT(elm, field) == NULL) \ + child = RB_RIGHT(elm, field); \ + else if(RB_RIGHT(elm, field) == NULL) \ + child = RB_LEFT(elm, field); \ + else \ + { \ + struct type *left; \ + elm = RB_RIGHT(elm, field); \ + while((left = RB_LEFT(elm, field)) != NULL) \ + elm = left; \ + child = RB_RIGHT(elm, field); \ + parent = RB_PARENT(elm, field); \ + color = RB_COLOR(elm, field); \ + if(child) \ + RB_PARENT(child, field) = parent; \ + if(parent) \ + { \ + if(RB_LEFT(parent, field) == elm) \ + RB_LEFT(parent, field) = child; \ + else \ + RB_RIGHT(parent, field) = child; \ + RB_AUGMENT(parent); \ + } \ + else \ + RB_ROOT(head) = child; \ + if(RB_PARENT(elm, field) == old) \ + parent = elm; \ + (elm)->field = (old)->field; \ + if(RB_PARENT(old, field)) \ + { \ + if(RB_LEFT(RB_PARENT(old, field), field) == old) \ + RB_LEFT(RB_PARENT(old, field), field) = elm; \ + else \ + RB_RIGHT(RB_PARENT(old, field), field) = elm; \ + RB_AUGMENT(RB_PARENT(old, field)); \ + } \ + else \ + RB_ROOT(head) = elm; \ + RB_PARENT(RB_LEFT(old, field), field) = elm; \ + if(RB_RIGHT(old, field)) \ + RB_PARENT(RB_RIGHT(old, field), field) = elm; \ + if(parent) \ + { \ + left = parent; \ + do \ + { \ + RB_AUGMENT(left); \ + } \ + while((left = RB_PARENT(left, field)) != NULL); \ + } \ + goto color; \ + } \ + parent = RB_PARENT(elm, field); \ + color = RB_COLOR(elm, field); \ + if(child) \ + RB_PARENT(child, field) = parent; \ + if(parent) \ + { \ + if(RB_LEFT(parent, field) == elm) \ + RB_LEFT(parent, field) = child; \ + else \ + RB_RIGHT(parent, field) = child; \ + RB_AUGMENT(parent); \ + } \ + else \ + RB_ROOT(head) = child; \ + color: \ + if(color == RB_BLACK) \ + name##_RB_REMOVE_COLOR(head, parent, 
child); \ + return (old); \ + } \ + \ + /* Inserts a node into the RB tree */ \ + attr struct type *name##_RB_INSERT(struct name *head, struct type *elm) \ + { \ + struct type *tmp; \ + struct type *parent = NULL; \ + int comp = 0; \ + tmp = RB_ROOT(head); \ + while(tmp) \ + { \ + parent = tmp; \ + comp = (cmp)(elm, parent); \ + if(comp < 0) \ + tmp = RB_LEFT(tmp, field); \ + else if(comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + RB_SET(elm, parent, field); \ + if(parent != NULL) \ + { \ + if(comp < 0) \ + RB_LEFT(parent, field) = elm; \ + else \ + RB_RIGHT(parent, field) = elm; \ + RB_AUGMENT(parent); \ + } \ + else \ + RB_ROOT(head) = elm; \ + name##_RB_INSERT_COLOR(head, elm); \ + return (NULL); \ + } \ + \ + /* Finds the node with the same key as elm */ \ + attr struct type *name##_RB_FIND(struct name *head, struct type *elm) \ + { \ + struct type *tmp = RB_ROOT(head); \ + int comp; \ + while(tmp) \ + { \ + comp = cmp(elm, tmp); \ + if(comp < 0) \ + tmp = RB_LEFT(tmp, field); \ + else if(comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + return (NULL); \ + } \ + \ + /* Finds the first node greater than or equal to the search key */ \ + attr struct type *name##_RB_NFIND(struct name *head, struct type *elm) \ + { \ + struct type *tmp = RB_ROOT(head); \ + struct type *res = NULL; \ + int comp; \ + while(tmp) \ + { \ + comp = cmp(elm, tmp); \ + if(comp < 0) \ + { \ + res = tmp; \ + tmp = RB_LEFT(tmp, field); \ + } \ + else if(comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + return (res); \ + } \ + \ + /* ARGSUSED */ \ + attr struct type *name##_RB_NEXT(struct type *elm) \ + { \ + if(RB_RIGHT(elm, field)) \ + { \ + elm = RB_RIGHT(elm, field); \ + while(RB_LEFT(elm, field)) \ + elm = RB_LEFT(elm, field); \ + } \ + else \ + { \ + if(RB_PARENT(elm, field) && (elm == RB_LEFT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + else \ + { \ + while(RB_PARENT(elm, 
field) && (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + elm = RB_PARENT(elm, field); \ + } \ + } \ + return (elm); \ + } \ + \ + /* ARGSUSED */ \ + attr struct type *name##_RB_PREV(struct type *elm) \ + { \ + if(RB_LEFT(elm, field)) \ + { \ + elm = RB_LEFT(elm, field); \ + while(RB_RIGHT(elm, field)) \ + elm = RB_RIGHT(elm, field); \ + } \ + else \ + { \ + if(RB_PARENT(elm, field) && (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + else \ + { \ + while(RB_PARENT(elm, field) && (elm == RB_LEFT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + elm = RB_PARENT(elm, field); \ + } \ + } \ + return (elm); \ + } \ + \ + attr struct type *name##_RB_MINMAX(struct name *head, int val) \ + { \ + struct type *tmp = RB_ROOT(head); \ + struct type *parent = NULL; \ + while(tmp) \ + { \ + parent = tmp; \ + if(val < 0) \ + tmp = RB_LEFT(tmp, field); \ + else \ + tmp = RB_RIGHT(tmp, field); \ + } \ + return (parent); \ + } + +#define RB_NEGINF -1 +#define RB_INF 1 + +#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y) +#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y) +#define RB_FIND(name, x, y) name##_RB_FIND(x, y) +#define RB_NFIND(name, x, y) name##_RB_NFIND(x, y) +#define RB_NEXT(name, x, y) name##_RB_NEXT(y) +#define RB_PREV(name, x, y) name##_RB_PREV(y) +#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF) +#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF) + +#define RB_FOREACH(x, name, head) for((x) = RB_MIN(name, head); (x) != NULL; (x) = name##_RB_NEXT(x)) + +#define RB_FOREACH_FROM(x, name, y) for((x) = (y); ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); (x) = (y)) + +#define RB_FOREACH_SAFE(x, name, head, y) \ + for((x) = RB_MIN(name, head); ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); (x) = (y)) + +#define RB_FOREACH_REVERSE(x, name, head) for((x) = RB_MAX(name, head); (x) != NULL; (x) = name##_RB_PREV(x)) + +#define RB_FOREACH_REVERSE_FROM(x, 
name, y) for((x) = (y); ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); (x) = (y)) + +#define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \ + for((x) = RB_MAX(name, head); ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); (x) = (y)) + +#endif /* _SYS_TREE_H_ */ diff --git a/src/amuse/community/arepo/src/domain/domain.c b/src/amuse/community/arepo/src/domain/domain.c new file mode 100644 index 0000000000..4557c25ff5 --- /dev/null +++ b/src/amuse/community/arepo/src/domain/domain.c @@ -0,0 +1,633 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain.c + * \date 05/2018 + * \brief Code for domain decomposition. + * \details This file contains the code for the domain decomposition of the + * simulation volume. The domains are constructed from disjoint + * subsets of the leaves of a fiducial top-level tree that covers + * the full simulation volume. Domain boundaries hence run along + * tree-node divisions of a fiducial global Barnes-Hut tree. As a + * result of this method, the tree forces are in principle strictly + * independent of the way the domains are cut.
The domain + * decomposition can be carried out for an arbitrary number of + * CPUs. Individual domains are not cubical, but spatially + * coherent since the leaves are traversed in a Peano-Hilbert + * order and individual domains form segments along this order. + * This also ensures that each domain has a small surface to + * volume ratio, which minimizes communication. + * contains functions: + * void domain_Decomposition(void) + * void domain_prepare_voronoi_dynamic_update(void) + * void domain_voronoi_dynamic_flag_particles(void) + * void domain_voronoi_dynamic_update_execute(void) + * void domain_preserve_relevant_topnode_data(void) + * void domain_find_total_cost(void) + * peano1D domain_double_to_int(double d) + * void domain_allocate(void) + * void domain_free(void) + * void domain_printf(char *buf) + * void domain_report_balance(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 16.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" +#include "domain.h" + +/*! \brief The main routine for the domain decomposition. + * + * It acts as a driver routine that allocates various temporary buffers, + * maps the particles back onto the periodic box if needed, and then does the + * domain decomposition, and a final Peano-Hilbert order of all particles + * as a tuning measure. 
+ *
+ * \return void
+ */
+void domain_Decomposition(void)
+{ /* statement order matters: e.g. topNodes is allocated after domain_allocate_lists() and released before domain_free_lists() */
+  TIMER_START(CPU_DOMAIN);
+
+  double t0 = second();
+
+  mpi_printf("DOMAIN: Begin domain decomposition (sync-point %d).\n", All.NumCurrentTiStep);
+
+  /* allocate the translation table (trans_table) and determine Largest_Nvc for the Voronoi dynamic update */
+  domain_prepare_voronoi_dynamic_update();
+
+  /* map the particles back onto the box */
+  do_box_wrapping();
+
+  /* initialize the cost sums and allocate the domain arrays and work lists */
+  domain_init_sum_cost();
+  domain_allocate();
+  domain_allocate_lists();
+
+  topNodes = (struct local_topnode_data *)mymalloc_movable(&topNodes, "topNodes", (MaxTopNodes * sizeof(struct local_topnode_data)));
+  /* find total cost factors (sets the fac_* and normsum_* weights) */
+  domain_find_total_cost();
+  /* determine global dimensions of domain grid */
+  domain_findExtent();
+
+  /* determine top-level tree */
+  domain_determineTopTree(); /* declared to return int in domain.h; the result is not checked here */
+
+  /* find the split of the top-level tree */
+  domain_combine_topleaves_to_domains(All.MultipleDomains * NTask, NTopleaves);
+
+  /* combine on each MPI task several of the domains (namely the number All.MultipleDomains) */
+  domain_combine_multipledomains();
+
+  /* permute the task assignment such that the smallest number of particles needs to be moved */
+  domain_optimize_domain_to_task_mapping();
+
+  double ta = second();
+  /* in case we retain the neighbor connectivity, do some preparatory flagging */
+  domain_voronoi_dynamic_flag_particles();
+  /* remove cells that have been eliminated or were turned into stars from the particle sequence */
+  domain_rearrange_particle_sequence();
+  /* determine for each cpu how many particles have to be shifted to other cpus */
+  domain_countToGo(); /* declared to return int in domain.h; the result is not checked here */
+  double tb = second();
+  mpi_printf("DOMAIN: particle rearrangement work took %g sec\n", timediff(ta, tb));
+
+  /* finally, carry out the actual particle exchange */
+  domain_exchange();
+
+  /* copy what we need for the topnodes from the temporary topNodes into the global TopNodes */
+  domain_preserve_relevant_topnode_data();
+  myfree(topNodes);
+  domain_free_lists();
+  TimeOfLastDomainConstruction = All.Time;
+
+  double t1 = second();
+  mpi_printf("DOMAIN: domain decomposition done. (took in total %g sec)\n", timediff(t0, t1));
+
+  TIMER_STOP(CPU_DOMAIN);
+  TIMER_START(CPU_PEANO);
+
+  peano_hilbert_order(); /* final Peano-Hilbert ordering of the particles, timed separately under CPU_PEANO */
+  myfree(Key);           /* Key[] is the per-particle key array read in domain_voronoi_dynamic_flag_particles(); it is not allocated in this function */
+
+  TIMER_STOPSTART(CPU_PEANO, CPU_DOMAIN);
+
+  myfree(DomainListOfLocalTopleaves); /* released here and re-allocated below with NTopleaves entries */
+
+#ifdef ONEDIMS
+  voronoi_1D_order();
+#endif /* #ifdef ONEDIMS */
+
+  TopNodes = (struct topnode_data *)myrealloc_movable(TopNodes, NTopnodes * sizeof(struct topnode_data)); /* resize from MaxTopNodes (domain_allocate) to NTopnodes entries */
+  DomainTask = (int *)myrealloc_movable(DomainTask, NTopleaves * sizeof(int));                            /* resize from MaxTopNodes (domain_allocate) to NTopleaves entries */
+
+  domain_voronoi_dynamic_update_execute(); /* also frees trans_table */
+
+  DomainListOfLocalTopleaves =
+      (int *)mymalloc_movable(&DomainListOfLocalTopleaves, "DomainListOfLocalTopleaves", (NTopleaves * sizeof(int)));
+
+  memset(DomainNLocalTopleave, 0, NTask * sizeof(int)); /* pass 1 of a counting sort: number of top leaves per task */
+
+  for(int i = 0; i < NTopleaves; i++)
+    DomainNLocalTopleave[DomainTask[i]]++;
+
+  DomainFirstLocTopleave[0] = 0; /* exclusive prefix sum: first slot of each task in DomainListOfLocalTopleaves */
+  for(int i = 1; i < NTask; i++)
+    DomainFirstLocTopleave[i] = DomainFirstLocTopleave[i - 1] + DomainNLocalTopleave[i - 1];
+
+  memset(DomainNLocalTopleave, 0, NTask * sizeof(int)); /* pass 2: counters are reused as per-task fill positions */
+
+  for(int i = 0; i < NTopleaves; i++)
+    {
+      int task = DomainTask[i];
+      int off = DomainFirstLocTopleave[task] + DomainNLocalTopleave[task]++;
+      DomainListOfLocalTopleaves[off] = i; /* leaves end up grouped by task, in increasing leaf index within each task */
+    }
+
+  reconstruct_timebins();
+
+  for(int i = 0; i < GRAVCOSTLEVELS; i++)
+    All.LevelHasBeenMeasured[i] = 0; /* clear the per-level flag for all GRAVCOSTLEVELS levels */
+
+  domain_report_balance();
+
+  TIMER_STOP(CPU_DOMAIN);
+}
+
+/*! \brief Prepares for voronoi dynamic update.
+ *
+ * Sets N_trans to the current NumGas, allocates trans_table with N_trans
+ * entries (it is released again in domain_voronoi_dynamic_update_execute())
+ * and stores the maximum of Nvc over all MPI tasks in Largest_Nvc.
+ * NOTE(review): Nvc is presumably the local number of Voronoi connections;
+ * confirm against the mesh code.
+ *
+ * \return void
+ */
+void domain_prepare_voronoi_dynamic_update(void)
+{
+  /* prepare storage for translation table */
+  N_trans = NumGas; /* length of translation table */
+  trans_table = mymalloc_movable(&trans_table, "trans_table", N_trans * sizeof(struct trans_data));
+  MPI_Allreduce(&Nvc, &Largest_Nvc, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); /* collective: every task must call this */
+}
+
+/*! \brief Flag particles that need to be exported.
+ * + * Go through all local particles and check if they are within new local + * domains. If this is not the case, flag them as to be exported. + * + * \return void + */ +void domain_voronoi_dynamic_flag_particles(void) +{ + /* flag the particles that need to be exported */ + for(int i = 0; i < NumPart; i++) + { + int no = 0; + + while(topNodes[no].Daughter >= 0) + no = topNodes[no].Daughter + (Key[i] - topNodes[no].StartKey) / (topNodes[no].Size >> 3); + + no = topNodes[no].Leaf; + + int task = DomainTask[no]; + domain_mark_in_trans_table(i, task); + } +} + +/*! \brief Execute voronoi_dynamic_update + * + * Calls domain_exchange_and_update_DC() if needed. + * + * \return void + */ +void domain_voronoi_dynamic_update_execute(void) +{ + CPU_Step[CPU_DOMAIN] += measure_time(); + if(Largest_Nvc > 0) + domain_exchange_and_update_DC(); + + myfree_movable(trans_table); + + CPU_Step[CPU_MESH_DYNAMIC] += measure_time(); +} + +/*! \brief Save the new top-level tree data into global arrays. + * + * \return void + */ +void domain_preserve_relevant_topnode_data(void) +{ + for(int i = 0; i < NTopnodes; i++) + { + TopNodes[i].StartKey = topNodes[i].StartKey; + TopNodes[i].Size = topNodes[i].Size; + TopNodes[i].Daughter = topNodes[i].Daughter; + TopNodes[i].Leaf = topNodes[i].Leaf; + + int bits = my_ffsll(TopNodes[i].Size); + int blocks = (bits - 1) / 3 - 1; + + for(int j = 0; j < 8; j++) + { + peano1D xb, yb, zb; + peano_hilbert_key_inverse(TopNodes[i].StartKey + j * (TopNodes[i].Size >> 3), BITS_PER_DIMENSION, &xb, &yb, &zb); + xb >>= blocks; + yb >>= blocks; + zb >>= blocks; + int idx = (xb & 1) | ((yb & 1) << 1) | ((zb & 1) << 2); + if(idx < 0 || idx > 7) + terminate("j=%d idx=%d", j, idx); + + TopNodes[i].MortonToPeanoSubnode[idx] = j; + } + } +} + +/*! \brief Calculates the total cost of different operations. + * + * This function gathers information about the cost of gravity and + * hydrodynamics calculation as well as the particle load. 
+ * + * \return void + */ +void domain_find_total_cost(void) +{ + if(All.MultipleDomains < 1 || All.MultipleDomains > 512) + terminate("All.MultipleDomains < 1 || All.MultipleDomains > 512"); + + gravcost = sphcost = 0; + double partcount = 0; + double sphpartcount = 0; + + for(int i = 0; i < NumPart; i++) + { +#ifdef ADDBACKGROUNDGRID + if(P[i].Type != 0) + continue; +#endif /* #ifdef ADDBACKGROUNDGRID */ + partcount += 1.0; + + gravcost += domain_grav_tot_costfactor(i); + + double hydrocost = domain_hydro_tot_costfactor(i); + sphcost += hydrocost; + + if(hydrocost > 0) + sphpartcount += 1.0; + } + + double loc[4] = {gravcost, sphcost, partcount, sphpartcount}, sum[4]; + + MPI_Allreduce(loc, sum, 4, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + totgravcost = sum[0]; + totsphcost = sum[1]; + totpartcount = sum[2]; + double totsphpartcount = sum[3]; + + if(totsphcost > 0 && totgravcost > 0 && totsphpartcount > (All.TopNodeFactor * All.MultipleDomains * NTask)) + { + /* in this case we give equal weight to gravitational work-load, hydro work load, and particle load. + */ + normsum_work = 0.333333; + normsum_load = 0.333333; + normsum_worksph = 0.333333; + fac_work = normsum_work / totgravcost; + fac_load = normsum_load / totpartcount; + fac_worksph = normsum_worksph / totsphcost; + } + else if(totgravcost > 0) + { + /* in this case we give equal weight to gravitational work-load and particle load. + * The final pieces should have at most imbalance 2.0 in either of the two + */ + normsum_work = 0.5; + normsum_load = 0.5; + normsum_worksph = 0; + fac_work = normsum_work / totgravcost; + fac_load = normsum_load / totpartcount; + fac_worksph = 0.0; + } + else if(totsphcost > 0) + { + /* here we only appear to do hydrodynamics. We hence give equal weight to SPH cost and + * particle load. 
+ */ + normsum_work = 0; + normsum_load = 0.5; + normsum_worksph = 0.5; + fac_work = 0.0; + fac_load = normsum_load / totpartcount; + fac_worksph = normsum_worksph / totsphcost; + } + else + terminate("strange: totsphcost=%g totgravcost=%g\n", totsphcost, totgravcost); +} + +/*! \brief Coordinate conversion to integer. + * + * \param[in] d coordinate in double precision. + * + * \return coordinate in integer of type peano1D. + */ +peano1D domain_double_to_int(double d) +{ + union + { + double d; + unsigned long long ull; + } u; + u.d = d; + return (peano1D)((u.ull & 0xFFFFFFFFFFFFFllu) >> (52 - BITS_PER_DIMENSION)); +} + +/*! \brief Allocates memory + * + * This function allocates all the stuff that will be required for the + * tree-construction/walk later on. + * + * \return void + */ +void domain_allocate(void) +{ + MaxTopNodes = (int)(All.TopNodeAllocFactor * All.MaxPart + 1); + + if(DomainStartList) + terminate("domain storage already allocated"); + + DomainStartList = (int *)mymalloc_movable(&DomainStartList, "DomainStartList", (NTask * All.MultipleDomains * sizeof(int))); + DomainEndList = (int *)mymalloc_movable(&DomainEndList, "DomainEndList", (NTask * All.MultipleDomains * sizeof(int))); + DomainFirstLocTopleave = (int *)mymalloc_movable(&DomainFirstLocTopleave, "DomainFirstLocTopleave", NTask * sizeof(int)); + DomainNLocalTopleave = (int *)mymalloc_movable(&DomainNLocalTopleave, "DomainNLocalTopleave", NTask * sizeof(int)); + TopNodes = (struct topnode_data *)mymalloc_movable(&TopNodes, "TopNodes", (MaxTopNodes * sizeof(struct topnode_data))); + DomainTask = (int *)mymalloc_movable(&DomainTask, "DomainTask", (MaxTopNodes * sizeof(int))); + DomainListOfLocalTopleaves = + (int *)mymalloc_movable(&DomainListOfLocalTopleaves, "DomainListOfLocalTopleaves", (MaxTopNodes * sizeof(int))); +} + +/*! \brief Free arrays needed in domain decomposition. + * + * This is the counterpart to domain_allocate; need to free arrays in reverse + * allocation order. 
+ * + * \return void + */ +void domain_free(void) +{ + if(!DomainStartList) + terminate("domain storage not allocated"); + + myfree_movable(DomainListOfLocalTopleaves); + myfree_movable(DomainTask); + myfree_movable(TopNodes); + myfree_movable(DomainNLocalTopleave); + myfree_movable(DomainFirstLocTopleave); + myfree_movable(DomainEndList); + myfree_movable(DomainStartList); + + DomainTask = NULL; + TopNodes = NULL; + DomainNLocalTopleave = NULL; + DomainFirstLocTopleave = NULL; + DomainEndList = NULL; + DomainStartList = NULL; +} + +/*! \brief Print message in domain.txt logfile. + * + * \param[in] buf String to be printed to domain.txt. + * + * \return void + */ +void domain_printf(char *buf) +{ + if(RestartFlag <= 2) + fprintf(FdDomain, "%s", buf); +} + +/*! \brief Function that reports load-balancing + * + * Function calculates load-balancing of the simulation and prints + * it to domain.txt + * + * \return void + */ +void domain_report_balance(void) +{ + /* get total particle counts */ + long long loc_count[2 * TIMEBINS], glob_count[2 * TIMEBINS]; + + for(int i = 0; i < TIMEBINS; i++) + { + loc_count[i] = TimeBinsGravity.TimeBinCount[i]; + loc_count[TIMEBINS + i] = TimeBinsHydro.TimeBinCount[i]; + } + + MPI_Reduce(loc_count, glob_count, 2 * TIMEBINS, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + + double loc_max_data[2 * TIMEBINS + 3], glob_max_data[2 * TIMEBINS + 3]; + + loc_max_data[2 * TIMEBINS + 0] = NumPart; + loc_max_data[2 * TIMEBINS + 1] = NumGas; + loc_max_data[2 * TIMEBINS + 2] = NumPart - NumGas; + + double glob_sum_data[2 * TIMEBINS]; + + double *loc_HydroCost = &loc_max_data[0]; + double *loc_GravCost = &loc_max_data[TIMEBINS]; + double *max_HydroCost = &glob_max_data[0]; + double *max_GravCost = &glob_max_data[TIMEBINS]; + double *glob_HydroCost = &glob_sum_data[0]; + double *glob_GravCost = &glob_sum_data[TIMEBINS]; + + for(int i = 0; i < TIMEBINS; i++) + { + loc_GravCost[i] = 0; + loc_HydroCost[i] = 0; + } + +#ifdef SELFGRAVITY + for(int i = 
0; i < NumPart; i++) + { + for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++) + { +#ifdef HIERARCHICAL_GRAVITY + if(bin >= P[i].TimeBinGrav) +#endif /* #ifdef HIERARCHICAL_GRAVITY */ + { + if(domain_bintolevel[bin] >= 0) + loc_GravCost[bin] += MIN_FLOAT_NUMBER + domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[bin]]; + else + { + if(domain_refbin[bin] >= 0) + loc_GravCost[bin] += + MIN_FLOAT_NUMBER + domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[domain_refbin[bin]]]; + else + loc_GravCost[bin] += 1.0; + } + } + } + } +#endif /* #ifdef SELFGRAVITY */ + + for(int i = 0; i < NumPart; i++) + if(P[i].Type == 0) + loc_HydroCost[P[i].TimeBinHydro] += 1.0; + + /* now determine the cumulative cost for the hydrodynamics */ + for(int i = 1; i <= All.HighestOccupiedTimeBin; i++) + loc_HydroCost[i] += loc_HydroCost[i - 1]; + + MPI_Reduce(loc_max_data, glob_sum_data, 2 * TIMEBINS, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(loc_max_data, glob_max_data, 2 * TIMEBINS + 3, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + double max_tot = glob_max_data[2 * TIMEBINS + 0]; + double max_sph = glob_max_data[2 * TIMEBINS + 1]; + double max_dm = glob_max_data[2 * TIMEBINS + 2]; + + long long *tot_count = &glob_count[0]; + long long *tot_count_sph = &glob_count[TIMEBINS]; + + long long tot_cumulative[TIMEBINS]; + tot_cumulative[0] = tot_count[0]; + + for(int i = 1; i < TIMEBINS; i++) + tot_cumulative[i] = tot_count[i] + tot_cumulative[i - 1]; + + double tot_gravcost = 0, max_gravcost = 0, tot_hydrocost = 0, max_hydrocost = 0; + + All.TotGravCost = 0; + + for(int i = 0; i < TIMEBINS; i++) + { + All.TotGravCost += domain_to_be_balanced[i] * glob_GravCost[i] / NTask; + + tot_gravcost += domain_to_be_balanced[i] * glob_GravCost[i] / NTask; + max_gravcost += domain_to_be_balanced[i] * max_GravCost[i]; + + tot_hydrocost += domain_to_be_balanced[i] * glob_HydroCost[i] / NTask; + max_hydrocost += 
domain_to_be_balanced[i] * max_HydroCost[i]; + } + + double bal_grav_bin[TIMEBINS], bal_grav_bin_rel[TIMEBINS]; + double bal_hydro_bin[TIMEBINS], bal_hydro_bin_rel[TIMEBINS]; + + for(int i = 0; i < TIMEBINS; i++) + { + if(tot_count[i] > 0) + { + bal_grav_bin[i] = max_GravCost[i] / (glob_GravCost[i] / NTask + 1.0e-60); + bal_grav_bin_rel[i] = + (tot_gravcost + domain_to_be_balanced[i] * (max_GravCost[i] - glob_GravCost[i] / NTask)) / (tot_gravcost + 1.0e-60); + } + else + { + bal_grav_bin[i] = 0.0; + bal_grav_bin_rel[i] = 0.0; + } + + if(tot_count_sph[i] > 0) + { + bal_hydro_bin[i] = max_HydroCost[i] / (glob_HydroCost[i] / NTask + 1.0e-60); + bal_hydro_bin_rel[i] = (tot_hydrocost + domain_to_be_balanced[i] * (max_HydroCost[i] - glob_HydroCost[i] / NTask)) / + (tot_hydrocost + 1.0e-60); + } + else + { + bal_hydro_bin[i] = 0.0; + bal_hydro_bin_rel[i] = 0.0; + } + } + + char buf[1000]; + + sprintf(buf, "\nDOMAIN BALANCE, Sync-Point %d, Time: %g\n", All.NumCurrentTiStep, All.Time); + + domain_printf(buf); + + sprintf(buf, "Timebins: Gravity Hydro cumulative grav-balance hydro-balance\n"); + + domain_printf(buf); + + long long tot = 0, tot_sph = 0; + + for(int i = TIMEBINS - 1; i >= 0; i--) + { +#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) + if(tot_count_sph[i] > 0 || tot_count[i] > 0) +#else /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) */ + if(tot_count[i] > 0) + tot += tot_count[i]; + + if(tot_count_sph[i] > 0) +#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) \ + #else */ + { + char buf[1000]; + + sprintf(buf, "%c%cbin=%2d %10llu %10llu %10llu %c %6.3f |%6.3f %c %6.3f |%6.3f\n", + i == All.HighestActiveTimeBin ? '>' : ' ', i >= All.SmallestTimeBinWithDomainDecomposition ? 
'|' : ' ', i, + tot_count[i], tot_count_sph[i], tot_cumulative[i], domain_bintolevel[i] >= 0 ? 'm' : ' ', bal_grav_bin[i], + bal_grav_bin_rel[i], domain_to_be_balanced[i] > 0 ? '*' : ' ', bal_hydro_bin[i], bal_hydro_bin_rel[i]); + + domain_printf(buf); + + tot += tot_count[i]; + tot_sph += tot_count_sph[i]; + } + } + + sprintf(buf, "-------------------------------------------------------------------------------------\n"); + + domain_printf(buf); + + sprintf(buf, "BALANCE, LOAD: %6.3f %6.3f %6.3f WORK: %6.3f %6.3f\n", + max_dm / (tot - tot_sph + 1.0e-60) * NTask, max_sph / (tot_sph + 1.0e-60) * NTask, max_tot / (tot + 1.0e-60) * NTask, + max_gravcost / (tot_gravcost + 1.0e-60), max_hydrocost / (tot_hydrocost + 1.0e-60)); + + domain_printf(buf); + + sprintf(buf, "-------------------------------------------------------------------------------------\n"); + + domain_printf(buf); + + sprintf(buf, "\n"); + + domain_printf(buf); + + myflush(FdDomain); + } + + /* the following needs to be known by all the tasks */ + MPI_Bcast(&All.TotGravCost, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); +} diff --git a/src/amuse/community/arepo/src/domain/domain.h b/src/amuse/community/arepo/src/domain/domain.h new file mode 100644 index 0000000000..f52781918a --- /dev/null +++ b/src/amuse/community/arepo/src/domain/domain.h @@ -0,0 +1,156 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/domain.h
+ * \date 05/2018
+ * \brief Header for domain decomposition.
+ * \details
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 28.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef ALLVARS_H
+#include "../main/allvars.h"
+#endif /* #ifndef ALLVARS_H */
+
+#ifndef DOMAIN_H
+#define DOMAIN_H
+
+#define MASK_ACTIVE_FLAG_IN_TYPE 127 /* 0x7F: the low seven bits; presumably masks the particle type proper - confirm at the use sites */
+#define SET_ACTIVE_FLAG_IN_TYPE 128 /* 0x80: bit 7; presumably the 'active' flag stored in the type field - confirm at the use sites */
+
+enum domain_displace_mode /* direction argument of domain_displacePosition() */
+{
+  DISPLACE_POSITION_FORWARD,
+  DISPLACE_POSITION_BACKWARD
+};
+
+extern struct local_topnode_data
+{
+  peanokey Size; /*!< number of Peano-Hilbert mesh-cells represented by top-level node */
+  peanokey StartKey; /*!< first Peano-Hilbert key in top-level node */
+  long long Count; /*!< counts the number of particles in this top-level node */
+  double Cost; /*!< presumably the gravity cost accumulated for this node - TODO confirm where it is filled */
+  double SphCost; /*!< presumably the hydro cost accumulated for this node - TODO confirm where it is filled */
+  int Daughter; /*!< index of first daughter cell (out of 8) of top-level node; negative for a node without daughters (see the tree walk in domain.c) */
+  int Leaf; /*!< if the node is a leaf, this gives its number when all leaves are traversed in Peano-Hilbert order */
+  int Parent; /*!< presumably the index of the parent node - TODO confirm */
+  int PIndex; /*!< first particle in node */
+
+} * topNodes, *branchNodes; /*!< points to the root node of the top-level tree */
+
+struct domain_count_data
+{
+  int task;
+  int count;
+  int origintask;
+};
+
+extern struct domain_peano_hilbert_data
+{
+  peanokey key;
+  int index;
+} * mp;
+
+extern struct trans_data /* one entry of the translation table used by the Voronoi dynamic update */
+{
+  MyIDType ID; /*!< particle ID, copied from P[i].ID in domain_mark_in_trans_table() */
+  int new_task; /*!< task the cell is sent to; -1 marks an entry that was removed from the particle sequence (see domain_mark_in_trans_table()) */
+  int new_index;
+  int wrapped;
+} * trans_table;
+
+extern int N_trans; /* length of trans_table; set to NumGas in domain_prepare_voronoi_dynamic_update() */
+
+extern int Nbranch;
+
+extern double fac_work, fac_load, fac_worksph; /* normsum_* divided by the matching global total; set in domain_find_total_cost() */
+extern double normsum_work, normsum_load, normsum_worksph; /* relative weights of gravity work, particle load and hydro work; set in domain_find_total_cost() */
+
+extern double totgravcost, totpartcount, gravcost, totsphcost, sphcost; /* gravcost/sphcost: sums over local particles; tot*: sums over all tasks (domain_find_total_cost()) */
+
+extern struct domain_cost_data
+{
+  int no;
+  float Work; /*!< total gravity "work" due to the particles stored by a leaf node */
+  float WorkSph; /*!< total hydro "work" due to the particles stored by a leaf node */
+  int Count; /*!< a table that gives the total number of particles held by each processor - NOTE(review): wording looks copied from a per-processor table; presumably the particle count of the leaf node, confirm */
+  int CountSph; /*!< a table that gives the total number of SPH particles held by each processor - NOTE(review): presumably the SPH particle count of the leaf node, confirm */
+} * DomainLeaveNode;
+
+/* toGo[partner] gives the number of particles on the current task that have to go to task 'partner'
+ */
+extern int *toGo, *toGoSph;
+extern int *toGet, *toGetSph;
+extern int *list_NumPart;
+extern int *list_NumGas;
+extern int *list_load;
+extern int *list_loadsph;
+extern double *list_work;
+extern double *list_worksph;
+
+/* functions for domain decomposition */
+peano1D domain_double_to_int(double d);
+double domain_grav_tot_costfactor(int i);
+double domain_hydro_tot_costfactor(int i);
+void domain_init_sum_cost(void);
+void domain_printf(char *buf);
+void domain_report_balance(void);
+int domain_sort_load(const void *a, const void *b);
+int domain_compare_count(const void *a, const void *b);
+int domain_sort_task(const void *a, const void *b);
+int domain_compare_count(const void *a, const void *b); /* NOTE(review): repeated declaration of domain_compare_count (harmless, but redundant) */
+void domain_rearrange_particle_sequence(void);
+void domain_combine_topleaves_to_domains(int ncpu, int ndomain);
+void domain_combine_multipledomains(void);
+void domain_allocate(void);
+void domain_Decomposition(void);
+int domain_compare_key(const void *a, const void *b);
+int domain_countToGo(void);
+int domain_determineTopTree(void);
+void domain_exchange(void);
+void domain_findExtent(void);
+void domain_free(void);
+void domain_sumCost(void);
+void domain_walktoptree(int no);
+void domain_optimize_domain_to_task_mapping(void);
+int domain_compare_count(const void *a, const void *b); /* NOTE(review): repeated declaration of domain_compare_count (harmless, but redundant) */
+void domain_allocate_lists(void);
+void domain_free_lists(void);
+int domain_unpack_tree_branch(int no, int parent);
+void domain_do_local_refine(int n, int *list);
+void domain_preserve_relevant_topnode_data(void);
+void domain_find_total_cost(void);
+void domain_voronoi_dynamic_update_execute(void);
+void domain_prepare_voronoi_dynamic_update(void);
+void domain_voronoi_dynamic_flag_particles(void);
+void domain_mark_in_trans_table(int i, int task);
+void domain_exchange_and_update_DC(void);
+int domain_compare_connection_ID(const void *a, const void *b);
+int domain_compare_local_trans_data_ID(const void *a, const void *b);
+int domain_compare_recv_trans_data_ID(const void *a, const void *b);
+int domain_compare_recv_trans_data_oldtask(const void *a, const void *b);
+void mysort_domain(void *b, size_t n, size_t s);
+void domain_displacePosition(MyDouble *pos, enum domain_displace_mode mode);
+
+#endif /* #ifndef DOMAIN_H */
diff --git a/src/amuse/community/arepo/src/domain/domain_DC_update.c b/src/amuse/community/arepo/src/domain/domain_DC_update.c
new file mode 100644
index 0000000000..bf960ebfc1
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_DC_update.c
@@ -0,0 +1,699 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain_DC_update.c + * \date 05/2018 + * \brief Algorithms for voronoi dynamic update + * \details contains functions: + * void domain_mark_in_trans_table(int i, int task) + * void domain_exchange_and_update_DC(void) + * int domain_compare_connection_ID(const void *a, + * const void *b) + * int domain_compare_local_trans_data_ID(const void *a, + * const void *b) + * int domain_compare_recv_trans_data_ID(const void *a, + * const void *b) + * int domain_compare_recv_trans_data_oldtask(const void *a, + * const void *b) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 17.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" +#include "domain.h" + +struct trans_data *trans_table; +int N_trans; + +/*! \brief Data structure for local auxiliary translation table. + */ +static struct local_aux_trans_data +{ + MyIDType ID; + int new_index; +} * local_trans_data; + +/*! \brief Data structure for communicating the translation table. + */ +static struct aux_trans_data +{ + MyIDType ID; + int old_task; + int old_index; + int new_index; +} * send_trans_data, *recv_trans_data; + +/*! \brief Data structure for transcribing data. + */ +static struct aux_transscribe_data +{ + int old_index; + int new_task; + int new_index; + int image_flags; +} * send_transscribe_data, *recv_transscribe_data; + +/*! \brief Fill translation table. + * + * Mark where cells are moved to and mark in DC accordingly to make sure + * they get communicated to the same task. + * + * \param[in] i Index in P and SphP arrays. + * \param[in] task Task to which particle i is exported. 
+ * + * \return void + */ +void domain_mark_in_trans_table(int i, int task) +{ + if(Largest_Nvc > 0) + { + if(i < NumGas) + { + trans_table[i].ID = P[i].ID; + trans_table[i].new_task = task; + + int q = SphP[i].first_connection; + + while(q >= 0) + { + int qq = DC[q].next; + if(q == qq) + terminate("preventing getting stuck in a loop due to q == DC[q].next : i=%d q=%d last_connection=%d", i, q, + SphP[i].last_connection); + + if((P[i].Mass == 0 && P[i].ID == 0) || P[i].Type != 0) /* this cell has been deleted or turned into a star */ + DC[q].next = -1; + else + DC[q].next = task; /* we temporarily use the next variable to store the new task; the successor was saved in qq above */ + + if(q == SphP[i].last_connection) + break; + + q = qq; + } + } + else if(i < N_trans) + trans_table[i].new_task = -1; /* this one has been removed by rearrange_particle_sequence() */ + } +} + +/*! \brief Communicates connections. + * + * This algorithm communicates Delaunay connections and updates them on the + * new task. + * + * \return void + */ +void domain_exchange_and_update_DC(void) +{ + double t0 = second(); + +#if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) + /* remove all image flags, after our box movement stunt they are all incorrect anyway */ + for(int i = 0; i < MaxNvc; i++) + { + DC[i].image_flags = 1; + } +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) */ + + /* first, we need to complete the translation table */ + for(int j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(int i = 0; i < N_trans; i++) + if(trans_table[i].new_task >= 0) + Send_count[trans_table[i].new_task]++; + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + int nimport = 0, nexport = 0; + Recv_offset[0] = Send_offset[0] = 0; + + for(int j = 0; j < NTask; j++) + { + nexport += Send_count[j]; + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] +
Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + send_trans_data = mymalloc("send_trans_data", nexport * sizeof(struct aux_trans_data)); + recv_trans_data = mymalloc("recv_trans_data", nimport * sizeof(struct aux_trans_data)); + + for(int j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(int i = 0; i < N_trans; i++) + { + int task = trans_table[i].new_task; + if(task >= 0) + { + send_trans_data[Send_offset[task] + Send_count[task]].ID = trans_table[i].ID; + send_trans_data[Send_offset[task] + Send_count[task]].old_index = i; + send_trans_data[Send_offset[task] + Send_count[task]].old_task = ThisTask; + Send_count[task]++; + } + } + + /* exchange the data */ + for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + MPI_Sendrecv(&send_trans_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct aux_trans_data), MPI_BYTE, + recvTask, TAG_DENS_B, &recv_trans_data[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(struct aux_trans_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + + /* let's now sort the incoming list according to ID */ + mysort(recv_trans_data, nimport, sizeof(struct aux_trans_data), domain_compare_recv_trans_data_ID); + + /* make an auxiliary list for the local particles that we will also sort according to ID */ + local_trans_data = mymalloc("local_trans_data", NumGas * sizeof(struct local_aux_trans_data)); + for(int i = 0; i < NumGas; i++) + { + local_trans_data[i].ID = P[i].ID; + local_trans_data[i].new_index = i; + } + mysort(local_trans_data, NumGas, sizeof(struct local_aux_trans_data), domain_compare_local_trans_data_ID); + + int i, j; + /* now we go through and put in the new index for matching IDs */ + for(i = 0, j = 0; i < nimport && j < NumGas;) + { + if(recv_trans_data[i].ID < local_trans_data[j].ID) + { + recv_trans_data[i].new_index = 
-1; /* this particle has been eliminated */ + i++; + } + else if(recv_trans_data[i].ID > local_trans_data[j].ID) + j++; + else + { + recv_trans_data[i].new_index = local_trans_data[j].new_index; + i++; + j++; + } + } + + for(; i < nimport; i++) + recv_trans_data[i].new_index = -1; /* this particle has been eliminated */ + + myfree(local_trans_data); + + /* now order the received data by sending task, so that we can return it */ + mysort(recv_trans_data, nimport, sizeof(struct aux_trans_data), domain_compare_recv_trans_data_oldtask); + + /* return the data */ + for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + MPI_Sendrecv(&recv_trans_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct aux_trans_data), MPI_BYTE, + recvTask, TAG_DENS_B, &send_trans_data[Send_offset[recvTask]], + Send_count[recvTask] * sizeof(struct aux_trans_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + + /* now let's fill in the new_index entry into the translation table */ + for(int i = 0; i < nexport; i++) + trans_table[send_trans_data[i].old_index].new_index = send_trans_data[i].new_index; + + myfree(recv_trans_data); + myfree(send_trans_data); + + /* it's now time to transcribe the task and index fields in the DC list */ + for(int j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(int i = 0; i < MaxNvc; i++) + { + int task = DC[i].task; + if(task >= 0) + { + if(task >= NTask) + terminate("i=%d Nvc=%d MaxNvc=%d task=%d\n", i, Nvc, MaxNvc, task); + + Send_count[task]++; + } + } + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + nimport = nexport = 0; + Recv_offset[0] = Send_offset[0] = 0; + + for(int j = 0; j < NTask; j++) + { + nexport += Send_count[j]; + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 
1] + Recv_count[j - 1]; + } + } + + send_transscribe_data = mymalloc("send_transscribe_data", nexport * sizeof(struct aux_transscribe_data)); + recv_transscribe_data = mymalloc("recv_transscribe_data", nimport * sizeof(struct aux_transscribe_data)); + + for(int j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(int i = 0; i < MaxNvc; i++) + { + int task = DC[i].task; + if(task >= 0) + { + send_transscribe_data[Send_offset[task] + Send_count[task]].old_index = DC[i].index; + send_transscribe_data[Send_offset[task] + Send_count[task]].image_flags = DC[i].image_flags; + Send_count[task]++; + } + } + + /* exchange the data */ + for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + MPI_Sendrecv(&send_transscribe_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct aux_transscribe_data), + MPI_BYTE, recvTask, TAG_DENS_B, &recv_transscribe_data[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(struct aux_transscribe_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + + for(int i = 0; i < nimport; i++) + { + if(recv_transscribe_data[i].old_index >= N_trans) + terminate("recv_transscribe_data[i].old_index >= N_trans"); + + if(recv_transscribe_data[i].old_index < 0) + terminate("recv_transscribe_data[i].old_index < 0"); + + int old_index = recv_transscribe_data[i].old_index; + + recv_transscribe_data[i].new_task = trans_table[old_index].new_task; + recv_transscribe_data[i].new_index = trans_table[old_index].new_index; + +#if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) + // Nothing to do here +#else /* #if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) */ + if(recv_transscribe_data[i].new_task >= 0) + { + if(trans_table[old_index].wrapped) + { + int bitflags = ffs(recv_transscribe_data[i].image_flags) - 1; + 
int zbits = (bitflags / 9); + int ybits = (bitflags - zbits * 9) / 3; + int xbits = bitflags - zbits * 9 - ybits * 3; + + if(trans_table[old_index].wrapped & 1) + { + if(xbits == 1) + xbits = 0; + else if(xbits == 0) + xbits = 2; + else /* xbits == 2 */ + terminate("b"); + } + else if(trans_table[old_index].wrapped & 2) + { + if(xbits == 1) + { + terminate("a"); + } + else if(xbits == 0) + xbits = 1; + else /* xbits == 2 */ + xbits = 0; + } + + if(trans_table[old_index].wrapped & 4) + { + if(ybits == 1) + ybits = 0; + else if(ybits == 0) + ybits = 2; + else + { + terminate("b"); + } + } + else if(trans_table[old_index].wrapped & 8) + { + if(ybits == 1) + { + terminate("a"); + } + else if(ybits == 0) + ybits = 1; + else + ybits = 0; + } + + if(trans_table[old_index].wrapped & 16) + { + if(zbits == 1) + zbits = 0; + else if(zbits == 0) + zbits = 2; + else + { + terminate("b"); + } + } + else if(trans_table[old_index].wrapped & 32) + { + if(zbits == 1) + { + terminate("a"); + } + else if(zbits == 0) + zbits = 1; + else + zbits = 0; + } + + recv_transscribe_data[i].image_flags = (1 << (zbits * 9 + ybits * 3 + xbits)); + } + } +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) #else */ + } + + /* now return the data */ + for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + MPI_Sendrecv(&recv_transscribe_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct aux_transscribe_data), + MPI_BYTE, recvTask, TAG_DENS_B, &send_transscribe_data[Send_offset[recvTask]], + Send_count[recvTask] * sizeof(struct aux_transscribe_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + + for(int j = 0; j < NTask; j++) + Send_count[j] = 0; + + /* copy the results over to the DC structure */ + for(int i = 0; i < MaxNvc; i++) + { + int task = DC[i].task; + if(task >= 0) + { + 
DC[i].task = send_transscribe_data[Send_offset[task] + Send_count[task]].new_task; + DC[i].index = send_transscribe_data[Send_offset[task] + Send_count[task]].new_index; + DC[i].image_flags = send_transscribe_data[Send_offset[task] + Send_count[task]].image_flags; + Send_count[task]++; + } + } + + myfree(recv_transscribe_data); + myfree(send_transscribe_data); + + /* now we can exchange the DC data. The task where each item should go is stored in 'next' at this point */ + for(int j = 0; j < NTask; j++) + Send_count[j] = 0; + + /* count where they should go */ + for(int i = 0; i < MaxNvc; i++) + { + if(DC[i].task >= 0) + { + int task = DC[i].next; + if(task >= 0) + { + if(task >= NTask) + terminate("Thistask=%d i=%d Nvc=%d MaxNvc=%d DC[i].task=%d DC[i].next=%d\n", ThisTask, i, Nvc, MaxNvc, DC[i].task, + DC[i].next); + + if(DC[i].index >= 0) + Send_count[task]++; + } + } + } + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + nimport = nexport = 0; + Recv_offset[0] = Send_offset[0] = 0; + + for(int j = 0; j < NTask; j++) + { + nexport += Send_count[j]; + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + /* make sure that we have enough room to store the new DC list */ + while(nimport > MaxNvc) + { + int old_MaxNvc = MaxNvc; + Mesh.Indi.AllocFacNvc *= ALLOC_INCREASE_FACTOR; + MaxNvc = Mesh.Indi.AllocFacNvc; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNvc=%d Indi.AllocFacNvc=%g\n", ThisTask, MaxNvc, Mesh.Indi.AllocFacNvc); +#endif /* #ifdef VERBOSE */ + DC = myrealloc_movable(DC, MaxNvc * sizeof(connection)); + for(int n = old_MaxNvc; n < MaxNvc; n++) + DC[n].task = -1; + } + + connection *tmpDC = mymalloc("tmpDC", nexport * sizeof(connection)); + + for(int j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(int i = 0; i < MaxNvc; i++) + { + if(DC[i].task >= 0) + { + int task = DC[i].next; + 
+ if(task >= 0 && DC[i].index >= 0) + tmpDC[Send_offset[task] + Send_count[task]++] = DC[i]; + } + } + + /* exchange the connection information */ + + for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + MPI_Sendrecv(&tmpDC[Send_offset[recvTask]], Send_count[recvTask] * sizeof(connection), MPI_BYTE, recvTask, TAG_DENS_B, + &DC[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(connection), MPI_BYTE, recvTask, TAG_DENS_B, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + + myfree(tmpDC); + + Nvc = nimport; + + /* mark the remaining ones as available (chained into a free list via 'next') */ + for(int i = Nvc; i < MaxNvc - 1; i++) + { + DC[i].next = i + 1; + DC[i].task = -1; + } + if(Nvc < MaxNvc) /* the last slot is free only if Nvc < MaxNvc; for Nvc == MaxNvc it holds a received connection */ + DC[MaxNvc - 1].next = DC[MaxNvc - 1].task = -1; + + if(Nvc < MaxNvc) + FirstUnusedConnection = Nvc; + else + FirstUnusedConnection = -1; + + /* now we need to connect the information to the particles, this we do via the IDs */ + + local_trans_data = mymalloc("local_trans_data", NumGas * sizeof(struct local_aux_trans_data)); + for(int i = 0; i < NumGas; i++) + { + local_trans_data[i].ID = P[i].ID; + local_trans_data[i].new_index = i; /* is here used as rank of the particle */ + } + mysort(local_trans_data, NumGas, sizeof(struct local_aux_trans_data), domain_compare_local_trans_data_ID); + + mysort(DC, Nvc, sizeof(connection), domain_compare_connection_ID); + + int last = -1; + for(i = 0, j = 0; i < NumGas && j < Nvc; i++) + { + int k = local_trans_data[i].new_index; + + if(P[k].ID < DC[j].ID) + { + /* this particle has no connection information (new cell) */ + SphP[k].first_connection = -1; + SphP[k].last_connection = -1; + } + else if(P[k].ID == DC[j].ID) + { + SphP[k].first_connection = j; + + while(j < Nvc) + { + SphP[k].last_connection = j; + + if(last >= 0) + DC[last].next = j; + + last = j; + j++; + if(j >= Nvc) + break; + if(P[k].ID != DC[j].ID) + break; + } + } + else + { + terminate("strange"); /* P[k].ID > DC[j].ID: a received connection whose ID matches no local gas cell */ + } + }
+ + for(; i < NumGas; i++) + { + int k = local_trans_data[i].new_index; + SphP[k].first_connection = -1; + SphP[k].last_connection = -1; + } + + if(last >= 0) + DC[last].next = -1; + + myfree(local_trans_data); + + double t1 = second(); + mpi_printf("DOMAIN: done with rearranging connection information (took %g sec)\n", timediff(t0, t1)); +} + +/*! \brief Sort comparator ordering connection entries by ascending ID. + * + * For connection data. + * + * \param[in] a Pointer to first object. + * \param[in] b Pointer to second object. + * + * \return -1 if a->ID is smaller, +1 if it is larger, 0 if both IDs are equal. + */ +int domain_compare_connection_ID(const void *a, const void *b) +{ + if(((connection *)a)->ID < (((connection *)b)->ID)) + return -1; + + if(((connection *)a)->ID > (((connection *)b)->ID)) + return +1; + + return 0; +} + +/*! \brief Sort comparator ordering local translation entries by ascending ID. + * + * For local_aux_trans_data. + * + * \param[in] a Pointer to first object. + * \param[in] b Pointer to second object. + * + * \return -1 if a->ID is smaller, +1 if it is larger, 0 if both IDs are equal. + */ +int domain_compare_local_trans_data_ID(const void *a, const void *b) +{ + if(((struct local_aux_trans_data *)a)->ID < (((struct local_aux_trans_data *)b)->ID)) + return -1; + + if(((struct local_aux_trans_data *)a)->ID > (((struct local_aux_trans_data *)b)->ID)) + return +1; + + return 0; +} + +/*! \brief Sort comparator ordering received translation entries by ascending ID. + * + * For aux_trans_data. + * + * \param[in] a Pointer to first object. + * \param[in] b Pointer to second object. + * + * \return -1 if a->ID is smaller, +1 if it is larger, 0 if both IDs are equal. + */ +int domain_compare_recv_trans_data_ID(const void *a, const void *b) +{ + if(((struct aux_trans_data *)a)->ID < (((struct aux_trans_data *)b)->ID)) + return -1; + + if(((struct aux_trans_data *)a)->ID > (((struct aux_trans_data *)b)->ID)) + return +1; + + return 0; +} + +/*! \brief Compare which old_task is larger. + * + * For aux_trans_data. + * + * \param[in] a Pointer to first object. + * \param[in] b Pointer to second object. + * + * \return (-1,0,1) -1 if a->old_task is smaller.
+ */ +int domain_compare_recv_trans_data_oldtask(const void *a, const void *b) +{ + if(((struct aux_trans_data *)a)->old_task < (((struct aux_trans_data *)b)->old_task)) + return -1; + + if(((struct aux_trans_data *)a)->old_task > (((struct aux_trans_data *)b)->old_task)) + return +1; + + return 0; +} diff --git a/src/amuse/community/arepo/src/domain/domain_balance.c b/src/amuse/community/arepo/src/domain/domain_balance.c new file mode 100644 index 0000000000..fcb384ae38 --- /dev/null +++ b/src/amuse/community/arepo/src/domain/domain_balance.c @@ -0,0 +1,1154 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain/domain_balance.c + * \date 05/2018 + * \brief Load-balancing algorithms. + * \details Algorithms to estimate cost of different particles and cells + * and to balance the workload and memory usage equally over the + * mpi tasks. 
+ * contains functions: + * double domain_grav_tot_costfactor(int i) + * double domain_hydro_tot_costfactor(int i) + * void domain_init_sum_cost(void) + * void domain_sumCost(void) + * void domain_combine_topleaves_to_domains(int ncpu, int + * ndomain) + * int domain_sort_task(const void *a, const void *b) + * int domain_sort_load(const void *a, const void *b) + * static int mydata_cmp(struct mydata *lhs, struct mydata *rhs) + * void domain_combine_multipledomains(void) + * void domain_optimize_domain_to_task_mapping(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 17.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/bsd_tree.h" +#include "../domain/domain.h" +#include "../mesh/voronoi/voronoi.h" + +/* do some preparation work for use of red-black ordered binary tree based on BSD macros */ + +/*! \brief Defines structure of mytree nodes. + */ +struct mydata +{ + double pri; + int target; + RB_ENTRY(mydata) linkage; /* this creates the linkage pointers needed by the RB tree, using symbolic name 'linkage' */ +}; + +/* prototype of comparison function of tree elements */ +static int mydata_cmp(struct mydata *lhs, struct mydata *rhs); + +/* the following macro declares 'struct mytree', which is the header element needed as handle for a tree */ +RB_HEAD(mytree, mydata); + +/* the following macros declare appropriate function prototypes and functions needed for this type of tree */ +RB_PROTOTYPE_STATIC(mytree, mydata, linkage, mydata_cmp); +RB_GENERATE_STATIC(mytree, mydata, linkage, mydata_cmp); + +/*! \brief Computes gravity cost. + * + * All timebins in which the particle appears are summed, and the relative + * frequency with which this timebin is executed is taken into account. + * + * \param[in] i Index of cell in P and SphP array. 
+ * + * \return cost-factor. + */ +double domain_grav_tot_costfactor(int i) +{ + double w = MIN_FLOAT_NUMBER; + +#ifdef SELFGRAVITY + for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestActiveTimeBin; bin++) + { + if(domain_to_be_balanced[bin]) + { +#ifdef HIERARCHICAL_GRAVITY + if(bin >= P[i].TimeBinGrav) +#endif /* #ifdef HIERARCHICAL_GRAVITY */ + { + if(domain_bintolevel[bin] >= 0) + w += domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[bin]]; + else + { + if(domain_refbin[bin] >= 0) + w += domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[domain_refbin[bin]]]; + else + w += domain_grav_weight[bin]; + } + } + } + } +#endif /* #ifdef SELFGRAVITY */ + + return w; +} + +/*! \brief Computes hydro cost. + * + * If a cell is active on a certain timebin, it is assigned a cost of "1". + * All active timebins are summed, and the frequency with which each timebin + * is executed is taken into account. + * + * \param[in] i Index of cell in P and SphP array. + * + * \return cost-factor. + */ +double domain_hydro_tot_costfactor(int i) +{ + double w = 0; + + if(P[i].Type == 0) + for(int bin = P[i].TimeBinHydro; bin <= All.HighestOccupiedTimeBin; bin++) + if(domain_to_be_balanced[bin]) + w += domain_hydro_weight[bin]; + + return w; +} + +/*! \brief Prepares cost measurement. + * + * This function prepares the measurement of the total cost on each domain. + * In particular, we determine how the timebins are mapped to the explicit + * measurements of the gravity cost stored in the P.GravCost[] array (which + * in general will only be available for a subset of all timebins). For the + * unmatched timebins, a closest bin is selected that is the most similar in + * terms of particle number on the bin. Finally, the routine also determines + * how often each timebin is executed in one cycle associated with the + * highest occupied timebin. 
+ * + * \return void + */ +void domain_init_sum_cost(void) +{ + long long tot_count[TIMEBINS], tot_count_sph[TIMEBINS]; + + sumup_large_ints(TIMEBINS, TimeBinsGravity.TimeBinCount, tot_count); + sumup_large_ints(TIMEBINS, TimeBinsHydro.TimeBinCount, tot_count_sph); + + for(int i = 0; i < TIMEBINS; i++) + { + domain_bintolevel[i] = -1; + domain_refbin[i] = -1; + } + + for(int j = 0; j < GRAVCOSTLEVELS; j++) /* bins that have known levels at this point */ + if(All.LevelToTimeBin[j] >= 0) + domain_bintolevel[All.LevelToTimeBin[j]] = j; + + for(int i = 0; i < TIMEBINS; i++) + if(tot_count[i] > 0 && domain_bintolevel[i] < 0) /* need to find a reference bin for this one */ + { + double mindiff = MAX_REAL_NUMBER; + int ref_bin = -1; + for(int j = 0; j < TIMEBINS; j++) + if(domain_bintolevel[j] >= 0 && tot_count[j] > 0) + { + if(mindiff > llabs(tot_count[i] - tot_count[j])) + { + mindiff = llabs(tot_count[i] - tot_count[j]); + ref_bin = j; + } + } + + if(ref_bin >= 0) + domain_refbin[i] = ref_bin; + } + + for(int i = 0; i < TIMEBINS; i++) + { + domain_to_be_balanced[i] = 0; + domain_grav_weight[i] = 1; + domain_hydro_weight[i] = 1; + } + +#ifdef HIERARCHICAL_GRAVITY + + domain_to_be_balanced[All.HighestActiveTimeBin] = 1; + domain_grav_weight[All.HighestActiveTimeBin] = 1; + domain_hydro_weight[All.HighestActiveTimeBin] = 1; + + for(int j = All.HighestActiveTimeBin - 1; j >= All.LowestOccupiedTimeBin; j--) + { + if(tot_count[j] > 0 || tot_count_sph[j] > 0) + domain_to_be_balanced[j] = 1; + + domain_grav_weight[j] += 2; + } + + for(int i = All.SmallestTimeBinWithDomainDecomposition - 1, weight = 1; i >= All.LowestOccupiedTimeBin; i--, weight *= 2) + { + if(tot_count[i] > 0) + { + domain_grav_weight[i] = weight; + + for(int j = i - 1; j >= All.LowestOccupiedTimeBin; j--) + domain_grav_weight[j] += 2 * weight; + } + + if(tot_count_sph[i] > 0) + domain_hydro_weight[i] = weight; + } + +#else /* #ifdef HIERARCHICAL_GRAVITY */ + + domain_to_be_balanced[All.HighestActiveTimeBin] = 
1; + domain_grav_weight[All.HighestActiveTimeBin] = 1; + domain_hydro_weight[All.HighestActiveTimeBin] = 1; + + for(int i = All.SmallestTimeBinWithDomainDecomposition - 1, weight = 1; i >= All.LowestOccupiedTimeBin; i--, weight *= 2) + { + if(tot_count[i] > 0 || tot_count_sph[i] > 0) + domain_to_be_balanced[i] = 1; + + if(tot_count[i] > 0) + domain_grav_weight[i] = weight; + + if(tot_count_sph[i] > 0) + domain_hydro_weight[i] = weight; + } + +#endif /* #ifdef HIERARCHICAL_GRAVITY #else */ +} + +/*! \brief Determine cost and load + * + * This function determines the cost and load associated with each top-level + * leaf node of the tree. These leave nodes can be distributed among the + * processors in order to reach a good work-load and memory-load balance. + * + * \return void + */ +void domain_sumCost(void) +{ + int i, j, n, no, nexport = 0, nimport = 0, ngrp, task, loc_first_no; + + struct domain_cost_data *loc_DomainLeaveNode, *listCost, *export_node_data, *import_node_data; + + int *blocksize = mymalloc("blocksize", sizeof(int) * NTask); + int blk = NTopleaves / NTask; + int rmd = NTopleaves - blk * NTask; /* remainder */ + int pivot_no = rmd * (blk + 1); + + for(task = 0, loc_first_no = 0; task < NTask; task++) + { + if(task < rmd) + blocksize[task] = blk + 1; + else + blocksize[task] = blk; + + if(task < ThisTask) + loc_first_no += blocksize[task]; + } + + loc_DomainLeaveNode = mymalloc("loc_DomainLeaveNode", blocksize[ThisTask] * sizeof(struct domain_cost_data)); + memset(loc_DomainLeaveNode, 0, blocksize[ThisTask] * sizeof(struct domain_cost_data)); + + listCost = mymalloc("listCost", NTopleaves * sizeof(struct domain_cost_data)); + + int *no_place = mymalloc("no_place", NTopleaves * sizeof(int)); + memset(no_place, -1, NTopleaves * sizeof(int)); + + for(j = 0; j < NTask; j++) + Send_count[j] = 0; + + /* find for each particle its top-leave, and then add the associated cost with it */ + for(n = 0; n < NumPart; n++) + { +#ifdef ADDBACKGROUNDGRID + 
if(P[n].Type != 0) + continue; +#endif /* #ifdef ADDBACKGROUNDGRID */ + no = 0; + + peanokey mask = ((peanokey)7) << (3 * (BITS_PER_DIMENSION - 1)); + int shift = 3 * (BITS_PER_DIMENSION - 1); + + while(topNodes[no].Daughter >= 0) + { + no = topNodes[no].Daughter + (int)((Key[n] & mask) >> shift); + mask >>= 3; + shift -= 3; + } + + no = topNodes[no].Leaf; + + int p = no_place[no]; + if(p < 0) + { + p = nexport++; + no_place[no] = p; + + memset(&listCost[p], 0, sizeof(struct domain_cost_data)); + listCost[p].no = no; + + if(no < pivot_no) + task = no / (blk + 1); + else + task = rmd + (no - pivot_no) / blk; /* note: if blk=0, then this case can not occur, since then always no < pivot_no */ + + if(task < 0 || task >= NTask) /* task indexes Send_count[], whose valid entries are 0..NTask-1 */ + terminate("task < 0 || task >= NTask"); + + Send_count[task]++; + } + + listCost[p].Count += 1; + listCost[p].Work += domain_grav_tot_costfactor(n); + listCost[p].WorkSph += domain_hydro_tot_costfactor(n); + + if(P[n].Type == 0) + listCost[p].CountSph += 1; + } + + myfree(no_place); + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + nimport += Recv_count[j]; + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + export_node_data = mymalloc("export_node_data", nexport * sizeof(struct domain_cost_data)); + import_node_data = mymalloc("import_node_data", nimport * sizeof(struct domain_cost_data)); + + for(j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(i = 0; i < nexport; i++) + { + if(listCost[i].no < pivot_no) + task = listCost[i].no / (blk + 1); + else + task = rmd + + (listCost[i].no - pivot_no) / blk; /* note: if blk=0, then this case can not occur, since then always no < pivot_no */ + + int ind = Send_offset[task] + Send_count[task]++; + export_node_data[ind] = listCost[i]; + } + + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) /*
note: here we also have a transfer from each task to itself (for ngrp=0) */ + { + int recvTask = ThisTask ^ ngrp; + if(recvTask < NTask) + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + MPI_Sendrecv(&export_node_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct domain_cost_data), MPI_BYTE, + recvTask, TAG_DENS_B, &import_node_data[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(struct domain_cost_data), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + + for(i = 0; i < nimport; i++) + { + int j = import_node_data[i].no - loc_first_no; + + if(j < 0 || j >= blocksize[ThisTask]) + terminate("j=%d < 0 || j>= blocksize[ThisTask]=%d loc_first_no=%d import_node_data[i].no=%d i=%d nimport=%d", j, + blocksize[ThisTask], loc_first_no, import_node_data[i].no, i, nimport); + + loc_DomainLeaveNode[j].Count += import_node_data[i].Count; + loc_DomainLeaveNode[j].Work += import_node_data[i].Work; + loc_DomainLeaveNode[j].CountSph += import_node_data[i].CountSph; + loc_DomainLeaveNode[j].WorkSph += import_node_data[i].WorkSph; + } + + myfree(import_node_data); + myfree(export_node_data); + + /* now share the cost data across all processors */ + int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask); + int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask); + + for(task = 0; task < NTask; task++) + bytecounts[task] = blocksize[task] * sizeof(struct domain_cost_data); + + for(task = 1, byteoffset[0] = 0; task < NTask; task++) + byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1]; + + MPI_Allgatherv(loc_DomainLeaveNode, bytecounts[ThisTask], MPI_BYTE, DomainLeaveNode, bytecounts, byteoffset, MPI_BYTE, + MPI_COMM_WORLD); + + myfree(byteoffset); + myfree(bytecounts); + myfree(listCost); + myfree(loc_DomainLeaveNode); + myfree(blocksize); +} + +/*! \brief Uses cost function to combine top-level nodes to domains. 
+ *
+ *  This function uses the cumulative cost function (which weights work-load
+ *  and memory-load equally) to subdivide the list of top-level leaf nodes
+ *  into pieces that are (approximately) equal in size.
+ *
+ *  The routine makes three passes over the top-level leaves:
+ *   1. Leaves whose weighted gravity (hydro) work exceeds the per-piece
+ *      threshold normsum_work / ncpu (normsum_worksph / ncpu) have their
+ *      excess accumulated in todistribute_grav (todistribute_sph); the Count
+ *      fields of all other leaves are summed up as weights.
+ *   2. A private copy of the work values is made in which the heavy leaves
+ *      are capped at the threshold and the excess is spread over the other
+ *      leaves in proportion to their Count.
+ *   3. The leaves are walked in order and consecutive leaves are grouped into
+ *      ncpu pieces; the first and last leaf index of piece i are stored in
+ *      DomainStartList[i] and DomainEndList[i].
+ *
+ *  fac_work, fac_worksph, fac_load, normsum_work and normsum_worksph are not
+ *  declared in this function; they are read from an enclosing scope.
+ *
+ *  \param[in] ncpu Number of chunks/domains.
+ *  \param[in] ndomain Number of topleaves.
+ *
+ *  \return void
+ */
+void domain_combine_topleaves_to_domains(int ncpu, int ndomain)
+{
+  double t0 = second();
+
+  double max_work = 0;
+  double workhalfnode = 0.5 / ndomain; /* half of the average share of a single leaf */
+  double workavg = 1.0 / ncpu;         /* target share of a single piece */
+  double work_before = 0, workavg_before = 0;
+  int start = 0;
+
+  int nabove_grav = 0, nabove_sph = 0; /* over-threshold counters; incremented below but not read again in this function */
+  double todistribute_grav = 0.0;
+  double todistribute_sph = 0.0;
+  double weightsum_grav = 0.0;
+  double weightsum_sph = 0.0;
+
+  for(int i = 0; i < ndomain; i++)
+    {
+      if(fac_work * DomainLeaveNode[i].Work > normsum_work / ncpu)
+        {
+          nabove_grav++;
+          todistribute_grav += DomainLeaveNode[i].Work - normsum_work / ncpu / fac_work; /* excess above the threshold */
+        }
+      else
+        weightsum_grav += DomainLeaveNode[i].Count;
+
+      if(fac_worksph * DomainLeaveNode[i].WorkSph > normsum_worksph / ncpu)
+        {
+          nabove_sph++;
+          todistribute_sph += DomainLeaveNode[i].WorkSph - normsum_worksph / ncpu / fac_worksph;
+        }
+      else
+        weightsum_sph += DomainLeaveNode[i].Count;
+    }
+
+  struct leafnode_data
+  {
+    double workgrav;
+    double worksph;
+  };
+
+  struct leafnode_data *leaf = (struct leafnode_data *)mymalloc("leaf", ndomain * sizeof(struct leafnode_data));
+
+  for(int i = 0; i < ndomain; i++)
+    {
+      leaf[i].workgrav = DomainLeaveNode[i].Work;
+      leaf[i].worksph = DomainLeaveNode[i].WorkSph;
+
+      if(fac_work > 0 && weightsum_grav > 0) /* the guards keep both divisions below well defined */
+        {
+          if(fac_work * DomainLeaveNode[i].Work > normsum_work / ncpu)
+            leaf[i].workgrav = normsum_work / ncpu / fac_work; /* cap a heavy leaf at the threshold */
+          else
+            leaf[i].workgrav += (DomainLeaveNode[i].Count / weightsum_grav) * todistribute_grav;
+        }
+
+      if(fac_worksph > 0 && weightsum_sph > 0)
+        {
+          if(fac_worksph * DomainLeaveNode[i].WorkSph > normsum_worksph / ncpu)
+            leaf[i].worksph = normsum_worksph / ncpu / fac_worksph;
+          else
+            leaf[i].worksph += (DomainLeaveNode[i].Count / weightsum_sph) * todistribute_sph;
+        }
+    }
+
+  for(int i = 0; i < ncpu; i++)
+    {
+      double work = 0;
+      int end = start;
+
+      work += fac_work * leaf[end].workgrav + fac_load * DomainLeaveNode[end].Count + fac_worksph * leaf[end].worksph;
+
+      while((work + work_before +
+             (end + 1 < ndomain ? fac_work * leaf[end + 1].workgrav + fac_load * DomainLeaveNode[end + 1].Count +
+                                      fac_worksph * leaf[end + 1].worksph
+                                : 0) <
+             workavg + workavg_before + workhalfnode) ||
+            (i == ncpu - 1 && end < ndomain - 1)) /* the last piece takes every leaf that is still left */
+        {
+          if((ndomain - end) > (ncpu - i)) /* grow only if each later piece can still receive at least one leaf */
+            end++;
+          else
+            break;
+
+          work += fac_work * leaf[end].workgrav + fac_load * DomainLeaveNode[end].Count + fac_worksph * leaf[end].worksph;
+        }
+
+      DomainStartList[i] = start;
+      DomainEndList[i] = end;
+
+      work_before += work;
+      workavg_before += workavg;
+      start = end + 1;
+
+      if(max_work < work)
+        max_work = work;
+    }
+
+  myfree(leaf);
+
+  double t1 = second();
+  mpi_printf("DOMAIN: balance reached among multiple-domains=%g, average leave-nodes per domain=%g (took %g sec)\n",
+             max_work / workavg, ((double)ndomain) / ncpu, timediff(t0, t1));
+}
+
+/*! \brief Structure containing data for segments.
+ *
+ *  One entry describes one contiguous range of top-level leaves together
+ *  with its accumulated cost and the task it has been assigned to.
+ */
+static struct domain_segments_data
+{
+  int task, start, end; /* assigned task; first and last top-level leaf index of the segment (inclusive) */
+  double bin_GravCost[TIMEBINS];
+  double bin_HydroCost[TIMEBINS];
+  double work;
+  double load;
+  double worksph;
+  double normalized_load; /* sort key used by domain_sort_load() */
+} * domainAssign;
+
+/*! \brief Structure containing data for task list.
+ *
+ *  Running totals of everything that has been assigned to one task so far.
+ */
+struct tasklist_data
+{
+  double bin_GravCost[TIMEBINS];
+  double bin_HydroCost[TIMEBINS];
+  double work;
+  double load;
+  double worksph;
+  int count; /* number of segments assigned to the task */
+} * tasklist;
+
+/*! \brief Comparison function for domain_segments_data structure.
+ *
+ *  Compares field task.
+ *
+ *  \param a Pointer to first object.
+ *  \param b Pointer to second object.
+ *
+ *  \return (-1,0,1); -1 if a < b.
+ */ +int domain_sort_task(const void *a, const void *b) +{ + if(((struct domain_segments_data *)a)->task < (((struct domain_segments_data *)b)->task)) + return -1; + + if(((struct domain_segments_data *)a)->task > (((struct domain_segments_data *)b)->task)) + return +1; + + return 0; +} + +/*! \brief Comparison functions for domain_segmens_data structures. + * + * Compares field normalized_load. + * + * \param a Pointer to fist object. + * \param b Pointer to second object. + * + * \return (-1,0,1) -1 if a>b. + */ +int domain_sort_load(const void *a, const void *b) +{ + if(((struct domain_segments_data *)a)->normalized_load > (((struct domain_segments_data *)b)->normalized_load)) + return -1; + + if(((struct domain_segments_data *)a)->normalized_load < (((struct domain_segments_data *)b)->normalized_load)) + return +1; + + return 0; +} + +/*! \brief Comparison function for objects of type mydata. + * + * Compares elements pri and target. + * + * \param lhs Pointer to fist object. + * \param rhs Pointer to second object. + * + * \return (-1,0,1); -1 if lhs < rhs. + */ +static int mydata_cmp(struct mydata *lhs, struct mydata *rhs) +{ + if(lhs->pri < rhs->pri) + return -1; + else if(lhs->pri > rhs->pri) + return 1; + else if(lhs->target < rhs->target) + return -1; + else if(lhs->target > rhs->target) + return 1; + + return 0; +} + +/*! \brief Assigns the domain pieces to individual MPI tasks with the goal to + * balance the work-load on different timebins. + * + * The algorithm used works as follows: + * The domains are assigned to the CPUs in sequence of decreasing "effective + * load", which is a simple combined measure of relative total gravity, hydro + * and memory load. For each assignment, a number of possible target CPUs are + * evaluated, and the assignment leading to the lowest total runtime is + * adopted. 
The set of target CPUs that is tested in each step is the one
+ *  that consists of the CPUs that currently have the lowest load in the set
+ *  of primary tasks that are examined.
+ *
+ *  Steps as implemented below:
+ *   1. the per-task accumulators in tasklist are zeroed and every top-level
+ *      leaf is tagged with the index of the piece that contains it;
+ *   2. every local particle is mapped to its top-level leaf and its gravity
+ *      and hydro cost is added, per timebin, to the piece owning that leaf;
+ *   3. the local cost tables are summed with allreduce_sparse_double_sum();
+ *   4. the pieces are sorted by normalized_load (largest first) and assigned
+ *      one by one to the candidate task giving the smallest runtime estimate;
+ *   5. domainAssign is sorted by task and DomainStartList, DomainEndList and
+ *      DomainTask are rewritten from it.
+ *
+ *  \return void
+ */
+void domain_combine_multipledomains(void)
+{
+  double t0 = second();
+
+  int ndomains = All.MultipleDomains * NTask;
+
+  domainAssign = (struct domain_segments_data *)mymalloc("domainAssign", ndomains * sizeof(struct domain_segments_data));
+
+  tasklist = mymalloc("tasklist", NTask * sizeof(struct tasklist_data));
+
+  for(int ta = 0; ta < NTask; ta++)
+    {
+      tasklist[ta].load = 0;
+      tasklist[ta].work = 0;
+      tasklist[ta].worksph = 0;
+      tasklist[ta].count = 0;
+
+      for(int i = 0; i < TIMEBINS; i++)
+        {
+          tasklist[ta].bin_GravCost[i] = 0;
+          tasklist[ta].bin_HydroCost[i] = 0;
+        }
+    }
+
+  for(int n = 0; n < ndomains; n++)
+    for(int i = DomainStartList[n]; i <= DomainEndList[n]; i++)
+      DomainTask[i] = n; /* temporarily holds the piece index; overwritten with the task at the end */
+
+  /* we first determine the grav-cost and hydro-cost separately for each
+   * timebin of all the domain-pieces that are available for a
+   * mapping to individual MPI tasks
+   */
+
+  struct cost_data
+  {
+    double GravCost;
+    double HydroCost;
+  } * loc_bin_Cost, *glob_bin_Cost; /* both tables are indexed as [bin * ndomains + piece] */
+
+  loc_bin_Cost = mymalloc_clear("loc_bin_Cost", sizeof(struct cost_data) * ndomains * TIMEBINS);
+  glob_bin_Cost = mymalloc_clear("glob_bin_Cost", sizeof(struct cost_data) * ndomains * TIMEBINS);
+
+  for(int i = 0; i < NumPart; i++)
+    {
+#ifdef ADDBACKGROUNDGRID
+      if(P[i].Type != 0)
+        continue;
+#endif /* #ifdef ADDBACKGROUNDGRID */
+      int no = 0;
+
+      peanokey mask = ((peanokey)7) << (3 * (BITS_PER_DIMENSION - 1));
+      int shift = 3 * (BITS_PER_DIMENSION - 1);
+
+      while(topNodes[no].Daughter >= 0) /* descend the top-level tree, consuming 3 key bits per level */
+        {
+          no = topNodes[no].Daughter + (int)((Key[i] & mask) >> shift);
+          mask >>= 3;
+          shift -= 3;
+        }
+
+      no = topNodes[no].Leaf;
+
+      int n = DomainTask[no]; /* piece index, see the tagging loop above */
+
+#ifdef SELFGRAVITY
+      for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestActiveTimeBin; bin++)
+        {
+          if(domain_to_be_balanced[bin])
+            {
+#ifdef HIERARCHICAL_GRAVITY
+              if(bin >= P[i].TimeBinGrav)
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+                {
+                  if(domain_bintolevel[bin] >= 0)
+                    loc_bin_Cost[bin * ndomains + n].GravCost +=
+                        MIN_FLOAT_NUMBER + domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[bin]];
+                  else
+                    {
+                      if(domain_refbin[bin] >= 0)
+                        loc_bin_Cost[bin * ndomains + n].GravCost +=
+                            MIN_FLOAT_NUMBER + domain_grav_weight[bin] * P[i].GravCost[domain_bintolevel[domain_refbin[bin]]];
+                      else
+                        loc_bin_Cost[bin * ndomains + n].GravCost += domain_grav_weight[bin];
+                    }
+                }
+            }
+        }
+#endif /* #ifdef SELFGRAVITY */
+
+      if(P[i].Type == 0)
+        {
+          for(int bin = P[i].TimeBinHydro; bin <= All.HighestActiveTimeBin; bin++)
+            if(domain_to_be_balanced[bin])
+              loc_bin_Cost[bin * ndomains + n].HydroCost += domain_hydro_weight[bin];
+        }
+    }
+
+  allreduce_sparse_double_sum((double *)(loc_bin_Cost + All.LowestOccupiedTimeBin * ndomains),
+                              (double *)(glob_bin_Cost + All.LowestOccupiedTimeBin * ndomains),
+                              2 * ndomains * (All.HighestOccupiedTimeBin - All.LowestOccupiedTimeBin + 1)); /* 2 doubles per cost_data */
+
+  /* now assign this cost to the domainAssign-structure, which keeps track of the different pieces */
+  double tot_work = 0;
+  double tot_load = 0;
+  double tot_worksph = 0;
+
+  for(int n = 0; n < ndomains; n++)
+    {
+      domainAssign[n].start = DomainStartList[n];
+      domainAssign[n].end = DomainEndList[n];
+      domainAssign[n].work = 0;
+      domainAssign[n].load = 0;
+      domainAssign[n].worksph = 0;
+
+      for(int i = 0; i < TIMEBINS; i++)
+        {
+          domainAssign[n].bin_GravCost[i] = glob_bin_Cost[i * ndomains + n].GravCost;
+          domainAssign[n].bin_HydroCost[i] = glob_bin_Cost[i * ndomains + n].HydroCost;
+        }
+
+      for(int i = DomainStartList[n]; i <= DomainEndList[n]; i++)
+        {
+          domainAssign[n].work += DomainLeaveNode[i].Work;
+          domainAssign[n].load += DomainLeaveNode[i].Count;
+          domainAssign[n].worksph += DomainLeaveNode[i].WorkSph;
+        }
+
+      tot_work += domainAssign[n].work;
+      tot_load += domainAssign[n].load;
+      tot_worksph += domainAssign[n].worksph;
+    }
+
+  for(int n = 0; n < ndomains; n++)
+    {
+      domainAssign[n].normalized_load = domainAssign[n].work / (tot_work + MIN_FLOAT_NUMBER) +
+                                        domainAssign[n].worksph / (tot_worksph + MIN_FLOAT_NUMBER) +
+                                        domainAssign[n].load / ((double)tot_load + MIN_FLOAT_NUMBER);
+    }
+
+  myfree(glob_bin_Cost);
+  myfree(loc_bin_Cost);
+
+  /* sort the pieces according to their normalized work-load, with the most heavily loaded coming first */
+  mysort(domainAssign, ndomains, sizeof(struct domain_segments_data), domain_sort_load);
+
+  /* initialize a structure that stores the maximum gravity and hydro cost load for each timebin */
+  double max_GravCost[TIMEBINS], max_HydroCost[TIMEBINS];
+  for(int i = 0; i < TIMEBINS; i++)
+    {
+      max_GravCost[i] = 0;
+      max_HydroCost[i] = 0;
+    }
+
+  double max_load = 0;
+
+  /* create priority trees, one for the cost of each occupied timebin,
+   * one for the hydro cost of each occupied timebin;
+   * only the entries of timebins flagged in domain_to_be_balanced are initialized */
+  struct mytree queue_gravcost[TIMEBINS];
+  struct mytree queue_hydrocost[TIMEBINS];
+  struct mytree queue_load;
+  struct mydata *ngrav[TIMEBINS];
+  struct mydata *nhydro[TIMEBINS];
+  struct mydata *nload;
+
+  for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++)
+    {
+      if(domain_to_be_balanced[bin])
+        {
+          RB_INIT(&queue_gravcost[bin]);
+          ngrav[bin] = mymalloc("ngrav[bin]", NTask * sizeof(struct mydata));
+
+          RB_INIT(&queue_hydrocost[bin]);
+          nhydro[bin] = mymalloc("nhydro[bin]", NTask * sizeof(struct mydata));
+        }
+    }
+
+  RB_INIT(&queue_load);
+  nload = mymalloc("nload", NTask * sizeof(struct mydata));
+  for(int i = 0; i < NTask; i++)
+    {
+      nload[i].pri = 0;
+      nload[i].target = i;
+      RB_INSERT(mytree, &queue_load, &nload[i]);
+    }
+
+  /* fill in all the tasks into each queue. The priority will be the current cost of the bin, the field 'target' is used to label the task
+   */
+  for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++)
+    {
+      if(!domain_to_be_balanced[bin])
+        continue;
+
+      for(int i = 0; i < NTask; i++)
+        {
+          ngrav[bin][i].pri = 0;
+          ngrav[bin][i].target = i;
+          RB_INSERT(mytree, &queue_gravcost[bin], &ngrav[bin][i]);
+
+          nhydro[bin][i].pri = 0;
+          nhydro[bin][i].target = i;
+          RB_INSERT(mytree, &queue_hydrocost[bin], &nhydro[bin][i]);
+        }
+    }
+
+  int n_lowest = MAX_FIRST_ELEMENTS_CONSIDERED;
+  if(n_lowest > NTask) /* every queue holds exactly NTask entries, so never ask for more */
+    n_lowest = NTask;
+
+  int rep, *candidates = mymalloc("candidates", n_lowest * sizeof(int));
+  struct mydata *np;
+
+  /* now assign each of the domains to a CPU, trying to minimize the overall runtime */
+  for(int n = 0; n < ndomains; n++)
+    {
+      double best_runtime = MAX_FLOAT_NUMBER;
+      int best_target = -1;
+
+      for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++)
+        {
+          if(!domain_to_be_balanced[bin])
+            continue;
+
+          int target;
+
+          for(int set = 0; set < 2; set++) /* set 0: gravity-cost queue, set 1: hydro-cost queue */
+            {
+              if(set == 0)
+                {
+#ifndef SELFGRAVITY
+                  continue;
+#endif /* #ifndef SELFGRAVITY */
+                  /* look up the n_lowest smallest elements from the tree */
+                  for(np = RB_MIN(mytree, &queue_gravcost[bin]), rep = 0; np != NULL && rep < n_lowest;
+                      np = RB_NEXT(mytree, &queue_gravcost[bin], np), rep++)
+                    candidates[rep] = np->target;
+                }
+              else
+                {
+                  for(np = RB_MIN(mytree, &queue_hydrocost[bin]), rep = 0; np != NULL && rep < n_lowest;
+                      np = RB_NEXT(mytree, &queue_hydrocost[bin], np), rep++)
+                    candidates[rep] = np->target;
+                }
+
+              for(rep = 0; rep < n_lowest; rep++)
+                {
+                  target = candidates[rep];
+
+                  double runtime = 0; /* estimate if piece n were given to this candidate */
+
+                  for(int i = 0; i < TIMEBINS; i++)
+                    {
+                      double sum = domainAssign[n].bin_GravCost[i] + tasklist[target].bin_GravCost[i];
+                      if(sum < max_GravCost[i]) /* a bin cannot finish before its currently slowest task */
+                        sum = max_GravCost[i];
+
+                      runtime += sum / (totgravcost + MIN_FLOAT_NUMBER);
+                    }
+
+                  for(int i = 0; i < TIMEBINS; i++)
+                    {
+                      double sum = domainAssign[n].bin_HydroCost[i] + tasklist[target].bin_HydroCost[i];
+                      if(sum < max_HydroCost[i])
+                        sum = max_HydroCost[i];
+
+                      runtime += sum / (totsphcost + MIN_FLOAT_NUMBER);
+                    }
+
+                  double load = domainAssign[n].load + tasklist[target].load;
+                  if(load < max_load)
+                    load = max_load;
+
+                  runtime += ((double)load) / totpartcount;
+
+                  if(runtime < best_runtime || best_target < 0)
+                    {
+                      best_runtime = runtime;
+                      best_target = target;
+                    }
+                }
+            }
+        }
+
+      /* now check also the load queue */
+      for(np = RB_MIN(mytree, &queue_load), rep = 0; np != NULL && rep < n_lowest; np = RB_NEXT(mytree, &queue_load, np), rep++)
+        candidates[rep] = np->target;
+
+      int target;
+
+      for(rep = 0; rep < n_lowest; rep++)
+        {
+          target = candidates[rep];
+
+          double runtime = 0;
+
+          for(int i = 0; i < TIMEBINS; i++)
+            {
+              double sum = domainAssign[n].bin_GravCost[i] + tasklist[target].bin_GravCost[i];
+              if(sum < max_GravCost[i])
+                sum = max_GravCost[i];
+
+              runtime += sum / (totgravcost + 1.0e-60); /* literal guard here; the per-bin candidate loop above uses MIN_FLOAT_NUMBER */
+            }
+
+          for(int i = 0; i < TIMEBINS; i++)
+            {
+              double sum = domainAssign[n].bin_HydroCost[i] + tasklist[target].bin_HydroCost[i];
+              if(sum < max_HydroCost[i])
+                sum = max_HydroCost[i];
+
+              runtime += sum / (totsphcost + 1.0e-60);
+            }
+
+          double load = domainAssign[n].load + tasklist[target].load;
+          if(load < max_load)
+            load = max_load;
+
+          runtime += ((double)load) / totpartcount;
+
+          if(runtime < best_runtime || best_target < 0)
+            {
+              best_runtime = runtime;
+              best_target = target;
+            }
+        }
+
+      if(best_target < 0)
+        terminate("best_target < 0");
+
+      target = best_target;
+
+      domainAssign[n].task = target;
+      tasklist[target].work += domainAssign[n].work;
+      tasklist[target].load += domainAssign[n].load;
+      tasklist[target].worksph += domainAssign[n].worksph;
+      tasklist[target].count++;
+
+      /* now update the elements in the sorted trees
+       * (each node is removed, given its new priority, and inserted again) */
+
+      RB_REMOVE(mytree, &queue_load, &nload[target]);
+      nload[target].pri = tasklist[target].load;
+      RB_INSERT(mytree, &queue_load, &nload[target]);
+
+      if(max_load < tasklist[target].load)
+        max_load = tasklist[target].load;
+
+      for(int bin = All.LowestOccupiedTimeBin; bin <= All.HighestOccupiedTimeBin; bin++)
+        {
+          if(domain_to_be_balanced[bin])
+            {
+              tasklist[target].bin_GravCost[bin] += domainAssign[n].bin_GravCost[bin];
+              tasklist[target].bin_HydroCost[bin] += domainAssign[n].bin_HydroCost[bin];
+
+              double eps_grav = 1.0e-9 * (domainAssign[n].load / totpartcount) *
+                                totgravcost; /* these will be added in order to break degeneracies in the sort-order in case the
+                                                grav/hydro cost in certain cells is zero */
+              double eps_hydro = 1.0e-9 * (domainAssign[n].load / totpartcount) * totsphcost;
+
+              RB_REMOVE(mytree, &queue_gravcost[bin], &ngrav[bin][target]);
+              ngrav[bin][target].pri = ngrav[bin][target].pri + domainAssign[n].bin_GravCost[bin] + eps_grav;
+              RB_INSERT(mytree, &queue_gravcost[bin], &ngrav[bin][target]);
+
+              RB_REMOVE(mytree, &queue_hydrocost[bin], &nhydro[bin][target]);
+              nhydro[bin][target].pri = nhydro[bin][target].pri + domainAssign[n].bin_HydroCost[bin] + eps_hydro;
+              RB_INSERT(mytree, &queue_hydrocost[bin], &nhydro[bin][target]);
+
+              if(max_GravCost[bin] < tasklist[target].bin_GravCost[bin])
+                max_GravCost[bin] = tasklist[target].bin_GravCost[bin];
+
+              if(max_HydroCost[bin] < tasklist[target].bin_HydroCost[bin])
+                max_HydroCost[bin] = tasklist[target].bin_HydroCost[bin];
+            }
+        }
+    }
+
+  myfree(candidates);
+
+  /* free the elements for the RB tree again
+   * (buffers are released in the reverse of the order in which they were allocated above) */
+  myfree(nload);
+  for(int bin = All.HighestOccupiedTimeBin; bin >= All.LowestOccupiedTimeBin; bin--)
+    {
+      if(domain_to_be_balanced[bin])
+        {
+          myfree(nhydro[bin]);
+          myfree(ngrav[bin]);
+        }
+    }
+
+  mysort(domainAssign, ndomains, sizeof(struct domain_segments_data), domain_sort_task);
+
+  for(int n = 0; n < ndomains; n++)
+    {
+      DomainStartList[n] = domainAssign[n].start;
+      DomainEndList[n] = domainAssign[n].end;
+
+      for(int i = DomainStartList[n]; i <= DomainEndList[n]; i++)
+        DomainTask[i] = domainAssign[n].task; /* replaces the temporary piece index by the final task */
+    }
+
+  myfree(tasklist);
+  myfree(domainAssign);
+
+  double t1 = second();
+  mpi_printf("DOMAIN: combining multiple-domains took %g sec\n", timediff(t0, t1));
+}
+
+/*! \brief Assign domains to tasks to minimize communication.
+ *
+ *  This function determines a permutation of the new assignment of domains to
+ *  CPUs such that the number of particles that has to be moved given the
+ *  current distribution of particles is minimized.
+ *
+ *  Each task reports the destination task that would receive most of its
+ *  local particles. The reports are gathered on all tasks and sorted with
+ *  domain_compare_count (defined elsewhere). Walking the sorted list, a
+ *  destination is relabelled to the reporting task whenever both are still
+ *  free; destinations left over are filled with the unused task numbers in
+ *  ascending order. The resulting permutation new_task[] is then applied to
+ *  DomainStartList, DomainEndList and DomainTask.
+ *
+ *  \return void
+ */
+void domain_optimize_domain_to_task_mapping(void)
+{
+  double t0 = second();
+
+  int *count_per_task = mymalloc_clear("count_per_task", NTask * sizeof(int));
+
+  /* count how many we want to send to each task */
+  for(int i = 0; i < NumPart; i++)
+    {
+      int no = 0;
+
+      while(topNodes[no].Daughter >= 0)
+        no = topNodes[no].Daughter + (Key[i] - topNodes[no].StartKey) / (topNodes[no].Size >> 3);
+
+      no = topNodes[no].Leaf;
+
+      int task = DomainTask[no];
+      count_per_task[task]++;
+    }
+
+  /* find the task that holds most of our particles (we really would like to be this task) */
+
+  int maxcount = count_per_task[0], maxtask = 0;
+  for(int i = 1; i < NTask; i++)
+    if(count_per_task[i] > maxcount) /* strict comparison: ties keep the lowest task number */
+      {
+        maxcount = count_per_task[i];
+        maxtask = i;
+      }
+
+  struct domain_count_data loc_count;
+  struct domain_count_data *domain_count = mymalloc("domain_count", NTask * sizeof(struct domain_count_data));
+
+  loc_count.task = maxtask;
+  loc_count.count = maxcount;
+  loc_count.origintask = ThisTask;
+
+  MPI_Allgather(&loc_count, sizeof(struct domain_count_data), MPI_BYTE, domain_count, sizeof(struct domain_count_data), MPI_BYTE,
+                MPI_COMM_WORLD);
+
+  qsort(domain_count, NTask, sizeof(struct domain_count_data), domain_compare_count);
+
+  /* this array will hold a permutation of all tasks constructed such that
+     particle exchange should be minimized */
+
+  int *new_task = mymalloc("new_task", NTask * sizeof(int));
+
+  /* this array will now flag tasks that have been assigned */
+  for(int i = 0; i < NTask; i++)
+    {
+      count_per_task[i] = 0;
+      new_task[i] = -1;
+    }
+
+  for(int i = 0; i < NTask; i++)
+    {
+      int task = domain_count[i].task;
+      int origin = domain_count[i].origintask;
+
+      if(new_task[task] == -1 && count_per_task[origin] == 0)
+        {
+          count_per_task[origin] = 1; /* taken */
+          new_task[task] = origin;
+        }
+    }
+
+  /* now we have to fill up still unassigned ones in case there were collisions */
+  for(int i = 0, j = 0; i < NTask; i++)
+    {
+      if(new_task[i] == -1)
+        {
+          while(count_per_task[j]) /* j only moves forward: next task number not yet used */
+            j++;
+
+          new_task[i] = j;
+          count_per_task[j] = 1;
+        }
+    }
+
+  int *copy_DomainStartList = mymalloc("copy_DomainStartList", All.MultipleDomains * NTask * sizeof(int));
+  int *copy_DomainEndList = mymalloc("copy_DomainEndList", All.MultipleDomains * NTask * sizeof(int));
+
+  memcpy(copy_DomainStartList, DomainStartList, All.MultipleDomains * NTask * sizeof(int));
+  memcpy(copy_DomainEndList, DomainEndList, All.MultipleDomains * NTask * sizeof(int));
+
+  /* apply permutation to DomainTask assignment */
+
+  for(int i = 0; i < NTask; i++)
+    for(int m = 0; m < All.MultipleDomains; m++)
+      {
+        DomainStartList[new_task[i] * All.MultipleDomains + m] = copy_DomainStartList[i * All.MultipleDomains + m];
+
+        DomainEndList[new_task[i] * All.MultipleDomains + m] = copy_DomainEndList[i * All.MultipleDomains + m];
+      }
+
+  myfree(copy_DomainEndList);
+  myfree(copy_DomainStartList);
+
+  for(int i = 0; i < NTopleaves; i++)
+    DomainTask[i] = new_task[DomainTask[i]];
+
+  myfree(new_task);
+  myfree(domain_count);
+  myfree(count_per_task);
+
+  double t1 = second();
+  mpi_printf("DOMAIN: task reshuffling took %g sec\n", timediff(t0, t1));
+}
diff --git a/src/amuse/community/arepo/src/domain/domain_box.c b/src/amuse/community/arepo/src/domain/domain_box.c
new file mode 100644
index 0000000000..d7466f2449
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_box.c
@@ -0,0 +1,336 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/domain/domain_box.c + * \date 05/2018 + * \brief Routines that determine domain box and do periodic wrapping. + * \details contains functions: + * void domain_findExtent(void) + * void do_box_wrapping(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 05.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" +#include "domain.h" + +/*! \brief Move the coordinate in pos by the global displacement vector + * + * \param[in] pos coordinate vector (3 entries).
+ * \param[in] mode displacement mode, either DISPLACE_POSITION_FORWARD or DISPLACE_POSITION_BACKWARD + * + * \return void + */ +void domain_displacePosition(MyDouble *pos, enum domain_displace_mode mode) +{ + if(mode == DISPLACE_POSITION_FORWARD) + { + double xtmp, ytmp, ztmp; + pos[0] = WRAP_X(pos[0] + All.GlobalDisplacementVector[0]); + pos[1] = WRAP_Y(pos[1] + All.GlobalDisplacementVector[1]); + pos[2] = WRAP_Z(pos[2] + All.GlobalDisplacementVector[2]); + } + else if(mode == DISPLACE_POSITION_BACKWARD) + { + double xtmp, ytmp, ztmp; + pos[0] = WRAP_X(pos[0] - All.GlobalDisplacementVector[0]); + pos[1] = WRAP_Y(pos[1] - All.GlobalDisplacementVector[1]); + pos[2] = WRAP_Z(pos[2] - All.GlobalDisplacementVector[2]); + } + else + terminate("Unkown mode %d.", mode); +} + +/*! \brief Move the coordinate for all positions by the global displacement vector + * + * \param[in] mode displacement mode, either DISPLACE_POSITION_FORWARD or DISPLACE_POSITION_BACKWARD + * + * \return void + */ +static void domain_displacePositions(enum domain_displace_mode mode) +{ + for(int i = 0; i < NumPart; i++) + { + if(P[i].ID == 0 && P[i].Mass == 0) /* derefined */ + continue; + + domain_displacePosition(P[i].Pos, mode); + + if(i < NumGas) + domain_displacePosition(SphP[i].Center, mode); + } + +#ifdef PLACEHIGHRESREGION + domain_displacePosition(All.Xmintot[1], mode); + domain_displacePosition(All.Xmaxtot[1], mode); + domain_displacePosition(All.Corner[1], mode); + domain_displacePosition(All.UpperCorner[1], mode); +#endif +} + +/*! \brief Finds the extent of the global domain grid. + * + * The minimum extent is the box size. 
+ * + * \return void + */ +void domain_findExtent(void) +{ + int i, j; + double len, xmin[3], xmax[3], xmin_glob[3], xmax_glob[3]; + + /* determine local extension */ + for(j = 0; j < 3; j++) + { + /* preset to simulation box */ + xmin[j] = 0; + xmax[j] = boxSize; + } + // Take care of stretched box +#ifdef LONG_X + xmax[0] = boxSize_X; +#endif /* #ifdef LONG_X */ +#ifdef LONG_Y + xmax[1] = boxSize_Y; +#endif /* #ifdef LONG_Y */ +#ifdef LONG_Z + xmax[2] = boxSize_Z; +#endif /* #ifdef LONG_Z */ + + for(i = 0; i < NumPart; i++) + { +#ifdef ADDBACKGROUNDGRID + if(P[i].Type != 0) + continue; +#endif /* #ifdef ADDBACKGROUNDGRID */ + for(j = 0; j < 3; j++) + { + if(xmin[j] > P[i].Pos[j]) + xmin[j] = P[i].Pos[j]; + + if(xmax[j] < P[i].Pos[j]) + xmax[j] = P[i].Pos[j]; + } + } + + MPI_Allreduce(xmin, xmin_glob, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(xmax, xmax_glob, 3, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + +#ifdef ADDBACKGROUNDGRID + for(j = 0; j < 3; j++) + if(xmax_glob[j] < All.BoxSize) + xmax_glob[j] = All.BoxSize; + + for(j = 0; j < 3; j++) + if(xmin_glob[j] > 0) + xmin_glob[j] = 0; +#endif /* #ifdef ADDBACKGROUNDGRID */ + + len = 0; + for(j = 0; j < 3; j++) + if(xmax_glob[j] - xmin_glob[j] > len) + len = xmax_glob[j] - xmin_glob[j]; + +#if defined(GRAVITY_NOT_PERIODIC) && !defined(ADDBACKGROUNDGRID) + len *= 1.2; /* enlarge box a bit to avoid triggering of an out of box recovery */ +#else /* #if defined(GRAVITY_NOT_PERIODIC) && !defined(ADDBACKGROUNDGRID) */ + len *= 1.00001; +#endif /* #if defined(GRAVITY_NOT_PERIODIC) && !defined(ADDBACKGROUNDGRID) #else */ + +#if defined(DO_NOT_RANDOMIZE_DOMAINCENTER) || !defined(GRAVITY_NOT_PERIODIC) || defined(ONEDIMS) || defined(TWODIMS) + for(j = 0; j < 3; j++) + { + DomainCenter[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]); + DomainCorner[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]) - 0.5 * len; + } +#else /* #if defined(DO_NOT_RANDOMIZE_DOMAINCENTER) || !defined(GRAVITY_NOT_PERIODIC) || defined(ONEDIMS) || 
defined(TWODIMS) */ + for(j = 0; j < 3; j++) + { + DomainCenter[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]); + DomainCenter[j] += (2. * get_random_number() - 1.) * 0.5 * len; + } + + MPI_Bcast(DomainCenter, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + + len *= 2; + + for(j = 0; j < 3; j++) + DomainCorner[j] = DomainCenter[j] - 0.5 * len; +#endif /* #if defined(DO_NOT_RANDOMIZE_DOMAINCENTER) || !defined(GRAVITY_NOT_PERIODIC) || defined(ONEDIMS) || defined(TWODIMS) #else \ + */ + + DomainLen = len; + + DomainInverseLen = 1.0 / DomainLen; + DomainFac = 1.0 / len * (((peanokey)1) << (BITS_PER_DIMENSION)); + DomainBigFac = (DomainLen / (((long long)1) << 52)); +} + +/*! \brief Makes sure all particles are within box. + * + * This function makes sure that all particle coordinates (Pos) are + * periodically mapped onto the interval [0, BoxSize]. After this function + * has been called, a new domain decomposition should be done, which will + * also force a new tree construction. + * + * \return void + */ +void do_box_wrapping(void) +{ + int j; + double boxsize[3]; + +#ifdef ADDBACKGROUNDGRID + return; +#endif /* #ifdef ADDBACKGROUNDGRID */ + + for(j = 0; j < 3; j++) + boxsize[j] = All.BoxSize; + +#ifdef LONG_X + boxsize[0] *= LONG_X; +#endif /* #ifdef LONG_X */ +#ifdef LONG_Y + boxsize[1] *= LONG_Y; +#endif /* #ifdef LONG_Y */ +#ifdef LONG_Z + boxsize[2] *= LONG_Z; +#endif /* #ifdef LONG_Z */ + +#if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) && (NUMDIMS > 2) + domain_displacePositions(DISPLACE_POSITION_BACKWARD); + + if(ThisTask == 0) + { + double prefac = 1.; +#ifdef PLACEHIGHRESREGION + prefac = 0.5; +#endif + for(j = 0; j < 3; j++) + All.GlobalDisplacementVector[j] = (get_random_number() - 0.5) * boxsize[j] * prefac; + } + + mpi_printf("DOMAIN: New global displacement vector: %g, %g, %g\n", All.GlobalDisplacementVector[0], All.GlobalDisplacementVector[1], + All.GlobalDisplacementVector[2]); + 
MPI_Bcast(All.GlobalDisplacementVector, 3, MPI_DOUBLE, 0, MPI_COMM_WORLD); + + domain_displacePositions(DISPLACE_POSITION_FORWARD); +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) && !defined(DO_NOT_RANDOMIZE_DOMAINCENTER) && defined(SELFGRAVITY) && (NUMDIMS > 2) */ + + int i; + for(i = 0; i < NumPart; i++) + { + if(i < NumGas) + trans_table[i].wrapped = 0; + +#if defined(GRAVITY_NOT_PERIODIC) + if(P[i].Type != 0) + continue; +#endif /* #if defined(GRAVITY_NOT_PERIODIC) */ + +#if !defined(REFLECTIVE_X) + while(P[i].Pos[0] < 0) + { + P[i].Pos[0] += boxsize[0]; + if(i < NumGas) + trans_table[i].wrapped |= 1; + } + + while(P[i].Pos[0] >= boxsize[0]) + { + P[i].Pos[0] -= boxsize[0]; + if(i < NumGas) + trans_table[i].wrapped |= 2; + } + +#else /* #if !defined(REFLECTIVE_X) */ + if(P[i].Pos[0] < 0 || P[i].Pos[0] >= boxsize[0]) + { + char buf[1000]; + + sprintf(buf, "i=%d ID=%d type=%d moved out of box. x=%g", i, P[i].ID, P[i].Type, P[i].Pos[0]); + terminate(buf); + } +#endif /* #if !defined(REFLECTIVE_X) #else */ + +#if !defined(REFLECTIVE_Y) + while(P[i].Pos[1] < 0) + { + P[i].Pos[1] += boxsize[1]; + if(i < NumGas) + trans_table[i].wrapped |= 4; + } + + while(P[i].Pos[1] >= boxsize[1]) + { + P[i].Pos[1] -= boxsize[1]; + if(i < NumGas) + trans_table[i].wrapped |= 8; + } + +#else /* #if !defined(REFLECTIVE_Y) */ + if(P[i].Pos[1] < 0 || P[i].Pos[1] >= boxsize[1]) + { + char buf[1000]; + + sprintf(buf, "i=%d ID=%d type=%d moved out of box. y=%g", i, P[i].ID, P[i].Type, P[i].Pos[1]); + terminate(buf); + } +#endif /* #if !defined(REFLECTIVE_Y) #else */ + +#if !defined(REFLECTIVE_Z) + while(P[i].Pos[2] < 0) + { + P[i].Pos[2] += boxsize[2]; + if(i < NumGas) + trans_table[i].wrapped |= 16; + } + + while(P[i].Pos[2] >= boxsize[2]) + { + P[i].Pos[2] -= boxsize[2]; + if(i < NumGas) + trans_table[i].wrapped |= 32; + } + +#else /* #if !defined(REFLECTIVE_Z) */ + if(P[i].Pos[2] < 0 || P[i].Pos[2] >= boxsize[2]) + { + char buf[1000]; + + sprintf(buf, "i=%d ID=%d type=%d moved out of box. 
z=%g", i, P[i].ID, P[i].Type, P[i].Pos[2]); + terminate(buf); + } +#endif /* #if !defined(REFLECTIVE_Z) #else */ + } +} diff --git a/src/amuse/community/arepo/src/domain/domain_counttogo.c b/src/amuse/community/arepo/src/domain/domain_counttogo.c new file mode 100644 index 0000000000..82e798ef7e --- /dev/null +++ b/src/amuse/community/arepo/src/domain/domain_counttogo.c @@ -0,0 +1,84 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain_counttogo.c + * \date 05/2018 + * \brief Functions to determine number of exchanged particles. + * \details contains functions: + * int domain_countToGo(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 05.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" +#include "domain.h" + +/*! \brief Determines communication matrix for particles and cells. 
+ * + * This function determines how many particles that are currently stored + * on the local CPU have to be moved off according to the domain + * decomposition. + * + * \return 0 + */ +int domain_countToGo(void) +{ + for(int n = 0; n < NTask; n++) + { + toGo[n] = 0; + toGoSph[n] = 0; + } + + for(int n = 0; n < NumPart; n++) + { + int no = 0; + + while(topNodes[no].Daughter >= 0) + no = topNodes[no].Daughter + (Key[n] - topNodes[no].StartKey) / (topNodes[no].Size >> 3); + + no = topNodes[no].Leaf; + + if(DomainTask[no] != ThisTask) + { + toGo[DomainTask[no]] += 1; + + if(P[n].Type == 0) + toGoSph[DomainTask[no]] += 1; + } + } + + MPI_Alltoall(toGo, 1, MPI_INT, toGet, 1, MPI_INT, MPI_COMM_WORLD); + MPI_Alltoall(toGoSph, 1, MPI_INT, toGetSph, 1, MPI_INT, MPI_COMM_WORLD); + + return 0; +} diff --git a/src/amuse/community/arepo/src/domain/domain_exchange.c b/src/amuse/community/arepo/src/domain/domain_exchange.c new file mode 100644 index 0000000000..454cfafc82 --- /dev/null +++ b/src/amuse/community/arepo/src/domain/domain_exchange.c @@ -0,0 +1,399 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ *
+ * \file src/domain_exchange.c
+ * \date 05/2018
+ * \brief Algorithms for exchanging particle data and associated
+ * rearrangements.
+ * \details This includes changing the size of the P and SphP arrays as
+ * well as the particle exchange routine itself.
+ * contains functions:
+ * void domain_resize_storage(int count_get, int count_get_sph,
+ * int option_flag)
+ * void domain_exchange(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ *
+ * NOTE(review): the six system #include directives below carry no header
+ * names in this copy of the file; they have to be restored before this
+ * file can compile.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+#include "domain.h"
+
+/*! \brief Changes memory allocation if necessary for particle and cell data.
+ *
+ * If the memory usage due to a net import or export of particles changes
+ * above a certain tolerance, the P and SphP structures need to be
+ * reallocated.
+ *
+ * The expected local loads (NumPart + count_get, NumGas + count_get_sph)
+ * are maximised over all tasks with MPI_Allreduce, so every task takes
+ * the same decision and ends up with the same All.MaxPart and
+ * All.MaxPartSph. A limit is changed when the maximum load leaves the
+ * band between (1 - 3 * ALLOC_TOLERANCE) and (1 - ALLOC_TOLERANCE) times
+ * the current limit; the new limit is load / (1 - 2 * ALLOC_TOLERANCE).
+ * Note that the upper bound is tested with '>' for particles and with
+ * '>=' for cells.
+ *
+ * \param[in] count_get How many particles are imported?
+ * \param[in] count_get_sph How many cells are imported?
+ * \param[in] option_flag Options for reallocating peanokey or ngbtree:
+ * 1 also resizes the Key array to the new All.MaxPart;
+ * 2 extends the neighbour tree via ngb_treemodifylength() if
+ * the new All.MaxPartSph exceeds Ngb_MaxPart;
+ * any other value resizes only the particle/cell storage.
+ *
+ * \return void
+ */
+void domain_resize_storage(int count_get, int count_get_sph, int option_flag)
+{
+  int load = NumPart + count_get;
+  int sphload = NumGas + count_get_sph;
+  int loc_data[2] = {load, sphload}, res[2];
+
+  MPI_Allreduce(loc_data, res, 2, MPI_INT, MPI_MAX, MPI_COMM_WORLD); /* largest expected load over all tasks */
+
+  int max_load = res[0];
+  int max_sphload = res[1];
+
+  if(max_load > (1.0 - ALLOC_TOLERANCE) * All.MaxPart || max_load < (1.0 - 3 * ALLOC_TOLERANCE) * All.MaxPart)
+    {
+      All.MaxPart = max_load / (1.0 - 2 * ALLOC_TOLERANCE);
+      reallocate_memory_maxpart();
+
+      if(option_flag == 1)
+        Key = (peanokey *)myrealloc_movable(Key, sizeof(peanokey) * All.MaxPart);
+    }
+
+  if(max_sphload >= (1.0 - ALLOC_TOLERANCE) * All.MaxPartSph || max_sphload < (1.0 - 3 * ALLOC_TOLERANCE) * All.MaxPartSph)
+    {
+      All.MaxPartSph = max_sphload / (1.0 - 2 * ALLOC_TOLERANCE);
+      if(option_flag == 2)
+        {
+          if(All.MaxPartSph > Ngb_MaxPart)
+            ngb_treemodifylength(All.MaxPartSph - Ngb_MaxPart);
+        }
+      reallocate_memory_maxpartsph();
+    }
+}
+
+/*! \brief Exchanges particles and cells according to new domain decomposition.
+ *
+ * Communicates particles and cells to their new task. P and SphP arrays are
+ * changed in size accordingly.
+ *
+ * Outline of the steps performed below:
+ *  - The send buffers partBuf/keyBuf are laid out with all outgoing
+ *    type-0 particles first (grouped by target task, starting at
+ *    offset_sph[]), followed by all other outgoing particles (starting
+ *    at offset[]); sphBuf holds the SphP entries of the outgoing type-0
+ *    particles in the same order.
+ *  - Particles that leave are removed from P, Key and SphP in place. A
+ *    hole left by a type-0 particle is filled with the last gas entry,
+ *    whose slot in turn receives the last particle; any other hole is
+ *    filled with the last particle. This presumably relies on the
+ *    type-0 particles occupying the first NumGas slots on entry.
+ *  - domain_resize_storage() is called with option_flag 1, then the
+ *    remaining non-gas particles are shifted up by count_get_sph slots
+ *    so that incoming type-0 particles can be stored directly behind
+ *    the local ones.
+ *  - The data are exchanged through one of three code paths selected at
+ *    compile time: myMPI_Alltoallv (USE_MPIALLTOALLV_IN_DOMAINDECOMP),
+ *    pairwise MPI_Sendrecv (NO_ISEND_IRECV_IN_DOMAIN), or non-blocking
+ *    MPI_Irecv/MPI_Isend (neither macro defined).
+ *  - NumPart and NumGas are increased by the received counts and all
+ *    temporary buffers are freed in reverse order of their allocation.
+ *
+ * toGo, toGoSph, toGet and toGetSph are only read here and must have
+ * been filled beforehand.
+ *
+ * \return void
+ */
+void domain_exchange(void)
+{
+  double t0 = second();
+
+  int count_togo = 0, count_togo_sph = 0, count_get = 0, count_get_sph = 0;
+  int *count, *count_sph, *offset, *offset_sph;
+  int *count_recv, *count_recv_sph, *offset_recv, *offset_recv_sph;
+  int i, n, no, target;
+  struct particle_data *partBuf;
+  struct sph_particle_data *sphBuf;
+
+  peanokey *keyBuf;
+
+  long long sumtogo = 0;
+
+  for(i = 0; i < NTask; i++)
+    sumtogo += toGo[i];
+
+  sumup_longs(1, &sumtogo, &sumtogo); /* sumtogo is passed as both source and destination; only used for the log message */
+
+  count = (int *)mymalloc_movable(&count, "count", NTask * sizeof(int));
+  count_sph = (int *)mymalloc_movable(&count_sph, "count_sph", NTask * sizeof(int));
+  offset = (int *)mymalloc_movable(&offset, "offset", NTask * sizeof(int));
+  offset_sph = (int *)mymalloc_movable(&offset_sph, "offset_sph", NTask * sizeof(int));
+  count_recv = (int *)mymalloc_movable(&count_recv, "count_recv", NTask * sizeof(int));
+  count_recv_sph = (int *)mymalloc_movable(&count_recv_sph, "count_recv_sph", NTask * sizeof(int));
+  offset_recv = (int *)mymalloc_movable(&offset_recv, "offset_recv", NTask * sizeof(int));
+  offset_recv_sph = (int *)mymalloc_movable(&offset_recv_sph, "offset_recv_sph", NTask * sizeof(int));
+
+  int prec_offset;
+  int *decrease;
+
+  decrease = (int *)mymalloc_movable(&decrease, "decrease", NTask * sizeof(int));
+
+  for(i = 1, offset_sph[0] = 0, decrease[0] = 0; i < NTask; i++)
+    {
+      offset_sph[i] = offset_sph[i - 1] + toGoSph[i - 1];
+      decrease[i] = toGoSph[i - 1]; /* gas share of the previous task, subtracted from toGo[i - 1] below */
+    }
+
+  prec_offset = offset_sph[NTask - 1] + toGoSph[NTask - 1]; /* total number of outgoing type-0 particles */
+
+  offset[0] = prec_offset; /* the non-gas section of the send buffers starts behind all outgoing gas */
+  for(i = 1; i < NTask; i++)
+    offset[i] = offset[i - 1] + (toGo[i - 1] - decrease[i]);
+
+  myfree(decrease);
+
+  for(i = 0; i < NTask; i++)
+    {
+      count_togo += toGo[i];
+      count_togo_sph += toGoSph[i];
+      count_get += toGet[i];
+      count_get_sph += toGetSph[i];
+    }
+
+  partBuf = (struct particle_data *)mymalloc_movable(&partBuf, "partBuf", count_togo * sizeof(struct particle_data));
+  sphBuf = (struct sph_particle_data *)mymalloc_movable(&sphBuf, "sphBuf", count_togo_sph * sizeof(struct sph_particle_data));
+
+  keyBuf = (peanokey *)mymalloc_movable(&keyBuf, "keyBuf", count_togo * sizeof(peanokey));
+
+  for(i = 0; i < NTask; i++)
+    {
+      count[i] = count_sph[i] = 0;
+    }
+
+  for(n = 0; n < NumPart; n++)
+    {
+      no = 0;
+
+      peanokey mask = ((peanokey)7) << (3 * (BITS_PER_DIMENSION - 1)); /* selects the top three key bits */
+      int shift = 3 * (BITS_PER_DIMENSION - 1);
+
+      while(topNodes[no].Daughter >= 0)
+        {
+          no = topNodes[no].Daughter + (int)((Key[n] & mask) >> shift); /* three key bits per level pick one of the 8 daughters */
+          mask >>= 3;
+          shift -= 3;
+        }
+
+      no = topNodes[no].Leaf;
+
+      target = DomainTask[no];
+
+      if(target != ThisTask)
+        {
+          /* copy this particle into the exchange buffer */
+          if(P[n].Type == 0)
+            {
+              partBuf[offset_sph[target] + count_sph[target]] = P[n];
+              keyBuf[offset_sph[target] + count_sph[target]] = Key[n];
+              sphBuf[offset_sph[target] + count_sph[target]] = SphP[n];
+              count_sph[target]++;
+            }
+          else
+            {
+              partBuf[offset[target] + count[target]] = P[n];
+              keyBuf[offset[target] + count[target]] = Key[n];
+              count[target]++;
+            }
+
+          if(P[n].Type == 0)
+            {
+              P[n] = P[NumGas - 1];
+              P[NumGas - 1] = P[NumPart - 1];
+
+              Key[n] = Key[NumGas - 1];
+              Key[NumGas - 1] = Key[NumPart - 1];
+
+              SphP[n] = SphP[NumGas - 1];
+
+              NumGas--;
+            }
+          else
+            {
+              P[n] = P[NumPart - 1];
+              Key[n] = Key[NumPart - 1];
+            }
+
+          NumPart--;
+          n--; /* slot n now holds a different particle; examine it in the next iteration */
+
+        } /* target != ThisTask */
+    }     /* n < NumPart */
+
+  /**** now resize the storage for the P[] and SphP[] arrays if needed ****/
+  domain_resize_storage(count_get, count_get_sph, 1);
+
+  /***** space has been created, now can do the actual exchange *****/
+  int count_totget = count_get_sph;
+
+  if(count_totget)
+    {
+      memmove(P + NumGas + count_totget, P + NumGas, (NumPart - NumGas) * sizeof(struct particle_data));
+      memmove(Key + NumGas + count_totget, Key + NumGas, (NumPart - NumGas) * sizeof(peanokey));
+    }
+
+  for(i = 0; i < NTask; i++)
+    {
+      count_recv_sph[i] = toGetSph[i];
+      count_recv[i] = toGet[i] - toGetSph[i];
+    }
+
+  int prec_count;
+  for(i = 1, offset_recv_sph[0] = NumGas; i < NTask; i++)
+    offset_recv_sph[i] = offset_recv_sph[i - 1] + count_recv_sph[i - 1];
+  prec_count = NumGas + count_get_sph;
+
+  offset_recv[0] = NumPart - NumGas + prec_count; /* first free slot behind the shifted local non-gas particles */
+
+  for(i = 1; i < NTask; i++)
+    offset_recv[i] = offset_recv[i - 1] + count_recv[i - 1];
+
+#ifndef USE_MPIALLTOALLV_IN_DOMAINDECOMP
+
+  int ngrp;
+#ifdef NO_ISEND_IRECV_IN_DOMAIN /* synchronous communication */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      target = ThisTask ^ ngrp;
+
+      if(target < NTask)
+        {
+          if(count_sph[target] > 0 || count_recv_sph[target] > 0)
+            {
+              MPI_Sendrecv(partBuf + offset_sph[target], count_sph[target] * sizeof(struct particle_data), MPI_BYTE, target,
+                           TAG_PDATA_SPH, P + offset_recv_sph[target], count_recv_sph[target] * sizeof(struct particle_data), MPI_BYTE,
+                           target, TAG_PDATA_SPH, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              MPI_Sendrecv(sphBuf + offset_sph[target], count_sph[target] * sizeof(struct sph_particle_data), MPI_BYTE, target,
+                           TAG_SPHDATA, SphP + offset_recv_sph[target], count_recv_sph[target] * sizeof(struct sph_particle_data),
+                           MPI_BYTE, target, TAG_SPHDATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              MPI_Sendrecv(keyBuf + offset_sph[target], count_sph[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY_SPH,
+                           Key + offset_recv_sph[target], count_recv_sph[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY_SPH,
+                           MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+
+          if(count[target] > 0 || count_recv[target] > 0)
+            {
+              MPI_Sendrecv(partBuf + offset[target], count[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
+                           P + offset_recv[target], count_recv[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
+                           MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              MPI_Sendrecv(keyBuf + offset[target], count[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY,
+                           Key + offset_recv[target], count_recv[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+#else /* #ifdef NO_ISEND_IRECV_IN_DOMAIN */
+  /* asynchronous communication */
+
+  MPI_Request *requests = (MPI_Request *)mymalloc_movable(&requests, "requests", 30 * NTask * sizeof(MPI_Request)); /* at most 10 requests per partner are posted below (5 receives, 5 sends) */
+  int n_requests = 0;
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      target = ThisTask ^ ngrp;
+
+      if(target < NTask)
+        {
+          if(count_recv_sph[target] > 0)
+            {
+              MPI_Irecv(P + offset_recv_sph[target], count_recv_sph[target] * sizeof(struct particle_data), MPI_BYTE, target,
+                        TAG_PDATA_SPH, MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Irecv(SphP + offset_recv_sph[target], count_recv_sph[target] * sizeof(struct sph_particle_data), MPI_BYTE, target,
+                        TAG_SPHDATA, MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Irecv(Key + offset_recv_sph[target], count_recv_sph[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY_SPH,
+                        MPI_COMM_WORLD, &requests[n_requests++]);
+            }
+
+          if(count_recv[target] > 0)
+            {
+              MPI_Irecv(P + offset_recv[target], count_recv[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
+                        MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Irecv(Key + offset_recv[target], count_recv[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY, MPI_COMM_WORLD,
+                        &requests[n_requests++]);
+            }
+        }
+    }
+
+  MPI_Barrier(MPI_COMM_WORLD); /* not really necessary, but this will guarantee that all receives are
+                                  posted before the sends, which helps the stability of MPI on
+                                  bluegene, and perhaps some mpich1-clusters */
+
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      target = ThisTask ^ ngrp;
+
+      if(target < NTask)
+        {
+          if(count_sph[target] > 0)
+            {
+              MPI_Isend(partBuf + offset_sph[target], count_sph[target] * sizeof(struct particle_data), MPI_BYTE, target,
+                        TAG_PDATA_SPH, MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Isend(sphBuf + offset_sph[target], count_sph[target] * sizeof(struct sph_particle_data), MPI_BYTE, target,
+                        TAG_SPHDATA, MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Isend(keyBuf + offset_sph[target], count_sph[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY_SPH,
+                        MPI_COMM_WORLD, &requests[n_requests++]);
+            }
+
+          if(count[target] > 0)
+            {
+              MPI_Isend(partBuf + offset[target], count[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
+                        MPI_COMM_WORLD, &requests[n_requests++]);
+
+              MPI_Isend(keyBuf + offset[target], count[target] * sizeof(peanokey), MPI_BYTE, target, TAG_KEY, MPI_COMM_WORLD,
+                        &requests[n_requests++]);
+            }
+        }
+    }
+
+  MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);
+  myfree(requests);
+#endif /* #ifdef NO_ISEND_IRECV_IN_DOMAIN #else */
+
+#else /* #ifndef USE_MPIALLTOALLV_IN_DOMAINDECOMP */
+  /* begins block of myMPI_Alltoallv communications */
+
+  myMPI_Alltoallv(partBuf, count_sph, offset_sph, P, count_recv_sph, offset_recv_sph, sizeof(struct particle_data), 0, MPI_COMM_WORLD);
+
+  myMPI_Alltoallv(sphBuf, count_sph, offset_sph, SphP, count_recv_sph, offset_recv_sph, sizeof(struct sph_particle_data), 0,
+                  MPI_COMM_WORLD);
+
+  myMPI_Alltoallv(keyBuf, count_sph, offset_sph, Key, count_recv_sph, offset_recv_sph, sizeof(peanokey), 0, MPI_COMM_WORLD);
+
+  myMPI_Alltoallv(partBuf, count, offset, P, count_recv, offset_recv, sizeof(struct particle_data), 0, MPI_COMM_WORLD);
+
+  myMPI_Alltoallv(keyBuf, count, offset, Key, count_recv, offset_recv, sizeof(peanokey), 0, MPI_COMM_WORLD);
+
+#endif /* #ifndef USE_MPIALLTOALLV_IN_DOMAINDECOMP #else */
+  /* close block of myMPI_Alltoallv communications */
+
+  NumPart += count_get;
+  NumGas += count_get_sph;
+
+  myfree(keyBuf);
+  myfree(sphBuf);
+  myfree(partBuf);
+  myfree(offset_recv_sph);
+  myfree(offset_recv);
+  myfree(count_recv_sph);
+  myfree(count_recv);
+  myfree(offset_sph);
+  myfree(offset);
+  myfree(count_sph);
+  myfree(count);
+
+  double t1 = second();
+  mpi_printf("DOMAIN: exchange of %lld particles done. (took %g sec)\n", sumtogo, timediff(t0, t1));
+}
diff --git a/src/amuse/community/arepo/src/domain/domain_rearrange.c b/src/amuse/community/arepo/src/domain/domain_rearrange.c
new file mode 100644
index 0000000000..e75b37872e
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_rearrange.c
@@ -0,0 +1,129 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/domain_rearrange.c
+ * \date 05/2018
+ * \brief Rearranges particle and cell arrays and gets rid of inactive
+ * particles.
+ * \details contains functions:
+ * void domain_rearrange_particle_sequence(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ *
+ * NOTE(review): the six system #include directives below carry no header
+ * names in this copy of the file; they have to be restored before this
+ * file can compile.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+#include "domain.h"
+
+/*! \brief Gets rid of inactive/eliminated cells and particles.
+ *
+ * Cells that were de-refined or turned into star particles are kept in the
+ * SphP array, but flagged as inactive until this point.
This routine cleans + * up these arrays in order to make sure only active particles/cells are + * exported. + * + * \return void + */ +void domain_rearrange_particle_sequence(void) +{ +#if defined(USE_SFR) + if(Stars_converted) + { + struct particle_data psave; + peanokey key; + + for(int i = 0; i < NumGas; i++) + if(P[i].Type != 0) /*If not a gas particle, swap to the end of the list */ + { + psave = P[i]; + key = Key[i]; + + P[i] = P[NumGas - 1]; + SphP[i] = SphP[NumGas - 1]; + Key[i] = Key[NumGas - 1]; + + P[NumGas - 1] = psave; + Key[NumGas - 1] = key; + + NumGas--; + i--; + } + /* Now we have rearranged the particles, + * we don't need to do it again unless there are more stars + */ + Stars_converted = 0; + } +#endif /* #if defined(USE_SFR) */ + +#if defined(REFINEMENT_MERGE_CELLS) + int i, count_elim, count_gaselim; + + count_elim = 0; + count_gaselim = 0; + + for(i = 0; i < NumPart; i++) + if((P[i].Mass == 0 && P[i].ID == 0) || (P[i].Type == 4 && P[i].Mass == 0)) + { + if(P[i].Type == 0) + { + P[i] = P[NumGas - 1]; + SphP[i] = SphP[NumGas - 1]; + Key[i] = Key[NumGas - 1]; + + P[NumGas - 1] = P[NumPart - 1]; + Key[NumGas - 1] = Key[NumPart - 1]; + + NumGas--; + count_gaselim++; + } + + NumPart--; + i--; + count_elim++; + } + + int count[2] = {count_elim, count_gaselim}; + int tot[2] = {0, 0}, nelem = 2; + + MPI_Allreduce(count, tot, nelem, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + printf("DOMAIN: Eliminated %d derefined/swallowed gas cells.\n", tot[1]); + myflush(stdout); + } + + All.TotNumPart -= tot[0]; + All.TotNumGas -= tot[1]; + +#endif /* #if defined(REFINEMENT_MERGE_CELLS */ +} diff --git a/src/amuse/community/arepo/src/domain/domain_sort_kernels.c b/src/amuse/community/arepo/src/domain/domain_sort_kernels.c new file mode 100644 index 0000000000..b0ad2c7a28 --- /dev/null +++ b/src/amuse/community/arepo/src/domain/domain_sort_kernels.c @@ -0,0 +1,158 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain_sort_kernels.c + * \date 05/2018 + * \brief Comparison and sorting functions for Peano-Hilbert data. + * \details contains functions: + * int domain_compare_count(const void *a, const void *b) + * int domain_compare_key(const void *a, const void *b) + * static void msort_domain_with_tmp(struct + * domain_peano_hilbert_data *b, size_t n, struct + * domain_peano_hilbert_data *t) + * void mysort_domain(void *b, size_t n, size_t s) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" +#include "domain.h" + +/*! \brief Comparison function for domain_count_data objects. + * + * Compares the variable count. + * + * \param[in] a Pointer to first domain_count_data object. + * \param[in] b Pointer to second domain_count_data object. + * + * \return 1 if b>a; -1 if a>b; otherwise 0. 
+ */ +int domain_compare_count(const void *a, const void *b) +{ + if(((struct domain_count_data *)a)->count > (((struct domain_count_data *)b)->count)) + return -1; + + if(((struct domain_count_data *)a)->count < (((struct domain_count_data *)b)->count)) + return +1; + + return 0; +} + +/*! \brief Comparison function for domain_peano_hilbert_data objects. + * + * Compares element key. + * + * \param[in] a Pointer to first domain_peano_hilbert_data object. + * \param[in] b Pointer to second domain_peano_hilbert_data object. + * + * \return 1 if b>a; -1 if a>b; otherwise 0. + */ +int domain_compare_key(const void *a, const void *b) +{ + if(((struct domain_peano_hilbert_data *)a)->key < (((struct domain_peano_hilbert_data *)b)->key)) + return -1; + + if(((struct domain_peano_hilbert_data *)a)->key > (((struct domain_peano_hilbert_data *)b)->key)) + return +1; + + return 0; +} + +/*! \brief Customized mergesort sorting routine, requires temporary array. + * + * \param[in, out] b domain_peano_hilbert data array that is to be sorted. + * \param[in] n Number of elements in array. + * \param[in, out] t Temporary domain_peano_hilbert data array. + * + * \return void + */ +static void msort_domain_with_tmp(struct domain_peano_hilbert_data *b, size_t n, struct domain_peano_hilbert_data *t) +{ + struct domain_peano_hilbert_data *tmp; + struct domain_peano_hilbert_data *b1, *b2; + size_t n1, n2; + + if(n <= 1) + return; + + n1 = n / 2; + n2 = n - n1; + b1 = b; + b2 = b + n1; + + msort_domain_with_tmp(b1, n1, t); + msort_domain_with_tmp(b2, n2, t); + + tmp = t; + + while(n1 > 0 && n2 > 0) + { + if(b1->key <= b2->key) + { + --n1; + *tmp++ = *b1++; + } + else + { + --n2; + *tmp++ = *b2++; + } + } + + if(n1 > 0) + memcpy(tmp, b1, n1 * sizeof(struct domain_peano_hilbert_data)); + + memcpy(b, t, (n - n2) * sizeof(struct domain_peano_hilbert_data)); +} + +/*! \brief Customized mergesort sorting routine. 
+ * + * This function tends to work slightly faster than a call of qsort() for + * this particular list, at least on most platforms. + * + * \param[in, out] b domain_peano_hilbert data array that is to be sorted. + * \param[in] n Number of elements. + * \param[in] s Size of structure. + * + * \return void + */ +void mysort_domain(void *b, size_t n, size_t s) +{ + const size_t size = n * s; + struct domain_peano_hilbert_data *tmp; + + tmp = (struct domain_peano_hilbert_data *)mymalloc("tmp", size); + + msort_domain_with_tmp((struct domain_peano_hilbert_data *)b, n, tmp); + + myfree(tmp); +} diff --git a/src/amuse/community/arepo/src/domain/domain_toplevel.c b/src/amuse/community/arepo/src/domain/domain_toplevel.c new file mode 100644 index 0000000000..6c1fc22ac5 --- /dev/null +++ b/src/amuse/community/arepo/src/domain/domain_toplevel.c @@ -0,0 +1,393 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain_toplevel.c + * \date 05/2018 + * \brief Top level tree construction and walk routines used for the + * domain decomposition. + * \details Uses BSD macros. 
+ * contains functions:
+ * static int mydata_cmp(struct mydata *lhs, struct mydata *rhs)
+ * int domain_determineTopTree(void)
+ * void domain_do_local_refine(int n, int *list)
+ * void domain_walktoptree(int no)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 17.05.2018 Prepared file for public release -- Rainer Weinberger
+ *
+ * NOTE(review): the six system #include directives below carry no header
+ * names in this copy of the file; they have to be restored before this
+ * file can compile.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+#include "bsd_tree.h"
+#include "domain.h"
+
+/*! \brief Structure of tree nodes.
+ *
+ * One entry per top-level tree node that is queued in the red-black tree
+ * queue_load: workload is the sort key, topnode_index the index of the
+ * corresponding entry in topNodes (and in nload).
+ */
+struct mydata
+{
+  double workload;
+  int topnode_index;
+
+  RB_ENTRY(mydata) linkage; /* this creates the linkage pointers needed by the RB tree, using symbolic name 'linkage' */
+};
+
+/*! \brief Comparison function of tree elements.
+ *
+ * Compares elements workload and topnode_index. Larger workloads are
+ * ordered first; ties are broken by ascending topnode_index, so two
+ * entries compare equal only if both fields agree.
+ *
+ * \param[in] lhs pointer to left hand side top level tree node.
+ * \param[in] rhs pointer to right hand side top level tree node.
+ *
+ * \return -1: left is larger or lower topnode index, 1 opposite, 0 equal.
+ */
+static int mydata_cmp(struct mydata *lhs, struct mydata *rhs)
+{
+  if(lhs->workload > rhs->workload)
+    return -1;
+  else if(lhs->workload < rhs->workload)
+    return 1;
+  else if(lhs->topnode_index < rhs->topnode_index)
+    return -1;
+  else if(lhs->topnode_index > rhs->topnode_index)
+    return 1;
+
+  return 0;
+}
+
+/* the following macro declares 'struct mytree', which is the header element
+ * needed as handle for a tree
+ */
+RB_HEAD(mytree, mydata);
+
+static struct mydata *nload; /* one queue entry per top-level node, indexed by the topNodes index */
+static struct mytree queue_load; /* nodes that are candidates for refinement, ordered by mydata_cmp */
+
+/* the following macros declare appropriate function prototypes and functions
+ * needed for this type of tree
+ */
+RB_PROTOTYPE_STATIC(mytree, mydata, linkage, mydata_cmp);
+RB_GENERATE_STATIC(mytree, mydata, linkage, mydata_cmp);
+
+static double *list_cost, *list_sphcost; /* per-particle cost factors, filled in domain_determineTopTree() */
+
+/*! \brief Construct top-level tree.
+ *
+ * This function constructs the global top-level tree node that is used
+ * for the domain decomposition. This is done by considering the string of
+ * Peano-Hilbert keys for all particles, which is recursively chopped off
+ * in pieces of eight segments until each segment holds at most a certain
+ * number of particles.
+ *
+ * Outline of the steps performed below:
+ *  - The Peano-Hilbert key of every local particle is computed and
+ *    stored in Key[] and in mp[] (together with the particle index);
+ *    the cost factors are stored in list_cost/list_sphcost; mp[] is
+ *    sorted by key.
+ *  - The root node is initialised to span all PEANOCELLS keys.
+ *  - limitNTopNodes bounds the number of nodes that may be created. While
+ *    it exceeds MaxTopNodes, All.TopNodeAllocFactor is raised by a factor
+ *    of 1.3 and the node arrays are reallocated; the run is terminated
+ *    if the factor exceeds 1000.
+ *  - Nodes are refined in rounds. In each round queue_load is scanned
+ *    from the largest workload downwards and the selected nodes are
+ *    handed to domain_do_local_refine(). Without ADDBACKGROUNDGRID a node
+ *    is selected if its workload exceeds 'limit', or as the first
+ *    selection of a round while there are fewer than
+ *    All.MultipleDomains * NTask leaves; the scan stops once the
+ *    workload drops below 1/8 of that of the first node with Size >= 8
+ *    or the node budget would be exceeded. With ADDBACKGROUNDGRID a node
+ *    is selected while its Size exceeds MaxTopleaveSize. Nodes with
+ *    Size < 8 cannot be split; this triggers a one-time message and,
+ *    unless OVERRIDE_PEANOGRID_WARNING is defined, terminates the run.
+ *  - The temporary arrays are freed, the leaves are numbered with
+ *    domain_walktoptree() and domain_sumCost() is called.
+ *
+ * \return 0
+ */
+int domain_determineTopTree(void)
+{
+  double t0 = second();
+  int count = 0, message_printed = 0;
+
+  mp = (struct domain_peano_hilbert_data *)mymalloc_movable(&mp, "mp", sizeof(struct domain_peano_hilbert_data) * NumPart);
+  list_cost = mymalloc_movable(&list_cost, "list_cost", sizeof(double) * NumPart);
+  list_sphcost = mymalloc_movable(&list_sphcost, "listsph_cost", sizeof(double) * NumPart);
+
+  for(int i = 0; i < NumPart; i++)
+    {
+      peano1D xb = domain_double_to_int(((P[i].Pos[0] - DomainCorner[0]) * DomainInverseLen) + 1.0);
+      peano1D yb = domain_double_to_int(((P[i].Pos[1] - DomainCorner[1]) * DomainInverseLen) + 1.0);
+      peano1D zb = domain_double_to_int(((P[i].Pos[2] - DomainCorner[2]) * DomainInverseLen) + 1.0);
+
+      mp[count].key = Key[i] = peano_hilbert_key(xb, yb, zb, BITS_PER_DIMENSION);
+      mp[count].index = i;
+      count++;
+
+      list_cost[i] = domain_grav_tot_costfactor(i);
+      list_sphcost[i] = domain_hydro_tot_costfactor(i);
+    }
+
+  /* sort according to key (local particles!) */
+  mysort_domain(mp, count, sizeof(struct domain_peano_hilbert_data));
+
+  NTopnodes = 1;
+  NTopleaves = 1;
+  topNodes[0].Daughter = -1;
+  topNodes[0].Parent = -1;
+  topNodes[0].Size = PEANOCELLS;
+  topNodes[0].StartKey = 0;
+  topNodes[0].PIndex = 0;
+  topNodes[0].Count = count;
+  topNodes[0].Cost = gravcost;
+  topNodes[0].SphCost = sphcost;
+
+  int limitNTopNodes = 2 * imax(1 + (NTask / 7 + 1) * 8, All.TopNodeFactor * All.MultipleDomains * NTask);
+
+#ifdef ADDBACKGROUNDGRID
+  limitNTopNodes = imax(limitNTopNodes, 2 * All.GridSize * All.GridSize * All.GridSize);
+#endif /* #ifdef ADDBACKGROUNDGRID */
+
+  while(limitNTopNodes > MaxTopNodes)
+    {
+      mpi_printf("DOMAIN: Increasing TopNodeAllocFactor=%g ", All.TopNodeAllocFactor);
+      All.TopNodeAllocFactor *= 1.3;
+      mpi_printf("new value=%g\n", All.TopNodeAllocFactor);
+      if(All.TopNodeAllocFactor > 1000)
+        terminate("something seems to be going seriously wrong here. Stopping.\n");
+
+      MaxTopNodes = (int)(All.TopNodeAllocFactor * All.MaxPart + 1);
+
+      topNodes = (struct local_topnode_data *)myrealloc_movable(topNodes, (MaxTopNodes * sizeof(struct local_topnode_data)));
+      TopNodes = (struct topnode_data *)myrealloc_movable(TopNodes, (MaxTopNodes * sizeof(struct topnode_data)));
+      DomainTask = (int *)myrealloc_movable(DomainTask, (MaxTopNodes * sizeof(int)));
+      DomainLeaveNode = (struct domain_cost_data *)myrealloc_movable(DomainLeaveNode, (MaxTopNodes * sizeof(struct domain_cost_data)));
+    }
+
+  RB_INIT(&queue_load);
+  nload = mymalloc("nload", limitNTopNodes * sizeof(struct mydata));
+  int *list = mymalloc("list", limitNTopNodes * sizeof(int));
+
+#ifdef ADDBACKGROUNDGRID
+  peanokey MaxTopleaveSize = (PEANOCELLS / (All.GridSize * All.GridSize * All.GridSize));
+#else /* #ifdef ADDBACKGROUNDGRID */
+  double limit = 1.0 / (All.TopNodeFactor * All.MultipleDomains * NTask);
+#endif /* #ifdef ADDBACKGROUNDGRID #else */
+
+  /* insert the root node */
+  nload[0].workload = 1.0;
+  nload[0].topnode_index = 0;
+  RB_INSERT(mytree, &queue_load, &nload[0]);
+
+  int iter = 0;
+
+  do
+    {
+      count = 0; /* from here on, count is the number of nodes selected for refinement in this round */
+
+      double first_workload = 0;
+
+      for(struct mydata *nfirst = RB_MIN(mytree, &queue_load); nfirst != NULL; nfirst = RB_NEXT(mytree, &queue_load, nfirst))
+        {
+          if(topNodes[nfirst->topnode_index].Size >= 8)
+            {
+              first_workload = nfirst->workload;
+              break;
+            }
+        }
+
+      for(struct mydata *np = RB_MIN(mytree, &queue_load); np != NULL; np = RB_NEXT(mytree, &queue_load, np))
+        {
+#ifndef ADDBACKGROUNDGRID
+          if(np->workload < 0.125 * first_workload)
+            break;
+
+          if(NTopnodes + 8 * (count + 1) >= limitNTopNodes)
+            break;
+#endif /* #ifndef ADDBACKGROUNDGRID */
+
+#ifdef ADDBACKGROUNDGRID
+          if(topNodes[np->topnode_index].Size > MaxTopleaveSize)
+#else /* #ifdef ADDBACKGROUNDGRID */
+          if(np->workload > limit || (NTopleaves < All.MultipleDomains * NTask && count == 0))
+#endif /* #ifdef ADDBACKGROUNDGRID #else */
+            {
+              if(topNodes[np->topnode_index].Size < 8)
+                {
+                  if(message_printed == 0)
+                    {
+                      mpi_printf("DOMAIN: Note: we would like to refine top-tree, but PEANOGRID is not fine enough\n");
+#ifndef OVERRIDE_PEANOGRID_WARNING
+                      terminate(
+                          "Consider setting BITS_PER_DIMENSION up to a value of 42 to get a fine enough PEANOGRID, or force a "
+                          "continuation by activating OVERRIDE_PEANOGRID_WARNING");
+#endif /* #ifndef OVERRIDE_PEANOGRID_WARNING */
+                      message_printed = 1;
+                    }
+                }
+              else
+                {
+                  list[count] = np->topnode_index;
+                  count++;
+                }
+            }
+        }
+
+      if(count > 0)
+        {
+          domain_do_local_refine(count, list);
+          iter++;
+        }
+    }
+  while(count > 0);
+
+  myfree(list);
+  myfree(nload);
+  myfree(list_sphcost);
+  myfree(list_cost);
+  myfree(mp);
+
+  /* count the number of top leaves */
+  NTopleaves = 0;
+  domain_walktoptree(0);
+
+  double t1 = second();
+  mpi_printf("DOMAIN: NTopleaves=%d, determination of top-level tree involved %d iterations and took %g sec\n", NTopleaves, iter,
+             timediff(t0, t1));
+
+  t0 = second();
+
+  domain_sumCost();
+
+  t1 = second();
+  mpi_printf("DOMAIN: cost summation for top-level tree took %g sec\n", timediff(t0, t1));
+
+  return 0;
+}
+
+/*! \brief Refine top-level tree locally.
+ *
+ * Requires arrays list_cost and list_sphcost, mp.
+ *
+ * For every listed node eight daughter nodes are appended to topNodes,
+ * each covering one eighth of the parent's key range. The parent's local
+ * particles (a contiguous range of the key-sorted array mp) are assigned
+ * to the daughters by comparing their keys with the daughters' StartKey,
+ * accumulating Cost, SphCost and Count per daughter. The local work of
+ * each daughter is then summed over all tasks; a dense MPI_Allreduce is
+ * used if more than 5% of all entries (counted over all tasks) are
+ * non-zero, and allreduce_sparse_double_sum() otherwise. Finally the
+ * refined parents are removed from queue_load and their daughters are
+ * inserted with the global workload.
+ *
+ * NTopnodes grows by 8 and NTopleaves by 7 for every refined node.
+ *
+ * \param[in] n Number of nodes that should be refined.
+ * \param[in] list List of node indices that should be refined.
+ *
+ * \return void
+ */
+void domain_do_local_refine(int n, int *list)
+{
+  double *worktotlist = mymalloc("worktotlist", 8 * n * sizeof(double));
+  double *worklist = mymalloc("worklist", 8 * n * sizeof(double));
+
+  double non_zero = 0, non_zero_tot;
+
+  /* create the new nodes */
+  for(int k = 0; k < n; k++)
+    {
+      int i = list[k];
+      topNodes[i].Daughter = NTopnodes;
+      NTopnodes += 8;
+      NTopleaves += 7;
+
+      for(int j = 0; j < 8; j++)
+        {
+          int sub = topNodes[i].Daughter + j;
+
+          topNodes[sub].Daughter = -1;
+          topNodes[sub].Parent = i;
+          topNodes[sub].Size = (topNodes[i].Size >> 3);
+          topNodes[sub].StartKey = topNodes[i].StartKey + j * topNodes[sub].Size;
+          topNodes[sub].PIndex = topNodes[i].PIndex;
+          topNodes[sub].Count = 0;
+          topNodes[sub].Cost = 0;
+          topNodes[sub].SphCost = 0;
+        }
+
+      int sub = topNodes[i].Daughter;
+
+      for(int p = topNodes[i].PIndex, j = 0; p < topNodes[i].PIndex + topNodes[i].Count; p++)
+        {
+          if(j < 7) /* the last daughter has no successor whose StartKey could be tested */
+            while(mp[p].key >= topNodes[sub + 1].StartKey)
+              {
+                j++;
+                sub++;
+                topNodes[sub].PIndex = p;
+                if(j >= 7)
+                  break;
+              }
+
+          topNodes[sub].Cost += list_cost[mp[p].index];
+          topNodes[sub].SphCost += list_sphcost[mp[p].index];
+          topNodes[sub].Count++;
+        }
+
+      for(int j = 0; j < 8; j++)
+        {
+          int sub = topNodes[i].Daughter + j;
+          worklist[k * 8 + j] = fac_work * topNodes[sub].Cost + fac_worksph * topNodes[sub].SphCost + fac_load * topNodes[sub].Count;
+
+          if(worklist[k * 8 + j] != 0)
+            non_zero++;
+        }
+    }
+
+  MPI_Allreduce(&non_zero, &non_zero_tot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  if(non_zero_tot > 0.05 * (NTask * 8 * n))
+    MPI_Allreduce(worklist, worktotlist, 8 * n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  else
+    allreduce_sparse_double_sum(worklist, worktotlist, 8 * n);
+
+  for(int k = 0; k < n; k++)
+    {
+      int i = list[k];
+      RB_REMOVE(mytree, &queue_load, &nload[i]);
+    }
+
+  for(int k = 0, l = 0; k < n; k++)
+    {
+      int i = list[k];
+
+      for(int j = 0; j < 8; j++, l++)
+        {
+          int sub = topNodes[i].Daughter + j;
+
+          /* insert the node */
+          nload[sub].workload = worktotlist[l];
+          nload[sub].topnode_index = sub;
+          RB_INSERT(mytree, &queue_load, &nload[sub]);
+        }
+    }
+
+  myfree(worklist);
+  myfree(worktotlist);
+}
+
+/*! \brief Walks top level tree recursively.
+ *
+ * This function walks the global top tree in order to establish the
+ * number of leaves it has, and for assigning the leaf numbers along the
+ * Peano-Hilbert Curve. These leaves are later combined to domain pieces,
+ * which are distributed to different processors.
+ *
+ * Each node without daughters gets the current value of NTopleaves as
+ * its Leaf number, after which NTopleaves is incremented. The caller
+ * therefore has to reset NTopleaves to 0 before starting the walk at the
+ * root. Daughters are visited in index order.
+ *
+ * \param[in] no Present node.
+ *
+ * \return void
+ */
+void domain_walktoptree(int no)
+{
+  if(topNodes[no].Daughter == -1)
+    {
+      topNodes[no].Leaf = NTopleaves;
+      NTopleaves++;
+    }
+  else
+    {
+      for(int i = 0; i < 8; i++)
+        domain_walktoptree(topNodes[no].Daughter + i);
+    }
+}
diff --git a/src/amuse/community/arepo/src/domain/domain_vars.c b/src/amuse/community/arepo/src/domain/domain_vars.c
new file mode 100644
index 0000000000..d108a7dc8f
--- /dev/null
+++ b/src/amuse/community/arepo/src/domain/domain_vars.c
@@ -0,0 +1,117 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/domain_vars.c + * \date 05/2018 + * \brief Variables and memory allocation functions for domain + * decomposition. + * \details contains functions: + * void domain_allocate_lists(void) + * void domain_free_lists(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 05.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" +#include "domain.h" + +struct domain_peano_hilbert_data *mp; + +struct local_topnode_data *topNodes, *branchNodes; /*!< points to the root node of the top-level tree */ + +double totgravcost, totpartcount, gravcost, totsphcost, sphcost; + +struct domain_cost_data *DomainLeaveNode; + +double fac_work, fac_load, fac_worksph; +double normsum_work, normsum_load, normsum_worksph; + +int Nbranch; + +/*! toGo[partner] gives the number of particles on the current task that have to go to task 'partner' + */ +int *toGo, *toGoSph; +int *toGet, *toGetSph; +int *list_NumPart; +int *list_NumGas; +int *list_load; +int *list_loadsph; +double *list_work; +double *list_worksph; + +/*! \brief Allocates lists needed for communication in domain decomposition. + * + * These lists are holding information about other tasks (number of particles, + * load, work, etc.). 
+ * + * \return void + */ +void domain_allocate_lists(void) +{ + Key = (peanokey *)mymalloc_movable(&Key, "domain_key", (sizeof(peanokey) * All.MaxPart)); + toGo = (int *)mymalloc_movable(&toGo, "toGo", (sizeof(int) * NTask)); + toGoSph = (int *)mymalloc_movable(&toGoSph, "toGoSph", (sizeof(int) * NTask)); + toGet = (int *)mymalloc_movable(&toGet, "toGet", (sizeof(int) * NTask)); + toGetSph = (int *)mymalloc_movable(&toGetSph, "toGetSph", (sizeof(int) * NTask)); + list_NumPart = (int *)mymalloc_movable(&list_NumPart, "list_NumPart", (sizeof(int) * NTask)); + list_NumGas = (int *)mymalloc_movable(&list_NumGas, "list_NumGas", (sizeof(int) * NTask)); + list_load = (int *)mymalloc_movable(&list_load, "list_load", (sizeof(int) * NTask)); + list_loadsph = (int *)mymalloc_movable(&list_loadsph, "list_loadsph", (sizeof(int) * NTask)); + list_work = (double *)mymalloc_movable(&list_work, "list_work", (sizeof(double) * NTask)); + list_worksph = (double *)mymalloc_movable(&list_worksph, "list_worksph", (sizeof(double) * NTask)); + DomainLeaveNode = (struct domain_cost_data *)mymalloc_movable(&DomainLeaveNode, "DomainLeaveNode", + (MaxTopNodes * sizeof(struct domain_cost_data))); +} + +/*! \brief Frees lists needed for communication in domain decomposition. + * + * This routine is the counterpart of domain_allocate_lists(void). + * Frees memory of all arrays allocated there, except Key, which is freed + * elsewhere (in void domain_Decomposition(void); see domain.c). 
/*! \brief Frees lists needed for communication in domain decomposition.
 *
 *  This routine is the counterpart of domain_allocate_lists(void).
 *  Frees memory of all arrays allocated there, except Key, which is freed
 *  elsewhere (in void domain_Decomposition(void); see domain.c).
 *
 *  The arrays are released in exactly the reverse order in which
 *  domain_allocate_lists() allocates them. Presumably the memory manager
 *  behind myfree() expects this last-in/first-out order -- verify before
 *  reordering any of these calls.
 *
 *  \return void
 */
void domain_free_lists(void)
{
  myfree(DomainLeaveNode); /* allocated last, therefore freed first */
  myfree(list_worksph);
  myfree(list_work);
  myfree(list_loadsph);
  myfree(list_load);
  myfree(list_NumGas);
  myfree(list_NumPart);
  myfree(toGetSph);
  myfree(toGet);
  myfree(toGoSph);
  myfree(toGo); /* Key, allocated before toGo, is intentionally not freed here */
}
+ * \details contains functions: + * void peano_hilbert_order(void) + * void peano_hilbert_order_DP(void) + * int peano_compare_key(const void *a, const void *b) + * void reorder_DP(void) + * void reorder_gas(int *Id) + * void reorder_particles(int *Id) + * peanokey peano_hilbert_key(peano1D x, peano1D y, peano1D z, + * int bits) + * void peano_hilbert_key_inverse(peanokey key, int bits, + * peano1D * x, peano1D * y, peano1D * z) + * static void msort_peano_with_tmp(struct peano_hilbert_data + * *b, size_t n, struct peano_hilbert_data *t) + * void mysort_peano(void *b, size_t n, size_t s, int (*cmp) + * (const void *, const void *)) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../mesh/voronoi/voronoi.h" + +#include + +/*! Data structure for Peano Hilbert data. + */ +static struct peano_hilbert_data +{ + peanokey key; + int index; +} * pmp; + +static int *Id; + +/*! 
\brief Sorts particles along Peano-Hilbert curve + * + * \return void + */ +void peano_hilbert_order(void) +{ + int i; + + double t0 = second(); + + // mpi_printf("DOMAIN: begin Peano-Hilbert order...\n"); + + if(NumGas) + { + pmp = (struct peano_hilbert_data *)mymalloc("pmp", sizeof(struct peano_hilbert_data) * NumGas); + Id = (int *)mymalloc("Id", sizeof(int) * NumGas); + + for(i = 0; i < NumGas; i++) + { + pmp[i].index = i; + pmp[i].key = Key[i]; + } + + mysort_peano(pmp, NumGas, sizeof(struct peano_hilbert_data), peano_compare_key); + + for(i = 0; i < NumGas; i++) + Id[pmp[i].index] = i; + + reorder_gas(Id); + + myfree(Id); + myfree(pmp); + } + + if(NumPart - NumGas > 0) + { + pmp = (struct peano_hilbert_data *)mymalloc("pmp", sizeof(struct peano_hilbert_data) * (NumPart - NumGas)); + pmp -= (NumGas); + + Id = (int *)mymalloc("Id", sizeof(int) * (NumPart - NumGas)); + Id -= (NumGas); + + for(i = NumGas; i < NumPart; i++) + { + pmp[i].index = i; + pmp[i].key = Key[i]; + } + + mysort_peano(pmp + NumGas, NumPart - NumGas, sizeof(struct peano_hilbert_data), peano_compare_key); + + for(i = NumGas; i < NumPart; i++) + Id[pmp[i].index] = i; + + reorder_particles(Id); + + Id += NumGas; + myfree(Id); + pmp += NumGas; + myfree(pmp); + } + + double t1 = second(); + mpi_printf("DOMAIN: Peano-Hilbert order done, took %g sec.\n", timediff(t0, t1)); +} + +/*! \brief Sorts Delaunay Points (DP array) along Peano-Hilbert curve. 
+ * + * \return void + */ +void peano_hilbert_order_DP(void) +{ +#ifdef ONEDIMS + return; +#endif /* #ifdef ONEDIMS */ + + int i; + + if(Mesh.Ndp) + { + pmp = (struct peano_hilbert_data *)mymalloc("pmp", sizeof(struct peano_hilbert_data) * Mesh.Ndp); + Id = (int *)mymalloc("Id", sizeof(int) * Mesh.Ndp); + + point *DP = Mesh.DP; + + for(i = 0; i < Mesh.Ndp; i++) + { + pmp[i].index = i; + pmp[i].key = peano_hilbert_key((int)((DP[i].x + DomainLen) * DomainFac / 3), (int)((DP[i].y + DomainLen) * DomainFac / 3), + (int)((DP[i].z + DomainLen) * DomainFac / 3), BITS_PER_DIMENSION); + } + + mysort_peano(pmp, Mesh.Ndp, sizeof(struct peano_hilbert_data), peano_compare_key); + + for(i = 0; i < Mesh.Ndp; i++) + Id[pmp[i].index] = i; + + reorder_DP(); + + myfree(Id); + myfree(pmp); + } + + mpi_printf("VORONOI: Peano-Hilbert of DP points done.\n"); +} + +/*! \brief Compares two peano_hilbert_data objects with each other. + * + * Sorting kernel for sorting along Peano-Hilbert curve. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a->key < b->key + */ +int peano_compare_key(const void *a, const void *b) +{ + if(((struct peano_hilbert_data *)a)->key < (((struct peano_hilbert_data *)b)->key)) + return -1; + + if(((struct peano_hilbert_data *)a)->key > (((struct peano_hilbert_data *)b)->key)) + return +1; + + return 0; +} + +/*! \brief Rearranges Delaunay points in DP array according to new ordering. + * + * Requires access to an ordering array Id which is as long as the number of + * Delaunay points and contains the new index of each Delaunay point. 
+ * + * \return void + */ +void reorder_DP(void) +{ + int i; + point DPsave, DPsource; + int idsource, idsave, dest; + point *DP = Mesh.DP; + + for(i = 0; i < Mesh.Ndp; i++) + { + if(Id[i] != i) + { + DPsource = DP[i]; + + idsource = Id[i]; + dest = Id[i]; + + do + { + DPsave = DP[dest]; + idsave = Id[dest]; + + DP[dest] = DPsource; + Id[dest] = idsource; + + if(dest == i) + break; + + DPsource = DPsave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } +} + +/*! \brief Rearranges gas cells in P and SphP arrays according to new ordering. + * + * \param[in] Id Array which is as long as the number of gas cells and + * which contains the new index of each cell. + * + * \return void + */ +void reorder_gas(int *Id) +{ + int i; + struct particle_data Psave, Psource; + struct sph_particle_data SphPsave, SphPsource; + int idsource, idsave, dest; + + for(i = 0; i < NumGas; i++) + { + if(Id[i] != i) + { + Psource = P[i]; + SphPsource = SphP[i]; + + idsource = Id[i]; + dest = Id[i]; + + do + { + Psave = P[dest]; + SphPsave = SphP[dest]; + idsave = Id[dest]; + + P[dest] = Psource; + SphP[dest] = SphPsource; + Id[dest] = idsource; + + if(dest == i) + break; + + Psource = Psave; + SphPsource = SphPsave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } +} + +/*! \brief Rearranges particles in P array according to new ordering. + * + * \param[in] Id Array which is as long as the number of particles and + * which contains the new index of each particle. 
+ * + * \return void + */ +void reorder_particles(int *Id) +{ + int i; + struct particle_data Psave, Psource; + int idsource, idsave, dest; + + for(i = NumGas; i < NumPart; i++) + { + if(Id[i] != i) + { + Psource = P[i]; + idsource = Id[i]; + + dest = Id[i]; + + do + { + Psave = P[dest]; + idsave = Id[dest]; + + P[dest] = Psource; + Id[dest] = idsource; + + if(dest == i) + break; + + Psource = Psave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } +} + +/* The following rewrite of the original function + * peano_hilbert_key_old() has been written by MARTIN REINECKE. + * It is about a factor 2.3 - 2.5 faster than Volker's old routine! + */ +const unsigned char rottable3[48][8] = { + {36, 28, 25, 27, 10, 10, 25, 27}, {29, 11, 24, 24, 37, 11, 26, 26}, {8, 8, 25, 27, 30, 38, 25, 27}, + {9, 39, 24, 24, 9, 31, 26, 26}, {40, 24, 44, 32, 40, 6, 44, 6}, {25, 7, 33, 7, 41, 41, 45, 45}, + {4, 42, 4, 46, 26, 42, 34, 46}, {43, 43, 47, 47, 5, 27, 5, 35}, {33, 35, 36, 28, 33, 35, 2, 2}, + {32, 32, 29, 3, 34, 34, 37, 3}, {33, 35, 0, 0, 33, 35, 30, 38}, {32, 32, 1, 39, 34, 34, 1, 31}, + {24, 42, 32, 46, 14, 42, 14, 46}, {43, 43, 47, 47, 25, 15, 33, 15}, {40, 12, 44, 12, 40, 26, 44, 34}, + {13, 27, 13, 35, 41, 41, 45, 45}, {28, 41, 28, 22, 38, 43, 38, 22}, {42, 40, 23, 23, 29, 39, 29, 39}, + {41, 36, 20, 36, 43, 30, 20, 30}, {37, 31, 37, 31, 42, 40, 21, 21}, {28, 18, 28, 45, 38, 18, 38, 47}, + {19, 19, 46, 44, 29, 39, 29, 39}, {16, 36, 45, 36, 16, 30, 47, 30}, {37, 31, 37, 31, 17, 17, 46, 44}, + {12, 4, 1, 3, 34, 34, 1, 3}, {5, 35, 0, 0, 13, 35, 2, 2}, {32, 32, 1, 3, 6, 14, 1, 3}, + {33, 15, 0, 0, 33, 7, 2, 2}, {16, 0, 20, 8, 16, 30, 20, 30}, {1, 31, 9, 31, 17, 17, 21, 21}, + {28, 18, 28, 22, 2, 18, 10, 22}, {19, 19, 23, 23, 29, 3, 29, 11}, {9, 11, 12, 4, 9, 11, 26, 26}, + {8, 8, 5, 27, 10, 10, 13, 27}, {9, 11, 24, 24, 9, 11, 6, 14}, {8, 8, 25, 15, 10, 10, 25, 7}, + {0, 18, 8, 22, 38, 18, 38, 22}, {19, 19, 23, 23, 1, 39, 9, 39}, {16, 36, 20, 36, 16, 2, 20, 10}, + 
{37, 3, 37, 11, 17, 17, 21, 21}, {4, 17, 4, 46, 14, 19, 14, 46}, {18, 16, 47, 47, 5, 15, 5, 15}, + {17, 12, 44, 12, 19, 6, 44, 6}, {13, 7, 13, 7, 18, 16, 45, 45}, {4, 42, 4, 21, 14, 42, 14, 23}, + {43, 43, 22, 20, 5, 15, 5, 15}, {40, 12, 21, 12, 40, 6, 23, 6}, {13, 7, 13, 7, 41, 41, 22, 20}}; + +const unsigned char subpix3[48][8] = { + {0, 7, 1, 6, 3, 4, 2, 5}, {7, 4, 6, 5, 0, 3, 1, 2}, {4, 3, 5, 2, 7, 0, 6, 1}, {3, 0, 2, 1, 4, 7, 5, 6}, {1, 0, 6, 7, 2, 3, 5, 4}, + {0, 3, 7, 4, 1, 2, 6, 5}, {3, 2, 4, 5, 0, 1, 7, 6}, {2, 1, 5, 6, 3, 0, 4, 7}, {6, 1, 7, 0, 5, 2, 4, 3}, {1, 2, 0, 3, 6, 5, 7, 4}, + {2, 5, 3, 4, 1, 6, 0, 7}, {5, 6, 4, 7, 2, 1, 3, 0}, {7, 6, 0, 1, 4, 5, 3, 2}, {6, 5, 1, 2, 7, 4, 0, 3}, {5, 4, 2, 3, 6, 7, 1, 0}, + {4, 7, 3, 0, 5, 6, 2, 1}, {6, 7, 5, 4, 1, 0, 2, 3}, {7, 0, 4, 3, 6, 1, 5, 2}, {0, 1, 3, 2, 7, 6, 4, 5}, {1, 6, 2, 5, 0, 7, 3, 4}, + {2, 3, 1, 0, 5, 4, 6, 7}, {3, 4, 0, 7, 2, 5, 1, 6}, {4, 5, 7, 6, 3, 2, 0, 1}, {5, 2, 6, 1, 4, 3, 7, 0}, {7, 0, 6, 1, 4, 3, 5, 2}, + {0, 3, 1, 2, 7, 4, 6, 5}, {3, 4, 2, 5, 0, 7, 1, 6}, {4, 7, 5, 6, 3, 0, 2, 1}, {6, 7, 1, 0, 5, 4, 2, 3}, {7, 4, 0, 3, 6, 5, 1, 2}, + {4, 5, 3, 2, 7, 6, 0, 1}, {5, 6, 2, 1, 4, 7, 3, 0}, {1, 6, 0, 7, 2, 5, 3, 4}, {6, 5, 7, 4, 1, 2, 0, 3}, {5, 2, 4, 3, 6, 1, 7, 0}, + {2, 1, 3, 0, 5, 6, 4, 7}, {0, 1, 7, 6, 3, 2, 4, 5}, {1, 2, 6, 5, 0, 3, 7, 4}, {2, 3, 5, 4, 1, 0, 6, 7}, {3, 0, 4, 7, 2, 1, 5, 6}, + {1, 0, 2, 3, 6, 7, 5, 4}, {0, 7, 3, 4, 1, 6, 2, 5}, {7, 6, 4, 5, 0, 1, 3, 2}, {6, 1, 5, 2, 7, 0, 4, 3}, {5, 4, 6, 7, 2, 3, 1, 0}, + {4, 3, 7, 0, 5, 2, 6, 1}, {3, 2, 0, 1, 4, 5, 7, 6}, {2, 5, 1, 6, 3, 4, 0, 7}}; + +/*! \brief This function computes a Peano-Hilbert key for an integer triplet + * (x,y,z), with x,y,z in the range between 0 and 2^bits-1. + * + * \param[in] x X position. + * \param[in] y Y position. + * \param[in] z Z position. + * \param[in] bits Number of bits used for Peano key. + * + * \return Peano-Hilbert key corresponding to position x,y,z. 
+ */ +peanokey peano_hilbert_key(peano1D x, peano1D y, peano1D z, int bits) +{ + peano1D mask; + unsigned char rotation = 0; + peanokey key = 0; + + for(mask = ((peano1D)1) << (bits - 1); mask > 0; mask >>= 1) + { + unsigned char pix = ((x & mask) ? 4 : 0) | ((y & mask) ? 2 : 0) | ((z & mask) ? 1 : 0); + + key <<= 3; + key |= subpix3[rotation][pix]; + rotation = rottable3[rotation][pix]; + } + + return key; +} + +static int quadrants[24][2][2][2] = { + /* rotx=0, roty=0-3 */ + {{{0, 7}, {1, 6}}, {{3, 4}, {2, 5}}}, + {{{7, 4}, {6, 5}}, {{0, 3}, {1, 2}}}, + {{{4, 3}, {5, 2}}, {{7, 0}, {6, 1}}}, + {{{3, 0}, {2, 1}}, {{4, 7}, {5, 6}}}, + /* rotx=1, roty=0-3 */ + {{{1, 0}, {6, 7}}, {{2, 3}, {5, 4}}}, + {{{0, 3}, {7, 4}}, {{1, 2}, {6, 5}}}, + {{{3, 2}, {4, 5}}, {{0, 1}, {7, 6}}}, + {{{2, 1}, {5, 6}}, {{3, 0}, {4, 7}}}, + /* rotx=2, roty=0-3 */ + {{{6, 1}, {7, 0}}, {{5, 2}, {4, 3}}}, + {{{1, 2}, {0, 3}}, {{6, 5}, {7, 4}}}, + {{{2, 5}, {3, 4}}, {{1, 6}, {0, 7}}}, + {{{5, 6}, {4, 7}}, {{2, 1}, {3, 0}}}, + /* rotx=3, roty=0-3 */ + {{{7, 6}, {0, 1}}, {{4, 5}, {3, 2}}}, + {{{6, 5}, {1, 2}}, {{7, 4}, {0, 3}}}, + {{{5, 4}, {2, 3}}, {{6, 7}, {1, 0}}}, + {{{4, 7}, {3, 0}}, {{5, 6}, {2, 1}}}, + /* rotx=4, roty=0-3 */ + {{{6, 7}, {5, 4}}, {{1, 0}, {2, 3}}}, + {{{7, 0}, {4, 3}}, {{6, 1}, {5, 2}}}, + {{{0, 1}, {3, 2}}, {{7, 6}, {4, 5}}}, + {{{1, 6}, {2, 5}}, {{0, 7}, {3, 4}}}, + /* rotx=5, roty=0-3 */ + {{{2, 3}, {1, 0}}, {{5, 4}, {6, 7}}}, + {{{3, 4}, {0, 7}}, {{2, 5}, {1, 6}}}, + {{{4, 5}, {7, 6}}, {{3, 2}, {0, 1}}}, + {{{5, 2}, {6, 1}}, {{4, 3}, {7, 0}}}}; + +static int rotxmap_table[24] = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 17, 18, 19, 16, 23, 20, 21, 22}; + +static int rotymap_table[24] = {1, 2, 3, 0, 16, 17, 18, 19, 11, 8, 9, 10, 22, 23, 20, 21, 14, 15, 12, 13, 4, 5, 6, 7}; + +static int rotx_table[8] = {3, 0, 0, 2, 2, 0, 0, 1}; +static int roty_table[8] = {0, 1, 1, 2, 2, 3, 3, 0}; + +static int sense_table[8] = {-1, -1, -1, +1, +1, -1, -1, -1}; + +static int 
flag_quadrants_inverse = 1; +static char quadrants_inverse_x[24][8]; +static char quadrants_inverse_y[24][8]; +static char quadrants_inverse_z[24][8]; + +/*! \brief Computes position from Peano-Hilbert key. + * + * \param[in] key Peano-Hilbert key. + * \param[in] bits Bits used for Peano-Hilbert key. + * \param[out] x X position. + * \param[out] y Y position. + * \param[out] z Z position. + */ +void peano_hilbert_key_inverse(peanokey key, int bits, peano1D *x, peano1D *y, peano1D *z) +{ + if(flag_quadrants_inverse) + { + flag_quadrants_inverse = 0; + for(int rotation = 0; rotation < 24; rotation++) + for(int bitx = 0; bitx < 2; bitx++) + for(int bity = 0; bity < 2; bity++) + for(int bitz = 0; bitz < 2; bitz++) + { + int quad = quadrants[rotation][bitx][bity][bitz]; + quadrants_inverse_x[rotation][quad] = bitx; + quadrants_inverse_y[rotation][quad] = bity; + quadrants_inverse_z[rotation][quad] = bitz; + } + } + + int shift = 3 * (bits - 1); + peanokey mask = ((peanokey)7) << shift; + int rotation = 0; + char sense = 1; + + *x = *y = *z = 0; + + for(int i = 0; i < bits; i++, mask >>= 3, shift -= 3) + { + peanokey keypart = (key & mask) >> shift; + + int quad = (sense == 1) ? (keypart) : (7 - keypart); + + *x = (*x << 1) + quadrants_inverse_x[rotation][quad]; + *y = (*y << 1) + quadrants_inverse_y[rotation][quad]; + *z = (*z << 1) + quadrants_inverse_z[rotation][quad]; + + char rotx = rotx_table[quad]; + char roty = roty_table[quad]; + sense *= sense_table[quad]; + + while(rotx > 0) + { + rotation = rotxmap_table[rotation]; + rotx--; + } + + while(roty > 0) + { + rotation = rotymap_table[rotation]; + roty--; + } + } +} + +/*! \brief Sorting algorithm for sorting along Peano-Hilbert curve. + * + * Merge sort algorithm. + * + * \param[in, out] b Array to be sorted. + * \param[in] n size of array. + * \param[in] t Array for temporary data needed by msort. 
+ * + * \return void + */ +static void msort_peano_with_tmp(struct peano_hilbert_data *b, size_t n, struct peano_hilbert_data *t) +{ + struct peano_hilbert_data *tmp; + struct peano_hilbert_data *b1, *b2; + size_t n1, n2; + + if(n <= 1) + return; + + n1 = n / 2; + n2 = n - n1; + b1 = b; + b2 = b + n1; + + msort_peano_with_tmp(b1, n1, t); + msort_peano_with_tmp(b2, n2, t); + + tmp = t; + + while(n1 > 0 && n2 > 0) + { + if(b1->key <= b2->key) + { + --n1; + *tmp++ = *b1++; + } + else + { + --n2; + *tmp++ = *b2++; + } + } + + if(n1 > 0) + memcpy(tmp, b1, n1 * sizeof(struct peano_hilbert_data)); + memcpy(b, t, (n - n2) * sizeof(struct peano_hilbert_data)); +} + +/*! \brief Wrapper for sorting algorithm for sorting along Peano-Hilbert curve. + * + * Allocates temporary array and then calls msort_peano_with_tmp. + * This function could be replaced by a call of qsort(b, n, s, cmp), but the + * present merge sort implementation is usually a bit faster for this array. + * + * \param[in, out] b Array to be sorted. + * \param[in] n Size of array. + * \param[in] s Size of single array elements (needed for memory allocation). + * \param[in] cmp Sorting kernel function (obsolete, but still there in case + * an other sorting algorithm should be used). + * + * \return void + */ +void mysort_peano(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)) +{ + const size_t size = n * s; + + struct peano_hilbert_data *tmp = (struct peano_hilbert_data *)mymalloc("tmp", size); + + msort_peano_with_tmp((struct peano_hilbert_data *)b, n, tmp); + + myfree(tmp); +} diff --git a/src/amuse/community/arepo/src/fof/fof.c b/src/amuse/community/arepo/src/fof/fof.c new file mode 100644 index 0000000000..210f07f79c --- /dev/null +++ b/src/amuse/community/arepo/src/fof/fof.c @@ -0,0 +1,967 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/fof/fof.c + * \date 05/2018 + * \brief Parallel friend of friends (FoF) group finder. + * \details contains functions: + * void fof_fof(int num) + * void fof_prepare_output_order(void) + * double fof_get_comoving_linking_length(void) + * void fof_compile_catalogue(void) + * void fof_assign_group_numbers(void) + * void fof_compute_group_properties(int gr, int start, int len) + * void fof_exchange_group_data(void) + * void fof_finish_group_properties(void) + * double fof_periodic(double x) + * double fof_periodic_wrap(double x) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../subfind/subfind.h" +#include "fof.h" + +#ifdef FOF + +static MyIDType *MinID; +static int *Head, *Len, *Next, *Tail, *MinIDTask; + +/*! \brief Main routine to execute the friend of friends group finder. 
+ * + * If called with num == -1 as argument, only FOF is carried out and no group + * catalogs are saved to disk. If num >= 0, the code will store the + * group/subgroup catalogs, and bring the particles into output order. + * In this case, the calling routine (which is normally savepositions()) will + * need to free PS[] and bring the particles back into the original order, + * as well as reestablished the mesh. + * + * \param[in] num Index of output; if negative, no output written. + * + * \return void + */ +void fof_fof(int num) +{ + int i, start, lenloc, largestgroup; + double t0, t1, cputime; + + TIMER_START(CPU_FOF); + + mpi_printf("FOF: Begin to compute FoF group catalogue... (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + + if(num >= 0 && RestartFlag != 3 && RestartFlag != 6) + { + /* let's discard an existing mesh - we do this here to reduce the peak memory usage, even at the price of + * having to recreate it later */ + free_mesh(); + } + + if(RestartFlag != 6) + { + ngb_treefree(); + + domain_free(); + } + + domain_Decomposition(); + + ngb_treeallocate(); + ngb_treebuild(NumGas); + + /* check */ + for(i = 0; i < NumPart; i++) + if((P[i].Mass == 0 && P[i].ID == 0) || (P[i].Type == 4 && P[i].Mass == 0)) + terminate("this should not happen"); + + /* this structure will hold auxiliary information for each particle, needed only during group finding */ + PS = (struct subfind_data *)mymalloc_movable(&PS, "PS", All.MaxPart * sizeof(struct subfind_data)); + + memset(PS, 0, NumPart * sizeof(struct subfind_data)); + + /* First, we save the original location of the particles, in order to be able to revert to this layout later on */ + for(i = 0; i < NumPart; i++) + { + PS[i].OriginTask = ThisTask; + PS[i].OriginIndex = i; + } + + fof_OldMaxPart = All.MaxPart; + fof_OldMaxPartSph = All.MaxPartSph; + + LinkL = fof_get_comoving_linking_length(); + + mpi_printf("FOF: Comoving linking length: %g (presently allocated=%g MB)\n", LinkL, AllocatedBytes 
/ (1024.0 * 1024.0)); + + MinID = (MyIDType *)mymalloc("MinID", NumPart * sizeof(MyIDType)); + MinIDTask = (int *)mymalloc("MinIDTask", NumPart * sizeof(int)); + + Head = (int *)mymalloc("Head", NumPart * sizeof(int)); + Len = (int *)mymalloc("Len", NumPart * sizeof(int)); + Next = (int *)mymalloc("Next", NumPart * sizeof(int)); + Tail = (int *)mymalloc("Tail", NumPart * sizeof(int)); + +#ifdef HIERARCHICAL_GRAVITY + timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestOccupiedTimeBin); +#endif /* #ifdef HIERARCHICAL_GRAVITY */ + + construct_forcetree(0, 0, 1, All.HighestOccupiedTimeBin); /* build tree for all particles */ + +#if defined(SUBFIND) + subfind_density_hsml_guess(); +#endif /* #if defined(SUBFIND) */ + + /* initialize link-lists */ + for(i = 0; i < NumPart; i++) + { + Head[i] = Tail[i] = i; + Len[i] = 1; + Next[i] = -1; + MinID[i] = P[i].ID; + MinIDTask[i] = ThisTask; + } + + /* call routine to find primary groups */ + cputime = fof_find_groups(MinID, Head, Len, Next, Tail, MinIDTask); + mpi_printf("FOF: group finding took = %g sec\n", cputime); + +#ifdef FOF_SECONDARY_LINK_TARGET_TYPES + myfree(Father); + myfree(Nextnode); + myfree(Tree_Points); + + /* now rebuild the tree with all the types selected as secondary link targets */ + construct_forcetree(0, 0, 2, All.HighestOccupiedTimeBin); +#endif /* #ifdef FOF_SECONDARY_LINK_TARGET_TYPES */ + +#ifdef HIERARCHICAL_GRAVITY + timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin); +#endif /* #ifdef HIERARCHICAL_GRAVITY */ + + /* call routine to attach secondary particles/cells to primary groups */ + cputime = fof_find_nearest_dmparticle(MinID, Head, Len, Next, Tail, MinIDTask); + + mpi_printf("FOF: attaching gas and star particles to nearest dm particles took = %g sec\n", cputime); + + myfree(Father); + myfree(Nextnode); + myfree(Tree_Points); + force_treefree(); + + myfree(Tail); + myfree(Next); + myfree(Len); + + t0 = second(); + + 
FOF_PList = (struct fof_particle_list *)mymalloc_movable(&FOF_PList, "FOF_PList", NumPart * sizeof(struct fof_particle_list)); + + for(i = 0; i < NumPart; i++) + { + FOF_PList[i].MinID = MinID[Head[i]]; + FOF_PList[i].MinIDTask = MinIDTask[Head[i]]; + FOF_PList[i].Pindex = i; + } + + myfree_movable(Head); + myfree_movable(MinIDTask); + myfree_movable(MinID); + + FOF_GList = (struct fof_group_list *)mymalloc_movable(&FOF_GList, "FOF_GList", sizeof(struct fof_group_list) * NumPart); + + fof_compile_catalogue(); + + t1 = second(); + mpi_printf("FOF: compiling local group data and catalogue took = %g sec\n", timediff(t0, t1)); + + MPI_Allreduce(&Ngroups, &TotNgroups, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + sumup_large_ints(1, &Nids, &TotNids); + + if(TotNgroups > 0) + { + int largestloc = 0; + + for(i = 0; i < NgroupsExt; i++) + if(FOF_GList[i].LocCount + FOF_GList[i].ExtCount > largestloc) + largestloc = FOF_GList[i].LocCount + FOF_GList[i].ExtCount; + MPI_Allreduce(&largestloc, &largestgroup, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + } + else + largestgroup = 0; + + mpi_printf("FOF: Total number of FOF groups with at least %d particles: %d\n", FOF_GROUP_MIN_LEN, TotNgroups); + mpi_printf("FOF: Largest FOF group has %d particles.\n", largestgroup); + mpi_printf("FOF: Total number of particles in FOF groups: %lld\n", TotNids); + + t0 = second(); + + MaxNgroups = 2 * imax(NgroupsExt, TotNgroups / NTask + 1); + + Group = (struct group_properties *)mymalloc_movable(&Group, "Group", sizeof(struct group_properties) * MaxNgroups); + + mpi_printf("FOF: group properties are now allocated.. 
(presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + + for(i = 0, start = 0; i < NgroupsExt; i++) + { + while(FOF_PList[start].MinID < FOF_GList[i].MinID) + { + start++; + if(start > NumPart) + terminate("start > NumPart"); + } + + if(FOF_PList[start].MinID != FOF_GList[i].MinID) + terminate("ID mismatch"); + + for(lenloc = 0; start + lenloc < NumPart;) + if(FOF_PList[start + lenloc].MinID == FOF_GList[i].MinID) + lenloc++; + else + break; + + Group[i].MinID = FOF_GList[i].MinID; + Group[i].MinIDTask = FOF_GList[i].MinIDTask; + + fof_compute_group_properties(i, start, lenloc); + + start += lenloc; + } + + fof_exchange_group_data(); + + fof_finish_group_properties(); + + t1 = second(); + mpi_printf("FOF: computation of group properties took = %g sec\n", timediff(t0, t1)); + + fof_assign_group_numbers(); + + mpi_printf("FOF: Finished computing FoF groups. (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + + myfree_movable(FOF_GList); + myfree_movable(FOF_PList); + +#ifdef SUBFIND + if(num >= 0) + { + TIMER_STOP(CPU_FOF); + + subfind(num); + + TIMER_START(CPU_FOF); + } +#else /* #ifdef SUBFIND */ + Nsubgroups = 0; + TotNsubgroups = 0; + if(num >= 0) + { + TIMER_STOP(CPU_FOF); + TIMER_START(CPU_SNAPSHOT); + + fof_save_groups(num); + + TIMER_STOP(CPU_SNAPSHOT); + TIMER_START(CPU_FOF); + } +#endif /* #ifdef SUBFIND #else */ + + myfree_movable(Group); + + mpi_printf("FOF: All FOF related work finished. 
(presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + +#ifndef FOF_STOREIDS + if(num >= 0) + { + TIMER_STOP(CPU_FOF); + TIMER_START(CPU_SNAPSHOT); + + /* now distribute the particles into output order */ + t0 = second(); + fof_prepare_output_order(); + fof_subfind_exchange( + MPI_COMM_WORLD); /* distribute particles such that FOF groups will appear in coherent way in snapshot files */ + t1 = second(); + mpi_printf("FOF: preparing output order of particles took %g sec\n", timediff(t0, t1)); + + TIMER_STOP(CPU_SNAPSHOT); + TIMER_START(CPU_FOF); + } + else + myfree(PS); +#else /* #ifndef FOF_STOREIDS */ + myfree(PS); +#endif /* #ifndef FOF_STOREIDS #else */ + + TIMER_STOP(CPU_FOF); +} + +/*! \brief Sorts groups by the desired output order. + * + * \return void + */ +void fof_prepare_output_order(void) +{ + int i, off, ntype[NTYPES]; + + struct data_aux_sort *aux_sort = (struct data_aux_sort *)mymalloc("aux_sort", sizeof(struct data_aux_sort) * NumPart); + + for(i = 0; i < NTYPES; i++) + ntype[i] = 0; + + for(i = 0; i < NumPart; i++) + { + aux_sort[i].OriginTask = ThisTask; + aux_sort[i].OriginIndex = i; + aux_sort[i].GrNr = PS[i].GrNr; +#ifdef SUBFIND + aux_sort[i].SubNr = PS[i].SubNr; + aux_sort[i].DM_BindingEnergy = PS[i].BindingEnergy; +#endif /* #ifdef SUBFIND */ + aux_sort[i].Type = P[i].Type; + aux_sort[i].ID = P[i].ID; +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) + aux_sort[i].FileOrder = P[i].FileOrder; +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */ + + ntype[P[i].Type]++; + } + + qsort(aux_sort, NumPart, sizeof(struct data_aux_sort), fof_compare_aux_sort_Type); + + if(RestartFlag == 18) + { +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) + for(i = 0, off = 0; i < NTYPES; off += ntype[i], i++) + parallel_sort(aux_sort + off, ntype[i], sizeof(struct data_aux_sort), fof_compare_aux_sort_FileOrder); +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */ + } + else + { + for(i = 0, off = 0; i < NTYPES; off += ntype[i], i++) + 
parallel_sort(aux_sort + off, ntype[i], sizeof(struct data_aux_sort), fof_compare_aux_sort_GrNr); + } + + for(i = 0; i < NumPart; i++) + { + aux_sort[i].TargetTask = ThisTask; + aux_sort[i].TargetIndex = i; + } + + /* now bring back into starting order */ + parallel_sort(aux_sort, NumPart, sizeof(struct data_aux_sort), fof_compare_aux_sort_OriginTask_OriginIndex); + + for(i = 0; i < NumPart; i++) + { + PS[i].TargetTask = aux_sort[i].TargetTask; + PS[i].TargetIndex = aux_sort[i].TargetIndex; + } + + myfree(aux_sort); +} + +/*! \brief Calculate linking length based on mean particle separation. + * + * \return Linking length. + */ +double fof_get_comoving_linking_length(void) +{ + int i, ndm; + long long ndmtot; + double mass, masstot, rhodm; + + for(i = 0, ndm = 0, mass = 0; i < NumPart; i++) + if(((1 << P[i].Type) & (FOF_PRIMARY_LINK_TYPES))) + { + ndm++; + mass += P[i].Mass; + } + sumup_large_ints(1, &ndm, &ndmtot); + MPI_Allreduce(&mass, &masstot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + rhodm = (All.Omega0 - All.OmegaBaryon) * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G); + + return FOF_LINKLENGTH * pow(masstot / ndmtot / rhodm, 1.0 / 3); +} + +/*! \brief Compiles the group catalogue. + * + * Combines results from all tasks. 
+ * + * \return void + */ +void fof_compile_catalogue(void) +{ + int i, j, start, nimport, ngrp, recvTask; + struct fof_group_list *get_FOF_GList; + + /* sort according to MinID */ + mysort(FOF_PList, NumPart, sizeof(struct fof_particle_list), fof_compare_FOF_PList_MinID); + + for(i = 0; i < NumPart; i++) + { + FOF_GList[i].MinID = FOF_PList[i].MinID; + FOF_GList[i].MinIDTask = FOF_PList[i].MinIDTask; + if(FOF_GList[i].MinIDTask == ThisTask) + { + FOF_GList[i].LocCount = 1; + FOF_GList[i].ExtCount = 0; + } + else + { + FOF_GList[i].LocCount = 0; + FOF_GList[i].ExtCount = 1; + } + } + + /* eliminate duplicates in FOF_GList with respect to MinID */ + + if(NumPart) + NgroupsExt = 1; + else + NgroupsExt = 0; + + for(i = 1, start = 0; i < NumPart; i++) + { + if(FOF_GList[i].MinID == FOF_GList[start].MinID) + { + FOF_GList[start].LocCount += FOF_GList[i].LocCount; + FOF_GList[start].ExtCount += FOF_GList[i].ExtCount; + } + else + { + start = NgroupsExt; + FOF_GList[start] = FOF_GList[i]; + NgroupsExt++; + } + } + + /* sort the remaining ones according to task */ + mysort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinIDTask); + + /* count how many we have of each task */ + for(i = 0; i < NTask; i++) + Send_count[i] = 0; + for(i = 0; i < NgroupsExt; i++) + Send_count[FOF_GList[i].MinIDTask]++; + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + if(j == ThisTask) /* we will not exchange the ones that are local */ + Recv_count[j] = 0; + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + get_FOF_GList = (struct fof_group_list *)mymalloc("get_FOF_GList", nimport * sizeof(struct fof_group_list)); + + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + 
if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the group info */ + MPI_Sendrecv(&FOF_GList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct fof_group_list), MPI_BYTE, recvTask, + TAG_DENS_A, &get_FOF_GList[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct fof_group_list), + MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + } + + for(i = 0; i < nimport; i++) + get_FOF_GList[i].MinIDTask = i; + + /* sort the groups according to MinID */ + mysort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinID); + mysort(get_FOF_GList, nimport, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinID); + + /* merge the imported ones with the local ones */ + for(i = 0, start = 0; i < nimport; i++) + { + while(FOF_GList[start].MinID < get_FOF_GList[i].MinID) + { + start++; + if(start >= NgroupsExt) + terminate("start >= NgroupsExt"); + } + + if(get_FOF_GList[i].LocCount != 0) + terminate("start >= NgroupsExt"); + + if(FOF_GList[start].MinIDTask != ThisTask) + terminate("FOF_GList[start].MinIDTask != ThisTask"); + + if(FOF_GList[start].MinID != get_FOF_GList[i].MinID) + terminate( + "FOF_GList[start].MinID != get_FOF_GList[i].MinID start=%d i=%d FOF_GList[start].MinID=%llu get_FOF_GList[i].MinID=%llu\n", + start, i, (long long)FOF_GList[start].MinID, (long long)get_FOF_GList[i].MinID); + + FOF_GList[start].ExtCount += get_FOF_GList[i].ExtCount; + } + + /* copy the size information back into the list, to inform the others */ + for(i = 0, start = 0; i < nimport; i++) + { + while(FOF_GList[start].MinID < get_FOF_GList[i].MinID) + { + start++; + if(start >= NgroupsExt) + terminate("start >= NgroupsExt"); + } + + get_FOF_GList[i].ExtCount = FOF_GList[start].ExtCount; + get_FOF_GList[i].LocCount = FOF_GList[start].LocCount; + } + + /* sort the imported/exported list according to MinIDTask */ + mysort(get_FOF_GList, nimport, sizeof(struct fof_group_list), 
fof_compare_FOF_GList_MinIDTask); + mysort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinIDTask); + + for(i = 0; i < nimport; i++) + get_FOF_GList[i].MinIDTask = ThisTask; + + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the group info */ + MPI_Sendrecv(&get_FOF_GList[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct fof_group_list), MPI_BYTE, + recvTask, TAG_DENS_A, &FOF_GList[Send_offset[recvTask]], + Send_count[recvTask] * sizeof(struct fof_group_list), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + } + + myfree(get_FOF_GList); + + /* eliminate all groups that are too small, and count local groups */ + for(i = 0, Ngroups = 0, Nids = 0; i < NgroupsExt; i++) + { + if(FOF_GList[i].LocCount + FOF_GList[i].ExtCount < FOF_GROUP_MIN_LEN) + { + FOF_GList[i] = FOF_GList[NgroupsExt - 1]; + NgroupsExt--; + i--; + } + else + { + if(FOF_GList[i].MinIDTask == ThisTask) + { + Ngroups++; + Nids += FOF_GList[i].LocCount + FOF_GList[i].ExtCount; + } + } + } + + /* sort the group list according to MinID */ + mysort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_MinID); +} + +/*! \brief Assigns each group a global group number. 
+ * + * \return void + */ +void fof_assign_group_numbers(void) +{ + int i, j, ngr, start, lenloc; + long long totNids; + double t0, t1; + + mpi_printf("FOF: start assigning group numbers\n"); + + t0 = second(); + + /* assign group numbers (at this point, both Group and FOF_GList are sorted by MinID) */ + for(i = 0; i < NgroupsExt; i++) + { + FOF_GList[i].LocCount += FOF_GList[i].ExtCount; /* total length */ + FOF_GList[i].ExtCount = ThisTask; /* original task */ + } + + parallel_sort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_LocCountTaskDiffMinID); + + for(i = 0, ngr = 0; i < NgroupsExt; i++) + { + if(FOF_GList[i].ExtCount == FOF_GList[i].MinIDTask) + ngr++; + + FOF_GList[i].GrNr = ngr - 1; + } + + MPI_Allgather(&ngr, 1, MPI_INT, Send_count, 1, MPI_INT, MPI_COMM_WORLD); + + /* count how many groups there are on earlier CPUs */ + long long ngr_sum; + for(j = 0, ngr_sum = 0; j < ThisTask; j++) + ngr_sum += Send_count[j]; + + for(i = 0; i < NgroupsExt; i++) + FOF_GList[i].GrNr += ngr_sum; + + sumup_large_ints(1, &ngr, &ngr_sum); + if(ngr_sum != TotNgroups) + { + printf("ngr_sum=%d\n", (int)ngr_sum); + terminate("inconsistency"); + } + + /* bring the group list back into the original order */ + parallel_sort(FOF_GList, NgroupsExt, sizeof(struct fof_group_list), fof_compare_FOF_GList_ExtCountMinID); + + /* Assign the group numbers to the group properties array */ + for(i = 0, start = 0; i < Ngroups; i++) + { + while(FOF_GList[start].MinID < Group[i].MinID) + { + start++; + if(start >= NgroupsExt) + terminate("start >= NgroupsExt"); + } + Group[i].GrNr = FOF_GList[start].GrNr; + } + + /* sort the groups according to group-number */ + parallel_sort(Group, Ngroups, sizeof(struct group_properties), fof_compare_Group_GrNr); + + for(i = 0; i < NumPart; i++) + PS[i].GrNr = TotNgroups + 1; /* this marks all particles that are not in any group */ + + for(i = 0, start = 0, Nids = 0; i < NgroupsExt; i++) + { + while(FOF_PList[start].MinID < 
FOF_GList[i].MinID) + { + start++; + if(start > NumPart) + terminate("start > NumPart"); + } + + if(FOF_PList[start].MinID != FOF_GList[i].MinID) + terminate("FOF_PList[start=%d].MinID=%lld != FOF_GList[i=%d].MinID=%lld", start, (long long)FOF_PList[start].MinID, i, + (long long)FOF_GList[i].MinID); + + for(lenloc = 0; start + lenloc < NumPart;) + if(FOF_PList[start + lenloc].MinID == FOF_GList[i].MinID) + { + PS[FOF_PList[start + lenloc].Pindex].GrNr = FOF_GList[i].GrNr; + Nids++; + lenloc++; + } + else + break; + + start += lenloc; + } + + sumup_large_ints(1, &Nids, &totNids); + + if(totNids != TotNids) + { + char buf[1000]; + sprintf(buf, "Task=%d Nids=%d totNids=%d TotNids=%d\n", ThisTask, Nids, (int)totNids, (int)TotNids); + terminate(buf); + } + + t1 = second(); + + mpi_printf("FOF: Assigning of group numbers took = %g sec\n", timediff(t0, t1)); +} + +/*! \brief Computes all kind of properties of groups. + * + * Not complete after calling this. There is still the function + * fof_finish_group_properties, which finalizes the calculation + * (with normalization, averages, unit conversions and other operations). + * + * \param[in] gr Index in Group array. + * \param[in] start Start index in FOF_PList. + * \param[in] len Number of particles in this group. 
+ * + * \return void + */ +void fof_compute_group_properties(int gr, int start, int len) +{ + int j, k, index, type, start_index = FOF_PList[start].Pindex; + double xyz[3]; + + Group[gr].Len = 0; + double gr_Mass = 0; +#ifdef USE_SFR + double gr_Sfr = 0; +#endif /* #ifdef USE_SFR */ + + double gr_CM[3], gr_Vel[3]; + for(k = 0; k < 3; k++) + { + gr_CM[k] = 0; + gr_Vel[k] = 0; + Group[gr].FirstPos[k] = P[start_index].Pos[k]; + } + + double gr_MassType[NTYPES]; + for(k = 0; k < NTYPES; k++) + { + Group[gr].LenType[k] = 0; + gr_MassType[k] = 0; + } + + // calculate + for(k = 0; k < len; k++) + { + index = FOF_PList[start + k].Pindex; + + Group[gr].Len++; + gr_Mass += P[index].Mass; + type = P[index].Type; + + Group[gr].LenType[type]++; + + gr_MassType[type] += P[index].Mass; + +#ifdef USE_SFR + if(P[index].Type == 0) + gr_Sfr += SphP[index].Sfr; +#endif /* #ifdef USE_SFR */ + + for(j = 0; j < 3; j++) + { + xyz[j] = P[index].Pos[j]; + xyz[j] = fof_periodic(xyz[j] - P[start_index].Pos[j]); + gr_CM[j] += P[index].Mass * xyz[j]; + gr_Vel[j] += P[index].Mass * P[index].Vel[j]; + } + } + + // put values into group struct + Group[gr].Mass = gr_Mass; +#ifdef USE_SFR + Group[gr].Sfr = gr_Sfr; +#endif /* #ifdef USE_SFR */ + + for(k = 0; k < 3; k++) + { + Group[gr].CM[k] = gr_CM[k]; + Group[gr].Vel[k] = gr_Vel[k]; + } + + for(k = 0; k < NTYPES; k++) + Group[gr].MassType[k] = gr_MassType[k]; +} + +/*! \brief Global exchange of identified groups to their appropriate task. 
+ * + * \return void + */ +void fof_exchange_group_data(void) +{ + struct group_properties *get_Group; + int i, j, ngrp, recvTask, nimport, start; + double xyz[3]; + + /* sort the groups according to task */ + mysort(Group, NgroupsExt, sizeof(struct group_properties), fof_compare_Group_MinIDTask); + + /* count how many we have of each task */ + for(i = 0; i < NTask; i++) + Send_count[i] = 0; + for(i = 0; i < NgroupsExt; i++) + Send_count[FOF_GList[i].MinIDTask]++; + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + if(j == ThisTask) /* we will not exchange the ones that are local */ + Recv_count[j] = 0; + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + get_Group = (struct group_properties *)mymalloc("get_Group", sizeof(struct group_properties) * nimport); + + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the group data */ + MPI_Sendrecv(&Group[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct group_properties), MPI_BYTE, recvTask, + TAG_DENS_A, &get_Group[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct group_properties), + MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + } + + /* sort the groups again according to MinID */ + mysort(Group, NgroupsExt, sizeof(struct group_properties), fof_compare_Group_MinID); + mysort(get_Group, nimport, sizeof(struct group_properties), fof_compare_Group_MinID); + + /* now add in the partial imported group data to the main ones */ + for(i = 0, start = 0; i < nimport; i++) + { + while(Group[start].MinID < get_Group[i].MinID) + { + start++; + if(start >= NgroupsExt) + terminate("start >= NgroupsExt"); + } + + 
Group[start].Len += get_Group[i].Len; + Group[start].Mass += get_Group[i].Mass; + + for(j = 0; j < NTYPES; j++) + { + Group[start].LenType[j] += get_Group[i].LenType[j]; + Group[start].MassType[j] += get_Group[i].MassType[j]; + } + +#ifdef USE_SFR + Group[start].Sfr += get_Group[i].Sfr; +#endif /* #ifdef USE_SFR */ + + for(j = 0; j < 3; j++) + { + xyz[j] = get_Group[i].CM[j] / get_Group[i].Mass; + xyz[j] = fof_periodic(xyz[j] + get_Group[i].FirstPos[j] - Group[start].FirstPos[j]); + Group[start].CM[j] += get_Group[i].Mass * xyz[j]; + Group[start].Vel[j] += get_Group[i].Vel[j]; + } + } + + myfree(get_Group); +} + +/*! \brief Finalizes group property calculation. + * + * Called after a loop over all particles of a group is already completed. + * + * \return void + */ +void fof_finish_group_properties(void) +{ + double cm[3]; + int i, j, ngr; + + for(i = 0; i < NgroupsExt; i++) + { + if(Group[i].MinIDTask == ThisTask) + { + for(j = 0; j < 3; j++) + { + Group[i].Vel[j] /= Group[i].Mass; + cm[j] = Group[i].CM[j] / Group[i].Mass; + cm[j] = fof_periodic_wrap(cm[j] + Group[i].FirstPos[j]); + Group[i].CM[j] = cm[j]; + } + } + } + + /* eliminate the non-local groups */ + for(i = 0, ngr = NgroupsExt; i < ngr; i++) + { + if(Group[i].MinIDTask != ThisTask) + { + Group[i] = Group[ngr - 1]; + i--; + ngr--; + } + } + + if(ngr != Ngroups) + terminate("ngr != Ngroups"); + + mysort(Group, Ngroups, sizeof(struct group_properties), fof_compare_Group_MinID); +} + +/*! \brief Do periodic wrap for coordinate. + * + * Note that his works only for cubic box. + * + * \param[in] x Coordinate. + * + * \return coordinate within [-0.5*BoxSize,0.5*BoxSize). + */ +double fof_periodic(double x) +{ +#ifndef GRAVITY_NOT_PERIODIC + if(x >= 0.5 * All.BoxSize) + x -= All.BoxSize; + if(x < -0.5 * All.BoxSize) + x += All.BoxSize; +#endif /* #ifndef GRAVITY_NOT_PERIODIC */ + return x; +} + +/*! \brief Do periodic wrap for coordinate. + * + * Note that his works only for cubic box. 
+ * + * \param[in] x Coordinate. + * + * \return coordinate within [0,BoxSize). + */ +double fof_periodic_wrap(double x) +{ +#ifndef GRAVITY_NOT_PERIODIC + while(x >= All.BoxSize) + x -= All.BoxSize; + while(x < 0) + x += All.BoxSize; +#endif /* #ifndef GRAVITY_NOT_PERIODIC */ + return x; +} + +#endif /* of FOF */ diff --git a/src/amuse/community/arepo/src/fof/fof.h b/src/amuse/community/arepo/src/fof/fof.h new file mode 100644 index 0000000000..e60771b3aa --- /dev/null +++ b/src/amuse/community/arepo/src/fof/fof.h @@ -0,0 +1,319 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/fof/fof.h + * \date 05/2018 + * \brief Header for Friend-of-Friends halo finder. 
#ifndef FOF_H
#define FOF_H

#include "../main/allvars.h"

/* Group counters. As used by the FOF routines: Ngroups counts the groups whose
 * MinIDTask is the local task, NgroupsExt counts all groups that have at least
 * one member on the local task, TotNgroups is the global number of groups. */
extern int Ngroups, NgroupsExt, MaxNgroups, TotNgroups, Nsubgroups, TotNsubgroups;
extern int Nids;          /* number of particles in the groups counted by Ngroups */
extern long long TotNids; /* global counterpart of Nids */

extern int fof_OldMaxPart;
extern int fof_OldMaxPartSph;

extern double LinkL;
extern unsigned char *flag_node_inside_linkinglength;

#define BITFLAG_INSIDE_LINKINGLENGTH 1

/* if not set explicitly, the secondary link targets are the primary link types */
#ifndef FOF_SECONDARY_LINK_TARGET_TYPES
#define FOF_SECONDARY_LINK_TARGET_TYPES FOF_PRIMARY_LINK_TYPES
#endif

/* Properties of one FOF group. Entries of this struct are exchanged between tasks
 * as raw bytes, so all tasks must be compiled with the same layout. */
extern struct group_properties
{
  int Len;                  /* number of member particles */
  MyIDType MinID;           /* key identifying the group; the group lists are sorted and matched on it */
  MyIDType MinIDTask;       /* task that owns the group (compared against ThisTask) */
                            /* NOTE(review): declared as int in fof_particle_list/fof_group_list, MyIDType here -- confirm intended */
  int GrNr;                 /* global group number, set by fof_assign_group_numbers() */
  int LenType[NTYPES];      /* number of members per particle type */
  MyFloat MassType[NTYPES]; /* mass per particle type */
  MyFloat Mass;             /* total mass */
  MyDouble CM[3];           /* mass-weighted offset sum relative to FirstPos; converted to the centre-of-mass
                               position by fof_finish_group_properties() */
  MyFloat Vel[3];           /* mass-weighted velocity sum; divided by Mass in fof_finish_group_properties() */
  MyDouble Pos[3];

  MyDouble FirstPos[3]; /* position of the first member particle; reference point for periodic offsets */
#ifdef USE_SFR
  MyFloat Sfr; /* summed star formation rate of the type-0 members */
#endif         /* #ifdef USE_SFR */

#ifdef SUBFIND
  int TargetTask; /* primary CPU responsible for this group */
  int Nsubs;
  int FirstSub;
  MyFloat M_Mean200, R_Mean200;
  MyFloat M_Crit200, R_Crit200;
  MyFloat M_Crit500, R_Crit500;
  MyFloat M_TopHat200, R_TopHat200;
#ifdef SUBFIND_EXTENDED_PROPERTIES
  MyFloat J_Mean200[3], JDM_Mean200[3], JGas_Mean200[3], JStars_Mean200[3], MassType_Mean200[NTYPES], CMFrac_Mean200,
      CMFracType_Mean200[NTYPES];
  MyFloat J_Crit200[3], JDM_Crit200[3], JGas_Crit200[3], JStars_Crit200[3], MassType_Crit200[NTYPES], CMFrac_Crit200,
      CMFracType_Crit200[NTYPES];
  MyFloat J_Crit500[3], JDM_Crit500[3], JGas_Crit500[3], JStars_Crit500[3], MassType_Crit500[NTYPES], CMFrac_Crit500,
      CMFracType_Crit500[NTYPES];
  MyFloat J_TopHat200[3], JDM_TopHat200[3], JGas_TopHat200[3], JStars_TopHat200[3], MassType_TopHat200[NTYPES], CMFrac_TopHat200,
      CMFracType_TopHat200[NTYPES];
  int LenType_Mean200[NTYPES], LenType_Crit200[NTYPES], LenType_Crit500[NTYPES], LenType_TopHat200[NTYPES];
  MyFloat J[3], JDM[3], JGas[3], JStars[3], CMFrac, CMFracType[NTYPES];
  MyFloat Ekin, Epot, Ethr;
  MyFloat Ekin_Crit200, Epot_Crit200, Ethr_Crit200;
  MyFloat Ekin_Crit500, Epot_Crit500, Ethr_Crit500;
  MyFloat Ekin_Mean200, Epot_Mean200, Ethr_Mean200;
  MyFloat Ekin_TopHat200, Epot_TopHat200, Ethr_TopHat200;
#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
#endif /* #ifdef SUBFIND */

} * Group;

/* Scratch record used by fof_prepare_output_order() to work out where each
 * particle has to be moved for the snapshot output. */
struct data_aux_sort
{
  int OriginTask, OriginIndex; /* task and local index the particle currently has */
  int TargetTask, TargetIndex; /* task and local index the particle should get */
  int GrNr;                    /* copy of PS[].GrNr */
  int Type;                    /* copy of P[].Type */
  MyIDType ID;                 /* copy of P[].ID */
#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
  MyIDType FileOrder; /* copy of P[].FileOrder */
#endif                /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
#ifdef SUBFIND
  int SubNr;                /* copy of PS[].SubNr */
  MyFloat DM_BindingEnergy; /* copy of PS[].BindingEnergy */
#endif                      /* #ifdef SUBFIND */
};

/* Per-particle link information; one entry per local particle. */
extern struct fof_particle_list
{
  MyIDType MinID; /* key of the group the particle belongs to */
  int MinIDTask;  /* task that owns this group */
  int Pindex;     /* index of the particle in the local P[] / PS[] arrays */
} * FOF_PList;

/* Per-group bookkeeping entry used while the catalogue is compiled. */
extern struct fof_group_list
{
  MyIDType MinID; /* key of the group */
  int MinIDTask;  /* task that owns the group */
  int LocCount;   /* members counted on the owning task; holds the total length during group numbering */
  int ExtCount;   /* members counted on other tasks; holds the originating task during group numbering */
  int GrNr;       /* global group number */
} * FOF_GList;

extern struct id_list
{
  MyIDType ID;
  int GrNr;
  int Type;
#ifdef SUBFIND
  int SubNr;
  MyFloat BindingEgy;
#endif /* #ifdef SUBFIND */
} * ID_list;

/* four 2-bit flags packed into one byte */
extern struct bit_flags
{
  unsigned char Nonlocal : 2, MinIDChanged : 2, Marked : 2, Changed : 2;
} * Flags;

/* Helper record used by fof_subfind_exchange() to sort particles by PS[].TargetIndex. */
struct fof_local_sort_data
{
  int targetindex; /* desired local index (sort key) */
  int index;       /* current local index */
};

extern struct fof_subfind_header
{
  int Ngroups;
  int Nsubgroups;
  int Nids;
  int TotNgroups;
  int TotNsubgroups;
  long long TotNids;
  int num_files;
  double time;
  double redshift;
  double HubbleParam;
  double BoxSize;
  double Omega0;
  double OmegaLambda;
  int flag_doubleprecision;
} catalogue_header;

/* Identifiers of the fields handled by the fof_subfind_* I/O functions declared
 * below. IO_FOF_LASTENTRY is the final enumerator. The numeric values follow
 * from the order, so new entries must not be inserted carelessly. */
enum fof_subfind_iofields
{
  IO_FOF_LEN,
  IO_FOF_MTOT,
  IO_FOF_POS,
  IO_FOF_CM,
  IO_FOF_VEL,
  IO_FOF_LENTYPE,
  IO_FOF_MASSTYPE,
  IO_FOF_SFR,

  IO_FOF_M_MEAN200,
  IO_FOF_R_MEAN200,
  IO_FOF_M_CRIT200,
  IO_FOF_R_CRIT200,
  IO_FOF_M_TOPHAT200,
  IO_FOF_R_TOPHAT200,
  IO_FOF_M_CRIT500,
  IO_FOF_R_CRIT500,

#ifdef SUBFIND_EXTENDED_PROPERTIES
  IO_FOF_J_MEAN200,
  IO_FOF_JDM_MEAN200,
  IO_FOF_JGAS_MEAN200,
  IO_FOF_JSTARS_MEAN200,
  IO_FOF_MASSTYPE_MEAN200,
  IO_FOF_LENTYPE_MEAN200,
  IO_FOF_CMFRAC_MEAN200,
  IO_FOF_CMFRACTYPE_MEAN200,
  IO_FOF_J_CRIT200,
  IO_FOF_JDM_CRIT200,
  IO_FOF_JGAS_CRIT200,
  IO_FOF_JSTARS_CRIT200,
  IO_FOF_MASSTYPE_CRIT200,
  IO_FOF_LENTYPE_CRIT200,
  IO_FOF_CMFRAC_CRIT200,
  IO_FOF_CMFRACTYPE_CRIT200,
  IO_FOF_J_TOPHAT200,
  IO_FOF_JDM_TOPHAT200,
  IO_FOF_JGAS_TOPHAT200,
  IO_FOF_JSTARS_TOPHAT200,
  IO_FOF_MASSTYPE_TOPHAT200,
  IO_FOF_LENTYPE_TOPHAT200,
  IO_FOF_CMFRAC_TOPHAT200,
  IO_FOF_CMFRACTYPE_TOPHAT200,
  IO_FOF_J_CRIT500,
  IO_FOF_JDM_CRIT500,
  IO_FOF_JGAS_CRIT500,
  IO_FOF_JSTARS_CRIT500,
  IO_FOF_MASSTYPE_CRIT500,
  IO_FOF_LENTYPE_CRIT500,
  IO_FOF_CMFRAC_CRIT500,
  IO_FOF_CMFRACTYPE_CRIT500,
  IO_FOF_J,
  IO_FOF_JDM,
  IO_FOF_JGAS,
  IO_FOF_JSTARS,
  IO_FOF_CMFRAC,
  IO_FOF_CMFRACTYPE,
  IO_FOF_EKIN,
  IO_FOF_ETHR,
  IO_FOF_EPOT,
  IO_FOF_EPOT_CRIT200,
  IO_FOF_EKIN_CRIT200,
  IO_FOF_ETHR_CRIT200,
  IO_FOF_EPOT_MEAN200,
  IO_FOF_EKIN_MEAN200,
  IO_FOF_ETHR_MEAN200,
  IO_FOF_EPOT_TOPHAT200,
  IO_FOF_EKIN_TOPHAT200,
  IO_FOF_ETHR_TOPHAT200,
  IO_FOF_EPOT_CRIT500,
  IO_FOF_EKIN_CRIT500,
  IO_FOF_ETHR_CRIT500,
#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */

  IO_FOF_NSUBS,
  IO_FOF_FIRSTSUB,
  IO_FOF_FUZZOFFTYPE,

  IO_SUB_LEN,
  IO_SUB_MTOT,
  IO_SUB_POS,
  IO_SUB_VEL,
  IO_SUB_LENTYPE,
  IO_SUB_MASSTYPE,
  IO_SUB_CM,
  IO_SUB_SPIN,
  IO_SUB_BFLD_HALO,
  IO_SUB_BFLD_DISK,

#ifdef SUBFIND_EXTENDED_PROPERTIES
  IO_SUB_EKIN,
  IO_SUB_ETHR,
  IO_SUB_EPOT,
  IO_SUB_J,
  IO_SUB_JDM,
  IO_SUB_JGAS,
  IO_SUB_JSTARS,
  IO_SUB_JINHALFRAD,
  IO_SUB_JDMINHALFRAD,
  IO_SUB_JGASINHALFRAD,
  IO_SUB_JSTARSINHALFRAD,
  IO_SUB_JINRAD,
  IO_SUB_JDMINRAD,
  IO_SUB_JGASINRAD,
  IO_SUB_JSTARSINRAD,
  IO_SUB_CMFRAC,
  IO_SUB_CMFRACTYPE,
  IO_SUB_CMFRACINHALFRAD,
  IO_SUB_CMFRACTYPEINHALFRAD,
  IO_SUB_CMFRACINRAD,
  IO_SUB_CMFRACTYPEINRAD,
#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */

  IO_SUB_VELDISP,
  IO_SUB_VMAX,
  IO_SUB_VMAXRAD,
  IO_SUB_HALFMASSRAD,
  IO_SUB_HALFMASSRADTYPE,
  IO_SUB_MASSINRAD,
  IO_SUB_MASSINHALFRAD,
  IO_SUB_MASSINMAXRAD,
  IO_SUB_MASSINRADTYPE,
  IO_SUB_MASSINHALFRADTYPE,
  IO_SUB_MASSINMAXRADTYPE,
  IO_SUB_IDMOSTBOUND,
  IO_SUB_GRNR,
  IO_SUB_PARENT,
  IO_SUB_SFR,
  IO_SUB_SFRINRAD,
  IO_SUB_SFRINHALFRAD,
  IO_SUB_SFRINMAXRAD,
  IO_FOFSUB_IDS,
  IO_FOF_LASTENTRY
};

/* I/O helpers for the group catalogue; each takes the field identifier as 'blocknr'. */
int fof_subfind_blockpresent(enum fof_subfind_iofields blocknr);
int fof_subfind_get_datatype(enum fof_subfind_iofields blocknr);
int fof_subfind_get_bytes_per_blockelement(enum fof_subfind_iofields blocknr);
int fof_subfind_get_particles_in_block(enum fof_subfind_iofields blocknr);
void fof_subfind_get_dataset_name(enum fof_subfind_iofields blocknr, char *label);
void fof_subfind_get_Tab_IO_Label(enum fof_subfind_iofields blocknr, char *label);
int fof_subfind_get_dataset_group(enum fof_subfind_iofields blocknr);
void fof_subfind_fill_write_buffer(enum fof_subfind_iofields blocknr, int *startindex, int pc);
int fof_subfind_get_values_per_blockelement(enum fof_subfind_iofields blocknr);

#endif /* #ifndef FOF_H */
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/fof/fof_distribute.c + * \date 05/2018 + * \brief Communication and reordering routines for FoF. + * \details contains functions: + * void fof_subfind_exchange(MPI_Comm Communicator) + * void fof_reorder_PS(int *Id, int Nstart, int N) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../subfind/subfind.h" +#include "fof.h" + +#ifdef FOF + +/*! \brief Redistributes the particles according to what is stored in + * PS[].TargetTask, and PS[].TargetIndex. + * + * \param[in] Communicator MPI communicator.
/*! \brief Redistributes the particles according to what is stored in
 *         PS[].TargetTask, and PS[].TargetIndex.
 *
 *  The exchange is done one particle type at a time. For each type, the
 *  particles that have to leave are copied into send buffers, the local
 *  arrays (P, PS and, for type 0, SphP) are compacted, enlarged if needed,
 *  and the incoming particles are inserted directly behind the local gas
 *  particles. If the free memory does not suffice to buffer all outgoing
 *  particles of a type at once, the exchange of that type is repeated until
 *  no task has particles left to send.
 *
 *  Afterwards the array sizes are reduced again where possible (never below
 *  their size on entry) and the particles are brought into the local order
 *  given by PS[].TargetIndex, separately for gas and for the other particles.
 *
 *  All tasks of the communicator must call this together.
 *
 *  \param[in] Communicator MPI communicator.
 *
 *  \return void
 */
void fof_subfind_exchange(MPI_Comm Communicator)
{
  int nimport, nexport;
  int i, j, n, type, ngrp, target;
  int max_load, max_loadsph, load;
  struct particle_data *partBuf;
  struct subfind_data *subBuf;
  struct sph_particle_data *sphBuf;

  int CommThisTask, CommNTask;

  MPI_Comm_size(Communicator, &CommNTask);
  MPI_Comm_rank(Communicator, &CommThisTask);

  /* remember the array sizes on entry; they are the lower bound when shrinking at the end */
  int old_AllMaxPart    = All.MaxPart;
  int old_AllMaxPartSph = All.MaxPartSph;

  for(type = 0; type < NTYPES; type++)
    {
      size_t ExportSpace = 0.5 * (FreeBytes); /* we will try to grab at most half of the still available memory */
      /* worst-case buffer space per exported particle (the SPH part is only needed for type 0) */
      size_t PartSpace = sizeof(struct particle_data) + sizeof(struct subfind_data) + sizeof(struct sph_particle_data);
      if(PartSpace > ExportSpace)
        terminate("seems like we have insufficient storage, PartSpace=%lld ExportSpace=%lld", (long long)PartSpace,
                  (long long)ExportSpace);

      int glob_flag = 0;

      /* repeated until no task ran out of buffer space for this type */
      do
        {
          for(n = 0; n < CommNTask; n++)
            {
              Send_count[n] = 0;
            }

          ptrdiff_t AvailableSpace = ExportSpace; /* this must be a type that can become negative */

          /* first pass: count the particles of this type that go to each task, until the space budget is used up */
          for(n = 0; n < NumPart; n++)
            {
              if(AvailableSpace < 0)
                break;

              if(P[n].Type == type && PS[n].TargetTask != CommThisTask)
                {
                  target = PS[n].TargetTask;

                  if(target < 0 || target >= CommNTask)
                    terminate("n=%d targettask=%d", n, target);

                  AvailableSpace -= PartSpace;

                  Send_count[target]++;
                }
            }

          MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

          for(j = 0, nimport = 0, nexport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < CommNTask; j++)
            {
              nexport += Send_count[j];
              nimport += Recv_count[j];

              if(j > 0)
                {
                  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                }
            }

          /* for resize */
          load = (NumPart + nimport - nexport);
          MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, Communicator);

          if(type == 0)
            {
              load = (NumGas + nimport - nexport);
              MPI_Allreduce(&load, &max_loadsph, 1, MPI_INT, MPI_MAX, Communicator);
            }

          partBuf = (struct particle_data *)mymalloc_movable(&partBuf, "partBuf", nexport * sizeof(struct particle_data));
          subBuf  = (struct subfind_data *)mymalloc_movable(&subBuf, "subBuf", nexport * sizeof(struct subfind_data));
          if(type == 0)
            sphBuf = (struct sph_particle_data *)mymalloc_movable(&sphBuf, "sphBuf", nexport * sizeof(struct sph_particle_data));

          /* Send_count is reused as the fill level of each task's segment in the send buffers */
          for(i = 0; i < CommNTask; i++)
            {
              Send_count[i] = 0;
            }

          AvailableSpace = ExportSpace; /* this must be allowed to become negative */

          int nstay         = 0; /* number of particles kept so far; also the next free slot */
          int delta_numpart = 0; /* number of particles moved into the send buffers */
          int delta_numgas  = 0; /* number of those that are gas (type 0) */

          /* second pass: same selection and same stopping rule as the first pass, so the counts agree;
           * leaving particles go to the buffers, staying ones are moved to the front */
          for(n = 0; n < NumPart; n++)
            {
              if(AvailableSpace < 0)
                break;

              if(P[n].Type == type && PS[n].TargetTask != CommThisTask)
                {
                  target = PS[n].TargetTask;

                  AvailableSpace -= PartSpace;

                  partBuf[Send_offset[target] + Send_count[target]] = P[n];
                  subBuf[Send_offset[target] + Send_count[target]]  = PS[n];

                  if(P[n].Type == 0)
                    {
                      sphBuf[Send_offset[target] + Send_count[target]] = SphP[n];
                      delta_numgas++;
                    }

                  Send_count[target]++;
                  delta_numpart++;
                }
              else
                {
                  if(nstay != n)
                    {
                      /* now move P[n] to P[nstay] */

                      P[nstay]  = P[n];
                      PS[nstay] = PS[n];

                      if(P[nstay].Type == 0)
                        SphP[nstay] = SphP[n];
                    }

                  nstay++;
                }
            }

          /* if any gas particle was exported, all exported particles must be gas */
          if(delta_numgas > 0)
            if(delta_numpart != delta_numgas)
              terminate("delta_numpart=%d != delta_numgas=%d", delta_numpart, delta_numgas);

          /* now close gap (if present) */
          /* (the particles not visited because the loop stopped early are shifted down as well) */
          memmove(P + nstay, P + nstay + delta_numpart, (NumPart - (nstay + delta_numpart)) * sizeof(struct particle_data));
          memmove(PS + nstay, PS + nstay + delta_numpart, (NumPart - (nstay + delta_numpart)) * sizeof(struct subfind_data));

          if(delta_numgas > 0)
            if(NumGas - (nstay + delta_numgas) > 0)
              memmove(SphP + nstay, SphP + nstay + delta_numpart,
                      (NumGas - (nstay + delta_numgas)) * sizeof(struct sph_particle_data));

          NumPart -= delta_numpart;
          NumGas -= delta_numgas;

          /* do resize, but only increase arrays!! (otherwise data in ActiveParticleList etc. gets lost) */
          if(max_load > (1.0 - ALLOC_TOLERANCE) * All.MaxPart)
            {
              All.MaxPart = max_load / (1.0 - 2 * ALLOC_TOLERANCE);
              reallocate_memory_maxpart();
              PS = (struct subfind_data *)myrealloc_movable(PS, All.MaxPart * sizeof(struct subfind_data));
            }

          if(type == 0)
            {
              if(max_loadsph > (1.0 - ALLOC_TOLERANCE) * All.MaxPartSph)
                {
                  All.MaxPartSph = max_loadsph / (1.0 - 2 * ALLOC_TOLERANCE);
                  reallocate_memory_maxpartsph();
                }
            }

          /* create a gap behind the existing gas particles where we will insert the incoming particles */
          memmove(P + NumGas + nimport, P + NumGas, (NumPart - NumGas) * sizeof(struct particle_data));
          memmove(PS + NumGas + nimport, PS + NumGas, (NumPart - NumGas) * sizeof(struct subfind_data));

          /* receive positions are relative to the start of the gap */
          for(i = 0; i < CommNTask; i++)
            Recv_offset[i] += NumGas;

          /* pairwise exchange: in round 'ngrp' this task talks to task (CommThisTask XOR ngrp) */
          for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
            {
              target = CommThisTask ^ ngrp;

              if(target < CommNTask)
                {
                  if(Send_count[target] > 0 || Recv_count[target] > 0)
                    {
                      MPI_Sendrecv(partBuf + Send_offset[target], Send_count[target] * sizeof(struct particle_data), MPI_BYTE, target,
                                   TAG_PDATA, P + Recv_offset[target], Recv_count[target] * sizeof(struct particle_data), MPI_BYTE,
                                   target, TAG_PDATA, Communicator, MPI_STATUS_IGNORE);

                      MPI_Sendrecv(subBuf + Send_offset[target], Send_count[target] * sizeof(struct subfind_data), MPI_BYTE, target,
                                   TAG_KEY, PS + Recv_offset[target], Recv_count[target] * sizeof(struct subfind_data), MPI_BYTE,
                                   target, TAG_KEY, Communicator, MPI_STATUS_IGNORE);

                      if(type == 0)
                        MPI_Sendrecv(sphBuf + Send_offset[target], Send_count[target] * sizeof(struct sph_particle_data), MPI_BYTE,
                                     target, TAG_SPHDATA, SphP + Recv_offset[target],
                                     Recv_count[target] * sizeof(struct sph_particle_data), MPI_BYTE, target, TAG_SPHDATA,
                                     Communicator, MPI_STATUS_IGNORE);
                    }
                }
            }

          if(type == 0)
            NumGas += nimport;

          NumPart += nimport;

          /* buffers are released in reverse order of allocation */
          if(type == 0)
            myfree_movable(sphBuf);

          myfree_movable(subBuf);
          myfree_movable(partBuf);

          /* a task that ran out of buffer space still has particles of this type to send */
          int loc_flag = 0;
          if(AvailableSpace < 0)
            loc_flag = 1;

          MPI_Allreduce(&loc_flag, &glob_flag, 1, MPI_INT, MPI_SUM, Communicator);
          if(glob_flag > 0 && CommThisTask == 0)
            {
              printf(
                  "FOF-DISTRIBUTE: Need to cycle in particle exchange due to memory shortage. type=%d glob_flag=%d ThisTask=%d "
                  "CommThisTask=%d PartSpace=%lld ExportSpace=%lld\n",
                  type, glob_flag, ThisTask, CommThisTask, (long long)PartSpace, (long long)ExportSpace);
              fflush(stdout);
            }
        }
      while(glob_flag);
    }

  /* if there was a temporary memory shortage during the exchange, we may have had to increase the maximum allocations.
   * Go back to smaller values again if possible */

  load = NumPart;
  MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, Communicator);
  max_load = max_load / (1.0 - 2 * ALLOC_TOLERANCE);
  if(max_load < old_AllMaxPart)
    max_load = old_AllMaxPart;
  if(max_load != All.MaxPart)
    {
      All.MaxPart = max_load;
      reallocate_memory_maxpart();
      PS = (struct subfind_data *)myrealloc_movable(PS, All.MaxPart * sizeof(struct subfind_data));
    }

  load = NumGas;
  MPI_Allreduce(&load, &max_loadsph, 1, MPI_INT, MPI_MAX, Communicator);
  max_loadsph = max_loadsph / (1.0 - 2 * ALLOC_TOLERANCE);
  if(max_loadsph < old_AllMaxPartSph)
    max_loadsph = old_AllMaxPartSph;
  if(max_loadsph != All.MaxPartSph)
    {
      All.MaxPartSph = max_loadsph;
      reallocate_memory_maxpartsph();
    }

  /* finally, let's also address the desired local order according to PS[].TargetIndex */

  struct fof_local_sort_data *mp;
  int *Id;

  /* gas particles: indices [0, NumGas) */
  if(NumGas)
    {
      mp = (struct fof_local_sort_data *)mymalloc("mp", sizeof(struct fof_local_sort_data) * NumGas);
      Id = (int *)mymalloc("Id", sizeof(int) * NumGas);

      for(i = 0; i < NumGas; i++)
        {
          mp[i].index       = i;
          mp[i].targetindex = PS[i].TargetIndex;
        }

      qsort(mp, NumGas, sizeof(struct fof_local_sort_data), fof_compare_local_sort_data_targetindex);

      /* Id[old index] = new index, i.e. the rank of the particle in TargetIndex order */
      for(i = 0; i < NumGas; i++)
        Id[mp[i].index] = i;

      reorder_gas(Id);

      /* the permutation is built a second time before it is applied to PS
       * (fof_reorder_PS() overwrites Id; presumably reorder_gas() does too -- TODO confirm) */
      for(i = 0; i < NumGas; i++)
        Id[mp[i].index] = i;

      fof_reorder_PS(Id, 0, NumGas);

      myfree(Id);
      myfree(mp);
    }

  /* all other particles: indices [NumGas, NumPart) */
  if(NumPart - NumGas > 0)
    {
      /* the pointers are shifted by NumGas so that both arrays can be addressed with the particle index */
      mp = (struct fof_local_sort_data *)mymalloc("mp", sizeof(struct fof_local_sort_data) * (NumPart - NumGas));
      mp -= NumGas;

      Id = (int *)mymalloc("Id", sizeof(int) * (NumPart - NumGas));
      Id -= NumGas;

      for(i = NumGas; i < NumPart; i++)
        {
          mp[i].index       = i;
          mp[i].targetindex = PS[i].TargetIndex;
        }

      qsort(mp + NumGas, NumPart - NumGas, sizeof(struct fof_local_sort_data), fof_compare_local_sort_data_targetindex);

      for(i = NumGas; i < NumPart; i++)
        Id[mp[i].index] = i;

      reorder_particles(Id);

      for(i = NumGas; i < NumPart; i++)
        Id[mp[i].index] = i;

      fof_reorder_PS(Id, NumGas, NumPart);

      /* undo the pointer shift before the memory is released */
      Id += NumGas;
      myfree(Id);
      mp += NumGas;
      myfree(mp);
    }
}
+ * + * \return void + */ +void fof_reorder_PS(int *Id, int Nstart, int N) +{ + int i; + struct subfind_data PSsave, PSsource; + int idsource, idsave, dest; + + for(i = Nstart; i < N; i++) + { + if(Id[i] != i) + { + PSsource = PS[i]; + + idsource = Id[i]; + dest = Id[i]; + + do + { + PSsave = PS[dest]; + idsave = Id[dest]; + + PS[dest] = PSsource; + Id[dest] = idsource; + + if(dest == i) + break; + + PSsource = PSsave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } +} + +#endif /* #ifdef FOF */ diff --git a/src/amuse/community/arepo/src/fof/fof_findgroups.c b/src/amuse/community/arepo/src/fof/fof_findgroups.c new file mode 100644 index 0000000000..55e2ae3d2e --- /dev/null +++ b/src/amuse/community/arepo/src/fof/fof_findgroups.c @@ -0,0 +1,720 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/fof/fof_findgroups.c + * \date 05/2018 + * \brief Routine to identify friend of friends groups. 
+ * \details contains functions:
+ *            static void particle2in(data_in * in, int i, int firstnode)
+ *            static void out2particle(data_out * out, int i, int mode)
+ *            static void kernel_local(void)
+ *            static void kernel_imported(void)
+ *            double fof_find_groups(MyIDType * vMinID, int *vHead,
+ *              int *vLen, int *vNext, int *vTail, int *vMinIDTask)
+ *            static int fof_find_dmparticles_evaluate(int target,
+ *              int mode, int threadid)
+ *            static int fof_treefind_fof_primary(MyDouble searchcenter[3],
+ *              MyFloat hsml, int target, int numnodes, int *firstnode,
+ *              int mode, int threadid)
+ *            void fof_check_for_full_nodes_recursive(int no)
+ *            int fof_return_a_particle_in_cell_recursive(int no)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+/* NOTE(review): the nine system #include directives below carry no header
+ * name; the <...> part appears to have been stripped when this patch was
+ * extracted. Restore them from the upstream file before building. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../subfind/subfind.h"
+#include "fof.h"
+
+#ifdef FOF
+
+/* forward declarations of the file-local search routines */
+static int fof_find_dmparticles_evaluate(int target, int mode, int threadid);
+static int fof_treefind_fof_primary(MyDouble searchcenter[3], MyFloat hsml, int target, int numnodes, int *firstnode, int mode,
+                                    int threadid);
+
+/* Per-node summary written by fof_check_for_full_nodes_recursive(): the common
+ * group head of all primary-link particles below the node, -1 if there is no
+ * such particle, -2 if they belong to different groups. fof_find_groups()
+ * allocates it and shifts the pointer by -Tree_MaxPart so that it is indexed
+ * with the node number. */
+static int *Tree_Head;
+
+/* Aliases of the arrays handed to fof_find_groups(); set on entry so that the
+ * static helpers of this file can reach them. */
+static MyIDType *MinID;
+static int *Head, *Len, *Next, *Tail, *MinIDTask;
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3]; /* position of the particle (copied from P[i].Pos) */
+
+  MyIDType MinID; /* MinID of the group head the particle currently belongs to */
+  int MinIDTask;  /* MinIDTask entry stored alongside that MinID */
+
+  int Firstnode; /* value of the firstnode argument given to particle2in() */
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Routine that fills the relevant particle/cell data into the input
+ *         structure defined above. Needed by generic_comm_helpers2.
+ *
+ * \param[out] in Data structure to fill.
+ * \param[in] i Index of particle in P array. + * \param[in] firstnode First note of communication. + * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + in->Pos[0] = P[i].Pos[0]; + in->Pos[1] = P[i].Pos[1]; + in->Pos[2] = P[i].Pos[2]; + + in->MinID = MinID[Head[i]]; + in->MinIDTask = MinIDTask[Head[i]]; + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + char link_count_flag; +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? + * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + terminate("here not used"); + } + else /* combine */ + { + if(out->link_count_flag) + Flags[i].Marked = 1; + } +} + +#include "../utils/generic_comm_helpers2.h" + +static int link_across; +static int nprocessed; + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. 
+ *
+ * \return void
+ */
+static void kernel_local(void)
+{
+  const int tid = get_thread_num();
+  int task, idx;
+
+  for(task = 0; task < NTask; task++)
+    Thread[tid].Exportflag[task] = -1;
+
+  /* work through the local particles for as long as export space is left */
+  while(!(Thread[tid].ExportSpace < MinSpace))
+    {
+      idx = NextParticle++;
+
+      if(idx >= NumPart)
+        break;
+
+      if(!((1 << P[idx].Type) & (FOF_PRIMARY_LINK_TYPES)))
+        continue;
+
+      /* only particles with links to other tasks whose group changed in the
+       * previous round need to be looked at again */
+      if(!(Flags[idx].Nonlocal && Flags[idx].Changed))
+        continue;
+
+      fof_find_dmparticles_evaluate(idx, MODE_LOCAL_PARTICLES, tid);
+
+      nprocessed++;
+    }
+}
+
+/*! \brief Routine that defines what to do with imported particles.
+ *
+ *  Calls the *_evaluate function in MODE_IMPORTED_PARTICLES for each of the
+ *  Nimport received particles and adds the returned link counts to
+ *  link_across.
+ *
+ * \return void
+ */
+static void kernel_imported(void)
+{
+  const int tid = get_thread_num();
+  int idx;
+
+  /* now do the particles that were sent to us */
+  for(idx = 0; idx < Nimport; idx++)
+    link_across += fof_find_dmparticles_evaluate(idx, MODE_IMPORTED_PARTICLES, tid);
+}
+
+/*! \brief Links particles to groups.
+ *
+ * \param[in, out] vMinID Pointer to MinID array.
+ * \param[in, out] vHead Pointer to Head array.
+ * \param[in, out] vLen Pointer to Len array.
+ * \param[in, out] vNext Pointer to Next array.
+ * \param[in, out] vTail Pointer to Tail array.
+ * \param[in, out] vMinIDTask Pointer to MinIDTask array.
+ *
+ * \return Time spent in this function.
+ */
+double fof_find_groups(MyIDType *vMinID, int *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask)
+{
+  /* make the caller's arrays reachable for the static helpers of this file */
+  MinID     = vMinID;
+  Head      = vHead;
+  Len       = vLen;
+  Next      = vNext;
+  Tail      = vTail;
+  MinIDTask = vMinIDTask;
+
+  int i, npart, marked;
+  long long totmarked, totnpart;
+  long long link_across_tot, ntot;
+  double t0, t1, tstart, tend;
+
+  tstart = second();
+
+  mpi_printf("FOF: Start linking particles (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+
+  /* allocate a flag field that is used to mark nodes that are fully inside the linking length */
+  flag_node_inside_linkinglength = (unsigned char *)mymalloc("flag_node_inside_linkinglength", Tree_MaxNodes * sizeof(unsigned char));
+  memset(flag_node_inside_linkinglength, 0, Tree_MaxNodes * sizeof(unsigned char));
+  /* shift the pointer so that the field is indexed directly with node numbers, which start at Tree_MaxPart */
+  flag_node_inside_linkinglength -= Tree_MaxPart;
+
+  Flags = (struct bit_flags *)mymalloc("Flags", NumPart * sizeof(struct bit_flags));
+
+  generic_set_MaxNexport();
+
+  /* NOTE(review): sized with Tree_NumNodes whereas the flag field above uses Tree_MaxNodes; presumably all nodes that
+   * are actually visited have numbers below Tree_MaxPart + Tree_NumNodes -- confirm */
+  Tree_Head = mymalloc("Tree_Head", Tree_NumNodes * sizeof(int));
+  Tree_Head -= Tree_MaxPart; /* same node-number indexing as for the flag field */
+
+  /* allocate buffers to arrange communication */
+  generic_alloc_partlist_nodelist_ngblist_threadbufs();
+
+  t0 = second();
+
+  /* first, link only among local particles */
+  for(i = 0, marked = 0, npart = 0; i < NumPart; i++)
+    {
+      if(((1 << P[i].Type) & (FOF_PRIMARY_LINK_TYPES)))
+        {
+          fof_find_dmparticles_evaluate(i, MODE_LOCAL_NO_EXPORT, 0);
+
+          npart++;
+
+          /* Nonlocal was set by the search if it ran into a node that is neither a local particle nor a local
+           * internal node */
+          if(Flags[i].Nonlocal)
+            marked++;
+        }
+    }
+
+  sumup_large_ints(1, &marked, &totmarked);
+  sumup_large_ints(1, &npart, &totnpart);
+  t1 = second();
+  mpi_printf("FOF: links on local processor done (took %g sec).\nFOF: Marked=%lld out of the %lld primaries which are linked\n",
+             timediff(t0, t1), totmarked, totnpart);
+
+  generic_free_partlist_nodelist_ngblist_threadbufs();
+
+  /* fill Tree_Head for the whole tree, starting at the root node */
+  t0 = second();
+  fof_check_for_full_nodes_recursive(Tree_MaxPart);
+  t1 = second();
+  mpi_printf("FOF: fully linked nodes determined (took %g sec).\n", timediff(t0, t1));
+
+  mpi_printf("FOF: begin linking across processors (presently allocated=%g MB) \n", AllocatedBytes / (1024.0 * 1024.0));
+
+  /* in the first round every particle counts as changed */
+  for(i = 0; i < NumPart; i++)
+    Flags[i].Marked = 1;
+
+  do
+    {
+      t0 = second();
+
+      /* the marks collected in the previous round select the particles to process now */
+      for(i = 0; i < NumPart; i++)
+        {
+          Flags[i].Changed      = Flags[i].Marked;
+          Flags[i].Marked       = 0;
+          Flags[i].MinIDChanged = 0;
+        }
+
+      NextParticle = 0; /* begin with this index */
+
+      link_across = 0;
+      nprocessed  = 0;
+
+      generic_comm_pattern(NumPart, kernel_local, kernel_imported);
+
+      sumup_large_ints(1, &link_across, &link_across_tot);
+      sumup_large_ints(1, &nprocessed, &ntot);
+
+      t1 = second();
+
+      mpi_printf("FOF: have done %15lld cross links (processed %14lld, took %g sec)\n", link_across_tot, ntot, timediff(t0, t1));
+
+      /* let's check out which particles have changed their MinID */
+      for(i = 0; i < NumPart; i++)
+        if(Flags[i].Nonlocal)
+          {
+            if(Flags[Head[i]].MinIDChanged)
+              Flags[i].Marked = 1;
+          }
+    }
+  while(link_across_tot > 0); /* repeat until no task has lowered a MinID any more */
+
+  /* undo the pointer shifts before handing the blocks back, in reverse order of allocation */
+  Tree_Head += Tree_MaxPart;
+  myfree(Tree_Head);
+  myfree(Flags);
+  /* free flag */
+  myfree(flag_node_inside_linkinglength + Tree_MaxPart);
+
+  mpi_printf("FOF: Local groups found.\n");
+
+  tend = second();
+  return timediff(tstart, tend);
+}
+
+/*! \brief Links dark matter particles.
+ *
+ * \param[in] target Index of particle/cell.
+ * \param[in] mode Flag if it operates on local or imported data.
+ * \param[in] threadid ID of thread.
+ *
+ * \return Number of links.
+ */
+static int fof_find_dmparticles_evaluate(int target, int mode, int threadid)
+{
+  const int is_local = (mode == MODE_LOCAL_NO_EXPORT || mode == MODE_LOCAL_PARTICLES);
+  int numnodes, *firstnode;
+  int numngb, n;
+  data_in local, *target_data;
+
+  if(is_local)
+    {
+      /* local particle: build the search record on the stack */
+      particle2in(&local, target, 0);
+      target_data = &local;
+
+      numnodes  = 1;
+      firstnode = NULL;
+    }
+  else
+    {
+      /* imported particle: the record was received from another task */
+      target_data = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  numngb = fof_treefind_fof_primary(target_data->Pos, LinkL, target, numnodes, firstnode, mode, threadid);
+
+  if(is_local)
+    {
+      for(n = 0; n < numngb; n++)
+        {
+          const int ngb = Thread[threadid].Ngblist[n];
+          int longer, shorter, member; /* heads of the two groups, list cursor */
+
+          if(Head[target] == Head[ngb])
+            continue; /* already linked */
+
+          /* the shorter list is appended to the longer one */
+          if(Len[Head[target]] > Len[Head[ngb]])
+            {
+              longer  = Head[target];
+              shorter = Head[ngb];
+            }
+          else
+            {
+              longer  = Head[ngb];
+              shorter = Head[target];
+            }
+
+          Next[Tail[longer]] = shorter;
+          Tail[longer]       = Tail[shorter];
+          Len[longer] += Len[shorter];
+
+          /* the merged group keeps the smaller of the two minimum IDs */
+          if(MinID[shorter] < MinID[longer])
+            {
+              MinID[longer]     = MinID[shorter];
+              MinIDTask[longer] = MinIDTask[shorter];
+            }
+
+          /* re-point every member of the appended list to the new head */
+          member = shorter;
+          do
+            Head[member] = longer;
+          while((member = Next[member]) >= 0);
+        }
+    }
+
+  if(mode == MODE_IMPORTED_PARTICLES)
+    DataResult[target].link_count_flag = (numngb > 0) ? 1 : 0;
+
+  return numngb;
+}
+
+/*! \brief Finds the neighbors among the primary link types which are within a
+ *         certain distance.
+ *
+ * \param[in] searchcenter Position of search center.
+ * \param[in] hsml Search radius.
+ * \param[in] target Index of particle.
+ * \param[in] numnodes Number of nodes.
+ * \param[in] firstnode First node.
+ * \param[in] mode
+ *            -1: only local particles should be found and no export occurs;
+ *            0: export occurs, but local particles are ignored;
+ *            1: particles are found for an imported point.
+ * \param[in] threadid ID of thread.
+ *
+ * \return Number of particles found.
+ */
+static int fof_treefind_fof_primary(MyDouble searchcenter[3], MyFloat hsml, int target, int numnodes, int *firstnode, int mode,
+                                    int threadid)
+{
+  int k, numngb, no, p, nexport_flag = 0;
+  MyDouble dx, dy, dz, dist, r2;
+
+  /* NOTE(review): FACT1 is used further down but not defined in this function; presumably it comes from one of the
+   * included headers -- confirm */
+#define FACT2 0.866025403785 /* sqrt(3)/2 */
+#define FACT3 (2.0 * FACT2)  /* sqrt(3) */
+
+  /* not referenced directly below; presumably scratch variables used inside the FOF_NEAREST_LONG_* macros -- confirm */
+  MyDouble xtmp, ytmp, ztmp;
+
+  numngb = 0;
+
+  for(k = 0; k < numnodes; k++)
+    {
+      if(mode == MODE_LOCAL_PARTICLES || mode == MODE_LOCAL_NO_EXPORT)
+        {
+          no = Tree_MaxPart; /* root node */
+        }
+      else
+        {
+          no = firstnode[k];
+          no = Nodes[no].u.d.nextnode; /* open it */
+        }
+
+      /* node numbers: [0, Tree_MaxPart) are particles, [Tree_MaxPart, Tree_MaxPart + Tree_MaxNodes) internal nodes,
+       * everything above is handled by the last two branches */
+      while(no >= 0)
+        {
+          if(no < Tree_MaxPart) /* single particle */
+            {
+              p  = no;
+              no = Nextnode[no];
+
+              if(!((1 << P[p].Type) & (FOF_PRIMARY_LINK_TYPES)))
+                continue;
+
+              if(mode == MODE_LOCAL_PARTICLES)
+                continue;
+
+              /* cheap per-axis rejection first, then the exact distance test */
+              dist = hsml;
+              dx   = FOF_NEAREST_LONG_X(Tree_Pos_list[3 * p + 0] - searchcenter[0]);
+              if(dx > dist)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(Tree_Pos_list[3 * p + 1] - searchcenter[1]);
+              if(dy > dist)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(Tree_Pos_list[3 * p + 2] - searchcenter[2]);
+              if(dz > dist)
+                continue;
+              if(dx * dx + dy * dy + dz * dz > dist * dist)
+                continue;
+
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  /* only a lowered MinID counts as a link; it is recorded at the group head */
+                  if(MinID[Head[p]] > DataGet[target].MinID)
+                    {
+                      MinID[Head[p]]              = DataGet[target].MinID;
+                      MinIDTask[Head[p]]          = DataGet[target].MinIDTask;
+                      Flags[Head[p]].MinIDChanged = 1;
+                      numngb++;
+                    }
+                }
+              else
+                {
+                  /* this will only be done for MODE_LOCAL_NO_EXPORT */
+                  Thread[threadid].Ngblist[numngb++] = p;
+                }
+            }
+          else if(no < Tree_MaxPart + Tree_MaxNodes) /* internal node */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  if(no <
+                     Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */
+                    break;
+
+                  /* all primaries below the node share one group whose MinID is already small enough */
+                  if(Tree_Head[no] >= 0)
+                    if(MinID[Tree_Head[no]] <= DataGet[target].MinID)
+                      {
+                        no = Nodes[no].u.d.sibling; /* the node can be discarded */
+                        continue;
+                      }
+                }
+
+              struct NODE *current = &Nodes[no];
+              int nocur            = no;
+              no                   = current->u.d.sibling; /* in case the node can be discarded */
+
+              if(mode == MODE_LOCAL_PARTICLES)
+                {
+                  if(nocur >= Tree_FirstNonTopLevelNode)
+                    {
+                      /* we have a node with only local particles, hence we can skip it for mode == 0 */
+                      continue;
+                    }
+                }
+
+              /* per-axis test against the node, enlarged by half its side length */
+              dist = hsml + 0.5 * current->len;
+              dx   = FOF_NEAREST_LONG_X(current->center[0] - searchcenter[0]);
+              if(dx > dist)
+                continue;
+              dy = FOF_NEAREST_LONG_Y(current->center[1] - searchcenter[1]);
+              if(dy > dist)
+                continue;
+              dz = FOF_NEAREST_LONG_Z(current->center[2] - searchcenter[2]);
+              if(dz > dist)
+                continue;
+
+              /* now test against the minimal sphere enclosing everything */
+              dist += FACT1 * current->len;
+              r2 = dx * dx + dy * dy + dz * dz;
+              if(r2 > dist * dist)
+                continue;
+
+              if(mode != MODE_LOCAL_PARTICLES)
+                {
+                  /* test whether the node is contained within the sphere */
+                  dist = hsml - FACT2 * current->len;
+                  if(dist > 0)
+                    if(r2 < dist * dist && hsml > FACT3 * current->len)
+                      {
+                        if(flag_node_inside_linkinglength[nocur] & (1 << BITFLAG_INSIDE_LINKINGLENGTH)) /* already flagged */
+                          {
+                            /* sufficient to return only one particle inside this cell */
+                            p = fof_return_a_particle_in_cell_recursive(nocur);
+
+                            if(p >= 0)
+                              {
+                                if(mode == MODE_IMPORTED_PARTICLES)
+                                  {
+                                    if(MinID[Head[p]] > DataGet[target].MinID)
+                                      {
+                                        MinID[Head[p]]              = DataGet[target].MinID;
+                                        MinIDTask[Head[p]]          = DataGet[target].MinIDTask;
+                                        Flags[Head[p]].MinIDChanged = 1;
+                                        numngb++;
+                                      }
+                                  }
+                                else
+                                  Thread[threadid].Ngblist[numngb++] = p;
+                              }
+
+                            continue;
+                          }
+                        else
+                          {
+                            /* flag it now */
+                            flag_node_inside_linkinglength[nocur] |= (1 << BITFLAG_INSIDE_LINKINGLENGTH);
+                          }
+                      }
+                }
+
+              no = current->u.d.nextnode; /* ok, we need to open the node */
+            }
+          else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */
+            {
+              terminate("do not expect imported points here");
+            }
+          else
+            {
+              /* neither particle, internal node nor imported point: the walk leaves the local tree here
+               * (called a pseudo particle elsewhere in this file) */
+              if(mode == MODE_LOCAL_PARTICLES)
+                {
+                  if(target >= 0)
+                    tree_treefind_export_node_threads(no, target, threadid);
+                }
+              else if(mode == MODE_LOCAL_NO_EXPORT)
+                {
+                  /* remember that this particle has to be revisited in the cross-task rounds */
+                  nexport_flag = 1;
+                }
+              else if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("stop no=%d Tree_MaxPart=%d Tree_MaxNodes=%d", no, Tree_MaxPart, Tree_MaxNodes);
+
+              no = Nextnode[no - Tree_MaxNodes];
+              continue;
+            }
+        }
+    }
+
+  if(mode == MODE_LOCAL_NO_EXPORT)
+    {
+      if(nexport_flag == 0)
+        Flags[target].Nonlocal = 0;
+      else
+        Flags[target].Nonlocal = 1;
+    }
+
+  return numngb;
+}
+
+/*! \brief Walks a tree recursively and sets Tree_Head of node.
+ *
+ *  Tree_Head[no] receives the common group head of all primary-link particles
+ *  found below the node, -1 if none was found and -2 if they do not all share
+ *  the same head. Nothing is done if no is not an internal node.
+ *
+ * \param[in] no Index of node we are in.
+ *
+ * \return void
+ */
+void fof_check_for_full_nodes_recursive(int no)
+{
+  if(no >= Tree_MaxPart && no < Tree_MaxPart + Tree_MaxNodes) /* internal node */
+    {
+      int head = -1; /* no particle yet */
+
+      int p = Nodes[no].u.d.nextnode;
+
+      /* visit the direct children; the chain ends where the sibling of the node starts */
+      while(p != Nodes[no].u.d.sibling)
+        {
+          if(p < Tree_MaxPart) /* a particle */
+            {
+              if((1 << P[p].Type) & (FOF_PRIMARY_LINK_TYPES))
+                {
+                  if(head == -1)
+                    head = Head[p];
+                  else if(head >= 0)
+                    {
+                      if(head != Head[p])
+                        head = -2; /* particles of different groups below this node */
+                    }
+                }
+
+              p = Nextnode[p];
+            }
+          else if(p < Tree_MaxPart + Tree_MaxNodes) /* an internal node */
+            {
+              fof_check_for_full_nodes_recursive(p);
+
+              /* merge the summary of the child node */
+              if(head == -1)
+                head = Tree_Head[p];
+              else if(head >= 0)
+                {
+                  if(head != Tree_Head[p])
+                    head = -2;
+                }
+
+              p = Nodes[p].u.d.sibling;
+            }
+          else /* a pseudo particle */
+            p = Nextnode[p - Tree_MaxNodes];
+        }
+
+      Tree_Head[no] = head;
+    }
+}
+
+/*! \brief Finds a particle in node.
+ *
+ * \param[in] no Index of node.
+ *
+ * \return Particle index; -1 if no particle was found.
+ */ +int fof_return_a_particle_in_cell_recursive(int no) +{ + if(no >= Tree_MaxPart && no < Tree_MaxPart + Tree_MaxNodes) /* internal node */ + { + int p = Nodes[no].u.d.nextnode; + + while(p != Nodes[no].u.d.sibling) + { + if(p < Tree_MaxPart) /* a particle */ + { + if((1 << P[p].Type) & (FOF_PRIMARY_LINK_TYPES)) + { + return p; + } + + p = Nextnode[p]; + } + else if(p < Tree_MaxPart + Tree_MaxNodes) /* an internal node */ + { + int ret = fof_return_a_particle_in_cell_recursive(p); + + if(ret >= 0) + return ret; + + p = Nodes[p].u.d.sibling; + } + else /* a pseudo particle */ + p = Nextnode[p - Tree_MaxNodes]; + } + } + + return -1; +} + +#endif /* #ifdef FOF */ diff --git a/src/amuse/community/arepo/src/fof/fof_io.c b/src/amuse/community/arepo/src/fof/fof_io.c new file mode 100644 index 0000000000..3c0755ab69 --- /dev/null +++ b/src/amuse/community/arepo/src/fof/fof_io.c @@ -0,0 +1,3151 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/fof/fof_io.c + * \date 05/2018 + * \brief Output functions for parallel FoF; also used by subfind. 
+ * \details contains functions:
+ *            static MyOutputFloat wrap_position(MyOutputFloat pos, int dim)
+ *            void fof_save_groups(int num)
+ *            void fof_subfind_prepare_ID_list(void)
+ *            void fof_subfind_write_file(char *fname, int writeTask,
+ *              int lastTask)
+ *            void fof_subfind_fill_write_buffer(enum fof_subfind_iofields
+ *              blocknr, int *startindex, int pc)
+ *            void fof_subfind_get_dataset_name(enum fof_subfind_iofields
+ *              blocknr, char *label)
+ *            int fof_subfind_get_dataset_group(enum fof_subfind_iofields
+ *              blocknr)
+ *            int fof_subfind_get_particles_in_block(enum
+ *              fof_subfind_iofields blocknr)
+ *            int fof_subfind_get_values_per_blockelement(enum
+ *              fof_subfind_iofields blocknr)
+ *            int fof_subfind_get_bytes_per_blockelement(enum
+ *              fof_subfind_iofields blocknr)
+ *            int fof_subfind_get_datatype(enum fof_subfind_iofields
+ *              blocknr)
+ *            int fof_subfind_blockpresent(enum fof_subfind_iofields
+ *              blocknr)
+ *            void fof_subfind_get_Tab_IO_Label(enum fof_subfind_iofields
+ *              blocknr, char *label)
+ *            void fof_subfind_write_header_attributes_in_hdf5(hid_t
+ *              handle)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+/* NOTE(review): the nine system #include directives below, and the one inside
+ * the HAVE_HDF5 section, carry no header name; the <...> part appears to have
+ * been stripped when this patch was extracted. Restore them from the upstream
+ * file before building. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../gitversion/version.h"
+#include "../subfind/subfind.h"
+#include "fof.h"
+
+/* prototypes of the HDF5 attribute writers that are called when the catalogue file is opened */
+#ifdef HAVE_HDF5
+#include
+void fof_subfind_write_header_attributes_in_hdf5(hid_t handle);
+void write_parameters_attributes_in_hdf5(hid_t handle);
+void write_compile_time_options_in_hdf5(hid_t handle);
+#endif /* #ifdef HAVE_HDF5 */
+
+#ifdef FOF
+
+/*! \brief Make sure a position lies in the box in case of periodic boundaries.
+ * + * \param[in] pos Single coordinate in one dimension to be wrapped + * \param[in] dim Index of coordinate [0/1/2] + * + * \return double: wrapped coordinate + */ +MyOutputFloat static wrap_position(MyOutputFloat pos, int dim) +{ +#if defined(REFLECTIVE_X) + if(dim == 0) + return pos; +#endif + +#if defined(REFLECTIVE_Y) + if(dim == 1) + return pos; +#endif + +#if defined(REFLECTIVE_Z) + if(dim == 2) + return pos; +#endif + + double boxsize = All.BoxSize; + +#ifdef LONG_X + if(dim == 0) + boxsize *= LONG_X; +#endif +#ifdef LONG_Y + if(dim == 1) + boxsize *= LONG_Y; +#endif +#ifdef LONG_Z + if(dim == 2) + boxsize *= LONG_Z; +#endif + + while(pos < 0) + pos += boxsize; + + while(pos >= boxsize) + pos -= boxsize; + + return pos; +} + +/*! \brief Main routine for group output. + * + * \param[in] num Index of group file (snapshot index for this output). + * + * \return void + */ +void fof_save_groups(int num) +{ + int filenr, gr, ngrps, masterTask, lastTask; + double t0, t1; + char buf[500]; + +#ifdef FOF_STOREIDS + fof_subfind_prepare_ID_list(); +#endif /* #ifdef FOF_STOREIDS */ + + t0 = second(); + + CommBuffer = mymalloc("CommBuffer", COMMBUFFERSIZE); + + if(NTask < All.NumFilesPerSnapshot) + { + warn( + "Number of processors must be larger or equal than All.NumFilesPerSnapshot! Reducing All.NumFilesPerSnapshot " + "accordingly.\n"); + All.NumFilesPerSnapshot = NTask; + } + + if(All.SnapFormat < 1 || All.SnapFormat > 3) + mpi_printf("Unsupported File-Format. 
All.SnapFormat=%d\n", All.SnapFormat); + +#ifndef HAVE_HDF5 + if(All.SnapFormat == 3) + { + mpi_terminate("Code wasn't compiled with HDF5 support enabled!\n"); + } +#endif /* #ifndef HAVE_HDF5 */ + + /* assign processors to output files */ + distribute_file(All.NumFilesPerSnapshot, 0, 0, NTask - 1, &filenr, &masterTask, &lastTask); + + if(All.NumFilesPerSnapshot > 1) + { + if(ThisTask == 0) + { + sprintf(buf, "%s/groups_%03d", All.OutputDir, num); + mkdir(buf, 02755); + } + MPI_Barrier(MPI_COMM_WORLD); + } + + if(All.NumFilesPerSnapshot > 1) + sprintf(buf, "%s/groups_%03d/%s_%03d.%d", All.OutputDir, num, "fof_tab", num, filenr); + else + sprintf(buf, "%s%s_%03d", All.OutputDir, "fof_tab", num); + + ngrps = All.NumFilesPerSnapshot / All.NumFilesWrittenInParallel; + if((All.NumFilesPerSnapshot % All.NumFilesWrittenInParallel)) + ngrps++; + + for(gr = 0; gr < ngrps; gr++) + { + if((filenr / All.NumFilesWrittenInParallel) == gr) /* ok, it's this processor's turn */ + fof_subfind_write_file(buf, masterTask, lastTask); + + MPI_Barrier(MPI_COMM_WORLD); + } + + myfree(CommBuffer); + +#ifdef FOF_STOREIDS + myfree(ID_list); +#endif /* #ifdef FOF_STOREIDS */ + + t1 = second(); + + mpi_printf("FOF: Group catalogues saved. took = %g sec\n", timediff(t0, t1)); +} + +/*! \brief Prepares ID list for option FOF_STOREIDS. 
+ * + * \return void + */ +void fof_subfind_prepare_ID_list(void) +{ + int i, nids; + long long totNids; + double t0, t1; + + t0 = second(); + + ID_list = mymalloc("ID_list", sizeof(struct id_list) * Nids); + + for(i = 0, nids = 0; i < NumPart; i++) + { + if(PS[i].GrNr < TotNgroups) + { + if(nids >= Nids) + terminate("nids >= Nids"); + + ID_list[nids].GrNr = PS[i].GrNr; + ID_list[nids].Type = P[i].Type; + ID_list[nids].ID = P[i].ID; +#ifdef SUBFIND + ID_list[nids].SubNr = PS[i].SubNr; + ID_list[nids].BindingEgy = PS[i].BindingEnergy; +#endif /* #ifdef SUBFIND */ + nids++; + } + } + + sumup_large_ints(1, &nids, &totNids); + if(totNids != TotNids) + { + char buf[1000]; + sprintf(buf, "Task=%d Nids=%d totNids=%lld TotNids=%lld\n", ThisTask, Nids, totNids, TotNids); + terminate(buf); + } + + /* sort the particle IDs according to group-number, and optionally subhalo number and binding energy */ +#ifdef SUBFIND + parallel_sort(ID_list, Nids, sizeof(struct id_list), subfind_compare_ID_list); +#else /* #ifdef SUBFIND */ + parallel_sort(ID_list, Nids, sizeof(struct id_list), fof_compare_ID_list_GrNrID); +#endif /* #ifdef SUBFIND #else */ + + t1 = second(); + mpi_printf("FOF/SUBFIND: Particle/cell IDs in groups globally sorted. took = %g sec\n", timediff(t0, t1)); +} + +/*! \brief Writes a file with name fname containing data from writeTask to + * lastTask. + * + * \param[in] fname Filename of the output file. + * \param[in] writeTask Task responsible for writing the file. + * \param[in] lastTask Last task whose data is still in this file. 
+ * + * \return void + */ +void fof_subfind_write_file(char *fname, int writeTask, int lastTask) +{ + int bytes_per_blockelement, npart, nextblock; + int n_for_this_task, n, p, pc, offset = 0, task; + int blockmaxlen, n_type[3], ntot_type[3], nn[3]; + enum fof_subfind_iofields blocknr; + char label[8]; + int bnr; + int blksize; + MPI_Status status; + FILE *fd = 0; +#ifdef HAVE_HDF5 + hid_t hdf5_file = 0, hdf5_grp[3], hdf5_headergrp = 0, hdf5_dataspace_memory; + hid_t hdf5_datatype = 0, hdf5_dataspace_in_file = 0, hdf5_dataset = 0; + hid_t hdf5_paramsgrp = 0, hdf5_configgrp = 0; + herr_t hdf5_status; + hsize_t dims[2], count[2], start[2]; + int rank = 0, pcsum = 0; + char buf[1000]; +#endif /* #ifdef HAVE_HDF5 */ + +#define SKIP \ + { \ + my_fwrite(&blksize, sizeof(int), 1, fd); \ + } + + /* determine group/id numbers of each type in file */ + n_type[0] = Ngroups; + n_type[1] = Nsubgroups; + n_type[2] = Nids; + + if(ThisTask == writeTask) + { + for(n = 0; n < 3; n++) + ntot_type[n] = n_type[n]; + + for(task = writeTask + 1; task <= lastTask; task++) + { + MPI_Recv(&nn[0], 3, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status); + for(n = 0; n < 3; n++) + ntot_type[n] += nn[n]; + } + + for(task = writeTask + 1; task <= lastTask; task++) + MPI_Send(&ntot_type[0], 3, MPI_INT, task, TAG_N, MPI_COMM_WORLD); + } + else + { + MPI_Send(&n_type[0], 3, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD); + MPI_Recv(&ntot_type[0], 3, MPI_INT, writeTask, TAG_N, MPI_COMM_WORLD, &status); + } + + /* fill file header */ + catalogue_header.Ngroups = ntot_type[0]; + catalogue_header.Nsubgroups = ntot_type[1]; + catalogue_header.Nids = ntot_type[2]; + + catalogue_header.TotNgroups = TotNgroups; + catalogue_header.TotNsubgroups = TotNsubgroups; + catalogue_header.TotNids = TotNids; + + catalogue_header.num_files = All.NumFilesPerSnapshot; + + catalogue_header.time = All.Time; + if(All.ComovingIntegrationOn) + catalogue_header.redshift = 1.0 / All.Time - 1; + else + 
catalogue_header.redshift = 0; + catalogue_header.HubbleParam = All.HubbleParam; + catalogue_header.BoxSize = All.BoxSize; + catalogue_header.Omega0 = All.Omega0; + catalogue_header.OmegaLambda = All.OmegaLambda; + +#ifdef OUTPUT_IN_DOUBLEPRECISION + catalogue_header.flag_doubleprecision = 1; +#else /* #ifdef OUTPUT_IN_DOUBLEPRECISION */ + catalogue_header.flag_doubleprecision = 0; +#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION #else */ + + /* open file and write header */ + + if(ThisTask == writeTask) + { + if(All.SnapFormat == 3) + { +#ifdef HAVE_HDF5 + sprintf(buf, "%s.hdf5", fname); + hdf5_file = my_H5Fcreate(buf, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + mpi_printf("FOF/SUBFIND: writing group catalogue: '%s' (file 1 of %d)\n", fname, All.NumFilesPerSnapshot); + hdf5_headergrp = my_H5Gcreate(hdf5_file, "/Header", 0); + + hdf5_grp[0] = my_H5Gcreate(hdf5_file, "/Group", 0); + hdf5_grp[1] = my_H5Gcreate(hdf5_file, "/Subhalo", 0); + hdf5_grp[2] = my_H5Gcreate(hdf5_file, "/IDs", 0); + + fof_subfind_write_header_attributes_in_hdf5(hdf5_headergrp); + + hdf5_paramsgrp = my_H5Gcreate(hdf5_file, "/Parameters", 0); + write_parameters_attributes_in_hdf5(hdf5_paramsgrp); + + hdf5_configgrp = my_H5Gcreate(hdf5_file, "/Config", 0); + write_compile_time_options_in_hdf5(hdf5_configgrp); + +#endif /* #ifdef HAVE_HDF5 */ + } + else + { + if(!(fd = fopen(fname, "w"))) + { + printf("can't open file `%s' for writing snapshot.\n", fname); + terminate("file open error"); + } + + mpi_printf("FOF/SUBFIND: writing group catalogue: '%s' (file 1 of %d)\n", fname, All.NumFilesPerSnapshot); + + if(All.SnapFormat == 2) + { + blksize = sizeof(int) + 4 * sizeof(char); + SKIP; + my_fwrite((void *)"HEAD", sizeof(char), 4, fd); + nextblock = sizeof(catalogue_header) + 2 * sizeof(int); + my_fwrite(&nextblock, sizeof(int), 1, fd); + SKIP; + } + + blksize = sizeof(catalogue_header); + + SKIP; + my_fwrite(&catalogue_header, sizeof(catalogue_header), 1, fd); + SKIP; + } + } + + for(bnr = 0; bnr < 1000; 
bnr++) + { + blocknr = (enum fof_subfind_iofields)bnr; + + if(blocknr == IO_FOF_LASTENTRY) + break; + + if(fof_subfind_blockpresent(blocknr)) + { + bytes_per_blockelement = fof_subfind_get_bytes_per_blockelement(blocknr); + + blockmaxlen = (int)(COMMBUFFERSIZE / bytes_per_blockelement); + + npart = fof_subfind_get_particles_in_block(blocknr); + int grp = fof_subfind_get_dataset_group(blocknr); + + if(npart > 0) + { + if(ThisTask == 0) + { + char buf[1000]; + + fof_subfind_get_dataset_name(blocknr, buf); + printf("FOF/SUBFIND: writing block %d (%s)...\n", blocknr, buf); + } + + if(ThisTask == writeTask) + { + if(All.SnapFormat == 1 || All.SnapFormat == 2) + { + if(All.SnapFormat == 2) + { + blksize = sizeof(int) + 4 * sizeof(char); + SKIP; + fof_subfind_get_Tab_IO_Label(blocknr, label); + my_fwrite(label, sizeof(char), 4, fd); + nextblock = npart * bytes_per_blockelement + 2 * sizeof(int); + my_fwrite(&nextblock, sizeof(int), 1, fd); + SKIP; + } + + blksize = npart * bytes_per_blockelement; + SKIP; + } + else if(All.SnapFormat == 3) + { +#ifdef HAVE_HDF5 + switch(fof_subfind_get_datatype(blocknr)) + { + case 0: + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_INT); + break; + case 1: +#ifdef OUTPUT_IN_DOUBLEPRECISION + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE); +#else /* #ifdef OUTPUT_IN_DOUBLEPRECISION */ + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT); +#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION #else */ + break; + case 2: + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT64); + break; + } + + dims[0] = ntot_type[grp]; + dims[1] = fof_subfind_get_values_per_blockelement(blocknr); + if(dims[1] == 1) + rank = 1; + else + rank = 2; + + fof_subfind_get_dataset_name(blocknr, buf); + + hdf5_dataspace_in_file = my_H5Screate_simple(rank, dims, NULL); + + hdf5_dataset = my_H5Dcreate(hdf5_grp[grp], buf, hdf5_datatype, hdf5_dataspace_in_file, H5P_DEFAULT); + + pcsum = 0; +#endif /* #ifdef HAVE_HDF5 */ + } + } + + for(task = writeTask, offset = 0; task <= lastTask; task++) + { + if(task 
== ThisTask) + { + n_for_this_task = n_type[grp]; + + for(p = writeTask; p <= lastTask; p++) + if(p != ThisTask) + MPI_Send(&n_for_this_task, 1, MPI_INT, p, TAG_NFORTHISTASK, MPI_COMM_WORLD); + } + else + MPI_Recv(&n_for_this_task, 1, MPI_INT, task, TAG_NFORTHISTASK, MPI_COMM_WORLD, &status); + + while(n_for_this_task > 0) + { + pc = n_for_this_task; + + if(pc > blockmaxlen) + pc = blockmaxlen; + + if(ThisTask == task) + fof_subfind_fill_write_buffer(blocknr, &offset, pc); + + if(ThisTask == writeTask && task != writeTask) + MPI_Recv(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, task, TAG_PDATA, MPI_COMM_WORLD, &status); + + if(ThisTask != writeTask && task == ThisTask) + MPI_Ssend(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, writeTask, TAG_PDATA, MPI_COMM_WORLD); + + if(ThisTask == writeTask) + { + if(All.SnapFormat == 3) + { +#ifdef HAVE_HDF5 + start[0] = pcsum; + start[1] = 0; + + count[0] = pc; + count[1] = fof_subfind_get_values_per_blockelement(blocknr); + pcsum += pc; + + my_H5Sselect_hyperslab(hdf5_dataspace_in_file, H5S_SELECT_SET, start, NULL, count, NULL); + + dims[0] = pc; + dims[1] = fof_subfind_get_values_per_blockelement(blocknr); + hdf5_dataspace_memory = my_H5Screate_simple(rank, dims, NULL); + + hdf5_status = my_H5Dwrite(hdf5_dataset, hdf5_datatype, hdf5_dataspace_memory, hdf5_dataspace_in_file, + H5P_DEFAULT, CommBuffer, buf); + + (void)hdf5_status; + + my_H5Sclose(hdf5_dataspace_memory, H5S_SIMPLE); +#endif /* #ifdef HAVE_HDF5 */ + } + else + { + my_fwrite(CommBuffer, bytes_per_blockelement, pc, fd); + } + } + + n_for_this_task -= pc; + } + } + + if(ThisTask == writeTask) + { + if(All.SnapFormat == 3) + { +#ifdef HAVE_HDF5 + my_H5Dclose(hdf5_dataset, buf); + my_H5Sclose(hdf5_dataspace_in_file, H5S_SIMPLE); + my_H5Tclose(hdf5_datatype); +#endif /* #ifdef HAVE_HDF5 */ + } + else + SKIP; + } + } + } + } + + if(ThisTask == writeTask) + { + if(All.SnapFormat == 3) + { +#ifdef HAVE_HDF5 + my_H5Gclose(hdf5_grp[0], "/Group"); + 
my_H5Gclose(hdf5_grp[1], "/Subhalo"); + my_H5Gclose(hdf5_grp[2], "/IDs"); + my_H5Gclose(hdf5_headergrp, "/Header"); + my_H5Gclose(hdf5_paramsgrp, "/Parameters"); + my_H5Gclose(hdf5_configgrp, "/Config"); + + my_H5Fclose(hdf5_file, fname); +#endif /* #ifdef HAVE_HDF5 */ + } + else + fclose(fd); + } +} + +/*! \brief Copies data from global group array to appropriate output buffer. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * \param[in] startindex First particle index to be included. + * \param[in] pc Particle count; number of particles to be written. + * + * \return void + */ +void fof_subfind_fill_write_buffer(enum fof_subfind_iofields blocknr, int *startindex, int pc) +{ + int n, k, pindex, *ip; + MyOutputFloat *fp; + MyIDType *idp; + + fp = (MyOutputFloat *)CommBuffer; + ip = (int *)CommBuffer; + idp = (MyIDType *)CommBuffer; + + pindex = *startindex; + + for(n = 0; n < pc; pindex++, n++) + { + switch(blocknr) + { + case IO_FOF_LEN: + *ip++ = Group[pindex].Len; + break; + case IO_FOF_MTOT: + *fp++ = Group[pindex].Mass; + break; + case IO_FOF_POS: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = wrap_position(Group[pindex].Pos[k] - All.GlobalDisplacementVector[k], k); +#else /* #ifdef SUBFIND */ + *fp++ = wrap_position(Group[pindex].CM[k] - All.GlobalDisplacementVector[k], k); +#endif /* #ifdef SUBFIND #else */ + break; + case IO_FOF_CM: + for(k = 0; k < 3; k++) + *fp++ = wrap_position(Group[pindex].CM[k] - All.GlobalDisplacementVector[k], k); + break; + case IO_FOF_VEL: + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].Vel[k]; + break; + case IO_FOF_LENTYPE: + for(k = 0; k < NTYPES; k++) + *ip++ = Group[pindex].LenType[k]; + break; + case IO_FOF_MASSTYPE: + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].MassType[k]; + break; + case IO_FOF_SFR: +#ifdef USE_SFR + *fp++ = Group[pindex].Sfr; +#endif /* #ifdef USE_SFR */ + break; + case IO_FOF_M_MEAN200: +#ifdef SUBFIND + *fp++ = Group[pindex].M_Mean200; +#endif /* #ifdef SUBFIND 
*/ + break; + case IO_FOF_R_MEAN200: +#ifdef SUBFIND + *fp++ = Group[pindex].R_Mean200; +#endif /* #ifdef SUBFIND */ + break; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_J_MEAN200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].J_Mean200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JDM_MEAN200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JDM_Mean200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JGAS_MEAN200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JGas_Mean200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JSTARS_MEAN200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JStars_Mean200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_MASSTYPE_MEAN200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].MassType_Mean200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_LENTYPE_MEAN200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *ip++ = Group[pindex].LenType_Mean200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRAC_MEAN200: +#ifdef SUBFIND + *fp++ = Group[pindex].CMFrac_Mean200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRACTYPE_MEAN200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].CMFracType_Mean200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_J_CRIT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].J_Crit200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JDM_CRIT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JDM_Crit200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JGAS_CRIT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JGas_Crit200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JSTARS_CRIT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JStars_Crit200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_MASSTYPE_CRIT200: +#ifdef SUBFIND + 
for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].MassType_Crit200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_LENTYPE_CRIT200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *ip++ = Group[pindex].LenType_Crit200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRAC_CRIT200: +#ifdef SUBFIND + *fp++ = Group[pindex].CMFrac_Crit200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRACTYPE_CRIT200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].CMFracType_Crit200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_J_CRIT500: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].J_Crit500[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JDM_CRIT500: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JDM_Crit500[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JGAS_CRIT500: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JGas_Crit500[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JSTARS_CRIT500: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JStars_Crit500[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_MASSTYPE_CRIT500: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].MassType_Crit500[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_LENTYPE_CRIT500: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *ip++ = Group[pindex].LenType_Crit500[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRAC_CRIT500: +#ifdef SUBFIND + *fp++ = Group[pindex].CMFrac_Crit500; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRACTYPE_CRIT500: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].CMFracType_Crit500[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_J_TOPHAT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].J_TopHat200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JDM_TOPHAT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = 
Group[pindex].JDM_TopHat200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JGAS_TOPHAT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JGas_TopHat200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JSTARS_TOPHAT200: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JStars_TopHat200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_MASSTYPE_TOPHAT200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].MassType_TopHat200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_LENTYPE_TOPHAT200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *ip++ = Group[pindex].LenType_TopHat200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRAC_TOPHAT200: +#ifdef SUBFIND + *fp++ = Group[pindex].CMFrac_TopHat200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRACTYPE_TOPHAT200: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].CMFracType_TopHat200[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EPOT_CRIT200: +#ifdef SUBFIND + *fp++ = Group[pindex].Epot_Crit200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EKIN_CRIT200: +#ifdef SUBFIND + *fp++ = Group[pindex].Ekin_Crit200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_ETHR_CRIT200: +#ifdef SUBFIND + *fp++ = Group[pindex].Ethr_Crit200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EPOT_MEAN200: +#ifdef SUBFIND + *fp++ = Group[pindex].Epot_Mean200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EKIN_MEAN200: +#ifdef SUBFIND + *fp++ = Group[pindex].Ekin_Mean200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_ETHR_MEAN200: +#ifdef SUBFIND + *fp++ = Group[pindex].Ethr_Mean200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EPOT_TOPHAT200: +#ifdef SUBFIND + *fp++ = Group[pindex].Epot_TopHat200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EKIN_TOPHAT200: +#ifdef SUBFIND + *fp++ = Group[pindex].Ekin_TopHat200; +#endif /* #ifdef SUBFIND */ + break; + case 
IO_FOF_ETHR_TOPHAT200: +#ifdef SUBFIND + *fp++ = Group[pindex].Ethr_TopHat200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EPOT_CRIT500: +#ifdef SUBFIND + *fp++ = Group[pindex].Epot_Crit500; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EKIN_CRIT500: +#ifdef SUBFIND + *fp++ = Group[pindex].Ekin_Crit500; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_ETHR_CRIT500: +#ifdef SUBFIND + *fp++ = Group[pindex].Ethr_Crit500; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_J: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].J[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JDM: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JDM[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JGAS: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JGas[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_JSTARS: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = Group[pindex].JStars[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRAC: +#ifdef SUBFIND + *fp++ = Group[pindex].CMFrac; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_CMFRACTYPE: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = Group[pindex].CMFracType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EKIN: +#ifdef SUBFIND + *fp++ = Group[pindex].Ekin; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_ETHR: +#ifdef SUBFIND + *fp++ = Group[pindex].Ethr; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_EPOT: +#ifdef SUBFIND + *fp++ = Group[pindex].Epot; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_EKIN: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Ekin; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_ETHR: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Ethr; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_EPOT: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Epot; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_J: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].J[k]; +#endif /* #ifdef 
SUBFIND */ + break; + case IO_SUB_JDM: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jdm[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JGAS: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jgas[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JSTARS: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jstars[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JINHALFRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].J_inHalfRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JDMINHALFRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jdm_inHalfRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JGASINHALFRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jgas_inHalfRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JSTARSINHALFRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jstars_inHalfRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JINRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].J_inRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JDMINRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jdm_inRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JGASINRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jgas_inRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_JSTARSINRAD: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Jstars_inRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_CMFRAC: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].CMFrac; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_CMFRACTYPE: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].CMFracType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_CMFRACINHALFRAD: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].CMFrac_inHalfRad; +#endif /* #ifdef SUBFIND 
*/ + break; + case IO_SUB_CMFRACTYPEINHALFRAD: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].CMFracType_inHalfRad[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_CMFRACINRAD: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].CMFrac_inRad; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_CMFRACTYPEINRAD: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].CMFracType_inRad[k]; +#endif /* #ifdef SUBFIND */ + break; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + break; + case IO_FOF_M_CRIT200: +#ifdef SUBFIND + *fp++ = Group[pindex].M_Crit200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_R_CRIT200: +#ifdef SUBFIND + *fp++ = Group[pindex].R_Crit200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_M_CRIT500: +#ifdef SUBFIND + *fp++ = Group[pindex].M_Crit500; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_R_CRIT500: +#ifdef SUBFIND + *fp++ = Group[pindex].R_Crit500; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_M_TOPHAT200: +#ifdef SUBFIND + *fp++ = Group[pindex].M_TopHat200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_R_TOPHAT200: +#ifdef SUBFIND + *fp++ = Group[pindex].R_TopHat200; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_NSUBS: +#ifdef SUBFIND + *ip++ = Group[pindex].Nsubs; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_FIRSTSUB: +#ifdef SUBFIND + *ip++ = Group[pindex].FirstSub; +#endif /* #ifdef SUBFIND */ + break; + case IO_FOF_FUZZOFFTYPE: + break; + case IO_SUB_LEN: +#ifdef SUBFIND + *ip++ = SubGroup[pindex].Len; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MTOT: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Mass; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_POS: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = wrap_position(SubGroup[pindex].Pos[k] - All.GlobalDisplacementVector[k], k); +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_VEL: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = SubGroup[pindex].Vel[k]; +#endif /* #ifdef SUBFIND */ + 
break; + case IO_SUB_LENTYPE: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *ip++ = SubGroup[pindex].LenType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MASSTYPE: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].MassType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_CM: +#ifdef SUBFIND + for(k = 0; k < 3; k++) + *fp++ = wrap_position(SubGroup[pindex].CM[k] - All.GlobalDisplacementVector[k], k); +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_SPIN: + for(k = 0; k < 3; k++) +#ifdef SUBFIND + *fp++ = SubGroup[pindex].Spin[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_VELDISP: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].SubVelDisp; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_VMAX: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].SubVmax; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_VMAXRAD: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].SubVmaxRad; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_HALFMASSRAD: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].SubHalfMassRad; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_HALFMASSRADTYPE: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].SubHalfMassRadType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MASSINRAD: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].SubMassInRad; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MASSINRADTYPE: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].SubMassInRadType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MASSINHALFRAD: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].SubMassInHalfRad; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MASSINHALFRADTYPE: +#ifdef SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].SubMassInHalfRadType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MASSINMAXRAD: +#ifdef SUBFIND + *fp++ = SubGroup[pindex].SubMassInMaxRad; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_MASSINMAXRADTYPE: +#ifdef 
SUBFIND + for(k = 0; k < NTYPES; k++) + *fp++ = SubGroup[pindex].SubMassInMaxRadType[k]; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_IDMOSTBOUND: +#ifdef SUBFIND + *idp++ = SubGroup[pindex].SubMostBoundID; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_GRNR: +#ifdef SUBFIND + *ip++ = SubGroup[pindex].GrNr; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_PARENT: +#ifdef SUBFIND + *ip++ = SubGroup[pindex].SubParent; +#endif /* #ifdef SUBFIND */ + break; + case IO_SUB_BFLD_HALO: +#if defined(MHD) && defined(SUBFIND) + *fp++ = SubGroup[pindex].Bfld_Halo * sqrt(4. * M_PI); +#endif /* #if defined(MHD) && defined(SUBFIND) */ + break; + case IO_SUB_BFLD_DISK: +#if defined(MHD) && defined(SUBFIND) + *fp++ = SubGroup[pindex].Bfld_Disk * sqrt(4. * M_PI); +#endif /* #if defined(MHD) && defined(SUBFIND) */ + break; + case IO_SUB_SFR: +#if defined(USE_SFR) && defined(SUBFIND) + *fp++ = SubGroup[pindex].Sfr; +#endif /* #if defined(USE_SFR) && defined(SUBFIND) */ + break; + case IO_SUB_SFRINRAD: +#if defined(USE_SFR) && defined(SUBFIND) + *fp++ = SubGroup[pindex].SfrInRad; +#endif /* #if defined(USE_SFR) && defined(SUBFIND) */ + break; + case IO_SUB_SFRINHALFRAD: +#if defined(USE_SFR) && defined(SUBFIND) + *fp++ = SubGroup[pindex].SfrInHalfRad; +#endif /* #if defined(USE_SFR) && defined(SUBFIND) */ + break; + case IO_SUB_SFRINMAXRAD: +#if defined(USE_SFR) && defined(SUBFIND) + *fp++ = SubGroup[pindex].SfrInMaxRad; +#endif /* #if defined(USE_SFR) && defined(SUBFIND) */ + break; + case IO_FOFSUB_IDS: +#ifdef FOF_STOREIDS + *idp++ = ID_list[pindex].ID; +#endif /* #ifdef FOF_STOREIDS */ + break; + + case IO_FOF_LASTENTRY: + terminate("should not be reached"); + break; + } + } +} + +/*! \brief Associates the output variable blocknumber with its name. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * \param[out] label Name of field. 
+ * + * \return void + */ +void fof_subfind_get_dataset_name(enum fof_subfind_iofields blocknr, char *label) +{ + switch(blocknr) + { + case IO_FOF_LEN: + strcpy(label, "GroupLen"); + break; + case IO_FOF_MTOT: + strcpy(label, "GroupMass"); + break; + case IO_FOF_POS: + strcpy(label, "GroupPos"); + break; + case IO_FOF_CM: + strcpy(label, "GroupCM"); + break; + case IO_FOF_VEL: + strcpy(label, "GroupVel"); + break; + case IO_FOF_LENTYPE: + strcpy(label, "GroupLenType"); + break; + case IO_FOF_MASSTYPE: + strcpy(label, "GroupMassType"); + break; + case IO_FOF_SFR: + strcpy(label, "GroupSFR"); + break; + case IO_FOF_M_MEAN200: + strcpy(label, "Group_M_Mean200"); + break; + case IO_FOF_R_MEAN200: + strcpy(label, "Group_R_Mean200"); + break; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_J_MEAN200: + strcpy(label, "Group_J_Mean200"); + break; + case IO_FOF_JDM_MEAN200: + strcpy(label, "Group_Jdm_Mean200"); + break; + case IO_FOF_JGAS_MEAN200: + strcpy(label, "Group_Jgas_Mean200"); + break; + case IO_FOF_JSTARS_MEAN200: + strcpy(label, "Group_Jstars_Mean200"); + break; + case IO_FOF_MASSTYPE_MEAN200: + strcpy(label, "Group_MassType_Mean200"); + break; + case IO_FOF_LENTYPE_MEAN200: + strcpy(label, "Group_LenType_Mean200"); + break; + case IO_FOF_CMFRAC_MEAN200: + strcpy(label, "Group_CMFrac_Mean200"); + break; + case IO_FOF_CMFRACTYPE_MEAN200: + strcpy(label, "Group_CMFracType_Mean200"); + break; + case IO_FOF_J_CRIT200: + strcpy(label, "Group_J_Crit200"); + break; + case IO_FOF_JDM_CRIT200: + strcpy(label, "Group_Jdm_Crit200"); + break; + case IO_FOF_JGAS_CRIT200: + strcpy(label, "Group_Jgas_Crit200"); + break; + case IO_FOF_JSTARS_CRIT200: + strcpy(label, "Group_Jstars_Crit200"); + break; + case IO_FOF_MASSTYPE_CRIT200: + strcpy(label, "Group_MassType_Crit200"); + break; + case IO_FOF_LENTYPE_CRIT200: + strcpy(label, "Group_LenType_Crit200"); + break; + case IO_FOF_CMFRAC_CRIT200: + strcpy(label, "Group_CMFrac_Crit200"); + break; + case IO_FOF_CMFRACTYPE_CRIT200: 
+ strcpy(label, "Group_CMFracType_Crit200"); + break; + case IO_FOF_J_CRIT500: + strcpy(label, "Group_J_Crit500"); + break; + case IO_FOF_JDM_CRIT500: + strcpy(label, "Group_Jdm_Crit500"); + break; + case IO_FOF_JGAS_CRIT500: + strcpy(label, "Group_Jgas_Crit500"); + break; + case IO_FOF_JSTARS_CRIT500: + strcpy(label, "Group_Jstars_Crit500"); + break; + case IO_FOF_MASSTYPE_CRIT500: + strcpy(label, "Group_MassType_Crit500"); + break; + case IO_FOF_LENTYPE_CRIT500: + strcpy(label, "Group_LenType_Crit500"); + break; + case IO_FOF_CMFRAC_CRIT500: + strcpy(label, "Group_CMFrac_Crit500"); + break; + case IO_FOF_CMFRACTYPE_CRIT500: + strcpy(label, "Group_CMFracType_Crit500"); + break; + case IO_FOF_J_TOPHAT200: + strcpy(label, "Group_J_TopHat200"); + break; + case IO_FOF_JDM_TOPHAT200: + strcpy(label, "Group_Jdm_TopHat200"); + break; + case IO_FOF_JGAS_TOPHAT200: + strcpy(label, "Group_Jgas_TopHat200"); + break; + case IO_FOF_JSTARS_TOPHAT200: + strcpy(label, "Group_Jstars_TopHat200"); + break; + case IO_FOF_MASSTYPE_TOPHAT200: + strcpy(label, "Group_MassType_TopHat200"); + break; + case IO_FOF_LENTYPE_TOPHAT200: + strcpy(label, "Group_LenType_TopHat200"); + break; + case IO_FOF_CMFRAC_TOPHAT200: + strcpy(label, "Group_CMFrac_TopHat200"); + break; + case IO_FOF_CMFRACTYPE_TOPHAT200: + strcpy(label, "Group_CMFracType_TopHat200"); + break; + case IO_FOF_EPOT_CRIT200: + strcpy(label, "Group_Epot_Crit200"); + break; + case IO_FOF_EKIN_CRIT200: + strcpy(label, "Group_Ekin_Crit200"); + break; + case IO_FOF_ETHR_CRIT200: + strcpy(label, "Group_Ethr_Crit200"); + break; + case IO_FOF_EPOT_MEAN200: + strcpy(label, "Group_Epot_Mean200"); + break; + case IO_FOF_EKIN_MEAN200: + strcpy(label, "Group_Ekin_Mean200"); + break; + case IO_FOF_ETHR_MEAN200: + strcpy(label, "Group_Ethr_Mean200"); + break; + case IO_FOF_EPOT_TOPHAT200: + strcpy(label, "Group_Epot_TopHat200"); + break; + case IO_FOF_EKIN_TOPHAT200: + strcpy(label, "Group_Ekin_TopHat200"); + break; + case IO_FOF_ETHR_TOPHAT200: 
+ strcpy(label, "Group_Ethr_TopHat200"); + break; + case IO_FOF_EPOT_CRIT500: + strcpy(label, "Group_Epot_Crit500"); + break; + case IO_FOF_EKIN_CRIT500: + strcpy(label, "Group_Ekin_Crit500"); + break; + case IO_FOF_ETHR_CRIT500: + strcpy(label, "Group_Ethr_Crit500"); + break; + case IO_FOF_J: + strcpy(label, "Group_J"); + break; + case IO_FOF_JDM: + strcpy(label, "Group_Jdm"); + break; + case IO_FOF_JGAS: + strcpy(label, "Group_Jgas"); + break; + case IO_FOF_JSTARS: + strcpy(label, "Group_Jstars"); + break; + case IO_FOF_CMFRAC: + strcpy(label, "Group_CMFrac"); + break; + case IO_FOF_CMFRACTYPE: + strcpy(label, "Group_CMFracType"); + break; + case IO_FOF_EKIN: + strcpy(label, "GroupEkin"); + break; + case IO_FOF_ETHR: + strcpy(label, "GroupEthr"); + break; + case IO_FOF_EPOT: + strcpy(label, "GroupEpot"); + break; + case IO_SUB_EKIN: + strcpy(label, "SubhaloEkin"); + break; + case IO_SUB_ETHR: + strcpy(label, "SubhaloEthr"); + break; + case IO_SUB_EPOT: + strcpy(label, "SubhaloEpot"); + break; + case IO_SUB_J: + strcpy(label, "Subhalo_J"); + break; + case IO_SUB_JDM: + strcpy(label, "Subhalo_Jdm"); + break; + case IO_SUB_JGAS: + strcpy(label, "Subhalo_Jgas"); + break; + case IO_SUB_JSTARS: + strcpy(label, "Subhalo_Jstars"); + break; + case IO_SUB_JINHALFRAD: + strcpy(label, "Subhalo_JInHalfRad"); + break; + case IO_SUB_JDMINHALFRAD: + strcpy(label, "Subhalo_JdmInHalfRad"); + break; + case IO_SUB_JGASINHALFRAD: + strcpy(label, "Subhalo_JgasInHalfRad"); + break; + case IO_SUB_JSTARSINHALFRAD: + strcpy(label, "Subhalo_JstarsInHalfRad"); + break; + case IO_SUB_JINRAD: + strcpy(label, "Subhalo_JInRad"); + break; + case IO_SUB_JDMINRAD: + strcpy(label, "Subhalo_JdmInRad"); + break; + case IO_SUB_JGASINRAD: + strcpy(label, "Subhalo_JgasInRad"); + break; + case IO_SUB_JSTARSINRAD: + strcpy(label, "Subhalo_JstarsInRad"); + break; + case IO_SUB_CMFRAC: + strcpy(label, "Subhalo_CMFrac"); + break; + case IO_SUB_CMFRACTYPE: + strcpy(label, "Subhalo_CMFracType"); + break; + 
case IO_SUB_CMFRACINHALFRAD: + strcpy(label, "Subhalo_CMFracInHalfRad"); + break; + case IO_SUB_CMFRACTYPEINHALFRAD: + strcpy(label, "Subhalo_CMFracTypeInHalfRad"); + break; + case IO_SUB_CMFRACINRAD: + strcpy(label, "Subhalo_CMFracInRad"); + break; + case IO_SUB_CMFRACTYPEINRAD: + strcpy(label, "Subhalo_CMFracTypeInRad"); + break; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + case IO_FOF_M_CRIT200: + strcpy(label, "Group_M_Crit200"); + break; + case IO_FOF_R_CRIT200: + strcpy(label, "Group_R_Crit200"); + break; + case IO_FOF_M_CRIT500: + strcpy(label, "Group_M_Crit500"); + break; + case IO_FOF_R_CRIT500: + strcpy(label, "Group_R_Crit500"); + break; + case IO_FOF_M_TOPHAT200: + strcpy(label, "Group_M_TopHat200"); + break; + case IO_FOF_R_TOPHAT200: + strcpy(label, "Group_R_TopHat200"); + break; + case IO_FOF_NSUBS: + strcpy(label, "GroupNsubs"); + break; + case IO_FOF_FIRSTSUB: + strcpy(label, "GroupFirstSub"); + break; + case IO_FOF_FUZZOFFTYPE: + strcpy(label, "GroupFuzzOffsetType"); + break; + case IO_SUB_LEN: + strcpy(label, "SubhaloLen"); + break; + case IO_SUB_MTOT: + strcpy(label, "SubhaloMass"); + break; + case IO_SUB_POS: + strcpy(label, "SubhaloPos"); + break; + case IO_SUB_VEL: + strcpy(label, "SubhaloVel"); + break; + case IO_SUB_LENTYPE: + strcpy(label, "SubhaloLenType"); + break; + case IO_SUB_MASSTYPE: + strcpy(label, "SubhaloMassType"); + break; + case IO_SUB_CM: + strcpy(label, "SubhaloCM"); + break; + case IO_SUB_SPIN: + strcpy(label, "SubhaloSpin"); + break; + case IO_SUB_VELDISP: + strcpy(label, "SubhaloVelDisp"); + break; + case IO_SUB_VMAX: + strcpy(label, "SubhaloVmax"); + break; + case IO_SUB_VMAXRAD: + strcpy(label, "SubhaloVmaxRad"); + break; + case IO_SUB_HALFMASSRAD: + strcpy(label, "SubhaloHalfmassRad"); + break; + case IO_SUB_HALFMASSRADTYPE: + strcpy(label, "SubhaloHalfmassRadType"); + break; + case IO_SUB_MASSINRAD: + strcpy(label, "SubhaloMassInRad"); + break; + case IO_SUB_MASSINHALFRAD: + strcpy(label, 
"SubhaloMassInHalfRad"); + break; + case IO_SUB_MASSINMAXRAD: + strcpy(label, "SubhaloMassInMaxRad"); + break; + case IO_SUB_MASSINRADTYPE: + strcpy(label, "SubhaloMassInRadType"); + break; + case IO_SUB_MASSINHALFRADTYPE: + strcpy(label, "SubhaloMassInHalfRadType"); + break; + case IO_SUB_MASSINMAXRADTYPE: + strcpy(label, "SubhaloMassInMaxRadType"); + break; + case IO_SUB_IDMOSTBOUND: + strcpy(label, "SubhaloIDMostbound"); + break; + case IO_SUB_GRNR: + strcpy(label, "SubhaloGrNr"); + break; + case IO_SUB_PARENT: + strcpy(label, "SubhaloParent"); + break; + case IO_SUB_BFLD_HALO: + strcpy(label, "SubhaloBfldHalo"); + break; + case IO_SUB_BFLD_DISK: + strcpy(label, "SubhaloBfldDisk"); + break; + case IO_SUB_SFR: + strcpy(label, "SubhaloSFR"); + break; + case IO_SUB_SFRINRAD: + strcpy(label, "SubhaloSFRinRad"); + break; + case IO_SUB_SFRINHALFRAD: + strcpy(label, "SubhaloSFRinHalfRad"); + break; + case IO_SUB_SFRINMAXRAD: + strcpy(label, "SubhaloSFRinMaxRad"); + break; + case IO_FOFSUB_IDS: + strcpy(label, "ID"); + break; + + case IO_FOF_LASTENTRY: + terminate("should not be reached"); + break; + } +} + +/*! \brief Is this output field a group or subhalo property? + * + * \param[in] blocknr Number (identifier) of the field to be written. 
+ * + * \return 0: group property; 1 subhalo property; 2: both (unused) + */ +int fof_subfind_get_dataset_group(enum fof_subfind_iofields blocknr) +{ + switch(blocknr) + { + case IO_FOF_LEN: + case IO_FOF_MTOT: + case IO_FOF_POS: + case IO_FOF_CM: + case IO_FOF_VEL: + case IO_FOF_LENTYPE: + case IO_FOF_MASSTYPE: + case IO_FOF_SFR: + case IO_FOF_M_MEAN200: + case IO_FOF_R_MEAN200: + case IO_FOF_M_CRIT200: + case IO_FOF_R_CRIT200: + case IO_FOF_M_TOPHAT200: + case IO_FOF_R_TOPHAT200: + case IO_FOF_M_CRIT500: + case IO_FOF_R_CRIT500: + case IO_FOF_NSUBS: + case IO_FOF_FIRSTSUB: + case IO_FOF_FUZZOFFTYPE: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_J_MEAN200: + case IO_FOF_JDM_MEAN200: + case IO_FOF_JGAS_MEAN200: + case IO_FOF_JSTARS_MEAN200: + case IO_FOF_MASSTYPE_MEAN200: + case IO_FOF_LENTYPE_MEAN200: + case IO_FOF_CMFRAC_MEAN200: + case IO_FOF_CMFRACTYPE_MEAN200: + case IO_FOF_J_CRIT200: + case IO_FOF_JDM_CRIT200: + case IO_FOF_JGAS_CRIT200: + case IO_FOF_JSTARS_CRIT200: + case IO_FOF_MASSTYPE_CRIT200: + case IO_FOF_LENTYPE_CRIT200: + case IO_FOF_CMFRAC_CRIT200: + case IO_FOF_CMFRACTYPE_CRIT200: + case IO_FOF_J_TOPHAT200: + case IO_FOF_JDM_TOPHAT200: + case IO_FOF_JGAS_TOPHAT200: + case IO_FOF_JSTARS_TOPHAT200: + case IO_FOF_MASSTYPE_TOPHAT200: + case IO_FOF_LENTYPE_TOPHAT200: + case IO_FOF_CMFRAC_TOPHAT200: + case IO_FOF_CMFRACTYPE_TOPHAT200: + case IO_FOF_J_CRIT500: + case IO_FOF_JDM_CRIT500: + case IO_FOF_JGAS_CRIT500: + case IO_FOF_JSTARS_CRIT500: + case IO_FOF_MASSTYPE_CRIT500: + case IO_FOF_LENTYPE_CRIT500: + case IO_FOF_CMFRAC_CRIT500: + case IO_FOF_CMFRACTYPE_CRIT500: + case IO_FOF_J: + case IO_FOF_JDM: + case IO_FOF_JGAS: + case IO_FOF_JSTARS: + case IO_FOF_CMFRAC: + case IO_FOF_CMFRACTYPE: + case IO_FOF_EKIN: + case IO_FOF_ETHR: + case IO_FOF_EPOT: + case IO_FOF_EPOT_CRIT200: + case IO_FOF_EKIN_CRIT200: + case IO_FOF_ETHR_CRIT200: + case IO_FOF_EPOT_MEAN200: + case IO_FOF_EKIN_MEAN200: + case IO_FOF_ETHR_MEAN200: + case IO_FOF_EPOT_TOPHAT200: + case 
IO_FOF_EKIN_TOPHAT200: + case IO_FOF_ETHR_TOPHAT200: + case IO_FOF_EPOT_CRIT500: + case IO_FOF_EKIN_CRIT500: + case IO_FOF_ETHR_CRIT500: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + return 0; + + case IO_SUB_LEN: + case IO_SUB_MTOT: + case IO_SUB_POS: + case IO_SUB_VEL: + case IO_SUB_LENTYPE: + case IO_SUB_MASSTYPE: + case IO_SUB_CM: + case IO_SUB_SPIN: + case IO_SUB_VELDISP: + case IO_SUB_VMAX: + case IO_SUB_VMAXRAD: + case IO_SUB_HALFMASSRAD: + case IO_SUB_HALFMASSRADTYPE: + case IO_SUB_MASSINRAD: + case IO_SUB_MASSINHALFRAD: + case IO_SUB_MASSINMAXRAD: + case IO_SUB_MASSINRADTYPE: + case IO_SUB_MASSINHALFRADTYPE: + case IO_SUB_MASSINMAXRADTYPE: + case IO_SUB_IDMOSTBOUND: + case IO_SUB_GRNR: + case IO_SUB_PARENT: + case IO_SUB_BFLD_HALO: + case IO_SUB_BFLD_DISK: + case IO_SUB_SFR: + case IO_SUB_SFRINRAD: + case IO_SUB_SFRINHALFRAD: + case IO_SUB_SFRINMAXRAD: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_SUB_EKIN: + case IO_SUB_ETHR: + case IO_SUB_EPOT: + case IO_SUB_J: + case IO_SUB_JDM: + case IO_SUB_JGAS: + case IO_SUB_JSTARS: + case IO_SUB_JINHALFRAD: + case IO_SUB_JDMINHALFRAD: + case IO_SUB_JGASINHALFRAD: + case IO_SUB_JSTARSINHALFRAD: + case IO_SUB_JINRAD: + case IO_SUB_JDMINRAD: + case IO_SUB_JGASINRAD: + case IO_SUB_JSTARSINRAD: + case IO_SUB_CMFRAC: + case IO_SUB_CMFRACTYPE: + case IO_SUB_CMFRACINHALFRAD: + case IO_SUB_CMFRACTYPEINHALFRAD: + case IO_SUB_CMFRACINRAD: + case IO_SUB_CMFRACTYPEINRAD: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + return 1; + + case IO_FOFSUB_IDS: + return 2; + + case IO_FOF_LASTENTRY: + terminate("reached last entry in switch - strange."); + break; + } + + terminate("reached end of function - this should not happen"); + return 0; +} + +/*! \brief Returns number of particles of specific field. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * + * \return Number of entries of this property. 
+ */ +int fof_subfind_get_particles_in_block(enum fof_subfind_iofields blocknr) +{ + switch(blocknr) + { + case IO_FOF_LEN: + case IO_FOF_MTOT: + case IO_FOF_POS: + case IO_FOF_CM: + case IO_FOF_VEL: + case IO_FOF_LENTYPE: + case IO_FOF_MASSTYPE: + case IO_FOF_SFR: + case IO_FOF_FUZZOFFTYPE: + return catalogue_header.Ngroups; /* these group fields have one entry per group regardless of SUBFIND */ + + case IO_FOF_M_MEAN200: + case IO_FOF_R_MEAN200: + case IO_FOF_M_CRIT200: + case IO_FOF_R_CRIT200: + case IO_FOF_M_TOPHAT200: + case IO_FOF_R_TOPHAT200: + case IO_FOF_M_CRIT500: + case IO_FOF_R_CRIT500: + case IO_FOF_NSUBS: + case IO_FOF_FIRSTSUB: + +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_J_MEAN200: + case IO_FOF_JDM_MEAN200: + case IO_FOF_JGAS_MEAN200: + case IO_FOF_JSTARS_MEAN200: + case IO_FOF_MASSTYPE_MEAN200: + case IO_FOF_LENTYPE_MEAN200: + case IO_FOF_CMFRAC_MEAN200: + case IO_FOF_CMFRACTYPE_MEAN200: + case IO_FOF_J_CRIT200: + case IO_FOF_JDM_CRIT200: + case IO_FOF_JGAS_CRIT200: + case IO_FOF_JSTARS_CRIT200: + case IO_FOF_MASSTYPE_CRIT200: + case IO_FOF_LENTYPE_CRIT200: + case IO_FOF_CMFRAC_CRIT200: + case IO_FOF_CMFRACTYPE_CRIT200: + case IO_FOF_J_TOPHAT200: + case IO_FOF_JDM_TOPHAT200: + case IO_FOF_JGAS_TOPHAT200: + case IO_FOF_JSTARS_TOPHAT200: + case IO_FOF_MASSTYPE_TOPHAT200: + case IO_FOF_LENTYPE_TOPHAT200: + case IO_FOF_CMFRAC_TOPHAT200: + case IO_FOF_CMFRACTYPE_TOPHAT200: + case IO_FOF_J_CRIT500: + case IO_FOF_JDM_CRIT500: + case IO_FOF_JGAS_CRIT500: + case IO_FOF_JSTARS_CRIT500: + case IO_FOF_MASSTYPE_CRIT500: + case IO_FOF_LENTYPE_CRIT500: + case IO_FOF_CMFRAC_CRIT500: + case IO_FOF_CMFRACTYPE_CRIT500: + case IO_FOF_J: + case IO_FOF_JDM: + case IO_FOF_JGAS: + case IO_FOF_JSTARS: + case IO_FOF_CMFRAC: + case IO_FOF_CMFRACTYPE: + case IO_FOF_EKIN: + case IO_FOF_ETHR: + case IO_FOF_EPOT: + case IO_FOF_EPOT_CRIT200: + case IO_FOF_EKIN_CRIT200: + case IO_FOF_ETHR_CRIT200: + case IO_FOF_EPOT_MEAN200: + case IO_FOF_EKIN_MEAN200: + case IO_FOF_ETHR_MEAN200: + case IO_FOF_EPOT_TOPHAT200: + case IO_FOF_EKIN_TOPHAT200: +
case IO_FOF_ETHR_TOPHAT200: + case IO_FOF_EPOT_CRIT500: + case IO_FOF_EKIN_CRIT500: + case IO_FOF_ETHR_CRIT500: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +#ifdef SUBFIND + return catalogue_header.Ngroups; /* group fields that depend on SUBFIND: one entry per group */ +#else /* #ifdef SUBFIND */ + return 0; /* without SUBFIND these group fields have no entries */ +#endif /* #ifdef SUBFIND #else */ + + case IO_SUB_LEN: + case IO_SUB_MTOT: + case IO_SUB_POS: + case IO_SUB_VEL: + case IO_SUB_LENTYPE: + case IO_SUB_MASSTYPE: + case IO_SUB_CM: + case IO_SUB_SPIN: + case IO_SUB_VELDISP: + case IO_SUB_VMAX: + case IO_SUB_VMAXRAD: + case IO_SUB_HALFMASSRAD: + case IO_SUB_HALFMASSRADTYPE: + case IO_SUB_MASSINRAD: + case IO_SUB_MASSINHALFRAD: + case IO_SUB_MASSINMAXRAD: + case IO_SUB_MASSINRADTYPE: + case IO_SUB_MASSINHALFRADTYPE: + case IO_SUB_MASSINMAXRADTYPE: + case IO_SUB_IDMOSTBOUND: + case IO_SUB_GRNR: + case IO_SUB_PARENT: + case IO_SUB_BFLD_HALO: + case IO_SUB_BFLD_DISK: + case IO_SUB_SFR: + case IO_SUB_SFRINRAD: + case IO_SUB_SFRINHALFRAD: + case IO_SUB_SFRINMAXRAD: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_SUB_EKIN: + case IO_SUB_ETHR: + case IO_SUB_EPOT: + case IO_SUB_J: + case IO_SUB_JDM: + case IO_SUB_JGAS: + case IO_SUB_JSTARS: + case IO_SUB_JINHALFRAD: + case IO_SUB_JDMINHALFRAD: + case IO_SUB_JGASINHALFRAD: + case IO_SUB_JSTARSINHALFRAD: + case IO_SUB_JINRAD: + case IO_SUB_JDMINRAD: + case IO_SUB_JGASINRAD: + case IO_SUB_JSTARSINRAD: + case IO_SUB_CMFRAC: + case IO_SUB_CMFRACTYPE: + case IO_SUB_CMFRACINHALFRAD: + case IO_SUB_CMFRACTYPEINHALFRAD: + case IO_SUB_CMFRACINRAD: + case IO_SUB_CMFRACTYPEINRAD: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +#ifdef SUBFIND + return catalogue_header.Nsubgroups; /* subhalo fields: one entry per subgroup */ +#else /* #ifdef SUBFIND */ + return 0; /* without SUBFIND there are no subhalo entries */ +#endif /* #ifdef SUBFIND #else */ + + case IO_FOFSUB_IDS: + return catalogue_header.Nids; /* ID list: one entry per stored ID */ + + case IO_FOF_LASTENTRY: + terminate("reached last entry in switch - strange."); /* the end-of-enum marker is not a queryable field */ + break; + } + + terminate("reached end of function - this should not happen"); + return 0; /* reached only if terminate() returns; presumably it aborts - TODO confirm */ +} + +/*!
\brief Returns the number of elements per entry of a given property. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * + * \return Number of values per element of the specified property: 1, NTYPES or 3 (0 if no case assigns a value). + */ +int fof_subfind_get_values_per_blockelement(enum fof_subfind_iofields blocknr) +{ + int values = 0; /* stays 0 unless one of the cases below assigns a count */ + + switch(blocknr) + { + case IO_FOF_LEN: + case IO_FOF_NSUBS: + case IO_FOF_FIRSTSUB: + case IO_SUB_LEN: + case IO_SUB_GRNR: + case IO_SUB_PARENT: + case IO_FOF_MTOT: + case IO_FOF_SFR: + case IO_FOF_M_MEAN200: + case IO_FOF_R_MEAN200: + case IO_FOF_M_CRIT200: + case IO_FOF_R_CRIT200: + case IO_FOF_M_TOPHAT200: + case IO_FOF_R_TOPHAT200: + case IO_FOF_M_CRIT500: + case IO_FOF_R_CRIT500: + case IO_SUB_MTOT: + case IO_SUB_VELDISP: + case IO_SUB_VMAX: + case IO_SUB_VMAXRAD: + case IO_SUB_HALFMASSRAD: + case IO_SUB_MASSINRAD: + case IO_SUB_MASSINHALFRAD: + case IO_SUB_MASSINMAXRAD: + case IO_SUB_IDMOSTBOUND: + case IO_SUB_BFLD_HALO: + case IO_SUB_BFLD_DISK: + case IO_SUB_SFR: + case IO_SUB_SFRINRAD: + case IO_SUB_SFRINHALFRAD: + case IO_SUB_SFRINMAXRAD: + case IO_FOFSUB_IDS: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_CMFRAC_MEAN200: + case IO_FOF_CMFRAC_CRIT200: + case IO_FOF_CMFRAC_TOPHAT200: + case IO_FOF_CMFRAC_CRIT500: + case IO_FOF_EPOT_CRIT200: + case IO_FOF_EKIN_CRIT200: + case IO_FOF_ETHR_CRIT200: + case IO_FOF_EPOT_MEAN200: + case IO_FOF_EKIN_MEAN200: + case IO_FOF_ETHR_MEAN200: + case IO_FOF_EPOT_TOPHAT200: + case IO_FOF_EKIN_TOPHAT200: + case IO_FOF_ETHR_TOPHAT200: + case IO_FOF_EPOT_CRIT500: + case IO_FOF_EKIN_CRIT500: + case IO_FOF_ETHR_CRIT500: + case IO_FOF_EKIN: + case IO_FOF_ETHR: + case IO_FOF_EPOT: + case IO_SUB_EKIN: + case IO_SUB_ETHR: + case IO_SUB_EPOT: + case IO_SUB_CMFRAC: + case IO_SUB_CMFRACINHALFRAD: + case IO_SUB_CMFRACINRAD: + case IO_FOF_CMFRAC: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + values = 1; /* single value per element */ + break; + + case IO_FOF_LENTYPE: + case IO_SUB_LENTYPE: + case IO_FOF_MASSTYPE: + case IO_SUB_MASSTYPE: +
case IO_SUB_HALFMASSRADTYPE: + case IO_SUB_MASSINRADTYPE: + case IO_SUB_MASSINHALFRADTYPE: + case IO_SUB_MASSINMAXRADTYPE: + case IO_FOF_FUZZOFFTYPE: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_CMFRACTYPE: + case IO_SUB_CMFRACTYPE: + case IO_SUB_CMFRACTYPEINHALFRAD: + case IO_SUB_CMFRACTYPEINRAD: + case IO_FOF_LENTYPE_MEAN200: + case IO_FOF_LENTYPE_CRIT200: + case IO_FOF_LENTYPE_CRIT500: + case IO_FOF_LENTYPE_TOPHAT200: + case IO_FOF_MASSTYPE_MEAN200: + case IO_FOF_MASSTYPE_CRIT200: + case IO_FOF_MASSTYPE_CRIT500: + case IO_FOF_MASSTYPE_TOPHAT200: + case IO_FOF_CMFRACTYPE_MEAN200: + case IO_FOF_CMFRACTYPE_CRIT200: + case IO_FOF_CMFRACTYPE_CRIT500: + case IO_FOF_CMFRACTYPE_TOPHAT200: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + values = NTYPES; /* NTYPES values per element (presumably one per particle type - confirm against the NTYPES definition) */ + break; + + case IO_FOF_POS: + case IO_FOF_CM: + case IO_FOF_VEL: + case IO_SUB_POS: + case IO_SUB_VEL: + case IO_SUB_CM: + case IO_SUB_SPIN: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_SUB_J: + case IO_SUB_JDM: + case IO_SUB_JGAS: + case IO_SUB_JSTARS: + case IO_SUB_JINHALFRAD: + case IO_SUB_JDMINHALFRAD: + case IO_SUB_JGASINHALFRAD: + case IO_SUB_JSTARSINHALFRAD: + case IO_SUB_JINRAD: + case IO_SUB_JDMINRAD: + case IO_SUB_JGASINRAD: + case IO_SUB_JSTARSINRAD: + case IO_FOF_J_MEAN200: + case IO_FOF_JDM_MEAN200: + case IO_FOF_JGAS_MEAN200: + case IO_FOF_JSTARS_MEAN200: + case IO_FOF_J_CRIT200: + case IO_FOF_JDM_CRIT200: + case IO_FOF_JGAS_CRIT200: + case IO_FOF_JSTARS_CRIT200: + case IO_FOF_J_TOPHAT200: + case IO_FOF_JDM_TOPHAT200: + case IO_FOF_JGAS_TOPHAT200: + case IO_FOF_JSTARS_TOPHAT200: + case IO_FOF_J_CRIT500: + case IO_FOF_JDM_CRIT500: + case IO_FOF_JGAS_CRIT500: + case IO_FOF_JSTARS_CRIT500: + case IO_FOF_J: + case IO_FOF_JDM: + case IO_FOF_JGAS: + case IO_FOF_JSTARS: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + values = 3; /* three values per element */ + break; + + case IO_FOF_LASTENTRY: + terminate("reached last entry in switch - should not get here"); /* the end-of-enum marker is not a queryable field */ + break; + } + return values; +} + +/*!
\brief Returns the number of bytes per element of a given property. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * + * \return Number of bytes per element for this property (all values of one element together; 0 if no case assigns a size). + */ +int fof_subfind_get_bytes_per_blockelement(enum fof_subfind_iofields blocknr) +{ + int bytes_per_blockelement = 0; /* stays 0 unless one of the cases below assigns a size */ + + switch(blocknr) + { + case IO_FOF_LEN: + case IO_FOF_NSUBS: + case IO_FOF_FIRSTSUB: + case IO_SUB_LEN: + case IO_SUB_GRNR: + case IO_SUB_PARENT: + bytes_per_blockelement = sizeof(int); /* one int */ + break; + + case IO_FOF_LENTYPE: + case IO_SUB_LENTYPE: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_LENTYPE_MEAN200: + case IO_FOF_LENTYPE_CRIT200: + case IO_FOF_LENTYPE_CRIT500: + case IO_FOF_LENTYPE_TOPHAT200: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + bytes_per_blockelement = NTYPES * sizeof(int); /* NTYPES ints */ + break; + + case IO_FOF_MTOT: + case IO_FOF_SFR: + case IO_FOF_M_MEAN200: + case IO_FOF_R_MEAN200: + case IO_FOF_M_CRIT200: + case IO_FOF_R_CRIT200: + case IO_FOF_M_TOPHAT200: + case IO_FOF_R_TOPHAT200: + case IO_FOF_M_CRIT500: + case IO_FOF_R_CRIT500: + case IO_SUB_MTOT: + case IO_SUB_VELDISP: + case IO_SUB_VMAX: + case IO_SUB_VMAXRAD: + case IO_SUB_HALFMASSRAD: + case IO_SUB_MASSINRAD: + case IO_SUB_MASSINHALFRAD: + case IO_SUB_MASSINMAXRAD: + case IO_SUB_BFLD_HALO: + case IO_SUB_BFLD_DISK: + case IO_SUB_SFR: + case IO_SUB_SFRINRAD: + case IO_SUB_SFRINHALFRAD: + case IO_SUB_SFRINMAXRAD: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_CMFRAC_MEAN200: + case IO_FOF_CMFRAC_CRIT200: + case IO_FOF_CMFRAC_TOPHAT200: + case IO_FOF_CMFRAC_CRIT500: + case IO_FOF_CMFRAC: + case IO_FOF_EKIN: + case IO_FOF_ETHR: + case IO_FOF_EPOT: + case IO_SUB_EKIN: + case IO_SUB_ETHR: + case IO_SUB_EPOT: + case IO_SUB_CMFRAC: + case IO_SUB_CMFRACINHALFRAD: + case IO_SUB_CMFRACINRAD: + case IO_FOF_EPOT_CRIT200: + case IO_FOF_EKIN_CRIT200: + case IO_FOF_ETHR_CRIT200: + case IO_FOF_EPOT_MEAN200: + case IO_FOF_EKIN_MEAN200: + case IO_FOF_ETHR_MEAN200: + case
IO_FOF_EPOT_TOPHAT200: + case IO_FOF_EKIN_TOPHAT200: + case IO_FOF_ETHR_TOPHAT200: + case IO_FOF_EPOT_CRIT500: + case IO_FOF_EKIN_CRIT500: + case IO_FOF_ETHR_CRIT500: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + bytes_per_blockelement = sizeof(MyOutputFloat); /* one MyOutputFloat */ + break; + + case IO_FOF_POS: + case IO_FOF_CM: + case IO_FOF_VEL: + case IO_SUB_POS: + case IO_SUB_VEL: + case IO_SUB_CM: + case IO_SUB_SPIN: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_SUB_J: + case IO_SUB_JDM: + case IO_SUB_JGAS: + case IO_SUB_JSTARS: + case IO_SUB_JINHALFRAD: + case IO_SUB_JDMINHALFRAD: + case IO_SUB_JGASINHALFRAD: + case IO_SUB_JSTARSINHALFRAD: + case IO_SUB_JINRAD: + case IO_SUB_JDMINRAD: + case IO_SUB_JGASINRAD: + case IO_SUB_JSTARSINRAD: + case IO_FOF_J_MEAN200: + case IO_FOF_JDM_MEAN200: + case IO_FOF_JGAS_MEAN200: + case IO_FOF_JSTARS_MEAN200: + case IO_FOF_J_CRIT200: + case IO_FOF_JDM_CRIT200: + case IO_FOF_JGAS_CRIT200: + case IO_FOF_JSTARS_CRIT200: + case IO_FOF_J_TOPHAT200: + case IO_FOF_JDM_TOPHAT200: + case IO_FOF_JGAS_TOPHAT200: + case IO_FOF_JSTARS_TOPHAT200: + case IO_FOF_J_CRIT500: + case IO_FOF_JDM_CRIT500: + case IO_FOF_JGAS_CRIT500: + case IO_FOF_JSTARS_CRIT500: + case IO_FOF_J: + case IO_FOF_JDM: + case IO_FOF_JGAS: + case IO_FOF_JSTARS: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + bytes_per_blockelement = 3 * sizeof(MyOutputFloat); /* three MyOutputFloat values */ + break; + + case IO_FOF_MASSTYPE: + case IO_SUB_MASSTYPE: + case IO_SUB_HALFMASSRADTYPE: + case IO_SUB_MASSINRADTYPE: + case IO_SUB_MASSINHALFRADTYPE: + case IO_SUB_MASSINMAXRADTYPE: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_MASSTYPE_MEAN200: + case IO_FOF_MASSTYPE_CRIT200: + case IO_FOF_MASSTYPE_CRIT500: + case IO_FOF_MASSTYPE_TOPHAT200: + case IO_FOF_CMFRACTYPE_MEAN200: + case IO_FOF_CMFRACTYPE_CRIT200: + case IO_FOF_CMFRACTYPE_CRIT500: + case IO_FOF_CMFRACTYPE_TOPHAT200: + case IO_FOF_CMFRACTYPE: + case IO_SUB_CMFRACTYPE: + case IO_SUB_CMFRACTYPEINHALFRAD: + case IO_SUB_CMFRACTYPEINRAD: +#endif /* #ifdef
SUBFIND_EXTENDED_PROPERTIES */ + bytes_per_blockelement = NTYPES * sizeof(MyOutputFloat); /* NTYPES MyOutputFloat values */ + break; + + case IO_SUB_IDMOSTBOUND: + case IO_FOFSUB_IDS: + bytes_per_blockelement = sizeof(MyIDType); /* one particle ID */ + break; + + case IO_FOF_FUZZOFFTYPE: + bytes_per_blockelement = NTYPES * sizeof(long long); /* NTYPES long long values */ + break; + + case IO_FOF_LASTENTRY: + terminate("reached last entry in switch - should not get here"); /* the end-of-enum marker is not a queryable field */ + break; + } + return bytes_per_blockelement; +} + +/*! \brief Returns key for datatype of element of a given property. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * + * \return Key for datatype: 0: int, 1: (output)float, 2: long long. ID fields yield 2 if LONGIDS is defined, else 0. + */ +int fof_subfind_get_datatype(enum fof_subfind_iofields blocknr) +{ + int typekey = 0; /* 0 (native int) unless a case below assigns another key */ + + switch(blocknr) + { + case IO_FOF_LEN: + case IO_FOF_LENTYPE: + case IO_FOF_NSUBS: + case IO_FOF_FIRSTSUB: + case IO_SUB_LEN: + case IO_SUB_LENTYPE: + case IO_SUB_GRNR: + case IO_SUB_PARENT: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_LENTYPE_MEAN200: + case IO_FOF_LENTYPE_CRIT200: + case IO_FOF_LENTYPE_CRIT500: + case IO_FOF_LENTYPE_TOPHAT200: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + typekey = 0; /* native int */ + break; + + case IO_FOF_MTOT: + case IO_FOF_POS: + case IO_FOF_CM: + case IO_FOF_VEL: + case IO_FOF_MASSTYPE: + case IO_FOF_SFR: + case IO_FOF_M_MEAN200: + case IO_FOF_R_MEAN200: + case IO_FOF_M_CRIT200: + case IO_FOF_R_CRIT200: + case IO_FOF_M_TOPHAT200: + case IO_FOF_R_TOPHAT200: + case IO_FOF_M_CRIT500: + case IO_FOF_R_CRIT500: + case IO_SUB_MTOT: + case IO_SUB_POS: + case IO_SUB_VEL: + case IO_SUB_MASSTYPE: + case IO_SUB_CM: + case IO_SUB_SPIN: + case IO_SUB_VELDISP: + case IO_SUB_VMAX: + case IO_SUB_VMAXRAD: + case IO_SUB_HALFMASSRAD: + case IO_SUB_HALFMASSRADTYPE: + case IO_SUB_MASSINRAD: + case IO_SUB_MASSINHALFRAD: + case IO_SUB_MASSINMAXRAD: + case IO_SUB_MASSINRADTYPE: + case IO_SUB_MASSINHALFRADTYPE: + case IO_SUB_MASSINMAXRADTYPE: + case IO_SUB_BFLD_HALO: + case IO_SUB_BFLD_DISK: +
case IO_SUB_SFR: + case IO_SUB_SFRINRAD: + case IO_SUB_SFRINHALFRAD: + case IO_SUB_SFRINMAXRAD: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_MASSTYPE_MEAN200: + case IO_FOF_MASSTYPE_CRIT200: + case IO_FOF_MASSTYPE_CRIT500: + case IO_FOF_MASSTYPE_TOPHAT200: + case IO_FOF_J_MEAN200: + case IO_FOF_JDM_MEAN200: + case IO_FOF_JGAS_MEAN200: + case IO_FOF_JSTARS_MEAN200: + case IO_FOF_CMFRAC_MEAN200: + case IO_FOF_CMFRACTYPE_MEAN200: + case IO_FOF_J_CRIT200: + case IO_FOF_JDM_CRIT200: + case IO_FOF_JGAS_CRIT200: + case IO_FOF_JSTARS_CRIT200: + case IO_FOF_CMFRAC_CRIT200: + case IO_FOF_CMFRACTYPE_CRIT200: + case IO_FOF_J_TOPHAT200: + case IO_FOF_JDM_TOPHAT200: + case IO_FOF_JGAS_TOPHAT200: + case IO_FOF_JSTARS_TOPHAT200: + case IO_FOF_CMFRAC_TOPHAT200: + case IO_FOF_CMFRACTYPE_TOPHAT200: + case IO_FOF_J_CRIT500: + case IO_FOF_JDM_CRIT500: + case IO_FOF_JGAS_CRIT500: + case IO_FOF_JSTARS_CRIT500: + case IO_FOF_CMFRAC_CRIT500: + case IO_FOF_CMFRACTYPE_CRIT500: + case IO_FOF_J: + case IO_FOF_JDM: + case IO_FOF_JGAS: + case IO_FOF_JSTARS: + case IO_FOF_CMFRAC: + case IO_FOF_CMFRACTYPE: + case IO_FOF_EKIN: + case IO_FOF_ETHR: + case IO_FOF_EPOT: + case IO_FOF_EPOT_CRIT200: + case IO_FOF_EKIN_CRIT200: + case IO_FOF_ETHR_CRIT200: + case IO_FOF_EPOT_MEAN200: + case IO_FOF_EKIN_MEAN200: + case IO_FOF_ETHR_MEAN200: + case IO_FOF_EPOT_TOPHAT200: + case IO_FOF_EKIN_TOPHAT200: + case IO_FOF_ETHR_TOPHAT200: + case IO_FOF_EPOT_CRIT500: + case IO_FOF_EKIN_CRIT500: + case IO_FOF_ETHR_CRIT500: + case IO_SUB_EKIN: + case IO_SUB_ETHR: + case IO_SUB_EPOT: + case IO_SUB_J: + case IO_SUB_JDM: + case IO_SUB_JGAS: + case IO_SUB_JSTARS: + case IO_SUB_JINHALFRAD: + case IO_SUB_JDMINHALFRAD: + case IO_SUB_JGASINHALFRAD: + case IO_SUB_JSTARSINHALFRAD: + case IO_SUB_JINRAD: + case IO_SUB_JDMINRAD: + case IO_SUB_JGASINRAD: + case IO_SUB_JSTARSINRAD: + case IO_SUB_CMFRAC: + case IO_SUB_CMFRACTYPE: + case IO_SUB_CMFRACINHALFRAD: + case IO_SUB_CMFRACTYPEINHALFRAD: + case IO_SUB_CMFRACINRAD: + case
IO_SUB_CMFRACTYPEINRAD: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + typekey = 1; /* native MyOutputFloat */ + break; + + case IO_SUB_IDMOSTBOUND: + case IO_FOFSUB_IDS: /* ID fields: the key follows the LONGIDS compile-time switch */ +#ifdef LONGIDS + typekey = 2; /* native long long */ +#else /* #ifdef LONGIDS */ + typekey = 0; /* native int */ +#endif /* #ifdef LONGIDS #else */ + break; + + case IO_FOF_FUZZOFFTYPE: + typekey = 2; /* native long long */ + break; + + case IO_FOF_LASTENTRY: + terminate("should not be reached"); /* the end-of-enum marker is not a queryable field */ + break; + } + + return typekey; +} + +/*! \brief Determines if block is present in the current code configuration. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * + * \return 0: not present; 1: present. + */ +int fof_subfind_blockpresent(enum fof_subfind_iofields blocknr) +{ + int present = 0; /* fields count as absent unless a case below sets 1 */ + + switch(blocknr) + { + case IO_FOF_LEN: + case IO_FOF_LENTYPE: + case IO_FOF_MTOT: + case IO_FOF_POS: + case IO_FOF_CM: + case IO_FOF_VEL: + case IO_FOF_MASSTYPE: + present = 1; /* these group fields are present in every configuration */ + break; + + case IO_FOF_SFR: + case IO_SUB_SFR: + case IO_SUB_SFRINRAD: + case IO_SUB_SFRINHALFRAD: + case IO_SUB_SFRINMAXRAD: +#ifdef USE_SFR + present = 1; /* star formation rate fields require USE_SFR */ +#endif /* #ifdef USE_SFR */ + break; + + case IO_SUB_BFLD_HALO: + case IO_SUB_BFLD_DISK: +#ifdef MHD + present = 1; /* these two fields require MHD */ +#endif /* #ifdef MHD */ + break; + + case IO_FOF_FUZZOFFTYPE: + break; /* never marked present: 'present' keeps its initial 0 */ + + case IO_FOF_M_MEAN200: + case IO_FOF_R_MEAN200: + case IO_FOF_M_CRIT200: + case IO_FOF_R_CRIT200: + case IO_FOF_M_TOPHAT200: + case IO_FOF_R_TOPHAT200: + case IO_FOF_M_CRIT500: + case IO_FOF_R_CRIT500: + case IO_FOF_NSUBS: + case IO_FOF_FIRSTSUB: + case IO_SUB_LEN: + case IO_SUB_LENTYPE: + case IO_SUB_MTOT: + case IO_SUB_POS: + case IO_SUB_VEL: + case IO_SUB_MASSTYPE: + case IO_SUB_CM: + case IO_SUB_SPIN: + case IO_SUB_VELDISP: + case IO_SUB_VMAX: + case IO_SUB_VMAXRAD: + case IO_SUB_HALFMASSRAD: + case IO_SUB_HALFMASSRADTYPE: + case IO_SUB_MASSINRAD: + case IO_SUB_MASSINHALFRAD: + case IO_SUB_MASSINMAXRAD: + case IO_SUB_MASSINRADTYPE: + case
IO_SUB_MASSINHALFRADTYPE: + case IO_SUB_MASSINMAXRADTYPE: + case IO_SUB_IDMOSTBOUND: + case IO_SUB_GRNR: + case IO_SUB_PARENT: +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_J_MEAN200: + case IO_FOF_JDM_MEAN200: + case IO_FOF_JGAS_MEAN200: + case IO_FOF_JSTARS_MEAN200: + case IO_FOF_CMFRAC_MEAN200: + case IO_FOF_CMFRACTYPE_MEAN200: + case IO_FOF_J_CRIT200: + case IO_FOF_JDM_CRIT200: + case IO_FOF_JGAS_CRIT200: + case IO_FOF_JSTARS_CRIT200: + case IO_FOF_CMFRAC_CRIT200: + case IO_FOF_CMFRACTYPE_CRIT200: + case IO_FOF_J_TOPHAT200: + case IO_FOF_JDM_TOPHAT200: + case IO_FOF_JGAS_TOPHAT200: + case IO_FOF_JSTARS_TOPHAT200: + case IO_FOF_CMFRAC_TOPHAT200: + case IO_FOF_CMFRACTYPE_TOPHAT200: + case IO_FOF_J_CRIT500: + case IO_FOF_JDM_CRIT500: + case IO_FOF_JGAS_CRIT500: + case IO_FOF_JSTARS_CRIT500: + case IO_FOF_CMFRAC_CRIT500: + case IO_FOF_CMFRACTYPE_CRIT500: + case IO_FOF_J: + case IO_FOF_JDM: + case IO_FOF_JGAS: + case IO_FOF_JSTARS: + case IO_FOF_CMFRAC: + case IO_FOF_CMFRACTYPE: + case IO_FOF_EKIN: + case IO_FOF_ETHR: + case IO_FOF_EPOT: + case IO_FOF_MASSTYPE_MEAN200: + case IO_FOF_MASSTYPE_CRIT200: + case IO_FOF_MASSTYPE_CRIT500: + case IO_FOF_MASSTYPE_TOPHAT200: + case IO_FOF_LENTYPE_MEAN200: + case IO_FOF_LENTYPE_CRIT200: + case IO_FOF_LENTYPE_CRIT500: + case IO_FOF_LENTYPE_TOPHAT200: + case IO_FOF_EPOT_CRIT200: + case IO_FOF_EKIN_CRIT200: + case IO_FOF_ETHR_CRIT200: + case IO_FOF_EPOT_MEAN200: + case IO_FOF_EKIN_MEAN200: + case IO_FOF_ETHR_MEAN200: + case IO_FOF_EPOT_TOPHAT200: + case IO_FOF_EKIN_TOPHAT200: + case IO_FOF_ETHR_TOPHAT200: + case IO_FOF_EPOT_CRIT500: + case IO_FOF_EKIN_CRIT500: + case IO_FOF_ETHR_CRIT500: + case IO_SUB_EKIN: + case IO_SUB_ETHR: + case IO_SUB_EPOT: + case IO_SUB_J: + case IO_SUB_JDM: + case IO_SUB_JGAS: + case IO_SUB_JSTARS: + case IO_SUB_JINHALFRAD: + case IO_SUB_JDMINHALFRAD: + case IO_SUB_JGASINHALFRAD: + case IO_SUB_JSTARSINHALFRAD: + case IO_SUB_JINRAD: + case IO_SUB_JDMINRAD: + case IO_SUB_JGASINRAD: + case
IO_SUB_JSTARSINRAD: + case IO_SUB_CMFRAC: + case IO_SUB_CMFRACTYPE: + case IO_SUB_CMFRACINHALFRAD: + case IO_SUB_CMFRACTYPEINHALFRAD: + case IO_SUB_CMFRACINRAD: + case IO_SUB_CMFRACTYPEINRAD: +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ +#ifdef SUBFIND + present = 1; /* every field in this case group requires SUBFIND */ +#else /* #ifdef SUBFIND */ + present = 0; +#endif /* #ifdef SUBFIND #else */ + break; + + case IO_FOFSUB_IDS: +#ifdef FOF_STOREIDS + present = 1; /* the ID list is present only with FOF_STOREIDS */ +#else /* #ifdef FOF_STOREIDS */ + present = 0; +#endif /* #ifdef FOF_STOREIDS #else */ + break; + + case IO_FOF_LASTENTRY: + terminate("should not be reached"); /* the end-of-enum marker is not a queryable field */ + break; + } + return present; +} + +/*! \brief Get the 4 letter IO label for a given output field. + * + * \param[in] blocknr Number (identifier) of the field to be written. + * \param[out] label Buffer receiving the 4 label characters (needs at least 4 bytes); no terminating NUL is written. + * + * \return void + */ +void fof_subfind_get_Tab_IO_Label(enum fof_subfind_iofields blocknr, char *label) +{ + switch(blocknr) + { + case IO_FOF_LEN: + strncpy(label, "FLEN", 4); /* count 4 equals the literal's length, so only the 4 characters are copied */ + break; + case IO_FOF_MTOT: + strncpy(label, "FMAS", 4); + break; + case IO_FOF_POS: + strncpy(label, "FPOS", 4); + break; + case IO_FOF_CM: + strncpy(label, "FGCM", 4); + break; + case IO_FOF_VEL: + strncpy(label, "FVEL", 4); + break; + case IO_FOF_LENTYPE: + strncpy(label, "FLTY", 4); + break; + case IO_FOF_MASSTYPE: + strncpy(label, "FMTY", 4); + break; + case IO_FOF_SFR: + strncpy(label, "FSFR", 4); + break; + case IO_FOF_M_MEAN200: + strncpy(label, "FMM2", 4); + break; + case IO_FOF_R_MEAN200: + strncpy(label, "FRM2", 4); + break; + case IO_FOF_M_CRIT200: + strncpy(label, "FMC2", 4); + break; + case IO_FOF_R_CRIT200: + strncpy(label, "FRC2", 4); + break; + case IO_FOF_M_TOPHAT200: + strncpy(label, "FMT2", 4); + break; + case IO_FOF_R_TOPHAT200: + strncpy(label, "FRT2", 4); + break; + case IO_FOF_M_CRIT500: + strncpy(label, "FMC5", 4); + break; + case IO_FOF_R_CRIT500: + strncpy(label, "FRC5", 4); + break; + case IO_FOF_NSUBS: + strncpy(label, "FNSH", 4); + break; + case IO_FOF_FIRSTSUB: +
strncpy(label, "FFSH", 4); /* as for every label here: count 4 equals the literal's length, so no terminating NUL is written */ + break; + case IO_FOF_FUZZOFFTYPE: + strncpy(label, "FUOF", 4); + break; + + case IO_SUB_LEN: + strncpy(label, "SLEN", 4); + break; + case IO_SUB_MTOT: + strncpy(label, "SMAS", 4); + break; + case IO_SUB_POS: + strncpy(label, "SPOS", 4); + break; + case IO_SUB_VEL: + strncpy(label, "SVEL", 4); + break; + case IO_SUB_LENTYPE: + strncpy(label, "SLTY", 4); + break; + case IO_SUB_MASSTYPE: + strncpy(label, "SMTY", 4); + break; + case IO_SUB_CM: + strncpy(label, "SCMP", 4); + break; + case IO_SUB_SPIN: + strncpy(label, "SSPI", 4); + break; + case IO_SUB_VELDISP: + strncpy(label, "SVDI", 4); + break; + case IO_SUB_VMAX: + strncpy(label, "SVMX", 4); + break; + case IO_SUB_VMAXRAD: + strncpy(label, "SVRX", 4); + break; + case IO_SUB_HALFMASSRAD: + strncpy(label, "SHMR", 4); + break; + case IO_SUB_HALFMASSRADTYPE: + strncpy(label, "SHMT", 4); + break; + case IO_SUB_MASSINRAD: + strncpy(label, "SMIR", 4); + break; + case IO_SUB_MASSINHALFRAD: + strncpy(label, "SMIH", 4); + break; + case IO_SUB_MASSINMAXRAD: + strncpy(label, "SMIM", 4); + break; + case IO_SUB_MASSINRADTYPE: + strncpy(label, "SMIT", 4); + break; + case IO_SUB_MASSINHALFRADTYPE: + strncpy(label, "SMHT", 4); + break; + case IO_SUB_MASSINMAXRADTYPE: + strncpy(label, "SMMT", 4); + break; + case IO_SUB_IDMOSTBOUND: + strncpy(label, "SIDM", 4); + break; + case IO_SUB_GRNR: + strncpy(label, "SGNR", 4); + break; + case IO_SUB_PARENT: + strncpy(label, "SPRT", 4); + break; + case IO_SUB_BFLD_HALO: + strncpy(label, "BFDH", 4); + break; + case IO_SUB_BFLD_DISK: + strncpy(label, "BFDD", 4); + break; + case IO_SUB_SFR: + strncpy(label, "SSFR", 4); + break; + case IO_SUB_SFRINRAD: + strncpy(label, "SSFI", 4); + break; + case IO_SUB_SFRINHALFRAD: + strncpy(label, "SSFH", 4); + break; + case IO_SUB_SFRINMAXRAD: + strncpy(label, "SSFM", 4); + break; + case IO_FOFSUB_IDS: + strncpy(label, "PIDS", 4); + break; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + case IO_FOF_J_MEAN200: + strncpy(label, "FJM2", 4); +
break; + case IO_FOF_JDM_MEAN200: + strncpy(label, "JDM2", 4); + break; + case IO_FOF_JGAS_MEAN200: + strncpy(label, "JGM2", 4); + break; + case IO_FOF_JSTARS_MEAN200: + strncpy(label, "JSM2", 4); + break; + case IO_FOF_MASSTYPE_MEAN200: + strncpy(label, "MTM2", 4); + break; + case IO_FOF_LENTYPE_MEAN200: + strncpy(label, "LTM2", 4); + break; + case IO_FOF_CMFRAC_MEAN200: + strncpy(label, "CFM2", 4); + break; + case IO_FOF_CMFRACTYPE_MEAN200: + strncpy(label, "FTM2", 4); + break; + case IO_FOF_J_CRIT200: + strncpy(label, "FJC2", 4); + break; + case IO_FOF_JDM_CRIT200: + strncpy(label, "JDC2", 4); + break; + case IO_FOF_JGAS_CRIT200: + strncpy(label, "JGC2", 4); + break; + case IO_FOF_JSTARS_CRIT200: + strncpy(label, "JSC2", 4); + break; + case IO_FOF_MASSTYPE_CRIT200: + strncpy(label, "MTC2", 4); + break; + case IO_FOF_LENTYPE_CRIT200: + strncpy(label, "LTC2", 4); + break; + case IO_FOF_CMFRAC_CRIT200: + strncpy(label, "CFC2", 4); + break; + case IO_FOF_CMFRACTYPE_CRIT200: + strncpy(label, "FTC2", 4); + break; + case IO_FOF_J_TOPHAT200: + strncpy(label, "FJT2", 4); + break; + case IO_FOF_JDM_TOPHAT200: + strncpy(label, "JDT2", 4); + break; + case IO_FOF_JGAS_TOPHAT200: + strncpy(label, "JGT2", 4); + break; + case IO_FOF_JSTARS_TOPHAT200: + strncpy(label, "JST2", 4); + break; + case IO_FOF_MASSTYPE_TOPHAT200: + strncpy(label, "MTT2", 4); + break; + case IO_FOF_LENTYPE_TOPHAT200: + strncpy(label, "LTT2", 4); + break; + case IO_FOF_CMFRAC_TOPHAT200: + strncpy(label, "CFT2", 4); + break; + case IO_FOF_CMFRACTYPE_TOPHAT200: + strncpy(label, "FTT2", 4); + break; + case IO_FOF_J_CRIT500: + strncpy(label, "FJC5", 4); + break; + case IO_FOF_JDM_CRIT500: + strncpy(label, "JDC5", 4); + break; + case IO_FOF_JGAS_CRIT500: + strncpy(label, "JGC5", 4); + break; + case IO_FOF_JSTARS_CRIT500: + strncpy(label, "JSC5", 4); + break; + case IO_FOF_MASSTYPE_CRIT500: + strncpy(label, "MTC5", 4); + break; + case IO_FOF_LENTYPE_CRIT500: + strncpy(label, "LTC5", 4); + break; + case
IO_FOF_CMFRAC_CRIT500: + strncpy(label, "CFC5", 4); + break; + case IO_FOF_CMFRACTYPE_CRIT500: + strncpy(label, "FTC5", 4); + break; + case IO_FOF_J: + strncpy(label, "FOFJ", 4); + break; + case IO_FOF_JDM: + strncpy(label, "FOJD", 4); + break; + case IO_FOF_JGAS: + strncpy(label, "FOJG", 4); + break; + case IO_FOF_JSTARS: + strncpy(label, "FOJS", 4); + break; + case IO_FOF_CMFRAC: + strncpy(label, "FOCF", 4); + break; + case IO_FOF_CMFRACTYPE: + strncpy(label, "FOFT", 4); + break; + case IO_FOF_EKIN: + strncpy(label, "EKIN", 4); + break; + case IO_FOF_ETHR: + strncpy(label, "ETHR", 4); + break; + case IO_FOF_EPOT: + strncpy(label, "EPOT", 4); + break; + + case IO_FOF_EPOT_CRIT200: + strncpy(label, "EPO1", 4); /* energy labels: suffix 1 = CRIT200, 2 = MEAN200, 3 = TOPHAT200, 4 = CRIT500 */ + break; + case IO_FOF_EKIN_CRIT200: + strncpy(label, "EKI1", 4); + break; + case IO_FOF_ETHR_CRIT200: + strncpy(label, "ETH1", 4); + break; + case IO_FOF_EPOT_MEAN200: + strncpy(label, "EPO2", 4); + break; + case IO_FOF_EKIN_MEAN200: + strncpy(label, "EKI2", 4); + break; + case IO_FOF_ETHR_MEAN200: + strncpy(label, "ETH2", 4); + break; + case IO_FOF_EPOT_TOPHAT200: + strncpy(label, "EPO3", 4); + break; + case IO_FOF_EKIN_TOPHAT200: + strncpy(label, "EKI3", 4); + break; + case IO_FOF_ETHR_TOPHAT200: + strncpy(label, "ETH3", 4); + break; + case IO_FOF_EPOT_CRIT500: + strncpy(label, "EPO4", 4); + break; + case IO_FOF_EKIN_CRIT500: + strncpy(label, "EKI4", 4); + break; + case IO_FOF_ETHR_CRIT500: + strncpy(label, "ETH4", 4); + break; + + case IO_SUB_EKIN: + strncpy(label, "SEKN", 4); + break; + case IO_SUB_ETHR: + strncpy(label, "SETH", 4); + break; + case IO_SUB_EPOT: + strncpy(label, "SEPT", 4); + break; + case IO_SUB_J: + strncpy(label, "SUBJ", 4); + break; + case IO_SUB_JDM: + strncpy(label, "SJDM", 4); + break; + case IO_SUB_JGAS: + strncpy(label, "SJGS", 4); + break; + case IO_SUB_JSTARS: + strncpy(label, "SJST", 4); + break; + case IO_SUB_JINHALFRAD: + strncpy(label, "SJHR", 4); + break; + case IO_SUB_JDMINHALFRAD: + strncpy(label, "SJDH", 4); + break; +
case IO_SUB_JGASINHALFRAD: + strncpy(label, "SJGH", 4); + break; + case IO_SUB_JSTARSINHALFRAD: + strncpy(label, "SJSH", 4); + break; + case IO_SUB_JINRAD: + strncpy(label, "SJMR", 4); + break; + case IO_SUB_JDMINRAD: + strncpy(label, "SJDR", 4); + break; + case IO_SUB_JGASINRAD: + strncpy(label, "SJGR", 4); + break; + case IO_SUB_JSTARSINRAD: + strncpy(label, "SJSR", 4); + break; + case IO_SUB_CMFRAC: + strncpy(label, "SCMF", 4); + break; + case IO_SUB_CMFRACTYPE: + strncpy(label, "SCMT", 4); + break; + case IO_SUB_CMFRACINHALFRAD: + strncpy(label, "SCMH", 4); + break; + case IO_SUB_CMFRACTYPEINHALFRAD: + strncpy(label, "SCTH", 4); + break; + case IO_SUB_CMFRACINRAD: + strncpy(label, "SCMR", 4); + break; + case IO_SUB_CMFRACTYPEINRAD: + strncpy(label, "SCTR", 4); + break; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + case IO_FOF_LASTENTRY: + terminate("should not be reached"); /* the end-of-enum marker has no label */ + break; + } +} + +#ifdef HAVE_HDF5 +/*! \brief Writes the catalogue_header fields and the git commit/date strings as attributes of the hdf5 header group. + * + * \param[in] handle Handle for header hdf5 group.
+ * + * \return void + */ +void fof_subfind_write_header_attributes_in_hdf5(hid_t handle) +{ + hid_t hdf5_dataspace, hdf5_attribute; /* reused per attribute: create scalar dataspace, create attribute, write, close both */ + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Ngroups_ThisFile", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &catalogue_header.Ngroups, "Ngroups_ThisFile"); + my_H5Aclose(hdf5_attribute, "Ngroups_ThisFile"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Nsubgroups_ThisFile", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &catalogue_header.Nsubgroups, "Nsubgroups_ThisFile"); + my_H5Aclose(hdf5_attribute, "Nsubgroups_ThisFile"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Nids_ThisFile", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &catalogue_header.Nids, "Nids_ThisFile"); + my_H5Aclose(hdf5_attribute, "Nids_ThisFile"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Ngroups_Total", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &catalogue_header.TotNgroups, "Ngroups_Total"); + my_H5Aclose(hdf5_attribute, "Ngroups_Total"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Nsubgroups_Total", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &catalogue_header.TotNsubgroups, "Nsubgroups_Total"); + my_H5Aclose(hdf5_attribute, "Nsubgroups_Total"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Nids_Total", H5T_NATIVE_INT64, hdf5_dataspace, H5P_DEFAULT); /* the only count written as a 64-bit integer */ +
my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT64, &catalogue_header.TotNids, "Nids_Total"); + my_H5Aclose(hdf5_attribute, "Nids_Total"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "NumFiles", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &catalogue_header.num_files, "NumFiles"); + my_H5Aclose(hdf5_attribute, "NumFiles"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Time", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &catalogue_header.time, "Time"); + my_H5Aclose(hdf5_attribute, "Time"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Redshift", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &catalogue_header.redshift, "Redshift"); + my_H5Aclose(hdf5_attribute, "Redshift"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "HubbleParam", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &catalogue_header.HubbleParam, "HubbleParam"); + my_H5Aclose(hdf5_attribute, "HubbleParam"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "BoxSize", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &catalogue_header.BoxSize, "BoxSize"); + my_H5Aclose(hdf5_attribute, "BoxSize"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Omega0", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE,
&catalogue_header.Omega0, "Omega0"); + my_H5Aclose(hdf5_attribute, "Omega0"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "OmegaLambda", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &catalogue_header.OmegaLambda, "OmegaLambda"); + my_H5Aclose(hdf5_attribute, "OmegaLambda"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "FlagDoubleprecision", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &catalogue_header.flag_doubleprecision, "FlagDoubleprecision"); + my_H5Aclose(hdf5_attribute, "FlagDoubleprecision"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hid_t atype = my_H5Tcopy(H5T_C_S1); /* string type, resized for each git attribute below; NOTE(review): no call releasing atype is visible in this function - confirm whether one is needed */ + + my_H5Tset_size(atype, strlen(GIT_COMMIT)); /* size is the strlen() of the string, i.e. without a terminating NUL */ + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Git_commit", atype, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, atype, GIT_COMMIT, "Git_commit"); + my_H5Aclose(hdf5_attribute, "Git_commit"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + my_H5Tset_size(atype, strlen(GIT_DATE)); /* same type handle, resized to the date string */ + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Git_date", atype, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, atype, GIT_DATE, "Git_date"); + my_H5Aclose(hdf5_attribute, "Git_date"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); +} +#endif /* #ifdef HAVE_HDF5 */ + +#endif /* #ifdef FOF */ diff --git a/src/amuse/community/arepo/src/fof/fof_nearest.c b/src/amuse/community/arepo/src/fof/fof_nearest.c new file mode 100644 index 0000000000..c21badf579 --- /dev/null +++ b/src/amuse/community/arepo/src/fof/fof_nearest.c @@ -0,0 +1,473 @@ +/*! + * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/fof/fof_nearest.c + * \date 05/2018 + * \brief Routine to find nearest primary link type particle to link + * secondary link type to FoF groups. + * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * double fof_find_nearest_dmparticle(MyIDType * vMinID, int + * *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask) + * static int fof_find_nearest_dmparticle_evaluate(int target, + * int mode, int threadid) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../subfind/subfind.h" +#include "fof.h" + +#ifdef FOF + +static MyFloat *fof_nearest_distance; +static MyFloat *fof_nearest_hsml; + +static MyIDType *MinID; +static int *Head, *Len, *Next, *Tail, *MinIDTask; + 
+static int fof_find_nearest_dmparticle_evaluate(int target, int mode, int threadid); + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Pos[3]; + MyFloat Hsml; + + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in the P and fof_nearest_hsml arrays. + * \param[in] firstnode First node of communication. + * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + in->Pos[0] = P[i].Pos[0]; + in->Pos[1] = P[i].Pos[1]; + in->Pos[2] = P[i].Pos[2]; + in->Hsml = fof_nearest_hsml[i]; + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + MyFloat Distance; + MyIDType MinID; + int MinIDTask; +#if defined(SUBFIND) + MyFloat DM_Hsml; +#endif /* #if defined(SUBFIND) */ +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (PS) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? 
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(out->Distance < fof_nearest_distance[i]) + { + fof_nearest_distance[i] = out->Distance; + MinID[i] = out->MinID; + MinIDTask[i] = out->MinIDTask; +#if defined(SUBFIND) + PS[i].Hsml = out->DM_Hsml; +#endif /* #if defined(SUBFIND) */ + } +} + +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int i; + + /* do local particles */ + { + int j, threadid = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + i = NextParticle++; + + if(i >= NumPart) + break; + + if((1 << P[i].Type) & (FOF_SECONDARY_LINK_TYPES)) + { + if(fof_nearest_distance[i] > 1.0e29) /* we haven't found any neighbor yet */ + { + fof_find_nearest_dmparticle_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + fof_find_nearest_dmparticle_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief Finds nearest dark matter particle for secondary link types + * + * \param[out] vMinID Pointer to MinID array. + * \param[in] vHead Pointer to Head array. + * \param[in] vLen Pointer to Len array. + * \param[in] vNext Pointer to Next array. + * \param[in] vTail Pointer to Tail array. + * \param[out] vMinIDTask Pointer to MinIDTask array. + * + * \return Time spent in this function. 
+ */ +double fof_find_nearest_dmparticle(MyIDType *vMinID, int *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask) +{ + MinID = vMinID; + Head = vHead; + Len = vLen; + Next = vNext; + Tail = vTail; + MinIDTask = vMinIDTask; + + int i, n, npleft, iter; + long long ntot; + double tstart = second(); + + mpi_printf("FOF: Start finding nearest dm-particle (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + + fof_nearest_distance = (MyFloat *)mymalloc("fof_nearest_distance", sizeof(MyFloat) * NumPart); + fof_nearest_hsml = (MyFloat *)mymalloc("fof_nearest_hsml", sizeof(MyFloat) * NumPart); + + for(n = 0; n < NumPart; n++) + { + if((1 << P[n].Type) & (FOF_SECONDARY_LINK_TYPES)) + { + fof_nearest_distance[n] = 1.0e30; + if(P[n].Type == 0) +#ifdef USE_AREPO_FOF_WITH_GADGET_FIX + fof_nearest_hsml[n] = SphP[n].Hsml; +#else /* #ifdef USE_AREPO_FOF_WITH_GADGET_FIX */ + fof_nearest_hsml[n] = get_cell_radius(n); +#endif /* #ifdef USE_AREPO_FOF_WITH_GADGET_FIX #else */ + else + fof_nearest_hsml[n] = 0.1 * LinkL; + } + } + + generic_set_MaxNexport(); + + iter = 0; + /* we will repeat the whole thing for those particles where we didn't find enough neighbours */ + do + { + double t0 = second(); + + generic_comm_pattern(NumPart, kernel_local, kernel_imported); + + /* do final operations on results */ + for(i = 0, npleft = 0; i < NumPart; i++) + { + if((1 << P[i].Type) & (FOF_SECONDARY_LINK_TYPES)) + { + if(fof_nearest_distance[i] > 1.0e29) + { + if(fof_nearest_hsml[i] < 4 * LinkL) /* we only search out to a maximum distance */ + { + /* need to redo this particle */ + npleft++; + fof_nearest_hsml[i] *= 2.0; + if(iter >= MAXITER - 10) + { + printf("FOF: i=%d task=%d ID=%d P[i].Type=%d Hsml=%g LinkL=%g nearest=%g pos=(%g|%g|%g)\n", i, ThisTask, + (int)P[i].ID, P[i].Type, fof_nearest_hsml[i], LinkL, fof_nearest_distance[i], P[i].Pos[0], + P[i].Pos[1], P[i].Pos[2]); + myflush(stdout); + } + } + else + { + fof_nearest_distance[i] = 0; /* we do not continue to 
search for this particle */ + } + } + } + } + + sumup_large_ints(1, &npleft, &ntot); + + double t1 = second(); + if(ntot > 0) + { + iter++; + if(iter > 0) + mpi_printf("FOF: fof-nearest iteration %d: need to repeat for %lld particles. (took = %g sec)\n", iter, ntot, + timediff(t0, t1)); + + if(iter > MAXITER) + terminate("FOF: failed to converge in fof-nearest\n"); + } + } + while(ntot > 0); + + myfree(fof_nearest_hsml); + myfree(fof_nearest_distance); + + mpi_printf("FOF: done finding nearest dm-particle\n"); + + double tend = second(); + return timediff(tstart, tend); +} + +/*! \brief Evaluate function for finding the nearest dark matter particle for + * secondary link types. + * + * \param[in] target Index of the particle in P (local mode) or of the entry in DataGet (imported mode). + * \param[in] mode Flag if it operates on local or imported data. + * \param[in] threadid ID of thread. + * + * \return 0 + */ +static int fof_find_nearest_dmparticle_evaluate(int target, int mode, int threadid) +{ + int k, no, index, numnodes, *firstnode; + double h, r2max, dist; + double dx, dy, dz, r2; + MyDouble *pos; + data_in local, *target_data; + data_out out; + + double xtmp, ytmp, ztmp; /* NOTE(review): not referenced directly here; presumably scratch variables for the FOF_NEAREST_LONG_* macros -- confirm before removing */ + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + target_data = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + target_data = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = target_data->Pos; + h = target_data->Hsml; + + index = -1; + r2max = 1.0e30; + + /* Now start the actual tree-walk computation for this particle */ + + for(k = 0; k < numnodes; k++) + { + if(mode == MODE_LOCAL_PARTICLES) + { + no = Tree_MaxPart; /* root node */ + } + else + { + no = firstnode[k]; + no = Nodes[no].u.d.nextnode; /* open it */ + } + + while(no >= 0) + { + if(no < Tree_MaxPart) /* single particle */ + { + int p = no; + no = Nextnode[no]; + + if(!((1 << P[p].Type) & (FOF_SECONDARY_LINK_TARGET_TYPES))) + continue; + + dist = h; + dx = FOF_NEAREST_LONG_X(Tree_Pos_list[3 * p + 0] - pos[0]); + 
if(dx > dist) + continue; + dy = FOF_NEAREST_LONG_Y(Tree_Pos_list[3 * p + 1] - pos[1]); + if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(Tree_Pos_list[3 * p + 2] - pos[2]); + if(dz > dist) + continue; + + r2 = dx * dx + dy * dy + dz * dz; + if(r2 < r2max && r2 < h * h) + { + index = p; + r2max = r2; + } + } + else if(no < Tree_MaxPart + Tree_MaxNodes) /* internal node */ + { + if(mode == MODE_IMPORTED_PARTICLES) + { + if(no < + Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */ + break; + } + + struct NODE *current = &Nodes[no]; + + no = current->u.d.sibling; /* in case the node can be discarded */ + + dist = h + 0.5 * current->len; + dx = FOF_NEAREST_LONG_X(current->center[0] - pos[0]); + if(dx > dist) + continue; + dy = FOF_NEAREST_LONG_Y(current->center[1] - pos[1]); + if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(current->center[2] - pos[2]); + if(dz > dist) + continue; + + /* now test against the minimal sphere enclosing everything */ + dist += FACT1 * current->len; + if(dx * dx + dy * dy + dz * dz > dist * dist) + continue; + + no = current->u.d.nextnode; /* ok, we need to open the node */ + } + else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */ + { + terminate("do not expect imported points here"); + } + else /* pseudo particle */ + { + if(mode == MODE_IMPORTED_PARTICLES) + terminate("mode == MODE_IMPORTED_PARTICLES"); + + if(target >= 0) + tree_treefind_export_node_threads(no, target, threadid); + + no = Nextnode[no - Tree_MaxNodes]; + } + } + } + + if(index >= 0) + { + out.Distance = sqrt(r2max); + out.MinID = MinID[Head[index]]; + out.MinIDTask = MinIDTask[Head[index]]; +#if defined(SUBFIND) + out.DM_Hsml = PS[index].Hsml; +#endif /* #if defined(SUBFIND) */ + } + else + { + out.Distance = 2.0e30; + out.MinID = 0; + out.MinIDTask = -1; +#if defined(SUBFIND) + out.DM_Hsml = 0; +#endif /* #if defined(SUBFIND) */ + } + + /* Now collect the result at the right place 
*/ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return 0; +} + +#endif /* #ifdef FOF */ diff --git a/src/amuse/community/arepo/src/fof/fof_sort_kernels.c b/src/amuse/community/arepo/src/fof/fof_sort_kernels.c new file mode 100644 index 0000000000..e10627ca7f --- /dev/null +++ b/src/amuse/community/arepo/src/fof/fof_sort_kernels.c @@ -0,0 +1,495 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/fof/fof_sort_kernels.c + * \date 05/2018 + * \brief Various sort kernels used by the parallel FoF group finder. 
+ * \details contains functions: + * int fof_compare_local_sort_data_targetindex(const void *a, + * const void *b) + * int fof_compare_aux_sort_Type(const void *a, const void *b) + * int fof_compare_aux_sort_FileOrder(const void *a, + * const void *b) + * int fof_compare_aux_sort_GrNr(const void *a, const void *b) + * int fof_compare_aux_sort_OriginTask_OriginIndex(const void + * *a, const void *b) + * int fof_compare_FOF_PList_MinID(const void *a, const void *b) + * int fof_compare_FOF_GList_MinID(const void *a, const void *b) + * int fof_compare_FOF_GList_MinIDTask(const void *a, + * const void *b) + * int fof_compare_FOF_GList_MinIDTask_MinID(const void *a, + * const void *b) + * int fof_compare_FOF_GList_LocCountTaskDiffMinID(const void + * *a, const void *b) + * int fof_compare_FOF_GList_ExtCountMinID(const void *a, + * const void *b) + * int fof_compare_Group_MinID(const void *a, const void *b) + * int fof_compare_Group_GrNr(const void *a, const void *b) + * int fof_compare_Group_MinIDTask(const void *a, const void *b) + * int fof_compare_Group_MinIDTask_MinID(const void *a, + * const void *b) + * int fof_compare_Group_Len(const void *a, const void *b) + * int fof_compare_ID_list_GrNrID(const void *a, const void *b) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../subfind/subfind.h" +#include "fof.h" + +#ifdef FOF + +/*! \brief Comparison function for fof_local_sort_data objects. + * + * Sorting kernel comparing element targetindex. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int fof_compare_local_sort_data_targetindex(const void *a, const void *b) +{ + if(((struct fof_local_sort_data *)a)->targetindex < ((struct fof_local_sort_data *)b)->targetindex) + return -1; + + if(((struct fof_local_sort_data *)a)->targetindex > ((struct fof_local_sort_data *)b)->targetindex) + return +1; + + return 0; +} + +/*! \brief Comparison function for data_aux_sort objects. + * + * Sorting kernel comparing element Type. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_aux_sort_Type(const void *a, const void *b) +{ + if(((struct data_aux_sort *)a)->Type < ((struct data_aux_sort *)b)->Type) + return -1; + + if(((struct data_aux_sort *)a)->Type > ((struct data_aux_sort *)b)->Type) + return +1; + + return 0; +} + +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) +/*! \brief Comparison function for data_aux_sort objects. + * + * Sorting kernel comparing element FileOrder. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_aux_sort_FileOrder(const void *a, const void *b) +{ + if(((struct data_aux_sort *)a)->FileOrder < ((struct data_aux_sort *)b)->FileOrder) + return -1; + + if(((struct data_aux_sort *)a)->FileOrder > ((struct data_aux_sort *)b)->FileOrder) + return +1; + + return 0; +} +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */ + +/*! \brief Comparison function for data_aux_sort objects. + * + * Sorting kernel comparing elements (most important first): + * GrNr, SubNr and DM_BindingEnergy (only if SUBFIND is defined), ID + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int fof_compare_aux_sort_GrNr(const void *a, const void *b) +{ + if(((struct data_aux_sort *)a)->GrNr < ((struct data_aux_sort *)b)->GrNr) + return -1; + + if(((struct data_aux_sort *)a)->GrNr > ((struct data_aux_sort *)b)->GrNr) + return +1; + +#ifdef SUBFIND + if(((struct data_aux_sort *)a)->SubNr < ((struct data_aux_sort *)b)->SubNr) + return -1; + + if(((struct data_aux_sort *)a)->SubNr > ((struct data_aux_sort *)b)->SubNr) + return +1; + + if(((struct data_aux_sort *)a)->DM_BindingEnergy < ((struct data_aux_sort *)b)->DM_BindingEnergy) + return -1; + + if(((struct data_aux_sort *)a)->DM_BindingEnergy > ((struct data_aux_sort *)b)->DM_BindingEnergy) + return +1; +#endif /* #ifdef SUBFIND */ + + if(((struct data_aux_sort *)a)->ID < ((struct data_aux_sort *)b)->ID) + return -1; + + if(((struct data_aux_sort *)a)->ID > ((struct data_aux_sort *)b)->ID) + return +1; + + return 0; +} + +/*! \brief Comparison function for data_aux_sort objects. + * + * Sorting kernel comparing elements (most important first): + * OriginTask, OriginIndex + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_aux_sort_OriginTask_OriginIndex(const void *a, const void *b) +{ + if(((struct data_aux_sort *)a)->OriginTask < ((struct data_aux_sort *)b)->OriginTask) + return -1; + + if(((struct data_aux_sort *)a)->OriginTask > ((struct data_aux_sort *)b)->OriginTask) + return +1; + + if(((struct data_aux_sort *)a)->OriginIndex < ((struct data_aux_sort *)b)->OriginIndex) + return -1; + + if(((struct data_aux_sort *)a)->OriginIndex > ((struct data_aux_sort *)b)->OriginIndex) + return +1; + + return 0; +} + +/*! \brief Comparison function for fof_particle_list objects. + * + * Sorting kernel comparing element MinID. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int fof_compare_FOF_PList_MinID(const void *a, const void *b) +{ + if(((struct fof_particle_list *)a)->MinID < ((struct fof_particle_list *)b)->MinID) + return -1; + + if(((struct fof_particle_list *)a)->MinID > ((struct fof_particle_list *)b)->MinID) + return +1; + + return 0; +} + +/*! \brief Comparison function for fof_group_list objects. + * + * Sorting kernel comparing element MinID. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_FOF_GList_MinID(const void *a, const void *b) +{ + if(((struct fof_group_list *)a)->MinID < ((struct fof_group_list *)b)->MinID) + return -1; + + if(((struct fof_group_list *)a)->MinID > ((struct fof_group_list *)b)->MinID) + return +1; + + return 0; +} + +/*! \brief Comparison function for fof_group_list objects. + * + * Sorting kernel comparing element MinIDTask. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_FOF_GList_MinIDTask(const void *a, const void *b) +{ + if(((struct fof_group_list *)a)->MinIDTask < ((struct fof_group_list *)b)->MinIDTask) + return -1; + + if(((struct fof_group_list *)a)->MinIDTask > ((struct fof_group_list *)b)->MinIDTask) + return +1; + + return 0; +} + +/*! \brief Comparison function for fof_group_list objects. + * + * Sorting kernel comparing elements (most important first): + * MinIDTask, MinID. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int fof_compare_FOF_GList_MinIDTask_MinID(const void *a, const void *b) +{ + if(((struct fof_group_list *)a)->MinIDTask < ((struct fof_group_list *)b)->MinIDTask) + return -1; + + if(((struct fof_group_list *)a)->MinIDTask > ((struct fof_group_list *)b)->MinIDTask) + return +1; + + if(((struct fof_group_list *)a)->MinID < ((struct fof_group_list *)b)->MinID) + return -1; + + if(((struct fof_group_list *)a)->MinID > ((struct fof_group_list *)b)->MinID) + return +1; + + return 0; +} + +/*! \brief Comparison function for fof_group_list objects. + * + * Sorting kernel comparing elements (most important first): + * LocCount (larger first), MinID, |ExtCount - MinIDTask|. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b, except for LocCount where -1 if a > b + */ +int fof_compare_FOF_GList_LocCountTaskDiffMinID(const void *a, const void *b) +{ + if(((struct fof_group_list *)a)->LocCount > ((struct fof_group_list *)b)->LocCount) + return -1; + + if(((struct fof_group_list *)a)->LocCount < ((struct fof_group_list *)b)->LocCount) + return +1; + + if(((struct fof_group_list *)a)->MinID < ((struct fof_group_list *)b)->MinID) + return -1; + + if(((struct fof_group_list *)a)->MinID > ((struct fof_group_list *)b)->MinID) + return +1; + + if(labs(((struct fof_group_list *)a)->ExtCount - ((struct fof_group_list *)a)->MinIDTask) < + labs(((struct fof_group_list *)b)->ExtCount - ((struct fof_group_list *)b)->MinIDTask)) + return -1; + + if(labs(((struct fof_group_list *)a)->ExtCount - ((struct fof_group_list *)a)->MinIDTask) > + labs(((struct fof_group_list *)b)->ExtCount - ((struct fof_group_list *)b)->MinIDTask)) + return +1; + + return 0; +} + +/*! \brief Comparison function for fof_group_list objects. + * + * Sorting kernel comparing elements (most important first): + * ExtCount, MinID. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int fof_compare_FOF_GList_ExtCountMinID(const void *a, const void *b) +{ + if(((struct fof_group_list *)a)->ExtCount < ((struct fof_group_list *)b)->ExtCount) + return -1; + + if(((struct fof_group_list *)a)->ExtCount > ((struct fof_group_list *)b)->ExtCount) + return +1; + + if(((struct fof_group_list *)a)->MinID < ((struct fof_group_list *)b)->MinID) + return -1; + + if(((struct fof_group_list *)a)->MinID > ((struct fof_group_list *)b)->MinID) + return +1; + + return 0; +} + +/*! \brief Comparison function for group_properties objects. + * + * Sorting kernel comparing element MinID. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_Group_MinID(const void *a, const void *b) +{ + if(((struct group_properties *)a)->MinID < ((struct group_properties *)b)->MinID) + return -1; + + if(((struct group_properties *)a)->MinID > ((struct group_properties *)b)->MinID) + return +1; + + return 0; +} + +/*! \brief Comparison function for group_properties objects. + * + * Sorting kernel comparing element GrNr. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_Group_GrNr(const void *a, const void *b) +{ + if(((struct group_properties *)a)->GrNr < ((struct group_properties *)b)->GrNr) + return -1; + + if(((struct group_properties *)a)->GrNr > ((struct group_properties *)b)->GrNr) + return +1; + + return 0; +} + +/*! \brief Comparison function for group_properties objects. + * + * Sorting kernel comparing element MinIDTask. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int fof_compare_Group_MinIDTask(const void *a, const void *b) +{ + if(((struct group_properties *)a)->MinIDTask < ((struct group_properties *)b)->MinIDTask) + return -1; + + if(((struct group_properties *)a)->MinIDTask > ((struct group_properties *)b)->MinIDTask) + return +1; + + return 0; +} + +/*! \brief Comparison function for group_properties objects. + * + * Sorting kernel comparing elements (most important first): + * MinIDTask, MinID. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int fof_compare_Group_MinIDTask_MinID(const void *a, const void *b) +{ + if(((struct group_properties *)a)->MinIDTask < ((struct group_properties *)b)->MinIDTask) + return -1; + + if(((struct group_properties *)a)->MinIDTask > ((struct group_properties *)b)->MinIDTask) + return +1; + + if(((struct group_properties *)a)->MinID < ((struct group_properties *)b)->MinID) + return -1; + + if(((struct group_properties *)a)->MinID > ((struct group_properties *)b)->MinID) + return +1; + + return 0; +} + +/*! \brief Comparison function for group_properties objects. + * + * Sorting kernel comparing element Len (the larger Len compares as smaller, i.e. is ordered first). + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a > b. + */ +int fof_compare_Group_Len(const void *a, const void *b) +{ + if(((struct group_properties *)a)->Len > ((struct group_properties *)b)->Len) + return -1; + + if(((struct group_properties *)a)->Len < ((struct group_properties *)b)->Len) + return +1; + + return 0; +} + +/*! \brief Comparison function for id_list objects. + * + * Sorting kernel comparing elements (most important first): + * GrNr, Type, ID. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int fof_compare_ID_list_GrNrID(const void *a, const void *b) +{ + if(((struct id_list *)a)->GrNr < ((struct id_list *)b)->GrNr) + return -1; + + if(((struct id_list *)a)->GrNr > ((struct id_list *)b)->GrNr) + return +1; + + if(((struct id_list *)a)->Type < ((struct id_list *)b)->Type) + return -1; + + if(((struct id_list *)a)->Type > ((struct id_list *)b)->Type) + return +1; + + if(((struct id_list *)a)->ID < ((struct id_list *)b)->ID) + return -1; + + if(((struct id_list *)a)->ID > ((struct id_list *)b)->ID) + return +1; + + return 0; +} + +#endif /* #ifdef FOF */ diff --git a/src/amuse/community/arepo/src/fof/fof_vars.c b/src/amuse/community/arepo/src/fof/fof_vars.c new file mode 100644 index 0000000000..2df2856c66 --- /dev/null +++ b/src/amuse/community/arepo/src/fof/fof_vars.c @@ -0,0 +1,79 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/fof/fof_vars.c + * \date 05/2018 + * \brief Instances for the global variables used by FOF, which are + * declared in fof.h + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../subfind/subfind.h" +#include "fof.h" + +#ifdef FOF + +int Ngroups, NgroupsExt, MaxNgroups, TotNgroups, Nsubgroups, TotNsubgroups; +int Nids; +long long TotNids; + +double LinkL = 0; + +int fof_OldMaxPart; +int fof_OldMaxPartSph; + +unsigned char *flag_node_inside_linkinglength; + +struct group_properties *Group; + +struct fofdata_in *FoFDataIn, *FoFDataGet; + +struct fofdata_out *FoFDataResult, *FoFDataOut; + +struct fof_particle_list *FOF_PList; + +struct fof_group_list *FOF_GList; + +struct id_list *ID_list; + +struct bit_flags *Flags; + +struct fof_subfind_header catalogue_header; + +#endif /* #ifdef FOF */ diff --git a/src/amuse/community/arepo/src/gitversion/version b/src/amuse/community/arepo/src/gitversion/version new file mode 100644 index 0000000000..9cd3dc25eb --- /dev/null +++ b/src/amuse/community/arepo/src/gitversion/version @@ -0,0 +1,7 @@ +#ifndef VERSION_H +#define VERSION_H + +const char* GIT_DATE = "_DATE_"; +const char* GIT_COMMIT = "_COMMIT_"; + +#endif diff --git a/src/amuse/community/arepo/src/gitversion/version.h b/src/amuse/community/arepo/src/gitversion/version.h new file mode 100644 index 0000000000..7d33b0889a --- /dev/null +++ b/src/amuse/community/arepo/src/gitversion/version.h @@ -0,0 +1,38 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gitversion/version.h + * \date 05/2018 + * \brief Header for git-version variables. + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 27.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef VERSION_H +#define VERSION_H + +extern const char* GIT_DATE; +extern const char* GIT_COMMIT; + +#endif /* #ifndef VERSION_H */ diff --git a/src/amuse/community/arepo/src/gravity/accel.c b/src/amuse/community/arepo/src/gravity/accel.c new file mode 100644 index 0000000000..493216c347 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/accel.c @@ -0,0 +1,347 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/accel.c + * \date 05/2018 + * \brief Routines to carry out gravity force computation. + * \details contains functions: + * void compute_grav_accelerations(int timebin, int fullflag) + * void gravity(int timebin, int fullflag) + * void gravity_force_finalize(int timebin) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 03.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Computes the gravitational accelerations for all active particles. + * + * If the particle mesh is used and the current time step + * requires a PM force computation, new long range forces are + * computed by long_range_force(). Then the short-range tree forces + * are computed by gravity(). The force tree is rebuilt every time step. + * + * \param[in] timebin Current timebin for which gravity is calculated + * (positive integer). + * \param[in] fullflag Flag whether this is a global timestep + * (Flag_Full_Tree, Flag_Partial_Tree). + * + * \return void + */ +void compute_grav_accelerations(int timebin, int fullflag) +{ + if(TimeBinsGravity.GlobalNActiveParticles > 0) + { + if(All.TypeOfOpeningCriterion == 1 && All.Ti_Current == 0 && All.ErrTolTheta > 0) + { + /* For the first timestep, we do one gravity calculation up front + * with the Barnes & Hut Criterion to allow usage of relative opening + * criterion with consistent accuracy.
+ */ +#ifdef PMGRID + long_range_force(); /* within this function only reached in the first-timestep pre-pass, and only with PMGRID */ +#endif /* #ifdef PMGRID */ + gravity(timebin, fullflag); /* pre-pass; the unconditional call below computes the forces again */ + } + + gravity(timebin, fullflag); /* computes (short-range) gravity accel. */ + +#ifdef FORCETEST + gravity_forcetest(); +#endif /* #ifdef FORCETEST */ + } +} + +/*! \brief Main routine for tree force calculation. + * + * This routine handles the tree force calculation. First it builds a new + * force tree calling force_treebuild() at every timestep. This tree is then + * used to calculate a new tree force for every active particle by calling + * gravity_tree(). + * + * \param[in] timebin Current timebin for which gravity is calculated. + * \param[in] fullflag Flag whether this is a global timestep. + * + * \return void + */ +void gravity(int timebin, int fullflag) +{ + double tstart = second(); + +#if defined(SELFGRAVITY) + /* set new softening lengths on global steps to take into account possible cosmological time variation */ + if(timebin == All.HighestOccupiedGravTimeBin) + set_softenings(); + +#ifdef ALLOW_DIRECT_SUMMATION + if(TimeBinsGravity.GlobalNActiveParticles < DIRECT_SUMMATION_THRESHOLD) + { + gravity_direct(timebin); + +#ifndef ONEDIMS_SPHERICAL + gravity_force_finalize(timebin); +#endif /* #ifndef ONEDIMS_SPHERICAL */ + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE + calc_exact_gravity_for_particle_type(); +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + +#ifdef EXTERNALGRAVITY + gravity_external(); +#endif /* #ifdef EXTERNALGRAVITY */ + } + else +#endif /* #ifdef ALLOW_DIRECT_SUMMATION */ + { +#ifdef ONEDIMS_SPHERICAL + gravity_monopole_1d_spherical(); +#else /* #ifdef ONEDIMS_SPHERICAL */ + + if(TimeBinsGravity.GlobalNActiveParticles >= 10 * NTask) + construct_forcetree(0, 1, 0, timebin); /* build force tree with all particles; second argument 1 turns on optimized_domain_mapping because the global active count is at least 10 per task */ + else + construct_forcetree(0, 0, 0, timebin); /* build force tree with all particles, with optimized_domain_mapping off */ + + gravity_tree(timebin); + + gravity_force_finalize(timebin); + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE +
calc_exact_gravity_for_particle_type(); +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + +#ifdef EXTERNALGRAVITY + gravity_external(); +#endif /* #ifdef EXTERNALGRAVITY */ + + /* note: we here moved 'gravity_force_finalize' in front of the non-standard physics; + * reminder: restart flag 18: post-processing calculation potential without running simulation + */ + if(fullflag == FLAG_FULL_TREE && RestartFlag != 18) + calculate_non_standard_physics_with_valid_gravity_tree(); + + /* this is for runs which have the full tree at each time step; no HIERARCHICAL_GRAVITY */ + calculate_non_standard_physics_with_valid_gravity_tree_always(); + + myfree(Father); + myfree(Nextnode); + myfree(Tree_Points); + force_treefree(); /* Father, Nextnode and Tree_Points are released in the reverse of the order in which the tree build allocates them (Tree_Points, then Nextnode, then Father) -- presumably required by a stack-like allocator, confirm */ +#endif /* #ifdef ONEDIMS_SPHERICAL #else */ + } + +#else /* defined(SELFGRAVITY) */ + + /* self-gravity is switched off: reset acceleration (and potential, if evaluated) of every active particle before the optional exact and external gravity calls */ + int idx, i, j; + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + + if(i < 0) + continue; /* negative entries of the active list are skipped */ + +#ifdef EVALPOTENTIAL + P[i].Potential = 0; +#endif /* #ifdef EVALPOTENTIAL */ + + for(j = 0; j < 3; j++) + P[i].GravAccel[j] = 0; + } + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE + calc_exact_gravity_for_particle_type(); +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + +#ifdef EXTERNALGRAVITY + gravity_external(); +#endif /* #ifdef EXTERNALGRAVITY */ + +#endif /* defined(SELFGRAVITY) #else */ + + double tend = second(); + mpi_printf("GRAVITY: done for timebin %d, %lld particles (took %g sec)\n", timebin, TimeBinsGravity.GlobalNActiveParticles, + timediff(tstart, tend)); +} + +/*! \brief Adds individual gravity contribution and appropriate factors. + * + * Routine combines accelerations of particle mesh and tree and applies + * the required physical constants and scaling factors e.g. for a cosmological + * simulation with nonperiodic gravity. + * + * \param[in] timebin Current timebin for which gravity is calculated.
+ * + * \return void + */ +void gravity_force_finalize(int timebin) +{ + int i, j, idx; + double ax, ay, az; + + TIMER_START(CPU_TREE); + + /* now add things for comoving integration */ +#ifdef GRAVITY_NOT_PERIODIC +#ifndef PMGRID + if(All.ComovingIntegrationOn) + { + double fac = 0.5 * All.Hubble * All.Hubble * All.Omega0 / All.G; /* divided by G because GravAccel is only multiplied by G further down in this function */ + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + for(j = 0; j < 3; j++) + P[i].GravAccel[j] += fac * P[i].Pos[j]; + } + } +#endif /* #ifndef PMGRID */ +#endif /* #ifdef GRAVITY_NOT_PERIODIC */ + +#ifdef HIERARCHICAL_GRAVITY + if(timebin == All.HighestOccupiedGravTimeBin) +#endif /* #ifdef HIERARCHICAL_GRAVITY */ + { + mpi_printf("GRAVTREE: Setting OldAcc!\n"); + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + +#ifdef PMGRID + ax = P[i].GravAccel[0] + P[i].GravPM[0] / All.G; /* GravPM is divided by G so that both terms are in the same, not yet G-scaled, units -- presumably GravPM already includes G, confirm */ + ay = P[i].GravAccel[1] + P[i].GravPM[1] / All.G; + az = P[i].GravAccel[2] + P[i].GravPM[2] / All.G; +#else /* #ifdef PMGRID */ + ax = P[i].GravAccel[0]; + ay = P[i].GravAccel[1]; + az = P[i].GravAccel[2]; +#endif /* #ifdef PMGRID #else */ + + P[i].OldAcc = sqrt(ax * ax + ay * ay + az * az); + } + } + + /* multiply by G */ + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + for(j = 0; j < 3; j++) + P[i].GravAccel[j] *= All.G; + +#ifdef EVALPOTENTIAL + +#if defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC) + P[i].Potential += All.MassPMregions[0] * M_PI / (All.Asmth[0] * All.Asmth[0] * boxSize_X * boxSize_Y * boxSize_Z); +#ifdef PLACEHIGHRESREGION + P[i].Potential += All.MassPMregions[1] * M_PI / (All.Asmth[1] * All.Asmth[1] * boxSize_X * boxSize_Y * boxSize_Z); +#endif /* #ifdef PLACEHIGHRESREGION */ +#endif /* #if defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC) */ + + /* It's better to not remove
the self-potential here to get a smooth potential field for co-spatial particles with varying mass + * or softening. For calculating the binding energy of a particle, the self-energy should then be removed as + * + * P[i].Potential += P[i].Mass / (All.ForceSoftening[P[i].SofteningType] / 2.8); + */ + + P[i].Potential *= All.G; + +#ifdef PMGRID +#ifndef FORCETEST_TESTFORCELAW + P[i].Potential += P[i].PM_Potential; /* add in long-range potential */ +#endif /* #ifndef FORCETEST_TESTFORCELAW */ +#endif /* #ifdef PMGRID */ +#endif /* #ifdef EVALPOTENTIAL */ + if(All.ComovingIntegrationOn) + { +#ifdef GRAVITY_NOT_PERIODIC + double fac, r2; + int k; + + fac = -0.5 * All.Omega0 * All.Hubble * All.Hubble; + + for(k = 0, r2 = 0; k < 3; k++) + r2 += P[i].Pos[k] * P[i].Pos[k]; /* squared distance from the coordinate origin; only consumed when EVALPOTENTIAL is defined */ + +#ifdef EVALPOTENTIAL + P[i].Potential += fac * r2; +#endif /* #ifdef EVALPOTENTIAL */ +#endif /* #ifdef GRAVITY_NOT_PERIODIC */ + } + else + { + double fac, r2; + int k; + + fac = -0.5 * All.OmegaLambda * All.Hubble * All.Hubble; + + if(fac != 0) /* skip the work when the product of OmegaLambda and Hubble squared is zero */ + { + for(k = 0, r2 = 0; k < 3; k++) + r2 += P[i].Pos[k] * P[i].Pos[k]; +#ifdef EVALPOTENTIAL + P[i].Potential += fac * r2; +#endif /* #ifdef EVALPOTENTIAL */ + } + } + } + + /* Finally, the following factor allows a computation of a cosmological + * simulation with vacuum energy in physical coordinates + */ +#ifdef GRAVITY_NOT_PERIODIC +#ifndef PMGRID + if(All.ComovingIntegrationOn == 0) + { + double fac = All.OmegaLambda * All.Hubble * All.Hubble; + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + for(j = 0; j < 3; j++) + P[i].GravAccel[j] += fac * P[i].Pos[j]; + } + } +#endif /* #ifndef PMGRID */ +#endif /* #ifdef GRAVITY_NOT_PERIODIC */ + + TIMER_STOP(CPU_TREE); +} diff --git a/src/amuse/community/arepo/src/gravity/forcetree.c b/src/amuse/community/arepo/src/gravity/forcetree.c new file mode 100644 index 0000000000..c659a75e97 --- /dev/null +++
b/src/amuse/community/arepo/src/gravity/forcetree.c @@ -0,0 +1,1827 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/forcetree.c + * \date 05/2018 + * \brief Gravitational tree build. + * \details This file contains the construction of the tree used for + * calculating the gravitational force. The type tree implemented + * is a geometrical oct-tree, starting from a cube encompassing + * all particles. This cube is automatically found in the domain + * decomposition, which also splits up the global "top-level" + * tree along node boundaries, moving the particles of different + * parts of the tree to separate processors. In this version of + * the code, the tree construction may be repeated every timestep + * without a renewed domain decomposition. If particles are on + * the "wrong" processor because a new domain decomposition has + * not been carried out, they are sent as temporary points to the + * right insertion processor according to the layout of the + * top-level nodes. 
In addition, the mapping of the top-level + * nodes to processors may be readjusted in order to improve + * work-load balance for the current time step. + * contains functions: + * int construct_forcetree(int mode, int + * optimized_domain_mapping, int insert_only_primary, + * int timebin) + * int force_treebuild(int npart, int optimized_domain_mapping, + * int insert_only_primary, int timebin) + * int force_treebuild_construct(int npart, int + * optimized_domain_mapping, int insert_only_primary, + * int timebin) + * int force_treebuild_insert_single_point(int i, unsigned + * long long *intpos, int th, unsigned char levels) + * void force_assign_cost_values(void) + * int force_create_empty_nodes(int no, int topnode, int bits, + * int x, int y, int z) + * void force_insert_pseudo_particles(void) + * void force_update_node_recursive(int no, int sib, int father, + * int *last) + * void force_exchange_topleafdata(void) + * void force_treeupdate_toplevel(int no, int topnode, int bits, + * int x, int y, int z) + * void force_treeallocate(int maxpart, int maxindex) + * void force_treefree(void) + * void dump_particles(void) + * int force_add_empty_nodes(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 17.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" + +static int *th_list; /* per-particle insertion node: force_treebuild_construct first stores the top leaf found for the particle, then replaces it with DomainNodeIndex of that leaf */ +static unsigned char *level_list; /* per-particle number of levels descended in the TopNodes walk of force_treebuild_construct */ +int NTreeInsert; /* number of loop entries considered for insertion; set at the start of force_treebuild */ + +#ifdef FOF +#ifndef FOF_SECONDARY_LINK_TARGET_TYPES +#define FOF_SECONDARY_LINK_TARGET_TYPES FOF_PRIMARY_LINK_TYPES /* default: secondary link targets fall back to the primary link types */ +#endif /* #ifndef FOF_SECONDARY_LINK_TARGET_TYPES */ +#endif /* #ifdef FOF */ + +#ifdef HIERARCHICAL_GRAVITY +#define INDEX(idx) (TimeBinsGravity.ActiveParticleList[idx]) /* loop counter is a position in the active-particle list; callers skip negative results */ +#else /* #ifdef HIERARCHICAL_GRAVITY */ +#define INDEX(idx) (idx) /* loop counter is the particle index itself */ +#endif /* #ifdef HIERARCHICAL_GRAVITY #else */ + +/*!
\brief Triggers forcetree construction until successful. + * + * Allocates memory and constructs forcetree until successful; + * currently, there are two valid modes: forcetree only for gas or for all + * particles. + * + * \param[in] mode Mode: all particles or just gas cells. + * \param[in] optimized_domain_mapping Handed over to force_treebuild. + * \param[in] insert_only_primary Handed over to force_treebuild. + * \param[in] timebin Handed over to force_treebuild. + * + * \return Number of nodes in tree. + */ +int construct_forcetree(int mode, int optimized_domain_mapping, int insert_only_primary, int timebin) +{ + int npart, Tree_NumNodes = 0; /* NOTE: this local Tree_NumNodes hides the global of the same name used by force_treebuild; starting at 0 makes the first-pass allocation below depend only on insert_only_primary */ + + do + { + /* Note: force_treebuild will call force_treefree if it is about to return a negative value! + * Therefore, this has to be allocated within the loop! The only exception is when + * insert_only_primary == 2, in which case the code assumes that the forcetree is already + * allocated (this happens only in fof.c). In this case, force_treeallocate is not called + * during the first loop. + */ + if(insert_only_primary != 2 || Tree_NumNodes < 0) + force_treeallocate(NumPart, All.MaxPart); /* reallocate force tree structure */ + + /* prepare variables for force_treebuild call */ + switch(mode) + { + case 0: /* all particles */ + { + npart = NumPart; + break; + } + case 1: /* only gas particles */ + { + npart = NumGas; + break; + } + default: + { + mpi_terminate("FORCETREE: construct_forcetree: invalid mode!\n"); /* NOTE(review): npart stays unset on this path, so this relies on mpi_terminate not returning -- confirm */ + } + } + + Tree_NumNodes = force_treebuild(npart, optimized_domain_mapping, insert_only_primary, timebin); + } + while(Tree_NumNodes < 0); /* force_treebuild returns -1 whenever the build has to be repeated */ + + return Tree_NumNodes; +} + +/*! \brief Constructs the gravitational oct-tree and handles errors. + * + * \param[in] npart Number of particles on local task. + * \param[in] optimized_domain_mapping Specifies if mapping of the top-level + * nodes to processors may be readjusted.
+ * \param[in] insert_only_primary If this is set, only particles of the types + * set in FOF_PRIMARY_LINK_TYPES are inserted. + * \param[in] timebin Current timebin; needed for HIERARCHICAL_GRAVITY. + * + * \return number of local+top nodes of the constructed tree. + */ +int force_treebuild(int npart, int optimized_domain_mapping, int insert_only_primary, int timebin) +{ + int i, flag; + +#ifdef HIERARCHICAL_GRAVITY + NTreeInsert = TimeBinsGravity.NActiveParticles; + optimized_domain_mapping = 0; /* the caller-supplied value is always overridden with HIERARCHICAL_GRAVITY */ +#else /* #ifdef HIERARCHICAL_GRAVITY */ + NTreeInsert = npart; +#endif /* #ifdef HIERARCHICAL_GRAVITY #else */ + + TIMER_START(CPU_TREEBUILD); + + long long loc_insert = NTreeInsert, tot_insert; + MPI_Reduce(&loc_insert, &tot_insert, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); /* NOTE(review): the sum is delivered to rank 0 only, so tot_insert is meaningful there only; presumably mpi_printf prints on rank 0 only -- confirm */ + + mpi_printf("FORCETREE: Tree construction. (inserting %lld points)\n", tot_insert); + + TIMER_STOPSTART(CPU_TREEBUILD, CPU_TREEBUILD_INSERT); + + int flag_single = force_treebuild_construct(npart, optimized_domain_mapping, insert_only_primary, timebin); + + TIMER_STOPSTART(CPU_TREEBUILD_INSERT, CPU_TREEBUILD); + + MPI_Allreduce(&flag_single, &flag, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); /* every task receives the smallest return code of any task, so all tasks take the same branch below */ + if(flag < 0) + { + /* tree construction was not successful and needs to be repeated */ + if(flag_single != -2) /* force_treebuild_construct returns -2 before it allocates Tree_Points, so there is nothing to free on this task in that case */ + { + myfree(Tree_Points); + } + + force_treefree(); + + if(flag == -3) + { + /* we need to do an extra domain decomposition to recover from an out-of-box condition for a particle, + which can happen if GRAVITY_NOT_PERIODIC is used */ + ngb_treefree(); + domain_free(); + + domain_Decomposition(); + + ngb_treeallocate(); + ngb_treebuild(NumGas); + } + else + { + All.TreeAllocFactor *= 1.15; /* grow the allocation factor by 15 percent for the retry */ + mpi_printf("FORCETREE: Increasing TreeAllocFactor, new value=%g\n", All.TreeAllocFactor); + + if(All.TreeAllocFactor > MAX_TREE_ALLOC_FACTOR) + { + char buf[500]; /* NOTE(review): filled by an unbounded sprintf; the message with its five integers fits today, but snprintf would be safer */ + sprintf(buf, + "task %d: looks like a serious problem in tree construction, stopping with particle dump.
Tree_NumNodes=%d " + "Tree_MaxNodes=%d Tree_NumPartImported=%d NumPart=%d\n", + ThisTask, Tree_NumNodes, Tree_MaxNodes, Tree_NumPartImported, NumPart); + dump_particles(); + terminate(buf); + } + } + + TIMER_STOP(CPU_TREEBUILD); /* stop timer before returning */ + return -1; /* stop right here with error code to invoke a new call of this function, possibly with changed values for npart */ + } /* if(flag < 0) */ + + Nextnode = (int *)mymalloc_movable(&Nextnode, "Nextnode", (Tree_MaxPart + NTopleaves + Tree_NumPartImported) * sizeof(int)); + Father = (int *)mymalloc_movable(&Father, "Father", (Tree_MaxPart + Tree_NumPartImported) * sizeof(int)); + + for(i = 0; i < Tree_MaxPart + Tree_NumPartImported; i++) + Father[i] = -1; /* every entry starts at -1, presumably meaning no father assigned yet */ + + TIMER_STOPSTART(CPU_TREEBUILD, CPU_TREEBUILD_BRANCHES); + + /* insert the pseudo particles that represent the mass distribution of other domains */ + force_insert_pseudo_particles(); + + /* now compute the multipole moments recursively */ + int last = -1; + + force_update_node_recursive(Tree_MaxPart, -1, -1, &last); + + if(last >= Tree_MaxPart) + { + if(last >= Tree_MaxPart + Tree_MaxNodes) /* a pseudo-particle or imported particle */ + Nextnode[last - Tree_MaxNodes] = -1; + else + Nodes[last].u.d.nextnode = -1; + } + else + Nextnode[last] = -1; /* in all three cases the element recorded in last gets a next-link of -1; which array holds that link depends on the index range of last */ + + TIMER_STOPSTART(CPU_TREEBUILD_BRANCHES, CPU_TREEBUILD_TOPLEVEL); + + force_exchange_topleafdata(); + + Tree_NextFreeNode = Tree_MaxPart + 1; /* first index after the root node, which sits at Tree_MaxPart */ + force_treeupdate_toplevel(Tree_MaxPart, 0, 1, 0, 0, 0); + + TIMER_STOPSTART(CPU_TREEBUILD_TOPLEVEL, CPU_LOGS); + +#ifdef HIERARCHICAL_GRAVITY + if(timebin == All.HighestOccupiedGravTimeBin) +#endif /* #ifdef HIERARCHICAL_GRAVITY */ + { + double locdata[2] = {Tree_NumPartImported, Tree_NumNodes}, sumdata[2]; + MPI_Reduce(locdata, sumdata, 2, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); /* sums are delivered to rank 0 only */ + double tot_imported = sumdata[0]; + double tot_numnodes = sumdata[1]; + + mpi_printf( + "FORCETREE: Tree construction done.
=%g =%g NTopnodes=%d NTopleaves=%d " + "tree-build-scalability=%g\n", + tot_imported / (All.TotNumPart + 1.0e-60), tot_numnodes / NTask, NTopnodes, NTopleaves, + ((double)((tot_numnodes - NTask * ((double)NTopnodes)) + NTopnodes)) / (tot_numnodes + 1.0e-60)); /* NOTE(review): the two unlabeled =%g fields print imported points per particle and mean nodes per task; their labels appear to be missing from the format string. The 1.0e-60 terms keep the divisions finite when a total is zero */ + } +#ifdef HIERARCHICAL_GRAVITY + else + mpi_printf("FORCETREE: Tree construction done.\n"); +#endif /* #ifdef HIERARCHICAL_GRAVITY */ + + TIMER_STOP(CPU_LOGS); + + return Tree_NumNodes; +} + +/*! \brief Constructs the gravitational oct-tree. + * + * The index convention for accessing tree nodes is the following: + * node index + * [0... Tree_MaxPart-1] references single particles, + * the indices + * [Tree_MaxPart... Tree_MaxPart+Tree_MaxNodes-1] references tree nodes + * [Tree_MaxPart+Tree_MaxNodes... Tree_MaxPart+Tree_MaxNodes+NTopleaves-1] + * references "pseudo particles", i.e. mark branches on foreign CPUs + * [Tree_MaxPart+Tree_MaxNodes+NTopleaves... + * Tree_MaxPart+Tree_MaxNodes+NTopleaves+Tree_NumPartImported-1] + * references imported points. + * + * the pointer `Nodes' is shifted such that Nodes[Tree_MaxPart] gives the + * first tree node (i.e. the root node). + * + * \param[in] npart Number of particles on local task. + * \param[in] optimized_domain_mapping Specifies if mapping of the top-level + * nodes to processors may be readjusted. + * \param[in] insert_only_primary If this is set, only particles of the types + * set in FOF_PRIMARY_LINK_TYPES are inserted. + * \param[in] timebin (unused). + * + * \return if successful returns the number of local+top nodes of the + * constructed tree; + * -1 if the number of allocated tree nodes is too small; + * -2 if the number of allocated tree nodes is even too small to fit + * the top nodes; + * -3 if a particle out of domain box condition was encountered.
+ */ +int force_treebuild_construct(int npart, int optimized_domain_mapping, int insert_only_primary, int timebin) +{ + int idx, i, j, no, flag = 0; + int ngrp, recvTask, count_ListNoData, *no_place = NULL; + unsigned long long *intposp; + MyDouble *posp; + +#ifdef DISABLE_OPTIMIZE_DOMAIN_MAPPING + optimized_domain_mapping = 0; +#endif /* #ifdef DISABLE_OPTIMIZE_DOMAIN_MAPPING */ + +#if !defined(GRAVITY_NOT_PERIODIC) + double boxsize[3]; + boxsize[0] = boxSize_X; + boxsize[1] = boxSize_Y; + boxsize[2] = boxSize_Z; +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */ + + /* create an empty root node */ + Tree_NextFreeNode = Tree_MaxPart; /* index of first free node */ + struct NODE *nfreep = &Nodes[Tree_NextFreeNode]; /* select first node */ + + for(j = 0; j < 8; j++) + nfreep->u.suns[j] = -1; + + nfreep->len = DomainLen; + for(j = 0; j < 3; j++) + nfreep->center[j] = DomainCenter[j]; + + Tree_NumNodes = 1; + Tree_NextFreeNode++; + + /* create a set of empty nodes corresponding to the top-level domain + * grid. We need to generate these nodes first to make sure that we have a + * complete top-level tree which allows the easy insertion of the + * pseudo-particles at the right place + */ + if(force_create_empty_nodes(Tree_MaxPart, 0, 1, 0, 0, 0) < 0) + return -2; + + Tree_FirstNonTopLevelNode = Tree_NextFreeNode; + + /* if a high-resolution region in a global tree is used, we need to generate + * an additional set of empty nodes to make sure that we have a complete + * top-level tree for the high-resolution inset + */ + + /* we first do a dummy allocation here that we'll resize later if needed, in which case the following arrays will have to be moved + * once. 
*/ + int guess_nimported = 1.2 * NumPart; + + Tree_Points = + (struct treepoint_data *)mymalloc_movable(&Tree_Points, "Tree_Points", guess_nimported * sizeof(struct treepoint_data)); + + th_list = (int *)mymalloc_movable(&th_list, "th_list", NumPart * sizeof(int)); + level_list = (unsigned char *)mymalloc_movable(&level_list, "level_list", NumPart * sizeof(unsigned char)); + Tree_IntPos_list = + (unsigned long long *)mymalloc_movable(&Tree_IntPos_list, "Tree_IntPos_list", 3 * NumPart * sizeof(unsigned long long)); + + if(NumPart < NTreeInsert) + { + terminate("ERROR: NumPart %d, NTreeInsert %d! This should not happen!", NumPart, NTreeInsert); + } + + /* first check whether particles are still in domain box */ + for(idx = 0; idx < NTreeInsert; idx++) + { + i = INDEX(idx); + if(i < 0) + continue; + + if(P[i].Ti_Current != All.Ti_Current) + drift_particle(i, All.Ti_Current); + + posp = &Tree_Pos_list[i * 3]; + + for(j = 0; j < 3; j++, posp++) + { +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + *posp = SphP[i].Center[j]; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + *posp = P[i].Pos[j]; + +#if !defined(GRAVITY_NOT_PERIODIC) + if(*posp < 0) + *posp += boxsize[j]; + if(*posp >= boxsize[j]) + *posp -= boxsize[j]; +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */ + if(*posp < DomainCorner[j] || *posp >= DomainCorner[j] + DomainLen) + { + flag = 1; + break; + } + } + } + +#if defined(GRAVITY_NOT_PERIODIC) + int flag_sum; + MPI_Allreduce(&flag, &flag_sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + if(flag_sum) + { + mpi_printf( + "FORCETREE: Particle out of domain box condition was triggered. Need to do an (unplanned) new domain decomposition.\n"); + myfree(Tree_IntPos_list); + myfree(level_list); + myfree(th_list); + return -3; + } +#else /* #if defined(GRAVITY_NOT_PERIODIC) */ + if(flag) + { + char buf[1000]; + sprintf(buf, "i=%d ID=%lld type=%d moved out of box. 
Pos[j=%d]=%g DomainCorner[%d]=%g DomainLen=%g", i, (long long)P[i].ID, + P[i].Type, j, P[i].Pos[j], j, DomainCorner[j], DomainLen); + terminate(buf); + } +#endif /* #if defined(GRAVITY_NOT_PERIODIC) #else */ + +#if defined(EVALPOTENTIAL) && defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC) + double mass_highres = 0, mass_lowres = 0; + for(int idx = 0; idx < NTreeInsert; idx++) + { + int i = INDEX(idx); + if(i < 0) + continue; + +#ifdef PLACEHIGHRESREGION + if(pmforce_is_particle_high_res(P[i].Type, &Tree_Pos_list[3 * i])) + mass_highres += P[i].Mass; + else +#endif /* #ifdef PLACEHIGHRESREGION */ + mass_lowres += P[i].Mass; + } + double mass_pmregions[2] = {mass_lowres, mass_highres}; + MPI_Allreduce(mass_pmregions, All.MassPMregions, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); +#endif /* #if defined(EVALPOTENTIAL) && defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC) */ + + /* now we determine for each point the insertion top-level node, and the task on which this lies */ + if(optimized_domain_mapping) + { + TaskCost = mymalloc("TaskCost", NTask * sizeof(double)); + TaskCount = mymalloc("TaskCount", NTask * sizeof(int)); + DomainCost = mymalloc("DomainCost", NTopleaves * sizeof(double)); + DomainCount = mymalloc("DomainCount", NTopleaves * sizeof(int)); + ListNoData = mymalloc("ListNoData", NTopleaves * sizeof(struct no_list_data)); + no_place = mymalloc("no_place", NTopleaves * sizeof(int)); + + memset(no_place, -1, NTopleaves * sizeof(int)); + + for(j = 0; j < NTopleaves; j++) + DomainCost[j] = 0; + for(j = 0; j < NTopleaves; j++) + DomainCount[j] = 0; + for(j = 0; j < NTask; j++) + TaskCost[j] = 0; + + for(j = 0; j < NTask; j++) + Send_count[j] = 0; + + count_ListNoData = 0; + } + + for(idx = 0; idx < NTreeInsert; idx++) + { + i = INDEX(idx); + if(i < 0) + continue; + + posp = &Tree_Pos_list[i * 3]; + + unsigned long long xxb = force_double_to_int(((*posp++ - DomainCorner[0]) * DomainInverseLen) + 1.0); + unsigned long long yyb = force_double_to_int(((*posp++ - 
DomainCorner[1]) * DomainInverseLen) + 1.0); + unsigned long long zzb = force_double_to_int(((*posp++ - DomainCorner[2]) * DomainInverseLen) + 1.0); + unsigned long long mask = ((unsigned long long)1) << (52 - 1); + unsigned char shiftx = (52 - 1); + unsigned char shifty = (52 - 2); + unsigned char shiftz = (52 - 3); + unsigned char levels = 0; + + intposp = &Tree_IntPos_list[i * 3]; + *intposp++ = xxb; + *intposp++ = yyb; + *intposp++ = zzb; + + no = 0; + while(TopNodes[no].Daughter >= 0) /* walk down top tree to find correct leaf */ + { + unsigned char subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) | + ((unsigned char)((zzb & mask) >> (shiftz--)))); + + mask >>= 1; + levels++; + + no = TopNodes[no].Daughter + TopNodes[no].MortonToPeanoSubnode[subnode]; + } + + no = TopNodes[no].Leaf; + + th_list[i] = no; + level_list[i] = levels; + + if(optimized_domain_mapping) + { + /* find costs for all top leaves */ + + int bin = All.HighestActiveTimeBin; + double cost; + + if(domain_bintolevel[bin] >= 0) + cost = MIN_FLOAT_NUMBER + P[i].GravCost[domain_bintolevel[bin]] * domain_grav_weight[bin]; + else + { + if(domain_refbin[bin] >= 0) + cost = MIN_FLOAT_NUMBER + P[i].GravCost[domain_bintolevel[domain_refbin[bin]]] * domain_grav_weight[bin]; + else + cost = 1.0; + } + + int task = DomainTask[no]; + TaskCost[task] += cost; + + if(task == ThisTask) + { + DomainCost[no] += cost; + DomainCount[no]++; + } + else + { + int p = no_place[no]; + if(p >= 0) + { + ListNoData[p].domainCost += cost; + ListNoData[p].domainCount++; + } + else + { + Send_count[task]++; + p = count_ListNoData++; + no_place[no] = p; + ListNoData[p].task = task; + ListNoData[p].no = no; + ListNoData[p].domainCost = cost; + ListNoData[p].domainCount = 1; + } + } + } + } + + if(optimized_domain_mapping) + { + /* if necessary, re-adjust the mapping of the top-level nodes to the processors */ + + if(All.Ti_Current > 0) + { + double current_balance, impact; 
+ current_balance = force_get_current_balance(&impact); + + mpi_printf("FORCETREE: current balance= %g | %g\n", current_balance, impact); + + if(All.HighestActiveTimeBin < + All.SmallestTimeBinWithDomainDecomposition) /* only do this for steps which did not do a domain decomposition */ + { + if(impact > MAX_IMPACT_BEFORE_OPTIMIZATION) + { + force_get_global_cost_for_leavenodes(count_ListNoData); + force_optimize_domain_mapping(); + } + else + { + mpi_printf( + "FORCETREE: we're not trying to optimize further because overall imbalance impact is only %g (threshold is " + "%g)\n", + impact, MAX_IMPACT_BEFORE_OPTIMIZATION); + memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int)); + } + } + else + { + mpi_printf("FORCETREE: we're not trying to optimize futher because we just did a domain decomposition\n"); + memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int)); + } + } + else + memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int)); + } + else + memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int)); + + if(optimized_domain_mapping) + { + myfree(no_place); + myfree(ListNoData); + myfree(DomainCount); + myfree(DomainCost); + myfree(TaskCount); + myfree(TaskCost); + } + + for(j = 0; j < NTask; j++) + { + Force_Send_count[j] = 0; + } + + for(idx = 0; idx < NTreeInsert; idx++) /* make list of insertion top leaf and task for all particles */ + { + i = INDEX(idx); + if(i < 0) + continue; + + no = th_list[i]; + th_list[i] = DomainNodeIndex[no]; + + int task = DomainNewTask[no]; + + Tree_Task_list[i] = task; + + if(task != ThisTask) + { + Force_Send_count[task]++; + } + } + + MPI_Alltoall(Force_Send_count, 1, MPI_INT, Force_Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, Tree_NumPartImported = 0, Tree_NumPartExported = 0, Force_Recv_offset[0] = 0, Force_Send_offset[0] = 0; j < NTask; j++) + { + Tree_NumPartImported += Force_Recv_count[j]; + Tree_NumPartExported += Force_Send_count[j]; + if(j > 0) + { + Force_Send_offset[j] = Force_Send_offset[j - 
1] + Force_Send_count[j - 1]; + Force_Recv_offset[j] = Force_Recv_offset[j - 1] + Force_Recv_count[j - 1]; + } + } + + if(Tree_NumPartImported > guess_nimported) + { + printf("ThisTask=%d: Tree_NumPartImported=%d NumPart=%d\n", ThisTask, Tree_NumPartImported, NumPart); + Tree_Points = (struct treepoint_data *)myrealloc_movable(Tree_Points, Tree_NumPartImported * sizeof(struct treepoint_data)); + } + + if(Tree_NumPartImported > 0.25 * NumPart) + { + Tree_MaxNodes = (int)(All.TreeAllocFactor * (NumPart + Tree_NumPartImported)) + NTopnodes; + + Nodes += Tree_MaxPart; + Nodes = (struct NODE *)myrealloc_movable(Nodes, (Tree_MaxNodes + 1) * sizeof(struct NODE)); + Nodes -= Tree_MaxPart; + +#ifdef MULTIPLE_NODE_SOFTENING + ExtNodes += Tree_MaxPart; + ExtNodes = (struct ExtNODE *)myrealloc_movable(ExtNodes, (Tree_MaxNodes + 1) * sizeof(struct ExtNODE)); + ExtNodes -= Tree_MaxPart; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + + struct treepoint_data *export_Tree_Points = + (struct treepoint_data *)mymalloc("export_Tree_Points", Tree_NumPartExported * sizeof(struct treepoint_data)); + + for(j = 0; j < NTask; j++) + { + Force_Send_count[j] = 0; + } + + for(idx = 0; idx < NTreeInsert; idx++) /* prepare particle data to be copied to other tasks */ + { + i = INDEX(idx); + if(i < 0) + continue; + + int task = Tree_Task_list[i]; + + if(task != ThisTask) + { + int n = Force_Send_offset[task] + Force_Send_count[task]++; + + /* this point has to go to another task */ + export_Tree_Points[n].Pos[0] = Tree_Pos_list[3 * i + 0]; + export_Tree_Points[n].Pos[1] = Tree_Pos_list[3 * i + 1]; + export_Tree_Points[n].Pos[2] = Tree_Pos_list[3 * i + 2]; + export_Tree_Points[n].IntPos[0] = Tree_IntPos_list[3 * i + 0]; + export_Tree_Points[n].IntPos[1] = Tree_IntPos_list[3 * i + 1]; + export_Tree_Points[n].IntPos[2] = Tree_IntPos_list[3 * i + 2]; + export_Tree_Points[n].Mass = P[i].Mass; + export_Tree_Points[n].OldAcc = P[i].OldAcc; + export_Tree_Points[n].SofteningType = P[i].SofteningType; 
+ export_Tree_Points[n].index = i; + export_Tree_Points[n].Type = P[i].Type; + export_Tree_Points[n].th = th_list[i]; + export_Tree_Points[n].level = level_list[i]; +#ifndef HIERARCHICAL_GRAVITY + if(TimeBinSynchronized[P[i].TimeBinGrav]) + export_Tree_Points[n].ActiveFlag = 1; + else + export_Tree_Points[n].ActiveFlag = 0; +#endif /* #ifndef HIERARCHICAL_GRAVITY */ + } + } + + /* exchange data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + if(recvTask < NTask) + if(Force_Send_count[recvTask] > 0 || Force_Recv_count[recvTask] > 0) + MPI_Sendrecv(&export_Tree_Points[Force_Send_offset[recvTask]], Force_Send_count[recvTask] * sizeof(struct treepoint_data), + MPI_BYTE, recvTask, TAG_DENS_A, &Tree_Points[Force_Recv_offset[recvTask]], + Force_Recv_count[recvTask] * sizeof(struct treepoint_data), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + + myfree(export_Tree_Points); + + Tree_ImportedNodeOffset = Tree_MaxPart + Tree_MaxNodes + NTopleaves; + + int full_flag = 0; + + /* now we insert all particles */ + for(idx = 0; idx < NTreeInsert; idx++) + { + i = INDEX(idx); + if(i < 0) + continue; + +#ifdef NO_GAS_SELFGRAVITY + if(P[i].Type == 0) + continue; +#endif /* #ifdef NO_GAS_SELFGRAVITY */ +#ifdef NO_SELFGRAVITY_TYPE + if(P[i].Type == NO_SELFGRAVITY_TYPE) + continue; +#endif /* #ifdef NO_SELFGRAVITY_TYPE */ +#if defined(FOF) || defined(SUBFIND) + if(insert_only_primary == 1) + { + if(!((1 << P[i].Type) & (FOF_PRIMARY_LINK_TYPES))) + continue; + } + else if(insert_only_primary == 2) + { + if(!((1 << P[i].Type) & (FOF_SECONDARY_LINK_TARGET_TYPES))) + continue; + } +#endif /* #if defined(FOF) || defined(SUBFIND) */ + if(Tree_Task_list[i] == ThisTask) + { + if(force_treebuild_insert_single_point(i, &Tree_IntPos_list[3 * i], th_list[i], level_list[i]) < 0) + { + full_flag = 1; + break; + } + } + } + + if(full_flag == 0) /* only continue if previous step was successful */ + { + for(i = 0; i < Tree_NumPartImported; 
i++) + { +#ifdef NO_GAS_SELFGRAVITY + if(Tree_Points[i].Type == 0) + continue; +#endif /* #ifdef NO_GAS_SELFGRAVITY */ +#ifdef NO_SELFGRAVITY_TYPE + if(Tree_Points[i].Type == NO_SELFGRAVITY_TYPE) + continue; +#endif /* #ifdef NO_SELFGRAVITY_TYPE */ +#if defined(FOF) || defined(SUBFIND) + if(insert_only_primary == 1) + { + if(!((1 << Tree_Points[i].Type) & (FOF_PRIMARY_LINK_TYPES))) + continue; + } + else if(insert_only_primary == 2) + { + if(!((1 << Tree_Points[i].Type) & (FOF_SECONDARY_LINK_TARGET_TYPES))) + continue; + } +#endif /* #if defined(FOF) || defined(SUBFIND) */ + if(force_treebuild_insert_single_point(i + Tree_ImportedNodeOffset, Tree_Points[i].IntPos, Tree_Points[i].th, + Tree_Points[i].level) < 0) + { + full_flag = 1; + break; + } + } + } + + myfree_movable(Tree_IntPos_list); + myfree_movable(level_list); + myfree_movable(th_list); + + if(full_flag) + return -1; + +#ifdef ADDBACKGROUNDGRID + if(force_add_empty_nodes()) + return -1; +#endif /* #ifdef ADDBACKGROUNDGRID */ + + return Tree_NumNodes; +} + +/*! \brief Inserts a single particle into the gravitational tree. + * + * \param[in] i Index of particle. + * \param[in] intpos Integer representation of particle position. + * \param[in] th Target node. + * \param[in] levels Level of target node. 
+ * + * \return 0 if successful; + * -1 if too few nodes have been allocated in the Nodes array + */ +int force_treebuild_insert_single_point(int i, unsigned long long *intpos, int th, unsigned char levels) +{ + int j, parent = -1; + unsigned char subnode = 0; + unsigned long long xxb = intpos[0]; + unsigned long long yyb = intpos[1]; + unsigned long long zzb = intpos[2]; + unsigned long long mask = ((unsigned long long)1) << ((52 - 1) - levels); + unsigned char shiftx = (52 - 1) - levels; + unsigned char shifty = (52 - 2) - levels; + unsigned char shiftz = (52 - 3) - levels; + signed long long centermask = (0xFFF0000000000000llu); + unsigned long long *intppos; + centermask >>= levels; + + while(1) + { + if(th >= Tree_MaxPart && th < Tree_ImportedNodeOffset) /* we are dealing with an internal node */ + { + subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) | + ((unsigned char)((zzb & mask) >> (shiftz--)))); + + centermask >>= 1; + mask >>= 1; + levels++; + + if(levels > MAX_TREE_LEVEL) + { + /* seems like we're dealing with particles at identical (or extremely close) + * locations. Shift subnode index to allow tree construction. Note: Multipole moments + * of tree are still correct, but one should MAX_TREE_LEVEL large enough to have + * DomainLen/2^MAX_TREE_LEEL < gravitational softening length + */ + for(j = 0; j < 8; j++) + { + if(Nodes[th].u.suns[subnode] < 0) + break; + + subnode++; + if(subnode >= 8) + subnode = 7; + } + } + + int nn = Nodes[th].u.suns[subnode]; + + if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */ + { + parent = th; + th = nn; + } + else + { + /* here we have found an empty slot where we can attach + * the new particle as a leaf. + */ + Nodes[th].u.suns[subnode] = i; + break; /* done for this particle */ + } + } + else + { + /* We try to insert into a leaf with a single particle. Need + * to generate a new internal node at this point. 
+ */ + Nodes[parent].u.suns[subnode] = Tree_NextFreeNode; + struct NODE *nfreep = &Nodes[Tree_NextFreeNode]; + + double len = ((double)(mask << 1)) * DomainBigFac; + double cx = ((double)((xxb & centermask) | mask)) * DomainBigFac + DomainCorner[0]; + double cy = ((double)((yyb & centermask) | mask)) * DomainBigFac + DomainCorner[1]; + double cz = ((double)((zzb & centermask) | mask)) * DomainBigFac + DomainCorner[2]; + + nfreep->len = len; + nfreep->center[0] = cx; + nfreep->center[1] = cy; + nfreep->center[2] = cz; + + for(j = 0; j < 8; j++) + nfreep->u.suns[j] = -1; + + if(th >= Tree_ImportedNodeOffset) + intppos = Tree_Points[th - Tree_ImportedNodeOffset].IntPos; + else + intppos = &Tree_IntPos_list[3 * th]; + + subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) | + ((unsigned char)((intppos[2] & mask) >> shiftz))); + + nfreep->u.suns[subnode] = th; + + th = Tree_NextFreeNode; /* resume trying to insert the new particle the newly created internal node */ + Tree_NumNodes++; + Tree_NextFreeNode++; + + if(Tree_NumNodes >= Tree_MaxNodes) + { + return -1; + } + } + } + + return 0; +} + +/*! \brief Distributes the gravity costs of each node among the particles it + * contains. 
+ * + * \return void + */ +void force_assign_cost_values(void) +{ + int idx, i, ngrp, recvTask; + + if(TakeLevel >= 0) + { + int thread; + + /* consolidate the cost measurements done by the different threads */ + for(thread = 1; thread < NUM_THREADS; thread++) + for(i = 0; i < NumPart; i++) + Thread[0].P_CostCount[i] += Thread[thread].P_CostCount[i]; + + for(thread = 1; thread < NUM_THREADS; thread++) + for(i = 0; i < Tree_NumNodes; i++) + Thread[0].Node_CostCount[i + Tree_MaxPart] += Thread[thread].Node_CostCount[i + Tree_MaxPart]; + + for(thread = 1; thread < NUM_THREADS; thread++) + for(i = 0; i < Tree_NumPartImported; i++) + Thread[0].TreePoints_CostCount[i] += Thread[thread].TreePoints_CostCount[i]; + +#ifdef VERBOSE + /* calculate some check sums to validate the total cost assignment */ + double sumbefore = 0, sumbeforetot; + for(i = 0; i < NumPart; i++) + sumbefore += P[i].GravCost[TakeLevel]; + MPI_Allreduce(&sumbefore, &sumbeforetot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + double nodecost = 0, nodecosttot; + for(i = 0; i < Tree_NumNodes; i++) + nodecost += Thread[0].Node_CostCount[i + Tree_MaxPart]; + MPI_Allreduce(&nodecost, &nodecosttot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + double importedcost = 0, importedcosttot; + for(i = 0; i < Tree_NumPartImported; i++) + importedcost += Thread[0].TreePoints_CostCount[i]; + MPI_Allreduce(&importedcost, &importedcosttot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + double partcost = 0, partcosttot; + for(idx = 0; idx < NTreeInsert; idx++) + { + i = INDEX(idx); + if(i < 0) + continue; + + { + int no = Father[i]; + + if(no >= 0) + partcost += Thread[0].P_CostCount[i]; + } + } + MPI_Allreduce(&partcost, &partcosttot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); +#endif /* #ifdef VERBOSE */ + + double *loc_cost = mymalloc("loc_cost", NTopnodes * sizeof(double)); + double *glob_cost = mymalloc("glob_cost", NTopnodes * sizeof(double)); + + for(i = 0; i < NTopnodes; i++) + loc_cost[i] = Thread[0].Node_CostCount[i + 
Tree_MaxPart]; + + MPI_Allreduce(loc_cost, glob_cost, NTopnodes, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + for(i = 0; i < NTopnodes; i++) + Thread[0].Node_CostCount[i + Tree_MaxPart] = glob_cost[i]; + + myfree(glob_cost); + myfree(loc_cost); + + for(i = 0; i < NumPart; i++) + P[i].GravCost[TakeLevel] = 0; + + /* distribute costs of parent nodes to particles */ + for(idx = 0; idx < NTreeInsert; idx++) + { + i = INDEX(idx); + if(i < 0) + continue; + + { + double sum = Thread[0].P_CostCount[i]; + + int no = Father[i]; + + while(no >= 0) + { + if(Nodes[no].u.d.mass > 0) + sum += Thread[0].Node_CostCount[no] * (P[i].Mass / Nodes[no].u.d.mass); + + no = Nodes[no].u.d.father; + } + + P[i].GravCost[TakeLevel] = sum; + } + } + + /* Now, if we moved points to other CPUs, we need to collect these cost values */ + struct gravcost_data + { + float GravCost; + int index; + } * gdata_export, *gdata_import; + + gdata_export = mymalloc("grav_data_export", Tree_NumPartExported * sizeof(struct gravcost_data)); + gdata_import = mymalloc("grav_data_import", Tree_NumPartImported * sizeof(struct gravcost_data)); + + for(i = 0; i < Tree_NumPartImported; i++) + { + double sum = Thread[0].TreePoints_CostCount[i]; + + int no = Father[i + Tree_MaxPart]; + + while(no >= 0) + { + if(Nodes[no].u.d.mass > 0) + sum += Thread[0].Node_CostCount[no] * Tree_Points[i].Mass / Nodes[no].u.d.mass; + + no = Nodes[no].u.d.father; + } + + gdata_import[i].GravCost = sum; + gdata_import[i].index = Tree_Points[i].index; + } + + /* exchange data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Force_Send_count[recvTask] > 0 || Force_Recv_count[recvTask] > 0) + { + MPI_Sendrecv(&gdata_import[Force_Recv_offset[recvTask]], Force_Recv_count[recvTask] * sizeof(struct gravcost_data), + MPI_BYTE, recvTask, TAG_DENS_A, &gdata_export[Force_Send_offset[recvTask]], + Force_Send_count[recvTask] * sizeof(struct gravcost_data), MPI_BYTE, recvTask, TAG_DENS_A, 
+ MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + } + + for(i = 0; i < Tree_NumPartExported; i++) + P[gdata_export[i].index].GravCost[TakeLevel] = gdata_export[i].GravCost; + + myfree(gdata_import); + myfree(gdata_export); + +#ifdef VERBOSE + double sum = 0, sumtot; + for(i = 0; i < NumPart; i++) + sum += P[i].GravCost[TakeLevel]; + MPI_Allreduce(&sum, &sumtot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + mpi_printf( + "FORCETREE: Cost assignment for TakeLevel=%d, highest active-TimeBin=%d yields cost=%g|%g (before %g) nodecosttot=%g " + "partcosttot=%g importedcosttot=%g\n", + TakeLevel, All.HighestActiveTimeBin, sumtot, nodecosttot + partcosttot + importedcosttot, sumbeforetot, nodecosttot, + partcosttot, importedcosttot); +#else /* #ifdef VERBOSE */ + mpi_printf("FORCETREE: Cost assignment for TakeLevel=%d, highest active-TimeBin=%d\n", TakeLevel, All.HighestActiveTimeBin); +#endif /* #ifdef VERBOSE #else */ + } +} + +/*! \brief Recursively creates a set of empty tree nodes which + * corresponds to the top-level tree for the domain grid. + * + * This is done to ensure that this top-level tree is always "complete" so + * that we can easily associate the pseudo-particles of other CPUs with + * tree-nodes at a given level in the tree, even when the particle population + * is so sparse that some of these nodes are actually empty. + * + * \param[in] no Parent node for which daughter nodes shall be created. + * \param[in] topnode Index of the parent node in the 'TopNodes' array. + * \param[in] bits 2^bits is the number of nodes per dimension at the level of + * the daughter nodes. + * \param[in] x Position of the parent node in the x direction, falls in the + * range [0,2^(bits-1) - 1]. + * \param[in] y Position of the parent node in the y direction, falls in the + * range [0,2^(bits-1) - 1]. + * \param[in] z Position of the parent node in the z direction, falls in the + * range [0,2^(bits-1) - 1]. 
+ * + * \return 0 if successful; + * -1 if number of allocated tree nodes is too small to fit the newly + * created nodes. + */ +int force_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z) +{ + if(TopNodes[topnode].Daughter >= 0) + { + for(int i = 0; i < 2; i++) /* loop over daughter nodes */ + for(int j = 0; j < 2; j++) + for(int k = 0; k < 2; k++) + { + if(Tree_NumNodes >= Tree_MaxNodes) + { + if(All.TreeAllocFactor > MAX_TREE_ALLOC_FACTOR) + { + char buf[500]; + sprintf(buf, "task %d: looks like a serious problem (NTopnodes=%d), stopping with particle dump.\n", ThisTask, + NTopnodes); + dump_particles(); + terminate(buf); + } + return -1; + } + + int sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits); + + int count = i + 2 * j + 4 * k; + + Nodes[no].u.suns[count] = Tree_NextFreeNode; + + double lenhalf = 0.25 * Nodes[no].len; + Nodes[Tree_NextFreeNode].len = 0.5 * Nodes[no].len; + Nodes[Tree_NextFreeNode].center[0] = Nodes[no].center[0] + (2 * i - 1) * lenhalf; + Nodes[Tree_NextFreeNode].center[1] = Nodes[no].center[1] + (2 * j - 1) * lenhalf; + Nodes[Tree_NextFreeNode].center[2] = Nodes[no].center[2] + (2 * k - 1) * lenhalf; + + for(int n = 0; n < 8; n++) + Nodes[Tree_NextFreeNode].u.suns[n] = -1; + + if(TopNodes[TopNodes[topnode].Daughter + sub].Daughter == -1) + DomainNodeIndex[TopNodes[TopNodes[topnode].Daughter + sub].Leaf] = Tree_NextFreeNode; + + Tree_NextFreeNode++; + Tree_NumNodes++; + + if(force_create_empty_nodes(Tree_NextFreeNode - 1, TopNodes[topnode].Daughter + sub, bits + 1, 2 * x + i, 2 * y + j, + 2 * z + k) < 0) + return -1; /* create granddaughter nodes for current daughter node */ + } + } + + return 0; +} + +/*! \brief Inserts pseudo particles. + * + * This function inserts pseudo-particles which will represent the mass + * distribution of the other CPUs. 
Initially, the mass of the + * pseudo-particles is set to zero, and their coordinate is set to the + * center of the domain-cell they correspond to. These quantities will be + * updated later on. + * + * \return void + */ +void force_insert_pseudo_particles(void) +{ + for(int i = 0; i < NTopleaves; i++) + { + int index = DomainNodeIndex[i]; + + if(DomainNewTask[i] != ThisTask) + Nodes[index].u.suns[0] = Tree_MaxPart + Tree_MaxNodes + i; + } +} + +/*! \brief Determines multipole moments. + * + * This routine determines the multipole moments for a given internal node + * and all its subnodes using a recursive computation. The result is + * stored in the Nodes[] structure in the sequence of this tree-walk. + * + * \param[in] no Node for which the moments shall be found. + * \param[in] sib Sibling of node no. + * \param[in] father Father node of node no. + * \param[in, out] last Last node for which this function was called, or -1 + * when called for root node. + * + * \return void + */ +void force_update_node_recursive(int no, int sib, int father, int *last) +{ + int j, jj, p, pp, nextsib, suns[8]; + double s[3], mass; + unsigned char maxsofttype; +#ifdef MULTIPLE_NODE_SOFTENING + double mass_per_type[NSOFTTYPES]; +#ifdef ADAPTIVE_HYDRO_SOFTENING + unsigned char maxhydrosofttype; + unsigned char minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + if(no >= Tree_MaxPart && no < Tree_MaxPart + Tree_MaxNodes) /* internal node */ + { + for(j = 0; j < 8; j++) + suns[j] = Nodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will + overwrite one element (union!) 
*/ + if(*last >= 0) + { + if(*last >= Tree_MaxPart) + { + if(*last >= Tree_MaxPart + Tree_MaxNodes) + Nextnode[*last - Tree_MaxNodes] = no; /* a pseudo-particle or imported point */ + else + Nodes[*last].u.d.nextnode = no; + } + else + Nextnode[*last] = no; + } + + *last = no; + + mass = 0; + s[0] = 0; + s[1] = 0; + s[2] = 0; + maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; + +#ifdef MULTIPLE_NODE_SOFTENING + for(j = 0; j < NSOFTTYPES; j++) + mass_per_type[j] = 0; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + maxhydrosofttype = NSOFTTYPES; + minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + for(j = 0; j < 8; j++) + { + if((p = suns[j]) >= 0) + { + /* check if we have a sibling on the same level */ + for(jj = j + 1; jj < 8; jj++) + if((pp = suns[jj]) >= 0) + break; + + if(jj < 8) /* yes, we do */ + nextsib = pp; + else + nextsib = sib; + + force_update_node_recursive(p, nextsib, no, last); + + if(p < Tree_MaxPart) /* a particle */ + { + MyDouble *pos = &Tree_Pos_list[3 * p]; + + mass += P[p].Mass; + s[0] += P[p].Mass * pos[0]; + s[1] += P[p].Mass * pos[1]; + s[2] += P[p].Mass * pos[2]; + + if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[P[p].SofteningType]) + maxsofttype = P[p].SofteningType; + +#ifdef MULTIPLE_NODE_SOFTENING +#ifdef ADAPTIVE_HYDRO_SOFTENING + mass_per_type[P[p].Type == 0 ? 
0 : P[p].SofteningType] += P[p].Mass; + + if(P[p].Type == 0) + { + if(maxhydrosofttype < P[p].SofteningType) + maxhydrosofttype = P[p].SofteningType; + if(minhydrosofttype > P[p].SofteningType) + minhydrosofttype = P[p].SofteningType; + } +#else /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + mass_per_type[P[p].SofteningType] += P[p].Mass; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + else if(p < Tree_MaxPart + Tree_MaxNodes) /* an internal node */ + { + mass += Nodes[p].u.d.mass; + s[0] += Nodes[p].u.d.mass * Nodes[p].u.d.s[0]; + s[1] += Nodes[p].u.d.mass * Nodes[p].u.d.s[1]; + s[2] += Nodes[p].u.d.mass * Nodes[p].u.d.s[2]; + + if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[Nodes[p].u.d.maxsofttype]) + maxsofttype = Nodes[p].u.d.maxsofttype; + +#ifdef MULTIPLE_NODE_SOFTENING + int k; + for(k = 0; k < NSOFTTYPES; k++) + mass_per_type[k] += ExtNodes[p].mass_per_type[k]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + if(maxhydrosofttype < Nodes[p].u.d.maxhydrosofttype) + maxhydrosofttype = Nodes[p].u.d.maxhydrosofttype; + if(minhydrosofttype > Nodes[p].u.d.minhydrosofttype) + minhydrosofttype = Nodes[p].u.d.minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + else if(p < Tree_MaxPart + Tree_MaxNodes + NTopleaves) /* a pseudo particle */ + { + /* nothing to be done here because the mass of the + * pseudo-particle is still zero. This will be changed + * later. 
+ */ + } + else + { /* an imported point */ + int n = p - (Tree_MaxPart + Tree_MaxNodes + NTopleaves); + + if(n >= Tree_NumPartImported) + terminate("n >= Tree_NumPartImported"); + + mass += Tree_Points[n].Mass; + s[0] += Tree_Points[n].Mass * Tree_Points[n].Pos[0]; + s[1] += Tree_Points[n].Mass * Tree_Points[n].Pos[1]; + s[2] += Tree_Points[n].Mass * Tree_Points[n].Pos[2]; + + /* Might not need the following routine */ + if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[Tree_Points[n].SofteningType]) + maxsofttype = Tree_Points[n].SofteningType; + +#ifdef MULTIPLE_NODE_SOFTENING +#ifdef ADAPTIVE_HYDRO_SOFTENING + mass_per_type[Tree_Points[n].Type == 0 ? 0 : Tree_Points[n].SofteningType] += Tree_Points[n].Mass; + + if(Tree_Points[n].Type == 0) + { + if(maxhydrosofttype < Tree_Points[n].SofteningType) + maxhydrosofttype = Tree_Points[n].SofteningType; + if(minhydrosofttype > Tree_Points[n].SofteningType) + minhydrosofttype = Tree_Points[n].SofteningType; + } +#else /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + mass_per_type[Tree_Points[n].SofteningType] += Tree_Points[n].Mass; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + } + } + + if(mass) + { + s[0] /= mass; + s[1] /= mass; + s[2] /= mass; + } + else + { + s[0] = Nodes[no].center[0]; + s[1] = Nodes[no].center[1]; + s[2] = Nodes[no].center[2]; + } + + Nodes[no].u.d.mass = mass; + Nodes[no].u.d.s[0] = s[0]; + Nodes[no].u.d.s[1] = s[1]; + Nodes[no].u.d.s[2] = s[2]; + Nodes[no].u.d.maxsofttype = maxsofttype; +#ifdef MULTIPLE_NODE_SOFTENING + int k; + for(k = 0; k < NSOFTTYPES; k++) + ExtNodes[no].mass_per_type[k] = mass_per_type[k]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + Nodes[no].u.d.maxhydrosofttype = maxhydrosofttype; + Nodes[no].u.d.minhydrosofttype = minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + Nodes[no].u.d.sibling = sib; + Nodes[no].u.d.father = father; + } + else /* single particle or 
pseudo particle */ + { + if(*last >= 0) + { + if(*last >= Tree_MaxPart) + { + if(*last >= Tree_MaxPart + Tree_MaxNodes) + Nextnode[*last - Tree_MaxNodes] = no; /* a pseudo-particle or an imported point */ + else + Nodes[*last].u.d.nextnode = no; + } + else + Nextnode[*last] = no; + } + + *last = no; + + if(no < Tree_MaxPart) /* only set it for single particles... */ + Father[no] = father; + if(no >= Tree_MaxPart + Tree_MaxNodes + NTopleaves) /* ...or for imported points */ + Father[no - Tree_MaxNodes - NTopleaves] = father; + } +} + +/*! \brief Communicates the values of the multipole moments of the + * top-level tree-nodes of the domain grid. + * + * This data can then be used to update the pseudo-particles on each CPU + * accordingly. + * + * \return void + */ +void force_exchange_topleafdata(void) +{ + struct DomainNODE + { + MyDouble s[3]; + MyDouble mass; +#ifdef MULTIPLE_NODE_SOFTENING + MyDouble mass_per_type[NSOFTTYPES]; +#ifdef ADAPTIVE_HYDRO_SOFTENING + unsigned char maxhydrosofttype; + unsigned char minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + unsigned char maxsofttype; +#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) + int NodeGrNr; +#endif /* #if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) */ + }; + + struct DomainNODE *DomainMoment = (struct DomainNODE *)mymalloc("DomainMoment", NTopleaves * sizeof(struct DomainNODE)); + + /* share the pseudo-particle data accross CPUs */ + int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask); + int *recvoffset = (int *)mymalloc("recvoffset", sizeof(int) * NTask); + int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask); + int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask); + + for(int task = 0; task < NTask; task++) + recvcounts[task] = 0; + + for(int n = 0; n < NTopleaves; n++) + recvcounts[DomainNewTask[n]]++; + + for(int task = 0; task < NTask; task++) + bytecounts[task] = 
recvcounts[task] * sizeof(struct DomainNODE); + + recvoffset[0] = 0, byteoffset[0] = 0; + for(int task = 1; task < NTask; task++) + { + recvoffset[task] = recvoffset[task - 1] + recvcounts[task - 1]; + byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1]; + } + + struct DomainNODE *loc_DomainMoment = + (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE)); + + int idx = 0; + for(int n = 0; n < NTopleaves; n++) + { + if(DomainNewTask[n] == ThisTask) + { + int no = DomainNodeIndex[n]; + + /* read out the multipole moments from the local base cells */ + loc_DomainMoment[idx].s[0] = Nodes[no].u.d.s[0]; + loc_DomainMoment[idx].s[1] = Nodes[no].u.d.s[1]; + loc_DomainMoment[idx].s[2] = Nodes[no].u.d.s[2]; + loc_DomainMoment[idx].mass = Nodes[no].u.d.mass; + loc_DomainMoment[idx].maxsofttype = Nodes[no].u.d.maxsofttype; + +#ifdef MULTIPLE_NODE_SOFTENING + for(int k = 0; k < NSOFTTYPES; k++) + loc_DomainMoment[idx].mass_per_type[k] = ExtNodes[no].mass_per_type[k]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + loc_DomainMoment[idx].maxhydrosofttype = Nodes[no].u.d.maxhydrosofttype; + loc_DomainMoment[idx].minhydrosofttype = Nodes[no].u.d.minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + idx++; + } + } + + MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD); + + for(int task = 0; task < NTask; task++) + recvcounts[task] = 0; + + for(int n = 0; n < NTopleaves; n++) + { + int task = DomainNewTask[n]; + if(task != ThisTask) + { + int no = DomainNodeIndex[n]; + int idx = recvoffset[task] + recvcounts[task]++; + + Nodes[no].u.d.s[0] = DomainMoment[idx].s[0]; + Nodes[no].u.d.s[1] = DomainMoment[idx].s[1]; + Nodes[no].u.d.s[2] = DomainMoment[idx].s[2]; + Nodes[no].u.d.mass = DomainMoment[idx].mass; + Nodes[no].u.d.maxsofttype = DomainMoment[idx].maxsofttype; + +#ifdef MULTIPLE_NODE_SOFTENING + for(int 
k = 0; k < NSOFTTYPES; k++) + ExtNodes[no].mass_per_type[k] = DomainMoment[idx].mass_per_type[k]; +#ifdef ADAPTIVE_HYDRO_SOFTENING + Nodes[no].u.d.maxhydrosofttype = DomainMoment[idx].maxhydrosofttype; + Nodes[no].u.d.minhydrosofttype = DomainMoment[idx].minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + } + + myfree(loc_DomainMoment); + myfree(byteoffset); + myfree(bytecounts); + myfree(recvoffset); + myfree(recvcounts); + myfree(DomainMoment); +} + +/*! \brief Updates the top-level tree after the multipole moments of the + * pseudo-particles have been updated. + * + * \param[in] no Node to be updated. + * \param[in] topnode Index of the node no in the 'TopNodes' array. + * \param[in] bits 2^bits is the number of nodes per dimension at the level of + * the daughter nodes of node no. + * \param[in] x Position of the node no in the x direction, falls in the + * range [0,2^(bits-1) - 1]. + * \param[in] y Position of the node no in the y direction, falls in the + * range [0,2^(bits-1) - 1]. + * \param[in] z Position of the node no in the z direction, falls in the + * range [0,2^(bits-1) - 1]. 
+ * + * \return void + */ +void force_treeupdate_toplevel(int no, int topnode, int bits, int x, int y, int z) +{ + double s[3], mass; + unsigned char maxsofttype; +#ifdef MULTIPLE_NODE_SOFTENING + double mass_per_type[NSOFTTYPES]; +#ifdef ADAPTIVE_HYDRO_SOFTENING + unsigned char maxhydrosofttype; + unsigned char minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + if(TopNodes[topnode].Daughter >= 0) + { + for(int i = 0; i < 2; i++) + for(int j = 0; j < 2; j++) + for(int k = 0; k < 2; k++) + { + int sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits); + + Tree_NextFreeNode++; + force_treeupdate_toplevel(Tree_NextFreeNode - 1, TopNodes[topnode].Daughter + sub, bits + 1, 2 * x + i, 2 * y + j, + 2 * z + k); + } + + mass = 0; + s[0] = 0; + s[1] = 0; + s[2] = 0; + maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; +#ifdef MULTIPLE_NODE_SOFTENING + for(int j = 0; j < NSOFTTYPES; j++) + mass_per_type[j] = 0; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + maxhydrosofttype = NSOFTTYPES; + minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + int p = Nodes[no].u.d.nextnode; + + for(int j = 0; j < 8; j++) /* since we are dealing with top-level nodes, we know that there are 8 consecutive daughter nodes */ + { + if(p >= Tree_MaxPart && p < Tree_MaxPart + Tree_MaxNodes) /* internal node */ + { + mass += Nodes[p].u.d.mass; + s[0] += Nodes[p].u.d.mass * Nodes[p].u.d.s[0]; + s[1] += Nodes[p].u.d.mass * Nodes[p].u.d.s[1]; + s[2] += Nodes[p].u.d.mass * Nodes[p].u.d.s[2]; + + if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[Nodes[p].u.d.maxsofttype]) + maxsofttype = Nodes[p].u.d.maxsofttype; +#ifdef MULTIPLE_NODE_SOFTENING + for(int k = 0; k < NSOFTTYPES; k++) + mass_per_type[k] += ExtNodes[p].mass_per_type[k]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + if(maxhydrosofttype < Nodes[p].u.d.maxhydrosofttype) + maxhydrosofttype 
= Nodes[p].u.d.maxhydrosofttype; + if(minhydrosofttype > Nodes[p].u.d.minhydrosofttype) + minhydrosofttype = Nodes[p].u.d.minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + else + terminate("may not happen"); + + p = Nodes[p].u.d.sibling; + } + + if(mass) + { + s[0] /= mass; + s[1] /= mass; + s[2] /= mass; + } + else + { + s[0] = Nodes[no].center[0]; + s[1] = Nodes[no].center[1]; + s[2] = Nodes[no].center[2]; + } + + Nodes[no].u.d.s[0] = s[0]; + Nodes[no].u.d.s[1] = s[1]; + Nodes[no].u.d.s[2] = s[2]; + Nodes[no].u.d.mass = mass; + Nodes[no].u.d.maxsofttype = maxsofttype; +#ifdef MULTIPLE_NODE_SOFTENING + for(int k = 0; k < NSOFTTYPES; k++) + ExtNodes[no].mass_per_type[k] = mass_per_type[k]; +#ifdef ADAPTIVE_HYDRO_SOFTENING + Nodes[no].u.d.maxhydrosofttype = maxhydrosofttype; + Nodes[no].u.d.minhydrosofttype = minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } +} + +/*! \brief Allocates the memory used for storage of the tree nodes. + * + * Usually, the number of required nodes is of order 0.7*maxpart, but if this + * is insufficient, the code will try to allocated more space. + * + * \param[in] maxpart Number of particles on the current task. + * \param[in] maxindex The Nodes pointer will be shifted such that the index + * of the first element is maxindex. 
+ * + * \return void + */ +void force_treeallocate(int maxpart, int maxindex) +{ + if(Nodes) + terminate("already allocated"); + + Tree_MaxPart = maxindex; + Tree_MaxNodes = (int)(All.TreeAllocFactor * maxpart) + NTopnodes; + + DomainNewTask = (int *)mymalloc_movable(&DomainNewTask, "DomainNewTask", NTopleaves * sizeof(int)); + DomainNodeIndex = (int *)mymalloc_movable(&DomainNodeIndex, "DomainNodeIndex", NTopleaves * sizeof(int)); + Tree_Task_list = (int *)mymalloc_movable(&Tree_Task_list, "Tree_Task_list", maxpart * sizeof(int)); + Tree_Pos_list = (MyDouble *)mymalloc_movable(&Tree_Pos_list, "Tree_Pos_list", 3 * maxpart * sizeof(MyDouble)); + + Nodes = (struct NODE *)mymalloc_movable(&Nodes, "Nodes", (Tree_MaxNodes + 1) * sizeof(struct NODE)); + Nodes -= Tree_MaxPart; +#ifdef MULTIPLE_NODE_SOFTENING + ExtNodes = (struct ExtNODE *)mymalloc_movable(&ExtNodes, "ExtNodes", (Tree_MaxNodes + 1) * sizeof(struct ExtNODE)); + ExtNodes -= Tree_MaxPart; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ +} + +/*! \brief Frees the memory allocated for the tree. + * + * I.e. it frees the space allocated by the function force_treeallocate(). + * + * \return void + */ +void force_treefree(void) +{ + if(Nodes) + { +#ifdef MULTIPLE_NODE_SOFTENING + myfree(ExtNodes + Tree_MaxPart); + ExtNodes = NULL; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + myfree(Nodes + Tree_MaxPart); + myfree(Tree_Pos_list); + myfree(Tree_Task_list); + myfree(DomainNodeIndex); + myfree(DomainNewTask); + + Nodes = NULL; + DomainNodeIndex = NULL; + DomainNewTask = NULL; + Tree_Task_list = NULL; + Nextnode = NULL; + Father = NULL; + } + else + terminate("trying to free the tree even though it's not allocated"); +} + +/*! \brief Dump particle data into file. + * + * This function dumps some of the basic particle data to a file. In case + * the tree construction fails, it is called just before the run + * terminates with an error message. 
Examination of the generated file may + * then give clues to what caused the problem. + * + * \return void + */ +void dump_particles(void) +{ + char buffer[200]; + sprintf(buffer, "particles%d.dat", ThisTask); + FILE *fd = fopen(buffer, "w"); + my_fwrite(&NumPart, 1, sizeof(int), fd); + for(int i = 0; i < NumPart; i++) + my_fwrite(&P[i].Pos[0], 3, sizeof(MyDouble), fd); + for(int i = 0; i < NumPart; i++) + my_fwrite(&P[i].Vel[0], 3, sizeof(MyFloat), fd); + for(int i = 0; i < NumPart; i++) + my_fwrite(&P[i].ID, 1, sizeof(int), fd); + fclose(fd); +} + +#ifdef ADDBACKGROUNDGRID +/*! \brief Add additional empty nodes. + * + * Called during tree construction if ADDBACKGROUNDGRID is active. + * + * \return 0: default; 1: number of nodes > max number of nodes. + */ +int force_add_empty_nodes(void) +{ + int nempty = 0; + int no, j, subnode; + + for(no = Tree_MaxPart; no < Tree_MaxPart + Tree_NumNodes; no++) + { + int count = 0; + + for(subnode = 0; subnode < 8; subnode++) + if(Nodes[no].u.suns[subnode] == -1) + count++; + + if(count < 8) + { + for(subnode = 0, count = 0; subnode < 8; subnode++) + if(Nodes[no].u.suns[subnode] == -1) + { + Nodes[no].u.suns[subnode] = Tree_NextFreeNode; + struct NODE *nfreep = &Nodes[Tree_NextFreeNode]; + + nfreep->len = 0.5 * Nodes[no].len; + double lenhalf = 0.25 * Nodes[no].len; + + if(subnode & 1) + nfreep->center[0] = Nodes[no].center[0] + lenhalf; + else + nfreep->center[0] = Nodes[no].center[0] - lenhalf; + + if(subnode & 2) + nfreep->center[1] = Nodes[no].center[1] + lenhalf; + else + nfreep->center[1] = Nodes[no].center[1] - lenhalf; + + if(subnode & 4) + nfreep->center[2] = Nodes[no].center[2] + lenhalf; + else + nfreep->center[2] = Nodes[no].center[2] - lenhalf; + + for(j = 0; j < 8; j++) + nfreep->u.suns[j] = -1; + + Tree_NumNodes++; + Tree_NextFreeNode++; + + if(Tree_NumNodes >= Tree_MaxNodes) + { + if(All.TreeAllocFactor > 5.0) + { + char buf[500]; + sprintf( + buf, + "task %d: looks like a serious problem, stopping with 
particle dump. Tree_NumNodes=%d Tree_MaxNodes=%d\n", + ThisTask, Tree_NumNodes, Tree_MaxNodes); + dump_particles(); + terminate(buf); + } + return 1; + } + nempty++; + } + } + } + + printf("FORCETREE: Task %d has added %d empty nodes\n", ThisTask, nempty); + return 0; +} +#endif /* #ifdef ADDBACKGROUNDGRID */ diff --git a/src/amuse/community/arepo/src/gravity/forcetree.h b/src/amuse/community/arepo/src/gravity/forcetree.h new file mode 100644 index 0000000000..0371e7e9f7 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/forcetree.h @@ -0,0 +1,168 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/forcetree.h + * \date 05/2018 + * \brief Functions and data structurer for forcetree. 
+ * \details
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 28.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef FORCETREE_H
+#define FORCETREE_H
+
+#ifndef INLINE_FUNC
+#define INLINE_FUNC
+#endif /* #ifndef INLINE_FUNC */
+/*! record type of the 'in' argument of force_treeevaluate() */
+typedef struct
+{
+  MyDouble Pos[3];
+  float OldAcc;
+  unsigned char Type;
+  unsigned char SofteningType;
+
+  int Firstnode;
+} gravdata_in;
+/*! record type of the 'out' argument of force_treeevaluate(); Potential and GravInteractions exist only with the matching compile-time switch */
+typedef struct
+{
+  MyFloat Acc[3];
+#ifdef EVALPOTENTIAL
+  MyFloat Potential;
+#endif /* #ifdef EVALPOTENTIAL */
+#ifdef OUTPUTGRAVINTERACTIONS
+  int GravInteractions;
+#endif /* #ifdef OUTPUTGRAVINTERACTIONS */
+
+} gravdata_out;
+/* STRETCHX/Y/Z take the value of LONG_X/Y/Z where that switch is defined and are 1 otherwise */
+#ifdef LONG_X
+#define STRETCHX (LONG_X)
+#else /* #ifdef LONG_X */
+#define STRETCHX 1
+#endif /* #ifdef LONG_X #else */
+
+#ifdef LONG_Y
+#define STRETCHY (LONG_Y)
+#else /* #ifdef LONG_Y */
+#define STRETCHY 1
+#endif /* #ifdef LONG_Y #else */
+
+#ifdef LONG_Z
+#define STRETCHZ (LONG_Z)
+#else /* #ifdef LONG_Z */
+#define STRETCHZ 1
+#endif /* #ifdef LONG_Z #else */
+
+#define DBX 1
+#define DBY 1
+#define DBZ 1
+#define DBX_EXTRA 0
+#define DBY_EXTRA 0
+#define DBZ_EXTRA 0
+
+/*! length of look-up table for short-range force kernel in TreePM algorithm */
+#define NTAB 127
+
+#if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC)
+/* Ewald correction tables: EN is the base resolution, ENX/ENY/ENZ scale it with DB* and STRETCH* */
+#define EN 64
+
+#define ENX (DBX * STRETCHX * EN)
+#define ENY (DBY * STRETCHY * EN)
+#define ENZ (DBZ * STRETCHZ * EN)
+/* each table has (ENX + 1) * (ENY + 1) * (ENZ + 1) entries */
+extern MyFloat Ewd_fcorrx[ENX + 1][ENY + 1][ENZ + 1];
+extern MyFloat Ewd_fcorry[ENX + 1][ENY + 1][ENZ + 1];
+extern MyFloat Ewd_fcorrz[ENX + 1][ENY + 1][ENZ + 1];
+extern MyFloat Ewd_potcorr[ENX + 1][ENY + 1][ENZ + 1];
+extern double Ewd_fac_intp;
+
+extern int NTreeInsert;
+
+#endif /* #if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) */
+
+#define MAX_TREE_LEVEL 30
+#define MAX_TREE_ALLOC_FACTOR 30.0
+
+#define TAKE_NSLOTS_IN_ONE_GO 32
+
+#define MAX_IMPACT_BEFORE_OPTIMIZATION 1.03
+/* bit positions (not masks); BITFLAG_MASK below shows how they are turned into masks */
+#define BITFLAG_TOPLEVEL 0
+#define BITFLAG_DEPENDS_ON_LOCAL_MASS 1
+#define BITFLAG_DEPENDS_ON_EXTERN_MASS 2
+#define BITFLAG_INTERNAL_TOPLEVEL 6
+#define BITFLAG_MULTIPLEPARTICLES 7
+#define BITFLAG_CONTAINS_GAS 10
+
+#define BITFLAG_MASK ((1 << BITFLAG_CONTAINS_GAS) + (1 << BITFLAG_MULTIPLEPARTICLES)) /* bits 10 and 7 set */
+/*! returns the lowest 52 bits (mask 0xFFFFFFFFFFFFF) of the object representation of d */
+static inline unsigned long long force_double_to_int(double d)
+{
+  union
+  {
+    double d;
+    unsigned long long ull;
+  } u;
+  u.d = d;
+  return (u.ull & 0xFFFFFFFFFFFFFllu);
+}
+/*! ORs x into the bit pattern of the double 1.0 and returns the result reinterpreted as a double */
+static inline double force_int_to_double(unsigned long long x)
+{
+  union
+  {
+    double d;
+    unsigned long long ull;
+  } u;
+  u.d = 1.0;
+  u.ull |= x;
+  return u.d;
+}
+
+int tree_treefind_export_node_threads(int no, int target, int thread_id);
+int construct_forcetree(int mode, int optimized_domain_mapping, int insert_only_primary, int timebin);
+int force_treebuild(int npart, int optimized_domain_mapping, int insert_only_primary, int timebin);
+int force_treebuild_construct(int npart, int optimized_domain_mapping, int insert_only_primary, int timebin);
+int force_treebuild_insert_single_point(int i, unsigned long long *intpos, int th, unsigned char level);
+int force_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z);
+void force_insert_pseudo_particles(void);
+void force_update_node_recursive(int no, int sib, int father, int *last);
+void force_exchange_topleafdata(void);
+void force_treeupdate_toplevel(int no, int topnode, int bits, int x, int y, int z);
+void force_treeallocate(int maxpart, int maxindex);
+void force_treefree(void);
+void dump_particles(void);
+int force_add_empty_nodes(void);
+void force_short_range_init(void);
+int force_treeevaluate(gravdata_in *in, gravdata_out *out, int target, int mode, int thread_id, int numnodes, int *firstnode,
+                       int measure_cost_flag);
+void force_assign_cost_values(void);
+void force_optimize_domain_mapping(void);
+double force_get_current_balance(double *impact);
+void force_get_global_cost_for_leavenodes(int nexport);
+void forcetest_ewald_init(void);
+
+#endif /* #ifndef FORCETREE_H */
diff --git a/src/amuse/community/arepo/src/gravity/forcetree_ewald.c b/src/amuse/community/arepo/src/gravity/forcetree_ewald.c
new file mode 100644
index 0000000000..f1b73fb5f6
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/forcetree_ewald.c
@@ -0,0 +1,529 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              .
+ *
+ * \file        src/gravity/forcetree_ewald.c
+ * \date        05/2018
+ * \brief       Code for Ewald correction (i.e. tree force with periodic
+ *              boundary conditions).
+ * \details     This file contains the computation of the Ewald correction
+ *              table.
+ *              contains functions:
+ *                void ewald_init(void)
+ *                void ewald_corr(double dx, double dy, double dz, double
+ *                  *fper)
+ *                double ewald_pot_corr(double dx, double dy, double dz)
+ *                double ewald_psi(double x, double y, double z)
+ *                void ewald_force(double x, double y, double z, double
+ *                  force[3])
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 20.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL)
+#include
+
+/* variables for Ewald correction lookup table:
+ * three force-correction components, the potential correction, and the
+ * factor that converts a separation into a (fractional) table index */
+MyFloat Ewd_fcorrx[ENX + 1][ENY + 1][ENZ + 1];
+MyFloat Ewd_fcorry[ENX + 1][ENY + 1][ENZ + 1];
+MyFloat Ewd_fcorrz[ENX + 1][ENY + 1][ENZ + 1];
+MyFloat Ewd_potcorr[ENX + 1][ENY + 1][ENZ + 1];
+double Ewd_fac_intp;
+
+/*! \brief Structure that holds information of Ewald correction table.
+ *
+ *  This is the header record of the table file handled by ewald_init().
+ *  When the file is written, resx/resy/resz hold ENX/ENY/ENZ, varsize holds
+ *  sizeof(MyFloat) and ewaldtype holds -1; a file whose header differs from
+ *  these values is discarded on reading.
+ */
+typedef struct
+{
+  int resx, resy, resz, varsize, ewaldtype;
+} ewald_header;
+
+/*! \brief This function initializes tables with the correction force and the
+ *  correction potential due to the periodic images of a point mass located
+ *  at the origin.
+ *
+ *  These corrections are obtained by Ewald summation. (See for example
+ *  Hernquist, Bouchet, Suto, ApJS, 1991, 75, 231) The correction fields
+ *  are used to obtain the full periodic force if periodic boundaries
+ *  combined with the pure tree algorithm are used.
For the TreePM + * algorithm, the Ewald correction is not used. + * + * The correction terms are computed by ewald_psi() and ewald_force() and + * stored in the arrays Ewd_fcorrx, Ewd_fcorry, Ewd_fcorrz and Ewd_potcorr. + * + * The correction fields are stored on disk once they are computed. If a + * corresponding file is found, they are loaded from disk to speed up the + * initialization. The Ewald summation issrc/gravtree_forcetest.c done in + * parallel, i.e. the processors share the work to compute the tables if + * needed. + * + * \return void + */ +void ewald_init(void) +{ + int recomputeflag = 0; + double force[3]; + char buf[200]; + FILE *fd; + + mpi_printf("EWALD: initialize Ewald correction...\n"); + +#ifdef LONG_X + if(LONG_X != (int)(LONG_X)) + terminate("LONG_X must be an integer"); +#endif /* #ifdef LONG_X */ + +#ifdef LONG_Y + if(LONG_Y != (int)(LONG_Y)) + terminate("LONG_Y must be an integer"); +#endif /* #ifdef LONG_Y */ + +#ifdef LONG_Z + if(LONG_Z != (int)(LONG_Z)) + terminate("LONG_Z must be an integer"); +#endif /* #ifdef LONG_Z */ + + sprintf(buf, "ewald_table_%d_%d_%d.dat", ENX, ENY, ENZ); + + if(ThisTask == 0) + { + if((fd = fopen(buf, "r"))) + { + mpi_printf("\nEWALD: reading Ewald tables from file `%s'\n", buf); + + ewald_header tabh; + my_fread(&tabh, sizeof(ewald_header), 1, fd); + + int ewaldtype = -1; + + if(tabh.resx != ENX || tabh.resy != ENY || tabh.resz != ENZ || tabh.varsize != sizeof(MyFloat) || + tabh.ewaldtype != ewaldtype) + { + mpi_printf("\nEWALD: something's wrong with this table file. 
Discarding it.\n"); + recomputeflag = 1; + } + else + { + my_fread(Ewd_fcorrx, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd); + my_fread(Ewd_fcorry, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd); + my_fread(Ewd_fcorrz, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd); + my_fread(Ewd_potcorr, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd); + + recomputeflag = 0; + } + fclose(fd); + } + else + recomputeflag = 1; + } + + MPI_Bcast(&recomputeflag, 1, MPI_INT, 0, MPI_COMM_WORLD); + + if(recomputeflag) + { + mpi_printf("\nEWALD: No usable Ewald tables in file `%s' found. Recomputing them...\n", buf); + + /* ok, let's recompute things. Actually, we do that in parallel. */ + int size = (ENX + 1) * (ENY + 1) * (ENZ + 1); + int first, count; + + subdivide_evenly(size, NTask, ThisTask, &first, &count); + + for(int n = first; n < first + count; n++) + { + int i = n / ((ENY + 1) * (ENZ + 1)); + int j = (n - i * (ENY + 1) * (ENZ + 1)) / (ENZ + 1); + int k = (n - i * (ENY + 1) * (ENZ + 1) - j * (ENZ + 1)); + + if(ThisTask == 0) + { + if(((n - first) % (count / 20)) == 0) + { + printf("%4.1f percent done\n", (n - first) / (count / 100.0)); + myflush(stdout); + } + } + + double xx = 0.5 * DBX * STRETCHX * ((double)i) / ENX; + double yy = 0.5 * DBY * STRETCHY * ((double)j) / ENY; + double zz = 0.5 * DBZ * STRETCHZ * ((double)k) / ENZ; + + Ewd_potcorr[i][j][k] = ewald_psi(xx, yy, zz); + + ewald_force(xx, yy, zz, force); + + Ewd_fcorrx[i][j][k] = force[0]; + Ewd_fcorry[i][j][k] = force[1]; + Ewd_fcorrz[i][j][k] = force[2]; + } + + int *recvcnts = (int *)mymalloc("recvcnts", NTask * sizeof(int)); + int *recvoffs = (int *)mymalloc("recvoffs", NTask * sizeof(int)); + + for(int i = 0; i < NTask; i++) + { + int off, cnt; + subdivide_evenly(size, NTask, i, &off, &cnt); + recvcnts[i] = cnt * sizeof(MyFloat); + recvoffs[i] = off * sizeof(MyFloat); + } + + MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(MyFloat), MPI_BYTE, Ewd_fcorrx, recvcnts, recvoffs, 
MPI_BYTE, MPI_COMM_WORLD); + MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(MyFloat), MPI_BYTE, Ewd_fcorry, recvcnts, recvoffs, MPI_BYTE, MPI_COMM_WORLD); + MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(MyFloat), MPI_BYTE, Ewd_fcorrz, recvcnts, recvoffs, MPI_BYTE, MPI_COMM_WORLD); + MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(MyFloat), MPI_BYTE, Ewd_potcorr, recvcnts, recvoffs, MPI_BYTE, MPI_COMM_WORLD); + + myfree(recvoffs); + myfree(recvcnts); + + mpi_printf("\nEWALD: writing Ewald tables to file `%s'\n", buf); + if(ThisTask == 0) + { + if((fd = fopen(buf, "w"))) + { + ewald_header tabh; + tabh.resx = ENX; + tabh.resy = ENY; + tabh.resz = ENZ; + tabh.varsize = sizeof(MyFloat); + tabh.ewaldtype = -1; + + my_fwrite(&tabh, sizeof(ewald_header), 1, fd); + my_fwrite(Ewd_fcorrx, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd); + my_fwrite(Ewd_fcorry, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd); + my_fwrite(Ewd_fcorrz, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd); + my_fwrite(Ewd_potcorr, sizeof(MyFloat), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd); + fclose(fd); + } + } + } + else + { + /* here we got them from disk */ + int len = (ENX + 1) * (ENY + 1) * (ENZ + 1) * sizeof(MyFloat); + + MPI_Bcast(Ewd_fcorrx, len, MPI_BYTE, 0, MPI_COMM_WORLD); + MPI_Bcast(Ewd_fcorry, len, MPI_BYTE, 0, MPI_COMM_WORLD); + MPI_Bcast(Ewd_fcorrz, len, MPI_BYTE, 0, MPI_COMM_WORLD); + MPI_Bcast(Ewd_potcorr, len, MPI_BYTE, 0, MPI_COMM_WORLD); + } + + /* now scale things to the boxsize that is actually used */ + Ewd_fac_intp = 2 * EN / All.BoxSize; + + for(int i = 0; i <= ENX; i++) + for(int j = 0; j <= ENY; j++) + for(int k = 0; k <= ENZ; k++) + { + Ewd_potcorr[i][j][k] /= All.BoxSize; + Ewd_fcorrx[i][j][k] /= All.BoxSize * All.BoxSize; + Ewd_fcorry[i][j][k] /= All.BoxSize * All.BoxSize; + Ewd_fcorrz[i][j][k] /= All.BoxSize * All.BoxSize; + } + + mpi_printf("EWALD: Initialization of periodic boundaries finished.\n"); +} + +/*! 
\brief This function looks up the correction force due to the infinite + * number of periodic particle/node images. + * + * We here use trilinear interpolation to get it from the precomputed tables, + * which contain one octant around the target particle at the origin. The + * other octants are obtained from it by exploiting the symmetry properties. + * + * \param[in] dx x component of the distance between the two particles. + * \param[in] dx y component of the distance between the two particles. + * \param[in] dx z component of the distance between the two particles. + * \param[out] fper pointer to array containing the correction force. + * + * \return void + */ +void ewald_corr(double dx, double dy, double dz, double *fper) +{ + int signx, signy, signz; + int i, j, k; + double u, v, w; + double f1, f2, f3, f4, f5, f6, f7, f8; + + if(dx < 0) + { + dx = -dx; + signx = +1; + } + else + signx = -1; + if(dy < 0) + { + dy = -dy; + signy = +1; + } + else + signy = -1; + if(dz < 0) + { + dz = -dz; + signz = +1; + } + else + signz = -1; + u = dx * Ewd_fac_intp; + i = (int)u; + if(i >= ENX) + i = ENX - 1; + u -= i; + v = dy * Ewd_fac_intp; + j = (int)v; + if(j >= ENY) + j = ENY - 1; + v -= j; + w = dz * Ewd_fac_intp; + k = (int)w; + if(k >= ENZ) + k = ENZ - 1; + w -= k; + f1 = (1 - u) * (1 - v) * (1 - w); + f2 = (1 - u) * (1 - v) * (w); + f3 = (1 - u) * (v) * (1 - w); + f4 = (1 - u) * (v) * (w); + f5 = (u) * (1 - v) * (1 - w); + f6 = (u) * (1 - v) * (w); + f7 = (u) * (v) * (1 - w); + f8 = (u) * (v) * (w); + fper[0] = signx * (Ewd_fcorrx[i][j][k] * f1 + Ewd_fcorrx[i][j][k + 1] * f2 + Ewd_fcorrx[i][j + 1][k] * f3 + + Ewd_fcorrx[i][j + 1][k + 1] * f4 + Ewd_fcorrx[i + 1][j][k] * f5 + Ewd_fcorrx[i + 1][j][k + 1] * f6 + + Ewd_fcorrx[i + 1][j + 1][k] * f7 + Ewd_fcorrx[i + 1][j + 1][k + 1] * f8); + fper[1] = signy * (Ewd_fcorry[i][j][k] * f1 + Ewd_fcorry[i][j][k + 1] * f2 + Ewd_fcorry[i][j + 1][k] * f3 + + Ewd_fcorry[i][j + 1][k + 1] * f4 + Ewd_fcorry[i + 1][j][k] * f5 + 
Ewd_fcorry[i + 1][j][k + 1] * f6 + + Ewd_fcorry[i + 1][j + 1][k] * f7 + Ewd_fcorry[i + 1][j + 1][k + 1] * f8); + fper[2] = signz * (Ewd_fcorrz[i][j][k] * f1 + Ewd_fcorrz[i][j][k + 1] * f2 + Ewd_fcorrz[i][j + 1][k] * f3 + + Ewd_fcorrz[i][j + 1][k + 1] * f4 + Ewd_fcorrz[i + 1][j][k] * f5 + Ewd_fcorrz[i + 1][j][k + 1] * f6 + + Ewd_fcorrz[i + 1][j + 1][k] * f7 + Ewd_fcorrz[i + 1][j + 1][k + 1] * f8); +} + +/*! \brief This function looks up the correction potential due to the infinite + * number of periodic particle/node images. + * + * We here use tri-linear interpolation to get it from the precomputed + * table, which contains one octant around the target particle at the + * origin. The other octants are obtained from it by exploiting symmetry + * properties. + * + * \param[in] dx x component of the distance between the two particles. + * \param[in] dx y component of the distance between the two particles. + * \param[in] dx z component of the distance between the two particles. + * + * \return The correction potential. 
+ */
+double ewald_pot_corr(double dx, double dy, double dz)
+{
+  /* the table covers one octant only, so work with the absolute separation components */
+  if(dx < 0)
+    dx = -dx;
+  if(dy < 0)
+    dy = -dy;
+  if(dz < 0)
+    dz = -dz;
+
+  /* per axis: cell index (clamped to the last cell) and offset measured from that cell */
+  double tx = dx * Ewd_fac_intp;
+  int ix = (int)tx;
+  if(ix >= ENX)
+    ix = ENX - 1;
+  tx -= ix;
+
+  double ty = dy * Ewd_fac_intp;
+  int iy = (int)ty;
+  if(iy >= ENY)
+    iy = ENY - 1;
+  ty -= iy;
+
+  double tz = dz * Ewd_fac_intp;
+  int iz = (int)tz;
+  if(iz >= ENZ)
+    iz = ENZ - 1;
+  tz -= iz;
+
+  /* trilinear weights; wABC belongs to the cell corner (ix + A, iy + B, iz + C) */
+  const double w000 = (1 - tx) * (1 - ty) * (1 - tz);
+  const double w001 = (1 - tx) * (1 - ty) * (tz);
+  const double w010 = (1 - tx) * (ty) * (1 - tz);
+  const double w011 = (1 - tx) * (ty) * (tz);
+  const double w100 = (tx) * (1 - ty) * (1 - tz);
+  const double w101 = (tx) * (1 - ty) * (tz);
+  const double w110 = (tx) * (ty) * (1 - tz);
+  const double w111 = (tx) * (ty) * (tz);
+
+  return Ewd_potcorr[ix][iy][iz] * w000 + Ewd_potcorr[ix][iy][iz + 1] * w001 + Ewd_potcorr[ix][iy + 1][iz] * w010 +
+         Ewd_potcorr[ix][iy + 1][iz + 1] * w011 + Ewd_potcorr[ix + 1][iy][iz] * w100 + Ewd_potcorr[ix + 1][iy][iz + 1] * w101 +
+         Ewd_potcorr[ix + 1][iy + 1][iz] * w110 + Ewd_potcorr[ix + 1][iy + 1][iz + 1] * w111;
+}
+
+/*! \brief This function computes the potential correction term by means of
+ *  Ewald summation.
+ *
+ *  \param[in] x X distance for which the correction term should be computed.
+ *  \param[in] y Y distance for which the correction term should be computed.
+ *  \param[in] z Z distance for which the correction term should be computed.
+ *
+ *  \return The correction term.
+ */
+double ewald_psi(double x, double y, double z)
+{
+  static int printed = 0; /* makes sure the summation limits are logged only once */
+
+  double r = sqrt(x * x + y * y + z * z);
+
+  if(r == 0)
+    return 0;
+
+  double lmin = imin(imin(STRETCHX, STRETCHY), STRETCHZ);
+  double alpha = 3.0 / lmin; /* note: ewald_force() uses 2.0 / lmin */
+
+  const int nmax = 4;
+
+  double sum1 = 0;
+  /* sum of erfc(alpha * r) / r over the images shifted by up to nmax box lengths in every direction */
+  for(int nx = -nmax; nx <= nmax; nx++)
+    for(int ny = -nmax; ny <= nmax; ny++)
+      for(int nz = -nmax; nz <= nmax; nz++)
+        {
+          double dx = x - nx * STRETCHX;
+          double dy = y - ny * STRETCHY;
+          double dz = z - nz * STRETCHZ;
+          double r = sqrt(dx * dx + dy * dy + dz * dz); /* shadows the outer r on purpose: distance to this image */
+          sum1 += erfc(alpha * r) / r;
+        }
+
+  double alpha2 = alpha * alpha;
+  /* limits of the wave-vector sum below, rounded to the nearest integer */
+  int nxmax = (int)(2 * alpha * (STRETCHX / lmin) + 0.5);
+  int nymax = (int)(2 * alpha * (STRETCHY / lmin) + 0.5);
+  int nzmax = (int)(2 * alpha * (STRETCHZ / lmin) + 0.5);
+
+  if(printed == 0)
+    {
+      mpi_printf("EWALD: potential tab: nxmax=%d nymax=%d nzmax=%d\n", nxmax, nymax, nzmax);
+      printed = 1;
+    }
+
+  double sum2 = 0.0;
+  /* sum over the wave vectors k = 2 * pi * (nx / STRETCHX, ny / STRETCHY, nz / STRETCHZ), skipping k = 0 */
+  for(int nx = -nxmax; nx <= nxmax; nx++)
+    for(int ny = -nymax; ny <= nymax; ny++)
+      for(int nz = -nzmax; nz <= nzmax; nz++)
+        {
+          double kx = (2.0 * M_PI / (STRETCHX)) * nx;
+          double ky = (2.0 * M_PI / (STRETCHY)) * ny;
+          double kz = (2.0 * M_PI / (STRETCHZ)) * nz;
+          double k2 = kx * kx + ky * ky + kz * kz;
+          if(k2 > 0)
+            {
+              double kdotx = (x * kx + y * ky + z * kz);
+              sum2 += 4.0 * M_PI / (k2 * STRETCHX * STRETCHY * STRETCHZ) * exp(-k2 / (4.0 * alpha2)) * cos(kdotx);
+            }
+        }
+  /* the 1 / r term is added back, so the result is zero-point term minus both sums plus 1 / r */
+  double psi = /*-2.83729 + */ M_PI / (alpha * alpha * STRETCHX * STRETCHY * STRETCHZ) - sum1 - sum2 + 1.0 / r;
+
+  return psi;
+}
+
+/*! \brief This function computes the force correction term (difference
+ *  between full force of infinite lattice and nearest image) by Ewald
+ *  summation.
+ *
+ *  \param[in] x X distance for which the correction term should be computed.
+ *  \param[in] y Y distance for which the correction term should be computed.
+ *  \param[in] z Z distance for which the correction term should be computed.
+ *  \param[out] force Array that will contain the correction force.
+ *
+ *  The result is all zero for x = y = z = 0.
+ *
+ *  NOTE(review): the image sum uses STRETCHX/Y/Z, but the sum over
+ *  (hx, hy, hz) does not (ewald_psi() scales its wave vectors with them) -
+ *  confirm that this is intended when LONG_X/Y/Z differ from 1.
+ *
+ *  \return void
+ */
+void ewald_force(double x, double y, double z, double force[3])
+{
+  static int printed = 0; /* makes sure the summation limits are logged only once */
+  for(int i = 0; i < 3; i++)
+    force[i] = 0;
+  double r2 = x * x + y * y + z * z;
+
+  if(r2 == 0)
+    return;
+
+  double lmin = imin(imin(STRETCHX, STRETCHY), STRETCHZ);
+  double alpha = 2.0 / lmin;
+  double alpha2 = alpha * alpha;
+
+  double r3inv = 1.0 / (r2 * sqrt(r2));
+  /* start from (x, y, z) / r^3; the sums below are subtracted from it */
+  force[0] += r3inv * x;
+  force[1] += r3inv * y;
+  force[2] += r3inv * z;
+
+  const int nmax = 4;
+  /* images shifted by up to nmax box lengths in every direction */
+  for(int nx = -nmax; nx <= nmax; nx++)
+    for(int ny = -nmax; ny <= nmax; ny++)
+      for(int nz = -nmax; nz <= nmax; nz++)
+        {
+          double dx = x - nx * STRETCHX;
+          double dy = y - ny * STRETCHY;
+          double dz = z - nz * STRETCHZ;
+          double r2 = dx * dx + dy * dy + dz * dz; /* shadows the outer r2: squared distance to this image */
+          double r = sqrt(r2);
+          double val = erfc(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2);
+          double val2 = val / (r2 * r);
+
+          force[0] -= dx * val2;
+          force[1] -= dy * val2;
+          force[2] -= dz * val2;
+        }
+
+  int nxmax = (int)(2 * alpha * (STRETCHX / lmin) + 0.5);
+  int nymax = (int)(2 * alpha * (STRETCHY / lmin) + 0.5);
+  int nzmax = (int)(2 * alpha * (STRETCHZ / lmin) + 0.5);
+
+  if(printed == 0)
+    {
+      mpi_printf("EWALD: force tab: nxmax=%d nymax=%d nzmax=%d\n", nxmax, nymax, nzmax);
+      printed = 1;
+    }
+  /* sum over the integer vectors (hx, hy, hz), skipping the zero vector */
+  for(int hx = -nxmax; hx <= nxmax; hx++)
+    for(int hy = -nymax; hy <= nymax; hy++)
+      for(int hz = -nzmax; hz <= nzmax; hz++)
+        {
+          double h2 = hx * hx + hy * hy + hz * hz;
+          if(h2 > 0)
+            {
+              double hdotx = x * hx + y * hy + z * hz;
+              double val = 2.0 / h2 * exp(-M_PI * M_PI * h2 / alpha2) * sin(2.0 * M_PI * hdotx);
+
+              force[0] -= hx * val;
+              force[1] -= hy * val;
+              force[2] -= hz * val;
+            }
+        }
+}
+
+#endif /* #if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */
diff --git a/src/amuse/community/arepo/src/gravity/forcetree_optimizebalance.c
b/src/amuse/community/arepo/src/gravity/forcetree_optimizebalance.c new file mode 100644 index 0000000000..3289af844c --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/forcetree_optimizebalance.c @@ -0,0 +1,486 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/forcetree_optimizebalance.c + * \date 05/2018 + * \brief Does some preparation work for use of red-black ordered binary + * tree based on BSD macros. + * \details contains functions: + * int force_sort_load(const void *a, const void *b) + * double force_get_current_balance(double *impact) + * void force_get_global_cost_for_leavenodes(int nexport) + * static int mydata_cmp(struct mydata *lhs, struct mydata *rhs) + * void force_optimize_domain_mapping(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 20.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/bsd_tree.h" +#include "../domain/domain.h" + +/* \brief Structure of my tree nodes. 
+ */ +struct mydata +{ + double pri; + int target; + RB_ENTRY(mydata) linkage; /* this creates the linkage pointers needed by the RB tree, using symbolic name 'linkage' */ +}; + +/* prototype of comparison function of tree elements */ +static int mydata_cmp(struct mydata *lhs, struct mydata *rhs); + +/* the following macro declares 'struct mytree', which is the header element needed as handle for a tree */ +RB_HEAD(mytree, mydata); + +/* the following macros declare appropriate function prototypes and functions needed for this type of tree */ +RB_PROTOTYPE_STATIC(mytree, mydata, linkage, mydata_cmp); +RB_GENERATE_STATIC(mytree, mydata, linkage, mydata_cmp); + +/*! \brief Data structure that describes force-segment. + */ +static struct force_segments_data +{ + int start, end, task; + double work, cost, count, normalized_load; +} * force_domainAssign; + +/*! \brief Comparison function for force_segments_data. + * + * Sorting kernel. + * + * \param[in] a First object. + * \param[in] b Second object. + * + * \return (-1,0,1), -1 if a->normalized_load > b->normalized_load. + */ +int force_sort_load(const void *a, const void *b) +{ + if(((struct force_segments_data *)a)->normalized_load > (((struct force_segments_data *)b)->normalized_load)) + return -1; + + if(((struct force_segments_data *)a)->normalized_load < (((struct force_segments_data *)b)->normalized_load)) + return +1; + + return 0; +} + +static double oldmax, oldsum; + +/*! \brief Calculates current balance. + * + * \param[out] impact Impact factor of imbalance (1 if optimally balanced). + * + * \return Domain balance = max(cost) / average(cost). 
+ */
+double force_get_current_balance(double *impact)
+{
+#ifndef NO_MPI_IN_PLACE
+  MPI_Allreduce(MPI_IN_PLACE, TaskCost, NTask, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+#else  /* #ifndef NO_MPI_IN_PLACE */
+  double *inTaskCost = mymalloc("inTaskCost", NTask * sizeof(double));
+  ; /* stray empty statement, has no effect */
+  memcpy(inTaskCost, TaskCost, NTask * sizeof(double));
+  MPI_Allreduce(inTaskCost, TaskCost, NTask, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  myfree(inTaskCost);
+#endif /* #ifndef NO_MPI_IN_PLACE #else */
+  /* oldmax and oldsum are file-level statics; force_optimize_domain_mapping() reads them later */
+  int i;
+  for(i = 0, oldmax = oldsum = 0; i < NTask; i++)
+    {
+      oldsum += TaskCost[i];
+      if(oldmax < TaskCost[i])
+        oldmax = TaskCost[i];
+    }
+
+  *impact = 1.0 + domain_grav_weight[All.HighestActiveTimeBin] * (oldmax - oldsum / NTask) / All.TotGravCost;
+
+  return oldmax / (oldsum / NTask);
+}
+
+/*! \brief Gather cost data of all leaf-nodes and communicate result.
+ *
+ *  First the records in ListNoData are sent to the task named in each
+ *  record and added to DomainCost/DomainCount of the node named in the
+ *  record. Afterwards every task contributes the DomainCost/DomainCount
+ *  entries of the top-level leaves assigned to it in DomainTask, and all
+ *  tasks overwrite their entries for the leaves of the other tasks with the
+ *  gathered values.
+ *
+ *  \param[in] nexport Number of exported nodes.
+ *
+ *  \return void
+ */
+void force_get_global_cost_for_leavenodes(int nexport)
+{
+  int i, j, n, nimport, idx, task, ngrp;
+
+  struct node_data
+  {
+    double domainCost;
+    int domainCount;
+    int no;
+  } * export_node_data, *import_node_data;
+  /* Send_count is read as already filled in here - presumably by the caller; verify */
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
+    {
+      nimport += Recv_count[j];
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+  /* the counts are reset and rebuilt while packing, where they serve as per-task write cursors */
+  for(j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  export_node_data = mymalloc("export_node_data", nexport * sizeof(struct node_data));
+  import_node_data = mymalloc("import_node_data", nimport * sizeof(struct node_data));
+
+  for(i = 0; i < nexport; i++)
+    {
+      int task = ListNoData[i].task; /* shadows the function-level 'task' inside this loop */
+      int ind = Send_offset[task] + Send_count[task]++;
+
+      export_node_data[ind].domainCost = ListNoData[i].domainCost;
+      export_node_data[ind].domainCount = ListNoData[i].domainCount;
+      export_node_data[ind].no = ListNoData[i].no;
+    }
+  /* pairwise exchange: in round ngrp this task talks to task (ThisTask ^ ngrp) */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp;
+      if(recvTask < NTask)
+        if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+          MPI_Sendrecv(&export_node_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct node_data), MPI_BYTE, recvTask,
+                       TAG_DENS_B, &import_node_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct node_data), MPI_BYTE,
+                       recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+  /* accumulate the received contributions on the addressed nodes */
+  for(i = 0; i < nimport; i++)
+    {
+      int no = import_node_data[i].no;
+      DomainCost[no] += import_node_data[i].domainCost;
+      DomainCount[no] += import_node_data[i].domainCount;
+    }
+
+  myfree(import_node_data);
+  myfree(export_node_data);
+
+  /* now share the cost data across all processors */
+  struct DomainNODE
+  {
+    double domainCost;
+    int domainCount;
+  } * DomainMoment, *loc_DomainMoment;
+
+  DomainMoment = (struct DomainNODE *)mymalloc("DomainMoment", NTopleaves * sizeof(struct DomainNODE));
+
+  /* share the cost data across CPUs */
+  int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask);
+  int *recvoffset = (int *)mymalloc("recvoffset", sizeof(int) * NTask);
+  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
+  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);
+
+  for(task = 0; task < NTask; task++)
+    recvcounts[task] = 0;
+  /* number of top-level leaves per task */
+  for(n = 0; n < NTopleaves; n++)
+    recvcounts[DomainTask[n]]++;
+
+  for(task = 0; task < NTask; task++)
+    bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE);
+
+  for(task = 1, recvoffset[0] = 0, byteoffset[0] = 0; task < NTask; task++)
+    {
+      recvoffset[task] = recvoffset[task - 1] + recvcounts[task - 1];
+      byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+    }
+
+  loc_DomainMoment = (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE));
+  /* pack the entries of the leaves assigned to this task, in leaf order */
+  for(n = 0, idx = 0; n < NTopleaves; n++)
+    {
+      if(DomainTask[n] == ThisTask)
+        {
+          loc_DomainMoment[idx].domainCost = DomainCost[n];
+          loc_DomainMoment[idx].domainCount = DomainCount[n];
+          idx++;
+        }
+    }
+
+  MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);
+  /* recvcounts is reused as a per-task read cursor for unpacking */
+  for(task = 0; task < NTask; task++)
+    recvcounts[task] = 0;
+
+  for(n = 0; n < NTopleaves; n++)
+    {
+      task = DomainTask[n];
+      if(task != ThisTask)
+        {
+          idx = recvoffset[task] + recvcounts[task]++;
+
+          DomainCost[n] = DomainMoment[idx].domainCost;
+          DomainCount[n] = DomainMoment[idx].domainCount;
+        }
+    }
+  /* released in reverse order of allocation */
+  myfree(loc_DomainMoment);
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(recvoffset);
+  myfree(recvcounts);
+  myfree(DomainMoment);
+}
+
+/*! \brief Comparison function of tree elements.
+ *
+ *  Compares
+ *   - pri and if this is equal
+ *   - target
+ *
+ *  \param[in] lhs First mydata object.
+ *  \param[in] rhs Second mydata object.
+ *
+ *  \return (-1,0,1) -1 if lhs < rhs.
+ */
+static int mydata_cmp(struct mydata *lhs, struct mydata *rhs)
+{
+  if(lhs->pri < rhs->pri)
+    return -1;
+  else if(lhs->pri > rhs->pri)
+    return 1;
+  else if(lhs->target < rhs->target)
+    return -1;
+  else if(lhs->target > rhs->target)
+    return 1;
+
+  return 0;
+}
+
+/*! \brief Optimization algorithm for the workload balance.
+ *
+ *  \return void
+ */
+void force_optimize_domain_mapping(void)
+{
+  int i, j;
+
+  double fac_cost = 0.5 / oldsum;          /* weight of the cost term in the combined work measure */
+  double fac_count = 0.5 / All.TotNumPart; /* weight of the particle-count term in the combined work measure */
+
+  int ncpu = NTask * All.MultipleDomains; /* number of segments that are created below */
+  int ndomain = NTopleaves;
+  double workavg = 1.0 / ncpu; /* work share aimed at per segment */
+  double workhalfnode = 0.5 / NTopleaves;
+  double work_before = 0;
+  double workavg_before = 0;
+
+  int start = 0;
+
+  force_domainAssign = mymalloc("force_domainAssign", ncpu * sizeof(struct force_segments_data));
+
+  for(i = 0; i < ncpu; i++)
+    {
+      double work = 0, cost = 0, count = 0;
+      int end = start; /* every segment holds at least the leaf 'start' */
+
+      cost += fac_cost * DomainCost[end];
+      count += fac_count * DomainCount[end];
+      work += fac_cost * DomainCost[end] + fac_count * DomainCount[end];
+
+      while((work + work_before + (end + 1 < NTopleaves ? fac_cost * DomainCost[end + 1] + fac_count * DomainCount[end + 1] : 0) <
+             workavg + workavg_before + workhalfnode) ||
+            (i == ncpu - 1 && end < ndomain - 1)) /* the last segment tries to take all remaining leaves */
+        {
+          if((ndomain - end) > (ncpu - i)) /* only extend while more leaves than segments remain */
+            end++;
+          else
+            break;
+
+          cost += fac_cost * DomainCost[end];
+          count += fac_count * DomainCount[end];
+          work += fac_cost * DomainCost[end] + fac_count * DomainCount[end];
+        }
+
+      force_domainAssign[i].start = start;
+      force_domainAssign[i].end = end;
+      force_domainAssign[i].work = work;
+      force_domainAssign[i].cost = cost;
+      force_domainAssign[i].count = count;
+
+      force_domainAssign[i].normalized_load = cost + count; /* note: they are already multiplied by fac_cost/fac_count */
+
+      work_before += work;
+      workavg_before += workavg;
+      start = end + 1; /* segments are contiguous ranges of top-level leaves */
+    }
+
+  qsort(force_domainAssign, ncpu, sizeof(struct force_segments_data), force_sort_load); /* order is defined by force_sort_load (not visible here) */
+
+  /* create three priority trees, one for the cost load, one for the particle count, and one for the combined cost */
+  struct mytree queues[3]; /* 0=cost, 1=count, 2=combi */
+
+  struct mydata *ncost = mymalloc("ncost", NTask * sizeof(struct mydata));
+  struct mydata *ncount = mymalloc("ncount", NTask * sizeof(struct mydata));
+  struct mydata *ncombi = mymalloc("ncombi", NTask * sizeof(struct mydata));
+
+  RB_INIT(&queues[0]);
+  RB_INIT(&queues[1]);
+  RB_INIT(&queues[2]);
+
+  /* fill in all the tasks into the trees. The priority will be the current cost/count, the field 'target' is used to label the task */
+  for(i = 0; i < NTask; i++)
+    {
+      ncost[i].pri = 0;
+      ncost[i].target = i;
+      RB_INSERT(mytree, &queues[0], &ncost[i]);
+
+      ncount[i].pri = 0;
+      ncount[i].target = i;
+      RB_INSERT(mytree, &queues[1], &ncount[i]);
+
+      ncombi[i].pri = 0;
+      ncombi[i].target = i;
+      RB_INSERT(mytree, &queues[2], &ncombi[i]);
+    }
+
+  double max_load = 0; /* largest particle load assigned to any task so far */
+  double max_cost = 0; /* largest cost assigned to any task so far */
+
+  int n_lowest = MAX_FIRST_ELEMENTS_CONSIDERED;
+  if(n_lowest > NTask)
+    n_lowest = NTask; /* each tree holds exactly NTask elements */
+
+  int rep, *candidates = mymalloc("candidates", n_lowest * sizeof(int));
+  struct mydata *np;
+
+  for(i = 0; i < ncpu; i++)
+    {
+      /* pick the least work-loaded target from the queue, and the least particle-loaded, and then decide which choice
+         gives the smallest load overall */
+      double cost, load;
+      double bestwork = 1.0e30;
+      int q, target = -1;
+
+      for(q = 0; q < 3; q++)
+        {
+          /* look up the n_lowest smallest elements from the tree */
+          for(np = RB_MIN(mytree, &queues[q]), rep = 0; np != NULL && rep < n_lowest; np = RB_NEXT(mytree, &queues[q], np), rep++)
+            candidates[rep] = np->target;
+
+          for(rep = 0; rep < n_lowest; rep++)
+            {
+              int t = candidates[rep];
+
+              cost = ncost[t].pri + force_domainAssign[i].cost;
+              load = ncount[t].pri + force_domainAssign[i].count;
+              if(cost < max_cost)
+                cost = max_cost; /* staying below the current maximum does not change the score */
+              if(load < max_load)
+                load = max_load;
+              double w = cost + load;
+              if(w < bestwork)
+                {
+                  bestwork = w;
+                  target = t;
+                }
+            }
+        }
+
+      force_domainAssign[i].task = target;
+
+      cost = ncost[target].pri + force_domainAssign[i].cost;
+      load = ncount[target].pri + force_domainAssign[i].count;
+
+      RB_REMOVE(mytree, &queues[0], &ncost[target]); /* re-insert with the new priority so the tree stays ordered */
+      ncost[target].pri = cost;
+      RB_INSERT(mytree, &queues[0], &ncost[target]);
+
+      RB_REMOVE(mytree, &queues[1], &ncount[target]);
+      ncount[target].pri = load;
+      RB_INSERT(mytree, &queues[1], &ncount[target]);
+
+      RB_REMOVE(mytree, &queues[2], &ncombi[target]);
+      ncombi[target].pri = cost + load;
+      RB_INSERT(mytree, &queues[2], &ncombi[target]);
+
+      if(max_cost < cost)
+        max_cost = cost;
+
+      if(max_load < load)
+        max_load = load;
+    }
+
+  myfree(candidates);
+
+  /* free tree nodes again */
+  myfree(ncombi);
+  myfree(ncount);
+  myfree(ncost);
+
+  for(i = 0; i < ncpu; i++)
+    for(j = force_domainAssign[i].start; j <= force_domainAssign[i].end; j++)
+      DomainNewTask[j] = force_domainAssign[i].task; /* every leaf of a segment goes to the segment's task */
+
+  myfree(force_domainAssign);
+
+  for(i = 0; i < NTask; i++)
+    {
+      TaskCost[i] = 0;
+      TaskCount[i] = 0;
+    }
+
+  for(i = 0; i < NTopleaves; i++)
+    {
+      TaskCost[DomainNewTask[i]] += DomainCost[i];
+      TaskCount[DomainNewTask[i]] += DomainCount[i];
+    }
+
+  double max, sum, maxload, sumload;
+  for(i = 0, max = sum = 0, maxload = sumload = 0; i < NTask; i++)
+    {
+      sum += TaskCost[i];
+      if(max < TaskCost[i])
+        max = TaskCost[i];
+      sumload += TaskCount[i];
+      if(maxload < TaskCount[i])
+        maxload = TaskCount[i];
+    }
+
+  mpi_printf("FORCETREE: Active-TimeBin=%d [unoptimized work-balance=%g] new work-balance=%g, new load-balance=%g\n",
+             All.HighestActiveTimeBin, oldmax / (oldsum / NTask), max / (sum / NTask), maxload / (sumload / NTask));
+
+  if((max / (sum / NTask) > oldmax / (oldsum / NTask)) || (maxload > All.MaxPart)) /* new mapping is worse, or one task would exceed All.MaxPart */
+    {
+      mpi_printf(
+          "FORCETREE: The work-load is either worse than before or the memory-balance is not viable. We keep the old distribution.\n");
+      memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int));
+    }
+}
diff --git a/src/amuse/community/arepo/src/gravity/forcetree_walk.c b/src/amuse/community/arepo/src/gravity/forcetree_walk.c
new file mode 100644
index 0000000000..b773024cea
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/forcetree_walk.c
@@ -0,0 +1,709 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/forcetree_walk.c + * \date 05/2018 + * \brief Gravitational tree walk code. + * \details This file contains the various gravitational tree walks. + * contains functions: + * void force_short_range_init(void) + * int force_treeevaluate(gravdata_in * in, gravdata_out * out, + * int target, int mode, int thread_id, int numnodes, int + * *firstnode, int measure_cost_flag) + * int tree_treefind_export_node_threads(int no, int i, int + * thread_id) + * void force_evaluate_direct(int target, int result_idx, + * int nimport) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 16.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Variable for short-range lookup table. + * + * Contains the factor needed for the short range + * contribution of the tree to the gravity force. + */ +static float shortrange_table[NTAB + 1]; + +/*! \brief Variable for short-range lookup table. 
+ *
+ *  Contains the factor needed for the short range
+ *  contribution of the tree to the potential energy.
+ */
+static float shortrange_table_potential[NTAB + 1];
+
+/*! \brief Initializes the short range table.
+ *
+ *  The short range table contains the complementary error function
+ *  needed for the computation of the short range part of the gravity
+ *  force/potential in case of the TreePM algorithm.
+ *  Both tables are filled for the NTAB + 1 values u = i * (RCUT / 2.0) / NTAB; the force entry for u = 0 is set to 0.
+ *  \return void
+ */
+void force_short_range_init(void)
+{
+  for(int i = 0; i <= NTAB; i++)
+    {
+      double u = ((RCUT / 2.0) / NTAB) * i; /* equidistant sampling of the interval [0, RCUT / 2] */
+
+      shortrange_table_potential[i] = -erfc(u); /* -r * g(r) */
+
+      if(u > 0)
+        shortrange_table[i] = (erfc(u) + 2.0 * u / sqrt(M_PI) * exp(-u * u) - 1.0) / (u * u); /* -g'(r) - 1/r^2 */
+      else
+        shortrange_table[i] = 0; /* the expression above would be 0/0 at u = 0 */
+    }
+}
+
+/*! \brief This routine calculates the (short range) force contribution
+ *   for a given particle in case the Tree(PM) algorithm is used.
+ *
+ *  In the TreePM algorithm, the tree is walked only locally around the
+ *  target coordinate.  Tree nodes that fall outside a box of half
+ *  side-length Rcut= RCUT*ASMTH*MeshSize can be discarded. The short-range
+ *  potential is modified by a complementary error function, multiplied
+ *  with the Newtonian form. The resulting short-range suppression compared
+ *  to the Newtonian force is tabulated, because looking up from this table
+ *  is faster than recomputing the corresponding factor, despite the
+ *  memory-access penalty (which reduces cache performance) incurred by the
+ *  table.
+ *
+ *  Depending on whether All.ErrTolTheta is non-zero, either the geometrical BH
+ *  cell-opening criterion, or the `relative' opening criterion is used.
+ *
+ *  \param[in] in Gravdata communicated into function.
+ *  \param[in, out] out Gravdata communicated from function.
+ *  \param[in] target Index of the particle to be processed.
+ *  \param[in] mode 0: process local particle (phase 1), 1: process imported
+ *             particle (phase 2).
+ *  \param[in] thread_id Id of this thread.
+ * \param[in, out] firstnode First node involved in this algorithm. + * \param[in] measure_cost_flag Whether the cost of the tree walk should be + * measured. + * + * \return Number of interactions processed for particle i. + */ +int force_treeevaluate(gravdata_in *in, gravdata_out *out, int target, int mode, int thread_id, int numnodes, int *firstnode, + int measure_cost_flag) +{ + struct NODE *nop = NULL; +#ifdef MULTIPLE_NODE_SOFTENING + struct ExtNODE *extnop = 0; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ +#if !defined(GRAVITY_NOT_PERIODIC) + double xtmp, ytmp, ztmp; +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */ + + double acc_x = 0; + double acc_y = 0; + double acc_z = 0; +#ifdef EVALPOTENTIAL + double pot = 0.0; +#endif /* #ifdef EVALPOTENTIAL */ + + int ninteractions = 0; + + double pos_x = in->Pos[0]; + double pos_y = in->Pos[1]; + double pos_z = in->Pos[2]; + double aold = All.ErrTolForceAcc * in->OldAcc; + double h_i = All.ForceSoftening[in->SofteningType]; + +#ifdef PMGRID + double rcut = All.Rcut[0]; + double asmth = All.Asmth[0]; +#ifdef PLACEHIGHRESREGION + if(pmforce_is_particle_high_res(in->Type, in->Pos)) + { + rcut = All.Rcut[1]; + asmth = All.Asmth[1]; + } +#endif /* #ifdef PLACEHIGHRESREGION */ + + double rcut2 = rcut * rcut; + double asmthinv = 0.5 / asmth; + double asmthinv2 = asmthinv * asmthinv; + double asmthfac = asmthinv * (NTAB / (RCUT / 2.0)); +#endif /* #ifdef PMGRID */ + + for(int k = 0; k < numnodes; k++) + { + int no; + + if(mode == 0) + no = Tree_MaxPart; /* root node */ + else + { + no = firstnode[k]; + no = Nodes[no].u.d.nextnode; /* open it */ + } + + while(no >= 0) + { + double dx, dy, dz, r2, mass, hmax; + +#ifdef MULTIPLE_NODE_SOFTENING + int indi_flag1 = -1, indi_flag2 = 0; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + if(no < Tree_MaxPart) /* single particle */ + { + dx = GRAVITY_NEAREST_X(Tree_Pos_list[3 * no + 0] - pos_x); + dy = GRAVITY_NEAREST_Y(Tree_Pos_list[3 * no + 1] - pos_y); + dz = 
GRAVITY_NEAREST_Z(Tree_Pos_list[3 * no + 2] - pos_z); + r2 = dx * dx + dy * dy + dz * dz; + + mass = P[no].Mass; + + if(measure_cost_flag) + Thread[thread_id].P_CostCount[no]++; + + double h_j = All.ForceSoftening[P[no].SofteningType]; + + hmax = (h_j > h_i) ? h_j : h_i; + + no = Nextnode[no]; + } + else if(no < Tree_MaxPart + Tree_MaxNodes) /* we have an internal node */ + { + if(mode == 1) + { + if(no < + Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */ + { + no = -1; + continue; + } + } + + nop = &Nodes[no]; + + mass = nop->u.d.mass; + dx = GRAVITY_NEAREST_X(nop->u.d.s[0] - pos_x); + dy = GRAVITY_NEAREST_Y(nop->u.d.s[1] - pos_y); + dz = GRAVITY_NEAREST_Z(nop->u.d.s[2] - pos_z); + + r2 = dx * dx + dy * dy + dz * dz; + +#if defined(PMGRID) + if(r2 > rcut2) + { + /* check whether we can stop walking along this branch */ + double eff_dist = rcut + 0.5 * nop->len; + + double dist = GRAVITY_NEAREST_X(nop->center[0] - pos_x); + if(dist < -eff_dist || dist > eff_dist) + { + no = nop->u.d.sibling; + continue; + } + + dist = GRAVITY_NEAREST_Y(nop->center[1] - pos_y); + if(dist < -eff_dist || dist > eff_dist) + { + no = nop->u.d.sibling; + continue; + } + + dist = GRAVITY_NEAREST_Z(nop->center[2] - pos_z); + if(dist < -eff_dist || dist > eff_dist) + { + no = nop->u.d.sibling; + continue; + } + } +#endif /* #if defined(PMGRID) */ + + if(All.ErrTolTheta) /* check Barnes-Hut opening criterion */ + { + if(nop->len * nop->len > r2 * All.ErrTolTheta * All.ErrTolTheta) + { + /* open cell */ + no = nop->u.d.nextnode; + continue; + } + } + else /* check relative opening criterion */ + { + double len2 = nop->len * nop->len; + + if(len2 > r2 * (1.2 * 1.2)) /* add a worst case protection */ + { + /* open cell */ + no = nop->u.d.nextnode; + continue; + } + + // note that aold is strictly speaking |acceleration| / G +#ifdef ACTIVATE_MINIMUM_OPENING_ANGLE + if(mass * len2 > r2 * r2 * aold && len2 > r2 * (0.4 * 0.4)) +#else 
/* #ifdef ACTIVATE_MINIMUM_OPENING_ANGLE */ + if(mass * len2 > r2 * r2 * aold) +#endif /* #ifdef ACTIVATE_MINIMUM_OPENING_ANGLE #else */ + { + /* open cell */ + no = nop->u.d.nextnode; + continue; + } + + /* check in addition whether we lie inside or very close to the cell */ + if(fabs(GRAVITY_NEAREST_X(nop->center[0] - pos_x)) < 0.60 * nop->len) + { + if(fabs(GRAVITY_NEAREST_Y(nop->center[1] - pos_y)) < 0.60 * nop->len) + { + if(fabs(GRAVITY_NEAREST_Z(nop->center[2] - pos_z)) < 0.60 * nop->len) + { + no = nop->u.d.nextnode; + continue; + } + } + } + } + + double h_j = All.ForceSoftening[nop->u.d.maxsofttype]; + + if(h_j > h_i) + { +#ifdef MULTIPLE_NODE_SOFTENING +#ifdef ADAPTIVE_HYDRO_SOFTENING + if(nop->u.d.maxhydrosofttype != nop->u.d.minhydrosofttype) + if(ExtNodes[no].mass_per_type[0] > 0) + if(r2 < All.ForceSoftening[nop->u.d.maxhydrosofttype] * All.ForceSoftening[nop->u.d.maxhydrosofttype]) + { + /* open cell */ + no = nop->u.d.nextnode; + continue; + } +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + indi_flag1 = 0; + indi_flag2 = NSOFTTYPES; +#else /* #ifdef MULTIPLE_NODE_SOFTENING */ + if(r2 < h_j * h_j) + { + /* open cell */ + no = nop->u.d.nextnode; + continue; + } +#endif /* #ifdef MULTIPLE_NODE_SOFTENING #else */ + hmax = h_j; + } + else + hmax = h_i; + + /* ok, node can be used */ +#ifdef MULTIPLE_NODE_SOFTENING + extnop = &ExtNodes[no]; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + if(measure_cost_flag && mass) + Thread[thread_id].Node_CostCount[no]++; + + no = nop->u.d.sibling; + } + else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */ + { + int n = no - Tree_ImportedNodeOffset; + + dx = GRAVITY_NEAREST_X(Tree_Points[n].Pos[0] - pos_x); + dy = GRAVITY_NEAREST_Y(Tree_Points[n].Pos[1] - pos_y); + dz = GRAVITY_NEAREST_Z(Tree_Points[n].Pos[2] - pos_z); + + r2 = dx * dx + dy * dy + dz * dz; + + mass = Tree_Points[n].Mass; + + if(measure_cost_flag) + Thread[thread_id].TreePoints_CostCount[n]++; + + double h_j = 
All.ForceSoftening[Tree_Points[n].SofteningType]; + + hmax = (h_j > h_i) ? h_j : h_i; + + no = Nextnode[no - Tree_MaxNodes]; + } + else /* pseudo particle */ + { + if(mode == 0) + { + tree_treefind_export_node_threads(no, target, thread_id); + } + + no = Nextnode[no - Tree_MaxNodes]; + continue; + } + + /* now evaluate the multipole moment */ + if(mass) + { + double r = sqrt(r2); + +#ifdef PMGRID + double tabentry = asmthfac * r; + int tabindex = (int)tabentry; + + if(tabindex < NTAB) + { + double tabweight = tabentry - tabindex; + double factor_force = (1.0 - tabweight) * shortrange_table[tabindex] + tabweight * shortrange_table[tabindex + 1]; +#ifdef EVALPOTENTIAL + double factor_pot = + (1.0 - tabweight) * shortrange_table_potential[tabindex] + tabweight * shortrange_table_potential[tabindex + 1]; +#endif /* #ifdef EVALPOTENTIAL */ +#endif /* #ifdef PMGRID */ + +#ifdef MULTIPLE_NODE_SOFTENING + for(int type = indi_flag1; type < indi_flag2; type++) + { + if(type >= 0) + { + mass = extnop->mass_per_type[type]; + double h_j; +#ifdef ADAPTIVE_HYDRO_SOFTENING + if(type == 0) + h_j = All.ForceSoftening[nop->u.d.maxhydrosofttype]; + else +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + h_j = All.ForceSoftening[type]; + + hmax = (h_j > h_i) ? 
h_j : h_i; + } + + if(mass) + { +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + double fac; +#ifdef EVALPOTENTIAL + double wp; +#endif /* #ifdef EVALPOTENTIAL */ + + if(r >= hmax) + { + double rinv = 1.0 / r; + double rinv3 = rinv * rinv * rinv; +#ifdef PMGRID + fac = rinv3 + rinv * factor_force * asmthinv2; /* fac = -g'(r)/r */ +#ifdef EVALPOTENTIAL + wp = rinv * factor_pot; /* wp = -g(r) */ +#endif /* #ifdef EVALPOTENTIAL */ +#else /* #ifdef PMGRID */ + fac = rinv3; +#ifdef EVALPOTENTIAL + wp = -rinv; +#endif /* #ifdef EVALPOTENTIAL */ +#endif /* #ifdef PMGRID #else */ + } + else + { + double h_inv = 1.0 / hmax; + double h3_inv = h_inv * h_inv * h_inv; + double u = r * h_inv; + + if(u < 0.5) + { + double u2 = u * u; + fac = h3_inv * (SOFTFAC1 + u2 * (SOFTFAC2 * u + SOFTFAC3)); +#ifdef EVALPOTENTIAL + wp = h_inv * (SOFTFAC4 + u2 * (SOFTFAC5 + u2 * (SOFTFAC6 * u + SOFTFAC7))); +#endif /* #ifdef EVALPOTENTIAL */ + } + else + { + double u2 = u * u; + double u3 = u2 * u; + fac = h3_inv * (SOFTFAC8 + SOFTFAC9 * u + SOFTFAC10 * u2 + SOFTFAC11 * u3 + SOFTFAC12 / u3); +#ifdef EVALPOTENTIAL + wp = h_inv * (SOFTFAC13 + SOFTFAC14 / u + + u2 * (SOFTFAC1 + u * (SOFTFAC15 + u * (SOFTFAC16 + SOFTFAC17 * u)))); +#endif /* #ifdef EVALPOTENTIAL */ + } + +#ifdef PMGRID + if(r > 0) + { + double rinv = 1.0 / r; + fac += rinv * factor_force * asmthinv2; /* fac = -g'(r)/r */ +#ifdef EVALPOTENTIAL + wp += rinv * (factor_pot + 1.0); /* wp = -g(r) */ +#endif /* #ifdef EVALPOTENTIAL */ + } +#endif /* #ifdef PMGRID */ + } + +#ifdef EVALPOTENTIAL + pot += mass * wp; +#endif /* #ifdef EVALPOTENTIAL */ + fac *= mass; + + acc_x += dx * fac; + acc_y += dy * fac; + acc_z += dz * fac; + +#if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) + double fcorr[3]; + ewald_corr(dx, dy, dz, fcorr); + acc_x += mass * fcorr[0]; + acc_y += mass * fcorr[1]; + acc_z += mass * fcorr[2]; +#ifdef EVALPOTENTIAL + pot += mass * ewald_pot_corr(dx, dy, dz); 
+#endif /* #ifdef EVALPOTENTIAL */ +#endif /* #if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */ + +#ifdef MULTIPLE_NODE_SOFTENING + } + } +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + ninteractions++; +#ifdef PMGRID + } +#endif /* #ifdef PMGRID */ + } + } + } + + out->Acc[0] = acc_x; + out->Acc[1] = acc_y; + out->Acc[2] = acc_z; +#ifdef EVALPOTENTIAL + out->Potential = pot; +#endif /* #ifdef EVALPOTENTIAL */ +#ifdef NO_GRAVITY_TYPE + if(in->Type == NO_GRAVITY_TYPE) + { + out->Acc[0] = 0.0; + out->Acc[1] = 0.0; + out->Acc[2] = 0.0; +#ifdef EVALPOTENTIAL + out->Potential = 0.0; +#endif /* #ifdef EVALPOTENTIAL */ + } +#endif /* #ifdef NO_GRAVITY_TYPE */ +#ifdef OUTPUTGRAVINTERACTIONS + out->GravInteractions = ninteractions; +#endif /* #ifdef OUTPUTGRAVINTERACTIONS */ + + return ninteractions; +} + +/*! \brief Prepares node to be exported. + * + * \param[in] no Index of node. + * \param[in] i Index of particle. + * \param[in] thread_id ID of thread. 
+ *
+ *  \return 0
+ */
+int tree_treefind_export_node_threads(int no, int i, int thread_id)
+{
+  /* The task indicated by the pseudoparticle node (pseudo-particle indices start at Tree_MaxPart + Tree_MaxNodes) */
+  int task = DomainNewTask[no - (Tree_MaxPart + Tree_MaxNodes)];
+
+  if(Thread[thread_id].Exportflag[task] != i) /* particle i has not been registered for this task yet */
+    {
+      Thread[thread_id].Exportflag[task] = i;
+      int nexp = Thread[thread_id].Nexport++;
+      Thread[thread_id].PartList[nexp].Task = task;
+      Thread[thread_id].PartList[nexp].Index = i;
+      Thread[thread_id].ExportSpace -= Thread[thread_id].ItemSize;
+    }
+
+  int nexp = Thread[thread_id].NexportNodes++; /* the node itself is recorded on every call */
+  nexp = -1 - nexp; /* negative index: node entries are stored backwards from PartList + InitialSpace */
+  struct datanodelist *nodelist = (struct datanodelist *)(((char *)Thread[thread_id].PartList) + Thread[thread_id].InitialSpace);
+  nodelist[nexp].Task = task;
+  nodelist[nexp].Index = i;
+  nodelist[nexp].Node = DomainNodeIndex[no - (Tree_MaxPart + Tree_MaxNodes)];
+  Thread[thread_id].ExportSpace -= sizeof(struct datanodelist) + sizeof(int); /* one node entry plus one int of export space */
+  return 0;
+}
+
+#ifdef ALLOW_DIRECT_SUMMATION
+/*! \brief Kernel of direct summation force calculation.
+ *  Sums the contribution of all nimport entries of DirectDataAll on entry 'target' and stores it in DirectAccOut[result_idx].
+ *  \param[in] target Index of particle in import array.
+ *  \param[in] result_idx Index in result array.
+ *  \param[in] nimport number of imported particles.
+ * + * \return void + */ +void force_evaluate_direct(int target, int result_idx, int nimport) +{ +#if !defined(GRAVITY_NOT_PERIODIC) + double xtmp, ytmp, ztmp; +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */ + + double acc_x = 0; + double acc_y = 0; + double acc_z = 0; +#ifdef EVALPOTENTIAL + double pot = 0.0; +#endif /* #ifdef EVALPOTENTIAL */ + + double pos_x = DirectDataAll[target].Pos[0]; + double pos_y = DirectDataAll[target].Pos[1]; + double pos_z = DirectDataAll[target].Pos[2]; + double h_i = All.ForceSoftening[DirectDataAll[target].SofteningType]; + +#ifdef PMGRID + double asmth = All.Asmth[0]; +#if defined(PLACEHIGHRESREGION) + int ptype_i = DirectDataAll[target].Type; + if(pmforce_is_particle_high_res(ptype_i, DirectDataAll[target].Pos)) + asmth = All.Asmth[1]; +#endif /* #if defined(PLACEHIGHRESREGION) */ + double asmthinv = 0.5 / asmth; + double asmthinv2 = asmthinv * asmthinv; + double asmthfac = asmthinv * (NTAB / (RCUT / 2.0)); +#endif /* #ifdef PMGRID */ + + for(int j = 0; j < nimport; j++) + { + double h_j = All.ForceSoftening[DirectDataAll[j].SofteningType]; + + double hmax = (h_j > h_i) ? 
h_j : h_i; + + double dx = GRAVITY_NEAREST_X(DirectDataAll[j].Pos[0] - pos_x); + double dy = GRAVITY_NEAREST_Y(DirectDataAll[j].Pos[1] - pos_y); + double dz = GRAVITY_NEAREST_Z(DirectDataAll[j].Pos[2] - pos_z); + + double r2 = dx * dx + dy * dy + dz * dz; + + double mass = DirectDataAll[j].Mass; + + /* now evaluate the force component */ + + double r = sqrt(r2); + +#ifdef PMGRID + double tabentry = asmthfac * r; + int tabindex = (int)tabentry; + + if(tabindex < NTAB) + { + double tabweight = tabentry - tabindex; + double factor_force = (1.0 - tabweight) * shortrange_table[tabindex] + tabweight * shortrange_table[tabindex + 1]; +#ifdef EVALPOTENTIAL + double factor_pot = + (1.0 - tabweight) * shortrange_table_potential[tabindex] + tabweight * shortrange_table_potential[tabindex + 1]; +#endif /* #ifdef EVALPOTENTIAL */ +#endif /* #ifdef PMGRID */ + + double fac; +#ifdef EVALPOTENTIAL + double wp; +#endif /* #ifdef EVALPOTENTIAL */ + + if(r >= hmax) + { + double rinv = 1.0 / r; + double rinv3 = rinv * rinv * rinv; +#ifdef PMGRID + fac = rinv3 + rinv * factor_force * asmthinv2; /* fac = -g'(r)/r */ +#ifdef EVALPOTENTIAL + wp = rinv * factor_pot; /* wp = -g(r) */ +#endif /* #ifdef EVALPOTENTIAL */ +#else /* #ifdef PMGRID */ + fac = rinv3; +#ifdef EVALPOTENTIAL + wp = -rinv; +#endif /* #ifdef EVALPOTENTIAL */ +#endif /* #ifdef PMGRID #else */ + } + else + { + double h_inv = 1.0 / hmax; + double h3_inv = h_inv * h_inv * h_inv; + double u = r * h_inv; + + if(u < 0.5) + { + double u2 = u * u; + fac = h3_inv * (SOFTFAC1 + u2 * (SOFTFAC2 * u + SOFTFAC3)); +#ifdef EVALPOTENTIAL + wp = h_inv * (SOFTFAC4 + u2 * (SOFTFAC5 + u2 * (SOFTFAC6 * u + SOFTFAC7))); +#endif /* #ifdef EVALPOTENTIAL */ + } + else + { + double u2 = u * u; + double u3 = u2 * u; + fac = h3_inv * (SOFTFAC8 + SOFTFAC9 * u + SOFTFAC10 * u2 + SOFTFAC11 * u3 + SOFTFAC12 / u3); +#ifdef EVALPOTENTIAL + wp = h_inv * (SOFTFAC13 + SOFTFAC14 / u + u2 * (SOFTFAC1 + u * (SOFTFAC15 + u * (SOFTFAC16 + SOFTFAC17 * u)))); 
+#endif /* #ifdef EVALPOTENTIAL */ + } +#ifdef PMGRID + if(r > 0) + { + double rinv = 1.0 / r; + fac += rinv * factor_force * asmthinv2; /* fac = -g'(r)/r */ +#ifdef EVALPOTENTIAL + wp += rinv * (factor_pot + 1.0); /* wp = -g(r) */ +#endif /* #ifdef EVALPOTENTIAL */ + } +#endif /* #ifdef PMGRID */ + } + +#ifdef EVALPOTENTIAL + pot += mass * wp; +#endif /* #ifdef EVALPOTENTIAL */ + fac *= mass; + + acc_x += dx * fac; + acc_y += dy * fac; + acc_z += dz * fac; + +#if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) + { + double fcorr[3]; + ewald_corr(dx, dy, dz, fcorr); + acc_x += mass * fcorr[0]; + acc_y += mass * fcorr[1]; + acc_z += mass * fcorr[2]; +#if defined(EVALPOTENTIAL) + pot += mass * ewald_pot_corr(dx, dy, dz); +#endif /* #if defined(EVALPOTENTIAL) */ + } +#endif /* #if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */ + +#ifdef PMGRID + } +#endif /* #ifdef PMGRID */ + } + + DirectAccOut[result_idx].Acc[0] = acc_x; + DirectAccOut[result_idx].Acc[1] = acc_y; + DirectAccOut[result_idx].Acc[2] = acc_z; +#ifdef EVALPOTENTIAL + DirectAccOut[result_idx].Potential = pot; +#endif /* #ifdef EVALPOTENTIAL */ +} +#endif /* #ifdef ALLOW_DIRECT_SUMMATION */ diff --git a/src/amuse/community/arepo/src/gravity/grav_external.c b/src/amuse/community/arepo/src/gravity/grav_external.c new file mode 100644 index 0000000000..784341a47b --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/grav_external.c @@ -0,0 +1,579 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/gravity/grav_external.c
+ * \date        05/2018
+ * \brief       Special gravity routines for external forces.
+ * \details     contains functions:
+ *                void gravity_external(void)
+ *                static void gravity_external_get_force( double pos[3],
+ *                  int type, MyIDType ID, double acc[3], double *pot, int
+ *                  *flag_set )
+ *                void gravity_monopole_1d_spherical()
+ *                double enclosed_mass(double R)
+ *                void calc_exact_gravity_for_particle_type(void)
+ *                void special_particle_create_list()
+ *                void special_particle_update_list()
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+
+#ifdef EXTERNALGRAVITY
+static void gravity_external_get_force(double pos[3], int type, MyIDType ID, double acc[3], double *pot, int *flag_set);
+
+/*! \brief Main routine to add contribution of external gravitational potential
+ *  to accelerations.
+ *
+ *  Function is called in gravity() (in accel.c). Function also evaluates
+ *  the gradient of the accelerations which is needed for the timestep
+ *  criterion due to the external potential.
+ *
+ *  \return void
+ */
+void gravity_external(void)
+{
+  mpi_printf("EXTERNALGRAVITY: execute\n");
+
+  TIMER_START(CPU_TREE);
+
+  for(int idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      int i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue; /* negative list entries are skipped */
+
+      double *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+      if(P[i].Type == 0)
+        pos = SphP[i].Center; /* type 0: evaluate at the cell center instead of P[i].Pos */
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[i].Pos;
+
+      double acc[3], pot;
+      int flag_set = 0;
+      gravity_external_get_force(pos, P[i].Type, P[i].ID, acc, &pot, &flag_set);
+
+      if(flag_set) /* overwrite instead of add; the force routine in this file never sets flag_set */
+        {
+          for(int k = 0; k < NUMDIMS; k++)
+            P[i].GravAccel[k] = acc[k];
+          for(int k = NUMDIMS; k < 3; k++)
+            P[i].GravAccel[k] = 0;
+          P[i].ExtPotential = pot;
+        }
+      else
+        {
+          for(int k = 0; k < NUMDIMS; k++)
+            P[i].GravAccel[k] += acc[k]; /* external force is added to the acceleration already stored */
+#ifdef EVALPOTENTIAL
+          P[i].Potential += pot;
+#endif
+          P[i].ExtPotential += pot;
+        }
+
+      double dx; /* step used for the finite-difference gradient below */
+      if(P[i].Type == 0)
+        dx = 0.1 * get_cell_radius(i);
+      else
+        dx = 0.1 * All.ForceSoftening[P[i].SofteningType];
+
+      P[i].dGravAccel = 0;
+      for(int dim = 0; dim < NUMDIMS; dim++)
+        {
+          double accL[3], posL[3];
+          for(int k = 0; k < 3; k++)
+            posL[k] = pos[k];
+          posL[dim] -= dx; /* sample point shifted by -dx along dim */
+          gravity_external_get_force(posL, P[i].Type, P[i].ID, accL, &pot, &flag_set);
+
+          double accR[3], posR[3];
+          for(int k = 0; k < 3; k++)
+            posR[k] = pos[k];
+          posR[dim] += dx; /* sample point shifted by +dx along dim */
+          gravity_external_get_force(posR, P[i].Type, P[i].ID, accR, &pot, &flag_set);
+
+          for(int k = 0; k < NUMDIMS; k++)
+            {
+              double dGrav = accR[k] - accL[k];
+              P[i].dGravAccel += dGrav * dGrav; /* accumulate squared differences over all components and directions */
+            }
+        }
+      P[i].dGravAccel = sqrt(P[i].dGravAccel) / (2. * dx); /* central difference: the two sample points are 2 * dx apart */
+    }
+
+  TIMER_STOP(CPU_TREE);
+}
+
+/*! \brief Calculates the force from the external potential given a position.
+ *
+ *  \param[in] pos Position at which force is to be evaluated.
+ *  \param[in] type (unused)
+ *  \param[in] ID (unused)
+ *  \param[out] acc Acceleration array; reset to zero before the enabled contributions are added.
+ *  \param[out] pot Pointer to potential; reset to zero first.
+ *  \param[in] flag_set (unused)
+ *
+ *  \return void
+ */
+static void gravity_external_get_force(double pos[3], int type, MyIDType ID, double acc[3], double *pot, int *flag_set)
+{
+  for(int k = 0; k < 3; k++)
+    acc[k] = 0;
+
+  *pot = 0;
+
+#ifdef EXTERNALGY
+  acc[1] += EXTERNALGY; /* constant acceleration along y */
+  *pot = -(EXTERNALGY)*pos[1];
+#endif /* #ifdef EXTERNALGY */
+
+#ifdef STATICISO
+  {
+    double r, m;
+    double dx, dy, dz;
+
+    dx = pos[0] - boxHalf_X; /* the potential is centered on (boxHalf_X, boxHalf_Y, boxHalf_Z) */
+    dy = pos[1] - boxHalf_Y;
+    dz = pos[2] - boxHalf_Z;
+
+    r = sqrt(dx * dx + dy * dy + dz * dz);
+
+    if(r > ISO_R200)
+      m = ISO_M200; /* enclosed mass is constant outside ISO_R200 */
+    else
+      m = ISO_M200 * r / ISO_R200; /* enclosed mass grows linearly with r inside ISO_R200 */
+
+#ifdef ISO_FRACTION
+    m *= ISO_FRACTION;
+#endif /* #ifdef ISO_FRACTION */
+
+    if(r > 0) /* no contribution exactly at the center (avoids division by zero) */
+      {
+        acc[0] += -All.G * m * dx / r / (r * r + ISO_Eps * ISO_Eps);
+        acc[1] += -All.G * m * dy / r / (r * r + ISO_Eps * ISO_Eps);
+        acc[2] += -All.G * m * dz / r / (r * r + ISO_Eps * ISO_Eps);
+      }
+  }
+#endif /* #ifdef STATICISO */
+
+#ifdef STATICNFW
+  {
+    double r, m;
+    double dx, dy, dz;
+
+    dx = pos[0] - boxHalf_X;
+    dy = pos[1] - boxHalf_Y;
+    dz = pos[2] - boxHalf_Z;
+
+    r = sqrt(dx * dx + dy * dy + dz * dz);
+    m = enclosed_mass(r);
+#ifdef NFW_DARKFRACTION
+    m *= NFW_DARKFRACTION;
+#endif /* #ifdef NFW_DARKFRACTION */
+    if(r > 0)
+      {
+        acc[0] += -All.G * m * dx / (r * r * r);
+        acc[1] += -All.G * m * dy / (r * r * r);
+        acc[2] += -All.G * m * dz / (r * r * r);
+      }
+  }
+#endif /* #ifdef STATICNFW */
+
+#ifdef STATICHQ
+  {
+    double r, m, a;
+    double dx, dy, dz;
+
+    dx = pos[0] - boxHalf_X;
+    dy = pos[1] - boxHalf_Y;
+    dz = pos[2] - boxHalf_Z;
+
+    r = sqrt(dx * dx + dy * dy + dz * dz);
+
+    a = pow(All.G * HQ_M200 / (100 * All.Hubble * All.Hubble), 1.0 / 3) / HQ_C * sqrt(2 * (log(1 + HQ_C) - HQ_C / (1 + HQ_C)));
+
+    m = HQ_M200 * pow(r / (r + a), 2); /* mass enclosed within r for scale length a */
+#ifdef HQ_DARKFRACTION
+    m *= HQ_DARKFRACTION;
+#endif /* #ifdef HQ_DARKFRACTION */
+    if(r > 0)
+      {
+        acc[0] += -All.G * m * dx / (r * r * r);
+        acc[1] += -All.G * m * dy / (r * r * r);
+        acc[2] += -All.G * m * dz / (r * r * r);
+      }
+  }
+#endif /* #ifdef STATICHQ */
+}
+#endif /* #ifdef EXTERNALGRAVITY */
+
+#ifdef ONEDIMS_SPHERICAL
+/*! \brief One-dimensional gravity in the spherically symmetric case.
+ *  Visits the gas cells in index order, accumulating the enclosed mass from All.CoreMass; only GravAccel[0] is non-zero.
+ *  \return void
+ */
+void gravity_monopole_1d_spherical()
+{
+  printf("Doing 1D gravity...\n");
+
+  int i;
+  double msum = All.CoreMass; /* mass enclosed below the current cell */
+
+  for(i = 0; i < NumGas; i++)
+    {
+      double r0; /* inner radius used for the cell's own mass contribution */
+      if(i > 0)
+        r0 = 0.5 * (P[i].Pos[0] + P[i - 1].Pos[0]);
+      else
+        r0 = All.CoreRadius;
+      double dm = 4. / 3. * M_PI * (SphP[i].Center[0] * SphP[i].Center[0] * SphP[i].Center[0] - r0 * r0 * r0) * SphP[i].Density;
+      double rad = SphP[i].Center[0];
+
+      P[i].GravAccel[0] = -(msum + dm) * All.G / (rad * rad);
+
+#ifdef EVALPOTENTIAL
+      P[i].Potential = -(msum + dm) * All.G / rad;
+#endif /* #ifdef EVALPOTENTIAL */
+
+      msum += P[i].Mass; /* the full cell mass counts as enclosed for the following cells */
+
+      P[i].GravAccel[1] = 0;
+      P[i].GravAccel[2] = 0;
+    }
+
+  printf("... 1D gravity done.\n");
+}
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+#ifdef STATICNFW
+/*! \brief Auxiliary function for static NFW potential.
+ *  Radii larger than Rs * NFW_C are clamped to that value, so the returned mass is constant beyond it.
+ *  \param[in] R Radius from center of potential.
+ *
+ *  \return Enclosed mass (which causes the external potential).
+ */
+double enclosed_mass(double R)
+{
+  /* Eps is in units of Rs !!!! */
+
+  if(R > Rs * NFW_C)
+    R = Rs * NFW_C;
+
+  return fac * 4 * M_PI * RhoCrit * Dc *
+         (-(Rs * Rs * Rs * (1 - NFW_Eps + log(Rs) - 2 * NFW_Eps * log(Rs) + NFW_Eps * NFW_Eps * log(NFW_Eps * Rs))) /
+              ((NFW_Eps - 1) * (NFW_Eps - 1)) +
+          (Rs * Rs * Rs *
+           (Rs - NFW_Eps * Rs - (2 * NFW_Eps - 1) * (R + Rs) * log(R + Rs) + NFW_Eps * NFW_Eps * (R + Rs) * log(R + NFW_Eps * Rs))) /
+              ((NFW_Eps - 1) * (NFW_Eps - 1) * (R + Rs)));
+}
+#endif /* #ifdef STATICNFW */
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+/*! \brief Routine that computes gravitational force by direct summation.
+ *
+ *  Called by gravity() (in accel.c).
+ * + * \return void + */ +void calc_exact_gravity_for_particle_type(void) +{ + int i, idx; +#ifdef EXACT_GRAVITY_REACTION + double *accx, *accy, *accz; + accx = (double *)mymalloc("accx", All.MaxPartSpecial * sizeof(double)); + accy = (double *)mymalloc("accy", All.MaxPartSpecial * sizeof(double)); + accz = (double *)mymalloc("accz", All.MaxPartSpecial * sizeof(double)); +#ifdef EVALPOTENTIAL + double *pot; + pot = (double *)mymalloc("pot", All.MaxPartSpecial * sizeof(double)); +#endif /* #ifdef EVALPOTENTIAL */ + int n; + for(n = 0; n < All.MaxPartSpecial; n++) + { + accx[n] = accy[n] = accz[n] = 0.0; +#ifdef EVALPOTENTIAL + pot[n] = 0.0; +#endif /* #ifdef EVALPOTENTIAL */ + } +#endif /* #ifdef EXACT_GRAVITY_REACTION */ + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + double fac, wp; + double dx, dy, dz, r, r2; + double h, h_inv, h3_inv, u; + int k; + + /* set softening to corresponding particle's softening length */ + h = All.ForceSoftening[All.SofteningTypeOfPartType[EXACT_GRAVITY_FOR_PARTICLE_TYPE]]; + + for(k = 0; k < All.MaxPartSpecial; k++) + { + if(PartSpecialListGlobal[k].ID == P[i].ID) + continue; + + dx = P[i].Pos[0] - PartSpecialListGlobal[k].pos[0]; + dy = P[i].Pos[1] - PartSpecialListGlobal[k].pos[1]; + dz = P[i].Pos[2] - PartSpecialListGlobal[k].pos[2]; + + r2 = dx * dx + dy * dy + dz * dz; + r = sqrt(r2); + + // using spline softening + if(r >= h) + { + fac = 1 / (r2 * r); + wp = -1 / r; + } + else + { + h_inv = 1.0 / h; + h3_inv = h_inv * h_inv * h_inv; + u = r * h_inv; + + if(u < 0.5) + { + fac = h3_inv * (10.666666666667 + u * u * (32.0 * u - 38.4)); + wp = h_inv * (-2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6))); + } + else + { + fac = h3_inv * + (21.333333333333 - 48.0 * u + 38.4 * u * u - 10.666666666667 * u * u * u - 0.066666666667 / (u * u * u)); + wp = h_inv * (-3.2 + 0.066666666667 / u + u * u * (10.666666666667 + u * (-16.0 + u * (9.6 
- 2.133333333333 * u)))); + } + } + + P[i].GravAccel[0] -= All.G * PartSpecialListGlobal[k].mass * fac * dx; + P[i].GravAccel[1] -= All.G * PartSpecialListGlobal[k].mass * fac * dy; + P[i].GravAccel[2] -= All.G * PartSpecialListGlobal[k].mass * fac * dz; + +#ifdef EVALPOTENTIAL + P[i].Potential += All.G * PartSpecialListGlobal[k].mass * wp; +#endif /* #ifdef EVALPOTENTIAL */ +#ifdef EXACT_GRAVITY_REACTION + /* avoid double counting */ + if(P[i].Type != EXACT_GRAVITY_FOR_PARTICLE_TYPE) + { + accx[k] += All.G * P[i].Mass * fac * dx; + accy[k] += All.G * P[i].Mass * fac * dy; + accz[k] += All.G * P[i].Mass * fac * dz; +#ifdef EVALPOTENTIAL + pot[k] += All.G * P[i].Mass * wp; +#endif /* #ifdef EVALPOTENTIAL */ + } +#endif /* #ifdef EXACT_GRAVITY_REACTION */ + } + } +#ifdef EXACT_GRAVITY_REACTION + double *buf = (double *)mymalloc("buf", All.MaxPartSpecial * sizeof(double)); + + MPI_Allreduce(accx, buf, All.MaxPartSpecial, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + for(n = 0; n < All.MaxPartSpecial; n++) + accx[n] = buf[n]; + MPI_Allreduce(accy, buf, All.MaxPartSpecial, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + for(n = 0; n < All.MaxPartSpecial; n++) + accy[n] = buf[n]; + MPI_Allreduce(accz, buf, All.MaxPartSpecial, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + for(n = 0; n < All.MaxPartSpecial; n++) + accz[n] = buf[n]; +#ifdef EVALPOTENTIAL + MPI_Allreduce(pot, buf, All.MaxPartSpecial, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + for(n = 0; n < All.MaxPartSpecial; n++) + pot[n] = buf[n]; +#endif /* #ifdef EVALPOTENTIAL */ + myfree(buf); + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + for(n = 0; n < All.MaxPartSpecial; n++) + { + if(PartSpecialListGlobal[n].ID == P[i].ID) + { + P[i].GravAccel[0] += accx[n]; + P[i].GravAccel[1] += accy[n]; + P[i].GravAccel[2] += accz[n]; +#ifdef EVALPOTENTIAL + P[i].Potential += pot[n]; +#endif /* #ifdef EVALPOTENTIAL */ + } + } + } + +#ifdef EVALPOTENTIAL + 
myfree(pot); +#endif /* #ifdef EVALPOTENTIAL */ + myfree(accz); + myfree(accy); + myfree(accx); +#endif /* #ifdef EXACT_GRAVITY_REACTION */ +} + +/*! \brief Creates list of special particles, i.e. particles for which gravity + * is calculated by direct summation. + * + * Called in begrund2() (begrun.c), i.e. only at startup of the simulation. + * + * \return void + */ +void special_particle_create_list() +{ + struct special_particle_data *SpecialPartList; + SpecialPartList = + (struct special_particle_data *)mymalloc("SpecialPartList", All.MaxPartSpecial * sizeof(struct special_particle_data)); + + int i, j, nsrc, nimport, ngrp; + for(i = 0, nsrc = 0; i < NumPart; i++) + { + if(P[i].Type == EXACT_GRAVITY_FOR_PARTICLE_TYPE) + { + SpecialPartList[nsrc].ID = P[i].ID; + + SpecialPartList[nsrc].pos[0] = P[i].Pos[0]; + SpecialPartList[nsrc].pos[1] = P[i].Pos[1]; + SpecialPartList[nsrc].pos[2] = P[i].Pos[2]; + + SpecialPartList[nsrc++].mass = P[i].Mass; + } + } + + for(j = 0; j < NTask; j++) + Send_count[j] = nsrc; + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = 0; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + /* exchange particle data */ + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the particles */ + MPI_Sendrecv(&SpecialPartList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct special_particle_data), + MPI_BYTE, recvTask, TAG_DENS_A, &PartSpecialListGlobal[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(struct special_particle_data), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + } + + myfree(SpecialPartList); +} + +/*! \brief Updates list of special particles, i.e. 
particles for which gravity + * is calculated by direct summation. + * + * Called in run() (run.c). + * + * \return void + */ +void special_particle_update_list() +{ + struct special_particle_data *SpecialPartList; + SpecialPartList = + (struct special_particle_data *)mymalloc("SpecialPartList", All.MaxPartSpecial * sizeof(struct special_particle_data)); + + int i, j, nsrc, nimport, ngrp; + for(i = 0, nsrc = 0; i < NumPart; i++) + { + if(P[i].Type == EXACT_GRAVITY_FOR_PARTICLE_TYPE) + { + SpecialPartList[nsrc].ID = P[i].ID; + + SpecialPartList[nsrc].pos[0] = P[i].Pos[0]; + SpecialPartList[nsrc].pos[1] = P[i].Pos[1]; + SpecialPartList[nsrc].pos[2] = P[i].Pos[2]; + + SpecialPartList[nsrc++].mass = P[i].Mass; + } + } + + for(j = 0; j < NTask; j++) + Send_count[j] = nsrc; + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = 0; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + /* exchange particle data */ + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the particles */ + MPI_Sendrecv(&SpecialPartList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct special_particle_data), + MPI_BYTE, recvTask, TAG_DENS_A, &PartSpecialListGlobal[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(struct special_particle_data), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + } + + myfree(SpecialPartList); +} +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ diff --git a/src/amuse/community/arepo/src/gravity/grav_softening.c b/src/amuse/community/arepo/src/gravity/grav_softening.c new file mode 100644 index 0000000000..4494f4df08 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/grav_softening.c @@ 
-0,0 +1,215 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * <https://www.gnu.org/licenses/>.
+ *
+ * \file src/gravity/grav_softening.c
+ * \date 05/2018
+ * \brief Routines for setting the gravitational softening lengths.
+ * \details contains functions:
+ * void set_softenings(void)
+ * int get_softeningtype_for_hydro_cell(int i)
+ * double get_default_softening_of_particletype(int type)
+ * int get_softening_type_from_mass(double mass)
+ * double get_desired_softening_from_mass(double mass)
+ * void init_individual_softenings(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+
+/*! \brief Sets the (comoving) softening length of all particle
+ * types in the table All.SofteningTable[...].
+ *
+ * A check is performed that the physical softening length is bounded by the
+ * Softening-MaxPhys values.
+ * + * \return void + */ +void set_softenings(void) +{ + int i; + + if(All.ComovingIntegrationOn) + { + for(i = 0; i < NSOFTTYPES; i++) + if(All.SofteningComoving[i] * All.Time > All.SofteningMaxPhys[i]) + All.SofteningTable[i] = All.SofteningMaxPhys[i] / All.Time; + else + All.SofteningTable[i] = All.SofteningComoving[i]; + } + else + { + for(i = 0; i < NSOFTTYPES; i++) + All.SofteningTable[i] = All.SofteningComoving[i]; + } + +#ifdef ADAPTIVE_HYDRO_SOFTENING + for(i = 0; i < NSOFTTYPES_HYDRO; i++) + All.SofteningTable[i + NSOFTTYPES] = All.MinimumComovingHydroSoftening * pow(All.AdaptiveHydroSofteningSpacing, i); + + if(All.AdaptiveHydroSofteningSpacing < 1) + terminate("All.AdaptiveHydroSofteningSpacing < 1"); + +#ifdef MULTIPLE_NODE_SOFTENING + /* we check that type=0 has its own slot 0 in the softening types, so that only gas masses are stored there */ + if(All.SofteningTypeOfPartType[0] != 0) + terminate("All.SofteningTypeOfPartType[0] != 0"); + + for(i = 1; i < NTYPES; i++) + if(All.SofteningTypeOfPartType[i] == All.SofteningTypeOfPartType[0]) + terminate("i=%d: All.SofteningTypeOfPartType[i] == All.SofteningTypeOfPartType[0]", i); +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + + for(i = 0; i < NSOFTTYPES + NSOFTTYPES_HYDRO; i++) + All.ForceSoftening[i] = 2.8 * All.SofteningTable[i]; + + All.ForceSoftening[NSOFTTYPES + NSOFTTYPES_HYDRO] = 0; /* important - this entry is actually used */ +} + +#ifdef ADAPTIVE_HYDRO_SOFTENING +/*! \brief Finds the index of the softening table for a given cell depending + * on its radius. + * + * \param[in] i Index of cell in SphP array. + * + * \return Index of corresponding softening in softening lookup-table. 
+ */ +int get_softeningtype_for_hydro_cell(int i) +{ + double soft = All.GasSoftFactor * get_cell_radius(i); + + if(soft <= All.ForceSoftening[NSOFTTYPES]) + return NSOFTTYPES; + + int k = 0.5 + log(soft / All.ForceSoftening[NSOFTTYPES]) / log(All.AdaptiveHydroSofteningSpacing); + if(k >= NSOFTTYPES_HYDRO) + k = NSOFTTYPES_HYDRO - 1; + + return NSOFTTYPES + k; +} +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + +/*! \brief Returns the default softening length for particle type 'type'. + * + * \param[in] type Type of the local particle. + * + * \return The softening length of particle with type 'type'. + */ +double get_default_softening_of_particletype(int type) { return All.SofteningTable[All.SofteningTypeOfPartType[type]]; } + +#ifdef INDIVIDUAL_GRAVITY_SOFTENING +/*! \brief Determines the softening type from the mass of a particle. + * + * \param[in] mass Mass of the particle. + * + * \return Index in gravitational softening table. + */ +int get_softening_type_from_mass(double mass) +{ + int i, min_type = -1; + double eps = get_desired_softening_from_mass(mass); + double min_dln = MAX_FLOAT_NUMBER; + +#if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) + i = 1; +#else /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) */ + i = 0; +#endif /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) #else */ + + for(; i < NSOFTTYPES; i++) + { + if(All.ForceSoftening[i] > 0) + { + double dln = fabs(log(eps) - log(All.ForceSoftening[i])); + + if(dln < min_dln) + { + min_dln = dln; + min_type = i; + } + } + } + if(min_type < 0) + terminate("min_type < 0 mass=%g eps=%g All.AvgType1Mass=%g All.ForceSoftening[1]=%g", mass, eps, All.AvgType1Mass, + All.ForceSoftening[1]); + + return min_type; +} + +/*! \brief Returns the softening length of softening type 1 + * particles depending on the particle mass. + * + * \param[in] mass Particle mass. 
+ * + * \return Softening length for a softening type 1 particle of mass 'mass'. + */ +double get_desired_softening_from_mass(double mass) +{ + if(mass <= All.AvgType1Mass) + return 2.8 * All.SofteningComoving[1]; + else + return 2.8 * All.SofteningComoving[1] * pow(mass / All.AvgType1Mass, 1.0 / 3); +} + +/*! \brief Initializes the mass dependent softening calculation for Type 1 + * particles. + * + * The average mass of Type 1 particles is calculated. + * + * \return void + */ +void init_individual_softenings(void) +{ + int i, ndm; + double mass, masstot; + long long ndmtot; + + for(i = 0, ndm = 0, mass = 0; i < NumPart; i++) + if(P[i].Type == 1) + { + ndm++; + mass += P[i].Mass; + } + sumup_large_ints(1, &ndm, &ndmtot); + MPI_Allreduce(&mass, &masstot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + All.AvgType1Mass = masstot / ndmtot; + + mpi_printf("INIT: AvgType1Mass = %g\n", All.AvgType1Mass); + + for(i = 0; i < NumPart; i++) + { + if(((1 << P[i].Type) & (INDIVIDUAL_GRAVITY_SOFTENING))) + P[i].SofteningType = get_softening_type_from_mass(P[i].Mass); + } +} +#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */ diff --git a/src/amuse/community/arepo/src/gravity/gravdirect.c b/src/amuse/community/arepo/src/gravity/gravdirect.c new file mode 100644 index 0000000000..cbe7be7426 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/gravdirect.c @@ -0,0 +1,259 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * <https://www.gnu.org/licenses/>.
+ *
+ * \file src/gravity/gravdirect.c
+ * \date 05/2018
+ * \brief Main driver routines for gravitational (short-range) force
+ * computation through direct summation
+ * \details Note that this is not the same thing as
+ * EXACT_GRAVITY_FOR_PARTICLE_TYPE!
+ * ALLOW_DIRECT_SUMMATION does direct summation for performance
+ * reasons if there is only a small number of interactions to be
+ * calculated and the overhead of a tree-construction would be
+ * more expensive than the direct summation calculation, while
+ * EXACT_GRAVITY_FOR_PARTICLE_TYPE always enforces a direct
+ * summation for all particle pairs of a given type.
+ * contains functions:
+ * void gravity_direct(int timebin)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+
+#ifdef ALLOW_DIRECT_SUMMATION
+static int Nimport;
+
+/*! \brief Computes the gravitational forces for all active particles through
+ * direct summation.
+ * + * \param[in] timebin (unused) + * + * \return void + */ +void gravity_direct(int timebin) +{ + int i, j, k, idx; + + TIMER_START(CPU_TREEDIRECT); + + if(TimeBinsGravity.GlobalNActiveParticles <= 1) + { + if(TimeBinsGravity.NActiveParticles > 0) + { + i = TimeBinsGravity.ActiveParticleList[0]; + if(i >= 0) + { + for(k = 0; k < 3; k++) + P[i].GravAccel[k] = 0; + +#ifdef EVALPOTENTIAL + P[i].Potential = 0; +#endif /* #ifdef EVALPOTENTIAL */ + } + } + + mpi_printf("Found only %d particles to do direct summation -> SKIPPING IT\n", TimeBinsGravity.GlobalNActiveParticles); + TIMER_STOP(CPU_TREEDIRECT); + return; + } + + mpi_printf("GRAVDIRECT: direct summation. (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + + double tstart = second(); + + DirectDataIn = (struct directdata *)mymalloc("DirectDataIn", TimeBinsGravity.NActiveParticles * sizeof(struct directdata)); + + Nforces = 0; + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + { + for(k = 0; k < 3; k++) + DirectDataIn[Nforces].Pos[k] = SphP[i].Center[k]; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + for(k = 0; k < 3; k++) + DirectDataIn[Nforces].Pos[k] = P[i].Pos[k]; + } + + DirectDataIn[Nforces].Mass = P[i].Mass; + + DirectDataIn[Nforces].Type = P[i].Type; + DirectDataIn[Nforces].SofteningType = P[i].SofteningType; + + Nforces++; + } + + MPI_Allgather(&Nforces, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, Nimport = 0, Recv_offset[0] = 0; j < NTask; j++) + { + Nimport += Recv_count[j]; + + if(j > 0) + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + + DirectDataAll = (struct directdata *)mymalloc("DirectDataAll", Nimport * sizeof(struct directdata)); + + for(j = 0; j < NTask; j++) + { + Send_count[j] = Recv_count[j] * sizeof(struct directdata); + Send_offset[j] = Recv_offset[j] * sizeof(struct directdata); 
+ } + + MPI_Allgatherv(DirectDataIn, Nforces * sizeof(struct directdata), MPI_BYTE, DirectDataAll, Send_count, Send_offset, MPI_BYTE, + MPI_COMM_WORLD); + + /* subdivide the work evenly */ + int first, count; + subdivide_evenly(Nimport, NTask, ThisTask, &first, &count); + + DirectAccOut = (struct accdata *)mymalloc("DirectDataOut", count * sizeof(struct accdata)); + + /* now calculate the forces */ + for(i = 0; i < count; i++) + force_evaluate_direct(i + first, i, Nimport); + + /* now send the forces to the right places */ + + DirectAccIn = (struct accdata *)mymalloc("DirectDataIn", Nforces * sizeof(struct accdata)); + + MPI_Request *requests = (MPI_Request *)mymalloc_movable(&requests, "requests", 2 * NTask * sizeof(MPI_Request)); + int n_requests = 0; + + int recvTask = 0; + int sendTask = 0; + int send_first, send_count; + subdivide_evenly(Nimport, NTask, sendTask, &send_first, &send_count); + + while(recvTask < NTask && sendTask < NTask) /* go through both lists */ + { + while(send_first + send_count < Recv_offset[recvTask]) + { + if(sendTask >= NTask - 1) + terminate("sendTask >= NTask recvTask=%d sendTask=%d", recvTask, sendTask); + + sendTask++; + subdivide_evenly(Nimport, NTask, sendTask, &send_first, &send_count); + } + + while(Recv_offset[recvTask] + Recv_count[recvTask] < send_first) + { + if(recvTask >= NTask - 1) + terminate("recvTask >= NTask recvTask=%d sendTask=%d", recvTask, sendTask); + + recvTask++; + } + + int start = imax(Recv_offset[recvTask], send_first); + int next = imin(Recv_offset[recvTask] + Recv_count[recvTask], send_first + send_count); + + if(next - start >= 1) + { + if(ThisTask == sendTask) + MPI_Isend(DirectAccOut + start - send_first, (next - start) * sizeof(struct accdata), MPI_BYTE, recvTask, TAG_PDATA_SPH, + MPI_COMM_WORLD, &requests[n_requests++]); + + if(ThisTask == recvTask) + MPI_Irecv(DirectAccIn + start - Recv_offset[recvTask], (next - start) * sizeof(struct accdata), MPI_BYTE, sendTask, + TAG_PDATA_SPH, MPI_COMM_WORLD, 
&requests[n_requests++]); + } + + if(next == Recv_offset[recvTask] + Recv_count[recvTask]) + recvTask++; + else + { + sendTask++; + if(sendTask >= NTask) + break; + + subdivide_evenly(Nimport, NTask, sendTask, &send_first, &send_count); + } + } + + MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE); + myfree(requests); + + Nforces = 0; + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + for(k = 0; k < 3; k++) + P[i].GravAccel[k] = DirectAccIn[Nforces].Acc[k]; + +#ifdef EVALPOTENTIAL + P[i].Potential = DirectAccIn[Nforces].Potential; +#endif /* #ifdef EVALPOTENTIAL */ + Nforces++; + } + + myfree(DirectAccIn); + myfree(DirectAccOut); + myfree(DirectDataAll); + myfree(DirectDataIn); + + mpi_printf("GRAVDIRECT: force is done.\n"); + + All.TotNumOfForces += TimeBinsGravity.GlobalNActiveParticles; + + double tend = second(); + + double timedirect, sumt; + timedirect = tend - tstart; + + MPI_Reduce(&timedirect, &sumt, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + fprintf(FdTimings, "Nf=%9lld active part/task: avg=%g total-Nf=%lld\n", TimeBinsGravity.GlobalNActiveParticles, + ((double)TimeBinsGravity.GlobalNActiveParticles) / NTask, All.TotNumOfForces); + fprintf(FdTimings, " (direct) part/sec: %g ia/sec: %g\n", TimeBinsGravity.GlobalNActiveParticles / (sumt + 1.0e-20), + TimeBinsGravity.GlobalNActiveParticles / (sumt + 1.0e-20) * TimeBinsGravity.GlobalNActiveParticles); + myflush(FdTimings); + } + + TIMER_STOP(CPU_TREEDIRECT); +} + +#endif /* #ifdef ALLOW_DIRECT_SUMMATION */ diff --git a/src/amuse/community/arepo/src/gravity/gravtree.c b/src/amuse/community/arepo/src/gravity/gravtree.c new file mode 100644 index 0000000000..810aa9c3da --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/gravtree.c @@ -0,0 +1,749 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * <https://www.gnu.org/licenses/>.
+ *
+ * \file src/gravity/gravtree.c
+ * \date 05/2018
+ * \brief Main driver routines for gravitational (short-range) force
+ * computation.
+ * \details This file contains the code for the gravitational force
+ * computation by means of the tree algorithm. To this end, a tree
+ * force is computed for all active local particles, and particles
+ * are exported to other processors if needed, where they can
+ * receive additional force contributions. If the TreePM algorithm
+ * is enabled, the force computed will only be the short-range
+ * part.
+ * contains functions:
+ * static void particle2in(data_in * in, int i, int firstnode)
+ * static void out2particle(data_out * out, int i, int mode)
+ * static void gravity_primary_loop(void)
+ * void gravity_secondary_loop(void)
+ * void gravity_tree(int timebin)
+ * static int gravity_evaluate(int target, int mode, int
+ * threadid)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 20.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+
+static double ThreadsCosttotal[NUM_THREADS]; /*!< The total cost of the particles/nodes processed by each thread */
+static int ThreadFirstExec[NUM_THREADS]; /*!< Keeps track, if a given thread executes the gravity_primary_loop() for the first time */
+static int MeasureCostFlag; /*!< Whether the tree costs are measured for the current time step */
+
+static int gravity_evaluate(int target, int mode, int threadid);
+
+typedef gravdata_in data_in;
+
+typedef gravdata_out data_out;
+
+#ifdef DETAILEDTIMINGS
+static double tstart;
+static int current_timebin;
+#endif /* #ifdef DETAILEDTIMINGS */
+
+/* local data structure for collecting particle/cell data that is sent to other processors if needed */
+static data_in *DataIn, *DataGet;
+static data_out *DataResult, *DataOut;
+
+/*! \brief Routine that fills the relevant particle/cell data into the input
+ * structure defined above. Needed by generic_comm_helpers2.
+ *
+ * \param[out] in Data structure to fill.
+ * \param[in] i Index of particle in P and SphP arrays.
+ * \param[in] firstnode First node of communication.
+ *
+ * \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  int dim;
+
+  in->Firstnode = firstnode;
+
+  if(i >= NumPart)
+    {
+      /* not a local particle: read from the imported tree points instead */
+      const int ip = i - Tree_ImportedNodeOffset;
+
+      for(dim = 0; dim < 3; dim++)
+        in->Pos[dim] = Tree_Points[ip].Pos[dim];
+
+      in->Type = Tree_Points[ip].Type;
+      in->SofteningType = Tree_Points[ip].SofteningType;
+      in->OldAcc = Tree_Points[ip].OldAcc;
+      return;
+    }
+
+  /* local particle */
+  in->Type = P[i].Type;
+  in->SofteningType = P[i].SofteningType;
+  in->OldAcc = P[i].OldAcc;
+
+#ifdef CELL_CENTER_GRAVITY
+  if(P[i].Type == 0)
+    {
+      /* type 0: use the cell center stored in SphP */
+      for(dim = 0; dim < 3; dim++)
+        in->Pos[dim] = SphP[i].Center[dim];
+    }
+  else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+    {
+      for(dim = 0; dim < 3; dim++)
+        in->Pos[dim] = P[i].Pos[dim];
+    }
+}
+
+/*! \brief Routine to store or combine result data. Needed by
+ * generic_comm_helpers2.
+ *
+ * \param[in] out Data to be moved to appropriate variables in global
+ * particle and cell data arrays (P, SphP,...)
+ * \param[in] i Index of particle in P and SphP arrays
+ * \param[in] mode Mode of function: local particles or information that was
+ * communicated from other tasks and has to be added locally?
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + if(i < NumPart) + { + P[i].GravAccel[0] = out->Acc[0]; + P[i].GravAccel[1] = out->Acc[1]; + P[i].GravAccel[2] = out->Acc[2]; +#ifdef EVALPOTENTIAL + P[i].Potential = out->Potential; +#endif /* #ifdef EVALPOTENTIAL */ +#ifdef OUTPUTGRAVINTERACTIONS + P[i].GravInteractions = out->GravNinteractions; +#endif /* #ifdef OUTPUTGRAVINTERACTIONS */ + } + else + { + int idx = Tree_ResultIndexList[i - Tree_ImportedNodeOffset]; + Tree_ResultsActiveImported[idx].GravAccel[0] = out->Acc[0]; + Tree_ResultsActiveImported[idx].GravAccel[1] = out->Acc[1]; + Tree_ResultsActiveImported[idx].GravAccel[2] = out->Acc[2]; +#ifdef EVALPOTENTIAL + Tree_ResultsActiveImported[idx].Potential = out->Potential; +#endif /* #ifdef EVALPOTENTIAL */ +#ifdef OUTPUTGRAVINTERACTIONS + Tree_ResultsActiveImported[idx].GravInteractions = out->GravNinteractions; +#endif /* #ifdef OUTPUTGRAVINTERACTIONS */ + } + } + else /* combine */ + { + if(i < NumPart) + { + P[i].GravAccel[0] += out->Acc[0]; + P[i].GravAccel[1] += out->Acc[1]; + P[i].GravAccel[2] += out->Acc[2]; +#ifdef EVALPOTENTIAL + P[i].Potential += out->Potential; +#endif /* #ifdef EVALPOTENTIAL */ +#ifdef OUTPUTGRAVINTERACTIONS + P[i].GravInteractions += out->GravNinteractions; +#endif /* #ifdef OUTPUTGRAVINTERACTIONS */ + } + else + { + int idx = Tree_ResultIndexList[i - Tree_ImportedNodeOffset]; + Tree_ResultsActiveImported[idx].GravAccel[0] += out->Acc[0]; + Tree_ResultsActiveImported[idx].GravAccel[1] += out->Acc[1]; + Tree_ResultsActiveImported[idx].GravAccel[2] += out->Acc[2]; +#ifdef EVALPOTENTIAL + Tree_ResultsActiveImported[idx].Potential += out->Potential; +#endif /* #ifdef EVALPOTENTIAL */ +#ifdef OUTPUTGRAVINTERACTIONS + Tree_ResultsActiveImported[idx].GravInteractions += out->GravNinteractions; +#endif /* #ifdef OUTPUTGRAVINTERACTIONS */ + } + } +} + +#include 
"../utils/generic_comm_helpers2.h" + +/*! \brief Primary loop of gravity calculation. + * + * Gravitational interactions between local particles; see gravity_tree(..). + * Equivalent to 'kernel_local'. + * + * \return void + */ +static void gravity_primary_loop(void) +{ + TIMER_STOPSTART(CPU_TREEBALSNDRCV, CPU_TREEWALK1); + +#ifdef DETAILEDTIMINGS + double t0 = second(); +#endif /* #ifdef DETAILEDTIMINGS */ + + int idx; + /* do local particles */ + { + int j, threadid = get_thread_num(); + double cost = 0; + + if(ThreadFirstExec[threadid]) + { + ThreadFirstExec[threadid] = 0; + + if(MeasureCostFlag) + { + memset(Thread[threadid].P_CostCount, 0, NumPart * sizeof(int)); + memset(Thread[threadid].TreePoints_CostCount, 0, Tree_NumPartImported * sizeof(int)); + memset(Thread[threadid].Node_CostCount + Tree_MaxPart, 0, Tree_NumNodes * sizeof(int)); + } + } + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + idx = NextParticle++; + + if(idx >= Nforces) + break; + + int i = TargetList[idx]; + + cost += gravity_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + + ThreadsCosttotal[threadid] += cost; + } + +#ifdef DETAILEDTIMINGS + double t1 = second(); + + fprintf(FdDetailed, "%d %d %d %d %g %g\n", All.NumCurrentTiStep, current_timebin, DETAILED_TIMING_GRAVWALK, MODE_LOCAL_PARTICLES, + timediff(tstart, t0), timediff(tstart, t1)); +#endif /* #ifdef DETAILEDTIMINGS */ + + TIMER_STOPSTART(CPU_TREEWALK1, CPU_TREEBALSNDRCV); +} + +/*! \brief Secondary loop of gravity calculation. + * + * Gravitational interactions between imported particles; see gravity_tree(.). + * Equivalent to 'kernel_imported'. 
+ * + * \return void + */ +void gravity_secondary_loop(void) +{ + TIMER_STOPSTART(CPU_TREEBALSNDRCV, CPU_TREEWALK2); + +#ifdef DETAILEDTIMINGS + double t0 = second(); +#endif /* #ifdef DETAILEDTIMINGS */ + + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + double cost = 0; + + if(ThreadFirstExec[threadid]) + { + ThreadFirstExec[threadid] = 0; + + if(MeasureCostFlag) + { + memset(Thread[threadid].P_CostCount, 0, NumPart * sizeof(int)); + memset(Thread[threadid].TreePoints_CostCount, 0, Tree_NumPartImported * sizeof(int)); + memset(Thread[threadid].Node_CostCount + Tree_MaxPart, 0, Tree_NumNodes * sizeof(int)); + } + } + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + cost += gravity_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + + ThreadsCosttotal[threadid] += cost; + } + +#ifdef DETAILEDTIMINGS + double t1 = second(); + + fprintf(FdDetailed, "%d %d %d %d %g %g\n", All.NumCurrentTiStep, current_timebin, DETAILED_TIMING_GRAVWALK, MODE_IMPORTED_PARTICLES, + timediff(tstart, t0), timediff(tstart, t1)); +#endif /* #ifdef DETAILEDTIMINGS */ + + TIMER_STOPSTART(CPU_TREEWALK2, CPU_TREEBALSNDRCV); +} + +/*! \brief This function computes the gravitational forces for all active + * particles. + * + * The tree walk is done in two phases: First the local part of the force tree + * is processed (gravity_primary_loop() ). Whenever an external node is + * encountered during the walk, this node is saved on a list. This node list + * along with data about the particles is then exchanged among tasks. + * In the second phase (gravity_secondary_loop() ) each task now continues + * the tree walk for the imported particles. Finally the resulting partial + * forces are send back to the original task and are summed up there to + * complete the tree force calculation. + * + * If only the tree algorithm is used in a periodic box, the whole tree walk + * is done twice. 
First a normal tree walk is done as described above, and + * afterwards a second tree walk, which adds the needed Ewald corrections is + * performed. + * + * Particles are only exported to other processors when really needed, + * thereby allowing a good use of the communication buffer. Every particle is + * sent at most once to a given processor together with the complete list of + * relevant tree nodes to be checked on the other task. + * + * Particles which drifted into the domain of another task are sent to this + * task for the force computation. Afterwards the resulting force is sent + * back to the originating task. + * + * In order to improve the work load balancing during a domain decomposition, + * the work done by each node/particle is measured. The work is measured for + * the interaction partners (i.e. the nodes or particles) and not for the + * particles itself that require a force computation. This way, work done for + * imported particles is accounted for at the task where the work actually + * incurred. The cost measurement is only done for the "GRAVCOSTLEVELS" + * highest occupied time bins. The variable 'MeasureCostFlag' will state + * whether a measurement is done at the present time step. + * + * The particles requiring a force computation are split into chunks of size + * 'Nchunksize'. A set of every 'Nchunk' -th chunk is processed first. + * Then the process is repeated, processing the next set of chunks. This way + * the amount of exported particles is more balanced, as communication heavy + * regions are mixed with less communication intensive regions. + * + * \param[in] timebin Time bin for which gravity should be calculated. 
+ *
+ *  \return void
+ */
+void gravity_tree(int timebin)
+{
+  int idx, i, j, k, ncount, iter = 0, maxiter;
+  struct detailed_timings /* all members are double: the struct is reduced below as a flat array of MPI_DOUBLE */
+  {
+    double all, tree1, tree2, tree, commwait;
+    double sumnexport, costtotal, numnodes;
+    ;
+  } timer, tisum, timax;
+  memset(&timer, 0, sizeof(struct detailed_timings));
+  double Costtotal;
+  int ngrp;
+  int recvTask;
+
+  TIMER_STORE;
+  TIMER_START(CPU_TREE);
+
+  /* allocate buffers to arrange communication */
+  mpi_printf("GRAVTREE: Begin tree force. (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+
+  TIMER_STOPSTART(CPU_TREE, CPU_TREECOSTMEASURE);
+
+  for(i = 0; i < NUM_THREADS; i++)
+    {
+      ThreadsCosttotal[i] = 0;
+      ThreadFirstExec[i] = 0;
+    }
+
+  /* find the level (if any) for which we measure gravity cost */
+  for(i = 0, TakeLevel = -1; i < GRAVCOSTLEVELS; i++)
+    {
+      if(All.LevelToTimeBin[i] == timebin)
+        {
+          TakeLevel = i;
+          break;
+        }
+    }
+
+  if(TakeLevel < 0) /* we have not found a matching slot */
+    {
+      for(i = 0; i < GRAVCOSTLEVELS; i++) /* first choice: a slot with a negative time bin entry */
+        {
+          if(All.LevelToTimeBin[i] < 0)
+            {
+              All.LevelToTimeBin[i] = timebin;
+              TakeLevel = i;
+              All.LevelHasBeenMeasured[i] = 0;
+              break;
+            }
+        }
+
+      if(TakeLevel < 0)
+        {
+          if(All.HighestOccupiedGravTimeBin - timebin < GRAVCOSTLEVELS) /* we should have space */
+            {
+              /* clear levels that are out of range: reuse the first slot whose bin lies above the highest occupied bin or more than GRAVCOSTLEVELS-1 below it */
+              for(i = 0; i < GRAVCOSTLEVELS; i++)
+                {
+                  if(All.LevelToTimeBin[i] > All.HighestOccupiedGravTimeBin)
+                    {
+                      All.LevelToTimeBin[i] = timebin;
+                      TakeLevel = i;
+                      All.LevelHasBeenMeasured[i] = 0;
+                      break;
+                    }
+                  if(All.LevelToTimeBin[i] < All.HighestOccupiedGravTimeBin - (GRAVCOSTLEVELS - 1))
+                    {
+                      All.LevelToTimeBin[i] = timebin;
+                      TakeLevel = i;
+                      All.LevelHasBeenMeasured[i] = 0;
+                      break;
+                    }
+                }
+
+              if(TakeLevel < 0)
+                {
+                  if(timebin > All.HighestOccupiedGravTimeBin) /* last resort: take over the slot of the highest occupied bin */
+                    {
+                      for(i = 0; i < GRAVCOSTLEVELS; i++)
+                        {
+                          if(All.LevelToTimeBin[i] == All.HighestOccupiedGravTimeBin)
+                            {
+                              All.LevelToTimeBin[i] = timebin;
+                              TakeLevel = i;
+                              All.LevelHasBeenMeasured[i] = 0;
+                              break;
+                            }
+                        }
+                    }
+                }
+
+              if(TakeLevel < 0) /* no slot could be assigned although there should be space: dump the table and abort */
+                {
+                  mpi_printf("All.HighestOccupiedGravTimeBin=%d timebin=%d\n", All.HighestOccupiedGravTimeBin, timebin);
+                  for(i = 0; i < GRAVCOSTLEVELS; i++)
+                    {
+                      mpi_printf("All.LevelToTimeBin[i=%d]=%d\n", i, All.LevelToTimeBin[i]);
+                    }
+
+                  fflush(stdout);
+                  MPI_Barrier(MPI_COMM_WORLD);
+
+                  terminate("TakeLevel=%d < 0", TakeLevel);
+                }
+            }
+        }
+    }
+
+  MeasureCostFlag = 0;
+
+  if(TakeLevel >= 0)
+    if(All.LevelHasBeenMeasured[TakeLevel] == 0) /* measure cost only for a level that has not been measured yet */
+      {
+        MeasureCostFlag = 1;
+
+        Thread[0].P_CostCount = mymalloc("Thread_P_CostCount", NumPart * sizeof(int));
+        Thread[0].TreePoints_CostCount = mymalloc("Threads_TreePoints_CostCount", Tree_NumPartImported * sizeof(int));
+        Thread[0].Node_CostCount = mymalloc("Threads_Node_CostCount", Tree_NumNodes * sizeof(int));
+
+        for(i = 1; i < NUM_THREADS; i++)
+          {
+            Thread[i].P_CostCount = mymalloc("Threads_P_CostCount", NumPart * sizeof(int));
+            Thread[i].TreePoints_CostCount = mymalloc("Threads_TreePoints_CostCount", Tree_NumPartImported * sizeof(int));
+            Thread[i].Node_CostCount = mymalloc("Threads_Node_CostCount", Tree_NumNodes * sizeof(int));
+          }
+
+        for(i = 0; i < NUM_THREADS; i++)
+          Thread[i].Node_CostCount -= Tree_MaxPart; /* shifted base pointer, undone before myfree() below; presumably lets the array be indexed by node number - TODO confirm */
+
+        for(i = 0; i < NUM_THREADS; i++)
+          ThreadFirstExec[i] = 1; /* tells the loops to zero the cost counters on their first execution */
+      }
+
+  TIMER_STOPSTART(CPU_TREECOSTMEASURE, CPU_TREE);
+
+  /* Create list of targets. We do this here to simplify the treatment of the two possible sources of points */
+
+  TargetList = mymalloc("TargetList", (NumPart + Tree_NumPartImported) * sizeof(int));
+  Tree_ResultIndexList = mymalloc("Tree_ResultIndexList", Tree_NumPartImported * sizeof(int));
+
+  Nforces = 0;
+
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) /* source 1: active local particles whose Tree_Task_list entry is this task */
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(Tree_Task_list[i] == ThisTask)
+        TargetList[Nforces++] = i;
+    }
+
+  for(i = 0, ncount = 0; i < Tree_NumPartImported; i++) /* source 2: imported tree points, stored with an index offset of Tree_ImportedNodeOffset */
+#ifndef HIERARCHICAL_GRAVITY
+    if(Tree_Points[i].ActiveFlag)
+#endif /* #ifndef HIERARCHICAL_GRAVITY */
+      {
+        Tree_ResultIndexList[i] = ncount++;
+        TargetList[Nforces++] = i + Tree_ImportedNodeOffset;
+      }
+
+  Tree_ResultsActiveImported = mymalloc("Tree_ResultsActiveImported", ncount * sizeof(struct resultsactiveimported_data));
+
+  permutate_chunks_in_list(Nforces, TargetList);
+
+  generic_set_MaxNexport();
+
+  /******************************************/
+  /* now execute the tree walk calculations */
+  /******************************************/
+
+  TIMER_STOPSTART(CPU_TREE, CPU_TREEBALSNDRCV);
+
+#ifdef DETAILEDTIMINGS
+  tstart = second();
+  current_timebin = timebin;
+#endif /* #ifdef DETAILEDTIMINGS */
+
+  iter = generic_comm_pattern(Nforces, gravity_primary_loop, gravity_secondary_loop);
+
+  /* now communicate the forces in Tree_ResultsActiveImported */
+
+  TIMER_STOPSTART(CPU_TREEBALSNDRCV, CPU_TREESENDBACK);
+
+#ifdef DETAILEDTIMINGS
+  double tend = second();
+
+  fprintf(FdDetailed, "%d %d %d %d %g %g\n", All.NumCurrentTiStep, current_timebin, DETAILED_TIMING_GRAVWALK, MODE_FINISHED,
+          timediff(tstart, tend), timediff(tstart, tend));
+  fflush(FdDetailed);
+#endif /* #ifdef DETAILEDTIMINGS */
+
+  for(j = 0; j < NTask; j++)
+    Recv_count[j] = 0;
+
+  int n;
+  for(i = 0, n = 0, k = 0; i < NTask; i++) /* count, per task i, the results to return and record the index each one carries */
+    for(j = 0; j < Force_Recv_count[i]; j++, n++)
+      {
+#ifndef HIERARCHICAL_GRAVITY
+        if(Tree_Points[n].ActiveFlag)
+#endif /* #ifndef HIERARCHICAL_GRAVITY */
+          {
+            Tree_ResultsActiveImported[k].index = Tree_Points[n].index;
+            Recv_count[i]++;
+            k++;
+          }
+      }
+
+  MPI_Alltoall(Recv_count, 1, MPI_INT, Send_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(j = 0, Nexport = 0, Nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) /* totals and prefix-sum offsets per task */
+    {
+      Nexport += Send_count[j];
+      Nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  struct resultsactiveimported_data *tmp_results = mymalloc("tmp_results", Nexport * sizeof(struct resultsactiveimported_data));
+  memset(tmp_results, -1, Nexport * sizeof(struct resultsactiveimported_data));
+
+  /* exchange data: pairwise, the partner in round ngrp is ThisTask ^ ngrp; note the computed results are SENT from the Recv_* slots and received into the Send_* slots */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              MPI_Sendrecv(&Tree_ResultsActiveImported[Recv_offset[recvTask]],
+                           Recv_count[recvTask] * sizeof(struct resultsactiveimported_data), MPI_BYTE, recvTask, TAG_FOF_A,
+                           &tmp_results[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct resultsactiveimported_data),
+                           MPI_BYTE, recvTask, TAG_FOF_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  for(i = 0; i < Nexport; i++) /* store each returned acceleration in the local particle named by its index field */
+    {
+      int target = tmp_results[i].index;
+
+      for(k = 0; k < 3; k++)
+        P[target].GravAccel[k] = tmp_results[i].GravAccel[k];
+    }
+
+  myfree(tmp_results);
+
+  myfree(Tree_ResultsActiveImported);
+  myfree(Tree_ResultIndexList);
+  myfree(TargetList);
+
+  TIMER_STOPSTART(CPU_TREESENDBACK, CPU_TREECOSTMEASURE);
+
+  /* assign node cost to particles */
+  if(MeasureCostFlag)
+    {
+      for(int threadid = 0; threadid < NUM_THREADS; threadid++)
+        if(ThreadFirstExec[threadid])
+          {
+            /* this could happen if neither the primary nor the secondary loop had anything to do */
+            ThreadFirstExec[threadid] = 0;
+            memset(Thread[threadid].P_CostCount, 0, NumPart * sizeof(int));
+            memset(Thread[threadid].TreePoints_CostCount, 0, Tree_NumPartImported * sizeof(int));
+            memset(Thread[threadid].Node_CostCount + Tree_MaxPart, 0, Tree_NumNodes * sizeof(int));
+          }
+
+      force_assign_cost_values();
+      domain_init_sum_cost();
+
+      All.LevelHasBeenMeasured[TakeLevel] = 1;
+
+      if(All.TypeOfOpeningCriterion == 1 && All.Ti_Current == 0)
+        All.LevelHasBeenMeasured[TakeLevel] = 0; /* keep the level marked as unmeasured so that it is measured again */
+
+      for(i = 0; i < NUM_THREADS; i++)
+        Thread[i].Node_CostCount += Tree_MaxPart; /* restore the pointers returned by mymalloc() before freeing */
+
+      for(i = NUM_THREADS - 1; i >= 1; i--) /* free in reverse order of allocation */
+        {
+          myfree(Thread[i].Node_CostCount);
+          myfree(Thread[i].TreePoints_CostCount);
+          myfree(Thread[i].P_CostCount);
+        }
+
+      myfree(Thread[0].Node_CostCount);
+      myfree(Thread[0].TreePoints_CostCount);
+      myfree(Thread[0].P_CostCount);
+    }
+
+  TIMER_STOPSTART(CPU_TREECOSTMEASURE, CPU_TREE);
+
+  if(All.TypeOfOpeningCriterion == 1)
+    All.ErrTolTheta = 0; /* This will switch to the relative opening criterion for the following force computations */
+
+  mpi_printf("GRAVTREE: tree-force is done.\n");
+
+  /* gather some diagnostic information */
+
+  TIMER_STOPSTART(CPU_TREE, CPU_LOGS);
+
+  Costtotal = 0;
+  for(i = 0; i < NUM_THREADS; i++)
+    Costtotal += ThreadsCosttotal[i];
+
+  timer.tree1 = TIMER_DIFF(CPU_TREEWALK1);
+  timer.tree2 = TIMER_DIFF(CPU_TREEWALK2);
+  timer.tree = timer.tree1 + timer.tree2;
+  timer.commwait = TIMER_DIFF(CPU_TREEBALSNDRCV) + TIMER_DIFF(CPU_TREESENDBACK);
+  timer.all = timer.tree + timer.commwait + TIMER_DIFF(CPU_TREE) + TIMER_DIFF(CPU_TREECOSTMEASURE);
+  timer.sumnexport = SumNexport;
+  timer.costtotal = Costtotal;
+  timer.numnodes = Tree_NumNodes;
+
+  MPI_Reduce(&iter, &maxiter, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
+  MPI_Reduce((double *)&timer, (double *)&tisum, (int)(sizeof(struct detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_SUM, 0,
+             MPI_COMM_WORLD);
+  MPI_Reduce((double *)&timer, (double *)&timax, (int)(sizeof(struct detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_MAX, 0,
+             MPI_COMM_WORLD);
+
+  All.TotNumOfForces += TimeBinsGravity.GlobalNActiveParticles;
+
+  if(ThisTask == 0) /* tisum/timax/maxiter are only valid on the root of the reductions above */
+    {
+      fprintf(FdTimings, "Nf=%9lld timebin=%d total-Nf=%lld\n", TimeBinsGravity.GlobalNActiveParticles, timebin, All.TotNumOfForces);
+
+      fprintf(FdTimings, " work-load balance: %g (%g %g), rel1to2: %g\n", timax.tree / ((tisum.tree + 1e-20) / NTask),
+              timax.tree1 / ((tisum.tree1 + 1e-20) / NTask), timax.tree2 / ((tisum.tree2 + 1e-20) / NTask),
+              tisum.tree1 / (tisum.tree1 + tisum.tree2 + 1e-20));
+      fprintf(FdTimings, " number of iterations: max=%d, exported fraction: %g\n", maxiter,
+              tisum.sumnexport / (TimeBinsGravity.GlobalNActiveParticles + 1e-20));
+      fprintf(FdTimings, " part/sec: raw=%g, effective=%g ia/part: avg=%g\n",
+              TimeBinsGravity.GlobalNActiveParticles / (tisum.tree + 1.0e-20),
+              TimeBinsGravity.GlobalNActiveParticles / ((timax.tree + 1.0e-20) * NTask),
+              tisum.costtotal / (TimeBinsGravity.GlobalNActiveParticles + 1.0e-20));
+
+      fprintf(FdTimings, " maximum number of nodes: %g, filled: %g\n", timax.numnodes, timax.numnodes / Tree_MaxNodes);
+
+      fprintf(FdTimings, " avg times: all=%g tree1=%g tree2=%g commwait=%g sec\n", tisum.all / NTask, tisum.tree1 / NTask,
+              tisum.tree2 / NTask, tisum.commwait / NTask);
+
+      myflush(FdTimings);
+    }
+
+  TIMER_STOP(CPU_LOGS);
+}
+
+/*! \brief Evaluate-function for gravitational tree. Calls
+ *  force_treeevaluate.
+ *
+ *  \param[in] target Index of particle.
+ *  \param[in] mode Flag if local or imported particles should be considered.
+ *  \param[in] threadid ID of thread.
+ *
+ *  \return Number of interactions processed for this particle.
+ */
+static int gravity_evaluate(int target, int mode, int threadid)
+{
+  int cost, numnodes, *firstnode;
+  data_in local, *target_data;
+  data_out out, *target_result;
+
+  if(mode == MODE_LOCAL_PARTICLES) /* local particle: build the input record on the stack and collect the result in 'out' */
+    {
+      particle2in(&local, target, 0);
+      target_data = &local;
+      target_result = &out;
+
+      numnodes = 1;
+      firstnode = NULL; /* no start-node list is supplied for local particles */
+    }
+  else /* imported particle: input and result live in the communication buffers, start nodes come from generic_get_numnodes() */
+    {
+      target_data = &DataGet[target];
+      target_result = &DataResult[target];
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  cost = force_treeevaluate(target_data, target_result, target, mode, threadid, numnodes, firstnode, MeasureCostFlag);
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+
+  /* note: for imported particles, we already have the result placed into DataResult[target] */
+
+  return cost;
+}
diff --git a/src/amuse/community/arepo/src/gravity/gravtree_forcetest.c b/src/amuse/community/arepo/src/gravity/gravtree_forcetest.c
new file mode 100644
index 0000000000..54e1c5c299
--- /dev/null
+++ b/src/amuse/community/arepo/src/gravity/gravtree_forcetest.c
@@ -0,0 +1,1089 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/gravtree_forcetest.c + * \date 05/2018 + * \brief Test short range gravity evaluation. + * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void gravity_forcetest(void) + * static void gravity_forcetest_evaluate(int target, int mode, + * int threadid) + * void gravity_forcetest_testforcelaw(void) + * static void ewald_other_images(double x, double y, double z, + * double alpha, double force[4]) + * static void ewald_correction_force(double x, double y, + * double z, double force[4]) + * void forcetest_ewald_init(void) + * static void ewald_correction_force_table_lookup(double dx, + * double dy, double dz, double force[4]) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 20.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" + +#ifdef FORCETEST + +#if !defined(EVALPOTENTIAL) && defined(FORCETEST) +#error "When you enable FORCETEST you should also switch on EVALPOTENTIAL" +#endif /* #if !defined(EVALPOTENTIAL) && defined(FORCETEST) */ + +static void gravity_forcetest_evaluate(int target, int mode, int threadid); +static void ewald_correction_force(double x, double y, double z, double force[4]); +static void ewald_other_images(double x, double y, double z, double alpha, double force[4]); +static void ewald_correction_force_table_lookup(double x, double y, double z, double force[4]); + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. 
Type called data_in and static
+ *  pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3]; /* position used for the force evaluation (filled by particle2in) */
+  unsigned char Type; /* copy of P[i].Type */
+  unsigned char SofteningType; /* copy of P[i].SofteningType, used as index into All.ForceSoftening */
+
+  int Firstnode; /* value of the 'firstnode' argument passed to particle2in */
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Routine that fills the relevant particle/cell data into the input
+ *  structure defined above. Needed by generic_comm_helpers2.
+ *
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index of particle in P and SphP arrays.
+ *  \param[in] firstnode First node of communication.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+#ifdef CELL_CENTER_GRAVITY
+  if(P[i].Type == 0) /* with CELL_CENTER_GRAVITY, type-0 particles use SphP[i].Center instead of P[i].Pos */
+    {
+      for(int k = 0; k < 3; k++)
+        in->Pos[k] = SphP[i].Center[k];
+    }
+  else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+    {
+      for(int k = 0; k < 3; k++)
+        in->Pos[k] = P[i].Pos[k];
+    }
+
+  in->Type = P[i].Type;
+  in->SofteningType = P[i].SofteningType;
+
+  in->Firstnode = firstnode;
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *  processors. Type called data_out and static pointers DataResult and
+ *  DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyFloat Acc[3]; /* stored in / added to P[i].GravAccelDirect by out2particle */
+  MyFloat Pot; /* stored in / added to P[i].PotentialDirect */
+  MyFloat DistToID1; /* distance to the particle with ID 1; 0 if that particle was not among the summed particles */
+#ifdef PMGRID
+  MyFloat AccLongRange[3];
+  MyFloat AccShortRange[3];
+  MyFloat PotLongRange;
+  MyFloat PotShortRange;
+#endif /* #ifdef PMGRID */
+} data_out;
+
+static data_out *DataResult, *DataOut;
+
+/*! \brief Routine to store or combine result data. Needed by
+ *  generic_comm_helpers2.
+ *
+ *  \param[in] out Data to be moved to appropriate variables in global
+ *  particle and cell data arrays (P, SphP,...)
+ *  \param[in] i Index of particle in P and SphP arrays
+ *  \param[in] mode Mode of function: local particles or information that was
+ *  communicated from other tasks and has to be added locally?
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  if(mode == MODE_LOCAL_PARTICLES) /* initial store: overwrite the particle's direct-summation fields */
+    {
+      P[i].GravAccelDirect[0] = out->Acc[0];
+      P[i].GravAccelDirect[1] = out->Acc[1];
+      P[i].GravAccelDirect[2] = out->Acc[2];
+      P[i].PotentialDirect = out->Pot;
+      P[i].DistToID1 = out->DistToID1;
+#ifdef PMGRID
+      P[i].GravAccelLongRange[0] = out->AccLongRange[0];
+      P[i].GravAccelLongRange[1] = out->AccLongRange[1];
+      P[i].GravAccelLongRange[2] = out->AccLongRange[2];
+      P[i].GravAccelShortRange[0] = out->AccShortRange[0];
+      P[i].GravAccelShortRange[1] = out->AccShortRange[1];
+      P[i].GravAccelShortRange[2] = out->AccShortRange[2];
+      P[i].PotentialLongRange = out->PotLongRange;
+      P[i].PotentialShortRange = out->PotShortRange;
+#endif /* #ifdef PMGRID */
+    }
+  else /* combine: add a partial result to what is already stored */
+    {
+      P[i].GravAccelDirect[0] += out->Acc[0];
+      P[i].GravAccelDirect[1] += out->Acc[1];
+      P[i].GravAccelDirect[2] += out->Acc[2];
+      P[i].PotentialDirect += out->Pot;
+      if(out->DistToID1 > 0) /* the distance is not additive: take it only from a result that reports a positive value */
+        P[i].DistToID1 = out->DistToID1;
+#ifdef PMGRID
+      P[i].GravAccelLongRange[0] += out->AccLongRange[0];
+      P[i].GravAccelLongRange[1] += out->AccLongRange[1];
+      P[i].GravAccelLongRange[2] += out->AccLongRange[2];
+      P[i].GravAccelShortRange[0] += out->AccShortRange[0];
+      P[i].GravAccelShortRange[1] += out->AccShortRange[1];
+      P[i].GravAccelShortRange[2] += out->AccShortRange[2];
+      P[i].PotentialLongRange += out->PotLongRange;
+      P[i].PotentialShortRange += out->PotShortRange;
+#endif /* #ifdef PMGRID */
+    }
+}
+
+#include "../utils/generic_comm_helpers2.h"
+
+/*! \brief Routine that defines what to do with local particles.
+ *
+ *  Calls the *_evaluate function in MODE_LOCAL_PARTICLES.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  int i;
+
+  /* do local particles */
+  {
+    int j, threadid = get_thread_num();
+
+    for(j = 0; j < NTask; j++)
+      Thread[threadid].Exportflag[j] = -1; /* -1: nothing has been flagged for export to task j yet */
+
+    while(1)
+      {
+        if(Thread[threadid].ExportSpace < MinSpace) /* stop when the export buffer is nearly full */
+          break;
+
+        i = NextParticle++;
+
+        if(i >= TimeBinsGravity.NActiveParticles)
+          break;
+
+        i = TimeBinsGravity.ActiveParticleList[i]; /* from here on, i is the particle index */
+        if(i < 0)
+          continue;
+
+        if(P[i].TimeBinGrav < 0) /* only particles that gravity_forcetest() marked with a negative time bin */
+          gravity_forcetest_evaluate(i, MODE_LOCAL_PARTICLES, threadid);
+      }
+  }
+}
+
+/*! \brief Routine that defines what to do with imported particles.
+ *
+ *  Calls the *_evaluate function in MODE_IMPORTED_PARTICLES.
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  /* now do the particles that were sent to us: indices 0 .. Nimport-1 */
+  int i, cnt = 0;
+  {
+    int threadid = get_thread_num();
+
+    while(1)
+      {
+        i = cnt++;
+
+        if(i >= Nimport)
+          break;
+
+        gravity_forcetest_evaluate(i, MODE_IMPORTED_PARTICLES, threadid);
+      }
+  }
+}
+
+/*! \brief This function computes direct-summation reference forces for a
+ *  subset of the active particles.
+ *
+ *  An active particle is selected when get_random_number() < FORCETEST and is
+ *  marked by a negative TimeBinGrav; the results for the selected particles
+ *  are appended to the file forcetest.txt in the output directory.
+ * + * \return void + */ +void gravity_forcetest(void) +{ + int nthis, nloc, ntot; + int idx, i, j; + double fac1; + char buf[200]; + + nloc = 0; + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(get_random_number() < FORCETEST) + { + P[i].TimeBinGrav = -P[i].TimeBinGrav - 1; /* Mark as selected */ + nloc++; + } + } + + MPI_Allreduce(&nloc, &ntot, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + mpi_printf("FORCETEST: Testing forces of %d particles\n", ntot); + + double t0 = second(); + + generic_set_MaxNexport(); + + generic_comm_pattern(TimeBinsGravity.NActiveParticles, kernel_local, kernel_imported); + + double t1 = second(); + double maxt = timediff(t0, t1); + + /* muliply by G */ + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(P[i].TimeBinGrav < 0) + { + for(j = 0; j < 3; j++) + { + P[i].GravAccelDirect[j] *= All.G; +#ifdef PMGRID + P[i].GravAccelLongRange[j] *= All.G; + P[i].GravAccelShortRange[j] *= All.G; +#endif /* #ifdef PMGRID */ + } + + P[i].PotentialDirect *= All.G; +#ifdef PMGRID + P[i].PotentialLongRange *= All.G; + P[i].PotentialShortRange *= All.G; +#endif /* #ifdef PMGRID */ + } + } + + /* Finally, the following factor allows a computation of cosmological simulation + with vacuum energy in physical coordinates */ + + if(All.ComovingIntegrationOn == 0) + { + fac1 = All.OmegaLambda * All.Hubble * All.Hubble; + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(P[i].TimeBinGrav < 0) + for(j = 0; j < 3; j++) + P[i].GravAccelDirect[j] += fac1 * P[i].Pos[j]; + } + } + + /* now output the forces to a file */ + + for(nthis = 0; nthis < NTask; nthis++) + { + if(nthis == ThisTask) + { + sprintf(buf, "%s%s", All.OutputDir, "forcetest.txt"); + + if(!(FdForceTest = fopen(buf, "a"))) + 
terminate("error in opening file '%s'\n", buf); + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(P[i].TimeBinGrav < 0) + { +#ifdef PMGRID + fprintf(FdForceTest, + "%d %d %lld %g %g %g %g %g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g " + "%15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g\n", + P[i].Type, ThisTask, (long long)P[i].ID, All.Time, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2], P[i].DistToID1, + P[i].GravAccelDirect[0], P[i].GravAccelDirect[1], P[i].GravAccelDirect[2], P[i].GravAccelShortRange[0], + P[i].GravAccelShortRange[1], P[i].GravAccelShortRange[2], P[i].GravAccelLongRange[0], + P[i].GravAccelLongRange[1], P[i].GravAccelLongRange[2], P[i].GravAccel[0], P[i].GravAccel[1], + P[i].GravAccel[2], P[i].GravPM[0], P[i].GravPM[1], P[i].GravPM[2], P[i].PotentialDirect, + P[i].PotentialShortRange, P[i].PotentialLongRange, P[i].Potential, P[i].PM_Potential); +#else /* #ifdef PMGRID */ + fprintf(FdForceTest, + "%d %d %lld %g %g %g %g %g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g %15.10g\n", P[i].Type, + ThisTask, (long long)P[i].ID, All.Time, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2], P[i].DistToID1, + P[i].GravAccelDirect[0], P[i].GravAccelDirect[1], P[i].GravAccelDirect[2], P[i].GravAccel[0], + P[i].GravAccel[1], P[i].GravAccel[2], P[i].PotentialDirect, P[i].Potential); +#endif /* #ifdef PMGRID #else */ + } + } + + fclose(FdForceTest); + } + + MPI_Barrier(MPI_COMM_WORLD); + } + + for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++) + { + i = TimeBinsGravity.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(P[i].TimeBinGrav < 0) + P[i].TimeBinGrav = -P[i].TimeBinGrav - 1; + } + + /* Now the force computation is finished */ + + if(ThisTask == 0) + { + double costtotal = NumPart * ntot; + + fprintf(FdTimings, "DIRECT Nf= %d part/sec=%g | %g ia/part=%g\n\n", ntot, ((double)ntot) / 
(NTask * maxt + 1.0e-20), + ntot / ((maxt + 1.0e-20) * NTask), ((double)(costtotal)) / (ntot + 1.0e-20)); + + myflush(FdTimings); + } +} + +/*! \brief This function does the gravitational force computation with direct + * summation for the specified particle. + * + * This can be useful for debugging purposes, in particular for explicit + * checks of the force accuracy reached with the tree. Depending on whether + * or not a PMGRID is used, the code does a short-range tree-walk or a full + * one. + * + * \param i Index of the particle to be processed. + * \param mode 0: process local particle (phase 1), 1: process imported + * particle (phase 2). + * \param thread_id Id of this thread. + * \param measure_cost_flag Whether the cost of the tree walk should be + * measured. + * + * \return Number of interactions processed for particle i. + */ +static void gravity_forcetest_evaluate(int target, int mode, int threadid) +{ + int j; + double h_i, h_j, hmax, mass, dx, dy, dz, r, r2, fac, wp, fac_newton, wp_newton; + double pos_x, pos_y, pos_z; +#ifdef PMGRID + double asmth = All.Asmth[0]; +#endif /* #ifdef PMGRID */ +#if !defined(GRAVITY_NOT_PERIODIC) + double xtmp, ytmp, ztmp; +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) */ + + double acc_x = 0; + double acc_y = 0; + double acc_z = 0; + double pot = 0; + double disttoid1 = 0; + + data_out out; + data_in local, *target_data; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + target_data = &local; + + /* make sure that the particle is exported to all other tasks */ + for(int task = 0; task < NTask; task++) + if(task != ThisTask) + { + if(Thread[threadid].Exportflag[task] != target) + { + Thread[threadid].Exportflag[task] = target; + int nexp = Thread[threadid].Nexport++; + Thread[threadid].PartList[nexp].Task = task; + Thread[threadid].PartList[nexp].Index = target; + Thread[threadid].ExportSpace -= Thread[threadid].ItemSize; + } + + int nexp = Thread[threadid].NexportNodes++; + nexp = -1 - nexp; + 
struct datanodelist *nodelist = + (struct datanodelist *)(((char *)Thread[threadid].PartList) + Thread[threadid].InitialSpace); + nodelist[nexp].Task = task; + nodelist[nexp].Index = target; + nodelist[nexp].Node = 0; /* the node doesn't matter here */ + Thread[threadid].ExportSpace -= sizeof(struct datanodelist) + sizeof(int); + } + } + else + { + target_data = &DataGet[target]; + } + + pos_x = target_data->Pos[0]; + pos_y = target_data->Pos[1]; + pos_z = target_data->Pos[2]; + h_i = All.ForceSoftening[target_data->SofteningType]; + +#ifdef PLACEHIGHRESREGION + if(pmforce_is_particle_high_res(target_data->Type, target_data->Pos)) + asmth = All.Asmth[1]; +#endif /* #ifdef PLACEHIGHRESREGION */ + + out.Pot = 0; +#ifdef PMGRID + out.PotShortRange = 0; + out.PotLongRange = 0; +#endif /* #ifdef PMGRID */ + + for(int i = 0; i < 3; i++) + { + out.Acc[i] = 0; +#ifdef PMGRID + out.AccShortRange[i] = 0; + out.AccLongRange[i] = 0; +#endif /* #ifdef PMGRID */ + } + + for(j = 0; j < NumPart; j++) + { + h_j = All.ForceSoftening[P[j].SofteningType]; + + if(h_j > h_i) + hmax = h_j; + else + hmax = h_i; + +#ifdef CELL_CENTER_GRAVITY + if(P[j].Type == 0) + { + dx = GRAVITY_NEAREST_X(SphP[j].Center[0] - pos_x); + dy = GRAVITY_NEAREST_Y(SphP[j].Center[1] - pos_y); + dz = GRAVITY_NEAREST_Z(SphP[j].Center[2] - pos_z); + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + dx = GRAVITY_NEAREST_X(P[j].Pos[0] - pos_x); + dy = GRAVITY_NEAREST_Y(P[j].Pos[1] - pos_y); + dz = GRAVITY_NEAREST_Z(P[j].Pos[2] - pos_z); + } + + r2 = dx * dx + dy * dy + dz * dz; + + mass = P[j].Mass; + + /* now evaluate the multipole moment */ + + r = sqrt(r2); + + if(P[j].ID == 1) + disttoid1 = r; + + /* we compute 3 different forces: + * (1) The correct direct summation force, if needed with Ewald correction: ftrue + * In the case of PM: + * (2) The short range direct summation force with only the erfc cut-off (this is what the tree can at best deliver): fsr + * (3) The expected PM force based on the 
long-range part of the Ewald sum. This is equal to ftrue - fsr - fsfr_periodic_images + * */ + + if(r > 0) + { + fac_newton = mass / (r2 * r); + wp_newton = -mass / r; + } + else + { + fac_newton = 0; + wp_newton = 0; + } + + if(r >= hmax) + { + fac = fac_newton; + wp = wp_newton; + } + else + { + double h_inv = 1.0 / hmax; + double h3_inv = h_inv * h_inv * h_inv; + double u = r * h_inv; + + if(u < 0.5) + { + double u2 = u * u; + fac = mass * h3_inv * (SOFTFAC1 + u2 * (SOFTFAC2 * u + SOFTFAC3)); + wp = mass * h_inv * (SOFTFAC4 + u2 * (SOFTFAC5 + u2 * (SOFTFAC6 * u + SOFTFAC7))); + } + else + { + double u2 = u * u, u3 = u2 * u; + fac = mass * h3_inv * (SOFTFAC8 + SOFTFAC9 * u + SOFTFAC10 * u2 + SOFTFAC11 * u3 + SOFTFAC12 / u3); + wp = mass * h_inv * (SOFTFAC13 + SOFTFAC14 / u + u2 * (SOFTFAC1 + u * (SOFTFAC15 + u * (SOFTFAC16 + SOFTFAC17 * u)))); + } + } + + double acc_newton_x = dx * fac; + double acc_newton_y = dy * fac; + double acc_newton_z = dz * fac; + double pot_newton = wp; + +#ifdef PMGRID + double u = 0.5 / asmth * r; + + double factor_force = (erfc(u) + 2.0 * u / sqrt(M_PI) * exp(-u * u) - 1.0); + double factor_pot = erfc(u); + + fac += fac_newton * factor_force; + wp += wp_newton * (factor_pot - 1.0); + + double acc_short_x = dx * fac; + double acc_short_y = dy * fac; + double acc_short_z = dz * fac; + double pot_short = wp + mass * M_PI / (asmth * asmth * boxSize_X * boxSize_Y * boxSize_Z); + + out.AccShortRange[0] += acc_short_x; + out.AccShortRange[1] += acc_short_y; + out.AccShortRange[2] += acc_short_z; + out.PotShortRange += pot_short; +#endif /* #ifdef PMGRID */ + +#if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) + double fcorr[4]; + +#if !defined(FORCETEST_TESTFORCELAW) + ewald_correction_force_table_lookup(dx, dy, dz, fcorr); +#else /* #if !defined(FORCETEST_TESTFORCELAW) */ + ewald_correction_force(dx, dy, dz, fcorr); +#endif /* #if !defined(FORCETEST_TESTFORCELAW) #else */ + + acc_x = acc_newton_x + 
mass * fcorr[0]; + acc_y = acc_newton_y + mass * fcorr[1]; + acc_z = acc_newton_z + mass * fcorr[2]; + + pot = pot_newton + mass * fcorr[3]; +#else /* #if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */ + acc_x = acc_newton_x; + acc_y = acc_newton_y; + acc_z = acc_newton_z; + pot = pot_newton; +#endif /* #if defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) #else */ + + out.Acc[0] += acc_x; + out.Acc[1] += acc_y; + out.Acc[2] += acc_z; + out.Pot += pot; + +#ifdef PMGRID + double fimages[4] = {0, 0, 0, 0}; +#ifdef FORCETEST_TESTFORCELAW + ewald_other_images(dx, dy, dz, 0.5 / asmth, fimages); +#endif /* #ifdef FORCETEST_TESTFORCELAW */ + out.AccLongRange[0] += acc_x - acc_short_x - mass * fimages[0]; + out.AccLongRange[1] += acc_y - acc_short_y - mass * fimages[1]; + out.AccLongRange[2] += acc_z - acc_short_z - mass * fimages[2]; + out.PotLongRange += pot - pot_short - mass * fimages[3]; +#endif /* #ifdef PMGRID */ + } + + out.DistToID1 = disttoid1; + + /* Now collect the result at the right place */ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; +} + +#ifdef FORCETEST_TESTFORCELAW +/*! \brief Places particle with ID 1 radomly in box and calculates force on it. 
+ *
+ *  \return void
+ */
+void gravity_forcetest_testforcelaw(void)
+{
+  int Ncycles = 40; /* number of placements of particle ID 1 that are tested */
+  double xyz[3], eps;
+
+  ngb_treefree();
+  mark_active_timebins();
+
+  for(int cycle = 0; cycle < Ncycles; cycle++)
+    {
+      mpi_printf("\nTEST-FORCE-LAW: cycle=%d|%d ----------------------------------\n\n", cycle, Ncycles);
+
+      double epsloc = 0, xyzloc[3] = {0, 0, 0}; /* stay zero on tasks that do not hold the particle with ID 1 */
+
+      /* set particle with ID=1 to new random coordinate in box */
+      for(int n = 0; n < NumPart; n++)
+        {
+          P[n].Type = 1; /* note: overwrites the type of every local particle */
+
+          if(P[n].ID == 1)
+            {
+              xyzloc[0] = All.BoxSize * STRETCHX * get_random_number();
+              xyzloc[1] = All.BoxSize * STRETCHY * get_random_number();
+              xyzloc[2] = All.BoxSize * STRETCHZ * get_random_number();
+
+#if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1)
+              for(int j = 0; j < 3; j++)
+                xyzloc[j] = 0.5 * (All.Xmintot[1][j] + All.Xmaxtot[1][j]);
+#endif /* #if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1) */
+
+#if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 2)
+              if(get_random_number() < 0.5)
+                {
+                  for(int j = 0; j < 3; j++)
+                    xyzloc[j] = All.Xmintot[1][j] + get_random_number() * (All.Xmaxtot[1][j] - All.Xmintot[1][j]);
+                }
+#endif /* #if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 2) */
+
+              for(int i = 0; i < 3; i++)
+                P[n].Pos[i] = xyzloc[i];
+
+              epsloc = All.ForceSoftening[P[n].SofteningType];
+            }
+        }
+
+      MPI_Allreduce(xyzloc, xyz, 3, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); /* sum over tasks: equals the new position if exactly one task holds ID 1 */
+      MPI_Allreduce(&epsloc, &eps, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+      double rmin = 0.01 * eps;
+      double rmax =
+          sqrt(pow(0.5 * All.BoxSize * STRETCHX, 2) + pow(0.5 * All.BoxSize * STRETCHY, 2) + pow(0.5 * All.BoxSize * STRETCHZ, 2));
+
+      for(int n = 0; n < NumPart; n++)
+        {
+          if(P[n].ID != 1) /* scatter all other particles around ID 1: log-uniform radius in [rmin, rmax], random direction */
+            {
+              double r = exp(log(rmin) + (log(rmax) - log(rmin)) * get_random_number());
+              double theta = acos(2 * get_random_number() - 1);
+              double phi = 2 * M_PI * get_random_number();
+
+              double dx = r * sin(theta) * cos(phi);
+              double dy = r * sin(theta) * sin(phi);
+              double dz = r * cos(theta);
+
+              double xtmp, ytmp, ztmp; /* not referenced by name here; presumably scratch variables for the WRAP_* macros - TODO confirm */
+              P[n].Pos[0] = WRAP_X(xyz[0] + dx);
+              P[n].Pos[1] = WRAP_Y(xyz[1] + dy);
+              P[n].Pos[2] = WRAP_Z(xyz[2] + dz);
+            }
+        }
+
+      domain_free();
+      domain_Decomposition(); /* do domain decomposition if needed */
+
+#ifdef PMGRID
+      long_range_force();
+#endif /* #ifdef PMGRID */
+
+      compute_grav_accelerations(All.HighestActiveTimeBin, FLAG_FULL_TREE);
+    }
+
+  endrun(); /* called unconditionally once all cycles are done */
+}
+#endif /* #ifdef FORCETEST_TESTFORCELAW */
+
+/*! \brief Periodicity effects in gravity.
+ *
+ *  \param[in] x X coordinate of point.
+ *  \param[in] y Y coordinate of point.
+ *  \param[in] z Z coordinate of point.
+ *  \param[in] alpha Cutoff for tree-PM.
+ *  \param[out] force Force vector.
+ */
+static void ewald_other_images(double x, double y, double z, double alpha, double force[4])
+{
+  double signx, signy, signz;
+
+  for(int i = 0; i < 4; i++)
+    force[i] = 0;
+
+  double r2 = x * x + y * y + z * z;
+
+  if(r2 == 0) /* zero separation: leave all four components at zero */
+    return;
+
+  if(x < 0) /* work with |x|, |y|, |z|; the sign factors are applied to force[0..2] at the end */
+    {
+      x = -x;
+      signx = +1;
+    }
+  else
+    signx = -1;
+  if(y < 0)
+    {
+      y = -y;
+      signy = +1;
+    }
+  else
+    signy = -1;
+  if(z < 0)
+    {
+      z = -z;
+      signz = +1;
+    }
+  else
+    signz = -1;
+
+  double alpha2 = alpha * alpha;
+
+  const int nmax = 4; /* images are summed over -nmax..nmax in each dimension */
+
+  for(int nx = -nmax; nx <= nmax; nx++)
+    for(int ny = -nmax; ny <= nmax; ny++)
+      for(int nz = -nmax; nz <= nmax; nz++)
+        {
+          if(nx != 0 || ny != 0 || nz != 0) /* skip the primary image */
+            {
+              double dx = x - nx * STRETCHX * All.BoxSize;
+              double dy = y - ny * STRETCHY * All.BoxSize;
+              double dz = z - nz * STRETCHZ * All.BoxSize;
+              double r2 = dx * dx + dy * dy + dz * dz;
+              double r = sqrt(r2);
+              double val = erfc(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2);
+              double val2 = val / (r2 * r);
+              double val3 = erfc(alpha * r) / r; /* potential term, accumulated in force[3] */
+
+              force[0] -= dx * val2;
+              force[1] -= dy * val2;
+              force[2] -= dz * val2;
+              force[3] -= val3;
+            }
+        }
+
+  force[0] *= signx;
+  force[1] *= signy;
+  force[2] *= signz;
+}
+
+/*! \brief Force due to periodic boundary conditions.
+ * + * \param[in] x X coordinate of point. + * \param[in] y Y coordinate of point. + * \param[in] z Z coordinate of point. + * \param[out] force Force vector. + */ +static void ewald_correction_force(double x, double y, double z, double force[4]) +{ + double signx, signy, signz; + + for(int i = 0; i < 4; i++) + force[i] = 0; + + double r2 = x * x + y * y + z * z; + + if(r2 == 0) + return; + + if(x < 0) + { + x = -x; + signx = +1; + } + else + signx = -1; + if(y < 0) + { + y = -y; + signy = +1; + } + else + signy = -1; + if(z < 0) + { + z = -z; + signz = +1; + } + else + signz = -1; + + double lmin = imin(imin(STRETCHX, STRETCHY), STRETCHZ); + double alpha = 2.0 / lmin / All.BoxSize; + double alpha2 = alpha * alpha; + double r = sqrt(r2); + double r3inv = 1.0 / (r2 * r); + + force[0] += r3inv * x; + force[1] += r3inv * y; + force[2] += r3inv * z; + + const int nmax = 6; + + for(int nx = -nmax; nx <= nmax; nx++) + for(int ny = -nmax; ny <= nmax; ny++) + for(int nz = -nmax; nz <= nmax; nz++) + { + double dx = x - nx * STRETCHX * All.BoxSize; + double dy = y - ny * STRETCHY * All.BoxSize; + double dz = z - nz * STRETCHZ * All.BoxSize; + double r2 = dx * dx + dy * dy + dz * dz; + double r = sqrt(r2); + double val = erfc(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2); + double val2 = val / (r2 * r); + double val3 = erfc(alpha * r) / r; /* for potential */ + + force[0] -= dx * val2; + force[1] -= dy * val2; + force[2] -= dz * val2; + force[3] -= val3; + } + + int nxmax = (int)(4 * alpha * All.BoxSize * (STRETCHX / lmin) + 0.5); + int nymax = (int)(4 * alpha * All.BoxSize * (STRETCHY / lmin) + 0.5); + int nzmax = (int)(4 * alpha * All.BoxSize * (STRETCHZ / lmin) + 0.5); + + for(int nx = -nxmax; nx <= nxmax; nx++) + for(int ny = -nymax; ny <= nymax; ny++) + for(int nz = -nzmax; nz <= nzmax; nz++) + { + double kx = (2.0 * M_PI / (All.BoxSize * STRETCHX)) * nx; + double ky = (2.0 * M_PI / (All.BoxSize * STRETCHY)) * ny; + double kz = (2.0 * M_PI / (All.BoxSize 
* STRETCHZ)) * nz; + double k2 = kx * kx + ky * ky + kz * kz; + + if(k2 > 0) + { + double kdotx = (x * kx + y * ky + z * kz); + double vv = 4.0 * M_PI / (k2 * pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ) * exp(-k2 / (4.0 * alpha2)); + double val = vv * sin(kdotx); + double val2 = vv * cos(kdotx); + force[0] -= kx * val; + force[1] -= ky * val; + force[2] -= kz * val; + force[3] -= val2; + } + } + + force[3] += M_PI / (alpha2 * pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ) + 1.0 / r; + + force[0] *= signx; + force[1] *= signy; + force[2] *= signz; +} + +#if !defined(FORCETEST_TESTFORCELAW) + +#define TEW_N 128 + +#define TEW_NX (DBX * STRETCHX * TEW_N) +#define TEW_NY (DBY * STRETCHY * TEW_N) +#define TEW_NZ (DBZ * STRETCHZ * TEW_N) + +static double Ewd_table[4][TEW_NX + 1][TEW_NY + 1][TEW_NZ + 1]; +static double Ewd_table_intp; + +/*! \brief Initializes Ewald correction force test. + * + * \return void + */ +void forcetest_ewald_init(void) +{ + double t0 = second(); + + mpi_printf("FORCETEST: initialize high-res Ewald lookup table...\n"); + +#ifdef LONG_X + if(LONG_X != (int)(LONG_X)) + terminate("LONG_X must be an integer"); +#endif /* #ifdef LONG_X */ + +#ifdef LONG_Y + if(LONG_Y != (int)(LONG_Y)) + terminate("LONG_Y must be an integer"); +#endif /* #ifdef LONG_Y */ + +#ifdef LONG_Z + if(LONG_Z != (int)(LONG_Z)) + terminate("LONG_Z must be an integer"); +#endif /* #ifdef LONG_Z */ + + /* ok, let's compute things. Actually, we do that in parallel. 
*/ + int size = (TEW_NX + 1) * (TEW_NY + 1) * (TEW_NZ + 1); + int first, count; + + subdivide_evenly(size, NTask, ThisTask, &first, &count); + + for(int n = first; n < first + count; n++) + { + int i = n / ((TEW_NY + 1) * (TEW_NZ + 1)); + int j = (n - i * (TEW_NY + 1) * (TEW_NZ + 1)) / (TEW_NZ + 1); + int k = (n - i * (TEW_NY + 1) * (TEW_NZ + 1) - j * (TEW_NZ + 1)); + + if(ThisTask == 0) + { + if(((n - first) % (count / 20)) == 0) + { + printf("%4.1f percent done\n", (n - first) / (count / 100.0)); + myflush(stdout); + } + } + + double xx = 0.5 * DBX * STRETCHX * ((double)i) / TEW_NX * All.BoxSize; + double yy = 0.5 * DBY * STRETCHY * ((double)j) / TEW_NY * All.BoxSize; + double zz = 0.5 * DBZ * STRETCHZ * ((double)k) / TEW_NZ * All.BoxSize; + + double fcorr[4]; + ewald_correction_force(xx, yy, zz, fcorr); + + for(int rep = 0; rep < 4; rep++) + Ewd_table[rep][i][j][k] = fcorr[rep]; + } + + int *recvcnts = (int *)mymalloc("recvcnts", NTask * sizeof(int)); + int *recvoffs = (int *)mymalloc("recvoffs", NTask * sizeof(int)); + + for(int i = 0; i < NTask; i++) + { + int off, cnt; + subdivide_evenly(size, NTask, i, &off, &cnt); + recvcnts[i] = cnt * sizeof(double); + recvoffs[i] = off * sizeof(double); + } + + for(int rep = 0; rep < 4; rep++) + MPI_Allgatherv(MPI_IN_PLACE, size * sizeof(double), MPI_BYTE, Ewd_table[rep], recvcnts, recvoffs, MPI_BYTE, MPI_COMM_WORLD); + + myfree(recvoffs); + myfree(recvcnts); + + /* now scale things to the boxsize that is actually used */ + Ewd_table_intp = 2 * TEW_N / All.BoxSize; + + double t1 = second(); + mpi_printf("FORCETEST: Initialization of high-res Ewald table finished, took %g sec.\n", timediff(t0, t1)); +} + +/*! \brief Looks up Ewald force from tabulated values. + * + * \param[in] dx X position. + * \param[in] dy Y position. + * \param[in] dz Z position. + * \param[out] force Ewald force correction. 
+ * + * \return void + */ +static void ewald_correction_force_table_lookup(double dx, double dy, double dz, double force[4]) +{ + int signx, signy, signz; + int i, j, k; + double u, v, w; + double f1, f2, f3, f4, f5, f6, f7, f8; + + if(dx < 0) + { + dx = -dx; + signx = -1; + } + else + signx = +1; + + if(dy < 0) + { + dy = -dy; + signy = -1; + } + else + signy = +1; + + if(dz < 0) + { + dz = -dz; + signz = -1; + } + else + signz = +1; + + u = dx * Ewd_table_intp; + i = (int)u; + if(i >= TEW_NX) + i = TEW_NX - 1; + u -= i; + v = dy * Ewd_table_intp; + j = (int)v; + if(j >= TEW_NY) + j = TEW_NY - 1; + v -= j; + w = dz * Ewd_table_intp; + k = (int)w; + if(k >= TEW_NZ) + k = TEW_NZ - 1; + w -= k; + + f1 = (1 - u) * (1 - v) * (1 - w); + f2 = (1 - u) * (1 - v) * (w); + f3 = (1 - u) * (v) * (1 - w); + f4 = (1 - u) * (v) * (w); + f5 = (u) * (1 - v) * (1 - w); + f6 = (u) * (1 - v) * (w); + f7 = (u) * (v) * (1 - w); + f8 = (u) * (v) * (w); + + for(int rep = 0; rep < 4; rep++) + { + force[rep] = Ewd_table[rep][i][j][k] * f1 + Ewd_table[rep][i][j][k + 1] * f2 + Ewd_table[rep][i][j + 1][k] * f3 + + Ewd_table[rep][i][j + 1][k + 1] * f4 + Ewd_table[rep][i + 1][j][k] * f5 + Ewd_table[rep][i + 1][j][k + 1] * f6 + + Ewd_table[rep][i + 1][j + 1][k] * f7 + Ewd_table[rep][i + 1][j + 1][k + 1] * f8; + } + + force[0] *= signx; + force[1] *= signy; + force[2] *= signz; +} + +#endif /* #if !defined(FORCETEST_TESTFORCELAW) */ + +#endif /* #ifdef FORCETEST */ diff --git a/src/amuse/community/arepo/src/gravity/longrange.c b/src/amuse/community/arepo/src/gravity/longrange.c new file mode 100644 index 0000000000..2fbd6a2e53 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/longrange.c @@ -0,0 +1,199 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/longrange.c + * \date 05/2018 + * \brief Driver routines for computation of long-range gravitational + * PM force + * \details contains functions: + * void long_range_init(void) + * void long_range_init_regionsize(void) + * void long_range_force(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 06.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef PMGRID +/*! \brief Driver routine to call initialization of periodic or/and + * non-periodic FFT routines. + * + * \return void + */ +void long_range_init(void) +{ +#ifndef GRAVITY_NOT_PERIODIC + pm_init_periodic(); +#ifdef TWODIMS + pm2d_init_periodic(); +#endif /* #ifdef TWODIMS */ +#ifdef PLACEHIGHRESREGION + pm_init_nonperiodic(); +#endif /* #ifdef PLACEHIGHRESREGION */ +#else /* #ifndef GRAVITY_NOT_PERIODIC */ + pm_init_nonperiodic(); +#endif /* #ifndef GRAVITY_NOT_PERIODIC #else */ +} + +/*! \brief Driver routine to determine the extend of the non- + * periodic or high resolution region. + * + * The initialization is done by pm_init_regionsize(). Afterwards + * the convolution kernels are computed by pm_setup_nonperiodic_kernel(). 
+ * + * \return void + */ +void long_range_init_regionsize(void) +{ +#ifndef GRAVITY_NOT_PERIODIC +#ifdef PLACEHIGHRESREGION + if(RestartFlag != 1) + pm_init_regionsize(); + pm_setup_nonperiodic_kernel(); +#endif /* #ifdef PLACEHIGHRESREGION */ + +#else /* #ifndef GRAVITY_NOT_PERIODIC */ + if(RestartFlag != 1) + pm_init_regionsize(); + pm_setup_nonperiodic_kernel(); +#endif /* #ifndef GRAVITY_NOT_PERIODIC #else */ +} + +/*! \brief This function computes the long-range PM force for all particles. + * + * In case of a periodic grid the force is calculated by pmforce_periodic() + * otherwise by pmforce_nonperiodic(). If a high resolution region is + * specified for the PM force, pmforce_nonperiodic() calculates that force in + * both cases. + * + * \return void + */ +void long_range_force(void) +{ + int i; + + TIMER_START(CPU_PM_GRAVITY); + +#ifdef GRAVITY_NOT_PERIODIC + int j; + double fac; +#endif /* #ifdef GRAVITY_NOT_PERIODIC */ + + for(i = 0; i < NumPart; i++) + { + P[i].GravPM[0] = P[i].GravPM[1] = P[i].GravPM[2] = 0; +#ifdef EVALPOTENTIAL + P[i].PM_Potential = 0; +#endif /* #ifdef EVALPOTENTIAL */ + } + +#ifndef SELFGRAVITY + return; +#endif /* #ifndef SELFGRAVITY */ + +#ifndef GRAVITY_NOT_PERIODIC + +#ifdef TWODIMS + pm2d_force_periodic(0); +#else /* #ifdef TWODIMS */ + pmforce_periodic(0, NULL); +#endif /* #ifdef TWODIMS #else */ + +#ifdef PLACEHIGHRESREGION + i = pmforce_nonperiodic(1); + + if(i == 1) /* this is returned if a particle lied outside allowed range */ + { + pm_init_regionsize(); + pm_setup_nonperiodic_kernel(); + i = pmforce_nonperiodic(1); /* try again */ + } + if(i == 1) + terminate("despite we tried to increase the region, we still don't fit all particles in it"); +#endif /* #ifdef PLACEHIGHRESREGION */ + +#else /* #ifndef GRAVITY_NOT_PERIODIC */ + i = pmforce_nonperiodic(0); + + if(i == 1) /* this is returned if a particle lied outside allowed range */ + { + pm_init_regionsize(); + pm_setup_nonperiodic_kernel(); + i = pmforce_nonperiodic(0); 
/* try again */ + } + if(i == 1) + terminate("despite we tried to increase the region, somehow we still don't fit all particles in it"); +#ifdef PLACEHIGHRESREGION + i = pmforce_nonperiodic(1); + + if(i == 1) /* this is returned if a particle lied outside allowed range */ + { + pm_init_regionsize(); + pm_setup_nonperiodic_kernel(); + + /* try again */ + + for(i = 0; i < NumPart; i++) + P[i].GravPM[0] = P[i].GravPM[1] = P[i].GravPM[2] = 0; + + i = pmforce_nonperiodic(0) + pmforce_nonperiodic(1); + } + if(i != 0) + terminate("despite we tried to increase the region, somehow we still don't fit all particles in it"); +#endif /* #ifdef PLACEHIGHRESREGION */ +#endif /* #ifndef GRAVITY_NOT_PERIODIC #else */ + +#ifdef GRAVITY_NOT_PERIODIC + if(All.ComovingIntegrationOn) + { + fac = 0.5 * All.Hubble * All.Hubble * All.Omega0; + + for(i = 0; i < NumPart; i++) + for(j = 0; j < 3; j++) + P[i].GravPM[j] += fac * P[i].Pos[j]; + } + + /* Finally, the following factor allows a computation of cosmological simulation + with vacuum energy in physical coordinates */ + if(All.ComovingIntegrationOn == 0) + { + fac = All.OmegaLambda * All.Hubble * All.Hubble; + + for(i = 0; i < NumPart; i++) + for(j = 0; j < 3; j++) + P[i].GravPM[j] += fac * P[i].Pos[j]; + } +#endif /* #ifdef GRAVITY_NOT_PERIODIC */ + + TIMER_STOP(CPU_PM_GRAVITY); + + find_long_range_step_constraint(); +} +#endif /* #ifdef PMGRID */ diff --git a/src/amuse/community/arepo/src/gravity/pm/pm_mpi_fft.c b/src/amuse/community/arepo/src/gravity/pm/pm_mpi_fft.c new file mode 100644 index 0000000000..866ef06459 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/pm/pm_mpi_fft.c @@ -0,0 +1,1771 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/gravity/pm/pm_mpi_fft.c + * \date 05/2018 + * \brief Home-made parallel FFT transforms as needed by the code. + * \details We only use the one-dimensional FFTW3 routines, because the + * MPI versions of FFTW3 allocate memory for themselves during the + * transforms (which we want to strictly avoid), and because we + * want to allow transforms that are so big that more than 2GB + * may be transferred between processors.
+ * + * contains functions: + * void my_slab_based_fft_init(fft_plan * plan, int NgridX, + * int NgridY, int NgridZ) + * void my_slab_transposeA(fft_plan * plan, fft_real * field, + * fft_real * scratch) + * void my_slab_transposeB(fft_plan * plan, fft_real * field, + * fft_real * scratch) + * static void my_slab_transpose(void *av, void *bv, int *sx, + * int *firstx, int *sy, int *firsty, int nx, int ny, int nz, + * int mode) + * void my_slab_based_fft(fft_plan * plan, void *data, + * void *workspace, int forward) + * void my_slab_based_fft_c2c(fft_plan * plan, void *data, + * void *workspace, int forward) + * void my_column_based_fft_init(fft_plan * plan, int NgridX, + * int NgridY, int NgridZ) + * void my_column_based_fft_init_c2c(fft_plan * plan, + * int NgridX, int NgridY, int NgridZ) + * void my_fft_swap23(fft_plan * plan, fft_real * data, + * fft_real * out) + * void my_fft_swap23back(fft_plan * plan, fft_real * data, + * fft_real * out) + * void my_fft_swap13(fft_plan * plan, fft_real * data, + * fft_real * out) + * void my_fft_swap13back(fft_plan * plan, fft_real * data, + * fft_real * out) + * void my_column_based_fft(fft_plan * plan, void *data, + * void *workspace, int forward) + * void my_column_based_fft_c2c(fft_plan * plan, void *data, + * void *workspace, int forward) + * static void my_fft_column_remap(fft_complex * data, + * int Ndims[3], int in_firstcol, int in_ncol, + * fft_complex * out, int perm[3], int out_firstcol, + * int out_ncol, size_t * offset_send, size_t * offset_recv, + * size_t * count_send, size_t * count_recv, + * size_t just_count_flag) + * static void my_fft_column_transpose(fft_real * data, + * int Ndims[3], int in_firstcol, int in_ncol, fft_real * out, + * int perm[3], int out_firstcol, int out_ncol, + * size_t * offset_send, size_t * offset_recv, + * size_t * count_send, size_t * count_recv, + * size_t just_count_flag) + * static void my_fft_column_transpose_c(fft_complex * data, + * int Ndims[3], int in_firstcol, int
in_ncol, + * fft_complex * out, int perm[3], int out_firstcol, + * int out_ncol, size_t * offset_send, size_t * offset_recv, + * size_t * count_send, size_t * count_recv, + * size_t just_count_flag) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 26.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#if defined(PMGRID) + +#ifndef FFT_COLUMN_BASED +/*! \brief Initializes slab based FFT. + * + * \param[out] plan FFT plan. + * \param[in] NgridX Number of grid points in X direction. + * \param[in] NgridY Number of grid points in Y direction. + * \param[in] NgridZ Number of grid points in Z direction. + * + * \return void + */ +void my_slab_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ) +{ + subdivide_evenly(NgridX, NTask, ThisTask, &plan->slabstart_x, &plan->nslab_x); + subdivide_evenly(NgridY, NTask, ThisTask, &plan->slabstart_y, &plan->nslab_y); + + plan->slab_to_task = (int *)mymalloc("slab_to_task", NgridX * sizeof(int)); + + for(int task = 0; task < NTask; task++) + { + int start, n; + + subdivide_evenly(NgridX, NTask, task, &start, &n); + + for(int i = start; i < start + n; i++) + plan->slab_to_task[i] = task; + } + + MPI_Allreduce(&plan->nslab_x, &plan->largest_x_slab, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(&plan->nslab_y, &plan->largest_y_slab, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + + plan->slabs_x_per_task = (int *)mymalloc("slabs_x_per_task", NTask * sizeof(int)); + MPI_Allgather(&plan->nslab_x, 1, MPI_INT, plan->slabs_x_per_task, 1, MPI_INT, MPI_COMM_WORLD); + + plan->first_slab_x_of_task = (int *)mymalloc("first_slab_x_of_task", NTask * sizeof(int)); + MPI_Allgather(&plan->slabstart_x, 1, MPI_INT, plan->first_slab_x_of_task, 1, MPI_INT, MPI_COMM_WORLD); + + plan->slabs_y_per_task = (int *)mymalloc("slabs_y_per_task", NTask * sizeof(int)); + 
MPI_Allgather(&plan->nslab_y, 1, MPI_INT, plan->slabs_y_per_task, 1, MPI_INT, MPI_COMM_WORLD); + + plan->first_slab_y_of_task = (int *)mymalloc("first_slab_y_of_task", NTask * sizeof(int)); + MPI_Allgather(&plan->slabstart_y, 1, MPI_INT, plan->first_slab_y_of_task, 1, MPI_INT, MPI_COMM_WORLD); + + plan->NgridX = NgridX; + plan->NgridY = NgridY; + plan->NgridZ = NgridZ; + + int Ngridz = NgridZ / 2 + 1; /* dimension needed in complex space */ + + plan->Ngridz = Ngridz; + plan->Ngrid2 = 2 * Ngridz; +} + +/*! \brief Transposes the array field. + * + * The array field is transposed such that the data in x direction is local + * to only one task. This is done, so the force in x-direction can be + * obtained by finite differencing. However the array is not fully + * transposed, i.e. the x-direction is not the fastest running array index. + * + * \param[in] plan FFT pan. + * \param[in, out] field The array to transpose. + * \param[out] scratch Scratch space used during communication (same size as + * field). 
+ * + * \return void + */ +void my_slab_transposeA(fft_plan *plan, fft_real *field, fft_real *scratch) +{ + int n, prod, task, flag_big = 0, flag_big_all = 0; + + prod = NTask * plan->nslab_x; + + for(n = 0; n < prod; n++) + { + int x = n / NTask; + int task = n % NTask; + + int y; + + for(y = plan->first_slab_y_of_task[task]; y < plan->first_slab_y_of_task[task] + plan->slabs_y_per_task[task]; y++) + memcpy(scratch + ((size_t)plan->NgridZ) * (plan->first_slab_y_of_task[task] * plan->nslab_x + + x * plan->slabs_y_per_task[task] + (y - plan->first_slab_y_of_task[task])), + field + ((size_t)plan->Ngrid2) * (plan->NgridY * x + y), plan->NgridZ * sizeof(fft_real)); + } + + size_t *scount = (size_t *)mymalloc("scount", NTask * sizeof(size_t)); + size_t *rcount = (size_t *)mymalloc("rcount", NTask * sizeof(size_t)); + size_t *soff = (size_t *)mymalloc("soff", NTask * sizeof(size_t)); + size_t *roff = (size_t *)mymalloc("roff", NTask * sizeof(size_t)); + + for(task = 0; task < NTask; task++) + { + scount[task] = plan->nslab_x * plan->slabs_y_per_task[task] * (plan->NgridZ * sizeof(fft_real)); + rcount[task] = plan->nslab_y * plan->slabs_x_per_task[task] * (plan->NgridZ * sizeof(fft_real)); + + soff[task] = plan->first_slab_y_of_task[task] * plan->nslab_x * (plan->NgridZ * sizeof(fft_real)); + roff[task] = plan->first_slab_x_of_task[task] * plan->nslab_y * (plan->NgridZ * sizeof(fft_real)); + + if(scount[task] > MPI_MESSAGE_SIZELIMIT_IN_BYTES) + flag_big = 1; + } + + MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + + myMPI_Alltoallv(scratch, scount, soff, field, rcount, roff, 1, flag_big_all, MPI_COMM_WORLD); + + myfree(roff); + myfree(soff); + myfree(rcount); + myfree(scount); +} + +/*! \brief Undo the transposition of the array field. + * + * The transposition of the array field is undone such that the data in + * x direction is distributed among all tasks again. 
Thus the result of + * force computation in x-direction is sent back to the original task. + * + * \param[in] plan FFT plan. + * \param[in, out] field The array to transpose. + * \param[out] scratch Scratch space used during communication (same size as + * field). + * + * \return void + */ +void my_slab_transposeB(fft_plan *plan, fft_real *field, fft_real *scratch) +{ + int n, prod, task, flag_big = 0, flag_big_all = 0; + + size_t *scount = (size_t *)mymalloc("scount", NTask * sizeof(size_t)); + size_t *rcount = (size_t *)mymalloc("rcount", NTask * sizeof(size_t)); + size_t *soff = (size_t *)mymalloc("soff", NTask * sizeof(size_t)); + size_t *roff = (size_t *)mymalloc("roff", NTask * sizeof(size_t)); + + for(task = 0; task < NTask; task++) + { + rcount[task] = plan->nslab_x * plan->slabs_y_per_task[task] * (plan->NgridZ * sizeof(fft_real)); + scount[task] = plan->nslab_y * plan->slabs_x_per_task[task] * (plan->NgridZ * sizeof(fft_real)); + + roff[task] = plan->first_slab_y_of_task[task] * plan->nslab_x * (plan->NgridZ * sizeof(fft_real)); + soff[task] = plan->first_slab_x_of_task[task] * plan->nslab_y * (plan->NgridZ * sizeof(fft_real)); + + if(scount[task] > MPI_MESSAGE_SIZELIMIT_IN_BYTES) + flag_big = 1; + } + + MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + + myMPI_Alltoallv(field, scount, soff, scratch, rcount, roff, 1, flag_big_all, MPI_COMM_WORLD); + + myfree(roff); + myfree(soff); + myfree(rcount); + myfree(scount); + + prod = NTask * plan->nslab_x; + + for(n = 0; n < prod; n++) + { + int x = n / NTask; + int task = n % NTask; + + int y; + for(y = plan->first_slab_y_of_task[task]; y < plan->first_slab_y_of_task[task] + plan->slabs_y_per_task[task]; y++) + memcpy(field + ((size_t)plan->Ngrid2) * (plan->NgridY * x + y), + scratch + ((size_t)plan->NgridZ) * (plan->first_slab_y_of_task[task] * plan->nslab_x + + x * plan->slabs_y_per_task[task] + (y - plan->first_slab_y_of_task[task])), + plan->NgridZ * sizeof(fft_real)); + } +} + 
+/*! \brief Transpose a slab decomposed 3D field.
+ *
+ *  Given a slab-decomposed 3D field a[...] with total dimension
+ *  [nx x ny x nz], whose first dimension is split across the processors, this
+ *  routine outputs in b[] the transpose where then the second dimension is
+ *  split across the processors. sx[] gives for each MPI task how many slabs
+ *  it has, and firstx[] is the first slab for a given task. Likewise,
+ *  sy[]/firsty[] gives the same thing for the transposed order. Note, the
+ *  contents of the array a[] will be destroyed by the routine, because a[]
+ *  is reused as the receive buffer of the all-to-all exchange.
+ *
+ *  An element (x,y,z) is accessed in a[] with index
+ *  [([x - firstx] * ny + y) * nz + z] and in b[] as
+ *  [((y - firsty) * nx + x) * nz + z]
+ *
+ *  The operation consists of three steps: rows of nz elements are packed
+ *  from a[] into b[] such that the data for each destination task is
+ *  contiguous, the blocks are exchanged with myMPI_Alltoallv() from b[] back
+ *  into a[], and finally the received rows are copied from a[] into b[] in
+ *  the target ordering. The counts and offsets handed to myMPI_Alltoallv()
+ *  are computed in units of fft_complex elements; sizeof(fft_complex) is
+ *  passed to it as a separate argument.
+ *
+ * \param[in, out] av Pointer to array a.
+ * \param[in, out] bv Pointer to array b.
+ * \param[in] sx Array storing number of slabs in each task.
+ * \param[in] firstx Array with first slab in each task.
+ * \param[in] sy Array storing number of transposed slabs in each task.
+ * \param[in] firsty Array storing first transposed slab in each task.
+ * \param[in] nx Number of elements in x direction.
+ * \param[in] ny Number of elements in y direction.
+ * \param[in] nz Number of elements in z direction.
+ * \param[in] mode If mode = 0, the forward transpose described above is
+ *            carried out; for any other value (callers use 1) the reverse
+ *            operation is carried out.
+ *
+ * \return void
+ */
+static void my_slab_transpose(void *av, void *bv, int *sx, int *firstx, int *sy, int *firsty, int nx, int ny, int nz, int mode)
+{
+  char *a = (char *)av; /* byte pointers, all offsets below are computed in bytes */
+  char *b = (char *)bv;
+
+  size_t *scount = (size_t *)mymalloc("scount", NTask * sizeof(size_t));
+  size_t *rcount = (size_t *)mymalloc("rcount", NTask * sizeof(size_t));
+  size_t *soff   = (size_t *)mymalloc("soff", NTask * sizeof(size_t));
+  size_t *roff   = (size_t *)mymalloc("roff", NTask * sizeof(size_t));
+  int i, n, prod, flag_big = 0, flag_big_all = 0;
+
+  for(i = 0; i < NTask; i++)
+    {
+      scount[i] = sy[i] * sx[ThisTask] * ((size_t)nz);     /* elements sent to task i in the forward direction */
+      rcount[i] = sy[ThisTask] * sx[i] * ((size_t)nz);     /* elements received from task i in the forward direction */
+      soff[i]   = firsty[i] * sx[ThisTask] * ((size_t)nz); /* start of the block for task i in the packed buffer */
+      roff[i]   = sy[ThisTask] * firstx[i] * ((size_t)nz); /* start of the block from task i in the receive buffer */
+
+      if(scount[i] * sizeof(fft_complex) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+        flag_big = 1;
+    }
+
+  /* produce a flag if any of the send sizes (on any task, hence the MPI_MAX reduction) is above our
+   * transfer limit, in this case we will transfer the data in chunks.
+   */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  if(mode == 0)
+    {
+      /* first pack the data into contiguous blocks, one block per destination task */
+      prod = NTask * sx[ThisTask];
+      for(n = 0; n < prod; n++)
+        {
+          int k = n / NTask; /* local x-slab index, 0 <= k < sx[ThisTask] */
+          int i = n % NTask; /* destination task */
+          int j;
+
+          for(j = 0; j < sy[i]; j++)
+            memcpy(b + (k * sy[i] + j + firsty[i] * sx[ThisTask]) * (nz * sizeof(fft_complex)),
+                   a + (k * ny + (firsty[i] + j)) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
+        }
+
+      /* transfer the data, a[] is overwritten with the received blocks */
+      myMPI_Alltoallv(b, scount, soff, a, rcount, roff, sizeof(fft_complex), flag_big_all, MPI_COMM_WORLD);
+
+      /* unpack the data into the right order */
+      prod = NTask * sy[ThisTask];
+      for(n = 0; n < prod; n++)
+        {
+          int j = n / NTask; /* local y-slab index, 0 <= j < sy[ThisTask] */
+          int i = n % NTask; /* source task */
+          int k;
+
+          for(k = 0; k < sx[i]; k++)
+            memcpy(b + (j * nx + k + firstx[i]) * (nz * sizeof(fft_complex)),
+                   a + ((k + firstx[i]) * sy[ThisTask] + j) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
+        }
+    }
+  else
+    {
+      /* first pack the data into contiguous blocks (inverse of the unpack step of the forward direction) */
+      prod = NTask * sy[ThisTask];
+      for(n = 0; n < prod; n++)
+        {
+          int j = n / NTask; /* local y-slab index, 0 <= j < sy[ThisTask] */
+          int i = n % NTask; /* destination task */
+          int k;
+
+          for(k = 0; k < sx[i]; k++)
+            memcpy(b + ((k + firstx[i]) * sy[ThisTask] + j) * (nz * sizeof(fft_complex)),
+                   a + (j * nx + k + firstx[i]) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
+        }
+
+      /* transfer the data, with send and receive layouts swapped compared to the forward direction */
+      myMPI_Alltoallv(b, rcount, roff, a, scount, soff, sizeof(fft_complex), flag_big_all, MPI_COMM_WORLD);
+
+      /* unpack the data into the right order (inverse of the pack step of the forward direction) */
+      prod = NTask * sx[ThisTask];
+      for(n = 0; n < prod; n++)
+        {
+          int k = n / NTask; /* local x-slab index, 0 <= k < sx[ThisTask] */
+          int i = n % NTask; /* source task */
+          int j;
+
+          for(j = 0; j < sy[i]; j++)
+            memcpy(b + (k * ny + (firsty[i] + j)) * (nz * sizeof(fft_complex)),
+                   a + (k * sy[i] + j + firsty[i] * sx[ThisTask]) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
+        }
+    }
+  /* now the result is in b[] */
+
+  /* release the temporary arrays in reverse order of their allocation */
+  myfree(roff);
+  myfree(soff);
+  myfree(rcount);
+  myfree(scount);
+}
+
+/*!
\brief Performs a slab-based Fast Fourier transformation. + * + * \param[in] plan FFT plan. + * \param[in, out] data Array to be Fourier transformed. + * \param[out] workspace Workspace to temporary operate in. + * \param[in] forward Forward (1) or backward (-1) Fourier transformaiton? + * + * \return void + */ +void my_slab_based_fft(fft_plan *plan, void *data, void *workspace, int forward) +{ + int n, prod; + int slabsx = plan->slabs_x_per_task[ThisTask]; + int slabsy = plan->slabs_y_per_task[ThisTask]; + + int ngridx = plan->NgridX; + int ngridy = plan->NgridY; + int ngridz = plan->Ngridz; + int ngridz2 = 2 * ngridz; + + size_t ngridx_long = ngridx; + size_t ngridy_long = ngridy; + size_t ngridz_long = ngridz; + size_t ngridz2_long = ngridz2; + + fft_real *data_real = (fft_real *)data; + fft_complex *data_complex = (fft_complex *)data, *workspace_complex = (fft_complex *)workspace; + + if(forward == 1) + { + /* do the z-direction FFT, real to complex */ + prod = slabsx * ngridy; + for(n = 0; n < prod; n++) + { + FFTW(execute_dft_r2c)(plan->forward_plan_zdir, data_real + n * ngridz2_long, workspace_complex + n * ngridz_long); + } + + /* do the y-direction FFT, complex to complex */ + prod = slabsx * ngridz; + for(n = 0; n < prod; n++) + { + int i = n / ngridz; + int j = n % ngridz; + + FFTW(execute_dft) + (plan->forward_plan_ydir, workspace_complex + i * ngridz * ngridy_long + j, data_complex + i * ngridz * ngridy_long + j); + } + + /* now our data resides in data_complex[] */ + + /* do the transpose */ + my_slab_transpose(data_complex, workspace_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task, + plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 0); + + /* now the data is in workspace_complex[] */ + + /* finally, do the transform along the x-direction (we are in transposed order, x and y have interchanged */ + prod = slabsy * ngridz; + for(n = 0; n < prod; n++) + { + int i = n / ngridz; + int j = n % ngridz; + + 
FFTW(execute_dft) + (plan->forward_plan_xdir, workspace_complex + i * ngridz * ngridx_long + j, data_complex + i * ngridz * ngridx_long + j); + } + + /* now the result is in data_complex[] */ + } + else + { + prod = slabsy * ngridz; + + for(n = 0; n < prod; n++) + { + int i = n / ngridz; + int j = n % ngridz; + + FFTW(execute_dft) + (plan->backward_plan_xdir, data_complex + i * ngridz * ngridx_long + j, workspace_complex + i * ngridz * ngridx_long + j); + } + + my_slab_transpose(workspace_complex, data_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task, + plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 1); + + prod = slabsx * ngridz; + + for(n = 0; n < prod; n++) + { + int i = n / ngridz; + int j = n % ngridz; + + FFTW(execute_dft) + (plan->backward_plan_ydir, data_complex + i * ngridz * ngridy_long + j, workspace_complex + i * ngridz * ngridy_long + j); + } + + prod = slabsx * ngridy; + + for(n = 0; n < prod; n++) + { + FFTW(execute_dft_c2r)(plan->backward_plan_zdir, workspace_complex + n * ngridz_long, data_real + n * ngridz2_long); + } + + /* now the result is in data[] */ + } +} + +/*! \brief Performs a slab-based complex to complex Fast Fourier + * transformation. + * + * \param[in] plan FFT plan. + * \param[in, out] data Array to be Fourier transformed. + * \param[out] workspace Workspace to temporary operate in. + * \param[in] forward Forward (1) or backward (-1) Fourier transformaiton? 
+ * + * \return void + */ +void my_slab_based_fft_c2c(fft_plan *plan, void *data, void *workspace, int forward) +{ + int n, prod; + int slabsx = plan->slabs_x_per_task[ThisTask]; + int slabsy = plan->slabs_y_per_task[ThisTask]; + + int ngridx = plan->NgridX; + int ngridy = plan->NgridY; + int ngridz = plan->NgridZ; + + size_t ngridx_long = ngridx; + size_t ngridy_long = ngridy; + size_t ngridz_long = ngridz; + + fft_complex *data_start = (fft_complex *)data; + fft_complex *data_complex = (fft_complex *)data, *workspace_complex = (fft_complex *)workspace; + + if(forward == 1) + { + /* do the z-direction FFT, complex to complex */ + prod = slabsx * ngridy; + for(n = 0; n < prod; n++) + { + FFTW(execute_dft)(plan->forward_plan_zdir, data_start + n * ngridz, workspace_complex + n * ngridz); + } + + /* do the y-direction FFT, complex to complex */ + prod = slabsx * ngridz; + for(n = 0; n < prod; n++) + { + int i = n / ngridz; + int j = n % ngridz; + + FFTW(execute_dft) + (plan->forward_plan_ydir, workspace_complex + i * ngridz * ngridy_long + j, data_complex + i * ngridz * ngridy_long + j); + } + + /* now our data resides in data_complex[] */ + + /* do the transpose */ + my_slab_transpose(data_complex, workspace_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task, + plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 0); + + /* now the data is in workspace_complex[] */ + + /* finally, do the transform along the x-direction (we are in transposed order, x and y have interchanged */ + prod = slabsy * ngridz; + for(n = 0; n < prod; n++) + { + int i = n / ngridz; + int j = n % ngridz; + + FFTW(execute_dft) + (plan->forward_plan_xdir, workspace_complex + i * ngridz * ngridx_long + j, data_complex + i * ngridz * ngridx_long + j); + } + + /* now the result is in data_complex[] */ + } + else + { + prod = slabsy * ngridz; + + for(n = 0; n < prod; n++) + { + int i = n / ngridz; + int j = n % ngridz; + + FFTW(execute_dft) + (plan->backward_plan_xdir, 
data_complex + i * ngridz * ngridx_long + j, workspace_complex + i * ngridz * ngridx_long + j); + } + + my_slab_transpose(workspace_complex, data_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task, + plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 1); + + prod = slabsx * ngridz; + + for(n = 0; n < prod; n++) + { + int i = n / ngridz; + int j = n % ngridz; + + FFTW(execute_dft) + (plan->backward_plan_ydir, data_complex + i * ngridz * ngridy_long + j, workspace_complex + i * ngridz * ngridy_long + j); + } + + prod = slabsx * ngridy; + + for(n = 0; n < prod; n++) + { + FFTW(execute_dft)(plan->backward_plan_zdir, workspace_complex + n * ngridz, data_start + n * ngridz); + } + + /* now the result is in data[] */ + } +} + +#else /* #ifndef FFT_COLUMN_BASED */ + +static void my_fft_column_remap(fft_complex *data, int Ndims[3], int in_firstcol, int in_ncol, fft_complex *out, int perm[3], + int out_firstcol, int out_ncol, size_t *offset_send, size_t *offset_recv, size_t *count_send, + size_t *count_recv, size_t just_count_flag); + +static void my_fft_column_transpose(fft_real *data, int Ndims[3], /* global dimensions of data cube */ + int in_firstcol, int in_ncol, /* first column and number of columns */ + fft_real *out, int perm[3], int out_firstcol, int out_ncol, size_t *offset_send, + size_t *offset_recv, size_t *count_send, size_t *count_recv, size_t just_count_flag); + +static void my_fft_column_transpose_c(fft_complex *data, int Ndims[3], /* global dimensions of data cube */ + int in_firstcol, int in_ncol, /* first column and number of columns */ + fft_complex *out, int perm[3], int out_firstcol, int out_ncol, size_t *offset_send, + size_t *offset_recv, size_t *count_send, size_t *count_recv, size_t just_count_flag); + +/*! \brief Initializes column based FFT. + * + * \param[out] plan FFT plan. + * \param[in] NgridX Number of grid points in X direction. + * \param[in] NgridY Number of grid points in Y direction. 
+ * \param[in] NgridZ Number of grid points in Z direction. + * + * \return void + */ +void my_column_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ) +{ + plan->NgridX = NgridX; + plan->NgridY = NgridY; + plan->NgridZ = NgridZ; + + int Ngridz = NgridZ / 2 + 1; + + plan->Ngridz = Ngridz; + plan->Ngrid2 = 2 * Ngridz; + + int columns, avg, exc, tasklastsection, pivotcol; + + columns = NgridX * NgridY; + avg = (columns - 1) / NTask + 1; + exc = NTask * avg - columns; + tasklastsection = NTask - exc; + pivotcol = tasklastsection * avg; + + plan->pivotcol = pivotcol; + plan->avg = avg; + plan->tasklastsection = tasklastsection; + + if(ThisTask < tasklastsection) + { + plan->base_firstcol = ThisTask * avg; + plan->base_ncol = avg; + } + else + { + plan->base_firstcol = ThisTask * avg - (ThisTask - tasklastsection); + plan->base_ncol = avg - 1; + } + + plan->base_lastcol = plan->base_firstcol + plan->base_ncol - 1; + + subdivide_evenly(NgridX * Ngridz, NTask, ThisTask, &plan->transposed_firstcol, &plan->transposed_ncol); + + subdivide_evenly(NgridY * Ngridz, NTask, ThisTask, &plan->second_transposed_firstcol, &plan->second_transposed_ncol); + + subdivide_evenly(plan->NgridX * plan->Ngrid2, NTask, ThisTask, &plan->firstcol_XZ, &plan->ncol_XZ); + + subdivide_evenly(plan->NgridY * plan->Ngrid2, NTask, ThisTask, &plan->firstcol_YZ, &plan->ncol_YZ); + + plan->second_transposed_ncells = ((size_t)plan->NgridX) * plan->second_transposed_ncol; + + plan->max_datasize = ((size_t)plan->Ngrid2) * plan->base_ncol; + plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t)plan->NgridY) * plan->transposed_ncol); + plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t)plan->NgridX) * plan->second_transposed_ncol); + plan->max_datasize = smax(plan->max_datasize, ((size_t)plan->ncol_XZ) * plan->NgridY); + plan->max_datasize = smax(plan->max_datasize, ((size_t)plan->ncol_YZ) * plan->NgridX); + + plan->fftsize = plan->max_datasize; + + plan->offsets_send_A = 
mymalloc_clear("offsets_send_A", NTask * sizeof(size_t)); + plan->offsets_recv_A = mymalloc_clear("offsets_recv_A", NTask * sizeof(size_t)); + plan->offsets_send_B = mymalloc_clear("offsets_send_B", NTask * sizeof(size_t)); + plan->offsets_recv_B = mymalloc_clear("offsets_recv_B", NTask * sizeof(size_t)); + plan->offsets_send_C = mymalloc_clear("offsets_send_C", NTask * sizeof(size_t)); + plan->offsets_recv_C = mymalloc_clear("offsets_recv_C", NTask * sizeof(size_t)); + plan->offsets_send_D = mymalloc_clear("offsets_send_D", NTask * sizeof(size_t)); + plan->offsets_recv_D = mymalloc_clear("offsets_recv_D", NTask * sizeof(size_t)); + plan->offsets_send_13 = mymalloc_clear("offsets_send_13", NTask * sizeof(size_t)); + plan->offsets_recv_13 = mymalloc_clear("offsets_recv_13", NTask * sizeof(size_t)); + plan->offsets_send_23 = mymalloc_clear("offsets_send_23", NTask * sizeof(size_t)); + plan->offsets_recv_23 = mymalloc_clear("offsets_recv_23", NTask * sizeof(size_t)); + plan->offsets_send_13back = mymalloc_clear("offsets_send_13back", NTask * sizeof(size_t)); + plan->offsets_recv_13back = mymalloc_clear("offsets_recv_13back", NTask * sizeof(size_t)); + plan->offsets_send_23back = mymalloc_clear("offsets_send_23back", NTask * sizeof(size_t)); + plan->offsets_recv_23back = mymalloc_clear("offsets_recv_23back", NTask * sizeof(size_t)); + + plan->count_send_A = mymalloc_clear("count_send_A", NTask * sizeof(size_t)); + plan->count_recv_A = mymalloc_clear("count_recv_A", NTask * sizeof(size_t)); + plan->count_send_B = mymalloc_clear("count_send_B", NTask * sizeof(size_t)); + plan->count_recv_B = mymalloc_clear("count_recv_B", NTask * sizeof(size_t)); + plan->count_send_C = mymalloc_clear("count_send_C", NTask * sizeof(size_t)); + plan->count_recv_C = mymalloc_clear("count_recv_C", NTask * sizeof(size_t)); + plan->count_send_D = mymalloc_clear("count_send_D", NTask * sizeof(size_t)); + plan->count_recv_D = mymalloc_clear("count_recv_D", NTask * sizeof(size_t)); + 
plan->count_send_13 = mymalloc_clear("count_send_13", NTask * sizeof(size_t)); + plan->count_recv_13 = mymalloc_clear("count_recv_13", NTask * sizeof(size_t)); + plan->count_send_23 = mymalloc_clear("count_send_23", NTask * sizeof(size_t)); + plan->count_recv_23 = mymalloc_clear("count_recv_23", NTask * sizeof(size_t)); + plan->count_send_13back = mymalloc_clear("count_send_13back", NTask * sizeof(size_t)); + plan->count_recv_13back = mymalloc_clear("count_recv_13back", NTask * sizeof(size_t)); + plan->count_send_23back = mymalloc_clear("count_send_23back", NTask * sizeof(size_t)); + plan->count_recv_23back = mymalloc_clear("count_recv_23back", NTask * sizeof(size_t)); + + int dimA[3] = {plan->NgridX, plan->NgridY, plan->Ngridz}; + int permA[3] = {0, 2, 1}; + + my_fft_column_remap(NULL, dimA, plan->base_firstcol, plan->base_ncol, NULL, permA, plan->transposed_firstcol, plan->transposed_ncol, + plan->offsets_send_A, plan->offsets_recv_A, plan->count_send_A, plan->count_recv_A, 1); + + int dimB[3] = {plan->NgridX, plan->Ngridz, plan->NgridY}; + int permB[3] = {2, 1, 0}; + + my_fft_column_remap(NULL, dimB, plan->transposed_firstcol, plan->transposed_ncol, NULL, permB, plan->second_transposed_firstcol, + plan->second_transposed_ncol, plan->offsets_send_B, plan->offsets_recv_B, plan->count_send_B, plan->count_recv_B, + 1); + + int dimC[3] = {plan->NgridY, plan->Ngridz, plan->NgridX}; + int permC[3] = {2, 1, 0}; + + my_fft_column_remap(NULL, dimC, plan->second_transposed_firstcol, plan->second_transposed_ncol, NULL, permC, + plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_C, plan->offsets_recv_C, plan->count_send_C, + plan->count_recv_C, 1); + + int dimD[3] = {plan->NgridX, plan->Ngridz, plan->NgridY}; + int permD[3] = {0, 2, 1}; + + my_fft_column_remap(NULL, dimD, plan->transposed_firstcol, plan->transposed_ncol, NULL, permD, plan->base_firstcol, plan->base_ncol, + plan->offsets_send_D, plan->offsets_recv_D, plan->count_send_D, plan->count_recv_D, 
1); + + int dim23[3] = {plan->NgridX, plan->NgridY, plan->Ngrid2}; + int perm23[3] = {0, 2, 1}; + + my_fft_column_transpose(NULL, dim23, plan->base_firstcol, plan->base_ncol, NULL, perm23, plan->firstcol_XZ, plan->ncol_XZ, + plan->offsets_send_23, plan->offsets_recv_23, plan->count_send_23, plan->count_recv_23, 1); + + int dim23back[3] = {plan->NgridX, plan->Ngrid2, plan->NgridY}; + int perm23back[3] = {0, 2, 1}; + + my_fft_column_transpose(NULL, dim23back, plan->firstcol_XZ, plan->ncol_XZ, NULL, perm23back, plan->base_firstcol, plan->base_ncol, + plan->offsets_send_23back, plan->offsets_recv_23back, plan->count_send_23back, plan->count_recv_23back, 1); + + int dim13[3] = {plan->NgridX, plan->NgridY, plan->Ngrid2}; + int perm13[3] = {2, 1, 0}; + + my_fft_column_transpose(NULL, dim13, plan->base_firstcol, plan->base_ncol, NULL, perm13, plan->firstcol_YZ, plan->ncol_YZ, + plan->offsets_send_13, plan->offsets_recv_13, plan->count_send_13, plan->count_recv_13, 1); + + int dim13back[3] = {plan->Ngrid2, plan->NgridY, plan->NgridX}; + int perm13back[3] = {2, 1, 0}; + + my_fft_column_transpose(NULL, dim13back, plan->firstcol_YZ, plan->ncol_YZ, NULL, perm13back, plan->base_firstcol, plan->base_ncol, + plan->offsets_send_13back, plan->offsets_recv_13back, plan->count_send_13back, plan->count_recv_13back, 1); +} + +/*! \brief Initializes complex to complex column based FFT. + * + * \param[out] plan FFT plan. + * \param[in] NgridX Number of grid points in X direction. + * \param[in] NgridY Number of grid points in Y direction. + * \param[in] NgridZ Number of grid points in Z direction. 
+ * + * \return void + */ +void my_column_based_fft_init_c2c(fft_plan *plan, int NgridX, int NgridY, int NgridZ) +{ + plan->NgridX = NgridX; + plan->NgridY = NgridY; + plan->NgridZ = NgridZ; + + int columns, avg, exc, tasklastsection, pivotcol; + + columns = NgridX * NgridY; + avg = (columns - 1) / NTask + 1; + exc = NTask * avg - columns; + tasklastsection = NTask - exc; + pivotcol = tasklastsection * avg; + + plan->pivotcol = pivotcol; + plan->avg = avg; + plan->tasklastsection = tasklastsection; + + if(ThisTask < tasklastsection) + { + plan->base_firstcol = ThisTask * avg; + plan->base_ncol = avg; + } + else + { + plan->base_firstcol = ThisTask * avg - (ThisTask - tasklastsection); + plan->base_ncol = avg - 1; + } + + plan->base_lastcol = plan->base_firstcol + plan->base_ncol - 1; + + subdivide_evenly(NgridX * NgridZ, NTask, ThisTask, &plan->transposed_firstcol, &plan->transposed_ncol); + + subdivide_evenly(NgridY * NgridZ, NTask, ThisTask, &plan->second_transposed_firstcol, &plan->second_transposed_ncol); + + subdivide_evenly(plan->NgridX * plan->NgridZ, NTask, ThisTask, &plan->firstcol_XZ, &plan->ncol_XZ); + + subdivide_evenly(plan->NgridY * plan->NgridZ, NTask, ThisTask, &plan->firstcol_YZ, &plan->ncol_YZ); + + plan->second_transposed_ncells = ((size_t)plan->NgridX) * plan->second_transposed_ncol; + + plan->max_datasize = 2 * ((size_t)plan->NgridZ) * plan->base_ncol; + plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t)plan->NgridY) * plan->transposed_ncol); + plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t)plan->NgridX) * plan->second_transposed_ncol); + plan->max_datasize = smax(plan->max_datasize, ((size_t)plan->ncol_XZ) * plan->NgridY); + plan->max_datasize = smax(plan->max_datasize, ((size_t)plan->ncol_YZ) * plan->NgridX); + + plan->fftsize = plan->max_datasize; + + plan->offsets_send_A = mymalloc_clear("offsets_send_A", NTask * sizeof(size_t)); + plan->offsets_recv_A = mymalloc_clear("offsets_recv_A", NTask * sizeof(size_t)); + 
plan->offsets_send_B = mymalloc_clear("offsets_send_B", NTask * sizeof(size_t)); + plan->offsets_recv_B = mymalloc_clear("offsets_recv_B", NTask * sizeof(size_t)); + plan->offsets_send_C = mymalloc_clear("offsets_send_C", NTask * sizeof(size_t)); + plan->offsets_recv_C = mymalloc_clear("offsets_recv_C", NTask * sizeof(size_t)); + plan->offsets_send_D = mymalloc_clear("offsets_send_D", NTask * sizeof(size_t)); + plan->offsets_recv_D = mymalloc_clear("offsets_recv_D", NTask * sizeof(size_t)); + plan->offsets_send_13 = mymalloc_clear("offsets_send_13", NTask * sizeof(size_t)); + plan->offsets_recv_13 = mymalloc_clear("offsets_recv_13", NTask * sizeof(size_t)); + plan->offsets_send_23 = mymalloc_clear("offsets_send_23", NTask * sizeof(size_t)); + plan->offsets_recv_23 = mymalloc_clear("offsets_recv_23", NTask * sizeof(size_t)); + plan->offsets_send_13back = mymalloc_clear("offsets_send_13back", NTask * sizeof(size_t)); + plan->offsets_recv_13back = mymalloc_clear("offsets_recv_13back", NTask * sizeof(size_t)); + plan->offsets_send_23back = mymalloc_clear("offsets_send_23back", NTask * sizeof(size_t)); + plan->offsets_recv_23back = mymalloc_clear("offsets_recv_23back", NTask * sizeof(size_t)); + + plan->count_send_A = mymalloc_clear("count_send_A", NTask * sizeof(size_t)); + plan->count_recv_A = mymalloc_clear("count_recv_A", NTask * sizeof(size_t)); + plan->count_send_B = mymalloc_clear("count_send_B", NTask * sizeof(size_t)); + plan->count_recv_B = mymalloc_clear("count_recv_B", NTask * sizeof(size_t)); + plan->count_send_C = mymalloc_clear("count_send_C", NTask * sizeof(size_t)); + plan->count_recv_C = mymalloc_clear("count_recv_C", NTask * sizeof(size_t)); + plan->count_send_D = mymalloc_clear("count_send_D", NTask * sizeof(size_t)); + plan->count_recv_D = mymalloc_clear("count_recv_D", NTask * sizeof(size_t)); + plan->count_send_13 = mymalloc_clear("count_send_13", NTask * sizeof(size_t)); + plan->count_recv_13 = mymalloc_clear("count_recv_13", NTask * 
sizeof(size_t)); + plan->count_send_23 = mymalloc_clear("count_send_23", NTask * sizeof(size_t)); + plan->count_recv_23 = mymalloc_clear("count_recv_23", NTask * sizeof(size_t)); + plan->count_send_13back = mymalloc_clear("count_send_13back", NTask * sizeof(size_t)); + plan->count_recv_13back = mymalloc_clear("count_recv_13back", NTask * sizeof(size_t)); + plan->count_send_23back = mymalloc_clear("count_send_23back", NTask * sizeof(size_t)); + plan->count_recv_23back = mymalloc_clear("count_recv_23back", NTask * sizeof(size_t)); + + int dimA[3] = {plan->NgridX, plan->NgridY, plan->NgridZ}; + int permA[3] = {0, 2, 1}; + + my_fft_column_remap(NULL, dimA, plan->base_firstcol, plan->base_ncol, NULL, permA, plan->transposed_firstcol, plan->transposed_ncol, + plan->offsets_send_A, plan->offsets_recv_A, plan->count_send_A, plan->count_recv_A, 1); + + int dimB[3] = {plan->NgridX, plan->NgridZ, plan->NgridY}; + int permB[3] = {2, 1, 0}; + + my_fft_column_remap(NULL, dimB, plan->transposed_firstcol, plan->transposed_ncol, NULL, permB, plan->second_transposed_firstcol, + plan->second_transposed_ncol, plan->offsets_send_B, plan->offsets_recv_B, plan->count_send_B, plan->count_recv_B, + 1); + + int dimC[3] = {plan->NgridY, plan->NgridZ, plan->NgridX}; + int permC[3] = {2, 1, 0}; + + my_fft_column_remap(NULL, dimC, plan->second_transposed_firstcol, plan->second_transposed_ncol, NULL, permC, + plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_C, plan->offsets_recv_C, plan->count_send_C, + plan->count_recv_C, 1); + + int dimD[3] = {plan->NgridX, plan->NgridZ, plan->NgridY}; + int permD[3] = {0, 2, 1}; + + my_fft_column_remap(NULL, dimD, plan->transposed_firstcol, plan->transposed_ncol, NULL, permD, plan->base_firstcol, plan->base_ncol, + plan->offsets_send_D, plan->offsets_recv_D, plan->count_send_D, plan->count_recv_D, 1); + + int dim23[3] = {plan->NgridX, plan->NgridY, plan->NgridZ}; + int perm23[3] = {0, 2, 1}; + + my_fft_column_transpose_c(NULL, dim23, 
plan->base_firstcol, plan->base_ncol, NULL, perm23, plan->firstcol_XZ, plan->ncol_XZ, + plan->offsets_send_23, plan->offsets_recv_23, plan->count_send_23, plan->count_recv_23, 1); + + int dim23back[3] = {plan->NgridX, plan->NgridZ, plan->NgridY}; + int perm23back[3] = {0, 2, 1}; + + my_fft_column_transpose_c(NULL, dim23back, plan->firstcol_XZ, plan->ncol_XZ, NULL, perm23back, plan->base_firstcol, plan->base_ncol, + plan->offsets_send_23back, plan->offsets_recv_23back, plan->count_send_23back, plan->count_recv_23back, 1); + + int dim13[3] = {plan->NgridX, plan->NgridY, plan->NgridZ}; + int perm13[3] = {2, 1, 0}; + + my_fft_column_transpose_c(NULL, dim13, plan->base_firstcol, plan->base_ncol, NULL, perm13, plan->firstcol_YZ, plan->ncol_YZ, + plan->offsets_send_13, plan->offsets_recv_13, plan->count_send_13, plan->count_recv_13, 1); + + int dim13back[3] = {plan->NgridZ, plan->NgridY, plan->NgridX}; + int perm13back[3] = {2, 1, 0}; + + my_fft_column_transpose_c(NULL, dim13back, plan->firstcol_YZ, plan->ncol_YZ, NULL, perm13back, plan->base_firstcol, plan->base_ncol, + plan->offsets_send_13back, plan->offsets_recv_13back, plan->count_send_13back, plan->count_recv_13back, 1); +} + +/*! \brief YZ column transpose. + * + * \param[in] plan FFT plan. + * \param[in] data Array with data to be swapped. + * \param[out] out Array with data output. + * + * \return void + */ +void my_fft_swap23(fft_plan *plan, fft_real *data, fft_real *out) +{ + int dim23[3] = {plan->NgridX, plan->NgridY, plan->Ngrid2}; + int perm23[3] = {0, 2, 1}; + + my_fft_column_transpose(data, dim23, plan->base_firstcol, plan->base_ncol, out, perm23, plan->firstcol_XZ, plan->ncol_XZ, + plan->offsets_send_23, plan->offsets_recv_23, plan->count_send_23, plan->count_recv_23, 0); +} + +/*! \brief Reverse YZ column transpose. + * + * \param[in] plan FFT plan. + * \param[in] data Array with data to be swapped. + * \param[out] out Array with data output. 
+ * + * \return void + */ +void my_fft_swap23back(fft_plan *plan, fft_real *data, fft_real *out) +{ + int dim23back[3] = {plan->NgridX, plan->Ngrid2, plan->NgridY}; + int perm23back[3] = {0, 2, 1}; + + my_fft_column_transpose(data, dim23back, plan->firstcol_XZ, plan->ncol_XZ, out, perm23back, plan->base_firstcol, plan->base_ncol, + plan->offsets_send_23back, plan->offsets_recv_23back, plan->count_send_23back, plan->count_recv_23back, 0); +} + +/*! \brief XZ column transpose. + * + * \param[in] plan FFT plan. + * \param[in] data Array with data to be swapped. + * \param[out] out Array with data output. + * + * \return void + */ +void my_fft_swap13(fft_plan *plan, fft_real *data, fft_real *out) +{ + int dim13[3] = {plan->NgridX, plan->NgridY, plan->Ngrid2}; + int perm13[3] = {2, 1, 0}; + + my_fft_column_transpose(data, dim13, plan->base_firstcol, plan->base_ncol, out, perm13, plan->firstcol_YZ, plan->ncol_YZ, + plan->offsets_send_13, plan->offsets_recv_13, plan->count_send_13, plan->count_recv_13, 0); +} + +/*! \brief Reverse XZ column transpose. + * + * \param[in] plan FFT plan. + * \param[in] data Array with data to be swapped. + * \param[out] out Array with data output. + * + * \return void + */ +void my_fft_swap13back(fft_plan *plan, fft_real *data, fft_real *out) +{ + int dim13back[3] = {plan->Ngrid2, plan->NgridY, plan->NgridX}; + int perm13back[3] = {2, 1, 0}; + + my_fft_column_transpose(data, dim13back, plan->firstcol_YZ, plan->ncol_YZ, out, perm13back, plan->base_firstcol, plan->base_ncol, + plan->offsets_send_13back, plan->offsets_recv_13back, plan->count_send_13back, plan->count_recv_13back, 0); +} + +/*! \brief Performs a column-based Fast Fourier transformation. + * + * \param[in] plan FFT plan. + * \param[in, out] data Array to be Fourier transformed. + * \param[out] workspace Workspace to temporary operate in. + * \param[in] forward Forward (1) or backward (-1) Fourier transformaiton? 
+ * + * \return void + */ +void my_column_based_fft(fft_plan *plan, void *data, void *workspace, int forward) +{ + size_t n; + fft_real *data_real = data, *workspace_real = workspace; + fft_complex *data_complex = data, *workspace_complex = workspace; + + if(forward == 1) + { + /* do the z-direction FFT, real to complex */ + for(n = 0; n < plan->base_ncol; n++) + FFTW(execute_dft_r2c)(plan->forward_plan_zdir, data_real + n * plan->Ngrid2, workspace_complex + n * plan->Ngridz); + + int dimA[3] = {plan->NgridX, plan->NgridY, plan->Ngridz}; + int permA[3] = {0, 2, 1}; + + my_fft_column_remap(workspace_complex, dimA, plan->base_firstcol, plan->base_ncol, data_complex, permA, + plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_A, plan->offsets_recv_A, + plan->count_send_A, plan->count_recv_A, 0); + + /* do the y-direction FFT in 'data', complex to complex */ + for(n = 0; n < plan->transposed_ncol; n++) + FFTW(execute_dft)(plan->forward_plan_ydir, data_complex + n * plan->NgridY, workspace_complex + n * plan->NgridY); + + int dimB[3] = {plan->NgridX, plan->Ngridz, plan->NgridY}; + int permB[3] = {2, 1, 0}; + + my_fft_column_remap(workspace_complex, dimB, plan->transposed_firstcol, plan->transposed_ncol, data_complex, permB, + plan->second_transposed_firstcol, plan->second_transposed_ncol, plan->offsets_send_B, plan->offsets_recv_B, + plan->count_send_B, plan->count_recv_B, 0); + + /* do the x-direction FFT in 'data', complex to complex */ + for(n = 0; n < plan->second_transposed_ncol; n++) + FFTW(execute_dft)(plan->forward_plan_xdir, data_complex + n * plan->NgridX, workspace_complex + n * plan->NgridX); + + /* result is now in workspace */ + } + else + { + /* do inverse FFT in 'data' */ + for(n = 0; n < plan->second_transposed_ncol; n++) + FFTW(execute_dft)(plan->backward_plan_xdir, data_complex + n * plan->NgridX, workspace_complex + n * plan->NgridX); + + int dimC[3] = {plan->NgridY, plan->Ngridz, plan->NgridX}; + int permC[3] = {2, 1, 0}; + + 
my_fft_column_remap(workspace_complex, dimC, plan->second_transposed_firstcol, plan->second_transposed_ncol, data_complex, permC, + plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_C, plan->offsets_recv_C, + plan->count_send_C, plan->count_recv_C, 0); + + /* do inverse FFT in 'data' */ + for(n = 0; n < plan->transposed_ncol; n++) + FFTW(execute_dft)(plan->backward_plan_ydir, data_complex + n * plan->NgridY, workspace_complex + n * plan->NgridY); + + int dimD[3] = {plan->NgridX, plan->Ngridz, plan->NgridY}; + int permD[3] = {0, 2, 1}; + + my_fft_column_remap(workspace_complex, dimD, plan->transposed_firstcol, plan->transposed_ncol, data_complex, permD, + plan->base_firstcol, plan->base_ncol, plan->offsets_send_D, plan->offsets_recv_D, plan->count_send_D, + plan->count_recv_D, 0); + + /* do complex-to-real inverse transform on z-coordinates */ + for(n = 0; n < plan->base_ncol; n++) + FFTW(execute_dft_c2r)(plan->backward_plan_zdir, data_complex + n * plan->Ngridz, workspace_real + n * plan->Ngrid2); + } +} + +/*! \brief Performs a slab-based complex to complex Fast Fourier + * transformation. + * + * \param[in] plan FFT plan. + * \param[in, out] data Array to be Fourier transformed. + * \param[out] workspace Workspace to temporary operate in. + * \param[in] forward Forward (1) or backward (-1) Fourier transformaiton? 
+ * + * \return void + */ +void my_column_based_fft_c2c(fft_plan *plan, void *data, void *workspace, int forward) +{ + size_t n; + fft_complex *data_complex = data, *workspace_complex = workspace; + + if(forward == 1) + { + /* do the z-direction FFT, complex to complex */ + for(n = 0; n < plan->base_ncol; n++) + FFTW(execute_dft)(plan->forward_plan_zdir, data_complex + n * plan->NgridZ, workspace_complex + n * plan->NgridZ); + + int dimA[3] = {plan->NgridX, plan->NgridY, plan->NgridZ}; + int permA[3] = {0, 2, 1}; + + my_fft_column_remap(workspace_complex, dimA, plan->base_firstcol, plan->base_ncol, data_complex, permA, + plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_A, plan->offsets_recv_A, + plan->count_send_A, plan->count_recv_A, 0); + + /* do the y-direction FFT in 'data', complex to complex */ + for(n = 0; n < plan->transposed_ncol; n++) + FFTW(execute_dft)(plan->forward_plan_ydir, data_complex + n * plan->NgridY, workspace_complex + n * plan->NgridY); + + int dimB[3] = {plan->NgridX, plan->NgridZ, plan->NgridY}; + int permB[3] = {2, 1, 0}; + + my_fft_column_remap(workspace_complex, dimB, plan->transposed_firstcol, plan->transposed_ncol, data_complex, permB, + plan->second_transposed_firstcol, plan->second_transposed_ncol, plan->offsets_send_B, plan->offsets_recv_B, + plan->count_send_B, plan->count_recv_B, 0); + + /* do the x-direction FFT in 'data', complex to complex */ + for(n = 0; n < plan->second_transposed_ncol; n++) + FFTW(execute_dft)(plan->forward_plan_xdir, data_complex + n * plan->NgridX, workspace_complex + n * plan->NgridX); + + /* result is now in workspace */ + } + else + { + /* do inverse FFT in 'data' */ + for(n = 0; n < plan->second_transposed_ncol; n++) + FFTW(execute_dft)(plan->backward_plan_xdir, data_complex + n * plan->NgridX, workspace_complex + n * plan->NgridX); + + int dimC[3] = {plan->NgridY, plan->NgridZ, plan->NgridX}; + int permC[3] = {2, 1, 0}; + + my_fft_column_remap(workspace_complex, dimC, 
plan->second_transposed_firstcol, plan->second_transposed_ncol, data_complex, permC, + plan->transposed_firstcol, plan->transposed_ncol, plan->offsets_send_C, plan->offsets_recv_C, + plan->count_send_C, plan->count_recv_C, 0); + + /* do inverse FFT in 'data' */ + for(n = 0; n < plan->transposed_ncol; n++) + FFTW(execute_dft)(plan->backward_plan_ydir, data_complex + n * plan->NgridY, workspace_complex + n * plan->NgridY); + + int dimD[3] = {plan->NgridX, plan->NgridZ, plan->NgridY}; + int permD[3] = {0, 2, 1}; + + my_fft_column_remap(workspace_complex, dimD, plan->transposed_firstcol, plan->transposed_ncol, data_complex, permD, + plan->base_firstcol, plan->base_ncol, plan->offsets_send_D, plan->offsets_recv_D, plan->count_send_D, + plan->count_recv_D, 0); + + /* do complex-to-complex inverse transform on z-coordinates */ + for(n = 0; n < plan->base_ncol; n++) + FFTW(execute_dft)(plan->backward_plan_zdir, data_complex + n * plan->NgridZ, workspace_complex + n * plan->NgridZ); + } +} + +/*! \brief Remaps column-based FFT data. + * + * \param[in] data Data to be transposed. + * \param[in] Ndims Global number of dimensions of data cube. + * \param[in] in_firstcol First column. + * \param[in] in_ncol Number of columns. + * \param[out] out Data output. + * \param[in] perm Permutations in dimensions. + * \param[out] out_firstcol First column in output data. + * \param[out] out_ncol Number of columns in output data. + * \param[out] offset_send Offset in array for send operation to MPI tasks. + * \param[out] offset_recv Offset in array for receive operation from MPI + * tasks. + * \param[out] count_send Count how many elements have to be sent to each + * MPI task. + * \param[out] count_recv Count how many elements have to be received from + * each MPI task. + * \param[in] just_count_flag Do element counting for communication instead + * of data transfer. 
+ *
+ * \return void
+ */
+static void my_fft_column_remap(fft_complex *data, int Ndims[3], int in_firstcol, int in_ncol, fft_complex *out, int perm[3],
+                                int out_firstcol, int out_ncol, size_t *offset_send, size_t *offset_recv, size_t *count_send,
+                                size_t *count_recv, size_t just_count_flag)
+{
+  int j, target, origin, ngrp, recvTask, perm_rev[3], xyz[3], uvw[3];
+  size_t nimport, nexport;
+
+  /* determine the inverse permutation (perm_rev[perm[k]] == k for k = 0, 1, 2) */
+  for(j = 0; j < 3; j++)
+    perm_rev[j] = perm[j]; /* first candidate: perm is its own inverse */
+
+  if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2)) /* not yet the inverse */
+    {
+      for(j = 0; j < 3; j++)
+        perm_rev[j] = perm[perm[j]]; /* second candidate: perm applied twice */
+
+      if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2))
+        terminate("bummer"); /* neither candidate inverts perm */
+    }
+
+  int in_colums = Ndims[0] * Ndims[1]; /* total number of columns in the input layout */
+  int in_avg = (in_colums - 1) / NTask + 1; /* ceil(in_colums / NTask) for in_colums >= 1 */
+  int in_exc = NTask * in_avg - in_colums; /* number of tasks that hold in_avg - 1 columns */
+  int in_tasklastsection = NTask - in_exc; /* first task of the section with in_avg - 1 columns */
+  int in_pivotcol = in_tasklastsection * in_avg; /* first column that belongs to that section */
+
+  int out_colums = Ndims[perm[0]] * Ndims[perm[1]]; /* same bookkeeping for the permuted (output) layout */
+  int out_avg = (out_colums - 1) / NTask + 1;
+  int out_exc = NTask * out_avg - out_colums;
+  int out_tasklastsection = NTask - out_exc;
+  int out_pivotcol = out_tasklastsection * out_avg;
+
+  size_t i, ncells = ((size_t)in_ncol) * Ndims[2]; /* local cells: in_ncol columns of length Ndims[2] */
+
+  xyz[0] = in_firstcol / Ndims[1]; /* coordinates of the first local cell */
+  xyz[1] = in_firstcol % Ndims[1];
+  xyz[2] = 0;
+
+  memset(count_send, 0, NTask * sizeof(size_t)); /* recounted on every call, in both modes */
+
+  /* loop over all cells in input array and determine target processor */
+  for(i = 0; i < ncells; i++)
+    {
+      /* determine target task */
+      uvw[0] = xyz[perm[0]];
+      uvw[1] = xyz[perm[1]];
+      uvw[2] = xyz[perm[2]];
+
+      int newcol = Ndims[perm[1]] * uvw[0] + uvw[1]; /* column index in the output layout */
+      if(newcol < out_pivotcol)
+        target = newcol / out_avg;
+      else
+        target = (newcol - out_pivotcol) / (out_avg - 1) + out_tasklastsection;
+
+      /* move data element to targettask */
+
+      if(just_count_flag)
+        count_send[target]++;
+      else
+        {
+          size_t off = offset_send[target] + count_send[target]++; /* next free slot in the send segment of target */
+          out[off][0] = data[i][0];
+          out[off][1] = data[i][1];
+        }
+      xyz[2]++; /* advance to the next cell, last coordinate running fastest */
+      if(xyz[2] == Ndims[2])
+        {
+          xyz[2] = 0;
+          xyz[1]++;
+          if(xyz[1] == Ndims[1])
+            {
+              xyz[1] = 0;
+              xyz[0]++;
+            }
+        }
+    }
+
+  if(just_count_flag)
+    {
+      MPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+
+      for(j = 0, nimport = 0, nexport = 0, offset_send[0] = 0, offset_recv[0] = 0; j < NTask; j++) /* offsets = running sums of counts */
+        {
+          nexport += count_send[j];
+          nimport += count_recv[j];
+
+          if(j > 0)
+            {
+              offset_send[j] = offset_send[j - 1] + count_send[j - 1];
+              offset_recv[j] = offset_recv[j - 1] + count_recv[j - 1];
+            }
+        }
+
+      if(nexport != ncells)
+        terminate("nexport=%lld != ncells=%lld", (long long)nexport, (long long)ncells);
+    }
+  else
+    {
+      nimport = 0;
+
+      /* exchange all the data */
+      for(ngrp = 0; ngrp < (1 << PTask); ngrp++) /* PTask is defined elsewhere; presumably 2^PTask >= NTask - TODO confirm */
+        {
+          recvTask = ThisTask ^ ngrp; /* partner task for this round */
+
+          if(recvTask < NTask)
+            {
+              if(count_send[recvTask] > 0 || count_recv[recvTask] > 0)
+                myMPI_Sendrecv(&out[offset_send[recvTask]], count_send[recvTask] * sizeof(fft_complex), MPI_BYTE, recvTask, TAG_DENS_A,
+                               &data[offset_recv[recvTask]], count_recv[recvTask] * sizeof(fft_complex), MPI_BYTE, recvTask,
+                               TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); /* data[] is overwritten: it is the receive buffer */
+
+              nimport += count_recv[recvTask];
+            }
+        }
+
+      /* now loop over the new cell layout */
+      /* find enclosing rectangle around columns in new plane */
+
+      int first[3], last[3];
+
+      first[0] = out_firstcol / Ndims[perm[1]];
+      first[1] = out_firstcol % Ndims[perm[1]];
+      first[2] = 0;
+
+      last[0] = (out_firstcol + out_ncol - 1) / Ndims[perm[1]];
+      last[1] = (out_firstcol + out_ncol - 1) % Ndims[perm[1]];
+      last[2] = Ndims[perm[2]] - 1;
+
+      if(first[1] + out_ncol >= Ndims[perm[1]]) /* columns reach the end of the row: use the full row range */
+        {
+          first[1] = 0;
+          last[1] = Ndims[perm[1]] - 1;
+        }
+
+      /* now need to map this back to the old coordinates */
+
+      int xyz_first[3], xyz_last[3];
+
+      for(j = 0; j < 3; j++)
+        {
+          xyz_first[j] = first[perm_rev[j]];
+          xyz_last[j] = last[perm_rev[j]];
+        }
+
+      memset(count_recv, 0, NTask * sizeof(size_t)); /* from here on used as per-origin read cursor */
+
+      size_t count = 0;
+
+      /* traverse an enclosing box around the new cell layout in the old order */
+      for(xyz[0] = xyz_first[0]; xyz[0] <= xyz_last[0]; xyz[0]++)
+        for(xyz[1] = xyz_first[1]; xyz[1] <= xyz_last[1]; xyz[1]++)
+          for(xyz[2] = xyz_first[2]; xyz[2] <= xyz_last[2]; xyz[2]++)
+            {
+              /* check that the point is actually part of a column */
+              uvw[0] = xyz[perm[0]];
+              uvw[1] = xyz[perm[1]];
+              uvw[2] = xyz[perm[2]];
+
+              int col = uvw[0] * Ndims[perm[1]] + uvw[1];
+
+              if(col >= out_firstcol && col < out_firstcol + out_ncol)
+                {
+                  /* determine origin task */
+                  int newcol = Ndims[1] * xyz[0] + xyz[1]; /* column index in the input layout */
+                  if(newcol < in_pivotcol)
+                    origin = newcol / in_avg;
+                  else
+                    origin = (newcol - in_pivotcol) / (in_avg - 1) + in_tasklastsection;
+
+                  size_t index = ((size_t)Ndims[perm[2]]) * (col - out_firstcol) + uvw[2]; /* position in the local output columns */
+
+                  /* move data element from origin task */
+                  size_t off = offset_recv[origin] + count_recv[origin]++;
+                  out[index][0] = data[off][0];
+                  out[index][1] = data[off][1];
+
+                  count++;
+                }
+            }
+
+      if(count != nimport) /* every received element must have been placed exactly once */
+        {
+          int fi = out_firstcol % Ndims[perm[1]];
+          int la = (out_firstcol + out_ncol - 1) % Ndims[perm[1]];
+
+          terminate("count=%lld nimport=%lld ncol=%d fi=%d la=%d first=%d last=%d\n", (long long)count, (long long)nimport, out_ncol,
+                    fi, la, first[1], last[1]);
+        }
+    }
+}
+
+/*! \brief Transposes column-based FFT data.
+ *
+ * \param[in,out] data Data to be transposed; overwritten as receive buffer in transfer mode.
+ * \param[in] Ndims Global extent of the data cube in each of the three dimensions.
+ * \param[in] in_firstcol First column.
+ * \param[in] in_ncol Number of columns.
+ * \param[out] out Data output; also used as send buffer in transfer mode.
+ * \param[in] perm Permutations in dimensions.
+ * \param[in] out_firstcol First column in output data.
+ * \param[in] out_ncol Number of columns in output data.
+ * \param[in,out] offset_send Offset in array for send operation to MPI tasks.
+ * \param[in,out] offset_recv Offset in array for receive operation from MPI
+ *                         tasks.
+ * \param[out] count_send Number of elements to be sent to each MPI task;
+ *                        recomputed on every call.
+ * \param[in,out] count_recv Number of elements to be received from each MPI
+ *                        task; set in counting mode, reused as read cursor otherwise.
+ * \param[in] just_count_flag Do element counting for communication instead
+ *                            of data transfer.
+ *
+ * \return void
+ */
+static void my_fft_column_transpose(fft_real *data, int Ndims[3], int in_firstcol, int in_ncol, fft_real *out, int perm[3],
+                                    int out_firstcol, int out_ncol, size_t *offset_send, size_t *offset_recv, size_t *count_send,
+                                    size_t *count_recv, size_t just_count_flag)
+{
+  int j, target, origin, ngrp, recvTask, perm_rev[3], xyz[3], uvw[3];
+  size_t nimport, nexport;
+
+  /* determine the inverse permutation (perm_rev[perm[k]] == k for k = 0, 1, 2) */
+  for(j = 0; j < 3; j++)
+    perm_rev[j] = perm[j]; /* first candidate: perm is its own inverse */
+
+  if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2)) /* not yet the inverse */
+    {
+      for(j = 0; j < 3; j++)
+        perm_rev[j] = perm[perm[j]]; /* second candidate: perm applied twice */
+
+      if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2))
+        terminate("bummer"); /* neither candidate inverts perm */
+    }
+
+  int in_colums = Ndims[0] * Ndims[1]; /* total number of columns in the input layout */
+  int in_avg = (in_colums - 1) / NTask + 1; /* ceil(in_colums / NTask) for in_colums >= 1 */
+  int in_exc = NTask * in_avg - in_colums; /* number of tasks that hold in_avg - 1 columns */
+  int in_tasklastsection = NTask - in_exc; /* first task of the section with in_avg - 1 columns */
+  int in_pivotcol = in_tasklastsection * in_avg; /* first column that belongs to that section */
+
+  int out_colums = Ndims[perm[0]] * Ndims[perm[1]]; /* same bookkeeping for the permuted (output) layout */
+  int out_avg = (out_colums - 1) / NTask + 1;
+  int out_exc = NTask * out_avg - out_colums;
+  int out_tasklastsection = NTask - out_exc;
+  int out_pivotcol = out_tasklastsection * out_avg;
+
+  size_t i, ncells = ((size_t)in_ncol) * Ndims[2]; /* local cells: in_ncol columns of length Ndims[2] */
+
+  xyz[0] = in_firstcol / Ndims[1]; /* coordinates of the first local cell */
+  xyz[1] = in_firstcol % Ndims[1];
+  xyz[2] = 0;
+
+  memset(count_send, 0, NTask * sizeof(size_t)); /* recounted on every call, in both modes */
+
+  /* loop over all cells in input array and determine target processor */
+  for(i = 0; i < ncells; i++)
+    {
+      /* determine target task */
+      uvw[0] = xyz[perm[0]];
+      uvw[1] = xyz[perm[1]];
+      uvw[2] = xyz[perm[2]];
+
+      int newcol = Ndims[perm[1]] * uvw[0] + uvw[1]; /* column index in the output layout */
+      if(newcol < out_pivotcol)
+        target = newcol / out_avg;
+      else
+        target = (newcol - out_pivotcol) / (out_avg - 1) + out_tasklastsection;
+
+      /* move data element to targettask */
+
+      if(just_count_flag)
+        count_send[target]++;
+      else
+        {
+          size_t off = offset_send[target] + count_send[target]++; /* next free slot in the send segment of target */
+          out[off] = data[i];
+        }
+      xyz[2]++; /* advance to the next cell, last coordinate running fastest */
+      if(xyz[2] == Ndims[2])
+        {
+          xyz[2] = 0;
+          xyz[1]++;
+          if(xyz[1] == Ndims[1])
+            {
+              xyz[1] = 0;
+              xyz[0]++;
+            }
+        }
+    }
+
+  if(just_count_flag)
+    {
+      MPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+
+      for(j = 0, nimport = 0, nexport = 0, offset_send[0] = 0, offset_recv[0] = 0; j < NTask; j++) /* offsets = running sums of counts */
+        {
+          nexport += count_send[j];
+          nimport += count_recv[j];
+
+          if(j > 0)
+            {
+              offset_send[j] = offset_send[j - 1] + count_send[j - 1];
+              offset_recv[j] = offset_recv[j - 1] + count_recv[j - 1];
+            }
+        }
+
+      if(nexport != ncells)
+        terminate("nexport=%lld != ncells=%lld", (long long)nexport, (long long)ncells);
+    }
+  else
+    {
+      nimport = 0;
+
+      /* exchange all the data */
+      for(ngrp = 0; ngrp < (1 << PTask); ngrp++) /* PTask is defined elsewhere; presumably 2^PTask >= NTask - TODO confirm */
+        {
+          recvTask = ThisTask ^ ngrp; /* partner task for this round */
+
+          if(recvTask < NTask)
+            {
+              if(count_send[recvTask] > 0 || count_recv[recvTask] > 0)
+                myMPI_Sendrecv(&out[offset_send[recvTask]], count_send[recvTask] * sizeof(fft_real), MPI_BYTE, recvTask, TAG_DENS_A,
+                               &data[offset_recv[recvTask]], count_recv[recvTask] * sizeof(fft_real), MPI_BYTE, recvTask, TAG_DENS_A,
+                               MPI_COMM_WORLD, MPI_STATUS_IGNORE); /* data[] is overwritten: it is the receive buffer */
+
+              nimport += count_recv[recvTask];
+            }
+        }
+
+      /* now loop over the new cell layout */
+      /* find enclosing rectangle around columns in new plane */
+
+      int first[3], last[3];
+
+      first[0] = out_firstcol / Ndims[perm[1]];
+      first[1] = out_firstcol % Ndims[perm[1]];
+      first[2] = 0;
+
+      last[0] = (out_firstcol + out_ncol - 1) / Ndims[perm[1]];
+      last[1] = (out_firstcol + out_ncol - 1) % Ndims[perm[1]];
+      last[2] = Ndims[perm[2]] - 1;
+
+      if(first[1] + out_ncol >= Ndims[perm[1]]) /* columns reach the end of the row: use the full row range */
+        {
+          first[1] = 0;
+          last[1] = Ndims[perm[1]] - 1;
+        }
+
+      /* now need to map this back to the old coordinates */
+
+      int xyz_first[3], xyz_last[3];
+
+      for(j = 0; j < 3; j++)
+        {
+          xyz_first[j] = first[perm_rev[j]];
+          xyz_last[j] = last[perm_rev[j]];
+        }
+
+      memset(count_recv, 0, NTask * sizeof(size_t)); /* from here on used as per-origin read cursor */
+
+      size_t count = 0;
+
+      /* traverse an enclosing box around the new cell layout in the old order */
+      for(xyz[0] = xyz_first[0]; xyz[0] <= xyz_last[0]; xyz[0]++)
+        for(xyz[1] = xyz_first[1]; xyz[1] <= xyz_last[1]; xyz[1]++)
+          for(xyz[2] = xyz_first[2]; xyz[2] <= xyz_last[2]; xyz[2]++)
+            {
+              /* check that the point is actually part of a column */
+              uvw[0] = xyz[perm[0]];
+              uvw[1] = xyz[perm[1]];
+              uvw[2] = xyz[perm[2]];
+
+              int col = uvw[0] * Ndims[perm[1]] + uvw[1];
+
+              if(col >= out_firstcol && col < out_firstcol + out_ncol)
+                {
+                  /* determine origin task */
+                  int newcol = Ndims[1] * xyz[0] + xyz[1]; /* column index in the input layout */
+                  if(newcol < in_pivotcol)
+                    origin = newcol / in_avg;
+                  else
+                    origin = (newcol - in_pivotcol) / (in_avg - 1) + in_tasklastsection;
+
+                  size_t index = ((size_t)Ndims[perm[2]]) * (col - out_firstcol) + uvw[2]; /* position in the local output columns */
+
+                  /* move data element from origin task */
+                  size_t off = offset_recv[origin] + count_recv[origin]++;
+                  out[index] = data[off];
+
+                  count++;
+                }
+            }
+
+      if(count != nimport) /* every received element must have been placed exactly once */
+        {
+          int fi = out_firstcol % Ndims[perm[1]];
+          int la = (out_firstcol + out_ncol - 1) % Ndims[perm[1]];
+
+          terminate("count=%lld nimport=%lld ncol=%d fi=%d la=%d first=%d last=%d\n", (long long)count, (long long)nimport, out_ncol,
+                    fi, la, first[1], last[1]);
+        }
+    }
+}
+
+/*! \brief Transposes column-based complex FFT data.
+ *
+ * \param[in,out] data Data to be transposed; overwritten as receive buffer in transfer mode.
+ * \param[in] Ndims Global extent of the data cube in each of the three dimensions.
+ * \param[in] in_firstcol First column.
+ * \param[in] in_ncol Number of columns.
+ * \param[out] out Data output; also used as send buffer in transfer mode.
+ * \param[in] perm Permutations in dimensions.
+ * \param[in] out_firstcol First column in output data.
+ * \param[in] out_ncol Number of columns in output data.
+ * \param[in,out] offset_send Offset in array for send operation to MPI tasks.
+ * \param[in,out] offset_recv Offset in array for receive operation from MPI
+ *                         tasks.
+ * \param[out] count_send Number of elements to be sent to each MPI task;
+ *                        recomputed on every call.
+ * \param[in,out] count_recv Number of elements to be received from each MPI
+ *                        task; set in counting mode, reused as read cursor otherwise.
+ * \param[in] just_count_flag Do element counting for communication instead
+ *                            of data transfer.
+ *
+ * \return void
+ */
+static void my_fft_column_transpose_c(fft_complex *data, int Ndims[3], int in_firstcol, int in_ncol, fft_complex *out, int perm[3],
+                                      int out_firstcol, int out_ncol, size_t *offset_send, size_t *offset_recv, size_t *count_send,
+                                      size_t *count_recv, size_t just_count_flag)
+{
+  int j, target, origin, ngrp, recvTask, perm_rev[3], xyz[3], uvw[3];
+  size_t nimport, nexport;
+
+  /* determine the inverse permutation (perm_rev[perm[k]] == k for k = 0, 1, 2) */
+  for(j = 0; j < 3; j++)
+    perm_rev[j] = perm[j]; /* first candidate: perm is its own inverse */
+
+  if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2)) /* not yet the inverse */
+    {
+      for(j = 0; j < 3; j++)
+        perm_rev[j] = perm[perm[j]]; /* second candidate: perm applied twice */
+
+      if(!(perm_rev[perm[0]] == 0 && perm_rev[perm[1]] == 1 && perm_rev[perm[2]] == 2))
+        terminate("bummer"); /* neither candidate inverts perm */
+    }
+
+  int in_colums = Ndims[0] * Ndims[1]; /* total number of columns in the input layout */
+  int in_avg = (in_colums - 1) / NTask + 1; /* ceil(in_colums / NTask) for in_colums >= 1 */
+  int in_exc = NTask * in_avg - in_colums; /* number of tasks that hold in_avg - 1 columns */
+  int in_tasklastsection = NTask - in_exc; /* first task of the section with in_avg - 1 columns */
+  int in_pivotcol = in_tasklastsection * in_avg; /* first column that belongs to that section */
+
+  int out_colums = Ndims[perm[0]] * Ndims[perm[1]]; /* same bookkeeping for the permuted (output) layout */
+  int out_avg = (out_colums - 1) / NTask + 1;
+  int out_exc = NTask * out_avg - out_colums;
+  int out_tasklastsection = NTask - out_exc;
+  int out_pivotcol = out_tasklastsection * out_avg;
+
+  size_t i, ncells = ((size_t)in_ncol) * Ndims[2]; /* local cells: in_ncol columns of length Ndims[2] */
+
+  xyz[0] = in_firstcol / Ndims[1]; /* coordinates of the first local cell */
+  xyz[1] = in_firstcol % Ndims[1];
+  xyz[2] = 0;
+
+  memset(count_send, 0, NTask * sizeof(size_t)); /* recounted on every call, in both modes */
+
+  /* loop over all cells in input array and determine target processor */
+  for(i = 0; i < ncells; i++)
+    {
+      /* determine target task */
+      uvw[0] = xyz[perm[0]];
+      uvw[1] = xyz[perm[1]];
+      uvw[2] = xyz[perm[2]];
+
+      int newcol = Ndims[perm[1]] * uvw[0] + uvw[1]; /* column index in the output layout */
+      if(newcol < out_pivotcol)
+        target = newcol / out_avg;
+      else
+        target = (newcol - out_pivotcol) / (out_avg - 1) + out_tasklastsection;
+
+      /* move data element to targettask */
+
+      if(just_count_flag)
+        count_send[target]++;
+      else
+        {
+          size_t off = offset_send[target] + count_send[target]++; /* next free slot in the send segment of target */
+          out[off][0] = data[i][0];
+          out[off][1] = data[i][1];
+        }
+      xyz[2]++; /* advance to the next cell, last coordinate running fastest */
+      if(xyz[2] == Ndims[2])
+        {
+          xyz[2] = 0;
+          xyz[1]++;
+          if(xyz[1] == Ndims[1])
+            {
+              xyz[1] = 0;
+              xyz[0]++;
+            }
+        }
+    }
+
+  if(just_count_flag)
+    {
+      MPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+
+      for(j = 0, nimport = 0, nexport = 0, offset_send[0] = 0, offset_recv[0] = 0; j < NTask; j++) /* offsets = running sums of counts */
+        {
+          nexport += count_send[j];
+          nimport += count_recv[j];
+
+          if(j > 0)
+            {
+              offset_send[j] = offset_send[j - 1] + count_send[j - 1];
+              offset_recv[j] = offset_recv[j - 1] + count_recv[j - 1];
+            }
+        }
+
+      if(nexport != ncells)
+        terminate("nexport=%lld != ncells=%lld", (long long)nexport, (long long)ncells);
+    }
+  else
+    {
+      nimport = 0;
+
+      /* exchange all the data */
+      for(ngrp = 0; ngrp < (1 << PTask); ngrp++) /* PTask is defined elsewhere; presumably 2^PTask >= NTask - TODO confirm */
+        {
+          recvTask = ThisTask ^ ngrp; /* partner task for this round */
+
+          if(recvTask < NTask)
+            {
+              if(count_send[recvTask] > 0 || count_recv[recvTask] > 0)
+                myMPI_Sendrecv(&out[offset_send[recvTask]], count_send[recvTask] * sizeof(fft_complex), MPI_BYTE, recvTask, TAG_DENS_A,
+                               &data[offset_recv[recvTask]], count_recv[recvTask] * sizeof(fft_complex), MPI_BYTE, recvTask,
+                               TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); /* data[] is overwritten: it is the receive buffer */
+
+              nimport += count_recv[recvTask];
+            }
+        }
+
+      /* now loop over the new cell layout */
+      /* find enclosing rectangle around columns in new plane */
+
+      int first[3], last[3];
+
+      first[0] = out_firstcol / Ndims[perm[1]];
+      first[1] = out_firstcol % Ndims[perm[1]];
+      first[2] = 0;
+
+      last[0] = (out_firstcol + out_ncol - 1) / Ndims[perm[1]];
+      last[1] = (out_firstcol + out_ncol - 1) % Ndims[perm[1]];
+      last[2] = Ndims[perm[2]] - 1;
+
+      if(first[1] + out_ncol >= Ndims[perm[1]]) /* columns reach the end of the row: use the full row range */
+        {
+          first[1] = 0;
+          last[1] = Ndims[perm[1]] - 1;
+        }
+
+      /* now need to map this back to the old coordinates */
+
+      int xyz_first[3], xyz_last[3];
+
+      for(j = 0; j < 3; j++)
+        {
+          xyz_first[j] = first[perm_rev[j]];
+          xyz_last[j] = last[perm_rev[j]];
+        }
+
+      memset(count_recv, 0, NTask * sizeof(size_t)); /* from here on used as per-origin read cursor */
+
+      size_t count = 0;
+
+      /* traverse an enclosing box around the new cell layout in the old order */
+      for(xyz[0] = xyz_first[0]; xyz[0] <= xyz_last[0]; xyz[0]++)
+        for(xyz[1] = xyz_first[1]; xyz[1] <= xyz_last[1]; xyz[1]++)
+          for(xyz[2] = xyz_first[2]; xyz[2] <= xyz_last[2]; xyz[2]++)
+            {
+              /* check that the point is actually part of a column */
+              uvw[0] = xyz[perm[0]];
+              uvw[1] = xyz[perm[1]];
+              uvw[2] = xyz[perm[2]];
+
+              int col = uvw[0] * Ndims[perm[1]] + uvw[1];
+
+              if(col >= out_firstcol && col < out_firstcol + out_ncol)
+                {
+                  /* determine origin task */
+                  int newcol = Ndims[1] * xyz[0] + xyz[1]; /* column index in the input layout */
+                  if(newcol < in_pivotcol)
+                    origin = newcol / in_avg;
+                  else
+                    origin = (newcol - in_pivotcol) / (in_avg - 1) + in_tasklastsection;
+
+                  size_t index = ((size_t)Ndims[perm[2]]) * (col - out_firstcol) + uvw[2]; /* position in the local output columns */
+
+                  /* move data element from origin task */
+                  size_t off = offset_recv[origin] + count_recv[origin]++;
+                  out[index][0] = data[off][0];
+                  out[index][1] = data[off][1];
+
+                  count++;
+                }
+            }
+
+      if(count != nimport) /* every received element must have been placed exactly once */
+        {
+          int fi = out_firstcol % Ndims[perm[1]];
+          int la = (out_firstcol + out_ncol - 1) % Ndims[perm[1]];
+
+          terminate("count=%lld nimport=%lld ncol=%d fi=%d la=%d first=%d last=%d\n", (long long)count, (long long)nimport, out_ncol,
+                    fi, la, first[1], last[1]);
+        }
+    }
+}
+
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+#endif /* #if defined(PMGRID) */
diff --git a/src/amuse/community/arepo/src/gravity/pm/pm_nonperiodic.c
b/src/amuse/community/arepo/src/gravity/pm/pm_nonperiodic.c new file mode 100644 index 0000000000..7346af2849 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/pm/pm_nonperiodic.c @@ -0,0 +1,2087 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/gravity/pm/pm_nonperiodic.c + * \date 05/2018 + * \brief Code for non-periodic FFT to compute long-range PM force.
+ * \details contains functions: + * void pm_init_regionsize(void) + * void pm_init_nonperiodic(void) + * int pmforce_is_particle_high_res(int type, MyDouble * Pos) + * void pmforce_nonperiodic_zoom_optimized_prepare_density(int + * grnr) + * void pmforce_nonperiodic_zoom_optimized_readout_forces_or_ + * potential(int grnr, int dim) + * void pmforce_nonperiodic_uniform_optimized_prepare_density( + * int grnr) + * void pmforce_nonperiodic_uniform_optimized_readout_forces_or_ + * potential(int grnr, int dim) + * int pmforce_nonperiodic(int grnr) + * void pm_setup_nonperiodic_kernel(void) + * static int pm_periodic_compare_sortindex(const void *a, + * const void *b) + * static void msort_pmperiodic_with_tmp(large_numpart_type * b, + * size_t n, large_numpart_type * t) + * static void mysort_pmperiodic(void *b, size_t n, size_t s, + * int (*cmp) (const void *, const void *)) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 15.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#if defined(PMGRID) && (defined(PLACEHIGHRESREGION) || defined(GRAVITY_NOT_PERIODIC)) + +#if defined(LONG_X) || defined(LONG_Y) || defined(LONG_Z) +#error "LONG_X/Y/Z not supported for the non-periodic FFT gravity code" +#endif /* #if defined(LONG_X) || defined(LONG_Y) || defined (LONG_Z) */ + +#ifndef GRIDBOOST +#define GRIDBOOST 2 +#endif /* #ifndef GRIDBOOST */ + +#define GRID (GRIDBOOST * PMGRID) +#define GRIDz (GRID / 2 + 1) +#define GRID2 (2 * GRIDz) + +#if(GRID > 1024) +typedef long long large_array_offset; /* use a larger data type in this case so that we can always address all cells of the 3D grid + with a single index */ +#else /* #if (GRID > 1024) */ +typedef unsigned int large_array_offset; +#endif /* #if (GRID > 1024) #else */ + +#ifdef NUMPART_PER_TASK_LARGE +typedef long long large_numpart_type; /* if 
there is a risk that the local particle number times 8 overflows a 32-bit integer, this + data type should be used */ +#else /* #ifdef NUMPART_PER_TASK_LARGE */ +typedef int large_numpart_type; +#endif /* #ifdef NUMPART_PER_TASK_LARGE */ + +/* short-cut macros for accessing different 3D arrays */ +#define FI(x, y, z) (((large_array_offset)GRID2) * (GRID * (x) + (y)) + (z)) +#define FC(c, z) (((large_array_offset)GRID2) * ((c)-myplan.base_firstcol) + (z)) +#define TI(x, y, z) (((large_array_offset)GRID) * ((x) + (y)*myplan.nslab_x) + (z)) + +static fft_plan myplan; /*!< In this structure, various bookkeeping variables for the distributed FFTs are stored */ + +/*! \var maxfftsize + * \brief maximum size of the local fft grid among all tasks + */ +static size_t maxfftsize; + +/*! \var rhogrid + * \brief This array holds the local part of the density field and + * after the FFTs the local part of the potential + * + * \var forcegrid + * \brief This array will contain the force field + * + * \var workspace + * \brief Workspace array used during the FFTs + */ +static fft_real *rhogrid, *forcegrid, *workspace; + +/*! \brief Array containing the FFT of 'rhogrid' + * + * This pointer points to the same array as 'rhogrid', + * because in-place FFTs are used. + */ +static fft_complex *fft_of_rhogrid; + +static fft_real *kernel[2]; +static fft_complex *fft_of_kernel[2]; + +/*! \brief Determines the particle extent. + * + * This function determines the particle extension of all particles, and for + * those types selected with PLACEHIGHRESREGION if this is used, and then + * determines the boundaries of the non-periodic FFT-mesh that can be placed + * on this region. Note that a sufficient buffer region at the rim of the + * occupied part of the mesh needs to be reserved in order to allow a correct + * finite differencing using a 4-point formula. In addition, to allow + * non-periodic boundaries, the actual FFT mesh used is twice as large in + * each dimension compared with GRID.
+ *
+ * \return void
+ */
+void pm_init_regionsize(void)
+{
+  double meshinner[2], xmin[2][3], xmax[2][3]; /* first index: 0 = all particles, 1 = PLACEHIGHRESREGION types */
+  int i, j;
+
+  /* find enclosing rectangle */
+
+  for(j = 0; j < 3; j++)
+    {
+      xmin[0][j] = xmin[1][j] = 1.0e36; /* start from sentinel extremes */
+      xmax[0][j] = xmax[1][j] = -1.0e36;
+    }
+
+  for(i = 0; i < NumPart; i++)
+    for(j = 0; j < 3; j++)
+      {
+        if(P[i].Pos[j] > xmax[0][j])
+          xmax[0][j] = P[i].Pos[j];
+        if(P[i].Pos[j] < xmin[0][j])
+          xmin[0][j] = P[i].Pos[j];
+
+#ifdef PLACEHIGHRESREGION
+        if(((1 << P[i].Type) & (PLACEHIGHRESREGION))) /* PLACEHIGHRESREGION is used as a bitmask of particle types */
+          {
+            if(P[i].Pos[j] > xmax[1][j])
+              xmax[1][j] = P[i].Pos[j];
+            if(P[i].Pos[j] < xmin[1][j])
+              xmin[1][j] = P[i].Pos[j];
+          }
+#endif /* #ifdef PLACEHIGHRESREGION */
+      }
+
+  MPI_Allreduce(xmin, All.Xmintot, 6, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); /* 6 = 2 x 3 values, reduced over all tasks */
+  MPI_Allreduce(xmax, All.Xmaxtot, 6, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+
+  for(j = 0; j < 2; j++)
+    {
+      All.TotalMeshSize[j] = All.Xmaxtot[j][0] - All.Xmintot[j][0]; /* largest extent over the three axes */
+      All.TotalMeshSize[j] = dmax(All.TotalMeshSize[j], All.Xmaxtot[j][1] - All.Xmintot[j][1]);
+      All.TotalMeshSize[j] = dmax(All.TotalMeshSize[j], All.Xmaxtot[j][2] - All.Xmintot[j][2]);
+#ifdef ENLARGEREGION
+      All.TotalMeshSize[j] *= ENLARGEREGION;
+#endif /* #ifdef ENLARGEREGION */
+
+      /* symmetrize the box onto the center */
+      for(i = 0; i < 3; i++)
+        {
+          All.Xmintot[j][i] = (All.Xmintot[j][i] + All.Xmaxtot[j][i]) / 2 - All.TotalMeshSize[j] / 2;
+          All.Xmaxtot[j][i] = All.Xmintot[j][i] + All.TotalMeshSize[j];
+        }
+    }
+
+  /* this will produce enough room for zero-padding and buffer region to
+     allow finite differencing of the potential */
+
+  for(j = 0; j < 2; j++)
+    {
+      meshinner[j] = All.TotalMeshSize[j]; /* extent before padding; reported as ext= below */
+      All.TotalMeshSize[j] *= 2.001 * (GRID) / ((double)(GRID - 2 - 8));
+    }
+
+  /* move lower left corner by two cells to allow finite differencing of the potential by a 4-point function */
+
+  for(j = 0; j < 2; j++)
+    for(i = 0; i < 3; i++)
+      {
+        All.Corner[j][i] = All.Xmintot[j][i] - 2.0005 * All.TotalMeshSize[j] / GRID;
+        All.UpperCorner[j][i] = All.Corner[j][i] + (GRID / 2 - 1) * (All.TotalMeshSize[j] / GRID);
+      }
+
+#ifdef PLACEHIGHRESREGION
+  All.Asmth[1] = ASMTH * All.TotalMeshSize[1] / GRID;
+  All.Rcut[1] = RCUT * All.Asmth[1];
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+#ifdef PLACEHIGHRESREGION
+  if(2 * All.TotalMeshSize[1] / GRID < All.Rcut[0]) /* two high-res cells are narrower than Rcut[0] */
+    {
+      All.TotalMeshSize[1] = 2 * (meshinner[1] + 2 * All.Rcut[0]) * (GRID) / ((double)(GRID - 2));
+
+      for(i = 0; i < 3; i++)
+        {
+          All.Corner[1][i] = All.Xmintot[1][i] - 1.0001 * All.Rcut[0];
+          All.UpperCorner[1][i] = All.Corner[1][i] + (GRID / 2 - 1) * (All.TotalMeshSize[1] / GRID);
+        }
+
+      if(2 * All.TotalMeshSize[1] / GRID > All.Rcut[0]) /* two cells of the enlarged mesh now exceed Rcut[0] */
+        {
+          All.TotalMeshSize[1] = 2 * (meshinner[1] + 2 * All.Rcut[0]) * (GRID) / ((double)(GRID - 10));
+
+          for(i = 0; i < 3; i++)
+            {
+              All.Corner[1][i] = All.Xmintot[1][i] - 1.0001 * (All.Rcut[0] + 2 * All.TotalMeshSize[1] / GRID);
+              All.UpperCorner[1][i] = All.Corner[1][i] + (GRID / 2 - 1) * (All.TotalMeshSize[1] / GRID);
+            }
+        }
+
+      All.Asmth[1] = ASMTH * All.TotalMeshSize[1] / GRID; /* recompute with the enlarged mesh size */
+      All.Rcut[1] = RCUT * All.Asmth[1];
+
+      mpi_printf("PM-NONPERIODIC: All.Asmth[0]=%g All.Asmth[1]=%g\n", All.Asmth[0], All.Asmth[1]);
+    }
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+#ifdef PLACEHIGHRESREGION
+  mpi_printf(
+      "PM-NONPERIODIC: Allowed region for isolated PM mesh (high-res): (%g|%g|%g) -> (%g|%g|%g) ext=%g totmeshsize=%g "
+      "meshsize=%g\n\n",
+      All.Xmintot[1][0], All.Xmintot[1][1], All.Xmintot[1][2], All.Xmaxtot[1][0], All.Xmaxtot[1][1], All.Xmaxtot[1][2], meshinner[1],
+      All.TotalMeshSize[1], All.TotalMeshSize[1] / GRID);
+#endif /* #ifdef PLACEHIGHRESREGION */
+}
+
+/*! \brief Initialization of the non-periodic PM routines.
+ *
+ * Creates the FFTW plans, initializes the slab- or column-based FFT layout and
+ * allocates the kernel array(s). The Greens function itself is not set up here.
+ *
+ * \return void
+ */
+void pm_init_nonperiodic(void)
+{
+  /* Set up the FFTW-3 plan files. */
+  int ndim[1] = {GRID}; /* dimension of the 1D transforms */
+
+  /* temporarily allocate some arrays to make sure that out-of-place plans are created */
+  rhogrid = (fft_real *)mymalloc("rhogrid", GRID2 * sizeof(fft_real));
+  forcegrid = (fft_real *)mymalloc("forcegrid", GRID2 * sizeof(fft_real));
+
+#ifdef DOUBLEPRECISION_FFTW
+  int alignflag = 0;
+#else /* #ifdef DOUBLEPRECISION_FFTW */
+  /* for single precision, the start of our FFT columns is presently only guaranteed to be 8-byte aligned */
+  int alignflag = FFTW_UNALIGNED;
+#endif /* #ifdef DOUBLEPRECISION_FFTW #else */
+#ifndef FFT_COLUMN_BASED
+  int stride = GRIDz; /* slab-based layout */
+#else /* #ifndef FFT_COLUMN_BASED */
+  int stride = 1; /* column-based layout */
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  myplan.forward_plan_zdir = FFTW(plan_many_dft_r2c)(1, ndim, 1, rhogrid, 0, 1, GRID2, (fft_complex *)forcegrid, 0, 1, GRIDz,
+                                                     FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myplan.forward_plan_xdir =
+      FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride,
+                          GRIDz * GRID, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myplan.forward_plan_ydir =
+      FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride,
+                          GRIDz * GRID, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myplan.backward_plan_zdir = FFTW(plan_many_dft_c2r)(1, ndim, 1, (fft_complex *)rhogrid, 0, 1, GRIDz, forcegrid, 0, 1, GRID2,
+                                                      FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myplan.backward_plan_xdir =
+      FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride,
+                          GRIDz * GRID, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myplan.backward_plan_ydir =
+      FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride,
+                          GRIDz * GRID, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
+
+  myfree(forcegrid); /* the temporary arrays are released in reverse order of allocation */
+  myfree(rhogrid);
+
+#ifndef FFT_COLUMN_BASED
+
+  my_slab_based_fft_init(&myplan, GRID, GRID, GRID);
+
+  maxfftsize = myplan.largest_x_slab * GRID * ((size_t)GRID2);
+
+#else /* #ifndef FFT_COLUMN_BASED */
+
+  my_column_based_fft_init(&myplan, GRID, GRID, GRID);
+
+  maxfftsize = myplan.max_datasize;
+
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+  /* now allocate memory to hold the FFT fields */
+
+  size_t bytes, bytes_tot = 0;
+
+#if defined(GRAVITY_NOT_PERIODIC)
+  kernel[0] = (fft_real *)mymalloc("kernel[0]", bytes = maxfftsize * sizeof(fft_real));
+  bytes_tot += bytes;
+  fft_of_kernel[0] = (fft_complex *)kernel[0]; /* same memory, addressed as complex values */
+#endif /* #if defined(GRAVITY_NOT_PERIODIC) */
+
+#if defined(PLACEHIGHRESREGION)
+  kernel[1] = (fft_real *)mymalloc("kernel[1]", bytes = maxfftsize * sizeof(fft_real));
+  bytes_tot += bytes;
+  fft_of_kernel[1] = (fft_complex *)kernel[1]; /* same memory, addressed as complex values */
+#endif /* #if defined(PLACEHIGHRESREGION) */
+
+  mpi_printf("\nPM-NONPERIODIC: Allocated %g MByte for FFT kernel(s).\n\n", bytes_tot / (1024.0 * 1024.0));
+}
+
+#ifdef PLACEHIGHRESREGION
+/*! \brief Is this a high res particle in high resolution region?
+ *
+ * For cosmological zoom simulations.
+ *
+ * \param[in] type Particle type.
+ * \param[in] Pos Position of particle.
+ *
+ * \return 0: not high res; 1: high res.
+ */ +int pmforce_is_particle_high_res(int type, MyDouble *Pos) +{ + int flag = 1; + + if((1 << type) & (PLACEHIGHRESREGION)) + return 1; + +#if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1) + double r2 = 0; + for(int j = 0; j < 3; j++) + r2 += pow(Pos[j] - 0.5 * (All.Xmintot[1][j] + All.Xmaxtot[1][j]), 2); + + if(sqrt(r2) > 0.5 * (All.Xmaxtot[1][0] - All.Xmintot[1][0])) + return 0; +#else /* #if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1) */ + + for(int j = 0; j < 3; j++) + if(Pos[j] < All.Xmintot[1][j] || Pos[j] > All.Xmaxtot[1][j]) + { + flag = 0; /* we are outside */ + break; + } + +#endif /* #if defined(PLACEHIGHRESREGION) && (FORCETEST_TESTFORCELAW == 1) #else */ + + return flag; +} +#endif /* #ifdef PLACEHIGHRESREGION */ + +#ifdef PM_ZOOM_OPTIMIZED + +static void mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)); +static int pm_periodic_compare_sortindex(const void *a, const void *b); + +/*! \brief This structure links the particles to the mesh cells, to which they + * contribute their mass. + * + * Each particle will have eight items of this structure in the 'part' array. + * For each of the eight mesh cells the CIC assignment will contribute, + * one item of this struct exists. 
+ */ +static struct part_slab_data +{ + large_array_offset globalindex; /*!< index in the global density mesh */ + large_numpart_type partindex; /*!< contains the local particle index shifted by 2^3, the first three bits encode to which part of the + CIC assignment this item belongs to */ + large_array_offset localindex; /*!< index to a local copy of the corresponding mesh cell of the global density array (used during + local mass and force assignment) */ +} * part; /*!< array of part_slab_data linking the local particles to their mesh cells */ + +static size_t *localfield_sendcount, *localfield_first, *localfield_offset, *localfield_recvcount; +static large_array_offset *localfield_globalindex, *import_globalindex; +static fft_real *localfield_data, *import_data; +static large_numpart_type num_on_grid; + +/*! \brief Prepares density field for nonperiodic FFTs. + * + * \param[in] grnr (0, 1) 0 if full mesh, 1 if highres grid. + * + * \return void + */ +void pmforce_nonperiodic_zoom_optimized_prepare_density(int grnr) +{ + large_numpart_type i; + int level, recvTask; + MPI_Status status; + + double to_slab_fac = GRID / All.TotalMeshSize[grnr]; + + part = (struct part_slab_data *)mymalloc("part", 8 * (NumPart * sizeof(struct part_slab_data))); + large_numpart_type *part_sortindex = (large_numpart_type *)mymalloc("part_sortindex", 8 * (NumPart * sizeof(large_numpart_type))); + + int ngrid = 0; + + /* determine the cells each particle accesses */ + for(i = 0; i < NumPart; i++) + { + MyDouble *pos; + +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + pos = SphP[i].Center; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[i].Pos; + + if(pos[0] < All.Corner[grnr][0] || pos[0] >= All.UpperCorner[grnr][0]) + continue; + if(pos[1] < All.Corner[grnr][1] || pos[1] >= All.UpperCorner[grnr][1]) + continue; + if(pos[2] < All.Corner[grnr][2] || pos[2] >= All.UpperCorner[grnr][2]) + continue; + + int slab_x = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0])); + int slab_y 
= (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1])); + int slab_z = (int)(to_slab_fac * (pos[2] - All.Corner[grnr][2])); + int myngrid; + + { + myngrid = ngrid; + ngrid += 1; + } + + large_numpart_type index_on_grid = ((large_numpart_type)myngrid) * 8; + + int xx, yy, zz; + + for(xx = 0; xx < 2; xx++) + for(yy = 0; yy < 2; yy++) + for(zz = 0; zz < 2; zz++) + { + int slab_xx = slab_x + xx; + int slab_yy = slab_y + yy; + int slab_zz = slab_z + zz; + + if(slab_xx >= GRID) + slab_xx -= GRID; + if(slab_yy >= GRID) + slab_yy -= GRID; + if(slab_zz >= GRID) + slab_zz -= GRID; + + large_array_offset offset = FI(slab_xx, slab_yy, slab_zz); + + part[index_on_grid].partindex = (i << 3) + (xx << 2) + (yy << 1) + zz; + part[index_on_grid].globalindex = offset; + part_sortindex[index_on_grid] = index_on_grid; + index_on_grid++; + } + } + + /* note: num_on_grid will be 8 times larger than the particle number, but num_field_points will generally be much smaller */ + num_on_grid = ((large_numpart_type)ngrid) * 8; + + /* bring the part-field into the order of the accessed cells. 
This allows the removal of duplicates */ + mysort_pmperiodic(part_sortindex, num_on_grid, sizeof(large_numpart_type), pm_periodic_compare_sortindex); + + large_array_offset num_field_points; + + if(num_on_grid > 0) + num_field_points = 1; + else + num_field_points = 0; + + /* determine the number of unique field points */ + for(i = 1; i < num_on_grid; i++) + { + if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex) + num_field_points++; + } + + /* allocate the local field */ + localfield_globalindex = (large_array_offset *)mymalloc_movable(&localfield_globalindex, "localfield_globalindex", + num_field_points * sizeof(large_array_offset)); + localfield_data = (fft_real *)mymalloc_movable(&localfield_data, "localfield_data", num_field_points * sizeof(fft_real)); + localfield_first = (size_t *)mymalloc_movable(&localfield_first, "localfield_first", NTask * sizeof(size_t)); + localfield_sendcount = (size_t *)mymalloc_movable(&localfield_sendcount, "localfield_sendcount", NTask * sizeof(size_t)); + localfield_offset = (size_t *)mymalloc_movable(&localfield_offset, "localfield_offset", NTask * sizeof(size_t)); + localfield_recvcount = (size_t *)mymalloc_movable(&localfield_recvcount, "localfield_recvcount", NTask * sizeof(size_t)); + + for(i = 0; i < NTask; i++) + { + localfield_first[i] = 0; + localfield_sendcount[i] = 0; + } + + /* establish the cross link between the part[ ]-array and the local list of + * mesh points. Also, count on which CPU the needed field points are stored. 
+ */ + for(i = 0, num_field_points = 0; i < num_on_grid; i++) + { + if(i > 0) + if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex) + num_field_points++; + + part[part_sortindex[i]].localindex = num_field_points; + + if(i > 0) + if(part[part_sortindex[i]].globalindex == part[part_sortindex[i - 1]].globalindex) + continue; + + localfield_globalindex[num_field_points] = part[part_sortindex[i]].globalindex; + +#ifndef FFT_COLUMN_BASED + int slab = part[part_sortindex[i]].globalindex / (GRID * GRID2); + int task = myplan.slab_to_task[slab]; +#else /* #ifndef FFT_COLUMN_BASED */ + int task, column = part[part_sortindex[i]].globalindex / (GRID2); + + if(column < myplan.pivotcol) + task = column / myplan.avg; + else + task = (column - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + if(localfield_sendcount[task] == 0) + localfield_first[task] = num_field_points; + + localfield_sendcount[task]++; + } + num_field_points++; + + for(i = 1, localfield_offset[0] = 0; i < NTask; i++) + localfield_offset[i] = localfield_offset[i - 1] + localfield_sendcount[i - 1]; + + myfree_movable(part_sortindex); + part_sortindex = NULL; + + /* now bin the local particle data onto the mesh list */ + for(i = 0; i < num_field_points; i++) + localfield_data[i] = 0; + + for(i = 0; i < num_on_grid; i += 8) + { + int pindex = (part[i].partindex >> 3); + + MyDouble *pos; + +#ifdef CELL_CENTER_GRAVITY + if(P[pindex].Type == 0) + pos = SphP[pindex].Center; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[pindex].Pos; + + int slab_x = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0])); + int slab_y = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1])); + int slab_z = (int)(to_slab_fac * (pos[2] - All.Corner[grnr][2])); + + double dx = to_slab_fac * (pos[0] - All.Corner[grnr][0]) - slab_x; + double dy = to_slab_fac * (pos[1] - All.Corner[grnr][1]) - slab_y; + double dz = to_slab_fac * (pos[2] - 
All.Corner[grnr][2]) - slab_z; + + double weight = P[pindex].Mass; + + localfield_data[part[i + 0].localindex] += weight * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); + localfield_data[part[i + 1].localindex] += weight * (1.0 - dx) * (1.0 - dy) * dz; + localfield_data[part[i + 2].localindex] += weight * (1.0 - dx) * dy * (1.0 - dz); + localfield_data[part[i + 3].localindex] += weight * (1.0 - dx) * dy * dz; + localfield_data[part[i + 4].localindex] += weight * (dx) * (1.0 - dy) * (1.0 - dz); + localfield_data[part[i + 5].localindex] += weight * (dx) * (1.0 - dy) * dz; + localfield_data[part[i + 6].localindex] += weight * (dx)*dy * (1.0 - dz); + localfield_data[part[i + 7].localindex] += weight * (dx)*dy * dz; + } + + rhogrid = (fft_real *)mymalloc("rhogrid", maxfftsize * sizeof(fft_real)); + + /* clear local FFT-mesh density field */ + large_array_offset ii; + for(ii = 0; ii < maxfftsize; ii++) + rhogrid[ii] = 0; + + /* exchange data and add contributions to the local mesh-path */ + MPI_Alltoall(localfield_sendcount, sizeof(size_t), MPI_BYTE, localfield_recvcount, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD); + + for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ + { + recvTask = ThisTask ^ level; + + if(recvTask < NTask) + { + if(level > 0) + { + import_data = (fft_real *)mymalloc("import_data", localfield_recvcount[recvTask] * sizeof(fft_real)); + import_globalindex = + (large_array_offset *)mymalloc("import_globalindex", localfield_recvcount[recvTask] * sizeof(large_array_offset)); + + if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0) + { + myMPI_Sendrecv(localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] * sizeof(fft_real), + MPI_BYTE, recvTask, TAG_NONPERIOD_A, import_data, localfield_recvcount[recvTask] * sizeof(fft_real), + MPI_BYTE, recvTask, TAG_NONPERIOD_A, MPI_COMM_WORLD, &status); + + myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask], + 
localfield_sendcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, TAG_NONPERIOD_B, + import_globalindex, localfield_recvcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, + TAG_NONPERIOD_B, MPI_COMM_WORLD, &status); + } + } + else + { + import_data = localfield_data + localfield_offset[ThisTask]; + import_globalindex = localfield_globalindex + localfield_offset[ThisTask]; + } + + /* note: here every element in rhogrid is only accessed once, so there should be no race condition */ + for(i = 0; i < localfield_recvcount[recvTask]; i++) + { + /* determine offset in local FFT slab */ +#ifndef FFT_COLUMN_BASED + large_array_offset offset = + import_globalindex[i] - myplan.first_slab_x_of_task[ThisTask] * GRID * ((large_array_offset)GRID2); +#else /* #ifndef FFT_COLUMN_BASED */ + large_array_offset offset = import_globalindex[i] - myplan.base_firstcol * ((large_array_offset)GRID2); +#endif /* #ifndef FFT_COLUMN_BASED #else */ + rhogrid[offset] += import_data[i]; + } + + if(level > 0) + { + myfree(import_globalindex); + myfree(import_data); + } + } + } +} + +/*! \brief Reads out the force component corresponding to spatial dimension + * 'dim'. + * + * If dim is negative, potential values are read out and assigned to + * particles. + * + * \param[in] grnr Number of grid (0: base, 1 high-res) + * \param[in] dim Dimension to be read out + * (<0: potential,>=0 force component). 
+ *
+ * \return void
+ */
+void pmforce_nonperiodic_zoom_optimized_readout_forces_or_potential(int grnr, int dim)
+{
+#ifdef EVALPOTENTIAL
+  /* factor to get potential; it is only applied in the dim < 0 branch at the end of this function */
+  double fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3);
+#endif /* #ifdef EVALPOTENTIAL */
+
+  large_numpart_type i;
+  int level, recvTask;
+  MPI_Status status;
+
+  fft_real *grid; /* mesh that gets sampled: rhogrid for dim < 0, forcegrid otherwise */
+
+  if(dim < 0)
+    grid = rhogrid;
+  else
+    grid = forcegrid;
+
+  double to_slab_fac = GRID / All.TotalMeshSize[grnr]; /* scales an offset from All.Corner[grnr] to mesh-cell units */
+
+  for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task; in every other
+                                                 * round the partner task is ThisTask ^ level */
+    {
+      recvTask = ThisTask ^ level;
+
+      if(recvTask < NTask)
+        {
+          if(level > 0) /* remote partner: receive the mesh indices it asks for into temporary buffers */
+            {
+              import_data = (fft_real *)mymalloc("import_data", localfield_recvcount[recvTask] * sizeof(fft_real));
+              import_globalindex =
+                  (large_array_offset *)mymalloc("import_globalindex", localfield_recvcount[recvTask] * sizeof(large_array_offset));
+
+              if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0)
+                {
+                  myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                                 localfield_sendcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, TAG_NONPERIOD_C,
+                                 import_globalindex, localfield_recvcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                 TAG_NONPERIOD_C, MPI_COMM_WORLD, &status);
+                }
+            }
+          else /* own task: work directly on the section of the local lists that belongs to ThisTask */
+            {
+              import_data = localfield_data + localfield_offset[ThisTask];
+              import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+            }
+
+          for(i = 0; i < localfield_recvcount[recvTask]; i++)
+            {
+#ifndef FFT_COLUMN_BASED
+              large_array_offset offset = /* global mesh index minus the start of the slabs stored on this task */
+                  import_globalindex[i] - myplan.first_slab_x_of_task[ThisTask] * GRID * ((large_array_offset)GRID2);
+#else  /* #ifndef FFT_COLUMN_BASED */
+              large_array_offset offset = import_globalindex[i] - myplan.base_firstcol * ((large_array_offset)GRID2);
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+              import_data[i] = grid[offset]; /* look up the requested mesh value */
+            }
+
+          if(level > 0) /* send the looked-up values back; the values we asked for arrive in localfield_data */
+            {
+              myMPI_Sendrecv(import_data, localfield_recvcount[recvTask] * sizeof(fft_real), MPI_BYTE, recvTask, TAG_NONPERIOD_A,
+                             localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] * sizeof(fft_real),
+                             MPI_BYTE, recvTask, TAG_NONPERIOD_A, MPI_COMM_WORLD, &status);
+
+              myfree(import_globalindex);
+              myfree(import_data);
+            }
+        }
+    }
+
+  /* read out the force/potential values, which all have been assembled in localfield_data */
+
+  int k, ngrid = (num_on_grid >> 3); /* part[] is walked in groups of eight consecutive entries, one group per particle */
+
+  for(k = 0; k < ngrid; k++)
+    {
+      large_numpart_type j = (((large_numpart_type)k) << 3); /* first of the eight part[] entries of this group */
+
+      int i = (part[j].partindex >> 3); /* particle index; this int i shadows the large_numpart_type i declared above */
+
+      MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+      if(P[i].Type == 0)
+        pos = SphP[i].Center;
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[i].Pos;
+
+#ifdef PLACEHIGHRESREGION
+      if(grnr == 1)
+        if(!(pmforce_is_particle_high_res(P[i].Type, pos)))
+          continue;
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+      int slab_x = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0]));
+      double dx  = to_slab_fac * (pos[0] - All.Corner[grnr][0]) - slab_x; /* fractional offset inside the cell, x */
+
+      int slab_y = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1]));
+      double dy  = to_slab_fac * (pos[1] - All.Corner[grnr][1]) - slab_y; /* fractional offset inside the cell, y */
+
+      int slab_z = (int)(to_slab_fac * (pos[2] - All.Corner[grnr][2]));
+      double dz  = to_slab_fac * (pos[2] - All.Corner[grnr][2]) - slab_z; /* fractional offset inside the cell, z */
+
+      double value = +localfield_data[part[j + 0].localindex] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
+                     localfield_data[part[j + 1].localindex] * (1.0 - dx) * (1.0 - dy) * dz +
+                     localfield_data[part[j + 2].localindex] * (1.0 - dx) * dy * (1.0 - dz) +
+                     localfield_data[part[j + 3].localindex] * (1.0 - dx) * dy * dz +
+                     localfield_data[part[j + 4].localindex] * (dx) * (1.0 - dy) * (1.0 - dz) +
+                     localfield_data[part[j + 5].localindex] * (dx) * (1.0 - dy) * dz +
+                     localfield_data[part[j + 6].localindex] * (dx)*dy * (1.0 - dz) +
+                     localfield_data[part[j + 7].localindex] * (dx)*dy * dz; /* eight mesh values, each weighted with a product of dx/dy/dz factors */
+
+      if(dim < 0)
+        {
+#ifdef EVALPOTENTIAL
+          P[i].PM_Potential += value * fac;
+#endif /* #ifdef EVALPOTENTIAL */
+        }
+      else
+        P[i].GravPM[dim] += value;
+    }
+}
+
+#else /* #ifdef PM_ZOOM_OPTIMIZED */
+/* Here come the routines for a different communication algorithm that is better suited for homogeneously loaded boxes.
+ */
+
+/*! \brief Particle buffer structure
+ *
+ *  Holds the mass and position of one particle as it is exchanged between
+ *  tasks: partout is the send buffer and partin the receive buffer of the
+ *  myMPI_Alltoallv() call in
+ *  pmforce_nonperiodic_uniform_optimized_prepare_density().
+ */
+static struct partbuf
+{
+  MyFloat Mass;
+  MyFloat Pos[3];
+} * partin, *partout;
+
+static size_t nimport, nexport; /* sums of Rcvpm_count[] and Sndpm_count[] over all tasks; sizes of partin and partout */
+
+static size_t *Sndpm_count, *Sndpm_offset; /* allocated with MaxThreads * multiNtask entries each (one slice per thread) */
+static size_t *Rcvpm_count, *Rcvpm_offset; /* allocated with NTask entries each */
+
+/*! \brief Prepares density for pm calculation in algorithm optimized for
+ *  uniform densities.
+ *
+ *  Particles inside [All.Corner[grnr], All.UpperCorner[grnr]) are copied to
+ *  every task that holds one of the slabs (or, with FFT_COLUMN_BASED, one of
+ *  the columns) they touch. After the exchange each task adds the received
+ *  masses to its part of rhogrid, weighted with products of the fractional
+ *  cell offsets dx, dy, dz.
+ *
+ *  Only partout (and aux in the column-based variant) is freed here;
+ *  Sndpm_count, Sndpm_offset, Rcvpm_count, Rcvpm_offset, partin and rhogrid
+ *  are still allocated when the function returns.
+ *
+ *  \param[in] grnr Number of grid (0: base grid, 1: high res grid).
+ *
+ *  \return void
+ */
+void pmforce_nonperiodic_uniform_optimized_prepare_density(int grnr)
+{
+  int i, j; /* NOTE(review): i is also compared against nimport (size_t) further down - confirm nimport stays below INT_MAX */
+
+  double to_slab_fac = GRID / All.TotalMeshSize[grnr]; /* scales an offset from All.Corner[grnr] to mesh-cell units */
+
+  /* We here enlarge NTask such that each thread gets its own cache line for send_count/send_offset.
+   * This should hopefully prevent a performance penalty from 'false sharing' for these variables
+   */
+  int multiNtask = roundup_to_multiple_of_cacheline_size(NTask * sizeof(size_t)) / sizeof(size_t);
+
+  Sndpm_count  = mymalloc("Sndpm_count", MaxThreads * multiNtask * sizeof(size_t));
+  Sndpm_offset = mymalloc("Sndpm_offset", MaxThreads * multiNtask * sizeof(size_t));
+  Rcvpm_count  = mymalloc("Rcvpm_count", NTask * sizeof(size_t));
+  Rcvpm_offset = mymalloc("Rcvpm_offset", NTask * sizeof(size_t));
+
+  /* determine the slabs/columns each particle accesses; this pass only counts, the export buffer is filled in a second pass */
+  {
+    size_t *send_count = Sndpm_count + get_thread_num() * multiNtask;
+
+    /* each thread needs to do the loop to clear its send_count[] array */
+    for(j = 0; j < NTask; j++)
+      send_count[j] = 0;
+
+    for(i = 0; i < NumPart; i++)
+      {
+        MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+        if(P[i].Type == 0)
+          pos = SphP[i].Center;
+        else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+          pos = P[i].Pos;
+
+        if(pos[0] < All.Corner[grnr][0] || pos[0] >= All.UpperCorner[grnr][0])
+          continue;
+        if(pos[1] < All.Corner[grnr][1] || pos[1] >= All.UpperCorner[grnr][1])
+          continue;
+        if(pos[2] < All.Corner[grnr][2] || pos[2] >= All.UpperCorner[grnr][2])
+          continue;
+
+        int slab_x  = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0]));
+        int slab_xx = slab_x + 1; /* NOTE(review): assumes the task lookup below is valid for slab_x + 1 - confirm at the upper edge */
+
+#ifndef FFT_COLUMN_BASED
+        int task0 = myplan.slab_to_task[slab_x];
+        int task1 = myplan.slab_to_task[slab_xx];
+
+        send_count[task0]++;
+        if(task0 != task1) /* count a second copy only if the neighbouring slab lives on another task */
+          send_count[task1]++;
+#else  /* #ifndef FFT_COLUMN_BASED */
+        int slab_y  = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1]));
+        int slab_yy = slab_y + 1;
+
+        int column0 = slab_x * GRID + slab_y;
+        int column1 = slab_x * GRID + slab_yy;
+        int column2 = slab_xx * GRID + slab_y;
+        int column3 = slab_xx * GRID + slab_yy;
+
+        int task0, task1, task2, task3;
+
+        if(column0 < myplan.pivotcol) /* columns below pivotcol: avg columns per task; the rest: avg - 1 per task */
+          task0 = column0 / myplan.avg;
+        else
+          task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column1 < myplan.pivotcol)
+          task1 = column1 / myplan.avg;
+        else
+          task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column2 < myplan.pivotcol)
+          task2 = column2 / myplan.avg;
+        else
+          task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column3 < myplan.pivotcol)
+          task3 = column3 / myplan.avg;
+        else
+          task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        send_count[task0]++; /* the comparisons below make sure each distinct task is counted once */
+        if(task1 != task0)
+          send_count[task1]++;
+        if(task2 != task1 && task2 != task0)
+          send_count[task2]++;
+        if(task3 != task0 && task3 != task1 && task3 != task2)
+          send_count[task3]++;
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+      }
+  }
+
+  /* collect thread-specific offset table and collect the results from the other threads;
+   * the running offset advances over all threads of task i before it moves on to task i + 1
+   */
+  for(i = 0, Sndpm_offset[0] = 0; i < NTask; i++)
+    for(j = 0; j < MaxThreads; j++)
+      {
+        int ind_prev, ind = j * multiNtask + i;
+        if(ind > 0)
+          {
+            if(j == 0)
+              ind_prev = (MaxThreads - 1) * multiNtask + i - 1; /* last thread of the previous task */
+            else
+              ind_prev = ind - multiNtask; /* previous thread, same task */
+
+            Sndpm_offset[ind] = Sndpm_offset[ind_prev] + Sndpm_count[ind_prev];
+          }
+      }
+
+  for(j = 1; j < MaxThreads; j++) /* add the counts of the other threads onto the slice of thread 0 */
+    for(i = 0; i < NTask; i++)
+      Sndpm_count[i] += Sndpm_count[i + j * multiNtask];
+
+  MPI_Alltoall(Sndpm_count, sizeof(size_t), MPI_BYTE, Rcvpm_count, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+
+  for(j = 0, nimport = 0, nexport = 0, Rcvpm_offset[0] = 0, Sndpm_offset[0] = 0; j < NTask; j++) /* totals and per-task offsets */
+    {
+      nexport += Sndpm_count[j];
+      nimport += Rcvpm_count[j];
+
+      if(j > 0)
+        {
+          Sndpm_offset[j] = Sndpm_offset[j - 1] + Sndpm_count[j - 1]; /* rewrites entries 0..NTask-1, i.e. the slice of thread 0 */
+          Rcvpm_offset[j] = Rcvpm_offset[j - 1] + Rcvpm_count[j - 1];
+        }
+    }
+
+  /* allocate import and export buffer */
+  partin  = (struct partbuf *)mymalloc("partin", nimport * sizeof(struct partbuf));
+  partout = (struct partbuf *)mymalloc("partout", nexport * sizeof(struct partbuf));
+
+  {
+    size_t *send_count  = Sndpm_count + get_thread_num() * multiNtask;
+    size_t *send_offset = Sndpm_offset + get_thread_num() * multiNtask;
+
+    for(j = 0; j < NTask; j++) /* the counts are reset and rebuilt while the buffer is filled */
+      send_count[j] = 0;
+
+    /* fill export buffer; same particle selection and task lookup as in the counting pass above */
+    for(i = 0; i < NumPart; i++)
+      {
+        MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+        if(P[i].Type == 0)
+          pos = SphP[i].Center;
+        else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+          pos = P[i].Pos;
+
+        if(pos[0] < All.Corner[grnr][0] || pos[0] >= All.UpperCorner[grnr][0])
+          continue;
+        if(pos[1] < All.Corner[grnr][1] || pos[1] >= All.UpperCorner[grnr][1])
+          continue;
+        if(pos[2] < All.Corner[grnr][2] || pos[2] >= All.UpperCorner[grnr][2])
+          continue;
+
+        int slab_x  = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0]));
+        int slab_xx = slab_x + 1;
+
+#ifndef FFT_COLUMN_BASED
+        int task0 = myplan.slab_to_task[slab_x];
+        int task1 = myplan.slab_to_task[slab_xx];
+
+        size_t ind0        = send_offset[task0] + send_count[task0]++; /* next free slot in the section for task0 */
+        partout[ind0].Mass = P[i].Mass;
+        for(j = 0; j < 3; j++)
+          partout[ind0].Pos[j] = pos[j];
+
+        if(task0 != task1)
+          {
+            size_t ind1        = send_offset[task1] + send_count[task1]++;
+            partout[ind1].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind1].Pos[j] = pos[j];
+          }
+#else  /* #ifndef FFT_COLUMN_BASED */
+        int slab_y  = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1]));
+        int slab_yy = slab_y + 1;
+
+        int column0 = slab_x * GRID + slab_y;
+        int column1 = slab_x * GRID + slab_yy;
+        int column2 = slab_xx * GRID + slab_y;
+        int column3 = slab_xx * GRID + slab_yy;
+
+        int task0, task1, task2, task3;
+
+        if(column0 < myplan.pivotcol)
+          task0 = column0 / myplan.avg;
+        else
+          task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column1 < myplan.pivotcol)
+          task1 = column1 / myplan.avg;
+        else
+          task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column2 < myplan.pivotcol)
+          task2 = column2 / myplan.avg;
+        else
+          task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column3 < myplan.pivotcol)
+          task3 = column3 / myplan.avg;
+        else
+          task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        size_t ind0        = send_offset[task0] + send_count[task0]++;
+        partout[ind0].Mass = P[i].Mass;
+        for(j = 0; j < 3; j++)
+          partout[ind0].Pos[j] = pos[j];
+
+        if(task1 != task0)
+          {
+            size_t ind1        = send_offset[task1] + send_count[task1]++;
+            partout[ind1].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind1].Pos[j] = pos[j];
+          }
+        if(task2 != task1 && task2 != task0)
+          {
+            size_t ind2        = send_offset[task2] + send_count[task2]++;
+            partout[ind2].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind2].Pos[j] = pos[j];
+          }
+        if(task3 != task0 && task3 != task1 && task3 != task2)
+          {
+            size_t ind3        = send_offset[task3] + send_count[task3]++;
+            partout[ind3].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind3].Pos[j] = pos[j];
+          }
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+      }
+  }
+
+  /* collect the send_count[] results from the other threads */
+  for(j = 1; j < MaxThreads; j++)
+    for(i = 0; i < NTask; i++)
+      Sndpm_count[i] += Sndpm_count[i + j * multiNtask];
+
+  int flag_big = 0, flag_big_all;
+  for(i = 0; i < NTask; i++)
+    if(Sndpm_count[i] * sizeof(struct partbuf) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+      flag_big = 1;
+
+  /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
+   * transfer the data in chunks.
+   */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  /* exchange particle data */
+  myMPI_Alltoallv(partout, Sndpm_count, Sndpm_offset, partin, Rcvpm_count, Rcvpm_offset, sizeof(struct partbuf), flag_big_all,
+                  MPI_COMM_WORLD);
+
+  myfree(partout);
+
+  /* allocate density field */
+  rhogrid = (fft_real *)mymalloc("rhogrid", maxfftsize * sizeof(fft_real));
+
+  /* clear local FFT-mesh density field */
+  large_array_offset ii;
+
+  for(ii = 0; ii < maxfftsize; ii++)
+    rhogrid[ii] = 0;
+
+#ifndef FFT_COLUMN_BASED
+  /* bin particle data onto mesh, in multi-threaded fashion; each thread only writes
+   * cells whose y index lies in its own range [first_y, last_y]
+   */
+  {
+    int tid = get_thread_num();
+
+    int first_y, count_y;
+    subdivide_evenly(GRID, MaxThreads, tid, &first_y, &count_y);
+    int last_y = first_y + count_y - 1;
+
+    for(i = 0; i < nimport; i++)
+      {
+        int slab_y  = (int)(to_slab_fac * (partin[i].Pos[1] - All.Corner[grnr][1]));
+        int slab_yy = slab_y + 1;
+        double dy   = to_slab_fac * (partin[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
+        int flag_slab_y, flag_slab_yy;
+
+        if(slab_y >= first_y && slab_y <= last_y)
+          flag_slab_y = 1;
+        else
+          flag_slab_y = 0;
+
+        if(slab_yy >= first_y && slab_yy <= last_y)
+          flag_slab_yy = 1;
+        else
+          flag_slab_yy = 0;
+
+        if(flag_slab_y || flag_slab_yy)
+          {
+            double mass = partin[i].Mass;
+
+            int slab_x  = (int)(to_slab_fac * (partin[i].Pos[0] - All.Corner[grnr][0]));
+            int slab_z  = (int)(to_slab_fac * (partin[i].Pos[2] - All.Corner[grnr][2]));
+            int slab_xx = slab_x + 1;
+            int slab_zz = slab_z + 1;
+
+            double dx = to_slab_fac * (partin[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
+            double dz = to_slab_fac * (partin[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
+
+            int flag_slab_x, flag_slab_xx;
+
+            if(myplan.slab_to_task[slab_x] == ThisTask)
+              {
+                slab_x -= myplan.first_slab_x_of_task[ThisTask]; /* switch to the slab index relative to this task */
+                flag_slab_x = 1;
+              }
+            else
+              flag_slab_x = 0;
+
+            if(myplan.slab_to_task[slab_xx] == ThisTask)
+              {
+                slab_xx -= myplan.first_slab_x_of_task[ThisTask];
+                flag_slab_xx = 1;
+              }
+            else
+              flag_slab_xx = 0;
+
+            if(flag_slab_x)
+              {
+                if(flag_slab_y)
+                  {
+                    rhogrid[FI(slab_x, slab_y, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz));
+                    rhogrid[FI(slab_x, slab_y, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz));
+                  }
+
+                if(flag_slab_yy)
+                  {
+                    rhogrid[FI(slab_x, slab_yy, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz));
+                    rhogrid[FI(slab_x, slab_yy, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz));
+                  }
+              }
+
+            if(flag_slab_xx)
+              {
+                if(flag_slab_y)
+                  {
+                    rhogrid[FI(slab_xx, slab_y, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz));
+                    rhogrid[FI(slab_xx, slab_y, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz));
+                  }
+
+                if(flag_slab_yy)
+                  {
+                    rhogrid[FI(slab_xx, slab_yy, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz));
+                    rhogrid[FI(slab_xx, slab_yy, slab_zz)] += (mass * (dx) * (dy) * (dz));
+                  }
+              }
+          }
+      }
+  }
+
+#else /* #ifndef FFT_COLUMN_BASED */
+
+  struct data_cols /* per imported particle: the four columns it touches and its fractional x/y offsets */
+  {
+    int col0, col1, col2, col3;
+    double dx, dy;
+  } * aux;
+
+  aux = mymalloc("aux", nimport * sizeof(struct data_cols));
+
+  for(i = 0; i < nimport; i++)
+    {
+      int slab_x  = (int)(to_slab_fac * (partin[i].Pos[0] - All.Corner[grnr][0]));
+      int slab_xx = slab_x + 1;
+
+      int slab_y  = (int)(to_slab_fac * (partin[i].Pos[1] - All.Corner[grnr][1]));
+      int slab_yy = slab_y + 1;
+
+      aux[i].dx = to_slab_fac * (partin[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
+      aux[i].dy = to_slab_fac * (partin[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
+
+      aux[i].col0 = slab_x * GRID + slab_y;
+      aux[i].col1 = slab_x * GRID + slab_yy;
+      aux[i].col2 = slab_xx * GRID + slab_y;
+      aux[i].col3 = slab_xx * GRID + slab_yy;
+    }
+
+  {
+    int tid = get_thread_num();
+
+    int first_col, last_col, count_col;
+    subdivide_evenly(myplan.base_ncol, MaxThreads, tid, &first_col, &count_col);
+    last_col = first_col + count_col - 1;
+    first_col += myplan.base_firstcol; /* shift the thread's range by the first column stored on this task */
+    last_col += myplan.base_firstcol;
+
+    for(i = 0; i < nimport; i++)
+      {
+        int flag0, flag1, flag2, flag3;
+        int col0 = aux[i].col0;
+        int col1 = aux[i].col1;
+        int col2 = aux[i].col2;
+        int col3 = aux[i].col3;
+
+        if(col0 >= first_col && col0 <= last_col)
+          flag0 = 1;
+        else
+          flag0 = 0;
+
+        if(col1 >= first_col && col1 <= last_col)
+          flag1 = 1;
+        else
+          flag1 = 0;
+
+        if(col2 >= first_col && col2 <= last_col)
+          flag2 = 1;
+        else
+          flag2 = 0;
+
+        if(col3 >= first_col && col3 <= last_col)
+          flag3 = 1;
+        else
+          flag3 = 0;
+
+        if(flag0 || flag1 || flag2 || flag3)
+          {
+            double mass = partin[i].Mass;
+
+            double dx = aux[i].dx;
+            double dy = aux[i].dy;
+
+            int slab_z  = (int)(to_slab_fac * (partin[i].Pos[2] - All.Corner[grnr][2]));
+            int slab_zz = slab_z + 1;
+
+            double dz = to_slab_fac * (partin[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
+
+            if(flag0)
+              {
+                rhogrid[FC(col0, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz));
+                rhogrid[FC(col0, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz));
+              }
+
+            if(flag1)
+              {
+                rhogrid[FC(col1, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz));
+                rhogrid[FC(col1, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz));
+              }
+
+            if(flag2)
+              {
+                rhogrid[FC(col2, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz));
+                rhogrid[FC(col2, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz));
+              }
+
+            if(flag3)
+              {
+                rhogrid[FC(col3, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz));
+                rhogrid[FC(col3, slab_zz)] += (mass * (dx) * (dy) * (dz));
+              }
+          }
+      }
+  }
+
+  myfree(aux);
+
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+}
+
+/*! \brief If dim<0, this function reads out the potential, otherwise
+ * Cartesian force components.
+ *
+ * \param[in] grnr Grid number (0: base grid, 1: high res grid).
+ * \param[in] dim Dimension of component to be read out (< 0: potential).
+ *
+ * \return void
+ */
+void pmforce_nonperiodic_uniform_optimized_readout_forces_or_potential(int grnr, int dim)
+{
+#ifdef EVALPOTENTIAL
+  /* factor to get potential; it is only applied in the dim < 0 branch near the end of this function */
+  double fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3);
+#endif /* #ifdef EVALPOTENTIAL */
+
+  double to_slab_fac = GRID / All.TotalMeshSize[grnr]; /* scales an offset from All.Corner[grnr] to mesh-cell units */
+
+  double *flistin  = (double *)mymalloc("flistin", nimport * sizeof(double));  /* one value per entry of partin */
+  double *flistout = (double *)mymalloc("flistout", nexport * sizeof(double)); /* one value per exported particle copy */
+
+  fft_real *grid; /* mesh that gets sampled: rhogrid for dim < 0, forcegrid otherwise */
+
+  if(dim < 0)
+    grid = rhogrid;
+  else
+    grid = forcegrid;
+
+  size_t i;
+  for(i = 0; i < nimport; i++) /* for every imported particle, sum the weighted mesh values stored on this task */
+    {
+      flistin[i] = 0;
+
+      int slab_x = (int)(to_slab_fac * (partin[i].Pos[0] - All.Corner[grnr][0]));
+      int slab_y = (int)(to_slab_fac * (partin[i].Pos[1] - All.Corner[grnr][1]));
+      int slab_z = (int)(to_slab_fac * (partin[i].Pos[2] - All.Corner[grnr][2]));
+
+      double dx = to_slab_fac * (partin[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
+      double dy = to_slab_fac * (partin[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
+      double dz = to_slab_fac * (partin[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
+
+      int slab_xx = slab_x + 1;
+      int slab_yy = slab_y + 1;
+      int slab_zz = slab_z + 1;
+
+#ifndef FFT_COLUMN_BASED
+      if(myplan.slab_to_task[slab_x] == ThisTask)
+        {
+          slab_x -= myplan.first_slab_x_of_task[ThisTask]; /* switch to the slab index relative to this task */
+
+          flistin[i] += +grid[FI(slab_x, slab_y, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
+                        grid[FI(slab_x, slab_y, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz) +
+                        grid[FI(slab_x, slab_yy, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) +
+                        grid[FI(slab_x, slab_yy, slab_zz)] * (1.0 - dx) * (dy) * (dz);
+        }
+
+      if(myplan.slab_to_task[slab_xx] == ThisTask)
+        {
+          slab_xx -= myplan.first_slab_x_of_task[ThisTask];
+
+          flistin[i] += +grid[FI(slab_xx, slab_y, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) +
+                        grid[FI(slab_xx, slab_y, slab_zz)] * (dx) * (1.0 - dy) * (dz) +
+                        grid[FI(slab_xx, slab_yy, slab_z)] * (dx) * (dy) * (1.0 - dz) +
+                        grid[FI(slab_xx, slab_yy, slab_zz)] * (dx) * (dy) * (dz);
+        }
+#else  /* #ifndef FFT_COLUMN_BASED */
+      int column0 = slab_x * GRID + slab_y;
+      int column1 = slab_x * GRID + slab_yy;
+      int column2 = slab_xx * GRID + slab_y;
+      int column3 = slab_xx * GRID + slab_yy;
+
+      if(column0 >= myplan.base_firstcol && column0 <= myplan.base_lastcol) /* only columns stored on this task contribute */
+        {
+          flistin[i] += +grid[FC(column0, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
+                        grid[FC(column0, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz);
+        }
+      if(column1 >= myplan.base_firstcol && column1 <= myplan.base_lastcol)
+        {
+          flistin[i] +=
+              +grid[FC(column1, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) + grid[FC(column1, slab_zz)] * (1.0 - dx) * (dy) * (dz);
+        }
+
+      if(column2 >= myplan.base_firstcol && column2 <= myplan.base_lastcol)
+        {
+          flistin[i] +=
+              +grid[FC(column2, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) + grid[FC(column2, slab_zz)] * (dx) * (1.0 - dy) * (dz);
+        }
+
+      if(column3 >= myplan.base_firstcol && column3 <= myplan.base_lastcol)
+        {
+          flistin[i] += +grid[FC(column3, slab_z)] * (dx) * (dy) * (1.0 - dz) + grid[FC(column3, slab_zz)] * (dx) * (dy) * (dz);
+        }
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+    }
+
+  /* exchange the potential component data */
+  int flag_big = 0, flag_big_all;
+  for(i = 0; i < NTask; i++)
+    if(Sndpm_count[i] * sizeof(double) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+      flag_big = 1;
+
+  /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
+   * transfer the data in chunks.
+   */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  /* exchange data; the count/offset tables are passed in the reverse roles here:
+   * the Rcvpm_* tables describe what is sent (flistin) and the Sndpm_* tables what is received (flistout)
+   */
+  myMPI_Alltoallv(flistin, Rcvpm_count, Rcvpm_offset, flistout, Sndpm_count, Sndpm_offset, sizeof(double), flag_big_all,
+                  MPI_COMM_WORLD);
+
+  /* now assign them to the correct particles; the particles are walked with the same selection and
+   * task lookup that is used here to index flistout, so send_count[] is rebuilt entry by entry
+   */
+  int multiNtask = roundup_to_multiple_of_cacheline_size(NTask * sizeof(size_t)) / sizeof(size_t);
+
+  {
+    size_t *send_count  = Sndpm_count + get_thread_num() * multiNtask;
+    size_t *send_offset = Sndpm_offset + get_thread_num() * multiNtask;
+
+    int j;
+    for(j = 0; j < NTask; j++)
+      send_count[j] = 0;
+
+    int i; /* shadows the size_t i of the enclosing scope */
+    for(i = 0; i < NumPart; i++)
+      {
+        MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+        if(P[i].Type == 0)
+          pos = SphP[i].Center;
+        else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+          pos = P[i].Pos;
+
+        if(pos[0] < All.Corner[grnr][0] || pos[0] >= All.UpperCorner[grnr][0])
+          continue;
+        if(pos[1] < All.Corner[grnr][1] || pos[1] >= All.UpperCorner[grnr][1])
+          continue;
+        if(pos[2] < All.Corner[grnr][2] || pos[2] >= All.UpperCorner[grnr][2])
+          continue;
+
+        int slab_x  = (int)(to_slab_fac * (pos[0] - All.Corner[grnr][0]));
+        int slab_xx = slab_x + 1;
+
+#ifndef FFT_COLUMN_BASED
+        int task0 = myplan.slab_to_task[slab_x];
+        int task1 = myplan.slab_to_task[slab_xx];
+
+        double value = flistout[send_offset[task0] + send_count[task0]++]; /* partial sum returned by task0 */
+
+        if(task0 != task1)
+          value += flistout[send_offset[task1] + send_count[task1]++];
+#else  /* #ifndef FFT_COLUMN_BASED */
+        int slab_y  = (int)(to_slab_fac * (pos[1] - All.Corner[grnr][1]));
+        int slab_yy = slab_y + 1;
+
+        int column0 = slab_x * GRID + slab_y;
+        int column1 = slab_x * GRID + slab_yy;
+        int column2 = slab_xx * GRID + slab_y;
+        int column3 = slab_xx * GRID + slab_yy;
+
+        int task0, task1, task2, task3;
+
+        if(column0 < myplan.pivotcol) /* columns below pivotcol: avg columns per task; the rest: avg - 1 per task */
+          task0 = column0 / myplan.avg;
+        else
+          task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column1 < myplan.pivotcol)
+          task1 = column1 / myplan.avg;
+        else
+          task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column2 < myplan.pivotcol)
+          task2 = column2 / myplan.avg;
+        else
+          task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column3 < myplan.pivotcol)
+          task3 = column3 / myplan.avg;
+        else
+          task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        double value = flistout[send_offset[task0] + send_count[task0]++];
+
+        if(task1 != task0)
+          value += flistout[send_offset[task1] + send_count[task1]++];
+
+        if(task2 != task1 && task2 != task0)
+          value += flistout[send_offset[task2] + send_count[task2]++];
+
+        if(task3 != task0 && task3 != task1 && task3 != task2)
+          value += flistout[send_offset[task3] + send_count[task3]++];
+#endif /* #ifndef FFT_COLUMN_BASED */
+
+#ifdef PLACEHIGHRESREGION
+        if(grnr == 1)
+          if(!(pmforce_is_particle_high_res(P[i].Type, pos)))
+            continue; /* skipped only after its flistout entries were consumed, so the counters stay in step */
+#endif /* #ifdef PLACEHIGHRESREGION */
+
+        if(dim < 0)
+          {
+#ifdef EVALPOTENTIAL
+            P[i].PM_Potential += value * fac;
+#endif /* #ifdef EVALPOTENTIAL */
+          }
+        else
+          P[i].GravPM[dim] += value;
+      }
+  }
+
+  int j;
+  /* restore total Sndpm_count: the loop above reset and rebuilt the per-thread counts,
+   * so the other threads' slices are added onto the first NTask entries again
+   */
+  for(j = 1; j < MaxThreads; j++)
+    for(i = 0; i < NTask; i++)
+      Sndpm_count[i] += Sndpm_count[i + j * multiNtask];
+
+  myfree(flistout);
+  myfree(flistin);
+}
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+
+/*! \brief Calculates the long-range non-periodic forces using the PM method.
+ *
+ * The potential is Gaussian filtered with Asmth, given in mesh-cell units.
+ * The potential is finite differenced using a 4-point finite differencing
+ * formula to obtain the force fields, which are then interpolated to the
+ * particle positions. We carry out a CIC charge assignment, and compute the
+ * potenial by Fourier transform methods. The CIC kernel is deconvolved.
+ *
+ * \param[in] grnr Grid number (0: base grid, 1 high res grid).
+ * + * \return 0 + */ +int pmforce_nonperiodic(int grnr) +{ + int i, j, flag, flagsum, dim; + + double tstart = second(); + + mpi_printf("PM-NONPERIODIC: Starting non-periodic PM calculation (grid=%d) presently allocated=%g MB).\n", grnr, + AllocatedBytes / (1024.0 * 1024.0)); + +#ifndef NUMPART_PER_TASK_LARGE + if((((long long)NumPart) << 3) >= (((long long)1) << 31)) + terminate("We are dealing with a too large particle number per MPI rank - enabling NUMPART_PER_TASK_LARGE might help."); +#endif /* #ifndef NUMPART_PER_TASK_LARGE */ + + double fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3); /* to get potential */ + fac *= 1 / (2 * All.TotalMeshSize[grnr] / GRID); /* for finite differencing */ + + /* first, check whether all particles lie in the allowed region */ + for(i = 0, flag = 0; i < NumPart; i++) + { + MyDouble *pos; + +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + pos = SphP[i].Center; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[i].Pos; + +#ifdef PLACEHIGHRESREGION + if(grnr == 0 || (grnr == 1 && pmforce_is_particle_high_res(P[i].Type, pos))) +#endif /* #ifdef PLACEHIGHRESREGION */ + { + for(j = 0; j < 3; j++) + { + if(pos[j] < All.Xmintot[grnr][j] || pos[j] > All.Xmaxtot[grnr][j]) + { + if(flag == 0) + { + printf("Particle Id=%llu on task=%d with coordinates (%g|%g|%g) lies outside PM mesh.\n", + (unsigned long long)P[i].ID, ThisTask, pos[0], pos[1], pos[2]); + myflush(stdout); + } + flag++; + break; + } + } + } + } + + MPI_Allreduce(&flag, &flagsum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + if(flagsum > 0) + { + mpi_printf("PM-NONPERIODIC: In total %d particles were outside allowed range.\n", flagsum); + return 1; /* error - need to return because particles were outside allowed range */ + } + +#ifdef PM_ZOOM_OPTIMIZED + pmforce_nonperiodic_zoom_optimized_prepare_density(grnr); +#else /* #ifdef PM_ZOOM_OPTIMIZED */ + pmforce_nonperiodic_uniform_optimized_prepare_density(grnr); +#endif /* #ifdef 
PM_ZOOM_OPTIMIZED #else */ + + /* allocate the memory to hold the FFT fields */ + forcegrid = (fft_real *)mymalloc("forcegrid", maxfftsize * sizeof(fft_real)); + + workspace = forcegrid; + +#ifndef FFT_COLUMN_BASED + fft_of_rhogrid = (fft_complex *)&rhogrid[0]; +#else /* #ifndef FFT_COLUMN_BASED */ + fft_of_rhogrid = (fft_complex *)&workspace[0]; +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + /* Do the FFT of the density field */ +#ifndef FFT_COLUMN_BASED + my_slab_based_fft(&myplan, &rhogrid[0], &workspace[0], 1); +#else /* #ifndef FFT_COLUMN_BASED */ + my_column_based_fft(&myplan, rhogrid, workspace, 1); /* result is in workspace, not in rhogrid ! */ +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + /* multiply with kernel in Fourier space */ + /* multiply with the Fourier transform of the Green's function (kernel) */ + /* multiply with Green's function in order to obtain the potential */ + +#ifdef FFT_COLUMN_BASED + for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip++) + { +#else /* #ifdef FFT_COLUMN_BASED */ + for(int x = 0; x < GRID; x++) + for(int y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++) + for(int z = 0; z < GRIDz; z++) + { +#endif /* #ifdef FFT_COLUMN_BASED #else */ + +#ifndef FFT_COLUMN_BASED + large_array_offset ip = ((large_array_offset)GRIDz) * (GRID * (y - myplan.slabstart_y) + x) + z; +#endif /* #ifndef FFT_COLUMN_BASED */ + + double re = fft_of_rhogrid[ip][0] * fft_of_kernel[grnr][ip][0] - fft_of_rhogrid[ip][1] * fft_of_kernel[grnr][ip][1]; + double im = fft_of_rhogrid[ip][0] * fft_of_kernel[grnr][ip][1] + fft_of_rhogrid[ip][1] * fft_of_kernel[grnr][ip][0]; + + fft_of_rhogrid[ip][0] = re; + fft_of_rhogrid[ip][1] = im; + } + + /* Do the inverse FFT to get the potential */ + +#ifndef FFT_COLUMN_BASED + my_slab_based_fft(&myplan, rhogrid, workspace, -1); +#else /* #ifndef FFT_COLUMN_BASED */ + my_column_based_fft(&myplan, workspace, rhogrid, -1); +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + /* Now 
rhogrid holds the potential */ + +#ifdef EVALPOTENTIAL +#ifdef PM_ZOOM_OPTIMIZED + pmforce_nonperiodic_zoom_optimized_readout_forces_or_potential(grnr, -1); +#else /* #ifdef PM_ZOOM_OPTIMIZED */ + pmforce_nonperiodic_uniform_optimized_readout_forces_or_potential(grnr, -1); +#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */ +#endif /* #ifdef EVALPOTENTIAL */ + + /* get the force components by finite differencing of the potential for each dimension, + * and send the results back to the right CPUs + */ + for(dim = 2; dim >= 0; dim--) /* Calculate each component of the force. */ + { + /* we do the x component last, because for differencing the potential in the x-direction, we need to construct the transpose */ +#ifndef FFT_COLUMN_BASED + if(dim == 0) + my_slab_transposeA(&myplan, rhogrid, forcegrid); /* compute the transpose of the potential field for finite differencing */ + + for(int y = 2; y < GRID / 2 - 2; y++) + for(int x = 0; x < myplan.nslab_x; x++) + if(x + myplan.slabstart_x >= 2 && x + myplan.slabstart_x < GRID / 2 - 2) + for(int z = 2; z < GRID / 2 - 2; z++) + { + int yrr = y, yll = y, yr = y, yl = y; + int zrr = z, zll = z, zr = z, zl = z; + + switch(dim) + { + case 0: /* note: for the x-direction, we difference the transposed direction (y) */ + case 1: + yr = y + 1; + yl = y - 1; + yrr = y + 2; + yll = y - 2; + + break; + case 2: + zr = z + 1; + zl = z - 1; + zrr = z + 2; + zll = z - 2; + + break; + } + + if(dim == 0) + forcegrid[TI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[TI(x, yl, zl)] - rhogrid[TI(x, yr, zr)]) - + (1.0 / 6) * (rhogrid[TI(x, yll, zll)] - rhogrid[TI(x, yrr, zrr)])); + else + forcegrid[FI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[FI(x, yl, zl)] - rhogrid[FI(x, yr, zr)]) - + (1.0 / 6) * (rhogrid[FI(x, yll, zll)] - rhogrid[FI(x, yrr, zrr)])); + } + + if(dim == 0) + my_slab_transposeB(&myplan, forcegrid, rhogrid); /* reverse the transpose from above */ +#else /* #ifndef FFT_COLUMN_BASED */ + fft_real *scratch = NULL, *forcep, *potp; + + if(dim != 2) + 
{ + scratch = mymalloc("scratch", myplan.fftsize * sizeof(fft_real)); /* need a third field as scratch space */ + memcpy(scratch, rhogrid, myplan.fftsize * sizeof(fft_real)); + + if(dim == 1) + my_fft_swap23(&myplan, scratch, forcegrid); + else + my_fft_swap13(&myplan, scratch, forcegrid); + } + + int ncols; + if(dim == 2) + ncols = myplan.base_ncol; + else if(dim == 1) + ncols = myplan.ncol_XZ; + else + ncols = myplan.ncol_YZ; + + large_array_offset i; + + for(i = 0; i < ncols; i++) + { + if(dim != 2) + { + forcep = &scratch[GRID * i]; + potp = &forcegrid[GRID * i]; + } + else + { + forcep = &forcegrid[GRID2 * i]; + potp = &rhogrid[GRID2 * i]; + } + + int z; + for(z = 2; z < GRID / 2 - 2; z++) + { + int zr = z + 1; + int zl = z - 1; + int zrr = z + 2; + int zll = z - 2; + + forcep[z] = fac * ((4.0 / 3) * (potp[zl] - potp[zr]) - (1.0 / 6) * (potp[zll] - potp[zrr])); + } + } + + if(dim != 2) + { + if(dim == 1) + my_fft_swap23back(&myplan, scratch, forcegrid); + else + my_fft_swap13back(&myplan, scratch, forcegrid); + + myfree(scratch); + } +#endif /* #ifndef FFT_COLUMN_BASED #else */ + +#ifdef PM_ZOOM_OPTIMIZED + pmforce_nonperiodic_zoom_optimized_readout_forces_or_potential(grnr, dim); +#else /* #ifdef PM_ZOOM_OPTIMIZED */ + pmforce_nonperiodic_uniform_optimized_readout_forces_or_potential(grnr, dim); +#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */ + } + + /* free stuff */ + myfree(forcegrid); + myfree(rhogrid); + +#ifdef PM_ZOOM_OPTIMIZED + myfree(localfield_recvcount); + myfree(localfield_offset); + myfree(localfield_sendcount); + myfree(localfield_first); + myfree(localfield_data); + myfree(localfield_globalindex); + myfree(part); +#else /* #ifdef PM_ZOOM_OPTIMIZED */ + myfree(partin); + myfree(Rcvpm_offset); + myfree(Rcvpm_count); + myfree(Sndpm_offset); + myfree(Sndpm_count); +#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */ + + double tend = second(); + + mpi_printf("PM-NONPERIODIC: done. (took %g seconds)\n", timediff(tstart, tend)); + + return 0; +} + +/*! 
\brief Sets-up the Greens function for the non-periodic potential in real + * space, and then converts it to Fourier space by means of an FFT. + * + * \return void + */ +void pm_setup_nonperiodic_kernel(void) +{ + int i, j, k, x, y, z; + double xx, yy, zz, r, u, fac; + + mpi_printf("PM-NONPERIODIC: Setting up non-periodic PM kernel (GRID=%d) presently allocated=%g MB).\n", (int)GRID, + AllocatedBytes / (1024.0 * 1024.0)); + + /* now set up kernel and its Fourier transform */ + +#if defined(GRAVITY_NOT_PERIODIC) + for(i = 0; i < maxfftsize; i++) /* clear local field */ + kernel[0][i] = 0; + +#ifndef FFT_COLUMN_BASED + for(i = myplan.slabstart_x; i < (myplan.slabstart_x + myplan.nslab_x); i++) + for(j = 0; j < GRID; j++) + { +#else /* #ifndef FFT_COLUMN_BASED */ + int c; + for(c = myplan.base_firstcol; c < (myplan.base_firstcol + myplan.base_ncol); c++) + { + i = c / GRID; + j = c % GRID; +#endif /* #ifndef FFT_COLUMN_BASED #else */ + for(k = 0; k < GRID; k++) + { + xx = ((double)i) / GRID; + yy = ((double)j) / GRID; + zz = ((double)k) / GRID; + + if(xx >= 0.5) + xx -= 1.0; + if(yy >= 0.5) + yy -= 1.0; + if(zz >= 0.5) + zz -= 1.0; + + r = sqrt(xx * xx + yy * yy + zz * zz); + + u = 0.5 * r / (((double)ASMTH) / GRID); + + fac = 1 - erfc(u); + +#ifndef FFT_COLUMN_BASED + size_t ip = FI(i - myplan.slabstart_x, j, k); +#else /* #ifndef FFT_COLUMN_BASED */ + size_t ip = FC(c, k); +#endif /* #ifndef FFT_COLUMN_BASED #else */ + if(r > 0) + kernel[0][ip] = -fac / r; + else + kernel[0][ip] = -1 / (sqrt(M_PI) * (((double)ASMTH) / GRID)); + } + } + + { + fft_real *workspc = (fft_real *)mymalloc("workspc", maxfftsize * sizeof(fft_real)); + /* Do the FFT of the kernel */ +#ifndef FFT_COLUMN_BASED + my_slab_based_fft(&myplan, kernel[0], workspc, 1); +#else /* #ifndef FFT_COLUMN_BASED */ + my_column_based_fft(&myplan, kernel[0], workspc, 1); /* result is in workspace, not in kernel */ + memcpy(kernel[0], workspc, maxfftsize * sizeof(fft_real)); +#endif /* #ifndef FFT_COLUMN_BASED 
#else */ + myfree(workspc); + } + +#endif /* #if defined(GRAVITY_NOT_PERIODIC) */ + +#if defined(PLACEHIGHRESREGION) + + for(i = 0; i < maxfftsize; i++) /* clear local field */ + kernel[1][i] = 0; + +#ifndef FFT_COLUMN_BASED + for(i = myplan.slabstart_x; i < (myplan.slabstart_x + myplan.nslab_x); i++) + for(j = 0; j < GRID; j++) + { +#else /* #ifndef FFT_COLUMN_BASED */ + int c; + for(c = myplan.base_firstcol; c < (myplan.base_firstcol + myplan.base_ncol); c++) + { + i = c / GRID; + j = c % GRID; +#endif /* #ifndef FFT_COLUMN_BASED #else */ + for(k = 0; k < GRID; k++) + { + xx = ((double)i) / GRID; + yy = ((double)j) / GRID; + zz = ((double)k) / GRID; + + if(xx >= 0.5) + xx -= 1.0; + if(yy >= 0.5) + yy -= 1.0; + if(zz >= 0.5) + zz -= 1.0; + + r = sqrt(xx * xx + yy * yy + zz * zz); + + u = 0.5 * r / (((double)ASMTH) / GRID); + + fac = erfc(u * All.Asmth[1] / All.Asmth[0]) - erfc(u); + +#ifndef FFT_COLUMN_BASED + size_t ip = FI(i - myplan.slabstart_x, j, k); +#else /* #ifndef FFT_COLUMN_BASED */ + size_t ip = FC(c, k); +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + if(r > 0) + kernel[1][ip] = -fac / r; + else + { + fac = 1 - All.Asmth[1] / All.Asmth[0]; + kernel[1][ip] = -fac / (sqrt(M_PI) * (((double)ASMTH) / GRID)); + } + } + } + + { + fft_real *workspc = (fft_real *)mymalloc("workspc", maxfftsize * sizeof(fft_real)); + /* Do the FFT of the kernel */ +#ifndef FFT_COLUMN_BASED + my_slab_based_fft(&myplan, kernel[1], workspc, 1); +#else /* #ifndef FFT_COLUMN_BASED */ + my_column_based_fft(&myplan, kernel[1], workspc, 1); /* result is in workspace, not in kernel */ + memcpy(kernel[1], workspc, maxfftsize * sizeof(fft_real)); +#endif /* #ifndef FFT_COLUMN_BASED #else */ + myfree(workspc); + } + +#endif /* #if defined(PLACEHIGHRESREGION) */ + + /* deconvolve the Greens function twice with the CIC kernel */ +#ifdef FFT_COLUMN_BASED + + large_array_offset ip, ipcell; + + for(ip = 0; ip < myplan.second_transposed_ncells; ip++) + { + ipcell = ip + 
myplan.transposed_firstcol * GRID; + y = ipcell / (GRID * GRIDz); + int yr = ipcell % (GRID * GRIDz); + z = yr / GRID; + x = yr % GRID; +#else /* #ifdef FFT_COLUMN_BASED */ + for(x = 0; x < GRID; x++) + for(y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++) + for(z = 0; z < GRIDz; z++) + { +#endif /* #ifdef FFT_COLUMN_BASED #else */ + + double kx, ky, kz; + + if(x > GRID / 2) + kx = x - GRID; + else + kx = x; + if(y > GRID / 2) + ky = y - GRID; + else + ky = y; + if(z > GRID / 2) + kz = z - GRID; + else + kz = z; + + double k2 = kx * kx + ky * ky + kz * kz; + + if(k2 > 0) + { + double fx = 1, fy = 1, fz = 1; + + if(kx != 0) + { + fx = (M_PI * kx) / GRID; + fx = sin(fx) / fx; + } + if(ky != 0) + { + fy = (M_PI * ky) / GRID; + fy = sin(fy) / fy; + } + if(kz != 0) + { + fz = (M_PI * kz) / GRID; + fz = sin(fz) / fz; + } + + double ff = 1 / (fx * fy * fz); + ff = ff * ff * ff * ff; + +#ifndef FFT_COLUMN_BASED + large_array_offset ip = ((large_array_offset)GRIDz) * (GRID * (y - myplan.slabstart_y) + x) + z; +#endif /* #ifndef FFT_COLUMN_BASED */ +#if defined(GRAVITY_NOT_PERIODIC) + fft_of_kernel[0][ip][0] *= ff; + fft_of_kernel[0][ip][1] *= ff; +#endif /* #if defined(GRAVITY_NOT_PERIODIC) */ +#if defined(PLACEHIGHRESREGION) + fft_of_kernel[1][ip][0] *= ff; + fft_of_kernel[1][ip][1] *= ff; +#endif /* #if defined(PLACEHIGHRESREGION) */ + } + } + + /* end deconvolution */ +} + +#ifdef PM_ZOOM_OPTIMIZED + +/*! \brief Sort function for 'part' array indices. + * + * Sorts the indices into the 'part' array by the global index of the + * corresponding 'part_slab_data' struct. + * + * \param[in] a index to be compared. + * \param[in] b index to be compared. + * + * \return sort result + */ +static int pm_periodic_compare_sortindex(const void *a, const void *b) +{ + if(part[*(int *)a].globalindex < part[*(int *)b].globalindex) + return -1; + + if(part[*(int *)a].globalindex > part[*(int *)b].globalindex) + return +1; + + return 0; +} + +/*! 
\brief Implements the sorting function for mysort_pmperiodic() + * + * The index array is sorted using a merge sort algorithm. + * + * \param[in, out] b Index array to sort. + * \param[in] n Number of elements to sort. + * \param[out] t Temporary buffer array. + * + * \return void + */ +static void msort_pmperiodic_with_tmp(large_numpart_type *b, size_t n, large_numpart_type *t) +{ + large_numpart_type *tmp; + large_numpart_type *b1, *b2; + size_t n1, n2; + + if(n <= 1) + return; + + n1 = n / 2; + n2 = n - n1; + b1 = b; + b2 = b + n1; + + msort_pmperiodic_with_tmp(b1, n1, t); + msort_pmperiodic_with_tmp(b2, n2, t); + + tmp = t; + + while(n1 > 0 && n2 > 0) + { + if(part[*b1].globalindex <= part[*b2].globalindex) + { + --n1; + *tmp++ = *b1++; + } + else + { + --n2; + *tmp++ = *b2++; + } + } + + if(n1 > 0) + memcpy(tmp, b1, n1 * sizeof(large_numpart_type)); + + memcpy(b, t, (n - n2) * sizeof(large_numpart_type)); +} + +/*! \brief Sorts the index array b of n entries using the sort kernel + * cmp. + * + * The parameter s is set to sizeof(int). The index array b + * is sorted according to the globalindex field of the referenced item in the + * 'part' array + * + * \param[in, out] b The index array to sort. + * \param[in] n Number of entries in array b. + * \param[in] s Size of each entry (must be sizeof(int)). + * \param[in] cmp Comparison function. 
+ * + * \return void + */ +static void mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)) +{ + const size_t size = n * s; + + large_numpart_type *tmp = (large_numpart_type *)mymalloc("tmp", size); + + msort_pmperiodic_with_tmp((large_numpart_type *)b, n, tmp); + + myfree(tmp); +} +#endif /* #ifdef PM_ZOOM_OPTIMIZED */ + +#endif /* #if defined(PMGRID) && (defined(PLACEHIGHRESREGION) || defined(GRAVITY_NOT_PERIODIC)) */ diff --git a/src/amuse/community/arepo/src/gravity/pm/pm_periodic.c b/src/amuse/community/arepo/src/gravity/pm/pm_periodic.c new file mode 100644 index 0000000000..319404f797 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/pm/pm_periodic.c @@ -0,0 +1,2034 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/pm/pm_periodic.c + * \date 05/2018 + * \brief Routines for periodic PM-force computation. 
+ * \details These routines support two different strategies for doing the + * particle data exchange to assemble the density field and to + * read out the forces and potentials: + * + * The default scheme sends the particle positions to the target + * slabs, and bins them there. This works usually well for + * homogeneously loaded boxes, but can be problematic for zoom-in + * runs. In the latter case, PM_ZOOM_OPTIMIZED can be activated, + * where the data is binned on the originating processor followed + * by assembly of the binned density field. + * + * In addition, the routines can be either used with a slab-based + * FFT (as is traditionally done in FFTW), or with a column-based + * FFT. The latter requires more communication and is hence + * usually slower than the slab-based one. But if the number of + * MPI ranks exceeds the number of cells per dimension, then the + * column-based one can still scale and offers a balanced memory + * consumption, whereas this is not the case for the slab-based + * approach. To select the column-based FFT, the switch + * FFT_COLUMN_BASED can be activated. + * + * The switches PM_ZOOM_OPTIMIZED and FFT_COLUMN_BASED may also + * be combined, such that there are 4 main modes of how the PM + * routines may operate. + * + * It is also possible to use non-cubical boxes, by means of + * setting one or several of the LONG_X, LONG_Y, and LONG_Z + * options in the config file. The values need to be integers, + * and then BoxSize is stretched by that factor in the + * corresponding dimension. + * + * Much of the code is multi-threaded, so there should be some + * speed-up if OpenMP is used with NUM_THREADS > 1, but the + * benefit may be limited because the data transfer steps (which + * weigh in quite heavily) are not accelerated by this. + * + * If eight times the particle load per processor exceeds 2^31 + * ~ 2 billion, one should activate NUMPART_PER_TASK_LARGE. 
The + * code will check this condition and terminate if this is + * violated, so there should hopefully be no severe risk to + * accidentally forget this. + * + * contains functions: + * void pm_init_periodic(void) + * void pmforce_zoom_optimized_prepare_density(int mode, int + * *typelist) + * void pmforce_zoom_optimized_readout_forces_or_potential(int + * dim) + * static void pmforce_uniform_optimized_prepare_density(int + * mode) + * static void pmforce_uniform_optimized_readout_forces_or_ + * potential(int dim) + * void pmforce_periodic(int mode, int *typelist) + * static int pm_periodic_compare_sortindex(const void *a, + * const void *b) + * static void msort_pmperiodic_with_tmp(large_numpart_type * b, + * size_t n, large_numpart_type * t) + * static void mysort_pmperiodic(void *b, size_t n, size_t s, + * int (*cmp) (const void *, const void *)) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 15.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include <math.h> +#include <mpi.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#if defined(PMGRID) + +#define GRIDX (PMGRID * STRETCHX * DBX + DBX_EXTRA) +#define GRIDY (PMGRID * STRETCHY * DBY + DBY_EXTRA) +#define GRIDZ (PMGRID * STRETCHZ * DBZ + DBZ_EXTRA) + +#define GRIDz (GRIDZ / 2 + 1) +#define GRID2 (2 * GRIDz) + +#if(GRIDX > 1024) || (GRIDY > 1024) || (GRIDZ > 1024) +typedef long long large_array_offset; /* use a larger data type in this case so that we can always address all cells of the 3D grid + with a single index */ +#else /* #if (GRIDX > 1024) || (GRIDY > 1024) || (GRIDZ > 1024) */ +typedef unsigned int large_array_offset; +#endif /* #if (GRIDX > 1024) || (GRIDY > 1024) || (GRIDZ > 1024) #else */ + +#ifdef NUMPART_PER_TASK_LARGE +typedef long long large_numpart_type; /* if there is a risk that the local particle number times 8 overflows a 32-bit integer, this + data type should be used */ +#else /* 
#ifdef NUMPART_PER_TASK_LARGE */ +typedef int large_numpart_type; +#endif /* #ifdef NUMPART_PER_TASK_LARGE #else */ + +/* short-cut macros for accessing different 3D arrays */ +#define FI(x, y, z) (((large_array_offset)GRID2) * (GRIDY * (x) + (y)) + (z)) +#define FC(c, z) (((large_array_offset)GRID2) * ((c)-myplan.base_firstcol) + (z)) +#ifndef FFT_COLUMN_BASED +#define NI(x, y, z) (((large_array_offset)GRIDZ) * ((y) + (x)*myplan.nslab_y) + (z)) +#endif /* #ifndef FFT_COLUMN_BASED */ + +/* variables for power spectrum estimation */ +#ifndef BINS_PS +#define BINS_PS 2000 /* number of bins for power spectrum computation */ +#endif /* #ifndef BINS_PS */ +#ifndef POWERSPEC_FOLDFAC +#define POWERSPEC_FOLDFAC 16. /* folding factor to obtain an estimate of the power spectrum on very small scales */ +#endif /* #ifndef POWERSPEC_FOLDFAC */ + +static fft_plan myplan; /*!< In this structure, various bookkeeping variables for the distributed FFTs are stored */ + +/*! \var maxfftsize + * \brief maximum size of the local fft grid among all tasks + */ +static size_t maxfftsize; + +/*! \var rhogrid + * \brief This array hold the local part of the density field and + * after the FFTs the local part of the potential + * + * \var forcegrid + * \brief This array will contain the force field + * + * \var workspace + * \brief Workspace array used during the FFTs + */ +static fft_real *rhogrid, *forcegrid, *workspace; + +/*! \brief Array containing the FFT of #rhogrid + * + * This pointer points to the same array as #rhogrid, + * because in-place FFTs are used. + */ +static fft_complex *fft_of_rhogrid; + +/* Variable for power spectrum calculation */ +static double power_spec_totmass, power_spec_totmass2; +static long long power_spec_totnumpart; + +/*! \brief This routine generates the FFT-plans to carry out the FFTs later on. + * + * Some auxiliary variables for bookkeeping are also initialized. 
+ * + * \return void + */ +void pm_init_periodic(void) +{ +#ifdef LONG_X + if(LONG_X != (int)(LONG_X)) + terminate("LONG_X must be an integer if used with PMGRID"); +#endif /* #ifdef LONG_X */ + +#ifdef LONG_Y + if(LONG_Y != (int)(LONG_Y)) + terminate("LONG_Y must be an integer if used with PMGRID"); +#endif /* #ifdef LONG_Y */ + +#ifdef LONG_Z + if(LONG_Z != (int)(LONG_Z)) + terminate("LONG_Z must be an integer if used with PMGRID"); +#endif /* #ifdef LONG_Z */ + + All.Asmth[0] = ASMTH * All.BoxSize / PMGRID; + All.Rcut[0] = RCUT * All.Asmth[0]; + + /* Set up the FFTW-3 plan files. */ + int ndimx[1] = {GRIDX}; /* dimension of the 1D transforms */ + int ndimy[1] = {GRIDY}; /* dimension of the 1D transforms */ + int ndimz[1] = {GRIDZ}; /* dimension of the 1D transforms */ + + int max_GRID2 = 2 * (imax(imax(GRIDX, GRIDY), GRIDZ) / 2 + 1); + + /* temporarily allocate some arrays to make sure that out-of-place plans are created */ + rhogrid = (fft_real *)mymalloc("rhogrid", max_GRID2 * sizeof(fft_real)); + forcegrid = (fft_real *)mymalloc("forcegrid", max_GRID2 * sizeof(fft_real)); + +#ifdef DOUBLEPRECISION_FFTW + int alignflag = 0; +#else /* #ifdef DOUBLEPRECISION_FFTW */ + /* for single precision, the start of our FFT columns is presently only guaranteed to be 8-byte aligned */ + int alignflag = FFTW_UNALIGNED; +#endif /* #ifdef DOUBLEPRECISION_FFTW #else */ + + myplan.forward_plan_zdir = FFTW(plan_many_dft_r2c)(1, ndimz, 1, rhogrid, 0, 1, GRID2, (fft_complex *)forcegrid, 0, 1, GRIDz, + FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag); + +#ifndef FFT_COLUMN_BASED + int stride = GRIDz; +#else /* #ifndef FFT_COLUMN_BASED */ + int stride = 1; +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + myplan.forward_plan_ydir = + FFTW(plan_many_dft)(1, ndimy, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRIDY, (fft_complex *)forcegrid, 0, stride, + GRIDz * GRIDY, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag); + + myplan.forward_plan_xdir = + FFTW(plan_many_dft)(1, 
ndimx, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRIDX, (fft_complex *)forcegrid, 0, stride, + GRIDz * GRIDX, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag); + + myplan.backward_plan_xdir = + FFTW(plan_many_dft)(1, ndimx, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRIDX, (fft_complex *)forcegrid, 0, stride, + GRIDz * GRIDX, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag); + + myplan.backward_plan_ydir = + FFTW(plan_many_dft)(1, ndimy, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRIDY, (fft_complex *)forcegrid, 0, stride, + GRIDz * GRIDY, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag); + + myplan.backward_plan_zdir = FFTW(plan_many_dft_c2r)(1, ndimz, 1, (fft_complex *)rhogrid, 0, 1, GRIDz, forcegrid, 0, 1, GRID2, + FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag); + + myfree(forcegrid); + myfree(rhogrid); + +#ifndef FFT_COLUMN_BASED + + my_slab_based_fft_init(&myplan, GRIDX, GRIDY, GRIDZ); + + maxfftsize = imax(myplan.largest_x_slab * GRIDY, myplan.largest_y_slab * GRIDX) * ((size_t)GRID2); + +#else /* #ifndef FFT_COLUMN_BASED */ + + my_column_based_fft_init(&myplan, GRIDX, GRIDY, GRIDZ); + + maxfftsize = myplan.max_datasize; + +#endif /* #ifndef FFT_COLUMN_BASED #else */ +} + +/* Below, the two functions + * + * pmforce_ ...... _prepare_density() + * and + * pmforce_ ...... _readout_forces_or_potential(int dim) + * + * are defined in two different versions, one that works better for uniform + * simulations, the other for zoom-in runs. Only one of the two sets is used, + * depending on the setting of PM_ZOOM_OPTIMIZED. + */ +#ifdef PM_ZOOM_OPTIMIZED +static void mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)); +static int pm_periodic_compare_sortindex(const void *a, const void *b); + +/*! \brief This structure links the particles to the mesh cells, to which they + * contribute their mass. + * + * Each particle will have eight items of this structure in the #part array. 
+ * For each of the eight mesh cells the CIC assignment will contribute, + * one item of this struct exists. + */ +static struct part_slab_data +{ + large_array_offset globalindex; /*!< index in the global density mesh */ + large_numpart_type partindex; /*!< contains the local particle index shifted by 2^3, the first three bits encode to which part of the + CIC assignment this item belongs to */ + large_array_offset localindex; /*!< index to a local copy of the corresponding mesh cell of the global density array (used during + local mass and force assignment) */ +} * part; /*!< array of part_slab_data linking the local particles to their mesh cells */ + +static size_t *localfield_sendcount, *localfield_first, *localfield_offset, *localfield_recvcount; +static large_array_offset *localfield_globalindex, *import_globalindex; +static fft_real *localfield_data, *import_data; + +/*! \brief Prepares density field for PM calculation in zoom-optimized + * algorithm. + * + * \param[in] mode Modes force calculation or power spectrum calculation. + * \param[in] typelist Which particles to include (only for power spectrum). 
+ * + * \return void + */ +void pmforce_zoom_optimized_prepare_density(int mode, int *typelist) +{ + large_numpart_type i; + int level, recvTask; + MPI_Status status; + + double to_slab_fac = + PMGRID / All.BoxSize; /* note: This is the same as GRIDX / (All.BoxSize * LONG_X), and similarly for each dimension */ + + if(mode == 2) + to_slab_fac *= POWERSPEC_FOLDFAC; + if(mode == 3) + to_slab_fac *= POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC; + + part = (struct part_slab_data *)mymalloc("part", 8 * (NumPart * sizeof(struct part_slab_data))); + large_numpart_type *part_sortindex = (large_numpart_type *)mymalloc("part_sortindex", 8 * (NumPart * sizeof(large_numpart_type))); + + /* determine the cells each particle accesses */ + for(i = 0; i < NumPart; i++) + { + MyDouble *pos; + +#ifdef CELL_CENTER_GRAVITY + MyDouble posw[3], xtmp, ytmp, ztmp; + if(P[i].Type == 0) + { + posw[0] = WRAP_X(SphP[i].Center[0]); + posw[1] = WRAP_Y(SphP[i].Center[1]); + posw[2] = WRAP_Z(SphP[i].Center[2]); + + pos = posw; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[i].Pos; + + int slab_x = (int)(to_slab_fac * pos[0]); + int slab_y = (int)(to_slab_fac * pos[1]); + int slab_z = (int)(to_slab_fac * pos[2]); + + if(mode >= 2) + { + slab_x %= GRIDX; + slab_y %= GRIDY; + slab_z %= GRIDZ; + } + else + { + if(slab_x >= GRIDX) + slab_x -= GRIDX; + if(slab_y >= GRIDY) + slab_y -= GRIDY; + if(slab_z >= GRIDZ) + slab_z -= GRIDZ; + } + + large_numpart_type index_on_grid = ((large_numpart_type)i) << 3; + + for(int xx = 0; xx < 2; xx++) + for(int yy = 0; yy < 2; yy++) + for(int zz = 0; zz < 2; zz++) + { + int slab_xx = slab_x + xx; + int slab_yy = slab_y + yy; + int slab_zz = slab_z + zz; + + if(slab_xx >= GRIDX) + slab_xx -= GRIDX; + if(slab_yy >= GRIDY) + slab_yy -= GRIDY; + if(slab_zz >= GRIDZ) + slab_zz -= GRIDZ; + + large_array_offset offset = FI(slab_xx, slab_yy, slab_zz); + + part[index_on_grid].partindex = (i << 3) + (xx << 2) + (yy << 1) + zz; + part[index_on_grid].globalindex = offset; 
+ part_sortindex[index_on_grid] = index_on_grid; + index_on_grid++; + } + } + + /* note: num_on_grid will be 8 times larger than the particle number, but num_field_points will generally be much smaller */ + + large_array_offset num_field_points; + large_numpart_type num_on_grid = ((large_numpart_type)NumPart) << 3; + + /* bring the part-field into the order of the accessed cells. This allows the removal of duplicates */ + mysort_pmperiodic(part_sortindex, num_on_grid, sizeof(large_numpart_type), pm_periodic_compare_sortindex); + + if(num_on_grid > 0) + num_field_points = 1; + else + num_field_points = 0; + + /* determine the number of unique field points */ + for(i = 1; i < num_on_grid; i++) + { + if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex) + num_field_points++; + } + + /* allocate the local field */ + localfield_globalindex = (large_array_offset *)mymalloc_movable(&localfield_globalindex, "localfield_globalindex", + num_field_points * sizeof(large_array_offset)); + localfield_data = (fft_real *)mymalloc_movable(&localfield_data, "localfield_data", num_field_points * sizeof(fft_real)); + localfield_first = (size_t *)mymalloc_movable(&localfield_first, "localfield_first", NTask * sizeof(size_t)); + localfield_sendcount = (size_t *)mymalloc_movable(&localfield_sendcount, "localfield_sendcount", NTask * sizeof(size_t)); + localfield_offset = (size_t *)mymalloc_movable(&localfield_offset, "localfield_offset", NTask * sizeof(size_t)); + localfield_recvcount = (size_t *)mymalloc_movable(&localfield_recvcount, "localfield_recvcount", NTask * sizeof(size_t)); + + for(i = 0; i < NTask; i++) + { + localfield_first[i] = 0; + localfield_sendcount[i] = 0; + } + + /* establish the cross link between the part[ ]-array and the local list of + * mesh points. Also, count on which CPU the needed field points are stored. 
+ */ + for(i = 0, num_field_points = 0; i < num_on_grid; i++) + { + if(i > 0) + if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex) + num_field_points++; + + part[part_sortindex[i]].localindex = num_field_points; + + if(i > 0) + if(part[part_sortindex[i]].globalindex == part[part_sortindex[i - 1]].globalindex) + continue; + + localfield_globalindex[num_field_points] = part[part_sortindex[i]].globalindex; + +#ifndef FFT_COLUMN_BASED + int slab = part[part_sortindex[i]].globalindex / (GRIDY * GRID2); + int task = myplan.slab_to_task[slab]; +#else /* #ifndef FFT_COLUMN_BASED */ + int task, column = part[part_sortindex[i]].globalindex / (GRID2); + + if(column < myplan.pivotcol) + task = column / myplan.avg; + else + task = (column - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection; +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + if(localfield_sendcount[task] == 0) + localfield_first[task] = num_field_points; + + localfield_sendcount[task]++; + } + num_field_points++; + + for(i = 1, localfield_offset[0] = 0; i < NTask; i++) + localfield_offset[i] = localfield_offset[i - 1] + localfield_sendcount[i - 1]; + + myfree_movable(part_sortindex); + part_sortindex = NULL; + + /* now bin the local particle data onto the mesh list */ + for(i = 0; i < num_field_points; i++) + localfield_data[i] = 0; + + for(i = 0; i < num_on_grid; i += 8) + { + int pindex = (part[i].partindex >> 3); + + MyDouble *pos; +#ifdef CELL_CENTER_GRAVITY + MyDouble posw[3], xtmp, ytmp, ztmp; + if(P[pindex].Type == 0) + { + posw[0] = WRAP_X(SphP[pindex].Center[0]); + posw[1] = WRAP_Y(SphP[pindex].Center[1]); + posw[2] = WRAP_Z(SphP[pindex].Center[2]); + + pos = posw; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[pindex].Pos; + + int slab_x = (int)(to_slab_fac * pos[0]); + int slab_y = (int)(to_slab_fac * pos[1]); + int slab_z = (int)(to_slab_fac * pos[2]); + + double dx = to_slab_fac * pos[0] - slab_x; + double dy = to_slab_fac * pos[1] - slab_y; + 
double dz = to_slab_fac * pos[2] - slab_z; + + double weight = P[pindex].Mass; + + if(mode) /* only for power spectrum calculation */ + if(typelist[P[pindex].Type] == 0) + continue; + + localfield_data[part[i + 0].localindex] += weight * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); + localfield_data[part[i + 1].localindex] += weight * (1.0 - dx) * (1.0 - dy) * dz; + localfield_data[part[i + 2].localindex] += weight * (1.0 - dx) * dy * (1.0 - dz); + localfield_data[part[i + 3].localindex] += weight * (1.0 - dx) * dy * dz; + localfield_data[part[i + 4].localindex] += weight * (dx) * (1.0 - dy) * (1.0 - dz); + localfield_data[part[i + 5].localindex] += weight * (dx) * (1.0 - dy) * dz; + localfield_data[part[i + 6].localindex] += weight * (dx)*dy * (1.0 - dz); + localfield_data[part[i + 7].localindex] += weight * (dx)*dy * dz; + } + + rhogrid = (fft_real *)mymalloc("rhogrid", maxfftsize * sizeof(fft_real)); + + /* clear local FFT-mesh density field */ + large_array_offset ii; + for(ii = 0; ii < maxfftsize; ii++) + rhogrid[ii] = 0; + + /* exchange data and add contributions to the local mesh-path */ + MPI_Alltoall(localfield_sendcount, sizeof(size_t), MPI_BYTE, localfield_recvcount, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD); + + for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ + { + recvTask = ThisTask ^ level; + + if(recvTask < NTask) + { + if(level > 0) + { + import_data = (fft_real *)mymalloc("import_data", localfield_recvcount[recvTask] * sizeof(fft_real)); + import_globalindex = + (large_array_offset *)mymalloc("import_globalindex", localfield_recvcount[recvTask] * sizeof(large_array_offset)); + + if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0) + { + myMPI_Sendrecv(localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] * sizeof(fft_real), + MPI_BYTE, recvTask, TAG_NONPERIOD_A, import_data, localfield_recvcount[recvTask] * sizeof(fft_real), + MPI_BYTE, recvTask, 
TAG_NONPERIOD_A, MPI_COMM_WORLD, &status);
+
+                  myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                                 localfield_sendcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, TAG_NONPERIOD_B,
+                                 import_globalindex, localfield_recvcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                 TAG_NONPERIOD_B, MPI_COMM_WORLD, &status);
+                }
+            }
+          else
+            {
+              import_data = localfield_data + localfield_offset[ThisTask];
+              import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+            }
+
+          /* note: here every element in rhogrid is only accessed once, so there should be no race condition */
+          for(i = 0; i < localfield_recvcount[recvTask]; i++)
+            {
+              /* determine offset in local FFT slab */
+#ifndef FFT_COLUMN_BASED
+              large_array_offset offset =
+                  import_globalindex[i] - myplan.first_slab_x_of_task[ThisTask] * GRIDY * ((large_array_offset)GRID2);
+#else /* #ifndef FFT_COLUMN_BASED */
+              large_array_offset offset = import_globalindex[i] - myplan.base_firstcol * ((large_array_offset)GRID2);
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+              rhogrid[offset] += import_data[i];
+            }
+
+          if(level > 0)
+            {
+              myfree(import_globalindex);
+              myfree(import_data);
+            }
+        }
+    }
+}
+
+/* \brief Function to read out the force component corresponding to spatial
+ *         dimension 'dim'.
+ *
+ *  The routine works in two phases.
+ *
+ *  Phase 1 (mesh lookup): for every partner task (ThisTask ^ level) the list
+ *  of global mesh indices requested by that partner is received, the
+ *  corresponding values are read from the local grid (rhogrid if dim < 0,
+ *  forcegrid otherwise) and sent back. Afterwards localfield_data holds one
+ *  grid value for every entry of localfield_globalindex. For level 0 the
+ *  partner is this task itself, so the values are written directly into
+ *  localfield_data without any communication.
+ *
+ *  Phase 2 (interpolation): for every local particle i the eight values
+ *  referenced through part[8 * i + 0..7].localindex are combined with
+ *  tri-linear weights and added to P[i].GravPM[dim]. For dim < 0 the result
+ *  is scaled by 'fac' and added to P[i].PM_Potential when EVALPOTENTIAL is
+ *  defined; without EVALPOTENTIAL the interpolated value is not stored.
+ *
+ *  The function relies on the file-scope tables (part, localfield_globalindex,
+ *  localfield_offset, localfield_sendcount, localfield_recvcount) in the state
+ *  left behind by the density preparation step. It overwrites the contents of
+ *  localfield_data and uses import_data / import_globalindex as scratch
+ *  pointers.
+ *
+ * \param[in] dim Dimension to be read out; If dim is negative, potential
+ *            values are read out and assigned to particles.
+ *
+ * \return void
+ */
+void pmforce_zoom_optimized_readout_forces_or_potential(int dim)
+{
+#ifdef EVALPOTENTIAL
+  double fac = 4 * M_PI * All.G / (pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ); /* to get potential */
+#endif /* #ifdef EVALPOTENTIAL */
+
+  large_numpart_type i;
+  int level, recvTask;
+  MPI_Status status;
+
+  fft_real *grid; /* mesh that is sampled: potential (rhogrid) or one force component (forcegrid) */
+
+  if(dim < 0)
+    grid = rhogrid;
+  else
+    grid = forcegrid;
+
+  double to_slab_fac = PMGRID / All.BoxSize; /* converts a coordinate into mesh-cell units */
+
+  for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+    {
+      recvTask = ThisTask ^ level; /* partner of this exchange step */
+
+      if(recvTask < NTask) /* skip partner numbers that do not correspond to an existing task */
+        {
+          if(level > 0)
+            {
+              import_data = (fft_real *)mymalloc("import_data", localfield_recvcount[recvTask] * sizeof(fft_real));
+              import_globalindex =
+                  (large_array_offset *)mymalloc("import_globalindex", localfield_recvcount[recvTask] * sizeof(large_array_offset));
+
+              if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0)
+                {
+                  myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                                 localfield_sendcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask, TAG_NONPERIOD_C,
+                                 import_globalindex, localfield_recvcount[recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                 TAG_NONPERIOD_C, MPI_COMM_WORLD, &status); /* swap the lists of requested mesh indices */
+                }
+            }
+          else
+            {
+              import_data = localfield_data + localfield_offset[ThisTask]; /* own requests are answered in place */
+              import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+            }
+
+          for(i = 0; i < localfield_recvcount[recvTask]; i++)
+            {
+#ifndef FFT_COLUMN_BASED
+              large_array_offset offset =
+                  import_globalindex[i] - myplan.first_slab_x_of_task[ThisTask] * GRIDY * ((large_array_offset)GRID2);
+#else /* #ifndef FFT_COLUMN_BASED */
+              large_array_offset offset = import_globalindex[i] - myplan.base_firstcol * ((large_array_offset)GRID2);
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+              import_data[i] = grid[offset]; /* global index minus the start of the local mesh section gives the local index */
+            }
+
+          if(level > 0)
+            {
+              myMPI_Sendrecv(import_data, localfield_recvcount[recvTask] * sizeof(fft_real), MPI_BYTE, recvTask, TAG_NONPERIOD_A,
+                             localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] * sizeof(fft_real),
+                             MPI_BYTE, recvTask, TAG_NONPERIOD_A, MPI_COMM_WORLD, &status); /* return the looked-up values */
+
+              myfree(import_globalindex);
+              myfree(import_data);
+            }
+        }
+    }
+
+  /* read out the force/potential values, which all have been assembled in localfield_data */
+  for(i = 0; i < NumPart; i++)
+    {
+      large_numpart_type j = (i << 3); /* first of the eight part[] entries that belong to particle i */
+
+      MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+      MyDouble posw[3], xtmp, ytmp, ztmp;
+      if(P[i].Type == 0)
+        {
+          posw[0] = WRAP_X(SphP[i].Center[0]);
+          posw[1] = WRAP_Y(SphP[i].Center[1]);
+          posw[2] = WRAP_Z(SphP[i].Center[2]);
+
+          pos = posw;
+        }
+      else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+        pos = P[i].Pos;
+
+      int slab_x = (int)(to_slab_fac * pos[0]);
+      double dx = to_slab_fac * pos[0] - slab_x; /* fractional offset inside the cell, used as interpolation weight */
+
+      int slab_y = (int)(to_slab_fac * pos[1]);
+      double dy = to_slab_fac * pos[1] - slab_y;
+
+      int slab_z = (int)(to_slab_fac * pos[2]);
+      double dz = to_slab_fac * pos[2] - slab_z;
+
+      double value = +localfield_data[part[j + 0].localindex] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
+                     localfield_data[part[j + 1].localindex] * (1.0 - dx) * (1.0 - dy) * dz +
+                     localfield_data[part[j + 2].localindex] * (1.0 - dx) * dy * (1.0 - dz) +
+                     localfield_data[part[j + 3].localindex] * (1.0 - dx) * dy * dz +
+                     localfield_data[part[j + 4].localindex] * (dx) * (1.0 - dy) * (1.0 - dz) +
+                     localfield_data[part[j + 5].localindex] * (dx) * (1.0 - dy) * dz +
+                     localfield_data[part[j + 6].localindex] * (dx)*dy * (1.0 - dz) +
+                     localfield_data[part[j + 7].localindex] * (dx)*dy * dz;
+
+      if(dim < 0)
+        {
+#ifdef EVALPOTENTIAL
+          P[i].PM_Potential += value * fac;
+#endif /* #ifdef EVALPOTENTIAL */
+        }
+      else
+        P[i].GravPM[dim] += value;
+    }
+}
+
+#else /* #ifdef PM_ZOOM_OPTIMIZED */
+
+/*
+ *  Here come the routines for a different communication algorithm that is
+ *  better suited for homogeneously loaded boxes.
+ */
+
+/*!
\brief Structure for particle buffer.
+ *
+ *  One entry carries the mass and the position of a single particle. The
+ *  entries to be sent are collected in partout (nexport entries), the
+ *  entries received from other tasks are stored in partin (nimport entries).
+ */
+static struct partbuf
+{
+  MyFloat Mass;
+  MyFloat Pos[3];
+} * partin, *partout;
+
+static size_t nimport, nexport; /* number of partbuf entries received / sent by this task */
+
+static size_t *Sndpm_count, *Sndpm_offset; /* per thread and destination task: number of entries and their start in partout */
+static size_t *Rcvpm_count, *Rcvpm_offset; /* per source task: number of entries and their start in partin */
+
+/*! \brief Prepares density field for PM calculation in uniform box optimized
+ *         algorithm.
+ *
+ *  The routine proceeds in four steps:
+ *   1. every local particle is counted once for each distinct task that owns
+ *      one of the slabs (x and x+1) or, with FFT_COLUMN_BASED, one of the
+ *      four (x,y) columns it touches;
+ *   2. the counts are turned into offsets and exchanged with MPI_Alltoall;
+ *   3. mass and position of the particles are copied into partout and
+ *      shipped with myMPI_Alltoallv into partin;
+ *   4. rhogrid is allocated, cleared, and the mass of every imported particle
+ *      is distributed with tri-linear weights over those of its eight
+ *      neighbouring cells that are stored on this task.
+ *
+ *  Sndpm_count, Sndpm_offset, Rcvpm_count, Rcvpm_offset, partin and rhogrid
+ *  are allocated here and are still allocated on return; partout (and aux in
+ *  the column based variant) are released before returning.
+ *
+ *  NOTE(review): the loops over the imported particles use the 'int' counter
+ *  i against the 'size_t' bound nimport - confirm that nimport cannot exceed
+ *  INT_MAX.
+ *
+ *  \param[in] mode Selects scaling and wrapping of the mesh coordinates: for
+ *             mode 2 the factor PMGRID / BoxSize is multiplied by
+ *             POWERSPEC_FOLDFAC, for mode 3 by POWERSPEC_FOLDFAC squared; for
+ *             mode >= 2 cell indices are wrapped with a modulo operation,
+ *             otherwise by a single subtraction of the grid dimension.
+ *
+ *  \return void
+ */
+static void pmforce_uniform_optimized_prepare_density(int mode)
+{
+  int i, j;
+
+  double to_slab_fac = PMGRID / All.BoxSize; /* converts a coordinate into mesh-cell units */
+
+  if(mode == 2)
+    to_slab_fac *= POWERSPEC_FOLDFAC;
+  if(mode == 3)
+    to_slab_fac *= POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC;
+
+  /* We here enlarge NTask such that each thread gets his own cache line for send_count/send_offset.
+   * This should hopefully prevent a performance penalty from 'false sharing' for these variables
+   */
+  int multiNtask = roundup_to_multiple_of_cacheline_size(NTask * sizeof(size_t)) / sizeof(size_t);
+
+  Sndpm_count = (size_t *)mymalloc("Sndpm_count", MaxThreads * multiNtask * sizeof(size_t));
+  Sndpm_offset = (size_t *)mymalloc("Sndpm_offset", MaxThreads * multiNtask * sizeof(size_t));
+  Rcvpm_count = (size_t *)mymalloc("Rcvpm_count", NTask * sizeof(size_t));
+  Rcvpm_offset = (size_t *)mymalloc("Rcvpm_offset", NTask * sizeof(size_t));
+
+  /* determine the slabs/columns each particle accesses */
+  {
+    size_t *send_count = Sndpm_count + get_thread_num() * multiNtask;
+
+    /* each thread needs to do the loop to clear its send_count[] array */
+    for(j = 0; j < NTask; j++)
+      send_count[j] = 0;
+
+    for(i = 0; i < NumPart; i++)
+      {
+        MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+        MyDouble posw[3], xtmp, ytmp, ztmp;
+        if(P[i].Type == 0)
+          {
+            posw[0] = WRAP_X(SphP[i].Center[0]);
+            posw[1] = WRAP_Y(SphP[i].Center[1]);
+            posw[2] = WRAP_Z(SphP[i].Center[2]);
+
+            pos = posw;
+          }
+        else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+          pos = P[i].Pos;
+
+        int slab_x = (int)(to_slab_fac * pos[0]);
+        int slab_xx = slab_x + 1;
+
+        if(mode >= 2)
+          {
+            slab_x %= GRIDX;
+            slab_xx %= GRIDX;
+          }
+        else
+          {
+            if(slab_x >= GRIDX)
+              slab_x -= GRIDX;
+
+            if(slab_xx >= GRIDX)
+              slab_xx -= GRIDX;
+          }
+
+#ifndef FFT_COLUMN_BASED
+        int task0 = myplan.slab_to_task[slab_x];
+        int task1 = myplan.slab_to_task[slab_xx];
+
+        send_count[task0]++; /* one copy of the particle per distinct destination task */
+        if(task0 != task1)
+          send_count[task1]++;
+#else /* #ifndef FFT_COLUMN_BASED */
+        int slab_y = (int)(to_slab_fac * pos[1]);
+        int slab_yy = slab_y + 1;
+
+        if(mode >= 2)
+          {
+            slab_y %= GRIDY;
+            slab_yy %= GRIDY;
+          }
+        else
+          {
+            if(slab_y >= GRIDY)
+              slab_y -= GRIDY;
+
+            if(slab_yy >= GRIDY)
+              slab_yy -= GRIDY;
+          }
+
+        int column0 = slab_x * GRIDY + slab_y;
+        int column1 = slab_x * GRIDY + slab_yy;
+        int column2 = slab_xx * GRIDY + slab_y;
+        int column3 = slab_xx * GRIDY + slab_yy;
+
+        int task0, task1, task2, task3;
+
+        if(column0 < myplan.pivotcol)
+          task0 = column0 / myplan.avg;
+        else
+          task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column1 < myplan.pivotcol)
+          task1 = column1 / myplan.avg;
+        else
+          task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column2 < myplan.pivotcol)
+          task2 = column2 / myplan.avg;
+        else
+          task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column3 < myplan.pivotcol)
+          task3 = column3 / myplan.avg;
+        else
+          task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        send_count[task0]++; /* one copy of the particle per distinct destination task */
+        if(task1 != task0)
+          send_count[task1]++;
+        if(task2 != task1 && task2 != task0)
+          send_count[task2]++;
+        if(task3 != task0 && task3 != task1 && task3 != task2)
+          send_count[task3]++;
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+      }
+  }
+
+  /* collect thread-specific offset table and collect the results from the other threads */
+  for(i = 0, Sndpm_offset[0] = 0; i < NTask; i++)
+    for(j = 0; j < MaxThreads; j++)
+      {
+        int ind_prev, ind = j * multiNtask + i;
+        if(ind > 0)
+          {
+            if(j == 0)
+              ind_prev = (MaxThreads - 1) * multiNtask + i - 1; /* last thread's entry for the previous task */
+            else
+              ind_prev = ind - multiNtask; /* previous thread's entry for the same task */
+
+            Sndpm_offset[ind] = Sndpm_offset[ind_prev] + Sndpm_count[ind_prev];
+          }
+      }
+
+  for(j = 1; j < MaxThreads; j++)
+    for(i = 0; i < NTask; i++)
+      Sndpm_count[i] += Sndpm_count[i + j * multiNtask];
+
+  MPI_Alltoall(Sndpm_count, sizeof(size_t), MPI_BYTE, Rcvpm_count, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
+
+  for(j = 0, nimport = 0, nexport = 0, Rcvpm_offset[0] = 0, Sndpm_offset[0] = 0; j < NTask; j++)
+    {
+      nexport += Sndpm_count[j];
+      nimport += Rcvpm_count[j];
+
+      if(j > 0)
+        {
+          Sndpm_offset[j] = Sndpm_offset[j - 1] + Sndpm_count[j - 1];
+          Rcvpm_offset[j] = Rcvpm_offset[j - 1] + Rcvpm_count[j - 1];
+        }
+    }
+
+  /* allocate import and export buffer */
+  partin = (struct partbuf *)mymalloc("partin", nimport * sizeof(struct partbuf));
+  partout = (struct partbuf *)mymalloc("partout", nexport * sizeof(struct partbuf));
+
+  {
+    size_t *send_count = Sndpm_count + get_thread_num() * multiNtask;
+    size_t *send_offset = Sndpm_offset + get_thread_num() * multiNtask;
+
+    for(j = 0; j < NTask; j++)
+      send_count[j] = 0; /* reused as running fill position within each destination's section */
+
+    /* fill export buffer */
+    for(i = 0; i < NumPart; i++)
+      {
+        MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+        MyDouble posw[3], xtmp, ytmp, ztmp;
+        if(P[i].Type == 0)
+          {
+            posw[0] = WRAP_X(SphP[i].Center[0]);
+            posw[1] = WRAP_Y(SphP[i].Center[1]);
+            posw[2] = WRAP_Z(SphP[i].Center[2]);
+
+            pos = posw;
+          }
+        else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+          pos = P[i].Pos;
+
+        int slab_x = (int)(to_slab_fac * pos[0]);
+        int slab_xx = slab_x + 1;
+
+        if(mode >= 2)
+          {
+            slab_x %= GRIDX;
+            slab_xx %= GRIDX;
+          }
+        else
+          {
+            if(slab_x >= GRIDX)
+              slab_x -= GRIDX;
+
+            if(slab_xx >= GRIDX)
+              slab_xx -= GRIDX;
+          }
+
+#ifndef FFT_COLUMN_BASED
+        int task0 = myplan.slab_to_task[slab_x];
+        int task1 = myplan.slab_to_task[slab_xx];
+
+        size_t ind0 = send_offset[task0] + send_count[task0]++;
+        partout[ind0].Mass = P[i].Mass;
+        for(j = 0; j < 3; j++)
+          partout[ind0].Pos[j] = pos[j];
+
+        if(task0 != task1)
+          {
+            size_t ind1 = send_offset[task1] + send_count[task1]++;
+            partout[ind1].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind1].Pos[j] = pos[j];
+          }
+#else /* #ifndef FFT_COLUMN_BASED */
+        int slab_y = (int)(to_slab_fac * pos[1]);
+        int slab_yy = slab_y + 1;
+
+        if(mode >= 2)
+          {
+            slab_y %= GRIDY;
+            slab_yy %= GRIDY;
+          }
+        else
+          {
+            if(slab_y >= GRIDY)
+              slab_y -= GRIDY;
+
+            if(slab_yy >= GRIDY)
+              slab_yy -= GRIDY;
+          }
+
+        int column0 = slab_x * GRIDY + slab_y;
+        int column1 = slab_x * GRIDY + slab_yy;
+        int column2 = slab_xx * GRIDY + slab_y;
+        int column3 = slab_xx * GRIDY + slab_yy;
+
+        int task0, task1, task2, task3;
+
+        if(column0 < myplan.pivotcol)
+          task0 = column0 / myplan.avg;
+        else
+          task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column1 < myplan.pivotcol)
+          task1 = column1 / myplan.avg;
+        else
+          task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column2 < myplan.pivotcol)
+          task2 = column2 / myplan.avg;
+        else
+          task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column3 < myplan.pivotcol)
+          task3 = column3 / myplan.avg;
+        else
+          task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        size_t ind0 = send_offset[task0] + send_count[task0]++;
+        partout[ind0].Mass = P[i].Mass;
+        for(j = 0; j < 3; j++)
+          partout[ind0].Pos[j] = pos[j];
+
+        if(task1 != task0)
+          {
+            size_t ind1 = send_offset[task1] + send_count[task1]++;
+            partout[ind1].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind1].Pos[j] = pos[j];
+          }
+        if(task2 != task1 && task2 != task0)
+          {
+            size_t ind2 = send_offset[task2] + send_count[task2]++;
+            partout[ind2].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind2].Pos[j] = pos[j];
+          }
+        if(task3 != task0 && task3 != task1 && task3 != task2)
+          {
+            size_t ind3 = send_offset[task3] + send_count[task3]++;
+            partout[ind3].Mass = P[i].Mass;
+            for(j = 0; j < 3; j++)
+              partout[ind3].Pos[j] = pos[j];
+          }
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+      }
+  }
+
+  /* collect the send_count[] results from the other threads */
+  for(j = 1; j < MaxThreads; j++)
+    for(i = 0; i < NTask; i++)
+      Sndpm_count[i] += Sndpm_count[i + j * multiNtask];
+
+  int flag_big = 0, flag_big_all;
+  for(i = 0; i < NTask; i++)
+    if(Sndpm_count[i] * sizeof(struct partbuf) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+      flag_big = 1;
+
+  /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
+   * transfer the data in chunks.
+   */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  /* exchange particle data */
+  myMPI_Alltoallv(partout, Sndpm_count, Sndpm_offset, partin, Rcvpm_count, Rcvpm_offset, sizeof(struct partbuf), flag_big_all,
+                  MPI_COMM_WORLD);
+
+  myfree(partout);
+
+  /* allocate density field */
+  rhogrid = (fft_real *)mymalloc("rhogrid", maxfftsize * sizeof(fft_real));
+
+  /* clear local FFT-mesh density field */
+  large_array_offset ii;
+  for(ii = 0; ii < maxfftsize; ii++)
+    rhogrid[ii] = 0;
+
+#ifndef FFT_COLUMN_BASED
+  /* bin particle data onto mesh, in multi-threaded fashion */
+  {
+    int tid = get_thread_num();
+
+    int first_y, count_y;
+    subdivide_evenly(GRIDY, MaxThreads, tid, &first_y, &count_y); /* presumably assigns this thread a contiguous range of y-indices - confirm */
+    int last_y = first_y + count_y - 1;
+
+    for(i = 0; i < nimport; i++)
+      {
+        int slab_y = (int)(to_slab_fac * partin[i].Pos[1]);
+        int slab_yy = slab_y + 1;
+        double dy = to_slab_fac * partin[i].Pos[1] - slab_y;
+
+        if(mode >= 2)
+          {
+            slab_y %= GRIDY;
+            slab_yy %= GRIDY;
+          }
+        else
+          {
+            if(slab_y >= GRIDY)
+              slab_y -= GRIDY;
+
+            if(slab_yy >= GRIDY)
+              slab_yy -= GRIDY;
+          }
+
+        int flag_slab_y, flag_slab_yy;
+
+        if(slab_y >= first_y && slab_y <= last_y)
+          flag_slab_y = 1;
+        else
+          flag_slab_y = 0;
+
+        if(slab_yy >= first_y && slab_yy <= last_y)
+          flag_slab_yy = 1;
+        else
+          flag_slab_yy = 0;
+
+        if(flag_slab_y || flag_slab_yy) /* only cells whose y-index lies in [first_y, last_y] are updated below */
+          {
+            double mass = partin[i].Mass;
+
+            int slab_x = (int)(to_slab_fac * partin[i].Pos[0]);
+            int slab_z = (int)(to_slab_fac * partin[i].Pos[2]);
+            int slab_xx = slab_x + 1;
+            int slab_zz = slab_z + 1;
+
+            double dx = to_slab_fac * partin[i].Pos[0] - slab_x;
+            double dz = to_slab_fac * partin[i].Pos[2] - slab_z;
+
+            if(mode >= 2)
+              {
+                slab_x %= GRIDX;
+                slab_z %= GRIDZ;
+                slab_xx %= GRIDX;
+                slab_zz %= GRIDZ;
+              }
+            else
+              {
+                if(slab_x >= GRIDX)
+                  slab_x -= GRIDX;
+                if(slab_z >= GRIDZ)
+                  slab_z -= GRIDZ;
+
+                if(slab_xx >= GRIDX)
+                  slab_xx -= GRIDX;
+                if(slab_zz >= GRIDZ)
+                  slab_zz -= GRIDZ;
+              }
+
+            int flag_slab_x, flag_slab_xx;
+
+            if(myplan.slab_to_task[slab_x] == ThisTask)
+              {
+                slab_x -= myplan.first_slab_x_of_task[ThisTask]; /* index relative to the first slab stored on this task */
+                flag_slab_x = 1;
+              }
+            else
+              flag_slab_x = 0;
+
+            if(myplan.slab_to_task[slab_xx] == ThisTask)
+              {
+                slab_xx -= myplan.first_slab_x_of_task[ThisTask];
+                flag_slab_xx = 1;
+              }
+            else
+              flag_slab_xx = 0;
+
+            if(flag_slab_x)
+              {
+                if(flag_slab_y)
+                  {
+                    rhogrid[FI(slab_x, slab_y, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz));
+                    rhogrid[FI(slab_x, slab_y, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz));
+                  }
+
+                if(flag_slab_yy)
+                  {
+                    rhogrid[FI(slab_x, slab_yy, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz));
+                    rhogrid[FI(slab_x, slab_yy, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz));
+                  }
+              }
+
+            if(flag_slab_xx)
+              {
+                if(flag_slab_y)
+                  {
+                    rhogrid[FI(slab_xx, slab_y, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz));
+                    rhogrid[FI(slab_xx, slab_y, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz));
+                  }
+
+                if(flag_slab_yy)
+                  {
+                    rhogrid[FI(slab_xx, slab_yy, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz));
+                    rhogrid[FI(slab_xx, slab_yy, slab_zz)] += (mass * (dx) * (dy) * (dz));
+                  }
+              }
+          }
+      }
+  }
+
+#else /* #ifndef FFT_COLUMN_BASED */
+
+  struct data_cols
+  {
+    int col0, col1, col2, col3;
+    double dx, dy;
+  } * aux; /* per imported particle: the four (x,y) column numbers and the x/y weights, computed once up front */
+
+  aux = mymalloc("aux", nimport * sizeof(struct data_cols));
+
+  for(i = 0; i < nimport; i++)
+    {
+      int slab_x = (int)(to_slab_fac * partin[i].Pos[0]);
+      int slab_xx = slab_x + 1;
+
+      int slab_y = (int)(to_slab_fac * partin[i].Pos[1]);
+      int slab_yy = slab_y + 1;
+
+      aux[i].dx = to_slab_fac * partin[i].Pos[0] - slab_x;
+      aux[i].dy = to_slab_fac * partin[i].Pos[1] - slab_y;
+
+      if(mode >= 2)
+        {
+          slab_x %= GRIDX;
+          slab_xx %= GRIDX;
+          slab_y %= GRIDY;
+          slab_yy %= GRIDY;
+        }
+      else
+        {
+          if(slab_x >= GRIDX)
+            slab_x -= GRIDX;
+          if(slab_xx >= GRIDX)
+            slab_xx -= GRIDX;
+
+          if(slab_y >= GRIDY)
+            slab_y -= GRIDY;
+          if(slab_yy >= GRIDY)
+            slab_yy -= GRIDY;
+        }
+
+      aux[i].col0 = slab_x * GRIDY + slab_y;
+      aux[i].col1 = slab_x * GRIDY + slab_yy;
+      aux[i].col2 = slab_xx * GRIDY + slab_y;
+      aux[i].col3 = slab_xx * GRIDY + slab_yy;
+    }
+
+  {
+    int tid = get_thread_num();
+
+    int first_col, last_col, count_col;
+    subdivide_evenly(myplan.base_ncol, MaxThreads, tid, &first_col, &count_col);
+    last_col = first_col + count_col - 1;
+    first_col += myplan.base_firstcol; /* shift the range by the first column stored on this task */
+    last_col += myplan.base_firstcol;
+
+    for(i = 0; i < nimport; i++)
+      {
+        int flag0, flag1, flag2, flag3;
+        int col0 = aux[i].col0;
+        int col1 = aux[i].col1;
+        int col2 = aux[i].col2;
+        int col3 = aux[i].col3;
+
+        if(col0 >= first_col && col0 <= last_col)
+          flag0 = 1;
+        else
+          flag0 = 0;
+
+        if(col1 >= first_col && col1 <= last_col)
+          flag1 = 1;
+        else
+          flag1 = 0;
+
+        if(col2 >= first_col && col2 <= last_col)
+          flag2 = 1;
+        else
+          flag2 = 0;
+
+        if(col3 >= first_col && col3 <= last_col)
+          flag3 = 1;
+        else
+          flag3 = 0;
+
+        if(flag0 || flag1 || flag2 || flag3)
+          {
+            double mass = partin[i].Mass;
+
+            double dx = aux[i].dx;
+            double dy = aux[i].dy;
+
+            int slab_z = (int)(to_slab_fac * partin[i].Pos[2]);
+            int slab_zz = slab_z + 1;
+
+            double dz = to_slab_fac * partin[i].Pos[2] - slab_z;
+
+            if(mode >= 2)
+              {
+                slab_z %= GRIDZ;
+                slab_zz %= GRIDZ;
+              }
+            else
+              {
+                if(slab_z >= GRIDZ)
+                  slab_z -= GRIDZ;
+
+                if(slab_zz >= GRIDZ)
+                  slab_zz -= GRIDZ;
+              }
+
+            if(flag0)
+              {
+                rhogrid[FC(col0, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz));
+                rhogrid[FC(col0, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz));
+              }
+
+            if(flag1)
+              {
+                rhogrid[FC(col1, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz));
+                rhogrid[FC(col1, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz));
+              }
+
+            if(flag2)
+              {
+                rhogrid[FC(col2, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz));
+                rhogrid[FC(col2, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz));
+              }
+
+            if(flag3)
+              {
+                rhogrid[FC(col3, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz));
+                rhogrid[FC(col3, slab_zz)] += (mass * (dx) * (dy) * (dz));
+              }
+          }
+      }
+  }
+
+  myfree(aux);
+
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+}
+
+/* \brief Function to read out the force component corresponding to spatial
+ *         dimension 'dim'.
+ *
+ *  For every imported particle in partin, the contribution of the locally
+ *  stored mesh cells to the tri-linear interpolation is accumulated in
+ *  flistin. These partial values are sent back with myMPI_Alltoallv, using
+ *  the receive counts/offsets of the particle exchange as the send side, and
+ *  are then summed per particle by replaying the order of destination tasks
+ *  that was used when partout was filled. The grid that is read is rhogrid
+ *  for dim < 0 and forcegrid otherwise. The mesh coordinates are always
+ *  computed with the factor PMGRID / BoxSize, without any folding factor.
+ *
+ *  \param[in] dim Dimension to be read out; If dim is negative, potential values
+ *             are read out and assigned to particles.
+ *
+ *  \return void
+ */
+static void pmforce_uniform_optimized_readout_forces_or_potential(int dim)
+{
+#ifdef EVALPOTENTIAL
+  double fac = 4 * M_PI * All.G / (pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ); /* to get potential */
+#endif /* #ifdef EVALPOTENTIAL */
+
+  double to_slab_fac = PMGRID / All.BoxSize;
+
+  double *flistin = (double *)mymalloc("flistin", nimport * sizeof(double)); /* one partial value per entry of partin */
+  double *flistout = (double *)mymalloc("flistout", nexport * sizeof(double)); /* one returned value per exported entry */
+
+  fft_real *grid;
+
+  if(dim < 0)
+    grid = rhogrid;
+  else
+    grid = forcegrid;
+
+  size_t i;
+  for(i = 0; i < nimport; i++)
+    {
+      flistin[i] = 0;
+
+      int slab_x = (int)(to_slab_fac * partin[i].Pos[0]);
+      int slab_y = (int)(to_slab_fac * partin[i].Pos[1]);
+      int slab_z = (int)(to_slab_fac * partin[i].Pos[2]);
+
+      double dx = to_slab_fac * partin[i].Pos[0] - slab_x;
+      double dy = to_slab_fac * partin[i].Pos[1] - slab_y;
+      double dz = to_slab_fac * partin[i].Pos[2] - slab_z;
+
+      if(slab_x >= GRIDX)
+        slab_x -= GRIDX;
+      if(slab_y >= GRIDY)
+        slab_y -= GRIDY;
+      if(slab_z >= GRIDZ)
+        slab_z -= GRIDZ;
+
+      int slab_xx = slab_x + 1;
+      int slab_yy = slab_y + 1;
+      int slab_zz =
slab_z + 1;
+
+      if(slab_xx >= GRIDX)
+        slab_xx -= GRIDX;
+      if(slab_yy >= GRIDY)
+        slab_yy -= GRIDY;
+      if(slab_zz >= GRIDZ)
+        slab_zz -= GRIDZ;
+
+#ifndef FFT_COLUMN_BASED
+      if(myplan.slab_to_task[slab_x] == ThisTask) /* only cells of slabs stored on this task contribute here */
+        {
+          slab_x -= myplan.first_slab_x_of_task[ThisTask]; /* index relative to the first slab stored on this task */
+
+          flistin[i] += grid[FI(slab_x, slab_y, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
+                        grid[FI(slab_x, slab_y, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz) +
+                        grid[FI(slab_x, slab_yy, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) +
+                        grid[FI(slab_x, slab_yy, slab_zz)] * (1.0 - dx) * (dy) * (dz);
+        }
+
+      if(myplan.slab_to_task[slab_xx] == ThisTask)
+        {
+          slab_xx -= myplan.first_slab_x_of_task[ThisTask];
+
+          flistin[i] += grid[FI(slab_xx, slab_y, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) +
+                        grid[FI(slab_xx, slab_y, slab_zz)] * (dx) * (1.0 - dy) * (dz) +
+                        grid[FI(slab_xx, slab_yy, slab_z)] * (dx) * (dy) * (1.0 - dz) +
+                        grid[FI(slab_xx, slab_yy, slab_zz)] * (dx) * (dy) * (dz);
+        }
+#else /* #ifndef FFT_COLUMN_BASED */
+      int column0 = slab_x * GRIDY + slab_y;
+      int column1 = slab_x * GRIDY + slab_yy;
+      int column2 = slab_xx * GRIDY + slab_y;
+      int column3 = slab_xx * GRIDY + slab_yy;
+
+      if(column0 >= myplan.base_firstcol && column0 <= myplan.base_lastcol) /* only columns stored on this task contribute */
+        {
+          flistin[i] += grid[FC(column0, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
+                        grid[FC(column0, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz);
+        }
+      if(column1 >= myplan.base_firstcol && column1 <= myplan.base_lastcol)
+        {
+          flistin[i] +=
+              grid[FC(column1, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) + grid[FC(column1, slab_zz)] * (1.0 - dx) * (dy) * (dz);
+        }
+
+      if(column2 >= myplan.base_firstcol && column2 <= myplan.base_lastcol)
+        {
+          flistin[i] +=
+              grid[FC(column2, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) + grid[FC(column2, slab_zz)] * (dx) * (1.0 - dy) * (dz);
+        }
+
+      if(column3 >= myplan.base_firstcol && column3 <= myplan.base_lastcol)
+        {
+          flistin[i] += grid[FC(column3, slab_z)] * (dx) * (dy) * (1.0 - dz) + grid[FC(column3, slab_zz)] * (dx) * (dy) * (dz);
+        }
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+    }
+
+  /* exchange the potential component data */
+  int flag_big = 0, flag_big_all;
+  for(i = 0; i < NTask; i++)
+    if(Sndpm_count[i] * sizeof(double) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
+      flag_big = 1;
+
+  /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
+   * transfer the data in chunks.
+   */
+  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  /* exchange data (reverse direction of the particle exchange: the Rcvpm tables describe what is sent,
+   * the Sndpm tables describe what is received)
+   */
+  myMPI_Alltoallv(flistin, Rcvpm_count, Rcvpm_offset, flistout, Sndpm_count, Sndpm_offset, sizeof(double), flag_big_all,
+                  MPI_COMM_WORLD);
+
+  /* now assign them to the correct particles; the values in flistout are matched to particles purely by
+   * position, so the loop below has to visit the particles and their destination tasks in the same order
+   * as the loop that filled the export buffer
+   */
+  int multiNtask = roundup_to_multiple_of_cacheline_size(NTask * sizeof(size_t)) / sizeof(size_t);
+
+  {
+    size_t *send_count = Sndpm_count + get_thread_num() * multiNtask;
+    size_t *send_offset = Sndpm_offset + get_thread_num() * multiNtask;
+
+    int j;
+    for(j = 0; j < NTask; j++)
+      send_count[j] = 0; /* reused as running read position within each task's section of flistout */
+
+    int i; /* hides the size_t i declared above for the duration of this block */
+    for(i = 0; i < NumPart; i++)
+      {
+        MyDouble *pos;
+
+#ifdef CELL_CENTER_GRAVITY
+        MyDouble posw[3], xtmp, ytmp, ztmp;
+        if(P[i].Type == 0)
+          {
+            posw[0] = WRAP_X(SphP[i].Center[0]);
+            posw[1] = WRAP_Y(SphP[i].Center[1]);
+            posw[2] = WRAP_Z(SphP[i].Center[2]);
+
+            pos = posw;
+          }
+        else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+          pos = P[i].Pos;
+
+        int slab_x = (int)(to_slab_fac * pos[0]);
+        int slab_xx = slab_x + 1;
+
+        if(slab_x >= GRIDX)
+          slab_x -= GRIDX;
+
+        if(slab_xx >= GRIDX)
+          slab_xx -= GRIDX;
+
+#ifndef FFT_COLUMN_BASED
+        int task0 = myplan.slab_to_task[slab_x];
+        int task1 = myplan.slab_to_task[slab_xx];
+
+        double value = flistout[send_offset[task0] + send_count[task0]++];
+
+        if(task0 != task1)
+          value += flistout[send_offset[task1] + send_count[task1]++]; /* add the partial value from the second task */
+#else /* #ifndef FFT_COLUMN_BASED */
+        int slab_y = (int)(to_slab_fac * pos[1]);
+        int slab_yy = slab_y + 1;
+
+        if(slab_y >= GRIDY)
+          slab_y -= GRIDY;
+
+        if(slab_yy >= GRIDY)
+          slab_yy -= GRIDY;
+
+        int column0 = slab_x * GRIDY + slab_y;
+        int column1 = slab_x * GRIDY + slab_yy;
+        int column2 = slab_xx * GRIDY + slab_y;
+        int column3 = slab_xx * GRIDY + slab_yy;
+
+        int task0, task1, task2, task3;
+
+        if(column0 < myplan.pivotcol)
+          task0 = column0 / myplan.avg;
+        else
+          task0 = (column0 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column1 < myplan.pivotcol)
+          task1 = column1 / myplan.avg;
+        else
+          task1 = (column1 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column2 < myplan.pivotcol)
+          task2 = column2 / myplan.avg;
+        else
+          task2 = (column2 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        if(column3 < myplan.pivotcol)
+          task3 = column3 / myplan.avg;
+        else
+          task3 = (column3 - myplan.pivotcol) / (myplan.avg - 1) + myplan.tasklastsection;
+
+        double value = flistout[send_offset[task0] + send_count[task0]++];
+
+        if(task1 != task0)
+          value += flistout[send_offset[task1] + send_count[task1]++];
+
+        if(task2 != task1 && task2 != task0)
+          value += flistout[send_offset[task2] + send_count[task2]++];
+
+        if(task3 != task0 && task3 != task1 && task3 != task2)
+          value += flistout[send_offset[task3] + send_count[task3]++];
+#endif /* #ifndef FFT_COLUMN_BASED */
+        if(dim < 0)
+          {
+#ifdef EVALPOTENTIAL
+            P[i].PM_Potential += value * fac;
+#endif /* #ifdef EVALPOTENTIAL */
+          }
+        else
+          P[i].GravPM[dim] += value;
+      }
+  }
+
+  int j;
+  /* restore total Sndpm_count */
+  for(j = 1; j < MaxThreads; j++)
+    for(i = 0; i < NTask; i++)
+      Sndpm_count[i] += Sndpm_count[i + j * multiNtask];
+
+  myfree(flistout);
+  myfree(flistin);
+}
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+
+/*! \brief Calculates the long-range periodic force given the particle
+ *         positions using the PM method.
+ *
+ *  The force is Gaussian filtered with Asmth, given in
+ *  mesh-cell units. We carry out a CIC charge assignment, and compute the
+ *  potential by fast Fourier transform methods.
The potential is + * finite-differenced using a 4-point finite differencing formula, and the + * forces are interpolated tri-linearly to the particle positions. The CIC + * kernel is deconvolved. + * + * \param[in] mode For mode=0, normal force calculation, mode=1, only density + * field construction for a power spectrum calculation. In the + * later case, typelist flags the particle types that should be + * included in the density field. + * \param[in] typelist Flags of particle types included in power spectrum + * calculation. + * + * \return void + */ +void pmforce_periodic(int mode, int *typelist) +{ + int x, y, z, xx, yy, zz; + + double tstart = second(); + + if(mode == 0) + mpi_printf("PM-PERIODIC: Starting periodic PM calculation. (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + +#ifndef NUMPART_PER_TASK_LARGE + if((((long long)NumPart) << 3) >= (((long long)1) << 31)) + terminate("We are dealing with a too large particle number per MPI rank - enabling NUMPART_PER_TASK_LARGE might help."); +#endif /* #ifndef NUMPART_PER_TASK_LARGE */ + + double asmth2 = All.Asmth[0] * All.Asmth[0]; + double d = All.BoxSize / PMGRID; + double dhalf = 0.5 * d; + + double fac = 4 * M_PI * All.G / (pow(All.BoxSize, 3) * STRETCHX * STRETCHY * STRETCHZ); /* to get potential */ + + fac *= 1 / (2 * d); /* for finite differencing */ + +#ifdef PM_ZOOM_OPTIMIZED + pmforce_zoom_optimized_prepare_density(mode, typelist); +#else /* #ifdef PM_ZOOM_OPTIMIZED */ + pmforce_uniform_optimized_prepare_density(mode); +#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */ + + /* allocate the memory to hold the FFT fields */ + + forcegrid = (fft_real *)mymalloc("forcegrid", maxfftsize * sizeof(fft_real)); + + workspace = forcegrid; + +#ifndef FFT_COLUMN_BASED + fft_of_rhogrid = (fft_complex *)&rhogrid[0]; +#else /* #ifndef FFT_COLUMN_BASED */ + fft_of_rhogrid = (fft_complex *)&workspace[0]; +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + /* Do the FFT of the density field */ +#ifndef 
FFT_COLUMN_BASED + my_slab_based_fft(&myplan, &rhogrid[0], &workspace[0], 1); +#else /* #ifndef FFT_COLUMN_BASED */ + my_column_based_fft(&myplan, rhogrid, workspace, 1); /* result is in workspace, not in rhogrid ! */ +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + if(mode != 0) + { + /* used to measure powerspectrum */ + } + else + { + /* multiply with Green's function in order to obtain the potential (or forces for spectral diffencing) */ + + double kfacx = 2.0 * M_PI / (GRIDX * d); + double kfacy = 2.0 * M_PI / (GRIDY * d); + double kfacz = 2.0 * M_PI / (GRIDZ * d); + +#ifdef FFT_COLUMN_BASED + for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip++) + { + large_array_offset ipcell = ip + ((large_array_offset)myplan.second_transposed_firstcol) * GRIDX; + y = ipcell / (GRIDX * GRIDz); + int yr = ipcell % (GRIDX * GRIDz); + z = yr / GRIDX; + x = yr % GRIDX; +#else /* #ifdef FFT_COLUMN_BASED */ + for(x = 0; x < GRIDX; x++) + for(y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++) + for(z = 0; z < GRIDz; z++) + { +#endif /* #ifdef FFT_COLUMN_BASED #else */ + if(x >= (GRIDX / 2)) + xx = x - GRIDX; + else + xx = x; + if(y >= (GRIDY / 2)) + yy = y - GRIDY; + else + yy = y; + if(z >= (GRIDZ / 2)) + zz = z - GRIDZ; + else + zz = z; + + double kx = kfacx * xx; + double ky = kfacy * yy; + double kz = kfacz * zz; + + double k2 = kx * kx + ky * ky + kz * kz; + + if(k2 > 0) + { + double smth = -exp(-k2 * asmth2) / k2; + + /* do deconvolution */ + + double fx = 1, fy = 1, fz = 1; + + if(xx != 0) + { + fx = kx * dhalf; + fx = sin(fx) / fx; + } + if(yy != 0) + { + fy = ky * dhalf; + fy = sin(fy) / fy; + } + if(zz != 0) + { + fz = kz * dhalf; + fz = sin(fz) / fz; + } + + double ff = 1 / (fx * fy * fz); + double deconv = ff * ff * ff * ff; + + smth *= deconv; /* deconvolution */ + +#ifndef FFT_COLUMN_BASED + large_array_offset ip = ((large_array_offset)GRIDz) * (GRIDX * (y - myplan.slabstart_y) + x) + z; +#endif /* #ifndef FFT_COLUMN_BASED */ + 
+ fft_of_rhogrid[ip][0] *= smth; + fft_of_rhogrid[ip][1] *= smth; + } + } + +#ifdef FFT_COLUMN_BASED + if(myplan.second_transposed_firstcol == 0) + fft_of_rhogrid[0][0] = fft_of_rhogrid[0][1] = 0.0; +#else /* #ifdef FFT_COLUMN_BASED */ + if(myplan.slabstart_y == 0) + fft_of_rhogrid[0][0] = fft_of_rhogrid[0][1] = 0.0; +#endif /* #ifdef FFT_COLUMN_BASED #else */ + + /* Do the inverse FFT to get the potential/forces */ + +#ifndef FFT_COLUMN_BASED + my_slab_based_fft(&myplan, &rhogrid[0], &workspace[0], -1); +#else /* #ifndef FFT_COLUMN_BASED */ + my_column_based_fft(&myplan, workspace, rhogrid, -1); +#endif /* #ifndef FFT_COLUMN_BASED #else */ + + /* Now rhogrid holds the potential/forces */ + +#ifdef EVALPOTENTIAL +#ifdef PM_ZOOM_OPTIMIZED + pmforce_zoom_optimized_readout_forces_or_potential(-1); +#else /* #ifdef PM_ZOOM_OPTIMIZED */ + pmforce_uniform_optimized_readout_forces_or_potential(-1); +#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */ +#endif /* #ifdef EVALPOTENTIAL */ + + /* get the force components by finite differencing of the potential for each dimension, + * and send the results back to the right CPUs + */ + for(int dim = 2; dim >= 0; dim--) /* Calculate each component of the force. 
*/ + { + /* we do the x component last, because for differencing the potential in the x-direction, we need to construct the transpose + */ + +#ifndef FFT_COLUMN_BASED + if(dim == 0) + { + my_slab_transposeA(&myplan, rhogrid, + forcegrid); /* compute the transpose of the potential field for finite differencing */ + /* note: for the x-direction, we difference the transposed field */ + + for(x = 0; x < GRIDX; x++) + for(y = 0; y < myplan.nslab_y; y++) + for(z = 0; z < GRIDZ; z++) + { + int xrr = x + 2, xll = x - 2, xr = x + 1, xl = x - 1; + if(xr >= GRIDX) + xr -= GRIDX; + if(xrr >= GRIDX) + xrr -= GRIDX; + if(xl < 0) + xl += GRIDX; + if(xll < 0) + xll += GRIDX; + + forcegrid[NI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[NI(xl, y, z)] - rhogrid[NI(xr, y, z)]) - + (1.0 / 6) * (rhogrid[NI(xll, y, z)] - rhogrid[NI(xrr, y, z)])); + } + + my_slab_transposeB(&myplan, forcegrid, rhogrid); /* reverse the transpose from above */ + } + else + { + for(y = 0; y < GRIDY; y++) + for(x = 0; x < myplan.nslab_x; x++) + for(z = 0; z < GRIDZ; z++) + { + if(dim == 1) + { + int yr = y + 1, yl = y - 1, yrr = y + 2, yll = y - 2; + if(yr >= GRIDY) + yr -= GRIDY; + if(yrr >= GRIDY) + yrr -= GRIDY; + if(yl < 0) + yl += GRIDY; + if(yll < 0) + yll += GRIDY; + + forcegrid[FI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[FI(x, yl, z)] - rhogrid[FI(x, yr, z)]) - + (1.0 / 6) * (rhogrid[FI(x, yll, z)] - rhogrid[FI(x, yrr, z)])); + } + else if(dim == 2) + { + int zr = z + 1, zl = z - 1, zrr = z + 2, zll = z - 2; + if(zr >= GRIDZ) + zr -= GRIDZ; + if(zrr >= GRIDZ) + zrr -= GRIDZ; + if(zl < 0) + zl += GRIDZ; + if(zll < 0) + zll += GRIDZ; + + forcegrid[FI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[FI(x, y, zl)] - rhogrid[FI(x, y, zr)]) - + (1.0 / 6) * (rhogrid[FI(x, y, zll)] - rhogrid[FI(x, y, zrr)])); + } + } + } + +#else /* #ifndef FFT_COLUMN_BASED */ + + if(dim == 2) + { + for(large_array_offset i = 0; i < myplan.base_ncol; i++) + { + fft_real *forcep = &forcegrid[GRID2 * i]; + fft_real *potp = &rhogrid[GRID2 * 
i]; + + for(int z = 0; z < GRIDZ; z++) + { + int zr = z + 1; + int zl = z - 1; + int zrr = z + 2; + int zll = z - 2; + + if(zr >= GRIDZ) + zr -= GRIDZ; + if(zrr >= GRIDZ) + zrr -= GRIDZ; + if(zl < 0) + zl += GRIDZ; + if(zll < 0) + zll += GRIDZ; + + forcep[z] = fac * ((4.0 / 3) * (potp[zl] - potp[zr]) - (1.0 / 6) * (potp[zll] - potp[zrr])); + } + } + } + else if(dim == 1) + { + fft_real *scratch = mymalloc("scratch", myplan.fftsize * sizeof(fft_real)); /* need a third field as scratch space */ + memcpy(scratch, rhogrid, myplan.fftsize * sizeof(fft_real)); + + my_fft_swap23(&myplan, scratch, forcegrid); + + for(large_array_offset i = 0; i < myplan.ncol_XZ; i++) + { + fft_real *forcep = &scratch[GRIDY * i]; + fft_real *potp = &forcegrid[GRIDY * i]; + + for(int y = 0; y < GRIDY; y++) + { + int yr = y + 1; + int yl = y - 1; + int yrr = y + 2; + int yll = y - 2; + + if(yr >= GRIDY) + yr -= GRIDY; + if(yrr >= GRIDY) + yrr -= GRIDY; + if(yl < 0) + yl += GRIDY; + if(yll < 0) + yll += GRIDY; + + forcep[y] = fac * ((4.0 / 3) * (potp[yl] - potp[yr]) - (1.0 / 6) * (potp[yll] - potp[yrr])); + } + } + + my_fft_swap23back(&myplan, scratch, forcegrid); + myfree(scratch); + } + else if(dim == 0) + { + fft_real *scratch = mymalloc("scratch", myplan.fftsize * sizeof(fft_real)); /* need a third field as scratch space */ + memcpy(scratch, rhogrid, myplan.fftsize * sizeof(fft_real)); + + my_fft_swap13(&myplan, scratch, forcegrid); + + for(large_array_offset i = 0; i < myplan.ncol_YZ; i++) + { + fft_real *forcep = &scratch[GRIDX * i]; + fft_real *potp = &forcegrid[GRIDX * i]; + + for(int x = 0; x < GRIDX; x++) + { + int xr = x + 1; + int xl = x - 1; + int xrr = x + 2; + int xll = x - 2; + + if(xr >= GRIDX) + xr -= GRIDX; + if(xrr >= GRIDX) + xrr -= GRIDX; + if(xl < 0) + xl += GRIDX; + if(xll < 0) + xll += GRIDX; + + forcep[x] = fac * ((4.0 / 3) * (potp[xl] - potp[xr]) - (1.0 / 6) * (potp[xll] - potp[xrr])); + } + } + + my_fft_swap13back(&myplan, scratch, forcegrid); + myfree(scratch); + } 
+#endif /* #ifndef FFT_COLUMN_BASED #else */
+
+#ifdef PM_ZOOM_OPTIMIZED
+          pmforce_zoom_optimized_readout_forces_or_potential(dim);
+#else /* #ifdef PM_ZOOM_OPTIMIZED */
+          pmforce_uniform_optimized_readout_forces_or_potential(dim);
+#endif /* #ifdef PM_ZOOM_OPTIMIZED #else */
+        }
+    }
+
+  /* free stuff */
+
+  myfree(forcegrid);
+  myfree(rhogrid);
+
+#ifdef PM_ZOOM_OPTIMIZED
+  myfree(localfield_recvcount);
+  myfree(localfield_offset);
+  myfree(localfield_sendcount);
+  myfree(localfield_first);
+  myfree(localfield_data);
+  myfree(localfield_globalindex);
+  myfree(part);
+#else /* #ifdef PM_ZOOM_OPTIMIZED */
+  myfree(partin);
+  myfree(Rcvpm_offset);
+  myfree(Rcvpm_count);
+  myfree(Sndpm_offset);
+  myfree(Sndpm_count);
+#endif /* #ifdef PM_ZOOM_OPTIMIZED */
+
+  double tend = second();
+
+  if(mode == 0)
+    mpi_printf("PM-PERIODIC: done. (took %g seconds)\n", timediff(tstart, tend));
+}
+
+#ifdef PM_ZOOM_OPTIMIZED
+
+/*! \brief Sort function for 'part' array indices.
+ *
+ * Orders two indices into the 'part' array by the globalindex field of the
+ * 'part_slab_data' entries they refer to.
+ *
+ * NOTE(review): both arguments are dereferenced as int, whereas
+ * msort_pmperiodic_with_tmp() in this file walks its index array as
+ * large_numpart_type -- confirm the two widths agree before handing this
+ * comparator that array.
+ *
+ * \param[in] a Pointer to the first index into 'part'.
+ * \param[in] b Pointer to the second index into 'part'.
+ *
+ * \return -1 if part[*a].globalindex is smaller than part[*b].globalindex,
+ *         +1 if it is larger, 0 if both are equal.
+ */
+static int pm_periodic_compare_sortindex(const void *a, const void *b)
+{
+  if(part[*(int *)a].globalindex < part[*(int *)b].globalindex)
+    return -1;
+
+  if(part[*(int *)a].globalindex > part[*(int *)b].globalindex)
+    return +1;
+
+  return 0;
+}
+
+/*! \brief Implements the sorting function for mysort_pmperiodic().
+ *
+ * The index array is sorted using a merge sort algorithm.
+ *
+ * \param[in, out] b Index array to sort.
+ * \param[in] n Number of elements to sort.
+ * \param[out] t Temporary buffer array.
+ * + * \return void + */ +static void msort_pmperiodic_with_tmp(large_numpart_type *b, size_t n, large_numpart_type *t) +{ + large_numpart_type *tmp; + large_numpart_type *b1, *b2; + size_t n1, n2; + + if(n <= 1) + return; + + n1 = n / 2; + n2 = n - n1; + b1 = b; + b2 = b + n1; + + msort_pmperiodic_with_tmp(b1, n1, t); + msort_pmperiodic_with_tmp(b2, n2, t); + + tmp = t; + + while(n1 > 0 && n2 > 0) + { + if(part[*b1].globalindex <= part[*b2].globalindex) + { + --n1; + *tmp++ = *b1++; + } + else + { + --n2; + *tmp++ = *b2++; + } + } + + if(n1 > 0) + memcpy(tmp, b1, n1 * sizeof(large_numpart_type)); + + memcpy(b, t, (n - n2) * sizeof(large_numpart_type)); +} + +/*! \brief Sort the index array b of n entries using the sort kernel + * cmp. + * + * The parameter s is set to sizeof(int). The index array b is sorted + * according to the globalindex field of the referenced item in the 'part' + * array. + * + * \param[in, out] b The index array to sort. + * \param[in] n Number of entries in array b. + * \param[in] s Size of each entry (must be sizeof(int)). + * \param[in] cmp Comparison function. + */ +static void mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)) +{ + const size_t size = n * s; + + large_numpart_type *tmp = (large_numpart_type *)mymalloc("tmp", size); + + msort_pmperiodic_with_tmp((large_numpart_type *)b, n, tmp); + + myfree(tmp); +} +#endif /* #ifdef PM_ZOOM_OPTIMIZED */ + +#endif /* #if defined(PMGRID) */ diff --git a/src/amuse/community/arepo/src/gravity/pm/pm_periodic2d.c b/src/amuse/community/arepo/src/gravity/pm/pm_periodic2d.c new file mode 100644 index 0000000000..6ace982b68 --- /dev/null +++ b/src/amuse/community/arepo/src/gravity/pm/pm_periodic2d.c @@ -0,0 +1,905 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/gravity/pm/pm_periodic2d.c + * \date 05/2018 + * \brief Routines for periodic PM-force computation in 2d. + * \details contains functions: + * void pm2d_init_periodic(void) + * void pm2d_init_periodic_allocate(void) + * void pm2d_init_periodic_free(void) + * void pm2d_force_periodic(int mode) + * int pm2d_periodic_compare_sortindex(const void *a, const + * void *b) + * static void pm2d_msort_pmperiodic_with_tmp(int *b, size_t n, + * int *t) + * void pm2d_mysort_pmperiodic(void *b, size_t n, size_t s, + * int (*cmp) (const void *, const void *)) + * void pm2d_periodic_transposeA(fftw_real * field, + * fftw_real * scratch) + * void pm2d_periodic_transposeB(fftw_real * field, + * fftw_real * scratch) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#ifdef PMGRID +#ifndef GRAVITY_NOT_PERIODIC +#ifdef TWODIMS + +#ifdef NOTYPEPREFIX_FFTW +#include +#else /* #ifdef NOTYPEPREFIX_FFTW */ +#ifdef DOUBLEPRECISION_FFTW +#include /* double precision FFTW */ +#else /* #ifdef DOUBLEPRECISION_FFTW */ 
+#include
+#endif /* #ifdef DOUBLEPRECISION_FFTW #else */
+#endif /* #ifdef NOTYPEPREFIX_FFTW #else */
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+/* row length used when indexing the real-space grids (PMGRID2 * x + y) */
+#define PMGRID2 (2 * (PMGRID / 2 + 1))
+
+#if(PMGRID > 1024)
+typedef long long large_array_offset;
+#else /* #if (PMGRID > 1024) */
+typedef unsigned int large_array_offset;
+#endif /* #if (PMGRID > 1024) #else */
+
+#define d_fftw_real fftw_real
+
+static rfftwnd_mpi_plan fft_forward_plan, fft_inverse_plan;
+
+static int slab_to_task[PMGRID];  /* x-slab index -> task that holds it (filled in pm2d_init_periodic) */
+static int *slabs_x_per_task;     /* nslab_x of every task */
+static int *first_slab_x_of_task; /* slabstart_x of every task */
+
+/* local slab ranges as reported by rfftwnd_mpi_local_sizes(); smallest_slab is the minimum nslab_x over all tasks */
+static int slabstart_x, nslab_x, slabstart_y, nslab_y, smallest_slab;
+
+/* local FFT array size and its maximum over all tasks */
+static int fftsize, maxfftsize;
+
+static fftw_real *rhogrid, *forcegrid, *workspace;
+static d_fftw_real *d_rhogrid, *d_forcegrid, *d_workspace;
+
+static fftw_complex *fft_of_rhogrid;
+
+static MyFloat to_slab_fac; /* PMGRID / All.BoxSize: converts a position into a grid coordinate */
+
+void pm2d_periodic_transposeA(fftw_real *field, fftw_real *scratch);
+void pm2d_periodic_transposeB(fftw_real *field, fftw_real *scratch);
+int pm2d_periodic_compare_sortindex(const void *a, const void *b);
+
+/*! \brief Data for fft slab.
+ *
+ * One entry per (particle, neighbouring cell) pair; filled in
+ * pm2d_force_periodic().
+ */
+static struct part_slab_data
+{
+  large_array_offset globalindex; /* PMGRID2 * slab_xx + slab_yy of the touched cell */
+  int partindex;                  /* (particle index << 2) + (xx << 1) + yy */
+  int localindex;                 /* position of the cell in the local list of unique mesh points */
+} * part;
+
+static int *part_sortindex;
+
+/*! \brief This routine generates the FFTW-plans to carry out the parallel
+ * FFTs later on. Some auxiliary variables are also initialized.
+ *
+ * Sets All.Asmth[0] and All.Rcut[0], creates the forward and inverse plans,
+ * queries the local slab ranges and gathers the slab layout of all tasks
+ * (slab_to_task, slabs_x_per_task, first_slab_x_of_task, smallest_slab,
+ * maxfftsize). All MPI calls in here are collectives on MPI_COMM_WORLD.
+ *
+ * \return void
+ */
+void pm2d_init_periodic(void)
+{
+  int i;
+  int slab_to_task_local[PMGRID];
+
+  All.Asmth[0] = ASMTH * All.BoxSize / PMGRID;
+  All.Rcut[0] = RCUT * All.Asmth[0];
+
+  /* Set up the FFTW plan files. */
+
+  fft_forward_plan = rfftw2d_mpi_create_plan(MPI_COMM_WORLD, PMGRID, PMGRID, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
+  fft_inverse_plan = rfftw2d_mpi_create_plan(MPI_COMM_WORLD, PMGRID, PMGRID, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
+
+  /* Work out the ranges on each processor. */
+
+  rfftwnd_mpi_local_sizes(fft_forward_plan, &nslab_x, &slabstart_x, &nslab_y, &slabstart_y, &fftsize);
+
+  /* every task marks only its own slabs; all other entries stay 0, so the sum over tasks yields the owner of each slab */
+  for(i = 0; i < PMGRID; i++)
+    slab_to_task_local[i] = 0;
+
+  for(i = 0; i < nslab_x; i++)
+    slab_to_task_local[slabstart_x + i] = ThisTask;
+
+  MPI_Allreduce(slab_to_task_local, slab_to_task, PMGRID, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+  MPI_Allreduce(&nslab_x, &smallest_slab, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+
+  slabs_x_per_task = (int *)mymalloc("slabs_per_task", NTask * sizeof(int));
+  MPI_Allgather(&nslab_x, 1, MPI_INT, slabs_x_per_task, 1, MPI_INT, MPI_COMM_WORLD);
+
+  first_slab_x_of_task = (int *)mymalloc("first_slab_of_task", NTask * sizeof(int));
+  MPI_Allgather(&slabstart_x, 1, MPI_INT, first_slab_x_of_task, 1, MPI_INT, MPI_COMM_WORLD);
+
+  to_slab_fac = PMGRID / All.BoxSize;
+
+  MPI_Allreduce(&fftsize, &maxfftsize, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+  /* note: not guarded by ThisTask, so every task prints this line */
+  printf("maxfftsize=%d PMGRID=%d\n", maxfftsize, PMGRID);
+}
+
+/*! \brief Allocates memory for 2d PM algorithm.
+ *
+ * This function allocates the memory needed to compute the long-range PM
+ * force. Three fields are used, one to hold the density (and its FFT, and
+ * then the real-space potential), one to hold the force field obtained by
+ * finite differencing, and finally a workspace field, which is used both as
+ * workspace for the parallel FFT, and as buffer for the communication
+ * algorithm used in the force computation.
+ * + * \return void + */ +void pm2d_init_periodic_allocate(void) +{ + double bytes_tot = 0; + size_t bytes; + + /* allocate the memory to hold the FFT fields */ + + rhogrid = (fftw_real *)mymalloc("rhogrid", bytes = maxfftsize * sizeof(d_fftw_real)); + bytes_tot += bytes; + + forcegrid = (fftw_real *)mymalloc("forcegrid", bytes = maxfftsize * sizeof(d_fftw_real)); + bytes_tot += bytes; + + part = (struct part_slab_data *)mymalloc("part", bytes = 4 * NumPart * sizeof(struct part_slab_data)); + bytes_tot += bytes; + + part_sortindex = (int *)mymalloc("part_sortindex", bytes = 4 * NumPart * sizeof(int)); + bytes_tot += bytes; + + if(ThisTask == 0) + printf("Using %g MByte for periodic FFT computation. (presently allocated=%g MB)\n", bytes_tot / (1024.0 * 1024.0), + AllocatedBytes / (1024.0 * 1024.0)); + + workspace = forcegrid; + + fft_of_rhogrid = (fftw_complex *)&rhogrid[0]; + + d_rhogrid = (d_fftw_real *)rhogrid; + d_forcegrid = (d_fftw_real *)forcegrid; + d_workspace = (d_fftw_real *)workspace; +} + +/*! \brief This routine frees the space allocated for the parallel FFT + * algorithm. + * + * \return void + */ +void pm2d_init_periodic_free(void) +{ + /* allocate the memory to hold the FFT fields */ + myfree(part_sortindex); + myfree(part); + myfree(forcegrid); + myfree(rhogrid); +} + +/*! \brief Long range periodic 2d gravity. + * + * Calculates the long-range periodic force given the particle positions + * using the PM method. The force is Gaussian filtered with Asmth, given in + * mesh-cell units. We carry out a CIC charge assignment, and compute the + * potenial by Fourier transform methods. The potential is finite differenced + * using a 4-point finite differencing formula, and the forces are + * interpolated tri-linearly to the particle positions. The CIC kernel is + * deconvolved. Note that the particle distribution is not in the slab + * decomposition that is used for the FFT. 
Instead, overlapping patches
+ * between local domains and FFT slabs are communicated as needed.
+ *
+ * The routine allocates its grids via pm2d_init_periodic_allocate() and
+ * releases them via pm2d_init_periodic_free() before returning. Results are
+ * accumulated into P[i].GravPM[0..1] (and into P[i].PM_Potential if
+ * EVALPOTENTIAL is set and mode is 0).
+ *
+ * \param[in] mode 0: normal PM force; 1: calculate mesh correction vector.
+ *            mode 0 loops over NumPart particles, any other value over
+ *            NumGas particles.
+ *
+ * \return void
+ */
+void pm2d_force_periodic(int mode)
+{
+  double k2, kx, ky, smth;
+  double dx, dy, weight;
+  double fx, fy, ff;
+  double asmth2, fac, acc_dim;
+  int i, j, N, slab, level, sendTask, recvTask, task;
+  int x, y, yl, yr, yll, yrr, ip, dim;
+  int slab_x, slab_y;
+  int slab_xx, slab_yy;
+  int num_on_grid, num_field_points, pindex, xx, yy;
+  MPI_Status status;
+  int *localfield_count, *localfield_first, *localfield_offset, *localfield_togo;
+  large_array_offset offset, *localfield_globalindex, *import_globalindex;
+  d_fftw_real *localfield_d_data, *import_d_data;
+  fftw_real *localfield_data, *import_data;
+
+  if(ThisTask == 0)
+    {
+      printf("Starting periodic PM-2d calculation. (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0));
+      myflush(stdout);
+    }
+
+  asmth2 = (2 * M_PI) * All.Asmth[0] / All.BoxSize;
+  asmth2 *= asmth2;
+
+  fac = All.G / (M_PI * All.BoxSize); /* to get potential */
+  fac *= 1 / (2 * All.BoxSize / PMGRID); /* for finite differencing */
+
+  if(mode == 1)
+    {
+      fac *= 1.0 / (All.G) * All.BoxSize;
+    }
+  else
+    {
+      fac *= All.BoxSize;
+    }
+
+  pm2d_init_periodic_allocate();
+
+  if(mode == 0)
+    N = NumPart;
+  else
+    N = NumGas;
+
+  /* determine the cells each particles accesses */
+  for(i = 0, num_on_grid = 0; i < N; i++)
+    {
+      slab_x = (int)(to_slab_fac * P[i].Pos[0]);
+      slab_y = (int)(to_slab_fac * P[i].Pos[1]);
+
+      if(slab_x >= PMGRID)
+        slab_x = PMGRID - 1;
+      if(slab_y >= PMGRID)
+        slab_y = PMGRID - 1;
+
+      /* the 2x2 cells touched by the CIC kernel, wrapped periodically; they are stored in the fixed order (xx,yy) = (0,0),(0,1),(1,0),(1,1) */
+      for(xx = 0; xx < 2; xx++)
+        for(yy = 0; yy < 2; yy++)
+          {
+            slab_xx = slab_x + xx;
+            slab_yy = slab_y + yy;
+
+            if(slab_xx >= PMGRID)
+              slab_xx -= PMGRID;
+            if(slab_yy >= PMGRID)
+              slab_yy -= PMGRID;
+
+            offset = (PMGRID2 * slab_xx + slab_yy);
+
+            part[num_on_grid].partindex = (i << 2) + (xx << 1) + yy;
+            part[num_on_grid].globalindex = offset;
+            part_sortindex[num_on_grid] = num_on_grid;
+            num_on_grid++;
+          }
+    }
+
+  /* note: num_on_grid will be 4 times larger than the particle number,
+     but num_field_points will generally be much smaller */
+
+  /* bring the part-field into the order of the accessed cells. This allows the removal of duplicates */
+  pm2d_mysort_pmperiodic(part_sortindex, num_on_grid, sizeof(int), pm2d_periodic_compare_sortindex);
+
+  /* determine the number of unique field points */
+  for(i = 0, num_field_points = 0; i < num_on_grid; i++)
+    {
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex == part[part_sortindex[i - 1]].globalindex)
+          continue;
+
+      num_field_points++;
+    }
+
+  /* allocate the local field */
+  localfield_globalindex = (large_array_offset *)mymalloc("first_slab_of_task", num_field_points * sizeof(large_array_offset));
+  localfield_d_data = (d_fftw_real *)mymalloc("localfield_d_data", num_field_points * sizeof(d_fftw_real));
+  localfield_data = (fftw_real *)localfield_d_data;
+  localfield_first = (int *)mymalloc("localfield_d_data", NTask * sizeof(int));
+  localfield_count = (int *)mymalloc("localfield_count", NTask * sizeof(int));
+  localfield_offset = (int *)mymalloc("localfield_count", NTask * sizeof(int));
+  localfield_togo = (int *)mymalloc("localfield_togo", NTask * NTask * sizeof(int));
+
+  for(i = 0; i < NTask; i++)
+    {
+      localfield_first[i] = 0;
+      localfield_count[i] = 0;
+    }
+
+  /* establish the cross link between the part[] array and the local list of
+     mesh points. Also, count on which CPU how many of the needed field points are stored */
+  for(i = 0, num_field_points = 0; i < num_on_grid; i++)
+    {
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex)
+          num_field_points++;
+
+      part[part_sortindex[i]].localindex = num_field_points;
+
+      if(i > 0)
+        if(part[part_sortindex[i]].globalindex == part[part_sortindex[i - 1]].globalindex)
+          continue;
+
+      localfield_globalindex[num_field_points] = part[part_sortindex[i]].globalindex;
+
+      slab = part[part_sortindex[i]].globalindex / PMGRID2;
+      task = slab_to_task[slab];
+      if(localfield_count[task] == 0)
+        localfield_first[task] = num_field_points;
+      localfield_count[task]++;
+    }
+  /* inside the loop num_field_points was the index of the current unique point; turn it back into a count.
+     NOTE(review): with num_on_grid == 0 this yields 1 although zero entries were allocated above -- confirm
+     that callers never get here without particles */
+  num_field_points++;
+
+  for(i = 1, localfield_offset[0] = 0; i < NTask; i++)
+    localfield_offset[i] = localfield_offset[i - 1] + localfield_count[i - 1];
+
+  /* now bin the local particle data onto the mesh list */
+
+  for(i = 0; i < num_field_points; i++)
+    localfield_d_data[i] = 0;
+
+  /* part[] itself is still in creation order (only part_sortindex was sorted), so entries i..i+3 belong to one particle */
+  for(i = 0; i < num_on_grid; i += 4)
+    {
+      pindex = (part[i].partindex >> 2);
+
+      slab_x = (int)(to_slab_fac * P[pindex].Pos[0]);
+      slab_y = (int)(to_slab_fac * P[pindex].Pos[1]);
+
+      dx = to_slab_fac * P[pindex].Pos[0] - slab_x;
+      dy = to_slab_fac * P[pindex].Pos[1] - slab_y;
+
+      weight = P[pindex].Mass;
+
+      localfield_d_data[part[i + 0].localindex] += weight * (1.0 - dx) * (1.0 - dy);
+      localfield_d_data[part[i + 1].localindex] += weight * (1.0 - dx) * dy;
+      localfield_d_data[part[i + 2].localindex] += weight * (dx) * (1.0 - dy);
+      localfield_d_data[part[i + 3].localindex] += weight * (dx)*dy;
+    }
+
+  /* clear local FFT-mesh density field */
+  for(i = 0; i < fftsize; i++)
+    d_rhogrid[i] = 0;
+
+  /* exchange data and add contributions to the local mesh-path */
+
+  /* afterwards localfield_togo[a * NTask + b] is the number of field points task a holds for slabs of task b */
+  MPI_Allgather(localfield_count, NTask, MPI_INT, localfield_togo, NTask, MPI_INT, MPI_COMM_WORLD);
+
+  for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+    {
+      sendTask = ThisTask;
+      recvTask = ThisTask ^ level;
+
+      if(recvTask < NTask)
+        {
+          if(level > 0)
+            {
+              import_d_data =
+                  (d_fftw_real *)mymalloc("import_d_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(d_fftw_real));
+              import_globalindex = (large_array_offset *)mymalloc(
+                  "import_d_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(large_array_offset));
+
+              if(localfield_togo[sendTask * NTask + recvTask] > 0 || localfield_togo[recvTask * NTask + sendTask] > 0)
+                {
+                  MPI_Sendrecv(localfield_d_data + localfield_offset[recvTask],
+                               localfield_togo[sendTask * NTask + recvTask] * sizeof(d_fftw_real), MPI_BYTE, recvTask, TAG_NONPERIOD_A,
+                               import_d_data, localfield_togo[recvTask * NTask + sendTask] * sizeof(d_fftw_real), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_A, MPI_COMM_WORLD, &status);
+
+                  MPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                               localfield_togo[sendTask * NTask + recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_B, import_globalindex,
+                               localfield_togo[recvTask * NTask + sendTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_B, MPI_COMM_WORLD, &status);
+                }
+            }
+          else
+            {
+              /* level 0: the data for our own slabs needs no communication, use it in place */
+              import_d_data = localfield_d_data + localfield_offset[ThisTask];
+              import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+            }
+
+          for(i = 0; i < localfield_togo[recvTask * NTask + sendTask]; i++)
+            {
+              /* determine offset in local FFT slab */
+              offset = import_globalindex[i] - first_slab_x_of_task[ThisTask] * PMGRID2;
+
+              d_rhogrid[offset] += import_d_data[i];
+            }
+
+          if(level > 0)
+            {
+              myfree(import_globalindex);
+              myfree(import_d_data);
+            }
+        }
+    }
+
+  /* Do the FFT of the density field */
+
+  rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
+
+  /* multiply with Green's function for the potential */
+
+  for(y = slabstart_y; y < slabstart_y + nslab_y; y++)
+    for(x = 0; x < PMGRID; x++)
+      {
+        /* map the grid index to a signed wave number */
+        if(x > PMGRID / 2)
+          kx = x - PMGRID;
+        else
+          kx = x;
+        if(y > PMGRID / 2)
+          ky = y - PMGRID;
+        else
+          ky = y;
+
+        k2 = kx * kx + ky * ky;
+
+        if(k2 > 0)
+          {
+            smth = -exp(-k2 * asmth2) / k2;
+
+            /* do deconvolution */
+
+            fx = fy = 1;
+            if(kx != 0)
+              {
+                fx = (M_PI * kx) / PMGRID;
+                fx = sin(fx) / fx;
+              }
+            if(ky != 0)
+              {
+                fy = (M_PI * ky) / PMGRID;
+                fy = sin(fy) / fy;
+              }
+            ff = 1 / (fx * fy);
+            smth *= ff * ff * ff * ff;
+
+            /* end deconvolution */
+
+            ip = PMGRID * (y - slabstart_y) + x;
+            fft_of_rhogrid[ip].re *= smth;
+            fft_of_rhogrid[ip].im *= smth;
+          }
+      }
+
+  /* the k = 0 mode was skipped above (k2 == 0); the task that holds it sets it to zero */
+  if(slabstart_y == 0)
+    fft_of_rhogrid[0].re = fft_of_rhogrid[0].im = 0.0;
+
+  /* Do the inverse FFT to get the potential */
+
+  rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
+
+#ifdef EVALPOTENTIAL /* now read out the potential */
+  if(mode == 0)
+    {
+      for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+        {
+          sendTask = ThisTask;
+          recvTask = ThisTask ^ level;
+
+          if(recvTask < NTask)
+            {
+              if(level > 0)
+                {
+                  import_data = (fftw_real *)mymalloc("import_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(fftw_real));
+                  import_globalindex = (large_array_offset *)mymalloc(
+                      "import_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(large_array_offset));
+
+                  if(localfield_togo[sendTask * NTask + recvTask] > 0 || localfield_togo[recvTask * NTask + sendTask] > 0)
+                    {
+                      MPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                                   localfield_togo[sendTask * NTask + recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                   TAG_NONPERIOD_C, import_globalindex,
+                                   localfield_togo[recvTask * NTask + sendTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                   TAG_NONPERIOD_C, MPI_COMM_WORLD, &status);
+                    }
+                }
+              else
+                {
+                  import_data = localfield_data + localfield_offset[ThisTask];
+                  import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+                }
+
+              for(i = 0; i < localfield_togo[recvTask * NTask + sendTask]; i++)
+                {
+                  offset = import_globalindex[i] - first_slab_x_of_task[ThisTask] * ((large_array_offset)PMGRID2);
+                  import_data[i] = rhogrid[offset];
+                }
+
+              if(level > 0)
+                {
+                  MPI_Sendrecv(import_data, localfield_togo[recvTask * NTask + sendTask] * sizeof(fftw_real), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_A, localfield_data + localfield_offset[recvTask],
+                               localfield_togo[sendTask * NTask + recvTask] * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_NONPERIOD_A,
+                               MPI_COMM_WORLD, &status);
+
+                  myfree(import_globalindex);
+                  myfree(import_data);
+                }
+            }
+        }
+
+      /* read out the potential values, which all have been assembled in localfield_data */
+
+      double pot;
+
+      for(i = 0, j = 0; i < N; i++)
+        {
+          /* advance j to the first of the four part[] entries of particle i */
+          while(j < num_on_grid && (part[j].partindex >> 2) != i)
+            j++;
+
+          slab_x = (int)(to_slab_fac * P[i].Pos[0]);
+          dx = to_slab_fac * P[i].Pos[0] - slab_x;
+
+          slab_y = (int)(to_slab_fac * P[i].Pos[1]);
+          dy = to_slab_fac * P[i].Pos[1] - slab_y;
+
+          pot = +localfield_data[part[j + 0].localindex] * (1.0 - dx) * (1.0 - dy) +
+                localfield_data[part[j + 1].localindex] * (1.0 - dx) * dy + localfield_data[part[j + 2].localindex] * dx * (1.0 - dy) +
+                localfield_data[part[j + 3].localindex] * dx * dy;
+
+          P[i].PM_Potential += pot * fac * (2 * All.BoxSize / PMGRID);
+          /* compensate the finite differencing factor */;
+        }
+    }
+#endif /* #ifdef EVALPOTENTIAL */
+
+  /* get the force components by finite differencing the potential for each dimension,
+     and send back the results to the right CPUs */
+
+  for(dim = 1; dim >= 0; dim--) /* Calculate each component of the force. */
+    { /* we do the x component last, because for differencing the potential in the x-direction, we need to construct the transpose */
+      if(dim == 0)
+        pm2d_periodic_transposeA(rhogrid, forcegrid); /* compute the transpose of the potential field */
+
+      for(xx = slabstart_x; xx < (slabstart_x + nslab_x); xx++)
+        for(y = 0; y < PMGRID; y++)
+          {
+            x = xx - slabstart_x;
+
+            yrr = yll = yr = yl = y;
+
+            /* neighbours at distance 1 and 2 along the differenced direction, with periodic wrap-around */
+            yr = y + 1;
+            yl = y - 1;
+            yrr = y + 2;
+            yll = y - 2;
+            if(yr >= PMGRID)
+              yr -= PMGRID;
+            if(yrr >= PMGRID)
+              yrr -= PMGRID;
+            if(yl < 0)
+              yl += PMGRID;
+            if(yll < 0)
+              yll += PMGRID;
+
+            if(dim == 0)
+              {
+                /* transposed layout: consecutive y values are nslab_x apart */
+                forcegrid[x + y * nslab_x] = fac * ((4.0 / 3) * (rhogrid[(x + yl * nslab_x)] - rhogrid[(x + yr * nslab_x)]) -
+                                                    (1.0 / 6) * (rhogrid[(x + yll * nslab_x)] - rhogrid[(x + yrr * nslab_x)]));
+              }
+            else
+              {
+                forcegrid[PMGRID2 * x + y] = fac * ((4.0 / 3) * (rhogrid[PMGRID2 * x + yl] - rhogrid[PMGRID2 * x + yr]) -
+                                                    (1.0 / 6) * (rhogrid[PMGRID2 * x + yll] - rhogrid[PMGRID2 * x + yrr]));
+              }
+          }
+
+      if(dim == 0)
+        pm2d_periodic_transposeB(forcegrid, rhogrid); /* bring the force field back from the transposed layout */
+
+      /* send the force components to the right processors */
+
+      for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
+        {
+          sendTask = ThisTask;
+          recvTask = ThisTask ^ level;
+
+          if(recvTask < NTask)
+            {
+              if(level > 0)
+                {
+                  import_data = (fftw_real *)mymalloc("import_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(fftw_real));
+                  import_globalindex = (large_array_offset *)mymalloc(
+                      "import_data", localfield_togo[recvTask * NTask + ThisTask] * sizeof(large_array_offset));
+
+                  if(localfield_togo[sendTask * NTask + recvTask] > 0 || localfield_togo[recvTask * NTask + sendTask] > 0)
+                    {
+                      MPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
+                                   localfield_togo[sendTask * NTask + recvTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                   TAG_NONPERIOD_C, import_globalindex,
+                                   localfield_togo[recvTask * NTask + sendTask] * sizeof(large_array_offset), MPI_BYTE, recvTask,
+                                   TAG_NONPERIOD_C, MPI_COMM_WORLD, &status);
+                    }
+                }
+              else
+                {
+                  import_data = localfield_data + localfield_offset[ThisTask];
+                  import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
+                }
+
+              for(i = 0; i < localfield_togo[recvTask * NTask + sendTask]; i++)
+                {
+                  /* determine offset in local FFT slab */
+                  offset = import_globalindex[i] - first_slab_x_of_task[ThisTask] * PMGRID2;
+                  import_data[i] = forcegrid[offset];
+                }
+
+              if(level > 0)
+                {
+                  MPI_Sendrecv(import_data, localfield_togo[recvTask * NTask + sendTask] * sizeof(fftw_real), MPI_BYTE, recvTask,
+                               TAG_NONPERIOD_A, localfield_data + localfield_offset[recvTask],
+                               localfield_togo[sendTask * NTask + recvTask] * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_NONPERIOD_A,
+                               MPI_COMM_WORLD, &status);
+
+                  myfree(import_globalindex);
+                  myfree(import_data);
+                }
+            }
+        }
+
+      /* read out the forces, which all have been assembled in localfield_data */
+
+      for(i = 0, j = 0; i < N; i++)
+        {
+          /* advance j to the first of the four part[] entries of particle i */
+          while(j < num_on_grid && (part[j].partindex >> 2) != i)
+            j++;
+
+          slab_x = (int)(to_slab_fac * P[i].Pos[0]);
+          dx = to_slab_fac * P[i].Pos[0] - slab_x;
+
+          slab_y = (int)(to_slab_fac * P[i].Pos[1]);
+          dy = to_slab_fac * P[i].Pos[1] - slab_y;
+
+          acc_dim = +localfield_data[part[j + 0].localindex] * (1.0 - dx) * (1.0 - dy) +
+                    localfield_data[part[j + 1].localindex] * (1.0 - dx) * dy +
+                    localfield_data[part[j + 2].localindex] * (dx) * (1.0 - dy) + localfield_data[part[j + 3].localindex] * (dx)*dy;
+
+          P[i].GravPM[dim] += acc_dim;
+        }
+    }
+
+  /* free locallist (reverse order of allocation) */
+  myfree(localfield_togo);
+  myfree(localfield_offset);
+  myfree(localfield_count);
+  myfree(localfield_first);
+  myfree(localfield_d_data);
+  myfree(localfield_globalindex);
+
+  pm2d_init_periodic_free();
+
+  mpi_printf("done PM-2d.\n");
+}
+
+/*! \brief Compares two objects of type part_slab_data.
+ *
+ * According to element globalindex.
+ * + * \param[in] a Index of first object in part array. + * \param[in] b Index of second object in part array. + * + * \return (-1,0,1); -1 if part[a].globalindex < part[b].globalindex + */ +int pm2d_periodic_compare_sortindex(const void *a, const void *b) +{ + if(part[*(int *)a].globalindex < part[*(int *)b].globalindex) + return -1; + + if(part[*(int *)a].globalindex > part[*(int *)b].globalindex) + return +1; + + return 0; +} + +/*! \brief Merge sort algorithm for 2d periodic particle mesh algorithm. + * + * \param[in, out] b Array to be sorted. + * \param[in] n Size of array b. + * \param[in, out] t Temporary array. + * + * \return void + */ +static void pm2d_msort_pmperiodic_with_tmp(int *b, size_t n, int *t) +{ + int *tmp; + int *b1, *b2; + size_t n1, n2; + + if(n <= 1) + return; + + n1 = n / 2; + n2 = n - n1; + b1 = b; + b2 = b + n1; + + pm2d_msort_pmperiodic_with_tmp(b1, n1, t); + pm2d_msort_pmperiodic_with_tmp(b2, n2, t); + + tmp = t; + + while(n1 > 0 && n2 > 0) + { + if(part[*b1].globalindex <= part[*b2].globalindex) + { + --n1; + *tmp++ = *b1++; + } + else + { + --n2; + *tmp++ = *b2++; + } + } + + if(n1 > 0) + memcpy(tmp, b1, n1 * sizeof(int)); + + memcpy(b, t, (n - n2) * sizeof(int)); +} + +/*! \brief Wrapper for sorting algorithm in 2d periodic PM algorithm. + * + * Uses pm2d_msort_pmperiodic_with_tmp. + * + * \param[in, out] b Array to be sorted. + * \param[in] n Number of elements in array b. + * \param[in] s Size of individual element of b (for memory allocation). + * \param[in] cmp Compare function (unused). + * + * \return void + */ +void pm2d_mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)) +{ + const size_t size = n * s; + + int *tmp = (int *)mymalloc("tmp", size); + + pm2d_msort_pmperiodic_with_tmp((int *)b, n, tmp); + + myfree(tmp); +} + +/*! \brief Transpose operation for 2d fft. + * + * Used for transposing rhogrid. + * + * \param[in, out] field Field that needs to be transposed. 
+ * \param[in, out] scratch Temporary data. + * + * \return void + */ +void pm2d_periodic_transposeA(fftw_real *field, fftw_real *scratch) +{ + int x, y, task; + + for(task = 0; task < NTask; task++) + for(x = 0; x < nslab_x; x++) + for(y = first_slab_x_of_task[task]; y < first_slab_x_of_task[task] + slabs_x_per_task[task]; y++) + { + scratch[(first_slab_x_of_task[task] * nslab_x + x * slabs_x_per_task[task] + (y - first_slab_x_of_task[task]))] = + field[PMGRID2 * x + y]; + } + +#ifndef NO_ISEND_IRECV_IN_DOMAIN + MPI_Request *requests; + int nrequests = 0; + + requests = (MPI_Request *)mymalloc(2 * NTask * sizeof(MPI_Request)); + + for(task = 0; task < NTask; task++) + { + MPI_Isend(scratch + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task, + TAG_KEY, MPI_COMM_WORLD, &requests[nrequests++]); + + MPI_Irecv(field + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task, + TAG_KEY, MPI_COMM_WORLD, &requests[nrequests++]); + } + + MPI_Waitall(nrequests, requests, MPI_STATUSES_IGNORE); + myfree(requests); +#else /* #ifndef NO_ISEND_IRECV_IN_DOMAIN */ + int ngrp; + + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + task = ThisTask ^ ngrp; + + if(task < NTask) + { + MPI_Sendrecv(scratch + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, + task, TAG_KEY, field + first_slab_x_of_task[task] * nslab_x, + nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task, TAG_KEY, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } +#endif /* #ifndef NO_ISEND_IRECV_IN_DOMAIN #else */ +} + +/*! \brief Transpose operation for 2d fft. + * + * Used for forcegrid transpose. + * + * \param[in, out] field Field that needs to be transposed. + * \param[in, out] scratch Temporary data. 
+ * + * \return void + */ +void pm2d_periodic_transposeB(fftw_real *field, fftw_real *scratch) +{ + int x, y, task; + +#ifndef NO_ISEND_IRECV_IN_DOMAIN + MPI_Request *requests; + int nrequests = 0; + + requests = (MPI_Request *)mymalloc(2 * NTask * sizeof(MPI_Request)); + + for(task = 0; task < NTask; task++) + { + MPI_Isend(field + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task, + TAG_KEY, MPI_COMM_WORLD, &requests[nrequests++]); + + MPI_Irecv(scratch + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task, + TAG_KEY, MPI_COMM_WORLD, &requests[nrequests++]); + } + + MPI_Waitall(nrequests, requests, MPI_STATUSES_IGNORE); + myfree(requests); + +#else /* #ifndef NO_ISEND_IRECV_IN_DOMAIN */ + int ngrp; + + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + task = ThisTask ^ ngrp; + + if(task < NTask) + { + MPI_Sendrecv(field + first_slab_x_of_task[task] * nslab_x, nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, + task, TAG_KEY, scratch + first_slab_x_of_task[task] * nslab_x, + nslab_x * slabs_x_per_task[task] * sizeof(fftw_real), MPI_BYTE, task, TAG_KEY, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } +#endif /* #ifndef NO_ISEND_IRECV_IN_DOMAIN #else */ + + for(task = 0; task < NTask; task++) + for(x = 0; x < nslab_x; x++) + for(y = first_slab_x_of_task[task]; y < first_slab_x_of_task[task] + slabs_x_per_task[task]; y++) + { + field[PMGRID2 * x + y] = + scratch[(first_slab_x_of_task[task] * nslab_x + x * slabs_x_per_task[task] + (y - first_slab_x_of_task[task]))]; + } +} + +#endif /* #ifdef TWODIMS */ +#endif /* #ifndef GRAVITY_NOT_PERIODIC */ +#endif /* #ifdef PMGRID */ diff --git a/src/amuse/community/arepo/src/hydro/finite_volume_solver.c b/src/amuse/community/arepo/src/hydro/finite_volume_solver.c new file mode 100644 index 0000000000..287fe14bb4 --- /dev/null +++ b/src/amuse/community/arepo/src/hydro/finite_volume_solver.c @@ -0,0 +1,1895 
@@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/hydro/finite_volume_solver.c
+ * \date        05/2018
+ * \brief       Core algorithms of the finite-volume solver.
+ * \details contains functions: + * void compute_interface_fluxes(tessellation * T) + * void backup_face_areas(tessellation * T) + * void restore_face_areas(tessellation * T) + * int face_get_state(tessellation * T, int p, int i, struct + * state *st) + * void face_boundary_check_vertex(tessellation * T, int p, + * MyFloat * velx, MyFloat * vely, MyFloat * velz) + * void face_boundary_check(point * p, double *velx, double + * *vely, double *velz) + * int face_check_responsibility_of_this_task(tessellation * T, + * int p1, int p2, struct state *st_L, struct state *st_R) + * double face_timestep(struct state *state_L, struct state + * *state_R, double *hubble_a, double *atime) + * void state_convert_to_local_frame(struct state *st, double + * *vel_face, double hubble_a, double atime) + * void face_do_time_extrapolation(struct state *delta, + * struct state *st, double atime) + * void face_do_spatial_extrapolation(struct state *delta, + * struct state *st, struct state *st_other) + * void face_do_spatial_extrapolation_single_quantity(double + * *delta, double st, double st_other, MySingle * grad, + * double *dx, double *r) + * void face_add_extrapolations(struct state *st_face, struct + * state *delta_time, struct state *delta_space, struct + * fvs_stat *stat) + * void face_add_extrapolation(struct state *st_face, struct + * state *delta, struct fvs_stat *stat) + * void face_add_extrapolation_with_check(struct state *st_face, + * struct state *delta, struct fvs_stat *stat) + * void face_turn_velocities(struct state *st, struct geometry + * *geom) + * void solve_advection(struct state *st_L, struct state *st_R, + * struct state_face *st_face, struct geometry *geom, + * double *vel_face) + * void face_turnback_velocities(struct state_face *st_face, + * struct geometry *geom) + * void face_set_scalar_states_and_fluxes(struct state *st_L, + * struct state *st_R, struct state_face *st_face, struct + * fluxes *flux) + * void flux_convert_to_lab_frame(struct state *st_L, 
struct
+ *                  state *st_R, double *vel_face, struct fluxes *flux)
+ *                void face_turn_momentum_flux(struct fluxes *flux, struct
+ *                  geometry *geom)
+ *                void face_get_fluxes(struct state *st_L, struct state *st_R,
+ *                  struct state_face *st_face, struct fluxes *flux, struct
+ *                  geometry *geom, double *vel_face)
+ *                void face_limit_fluxes(struct state *st_L, struct state
+ *                  *st_R, struct state *st_center_L, struct state
+ *                  *st_center_R, struct fluxes *flux, double dt, double
+ *                  *count, double *count_reduced)
+ *                void face_clear_fluxes(struct fluxes *flux)
+ *                void face_add_fluxes_advection(struct state_face *st_face,
+ *                  struct fluxes *flux, struct geometry *geom, double
+ *                  *vel_face)
+ *                int flux_list_data_compare(const void *a, const void *b)
+ *                void apply_flux_list(void)
+ *                void fvs_initialize_statistics(struct fvs_stat *stat)
+ *                void fvs_evaluate_statistics(struct fvs_stat *stat)
+ *                void apply_spherical_source_terms()
+ *                void add_spin_source_term_from_grid_movement()
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 17.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+
+/*! \brief Data needed for flux calculation.
+ *
+ *  One entry is appended by compute_interface_fluxes() for every face side
+ *  whose cell lives on another task (a "foreign ghost point"). Each entry
+ *  holds the increments of the conserved quantities, already multiplied by
+ *  the signed area/time-step factor. FluxList is the growable array of these
+ *  entries.
+ */
+static struct flux_list_data
+{
+  int task, index; /* owning task (DP[].task) and the cell's index there (DP[].originalindex) */
+  double dM, dP[3]; /* mass and momentum increments */
+#ifdef MHD
+  double dB[3]; /* increment of the conserved magnetic field */
+#endif /* #ifdef MHD */
+
+#ifndef ISOTHERM_EQS
+  double dEnergy; /* energy increment; not tracked for isothermal runs */
+#endif /* #ifndef ISOTHERM_EQS */
+#ifdef MAXSCALARS
+  double dConservedScalars[MAXSCALARS]; /* increments of the passive scalars; the first N_Scalar entries are filled */
+#endif /* #ifdef MAXSCALARS */
+} * FluxList;
+
+static int Nflux, MaxNflux; /* used entries and current capacity of FluxList */
+
+struct primexch *PrimExch; /* per-cell data of cells on other tasks, read wherever DP[].task != ThisTask */
+struct grad_data *GradExch; /* gradients of those imported cells */
+
+/*! state on a face determined by Riemann solver */
+struct state_face state_face;
+
+/*!
flux through a face */ +struct fluxes fluxes; + +struct geometry geom; + +#ifdef ONEDIMS_SPHERICAL +void apply_spherical_source_terms(); +#endif /* #ifdef ONEDIMS_SPHERICAL */ + +static void face_add_extrapolation_with_check(struct state *st_face, struct state *delta, struct fvs_stat *stat); +static void fvs_initialize_statistics(struct fvs_stat *stat); +static void fvs_evaluate_statistics(struct fvs_stat *stat); + +#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS +void backup_face_areas(tessellation *T); +void restore_face_areas(tessellation *T); +#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */ + +/*! \brief Main routine to compute fluxes across interfaces given am mesh T. + * + * Adds these fluxes to conserved variables. + * + * \param[in] T Pointer to tessellation. + * + * \return void + */ +void compute_interface_fluxes(tessellation *T) +{ +#ifdef NOHYDRO + return; +#endif /* #ifdef NOHYDRO */ + TIMER_START(CPU_FLUXES); + + int i, j; + double count = 0, count_reduced = 0, tot_count, tot_count_reduced; + double face_dt, hubble_a, atime; + struct fvs_stat stat; +#ifdef MHD + double sqrtatime; +#endif /* #ifdef MHD */ + +#ifdef GODUNOV_STATS + FILE *fdstats; + char buf[1000]; + + sprintf(buf, "%s/godunov_stats_%d.txt", All.OutputDir, ThisTask); + if(!(fdstats = fopen(buf, "w"))) + terminate("error in opening file '%s'", buf); +#endif /* #ifdef GODUNOV_STATS */ + +#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS + backup_face_areas(T); +#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */ + + fvs_initialize_statistics(&stat); + + MaxNflux = T->Indi.AllocFacNflux; + Nflux = 0; + FluxList = mymalloc_movable(&FluxList, "FluxList", MaxNflux * sizeof(struct flux_list_data)); + + face *VF = T->VF; + point *DP = T->DP; + + for(i = 0; i < T->Nvf; i++) + { + struct state state_L, state_center_L, delta_time_L, delta_space_L; + struct state state_R, state_center_R, delta_time_R, delta_space_R; + + face_dt = 0; /* the default is that this face is not active */ + + /* calculate normal 
vectors */ + if(face_get_normals(T, i, &geom)) + continue; + + /* get the values of the states at the center of the cells */ + if(face_get_state(T, VF[i].p1, i, &state_center_L)) + continue; + + if(face_get_state(T, VF[i].p2, i, &state_center_R)) + continue; + + /* only treat faces where one of the two sides is active */ + if(!TimeBinSynchronized[state_center_L.timeBin] && !TimeBinSynchronized[state_center_R.timeBin]) + continue; + + /* clarify whether the face should be done by this task (it may be present also on another task) */ + if(face_check_responsibility_of_this_task(T, VF[i].p1, VF[i].p2, &state_center_L, &state_center_R)) + continue; + + /* calculate timestep of the face */ + face_dt = face_timestep(&state_center_L, &state_center_R, &hubble_a, &atime); +#ifdef MHD + sqrtatime = sqrt(atime); +#endif /* #ifdef MHD */ + + if(!(face_dt > 0)) + continue; + + /* now estimate the velocity of the midpoint of the face based on the velocities of the generators of the mesh. */ + double vel_face[3]; + + if(All.ComovingIntegrationOn) + for(j = 0; j < 3; j++) + { + state_center_L.velVertex[j] /= atime; /* convert vertex motion to peculiar velocity */ + state_center_R.velVertex[j] /= atime; + } + + /* rough motion of mid-point of edge */ + vel_face[0] = 0.5 * (state_center_L.velVertex[0] + state_center_R.velVertex[0]); + vel_face[1] = 0.5 * (state_center_L.velVertex[1] + state_center_R.velVertex[1]); + vel_face[2] = 0.5 * (state_center_L.velVertex[2] + state_center_R.velVertex[2]); + + double cx, cy, cz, facv; + + cx = VF[i].cx - 0.5 * (DP[VF[i].p2].x + DP[VF[i].p1].x); + cy = VF[i].cy - 0.5 * (DP[VF[i].p2].y + DP[VF[i].p1].y); + cz = VF[i].cz - 0.5 * (DP[VF[i].p2].z + DP[VF[i].p1].z); + + facv = (cx * (state_center_L.velVertex[0] - state_center_R.velVertex[0]) + + cy * (state_center_L.velVertex[1] - state_center_R.velVertex[1]) + + cz * (state_center_L.velVertex[2] - state_center_R.velVertex[2])) / + geom.nn; + + /* put in a limiter for highly distorted cells */ + 
double cc = sqrt(cx * cx + cy * cy + cz * cz); + if(cc > 0.9 * geom.nn) + facv *= (0.9 * geom.nn) / cc; + + vel_face[0] += facv * geom.nx; + vel_face[1] += facv * geom.ny; + vel_face[2] += facv * geom.nz; + +#if defined(VORONOI_STATIC_MESH) + vel_face[0] = 0; + vel_face[1] = 0; + vel_face[2] = 0; +#endif /* #if defined(VORONOI_STATIC_MESH) */ + +#if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) + double vel_face_turned[3]; + /* for these riemann solvers, the riemann problem is not solved in the + * restframe of the face, instead the mesh motion is accounted for via + * an advection step. + */ + + /* turn the face velocity */ + vel_face_turned[0] = vel_face[0] * geom.nx + vel_face[1] * geom.ny + vel_face[2] * geom.nz; + vel_face_turned[1] = vel_face[0] * geom.mx + vel_face[1] * geom.my + vel_face[2] * geom.mz; + vel_face_turned[2] = vel_face[0] * geom.px + vel_face[1] * geom.py + vel_face[2] * geom.pz; +#endif /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) */ + + state_convert_to_local_frame(&state_center_L, vel_face, hubble_a, atime); + state_convert_to_local_frame(&state_center_R, vel_face, hubble_a, atime); + + /* copy center state to state at interface, then add extrapolation terms */ + state_L = state_center_L; + state_R = state_center_R; + + face_do_time_extrapolation(&delta_time_L, &state_center_L, atime); + face_do_time_extrapolation(&delta_time_R, &state_center_R, atime); + + face_do_spatial_extrapolation(&delta_space_L, &state_center_L, &state_center_R); + face_do_spatial_extrapolation(&delta_space_R, &state_center_R, &state_center_L); + + face_add_extrapolations(&state_L, &delta_time_L, &delta_space_L, &stat); + face_add_extrapolations(&state_R, &delta_time_R, &delta_space_R, &stat); + +#ifdef MHD + if(All.ComovingIntegrationOn) + { + state_L.Bx /= sqrtatime; + state_L.By /= sqrtatime; + state_L.Bz /= sqrtatime; + + state_R.Bx /= sqrtatime; + state_R.By /= sqrtatime; + state_R.Bz /= sqrtatime; + } +#endif /* #ifdef MHD */ + +#ifndef MESHRELAX 
+#ifndef ISOTHERM_EQS + /* check for crazy values */ + if(state_L.press < 0 || state_R.press < 0 || state_L.rho < 0 || state_R.rho < 0) + { + printf("i=%d press_L=%g press_R=%g rho_L=%g rho_R=%g\n", i, state_L.press, state_R.press, state_L.rho, state_R.rho); + printf("area=%g lx=%g ly=%g rx=%g ry=%g\n", VF[i].area, state_L.dx, state_L.dy, state_R.dx, state_R.dy); + terminate("found crazy values"); + } +#else /* #ifndef ISOTHERM_EQS */ + if(state_L.press < 0 || state_R.press < 0 || state_L.rho < 0 || state_R.rho < 0) + { + printf("i=%d rho_L=%g rho_R=%g\n", i, state_L.rho, state_R.rho); + printf("area=%g lx=%g ly=%g rx=%g ry=%g\n", VF[i].area, state_L.dx, state_L.dy, state_R.dx, state_R.dy); + terminate("found crazy values"); + } +#endif /* #ifndef ISOTHERM_EQS #else */ +#endif /* #ifndef MESHRELAX */ + + /* mirror velocity in case of reflecting boundaries */ + face_boundary_check(&T->DP[VF[i].p1], &state_L.velx, &state_L.vely, &state_L.velz); + face_boundary_check(&T->DP[VF[i].p2], &state_R.velx, &state_R.vely, &state_R.velz); + +#ifdef MHD + /* mirror magnetic field in case of reflecting boundaries */ + face_boundary_check(&T->DP[VF[i].p1], &state_L.Bx, &state_L.By, &state_L.Bz); + face_boundary_check(&T->DP[VF[i].p2], &state_R.Bx, &state_R.By, &state_R.Bz); +#endif /* #ifdef MHD */ + + /* turn the velocities to get velx perpendicular and vely and velz in the plane of the face */ + face_turn_velocities(&state_L, &geom); + face_turn_velocities(&state_R, &geom); + +#ifndef MESHRELAX + + /* call Riemann solver */ + + double press; +#ifdef RIEMANN_HLLC + press = godunov_flux_3d_hllc(&state_L, &state_R, &state_face, &fluxes); +#else /* #ifdef RIEMANN_HLLC */ +#ifdef RIEMANN_HLLD + press = godunov_flux_3d_hlld(&state_L, &state_R, vel_face_turned, &state_face, &fluxes); +#else /* #ifdef RIEMANN_HLLD */ + press = godunov_flux_3d(&state_L, &state_R, &state_face); /* exact ideal gas solver */ +#endif /* #ifdef RIEMANN_HLLD #else */ +#endif /* #ifdef RIEMANN_HLLC #else */ + 
+ if(press < 0) + terminate("press < 0: ID_L: %d, ID_R: %d", VF[i].p1, VF[i].p2); + +#ifdef GODUNOV_STATS + get_mach_numbers(&state_L, &state_R, press); + if(st_L.rho > 1.0e-6 && st_R.rho > 1.0e-6) + fprintf(fdstats, "%g %g %g %g %g %g %g %g %g %g %g %g\n", state_L.rho, state_L.velx, state_L.press, state_L.rho, + state_L.velx, state_L.press, state_face.rho, state_face.velx, state_face.press, state_L.mach, state_R.mach, + VF[i].area); +#endif /* GODUNOV_STATS */ + +#endif /* #ifndef MESHRELAX */ + + /* turn the velocity field back */ + face_turnback_velocities(&state_face, &geom); + + /* add the face velocity again */ + state_face.velx += vel_face[0]; + state_face.vely += vel_face[1]; + state_face.velz += vel_face[2]; + +#ifndef MESHRELAX + +#if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) + /* for non-exact Riemann solver, fluxes are already computed in the local frame, so convert to lab frame and turn momentum fluxes + * to the lab orientation */ + flux_convert_to_lab_frame(&state_L, &state_R, vel_face_turned, &fluxes); + face_turn_momentum_flux(&fluxes, &geom); + +#else /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) */ + + /* calculate fluxes for exact Riemann problem */ + /* compute net flux with dot-product of outward normal and area of face */ + /* multiplication with area and time-step comes later */ + + face_get_fluxes(&state_L, &state_R, &state_face, &fluxes, &geom, vel_face); + +#endif /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) #else */ + + /* set the face states and fluxes of those quantities that are passively advected */ + face_set_scalar_states_and_fluxes(&state_L, &state_R, &state_face, &fluxes); + + face_limit_fluxes(&state_L, &state_R, &state_center_L, &state_center_R, &fluxes, face_dt, &count, &count_reduced); + + /* put in cosmological factors */ + if(All.ComovingIntegrationOn) + { + fluxes.momentum[0] *= atime; + fluxes.momentum[1] *= atime; + fluxes.momentum[2] *= atime; + fluxes.energy *= atime * atime; +#ifdef MHD + 
fluxes.B[0] *= sqrtatime; + fluxes.B[1] *= sqrtatime; + fluxes.B[2] *= sqrtatime; +#ifdef MHD_POWELL + state_face.Bx *= sqrtatime; +#endif /* #ifdef MHD_POWELL */ +#endif /* #ifdef MHD */ + } + +#else /* #ifndef MESHRELAX */ + + /* just solve the advection equation instead of Riemann problem */ + + solve_advection(&state_L, &state_R, &state_face, &geom, vel_face); + face_clear_fluxes(&fluxes); + face_add_fluxes_advection(&state_face, &fluxes, &geom, vel_face); + face_set_scalar_states_and_fluxes(&state_L, &state_R, &state_face, &fluxes); + +#endif /* #ifndef MESHRELAX #else */ + +#ifndef ISOTHERM_EQS + if(!gsl_finite(fluxes.energy)) + { + printf("i=%d eFlux-Bummer: %g %g %g\n", i, fluxes.energy, state_face.press, state_face.rho); + printf("rho_L=%g velx_L=%g vely_L=%g velz_L=%g press_L=%g\n", state_L.rho, state_L.velx, state_L.vely, state_L.velz, + state_L.press); + printf("rho_R=%g velx_R=%g vely_R=%g velz_R=%g press_R=%g\n", state_R.rho, state_R.velx, state_R.vely, state_R.velz, + state_R.press); + print_particle_info(i); + terminate("infinity encountered"); + } +#endif /* #ifndef ISOTHERM_EQS */ + + /* now apply the flux to update the conserved states of the cells */ + + if(face_dt > 0) /* selects active faces */ + { + int k, p, q; + double dir; + double fac = face_dt * VF[i].area; +#if defined(MAXSCALARS) + int m; +#endif /* #if defined(MAXSCALARS) */ + + fac *= 0.5; + +#if defined(MHD_POWELL) + struct state *state_center, *delta_time; +#endif /* #if defined(MHD_POWELL) */ + for(k = 0; k < 2; k++) + { +#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) + int qother; +#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */ + if(k == 0) + { + q = VF[i].p1; + p = DP[q].index; + dir = -fac; +#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) + qother = VF[i].p2; +#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */ +#if defined(MHD_POWELL) + 
state_center = &state_center_L; + delta_time = &delta_time_L; +#endif /* #if defined(MHD_POWELL) */ + } + else + { + q = VF[i].p2; + p = DP[q].index; + dir = +fac; +#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) + qother = VF[i].p1; +#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */ +#if defined(MHD_POWELL) + state_center = &state_center_R; + delta_time = &delta_time_R; +#endif /* #if defined(MHD_POWELL) */ + } + + if(DP[q].task == ThisTask) + { + if(DP[q].index >= NumGas) /* this is a local ghost point */ + { + if(DP[VF[i].p1].ID == DP[VF[i].p2].ID) /* this may happen for reflective points */ + continue; + p -= NumGas; + } + + /* note: this will be executed if P[p] is a local point, independent of active or not */ + P[p].Mass += dir * fluxes.mass; + SphP[p].Momentum[0] += dir * fluxes.momentum[0]; + SphP[p].Momentum[1] += dir * fluxes.momentum[1]; + SphP[p].Momentum[2] += dir * fluxes.momentum[2]; + +#ifdef MHD + SphP[p].BConserved[0] += dir * fluxes.B[0]; + SphP[p].BConserved[1] += dir * fluxes.B[1]; + SphP[p].BConserved[2] += dir * fluxes.B[2]; +#if defined(MHD_POWELL) + double Velx = state_center->velx + delta_time->velx + vel_face[0]; + double Vely = state_center->vely + delta_time->vely + vel_face[1]; + double Velz = state_center->velz + delta_time->velz + vel_face[2]; + + if(All.ComovingIntegrationOn) + { + Velx += atime * hubble_a * state_center->dx; + Vely += atime * hubble_a * state_center->dy; + Velz += atime * hubble_a * state_center->dz; + } + + double Bx = state_center->Bx + delta_time->Bx; + double By = state_center->By + delta_time->By; + double Bz = state_center->Bz + delta_time->Bz; + + SphP[p].BConserved[0] += dir * Velx * state_face.Bx; + SphP[p].BConserved[1] += dir * Vely * state_face.Bx; + SphP[p].BConserved[2] += dir * Velz * state_face.Bx; + + SphP[p].Momentum[0] += dir * Bx * state_face.Bx; + SphP[p].Momentum[1] += dir * By * state_face.Bx; + SphP[p].Momentum[2] += dir * 
Bz * state_face.Bx; + + SphP[p].Energy += dir * (Bx * Velx + By * Vely + Bz * Velz) * state_face.Bx * atime; + + { + double dMomX = dir * Bx * state_face.Bx; + double dMomY = dir * By * state_face.Bx; + double dMomZ = dir * Bz * state_face.Bx; + + All.Powell_Momentum[0] += dMomX; + All.Powell_Momentum[1] += dMomY; + All.Powell_Momentum[2] += dMomZ; + + double dx = SphP[p].Center[0] - 0.5 * All.BoxSize; + double dy = SphP[p].Center[1] - 0.5 * All.BoxSize; + double dz = SphP[p].Center[2] - 0.5 * All.BoxSize; + + All.Powell_Angular_Momentum[0] += dy * dMomZ - dz * dMomY; + All.Powell_Angular_Momentum[1] += dz * dMomX - dx * dMomZ; + All.Powell_Angular_Momentum[2] += dx * dMomY - dy * dMomX; + All.Powell_Energy += dir * (Bx * Velx + By * Vely + Bz * Velz) * state_face.Bx * atime; + } +#endif /* #if defined(MHD_POWELL) */ +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + for(m = 0; m < N_Scalar; m++) + { + *(MyFloat *)(((char *)(&SphP[p])) + scalar_elements[m].offset_mass) += dir * fluxes.scalars[m]; + } +#endif /* #ifdef MAXSCALARS */ + +#if !defined(ISOTHERM_EQS) + SphP[p].Energy += dir * fluxes.energy; +#endif /* #if !defined(ISOTHERM_EQS) */ + } + else + { + /* here we have a foreign ghost point */ + if(DP[q].originalindex < 0) + terminate("should not happen"); + + if(Nflux >= MaxNflux) + { + T->Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR; + MaxNflux = T->Indi.AllocFacNflux; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNflux=%d Indi.AllocFacNflux=%g\n", ThisTask, MaxNflux, + T->Indi.AllocFacNflux); +#endif /* #ifdef VERBOSE */ + FluxList = myrealloc_movable(FluxList, MaxNflux * sizeof(struct flux_list_data)); + + if(Nflux >= MaxNflux) + terminate("Nflux >= MaxNflux"); + } + + FluxList[Nflux].task = DP[q].task; + FluxList[Nflux].index = DP[q].originalindex; + + FluxList[Nflux].dM = dir * fluxes.mass; + + FluxList[Nflux].dP[0] = dir * fluxes.momentum[0]; + FluxList[Nflux].dP[1] = dir * fluxes.momentum[1]; + FluxList[Nflux].dP[2] = dir * 
fluxes.momentum[2]; + +#if !defined(ISOTHERM_EQS) + FluxList[Nflux].dEnergy = dir * fluxes.energy; +#endif /* #if !defined(ISOTHERM_EQS) */ + +#ifdef MHD + FluxList[Nflux].dB[0] = dir * fluxes.B[0]; + FluxList[Nflux].dB[1] = dir * fluxes.B[1]; + FluxList[Nflux].dB[2] = dir * fluxes.B[2]; +#if defined(MHD_POWELL) + double Velx = state_center->velx + delta_time->velx + vel_face[0]; + double Vely = state_center->vely + delta_time->vely + vel_face[1]; + double Velz = state_center->velz + delta_time->velz + vel_face[2]; + + if(All.ComovingIntegrationOn) + { + Velx += atime * hubble_a * state_center->dx; + Vely += atime * hubble_a * state_center->dy; + Velz += atime * hubble_a * state_center->dz; + } + + double Bx = state_center->Bx + delta_time->Bx; + double By = state_center->By + delta_time->By; + double Bz = state_center->Bz + delta_time->Bz; + + FluxList[Nflux].dB[0] += dir * Velx * state_face.Bx; + FluxList[Nflux].dB[1] += dir * Vely * state_face.Bx; + FluxList[Nflux].dB[2] += dir * Velz * state_face.Bx; + + FluxList[Nflux].dP[0] += dir * Bx * state_face.Bx; + FluxList[Nflux].dP[1] += dir * By * state_face.Bx; + FluxList[Nflux].dP[2] += dir * Bz * state_face.Bx; +#ifndef ISOTHERM_EQS + FluxList[Nflux].dEnergy += dir * (Bx * Velx + By * Vely + Bz * Velz) * state_face.Bx * atime; +#endif /* #ifndef ISOTHERM_EQS */ + + { + double dMomX = dir * Bx * state_face.Bx; + double dMomY = dir * By * state_face.Bx; + double dMomZ = dir * Bz * state_face.Bx; + + All.Powell_Momentum[0] += dMomX; + All.Powell_Momentum[1] += dMomY; + All.Powell_Momentum[2] += dMomZ; + + double dx = PrimExch[p].Center[0] - 0.5 * All.BoxSize; + double dy = PrimExch[p].Center[1] - 0.5 * All.BoxSize; + double dz = PrimExch[p].Center[2] - 0.5 * All.BoxSize; + + All.Powell_Angular_Momentum[0] += dy * dMomZ - dz * dMomY; + All.Powell_Angular_Momentum[1] += dz * dMomX - dx * dMomZ; + All.Powell_Angular_Momentum[2] += dx * dMomY - dy * dMomX; + All.Powell_Energy += dir * (Bx * Velx + By * Vely + Bz * Velz) 
* state_face.Bx * atime; + } +#endif /* #if defined(MHD_POWELL) */ +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + for(m = 0; m < N_Scalar; m++) + FluxList[Nflux].dConservedScalars[m] = dir * fluxes.scalars[m]; +#endif /* #ifdef MAXSCALARS */ + + Nflux++; + } + } + } + } + /* end of big loop over all faces */ + + TIMER_STOPSTART(CPU_FLUXES, CPU_FLUXES_COMM); + + /* now exchange the flux-list and apply it when needed */ + apply_flux_list(); + + TIMER_STOPSTART(CPU_FLUXES_COMM, CPU_FLUXES); + + myfree(FluxList); + + double in[2] = {count, count_reduced}, out[2]; + MPI_Reduce(in, out, 2, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + if(ThisTask == 0) + { + tot_count = out[0]; + tot_count_reduced = out[1]; + + printf("FLUX: exchanged fluxes over %g faces, with %g reduced (fraction %g), cumulative fraction %g\n", tot_count, + tot_count_reduced, tot_count_reduced / (tot_count + 1.0e-30), All.TotCountReducedFluxes / (All.TotCountFluxes + 1.0e-30)); + All.TotCountReducedFluxes += tot_count_reduced; + All.TotCountFluxes += tot_count; + } + + fvs_evaluate_statistics(&stat); + +#ifdef MESHRELAX + for(i = 0; i < NumGas; i++) + { + if(P[i].Mass < 0) + { + terminate("negative mass reached for cell=%d mass=%g", P[i].ID, P[i].Mass); + + P[i].Mass = 0; + SphP[i].Energy = 0; + SphP[i].Momentum[0] = 0; + SphP[i].Momentum[1] = 0; + SphP[i].Momentum[2] = 0; + } + } +#endif /* #ifdef MESHRELAX */ + +#ifdef GODUNOV_STATS + endrun(); +#endif /* #ifdef GODUNOV_STATS */ + +#ifdef ONEDIMS_SPHERICAL + apply_spherical_source_terms(); +#endif /* #ifdef ONEDIMS_SPHERICAL */ + +#if defined(MHD_POWELL) && defined(VERBOSE) + double Powell_Momentum[3]; + double Powell_Angular_Momentum[3]; + double Powell_Energy; + + MPI_Reduce(All.Powell_Momentum, Powell_Momentum, 3, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(All.Powell_Angular_Momentum, Powell_Angular_Momentum, 3, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&All.Powell_Energy, &Powell_Energy, 1, MPI_DOUBLE, MPI_SUM, 0, 
MPI_COMM_WORLD); + + if(ThisTask == 0) + printf("MHD_POWELL: Total ST contribution: Mom=%g,%g,%g AngMom=%g,%g,%g Energy=%g\n", Powell_Momentum[0], Powell_Momentum[1], + Powell_Momentum[2], Powell_Angular_Momentum[0], Powell_Angular_Momentum[1], Powell_Angular_Momentum[2], Powell_Energy); +#endif /* #if defined(MHD_POWELL) && defined(VERBOSE) */ + +#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS + restore_face_areas(T); +#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */ + + TIMER_STOP(CPU_FLUXES); +} + +#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS +/*! \brief Writes face areas to a backup variable. + * + * \param[in, out] T Pointer to tessellation. + * + * \return void + */ +void backup_face_areas(tessellation *T) +{ + for(int i = 0; i < T->Nvf; i++) + T->VF[i].area_backup = T->VF[i].area; +} + +/*! \brief Restores face areas from a backup variable. + * + * \param[in, out] T Pointer to tessellation. + * + * \return void + */ +void restore_face_areas(tessellation *T) +{ + for(int i = 0; i < T->Nvf; i++) + T->VF[i].area = T->VF[i].area_backup; +} +#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */ + +/*! \brief Gets value of hydrodynamial quantities at face. + * + * \param[in] T Pointer to tessellation. + * \param[in] p Index in DP array. + * \param[in] i Index in VF array. + * \param[out] st State at face. 
+ *
+ *  The state is read from P/SphP if the point belongs to this task and from
+ *  the imported PrimExch/GradExch arrays otherwise.
+ *
+ * \return 0 on success, -1 if the point has no associated cell
+ *         (DP[p].index < 0).
+ */
+int face_get_state(tessellation *T, int p, int i, struct state *st)
+{
+  int particle;
+#if defined(MAXSCALARS)
+  int j;
+#endif /* #if defined(MAXSCALARS) */
+  double aBegin;
+
+  point *DP = T->DP;
+  face *VF  = T->VF;
+
+  particle = DP[p].index;
+
+  if(particle < 0)
+    return -1;
+
+  if(particle >= NumGas && DP[p].task == ThisTask) /* local ghost point: map back onto the real cell */
+    particle -= NumGas;
+
+  /* interpolation vector from the center of the cell to the center of the face */
+  if(DP[p].task == ThisTask)
+    {
+      st->dx = VF[i].cx - SphP[particle].Center[0];
+      st->dy = VF[i].cy - SphP[particle].Center[1];
+      st->dz = VF[i].cz - SphP[particle].Center[2];
+    }
+  else
+    {
+      st->dx = VF[i].cx - PrimExch[particle].Center[0];
+      st->dy = VF[i].cy - PrimExch[particle].Center[1];
+      st->dz = VF[i].cz - PrimExch[particle].Center[2];
+    }
+
+    /* correct for periodicity */
+#if !defined(REFLECTIVE_X) && !defined(ONEDIMS_SPHERICAL)
+  if(st->dx < -boxHalf_X)
+    st->dx += boxSize_X;
+  if(st->dx > boxHalf_X)
+    st->dx -= boxSize_X;
+#endif /* #if !defined(REFLECTIVE_X) && !defined(ONEDIMS_SPHERICAL) */
+#if !defined(REFLECTIVE_Y)
+  if(st->dy < -boxHalf_Y)
+    st->dy += boxSize_Y;
+  if(st->dy > boxHalf_Y)
+    st->dy -= boxSize_Y;
+#endif /* #if !defined(REFLECTIVE_Y) */
+#if !defined(REFLECTIVE_Z)
+  if(st->dz < -boxHalf_Z)
+    st->dz += boxSize_Z;
+  if(st->dz > boxHalf_Z)
+    st->dz -= boxSize_Z;
+#endif /* #if !defined(REFLECTIVE_Z) */
+
+#ifdef ONEDIMS_SPHERICAL
+  if(DP[p].task == ThisTask)
+    st->radius = SphP[particle].Center[0];
+  else
+    st->radius = PrimExch[particle].Center[0];
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+  if(DP[p].task == ThisTask)
+    {
+      st->velGas[0] = P[particle].Vel[0];
+      st->velGas[1] = P[particle].Vel[1];
+      st->velGas[2] = P[particle].Vel[2];
+
+      st->velVertex[0] = SphP[particle].VelVertex[0];
+      st->velVertex[1] = SphP[particle].VelVertex[1];
+      st->velVertex[2] = SphP[particle].VelVertex[2];
+
+      st->rho = SphP[particle].Density;
+
+      st->press = SphP[particle].Pressure;
+
+      st->grad = &SphP[particle].Grad;
+
+      st->timeBin = P[particle].TimeBinHydro;
+
+      st->volume = SphP[particle].Volume;
+
+#ifdef MHD
+      st->Bx = SphP[particle].B[0];
+      st->By = SphP[particle].B[1];
+      st->Bz = SphP[particle].B[2];
+#ifdef MHD_POWELL
+      st->divB = SphP[particle].DivB;
+#endif /* #ifdef MHD_POWELL */
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+      for(j = 0; j < N_Scalar; j++)
+        st->scalars[j] = *(MyFloat *)(((char *)(&SphP[particle])) + scalar_elements[j].offset); /* scalar j is read at a byte offset into the SphP entry */
+#endif /* #ifdef MAXSCALARS */
+
+      aBegin = SphP[particle].TimeLastPrimUpdate;
+
+      st->oldmass     = SphP[particle].OldMass;
+      st->surfacearea = SphP[particle].SurfaceArea;
+      st->activearea  = SphP[particle].ActiveArea;
+      st->csnd        = get_sound_speed(particle);
+      st->ID          = P[particle].ID;
+    }
+  else
+    {
+      st->velGas[0] = PrimExch[particle].VelGas[0];
+      st->velGas[1] = PrimExch[particle].VelGas[1];
+      st->velGas[2] = PrimExch[particle].VelGas[2];
+
+      st->velVertex[0] = PrimExch[particle].VelVertex[0];
+      st->velVertex[1] = PrimExch[particle].VelVertex[1];
+      st->velVertex[2] = PrimExch[particle].VelVertex[2];
+
+      st->rho = PrimExch[particle].Density;
+
+      st->press = PrimExch[particle].Pressure;
+
+      st->grad = &GradExch[particle];
+
+      st->timeBin = PrimExch[particle].TimeBinHydro; /* This is the hydro timestep */
+
+      st->volume = PrimExch[particle].Volume;
+
+#ifdef MHD
+      st->Bx = PrimExch[particle].B[0];
+      st->By = PrimExch[particle].B[1];
+      st->Bz = PrimExch[particle].B[2];
+#ifdef MHD_POWELL
+      st->divB = PrimExch[particle].DivB;
+#endif /* #ifdef MHD_POWELL */
+#endif /* #ifdef MHD */
+
+#ifdef MAXSCALARS
+      for(j = 0; j < N_Scalar; j++)
+        st->scalars[j] = PrimExch[particle].Scalars[j];
+#endif /* #ifdef MAXSCALARS */
+
+      aBegin = PrimExch[particle].TimeLastPrimUpdate;
+
+      st->oldmass     = PrimExch[particle].OldMass;
+      st->surfacearea = PrimExch[particle].SurfaceArea;
+      st->activearea  = PrimExch[particle].ActiveArea;
+      st->csnd        = PrimExch[particle].Csnd; /* sound speed comes with the imported data instead of being recomputed */
+      st->ID          = DP[p].ID;
+    }
+
+  st->dtExtrapolation = All.Time - aBegin; /* time elapsed since the primitive variables were last updated */
+
+  /* check for reflecting or outflowing boundaries */
+  face_boundary_check_vertex(T, p, &st->velVertex[0], &st->velVertex[1], &st->velVertex[2]);
+
+  return 0;
+}
+
+/*! \brief Checks for boundary cells with non-periodic boundary conditions.
+ *
+ *  Adjusts the velocities accordingly. A component is negated if the point
+ *  carries the matching reflective image flag; unlike face_boundary_check(),
+ *  the outflow flags are not consulted here.
+ *
+ * \param[in] T Pointer to tessellation.
+ * \param[in] p Index in DP array.
+ * \param[in, out] velx Velocity in x coordinate.
+ * \param[in, out] vely Velocity in y coordinate.
+ * \param[in, out] velz Velocity in z coordinate.
+ *
+ * \return void
+ */
+void face_boundary_check_vertex(tessellation *T, int p, MyFloat *velx, MyFloat *vely, MyFloat *velz)
+{
+  /* check for reflecting or outflowing boundaries */
+#if defined(REFLECTIVE_X)
+  if((T->DP[p].image_flags & REFL_X_FLAGS))
+    *velx *= -1;
+#endif /* #if defined(REFLECTIVE_X) */
+#if defined(REFLECTIVE_Y)
+  if((T->DP[p].image_flags & REFL_Y_FLAGS))
+    *vely *= -1;
+#endif /* #if defined(REFLECTIVE_Y) */
+#if defined(REFLECTIVE_Z)
+  if((T->DP[p].image_flags & REFL_Z_FLAGS))
+    *velz *= -1;
+#endif /* #if defined(REFLECTIVE_Z) */
+
+#ifdef ONEDIMS_SPHERICAL
+  if(p == -1) /* point index -1 is mirrored in the 1d spherical setup */
+    *velx *= -1;
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+}
+
+/*! \brief Checks for boundary cells with non-periodic boundary conditions.
+ *
+ * \param[in] p Pointer to point.
+ * \param[in, out] velx Velocity in x direction.
+ * \param[in, out] vely Velocity in y direction.
+ * \param[in, out] velz Velocity in z direction.
+ * + * \return void + */ +void face_boundary_check(point *p, double *velx, double *vely, double *velz) +{ + /* check for reflecting or outflowing boundaries: mirror only if reflective and not outflow */ +#if defined(REFLECTIVE_X) + if((p->image_flags & REFL_X_FLAGS) && !(p->image_flags & OUTFLOW_X)) + *velx *= -1; +#endif /* #if defined(REFLECTIVE_X) */ +#if defined(REFLECTIVE_Y) + if((p->image_flags & REFL_Y_FLAGS) && !(p->image_flags & OUTFLOW_Y)) + *vely *= -1; +#endif /* #if defined(REFLECTIVE_Y) */ +#if defined(REFLECTIVE_Z) + if((p->image_flags & REFL_Z_FLAGS) && !(p->image_flags & OUTFLOW_Z)) + *velz *= -1; +#endif /* #if defined(REFLECTIVE_Z) */ + +#ifdef ONEDIMS_SPHERICAL + if(p == &Mesh.DP[-1]) /* the point stored at index -1 of Mesh.DP gets its x-velocity mirrored */ + *velx *= -1; +#endif /* #ifdef ONEDIMS_SPHERICAL */ +} + +/*! \brief Checks whether local task is responsible for a face. + * + * The side with the lower DP[].ID decides if its time bin is synchronized; + * otherwise the side with the higher ID decides if its bin is synchronized. + * The face is accepted only when the deciding point lives on this task and + * has an index below NumGas. + * + * \param[in] T Pointer to tessellation. + * \param[in] p1 Index in DP array of point1 making up the face. + * \param[in] p2 Index in DP array of point2 making up the face. + * \param[in] st_L Left hand side state of the face. + * \param[in] st_R Right hand side state of the face. + * + * \return -1 if not local responsibility, 0 if it is. 
+ */ +int face_check_responsibility_of_this_task(tessellation *T, int p1, int p2, struct state *st_L, struct state *st_R) +{ + int low_p, high_p; + struct state *low_state, *high_state; + + point *DP = T->DP; + + /* order the two sides by ID so that every task makes the same choice */ + if(DP[p1].ID < DP[p2].ID) + { + low_p = p1; + high_p = p2; + low_state = st_L; + high_state = st_R; + } + else if(DP[p1].ID > DP[p2].ID) + { + low_p = p2; + high_p = p1; + low_state = st_R; + high_state = st_L; + } + else + { + /* equality of the IDs should only occur for reflective boundaries; + * the local point with index < NumGas is then treated as the low side */ + if(DP[p1].task == ThisTask && DP[p1].index < NumGas) + { + low_p = p1; + high_p = p2; + low_state = st_L; + high_state = st_R; + } + else + { + low_p = p2; + high_p = p1; + low_state = st_R; + high_state = st_L; + } + } + + if(TimeBinSynchronized[low_state->timeBin]) /* the one with the lower ID is active */ + { + /* we need to check whether the one with the lower ID is a local particle */ + if(DP[low_p].task == ThisTask && DP[low_p].index < NumGas) + return 0; + } + else if(TimeBinSynchronized[high_state->timeBin]) /* only the side with the higher ID is active */ + { + /* we need to check whether we hold the one with the higher ID, if yes, we'll do it */ + if(DP[high_p].task == ThisTask && DP[high_p].index < NumGas) + return 0; + } + + return -1; /* we can skip this face on the local task */ +} + +/*! \brief Determines timestep of face. + * + * The face uses the smaller of the two time bins. As a side effect dt_half + * is set in both states and, for comoving integration, dt_half and + * dtExtrapolation of both states are rescaled. + * + * \param[in, out] state_L Left hand side state of face. + * \param[in, out] state_R Right hand side state of face. + * \param[out] hubble_a Value of Hubble function at scalefactor + * a(cosmological). + * \param[out] atime Scalefactor (cosmological). + * + * \return Face timestep. 
+ */ +double face_timestep(struct state *state_L, struct state *state_R, double *hubble_a, double *atime) +{ + integertime ti_begin_L, ti_begin_R; + short int timeBin; + double face_dt; + + /* determine most recent start of the time bins */ + ti_begin_L = (All.Ti_Current >> state_L->timeBin) << state_L->timeBin; + ti_begin_R = (All.Ti_Current >> state_R->timeBin) << state_R->timeBin; + + /* take the minimum of the two */ + timeBin = state_L->timeBin; + if(timeBin > state_R->timeBin) + timeBin = state_R->timeBin; + + /* compute the half-step prediction times + * NOTE(review): (timeBin - 1) is a negative shift count when timeBin == 0, + * which is undefined in C - confirm that bin 0 never reaches this point */ + state_L->dt_half = (All.Ti_Current + (((integertime)1) << (timeBin - 1)) - ti_begin_L) * All.Timebase_interval; + state_R->dt_half = (All.Ti_Current + (((integertime)1) << (timeBin - 1)) - ti_begin_R) * All.Timebase_interval; + + if(All.ComovingIntegrationOn) + { + /* calculate scale factor at middle of timestep */ + *atime = All.TimeBegin * exp((All.Ti_Current + (((integertime)1) << (timeBin - 1))) * All.Timebase_interval); + *hubble_a = hubble_function(*atime); + } + else + *atime = *hubble_a = 1.0; + + /* set the actual time-step for the face */ + face_dt = (((integertime)1) << timeBin) * All.Timebase_interval; + + if(All.ComovingIntegrationOn) + { + /* converts to delta_t */ + state_L->dt_half /= *hubble_a; + state_R->dt_half /= *hubble_a; + face_dt /= *hubble_a; + + face_dt /= *atime; /* we need dt/a, the (1/a) takes care of the gradient in the cosmological euler equations */ + + state_L->dtExtrapolation /= *hubble_a; + state_L->dtExtrapolation /= *atime; + state_R->dtExtrapolation /= *hubble_a; + state_R->dtExtrapolation /= *atime; + } + + return face_dt; +} + +/*! \brief Converts the velocities to local frame, compensating for the + * movement of the face. + * + * \param[in, out] st State to be converted to local frame (velx, vely and + * velz are set; velGas is divided by atime for comoving + * integration). + * \param[in] vel_face Face velocity. + * \param[in] hubble_a Value of Hubble function at scalefactor + * a (cosmological). + * \param[in] atime Scalefactor (cosmological). 
+ * + * \return void + */ +void state_convert_to_local_frame(struct state *st, double *vel_face, double hubble_a, double atime) +{ + if(All.ComovingIntegrationOn) + { + st->velGas[0] /= atime; /* convert to peculiar velocity */ + st->velGas[1] /= atime; + st->velGas[2] /= atime; + } + + /* gas velocity relative to the moving face */ + st->velx = st->velGas[0] - vel_face[0]; + st->vely = st->velGas[1] - vel_face[1]; + st->velz = st->velGas[2] - vel_face[2]; + + if(All.ComovingIntegrationOn) + { + st->velx -= atime * hubble_a * st->dx; /* need to get the physical velocity relative to the face */ + st->vely -= atime * hubble_a * st->dy; + st->velz -= atime * hubble_a * st->dz; + } +} + +/*! \brief Extrapolates the state in time. + * + * Uses st->dtExtrapolation as the time interval (divided by atime for + * comoving integration). If st->rho <= 0 the function returns without + * writing to delta. + * + * \param[out] delta Change due to time extrapolation. + * \param[in] st State to be extrapolated. + * \param[in] atime Scalefactor at this time (cosmological). + * + * \return void + */ +void face_do_time_extrapolation(struct state *delta, struct state *st, double atime) +{ + /* st is the state at the center of the cell */ + + /* the code still allows for empty cells but we are going to divide + * by rho, so ... 
+ */ + if(st->rho <= 0) + return; /* delta is left unmodified on this path */ + +#if defined(MESHRELAX) || defined(DISABLE_TIME_EXTRAPOLATION) + /* do not do time extrapolation: delta is zeroed and the code below is never reached */ + (void)st; + (void)atime; + memset(delta, 0, sizeof(struct state)); + return; +#endif /* #if defined (MESHRELAX) || defined (DISABLE_TIME_EXTRAPOLATION) */ + + struct grad_data *grad = st->grad; + + double dt_half = st->dtExtrapolation; + + if(All.ComovingIntegrationOn) + dt_half /= atime; + + delta->rho = -dt_half * (st->velx * grad->drho[0] + st->rho * grad->dvel[0][0] + st->vely * grad->drho[1] + + st->rho * grad->dvel[1][1] + st->velz * grad->drho[2] + st->rho * grad->dvel[2][2]); + + delta->velx = -dt_half * (1.0 / st->rho * grad->dpress[0] + st->velx * grad->dvel[0][0] + st->vely * grad->dvel[0][1] + + st->velz * grad->dvel[0][2]); + + delta->vely = -dt_half * (1.0 / st->rho * grad->dpress[1] + st->velx * grad->dvel[1][0] + st->vely * grad->dvel[1][1] + + st->velz * grad->dvel[1][2]); + + delta->velz = -dt_half * (1.0 / st->rho * grad->dpress[2] + st->velx * grad->dvel[2][0] + st->vely * grad->dvel[2][1] + + st->velz * grad->dvel[2][2]); + + delta->press = -dt_half * (GAMMA * st->press * (grad->dvel[0][0] + grad->dvel[1][1] + grad->dvel[2][2]) + + st->velx * grad->dpress[0] + st->vely * grad->dpress[1] + st->velz * grad->dpress[2]); + +#ifdef ONEDIMS_SPHERICAL + delta->velx += dt_half * 2. 
* st->press / (st->rho * st->radius); +#endif /* #ifdef ONEDIMS_SPHERICAL */ + +#ifdef MHD + delta->velx += + -dt_half * (1.0 / st->rho * + (st->By * grad->dB[1][0] + st->Bz * grad->dB[2][0] - st->By * grad->dB[0][1] - st->Bz * grad->dB[0][2]) / atime); + + delta->vely += + -dt_half * (1.0 / st->rho * + (st->Bx * grad->dB[0][1] + st->Bz * grad->dB[2][1] - st->Bx * grad->dB[1][0] - st->Bz * grad->dB[1][2]) / atime); + + delta->velz += + -dt_half * (1.0 / st->rho * + (st->Bx * grad->dB[0][2] + st->By * grad->dB[1][2] - st->Bx * grad->dB[2][0] - st->By * grad->dB[2][1]) / atime); + + delta->Bx = + -dt_half * (-st->velx * grad->dB[1][1] - grad->dvel[0][1] * st->By + st->vely * grad->dB[0][1] + grad->dvel[1][1] * st->Bx + + st->velz * grad->dB[0][2] + grad->dvel[2][2] * st->Bx - st->velx * grad->dB[2][2] - grad->dvel[0][2] * st->Bz); + + delta->By = + -dt_half * (+st->velx * grad->dB[1][0] + grad->dvel[0][0] * st->By - st->vely * grad->dB[0][0] - grad->dvel[1][0] * st->Bx - + st->vely * grad->dB[2][2] - grad->dvel[1][2] * st->Bz + st->velz * grad->dB[1][2] + grad->dvel[2][2] * st->By); + + delta->Bz = + -dt_half * (-st->velz * grad->dB[0][0] - grad->dvel[2][0] * st->Bx + st->velx * grad->dB[2][0] + grad->dvel[0][0] * st->Bz + + st->vely * grad->dB[2][1] + grad->dvel[1][1] * st->Bz - st->velz * grad->dB[1][1] - grad->dvel[2][1] * st->By); +#endif /* #ifdef MHD */ + +#if defined(MAXSCALARS) + int k; + for(k = 0; k < N_Scalar; k++) + { + delta->scalars[k] = + -dt_half * (st->velx * grad->dscalars[k][0] + st->vely * grad->dscalars[k][1] + st->velz * grad->dscalars[k][2]); + } +#endif /* #if defined(MAXSCALARS) */ +} + +/*! \brief Extrapolates the state in space. + * + * Linear extrapolation with neighbor cell to their common face. + * + * \param[out] delta Change due to spatial extrapolation. + * \param[in] st State to be extrapolated. + * \param[in] st_other state of other cell. 
+ * + * \return void + */ +void face_do_spatial_extrapolation(struct state *delta, struct state *st, struct state *st_other) +{ +#ifdef DISABLE_SPATIAL_RECONSTRUCTION + memset(delta, 0, sizeof(struct state)); + return; +#endif /* #ifdef DISABLE_SPATIAL_RECONSTRUCTION */ + +#ifdef NO_RECONSTRUCTION_AT_STRONG_SHOCKS + if(dmax(st->press, st_other->press) > 100. * dmin(st->press, st_other->press)) /* pressures differ by more than a factor of 100 */ + { + memset(delta, 0, sizeof(struct state)); + return; + } +#endif /* #ifdef NO_RECONSTRUCTION_AT_STRONG_SHOCKS */ + + struct grad_data *grad = st->grad; + + /* offset that the gradients are multiplied with (st->dx, st->dy, st->dz) */ + double dx[3]; + dx[0] = st->dx; + dx[1] = st->dy; + dx[2] = st->dz; + + /* difference of the two states' offsets; handed on, but the single-quantity routine ignores it */ + double r[3]; + r[0] = -st_other->dx + st->dx; + r[1] = -st_other->dy + st->dy; + r[2] = -st_other->dz + st->dz; + + face_do_spatial_extrapolation_single_quantity(&delta->rho, st->rho, st_other->rho, grad->drho, dx, r); + + face_do_spatial_extrapolation_single_quantity(&delta->velx, st->velx, st_other->velx, grad->dvel[0], dx, r); + face_do_spatial_extrapolation_single_quantity(&delta->vely, st->vely, st_other->vely, grad->dvel[1], dx, r); + face_do_spatial_extrapolation_single_quantity(&delta->velz, st->velz, st_other->velz, grad->dvel[2], dx, r); + + face_do_spatial_extrapolation_single_quantity(&delta->press, st->press, st_other->press, grad->dpress, dx, r); + +#ifdef MHD + face_do_spatial_extrapolation_single_quantity(&delta->Bx, st->Bx, st_other->Bx, grad->dB[0], dx, r); + face_do_spatial_extrapolation_single_quantity(&delta->By, st->By, st_other->By, grad->dB[1], dx, r); + face_do_spatial_extrapolation_single_quantity(&delta->Bz, st->Bz, st_other->Bz, grad->dB[2], dx, r); +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + int k; + for(k = 0; k < N_Scalar; k++) + { + face_do_spatial_extrapolation_single_quantity(&delta->scalars[k], st->scalars[k], st_other->scalars[k], grad->dscalars[k], dx, + r); + } +#endif /* #ifdef MAXSCALARS */ +} + +/*! \brief Extrapolates a single quantity in space. 
+ * + * Linear extrapolation: the result is the scalar product of grad and dx. + * + * \param[out] delta Change due to spatial extrapolation (grad . dx). + * \param[in] st State to be extrapolated (unused). + * \param[in] st_other state of other cell (unused). + * \param[in] grad Gradient used for extrapolation. + * \param[in] dx Offset vector the gradient is multiplied with. + * \param[in] r (unused). + * + * \return void + */ +void face_do_spatial_extrapolation_single_quantity(double *delta, double st, double st_other, MySingle *grad, double *dx, double *r) +{ + (void)st; + (void)st_other; + (void)r; + *delta = grad[0] * dx[0] + grad[1] * dx[1] + grad[2] * dx[2]; +} + +/*! \brief Adds space and time extrapolation to state. + * + * Nothing is added if st_face->rho <= 0 or if the combined update would make + * density or pressure negative; stat->count_disable_extrapolation is then + * incremented by one. + * + * NOTE(review): the spatial part is guarded by DISABLE_SPATIAL_EXTRAPOLATION, + * while face_do_spatial_extrapolation() tests DISABLE_SPATIAL_RECONSTRUCTION - + * confirm that the differing macro names are intended. + * + * \param[in, out] st_face State that is modified. + * \param[in] delta_time Change of state due to time extrapolation. + * \param[in] delta_space Change of state due to space extrapolation. + * \param[in, out] stat Structure that counts face value statistics. + * + * \return void + */ +void face_add_extrapolations(struct state *st_face, struct state *delta_time, struct state *delta_space, struct fvs_stat *stat) +{ + stat->count_disable_extrapolation += 1; /* counted as disabled until proven otherwise below */ + + if(st_face->rho <= 0) + return; + + if(st_face->rho + delta_time->rho + delta_space->rho < 0 || st_face->press + delta_time->press + delta_space->press < 0) + return; + + stat->count_disable_extrapolation -= 1; /* extrapolation is applied: undo the increment */ + +#if !defined(MESHRELAX) && !defined(DISABLE_TIME_EXTRAPOLATION) + face_add_extrapolation(st_face, delta_time, stat); +#endif /* #if !defined(MESHRELAX) && !defined(DISABLE_TIME_EXTRAPOLATION) */ + +#if !defined(DISABLE_SPATIAL_EXTRAPOLATION) + face_add_extrapolation(st_face, delta_space, stat); +#endif /* #if !defined(DISABLE_SPATIAL_EXTRAPOLATION) */ +} + +/*! \brief Adds an extrapolation to state. + * + * Called in face_add_extrapolations(..). + * + * \param[in, out] st_face State that is modified. + * \param[in] delta Change of state due to extrapolation. 
+ * \param[in] stat (unused) + * + * \return void + */ +void face_add_extrapolation(struct state *st_face, struct state *delta, struct fvs_stat *stat) +{ + st_face->rho += delta->rho; + st_face->velx += delta->velx; + st_face->vely += delta->vely; + st_face->velz += delta->velz; + st_face->press += delta->press; + +#ifdef MHD +#ifndef ONEDIMS + /* in one dimension, Bx has to be constant! */ + st_face->Bx += delta->Bx; +#endif /* #ifndef ONEDIMS */ + st_face->By += delta->By; + st_face->Bz += delta->Bz; +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + int k; + for(k = 0; k < N_Scalar; k++) + st_face->scalars[k] += delta->scalars[k]; +#endif /* #ifdef MAXSCALARS */ +} + +/*! \brief Adds an extrapolation to state. + * + * But first checks positivity: nothing is added if st_face->rho <= 0 or if + * the updated density or pressure would be negative. In that case + * stat->count_disable_extrapolation is incremented by one. + * + * \param[in, out] st_face State that is modified. + * \param[in] delta Change of state due to extrapolation. + * \param[in, out] stat Structure that counts face value statistics. + * + * \return void + */ +void face_add_extrapolation_with_check(struct state *st_face, struct state *delta, struct fvs_stat *stat) +{ + stat->count_disable_extrapolation += 1; /* counted as disabled until proven otherwise below */ + + if(st_face->rho <= 0) + return; + + if(st_face->rho + delta->rho < 0 || st_face->press + delta->press < 0) + return; + + stat->count_disable_extrapolation -= 1; /* extrapolation is applied: undo the increment */ + + face_add_extrapolation(st_face, delta, stat); +} + +/*! \brief Rotates velocities and magnetic field. + * + * \param[in, out] st State that contains velocities to be rotated. + * \param[in] geom Geometry with a rotation matrix. 
+ * + * \return void + */ +void face_turn_velocities(struct state *st, struct geometry *geom) +{ + double velx, vely, velz; + + /* keep copies: all three outputs depend on all three inputs */ + velx = st->velx; + vely = st->vely; + velz = st->velz; + + /* rows of the rotation are the vectors n, m and p stored in geom */ + st->velx = velx * geom->nx + vely * geom->ny + velz * geom->nz; + st->vely = velx * geom->mx + vely * geom->my + velz * geom->mz; + st->velz = velx * geom->px + vely * geom->py + velz * geom->pz; + +#ifdef MHD + double Bx, By, Bz; + + Bx = st->Bx; + By = st->By; + Bz = st->Bz; + + st->Bx = Bx * geom->nx + By * geom->ny + Bz * geom->nz; + st->By = Bx * geom->mx + By * geom->my + Bz * geom->mz; + st->Bz = Bx * geom->px + By * geom->py + Bz * geom->pz; +#endif /* #ifdef MHD */ +} + +/*! \brief Sets the state at the face to its upwind value. + * + * The side is chosen from the sign of the face velocity projected onto the + * face normal: the left state for a negative projection, the right state + * otherwise. Only rho, velx, vely, velz and press are copied. + * + * \param[in] st_L Left hand side hydrodynamical state. + * \param[in] st_R Right hand side hydrodynamical state. + * \param[out] st_face State at face. + * \param[in] geom Geometry structure that includes normal vector of face. + * \param[in] vel_face Velocity vector of face. + * + * \return void + */ +void solve_advection(struct state *st_L, struct state *st_R, struct state_face *st_face, struct geometry *geom, double *vel_face) +{ + /* face velocity projected onto the face normal */ + double ev = vel_face[0] * geom->nx + vel_face[1] * geom->ny + vel_face[2] * geom->nz; + + if(ev < 0) + { + st_face->rho = st_L->rho; + st_face->velx = st_L->velx; + st_face->vely = st_L->vely; + st_face->velz = st_L->velz; + st_face->press = st_L->press; + } + else + { + st_face->rho = st_R->rho; + st_face->velx = st_R->velx; + st_face->vely = st_R->vely; + st_face->velz = st_R->velz; + st_face->press = st_R->press; + } +} + +/*! \brief Rotates velocities backwards. + * + * Inverse operation to face_turn_velocities(...). + * + * \param[in, out] st_face State that contains velocities to be rotated. + * \param[in] geom Geometry with a rotation matrix. 
+ * + * \return void + */ +void face_turnback_velocities(struct state_face *st_face, struct geometry *geom) +{ + double velx, vely, velz; + + velx = st_face->velx; + vely = st_face->vely; + velz = st_face->velz; + + /* multiply with the transpose of the matrix used in face_turn_velocities() */ + st_face->velx = velx * geom->nx + vely * geom->mx + velz * geom->px; + st_face->vely = velx * geom->ny + vely * geom->my + velz * geom->py; + st_face->velz = velx * geom->nz + vely * geom->mz + velz * geom->pz; +} + +/*! \brief Sets the scalar states and computes the scalar fluxes from the + * mass flux. + * + * The face scalars point to the left state's scalars for a positive mass + * flux and to the right state's otherwise. Does nothing unless MAXSCALARS + * is defined. + * + * \param[in] st_L Left hand side state. + * \param[in] st_R Right hand side state. + * \param[out] st_face Face state. + * \param[in, out] flux Flux over face (mass is read, scalars are written). + * + * \return void + */ +void face_set_scalar_states_and_fluxes(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux) +{ +#if defined(MAXSCALARS) + int i; + + double normfac, normifac; + + if(flux->mass > 0) + st_face->scalars = st_L->scalars; + else + st_face->scalars = st_R->scalars; + + /* Normalize species here: normfac is the sum of all SCALAR_TYPE_SPECIES face values */ + normfac = 0; + + for(i = 0; i < N_Scalar; i++) + { + flux->scalars[i] = st_face->scalars[i] * flux->mass; + + if(scalar_elements[i].type == SCALAR_TYPE_SPECIES) + normfac += st_face->scalars[i]; + } + + if(normfac != 0) + { + normifac = 1.0 / normfac; + + for(i = 0; i < N_Scalar; i++) + if(scalar_elements[i].type == SCALAR_TYPE_SPECIES || scalar_elements[i].type == SCALAR_TYPE_NORMALIZE) + flux->scalars[i] *= normifac; + } + +#endif /* #if defined(MAXSCALARS) */ +} + +#if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) +/*! \brief Converts flux from face frame to simulation box frame. + * + * \param[in] st_L Left hand side state. + * \param[in] st_R Right hand side state. + * \param[in] vel_face Velocity vector of face. + * \param[in, out] flux Flux vector across face. 
+ * + * \return void + */ +void flux_convert_to_lab_frame(struct state *st_L, struct state *st_R, double *vel_face, struct fluxes *flux) +{ + /* momentum flux before the shift; needed for the energy term below */ + double momx = flux->momentum[0]; + double momy = flux->momentum[1]; + double momz = flux->momentum[2]; + + flux->momentum[0] += vel_face[0] * flux->mass; + flux->momentum[1] += vel_face[1] * flux->mass; + flux->momentum[2] += vel_face[2] * flux->mass; + + flux->energy += momx * vel_face[0] + momy * vel_face[1] + momz * vel_face[2] + + 0.5 * flux->mass * (vel_face[0] * vel_face[0] + vel_face[1] * vel_face[1] + vel_face[2] * vel_face[2]); + +#ifdef MHD + double Bx; + Bx = 0.5 * (st_L->Bx + st_R->Bx); /* arithmetic mean of the two sides */ + + flux->B[0] -= vel_face[0] * Bx; + flux->B[1] -= vel_face[1] * Bx; + flux->B[2] -= vel_face[2] * Bx; +#endif /* #ifdef MHD */ +} +#endif /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) */ + +/*! \brief Rotates momentum flux and magnetic flux vector. + * + * flux->momentum vector needs to be turned in case the HLLC or Rusanov + * Riemann solvers are used. + * + * \param[in, out] flux Flux vector which is rotated. + * \param[in] geom Geometry structure that holds rotation matrix. + * + * \return void + */ +void face_turn_momentum_flux(struct fluxes *flux, struct geometry *geom) +{ + double momx = flux->momentum[0]; + double momy = flux->momentum[1]; + double momz = flux->momentum[2]; + + /* same matrix as in face_turnback_velocities(): columns are n, m and p */ + flux->momentum[0] = momx * geom->nx + momy * geom->mx + momz * geom->px; + flux->momentum[1] = momx * geom->ny + momy * geom->my + momz * geom->py; + flux->momentum[2] = momx * geom->nz + momy * geom->mz + momz * geom->pz; + +#ifdef MHD + double Bx = flux->B[0]; + double By = flux->B[1]; + double Bz = flux->B[2]; + + flux->B[0] = Bx * geom->nx + By * geom->mx + Bz * geom->px; + flux->B[1] = Bx * geom->ny + By * geom->my + Bz * geom->py; + flux->B[2] = Bx * geom->nz + By * geom->mz + Bz * geom->pz; +#endif /* #ifdef MHD */ +} + +/*! \brief Calculates the flux from face states. 
+ * + * \param[in] st_L (unused) + * \param[in] st_R (unused) + * \param[in] st_face State at face. + * \param[out] flux Flux at face (flux->energy is only written if + * ISOTHERM_EQS is not defined). + * \param[in] geom Geometry structure containing normal vector of face. + * \param[in] vel_face Velocity vector of face. + * + * \return void + */ +void face_get_fluxes(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux, struct geometry *geom, + double *vel_face) +{ + double fac; + + /* calculate fluxes for ordinary Riemann solver */ + + /* fac: gas velocity relative to the face, projected onto the face normal */ + fac = (st_face->velx - vel_face[0]) * geom->nx + (st_face->vely - vel_face[1]) * geom->ny + (st_face->velz - vel_face[2]) * geom->nz; + + flux->mass = st_face->rho * fac; + + flux->momentum[0] = (st_face->rho * st_face->velx * fac + st_face->press * geom->nx); + flux->momentum[1] = (st_face->rho * st_face->vely * fac + st_face->press * geom->ny); + flux->momentum[2] = (st_face->rho * st_face->velz * fac + st_face->press * geom->nz); + +#ifndef ISOTHERM_EQS + flux->energy = + (0.5 * st_face->rho * (st_face->velx * st_face->velx + st_face->vely * st_face->vely + st_face->velz * st_face->velz) + + st_face->press / GAMMA_MINUS1) * + fac + + st_face->press * (st_face->velx * geom->nx + st_face->vely * geom->ny + st_face->velz * geom->nz); +#endif /* #ifndef ISOTHERM_EQS */ +} + +/*! \brief Flux limiter. + * + * Make sure cell cannot lose more mass than it contains... + * + * \param[in] st_L Left hand side hydrodynamical state. + * \param[in] st_R Right hand side hydrodynamical state. + * \param[in] st_center_L (unused) + * \param[in] st_center_R (unused) + * \param[in, out] flux Flux vector. + * \param[in] dt Timestep. + * \param[in, out] count Number of calls of this function. + * \param[in, out] count_reduced Number of flux reductions caused by this + * function. 
+ * + * \return void + */ +void face_limit_fluxes(struct state *st_L, struct state *st_R, struct state *st_center_L, struct state *st_center_R, + struct fluxes *flux, double dt, double *count, double *count_reduced) +{ + *count = *count + 1.0; + + /* choose upwind mass to determine a stability bound on the maximum allowed mass exchange, + (we do this to prevent negative masses under all circumstances) */ + double upwind_mass, upwind_activearea, reduc_fac; + integertime upwind_timebin, downstream_timebin; + + if(flux->mass > 0) /* positive mass flux: the left state is treated as the upwind side */ + { + upwind_mass = st_L->oldmass; + upwind_activearea = st_L->activearea; + upwind_timebin = st_L->timeBin; + downstream_timebin = st_R->timeBin; + } + else + { + upwind_mass = st_R->oldmass; + upwind_activearea = st_R->activearea; + upwind_timebin = st_R->timeBin; + downstream_timebin = st_L->timeBin; + } + + if(upwind_timebin > downstream_timebin) /* upwind cell is in a larger time bin: scale dt by 2^(bin difference) */ + dt *= pow(2, upwind_timebin - downstream_timebin); + + if(fabs(flux->mass * dt * upwind_activearea) > 0.9 * upwind_mass) /* more than 90% of the upwind cell's old mass would be moved */ + { + reduc_fac = 0.9 * upwind_mass / fabs(flux->mass * dt * upwind_activearea); + + *count_reduced = *count_reduced + 1.0; + + flux->mass *= reduc_fac; + flux->energy *= reduc_fac; + flux->momentum[0] *= reduc_fac; + flux->momentum[1] *= reduc_fac; + flux->momentum[2] *= reduc_fac; + + /* remark: do not reduce the magnetic field flux, as it is not coupled to the mass flux */ +#ifdef MAXSCALARS + for(int i = 0; i < N_Scalar; i++) + flux->scalars[i] *= reduc_fac; +#endif /* #ifdef MAXSCALARS */ + } +} + +/*! \brief Set flux vector entries to zero. + * + * Clears mass, momentum, energy and (with MHD) B; flux->scalars is not + * touched. + * + * \param[out] flux Flux vector. + * + * \return void + */ +void face_clear_fluxes(struct fluxes *flux) +{ + flux->mass = 0; + flux->momentum[0] = 0; + flux->momentum[1] = 0; + flux->momentum[2] = 0; + flux->energy = 0; +#ifdef MHD + flux->B[0] = 0; + flux->B[1] = 0; + flux->B[2] = 0; +#endif /* #ifdef MHD */ +} + +/*! \brief Adds flux due to advection to flux vector. + * + * \param[in] st_face State at face. 
+ * \param[in, out] flux Flux vector. + * \param[in] geom Geometry structure containing the face normal vector. + * \param[in] vel_face Velocity vector of the face. + * + * \return void + */ +void face_add_fluxes_advection(struct state_face *st_face, struct fluxes *flux, struct geometry *geom, double *vel_face) +{ + /* negative of the face velocity projected onto the face normal */ + double fac = -vel_face[0] * geom->nx - vel_face[1] * geom->ny - vel_face[2] * geom->nz; + + flux->mass += st_face->rho * fac; + + flux->momentum[0] += st_face->rho * st_face->velx * fac; + flux->momentum[1] += st_face->rho * st_face->vely * fac; + flux->momentum[2] += st_face->rho * st_face->velz * fac; + + flux->energy += + 0.5 * st_face->rho * fac * (st_face->velx * st_face->velx + st_face->vely * st_face->vely + st_face->velz * st_face->velz) + + st_face->press / GAMMA_MINUS1 * fac; +} + +/*! \brief Compares tasks of flux list data. + * + * Sort kernel for flux list data. + * + * \param[in] a First flux list data object. + * \param[in] b Second flux list data object. + * + * \return (-1,0,1) -1 if a->task < b->task, +1 if a->task > b->task, + * 0 if both are equal. + */ +int flux_list_data_compare(const void *a, const void *b) +{ + if(((struct flux_list_data *)a)->task < (((struct flux_list_data *)b)->task)) + return -1; + + if(((struct flux_list_data *)a)->task > (((struct flux_list_data *)b)->task)) + return +1; + + return 0; +} + +/*! \brief Communicates flux list and applies fluxes to conserved hydro + * variables. 
+ * + * FluxList is sorted by destination task, exchanged pairwise between tasks, + * and every received entry is added to P[].Mass and the SphP[] conserved + * quantities of the cell given by its index field. + * + * \return void + */ +void apply_flux_list(void) +{ + int i, j, p, nimport, ngrp, recvTask; +#if defined(MAXSCALARS) + int k; +#endif /* #if defined(MAXSCALARS) */ + + /* now exchange the flux-list and apply it when needed */ + + mysort(FluxList, Nflux, sizeof(struct flux_list_data), flux_list_data_compare); + + for(j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(i = 0; i < Nflux; i++) + Send_count[FluxList[i].task]++; + + if(Send_count[ThisTask] > 0) /* an entry addressed to the local task is treated as a fatal error */ + terminate("Send_count[ThisTask]"); + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + /* prefix sums of the counts give the offsets into the send and receive buffers */ + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + struct flux_list_data *FluxListGet = (struct flux_list_data *)mymalloc("FluxListGet", nimport * sizeof(struct flux_list_data)); + + /* exchange particle data; the partner in each round is ThisTask XOR ngrp */ + for(ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the particles */ + MPI_Sendrecv(&FluxList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct flux_list_data), MPI_BYTE, recvTask, + TAG_DENS_A, &FluxListGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct flux_list_data), + MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + } + + /* apply the fluxes */ + + for(i = 0; i < nimport; i++) + { + p = FluxListGet[i].index; + + P[p].Mass += FluxListGet[i].dM; + + SphP[p].Momentum[0] += FluxListGet[i].dP[0]; + SphP[p].Momentum[1] += FluxListGet[i].dP[1]; + SphP[p].Momentum[2] += FluxListGet[i].dP[2]; +#ifdef MHD + SphP[p].BConserved[0] += FluxListGet[i].dB[0]; + SphP[p].BConserved[1] += FluxListGet[i].dB[1]; + SphP[p].BConserved[2] += FluxListGet[i].dB[2]; +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + for(k 
= 0; k < N_Scalar; k++) + *(MyFloat *)(((char *)(&SphP[p])) + scalar_elements[k].offset_mass) += FluxListGet[i].dConservedScalars[k]; +#endif /* #ifdef MAXSCALARS */ + +#ifndef ISOTHERM_EQS + SphP[p].Energy += FluxListGet[i].dEnergy; +#endif /* #ifndef ISOTHERM_EQS */ + } + myfree(FluxListGet); +} + +/*! \brief Initializes statistics of finite volume solver. + * + * Resets count_disable_extrapolation to zero. + * + * \param[out] stat Statistics structure. + * + * \return void + */ +void fvs_initialize_statistics(struct fvs_stat *stat) { stat->count_disable_extrapolation = 0; } + +/*! \brief Gathers statistics properties from all tasks and prints information. + * + * Does nothing unless VERBOSE is defined. + * + * \param[in] stat Finite volume solver statistics structure. + * + * \return void + */ +void fvs_evaluate_statistics(struct fvs_stat *stat) +{ +#ifdef VERBOSE + int count_disable_extrapolation = 0; + /* sum the per-task counters onto task 0; the buffer is reduced as MPI_INT */ + MPI_Reduce(&stat->count_disable_extrapolation, &count_disable_extrapolation, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + mpi_printf("FLUX: Disabled extrapolation for %d interfaces.\n", count_disable_extrapolation); +#endif /* #ifdef VERBOSE */ +} + +#ifdef ONEDIMS_SPHERICAL +/*! \brief Applies source terms that occur due to spherical symmetry. + * + * For every active hydro cell the pressure is extrapolated in time from the + * last primitive update and the resulting term is added to Momentum[0]. + * + * \return void + */ +void apply_spherical_source_terms() +{ + int idx, i; + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + double Pressure = SphP[i].Pressure; + double dt_Extrapolation = All.Time - SphP[i].TimeLastPrimUpdate; + struct grad_data *grad = &SphP[i].Grad; + + Pressure += -dt_Extrapolation * (GAMMA * Pressure * (grad->dvel[0][0] + grad->dvel[1][1] + grad->dvel[2][2]) + + P[i].Vel[0] * grad->dpress[0] + P[i].Vel[1] * grad->dpress[1] + P[i].Vel[2] * grad->dpress[2]); + + /* half of the cell's hydro timestep; zero for time bin 0 */ + double dt = 0.5 * (P[i].TimeBinHydro ? 
(((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval; + SphP[i].Momentum[0] += dt * Pressure * (Mesh.VF[i + 1].area - Mesh.VF[i].area); + } +} +#endif /* #ifdef ONEDIMS_SPHERICAL */ diff --git a/src/amuse/community/arepo/src/hydro/gradients.c b/src/amuse/community/arepo/src/hydro/gradients.c new file mode 100644 index 0000000000..191c13635c --- /dev/null +++ b/src/amuse/community/arepo/src/hydro/gradients.c @@ -0,0 +1,149 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/hydro/gradients.c + * \date 05/2018 + * \brief Routines to initialize gradient data. 
+ * \details contains functions: + * void init_gradients() + * void gradient_init(MyFloat * addr, MyFloat * addr_exch, + * MySingle * addr_grad, int type) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 05.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +int N_Grad = 0; + +struct grad_elements grad_elements[MAXGRADIENTS], *GDensity, *GVelx, *GVely, *GVelz, *GPressure, *GUtherm; + +/*! \brief Initializes all gradient fields. + * + * Density, velocity, pressure and if needed magnetic fields and passive + * scalars. Only element 0 of SphP, P and PrimExch is referenced, to derive + * the member offsets that gradient_init() stores. + * + * \return void + */ +void init_gradients() +{ +#if defined(MAXSCALARS) + int k; +#endif /* #if defined(MAXSCALARS) */ + + gradient_init(&SphP[0].Density, &PrimExch[0].Density, SphP[0].Grad.drho, GRADIENT_TYPE_DENSITY); + + /* the velocities live in P rather than SphP; gradient_init() picks the base struct from the type */ + gradient_init(&P[0].Vel[0], &PrimExch[0].VelGas[0], SphP[0].Grad.dvel[0], GRADIENT_TYPE_VELX); + gradient_init(&P[0].Vel[1], &PrimExch[0].VelGas[1], SphP[0].Grad.dvel[1], GRADIENT_TYPE_VELY); + gradient_init(&P[0].Vel[2], &PrimExch[0].VelGas[2], SphP[0].Grad.dvel[2], GRADIENT_TYPE_VELZ); + + gradient_init(&SphP[0].Pressure, &PrimExch[0].Pressure, SphP[0].Grad.dpress, GRADIENT_TYPE_PRESSURE); + +#ifdef MHD + gradient_init(&SphP[0].B[0], &PrimExch[0].B[0], SphP[0].Grad.dB[0], GRADIENT_TYPE_NORMAL); + gradient_init(&SphP[0].B[1], &PrimExch[0].B[1], SphP[0].Grad.dB[1], GRADIENT_TYPE_NORMAL); + gradient_init(&SphP[0].B[2], &PrimExch[0].B[2], SphP[0].Grad.dB[2], GRADIENT_TYPE_NORMAL); +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + MyFloat *addr; + + for(k = 0; k < N_Scalar; k++) + { + addr = (MyFloat *)(((char *)(&SphP[0])) + scalar_elements[k].offset); + gradient_init(addr, &PrimExch[0].Scalars[k], SphP[0].Grad.dscalars[k], GRADIENT_TYPE_NORMAL); + } +#endif /* #ifdef MAXSCALARS */ + + mpi_printf("INIT: %d/%d Gradients used.\n", 
N_Grad, MAXGRADIENTS); +} + +/*! \brief Initialize a gradient field. + * + * Each time this initialization routine is called, the global variable + * N_Grad is incremented by 1. The routine terminates the run if MAXGRADIENTS + * entries are already registered. + * + * \param[in] addr Pointer to element in SphP[0] struct (for Vel in P[0]) + * \param[in] addr_exch Pointer to element in PrimExch[0] struct + * \param[in] addr_grad Pointer to element in SphP[0].Grad struct + * \param[in] type Type of gradient + * + * \return void + */ +void gradient_init(MyFloat *addr, MyFloat *addr_exch, MySingle *addr_grad, int type) +{ + if(N_Grad == MAXGRADIENTS) + { + mpi_printf("Failed to register gradient, maximum of %d already reached\n", MAXGRADIENTS); + terminate("MAXGRADIENTS reached"); + } + + grad_elements[N_Grad].type = type; + + /* store byte offsets relative to element 0 of the respective array */ + if((type == GRADIENT_TYPE_VELX) || (type == GRADIENT_TYPE_VELY) || (type == GRADIENT_TYPE_VELZ)) + { + /* basic structure is P */ + grad_elements[N_Grad].offset = ((char *)addr) - ((char *)&P[0]); + } + else + { + /* basic structure is SphP */ + grad_elements[N_Grad].offset = ((char *)addr) - ((char *)&SphP[0]); + } + + grad_elements[N_Grad].offset_exch = ((char *)addr_exch) - ((char *)&PrimExch[0]); + grad_elements[N_Grad].offset_grad = ((char *)addr_grad) - ((char *)&(SphP[0].Grad)); + + /* remember the entry of the named gradient types in the matching global pointer */ + switch(type) + { + case GRADIENT_TYPE_VELX: + GVelx = &grad_elements[N_Grad]; + break; + case GRADIENT_TYPE_VELY: + GVely = &grad_elements[N_Grad]; + break; + case GRADIENT_TYPE_VELZ: + GVelz = &grad_elements[N_Grad]; + break; + case GRADIENT_TYPE_DENSITY: + GDensity = &grad_elements[N_Grad]; + break; + case GRADIENT_TYPE_PRESSURE: + GPressure = &grad_elements[N_Grad]; + break; + case GRADIENT_TYPE_UTHERM: + GUtherm = &grad_elements[N_Grad]; + break; + default: + break; + } + + N_Grad++; +} diff --git a/src/amuse/community/arepo/src/hydro/mhd.c b/src/amuse/community/arepo/src/hydro/mhd.c new file mode 100644 index 0000000000..33eaf7eab5 --- /dev/null +++ b/src/amuse/community/arepo/src/hydro/mhd.c @@ -0,0 +1,99 @@ +/*! 
+ * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mhd.c + * \date 05/2018 + * \brief Source terms for MHD implementation needed for cosmological + * MHD equations as well as Powell source terms. + * \details contains functions: + * void do_mhd_source_terms_first_half(void) + * void do_mhd_source_terms_second_half(void) + * void do_mhd_source_terms(void) + * void do_mhd_powell_source_terms(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef MHD + +static void do_mhd_source_terms(void); + +/*! \brief First half of the MHD source terms. + * + * Before hydrodynamics timestep. Calls do_mhd_source_terms() and then + * update_primitive_variables(). + * \return void + */ +void do_mhd_source_terms_first_half(void) +{ + do_mhd_source_terms(); + update_primitive_variables(); +} + +/*! \brief Second half of the MHD source terms. + * + * After hydrodynamics timestep. Performs the same two calls as the first + * half: do_mhd_source_terms() and then update_primitive_variables(). + * \return void + */ +void do_mhd_source_terms_second_half(void) +{ + do_mhd_source_terms(); + update_primitive_variables(); +} + +/*!
\brief Adds source terms of MHD equations in expanding spacetime (i.e. + * in cosmological simulations) to energy. + * + * \return void + */ +void do_mhd_source_terms(void) +{ + TIMER_START(CPU_MHD); + + if(All.ComovingIntegrationOn) /* nothing is added unless comoving integration is switched on */ + { + double atime = All.Time; + double hubble_a = hubble_function(atime); + + int idx, i; + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) /* skip negative entries of the active list */ + continue; + + double dt_cell = 0.5 * (P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval / + hubble_a; /* half the timestep of the cell; zero for time bin 0 */ + SphP[i].Energy += dt_cell * 0.5 * (SphP[i].B[0] * SphP[i].B[0] + SphP[i].B[1] * SphP[i].B[1] + SphP[i].B[2] * SphP[i].B[2]) * + SphP[i].Volume * atime * hubble_a; /* hubble_a cancels against the division in dt_cell */ + } + } + + TIMER_STOP(CPU_MHD); +} + +#endif /* #ifdef MHD */ diff --git a/src/amuse/community/arepo/src/hydro/riemann.c b/src/amuse/community/arepo/src/hydro/riemann.c new file mode 100644 index 0000000000..24f664352f --- /dev/null +++ b/src/amuse/community/arepo/src/hydro/riemann.c @@ -0,0 +1,955 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * .
+ * + * \file src/riemann.c + * \date 05/2018 + * \brief Exact, iterative Riemann solver; both adiabatic and isothermal. + * \details contains functions: + * double godunov_flux_3d(struct state *st_L, struct state + * *st_R, struct state_face *st_face) + * void sample_solution_vacuum_left_3d(double S, struct state + * *st_R, struct state_face *st_face) + * void sample_solution_vacuum_right_3d(double S, struct state + * *st_L, struct state_face *st_face) + * void sample_solution_vacuum_generate_3d(double S, struct + * state *st_L, struct state *st_R, struct state_face + * *st_face) + * void get_mach_numbers(struct state *st_L, struct state + * *st_R, double Press) + * void sample_solution_3d(double S, struct state *st_L, + * struct state *st_R, double Press, double Vel, struct + * state_face *st_face) + * int riemann(struct state *st_L, struct state *st_R, double + * *Press, double *Vel) + * void pressure_function(double P, struct state *st, double *F, + * double *FD) + * double guess_for_pressure(struct state *st_L, + * struct state *st_R) + * void riemann_isotherm(struct state *st_L, struct state *st_R, + * double *Rho, double *Vel, double csnd) + * void isothermal_function(double rhostar, double rho, + * double *F, double *FD) + * void sample_solution_isothermal3d(double S, struct state + * *st_L, struct state *st_R, double Rho, double Vel, + * struct state_face *st_face, double csnd) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +#if !(defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD)) + +#define GAMMA_G1 ((GAMMA - 1.0) / (2.0 * GAMMA)) +#define GAMMA_G2 ((GAMMA + 1.0) / (2.0 * GAMMA)) +#define GAMMA_G3 ((2.0 * GAMMA / (GAMMA - 1.0))) +#define GAMMA_G4 (2.0 / (GAMMA - 1.0)) +#define GAMMA_G5 (2.0 / (GAMMA + 1.0))
+#define GAMMA_G6 ((GAMMA - 1.0) / (GAMMA + 1.0)) +#define GAMMA_G7 (0.5 * (GAMMA - 1.0)) +#define GAMMA_G8 (1.0 / GAMMA) +#define GAMMA_G9 (GAMMA - 1.0) + +#define TOL 1.0e-8 + +/*! \brief Calculates face state from Riemann problem. + * + * \param[in, out] st_L Left hand side state (its csnd member may be overwritten here). + * \param[in, out] st_R Right hand side state (its csnd member may be overwritten here). + * \param[out] st_face State at face. + * + * \return Central pressure if the iterative solver is used and succeeds; 0 in all vacuum cases and with ISOTHERM_EQS. + */ +double godunov_flux_3d(struct state *st_L, struct state *st_R, struct state_face *st_face) +{ + double Vel; + +#ifndef ISOTHERM_EQS + { + if(st_L->press == 0 && st_R->press == 0) + { + /* vacuum state */ + st_face->velx = 0; + st_face->rho = 0; + st_face->press = 0; + st_face->vely = 0; + st_face->velz = 0; +#ifdef MAXSCALARS + st_face->scalars = NULL; +#endif /* #ifdef MAXSCALARS */ + return 0; + } + + if(st_L->rho > 0 && st_R->rho > 0) + { + st_L->csnd = sqrt(GAMMA * st_L->press / st_L->rho); + st_R->csnd = sqrt(GAMMA * st_R->press / st_R->rho); + + double Press; + + if(riemann(st_L, st_R, &Press, &Vel)) + { + sample_solution_3d(0.0, /* S=x/t */ + st_L, st_R, Press, Vel, st_face); + return Press; + } + else + { + /* ICs lead to vacuum, need to sample vacuum solution */ + + sample_solution_vacuum_generate_3d(0.0, /* S=x/t */ + st_L, st_R, st_face); + return 0; + } + } + else + { + if(st_L->rho == 0 && st_R->rho > 0) + { + sample_solution_vacuum_left_3d(0.0, /* S=x/t */ + st_R, st_face); /* NOTE(review): st_R->csnd is read there but not computed on this path; presumably set by the caller - confirm */ + return 0; + } + else if(st_R->rho == 0 && st_L->rho > 0) + { + sample_solution_vacuum_right_3d(0.0, /* S=x/t */ + st_L, st_face); /* NOTE(review): st_L->csnd is read there but not computed on this path; presumably set by the caller - confirm */ + return 0; + } + else if(st_R->rho == 0 && st_L->rho == 0) + { + /* vacuum state */ + st_face->velx = 0; + st_face->rho = 0; + st_face->press = 0; + st_face->vely = 0; + st_face->velz = 0; +#ifdef MAXSCALARS + st_face->scalars = NULL; +#endif /* #ifdef MAXSCALARS */ + } + else + { + terminate("one of the densities is negative\n"); + } + return 0; + } + } + +#else /* #ifndef ISOTHERM_EQS */ + double Rho; + double csnd; + + csnd = All.IsoSoundSpeed; + riemann_isotherm(st_L,
st_R, &Rho, &Vel, csnd); + + sample_solution_isothermal3d(0.0, /* S=x/t */ + st_L, st_R, Rho, Vel, st_face, csnd); + + st_face->press = st_face->rho * csnd * csnd; /* isothermal case: pressure follows from density and the fixed sound speed */ + + return 0; +#endif /* #ifndef ISOTHERM_EQS #else */ +} + +/*! \brief Sample solution for a vacuum state at the left hand side. + * + * \param[in] S Position x / t. + * \param[in] st_R Right hand side state. + * \param[out] st_face State at face. + * + * \return void + */ +void sample_solution_vacuum_left_3d(double S, struct state *st_R, struct state_face *st_face) +{ + double Csnd; + + double Sr = st_R->velx - 2 * st_R->csnd / GAMMA_MINUS1; /* boundary between the vacuum and the right fan */ + + st_face->vely = st_R->vely; /* transverse velocities (and scalars) always come from the right state */ + st_face->velz = st_R->velz; +#ifdef MAXSCALARS + st_face->scalars = st_R->scalars; +#endif /* #ifdef MAXSCALARS */ + + if(S >= Sr) + { + /* right fan */ + + double shr = st_R->velx + st_R->csnd; + + if(S >= shr) /* right data state */ + { + st_face->rho = st_R->rho; + st_face->velx = st_R->velx; + st_face->press = st_R->press; + } + else + { + /* rarefaction fan right state */ + st_face->velx = GAMMA_G5 * (-st_R->csnd + GAMMA_G7 * st_R->velx + S); + Csnd = GAMMA_G5 * (st_R->csnd - GAMMA_G7 * (st_R->velx - S)); + st_face->rho = st_R->rho * pow(Csnd / st_R->csnd, GAMMA_G4); + st_face->press = st_R->press * pow(Csnd / st_R->csnd, GAMMA_G3); + } + } + else + { + /* vacuum state */ + st_face->velx = Sr; + st_face->rho = 0; + st_face->press = 0; + } +} + +/*! \brief Sample solution for a vacuum state at the right hand side. + * + * \param[in] S S Position x / t. + * \param[in] st_L Left hand side state. + * \param[out] st_face State at face.
+ * + * \return void + */ +void sample_solution_vacuum_right_3d(double S, struct state *st_L, struct state_face *st_face) +{ + double Csnd; + + double Sl = st_L->velx + 2 * st_L->csnd / GAMMA_MINUS1; /* boundary between the left fan and the vacuum */ + + st_face->vely = st_L->vely; /* transverse velocities (and scalars) always come from the left state */ + st_face->velz = st_L->velz; +#ifdef MAXSCALARS + st_face->scalars = st_L->scalars; +#endif /* #ifdef MAXSCALARS */ + + if(S <= Sl) + { + /* left fan */ + + double shl = st_L->velx - st_L->csnd; + + if(S <= shl) + { + /* left data state */ + st_face->rho = st_L->rho; + st_face->velx = st_L->velx; + st_face->press = st_L->press; + } + else + { + /* rarefaction fan left state */ + st_face->velx = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * st_L->velx + S); + Csnd = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * (st_L->velx - S)); + st_face->rho = st_L->rho * pow(Csnd / st_L->csnd, GAMMA_G4); + st_face->press = st_L->press * pow(Csnd / st_L->csnd, GAMMA_G3); + } + } + else + { + /* vacuum state */ + st_face->velx = Sl; + st_face->rho = 0; + st_face->press = 0; + } +} + +/*! \brief Sample solution for vacuum states. + * + * \param[in] S S Position x / t + * \param[in] st_L Left hand side state. + * \param[in] st_R Right hand side state. + * \param[out] st_face State at face.
+ * + * \return void + */ +void sample_solution_vacuum_generate_3d(double S, struct state *st_L, struct state *st_R, struct state_face *st_face) +{ + double Csnd; + + double Sl = st_L->velx + 2 * st_L->csnd / GAMMA_MINUS1; /* boundary between the left fan and the vacuum */ + double Sr = st_R->velx - 2 * st_R->csnd / GAMMA_MINUS1; /* boundary between the vacuum and the right fan */ + + if(S <= Sl) + { + /* left fan */ + + st_face->vely = st_L->vely; + st_face->velz = st_L->velz; +#ifdef MAXSCALARS + st_face->scalars = st_L->scalars; +#endif /* #ifdef MAXSCALARS */ + + double shl = st_L->velx - st_L->csnd; + + if(S <= shl) + { + /* left data state */ + st_face->rho = st_L->rho; + st_face->velx = st_L->velx; + st_face->press = st_L->press; + } + else + { + /* rarefaction fan left state */ + st_face->velx = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * st_L->velx + S); + Csnd = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * (st_L->velx - S)); + st_face->rho = st_L->rho * pow(Csnd / st_L->csnd, GAMMA_G4); + st_face->press = st_L->press * pow(Csnd / st_L->csnd, GAMMA_G3); + } + } + else if(S >= Sr) + { + /* right fan */ + + double shr = st_R->velx + st_R->csnd; + + st_face->vely = st_R->vely; + st_face->velz = st_R->velz; +#ifdef MAXSCALARS + st_face->scalars = st_R->scalars; +#endif /* #ifdef MAXSCALARS */ + + if(S >= shr) /* right data state */ + { + st_face->rho = st_R->rho; + st_face->velx = st_R->velx; + st_face->press = st_R->press; + } + else + { + /* rarefaction fan right state */ + st_face->velx = GAMMA_G5 * (-st_R->csnd + GAMMA_G7 * st_R->velx + S); + Csnd = GAMMA_G5 * (st_R->csnd - GAMMA_G7 * (st_R->velx - S)); + st_face->rho = st_R->rho * pow(Csnd / st_R->csnd, GAMMA_G4); + st_face->press = st_R->press * pow(Csnd / st_R->csnd, GAMMA_G3); + } + } + else + { + /* vacuum in between */ + st_face->velx = S; + st_face->rho = 0; + st_face->press = 0; + + st_face->vely = st_L->vely + (st_R->vely - st_L->vely) * (S - Sl) / (Sr - Sl); /* transverse velocities are interpolated linearly in S between Sl and Sr */ + st_face->velz = st_L->velz + (st_R->velz - st_L->velz) * (S - Sl) / (Sr - Sl); + +#ifdef MAXSCALARS + st_face->scalars = NULL; +#endif /* #ifdef MAXSCALARS
*/ + } +} + +/*! \brief Calculates Mach numbers of shocks from Riemann problem. + * + * Mostly used for statistics; the body is only compiled if GODUNOV_STATS is + * defined. A side whose wave is a rarefaction fan gets a Mach number of 0. + * + * \param[in, out] st_L Left hand side state. + * \param[in, out] st_R Right hand side state. + * \param[in] Press Central pressure + * + * \return void + */ +void get_mach_numbers(struct state *st_L, struct state *st_R, double Press) +{ +#if defined GODUNOV_STATS + if(Press <= st_L->press) /* left fan */ + { + st_L->mach = 0; + } + else /* left shock */ + { + double pml = Press / st_L->press; + st_L->mach = sqrt(GAMMA_G2 * pml + GAMMA_G1); + } + + if(Press > st_R->press) /* right shock */ + { + double pmr = Press / st_R->press; + st_R->mach = sqrt(GAMMA_G2 * pmr + GAMMA_G1); + } + else /* right fan */ + { + st_R->mach = 0; + } +#endif /* #if defined GODUNOV_STATS */ +} + +/*! \brief Samples 3d solution to Riemann problem. + * + * \param[in] S Position x / t. + * \param[in] st_L Left hand side state. + * \param[in] st_R Right hand side state. + * \param[in] Press Pressure in central region. + * \param[in] Vel Velocity in central region. + * \param[out] st_face State at face.
+ * + * \return void + */ +void sample_solution_3d(double S, struct state *st_L, struct state *st_R, double Press, double Vel, struct state_face *st_face) +{ + double Csnd; + + if(S <= Vel) /* sample point is left of contact */ + { + st_face->vely = st_L->vely; + st_face->velz = st_L->velz; +#ifdef MAXSCALARS + st_face->scalars = st_L->scalars; +#endif /* #ifdef MAXSCALARS */ + + if(Press <= st_L->press) /* left fan */ + { + double shl = st_L->velx - st_L->csnd; + + if(S <= shl) /* left data state */ + { + st_face->rho = st_L->rho; + st_face->velx = st_L->velx; + st_face->press = st_L->press; + } + else + { + double cml = st_L->csnd * pow(Press / st_L->press, GAMMA_G1); + double stl = Vel - cml; + + if(S > stl) /* middle left state */ + { + st_face->rho = st_L->rho * pow(Press / st_L->press, GAMMA_G8); + st_face->velx = Vel; + st_face->press = Press; + } + else /* left state inside fan */ + { + st_face->velx = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * st_L->velx + S); + Csnd = GAMMA_G5 * (st_L->csnd + GAMMA_G7 * (st_L->velx - S)); + st_face->rho = st_L->rho * pow(Csnd / st_L->csnd, GAMMA_G4); + st_face->press = st_L->press * pow(Csnd / st_L->csnd, GAMMA_G3); + } + } + } + else /* left shock */ + { + if(st_L->press > 0) + { + double pml = Press / st_L->press; + double sl = st_L->velx - st_L->csnd * sqrt(GAMMA_G2 * pml + GAMMA_G1); + + if(S <= sl) /* left data state */ + { + st_face->rho = st_L->rho; + st_face->velx = st_L->velx; + st_face->press = st_L->press; + } + else /* middle left state behind shock */ + { + st_face->rho = st_L->rho * (pml + GAMMA_G6) / (pml * GAMMA_G6 + 1.0); + st_face->velx = Vel; + st_face->press = Press; + } + } + else /* no finite pressure ratio: use the limit of the density jump above for pml -> infinity */ + { + st_face->rho = st_L->rho / GAMMA_G6; + st_face->velx = Vel; + st_face->press = Press; + } + } + } + else /* right of contact */ + { + st_face->vely = st_R->vely; + st_face->velz = st_R->velz; +#ifdef MAXSCALARS + st_face->scalars = st_R->scalars; +#endif /* #ifdef MAXSCALARS */ + + if(Press > st_R->press) /* right shock */ +
{ + if(st_R->press > 0) + { + double pmr = Press / st_R->press; + double sr = st_R->velx + st_R->csnd * sqrt(GAMMA_G2 * pmr + GAMMA_G1); + + if(S >= sr) /* right data state */ + { + st_face->rho = st_R->rho; + st_face->velx = st_R->velx; + st_face->press = st_R->press; + } + else /* middle right state behind shock */ + { + st_face->rho = st_R->rho * (pmr + GAMMA_G6) / (pmr * GAMMA_G6 + 1.0); + st_face->velx = Vel; + st_face->press = Press; + } + } + else /* no finite pressure ratio: use the limit of the density jump above for pmr -> infinity */ + { + st_face->rho = st_R->rho / GAMMA_G6; + st_face->velx = Vel; + st_face->press = Press; + } + } + else /* right fan */ + { + double shr = st_R->velx + st_R->csnd; + + if(S >= shr) /* right data state */ + { + st_face->rho = st_R->rho; + st_face->velx = st_R->velx; + st_face->press = st_R->press; + } + else + { + double cmr = st_R->csnd * pow(Press / st_R->press, GAMMA_G1); + double str = Vel + cmr; + + if(S <= str) /* middle right state */ + { + st_face->rho = st_R->rho * pow(Press / st_R->press, GAMMA_G8); + st_face->velx = Vel; + st_face->press = Press; + } + else /* fan right state */ + { + st_face->velx = GAMMA_G5 * (-st_R->csnd + GAMMA_G7 * st_R->velx + S); + Csnd = GAMMA_G5 * (st_R->csnd - GAMMA_G7 * (st_R->velx - S)); + st_face->rho = st_R->rho * pow(Csnd / st_R->csnd, GAMMA_G4); + st_face->press = st_R->press * pow(Csnd / st_R->csnd, GAMMA_G3); + } + } + } + } +} + +/*! \brief Riemann-solver; i.e. iterative solver of central pressure of a + * Riemann problem. + * + * Solution via root-finding of pressure function. + * + * \param[in] st_L Left hand side state. + * \param[in] st_R Right hand side state. + * \param[in, out] Press Central pressure; needs some initial guess. + * \param[out] Vel Velocity in central region. + * + * \return 0: failed, 1: success.
+ */ +int riemann(struct state *st_L, struct state *st_R, double *Press, double *Vel) +{ + double F_L, FD_L, F_R, FD_R, pold; + + double dVel = st_R->velx - st_L->velx; + + double critVel = GAMMA_G4 * (st_L->csnd + st_R->csnd) - dVel; + + if(critVel < 0) /* the states separate too fast: no iteration is attempted and failure is reported */ + { + /* + printf("ICs lead to vacuum. stopping. Csnd_L=%g Csnd_R=%g dVel=%g\n", Csnd_L, Csnd_R, dVel); + */ + return 0; + } + + double p = guess_for_pressure(st_L, st_R); /* *Press is only written by this routine; the starting value is computed here */ + + int iter = 0; + + do /* newton-raphson scheme; each pass applies two updates, with full steps while iter < MAXITER / 2 and half steps afterwards */ + { + pold = p; + + pressure_function(p, st_L, &F_L, &FD_L); + pressure_function(p, st_R, &F_R, &FD_R); + + if(iter < MAXITER / 2) + p -= (F_L + F_R + dVel) / (FD_L + FD_R); + else + p -= 0.5 * (F_L + F_R + dVel) / (FD_L + FD_R); + + if(p < 0.1 * pold) /* never drop below 10% of the pressure at the start of this pass */ + p = 0.1 * pold; + + pressure_function(p, st_L, &F_L, &FD_L); + pressure_function(p, st_R, &F_R, &FD_R); + + if(iter < MAXITER / 2) + p -= (F_L + F_R + dVel) / (FD_L + FD_R); + else + p -= 0.5 * (F_L + F_R + dVel) / (FD_L + FD_R); + + if(p < 0.1 * pold) + p = 0.1 * pold; + + iter++; + } + while(2 * fabs((p - pold) / (p + pold)) > TOL && iter < MAXITER); + + if(iter >= MAXITER) /* not converged: print a warning, dump both states to riemann.dat and continue with the last iterate */ + { + printf("Task=%d: Warning: ICs for riemann solver lead to divergence.\n", ThisTask); + printf("Rho_L=%g Vel_L=%g Press_L=%g Csnd_L=%g\n", st_L->rho, st_L->velx, st_L->press, st_L->csnd); + printf("Rho_R=%g Vel_R=%g Press_R=%g Csnd_R=%g\n", st_R->rho, st_R->velx, st_R->press, st_R->csnd); + printf("Adopted solution: Press=%g Vel=%g\n", p, 0.5 * (st_L->velx + st_R->velx + F_R - F_L)); + + FILE *fd; + + if((fd = fopen("riemann.dat", "w"))) + { + fwrite(&st_L->rho, sizeof(double), 1, fd); + fwrite(&st_L->velx, sizeof(double), 1, fd); + fwrite(&st_L->press, sizeof(double), 1, fd); + fwrite(&st_L->csnd, sizeof(double), 1, fd); + fwrite(&st_R->rho, sizeof(double), 1, fd); + fwrite(&st_R->velx, sizeof(double), 1, fd); + fwrite(&st_R->press, sizeof(double), 1, fd); + fwrite(&st_R->csnd, sizeof(double), 1, fd); + fclose(fd); + } + } + + /* prepare output values */ + *Press = p;
+ *Vel = 0.5 * (st_L->velx + st_R->velx + F_R - F_L); /* F_L and F_R are the values of the last pressure_function() calls in the loop */ + + return 1; +} + +/*! \brief Pressure function for root-finding. + * + * \param[in] P Trial pressure. + * \param[in] st Hydrodynamic state. + * \param[out] F Value of the pressure function for this state at P. + * \param[out] FD Derivative of the pressure function with respect to P. + * + * \return void + */ +void pressure_function(double P, struct state *st, double *F, double *FD) +{ + if(P <= st->press) /* rarefaction wave */ + { + double prat = P / st->press; + + *F = GAMMA_G4 * st->csnd * (pow(prat, GAMMA_G1) - 1.0); + *FD = (1.0 / (st->rho * st->csnd)) * pow(prat, -GAMMA_G2); + } + else /* shock wave */ + { + double ak = GAMMA_G5 / st->rho; + double bk = GAMMA_G6 * st->press; + double qrt = sqrt(ak / (bk + P)); + + *F = (P - st->press) * qrt; + *FD = (1.0 - 0.5 * (P - st->press) / (bk + P)) * qrt; + } +} + +/*! \brief Returns initial guess for central pressure of the Riemann problem. + * + * This is used as the starting value for the root-finding iteration. + * + * \param[in] st_L Left hand side state. + * \param[in] st_R Right hand side state. + * + * \return Guess for pressure in central region.
+ */ +double guess_for_pressure(struct state *st_L, struct state *st_R) +{ +#define QMAX 2.0 /* largest ratio pmax / pmin for which pv is returned directly */ + + double pmin, pmax; + + double pv = + 0.5 * (st_L->press + st_R->press) - 0.125 * (st_R->velx - st_L->velx) * (st_L->rho + st_R->rho) * (st_L->csnd + st_R->csnd); + + if(st_L->press < st_R->press) + { + pmin = st_L->press; + pmax = st_R->press; + } + else + { + pmin = st_R->press; + pmax = st_L->press; + } + + if(pmin > 0) + { + double qrat = pmax / pmin; + + if(qrat <= QMAX && (pmin <= pv && pv <= pmax)) /* similar pressures and pv lies between them: take pv as it is */ + { + if(pv < 0) + { + printf("pv=%g\n", pv); + terminate("negative pv"); + } + + return pv; + } + else + { + if(pv < pmin) /* use two-rarefaction solution */ + { + double pnu = (st_L->csnd + st_R->csnd) - GAMMA_G7 * (st_R->velx - st_L->velx); + double pde = st_L->csnd / pow(st_L->press, GAMMA_G1) + st_R->csnd / pow(st_R->press, GAMMA_G1); + + return pow(pnu / pde, GAMMA_G3); + } + else /* two-shock approximation */ + { + double gel = sqrt((GAMMA_G5 / st_L->rho) / (GAMMA_G6 * st_L->press + pv)); + double ger = sqrt((GAMMA_G5 / st_R->rho) / (GAMMA_G6 * st_R->press + pv)); + double x = (gel * st_L->press + ger * st_R->press - (st_R->velx - st_L->velx)) / (gel + ger); + + if(x < pmin || x > pmax) /* estimate outside [pmin, pmax]: fall back to pmin */ + { + x = pmin; + } + + return x; + } + } + } + else /* pmin <= 0: return the arithmetic mean of the two pressures */ + { + return (pmin + pmax) / 2; + } +} + +/*! \brief Riemann-solver for isothermal gas. + * + * \param[in] st_L Left hand side state. + * \param[in] st_R Right hand side state. + * \param[in, out] Rho Central density; needs some initial guess. + * \param[out] Vel Velocity in central region. + * \param[in] csnd Sound speed.
+ * + * \return void + */ +void riemann_isotherm(struct state *st_L, struct state *st_R, double *Rho, double *Vel, double csnd) +{ + double F_L, FD_L, F_R, FD_R, rhoold, drho; + double rho; + + double dVel = (st_R->velx - st_L->velx) / csnd; /* velocity difference in units of the sound speed */ + + if(dVel > 0) /* the starting density is computed here; *Rho is only written by this routine */ + rho = sqrt(st_L->rho * st_R->rho * exp(-dVel)); + else + rho = 0.5 * (st_L->rho + st_R->rho); + + int iter = 0; + + if(st_L->rho <= 0 || st_R->rho <= 0) + terminate("isothermal Riemann solver was called with zero or negative density\n"); + + do /* newton-raphson scheme */ + { + isothermal_function(rho, st_L->rho, &F_L, &FD_L); + isothermal_function(rho, st_R->rho, &F_R, &FD_R); + + rhoold = rho; + drho = -0.5 * (F_L + F_R + dVel) / (FD_L + FD_R); + + if(fabs(drho) > 0.25 * rho) /* limit the step to 25% of the current density, keeping its sign */ + drho = 0.25 * rho * fabs(drho) / drho; + + rho += drho; + + iter++; + } + while(2 * fabs(rho - rhoold) / (st_L->rho + st_R->rho) > TOL && iter < MAXITER); + + if(iter >= MAXITER) /* not converged: print both states and abort the run */ + { +#ifndef LONGIDS + printf("ID_L=%u ID_R=%u Rho_L=%g Rho_R=%g Vel_L=%g Vel_R=%g\n", st_L->ID, st_R->ID, st_L->rho, st_R->rho, st_L->velx, + st_R->velx); +#else /* #ifndef LONGIDS */ + printf("ID_L=%llu ID_R=%llu Rho_L=%g Rho_R=%g Vel_L=%g Vel_R=%g\n", st_L->ID, st_R->ID, st_L->rho, st_R->rho, st_L->velx, + st_R->velx); +#endif /* #ifndef LONGIDS #else */ + terminate("ICs for isothermal riemann solver lead to divergence. stopping."); + /* + *Rho = 0.5 * (Rho_L + Rho_R); + *Vel = 0.5 * (Vel_L + Vel_R); + return; + */ + } + + /* prepare output values */ + *Rho = rho; + *Vel = 0.5 * (st_L->velx + st_R->velx + csnd * (F_R - F_L)); +} + +/*! \brief "Pressure" function for isothermal gas. + * + * Needed for root-finding in riemann_isotherm. + * + * \param[in] rhostar Central density. + * \param[in] rho External density. + * \param[out] F Isotherma function. + * \param[out] FD Derivative of isothermal function.
+ * + * \return void + */ +void isothermal_function(double rhostar, double rho, double *F, double *FD) +{ + if(rhostar <= rho) /* rarefaction wave */ + { + *F = log(rhostar / rho); + *FD = 1.0 / rho; /* NOTE(review): the exact derivative of log(rhostar / rho) with respect to rhostar is 1 / rhostar - confirm that 1 / rho is intended */ + } + else /* shock wave */ + { + *F = (rhostar - rho) / sqrt(rhostar * rho); + *FD = 0.5 / rhostar * (sqrt(rhostar / rho) + sqrt(rho / rhostar)); + } +} + +/*! \brief Samples 3d solution to Riemann problem with isothermal gas. + * + * \param[in] S Position x / t. + * \param[in] st_L Left hand side state. + * \param[in] st_R Right hand side state. + * \param[in] Rho central density. + * \param[in] Vel Velocity in central region. + * \param[out] st_face State at face (density, velocities and scalars; the pressure is not set here). + * \param[in] csnd Sound speed. + * + * \return void + */ +void sample_solution_isothermal3d(double S, struct state *st_L, struct state *st_R, double Rho, double Vel, struct state_face *st_face, + double csnd) +{ + if(S <= Vel) /* sample point is left of contact */ + { + st_face->vely = st_L->vely; + st_face->velz = st_L->velz; +#ifdef MAXSCALARS + st_face->scalars = st_L->scalars; +#endif /* #ifdef MAXSCALARS */ + + if(Rho <= st_L->rho) /* left fan */ + { + double shl = st_L->velx - csnd; + + if(S <= shl) /* left data state */ + { + st_face->rho = st_L->rho; + st_face->velx = st_L->velx; + } + else + { + double stl = Vel - csnd; + + if(S > stl) /* middle left state */ + { + st_face->rho = Rho; + st_face->velx = Vel; + } + else /* left state inside fan */ + { + st_face->velx = S + csnd; + st_face->rho = st_L->rho * exp(-((S + csnd) - st_L->velx) / csnd); + } + } + } + else /* left shock */ + { + double sl = (st_L->rho * st_L->velx - Rho * Vel) / (st_L->rho - Rho); /* shock speed: jump of the mass flux divided by the jump of the density */ + + if(S <= sl) /* left data state */ + { + st_face->rho = st_L->rho; + st_face->velx = st_L->velx; + } + else /* left state behind shock */ + { + st_face->rho = Rho; + st_face->velx = Vel; + } + } + } + else /* right of contact */ + { + st_face->vely = st_R->vely; + st_face->velz = st_R->velz; +#ifdef MAXSCALARS + st_face->scalars =
st_R->scalars; +#endif /* #ifdef MAXSCALARS */ + + if(Rho > st_R->rho) /* right shock */ + { + double sr = (st_R->rho * st_R->velx - Rho * Vel) / (st_R->rho - Rho); /* shock speed: jump of the mass flux divided by the jump of the density */ + + if(S >= sr) /* right data state */ + { + st_face->rho = st_R->rho; + st_face->velx = st_R->velx; + } + else /* right state behind shock */ + { + st_face->rho = Rho; + st_face->velx = Vel; + } + } + else /* right fan */ + { + double shr = st_R->velx + csnd; + + if(S >= shr) /* right data state */ + { + st_face->rho = st_R->rho; + st_face->velx = st_R->velx; + } + else + { + double str = Vel + csnd; + + if(S <= str) /* middle right state */ + { + st_face->rho = Rho; + st_face->velx = Vel; + } + else /* fan right state */ + { + st_face->velx = S - csnd; + st_face->rho = st_R->rho * exp(((S - csnd) - st_R->velx) / csnd); + } + } + } + } +} + +#endif /* #if !(defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD)) */ diff --git a/src/amuse/community/arepo/src/hydro/riemann_hllc.c b/src/amuse/community/arepo/src/hydro/riemann_hllc.c new file mode 100644 index 0000000000..80fb519ceb --- /dev/null +++ b/src/amuse/community/arepo/src/hydro/riemann_hllc.c @@ -0,0 +1,213 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/riemann_hllc.c + * \date 05/2018 + * \brief Routines for a HLLC Riemann solver. + * \details contains functions: + * static void hllc_get_fluxes_from_state(struct state *st, + * struct fluxes *flux) + * static double get_hllc_star_fluxes(const struct state *st, + * const struct fluxes *flux, struct fluxes *hllc_flux, + * double S_star, double S) + * double godunov_flux_3d_hllc(struct state *st_L, struct state + * *st_R, struct state_face *st_face, struct fluxes *flux) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +#if defined(RIEMANN_HLLC) + +#if defined(RIEMANN_HLLD) +#error option RIEMANN_HLLC is incompatible with option RIEMANN_HLLD. +Only one Riemann solver can be chosen among the above options.If none of them is selected, + the exact Riemann solver will be used. +#endif /* #if defined(RIEMANN_HLLD) */ + /*! \brief Calculates the flux from a state. + * + * Mass, momentum and energy flux. + * + * \param[in] st State. + * \param[out] flux Flux corresponding to the state. + * + * \return void + */ + static void + hllc_get_fluxes_from_state(struct state *st, struct fluxes *flux) +{ + flux->mass = st->rho * st->velx; + flux->momentum[0] = st->rho * st->velx * st->velx + st->press; + flux->momentum[1] = st->rho * st->velx * st->vely; + flux->momentum[2] = st->rho * st->velx * st->velz; + + st->Energy = st->press / GAMMA_MINUS1 + 0.5 * st->rho * (st->velx * st->velx + st->vely * st->vely + st->velz * st->velz); + flux->energy = (st->Energy + st->press) * st->velx; +} + +/*! \brief Calculates a central flux in HLLC approximation. 
+ * + * \param[in] st State of the Riemann problem (either left or right). + * \param[in] flux Flux through face (either left or right). + * \param[out] hllc_flux State at the face (determined by this routine). + * \param[in] S_star speed of characteristics in central region. + * \param[in] S speed of characteristics in outside state (left or right). + * + * \return Central density. + */ +static double get_hllc_star_fluxes(const struct state *st, const struct fluxes *flux, struct fluxes *hllc_flux, double S_star, + double S) +{ + double Q0 = st->rho * (S - st->velx) / (S - S_star); + double Q1 = Q0 * S_star; + double Q2 = Q0 * st->vely; + double Q3 = Q0 * st->velz; + double Q4 = Q0 * (st->Energy / st->rho + (S_star - st->velx) * (S_star + st->press / (st->rho * (S - st->velx)))); + + hllc_flux->mass = flux->mass + S * (Q0 - st->rho); + + hllc_flux->momentum[0] = flux->momentum[0] + S * (Q1 - st->rho * st->velx); + + hllc_flux->momentum[1] = flux->momentum[1] + S * (Q2 - st->rho * st->vely); + + hllc_flux->momentum[2] = flux->momentum[2] + S * (Q3 - st->rho * st->velz); + + hllc_flux->energy = flux->energy + S * (Q4 - st->Energy); + + return Q0; +} + +/*! \brief Main routine for the hllc Riemann solver. + * + * Called in finite_volume_solver.c + * + * \param[in] st_L Left state of the Riemann problem. + * \param[in] st_R Right state of the Riemann problem. + * \param[out] st_face State at face. + * \param[out] flux Flux through face. + * + * \return Pressure. 
+ *
+ *  Side effects visible in this routine: st_L->csnd and st_R->csnd are
+ *  overwritten, and hllc_get_fluxes_from_state() is called on both input
+ *  states. If either density is not positive, both states are printed and
+ *  terminate() is called.
+ */
+double godunov_flux_3d_hllc(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux)
+{
+  double S_L, S_R, S_star;
+  double Press_star, rho_star;
+  double rho_hat, csnd_hat;
+
+  if(st_L->rho > 0 && st_R->rho > 0)
+    {
+      struct fluxes flux_L, flux_R;
+
+      st_L->csnd = sqrt(GAMMA * st_L->press / st_L->rho);
+      st_R->csnd = sqrt(GAMMA * st_R->press / st_R->rho);
+
+      /* first estimate wave speeds */
+      S_L = dmin(st_L->velx - st_L->csnd, st_R->velx - st_R->csnd);
+      S_R = dmax(st_L->velx + st_L->csnd, st_R->velx + st_R->csnd);
+
+      rho_hat = 0.5 * (st_L->rho + st_R->rho);   /* arithmetic mean density */
+      csnd_hat = 0.5 * (st_L->csnd + st_R->csnd); /* arithmetic mean sound speed */
+      Press_star = 0.5 * ((st_L->press + st_R->press) + (st_L->velx - st_R->velx) * (rho_hat * csnd_hat));
+      S_star = 0.5 * ((st_L->velx + st_R->velx) + (st_L->press - st_R->press) / (rho_hat * csnd_hat));
+
+      /* compute fluxes for the left and right states */
+      hllc_get_fluxes_from_state(st_L, &flux_L);
+      hllc_get_fluxes_from_state(st_R, &flux_R);
+
+      if(S_L >= 0.0) /* F_hllc = F_L */
+        {
+          /* copy the fluxes from the left state */
+          flux->mass = flux_L.mass;
+          flux->momentum[0] = flux_L.momentum[0];
+          flux->momentum[1] = flux_L.momentum[1];
+          flux->momentum[2] = flux_L.momentum[2];
+          flux->energy = flux_L.energy;
+
+          /* set the primitive variables at the face */
+          st_face->rho = st_L->rho;
+          st_face->velx = st_L->velx;
+          st_face->vely = st_L->vely;
+          st_face->velz = st_L->velz;
+          st_face->press = st_L->press;
+        }
+      else if(S_R <= 0.0) /* F_hllc = F_R */
+        {
+          /* copy the fluxes from the right state */
+          flux->mass = flux_R.mass;
+          flux->momentum[0] = flux_R.momentum[0];
+          flux->momentum[1] = flux_R.momentum[1];
+          flux->momentum[2] = flux_R.momentum[2];
+          flux->energy = flux_R.energy;
+
+          /* set the primitive variables at the face */
+          st_face->rho = st_R->rho;
+          st_face->velx = st_R->velx;
+          st_face->vely = st_R->vely;
+          st_face->velz = st_R->velz;
+          st_face->press = st_R->press;
+        }
+      else if(S_L <= 0.0 && S_star >= 0.0) /* F_hllc = F*_L; S_L < 0 already holds here, so S_star decides */
+        {
+          /* compute star flux */
+          rho_star = get_hllc_star_fluxes(st_L, &flux_L, flux, S_star, S_L);
+
+          /* set the primitive variables at the face */
+          st_face->rho = rho_star;
+          st_face->velx = S_star;
+          st_face->vely = st_L->vely;
+          st_face->velz = st_L->velz;
+          st_face->press = Press_star;
+        }
+      else /* F_hllc = F*_R */
+        {
+          /* compute star flux */
+          rho_star = get_hllc_star_fluxes(st_R, &flux_R, flux, S_star, S_R);
+
+          /* set the primitive variables at the face */
+          st_face->rho = rho_star;
+          st_face->velx = S_star;
+          st_face->vely = st_R->vely;
+          st_face->velz = st_R->velz;
+          st_face->press = Press_star;
+        }
+    }
+  else
+    {
+      printf("Left:  st_L->press=%g st_L->rho=%g  st_L->velx=%g\n", st_L->press, st_L->rho, st_L->velx);
+      printf("Right: st_R->press=%g st_R->rho=%g  st_R->velx=%g\n", st_R->press, st_R->rho, st_R->velx);
+      terminate("density is zero\n");
+      return 0; /* presumably only reached if terminate() returns -- TODO confirm */
+    }
+
+  return st_face->press;
+}
+
+#endif /* #if defined(RIEMANN_HLLC) */
diff --git a/src/amuse/community/arepo/src/hydro/riemann_hlld.c b/src/amuse/community/arepo/src/hydro/riemann_hlld.c
new file mode 100644
index 0000000000..8770282bd4
--- /dev/null
+++ b/src/amuse/community/arepo/src/hydro/riemann_hlld.c
@@ -0,0 +1,567 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/riemann_hlld.c + * \date 05/2018 + * \brief Routines for a HLLD Riemann solver (to be used for MHD). + * \details contains functions: + * static inline int state_and_flux_valid(const struct state + * *st, const struct fluxes *flux) + * double godunov_flux_3d_hlld(struct state *st_L, struct state + * *st_R, double *vel_face, struct state_face *st_face, + * struct fluxes *flux) + * static double hlld_get_fast_wave(struct state *st) + * static void hlld_get_fluxes_from_state(struct state *st, + * struct fluxes *flux, double *st_ptot) + * static void hlld_get_star(struct state *st_star, struct + * state *st, double S, double S_M, double ptot, double + * ptot_star) + * static void hlld_get_fluxes_star(struct state *st_A, struct + * state *st_A_star, struct fluxes *flux_A, double S_A, + * struct fluxes *flux) + * static void hlld_get_starstar_L(struct state *st_star_L, + * struct state *st_star_R, struct state *st_starstar) + * static void hlld_get_starstar_R(struct state *st_star_L, + * struct state *st_star_R, struct state *st_starstar) + * static void hlld_get_starstar(struct state *st_star_L, + * struct state *st_star_R, struct state *st_starstar, + * struct state *st_star_A, double sign) + * static void hlld_get_fluxes_starstar(struct state *st_A, + * struct state *st_A_star, struct state *st_A_starstar, + * struct fluxes *flux_A, double S_A, double S_A_star, struct + * fluxes *flux) + * static void hll_get_star(struct state *st_star, struct + * fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, + * struct state *st_R, double S_L, double S_R) + * static void hll_get_flux(struct fluxes *flux, struct fluxes + * *flux_L, struct fluxes *flux_R, struct state *st_L, + * struct state *st_R, double S_L, double S_R) + * static void lax_get_flux(struct fluxes *flux, struct fluxes + * *flux_L, struct fluxes *flux_R, struct state 
*st_L, struct + * state *st_R, double S) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +#if defined(RIEMANN_HLLD) + +static double hlld_get_fast_wave(struct state *st); +static void hlld_get_fluxes_from_state(struct state *st_face, struct fluxes *flux, double *st_ptot); +static void hlld_get_star(struct state *st_star, struct state *st, double S, double S_M, double ptot, double ptot_star); +static void hlld_get_fluxes_star(struct state *st_A, struct state *st_A_star, struct fluxes *flux_A, double S_A, struct fluxes *flux); +static void hlld_get_starstar_L(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar); +static void hlld_get_starstar_R(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar); +static void hlld_get_starstar(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar, struct state *st_star_A, + double sign); +static void hlld_get_fluxes_starstar(struct state *st_A, struct state *st_A_star, struct state *st_A_starstar, struct fluxes *flux_A, + double S_A, double S_A_star, struct fluxes *flux); +static void hll_get_star(struct state *st_star, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R, + double S_L, double S_R); +static void hll_get_flux(struct fluxes *flux, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R, + double S_L, double S_R); +static void lax_get_flux(struct fluxes *flux, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R, + double S); + +/*! \brief Check if pressure, energy and energy flux have valid values. + * + * \param[in] st State. + * \param[in] flux Flux. + * + * \return 1 if valid state and flux, 0 otherwise. 
+ */
+static inline int state_and_flux_valid(const struct state *st, const struct fluxes *flux)
+{
+  return (st->press >= 0) && gsl_finite(st->press) && gsl_finite(flux->energy);
+}
+
+/*! \brief Main routine for the hlld Riemann solver.
+ *
+ *  Called in finite_volume_solver.c.
+ *
+ *  The HLLD flux is tried first. If the resulting state/flux pair fails
+ *  state_and_flux_valid(), the HLL flux is used instead, and if that fails
+ *  as well the Lax-Friedrichs flux is used with the mean of the left and
+ *  right pressures.
+ *
+ *  \param[in,out] st_L Left state of the Riemann problem; its Bx is replaced
+ *                 by the mean of the left and right Bx.
+ *  \param[in,out] st_R Right state of the Riemann problem; its Bx is replaced
+ *                 by the mean of the left and right Bx.
+ *  \param[in] vel_face Velocity at which the face is moving (only used for
+ *             the diagnostic output).
+ *  \param[out] st_face State at face.
+ *  \param[out] flux Flux through face.
+ *
+ *  \return Pressure.
+ */
+double godunov_flux_3d_hlld(struct state *st_L, struct state *st_R, double *vel_face, struct state_face *st_face, struct fluxes *flux)
+{
+  struct state st_Lstar, st_Rstar, st_star;
+  struct state st_Lstarstar, st_Rstarstar;
+  struct state *st_middle;
+  double Bx;
+  double cf_L, cf_R;
+  double S, S_L, S_R, S_M, S_L_star, S_R_star;
+  double ptot_L, ptot_R;
+
+  /* only computed in the star branches; preset for the diagnostic output */
+  S_R_star = S_L_star = S_M = 0.;
+
+  if(st_L->rho > 0 && st_R->rho > 0)
+    {
+      /* use a single normal field on both sides of the face */
+      Bx = 0.5 * (st_L->Bx + st_R->Bx);
+      flux->B[0] = 0.;
+
+      st_L->Bx = Bx;
+      st_R->Bx = Bx;
+      st_face->Bx = Bx;
+
+      /* get wave speeds first */
+      cf_L = hlld_get_fast_wave(st_L);
+      cf_R = hlld_get_fast_wave(st_R);
+
+      /* largest absolute signal speed, used by the Lax-Friedrichs fallback */
+      S = dmax(dmax(fabs(st_L->velx - cf_L), fabs(st_R->velx - cf_R)), dmax(fabs(st_L->velx + cf_L), fabs(st_R->velx + cf_R)));
+
+      S_L = dmin(st_L->velx - cf_L, st_R->velx - cf_R);
+      S_R = dmax(st_L->velx + cf_L, st_R->velx + cf_R);
+
+      if(S_L >= 0)
+        {
+          st_middle = st_L;
+          hlld_get_fluxes_from_state(st_L, flux, NULL);
+        }
+      else if(S_R <= 0)
+        {
+          st_middle = st_R;
+          hlld_get_fluxes_from_state(st_R, flux, NULL);
+        }
+      else
+        {
+          // stars are needed
+          struct fluxes flux_R, flux_L;
+
+          hlld_get_fluxes_from_state(st_L, &flux_L, &ptot_L);
+          hlld_get_fluxes_from_state(st_R, &flux_R, &ptot_R);
+
+          S_M = ((S_R - st_R->velx) * st_R->rho * st_R->velx - (S_L - st_L->velx) * st_L->rho * st_L->velx - ptot_R + ptot_L) /
+                ((S_R - st_R->velx) * st_R->rho - (S_L - st_L->velx) * st_L->rho);
+
+          double ptot_star = ((S_R - st_R->velx) * st_R->rho * ptot_L - (S_L - st_L->velx) * st_L->rho * ptot_R +
+                              st_L->rho * st_R->rho * (S_R - st_R->velx) * (S_L - st_L->velx) * (st_R->velx - st_L->velx)) /
+                             ((S_R - st_R->velx) * st_R->rho - (S_L - st_L->velx) * st_L->rho);
+
+          hlld_get_star(&st_Lstar, st_L, S_L, S_M, ptot_L, ptot_star);
+          hlld_get_star(&st_Rstar, st_R, S_R, S_M, ptot_R, ptot_star);
+
+          S_L_star = S_M - fabs(st_L->Bx) / sqrt(st_Lstar.rho);
+          S_R_star = S_M + fabs(st_R->Bx) / sqrt(st_Rstar.rho);
+
+          if(S_L_star >= 0 || (Bx == 0 && S_M >= 0))  // we already know: S_L <= 0
+            {
+              st_middle = &st_Lstar;
+              hlld_get_fluxes_star(st_L, &st_Lstar, &flux_L, S_L, flux);
+            }
+          else if(S_R_star <= 0 || (Bx == 0))  // we already know: S_R >= 0
+            {
+              st_middle = &st_Rstar;
+              hlld_get_fluxes_star(st_R, &st_Rstar, &flux_R, S_R, flux);
+            }
+          else
+            {
+              // double stars are needed
+              if(S_M >= 0)  // we already know: S_L_star <= 0
+                {
+                  st_middle = &st_Lstarstar;
+                  hlld_get_starstar_L(&st_Lstar, &st_Rstar, &st_Lstarstar);
+                  hlld_get_fluxes_starstar(st_L, &st_Lstar, &st_Lstarstar, &flux_L, S_L, S_L_star, flux);
+                }
+              else  // we already know: S_R_star >= 0 and S_M <= 0
+                {
+                  st_middle = &st_Rstarstar;
+                  hlld_get_starstar_R(&st_Lstar, &st_Rstar, &st_Rstarstar);
+                  hlld_get_fluxes_starstar(st_R, &st_Rstar, &st_Rstarstar, &flux_R, S_R, S_R_star, flux);
+                }
+            }
+        }
+    }
+  else
+    {
+      printf("Left:  st_L->press=%g st_L->rho=%g  st_L->velx=%g\n", st_L->press, st_L->rho, st_L->velx);
+      printf("Right: st_R->press=%g st_R->rho=%g  st_R->velx=%g\n", st_R->press, st_R->rho, st_R->velx);
+      terminate("density is zero\n");
+      return 0;
+    }
+
+  if(!state_and_flux_valid(st_middle, flux))
+    {
+      /* HLLD did not work => use HLL instead */
+      struct fluxes flux_R, flux_L;
+
+      hlld_get_fluxes_from_state(st_L, &flux_L, NULL);
+      hlld_get_fluxes_from_state(st_R, &flux_R, NULL);
+
+      hll_get_star(&st_star, &flux_L, &flux_R, st_L, st_R, S_L, S_R);
+      hll_get_flux(flux, &flux_L, &flux_R, st_L, st_R, S_L, S_R);
+
+      st_middle = &st_star;
+
+      if(!state_and_flux_valid(st_middle, flux))
+        {
+          /* HLL did not work, use lax-friedrich flux instead */
+          lax_get_flux(flux, &flux_L, &flux_R, st_L, st_R, S);
+
+          /* only the pressure of the HLL star state is replaced here */
+          st_star.press = 0.5 * (st_L->press + st_R->press);
+        }
+    }
+
+  st_face->rho = st_middle->rho;
+  st_face->velx = st_middle->velx;
+  st_face->vely = st_middle->vely;
+  st_face->velz = st_middle->velz;
+  st_face->press = st_middle->press;
+  st_face->By = st_middle->By;
+  st_face->Bz = st_middle->Bz;
+
+  /* still invalid after all fallbacks: report the state, but carry on */
+  if(!state_and_flux_valid(st_middle, flux))
+    {
+      printf("M: rho=%g, v=(%g,%g,%g), p=%g, B=(%g,%g,%g)\n", st_middle->rho, st_middle->velx + vel_face[0],
+             st_middle->vely + vel_face[1], st_middle->velz + vel_face[2], st_middle->press, st_middle->Bx, st_middle->By,
+             st_middle->Bz);
+      printf("S_L=%g, S_L_star=%g, S_M=%g, S_R_star=%g, S_R=%g, cf_L=%g, cf_R=%g\n", S_L, S_L_star, S_M, S_R_star, S_R, cf_L, cf_R);
+    }
+
+  return st_middle->press;
+}
+
+/*! \brief Calculates signal speed of the fast magnetosonic wave.
+ *
+ *  \param[in] st MHD state.
+ *
+ *  \return Signal speed of fast wave.
+ */
+static double hlld_get_fast_wave(struct state *st)
+{
+  double gamma = GAMMA;
+  double gPress = gamma * st->press;
+  double Bsqr = st->Bx * st->Bx + st->By * st->By + st->Bz * st->Bz;
+  double gpb2 = gPress + Bsqr;
+
+  /* Analytically gpb2^2 - 4 gPress Bx^2 >= (gPress - Bx^2)^2 >= 0 because
+   * Bsqr >= Bx^2, but round-off can push it slightly below zero (e.g. for
+   * By = Bz = 0 and gPress == Bx^2), which would turn the wave speed into
+   * NaN. Clamp the discriminant; a NaN in the state still propagates
+   * through gpb2. */
+  double discriminant = fmax(gpb2 * gpb2 - 4. * gPress * st->Bx * st->Bx, 0.);
+
+  return sqrt(0.5 / st->rho * (gpb2 + sqrt(discriminant)));
+}
+
+/*! \brief Calculates the flux from a state.
+ *
+ *  Mass, momentum and energy flux.
+ *
+ *  \param[in] st State.
+ *  \param[out] flux Flux corresponding to the state.
+ *  \param[out] st_ptot Total pressure.
+ *
+ *  Besides filling flux, this routine stores the total energy density in
+ *  st->Energy. st_ptot may be NULL when the total pressure is not needed.
+ *
+ *  \return void
+ */
+static void hlld_get_fluxes_from_state(struct state *st, struct fluxes *flux, double *st_ptot)
+{
+  const double gamma_m1 = (GAMMA)-1.;
+  const double cr_press = 0.; /* no additional pressure component in this version */
+
+  /* recurring building blocks */
+  const double Bsqr = st->Bx * st->Bx + st->By * st->By + st->Bz * st->Bz;
+  const double half_Bsqr = 0.5 * Bsqr;
+  const double vsqr = st->velx * st->velx + st->vely * st->vely + st->velz * st->velz;
+  const double v_dot_B = st->velx * st->Bx + st->vely * st->By + st->velz * st->Bz;
+  const double rho_vx = st->rho * st->velx;
+
+  /* total energy density and total pressure */
+  const double etot = st->press / gamma_m1 + 0.5 * st->rho * vsqr + half_Bsqr;
+  const double ptot = st->press + half_Bsqr + cr_press;
+
+  /* hydrodynamic part of the flux vector */
+  flux->mass = rho_vx;
+  flux->momentum[0] = rho_vx * st->velx + st->press + half_Bsqr - st->Bx * st->Bx + cr_press;
+  flux->momentum[1] = rho_vx * st->vely - st->Bx * st->By;
+  flux->momentum[2] = rho_vx * st->velz - st->Bx * st->Bz;
+  flux->energy = (etot + ptot) * st->velx - st->Bx * v_dot_B;
+
+  /* transverse magnetic field components */
+  flux->B[1] = st->By * st->velx - st->Bx * st->vely;
+  flux->B[2] = st->Bz * st->velx - st->Bx * st->velz;
+
+  st->Energy = etot;
+
+  if(st_ptot != NULL)
+    {
+      *st_ptot = ptot;
+    }
+}
+
+/*! \brief Calculates state in star region.
+ *
+ *  \param[out] st_star State in star region (computed in this function).
+ *  \param[in] st Outer state of Riemann problem.
+ *  \param[in] S Velocity of characteristics.
+ *  \param[in] S_M Velocity of magnetic characteristics.
+ *  \param[in] ptot Total pressure of outer state.
+ *  \param[in] ptot_star Total pressure in star region.
+ *
+ *  \return void
+ *
+ *  The expression st->rho * (S - st->velx) * (S - S_M) - st->Bx * st->Bx is
+ *  the common denominator of the transverse velocity and field updates.
+ *  NOTE(review): no guard against S == S_M or a vanishing denominator is
+ *  visible in this routine; the caller checks the resulting state with
+ *  state_and_flux_valid() -- confirm that this is the intended protection.
+ */
+static void hlld_get_star(struct state *st_star, struct state *st, double S, double S_M, double ptot, double ptot_star)
+{
+  st_star->rho = st->rho * (S - st->velx) / (S - S_M);
+  st_star->velx = S_M; /* the normal velocity in the star region is S_M */
+  st_star->vely = st->vely - st->Bx * st->By * (S_M - st->velx) / (st->rho * (S - st->velx) * (S - S_M) - st->Bx * st->Bx);
+  st_star->velz = st->velz - st->Bx * st->Bz * (S_M - st->velx) / (st->rho * (S - st->velx) * (S - S_M) - st->Bx * st->Bx);
+
+  st_star->Bx = st->Bx; /* normal field is copied unchanged */
+  st_star->By = st->By * (st->rho * (S - st->velx) * (S - st->velx) - st->Bx * st->Bx) /
+                (st->rho * (S - st->velx) * (S - S_M) - st->Bx * st->Bx);
+  st_star->Bz = st->Bz * (st->rho * (S - st->velx) * (S - st->velx) - st->Bx * st->Bx) /
+                (st->rho * (S - st->velx) * (S - S_M) - st->Bx * st->Bx);
+
+  /* uses the star velocities and fields computed above, so this must come after them */
+  st_star->Energy = ((S - st->velx) * st->Energy - ptot * st->velx + ptot_star * S_M +
+                     st->Bx * (st->velx * st->Bx + st->vely * st->By + st->velz * st->Bz - st_star->velx * st->Bx -
+                               st_star->vely * st_star->By - st_star->velz * st_star->Bz)) /
+                    (S - S_M);
+
+  /* thermal pressure = total pressure minus magnetic pressure of the star state */
+  st_star->press = ptot_star - 0.5 * (st_star->Bx * st_star->Bx + st_star->By * st_star->By + st_star->Bz * st_star->Bz);
+}
+
+/*! \brief Calculates a central flux.
+ *
+ *  \param[in] st_A State of the Riemann problem.
+ *  \param[in] st_A_star State inside fast wave.
+ *  \param[in] flux_A Flux through face.
+ *  \param[in] S_A speed of characteristics.
+ *  \param[out] flux Flux through face.
+ *
+ *  Every component is computed as flux_A - S_A * (U_A - U_A_star), where U
+ *  is the corresponding conserved quantity; flux->B[0] is not touched here.
+ *
+ *  \return void
+ */
+static void hlld_get_fluxes_star(struct state *st_A, struct state *st_A_star, struct fluxes *flux_A, double S_A, struct fluxes *flux)
+{
+  flux->mass = flux_A->mass - S_A * (st_A->rho - st_A_star->rho);
+
+  flux->momentum[0] = flux_A->momentum[0] - S_A * (st_A->rho * st_A->velx - st_A_star->rho * st_A_star->velx);
+  flux->momentum[1] = flux_A->momentum[1] - S_A * (st_A->rho * st_A->vely - st_A_star->rho * st_A_star->vely);
+  flux->momentum[2] = flux_A->momentum[2] - S_A * (st_A->rho * st_A->velz - st_A_star->rho * st_A_star->velz);
+
+  flux->B[1] = flux_A->B[1] - S_A * (st_A->By - st_A_star->By);
+  flux->B[2] = flux_A->B[2] - S_A * (st_A->Bz - st_A_star->Bz);
+
+  flux->energy = flux_A->energy - S_A * (st_A->Energy - st_A_star->Energy);
+}
+
+/*! \brief Get state in starstar region, case S_M>=0.
+ *
+ *  Thin wrapper: calls hlld_get_starstar() with the left star state as the
+ *  upwind state and sign = -1.
+ *
+ *  \param[in] st_star_L State in left star region.
+ *  \param[in] st_star_R State in right star region.
+ *  \param[out] st_starstar State in starstar region.
+ *
+ *  \return void
+ */
+static void hlld_get_starstar_L(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar)
+{
+  hlld_get_starstar(st_star_L, st_star_R, st_starstar, st_star_L, -1.0);
+}
+
+/*! \brief Get state in starstar region, case S_M<0.
+ *
+ *  Thin wrapper: calls hlld_get_starstar() with the right star state as the
+ *  upwind state and sign = +1.
+ *
+ *  \param[in] st_star_L State in left star region.
+ *  \param[in] st_star_R State in right star region.
+ *  \param[out] st_starstar State in starstar region.
+ *
+ *  \return void
+ */
+static void hlld_get_starstar_R(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar)
+{
+  hlld_get_starstar(st_star_L, st_star_R, st_starstar, st_star_R, 1.0);
+}
+
+/*! \brief Get state in starstar region.
+ *
+ *  \param[in] st_star_L State in left star region.
+ *  \param[in] st_star_R State in right star region.
+ *  \param[out] st_starstar State in starstar region.
+ *  \param[in] st_star_A State where flow is coming from (depends on
+ *             directionality of the flow).
+ *  \param[in] sign Directionality of flow.
+ *
+ *  Density, normal field and pressure are copied from st_star_A; the
+ *  transverse velocity and field components are sqrt(rho)-weighted
+ *  combinations of the two star states. sign is -1 when called through
+ *  hlld_get_starstar_L() and +1 through hlld_get_starstar_R().
+ *
+ *  \return void
+ */
+static void hlld_get_starstar(struct state *st_star_L, struct state *st_star_R, struct state *st_starstar, struct state *st_star_A,
+                              double sign)
+{
+  double sBx = st_star_A->Bx < 0 ? -1.0 : 1.0; /* sign of the normal field; Bx == 0 counts as positive */
+
+  double sqLrho = sqrt(st_star_L->rho);
+  double sqRrho = sqrt(st_star_R->rho);
+
+  st_starstar->rho = st_star_A->rho;
+
+  st_starstar->velx = st_star_L->velx; /* == st_star_R->velx == S_M */
+  st_starstar->vely =
+      ((sqLrho * st_star_L->vely) + (sqRrho * st_star_R->vely) + (st_star_R->By - st_star_L->By) * sBx) / (sqLrho + sqRrho);
+  st_starstar->velz =
+      ((sqLrho * st_star_L->velz) + (sqRrho * st_star_R->velz) + (st_star_R->Bz - st_star_L->Bz) * sBx) / (sqLrho + sqRrho);
+
+  st_starstar->Bx = st_star_A->Bx;
+  st_starstar->By =
+      ((sqLrho * st_star_R->By) + (sqRrho * st_star_L->By) + sqLrho * sqRrho * (st_star_R->vely - st_star_L->vely) * sBx) /
+      (sqLrho + sqRrho);
+  st_starstar->Bz =
+      ((sqLrho * st_star_R->Bz) + (sqRrho * st_star_L->Bz) + sqLrho * sqRrho * (st_star_R->velz - st_star_L->velz) * sBx) /
+      (sqLrho + sqRrho);
+
+  /* uses the starstar velocities and fields set above, so this must come after them */
+  st_starstar->Energy = st_star_A->Energy + sign * sqrt(st_star_A->rho) * sBx *
+                                                (st_star_A->velx * st_star_A->Bx + st_star_A->vely * st_star_A->By +
+                                                 st_star_A->velz * st_star_A->Bz - st_starstar->velx * st_star_A->Bx -
+                                                 st_starstar->vely * st_starstar->By - st_starstar->velz * st_starstar->Bz);
+
+  st_starstar->press = st_star_A->press;
+}
+
+/*! \brief Get fluxes in starstar region.
+ *
+ *  \param[in] st_A State in outside region.
+ *  \param[in] st_A_star State in star region.
+ *  \param[in] st_A_starstar State in starstar region.
+ *  \param[in] flux_A Flux corresponding to st_A.
+ *  \param[in] S_A Speed of characteristics in outside region.
+ *  \param[in] S_A_star Speed of characteristics in star region.
+ *  \param[out] flux Flux in starstar region.
+ *
+ *  Every component is computed as
+ *  flux_A + S_A_star * U_starstar - (S_A_star - S_A) * U_star - S_A * U,
+ *  where U is the corresponding conserved quantity of the outer, star and
+ *  starstar states; flux->B[0] is not touched here.
+ *
+ *  \return void
+ */
+static void hlld_get_fluxes_starstar(struct state *st_A, struct state *st_A_star, struct state *st_A_starstar, struct fluxes *flux_A,
+                                     double S_A, double S_A_star, struct fluxes *flux)
+{
+  flux->mass = flux_A->mass + S_A_star * st_A_starstar->rho - (S_A_star - S_A) * st_A_star->rho - S_A * st_A->rho;
+
+  flux->momentum[0] = flux_A->momentum[0] + S_A_star * st_A_starstar->rho * st_A_starstar->velx -
+                      (S_A_star - S_A) * st_A_star->rho * st_A_star->velx - S_A * st_A->rho * st_A->velx;
+  flux->momentum[1] = flux_A->momentum[1] + S_A_star * st_A_starstar->rho * st_A_starstar->vely -
+                      (S_A_star - S_A) * st_A_star->rho * st_A_star->vely - S_A * st_A->rho * st_A->vely;
+  flux->momentum[2] = flux_A->momentum[2] + S_A_star * st_A_starstar->rho * st_A_starstar->velz -
+                      (S_A_star - S_A) * st_A_star->rho * st_A_star->velz - S_A * st_A->rho * st_A->velz;
+
+  flux->B[1] = flux_A->B[1] + S_A_star * st_A_starstar->By - (S_A_star - S_A) * st_A_star->By - S_A * st_A->By;
+  flux->B[2] = flux_A->B[2] + S_A_star * st_A_starstar->Bz - (S_A_star - S_A) * st_A_star->Bz - S_A * st_A->Bz;
+
+  flux->energy = flux_A->energy + S_A_star * st_A_starstar->Energy - (S_A_star - S_A) * st_A_star->Energy - S_A * st_A->Energy;
+}
+
+/*! \brief Get state in star region.
+ *
+ *  \param[out] st_star State in star region.
+ *  \param[in] flux_L Flux from the left state.
+ *  \param[in] flux_R Flux from the right state.
+ *  \param[in] st_L State at the left side of the Riemann problem.
+ *  \param[in] st_R State at the right side of the Riemann problem.
+ *  \param[in] S_L Speed of characteristics on the left side.
+ *  \param[in] S_R Speed of characteristics on the right side.
+ *
+ *  The conserved quantities of the HLL state are computed first, then the
+ *  velocities are obtained by dividing by st_star->rho and the pressure is
+ *  recovered from the energy. The routine reads st_L->Energy and
+ *  st_R->Energy, so these must have been set beforehand (in this file
+ *  hlld_get_fluxes_from_state() does that).
+ *
+ *  \return void
+ */
+static void hll_get_star(struct state *st_star, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R,
+                         double S_L, double S_R)
+{
+  double gamma = GAMMA;
+  double gamma_minus1 = gamma - 1.;
+
+  double fac = 1.0 / (S_R - S_L); /* no guard for S_R == S_L is visible here */
+
+  st_star->rho = fac * (S_R * st_R->rho - S_L * st_L->rho - flux_R->mass + flux_L->mass);
+
+  st_star->velx =
+      fac * (S_R * st_R->rho * st_R->velx - S_L * st_L->rho * st_L->velx - flux_R->momentum[0] + flux_L->momentum[0]) / st_star->rho;
+  st_star->vely =
+      fac * (S_R * st_R->rho * st_R->vely - S_L * st_L->rho * st_L->vely - flux_R->momentum[1] + flux_L->momentum[1]) / st_star->rho;
+  st_star->velz =
+      fac * (S_R * st_R->rho * st_R->velz - S_L * st_L->rho * st_L->velz - flux_R->momentum[2] + flux_L->momentum[2]) / st_star->rho;
+
+  st_star->Energy = fac * (S_R * st_R->Energy - S_L * st_L->Energy - flux_R->energy + flux_L->energy);
+
+  st_star->Bx = st_R->Bx; /* == st_L->Bx */
+  st_star->By = fac * (S_R * st_R->By - S_L * st_L->By - flux_R->B[1] + flux_L->B[1]);
+  st_star->Bz = fac * (S_R * st_R->Bz - S_L * st_L->Bz - flux_R->B[2] + flux_L->B[2]);
+
+  /* pressure from total energy minus kinetic and magnetic energy */
+  st_star->press =
+      gamma_minus1 *
+      (st_star->Energy -
+       0.5 * st_star->rho * (st_star->velx * st_star->velx + st_star->vely * st_star->vely + st_star->velz * st_star->velz) -
+       0.5 * (st_star->Bx * st_star->Bx + st_star->By * st_star->By + st_star->Bz * st_star->Bz));
+}
+
+/*! \brief Get interface flux from states.
+ *
+ *  \param[out] flux Flux through the interface.
+ *  \param[in] flux_L Flux from left state.
+ *  \param[in] flux_R Flux from right state.
+ *  \param[in] st_L Left state.
+ *  \param[in] st_R Right state.
+ *  \param[in] S_L Speed of characteristics at left side.
+ *  \param[in] S_R Speed of characteristics at right side.
+ *
+ *  HLL flux: every component is
+ *  (S_R * F_L - S_L * F_R + S_R * S_L * (U_R - U_L)) / (S_R - S_L).
+ *  flux->B[0] is not touched here.
+ *
+ *  \return void
+ */
+static void hll_get_flux(struct fluxes *flux, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R,
+                         double S_L, double S_R)
+{
+  const double inv_fan_width = 1.0 / (S_R - S_L);
+  const double speed_product = S_R * S_L;
+
+  /* scalar components */
+  flux->mass = inv_fan_width * (S_R * flux_L->mass - S_L * flux_R->mass + speed_product * (st_R->rho - st_L->rho));
+  flux->energy = inv_fan_width * (S_R * flux_L->energy - S_L * flux_R->energy + speed_product * (st_R->Energy - st_L->Energy));
+
+  /* momentum components */
+  flux->momentum[0] = inv_fan_width * (S_R * flux_L->momentum[0] - S_L * flux_R->momentum[0] +
+                                       speed_product * (st_R->rho * st_R->velx - st_L->rho * st_L->velx));
+  flux->momentum[1] = inv_fan_width * (S_R * flux_L->momentum[1] - S_L * flux_R->momentum[1] +
+                                       speed_product * (st_R->rho * st_R->vely - st_L->rho * st_L->vely));
+  flux->momentum[2] = inv_fan_width * (S_R * flux_L->momentum[2] - S_L * flux_R->momentum[2] +
+                                       speed_product * (st_R->rho * st_R->velz - st_L->rho * st_L->velz));
+
+  /* transverse magnetic field components */
+  flux->B[1] = inv_fan_width * (S_R * flux_L->B[1] - S_L * flux_R->B[1] + speed_product * (st_R->By - st_L->By));
+  flux->B[2] = inv_fan_width * (S_R * flux_L->B[2] - S_L * flux_R->B[2] + speed_product * (st_R->Bz - st_L->Bz));
+}
+
+/*! \brief Get interface flux from states.
+ *
+ *  Lax-Friedrichs flux; used whenever the HLL flux estimate is invalid.
+ *
+ *  \param[out] flux Flux through the interface.
+ *  \param[in] flux_L Flux from left state.
+ *  \param[in] flux_R Flux from right state.
+ *  \param[in] st_L Left state.
+ *  \param[in] st_R Right state.
+ *  \param[in] S Signal speed that scales the jump term (the only speed
+ *             argument of this function).
+ * + * \return void + */ +static void lax_get_flux(struct fluxes *flux, struct fluxes *flux_L, struct fluxes *flux_R, struct state *st_L, struct state *st_R, + double S) +{ + flux->mass = 0.5 * (flux_L->mass + flux_R->mass) - 0.5 * S * (st_R->rho - st_L->rho); + + flux->momentum[0] = 0.5 * (flux_L->momentum[0] + flux_R->momentum[0]) - 0.5 * S * (st_R->rho * st_R->velx - st_L->rho * st_L->velx); + flux->momentum[1] = 0.5 * (flux_L->momentum[1] + flux_R->momentum[1]) - 0.5 * S * (st_R->rho * st_R->vely - st_L->rho * st_L->vely); + flux->momentum[2] = 0.5 * (flux_L->momentum[2] + flux_R->momentum[2]) - 0.5 * S * (st_R->rho * st_R->velz - st_L->rho * st_L->velz); + + flux->energy = 0.5 * (flux_L->energy + flux_R->energy) - 0.5 * S * (st_R->Energy - st_L->Energy); + + flux->B[1] = 0.5 * (flux_L->B[1] + flux_R->B[1]) - 0.5 * S * (st_R->By - st_L->By); + flux->B[2] = 0.5 * (flux_L->B[2] + flux_R->B[2]) - 0.5 * S * (st_R->Bz - st_L->Bz); +} + +#endif /* #if defined(RIEMANN_HLLD) */ diff --git a/src/amuse/community/arepo/src/hydro/scalars.c b/src/amuse/community/arepo/src/hydro/scalars.c new file mode 100644 index 0000000000..b28bb67b6f --- /dev/null +++ b/src/amuse/community/arepo/src/hydro/scalars.c @@ -0,0 +1,107 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ *             A copy of the GNU General Public License is available under
+ *             LICENSE as part of this program.  See also
+ *             <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/scalars.c
+ * \date        05/2018
+ * \brief       Routines to initialize passive scalars which are advected with
+ *              the fluid.
+ * \details     contains functions:
+ *                void init_scalars()
+ *                int scalar_init(MyFloat * addr, MyFloat * addr_mass, int
+ *                  type)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include /* NOTE(review): the header names of these four directives are missing in this copy -- restore from upstream */
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../mesh/voronoi/voronoi.h"
+
+#ifdef MAXSCALARS
+int N_Scalar = 0; /* number of scalars registered so far via scalar_init() */
+struct scalar_elements scalar_elements[MAXSCALARS]; /* one entry (type and offsets) per registered scalar */
+struct scalar_index ScalarIndex;
+#endif /* #ifdef MAXSCALARS */
+
+/*! \brief Main routine to initialize passive scalar quantities.
+ *
+ *  Does nothing unless MAXSCALARS is defined. With REFINEMENT_HIGH_RES_GAS
+ *  the high-resolution mass scalar is registered first and its index stored
+ *  in ScalarIndex.HighResMass; with PASSIVE_SCALARS one scalar is registered
+ *  per entry of PScalars. The number of registered scalars is printed at the
+ *  end.
+ *
+ *  \return void
+ */
+void init_scalars()
+{
+#ifdef MAXSCALARS
+
+#if defined(REFINEMENT_HIGH_RES_GAS)
+  ScalarIndex.HighResMass = scalar_init(&SphP[0].HighResDensity, &SphP[0].HighResMass, SCALAR_TYPE_PASSIVE);
+  if(ScalarIndex.HighResMass == -1) /* -1 is what scalar_init() returns when scalars are compiled out */
+    terminate("ScalarIndex.HighResMass initialized incorrectly\n");
+#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */
+
+#ifdef PASSIVE_SCALARS
+  for(int i = 0; i < PASSIVE_SCALARS; i++)
+    {
+      scalar_init(&SphP[0].PScalars[i], &SphP[0].PConservedScalars[i], SCALAR_TYPE_PASSIVE); /* returned index is not needed here */
+    }
+#endif /* #ifdef PASSIVE_SCALARS */
+
+  mpi_printf("INIT: %d/%d Scalars used.\n", N_Scalar, MAXSCALARS);
+#endif /* MAXSCALARS */
+}
+
+/*! \brief Initialize a specific scalar property.
+ *
+ *  \param[in] addr Pointer to (primitive) scalar in SphP[0] struct.
+ *  \param[in] addr_mass Pointer to conserved scalar quantity in SphP[0].
+ *  \param[in] type Type of scalar (e.g.
SCALAR_TYPE_PASSIVE for passive + * scalar) + * + * \return Number of scalars - 1 + */ +int scalar_init(MyFloat *addr, MyFloat *addr_mass, int type) +{ +#ifdef MAXSCALARS + if(N_Scalar == MAXSCALARS) + { + mpi_printf("Failed to register scalar, maximum of %d already reached\n", MAXSCALARS); + terminate("MAXSCALARS reached"); + } + + /* save type and relative address */ + scalar_elements[N_Scalar].type = type; + scalar_elements[N_Scalar].offset = ((char *)addr) - ((char *)&SphP[0]); + scalar_elements[N_Scalar].offset_mass = ((char *)addr_mass) - ((char *)&SphP[0]); + + N_Scalar++; + + return N_Scalar - 1; + /* note: gradients are initialized in init_gradients */ +#else /* #ifdef MAXSCALARS */ + return -1; +#endif /* #ifdef MAXSCALARS #else */ +} diff --git a/src/amuse/community/arepo/src/hydro/update_primitive_variables.c b/src/amuse/community/arepo/src/hydro/update_primitive_variables.c new file mode 100644 index 0000000000..48a10cd4cf --- /dev/null +++ b/src/amuse/community/arepo/src/hydro/update_primitive_variables.c @@ -0,0 +1,343 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/hydro/update_primitive_variables.c + * \date 05/2018 + * \brief Routines to recover the primitive hydrodynamical variables from + * the conserved ones. + * \details contains functions: + * void update_primitive_variables(void) + * void set_pressure_of_cell(int i) + * void set_pressure_of_cell_internal(struct particle_data + * *localP, struct sph_particle_data *localSphP, int i) + * void do_validity_checks(struct particle_data *localP, struct + * sph_particle_data *localSphP, int i, struct pv_update_data + * *pvd) + * void update_primitive_variables_single(struct particle_data + * *localP, struct sph_particle_data *localSphP, int i, + * struct pv_update_data *pvd) + * void update_internal_energy(struct particle_data *localP, + * struct sph_particle_data *localSphP, int i, struct + * pv_update_data *pvd) + * double get_sound_speed(int p) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Main routine to update the primitive hydrodynamics variables from + * the conserved ones. + * + * Note that the primitive variables are inconsistent with the (new) + * conserved variables after the hydro integration up to the point this + * function is called. 
+ * + * \return void + */ +void update_primitive_variables(void) +{ + TIMER_START(CPU_CELL_UPDATES); + + struct pv_update_data pvd; + int idx, i; + + if(All.ComovingIntegrationOn) + { + pvd.atime = All.Time; + pvd.hubble_a = hubble_function(All.Time); + pvd.a3inv = 1 / (All.Time * All.Time * All.Time); + } + else + pvd.atime = pvd.hubble_a = pvd.a3inv = 1.0; + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + do_validity_checks(P, SphP, i, &pvd); + + update_primitive_variables_single(P, SphP, i, &pvd); + + update_internal_energy(P, SphP, i, &pvd); + + set_pressure_of_cell_internal(P, SphP, i); /* calculate the pressure from Density and Utherm (and composition) */ + + SphP[i].OldMass = P[i].Mass; + + SphP[i].TimeLastPrimUpdate = All.Time; + } + + TIMER_STOP(CPU_CELL_UPDATES); +} + +/*! \brief Wrapper function to calculate pressure of a cell from its internal + * energy. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return void + */ +void set_pressure_of_cell(int i) { set_pressure_of_cell_internal(P, SphP, i); } + +/*! \brief Function to calculate pressure from other hydrodynamics quantities. + * + * How this is done depends on the adiabatic index and potentially on sub- + * resolution physics. Note that this is just the thermal pressure (i.e. not + * including magnetic fields). + * + * \param[in] localP Pointer to particle data array. + * \param[in,out] localSphP Pointer to cell data array. + * \param[in] i Index in localP and localSphP arrays. 
+ * + * \return void + */ +void set_pressure_of_cell_internal(struct particle_data *localP, struct sph_particle_data *localSphP, int i) +{ +#ifdef ISOTHERM_EQS + localSphP[i].Pressure = localSphP[i].Density * All.IsoSoundSpeed * All.IsoSoundSpeed; +#else /* #ifdef ISOTHERM_EQS */ + + if(localSphP[i].Utherm >= 0) + localSphP[i].Pressure = GAMMA_MINUS1 * localSphP[i].Density * localSphP[i].Utherm; + else + localSphP[i].Pressure = 0; +#endif /* #ifdef ISOTHERM_EQS */ + +#ifdef ENFORCE_JEANS_STABILITY_OF_CELLS +#if defined(USE_SFR) + if(get_starformation_rate(i) == 0) +#endif /* #if defined(USE_SFR) */ + { +#ifdef ADAPTIVE_HYDRO_SOFTENING + double cell_soft = All.ForceSoftening[localP[i].SofteningType]; +#else /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + double cell_soft = All.GasSoftFactor * get_cell_radius(i); +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */ + + localSphP[i].Pressure = + dmax(localSphP[i].Pressure, GAMMA_MINUS1 * localSphP[i].Density * 2 * All.G * localP[i].Mass / (All.cf_atime * cell_soft)); + } +#endif /* #ifdef ENFORCE_JEANS_STABILITY_OF_CELLS */ +} + +/*! \brief Validity checks for a gas cell. + * + * So far, only a positive mass constraint implemented. Terminates if not + * successful. + * + * \param[in] localP Pointer to particle data array + * \param[in,out] localSphP Pointer to cell data array + * \param[in] i Index in localP and localSphP arrays + * \param[in] pvd (unused) + * + * \return void + */ +void do_validity_checks(struct particle_data *localP, struct sph_particle_data *localSphP, int i, struct pv_update_data *pvd) +{ + if(localP[i].Mass < 0) + { + printf("very bad...i=%d ID=%d mass=%g oldMass=%g utherm=%g pos=%g|%g|%g\n", i, (int)localP[i].ID, localP[i].Mass, + localSphP[i].OldMass, localSphP[i].Utherm, localP[i].Pos[0], localP[i].Pos[1], localP[i].Pos[2]); + + terminate("stop"); + } +} + +/*! \brief Updates primitive variables in a specified cell. + * + * \param[in] localP Pointer to particle data array. 
+ * \param[in,out] localSphP Pointer to cell data array. + * \param[in] i Index of cell in localP and localSphP arrays. + * \param[in] pvd additional data that is needed for update (e.g. cosmological + * factors). + * + * \return void + */ +void update_primitive_variables_single(struct particle_data *localP, struct sph_particle_data *localSphP, int i, + struct pv_update_data *pvd) +{ + localSphP[i].Density = localP[i].Mass / localSphP[i].Volume; + + if(localP[i].Mass > 0) + { + localP[i].Vel[0] = localSphP[i].Momentum[0] / localP[i].Mass; + localP[i].Vel[1] = localSphP[i].Momentum[1] / localP[i].Mass; + localP[i].Vel[2] = localSphP[i].Momentum[2] / localP[i].Mass; + +#ifdef MAXSCALARS + for(int k = 0; k < N_Scalar; k++) + { + *(MyFloat *)(((char *)(&localSphP[i])) + scalar_elements[k].offset) = + *(MyFloat *)(((char *)(&localSphP[i])) + scalar_elements[k].offset_mass) / localP[i].Mass; + } +#endif /* #ifdef MAXSCALARS */ + +#ifdef MHD + localSphP[i].B[0] = localSphP[i].BConserved[0] / localSphP[i].Volume; + localSphP[i].B[1] = localSphP[i].BConserved[1] / localSphP[i].Volume; + localSphP[i].B[2] = localSphP[i].BConserved[2] / localSphP[i].Volume; +#endif /* #ifdef MHD */ + } + else /* P[i].Mass <= 0 */ + { + localP[i].Vel[0] = 0; + localP[i].Vel[1] = 0; + localP[i].Vel[2] = 0; + +#ifdef MAXSCALARS + for(int k = 0; k < N_Scalar; k++) + *(MyFloat *)(((char *)(&localSphP[i])) + scalar_elements[k].offset) = 0; +#endif /* #ifdef MAXSCALARS */ + } +} + +/*! \brief Updates the internal energy field in a specified cell + * + * \param[in] localP Pointer to particle data array + * \param[in,out] localSphP Pointer to cell data array + * \param[in] i Index of cell in localP and localSphP arrays + * \param[in] pvd additional data that is needed for update (e.g. 
cosmological + * factors) + * + * \return void + */ +void update_internal_energy(struct particle_data *localP, struct sph_particle_data *localSphP, int i, struct pv_update_data *pvd) +{ +#ifndef ISOTHERM_EQS + double ulimit; + + if(localP[i].Mass > 0) + { +#ifdef MESHRELAX + localSphP[i].Utherm = localSphP[i].Energy / localP[i].Mass; +#else /* #ifdef MESHRELAX */ + localSphP[i].Utherm = + (localSphP[i].Energy / localP[i].Mass - + 0.5 * (localP[i].Vel[0] * localP[i].Vel[0] + localP[i].Vel[1] * localP[i].Vel[1] + localP[i].Vel[2] * localP[i].Vel[2])) / + (pvd->atime * pvd->atime); +#endif /* #ifdef MESHRELAX #else */ + +#ifdef MHD + localSphP[i].Utherm -= + 0.5 * + (localSphP[i].B[0] * localSphP[i].B[0] + localSphP[i].B[1] * localSphP[i].B[1] + localSphP[i].B[2] * localSphP[i].B[2]) / + localSphP[i].Density / pvd->atime; +#endif /* #ifdef MHD */ + + ulimit = All.MinEgySpec; + + if(localSphP[i].Utherm < ulimit) + { + EgyInjection -= localSphP[i].Energy; + + localSphP[i].Utherm = ulimit; + +#ifdef MESHRELAX + localSphP[i].Energy = localP[i].Mass * localSphP[i].Utherm; +#else /* #ifdef MESHRELAX */ + localSphP[i].Energy = + pvd->atime * pvd->atime * localP[i].Mass * localSphP[i].Utherm + + 0.5 * localP[i].Mass * + (localP[i].Vel[0] * localP[i].Vel[0] + localP[i].Vel[1] * localP[i].Vel[1] + localP[i].Vel[2] * localP[i].Vel[2]); +#endif /* #ifdef MESHRELAX */ + +#ifdef MHD + localSphP[i].Energy += + 0.5 * + (localSphP[i].B[0] * localSphP[i].B[0] + localSphP[i].B[1] * localSphP[i].B[1] + localSphP[i].B[2] * localSphP[i].B[2]) * + localSphP[i].Volume * pvd->atime; +#endif /* #ifdef MHD */ + + EgyInjection += localSphP[i].Energy; + } + } + else + localSphP[i].Utherm = 0; + + if(localSphP[i].Density < All.LimitUBelowThisDensity && localSphP[i].Utherm > All.LimitUBelowCertainDensityToThisValue) + { + localSphP[i].Utherm = All.LimitUBelowCertainDensityToThisValue; + localSphP[i].Energy = + pvd->atime * pvd->atime * localP[i].Mass * localSphP[i].Utherm + + 0.5 * localP[i].Mass * 
+ (localP[i].Vel[0] * localP[i].Vel[0] + localP[i].Vel[1] * localP[i].Vel[1] + localP[i].Vel[2] * localP[i].Vel[2]); +#ifdef MHD + localSphP[i].Energy += + 0.5 * + (localSphP[i].B[0] * localSphP[i].B[0] + localSphP[i].B[1] * localSphP[i].B[1] + localSphP[i].B[2] * localSphP[i].B[2]) * + localSphP[i].Volume * pvd->atime; +#endif /* #ifdef MHD */ + } + + if(localSphP[i].Utherm < 0) + { + printf("negative utherm %g\n", localSphP[i].Utherm); + terminate("stop"); + } + +#endif /* #ifndef ISOTHERM_EQS */ +} + +/*! \brief Calculates the sound speed of a specified cell + * + * Depends on equation of state and potential sub-resolution physics. + * + * \param[in] p Index of gas cell in P and SphP arrays + * + * \return Sound speed + */ +double get_sound_speed(int p) +{ + double csnd; + +#ifdef ISOTHERM_EQS + csnd = All.IsoSoundSpeed; +#else /* #ifdef ISOTHERM_EQS */ + + double gamma; + gamma = GAMMA; + + if(SphP[p].Density > 0) + csnd = sqrt(gamma * SphP[p].Pressure / SphP[p].Density); + else + csnd = 0; +#endif /* #ifdef ISOTHERM_EQS #else */ + +#ifdef MHD + /* for MHD, this is an upper bound to the signal velocity + to do it more precisely, the magnet field in normal direction to the + interfaces has to be taken into account */ + double Bsqr = SphP[p].B[0] * SphP[p].B[0] + SphP[p].B[1] * SphP[p].B[1] + SphP[p].B[2] * SphP[p].B[2]; + if(All.ComovingIntegrationOn) + Bsqr /= All.Time; + csnd = sqrt(csnd * csnd + Bsqr / SphP[p].Density); +#endif /* #ifdef MHD */ + + return csnd; +} diff --git a/src/amuse/community/arepo/src/init/begrun.c b/src/amuse/community/arepo/src/init/begrun.c new file mode 100644 index 0000000000..ad8a5222ca --- /dev/null +++ b/src/amuse/community/arepo/src/init/begrun.c @@ -0,0 +1,344 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/init/begrun.c + * \date 05/2018 + * \brief Initial set-up of a simulation run + * \details This file contains various functions to initialize a simulation + * run. In particular, the parameter file is read in and parsed + * and global variables are initialized to their proper values. + * contains functions: + * void hello(void) + * void begrun0(void) + * void begrun1(void) + * void begrun2(void) + * void set_units(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 03.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../mesh/voronoi/voronoi.h" + +#ifdef HAVE_HDF5 +#include +herr_t my_hdf5_error_handler(void *unused); +#endif + +static void delete_end_file(void); + +/*! \brief Prints a welcome message. + * + * The message is a multi-line ASCII-art banner emitted through a single + * mpi_printf() call; the two adjacent string literals are concatenated by + * the compiler. The function takes no input and changes no state itself. + * + * \return void + */ +void hello(void) +{ + mpi_printf( + "\n __ ____ ____ ____ _____\n /__\\ ( _ \\( ___)( _ \\( _ )\n /(__)\\ ) / )__) )___/ " + ")(_)(\n(__)(__)(_)\\_)(____)(__) (_____)\n\n"); +} + +/*! \brief Prints used compile options. 
+ * + * \return void + */ +void begrun0(void) +{ + mpi_printf( + "\nThis is Arepo, version %s.\n\nRunning with %d MPI tasks.\n\nApparently we're using %d compute nodes (we have a minimum of %d " + "MPI tasks per node, and a maximum of %d)\n\nCode was compiled with settings:\n\n", + AREPO_VERSION, NTask, NumNodes, MinTasksPerNode, MaxTasksPerNode); + + if(ThisTask == 0) /* only task 0 lists the compile-time options */ + { + output_compile_time_options(); + } +} + +/*! \brief Initial setup of the simulation. + * + * First, the parameter file is read by read_parameter_file(), + * then routines for setting units, etc are called. This function only does + * the setup necessary to load the IC file. After the IC file has been loaded + * and prepared by init(), setup continues with begrun2(). This splitting is + * done so that we can return cleanly from operations that don't actually + * start the simulation (converting snapshots, making projected images, etc.) + * + * Besides that, the function installs the HDF5 (if HAVE_HDF5) and GSL error + * handlers, allocates and seeds the two GSL random number generators with + * task-dependent seeds, initializes the hydro and gravity time bins, sets + * the box size variables, resets EgyInjection and finally registers scalars + * and gradients via init_scalars() and init_gradients(). + * + * \return void + */ +void begrun1(void) +{ + read_parameter_file(ParameterFile); /* ... 
read in parameters for this run */ + + check_parameters(); /* consistency check of parameters */ + +#ifdef HAVE_HDF5 + H5Eset_auto(my_hdf5_error_handler, NULL); +#endif /* #ifdef HAVE_HDF5 */ + + gsl_set_error_handler(my_gsl_error_handler); + +#ifdef DEBUG + enable_core_dumps_and_fpu_exceptions(); +#endif /* #ifdef DEBUG */ + + mpi_printf("BEGRUN: Size of particle structure %3d [bytes]\n", (int)sizeof(struct particle_data)); + mpi_printf("BEGRUN: Size of sph particle structure %3d [bytes]\n", (int)sizeof(struct sph_particle_data)); + mpi_printf("BEGRUN: Size of gravity tree node %3d [bytes]\n", (int)sizeof(struct NODE)); +#ifdef MULTIPLE_NODE_SOFTENING + mpi_printf("BEGRUN: Size of auxiliary gravity node %3d [bytes]\n", (int)sizeof(struct ExtNODE)); +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + set_units(); + + if(RestartFlag == 1) /* this is needed here to allow domain decomposition right after restart */ + if(All.ComovingIntegrationOn) + init_drift_table(); + + init_io_fields(); + + force_short_range_init(); + +#if defined(FORCETEST) && !defined(FORCETEST_TESTFORCELAW) + forcetest_ewald_init(); +#endif /* #if defined (FORCETEST) && !defined(FORCETEST_TESTFORCELAW) */ + + /* set up random number generators */ + random_generator = gsl_rng_alloc(gsl_rng_ranlxd1); + random_generator_aux = gsl_rng_alloc(gsl_rng_ranlxd1); + + /* individual start-up seed: offset by the task rank so that every task draws a different sequence */ + gsl_rng_set(random_generator, 42 + ThisTask); + gsl_rng_set(random_generator_aux, 31452 + ThisTask); + + timebins_init(&TimeBinsHydro, "Hydro", &All.MaxPartSph); + timebins_init(&TimeBinsGravity, "Gravity", &All.MaxPart); + +#if defined(COOLING) + All.Time = All.TimeBegin; + set_cosmo_factors_for_current_time(); + InitCool(); +#endif /* #if defined(COOLING) */ + +#if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) + ewald_init(); +#endif /* #if !defined(PMGRID) && defined(SELFGRAVITY) && !defined(GRAVITY_NOT_PERIODIC) && !defined(ONEDIMS_SPHERICAL) */ + 
+#ifdef TILE_ICS + All.BoxSize *= All.TileICsFactor; +#endif /* #ifdef TILE_ICS */ + + boxSize = All.BoxSize; + boxHalf = 0.5 * All.BoxSize; +#ifdef LONG_X + boxHalf_X = boxHalf * LONG_X; + boxSize_X = boxSize * LONG_X; +#endif /* #ifdef LONG_X */ +#ifdef LONG_Y + boxHalf_Y = boxHalf * LONG_Y; + boxSize_Y = boxSize * LONG_Y; +#endif /* #ifdef LONG_Y */ +#ifdef LONG_Z + boxHalf_Z = boxHalf * LONG_Z; + boxSize_Z = boxSize * LONG_Z; +#endif /* #ifdef LONG_Z */ + + EgyInjection = 0; + +#ifdef PMGRID + if((RestartFlag != 3) && (RestartFlag != 6)) + long_range_init(); +#endif /* #ifdef PMGRID */ + + if(RestartFlag <= 2) /* for RestartFlag > 2 the log files are opened in begrun2() instead */ + open_logfiles(); + + All.TimeLastRestartFile = CPUThisRun; + +#ifdef REDUCE_FLUSH + All.FlushLast = CPUThisRun; +#endif /* #ifdef REDUCE_FLUSH */ + + init_scalars(); + + init_gradients(); +} + +/*! \brief Late setup, after the IC file has been loaded but before run() is + * called. + * + * The output files are opened and various modules are initialized. The next + * output time is determined by find_next_outputtime() and various timers are + * set. 
+ * + * \return void + */ +void begrun2(void) +{ + char contfname[1000]; + sprintf(contfname, "%scont", All.OutputDir); + unlink(contfname); + + delete_end_file(); + + if(RestartFlag > 2) + open_logfiles(); + +#if defined(USE_SFR) + sfr_init(); +#endif /* #if defined(USE_SFR) */ + +#ifdef PMGRID + long_range_init_regionsize(); +#endif /* #ifdef PMGRID */ + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE + special_particle_create_list(); +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + + if(RestartFlag != 1) /* this needs to be done here because here All.TimeBegin has the correct value */ + if(All.ComovingIntegrationOn) + init_drift_table(); + + { + if(RestartFlag == 2) + All.Ti_nextoutput = find_next_outputtime(All.Ti_Current + 100); + else + All.Ti_nextoutput = find_next_outputtime(All.Ti_Current); + } + + All.TimeLastRestartFile = CPUThisRun; + +#ifdef REDUCE_FLUSH + All.FlushLast = CPUThisRun; +#endif /* #ifdef REDUCE_FLUSH */ + +#if defined(FORCETEST) && defined(FORCETEST_TESTFORCELAW) + gravity_forcetest_testforcelaw(); +#endif /* #if defined(FORCETEST) && defined(FORCETEST_TESTFORCELAW) */ +} + +/*! \brief Computes conversion factors between internal code units and the + * cgs-system. + * + * In addition constants like the gravitation constant are set. 
+ * + * \return void + */ +void set_units(void) +{ + double meanweight; + +#ifdef STATICNFW + double Mtot; +#endif /* #ifdef STATICNFW */ + + All.UnitTime_in_s = All.UnitLength_in_cm / All.UnitVelocity_in_cm_per_s; + All.UnitTime_in_Megayears = All.UnitTime_in_s / SEC_PER_MEGAYEAR; + + if(All.GravityConstantInternal == 0) + All.G = GRAVITY / pow(All.UnitLength_in_cm, 3) * All.UnitMass_in_g * pow(All.UnitTime_in_s, 2); + else + All.G = All.GravityConstantInternal; + + All.UnitDensity_in_cgs = All.UnitMass_in_g / pow(All.UnitLength_in_cm, 3); + All.UnitPressure_in_cgs = All.UnitMass_in_g / All.UnitLength_in_cm / pow(All.UnitTime_in_s, 2); + All.UnitCoolingRate_in_cgs = All.UnitPressure_in_cgs / All.UnitTime_in_s; + All.UnitEnergy_in_cgs = All.UnitMass_in_g * pow(All.UnitLength_in_cm, 2) / pow(All.UnitTime_in_s, 2); + + /* convert some physical input parameters to internal units */ + + All.Hubble = HUBBLE * All.UnitTime_in_s; + + mpi_printf("BEGRUN: Hubble (internal units) = %g\n", All.Hubble); + mpi_printf("BEGRUN: G (internal units) = %g\n", All.G); + mpi_printf("BEGRUN: UnitMass_in_g = %g\n", All.UnitMass_in_g); + mpi_printf("BEGRUN: UnitTime_in_s = %g\n", All.UnitTime_in_s); + mpi_printf("BEGRUN: UnitVelocity_in_cm_per_s = %g\n", All.UnitVelocity_in_cm_per_s); + mpi_printf("BEGRUN: UnitDensity_in_cgs = %g\n", All.UnitDensity_in_cgs); + mpi_printf("BEGRUN: UnitEnergy_in_cgs = %g\n", All.UnitEnergy_in_cgs); + mpi_printf("\n"); + + meanweight = 4.0 / (1 + 3 * HYDROGEN_MASSFRAC); /* note: assuming NEUTRAL GAS */ + + if(All.MinEgySpec == 0) + { + All.MinEgySpec = 1 / meanweight * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * All.MinGasTemp; + All.MinEgySpec *= All.UnitMass_in_g / All.UnitEnergy_in_cgs; + + mpi_printf("BEGRUN: MinEgySpec set to %g based on MinGasTemp=%g\n", All.MinEgySpec, All.MinGasTemp); + } + +#if defined(USE_SFR) + set_units_sfr(); +#endif /* #if defined(USE_SFR) */ + +#ifdef STATICNFW + R200 = pow(NFW_M200 * All.G / (100 * All.Hubble * 
All.Hubble), 1.0 / 3); + Rs = R200 / NFW_C; + Dc = 200.0 / 3 * NFW_C * NFW_C * NFW_C / (log(1 + NFW_C) - NFW_C / (1 + NFW_C)); + RhoCrit = 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G); + V200 = 10 * All.Hubble * R200; + mpi_printf("V200= %g\n", V200); + + fac = 1.0; + Mtot = enclosed_mass(R200); + mpi_printf("M200= %g\n", Mtot); + fac = V200 * V200 * V200 / (10 * All.G * All.Hubble) / Mtot; + Mtot = enclosed_mass(R200); + mpi_printf("M200= %g\n", Mtot); +#endif /* #ifdef STATICNFW */ +} + +/*! \brief deletes the end file if it exists. + * + * This is needed in case a already completed simulation is extended or + * overwritten. Note that the end-file is completely passive. + * + * \return void + */ +static void delete_end_file(void) +{ + if(RestartFlag > 2) // no simulation happening + { + return; + } + + char endfname[1000]; + sprintf(endfname, "%send", All.OutputDir); + unlink(endfname); + return; +} diff --git a/src/amuse/community/arepo/src/init/density.c b/src/amuse/community/arepo/src/init/density.c new file mode 100644 index 0000000000..8be85e443b --- /dev/null +++ b/src/amuse/community/arepo/src/init/density.c @@ -0,0 +1,635 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/init/density.c + * \date 05/2018 + * \brief SPH density computation and smoothing length determination. + * \details This file contains the "first SPH loop", where the SPH + * densities and smoothing lengths are calculated. + * In Arepo, this is used in setup_smoothinglengths() (init.c) to + * get an initial guess for MaxDelaunayRadius. + * Note that the SPH density is NOT used in the subsequent + * hydrodynamics calculation, but the density is either set by the + * initial conditions explicitly (DENSITY_AS_MASS_IN_INPUT) or + * calculated by the mass given in the initial conditions divided + * by the volume of the cell calculated by the Voronoi + * tessellation algorithm. + * contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void density(void) + * static int density_evaluate(int target, int mode, int + * threadid) + * int density_isactive(int n) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" + +static int density_evaluate(int target, int mode, int threadid); + +static MyFloat *NumNgb, *DhsmlDensityFactor; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES +static MyFloat *MinDist; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. 
+ */ +typedef struct +{ + MyDouble Pos[3]; + MyFloat Hsml; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + MyIDType ID; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First note of communication. + * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + in->Pos[0] = P[i].Pos[0]; + in->Pos[1] = P[i].Pos[1]; + in->Pos[2] = P[i].Pos[2]; + in->Hsml = SphP[i].Hsml; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + in->ID = P[i].ID; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + MyFloat Rho; + MyFloat DhsmlDensity; + MyFloat Ngb; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + MyFloat MinDist; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? 
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + NumNgb[i] = out->Ngb; + if(P[i].Type == 0) + { + SphP[i].Density = out->Rho; + DhsmlDensityFactor[i] = out->DhsmlDensity; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + MinDist[i] = out->MinDist; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + } + } + else /* combine */ + { + NumNgb[i] += out->Ngb; + if(P[i].Type == 0) + { + SphP[i].Density += out->Rho; + DhsmlDensityFactor[i] += out->DhsmlDensity; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + if(MinDist[i] > out->MinDist) + MinDist[i] = out->MinDist; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + } + } +} + +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int idx; + + { + int j, threadid = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + idx = NextParticle++; + + if(idx >= TimeBinsHydro.NActiveParticles) + break; + + int i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(density_isactive(i)) + density_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. 
+ * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + density_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +static MyFloat *NumNgb, *DhsmlDensityFactor; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES +static MyFloat *MinDist; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + +/*! \brief Main function of SPH density calculation. + * + * This function computes the local density for each active SPH particle and + * the number of weighted neighbors in the current smoothing radius. If a + * particle with its smoothing region is fully inside the local domain, it is + * not exported to the other processors. The function also detects particles + * that have a number of neighbors outside the allowed tolerance range. For + * these particles, the smoothing length is adjusted accordingly, and the + * computation is called again. 
+ * + * \return void + */ +void density(void) +{ + MyFloat *Left, *Right; + int idx, i, npleft, iter = 0; + long long ntot; + double desnumngb, t0, t1; + + CPU_Step[CPU_MISC] += measure_time(); + + NumNgb = (MyFloat *)mymalloc("NumNgb", NumPart * sizeof(MyFloat)); + DhsmlDensityFactor = (MyFloat *)mymalloc("DhsmlDensityFactor", NumPart * sizeof(MyFloat)); + Left = (MyFloat *)mymalloc("Left", NumPart * sizeof(MyFloat)); + Right = (MyFloat *)mymalloc("Right", NumPart * sizeof(MyFloat)); + +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + MinDist = (MyFloat *)mymalloc("MinDist", NumPart * sizeof(MyFloat)); +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(density_isactive(i)) + { + Left[i] = Right[i] = 0; /* 0 means: no lower/upper bracket for Hsml found yet */ + } + } + + generic_set_MaxNexport(); + + desnumngb = All.DesNumNgb; + + /* we will repeat the whole thing for those particles where we didn't find enough neighbours */ + do + { + t0 = second(); + + generic_comm_pattern(TimeBinsHydro.NActiveParticles, kernel_local, kernel_imported); + + /* do final operations on results */ + for(idx = 0, npleft = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(density_isactive(i)) + { + if(P[i].Type == 0) + { + if(SphP[i].Density > 0) + { + DhsmlDensityFactor[i] *= SphP[i].Hsml / (NUMDIMS * SphP[i].Density); + if(DhsmlDensityFactor[i] > -0.9) /* note: this would be -1 if only a single particle at zero lag is found */ + DhsmlDensityFactor[i] = 1 / (1 + DhsmlDensityFactor[i]); + else + DhsmlDensityFactor[i] = 1; + } + } + + if(NumNgb[i] < (desnumngb - All.MaxNumNgbDeviation) || NumNgb[i] > (desnumngb + All.MaxNumNgbDeviation)) + { + /* need to redo this particle */ + npleft++; + + if(Left[i] > 0 && Right[i] > 0) + if((Right[i] - Left[i]) < 1.0e-3 * Left[i]) + { + /* this one should be ok: the bracket is narrower than 0.1 percent of Left */ + 
npleft--; + P[i].TimeBinHydro = -P[i].TimeBinHydro - 1; /* Mark as inactive */ + continue; + } + + if(NumNgb[i] < (desnumngb - All.MaxNumNgbDeviation)) + Left[i] = dmax(SphP[i].Hsml, Left[i]); /* too few neighbours: current Hsml becomes a lower bound */ + else + { + if(Right[i] != 0) + { + if(SphP[i].Hsml < Right[i]) + Right[i] = SphP[i].Hsml; + } + else + Right[i] = SphP[i].Hsml; + } + + if(iter >= MAXITER - 10) + { + printf("i=%d task=%d ID=%d Hsml=%g Left=%g Right=%g Ngbs=%g Right-Left=%g\n pos=(%g|%g|%g)\n", i, ThisTask, + (int)P[i].ID, SphP[i].Hsml, Left[i], Right[i], (float)NumNgb[i], Right[i] - Left[i], P[i].Pos[0], + P[i].Pos[1], P[i].Pos[2]); + myflush(stdout); + } + + if(Right[i] > 0 && Left[i] > 0) + SphP[i].Hsml = pow(0.5 * (pow(Left[i], 3) + pow(Right[i], 3)), 1.0 / 3); /* both brackets known: take the cubic mean of Left and Right */ + else + { + if(Right[i] == 0 && Left[i] == 0) + terminate("should not occur"); + + if(Right[i] == 0 && Left[i] > 0) + { + SphP[i].Hsml *= 1.26; /* 1.26 ~ 2^(1/3): Hsml^3 roughly doubles */ + } + + if(Right[i] > 0 && Left[i] == 0) + { + SphP[i].Hsml /= 1.26; /* 1.26 ~ 2^(1/3): Hsml^3 roughly halves */ + } + } + } + else + P[i].TimeBinHydro = -P[i].TimeBinHydro - 1; /* Mark as inactive */ + } + } + + sumup_large_ints(1, &npleft, &ntot); + + t1 = second(); + + if(ntot > 0) + { + iter++; + + if(iter > 0) /* NOTE(review): iter was incremented just above, so this always holds when iter started >= 0 */ + mpi_printf("DENSITY: ngb iteration %3d: need to repeat for %12lld particles. (took %g sec)\n", iter, ntot, + timediff(t0, t1)); + + if(iter > MAXITER) + terminate("failed to converge in neighbour iteration in density()\n"); + } + } + while(ntot > 0); + +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + +#if defined(REFLECTIVE_X) && defined(REFLECTIVE_Y) && defined(REFLECTIVE_Z) + + int count2 = 0; + int countall2 = 0; + + for(i = 0; i < NumGas; i++) + { + /* + * If the distance to the border of a particle is too small, + * then the ghost particle will be too close to this particle. + * Therefore we shift the particle in this case into the direction of the box center. 
+ */ + if(distance_to_border(i) < 0.5 * 0.001 * SphP[i].Hsml) + { + count2++; + + double dir[3]; + + dir[0] = boxSize_X * 0.5 - P[i].Pos[0]; + dir[1] = boxSize_Y * 0.5 - P[i].Pos[1]; + dir[2] = boxSize_Z * 0.5 - P[i].Pos[2]; + + double n = sqrt(dir[0] * dir[0] + dir[1] * dir[1] + dir[2] * dir[2]); + // note: it's not possible that the operand of sqrt is zero here. + + dir[0] /= n; + dir[1] /= n; + dir[2] /= n; + + P[i].Pos[0] += 0.05 * SphP[i].Hsml * dir[0]; + P[i].Pos[1] += 0.05 * SphP[i].Hsml * dir[1]; + P[i].Pos[2] += 0.05 * SphP[i].Hsml * dir[2]; + } + } + + MPI_Allreduce(&count2, &countall2, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + mpi_printf("\nFOUND %d particles extremely close to the reflective boundary. Fixing this. \n\n", countall2); +#endif /* #if defined(REFLECTIVE_X) && defined(REFLECTIVE_Y) && defined(REFLECTIVE_Z) */ + + int count = 0, countall; + + for(i = 0; i < NumGas; i++) + if(MinDist[i] < 0.001 * SphP[i].Hsml) + count++; + + MPI_Allreduce(&count, &countall, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + if(countall) + { + mpi_printf("\nFOUND %d SPH particles with an extremely close neighbor. Fixing this. 
\n\n", countall); + + for(i = 0; i < NumGas; i++) + if(MinDist[i] < 0.001 * SphP[i].Hsml) + { + /* displace by 0.1 * Hsml along a random direction given by the angles theta and phi */ + double theta = acos(2 * get_random_number() - 1); + double phi = 2 * M_PI * get_random_number(); + + P[i].Pos[0] += 0.1 * SphP[i].Hsml * sin(theta) * cos(phi); + P[i].Pos[1] += 0.1 * SphP[i].Hsml * sin(theta) * sin(phi); + P[i].Pos[2] += 0.1 * SphP[i].Hsml * cos(theta); + } + } +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + + /* the work arrays are released in the reverse order of their allocation */ +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + myfree(MinDist); +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + myfree(Right); + myfree(Left); + myfree(DhsmlDensityFactor); + myfree(NumNgb); + + /* mark as active again: undo the -TimeBinHydro - 1 encoding used above to flag finished particles */ + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(P[i].TimeBinHydro < 0) + P[i].TimeBinHydro = -P[i].TimeBinHydro - 1; + } + + /* collect some timing information */ + CPU_Step[CPU_INIT] += measure_time(); +} + +/*! \brief Inner function of the SPH density calculation + * + * This function represents the core of the SPH density computation. The + * target particle may either be local, or reside in the communication + * buffer. + * + * \param[in] target Index of particle in local data/import buffer. + * \param[in] mode Mode in which function is called (local or impored data). + * \param[in] threadid ID of local thread. 
+ * + * \return 0 + */ +static int density_evaluate(int target, int mode, int threadid) +{ + int j, n; + int numngb, numnodes, *firstnode; + double h, h2, hinv, hinv3, hinv4; + MyFloat rho; + double wk, dwk; + double dx, dy, dz, r, r2, u, mass_j; + MyFloat weighted_numngb; + MyFloat dhsmlrho; + MyDouble *pos; + +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + MyFloat mindist = MAX_REAL_NUMBER; + MyIDType ID; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + data_in local, *target_data; + data_out out; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + target_data = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + target_data = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = target_data->Pos; + h = target_data->Hsml; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + ID = target_data->ID; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + + h2 = h * h; + hinv = 1.0 / h; +#ifndef TWODIMS + hinv3 = hinv * hinv * hinv; +#else /* #ifndef TWODIMS */ + hinv3 = hinv * hinv / boxSize_Z; +#endif /* #ifndef TWODIMS #else */ + hinv4 = hinv3 * hinv; + + numngb = 0; + rho = weighted_numngb = dhsmlrho = 0; + + int nfound = ngb_treefind_variable_threads(pos, h, target, mode, threadid, numnodes, firstnode); + + for(n = 0; n < nfound; n++) + { + j = Thread[threadid].Ngblist[n]; + + dx = pos[0] - P[j].Pos[0]; + dy = pos[1] - P[j].Pos[1]; + dz = pos[2] - P[j].Pos[2]; + +/* now find the closest image in the given box size */ +#ifndef REFLECTIVE_X + if(dx > boxHalf_X) + dx -= boxSize_X; + if(dx < -boxHalf_X) + dx += boxSize_X; +#endif /* #ifndef REFLECTIVE_X */ + +#ifndef REFLECTIVE_Y + if(dy > boxHalf_Y) + dy -= boxSize_Y; + if(dy < -boxHalf_Y) + dy += boxSize_Y; +#endif /* #ifndef REFLECTIVE_Y */ + +#ifndef REFLECTIVE_Z + if(dz > boxHalf_Z) + dz -= boxSize_Z; + if(dz < -boxHalf_Z) + dz += boxSize_Z; +#endif /* #ifndef REFLECTIVE_Z */ + r2 = dx * dx + dy * dy + dz * dz; 
+ + if(r2 < h2) + { + numngb++; + + r = sqrt(r2); + + u = r * hinv; + + if(u < 0.5) + { + wk = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u); + dwk = hinv4 * u * (KERNEL_COEFF_3 * u - KERNEL_COEFF_4); + } + else + { + wk = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u); + dwk = hinv4 * KERNEL_COEFF_6 * (1.0 - u) * (1.0 - u); + } + + mass_j = P[j].Mass; + + rho += FLT(mass_j * wk); + + weighted_numngb += FLT(NORM_COEFF * wk / hinv3); /* 4.0/3 * PI = 4.188790204786 */ + + dhsmlrho += FLT(-mass_j * (NUMDIMS * hinv * wk + u * dwk)); + +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + if(ID != P[j].ID && mindist > r) + mindist = r; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + } + } + + out.Rho = rho; + out.Ngb = weighted_numngb; + out.DhsmlDensity = dhsmlrho; +#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES + out.MinDist = mindist; +#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */ + + /* Now collect the result at the right place */ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return 0; +} + +/* \brief Determines if a cell is active in current timestep. + * + * If the cell is not active in a timestep, its value in TimeBinHydro is + * negative. + * + * \param[in] n Index of cell in P and SphP arrays. + * + * \return 1: cell active; 0: cell not active or not a cell. + */ +int density_isactive(int n) +{ + if(P[n].TimeBinHydro < 0) + return 0; + + if(P[n].Type == 0) + return 1; + + return 0; +} diff --git a/src/amuse/community/arepo/src/init/init.c b/src/amuse/community/arepo/src/init/init.c new file mode 100644 index 0000000000..934fef29da --- /dev/null +++ b/src/amuse/community/arepo/src/init/init.c @@ -0,0 +1,835 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/init/init.c + * \date 05/2018 + * \brief Initialization of a simulation from initial conditions. + * \details contains functions: + * int init(void) + * void check_omega(void) + * void setup_smoothinglengths(void) + * void test_id_uniqueness(void) + * void calculate_maxid(void) + * int compare_IDs(const void *a, const void *b) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../mesh/voronoi/voronoi.h" + +/*! \brief Prepares the loaded initial conditions for the run. + * + * It is only called if RestartFlag !=1. Various counters and variables are + * initialized. Entries of the particle data structures not read from initial + * conditions are initialized or converted and a initial domain decomposition + * is performed. If gas cells are present, the initial SPH smoothing lengths + * are determined. 
+ * + * \return status code: <0 if finished without errors and run can start, + * 0 code ends after calling init() > 0 an error occurred, terminate. + */ +int init(void) +{ + int i, j; + double mass; + + assert(RestartFlag != 1); + + if(All.ComovingIntegrationOn) + if(All.PeriodicBoundariesOn == 1) + { + if(RestartFlag < 3) + /* can't do this check when not all particles are loaded */ + check_omega(); + else + mpi_printf("INIT: Skipping Omega check since we are not doing a dynamical evolution (not all particles may be loaded)\n"); + } + +#if defined(COOLING) + IonizeParams(); +#endif /* #if defined(COOLING) */ + + if(All.ComovingIntegrationOn) + { + All.Timebase_interval = (log(All.TimeMax) - log(All.TimeBegin)) / TIMEBASE; + All.Ti_Current = 0; + } + else + { + All.Timebase_interval = (All.TimeMax - All.TimeBegin) / TIMEBASE; + All.Ti_Current = 0; + } + + set_cosmo_factors_for_current_time(); + + for(j = 0; j < 3; j++) + All.GlobalDisplacementVector[j] = 0; + + All.NumCurrentTiStep = 0; /* setup some counters */ + All.SnapshotFileCount = 0; + + if(RestartFlag == 2) + { + if(RestartSnapNum < 0) + All.SnapshotFileCount = atoi(All.InitCondFile + strlen(All.InitCondFile) - 3) + 1; + else + All.SnapshotFileCount = RestartSnapNum + 1; + } + + All.TotNumOfForces = 0; + All.TopNodeAllocFactor = 0.08; + All.TreeAllocFactor = 0.7; + All.NgbTreeAllocFactor = 0.7; + + if(NumPart < 1000) + All.TreeAllocFactor = 10.0; + + DeRefMesh.Indi.AllocFacNdp = MIN_ALLOC_NUMBER; + DeRefMesh.Indi.AllocFacNdt = MIN_ALLOC_NUMBER; + + Mesh.Indi.AllocFacNdp = 1.2 * NumGas + MIN_ALLOC_NUMBER; + Mesh.Indi.AllocFacNdt = 8.0 * NumGas + MIN_ALLOC_NUMBER; + Mesh.Indi.AllocFacNvf = 8.0 * NumGas + MIN_ALLOC_NUMBER; + + Mesh.Indi.AllocFacNvc = 16.0 * NumGas + MIN_ALLOC_NUMBER; + Nvc = 0; + + Mesh.Indi.AllocFacNinlist = 1.2 * NumGas + MIN_ALLOC_NUMBER; + Mesh.Indi.AllocFacN_DP_Buffer = 0.2 * NumGas + MIN_ALLOC_NUMBER; + Mesh.Indi.AllocFacNflux = 0.01 * NumGas + MIN_ALLOC_NUMBER; + 
Mesh.Indi.AllocFacNradinflux = 0.01 * NumGas + MIN_ALLOC_NUMBER; + +#ifdef MHD_POWELL + for(j = 0; j < 3; j++) + { + All.Powell_Momentum[j] = 0; + All.Powell_Angular_Momentum[j] = 0; + } + All.Powell_Energy = 0; +#endif /* #ifdef MHD_POWELL */ + + All.TimeLastStatistics = All.TimeBegin - All.TimeBetStatistics; + + set_softenings(); + +#ifdef ADAPTIVE_HYDRO_SOFTENING + mpi_printf("INIT: Adaptive hydro softening, minimum gravitational softening for cells: %g\n", All.MinimumComovingHydroSoftening); + mpi_printf("INIT: Adaptive hydro softening, maximum gravitational softening for cells: %g\n", + All.MinimumComovingHydroSoftening * pow(All.AdaptiveHydroSofteningSpacing, NSOFTTYPES_HYDRO - 1)); + mpi_printf("INIT: Adaptive hydro softening, number of softening values: %d\n", NSOFTTYPES_HYDRO); +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + +#ifdef INDIVIDUAL_GRAVITY_SOFTENING + init_individual_softenings(); +#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */ + +#ifdef SHIFT_BY_HALF_BOX + for(i = 0; i < NumPart; i++) + for(j = 0; j < 3; j++) + P[i].Pos[j] += 0.5 * All.BoxSize; +#endif /* #ifdef SHIFT_BY_HALF_BOX */ + + for(i = 0; i < GRAVCOSTLEVELS; i++) + All.LevelToTimeBin[i] = -1; + + for(i = 0; i < NumPart; i++) + for(j = 0; j < GRAVCOSTLEVELS; j++) + P[i].GravCost[j] = 0; + + /* set unused coordinate values in 1d and 2d simulations to zero; this is needed for correct interfaces */ + int nonzero_vel = 0; +#ifdef ONEDIMS + for(i = 0; i < NumPart; i++) + { + P[i].Pos[1] = 0.0; + P[i].Pos[2] = 0.0; + + if(P[i].Vel[1] != 0.0 || P[i].Vel[2] != 0.0) + { + nonzero_vel = 1; + } + } + if(nonzero_vel > 0) + { + warn("Initial y or z velocity nonzero in 1d simulation! Make sure you really want this!"); + } +#endif /* #ifdef ONEDIMS */ + +#ifdef TWODIMS + for(i = 0; i < NumPart; i++) + { + P[i].Pos[2] = 0; + + if(P[i].Vel[2] != 0.0) + { + nonzero_vel = 1; + } + } + if(nonzero_vel > 0) + { + warn("Initial z velocity nonzero in 2d simulation! 
Make sure you really want this!"); + } +#endif /* #ifdef TWODIMS */ + + if(All.ComovingIntegrationOn) /* change to new velocity variable */ + { + for(i = 0; i < NumPart; i++) + { + for(j = 0; j < 3; j++) + P[i].Vel[j] *= sqrt(All.Time) * All.Time; /* for dm/gas particles, p = a^2 xdot */ + } + } + + /* measure mean cell mass */ + int num = 0; + long long glob_num; + double glob_mass; + mass = 0; + + for(i = 0; i < NumGas; i++) +#ifdef REFINEMENT_HIGH_RES_GAS + if(SphP[i].AllowRefinement != 0) +#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */ + { + num += 1; + mass += P[i].Mass; + } + + sumup_large_ints(1, &num, &glob_num); + MPI_Allreduce(&mass, &glob_mass, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + +#ifndef REFINEMENT_HIGH_RES_GAS + if(glob_num != All.TotNumGas) + terminate("glob_num(=%lld) != All.TotNumGas(=%lld)", glob_num, All.TotNumGas); +#endif /* #ifndef REFINEMENT_HIGH_RES_GAS */ + + if(All.TotNumGas > 0 && (glob_num == 0 || glob_mass == 0)) + terminate("All.TotNumGas(=%lld) > 0 && (glob_num(=%lld) == 0 || glob_mass(=%g) == 0)", All.TotNumGas, glob_num, glob_mass); + + /* assign global variables that depend on the mean cell mass */ +#if defined(REFINEMENT) + if(All.ReferenceGasPartMass == 0) + { + if(!All.ComovingIntegrationOn) + terminate("In non-comoving runs, ReferenceGasPartMass must be set to a non-zero value"); + + All.ReferenceGasPartMass = glob_mass / glob_num; + + mpi_printf("REFINEMENT: The mean cell mass, which is used as a reference, is %g\n", All.ReferenceGasPartMass); + } + else + mpi_printf("REFINEMENT: The given reference cell mass is %g\n", All.ReferenceGasPartMass); + All.TargetGasMass = All.TargetGasMassFactor * All.ReferenceGasPartMass; + mpi_printf("REFINEMENT: setting All.TargetGasMass=%g\n", All.TargetGasMass); +#endif /* #if defined(REFINEMENT) */ + + for(i = 0; i < TIMEBINS; i++) + All.Ti_begstep[i] = 0; + + for(i = 0; i < NumPart; i++) /* start-up initialization */ + { + for(j = 0; j < 3; j++) + P[i].GravAccel[j] = 0; + +#ifdef PMGRID + 
for(j = 0; j < 3; j++) + P[i].GravPM[j] = 0; +#endif /* #ifdef PMGRID */ + P[i].TimeBinHydro = 0; + P[i].TimeBinGrav = 0; + P[i].OldAcc = 0; /* Do not zero as masses are stored here */ + +#ifdef SELFGRAVITY +#ifdef EVALPOTENTIAL + if(RestartFlag == 0) + P[i].Potential = 0; +#endif /* #ifdef EVALPOTENTIAL */ +#endif /* #ifdef SELFGRAVITY */ + +#ifdef USE_SFR + if(RestartFlag == 0 && P[i].Type == 0) + SphP[i].Sfr = 0; +#endif /* #ifdef USE_SFR */ + } + + for(i = 0; i < TIMEBINS; i++) + TimeBinSynchronized[i] = 1; + + reconstruct_timebins(); + +#ifdef PMGRID + All.PM_Ti_endstep = All.PM_Ti_begstep = 0; +#endif /* #ifdef PMGRID */ + + for(i = 0; i < NumGas; i++) /* initialize sph_properties */ + { + if(RestartFlag == 2 || RestartFlag == 3) + for(j = 0; j < 3; j++) + SphP[i].Center[j] = P[i].Pos[j]; + +#if defined(CELL_CENTER_GRAVITY) && !defined(OUTPUT_CENTER_OF_MASS) + if(RestartFlag == 17 || RestartFlag == 18) + for(j = 0; j < 3; j++) + SphP[i].Center[j] = P[i].Pos[j]; +#endif /* #if defined(CELL_CENTER_GRAVITY) && !defined(OUTPUT_CENTER_OF_MASS) */ + + if(RestartFlag == 0) + { + for(j = 0; j < 3; j++) + SphP[i].Center[j] = P[i].Pos[j]; + + SphP[i].Hsml = 0; +#if defined(COOLING) + SphP[i].Ne = 1.0; +#endif /* #if defined(COOLING) */ + } + } + +#ifndef NODEREFINE_BACKGROUND_GRID + double mvol = 0; + if(All.TotNumGas) + { +#ifdef TWODIMS + mvol = boxSize_X * boxSize_Y / All.TotNumGas; +#else /* #ifdef TWODIMS */ +#ifdef ONEDIMS + mvol = boxSize_X / All.TotNumGas; +#else /* #ifdef ONEDIMS */ + mvol = boxSize_X * boxSize_Y * boxSize_Z / All.TotNumGas; +#endif /* #ifdef ONEDIMS #else */ +#endif /* #ifdef TWODIMS #else */ + } + + All.MeanVolume = mvol; +#endif /* #ifndef NODEREFINE_BACKGROUND_GRID */ + + mpi_printf("INIT: MeanVolume=%g\n", All.MeanVolume); + +#ifndef NO_ID_UNIQUE_CHECK + test_id_uniqueness(); +#endif /* #ifndef NO_ID_UNIQUE_CHECK */ + +#ifdef REFINEMENT_MERGE_CELLS + for(i = 0; i < NumPart; i++) + if(P[i].Type == 0 && P[i].ID == 0) + terminate("INIT: 
Cannot use ID==0 for gas in ICs with derefinement enabled."); +#endif /* #ifdef REFINEMENT_MERGE_CELLS */ + + voronoi_init_connectivity(&Mesh); + +#ifdef ADDBACKGROUNDGRID + prepare_domain_backgroundgrid(); +#endif /* #ifdef ADDBACKGROUNDGRID */ + + domain_Decomposition(); /* do initial domain decomposition (gives equal numbers of particles) */ + + if(RestartFlag == 18) /* recalculation of potential */ + { + mark_active_timebins(); + open_logfiles(); +#if defined(USE_SFR) + sfr_init(); +#endif /* #if defined(USE_SFR) */ + set_non_standard_physics_for_current_time(); + +#ifdef PMGRID + long_range_init_regionsize(); +#endif /* #ifdef PMGRID */ + + compute_grav_accelerations(All.HighestActiveTimeBin, FLAG_FULL_TREE); + +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) && defined(FOF) + PS = (struct subfind_data *)mymalloc_movable(&PS, "PS", All.MaxPart * sizeof(struct subfind_data)); + fof_prepare_output_order(); /* sort by type and Fileorder */ + fof_subfind_exchange(MPI_COMM_WORLD); +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) && defined(FOF) */ + + sprintf(All.SnapshotFileBase, "%s_potupdated", All.SnapshotFileBase); + mpi_printf("Start writing file %s\nRestartSnapNum %d\n", All.SnapshotFileBase, RestartSnapNum); + savepositions(RestartSnapNum, 0); + + endrun(); + } + + /* will build tree */ + ngb_treeallocate(); + ngb_treebuild(NumGas); + + if(RestartFlag == 3) + { +#ifdef FOF + fof_fof(RestartSnapNum); + DumpFlag = 1; + savepositions(RestartSnapNum, 0); +#endif /* #ifdef FOF */ + return (0); + } + + All.Ti_Current = 0; + + if(RestartFlag == 0 || RestartFlag == 2 || RestartFlag == 14 || RestartFlag == 17) + setup_smoothinglengths(); + +#ifdef ADDBACKGROUNDGRID + // This return more clearly shows that this function terminates the run + return add_backgroundgrid(); +#endif /* #ifdef ADDBACKGROUNDGRID */ + + create_mesh(); + mesh_setup_exchange(); + + if(RestartFlag == 14) + { + char tess_name[1024]; + sprintf(tess_name, "%s/tess_%03d", All.OutputDir, 
RestartSnapNum); + write_voronoi_mesh(&Mesh, tess_name, 0, NTask - 1); + return 0; + } + + for(i = 0, mass = 0; i < NumGas; i++) + { + if(RestartFlag == 0) + { +#ifdef READ_MASS_AS_DENSITY_IN_INPUT + P[i].Mass *= SphP[i].Volume; +#endif /* #ifdef READ_MASS_AS_DENSITY_IN_INPUT */ + } + + SphP[i].Density = P[i].Mass / SphP[i].Volume; + + if(SphP[i].Density < All.MinimumDensityOnStartUp) + { + SphP[i].Density = All.MinimumDensityOnStartUp; + + P[i].Mass = SphP[i].Volume * SphP[i].Density; + } + + SphP[i].Momentum[0] = P[i].Mass * P[i].Vel[0]; + SphP[i].Momentum[1] = P[i].Mass * P[i].Vel[1]; + SphP[i].Momentum[2] = P[i].Mass * P[i].Vel[2]; + +#ifdef MHD +#ifdef MHD_SEEDFIELD + if(RestartFlag == 0) + { + if(i == 0) + { + mpi_printf("MHD Seed field=%g, direction=%d\n", All.B_value, All.B_dir); + } + + int k; + double bfac = 1. / (sqrt(All.UnitMass_in_g / All.UnitLength_in_cm) / (All.UnitTime_in_s / All.HubbleParam)); + + double B_value = All.B_value; + + for(k = 0; k < 3; k++) + if(All.B_dir & (1 << k)) + { + SphP[i].BConserved[k] = B_value * SphP[i].Volume * bfac; + SphP[i].B[k] = SphP[i].BConserved[k] / SphP[i].Volume; + } + else + { + SphP[i].BConserved[k] = 0; + SphP[i].B[k] = SphP[i].BConserved[k] / SphP[i].Volume; + } + + if(i == 0) + { + mpi_printf("BConserved[0] = %g|%g|%g\n", SphP[i].BConserved[0], SphP[i].BConserved[1], SphP[i].BConserved[2]); + mpi_printf("Volume[0] %g bfac %g\n", SphP[i].Volume, bfac); + } + /* convert Gauss-cgs to heavyside - lorentz */ + { + int kk; + for(kk = 0; kk < 3; kk++) + { + SphP[i].BConserved[kk] /= sqrt(4. * M_PI); + SphP[i].B[kk] /= sqrt(4. 
* M_PI); + } + } + } + else + { + SphP[i].BConserved[0] = SphP[i].B[0] * SphP[i].Volume; + SphP[i].BConserved[1] = SphP[i].B[1] * SphP[i].Volume; + SphP[i].BConserved[2] = SphP[i].B[2] * SphP[i].Volume; + } +#else /* #ifdef MHD_SEEDFIELD */ + SphP[i].BConserved[0] = SphP[i].B[0] * SphP[i].Volume; + SphP[i].BConserved[1] = SphP[i].B[1] * SphP[i].Volume; + SphP[i].BConserved[2] = SphP[i].B[2] * SphP[i].Volume; + +#endif /* #ifdef MHD_SEEDFIELD #else */ +#endif /* #ifdef MHD */ + + /* utherm has been loaded from IC file */ +#ifdef MESHRELAX + SphP[i].Energy = P[i].Mass * SphP[i].Utherm; +#else /* #ifdef MESHRELAX */ + SphP[i].Energy = P[i].Mass * All.cf_atime * All.cf_atime * SphP[i].Utherm + + 0.5 * P[i].Mass * (P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]); +#endif /* #ifdef MESHRELAX #else */ + +#ifdef MHD + SphP[i].Energy += 0.5 * (SphP[i].B[0] * SphP[i].B[0] + SphP[i].B[1] * SphP[i].B[1] + SphP[i].B[2] * SphP[i].B[2]) * + SphP[i].Volume * All.cf_atime; +#endif /* #ifdef MHD */ + + for(j = 0; j < 3; j++) + SphP[i].VelVertex[j] = P[i].Vel[j]; + + mass += P[i].Mass; + } + +#ifdef PASSIVE_SCALARS + for(i = 0; i < NumGas; i++) + { + for(j = 0; j < PASSIVE_SCALARS; j++) + SphP[i].PConservedScalars[j] = SphP[i].PScalars[j] * P[i].Mass; + } + +#endif /* #ifdef PASSIVE_SCALARS */ + + if(RestartFlag == 17) + { + update_primitive_variables(); + exchange_primitive_variables(); + calculate_gradients(); + exchange_primitive_variables_and_gradients(); + DumpFlag = 1; + savepositions(RestartSnapNum + 1, 0); + return (0); + } + + update_primitive_variables(); + +#ifdef TREE_BASED_TIMESTEPS + tree_based_timesteps_setsoundspeeds(); +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + + /* initialize star formation rate */ +#if defined(USE_SFR) + sfr_init(); +#endif /* #if defined(USE_SFR) */ + +#if defined(USE_SFR) + for(i = 0; i < NumGas; i++) + SphP[i].Sfr = get_starformation_rate(i); +#endif /* #if defined(USE_SFR) */ + + update_primitive_variables(); 
+ + exchange_primitive_variables(); + + calculate_gradients(); + + exchange_primitive_variables_and_gradients(); + +#if !defined(ONEDIMS) && !defined(TWODIMS) + int xaxis, yaxis, zaxis, weight_flag = 0; + double xmin, xmax, ymin, ymax, zmin, zmax; +#endif /* #if !defined(ONEDIMS) && !defined(TWODIMS) */ + + free_mesh(); + + return -1; // return -1 means we ran to completion, i.e. not an endrun code +} + +/*! \brief This routine computes the mass content of the box and compares it + * to the specified value of Omega-matter. + * + * If discrepant, the run is terminated. + * + * \return void + */ +void check_omega(void) +{ + double mass = 0, masstot, omega; + double mass_b = 0, masstot_b, omega_b; + int i, n_b = 0; + + for(i = 0; i < NumPart; i++) + { + mass += P[i].Mass; + if(P[i].Type == 0) + { + mass_b += P[i].Mass; + n_b += 1; + } +#ifdef USE_SFR + if(P[i].Type == 4) + { + mass_b += P[i].Mass; + n_b += 1; + } +#endif /* #ifdef USE_SFR */ + } + MPI_Allreduce(&mass, &masstot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&mass_b, &masstot_b, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + omega = masstot / (All.BoxSize * All.BoxSize * All.BoxSize) / (3 * All.Hubble * All.Hubble / (8 * M_PI * All.G)); + omega_b = masstot_b / (All.BoxSize * All.BoxSize * All.BoxSize) / (3 * All.Hubble * All.Hubble / (8 * M_PI * All.G)); + + if(n_b > 0) + { + if(fabs((omega - All.Omega0) / omega) > 1.0e-1 || fabs((omega_b - All.OmegaBaryon) / omega_b) > 1.0e-1) + { +#ifndef TWODIMS + mpi_terminate( + "\n\nI've found something odd!\nThe mass content accounts for Omega=%g and OmegaBaryon=%g,\nbut you specified Omega=%g " + "and OmegaBaryon=%g in the parameterfile.\n\nI better stop.\n", + omega, omega_b, All.Omega0, All.OmegaBaryon); +#endif /* #ifndef TWODIMS */ + } + + if(fabs((omega - All.Omega0) / omega) > 1.0e-3 || fabs((omega_b - All.OmegaBaryon) / omega_b) > 1.0e-3) + if(ThisTask == 0) + warn( + "I've found something odd! 
The mass content accounts for Omega=%g and OmegaBaryon=%g, but you specified Omega=%g and " + "OmegaBaryon=%g in the parameterfile.", + omega, omega_b, All.Omega0, All.OmegaBaryon); + } + else + { + if(All.OmegaBaryon != 0) + if(ThisTask == 0) + warn( + "We are running with no baryons, even though you have specified OmegaBaryon=%g in the parameterfile. Please make sure " + "you really want this.\n\n", + All.OmegaBaryon); + + if(fabs((omega - All.Omega0) / omega) > 1.0e-1) + { +#ifndef TWODIMS + mpi_terminate( + "\n\nI've found something odd!\nThe mass content accounts for Omega=%g and OmegaBaryon=%g,\nbut you specified Omega=%g " + "and OmegaBaryon=%g in the parameterfile.\n\nI better stop.\n", + omega, omega_b, All.Omega0, All.OmegaBaryon); +#endif /* #ifndef TWODIMS */ + } + + if(fabs((omega - All.Omega0) / omega) > 1.0e-3) + if(ThisTask == 0) + warn( + "I've found something odd! The mass content accounts for Omega=%g and OmegaBaryon=%g, but you specified Omega=%g and " + "OmegaBaryon=%g in the parameterfile.", + omega, omega_b, All.Omega0, All.OmegaBaryon); + } +} + +/*! \brief This function is used to find an initial SPH smoothing length for + * each cell. + * + * It guarantees that the number of neighbours will be between + * desired_ngb-MAXDEV and desired_ngb+MAXDEV. For simplicity, a first guess + * of the smoothing length is provided to the function density(), which will + * then iterate if needed to find the right smoothing length. 
+ *
+ * The first guess is derived from a temporary force tree that is built
+ * while all gas masses are set to 1; the original masses are restored
+ * before the function returns.
+ *
+ * \return void
+ */
+void setup_smoothinglengths(void)
+{
+  int i, no, p;
+  double *save_masses = mymalloc("save_masses", NumGas * sizeof(double));
+
+  /* remember the true gas masses and replace them by 1 for the tree build below */
+  for(i = 0; i < NumGas; i++)
+    {
+#ifdef NO_GAS_SELFGRAVITY
+      /* This is needed otherwise the force tree will not be constructed for gas particles */
+      P[i].Type = -1;
+#endif /* #ifdef NO_GAS_SELFGRAVITY */
+      save_masses[i] = P[i].Mass;
+      P[i].Mass = 1.0;
+    }
+
+#ifdef HIERARCHICAL_GRAVITY
+  /* put every gas particle (indices 0 .. NumGas-1) on the list of gravity-active particles */
+  TimeBinsGravity.NActiveParticles = 0;
+  for(i = 0; i < NumGas; i++)
+    {
+      TimeBinsGravity.ActiveParticleList[TimeBinsGravity.NActiveParticles] = i;
+      TimeBinsGravity.NActiveParticles++;
+    }
+#endif /* #ifdef HIERARCHICAL_GRAVITY */
+
+  construct_forcetree(1, 1, 0, 0); /* build force tree with gas particles only */
+
+  /* NOTE(review): with unit masses, Nodes[].u.d.mass presumably equals the
+   * number of gas particles in a node - confirm against construct_forcetree() */
+  for(i = 0; i < NumGas; i++)
+    {
+      /* presumably the tree node that directly contains particle i */
+      no = Father[i];
+
+      if(no < 0)
+        terminate("i=%d no=%d\n", i, no);
+
+      /* climb towards the root until the node mass reaches
+       * 10 * DesNumNgb * P[i].Mass, or until there is no father left */
+      while(10 * All.DesNumNgb * P[i].Mass > Nodes[no].u.d.mass)
+        {
+          p = Nodes[no].u.d.father;
+
+          if(p < 0)
+            break;
+
+          no = p;
+        }
+      /* first guess: radius of the sphere (3D) or circle (2D) whose volume is
+       * the fraction DesNumNgb * P[i].Mass / node mass of len^3 (len^2 in 2D) */
+#ifndef TWODIMS
+      SphP[i].Hsml = pow(3.0 / (4 * M_PI) * All.DesNumNgb * P[i].Mass / Nodes[no].u.d.mass, 1.0 / 3) * Nodes[no].len;
+#else /* #ifndef TWODIMS */
+      SphP[i].Hsml = pow(1.0 / (M_PI)*All.DesNumNgb * P[i].Mass / Nodes[no].u.d.mass, 1.0 / 2) * Nodes[no].len;
+#endif /* #ifndef TWODIMS #else */
+#ifdef NO_GAS_SELFGRAVITY
+      /* Reset the original particle type */
+      P[i].Type = 0;
+#endif /* #ifdef NO_GAS_SELFGRAVITY */
+    }
+
+  /* the temporary force tree is no longer needed */
+  myfree(Father);
+  myfree(Nextnode);
+
+  myfree(Tree_Points);
+  force_treefree();
+
+  /* refine the guesses; note that the masses are still 1 at this point */
+  density();
+
+  /* restore the true masses */
+  for(i = 0; i < NumGas; i++)
+    P[i].Mass = save_masses[i];
+
+  myfree(save_masses);
+
+  /* the smoothing length also serves as the initial MaxDelaunayRadius */
+  for(i = 0; i < NumGas; i++)
+    SphP[i].MaxDelaunayRadius = SphP[i].Hsml;
+
+#ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES
+  /* with this option density() may shift particle positions, so the domain
+   * decomposition and the neighbour tree are rebuilt from scratch */
+  ngb_treefree();
+  domain_free();
+  domain_Decomposition();
+  ngb_treeallocate();
+  ngb_treebuild(NumGas);
+#endif /* #ifdef FIX_SPH_PARTICLES_AT_IDENTICAL_COORDINATES */
+}
+
+/*! \brief This function checks for unique particle IDs.
+ *
+ * Each task copies the IDs of its local particles into a scratch array and
+ * the arrays are sorted among all tasks. Two equal neighbouring entries -
+ * inside the local array or across the boundary to the next task - make the
+ * code terminate.
+ *
+ * \return void
+ */
+void test_id_uniqueness(void)
+{
+  int n;
+  double tstart, tend;
+  MyIDType *idlist, *first_ids;
+
+  mpi_printf("INIT: Testing ID uniqueness...\n");
+
+  /* the boundary exchange below reads element 0 of every task's array */
+  if(NumPart == 0)
+    terminate("need at least one particle per cpu\n");
+
+  tstart = second();
+
+  idlist = (MyIDType *)mymalloc("ids", NumPart * sizeof(MyIDType));
+  first_ids = (MyIDType *)mymalloc("ids_first", NTask * sizeof(MyIDType));
+
+  for(n = 0; n < NumPart; n++)
+    idlist[n] = P[n].ID;
+
+  parallel_sort(idlist, NumPart, sizeof(MyIDType), compare_IDs);
+
+  /* duplicates inside the local part of the sorted sequence */
+  for(n = 1; n < NumPart; n++)
+    if(idlist[n - 1] == idlist[n])
+      terminate("non-unique ID=%lld found on task=%d (i=%d NumPart=%d)\n", (long long)idlist[n], ThisTask, n, NumPart);
+
+  /* every task publishes the first element of its sorted array */
+  MPI_Allgather(idlist, sizeof(MyIDType), MPI_BYTE, first_ids, sizeof(MyIDType), MPI_BYTE, MPI_COMM_WORLD);
+
+  /* duplicate straddling the boundary to the next task */
+  if(ThisTask < NTask - 1 && idlist[NumPart - 1] == first_ids[ThisTask + 1])
+    terminate("non-unique ID=%lld found on task=%d\n", (long long)idlist[NumPart - 1], ThisTask);
+
+  myfree(first_ids);
+  myfree(idlist);
+
+  tend = second();
+
+  mpi_printf("INIT: success. took=%g sec\n", timediff(tstart, tend));
+}
+
+/*! \brief Calculates global maximum of the IDs of all particles.
+ *
+ * This is needed for REFINEMENT_SPLIT_CELLS.
+ * + * \return void + */ +void calculate_maxid(void) +{ + /* determine maximum ID */ + MyIDType maxid, *tmp; + int i; + + for(i = 0, maxid = 0; i < NumPart; i++) + if(P[i].ID > maxid) + { + maxid = P[i].ID; + } + + tmp = mymalloc("tmp", NTask * sizeof(MyIDType)); + + MPI_Allgather(&maxid, sizeof(MyIDType), MPI_BYTE, tmp, sizeof(MyIDType), MPI_BYTE, MPI_COMM_WORLD); + + for(i = 0; i < NTask; i++) + if(tmp[i] > maxid) + maxid = tmp[i]; + +#if defined(REFINEMENT_SPLIT_CELLS) || defined(USE_SFR) + All.MaxID = maxid; +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) || defined(USE_SFR) */ + + myfree(tmp); +} + +/*! \brief Comparison function for two MyIDType objects. + * + * Used as sorting-kernel for id_uniqueness check. + * + * \return (-1,0,1), -1 if ab + */ +int compare_IDs(const void *a, const void *b) +{ + if(*((MyIDType *)a) < *((MyIDType *)b)) + return -1; + + if(*((MyIDType *)a) > *((MyIDType *)b)) + return +1; + + return 0; +} diff --git a/src/amuse/community/arepo/src/io/global.c b/src/amuse/community/arepo/src/io/global.c new file mode 100644 index 0000000000..e32ace4300 --- /dev/null +++ b/src/amuse/community/arepo/src/io/global.c @@ -0,0 +1,257 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/global.c + * \date 05/2018 + * \brief Routines to compute statistics of the global state of the + * code. + * \details contains functions: + * void compute_statistics(void) + * void energy_statistics(void) + * void compute_global_quantities_of_system(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 05.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Computes new global statistics if needed (call of + * energy_statistics()). + * + * \return void + */ +void compute_statistics(void) +{ + /* check whether we want a full energy statistics */ + if((All.Time - All.TimeLastStatistics) >= All.TimeBetStatistics && + All.HighestActiveTimeBin == All.HighestOccupiedTimeBin) /* allow only top-level synchronization points */ + { + TIMER_START(CPU_LOGS); + + energy_statistics(); /* compute and output energy statistics */ + + All.TimeLastStatistics += All.TimeBetStatistics; + + TIMER_STOP(CPU_LOGS); + } +} + +/*! \brief Compute global statistics of the system. + * + * This function first calls a computation of various global + * quantities of the particle distribution + * (compute_global_quantities_of_system() ), and then writes some statistics + * about the energies of the various particle types to the file FdEnergy + * (energy.txt). 
+ *
+ * \return void
+ */
+void energy_statistics(void)
+{
+  double egyinj_tot;
+
+  compute_global_quantities_of_system();
+
+  MPI_Reduce(&EgyInjection, &egyinj_tot, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); /* sum over all tasks; only task 0 receives the result */
+
+  if(ThisTask == 0) /* task 0 writes the log line; egyinj_tot is only defined there */
+    {
+      fprintf(FdEnergy, "%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g\n", All.Time, /* columns: time, total Int/Pot/Kin, Int/Pot/Kin for types 0..5, mass for types 0..5, injected energy */
+              SysState.EnergyInt, SysState.EnergyPot, SysState.EnergyKin, SysState.EnergyIntComp[0], SysState.EnergyPotComp[0],
+              SysState.EnergyKinComp[0], SysState.EnergyIntComp[1], SysState.EnergyPotComp[1], SysState.EnergyKinComp[1],
+              SysState.EnergyIntComp[2], SysState.EnergyPotComp[2], SysState.EnergyKinComp[2], SysState.EnergyIntComp[3],
+              SysState.EnergyPotComp[3], SysState.EnergyKinComp[3], SysState.EnergyIntComp[4], SysState.EnergyPotComp[4],
+              SysState.EnergyKinComp[4], SysState.EnergyIntComp[5], SysState.EnergyPotComp[5], SysState.EnergyKinComp[5],
+              SysState.MassComp[0], SysState.MassComp[1], SysState.MassComp[2], SysState.MassComp[3], SysState.MassComp[4],
+              SysState.MassComp[5], egyinj_tot);
+
+      myflush(FdEnergy);
+    }
+}
+
+/*! \brief This routine computes various global properties of the particle
+ * distribution and stores the result in the struct `SysState'.
+ *
+ * Currently, not all the information that's computed here is
+ * actually used (e.g. momentum is not really used anywhere),
+ * just the energies are written to a log-file every once in a while.
+ *
+ * \return void
+ */
+void compute_global_quantities_of_system(void)
+{
+  int i, j, n;
+  struct state_of_system sys; /* this task's partial sums; reduced into the global SysState below */
+  double egyspec, vel[3];
+
+  for(n = 0; n < NTYPES; n++)
+    {
+      sys.MassComp[n] = sys.EnergyKinComp[n] = sys.EnergyPotComp[n] = sys.EnergyIntComp[n] = 0;
+
+      for(j = 0; j < 4; j++) /* components 0..2 are the vector, component 3 later holds its magnitude */
+        sys.CenterOfMassComp[n][j] = sys.MomentumComp[n][j] = sys.AngMomentumComp[n][j] = 0;
+    }
+
+  for(i = 0; i < NumPart; i++)
+    {
+      sys.MassComp[P[i].Type] += P[i].Mass;
+
+#if defined(SELFGRAVITY)
+#ifdef EVALPOTENTIAL
+#ifndef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+      sys.EnergyPotComp[P[i].Type] +=
+          0.5 * P[i].Mass * (P[i].Potential + All.G * P[i].Mass / (All.ForceSoftening[P[i].SofteningType] / 2.8)) / All.cf_atime;
+#else /* #ifndef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+      /* ignore self-contribution from gravity if exact gravity is used */
+      if(P[i].Type == EXACT_GRAVITY_FOR_PARTICLE_TYPE)
+        sys.EnergyPotComp[P[i].Type] += 0.5 * P[i].Mass * P[i].Potential / All.cf_atime;
+      else
+        sys.EnergyPotComp[P[i].Type] +=
+            0.5 * P[i].Mass * (P[i].Potential + All.G * P[i].Mass / (All.ForceSoftening[P[i].SofteningType] / 2.8)) / All.cf_atime;
+#endif /* #ifndef EXACT_GRAVITY_FOR_PARTICLE_TYPE #else */
+#endif /* #ifdef EVALPOTENTIAL */
+#endif /* #if defined(SELFGRAVITY) */
+
+#if defined(EXTERNALGRAVITY)
+#if defined(SELFGRAVITY)
+      sys.EnergyPotComp[P[i].Type] += 0.5 * P[i].Mass * P[i].ExtPotential; /* note: ExtPotential already included on P[].p.Potential,
+                                                                              that's why only 0.5 is needed here to recover the rest */
+#else /* #if defined(SELFGRAVITY) */
+      sys.EnergyPotComp[P[i].Type] += 1.0 * P[i].Mass * P[i].ExtPotential;
+#endif /* #if defined(SELFGRAVITY) #else */
+#endif /* #if defined(EXTERNALGRAVITY) */
+
+      if(P[i].Type == 0) /* type 0 additionally carries internal energy through SphP[i].Utherm */
+        {
+          for(j = 0; j < 3; j++)
+            {
+              vel[j] = P[i].Vel[j];
+            }
+
+          sys.EnergyKinComp[0] += 0.5 * P[i].Mass * (vel[0] * vel[0] + vel[1] * vel[1] + vel[2] * vel[2]); /* no All.cf_a2inv factor here, unlike the other types */
+
+          egyspec = SphP[i].Utherm;
+
+          sys.EnergyIntComp[0] += P[i].Mass * egyspec;
+        }
+      else
+        {
+          for(j = 0; j < 3; j++)
+            {
+              vel[j] = P[i].Vel[j];
+            }
+          sys.EnergyKinComp[P[i].Type] += 0.5 * P[i].Mass * (vel[0] * vel[0] + vel[1] * vel[1] + vel[2] * vel[2]) * All.cf_a2inv;
+        }
+
+      for(j = 0; j < 3; j++)
+        {
+          sys.MomentumComp[P[i].Type][j] += P[i].Mass * vel[j];
+          sys.CenterOfMassComp[P[i].Type][j] += P[i].Mass * P[i].Pos[j]; /* mass-weighted position sum; divided by the mass on task 0 below */
+        }
+
+      sys.AngMomentumComp[P[i].Type][0] += P[i].Mass * (P[i].Pos[1] * vel[2] - P[i].Pos[2] * vel[1]);
+      sys.AngMomentumComp[P[i].Type][1] += P[i].Mass * (P[i].Pos[2] * vel[0] - P[i].Pos[0] * vel[2]);
+      sys.AngMomentumComp[P[i].Type][2] += P[i].Mass * (P[i].Pos[0] * vel[1] - P[i].Pos[1] * vel[0]);
+    }
+
+  /* sum the per-task results over all processors; only task 0 receives the sums */
+  MPI_Reduce(&sys.MassComp[0], &SysState.MassComp[0], NTYPES, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&sys.EnergyPotComp[0], &SysState.EnergyPotComp[0], NTYPES, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&sys.EnergyIntComp[0], &SysState.EnergyIntComp[0], NTYPES, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&sys.EnergyKinComp[0], &SysState.EnergyKinComp[0], NTYPES, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&sys.MomentumComp[0][0], &SysState.MomentumComp[0][0], NTYPES * 4, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&sys.AngMomentumComp[0][0], &SysState.AngMomentumComp[0][0], NTYPES * 4, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&sys.CenterOfMassComp[0][0], &SysState.CenterOfMassComp[0][0], NTYPES * 4, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+
+  if(ThisTask == 0) /* derived totals are built on task 0 and broadcast at the end */
+    {
+      for(i = 0; i < NTYPES; i++)
+        SysState.EnergyTotComp[i] = SysState.EnergyKinComp[i] + SysState.EnergyPotComp[i] + SysState.EnergyIntComp[i];
+
+      SysState.Mass = SysState.EnergyKin = SysState.EnergyPot = SysState.EnergyInt = SysState.EnergyTot = 0;
+
+      for(j = 0; j < 3; j++)
+        SysState.Momentum[j] = SysState.AngMomentum[j] = SysState.CenterOfMass[j] = 0;
+
+      for(i = 0; i < NTYPES; i++)
+        {
+          SysState.Mass += SysState.MassComp[i];
+          SysState.EnergyKin += SysState.EnergyKinComp[i];
+          SysState.EnergyPot += SysState.EnergyPotComp[i];
+          SysState.EnergyInt += SysState.EnergyIntComp[i];
+          SysState.EnergyTot += SysState.EnergyTotComp[i];
+
+          for(j = 0; j < 3; j++)
+            {
+              SysState.Momentum[j] += SysState.MomentumComp[i][j];
+              SysState.AngMomentum[j] += SysState.AngMomentumComp[i][j];
+              SysState.CenterOfMass[j] += SysState.CenterOfMassComp[i][j]; /* still the mass-weighted sum here; the per-type values are normalized only afterwards */
+            }
+        }
+
+      for(i = 0; i < NTYPES; i++)
+        for(j = 0; j < 3; j++)
+          if(SysState.MassComp[i] > 0) /* leave the entry untouched for types without mass to avoid dividing by zero */
+            SysState.CenterOfMassComp[i][j] /= SysState.MassComp[i];
+
+      for(j = 0; j < 3; j++)
+        if(SysState.Mass > 0)
+          SysState.CenterOfMass[j] /= SysState.Mass;
+
+      for(i = 0; i < NTYPES; i++)
+        {
+          SysState.CenterOfMassComp[i][3] = SysState.MomentumComp[i][3] = SysState.AngMomentumComp[i][3] = 0;
+          for(j = 0; j < 3; j++)
+            {
+              SysState.CenterOfMassComp[i][3] += SysState.CenterOfMassComp[i][j] * SysState.CenterOfMassComp[i][j];
+              SysState.MomentumComp[i][3] += SysState.MomentumComp[i][j] * SysState.MomentumComp[i][j];
+              SysState.AngMomentumComp[i][3] += SysState.AngMomentumComp[i][j] * SysState.AngMomentumComp[i][j];
+            }
+          SysState.CenterOfMassComp[i][3] = sqrt(SysState.CenterOfMassComp[i][3]); /* component 3 = Euclidean norm of components 0..2 */
+          SysState.MomentumComp[i][3] = sqrt(SysState.MomentumComp[i][3]);
+          SysState.AngMomentumComp[i][3] = sqrt(SysState.AngMomentumComp[i][3]);
+        }
+
+      SysState.CenterOfMass[3] = SysState.Momentum[3] = SysState.AngMomentum[3] = 0;
+
+      for(j = 0; j < 3; j++)
+        {
+          SysState.CenterOfMass[3] += SysState.CenterOfMass[j] * SysState.CenterOfMass[j];
+          SysState.Momentum[3] += SysState.Momentum[j] * SysState.Momentum[j];
+          SysState.AngMomentum[3] += SysState.AngMomentum[j] * SysState.AngMomentum[j];
+        }
+
+      SysState.CenterOfMass[3] = sqrt(SysState.CenterOfMass[3]);
+      SysState.Momentum[3] = sqrt(SysState.Momentum[3]);
+      SysState.AngMomentum[3] = sqrt(SysState.AngMomentum[3]);
+    }
+
+  /* give everyone the result, maybe they want to do something with it */
+  MPI_Bcast(&SysState, sizeof(struct state_of_system), MPI_BYTE, 0, MPI_COMM_WORLD);
+}
diff --git a/src/amuse/community/arepo/src/io/hdf5_util.c b/src/amuse/community/arepo/src/io/hdf5_util.c
new file mode 100644
index 0000000000..a613a36bdc
--- /dev/null
+++ b/src/amuse/community/arepo/src/io/hdf5_util.c
@@ -0,0 +1,881 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/hdf5_util.c
+ * \date        05/2018
+ * \brief       Contains the wrapper functions to the HDF5 library functions.
+ * \details     The wrapper functions explicitly check for error conditions
+ *              and terminate the run if such conditions occur. The HDF5 error
+ *              handler is disabled in case of termination not to repeat the
+ *              error message of the handler again at the program exit.
+ *
+ * \par Major modifications and contributions:
+ * - 07.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef HAVE_HDF5
+#ifndef HDF5UTIL_H
+#define HDF5UTIL_H
+#include /* NOTE(review): the header name is missing in this copy -- presumably the HDF5 library header; restore it from upstream AREPO */
+
+/*! \brief Wraps creating a file to give a nice error message.
+ *
+ * Calls H5Fcreate.
+ *
+ * \param[in] fname File name.
+ * \param[in] flags Flags handed to H5Fcreate.
+ * \param[in] fcpl_id File creation property list identifier, used when
+ * modifying default file meta-data. Use H5P_DEFAULT to specify
+ * default file creation properties.
+ * \param[in] fapl_id File access property list identifier. If parallel file
+ * access is desired, this is a collective call according to the
+ * communicator stored in the fapl_id. Use H5P_DEFAULT for default
+ * file access properties.
+ *
+ * \return File identifier.
+ */
+hid_t my_H5Fcreate(const char *fname, unsigned int flags, hid_t fcpl_id, hid_t fapl_id)
+{
+  hid_t file_id = H5Fcreate(fname, flags, fcpl_id, fapl_id);
+
+#ifndef TOLERATE_WRITE_ERROR /* the failure check is compiled out when write errors are tolerated */
+  if(file_id < 0) /* a negative identifier is treated as failure */
+    {
+      H5Eset_auto(NULL, NULL); /* switch off HDF5's own error handler before terminating */
+      terminate("On Task %d, error detected in HDF5: unable to create file %s\n", ThisTask, fname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return file_id; /* may be negative if TOLERATE_WRITE_ERROR is defined */
+}
+
+/*! \brief Wraps creating a group to give a nice error message.
+ *
+ *  Calls H5Gcreate.
+ *
+ *  \param[in] loc_id File or group identifier.
+ *  \param[in] groupname Absolute or relative name of the new group.
+ *  \param[in] size_hint Optional parameter indicating the number of bytes to
+ *             reserve for the names that will appear in the group. A
+ *             conservative estimate could result in multiple system-level
+ *             I/O requests to read the group name heap; a liberal estimate
+ *             could result in a single large I/O request even when the group
+ *             has just a few names. HDF5 stores each name with a null
+ *             terminator.
+ *
+ *  \return Group identifier; the unchecked (possibly negative) H5Gcreate result if TOLERATE_WRITE_ERROR is defined.
+ */
+hid_t my_H5Gcreate(hid_t loc_id, const char *groupname, size_t size_hint)
+{
+  hid_t group_id = H5Gcreate(loc_id, groupname, size_hint);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(group_id < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* switch off HDF5's own error handler before terminating */
+      terminate("On Task %d, error detected in HDF5: unable to create group %s\n", ThisTask, groupname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return group_id;
+}
+
+/*! \brief Wraps creating a dataset to give a nice error message.
+ *
+ * Calls H5Dcreate.
+ *
+ * \param[in] loc_id Identifier of the file or group within which to create
+ * the dataset.
+ * \param[in] datasetname The name of the dataset to create.
+ * \param[in] type_id Identifier of the datatype to use when creating the
+ * dataset.
+ * \param[in] space_id Identifier of the dataspace to use when creating the
+ * dataset.
+ * \param[in] dcpl_id Dataset creation property list identifier.
+ *
+ * \return Dataset identifier.
+ */
+hid_t my_H5Dcreate(hid_t loc_id, const char *datasetname, hid_t type_id, hid_t space_id, hid_t dcpl_id)
+{
+  hid_t dataset_id = H5Dcreate(loc_id, datasetname, type_id, space_id, dcpl_id);
+
+#ifndef TOLERATE_WRITE_ERROR /* the failure check is compiled out when write errors are tolerated */
+  if(dataset_id < 0) /* a negative identifier is treated as failure */
+    {
+      H5Eset_auto(NULL, NULL); /* switch off HDF5's own error handler before terminating */
+      terminate("On Task %d, Error detected in HDF5: unable to create dataset %s\n", ThisTask, datasetname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return dataset_id; /* may be negative if TOLERATE_WRITE_ERROR is defined */
+}
+
+/*! \brief Wraps writing a dataset to give a nice error message.
+ *
+ * Calls H5Dwrite.
+ *
+ * \param[in] dataset_id Identifier of the dataset to write to.
+ * \param[in] mem_type_id Identifier of the memory datatype.
+ * \param[in] mem_space_id Identifier of the memory dataspace.
+ * \param[in] file_space_id Identifier of the dataset's dataspace in the file.
+ * \param[in] xfer_plist_id Identifier of a transfer property list for this
+ * I/O operation.
+ * \param[in] buf Buffer with data to be written to the file.
+ * \param[in] datasetname Name of dataset (for error message only)
+ *
+ * \return Status of write operation.
+ */
+herr_t my_H5Dwrite(hid_t dataset_id, hid_t mem_type_id, hid_t mem_space_id, hid_t file_space_id, hid_t xfer_plist_id, const void *buf,
+                   const char *datasetname)
+{
+#ifdef TOLERATE_WRITE_ERROR
+  if(WriteErrorFlag) /* NOTE(review): presumably raised after an earlier failed write -- confirm where WriteErrorFlag is set */
+    return 0;        /* skip the write entirely and report success */
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+  herr_t status = H5Dwrite(dataset_id, mem_type_id, mem_space_id, file_space_id, xfer_plist_id, buf);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(status < 0) /* a negative status is treated as failure */
+    {
+      H5Eset_auto(NULL, NULL); /* switch off HDF5's own error handler before terminating */
+      terminate("On Task %d, error detected in HDF5: unable to write dataset %s\n", ThisTask, datasetname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return status;
+}
+
+/*! \brief Wraps creating an attribute to give a nice error message.
+ *
+ *  \param[in] loc_id Identifier for the object to which the attribute is to be
+ *             attached. May be any HDF5 object identifier (group, dataset, or
+ *             committed datatype) or an HDF5 file identifier; if loc_id is a
+ *             file identifier, the attribute will be attached to that file's
+ *             root group.
+ *  \param[in] attr_name Name of attribute to create.
+ *  \param[in] type_id Identifier of datatype for attribute.
+ *  \param[in] space_id Identifier of dataspace for attribute.
+ *  \param[in] acpl_id Identifier of creation property list (specify
+ *             H5P_DEFAULT).
+ *
+ *  \return Attribute identifier; the unchecked (possibly negative) H5Acreate result if TOLERATE_WRITE_ERROR is defined.
+ */
+hid_t my_H5Acreate(hid_t loc_id, const char *attr_name, hid_t type_id, hid_t space_id, hid_t acpl_id)
+{
+  hid_t attribute_id = H5Acreate(loc_id, attr_name, type_id, space_id, acpl_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(attribute_id < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* switch off HDF5's own error handler before terminating */
+      terminate("On Task %d, error detected in HDF5: unable to create attribute %s\n", ThisTask, attr_name);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return attribute_id;
+}
+
+/*! \brief Wraps writing an attribute to give a nice error message.
+ *
+ * \param[in] attr_id Identifier of an attribute to write.
+ * \param[in] mem_type_id Identifier of the attribute datatype (in memory).
+ * \param[in] buf Data to be written.
+ * \param[in] attr_name Name of attribute (for error message only).
+ *
+ * \return status (non-negative if successful).
+ */
+herr_t my_H5Awrite(hid_t attr_id, hid_t mem_type_id, const void *buf, const char *attr_name)
+{
+#ifdef TOLERATE_WRITE_ERROR
+  if(WriteErrorFlag) /* NOTE(review): presumably raised after an earlier failed write -- confirm where WriteErrorFlag is set */
+    return 0;        /* skip the write entirely and report success */
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+  herr_t status = H5Awrite(attr_id, mem_type_id, buf);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(status < 0) /* a negative status is treated as failure */
+    {
+      H5Eset_auto(NULL, NULL); /* switch off HDF5's own error handler before terminating */
+      terminate("On Task %d, error detected in HDF5: unable to write attribute %s\n", ThisTask, attr_name);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return status;
+}
+
+/*! \brief Wraps creating a dataspace to give a nice error message.
+ *
+ *  \param[in] type Type of dataspace to be created; also selects the wording of the error message.
+ *
+ *  \return Dataspace identifier if successful.
+ */
+hid_t my_H5Screate(H5S_class_t type)
+{
+  hid_t dataspace_id = H5Screate(type);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(dataspace_id < 0)
+    {
+      H5Eset_auto(NULL, NULL);
+      switch(type) /* every branch terminates; the type only changes the message */
+        {
+          case H5S_SCALAR:
+            terminate("On Task %d, error detected in HDF5: unable to create a scalar dataspace\n", ThisTask);
+            break;
+          case H5S_SIMPLE:
+            terminate("On Task %d, error detected in HDF5: unable to create a simple dataspace\n", ThisTask);
+            break;
+          default:
+            terminate("On Task %d, error detected in HDF5: unknown dataspace type\n", ThisTask);
+            break;
+        }
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return dataspace_id;
+}
+
+/*! \brief Wraps creating a simple dataspace to give a nice error message.
+ *
+ * \param[in] rank Number of dimensions of dataspace.
+ * \param[in] current_dims Array specifying the size of each dimension.
+ * \param[in] maximum_dims Array specifying the maximum size of each
+ * dimension.
+ *
+ * \return Dataspace identifier if successful.
+ */
+hid_t my_H5Screate_simple(int rank, const hsize_t *current_dims, const hsize_t *maximum_dims)
+{
+  hid_t dataspace_id = H5Screate_simple(rank, current_dims, maximum_dims);
+
+#ifndef TOLERATE_WRITE_ERROR /* the failure check is compiled out when write errors are tolerated */
+  if(dataspace_id < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* switch off HDF5's own error handler before terminating */
+      terminate("On Task %d, error detected in HDF5: unable to create a simple dataspace\n", ThisTask);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return dataspace_id;
+}
+
+/*! \brief Wraps opening a file to give a nice error message.
+ *
+ *  \param[in] fname Name of the file to be opened.
+ *  \param[in] flags File access flags. Allowable values are:
+ *             H5F_ACC_RDWR   -- Allow read and write access to file.
+ *             H5F_ACC_RDONLY -- Allow read-only access to file.
+ *  \param[in] fapl_id Identifier for the file access properties list. If
+ *             parallel file access is desired, this is a collective call
+ *             according to the communicator stored in the fapl_id. Use
+ *             H5P_DEFAULT for default file access properties.
+ *
+ *  \return File identifier if successful; the run is terminated otherwise, independent of TOLERATE_WRITE_ERROR.
+ */
+hid_t my_H5Fopen(const char *fname, unsigned int flags, hid_t fapl_id)
+{
+  hid_t file_id = H5Fopen(fname, flags, fapl_id);
+
+  if(file_id < 0) /* unlike the write-side wrappers, this check is always compiled in */
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: unable to open file %s\n", ThisTask, fname);
+    }
+
+  return file_id;
+}
+
+/*! \brief Wraps opening a group to give a nice error message.
+ *
+ *  \param[in] loc_id File or group identifier within which the group is to be
+ *             opened.
+ *  \param[in] groupname Name of group.
+ *
+ *  \return Valid group identifier if successful; the unchecked (possibly negative) H5Gopen result if TOLERATE_WRITE_ERROR is defined.
+ */
+hid_t my_H5Gopen(hid_t loc_id, const char *groupname)
+{
+  hid_t group = H5Gopen(loc_id, groupname);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(group < 0)
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: unable to open group %s\n", ThisTask, groupname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return group;
+}
+
+/*!
\brief Wraps opening a dataset to give a nice error message.
+ *
+ * \param[in] file_id Identifier of the file or group within which the
+ * dataset to be accessed will be found.
+ * \param[in] datasetname Name of the dataset to access.
+ *
+ * \return Dataset identifier if successful.
+ */
+hid_t my_H5Dopen(hid_t file_id, const char *datasetname)
+{
+  hid_t dataset = H5Dopen(file_id, datasetname);
+
+#ifndef TOLERATE_WRITE_ERROR /* the failure check is compiled out when write errors are tolerated */
+  if(dataset < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* switch off HDF5's own error handler before terminating */
+      terminate("On Task %d, error detected in HDF5: unable to open dataset %s\n", ThisTask, datasetname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return dataset;
+}
+
+/*! \brief Wraps opening a dataset.
+ *
+ *  In contrast to my_H5Dopen(), if the dataset does not exist it does not
+ *  terminate the run. This is useful while reading an ICs file
+ *  because in that case a non-existing dataset is put to zero (see also
+ *  read_ic.c).
+ *
+ *  \param[in] file_id Identifier of the file or group within which the
+ *             dataset to be accessed will be found.
+ *  \param[in] datasetname Name of the dataset to access.
+ *
+ *  \return Dataset identifier if successful; otherwise negative value.
+ */
+hid_t my_H5Dopen_if_existing(hid_t file_id, const char *datasetname)
+{
+  /* save the current error handler and disable it, so a missing dataset is not reported by HDF5 */
+  H5E_auto_t errfunc;
+  void *client_data;
+  H5Eget_auto(&errfunc, &client_data);
+  H5Eset_auto(NULL, NULL);
+
+  hid_t dataset = H5Dopen(file_id, datasetname); /* deliberately unchecked: the caller inspects the sign */
+
+  /* restore the previously installed error handler */
+  H5Eset_auto(errfunc, client_data);
+
+  return dataset;
+}
+
+/*! \brief Wraps opening an attribute to give a nice error message.
+ *
+ * \param[in] loc_id Identifier of a group, dataset, or named datatype that
+ * attribute is attached to.
+ * \param[in] attr_name Attribute name.
+ *
+ * \return Returns attribute identifier if successful.
+ */
+hid_t my_H5Aopen_name(hid_t loc_id, const char *attr_name)
+{
+  hid_t attribute_id = H5Aopen_name(loc_id, attr_name);
+
+#ifndef TOLERATE_WRITE_ERROR /* the failure check is compiled out when write errors are tolerated */
+  if(attribute_id < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* switch off HDF5's own error handler before terminating */
+      terminate("On Task %d, error detected in HDF5: unable to open attribute %s\n", ThisTask, attr_name);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return attribute_id;
+}
+
+/*! \brief Wraps reading a dataset to give a nice error message.
+ *
+ *  \param[in] dataset_id Identifier of the dataset read from.
+ *  \param[in] mem_type_id Identifier of the memory datatype.
+ *  \param[in] mem_space_id Identifier of the memory dataspace.
+ *  \param[in] file_space_id Identifier of the dataset's dataspace in the file.
+ *  \param[in] xfer_plist_id Identifier of a transfer property list for this
+ *             I/O operation.
+ *  \param[out] buf Buffer to receive data read from file.
+ *  \param[in] datasetname Name of dataset (only for error message).
+ *
+ *  \return Returns a non-negative value if successful; the run is terminated otherwise, independent of TOLERATE_WRITE_ERROR.
+ */
+herr_t my_H5Dread(hid_t dataset_id, hid_t mem_type_id, hid_t mem_space_id, hid_t file_space_id, hid_t xfer_plist_id, void *buf,
+                  const char *datasetname)
+{
+  herr_t status = H5Dread(dataset_id, mem_type_id, mem_space_id, file_space_id, xfer_plist_id, buf);
+  if(status < 0) /* read failures are always fatal: this check is not guarded by TOLERATE_WRITE_ERROR */
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: unable to read dataset %s\n", ThisTask, datasetname);
+    }
+  return status;
+}
+
+/*! \brief Wraps makeing a copy of the dataspace to give a nice error message.
+ *
+ * \param[in] dataset_id Identifier of the dataset to query.
+ * \param[in] datasetname Name of the dataset (for error message only).
+ *
+ * \return Dataspace identifier if successful.
+ */ +hid_t my_H5Dget_space(hid_t dataset_id, const char *datasetname) +{ + hid_t status = H5Dget_space(dataset_id); + +#ifndef TOLERATE_WRITE_ERROR + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to determine space for dataset %s\n", ThisTask, datasetname); + } +#endif /* #ifndef TOLERATE_WRITE_ERROR */ + + return status; +} + +/*! \brief Wraps reading an attribute to give a nice error message + * + * \param[in] attr_id Identifier of an attribute to read. + * \param[in] mem_type_id Identifier of the attribute datatype (in memory). + * \param[out] buf Buffer for data to be read. + * \param[in] attr_name Name of the attribute. + * \param[in] size Size of the attribute. + * + * \return Non-negative value if successful. + */ +herr_t my_H5Aread(hid_t attr_id, hid_t mem_type_id, void *buf, const char *attr_name, hssize_t size) +{ + hid_t hdf5_space = H5Aget_space(attr_id); + hssize_t attr_size = H5Sget_simple_extent_npoints(hdf5_space); + H5Sclose(hdf5_space); + + if(attr_size != size) + { + H5Eset_auto(NULL, NULL); + terminate( + "On Task %d, error detected in HDF5: mismatch in size for attribute %s, expected size = %lld, actual attribute size = " + "%lld\n", + ThisTask, attr_name, size, attr_size); + } + + herr_t status = H5Aread(attr_id, mem_type_id, buf); + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: unable to read attribute %s\n", ThisTask, attr_name); + } + return status; +} + +/*! \brief Wraps reseting the size of an existing dataspace to give a nice + * error message. + * + * \param[in] space_id Dataspace identifier. + * \param[in] rank Rank, or dimensionality, of the dataspace. + * \param[in] current_size Array containing current size of dataspace. + * \param[in] maximum_size Array containing maximum size of dataspace. + * \param[in] attr_name Name of attribute (only for error message). + * + * \return Non-negative value if successful. 
+ */
+herr_t my_H5Sset_extent_simple(hid_t space_id, int rank, const hsize_t *current_size, const hsize_t *maximum_size,
+                               const char *attr_name)
+{
+  herr_t status = H5Sset_extent_simple(space_id, rank, current_size, maximum_size);
+
+#ifndef TOLERATE_WRITE_ERROR /* the failure check is compiled out when write errors are tolerated */
+  if(status < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* switch off HDF5's own error handler before terminating */
+      terminate("On Task %d, error detected in HDF5: unable to set extent for attribute %s\n", ThisTask, attr_name);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return status;
+}
+
+/*! \brief Wraps closing an attribute to give a nice error message.
+ *
+ *  \param[in] attr_id Attribute to release access to.
+ *  \param[in] attr_name Name of the attribute (for error message only).
+ *
+ *  \return Non-negative value if successful; the unchecked H5Aclose status if TOLERATE_WRITE_ERROR is defined.
+ */
+herr_t my_H5Aclose(hid_t attr_id, const char *attr_name)
+{
+  herr_t status = H5Aclose(attr_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(status < 0)
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: unable to close attribute %s\n", ThisTask, attr_name);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return status;
+}
+
+/*! \brief Wraps closing a dataset to give a nice error message.
+ *
+ *  \param[in] dataset_id Identifier of the dataset to close access to.
+ *  \param[in] datasetname Name of the dataset (for error message only).
+ *
+ *  \return Non-negative value if successful; the unchecked H5Dclose status if TOLERATE_WRITE_ERROR is defined.
+ */
+herr_t my_H5Dclose(hid_t dataset_id, const char *datasetname)
+{
+  herr_t status = H5Dclose(dataset_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(status < 0)
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: unable to close dataset %s\n", ThisTask, datasetname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return status;
+}
+
+/*! \brief Wraps closing a group to give a nice error message.
+ *
+ * \param[in] group_id Group identifier to release.
+ * \param[in] groupname Name of the group (for error message only).
+ *
+ * \return Non-negative value if successful.
+ */
+herr_t my_H5Gclose(hid_t group_id, const char *groupname)
+{
+  herr_t status = H5Gclose(group_id);
+
+#ifndef TOLERATE_WRITE_ERROR /* the failure check is compiled out when write errors are tolerated */
+  if(status < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* switch off HDF5's own error handler before terminating */
+      terminate("On Task %d, error detected in HDF5: unable to close group %s\n", ThisTask, groupname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return status;
+}
+
+/*! \brief Wraps closing a file to give a nice error message.
+ *
+ *  \param[in] file_id Identifier of a file to terminate access to.
+ *  \param[in] fname File name (for error message only).
+ *
+ *  \return Non-negative value if successful; the unchecked H5Fclose status if TOLERATE_WRITE_ERROR is defined.
+ */
+herr_t my_H5Fclose(hid_t file_id, const char *fname)
+{
+  herr_t status = H5Fclose(file_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(status < 0)
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: unable to close file %s\n", ThisTask, fname);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+  return status;
+}
+
+/*! \brief Wraps releasing and terminating access to a dataspace to give a nice
+ *         error message.
+ *
+ *  \param[in] dataspace_id Identifier of dataspace to release.
+ *  \param[in] type Type of dataspace (simple, scalar,...); only used to choose the error message.
+ *
+ *  \return Non-negative value if successful.
+ */
+herr_t my_H5Sclose(hid_t dataspace_id, H5S_class_t type)
+{
+  herr_t status = H5Sclose(dataspace_id);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(status < 0)
+    {
+      H5Eset_auto(NULL, NULL);
+      switch(type) /* every branch terminates; the type only changes the message */
+        {
+          case H5S_SCALAR:
+            terminate("On Task %d, error detected in HDF5: unable to close a scalar dataspace\n", ThisTask);
+            break;
+          case H5S_SIMPLE:
+            terminate("On Task %d, error detected in HDF5: unable to close a simple dataspace\n", ThisTask);
+            break;
+          default:
+            terminate("On Task %d, error detected in HDF5: unknown dataspace type\n", ThisTask);
+            break;
+        }
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return status;
+}
+
+/*! \brief Wraps copying an existing datatype to give a nice error message.
+ *
+ * \param[in] type_id Identifier of datatype to copy.
Can be a datatype
+ * identifier, a predefined datatype (defined in H5Tpublic.h), or
+ * a dataset identifier.
+ *
+ * \return Datatype identifier if successful.
+ */
+hid_t my_H5Tcopy(hid_t type_id)
+{
+  hid_t datatype_id = H5Tcopy(type_id);
+#ifndef TOLERATE_WRITE_ERROR /* the failure check is compiled out when write errors are tolerated */
+  if(datatype_id < 0)
+    {
+      H5Eset_auto(NULL, NULL); /* switch off HDF5's own error handler before terminating */
+      terminate("On Task %d, error detected in HDF5: could not properly copy datatype\n", ThisTask);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+  return datatype_id;
+}
+
+/*! \brief Wraps closing a datatype to give a nice error message.
+ *
+ *  \param[in] type_id Identifier of datatype to release.
+ *
+ *  \return Non-negative value if successful; the unchecked H5Tclose status if TOLERATE_WRITE_ERROR is defined.
+ */
+herr_t my_H5Tclose(hid_t type_id)
+{
+  herr_t status = H5Tclose(type_id);
+#ifndef TOLERATE_WRITE_ERROR
+  if(status < 0)
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: could not properly close datatype\n", ThisTask);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+  return status;
+}
+
+/*! \brief Wraps selecting a hyperslab to give a nice error message.
+ *
+ *  \param[in] space_id Identifier of dataspace selection to modify.
+ *  \param[in] op Operation to perform on current selection.
+ *  \param[in] start Offset of start of hyperslab.
+ *  \param[in] stride Hyperslab stride.
+ *  \param[in] count Number of blocks included in hyperslab.
+ *  \param[in] block Size of block in hyperslab.
+ *
+ *  \return Non-negative value if successful; the unchecked H5Sselect_hyperslab status if TOLERATE_WRITE_ERROR is defined.
+ */
+herr_t my_H5Sselect_hyperslab(hid_t space_id, H5S_seloper_t op, const hsize_t *start, const hsize_t *stride, const hsize_t *count,
+                              const hsize_t *block)
+{
+  herr_t status = H5Sselect_hyperslab(space_id, op, start, stride, count, block);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(status < 0)
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: could not properly select the chosen hyperslab\n", ThisTask);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+  return status;
+}
+
+/*!
\brief Wraps returning the size in bytes of a given datatype to give a nice
+ * error message.
+ *
+ * \param[in] datatype_id Identifier of datatype to query.
+ *
+ * \return The size of the datatype in bytes.
+ */
+size_t my_H5Tget_size(hid_t datatype_id)
+{
+  size_t size = H5Tget_size(datatype_id);
+
+#ifndef TOLERATE_WRITE_ERROR /* the failure check is compiled out when write errors are tolerated */
+  if(size == 0) /* the return type is unsigned, so a size of 0 is what is treated as failure here */
+    {
+      H5Eset_auto(NULL, NULL); /* switch off HDF5's own error handler before terminating */
+      terminate("On Task %d, error detected in HDF5: unable to determine the size of the given datatype\n", ThisTask);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+  return size;
+}
+
+/*! \brief Wraps setting the size in bytes of a given datatype to give a nice
+ *         error message.
+ *
+ *  \param[in] datatype_id Identifier of datatype for which the size is being
+ *             changed.
+ *  \param[in] size New datatype size in bytes or H5T_VARIABLE.
+ *
+ *  \return Non-negative value if successful; the unchecked H5Tset_size status if TOLERATE_WRITE_ERROR is defined.
+ */
+herr_t my_H5Tset_size(hid_t datatype_id, size_t size)
+{
+  herr_t status = H5Tset_size(datatype_id, size);
+
+#ifndef TOLERATE_WRITE_ERROR
+  if(status < 0)
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: could not properly set the size of the given datatype\n", ThisTask);
+    }
+#endif /* #ifndef TOLERATE_WRITE_ERROR */
+
+  return status;
+}
+
+#ifdef HDF5_FILTERS
+/*! \brief Wraps checking if all hdf5 filters selected for plist_id are
+ *         available to give a nice error message.
+ *
+ *  \param[in] plist_id Dataset or group creation property list identifier.
+ *
+ *  \return Positive value if all filters are available;
+ *          0 if at least one filter is not currently available (a negative result terminates the run).
+ */
+htri_t my_H5Pall_filters_avail(hid_t plist_id)
+{
+  htri_t status = H5Pall_filters_avail(plist_id);
+  if(status < 0) /* only a negative result is fatal; 0 is handed back to the caller */
+    {
+      H5Eset_auto(NULL, NULL);
+      terminate("On Task %d, error detected in HDF5: could not properly verify the availability of all filters\n", ThisTask);
+    }
+  return status;
+}
+
+/*!
\brief Wraps creating the property list of the given property class + * identified by class_id to give a nice error message. + * + * \param[in] The class of the property list to create. + * + * \return Property list identifier if successful. + */ +hid_t my_H5Pcreate(hid_t class_id) +{ + hid_t plist_id = H5Pcreate(class_id); + if(plist_id < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: could not create the property list associated to the given property class\n", + ThisTask); + } + return plist_id; +} + +/*! \brief Wraps closing a property list to give a nice error message. + * + * \param[in] Identifier of the property list to terminate access to. + * + * \return Non-negative value if successful. + */ +herr_t my_H5Pclose(hid_t plist) +{ + herr_t status = H5Pclose(plist); + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: could not close the input property list\n", ThisTask); + } + return status; +} + +/*! \brief Wraps setting the size of the chunks of a chunked dataset to give a + * nice error message. + * + * \param[in] plist Dataset creation property list identifier. + * \param[in] ndims The number of dimensions of each chunk. + * \param[in] dim An array defining the size, in dataset elements, of each + * chunk. + * + * \return Non-negative value if successful. + */ +herr_t my_H5Pset_chunk(hid_t plist, int ndims, const hsize_t *dim) +{ + herr_t status = H5Pset_chunk(plist, ndims, dim); + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: could not set chunk size for the dataset\n", ThisTask); + } + return status; +} + +/*! \brief Wraps setting the use of the shuffle filter to give a nice error + * message. + * + * \param[in] plist_id Dataset creation property list identifier. + * + * \return Non-negative value if successful. 
+ */ +herr_t my_H5Pset_shuffle(hid_t plist_id) +{ + herr_t status = H5Pset_shuffle(plist_id); + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: could not set the shuffle filter in the properties list\n", ThisTask); + } + return status; +} + +/*! \brief Wraps setting the use of the deflate compression (gzip) to give a + * nice error message. + * + * \param[in] plist_id Dataset or group creation property list identifier. + * \param[in] level Compression level, passed unchanged to H5Pset_deflate. + * NOTE(review): declared with the non-standard alias uint; consider + * unsigned int for portability. + * + * \return Non-negative value if successful. + */ +herr_t my_H5Pset_deflate(hid_t plist_id, uint level) +{ + herr_t status = H5Pset_deflate(plist_id, level); + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: could not set the deflate compression in the properties list\n", ThisTask); + } + return status; +} + +/*! \brief Wraps setting the use of the Fletcher32 checksum to give a nice + * error message. + * + * \param[in] plist_id Dataset or group creation property list identifier. + * + * \return Non-negative value if successful. + */ +herr_t my_H5Pset_fletcher32(hid_t plist_id) +{ + herr_t status = H5Pset_fletcher32(plist_id); + if(status < 0) + { + H5Eset_auto(NULL, NULL); + terminate("On Task %d, error detected in HDF5: could not set the Fletcher32 checksum in the properties list\n", ThisTask); + } + return status; +} +#endif /* #ifdef HDF5_FILTERS */ + +#endif /* #ifndef HDF5UTIL_H */ +#endif /* #ifdef HAVE_HDF5 */ diff --git a/src/amuse/community/arepo/src/io/io.c b/src/amuse/community/arepo/src/io/io.c new file mode 100644 index 0000000000..f5d9a0c73f --- /dev/null +++ b/src/amuse/community/arepo/src/io/io.c @@ -0,0 +1,2226 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/io/io.c + * \date 05/2018 + * \brief Routines for input and output of snapshot files to disk. + * \details contains functions: + * void init_field + * void init_units + * void init_snapshot_type + * void write_error + * void create_snapshot_if_desired(void) + * void produce_dump(void) + * void savepositions(int num, int subbox_flag) + * void fill_write_buffer + * int get_bytes_per_blockelement + * int get_datatype_in_block(enum iofields blocknr, int mode) + * int get_values_per_blockelement(enum iofields blocknr) + * int get_particles_in_block(enum iofields blocknr, int + * *typelist) + * int blockpresent(enum iofields blocknr, int write) + * void get_Tab_IO_Label(enum iofields blocknr, char *label) + * void get_dataset_name(enum iofields blocknr, char *buf) + * void write_file(char *fname, int writeTask, int lastTask, + * int subbox_flag) + * void write_header_attributes_in_hdf5(hid_t handle) + * void write_parameters_attributes_in_hdf5(hid_t handle) + * herr_t my_hdf5_error_handler(void *unused) + * void write_dataset_attributes(hid_t hdf5_dataset, enum + * iofields blocknr) + * void write_xdmf(char *fname) + * size_t my_fwrite(void *ptr, size_t size, size_t nmemb, + * FILE * stream) + * size_t my_fread(void *ptr, size_t size, size_t nmemb, FILE * + * stream) + * void mpi_printf(const char *fmt, ...) 
+ * void mpi_fprintf(FILE * stream, const char *fmt, ...) + * void mpi_printf_each(const char *fmt, ...) + * FILE *open_file(char *fnam) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 07.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/* needs to be included after allvars.h */ +#ifdef OUTPUT_XDMF +#include /* for basename() function */ +#endif /* #ifdef OUTPUT_XDMF */ + +#include "../fof/fof.h" +#include "../gitversion/version.h" +#include "../mesh/voronoi/voronoi.h" + +#ifdef HAVE_HDF5 +#include +void write_header_attributes_in_hdf5(hid_t handle); +void write_parameters_attributes_in_hdf5(hid_t handle); +void write_compile_time_options_in_hdf5(hid_t handle); +void write_dataset_attributes(hid_t hdf5_dataset, enum iofields blocknr); +#endif /* #ifdef HAVE_HDF5 */ + +#ifdef TOLERATE_WRITE_ERROR +static char alternative_fname[MAXLEN_PATH]; +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + +#ifdef OUTPUT_XDMF +static void write_xdmf(char *fname); +#endif /* #ifdef OUTPUT_XDMF */ + +static int n_type[NTYPES]; /**< contains the local (for a single task) number of particles of each type in the snapshot file */ +static long long ntot_type_all[NTYPES]; /**< contains the global number of particles of each type in the snapshot file */ +static int subbox_dump = 0; + +/*! \brief Function for registering an output field. + * + * Don't forget to add the new IO_FLAG to allvars.h. + * + * \param[in] field Specifies the field as an enumeration type iofields + * (allvars.h), e.g. IO_POS. Don't forget to insert new fields + * also in allvars.h. + * \param[in] label The label of the dataset (4 characters). + * \param[in] datasetname The name of the hdf5 dataset (maximum 256 + * characters). 
+ * \param[in] type_in_memory The type of the field in the memory (use + * MEM_NONE if specifying io_func). + * \param[in] type_in_file_output The output type in the hdf5 file. + * \param[in] type_in_file_input The input type in the hdf5 file (use + * FILE_MY_OUTPUT_TYPE for MyInputType, input is disabled with + * FILE_NONE). + * \param[in] values_per_block The number of values per field, e.g. 1 for + * mass, 3 for velocities. + * \param[in] array The array in which the value is stored. For an io_func + * this influences the particle index, the default (A_NONE) is an + * index into P/SphP, can be changed if required. + * \param[in] pointer_to_field A Pointer to the field in one of the global + * arrays, e.g. &SphP[0].Density, or &P[0].Vel[0]. + * \param[in] io_func Alternatively, if the value to output/input is not a + * simple field, you can define a function which handles i/o. + * \param[in] typelist_bitmask Specifies for which particle type the field is + * present, e.g. 1+2+8 => field present for particle types 0,1,3 + * (or use ALL_TYPES, GAS_ONLY,...). 
+ * + * \return void + */ +void init_field(enum iofields field, const char *label, const char *datasetname, enum types_in_memory type_in_memory, + enum types_in_file type_in_file_output, enum types_in_file type_in_file_input, int values_per_block, enum arrays array, + void *pointer_to_field, void (*io_func)(int, int, void *, int), int typelist_bitmask) +{ + int alloc_step = 5; + + if(Max_IO_Fields == 0) + { + IO_Fields = (IO_Field *)mymalloc("IO_Fields", alloc_step * sizeof(IO_Field)); + Max_IO_Fields = alloc_step; + } + else if(Max_IO_Fields == N_IO_Fields) + { + Max_IO_Fields = ((Max_IO_Fields / alloc_step) + 1) * alloc_step; + IO_Fields = (IO_Field *)myrealloc(IO_Fields, Max_IO_Fields * sizeof(IO_Field)); + } + + IO_Fields[N_IO_Fields].field = field; + strncpy(IO_Fields[N_IO_Fields].label, label, 4); + strncpy(IO_Fields[N_IO_Fields].datasetname, datasetname, 256); + IO_Fields[N_IO_Fields].type_in_memory = type_in_memory; + IO_Fields[N_IO_Fields].type_in_file_output = type_in_file_output; + IO_Fields[N_IO_Fields].type_in_file_input = type_in_file_input; + IO_Fields[N_IO_Fields].values_per_block = values_per_block; + IO_Fields[N_IO_Fields].snap_type = SN_FULL; + IO_Fields[N_IO_Fields].typelist = typelist_bitmask; + + IO_Fields[N_IO_Fields].array = array; + + if(array == A_NONE) + { + IO_Fields[N_IO_Fields].offset = 0; + } + else if(array == A_SPHP) + { + IO_Fields[N_IO_Fields].offset = (size_t)pointer_to_field - (size_t)SphP; + } + else if(array == A_P) + { + IO_Fields[N_IO_Fields].offset = (size_t)pointer_to_field - (size_t)P; + } + else if(array == A_PS) + { + IO_Fields[N_IO_Fields].offset = (size_t)pointer_to_field - (size_t)PS; + } + + IO_Fields[N_IO_Fields].io_func = io_func; + + // validate types + if(type_in_memory == MEM_INT && + ((type_in_file_input != FILE_NONE && type_in_file_input != FILE_INT) || type_in_file_output != FILE_INT)) + { + terminate("combination of datatypes not supported (field %s)", datasetname); + } + + if(type_in_memory == 
MEM_MY_ID_TYPE && + ((type_in_file_input != FILE_NONE && type_in_file_input != FILE_MY_ID_TYPE) || type_in_file_output != FILE_MY_ID_TYPE)) + { + terminate("combination of datatypes not supported (field %s)", datasetname); + } + + if((type_in_memory == MEM_FLOAT || type_in_memory == MEM_MY_SINGLE || type_in_memory == MEM_DOUBLE) && + ((type_in_file_input != FILE_NONE && (type_in_file_input == FILE_MY_ID_TYPE || type_in_file_input == FILE_INT)) || + type_in_file_output == FILE_INT || type_in_file_output == FILE_MY_ID_TYPE)) + { + terminate("combination of datatypes not supported (field %s)", datasetname); + } + + IO_Fields[N_IO_Fields].a = 0.; + IO_Fields[N_IO_Fields].h = 0.; + IO_Fields[N_IO_Fields].L = 0.; + IO_Fields[N_IO_Fields].M = 0.; + IO_Fields[N_IO_Fields].V = 0.; + IO_Fields[N_IO_Fields].c = 0.; + IO_Fields[N_IO_Fields].hasunit = 0; + + N_IO_Fields++; +} + +/*! \brief Function for adding units to output field. + * + * This only works for fields registered with init_field. + * + * \param[in] field Specifies the field as an enumeration type iofields + * (allvars.h), e.g. IO_POS. + * \param[in] a the exponent of the cosmological a factor. + * \param[in] h the exponent of the hubble parameter. + * \param[in] L the length unit scaling. + * \param[in] M the mass unit scaling. + * \param[in] V the velocity unit scaling. + * \param[in] c conversion factor to cgs units (zero indicates dimensionless + * quantity, integer count, etc). + * + * \return void + */ +void init_units(enum iofields field, double a, double h, double L, double M, double V, double c) +{ + for(int i = 0; i < N_IO_Fields; i++) + { + if(IO_Fields[i].field == field) + { + IO_Fields[i].hasunit = 1; + IO_Fields[i].a = a; + IO_Fields[i].h = h; + IO_Fields[i].L = L; + IO_Fields[i].M = M; + IO_Fields[i].V = V; + IO_Fields[i].c = c; + break; + } + } +} + +/*! \brief Function for determining whether a field is dumped in snapshot. + * + * This only works for fields registered with init_field. 
+ * The member snap_type is initialized to SN_FULL in init_field. + * + * \param[in] field Specifies the field as an enumeration type iofields + * (allvars.h), e.g. IO_POS. + * \param[in] type In which snapshot types this field should be present + * (e.g. SN_FULL). + * + * \return void + */ +void init_snapshot_type(enum iofields field, enum sn_type type) +{ + for(int i = 0; i < N_IO_Fields; i++) + { + if(IO_Fields[i].field == field) + { + IO_Fields[i].snap_type = type; + } + } +} + +#ifdef TOLERATE_WRITE_ERROR +/*! \brief Print information about a write error. + * + * If a write error occurs, this function prints some useful debug information + * and sets to 1 the variable WriteErrorFlag so that the write operation that + * caused the error can be performed again. + * + * \param[in] check Flag that indicates where the function was called [0 and 1 + * in my_fwrite(), 2 in my_hdf5_error_handler(), 3 in + * hdf5_header_error_handler()]. + * \param[in] nwritten Number of elements actually written. + * \param[in] nmemb Number of elements that should be written. + * + * \return void + */ +void write_error(int check, size_t nwritten, size_t nmemb) +{ + if(!WriteErrorFlag) + { + int len; + char hostname[MPI_MAX_PROCESSOR_NAME]; + MPI_Get_processor_name(hostname, &len); + + printf("TOLERATE_WRITE_ERROR: write failed node=%s nwritten=%lld nmemb=%lld errno=%s task=%d check=%d\n", hostname, + (long long)nwritten, (long long)nmemb, strerror(errno), ThisTask, check); + myflush(stdout); + WriteErrorFlag = 1; + } +} +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + +/*! \brief Checks if a snapshot should be saved. + * + * This function checks whether a snapshot file or other kinds of output + * files, such as a projection, should be saved at the current time-step. + * If that is the case, the appropriate functions to produce the desired + * file are called and the parameter controlling the output are updated + * accordingly. 
+ * + * \return void + */ +void create_snapshot_if_desired(void) +{ +#ifdef OUTPUT_EVERY_STEP + All.Ti_nextoutput = All.Ti_Current; +#endif /* #ifdef OUTPUT_EVERY_STEP */ + + if(All.HighestActiveTimeBin == All.HighestOccupiedTimeBin) /* allow only top-level synchronization points */ + if(All.Ti_Current >= All.Ti_nextoutput && All.Ti_nextoutput >= 0) + { + DumpFlag = DumpFlagNextSnap; + produce_dump(); + + All.Ti_nextoutput = find_next_outputtime(All.Ti_Current + 1); + } +} + +/*! \brief A wrapper function used to create a snapshot. + * + * This function wraps together savepositions(), the function that + * saves the snapshot file to the disk, with functions used for + * special output needs. + * + * \return void + */ +void produce_dump(void) +{ +#ifdef UPDATE_GRADIENTS_FOR_OUTPUT + exchange_primitive_variables(); + calculate_gradients(); +#endif /* #ifdef UPDATE_GRADIENTS_FOR_OUTPUT */ + + savepositions(All.SnapshotFileCount++, 0); /* write snapshot file */ +} + +/*! \brief Saves snapshot to disk. + * + * This function writes a snapshot of the particle distribution to one or + * several files. If NumFilesPerSnapshot>1, the snapshot is distributed + * into several files, which are written simultaneously. Each file contains + * data from a group of processors of size roughly NTask/NumFilesPerSnapshot. + * + * \param[in] num The snapshot number. + * \param[in] subbox_flag If greater than 0 instructs the code to output only + * a subset of the whole domain. + * + * \return void + */ +void savepositions(int num, int subbox_flag) +{ + char buf[500]; + int n, filenr, gr, ngroups, masterTask, lastTask; + double t0, t1; + + t0 = second(); + CPU_Step[CPU_MISC] += measure_time(); + + if(DumpFlag) + { + subbox_dump = 0; + + if(subbox_flag > 0) + { + mpi_printf("\nwriting small subbox #%d snapshot file #%d @ time %g ... \n", subbox_flag - 1, num, All.Time); + subbox_dump = 1; + } + else + mpi_printf("\nwriting snapshot file #%d @ time %g ... 
(DumpFlag=%d)\n", num, All.Time, DumpFlag); + +#ifdef FOF + if(RestartFlag != 3 && RestartFlag != 18 && subbox_flag == 0 && DumpFlag != 2) + { + { + mpi_printf("\nWe shall first compute a group catalogue for this snapshot file\n"); + + fof_fof(num); + } + } +#endif /* #ifdef FOF */ + + if(DumpFlag != 4) + { + CommBuffer = mymalloc("CommBuffer", COMMBUFFERSIZE); + + if(NTask < All.NumFilesPerSnapshot) + { + warn( + "Number of processors must be larger or equal than All.NumFilesPerSnapshot! Reducing All.NumFilesPerSnapshot " + "accordingly.\n"); + All.NumFilesPerSnapshot = NTask; + } + + if(All.SnapFormat < 1 || All.SnapFormat > 3) + terminate("Unsupported File-Format. All.SnapFormat=%d\n", All.SnapFormat); + +#ifndef HAVE_HDF5 + if(All.SnapFormat == 3) + { + mpi_terminate("Code wasn't compiled with HDF5 support enabled!\n"); + } +#endif /* #ifndef HAVE_HDF5 */ + + /* determine global and local particle numbers */ + for(n = 0; n < NTYPES; n++) + n_type[n] = 0; + + for(n = 0; n < NumPart; n++) + { + n_type[P[n].Type]++; + } + + sumup_large_ints(NTYPES, n_type, ntot_type_all); + + /* assign processors to output files */ + distribute_file(All.NumFilesPerSnapshot, 0, 0, NTask - 1, &filenr, &masterTask, &lastTask); + + if(All.NumFilesPerSnapshot > 1) + { + if(ThisTask == 0) + { + sprintf(buf, "%s/snapdir_%03d", All.OutputDir, num); + mkdir(buf, 02755); + +#ifdef TOLERATE_WRITE_ERROR + sprintf(alternative_fname, "%s/snapdir_%03d", AlternativeOutputDir, num); + mkdir(alternative_fname, 02755); +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + } + + MPI_Barrier(MPI_COMM_WORLD); + } + + if(All.NumFilesPerSnapshot > 1) + sprintf(buf, "%s/snapdir_%03d/%s_%03d.%d", All.OutputDir, num, All.SnapshotFileBase, num, filenr); + else + sprintf(buf, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, num); + +#ifdef TOLERATE_WRITE_ERROR + if(All.NumFilesPerSnapshot > 1) + sprintf(alternative_fname, "%s/snapdir_%03d/%s_%03d.%d", AlternativeOutputDir, num, All.SnapshotFileBase, num, filenr); + 
else + sprintf(alternative_fname, "%s%s_%03d", AlternativeOutputDir, All.SnapshotFileBase, num); +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + + if(RestartFlag == 3) + { +#ifndef FOF_STOREIDS + if(All.NumFilesPerSnapshot > 1) + sprintf(buf, "%s/snapdir_%03d/%s-groupordered_%03d.%d", All.OutputDir, num, All.SnapshotFileBase, num, filenr); + else + sprintf(buf, "%s%s-groupordered_%03d", All.OutputDir, All.SnapshotFileBase, num); +#else /* #ifndef FOF_STOREIDS */ + if(All.NumFilesPerSnapshot > 1) + sprintf(buf, "%s/snapdir_%03d/%s-storeids_%03d.%d", All.OutputDir, num, All.SnapshotFileBase, num, filenr); + else + sprintf(buf, "%s%s-storeids_%03d", All.OutputDir, All.SnapshotFileBase, num); +#endif /* #ifndef FOF_STOREIDS #else */ + } + +#ifdef ADDBACKGROUNDGRID + if(All.NumFilesPerSnapshot > 1) + sprintf(buf, "%s-with-grid.%d", All.InitCondFile, filenr); + else + sprintf(buf, "%s-with-grid", All.InitCondFile); +#endif /* #ifdef ADDBACKGROUNDGRID */ + + ngroups = All.NumFilesPerSnapshot / All.NumFilesWrittenInParallel; + if((All.NumFilesPerSnapshot % All.NumFilesWrittenInParallel)) + ngroups++; + + for(gr = 0; gr < ngroups; gr++) + { + if((filenr / All.NumFilesWrittenInParallel) == gr) /* ok, it's this processor's turn */ + { + if(ThisTask == masterTask && (filenr % All.NumFilesWrittenInParallel) == 0) + printf("writing snapshot files group %d out of %d - files %d-%d (total of %d files): '%s'\n", gr + 1, ngroups, + filenr, filenr + All.NumFilesWrittenInParallel - 1, All.NumFilesPerSnapshot, buf); + write_file(buf, masterTask, lastTask, subbox_flag); +#ifdef OUTPUT_XDMF + if(All.SnapFormat == 3) + { + write_xdmf(buf); + } +#endif /* #ifdef OUTPUT_XDMF */ + } + MPI_Barrier(MPI_COMM_WORLD); + } + + myfree(CommBuffer); + + t1 = second(); + CPU_Step[CPU_SNAPSHOT] += measure_time(); + + mpi_printf("done with writing snapshot (took %g sec).\n", timediff(t0, t1)); + } + else + { + mpi_printf("done with writing files: no dump of snapshot (DumpFlag = %d).\n", DumpFlag); + } // 
if(DumpFlag !=4) + +#ifdef FOF + if(RestartFlag != 3 && RestartFlag != 6 && RestartFlag != 18 && subbox_flag == 0 && DumpFlag != 2) + { + { +#ifndef FOF_STOREIDS + /* now revert from output order to the original order */ + for(n = 0; n < NumPart; n++) + { + PS[n].TargetTask = PS[n].OriginTask; + PS[n].TargetIndex = PS[n].OriginIndex; + } + + fof_subfind_exchange(MPI_COMM_WORLD); + + myfree(PS); + + /* do resize because subfind may have increased these limits */ + if(All.MaxPart != fof_OldMaxPart) + { + All.MaxPart = fof_OldMaxPart; + reallocate_memory_maxpart(); + } + if(All.MaxPartSph != fof_OldMaxPartSph) + { + All.MaxPartSph = fof_OldMaxPartSph; + reallocate_memory_maxpartsph(); + } + + CPU_Step[CPU_FOF] += measure_time(); +#endif /* #ifndef FOF_STOREIDS */ + + /* recreate the mesh that we had free to reduce peak memory usage */ + create_mesh(); + mesh_setup_exchange(); + } + } +#endif /* #ifdef FOF */ + + All.Ti_lastoutput = All.Ti_Current; + + CPU_Step[CPU_SNAPSHOT] += measure_time(); + } +} + +/*! \brief This function fills the write buffer with particle data. + * + * \param[out] buffer Buffer to be filled. + * \param[in] blocknr ID of the output block (i.e. position, velocities...). + * \param[in, out] startindex Pointer containing the offset in write buffer. + * \param[in] pc Number of particle to be put in the buffer. + * \param[in] type Particle type. + * \param[in] subbox_flag If greater than 0 instructs the code to output + * only a subset of the whole domain. 
+ * + * \return void + */ +void fill_write_buffer(void *buffer, enum iofields blocknr, int *startindex, int pc, int type, int subbox_flag) +{ + int n, k, pindex, f; + MyOutputFloat *fp; + MyIDType *ip; + int *intp; + + /* determine which field we are working on */ + int field = -1; + + for(f = 0; f < N_IO_Fields; f++) + { + if(IO_Fields[f].field == blocknr) + { + field = f; + break; + } + } + + if(field < 0) + terminate("IO field=%d not registered with init_field()", (int)blocknr); + + set_cosmo_factors_for_current_time(); + + fp = (MyOutputFloat *)buffer; + ip = (MyIDType *)buffer; + intp = (int *)buffer; + double *doublep = (double *)buffer; + float *floatp = (float *)buffer; + + pindex = *startindex; + + for(n = 0; n < pc; pindex++) + { + /* SUBBOX_SNAPSHOTS specialized output */ + + /* normal particle output */ + if(P[pindex].Type == type) + { + if(IO_Fields[field].io_func) + { + int particle; + switch(IO_Fields[field].array) + { + case A_NONE: + case A_SPHP: + case A_P: + particle = pindex; + break; + case A_PS: + terminate("Not good, trying to read into PS[]?\n"); + break; + default: + terminate("ERROR in fill_write_buffer: Array not found!\n"); + break; + } + + switch(IO_Fields[field].type_in_file_output) + { + case FILE_NONE: + terminate("error"); + break; + case FILE_INT: + IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, intp, 0); + intp += IO_Fields[field].values_per_block; + n++; + break; + case FILE_MY_ID_TYPE: + IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, ip, 0); + ip += IO_Fields[field].values_per_block; + n++; + break; + case FILE_MY_IO_FLOAT: + IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, fp, 0); + fp += IO_Fields[field].values_per_block; + n++; + break; + case FILE_DOUBLE: + IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, doublep, 0); + doublep += IO_Fields[field].values_per_block; + n++; + break; + case FILE_FLOAT: + IO_Fields[field].io_func(particle, 
IO_Fields[field].values_per_block, floatp, 0); + floatp += IO_Fields[field].values_per_block; + n++; + break; + } + } + else + { + void *array_pos; + + switch(IO_Fields[field].array) + { + case A_NONE: + array_pos = 0; + break; + + case A_SPHP: + array_pos = SphP + pindex; + break; + + case A_P: + array_pos = P + pindex; + break; + case A_PS: + array_pos = PS + pindex; + break; + + default: + terminate("ERROR in fill_write_buffer: Array not found!\n"); + break; + } + + for(k = 0; k < IO_Fields[field].values_per_block; k++) + { + double value = 0.; + + switch(IO_Fields[field].type_in_memory) + { + case MEM_INT: + *intp = *((int *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(int))); + intp++; + break; + + case MEM_MY_ID_TYPE: + *ip = *((MyIDType *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyIDType))); + ip++; + break; + + case MEM_FLOAT: + value = *((float *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(float))); + break; + + case MEM_DOUBLE: + value = *((double *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(double))); + break; + + case MEM_MY_SINGLE: + value = *((MySingle *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MySingle))); + break; + + case MEM_MY_FLOAT: + value = *((MyFloat *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyFloat))); + break; + + case MEM_MY_DOUBLE: + value = *((MyDouble *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyDouble))); + break; + + case MEM_NONE: + terminate("ERROR in fill_write_buffer: reached MEM_NONE with no io_func specified!\n"); + break; + + default: + terminate("ERROR in fill_write_buffer: Type not found!\n"); + break; + } + + switch(IO_Fields[field].type_in_file_output) + { + case FILE_MY_IO_FLOAT: + *fp = value; + fp++; + break; + + case FILE_DOUBLE: + *doublep = value; + doublep++; + break; + + case FILE_FLOAT: + *floatp = value; + floatp++; + break; + + default: + break; + } + } + + n++; + } // end io_func/not + } // end type if 
+ } // end particle loop + + *startindex = pindex; +} + +/*! \brief This function tells the size in bytes of one data entry in each of + * the blocks defined for the output file. + * + * \param[in] blocknr ID of the output block (i.e. position, velocities...). + * \param[in] mode Used to distinguish whether the function is called in input + * mode (mode > 0) or in output mode (mode = 0). The size of one + * data entry may vary depending on the mode. + * + * \return Size of the data entry in bytes. + */ +int get_bytes_per_blockelement(enum iofields blocknr, int mode) +{ + int bytes_per_blockelement = 0; + int f; + + for(f = 0; f < N_IO_Fields; f++) + { + if(IO_Fields[f].field == blocknr) + { + if(mode) + { + switch(IO_Fields[f].type_in_file_input) + { + case FILE_NONE: + terminate("error"); + break; + case FILE_INT: + bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(int); + break; + case FILE_MY_ID_TYPE: + bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(MyIDType); + break; + case FILE_MY_IO_FLOAT: + bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(MyInputFloat); + break; + case FILE_DOUBLE: + bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(double); + break; + case FILE_FLOAT: + bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(float); + break; + } + } + else + { + switch(IO_Fields[f].type_in_file_output) + { + case FILE_NONE: + terminate("error"); + break; + case FILE_INT: + bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(int); + break; + case FILE_MY_ID_TYPE: + bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(MyIDType); + break; + case FILE_MY_IO_FLOAT: + bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(MyOutputFloat); + break; + case FILE_DOUBLE: + bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(double); + break; + case FILE_FLOAT: + bytes_per_blockelement = IO_Fields[f].values_per_block * sizeof(float); + break; + } + } + 
break; + } + } + + return bytes_per_blockelement; +} + +/*! \brief This function determines the type of one data entry in each of the + * blocks defined for the output file. + * + * Used only if output in HDF5 format is enabled. + * + * \param[in] blocknr ID of the output block (i.e. position, velocities...). + * \param[in] mode For input mode > 0, for output mode = 0. + * + * \return typekey, a flag that indicates the type of the data entry. + */ +int get_datatype_in_block(enum iofields blocknr, int mode) +{ + int typekey, f; + + for(f = 0; f < N_IO_Fields; f++) + { + if(IO_Fields[f].field == blocknr) + { + if(mode) + typekey = IO_Fields[f].type_in_file_input; + else + typekey = IO_Fields[f].type_in_file_output; + + return typekey; + } + } + + terminate("error invalid field"); + return typekey; +} + +/*! \brief This function determines the number of elements composing one data + * entry in each of the blocks defined for the output file. + * + * Used only if output in HDF5 format is enabled. + * + * \param[in] blocknr ID of the output block (i.e. position, velocities...). + * + * \return Number of elements of one data entry. + */ +int get_values_per_blockelement(enum iofields blocknr) +{ + int values = 0; + int f; + + for(f = 0; f < N_IO_Fields; f++) + { + if(IO_Fields[f].field == blocknr) + { + values = IO_Fields[f].values_per_block; + return values; + } + } + + terminate("reached last entry in switch - strange."); + return values; +} + +/*! \brief Gets particle number in an output block. + * + * This function determines how many particles there are in a given block, + * based on the information in the header-structure. It also flags particle + * types that are present in the block in the typelist array. + * + * \param[in] blocknr ID of the output block (i.e. position, velocities...). + * \param[in] typelist Array that contains the number of particles of each + * type in the block. + * + * \return The total number of particles in the block. 
+ */ +int get_particles_in_block(enum iofields blocknr, int *typelist) +{ + int i, f; + int npart = 0; + + switch(blocknr) + { + case IO_MASS: + for(i = 0; i < NTYPES; i++) + { + typelist[i] = 0; + if(All.MassTable[i] == 0) + if(header.npart[i] > 0) + { + typelist[i] = 1; + npart += header.npart[i]; + } + } + return npart; /* with masses */ + break; + + case IO_LASTENTRY: + terminate("reached last entry in switch - strange."); + break; + + default: + for(f = 0; f < N_IO_Fields; f++) + { + if(IO_Fields[f].field == blocknr) + { + for(i = 0; i < NTYPES; i++) + { + if((IO_Fields[f].typelist & (1 << i)) && header.npart[i] > 0) + { + typelist[i] = 1; + npart += header.npart[i]; + } + else + typelist[i] = 0; + } + + return npart; + } + } + break; + + } // end switch + + terminate("reached end of function - this should not happen"); + return 0; +} + +/*! \brief Checks if a block is expected for file input or output. + * + * This function tells whether a block in the input/output file is requested + * or not. Because the blocks processed in the two cases are different, the + * mode is indicated with the flag write (1=write, 0=read). + * + * \param[in] blocknr ID of the output block (i.e. position, velocities...). + * \param[in] write If 0 the function is in read mode, if 1 the function is + * in write mode. + * + * \return 0 if the block is not present, 1 otherwise. 
+ */ +int blockpresent(enum iofields blocknr, int write) +{ + int f; + + if(!write) + { +#ifdef PASSIVE_SCALARS + if(RestartFlag == 0 && blocknr == IO_PASS) + return 1; +#endif /* #ifdef PASSIVE_SCALARS */ +#if defined(MHD) && !defined(MHD_SEEDFIELD) + if(All.ICFormat != 3 && RestartFlag == 0 && (blocknr > IO_U && blocknr != IO_BFLD)) +#else /* #if defined(MHD) && !defined(MHD_SEEDFIELD) */ + if(All.ICFormat != 3 && RestartFlag == 0 && blocknr > IO_U) +#endif /* #if defined(MHD) && !defined(MHD_SEEDFIELD) #else */ +#ifdef READ_LEGACY_ICS + if(RestartFlag == 0 && blocknr > IO_U && blocknr != IO_BFLD) +#else /* #ifdef READ_LEGACY_ICS */ + if(RestartFlag == 0) +#endif /* #ifdef READ_LEGACY_ICS #else */ + return 0; /* ignore all other blocks in non-HDF5 initial conditions */ + } + + for(f = 0; f < N_IO_Fields; f++) + { + if(IO_Fields[f].field == blocknr) + { + if(!write) + { + if(IO_Fields[f].type_in_file_input != FILE_NONE) + { + return 1; + } + } + else + { + if(IO_Fields[f].type_in_file_output == FILE_NONE) + return 0; + + /* subboxes: write all fields except those marked by SN_NO_SUBBOX or SN_MINI_ONLY + (must come first to ignore DumpFlag) */ + if(subbox_dump) + { + if(IO_Fields[f].snap_type == SN_NO_SUBBOX || IO_Fields[f].snap_type == SN_MINI_ONLY) + return 0; + + return 1; + } + + /* normal full snapshot (with or without groupcat): only skip fields marked by SN_MINI_ONLY */ + if(DumpFlag == 1 || DumpFlag == 2) + { + if(IO_Fields[f].snap_type == SN_MINI_ONLY) + return 0; + + return 1; + } + + /* mini-snaps: write only those fields marked by either SN_MINI or SN_MINI_ONLY */ + if(DumpFlag == 3) + { + if(IO_Fields[f].snap_type == SN_MINI || IO_Fields[f].snap_type == SN_MINI_ONLY) + return 1; + + if(IO_Fields[f].typelist == BHS_ONLY) + return 1; // temporarily hard-coded that all BH fields are included in mini-snaps + + return 0; // specifically do not include any other fields in mini-snaps + } + } + return 0; + } + } + + return 0; /* default: not present */ +} + 
+/*! \brief This function associates a short 4-character block name with each + * block number. + * + * This is stored in front of each block for snapshot FileFormat=2. + * + * \param[in] blocknr ID of the output block (i.e. position, velocities...). + * \param[in] label string containing the dataset name. + * + * \return void + */ +void get_Tab_IO_Label(enum iofields blocknr, char *label) +{ + int f; + for(f = 0; f < N_IO_Fields; f++) + { + if(IO_Fields[f].field == blocknr) + { + strncpy(label, IO_Fields[f].label, 4); + return; + } + } + + terminate("error invalid field"); +} + +/*! \brief This function associates a dataset name with each block number. + * + * This is needed to name the dataset if the output is written in HDF5 + * format. + * + * \param[in] blocknr ID of the output block (i.e. position, velocities...). + * \param[in] buf String containing the dataset name. + * + * \return void + */ +void get_dataset_name(enum iofields blocknr, char *buf) +{ + int f; + for(f = 0; f < N_IO_Fields; f++) + { + if(IO_Fields[f].field == blocknr) + { + strcpy(buf, IO_Fields[f].datasetname); + return; + } + } + + terminate("error invalid field"); +} + +/*! \brief Actually write the snapshot file to the disk. + * + * This function writes a snapshot file containing the data from processors + * 'writeTask' to 'lastTask'. 'writeTask' is the one that actually writes. + * Each snapshot file contains a header and cell/particle details. The + * output fields for each particle type depend on included physics + * and compile-time flags. + * + * \param[in] fname String containing the file name. + * \param[in] writeTask The rank of the task in a writing group that which + * is responsible for the output operations. + * \param[in] lastTask The rank of the last task in a writing group. + * \param[in] subbox_flag If greater than 0 instructs the code to output + * only a subset of the whole domain. 
 *
 *  \return void
 */
void write_file(char *fname, int writeTask, int lastTask, int subbox_flag)
{
  /* Overview of the steps below:
   *  1. writeTask sums the per-type particle counts of tasks
   *     writeTask..lastTask and sends the totals back to the group.
   *  2. Every task fills the global `header`; writeTask opens the file and
   *     writes the header (HDF5 groups/attributes for SnapFormat 3,
   *     otherwise a binary record).
   *  3. For every block accepted by blockpresent(), and every particle type
   *     selected by get_particles_in_block(), the data are funnelled through
   *     CommBuffer to writeTask and written in chunks.
   *  4. writeTask closes the file. With TOLERATE_WRITE_ERROR the whole
   *     procedure is attempted a second time on alternative_fname. */
  int type, bytes_per_blockelement, npart, nextblock, typelist[NTYPES];
  int n_for_this_task, n, p, pc, offset = 0, task;
  int blockmaxlen, ntot_type[NTYPES], nn[NTYPES];
  enum iofields blocknr;
  char label[8];
  int bnr;
  int blksize;
  MPI_Status status;
  FILE *fd  = 0;
  int pcsum = 0;

#ifdef HAVE_HDF5
  hid_t hdf5_file = 0, hdf5_grp[NTYPES], hdf5_headergrp = 0, hdf5_dataspace_memory;
  hid_t hdf5_datatype = 0, hdf5_dataspace_in_file = 0, hdf5_dataset = 0;
  hsize_t dims[2], count[2], start[2];
  int rank = 0;
  char buf[500];
#ifdef HDF5_FILTERS
  hid_t hdf5_properties;
#endif /* #ifdef HDF5_FILTERS */
  hid_t hdf5_paramsgrp = 0;
  hid_t hdf5_configgrp = 0;
#endif /* #ifdef HAVE_HDF5 */

/* SKIP writes the current value of blksize as one int to fd; it is issued
 * before and after each binary record. */
#define SKIP                                 \
  {                                          \
    my_fwrite(&blksize, sizeof(int), 1, fd); \
  }

#ifdef TOLERATE_WRITE_ERROR
  /* at most two attempts: the second one uses alternative_fname (see end of loop) */
  for(int try_io = 0; try_io < 2; try_io++)
    {
      WriteErrorFlag = 0;
#ifdef HAVE_HDF5
      /* NOTE(review): the identifier returned by H5Eget_current_stack() is
       * discarded here; the HDF5 reference describes closing it with
       * H5Eclose_stack() - confirm whether this leaks a handle. */
      H5Eget_current_stack(); /* clears current error stack */
#endif /* #ifdef HAVE_HDF5 */
#endif /* #ifdef TOLERATE_WRITE_ERROR */

      /* determine particle numbers of each type in file */
      if(ThisTask == writeTask)
        {
          for(n = 0; n < NTYPES; n++)
            ntot_type[n] = n_type[n];

          for(task = writeTask + 1; task <= lastTask; task++)
            {
              MPI_Recv(&nn[0], NTYPES, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status);
              for(n = 0; n < NTYPES; n++)
                ntot_type[n] += nn[n];
            }

          /* hand the per-file totals back to every other task of the group */
          for(task = writeTask + 1; task <= lastTask; task++)
            MPI_Send(&ntot_type[0], NTYPES, MPI_INT, task, TAG_N, MPI_COMM_WORLD);
        }
      else
        {
          MPI_Send(&n_type[0], NTYPES, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD);
          MPI_Recv(&ntot_type[0], NTYPES, MPI_INT, writeTask, TAG_N, MPI_COMM_WORLD, &status);
        }

      /* fill file header */
      for(n = 0; n < NTYPES; n++)
        {
          header.npart[n] = ntot_type[n];
          /* the global count is split into a low and a high 32-bit word */
          header.npartTotal[n]         = (unsigned int)ntot_type_all[n];
          header.npartTotalHighWord[n] = (unsigned int)(ntot_type_all[n] >> 32);
        }

      for(n = 0; n < NTYPES; n++)
        header.mass[n] = All.MassTable[n];

      header.time = All.Time;

      if(All.ComovingIntegrationOn)
        header.redshift = 1.0 / All.Time - 1;
      else
        header.redshift = 0;

      header.flag_sfr        = 0;
      header.flag_feedback   = 0;
      header.flag_cooling    = 0;
      header.flag_stellarage = 0;
      header.flag_metals     = 0;

      header.flag_tracer_field = 0;

#ifdef COOLING
      header.flag_cooling = 1;
#endif /* #ifdef COOLING */

#ifdef USE_SFR
      header.flag_sfr      = 1;
      header.flag_feedback = 1;
#endif /* #ifdef USE_SFR */

      header.num_files   = All.NumFilesPerSnapshot;
      header.BoxSize     = All.BoxSize;
      header.Omega0      = All.Omega0;
      header.OmegaLambda = All.OmegaLambda;
      header.HubbleParam = All.HubbleParam;

#ifdef OUTPUT_IN_DOUBLEPRECISION
      header.flag_doubleprecision = 1;
#else  /* #ifdef OUTPUT_IN_DOUBLEPRECISION */
      header.flag_doubleprecision = 0;
#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION #else */

      /* open file and write header */

      if(ThisTask == writeTask)
        {
          if(All.SnapFormat == 3)
            {
#ifdef HAVE_HDF5
              sprintf(buf, "%s.hdf5", fname);
              hdf5_file = my_H5Fcreate(buf, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);

              hdf5_headergrp = my_H5Gcreate(hdf5_file, "/Header", 0);

              /* one group per particle type that is present in this file */
              for(type = 0; type < NTYPES; type++)
                {
                  if(header.npart[type] > 0)
                    {
                      sprintf(buf, "/PartType%d", type);
                      hdf5_grp[type] = my_H5Gcreate(hdf5_file, buf, 0);
                    }
                }

              write_header_attributes_in_hdf5(hdf5_headergrp);

              hdf5_paramsgrp = my_H5Gcreate(hdf5_file, "/Parameters", 0);
              write_parameters_attributes_in_hdf5(hdf5_paramsgrp);

              hdf5_configgrp = my_H5Gcreate(hdf5_file, "/Config", 0);
              write_compile_time_options_in_hdf5(hdf5_configgrp);
#endif /* #ifdef HAVE_HDF5 */
            }
          else
            {
              if(!(fd = fopen(fname, "w")))
                {
                  printf("can't open file `%s' for writing snapshot.\n", fname);
                  terminate("file open error");
                }

              if(All.SnapFormat == 2)
                {
                  /* format 2: extra record holding the 4-character tag and
                   * the size of the record that follows */
                  blksize = sizeof(int) + 4 * sizeof(char);
                  SKIP;
                  my_fwrite((void *)"HEAD", sizeof(char), 4, fd);
                  nextblock = sizeof(header) + 2 * sizeof(int);
                  my_fwrite(&nextblock, sizeof(int), 1, fd);
                  SKIP;
                }

              blksize = sizeof(header);
              SKIP;
              my_fwrite(&header, sizeof(header), 1, fd);
              SKIP;
            }
        }

      /* iterate over all block IDs; the loop ends at IO_LASTENTRY (1000 is
       * only an upper bound for the scan) */
      for(bnr = 0; bnr < 1000; bnr++)
        {
          blocknr = (enum iofields)bnr;

          if(blocknr == IO_LASTENTRY)
            break;

          if(blockpresent(blocknr, 1))
            {
              bytes_per_blockelement = get_bytes_per_blockelement(blocknr, 0);

              /* number of block elements that fit into COMMBUFFERSIZE */
              blockmaxlen = (int)(COMMBUFFERSIZE / bytes_per_blockelement);

              npart = get_particles_in_block(blocknr, &typelist[0]);

              if(npart > 0)
                {
                  if(ThisTask == 0)
                    {
                      /* local buffer; in HAVE_HDF5 builds it shadows the
                       * function-level buf inside this scope */
                      char buf[1000];

                      get_dataset_name(blocknr, buf);
                      if(subbox_flag == 0)
                        printf("writing block %d (%s)...\n", blocknr, buf);
                    }

                  if(ThisTask == writeTask)
                    {
                      if(All.SnapFormat == 1 || All.SnapFormat == 2)
                        {
                          if(All.SnapFormat == 2)
                            {
                              blksize = sizeof(int) + 4 * sizeof(char);
                              SKIP;
                              get_Tab_IO_Label(blocknr, label);
                              my_fwrite(label, sizeof(char), 4, fd);
                              nextblock = npart * bytes_per_blockelement + 2 * sizeof(int);
                              my_fwrite(&nextblock, sizeof(int), 1, fd);
                              SKIP;
                            }

                          /* leading size marker of the data record; the
                           * trailing one is written after all types are done */
                          blksize = npart * bytes_per_blockelement;
                          SKIP;
                        }
                    }

                  for(type = 0; type < NTYPES; type++)
                    {
                      if(typelist[type])
                        {
#ifdef HAVE_HDF5
                          if(ThisTask == writeTask && All.SnapFormat == 3 && header.npart[type] > 0)
                            {
                              /* choose the HDF5 datatype from the block's file datatype */
                              switch(get_datatype_in_block(blocknr, 0))
                                {
                                  case FILE_INT:
                                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT);
                                    break;
                                  case FILE_MY_IO_FLOAT:
#ifdef OUTPUT_IN_DOUBLEPRECISION
                                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE);
#else  /* #ifdef OUTPUT_IN_DOUBLEPRECISION */
                                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT);
#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION #else */
                                    break;
                                  case FILE_MY_ID_TYPE:
#ifdef LONGIDS
                                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT64);
#else  /* #ifdef LONGIDS */
                                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT32);
#endif /* #ifdef LONGIDS #else */
                                    break;
                                  case FILE_DOUBLE:
                                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE);
                                    break;
                                  case FILE_FLOAT:
                                    hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT);
                                    break;
                                }

                              /* dataset extent: particles of this type in the
                               * file times values per element; rank 1 when
                               * there is a single value per element */
                              dims[0] = header.npart[type];
                              dims[1] = get_values_per_blockelement(blocknr);
                              if(dims[1] == 1)
                                rank = 1;
                              else
                                rank = 2;

                              get_dataset_name(blocknr, buf);

                              hdf5_dataspace_in_file = my_H5Screate_simple(rank, dims, NULL);
#ifdef HDF5_FILTERS
                              hdf5_properties = my_H5Pcreate(H5P_DATASET_CREATE);
                              my_H5Pset_chunk(hdf5_properties, rank, dims); /* set chunk size */
                              my_H5Pset_shuffle(hdf5_properties);           /* reshuffle bytes to get better compression ratio */
                              my_H5Pset_deflate(hdf5_properties, 9);        /* gzip compression level 9 */
                              my_H5Pset_fletcher32(hdf5_properties);        /* Fletcher32 checksum on dataset */

                              if(my_H5Pall_filters_avail(hdf5_properties))
                                hdf5_dataset =
                                    my_H5Dcreate(hdf5_grp[type], buf, hdf5_datatype, hdf5_dataspace_in_file, hdf5_properties);
                              else
                                {
                                  printf("HDF5_FILTERS: Warning selected filters not available! Writing data without filters! \n");
                                  myflush(stdout);
                                  hdf5_dataset = my_H5Dcreate(hdf5_grp[type], buf, hdf5_datatype, hdf5_dataspace_in_file, H5P_DEFAULT);
                                }
#else  /* #ifdef HDF5_FILTERS */
                              hdf5_dataset = my_H5Dcreate(hdf5_grp[type], buf, hdf5_datatype, hdf5_dataspace_in_file, H5P_DEFAULT);
#endif /* #ifdef HDF5_FILTERS #else */
                              write_dataset_attributes(hdf5_dataset, blocknr);
                            }
#endif /* #ifdef HAVE_HDF5 */

                          /* pcsum: elements of this type already written;
                           * remaining_space / bufferstart: free and used
                           * element slots of CommBuffer */
                          pcsum               = 0;
                          int remaining_space = blockmaxlen;
                          int bufferstart     = 0;

                          for(task = writeTask, offset = 0; task <= lastTask; task++)
                            {
                              /* the task whose turn it is announces its
                               * element count to all other tasks of the group */
                              if(task == ThisTask)
                                {
                                  n_for_this_task = n_type[type];

                                  for(p = writeTask; p <= lastTask; p++)
                                    if(p != ThisTask)
                                      MPI_Send(&n_for_this_task, 1, MPI_INT, p, TAG_NFORTHISTASK, MPI_COMM_WORLD);
                                }
                              else
                                MPI_Recv(&n_for_this_task, 1, MPI_INT, task, TAG_NFORTHISTASK, MPI_COMM_WORLD, &status);

                              while(n_for_this_task > 0)
                                {
                                  /* chunk size: limited by the buffer capacity
                                   * and by the space still free in it */
                                  pc = n_for_this_task;

                                  if(pc > blockmaxlen)
                                    pc = blockmaxlen;

                                  if(pc > remaining_space)
                                    pc = remaining_space;

                                  void *buffer = (void *)((char *)CommBuffer + bufferstart * bytes_per_blockelement);

                                  if(ThisTask == task)
                                    fill_write_buffer(buffer, blocknr, &offset, pc, type, subbox_flag);

                                  if(ThisTask == writeTask && task != writeTask)
                                    MPI_Recv(buffer, bytes_per_blockelement * pc, MPI_BYTE, task, TAG_PDATA, MPI_COMM_WORLD, &status);

                                  if(ThisTask != writeTask && task == ThisTask)
                                    MPI_Ssend(buffer, bytes_per_blockelement * pc, MPI_BYTE, writeTask, TAG_PDATA, MPI_COMM_WORLD);

                                  remaining_space -= pc;
                                  bufferstart += pc;

                                  if(remaining_space == 0)
                                    {
                                      /* write stuff (number of elements equal to bufferstart) */
                                      if(ThisTask == writeTask)
                                        {
                                          if(All.SnapFormat == 3)
                                            {
#ifdef HAVE_HDF5
                                              /* select the slab starting at
                                               * element pcsum in the file */
                                              start[0] = pcsum;
                                              start[1] = 0;

                                              count[0] = bufferstart;
                                              count[1] = get_values_per_blockelement(blocknr);

                                              my_H5Sselect_hyperslab(hdf5_dataspace_in_file, H5S_SELECT_SET, start, NULL, count, NULL);

                                              dims[0]               = bufferstart;
                                              dims[1]               = get_values_per_blockelement(blocknr);
                                              hdf5_dataspace_memory = my_H5Screate_simple(rank, dims, NULL);

                                              my_H5Dwrite(hdf5_dataset, hdf5_datatype, hdf5_dataspace_memory, hdf5_dataspace_in_file,
                                                          H5P_DEFAULT, CommBuffer, buf);

                                              my_H5Sclose(hdf5_dataspace_memory, H5S_SIMPLE);
#endif /* #ifdef HAVE_HDF5 */
                                            }
                                          else
                                            {
                                              my_fwrite(CommBuffer, bytes_per_blockelement, bufferstart, fd);
                                            }
                                        }

                                      /* buffer flushed: reset it on every task */
                                      pcsum += bufferstart;
                                      remaining_space = blockmaxlen;
                                      bufferstart     = 0;
                                    }

                                  n_for_this_task -= pc;
                                }
                            }

                          if(bufferstart > 0)
                            {
                              /* write remaining stuff (number of elements equal to bufferstart) */
                              if(ThisTask == writeTask)
                                {
                                  if(All.SnapFormat == 3)
                                    {
#ifdef HAVE_HDF5
                                      start[0] = pcsum;
                                      start[1] = 0;

                                      count[0] = bufferstart;
                                      count[1] = get_values_per_blockelement(blocknr);

                                      my_H5Sselect_hyperslab(hdf5_dataspace_in_file, H5S_SELECT_SET, start, NULL, count, NULL);

                                      dims[0]               = bufferstart;
                                      dims[1]               = get_values_per_blockelement(blocknr);
                                      hdf5_dataspace_memory = my_H5Screate_simple(rank, dims, NULL);

                                      my_H5Dwrite(hdf5_dataset, hdf5_datatype, hdf5_dataspace_memory, hdf5_dataspace_in_file,
                                                  H5P_DEFAULT, CommBuffer, buf);

                                      my_H5Sclose(hdf5_dataspace_memory, H5S_SIMPLE);
#endif /* #ifdef HAVE_HDF5 */
                                    }
                                  else
                                    {
                                      my_fwrite(CommBuffer, bytes_per_blockelement, bufferstart, fd);
                                    }
                                }

                              pcsum += bufferstart;
                              remaining_space = blockmaxlen;
                              bufferstart     = 0;
                            }

#ifdef HAVE_HDF5
                          /* release the per-dataset HDF5 objects created above
                           * (the inner SnapFormat test repeats the outer one) */
                          if(ThisTask == writeTask && All.SnapFormat == 3 && header.npart[type] > 0)
                            {
                              if(All.SnapFormat == 3)
                                {
                                  my_H5Dclose(hdf5_dataset, buf);
#ifdef HDF5_FILTERS
                                  my_H5Pclose(hdf5_properties);
#endif /* #ifdef HDF5_FILTERS */
                                  my_H5Sclose(hdf5_dataspace_in_file, H5S_SIMPLE);
                                  my_H5Tclose(hdf5_datatype);
                                }
                            }
#endif /* #ifdef HAVE_HDF5 */
                        }
                    }

                  if(ThisTask == writeTask)
                    {
                      /* trailing size marker of the data record */
                      if(All.SnapFormat == 1 || All.SnapFormat == 2)
                        SKIP;
                    }
                }

#ifdef TOLERATE_WRITE_ERROR
              /* writeTask shares its WriteErrorFlag with the rest of the group */
              if(ThisTask == writeTask)
                {
                  for(int p = writeTask; p <= lastTask; p++)
                    if(p != ThisTask)
                      MPI_Send(&WriteErrorFlag, 1, MPI_INT, p, TAG_KEY, MPI_COMM_WORLD);
                }
              else
                MPI_Recv(&WriteErrorFlag, 1, MPI_INT, writeTask, TAG_KEY, MPI_COMM_WORLD, &status);
#endif /* #ifdef TOLERATE_WRITE_ERROR */
            }

#ifdef TOLERATE_WRITE_ERROR
          if(WriteErrorFlag) /* don't write further blocks in this case */
            break;
#endif /* #ifdef TOLERATE_WRITE_ERROR */
        }

      if(ThisTask == writeTask)
        {
          if(All.SnapFormat == 3)
            {
#ifdef HAVE_HDF5
              /* NOTE(review): buf still holds whatever was written to it last
               * (a dataset or group name), so the string passed to
               * my_H5Gclose() may not name the group being closed - confirm
               * how the wrapper uses it. */
              for(type = NTYPES - 1; type >= 0; type--)
                if(header.npart[type] > 0)
                  my_H5Gclose(hdf5_grp[type], buf);
              my_H5Gclose(hdf5_headergrp, "/Header");
              my_H5Gclose(hdf5_paramsgrp, "/Parameters");
              my_H5Gclose(hdf5_configgrp, "/Config");

              sprintf(buf, "%s.hdf5", fname);
              my_H5Fclose(hdf5_file, buf);
#endif /* #ifdef HAVE_HDF5 */
            }
          else
            fclose(fd);
        }

#ifdef TOLERATE_WRITE_ERROR
      if(WriteErrorFlag == 0)
        break;

      if(try_io == 0)
        {
          if(ThisTask == writeTask)
            {
              printf(
                  "TOLERATE_WRITE_ERROR: Try to write to alternative file: masterTask=%d lastTask=%d try_io=%d "
                  "alternative-filename='%s'\n",
                  writeTask, lastTask, try_io, alternative_fname);
              myflush(stdout);
            }
          fname = alternative_fname; /* try on a different output directory */
        }
      else
        {
          terminate("TOLERATE_WRITE_ERROR: Second try with alternative file failed too.\n");
        }
    }
#endif /* #ifdef TOLERATE_WRITE_ERROR */
}

#ifdef HAVE_HDF5
/*! \brief Write the fields contained in the header group of the HDF5 snapshot
 *  file.
 *
 *  This function stores the fields of the structure io_header as attributes
 *  belonging to the header group of the HDF5 file.
 *
 *  \param[in] handle A handle for the header group.
+ * + * \return void + */ +void write_header_attributes_in_hdf5(hid_t handle) +{ + hsize_t adim[1] = {NTYPES}; + hid_t hdf5_dataspace, hdf5_attribute; + + hdf5_dataspace = my_H5Screate(H5S_SIMPLE); + my_H5Sset_extent_simple(hdf5_dataspace, 1, adim, NULL, "NumPart_ThisFile"); + hdf5_attribute = my_H5Acreate(handle, "NumPart_ThisFile", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, header.npart, "NumPart_ThisFile"); + my_H5Aclose(hdf5_attribute, "NumPart_ThisFile"); + my_H5Sclose(hdf5_dataspace, H5S_SIMPLE); + + hdf5_dataspace = my_H5Screate(H5S_SIMPLE); + my_H5Sset_extent_simple(hdf5_dataspace, 1, adim, NULL, "NumPart_Total"); + hdf5_attribute = my_H5Acreate(handle, "NumPart_Total", H5T_NATIVE_UINT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_UINT, header.npartTotal, "NumPart_Total"); + my_H5Aclose(hdf5_attribute, "NumPart_Total"); + my_H5Sclose(hdf5_dataspace, H5S_SIMPLE); + + hdf5_dataspace = my_H5Screate(H5S_SIMPLE); + my_H5Sset_extent_simple(hdf5_dataspace, 1, adim, NULL, "NumPart_Total_HighWord"); + hdf5_attribute = my_H5Acreate(handle, "NumPart_Total_HighWord", H5T_NATIVE_UINT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_UINT, header.npartTotalHighWord, "NumPart_Total_HighWord"); + my_H5Aclose(hdf5_attribute, "NumPart_Total_HighWord"); + my_H5Sclose(hdf5_dataspace, H5S_SIMPLE); + + hdf5_dataspace = my_H5Screate(H5S_SIMPLE); + my_H5Sset_extent_simple(hdf5_dataspace, 1, adim, NULL, "MassTable"); + hdf5_attribute = my_H5Acreate(handle, "MassTable", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, header.mass, "MassTable"); + my_H5Aclose(hdf5_attribute, "MassTable"); + my_H5Sclose(hdf5_dataspace, H5S_SIMPLE); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Time", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.time, 
"Time"); + my_H5Aclose(hdf5_attribute, "Time"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Redshift", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.redshift, "Redshift"); + my_H5Aclose(hdf5_attribute, "Redshift"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "BoxSize", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.BoxSize, "BoxSize"); + my_H5Aclose(hdf5_attribute, "BoxSize"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "NumFilesPerSnapshot", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.num_files, "NumFilesPerSnapshot"); + my_H5Aclose(hdf5_attribute, "NumFilesPerSnapshot"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Omega0", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.Omega0, "Omega0"); + my_H5Aclose(hdf5_attribute, "Omega0"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "OmegaLambda", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.OmegaLambda, "OmegaLambda"); + my_H5Aclose(hdf5_attribute, "OmegaLambda"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "OmegaBaryon", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &All.OmegaBaryon, "OmegaBaryon"); + my_H5Aclose(hdf5_attribute, "OmegaBaryon"); + my_H5Sclose(hdf5_dataspace, 
H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "HubbleParam", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.HubbleParam, "HubbleParam"); + my_H5Aclose(hdf5_attribute, "HubbleParam"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Flag_Sfr", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_sfr, "Flag_Sfr"); + my_H5Aclose(hdf5_attribute, "Flag_Sfr"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Flag_Cooling", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_cooling, "Flag_Cooling"); + my_H5Aclose(hdf5_attribute, "Flag_Cooling"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Flag_StellarAge", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_stellarage, "Flag_StellarAge"); + my_H5Aclose(hdf5_attribute, "Flag_StellarAge"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Flag_Metals", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_metals, "Flag_Metals"); + my_H5Aclose(hdf5_attribute, "Flag_Metals"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Flag_Feedback", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_feedback, "Flag_Feedback"); + my_H5Aclose(hdf5_attribute, "Flag_Feedback"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = 
my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Flag_DoublePrecision", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.flag_doubleprecision, "Flag_DoublePrecision"); + my_H5Aclose(hdf5_attribute, "Flag_DoublePrecision"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Composition_vector_length", H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, &header.composition_vector_length, "Composition_vector_length"); + my_H5Aclose(hdf5_attribute, "Composition_vector_length"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hid_t atype = my_H5Tcopy(H5T_C_S1); + + my_H5Tset_size(atype, strlen(GIT_COMMIT)); + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Git_commit", atype, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, atype, GIT_COMMIT, "Git_commit"); + my_H5Aclose(hdf5_attribute, "Git_commit"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + my_H5Tset_size(atype, strlen(GIT_DATE)); + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "Git_date", atype, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, atype, GIT_DATE, "Git_date"); + my_H5Aclose(hdf5_attribute, "Git_date"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "UnitLength_in_cm", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &All.UnitLength_in_cm, "UnitLength_in_cm"); + my_H5Aclose(hdf5_attribute, "UnitLength_in_cm"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "UnitMass_in_g", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &All.UnitMass_in_g, "UnitMass_in_g"); + 
my_H5Aclose(hdf5_attribute, "UnitMass_in_g"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, "UnitVelocity_in_cm_per_s", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &All.UnitVelocity_in_cm_per_s, "UnitVelocity_in_cm_per_s"); + my_H5Aclose(hdf5_attribute, "UnitVelocity_in_cm_per_s"); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); +} + +/*! \brief Write the parameters read from the parameter file in the HDF5 + * snapshot file. + * + * This function stores the parameter io_header as attributes belonging + * to the parameter group of the HDF5 file. + * + * \param[in] handle A handle for the parameter group. + * + * \return void + */ +void write_parameters_attributes_in_hdf5(hid_t handle) +{ + hid_t hdf5_dataspace, hdf5_attribute, atype = my_H5Tcopy(H5T_C_S1); + int i = 0; + + my_H5Tset_size(atype, MAXLEN_PARAM_VALUE); + + for(i = 0; i < All.NParameters; i++) + { + switch(ParametersType[i]) + { + case 1: // REAL + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, Parameters[i], H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, ParametersValue[i], Parameters[i]); + my_H5Aclose(hdf5_attribute, Parameters[i]); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + break; + case 2: // STRING + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, Parameters[i], atype, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, atype, ParametersValue[i], Parameters[i]); + my_H5Aclose(hdf5_attribute, Parameters[i]); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + break; + case 3: // INT + hdf5_dataspace = my_H5Screate(H5S_SCALAR); + hdf5_attribute = my_H5Acreate(handle, Parameters[i], H5T_NATIVE_INT, hdf5_dataspace, H5P_DEFAULT); + my_H5Awrite(hdf5_attribute, H5T_NATIVE_INT, ParametersValue[i], Parameters[i]); + my_H5Aclose(hdf5_attribute, 
Parameters[i]); + my_H5Sclose(hdf5_dataspace, H5S_SCALAR); + break; + } + } + + my_H5Tclose(atype); +} + +/*! \brief A simple error handler for HDF5. + * + * This function terminates the run or if write errors are tolerated, calls + * the write_error() function to print information about the error and returns + * a positive integer to allow the repetition of the write operation + * (see also the HDF5 documentation). + * + * \param[in] unused The parameter is not used, but it is necessary for + * compatibility with the HDF5 library. + * + * \return 1 if the write error is tolerated, otherwise the run is terminated. + */ +herr_t my_hdf5_error_handler(void *unused) +{ +#ifdef TOLERATE_WRITE_ERROR + if(FlagNyt == 0) + write_error(2, 0, 0); + return 1; +#else + return 0; +#endif +} + +/*! \brief Write attributes to dataset, scaling with a and h (cosmological) + * and units. + * + * Only for hdf5 output. + * + * \param[in] hdf5_dataset Dataset identifier. + * \param[in] blocknumber Number of field which is written. 
 *
 *  \return void
 */
void write_dataset_attributes(hid_t hdf5_dataset, enum iofields blocknr)
{
  int ind = -1;

  /* find the first IO_Fields entry registered for this block */
  for(int f = 0; f < N_IO_Fields; f++)
    {
      if(IO_Fields[f].field == blocknr)
        {
          ind = f;
          break;
        }
    }

  /* unknown block: silently write no attributes */
  if(ind < 0)
    {
      return;
    }

  /* fields flagged as having no unit get no scaling attributes */
  if(IO_Fields[ind].hasunit == 0)
    return;

  if(All.ComovingIntegrationOn)
    {
      /* comoving run: store the field's a and h exponents */
      hid_t hdf5_dataspace = my_H5Screate(H5S_SCALAR);
      hid_t hdf5_attribute = my_H5Acreate(hdf5_dataset, "a_scaling", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
      my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &IO_Fields[ind].a, "a_scaling");
      my_H5Aclose(hdf5_attribute, "a_scaling");
      my_H5Sclose(hdf5_dataspace, H5S_SCALAR);

      hdf5_dataspace = my_H5Screate(H5S_SCALAR);
      hdf5_attribute = my_H5Acreate(hdf5_dataset, "h_scaling", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
      my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &IO_Fields[ind].h, "h_scaling");
      my_H5Aclose(hdf5_attribute, "h_scaling");
      my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
    }
  else
    {
      /* non-comoving run: both scalings are written as 0 */
      double zero          = 0;
      hid_t hdf5_dataspace = my_H5Screate(H5S_SCALAR);
      hid_t hdf5_attribute = my_H5Acreate(hdf5_dataset, "a_scaling", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
      my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &zero, "a_scaling");
      my_H5Aclose(hdf5_attribute, "a_scaling");
      my_H5Sclose(hdf5_dataspace, H5S_SCALAR);

      hdf5_dataspace = my_H5Screate(H5S_SCALAR);
      hdf5_attribute = my_H5Acreate(hdf5_dataset, "h_scaling", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
      my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &zero, "h_scaling");
      my_H5Aclose(hdf5_attribute, "h_scaling");
      my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
    }

  /* unit attributes written in both cases, taken from the L, M, V and c
   * members of the field entry */
  hid_t hdf5_dataspace = my_H5Screate(H5S_SCALAR);
  hid_t hdf5_attribute = my_H5Acreate(hdf5_dataset, "length_scaling", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &IO_Fields[ind].L, "length_scaling");
  my_H5Aclose(hdf5_attribute, "length_scaling");
  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);

  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
  hdf5_attribute = my_H5Acreate(hdf5_dataset, "mass_scaling", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &IO_Fields[ind].M, "mass_scaling");
  my_H5Aclose(hdf5_attribute, "mass_scaling");
  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);

  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
  hdf5_attribute = my_H5Acreate(hdf5_dataset, "velocity_scaling", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &IO_Fields[ind].V, "velocity_scaling");
  my_H5Aclose(hdf5_attribute, "velocity_scaling");
  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);

  hdf5_dataspace = my_H5Screate(H5S_SCALAR);
  hdf5_attribute = my_H5Acreate(hdf5_dataset, "to_cgs", H5T_NATIVE_DOUBLE, hdf5_dataspace, H5P_DEFAULT);
  my_H5Awrite(hdf5_attribute, H5T_NATIVE_DOUBLE, &IO_Fields[ind].c, "to_cgs");
  my_H5Aclose(hdf5_attribute, "to_cgs");
  my_H5Sclose(hdf5_dataspace, H5S_SCALAR);
}
#endif /* #ifdef HAVE_HDF5 */

#ifdef OUTPUT_XDMF
/*! \brief Outputs a xdmf file corresponding to this snapshot.
 *
 *  This xdmf file can be used to load the snapshot into programs like visit.
 *  This option only works with output format 3 (hdf5).
 *
 *  The file is written to "<fname>.xmf" and refers to the HDF5 file as
 *  "./<basename(fname)>.hdf5". For every particle type with
 *  header.npart[type] > 0 the IO_POS block provides the coordinates entry;
 *  of the other blocks only those with 1 or 3 values per element and a
 *  datatype code of 1 are listed.
 *
 *  NOTE(review): most format strings below contain no markup and no
 *  conversion specifiers although arguments are passed; the original markup
 *  text appears to have been lost from this copy - compare with the upstream
 *  source before relying on the generated file.
 *
 *  \param[in] fname Name of the snapshot.
 *
 *  \return void
 */
static void write_xdmf(char *fname)
{
  FILE *f;
  char buf[256], buf2[256];
  int i;
  int npresent[NTYPES]; /* set to 1 once the coordinates entry of a type was written */

  for(i = 0; i < NTYPES; i++)
    npresent[i] = 0;

  /* byte width passed to the entries that take a precision argument */
#ifdef OUTPUT_IN_DOUBLEPRECISION
  int prec = 8;
#else  /* #ifdef OUTPUT_IN_DOUBLEPRECISION */
  int prec = 4;
#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION */

  /* NOTE(review): sprintf() into buf[256] is unbounded, and the result of
   * fopen() is not checked before f is used. */
  sprintf(buf, "%s.xmf", fname);
  f = fopen(buf, "w");

  fprintf(f, "\n");
  fprintf(f, "\n");
  fprintf(f, "\n");
  fprintf(f, " ");

  /* hdf5 file path relative to xmf file, uses basename function of libgen.h,
   * i.e. POSIX version of basename() */
  sprintf(buf, "./%s.hdf5", basename(fname));
  int type = 0;
  for(; type < NTYPES; type++)
    {
      int bnr;

      for(bnr = 0; bnr < 1000; bnr++)
        {
          /* this block ID shadows the outer loop counter i */
          enum iofields i = (enum iofields)bnr;

          if(i == IO_LASTENTRY)
            break;

          if(blockpresent(i, 1))
            {
              // get_particles_in_block(i, ntypes);

              if(header.npart[type] > 0)
                {
                  if(i == IO_POS)
                    {
                      fprintf(f, " \n", type);
                      fprintf(f, " \n", header.npart[type]);
                      fprintf(f, " \n");
                      fprintf(f, " \n",
                              header.npart[type], prec);
                      /* NOTE(review): the path is hard-coded to PartType0
                       * for every type, whereas the attribute entries below
                       * use PartType%d - confirm whether this is intended. */
                      fprintf(f, " %s:/PartType0/Coordinates\n", buf);
                      fprintf(f, " \n");
                      fprintf(f, " \n");

                      npresent[type] = 1;
                    }
                  else
                    {
                      int dim   = get_values_per_blockelement(i);
                      int dtype = get_datatype_in_block(i, 0);
                      get_dataset_name(i, buf2);

                      /* only 1- or 3-component blocks with datatype code 1
                       * are listed */
                      if(dim == 1 || dim == 3)
                        {
                          if(dtype == 1)
                            {
                              if(dim == 1)
                                {
                                  fprintf(f, " \n", buf2);
                                  fprintf(f, " \n",
                                          header.npart[type], prec);
                                }
                              else
                                {
                                  fprintf(f, " \n", buf2);
                                  fprintf(f,
                                          " \n",
                                          header.npart[type], prec);
                                }

                              fprintf(f, " %s:/PartType%d/%s\n", buf, type, buf2);
                              fprintf(f, " \n");
                              fprintf(f, " \n");
                            }
                        }
                    }
                }
            }
        }
      /* closing entry for a type whose coordinates entry was written */
      if(npresent[type] == 1)
        {
          fprintf(f, " \n");
        }
    }

  fprintf(f, " \n");
  fprintf(f, "");

  fclose(f);
}
#endif /* #ifdef OUTPUT_XDMF */

/*! \brief A wrapper for the fwrite() function.
 *
 *  This catches I/O errors occurring for fwrite(). In this case we
 *  better stop. If stream is null, no attempt at writing is done.
 *
 *  \param[in] ptr Pointer to the beginning of data to write.
 *  \param[in] size Size in bytes of a single data element.
 *  \param[in] nmemb Number of elements to be written.
 *  \param[in] stream Pointer to the output stream.
 *
 *  \return Number of elements written to stream.
+ */ +size_t my_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + size_t nwritten; + +#ifdef TOLERATE_WRITE_ERROR + if(WriteErrorFlag) + return 0; +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + + if(!stream) + return 0; + + if(size * nmemb > 0) + { + if((nwritten = fwrite(ptr, size, nmemb, stream)) != nmemb) + { +#ifdef TOLERATE_WRITE_ERROR + write_error(0, nwritten, nmemb); +#else /* #ifdef TOLERATE_WRITE_ERROR */ + printf("I/O error (fwrite) on task=%d has occured: %s\n", ThisTask, strerror(errno)); + myflush(stdout); + terminate("write error"); +#endif /* #ifdef TOLERATE_WRITE_ERROR #else */ + } + } + else + nwritten = 0; + +#ifdef TOLERATE_WRITE_ERROR + if(ferror(stream)) + write_error(1, nwritten, nmemb); +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + + return nwritten; +} + +/*! \brief A wrapper for the fread() function. + * + * This catches I/O errors occuring for fread(). In this case we + * better stop. If stream is null, no attempt at readingis done. + * + * \param[out] ptr Pointer to the beginning of memory location where to + * store data. + * \param[in] size Size in bytes of a single data element. + * \param[in] nmemb Number of elements to be read. + * \param[in] stream Pointer to the input stream. + * + * \return Number of elements read from stream. + */ +size_t my_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + size_t nread; + + if(!stream) + return 0; + + if(size * nmemb > 0) + { + if((nread = fread(ptr, size, nmemb, stream)) != nmemb) + { + if(feof(stream)) + printf("I/O error (fread) on task=%d has occured: end of file\n", ThisTask); + else + printf("I/O error (fread) on task=%d has occured: %s\n", ThisTask, strerror(errno)); + myflush(stdout); + terminate("read error"); + } + } + else + nread = 0; + + return nread; +} + +/*! \brief A wrapper for the printf() function. + * + * This function has the same functionalities of the standard printf() + * function. 
However, data is written to the standard output only for
+ * the task with rank 0.
+ *
+ * \param[in] fmt String that contains format arguments.
+ *
+ * \return void
+ */
+void mpi_printf(const char *fmt, ...)
+{
+  if(ThisTask == 0)
+    {
+      va_list l;
+      va_start(l, fmt);
+      vprintf(fmt, l);
+      myflush(stdout);
+      va_end(l);
+    }
+}
+
+/*! \brief A wrapper for the fprintf() function.
+ *
+ * This function has the same functionalities of the standard fprintf()
+ * function. However, data is written to the given stream only for
+ * the task with rank 0.
+ *
+ * \param[in] stream Output stream the formatted text is written to.
+ * \param[in] fmt String that contains format arguments.
+ *
+ * \return void
+ */
+void mpi_fprintf(FILE *stream, const char *fmt, ...)
+{
+  if(ThisTask == 0)
+    {
+      va_list l;
+      va_start(l, fmt);
+      vfprintf(stream, fmt, l);
+      myflush(stream);
+      va_end(l);
+    }
+}
+
+/*! \brief A function for printing debug information in parallel.
+ *
+ * This function works like printf, however it takes care
+ * that the output is contiguous in the stdout from task 0 to task NTask-1.
+ * Run this debug function only in code parts which all tasks reach.
+ *
+ * Every task formats its message into a fixed buffer of 2048 bytes;
+ * longer messages are truncated instead of overflowing the buffer.
+ * Task 0 prints its own message and then receives and prints the
+ * messages of tasks 1 .. NTask-1 in rank order.
+ *
+ * \param[in] fmt String that contains format arguments.
+ *
+ * \return void
+ */
+void mpi_printf_each(const char *fmt, ...)
+{
+  char buffer[2048];
+
+  va_list l;
+  va_start(l, fmt);
+  /* bounded formatting: vsprintf() would write past the end of buffer
+   * for messages longer than 2047 characters */
+  if(vsnprintf(buffer, sizeof(buffer), fmt, l) < 0)
+    buffer[0] = '\0'; /* formatting failed: send/print an empty message */
+  va_end(l);
+
+  if(ThisTask == 0)
+    {
+      /* print own message */
+      printf("%s", buffer);
+
+      /* print message from other tasks */
+      unsigned int i;
+
+      for(i = 1; i < NTask; i++)
+        {
+          MPI_Recv(buffer, (int)sizeof(buffer), MPI_CHAR, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+          printf("%s", buffer);
+        }
+    }
+
+  else
+    {
+      /* the terminating NUL is sent as well; the length never exceeds
+       * sizeof(buffer) because vsnprintf() always terminates the string */
+      MPI_Send(buffer, strlen(buffer) + 1, MPI_CHAR, 0, 0, MPI_COMM_WORLD);
+    }
+}
+
+/*! \brief Opens the requested file name and returns the file descriptor.
+ *
+ * If opening fails, an error is printed and the file descriptor is
+ * null.
+ *
+ * \param[in] fnam The file name.
+ *
+ * \return A file descriptor to the file.
+ */
+FILE *open_file(char *fnam)
+{
+  /* the file is opened for writing ("w"), i.e. an existing file is truncated */
+  FILE *fd = fopen(fnam, "w");
+
+  if(fd == NULL)
+    printf("can't open file `%s' for writing.\n", fnam);
+
+  /* null on failure; the caller has to check */
+  return fd;
+}
diff --git a/src/amuse/community/arepo/src/io/io_fields.c b/src/amuse/community/arepo/src/io/io_fields.c
new file mode 100644
index 0000000000..d36231c570
--- /dev/null
+++ b/src/amuse/community/arepo/src/io/io_fields.c
@@ -0,0 +1,765 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *            contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ *            it under the terms of the GNU General Public License as published by
+ *            the Free Software Foundation, either version 3 of the License, or
+ *            (at your option) any later version.
+ *
+ *            Arepo is distributed in the hope that it will be useful,
+ *            but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *            MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *            GNU General Public License for more details.
+ *
+ *            A copy of the GNU General Public License is available under
+ *            LICENSE as part of this program. See also
+ *            <https://www.gnu.org/licenses/>.
+ * + * \file src/io/io_fields.c + * \date 05/2018 + * \brief User defined functions for output; needed for all + * quantities that are not stored in a global array + * \details contains functions: + * static void io_func_task(int particle, int components, + * void *out_buffer, int mode) + * static void io_func_timebin_hydro(int particle, int + * components, void *out_buffer, int mode) + * static void io_func_timestep(int particle, int components, + * void *out_buffer, int mode) + * static void io_func_softenings(int particle, int components, + * void *out_buffer, int mode) + * void io_func_pos(int particle, int components, void *buffer, + * int mode) + * static void io_func_vel(int particle, int components, void + * *buffer, int mode) + * static void io_func_coolrate(int particle, int components, + * void *buffer, int mode) + * static void io_func_ne(int particle, int components, void + * *buffer, int mode) + * static void io_func_nh(int particle, int components, void + * *buffer, int mode) + * static void io_func_curlvel(int particle, int components, + * void *out_buffer, int mode) + * static void io_func_vorticity(int particle, int components, + * void *out_buffer, int mode) + * static void io_func_cell_spin(int particle, int components, + * void *out_buffer, int mode) + * static void io_func_bfield(int particle, int components, + * void *out_buffer, int mode) + * void init_io_fields() + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 07.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef OUTPUT_TASK +/*! \brief Output of the task the particles are at. + * + * \param[in] particle (unused) + * \param[in] components (unused) + * \param[out] out_buffer File output buffer. 
+ * \param[in] mode (unused)
+ *
+ * \return void
+ */
+static void io_func_task(int particle, int components, void *out_buffer, int mode)
+{
+  int *task_out = (int *)out_buffer;
+
+  task_out[0] = ThisTask;
+}
+#endif /* #ifdef OUTPUT_TASK */
+
+#ifdef OUTPUT_TIMEBIN_HYDRO
+/*! \brief Output function of the timebin corresponding to the hydrodynamic
+ *  timestep.
+ *
+ * Writes P[particle].TimeBinHydro as a single int.
+ *
+ * \param[in] particle Index of particle/cell.
+ * \param[in] components (unused)
+ * \param[out] out_buffer File output buffer.
+ * \param[in] mode (unused)
+ *
+ * \return void
+ */
+static void io_func_timebin_hydro(int particle, int components, void *out_buffer, int mode)
+{
+  int *bin_out = (int *)out_buffer;
+
+  bin_out[0] = P[particle].TimeBinHydro;
+}
+#endif /* #ifdef OUTPUT_TIMEBIN_HYDRO */
+
+#ifdef OUTPUTTIMESTEP
+/*! \brief Output function of the hydrodynamic timestep.
+ *
+ * The step is 2^TimeBinHydro integer time units (0 for time bin 0),
+ * multiplied by All.Timebase_interval.
+ *
+ * \param[in] particle Index of particle/cell.
+ * \param[in] components (unused)
+ * \param[out] out_buffer File output buffer.
+ * \param[in] mode (unused)
+ *
+ * \return void
+ */
+static void io_func_timestep(int particle, int components, void *out_buffer, int mode)
+{
+  integertime ti_step = 0;
+
+  /* time bin 0 maps to a zero step rather than to 1 << 0 */
+  if(P[particle].TimeBinHydro)
+    ti_step = ((integertime)1) << P[particle].TimeBinHydro;
+
+  ((MyOutputFloat *)out_buffer)[0] = ti_step * All.Timebase_interval;
+}
+#endif /* #ifdef OUTPUTTIMESTEP */
+
+#ifdef OUTPUT_SOFTENINGS
+/*! \brief Output function of the force softening.
+ *
+ * Looks up All.ForceSoftening for the softening type of the particle.
+ *
+ * \param[in] particle Index of particle/cell.
+ * \param[in] components (unused)
+ * \param[out] out_buffer File output buffer.
+ * \param[in] mode Mode (output)
+ *
+ * \return void
+ */
+static void io_func_softenings(int particle, int components, void *out_buffer, int mode)
+{
+  MyOutputFloat *soft_out = (MyOutputFloat *)out_buffer;
+
+  soft_out[0] = All.ForceSoftening[P[particle].SofteningType];
+}
+#endif /* #ifdef OUTPUT_SOFTENINGS */
+
+/*! \brief IO function of the particle positions.
+ *
+ * \param[in] particle Index of particle/cell.
+ * \param[in] components (unused)
+ * \param[out] out_buffer File output buffer.
+ * \param[in] mode Mode (0: output, 1: input).
+ *
+ * \return void
+ */
+void io_func_pos(int particle, int components, void *buffer, int mode)
+{
+  int k;
+
+  if(mode == 0)
+    {
+      /* Output. Both branches below run the same wrapping code; they differ
+       * only in the element type of the output buffer.
+       * NOTE(review): DumpFlag == 3 presumably selects the mini-snapshot
+       * output, which is registered as single precision (IO_POS_MINI) --
+       * confirm against the snapshot writer. */
+      if(DumpFlag != 3)  // TODO: clean up this code duplication
+        {
+#ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION
+          double *pp = buffer;
+#else /* #ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION */
+          MyOutputFloat *pp = buffer;
+#endif /* #ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION #else */
+
+          for(k = 0; k < 3; k++)
+            {
+              /* remove the global displacement that is added on input */
+              pp[k] = P[particle].Pos[k] - All.GlobalDisplacementVector[k];
+
+#if defined(GRAVITY_NOT_PERIODIC)
+              /* only type 0 is wrapped into the box in this configuration */
+              if(P[particle].Type != 0)
+                continue;
+#endif /* #if defined(GRAVITY_NOT_PERIODIC) */
+              /* box length along axis k, stretched by LONG_X/Y/Z if defined */
+              double boxSize = All.BoxSize;
+#ifdef LONG_X
+              if(k == 0)
+                boxSize = All.BoxSize * LONG_X;
+#endif /* #ifdef LONG_X */
+#ifdef LONG_Y
+              if(k == 1)
+                boxSize = All.BoxSize * LONG_Y;
+#endif /* #ifdef LONG_Y */
+#ifdef LONG_Z
+              if(k == 2)
+                boxSize = All.BoxSize * LONG_Z;
+#endif /* #ifdef LONG_Z */
+              /* wrap into [0, boxSize); the comparison is done on the value
+               * already stored in the output precision */
+              while(pp[k] < 0)
+                pp[k] += boxSize;
+              while(pp[k] >= boxSize)
+                pp[k] -= boxSize;
+            }
+        }
+      else
+        {
+          /* DumpFlag == 3: always MyOutputFloat, regardless of
+           * OUTPUT_COORDINATES_IN_DOUBLEPRECISION */
+          MyOutputFloat *pp = buffer;
+
+          for(k = 0; k < 3; k++)
+            {
+              pp[k] = P[particle].Pos[k] - All.GlobalDisplacementVector[k];
+
+#if defined(GRAVITY_NOT_PERIODIC)
+              if(P[particle].Type != 0)
+                continue;
+#endif /* #if defined(GRAVITY_NOT_PERIODIC) */
+              double boxSize = All.BoxSize;
+#ifdef LONG_X
+              if(k == 0)
+                boxSize = All.BoxSize * LONG_X;
+#endif /* #ifdef LONG_X */
+#ifdef LONG_Y
+              if(k == 1)
+                boxSize = All.BoxSize * LONG_Y;
+#endif /* #ifdef LONG_Y */
+#ifdef LONG_Z
+              if(k == 2)
+                boxSize = All.BoxSize * LONG_Z;
+#endif /* #ifdef LONG_Z */
+              while(pp[k] < 0)
+                pp[k] += boxSize;
+              while(pp[k] >= boxSize)
+                pp[k] -= boxSize;
+            }
+        }
+    }
+  else
+    {
+      /* Input: copy `components` values into P[particle].Pos and add the
+       * global displacement; no box wrapping is applied here. */
+#ifdef READ_COORDINATES_IN_DOUBLE
+      double *in_buffer = buffer;
+#else /* #ifdef READ_COORDINATES_IN_DOUBLE */
+      MyInputFloat *in_buffer = buffer;
+#endif /* #ifdef READ_COORDINATES_IN_DOUBLE #else */
+
+      for(k = 0; k < components; k++)
+        {
+          P[particle].Pos[k] = in_buffer[k] + All.GlobalDisplacementVector[k];
+        }
+    }
+}
+
+/*! \brief IO function for velocities.
+ *
+ * Note the different factors of scalefactor in the output than in the code!
+ * On output the stored velocity is multiplied by sqrt(All.cf_a3inv); on
+ * input the value is copied unchanged.
+ *
+ * \param[in] particle Index of particle/cell.
+ * \param[in] components Number of entries in array.
+ * \param[in,out] buffer File IO buffer (written for mode 0, read for mode 1).
+ * \param[in] mode Mode 0: output, 1: input.
+ *
+ * \return void
+ */
+static void io_func_vel(int particle, int components, void *buffer, int mode)
+{
+  int k;
+
+  if(mode == 0)
+    {
+      for(k = 0; k < components; k++)
+        {
+          ((MyOutputFloat *)buffer)[k] = P[particle].Vel[k];
+          ((MyOutputFloat *)buffer)[k] *= sqrt(All.cf_a3inv); /* we are dealing with p = a^2 * xdot */
+        }
+    }
+  else
+    {
+      /* the input buffer holds MyInputFloat values */
+      for(k = 0; k < components; k++)
+        {
+          P[particle].Vel[k] = ((MyInputFloat *)buffer)[k];
+        }
+    }
+}
+
+#ifdef OUTPUTACCELERATION
+/*! \brief IO function for gravitational accelerations.
+ *
+ * Note different a factors in output than in code.
+ *
+ * \param[in] particle Index of particle/cell.
+ * \param[in] components Number of entries in array.
+ * \param[out] out_buffer File output buffer.
+ * \param[in] mode Mode 0: output, 1: input.
+ *
+ * \return void
+ */
+static void io_func_accel(int particle, int components, void *out_buffer, int mode)
+{
+  int k;
+
+  if(mode == 0)
+    {
+      MyOutputFloat *out = (MyOutputFloat *)out_buffer;
+
+      /* for RestartFlag == 6 the accelerations are written without the
+       * All.cf_a2inv factor */
+      if(RestartFlag != 6)
+        for(k = 0; k < 3; k++)
+          out[k] = All.cf_a2inv * P[particle].GravAccel[k];
+      else
+        for(k = 0; k < 3; k++)
+          out[k] = P[particle].GravAccel[k];
+#ifdef PMGRID
+      /* add the PM part with the same scaling as above */
+      if(RestartFlag != 6)
+        for(k = 0; k < 3; k++)
+          out[k] += All.cf_a2inv * P[particle].GravPM[k];
+      else
+        for(k = 0; k < 3; k++)
+          out[k] += P[particle].GravPM[k];
+#endif /* #ifdef PMGRID */
+    }
+  else
+    {
+      /* Input buffers are read as MyInputFloat, as in io_func_vel; reading
+       * them as MyOutputFloat misinterprets the data whenever input and
+       * output precision differ. */
+      const MyInputFloat *in = (const MyInputFloat *)out_buffer;
+
+      for(k = 0; k < 3; k++)
+        P[particle].GravAccel[k] = in[k];
+    }
+}
+#endif /* #ifdef OUTPUTACCELERATION */
+
+/* -- user defined functions: additional physics -- */
+#ifdef OUTPUTCOOLRATE
+/*! \brief Output function of cooling rate.
+ *
+ * Writes Utherm / tcool, with tcool obtained from GetCoolingTime(), or 0
+ * if the cooling time is zero.
+ *
+ * \param[in] particle Index of particle/cell.
+ * \param[in] components (unused)
+ * \param[out] buffer File output buffer.
+ * \param[in] mode (unused)
+ *
+ * \return void
+ */
+static void io_func_coolrate(int particle, int components, void *buffer, int mode)
+{
+  double tcool, ne, nh0, coolrate;
+
+  ne = SphP[particle].Ne;
+  SetOutputGasState(particle, &ne, &nh0, &coolrate);
+
+  /* get cooling time */
+  tcool = GetCoolingTime(SphP[particle].Utherm, SphP[particle].Density * All.cf_a3inv, &ne);
+
+  /* convert cooling time with current thermal energy to du/dt */
+  if(tcool != 0)
+    ((MyOutputFloat *)buffer)[0] = SphP[particle].Utherm / tcool;
+  else
+    ((MyOutputFloat *)buffer)[0] = 0;
+}
+#endif /* #ifdef OUTPUTCOOLRATE */
+
+/* -- user defined functions: gas properties -- */
+#if defined(COOLING)
+/*! \brief IO function of the electron number density.
+ *
+ * \param[in] particle Index of particle/cell.
+ * \param[in] components (unused)
+ * \param[out] out_buffer File IO buffer.
+ * \param[in] mode Mode 0: output, 1: input.
+ *
+ * \return void
+ */
+static void io_func_ne(int particle, int components, void *buffer, int mode)
+{
+  if(mode == 0)
+    {
+      // normal code path: calculate Ne accounting for GFM options and USE_SFR
+      double ne = SphP[particle].Ne;
+
+#if defined(USE_SFR)
+      // reproduces previous behavior that Ne is updated prior to output only for Sfr>0 cells
+      // if this is unwanted (or redundant) this if() condition should be removed
+      double nh0, coolrate;
+      if(get_starformation_rate(particle) > 0)
+        SetOutputGasState(particle, &ne, &nh0, &coolrate);
+#endif /* #if defined(USE_SFR) */
+
+      ((MyOutputFloat *)buffer)[0] = ne;
+    }
+  else
+    {
+      SphP[particle].Ne = ((MyInputFloat *)buffer)[0];
+    }
+}
+#endif /* #if defined(COOLING) */
+
+#if defined(COOLING)
+/*! \brief Output function for neutral hydrogen fraction.
+ *
+ * The value is the nh0 result of SetOutputGasState() for this cell.
+ *
+ * \param[in] particle Index of particle/cell.
+ * \param[in] components (unused)
+ * \param[out] buffer File output buffer.
+ * \param[in] mode (unused)
+ *
+ * \return void
+ */
+static void io_func_nh(int particle, int components, void *buffer, int mode)
+{
+  double ne, nh0, coolrate;
+
+  ne = SphP[particle].Ne;
+  SetOutputGasState(particle, &ne, &nh0, &coolrate);
+
+  ((MyOutputFloat *)buffer)[0] = nh0;
+}
+#endif /* #if defined(COOLING) */
+
+#ifdef USE_SFR
+/*! \brief IO function for star formation rate.
+ *
+ * On output the rate is taken from get_starformation_rate(); on input the
+ * value is stored in SphP[particle].Sfr.
+ *
+ * \param[in] particle Index of particle/cell.
+ * \param[in] components (unused)
+ * \param[in,out] buffer File IO buffer (written for mode 0, read for mode 1).
+ * \param[in] mode Mode 0: output, 1: input.
+ *
+ * \return void
+ */
+static void io_func_sfr(int particle, int components, void *buffer, int mode)
+{
+  if(mode == 0)
+    {
+      ((MyOutputFloat *)buffer)[0] = get_starformation_rate(particle);
+    }
+  else
+    {
+      /* Input buffers are read as MyInputFloat, as in io_func_ne; reading
+       * them as MyOutputFloat misinterprets the data whenever input and
+       * output precision differ. */
+      SphP[particle].Sfr = ((MyInputFloat *)buffer)[0];
+    }
+}
+#endif /* #ifdef USE_SFR */
+
+/* -- user defined functions: other -- */
+#if defined(OUTPUT_CURLVEL)
+/*! \brief Output function for curl of velocity field.
+ *
+ * \param[in] particle Index of particle/cell.
+ * \param[in] components (unused)
+ * \param[out] out_buffer File IO buffer.
+ * \param[in] mode Mode 0: output.
+ *
+ * \return void
+ */
+static void io_func_curlvel(int particle, int components, void *out_buffer, int mode)
+{
+  /* only output is implemented; any other mode is a no-op */
+  if(mode == 0)
+    {
+      ((MyOutputFloat *)out_buffer)[0] = SphP[particle].CurlVel;
+    }
+}
+#endif /* #if defined(OUTPUT_CURLVEL) */
+
+#ifdef OUTPUT_VORTICITY
+/*! \brief Output function of vorticity (calculated from velocity spatial
+ *  derivatives).
+ *
+ * Writes the three antisymmetric combinations of SphP[particle].Grad.dvel.
+ *
+ * \param[in] particle Index of particle/cell.
+ * \param[in] components (unused)
+ * \param[out] out_buffer File IO buffer.
+ * \param[in] mode Mode 0: output
+ *
+ * \return void
+ */
+static void io_func_vorticity(int particle, int components, void *out_buffer, int mode)
+{
+  /* only output is implemented; any other mode is a no-op */
+  if(mode == 0)
+    {
+      ((MyOutputFloat *)out_buffer)[0] = SphP[particle].Grad.dvel[2][1] - SphP[particle].Grad.dvel[1][2];
+      ((MyOutputFloat *)out_buffer)[1] = SphP[particle].Grad.dvel[0][2] - SphP[particle].Grad.dvel[2][0];
+      ((MyOutputFloat *)out_buffer)[2] = SphP[particle].Grad.dvel[1][0] - SphP[particle].Grad.dvel[0][1];
+    }
+}
+#endif /* #ifdef OUTPUT_VORTICITY */
+
+#ifdef MHD
+/*! \brief IO function for magnetic field.
+ *
+ * Note that the output is in Gauss unit system (in code units) while the
+ * internal B-field is in Heaviside-Lorentz system (FACTOR of sqrt(4 PI)!).
+ *
+ * \param[in] particle Index of particle/cell.
+ * \param[in] components (unused)
+ * \param[in,out] out_buffer File IO buffer (written for mode 0, read otherwise).
+ * \param[in] mode Mode 0: output, 1: input.
+ *
+ * \return void
+ */
+static void io_func_bfield(int particle, int components, void *out_buffer, int mode)
+{
+  int k;
+
+  if(mode == 0)
+    {
+      /* writing: convert from Heaviside-Lorentz to Gauss */
+      for(k = 0; k < 3; k++)
+        ((MyOutputFloat *)out_buffer)[k] = SphP[particle].B[k] * sqrt(4. * M_PI);
+    }
+  else
+    {
+      /* reading: convert from Gauss to Heaviside-Lorentz */
+      for(k = 0; k < 3; k++)
+        SphP[particle].B[k] = ((MyInputFloat *)out_buffer)[k] / sqrt(4. * M_PI);
+    }
+}
+#endif /* #ifdef MHD */
+
+/*! \brief Function for field registering.
+ *
+ * Registers every snapshot field with init_field(), followed by
+ * init_units() and, for some fields, init_snapshot_type(). Which fields
+ * are registered depends on the compile-time options tested below.
+ *
+ * For init_field arguments read the description of init_field.
+ * Don't forget to add the new IO_FLAG to allvars.h.
+ *
+ * NOTE(review): most block labels (second init_field argument) are four
+ * characters long, but "ID ", "U ", "NE ", "NH " and "TBH" are shorter
+ * here. Confirm whether init_field expects exactly four characters and
+ * whether padding blanks were lost in this copy.
+ *
+ * \return void
+ */
+void init_io_fields()
+{
+  /* ALL TYPES */
+
+  /* on-disk type of the coordinates for writing (pos_out) and reading (pos_in) */
+#ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION
+  enum types_in_file pos_out = FILE_DOUBLE;
+#else /* #ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION */
+  enum types_in_file pos_out = FILE_MY_IO_FLOAT;
+#endif /* #ifdef OUTPUT_COORDINATES_IN_DOUBLEPRECISION #else */
+#ifdef READ_COORDINATES_IN_DOUBLE
+  enum types_in_file pos_in = FILE_DOUBLE;
+#else /* #ifdef READ_COORDINATES_IN_DOUBLE */
+  enum types_in_file pos_in = FILE_MY_IO_FLOAT;
+#endif /* #ifdef READ_COORDINATES_IN_DOUBLE #else */
+  init_field(IO_POS, "POS ", "Coordinates", MEM_MY_DOUBLE, pos_out, pos_in, 3, A_NONE, 0, io_func_pos, ALL_TYPES);
+  init_units(IO_POS, 1., -1., 1., 0., 0., All.UnitLength_in_cm);
+
+  init_field(IO_POS_MINI, "POS ", "Coordinates", MEM_MY_DOUBLE, FILE_MY_IO_FLOAT, FILE_NONE, 3, A_NONE, 0, io_func_pos, ALL_TYPES);
+  init_units(IO_POS_MINI, 1., -1., 1., 0., 0., All.UnitLength_in_cm);
+  init_snapshot_type(IO_POS_MINI, SN_MINI_ONLY); /* second IO tag output to mini-snaps always in single precision */
+
+  init_field(IO_VEL, "VEL ", "Velocities", MEM_MY_DOUBLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_NONE, 0, io_func_vel,
+             ALL_TYPES);                                                 /* particle velocities */
+  init_units(IO_VEL, 0.5, 0., 0., 0., 1., All.UnitVelocity_in_cm_per_s); /* sqrt(a)*km/s */
+  init_snapshot_type(IO_VEL, SN_MINI);
+
+  init_field(IO_ID, "ID ", "ParticleIDs", MEM_MY_ID_TYPE, FILE_MY_ID_TYPE, FILE_MY_ID_TYPE, 1, A_P, &P[0].ID, 0, ALL_TYPES);
+  init_units(IO_ID, 0, 0, 0, 0, 0, 0);
+  init_snapshot_type(IO_ID, SN_MINI);
+
+  init_field(IO_MASS, "MASS", "Masses", MEM_MY_DOUBLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_P, &P[0].Mass, 0,
+             SET_IN_GET_PARTICLES_IN_BLOCK); /* particle mass */
+  init_units(IO_MASS, 0., -1., 0., 1., 0., All.UnitMass_in_g);
+  init_snapshot_type(IO_MASS, SN_MINI);
+
+#ifdef OUTPUTPOTENTIAL
+  init_field(IO_POT, "POT ", "Potential", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_P, &P[0].Potential, 0,
+             ALL_TYPES); /* gravitational potential */
+  init_units(IO_POT, -1.0, 0.0, 0.0, 0.0, 2.0, All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s); /* (km/s)^2/a */
+
+  init_field(IO_POT_MINI, "POT ", "Potential", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_P, &P[0].Potential, 0,
+             STARS_ONLY | BHS_ONLY);
+  init_units(IO_POT_MINI, -1.0, 0.0, 0.0, 0.0, 2.0, All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s);
+  init_snapshot_type(IO_POT_MINI, SN_MINI_ONLY); /* second IO tag output to mini-snaps for stars/BHs only */
+#endif /* #ifdef OUTPUTPOTENTIAL */
+
+  /* GAS CELLS */
+
+  init_field(IO_U, "U ", "InternalEnergy", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].Utherm, 0,
+             GAS_ONLY); /* internal energy */
+  init_units(IO_U, 0., 0., 0., 0., 2., All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s);
+  init_snapshot_type(IO_U, SN_MINI);
+
+  init_field(IO_RHO, "RHO ", "Density", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].Density, 0,
+             GAS_ONLY); /* particle density */
+  init_units(IO_RHO, -3., 2., -3., 1., 0., All.UnitDensity_in_cgs);
+  init_snapshot_type(IO_RHO, SN_MINI);
+
+#ifdef OUTPUT_PRESSURE
+  init_field(IO_PRESSURE, "PRES", "Pressure", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].Pressure, 0, GAS_ONLY);
+  init_units(IO_PRESSURE, -3.0, 2.0, -3.0, 1.0, 2.0,
+             All.UnitDensity_in_cgs * All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s);
+#endif /* #ifdef OUTPUT_PRESSURE */
+
+#ifdef OUTPUT_CSND
+  init_field(IO_CSND, "CSND", "SoundSpeed", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].Csnd, 0, GAS_ONLY);
+  init_units(IO_CSND, 0., 0., 0., 0., 1., All.UnitVelocity_in_cm_per_s);
+#endif /* #ifdef OUTPUT_CSND */
+
+#if defined(COOLING)
+  init_field(IO_NE, "NE ", "ElectronAbundance", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_NONE, 0, io_func_ne,
+             GAS_ONLY);                /* electron abundance */
+  init_units(IO_NE, 0, 0, 0, 0, 0, 0); /* dimensionless fraction */
+  init_snapshot_type(IO_NE, SN_MINI);
+
+  init_field(IO_NH, "NH ", "NeutralHydrogenAbundance", MEM_NONE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_NONE, 0, io_func_nh,
+             GAS_ONLY);                /* neutral hydrogen fraction */
+  init_units(IO_NH, 0, 0, 0, 0, 0, 0); /* dimensionless fraction */
+#endif /* #if defined(COOLING) */
+
+#ifdef USE_SFR
+  init_field(IO_SFR, "SFR ", "StarFormationRate", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_NONE, 0, io_func_sfr,
+             GAS_ONLY);                                                    /* star formation rate */
+  init_units(IO_SFR, 0.0, 0.0, -1.0, 1.0, 1.0, SOLAR_MASS / SEC_PER_YEAR); /* Msun/yr */
+  init_snapshot_type(IO_SFR, SN_MINI);
+#endif /* #ifdef USE_SFR */
+
+#ifdef OUTPUT_DIVVEL
+  init_field(IO_DIVVEL, "DIVV", "VelocityDivergence", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].DivVel, 0,
+             GAS_ONLY);
+  init_units(IO_DIVVEL, 0.0, 1.0, -1.0, 0.0, 1.0, All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_DIVVEL */
+
+#if defined(OUTPUT_CURLVEL)
+  /* NOTE(review): registered with a read type, but io_func_curlvel only
+   * handles mode 0 -- confirm that reading this block is intended to be a no-op */
+  init_field(IO_CURLVEL, "ROTV", "VelocityCurl", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_NONE, 0, io_func_curlvel,
+             GAS_ONLY);
+  init_units(IO_CURLVEL, 0.0, 1.0, -1.0, 0.0, 1.0, All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm);
+#endif /* #if defined(OUTPUT_CURLVEL) */
+
+#ifdef OUTPUT_COOLHEAT
+  init_field(IO_COOLHEAT, "COHE", "CoolingHeatingEnergy", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].CoolHeat, 0,
+             GAS_ONLY);
+  init_units(IO_COOLHEAT, 0.0, 0.0, -1.0, 1.0, 3.0, All.UnitEnergy_in_cgs / All.UnitTime_in_s);
+#endif /* #ifdef OUTPUT_COOLHEAT */
+
+#ifdef OUTPUT_SURFACE_AREA
+  init_field(IO_SAREA, "AREA", "SurfaceArea", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].SurfaceArea, 0,
+             GAS_ONLY);
+  init_units(IO_SAREA, 2.0, -2.0, 2.0, 0.0, 0.0, All.UnitLength_in_cm * All.UnitLength_in_cm);
+
+  init_field(IO_NFACES, "NFAC", "NumFacesCell", MEM_INT, FILE_INT, FILE_INT, 1, A_SPHP, &SphP[0].CountFaces, 0, GAS_ONLY);
+  init_units(IO_NFACES, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
+#endif /* #ifdef OUTPUT_SURFACE_AREA */
+
+#ifdef OUTPUTCOOLRATE
+  init_field(IO_COOLRATE, "COOR", "CoolingRate", MEM_NONE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_NONE, 0, io_func_coolrate, GAS_ONLY);
+  init_units(IO_COOLRATE, 0.0, 0.0, -1.0, 1.0, 3.0, 1.0);
+#endif /* #ifdef OUTPUTCOOLRATE */
+
+#ifdef OUTPUT_VORTICITY
+  /* NOTE(review): registered with a read type, but io_func_vorticity only
+   * handles mode 0 -- confirm that reading this block is intended to be a no-op */
+  init_field(IO_VORT, "VORT", "Vorticity", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_NONE, 0, io_func_vorticity, GAS_ONLY);
+  init_units(IO_VORT, 0.0, 1.0, -1.0, 0.0, 1.0, All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_VORTICITY */
+
+  /* GAS CELLS GRADIENTS */
+
+#ifdef OUTPUT_PRESSURE_GRADIENT
+  init_field(IO_GRADP, "GRAP", "PressureGradient", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 3, A_SPHP, &SphP[0].Grad.dpress[0], 0,
+             GAS_ONLY);
+  init_units(IO_GRADP, -4.0, 3.0, -4.0, 1.0, 2.0,
+             All.UnitDensity_in_cgs * All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_PRESSURE_GRADIENT */
+
+#ifdef OUTPUT_DENSITY_GRADIENT
+  init_field(IO_GRADR, "GRAR", "DensityGradient", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 3, A_SPHP, &SphP[0].Grad.drho[0], 0,
+             GAS_ONLY);
+  init_units(IO_GRADR, -4., 3., -4., 1., 0., All.UnitDensity_in_cgs / All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_DENSITY_GRADIENT */
+
+#ifdef OUTPUT_VELOCITY_GRADIENT
+  init_field(IO_GRADV, "GRAV", "VelocityGradient", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 9, A_SPHP, &SphP[0].Grad.dvel[0][0], 0,
+             GAS_ONLY);
+  /* NOTE(review): the trailing comment below matches the one on IO_VEL, but
+   * the conversion factor here is velocity / length -- confirm the comment */
+  init_units(IO_GRADV, 0., 1., -1., 0., 1., All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm); /* sqrt(a)*km/s */
+#endif /* #ifdef OUTPUT_VELOCITY_GRADIENT */
+
+#ifdef OUTPUT_BFIELD_GRADIENT
+  init_field(IO_GRADB, "GRAB", "BfieldGradient", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 9, A_SPHP, &SphP[0].Grad.dB[0][0], 0,
+             GAS_ONLY);
+  init_units(IO_GRADB, -3., 2., -2.5, 0.5, 1., pow(All.UnitPressure_in_cgs, 0.5) / All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_BFIELD_GRADIENT */
+
+  /* GAS CELLS (MESH PROPERTIES) */
+
+#ifdef OUTPUT_VOLUME
+  init_field(IO_VOL, "VOL ", "Volume", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].Volume, 0, GAS_ONLY);
+  init_units(IO_VOL, 3., -3., 3., 0., 0., All.UnitLength_in_cm * All.UnitLength_in_cm * All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_VOLUME */
+
+#ifdef OUTPUT_VERTEX_VELOCITY
+  init_field(IO_VERTEXVEL, "VEVE", "VertexVelocity", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_SPHP,
+             &SphP[0].VelVertex[0], 0, GAS_ONLY);
+  init_units(IO_VERTEXVEL, 1., 0., 0., 0., 1., All.UnitVelocity_in_cm_per_s);
+#endif /* #ifdef OUTPUT_VERTEX_VELOCITY */
+
+#ifdef OUTPUT_MESH_FACE_ANGLE
+  init_field(IO_FACEANGLE, "FACA", "MaxFaceAngle", MEM_MY_SINGLE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].MaxFaceAngle, 0,
+             GAS_ONLY);
+  init_units(IO_FACEANGLE, 0., 0., 0., 0., 0., 0.0);
+#endif /* #ifdef OUTPUT_MESH_FACE_ANGLE */
+
+#ifdef OUTPUT_CENTER_OF_MASS
+  init_field(IO_CM, "CMCE", "CenterOfMass", MEM_MY_DOUBLE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_SPHP, &SphP[0].Center[0], 0,
+             GAS_ONLY);
+  init_units(IO_CM, 1., -1., 1., 0., 0., All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_CENTER_OF_MASS */
+
+  /* DIAGNOSTIC */
+
+#ifdef OUTPUT_TASK
+  init_field(IO_TASK, "TASK", "task", MEM_INT, FILE_INT, FILE_NONE, 1, A_NONE, 0, io_func_task, GAS_ONLY);
+  init_units(IO_TASK, 0., 0., 0., 0., 0., 0.0);
+#endif /* #ifdef OUTPUT_TASK */
+
+#ifdef OUTPUT_TIMEBIN_HYDRO
+  init_field(IO_TIMEBIN_HYDRO, "TBH", "TimebinHydro", MEM_NONE, FILE_INT, FILE_NONE, 1, A_NONE, 0, io_func_timebin_hydro, GAS_ONLY);
+  init_units(IO_TIMEBIN_HYDRO, 0., 0., 0., 0., 0., 0.0);
+#endif /* #ifdef OUTPUT_TIMEBIN_HYDRO */
+
+#ifdef OUTPUTTIMESTEP
+  init_field(IO_TSTP, "TSTP", "TimeStep", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_NONE, 0, io_func_timestep, ALL_TYPES);
+  init_units(IO_TSTP, 0., -1., 1., 0., -1., All.UnitTime_in_s);
+#endif /* #ifdef OUTPUTTIMESTEP */
+
+#ifdef OUTPUTACCELERATION
+  init_field(IO_ACCEL, "ACCE", "Acceleration", MEM_NONE, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 3, A_NONE, 0, io_func_accel, ALL_TYPES);
+  init_units(IO_ACCEL, -1., 1., -1., 0., 2., All.UnitVelocity_in_cm_per_s * All.UnitVelocity_in_cm_per_s / All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUTACCELERATION */
+
+#ifdef OUTPUT_SOFTENINGS
+  init_field(IO_SOFTENING, "SOFT", "Softenings", MEM_NONE, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_NONE, 0, io_func_softenings, ALL_TYPES);
+  init_units(IO_SOFTENING, 1., -1., 1., 0., 0., All.UnitLength_in_cm);
+#endif /* #ifdef OUTPUT_SOFTENINGS */
+
+#ifdef OUTPUTGRAVINTERACTIONS
+  /* NOTE(review): the data pointer is into SphP while the field is
+   * registered for ALL_TYPES -- confirm this combination is intended */
+  init_field(IO_GRAVITERACTIONS, "GINT", "GravityInteractions", MEM_INT, FILE_INT, FILE_NONE, 1, A_SPHP, &SphP[0].GravInteractions, 0,
+             ALL_TYPES);
+  init_units(IO_GRAVITERACTIONS, 0., 0., 0., 0., 0., 0.0);
+#endif /* #ifdef OUTPUTGRAVINTERACTIONS */
+
+  /* MHD */
+
+#ifdef MHD
+  /* the magnetic field is read from file unless a seed field is used and
+   * the run starts from initial conditions (RestartFlag == 0) */
+  enum types_in_file mhd_read = FILE_MY_IO_FLOAT;
+#if defined(MHD_SEEDFIELD)
+  if(RestartFlag == 0)
+    mhd_read = FILE_NONE; /* magnetic field not expected in ICs */
+#endif /* #if defined(MHD_SEEDFIELD) */
+
+  init_field(IO_BFLD, "BFLD", "MagneticField", MEM_NONE, FILE_MY_IO_FLOAT, mhd_read, 3, A_NONE, 0, io_func_bfield,
+             GAS_ONLY); /* magnetic field */
+  init_units(IO_BFLD, -2., 1., -1.5, 0.5, 1., pow(All.UnitPressure_in_cgs, 0.5));
+
+  init_field(IO_DIVB, "DIVB", "MagneticFieldDivergence", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, 1, A_SPHP, &SphP[0].DivB, 0,
+             GAS_ONLY); /* divergence of magnetic field */
+  init_units(IO_DIVB, -3., 2., -2.5, 0.5, 1., pow(All.UnitPressure_in_cgs, 0.5) / All.UnitLength_in_cm);
+#endif /* #ifdef MHD */
+
+  /* Scalars */
+
+#ifdef PASSIVE_SCALARS
+  /* the number of components per cell is the value of PASSIVE_SCALARS */
+  init_field(IO_PASS, "PASS", "PassiveScalars", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_MY_IO_FLOAT, PASSIVE_SCALARS, A_SPHP,
+             &SphP[0].PScalars[0], 0, GAS_ONLY);
+  init_units(IO_PASS, 0., 0., 0., 0., 0., 0.0);
+#endif /* #ifdef PASSIVE_SCALARS */
+
+  /* OTHER */
+
+#ifdef SAVE_HSML_IN_SNAPSHOT
+  init_field(IO_SUBFINDDENSITY, "SFDE", "SubfindDensity", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_PS, &PS[0].SubfindDensity, 0,
+             ALL_TYPES);
+  init_units(IO_SUBFINDDENSITY, -3., 2., -3., 1., 0., All.UnitDensity_in_cgs);
+  init_snapshot_type(IO_SUBFINDDENSITY, SN_NO_SUBBOX);
+
+  init_field(IO_SUBFINDDMDENSITY, "SFDD", "SubfindDMDensity", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_PS,
+             &PS[0].SubfindDMDensity, 0, ALL_TYPES);
+  init_units(IO_SUBFINDDMDENSITY, -3., 2., -3., 1., 0., All.UnitDensity_in_cgs);
+  init_snapshot_type(IO_SUBFINDDMDENSITY, SN_NO_SUBBOX);
+
+  init_field(IO_SUBFINDHSML, "SFHS", "SubfindHsml", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_PS, &PS[0].SubfindHsml, 0,
+             ALL_TYPES);
+  init_units(IO_SUBFINDHSML, 1., -1., 1., 0., 0., All.UnitLength_in_cm);
+  init_snapshot_type(IO_SUBFINDHSML, SN_NO_SUBBOX);
+
+  init_field(IO_SUBFINDVELDISP, "SFVD", "SubfindVelDisp", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_PS, &PS[0].SubfindVelDisp, 0,
+             ALL_TYPES);
+  init_units(IO_SUBFINDVELDISP, 0.0, 0.0, 0.0, 0.0, 1.0, All.UnitVelocity_in_cm_per_s);
+  init_snapshot_type(IO_SUBFINDVELDISP, SN_NO_SUBBOX);
+#endif /* #ifdef SAVE_HSML_IN_SNAPSHOT */
+
+#if defined(REFINEMENT_HIGH_RES_GAS)
+  init_field(IO_HIGHRESMASS, "HRGM", "HighResGasMass", MEM_MY_FLOAT, FILE_MY_IO_FLOAT, FILE_NONE, 1, A_SPHP, &SphP[0].HighResMass, 0,
+             GAS_ONLY);
+  init_units(IO_HIGHRESMASS, 0, -1, 0, 1, 0, All.UnitMass_in_g);
+
+  init_field(IO_ALLOWREFINEMENT, "REF ", "AllowRefinement", MEM_INT, FILE_INT, FILE_INT, 1, A_SPHP, &SphP[0].AllowRefinement, 0,
+             GAS_ONLY);
+  init_units(IO_ALLOWREFINEMENT, 0, 0, 0, 0, 0, 0);
+#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */
+}
diff --git
a/src/amuse/community/arepo/src/io/logs.c b/src/amuse/community/arepo/src/io/logs.c new file mode 100644 index 0000000000..6354cf3609 --- /dev/null +++ b/src/amuse/community/arepo/src/io/logs.c @@ -0,0 +1,623 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/io/logs.c + * \date 05/2018 + * \brief Log-files handling. + * \details contains functions: + * void open_logfiles(void) + * void close_logfiles(void) + * void output_log_messages(void) + * void init_cpu_log(void) + * void write_cpu_log(void) + * void put_symbol(char *string, double t0, double t1, char c) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 07.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +#define CPU_STRING_LEN 120 + +/*! \brief Contains informations about the used CPU timers like it's name, + * symbols etc. 
+ */ +struct timer_d Timer_data[CPU_LAST + 1]; + +enum timers TimerStack[TIMER_STACK_DEPTH]; +int TimerStackPos = 0; + +/*! \brief Opens files for logging. + * + * This function opens various log-files that report on the status and + * performance of the simulation. Upon restart, the code will append to + * these files. + * + * \return void + */ +void open_logfiles(void) +{ + char mode[2], buf[1000], msg[1000]; + + if(RestartFlag == 0) + strcpy(mode, "w"); + else + strcpy(mode, "a"); + + if(ThisTask == 0) + mkdir(All.OutputDir, 02755); + + MPI_Barrier(MPI_COMM_WORLD); + +#ifdef DETAILEDTIMINGS + sprintf(buf, "%stimings_detailed_%d.txt", All.OutputDir, ThisTask); + if(!(FdDetailed = fopen(buf, mode))) + terminate("error in opening file '%s'\n", buf); +#endif /* #ifdef DETAILEDTIMINGS */ + + if(ThisTask != 0) /* only the root processors writes to the log files */ + return; + + sprintf(buf, "%s%s", All.OutputDir, "cpu.txt"); + if(!(FdCPU = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + + sprintf(buf, "%s%s", All.OutputDir, "info.txt"); + if(!(FdInfo = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + + sprintf(buf, "%s%s", All.OutputDir, "energy.txt"); + if(!(FdEnergy = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + + sprintf(buf, "%s%s", All.OutputDir, "timings.txt"); + if(!(FdTimings = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + + sprintf(buf, "%s%s", All.OutputDir, "balance.txt"); + if(!(FdBalance = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + + sprintf(buf, "%s%s", All.OutputDir, "timebins.txt"); + if(!(FdTimebin = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + + sprintf(buf, "%s%s", All.OutputDir, "domain.txt"); + if(!(FdDomain = fopen(buf, mode))) 
+ { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + + sprintf(buf, "%s%s", All.OutputDir, "memory.txt"); + if(!(FdMemory = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + +#ifdef FORCETEST + sprintf(buf, "%s%s", All.OutputDir, "forcetest.txt"); + if(!(FdForceTest = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } + fclose(FdForceTest); +#endif /* #ifdef FORCETEST */ + +#ifdef RESTART_DEBUG + sprintf(buf, "%s%s", All.OutputDir, "restartdebug.txt"); + if(!(FdRestartTest = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } +#endif /* #ifdef RESTART_DEBUG */ + +#ifdef OUTPUT_CPU_CSV + sprintf(buf, "%s%s", All.OutputDir, "cpu.csv"); + if(!(FdCPUCSV = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } +#endif /* #ifdef OUTPUT_CPU_CSV */ + +#ifdef USE_SFR + sprintf(buf, "%s%s", All.OutputDir, "sfr.txt"); + if(!(FdSfr = fopen(buf, mode))) + { + sprintf(msg, "error in opening file '%s'\n", buf); + terminate(msg); + } +#endif /* #ifdef USE_SFR */ + + int i = 0; + fprintf(FdBalance, "\n"); + +#ifdef OUTPUT_CPU_CSV + fprintf(FdCPUCSV, "STEP, TIME, CPUS, MULTIPLEDOMAIN, HIGHESTTIMEBIN, "); +#endif /* #ifdef OUTPUT_CPU_CSV */ + for(; i < CPU_LAST; i++) + { + if(Timer_data[i].symb != 0 && Timer_data[i].symbImbal != 0) + { + fprintf(FdBalance, "%-20s = '%c' / '%c'\n", Timer_data[i].longname, Timer_data[i].symb, Timer_data[i].symbImbal); + } +#ifdef OUTPUT_CPU_CSV + fprintf(FdCPUCSV, "%s1, %s2, %s3, ", Timer_data[i].shortname, Timer_data[i].shortname, Timer_data[i].shortname); +#endif /* #ifdef OUTPUT_CPU_CSV */ + } + fprintf(FdBalance, "\n"); + +#ifdef OUTPUT_CPU_CSV + fprintf(FdCPUCSV, "\n"); +#endif /* #ifdef OUTPUT_CPU_CSV */ +} + +/*! \brief Closes the global log-files. 
+ * + * \return void + */ +void close_logfiles(void) +{ + if(ThisTask != 0) /* only the root processors writes to the log files */ + return; + + fclose(FdCPU); + fclose(FdInfo); + fclose(FdEnergy); + fclose(FdTimings); + fclose(FdBalance); + fclose(FdTimebin); + +#ifdef OUTPUT_CPU_CSV + fclose(FdCPUCSV); +#endif /* #ifdef OUTPUT_CPU_CSV */ + +#ifdef USE_SFR + fclose(FdSfr); +#endif /* #ifdef USE_SFR */ +} + +/*! \brief Writes log messages in log-files. + * + * At each time step this function writes on to two log-files. + * In FdInfo, it just lists the timesteps that have been done, while in + * FdTimeBin it outputs information about the active and occupied time-bins. + * Additionally, reports to memory log-files are written. + * + * \return void + */ +void output_log_messages(void) +{ + double z; + int i, j, write_logs = 1; + double sum, avg_CPU_TimeBin[TIMEBINS], frac_CPU_TimeBin[TIMEBINS]; + int weight, corr_weight; + long long tot_cumulative_grav[TIMEBINS], tot_cumulative_sph[TIMEBINS]; + long long tot_grav, tot_sph; + + TIMER_START(CPU_LOGS); + + if(write_logs) + report_detailed_memory_usage_of_largest_task(); + + long long count[4 * TIMEBINS], tot_count[4 * TIMEBINS]; + long long *tot_count_grav = &tot_count[0], *tot_count_sph = &tot_count[TIMEBINS]; + int nelem = 2 * TIMEBINS; + + for(int i = 0; i < TIMEBINS; i++) + count[i] = TimeBinsGravity.TimeBinCount[i]; + + for(int i = 0; i < TIMEBINS; i++) + count[i + TIMEBINS] = TimeBinsHydro.TimeBinCount[i]; + + MPI_Reduce(count, tot_count, nelem, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + if(All.ComovingIntegrationOn) + { + z = 1.0 / (All.Time) - 1; + + if(write_logs) + fprintf(FdInfo, + "\nSync-Point %d, TimeBin=%d, Time: %g, Redshift: %g, Systemstep: %g, Dloga: %g, Nsync-grv: %10llu, Nsync-hyd: " + "%10llu\n", + All.NumCurrentTiStep, All.HighestActiveTimeBin, All.Time, z, All.TimeStep, + log(All.Time) - log(All.Time - All.TimeStep), All.GlobalNSynchronizedGravity, 
All.GlobalNSynchronizedHydro); + + printf("\n\nSync-Point %d, Time: %g, Redshift: %g, Systemstep: %g, Dloga: %g, Nsync-grv: %10llu, Nsync-hyd: %10llu\n", + All.NumCurrentTiStep, All.Time, z, All.TimeStep, log(All.Time) - log(All.Time - All.TimeStep), + All.GlobalNSynchronizedGravity, All.GlobalNSynchronizedHydro); + + if(write_logs) + fprintf(FdTimebin, "\nSync-Point %d, Time: %g, Redshift: %g, Systemstep: %g, Dloga: %g\n", All.NumCurrentTiStep, All.Time, + z, All.TimeStep, log(All.Time) - log(All.Time - All.TimeStep)); + + myflush(FdInfo); + } + else + { + if(write_logs) + fprintf(FdInfo, "\nSync-Point %d, TimeBin=%d, Time: %g, Systemstep: %g, Nsync-grv: %10llu, Nsync-hyd: %10llu\n", + All.NumCurrentTiStep, All.HighestActiveTimeBin, All.Time, All.TimeStep, All.GlobalNSynchronizedGravity, + All.GlobalNSynchronizedHydro); + + printf("\n\nSync-Point %d, Time: %g, Systemstep: %g, Nsync-grv: %10llu, Nsync-hyd: %10llu\n", All.NumCurrentTiStep, All.Time, + All.TimeStep, All.GlobalNSynchronizedGravity, All.GlobalNSynchronizedHydro); + + if(write_logs) + fprintf(FdTimebin, "\nSync-Point %d, Time: %g, Systemstep: %g\n", All.NumCurrentTiStep, All.Time, All.TimeStep); + + myflush(FdInfo); + } + + for(i = 1, tot_cumulative_grav[0] = tot_count_grav[0], tot_cumulative_sph[0] = tot_count_sph[0]; i < TIMEBINS; i++) + { + tot_cumulative_grav[i] = tot_count_grav[i] + tot_cumulative_grav[i - 1]; + tot_cumulative_sph[i] = tot_count_sph[i] + tot_cumulative_sph[i - 1]; + } + + for(i = 0; i < TIMEBINS; i++) + { + for(j = 0, sum = 0; j < All.CPU_TimeBinCountMeasurements[i]; j++) + sum += All.CPU_TimeBinMeasurements[i][j]; + if(All.CPU_TimeBinCountMeasurements[i]) + avg_CPU_TimeBin[i] = sum / All.CPU_TimeBinCountMeasurements[i]; + else + avg_CPU_TimeBin[i] = 0; + } + + for(i = All.HighestOccupiedTimeBin, weight = 1, sum = 0; i >= 0 && tot_count_grav[i] > 0; i--, weight *= 2) + { + if(weight > 1) + corr_weight = weight / 2; + else + corr_weight = weight; + + frac_CPU_TimeBin[i] = 
corr_weight * avg_CPU_TimeBin[i]; + sum += frac_CPU_TimeBin[i]; + } + + for(i = All.HighestOccupiedTimeBin; i >= 0 && tot_count_grav[i] > 0; i--) + { + if(sum) + frac_CPU_TimeBin[i] /= sum; + } + + char tracerString[13]; + + sprintf(tracerString, "%s", ""); + + char dustString[13]; + sprintf(dustString, "%s", ""); + if(write_logs) + fprintf(FdTimebin, + "Occupied timebins: gravity hydro %s %s dt cumul-grav cumul-sph A D avg-time " + "cpu-frac\n", + tracerString, dustString); + + for(i = TIMEBINS - 1, tot_grav = tot_sph = 0; i >= 0; i--) + { + int binUsed = 0; + +#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) + if(tot_count_grav[i] > 0) + binUsed = 1; +#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) \ + */ + + if(tot_count_sph[i] > 0) + binUsed = 1; + + sprintf(tracerString, "%s", ""); + + if(binUsed) + { + if(write_logs) + fprintf(FdTimebin, " %c bin=%2d %10llu %10llu %s %s %16.12f %10llu %10llu %c %c %10.2f %5.1f%%\n", + TimeBinSynchronized[i] ? 'X' : ' ', i, tot_count_grav[i], tot_count_sph[i], tracerString, dustString, + i > 0 ? (((integertime)1) << i) * All.Timebase_interval : 0.0, tot_cumulative_grav[i], tot_cumulative_sph[i], + (i == All.HighestActiveTimeBin) ? '<' : ' ', + (All.HighestActiveTimeBin >= All.SmallestTimeBinWithDomainDecomposition && i == All.HighestActiveTimeBin) + ? '*' + : ' ', + avg_CPU_TimeBin[i], 100.0 * frac_CPU_TimeBin[i]); + + if(TimeBinSynchronized[i]) + { + tot_grav += tot_count_grav[i]; + tot_sph += tot_count_sph[i]; + } + } + } + + if(write_logs) + { + fprintf(FdTimebin, " ------------------------\n"); + } + + sprintf(tracerString, "%s", ""); + sprintf(dustString, "%s", ""); + + if(write_logs) + { +#ifdef PMGRID + if(All.PM_Ti_endstep == All.Ti_Current) + { + fprintf(FdTimebin, "PM-Step. 
Total: %10llu %10llu %s %s\n", tot_grav, tot_sph, tracerString, dustString); + } + else +#endif /* #ifdef PMGRID */ + { + fprintf(FdTimebin, "Total active: %10llu %10llu %s %s\n", tot_grav, tot_sph, tracerString, dustString); + } + + fprintf(FdTimebin, "\n"); + } + + myflush(FdTimebin); + } + +#ifdef RESTART_DEBUG + log_restart_debug(); +#endif /* #ifdef RESTART_DEBUG */ + + TIMER_STOP(CPU_LOGS); +} + +/*! \brief Initializes cpu log file. + * + * \return void + */ +void init_cpu_log(void) +{ + int i = 0; + +#define TIMER_STRUCT +#include "../utils/timer.h" + + for(i = 0; i < CPU_LAST; i++) + { + if(Timer_data[i].parent >= 0) + Timer_data[i].depth = Timer_data[Timer_data[i].parent].depth + 1; + else + Timer_data[i].depth = 0; + } + + for(i = 0; i < CPU_LAST; i++) + { + All.CPU_Sum[i] = 0.; + CPU_Step[i] = 0.; + } + + TimerStackPos = 0; + TimerStack[0] = CPU_MISC; + + CPUThisRun = 0.; + + WallclockTime = second(); + StartOfRun = second(); +} + +/*! \brief Write the FdBalance and FdCPU files. + * + * At each time step this function writes on to two log-files. + * In FdBalance, it outputs in a graphical way the amount of + * time spent in the various parts of the code, while + * in FdCPU it writes information about the cpu-time consumption + * of the various modules. 
+ * + * \return void + */ +void write_cpu_log(void) +{ + int write_logs = 1; + double max_CPU_Step[CPU_LAST], avg_CPU_Step[CPU_LAST], summed_CPU_Step[CPU_LAST]; + double t0, t1, tsum; + double avg_total = 0; + double local_total = 0; + double max_total = 0; + int i; + + TIMER_START(CPU_LOGS); + + for(i = 0; i < CPU_LAST; i++) + { + local_total += CPU_Step[i]; + } + + MPI_Reduce(CPU_Step, max_CPU_Step, CPU_LAST, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&local_total, &max_total, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(CPU_Step, avg_CPU_Step, CPU_LAST, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + /* sum up cpu items into groups */ + for(i = 0; i < CPU_LAST; i++) + { + summed_CPU_Step[i] = avg_CPU_Step[i]; + } + for(i = CPU_LAST - 1; i > CPU_ALL; i--) + { + if(Timer_data[i].parent >= 0) + { + summed_CPU_Step[Timer_data[i].parent] += summed_CPU_Step[i]; + } + } + + /* calc averages, update All.CPU_Sum */ + for(i = 0; i < CPU_LAST; i++) + { + avg_CPU_Step[i] /= NTask; + avg_total += avg_CPU_Step[i]; + + summed_CPU_Step[i] /= NTask; + All.CPU_Sum[i] += summed_CPU_Step[i]; + } + + /* create balance.txt string */ + char cpu_String[CPU_STRING_LEN + 1]; + put_symbol(cpu_String, 0., 1.0, '-'); + + for(i = 1, tsum = 0.0; i < CPU_LAST; i++) + { + if(max_CPU_Step[i] > 0 && Timer_data[i].symb != 0 && Timer_data[i].symbImbal != 0) + { + t0 = tsum; + t1 = tsum + avg_CPU_Step[i] * (avg_CPU_Step[i] / max_CPU_Step[i]); + put_symbol(cpu_String, t0 / avg_total, t1 / avg_total, Timer_data[i].symb); + tsum += t1 - t0; + + t0 = tsum; + t1 = tsum + avg_CPU_Step[i] * ((max_CPU_Step[i] - avg_CPU_Step[i]) / max_CPU_Step[i]); + put_symbol(cpu_String, t0 / avg_total, t1 / avg_total, Timer_data[i].symbImbal); + tsum += t1 - t0; + } + } + + if(write_logs) + { + fprintf(FdBalance, "Step=%7d sec=%10.3f Nsync-grv=%10llu Nsync-hyd=%10llu %s\n", All.NumCurrentTiStep, max_total, + All.GlobalNSynchronizedGravity, All.GlobalNSynchronizedHydro, 
cpu_String); + } + + myflush(FdBalance); + + if(All.CPU_TimeBinCountMeasurements[All.HighestActiveTimeBin] == NUMBER_OF_MEASUREMENTS_TO_RECORD) + { + All.CPU_TimeBinCountMeasurements[All.HighestActiveTimeBin]--; + memmove(&All.CPU_TimeBinMeasurements[All.HighestActiveTimeBin][0], &All.CPU_TimeBinMeasurements[All.HighestActiveTimeBin][1], + (NUMBER_OF_MEASUREMENTS_TO_RECORD - 1) * sizeof(double)); + } + + All.CPU_TimeBinMeasurements[All.HighestActiveTimeBin][All.CPU_TimeBinCountMeasurements[All.HighestActiveTimeBin]++] = max_total; + + if(write_logs) + { +#ifdef OUTPUT_CPU_CSV + fprintf(FdCPUCSV, "%d, %g, %d, %d, %d, ", All.NumCurrentTiStep, All.Time, NTask, All.MultipleDomains, + All.HighestActiveTimeBin); +#endif /* #ifdef OUTPUT_CPU_CSV */ + fprintf(FdCPU, "Step %d, Time: %g, CPUs: %d, MultiDomains: %d, HighestActiveTimeBin: %d\n", All.NumCurrentTiStep, All.Time, + NTask, All.MultipleDomains, All.HighestActiveTimeBin); + + fprintf(FdCPU, " diff cumulative\n"); + + for(i = 0; i < CPU_LAST; i++) + { + fprintf(FdCPU, "%*s%*s%10.2f %5.1f%% %10.2f %*s%5.1f%%\n", 2 * Timer_data[i].depth, "", -20 + 2 * Timer_data[i].depth, + Timer_data[i].longname, summed_CPU_Step[i], summed_CPU_Step[i] / summed_CPU_Step[CPU_ALL] * 100., All.CPU_Sum[i], + 5 * Timer_data[i].depth, "", All.CPU_Sum[i] / All.CPU_Sum[CPU_ALL] * 100.); + +#ifdef OUTPUT_CPU_CSV + fprintf(FdCPUCSV, "%f, %f, %f, ", summed_CPU_Step[i], All.CPU_Sum[i], All.CPU_Sum[i] / All.CPU_Sum[CPU_ALL] * 100.); +#endif /* #ifdef OUTPUT_CPU_CSV */ + } + + fprintf(FdCPU, "\n"); + } + + myflush(FdCPU); + +#ifdef OUTPUT_CPU_CSV + if(write_logs) + fprintf(FdCPUCSV, "\n"); + + myflush(FdCPUCSV); +#endif /* #ifdef OUTPUT_CPU_CSV */ + } + + for(i = 0; i < CPU_LAST; i++) + CPU_Step[i] = 0.; + + CPUThisRun = timediff(StartOfRun, second()); + + TIMER_STOP(CPU_LOGS); +} + +/*! \brief Fill the cpu balance string representing the cpu usage in a + * graphical way. 
+ * + * This function fills a fraction, specified by the parameters t0 and t1, + * of the array string with the debug symbol given by c. + * + * \param[out] string String to fill. + * \param[in] t0 Initial position of the symbol in the array as a fraction of + * its maximum dimension. + * \param[in] t1 Final position of the symbol in the array as a fraction of + * its maximum dimension. + * \param[in] c Symbol to be put on string. + * + * \return void + */ +void put_symbol(char *string, double t0, double t1, char c) +{ + int i, j; + + i = (int)(t0 * CPU_STRING_LEN + 0.5); + j = (int)(t1 * CPU_STRING_LEN); + + if(i < 0) + i = 0; + if(j >= CPU_STRING_LEN) + j = CPU_STRING_LEN; + + while(i <= j) + string[i++] = c; + + string[CPU_STRING_LEN] = 0; +} diff --git a/src/amuse/community/arepo/src/io/parameters.c b/src/amuse/community/arepo/src/io/parameters.c new file mode 100644 index 0000000000..059d422ceb --- /dev/null +++ b/src/amuse/community/arepo/src/io/parameters.c @@ -0,0 +1,861 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/io/parameters.c + * \date 05/2018 + * \brief Parses the parameter file. 
+ * \details This file contains the routine to parse the parameter file. + * Additionally the output list is also parsed. + * contains functions: + * void read_parameter_file(char *fname) + * void check_parameters() + * int read_outputlist(char *fname) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 06.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief This function parses the parameter file. + * + * Each parameter is defined by a keyword (`tag'), and can be either + * of type douple, int, or character string. Three arrays containing the name, + * type and address of the parameter are filled first. The routine then parses + * the parameter file and fills the referenced variables. The routine makes + * sure that each parameter appears exactly once in the parameter file, + * otherwise error messages are produced that complain about the missing + * parameters. + * + * \param[in] fname The file name of the parameter file + * + * \return void + */ +void read_parameter_file(char *fname) +{ +#define REAL 1 +#define STRING 2 +#define INT 3 + + FILE *fd, *fdout; + char buf[MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 200], buf1[MAXLEN_PARAM_TAG + 200], buf2[MAXLEN_PARAM_VALUE + 200], + buf3[MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 400]; + int i, j, nt; + int id[MAX_PARAMETERS]; + void *addr[MAX_PARAMETERS]; + char tag[MAX_PARAMETERS][MAXLEN_PARAM_TAG]; + int param_handled[MAX_PARAMETERS]; + int errorFlag = 0; + + All.StarformationOn = 0; /* defaults */ + + for(i = 0; i < MAX_PARAMETERS; i++) + { + param_handled[i] = 0; + } + + if(sizeof(long long) != 8) + { + mpi_terminate("\nType `long long' is not 64 bit on this platform. Stopping.\n\n"); + } + + if(sizeof(int) != 4) + { + mpi_terminate("\nType `int' is not 32 bit on this platform. 
Stopping.\n\n"); + } + + if(sizeof(float) != 4) + { + mpi_terminate("\nType `float' is not 32 bit on this platform. Stopping.\n\n"); + } + + if(sizeof(double) != 8) + { + mpi_terminate("\nType `double' is not 64 bit on this platform. Stopping.\n\n"); + } + + if(ThisTask == 0) /* read parameter file on process 0 */ + { + nt = 0; + + strcpy(tag[nt], "InitCondFile"); + addr[nt] = All.InitCondFile; + id[nt++] = STRING; + + strcpy(tag[nt], "OutputDir"); + addr[nt] = All.OutputDir; + id[nt++] = STRING; + +#ifdef TOLERATE_WRITE_ERROR + strcpy(tag[nt], "AlternativeOutputDir"); + addr[nt] = AlternativeOutputDir; + id[nt++] = STRING; +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + + strcpy(tag[nt], "SnapshotFileBase"); + addr[nt] = All.SnapshotFileBase; + id[nt++] = STRING; + + strcpy(tag[nt], "ResubmitCommand"); + addr[nt] = All.ResubmitCommand; + id[nt++] = STRING; + + strcpy(tag[nt], "OutputListFilename"); + addr[nt] = All.OutputListFilename; + id[nt++] = STRING; + + strcpy(tag[nt], "OutputListOn"); + addr[nt] = &All.OutputListOn; + id[nt++] = INT; + + strcpy(tag[nt], "Omega0"); + addr[nt] = &All.Omega0; + id[nt++] = REAL; + + strcpy(tag[nt], "OmegaBaryon"); + addr[nt] = &All.OmegaBaryon; + id[nt++] = REAL; + + strcpy(tag[nt], "OmegaLambda"); + addr[nt] = &All.OmegaLambda; + id[nt++] = REAL; + + strcpy(tag[nt], "HubbleParam"); + addr[nt] = &All.HubbleParam; + id[nt++] = REAL; + + strcpy(tag[nt], "BoxSize"); + addr[nt] = &All.BoxSize; + id[nt++] = REAL; + + strcpy(tag[nt], "PeriodicBoundariesOn"); + addr[nt] = &All.PeriodicBoundariesOn; + id[nt++] = INT; + + strcpy(tag[nt], "MaxMemSize"); + addr[nt] = &All.MaxMemSize; + id[nt++] = INT; + + strcpy(tag[nt], "TimeOfFirstSnapshot"); + addr[nt] = &All.TimeOfFirstSnapshot; + id[nt++] = REAL; + + strcpy(tag[nt], "CpuTimeBetRestartFile"); + addr[nt] = &All.CpuTimeBetRestartFile; + id[nt++] = REAL; + +#ifdef REDUCE_FLUSH + strcpy(tag[nt], "FlushCpuTimeDiff"); + addr[nt] = &All.FlushCpuTimeDiff; + id[nt++] = REAL; +#endif /* #ifdef 
REDUCE_FLUSH */ + + strcpy(tag[nt], "TimeBetStatistics"); + addr[nt] = &All.TimeBetStatistics; + id[nt++] = REAL; + + strcpy(tag[nt], "TimeBegin"); + addr[nt] = &All.TimeBegin; + id[nt++] = REAL; + + strcpy(tag[nt], "TimeMax"); + addr[nt] = &All.TimeMax; + id[nt++] = REAL; + + strcpy(tag[nt], "TimeBetSnapshot"); + addr[nt] = &All.TimeBetSnapshot; + id[nt++] = REAL; + + strcpy(tag[nt], "UnitVelocity_in_cm_per_s"); + addr[nt] = &All.UnitVelocity_in_cm_per_s; + id[nt++] = REAL; + + strcpy(tag[nt], "UnitLength_in_cm"); + addr[nt] = &All.UnitLength_in_cm; + id[nt++] = REAL; + + strcpy(tag[nt], "UnitMass_in_g"); + addr[nt] = &All.UnitMass_in_g; + id[nt++] = REAL; + + strcpy(tag[nt], "ErrTolIntAccuracy"); + addr[nt] = &All.ErrTolIntAccuracy; + id[nt++] = REAL; + + strcpy(tag[nt], "ErrTolTheta"); + addr[nt] = &All.ErrTolTheta; + id[nt++] = REAL; + + strcpy(tag[nt], "ErrTolForceAcc"); + addr[nt] = &All.ErrTolForceAcc; + id[nt++] = REAL; + + strcpy(tag[nt], "MaxSizeTimestep"); + addr[nt] = &All.MaxSizeTimestep; + id[nt++] = REAL; + + strcpy(tag[nt], "MinSizeTimestep"); + addr[nt] = &All.MinSizeTimestep; + id[nt++] = REAL; + + strcpy(tag[nt], "CourantFac"); + addr[nt] = &All.CourantFac; + id[nt++] = REAL; + + strcpy(tag[nt], "LimitUBelowThisDensity"); + addr[nt] = &All.LimitUBelowThisDensity; + id[nt++] = REAL; + + strcpy(tag[nt], "LimitUBelowCertainDensityToThisValue"); + addr[nt] = &All.LimitUBelowCertainDensityToThisValue; + id[nt++] = REAL; + + strcpy(tag[nt], "DesNumNgb"); + addr[nt] = &All.DesNumNgb; + id[nt++] = INT; + + strcpy(tag[nt], "MultipleDomains"); + addr[nt] = &All.MultipleDomains; + id[nt++] = INT; + + strcpy(tag[nt], "TopNodeFactor"); + addr[nt] = &All.TopNodeFactor; + id[nt++] = REAL; + + strcpy(tag[nt], "ActivePartFracForNewDomainDecomp"); + addr[nt] = &All.ActivePartFracForNewDomainDecomp; + id[nt++] = REAL; + +#ifdef SUBFIND + strcpy(tag[nt], "DesLinkNgb"); + addr[nt] = &All.DesLinkNgb; + id[nt++] = INT; + + strcpy(tag[nt], "ErrTolThetaSubfind"); + 
addr[nt] = &All.ErrTolThetaSubfind; + id[nt++] = REAL; +#endif /* #ifdef SUBFIND */ + +#if defined(ISOTHERM_EQS) + strcpy(tag[nt], "IsoSoundSpeed"); + addr[nt] = &All.IsoSoundSpeed; + id[nt++] = REAL; +#endif /* #if defined(ISOTHERM_EQS) */ + + strcpy(tag[nt], "MaxNumNgbDeviation"); + addr[nt] = &All.MaxNumNgbDeviation; + id[nt++] = REAL; + + strcpy(tag[nt], "ComovingIntegrationOn"); + addr[nt] = &All.ComovingIntegrationOn; + id[nt++] = INT; + + strcpy(tag[nt], "ICFormat"); + addr[nt] = &All.ICFormat; + id[nt++] = INT; + + strcpy(tag[nt], "SnapFormat"); + addr[nt] = &All.SnapFormat; + id[nt++] = INT; + + strcpy(tag[nt], "NumFilesPerSnapshot"); + addr[nt] = &All.NumFilesPerSnapshot; + id[nt++] = INT; + + strcpy(tag[nt], "NumFilesWrittenInParallel"); + addr[nt] = &All.NumFilesWrittenInParallel; + id[nt++] = INT; + + strcpy(tag[nt], "ResubmitOn"); + addr[nt] = &All.ResubmitOn; + id[nt++] = INT; + + strcpy(tag[nt], "CoolingOn"); + addr[nt] = &All.CoolingOn; + id[nt++] = INT; + + strcpy(tag[nt], "StarformationOn"); + addr[nt] = &All.StarformationOn; + id[nt++] = INT; + + strcpy(tag[nt], "TypeOfTimestepCriterion"); + addr[nt] = &All.TypeOfTimestepCriterion; + id[nt++] = INT; + + strcpy(tag[nt], "TypeOfOpeningCriterion"); + addr[nt] = &All.TypeOfOpeningCriterion; + id[nt++] = INT; + + strcpy(tag[nt], "TimeLimitCPU"); + addr[nt] = &All.TimeLimitCPU; + id[nt++] = REAL; + + strcpy(tag[nt], "GasSoftFactor"); + addr[nt] = &All.GasSoftFactor; + id[nt++] = REAL; + + for(i = 0; i < NSOFTTYPES; i++) + { + char buf[100]; + sprintf(buf, "SofteningComovingType%d", i); + strcpy(tag[nt], buf); + addr[nt] = &All.SofteningComoving[i]; + id[nt++] = REAL; + } + + for(i = 0; i < NSOFTTYPES; i++) + { + char buf[100]; + sprintf(buf, "SofteningMaxPhysType%d", i); + strcpy(tag[nt], buf); + addr[nt] = &All.SofteningMaxPhys[i]; + id[nt++] = REAL; + } + + for(i = 0; i < NTYPES; i++) + { + char buf[100]; + sprintf(buf, "SofteningTypeOfPartType%d", i); + strcpy(tag[nt], buf); + addr[nt] = 
&All.SofteningTypeOfPartType[i]; + id[nt++] = INT; + } + +#ifdef ADAPTIVE_HYDRO_SOFTENING + strcpy(tag[nt], "MinimumComovingHydroSoftening"); + addr[nt] = &All.MinimumComovingHydroSoftening; + id[nt++] = REAL; + + strcpy(tag[nt], "AdaptiveHydroSofteningSpacing"); + addr[nt] = &All.AdaptiveHydroSofteningSpacing; + id[nt++] = REAL; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + + strcpy(tag[nt], "GravityConstantInternal"); + addr[nt] = &All.GravityConstantInternal; + id[nt++] = REAL; + + strcpy(tag[nt], "InitGasTemp"); + addr[nt] = &All.InitGasTemp; + id[nt++] = REAL; + + strcpy(tag[nt], "MinGasTemp"); + addr[nt] = &All.MinGasTemp; + id[nt++] = REAL; + + strcpy(tag[nt], "MinEgySpec"); + addr[nt] = &All.MinEgySpec; + id[nt++] = REAL; + + strcpy(tag[nt], "MinimumDensityOnStartUp"); + addr[nt] = &All.MinimumDensityOnStartUp; + id[nt++] = REAL; + +#ifdef NODEREFINE_BACKGROUND_GRID + strcpy(tag[nt], "MeanVolume"); + addr[nt] = &All.MeanVolume; + id[nt++] = REAL; +#endif /* #ifdef NODEREFINE_BACKGROUND_GRID */ + +#ifndef VORONOI_STATIC_MESH +#ifdef REGULARIZE_MESH_FACE_ANGLE + strcpy(tag[nt], "CellMaxAngleFactor"); + addr[nt] = &All.CellMaxAngleFactor; + id[nt++] = REAL; +#else /* #ifdef REGULARIZE_MESH_FACE_ANGLE */ + strcpy(tag[nt], "CellShapingFactor"); + addr[nt] = &All.CellShapingFactor; + id[nt++] = REAL; +#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */ + + strcpy(tag[nt], "CellShapingSpeed"); + addr[nt] = &All.CellShapingSpeed; + id[nt++] = REAL; +#endif /* #ifndef VORONOI_STATIC_MESH */ + +#if defined(COOLING) + strcpy(tag[nt], "TreecoolFile"); + addr[nt] = &All.TreecoolFile; + id[nt++] = STRING; +#endif /* #if defined(COOLING) */ + +#if defined(REFINEMENT) + strcpy(tag[nt], "ReferenceGasPartMass"); + addr[nt] = &All.ReferenceGasPartMass; + id[nt++] = REAL; + + strcpy(tag[nt], "TargetGasMassFactor"); + addr[nt] = &All.TargetGasMassFactor; + id[nt++] = REAL; + + strcpy(tag[nt], "RefinementCriterion"); + addr[nt] = &All.RefinementCriterion; + id[nt++] = INT; + 
+ strcpy(tag[nt], "DerefinementCriterion"); + addr[nt] = &All.DerefinementCriterion; + id[nt++] = INT; +#endif /* #if defined(REFINEMENT) */ + +#ifdef USE_SFR + strcpy(tag[nt], "CritOverDensity"); + addr[nt] = &All.CritOverDensity; + id[nt++] = REAL; + + strcpy(tag[nt], "TemperatureThresh"); + addr[nt] = &All.TemperatureThresh; + id[nt++] = REAL; + + strcpy(tag[nt], "CritPhysDensity"); + addr[nt] = &All.CritPhysDensity; + id[nt++] = REAL; + + strcpy(tag[nt], "FactorSN"); + addr[nt] = &All.FactorSN; + id[nt++] = REAL; + + strcpy(tag[nt], "FactorEVP"); + addr[nt] = &All.FactorEVP; + id[nt++] = REAL; + + strcpy(tag[nt], "TempSupernova"); + addr[nt] = &All.TempSupernova; + id[nt++] = REAL; + + strcpy(tag[nt], "TempClouds"); + addr[nt] = &All.TempClouds; + id[nt++] = REAL; + + strcpy(tag[nt], "MaxSfrTimescale"); + addr[nt] = &All.MaxSfrTimescale; + id[nt++] = REAL; +#endif /* #ifdef USE_SFR */ + +#ifdef MHD_SEEDFIELD + strcpy(tag[nt], "MHDSeedDir"); + addr[nt] = &All.B_dir; + id[nt++] = INT; + + strcpy(tag[nt], "MHDSeedValue"); + addr[nt] = &All.B_value; + id[nt++] = REAL; +#endif /* #ifdef MHD_SEEDFIELD */ + +#ifdef REFINEMENT_VOLUME_LIMIT + strcpy(tag[nt], "MaxVolumeDiff"); + addr[nt] = &All.MaxVolumeDiff; + id[nt++] = REAL; + + strcpy(tag[nt], "MinVolume"); + addr[nt] = &All.MinVolume; + id[nt++] = REAL; + + strcpy(tag[nt], "MaxVolume"); + addr[nt] = &All.MaxVolume; + id[nt++] = REAL; +#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */ + +#ifdef TILE_ICS + strcpy(tag[nt], "TileICsFactor"); + addr[nt] = &All.TileICsFactor; + id[nt++] = INT; +#endif /* #ifdef TILE_ICS */ + +#ifdef ADDBACKGROUNDGRID + strcpy(tag[nt], "GridSize"); + addr[nt] = &All.GridSize; + id[nt++] = INT; +#endif /* #ifdef ADDBACKGROUNDGRID */ + +#ifdef ONEDIMS_SPHERICAL + strcpy(tag[nt], "CoreRadius"); + addr[nt] = &All.CoreRadius; + id[nt++] = REAL; + + strcpy(tag[nt], "CoreMass"); + addr[nt] = &All.CoreMass; + id[nt++] = REAL; +#endif /* #ifdef ONEDIMS_SPHERICAL */ + + if((fd = fopen(fname, "r"))) + { + 
sprintf(buf, "%s%s", fname, "-usedvalues"); + if(!(fdout = fopen(buf, "w"))) + { + printf("error opening file '%s' \n", buf); + errorFlag = 1; + } + else + { + printf("Obtaining parameters from file '%s':\n\n", fname); + while(!feof(fd)) + { + *buf = 0; + fgets(buf, MAXLEN_PARAM_TAG + MAXLEN_PARAM_VALUE + 200, fd); + if(sscanf(buf, "%s%s%s", buf1, buf2, buf3) < 2) + continue; + + if(buf1[0] == '%') + continue; + + for(i = 0, j = -1; i < nt; i++) + if(strcmp(buf1, tag[i]) == 0) + { + if(param_handled[i] == 0) + { + j = i; + param_handled[i] = 1; + break; + } + else + { + j = -2; + break; + } + } + + if(j >= 0) + { + switch(id[j]) + { + case REAL: + *((double *)addr[j]) = atof(buf2); + sprintf(buf3, "%%-%ds%%g\n", MAXLEN_PARAM_TAG); + fprintf(fdout, buf3, buf1, *((double *)addr[j])); + fprintf(stdout, " "); + fprintf(stdout, buf3, buf1, *((double *)addr[j])); + break; + case STRING: + strcpy((char *)addr[j], buf2); + sprintf(buf3, "%%-%ds%%s\n", MAXLEN_PARAM_TAG); + fprintf(fdout, buf3, buf1, buf2); + fprintf(stdout, " "); + fprintf(stdout, buf3, buf1, buf2); + break; + case INT: + *((int *)addr[j]) = atoi(buf2); + sprintf(buf3, "%%-%ds%%d\n", MAXLEN_PARAM_TAG); + fprintf(fdout, buf3, buf1, *((int *)addr[j])); + fprintf(stdout, " "); + fprintf(stdout, buf3, buf1, *((int *)addr[j])); + break; + } + } + else if(j == -2) + { +#ifdef ALLOWEXTRAPARAMS + warn("Tag '%s' ignored from file %s !", buf1, fname); +#else /* #ifdef ALLOWEXTRAPARAMS */ + fprintf(stdout, "Error in file %s: Tag '%s' multiply defined.\n", fname, buf1); + errorFlag = 1; +#endif /* #ifdef ALLOWEXTRAPARAMS #else */ + } + else + { +#ifdef ALLOWEXTRAPARAMS + warn("Tag '%s' ignored from file %s !", buf1, fname); +#else /* #ifdef ALLOWEXTRAPARAMS */ + fprintf(stdout, "Error in file %s: Tag '%s' not allowed\n", fname, buf1); + errorFlag = 1; +#endif /* #ifdef ALLOWEXTRAPARAMS #else */ + } + } + fclose(fd); + fclose(fdout); + printf("\n"); + + i = strlen(All.OutputDir); + if(i > 0) + if(All.OutputDir[i - 1] != 
'/') + strcat(All.OutputDir, "/"); + + mkdir(All.OutputDir, 02755); + sprintf(buf1, "%s%s", fname, "-usedvalues"); + sprintf(buf2, "%s%s", All.OutputDir, "parameters-usedvalues"); + sprintf(buf3, "cp %s %s", buf1, buf2); +#ifndef NOCALLSOFSYSTEM + if(errorFlag == 0) + system(buf3); +#endif /* #ifndef NOCALLSOFSYSTEM */ + } + } + else + { + printf("Parameter file %s not found.\n", fname); + errorFlag = 1; + } + + for(i = 0; i < nt; i++) + { + if(param_handled[i] != 1) + { + printf("Error. I miss a value for tag '%s' in parameter file '%s'.\n", tag[i], fname); + errorFlag = 1; + } + } + + if(All.OutputListOn && errorFlag == 0) + errorFlag += read_outputlist(All.OutputListFilename); + else + All.OutputListLength = 0; + } + + MPI_Bcast(&errorFlag, 1, MPI_INT, 0, MPI_COMM_WORLD); + + if(errorFlag) + { + MPI_Finalize(); + exit(errorFlag); + } + + All.NParameters = nt; + + /* now communicate the relevant parameters to the other processes */ + MPI_Bcast(&All, sizeof(struct global_data_all_processes), MPI_BYTE, 0, MPI_COMM_WORLD); + +#ifdef TOLERATE_WRITE_ERROR + MPI_Bcast(AlternativeOutputDir, MAXLEN_PATH, MPI_BYTE, 0, MPI_COMM_WORLD); +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + +#ifdef HOST_MEMORY_REPORTING + check_maxmemsize_setting(); +#endif /* #ifdef HOST_MEMORY_REPORTING */ + + mymalloc_init(); + + Parameters = (char(*)[MAXLEN_PARAM_TAG])mymalloc("Parameters", All.NParameters * MAXLEN_PARAM_TAG * sizeof(char)); + ParametersValue = (char(*)[MAXLEN_PARAM_VALUE])mymalloc("ParametersValue", All.NParameters * MAXLEN_PARAM_VALUE * sizeof(char)); + ParametersType = mymalloc("ParamtersType", All.NParameters * sizeof(char)); + + if(ThisTask == 0) + { + for(i = 0; i < All.NParameters; i++) + { + strncpy(Parameters[i], tag[i], MAXLEN_PARAM_TAG); + ParametersType[i] = id[i]; + void *tmp = ParametersValue[i]; + switch(id[i]) + { + case REAL: + *((double *)tmp) = *((double *)addr[i]); + break; + case STRING: + strncpy(tmp, addr[i], MAXLEN_PARAM_VALUE); + break; + case INT: + tmp = 
ParametersValue[i]; + *((int *)tmp) = *((int *)addr[i]); + break; + } + } + } + + MPI_Bcast(Parameters, sizeof(char) * All.NParameters * MAXLEN_PARAM_TAG, MPI_BYTE, 0, MPI_COMM_WORLD); + MPI_Bcast(ParametersValue, sizeof(char) * All.NParameters * MAXLEN_PARAM_VALUE, MPI_BYTE, 0, MPI_COMM_WORLD); + MPI_Bcast(ParametersType, sizeof(char) * All.NParameters, MPI_BYTE, 0, MPI_COMM_WORLD); + +#undef REAL +#undef STRING +#undef INT +} + +/*! \brief This function checks the consistency of the input parameters. + * + * If you encounter some possible misuse and a corresponding error message + * that is hard to interpret, a check should be placed in this function with + * a terminate statement and a clear explanation why this does not work. + * + * \return void + */ +void check_parameters() +{ + int i, errorFlag = 0; + + /* check whether time max is larger than max timestep */ + if(All.TimeMax - All.TimeBegin <= All.MaxSizeTimestep) + { + printf("PARAMETERS: check_parameters: TimeBegin = %g, TimeMax = %g, MaxSizeTimestep = %g \n", All.TimeBegin, All.TimeMax, + All.MaxSizeTimestep); + terminate( + "check_parameters: Your total runtime is smaller than the maximum allowed timestep! Choose an appropriate value for " + "MaxSizeTimestep < TimeMax-TimeBegin! 
\n"); + } + + /* check softening types */ + for(i = 0; i < NTYPES; i++) + { + if(All.SofteningTypeOfPartType[i] >= NSOFTTYPES || All.SofteningTypeOfPartType[i] < 0) + { + mpi_printf("SofteningTypeOfPartType% invalid (NSOFTTYPES=%d)\n", i, NSOFTTYPES); + errorFlag = 1; + } + } + + if(errorFlag) + mpi_terminate("Softening invalid!"); + + if(All.NumFilesWrittenInParallel > NTask) + { + if(ThisTask == 0) + warn("NOTICE: Reducing requested NumFilesWrittenInParallel=%d to %d\n", All.NumFilesWrittenInParallel, NTask); + All.NumFilesWrittenInParallel = NTask; + } + + if(All.NumFilesWrittenInParallel == 0) + { + mpi_printf("NOTICE: All.NumFilesWrittenInParallel has been set to be equal to the number of processors\n"); + All.NumFilesWrittenInParallel = NTask; + } + +#ifndef GRAVITY_NOT_PERIODIC + if(All.PeriodicBoundariesOn == 0) + { + mpi_terminate( + "Code was compiled with gravity periodic boundary conditions switched on.\nYou must set `PeriodicBoundariesOn=1', or " + "recompile the code.\n"); + } +#else /* #ifndef GRAVITY_NOT_PERIODIC */ + if(All.PeriodicBoundariesOn == 1) + { + mpi_terminate( + "Code was compiled with gravity periodic boundary conditions switched off.\nYou must set `PeriodicBoundariesOn=0', or " + "recompile the code.\n"); + } +#endif /* #ifndef GRAVITY_NOT_PERIODIC #else */ + +#ifdef COOLING + if(All.CoolingOn == 0) + { + mpi_terminate("Code was compiled with cooling switched on.\nYou must set `CoolingOn=1', or recompile the code.\n"); + } +#else /* #ifdef COOLING */ + if(All.CoolingOn == 1) + { + mpi_terminate("Code was compiled with cooling switched off.\nYou must set `CoolingOn=0', or recompile the code.\n"); + } +#endif /* #ifdef COOLING #else */ + + if(All.TypeOfTimestepCriterion >= 3) + { + mpi_terminate("The specified timestep criterion\nis not valid\n"); + } + +#if(NTYPES < 6) + mpi_terminate("NTYPES < 6 is not allowed.\n"); +#endif /* #if (NTYPES < 6) */ + +#if(NTYPES > 15) + mpi_terminate("NTYPES > 15 is not supported yet.\n"); +#endif /* #if 
(NTYPES > 15) */ + +#if(NTYPES > 8) + if(All.ICFormat == 1 || All.ICFormat == 2) + { + mpi_terminate("NTYPES>8 is not allowed with ICFormat=%d, since the header block is limited to 256 bytes.\n", All.ICFormat); + } +#endif /* #if (NTYPES > 8) */ + +#ifdef USE_SFR + if(All.StarformationOn == 0) + { + mpi_terminate("Code was compiled with star formation switched on.\nYou must set `StarformationOn=1', or recompile the code.\n"); + } + if(All.CoolingOn == 0) + { + mpi_terminate( + "You try to use the code with star formation enabled,\nbut you did not switch on cooling.\nThis mode is not supported.\n"); + } +#else /* #ifdef USE_SFR */ + if(All.StarformationOn == 1) + { + mpi_terminate("Code was compiled with star formation switched off.\nYou must set `StarformationOn=0', or recompile the code.\n"); + } +#endif /* #ifdef USE_SFR #else */ + +#if defined(ENFORCE_JEANS_STABILITY_OF_CELLS) && defined(USE_SFR) + if(ThisTask == 0) + warn("Code was compiled with ENFORCE_JEANS_STABILITY_OF_CELLS together with another EOS. Please make sure you really want this."); +#endif /* #if defined(ENFORCE_JEANS_STABILITY_OF_CELLS) && (defined(ISOTHERM_EQS) || (defined(USE_SFR) && !defined(FM_SFR))) */ +} + +/*! \brief This function reads a table with a list of desired output times. + * + * The table does not have to be ordered in any way, but may not contain more + * than MAXLEN_OUTPUTLIST entries. + * + * \param[in] fname The file name of the outputlist. + * + * \return 0: success 1: unable to open file. 
+ */ +int read_outputlist(char *fname) +{ + FILE *fd; + int count, flag; + char buf[512], msg[512]; + + if(!(fd = fopen(fname, "r"))) + { + printf("can't read output list in file '%s'\n", fname); + return 1; + } + + All.OutputListLength = 0; + + while(1) + { + if(fgets(buf, 500, fd) != buf) + break; + + count = sscanf(buf, " %lg %d ", &All.OutputListTimes[All.OutputListLength], &flag); + + if(count == 1) + flag = 1; + + if(count == 1 || count == 2) + { + if(All.OutputListLength >= MAXLEN_OUTPUTLIST) + { + sprintf(msg, "\ntoo many entries in output-list. You should increase MAXLEN_OUTPUTLIST=%d.\n", (int)MAXLEN_OUTPUTLIST); + terminate(msg); + } + + All.OutputListFlag[All.OutputListLength] = flag; + All.OutputListLength++; + } + } + + fclose(fd); + + printf("\nBEGRUN: found %d times in output-list.\n", All.OutputListLength); + + return 0; +} diff --git a/src/amuse/community/arepo/src/io/read_ic.c b/src/amuse/community/arepo/src/io/read_ic.c new file mode 100644 index 0000000000..97481c91ad --- /dev/null +++ b/src/amuse/community/arepo/src/io/read_ic.c @@ -0,0 +1,1900 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/io/read_ic.c + * \date 05/2018 + * \brief Contains the routines needed to load initial conditions. + * \details contains functions: + * void read_ic(const char *fname, int readTypes) + * MyIDType determine_ids_offset(void) + * void empty_read_buffer(enum iofields blocknr, int offset, + * int pc, int type) + * void share_particle_number_in_file(const char *fname, int + * filenr, int readTask, int lastTask, int readTypes) + * void read_file(const char *fname, int filenr, int readTask, + * int lastTask, int readTypes) + * int find_files(const char *fname) + * void distribute_file(int nfiles, int firstfile, int + * firsttask, int lasttask, int *filenr, int *master, int + * *last) + * herr_t hdf5_header_error_handler(void *unused) + * void read_header_attributes_in_hdf5(const char *fname) + * void read_header_attributes(FILE * fd) + * void swap_Nbyte(char *data, int n, int m) + * void swap_header() + * void tile_ics(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 08.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifndef IDS_OFFSET +#ifdef LONGIDS +#define IDS_OFFSET 100000000000 +#else /* #ifdef LONGIDS */ +#define IDS_OFFSET 1000000000 +#endif /* #ifdef LONGIDS #else */ +#endif /* #ifndef IDS_OFFSET */ + +#define SKIP \ + { \ + my_fread(&blksize1, sizeof(int), 1, fd); \ + } +#define SKIP2 \ + { \ + my_fread(&blksize2, sizeof(int), 1, fd); \ + } + +void read_header_attributes(FILE *fd); + +#ifdef HAVE_HDF5 +#include +void read_header_attributes_in_hdf5(const char *fname); +#endif /* #ifdef HAVE_HDF5 */ + +int num_files; + +int swap_file = 8; + +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) +/*! \brief Struct containing information about the number of particles per + * particle type. 
+ */ +static struct ntypes_data +{ + int npart[NTYPES]; +} * ntype_in_files; +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */ + +/*! \brief Reads initial conditions that are in one of the supported file + * formats. + * + * Snapshot files can be used as input files. However, when a + * snapshot file is used as input, not all the information in the header is + * used: THE STARTING TIME NEEDS TO BE SET IN THE PARAMETERFILE. + * Alternatively, the code can be started with restartflag 2, then snapshots + * from the code can be used as initial conditions-files without having to + * change the parameter file. For gas particles, only the internal energy is + * read, the density and mean molecular weight will be recomputed by the code. + * When InitGasTemp>0 is given, the gas temperature will be initialized to + * this value assuming a mean molecular weight either corresponding to + * complete neutrality, or full ionization. + * + * \param[in] fname File name of the ICs. + * \param[in] readTypes A bitfield that determines what particle types to + * read, only if the bit corresponding to a particle type is set, + * the corresponding data is loaded, otherwise its particle number + * is set to zero. (This is only implemented for HDF5 files.) 
+ *
+ *  \return void
+ */
+void read_ic(const char *fname, int readTypes)
+{
+  int i, rep, rest_files, ngroups, gr, filenr, masterTask, lastTask, groupMaster;
+  double u_init, molecular_weight;
+  char buf[500];
+  double t0, t1;
+
+  if((All.ICFormat < 1) || (All.ICFormat > 4))
+    {
+      mpi_terminate("ICFormat=%d not supported.\n", All.ICFormat);
+    }
+
+  t0 = second();
+  CPU_Step[CPU_MISC] += measure_time();
+
+  num_files = find_files(fname);
+
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
+  ntype_in_files = mymalloc("ntype_in_files", num_files * sizeof(struct ntypes_data));
+  memset(ntype_in_files, 0, num_files * sizeof(struct ntypes_data));
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
+
+  All.TotNumPart = 0; /* 0 marks the global totals as "not set yet" for share_particle_number_in_file() */
+
+  /* we repeat reading the headers of the files two times. In the first iteration, only the
+   * particle numbers ending up on each processor are assembled, followed by memory allocation.
+   * In the second iteration, the data is actually read in.
+   */
+  for(rep = 0; rep < 2; rep++)
+    {
+      NumPart = 0; /* local counts are rebuilt from scratch in each pass */
+      NumGas  = 0;
+
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
+      if(rep == 1)
+        MPI_Allreduce(MPI_IN_PLACE, ntype_in_files, num_files * NTYPES, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
+
+      rest_files = num_files;
+      while(rest_files > NTask) /* more files than tasks: each task handles one whole file per sweep */
+        {
+          sprintf(buf, "%s.%d", fname, ThisTask + (rest_files - NTask));
+          if(All.ICFormat == 3)
+            sprintf(buf, "%s.%d.hdf5", fname, ThisTask + (rest_files - NTask));
+
+          ngroups = NTask / All.NumFilesWrittenInParallel;
+          if((NTask % All.NumFilesWrittenInParallel))
+            ngroups++;
+          groupMaster = (ThisTask / ngroups) * ngroups;
+
+          for(gr = 0; gr < ngroups; gr++)
+            {
+              if(ThisTask == (groupMaster + gr)) /* ok, it's this processor's turn */
+                {
+                  if(rep == 0)
+                    share_particle_number_in_file(buf, ThisTask + (rest_files - NTask), ThisTask, ThisTask, readTypes);
+                  else
+                    read_file(buf, ThisTask + (rest_files - NTask), ThisTask, ThisTask, readTypes);
+                }
+              MPI_Barrier(MPI_COMM_WORLD);
+            }
+
+          rest_files -= NTask;
+        }
+
+      if(rest_files > 0)
+        {
+          distribute_file(rest_files, 0, 0, NTask - 1, &filenr, &masterTask, &lastTask); /* presumably assigns this task its file, reading task and last receiving task */
+
+          if(num_files > 1)
+            {
+              sprintf(buf, "%s.%d", fname, filenr);
+              if(All.ICFormat == 3)
+                sprintf(buf, "%s.%d.hdf5", fname, filenr);
+            }
+          else
+            {
+              sprintf(buf, "%s", fname);
+              if(All.ICFormat == 3)
+                sprintf(buf, "%s.hdf5", fname);
+            }
+
+          ngroups = rest_files / All.NumFilesWrittenInParallel;
+          if((rest_files % All.NumFilesWrittenInParallel))
+            ngroups++;
+
+          for(gr = 0; gr < ngroups; gr++)
+            {
+              if((filenr / All.NumFilesWrittenInParallel) == gr) /* ok, it's this processor's turn */
+                {
+                  if(rep == 0)
+                    share_particle_number_in_file(buf, filenr, masterTask, lastTask, readTypes);
+                  else
+                    read_file(buf, filenr, masterTask, lastTask, readTypes);
+                }
+              MPI_Barrier(MPI_COMM_WORLD);
+            }
+        }
+
+      /* now do the memory allocation */
+      if(rep == 0)
+        {
+          int max_load, max_sphload;
+          MPI_Allreduce(&NumPart, &max_load, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+          MPI_Allreduce(&NumGas, &max_sphload, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+#ifdef GENERATE_GAS_IN_ICS
+          if(max_sphload < max_load)
+            max_sphload = max_load;
+#endif /* #ifdef GENERATE_GAS_IN_ICS */
+
+          All.MaxPart    = max_load / (1.0 - 2 * ALLOC_TOLERANCE); /* size the arrays from the largest per-task load plus head room */
+          All.MaxPartSph = max_sphload / (1.0 - 2 * ALLOC_TOLERANCE);
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+          if(All.TotPartSpecial != 0)
+            All.MaxPartSpecial = (int)(All.TotPartSpecial);
+          else
+            terminate("Code compiled with option EXACT_GRAVITY_FOR_PARTICLE_TYPE but no particles of specified type found in ICs.");
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+          allocate_memory();
+
+          CommBuffer = mymalloc("CommBuffer", COMMBUFFERSIZE);
+        }
+    }
+
+  myfree(CommBuffer); /* allocated at the end of the first pass (rep == 0) */
+
+#ifdef TILE_ICS
+  tile_ics();
+#endif /* #ifdef TILE_ICS */
+
+  /* this makes sure that masses are initialized in the case that the mass-block
+     is empty for this particle type */
+  for(i = 0; i < NumPart; i++)
+    {
+      if(All.MassTable[P[i].Type] != 0)
+        P[i].Mass = All.MassTable[P[i].Type];
+    }
+
+    /* If we are reading in Gadget2 ICs, we need to compute the material
+       number from the ID */
+#ifdef READ_LEGACY_ICS
+  if(header.flag_entropy_instead_u)
+    {
+      sprintf(buf, "\nProblem: Legacy ICs cannot contain entropy in the u field!\n");
+      terminate(buf);
+    }
+
+  for(i = 0; i < NumGas; i++)
+    {
+      int j; /* NOTE(review): j is never used inside this loop */
+
+      double mat;
+
+      modf(((double)(P[i].ID - EOS_ID_START)) / EOS_ID_SKIP, &mat); /* This stores the int part in variable mat and
+                                                                       discards the remainder */
+      int imat = mat;
+
+      SphP[i].Composition[imat] = 1.0;
+    }
+#endif /* #ifdef READ_LEGACY_ICS */
+
+#if defined(REFINEMENT) && defined(REFINEMENT_HIGH_RES_GAS)
+  if(RestartFlag == 0) /* All gas that is already present in the ICs is allowed to be (de-)refined */
+    {
+      for(i = 0; i < NumGas; i++)
+        {
+          if(All.ReferenceGasPartMass == 0 || P[i].Mass < 1.2 * All.ReferenceGasPartMass)
+            SphP[i].AllowRefinement = 1;
+        }
+    }
+#endif /* #if defined (REFINEMENT) && defined (REFINEMENT_HIGH_RES_GAS) */
+
+  for(i = 0; i < NumPart; i++)
+    P[i].SofteningType = All.SofteningTypeOfPartType[P[i].Type];
+
+#ifdef GENERATE_GAS_IN_ICS
+  int count;
+  double fac, d, a, b, rho;
+
+  if(RestartFlag == 0)
+    {
+      header.flag_entropy_instead_u = 0;
+
+      MyIDType ids_offset = determine_ids_offset();
+
+      for(i = 0, count = 0; i < NumPart; i++) /* count the local particles that will be split */
+#ifdef SPLIT_PARTICLE_TYPE
+        if((1 << P[i].Type) & (SPLIT_PARTICLE_TYPE))
+#else  /* #ifdef SPLIT_PARTICLE_TYPE */
+        if(P[i].Type == 1)
+#endif /* #ifdef SPLIT_PARTICLE_TYPE #else */
+          count++;
+
+      if(count)
+        {
+          domain_resize_storage(count, count, 0);
+
+          memmove(P + count, P, sizeof(struct particle_data) * NumPart); /* free the first 'count' slots for the new gas cells */
+
+          NumPart += count;
+          NumGas += count;
+
+          if(NumGas > All.MaxPartSph)
+            terminate("Task=%d ends up getting more SPH particles (%d) than allowed (%d)\n", ThisTask, NumGas, All.MaxPartSph);
+
+#ifdef REFINEMENT_HIGH_RES_GAS
+          for(i = 0; i < NumGas - count; i++) /* make sure that AllowRefinement is shifted with the particles */
+            SphP[i + count].AllowRefinement = SphP[i].AllowRefinement;
+          for(i = 0; i < count; i++) /* by default, new cells are not allowed to be refined */
+            SphP[i].AllowRefinement = 0;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+
+          fac = All.OmegaBaryon / All.Omega0;
+          rho = All.Omega0 * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G);
+
+          int j;
+
+          for(i = count, j = 0; i < NumPart; i++) /* i walks the shifted particles, j fills the freed gas slots */
+#ifdef SPLIT_PARTICLE_TYPE
+            if((1 << P[i].Type) & (SPLIT_PARTICLE_TYPE))
+#else  /* #ifdef SPLIT_PARTICLE_TYPE */
+            if(P[i].Type == 1)
+#endif /* #ifdef SPLIT_PARTICLE_TYPE #else */
+              {
+                d = pow(P[i].Mass / rho, 1.0 / 3);
+                a = 0.5 * All.OmegaBaryon / All.Omega0 * d;
+                b = 0.5 * (All.Omega0 - All.OmegaBaryon) / All.Omega0 * d;
+
+                P[j] = P[i];
+
+                P[j].Mass *= fac;       /* gas copy takes the fraction OmegaBaryon / Omega0 of the mass */
+                P[i].Mass *= (1 - fac); /* the original particle keeps the rest */
+                P[j].Type = 0;
+                P[j].ID += ids_offset;
+                P[i].Pos[0] += a;
+                P[i].Pos[1] += a;
+                P[i].Pos[2] += a;
+                P[j].Pos[0] -= b;
+                P[j].Pos[1] -= b;
+                P[j].Pos[2] -= b;
+
+#ifdef REFINEMENT_HIGH_RES_GAS
+                if(P[i].Type == 1) /* also allow gas which is produced by splitting a high res DM particle to be (de-) refined */
+                  SphP[j].AllowRefinement = 2;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+
+                j++;
+              }
+
+          All.MassTable[0] = 0;
+
+#ifdef SPLIT_PARTICLE_TYPE
+          for(i = 1; i < NTYPES; i++)
+            if((1 << i) & (SPLIT_PARTICLE_TYPE))
+              All.MassTable[i] *= (1 - fac);
+#else  /* #ifdef SPLIT_PARTICLE_TYPE */
+          All.MassTable[1] *= (1 - fac);
+#endif /* #ifdef SPLIT_PARTICLE_TYPE #else */
+        }
+    }
+#endif /* #ifdef GENERATE_GAS_IN_ICS */
+
+#ifdef READ_DM_AS_GAS
+  {
+    domain_resize_storage(0, NumPart, 0);
+
+    if(NumGas > All.MaxPartSph)
+      terminate("Task=%d ends up getting more SPH particles (%d) than allowed (%d)\n", ThisTask, NumGas, All.MaxPartSph);
+
+    for(i = 0; i < NumPart; i++)
+      {
+        P[i].Type      = 0;
+        SphP[i].Utherm = 1.0;
+      }
+
+    All.MassTable[0] = 0;
+
+    header.npartTotal[0]         = header.npartTotal[1]; /* the type-1 counts become the gas counts */
+    header.npartTotalHighWord[0] = header.npartTotalHighWord[1];
+    header.npart[0]              = header.npart[1];
+    header.npartTotal[1]         = 0;
+    header.npartTotalHighWord[1] = 0;
+    header.npart[1]              = 0;
+    NumGas                       = NumPart;
+    All.TotNumGas                = All.TotNumPart;
+    mpi_printf("READ_DM_AS_GAS: generated %lld gas particles from type %d\n",
+               header.npartTotal[0] + (((long long)header.npartTotalHighWord[0]) << 32), 0);
+  }
+#endif /* #ifdef READ_DM_AS_GAS */
+
+#ifdef USE_SFR
+  if(RestartFlag == 0)
+    {
+      if(All.MassTable[4] == 0 && All.MassTable[0] > 0)
+        {
+          All.MassTable[0] = 0;
+          All.MassTable[4] = 0; /* NOTE(review): already 0 by the enclosing condition */
+        }
+    }
+#endif
+
+  u_init = (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * All.InitGasTemp;
+  u_init *= All.UnitMass_in_g / All.UnitEnergy_in_cgs; /* unit conversion */
+
+  if(All.InitGasTemp > 1.0e4) /* assuming FULL ionization */
+    molecular_weight = 4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC));
+  else /* assuming NEUTRAL GAS */
+    molecular_weight = 4 / (1 + 3 * HYDROGEN_MASSFRAC);
+
+  u_init /= molecular_weight;
+
+  All.InitGasU = u_init;
+
+  header.mass[0]   = 0; /* to make sure that the variable masses are stored in output file */
+  All.MassTable[0] = 0;
+
+  if(RestartFlag == 0)
+    {
+#if defined(REFINEMENT_HIGH_RES_GAS)
+      for(i = 0; i < NumGas; i++)
+        if(SphP[i].AllowRefinement)
+          SphP[i].HighResMass = P[i].Mass;
+        else
+          SphP[i].HighResMass = 0;
+#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */
+
+      if(All.InitGasTemp > 0)
+        {
+          for(i = 0; i < NumGas; i++)
+            {
+              if(ThisTask == 0 && i == 0 && SphP[i].Utherm == 0)
+                printf("READIC: Initializing u from InitGasTemp!\n");
+
+              if(SphP[i].Utherm == 0)
+                SphP[i].Utherm = All.InitGasU;
+              /* Note: the conversion to entropy will be done in the function init(),
+                 after the densities have been computed */
+            }
+        }
+    }
+
+  for(i = 0; i < NumGas; i++)
+    {
+      SphP[i].Utherm = dmax(All.MinEgySpec, SphP[i].Utherm); /* enforce the floor All.MinEgySpec */
+      if(SphP[i].Density > 0)
+        SphP[i].Volume = P[i].Mass / SphP[i].Density;
+    }
+
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  t1 = second();
+  mpi_printf("READIC: reading done (took %g sec).\n", timediff(t0, t1));
+
+  /* verify number of
particles */
+  int num = 0;
+  long long glob_num;
+  for(i = 0; i < NumPart; i++)
+    num += 1; /* num ends up equal to NumPart */
+  sumup_large_ints(1, &num, &glob_num);
+  if(glob_num != All.TotNumPart)
+    terminate("glob_num (=%lld) != All.TotNumPart (=%lld)", glob_num, All.TotNumPart);
+
+  mpi_printf("READIC: Total number of particles :  %lld\n\n", All.TotNumPart);
+
+  CPU_Step[CPU_SNAPSHOT] += measure_time();
+}
+
+/*! \brief This function computes a suitable offset for the particle IDs in
+ *         case gas should be generated in the ICs.
+ *
+ *  If the macro OFFSET_FOR_NON_CONTIGUOUS_IDS is not defined the code reverts
+ *  to a fixed offset defined at the beginning of the file.
+ *
+ *  With the macro defined, the offset starts at 1 and is doubled until it
+ *  exceeds All.MaxID, i.e. it becomes the smallest power of two larger than
+ *  the largest ID. All.MaxID is computed first if it is still 0 and is reset
+ *  to 0 afterwards. The run is terminated if the offset is no longer positive
+ *  after shifting.
+ *
+ *  \return Offset for the gas particles to be generated.
+ */
+MyIDType determine_ids_offset(void)
+{
+#ifndef OFFSET_FOR_NON_CONTIGUOUS_IDS
+  MyIDType ids_offset = IDS_OFFSET;
+#else  /* #ifndef OFFSET_FOR_NON_CONTIGUOUS_IDS */
+  if(All.MaxID == 0) /* MaxID not calculated yet */
+    calculate_maxid();
+
+  int bits_used       = 1;
+  int bits_available  = CHAR_BIT * sizeof(MyIDType);
+  MyIDType ids_offset = 1;
+
+  while(ids_offset <= All.MaxID && ids_offset > 0) /* the second test ends the loop once the shift has run out of bits */
+    {
+      ids_offset <<= 1;
+      bits_used++;
+    }
+
+  All.MaxID = 0; /* reset to allow recomputing */
+
+  if(ids_offset <= 0) /* NOTE(review): what ran out here is ID bits, although the message says memory */
+    terminate("not enough memory to generate id offsets. Used %d bits out of %d\n", bits_used, bits_available);
+
+#ifdef LONGIDS
+  mpi_printf("GENERATE_GAS_IN_ICS: determined id offset as %llu. Used %d bits out of %d\n", ids_offset, bits_used, bits_available);
+#else  /* #ifdef LONGIDS */
+  mpi_printf("GENERATE_GAS_IN_ICS: determined id offset as %u. Used %d bits out of %d\n", ids_offset, bits_used, bits_available);
+#endif /* #ifdef LONGIDS #else */
+
+#endif /* #ifndef OFFSET_FOR_NON_CONTIGUOUS_IDS */
+  return ids_offset;
+}
+
+/*! \brief Reads out the io buffer that was filled with particle data.
+ *
+ *  The data in the io buffer is put in the appropriate places of the particle
+ *  structures.
+ *
+ *  \param[in] blocknr Data block present in io buffer.
+ *  \param[in] offset Particle corresponding to the first element in io buffer.
+ *  \param[in] pc Number of elements in the io buffer.
+ *  \param[in] type If blocknr=IO_VEL P[n].Type is set to type.
+ *
+ *  \return void
+ */
+void empty_read_buffer(enum iofields blocknr, int offset, int pc, int type)
+{
+  int n, k;
+  MyInputFloat *fp;
+  double *doublep;
+  MyIDType *ip;
+  int *intp;
+  float *floatp;
+
+  int vt, vpb;
+  char *cp;
+
+  fp      = (MyInputFloat *)CommBuffer; /* one read cursor per possible element type, all starting at the buffer head */
+  doublep = (double *)CommBuffer;
+  ip      = (MyIDType *)CommBuffer;
+  intp    = (int *)CommBuffer;
+  floatp  = (float *)CommBuffer;
+
+  cp  = (char *)CommBuffer;
+  vt  = get_datatype_in_block(blocknr, 1);
+  vpb = get_values_per_blockelement(blocknr);
+  if(vt == 2) /* byte-swap the whole block with the element width selected by vt */
+    swap_Nbyte(cp, pc * vpb, 8);
+  else
+    {
+#ifdef INPUT_IN_DOUBLEPRECISION
+      if(vt == 1)
+        swap_Nbyte(cp, pc * vpb, 8);
+      else
+#endif /* #ifdef INPUT_IN_DOUBLEPRECISION */
+        swap_Nbyte(cp, pc * vpb, 4);
+    }
+
+  int field = -1;
+  int f;
+  for(f = 0; f < N_IO_Fields; f++) /* look up the IO_Fields entry describing this block */
+    {
+      if(IO_Fields[f].field == blocknr)
+        {
+          field = f;
+          break;
+        }
+    }
+
+  if(field < 0)
+    terminate("error: field not found");
+
+  for(n = 0; n < pc; n++)
+    {
+      if(IO_Fields[field].io_func) /* the field has its own callback: hand it the particle index and the cursor */
+        {
+          int particle; /* only assigned in the A_NONE/A_SPHP/A_P case; the other cases call terminate() */
+          switch(IO_Fields[field].array)
+            {
+              case A_NONE:
+              case A_SPHP:
+              case A_P:
+                particle = offset + n;
+                break;
+              case A_PS:
+                terminate("Not good, trying to read into PS[]?\n");
+                break;
+              default:
+                terminate("ERROR in empty_read_buffer: Array not found!\n");
+                break;
+            }
+
+          switch(IO_Fields[field].type_in_file_input)
+            {
+              case FILE_NONE:
+                terminate("error");
+                break;
+              case FILE_INT:
+                IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, intp, 1);
+                intp += IO_Fields[field].values_per_block;
+                break;
+              case FILE_MY_ID_TYPE:
+                IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, ip, 1);
+                ip += IO_Fields[field].values_per_block;
+                break;
+              case FILE_MY_IO_FLOAT:
+                IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, fp, 1);
+                fp += IO_Fields[field].values_per_block;
+                break;
+              case FILE_DOUBLE:
+                IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, doublep, 1);
+                doublep += IO_Fields[field].values_per_block;
+                break;
+              case FILE_FLOAT:
+                IO_Fields[field].io_func(particle, IO_Fields[field].values_per_block, floatp, 1);
+                floatp += IO_Fields[field].values_per_block;
+                break;
+            }
+        }
+      else /* no callback: store each value at IO_Fields[field].offset inside the target struct */
+        {
+          void *array_pos;
+          switch(IO_Fields[field].array)
+            {
+              case A_NONE:
+                array_pos = 0;
+                break;
+              case A_SPHP:
+                array_pos = SphP + offset + n;
+                break;
+              case A_P:
+                array_pos = P + offset + n;
+                break;
+              case A_PS:
+                terminate("Not good, trying to read into PS[]?\n");
+                break;
+              default:
+                terminate("ERROR in empty_read_buffer: Array not found!\n");
+                break;
+            }
+
+          for(k = 0; k < IO_Fields[field].values_per_block; k++)
+            {
+              double value = 0;
+              switch(IO_Fields[field].type_in_file_input) /* floating-point inputs are staged in 'value' */
+                {
+                  case FILE_MY_IO_FLOAT:
+                    value = *fp;
+                    fp++;
+                    break;
+                  case FILE_DOUBLE:
+                    value = *doublep;
+                    doublep++;
+                    break;
+                  case FILE_FLOAT:
+                    value = *floatp;
+                    floatp++;
+                    break;
+                  default:
+                    break;
+                }
+
+              switch(IO_Fields[field].type_in_memory) /* integer and ID targets read their own cursor instead of 'value' */
+                {
+                  case MEM_INT:
+                    *((int *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(int))) = *intp;
+                    intp++;
+                    break;
+                  case MEM_MY_ID_TYPE:
+                    *((MyIDType *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyIDType))) = *ip;
+                    ip++;
+                    break;
+                  case MEM_FLOAT:
+                    *((float *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(float))) = value;
+                    break;
+
+                  case MEM_DOUBLE:
+                    *((double *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(double))) = value;
+                    break;
+
+                  case MEM_MY_SINGLE:
+                    *((MySingle *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MySingle))) = value;
+                    break;
+
+                  case MEM_MY_FLOAT:
+                    *((MyFloat *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyFloat))) = value;
+                    break;
+
+                  case MEM_MY_DOUBLE:
+                    *((MyDouble *)((size_t)array_pos + IO_Fields[field].offset + k * sizeof(MyDouble))) = value;
+                    break;
+
+                  default:
+                    terminate("ERROR in empty_read_buffer: Type not found!\n");
+                    break;
+                }
+            }
+        }
+    }
+
+  if(blocknr == IO_VEL)
+    {
+      for(n = 0; n < pc; n++)
+        P[offset + n].Type = type; /* initialize type here as well */
+    }
+}
+
+/*! \brief Distributes the particle numbers in the file fname
+ *         to tasks 'readTask' to 'lastTask', and calculates the number of
+ *         particles each task gets.
+ *
+ *  \param[in] fname Filename to be read.
+ *  \param[in] filenr Index of the file; only used to record the per-type
+ *             particle numbers of this file when
+ *             RECOMPUTE_POTENTIAL_IN_SNAPSHOT is defined.
+ *  \param[in] readTask Task responsible for reading the file fname.
+ *  \param[in] lastTask Last task which gets data contained in the file.
+ *  \param[in] readTypes A bitfield that determines what particle types to
+ *             read, only if the bit corresponding to a particle type is set,
+ *             the corresponding data is loaded, otherwise its particle number
+ *             is set to zero. (This is only implemented for HDF5 files.)
+ *
+ *  \return void
+ */
+void share_particle_number_in_file(const char *fname, int filenr, int readTask, int lastTask, int readTypes)
+{
+  int i, n_in_file, n_for_this_task, ntask, task;
+  int blksize1, blksize2;
+  MPI_Status status;
+  FILE *fd = 0;
+  int type;
+  char label[4], buf[500];
+  int nextblock;
+#ifdef HAVE_HDF5
+  hid_t hdf5_file = 0, hdf5_grp[NTYPES];
+#endif /* #ifdef HAVE_HDF5 */
+
+  if(ThisTask == readTask) /* only the reading task touches the file */
+    {
+      if(All.ICFormat == 1 || All.ICFormat == 2)
+        {
+          if(!(fd = fopen(fname, "r")))
+            {
+              sprintf(buf, "can't open file `%s' for reading initial conditions.\n", fname);
+              terminate(buf);
+            }
+
+          if(All.ICFormat == 2) /* format 2 puts a 4-character label block in front of the header */
+            {
+              SKIP;
+              swap_file = blksize1;
+              my_fread(&label, sizeof(char), 4, fd);
+              my_fread(&nextblock, sizeof(int), 1, fd);
+              swap_Nbyte((char *)&nextblock, 1, 4);
+              printf("Reading header => '%c%c%c%c' (%d byte)\n", label[0], label[1], label[2], label[3], nextblock);
+              SKIP2;
+            }
+
+          SKIP;
+          if(All.ICFormat == 1)
+            {
+              if(blksize1 != 256) /* a header block size other than 256 is taken to mean swapped byte order */
+                swap_file = 1;
+            }
+          read_header_attributes(fd);
+          SKIP2;
+          swap_Nbyte((char *)&blksize1, 1, 4);
+          swap_Nbyte((char *)&blksize2, 1, 4);
+
+          
if(blksize1 != 256 || blksize2 != 256)
+            terminate("incorrect header format blocksize %d, %d\n", blksize1, blksize2);
+
+          swap_header();
+
+#ifdef COMBINETYPES
+          header.npartTotal[3] += header.npartTotal[4] + header.npartTotal[5]; /* fold types 4 and 5 into type 3 */
+          header.npart[3] += header.npart[4] + header.npart[5];
+          header.npartTotal[4] = 0;
+          header.npartTotal[5] = 0;
+          header.npart[4]      = 0;
+          header.npart[5]      = 0;
+#endif /* #ifdef COMBINETYPES */
+        }
+
+#ifdef HAVE_HDF5
+      if(All.ICFormat == 3)
+        {
+          read_header_attributes_in_hdf5(fname);
+
+          hdf5_file = my_H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);
+          if(hdf5_file < 0)
+            terminate("cannot read initial conditions file %s", fname);
+
+          for(type = 0; type < NTYPES; type++)
+            {
+              if(header.npart[type] > 0 && (readTypes & (1 << type)))
+                {
+                  sprintf(buf, "/PartType%d", type);
+                  hdf5_grp[type] = my_H5Gopen(hdf5_file, buf);
+                }
+              if(!(readTypes & (1 << type)))
+                {
+                  // Override particle number in file. If we don't
+                  // read the type, both npart and npartTotal will be 0
+                  header.npartTotal[type]         = 0;
+                  header.npart[type]              = 0;
+                  header.npartTotalHighWord[type] = 0;
+                  header.mass[type]               = 0;
+                }
+            }
+        }
+#endif /* #ifdef HAVE_HDF5 */
+
+      for(task = readTask + 1; task <= lastTask; task++) /* pass header and byte-swap flag to the other tasks sharing this file */
+        {
+          MPI_Ssend(&header, sizeof(header), MPI_BYTE, task, TAG_HEADER, MPI_COMM_WORLD);
+          MPI_Ssend(&swap_file, sizeof(swap_file), MPI_BYTE, task, TAG_KEY, MPI_COMM_WORLD);
+        }
+    }
+  else
+    {
+      MPI_Recv(&header, sizeof(header), MPI_BYTE, readTask, TAG_HEADER, MPI_COMM_WORLD, &status);
+      MPI_Recv(&swap_file, sizeof(swap_file), MPI_BYTE, readTask, TAG_KEY, MPI_COMM_WORLD, &status);
+    }
+
+  if(header.num_files != num_files)
+    warn("header.num_files=%d != num_files=%d", header.num_files, num_files);
+
+  if(All.TotNumPart == 0) /* global totals not set yet: take them from this header */
+    {
+      if(num_files == 1)
+        for(type = 0; type < NTYPES; type++)
+          {
+            if(header.npartTotal[type] != header.npart[type])
+              {
+                warn("header.npartTotal[%d]=%d != header.npart[%d]=%d, setting header.npartTotal[%d] = header.npart[%d]\n", type,
+                     header.npartTotal[type], type, header.npart[type], type, type);
+                header.npartTotal[type] = header.npart[type];
+              }
+#ifdef USE_SFR
+            header.npartTotalHighWord[type] = 0;
+#endif
+          }
+
+      All.TotNumGas = header.npartTotal[0] + (((long long)header.npartTotalHighWord[0]) << 32); /* 64-bit total from low word plus high word */
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+      All.TotPartSpecial = header.npartTotal[EXACT_GRAVITY_FOR_PARTICLE_TYPE] +
+                           (((long long)header.npartTotalHighWord[EXACT_GRAVITY_FOR_PARTICLE_TYPE]) << 32);
+      mpi_printf("Tot Special %d %d %d %d\n", All.TotPartSpecial, EXACT_GRAVITY_FOR_PARTICLE_TYPE, header.npart[4],
+                 header.npartTotal[4]);
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+      for(type = 0, All.TotNumPart = 0; type < NTYPES; type++)
+        {
+          All.TotNumPart += header.npartTotal[type];
+          All.TotNumPart += (((long long)header.npartTotalHighWord[type]) << 32);
+        }
+
+#ifdef GENERATE_GAS_IN_ICS
+      if(RestartFlag == 0)
+        {
+          if(All.TotNumGas > 0)
+            terminate("You specified GENERATE_GAS_IN_ICS but your ICs already contain gas! (namely %lld gas cells)\n", All.TotNumGas);
+
+#ifdef SPLIT_PARTICLE_TYPE
+          for(i = 0; i < NTYPES; i++)
+            if((1 << i) & (SPLIT_PARTICLE_TYPE))
+              {
+                All.TotNumGas += header.npartTotal[i] + (((long long)header.npartTotalHighWord[i]) << 32);
+                All.TotNumPart += header.npartTotal[i] + (((long long)header.npartTotalHighWord[i]) << 32);
+                mpi_printf("GENERATE_GAS_IN_ICS: generated %lld gas particles from type %d\n",
+                           header.npartTotal[i] + (((long long)header.npartTotalHighWord[i]) << 32), i);
+              }
+#else  /* #ifdef SPLIT_PARTICLE_TYPE */
+          All.TotNumGas += header.npartTotal[1] + (((long long)header.npartTotalHighWord[1]) << 32);
+          All.TotNumPart += header.npartTotal[1] + (((long long)header.npartTotalHighWord[1]) << 32);
+          mpi_printf("GENERATE_GAS_IN_ICS: generated %lld gas particles from type 1\n",
+                     header.npartTotal[1] + (((long long)header.npartTotalHighWord[1]) << 32));
+#endif /* #ifdef SPLIT_PARTICLE_TYPE #else */
+        }
+#endif /* #ifdef GENERATE_GAS_IN_ICS */
+
+#ifdef TILE_ICS
+      All.TotNumPart *= All.TileICsFactor * All.TileICsFactor * All.TileICsFactor;
+      All.TotNumGas *= All.TileICsFactor * All.TileICsFactor * All.TileICsFactor;
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+      All.TotPartSpecial *= All.TileICsFactor * All.TileICsFactor * All.TileICsFactor;
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+#endif /* #ifdef TILE_ICS */
+
+      for(i = 0; i < NTYPES; i++)
+        All.MassTable[i] = header.mass[i];
+
+      if(RestartFlag >= 2) /* for RestartFlag >= 2 the start time comes from the file header */
+        All.Time = All.TimeBegin = header.time;
+      else
+        All.Time = All.TimeBegin;
+
+      set_cosmo_factors_for_current_time();
+    }
+
+  if(ThisTask == readTask)
+    {
+      for(type = 0, n_in_file = 0; type < NTYPES; type++)
+        n_in_file += header.npart[type];
+
+      printf("READIC: Reading file `%s' on task=%d and distribute it to %d to %d (contains %d particles).\n", fname, ThisTask,
+             readTask, lastTask, n_in_file);
+
+      myflush(stdout);
+    }
+
+  for(type = 0; type < NTYPES; type++)
+    {
+      n_in_file       = header.npart[type];
+      ntask           = lastTask - readTask + 1;
+      n_for_this_task = n_in_file / ntask;
+      if((ThisTask - readTask) < (n_in_file % ntask)) /* the first (n_in_file % ntask) tasks take one extra particle */
+        n_for_this_task++;
+
+      NumPart += n_for_this_task;
+
+      if(type == 0)
+        NumGas += n_for_this_task;
+    }
+
+  if(ThisTask == readTask)
+    {
+      if(All.ICFormat == 1 || All.ICFormat == 2)
+        fclose(fd);
+#ifdef HAVE_HDF5
+      if(All.ICFormat == 3)
+        {
+          for(type = NTYPES - 1; type >= 0; type--)
+            if(header.npart[type] > 0) /* npart was zeroed above for types that were not opened */
+              {
+                sprintf(buf, "/PartType%d", type);
+                my_H5Gclose(hdf5_grp[type], buf);
+              }
+          my_H5Fclose(hdf5_file, fname);
+        }
+#endif /* #ifdef HAVE_HDF5 */
+
+#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
+      for(int type = 0; type < NTYPES; type++)
+        ntype_in_files[filenr].npart[type] = header.npart[type];
+#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
+    }
+}
+
+/*! \brief Reads a single snapshot file.
+ *
+ *  This routine reads a single file. The data it contains is
+ *  distributed to tasks 'readTask' to 'lastTask'.
+ *
+ *  \param[in] fname Filename to be read.
+ *  \param[in] readTask Task responsible for reading the file fname
+ *  \param[in] lastTask Last task which gets data contained in the file
+ *  \param[in] readTypes readTypes is a bitfield that determines what particle
+ *             types to read, only if the bit corresponding to a particle type
+ *             is set, the corresponding data is loaded, otherwise its particle
+ *             number is set to zero. (This is only implemented for HDF5
+ *             files.)
+ * + * \return void + */ +void read_file(const char *fname, int filenr, int readTask, int lastTask, int readTypes) +{ + int blockmaxlen; + int n_in_file, n_for_this_task, ntask, pc, offset = 0, task; + int blksize1, blksize2; + MPI_Status status; + FILE *fd = 0; + int nall; + int type, bnr; + char label[4], expected_label[4], buf[500]; + int nstart, bytes_per_blockelement, npart, nextblock, typelist[NTYPES]; + enum iofields blocknr; + +#ifdef HAVE_HDF5 + int rank, pcsum; + hid_t hdf5_file = 0, hdf5_grp[NTYPES], hdf5_dataspace_in_file; + hid_t hdf5_datatype = 0, hdf5_dataspace_in_memory, hdf5_dataset; + hsize_t dims[2], count[2], start[2]; +#endif /* #ifdef HAVE_HDF5 */ + + if(ThisTask == readTask) + { + if(All.ICFormat == 1 || All.ICFormat == 2) + { + if(!(fd = fopen(fname, "r"))) + { + sprintf(buf, "can't open file `%s' for reading initial conditions.\n", fname); + terminate(buf); + } + + if(All.ICFormat == 2) + { + SKIP; + swap_file = blksize1; + my_fread(&label, sizeof(char), 4, fd); + my_fread(&nextblock, sizeof(int), 1, fd); + swap_Nbyte((char *)&nextblock, 1, 4); + SKIP2; + } + + SKIP; + if(All.ICFormat == 1) + { + if(blksize1 != 256) + swap_file = 1; + } + read_header_attributes(fd); + SKIP2; + swap_Nbyte((char *)&blksize1, 1, 4); + swap_Nbyte((char *)&blksize2, 1, 4); + + swap_header(); + +#ifdef COMBINETYPES + header.npartTotal[3] += header.npartTotal[4] + header.npartTotal[5]; + header.npart[3] += header.npart[4] + header.npart[5]; + header.npartTotal[4] = 0; + header.npartTotal[5] = 0; + header.npart[4] = 0; + header.npart[5] = 0; +#endif /* #ifdef COMBINETYPES */ + } + +#ifdef HAVE_HDF5 + if(All.ICFormat == 3) + { + read_header_attributes_in_hdf5(fname); + + hdf5_file = my_H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT); + if(hdf5_file < 0) + terminate("cannot read initial conditions file %s", fname); + + for(type = 0; type < NTYPES; type++) + { + if(header.npart[type] > 0 && (readTypes & (1 << type))) + { + sprintf(buf, "/PartType%d", type); + 
hdf5_grp[type] = my_H5Gopen(hdf5_file, buf); + } + if(!(readTypes & (1 << type))) + { + // Override particle number in file. If we don't + // read the type, both npart and npartTotal will be 0 + header.npartTotal[type] = 0; + header.npart[type] = 0; + header.npartTotalHighWord[type] = 0; + header.mass[type] = 0; + } + } + } +#endif /* #ifdef HAVE_HDF5 */ + + for(task = readTask + 1; task <= lastTask; task++) + MPI_Ssend(&header, sizeof(header), MPI_BYTE, task, TAG_HEADER, MPI_COMM_WORLD); + } + else + MPI_Recv(&header, sizeof(header), MPI_BYTE, readTask, TAG_HEADER, MPI_COMM_WORLD, &status); + +#ifdef INPUT_IN_DOUBLEPRECISION + if(header.flag_doubleprecision == 0) + { + sprintf(buf, "\nProblem: Code compiled with INPUT_IN_DOUBLEPRECISION, but input files are in single precision!\n"); + terminate(buf); + } +#else /* #ifdef INPUT_IN_DOUBLEPRECISION */ + if(header.flag_doubleprecision) + { + sprintf(buf, "\nProblem: Code not compiled with INPUT_IN_DOUBLEPRECISION, but input files are in double precision!\n"); + terminate(buf); + } +#endif /* #ifdef INPUT_IN_DOUBLEPRECISION #else */ + + if(ThisTask == readTask) + { + if(filenr == 0) + mpi_printf( + "\nREADIC: filenr=%d, '%s' contains:\n" + "READIC: Type 0 (gas): %8d (tot=%15lld) masstab= %g\n" + "READIC: Type 1 (halo): %8d (tot=%15lld) masstab= %g\n" + "READIC: Type 2 (disk): %8d (tot=%15lld) masstab= %g\n" + "READIC: Type 3 (bulge): %8d (tot=%15lld) masstab= %g\n" + "READIC: Type 4 (stars): %8d (tot=%15lld) masstab= %g\n" + "READIC: Type 5 (bndry): %8d (tot=%15lld) masstab= %g\n\n", + filenr, fname, header.npart[0], header.npartTotal[0] + (((long long)header.npartTotalHighWord[0]) << 32), All.MassTable[0], + header.npart[1], header.npartTotal[1] + (((long long)header.npartTotalHighWord[1]) << 32), All.MassTable[1], + header.npart[2], header.npartTotal[2] + (((long long)header.npartTotalHighWord[2]) << 32), All.MassTable[2], + header.npart[3], header.npartTotal[3] + (((long long)header.npartTotalHighWord[3]) << 32), 
All.MassTable[3], + header.npart[4], header.npartTotal[4] + (((long long)header.npartTotalHighWord[4]) << 32), All.MassTable[4], + header.npart[5], header.npartTotal[5] + (((long long)header.npartTotalHighWord[5]) << 32), All.MassTable[5]); + } + + /* to collect the gas particles all at the beginning (in case several + snapshot files are read on the current CPU) we move the collisionless + particles such that a gap of the right size is created */ + + for(type = 0, nall = 0; type < NTYPES; type++) + { + n_in_file = header.npart[type]; + ntask = lastTask - readTask + 1; + n_for_this_task = n_in_file / ntask; + if((ThisTask - readTask) < (n_in_file % ntask)) + n_for_this_task++; + + nall += n_for_this_task; + } + + memmove(&P[NumGas + nall], &P[NumGas], (NumPart - NumGas) * sizeof(struct particle_data)); + nstart = NumGas; + + for(bnr = 0; bnr < 1000; bnr++) + { + blocknr = (enum iofields)bnr; + + if(blocknr == IO_LASTENTRY) + { +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) + int pc = nstart; + + for(int type = 0; type < NTYPES; type++) + { + int n_in_file = header.npart[type]; + + long long nprevious = 0; + for(int t = 0; t < type; t++) + nprevious += header.npartTotal[t] + (((long long)header.npartTotalHighWord[t]) << 32); + + for(int nr = 0; nr < filenr; nr++) + nprevious += ntype_in_files[nr].npart[type]; + + for(int task = readTask; task <= lastTask; task++) + { + int n_for_this_task = n_in_file / ntask; + if((task - readTask) < (n_in_file % ntask)) + n_for_this_task++; + + if(ThisTask == task) + { + for(int i = 0; i < n_for_this_task; i++) + P[pc++].FileOrder = nprevious++; + } + else + nprevious += n_for_this_task; + } + } +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */ + break; + } + + /* proceed reading this field only if we are expecting it */ + if(blockpresent(blocknr, 0)) + { + if(ThisTask == readTask) + { + get_dataset_name(blocknr, buf); + if(filenr == 0) + mpi_printf("READIC: reading block %d (%s)...\n", blocknr, buf); + myflush(stdout); + } 
+ + bytes_per_blockelement = get_bytes_per_blockelement(blocknr, 1); + + blockmaxlen = (int)(COMMBUFFERSIZE / bytes_per_blockelement); + + npart = get_particles_in_block(blocknr, &typelist[0]); + + if(npart > 0) + { + if(ThisTask == readTask) + { + if(All.ICFormat == 2) + { + SKIP; + my_fread(&label, sizeof(char), 4, fd); + my_fread(&nextblock, sizeof(int), 1, fd); + swap_Nbyte((char *)&nextblock, 1, 4); + printf("Reading header => '%c%c%c%c' (%d byte)\n", label[0], label[1], label[2], label[3], nextblock); + SKIP2; + + get_Tab_IO_Label(blocknr, expected_label); + if(strncmp(label, expected_label, 4) != 0) + { + sprintf(buf, "incorrect block-structure!\nexpected '%c%c%c%c' but found '%c%c%c%c'\n", expected_label[0], + expected_label[1], expected_label[2], expected_label[3], label[0], label[1], label[2], label[3]); + terminate(buf); + } + } + + if(All.ICFormat == 1 || All.ICFormat == 2) + SKIP; + } + + for(type = 0, offset = 0; type < NTYPES; type++) + { + n_in_file = header.npart[type]; +#ifdef HAVE_HDF5 + pcsum = 0; +#endif /* #ifdef HAVE_HDF5 */ + if(typelist[type] == 0) + { + /* we are expecting (npart>0) this block, but not for this particle type */ + n_for_this_task = n_in_file / ntask; + if((ThisTask - readTask) < (n_in_file % ntask)) + n_for_this_task++; + + offset += n_for_this_task; + } + else + { + /* we are expecting (npart>0) this block for this particle type, read or recv */ + for(task = readTask; task <= lastTask; task++) + { + n_for_this_task = n_in_file / ntask; + if((task - readTask) < (n_in_file % ntask)) + n_for_this_task++; + + if(task == ThisTask) + if(NumPart + n_for_this_task > All.MaxPart) + terminate("too many particles. 
%d %d %d\n", NumPart, n_for_this_task, All.MaxPart); + + /* blocked load to fit in finite size of CommBuffer */ + do + { + pc = n_for_this_task; + + if(pc > blockmaxlen) + pc = blockmaxlen; + + if(ThisTask == readTask) + { + if(All.ICFormat == 1 || All.ICFormat == 2) + my_fread(CommBuffer, bytes_per_blockelement, pc, fd); +#ifdef HAVE_HDF5 + if(All.ICFormat == 3 && pc > 0) + { + /* configure HDF5 dataspaces and hyperslab selection */ + dims[0] = header.npart[type]; + dims[1] = get_values_per_blockelement(blocknr); + if(dims[1] == 1) + rank = 1; + else + rank = 2; + + hdf5_dataspace_in_file = my_H5Screate_simple(rank, dims, NULL); + + dims[0] = pc; + hdf5_dataspace_in_memory = my_H5Screate_simple(rank, dims, NULL); + + start[0] = pcsum; + start[1] = 0; + + count[0] = pc; + count[1] = get_values_per_blockelement(blocknr); + pcsum += pc; + + my_H5Sselect_hyperslab(hdf5_dataspace_in_file, H5S_SELECT_SET, start, NULL, count, NULL); + + switch(get_datatype_in_block(blocknr, 1)) + { + case FILE_INT: + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT); + break; + case FILE_MY_IO_FLOAT: +#ifdef INPUT_IN_DOUBLEPRECISION + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE); +#else /* #ifdef INPUT_IN_DOUBLEPRECISION */ + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT); +#endif /* #ifdef INPUT_IN_DOUBLEPRECISION #else */ + break; + case FILE_MY_ID_TYPE: +#ifdef LONGIDS + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT64); +#else /* #ifdef LONGIDS */ + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_UINT32); +#endif /* #ifdef LONGIDS #else */ + break; + case FILE_DOUBLE: + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_DOUBLE); + break; + case FILE_FLOAT: + hdf5_datatype = my_H5Tcopy(H5T_NATIVE_FLOAT); + break; + default: + terminate("can't process this input type"); + break; + } + + /* test if HDF5 dataset is actually present */ + get_dataset_name(blocknr, buf); + + hdf5_dataset = my_H5Dopen_if_existing(hdf5_grp[type], buf); + + if(hdf5_dataset < 0) + { + // no, pad with zeros + if((ThisTask == readTask) && (task 
== ThisTask)) + mpi_printf("\tDataset %s not present for particle type %d, using zero.\n", buf, type); + memset(CommBuffer, 0, dims[0] * dims[1] * my_H5Tget_size(hdf5_datatype)); + } + else + { + // yes, read into CommBuffer + my_H5Dread(hdf5_dataset, hdf5_datatype, hdf5_dataspace_in_memory, hdf5_dataspace_in_file, + H5P_DEFAULT, CommBuffer, buf); + my_H5Dclose(hdf5_dataset, buf); + } + my_H5Tclose(hdf5_datatype); + my_H5Sclose(hdf5_dataspace_in_memory, H5S_SIMPLE); + my_H5Sclose(hdf5_dataspace_in_file, H5S_SIMPLE); + + } /* All.ICFormat == 3 */ +#endif /* #ifdef HAVE_HDF5 */ + } + + if(ThisTask == readTask && task != readTask && pc > 0) + MPI_Ssend(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, task, TAG_PDATA, MPI_COMM_WORLD); + + if(ThisTask != readTask && task == ThisTask && pc > 0) + MPI_Recv(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, readTask, TAG_PDATA, MPI_COMM_WORLD, + &status); + + /* copy CommBuffer contents into actual particle data structs */ + if(ThisTask == task) + { + empty_read_buffer(blocknr, nstart + offset, pc, type); + + offset += pc; + } + + n_for_this_task -= pc; + } /* do */ + while(n_for_this_task > 0); + + } /* task loop */ + } /* typelist[type] > 0 */ + } /* type loop */ + + if(ThisTask == readTask) + { + if(All.ICFormat == 1 || All.ICFormat == 2) + { + SKIP2; + swap_Nbyte((char *)&blksize1, 1, 4); + swap_Nbyte((char *)&blksize2, 1, 4); + if(blksize1 != blksize2) + { + sprintf(buf, "incorrect block-sizes detected!\n Task=%d blocknr=%d blksize1=%d blksize2=%d\n", ThisTask, + blocknr, blksize1, blksize2); + if(blocknr == IO_ID) + { + strcat(buf, "Possible mismatch of 32bit and 64bit ID's in IC file and AREPO compilation !\n"); + } + terminate(buf); + } + } + } + + } /* npart > 0 */ + } /* blockpresent */ + } /* blocknr loop */ + + for(type = 0; type < NTYPES; type++) + { + n_in_file = header.npart[type]; + + n_for_this_task = n_in_file / ntask; + if((ThisTask - readTask) < (n_in_file % ntask)) + n_for_this_task++; + + NumPart += 
n_for_this_task; + + if(type == 0) + NumGas += n_for_this_task; + } + + if(ThisTask == readTask) + { + if(All.ICFormat == 1 || All.ICFormat == 2) + fclose(fd); +#ifdef HAVE_HDF5 + if(All.ICFormat == 3) + { + for(type = NTYPES - 1; type >= 0; type--) + if(header.npart[type] > 0) + { + sprintf(buf, "/PartType%d", type); + my_H5Gclose(hdf5_grp[type], buf); + } + my_H5Fclose(hdf5_file, fname); + } +#endif /* #ifdef HAVE_HDF5 */ + } +} + +/*! \brief Determines on how many files a given snapshot is distributed. + * + * \param[in] fname File name of the snapshot as given in the parameter file. + * + * \return Number of files; -1: could not find files. + */ +int find_files(const char *fname) +{ + FILE *fd; + char buf[200], buf1[200]; + int dummy; + + sprintf(buf, "%s.%d", fname, 0); + sprintf(buf1, "%s", fname); + + if(All.ICFormat == 3) + { + sprintf(buf, "%s.%d.hdf5", fname, 0); + sprintf(buf1, "%s.hdf5", fname); + } + +#ifndef HAVE_HDF5 + if(All.ICFormat == 3) + { + mpi_terminate("Code wasn't compiled with HDF5 support enabled!\n"); + } +#endif /* #ifndef HAVE_HDF5 */ + + header.num_files = 0; + + if(ThisTask == 0) + { + if((fd = fopen(buf, "r"))) + { + if(All.ICFormat == 1 || All.ICFormat == 2) + { + if(All.ICFormat == 2) + { + my_fread(&dummy, sizeof(dummy), 1, fd); + swap_file = dummy; + my_fread(&dummy, sizeof(dummy), 1, fd); + my_fread(&dummy, sizeof(dummy), 1, fd); + my_fread(&dummy, sizeof(dummy), 1, fd); + } + + my_fread(&dummy, sizeof(dummy), 1, fd); + if(All.ICFormat == 1) + { + if(dummy == 256) + swap_file = 8; + else + swap_file = dummy; + } + read_header_attributes(fd); + + swap_header(); + +#ifdef COMBINETYPES + header.npartTotal[3] += header.npartTotal[4] + header.npartTotal[5]; + header.npart[3] += header.npart[4] + header.npart[5]; + header.npartTotal[4] = 0; + header.npartTotal[5] = 0; + header.npart[4] = 0; + header.npart[5] = 0; +#endif /* #ifdef COMBINETYPES */ + + my_fread(&dummy, sizeof(dummy), 1, fd); + } + fclose(fd); + +#ifdef HAVE_HDF5 + 
if(All.ICFormat == 3) + read_header_attributes_in_hdf5(buf); +#endif /* #ifdef HAVE_HDF5 */ + } + } + + MPI_Bcast(&swap_file, sizeof(swap_file), MPI_BYTE, 0, MPI_COMM_WORLD); + MPI_Bcast(&header, sizeof(header), MPI_BYTE, 0, MPI_COMM_WORLD); + + if(header.num_files < 0) + terminate("header.num_files < 0"); + if(header.num_files > 100000) + terminate("header.num_files=%d read from %s does not make sense - header possibly corrupt.", header.num_files, buf); + if(header.num_files > 0) + return header.num_files; + + if(ThisTask == 0) + { + if((fd = fopen(buf1, "r"))) + { + if(All.ICFormat == 1 || All.ICFormat == 2) + { + if(All.ICFormat == 2) + { + my_fread(&dummy, sizeof(dummy), 1, fd); + swap_file = dummy; + my_fread(&dummy, sizeof(dummy), 1, fd); + my_fread(&dummy, sizeof(dummy), 1, fd); + my_fread(&dummy, sizeof(dummy), 1, fd); + } + + my_fread(&dummy, sizeof(dummy), 1, fd); + if(All.ICFormat == 1) + { + if(dummy == 256) + swap_file = 8; + else + swap_file = dummy; + } + read_header_attributes(fd); + swap_header(); + +#ifdef COMBINETYPES + header.npartTotal[3] += header.npartTotal[4] + header.npartTotal[5]; + header.npart[3] += header.npart[4] + header.npart[5]; + header.npartTotal[4] = 0; + header.npartTotal[5] = 0; + header.npart[4] = 0; + header.npart[5] = 0; +#endif /* #ifdef COMBINETYPES */ + + my_fread(&dummy, sizeof(dummy), 1, fd); + } + fclose(fd); + +#ifdef HAVE_HDF5 + if(All.ICFormat == 3) + read_header_attributes_in_hdf5(buf1); +#endif /* #ifdef HAVE_HDF5 */ + + header.num_files = 1; + } + } + + MPI_Bcast(&swap_file, sizeof(swap_file), MPI_BYTE, 0, MPI_COMM_WORLD); + MPI_Bcast(&header, sizeof(header), MPI_BYTE, 0, MPI_COMM_WORLD); + + if(header.num_files > 0) + return header.num_files; + + mpi_terminate("\nCan't find initial conditions file, neither as '%s'\nnor as '%s'\n", buf, buf1); + return -1; +} + +/*! \brief This function assigns a certain number of tasks to each file. 
+ * + * These tasks are containing the content of that file after the ICs have been + * read. The number of tasks per file is as homogeneous as possible. + * The number of files may at most be equal to the number of tasks. + * + * \param[in] nfiles Number of files of which the snapshot is distributed. + * \param[in] filenr Contains the file number to which this task belongs. + * \param[in] master The number of the task responsible to read the file. + * \param[in] last Number of the last task belonging to the same file as this + * task. + * + * \return void + */ +void distribute_file(int nfiles, int firstfile, int firsttask, int lasttask, int *filenr, int *master, int *last) +{ + int i, group; + int tasks_per_file = NTask / nfiles; + int tasks_left = NTask % nfiles; + + if(tasks_left == 0) + { + group = ThisTask / tasks_per_file; + *master = group * tasks_per_file; + *last = (group + 1) * tasks_per_file - 1; + *filenr = group; + return; + } + + double tpf = ((double)NTask) / nfiles; + + for(i = 0, *last = -1; i < nfiles; i++) + { + *master = *last + 1; + *last = (i + 1) * tpf; + if(*last >= NTask) + *last = *last - 1; + if(*last < *master) + terminate("last < master"); + *filenr = i; + + if(i == nfiles - 1) + *last = NTask - 1; + + if(ThisTask >= *master && ThisTask <= *last) + return; + } +} + +#ifdef HAVE_HDF5 +/*! \brief The error handler used during the loading of the hdf5 header. + * + * \param[in] unused The parameter is not used, but it is necessary for + * compatibility with the HDF5 library. + * \return 1 if the write error is tolerated, otherwise the run is terminated. + */ +herr_t hdf5_header_error_handler(void *unused) +{ +#ifdef TOLERATE_WRITE_ERROR + write_error(3, 0, 0); + return 1; +#else + terminate("Failed to read HDF5 header attribute. Probably your file is corrupt.\n"); + return 0; +#endif +} + +/*! \brief This function reads the snapshot header in case of hdf5 files + * (i.e. format 3). 
+ * + * \param[in] fname File name of the snapshot as given in the parameter file. + * + * \return void + */ +void read_header_attributes_in_hdf5(const char *fname) +{ + hid_t hdf5_file, hdf5_headergrp, hdf5_attribute; + hssize_t scalar_attr_dim = 1; + hssize_t vector_attr_dim = NTYPES; + + hdf5_file = my_H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT); + hdf5_headergrp = my_H5Gopen(hdf5_file, "/Header"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "NumPart_ThisFile"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_INT, header.npart, "NumPart_ThisFile", vector_attr_dim); + my_H5Aclose(hdf5_attribute, "NumPart_ThisFile"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "NumPart_Total"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_UINT, header.npartTotal, "NumPart_Total", vector_attr_dim); + my_H5Aclose(hdf5_attribute, "NumPart_Total"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "NumPart_Total_HighWord"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_UINT, header.npartTotalHighWord, "NumPart_Total_HighWord", vector_attr_dim); + my_H5Aclose(hdf5_attribute, "NumPart_Total_HighWord"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "MassTable"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_DOUBLE, header.mass, "MassTable", vector_attr_dim); + my_H5Aclose(hdf5_attribute, "MassTable"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "Time"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.time, "Time", scalar_attr_dim); + my_H5Aclose(hdf5_attribute, "Time"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "Redshift"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.redshift, "Redshift", scalar_attr_dim); + my_H5Aclose(hdf5_attribute, "Redshift"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "BoxSize"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.BoxSize, "BoxSize", scalar_attr_dim); + my_H5Aclose(hdf5_attribute, "BoxSize"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "NumFilesPerSnapshot"); + 
my_H5Aread(hdf5_attribute, H5T_NATIVE_INT, &header.num_files, "NumFilesPerSnapshot", scalar_attr_dim); + my_H5Aclose(hdf5_attribute, "NumFilesPerSnapshot"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "Omega0"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.Omega0, "Omega0", scalar_attr_dim); + my_H5Aclose(hdf5_attribute, "Omega0"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "OmegaLambda"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.OmegaLambda, "OmegaLambda", scalar_attr_dim); + my_H5Aclose(hdf5_attribute, "OmegaLambda"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "HubbleParam"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_DOUBLE, &header.HubbleParam, "HubbleParam", scalar_attr_dim); + my_H5Aclose(hdf5_attribute, "HubbleParam"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "Flag_Sfr"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_INT, &header.flag_sfr, "Flag_Sfr", scalar_attr_dim); + my_H5Aclose(hdf5_attribute, "Flag_Sfr"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "Flag_Cooling"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_INT, &header.flag_cooling, "Flag_Cooling", scalar_attr_dim); + my_H5Aclose(hdf5_attribute, "Flag_Cooling"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "Flag_StellarAge"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_INT, &header.flag_stellarage, "Flag_StellarAge", scalar_attr_dim); + my_H5Aclose(hdf5_attribute, "Flag_StellarAge"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "Flag_Metals"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_INT, &header.flag_metals, "Flag_Metals", scalar_attr_dim); + my_H5Aclose(hdf5_attribute, "Flag_Metals"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "Flag_Feedback"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_INT, &header.flag_feedback, "Flag_Feedback", scalar_attr_dim); + my_H5Aclose(hdf5_attribute, "Flag_Feedback"); + + hdf5_attribute = my_H5Aopen_name(hdf5_headergrp, "Flag_DoublePrecision"); + my_H5Aread(hdf5_attribute, H5T_NATIVE_INT, 
&header.flag_doubleprecision, "Flag_DoublePrecision", scalar_attr_dim); + my_H5Aclose(hdf5_attribute, "Flag_DoublePrecision"); + + my_H5Gclose(hdf5_headergrp, "/Header"); + my_H5Fclose(hdf5_file, fname); +} +#endif /* #ifdef HAVE_HDF5 */ + +/*! \brief Reads the snapshot header in case of non-hdf5 files (i.e. formats 1 + * and 2). + * + * \param[in] *fd Pointer to snapshot file. + * + * \return void + */ +void read_header_attributes(FILE *fd) +{ +#ifdef NTYPES_ICS + int type; + if(RestartFlag == 0) + { + my_fread(&header_ICs, sizeof(header_ICs), 1, fd); + + for(type = 0; type < NTYPES_ICS; type++) + { + header.npart[type] = header_ICs.npart[type]; + header.mass[type] = header_ICs.mass[type]; + header.npartTotal[type] = header_ICs.npartTotal[type]; + header.npartTotalHighWord[type] = header_ICs.npartTotalHighWord[type]; + } + for(type = NTYPES_ICS; type < NTYPES; type++) + { + header.npart[type] = 0; + header.mass[type] = 0; + header.npartTotal[type] = 0; + header.npartTotalHighWord[type] = 0; + } + + header.time = header_ICs.time; + header.redshift = header_ICs.redshift; + header.flag_sfr = header_ICs.flag_sfr; + header.flag_feedback = header_ICs.flag_feedback; + header.flag_cooling = header_ICs.flag_cooling; + header.num_files = header_ICs.num_files; + header.BoxSize = header_ICs.BoxSize; + header.Omega0 = header_ICs.Omega0; + header.OmegaLambda = header_ICs.OmegaLambda; + header.HubbleParam = header_ICs.HubbleParam; + header.flag_stellarage = header_ICs.flag_stellarage; + header.flag_metals = header_ICs.flag_metals; + header.flag_entropy_instead_u = header_ICs.flag_entropy_instead_u; + header.flag_doubleprecision = header_ICs.flag_doubleprecision; + header.flag_lpt_ics = header_ICs.flag_lpt_ics; + header.lpt_scalingfactor = header_ICs.lpt_scalingfactor; + header.flag_tracer_field = header_ICs.flag_tracer_field; + header.composition_vector_length = header_ICs.composition_vector_length; + } + else + my_fread(&header, sizeof(header), 1, fd); +#else /* #ifdef 
NTYPES_ICS */ + my_fread(&header, sizeof(header), 1, fd); +#endif /* #ifdef NTYPES_ICS #else */ +} + +/*! \brief Swaps endiannes of data. + * + * \param[in, out] data Pointer to the data. + * \param[in] n Number of elements to swap. + * \param[in] m Size of single element to swap: int, float = 4; double = 8. + * + * \return void + */ +void swap_Nbyte(char *data, int n, int m) +{ + int i, j; + char old_data[16]; + + if(swap_file != 8) + { + for(j = 0; j < n; j++) + { + memcpy(&old_data[0], &data[j * m], m); + for(i = 0; i < m; i++) + { + data[j * m + i] = old_data[m - i - 1]; + } + } + } +} + +/*! \brief Swaps the endianness of the snapshot header. + * + * \return void + */ +void swap_header() +{ + swap_Nbyte((char *)&header.npart, NTYPES, 4); + swap_Nbyte((char *)&header.mass, NTYPES, 8); + swap_Nbyte((char *)&header.time, 1, 8); + swap_Nbyte((char *)&header.redshift, 1, 8); + swap_Nbyte((char *)&header.flag_sfr, 1, 4); + swap_Nbyte((char *)&header.flag_feedback, 1, 4); + swap_Nbyte((char *)&header.npartTotal, NTYPES, 4); + swap_Nbyte((char *)&header.flag_cooling, 1, 4); + swap_Nbyte((char *)&header.num_files, 1, 4); + swap_Nbyte((char *)&header.BoxSize, 1, 8); + swap_Nbyte((char *)&header.Omega0, 1, 8); + swap_Nbyte((char *)&header.OmegaLambda, 1, 8); + swap_Nbyte((char *)&header.HubbleParam, 1, 8); + swap_Nbyte((char *)&header.flag_stellarage, 1, 4); + swap_Nbyte((char *)&header.flag_metals, 1, 4); + swap_Nbyte((char *)&header.npartTotalHighWord, NTYPES, 4); + swap_Nbyte((char *)&header.flag_entropy_instead_u, 1, 4); + swap_Nbyte((char *)&header.flag_doubleprecision, 1, 4); + swap_Nbyte((char *)&header.flag_lpt_ics, 1, 4); + swap_Nbyte((char *)&header.lpt_scalingfactor, 1, 4); + swap_Nbyte((char *)&header.flag_tracer_field, 1, 4); + swap_Nbyte((char *)&header.composition_vector_length, 1, 4); +} + +#ifdef TILE_ICS +/*! \brief Duplicates ICs and lines TileICsFactor of them up in each dimension. 
+ * + * \return void + */ +void tile_ics(void) +{ + mpi_printf("TILE_ICS: tiling by a factor of %d...\n", All.TileICsFactor); + + /* allocate memory for new particles */ + domain_resize_storage(NumPart * (All.TileICsFactor * All.TileICsFactor * All.TileICsFactor - 1), + NumGas * (All.TileICsFactor * All.TileICsFactor * All.TileICsFactor - 1), 0); + + /* tile gas particles at the beginning of P[] */ + int N_others = NumPart - NumGas; + memmove(&P[NumGas * All.TileICsFactor * All.TileICsFactor * All.TileICsFactor], &P[NumGas], N_others * sizeof(struct particle_data)); + int i, j, ix, iy = 0, iz = 0; + for(i = 0; i < NumGas; i++) + { + for(ix = 0; ix < All.TileICsFactor; ix++) + { +#ifndef ONEDIMS + for(iy = 0; iy < All.TileICsFactor; iy++) +#endif /* #ifndef ONEDIMS */ + { +#if !defined(TWODIMS) && !defined(ONEDIMS) + for(iz = 0; iz < All.TileICsFactor; iz++) +#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */ + { + if(ix == 0 && iy == 0 && iz == 0) + continue; + j = i + NumGas * ix + NumGas * All.TileICsFactor * iy + NumGas * All.TileICsFactor * All.TileICsFactor * iz; + P[j] = P[i]; + P[j].ID = P[i].ID + IDS_OFFSET * ix + IDS_OFFSET * All.TileICsFactor * iy + + IDS_OFFSET * All.TileICsFactor * All.TileICsFactor * iz; + P[j].Pos[0] += All.BoxSize / All.TileICsFactor * ix; + P[j].Pos[1] += All.BoxSize / All.TileICsFactor * iy; + P[j].Pos[2] += All.BoxSize / All.TileICsFactor * iz; + SphP[j] = SphP[i]; + } + } + } + } + /* tile the other particle types */ + iy = 0; + iz = 0; + for(i = NumGas * All.TileICsFactor * All.TileICsFactor * All.TileICsFactor; + i < NumGas * All.TileICsFactor * All.TileICsFactor * All.TileICsFactor + N_others; i++) + { + for(ix = 0; ix < All.TileICsFactor; ix++) + { +#ifndef ONEDIMS + for(iy = 0; iy < All.TileICsFactor; iy++) +#endif /* #ifndef ONEDIMS */ + { +#if !defined(TWODIMS) && !defined(ONEDIMS) + for(iz = 0; iz < All.TileICsFactor; iz++) +#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */ + { + if(ix == 0 && iy == 0 && iz == 
0) + continue; + j = i + N_others * ix + N_others * All.TileICsFactor * iy + N_others * All.TileICsFactor * All.TileICsFactor * iz; + P[j] = P[i]; + P[j].ID = P[i].ID + IDS_OFFSET * ix + IDS_OFFSET * All.TileICsFactor * iy + + IDS_OFFSET * All.TileICsFactor * All.TileICsFactor * iz; + P[j].Pos[0] += All.BoxSize / All.TileICsFactor * ix; + P[j].Pos[1] += All.BoxSize / All.TileICsFactor * iy; + P[j].Pos[2] += All.BoxSize / All.TileICsFactor * iz; + } + } + } + } + + NumGas *= All.TileICsFactor * All.TileICsFactor * All.TileICsFactor; + NumPart *= All.TileICsFactor * All.TileICsFactor * All.TileICsFactor; +} +#endif /* #ifdef TILE_ICS */ diff --git a/src/amuse/community/arepo/src/io/restart.c b/src/amuse/community/arepo/src/io/restart.c new file mode 100644 index 0000000000..9a3dff5bba --- /dev/null +++ b/src/amuse/community/arepo/src/io/restart.c @@ -0,0 +1,1549 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/io/restart.c + * \date 05/2018 + * \brief Handling of the loading/writing of restart files. 
+ * \details     contains functions:
+ *                void loadrestart(void)
+ *                void reread_params_after_loading_restart(void)
+ *                static int compare_seq_data(const void *a, const void *b)
+ *                static void create_restartfiles_dir()
+ *                static void get_restart_filename(char *buf, int task,
+ *                  int modus)
+ *                static void backup_restartfiles(int task)
+ *                static int get_file_to_check(int task)
+ *                static void check_restart_files(char *buf, struct check *ch,
+ *                  int *success)
+ *                static void send_work_request(int modus, int i)
+ *                static void polling(int modus)
+ *                static void work_files(int modus)
+ *                void restart(int modus)
+ *                static void write_or_read_this_processors_restart_file(int
+ *                  modus, char *buf, struct check *ch)
+ *                static int execute_write_or_read(int modus, char *buf,
+ *                  struct check *ch)
+ *                static void contents_restart_file(int modus)
+ *                void readjust_timebase(double TimeMax_old,
+ *                  double TimeMax_new)
+ *                void in(int *x, int modus)
+ *                void byten(void *x, size_t n, int modus)
+ *                void byten_nohash(void *x, size_t n, int modus)
+ *                void byten_hash(void *x, size_t n, int modus, int hash)
+ *                void allocate_iobuf(void)
+ *                void deallocate_iobuf(int modus)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef __USE_GNU
+#define _GNU_SOURCE /* needed for USE_DIRECT_IO_FOR_RESTARTS */
+#endif              /* #ifndef __USE_GNU */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../debug_md5/Md5.h"
+#include "../domain/domain.h"
+#include "../mesh/voronoi/voronoi.h"
+
+#define MODUS_WRITE 0     /* write this task's restart file */
+#define MODUS_READ 1      /* read this task's restart file */
+#define MODUS_READCHECK 2 /* read back and compare against the hash remembered from the last write */
+#define MODUS_CHECK 3     /* re-read a file written by another task and verify its MD5 sum */
+
+/*! \brief Data for scheduling restart file IO.
+ *
+ *  One entry per MPI rank, gathered on task 0 in work_files() and sorted
+ *  with compare_seq_data() to fix the order in which ranks are started.
+ */
+static struct seq_data
+{
+  int thistask;   /* filled from ThisTask */
+  int rankinnode; /* filled from RankInThisNode */
+  int thisnode;   /* filled from ThisNode */
+} * seq;
+
+/*! \brief Metadata of restart files to be compared to when checking.
+ *
+ *  Filled by execute_write_or_read() in write mode and consumed by
+ *  check_restart_files().
+ */
+static struct check
+{
+  long long byte_count;   /* number of payload bytes written (growth of the global byte_count during the write) */
+  unsigned char hash[16]; /* MD5 digest computed over the payload while writing */
+} * checks;
+
+static char *write_success; /* task 0 only: per-file flag, non-zero once the file passed the check */
+
+static int fdint; /* descriptor returned by open() for the restart file currently processed */
+
+static void in(int *x, int modus);
+static void byten(void *x, size_t n, int modus);
+static void byten_nohash(void *x, size_t n, int modus);
+static void byten_hash(void *x, size_t n, int modus, int hash);
+static void write_or_read_this_processors_restart_file(int modus, char *fname, struct check *ch);
+static int execute_write_or_read(int modus, char *buf, struct check *ch);
+static void contents_restart_file(int modus);
+
+#define MAX_BLOCK_SIZE (32 * 1024 * 1024)
+
+static int PageSize; /* set from getpagesize() at the start of restart() */
+static char *iobuf_aligned, *io_buf;
+static size_t fillp, iop;
+void allocate_iobuf(void);
+void deallocate_iobuf(int modus);
+
+static long long byte_count; /* running byte counter; reset in restart() and used for the reported I/O rate */
+static int files_started;    /* task 0: number of work requests handed out so far */
+static int files_completed;  /* task 0: number of tasks that have reported completion */
+static int files_concurrent; /* copy of All.NumFilesWrittenInParallel, set in restart() */
+static int files_groups;     /* NTask / files_concurrent, rounded up; only used in progress messages */
+
+static MD5_CTX mysum; /* MD5 context of the file currently being read/written/checked */
+
+static struct global_data_all_processes all; /* copy of `All' taken in loadrestart() before the restart files are read */
+
+/*! \brief This function loads the last restart file.
+ *
+ *  Some parameters of the parameter file might be changed between restarting.
+ *  This function ensures that only the allowed parameters change,
+ *  otherwise the old value from the restart file is taken.
+ *  If the end time of the simulation changed readjust_timebase() is called in
+ *  the end.
+ *
+ *  The sequence is: stash the current `All' in the file-static `all', let
+ *  restart(MODUS_READ) overwrite `All' from disk, then copy the permitted
+ *  subset back via reread_params_after_loading_restart().
+ *
+ *  \return void
+ */
+void loadrestart(void)
+{
+  /* save global variables. (will be read from restart file) */
+  all = All;
+
+  /* Read restart files.
+     Note: This also resets all variables in the struct `All'. */
+  restart(MODUS_READ);
+
+  /* However, during the run, some variables in the parameter
+     file are allowed to be changed, if desired. These are copied here. */
+  reread_params_after_loading_restart();
+}
+
+/*! \brief This function takes from the parameter file values that are allowed
+ *         to change after restart.
+ *
+ * \return void
+ */
+void reread_params_after_loading_restart(void)
+{ /* On entry `All' holds the values just read from the restart files and the
+   * file-static `all' holds the copy saved by loadrestart() beforehand. For
+   * every parameter below: task 0 warns if the two differ, then the value
+   * from `all' overrides the restart-file value on every task. */
+  if(ThisTask == 0 && All.MinSizeTimestep != all.MinSizeTimestep)
+    warn("MinSizeTimestep modified from %g to %g while restarting at Time=%g", All.MinSizeTimestep, all.MinSizeTimestep, All.Time);
+  All.MinSizeTimestep = all.MinSizeTimestep;
+  if(ThisTask == 0 && All.MaxSizeTimestep != all.MaxSizeTimestep)
+    warn("MaxSizeTimestep modified from %g to %g while restarting at Time=%g", All.MaxSizeTimestep, all.MaxSizeTimestep, All.Time);
+  All.MaxSizeTimestep = all.MaxSizeTimestep;
+  if(ThisTask == 0 && All.TimeLimitCPU != all.TimeLimitCPU)
+    warn("TimeLimitCPU modified from %g to %g while restarting at Time=%g", All.TimeLimitCPU, all.TimeLimitCPU, All.Time);
+  All.TimeLimitCPU = all.TimeLimitCPU;
+  if(ThisTask == 0 && All.ResubmitOn != all.ResubmitOn)
+    warn("ResubmitOn modified from %d to %d while restarting at Time=%g", All.ResubmitOn, all.ResubmitOn, All.Time);
+  All.ResubmitOn = all.ResubmitOn;
+  if(ThisTask == 0 && All.TimeBetSnapshot != all.TimeBetSnapshot)
+    warn("TimeBetSnapshot modified from %g to %g while restarting at Time=%g", All.TimeBetSnapshot, all.TimeBetSnapshot, All.Time);
+  All.TimeBetSnapshot = all.TimeBetSnapshot;
+  if(ThisTask == 0 && All.TimeBetStatistics != all.TimeBetStatistics)
+    warn("TimeBetStatistics modified from %g to %g while restarting at Time=%g", All.TimeBetStatistics, all.TimeBetStatistics,
+         All.Time);
+  All.TimeBetStatistics = all.TimeBetStatistics;
+  if(ThisTask == 0 && All.CpuTimeBetRestartFile != all.CpuTimeBetRestartFile)
+    warn("CpuTimeBetRestartFile modified from %g to %g while restarting at Time=%g", All.CpuTimeBetRestartFile,
+         all.CpuTimeBetRestartFile, All.Time);
+  All.CpuTimeBetRestartFile = all.CpuTimeBetRestartFile;
+  if(ThisTask == 0 && All.ErrTolIntAccuracy != all.ErrTolIntAccuracy)
+    warn("ErrTolIntAccuracy modified from %g to %g while restarting at Time=%g", All.ErrTolIntAccuracy, all.ErrTolIntAccuracy,
+         All.Time);
+  All.ErrTolIntAccuracy = all.ErrTolIntAccuracy;
+  if(ThisTask == 0 && All.SnapFormat != all.SnapFormat)
+    warn("SnapFormat modified from %d to %d while restarting at Time=%g", All.SnapFormat, all.SnapFormat, All.Time);
+  All.SnapFormat = all.SnapFormat;
+
+  if(ThisTask == 0 && All.ErrTolForceAcc != all.ErrTolForceAcc)
+    warn("ErrTolForceAcc modified from %g to %g while restarting at Time=%g", All.ErrTolForceAcc, all.ErrTolForceAcc, All.Time);
+  All.ErrTolForceAcc = all.ErrTolForceAcc;
+  if(ThisTask == 0 && All.TypeOfTimestepCriterion != all.TypeOfTimestepCriterion)
+    warn("TypeOfTimestepCriterion modified from %d to %d while restarting at Time=%g", All.TypeOfTimestepCriterion,
+         all.TypeOfTimestepCriterion, All.Time);
+  All.TypeOfTimestepCriterion = all.TypeOfTimestepCriterion;
+  if(ThisTask == 0 && All.TypeOfOpeningCriterion != all.TypeOfOpeningCriterion)
+    warn("TypeOfOpeningCriterion modified from %d to %d while restarting at Time=%g", All.TypeOfOpeningCriterion,
+         all.TypeOfOpeningCriterion, All.Time);
+  All.TypeOfOpeningCriterion = all.TypeOfOpeningCriterion;
+  if(ThisTask == 0 && All.NumFilesWrittenInParallel != all.NumFilesWrittenInParallel)
+    warn("NumFilesWrittenInParallel modified from %d to %d while restarting at Time=%g", All.NumFilesWrittenInParallel,
+         all.NumFilesWrittenInParallel, All.Time);
+  All.NumFilesWrittenInParallel = all.NumFilesWrittenInParallel;
+  if(ThisTask == 0 && All.NumFilesPerSnapshot != all.NumFilesPerSnapshot)
+    warn("NumFilesPerSnapshot modified from %d to %d while restarting at Time=%g", All.NumFilesPerSnapshot, all.NumFilesPerSnapshot,
+         All.Time);
+  All.NumFilesPerSnapshot = all.NumFilesPerSnapshot;
+
+  if(ThisTask == 0 && All.LimitUBelowThisDensity != all.LimitUBelowThisDensity)
+    warn("LimitUBelowThisDensity modified from %g to %g while restarting at Time=%g", All.LimitUBelowThisDensity,
+         all.LimitUBelowThisDensity, All.Time);
+  All.LimitUBelowThisDensity = all.LimitUBelowThisDensity;
+  if(ThisTask == 0 && All.LimitUBelowCertainDensityToThisValue != all.LimitUBelowCertainDensityToThisValue)
+    warn("LimitUBelowCertainDensityToThisValue modified from %g to %g while restarting at Time=%g",
+         All.LimitUBelowCertainDensityToThisValue, all.LimitUBelowCertainDensityToThisValue, All.Time);
+  All.LimitUBelowCertainDensityToThisValue = all.LimitUBelowCertainDensityToThisValue;
+  if(ThisTask == 0 && All.MinimumDensityOnStartUp != all.MinimumDensityOnStartUp)
+    warn("MinimumDensityOnStartUp modified from %g to %g while restarting at Time=%g", All.MinimumDensityOnStartUp,
+         all.MinimumDensityOnStartUp, All.Time);
+  All.MinimumDensityOnStartUp = all.MinimumDensityOnStartUp;
+  if(ThisTask == 0 && All.MultipleDomains != all.MultipleDomains)
+    warn("MultipleDomains modified from %d to %d while restarting at Time=%g", All.MultipleDomains, all.MultipleDomains, All.Time);
+  All.MultipleDomains = all.MultipleDomains;
+  if(ThisTask == 0 && All.TopNodeFactor != all.TopNodeFactor)
+    warn("TopNodeFactor modified from %g to %g while restarting at Time=%g", All.TopNodeFactor, all.TopNodeFactor, All.Time);
+  All.TopNodeFactor = all.TopNodeFactor;
+  if(ThisTask == 0 && All.ActivePartFracForNewDomainDecomp != all.ActivePartFracForNewDomainDecomp)
+    warn("ActivePartFracForNewDomainDecomp modified from %g to %g while restarting at Time=%g", All.ActivePartFracForNewDomainDecomp,
+         all.ActivePartFracForNewDomainDecomp, All.Time);
+  All.ActivePartFracForNewDomainDecomp = all.ActivePartFracForNewDomainDecomp;
+  if(ThisTask == 0 && All.OutputListOn != all.OutputListOn)
+    warn("OutputListOn modified from %d to %d while restarting at Time=%g", All.OutputListOn, all.OutputListOn, All.Time);
+  All.OutputListOn = all.OutputListOn;
+  if(ThisTask == 0 && All.CourantFac != all.CourantFac)
+    warn("CourantFac modified from %g to %g while restarting at Time=%g", All.CourantFac, all.CourantFac, All.Time);
+  All.CourantFac = all.CourantFac;
+#ifdef REGULARIZE_MESH_FACE_ANGLE
+  if(ThisTask == 0 && All.CellMaxAngleFactor != all.CellMaxAngleFactor)
+    warn("CellMaxAngleFactor modified from %g to %g while restarting at Time=%g", All.CellMaxAngleFactor, all.CellMaxAngleFactor,
+         All.Time);
+  All.CellMaxAngleFactor = all.CellMaxAngleFactor;
+#else  /* #ifdef REGULARIZE_MESH_FACE_ANGLE */
+  if(ThisTask == 0 && All.CellShapingFactor != all.CellShapingFactor)
+    warn("CellShapingFactor modified from %g to %g while restarting at Time=%g", All.CellShapingFactor, all.CellShapingFactor,
+         All.Time);
+  All.CellShapingFactor = all.CellShapingFactor;
+#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */
+  if(ThisTask == 0 && All.CellShapingSpeed != all.CellShapingSpeed)
+    warn("CellShapingSpeed modified from %g to %g while restarting at Time=%g", All.CellShapingSpeed, all.CellShapingSpeed, All.Time);
+  All.CellShapingSpeed = all.CellShapingSpeed;
+
+  if(ThisTask == 0 && All.OutputListLength != all.OutputListLength)
+    warn("OutputListLength modified from %d to %d while restarting at Time=%g", All.OutputListLength, all.OutputListLength, All.Time);
+  All.OutputListLength = all.OutputListLength; /* must precede the two array comparisons/copies below, which use the new length */
+  if(ThisTask == 0 && memcmp(All.OutputListTimes, all.OutputListTimes, sizeof(double) * All.OutputListLength) != 0)
+    warn("OutputListTimes modified while restarting at Time=%g", All.Time);
+  memcpy(All.OutputListTimes, all.OutputListTimes, sizeof(double) * All.OutputListLength);
+  if(ThisTask == 0 && memcmp(All.OutputListFlag, all.OutputListFlag, sizeof(char) * All.OutputListLength) != 0)
+    warn("OutputListFlag modified while restarting at Time=%g", All.Time);
+  memcpy(All.OutputListFlag, all.OutputListFlag, sizeof(char) * All.OutputListLength);
+
+  if(ThisTask == 0 && strcmp(All.ResubmitCommand, all.ResubmitCommand) != 0)
+    warn("ResubmitCommand modified from %s to %s while restarting at Time=%g", All.ResubmitCommand, all.ResubmitCommand, All.Time);
+  strcpy(All.ResubmitCommand, all.ResubmitCommand); /* source and destination are the same member of the same struct type */
+  if(ThisTask == 0 && strcmp(All.OutputListFilename, all.OutputListFilename) != 0)
+    warn("OutputListFilename modified from %s to %s while restarting at Time=%g", All.OutputListFilename, all.OutputListFilename,
+         All.Time);
+  strcpy(All.OutputListFilename, all.OutputListFilename);
+  if(ThisTask == 0 && strcmp(All.OutputDir, all.OutputDir) != 0)
+    warn("OutputDir modified from %s to %s while restarting at Time=%g", All.OutputDir, all.OutputDir, All.Time);
+  strcpy(All.OutputDir, all.OutputDir);
+  if(ThisTask == 0 && strcmp(All.SnapshotFileBase, all.SnapshotFileBase) != 0)
+    warn("SnapshotFileBase modified from %s to %s while restarting at Time=%g", All.SnapshotFileBase, all.SnapshotFileBase, All.Time);
+  strcpy(All.SnapshotFileBase, all.SnapshotFileBase);
+
+#ifdef MHD_SEEDFIELD
+  if(ThisTask == 0 && All.B_dir != all.B_dir)
+    warn("B_dir modified from %d to %d while restarting at Time=%g", All.B_dir, all.B_dir, All.Time);
+  All.B_dir = all.B_dir;
+  if(ThisTask == 0 && All.B_value != all.B_value)
+    warn("B_value modified from %g to %g while restarting at Time=%g", All.B_value, all.B_value, All.Time);
+  All.B_value = all.B_value;
+#endif /* #ifdef MHD_SEEDFIELD */
+
+  if(All.TimeMax != all.TimeMax) /* unlike the parameters above, TimeMax is not assigned here */
+    {
+      if(ThisTask == 0)
+        warn("TimeMax modified from %g to %g while restarting at Time=%g", All.TimeMax, all.TimeMax, All.Time);
+      readjust_timebase(All.TimeMax, all.TimeMax); /* NOTE(review): presumably this also stores the new All.TimeMax -- confirm */
+    }
+}
+
+/*! \brief Sorting kernel for seq_data structure.
+ *
+ *  Compares (top priority first)
+ *      rankinnode
+ *      thisnode
+ *      thistask
+ *
+ *  \return (-1,0,1), -1 if a < b.
+ */ +static int compare_seq_data(const void *a, const void *b) +{ + if(((struct seq_data *)a)->rankinnode < ((struct seq_data *)b)->rankinnode) + return -1; + + if(((struct seq_data *)a)->rankinnode > ((struct seq_data *)b)->rankinnode) + return +1; + + if(((struct seq_data *)a)->thisnode < ((struct seq_data *)b)->thisnode) + return -1; + + if(((struct seq_data *)a)->thisnode > ((struct seq_data *)b)->thisnode) + return +1; + + if(((struct seq_data *)a)->thistask < ((struct seq_data *)b)->thistask) + return -1; + + if(((struct seq_data *)a)->thistask > ((struct seq_data *)b)->thistask) + return +1; + + return 0; +} + +/*! \brief Creates the restart file directory with appropriate permissions. + * + * \return void + */ +static void create_restartfiles_dir() +{ + char buf[MAXLEN_PATH]; +#ifdef MULTIPLE_RESTARTS + printf(", All.RestartFileCount=%03d", All.RestartFileCount); +#endif /* #ifdef MULTIPLE_RESTARTS */ + printf(".\n"); + sprintf(buf, "%s/restartfiles", All.OutputDir); +#ifdef MULTIPLE_RESTARTS + sprintf(buf, "%s/restartfiles_%03d", All.OutputDir, All.RestartFileCount); +#endif /* #ifdef MULTIPLE_RESTARTS */ + mkdir(buf, 02755); + +#ifdef TOLERATE_WRITE_ERROR + sprintf(buf, "%s/restartfiles", AlternativeOutputDir); + mkdir(buf, 02755); +#endif /* #ifdef TOLERATE_WRITE_ERROR */ +} + +/*! \brief Sets filename of restart file on local task. + * + * \param[out] buf Buffer to which filename is written. + * \param[in] task Task for which restart file should be written. + * \param[in] modus Read or write mode flag. 
+ * + * \return void + */ +static void get_restart_filename(char *buf, int task, int modus) +{ + sprintf(buf, "%s/restartfiles/%s.%d", All.OutputDir, "restart", task); + +#ifdef MULTIPLE_RESTARTS + if(modus == MODUS_WRITE) + sprintf(buf, "%s/restartfiles_%03d/%s.%d", All.OutputDir, All.RestartFileCount++, "restart", task); + if((modus == MODUS_READ) || (modus == MODUS_READCHECK) || (modus == MODUS_CHECK)) + sprintf(buf, "%s/restartfiles_%03d/%s.%d", All.OutputDir, All.RestartFileCount - 1, "restart", task); +#endif /* #ifdef MULTIPLE_RESTARTS */ +} + +/*! \brief Renames existing restartfiles to backup-restartfiles. + * + * This way the code ensures that there are two sets of restart-files per + * run. + * + * \param[in] task Task for which restart file is renamed. + * + * \return void + */ +static void backup_restartfiles(int task) +{ + char buf[MAXLEN_PATH]; + + FILE *fcheck = NULL; + char buf_bak[MAXLEN_PATH]; + + int bak_files_status = 0; + + mpi_printf("RESTART: Backup restart files...\n"); + myflush(stdout); + + get_restart_filename(buf, task, MODUS_READ); + + sprintf(buf_bak, "%s/restartfiles/bak-%s.%d", All.OutputDir, "restart", ThisTask); + if((fcheck = fopen(buf, "r"))) + { + fclose(fcheck); + + rename(buf, buf_bak); + bak_files_status = 1; + } +#ifdef TOLERATE_WRITE_ERROR + char alternative_fname[MAXLEN_PATH]; + sprintf(alternative_fname, "%s/restartfiles/%s.%d", AlternativeOutputDir, "restart", ThisTask); + sprintf(buf_bak, "%s/restartfiles/bak-%s.%d", AlternativeOutputDir, "restart", ThisTask); + + if((fcheck = fopen(alternative_fname, "r"))) + { + fclose(fcheck); + + rename(alternative_fname, buf_bak); + bak_files_status = 1; + } +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + + int bak_files_status_sum; + MPI_Allreduce(&bak_files_status, &bak_files_status_sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + if(bak_files_status_sum != NTask && bak_files_status_sum != 0) + warn("RESTART: some (%d) restart files were renamed to bak, but some (%d) weren't - 
something is very possibly wrong!", + bak_files_status, NTask - bak_files_status); + if(bak_files_status_sum == NTask) + mpi_printf("RESTART: done renaming pre-existing restart files to bak files.\n"); + else if(bak_files_status_sum == 0) + mpi_printf("RESTART: no pre-existing restart files found.\n"); + + myflush(stdout); +} + +/*! \brief Returns the index of file which is to be checked by local task. + * + * Task gets assigned a restart file to check which it has not written. + * This is to ensure that the check is actually read from disk (not from some + * buffer). + * + * \param[in] task Local task. + * + * \return File number. + */ +static int get_file_to_check(int task) { return (task + NTask / 2) % NTask; } + +/*! \brief Checks restart files via an md5sum. + * + * This is to ensure that they have been written correctly to the file system. + * + * \param[in] buf Filename of restart file. + * \param[in] ch Metadata about data to be checked. + * \param[out] success Flag whether check was a success. 
+ *
+ * \return void
+ */
+static void check_restart_files(char *buf, struct check *ch, int *success)
+{
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+  struct stat st;
+  if(stat(buf, &st) == 0)
+    {
+      size_t size = st.st_size;
+      if(size % PageSize > 0) /* pad the file with zero bytes up to a multiple of PageSize before opening it with O_DIRECT */
+        {
+          FILE *fd = fopen(buf, "a");
+          if(fd)
+            {
+              size_t n = PageSize - (size % PageSize);
+              char *p  = calloc(n, 1);
+              if(p == NULL)
+                terminate("p == NULL");
+              printf("RESTART: Topping of restart file '%s' by %lld bytes\n", buf, (long long)n);
+              fwrite(p, n, 1, fd);
+              fclose(fd);
+              free(p);
+            }
+          else
+            terminate("can't increase length of restart file '%s'", buf);
+        }
+    }
+  else
+    terminate("Restart file '%s' not found.\n", buf);
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+  int oflag = O_RDONLY;
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+  oflag |= O_DIRECT;
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+
+  if((fdint = open(buf, oflag)) < 0)
+    terminate("Restart file '%s' not found.\n", buf);
+
+  allocate_iobuf();
+
+  MD5Init(&mysum);
+
+  long long readLen = ch->byte_count; /* only the payload is hashed; the stored digest that follows is read separately below */
+  while(readLen > 0)
+    {
+      int readChunk = 1024 * 1024 * 32; /* consume the payload in pieces of at most 32 MiB */
+      if(readChunk > readLen)
+        readChunk = readLen;
+
+      byten(NULL, readChunk, MODUS_CHECK);
+      readLen -= readChunk;
+    }
+
+  MD5Final(&mysum);
+
+  unsigned char has_hash[16], written_hash[16];
+
+  for(int k = 0; k < 16; k++)
+    has_hash[k] = mysum.digest[k];
+
+  byten_nohash(written_hash, 16, MODUS_READ); /* digest stored in the file right after the payload */
+
+  /* the recomputed digest must agree with both the writer's in-memory digest (ch->hash) and the one stored in the file */
+  if(memcmp(has_hash, ch->hash, 16) != 0 || memcmp(has_hash, written_hash, 16) != 0)
+    {
+      char str_has[48], str_expected[48], str_written[48];
+      for(int i = 0; i < 16; i++)
+        {
+          sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+          sprintf(str_expected + 2 * i, "%02X", ch->hash[i]);
+          sprintf(str_written + 2 * i, "%02X", written_hash[i]);
+        }
+
+      str_has[32] = str_expected[32] = str_written[32] = 0;
+
+      char newname[10000];
+      sprintf(newname, "%s-damaged", buf);
+      rename(buf, newname); /* keep the bad file for inspection under a different name */
+
+      terminate("RESTART: file '%s' has MD5 hash of '%s', does not match expected hash '%s' or written hash '%s'.", newname, str_has,
+                str_expected, str_written);
+      *success = 0; /* NOTE(review): presumably terminate() does not return, which would make this unreachable -- confirm */
+    }
+  else
+    {
+#ifdef VERBOSE
+      char str_has[48], str_expected[48], str_written[48];
+      for(int i = 0; i < 16; i++)
+        {
+          sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+          sprintf(str_expected + 2 * i, "%02X", ch->hash[i]);
+          sprintf(str_written + 2 * i, "%02X", written_hash[i]);
+        }
+
+      str_has[32] = str_expected[32] = str_written[32] = 0;
+
+      printf("RESTART: Task %d: file '%s' has MD5 hash of '%s', does match expected hash '%s' and written hash '%s'.\n", ThisTask, buf,
+             str_has, str_expected, str_written);
+#endif /* #ifdef VERBOSE */
+      *success = 1;
+    }
+  deallocate_iobuf(MODUS_CHECK);
+
+  close(fdint);
+}
+
+/*! \brief Distributes information and meta-data to task that is supposed to
+ *         check the restart file which has just been written.
+ *
+ *  Called on task 0 only. Sends an integer `type' to the task stored in
+ *  seq[i]: 0 tells the receiver to do its work, 1 tells it to skip because
+ *  the relevant file is already flagged in write_success. In check mode a
+ *  receiver that has work to do is additionally sent the struct check of the
+ *  file it has to verify. Both messages use MPI_Ssend and therefore block
+ *  until the receiver has posted the matching receive.
+ *
+ *  \param[in] modus Write or check mode.
+ *  \param[in] i Index in seq array.
+ *
+ *  \return void
+ */
+static void send_work_request(int modus, int i)
+{
+  int type = 0;
+
+  if(modus == MODUS_WRITE)
+    {
+      if(write_success[seq[i].thistask]) /* this task's own file is already verified: nothing to rewrite */
+        type = 1;
+    }
+
+  if(modus == MODUS_CHECK)
+    {
+      int task = get_file_to_check(seq[i].thistask);
+      if(write_success[task]) /* the file this task would check is already verified */
+        type = 1;
+    }
+
+  MPI_Ssend(&type, 1, MPI_INT, seq[i].thistask, TAG_N, MPI_COMM_WORLD);
+
+  if(modus == MODUS_CHECK)
+    {
+      int task = get_file_to_check(seq[i].thistask);
+      if(!write_success[task]) /* same condition as type == 0 above */
+        MPI_Ssend(&checks[task], sizeof(struct check), MPI_BYTE, seq[i].thistask, TAG_N, MPI_COMM_WORLD);
+    }
+}
+
+/*! \brief Gets work request.
+ *
+ *  \param[in] modus Write or check files.
+ *
+ *  Only task 0 does anything here. It probes (without blocking) for one
+ *  completion message, records its payload according to the mode, and hands
+ *  the next not-yet-started task its work request.
+ *
+ * \return void
+ */
+static void polling(int modus)
+{
+  /* nothing to do unless we are the scheduler and work is still outstanding */
+  if(ThisTask != 0 || files_completed >= NTask)
+    return;
+
+  MPI_Status probe_status;
+  int message_waiting;
+
+  /* now check for a completion message */
+  MPI_Iprobe(MPI_ANY_SOURCE, TAG_KEY, MPI_COMM_WORLD, &message_waiting, &probe_status);
+
+  if(!message_waiting)
+    return;
+
+  int sender = probe_status.MPI_SOURCE;
+
+  if(modus == MODUS_WRITE)
+    {
+      /* writer reports size and hash of what it wrote */
+      MPI_Recv(&checks[sender], sizeof(struct check), MPI_BYTE, sender, TAG_KEY, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+  else if(modus == MODUS_CHECK)
+    {
+      /* checker reports the verdict for the file it was assigned */
+      int verdict;
+      MPI_Recv(&verdict, 1, MPI_INT, sender, TAG_KEY, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      write_success[get_file_to_check(sender)] = verdict;
+    }
+  else
+    {
+      /* plain acknowledgement, content is irrelevant */
+      int ack;
+      MPI_Recv(&ack, 1, MPI_INT, sender, TAG_KEY, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+
+  files_completed++;
+
+  if(files_started >= NTask)
+    return;
+
+  /* announce each new group of concurrently processed files */
+  if((files_started % files_concurrent) == 0)
+    {
+      int group = (files_started / files_concurrent) + 1;
+
+      if(modus == MODUS_READ)
+        mpi_printf("RESTART: Loading restart files group #%d out of %d...\n", group, files_groups);
+      else if(modus == MODUS_WRITE)
+        mpi_printf("RESTART: Writing restart files group #%d out of %d...\n", group, files_groups);
+      else
+        mpi_printf("RESTART: Checking restart files group #%d out of %d...\n", group, files_groups);
+    }
+
+  send_work_request(modus, files_started++);
+}
+
+/*! \brief Schedule the reading/writing/checking of restart files to ensure
+ *         only NumFilesWrittenInParallel are written in parallel.
+ *
+ *  \param[in] modus Read, write or check files.
+ *
+ * \return void
+ */
+static void work_files(int modus)
+{ /* Task 0 acts as scheduler and also processes its own file; every other
+   * task blocks until it receives a go/skip message, does its work and
+   * reports back with TAG_KEY. */
+  if(ThisTask == 0)
+    if(!(seq = malloc(NTask * sizeof(struct seq_data))))
+      terminate("can't allocate seq_data");
+
+  struct seq_data seq_loc;
+  seq_loc.thistask   = ThisTask;
+  seq_loc.rankinnode = RankInThisNode;
+  seq_loc.thisnode   = ThisNode;
+
+  MPI_Gather(&seq_loc, sizeof(struct seq_data), MPI_BYTE, seq, sizeof(struct seq_data), MPI_BYTE, 0, MPI_COMM_WORLD);
+
+  if(ThisTask == 0)
+    {
+      qsort(seq, NTask, sizeof(struct seq_data), compare_seq_data); /* start order: by rank-in-node first, then node, then task */
+      if(seq[0].thistask != 0) /* the code below assumes slot 0 of the schedule is task 0 itself */
+        terminate("unexpected");
+
+      files_started   = 0;
+      files_completed = 0;
+
+      if((files_started % files_concurrent) == 0)
+        {
+          if(modus == MODUS_READ)
+            mpi_printf("RESTART: Loading restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                       files_groups);
+          else if(modus == MODUS_WRITE)
+            mpi_printf("RESTART: Writing restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                       files_groups);
+          else
+            mpi_printf("RESTART: Checking restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                       files_groups);
+        }
+
+      for(int i = 1; i < All.NumFilesWrittenInParallel; i++) /* start the rest of the first group; slot 0 is handled locally below */
+        {
+          files_started++;
+          send_work_request(modus, i);
+        }
+
+      files_started++; /* accounts for task 0's own file */
+      if(!((modus == MODUS_WRITE && write_success[ThisTask]) || (modus == MODUS_CHECK && write_success[get_file_to_check(ThisTask)])))
+        {
+          if(modus == MODUS_CHECK)
+            {
+              char buf[MAXLEN_PATH];
+              int task = get_file_to_check(ThisTask);
+              get_restart_filename(buf, task, modus);
+
+              int success;
+              check_restart_files(buf, &checks[task], &success);
+              write_success[task] = success;
+            }
+          else
+            {
+              char buf[MAXLEN_PATH];
+              get_restart_filename(buf, ThisTask, modus);
+              write_or_read_this_processors_restart_file(modus, buf, &checks[0]);
+            }
+        }
+      files_completed++;
+
+      if(files_started < NTask)
+        {
+          if((files_started % files_concurrent) == 0)
+            {
+              if(modus == MODUS_READ)
+                mpi_printf("RESTART: Loading restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                           files_groups);
+              else if(modus == MODUS_WRITE)
+                mpi_printf("RESTART: Writing restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                           files_groups);
+              else
+                mpi_printf("RESTART: Checking restart files group #%d out of %d...\n", (files_started / files_concurrent) + 1,
+                           files_groups);
+            }
+
+          send_work_request(modus, files_started++);
+        }
+
+      while(files_completed < NTask) /* each completion received in polling() releases the next waiting task */
+        polling(modus);
+
+      free(seq);
+    }
+  else
+    {
+      int type;
+      MPI_Recv(&type, 1, MPI_INT, 0, TAG_N, MPI_COMM_WORLD, MPI_STATUS_IGNORE); /* wait until we are told to start */
+
+      if(type == 0) /* 0: do the work, anything else: skip and just acknowledge */
+        {
+          if(modus == MODUS_CHECK)
+            {
+              struct check ch;
+              MPI_Recv(&ch, sizeof(struct check), MPI_BYTE, 0, TAG_N, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+              char buf[MAXLEN_PATH];
+              get_restart_filename(buf, get_file_to_check(ThisTask), modus);
+
+              int success;
+              check_restart_files(buf, &ch, &success);
+              MPI_Ssend(&success, 1, MPI_INT, 0, TAG_KEY, MPI_COMM_WORLD);
+            }
+          else
+            {
+              char buf[MAXLEN_PATH];
+              get_restart_filename(buf, ThisTask, modus);
+              struct check ch;
+              write_or_read_this_processors_restart_file(modus, buf, &ch);
+
+              if(modus == MODUS_WRITE)
+                {
+                  MPI_Ssend(&ch, sizeof(struct check), MPI_BYTE, 0, TAG_KEY, MPI_COMM_WORLD);
+                }
+              else
+                {
+                  int dummy = 0;
+                  MPI_Ssend(&dummy, 1, MPI_INT, 0, TAG_KEY, MPI_COMM_WORLD);
+                }
+            }
+        }
+      else
+        {
+          int dummy = 1; /* in check mode task 0 stores this value as the (unchanged) success flag of the skipped file */
+          MPI_Ssend(&dummy, 1, MPI_INT, 0, TAG_KEY, MPI_COMM_WORLD);
+        }
+    }
+}
+
+/*! \brief This function reads or writes the restart files.
+ *
+ *  Each processor writes its own restart file, with the
+ *  I/O being done in parallel. To avoid congestion of the disks
+ *  you can tell the program to restrict the number of files
+ *  that are simultaneously written to NumFilesWrittenInParallel.
+ *
+ *  \param[in] modus if modus==MODUS_READ  the restart()-routine reads,
+ *                   if modus==MODUS_WRITE it writes a restart file.
+ * + * \return void + */ +void restart(int modus) +{ + CPU_Step[CPU_MISC] += measure_time(); + double t0 = second(); + byte_count = 0; + + PageSize = getpagesize(); + mpi_printf("RESTART: PageSize = %d\n", PageSize); + + if(modus == MODUS_READ) + mpi_printf("RESTART: Loading restart files...\n"); + + if(ThisTask == 0 && modus == MODUS_WRITE) + { + printf("RESTART: Writing restart files"); + create_restartfiles_dir(); + } + MPI_Barrier(MPI_COMM_WORLD); + + if(NTask < All.NumFilesWrittenInParallel) + { + warn("Number of processors should be a smaller or equal than `NumFilesWrittenInParallel'. We're adjusting the latter.\n"); + All.NumFilesWrittenInParallel = NTask; + } + + if(All.NumFilesWrittenInParallel < 1) + All.NumFilesWrittenInParallel = 1; + + files_concurrent = All.NumFilesWrittenInParallel; + files_groups = NTask / All.NumFilesWrittenInParallel; + if(NTask % All.NumFilesWrittenInParallel) + files_groups++; + +#ifndef MULTIPLE_RESTARTS + if(modus == MODUS_WRITE) /* write */ + backup_restartfiles(ThisTask); +#endif /* #ifndef MULTIPLE_RESTARTS */ + + if(modus == MODUS_WRITE) + if(ThisTask == 0) + { + if(!(checks = malloc(NTask * sizeof(struct check)))) + terminate("can't allocate checks"); + if(!(write_success = malloc(NTask))) + terminate("can't allocate write_success"); + + for(int i = 0; i < NTask; i++) + { + checks[i].byte_count = 0; + write_success[i] = 0; + } + } + + work_files(modus); + + MPI_Barrier(MPI_COMM_WORLD); + + if(modus == MODUS_WRITE) + { + int iter = 0; + int success = 0; + while(!success) + { + work_files(MODUS_CHECK); + + if(ThisTask == 0) + { + int count = 0; + for(int i = 0; i < NTask; i++) + { + if(!write_success[i]) + count++; + } + + if(count == 0) + { + printf("All restart files written successfully.\n"); + success = 1; + } + else + { + printf("Need to repeat writing for %d restartfiles.\n", count); + } + } + + MPI_Bcast(&success, 1, MPI_INT, 0, MPI_COMM_WORLD); + + if(success) + break; + + iter++; + if(iter > 4) + terminate("Too 
many iterations, fix your file system."); + + work_files(MODUS_WRITE); + }; + + free(checks); + } + + /* check whether the restarts are all at the same time */ + if(modus == MODUS_READ) /* read */ + { + struct global_data_all_processes all_task0; + + if(ThisTask == 0) + all_task0 = All; + + MPI_Bcast(&all_task0, sizeof(struct global_data_all_processes), MPI_BYTE, 0, MPI_COMM_WORLD); + + if(all_task0.Time != All.Time) + terminate("The restart file on task=%d is not consistent with the one on task=0\n", ThisTask); + } + + long long byte_count_all; + sumup_longs(1, &byte_count, &byte_count_all); + + double t1 = second(); + + mpi_printf("RESTART: load/save took %g sec, corresponds to I/O rate of %g MB/sec\n", timediff(t0, t1), + byte_count_all / (1024.0 * 1024.0) / timediff(t0, t1)); + + CPU_Step[CPU_RESTART] += measure_time(); + mpi_printf("RESTART: done.\n"); +} + +/*! \brief Reads or writes restart file. + * + * Try write until successful. + * + * \param[in] modus Flag for write or read. + * \param[in] buf File name. + * \param[in] ch Check metadata. + * + * \return void + */ +static void write_or_read_this_processors_restart_file(int modus, char *buf, struct check *ch) +{ + if(modus == MODUS_READ) + { + execute_write_or_read(MODUS_READ, buf, ch); + } + else + { + int failed = 0; + + do + { + execute_write_or_read(MODUS_WRITE, buf, ch); + } + while(failed > 0); + } +} + +/*! \brief Reads or writes a restart file. + * + * A single attempt which either is successful or fails. + * + * \param[in] modus Flag for write or read. + * \param[in] buf File name. + * \param[in] ch Check metadata. + * + * \return 0: success, 1: failed. 
+ */
+static int execute_write_or_read(int modus, char *buf, struct check *ch)
+{
+  if(modus == MODUS_WRITE)
+    ch->byte_count = byte_count; /* remember the counter at the start; turned into the number of bytes written further down */
+
+  int failed_flag = 0;
+
+#ifdef TOLERATE_WRITE_ERROR
+  for(int try_io = 0; try_io < 2; try_io++) /* second pass retries with a file below AlternativeOutputDir */
+    {
+      WriteErrorFlag = 0;
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+      if(modus == MODUS_READ || modus == MODUS_READCHECK)
+        {
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+          struct stat st;
+          if(stat(buf, &st) == 0)
+            {
+              size_t size = st.st_size;
+              if(size % PageSize > 0) /* pad the file with zero bytes up to a multiple of PageSize before opening it with O_DIRECT */
+                {
+                  FILE *fd = fopen(buf, "a");
+                  if(fd)
+                    {
+                      size_t n = PageSize - (size % PageSize);
+                      char *p  = calloc(n, 1);
+                      if(p == NULL)
+                        terminate("p == NULL");
+                      printf("RESTART: Topping of restart file '%s' by %lld bytes\n", buf, (long long)n);
+                      fwrite(p, n, 1, fd);
+                      fclose(fd);
+                      free(p);
+                    }
+                  else
+                    terminate("can't increase length of restart file '%s'", buf);
+                }
+            }
+          else
+            terminate("Restart file '%s' not found.\n", buf);
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+          int oflag = O_RDONLY;
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+          oflag |= O_DIRECT;
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+          if((fdint = open(buf, oflag)) < 0)
+            terminate("Restart file '%s' not found.\n", buf);
+
+          allocate_iobuf();
+        }
+      else
+        {
+#ifdef TOLERATE_WRITE_ERROR
+          int try_open = 0;
+
+          while(try_open < IO_TRIALS) /* retry the open up to IO_TRIALS times, sleeping IO_SLEEP_TIME in between */
+            {
+              int oflag = O_WRONLY | O_CREAT | O_TRUNC;
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+              oflag |= O_DIRECT;
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+              if((fdint = open(buf, oflag, S_IRUSR | S_IWUSR | S_IRGRP)) < 0)
+                {
+                  printf("Restart file '%s' cannot be opened. Trying again...\n", buf);
+                  myflush(stdout);
+
+                  try_open++;
+
+                  sleep(IO_SLEEP_TIME);
+                }
+              else
+                break;
+            }
+
+          if(try_open == IO_TRIALS)
+            terminate("Opening of restart file failed too often!");
+#else  /* #ifdef TOLERATE_WRITE_ERROR */
+      int oflag = O_WRONLY | O_CREAT | O_TRUNC;
+#ifdef USE_DIRECT_IO_FOR_RESTARTS
+      oflag |= O_DIRECT;
+#endif /* #ifdef USE_DIRECT_IO_FOR_RESTARTS */
+      if((fdint = open(buf, oflag, S_IRUSR | S_IWUSR | S_IRGRP)) < 0)
+        terminate("Restart file '%s' cannot be opened.\n", buf);
+#endif /* #ifdef TOLERATE_WRITE_ERROR #else */
+          allocate_iobuf();
+        }
+
+      MD5Init(&mysum);
+
+      contents_restart_file(modus); /* transfers the actual payload in the direction given by modus */
+
+      MD5Final(&mysum);
+
+      unsigned char has_hash[16];
+      static unsigned char should_hash[16]; /* static: digest of the last write, kept for a later MODUS_READCHECK call */
+
+      for(int k = 0; k < 16; k++)
+        has_hash[k] = mysum.digest[k];
+
+      if(modus == MODUS_READ)
+        {
+          /* read */
+          unsigned char written_hash[16];
+          byten_nohash(written_hash, 16, modus); /* digest stored in the file right after the payload */
+          if(memcmp(has_hash, written_hash, 16) != 0)
+            {
+              char str_has[48], str_written[48];
+              for(int i = 0; i < 16; i++)
+                {
+                  sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+                  sprintf(str_written + 2 * i, "%02X", written_hash[i]);
+                }
+
+              str_has[32] = str_written[32] = 0;
+
+              terminate("RESTART: file '%s' does not match expected MD5 hash of '%s', found '%s' instead.", buf, str_has, str_written);
+            }
+        }
+      else if(modus == MODUS_READCHECK)
+        {
+          if(memcmp(should_hash, has_hash, 16) != 0)
+            {
+              char str_should[48], str_has[48];
+              for(int i = 0; i < 16; i++)
+                {
+                  sprintf(str_should + 2 * i, "%02X", should_hash[i]);
+                  sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+                }
+
+              str_should[32] = str_has[32] = 0;
+
+              failed_flag = 1; /* the only place the return value becomes non-zero */
+
+              terminate(
+                  "RESTART-READCHECK: file '%s' does not match expected MD5 hash of '%s' after read-back check, has '%s' instead.",
+                  buf, str_should, str_has);
+            }
+#ifdef VERBOSE
+          else
+            {
+              char str_should[48], str_has[48];
+              for(int i = 0; i < 16; i++)
+                {
+                  sprintf(str_should + 2 * i, "%02X", should_hash[i]);
+                  sprintf(str_has + 2 * i, "%02X", has_hash[i]);
+                }
+
+              str_should[32] = str_has[32] = 0;
+
+              printf("RESTART-READCHECK: Task %d: file '%s' does match expected MD5 hash of '%s' after read-back check, has '%s'.\n",
+                     ThisTask, buf, str_should, str_has);
+            }
+#endif /* #ifdef VERBOSE */
+        }
+      else if(modus == MODUS_WRITE)
+        {
+          ch->byte_count = byte_count - ch->byte_count; /* growth of the global counter during this write, i.e. the payload size */
+          for(int k = 0; k < 16; k++)
+            ch->hash[k] = has_hash[k];
+
+          /* write */
+          byten_nohash(has_hash, 16, modus); /* append the digest itself, excluded from the hash */
+
+          for(int k = 0; k < 16; k++)
+            should_hash[k] = has_hash[k];
+        }
+      else
+        terminate("This should not happen - wrong modus!");
+
+      deallocate_iobuf(modus);
+
+      close(fdint);
+
+#ifdef TOLERATE_WRITE_ERROR
+      if(WriteErrorFlag == 0)
+        break;
+
+      if(try_io == 0)
+        {
+          char alternative_fname[MAXLEN_PATH];
+          sprintf(alternative_fname, "%s/restartfiles/%s.%d", AlternativeOutputDir, "restart", ThisTask);
+
+          printf("TOLERATE_WRITE_ERROR: Try to write to alternative file: Task=%d try_io=%d alternative-filename='%s'\n", ThisTask,
+                 try_io, alternative_fname);
+          myflush(stdout);
+          strncpy(buf, alternative_fname, MAXLEN_PATH); /* try on a different output directory */
+        }
+      else
+        {
+          terminate("TOLERATE_WRITE_ERROR: Second try with alternative file failed too.\n");
+        }
+    }
+#endif /* #ifdef TOLERATE_WRITE_ERROR */
+
+  return failed_flag;
+}
+
+/*! \brief Defines contents of restart file.
+ *
+ *  \param[in] modus Read or write (0: write; >0 read).
+ * + * \return void + */ +static void contents_restart_file(int modus) +{ + /* common data */ + byten(&All, sizeof(struct global_data_all_processes), modus); + + /* individual allocation factors for meshes */ + byten(&Mesh.Indi, sizeof(struct individual_alloc_data), modus); + byten(&DeRefMesh.Indi, sizeof(struct individual_alloc_data), modus); + + polling(modus); + + if(modus == MODUS_READ) /* read */ + allocate_memory(); + + int ntask = NTask; + in(&ntask, modus); + + if(modus == MODUS_READ) + if(ntask != NTask) + terminate("The restart files were written for ntask=%d while you're using now %d MPI ranks\n", ntask, NTask); + + in(&NumPart, modus); + + /* Particle data */ + byten(&P[0], NumPart * sizeof(struct particle_data), modus); + + polling(modus); + + in(&NumGas, modus); + + if(NumGas > 0) + { + /* Sph-Particle data */ + byten(&SphP[0], NumGas * sizeof(struct sph_particle_data), modus); + } + + polling(modus); + + in(&Nvc, modus); + in(&MaxNvc, modus); + in(&FirstUnusedConnection, modus); + + if(modus == MODUS_READ) /* read */ + DC = mymalloc_movable(&DC, "DC", MaxNvc * sizeof(connection)); + + byten(DC, MaxNvc * sizeof(connection), modus); + + polling(modus); + + /* write state of random number generators */ + byten(gsl_rng_state(random_generator), gsl_rng_size(random_generator), modus); + byten(gsl_rng_state(random_generator_aux), gsl_rng_size(random_generator_aux), modus); + + /* now store variables for time integration bookkeeping */ + byten(TimeBinSynchronized, TIMEBINS * sizeof(int), modus); + + in(&TimeBinsHydro.NActiveParticles, modus); + in(&TimeBinsGravity.NActiveParticles, modus); + byten(&TimeBinsHydro.GlobalNActiveParticles, sizeof(long long), modus); + byten(&TimeBinsGravity.GlobalNActiveParticles, sizeof(long long), modus); + byten(TimeBinsHydro.ActiveParticleList, TimeBinsHydro.NActiveParticles * sizeof(int), modus); + byten(TimeBinsGravity.ActiveParticleList, TimeBinsGravity.NActiveParticles * sizeof(int), modus); + 
byten(TimeBinsHydro.NextInTimeBin, NumGas * sizeof(int), modus); + byten(TimeBinsGravity.NextInTimeBin, NumPart * sizeof(int), modus); + byten(TimeBinsHydro.PrevInTimeBin, NumGas * sizeof(int), modus); + byten(TimeBinsGravity.PrevInTimeBin, NumPart * sizeof(int), modus); + byten(TimeBinsHydro.TimeBinCount, TIMEBINS * sizeof(int), modus); + byten(TimeBinsGravity.TimeBinCount, TIMEBINS * sizeof(int), modus); + byten(TimeBinsHydro.FirstInTimeBin, TIMEBINS * sizeof(int), modus); + byten(TimeBinsGravity.FirstInTimeBin, TIMEBINS * sizeof(int), modus); + byten(TimeBinsHydro.LastInTimeBin, TIMEBINS * sizeof(int), modus); + byten(TimeBinsGravity.LastInTimeBin, TIMEBINS * sizeof(int), modus); + +#ifdef USE_SFR + byten(TimeBinSfr, TIMEBINS * sizeof(double), modus); +#endif + + polling(modus); + + /* now store custom data for optional Config settings */ +#ifdef USE_SFR + in(&Stars_converted, modus); +#endif + + polling(modus); + + /* now store relevant data for tree */ + + in(&NTopleaves, modus); + in(&NTopnodes, modus); + + in(&Ngb_MaxPart, modus); + in(&Ngb_MaxNodes, modus); + in(&Ngb_NumNodes, modus); + in(&Ngb_MarkerValue, modus); + in(&Ngb_FirstNonTopLevelNode, modus); + + polling(modus); + + if(modus == MODUS_READ) /* read */ + { + domain_allocate(); + ngb_treeallocate(); + } + + if(All.TotNumGas > 0) + { +#ifdef TREE_BASED_TIMESTEPS + byten(ExtNgb_Nodes + Ngb_MaxPart, Ngb_NumNodes * sizeof(struct ExtNgbNODE), modus); +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + byten(Ngb_Nodes + Ngb_MaxPart, Ngb_NumNodes * sizeof(struct NgbNODE), modus); + byten(Ngb_DomainNodeIndex, NTopleaves * sizeof(int), modus); + byten(Ngb_Nextnode, (Ngb_MaxPart + NTopleaves) * sizeof(int), modus); + byten(Ngb_Father, Ngb_MaxPart * sizeof(int), modus); + byten(Ngb_Marker, (Ngb_MaxPart + NTopleaves) * sizeof(int), modus); + } + + polling(modus); + + byten(TopNodes, NTopnodes * sizeof(struct topnode_data), modus); + byten(DomainTask, NTopleaves * sizeof(int), modus); + byten(DomainCorner, 3 * 
sizeof(double), modus); + byten(DomainCenter, 3 * sizeof(double), modus); + byten(&DomainLen, sizeof(double), modus); + byten(&DomainFac, sizeof(double), modus); + byten(&DomainInverseLen, sizeof(double), modus); + byten(&DomainBigFac, sizeof(double), modus); +} + +/*! \brief Adjusts the timeline if the TimeMax variable is + * increased between a restart. + * + * The approach taken here is to reduce the resolution of the + * integer timeline by factors of 2 until the new final time + * can be reached within TIMEBASE. + * + * \param[in] TimeMax_old old final time. + * \param[in] TimeMax_new new final time (must be larger than old one). + * + * \return void + */ +void readjust_timebase(double TimeMax_old, double TimeMax_new) +{ + int i; + long long ti_end; + + if(sizeof(long long) != 8) + terminate("\nType 'long long' is not 64 bit on this platform\n\n"); + + mpi_printf("\nRESTART: All.TimeMax has been changed in the parameterfile\nNeed to adjust integer timeline\n\n\n"); + + if(TimeMax_new < TimeMax_old) + terminate("\nIt is not allowed to reduce All.TimeMax\n\n"); + + if(All.ComovingIntegrationOn) + ti_end = (long long)(log(TimeMax_new / All.TimeBegin) / All.Timebase_interval); + else + ti_end = (long long)((TimeMax_new - All.TimeBegin) / All.Timebase_interval); + + while(ti_end > TIMEBASE) + { + All.Timebase_interval *= 2.0; + + ti_end /= 2; + All.Ti_Current /= 2; + All.Previous_Ti_Current /= 2; + +#ifdef PMGRID + All.PM_Ti_begstep /= 2; + All.PM_Ti_endstep /= 2; +#endif /* #ifdef PMGRID */ + + for(i = 0; i < NumPart; i++) + { + P[i].Ti_Current /= 2; + + if(P[i].TimeBinGrav > 0) + { + P[i].TimeBinGrav--; + if(P[i].TimeBinGrav <= 0) + { + char buf[1000]; + sprintf(buf, "Error in readjust_timebase(). Minimum Timebin for particle %d reached.\n", i); + terminate(buf); + } + } + + if(P[i].Type == 0) + if(P[i].TimeBinHydro > 0) + { + P[i].TimeBinHydro--; + if(P[i].TimeBinHydro <= 0) + { + char buf[1000]; + sprintf(buf, "Error in readjust_timebase(). 
Minimum Timebin for particle %d reached.\n", i); + terminate(buf); + } + } + } + } + + All.TimeMax = TimeMax_new; +} + +/*! \brief Reads/writes one integer to a restart file. + * + * \param[in, out] x pointer to the integer. + * \param[in] modus if modus>0 the restart()-routine reads, + * if modus==0 it writes a restart file. + * + * \return void + */ +void in(int *x, int modus) { byten(x, sizeof(int), modus); } + +/*! \brief Reads/writes n bytes to restart file buffer. + * + * \param[in, out] x Pointer to the data. + * \param[in] n Number of bytes. + * \param[in] modus If modus>0 the restart()-routine reads, + * if modus==0 it writes a restart file. + * + * \return void + */ +void byten(void *x, size_t n, int modus) { byten_hash(x, n, modus, 1); } + +/*! \brief Wrapper for byten; called with hash=0. + * + * I.e. writes something without including it in calculating the md5sum. This + * should only be done for the md5sum itself, but not for actual data. + * + * \param[in, out] x Pointer to the data. + * \param[in] n Number of bytes. + * \param[in] modus If modus>0 the restart()-routine reads, + * if modus==0 it writes a restart file. + * + * \return void + */ +void byten_nohash(void *x, size_t n, int modus) { byten_hash(x, n, modus, 0); } + +/*! \brief Reads/writes n bytes to restart file buffer. + * + * \param[in, out] x pointer to the data + * \param[in] n number of bytes + * \param[in] modus if modus>0 the restart()-routine reads, + * if modus==0 it writes a restart file. + * \param[in] hash If nonzero, this part is considered in md5sum. 
+ * + * + * \return void + */ +void byten_hash(void *x, size_t n, int modus, int hash) +{ + byte_count += n; + + if(n > 0) + { + size_t nin = n; + + if(modus == MODUS_READ || modus == MODUS_READCHECK || modus == MODUS_CHECK) /* read */ + { + if(modus == MODUS_READCHECK || modus == MODUS_CHECK) + x = mymalloc("x", n); + + unsigned char *ptr = x; + + while(n > 0) + { + if(iop != fillp) + { + size_t nn = n; + if(nn > (fillp - iop)) + nn = fillp - iop; + + memcpy(ptr, iobuf_aligned + iop, nn); + + n -= nn; + ptr += nn; + iop += nn; + } + else + { + if(iop == MAX_BLOCK_SIZE) + { + iop = 0; + fillp = 0; + } + + size_t nn = n; + if(nn % PageSize > 0) + nn = (nn / PageSize + 1) * PageSize; + + if(nn > MAX_BLOCK_SIZE - fillp) + nn = MAX_BLOCK_SIZE - fillp; + + if(read(fdint, iobuf_aligned + fillp, nn) != nn) + terminate("read error"); + + fillp += nn; + } + } + + if(hash) /* to prevent call if we write/load the checksum itself */ + MD5UpdateLong(&mysum, x, nin); + + if(modus == MODUS_READCHECK || modus == MODUS_CHECK) + myfree(x); + } + else /* write */ + { + unsigned char *ptr = x; + + while(n > 0) + { + if(iop < MAX_BLOCK_SIZE) + { + size_t nn = n; + if(nn > MAX_BLOCK_SIZE - iop) + nn = MAX_BLOCK_SIZE - iop; + memcpy(iobuf_aligned + iop, ptr, nn); + + n -= nn; + ptr += nn; + iop += nn; + } + else + { + size_t nn = MAX_BLOCK_SIZE; + if(write(fdint, iobuf_aligned, nn) != nn) + terminate("write error"); + + iop = 0; + } + } + + if(hash) /* to prevent call if we write/load the checksum itself */ + MD5UpdateLong(&mysum, x, nin); + } + } +} + +/*! \brief Allocates the IO buffer for reading/writing the restart-file buffer. 
+ * + * \return void + */ +void allocate_iobuf(void) +{ + if((MAX_BLOCK_SIZE % PageSize) > 0) + terminate("MAX_BLOCK_SIZE must be a multiple of PageSize"); + + if(!(io_buf = malloc(MAX_BLOCK_SIZE + PageSize))) + terminate("cannot allocated IO buffer"); + + iobuf_aligned = (char *)(((((size_t)io_buf) + (PageSize - 1)) / PageSize) * PageSize); + + fillp = 0; + iop = 0; +} + +/*! \brief Frees the IO buffer for reading/writing the restart-files. + * + * Writes buffer before freeing it if in MODUS_WRITE. + * + * \param[in] modus Read or write. + * + * \return void + */ +void deallocate_iobuf(int modus) +{ + if(modus == MODUS_WRITE) /* write */ + { + if(iop > 0) + { + if(iop % PageSize > 0) + iop = ((iop / PageSize) + 1) * PageSize; + + if(write(fdint, iobuf_aligned, iop) != iop) + terminate("write error"); + } + } + + free(io_buf); +} diff --git a/src/amuse/community/arepo/src/main/allvars.c b/src/amuse/community/arepo/src/main/allvars.c new file mode 100644 index 0000000000..fedd5f6306 --- /dev/null +++ b/src/amuse/community/arepo/src/main/allvars.c @@ -0,0 +1,331 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/main/allvars.c + * \date 05/2018 + * \brief Contains all global variables. + * \details This file contains the global variables used in Arepo. + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../main/allvars.h" + +struct data_nodelist *DataNodeList; /* to be deleted */ + +MyDouble boxSize, boxHalf; + +#ifdef LONG_X +MyDouble boxSize_X, boxHalf_X; +#else /* #ifdef LONG_X */ +#endif /* #ifdef LONG_X #else */ +#ifdef LONG_Y +MyDouble boxSize_Y, boxHalf_Y; +#else /* #ifdef LONG_Y */ +#endif /* #ifdef LONG_Y #else */ +#ifdef LONG_Z +MyDouble boxSize_Z, boxHalf_Z; +#else /* #ifdef LONG_Z */ +#endif /* #ifdef LONG_Z #else */ + +#ifdef FIX_PATHSCALE_MPI_STATUS_IGNORE_BUG +MPI_Status mpistat; +#endif /* #ifdef FIX_PATHSCALE_MPI_STATUS_IGNORE_BUG */ + +/*********************************************************/ +/* Global variables */ +/*********************************************************/ + +int ThisTask; /*!< the number of the local processor */ +int NTask; /*!< number of processors */ +int PTask; /*!< note: NTask = 2^PTask */ + +int ThisNode; /*!< the rank of the current compute node */ +int NumNodes; /*!< the number of compute nodes used */ +int MinTasksPerNode; /*!< the minimum number of MPI tasks that is found on any of the nodes */ +int MaxTasksPerNode; /*!< the maximum number of MPI tasks that is found on any of the nodes */ +int TasksInThisNode; /*!< number of MPI tasks on current compute node */ +int RankInThisNode; /*!< rank of the MPI task on the current compute node */ +long long MemoryOnNode; +double CPUThisRun; /*!< Sums CPU time of current process */ +int MaxTopNodes; /*!< Maximum number of nodes in the top-level tree used for domain decomposition */ +int RestartFlag; /*!< taken from command line used to start code. 
0 is normal start-up from + initial conditions, 1 is resuming a run from a set of restart files, while 2 + marks a restart from a snapshot file. */ +int RestartSnapNum; +int Argc; +char **Argv; + +size_t AllocatedBytes; +size_t FreeBytes; + +int Nforces; +int *TargetList; +struct thread_data Thread[NUM_THREADS]; + +#ifdef IMPOSE_PINNING +hwloc_cpuset_t cpuset_thread[NUM_THREADS]; +#endif /* #ifdef IMPOSE_PINNING */ + +int *Exportflag, + *ThreadsExportflag[NUM_THREADS]; /*!< Buffer used for flagging whether a particle needs to be exported to another process */ +int *Exportnodecount; +int *Exportindex; + +int *Send_offset, *Send_count, *Recv_count, *Recv_offset; +int *Send_offset_nodes, *Send_count_nodes, *Recv_count_nodes, *Recv_offset_nodes; +int *TasksThatSend, *TasksThatRecv, NSendTasks, NRecvTasks; +struct send_recv_counts *Send, *Recv; + +int Mesh_nimport, Mesh_nexport, *Mesh_Send_offset, *Mesh_Send_count, *Mesh_Recv_count, *Mesh_Recv_offset; +int Force_nimport, Force_nexport, *Force_Send_offset, *Force_Send_count, *Force_Recv_count, *Force_Recv_offset; + +int TakeLevel; +int TagOffset; + +int TimeBinSynchronized[TIMEBINS]; +struct TimeBinData TimeBinsHydro, TimeBinsGravity; + +#ifdef USE_SFR +double TimeBinSfr[TIMEBINS]; +#endif + +#ifdef SUBFIND +int GrNr; +int NumPartGroup; +#endif /* #ifdef SUBFIND */ + +char DumpFlag = 1; +char DumpFlagNextSnap = 1; + +int FlagNyt = 0; + +double CPU_Step[CPU_LAST]; +double CPU_Step_Stored[CPU_LAST]; + +double WallclockTime; /*!< This holds the last wallclock time measurement for timings measurements */ +double StartOfRun; /*!< This stores the time of the start of the run for evaluating the elapsed time */ + +double EgyInjection; + +int NumPart; /*!< number of particles on the LOCAL processor */ +int NumGas; /*!< number of gas particles on the LOCAL processor */ + +gsl_rng *random_generator; /*!< a random number generator */ +gsl_rng *random_generator_aux; /*!< an auxialiary random number generator for use if one doesn't 
want to influence the main code's + random numbers */ + +#ifdef USE_SFR +int Stars_converted; /*!< current number of star particles in gas particle block */ +#endif + +#ifdef TOLERATE_WRITE_ERROR +int WriteErrorFlag; +char AlternativeOutputDir[MAXLEN_PATH]; +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + +double TimeOfLastDomainConstruction; /*!< holds what it says */ + +int *Ngblist; /*!< Buffer to hold indices of neighbours retrieved by the neighbour search + routines */ + +double DomainCorner[3], DomainCenter[3], DomainLen, DomainFac; +double DomainInverseLen, DomainBigFac; +int *DomainStartList, *DomainEndList; +double *DomainCost, *TaskCost; +int *DomainCount, *TaskCount; +struct no_list_data *ListNoData; + +int domain_bintolevel[TIMEBINS]; +int domain_refbin[TIMEBINS]; +int domain_grav_weight[TIMEBINS]; +int domain_hydro_weight[TIMEBINS]; +int domain_to_be_balanced[TIMEBINS]; + +int *DomainTask; +int *DomainNewTask; +int *DomainNodeIndex; + +peanokey *Key, *KeySorted; + +struct topnode_data *TopNodes; + +int NTopnodes, NTopleaves; + +/* variables for input/output , usually only used on process 0 */ + +char ParameterFile[MAXLEN_PATH]; /*!< file name of parameterfile used for starting the simulation */ + +FILE *FdInfo, /*!< file handle for info.txt log-file. */ + *FdEnergy, /*!< file handle for energy.txt log-file. */ + *FdTimings, /*!< file handle for timings.txt log-file. */ + *FdDomain, /*!< file handle for domain.txt log-file. */ + *FdBalance, /*!< file handle for balance.txt log-file. */ + *FdMemory, /*!< file handle for memory.txt log-file. */ + *FdTimebin, /*!< file handle for timebins.txt log-file. */ + *FdCPU; /*!< file handle for cpu.txt log-file. 
*/ + +#ifdef DETAILEDTIMINGS +FILE *FdDetailed; +#endif /* #ifdef DETAILEDTIMINGS */ + +#ifdef OUTPUT_CPU_CSV +FILE *FdCPUCSV; +#endif /* #ifdef OUTPUT_CPU_CSV */ + +#ifdef RESTART_DEBUG +FILE *FdRestartTest; +#endif /* #ifdef RESTART_DEBUG */ + +#ifdef USE_SFR +FILE *FdSfr; /*!< file handle for sfr.txt log-file. */ +#endif + +struct pair_data *Pairlist; + +#ifdef FORCETEST +FILE *FdForceTest; /*!< file handle for forcetest.txt log-file. */ +#endif /* #ifdef FORCETEST */ + +int WriteMiscFiles = 1; + +void *CommBuffer; /*!< points to communication buffer, which is used at a few places */ + +/*! This structure contains data which is the SAME for all tasks (mostly code parameters read from the + * parameter file). Holding this data in a structure is convenient for writing/reading the restart file, and + * it allows the introduction of new global variables in a simple way. The only thing to do is to introduce + * them into this structure. + */ +struct global_data_all_processes All; + +/*! This structure holds all the information that is + * stored for each particle of the simulation. + */ +struct particle_data *P, /*!< holds particle data on local processor */ + *DomainPartBuf; /*!< buffer for particle data used in domain decomposition */ + +struct subfind_data *PS; + +/* the following struture holds data that is stored for each SPH particle in addition to the collisionless + * variables. + */ +struct sph_particle_data *SphP, /*!< holds SPH particle data on local processor */ + *DomainSphBuf; /*!< buffer for SPH particle data in domain decomposition */ + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE +struct special_particle_data *PartSpecialListGlobal; +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + +peanokey *DomainKeyBuf; + +/*! global state of system + */ +struct state_of_system SysState, SysStateAtStart, SysStateAtEnd; + +/*! Various structures for communication during the gravity computation. 
+ */ +struct directdata *DirectDataIn, *DirectDataAll; +struct accdata *DirectAccOut, *DirectAccIn; +int ThreadsNexport[NUM_THREADS], ThreadsNexportNodes[NUM_THREADS]; +struct data_partlist *PartList, *ThreadsPartList[NUM_THREADS]; +struct datanodelist *NodeList, *ThreadsNodeList[NUM_THREADS]; +struct potdata_out *PotDataResult, /*!< holds the partial results computed for imported particles. Note: We use GravDataResult = + GravDataGet, such that the result replaces the imported data */ + *PotDataOut; /*!< holds partial results received from other processors. This will overwrite the GravDataIn array */ + +/*! Header for the standard file format. + */ +struct io_header header; /*!< holds header for snapshot files */ +#ifdef NTYPES_ICS +struct io_header_ICs header_ICs; /*!< holds header for IC files */ +#endif /* #ifdef NTYPES_ICS */ +char (*Parameters)[MAXLEN_PARAM_TAG]; +char (*ParametersValue)[MAXLEN_PARAM_VALUE]; +char *ParametersType; + +/*! Variables for gravitational tree + * ------------------ + */ +int Tree_MaxPart; +int Tree_NumNodes; +int Tree_MaxNodes; +int Tree_FirstNonTopLevelNode; +int Tree_NumPartImported; +int Tree_NumPartExported; +int Tree_ImportedNodeOffset; +int Tree_NextFreeNode; +MyDouble *Tree_Pos_list; +unsigned long long *Tree_IntPos_list; +int *Tree_Task_list; +int *Tree_ResultIndexList; + +struct treepoint_data *Tree_Points; +struct resultsactiveimported_data *Tree_ResultsActiveImported; + +int *Nextnode; /*!< gives next node in tree walk (nodes array) */ +int *Father; /*!< gives parent node in tree (Prenodes array) */ + +struct NODE *Nodes; /*!< points to the actual memory allocted for the nodes */ + /*!< this is a pointer used to access the nodes which is shifted such that Nodes[All.MaxPart] + gives the first allocated node */ + +#ifdef MULTIPLE_NODE_SOFTENING +struct ExtNODE *ExtNodes; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + +float *Nodes_GravCost; + +/*! 
Variables for neighbor tree + * ----------------- + */ +int Ngb_MaxPart; +int Ngb_NumNodes; +int Ngb_MaxNodes; +int Ngb_FirstNonTopLevelNode; +int Ngb_NextFreeNode; +int *Ngb_Father; +int *Ngb_Marker; +int Ngb_MarkerValue; + +int *Ngb_DomainNodeIndex; +int *DomainListOfLocalTopleaves; +int *DomainNLocalTopleave; +int *DomainFirstLocTopleave; +int *Ngb_Nextnode; + +/*! The ngb-tree data structure + */ +struct NgbNODE *Ngb_Nodes; +struct ExtNgbNODE *ExtNgb_Nodes; + +#ifdef STATICNFW +double Rs, R200; +double Dc; +double RhoCrit, V200; +double fac; +#endif /* #ifdef STATICNFW */ + +int MaxThreads = 1; + +IO_Field *IO_Fields; +int N_IO_Fields = 0; +int Max_IO_Fields = 0; diff --git a/src/amuse/community/arepo/src/main/allvars.h b/src/amuse/community/arepo/src/main/allvars.h new file mode 100644 index 0000000000..2dc46e56b3 --- /dev/null +++ b/src/amuse/community/arepo/src/main/allvars.h @@ -0,0 +1,1924 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/main/allvars.h + * \date 05/2018 + * \brief All (global) variables. 
+ * \details + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 30.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef ALLVARS_H +#define ALLVARS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "./arepoconfig.h" + +#ifdef IMPOSE_PINNING +#include +#endif /* #ifdef IMPOSE_PINNING */ + +#include "../time_integration/timestep.h" +#include "../utils/dtypes.h" +#include "../utils/tags.h" + +#define AREPO_VERSION "Arepo public 1.0" /* code version string */ + +/* default values for unspecified config options */ + +#if defined(__linux__) && !defined(HOST_MEMORY_REPORTING) +#define HOST_MEMORY_REPORTING +#endif /* #if defined(__linux__) && !defined(HOST_MEMORY_REPORTING) */ + +#ifndef LOAD_TYPES +#define LOAD_TYPES 0xff +#endif /* #ifndef LOAD_TYPES */ + +#if defined(REFINEMENT_SPLIT_CELLS) || defined(REFINEMENT_MERGE_CELLS) +#define REFINEMENT +#else /* #if defined (REFINEMENT_SPLIT_CELLS) || defined (REFINEMENT_MERGE_CELLS) */ +#undef REFINEMENT +#endif /* #if defined (REFINEMENT_SPLIT_CELLS) || defined (REFINEMENT_MERGE_CELLS) #else */ + +#ifndef NTYPES +#define NTYPES 6 +#endif /* #ifndef NTYPES */ + +#ifndef NSOFTTYPES +#define NSOFTTYPES NTYPES +#endif /* #ifndef NSOFTTYPES */ + +#if !defined(OUTPUT_PRESSURE_GRADIENT) && !defined(OUTPUT_DENSITY_GRADIENT) && !defined(OUTPUT_VELOCITY_GRADIENT) && \ + !defined(OUTPUT_BFIELD_GRADIENT) && !defined(OUTPUT_DIVVEL) && !defined(OUTPUT_CURLVEL) && !defined(OUTPUT_VORTICITY) +// only if no gradient output defined, no need to update them directly before output. 
+#else /* #if !defined(OUTPUT_PRESSURE_GRADIENT) && !defined(OUTPUT_DENSITY_GRADIENT) && !defined(OUTPUT_VELOCITY_GRADIENT) && \ + !defined(OUTPUT_BFIELD_GRADIENT) && !defined(OUTPUT_DIVVEL) && !defined(OUTPUT_CURLVEL) && !defined(OUTPUT_VORTICITY) */ +#define UPDATE_GRADIENTS_FOR_OUTPUT +#endif /* #if !defined(OUTPUT_PRESSURE_GRADIENT) && !defined(OUTPUT_DENSITY_GRADIENT) && !defined(OUTPUT_VELOCITY_GRADIENT) && \ + !defined(OUTPUT_BFIELD_GRADIENT) && !defined(OUTPUT_DIVVEL) && !defined(OUTPUT_CURLVEL) && !defined(OUTPUT_VORTICITY) #else \ + */ + +#ifdef ADAPTIVE_HYDRO_SOFTENING +#ifndef NSOFTTYPES_HYDRO +#define NSOFTTYPES_HYDRO 64 +#endif /* #ifndef NSOFTTYPES_HYDRO */ +#else /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#undef NSOFTTYPES_HYDRO +#define NSOFTTYPES_HYDRO 0 +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */ + +#if defined(SAVE_HSML_IN_SNAPSHOT) +#define SUBFIND_CALC_MORE +#endif /* #if defined(SAVE_HSML_IN_SNAPSHOT) */ + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE +#define NO_SELFGRAVITY_TYPE \ + EXACT_GRAVITY_FOR_PARTICLE_TYPE // exclude particle type from self-gravity (can be used with exact gravity) +#define NO_GRAVITY_TYPE EXACT_GRAVITY_FOR_PARTICLE_TYPE // disable computation of gravity on particle type +#define EXACT_GRAVITY_REACTION // include reaction to other particle types when using exact gravity +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + +/* restrictions on config option combinations */ +#if(NSOFTTYPES + NSOFTTYPES_HYDRO) >= 254 +#error "(NSOFTTYPES + NSOFTTYPES_HYDRO) >= 254" +#endif /* #if (NSOFTTYPES + NSOFTTYPES_HYDRO) >= 254 */ + +#if NSOFTTYPES < 2 +#error "NSOFTTYPES < 2" +#endif /* #if NSOFTTYPES < 2 */ + +#if defined(HOST_MEMORY_REPORTING) && !defined(__linux__) +#error "HOST_MEMORY_REPORTING only works under Linux." +#endif /* #if defined(HOST_MEMORY_REPORTING) && !defined(__linux__) */ + +#if defined(USE_DIRECT_IO_FOR_RESTARTS) && !defined(__linux__) +#error "USE_DIRECT_IO_FOR_RESTARTS only works under Linux." 
+#endif /* #if defined(USE_DIRECT_IO_FOR_RESTARTS) && !defined(__linux__) */ + +#ifdef INDIVIDUAL_GRAVITY_SOFTENING +#if !((INDIVIDUAL_GRAVITY_SOFTENING + 0) >= 1) +#error "set INDIVIDUAL_GRAVITY_SOFTENING to a bitmask of particle types" +#endif /* #if !((INDIVIDUAL_GRAVITY_SOFTENING+0) >= 1) */ +#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */ + +#ifdef OUTPUTPOTENTIAL +#ifndef EVALPOTENTIAL +#error "the option OUTPUTPOTENTIAL requires EVALPOTENTIAL" +#endif /* #ifndef EVALPOTENTIAL */ +#endif /* #ifdef OUTPUTPOTENTIAL */ + +#if defined(CELL_CENTER_GRAVITY) && defined(SELFGRAVITY) +#ifndef HIERARCHICAL_GRAVITY +#error "the of option CELL_CENTER_GRAVITY requires HIERARCHICAL_GRAVITY" +#endif /* #ifndef HIERARCHICAL_GRAVITY */ +#endif /* #if defined(CELL_CENTER_GRAVITY) && defined(SELFGRAVITY) */ + +#ifdef MHD +#ifndef RIEMANN_HLLD +#error "the of option MHD requires RIEMANN_HLLD" +#endif /* #ifndef RIEMANN_HLLD */ +#endif /* #ifdef MHD */ + +/* optional additional headers based on config options */ + +#include "../utils/timer.h" + +#if defined(COOLING) +#include "../cooling/cooling_vars.h" +#endif /* #if defined(COOLING) */ + +#ifdef ADDBACKGROUNDGRID +#include "../add_backgroundgrid/add_bggrid.h" +#endif /* #ifdef ADDBACKGROUNDGRID */ + +/* function mappings and macros */ + +#ifdef MPI_HYPERCUBE_ALLGATHERV +#define MPI_Allgatherv MPI_hypercube_Allgatherv +#endif /* #ifdef MPI_HYPERCUBE_ALLGATHERV */ + +#ifdef MPISENDRECV_CHECKSUM +#define MPI_Sendrecv MPI_Check_Sendrecv +#endif /* #ifdef MPISENDRECV_CHECKSUM */ + +#define terminate(...) 
\ + { \ + if(FlagNyt == 0) \ + { \ + char termbuf1[1000], termbuf2[1000]; \ + sprintf(termbuf1, "TERMINATE: ******!!!!!****** Code termination on task=%d, function %s(), file %s, line %d", ThisTask, \ + __FUNCTION__, __FILE__, __LINE__); \ + sprintf(termbuf2, __VA_ARGS__); \ + printf("%s: %s\n", termbuf1, termbuf2); \ + fflush(stdout); \ + FlagNyt = 1; \ + MPI_Abort(MPI_COMM_WORLD, 1); \ + } \ + exit(1); \ + } +#define mpi_terminate(...) \ + { \ + if(ThisTask == 0) \ + terminate(__VA_ARGS__); \ + } +#define warn(...) \ + { \ + char termbuf1[1000], termbuf2[1000]; \ + sprintf(termbuf1, "WARNING: Code warning on task=%d, function %s(), file %s, line %d", ThisTask, __FUNCTION__, __FILE__, \ + __LINE__); \ + sprintf(termbuf2, __VA_ARGS__); \ + printf("%s: %s\n", termbuf1, termbuf2); \ + myflush(stdout); \ + FILE *fd = fopen("WARNINGS", "a"); \ + fprintf(fd, "%s: %s\n", termbuf1, termbuf2); \ + fclose(fd); \ + } + +/* define an "assert" macro which outputs MPI task (we do NOT want to + call MPI_Abort, because then the assertion failure isn't caught in + the debugger) */ +#define myassert(cond) \ + if(!(cond)) \ + { \ + char termbuf[1000]; \ + sprintf(termbuf, "Assertion failure!\n\ttask=%d, function %s(), file %s, line %d:\n\t%s\n", ThisTask, __FUNCTION__, __FILE__, \ + __LINE__, #cond); \ + printf("%s", termbuf); \ + myflush(stdout); \ + assert(0); \ + } + +/* memory manager */ +#define mymalloc(x, y) mymalloc_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__, 0, NULL) +#define mymalloc_g(x, y) mymalloc_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__, 0, callorigin) +#define mymalloc_clear(x, y) mymalloc_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__, 1, NULL) +#define mymalloc_movable(x, y, z) mymalloc_movable_fullinfo(x, y, z, __FUNCTION__, __FILE__, __LINE__, NULL) +#define mymalloc_movable_g(x, y, z) mymalloc_movable_fullinfo(x, y, z, __FUNCTION__, __FILE__, __LINE__, callorigin) +#define myrealloc(x, y) myrealloc_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__) 
+#define myrealloc_movable(x, y) myrealloc_movable_fullinfo(x, y, __FUNCTION__, __FILE__, __LINE__) +#define myfree(x) myfree_fullinfo(x, __FUNCTION__, __FILE__, __LINE__) +#define myfree_movable(x) myfree_movable_fullinfo(x, __FUNCTION__, __FILE__, __LINE__) + +#define MAX_FIRST_ELEMENTS_CONSIDERED \ + 5 /* This sets the number of lowest loaded tasks to be considered for assignment of next domain patch */ + +#define NUMBER_OF_MEASUREMENTS_TO_RECORD 6 + +#ifndef GRAVCOSTLEVELS +#define GRAVCOSTLEVELS 6 +#endif /* #ifndef GRAVCOSTLEVELS */ + +#define MODE_LOCAL_NO_EXPORT -1 +#define MODE_LOCAL_PARTICLES 0 +#define MODE_IMPORTED_PARTICLES 1 +#define MODE_FINISHED 2 + +#ifndef DIRECT_SUMMATION_THRESHOLD +#define DIRECT_SUMMATION_THRESHOLD 3000 +#endif /* #ifndef DIRECT_SUMMATION_THRESHOLD */ + +#define MODE_FIRST_HALFSTEP 0 +#define MODE_SECOND_HALFSTEP 1 + +#define FLAG_PARTIAL_TREE 0 +#define FLAG_FULL_TREE 1 + +#ifndef MPI_MESSAGE_SIZELIMIT_IN_MB +#define MPI_MESSAGE_SIZELIMIT_IN_MB 200 +#endif /* #ifndef MPI_MESSAGE_SIZELIMIT_IN_MB */ + +#define MPI_MESSAGE_SIZELIMIT_IN_BYTES ((MPI_MESSAGE_SIZELIMIT_IN_MB)*1024LL * 1024LL) + +#define COMMBUFFERSIZE (32 * 1024LL * 1024LL) + +#define NUM_THREADS 1 /* no OpenMP support in this code! 
*/ + +extern int Nforces; +extern int *TargetList; + +extern struct thread_data +{ + int Nexport __attribute__((__aligned__(64))); /* to align on different cache lines */ + int NexportNodes; + int Interactions; + int dummy; + double Cost; + + double Costtotal; /*!< The total cost of the particles/nodes processed by each thread */ + double Ewaldcount; /*!< The total cost for the Ewald correction per thread */ + int FirstExec; /*!< Keeps track, if a given thread executes the gravity_primary_loop() for the first time */ + + size_t ExportSpace; + size_t InitialSpace; + size_t ItemSize; + + int *P_CostCount; + int *TreePoints_CostCount; + int *Node_CostCount; + + struct data_partlist *PartList; + + int *Ngblist; + double *R2list; + int *Exportflag; + int *toGoDM; + int *toGoSph; + +} Thread[NUM_THREADS]; + +/* If we use a static Voronoi mesh with local timestepping and no rebuild of + * the static mesh, then we need to backup the face areas before calling + * compute_interface_fluxes(), because this function calls face_get_normals() + * which sets some face area to 0 under some circumstances */ +#if defined(VORONOI_STATIC_MESH) && !defined(FORCE_EQUAL_TIMESTEPS) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) +#define VORONOI_BACKUP_RESTORE_FACE_AREAS +#else /* #if defined(VORONOI_STATIC_MESH) && !defined(FORCE_EQUAL_TIMESTEPS) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) \ + */ +#undef VORONOI_BACKUP_RESTORE_FACE_AREAS +#endif /* #if defined(VORONOI_STATIC_MESH) && !defined(FORCE_EQUAL_TIMESTEPS) && \ + !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) #else */ + +#ifdef IMPOSE_PINNING +extern hwloc_cpuset_t cpuset_thread[NUM_THREADS]; +#endif /* #ifdef IMPOSE_PINNING */ + +#ifdef ONEDIMS +#define ALLOC_TOLERANCE 0.3 +#else /* #ifdef ONEDIMS */ +#define ALLOC_TOLERANCE 0.1 +#endif /* #ifdef ONEDIMS #else */ +#define ALLOC_STARBH_ROOM 0.02 + +#ifdef TOLERATE_WRITE_ERROR +#define IO_TRIALS 20 +#define IO_SLEEP_TIME 10 +#endif /* #ifdef 
TOLERATE_WRITE_ERROR */ + +/* calculate appropriate value of MAXSCALARS */ + +#if defined(REFINEMENT_HIGH_RES_GAS) || defined(PASSIVE_SCALARS) + +#ifdef REFINEMENT_HIGH_RES_GAS +#define COUNT_REFINE 1 +#else /* #ifdef REFINEMENT_HIGH_RES_GAS */ +#define COUNT_REFINE 0 +#endif /* #ifdef REFINEMENT_HIGH_RES_GAS #else */ + +#ifdef PASSIVE_SCALARS +#define COUNT_PASSIVE_SCALARS PASSIVE_SCALARS +#else /* #ifdef PASSIVE_SCALARS */ +#define COUNT_PASSIVE_SCALARS 0 +#endif /* #ifdef PASSIVE_SCALARS #else */ + +#define MAXSCALARS (COUNT_REFINE + COUNT_PASSIVE_SCALARS) +#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) || defined(PASSIVE_SCALARS)*/ + +/* calculate appropriate value of MAXGRADIENTS */ + +#define COUNT_GRAD_DEFAULT 5 + +#ifdef MHD +#define COUNT_GRAD_MHD 3 +#else /* #ifdef MHD */ +#define COUNT_GRAD_MHD 0 +#endif /* #ifdef MHD #else */ + +#ifdef MAXSCALARS +#define COUNT_GRAD_SCALARS MAXSCALARS +#else /* #ifdef MAXSCALARS */ +#define COUNT_GRAD_SCALARS 0 +#endif /* #ifdef MAXSCALARS #else*/ + +#define MAXGRADIENTS (COUNT_GRAD_DEFAULT + COUNT_GRAD_MHD + COUNT_GRAD_SCALARS) + +/*************************************/ + +/*! For Peano-Hilbert order. 
+ * Note: Maximum is 10 to fit in 32-bit integer, + * maximum is 21 to fit into 64-bit integer, + * and 42 is the absolute maximum, for which 128-bit integers are needed + */ +#ifndef BITS_PER_DIMENSION +#define BITS_PER_DIMENSION 42 +#endif /* #ifndef BITS_PER_DIMENSION */ +#if(BITS_PER_DIMENSION <= 21) +typedef unsigned long long peanokey; +#else /* #if (BITS_PER_DIMENSION <= 21) */ +typedef __int128 peanokey; +#endif /* #if (BITS_PER_DIMENSION <= 21) #else */ +#if(BITS_PER_DIMENSION <= 31) +typedef unsigned int peano1D; +#else /* #if (BITS_PER_DIMENSION <= 31) */ +#if(BITS_PER_DIMENSION <= 42) +typedef unsigned long long peano1D; +#else /* #if (BITS_PER_DIMENSION <= 42) */ +#error "BITS_PER_DIMENSION can be at most 42" +#endif /* #if (BITS_PER_DIMENSION <= 42) #else */ +#endif /* #if (BITS_PER_DIMENSION <= 31) #else */ + +#define PEANOCELLS (((peanokey)1) << (3 * BITS_PER_DIMENSION)) + +#define MAX_FLOAT_NUMBER 1e37 +#define MIN_FLOAT_NUMBER 1e-37 +#define MAX_DOUBLE_NUMBER 1e306 +#define MIN_DOUBLE_NUMBER 1e-306 + +#ifdef DOUBLEPRECISION +#if(DOUBLEPRECISION == 2) +#define MAX_REAL_NUMBER MAX_FLOAT_NUMBER +#define MIN_REAL_NUMBER MIN_FLOAT_NUMBER +#else /* #if (DOUBLEPRECISION==2) */ +#define MAX_REAL_NUMBER MAX_DOUBLE_NUMBER +#define MIN_REAL_NUMBER MIN_DOUBLE_NUMBER +#endif /* #if (DOUBLEPRECISION==2) #else */ +#else /* #ifdef DOUBLEPRECISION */ +#define MAX_REAL_NUMBER MAX_FLOAT_NUMBER +#define MIN_REAL_NUMBER MIN_FLOAT_NUMBER +#endif /* #ifdef DOUBLEPRECISION #else */ + +#ifndef GAMMA +#define GAMMA (5. / 3.) /*!< adiabatic index of simulated gas */ +#endif /* #ifndef GAMMA */ +#define GAMMA_MINUS1 (GAMMA - 1.) +#define GAMMA_PLUS1 (GAMMA + 1.) + +#define HYDROGEN_MASSFRAC 0.76 /*!< mass fraction of hydrogen, relevant only for radiative cooling */ +#define HE_ABUND ((1. / HYDROGEN_MASSFRAC - 1.) / 4.) + +/* ... 
often used physical constants (cgs units; NIST 2010) */ + +#define GRAVITY 6.6738e-8 +#define SOLAR_MASS 1.989e33 +#define SOLAR_LUM 3.826e33 +#define SOLAR_EFF_TEMP 5.780e3 +#define RAD_CONST 7.5657e-15 +#define AVOGADRO 6.02214e23 +#define BOLTZMANN 1.38065e-16 +#define GAS_CONST 8.31446e7 +#define CLIGHT 2.99792458e10 + +#define PLANCK 6.6260695e-27 +#define PARSEC 3.085678e18 +#define KILOPARSEC 3.085678e21 +#define MEGAPARSEC 3.085678e24 +#define ASTRONOMICAL_UNIT 1.49598e13 +#define PROTONMASS 1.67262178e-24 +#define ELECTRONMASS 9.1093829e-28 +#define THOMPSON 6.65245873e-25 +#define ELECTRONCHARGE 4.8032042e-10 +#define HUBBLE 3.2407789e-18 /* in h/sec */ +#define LYMAN_ALPHA 1215.6e-8 /* 1215.6 Angstroem */ +#define LYMAN_ALPHA_HeII 303.8e-8 /* 303.8 Angstroem */ +#define OSCILLATOR_STRENGTH 0.41615 +#define OSCILLATOR_STRENGTH_HeII 0.41615 +#define ELECTRONVOLT_IN_ERGS 1.60217656e-12 + +#define SEC_PER_GIGAYEAR 3.15576e16 +#define SEC_PER_MEGAYEAR 3.15576e13 +#define SEC_PER_YEAR 3.15576e7 + +#ifndef FOF_PRIMARY_LINK_TYPES +#define FOF_PRIMARY_LINK_TYPES 2 +#endif /* #ifndef FOF_PRIMARY_LINK_TYPES */ + +#ifndef FOF_SECONDARY_LINK_TYPES +#define FOF_SECONDARY_LINK_TYPES 0 +#endif /* #ifndef FOF_SECONDARY_LINK_TYPES */ + +#ifndef ASMTH +/*! ASMTH gives the scale of the short-range/long-range force split in units + * of FFT-mesh cells + */ +#define ASMTH 1.25 +#endif /* #ifndef ASMTH */ + +#ifndef RCUT +/*! RCUT gives the maximum distance (in units of the scale used for the force + * split) out to which short-range forces are evaluated in the short-range + * tree walk. 
+ */ +#define RCUT 4.5 +#endif /* #ifndef RCUT */ + +#define MAXLEN_OUTPUTLIST 1100 /*!< maximum number of entries in output list */ +#define MAXLEN_PATH 256 /*!< maximum length of various filenames (full path) */ +#define MAXLEN_PARAM_TAG 50 /*!< maximum length of the tag of a parameter in the parameter file */ +#define MAXLEN_PARAM_VALUE 200 /*!< maximum length of the value of a parameter in the parameter file */ +#define MAX_PARAMETERS 300 /*!< maximum number of parameters in the parameter file */ +#define DRIFT_TABLE_LENGTH 1000 /*!< length of the lookup table used to hold the drift and kick factors */ + +#define BASENUMBER 100 +#define HIGHRESMASSFAC 0.5 + +#define MAXITER 300000 /*!< maximum number of iterations before process is terminated */ + +#ifndef FOF_LINKLENGTH +#define FOF_LINKLENGTH 0.2 +#endif /* #ifndef FOF_LINKLENGTH */ + +#ifndef FOF_GROUP_MIN_LEN +#define FOF_GROUP_MIN_LEN 32 +#endif /* #ifndef FOF_GROUP_MIN_LEN */ + +typedef struct +{ + double r; + double mass; +} sort_r2list; + +typedef struct +{ + MyFloat r2; + int index; +} r2type; + +#include "../mesh/mesh.h" +#include "../mesh/voronoi/voronoi.h" + +struct unbind_data +{ + int index; +}; + +#ifdef FIX_PATHSCALE_MPI_STATUS_IGNORE_BUG +extern MPI_Status mpistat; +#undef MPI_STATUS_IGNORE +#define MPI_STATUS_IGNORE &mpistat +#endif /* #ifdef FIX_PATHSCALE_MPI_STATUS_IGNORE_BUG */ + +#define FLT(x) (x) + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif /* #ifndef M_PI */ + +#define TO_MBYTE_FAC (1.0 / (1024.0 * 1024.0)) + +#ifdef ONEDIMS +#define NUMDIMS 1 +#define KERNEL_COEFF_1 (4.0 / 3) +#define KERNEL_COEFF_2 (8.0) +#define KERNEL_COEFF_3 (24.0) +#define KERNEL_COEFF_4 (16.0) +#define KERNEL_COEFF_5 (8.0 / 3) +#define KERNEL_COEFF_6 (-8.0) +#define NORM_COEFF 2.0 +#else /* #ifdef ONEDIMS */ +#ifndef TWODIMS +#define NUMDIMS 3 /*!< For 3D-normalized kernel */ +#define KERNEL_COEFF_1 2.546479089470 /*!< Coefficients for SPH spline kernel and its derivative */ +#define 
KERNEL_COEFF_2 15.278874536822 +#define KERNEL_COEFF_3 45.836623610466 +#define KERNEL_COEFF_4 30.557749073644 +#define KERNEL_COEFF_5 5.092958178941 +#define KERNEL_COEFF_6 (-15.278874536822) +#define NORM_COEFF 4.188790204786 /*!< Coefficient for kernel normalization. Note: 4.0/3 * PI = 4.188790204786 */ +#else /* #ifndef TWODIMS */ +#define NUMDIMS 2 /*!< For 2D-normalized kernel */ +#define KERNEL_COEFF_1 (5.0 / 7 * 2.546479089470) /*!< Coefficients for SPH spline kernel and its derivative */ +#define KERNEL_COEFF_2 (5.0 / 7 * 15.278874536822) +#define KERNEL_COEFF_3 (5.0 / 7 * 45.836623610466) +#define KERNEL_COEFF_4 (5.0 / 7 * 30.557749073644) +#define KERNEL_COEFF_5 (5.0 / 7 * 5.092958178941) +#define KERNEL_COEFF_6 (5.0 / 7 * (-15.278874536822)) +#define NORM_COEFF M_PI /*!< Coefficient for kernel normalization. */ +#endif /* #ifndef TWODIMS #else */ +#endif /* #ifdef ONEDIMS #else*/ + +#define SOFTFAC1 10.666666666667 /*!< Coefficients for gravitational softening */ +#define SOFTFAC2 32.0 +#define SOFTFAC3 (-38.4) +#define SOFTFAC4 (-2.8) +#define SOFTFAC5 5.333333333333 +#define SOFTFAC6 6.4 +#define SOFTFAC7 (-9.6) +#define SOFTFAC8 21.333333333333 +#define SOFTFAC9 (-48.0) +#define SOFTFAC10 38.4 +#define SOFTFAC11 (-10.666666666667) +#define SOFTFAC12 (-0.066666666667) +#define SOFTFAC13 (-3.2) +#define SOFTFAC14 0.066666666667 +#define SOFTFAC15 (-16.0) +#define SOFTFAC16 9.6 +#define SOFTFAC17 (-2.133333333333) +#define SOFTFAC18 128.0 +#define SOFTFAC19 (-115.2) +#define SOFTFAC20 21.333333333333 +#define SOFTFAC21 (-96.0) +#define SOFTFAC22 115.2 +#define SOFTFAC23 (-42.666666666667) +#define SOFTFAC24 0.1333333333333 + +extern MyDouble boxSize, boxHalf; +#ifdef LONG_X +extern MyDouble boxSize_X, boxHalf_X; +#else /* #ifdef LONG_X */ +#define boxSize_X boxSize +#define boxHalf_X boxHalf +#endif /* #ifdef LONG_X #else */ +#ifdef LONG_Y +extern MyDouble boxSize_Y, boxHalf_Y; +#else /* #ifdef LONG_Y */ +#define boxSize_Y boxSize +#define boxHalf_Y 
boxHalf +#endif /* #ifdef LONG_Y #else */ +#ifdef LONG_Z +extern MyDouble boxSize_Z, boxHalf_Z; +#else /* #ifdef LONG_Z */ +#define boxSize_Z boxSize +#define boxHalf_Z boxHalf +#endif /* #ifdef LONG_Z #else */ + +#if !defined(GRAVITY_NOT_PERIODIC) +#define GRAVITY_NEAREST_X(x) \ + (xtmp = (x), (xtmp > boxHalf_X) ? (xtmp - boxSize_X) : ((xtmp < -boxHalf_X) ? (xtmp + boxSize_X) : (xtmp))) +#define GRAVITY_NEAREST_Y(x) \ + (ytmp = (x), (ytmp > boxHalf_Y) ? (ytmp - boxSize_Y) : ((ytmp < -boxHalf_Y) ? (ytmp + boxSize_Y) : (ytmp))) +#define GRAVITY_NEAREST_Z(x) \ + (ztmp = (x), (ztmp > boxHalf_Z) ? (ztmp - boxSize_Z) : ((ztmp < -boxHalf_Z) ? (ztmp + boxSize_Z) : (ztmp))) +#else /* #if !defined(GRAVITY_NOT_PERIODIC) */ +#define GRAVITY_NEAREST_X(x) (x) +#define GRAVITY_NEAREST_Y(x) (x) +#define GRAVITY_NEAREST_Z(x) (x) +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) #else */ + +#if !defined(GRAVITY_NOT_PERIODIC) +#define FOF_NEAREST_LONG_X(x) (xtmp = fabs(x), (xtmp > boxHalf_X) ? (boxSize_X - xtmp) : xtmp) +#define FOF_NEAREST_LONG_Y(x) (ytmp = fabs(x), (ytmp > boxHalf_Y) ? (boxSize_Y - ytmp) : ytmp) +#define FOF_NEAREST_LONG_Z(x) (ztmp = fabs(x), (ztmp > boxHalf_Z) ? (boxSize_Z - ztmp) : ztmp) +#else /* #if !defined(GRAVITY_NOT_PERIODIC) */ +#define FOF_NEAREST_LONG_X(x) fabs(x) +#define FOF_NEAREST_LONG_Y(x) fabs(x) +#define FOF_NEAREST_LONG_Z(x) fabs(x) +#endif /* #if !defined(GRAVITY_NOT_PERIODIC) #else */ + +/* periodicity of gas */ +#ifndef REFLECTIVE_X +#define NGB_PERIODIC_LONG_X(x) (xtmp = fabs(x), (xtmp > boxHalf_X) ? (boxSize_X - xtmp) : xtmp) +#define NEAREST_X(x) (xtmp = (x), (xtmp > boxHalf_X) ? (xtmp - boxSize_X) : ((xtmp < -boxHalf_X) ? (xtmp + boxSize_X) : (xtmp))) +#define WRAP_X(x) (xtmp = (x), (xtmp > boxSize_X) ? (xtmp - boxSize_X) : ((xtmp < 0) ? 
(xtmp + boxSize_X) : (xtmp))) +#else /* #ifndef REFLECTIVE_X */ +#define NGB_PERIODIC_LONG_X(x) fabs(x) +#define NEAREST_X(x) (x) +#define WRAP_X(x) (x) +#endif /* #ifndef REFLECTIVE_X #else */ + +#ifndef REFLECTIVE_Y +#define NGB_PERIODIC_LONG_Y(x) (ytmp = fabs(x), (ytmp > boxHalf_Y) ? (boxSize_Y - ytmp) : ytmp) +#define NEAREST_Y(x) (ytmp = (x), (ytmp > boxHalf_Y) ? (ytmp - boxSize_Y) : ((ytmp < -boxHalf_Y) ? (ytmp + boxSize_Y) : (ytmp))) +#define WRAP_Y(x) (ytmp = (x), (ytmp > boxSize_Y) ? (ytmp - boxSize_Y) : ((ytmp < 0) ? (ytmp + boxSize_Y) : (ytmp))) +#else /* #ifndef REFLECTIVE_Y */ +#define NGB_PERIODIC_LONG_Y(x) fabs(x) +#define NEAREST_Y(x) (x) +#define WRAP_Y(x) (x) +#endif /* #ifndef REFLECTIVE_Y #else */ + +#ifndef REFLECTIVE_Z +#define NGB_PERIODIC_LONG_Z(x) (ztmp = fabs(x), (ztmp > boxHalf_Z) ? (boxSize_Z - ztmp) : ztmp) +#define NEAREST_Z(x) (ztmp = (x), (ztmp > boxHalf_Z) ? (ztmp - boxSize_Z) : ((ztmp < -boxHalf_Z) ? (ztmp + boxSize_Z) : (ztmp))) +#define WRAP_Z(x) (ztmp = (x), (ztmp > boxSize_Z) ? (ztmp - boxSize_Z) : ((ztmp < 0) ? 
(ztmp + boxSize_Z) : (ztmp))) +#else /* #ifndef REFLECTIVE_Z */ +#define NGB_PERIODIC_LONG_Z(x) fabs(x) +#define NEAREST_Z(x) (x) +#define WRAP_Z(x) (x) +#endif /* #ifndef REFLECTIVE_Z #else */ + +#define FACT1 0.366025403785 /* FACT1 = 0.5 * (sqrt(3)-1) */ +#define FAC_TWO_TO_TWO_THIRDS 1.5874011 + +/*********************************************************/ +/* Global variables */ +/*********************************************************/ + +extern int TimeBinSynchronized[TIMEBINS]; +extern struct TimeBinData TimeBinsHydro, TimeBinsGravity; + +#ifdef USE_SFR +extern double TimeBinSfr[TIMEBINS]; +#endif /* #ifdef USE_SFR */ + +extern int ThisTask; /*!< the number of the local processor */ +extern int NTask; /*!< number of processors */ +extern int PTask; /*!< note: NTask = 2^PTask */ + +extern int ThisNode; /*!< the rank of the current compute node */ +extern int NumNodes; /*!< the number of compute nodes used */ +extern int MinTasksPerNode; /*!< the minimum number of MPI tasks that is found on any of the nodes */ +extern int MaxTasksPerNode; /*!< the maximum number of MPI tasks that is found on any of the nodes */ +extern int TasksInThisNode; /*!< number of MPI tasks on current compute node */ +extern int RankInThisNode; /*!< rank of the MPI task on the current compute node */ +extern long long MemoryOnNode; + +extern double CPUThisRun; /*!< Sums CPU time of current process */ + +extern int MaxTopNodes; /*!< Maximum number of nodes in the top-level tree used for domain decomposition */ + +extern int RestartFlag; /*!< taken from command line used to start code. 0 is normal start-up from + initial conditions, 1 is resuming a run from a set of restart files, while 2 + marks a restart from a snapshot file. 
*/ +extern int RestartSnapNum; +extern int TakeLevel; +extern int TagOffset; + +extern int Argc; +extern char **Argv; + +extern double CPU_Step[CPU_LAST]; +extern double CPU_Step_Stored[CPU_LAST]; + +extern double WallclockTime; /*!< This holds the last wallclock time measurement for timing measurements */ +extern double StartOfRun; /*!< This stores the time of the start of the run for evaluating the elapsed time */ + +extern size_t AllocatedBytes; +extern size_t FreeBytes; + +extern char DumpFlag; +extern char DumpFlagNextSnap; + +extern int FlagNyt; /*!< tested for 0 and then set to 1 inside terminate(), guarding its message output and MPI_Abort() call */ + +extern int NumPart; /*!< number of particles on the LOCAL processor */ +extern int NumGas; /*!< number of gas particles on the LOCAL processor */ + +extern gsl_rng *random_generator; /*!< a random number generator */ +extern gsl_rng *random_generator_aux; /*!< an auxiliary random number generator for use if one doesn't want to influence the main + code's random numbers */ + +#ifdef USE_SFR +extern int Stars_converted; /*!< current number of star particles in gas particle block */ +#endif /* #ifdef USE_SFR */ + +#ifdef TOLERATE_WRITE_ERROR +extern int WriteErrorFlag; +extern char AlternativeOutputDir[MAXLEN_PATH]; +#endif /* #ifdef TOLERATE_WRITE_ERROR */ + +extern double EgyInjection; + +extern double TimeOfLastDomainConstruction; /*!< holds what it says */ + +extern double DomainCorner[3], DomainCenter[3], DomainLen, DomainFac; +extern double DomainInverseLen, DomainBigFac; +extern int *DomainStartList, *DomainEndList; +extern double *DomainCost, *TaskCost; +extern int *DomainCount, *TaskCount; +extern struct no_list_data +{ + int task; + int no; + int domainCount; + double domainCost; +} * ListNoData; + +extern int domain_bintolevel[TIMEBINS]; +extern int domain_refbin[TIMEBINS]; +extern int domain_grav_weight[TIMEBINS]; +extern int domain_hydro_weight[TIMEBINS]; +extern int domain_to_be_balanced[TIMEBINS]; + +/*! Array of task numbers holding the respective top-level nodes. 
For + the topnodes entries, it is indexed by the Leaf member, for + pseudoparticles it is indexed by the node + number-MaxPart-MaxNodes. */ +extern int *DomainTask; +extern int *DomainNewTask; + +/*! Array of indices of the main tree nodes that are identical to the + * top-level nodes. For the topnodes entries, it is indexed by the + * Leaf member, for pseudoparticles it is indexed by the node + * number-MaxPart-MaxNodes. + */ +extern int *DomainNodeIndex; + +extern peanokey *Key, *KeySorted; + +/*! The top node structure is an octree used for encoding the domain + * decomposition. Its leaf nodes are the units into which the domain + * is decomposed. + */ +extern struct topnode_data +{ + peanokey Size; + peanokey StartKey; + long long Count; + /*! The index of the first daughter node. The remaining 7 follow + sequentially, I think. */ + int Daughter; + /*! The index of this topnode in the DomainTask etc arrays. Is this + only valid for topnodes that have daughter=-1, i.e. the actual + leaves? */ + int Leaf; + unsigned char MortonToPeanoSubnode[8]; +} * TopNodes; + +extern int NTopnodes, NTopleaves; + +/*! 
Variables for gravitational tree */ +extern int Tree_MaxPart; +extern int Tree_NumNodes; +extern int Tree_MaxNodes; +extern int Tree_FirstNonTopLevelNode; +extern int Tree_NumPartImported; +extern int Tree_NumPartExported; +extern int Tree_ImportedNodeOffset; +extern int Tree_NextFreeNode; + +extern int *Tree_ResultIndexList; +extern int *Tree_Task_list; +extern MyDouble *Tree_Pos_list; +extern unsigned long long *Tree_IntPos_list; + +extern struct treepoint_data +{ + MyDouble Pos[3]; + unsigned long long IntPos[3]; + MyDouble Mass; + float OldAcc; + int index; + int th; + unsigned char level; + unsigned char Type; + unsigned char SofteningType : 7; +#ifndef HIERARCHICAL_GRAVITY + unsigned char ActiveFlag : 1; +#endif /* #ifndef HIERARCHICAL_GRAVITY */ + +#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) + MyFloat GroupRad; + int GrNr; +#endif /* #if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) */ +} * Tree_Points; + +extern struct resultsactiveimported_data +{ + MyFloat GravAccel[3]; +#ifdef EVALPOTENTIAL + MyFloat Potential; +#endif /* #ifdef EVALPOTENTIAL */ + int index; +} * Tree_ResultsActiveImported; + +extern char ParameterFile[MAXLEN_PATH]; /*!< file name of parameterfile used for starting the simulation */ + +extern FILE *FdInfo, /*!< file handle for info.txt log-file. */ + *FdEnergy, /*!< file handle for energy.txt log-file. */ + *FdTimings, /*!< file handle for timings.txt log-file. */ + *FdBalance, /*!< file handle for balance.txt log-file. */ + *FdTimebin, /*!< file handle for timebins.txt log-file. */ + *FdDomain, /*!< file handle for domain.txt log-file. */ + *FdMemory, /*!< file handle for memory.txt log-file. */ + *FdCPU; /*!< file handle for cpu.txt log-file. */ + +#ifdef DETAILEDTIMINGS +extern FILE *FdDetailed; +#endif /* #ifdef DETAILEDTIMINGS */ + +#ifdef OUTPUT_CPU_CSV +extern FILE *FdCPUCSV; /**< file handle for cpu.csv log-file. Used if the cpu log is printed in csv format as well. 
*/ +#endif /* #ifdef OUTPUT_CPU_CSV */ + +#ifdef RESTART_DEBUG +extern FILE *FdRestartTest; +#endif /* #ifdef RESTART_DEBUG */ + +#ifdef USE_SFR +extern FILE *FdSfr; /**< file handle for sfr.txt log-file. */ +#endif /* #ifdef USE_SFR */ + +#ifdef FORCETEST +extern FILE *FdForceTest; /*!< file handle for forcetest.txt log-file. */ +#endif /* #ifdef FORCETEST */ + +/*! Determines whether various dump files are written. Normally true, + set to false by Sunrise to avoid creating them. */ +extern int WriteMiscFiles; + +extern void *CommBuffer; /*!< points to communication buffer, which is used at a few places */ + +/*! \brief Global simulation data. + * + * Data which is the SAME for all tasks (mostly code parameters read + * from the parameter file). Holding this data in a structure is + * convenient for writing/reading the restart file, and it allows the + * introduction of new global variables in a simple way. The only + * thing to do is to introduce them into this structure. + */ +extern struct global_data_all_processes +{ + long long TotNumPart; /*!< total particle numbers (global value) */ + long long TotNumGas; /*!< total gas particle number (global value) */ + + int MaxPart; /*!< This gives the maximum number of particles that can be stored on one + processor. */ + int MaxPartSph; /*!< This gives the maximum number of SPH particles that can be stored on one + processor. 
*/ + +#if defined(COOLING) + char TreecoolFile[MAXLEN_PATH]; +#endif /* #if defined(COOLING) */ + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE + int TotPartSpecial, MaxPartSpecial; +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + +#if defined(REFINEMENT) + double ReferenceGasPartMass; +#endif /* #if defined(REFINEMENT) */ + +#ifdef REFINEMENT + double TargetGasMass; + double TargetGasMassFactor; + int RefinementCriterion; + int DerefinementCriterion; +#endif /* #ifdef REFINEMENT */ + + double TotGravCost; + +#ifdef INDIVIDUAL_GRAVITY_SOFTENING + double AvgType1Mass; +#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */ + + double MeanVolume; + + int MultipleDomains; + double TopNodeFactor; + + int ICFormat; /*!< selects different versions of IC file-format */ + + int SnapFormat; /*!< selects different versions of snapshot file-formats */ + + int NumFilesPerSnapshot; /*!< number of files in multi-file snapshot dumps */ + int NumFilesWrittenInParallel; /*!< maximum number of files that may be written/read simultaneously when + writing/reading restart-files, or when writing snapshot files */ + + double TreeAllocFactor; /*!< Each processor allocates a number of nodes which is TreeAllocFactor times + the maximum(!) number of particles. Note: A typical local tree for N + particles needs usually about ~0.65*N nodes. */ + + double TopNodeAllocFactor; /*!< NOTE(review): the description previously given here was a verbatim copy of the + TreeAllocFactor comment. Presumably this factor sizes the top-level tree used for + domain decomposition (cf. MaxTopNodes) rather than the gravity tree - confirm. */ + + double NgbTreeAllocFactor; /*!< Each processor allocates a number of nodes for the neighbor search which is NgbTreeAllocFactor times + the maximum(!) number of gas particles. Note: A typical local tree for N + particles needs usually about ~0.65*N nodes. 
*/ + + int MaxMemSize; /*!< size of maximum memory consumption in MB */ + + /* some SPH parameters */ + + int DesNumNgb; /*!< Desired number of SPH neighbours */ + +#ifdef SUBFIND + int DesLinkNgb; + double ErrTolThetaSubfind; +#endif /* #ifdef SUBFIND */ + + double TotCountReducedFluxes; + double TotCountFluxes; + + double DtDisplacement; + + double MaxNumNgbDeviation; /*!< Maximum allowed deviation neighbour number */ + + double InitGasTemp; /*!< may be used to set the temperature in the IC's */ + double InitGasU; /*!< the same, but converted to thermal energy per unit mass */ + double MinGasTemp; /*!< may be used to set a floor for the gas temperature */ + double MinEgySpec; /*!< the minimum allowed temperature expressed as energy per unit mass; code will inject energy if a cell falls + below this limit */ + + double MinimumDensityOnStartUp; + + double GasSoftFactor; + + double LimitUBelowThisDensity; + double LimitUBelowCertainDensityToThisValue; + + /* some force counters */ + long long TotNumOfForces; /*!< counts total number of force computations */ + +#ifdef MULTIPLE_RESTARTS + int RestartFileCount; +#endif /* #ifdef MULTIPLE_RESTARTS */ + + /* various cosmological factors that are only a function of the current scale factor, and in non-comoving runs are set to 1 */ + double cf_atime, cf_a2inv, cf_a3inv, cf_afac1, cf_afac2, cf_afac3, cf_hubble_a, cf_time_hubble_a, cf_redshift; + /* Hubble rate at the current time, valid both for comoving and non-comoving integration */ + double cf_H; + /* Hubble expansion rate, but in non-comoving integration set to zero */ + double cf_Hrate; + + /* system of units */ + double UnitTime_in_s, /*!< factor to convert internal time unit to seconds/h */ + UnitMass_in_g, /*!< factor to convert internal mass unit to grams/h */ + UnitVelocity_in_cm_per_s, /*!< factor to convert internal velocity unit to cm/sec */ + UnitLength_in_cm, /*!< factor to convert internal length unit to cm/h */ + UnitPressure_in_cgs, /*!< factor to convert 
internal pressure unit to cgs units (little 'h' still + around!) */ + UnitDensity_in_cgs, /*!< factor to convert internal mass density unit to g/cm^3*h^2 */ + UnitCoolingRate_in_cgs, /*!< factor to convert internal cooling rate to cgs units */ + UnitEnergy_in_cgs, /*!< factor to convert internal energy to cgs units */ + UnitTime_in_Megayears, /*!< factor to convert internal time to megayears/h */ + GravityConstantInternal, /*!< If set to zero in the parameterfile, the internal value of the + gravitational constant is set to the Newtonian value based on the system of + units specified. Otherwise the value provided is taken as internal gravity + constant G. */ + G; /*!< Gravity-constant in internal units */ + + /* Cosmology */ + + double Hubble; /*!< Hubble-constant in internal units */ + double Omega0, /*!< matter density in units of the critical density (at z=0) */ + OmegaLambda, /*!< vacuum energy density relative to critical density (at z=0) */ + OmegaBaryon, /*!< baryon density in units of the critical density (at z=0) */ + HubbleParam; /*!< little `h', i.e. Hubble constant in units of 100 km/s/Mpc. 
Only needed to get absolute + * physical values for cooling physics + */ + + double BoxSize; /*!< Boxsize in case periodic boundary conditions are used */ + + /* Code options */ + + int ComovingIntegrationOn; /*!< flags that comoving integration is enabled */ + int PeriodicBoundariesOn; /*!< flags that periodic boundaries are enabled for gravity */ + int ResubmitOn; /*!< flags that automatic resubmission of job to queue system is enabled */ + int TypeOfOpeningCriterion; /*!< determines tree cell-opening criterion: 0 for Barnes-Hut, 1 for relative + criterion */ + int TypeOfTimestepCriterion; /*!< gives type of timestep criterion (only 0 supported right now - unlike + gadget-1.1) */ + int OutputListOn; /*!< flags that output times are listed in a specified file */ + int CoolingOn; /*!< flags that cooling is enabled */ + int StarformationOn; /*!< flags that star formation is enabled */ + + int NParameters; + + int LowestActiveTimeBin; + int HighestActiveTimeBin; + int LowestOccupiedTimeBin; + int HighestOccupiedTimeBin; + int LowestOccupiedGravTimeBin; + int HighestOccupiedGravTimeBin; + int HighestSynchronizedTimeBin; + int SmallestTimeBinWithDomainDecomposition; + double ActivePartFracForNewDomainDecomp; + + /* parameters determining output frequency */ + + int SnapshotFileCount; /*!< number of snapshot that is written next */ + double TimeBetSnapshot, /*!< simulation time interval between snapshot files */ + TimeOfFirstSnapshot, /*!< simulation time of first snapshot files */ + CpuTimeBetRestartFile, /*!< cpu-time between regularly generated restart files */ + TimeLastRestartFile, /*!< cpu-time when last restart-file was written */ + TimeBetStatistics, /*!< simulation time interval between computations of energy statistics */ + TimeLastStatistics; /*!< simulation time when the energy statistics was computed the last time */ + int NumCurrentTiStep; /*!< counts the number of system steps taken up to this point */ + + /* Current time of the simulation, global step, 
and end of simulation */ + + double Time, /*!< current time of the simulation */ + TimeBegin, /*!< time of initial conditions of the simulation */ + TimeStep, /*!< difference between current times of previous and current timestep */ + TimeMax; /*!< marks the point of time until the simulation is to be evolved */ + + /* variables for organizing discrete timeline */ + + double Timebase_interval; /*!< factor to convert from floating point time interval to integer timeline */ + integertime Ti_Current; /*!< current time on integer timeline */ + integertime Previous_Ti_Current; + integertime Ti_nextoutput; /*!< next output time on integer timeline */ + integertime Ti_lastoutput; + + integertime Ti_begstep[TIMEBINS]; /*!< marks start of current step of each timebin on integer timeline */ + +#ifdef PMGRID + integertime PM_Ti_endstep, PM_Ti_begstep; + double Asmth[2], Rcut[2]; + double Corner[2][3], UpperCorner[2][3], Xmintot[2][3], Xmaxtot[2][3]; + double TotalMeshSize[2]; +#if defined(EVALPOTENTIAL) && defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC) + double MassPMregions[2]; +#endif /* #if defined(EVALPOTENTIAL) && defined(PMGRID) && !defined(GRAVITY_NOT_PERIODIC) */ +#endif /* #ifdef PMGRID */ + + long long GlobalNSynchronizedHydro; + long long GlobalNSynchronizedGravity; + + int LevelToTimeBin[GRAVCOSTLEVELS]; + int LevelHasBeenMeasured[GRAVCOSTLEVELS]; + + /* variables that keep track of cumulative CPU consumption */ + + double TimeLimitCPU; + double CPU_Sum[CPU_LAST]; /*!< sums wallclock time/CPU consumption in whole run */ + + /* tree code opening criterion */ + + double ErrTolTheta; /*!< BH tree opening angle */ + double ErrTolForceAcc; /*!< parameter for relative opening criterion in tree walk */ + + /* adjusts accuracy of time-integration */ + + double ErrTolIntAccuracy; /*!< accuracy tolerance parameter \f$ \eta \f$ for timestep criterion. 
The + timesteps is \f$ \Delta t = \sqrt{\frac{2 \eta eps}{a}} \f$ */ + + double MinSizeTimestep, /*!< minimum allowed timestep. Normally, the simulation terminates if the + timestep determined by the timestep criteria falls below this limit. */ + MaxSizeTimestep; /*!< maximum allowed timestep */ + +#ifdef TIMESTEP_OUTPUT_LIMIT + double TimestepOutputLimit; +#endif /* #ifdef TIMESTEP_OUTPUT_LIMIT */ + +#ifdef FORCE_EQUAL_TIMESTEPS + integertime GlobalTimeStep; +#endif /* #ifdef FORCE_EQUAL_TIMESTEPS */ + + double IsoSoundSpeed; + + double CourantFac; /*!< Hydrodynamics-Courant factor */ + +#ifdef REGULARIZE_MESH_FACE_ANGLE + double CellMaxAngleFactor; +#else /* #ifdef REGULARIZE_MESH_FACE_ANGLE */ + double CellShapingFactor; +#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */ + double CellShapingSpeed; + + int CPU_TimeBinCountMeasurements[TIMEBINS]; + double CPU_TimeBinMeasurements[TIMEBINS][NUMBER_OF_MEASUREMENTS_TO_RECORD]; + + /* gravitational and hydrodynamical softening lengths (given in terms of an `equivalent' Plummer softening + * length) + * + */ + + int SofteningTypeOfPartType[NTYPES]; + + double SofteningComoving[NSOFTTYPES]; /*!< comoving gravitational softening lengths for each softeniung type */ + double SofteningMaxPhys[NSOFTTYPES]; /*!< maximum physical gravitational softening lengths for each softening type */ + + double + SofteningTable[NSOFTTYPES + NSOFTTYPES_HYDRO]; /*!< current (comoving) gravitational softening lengths for each softening type */ + double ForceSoftening[NSOFTTYPES + NSOFTTYPES_HYDRO + 1]; /*!< current (comoving) gravitational softening lengths, multiplied by a + factor 2.8 - at that scale the force is Newtonian */ + + /*! 
If particle masses are all equal for one type, the corresponding entry in MassTable is set to this + * value, * allowing the size of the snapshot files to be reduced + */ + double MassTable[NTYPES]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + double MinimumComovingHydroSoftening; + double AdaptiveHydroSofteningSpacing; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + + /* some filenames */ + char InitCondFile[MAXLEN_PATH], OutputDir[MAXLEN_PATH], SnapshotFileBase[MAXLEN_PATH], ResubmitCommand[MAXLEN_PATH], + OutputListFilename[MAXLEN_PATH]; + + /*! table with desired output times */ + double OutputListTimes[MAXLEN_OUTPUTLIST]; + char OutputListFlag[MAXLEN_OUTPUTLIST]; + int OutputListLength; /*!< number of times stored in table of desired output times */ + +#ifdef USE_SFR /* enable Springel & Hernquist model */ + double OverDensThresh; + double CritOverDensity; + double TemperatureThresh; + double CritPhysDensity; + double PhysDensThresh; + double EgySpecSN; + double EgySpecCold; + double FactorEVP; + double TempSupernova; + double TempClouds; + double MaxSfrTimescale; + double FactorSN; +#endif /* #ifdef USE_SFR */ + +#ifdef MHD_POWELL + double Powell_Momentum[3]; + double Powell_Angular_Momentum[3]; + double Powell_Energy; +#endif /* #ifdef MHD_POWELL */ + +#ifdef MHD_SEEDFIELD + int B_dir; /* flags for direction: x = 1, y = 2, z = 4 */ + double B_value; /* value for the chosen component(s) of the magnetic field */ +#endif /* #ifdef MHD_SEEDFIELD */ + + MyIDType MaxID; + +#ifdef REFINEMENT_VOLUME_LIMIT + double MaxVolumeDiff; + double MinVolume; + double MaxVolume; +#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */ + +#ifdef REDUCE_FLUSH + double FlushCpuTimeDiff; + double FlushLast; +#endif /* #ifdef REDUCE_FLUSH */ + +#ifdef TILE_ICS + int TileICsFactor; +#endif /* #ifdef TILE_ICS */ + +#ifdef ADDBACKGROUNDGRID + int GridSize; +#endif /* #ifdef ADDBACKGROUNDGRID */ + +#ifdef ONEDIMS_SPHERICAL + double CoreMass; + double CoreRadius; +#endif /* #ifdef ONEDIMS_SPHERICAL */ + + 
double GlobalDisplacementVector[3]; +} All; + +/***************************************************************************** + ** particle data ************************************************************ + ****************************************************************************/ + +/*! \brief This structure holds all the information that is + * stored for each particle of the simulation. + */ +extern struct particle_data +{ + MyDouble Pos[3]; /*!< particle position at its current time */ + MyDouble Mass; /*!< particle mass */ + MyFloat Vel[3]; /*!< particle velocity at its current time */ + MySingle GravAccel[3]; /*!< particle acceleration due to gravity */ + +#ifdef EXTERNALGRAVITY + MySingle dGravAccel; /*!< norm of spatial derivatives tensor of gravity accelerations due to external force */ +#endif /* #ifdef EXTERNALGRAVITY */ + +#ifdef PMGRID + MySingle GravPM[3]; /*!< particle acceleration due to long-range PM gravity force */ +#endif /* #ifdef PMGRID */ + +#ifdef FORCETEST + MyFloat GravAccelDirect[3]; /*!< particle acceleration calculated by direct summation */ + MyFloat PotentialDirect; /*!< potential computed with direct summation */ + MyFloat DistToID1; +#ifdef PMGRID + MyFloat GravAccelShortRange[3]; /*!< short range component of gravitational acceleration */ + MyFloat GravAccelLongRange[3]; /*!< long range component of gravitational acceleration */ + MyFloat PotentialShortRange; /*!< potential due to short-range forces */ + MyFloat PotentialLongRange; /*!< potential due to long-range forces */ +#endif /* #ifdef PMGRID */ +#endif /* #ifdef FORCETEST */ + +#if defined(EVALPOTENTIAL) || defined(OUTPUTPOTENTIAL) + MySingle Potential; /*!< gravitational potential */ +#if defined(PMGRID) + MySingle PM_Potential; /*!< gravitational potential in Particle-Mesh */ +#endif /* #if defined(PMGRID) */ +#endif /* #if defined(EVALPOTENTIAL) || defined (OUTPUTPOTENTIAL) */ + +#ifdef OUTPUTGRAVINTERACTIONS + int GravInteractions; /*!< number of gravitational interactions calculated */ +#endif
/* #ifdef OUTPUTGRAVINTERACTIONS */ + +#ifdef EXTERNALGRAVITY + MyFloat ExtPotential; /*!< value of external potential */ +#endif /* #ifdef EXTERNALGRAVITY */ + + MyIDType ID; /*!< unique ID of particle */ + +#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) + MyIDType FileOrder; +#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */ + + integertime Ti_Current; /*!< current time on integer timeline */ + + float OldAcc; /*!< magnitude of old gravitational force. Used in relative opening criterion */ + + float GravCost[GRAVCOSTLEVELS]; /*!< weight factors used for balancing the work-load */ + + unsigned char Type; /*!< flags particle type. 0=gas, 1=halo, 2=disk, 3=bulge, 4=stars, 5=bndry */ + unsigned char SofteningType; + signed char TimeBinGrav; + signed char TimeBinHydro; +} * P, /*!< holds particle data on local processor */ + *DomainPartBuf; /*!< buffer for particle data used in domain decomposition */ + +/***************************************************************************** + ** (sub)halo data *********************************************************** + ****************************************************************************/ + +extern struct subfind_data +{ + int OriginIndex, OriginTask; + int TargetIndex, TargetTask; + int GrNr; + +#ifdef SUBFIND + int SubNr; + int OldIndex; + int submark; + int originindex, origintask; + MyFloat Utherm; + MyFloat Density; + MyFloat Potential; + MyFloat Hsml; + MyFloat BindingEnergy; + +#ifdef CELL_CENTER_GRAVITY + MyDouble Center[3]; +#endif /* #ifdef CELL_CENTER_GRAVITY */ + +#ifdef SUBFIND_CALC_MORE + MyFloat SubfindHsml; + MyFloat SubfindDensity; /* total matter density */ + MyFloat SubfindDMDensity; /* dark matter density */ + MyFloat SubfindVelDisp; /* 3D DM velocity dispersion */ +#endif /* #ifdef SUBFIND_CALC_MORE */ + +#endif /* #ifdef SUBFIND */ +} * PS; + +/***************************************************************************** + ** cell data
**************************************************************** + ****************************************************************************/ + +/*! \brief Holds data that is stored for each hydro mesh cell in addition to + * the collisionless variables. + */ +extern struct sph_particle_data +{ + /* conserved variables */ + MyFloat Energy; + MyFloat Momentum[3]; + MyFloat Volume; + MyFloat OldMass; + + /* primitive variables */ + MyFloat Density; + MyFloat Pressure; /*!< current pressure */ + MySingle Utherm; + +#ifdef HIERARCHICAL_GRAVITY + MySingle FullGravAccel[3]; +#endif /* #ifdef HIERARCHICAL_GRAVITY */ + + /* variables for mesh */ + MyDouble Center[3]; /*!< center of mass of cell */ + MySingle VelVertex[3]; /*!< current vertex velocity (primitive variable) */ + + MySingle MaxDelaunayRadius; + MySingle Hsml; /* auxiliary search radius for points around a delaunay triangle */ + MySingle SurfaceArea; + +#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) + MySingle MaxFaceAngle; +#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */ + + MySingle ActiveArea; + +#if defined(OUTPUT_DIVVEL) + MyFloat DivVel; /*!< divergence of the velocity field */ +#endif /* #if defined(OUTPUT_DIVVEL) */ + +#if defined(REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED) || defined(OUTPUT_CURLVEL) + MySingle CurlVel; /*!< magnitude of the curl of the velocity field */ +#endif /* #if defined(REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED) || defined(OUTPUT_CURLVEL) */ + +#ifdef TREE_BASED_TIMESTEPS + MySingle CurrentMaxTiStep; + MySingle Csnd; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + +#if defined(REFINEMENT_HIGH_RES_GAS) + MyFloat HighResMass; + MyFloat HighResDensity; +#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */ + +#ifdef MHD + MyFloat B[3]; + MyFloat BConserved[3]; + MyFloat DivB; + MyFloat CurlB[3]; +#endif /* #ifdef MHD */ + +#ifdef PASSIVE_SCALARS + MyFloat PScalars[PASSIVE_SCALARS]; + MyFloat 
PConservedScalars[PASSIVE_SCALARS]; +#endif /* #ifdef PASSIVE_SCALARS */ + +#ifdef OUTPUT_SURFACE_AREA + int CountFaces; +#endif /* #ifdef OUTPUT_SURFACE_AREA */ + +#if defined(REFINEMENT_SPLIT_CELLS) + MySingle MinimumEdgeDistance; +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */ + +#if defined(COOLING) + MyFloat Ne; /* electron fraction, expressed as local electron number + density normalized to the hydrogen number density. Gives + indirectly ionization state and mean molecular weight. */ +#endif /* #if defined(COOLING) */ + +#ifdef USE_SFR + MySingle Sfr; +#endif /* #ifdef USE_SFR */ + +#ifdef OUTPUT_COOLHEAT + MyFloat CoolHeat; +#endif /* #ifdef OUTPUT_COOLHEAT */ + + struct grad_data Grad; + + int first_connection; + int last_connection; + +#ifdef REFINEMENT_HIGH_RES_GAS + int AllowRefinement; +#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */ + +#ifdef REFINEMENT_SPLIT_CELLS + MySingle SepVector[3]; +#endif /* #ifdef REFINEMENT_SPLIT_CELLS */ + +#ifdef REFINEMENT_VOLUME_LIMIT + MyFloat MinNgbVolume; +#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */ + + double TimeLastPrimUpdate; + +#ifdef ADDBACKGROUNDGRID + MyFloat Weight; +#endif /* #ifdef ADDBACKGROUNDGRID */ + +} * SphP, /*!< holds SPH particle data on local processor */ + *DomainSphBuf; /*!< buffer for SPH particle data in domain decomposition */ + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE +extern struct special_particle_data +{ + MyIDType ID; + double pos[3]; + double mass; +} * PartSpecialListGlobal; +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + +extern peanokey *DomainKeyBuf; + +/*! global state of system + */ +extern struct state_of_system +{ + double Mass, EnergyKin, EnergyPot, EnergyInt, EnergyTot, Momentum[4], AngMomentum[4], CenterOfMass[4], MassComp[NTYPES], + EnergyKinComp[NTYPES], EnergyPotComp[NTYPES], EnergyIntComp[NTYPES], EnergyTotComp[NTYPES], MomentumComp[NTYPES][4], + AngMomentumComp[NTYPES][4], CenterOfMassComp[NTYPES][4]; +} SysState, SysStateAtStart, SysStateAtEnd; + +/*! 
\brief Struct used for passing the parameters during the mesh cell search. + */ +typedef struct +{ + MyDouble Pos[3]; + int Task; + union + { + int Index; + float hsmlguess; + } u; + +} mesh_search_data; + +/*! \brief Struct used for sending positions to other tasks during the + * mesh cell search. + */ +typedef struct +{ + MyDouble Pos[3]; + MyFloat Distance; +} mesh_search_request; + +/*! \brief Struct used for receiving the results from other tasks during the + * mesh cell search. + */ +typedef struct +{ + MyDouble Distance; + int Task; + int Index; +} mesh_search_response; + +extern struct data_partlist +{ + int Task; /*!< The task the item was exported to. */ + int Index; /*!< The particle index of the item on the sending task. */ +} * PartList; + +extern struct datanodelist +{ + int Task; /*!< target process */ + int Index; /*!< local index that wants to open this node */ + int Node; /*!< node to be opened on foreign process */ +} * NodeList; + +#define FAC_AVG_NODES_PER_EXPORT 4.0 /*!< default choice for estimated average number of exported nodes per exported particle */ + +extern struct directdata +{ + MyDouble Pos[3]; + MyDouble Mass; + unsigned char Type; + unsigned char SofteningType; +} * DirectDataIn, *DirectDataAll; + +extern struct accdata +{ + MyFloat Acc[3]; +#ifdef EVALPOTENTIAL + MyFloat Potential; +#endif /* #ifdef EVALPOTENTIAL */ +} * DirectAccOut, *DirectAccIn; + +#if defined(EVALPOTENTIAL) || defined(OUTPUTPOTENTIAL) || defined(SUBFIND) +extern struct potdata_out +{ + MyFloat Potential; +} + /*! \brief Holds the partial results computed for imported particles. Note: + * We use GravDataResult = GravDataGet, such that the result replaces + * the imported data + */ + * PotDataResult, + /*! \brief Holds partial results received from other processors. This will + * overwrite the GravDataIn array + */ + *PotDataOut; +#endif /* #if defined (EVALPOTENTIAL) || defined (OUTPUTPOTENTIAL) || defined(SUBFIND) */ + +/*! 
\brief Buffer of size NTask used for flagging whether a particle needs to + * be exported to the other tasks. + */ +extern int *Exportflag; +/*! \brief Buffer of size NTask used for counting how many nodes are to be + * exported to the other tasks? + */ +extern int *Exportnodecount; +/*! \brief Buffer of size NTask used for holding the index into the + * DataIndexTable. + */ +extern int *Exportindex; +/*! \brief Array of NTask size of the offset into the send array where the + * objects to be sent to the specified task starts. + */ +extern int *Send_offset, + /*! \brief Array of NTask size of the number of objects to send to the + * tasks. + */ + *Send_count, + /*! \brief Array of NTask size of the number of objects to receive from the + * tasks. + */ + *Recv_count, + /*! \brief Array of NTask size of the offset into the receive array where the + * objects from the specified task starts. + */ + *Recv_offset; + +extern int *TasksThatSend, *TasksThatRecv, NSendTasks, NRecvTasks; + +extern struct send_recv_counts +{ + int Count; + int CountNodes; +} * Send, *Recv; + +extern int *Send_offset_nodes, *Send_count_nodes, *Recv_count_nodes, *Recv_offset_nodes; + +extern int Mesh_nimport, Mesh_nexport, *Mesh_Send_offset, *Mesh_Send_count, *Mesh_Recv_count, *Mesh_Recv_offset; + +extern int Force_nimport, Force_nexport, *Force_Send_offset, *Force_Send_count, *Force_Recv_count, *Force_Recv_offset; + +/*! \brief Header for the standard file format. + */ +#if(NTYPES == 7 || NTYPES == 8) +#define NTYPES_INT_HEADER 8 +#else /* #if (NTYPES==7 || NTYPES==8) */ +#define NTYPES_INT_HEADER NTYPES +#endif /* #if (NTYPES==7 || NTYPES==8) #else */ +extern struct io_header +{ + int npart[NTYPES_INT_HEADER]; /*!< number of particles of each type in this file */ + double mass[NTYPES]; /*!< mass of particles of each type. 
If 0, then the masses are explicitly + stored in the mass-block of the snapshot file, otherwise they are omitted */ + double time; /*!< time of snapshot file */ + double redshift; /*!< redshift of snapshot file */ + int flag_sfr; /*!< flags whether the simulation was including star formation */ + int flag_feedback; /*!< flags whether feedback was included (obsolete) */ + unsigned int npartTotal[NTYPES_INT_HEADER]; /*!< total number of particles of each type in this snapshot. This can be + different from npart if one is dealing with a multi-file snapshot. */ + int flag_cooling; /*!< flags whether cooling was included */ + int num_files; /*!< number of files in multi-file snapshot */ + double BoxSize; /*!< box-size of simulation in case periodic boundaries were used */ + double Omega0; /*!< matter density in units of critical density */ + double OmegaLambda; /*!< cosmological constant parameter */ + double HubbleParam; /*!< Hubble parameter in units of 100 km/sec/Mpc */ + int flag_stellarage; /*!< flags whether the file contains formation times of star particles */ + int flag_metals; /*!< flags whether the file contains metallicity values for gas and star + particles */ + unsigned int npartTotalHighWord[NTYPES_INT_HEADER]; /*!< High word of the total number of particles of each type */ + int flag_entropy_instead_u; /*!< flags that IC-file contains entropy instead of u */ + int flag_doubleprecision; /*!< flags that snapshot contains double-precision instead of single precision */ + + int flag_lpt_ics; /*!< flag to signal that IC file contains 2lpt initial conditions */ + float lpt_scalingfactor; /*!< scaling factor for 2lpt initial conditions */ + + int flag_tracer_field; /*!< flags presence of a tracer field */ + + int composition_vector_length; /*!< specifies the length of the composition vector (0 if not present) */ + +#if(NTYPES == 6) + char fill[40]; /*!< fills to 256 Bytes */ +#elif(NTYPES == 7) /* #if (NTYPES==6) */ + char fill[8]; /*!< fills to 256 Bytes */ 
+#endif /* #elif (NTYPES==7) */ +} header; /*!< holds header for snapshot files */ + +/*! \brief Header for the ICs file format, if NTYPES does not match. + */ +#ifdef NTYPES_ICS +extern struct io_header_ICs +{ + int npart[NTYPES_ICS]; /*!< number of particles of each type in this file */ + double mass[NTYPES_ICS]; /*!< mass of particles of each type. If 0, then the masses are explicitly + stored in the mass-block of the snapshot file, otherwise they are omitted */ + double time; /*!< time of snapshot file */ + double redshift; /*!< redshift of snapshot file */ + int flag_sfr; /*!< flags whether the simulation was including star formation */ + int flag_feedback; /*!< flags whether feedback was included (obsolete) */ + unsigned int npartTotal[NTYPES_ICS]; /*!< total number of particles of each type in this snapshot. This can be + different from npart if one is dealing with a multi-file snapshot. */ + int flag_cooling; /*!< flags whether cooling was included */ + int num_files; /*!< number of files in multi-file snapshot */ + double BoxSize; /*!< box-size of simulation in case periodic boundaries were used */ + double Omega0; /*!< matter density in units of critical density */ + double OmegaLambda; /*!< cosmological constant parameter */ + double HubbleParam; /*!< Hubble parameter in units of 100 km/sec/Mpc */ + int flag_stellarage; /*!< flags whether the file contains formation times of star particles */ + int flag_metals; /*!< flags whether the file contains metallicity values for gas and star + particles */ + unsigned int npartTotalHighWord[NTYPES_ICS]; /*!< High word of the total number of particles of each type */ + int flag_entropy_instead_u; /*!< flags that IC-file contains entropy instead of u */ + int flag_doubleprecision; /*!< flags that snapshot contains double-precision instead of single precision */ + + int flag_lpt_ics; /*!< flag to signal that IC file contains 2lpt initial conditions */ + float lpt_scalingfactor; /*!< scaling factor for 2lpt initial 
conditions */ + + int flag_tracer_field; /*!< flags presence of a tracer field */ + + int composition_vector_length; /*!< specifies the length of the composition vector (0 if not present) */ + +#if(NTYPES_ICS == 6) + char fill[40]; /*!< fills to 256 Bytes */ +#else /* #if (NTYPES_ICS==6) */ + terminate("NTYPES_ICS != 6") +#endif /* #if (NTYPES_ICS==6) #else */ +} header_ICs; /*!< holds header for IC files */ +#endif /* #ifdef NTYPES_ICS */ + +enum iofields +{ + IO_POS, + IO_VEL, + IO_ID, + IO_MASS, + IO_U, + IO_RHO, + IO_VORT, + IO_VOL, + IO_CM, + IO_VERTEXVEL, + IO_FACEANGLE, + IO_SAREA, + IO_NFACES, + + IO_HIGHRESMASS, + IO_PRESSURE, + IO_CSND, + IO_NE, + IO_NH, + IO_SFR, + + IO_POT, + IO_ACCEL, + IO_GRADP, + IO_GRADR, + IO_GRADV, + IO_GRADB, + + IO_POT_MINI, + IO_POS_MINI, + + IO_HI, + IO_TSTP, + IO_BFLD, + IO_DIVB, + IO_COOLRATE, + IO_ALLOWREFINEMENT, + + IO_DIVVEL, + IO_CURLVEL, + IO_COOLHEAT, + IO_PASS, + + IO_SUBFINDHSML, + IO_SUBFINDDENSITY, + IO_SUBFINDDMDENSITY, + IO_SUBFINDVELDISP, + IO_GROUPNR, + + IO_SOFTENING, + IO_TASK, + IO_TIMEBIN_HYDRO, + + IO_LASTENTRY /* This should be kept - it signals the end of the list */ +}; + +enum arrays +{ + A_NONE, + A_SPHP, + A_P, + A_PS +}; + +enum types_in_file +{ + FILE_NONE = -1, + FILE_INT = 0, + FILE_MY_ID_TYPE = 2, + FILE_MY_IO_FLOAT = 1, + FILE_DOUBLE = 3, + FILE_FLOAT = 4 +}; + +enum types_in_memory +{ + MEM_INT, + MEM_MY_ID_TYPE, + MEM_FLOAT, + MEM_DOUBLE, + MEM_MY_SINGLE, + MEM_MY_FLOAT, + MEM_MY_DOUBLE, + MEM_NONE +}; + +enum e_typelist +{ + GAS_ONLY = 1, + STARS_ONLY = 16, + GAS_AND_STARS = 17, + BHS_ONLY = 32, + ALL_TYPES = ((1 << NTYPES) - 1), + SET_IN_GET_PARTICLES_IN_BLOCK = 0 +}; + +enum sn_type +{ + SN_FULL = 0, + SN_MINI = 1, + SN_MINI_ONLY = 2, + SN_NO_SUBBOX = 3 +}; + +typedef struct +{ + enum iofields field; + enum types_in_memory type_in_memory; + enum types_in_file type_in_file_input; + enum types_in_file type_in_file_output; + int values_per_block; + char label[4]; + char datasetname[256]; + 
void (*io_func)(int, int, void *, int); + int typelist; + enum arrays array; + size_t offset; + enum sn_type snap_type; + + char hasunit; + double a; + double h; + double L; + double M; + double V; + double c; +} IO_Field; + +extern IO_Field *IO_Fields; +extern int N_IO_Fields; +extern int Max_IO_Fields; + +extern char (*Parameters)[MAXLEN_PARAM_TAG]; +extern char (*ParametersValue)[MAXLEN_PARAM_VALUE]; +extern char *ParametersType; + +/*! \brief The tree data structure. + * + * Nodes points to the actual memory + * allocated for the internal nodes, but is shifted such that + * Nodes[All.MaxPart] gives the first allocated node. Note that node + * numbers less than All.MaxPart are the leaf nodes that contain a + * single particle, and node numbers >= MaxPart+MaxNodes are "pseudo + * particles" that hang off the toplevel leaf nodes belonging to + * other tasks. These are not represented by this structure. Instead, + * the tree traversal for these are saved in the Nextnode, Prevnode + * and Father arrays, indexed with the node number in the case of + * real particles and by nodenumber-MaxNodes for pseudo + * particles. + */ +extern struct NODE +{ + union + { + int suns[8]; /*!< temporary pointers to daughter nodes */ + struct + { + MyDouble s[3]; /*!< center of mass of node */ + MyDouble mass; /*!< mass of node */ + /*! The next node in the tree walk in case the current node does + * not need to be opened. This means that it traverses the 8 + * subnodes of a node in a breadth-first fashion, and then goes + * to father->sibling. + */ + int sibling; + /*! The next node in case the current node needs to be + * opened. Applying nextnode repeatedly results in a pure + * depth-first traversal of the tree. + */ + int nextnode; + /*! The parent node of the node. (Is -1 for the root node.) 
+ */ + int father; +#if(NSOFTTYPES > 1) + unsigned char maxsofttype; /**< hold the maximum gravitational softening of particles */ +#if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) + unsigned char maxhydrosofttype; + unsigned char minhydrosofttype; +#endif /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) */ +#endif /* #if (NSOFTTYPES > 1) */ + } d; + } u; + + MyDouble center[3]; /*!< geometrical center of node */ + MyFloat len; /*!< sidelength of treenode */ + +} * Nodes; + +#ifdef MULTIPLE_NODE_SOFTENING +extern struct ExtNODE +{ + MyDouble mass_per_type[NSOFTTYPES]; +} * ExtNodes; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + +/*! Gives next node in tree walk for the "particle" nodes. Entries 0 + * -- MaxPart-1 are the real particles, and the "pseudoparticles" are + * indexed by the node number-MaxNodes. + */ +extern int *Nextnode; + +/*! Gives previous node in tree walk for the leaf (particle) + * nodes. Entries 0 -- MaxPart-1 are the real particles, and the + * "pseudoparticles" are indexed by the node number-MaxNodes. + */ +extern int *Father; + +/*! Variables for neighbor tree */ +extern int Ngb_MaxPart; +extern int Ngb_NumNodes; +extern int Ngb_MaxNodes; +extern int Ngb_FirstNonTopLevelNode; +extern int Ngb_NextFreeNode; +extern int *Ngb_Father; +extern int *Ngb_Marker; +extern int Ngb_MarkerValue; + +extern int *Ngb_DomainNodeIndex; +extern int *DomainListOfLocalTopleaves; +extern int *DomainNLocalTopleave; +extern int *DomainFirstLocTopleave; +extern int *Ngb_Nextnode; + +/*! 
The ngb-tree data structure + */ +extern struct NgbNODE +{ + union + { + int suns[8]; /*!< temporary pointers to daughter nodes */ + struct + { + int sibling; + int nextnode; + MyNgbTreeFloat range_min[3]; + MyNgbTreeFloat range_max[3]; + } d; + } u; + + MyNgbTreeFloat vertex_vmin[3]; + MyNgbTreeFloat vertex_vmax[3]; + + int father; + + integertime Ti_Current; + +} * Ngb_Nodes; + +extern struct ExtNgbNODE +{ + float vmin[3]; + float vmax[3]; + float MaxCsnd; +} * ExtNgb_Nodes; + +#ifdef STATICNFW +extern double Rs, R200; +extern double Dc; +extern double RhoCrit, V200; +extern double fac; +#endif /* #ifdef STATICNFW */ + +extern int MaxThreads; + +#endif /* #define ALLVARS_H */ diff --git a/src/amuse/community/arepo/src/main/main.c b/src/amuse/community/arepo/src/main/main.c new file mode 100644 index 0000000000..f1ae80be6a --- /dev/null +++ b/src/amuse/community/arepo/src/main/main.c @@ -0,0 +1,296 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/main/main.c + * \date 05/2018 + * \brief Start of the program. 
+ * \details contains functions: + * int main(int argc, char **argv) + * void endrun() + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 06.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +// #ifdef HAVE_HDF5 +// #include +// #endif /* #ifdef HAVE_HDF5 */ + +/*! \brief The entry point of the program. + * + * This function initializes the MPI communication packages, and sets + * cpu-time counters to 0. Then begrun1() is called, which sets up + * the simulation. Then the IC file is loaded (restart-file loading is + * commented out here) and init() is called to prepare the IC's for the run. + * A call to begrun2() finishes the initialization. Finally, run() is + * started, the main simulation loop, which iterates over the timesteps. + * + * \param[in] argc Argument count; only forwarded to MPI_Init(). + * \param[in] argv Argument vector; only forwarded to MPI_Init(). + * + * \return status of exit; 0 for normal exit. + */ +int main(int argc, char **argv) +{ +// #ifdef IMPOSE_PINNING +// detect_topology(); +// get_core_set(); +// #endif /* #ifdef IMPOSE_PINNING */ + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); + MPI_Comm_size(MPI_COMM_WORLD, &NTask); + + /* output a welcome message */ + hello(); + + /* initialize CPU-time/Wallclock-time measurement */ + init_cpu_log(); + + determine_compute_nodes(); + +// #ifdef IMPOSE_PINNING +// /* pin the MPI ranks to the available core set */ +// pin_to_core_set(); +// report_pinning(); +// #endif /* #ifdef IMPOSE_PINNING */ + +// #ifdef HOST_MEMORY_REPORTING +// mpi_report_committable_memory(); +// #endif /* #ifdef HOST_MEMORY_REPORTING */ + + for(PTask = 0; NTask > (1 << PTask); PTask++) + ; + + begrun0(); + + // if(argc < 2) + // { + // if(ThisTask == 0) + // { + // printf("\nParameters are missing.
\n"); + // printf("Call with [] [] []\n"); + // printf("\n"); + // printf(" RestartFlag Action\n"); + // printf(" 0 Read initial conditions and start simulation\n"); + // printf(" 1 Read restart files and resume simulation\n"); + // printf(" 2 Restart from specified snapshot dump and resume simulation\n"); + // printf(" 3 Run FOF and optionally SUBFIND: [ for SUBBOX_SNAPSHOTS]\n"); + // printf( + // " 6 Convert snapshot file to different format [input=ICFormat output=SnapFormat NOTE: derived " + // "quantities have round-off errors!\n"); + // printf(" 14 Write out the Voronoi mesh: \n"); + // printf(" 17 Write out snapshot dump with measured gradients\n"); + // printf(" 18 Recalculate gravitational potential values for specified snaphot dump: \n"); + // printf("\n"); + // } + // endrun(); + // } + + strcpy(ParameterFile, "param.txt"); /* Removing command line parsing. argv[1] replaced with "param.txt". */ + + // if(argc >= 3) + // RestartFlag = atoi(argv[2]); + // else + RestartFlag = 0; + + // if(argc >= 4) + // RestartSnapNum = atoi(argv[3]); + // else + // RestartSnapNum = -1; + + // Do minimal validation of arguments here rather than in random places in the code + // if((RestartFlag == 3 || RestartFlag == 6 || RestartFlag == 14 || RestartFlag == 17 || RestartFlag == 18) && RestartSnapNum < 0) + // { + // mpi_printf("Need to give the snapshot number\n"); + // return (0); + // } + +// #ifndef RECOMPUTE_POTENTIAL_IN_SNAPSHOT +// if(RestartFlag == 18) +// { +// mpi_printf("Need RECOMPUTE_POTENTIAL_IN_SNAPSHOT for this option\n"); +// return (0); +// } +// #endif /* #ifndef RECOMPUTE_POTENTIAL_IN_SNAPSHOT */ + +// #ifdef RUNNING_SAFETY_FILE +// /* do not run if 'running' safety file exists */ +// int runningflag = 0; +// if(ThisTask == 0) +// { +// FILE *fd; +// char runningfname[MAXLEN_PATH]; + +// sprintf(runningfname, "./running"); +// if((fd = fopen(runningfname, "r"))) /* Is the running-file present? If yes, interrupt the run.
*/ +// { +// fclose(fd); +// printf("running-file detected. stopping.\n"); +// runningflag = 1; +// } +// } +// MPI_Bcast(&runningflag, 1, MPI_INT, 0, MPI_COMM_WORLD); +// if(runningflag) +// { +// MPI_Finalize(); /* do not call endrun() */ +// return 0; +// } +// else +// { +// /* touch a running safety file */ +// if(ThisTask == 0) +// { +// FILE *fd; +// char runningfname[MAXLEN_PATH]; + +// sprintf(runningfname, "./running"); +// if((fd = fopen(runningfname, "w"))) +// { +// fclose(fd); +// printf("touching a running-file: %s \n", runningfname); +// } +// else +// terminate("could not touch a running-file: %s\n", runningfname); +// } +// } +// #endif /* #ifdef RUNNING_SAFETY_FILE */ + + begrun1(); /* set-up run */ + + /* see if we are loading a restart file or an IC file */ + // if(RestartFlag == 1) + // loadrestart(); + // else + // { + /* We're reading an IC file. Is it a snapshot or really an IC? */ + char fname[MAXLEN_PATH]; + + // if(RestartFlag >= 2 && RestartSnapNum >= 0) + // { + // if(All.NumFilesPerSnapshot > 1) + // sprintf(fname, "%s/snapdir_%03d/%s_%03d", All.OutputDir, RestartSnapNum, All.SnapshotFileBase, RestartSnapNum); + // else + // sprintf(fname, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, RestartSnapNum); + // } + // else + strcpy(fname, All.InitCondFile); + + /* now we can load the file */ + +#ifdef READ_DM_AS_GAS + read_ic(fname, (RestartFlag == 14) ? 0x02 : LOAD_TYPES); +#else /* #ifdef READ_DM_AS_GAS */ + read_ic(fname, (RestartFlag == 14) ? 0x01 : LOAD_TYPES); +#endif /* #ifdef READ_DM_AS_GAS #else */ + + /* If we are supposed to just convert the file, write and exit here.
*/ + // if(RestartFlag == 6) + // { + // /* important for proper functioning of FOF+SUBFIND */ + // if(All.ComovingIntegrationOn) /* change to new velocity variable */ + // { + // int i, j; + // for(i = 0; i < NumPart; i++) + // for(j = 0; j < 3; j++) + // P[i].Vel[j] *= sqrt(All.Time) * All.Time; + // } + // set_softenings(); + // All.TopNodeAllocFactor = 0.08; + // All.TreeAllocFactor = 0.7; + // All.NgbTreeAllocFactor = 0.7; + + // sprintf(All.SnapshotFileBase, "%s_converted", All.SnapshotFileBase); + // mpi_printf("Start writing file %s\nRestartSnapNum %d\n", All.SnapshotFileBase, RestartSnapNum); + // savepositions(RestartSnapNum, 0); + // endrun(); + // } + + /* init returns a status code, where a value of >=0 means that endrun() should be called. */ + int status = init(); + + if(status >= 0) + { + if(status > 0) + mpi_printf("init() returned with %d\n", status); + + endrun(); + } + // } + + begrun2(); + + run(); /* main simulation loop */ + + endrun(); /* clean up & finalize MPI */ + + return 0; +} + +/*! \brief This function ends the simulations in case of no error. + * + * This method has to be called by all processes. It should be used only + * if the simulation ends without errors. + * Otherwise terminate() should be used instead. + * + * \return void + */ +void endrun() +{ + mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second())); + mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n"); + fflush(stdout); + +#ifdef HAVE_HDF5 + /*The hdf5 library will sometimes register an atexit() handler that calls its + * error handler. In AREPO this is set to my_hdf_error_handler, which calls + * MPI_Abort. Calling MPI_Abort after MPI_Finalize is not allowed.
+ * Hence unset the HDF error handler here + */ + H5Eset_auto(NULL, NULL); +#endif /* #ifdef HAVE_HDF5 */ + +// #ifdef RUNNING_SAFETY_FILE +// if(All.Ti_Current < TIMEBASE) /* simulation has not reached the final time */ +// { +// char running_fname[MAXLEN_PATH], running_done_fname[MAXLEN_PATH]; +// sprintf(running_fname, "./running"); +// sprintf(running_done_fname, "./running_done"); +// rename(running_fname, running_done_fname); +// mpi_printf("moved ./running file to ./running_done, job can now restart.\n"); +// } +// else +// mpi_printf("leaving ./running file in place since run is complete to prevent any restarts.\n"); +// #endif /* #ifdef RUNNING_SAFETY_FILE */ + + MPI_Finalize(); + exit(0); +} diff --git a/src/amuse/community/arepo/src/main/main_original.c b/src/amuse/community/arepo/src/main/main_original.c new file mode 100644 index 0000000000..629e988526 --- /dev/null +++ b/src/amuse/community/arepo/src/main/main_original.c @@ -0,0 +1,299 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/main/main.c + * \date 05/2018 + * \brief Start of the program.
+ * \details contains functions: + * int main(int argc, char **argv) + * void endrun() + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 06.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include <gsl/gsl_math.h> +#include <math.h> +#include <mpi.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef HAVE_HDF5 +#include <hdf5.h> +#endif /* #ifdef HAVE_HDF5 */ + +/*! \brief The entry point of the program. + * + * This function initializes the MPI communication packages, and sets + * cpu-time counters to 0. Then begrun1() is called, which sets up + * the simulation. Then either IC's or restart files are loaded. In + * case of IC's init() is called which prepares the IC's for the run. + * A call to begrun2() finishes the initialization. Finally, run() is + * started, the main simulation loop, which iterates over the timesteps. + * + * \param[in] argc Argument count from command line. + * \param[in] argv Argument vector from command line. + * + * \return status of exit; 0 for normal exit. + */ +int main(int argc, char **argv) +{ +#ifdef IMPOSE_PINNING + detect_topology(); + get_core_set(); +#endif /* #ifdef IMPOSE_PINNING */ + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); + MPI_Comm_size(MPI_COMM_WORLD, &NTask); + + /* output a welcome message */ + hello(); + + /* initialize CPU-time/Wallclock-time measurement */ + init_cpu_log(); + + determine_compute_nodes(); + +#ifdef IMPOSE_PINNING + /* pin the MPI ranks to the available core set */ + pin_to_core_set(); + report_pinning(); +#endif /* #ifdef IMPOSE_PINNING */ + +#ifdef HOST_MEMORY_REPORTING + mpi_report_committable_memory(); +#endif /* #ifdef HOST_MEMORY_REPORTING */ + + Argc = argc; + Argv = argv; + + for(PTask = 0; NTask > (1 << PTask); PTask++) + ; + + begrun0(); + + if(argc < 2) + { + if(ThisTask == 0) + { + printf("\nParameters are missing. 
\n"); + printf("Call with [] [] []\n"); + printf("\n"); + printf(" RestartFlag Action\n"); + printf(" 0 Read initial conditions and start simulation\n"); + printf(" 1 Read restart files and resume simulation\n"); + printf(" 2 Restart from specified snapshot dump and resume simulation\n"); + printf(" 3 Run FOF and optionally SUBFIND: [ for SUBBOX_SNAPSHOTS]\n"); + printf( + " 6 Convert snapshot file to different format [input=ICFormat output=SnapFormat NOTE: derived " + "quantities have round-off errors!\n"); + printf(" 14 Write out the Voronoi mesh: \n"); + printf(" 17 Write out snapshot dump with measured gradients\n"); + printf(" 18 Recalculate gravitational potential values for specified snaphot dump: \n"); + printf("\n"); + } + endrun(); + } + + strcpy(ParameterFile, argv[1]); + + if(argc >= 3) + RestartFlag = atoi(argv[2]); + else + RestartFlag = 0; + + if(argc >= 4) + RestartSnapNum = atoi(argv[3]); + else + RestartSnapNum = -1; + + // Do minimal validation of arguments here rather than in random places in the code + if((RestartFlag == 3 || RestartFlag == 6 || RestartFlag == 14 || RestartFlag == 17 || RestartFlag == 18) && RestartSnapNum < 0) + { + mpi_printf("Need to give the snapshot number\n"); + return (0); + } + +#ifndef RECOMPUTE_POTENTIAL_IN_SNAPSHOT + if(RestartFlag == 18) + { + mpi_printf("Need RECOMPUTE_POTENTIAL_IN_SNAPSHOT for this option\n"); + return (0); + } +#endif /* #ifndef RECOMPUTE_POTENTIAL_IN_SNAPSHOT */ + +#ifdef RUNNING_SAFETY_FILE + /* do not run if 'running' safety file exists */ + int runningflag = 0; + if(ThisTask == 0) + { + FILE *fd; + char runningfname[MAXLEN_PATH]; + + sprintf(runningfname, "./running"); + if((fd = fopen(runningfname, "r"))) /* Is the running-file present? If yes, interrupt the run. */ + { + fclose(fd); + printf("running-file detected. 
stopping.\n"); + runningflag = 1; + } + } + MPI_Bcast(&runningflag, 1, MPI_INT, 0, MPI_COMM_WORLD); + if(runningflag) + { + MPI_Finalize(); /* do not call endrun() */ + return 0; + } + else + { + /* touch a running safety file */ + if(ThisTask == 0) + { + FILE *fd; + char runningfname[MAXLEN_PATH]; + + sprintf(runningfname, "./running"); + if((fd = fopen(runningfname, "w"))) + { + fclose(fd); + printf("touching a running-file: %s \n", runningfname); + } + else + terminate("could not touch a running-file: %s\n", runningfname); + } + } +#endif /* #ifdef RUNNING_SAFETY_FILE */ + + begrun1(); /* set-up run */ + + /* see if we are loading a restart file or an IC file */ + if(RestartFlag == 1) + loadrestart(); + else + { + /* We're reading an IC file. Is it a snapshot or really an IC? */ + char fname[MAXLEN_PATH]; + + if(RestartFlag >= 2 && RestartSnapNum >= 0) + { + if(All.NumFilesPerSnapshot > 1) + sprintf(fname, "%s/snapdir_%03d/%s_%03d", All.OutputDir, RestartSnapNum, All.SnapshotFileBase, RestartSnapNum); + else + sprintf(fname, "%s%s_%03d", All.OutputDir, All.SnapshotFileBase, RestartSnapNum); + } + else + strcpy(fname, All.InitCondFile); + + /* now we can load the file */ + +#ifdef READ_DM_AS_GAS + read_ic(fname, (RestartFlag == 14) ? 0x02 : LOAD_TYPES); +#else /* #ifdef READ_DM_AS_GAS */ + read_ic(fname, (RestartFlag == 14) ? 0x01 : LOAD_TYPES); +#endif /* #ifdef READ_DM_AS_GAS #else */ + + /* If we are supposed to just convert the file, write and exit here. 
*/ + if(RestartFlag == 6) + { + /* important for proper functioning of FOF+SUBFIND */ + if(All.ComovingIntegrationOn) /* change to new velocity variable */ + { + int i, j; + for(i = 0; i < NumPart; i++) + for(j = 0; j < 3; j++) + P[i].Vel[j] *= sqrt(All.Time) * All.Time; + } + set_softenings(); + All.TopNodeAllocFactor = 0.08; + All.TreeAllocFactor = 0.7; + All.NgbTreeAllocFactor = 0.7; + + strcat(All.SnapshotFileBase, "_converted"); /* append in place: sprintf() with the same buffer as source and destination is undefined behaviour */ + mpi_printf("Start writing file %s\nRestartSnapNum %d\n", All.SnapshotFileBase, RestartSnapNum); + savepositions(RestartSnapNum, 0); + endrun(); + } + + /* init returns a status code, where a value of >=0 means that endrun() should be called. */ + int status = init(); + + if(status >= 0) + { + if(status > 0) + mpi_printf("init() returned with %d\n", status); + + endrun(); + } + } + + begrun2(); + + run(); /* main simulation loop */ + + endrun(); /* clean up & finalize MPI */ + + return 0; +} + +/*! \brief This function ends the simulations in case of no error. + * + * This method has to be called by all processes. It should be used only + * if the simulation ends without any errors. + * Otherwise terminate() should be used instead. + * + * \return void (does not return to the caller: calls MPI_Finalize() and then exit(0)) + */ +void endrun() +{ + mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second())); + mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n"); + fflush(stdout); + +#ifdef HAVE_HDF5 + /*The hdf5 library will sometimes register an atexit() handler that calls its + * error handler. In AREPO this is set to my_hdf_error_handler, which calls + * MPI_Abort. Calling MPI_Abort after MPI_Finalize is not allowed. 
+ * Hence unset the HDF error handler here + */ + H5Eset_auto(NULL, NULL); +#endif /* #ifdef HAVE_HDF5 */ + +#ifdef RUNNING_SAFETY_FILE + if(All.Ti_Current < TIMEBASE) /* simulation has not reached the final time */ + { + char running_fname[MAXLEN_PATH], running_done_fname[MAXLEN_PATH]; + sprintf(running_fname, "./running"); + sprintf(running_done_fname, "./running_done"); + rename(running_fname, running_done_fname); + mpi_printf("moved ./running file to ./running_done, job can now restart.\n"); + } + else + mpi_printf("leaving ./running file in place since run is complete to prevent any restarts.\n"); +#endif /* #ifdef RUNNING_SAFETY_FILE */ + + MPI_Finalize(); + exit(0); +} diff --git a/src/amuse/community/arepo/src/main/main_reduced.c b/src/amuse/community/arepo/src/main/main_reduced.c new file mode 100644 index 0000000000..1e7eec7ba7 --- /dev/null +++ b/src/amuse/community/arepo/src/main/main_reduced.c @@ -0,0 +1,135 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/main/main_reduced.c + * \date 05/2018 + * \brief Start of the program. 
+ * \details contains functions: + * int main(int argc, char **argv) + * void endrun() + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 06.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include <gsl/gsl_math.h> +#include <math.h> +#include <mpi.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief The entry point of the program. + * + * This function initializes the MPI communication packages, and sets + * cpu-time counters to 0. Then begrun1() is called, which sets up + * the simulation. This reduced version does no command line parsing: the parameter file is fixed to "param.txt", RestartFlag is set to 0 + * and the file named by All.InitCondFile is read with read_ic(). Then init() is called which prepares the IC's for the run. + * A call to begrun2() finishes the initialization. Finally, run() is + * started, the main simulation loop, which iterates over the timesteps. + * + * \param[in] argc Argument count from command line (only passed on to MPI_Init()). + * \param[in] argv Argument vector from command line (only passed on to MPI_Init()). + * + * \return status of exit; 0 for normal exit. + */ +int main(int argc, char **argv) +{ + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); + MPI_Comm_size(MPI_COMM_WORLD, &NTask); + + /* output a welcome message */ + hello(); + + /* initialize CPU-time/Wallclock-time measurement */ + init_cpu_log(); + + determine_compute_nodes(); + + for(PTask = 0; NTask > (1 << PTask); PTask++) + ; + + begrun0(); + + strcpy(ParameterFile, "param.txt"); /* Removing command line parsing. argv[1] replaced with "param.txt". 
*/ + RestartFlag = 0; + + begrun1(); /* set-up run */ + + char fname[MAXLEN_PATH]; + strcpy(fname, All.InitCondFile); + + /* now we can load the file */ + +#ifdef READ_DM_AS_GAS + read_ic(fname, (RestartFlag == 14) ? 0x02 : LOAD_TYPES); +#else /* #ifdef READ_DM_AS_GAS */ + read_ic(fname, (RestartFlag == 14) ? 0x01 : LOAD_TYPES); +#endif /* #ifdef READ_DM_AS_GAS #else */ + + /* init returns a status code, where a value of >=0 means that endrun() should be called. 
*/ + int status = init(); + + if(status >= 0) + { + if(status > 0) + mpi_printf("init() returned with %d\n", status); + + endrun(); + } + + begrun2(); + run(); /* main simulation loop */ + endrun(); /* clean up & finalize MPI */ + + return 0; +} + +/*! \brief This function ends the simulations in case of no error. + * + * This method has to be called by all processes. It should be used only + * if the simulation ends without any errors. + * Otherwise terminate() should be used instead. + * + * \return void (does not return to the caller: calls MPI_Finalize() and then exit(0)) + */ +void endrun() +{ + mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second())); + mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n"); + fflush(stdout); + +#ifdef HAVE_HDF5 + /*The hdf5 library will sometimes register an atexit() handler that calls its + * error handler. In AREPO this is set to my_hdf_error_handler, which calls + * MPI_Abort. Calling MPI_Abort after MPI_Finalize is not allowed. + * Hence unset the HDF error handler here + */ + H5Eset_auto(NULL, NULL); +#endif /* #ifdef HAVE_HDF5 */ + + MPI_Finalize(); + exit(0); +} diff --git a/src/amuse/community/arepo/src/main/proto.h b/src/amuse/community/arepo/src/main/proto.h new file mode 100644 index 0000000000..15a346f1bc --- /dev/null +++ b/src/amuse/community/arepo/src/main/proto.h @@ -0,0 +1,665 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/main/proto.h + * \date 05/2018 + * \brief Function declarations. + * \details No particular order. + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 29.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef PROTO_H +#define PROTO_H + +#include "../gravity/forcetree.h" +#include "../main/allvars.h" +#include "../utils/timer.h" + +#include +#include +#include + +#ifdef IMPOSE_PINNING +#ifndef __USE_GNU +#define __USE_GNU +#endif /* #ifndef __USE_GNU */ +#include +#endif /* #ifdef IMPOSE_PINNING */ + +#ifdef HAVE_HDF5 +#include +#endif /* #ifdef HAVE_HDF5 */ + +#if defined(COOLING) +#include "../cooling/cooling_proto.h" +#endif /* #if defined(COOLING) */ + +void sfr_init(); +void sfr_create_star_particles(void); +void ngb_finish_rangebounds_update(int nchanged, int *nodelist); +void ngb_update_rangebounds(int i, int *nchanged, int *nodelist); +int ngb_treefind_variable(MyDouble searchcenter[3], MyFloat hsml, int target, int *startnode, int mode, int *nexport, + int *nsend_local); +int ngb_treebuild(int npart); +void ngb_treeallocate(void); +void ngb_treefree(void); +int ngb_treefind_export_node_threads(int no, int target, int thread_id, int image_flag); +int ngb_treefind_variable_threads(MyDouble searchcenter[3], MyFloat hsml, int target, int mode, int thread_id, int numnodes, + int *firstnode); + +void drift_node(struct NgbNODE *current, integertime time1); +void drift_all_particles(void); +double get_desired_softening_from_mass(double mass); +void log_restart_debug(void); +int get_thread_num(void); +void report_pinning(void); 
+void detect_topology(void); +void pin_to_core_set(void); +void get_core_set(void); +int derefine_should_this_cell_be_merged(int i, int flag); + +void gravity_external(void); +void gravity(int timebin, int fullflag); +int my_ffsll(peanokey i); +void set_cosmo_factors_for_current_time(void); +void calc_exact_gravity_for_particle_type(void); +void calculate_non_standard_physics_with_valid_gravity_tree(void); +void calculate_non_standard_physics_with_valid_gravity_tree_always(void); +int get_softeningtype_for_hydro_cell(int i); +void gravity_forcetest_testforcelaw(void); +void *myfree_query_last_block(void); + +void subdivide_evenly(int N, int pieces, int index, int *first, int *count); +void force_evaluate_direct(int target, int result_idx, int nimport); +void gravity_direct(int timebin); +double dabs(double a); +double dmax(double a, double b); +double dmin(double a, double b); +double max_array(double *a, int num_elements); +int imax(int a, int b); +int imin(int a, int b); +double mysort(void *base, size_t nel, size_t width, int (*compar)(const void *, const void *)); + +int myflush(FILE *fstream); +int flush_everything(void); +void gravity_force_finalize(int timebin); +void permutate_chunks_in_list(int ncount, int *list); +double get_default_softening_of_particletype(int type); +double get_random_number_aux(void); +void sumup_large_ints_comm(int n, int *src, long long *res, MPI_Comm comm); +void ngb_update_velocities(void); +void hello(void); +void find_long_range_step_constraint(void); + +void ngb_treemodifylength(int delta_NgbMaxPart); +void domain_resize_storage(int count_get, int count_get_sph, int option_flag); +void init_individual_softenings(void); +void do_derefinements_and_refinements(); +void mark_active_timebins(void); +void voronoi_test(void); +void execute_resubmit_command(void); +void output_compile_time_options(void); +void init_io_fields(); +void produce_dump(void); + +void create_snapshot_if_desired(void); +void output_log_messages(void); +void 
mpi_report_committable_memory(void); +long long report_comittable_memory(long long *MemTotal, long long *Committed_AS, long long *SwapTotal, long long *SwapFree); +int check_for_interruption_of_run(void); +void set_non_standard_physics_for_current_time(void); +void calculate_non_standard_physics_prior_mesh_construction(void); +void calculate_non_standard_physics_end_of_step(void); +void compute_statistics(void); +void face_limit_fluxes(struct state *st_L, struct state *st_R, struct state *st_center_L, struct state *st_center_R, + struct fluxes *flux, double dt, double *count, double *count_reduced); + +double get_sound_speed(int p); +void set_pressure_of_cell(int i); +void gradient_init(MyFloat *addr, MyFloat *addr_exch, MySingle *addr_grad, int type); +void limit_vel_gradient(double *d, MySingle *grad_vx, MySingle *grad_vy, MySingle *grad_vz, double csnd); +void subfind_density_hsml_guess(void); +void peano_hilbert_key_inverse(peanokey key, int bits, peano1D *x, peano1D *y, peano1D *z); +void find_nearest_meshpoint_global(mesh_search_data *searchdata, int n, int hsmlguess, int verbose); +void reorder_DP(void); +void peano_hilbert_order_DP(void); +void validate_vertex_velocities(void); + +double get_cell_radius(int i); +double nearest_x(double d); +double nearest_y(double d); +double nearest_z(double d); +int voronoi_get_connected_particles(tessellation *T); +void voronoi_init_connectivity(tessellation *T); +void voronoi_update_connectivity(tessellation *T); +int compare_foreign_connection(const void *a, const void *b); +void voronoi_remove_connection(int i); +int pmforce_is_particle_high_res(int type, MyDouble *pos); + +void cooling_only(void); +void report_VmRSS(void); +void tree_based_timesteps_setsoundspeeds(void); +void voronoi_update_ghost_velvertex(void); +int should_this_cell_be_split(int i); +int do_refinements(void); +int should_this_cell_be_merged(int i, int flag); +int do_derefinements(void); +void move_collisionless_particle(int new_i, int old_i); 
+void dump_memory_table(void); + +void report_detailed_memory_usage_of_largest_task(void); +void calculate_vertex_velocity_divergence(void); +void make_list_of_active_particles(void); +void find_gravity_timesteps_and_do_gravity_step_first_half(void); +void do_gravity_step_second_half(void); +void voronoi_1D_reorder_gas(void); +int voronoi_1D_compare_key(const void *a, const void *b); +void voronoi_1D_order(void); +void pm2d_init_periodic(void); +void pm2d_init_periodic_allocate(void); + +void pm2d_init_periodic_free(void); +void pm2d_force_periodic(int mode); +int pm2d_periodic_compare_sortindex(const void *a, const void *b); +void pm2d_mysort_pmperiodic(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)); +int timestep_evaluate(int target, int mode, int threadid); +void tree_based_timesteps(void); +int MPI_Check_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbufreal, int recvcount, + MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status); +int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, + MPI_Datatype recvtype, MPI_Comm comm); +double parallel_sort(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *)); + +double parallel_sort_comm(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *), MPI_Comm comm); +int compare_IDs(const void *a, const void *b); +void test_id_uniqueness(void); +void drift_particle(int i, integertime time1); +void put_symbol(char *string, double t0, double t1, char c); +void write_cpu_log(void); +void *mymalloc_fullinfo(const char *varname, size_t n, const char *func, const char *file, int linenr, int clear_flag, char *origin); +void *mymalloc_movable_fullinfo(void *ptr, const char *varname, size_t n, const char *func, const char *file, int line, char *origin); +void *myrealloc_fullinfo(void *p, size_t n, const char *func, 
const char *file, int line); +void *myrealloc_movable_fullinfo(void *p, size_t n, const char *func, const char *file, int line); + +void myfree_fullinfo(void *p, const char *func, const char *file, int line); +void myfree_movable_fullinfo(void *p, const char *func, const char *file, int line); +void mymalloc_init(void); +void calculate_maxid(void); +void determine_compute_nodes(void); +double INLINE_FUNC hubble_function(double a); +void fof_fof(int num); +double fof_find_groups(MyIDType *vMinID, int *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask); +void fof_compile_catalogue(void); +void fof_save_groups(int num); + +double fof_periodic(double x); +double fof_periodic_wrap(double x); +double fof_find_nearest_dmparticle(MyIDType *vMinID, int *vHead, int *vLen, int *vNext, int *vTail, int *vMinIDTask); +void fof_compute_group_properties(int gr, int start, int len); +int fof_compare_FOF_PList_MinID(const void *a, const void *b); +int fof_compare_FOF_GList_MinID(const void *a, const void *b); +int fof_compare_FOF_GList_MinIDTask(const void *a, const void *b); +int fof_compare_FOF_GList_MinIDTask_MinID(const void *a, const void *b); +int fof_compare_FOF_GList_LocCountTaskDiffMinID(const void *a, const void *b); +int fof_compare_FOF_GList_ExtCountMinID(const void *a, const void *b); + +int fof_compare_Group_GrNr(const void *a, const void *b); +int fof_compare_Group_MinIDTask(const void *a, const void *b); +int fof_compare_Group_MinID(const void *a, const void *b); +int fof_compare_ID_list_GrNrID(const void *a, const void *b); +int fof_compare_Group_MinIDTask_MinID(const void *a, const void *b); +int fof_compare_Group_Len(const void *a, const void *b); +int fof_compare_aux_sort_Type(const void *a, const void *b); +int fof_compare_aux_sort_GrNr(const void *a, const void *b); +int fof_compare_aux_sort_OriginTask_OriginIndex(const void *a, const void *b); +int fof_compare_aux_sort_FileOrder(const void *a, const void *b); + +int 
fof_compare_local_sort_data_targetindex(const void *a, const void *b); +void fof_subfind_exchange(MPI_Comm Communicator); +void fof_prepare_output_order(void); +void fof_compute_group_properties(int gr, int start, int len); +void fof_exchange_group_data(void); +void fof_finish_group_properties(void); +double fof_get_comoving_linking_length(void); +void fof_assign_group_numbers(void); +void fof_reorder_PS(int *Id, int Nstart, int N); +void fof_subfind_write_file(char *fname, int writeTask, int lastTask); + +void fof_subfind_prepare_ID_list(void); +int subfind_compare_procassign_GrNr(const void *a, const void *b); +double subfind_so_potegy(double *egypot); +void subfind_distlinklist_get_two_heads(long long ngb_index1, long long ngb_index2, long long *head, long long *head_attach); +void fof_check_for_full_nodes_recursive(int no); +int fof_return_a_particle_in_cell_recursive(int no); +void subfind_loctree_copyExtent(void); +int subfind_distlinklist_get_tail_set_tail_increaselen(long long index, long long *tail, long long newtail); +void subfind_reorder_according_to_submp(void); +int subfind_compare_submp_OldIndex(const void *a, const void *b); + +int subfind_compare_submp_GrNr_DM_Density(const void *a, const void *b); +double subfind_exchange(void); +void subfind_coll_domain_decomposition(void); +void subfind_coll_domain_combine_topleaves_to_domains(int ncpu, int ndomain); +void subfind_coll_domain_free(void); +void subfind_coll_domain_allocate(void); +int subfind_coll_domain_determineTopTree(void); +void subfind(int num); +double subfind_density(int mode); +double subfind_overdensity(void); + +void subfind_save_final(int num); +void subfind_process_group_collectively(int nsubgroups_cat); +void subfind_coll_findExtent(void); +void subfind_reorder_PS(int *Id, int Nstart, int N); +void subfind_reorder_P(int *Id, int Nstart, int N); +void subfind_distribute_particles(MPI_Comm Communicator); +void subfind_coll_domain_walktoptree(int no); +int 
subfind_compare_densities(const void *a, const void *b); +int subfind_compare_binding_energy(const void *a, const void *b); +int subfind_compare_dist_rotcurve(const void *a, const void *b); + +int subfind_compare_coll_candidates_rank(const void *a, const void *b); +int subfind_compare_coll_candidates_boundlength(const void *a, const void *b); +int subfind_compare_coll_candidates_nsubs(const void *a, const void *b); +int subfind_compare_coll_candidates_subnr(const void *a, const void *b); +void subfind_col_find_coll_candidates(int totgrouplen); +void subfind_unbind_independent_ones(int count); +void subfind_distribute_groups(void); +void subfind_potential_compute(int num, struct unbind_data *d, int phase, double weakly_bound_limit); +int subfind_col_unbind(struct unbind_data *d, int num, int *num_non_gas); +void subfind_find_linkngb(void); + +int subfind_loctree_treebuild(int npart, struct unbind_data **mp); +void subfind_loctree_update_node_recursive(int no, int sib, int father); +double subfind_loctree_treeevaluate_potential(int target); +void subfind_loctree_copyExtent(void); +double subfind_locngb_treefind(MyDouble xyz[3], int desngb, double hguess); +void subfind_loctree_findExtent(int npart, struct unbind_data *mp); +int subfind_locngb_treefind_variable(MyDouble searchcenter[3], double hguess); +size_t subfind_loctree_treeallocate(int maxnodes, int maxpart); +void subfind_loctree_treefree(void); +void subfind_find_nearesttwo(void); + +int subfind_process_group_serial(int gr, int offset, int nsubgroups_cat); +int subfind_unbind(struct unbind_data *ud, int len, int *len_non_gas); +int subfind_locngb_compare_key(const void *a, const void *b); +int subfind_compare_serial_candidates_subnr(const void *a, const void *b); +int subfind_compare_serial_candidates_rank(const void *a, const void *b); +int subfind_compare_dens(const void *a, const void *b); +int subfind_compare_serial_candidates_boundlength(const void *a, const void *b); +int 
subfind_compare_dist_rotcurve(const void *a, const void *b); +int subfind_compare_binding_energy(const void *a, const void *b); +int subfind_compare_densities(const void *a, const void *b); + +int subfind_compare_ID_list(const void *a, const void *b); +int subfind_compare_SubGroup_GrNr_SubNr(const void *a, const void *b); +void subfind_poll_for_requests(void); +long long subfind_distlinklist_setrank_and_get_next(long long index, long long *rank); +long long subfind_distlinklist_get_rank(long long index); +void subfind_distlinklist_set_next(long long index, long long next); +void subfind_distlinklist_add_particle(long long index); +void subfind_distlinklist_add_bound_particles(long long index, int nsub); +void subfind_distlinklist_mark_particle(long long index, int target, int submark); +long long subfind_distlinklist_get_next(long long index); + +long long subfind_distlinklist_get_head(long long index); +void subfind_distlinklist_set_headandnext(long long index, long long head, long long next); +void subfind_distlinklist_set_tailandlen(long long index, long long tail, int len); +void subfind_distlinklist_get_tailandlen(long long index, long long *tail, int *len); +void subfind_distlinklist_set_all(long long index, long long head, long long tail, int len, long long next); +long long subfind_distlinklist_set_head_get_next(long long index, long long head); +int subfind_compare_dist_rotcurve(const void *a, const void *b); +void subfind_coll_treeallocate(int maxpart, int maxindex); +void subfind_coll_treefree(void); +void subfind_coll_treeupdate_toplevel(int no, int topnode, int bits, int x, int y, int z); + +void subfind_coll_exchange_topleafdata(void); +void subfind_coll_update_node_recursive(int no, int sib, int father, int *last); +void subfind_coll_insert_pseudo_particles(void); +int subfind_coll_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z, unsigned long long xc, unsigned long long yc, + unsigned long long zc, unsigned long long ilen); 
+int subfind_coll_treebuild_insert_single_point(int i, unsigned long long *intpos, int th, unsigned char levels); +int subfind_coll_treebuild_construct(int npart, struct unbind_data *mp); +int subfind_coll_treebuild(int npart, struct unbind_data *mp); +double subfind_get_particle_balance(void); +int subfind_fof_compare_ID(const void *a, const void *b); +void write_file(char *fname, int readTask, int lastTask, int subbox_flag); + +void distribute_file(int nfiles, int firstfile, int firsttask, int lasttask, int *filenr, int *master, int *last); +int get_values_per_blockelement(enum iofields blocknr); +int get_datatype_in_block(enum iofields blocknr, int mode); +void get_dataset_name(enum iofields blocknr, char *buf); +int blockpresent(enum iofields blocknr, int write); +void fill_write_buffer(void *buffer, enum iofields blocknr, int *pindex, int pc, int type, int subbox_flag); +void empty_read_buffer(enum iofields blocknr, int offset, int pc, int type); +int get_particles_in_block(enum iofields blocknr, int *typelist); +int get_bytes_per_blockelement(enum iofields blocknr, int mode); +void read_file(const char *fname, int filenr, int readTask, int lastTask, int); + +void get_Tab_IO_Label(enum iofields blocknr, char *label); +void long_range_init_regionsize(void); +int find_files(const char *fname); +double get_random_number(void); +int peano_compare_key(const void *a, const void *b); +void mysort_domain(void *b, size_t n, size_t s); +void mysort_peano(void *b, size_t n, size_t s, int (*cmp)(const void *, const void *)); +int density_isactive(int n); +size_t sizemax(size_t a, size_t b); +void my_gsl_error_handler(const char *reason, const char *file, int line, int gsl_errno); + +void reconstruct_timebins(void); +peanokey peano_hilbert_key(peano1D x, peano1D y, peano1D z, int bits); +void enable_core_dumps_and_fpu_exceptions(void); +void find_next_sync_point(void); +void set_units_sfr(void); +void gravity_forcetest(void); +void allocate_memory(void); +void 
begrun0(void); +void begrun1(void); +void begrun2(void); + +int init(void); +void loadrestart(void); +void reread_params_after_loading_restart(void); +void check_omega(void); +void close_logfiles(void); +void compute_grav_accelerations(int timebin, int fullflag); +void compute_global_quantities_of_system(void); +void cooling_and_starformation(void); +void density(void); +void do_box_wrapping(void); + +void domain_Decomposition(void); +double enclosed_mass(double R); +void endrun(void); +void energy_statistics(void); +void ewald_corr(double dx, double dy, double dz, double *fper); +void ewald_force(double x, double y, double z, double force[3]); +int my_fls(int x); +void ewald_init(void); +double ewald_psi(double x, double y, double z); +double ewald_pot_corr(double dx, double dy, double dz); + +integertime find_next_outputtime(integertime time); +void minimum_large_ints(int n, long long *src, long long *res); +double get_starformation_rate(int i); +double calc_egyeff(int i, double gasdens, double *ne, double *x, double *tsfr, double *factorEVP); +void gravity_tree(int timebin); +void init_clouds(void); +void integrate_sfr(void); +size_t my_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); +size_t my_fread(void *ptr, size_t size, size_t nmemb, FILE *stream); +void open_logfiles(void); + +void peano_hilbert_order(void); +void predict(double time); +void read_ic(const char *fname, int); +void read_header_attributes(FILE *fd); +MyIDType determine_ids_offset(void); +int read_outputlist(char *fname); +void read_parameter_file(char *fname); +void check_parameters(); +void reorder_gas(int *Id); +void reorder_particles(int *Id); + +void restart(int mod); +void run(void); +void savepositions(int num, int subbox_flag); +void mpi_printf(const char *fmt, ...); +void mpi_fprintf(FILE *stream, const char *fmt, ...); +void mpi_printf_each(const char *fmt, ...); +FILE *open_file(char *); +double second(void); +void set_softenings(void); +void set_units(void); + +void 
setup_smoothinglengths(void); +void sumup_large_ints(int n, int *src, long long *res); +void sumup_longs(int n, long long *src, long long *res); +void statistics(void); +double timediff(double t0, double t1); +void veldisp(void); +double get_hydrokick_factor(integertime time0, integertime time1); +double get_gravkick_factor(integertime time0, integertime time1); +double drift_integ(double a, void *param); +double gravkick_integ(double a, void *param); + +double hydrokick_integ(double a, void *param); +void init_drift_table(void); +double get_drift_factor(integertime time0, integertime time1); +double measure_time(void); +void long_range_init(void); +void long_range_force(void); +void pm_init_periodic(void); +void pmforce_periodic(int mode, int *typelist); +void pm_init_regionsize(void); +void pm_init_nonperiodic(void); + +int pmforce_nonperiodic(int grnr); +void readjust_timebase(double TimeMax_old, double TimeMax_new); +void pm_setup_nonperiodic_kernel(void); +void init_gradients(); +void init_scalars(); +void print_particle_info(int i); +void print_state_info(struct state *st); +void print_state_face_info(struct state_face *st); +void face_set_scalar_states_and_fluxes(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux); +void face_turn_momentum_flux(struct fluxes *flux, struct geometry *geom); + +void face_clear_fluxes(struct fluxes *flux); +int face_check_responsibility_of_this_task(tessellation *T, int p1, int p2, struct state *st_L, struct state *st_R); +int face_get_normals(tessellation *T, int i, struct geometry *geom); +int face_get_state(tessellation *T, int p, int i, struct state *st); +void face_boundary_check(point *p, double *velx, double *vely, double *velz); +void face_boundary_check_vertex(tessellation *T, int p, MyFloat *velx, MyFloat *vely, MyFloat *velz); +double face_timestep(struct state *state_L, struct state *state_R, double *hubble_a, double *atime); +void state_convert_to_local_frame(struct state *st, 
double *vel_face, double hubble_a, double atime); +void face_do_time_extrapolation(struct state *delta, struct state *st, double atime); +void face_do_spatial_extrapolation(struct state *delta, struct state *st, struct state *st_other); + +void face_do_spatial_extrapolation_single_quantity(double *delta, double st, double st_other, MySingle *grad, double *dx, double *r); +void face_add_extrapolations(struct state *st_face, struct state *delta_time, struct state *delta_space, struct fvs_stat *stat); +void face_add_extrapolation(struct state *st_face, struct state *delta, struct fvs_stat *stat); +void face_turn_velocities(struct state *st, struct geometry *geom); +void solve_advection(struct state *st_L, struct state *st_R, struct state_face *st_face, struct geometry *geom, double *vel_face); +void face_turnback_velocities(struct state_face *st_face, struct geometry *geom); +void face_get_fluxes(struct state *st_L, struct state *st_R, struct state_face *st_face, struct fluxes *flux, struct geometry *geom, + double *vel_face); +void face_add_fluxes_advection(struct state_face *st_face, struct fluxes *flux, struct geometry *geom, double *vel_face); +double godunov_flux_3d(struct state *st_L, struct state *st_R, struct state_face *st_face); +void sample_solution_vacuum_left_3d(double S, struct state *st_R, struct state_face *st_face); + +void sample_solution_vacuum_right_3d(double S, struct state *st_L, struct state_face *st_face); +void sample_solution_vacuum_generate_3d(double S, struct state *st_L, struct state *st_R, struct state_face *st_face); +void get_mach_numbers(struct state *st_L, struct state *st_R, double Press); +void sample_solution_3d(double S, struct state *st_L, struct state *st_R, double Press, double Vel, struct state_face *st_face); +int riemann(struct state *st_L, struct state *st_R, double *Press, double *Vel); +void pressure_function(double P, struct state *st, double *F, double *FD); +double guess_for_pressure(struct state *st_L, struct state 
*st_R); +void riemann_isotherm(struct state *st_L, struct state *st_R, double *Rho, double *Vel, double csnd); +void isothermal_function(double rhostar, double rho, double *F, double *FD); +void sample_solution_isothermal3d(double S, struct state *st_L, struct state *st_R, double Rho, double Vel, struct state_face *st_face, + double csnd); + +void apply_flux_list(void); +int flux_list_data_compare(const void *a, const void *b); +void set_vertex_velocities(void); +int scalar_init(MyFloat *addr, MyFloat *addr_mass, int type); +void compute_interface_fluxes(tessellation *T); +void update_primitive_variables(void); +void set_pressure_of_cell_internal(struct particle_data *P, struct sph_particle_data *SphP, int i); +void do_validity_checks(struct particle_data *P, struct sph_particle_data *SphP, int i, struct pv_update_data *pvd); +void update_primitive_variables_single(struct particle_data *P, struct sph_particle_data *SphP, int i, struct pv_update_data *pvd); + +void update_internal_energy(struct particle_data *P, struct sph_particle_data *SphP, int i, struct pv_update_data *pvd); +void mpi_exchange_buffers(void *send_buf, int *send_count, int *send_offset, void *recv_buf, int *recv_count, int *recv_offset, + int item_size, int commtag, int include_self); +int mpi_calculate_offsets(int *send_count, int *send_offset, int *recv_count, int *recv_offset, int send_identical); +void *sort_based_on_mesh_search(mesh_search_data *search, void *data, int n_items, int item_size); +void *sort_based_on_field(void *data, int field_offset, int n_items, int item_size); +void mpi_distribute_items_from_search(mesh_search_data *search, void *data, int *n_items, int *max_n, int item_size, int commtag, + int task_offset, int cell_offset); +void mpi_distribute_items_to_tasks(void *data, int task_offset, int *n_items, int *max_n, int item_size, int commtag); +void tile_ics(void); +void reallocate_memory_maxpart(void); +void reallocate_memory_maxpartsph(void); + +void 
share_particle_number_in_file(const char *fname, int filenr, int readTask, int lastTask, int readTypes); +int dump_memory_table_buffer(char *p); +void calc_memory_checksum(void *base, size_t bytes); +void allreduce_sparse_double_sum(double *loc, double *glob, int N); +void allreduce_sparse_imin(int *loc, int *glob, int N); +void myMPI_Alltoallv(void *sendb, size_t *sendcounts, size_t *sdispls, void *recvb, size_t *recvcounts, size_t *rdispls, int len, + int big_flag, MPI_Comm comm); +int myMPI_Sendrecv(void *sendb, size_t sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvb, size_t recvcount, + MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status); +size_t roundup_to_multiple_of_cacheline_size(size_t n); +void init_cpu_log(void); + +void write_error(int check, size_t nwritten, size_t nmemb); +size_t smax(size_t a, size_t b); +void init_field(enum iofields field, const char *label, const char *datasetname, enum types_in_memory type_in_memory, + enum types_in_file type_in_file_output, enum types_in_file type_in_file_input, int values_per_block, enum arrays array, + void *pointer_to_field, void (*io_func)(int, int, void *, int), int typelist_bitmask); +void init_units(enum iofields field, double a, double h, double L, double M, double V, double c); +void init_snapshot_type(enum iofields field, enum sn_type type); + +void swap_Nbyte(char *data, int n, int m); +void swap_header(void); + +#if defined(COOLING) +void cool_cell(int i); +#endif /* #if defined(COOLING) */ + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE +void special_particle_create_list(); +void special_particle_update_list(); +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + +#ifdef HAVE_HDF5 + +hid_t my_H5Fcreate(const char *fname, unsigned flags, hid_t fcpl_id, hid_t fapl_id); +hid_t my_H5Gcreate(hid_t loc_id, const char *groupname, size_t size_hint); +hid_t my_H5Dcreate(hid_t loc_id, const char *datasetname, hid_t type_id, hid_t space_id, hid_t dcpl_id); +hid_t 
my_H5Acreate(hid_t loc_id, const char *attr_name, hid_t type_id, hid_t space_id, hid_t acpl_id); +hid_t my_H5Screate(H5S_class_t type); +hid_t my_H5Screate_simple(int rank, const hsize_t *current_dims, const hsize_t *maximum_dims); +herr_t my_H5Dwrite(hid_t dataset_id, hid_t mem_type_id, hid_t mem_space_id, hid_t file_space_id, hid_t xfer_plist_id, const void *buf, + const char *datasetname); +herr_t my_H5Awrite(hid_t attr_id, hid_t mem_type_id, const void *buf, const char *attr_name); +hid_t my_H5Fopen(const char *fname, unsigned int flags, hid_t fapl_id); +hid_t my_H5Dopen(hid_t file_id, const char *datasetname); + +hid_t my_H5Dopen_if_existing(hid_t file_id, const char *datasetname); +herr_t my_H5Dread(hid_t dataset_id, hid_t mem_type_id, hid_t mem_space_id, hid_t file_space_id, hid_t xfer_plist_id, void *buf, + const char *datasetname); +hid_t my_H5Gopen(hid_t loc_id, const char *groupname); +hid_t my_H5Aopen_name(hid_t loc_id, const char *attr_name); +herr_t my_H5Aread(hid_t attr_id, hid_t mem_type_id, void *buf, const char *attr_name, hssize_t size); +herr_t my_H5Aclose(hid_t attr_id, const char *attr_name); +herr_t my_H5Dclose(hid_t dataset_id, const char *datasetname); +herr_t my_H5Gclose(hid_t group_id, const char *groupname); +herr_t my_H5Fclose(hid_t file_id, const char *fname); +herr_t my_H5Sclose(hid_t dataspace_id, H5S_class_t type); + +hid_t my_H5Tcopy(hid_t type_id); +herr_t my_H5Tclose(hid_t type_id); +herr_t my_H5Sselect_hyperslab(hid_t space_id, H5S_seloper_t op, const hsize_t *start, const hsize_t *stride, const hsize_t *count, + const hsize_t *block); +size_t my_H5Tget_size(hid_t datatype_id); +herr_t my_H5Tset_size(hid_t datatype_id, size_t size); +herr_t my_H5Sset_extent_simple(hid_t space_id, int rank, const hsize_t *current_size, const hsize_t *maximum_size, + const char *attr_name); +hid_t my_H5Dget_space(hid_t dataset_id, const char *datasetname); + +#ifdef HDF5_FILTERS +htri_t my_H5Pall_filters_avail(hid_t plist_id); +hid_t 
my_H5Pcreate(hid_t class_id); +herr_t my_H5Pclose(hid_t plist); +herr_t my_H5Pset_chunk(hid_t plist, int ndims, const hsize_t *dim); +herr_t my_H5Pset_shuffle(hid_t plist_id); +herr_t my_H5Pset_deflate(hid_t plist_id, uint level); +herr_t my_H5Pset_fletcher32(hid_t plist_id); +#endif /* #ifdef HDF5_FILTERS */ + +#endif /* #ifdef HAVE_HDF5 */ + +#ifdef HOST_MEMORY_REPORTING +void check_maxmemsize_setting(void); +#endif /* #ifdef HOST_MEMORY_REPORTING */ + +#ifdef INDIVIDUAL_GRAVITY_SOFTENING +int get_softening_type_from_mass(double mass); +#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */ + +#ifdef MHD +void do_mhd_source_terms_first_half(void); +void do_mhd_source_terms_second_half(void); +#endif /* #ifdef MHD */ + +#ifdef ONEDIMS_SPHERICAL +void gravity_monopole_1d_spherical(); +#endif /* #ifdef ONEDIMS_SPHERICAL */ + +#if defined(PMGRID) +void my_slab_based_fft(fft_plan *plan, void *data, void *workspace, int forward); +void my_slab_based_fft_c2c(fft_plan *plan, void *data, void *workspace, int forward); +void my_slab_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ); +void my_slab_transposeA(fft_plan *plan, fft_real *field, fft_real *scratch); +void my_slab_transposeB(fft_plan *plan, fft_real *field, fft_real *scratch); +void my_column_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ); +void my_column_based_fft_init_c2c(fft_plan *plan, int NgridX, int NgridY, int NgridZ); +void my_column_based_fft(fft_plan *plan, void *data, void *workspace, int forward); +void my_column_based_fft_c2c(fft_plan *plan, void *data, void *workspace, int forward); +void my_fft_swap23(fft_plan *plan, fft_real *data, fft_real *out); + +void my_fft_swap13(fft_plan *plan, fft_real *data, fft_real *out); +void my_fft_swap23back(fft_plan *plan, fft_real *data, fft_real *out); +void my_fft_swap13back(fft_plan *plan, fft_real *data, fft_real *out); +#endif /* #if defined(PMGRID) */ + +#ifdef RIEMANN_HLLC +double godunov_flux_3d_hllc(struct state *st_L, struct 
state *st_R, struct state_face *st_face, struct fluxes *flux); +#endif /* #ifdef RIEMANN_HLLC */ + +#if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) +void flux_convert_to_lab_frame(struct state *st_L, struct state *st_R, double *vel_face, struct fluxes *flux); +#endif /* #if defined(RIEMANN_HLLC) || defined(RIEMANN_HLLD) */ + +#ifdef RIEMANN_HLLD +double godunov_flux_3d_hlld(struct state *st_L, struct state *st_R, double *vel_face, struct state_face *st_face, struct fluxes *flux); +#endif /* #ifdef RIEMANN_HLLD */ + +#ifdef SUBFIND_EXTENDED_PROPERTIES +void subfind_fof_calc_am_collective(int snapnr, int ngroups_cat); +int subfind_fof_calc_am_serial(int gr, int Offs, int snapnr, int ngroups_cat); +void subfind_add_grp_props_calc_fof_angular_momentum(int num, int ngroups_cat); +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +#ifdef USE_SFR +void convert_cell_into_star(int i, double birthtime); +void spawn_star_from_cell(int igas, double birthtime, int istar, MyDouble mass_of_star); +void make_star(int idx, int i, double prob, MyDouble mass_of_star, double *sum_mass_stars); +#endif /* #ifdef USE_SFR */ + +#endif /* #ifndef PROTO_H */ diff --git a/src/amuse/community/arepo/src/main/run.c b/src/amuse/community/arepo/src/main/run.c new file mode 100644 index 0000000000..0bdca04354 --- /dev/null +++ b/src/amuse/community/arepo/src/main/run.c @@ -0,0 +1,660 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/main/run.c
+ * \date 05/2018
+ * \brief The main simulation loop.
+ * \details contains functions:
+ *   void run(void)
+ *   void do_second_order_source_terms_first_half(void)
+ *   void do_second_order_source_terms_second_half(void)
+ *   void set_non_standard_physics_for_current_time(void)
+ *   void calculate_non_standard_physics_with_valid_gravity_tree(void)
+ *   void calculate_non_standard_physics_with_valid_gravity_tree_always(void)
+ *   void calculate_non_standard_physics_prior_mesh_construction(void)
+ *   void calculate_non_standard_physics_end_of_step(void)
+ *   int check_for_interruption_of_run(void)
+ *   integertime find_next_outputtime(integertime ti_curr)
+ *   static void create_end_file(void)
+ *   void execute_resubmit_command(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 06.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#include "../domain/domain.h"
+#include "../mesh/voronoi/voronoi.h"
+
+static void do_second_order_source_terms_first_half(void);
+static void do_second_order_source_terms_second_half(void);
+static void create_end_file(void);
+
+/*! \brief Contains the main simulation loop that iterates over
+ * single timesteps.
+ *
+ * The loop terminates when check_for_interruption_of_run() requests it
+ * (cpu-time limit reached or a `stop' file found in the output directory), or
+ * when the simulation ends because All.Ti_Current reached TIMEBASE (TimeMax).
+ *
+ * If the run is not resumed from restart files (RestartFlag != 1), a domain
+ * decomposition is performed, the Voronoi mesh is constructed and the
+ * second gravity half-step is executed once before entering the loop.
+ *
+ * The main loop is structured as follows:
+ * - find new timesteps: find_timesteps_without_gravity()
+ * - first gravitational half kick: find_gravity_timesteps_and_do_gravity_step_first_half()
+ * - gradients are calculated: calculate_gradients()
+ * - vertex velocities are assigned: set_vertex_velocities()
+ * - computation of the hydro flux: compute_interface_fluxes() (first half)
+ * - (de)refinement of hydro cells: do_derefinements_and_refinements()
+ * - drifting particles to next sync point: find_next_sync_point()
+ * (Afterwards the timebins are updated, so different particles might
+ * now be active than before)
+ * - (if needed) a new domain decomposition: domain_Decomposition()
+ * - construction of the Voronoi mesh: create_mesh()
+ * - computation of the hydro flux: compute_interface_fluxes() (second half)
+ * - update of primitive variables: update_primitive_variables()
+ * - computation of gravitational forces: in do_gravity_step_second_half()
+ * - second gravitational half kick: do_gravity_step_second_half()
+ *
+ * \return void
+ */
+void run(void)
+{
+  CPU_Step[CPU_MISC] += measure_time();
+
+  if(RestartFlag != 1) /* if we have restarted from restart files, no need to do the setup sequence */
+    {
+      mark_active_timebins();
+
+      output_log_messages();
+
+      set_non_standard_physics_for_current_time();
+
+      ngb_treefree();
+      domain_free();
+      domain_Decomposition(); /* do domain decomposition if needed */
+
+      ngb_treeallocate();
+      ngb_treebuild(NumGas);
+
+      calculate_non_standard_physics_prior_mesh_construction();
+
+      create_mesh();
+
+      mesh_setup_exchange();
+
+      update_primitive_variables();
+
+      calculate_non_standard_physics_end_of_step();
+
+      exchange_primitive_variables();
+
+      calculate_gradients();
+
+      set_vertex_velocities(); /* determine the speed of the mesh-generating vertices */
+
+      ngb_update_velocities(); /* update the neighbor tree with the new vertex and cell velocities */
+
+      do_second_order_source_terms_second_half();
+
+      do_gravity_step_second_half();
+    }
+
+#if defined(VORONOI_STATIC_MESH)
+  if(RestartFlag == 1) /* restarted run: rebuild the static mesh once with all gas cells temporarily marked active */
+    {
+      int n_hydro_backup = TimeBinsHydro.NActiveParticles;
+      int *time_bin_hydro = (int *)malloc(NumGas * sizeof(int)); /* NOTE(review): malloc results are not checked here */
+      int *hydro_particles = (int *)malloc(n_hydro_backup * sizeof(int));
+      for(int j = 0; j < TimeBinsHydro.NActiveParticles; j++) /* save the current active list */
+        hydro_particles[j] = TimeBinsHydro.ActiveParticleList[j];
+
+      for(int j = 0; j < NumGas; j++) /* save each hydro timebin, then put every gas cell on the active list */
+        {
+          time_bin_hydro[j] = P[j].TimeBinHydro;
+          P[j].TimeBinHydro = All.HighestActiveTimeBin;
+          TimeBinsHydro.ActiveParticleList[j] = j;
+        }
+      TimeBinsHydro.NActiveParticles = NumGas;
+
+      create_mesh();
+      mesh_setup_exchange();
+
+      for(int j = 0; j < NumGas; j++) /* restore the saved timebins */
+        P[j].TimeBinHydro = time_bin_hydro[j];
+
+      TimeBinsHydro.NActiveParticles = n_hydro_backup; /* restore the saved active list */
+      for(int j = 0; j < TimeBinsHydro.NActiveParticles; j++)
+        TimeBinsHydro.ActiveParticleList[j] = hydro_particles[j];
+
+      free(time_bin_hydro);
+      free(hydro_particles);
+    }
+#endif /* #if defined(VORONOI_STATIC_MESH) */
+
+  while(1) /* main loop */
+    {
+      if(RestartFlag !=
+         1) /* if we are starting from restart files, skip in the first iteration the parts until the restart files were written */
+        {
+          compute_statistics();
+
+          flush_everything();
+
+          create_snapshot_if_desired();
+
+          if(All.Ti_Current >= TIMEBASE) /* we reached the final time */
+            {
+              mpi_printf("\nFinal time=%g reached. Simulation ends.\n", All.TimeMax);
+
+              if(All.Ti_lastoutput != All.Ti_Current) /* make a snapshot at the final time in case none has been produced at this time */
+                produce_dump(); /* this will be overwritten if All.TimeMax is increased and the run is continued */
+
+              create_end_file(); // create empty file called end in output directory
+
+              break;
+            }
+
+          find_timesteps_without_gravity(); /* find-timesteps */
+
+          find_gravity_timesteps_and_do_gravity_step_first_half(); /* gravity half-step for hydrodynamics */
+                                                                   /* kicks collisionless particles by half a step */
+
+#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
+          update_timesteps_from_gravity();
+#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) \
+        */
+
+          do_second_order_source_terms_first_half();
+
+          exchange_primitive_variables();
+
+          /* let's reconstruct gradients for every cell using Green-Gauss gradient estimation */
+          calculate_gradients();
+
+          /* determine the speed of the mesh-generating vertices */
+          set_vertex_velocities();
+
+          /* update the neighbor tree with the new vertex and cell velocities */
+          ngb_update_velocities();
+
+          exchange_primitive_variables_and_gradients();
+
+          /* compute intercell flux with Riemann solver and update the cells with the fluxes */
+          compute_interface_fluxes(&Mesh);
+
+#ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT
+#ifndef VORONOI_STATIC_MESH
+          free_mesh_structures_not_needed_for_derefinement_refinement();
+#endif /* #ifndef VORONOI_STATIC_MESH */
+#endif /* #ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT */
+
+#ifdef REFINEMENT
+          do_derefinements_and_refinements();
+#endif /* #ifdef REFINEMENT */
+
+          write_cpu_log(); /* output some CPU usage log-info (accounts for everything needed up to completion of the current
+                              sync-point) */
+
+          find_next_sync_point(); /* find next synchronization time */
+
+          make_list_of_active_particles();
+
+          output_log_messages(); /* write some info to log-files */
+
+#if !defined(VORONOI_STATIC_MESH)
+#ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT
+          free_all_remaining_mesh_structures();
+#else /* #ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT */
+          free_mesh();
+#endif /* #ifdef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT #else */
+#endif /* #if !defined(VORONOI_STATIC_MESH) */
+          /* Check whether we should write a restart file.
+           * Note that at this place we need to store neither the mesh nor the gravity tree.
+           */
+          if(check_for_interruption_of_run())
+            return;
+        }
+      else
+        RestartFlag = 0; /* first pass after a restart: fall through once, then behave like a normal run */
+
+      set_non_standard_physics_for_current_time();
+
+#if defined(VORONOI_STATIC_MESH) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) /* may only be used if there is no gravity \
+                                                                                           */
+#else /* #if defined(VORONOI_STATIC_MESH) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) */
+
+      if(All.HighestActiveTimeBin >= All.SmallestTimeBinWithDomainDecomposition) /* only do this for sufficiently large steps */
+        {
+#ifdef VORONOI_STATIC_MESH
+          free_mesh();
+#endif /* #ifdef VORONOI_STATIC_MESH */
+
+          ngb_treefree();
+          domain_free();
+
+          drift_all_particles();
+
+          domain_Decomposition(); /* do new domain decomposition, will also make a new chained-list of synchronized particles */
+
+          ngb_treeallocate();
+          ngb_treebuild(NumGas);
+
+#if defined(VORONOI_STATIC_MESH)
+          create_mesh();
+          mesh_setup_exchange();
+#endif /* #if defined(VORONOI_STATIC_MESH) */
+        }
+#endif /* #if defined(VORONOI_STATIC_MESH) && !defined(VORONOI_STATIC_MESH_DO_DOMAIN_DECOMPOSITION) #else */
+
+#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE
+      special_particle_update_list();
+#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */
+
+      calculate_non_standard_physics_prior_mesh_construction();
+
+#if !defined(VORONOI_STATIC_MESH)
+      create_mesh();
+      mesh_setup_exchange();
+#endif /* #if !defined(VORONOI_STATIC_MESH) */
+
+      exchange_primitive_variables_and_gradients();
+
+      compute_interface_fluxes(&Mesh);
+
+      update_primitive_variables(); /* this effectively closes off the hydro step */
+
+      /* the masses and positions are updated, let's get new forces and potentials */
+
+      do_second_order_source_terms_second_half();
+
+      do_gravity_step_second_half(); /* this closes off the gravity half-step */
+
+      /* do any extra physics, Strang-split (update both primitive and conserved variables as needed ) */
+      calculate_non_standard_physics_end_of_step();
+    }
+
+  restart(0); /* write a restart file at final time - can be used to continue simulation beyond final time */
+
+  write_cpu_log(); /* output final cpu measurements */
+}
+
+/*! \brief Source terms before hydrodynamics timestep.
+ *
+ * \return void
+ */
+void do_second_order_source_terms_first_half(void)
+{
+#ifdef MHD
+  do_mhd_source_terms_first_half();
+#endif /* #ifdef MHD */
+}
+
+/*! \brief Source terms after hydrodynamics timestep.
+ *
+ * If there are multiple source terms, the second-half source terms
+ * should be applied in the inverse order of the source terms in
+ * do_second_order_source_terms_first_half().
+ *
+ * \return void
+ */
+void do_second_order_source_terms_second_half(void)
+{
+#ifdef MHD
+  do_mhd_source_terms_second_half();
+#endif /* #ifdef MHD */
+}
+
+/*! \brief Calls extra modules after drift operator.
+ *
+ * This routine is called after the active particles are drifted
+ * to the next syncpoint, but before a new domain decomposition
+ * is performed.
+ *
+ * \return void
+ */
+void set_non_standard_physics_for_current_time(void)
+{
+#if defined(COOLING)
+  IonizeParams(); /* set UV background for the current time */
+#endif /* #if defined(COOLING) */
+}
+
+/*! \brief Calls extra modules after the gravitational force is recomputed.
+ *
+ * Only called if full gravity tree is present.
+ * *** NOTICE *** if HIERARCHICAL_GRAVITY is adopted, this function is carried
+ * out once per synchronization time, with in general only a partial tree that
+ * does not necessarily contain all particles. The latter is the case only for
+ * steps where the highest timesteps are active ("full timesteps").
+ *
+ * \return void
+ */
+void calculate_non_standard_physics_with_valid_gravity_tree(void) {} /* intentionally empty hook in this version */
+
+/*! \brief Calls extra modules after the gravitational force is recomputed.
+ *
+ * This is for runs which have the full tree at each time step;
+ * no HIERARCHICAL_GRAVITY.
+ *
+ * \return void
+ */
+void calculate_non_standard_physics_with_valid_gravity_tree_always(void) {} /* intentionally empty hook in this version */
+
+/*! \brief Calls extra modules before the Voronoi mesh is built.
+ *
+ * \return void
+ */
+void calculate_non_standard_physics_prior_mesh_construction(void)
+{
+#if defined(COOLING) && defined(USE_SFR)
+  sfr_create_star_particles(); /* only compiled in when both cooling and star formation are enabled */
+#endif /* #if defined(COOLING) && defined(USE_SFR) */
+}
+
+/*! \brief Calls extra modules at the end of the run loop.
+ *
+ * The second gravitational half kick is already applied to the
+ * particles and the Voronoi mesh is updated.
+ *
+ * \return void
+ */
+void calculate_non_standard_physics_end_of_step(void)
+{
+#ifdef COOLING
+#ifdef USE_SFR
+  cooling_and_starformation(); /* COOLING with USE_SFR */
+#else /* #ifdef USE_SFR */
+  cooling_only(); /* COOLING without USE_SFR */
+#endif /* #ifdef USE_SFR #else */
+#endif /* #ifdef COOLING */
+}
+
+/*! \brief Checks whether the run must be interrupted.
+ *
+ * The run is interrupted if a `stop' file is present in the output
+ * directory or if 85% of the CPU time limit is used up. This routine also
+ * handles the regular writing of restart files; a restart file is also
+ * written on request when a `restart' file is present in the output directory.
+ *
+ * \return 1 if the run has to be interrupted, 0 otherwise.
+ */
+int check_for_interruption_of_run(void)
+{
+  /* Task 0 decides and broadcasts: 0 = continue, 1 = stop-file, 2 = CPU-time limit, 3 = restart files requested */
+  int stopflag = 0;
+  if(ThisTask == 0)
+    {
+      FILE *fd;
+      char stopfname[MAXLEN_PATH];
+
+      snprintf(stopfname, sizeof(stopfname), "%sstop", All.OutputDir); /* bounded write: a long output path cannot overflow */
+      if((fd = fopen(stopfname, "r"))) /* Is the stop-file present? If yes, interrupt the run. */
+        {
+          fclose(fd);
+          printf("stop-file detected. stopping.\n");
+          stopflag = 1;
+          unlink(stopfname); /* consume the request so that a continued run does not stop again */
+        }
+
+      snprintf(stopfname, sizeof(stopfname), "%srestart", All.OutputDir);
+      if((fd = fopen(stopfname, "r"))) /* Is the restart-file present? If yes, write a user-requested restart file. */
+        {
+          fclose(fd);
+          printf("restart-file detected. writing restart files.\n");
+          stopflag = (stopflag == 1) ? 1 : 3; /* keep an already consumed stop request; restart files are written either way */
+          unlink(stopfname);
+        }
+
+      if(CPUThisRun > 0.85 * All.TimeLimitCPU) /* are we running out of CPU-time ? If yes, interrupt run. */
+        {
+          printf("reaching time-limit. stopping.\n");
+          stopflag = 2;
+        }
+    }
+
+  MPI_Bcast(&stopflag, 1, MPI_INT, 0, MPI_COMM_WORLD); /* all tasks must take the same branch below */
+
+  if(stopflag)
+    {
+      restart(0); /* write restart file */
+
+      MPI_Barrier(MPI_COMM_WORLD);
+
+      if(stopflag == 3) /* user only asked for restart files: keep running */
+        return 0;
+
+      if(stopflag == 2 && ThisTask == 0)
+        {
+          FILE *fd;
+          char contfname[MAXLEN_PATH];
+          snprintf(contfname, sizeof(contfname), "%scont", All.OutputDir); /* empty marker file `cont' for the time-limit case */
+          if((fd = fopen(contfname, "w")))
+            fclose(fd);
+
+          if(All.ResubmitOn)
+            execute_resubmit_command();
+        }
+      return 1;
+    }
+
+  /* is it time to write a regular restart-file? (for security) */
+  if(ThisTask == 0)
+    {
+      if((CPUThisRun - All.TimeLastRestartFile) >= All.CpuTimeBetRestartFile)
+        {
+          All.TimeLastRestartFile = CPUThisRun;
+          stopflag = 3;
+        }
+      else
+        stopflag = 0;
+    }
+
+  MPI_Bcast(&stopflag, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+  if(stopflag == 3)
+    {
+      restart(0); /* write an occasional restart file */
+      stopflag = 0;
+    }
+  return 0;
+}
+
+/*! \brief Returns the next output time that is equal or larger than
+ * ti_curr.
+ *
+ * \param[in] ti_curr Current simulation time.
+ *
+ * \return Next output time.
+ */ +integertime find_next_outputtime(integertime ti_curr) +{ + int i, iter = 0; + integertime ti, ti_next; + double next, time; + + DumpFlagNextSnap = 1; + ti_next = -1; + + if(All.OutputListOn) + { + for(i = 0; i < All.OutputListLength; i++) + { + time = All.OutputListTimes[i]; + + if(time >= All.TimeBegin && time <= All.TimeMax) + { + if(All.ComovingIntegrationOn) + ti = (integertime)(log(time / All.TimeBegin) / All.Timebase_interval); + else + ti = (integertime)((time - All.TimeBegin) / All.Timebase_interval); + +#ifdef PROCESS_TIMES_OF_OUTPUTLIST + /* first, determine maximum output interval based on All.MaxSizeTimestep */ + integertime timax = (integertime)(All.MaxSizeTimestep / All.Timebase_interval); + + /* make it a power 2 subdivision */ + integertime ti_min = TIMEBASE; + while(ti_min > timax) + ti_min >>= 1; + timax = ti_min; + + double multiplier = ti / ((double)timax); + + /* now round this to the nearest multiple of timax */ + ti = ((integertime)(multiplier + 0.5)) * timax; +#endif /* #ifdef PROCESS_TIMES_OF_OUTPUTLIST */ + if(ti >= ti_curr) + { + if(ti_next == -1) + { + ti_next = ti; + DumpFlagNextSnap = All.OutputListFlag[i]; + } + + if(ti_next > ti) + { + ti_next = ti; + DumpFlagNextSnap = All.OutputListFlag[i]; + } + } + } + } + } + else + { + if(All.ComovingIntegrationOn) + { + if(All.TimeBetSnapshot <= 1.0) + terminate("TimeBetSnapshot > 1.0 required for your simulation.\n"); + } + else + { + if(All.TimeBetSnapshot <= 0.0) + terminate("TimeBetSnapshot > 0.0 required for your simulation.\n"); + } + + time = All.TimeOfFirstSnapshot; + iter = 0; + + while(time < All.TimeBegin) + { + if(All.ComovingIntegrationOn) + time *= All.TimeBetSnapshot; + else + time += All.TimeBetSnapshot; + + iter++; + + if(iter > 1000000) + terminate("Can't determine next output time.\n"); + } + + while(time <= All.TimeMax) + { + if(All.ComovingIntegrationOn) + ti = (integertime)(log(time / All.TimeBegin) / All.Timebase_interval); + else + ti = (integertime)((time - 
All.TimeBegin) / All.Timebase_interval); + + if(ti >= ti_curr) + { + ti_next = ti; + break; + } + + if(All.ComovingIntegrationOn) + time *= All.TimeBetSnapshot; + else + time += All.TimeBetSnapshot; + + iter++; + + if(iter > 1000000) + terminate("Can't determine next output time.\n"); + } + } + + if(ti_next == -1) + { + ti_next = 2 * TIMEBASE; /* this will prevent any further output */ + + mpi_printf("\nRUN: There is no valid time for a further snapshot file.\n"); + } + else + { + if(All.ComovingIntegrationOn) + next = All.TimeBegin * exp(ti_next * All.Timebase_interval); + else + next = All.TimeBegin + ti_next * All.Timebase_interval; + +#ifdef TIMESTEP_OUTPUT_LIMIT + mpi_printf("\nRUN: Limiting timestep to %g to fulfill output frequency", 0.1 * (next - All.Time)); + All.TimestepOutputLimit = 0.1 * (next - All.Time); +#endif /* #ifdef TIMESTEP_OUTPUT_LIMIT */ + + mpi_printf("\nRUN: Setting next time for snapshot file to Time_next= %g (DumpFlag=%d)\n\n", next, DumpFlagNextSnap); + } + + return ti_next; +} + +/*! \brief Creates an empty file called 'end' in the output directory. + * + * The existence of this file can be used e.g. for analysis scripts to + * verify that the simulation has run up to its final time and ended without + * error. Note that the end-file is completely passive. + * + * \return void + */ +static void create_end_file(void) +{ + FILE *fd; + char contfname[MAXLEN_PATH]; + sprintf(contfname, "%send", All.OutputDir); + if((fd = fopen(contfname, "w"))) + fclose(fd); +} + +/*! \brief Executes the resubmit command. 
+ * + * \return void + */ +void execute_resubmit_command(void) +{ + char buf[1000]; + sprintf(buf, "%s", All.ResubmitCommand); +#ifndef NOCALLSOFSYSTEM + system(buf); +#endif /* #ifndef NOCALLSOFSYSTEM */ +} diff --git a/src/amuse/community/arepo/src/mesh/criterion_derefinement.c b/src/amuse/community/arepo/src/mesh/criterion_derefinement.c new file mode 100644 index 0000000000..7108310fc2 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/criterion_derefinement.c @@ -0,0 +1,181 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/criterion_derefinement.c + * \date 05/2018 + * \brief Criteria for the de-refinement of a cell. + * \details Routines which are checking whether a cell should be + * de-refined. 
+ * contains functions: + * int derefine_should_this_cell_be_merged(int i, int flag) + * static int derefine_criterion_default(int i) + * static int derefine_criterion_jeans_ref(int i) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#if defined(REFINEMENT_MERGE_CELLS) && !defined(ONEDIMS) +static int derefine_criterion_jeans_ref(int i); +static int derefine_criterion_default(int i); +static int jeans_derefinement_criteria(int i); + +/*! \brief Should this cell be dissolved? + * + * This function signals whether a cell should be dissolved. This needs to be + * adjusted according to the needs of the simulation in question. One may also + * set the SphP[].Flag variable beforehand, these cells will also be + * dissolved. + * + * \param[in] i Index of cell in P and SphP arrays. + * \param[in] flag If this is nonzero, flag is returned. + * + * \return Flag if this cell should be dissolved. + */ +int derefine_should_this_cell_be_merged(int i, int flag) +{ +#ifdef REFINEMENT_HIGH_RES_GAS + if(SphP[i].AllowRefinement == 0) + return 0; +#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */ + +#ifdef NODEREFINE_BACKGROUND_GRID + /* Keep in mind that this is used in cosmological zoom simulations. + * I.e. this enforces no derefinement for cells in low-res region, while not + * affecting the high-res region. 
+ */ + if(SphP[i].Volume > 0.1 * All.MeanVolume) + return 0; +#endif /* #ifdef NODEREFINE_BACKGROUND_GRID */ + +#if defined(REFINEMENT_VOLUME_LIMIT) + double maxvolume = All.MaxVolume; + double minvolume = All.MinVolume; + + if(SphP[i].Volume > 0.5 * maxvolume) + return 0; + + if(SphP[i].Volume < 0.5 * minvolume) + return 1; + + if(All.MaxVolumeDiff > 0 && SphP[i].Volume > 0.3 * All.MaxVolumeDiff * SphP[i].MinNgbVolume) + return 0; +#endif /* #if defined(REFINEMENT_VOLUME_LIMIT) */ + + if(flag) + return flag; + + switch(All.DerefinementCriterion) + { + case 0: + return 0; + break; + + case 1: + return derefine_criterion_default(i); + break; + + case 2: + return derefine_criterion_jeans_ref(i); + break; + + default: + terminate("invalid derefinement criterion specified"); + break; + } + + return 0; +} + +/* + * static functions; i.e. functions that are only called within this file + */ + +/*! \brief Default de-refinement criterion. + * + * Checks if cell is within a factor of 2 of the target gas mass. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return Flag if this cell should be dissolved. + */ +static int derefine_criterion_default(int i) +{ +#if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS) + + if(P[i].Mass < 0.5 * All.TargetGasMass) + return 1; +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS) */ + + return 0; +} + +/*! \brief Wrapper for Jeans de-refinement criterion. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return Flag if this cell should be dissolved. + */ +static int derefine_criterion_jeans_ref(int i) +{ +#ifdef JEANS_REFINEMENT + return jeans_derefinement_criteria(i); +#endif /* #ifdef JEANS_REFINEMENT */ + return 0; +} + +/*! \brief De-refinement criterion according to Jeans stability of a cell. + * + * The cell can only be de-refined if the Jeans length is resolved by + * 1.5 * JEANS_REFINEMENT cells. 
Otherwise, no de-refinement is possible even + * if the cell has a low mass. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return Flag if this cell should be dissolved. + */ +static int jeans_derefinement_criteria(int i) +{ + if(P[i].Mass < 0.5 * All.TargetGasMass) + return 1; + +#ifdef JEANS_REFINEMENT + double jeans_number, jeans_length, sound_speed, dx; + sound_speed = sqrt(GAMMA * SphP[i].Pressure / SphP[i].Density); + jeans_length = sqrt(M_PI / All.G / SphP[i].Density) * sound_speed; + dx = 2.0 * get_cell_radius(i); + jeans_number = jeans_length / dx; + + if(jeans_number > 1.5 * JEANS_REFINEMENT && P[i].Mass < 0.5 * All.TargetGasMass) + return 1; +#endif /* #ifdef JEANS_REFINEMENT */ + return 0; +} + +#endif /* #if defined(REFINEMENT_MERGE_CELLS) && !defined(ONEDIMS) */ diff --git a/src/amuse/community/arepo/src/mesh/criterion_refinement.c b/src/amuse/community/arepo/src/mesh/criterion_refinement.c new file mode 100644 index 0000000000..5b0334972a --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/criterion_refinement.c @@ -0,0 +1,267 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . 
+ * + * \file src/mesh/criterion_refinement.c + * \date 05/2018 + * \brief Criteria for the refinement of a cell. + * \details Routines which are checking whether a cell should be refined. + * contains functions: + * int should_this_cell_be_split(int i) + * static int can_this_cell_be_split(int i) + * static int refine_criterion_default(int i) + * static int refine_criterion_jeans_ref(int i) + * static int jeans_refinement_criteria(int i) + * static int refine_criterion_volume(int i) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#if defined(REFINEMENT_SPLIT_CELLS) && !defined(ONEDIMS) +static int can_this_cell_be_split(int i); +static int refine_criterion_default(int i); +static int refine_criterion_jeans_ref(int i); +static int jeans_refinement_criteria(int i); + +#ifdef REFINEMENT_VOLUME_LIMIT +static int refine_criterion_volume(int i); +#endif + +#ifdef REFINEMENT_MERGE_CELLS +char *FlagDoNotRefine; +#endif /* #ifdef REFINEMENT_MERGE_CELLS */ + +/*! \brief Should this cell be refined? + * + * This function signals whether a cell needs further refinement. This needs + * to be adjusted according to the needs of the simulation in question. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return Flag if this cell should be split. + */ +int should_this_cell_be_split(int i) +{ +#ifdef REFINEMENT_MERGE_CELLS + if(FlagDoNotRefine[i]) + return 0; +#endif /* #ifdef REFINEMENT_MERGE_CELLS */ + + if(P[i].Mass == 0 && P[i].ID == 0) /* skip cells that have been swallowed or dissolved */ + return 0; + +#if defined(REFINEMENT_VOLUME_LIMIT) + double maxvolume = All.MaxVolume; + double minvolume = All.MinVolume; + + if(SphP[i].Volume > 2. * maxvolume) + if(can_this_cell_be_split(i)) + return 1; + + if(SphP[i].Volume < 2. 
* minvolume) + return 0; + + if(refine_criterion_volume(i)) + if(can_this_cell_be_split(i)) + return 1; +#endif /* #if defined(REFINEMENT_VOLUME_LIMIT) */ + + switch(All.RefinementCriterion) /* select the function that evaluates the refinement criterion */ + { + case 0: + return 0; + break; + + case 1: + return refine_criterion_default(i); + break; + + case 2: + return refine_criterion_jeans_ref(i); + break; + + default: + terminate("invalid refinement criterion specified"); + break; + } + + return 0; +} + +/* + * static functions; i.e. functions that are only called within this file + */ + +/*! \brief Is cell round enough to be refined? + * + * This function signals whether a cell is allowed refinement. A cell that + * is supposed to be refined needs to match certain roundness criteria, which + * are specified in this function. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return Flag if this cell is allowed to be refined. + */ +static int can_this_cell_be_split(int i) +{ +#ifdef REGULARIZE_MESH_FACE_ANGLE + if(SphP[i].MaxFaceAngle < 1.5 * All.CellMaxAngleFactor) + return 1; + +#else /* #ifdef REGULARIZE_MESH_FACE_ANGLE */ + double dx = nearest_x(P[i].Pos[0] - SphP[i].Center[0]); + double dy = nearest_y(P[i].Pos[1] - SphP[i].Center[1]); + double dz = nearest_z(P[i].Pos[2] - SphP[i].Center[2]); + double d = sqrt(dx * dx + dy * dy + dz * dz); + double cellrad = get_cell_radius(i); + + if(d < 2.0 * All.CellShapingFactor * cellrad) /* only refine cells which are reasonably 'round' */ + return 1; +#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */ + + return 0; +} + +/*! \brief Default refinement criterion. + * + * Checks if cell is within a factor of 2 of the target gas mass. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return Flag if this cell should be refined. 
+ */ +static int refine_criterion_default(int i) +{ +#ifdef REFINEMENT_HIGH_RES_GAS + if(SphP[i].AllowRefinement != 0) +#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */ + if(can_this_cell_be_split(i) && P[i].Mass > 2.0 * All.TargetGasMass) + return 1; + + return 0; /* default is not to refine */ +} + +/*! \brief Jeans refinement criterion additional target mass criterion + * + * Resolving the Jeans length is an additional criterion, apart from obeying + * the usual factor of 2 within a target mass criterion. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return Flag if this cell should be refined. + */ +static int refine_criterion_jeans_ref(int i) +{ +#ifdef REFINEMENT_HIGH_RES_GAS + if(SphP[i].AllowRefinement != 0) +#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */ + if(can_this_cell_be_split(i)) + { + if(P[i].Mass > 2.0 * All.TargetGasMass) + return 1; + +#ifdef JEANS_REFINEMENT + return jeans_refinement_criteria(i); +#else /* #ifdef JEANS_REFINEMENT */ + return 0; +#endif /* #ifdef JEANS_REFINEMENT #else */ + } + + return 0; +} + +/*! \brief Refinement criterion according to Jeans stability of a cell. + * + * The cell will be refined if the Jeans length is not resolved by + * JEANS_REFINEMENT cells. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return Flag if this cell should be refined. + */ +static int jeans_refinement_criteria(int i) +{ +#ifdef JEANS_REFINEMENT + if(can_this_cell_be_split(i)) + { + double jeans_number, jeans_length, sound_speed, dx; + + sound_speed = sqrt(GAMMA * SphP[i].Pressure / SphP[i].Density); + jeans_length = sqrt(M_PI / All.G / SphP[i].Density) * sound_speed; + dx = 2.0 * get_cell_radius(i); + jeans_number = jeans_length / dx; + + if(jeans_number < JEANS_REFINEMENT) + { + return 1; + } + } +#endif /* #ifdef JEANS_REFINEMENT */ + + return 0; +} + +#ifdef REFINEMENT_VOLUME_LIMIT +/*! \brief Refinement criterion for based on the minimum volume of a + * neighboring cell. 
+ * + * This criterion is supposed to avoid sudden jumps in resolution which lead + * to an inaccurate result. Each cell that has a volume larger than a + * specified factor times the minimum volume of all neighboring cells will be + * refined. This also includes a global absolute minimum and maximum volume. + * + * \param[in] i Index of cell in P and SphP arrays. + * + * \return Flag if this cell should be refined. + */ +static int refine_criterion_volume(int i) +{ + if(All.MaxVolumeDiff > 0 && SphP[i].Volume > All.MaxVolumeDiff * SphP[i].MinNgbVolume) + { +#ifdef REGULARIZE_MESH_FACE_ANGLE + if(SphP[i].MaxFaceAngle < 1.5 * All.CellMaxAngleFactor) + return 1; +#else /* #ifdef REGULARIZE_MESH_FACE_ANGLE */ + + double dx = nearest_x(P[i].Pos[0] - SphP[i].Center[0]); + double dy = nearest_y(P[i].Pos[1] - SphP[i].Center[1]); + double dz = nearest_z(P[i].Pos[2] - SphP[i].Center[2]); + double d = sqrt(dx * dx + dy * dy + dz * dz); + double cellrad = get_cell_radius(i); + + if(d < 2.0 * All.CellShapingFactor * cellrad) /* only refine cells which are reasonably 'round' */ + return 1; +#endif /* #ifdef REGULARIZE_MESH_FACE_ANGLE #else */ + } + + return 0; +} +#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */ + +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) && !defined(ONEDIMS) */ diff --git a/src/amuse/community/arepo/src/mesh/mesh.h b/src/amuse/community/arepo/src/mesh/mesh.h new file mode 100644 index 0000000000..654555ebf6 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/mesh.h @@ -0,0 +1,268 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/mesh.h + * \date 05/2018 + * \brief Header for mesh structures. + * \details + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 29.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef MESH_H +#define MESH_H + +#define SCALAR_TYPE_PASSIVE 0 /*!< only advection */ +#define SCALAR_TYPE_SPECIES 1 /*!< species are normalised to guarantee sum{species}=1 */ +#define SCALAR_TYPE_NORMALIZE 2 /*!< the same normalisation factor as for species is applied, but no contribution to sum{species} */ + +#define REFL_X_FLAGS 115043766 +#define REFL_Y_FLAGS 132379128 +#define REFL_Z_FLAGS 134217216 + +#define OUTFLOW_X (1 << 27) +#define OUTFLOW_Y (1 << 28) +#define OUTFLOW_Z (1 << 29) + +#if defined MAXSCALARS +extern struct scalar_elements +{ + int type; /*!< scalar type, determines whether a normalization is applied */ + size_t offset; /*!< offset of the primitive quantity in the SphP struct */ + size_t offset_mass; /*!< offset of the conserved quantity in the SphP struct */ +} scalar_elements[MAXSCALARS]; + +extern struct scalar_index +{ +#ifdef REFINEMENT_HIGH_RES_GAS + int HighResMass; +#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */ +} ScalarIndex; + +extern int N_Scalar; /*!< number of registered scalars */ +#endif /* #if defined MAXSCALARS */ + +#define GRADIENT_TYPE_NORMAL 0 
+#define GRADIENT_TYPE_VELX 1 +#define GRADIENT_TYPE_VELY 2 +#define GRADIENT_TYPE_VELZ 3 +#define GRADIENT_TYPE_DENSITY 4 +#define GRADIENT_TYPE_PRESSURE 5 +#define GRADIENT_TYPE_UTHERM 6 +#define GRADIENT_TYPE_AX 7 +#define GRADIENT_TYPE_AY 8 +#define GRADIENT_TYPE_AZ 9 +#define GRADIENT_TYPE_FLD 10 +#define GRADIENT_TYPE_RTF 11 + +extern struct grad_elements +{ + int type; /*!< gradient type, ensures special treatment for velocities and speed of sound */ + size_t offset; /*!< offset of the quantity in the SphP struct */ + size_t offset_exch; /*!< offset of the quantity in the PrimExch struct */ + size_t offset_grad; /*!< offset in the grad_data struct */ + double *min_value, *max_value; + double value0, value1; +} grad_elements[MAXGRADIENTS], *GDensity, *GVelx, *GVely, *GVelz, *GPressure, *GUtherm; + +extern int N_Grad; /*!< number of gradients to be calculated */ + +extern struct grad_data +{ + MySingle drho[3]; + + MySingle dvel[3][3]; + MySingle dpress[3]; + +#ifdef MHD + MySingle dB[3][3]; +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + MySingle dscalars[MAXSCALARS][3]; +#endif /* #ifdef MAXSCALARS */ +} * GradExch; + +extern struct primexch +{ + double Volume; + MyFloat Density; + + MyFloat VelGas[3]; + MyFloat VelVertex[3]; + +#ifdef MHD + MyFloat B[3]; + +#ifdef MHD_POWELL + MyFloat DivB; +#endif /* #ifdef MHD_POWELL */ + + MyFloat CurlB[3]; +#endif /* #ifdef MHD */ + MyFloat Pressure; + +#ifdef MAXSCALARS + MyFloat Scalars[MAXSCALARS]; +#endif /* #ifdef MAXSCALARS */ + + double TimeLastPrimUpdate; + + MyDouble Center[3]; + MyFloat OldMass; + MySingle Csnd; + MySingle SurfaceArea; + MySingle ActiveArea; + /* int task, index; */ + short int TimeBinHydro; +} * PrimExch; + +#ifdef REFINEMENT +extern struct refdata +{ +#ifdef REFINEMENT_VOLUME_LIMIT + double Volume; +#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */ + short int TimeBinHydro; +} * RefExch; +#endif /* #ifdef REFINEMENT */ + +typedef struct face_data +{ + int p1, p2; +#ifdef REFINEMENT_MERGE_CELLS + 
int t, nr; /* delaunay tetra and edge number that generated this face */ +#endif /* #ifdef REFINEMENT_MERGE_CELLS */ + +#ifdef OPTIMIZE_MEMORY_USAGE + MyFloat area; + MyFloat cx, cy, cz; /* center-of-mass of face */ +#else /* #ifdef OPTIMIZE_MEMORY_USAGE */ + double area; + double cx, cy, cz; /* center-of-mass of face */ +#endif /* #ifdef OPTIMIZE_MEMORY_USAGE #else */ + +#ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS + double area_backup; +#endif /* #ifdef VORONOI_BACKUP_RESTORE_FACE_AREAS */ +#ifdef TETRA_INDEX_IN_FACE + int dt_index; +#endif /* #ifdef TETRA_INDEX_IN_FACE */ +} face; + +/*! left or right state of a face */ +struct state +{ + double dx, dy, dz; + double dt_half; + short int timeBin; + + double rho; + double velx, vely, velz; + double press; + double oldmass; + double surfacearea; + double activearea; + double volume; + + MyFloat velGas[3]; + MyFloat velVertex[3]; + struct grad_data *grad; + + double csnd; + double Energy; +#ifdef MHD + double Bx, By, Bz; +#ifdef MHD_POWELL + double divB; +#endif /* #ifdef MHD_POWELL */ + double CurlB[3]; +#endif /* #ifdef MHD */ + +#if defined(GODUNOV_STATS) + double mach; +#endif /* #if defined(GODUNOV_STATS) */ + +#ifdef MAXSCALARS + double scalars[MAXSCALARS]; +#endif /* #ifdef MAXSCALARS */ + MyIDType ID; + +#ifdef ONEDIMS_SPHERICAL + double radius; +#endif /* #ifdef ONEDIMS_SPHERICAL */ + + double dtExtrapolation; +}; + +/*! state on a face determined by riemann solver */ +extern struct state_face +{ + double rho; + double velx, vely, velz; + double press; +#ifdef MHD + double Bx, By, Bz; +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + double *scalars; +#endif /* #ifdef MAXSCALARS */ +} state_face; + +/*! 
flux through a face */ +extern struct fluxes +{ + double mass; + double momentum[3]; + double energy; + +#ifdef MHD + double B[3]; +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + double scalars[MAXSCALARS]; +#endif /* #ifdef MAXSCALARS */ +} fluxes, diffusionfluxes; + +extern struct geometry +{ + double nn; + double nx, ny, nz; + double mx, my, mz; + double px, py, pz; + double cx, cy, cz; +} geom; + +struct pv_update_data +{ + double atime; + double hubble_a; + double a3inv; +}; +#endif /* MESH_H */ + +struct fvs_stat +{ + int count_disable_extrapolation; +}; diff --git a/src/amuse/community/arepo/src/mesh/refinement.c b/src/amuse/community/arepo/src/mesh/refinement.c new file mode 100644 index 0000000000..20b2c4d5a2 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/refinement.c @@ -0,0 +1,217 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/refinement.c + * \date 05/2018 + * \brief Driver routines that handle refinement and de-refinement. 
+ * \details contains functions: + * void do_derefinements_and_refinements() + * void refinement_prepare() + * void refinement_cleanup() + * void move_collisionless_particle(int new_i, int old_i) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 06.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../main/allvars.h" + +#ifdef REFINEMENT +#include "../main/proto.h" + +#if defined(REFINEMENT_MERGE_CELLS) && defined(REFINEMENT_SPLIT_CELLS) +char *FlagDoNotRefine; +#endif /* #if defined (REFINEMENT_MERGE_CELLS) && defined (REFINEMENT_SPLIT_CELLS) */ + +static void refinement_prepare(); +static void refinement_cleanup(); + +/*! \brief Main routine to trigger refinement and de-refinements. + * + * Called in main run loop (run.c). + * + * \return void + */ +void do_derefinements_and_refinements() +{ + refinement_prepare(); + +#ifdef REFINEMENT_MERGE_CELLS + do_derefinements(); +#endif /* #ifdef REFINEMENT_MERGE_CELLS */ + +#ifdef REFINEMENT_SPLIT_CELLS + do_refinements(); +#endif /* #ifdef REFINEMENT_SPLIT_CELLS */ + + refinement_cleanup(); +} + +/*! \brief Prepares for refinement. + * + * Determines quantities needed by refinement routine; + * Allocates additional arrays. 
+ * + * \return void + */ +void refinement_prepare() +{ + TIMER_START(CPU_REFINE); + +#ifdef REFINEMENT_VOLUME_LIMIT + int idx, i; +#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */ + +#if defined(REFINEMENT_MERGE_CELLS) && defined(REFINEMENT_SPLIT_CELLS) + FlagDoNotRefine = mymalloc_movable(&FlagDoNotRefine, "FlagDoNotRefine", NumGas * sizeof(char)); +#endif /* #if defined (REFINEMENT_MERGE_CELLS) && defined (REFINEMENT_SPLIT_CELLS) */ + +#ifdef REFINEMENT_VOLUME_LIMIT + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + SphP[i].MinNgbVolume = MAX_REAL_NUMBER; + + int q = SphP[i].first_connection; + while(q >= 0) + { + int dp = DC[q].dp_index; + int particle = Mesh.DP[dp].index; + + if(particle < 0) + { + if(q == SphP[i].last_connection) + break; + + q = DC[q].next; + continue; + } + + if(particle >= NumGas && Mesh.DP[dp].task == ThisTask) + particle -= NumGas; + + double Volume; + if(DC[q].task == ThisTask) + Volume = SphP[particle].Volume; + else + { +#ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT + Volume = PrimExch[particle].Volume; +#else /* #ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT */ + Volume = RefExch[particle].Volume; +#endif /* #ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT #else */ + } + + if(Volume < SphP[i].MinNgbVolume) + SphP[i].MinNgbVolume = Volume; + + if(q == SphP[i].last_connection) + break; + + q = DC[q].next; + } + } +#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */ + + TIMER_STOP(CPU_REFINE); +} + +/*! \brief Cleans up after refinement. + * + * Frees memory allocated by refinement_prepare(). + * + * \return void + */ +void refinement_cleanup() +{ +#if defined(REFINEMENT_MERGE_CELLS) && defined(REFINEMENT_SPLIT_CELLS) + myfree(FlagDoNotRefine); +#endif /* #if defined (REFINEMENT_MERGE_CELLS) && defined (REFINEMENT_SPLIT_CELLS) */ +} + +/*! \brief Moves collisionless particle from index old_i to new_i. 
+ * + * Needed if new cell is introduced, as cells have to be at the beginning of + * the P array and all other particles have to be located after the last + * gas cell. This routine moves not only data in P and SphP, but also updates + * the time-bin data consistently. + * + * \param[in] new_i New index of particle in P. + * \param[in] old_i Previous index of particle in P. + * + * \return void + */ +void move_collisionless_particle(int new_i, int old_i) +{ + int prev, next, bin; + struct TimeBinData *tbData; + + P[new_i] = P[old_i]; + + if(P[old_i].Mass == 0 && P[old_i].ID == 0) + return; + + if(P[old_i].Mass == 0 && P[old_i].Type == 4) + return; + + tbData = &TimeBinsGravity; + bin = P[old_i].TimeBinGrav; + + if(TimeBinSynchronized[bin]) + { + /* particle is active, need to add it to the list of active particles again + we assume here, that the new particle at the old index in this list is also active! */ + tbData->ActiveParticleList[tbData->NActiveParticles] = new_i; + tbData->NActiveParticles++; + } + + /* now move it in the link list of its timebin + we only need to change the gravity timebin here */ + + tbData->NextInTimeBin[new_i] = tbData->NextInTimeBin[old_i]; + tbData->PrevInTimeBin[new_i] = tbData->PrevInTimeBin[old_i]; + + prev = tbData->PrevInTimeBin[old_i]; + next = tbData->NextInTimeBin[old_i]; + + if(prev >= 0) + tbData->NextInTimeBin[prev] = new_i; + else + { + if(tbData->FirstInTimeBin[bin] != old_i) + terminate("strange"); + tbData->FirstInTimeBin[bin] = new_i; + } + + if(next >= 0) + tbData->PrevInTimeBin[next] = new_i; + else + { + if(tbData->LastInTimeBin[bin] != old_i) + terminate("strange"); + tbData->LastInTimeBin[bin] = new_i; + } +} + +#endif /* REFINEMENT */ diff --git a/src/amuse/community/arepo/src/mesh/set_vertex_velocities.c b/src/amuse/community/arepo/src/mesh/set_vertex_velocities.c new file mode 100644 index 0000000000..9280b5fde6 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/set_vertex_velocities.c @@ -0,0 +1,321 @@ 
+/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/set_vertex_velocities.c + * \date 05/2018 + * \brief Algorithms that decide how individual cells are moving. + * \details contains functions: + * void set_vertex_velocities(void) + * static void validate_vertex_velocities_1d() + * void validate_vertex_velocities(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 08.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +#ifdef ONEDIMS_SPHERICAL +static void validate_vertex_velocities_1d(); +#endif /* #ifdef ONEDIMS_SPHERICAL */ + +/*! \brief Sets velocities of individual mesh-generating points. 
+ * + * \retur void + */ +void set_vertex_velocities(void) +{ + TIMER_START(CPU_SET_VERTEXVELS); + + int idx, i, j; + double dt; + +#if defined(VORONOI_STATIC_MESH) || defined(NOHYDRO) + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + for(j = 0; j < 3; j++) + SphP[i].VelVertex[j] = 0; + } + TIMER_STOP(CPU_SET_VERTEXVELS); + return; +#endif /* #if defined (VORONOI_STATIC_MESH) || defined (NOHYDRO) */ + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + +#ifdef MESHRELAX + for(j = 0; j < 3; j++) + SphP[i].VelVertex[j] = 0; +#else /* #ifdef MESHRELAX */ + for(j = 0; j < 3; j++) + SphP[i].VelVertex[j] = P[i].Vel[j]; /* make cell velocity equal to fluid's velocity */ +#endif /* #ifdef MESHRELAX #else */ + + double acc[3]; + + /* the actual time-step of particle */ + integertime ti_step = P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0; + dt = ti_step * All.Timebase_interval; + dt /= All.cf_hubble_a; /* this gives the actual timestep: dt = dloga/ (adot/a) */ + + /* now let's add the gradient of the pressure force + * note that the gravity half-step was already included in P[i].Vel[j] + * prior to calling this function, thus it does not need to be accounted + * here explicitly. 
+ */ + if(SphP[i].Density > 0) + { + acc[0] = -SphP[i].Grad.dpress[0] / SphP[i].Density; + acc[1] = -SphP[i].Grad.dpress[1] / SphP[i].Density; + acc[2] = -SphP[i].Grad.dpress[2] / SphP[i].Density; + +#ifdef MHD + /* we also add the acceleration due to the Lorentz force */ + acc[0] += (SphP[i].CurlB[1] * SphP[i].B[2] - SphP[i].CurlB[2] * SphP[i].B[1]) / SphP[i].Density; + acc[1] += (SphP[i].CurlB[2] * SphP[i].B[0] - SphP[i].CurlB[0] * SphP[i].B[2]) / SphP[i].Density; + acc[2] += (SphP[i].CurlB[0] * SphP[i].B[1] - SphP[i].CurlB[1] * SphP[i].B[0]) / SphP[i].Density; + +#endif /* #ifdef MHD */ + + SphP[i].VelVertex[0] += 0.5 * dt * acc[0]; + SphP[i].VelVertex[1] += 0.5 * dt * acc[1]; + SphP[i].VelVertex[2] += 0.5 * dt * acc[2]; + } + } + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + +#ifdef REGULARIZE_MESH_CM_DRIFT + + double dx, dy, dz, d, fraction; + + dx = nearest_x(P[i].Pos[0] - SphP[i].Center[0]); + dy = nearest_y(P[i].Pos[1] - SphP[i].Center[1]); + dz = nearest_z(P[i].Pos[2] - SphP[i].Center[2]); + + /* the actual time-step of particle */ + dt = (P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval; + dt /= All.cf_hubble_a; /* this is dt, the actual timestep */ + + double cellrad = get_cell_radius(i); + +#if !defined(REGULARIZE_MESH_FACE_ANGLE) + /* if there is a density gradient, use a center that is displaced slightly in the direction of the gradient. + * This makes sure that the Lloyd scheme does not simply iterate towards cells of equal volume, instead + * we keep cells of roughly equal mass. 
+ */ + double dgrad = sqrt(SphP[i].Grad.drho[0] * SphP[i].Grad.drho[0] + SphP[i].Grad.drho[1] * SphP[i].Grad.drho[1] + + SphP[i].Grad.drho[2] * SphP[i].Grad.drho[2]); + + if(dgrad > 0) + { + double scale = SphP[i].Density / dgrad; + double tmp = 3 * cellrad + scale; + double x = (tmp - sqrt(tmp * tmp - 8 * cellrad * cellrad)) / 4; + + if(x < 0.25 * cellrad) + { + dx = nearest_x(P[i].Pos[0] - (SphP[i].Center[0] + x * SphP[i].Grad.drho[0] / dgrad)); + dy = nearest_y(P[i].Pos[1] - (SphP[i].Center[1] + x * SphP[i].Grad.drho[1] / dgrad)); + dz = nearest_z(P[i].Pos[2] - (SphP[i].Center[2] + x * SphP[i].Grad.drho[2] / dgrad)); + } + } +#endif /* #if !defined(REGULARIZE_MESH_FACE_ANGLE) */ + + d = sqrt(dx * dx + dy * dy + dz * dz); + + fraction = 0; + +#if !defined(REGULARIZE_MESH_FACE_ANGLE) + if(d > 0.75 * All.CellShapingFactor * cellrad && dt > 0) + { + if(d > All.CellShapingFactor * cellrad) + fraction = All.CellShapingSpeed; + else + fraction = All.CellShapingSpeed * (d - 0.75 * All.CellShapingFactor * cellrad) / (0.25 * All.CellShapingFactor * cellrad); + } +#else /* #if !defined(REGULARIZE_MESH_FACE_ANGLE) */ + if(SphP[i].MaxFaceAngle > 0.75 * All.CellMaxAngleFactor && dt > 0) + { + if(SphP[i].MaxFaceAngle > All.CellMaxAngleFactor) + fraction = All.CellShapingSpeed; + else + fraction = All.CellShapingSpeed * (SphP[i].MaxFaceAngle - 0.75 * All.CellMaxAngleFactor) / (0.25 * All.CellMaxAngleFactor); + } +#endif /* #if !defined(REGULARIZE_MESH_FACE_ANGLE) #else */ + + if(d > 0 && fraction > 0) + { + double v; +#ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED + + v = All.cf_atime * get_sound_speed(i); + +#if defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE) + /* calculate gravitational velocity scale */ + double ax, ay, az, ac, vgrav; +#ifdef HIERARCHICAL_GRAVITY + ax = SphP[i].FullGravAccel[0]; + ay = SphP[i].FullGravAccel[1]; + az = SphP[i].FullGravAccel[2]; +#else /* #ifdef HIERARCHICAL_GRAVITY */ + ax = P[i].GravAccel[0]; + ay 
= P[i].GravAccel[1]; + az = P[i].GravAccel[2]; +#endif /* #ifdef HIERARCHICAL_GRAVITY #else */ +#ifdef PMGRID + ax += P[i].GravPM[0]; + ay += P[i].GravPM[1]; + az += P[i].GravPM[2]; +#endif /* #ifdef PMGRID */ + ac = sqrt(ax * ax + ay * ay + az * az); + vgrav = 4 * sqrt(All.cf_atime * cellrad * ac); + if(v < vgrav) + v = vgrav; +#endif /* #if defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE) */ + + double vcurl = cellrad * SphP[i].CurlVel; + if(v < vcurl) + v = vcurl; + +#else /* #ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED */ + v = All.cf_atime * All.cf_atime * d / dt; /* use fiducial velocity */ + + double vel = sqrt(P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]); + double vmax = dmax(All.cf_atime * get_sound_speed(i), vel); + if(v > vmax) + v = vmax; +#endif /* #ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED #else */ + +#ifdef REFINEMENT_SPLIT_CELLS + double proj = SphP[i].SepVector[0] * dx + SphP[i].SepVector[1] * dy + SphP[i].SepVector[2] * dz; + + if(proj != 0) + { + dx = proj * SphP[i].SepVector[0]; + dy = proj * SphP[i].SepVector[1]; + dz = proj * SphP[i].SepVector[2]; + } + + SphP[i].SepVector[0] = 0; + SphP[i].SepVector[1] = 0; + SphP[i].SepVector[2] = 0; +#endif /* #ifdef REFINEMENT_SPLIT_CELLS */ + + SphP[i].VelVertex[0] += fraction * v * (-dx / d); + SphP[i].VelVertex[1] += fraction * v * (-dy / d); + SphP[i].VelVertex[2] += fraction * v * (-dz / d); + } +#endif /* #ifdef REGULARIZE_MESH_CM_DRIFT */ + + for(j = NUMDIMS; j < 3; j++) + SphP[i].VelVertex[j] = 0; /* vertex velocities for unused dimensions set to zero */ + } + +#ifdef OUTPUT_VERTEX_VELOCITY_DIVERGENCE + voronoi_exchange_primitive_variables(); + calculate_vertex_velocity_divergence(); +#endif /* #ifdef OUTPUT_VERTEX_VELOCITY_DIVERGENCE */ + +#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) + validate_vertex_velocities(); +#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || 
defined(REFLECTIVE_Z) */
+
+#ifdef ONEDIMS_SPHERICAL
+  validate_vertex_velocities_1d();
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+  TIMER_STOP(CPU_SET_VERTEXVELS);
+}
+
+#ifdef ONEDIMS_SPHERICAL
+/*! \brief Handles inner boundary cells in 1d spherical case.
+ *
+ * If moving cell 0 with its current vertex velocity for the length of its own
+ * hydro timestep would place it below All.CoreRadius, the first component of
+ * its vertex velocity is set to zero.
+ *
+ * \return void
+ */
+static void validate_vertex_velocities_1d()
+{
+  /* length of the hydro step of cell 0; evaluates to zero if the cell is in time bin 0 */
+  double dt = (P[0].TimeBinHydro ? (((integertime)1) << P[0].TimeBinHydro) : 0) * All.Timebase_interval;
+  if(P[0].Pos[0] + dt * SphP[0].VelVertex[0] < All.CoreRadius)
+    SphP[0].VelVertex[0] = 0.;
+}
+#endif /* #ifdef ONEDIMS_SPHERICAL */
+
+#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z)
+/*! \brief Checks validity of vertex velocities with boundary conditions.
+ *
+ * In case we have reflecting boundaries, make sure that cell does not drift
+ * beyond boundary. For every active hydro cell and every reflective axis,
+ * the predicted position after one step of the cell is tested against the
+ * interval [0, boxSize); if it falls outside, the vertex velocity component
+ * along that axis is set to zero.
+ *
+ * \return void
+ */
+void validate_vertex_velocities(void)
+{
+  int idx, i;
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0) /* negative entries of the active list are skipped */
+        continue;
+
+      /* integer length of the hydro step of this cell (zero for time bin 0) */
+      integertime ti_step = P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0;
+      double dt_drift;
+
+      /* convert the integer step into the factor that multiplies the velocity in the drift */
+      if(All.ComovingIntegrationOn)
+        dt_drift = get_drift_factor(All.Ti_Current, All.Ti_Current + ti_step);
+      else
+        dt_drift = ti_step * All.Timebase_interval;
+
+#if defined(REFLECTIVE_X)
+      if((P[i].Pos[0] + dt_drift * SphP[i].VelVertex[0]) < 0 || (P[i].Pos[0] + dt_drift * SphP[i].VelVertex[0]) >= boxSize_X)
+        SphP[i].VelVertex[0] = 0;
+#endif /* #if defined(REFLECTIVE_X) */
+#if defined(REFLECTIVE_Y)
+      if((P[i].Pos[1] + dt_drift * SphP[i].VelVertex[1]) < 0 || (P[i].Pos[1] + dt_drift * SphP[i].VelVertex[1]) >= boxSize_Y)
+        SphP[i].VelVertex[1] = 0;
+#endif /* #if defined(REFLECTIVE_Y) */
+#if defined(REFLECTIVE_Z)
+      if((P[i].Pos[2] + dt_drift * SphP[i].VelVertex[2]) < 0 || (P[i].Pos[2] + dt_drift * SphP[i].VelVertex[2]) >= boxSize_Z)
+        SphP[i].VelVertex[2] = 0;
+#endif /* #if defined(REFLECTIVE_Z) */
+    }
+}
+#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi.c
new file mode 100644
index 0000000000..cc6964c01b
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi.c
@@ -0,0 +1,1163 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi.c + * \date 05/2018 + * \brief Main file for Voronoi-mesh construction. + * \details contains functions: + * void create_mesh(void) + * int voronoi_get_local_particles(void) + * void free_mesh_structures_not_needed_for_derefinement_ + * refinement(void) + * void free_all_remaining_mesh_structures(void) + * void free_mesh(void) + * int compute_max_delaunay_radius(void) + * void compute_voronoi_faces_and_volumes(void) + * int area_list_data_compare(const void *a, const void *b) + * void apply_area_list(void) + * void derefine_refine_compute_volumes(double *vol) + * double nearest_x(double d) + * double nearest_y(double d) + * double nearest_z(double d) + * double get_cell_radius(int i) + * void dump_points(tessellation * T) + * int face_get_normals(tessellation * T, int i, struct + * geometry *geom) + * double distance_to_border(int cell) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" + +tessellation Mesh, DeRefMesh; + +unsigned char *Edge_visited; +struct area_list_data *AreaList; +int Narea, MaxNarea; + +int DPinfinity; /* marker for special infinity point */ +double CentralOffsetX, CentralOffsetY, CentralOffsetZ, ConversionFac; + +struct list_export_data *ListExports; +struct list_P_data *List_P; +int NumGasInMesh; +int *List_InMesh; + +int CountInSphereTests, CountInSphereTestsExact; +int CountConvexEdgeTest, CountConvexEdgeTestExact; +int Ninlist, MaxNinlist; + +int CountFlips, Count_1_to_3_Flips2d, Count_2_to_4_Flips2d; +int Count_1_to_4_Flips, Count_2_to_3_Flips, Count_3_to_2_Flips, Count_4_to_4_Flips; 
+int Count_EdgeSplits, Count_FaceSplits;
+int Count_InTetra, Count_InTetraExact;
+int Largest_N_DP_Buffer;
+
+/* global (summed over all tasks) versions of the per-task counters above */
+long long TotCountInSphereTests, TotCountInSphereTestsExact;
+long long TotCountConvexEdgeTest, TotCountConvexEdgeTestExact;
+
+long long TotCountFlips, TotCount_1_to_3_Flips2d, TotCount_2_to_4_Flips2d;
+long long TotCount_1_to_4_Flips, TotCount_2_to_3_Flips, TotCount_3_to_2_Flips, TotCount_4_to_4_Flips;
+long long TotCount_EdgeSplits, TotCount_FaceSplits;
+long long TotCount_InTetra, TotCount_InTetraExact;
+
+/*! \brief Creates the Voronoi mesh.
+ *
+ * Routine which is called in run.
+ * It first creates an initial, giant tetrahedron and then successively
+ * inserts particles (first local, then ghost particles), computes their
+ * circumcircles and counts the undecided tetrahedra. This procedure is
+ * repeated until all tetrahedra are decided. Then, the maximum Delaunay
+ * radius is computed as well as the faces and volumes of the Voronoi cells.
+ *
+ * Returns immediately, without touching any state, if there is no gas in
+ * the simulation (All.TotNumGas == 0).
+ *
+ * \return void
+ */
+void create_mesh(void)
+{
+  /* Nothing to do without gas. This check has to come before the
+   * CREATE_FULL_MESH block below: that block overwrites the time bins and
+   * allocates buTimeBin, both of which are only undone at the very end of
+   * this function. */
+  if(All.TotNumGas == 0)
+    return;
+
+#ifdef CREATE_FULL_MESH
+  int k;
+
+  /* back up the time bin state and temporarily mark everything as synchronized;
+   * restored at the end of the function */
+  short int *buTimeBin = mymalloc_movable(&buTimeBin, "buTimeBin", NumPart * sizeof(short int));
+  static int buTimeBinActive[TIMEBINS];
+
+  for(k = 0; k < NumPart; k++)
+    {
+      buTimeBin[k]      = P[k].TimeBinHydro;
+      P[k].TimeBinHydro = 0;
+    }
+
+  for(k = 0; k < TIMEBINS; k++)
+    {
+      buTimeBinActive[k] = TimeBinSynchronized[k];
+
+      TimeBinSynchronized[k] = 1;
+    }
+
+  reconstruct_timebins();
+#endif /* #ifdef CREATE_FULL_MESH */
+
+  int tlast;
+  int idx, i, iter = 0, n, skip;
+  double tstart, tend;
+  long long ntot;
+
+  TIMER_START(CPU_MESH);
+
+  mpi_printf("VORONOI: create delaunay mesh\n");
+
+  Ngb_MarkerValue++;
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(P[i].Ti_Current != All.Ti_Current)
+        {
+          terminate("surprise! we don't expect this here anymore");
+          drift_particle(i, All.Ti_Current); /* NOTE(review): presumably never reached, terminate() is expected to abort */
+        }
+
+      /* initial search radius: slightly larger than the last known maximum Delaunay radius */
+      SphP[i].Hsml = 1.01 * SphP[i].MaxDelaunayRadius;
+    }
+
+  initialize_and_create_first_tetra(&Mesh);
+
+  /* reset the per-task statistics counters */
+  CountInSphereTests = CountInSphereTestsExact = 0;
+  CountConvexEdgeTest = CountConvexEdgeTestExact = 0;
+  CountFlips = Count_1_to_3_Flips2d = Count_2_to_4_Flips2d = 0;
+  Count_1_to_4_Flips                                       = 0;
+  Count_2_to_3_Flips                                       = 0;
+  Count_3_to_2_Flips                                       = 0;
+  Count_4_to_4_Flips                                       = 0;
+  Count_EdgeSplits                                         = 0;
+  Count_FaceSplits                                         = 0;
+  Count_InTetra = Count_InTetraExact = 0;
+  Largest_N_DP_Buffer                = 0;
+
+  MaxNinlist  = Mesh.Indi.AllocFacNinlist;
+  ListExports = mymalloc_movable(&ListExports, "ListExports", MaxNinlist * sizeof(struct list_export_data));
+
+  NumGasInMesh = 0;
+  List_InMesh  = mymalloc_movable(&List_InMesh, "List_InMesh", NumGas * sizeof(int));
+
+  List_P = mymalloc_movable(&List_P, "List_P", NumGas * sizeof(struct list_P_data));
+
+  Mesh.DTC = mymalloc_movable(&Mesh.DTC, "DTC", Mesh.MaxNdt * sizeof(tetra_center));
+  Mesh.DTF = mymalloc_movable(&Mesh.DTF, "DTF", Mesh.MaxNdt * sizeof(char));
+  for(i = 0; i < Mesh.Ndt; i++)
+    Mesh.DTF[i] = 0;
+
+  Ninlist = 0;
+
+  tlast = 0;
+
+  /* iterate: fetch points (local ones in iteration 0, ghosts afterwards), insert them,
+   * and repeat as long as any task still reports undecided tetrahedra */
+  do
+    {
+      skip = Mesh.Ndp; /* index of the first point added in this iteration */
+
+      TIMER_STOPSTART(CPU_MESH, CPU_MESH_FIND_DP);
+
+      tstart = second();
+
+      if(iter == 0)
+        {
+          MPI_Allreduce(&Nvc, &Largest_Nvc, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+          if(Largest_Nvc > 0)
+            n = voronoi_get_connected_particles(&Mesh);
+          else
+            n = voronoi_get_local_particles();
+        }
+      else
+        {
+          n = voronoi_ghost_search(&Mesh);
+        }
+
+      sumup_large_ints(1, &n, &ntot);
+
+      tend = second();
+
+      if(iter == 0)
+        mpi_printf("VORONOI: iter=%d: %llu local points, points/sec/task = %g, took %g secs\n", iter, ntot,
+                   ntot / (timediff(tstart, tend) + 1.0e-30) / NTask, timediff(tstart, tend));
+      else
+        {
+          if(ntot)
+            mpi_printf("VORONOI: iter=%d: %llu additional points, points/sec/task = %g, took %g secs\n", iter, ntot,
+                       ntot / (timediff(tstart, tend) + 1.0e-30) / NTask, timediff(tstart, tend));
+          else
+            mpi_printf("VORONOI: iter=%d: %llu additional points, took %g secs\n", iter, ntot, timediff(tstart, tend));
+        }
+
+      TIMER_STOPSTART(CPU_MESH_FIND_DP, CPU_MESH_INSERT);
+
+      for(i = 0; i < n; i++)
+        {
+#ifndef OPTIMIZE_MEMORY_USAGE
+          set_integers_for_point(&Mesh, skip + i);
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */
+          tlast = insert_point(&Mesh, skip + i, tlast);
+        }
+
+      TIMER_STOPSTART(CPU_MESH_INSERT, CPU_MESH_CELLCHECK);
+
+      compute_circumcircles(&Mesh);
+
+      if(iter > 0)
+        {
+          n = count_undecided_tetras(&Mesh);
+
+          sumup_large_ints(1, &n, &ntot);
+
+          if(ntot)
+            {
+              mpi_printf("VORONOI: still undecided %llu tetrahedras\n", ntot);
+
+              /* enlarge the search radii for the next ghost search */
+#ifndef DOUBLE_STENCIL
+              for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+                {
+                  i = TimeBinsHydro.ActiveParticleList[idx];
+                  if(i < 0)
+                    continue;
+                  SphP[i].Hsml *= HSML_INCREASE_FACTOR;
+                }
+#else  /* #ifndef DOUBLE_STENCIL */
+              for(i = 0; i < Mesh.Ndp; i++)
+                Mesh.DP[i].Hsml *= HSML_INCREASE_FACTOR;
+#endif /* #ifndef DOUBLE_STENCIL #else */
+            }
+        }
+      else
+        {
+          ntot = 1; /* always do at least one ghost search after the local insertion */
+        }
+
+      TIMER_STOPSTART(CPU_MESH_CELLCHECK, CPU_MESH);
+
+      if(iter > MAX_VORONOI_ITERATIONS)
+        terminate("too many iterations\n");
+
+      iter++;
+    }
+  while(ntot > 0);
+
+#if(REFLECTIVE_X == 2) || (REFLECTIVE_Y == 2) || (REFLECTIVE_Z == 2)
+  for(i = 0; i < Mesh.Ndp; i++)
+    {
+#if(REFLECTIVE_X == 2)
+      Mesh.DP[i].image_flags |= OUTFLOW_X;
+#endif /* #if (REFLECTIVE_X == 2) */
+#if(REFLECTIVE_Y == 2)
+      Mesh.DP[i].image_flags |= OUTFLOW_Y;
+#endif /* #if (REFLECTIVE_Y == 2) */
+#if(REFLECTIVE_Z == 2)
+      Mesh.DP[i].image_flags |= OUTFLOW_Z;
+#endif /* #if (REFLECTIVE_Z == 2) */
+    }
+#endif /* #if (REFLECTIVE_X == 2) || (REFLECTIVE_Y == 2) || (REFLECTIVE_Z == 2) */
+
+  compute_max_delaunay_radius();
+
+  TIMER_STOPSTART(CPU_MESH, CPU_LOGS);
+
+#ifdef VERBOSE
+  long long TotNdp, TotNdt;
+
+  int in[15];
+  long long out[15];
+
+  /* gather the per-task counters so they can be summed over all tasks in one call */
+  in[0] = Mesh.Ndp;
+  in[1] = Mesh.Ndt;
+  in[2] = CountInSphereTests;
+  in[3] = CountInSphereTestsExact;
+  in[4] = CountFlips;
+  in[5] = Count_InTetra;
+  in[6] = Count_InTetraExact;
+#ifndef TWODIMS
+  in[7]  = Count_1_to_4_Flips;
+  in[8]  = Count_2_to_3_Flips;
+  in[9]  = Count_3_to_2_Flips;
+  in[10] = Count_4_to_4_Flips;
+  in[11] = Count_FaceSplits;
+  in[12] = Count_EdgeSplits;
+  in[13] = CountConvexEdgeTest;
+  in[14] = CountConvexEdgeTestExact;
+  n      = 15;
+#else  /* #ifndef TWODIMS */
+  in[7] = Count_1_to_3_Flips2d;
+  in[8] = Count_2_to_4_Flips2d;
+  n     = 9;
+#endif /* #ifndef TWODIMS #else */
+
+  sumup_large_ints(n, in, out);
+
+  TotNdp                     = out[0];
+  TotNdt                     = out[1];
+  TotCountInSphereTests      = out[2];
+  TotCountInSphereTestsExact = out[3];
+  TotCountFlips              = out[4];
+  TotCount_InTetra           = out[5];
+  TotCount_InTetraExact      = out[6];
+#ifndef TWODIMS
+  TotCount_1_to_4_Flips       = out[7];
+  TotCount_2_to_3_Flips       = out[8];
+  TotCount_3_to_2_Flips       = out[9];
+  TotCount_4_to_4_Flips       = out[10];
+  TotCount_FaceSplits         = out[11];
+  TotCount_EdgeSplits         = out[12];
+  TotCountConvexEdgeTest      = out[13];
+  TotCountConvexEdgeTestExact = out[14];
+#else  /* #ifndef TWODIMS */
+  TotCount_1_to_3_Flips2d = out[7];
+  TotCount_2_to_4_Flips2d = out[8];
+#endif /* #ifndef TWODIMS #else */
+
+  if(ThisTask == 0)
+    {
+#ifndef TWODIMS
+      printf(
+          "VORONOI: Average D-Points=%llu (NumGas=%llu) D-Tetrahedra=%llu InSphereTests=%llu InSphereTestsExact=%llu "
+          "Flips=%llu\n",
+          TotNdp / NTask, All.TotNumGas / NTask, TotNdt / NTask, TotCountInSphereTests / NTask, TotCountInSphereTestsExact / NTask,
+          TotCountFlips / NTask);
+      printf("VORONOI: 1_to_4_Flips=%llu 2_to_3_Flips=%llu 3_to_2_Flips=%llu 4_to_4_Flips=%llu FaceSplits=%llu EdgeSplits=%llu\n",
+             TotCount_1_to_4_Flips / NTask, TotCount_2_to_3_Flips / NTask, TotCount_3_to_2_Flips / NTask,
+             TotCount_4_to_4_Flips / NTask, TotCount_FaceSplits / NTask, TotCount_EdgeSplits / NTask);
+      /* InTetra is reported as a per-task average like every other value in these lines
+       * (and like the InTriangle value in the 2d branch below) */
+      printf("VORONOI: InTetra=%llu InTetraExact=%llu ConvexEdgeTest=%llu ConvexEdgeTestExact=%llu\n", TotCount_InTetra / NTask,
+             TotCount_InTetraExact / NTask, TotCountConvexEdgeTest / NTask, TotCountConvexEdgeTestExact / NTask);
+#else  /* #ifndef TWODIMS */
+      printf(
+          "VORONOI: Average D-Points=%llu (NumGas=%llu) D-Triangles=%llu InCircleTests=%llu InCircleTestsExact=%llu Flips=%llu\n",
+          TotNdp / NTask, All.TotNumGas / NTask, TotNdt / NTask, TotCountInSphereTests / NTask, TotCountInSphereTestsExact / NTask,
+          TotCountFlips / NTask);
+      printf("VORONOI: 1_to_3_Flips=%llu 2_to_4_Flips=%llu InTriangle=%llu InTriangleExact=%llu\n", TotCount_1_to_3_Flips2d / NTask,
+             TotCount_2_to_4_Flips2d / NTask, TotCount_InTetra / NTask, TotCount_InTetraExact / NTask);
+#endif /* #ifndef TWODIMS #else */
+      printf("VORONOI: Total D-Points: %llu Ratio=%g\n", TotNdp, ((double)TotNdp) / All.TotNumGas);
+    }
+#endif /* #ifdef VERBOSE */
+
+  TIMER_STOPSTART(CPU_LOGS, CPU_MESH_GEOMETRY);
+
+  /* reset the geometric quantities of all active cells before they are accumulated */
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      SphP[i].Volume      = 0;
+      SphP[i].SurfaceArea = 0;
+#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE)
+      SphP[i].MaxFaceAngle = 0;
+#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */
+#ifdef OUTPUT_SURFACE_AREA
+      SphP[i].CountFaces = 0;
+#endif /* #ifdef OUTPUT_SURFACE_AREA */
+    }
+
+  compute_voronoi_faces_and_volumes();
+
+  double vol, voltot;
+
+  vol = 0;
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      vol += SphP[i].Volume;
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      P[i].SofteningType = get_softeningtype_for_hydro_cell(i);
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+    }
+
+  /* voltot is only defined on task 0 after this reduction */
+  MPI_Reduce(&vol, &voltot, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+
+  mpi_printf("VORONOI: Total volume of active cells = %g\n", voltot);
+
+  TIMER_STOP(CPU_MESH_GEOMETRY);
+
+  voronoi_update_connectivity(&Mesh);
+
+  myfree(Mesh.DTF);
+
+  if(All.HighestActiveTimeBin == All.HighestOccupiedTimeBin) /* only do this for full steps */
+    {
+      /* check whether we can reduce allocation factors */
+      while(Mesh.Ndp < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacNdp && Mesh.Indi.AllocFacNdp > MIN_ALLOC_NUMBER)
+        Mesh.Indi.AllocFacNdp /= ALLOC_INCREASE_FACTOR;
+
+      while(Mesh.Ndt < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacNdt && Mesh.Indi.AllocFacNdt > MIN_ALLOC_NUMBER)
+        Mesh.Indi.AllocFacNdt /= ALLOC_INCREASE_FACTOR;
+
+      while(Mesh.Nvf < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacNvf && Mesh.Indi.AllocFacNvf > MIN_ALLOC_NUMBER)
+        Mesh.Indi.AllocFacNvf /= ALLOC_INCREASE_FACTOR;
+
+      while(Ninlist < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacNinlist && Mesh.Indi.AllocFacNinlist > MIN_ALLOC_NUMBER)
+        Mesh.Indi.AllocFacNinlist /= ALLOC_INCREASE_FACTOR;
+
+      while(Largest_N_DP_Buffer < ALLOC_DECREASE_FACTOR * Mesh.Indi.AllocFacN_DP_Buffer &&
+            Mesh.Indi.AllocFacN_DP_Buffer > MIN_ALLOC_NUMBER)
+        Mesh.Indi.AllocFacN_DP_Buffer /= ALLOC_INCREASE_FACTOR;
+    }
+
+#ifdef CREATE_FULL_MESH
+  /* restore the time bin state that was backed up at the top of the function */
+  for(k = 0; k < TIMEBINS; k++)
+    TimeBinSynchronized[k] = buTimeBinActive[k];
+
+  for(k = 0; k < NumPart; k++)
+    P[k].TimeBinHydro = buTimeBin[k];
+
+  reconstruct_timebins();
+
+  myfree_movable(buTimeBin);
+#endif /* #if defined(CREATE_FULL_MESH) */
+}
+
+/*! \brief Routine that fetches local gas cells.
+ *
+ * Runs through all active particles and inserts active gas cells into mesh
+ * structure. Increases length of Mesh.DP and ListExports arrays if needed.
+ *
+ * \return Number of points.
+ */
+int voronoi_get_local_particles(void)
+{
+  int p, idx, count = 0;
+
+  /* first, let's add all the primary active points */
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      p = TimeBinsHydro.ActiveParticleList[idx];
+
+      if(p < 0)
+        continue;
+
+      if(P[p].Type == 0)
+        {
+          Ngb_Marker[p] = Ngb_MarkerValue;
+
+          if((P[p].Mass == 0) && (P[p].ID == 0)) /* skip cells that have been swallowed or eliminated */
+            {
+              List_P[p].firstexport   = -1;
+              List_P[p].currentexport = -1;
+              continue;
+            }
+
+          /* grow ListExports if it is full */
+          if(Ninlist >= MaxNinlist)
+            {
+              Mesh.Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+              MaxNinlist = Mesh.Indi.AllocFacNinlist;
+#ifdef VERBOSE
+              printf("VORONOI: Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist,
+                     Mesh.Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+              ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+              /* the integer capacity may not have grown if the allocation factor is small */
+              if(Ninlist >= MaxNinlist)
+                terminate("Ninlist >= MaxNinlist");
+            }
+
+          List_InMesh[NumGasInMesh++] = p;
+
+          /* start the export list of this cell with an entry for the local task */
+          List_P[p].currentexport = List_P[p].firstexport = Ninlist++;
+          ListExports[List_P[p].currentexport].image_bits = 1;
+          ListExports[List_P[p].currentexport].nextexport = -1;
+          ListExports[List_P[p].currentexport].origin     = ThisTask;
+          ListExports[List_P[p].currentexport].index      = p;
+
+          /* grow the point array if it is full */
+          if(Mesh.Ndp >= Mesh.MaxNdp)
+            {
+              Mesh.Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+              Mesh.MaxNdp = Mesh.Indi.AllocFacNdp;
+#ifdef VERBOSE
+              printf("VORONOI: Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, Mesh.MaxNdp,
+                     Mesh.Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+              /* the allocated block starts 5 elements before Mesh.DP, so shift the pointer
+               * back for the reallocation and forward again afterwards */
+              Mesh.DP -= 5;
+              Mesh.DP = myrealloc_movable(Mesh.DP, (Mesh.MaxNdp + 5) * sizeof(point));
+              Mesh.DP += 5;
+
+              if(Mesh.Ndp >= Mesh.MaxNdp)
+                terminate("Ndp >= MaxNdp");
+            }
+
+          SphP[p].ActiveArea = 0;
+
+          /* fill the new Delaunay point from the particle data */
+          point *dp = &Mesh.DP[Mesh.Ndp];
+
+          dp->x             = P[p].Pos[0];
+          dp->y             = P[p].Pos[1];
+          dp->z             = P[p].Pos[2];
+          dp->ID            = P[p].ID;
+          dp->task          = ThisTask;
+          dp->index         = p;
+          dp->originalindex = -1;
+          dp->timebin       = P[p].TimeBinHydro;
+          dp->image_flags   = 1;
+#ifdef DOUBLE_STENCIL
+          dp->Hsml             = SphP[p].Hsml;
+          dp->first_connection = -1;
+          dp->last_connection  = -1;
+#endif /* #ifdef DOUBLE_STENCIL */
+
+          Mesh.Ndp++;
+          count++;
+        }
+    }
+
+  return count;
+}
+
+#ifdef REFINEMENT
+struct refdata *RefExch;
+
+/*! \brief Structures that are freed before refinement and derefinement step.
+ *
+ * To optimize the memory usage, this, in combination with
+ * free_all_remaining_mesh_structures() can be used instead of a free_mesh()
+ * after the refinement. This saves some memory.
+ *
+ * Frees GradExch and PrimExch; the fields of PrimExch that are copied here
+ * (TimeBinHydro and, with REFINEMENT_VOLUME_LIMIT, Volume) are kept in the
+ * newly allocated RefExch array.
+ *
+ * \return void
+ */
+void free_mesh_structures_not_needed_for_derefinement_refinement(void)
+{
+  if(All.TotNumGas == 0)
+    return;
+
+  int i;
+
+  myfree(GradExch);
+
+  RefExch = (struct refdata *)mymalloc_movable(&RefExch, "RefExch", Mesh_nimport * sizeof(struct refdata));
+
+  for(i = 0; i < Mesh_nimport; i++)
+    {
+#ifdef REFINEMENT_VOLUME_LIMIT
+      RefExch[i].Volume = PrimExch[i].Volume;
+#endif /* #ifdef REFINEMENT_VOLUME_LIMIT */
+      RefExch[i].TimeBinHydro = PrimExch[i].TimeBinHydro;
+    }
+
+  myfree_movable(PrimExch);
+}
+
+/*! \brief Structures that are freed after refinement and derefinement step.
+ *
+ * To optimize the memory usage, this, in combination with
+ * free_mesh_structures_not_needed_for_derefinement_refinement(void) can be
+ * used instead of a free_mesh() after the refinement. This saves some memory.
+ *
+ * \return void
+ */
+void free_all_remaining_mesh_structures(void)
+{
+  if(All.TotNumGas == 0)
+    return;
+
+  myfree(RefExch);
+
+  myfree(Mesh.DTC); /* here we can free the centers of the Delaunay triangles again */
+  Mesh.DTC = NULL;
+  myfree(List_P);
+  myfree(List_InMesh);
+  myfree(ListExports);
+  myfree(Mesh.DT);
+  myfree(Mesh.DP - 5); /* Mesh.DP points 5 elements into its allocation */
+  myfree(Mesh.VF);
+}
+#endif /* #ifdef REFINEMENT */
+
+/*! \brief Frees arrays associated with Voronoi-mesh.
+ *
+ * Does nothing if there is no gas in the simulation. With DOUBLE_STENCIL,
+ * the connection entries of all Delaunay points are first returned to the
+ * free list of DC.
+ *
+ * \return void
+ */
+void free_mesh(void)
+{
+  if(All.TotNumGas == 0)
+    return;
+
+#if defined(DOUBLE_STENCIL)
+  mpi_printf("freeing double stencil connections...\n");
+  int i;
+  for(i = 0; i < Mesh.Ndp; i++)
+    if(Mesh.DP[i].first_connection >= 0)
+      {
+        if(Mesh.DP[i].flag_primary_triangle == 0)
+          terminate("Mesh.DP[i].flag_primary_triangle");
+
+        int q = Mesh.DP[i].first_connection;
+
+        if(q >= 0) /* we have connections, let's add them to the free list */
+          {
+            /* walk the connection chain of this point up to and including last_connection */
+            while(q >= 0)
+              {
+                Nvc--;
+                DC[q].task = -1; /* mark that this is unused */
+
+                if(q == Mesh.DP[i].last_connection)
+                  break;
+
+                q = DC[q].next;
+              }
+
+            /* we add the new free spots at the beginning of the free list */
+            DC[Mesh.DP[i].last_connection].next = FirstUnusedConnection;
+            FirstUnusedConnection               = Mesh.DP[i].first_connection;
+
+            Mesh.DP[i].first_connection = -1;
+            Mesh.DP[i].last_connection  = -1;
+          }
+      }
+  mpi_printf("done with freeing double stencil connections.\n");
+#endif /* #if defined(DOUBLE_STENCIL) */
+
+  myfree_movable(GradExch);
+  myfree_movable(PrimExch);
+
+  myfree_movable(Mesh.DTC); /* here we can free the centers of the Delaunay triangles again */
+  Mesh.DTC = NULL;
+  myfree_movable(List_P);
+  myfree_movable(List_InMesh);
+  myfree_movable(ListExports);
+  myfree_movable(Mesh.DT);
+  myfree_movable(Mesh.DP - 5); /* Mesh.DP points 5 elements into its allocation */
+  myfree_movable(Mesh.VF);
+}
+
+/*! \brief Get the maximum Delaunay radius for all active cells.
+ *
+ * Defined as the maximum distance between tetrahedron center and its
+ * neighboring points. Stores this radius in the respective field in the
+ * SphP structure.
+ *
+ * \return 0 (unused).
+ */
+int compute_max_delaunay_radius(void)
+{
+  int idx, i, j, count = 0; /* count is never modified; the function always returns 0 */
+  point *p;
+  double dx, dy, dz, r;
+
+#ifdef ONEDIMS
+  /* nothing is computed in the 1d case */
+  return 0;
+#endif /* #ifdef ONEDIMS */
+
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      SphP[i].MaxDelaunayRadius = 0;
+    }
+
+  point *DP         = Mesh.DP;
+  tetra *DT         = Mesh.DT;
+  tetra_center *DTC = Mesh.DTC;
+
+  for(i = 0; i < Mesh.Ndt; i++)
+    {
+      if(DT[i].t[0] < 0) /* deleted ? */
+        continue;
+
+      /* distance between the stored center of this tetrahedron and its first corner */
+      dx = DP[DT[i].p[0]].x - DTC[i].cx;
+      dy = DP[DT[i].p[0]].y - DTC[i].cy;
+      dz = DP[DT[i].p[0]].z - DTC[i].cz;
+
+      /* the stored value is twice that distance */
+      r = 2 * sqrt(dx * dx + dy * dy + dz * dz);
+
+      for(j = 0; j < (DIMS + 1); j++)
+        {
+          p = &DP[DT[i].p[j]];
+
+          /* only update local gas cells whose time bin is synchronized */
+          if(p->task == ThisTask && p->index < NumGas && p->index >= 0)
+            if(TimeBinSynchronized[P[p->index].TimeBinHydro])
+              if(r > SphP[p->index].MaxDelaunayRadius)
+                SphP[p->index].MaxDelaunayRadius = r;
+        }
+    }
+
+  return count;
+}
+
+#ifndef ONEDIMS
+/*! \brief Computes interface areas volume of cells.
+ *
+ * Loops over Delaunay tetrahedra to calculate interface area and volume
+ * contributions to the individual cells. Calculates as well the center of
+ * mass.
+ *
+ * \return void
+ */
+void compute_voronoi_faces_and_volumes(void)
+{
+  int idx, i, bit, nr;
+
+  /* reset the accumulated quantities of all active cells */
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      SphP[i].Volume    = 0;
+      SphP[i].Center[0] = 0;
+      SphP[i].Center[1] = 0;
+      SphP[i].Center[2] = 0;
+#if defined(REFINEMENT_SPLIT_CELLS)
+      SphP[i].MinimumEdgeDistance = MAX_FLOAT_NUMBER;
+#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */
+    }
+
+  /* one bitmask per tetrahedron that records which of its edges were already processed */
+  Edge_visited = mymalloc_movable(&Edge_visited, "Edge_visited", Mesh.Ndt * sizeof(unsigned char));
+
+  for(i = 0; i < Mesh.Ndt; i++)
+    Edge_visited[i] = 0;
+
+  MaxNarea = Mesh.Indi.AllocFacNflux;
+  Narea    = 0;
+  AreaList = mymalloc_movable(&AreaList, "AreaList", MaxNarea * sizeof(struct area_list_data));
+
+  for(i = 0; i < Mesh.Ndt; i++)
+    {
+      if(Mesh.DT[i].t[0] < 0) /* deleted ? */
+        continue;
+
+      bit = 1;
+      nr  = 0;
+
+      /* process every edge of this tetrahedron whose bit is not yet set */
+      while(Edge_visited[i] != EDGE_ALL)
+        {
+          if((Edge_visited[i] & bit) == 0)
+            process_edge_faces_and_volumes(&Mesh, i, nr);
+
+          bit <<= 1;
+          nr++;
+        }
+    }
+
+  /* Center was accumulated as a volume-weighted sum; normalize it (cells with zero volume are left alone) */
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(SphP[i].Volume)
+        {
+          SphP[i].Center[0] /= SphP[i].Volume;
+          SphP[i].Center[1] /= SphP[i].Volume;
+          SphP[i].Center[2] /= SphP[i].Volume;
+        }
+    }
+
+  apply_area_list();
+  myfree(AreaList);
+
+  myfree(Edge_visited);
+}
+
+/*! \brief Compare task of two area_list_data structures.
+ *
+ * Comparison function suitable for sorting area_list_data entries by task.
+ *
+ * \param[in] a Pointer to first area_list_data structure.
+ * \param[in] b Pointer to second area_list_data structure.
+ *
+ * \return (-1,0,1): -1 if a.task < b.task, +1 if a.task > b.task, 0 otherwise.
+ */
+int area_list_data_compare(const void *a, const void *b)
+{
+  const struct area_list_data *lhs = (const struct area_list_data *)a;
+  const struct area_list_data *rhs = (const struct area_list_data *)b;
+
+  if(lhs->task < rhs->task)
+    return -1;
+
+  if(lhs->task > rhs->task)
+    return +1;
+
+  return 0;
+}
+
+/*! \brief Sorts all interface areas and adds them to respective mesh
+ * generating points (ActiveArea).
+ *
+ * The entries of AreaList are sorted by destination task, exchanged between
+ * the tasks, and each received entry adds its darea to SphP[index].ActiveArea.
+ *
+ * \return void
+ */
+void apply_area_list(void)
+{
+  int i, j, p, nimport, ngrp, recvTask;
+
+  /* now exchange the area-list and apply where needed */
+
+  mysort(AreaList, Narea, sizeof(struct area_list_data), area_list_data_compare);
+
+  /* count how many entries go to each task */
+  for(j = 0; j < NTask; j++)
+    Send_count[j] = 0;
+
+  for(i = 0; i < Narea; i++)
+    Send_count[AreaList[i].task]++;
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  /* total number of entries to receive and the offsets into the send/receive buffers */
+  for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
+    {
+      nimport += Recv_count[j];
+
+      if(j > 0)
+        {
+          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
+          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+        }
+    }
+
+  struct area_list_data *AreaListGet = (struct area_list_data *)mymalloc("AreaListGet", nimport * sizeof(struct area_list_data));
+
+  /* exchange particle data */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      /* communication partner for this round */
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              /* get the particles */
+              MPI_Sendrecv(&AreaList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct area_list_data), MPI_BYTE, recvTask,
+                           TAG_DENS_A, &AreaListGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct area_list_data),
+                           MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  /* apply the area */
+  for(i = 0; i < nimport; i++)
+    {
+      p = AreaListGet[i].index;
+      SphP[p].ActiveArea += AreaListGet[i].darea;
+    }
+
+  myfree(AreaListGet);
+}
+
+/*! \brief Calculates volumes of all cells that are created in refinement.
+ *
+ * \param[out] vol Volumes of cells.
+ *
+ * \return void
+ */
+void derefine_refine_compute_volumes(double *vol)
+{
+  int point_idx, tetra_idx, edge_bit, edge_nr;
+
+  /* start from zero volume for every point of the (de)refinement mesh */
+  for(point_idx = 0; point_idx < DeRefMesh.Ndp; point_idx++)
+    vol[point_idx] = 0;
+
+  /* per-tetrahedron bitmask of the edges that were already handled */
+  Edge_visited = mymalloc_movable(&Edge_visited, "Edge_visited", DeRefMesh.Ndt * sizeof(unsigned char));
+
+  for(tetra_idx = 0; tetra_idx < DeRefMesh.Ndt; tetra_idx++)
+    Edge_visited[tetra_idx] = 0;
+
+  for(tetra_idx = 0; tetra_idx < DeRefMesh.Ndt; tetra_idx++)
+    {
+      /* deleted tetrahedra are marked by a negative first neighbour and contribute nothing */
+      if(DeRefMesh.DT[tetra_idx].t[0] < 0)
+        continue;
+
+      /* visit the edges one bit at a time until the mask of this tetrahedron is complete */
+      for(edge_bit = 1, edge_nr = 0; Edge_visited[tetra_idx] != EDGE_ALL; edge_bit <<= 1, edge_nr++)
+        {
+          if((Edge_visited[tetra_idx] & edge_bit) == 0)
+            derefine_refine_process_edge(&DeRefMesh, vol, tetra_idx, edge_nr);
+        }
+    }
+
+  myfree(Edge_visited);
+}
+
+#endif /* #ifndef ONEDIMS */
+
+/*! \brief Nearest distance in x direction, accounting for periodicity.
+ *
+ * Unless REFLECTIVE_X is defined, a separation outside of
+ * [-boxHalf_X, boxHalf_X] is shifted by one box length.
+ *
+ * \param[in] d Distance to be checked.
+ *
+ * \return Nearest distance.
+ */
+double nearest_x(double d)
+{
+#ifndef REFLECTIVE_X
+  if(d < -boxHalf_X)
+    {
+      d += boxSize_X;
+    }
+  if(d > boxHalf_X)
+    {
+      d -= boxSize_X;
+    }
+#endif /* #ifndef REFLECTIVE_X */
+  return d;
+}
+
+/*! \brief Nearest distance in y direction, accounting for periodicity.
+ *
+ * Unless REFLECTIVE_Y is defined, a separation outside of
+ * [-boxHalf_Y, boxHalf_Y] is shifted by one box length.
+ *
+ * \param[in] d Distance to be checked.
+ *
+ * \return Nearest distance.
+ */
+double nearest_y(double d)
+{
+#ifndef REFLECTIVE_Y
+  if(d < -boxHalf_Y)
+    {
+      d += boxSize_Y;
+    }
+  if(d > boxHalf_Y)
+    {
+      d -= boxSize_Y;
+    }
+#endif /* #ifndef REFLECTIVE_Y */
+  return d;
+}
+
+/*! \brief Nearest distance in z direction, accounting for periodicity.
+ *
+ * Unless REFLECTIVE_Z is defined, a separation outside of
+ * [-boxHalf_Z, boxHalf_Z] is shifted by one box length.
+ *
+ * \param[in] d Distance to be checked.
+ *
+ * \return Nearest distance.
+ */
+double nearest_z(double d)
+{
+#ifndef REFLECTIVE_Z
+  if(d < -boxHalf_Z)
+    {
+      d += boxSize_Z;
+    }
+  if(d > boxHalf_Z)
+    {
+      d -= boxSize_Z;
+    }
+#endif /* #ifndef REFLECTIVE_Z */
+  return d;
+}
+
+/*! \brief Gets "radius" of a cell.
+ *
+ * Defined as the radius of a sphere with the same volume as the Voronoi cell.
+ *
+ * \param[in] i Index of cell in P and SphP arrays.
+ *
+ * \return radius of cell i.
+ */
+double get_cell_radius(int i)
+{
+  double cellrad;
+
+#ifdef TWODIMS
+  /* radius of the circle with the same area */
+  cellrad = sqrt(SphP[i].Volume / M_PI);
+#else /* #ifdef TWODIMS */
+#ifdef ONEDIMS
+#ifdef ONEDIMS_SPHERICAL
+  /* half the distance between the two faces stored at indices i and i + 1 */
+  cellrad = 0.5 * (Mesh.VF[i + 1].cx - Mesh.VF[i].cx);
+#else  /* #ifdef ONEDIMS_SPHERICAL */
+  cellrad = 0.5 * SphP[i].Volume;
+#endif /* #ifdef ONEDIMS_SPHERICAL #else */
+#else  /* #ifdef ONEDIMS */
+  /* radius of the sphere with the same volume */
+  cellrad = pow(SphP[i].Volume * 3.0 / (4.0 * M_PI), 1.0 / 3);
+#endif /* #ifdef ONEDIMS #else */
+#endif /* #ifdef TWODIMS */
+  return cellrad;
+}
+
+/*! \brief Writes a file points_X.dat with Delaunay points.
+ *
+ * X is the rank of the calling task. The file contains the number of points
+ * (int) followed by three doubles (x, y, z) per point, taken from the DP
+ * structure. The run is terminated if the file cannot be opened.
+ *
+ * \param[in] T tessellation for which Delaunay point positions should be
+ * written.
+ *
+ * \return void
+ */
+void dump_points(tessellation *T)
+{
+  FILE *fd;
+  int i;
+  double xyz[3];
+  char buf[1000];
+
+  snprintf(buf, sizeof(buf), "points_%d.dat", ThisTask);
+  fd = fopen(buf, "w");
+  /* never hand a NULL stream to my_fwrite()/fclose() */
+  if(fd == NULL)
+    terminate("dump_points: cannot open points file for writing");
+
+  my_fwrite(&T->Ndp, sizeof(int), 1, fd);
+  for(i = 0; i < T->Ndp; i++)
+    {
+      xyz[0] = T->DP[i].x;
+      xyz[1] = T->DP[i].y;
+      xyz[2] = T->DP[i].z;
+      my_fwrite(xyz, sizeof(double), 3, fd);
+    }
+  fclose(fd);
+}
+
+/*! \brief Calculates the normals to given interfaces.
+ *
+ * \param[in] T Pointer to tesslation data.
+ * \param[in] i Index of Voronoi-face in tesslation T.
+ * \param[out] geom Pointer to structure to which normal data is written.
+ *
+ * \return 0 if success, -1 if interface can be ignored.
+ */
+int face_get_normals(tessellation *T, int i, struct geometry *geom)
+{
+  int li, ri;
+  double surface, surface_l, surface_r;
+  int present_left, present_right;
+  double mm;
+
+  face *VF  = T->VF;
+  point *DP = T->DP;
+
+  /* cell indices of the points on the two sides of the face */
+  li = DP[VF[i].p1].index;
+  ri = DP[VF[i].p2].index;
+
+  if(li < 0 || ri < 0)
+    return -1;
+
+  /* local indices >= NumGas are mapped back into the range of the SphP array */
+  if(li >= NumGas && DP[VF[i].p1].task == ThisTask)
+    li -= NumGas;
+
+  if(ri >= NumGas && DP[VF[i].p2].task == ThisTask)
+    ri -= NumGas;
+
+  /* surface area of each cell: local cells from SphP, imported ones from PrimExch */
+  if(DP[VF[i].p1].task == ThisTask)
+    surface_l = SphP[li].SurfaceArea;
+  else
+    surface_l = PrimExch[li].SurfaceArea;
+
+  if(DP[VF[i].p2].task == ThisTask)
+    surface_r = SphP[ri].SurfaceArea;
+  else
+    surface_r = PrimExch[ri].SurfaceArea;
+
+  /* threshold: 1e-5 times the larger of the two cell surface areas */
+  if(surface_r > surface_l)
+    surface = 1.0e-5 * surface_r;
+  else
+    surface = 1.0e-5 * surface_l;
+
+  present_left = present_right = 0;
+
+  /* if the area of this face is negligible compared to the surface
+     of the larger cell, skip it */
+  if(DP[VF[i].p1].task == ThisTask && DP[VF[i].p1].index < NumGas)
+    if(TimeBinSynchronized[P[DP[VF[i].p1].index].TimeBinHydro])
+      if(VF[i].area > surface)
+        present_left = 1;
+
+  if(DP[VF[i].p2].task == ThisTask && DP[VF[i].p2].index < NumGas)
+    if(TimeBinSynchronized[P[DP[VF[i].p2].index].TimeBinHydro])
+      if(VF[i].area > surface)
+        present_right = 1;
+
+  if(present_left == 0 && present_right == 0)
+    {
+#ifndef VORONOI_STATIC_MESH
+      VF[i].area = 0;
+#endif /* #ifndef VORONOI_STATIC_MESH */
+      return -1;
+    }
+
+  /* center of face */
+  geom->cx = VF[i].cx;
+  geom->cy = VF[i].cy;
+  geom->cz = VF[i].cz;
+
+  /* normal vector pointing to "right" state */
+  geom->nx = DP[VF[i].p2].x - DP[VF[i].p1].x;
+  geom->ny = DP[VF[i].p2].y - DP[VF[i].p1].y;
+  geom->nz = DP[VF[i].p2].z - DP[VF[i].p1].z;
+
+  /* NOTE(review): no guard against nn == 0 (coincident points) - confirm this cannot occur */
+  geom->nn = sqrt(geom->nx * geom->nx + geom->ny * geom->ny + geom->nz * geom->nz);
+  geom->nx /= geom->nn;
+  geom->ny /= geom->nn;
+  geom->nz /= geom->nn;
+
+  /* need an orthonormal basis */
+  if(geom->nx != 0 || geom->ny != 0)
+    {
+      /* m is n rotated by 90 degrees in the x-y plane */
+      geom->mx = -geom->ny;
+      geom->my = geom->nx;
+      geom->mz = 0;
+    }
+  else
+    {
+      /* n is parallel to the z-axis: use the x-axis as second vector */
+      geom->mx = 1;
+      geom->my = 0;
+      geom->mz = 0;
+    }
+
+  mm = sqrt(geom->mx * geom->mx + geom->my * geom->my + geom->mz * geom->mz);
+  geom->mx /= mm;
+  geom->my /= mm;
+  geom->mz /= mm;
+
+  /* third basis vector: p = n x m */
+  geom->px = geom->ny * geom->mz - geom->nz * geom->my;
+  geom->py = geom->nz * geom->mx - geom->nx * geom->mz;
+  geom->pz = geom->nx * geom->my - geom->ny * geom->mx;
+
+  return 0;
+}
+
+/*! \brief Calculates distance of a cell to boundary of computational box.
+ *
+ * Returns the smallest of the six distances between the position of the cell
+ * and the box faces at 0 and boxSize_X/Y/Z. Asserts that the position is
+ * below the upper box boundary in every dimension.
+ *
+ * \param[in] cell Index of cell in P and SphP structure.
+ *
+ * \return Distance to border.
+ */
+double distance_to_border(int cell)
+{
+  /* x direction: distance to upper (d1) and lower (d2) face */
+  double d1 = boxSize_X - P[cell].Pos[0];
+  assert(d1 > 0);
+
+  double d2 = P[cell].Pos[0];
+
+  double min = fmin(d1, d2);
+
+  /* y direction */
+  d1 = boxSize_Y - P[cell].Pos[1];
+  assert(d1 > 0);
+
+  d2 = P[cell].Pos[1];
+
+  double min2 = fmin(d1, d2);
+  min         = fmin(min, min2);
+
+  /* z direction */
+  d1 = boxSize_Z - P[cell].Pos[2];
+  assert(d1 > 0);
+
+  d2   = P[cell].Pos[2];
+  min2 = fmin(d1, d2);
+
+  min = fmin(min, min2);
+
+  return min;
+}
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi.h b/src/amuse/community/arepo/src/mesh/voronoi/voronoi.h
new file mode 100644
index 0000000000..31aaae1ecb
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi.h
@@ -0,0 +1,379 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/mesh/voronoi/voronoi.h
+ * \date 05/2018
+ * \brief Header for Voronoi mesh-construction
+ * \details Declares the mesh data types (point, tetra, face lists, tessellation),
+ *          the global mesh state and the prototypes of the mesh routines; also
+ *          provides the inline double <-> integer mapping helpers.
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 29.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef HAVE_H_VORONOI
+#define HAVE_H_VORONOI
+
+#include /* NOTE(review): header name is missing on this line; the mpz_t prototypes below suggest gmp.h -- confirm */
+
+#define STACKSIZE_TETRA 10000
+#define MIN_ALLOC_NUMBER 1000
+#define ALLOC_INCREASE_FACTOR 1.1
+#define ALLOC_DECREASE_FACTOR 0.7
+#define MAX_VORONOI_ITERATIONS 500
+
+#define GENTLE_DEREFINE_FACTOR 1.2
+
+#define USEDBITS 52 /* number of leading mantissa bits (out of 52) kept by the integer mapping below */
+
+#if USEDBITS > 31
+typedef signed long long int IntegerMapType;
+void MY_mpz_set_si(mpz_t dest, signed long long int val);
+void MY_mpz_mul_si(mpz_t prod, mpz_t mult, signed long long int val);
+void MY_mpz_sub_ui(mpz_t prod, mpz_t mult, unsigned long long int val);
+#else /* #if USEDBITS > 31 */
+typedef signed long int IntegerMapType;
+#define MY_mpz_set_si mpz_set_si
+#define MY_mpz_mul_si mpz_mul_si
+#define MY_mpz_sub_ui mpz_sub_ui
+#endif /* #if USEDBITS > 31 #else */
+
+#define DOUBLE_to_VORONOIINT(y) ((IntegerMapType)(((*((long long *)&y)) & 0xFFFFFFFFFFFFFllu) >> (52 - USEDBITS))) /* y must be an lvalue; NOTE(review): pointer-cast type pun, double_to_voronoiint() does the same via a union */
+
+/* Returns the upper USEDBITS bits of the 52-bit mantissa of d. Prerequisites for this function:
+ * sizeof(double)==sizeof(unsigned long long)
+ * doubles must be stored according to IEEE 754
+ */
+static inline IntegerMapType double_to_voronoiint(double d)
+{
+  union
+  {
+    double d;
+    unsigned long long ull;
+  } u;
+  u.d = d;
+  return (u.ull & 0xFFFFFFFFFFFFFllu) >> (52 - USEDBITS); /* mask keeps the low 52 bits (mantissa), shift drops the unused ones */
+}
+
+static inline double mask_voronoi_int(double x) /* returns x with its (52 - USEDBITS) lowest bits cleared */
+{
+  union
+  {
+    double d;
+    unsigned long long ull;
+  } u;
+  u.d = x;
+  u.ull = u.ull & (~((1llu << (52 - USEDBITS)) - 1));
+  return u.d;
+}
+
+#ifndef TWODIMS
+
+#define EDGE_0 1 /* points 0-1 */
+#define EDGE_1 2 /* points 0-2 */
+#define EDGE_2 4 /* points 0-3 */
+#define EDGE_3 8 /* points 1-2 */
+#define EDGE_4 16 /* points 1-3 */
+#define EDGE_5 32 /* points 2-3 */
+#define EDGE_ALL 63 /* bitwise OR of EDGE_0 ... EDGE_5 */
+
+#else /* #ifndef TWODIMS */
+
+#define EDGE_0 1 /* points 1-2 */
+#define EDGE_1 2 /* points 0-2 */
+#define EDGE_2 4 /* points 0-1 */
+#define EDGE_ALL 7 /* bitwise OR of EDGE_0 ... EDGE_2 */
+
+#endif /* #ifndef TWODIMS #else */
+
+#define HSML_INCREASE_FACTOR 1.3
+
+#ifdef TWODIMS /* will only be compiled in 2D case */
+#define DIMS 2
+#else /* #ifdef TWODIMS */
+#define DIMS 3
+#endif /*#ifdef TWODIMS #else */
+
+typedef struct
+{
+  double x, y, z;  // The 3-space position of the point
+  MyIDType ID;
+  int task;   // The MPI task owning this cell
+  int index;  // The hydro quantity index of the cell
+  int originalindex, timebin;
+  unsigned int image_flags;
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+  double xx, yy, zz; /* presumably the mapped coordinates cached by set_integers_for_pointer() -- confirm */
+  IntegerMapType ix, iy, iz; /* presumably their integer mantissas -- confirm */
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */
+
+#ifdef DOUBLE_STENCIL
+  MyFloat Hsml;
+  int first_connection;
+  int last_connection;
+  char flag_primary_triangle;
+#endif /* #ifdef DOUBLE_STENCIL */
+} point;
+
+typedef struct tetra_data
+{
+  int p[DIMS + 1]; /*!< oriented tetrahedron points */
+  int t[DIMS + 1]; /*!< adjacent tetrahedrons, always opposite to corresponding point */
+  unsigned char s[DIMS + 1]; /*!< gives the index of the point in the adjacent tetrahedron that
+                                  lies opposite to the common face */
+
+  /* Note: if t[0] == -1, the tetrahedron has been deleted */
+} tetra;
+
+typedef struct tetra_center_data
+{
+#ifndef OPTIMIZE_MEMORY_USAGE
+  double cx, cy, cz; /*!< describes circumcircle center */
+#else /* #ifndef OPTIMIZE_MEMORY_USAGE */
+  MyFloat cx, cy, cz; /*!< same circumcircle center, stored as MyFloat */
+#endif /*#ifndef OPTIMIZE_MEMORY_USAGE */
+} tetra_center;
+
+typedef struct tri_data
+{
+  double p[DIMS + 1][DIMS]; /* DIMS coordinates for each of the DIMS + 1 corners */
+  int owner;
+} triangle;
+
+extern unsigned char *Edge_visited;
+
+extern struct list_export_data
+{
+  unsigned int image_bits;
+  int origin, index;
+  int nextexport;
+} * ListExports;
+
+extern int Ninlist, MaxNinlist;
+
+extern struct area_list_data
+{
+  int task, index;
+  double darea;
+} * AreaList;
+
+extern int Narea, MaxNarea;
+
+extern int NumGasInMesh;
+extern int *List_InMesh;
+
+extern struct list_P_data
+{
+  int firstexport, currentexport;
+
+} * List_P;
+
+typedef struct connection_data
+{
+  int task;
+  int index;
+  int image_flags;
+  int next;
+
+  int dp_index; /*!< this seems to be needed always the way voronoi_makeimage is implemented at the moment */
+  int vf_index; /*!< index to the corresponding face */
+#if defined(TETRA_INDEX_IN_FACE)
+  int dt_index;
+#endif /* #if defined(TETRA_INDEX_IN_FACE)*/
+  MyIDType ID;
+} connection;
+
+/*! This structure contains the points where a line segment intersects
+ * the tetrahedron faces and the internal voronoi faces. Is returned
+ * by calc_voronoi_intersections().
+ */
+typedef struct intersection_list_data
+{
+  double s; /*!< the distance from the entry point (fraction of whole segment) */
+  point p; /*!< the intersection point */
+  int indA, indB; /*!< the indices of the tetra points (0-4) defining the face */
+} intersection_list;
+
+extern int CountInSphereTests, CountInSphereTestsExact;
+extern int CountConvexEdgeTest, CountConvexEdgeTestExact;
+extern int CountFlips, Count_1_to_3_Flips2d, Count_2_to_4_Flips2d;
+extern int Count_1_to_4_Flips, Count_2_to_3_Flips, Count_3_to_2_Flips, Count_4_to_4_Flips;
+extern int Count_EdgeSplits, Count_FaceSplits;
+extern int Count_InTetra, Count_InTetraExact;
+extern int Largest_N_DP_Buffer;
+
+extern int Ninlist, MaxNinlist; /* NOTE(review): repeats the identical declaration further up */
+
+typedef struct individual_alloc_data
+{
+  double AllocFacNdp;
+  double AllocFacNdt;
+  double AllocFacNvf;
+  double AllocFacNinlist;
+  double AllocFacN_DP_Buffer;
+  double AllocFacNflux;
+  double AllocFacNradinflux;
+  double AllocFacNvc;
+} mesh_alloc_facs;
+
+typedef struct tessellation_data
+{
+  int Ndp; /*!< number of delaunay points */
+  int MaxNdp; /*!< maximum number of delaunay points */
+  point *DP; /*!< delaunay points */
+
+  int Ndt; /*!< number of Delaunay tetrahedra */
+  int MaxNdt; /*!< maximum number of Delaunay tetrahedra */
+  tetra *DT; /*!< Delaunay tetrahedra */
+  tetra_center *DTC; /*!< circumcenters of delaunay tetrahedra */
+  char *DTF;
+
+  int Nvf; /*!< number of Voronoi faces */
+  int MaxNvf; /*!< maximum number of Voronoi faces */
+  face *VF; /*!< Voronoi faces */
+
+  mesh_alloc_facs Indi;
+} tessellation;
+
+extern tessellation Mesh, DeRefMesh;
+
+extern int DPinfinity;
+
+extern int Nvc; /* number of connections */
+extern int MaxNvc; /* maximum number of connections */
+extern int Largest_Nvc;
+extern connection *DC; /* Connections */
+extern int FirstUnusedConnection;
+
+extern double CentralOffsetX, CentralOffsetY, CentralOffsetZ, ConversionFac; /* offset and scale of the coordinate mapping used in get_integers_for_point() */
+
+int derefine_add_point_and_split_tri(int q, triangle *trilist, int n, int max_n, double vol);
+void derefine_refine_process_edge(tessellation *T, double *vol, int tt, int nr);
+void derefine_refine_compute_volumes(double *vol);
+int derefine_refine_get_triangles(tessellation *T, int tt, int nr, point *dtip, triangle *trilist, int ntri, int max_n_tri);
+void create_mesh(void);
+void mesh_setup_exchange(void);
+void free_mesh(void);
+void free_mesh_structures_not_needed_for_derefinement_refinement(void);
+void free_all_remaining_mesh_structures(void);
+void apply_area_list(void);
+int area_list_data_compare(const void *a, const void *b);
+void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask);
+void initialize_and_create_first_tetra(tessellation *T);
+void compute_voronoi_faces_and_volumes(void);
+void get_line_segments(int sphp_index, int dp_index, double *segments, unsigned int *nof_elements, unsigned int max_elements);
+double cross_section_plane_cell(int sphp_index, int dp_index, double *center, double *n);
+void intersections_plane_cell(int sphp_index, int dp_index, double *center, double *n, double *polygon, unsigned int *nof_elements);
+void intersection_plane_grid(double *center, double *n, const char *filename);
+void process_edge_faces_and_volumes(tessellation *T, int tt, int nr);
+int insert_point(tessellation *T, int pp, int ttstart);
+void make_an_edge_split(tessellation *T, int tt0, int edge_nr, int count, int pp, int *ttlist);
+void make_a_face_split(tessellation *T, int tt0, int face_nr, int pp, int tt1, int tt2, int qq1, int qq2);
+double calculate_tetra_volume(point *p0, point *p1, point *p2, point *p3);
+void make_a_4_to_4_flip(tessellation *T, int tt, int tip_index, int edge_nr);
+double get_tri_volume(int i, triangle *trilist);
+void make_a_1_to_4_flip(tessellation *T, int pp, int tt0, int tt1, int tt2, int tt3);
+void make_a_3_to_2_flip(tessellation *T, int tt0, int tt1, int tt2, int tip, int edge, int bottom);
+void make_a_2_to_3_flip(tessellation *T, int tt0, int tip, int tt1, int bottom, int qq, int tt2);
+int get_tetra(tessellation *T, point *p, int *moves, int ttstart, int *flag, int *edgeface_nr);
+int InTetra(tessellation *T, int tt, point *pp, int *edgeface_nr, int *nexttetra);
+double InSphere(point *p0, point *p1, point *p2, point *p3, point *p);
+void update_circumcircle(tessellation *T, int tt);
+int test_tetra_orientation(point *p0, point *p1, point *p2, point *p3);
+int voronoi_ghost_search_alternative(tessellation *T);
+void compute_circumcircles(tessellation *T);
+int compute_max_delaunay_radius(void);
+void check_for_min_distance(tessellation *T);
+void check_links(tessellation *T);
+void check_orientations(tessellation *T);
+void check_tetras(tessellation *T, int npoints);
+int voronoi_get_local_particles(void);
+int convex_edge_test(tessellation *T, int tt, int tip, int *edgenr);
+void calculate_gradients(void);
+void limit_gradient(double *d, double phi, double min_phi, double max_phi, MySingle *dphi);
+void exchange_primitive_variables(void);
+void exchange_primitive_variables_and_gradients(void);
+int compare_primexch(const void *a, const void *b);
+
+/* 2D voronoi routines */
+void check_edge_and_flip_if_needed(tessellation *T, int ip, int it);
+int get_triangle(tessellation *T, int pp, int *moves, int *degenerate_flag, int ttstart);
+double InCircle(point *p0, point *p1, point *p2, point *p);
+void make_a_1_to_3_flip(tessellation *T, int pp, int tt0, int tt1, int tt2);
+double test_triangle_orientation(tessellation *T, int pp0, int pp1, int pp2);
+void make_a_2_to_4_flip(tessellation *T, int pp, int tt0, int tt1, int tt2, int tt3, int i0, int j0);
+void dump_points(tessellation *T);
+void set_integers_for_pointer(point *p);
+
+#if !defined(ONEDIMS)
+#ifndef OPTIMIZE_MEMORY_USAGE
+static inline void set_integers_for_point(tessellation *T, int pp) /* forwards point pp of T to set_integers_for_pointer() */
+{
+  point *p = &T->DP[pp];
+  set_integers_for_pointer(p);
+}
+#else /* #ifndef OPTIMIZE_MEMORY_USAGE */
+static inline void get_integers_for_point(point *p, IntegerMapType ixyz[], double xyz[]) /* computes the mapping on the fly into the 3-element outputs ixyz/xyz */
+{
+  xyz[0] = (p->x - CentralOffsetX) * ConversionFac + 1.0; /* shift and scale the position, then add 1.0 */
+  xyz[1] = (p->y - CentralOffsetY) * ConversionFac + 1.0;
+  xyz[2] = (p->z - CentralOffsetZ) * ConversionFac + 1.0;
+
+  ixyz[0] = double_to_voronoiint(xyz[0]); /* integer image of the mantissa */
+  ixyz[1] = double_to_voronoiint(xyz[1]);
+  ixyz[2] = double_to_voronoiint(xyz[2]);
+
+  xyz[0] = mask_voronoi_int(xyz[0]); /* drop the mantissa bits the integer image does not hold */
+  xyz[1] = mask_voronoi_int(xyz[1]);
+  xyz[2] = mask_voronoi_int(xyz[2]);
+}
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */
+
+#else /* #if !defined(ONEDIMS) */
+void set_integers_for_point(tessellation *T, int pp);
+#endif /* #if !defined(ONEDIMS) #else */
+
+/* quick function to compare a point to the infinity point (exact comparison of x with MAX_DOUBLE_NUMBER) */
+static inline int isInfinity(point *p) { return p->x == MAX_DOUBLE_NUMBER; }
+
+int solve_linear_equations(double *m, double *res);
+void check_triangles(tessellation *T, int npoints);
+int InCircle_Quick(tessellation *T, int pp0, int pp1, int pp2, int pp);
+int InCircle_Errorbound(tessellation *T, int pp0, int pp1, int pp2, int pp);
+int InCircle_Exact(tessellation *T, int pp0, int pp1, int pp2, int pp);
+int Orient2d_Exact(tessellation *T, int pp0, int pp1, int pp2);
+int Orient2d_Quick(tessellation *T, int pp0, int pp1, int pp2);
+int FindTriangle(tessellation *T, int tt, int pp, int *degnerate_flag, int *nexttetra);
+int InSphere_Exact(point *p0, point *p1, point *p2, point *p3, point *p);
+int InSphere_Quick(point *p0, point *p1, point *p2, point *p3, point *p);
+int InSphere_Errorbound(point *p0, point *p1, point *p2, point *p3, point *p);
+int Orient3d_Quick(point *p0, point *p1, point *p2, point *p3);
+int Orient3d(point *p0, point *p1, point *p2, point *p3);
+int Orient3d_Exact(point *p0, point *p1, point *p2, point *p3);
+int count_undecided_tetras(tessellation *T);
+int ngb_treefind_ghost_search(tessellation *T, MyDouble searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat maxdist, int target,
+                              int origin, int mode, int thread_id, int numnodes, int *firstnode);
+int voronoi_ghost_search_evaluate(tessellation *T, int target, int mode, int q, int thread_id);
+int voronoi_ghost_search(tessellation *T);
+double distance_to_border(int cell);
+
+#endif /* HAVE_H_VORONOI */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d.c
new file mode 100644
index 0000000000..54c325cd3b
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d.c
@@ -0,0 +1,363 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/mesh/voronoi/voronoi_1d.c
+ * \date 05/2018
+ * \brief Routines to build a 1d Voronoi mesh
+ * \details Note that some of these routines have the same name as the ones
+ *          in voronoi_2d.c and voronoi_3d.c and just replace them in case
+ *          the Config-option ONEDIMS is active. This is also the reason
+ *          why some of these functions are empty but nonetheless have to
+ *          exist in this file.
+ *          contains functions:
+ *            void write_voronoi_mesh(tessellation * T, char *fname,
+ *              int writeTask, int lastTask)
+ *            void initialize_and_create_first_tetra(tessellation * T)
+ *            void compute_circumcircles(tessellation * T)
+ *            void set_integers_for_point(tessellation * T, int pp)
+ *            int insert_point(tessellation * T, int pp, int ttstart)
+ *            int voronoi_ghost_search(tessellation * T)
+ *            int count_undecided_tetras(tessellation * T)
+ *            int voronoi_ghost_search_alternative(tessellation * T)
+ *            void compute_voronoi_faces_and_volumes(void)
+ *            void voronoi_1D_order(void)
+ *            int voronoi_1D_compare_key(const void *a, const void *b)
+ *            void voronoi_1D_reorder_gas(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include /* NOTE(review): this and the next four #include lines carry no header name -- restore from upstream */
+#include
+#include
+#include
+#include
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+#if defined(ONEDIMS) && !defined(ONEDIMS_SPHERICAL) /* will only be compiled in 1D case */
+
+/*! \brief Output of Voronoi mesh to file.
+ *
+ * Not supported for 1d: calling it invokes terminate(). All arguments are ignored.
+ *
+ * \return void
+ */
+void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask)
+{
+  terminate("write_voronoi_mesh not supported in 1d case!");
+}
+
+/*! \brief Initialises 1d tessellation and create all-enclosing segment.
+ *
+ * \param[out] T Pointer to tessellation structure which is set and its arrays
+ *               (VF, DP, DT) are allocated in this routine; sizes derive from NumGas.
+ *
+ * \return void
+ */
+void initialize_and_create_first_tetra(tessellation *T)
+{
+  char msg[200];
+
+  if(NTask > 1) /* the 1d mesh is not parallelised: stop when more than one task is used */
+    {
+      mpi_printf("1D code works only for 1 CPU\n");
+      endrun();
+    }
+
+  T->MaxNdp = NumGas + 4;
+  T->MaxNdt = 4 + T->MaxNdp * 2;
+  T->MaxNvf = T->MaxNdt;
+
+  if(NumGas == 0)
+    {
+      sprintf(msg, "NumGas=%d on Task=%d, but need at least one particle!\n", NumGas, ThisTask);
+      terminate(msg);
+    }
+
+  T->Ndp = 0; /* all three lists start empty */
+  T->Nvf = 0;
+  T->Ndt = 0;
+
+  T->VF = mymalloc_movable(&T->VF, "VF", T->MaxNvf * sizeof(face));
+
+  T->DP = mymalloc_movable(&T->DP, "DP", (T->MaxNdp + 5) * sizeof(point)); /* 5 extra entries in front, see next line */
+  T->DP += 5; /* shift the base so that negative indices are valid (the ghost search writes DP[-1]) */
+
+  T->DT = mymalloc_movable(&T->DT, "DT", T->MaxNdt * sizeof(tetra));
+}
+
+/*! \brief Computes circumcircles in 1d.
+ *
+ * Not necessary in 1d. However, this function has to exist for the 1d code
+ * to work.
+ *
+ * \param[in] T Pointer to tessellation structure (unused).
+ *
+ * \return void
+ */
+void compute_circumcircles(tessellation *T) {}
+
+/*! \brief Empty function in 1d case.
+ *
+ * Not necessary in 1d. However, this function has to exist for the 1d code
+ * to work.
+ *
+ * \return void
+ */
+void set_integers_for_point(tessellation *T, int pp) {}
+
+/*! \brief Empty function in 1d case.
+ *
+ * Not necessary in 1d. However, this function has to exist for the 1d code
+ * to work.
+ *
+ * \return 0
+ */
+int insert_point(tessellation *T, int pp, int ttstart) { return 0; }
+
+/*! \brief Wrapper routine to search for ghost cells for boundary cells.
+ *
+ * \param[out] T Pointer to tessellation; passed on to voronoi_ghost_search_alternative().
+ *
+ * \return 0
+ */
+int voronoi_ghost_search(tessellation *T) { return voronoi_ghost_search_alternative(T); }
+
+/*! \brief Empty function in 1d case.
+ *
+ * Not necessary in 1d. However, this function has to exist for the 1d code
+ * to work.
+ *
+ * \return 0
+ */
+int count_undecided_tetras(tessellation *T) { return 0; }
+
+/*! \brief Searches for ghost cells in 1d Voronoi mesh.
+ *
+ * This routine assumes an x ordered cell array. It fills DP[-1] and DP[NumGas].
+ *
+ * \param[out] T pointer to tessellation.
+ *
+ * \return 0
+ */
+int voronoi_ghost_search_alternative(tessellation *T)
+{
+  double xl, xr; /* x position of the left / right ghost point */
+  int index_l, index_r; /* local cell each ghost is a copy of */
+
+#if defined(REFLECTIVE_X)
+  xl = -P[0].Pos[0]; /* first cell mirrored at x = 0 */
+  index_l = 0;
+
+  xr = boxSize_X + (boxSize_X - P[NumGas - 1].Pos[0]); /* last cell mirrored at x = boxSize_X */
+  index_r = NumGas - 1;
+#else /* #if defined(REFLECTIVE_X) */
+  xl = P[NumGas - 1].Pos[0] - boxSize_X; /* last cell shifted down by one box length */
+  index_l = NumGas - 1;
+
+  xr = P[0].Pos[0] + boxSize_X; /* first cell shifted up by one box length */
+  index_r = 0;
+#endif /* #if defined(REFLECTIVE_X) #else */
+
+  point *DP = T->DP; /* NOTE(review): image_flags of both ghosts is only assigned when REFLECTIVE_X is defined */
+
+  DP[-1].x = xl;
+  DP[-1].y = 0;
+  DP[-1].z = 0;
+  DP[-1].task = ThisTask;
+  DP[-1].ID = P[index_l].ID;
+  DP[-1].index = index_l + NumGas; /* this is a mirrored local point */
+#if defined(REFLECTIVE_X)
+  DP[-1].image_flags = REFL_X_FLAGS;
+#if(REFLECTIVE_X == 2)
+  DP[-1].image_flags |= OUTFLOW_X;
+#endif /* #if (REFLECTIVE_X == 2) */
+#endif /* #if defined(REFLECTIVE_X) */
+  DP[NumGas].x = xr;
+  DP[NumGas].y = 0;
+  DP[NumGas].z = 0;
+  DP[NumGas].task = ThisTask;
+  DP[NumGas].ID = P[index_r].ID;
+  DP[NumGas].index = index_r + NumGas; /* this is a mirrored local point */
+#if defined(REFLECTIVE_X)
+  DP[NumGas].image_flags = REFL_X_FLAGS;
+#if(REFLECTIVE_X == 2)
+  DP[NumGas].image_flags |= OUTFLOW_X;
+#endif /* #if (REFLECTIVE_X == 2) */
+#endif /* #if defined(REFLECTIVE_X) */
+  return 0;
+}
+
+/*! \brief Computes faces and volume of cells in 1d Voronoi mesh.
+ *
+ * Also sets SphP[i].Center (mid-point between the two faces of a cell) and SurfaceArea.
+ *
+ * \return void
+ */
+void compute_voronoi_faces_and_volumes(void)
+{
+  int i;
+
+  tessellation *T = &Mesh; /* always operates on the global Mesh */
+
+  T->Nvf = 0;
+  point *DP = T->DP;
+  face *VF = T->VF;
+
+  for(i = -1; i < NumGas; i++) /* one face between each pair of neighbours DP[i], DP[i + 1]: NumGas + 1 faces */
+    {
+      VF[T->Nvf].p1 = i;
+      VF[T->Nvf].p2 = i + 1;
+
+      VF[T->Nvf].cx = 0.5 * (DP[i].x + DP[i + 1].x); /* face sits half-way between the two points */
+
+      VF[T->Nvf].cy = 0;
+      VF[T->Nvf].cz = 0;
+      VF[T->Nvf].area = 1; /* every face has unit area in 1d */
+
+      T->Nvf++;
+    }
+
+  for(i = 0; i < NumGas; i++) /* cell i is bounded by faces VF[i] and VF[i + 1] */
+    {
+      SphP[i].Volume = VF[i + 1].cx - VF[i].cx; /* length of the cell */
+      SphP[i].Center[0] = 0.5 * (VF[i + 1].cx + VF[i].cx);
+      SphP[i].Center[1] = 0;
+      SphP[i].Center[2] = 0;
+
+      SphP[i].SurfaceArea = 2.; /* two faces of unit area */
+    }
+}
+
+/*! \brief Data for 1d Voronoi mesh: sort key (x) plus original cell index, used by voronoi_1D_order().
+ */
+static struct voronoi_1D_data
+{
+  double x;
+  int index;
+} * mp;
+
+static int *Id; /* Id[i] = target position of cell i; allocated and freed in voronoi_1D_order() */
+
+/*! \brief Sort cells by their position and reorder in P and SphP array.
+ *
+ * \return void
+ */
+void voronoi_1D_order(void)
+{
+  int i;
+
+  mpi_printf("begin 1D order...\n");
+
+  if(NumGas)
+    {
+      mp = (struct voronoi_1D_data *)mymalloc("mp", sizeof(struct voronoi_1D_data) * NumGas);
+      Id = (int *)mymalloc("Id", sizeof(int) * NumGas);
+
+      for(i = 0; i < NumGas; i++)
+        {
+          mp[i].index = i; /* remember where the cell currently sits */
+          mp[i].x = P[i].Pos[0];
+        }
+
+      mysort(mp, NumGas, sizeof(struct voronoi_1D_data), voronoi_1D_compare_key);
+
+      for(i = 0; i < NumGas; i++)
+        Id[mp[i].index] = i; /* the cell now at mp[i].index has to move to position i */
+
+      voronoi_1D_reorder_gas();
+
+      myfree(Id); /* released in reverse order of allocation */
+      myfree(mp);
+    }
+
+  mpi_printf("1D order done.\n");
+}
+
+/*! \brief Compare x value of voronoi_1D_data objects.
+ *
+ * \param[in] a Pointer to first voronoi_1D_data object.
+ * \param[in] b Pointer to second voronoi_1D_data object.
+ *
+ * \return (-1,0,1) -1 if a->x < b->x, +1 if a->x > b->x, 0 otherwise.
+ */
+int voronoi_1D_compare_key(const void *a, const void *b)
+{
+  if(((struct voronoi_1D_data *)a)->x < (((struct voronoi_1D_data *)b)->x))
+    return -1;
+
+  if(((struct voronoi_1D_data *)a)->x > (((struct voronoi_1D_data *)b)->x))
+    return +1;
+
+  return 0;
+}
+
+/*! \brief Order the gas cells according to the index given in the ID array.
+ *
+ * \return void
+ */
+void voronoi_1D_reorder_gas(void)
+{
+  int i;
+  struct particle_data Psave, Psource;
+  struct sph_particle_data SphPsave, SphPsource;
+  int idsource, idsave, dest;
+
+  for(i = 0; i < NumGas; i++)
+    {
+      if(Id[i] != i) /* cell i is not yet at its target position: walk its permutation cycle */
+        {
+          Psource = P[i];
+          SphPsource = SphP[i];
+
+          idsource = Id[i];
+          dest = Id[i];
+
+          do
+            {
+              Psave = P[dest]; /* save the occupant of the target slot before overwriting it */
+              SphPsave = SphP[dest];
+              idsave = Id[dest];
+
+              P[dest] = Psource;
+              SphP[dest] = SphPsource;
+              Id[dest] = idsource; /* idsource == dest here, so this slot is now marked as in place */
+
+              if(dest == i) /* cycle closed */
+                break;
+
+              Psource = Psave; /* continue with the displaced cell */
+              SphPsource = SphPsave;
+              idsource = idsave;
+
+              dest = idsource;
+            }
+          while(1);
+        }
+    }
+}
+
+#endif /* #if defined (ONEDIMS) && !defined (ONEDIMS_SPHERICAL) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d_spherical.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d_spherical.c
new file mode 100644
index 0000000000..c0212da41d
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_1d_spherical.c
@@ -0,0 +1,339 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/mesh/voronoi/voronoi_1d_spherical.c
+ * \date 05/2018
+ * \brief Routines to build a 1d Voronoi mesh in spherical coordinates.
+ * \details Note that some of these routines have the same name as the ones
+ *          in voronoi_2d.c and voronoi_3d.c and just replace them in case
+ *          the Config-option ONEDIMS is active. This is also the reason
+ *          why some of these functions are empty but nonetheless have to
+ *          exist in this file.
+ *          contains functions:
+ *            void write_voronoi_mesh(tessellation * T, char *fname,
+ *              int writeTask, int lastTask)
+ *            void initialize_and_create_first_tetra(tessellation * T)
+ *            void compute_circumcircles(tessellation * T)
+ *            void set_integers_for_point(tessellation * T, int pp)
+ *            int insert_point(tessellation * T, int pp, int ttstart)
+ *            int voronoi_ghost_search(tessellation * T)
+ *            int count_undecided_tetras(tessellation * T)
+ *            int voronoi_ghost_search_alternative(tessellation * T)
+ *            void compute_voronoi_faces_and_volumes(void)
+ *            void voronoi_1D_order(void)
+ *            int voronoi_1D_compare_key(const void *a, const void *b)
+ *            void voronoi_1D_reorder_gas(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 21.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include /* NOTE(review): this and the next four #include lines carry no header name -- restore from upstream */
+#include
+#include
+#include
+#include
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+#if defined(ONEDIMS) && defined(ONEDIMS_SPHERICAL) /* will only be compiled in 1D spherical case */
+
+/*! \brief Output of Voronoi mesh to file.
+ *
+ * Not supported for 1d spherical: calling it invokes terminate(). All arguments are ignored.
+ *
+ * \return void
+ */
+void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask)
+{
+  terminate("write_voronoi_mesh not supported in 1d spherical case!");
+}
+
+/*! \brief Initialises spherical 1d tessellation and create all-enclosing
+ * segment.
+ *
+ * \param[out] T Pointer to tessellation structure which is set and its arrays
+ *               (VF, DP, DT) are allocated in this routine; sizes derive from NumGas.
+ *
+ * \return void
+ */
+void initialize_and_create_first_tetra(tessellation *T)
+{
+  char msg[200];
+
+  if(NTask > 1) /* the 1d mesh is not parallelised: stop when more than one task is used */
+    {
+      mpi_terminate("1D code works only for 1 CPU\n");
+    }
+
+  T->MaxNdp = NumGas + 4;
+  T->MaxNdt = 4 + T->MaxNdp * 2;
+  T->MaxNvf = T->MaxNdt;
+
+  if(NumGas == 0)
+    {
+      sprintf(msg, "NumGas=%d on Task=%d, but need at least one particle!\n", NumGas, ThisTask);
+      terminate(msg);
+    }
+
+  T->Ndp = 0; /* all three lists start empty */
+  T->Nvf = 0;
+  T->Ndt = 0;
+
+  T->VF = mymalloc("VF", T->MaxNvf * sizeof(face));
+
+  T->DP = mymalloc("DP", (T->MaxNdp + 5) * sizeof(point)); /* 5 extra entries in front, see next line */
+  T->DP += 5; /* shift the base so that negative indices are valid (the ghost search writes DP[-1]) */
+
+  T->DT = mymalloc("DT", T->MaxNdt * sizeof(tetra));
+}
+
+/*! \brief Computes circumcircles in 1d spherical coordinates.
+ *
+ * Not necessary in 1d spherical. However, this function has to exist for
+ * the 1d spherical code to work.
+ *
+ * \param[in] T Pointer to tessellation structure (unused).
+ *
+ * \return void
+ */
+void compute_circumcircles(tessellation *T) {}
+
+/*! \brief Empty function in 1d spherical case.
+ *
+ * Not necessary in 1d spherical. However, this function has to exist for the
+ * 1d spherical code to work.
+ *
+ * \return void
+ */
+void set_integers_for_point(tessellation *T, int pp) {}
+
+/*! \brief Empty function in 1d spherical case.
+ *
+ * Not necessary in 1d spherical. However, this function has to exist for
+ * the 1d spherical code to work.
+ *
+ * \return 0
+ */
+int insert_point(tessellation *T, int pp, int ttstart) { return 0; }
+
+/*! \brief Wrapper routine to search for ghost cells for boundary cells.
+ *
+ * \param[out] T Pointer to tessellation; passed on to voronoi_ghost_search_alternative().
+ *
+ * \return 0
+ */
+int voronoi_ghost_search(tessellation *T) { return voronoi_ghost_search_alternative(T); }
+
+/*! \brief Empty function in 1d spherical case.
+ *
+ * Not necessary in 1d spherical. However, this function has to exist for
+ * the 1d spherical code to work.
+ *
+ * \return 0
+ */
+int count_undecided_tetras(tessellation *T) { return 0; }
+
+/*! \brief Searches for ghost cells in 1d spherical Voronoi mesh.
+ *
+ * This routine assumes a radius ordered cell array. It fills DP[-1] and DP[NumGas].
+ *
+ * \param[out] T pointer to tessellation.
+ *
+ * \return 0
+ */
+int voronoi_ghost_search_alternative(tessellation *T)
+{
+  point *DP = T->DP; /* NOTE(review): image_flags of the two ghost points is not assigned here */
+
+  /* reflective inner boundaries */
+  DP[-1].x = 2. * All.CoreRadius - P[0].Pos[0]; /* innermost cell mirrored at All.CoreRadius */
+  DP[-1].y = 0;
+  DP[-1].z = 0;
+  DP[-1].task = ThisTask;
+  DP[-1].ID = P[0].ID;
+  DP[-1].index = NumGas; /* this is a mirrored local point */
+
+  /* outflow outer boundaries */
+  DP[NumGas].x = boxSize_X + (boxSize_X - P[NumGas - 1].Pos[0]); /* outermost cell mirrored at boxSize_X */
+  DP[NumGas].y = 0;
+  DP[NumGas].z = 0;
+  DP[NumGas].task = ThisTask;
+  DP[NumGas].ID = P[NumGas - 1].ID;
+  DP[NumGas].index = NumGas - 1 + NumGas; /* this is a mirrored local point */
+
+  return 0;
+}
+
+/*! \brief Compute faces and volume of cells in 1d spherical Voronoi mesh.
+ *
+ * Also sets SphP[i].Center (mid-point in radius between the two faces), SurfaceArea and ActiveArea.
+ *
+ * \return void
+ */
+void compute_voronoi_faces_and_volumes(void)
+{
+  int i;
+
+  tessellation *T = &Mesh; /* always operates on the global Mesh */
+
+  T->Nvf = 0;
+  point *DP = T->DP;
+  face *VF = T->VF;
+
+  for(i = -1; i < NumGas; i++) /* one face between each pair of neighbours DP[i], DP[i + 1]: NumGas + 1 faces */
+    {
+      VF[T->Nvf].p1 = i;
+      VF[T->Nvf].p2 = i + 1;
+
+      VF[T->Nvf].cx = 0.5 * (DP[i].x + DP[i + 1].x); /* face radius: half-way between the two points */
+      VF[T->Nvf].cy = 0;
+      VF[T->Nvf].cz = 0;
+      VF[T->Nvf].area = 4. * M_PI * VF[T->Nvf].cx * VF[T->Nvf].cx; /* surface of a sphere of radius cx */
+
+      T->Nvf++;
+    }
+
+  for(i = 0; i < NumGas; i++) /* cell i is the shell between faces VF[i] and VF[i + 1] */
+    {
+      SphP[i].Volume = 4.0 / 3.0 * M_PI * (VF[i + 1].cx * VF[i + 1].cx * VF[i + 1].cx - VF[i].cx * VF[i].cx * VF[i].cx); /* shell volume: difference of two spheres */
+      SphP[i].Center[0] = 0.5 * (VF[i + 1].cx + VF[i].cx); /* arithmetic mid-point in radius, not volume weighted */
+      SphP[i].Center[1] = 0;
+      SphP[i].Center[2] = 0;
+
+      SphP[i].SurfaceArea = VF[i].area + VF[i + 1].area; /* inner plus outer face */
+      SphP[i].ActiveArea = SphP[i].SurfaceArea;
+    }
+}
+
+/*! \brief Structure for 1d spherical Voronoi mesh: sort key (x) plus original cell index, used by voronoi_1D_order().
+ */
+static struct voronoi_1D_data
+{
+  double x;
+  int index;
+} * mp;
+
+static int *Id; /* Id[i] = target position of cell i; allocated and freed in voronoi_1D_order() */
+
+/*! \brief Sort cells by their position (i.e. radius) and reorder in P and
+ * SphP array.
+ *
+ * \return void
+ */
+void voronoi_1D_order(void)
+{
+  int i;
+
+  mpi_printf("begin 1D order...\n");
+
+  if(NumGas)
+    {
+      mp = (struct voronoi_1D_data *)mymalloc("mp", sizeof(struct voronoi_1D_data) * NumGas);
+      Id = (int *)mymalloc("Id", sizeof(int) * NumGas);
+
+      for(i = 0; i < NumGas; i++)
+        {
+          mp[i].index = i; /* remember where the cell currently sits */
+          mp[i].x = P[i].Pos[0];
+        }
+
+      mysort(mp, NumGas, sizeof(struct voronoi_1D_data), voronoi_1D_compare_key);
+
+      for(i = 0; i < NumGas; i++)
+        Id[mp[i].index] = i; /* the cell now at mp[i].index has to move to position i */
+
+      voronoi_1D_reorder_gas();
+
+      myfree(Id); /* released in reverse order of allocation */
+      myfree(mp);
+    }
+
+  mpi_printf("1D order done.\n");
+}
+
+/*! \brief Compare x value of voronoi_1D_data objects.
+ *
+ * \param[in] a Pointer to first voronoi_1D_data object.
+ * \param[in] b Pointer to second voronoi_1D_data object.
+ *
+ * \return (-1,0,1) -1 if a->x < b->x, +1 if a->x > b->x, 0 otherwise.
+ */
+int voronoi_1D_compare_key(const void *a, const void *b)
+{
+  if(((struct voronoi_1D_data *)a)->x < (((struct voronoi_1D_data *)b)->x))
+    return -1;
+
+  if(((struct voronoi_1D_data *)a)->x > (((struct voronoi_1D_data *)b)->x))
+    return +1;
+
+  return 0;
+}
+
+/*! \brief Order the gas cells according to the index given in the ID array.
+ *
+ * \return void
+ */
+void voronoi_1D_reorder_gas(void)
+{
+  int i;
+  struct particle_data Psave, Psource;
+  struct sph_particle_data SphPsave, SphPsource;
+  int idsource, idsave, dest;
+
+  for(i = 0; i < NumGas; i++)
+    {
+      if(Id[i] != i) /* cell i is not yet at its target position: walk its permutation cycle */
+        {
+          Psource = P[i];
+          SphPsource = SphP[i];
+
+          idsource = Id[i];
+          dest = Id[i];
+
+          do
+            {
+              Psave = P[dest]; /* save the occupant of the target slot before overwriting it */
+              SphPsave = SphP[dest];
+              idsave = Id[dest];
+
+              P[dest] = Psource;
+              SphP[dest] = SphPsource;
+              Id[dest] = idsource; /* idsource == dest here, so this slot is now marked as in place */
+
+              if(dest == i) /* cycle closed */
+                break;
+
+              Psource = Psave; /* continue with the displaced cell */
+              SphPsource = SphPsave;
+              idsource = idsave;
+
+              dest = idsource;
+            }
+          while(1);
+        }
+    }
+}
+
+#endif /* #if defined (ONEDIMS) && defined (ONEDIMS_SPHERICAL) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_2d.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_2d.c
new file mode 100644
index 0000000000..7e9e519c13
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_2d.c
@@ -0,0 +1,2110 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/mesh/voronoi/voronoi_2d.c
+ * \date 05/2018
+ * \brief Routines to build a 2d Voronoi mesh.
+ * \details Note that some of these routines have the same name as the ones + * in voronoi_1d.c and voronoi_3d.c and just replace them in case + * the Config-option TWODIMS is active. This is also the reason + * why some of these functions are empty but nonetheless have to + * exist in this file. + * contains functions: + * void initialize_and_create_first_tetra(tessellation * T) + * int insert_point(tessellation * T, int pp, int ttstart) + * void make_a_2_to_4_flip(tessellation * T, int pp, int tt0, + * int tt1, int tt2, int tt3, int i0, int j0) + * void make_a_1_to_3_flip(tessellation * T, int pp, int tt0, + * int tt1, int tt2) + * void check_edge_and_flip_if_needed(tessellation * T, int ip, + * int it) + * int get_triangle(tessellation * T, int pp, int *moves, int + * *degenerate_flag, int ttstart) + * static inline void add_row_2d(double *m, int r1, int r2, + * double fac) + * int solve_linear_equations_2d(double *m, double *res) + * int FindTriangle(tessellation * T, int tt, int pp, + * int *degnerate_flag, int *nexttetra) + * int InCircle_Quick(tessellation * T, int pp0, int pp1, + * int pp2, int pp) + * int InCircle_Errorbound(tessellation * T, int pp0, int pp1, + * int pp2, int pp) + * int InCircle_Exact(tessellation * T, int pp0, int pp1, + * int pp2, int pp) + * double test_triangle_orientation(tessellation * T, int pp0, + * int pp1, int pp2) + * int Orient2d_Quick(tessellation * T, int pp0, int pp1, + * int pp2) + * int Orient2d_Exact(tessellation * T, int pp0, int pp1, + * int pp2) + * void process_edge_faces_and_volumes(tessellation * T, int tt, + * int nr) + * int derefine_refine_get_triangles(tessellation * T, int tt, + * int nr, point * dtip, triangle * trilist, int ntri, + * int max_n_tri) + * int derefine_add_point_and_split_tri(int q, triangle + * * trilist, int ntri, int max_ntri, double vol) + * double get_tri_volume(int i, triangle * trilist) + * void derefine_refine_process_edge(tessellation * T, double + * *vol, int tt, int nr) + * void 
compute_circumcircles(tessellation * T) + * void update_circumcircle(tessellation * T, int tt) + * void set_integers_for_pointer(point * p) + * void write_voronoi_mesh(tessellation * T, char *fname, int + * writeTask, int lastTask) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" + +#if defined(TWODIMS) && !defined(ONEDIMS) /* will only be compiled in 2D case */ + +#define INSIDE_EPS 1.0e-8 +#define GAUSS_EPS 1.0e-8 + +/*! \brief Initializes 2d tessellation and create all-enclosing triangle. + * + * \param[out] T Pointer to tessellation structure which is set and its arrays + * are allocated in this routine. + * + * \return void + */ +void initialize_and_create_first_tetra(tessellation *T) +{ + point *p; + int i, n; + + T->MaxNdp = T->Indi.AllocFacNdp; + T->MaxNdt = T->Indi.AllocFacNdt; + T->MaxNvf = T->Indi.AllocFacNvf; + + T->Ndp = 0; + T->Nvf = 0; + T->Ndt = 0; + + T->VF = mymalloc_movable(&T->VF, "VF", T->MaxNvf * sizeof(face)); + + T->DP = mymalloc_movable(&T->DP, "DP", (T->MaxNdp + 5) * sizeof(point)); + T->DP += 5; + + T->DT = mymalloc_movable(&T->DT, "DT", T->MaxNdt * sizeof(tetra)); + + /* construct all encompassing huge triangle */ + double box, tetra_incircle, tetra_sidelength, tetra_height; + + box = boxSize_X; + if(box < boxSize_Y) + box = boxSize_Y; + + box *= 1.05; + + tetra_incircle = 2.001 * (1 + sqrt(3)) / 3.0 * box; /* to give room for ghost particles needed for periodic/reflective + boundary conditions, the incircle is twice as large, i.e. 
+ [-0.5*box, 1.5*box,-0.5*box, 1.5*box] should be inside triangle */ + tetra_sidelength = tetra_incircle * sqrt(12); + tetra_height = sqrt(3.0) / 2 * tetra_sidelength; + + if(ThisTask == 0) + printf("side-length of enclosing triangle=%g tetra_height=%g box=%g\n", tetra_sidelength, tetra_height, box); + + point *DP = T->DP; + tetra *DT = T->DT; + + /* first, let's make the points */ + DP[-3].x = 0.5 * tetra_sidelength; + DP[-3].y = -1.0 / 3 * tetra_height; + DP[-3].z = 0; + + DP[-2].x = 0; + DP[-2].y = 2.0 / 3 * tetra_height; + DP[-2].z = 0; + + DP[-1].x = -0.5 * tetra_sidelength; + DP[-1].y = -1.0 / 3 * tetra_height; + DP[-1].z = 0; + + for(i = -3; i <= -1; i++) + { + DP[i].x += 0.5 * box; + DP[i].y += 1.0 / 3 * tetra_height - 0.5 * box; + } + + for(i = -3, p = &DP[-3]; i < 0; i++, p++) + { + p->index = -1; + p->task = ThisTask; + p->timebin = 0; + } + + /* we also define a neutral element at infinity */ + DPinfinity = -4; + + DP[DPinfinity].x = MAX_DOUBLE_NUMBER; + DP[DPinfinity].y = MAX_DOUBLE_NUMBER; + DP[DPinfinity].z = MAX_DOUBLE_NUMBER; + DP[DPinfinity].index = -1; + DP[DPinfinity].task = ThisTask; + DP[DPinfinity].timebin = 0; + + /* now let's make the big triangle */ + DT[0].p[0] = -3; + DT[0].p[1] = -2; + DT[0].p[2] = -1; + + /* On the outer faces, we attach tetrahedra with the neutral element as tip. + * This way we will be able to navigate nicely within the tesselation, + * and all tetrahedra have defined neighbouring tetrahedra. 
+ */ + + for(i = 0; i < 3; i++) + { + n = i + 1; /* tetra index */ + + DT[0].t[i] = n; + DT[0].s[i] = 2; + + DT[n].t[2] = 0; + DT[n].s[2] = i; + DT[n].p[2] = DPinfinity; + } + + DT[1].p[0] = DT[0].p[2]; + DT[1].p[1] = DT[0].p[1]; + + DT[2].p[0] = DT[0].p[0]; + DT[2].p[1] = DT[0].p[2]; + + DT[3].p[0] = DT[0].p[1]; + DT[3].p[1] = DT[0].p[0]; + + DT[1].t[0] = 3; + DT[3].t[1] = 1; + DT[1].s[0] = 1; + DT[3].s[1] = 0; + + DT[1].t[1] = 2; + DT[2].t[0] = 1; + DT[1].s[1] = 0; + DT[2].s[0] = 1; + + DT[2].t[1] = 3; + DT[3].t[0] = 2; + DT[2].s[1] = 0; + DT[3].s[0] = 1; + + T->Ndt = 4; /* we'll start out with 4 triangles */ + + CentralOffsetX = 0.5 * box - 0.5000001 * tetra_sidelength; + CentralOffsetY = -0.5000001 * box; + + ConversionFac = 1.0 / (1.001 * tetra_sidelength); + + for(i = -3; i < 0; i++) + set_integers_for_point(T, i); +} + +/*! \brief Insert a point into mesh. + * + * Finds the triangle that contains this point, splits the triangle (usually + * into three). After this, flip the edges if needed restore + * Delaunayhood (which is applied recursively) until a valid Delaunay mesh + * is restored. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] pp Index of Delaunay point in DP array. + * \param[in] ttstart Initial guess in which triangle it might be, + * index in DT array. + * + * \return Index of triangle containing point pp. 
+ */ +int insert_point(tessellation *T, int pp, int ttstart) +{ + int tt0, tt1, tt2, tt3, ttetra_with_p; + int moves, degenerate_flag; + + /* first, need to do a point location */ + tt0 = get_triangle(T, pp, &moves, °enerate_flag, ttstart); + + ttetra_with_p = tt0; + + if(degenerate_flag == 1) /* that's the normal split of a triangle into 3 */ + { + /* we now need to split this triangle into three */ + tt1 = T->Ndt++; + tt2 = T->Ndt++; + + if(T->Ndt > T->MaxNdt) + { + T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR; + T->MaxNdt = T->Indi.AllocFacNdt; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, T->Indi.AllocFacNdt); +#endif /* #ifdef VERBOSE */ + T->DT = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra)); + T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center)); + T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char)); + + if(T->Ndt > T->MaxNdt) + terminate("Ndt > MaxNdt"); + } + + T->DT[tt1] = T->DT[tt0]; + T->DT[tt2] = T->DT[tt0]; + + make_a_1_to_3_flip(T, pp, tt0, tt1, tt2); + + T->DTF[tt0] = 0; + T->DTF[tt1] = 0; + T->DTF[tt2] = 0; + + check_edge_and_flip_if_needed(T, pp, tt0); + check_edge_and_flip_if_needed(T, pp, tt1); + check_edge_and_flip_if_needed(T, pp, tt2); + } + else + { + degenerate_flag -= 10; + + tt1 = T->DT[tt0].t[degenerate_flag]; + + /* we now need to split this into two triangles */ + tt2 = T->Ndt++; + tt3 = T->Ndt++; + + if(T->Ndt > T->MaxNdt) + { + T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR; + T->MaxNdt = T->Indi.AllocFacNdt; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, T->Indi.AllocFacNdt); +#endif /* #ifdef VERBOSE */ + T->DT = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra)); + T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center)); + T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char)); + + if(T->Ndt > T->MaxNdt) + terminate("Ndt > MaxNdt"); + } + + 
T->DT[tt2] = T->DT[tt0]; + T->DT[tt3] = T->DT[tt1]; + + make_a_2_to_4_flip(T, pp, tt0, tt1, tt2, tt3, degenerate_flag, T->DT[tt0].s[degenerate_flag]); + + T->DTF[tt0] = 0; + T->DTF[tt1] = 0; + T->DTF[tt2] = 0; + T->DTF[tt3] = 0; + + check_edge_and_flip_if_needed(T, pp, tt0); + check_edge_and_flip_if_needed(T, pp, tt1); + check_edge_and_flip_if_needed(T, pp, tt2); + check_edge_and_flip_if_needed(T, pp, tt3); + } + + return ttetra_with_p; +} + +/*! \brief Make a 2 to 4 flip needed if point is on edge of a Delaunay + * triangle. + * + * If a new point is at the edge of a Delaunay triangle, both adjacent + * triangles need to be split into two. See Springel (2010) for a + * detailed discussion. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] pp Index of Delaunay point in DP array. + * \param[in] tt0 Index of point 0 in DT array. + * \param[in] tt1 Index of point 1 in DT array. + * \param[in] tt2 Index of point 2 in DT array. + * \param[in] tt3 Index of point 3 in DT array. + * \param[in] i0 Index (in DT->s) of point opposite to common face that needs + * to be involved in flip. + * \param[in] j0 Second Index (in DT->s) of point opposite to common face that + * needs to be involved in flip. 
+ * + * \return void + */ +void make_a_2_to_4_flip(tessellation *T, int pp, int tt0, int tt1, int tt2, int tt3, int i0, int j0) +{ + tetra *DT = T->DT; + tetra *t0 = &DT[tt0]; + tetra *t1 = &DT[tt1]; + tetra *t2 = &DT[tt2]; + tetra *t3 = &DT[tt3]; + + int i1, i2, j1, j2; + + CountFlips++; + Count_2_to_4_Flips2d++; + + i1 = i0 + 1; + i2 = i0 + 2; + j1 = j0 + 1; + j2 = j0 + 2; + + if(i1 > 2) + i1 -= 3; + if(i2 > 2) + i2 -= 3; + + if(j1 > 2) + j1 -= 3; + if(j2 > 2) + j2 -= 3; + + t0->p[i1] = pp; + t1->p[j2] = pp; + t2->p[i2] = pp; + t3->p[j1] = pp; + + t0->t[i0] = tt1; + t1->t[j0] = tt0; + t0->s[i0] = j0; + t1->s[j0] = i0; + + t1->t[j1] = tt3; + t3->t[j2] = tt1; + t1->s[j1] = j2; + t3->s[j2] = j1; + + t2->t[i1] = tt0; + t0->t[i2] = tt2; + t2->s[i1] = i2; + t0->s[i2] = i1; + + t2->t[i0] = tt3; + t3->t[j0] = tt2; + t2->s[i0] = j0; + t3->s[j0] = i0; + + DT[t0->t[i1]].t[t0->s[i1]] = tt0; + DT[t1->t[j2]].t[t1->s[j2]] = tt1; + DT[t2->t[i2]].t[t2->s[i2]] = tt2; + DT[t3->t[j1]].t[t3->s[j1]] = tt3; +} + +/*! \brief Makes a 1 to 3 flip needed if point is in a Delaunay triangle. + * + * If a new point is in a Delaunay triangle, this + * triangles need to be split into three. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] pp Index of Delaunay point in DP array. + * \param[in] tt0 Index of point 0 in DT array. + * \param[in] tt1 Index of point 1 in DT array. + * \param[in] tt2 Index of point 2 in DT array. 
+ * + * \return void + */ +void make_a_1_to_3_flip(tessellation *T, int pp, int tt0, int tt1, int tt2) +{ + tetra *DT = T->DT; + tetra *t0 = &DT[tt0]; + tetra *t1 = &DT[tt1]; + tetra *t2 = &DT[tt2]; + + CountFlips++; + Count_1_to_3_Flips2d++; + + t0->p[0] = pp; + t1->p[1] = pp; + t2->p[2] = pp; + + t0->t[1] = tt1; + t1->t[0] = tt0; + t0->s[1] = 0; + t1->s[0] = 1; + + t1->t[2] = tt2; + t2->t[1] = tt1; + t1->s[2] = 1; + t2->s[1] = 2; + + t2->t[0] = tt0; + t0->t[2] = tt2; + t2->s[0] = 2; + t0->s[2] = 0; + + DT[t0->t[0]].t[t0->s[0]] = tt0; + DT[t1->t[1]].t[t1->s[1]] = tt1; + DT[t2->t[2]].t[t2->s[2]] = tt2; +} + +/*! \brief Flips trangle if needed. + * + * See Springel (2010) for detailed discussion how mesh is constructed. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] ip Index to Delaunay point, DP array. + * \param[in] it Index to corner of triangle, DT array. + * + * \return void + */ +void check_edge_and_flip_if_needed(tessellation *T, int ip, int it) +{ + tetra *DT = T->DT; + char *DTF = T->DTF; + + tetra *t = &DT[it]; + + int tt, pp, t0, t2; + int pi, pi1, pi2; + int ni, ni1, ni2; + int st2, st0; + + if(t->p[0] == ip) + pi = 0; + else if(t->p[1] == ip) + pi = 1; + else + pi = 2; + + /* get the point that lies accross the edge to obtain the quadriliteral */ + + tt = t->t[pi]; + ni = t->s[pi]; + pp = DT[tt].p[ni]; + + int ret, ret_exact; + + ret = InCircle_Errorbound(T, t->p[0], t->p[1], t->p[2], pp); + CountInSphereTests++; + + if(ret != 0) + ret_exact = ret; + else + { + ret_exact = InCircle_Exact(T, t->p[0], t->p[1], t->p[2], pp); + CountInSphereTestsExact++; + } + + if(ret_exact > 0) + { + /* pp lies in the triangle, the edge is not Delaunay. 
Need to do a flip */ + + CountFlips++; + + ni1 = ni + 1; + if(ni1 > 2) + ni1 -= 3; + ni2 = ni + 2; + if(ni2 > 2) + ni2 -= 3; + + pi1 = pi + 1; + if(pi1 > 2) + pi1 -= 3; + pi2 = pi + 2; + if(pi2 > 2) + pi2 -= 3; + + t0 = DT[tt].t[ni1]; + t2 = t->t[pi1]; + + st0 = DT[tt].s[ni1]; + st2 = t->s[pi1]; + + /* change the points of the triangles */ + t->p[pi2] = pp; + DT[tt].p[ni2] = ip; + + /* change the pointers to the neighbouring triangles, and fix + the adjency relations */ + + t->t[pi1] = tt; + DT[tt].t[ni1] = it; + t->s[pi1] = ni1; + DT[tt].s[ni1] = pi1; + + t->t[pi] = t0; + DT[t0].t[st0] = it; + t->s[pi] = st0; + DT[t0].s[st0] = pi; + + DT[tt].t[ni] = t2; + DT[t2].t[st2] = tt; + DT[tt].s[ni] = st2; + DT[t2].s[st2] = ni; + + DTF[tt] = 0; + DTF[it] = 0; + + /* now we need to test also the two sides opposite of p */ + check_edge_and_flip_if_needed(T, ip, it); + check_edge_and_flip_if_needed(T, ip, tt); + } +} + +/*! \brief Finds triangle in which new Delaunay point is located. + * + * Starts with a suggested triangle ttstart and checks if the point is + * contained in this triangle. If not, the procedure is repeated for the + * neighboring triangle. + * + * \param[in] T Pointer to tessellation. + * \param[in] pp Index of Delaunay point in DP array. + * \param[out] moves Number of iterations to find the correct triangle. + * \param[out] degenerate_flag Flag if point lies on edge of a triangle. + * \param[in] ttstart Starting index for the search for the correct triangle. + * + * \return Index of triangle in DT array. + */ +int get_triangle(tessellation *T, int pp, int *moves, int *degenerate_flag, int ttstart) +{ + int count_moves = 0; + int ret; + int tt, next_tetra; + + tt = ttstart; + +#define MAX_COUNT_MOVES 1000000 + + while((ret = FindTriangle(T, tt, pp, degenerate_flag, &next_tetra)) == 0) + { + /* we need to see in which of the three possible neighbouring triangles + we should walk. 
We'll choose the one which lies along the face that + is traversed by a line from the cm of the triangle to the point in + question. + */ + count_moves++; + + if(count_moves > MAX_COUNT_MOVES) + { + printf("ta=%d triangle=%d xy=(%g|%g) ID=%d\n", ThisTask, (int)(tt), T->DP[pp].x, T->DP[pp].y, T->DP[pp].ID); + if(count_moves > MAX_COUNT_MOVES + 10) + terminate("too many moves, problem to find triangle"); + } + + tt = next_tetra; + } + + *moves = count_moves; + + return tt; +} + +/*! \brief Add row in matrix equation. + * + * Auxiliary function for solve_linear_equations_2d. + * + * \param[in, out] m Matrix. + * \param[in] r1 Index of row to be modified. + * \param[in] r2 Index of row which is added to r1. + * \param[in] fac Factor by which row r2 is multiplied before adding to r1. + * + * \return void + */ +static inline void add_row_2d(double *m, int r1, int r2, double fac) +{ + int i; + + for(i = 0; i < 3; i++) + m[r1 * 3 + i] += fac * m[r2 * 3 + i]; +} + +/*! \brief Solve system of linear equations for 2d Voronoi construction. + * + * This is needed in get_triangle routine. + * + * \param[in, out] m Matrix. + * \param[in, out] res Array for result. + * + * \return 0 if success, -1 else. + */ +int solve_linear_equations_2d(double *m, double *res) +{ + int ix, iy; + + if(fabs(m[0]) > fabs(m[3])) + { + ix = 0; + iy = 1; + } + else + { + ix = 1; + iy = 0; + } + + add_row_2d(m, iy, ix, -m[iy * 3] / m[ix * 3]); + + res[1] = m[iy * 3 + 2] / m[iy * 3 + 1]; + res[0] = (m[ix * 3 + 2] - res[1] * m[ix * 3 + 1]) / m[ix * 3]; + + if(fabs(m[ix * 3]) < 1.0e-12) + return -1; + + return 0; +} + +/*! \brief Does point lie in triangle? + * + * Tests whether point pp lies in the triangle, on an edge, or outside. In the + * latter case, a neighboring triangle is returned. First, a fast search is + * performed and if this yields that point might be on an edge, a (more + * expensive) exact determination is performed. + * + * \param[in] T Pointer to tessellation. 
+ * \param[in] tt Index of triangle in DT array. + * \param[in] pp Index of Delaunay point in DP array. + * \param[out] degenerate_flag Flag if point lies on edge of a triangle. + * \param[out] nexttetra Index of neighboring triangle in direction of point. + * + * \return 1: point inside triangle; 0 outside; 10,11,12: on edge. + */ +int FindTriangle(tessellation *T, int tt, int pp, int *degnerate_flag, int *nexttetra) +{ + tetra *DT = T->DT; + point *DP = T->DP; + tetra *t = &DT[tt]; + point *p = &DP[pp]; + + int pp0, pp1, pp2; + point *p0, *p1, *p2; + + pp0 = t->p[0]; + pp1 = t->p[1]; + pp2 = t->p[2]; + + p0 = &DP[pp0]; + p1 = &DP[pp1]; + p2 = &DP[pp2]; + + if(pp0 == DPinfinity || pp1 == DPinfinity || pp2 == DPinfinity) + { + char buf[1000]; + sprintf(buf, "we are in a triangle with an infinity point. tetra=%d p=(%g|%g)\n", (int)(tt), p->x, p->y); + terminate(buf); + } + + Count_InTetra++; + + double ax = p1->xx - p0->xx; + double ay = p1->yy - p0->yy; + + double bx = p2->xx - p0->xx; + double by = p2->yy - p0->yy; + + double qx = p->xx - p0->xx; + double qy = p->yy - p0->yy; + + double mv_data[] = {ax, bx, qx, ay, by, qy}; + double x[2]; + + int ivol, flag2, flag1, flag0; + int count_zeros = 0; + + int status; + + status = solve_linear_equations_2d(mv_data, x); + + if(status < 0) + { + ivol = Orient2d_Exact(T, t->p[0], t->p[1], t->p[2]); + if(ivol <= 0) + { + char buf[1000]; + sprintf(buf, "flat or negatively triangle found (ivol=%d)\n", ivol); + terminate(buf); + } + } + + if(status >= 0) + { + if(x[0] > INSIDE_EPS && x[1] > INSIDE_EPS && (1 - (x[0] + x[1])) > INSIDE_EPS) + { + /* looks like we are safely inside the triangle */ + + *degnerate_flag = 1; + return 1; + } + + if(x[0] < -INSIDE_EPS || x[1] < -INSIDE_EPS || (1 - (x[0] + x[1])) < -INSIDE_EPS) + { + /* looks like we are clearly outside the triangle. 
+ Let's look for a good neighbouring triangle to continue the search */ + + /* note: in the (a,b) basis, the center-of-mass has coordinates (1/3, 1/3) */ + + double w, u; + + if(fabs(x[1] - (1.0 / 3)) > INSIDE_EPS) + { + w = (1.0 / 3) / ((1.0 / 3) - x[1]); + if(w > 0) + { + u = (1.0 / 3) + w * (x[0] - (1.0 / 3)); + if(u > -INSIDE_EPS && (1 - u) > -INSIDE_EPS) + { + *nexttetra = t->t[2]; + return 0; + } + } + } + + if(fabs(x[0] - (1.0 / 3)) > INSIDE_EPS) + { + w = (1.0 / 3) / ((1.0 / 3) - x[0]); + if(w > 0) + { + u = (1.0 / 3) + w * (x[1] - (1.0 / 3)); + if(u > -INSIDE_EPS && (1 - u) > -INSIDE_EPS) + { + *nexttetra = t->t[1]; + return 0; + } + } + } + + *nexttetra = t->t[0]; + return 0; + } + } + + /* here we need to decide whether we have a degenerate case, i.e. + whether we think the point lies on an edge of the triangle */ + + Count_InTetraExact++; + + ivol = Orient2d_Exact(T, t->p[0], t->p[1], t->p[2]); + + if(ivol <= 0) + { + char buf[1000]; + sprintf(buf, "flat or negatively oriented triangle found (ivol=%d)\n", ivol); + terminate(buf); + } + + flag0 = Orient2d_Exact(T, pp1, pp2, pp); + flag1 = Orient2d_Exact(T, pp2, pp0, pp); + flag2 = Orient2d_Exact(T, pp0, pp1, pp); + + if(flag0 == 0) + count_zeros++; + + if(flag1 == 0) + count_zeros++; + + if(flag2 == 0) + count_zeros++; + + if(count_zeros >= 2) + { + printf("flags=%d %d %d\n", flag0, flag1, flag2); + + printf("points: %d %d %d %d\n", (int)(pp0), (int)(pp1), (int)(pp2), (int)(pp)); + printf("Ngas=%d\n", NumGas); + printf("xyz, p=%d: (%g|%g) index=%d task=%d ID=%d flags\n", (int)(pp0), p0->x, p0->y, p0->index, p0->task, + P[p0->index % NumGas].ID); + printf("xyz, p=%d: (%g|%g) index=%d task=%d ID=%d flags\n", (int)(pp1), p1->x, p1->y, p1->index, p1->task, + P[p1->index % NumGas].ID); + printf("xyz, p=%d: (%g|%g) index=%d task=%d ID=%d flags\n", (int)(pp2), p2->x, p2->y, p2->index, p2->task, + P[p2->index % NumGas].ID); + printf("xyz, p=%d: (%g|%g) index=%d task=%d ID=%d flags\n", (int)(pp), p->x, p->y, 
p->index, p->task, P[p->index % NumGas].ID); + terminate("too many zeros - (perhaps identical points inserted?)"); + } + + if(flag0 >= 0 && flag1 >= 0 && flag2 >= 0) + { + /* we have a point inside the triangle, but it may still be on one of the edges */ + + if(count_zeros == 0) + { + /* ok, we are inside */ + *degnerate_flag = 1; + return 1; + } + + if(count_zeros == 1) /* we lie on a face */ + { + if(flag2 == 0) + { + *degnerate_flag = 12; + return 12; /* point lies on side A */ + } + if(flag1 == 0) + { + *degnerate_flag = 11; + return 11; /* point lies on side C */ + } + + if(flag0 == 0) + { + *degnerate_flag = 10; + return 10; /* point lies on side B */ + } + } + } + + /* we are clearly outside, let's select the suitable neighbour */ + + if(flag0 < 0 && flag1 >= 0 && flag2 >= 0) + { + *nexttetra = t->t[0]; + return 0; + } + + if(flag0 >= 0 && flag1 < 0 && flag2 >= 0) + { + *nexttetra = t->t[1]; + return 0; + } + + if(flag0 >= 0 && flag1 >= 0 && flag2 < 0) + { + *nexttetra = t->t[2]; + return 0; + } + + /* there are apparently two negative values. Let's pick a random one */ + + int ind = -1; + + if(flag0 < 0) + { + if(ind < 0) + ind = 0; + else + { + if(get_random_number() < 0.5) + ind = 0; + } + } + + if(flag1 < 0) + { + if(ind < 0) + ind = 1; + else + { + if(get_random_number() < 0.5) + ind = 1; + } + } + + if(flag2 < 0) + { + if(ind < 0) + ind = 2; + else + { + if(get_random_number() < 0.5) + ind = 2; + } + } + + *nexttetra = t->t[ind]; + return 0; +} + +/*! \brief Tests whether point pp lies in the circumcircle around triangle + * p0,p1,p2. + * + * \param[in] T Pointer to tessellation. + * \param[in] pp0 Index in DP of first point in triangle. + * \param[in] pp1 Index in DP of second point in triangle. + * \param[in] pp2 Index in DP of third point in triangle. + * \param[in] pp Index in DP of point to be checked. + * + * \return (-1,0,1); -1: in circle; 0 on circle, 1: outside circle. 
+ */ +int InCircle_Quick(tessellation *T, int pp0, int pp1, int pp2, int pp) +{ + point *DP = T->DP; + point *p0 = &DP[pp0]; + point *p1 = &DP[pp1]; + point *p2 = &DP[pp2]; + point *p = &DP[pp]; + + double ax, ay, bx, by, cx, cy; + double ab, bc, ca, a2, b2, c2, x; + + if(pp0 == DPinfinity || pp1 == DPinfinity || pp2 == DPinfinity || pp == DPinfinity) + return -1; + + ax = p0->xx - p->xx; + ay = p0->yy - p->yy; + bx = p1->xx - p->xx; + by = p1->yy - p->yy; + cx = p2->xx - p->xx; + cy = p2->yy - p->yy; + + ab = ax * by - bx * ay; + bc = bx * cy - cx * by; + ca = cx * ay - ax * cy; + + a2 = ax * ax + ay * ay; + b2 = bx * bx + by * by; + c2 = cx * cx + cy * cy; + + x = a2 * bc + b2 * ca + c2 * ab; + + if(x < 0) + return -1; + if(x > 0) + return +1; + + return 0; +} + +/*! \brief Tests whether point pp lies in the circumcircle around triangle + * p0,p1,p2 with some error margin. + * + * This error margin should be large enough to exclude that close cases are + * misclssified due to numerical round-off errors. + * + * \param[in] T Pointer to tessellation. + * \param[in] pp0 Index in DP of first point in triangle. + * \param[in] pp1 Index in DP of second point in triangle. + * \param[in] pp2 Index in DP of third point in triangle. + * \param[in] pp Index in DP of point to be checked. + * + * \return (-1,0,1); -1: in circle; 0 on circle (within tolerance), + * 1: outside circle. 
+ */ +int InCircle_Errorbound(tessellation *T, int pp0, int pp1, int pp2, int pp) +{ + point *DP = T->DP; + point *p0 = &DP[pp0]; + point *p1 = &DP[pp1]; + point *p2 = &DP[pp2]; + point *p = &DP[pp]; + + if(pp0 == DPinfinity || pp1 == DPinfinity || pp2 == DPinfinity || pp == DPinfinity) + return -1; + + double ax, ay, bx, by, cx, cy; + double ab, bc, ca, a2, b2, c2, x; + double axby, bxay, bxcy, cxby, cxay, axcy; + + ax = p0->xx - p->xx; + ay = p0->yy - p->yy; + bx = p1->xx - p->xx; + by = p1->yy - p->yy; + cx = p2->xx - p->xx; + cy = p2->yy - p->yy; + + axby = ax * by; + bxay = bx * ay; + bxcy = bx * cy; + cxby = cx * by; + cxay = cx * ay; + axcy = ax * cy; + + ca = cxay - axcy; + ab = axby - bxay; + bc = bxcy - cxby; + + a2 = ax * ax + ay * ay; + b2 = bx * bx + by * by; + c2 = cx * cx + cy * cy; + + x = a2 * bc + b2 * ca + c2 * ab; + + /* calculate absolute maximum size */ + + double sizelimit = a2 * (fabs(bxcy) + fabs(cxby)) + b2 * (fabs(cxay) + fabs(axcy)) + c2 * (fabs(axby) + fabs(bxay)); + + double errbound = 1.0e-14 * sizelimit; + + if(x < -errbound) + return -1; + else if(x > errbound) + return +1; + + return 0; +} + +/*! \brief Tests whether point pp lies in the circumcircle around triangle + * p0,p1,p2 using arbitrary precision operations. + * + * This is the exact solution, but computationally very expensive, thus only + * called for the unclear cases. + * + * \param[in] T Pointer to tessellation. + * \param[in] pp0 Index in DP of first point in triangle. + * \param[in] pp1 Index in DP of second point in triangle. + * \param[in] pp2 Index in DP of third point in triangle. + * \param[in] pp Index in DP of point to be checked. + * + * \return (-1,0,1); -1: in circle; 0 on circle, + * 1: outside circle. 
+ */ +int InCircle_Exact(tessellation *T, int pp0, int pp1, int pp2, int pp) +{ + point *DP = T->DP; + point *p0 = &DP[pp0]; + point *p1 = &DP[pp1]; + point *p2 = &DP[pp2]; + point *p = &DP[pp]; + + if(pp0 == DPinfinity || pp1 == DPinfinity || pp2 == DPinfinity || pp == DPinfinity) + return -1; + + IntegerMapType ax, ay, bx, by, cx, cy; + + ax = p0->ix - p->ix; + ay = p0->iy - p->iy; + bx = p1->ix - p->ix; + by = p1->iy - p->iy; + cx = p2->ix - p->ix; + cy = p2->iy - p->iy; + + mpz_t axby, bxay, bxcy, cxby, cxay, axcy, tmp; + + mpz_init(tmp); + + mpz_init(axby); + MY_mpz_set_si(tmp, ax); + MY_mpz_mul_si(axby, tmp, by); + mpz_init(bxay); + MY_mpz_set_si(tmp, bx); + MY_mpz_mul_si(bxay, tmp, ay); + mpz_init(bxcy); + MY_mpz_set_si(tmp, bx); + MY_mpz_mul_si(bxcy, tmp, cy); + mpz_init(cxby); + MY_mpz_set_si(tmp, cx); + MY_mpz_mul_si(cxby, tmp, by); + mpz_init(cxay); + MY_mpz_set_si(tmp, cx); + MY_mpz_mul_si(cxay, tmp, ay); + mpz_init(axcy); + MY_mpz_set_si(tmp, ax); + MY_mpz_mul_si(axcy, tmp, cy); + + mpz_t ca, ab, bc; + + mpz_init(ca); + mpz_init(ab); + mpz_init(bc); + + mpz_sub(ca, cxay, axcy); + mpz_sub(ab, axby, bxay); + mpz_sub(bc, bxcy, cxby); + + mpz_t AA, BB, a2, b2, c2; + + mpz_init(AA); + mpz_init(BB); + mpz_init(a2); + mpz_init(b2); + mpz_init(c2); + + MY_mpz_set_si(tmp, ax); + MY_mpz_mul_si(AA, tmp, ax); + MY_mpz_set_si(tmp, ay); + MY_mpz_mul_si(BB, tmp, ay); + mpz_add(a2, AA, BB); + + MY_mpz_set_si(tmp, bx); + MY_mpz_mul_si(AA, tmp, bx); + MY_mpz_set_si(tmp, by); + MY_mpz_mul_si(BB, tmp, by); + mpz_add(b2, AA, BB); + + MY_mpz_set_si(tmp, cx); + MY_mpz_mul_si(AA, tmp, cx); + MY_mpz_set_si(tmp, cy); + MY_mpz_mul_si(BB, tmp, cy); + mpz_add(c2, AA, BB); + + /* now calculate the final result */ + + mpz_mul(AA, a2, bc); + mpz_mul(BB, b2, ca); + mpz_add(tmp, AA, BB); + mpz_mul(BB, c2, ab); + mpz_add(AA, BB, tmp); + + int sign = mpz_sgn(AA); + + mpz_clear(c2); + mpz_clear(b2); + mpz_clear(a2); + mpz_clear(BB); + mpz_clear(AA); + mpz_clear(bc); + mpz_clear(ab); + 
mpz_clear(ca); + mpz_clear(axcy); + mpz_clear(cxay); + mpz_clear(cxby); + mpz_clear(bxcy); + mpz_clear(bxay); + mpz_clear(axby); + mpz_clear(tmp); + + return sign; +} + +/*! \brief Returns the orientation of the triangle. + * + * Defined as the determinant of the matrix of the position of the three edge + * points a, b and c: + * | ax, ay, 1 | + * | bx, by, 1 | + * | cx, cy, 1 | + * + * \param[in] T Pointer to tessellation. + * \param[in] pp0 Index in DP of first point in triangle. + * \param[in] pp1 Index in DP of second point in triangle. + * \param[in] pp2 Index in DP of third point in triangle. + * + * \return Determinant of orientation matrix. + */ +double test_triangle_orientation(tessellation *T, int pp0, int pp1, int pp2) +{ + point *DP = T->DP; + point *p0 = &DP[pp0]; + point *p1 = &DP[pp1]; + point *p2 = &DP[pp2]; + + return (p1->x - p0->x) * (p2->y - p0->y) - (p1->y - p0->y) * (p2->x - p0->x); +} + +/*! \brief Check if triangle is positively or negatively oriented. + * + * \param[in] T Pointer to tessellation. + * \param[in] pp0 Index in DP of first point in triangle. + * \param[in] pp1 Index in DP of second point in triangle. + * \param[in] pp2 Index in DP of third point in triangle. + * + * \return -1 if negatively, 0 if degenerate (in a line) and 1 if positively + * oriented. + */ +int Orient2d_Quick(tessellation *T, int pp0, int pp1, int pp2) +{ + point *DP = T->DP; + point *p0 = &DP[pp0]; + point *p1 = &DP[pp1]; + point *p2 = &DP[pp2]; + + double x; + + x = (p1->xx - p0->xx) * (p2->yy - p0->yy) - (p1->yy - p0->yy) * (p2->xx - p0->xx); + + if(x < 0) + return -1; + if(x > 0) + return +1; + return 0; +} + +/*! \brief Check if triangle is positively or negatively oriented. + * + * Uses arbitrary precision operations, which is computationally expensive but + * garantees the correct result. + * + * \param[in] T Pointer to tessellation. + * \param[in] pp0 Index in DP of first point in triangle. + * \param[in] pp1 Index in DP of second point in triangle. 
+ * \param[in] pp2 Index in DP of third point in triangle. + * + * \return -1 if negatively, 0 if degenerate (in a line) and 1 if positively + * oriented. + */ +int Orient2d_Exact(tessellation *T, int pp0, int pp1, int pp2) +{ + point *DP = T->DP; + point *p0 = &DP[pp0]; + point *p1 = &DP[pp1]; + point *p2 = &DP[pp2]; + +#if USEDBITS > 31 + IntegerMapType dx1, dy1, dx2, dy2; + + dx1 = (p1->ix - p0->ix); + dy1 = (p1->iy - p0->iy); + dx2 = (p2->ix - p0->ix); + dy2 = (p2->iy - p0->iy); + + mpz_t dx1dy2, dx2dy1, tmp; + + mpz_init(tmp); + mpz_init(dx1dy2); + mpz_init(dx2dy1); + + MY_mpz_set_si(tmp, dx1); + MY_mpz_mul_si(dx1dy2, tmp, dy2); + + MY_mpz_set_si(tmp, dx2); + MY_mpz_mul_si(dx2dy1, tmp, dy1); + + mpz_sub(tmp, dx1dy2, dx2dy1); + + int sign = mpz_sgn(tmp); + + mpz_clear(dx2dy1); + mpz_clear(dx1dy2); + mpz_clear(tmp); + + return (sign); + +#else /* #if USEDBITS > 31 */ + signed long long dx1, dy1, dx2, dy2, x; + + dx1 = (p1->ix - p0->ix); + dy1 = (p1->iy - p0->iy); + dx2 = (p2->ix - p0->ix); + dy2 = (p2->iy - p0->iy); + + x = dx1 * dy2 - dy1 * dx2; + + if(x < 0) + return -1; + if(x > 0) + return +1; + return 0; +#endif /* #if USEDBITS > 31 #else */ +} + +const int edge_start[3] = {1, 2, 0}; +const int edge_end[3] = {2, 0, 1}; + +/*! \brief Calculate cell volumes and face areas of mesh. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] tt Index in DT array. + * \param[in] nr Index in edges. 
+ * + * \return void + */ +void process_edge_faces_and_volumes(tessellation *T, int tt, int nr) +{ + int i, j, qq, p1, p2, k; + face *f; + double nx, ny; + double sx, sy; + double hx, hy; + double dvol, h; + + if(T->Nvf + 1 >= T->MaxNvf) + { + T->Indi.AllocFacNvf *= ALLOC_INCREASE_FACTOR; + T->MaxNvf = T->Indi.AllocFacNvf; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNvf=%d Indi.AllocFacNvf=%g\n", ThisTask, T->MaxNvf, T->Indi.AllocFacNvf); +#endif /* #ifdef VERBOSE */ + T->VF = myrealloc_movable(T->VF, T->MaxNvf * sizeof(face)); + + if(T->Nvf + 1 >= T->MaxNvf) + terminate("Nvf larger than MaxNvf"); + } + + tetra *DT = T->DT; + point *DP = T->DP; + face *VF = T->VF; + tetra_center *DTC = T->DTC; + + tetra *t = &DT[tt]; + + i = edge_start[nr]; + j = edge_end[nr]; + + point *dpi = &DP[t->p[i]]; + point *dpj = &DP[t->p[j]]; + + qq = t->t[nr]; + + Edge_visited[tt] |= (1 << nr); + Edge_visited[qq] |= (1 << (t->s[nr])); + + p1 = t->p[i]; + p2 = t->p[j]; + + f = &VF[T->Nvf++]; + + f->p1 = p1; + f->p2 = p2; + + f->cx = 0.5 * (DTC[tt].cx + DTC[qq].cx); + f->cy = 0.5 * (DTC[tt].cy + DTC[qq].cy); + f->cz = 0; + +#ifdef TETRA_INDEX_IN_FACE + f->dt_index = tt; +#endif /* #ifdef TETRA_INDEX_IN_FACE */ + +#ifdef REFINEMENT_MERGE_CELLS + f->t = tt; + f->nr = nr; /* delaunay tetra and edge number that generated this face */ +#endif /* #ifdef REFINEMENT_MERGE_CELLS */ + + nx = DTC[tt].cx - DTC[qq].cx; + ny = DTC[tt].cy - DTC[qq].cy; + + f->area = sqrt(nx * nx + ny * ny); + + hx = 0.5 * (dpi->x - dpj->x); + hy = 0.5 * (dpi->y - dpj->y); + + h = sqrt(hx * hx + hy * hy); + dvol = 0.5 * f->area * h; + +#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) + double angle = 0.5 * f->area / h; +#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */ + + if(dpi->task == ThisTask && dpi->index >= 0 && dpi->index < NumGas) + { + if(TimeBinSynchronized[P[dpi->index].TimeBinHydro]) + { + SphP[dpi->index].Volume += dvol; + 
SphP[dpi->index].SurfaceArea += f->area; + +#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) + if(SphP[dpi->index].MaxFaceAngle < angle) + SphP[dpi->index].MaxFaceAngle = angle; +#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */ + +#ifdef OUTPUT_SURFACE_AREA + if(f->area) + SphP[dpi->index].CountFaces++; +#endif /* #ifdef OUTPUT_SURFACE_AREA */ + +#if defined(REFINEMENT_SPLIT_CELLS) + if(SphP[dpi->index].MinimumEdgeDistance > h) + SphP[dpi->index].MinimumEdgeDistance = h; +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */ + /* let's now compute the center-of-mass of the pyramid at the bottom top */ + sx = (2.0 / 3) * f->cx + (1.0 / 3) * dpi->x; + sy = (2.0 / 3) * f->cy + (1.0 / 3) * dpi->y; + + SphP[dpi->index].Center[0] += dvol * sx; + SphP[dpi->index].Center[1] += dvol * sy; + } + } + + if(dpj->task == ThisTask && dpj->index >= 0 && dpj->index < NumGas) + { + if(TimeBinSynchronized[P[dpj->index].TimeBinHydro]) + { + SphP[dpj->index].Volume += dvol; + SphP[dpj->index].SurfaceArea += f->area; + +#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) + if(SphP[dpj->index].MaxFaceAngle < angle) + SphP[dpj->index].MaxFaceAngle = angle; +#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */ + +#ifdef OUTPUT_SURFACE_AREA + if(f->area) + SphP[dpj->index].CountFaces++; +#endif /* #ifdef OUTPUT_SURFACE_AREA */ + +#if defined(REFINEMENT_SPLIT_CELLS) + if(SphP[dpj->index].MinimumEdgeDistance > h) + SphP[dpj->index].MinimumEdgeDistance = h; +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */ + + /* let's now compute the center-of-mass of the pyramid on top */ + sx = (2.0 / 3) * f->cx + (1.0 / 3) * dpj->x; + sy = (2.0 / 3) * f->cy + (1.0 / 3) * dpj->y; + + SphP[dpj->index].Center[0] += dvol * sx; + SphP[dpj->index].Center[1] += dvol * sy; + } + } + int low_p, high_p; + + if(DP[p1].ID < DP[p2].ID) + { + low_p = p1; + high_p = p2; + } + else + { + low_p = p2; + 
high_p = p1; + } + + int this_task_responsible_flag = 0; + + if(TimeBinSynchronized[DP[low_p].timebin]) /* the one with the lower ID is active */ + { + /* we need to check whether the one with the lower ID is a local particle */ + if(DP[low_p].task == ThisTask && DP[low_p].index >= 0 && DP[low_p].index < NumGas) + this_task_responsible_flag = 1; + } + else if(TimeBinSynchronized[DP[high_p].timebin]) /* only the side with the higher ID is active */ + { + /* we need to check whether we hold the one with the higher ID, if yes, we'll do it */ + if(DP[high_p].task == ThisTask && DP[high_p].index >= 0 && DP[high_p].index < NumGas) + this_task_responsible_flag = 1; + } + + if(this_task_responsible_flag) + { + for(k = 0; k < 2; k++) + { + int p, q; + + if(k == 0) + { + q = p1; + p = DP[q].index; + } + else + { + q = p2; + p = DP[q].index; + } + + if(DP[q].task == ThisTask) + { + if(DP[q].index >= NumGas) /* this is a local ghost point */ + p -= NumGas; + + SphP[p].ActiveArea += f->area; + } + else + { + /* here we have a foreign ghost point */ + if(DP[q].originalindex < 0) + terminate("should not happen"); + + if(Narea >= MaxNarea) + { + T->Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR; + MaxNarea = T->Indi.AllocFacNflux; + AreaList = myrealloc_movable(AreaList, MaxNarea * sizeof(struct area_list_data)); + + if(Narea >= MaxNarea) + terminate("Narea >= MaxNarea"); + } + + AreaList[Narea].task = DP[q].task; + AreaList[Narea].index = DP[q].originalindex; + AreaList[Narea].darea = f->area; + Narea++; + } + } + } +} + +/*! \brief Copies triangle information from DTC array to trilist. + * + * Performs an orientation check and swaps orientation if needed. + * + * \param[in] T Pointer to tessellation. + * \param[in] tt Index of triangle in DT array. + * \param[in] nr Index in DT[tt].t array (adjacent tetrahedrons). + * \param[in] dtip Pointer to point to be inserted. + * \param[out] trilist Array of triangles. + * \param[in] ntri Index in trilist array. 
+ * \param[in] max_n_tri Maximum index in trilist array. + * + * \return Next index in trilist array. + */ +int derefine_refine_get_triangles(tessellation *T, int tt, int nr, point *dtip, triangle *trilist, int ntri, int max_n_tri) +{ + tetra *DT = T->DT; + tetra_center *DTC = T->DTC; + tetra *t = &DT[tt]; + int qq = t->t[nr]; + + if(ntri >= max_n_tri) + terminate("ntri >= max_n_tri"); + + trilist[ntri].p[0][0] = DTC[tt].cx; + trilist[ntri].p[0][1] = DTC[tt].cy; + + trilist[ntri].p[1][0] = DTC[qq].cx; + trilist[ntri].p[1][1] = DTC[qq].cy; + + trilist[ntri].p[2][0] = dtip->x; + trilist[ntri].p[2][1] = dtip->y; + + if(get_tri_volume(ntri, trilist) < 0) + { + /* swap two points to get proper orientation */ + trilist[ntri].p[1][0] = DTC[tt].cx; + trilist[ntri].p[1][1] = DTC[tt].cy; + + trilist[ntri].p[0][0] = DTC[qq].cx; + trilist[ntri].p[0][1] = DTC[qq].cy; + } + + ntri++; + + return ntri; +} + +/*! \brief Add point and adjust triangles accordingly. + * + * \param[in] q Index of point in DP array. + * \param[in, out] trilist Array of triangles. + * \param[in] ntri Number of elements in trilist before splitting. + * \param[in] max_ntri Maximum number of triangles allowed. + * \param[in] vol (Unused) + * + * \return Updated number of triangles. 
+ */ +int derefine_add_point_and_split_tri(int q, triangle *trilist, int ntri, int max_ntri, double vol) +{ + double m[2], n[2], sc[3], *a; + double cut[2][2], ed[2]; + int i, j, k, kk, l, nnew, flag[3], count, oldq; + + for(i = 0, nnew = ntri; i < ntri; i++) + { + if(trilist[i].owner < 0 || trilist[i].owner >= Mesh.Ndp) + { + char buf[1000]; + sprintf(buf, "i=%d trilist[i].owner=%d\n", i, trilist[i].owner); + terminate(buf); + } + + if(q < 0 || q >= Mesh.Ndp) + { + char buf[1000]; + sprintf(buf, "i=%d q=%d\n", i, q); + terminate(buf); + } + + /* midpoint */ + m[0] = 0.5 * (Mesh.DP[q].x + Mesh.DP[trilist[i].owner].x); + m[1] = 0.5 * (Mesh.DP[q].y + Mesh.DP[trilist[i].owner].y); + + n[0] = (Mesh.DP[q].x - Mesh.DP[trilist[i].owner].x); + n[1] = (Mesh.DP[q].y - Mesh.DP[trilist[i].owner].y); + + if(q == trilist[i].owner) + terminate("q == trilist[i].owner"); + + for(k = 0, count = 0; k < 3; k++) /* determine the side of each point */ + { + a = &trilist[i].p[k][0]; + + sc[k] = (a[0] - m[0]) * n[0] + (a[1] - m[1]) * n[1]; + + if(sc[k] > 0) + { + flag[k] = 1; + count++; + } + else + flag[k] = 0; + } + + switch(count) + { + case 0: /* the whole tetra is on the side of current owner - nothing to be done */ + break; + + case 3: /* the whole tetra is on the side of new point */ + trilist[i].owner = q; /* change owner */ + break; + + case 1: + case 2: + + if(nnew + 2 > max_ntri) + terminate("nnew + 2 > max_ntri"); + + trilist[nnew] = trilist[i]; + trilist[nnew + 1] = trilist[i]; + + /* find the point index that is on the other side */ + for(k = 0; k < 3; k++) + { + if(flag[k] == 1 && count == 1) + break; + if(flag[k] == 0 && count == 2) + break; + } + + for(j = 0; j < 2; j++) + { + kk = k + j + 1; + if(kk > 2) + kk -= 3; + + double *b = trilist[i].p[k]; + double *a = trilist[i].p[kk]; + + for(l = 0; l < 2; l++) + ed[l] = a[l] - b[l]; + + double prod = (ed[0] * n[0] + ed[1] * n[1]); + double t; + if(prod) + t = -sc[k] / prod; + else + t = 0.5; + + if(t < 0) + t = 0; + if(t > 1) 
+ t = 1; + + for(l = 0; l < 2; l++) + cut[j][l] = b[l] + t * ed[l]; + } + + /* modify the tetra that's assigned to the new point */ + for(j = 0; j < 2; j++) + { + kk = k + j + 1; + if(kk > 2) + kk -= 3; + + for(l = 0; l < 2; l++) + trilist[i].p[kk][l] = cut[j][l]; + } + + oldq = trilist[i].owner; + + if(count == 1) + trilist[i].owner = q; + + /* modify the two new tetras */ + kk = k + 1; + if(kk > 2) + kk -= 3; + + for(l = 0; l < 2; l++) + { + trilist[nnew].p[k][l] = cut[0][l]; + + trilist[nnew + 1].p[k][l] = cut[1][l]; + trilist[nnew + 1].p[kk][l] = cut[0][l]; + } + + if(count == 1) + { + trilist[nnew].owner = oldq; + trilist[nnew + 1].owner = oldq; + } + else + { + trilist[nnew].owner = q; + trilist[nnew + 1].owner = q; + } + nnew += 2; + break; + } + } + + return nnew; +} + +/*! \brief Determines area of triangle (i.e. 2d Volume). + * + * \param i Index in trilist array. + * \param trilist Array with triangles. + * + * \return Area of triangle. + */ +double get_tri_volume(int i, triangle *trilist) +{ + double *p0 = &trilist[i].p[0][0]; + double *p1 = &trilist[i].p[1][0]; + double *p2 = &trilist[i].p[2][0]; + + double nz = (p1[0] - p0[0]) * (p2[1] - p0[1]) - (p1[1] - p0[1]) * (p2[0] - p0[0]); + + return 0.5 * nz; +} + +/*! \brief Process edge for volume calculation. + * + * Calculates the contribution of edge to volumes of neighboring + * Voronoi cells in vol array. + * + * \param[in] T Pointer to tessellation. + * \param[in, out] vol Volume of tetrahedra. + * \param[in] tt Index of triangle in DT array. + * \param[in] nr Index in edge array. 
+ * + * \return void + */ +void derefine_refine_process_edge(tessellation *T, double *vol, int tt, int nr) +{ + tetra *DT = T->DT; + point *DP = T->DP; + tetra_center *DTC = T->DTC; + + int i, j, qq, p1, p2; + double nx, ny; + double hx, hy; + double dvol, h; + + tetra *t = &DT[tt]; + + i = edge_start[nr]; + j = edge_end[nr]; + + point *dpi = &DP[t->p[i]]; + point *dpj = &DP[t->p[j]]; + + qq = t->t[nr]; + + Edge_visited[tt] |= (1 << nr); + Edge_visited[qq] |= (1 << (t->s[nr])); + + p1 = t->p[i]; + p2 = t->p[j]; + + nx = DTC[tt].cx - DTC[qq].cx; + ny = DTC[tt].cy - DTC[qq].cy; + + double area = sqrt(nx * nx + ny * ny); + + hx = 0.5 * (dpi->x - dpj->x); + hy = 0.5 * (dpi->y - dpj->y); + + h = sqrt(hx * hx + hy * hy); + dvol = 0.5 * area * h; + + if(p1 >= 0 && p1 < DeRefMesh.Ndp) + vol[p1] += dvol; + + if(p2 >= 0 && p2 < DeRefMesh.Ndp) + vol[p2] += dvol; +} + +/*! \brief Computes the circum-circle of all triangles in mesh. + * + * \param[in, out] T Pointer to tessellation. + * + * \return void + */ +void compute_circumcircles(tessellation *T) +{ + tetra *DT = T->DT; + char *DTF = T->DTF; + + int i; + + for(i = 0; i < T->Ndt; i++) + { + if(DTF[i] & 1) + continue; + DTF[i] |= 1; + + if(DT[i].p[0] == DPinfinity) + continue; + if(DT[i].p[1] == DPinfinity) + continue; + if(DT[i].p[2] == DPinfinity) + continue; + + update_circumcircle(T, i); + } +} + +/*! \brief Computes the circum-circle of triangle tt. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] tt Index of triangle in DT array. 
+ * + * \return void + */ +void update_circumcircle(tessellation *T, int tt) +{ + tetra *DT = T->DT; + tetra_center *DTC = T->DTC; + point *DP = T->DP; + + tetra *t = &DT[tt]; + point *p0, *p1, *p2; + int pp0, pp1, pp2; + + pp0 = t->p[0]; + pp1 = t->p[1]; + pp2 = t->p[2]; + + p0 = &DP[pp0]; + p1 = &DP[pp1]; + p2 = &DP[pp2]; + + if(t->p[0] == DPinfinity) + return; + if(t->p[1] == DPinfinity) + return; + if(t->p[2] == DPinfinity) + return; + + double ax = p1->xx - p0->xx; + double ay = p1->yy - p0->yy; + + double bx = p2->xx - p0->xx; + double by = p2->yy - p0->yy; + + double aa = 0.5 * (ax * ax + ay * ay); + double bb = 0.5 * (bx * bx + by * by); + + double mv_data[] = {ax, ay, aa, bx, by, bb}; + double x[2]; + + int status = solve_linear_equations_2d(mv_data, x); + + if(status < 0) + { + terminate("trouble in circum-circle calculation\n"); + } + else + { + x[0] += p0->xx; + x[1] += p0->yy; + + DTC[tt].cx = (x[0] - 1.0) / ConversionFac + CentralOffsetX; + DTC[tt].cy = (x[1] - 1.0) / ConversionFac + CentralOffsetY; + DTC[tt].cz = 0; + } +} + +/*! \brief Computes the integer coordinates from coordinates for a point. + * + * \pararm[in, out] p Pointer to point. + * + * \return void + */ +void set_integers_for_pointer(point *p) +{ + p->xx = (p->x - CentralOffsetX) * ConversionFac + 1.0; + p->yy = (p->y - CentralOffsetY) * ConversionFac + 1.0; + + if(p->xx < 1.0 || p->xx >= 2.0 || p->yy < 1.0 || p->yy >= 2.0) + { + printf("(%g, %g) (%g, %g)\n", p->x, p->y, p->xx, p->yy); + terminate("invalid coordinate range"); + } + + p->ix = double_to_voronoiint(p->xx); + p->iy = double_to_voronoiint(p->yy); + + p->xx = mask_voronoi_int(p->xx); + p->yy = mask_voronoi_int(p->yy); +} + +/*! \brief Outputs Voronoi mesh to file. + * + * Outputs the Voronoi mesh data from task write Task to lastTask in file + * fname. + * + * \param[in] T Pointer to tesselation. + * \param[in] fname File name of file the data is written in. 
+ * \param[in] writeTask Task that gathers information and writes data. + * \param[in] lastTask Last task that is included in this dump. + * + * \return void + */ +void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask) +{ + CPU_Step[CPU_MISC] += measure_time(); + + FILE *fd; + char msg[1000]; + MPI_Status status; + int i, j, k, MaxNel, Nel; + int ngas_tot, nel_tot, ndt_tot, nel_before, ndt_before, task; + int *EdgeList, *Nedges, *NedgesOffset, *whichtetra; + int *ngas_list, *nel_list, *ndt_list, *tmp; + float *xyz_edges; + tetra *q, *qstart; + + tetra_center *DTC = T->DTC; + tetra *DT = T->DT; + point *DP = T->DP; + + MaxNel = 10 * NumGas; /* max edge list */ + Nel = 0; /* length of edge list */ + + EdgeList = mymalloc("EdgeList", MaxNel * sizeof(int)); + Nedges = mymalloc("Nedges", NumGas * sizeof(int)); + NedgesOffset = mymalloc("NedgesOffset", NumGas * sizeof(int)); + whichtetra = mymalloc("whichtetra", NumGas * sizeof(int)); + xyz_edges = mymalloc("xyz_edges", T->Ndt * DIMS * sizeof(float)); + ngas_list = mymalloc("ngas_list", sizeof(int) * NTask); + nel_list = mymalloc("nel_list", sizeof(int) * NTask); + ndt_list = mymalloc("ndt_list", sizeof(int) * NTask); + + for(i = 0; i < T->Ndt; i++) + { + xyz_edges[i * DIMS + 0] = DTC[i].cx; + xyz_edges[i * DIMS + 1] = DTC[i].cy; + } + + for(i = 0; i < NumGas; i++) + { + Nedges[i] = 0; + whichtetra[i] = -1; + } + + for(i = 0; i < T->Ndt; i++) + { + for(j = 0; j < DIMS + 1; j++) + if(DP[DT[i].p[j]].task == ThisTask && DP[DT[i].p[j]].index >= 0 && DP[DT[i].p[j]].index < NumGas) + whichtetra[DP[DT[i].p[j]].index] = i; + } + + for(i = 0; i < NumGas; i++) + { + if(whichtetra[i] < 0) + continue; + + qstart = q = &DT[whichtetra[i]]; + + do + { + Nedges[i]++; + + if(Nel >= MaxNel) + terminate("Nel >= MaxNel"); + + EdgeList[Nel++] = q - DT; + + for(j = 0; j < 3; j++) + if(DP[q->p[j]].task == ThisTask && DP[q->p[j]].index == i) + break; + + k = j + 1; + if(k >= 3) + k -= 3; + + q = &DT[q->t[k]]; + } + 
while(q != qstart); + } + + for(i = 1, NedgesOffset[0] = 0; i < NumGas; i++) + NedgesOffset[i] = NedgesOffset[i - 1] + Nedges[i - 1]; + + /* determine particle numbers and number of edges in file */ + + if(ThisTask == writeTask) + { + ngas_tot = NumGas; + nel_tot = Nel; + ndt_tot = T->Ndt; + + for(task = writeTask + 1; task <= lastTask; task++) + { + MPI_Recv(&ngas_list[task], 1, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status); + MPI_Recv(&nel_list[task], 1, MPI_INT, task, TAG_LOCALN + 1, MPI_COMM_WORLD, &status); + MPI_Recv(&ndt_list[task], 1, MPI_INT, task, TAG_LOCALN + 2, MPI_COMM_WORLD, &status); + + MPI_Send(&nel_tot, 1, MPI_INT, task, TAG_N, MPI_COMM_WORLD); + MPI_Send(&ndt_tot, 1, MPI_INT, task, TAG_N + 1, MPI_COMM_WORLD); + + ngas_tot += ngas_list[task]; + nel_tot += nel_list[task]; + ndt_tot += ndt_list[task]; + } + + if(!(fd = fopen(fname, "w"))) + { + sprintf(msg, "can't open file `%s' for writing snapshot.\n", fname); + terminate(msg); + } + + my_fwrite(&ngas_tot, sizeof(int), 1, fd); + my_fwrite(&nel_tot, sizeof(int), 1, fd); + my_fwrite(&ndt_tot, sizeof(int), 1, fd); + + my_fwrite(Nedges, sizeof(int), NumGas, fd); + for(task = writeTask + 1; task <= lastTask; task++) + { + tmp = mymalloc("tmp", sizeof(int) * ngas_list[task]); + MPI_Recv(tmp, ngas_list[task], MPI_INT, task, TAG_N + 2, MPI_COMM_WORLD, &status); + my_fwrite(tmp, sizeof(int), ngas_list[task], fd); + myfree(tmp); + } + + my_fwrite(NedgesOffset, sizeof(int), NumGas, fd); + for(task = writeTask + 1; task <= lastTask; task++) + { + tmp = mymalloc("tmp", sizeof(int) * ngas_list[task]); + MPI_Recv(tmp, ngas_list[task], MPI_INT, task, TAG_N + 3, MPI_COMM_WORLD, &status); + my_fwrite(tmp, sizeof(int), ngas_list[task], fd); + myfree(tmp); + } + + my_fwrite(EdgeList, sizeof(int), Nel, fd); + for(task = writeTask + 1; task <= lastTask; task++) + { + tmp = mymalloc("tmp", sizeof(int) * nel_list[task]); + MPI_Recv(tmp, nel_list[task], MPI_INT, task, TAG_N + 4, MPI_COMM_WORLD, &status); + 
my_fwrite(tmp, sizeof(int), nel_list[task], fd); + myfree(tmp); + } + + my_fwrite(xyz_edges, sizeof(float), T->Ndt * DIMS, fd); + for(task = writeTask + 1; task <= lastTask; task++) + { + tmp = mymalloc("tmp", sizeof(float) * DIMS * ndt_list[task]); + MPI_Recv(tmp, sizeof(float) * DIMS * ndt_list[task], MPI_BYTE, task, TAG_N + 5, MPI_COMM_WORLD, &status); + my_fwrite(tmp, sizeof(float), DIMS * ndt_list[task], fd); + myfree(tmp); + } + + fclose(fd); + } + else + { + MPI_Send(&NumGas, 1, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD); + MPI_Send(&Nel, 1, MPI_INT, writeTask, TAG_LOCALN + 1, MPI_COMM_WORLD); + MPI_Send(&T->Ndt, 1, MPI_INT, writeTask, TAG_LOCALN + 2, MPI_COMM_WORLD); + + MPI_Recv(&nel_before, 1, MPI_INT, writeTask, TAG_N, MPI_COMM_WORLD, &status); + MPI_Recv(&ndt_before, 1, MPI_INT, writeTask, TAG_N + 1, MPI_COMM_WORLD, &status); + + for(i = 0; i < NumGas; i++) + NedgesOffset[i] += nel_before; + for(i = 0; i < Nel; i++) + EdgeList[i] += ndt_before; + + MPI_Send(Nedges, NumGas, MPI_INT, writeTask, TAG_N + 2, MPI_COMM_WORLD); + MPI_Send(NedgesOffset, NumGas, MPI_INT, writeTask, TAG_N + 3, MPI_COMM_WORLD); + MPI_Send(EdgeList, Nel, MPI_INT, writeTask, TAG_N + 4, MPI_COMM_WORLD); + MPI_Send(xyz_edges, sizeof(float) * DIMS * T->Ndt, MPI_BYTE, writeTask, TAG_N + 5, MPI_COMM_WORLD); + } + + myfree(ndt_list); + myfree(nel_list); + myfree(ngas_list); + myfree(xyz_edges); + myfree(whichtetra); + myfree(NedgesOffset); + myfree(Nedges); + myfree(EdgeList); + + mpi_printf("wrote Voronoi mesh to file\n"); + + CPU_Step[CPU_MAKEIMAGES] += measure_time(); +} + +#endif /* #if defined(TWODIMS) && !defined(ONEDIMS) */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_3d.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_3d.c new file mode 100644 index 0000000000..f8cc3ad712 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_3d.c @@ -0,0 +1,5111 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/mesh/voronoi/voronoi_3d.c + * \date 05/2018 + * \brief Routines to build a 3d Voronoi mesh. + * \details Note that some of these routines have the same name as the ones + * in voronoi_1d.c and voronoi_2d.c and just replace them in case + * neither the Config-option TWODIMS nor ONEDIMS is active. 
+ * contains functions: + * void initialize_and_create_first_tetra(tessellation * T) + * void get_line_segments(int sphp_index, int dp_index, double + * *segments, unsigned int *nof_elements, unsigned int + * max_elements) + * void process_edge_faces_and_volumes(tessellation * T, + * int tt, int nr) + * int derefine_refine_get_triangles(tessellation * T, int tt, + * int nr, point * dtip, triangle * trilist, int ntri, + * int max_n_tri) + * double get_tri_volume(int i, triangle * trilist) + * int derefine_add_point_and_split_tri(int q, triangle + * * trilist, int ntri, int max_ntri, double vol) + * void derefine_refine_process_edge(tessellation * T, + * double *vol, int tt, int nr) + * int insert_point(tessellation * T, int pp, int ttstart) + * int convex_edge_test(tessellation * T, int tt, int tip, + * int *edgenr) + * void make_a_face_split(tessellation * T, int tt0, + * int face_nr, int pp, int tt1, int tt2, int qq1, int qq2) + * void make_an_edge_split(tessellation * T, int tt0, + * int edge_nr, int count, int pp, int *ttlist) + * void make_a_4_to_4_flip(tessellation * T, int tt, + * int tip_index, int edge_nr) + * void make_a_1_to_4_flip(tessellation * T, int pp, int tt0, + * int tt1, int tt2, int tt3) + * void make_a_3_to_2_flip(tessellation * T, int tt0, int tt1, + * int tt2, int tip, int edge, int bottom) + * void make_a_2_to_3_flip(tessellation * T, int tt0, int tip, + * int tt1, int bottom, int qq, int tt2) + * int get_tetra(tessellation * T, point * p, int *moves, + * int ttstart, int *flag, int *edgeface_nr) + * int InTetra(tessellation * T, int tt, point * p, + * int *edgeface_nr, int *nexttetra) + * void compute_circumcircles(tessellation * T) + * void calc_mpz_determinant(mpz_t det, mpz_t ax, mpz_t ay, + * mpz_t az, mpz_t bx, mpz_t by, mpz_t bz, mpz_t cx, + * mpz_t cy, mpz_t cz) + * void get_circumcircle_exact(tessellation * T, int tt, + * double *x, double *y, double *z) + * void update_circumcircle(tessellation * T, int tt) + * int 
test_tetra_orientation(point * p0, point * p1, + * point * p2, point * p3) + * double calculate_tetra_volume(point * p0, point * p1, + * point * p2, point * p3) + * void add_row(double *m, int r1, int r2, double fac) + * int solve_linear_equations(double *m, double *res) + * void set_integers_for_pointer(point * p) + * int InSphere_Exact(point * p0, point * p1, point * p2, + * point * p3, point * p) + * int InSphere_Quick(point * p0, point * p1, point * p2, + * point * p3, point * p) + * int InSphere_Errorbound(point * p0, point * p1, point * p2, + * point * p3, point * p) + * int Orient3d_Exact(point * p0, point * p1, point * p2, + * point * p3) + * int Orient3d_Quick(point * p0, point * p1, point * p2, + * point * p3) + * int Orient3d(point * p0, point * p1, point * p2, point * p3) + * int compare_face_sort(const void *a, const void *b) + * void get_voronoi_face_vertex_indices(tessellation * T) + * void get_voronoi_face_vertex_coordinates(tessellation * T) + * void sort_faces_by_ID(void) + * void write_voronoi_face_vertex_indices(tessellation * T, + * char *fname1, char *fname2, int writeTask, int lastTask) + * void write_voronoi_face_vertex_coordinates(tessellation * T, + * char *fname, int writeTask, int lastTask) + * void write_voronoi_mesh(tessellation * T, char *fname, + * int writeTask, int lastTask) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" + +#if !defined(TWODIMS) && !defined(ONEDIMS) /* will only be compiled in 3D case */ + +#define INSIDE_EPS 1.0e-6 +#define GAUSS_EPS 1.0e-8 + +const int access_triangles[4][3] = {{1, 3, 2}, {0, 2, 3}, {0, 3, 1}, {0, 1, 2}}; + +const int edge_start[6] = {0, 0, 0, 1, 1, 2}; +const int edge_end[6] = {1, 2, 3, 2, 3, 3}; +const int edge_opposite[6] = {3, 1, 
2, 3, 0, 1}; +const int edge_nexttetra[6] = {2, 3, 1, 0, 2, 0}; + +/*! \brief Initializes 3d tessellation and create all-enclosing tetrahedron. + * + * \param[out] T Pointer to tessellation structure which is set and its arrays + * are allocated in this routine. + * + * \return void + */ +void initialize_and_create_first_tetra(tessellation *T) +{ + point *p; + int i, n; + + T->MaxNdp = T->Indi.AllocFacNdp; + T->MaxNdt = T->Indi.AllocFacNdt; + T->MaxNvf = T->Indi.AllocFacNvf; + + T->Ndp = 0; + T->Ndt = 0; + T->Nvf = 0; + + T->VF = mymalloc_movable(&T->VF, "VF", T->MaxNvf * sizeof(face)); + + T->DP = mymalloc_movable(&T->DP, "DP", (T->MaxNdp + 5) * sizeof(point)); + T->DP += 5; + + T->DT = mymalloc_movable(&T->DT, "DT", T->MaxNdt * sizeof(tetra)); + + /* construct all encompassing huge tetrahedron */ + + double box, tetra_incircle, tetra_sidelength, tetra_height, tetra_face_height; + + box = boxSize_X; + if(box < boxSize_Y) + box = boxSize_Y; + if(box < boxSize_Z) + box = boxSize_Z; + + tetra_incircle = 1.5 * box; + tetra_sidelength = tetra_incircle * sqrt(24); + tetra_height = sqrt(2.0 / 3) * tetra_sidelength; + tetra_face_height = sqrt(3.0) / 2.0 * tetra_sidelength; + + point *DP = T->DP; + tetra *DT = T->DT; + + /* first, let's make the points */ + DP[-4].x = 0.5 * tetra_sidelength; + DP[-4].y = -1.0 / 3 * tetra_face_height; + DP[-4].z = -0.25 * tetra_height; + + DP[-3].x = 0; + DP[-3].y = 2.0 / 3 * tetra_face_height; + DP[-3].z = -0.25 * tetra_height; + + DP[-2].x = -0.5 * tetra_sidelength; + DP[-2].y = -1.0 / 3 * tetra_face_height; + DP[-2].z = -0.25 * tetra_height; + + DP[-1].x = 0; + DP[-1].y = 0; + DP[-1].z = 0.75 * tetra_height; + + for(i = -4; i <= -1; i++) + { + DP[i].x += 0.5 * box; + DP[i].y += 0.5 * box; + DP[i].z += 0.5 * box; + } + + for(i = -4, p = &DP[-4]; i < 0; i++, p++) + { + p->index = -1; + p->task = ThisTask; + p->timebin = 0; + } + + /* we also define a neutral element at infinity */ + DPinfinity = -5; + + DP[DPinfinity].x = 
MAX_DOUBLE_NUMBER; + DP[DPinfinity].y = MAX_DOUBLE_NUMBER; + DP[DPinfinity].z = MAX_DOUBLE_NUMBER; + DP[DPinfinity].index = -1; + DP[DPinfinity].task = ThisTask; + DP[DPinfinity].timebin = 0; + + /* now let's make the big tetrahedron */ + DT[0].p[0] = -4; + DT[0].p[1] = -3; + DT[0].p[2] = -2; + DT[0].p[3] = -1; + + /* On the outer faces, we attach tetrahedra with the neutral element as tip. + * This way we will be able to navigate nicely within the tesselation, + * and all tetrahedra have defined neighbouring tetrahedra. + */ + + for(i = 0; i < 4; i++) + { + n = i + 1; /* tetra index */ + + DT[0].t[i] = n; + DT[0].s[i] = 3; + + DT[n].t[3] = 0; + DT[n].s[3] = i; + DT[n].p[3] = DPinfinity; + } + + DT[1].p[0] = DT[0].p[1]; + DT[1].p[1] = DT[0].p[2]; + DT[1].p[2] = DT[0].p[3]; + + DT[2].p[0] = DT[0].p[0]; + DT[2].p[1] = DT[0].p[3]; + DT[2].p[2] = DT[0].p[2]; + + DT[3].p[0] = DT[0].p[0]; + DT[3].p[1] = DT[0].p[1]; + DT[3].p[2] = DT[0].p[3]; + + DT[4].p[0] = DT[0].p[0]; + DT[4].p[1] = DT[0].p[2]; + DT[4].p[2] = DT[0].p[1]; + + DT[1].t[0] = 2; + DT[2].t[0] = 1; + DT[1].s[0] = 0; + DT[2].s[0] = 0; + + DT[1].t[1] = 3; + DT[3].t[0] = 1; + DT[1].s[1] = 0; + DT[3].s[0] = 1; + + DT[1].t[2] = 4; + DT[4].t[0] = 1; + DT[1].s[2] = 0; + DT[4].s[0] = 2; + + DT[2].t[2] = 3; + DT[3].t[1] = 2; + DT[2].s[2] = 1; + DT[3].s[1] = 2; + + DT[2].t[1] = 4; + DT[4].t[2] = 2; + DT[2].s[1] = 2; + DT[4].s[2] = 1; + + DT[3].t[2] = 4; + DT[4].t[1] = 3; + DT[3].s[2] = 1; + DT[4].s[1] = 2; + + T->Ndt = 5; /* we'll start out with 5 tetras */ + + CentralOffsetX = 0.5 * box - 0.5000001 * tetra_sidelength; + CentralOffsetY = 0.5 * box - (1.0000001 / 3) * tetra_face_height; + CentralOffsetZ = 0.5 * box - 0.25000001 * tetra_height; + + ConversionFac = 1.0 / (1.001 * tetra_sidelength); + +#ifndef OPTIMIZE_MEMORY_USAGE + for(i = -4; i < 0; i++) + set_integers_for_point(T, i); +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */ +} + +#ifdef TETRA_INDEX_IN_FACE +/*! \brief Gets the line segments of a Voronoi cell. 
+ * + * Warning: The correspondance sphp_index == dp_index holds only for a global + * timestep! + * + * \param[in] sphp_index The index of the Voronoi cell. + * \param[in] dp_index The index of the corresponding Delaunay point. + * \param[out] segments The array in which the line segments are stored. + * \param[out] nof_elements The number of elements written in segments during + * this function call. + * \param[in] max_elements The maximum size of the segments array. + * + * \return void + */ +void get_line_segments(int sphp_index, int dp_index, double *segments, unsigned int *nof_elements, unsigned int max_elements) +{ + // index for segments array + unsigned int a = 0; + + int edge = SphP[sphp_index].first_connection; + int last_edge = SphP[sphp_index].last_connection; + + // loop over all interfaces of the cell + while(1) + { + int dq_index = DC[edge].dp_index; + + // one of the tetrahedras around the Delaunay connection + int tt = DC[edge].dt_index; + tetra *t = &Mesh.DT[tt]; + + // find the local index of the edge + int nr = 6; + int e, dp_start_index, dp_end_index; + + for(e = 0; e < 6; e++) + { + dp_start_index = t->p[edge_start[e]]; + dp_end_index = t->p[edge_end[e]]; + + if((dp_start_index == dp_index && dp_end_index == dq_index) || (dp_start_index == dq_index && dp_end_index == dp_index)) + { + nr = e; + break; + } + } + + // ensure that the local edge index has been found + assert(nr != 6); + + // already set: t,tt,nr + int i, j, k, l, m, ii, jj, kk, ll, nn; + tetra *prev, *next; + tetra_center *prevc, *nextc; + + i = edge_start[nr]; + j = edge_end[nr]; + k = edge_opposite[nr]; + l = edge_nexttetra[nr]; + + prev = t; + prevc = &Mesh.DTC[tt]; + + do + { + nn = prev->t[l]; + next = &Mesh.DT[nn]; + nextc = &Mesh.DTC[nn]; + + if(a > max_elements - 7) + { + terminate("termination in voronoi_3d.c get_line_segments: not enough memory!"); + } + + segments[a++] = prevc->cx; + segments[a++] = prevc->cy; + segments[a++] = prevc->cz; + segments[a++] = nextc->cx; + 
segments[a++] = nextc->cy; + segments[a++] = nextc->cz; + + for(m = 0, ll = ii = jj = -1; m < 4; m++) + { + if(next->p[m] == prev->p[k]) + ll = m; + if(next->p[m] == prev->p[i]) + ii = m; + if(next->p[m] == prev->p[j]) + jj = m; + } + + if(ll < 0 || ii < 0 || jj < 0) + terminate("inconsistency"); + + kk = 6 - (ll + ii + jj); + + prev = next; + prevc = nextc; + + i = ii; + l = ll; + j = jj; + k = kk; + } + while(next != t); + + if(edge == last_edge) + { + break; + } + + edge = DC[edge].next; + + } // end of while loop + + *nof_elements = a; + + return; +} +#endif /* #ifdef TETRA_INDEX_IN_FACE */ + +/*! \brief Calculate cell volumes and face areas of mesh. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] tt Index in DT array. + * \param[in] nr Index in edges. + * + * \return void + */ +void process_edge_faces_and_volumes(tessellation *T, int tt, int nr) +{ + int i, j, k, l, m, ii, jj, kk, ll, nn, count, nr_next, p1, p2; + face *f; + tetra *prev, *next; + tetra_center *prevc, *nextc; + double ax, ay, az; + double bx, by, bz; + double cx, cy, cz; + double nx, ny, nz; + double sx, sy, sz; + double hhx, hhy, hhz; + double darea, dvol, h; + + if(T->Nvf + 1 >= T->MaxNvf) + { + T->Indi.AllocFacNvf *= ALLOC_INCREASE_FACTOR; + T->MaxNvf = T->Indi.AllocFacNvf; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNvf=%d Indi.AllocFacNvf=%g\n", ThisTask, T->MaxNvf, T->Indi.AllocFacNvf); +#endif /* #ifdef VERBOSE */ + T->VF = myrealloc_movable(T->VF, T->MaxNvf * sizeof(face)); + + if(T->Nvf + 1 >= T->MaxNvf) + terminate("Nvf larger than MaxNvf"); + } + + tetra *DT = T->DT; + point *DP = T->DP; + face *VF = T->VF; + tetra_center *DTC = T->DTC; + + tetra *t = &DT[tt]; + + i = edge_start[nr]; + j = edge_end[nr]; + k = edge_opposite[nr]; + l = edge_nexttetra[nr]; + + Edge_visited[tt] |= (1 << nr); + + p1 = t->p[i]; + p2 = t->p[j]; + + f = &VF[T->Nvf++]; + + f->area = 0; + f->p1 = p1; + f->p2 = p2; + + f->cx = 0; + f->cy = 0; + f->cz = 0; + +#ifdef 
TETRA_INDEX_IN_FACE + f->dt_index = tt; +#endif /* #ifdef TETRA_INDEX_IN_FACE */ + + hhx = 0.5 * (DP[p1].x - DP[p2].x); + hhy = 0.5 * (DP[p1].y - DP[p2].y); + hhz = 0.5 * (DP[p1].z - DP[p2].z); + + h = sqrt(hhx * hhx + hhy * hhy + hhz * hhz); + + cx = DTC[tt].cx; + cy = DTC[tt].cy; + cz = DTC[tt].cz; + + count = 0; + + prev = t; + prevc = &DTC[tt]; + do + { + nn = prev->t[l]; + next = &DT[nn]; + nextc = &DTC[nn]; + + if(prev != t && next != t) + { + ax = prevc->cx - cx; + ay = prevc->cy - cy; + az = prevc->cz - cz; + + bx = nextc->cx - cx; + by = nextc->cy - cy; + bz = nextc->cz - cz; + + nx = ay * bz - az * by; + ny = az * bx - ax * bz; + nz = ax * by - ay * bx; + + sx = nextc->cx + prevc->cx + cx; + sy = nextc->cy + prevc->cy + cy; + sz = nextc->cz + prevc->cz + cz; + + darea = 0.5 * sqrt(nx * nx + ny * ny + nz * nz); + f->area += darea; + + darea *= (1.0 / 3); + + f->cx += darea * sx; + f->cy += darea * sy; + f->cz += darea * sz; + } + + for(m = 0, ll = ii = jj = -1; m < 4; m++) + { + if(next->p[m] == prev->p[k]) + ll = m; + if(next->p[m] == prev->p[i]) + ii = m; + if(next->p[m] == prev->p[j]) + jj = m; + } + + if(ll < 0 || ii < 0 || jj < 0) + terminate("inconsistency"); + + kk = 6 - (ll + ii + jj); + + /* need to determine the edge number to be able to flag it */ + + for(nr_next = 0; nr_next < 6; nr_next++) + if((edge_start[nr_next] == ii && edge_end[nr_next] == jj) || (edge_start[nr_next] == jj && edge_end[nr_next] == ii)) + { + if((Edge_visited[nn] & (1 << nr_next)) && next != t) + terminate("inconsistency"); + + Edge_visited[nn] |= (1 << nr_next); + break; + } + + prev = next; + prevc = nextc; + i = ii; + l = ll; + j = jj; + k = kk; + + count++; + + if(count > 1000) + terminate("count is too large"); + } + while(next != t); + + i = edge_start[nr]; + j = edge_end[nr]; + + if(f->area) + { + f->cx /= f->area; + f->cy /= f->area; + f->cz /= f->area; + } + +#ifdef REFINEMENT_MERGE_CELLS + f->t = tt; + f->nr = nr; /* delaunay tetra and edge number that generated 
this face */ +#endif /* #ifdef REFINEMENT_MERGE_CELLS */ + + dvol = (1.0 / 3) * f->area * h; + +#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) + double angle = sqrt(f->area / M_PI) / h; +#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */ + + if(DP[p1].task == ThisTask && DP[p1].index >= 0 && DP[p1].index < NumGas) + { + if(TimeBinSynchronized[P[DP[p1].index].TimeBinHydro]) + { + SphP[DP[p1].index].Volume += dvol; + SphP[DP[p1].index].SurfaceArea += f->area; + +#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) + if(SphP[DP[p1].index].MaxFaceAngle < angle) + SphP[DP[p1].index].MaxFaceAngle = angle; +#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */ + +#ifdef OUTPUT_SURFACE_AREA + if(f->area) + SphP[DP[p1].index].CountFaces++; +#endif /* #ifdef OUTPUT_SURFACE_AREA */ + +#if defined(REFINEMENT_SPLIT_CELLS) + if(SphP[DP[p1].index].MinimumEdgeDistance > h) + SphP[DP[p1].index].MinimumEdgeDistance = h; +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */ + /* let's now compute the center-of-mass of the pyramid at the bottom top */ + sx = 0.75 * f->cx + 0.25 * DP[p1].x; + sy = 0.75 * f->cy + 0.25 * DP[p1].y; + sz = 0.75 * f->cz + 0.25 * DP[p1].z; + + SphP[DP[p1].index].Center[0] += dvol * sx; + SphP[DP[p1].index].Center[1] += dvol * sy; + SphP[DP[p1].index].Center[2] += dvol * sz; + } + } + + if(DP[p2].task == ThisTask && DP[p2].index >= 0 && DP[p2].index < NumGas) + { + if(TimeBinSynchronized[P[DP[p2].index].TimeBinHydro]) + { + SphP[DP[p2].index].Volume += dvol; + SphP[DP[p2].index].SurfaceArea += f->area; + +#if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) + if(SphP[DP[p2].index].MaxFaceAngle < angle) + SphP[DP[p2].index].MaxFaceAngle = angle; +#endif /* #if defined(REGULARIZE_MESH_FACE_ANGLE) || defined(OUTPUT_MESH_FACE_ANGLE) */ + +#ifdef OUTPUT_SURFACE_AREA + if(f->area) + SphP[DP[p2].index].CountFaces++; +#endif /* 
#ifdef OUTPUT_SURFACE_AREA */ +#if defined(REFINEMENT_SPLIT_CELLS) + if(SphP[DP[p2].index].MinimumEdgeDistance > h) + SphP[DP[p2].index].MinimumEdgeDistance = h; +#endif /* #if defined(REFINEMENT_SPLIT_CELLS) */ + /* let's now compute the center-of-mass of the pyramid on top */ + sx = 0.75 * f->cx + 0.25 * DP[p2].x; + sy = 0.75 * f->cy + 0.25 * DP[p2].y; + sz = 0.75 * f->cz + 0.25 * DP[p2].z; + + SphP[DP[p2].index].Center[0] += dvol * sx; + SphP[DP[p2].index].Center[1] += dvol * sy; + SphP[DP[p2].index].Center[2] += dvol * sz; + } + } + + int low_p, high_p; + + if(DP[p1].ID < DP[p2].ID) + { + low_p = p1; + high_p = p2; + } + else + { + low_p = p2; + high_p = p1; + } + + int this_task_responsible_flag = 0; + + if(TimeBinSynchronized[DP[low_p].timebin]) /* the one with the lower ID is active */ + { + /* we need to check whether the one with the lower ID is a local particle */ + if(DP[low_p].task == ThisTask && DP[low_p].index >= 0 && DP[low_p].index < NumGas) + this_task_responsible_flag = 1; + } + else if(TimeBinSynchronized[DP[high_p].timebin]) /* only the side with the higher ID is active */ + { + /* we need to check whether we hold the one with the higher ID, if yes, we'll do it */ + if(DP[high_p].task == ThisTask && DP[high_p].index >= 0 && DP[high_p].index < NumGas) + this_task_responsible_flag = 1; + } + + if(this_task_responsible_flag) + { + for(k = 0; k < 2; k++) + { + int p, q; + + if(k == 0) + { + q = p1; + p = DP[q].index; + } + else + { + q = p2; + p = DP[q].index; + } + + if(DP[q].task == ThisTask) + { + if(DP[q].index >= NumGas) /* this is a local ghost point */ + p -= NumGas; + + SphP[p].ActiveArea += f->area; + } + else + { + /* here we have a foreign ghost point */ + if(DP[q].originalindex < 0) + terminate("should not happen"); + + if(Narea >= MaxNarea) + { + T->Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR; + MaxNarea = T->Indi.AllocFacNflux; + AreaList = myrealloc_movable(AreaList, MaxNarea * sizeof(struct area_list_data)); + + if(Narea >= MaxNarea) 
+ terminate("Narea >= MaxNarea"); + } + + AreaList[Narea].task = DP[q].task; + AreaList[Narea].index = DP[q].originalindex; + AreaList[Narea].darea = f->area; + Narea++; + } + } + } +} + +/*! \brief Gathers tetrahedron data as elements in array called 'trilist'. + * + * \param[in] T Pointer to tessellation. + * \param[in] tt Index of tetrahedron in T->DT array. + * \param[in] nr Index in (global) edge arrays. + * \param[in] dtip Point representing tip of tetrahedron. + * \param[out] trilist List of triangles. + * \param[in] ntri Index in trilist which should be filled. + * \param[in] max_n_tri Maximum index in trilist. + * + * \return New length of trilist data. + */ +int derefine_refine_get_triangles(tessellation *T, int tt, int nr, point *dtip, triangle *trilist, int ntri, int max_n_tri) +{ + tetra *DT = T->DT; + tetra_center *DTC = T->DTC; + + int i, j, k, l, m, ii, jj, kk, ll, nn, count; + tetra *prev, *next; + tetra_center *prevc, *nextc; + double cx, cy, cz; + + tetra *t = &DT[tt]; + + i = edge_start[nr]; + j = edge_end[nr]; + k = edge_opposite[nr]; + l = edge_nexttetra[nr]; + + cx = DTC[tt].cx; + cy = DTC[tt].cy; + cz = DTC[tt].cz; + + count = 0; + + prev = t; + prevc = &DTC[tt]; + do + { + nn = prev->t[l]; + next = &DT[nn]; + nextc = &DTC[nn]; + + if(prev != t && next != t) + { + if(ntri >= max_n_tri) + terminate("ntri >= max_n_tri"); + + trilist[ntri].p[0][0] = cx; + trilist[ntri].p[0][1] = cy; + trilist[ntri].p[0][2] = cz; + + trilist[ntri].p[1][0] = prevc->cx; + trilist[ntri].p[1][1] = prevc->cy; + trilist[ntri].p[1][2] = prevc->cz; + + trilist[ntri].p[2][0] = nextc->cx; + trilist[ntri].p[2][1] = nextc->cy; + trilist[ntri].p[2][2] = nextc->cz; + + trilist[ntri].p[3][0] = dtip->x; + trilist[ntri].p[3][1] = dtip->y; + trilist[ntri].p[3][2] = dtip->z; + + if(get_tri_volume(ntri, trilist) < 0) + { + /* swap two points to get proper orientation */ + trilist[ntri].p[3][0] = nextc->cx; + trilist[ntri].p[3][1] = nextc->cy; + trilist[ntri].p[3][2] = nextc->cz; + 
+ trilist[ntri].p[2][0] = dtip->x; + trilist[ntri].p[2][1] = dtip->y; + trilist[ntri].p[2][2] = dtip->z; + } + + ntri++; + } + + for(m = 0, ll = ii = jj = -1; m < 4; m++) + { + if(next->p[m] == prev->p[k]) + ll = m; + if(next->p[m] == prev->p[i]) + ii = m; + if(next->p[m] == prev->p[j]) + jj = m; + } + + if(ll < 0 || ii < 0 || jj < 0) + terminate("inconsistency"); + + kk = 6 - (ll + ii + jj); + + prev = next; + prevc = nextc; + i = ii; + l = ll; + j = jj; + k = kk; + + count++; + + if(count > 1000) + terminate("count is too large"); + } + while(next != t); + + return ntri; +} + +/*! \brief Returns volume of a tetrahedron. + * + * \param[in] i Index of tetrahedron in trilist. + * \param[in] trilist Array with tetrahedra. + * + * \return Volume of tetrahedron. + */ +double get_tri_volume(int i, triangle *trilist) +{ + double nx, ny, nz; + + double *p0 = &trilist[i].p[0][0]; + double *p1 = &trilist[i].p[1][0]; + double *p2 = &trilist[i].p[2][0]; + double *p3 = &trilist[i].p[3][0]; + + nx = (p1[1] - p0[1]) * (p2[2] - p0[2]) - (p1[2] - p0[2]) * (p2[1] - p0[1]); + ny = (p1[2] - p0[2]) * (p2[0] - p0[0]) - (p1[0] - p0[0]) * (p2[2] - p0[2]); + nz = (p1[0] - p0[0]) * (p2[1] - p0[1]) - (p1[1] - p0[1]) * (p2[0] - p0[0]); + + return (nx * (p3[0] - p0[0]) + ny * (p3[1] - p0[1]) + nz * (p3[2] - p0[2])) / 6.0; +} + +/*! \brief Add point and adjust tetrahedra accordingly. + * + * \param[in] q Index of point in DP array. + * \param[in, out] trilist Array of tetrahedra. + * \param[in] ntri Number of elements in trilist before splitting. + * \param[in] max_ntri Maximum number of tetrahedron allowed. + * \param[in] vol Volume of tetrahedron to be split. + * + * \return Updated number of triangles. 
+ */ +int derefine_add_point_and_split_tri(int q, triangle *trilist, int ntri, int max_ntri, double vol) +{ +#define MIN_VOL_FAC 1.0e-6 + double m[3], n[3], sc[4], *a; + double cut[3][3], p[8][3], ed[3]; + int i, j, k, l, nnew, flag[4], count, oldq; + double vvi, vlargest, vv[5]; + int ilargest, nadd; + + for(i = 0, nnew = ntri; i < ntri; i++) + { + if(q < 0 || q >= Mesh.Ndp) + { + char buf[1000]; + sprintf(buf, "q=%d\n", q); + terminate(buf); + } + + if(trilist[i].owner < 0 || trilist[i].owner >= Mesh.Ndp) + { + char buf[1000]; + sprintf(buf, "trilist[i].owner=%d\n", trilist[i].owner); + terminate(buf); + } + + /* midpoint */ + m[0] = 0.5 * (Mesh.DP[q].x + Mesh.DP[trilist[i].owner].x); + m[1] = 0.5 * (Mesh.DP[q].y + Mesh.DP[trilist[i].owner].y); + m[2] = 0.5 * (Mesh.DP[q].z + Mesh.DP[trilist[i].owner].z); + + n[0] = (Mesh.DP[q].x - Mesh.DP[trilist[i].owner].x); + n[1] = (Mesh.DP[q].y - Mesh.DP[trilist[i].owner].y); + n[2] = (Mesh.DP[q].z - Mesh.DP[trilist[i].owner].z); + + if(q == trilist[i].owner) + terminate("q == trilist[i].owner"); + + for(k = 0, count = 0; k < 4; k++) /* determine the side of each point */ + { + a = &trilist[i].p[k][0]; + + sc[k] = (a[0] - m[0]) * n[0] + (a[1] - m[1]) * n[1] + (a[2] - m[2]) * n[2]; + + if(sc[k] > 0) + { + flag[k] = 1; + count++; + } + else + flag[k] = 0; + } + + switch(count) + { + case 0: /* the whole tetra is on the side of current owner - nothing to be done */ + break; + + case 4: /* the whole tetra is on the side of new point */ + trilist[i].owner = q; /* change owner */ + break; + + case 1: + case 3: + + /* we have one point on either side */ + /* for count=1 the tip of the tetra is cut off and assigned to the new point. 
*/ + /* the rest is subdivided into three tetras */ + + if(nnew + 3 > max_ntri) + { + terminate("nnew + 3 > max_ntri"); + } + + trilist[nnew] = trilist[i]; + trilist[nnew + 1] = trilist[i]; + trilist[nnew + 2] = trilist[i]; + + /* find the point index that is on the other side */ + for(k = 0; k < 4; k++) + { + if(flag[k] == 1 && count == 1) + break; + if(flag[k] == 0 && count == 3) + break; + } + + /* determine the cut-points on the corresponding edges */ + + for(j = 0; j < 3; j++) + { + double *b = trilist[i].p[k]; + double *a = trilist[i].p[access_triangles[k][j]]; + + for(l = 0; l < 3; l++) + ed[l] = a[l] - b[l]; + + double prod = (ed[0] * n[0] + ed[1] * n[1] + ed[2] * n[2]); + double t; + + if(prod) + t = -sc[k] / prod; + else + t = 0.5; + + if(t < 0) + t = 0; + if(t > 1) + t = 1; + + for(l = 0; l < 3; l++) + cut[j][l] = b[l] + t * ed[l]; + } + + /* modify the tetra that's assigned to the new point */ + for(j = 0; j < 3; j++) + { + double *a = trilist[i].p[access_triangles[k][j]]; + for(l = 0; l < 3; l++) + a[l] = cut[j][l]; + } + + oldq = trilist[i].owner; + + if(count == 1) + trilist[i].owner = q; + + /* modify the three new tetras */ + + for(l = 0; l < 3; l++) + { + trilist[nnew].p[k][l] = cut[0][l]; + + trilist[nnew + 1].p[access_triangles[k][0]][l] = cut[0][l]; + trilist[nnew + 1].p[k][l] = cut[2][l]; + + trilist[nnew + 2].p[access_triangles[k][0]][l] = cut[0][l]; + trilist[nnew + 2].p[access_triangles[k][2]][l] = cut[2][l]; + trilist[nnew + 2].p[k][l] = cut[1][l]; + } + + if(count == 1) + { + trilist[nnew].owner = oldq; + trilist[nnew + 1].owner = oldq; + trilist[nnew + 2].owner = oldq; + } + else + { + trilist[nnew].owner = q; + trilist[nnew + 1].owner = q; + trilist[nnew + 2].owner = q; + } + + nadd = 3; + + vvi = fabs(get_tri_volume(i, trilist)); + for(l = 0; l < nadd; l++) + vv[l] = fabs(get_tri_volume(nnew + l, trilist)); + + /* determine largest */ + ilargest = i; + vlargest = vvi; + for(l = 0; l < nadd; l++) + if(vv[l] > vlargest) + { + vlargest = 
vv[l]; + ilargest = nnew + l; + } + if(i != ilargest) + { + /* swap the largest to location i */ + triangle trisave = trilist[i]; + trilist[i] = trilist[ilargest]; + trilist[ilargest] = trisave; + + vv[ilargest - nnew] = vvi; + } + + for(l = 0; l < nadd; l++) + { + if(vv[l] < MIN_VOL_FAC * vol) + { + vv[l] = vv[nadd - 1]; + trilist[nnew + l] = trilist[nnew + nadd - 1]; + l--; + nadd--; + } + } + + nnew += nadd; + break; + + case 2: + /* we have two points on either side */ + + if(nnew + 5 > max_ntri) + terminate("nnew + 5 > max_ntri"); + + int kfirst, ksecond, jfirst, jsecond; + + if(flag[2] == 1 && flag[3] == 1) + { + kfirst = 3; + ksecond = 2; + jfirst = 0; + jsecond = 1; + } + else if(flag[1] == 1 && flag[3] == 1) + { + kfirst = 3; + ksecond = 1; + jfirst = 2; + jsecond = 0; + } + else if(flag[0] == 1 && flag[3] == 1) + { + kfirst = 3; + ksecond = 0; + jfirst = 1; + jsecond = 2; + } + else if(flag[1] == 1 && flag[2] == 1) + { + kfirst = 1; + ksecond = 2; + jfirst = 3; + jsecond = 0; + } + else if(flag[0] == 1 && flag[2] == 1) + { + kfirst = 0; + ksecond = 2; + jfirst = 1; + jsecond = 3; + } + else if(flag[0] == 1 && flag[1] == 1) + { + kfirst = 0; + ksecond = 1; + jfirst = 3; + jsecond = 2; + } + else + terminate("can't be"); + + int next = 0; + + for(l = 0; l < 3; l++) + p[next][l] = trilist[i].p[kfirst][l]; + next++; + + /* determine cuts with the corresponding two edges */ + { + double *b = trilist[i].p[kfirst]; + double *a = trilist[i].p[jfirst]; + + for(l = 0; l < 3; l++) + ed[l] = a[l] - b[l]; + + double prod = (ed[0] * n[0] + ed[1] * n[1] + ed[2] * n[2]); + double t; + + if(prod) + t = -sc[kfirst] / prod; + else + t = 0.5; + + if(t < 0) + t = 0; + if(t > 1) + t = 1; + + for(l = 0; l < 3; l++) + p[next][l] = b[l] + t * ed[l]; + next++; + + for(l = 0; l < 3; l++) + p[next][l] = a[l]; + next++; + } + + { + double *b = trilist[i].p[kfirst]; + double *a = trilist[i].p[jsecond]; + + for(l = 0; l < 3; l++) + ed[l] = a[l] - b[l]; + + double prod = (ed[0] * n[0] + 
ed[1] * n[1] + ed[2] * n[2]); + double t; + + if(prod) + t = -sc[kfirst] / prod; + else + t = 0.5; + + if(t < 0) + t = 0; + if(t > 1) + t = 1; + + for(l = 0; l < 3; l++) + p[next][l] = b[l] + t * ed[l]; + next++; + + for(l = 0; l < 3; l++) + p[next][l] = a[l]; + next++; + } + + for(l = 0; l < 3; l++) + p[next][l] = trilist[i].p[ksecond][l]; + next++; + + { + double *b = trilist[i].p[ksecond]; + double *a = trilist[i].p[jfirst]; + + for(l = 0; l < 3; l++) + ed[l] = a[l] - b[l]; + + double prod = (ed[0] * n[0] + ed[1] * n[1] + ed[2] * n[2]); + double t; + + if(prod) + t = -sc[ksecond] / prod; + else + t = 0.5; + + if(t < 0) + t = 0; + if(t > 1) + t = 1; + + for(l = 0; l < 3; l++) + p[next][l] = b[l] + t * ed[l]; + next++; + } + + { + double *b = trilist[i].p[ksecond]; + double *a = trilist[i].p[jsecond]; + + for(l = 0; l < 3; l++) + ed[l] = a[l] - b[l]; + + double prod = (ed[0] * n[0] + ed[1] * n[1] + ed[2] * n[2]); + double t; + + if(prod) + t = -sc[ksecond] / prod; + else + t = 0.5; + + if(t < 0) + t = 0; + if(t > 1) + t = 1; + + for(l = 0; l < 3; l++) + p[next][l] = b[l] + t * ed[l]; + next++; + } + + oldq = trilist[i].owner; + + /* now let's initialize the new triangles */ + for(l = 0; l < 3; l++) + { + /* first the ones that get to the new side */ + trilist[i].p[0][l] = p[0][l]; + trilist[i].p[1][l] = p[6][l]; + trilist[i].p[2][l] = p[5][l]; + trilist[i].p[3][l] = p[7][l]; + + trilist[nnew].p[0][l] = p[1][l]; + trilist[nnew].p[1][l] = p[3][l]; + trilist[nnew].p[2][l] = p[7][l]; + trilist[nnew].p[3][l] = p[0][l]; + + trilist[nnew + 1].p[0][l] = p[1][l]; + trilist[nnew + 1].p[1][l] = p[7][l]; + trilist[nnew + 1].p[2][l] = p[6][l]; + trilist[nnew + 1].p[3][l] = p[0][l]; + + /* now the ones that are on the old side */ + trilist[nnew + 2].p[0][l] = p[1][l]; + trilist[nnew + 2].p[1][l] = p[2][l]; + trilist[nnew + 2].p[2][l] = p[6][l]; + trilist[nnew + 2].p[3][l] = p[4][l]; + + trilist[nnew + 3].p[0][l] = p[3][l]; + trilist[nnew + 3].p[1][l] = p[1][l]; + trilist[nnew + 
3].p[2][l] = p[6][l]; + trilist[nnew + 3].p[3][l] = p[4][l]; + + trilist[nnew + 4].p[0][l] = p[3][l]; + trilist[nnew + 4].p[1][l] = p[6][l]; + trilist[nnew + 4].p[2][l] = p[7][l]; + trilist[nnew + 4].p[3][l] = p[4][l]; + } + + trilist[i].owner = q; + trilist[nnew].owner = q; + trilist[nnew + 1].owner = q; + + trilist[nnew + 2].owner = oldq; + trilist[nnew + 3].owner = oldq; + trilist[nnew + 4].owner = oldq; + + nadd = 5; + + vvi = fabs(get_tri_volume(i, trilist)); + for(l = 0; l < nadd; l++) + vv[l] = fabs(get_tri_volume(nnew + l, trilist)); + + /* determine largest */ + ilargest = i; + vlargest = vvi; + for(l = 0; l < nadd; l++) + if(vv[l] > vlargest) + { + vlargest = vv[l]; + ilargest = nnew + l; + } + if(i != ilargest) + { + /* swap the largest to location i */ + triangle trisave = trilist[i]; + trilist[i] = trilist[ilargest]; + trilist[ilargest] = trisave; + + vv[ilargest - nnew] = vvi; + } + + for(l = 0; l < nadd; l++) + { + if(vv[l] < MIN_VOL_FAC * vol) + { + vv[l] = vv[nadd - 1]; + trilist[nnew + l] = trilist[nnew + nadd - 1]; + l--; + nadd--; + } + } + + nnew += nadd; + break; + } + } + + return nnew; +} + +/*! \brief Processes edge for volume calculation. + * + * Calculates the contribution of edge to volumes of neighboring + * Voronoi cells in vol array. + * + * \param[in] T Pointer to tesselation. + * \param[in, out] volume of tetrahedra. + * \param[in] tt Index of triangle in DT array. + * \param[in] nr Index in edge array. 
+ * + * \return void + */ +void derefine_refine_process_edge(tessellation *T, double *vol, int tt, int nr) +{ + tetra *DT = T->DT; + point *DP = T->DP; + tetra_center *DTC = T->DTC; + + int i, j, k, l, m, ii, jj, kk, ll, nn, count, nr_next, p1, p2; + tetra *prev, *next; + tetra_center *prevc, *nextc; + double ax, ay, az; + double bx, by, bz; + double cx, cy, cz; + double nx, ny, nz; + double hhx, hhy, hhz; + double darea, dvol, h; + + tetra *t = &DT[tt]; + + i = edge_start[nr]; + j = edge_end[nr]; + k = edge_opposite[nr]; + l = edge_nexttetra[nr]; + + Edge_visited[tt] |= (1 << nr); + + p1 = t->p[i]; + p2 = t->p[j]; + + double area = 0; + + cx = DTC[tt].cx; + cy = DTC[tt].cy; + cz = DTC[tt].cz; + + count = 0; + + prev = t; + prevc = &DTC[tt]; + do + { + nn = prev->t[l]; + next = &DT[nn]; + nextc = &DTC[nn]; + + if(prev != t && next != t) + { + ax = prevc->cx - cx; + ay = prevc->cy - cy; + az = prevc->cz - cz; + + bx = nextc->cx - cx; + by = nextc->cy - cy; + bz = nextc->cz - cz; + + nx = ay * bz - az * by; + ny = az * bx - ax * bz; + nz = ax * by - ay * bx; + + darea = 0.5 * sqrt(nx * nx + ny * ny + nz * nz); + area += darea; + } + + for(m = 0, ll = ii = jj = -1; m < 4; m++) + { + if(next->p[m] == prev->p[k]) + ll = m; + if(next->p[m] == prev->p[i]) + ii = m; + if(next->p[m] == prev->p[j]) + jj = m; + } + + if(ll < 0 || ii < 0 || jj < 0) + terminate("inconsistency"); + + kk = 6 - (ll + ii + jj); + + /* need to determine the edge number to be able to flag it */ + + for(nr_next = 0; nr_next < 6; nr_next++) + if((edge_start[nr_next] == ii && edge_end[nr_next] == jj) || (edge_start[nr_next] == jj && edge_end[nr_next] == ii)) + { + if((Edge_visited[nn] & (1 << nr_next)) && next != t) + terminate("inconsistency"); + + Edge_visited[nn] |= (1 << nr_next); + break; + } + + prev = next; + prevc = nextc; + i = ii; + l = ll; + j = jj; + k = kk; + + count++; + + if(count > 1000) + terminate("count is too large"); + } + while(next != t); + + i = edge_start[nr]; + j = 
edge_end[nr]; + + hhx = 0.5 * (DP[p1].x - DP[p2].x); + hhy = 0.5 * (DP[p1].y - DP[p2].y); + hhz = 0.5 * (DP[p1].z - DP[p2].z); + + h = sqrt(hhx * hhx + hhy * hhy + hhz * hhz); + dvol = (1.0 / 3) * area * h; + + if(p1 >= 0 && p1 < DeRefMesh.Ndp) + vol[p1] += dvol; + + if(p2 >= 0 && p2 < DeRefMesh.Ndp) + vol[p2] += dvol; +} + +/*! \brief Insert a point into mesh. + * + * Finds the tetrahedron that contains this point, splits the tetrahedron. + * After this, flip the edges if needed restore Delaunayhood (which is applied + * recursively) until a valid Delaunay mesh is restored. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] pp index of Delaunay point in DP array. + * \param[in] ttstart initial guess in which triangle it might be, + * index in DT array. + * + * \return index to tetra that (currently) contains the point pp. + */ +int insert_point(tessellation *T, int pp, int ttstart) +{ + int tt0, tt1, tt2, tt3, tt4, tetra_with_p, tt; + int to_check[STACKSIZE_TETRA], freestack[STACKSIZE_TETRA]; + int n_faces_to_check = 0, nfree_on_stack = 0, moves; + int tip_index, flag, edgeface_nr; + int non_convex, convex_edge = 0, i, j; + + /* first, need to do a point location */ + tt0 = get_tetra(T, &T->DP[pp], &moves, ttstart, &flag, &edgeface_nr); + + tetra_with_p = tt0; + + if(flag == 1) /* that's the normal split of a tetrahedron into 4 */ + { + if(n_faces_to_check >= STACKSIZE_TETRA - 4) + terminate("stacksize exceeded"); + + /* we now need to split this tetrahedron into four */ + if(nfree_on_stack) + tt1 = freestack[--nfree_on_stack]; + else + tt1 = T->Ndt++; + + if(nfree_on_stack) + tt2 = freestack[--nfree_on_stack]; + else + tt2 = T->Ndt++; + + if(nfree_on_stack) + tt3 = freestack[--nfree_on_stack]; + else + tt3 = T->Ndt++; + + if(T->Ndt > T->MaxNdt) + { + T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR; + T->MaxNdt = T->Indi.AllocFacNdt; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, 
T->Indi.AllocFacNdt); +#endif /* #ifdef VERBOSE */ + T->DT = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra)); + T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center)); + T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char)); + + if(T->Ndt > T->MaxNdt) + terminate("Ndt > MaxNdt"); + } + + make_a_1_to_4_flip(T, pp, tt0, tt1, tt2, tt3); + + /* now we have a triangulation again - need to check whether there are + facets that are not Delaunay */ + /* let's initialize a stack with the facets that we need to check */ + + n_faces_to_check = 0; + + to_check[n_faces_to_check++] = tt0; + to_check[n_faces_to_check++] = tt1; + to_check[n_faces_to_check++] = tt2; + to_check[n_faces_to_check++] = tt3; + char *DTF = T->DTF; + DTF[tt0] = 0; + DTF[tt1] = 0; + DTF[tt2] = 0; + DTF[tt3] = 0; + } + + if(flag == 2) + { + /* create four new tetra */ + if(nfree_on_stack) + tt1 = freestack[--nfree_on_stack]; + else + tt1 = T->Ndt++; + + if(nfree_on_stack) + tt2 = freestack[--nfree_on_stack]; + else + tt2 = T->Ndt++; + + if(nfree_on_stack) + tt3 = freestack[--nfree_on_stack]; + else + tt3 = T->Ndt++; + + if(nfree_on_stack) + tt4 = freestack[--nfree_on_stack]; + else + tt4 = T->Ndt++; + + if(T->Ndt > T->MaxNdt) + { + T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR; + T->MaxNdt = T->Indi.AllocFacNdt; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, T->Indi.AllocFacNdt); +#endif /* #ifdef VERBOSE */ + T->DT = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra)); + T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center)); + T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char)); + + if(T->Ndt > T->MaxNdt) + terminate("Ndt > MaxNdt"); + } + + n_faces_to_check = 0; + + to_check[n_faces_to_check++] = tt0; + to_check[n_faces_to_check++] = T->DT[tt0].t[edgeface_nr]; + to_check[n_faces_to_check++] = tt1; + to_check[n_faces_to_check++] = tt2; + to_check[n_faces_to_check++] = tt3; + 
to_check[n_faces_to_check++] = tt4; + + char *DTF = T->DTF; + DTF[tt0] = 0; + DTF[T->DT[tt0].t[edgeface_nr]] = 0; + DTF[tt1] = 0; + DTF[tt2] = 0; + DTF[tt3] = 0; + DTF[tt4] = 0; + + make_a_face_split(T, tt0, edgeface_nr, pp, tt1, tt2, tt3, tt4); + } + + if(flag == 3) /* here we need to split an edge */ + { + int i, j, k, l, ii, jj, kk, ll, m, count; + int prev, next; + + /* count how many triangles share the edge */ + i = edge_start[edgeface_nr]; + j = edge_end[edgeface_nr]; + k = edge_opposite[edgeface_nr]; + l = edge_nexttetra[edgeface_nr]; + + count = 0; + n_faces_to_check = 0; + + prev = tt0; + do + { + to_check[n_faces_to_check++] = prev; + T->DTF[prev] = 0; + + tetra *DT = T->DT; + next = DT[prev].t[l]; + + for(m = 0, ll = ii = jj = -1; m < 4; m++) + { + if(DT[next].p[m] == DT[prev].p[k]) + ll = m; + if(DT[next].p[m] == DT[prev].p[i]) + ii = m; + if(DT[next].p[m] == DT[prev].p[j]) + jj = m; + } + + if(ll < 0 || ii < 0 || jj < 0) + terminate("inconsistency"); + + kk = 6 - (ll + ii + jj); + + prev = next; + i = ii; + l = ll; + j = jj; + k = kk; + + count++; + + if(count > 1000) + terminate("count exceeded"); + } + while(next != tt0); + + int *ttlist = mymalloc_movable(&ttlist, "ttlist", count * sizeof(int)); + + for(i = 0; i < count; i++) + { + if(nfree_on_stack) + ttlist[i] = freestack[--nfree_on_stack]; + else + { + ttlist[i] = T->Ndt++; + + if(T->Ndt > T->MaxNdt) + { + T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR; + T->MaxNdt = T->Indi.AllocFacNdt; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, + T->Indi.AllocFacNdt); +#endif /* #ifdef VERBOSE */ + T->DT = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra)); + T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * sizeof(tetra_center)); + T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char)); + + if(T->Ndt > T->MaxNdt) + terminate("Ndt > MaxNdt"); + } + } + + to_check[n_faces_to_check++] = ttlist[i]; + T->DTF[ttlist[i]] = 0; + } + + 
make_an_edge_split(T, tt0, edgeface_nr, count, pp, ttlist); + + myfree(ttlist); + } + + int iter = 0; + + while(n_faces_to_check) + { + iter++; + if(iter > 200000) + terminate("too many iterations"); + + tt = to_check[--n_faces_to_check]; /* this is the current tetra to look at. + The facet in question lies opposite to q */ + if(T->DT[tt].t[0] < 0) /* deleted? */ + continue; + + for(tip_index = 0; tip_index < 4; tip_index++) + if(T->DT[tt].p[tip_index] == pp) + break; + + if(tip_index < 4) /* otherwise the facet has been removed in a 3-2 flip */ + { + tetra *DT = T->DT; + point *DP = T->DP; + int qq = DT[tt].t[tip_index]; /* tetrahedron that's opposite of ours and shares the facet */ + int ppp = DT[qq].p[DT[tt].s[tip_index]]; /* point that's opposite of the facet in the other tetrahedron */ + + int ret, ret_exact; + + ret = InSphere_Errorbound(&DP[DT[qq].p[0]], &DP[DT[qq].p[1]], &DP[DT[qq].p[2]], &DP[DT[qq].p[3]], &DP[pp]); + CountInSphereTests++; + + if(ret != 0) + ret_exact = ret; + else + { + // let's decide with exact integer arithmetic + ret_exact = InSphere_Exact(&DP[DT[qq].p[0]], &DP[DT[qq].p[1]], &DP[DT[qq].p[2]], &DP[DT[qq].p[3]], &DP[pp]); + CountInSphereTestsExact++; + } + + if(ret_exact > 0) /* facet is illegal, because point lies inside */ + { + /* let's see whether the point lies in the triangle, or on a side, or opposite of one convex edge */ + + non_convex = convex_edge_test(T, tt, tip_index, &convex_edge); + + if(non_convex == 0) /* we can make a 2-3 flip */ + { + int ww; + + if(nfree_on_stack) + ww = freestack[--nfree_on_stack]; + else + ww = T->Ndt++; + + if(T->Ndt > T->MaxNdt) + { + T->Indi.AllocFacNdt *= ALLOC_INCREASE_FACTOR; + T->MaxNdt = T->Indi.AllocFacNdt; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdt=%d Indi.AllocFacNdt=%g\n", ThisTask, T->MaxNdt, + T->Indi.AllocFacNdt); +#endif /* #ifdef VERBOSE */ + T->DT = myrealloc_movable(T->DT, T->MaxNdt * sizeof(tetra)); + T->DTC = myrealloc_movable(T->DTC, T->MaxNdt * 
sizeof(tetra_center)); + T->DTF = myrealloc_movable(T->DTF, T->MaxNdt * sizeof(char)); + + if(T->Ndt > T->MaxNdt) + terminate("Ndt > MaxNdt"); + } + + if(n_faces_to_check >= STACKSIZE_TETRA - 3) + terminate("stacksize exceeded"); + + make_a_2_to_3_flip(T, tt, tip_index, qq, T->DT[tt].s[tip_index], ppp, ww); + + to_check[n_faces_to_check++] = tt; + to_check[n_faces_to_check++] = qq; + to_check[n_faces_to_check++] = ww; + T->DTF[tt] = 0; + T->DTF[qq] = 0; + T->DTF[ww] = 0; + } + else if(non_convex == 1) /* we might be able to make a 3-2 flip, or we deal with a convex edge on the outer hull */ + { + /* test whether the reflex edge is surrounded by exactly three tetrahedra */ + + i = convex_edge + 2; + if(i >= 3) + i -= 3; + i = access_triangles[tip_index][i]; + + for(j = 0; j < 4; j++) + if(DT[tt].p[i] == DT[qq].p[j]) + break; + + if(j >= 4) + { + terminate("not found"); + } + + if(DT[tt].t[i] == DT[qq].t[j]) /* this means there is exactly one tetrahedron between them, i.e. we have found the + third partner for the flip */ + { + int ww; + + ww = DT[tt].t[i]; + + make_a_3_to_2_flip(T, tt, qq, ww, tip_index, convex_edge, DT[tt].s[tip_index]); + + DT[ww].t[0] = -1; /* mark as deleted */ + + if(nfree_on_stack < STACKSIZE_TETRA) + freestack[nfree_on_stack++] = ww; + else + terminate("stack full"); + + tetra_with_p = tt; + if(n_faces_to_check >= STACKSIZE_TETRA - 2) + terminate("stack too full"); + + to_check[n_faces_to_check++] = tt; + to_check[n_faces_to_check++] = qq; + T->DTF[tt] = 0; + T->DTF[qq] = 0; + } + else + { + if(DT[DT[tt].t[i]].p[DT[tt].s[i]] == DPinfinity && DT[DT[qq].t[j]].p[DT[qq].s[j]] == DPinfinity) + { + printf("convex edge between points=%d %d on outer hull found\n", + (int)(DT[tt].p[access_triangles[tip_index][convex_edge]]), + (int)(DT[tt].p[access_triangles[tip_index][convex_edge < 2 ? 
convex_edge + 1 : 0]])); + + terminate("inconsistency"); /* this should not occur since we have embedded the points into a convex big + triangle */ + } + } + } + else if(non_convex == 2) /* we might be able to make a 4-4 flip */ + { + i = convex_edge + 2; + if(i >= 3) + i -= 3; + i = access_triangles[tip_index][i]; /* this is the point opposite of edge (but not tip) */ + + tetra *DT = T->DT; + char *DTF = T->DTF; + + for(j = 0; j < 4; j++) + if(DT[tt].p[i] == DT[qq].p[j]) + break; + + if(DT[DT[tt].t[i]].p[DT[tt].s[i]] == DT[DT[qq].t[j]].p[DT[qq].s[j]]) + { + /* ok, so we really have 4 tetra. The opposite points match up */ + + to_check[n_faces_to_check++] = tt; + to_check[n_faces_to_check++] = qq; + to_check[n_faces_to_check++] = DT[tt].t[i]; + to_check[n_faces_to_check++] = DT[qq].t[j]; + DTF[tt] = 0; + DTF[qq] = 0; + DTF[DT[tt].t[i]] = 0; + DTF[DT[qq].t[j]] = 0; + + make_a_4_to_4_flip(T, tt, tip_index, convex_edge); + } + } + } + else + tetra_with_p = tt; + } + } + + return tetra_with_p; +} + +/*! \brief Tests edges and detects if a flip is needed. + * + * \param[in] T Pointer to tessellation. + * \param[in] tt Index in DT array. + * \param[in] tip Index of forth point (tip of tetrahedron). + * \param[out] edgenr Index of edge. + * + * \return (-1,0,1,2), depending on which flip is necessary. 
+ */ +int convex_edge_test(tessellation *T, int tt, int tip, int *edgenr) +{ + tetra *DT = T->DT; + point *DP = T->DP; + tetra *t = &DT[tt]; + int i0, i1, i2, i3; + int vol, flag0, flag1, flag2; + int count_zeros = 0; + + i0 = access_triangles[tip][0]; + i1 = access_triangles[tip][1]; + i2 = access_triangles[tip][2]; + i3 = tip; + + point *p0 = &DP[t->p[i0]]; + point *p1 = &DP[t->p[i1]]; + point *p2 = &DP[t->p[i2]]; + point *p3 = &DP[t->p[i3]]; + point *p4 = &DP[DT[t->t[i3]].p[t->s[i3]]]; + + CountConvexEdgeTest++; + +#ifndef OPTIMIZE_MEMORY_USAGE + double ax = p1->xx - p0->xx; + double ay = p1->yy - p0->yy; + double az = p1->zz - p0->zz; + + double bx = p2->xx - p0->xx; + double by = p2->yy - p0->yy; + double bz = p2->zz - p0->zz; + + double cx = p3->xx - p0->xx; + double cy = p3->yy - p0->yy; + double cz = p3->zz - p0->zz; + + double qx = p4->xx - p0->xx; + double qy = p4->yy - p0->yy; + double qz = p4->zz - p0->zz; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + double ax, ay, az, bx, by, bz, cx, cy, cz, qx, qy, qz; + double pA_xyz[3], pB_xyz[3]; + IntegerMapType pA_ixyz[3], pB_ixyz[3]; + + get_integers_for_point(p0, pA_ixyz, pA_xyz); + + get_integers_for_point(p1, pB_ixyz, pB_xyz); + ax = pB_xyz[0] - pA_xyz[0]; + ay = pB_xyz[1] - pA_xyz[1]; + az = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p2, pB_ixyz, pB_xyz); + bx = pB_xyz[0] - pA_xyz[0]; + by = pB_xyz[1] - pA_xyz[1]; + bz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p3, pB_ixyz, pB_xyz); + cx = pB_xyz[0] - pA_xyz[0]; + cy = pB_xyz[1] - pA_xyz[1]; + cz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p4, pB_ixyz, pB_xyz); + qx = pB_xyz[0] - pA_xyz[0]; + qy = pB_xyz[1] - pA_xyz[1]; + qz = pB_xyz[2] - pA_xyz[2]; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */ + + double mv_data[] = {ax, bx, cx, qx, ay, by, cy, qy, az, bz, cz, qz}; + double x[3]; + + int status; + + status = solve_linear_equations(mv_data, x); + + /* x now contains the coordinates of the point p4 expanded in the basis (a,b,c) */ + /* 
the coordinates of point 3 in this basis are (0,0,1) */ + + if(status >= 0) + { + if(fabs(1.0 - x[2]) < INSIDE_EPS) + terminate("inconsistency"); + + double u, v, w; + + w = 1.0 / (1.0 - x[2]); + + u = w * x[0]; + v = w * x[1]; + + if(u > INSIDE_EPS && v > INSIDE_EPS && (1 - (u + v)) > INSIDE_EPS) + { + /* we have a point safely in the triangle: 2-3 flip should be fine */ + return 0; + } + + if(u > INSIDE_EPS && v < -INSIDE_EPS && (1 - (u + v)) > INSIDE_EPS) + { + /* edge 0 is clearly reflect, 3-2 flip allowed around edge 0 */ + *edgenr = 0; + return 1; + } + + if(u > INSIDE_EPS && v > INSIDE_EPS && (1 - (u + v)) < -INSIDE_EPS) + { + // printf("3-2 flip allowed since edge 1 is reflex\n"); + *edgenr = 1; + return 1; + } + + if(u < -INSIDE_EPS && v > INSIDE_EPS && (1 - (u + v)) > INSIDE_EPS) + { + // printf("3-2 flip allowed since edge 2 is reflex\n"); + *edgenr = 2; + return 1; + } + + if(u < -INSIDE_EPS && v < -INSIDE_EPS && (1 - (u + v)) > INSIDE_EPS) + return -1; /* two reflex edges */ + + if(u < -INSIDE_EPS && v > INSIDE_EPS && (1 - (u + v)) < -INSIDE_EPS) + return -1; /* two reflex edges */ + + if(u > INSIDE_EPS && v < -INSIDE_EPS && (1 - (u + v)) < -INSIDE_EPS) + return -1; /* two reflex edges */ + } + + CountConvexEdgeTestExact++; + + /* Now we need to test in more detail if we are on one of the edges */ + + vol = Orient3d_Exact(p0, p1, p2, p3); + + if(vol <= 0) + { + printf("flat or negatively tetrahedron found (vol=%d)\n", vol); + { + printf("p0=%d %g %g %g\n", (int)(p0 - DP), p0->x, p0->y, p0->z); + printf("p1=%d %g %g %g\n", (int)(p1 - DP), p1->x, p1->y, p1->z); + printf("p2=%d %g %g %g\n", (int)(p2 - DP), p2->x, p2->y, p2->z); + printf("p3=%d %g %g %g\n", (int)(p3 - DP), p3->x, p3->y, p3->z); + dump_points(T); + terminate("inconsistent tetrahedron"); + } + } + + flag0 = Orient3d_Exact(p1, p3, p2, p4); + flag1 = Orient3d_Exact(p0, p2, p3, p4); + flag2 = Orient3d_Exact(p0, p3, p1, p4); + + if(flag0 == 0) + count_zeros++; + + if(flag1 == 0) + count_zeros++; 
+ + if(flag2 == 0) + count_zeros++; + + if(flag0 >= 0 && flag1 >= 0 && flag2 < 0) + { + // printf("3-2 flip allowed since edge 0 is reflex\n"); + *edgenr = 0; + return 1; + } + + if(flag0 < 0 && flag1 >= 0 && flag2 >= 0) + { + // printf("3-2 flip allowed since edge 1 is reflex\n"); + *edgenr = 1; + return 1; + } + + if(flag0 >= 0 && flag1 < 0 && flag2 >= 0) + { + // printf("3-2 flip allowed since edge 2 is reflex\n"); + *edgenr = 2; + return 1; + } + + if(flag0 >= 0 && flag1 >= 0 && flag2 == 0) + { + // printf("4-4 flip around edge 0 may be possible\n"); + *edgenr = 0; + return 2; + } + + if(flag0 >= 0 && flag1 == 0 && flag2 >= 0) + { + // printf("4-4 flip around edge 2 may be possible\n"); + *edgenr = 2; + return 2; + } + + if(flag0 == 0 && flag1 >= 0 && flag2 >= 0) + { + // printf("4-4 flip around edge 1 may be possible\n"); + *edgenr = 1; + return 2; + } + + if(flag0 >= 0 && flag1 >= 0 && flag2 >= 0) + { + /* we seem to have a point in the triangle: 2-3 flip should be fine */ + return 0; + } + + return -1; +} + +/*! \brief Performs face split. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] tt0 First index in DT array. + * \param[in] face_nr Index of face. + * \param[in] pp Index of point. + * \param[in] tt1 Second index in DT array. + * \param[in] tt2 Third index in DT array. + * \param[in] qq1 Index in DT array. + * \param[in] qq2 Index in DT array. 
+ * + * \return void + */ +void make_a_face_split(tessellation *T, int tt0, int face_nr, int pp, int tt1, int tt2, int qq1, int qq2) +{ + tetra *DT = T->DT; + tetra *t0 = &DT[tt0]; + tetra *t1 = &DT[tt1]; + tetra *t2 = &DT[tt2]; + int qq0 = t0->t[face_nr]; + tetra *q0 = &DT[qq0]; + tetra *q1 = &DT[qq1]; + tetra *q2 = &DT[qq2]; + + int m, i0 = -1, i1 = -1, i2 = -1, i3 = -1, j0 = -1, j1 = -1, j2 = -1, j3 = -1; + + Count_FaceSplits++; + CountFlips++; + + *t1 = *t0; + *t2 = *t0; + + *q1 = *q0; + *q2 = *q0; + + i3 = face_nr; + j3 = t0->s[face_nr]; + + switch(i3) + { + case 3: + i0 = 0; + i1 = 1; + i2 = 2; + break; + case 2: + i0 = 0; + i1 = 3; + i2 = 1; + break; + case 1: + i0 = 0; + i1 = 2; + i2 = 3; + break; + case 0: + i0 = 1; + i1 = 3; + i2 = 2; + break; + } + + for(m = 0; m < 4; m++) + { + if(q0->p[m] == t0->p[i0]) + j0 = m; + if(q0->p[m] == t0->p[i1]) + j2 = m; + if(q0->p[m] == t0->p[i2]) + j1 = m; + } + + if(i0 < 0 || i1 < 0 || i2 < 0 || i3 < 0 || j0 < 0 || j1 < 0 || j2 < 0 || j3 < 0) + terminate("inconsistency"); + + t0->p[i2] = pp; + t1->p[i0] = pp; + t2->p[i1] = pp; + + q0->p[j1] = pp; + q1->p[j0] = pp; + q2->p[j2] = pp; + + t0->t[i0] = tt1; + t1->t[i2] = tt0; + t0->s[i0] = i2; + t1->s[i2] = i0; + + t1->t[i1] = tt2; + t2->t[i0] = tt1; + t1->s[i1] = i0; + t2->s[i0] = i1; + + t2->t[i2] = tt0; + t0->t[i1] = tt2; + t2->s[i2] = i1; + t0->s[i1] = i2; + + q0->t[j0] = qq1; + q1->t[j1] = qq0; + q0->s[j0] = j1; + q1->s[j1] = j0; + + q1->t[j2] = qq2; + q2->t[j0] = qq1; + q1->s[j2] = j0; + q2->s[j0] = j2; + + q2->t[j1] = qq0; + q0->t[j2] = qq2; + q2->s[j1] = j2; + q0->s[j2] = j1; + + t0->t[i3] = qq0; + q0->t[j3] = tt0; + t0->s[i3] = j3; + q0->s[j3] = i3; + + t1->t[i3] = qq1; + q1->t[j3] = tt1; + t1->s[i3] = j3; + q1->s[j3] = i3; + + t2->t[i3] = qq2; + q2->t[j3] = tt2; + t2->s[i3] = j3; + q2->s[j3] = i3; + + DT[t0->t[i2]].t[t0->s[i2]] = tt0; + DT[t1->t[i0]].t[t1->s[i0]] = tt1; + DT[t2->t[i1]].t[t2->s[i1]] = tt2; + + DT[q0->t[j1]].t[q0->s[j1]] = qq0; + 
DT[q1->t[j0]].t[q1->s[j0]] = qq1; + DT[q2->t[j2]].t[q2->s[j2]] = qq2; +} + +/*! \brief Performs edge split. + * + * \param[in, out] T Pointer to tessellation + * \param[in] tt0 Index in DT array + * \param[in] edge_nr Index of edge + * \param[in] count Number of elements in lists. + * \param[in] pp Index to point. + * \param[in] ttlist List of indices in DT. + */ +void make_an_edge_split(tessellation *T, int tt0, int edge_nr, int count, int pp, int *ttlist) +{ + tetra *DT = T->DT; + tetra *t0 = &DT[tt0]; + tetra *prev, *next; + tetra **tlist, **t_orig_list; + int *i_list, *j_list, *k_list, *l_list; + int i, j, k, l, ii, jj, kk, ll, m, nr, nrm, nrp; + + Count_EdgeSplits++; + CountFlips++; + + tlist = mymalloc("tlist", count * sizeof(tetra *)); + t_orig_list = mymalloc("t_orig_list", count * sizeof(tetra *)); + i_list = mymalloc("i_list", sizeof(int) * count); + j_list = mymalloc("j_list", sizeof(int) * count); + k_list = mymalloc("k_list", sizeof(int) * count); + l_list = mymalloc("l_list", sizeof(int) * count); + + for(i = 0; i < count; i++) + tlist[i] = &DT[ttlist[i]]; + + i = edge_start[edge_nr]; + j = edge_end[edge_nr]; + k = edge_opposite[edge_nr]; + l = edge_nexttetra[edge_nr]; + + nr = 0; + prev = t0; + do + { + t_orig_list[nr] = prev; + i_list[nr] = i; + j_list[nr] = j; + k_list[nr] = k; + l_list[nr] = l; + + next = &DT[prev->t[l]]; + + for(m = 0, ll = ii = jj = -1; m < 4; m++) + { + if(next->p[m] == prev->p[k]) + ll = m; + if(next->p[m] == prev->p[i]) + ii = m; + if(next->p[m] == prev->p[j]) + jj = m; + } + + if(ll < 0 || ii < 0 || jj < 0) + terminate("inconsistency"); + + kk = 6 - (ll + ii + jj); + + prev = next; + i = ii; + l = ll; + j = jj; + k = kk; + + nr++; + } + while(next != t0); + + for(nr = 0; nr < count; nr++) + { + *tlist[nr] = *t_orig_list[nr]; + + t_orig_list[nr]->p[j_list[nr]] = pp; + tlist[nr]->p[i_list[nr]] = pp; + + t_orig_list[nr]->t[i_list[nr]] = tlist[nr] - DT; + tlist[nr]->t[j_list[nr]] = t_orig_list[nr] - DT; + + 
t_orig_list[nr]->s[i_list[nr]] = j_list[nr]; + tlist[nr]->s[j_list[nr]] = i_list[nr]; + + DT[tlist[nr]->t[i_list[nr]]].t[tlist[nr]->s[i_list[nr]]] = tlist[nr] - DT; + + nrp = nr + 1; + if(nrp >= count) + nrp -= count; + + nrm = nr - 1; + if(nrm < 0) + nrm += count; + + tlist[nr]->t[l_list[nr]] = tlist[nrp] - DT; + tlist[nr]->s[l_list[nr]] = k_list[nrp]; + + tlist[nr]->t[k_list[nr]] = tlist[nrm] - DT; + tlist[nr]->s[k_list[nr]] = l_list[nrm]; + } + + myfree(l_list); + myfree(k_list); + myfree(j_list); + myfree(i_list); + + myfree(t_orig_list); + myfree(tlist); +} + +/*! \brief Make a 4 to 4 flip. + * + * See Springel (2010) for discussion on flips. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] tt Index in DT array. + * \param[in] tip_index Index of the point making up the tip of the + * tetrahedron. + * \param[in] edge_nr Index of edge. + * + * \return void + */ +void make_a_4_to_4_flip(tessellation *T, int tt, int tip_index, int edge_nr) +{ + tetra *DT = T->DT; + // printf("4-to-4 flip\n"); + tetra *t = &DT[tt]; + int i0, i1, i2, j; + int ww, qq, uu; + tetra *w, *q, *u; + tetra *t_top[4], *t_bottom[4]; + int s_top[4], s_bottom[4]; + int p[6]; + + Count_4_to_4_Flips++; + CountFlips++; + + uu = 0; + u = NULL; + + for(j = 0; j < 4; j++) + { + t_top[j] = NULL; + t_bottom[j] = NULL; + s_top[j] = -1; + s_bottom[j] = -1; + } + + i0 = access_triangles[tip_index][edge_nr]; + edge_nr += 1; + if(edge_nr >= 3) + edge_nr -= 3; + i1 = access_triangles[tip_index][edge_nr]; + edge_nr += 1; + if(edge_nr >= 3) + edge_nr -= 3; + i2 = access_triangles[tip_index][edge_nr]; + + t_top[0] = &DT[t->t[i0]]; + s_top[0] = t->s[i0]; + + t_top[1] = &DT[t->t[i1]]; + s_top[1] = t->s[i1]; + + ww = t->t[i2]; + w = &DT[ww]; + qq = t->t[tip_index]; + q = &DT[qq]; + + for(j = 0; j < 4; j++) + { + if(w->p[j] == t->p[i0]) + { + t_top[3] = &DT[w->t[j]]; + s_top[3] = w->s[j]; + } + + if(w->p[j] == t->p[i1]) + { + t_top[2] = &DT[w->t[j]]; + s_top[2] = w->s[j]; + } + + if(w->p[j] == 
t->p[tip_index]) + { + uu = w->t[j]; + u = &DT[uu]; + } + } + + /* u is only assigned above when w contains the tip point; stop cleanly here instead of dereferencing NULL in the loop below */ + if(u == NULL) + terminate("inconsistency"); + + for(j = 0; j < 4; j++) + { + if(u->p[j] == t->p[i0]) + { + t_bottom[3] = &DT[u->t[j]]; + s_bottom[3] = u->s[j]; + } + + if(u->p[j] == t->p[i1]) + { + t_bottom[2] = &DT[u->t[j]]; + s_bottom[2] = u->s[j]; + } + + if(q->p[j] == t->p[i0]) + { + t_bottom[0] = &DT[q->t[j]]; + s_bottom[0] = q->s[j]; + } + + if(q->p[j] == t->p[i1]) + { + t_bottom[1] = &DT[q->t[j]]; + s_bottom[1] = q->s[j]; + } + } + + /* the six points involved, saved before t, q, u and w are overwritten */ + p[0] = t->p[i1]; + p[1] = t->p[i2]; + p[2] = t->p[i0]; + p[3] = DT[t->t[i2]].p[t->s[i2]]; + p[4] = t->p[tip_index]; + p[5] = DT[t->t[tip_index]].p[t->s[tip_index]]; + + /* all four top and bottom neighbours must have been identified before rewiring (this check was previously repeated twice verbatim) */ + for(j = 0; j < 4; j++) + { + if(t_top[j] == NULL || t_bottom[j] == NULL) + { + printf("bad!\n"); + terminate("inconsistency"); + } + } + + t->p[0] = p[0]; + t->p[1] = p[1]; + t->p[2] = p[5]; + t->p[3] = p[4]; + + q->p[0] = p[1]; + q->p[1] = p[2]; + q->p[2] = p[5]; + q->p[3] = p[4]; + + u->p[0] = p[2]; + u->p[1] = p[3]; + u->p[2] = p[5]; + u->p[3] = p[4]; + + w->p[0] = p[3]; + w->p[1] = p[0]; + w->p[2] = p[5]; + w->p[3] = p[4]; + + /* link the four tetrahedra to each other through their slots 0 and 1 */ + t->t[0] = qq; + q->t[1] = tt; + t->s[0] = 1; + q->s[1] = 0; + + q->t[0] = uu; + u->t[1] = qq; + q->s[0] = 1; + u->s[1] = 0; + + u->t[0] = ww; + w->t[1] = uu; + u->s[0] = 1; + w->s[1] = 0; + + w->t[0] = tt; + t->t[1] = ww; + w->s[0] = 1; + t->s[1] = 0; + + /* slots 2 and 3 take the saved top/bottom neighbours, whose back-links (t and s) are updated as well */ + t->t[2] = t_top[0] - DT; + t->s[2] = s_top[0]; + DT[t->t[2]].t[t->s[2]] = tt; + DT[t->t[2]].s[t->s[2]] = 2; + + t->t[3] = t_bottom[0] - DT; + t->s[3] = s_bottom[0]; + DT[t->t[3]].t[t->s[3]] = tt; + DT[t->t[3]].s[t->s[3]] = 3; + + q->t[2] = t_top[1] - DT; + q->s[2] = s_top[1]; + DT[q->t[2]].t[q->s[2]] = qq; + DT[q->t[2]].s[q->s[2]] = 2; + + q->t[3] = t_bottom[1] - DT; + q->s[3] = s_bottom[1]; + DT[q->t[3]].t[q->s[3]] = qq; + DT[q->t[3]].s[q->s[3]] = 3; + + u->t[2] = t_top[2] - DT; + u->s[2] = s_top[2]; + DT[u->t[2]].t[u->s[2]] = uu; + 
DT[u->t[2]].s[u->s[2]] = 2; + + u->t[3] = t_bottom[2] - DT; + u->s[3] = s_bottom[2]; + DT[u->t[3]].t[u->s[3]] = uu; + DT[u->t[3]].s[u->s[3]] = 3; + + w->t[2] = t_top[3] - DT; + w->s[2] = s_top[3]; + DT[w->t[2]].t[w->s[2]] = ww; + DT[w->t[2]].s[w->s[2]] = 2; + + w->t[3] = t_bottom[3] - DT; + w->s[3] = s_bottom[3]; + DT[w->t[3]].t[w->s[3]] = ww; + DT[w->t[3]].s[w->s[3]] = 3; +} + +/*! \brief Make a 1 to 4 flip. + * + * See Springel (2010) for discussion on flips. + * + * DT[tt0] is copied into DT[tt1], DT[tt2] and DT[tt3]; vertex k of the k-th + * copy is then replaced by pp, the four tetrahedra are linked to each other, + * and the neighbour stored in slot k of the k-th copy is pointed back at it. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] pp Index of new point. + * \param[in] tt0 Index in DT of the tetrahedron being split; reused for the first result. + * \param[in] tt1 Index in DT that receives the second resulting tetrahedron. + * \param[in] tt2 Index in DT that receives the third resulting tetrahedron. + * \param[in] tt3 Index in DT that receives the fourth resulting tetrahedron. + * + * \return void + */ +void make_a_1_to_4_flip(tessellation *T, int pp, int tt0, int tt1, int tt2, int tt3) +{ + tetra *DT = T->DT; + + tetra *t0 = &DT[tt0]; + tetra *t1 = &DT[tt1]; + tetra *t2 = &DT[tt2]; + tetra *t3 = &DT[tt3]; + + Count_1_to_4_Flips++; + CountFlips++; + + *t1 = *t0; + *t2 = *t0; + *t3 = *t0; + + t0->p[0] = pp; /* copy k gets pp as its vertex k */ + t1->p[1] = pp; + t2->p[2] = pp; + t3->p[3] = pp; + + t0->t[1] = tt1; /* mutual links between the four copies, one pair of copies per group */ + t1->t[0] = tt0; + t0->s[1] = 0; + t1->s[0] = 1; + + t1->t[2] = tt2; + t2->t[1] = tt1; + t1->s[2] = 1; + t2->s[1] = 2; + + t2->t[0] = tt0; + t0->t[2] = tt2; + t2->s[0] = 2; + t0->s[2] = 0; + + t0->t[3] = tt3; + t3->t[0] = tt0; + t0->s[3] = 0; + t3->s[0] = 3; + + t1->t[3] = tt3; + t3->t[1] = tt1; + t1->s[3] = 1; + t3->s[1] = 3; + + t2->t[3] = tt3; + t3->t[2] = tt2; + t2->s[3] = 2; + t3->s[2] = 3; + + /* outer neighbours: only their t entry is rewritten, because copy k still uses slot k towards them, so their s entry is unchanged */ + DT[t0->t[0]].t[t0->s[0]] = tt0; + DT[t1->t[1]].t[t1->s[1]] = tt1; + DT[t2->t[2]].t[t2->s[2]] = tt2; + DT[t3->t[3]].t[t3->s[3]] = tt3; +} + +/*! \brief Make a 3 to 2 flip. + * + * See Springel (2010) for discussion on flips. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] tt0 Index in DT of the first tetrahedron; overwritten with a result. + * \param[in] tt1 Index in DT of the second tetrahedron; overwritten with a result. 
+ * \param[in] tt2 Index of third point in DT array. + * \param[in] tip Index of point making up tip of tetrahedron. + * \param[in] edge Index of edge. + * \param[in] bottom Tetrahedron on bottom. + * + * \return void + */ +void make_a_3_to_2_flip(tessellation *T, int tt0, int tt1, int tt2, int tip, int edge, int bottom) +{ + tetra *DT = T->DT; + tetra *t0 = &DT[tt0]; + tetra *t1 = &DT[tt1]; + tetra *t2 = &DT[tt2]; + + int i, j, k, ii, jj, iii, jjj; + tetra qbak, tbak, wbak; + + Count_3_to_2_Flips++; + CountFlips++; + + tbak = *t0; + qbak = *t1; + wbak = *t2; + + i = edge; + j = i + 1; + k = i + 2; + if(j >= 3) + j -= 3; + if(k >= 3) + k -= 3; + + i = access_triangles[tip][i]; + j = access_triangles[tip][j]; + k = access_triangles[tip][k]; + + for(ii = 0; ii < 4; ii++) + if(tbak.p[i] == qbak.p[ii]) + break; + + for(iii = 0; iii < 4; iii++) + if(tbak.p[i] == wbak.p[iii]) + break; + + for(jj = 0; jj < 4; jj++) + if(tbak.p[j] == qbak.p[jj]) + break; + + for(jjj = 0; jjj < 4; jjj++) + if(tbak.p[j] == wbak.p[jjj]) + break; + + t0->p[0] = qbak.p[bottom]; + t0->p[1] = tbak.p[k]; + t0->p[2] = tbak.p[i]; + t0->p[3] = tbak.p[tip]; + + t1->p[0] = qbak.p[bottom]; + t1->p[1] = tbak.p[j]; + t1->p[2] = tbak.p[k]; + t1->p[3] = tbak.p[tip]; + + t0->t[2] = tt1; + t1->t[1] = tt0; + t0->s[2] = 1; + t1->s[1] = 2; + + t0->t[0] = tbak.t[j]; + t0->s[0] = tbak.s[j]; + DT[t0->t[0]].s[t0->s[0]] = 0; + DT[t0->t[0]].t[t0->s[0]] = tt0; + + t0->t[3] = qbak.t[jj]; + t0->s[3] = qbak.s[jj]; + DT[t0->t[3]].s[t0->s[3]] = 3; + DT[t0->t[3]].t[t0->s[3]] = tt0; + + t0->t[1] = wbak.t[jjj]; + t0->s[1] = wbak.s[jjj]; + DT[t0->t[1]].s[t0->s[1]] = 1; + DT[t0->t[1]].t[t0->s[1]] = tt0; + + t1->t[0] = tbak.t[i]; + t1->s[0] = tbak.s[i]; + DT[t1->t[0]].s[t1->s[0]] = 0; + DT[t1->t[0]].t[t1->s[0]] = tt1; + + t1->t[3] = qbak.t[ii]; + t1->s[3] = qbak.s[ii]; + DT[t1->t[3]].s[t1->s[3]] = 3; + DT[t1->t[3]].t[t1->s[3]] = tt1; + + t1->t[2] = wbak.t[iii]; + t1->s[2] = wbak.s[iii]; + DT[t1->t[2]].s[t1->s[2]] = 2; + 
DT[t1->t[2]].t[t1->s[2]] = tt1; + + CountFlips++; /* NOTE(review): CountFlips is also incremented at the top of this function (next to Count_3_to_2_Flips++), so each 3-to-2 flip adds 2 -- confirm this is intended */ +} + +/*! \brief Make a 2 to 3 flip + * + * See Springel (2010) for discussion on flips. + * + * DT[tt0] and DT[tt1] are saved, DT[tt0] is copied into DT[tt1] and DT[tt2], + * and in the k-th of the three tetrahedra the vertex access_triangles[tip][k] + * is replaced by qq. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] tt0 Index in DT of the first tetrahedron; reused for a result. + * \param[in] tip Local index (slot in p[], t[], s[]) of the tip within DT[tt0]. + * \param[in] tt1 Index in DT of the second tetrahedron; reused for a result. + * \param[in] bottom Local index within the original DT[tt1]; used to index access_triangles. + * \param[in] qq Index of the point written into the three resulting tetrahedra. + * \param[in] tt2 Index in DT that receives the third resulting tetrahedron. + * + * \return void + */ +void make_a_2_to_3_flip(tessellation *T, int tt0, int tip, int tt1, int bottom, int qq, int tt2) +{ + tetra *DT = T->DT; + tetra *t0 = &DT[tt0]; + tetra *t1 = &DT[tt1]; + tetra *t2 = &DT[tt2]; + tetra qbak, tbak; + int k; + + Count_2_to_3_Flips++; /* NOTE(review): CountFlips is not incremented anywhere in this function, unlike the neighbouring flip/split routines -- confirm */ + + tbak = *t0; + qbak = *t1; /* to save info */ + + *t1 = *t0; + *t2 = *t0; + + /* redefine points */ + t0->p[access_triangles[tip][0]] = qq; + t1->p[access_triangles[tip][1]] = qq; + t2->p[access_triangles[tip][2]] = qq; + + /* make neighbour connections */ + t0->t[access_triangles[tip][1]] = tt1; + t1->t[access_triangles[tip][0]] = tt0; + t0->s[access_triangles[tip][1]] = access_triangles[tip][0]; + t1->s[access_triangles[tip][0]] = access_triangles[tip][1]; + + t0->t[access_triangles[tip][2]] = tt2; + t2->t[access_triangles[tip][0]] = tt0; + t0->s[access_triangles[tip][2]] = access_triangles[tip][0]; + t2->s[access_triangles[tip][0]] = access_triangles[tip][2]; + + t1->t[access_triangles[tip][2]] = tt2; + t2->t[access_triangles[tip][1]] = tt1; + t1->s[access_triangles[tip][2]] = access_triangles[tip][1]; + t2->s[access_triangles[tip][1]] = access_triangles[tip][2]; + + /* these are the ones on the top */ + DT[t0->t[access_triangles[tip][0]]].t[t0->s[access_triangles[tip][0]]] = tt0; + DT[t1->t[access_triangles[tip][1]]].t[t1->s[access_triangles[tip][1]]] = tt1; + DT[t2->t[access_triangles[tip][2]]].t[t2->s[access_triangles[tip][2]]] = tt2; + + /* now the one at the 
bottom */ + + /* k selects the entry of access_triangles[bottom] whose vertex in the saved DT[tt1] equals vertex access_triangles[tip][0] of the saved DT[tt0]; k = 2 is taken without a comparison when the first two do not match */ + if(qbak.p[access_triangles[bottom][0]] == tbak.p[access_triangles[tip][0]]) + k = 0; + else if(qbak.p[access_triangles[bottom][1]] == tbak.p[access_triangles[tip][0]]) + k = 1; + else + k = 2; + + t0->t[tip] = qbak.t[access_triangles[bottom][k]]; + t0->s[tip] = qbak.s[access_triangles[bottom][k]]; + DT[t0->t[tip]].t[t0->s[tip]] = tt0; + DT[t0->t[tip]].s[t0->s[tip]] = tip; + + /* same lookup for vertex access_triangles[tip][1], wiring t1 */ + if(qbak.p[access_triangles[bottom][0]] == tbak.p[access_triangles[tip][1]]) + k = 0; + else if(qbak.p[access_triangles[bottom][1]] == tbak.p[access_triangles[tip][1]]) + k = 1; + else + k = 2; + + t1->t[tip] = qbak.t[access_triangles[bottom][k]]; + t1->s[tip] = qbak.s[access_triangles[bottom][k]]; + DT[t1->t[tip]].t[t1->s[tip]] = tt1; + DT[t1->t[tip]].s[t1->s[tip]] = tip; + + /* same lookup for vertex access_triangles[tip][2], wiring t2 */ + if(qbak.p[access_triangles[bottom][0]] == tbak.p[access_triangles[tip][2]]) + k = 0; + else if(qbak.p[access_triangles[bottom][1]] == tbak.p[access_triangles[tip][2]]) + k = 1; + else + k = 2; + + t2->t[tip] = qbak.t[access_triangles[bottom][k]]; + t2->s[tip] = qbak.s[access_triangles[bottom][k]]; + DT[t2->t[tip]].t[t2->s[tip]] = tt2; + DT[t2->t[tip]].s[t2->s[tip]] = tip; +} + +static int ErrorFlag = 0; /* set to 1 by get_tetra() once a walk exceeds MAX_COUNT_MOVES; InTetra() prints diagnostics while it is non-zero */ + +/*! \brief Gets tetrahedron. + * + * Returns the index of the tetrahedron containing the point p. + * The search is started from the tetrahedron DT[ttstart] and moves to the + * neighbour reported by InTetra until InTetra returns a non-zero value. + * + * \param[in] T Pointer to tessellation. + * \param[in] p Point. + * \param[out] moves The number of moves necessary to find tetrahedron. + * \param[in] ttstart Index in DT of the tetrahedron where the search starts. + * \param[out] flag The return value from InTetra, specifying whether + * the point is inside or on the edge/face. + * \param[out] edgeface_nr The edge/face number on the tetrahedron containing + * the point, in case flag is >1. + * + * \return Index of tetrahedron. 
+ */ +int get_tetra(tessellation *T, point *p, int *moves, int ttstart, int *flag, int *edgeface_nr) +{ + int ret, count_moves = 0; + int tt, next_tetra; + + tt = ttstart; + +#define MAX_COUNT_MOVES 1000000 + + while((ret = InTetra(T, tt, p, edgeface_nr, &next_tetra)) == 0) + { + count_moves++; + + if(count_moves > MAX_COUNT_MOVES) + { + ErrorFlag = 1; + + if(count_moves > MAX_COUNT_MOVES + 10) + terminate("too many moves"); + } + + tt = next_tetra; + } + + *moves = count_moves; + *flag = ret; + + return tt; +} + +/*! \brief Is point in tetrahedron? + * + * Tests whether point DP[pp] lies in the tetrahedron DT[tt]. The + * return value is 0 if the point is outside, 1 if it's inside, 2 if + * it's on a face, and 3 if it's on an edge. If it's either of the + * last two, the edgeface_nr is set to the corresponding index of the + * edge or face. If the point is outside, nexttetra is set to the + * index of a neighboring tetrahedron in the direction of the + * point, otherwise it's unmodified. + * + * \param[in] T Tesslation. + * \param[in] tt Index of tetrahedron in DT array. + * \param[in] p Point. + * \param[out] edgeface_nr The edge/face number on the tetrahedron containing + * the point, in case flag is >1. + * \param[out] nexttetra Index of tetrahedron. + * + * \return Point in thetrahedron? + * + */ +int InTetra(tessellation *T, int tt, point *p, int *edgeface_nr, int *nexttetra) +{ + tetra *DT = T->DT; + point *DP = T->DP; + tetra *t = &DT[tt]; + + point *p0 = &DP[t->p[0]]; + point *p1 = &DP[t->p[1]]; + point *p2 = &DP[t->p[2]]; + point *p3 = &DP[t->p[3]]; + + // test if we are in an "infinity tetra", which are the ones that + // bound the tesselated volume. Arepo terminates if this happens, + // but for Sunrise this is a valid occurence so we'll return -1 to + // indicate the point is outside the volume. 
XXX Actually it + // shouldn't do this anymore because we now do box tests instead + if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3)) + { +#ifndef LONGIDS + printf("task=%d: we are in a tetraeder with an infinity point. tetra=%d, coordinates of point=(%g|%g|%g) ID=%d\n", ThisTask, tt, + p->x, p->y, p->z, p->ID); +#else /* #ifndef LONGIDS */ + printf("task=%d: we are in a tetraeder with an infinity point. tetra=%d, coordinates of point=(%g|%g|%g) ID=%llu\n", ThisTask, + tt, p->x, p->y, p->z, p->ID); +#endif /* #ifndef LONGIDS #else */ + terminate("invalid tetrahedron"); + } + + Count_InTetra++; + +#ifndef OPTIMIZE_MEMORY_USAGE + double ax = p1->xx - p0->xx; + double ay = p1->yy - p0->yy; + double az = p1->zz - p0->zz; + + double bx = p2->xx - p0->xx; + double by = p2->yy - p0->yy; + double bz = p2->zz - p0->zz; + + double cx = p3->xx - p0->xx; + double cy = p3->yy - p0->yy; + double cz = p3->zz - p0->zz; + + double qx = p->xx - p0->xx; + double qy = p->yy - p0->yy; + double qz = p->zz - p0->zz; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + double ax, ay, az, bx, by, bz, cx, cy, cz, qx, qy, qz; + double pA_xyz[3], pB_xyz[3]; + IntegerMapType pA_ixyz[3], pB_ixyz[3]; + + get_integers_for_point(p0, pA_ixyz, pA_xyz); + + get_integers_for_point(p1, pB_ixyz, pB_xyz); + ax = pB_xyz[0] - pA_xyz[0]; + ay = pB_xyz[1] - pA_xyz[1]; + az = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p2, pB_ixyz, pB_xyz); + bx = pB_xyz[0] - pA_xyz[0]; + by = pB_xyz[1] - pA_xyz[1]; + bz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p3, pB_ixyz, pB_xyz); + cx = pB_xyz[0] - pA_xyz[0]; + cy = pB_xyz[1] - pA_xyz[1]; + cz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p, pB_ixyz, pB_xyz); + qx = pB_xyz[0] - pA_xyz[0]; + qy = pB_xyz[1] - pA_xyz[1]; + qz = pB_xyz[2] - pA_xyz[2]; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + + double mv_data[] = {ax, bx, cx, qx, ay, by, cy, qy, az, bz, cz, qz}; + double x[3]; + + int ivol, flag3, flag2, flag1, flag0; + int 
count_zeros = 0; + + int status; + + status = solve_linear_equations(mv_data, x); + + if(status < 0) + { + ivol = Orient3d_Exact(p0, p1, p2, p3); + if(ivol <= 0) + { + printf("flat or negatively tetrahedron found (ivol=%d) tt=%d\n", ivol, tt); + terminate("invalid tetrahedron"); + } + } + + /* x now contains the coordinates of the point p expanded in the basis (a,b,c) */ + + if(ErrorFlag) + { + ivol = Orient3d_Exact(p0, p1, p2, p3); + flag3 = Orient3d_Exact(p0, p1, p2, p); + flag2 = Orient3d_Exact(p0, p3, p1, p); + flag1 = Orient3d_Exact(p0, p2, p3, p); + flag0 = Orient3d_Exact(p1, p3, p2, p); + + printf("\n\nTetra=%d\n", (int)(t - DT)); + printf("ivol=%d flag0=%d %d %d %d\n", ivol, flag0, flag1, flag2, flag3); + printf("xx = %g %g %g 1-sum=%g\n", x[0], x[1], x[2], 1 - (x[0] + x[1] + x[2])); + printf("a= %g %g %g\n", ax, ay, az); + printf("b= %g %g %g\n", bx, by, bz); + printf("c= %g %g %g\n", cx, cy, cz); + printf("q= %g %g %g\n", qx, qy, qz); + printf("(axb)*c) = %g\n", (ay * bz - az * by) * cx + (az * bx - ax * bz) * cy + (ax * by - ay * bx) * cz); + printf("next tetras=%d %d %d %d\n", t->t[0], t->t[1], t->t[2], t->t[3]); + } + + if(status >= 0) + { + if(x[0] > INSIDE_EPS && x[1] > INSIDE_EPS && x[2] > INSIDE_EPS && (1 - (x[0] + x[1] + x[2])) > INSIDE_EPS) + { + /* looks like we are safely inside the tetrahedron */ + + return 1; /* our point is really nicely inside the tetrahedron */ + } + + if(x[0] < -INSIDE_EPS || x[1] < -INSIDE_EPS || x[2] < -INSIDE_EPS || (1 - (x[0] + x[1] + x[2])) < -INSIDE_EPS) + { + /* looks like we are clearly outside the tetrahedron. 
+ Let's look for a good neighbouring tetrahedron to continue the search */ + + /* note: in the (a,b,c) basis, the center-of-mass has coordinates (1/4, 1/4, 1/4) */ + + double w, u, v; + + if(ErrorFlag) + { + w = 0.25 / (0.25 - x[2]); + u = 0.25 + w * (x[0] - 0.25); + v = 0.25 + w * (x[1] - 0.25); + printf("[3] w=%g u=%g v=%g fabs(x[2] - 0.25)=%g\n", w, u, v, fabs(x[2] - 0.25)); + + w = 0.25 / (0.25 - x[1]); + u = 0.25 + w * (x[0] - 0.25); + v = 0.25 + w * (x[2] - 0.25); + printf("[3] w=%g u=%g v=%g fabs(x[1] - 0.25)=%g\n", w, u, v, fabs(x[1] - 0.25)); + + w = 0.25 / (0.25 - x[0]); + u = 0.25 + w * (x[1] - 0.25); + v = 0.25 + w * (x[2] - 0.25); + printf("[3] w=%g u=%g v=%g fabs(x[0] - 0.25)=%g\n", w, u, v, fabs(x[0] - 0.25)); + } + + if(fabs(x[2] - 0.25) > INSIDE_EPS) + { + w = 0.25 / (0.25 - x[2]); + if(w > 0) + { + u = 0.25 + w * (x[0] - 0.25); + v = 0.25 + w * (x[1] - 0.25); + if(u > -INSIDE_EPS && v > -INSIDE_EPS && (1 - (u + v) > -INSIDE_EPS)) + { + *nexttetra = t->t[3]; + return 0; + } + } + } + + if(fabs(x[1] - 0.25) > INSIDE_EPS) + { + w = 0.25 / (0.25 - x[1]); + if(w > 0) + { + u = 0.25 + w * (x[0] - 0.25); + v = 0.25 + w * (x[2] - 0.25); + if(u > -INSIDE_EPS && v > -INSIDE_EPS && (1 - (u + v) > -INSIDE_EPS)) + { + *nexttetra = t->t[2]; + return 0; + } + } + } + + if(fabs(x[0] - 0.25) > INSIDE_EPS) + { + w = 0.25 / (0.25 - x[0]); + if(w > 0) + { + u = 0.25 + w * (x[1] - 0.25); + v = 0.25 + w * (x[2] - 0.25); + if(u > -INSIDE_EPS && v > -INSIDE_EPS && (1 - (u + v) > -INSIDE_EPS)) + { + *nexttetra = t->t[1]; + return 0; + } + } + } + + *nexttetra = t->t[0]; + return 0; + } + } + + /* here we need to decide whether we have a degenerate case, i.e. 
+ whether we think the point lies on a face or an edge of the tetrahedron */ + + if(ErrorFlag) + { + printf("doing exact test for tetra=%d\n", (int)(t - DT)); + } + + Count_InTetraExact++; + + if((ivol = Orient3d_Exact(p0, p1, p2, p3)) <= 0) + { + printf("flat or negatively oriented tetrahedron found (vol=%d)\n", ivol); + terminate("invalid tetrahedron"); + } + + flag3 = Orient3d_Exact(p0, p1, p2, p); + flag2 = Orient3d_Exact(p0, p3, p1, p); + flag1 = Orient3d_Exact(p0, p2, p3, p); + flag0 = Orient3d_Exact(p1, p3, p2, p); + + if(flag0 == 0) + count_zeros++; + + if(flag1 == 0) + count_zeros++; + + if(flag2 == 0) + count_zeros++; + + if(flag3 == 0) + count_zeros++; + + if(count_zeros > 2) + { + printf("task=%d flags=%d %d %d %d (axb)*c = %g\n", ThisTask, flag0, flag1, flag2, flag3, + (ay * bz - az * by) * cx + (az * bx - ax * bz) * cy + (ax * by - ay * bx) * cz); + + printf( + "task=%d pp0=%ld pp1=%ld pp2=%ld pp3=%ld p=%ld IDs=(%llu %llu %llu %llu %llu) pos_0=(%g|%g|%g) pos_1=(%g|%g|%g) " + "pos_2=(%g|%g|%g) pos_3=(%g|%g|%g) pos=(%g|%g|%g)\n", + ThisTask, p0 - DP, p1 - DP, p2 - DP, p3 - DP, p - DP, (long long)p0->ID, (long long)p1->ID, (long long)p2->ID, + (long long)p3->ID, (long long)p->ID, p0->x, p0->y, p0->z, p1->x, p1->y, p1->z, p2->x, p2->y, p2->z, p3->x, p3->y, p3->z, + p->x, p->y, p->z); + +#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) + printf("task=%d imageflags=(%d %d %d %d %d)\n", ThisTask, p0->image_flags, p1->image_flags, p2->image_flags, p3->image_flags, + p->image_flags); +#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */ + terminate("strange zero count"); + } + + if(flag0 >= 0 && flag1 >= 0 && flag2 >= 0 && flag3 >= 0) + { + /* we have a point inside the tetra, but it may still be on one of the edges */ + + if(count_zeros == 0) + { + /* ok, let's split the tetra in 4, we are apparently well enough inside */ + return 1; + } + + if(count_zeros == 1) /* we lie on a face */ + { + 
if(flag0 == 0) + { + *edgeface_nr = 0; + return 2; + } + + if(flag1 == 0) + { + *edgeface_nr = 1; + return 2; + } + + if(flag2 == 0) + { + *edgeface_nr = 2; + return 2; + } + + if(flag3 == 0) + { + *edgeface_nr = 3; + return 2; + } + } + + if(count_zeros == 2) /* we lie on an edge */ + { + if(flag0 == 0 && flag1 == 0) + { + *edgeface_nr = 5; + return 3; + } + + if(flag0 == 0 && flag2 == 0) + { + *edgeface_nr = 4; + return 3; + } + + if(flag0 == 0 && flag3 == 0) + { + *edgeface_nr = 3; + return 3; + } + + if(flag1 == 0 && flag2 == 0) + { + *edgeface_nr = 2; + return 3; + } + + if(flag1 == 0 && flag3 == 0) + { + *edgeface_nr = 1; + return 3; + } + + if(flag2 == 0 && flag3 == 0) + { + *edgeface_nr = 0; + return 3; + } + } + } + + /* we seem to be lying clearly outside the tetrahedron */ + /* Let's determine a suitable neighbour */ + + /* if there is a single negative value, let's pick this side */ + + if(flag0 < 0 && flag1 >= 0 && flag2 >= 0 && flag3 >= 0) + { + *nexttetra = t->t[0]; + return 0; + } + + if(flag0 >= 0 && flag1 < 0 && flag2 >= 0 && flag3 >= 0) + { + *nexttetra = t->t[1]; + return 0; + } + + if(flag0 >= 0 && flag1 >= 0 && flag2 < 0 && flag3 >= 0) + { + *nexttetra = t->t[2]; + return 0; + } + if(flag0 >= 0 && flag1 >= 0 && flag2 >= 0 && flag3 < 0) + { + *nexttetra = t->t[3]; + return 0; + } + + /* there are at least two negative values. Let's pick a random one */ + + int ind = -1; + + if(flag0 < 0) + { + if(ind < 0) + ind = 0; + else + { + if(get_random_number() < 0.5) + ind = 0; + } + } + + if(flag1 < 0) + { + if(ind < 0) + ind = 1; + else + { + if(get_random_number() < 0.5) + ind = 1; + } + } + + if(flag2 < 0) + { + if(ind < 0) + ind = 2; + else + { + if(get_random_number() < 0.5) + ind = 2; + } + } + + if(flag3 < 0) + { + if(ind < 0) + ind = 3; + else + { + if(get_random_number() < 0.5) + ind = 3; + } + } + + *nexttetra = t->t[ind]; + return 0; +} + +/*! \brief Computes the circum-circle of all tetrahedra in mesh. 
+ * + * Entries whose DTF bit 0 is already set are skipped. The bit is set for every + * other entry, including deleted tetrahedra and those with a DPinfinity + * vertex, for which update_circumcircle() is not called. + * + * \param[in, out] T Pointer to tessellation. + * + * \return void + */ +void compute_circumcircles(tessellation *T) +{ + tetra *DT = T->DT; + char *DTF = T->DTF; + int i; + + for(i = 0; i < T->Ndt; i++) + { + if(DTF[i] & 1) /* bit 0 set: already handled */ + continue; + DTF[i] |= 1; + + if(DT[i].t[0] < 0) /* deleted ? */ + continue; + + /* skip tetrahedra that have DPinfinity as one of their four vertices */ + if(DT[i].p[0] == DPinfinity) + continue; + if(DT[i].p[1] == DPinfinity) + continue; + if(DT[i].p[2] == DPinfinity) + continue; + if(DT[i].p[3] == DPinfinity) + continue; + + update_circumcircle(T, i); + } +} + +/*! \brief Determinant calculation with arbitrary precision arithmetics. + * + * Auxiliary function for exact circum-circle calculation. + * + * Evaluates det = ax*(bz*cy - by*cz) + bx*(cz*ay - cy*az) + cx*(az*by - ay*bz), + * which is the negative of the usual determinant of the rows (ax,ay,az), + * (bx,by,bz), (cx,cy,cz). + * + * \param[out] det Result; not initialised here, so the caller has to initialise it. + * \param[in] ax, ay, az First row. + * \param[in] bx, by, bz Second row. + * \param[in] cx, cy, cz Third row. + * + * \return void + */ +void calc_mpz_determinant(mpz_t det, mpz_t ax, mpz_t ay, mpz_t az, mpz_t bx, mpz_t by, mpz_t bz, mpz_t cx, mpz_t cy, mpz_t cz) +{ + mpz_t bz_cy, by_cz, cz_ay, cy_az, az_by, ay_bz; + + /* the six pairwise products */ + mpz_init(bz_cy); + mpz_mul(bz_cy, bz, cy); + + mpz_init(by_cz); + mpz_mul(by_cz, by, cz); + + mpz_init(cz_ay); + mpz_mul(cz_ay, cz, ay); + + mpz_init(cy_az); + mpz_mul(cy_az, cy, az); + + mpz_init(az_by); + mpz_mul(az_by, az, by); + + mpz_init(ay_bz); + mpz_mul(ay_bz, ay, bz); + + mpz_t bzcy_bycz, czay_cyaz, azby_aybz; + + mpz_init(bzcy_bycz); + mpz_init(czay_cyaz); + mpz_init(azby_aybz); + + /* the three differences of products */ + mpz_sub(bzcy_bycz, bz_cy, by_cz); + mpz_sub(czay_cyaz, cz_ay, cy_az); + mpz_sub(azby_aybz, az_by, ay_bz); + + mpz_t a, b, c, ab; + + mpz_init(a); + mpz_init(b); + mpz_init(c); + + /* each difference is multiplied by the x component of the remaining row */ + mpz_mul(a, bzcy_bycz, ax); + mpz_mul(b, czay_cyaz, bx); + mpz_mul(c, azby_aybz, cx); + + mpz_init(ab); + + mpz_add(ab, a, b); + mpz_add(det, ab, c); + + /* release all temporaries, in reverse order of initialisation */ + mpz_clear(ab); + mpz_clear(c); + mpz_clear(b); + mpz_clear(a); + mpz_clear(azby_aybz); + mpz_clear(czay_cyaz); + mpz_clear(bzcy_bycz); + mpz_clear(ay_bz); + mpz_clear(az_by); + mpz_clear(cy_az); + mpz_clear(cz_ay); + mpz_clear(by_cz); + mpz_clear(bz_cy); +} + +/*! \brief Arbitrary precision calculation of circum-circle. + * + * \param[in, out] T Pointer to tessellation. 
+ * \param[in] tt Index in DT array. + * \param[out] x X coordinate of circum-circle center. + * \param[out] y Y coordinate of circum-circle center. + * \param[out] z Z coordinate of circum-circle center. + * + * \return void + */ +void get_circumcircle_exact(tessellation *T, int tt, double *x, double *y, double *z) +{ + tetra *DT = T->DT; + point *DP = T->DP; + tetra *t = &DT[tt]; + + point *p0 = &DP[t->p[0]]; + point *p1 = &DP[t->p[1]]; + point *p2 = &DP[t->p[2]]; + point *p3 = &DP[t->p[3]]; + + mpz_t det, detA, detB, detC; + mpz_t qx, qy, qz; + mpz_t a2, b2, c2, tmp, AA, BB, CC; + mpz_t ax, ay, az, bx, by, bz, cx, cy, cz; + + mpz_init(det); + mpz_init(detA); + mpz_init(detB); + mpz_init(detC); + mpz_init(qx); + mpz_init(qy); + mpz_init(qz); + + mpz_init(a2); + mpz_init(b2); + mpz_init(c2); + mpz_init(tmp); + mpz_init(AA); + mpz_init(BB); + mpz_init(CC); + + mpz_init(ax); + mpz_init(ay); + mpz_init(az); + mpz_init(bx); + mpz_init(by); + mpz_init(bz); + mpz_init(cx); + mpz_init(cy); + mpz_init(cz); + +#ifndef OPTIMIZE_MEMORY_USAGE + MY_mpz_set_si(tmp, p1->ix); + MY_mpz_sub_ui(ax, tmp, p0->ix); + MY_mpz_set_si(tmp, p1->iy); + MY_mpz_sub_ui(ay, tmp, p0->iy); + MY_mpz_set_si(tmp, p1->iz); + MY_mpz_sub_ui(az, tmp, p0->iz); + + MY_mpz_set_si(tmp, p2->ix); + MY_mpz_sub_ui(bx, tmp, p0->ix); + MY_mpz_set_si(tmp, p2->iy); + MY_mpz_sub_ui(by, tmp, p0->iy); + MY_mpz_set_si(tmp, p2->iz); + MY_mpz_sub_ui(bz, tmp, p0->iz); + + MY_mpz_set_si(tmp, p3->ix); + MY_mpz_sub_ui(cx, tmp, p0->ix); + MY_mpz_set_si(tmp, p3->iy); + MY_mpz_sub_ui(cy, tmp, p0->iy); + MY_mpz_set_si(tmp, p3->iz); + MY_mpz_sub_ui(cz, tmp, p0->iz); +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + IntegerMapType pA_ixyz[3], pB_ixyz[3]; + double pA_xyz[3], pB_xyz[3]; + + get_integers_for_point(p0, pA_ixyz, pA_xyz); + + get_integers_for_point(p1, pB_ixyz, pB_xyz); + MY_mpz_set_si(tmp, pB_ixyz[0]); + MY_mpz_sub_ui(ax, tmp, pA_ixyz[0]); + MY_mpz_set_si(tmp, pB_ixyz[1]); + MY_mpz_sub_ui(ay, tmp, pA_ixyz[1]); + 
MY_mpz_set_si(tmp, pB_ixyz[2]); + MY_mpz_sub_ui(az, tmp, pA_ixyz[2]); + + get_integers_for_point(p2, pB_ixyz, pB_xyz); + MY_mpz_set_si(tmp, pB_ixyz[0]); + MY_mpz_sub_ui(bx, tmp, pA_ixyz[0]); + MY_mpz_set_si(tmp, pB_ixyz[1]); + MY_mpz_sub_ui(by, tmp, pA_ixyz[1]); + MY_mpz_set_si(tmp, pB_ixyz[2]); + MY_mpz_sub_ui(bz, tmp, pA_ixyz[2]); + + get_integers_for_point(p3, pB_ixyz, pB_xyz); + MY_mpz_set_si(tmp, pB_ixyz[0]); + MY_mpz_sub_ui(cx, tmp, pA_ixyz[0]); + MY_mpz_set_si(tmp, pB_ixyz[1]); + MY_mpz_sub_ui(cy, tmp, pA_ixyz[1]); + MY_mpz_set_si(tmp, pB_ixyz[2]); + MY_mpz_sub_ui(cz, tmp, pA_ixyz[2]); +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + + mpz_set(tmp, ax); + mpz_mul(AA, tmp, ax); + mpz_set(tmp, ay); + mpz_mul(BB, tmp, ay); + mpz_set(tmp, az); + mpz_mul(CC, tmp, az); + mpz_add(tmp, AA, BB); + mpz_add(a2, tmp, CC); + + mpz_set(tmp, bx); + mpz_mul(AA, tmp, bx); + mpz_set(tmp, by); + mpz_mul(BB, tmp, by); + mpz_set(tmp, bz); + mpz_mul(CC, tmp, bz); + mpz_add(tmp, AA, BB); + mpz_add(b2, tmp, CC); + + mpz_set(tmp, cx); + mpz_mul(AA, tmp, cx); + mpz_set(tmp, cy); + mpz_mul(BB, tmp, cy); + mpz_set(tmp, cz); + mpz_mul(CC, tmp, cz); + mpz_add(tmp, AA, BB); + mpz_add(c2, tmp, CC); + + calc_mpz_determinant(det, ax, ay, az, bx, by, bz, cx, cy, cz); + calc_mpz_determinant(detA, a2, ay, az, b2, by, bz, c2, cy, cz); + calc_mpz_determinant(detB, ax, a2, az, bx, b2, bz, cx, c2, cz); + calc_mpz_determinant(detC, ax, ay, a2, bx, by, b2, cx, cy, c2); + + mpz_cdiv_q(tmp, detA, det); + mpz_tdiv_q_2exp(qx, tmp, 1); + + mpz_cdiv_q(tmp, detB, det); + mpz_tdiv_q_2exp(qy, tmp, 1); + + mpz_cdiv_q(tmp, detC, det); + mpz_tdiv_q_2exp(qz, tmp, 1); + +#ifndef OPTIMIZE_MEMORY_USAGE + MY_mpz_set_si(tmp, p0->ix); + mpz_add(AA, qx, tmp); + + MY_mpz_set_si(tmp, p0->iy); + mpz_add(BB, qy, tmp); + + MY_mpz_set_si(tmp, p0->iz); + mpz_add(CC, qz, tmp); +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + MY_mpz_set_si(tmp, pA_ixyz[0]); + mpz_add(AA, qx, tmp); + + MY_mpz_set_si(tmp, pA_ixyz[1]); + mpz_add(BB, 
qy, tmp); + + MY_mpz_set_si(tmp, pA_ixyz[2]); + mpz_add(CC, qz, tmp); +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + double xx, yy, zz; + + xx = mpz_get_d(AA); + yy = mpz_get_d(BB); + zz = mpz_get_d(CC); + + xx /= (1LLu << USEDBITS); + yy /= (1LLu << USEDBITS); + zz /= (1LLu << USEDBITS); + + xx = xx / ConversionFac + CentralOffsetX; + yy = yy / ConversionFac + CentralOffsetY; + zz = zz / ConversionFac + CentralOffsetZ; + + *x = xx; + *y = yy; + *z = zz; + + mpz_clear(det); + mpz_clear(detA); + mpz_clear(detB); + mpz_clear(detC); + mpz_clear(qx); + mpz_clear(qy); + mpz_clear(qz); + + mpz_clear(a2); + mpz_clear(b2); + mpz_clear(c2); + mpz_clear(tmp); + mpz_clear(AA); + mpz_clear(BB); + mpz_clear(CC); + + mpz_clear(ax); + mpz_clear(ay); + mpz_clear(az); + mpz_clear(bx); + mpz_clear(by); + mpz_clear(bz); + mpz_clear(cx); + mpz_clear(cy); + mpz_clear(cz); +} + +/*! \brief Computes the circum-circle of tetrahedron tt. + * + * \param[in, out] T Pointer to tessellation. + * \param[in] tt Index of triangle in DT array. + * + * \return void + */ +void update_circumcircle(tessellation *T, int tt) +{ + tetra *DT = T->DT; + tetra_center *DTC = T->DTC; + point *DP = T->DP; + tetra *t = &DT[tt]; + tetra_center *tc = &DTC[tt]; + + if(t->t[0] < 0) /* deleted ? 
*/ + return; + + point *p0 = &DP[t->p[0]]; + point *p1 = &DP[t->p[1]]; + point *p2 = &DP[t->p[2]]; + point *p3 = &DP[t->p[3]]; + + if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3)) + return; + +#ifndef OPTIMIZE_MEMORY_USAGE + double ax = p1->xx - p0->xx; + double ay = p1->yy - p0->yy; + double az = p1->zz - p0->zz; + + double bx = p2->xx - p0->xx; + double by = p2->yy - p0->yy; + double bz = p2->zz - p0->zz; + + double cx = p3->xx - p0->xx; + double cy = p3->yy - p0->yy; + double cz = p3->zz - p0->zz; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + double ax, ay, az, bx, by, bz, cx, cy, cz; + double pA_xyz[3], pB_xyz[3]; + IntegerMapType pA_ixyz[3], pB_ixyz[3]; + + get_integers_for_point(p0, pA_ixyz, pA_xyz); + + get_integers_for_point(p1, pB_ixyz, pB_xyz); + ax = pB_xyz[0] - pA_xyz[0]; + ay = pB_xyz[1] - pA_xyz[1]; + az = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p2, pB_ixyz, pB_xyz); + bx = pB_xyz[0] - pA_xyz[0]; + by = pB_xyz[1] - pA_xyz[1]; + bz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p3, pB_ixyz, pB_xyz); + cx = pB_xyz[0] - pA_xyz[0]; + cy = pB_xyz[1] - pA_xyz[1]; + cz = pB_xyz[2] - pA_xyz[2]; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + + double aa = 0.5 * (ax * ax + ay * ay + az * az); + double bb = 0.5 * (bx * bx + by * by + bz * bz); + double cc = 0.5 * (cx * cx + cy * cy + cz * cz); + + double mv_data[] = {ax, ay, az, aa, bx, by, bz, bb, cx, cy, cz, cc}; + double x[3]; + + int status = solve_linear_equations(mv_data, x); + + if(status < 0) + { + if(Orient3d_Exact(p0, p1, p2, p3) != 1) + { + printf("p0 = %g %g %g\n", p0->x, p0->y, p0->z); + printf("p1 = %g %g %g\n", p1->x, p1->y, p1->z); + printf("p2 = %g %g %g\n", p2->x, p2->y, p2->z); + printf("p3 = %g %g %g\n", p3->x, p3->y, p3->z); + + printf("Orient-Test=%d\n", Orient3d_Exact(p0, p1, p2, p3)); + printf("tetra-volume=%g tetra=%d\n", calculate_tetra_volume(p0, p1, p2, p3), tt); + + return; + } + + double xc, yc, zc; + + get_circumcircle_exact(T, tt, &xc, 
&yc, &zc); + + tc->cx = xc; + tc->cy = yc; + tc->cz = zc; + } + else + { +#ifndef OPTIMIZE_MEMORY_USAGE + x[0] += p0->xx; + x[1] += p0->yy; + x[2] += p0->zz; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + x[0] += pA_xyz[0]; + x[1] += pA_xyz[1]; + x[2] += pA_xyz[2]; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + + tc->cx = (x[0] - 1.0) / ConversionFac + CentralOffsetX; + tc->cy = (x[1] - 1.0) / ConversionFac + CentralOffsetY; + tc->cz = (x[2] - 1.0) / ConversionFac + CentralOffsetZ; + } +} + +/*! \brief Returns the orientation of the tetrahedron. + * + * \param[in] p0 Point spanning the tetrahedron. + * \param[in] p1 Point spanning the tetrahedron. + * \param[in] p2 Point spanning the tetrahedron. + * \param[in] p3 Point spanning the tetrahedron. + * + * \return -1: negative orientation; +1 positive orientation. + */ +int test_tetra_orientation(point *p0, point *p1, point *p2, point *p3) +{ + double nx, ny, nz; + + if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3)) + return +1; + +#ifndef OPTIMIZE_MEMORY_USAGE + nx = (p1->yy - p0->yy) * (p2->zz - p0->zz) - (p1->zz - p0->zz) * (p2->yy - p0->yy); + ny = (p1->zz - p0->zz) * (p2->xx - p0->xx) - (p1->xx - p0->xx) * (p2->zz - p0->zz); + nz = (p1->xx - p0->xx) * (p2->yy - p0->yy) - (p1->yy - p0->yy) * (p2->xx - p0->xx); + if(nx * (p3->xx - p0->xx) + ny * (p3->yy - p0->yy) + nz * (p3->zz - p0->zz) >= 0) + return +1; + else + return -1; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + IntegerMapType p0_ixyz[3], p1_ixyz[3], p2_ixyz[3], p3_ixyz[3]; + double p0_xyz[3], p1_xyz[3], p2_xyz[3], p3_xyz[3]; + + get_integers_for_point(p0, p0_ixyz, p0_xyz); + get_integers_for_point(p1, p1_ixyz, p1_xyz); + get_integers_for_point(p2, p2_ixyz, p2_xyz); + get_integers_for_point(p3, p3_ixyz, p3_xyz); + + nx = (p1_xyz[1] - p0_xyz[1]) * (p2_xyz[2] - p0_xyz[2]) - (p1_xyz[2] - p0_xyz[2]) * (p2_xyz[1] - p0_xyz[1]); + ny = (p1_xyz[2] - p0_xyz[2]) * (p2_xyz[0] - p0_xyz[0]) - (p1_xyz[0] - p0_xyz[0]) * (p2_xyz[2] - p0_xyz[2]); + 
nz = (p1_xyz[0] - p0_xyz[0]) * (p2_xyz[1] - p0_xyz[1]) - (p1_xyz[1] - p0_xyz[1]) * (p2_xyz[0] - p0_xyz[0]); + + get_integers_for_point(p3, p3_ixyz, p3_xyz); + + if(nx * (p3_xyz[0] - p0_xyz[0]) + ny * (p3_xyz[1] - p0_xyz[1]) + nz * (p3_xyz[2] - p0_xyz[2]) >= 0) + return +1; + else + return -1; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ +} + +/*! \brief Calculate the volume of a tetrahedron. + * + * \param[in] p0 Point spanning the tetrahedron. + * \param[in] p1 Point spanning the tetrahedron. + * \param[in] p2 Point spanning the tetrahedron. + * \param[in] p3 Point spanning the tetrahedron. + * + * \return Volume of the tetrahedron. + */ +double calculate_tetra_volume(point *p0, point *p1, point *p2, point *p3) +{ + double nx, ny, nz; + + if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3)) + return +1; + + nx = (p1->y - p0->y) * (p2->z - p0->z) - (p1->z - p0->z) * (p2->y - p0->y); + ny = (p1->z - p0->z) * (p2->x - p0->x) - (p1->x - p0->x) * (p2->z - p0->z); + nz = (p1->x - p0->x) * (p2->y - p0->y) - (p1->y - p0->y) * (p2->x - p0->x); + + return nx * (p3->x - p0->x) + ny * (p3->y - p0->y) + nz * (p3->z - p0->z); +} + +/*! \brief Add row in matrix equation. + * + * Auxiliary function for solve_linear_equations. + * + * \param[in, out] m Matrix. + * \param[in] r1 Index of row to be modified. + * \param[in] r2 Index of row which is added to r1. + * \param[in] fac Factor by which row r2 is multiplied before adding to r1. + * + * \return void + */ +void add_row(double *m, int r1, int r2, double fac) +{ + int i; + + for(i = 0; i < 4; i++) + m[r1 * 4 + i] += fac * m[r2 * 4 + i]; +} + +/*! \brief Solve system of linear equations for 3d Voronoi construction. + * + * \param[in, out] m Matrix. + * \param[out] res Result. + * + * \return 0 if success, <0 else. 
+ */ +int solve_linear_equations(double *m, double *res) +{ + int ix, iy, iz, itmp; + + if(fabs(m[4]) > fabs(m[0])) + { + ix = 1; + iy = 0; + iz = 2; + } + else + { + ix = 0; + iy = 1; + iz = 2; + } + + if(fabs(m[8]) > fabs(m[ix * 4])) + { + ix = 2; + iy = 0; + iz = 1; + } + + add_row(m, iy, ix, -m[iy * 4] / m[ix * 4]); + add_row(m, iz, ix, -m[iz * 4] / m[ix * 4]); + + if(fabs(m[iz * 4 + 1]) > fabs(m[iy * 4 + 1])) + { + /* swap iy/iz */ + itmp = iy; + iy = iz; + iz = itmp; + } + + if(fabs(m[iy * 4 + 1]) < GAUSS_EPS) + return -1; + + add_row(m, iz, iy, -m[iz * 4 + 1] / m[iy * 4 + 1]); + + res[2] = m[iz * 4 + 3] / m[iz * 4 + 2]; + res[1] = (m[iy * 4 + 3] - res[2] * m[iy * 4 + 2]) / m[iy * 4 + 1]; + res[0] = (m[ix * 4 + 3] - res[2] * m[ix * 4 + 2] - res[1] * m[ix * 4 + 1]) / m[ix * 4]; + + if(fabs(m[iz * 4 + 2]) < GAUSS_EPS) + { + return -1; + } + if(fabs(m[iy * 4 + 1]) < GAUSS_EPS) + { + return -2; + } + if(fabs(m[ix * 4]) < GAUSS_EPS) + { + return -3; + } + + return 0; +} + +/*! \brief Converts coordinates of point p to integer values. + * + * \param[in, out] p Point. + * + * \return void + */ +#ifndef OPTIMIZE_MEMORY_USAGE +void set_integers_for_pointer(point *p) +{ + p->xx = (p->x - CentralOffsetX) * ConversionFac + 1.0; + p->yy = (p->y - CentralOffsetY) * ConversionFac + 1.0; + p->zz = (p->z - CentralOffsetZ) * ConversionFac + 1.0; + + if(p->xx < 1.0 || p->xx >= 2.0 || p->yy < 1.0 || p->yy >= 2.0 || p->zz < 1.0 || p->zz >= 2.0) + { + printf("(%g, %g, %g) (%g, %g, %g)\n", p->x, p->y, p->z, p->xx, p->yy, p->zz); + terminate("invalid coordinate range"); + } + + p->ix = double_to_voronoiint(p->xx); + p->iy = double_to_voronoiint(p->yy); + p->iz = double_to_voronoiint(p->zz); + + p->xx = mask_voronoi_int(p->xx); + p->yy = mask_voronoi_int(p->yy); + p->zz = mask_voronoi_int(p->zz); +} +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */ + +/*! \brief Checks if point is within a sphere using arbitrary precision + * operations. + * + * \param p0 Point 1 of tetrahedron. 
+ * \param p1 Point 2 of tetrahedron. + * \param p2 Point 3 of tetrahedron. + * \param p3 Point 4 of tetrahedron. + * \param p Point to be checked if it is in cricumsphere. + * + * \return (-1,1); -1 in sphere, 1 outside. + */ +int InSphere_Exact(point *p0, point *p1, point *p2, point *p3, point *p) +{ + IntegerMapType ax, bx, cx, dx; + IntegerMapType ay, by, cy, dy; + IntegerMapType az, bz, cz, dz; + + if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3)) + return -1; + +#ifndef OPTIMIZE_MEMORY_USAGE + ax = p0->ix - p->ix; + ay = p0->iy - p->iy; + az = p0->iz - p->iz; + + bx = p1->ix - p->ix; + by = p1->iy - p->iy; + bz = p1->iz - p->iz; + + cx = p2->ix - p->ix; + cy = p2->iy - p->iy; + cz = p2->iz - p->iz; + + dx = p3->ix - p->ix; + dy = p3->iy - p->iy; + dz = p3->iz - p->iz; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + double pA_xyz[3], pB_xyz[3]; + IntegerMapType pA_ixyz[3], pB_ixyz[3]; + + get_integers_for_point(p, pA_ixyz, pA_xyz); + + get_integers_for_point(p0, pB_ixyz, pB_xyz); + ax = pB_ixyz[0] - pA_ixyz[0]; + ay = pB_ixyz[1] - pA_ixyz[1]; + az = pB_ixyz[2] - pA_ixyz[2]; + + get_integers_for_point(p1, pB_ixyz, pB_xyz); + bx = pB_ixyz[0] - pA_ixyz[0]; + by = pB_ixyz[1] - pA_ixyz[1]; + bz = pB_ixyz[2] - pA_ixyz[2]; + + get_integers_for_point(p2, pB_ixyz, pB_xyz); + cx = pB_ixyz[0] - pA_ixyz[0]; + cy = pB_ixyz[1] - pA_ixyz[1]; + cz = pB_ixyz[2] - pA_ixyz[2]; + + get_integers_for_point(p3, pB_ixyz, pB_xyz); + dx = pB_ixyz[0] - pA_ixyz[0]; + dy = pB_ixyz[1] - pA_ixyz[1]; + dz = pB_ixyz[2] - pA_ixyz[2]; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + + mpz_t ab, bc, cd, da, ac, bd; + + mpz_init(ab); + mpz_init(bc); + mpz_init(cd); + mpz_init(da); + mpz_init(ac); + mpz_init(bd); + + mpz_t tmp, AA, BB, CC; + + mpz_init(tmp); + mpz_init(AA); + mpz_init(BB); + mpz_init(CC); + + MY_mpz_set_si(tmp, ax); + MY_mpz_mul_si(AA, tmp, by); + MY_mpz_set_si(tmp, bx); + MY_mpz_mul_si(BB, tmp, ay); + mpz_sub(ab, AA, BB); + + MY_mpz_set_si(tmp, bx); + 
MY_mpz_mul_si(AA, tmp, cy); + MY_mpz_set_si(tmp, cx); + MY_mpz_mul_si(BB, tmp, by); + mpz_sub(bc, AA, BB); + + MY_mpz_set_si(tmp, cx); + MY_mpz_mul_si(AA, tmp, dy); + MY_mpz_set_si(tmp, dx); + MY_mpz_mul_si(BB, tmp, cy); + mpz_sub(cd, AA, BB); + + MY_mpz_set_si(tmp, dx); + MY_mpz_mul_si(AA, tmp, ay); + MY_mpz_set_si(tmp, ax); + MY_mpz_mul_si(BB, tmp, dy); + mpz_sub(da, AA, BB); + + MY_mpz_set_si(tmp, ax); + MY_mpz_mul_si(AA, tmp, cy); + MY_mpz_set_si(tmp, cx); + MY_mpz_mul_si(BB, tmp, ay); + mpz_sub(ac, AA, BB); + + MY_mpz_set_si(tmp, bx); + MY_mpz_mul_si(AA, tmp, dy); + MY_mpz_set_si(tmp, dx); + MY_mpz_mul_si(BB, tmp, by); + mpz_sub(bd, AA, BB); + + mpz_t abc, bcd, cda, dab; + + mpz_init(abc); + mpz_init(bcd); + mpz_init(cda); + mpz_init(dab); + + MY_mpz_mul_si(AA, bc, az); + MY_mpz_mul_si(BB, ac, -bz); + MY_mpz_mul_si(CC, ab, cz); + mpz_add(tmp, AA, BB); + mpz_add(abc, tmp, CC); + + MY_mpz_mul_si(AA, cd, bz); + MY_mpz_mul_si(BB, bd, -cz); + MY_mpz_mul_si(CC, bc, dz); + mpz_add(tmp, AA, BB); + mpz_add(bcd, tmp, CC); + + MY_mpz_mul_si(AA, da, cz); + MY_mpz_mul_si(BB, ac, dz); + MY_mpz_mul_si(CC, cd, az); + mpz_add(tmp, AA, BB); + mpz_add(cda, tmp, CC); + + MY_mpz_mul_si(AA, ab, dz); + MY_mpz_mul_si(BB, bd, az); + MY_mpz_mul_si(CC, da, bz); + mpz_add(tmp, AA, BB); + mpz_add(dab, tmp, CC); + + mpz_t a2, b2, c2, d2; + + mpz_init(a2); + mpz_init(b2); + mpz_init(c2); + mpz_init(d2); + + MY_mpz_set_si(tmp, ax); + MY_mpz_mul_si(AA, tmp, ax); + MY_mpz_set_si(tmp, ay); + MY_mpz_mul_si(BB, tmp, ay); + MY_mpz_set_si(tmp, az); + MY_mpz_mul_si(CC, tmp, az); + mpz_add(tmp, AA, BB); + mpz_add(a2, tmp, CC); + + MY_mpz_set_si(tmp, bx); + MY_mpz_mul_si(AA, tmp, bx); + MY_mpz_set_si(tmp, by); + MY_mpz_mul_si(BB, tmp, by); + MY_mpz_set_si(tmp, bz); + MY_mpz_mul_si(CC, tmp, bz); + mpz_add(tmp, AA, BB); + mpz_add(b2, tmp, CC); + + MY_mpz_set_si(tmp, cx); + MY_mpz_mul_si(AA, tmp, cx); + MY_mpz_set_si(tmp, cy); + MY_mpz_mul_si(BB, tmp, cy); + MY_mpz_set_si(tmp, cz); + MY_mpz_mul_si(CC, 
tmp, cz); + mpz_add(tmp, AA, BB); + mpz_add(c2, tmp, CC); + + MY_mpz_set_si(tmp, dx); + MY_mpz_mul_si(AA, tmp, dx); + MY_mpz_set_si(tmp, dy); + MY_mpz_mul_si(BB, tmp, dy); + MY_mpz_set_si(tmp, dz); + MY_mpz_mul_si(CC, tmp, dz); + mpz_add(tmp, AA, BB); + mpz_add(d2, tmp, CC); + + /* now calculate final result */ + + mpz_mul(AA, c2, dab); + mpz_mul(BB, d2, abc); + mpz_sub(tmp, AA, BB); + + mpz_mul(AA, a2, bcd); + mpz_mul(BB, b2, cda); + mpz_sub(CC, AA, BB); + + mpz_add(AA, tmp, CC); + + /* AA now contains the result */ + + int sign = mpz_sgn(AA); + + mpz_clear(d2); + mpz_clear(c2); + mpz_clear(b2); + mpz_clear(a2); + mpz_clear(dab); + mpz_clear(cda); + mpz_clear(bcd); + mpz_clear(abc); + mpz_clear(CC); + mpz_clear(BB); + mpz_clear(AA); + mpz_clear(tmp); + mpz_clear(bd); + mpz_clear(ac); + mpz_clear(da); + mpz_clear(cd); + mpz_clear(bc); + mpz_clear(ab); + + return sign; +} + +/*! \brief Checks if point is within a sphere. + * + * \param p0 Point 1 of tetrahedron. + * \param p1 Point 2 of tetrahedron. + * \param p2 Point 3 of tetrahedron. + * \param p3 Point 4 of tetrahedron. + * \param p Point to be checked if it is in cricumsphere. + * + * \return (-1,0,1); -1: in sphere, 0: on surfrace, 1: outside. 
+ */ +int InSphere_Quick(point *p0, point *p1, point *p2, point *p3, point *p) +{ + double ax, bx, cx, dx; + double ay, by, cy, dy; + double az, bz, cz, dz; + double a2, b2, c2, d2; + double ab, bc, cd, da, ac, bd; + double abc, bcd, cda, dab; + double x; + + if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3)) + return -1; + +#ifndef OPTIMIZE_MEMORY_USAGE + ax = p0->xx - p->xx; + ay = p0->yy - p->yy; + az = p0->zz - p->zz; + + bx = p1->xx - p->xx; + by = p1->yy - p->yy; + bz = p1->zz - p->zz; + + cx = p2->xx - p->xx; + cy = p2->yy - p->yy; + cz = p2->zz - p->zz; + + dx = p3->xx - p->xx; + dy = p3->yy - p->yy; + dz = p3->zz - p->zz; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + double pA_xyz[3], pB_xyz[3]; + IntegerMapType pA_ixyz[3], pB_ixyz[3]; + + get_integers_for_point(p, pA_ixyz, pA_xyz); + + get_integers_for_point(p0, pB_ixyz, pB_xyz); + ax = pB_xyz[0] - pA_xyz[0]; + ay = pB_xyz[1] - pA_xyz[1]; + az = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p1, pB_ixyz, pB_xyz); + bx = pB_xyz[0] - pA_xyz[0]; + by = pB_xyz[1] - pA_xyz[1]; + bz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p2, pB_ixyz, pB_xyz); + cx = pB_xyz[0] - pA_xyz[0]; + cy = pB_xyz[1] - pA_xyz[1]; + cz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p3, pB_ixyz, pB_xyz); + dx = pB_xyz[0] - pA_xyz[0]; + dy = pB_xyz[1] - pA_xyz[1]; + dz = pB_xyz[2] - pA_xyz[2]; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + + ab = ax * by - bx * ay; + bc = bx * cy - cx * by; + cd = cx * dy - dx * cy; + da = dx * ay - ax * dy; + ac = ax * cy - cx * ay; + bd = bx * dy - dx * by; + + abc = az * bc - bz * ac + cz * ab; + bcd = bz * cd - cz * bd + dz * bc; + cda = cz * da + dz * ac + az * cd; + dab = dz * ab + az * bd + bz * da; + + a2 = ax * ax + ay * ay + az * az; + b2 = bx * bx + by * by + bz * bz; + c2 = cx * cx + cy * cy + cz * cz; + d2 = dx * dx + dy * dy + dz * dz; + + x = ((c2 * dab - d2 * abc) + (a2 * bcd - b2 * cda)); + + if(x < 0) + return -1; + if(x > 0) + return +1; + + 
return 0; +} + +/*! \brief Checks if point is within a sphere with some error margin. + * + * \param p0 Point 1 of tetrahedron. + * \param p1 Point 2 of tetrahedron. + * \param p2 Point 3 of tetrahedron. + * \param p3 Point 4 of tetrahedron. + * \param p Point to be checked if it is in cricumsphere. + * + * \return (-1,0,1); -1: in sphere, 0: on surfrace (within error margin), + * +1: outside. + */ +int InSphere_Errorbound(point *p0, point *p1, point *p2, point *p3, point *p) +{ + double ax, bx, cx, dx; + double ay, by, cy, dy; + double az, bz, cz, dz; + double a2, b2, c2, d2; + double ab, bc, cd, da, ac, bd; + double abc, bcd, cda, dab; + double x; + + if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3)) + return -1; + +#ifndef OPTIMIZE_MEMORY_USAGE + ax = p0->xx - p->xx; + ay = p0->yy - p->yy; + az = p0->zz - p->zz; + + bx = p1->xx - p->xx; + by = p1->yy - p->yy; + bz = p1->zz - p->zz; + + cx = p2->xx - p->xx; + cy = p2->yy - p->yy; + cz = p2->zz - p->zz; + + dx = p3->xx - p->xx; + dy = p3->yy - p->yy; + dz = p3->zz - p->zz; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + double pA_xyz[3], pB_xyz[3]; + IntegerMapType pA_ixyz[3], pB_ixyz[3]; + + get_integers_for_point(p, pA_ixyz, pA_xyz); + + get_integers_for_point(p0, pB_ixyz, pB_xyz); + ax = pB_xyz[0] - pA_xyz[0]; + ay = pB_xyz[1] - pA_xyz[1]; + az = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p1, pB_ixyz, pB_xyz); + bx = pB_xyz[0] - pA_xyz[0]; + by = pB_xyz[1] - pA_xyz[1]; + bz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p2, pB_ixyz, pB_xyz); + cx = pB_xyz[0] - pA_xyz[0]; + cy = pB_xyz[1] - pA_xyz[1]; + cz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p3, pB_ixyz, pB_xyz); + dx = pB_xyz[0] - pA_xyz[0]; + dy = pB_xyz[1] - pA_xyz[1]; + dz = pB_xyz[2] - pA_xyz[2]; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + + double axby = ax * by; + double bxay = bx * ay; + double bxcy = bx * cy; + double cxby = cx * by; + double cxdy = cx * dy; + double dxcy = dx * cy; + double dxay = 
dx * ay; + double axdy = ax * dy; + double axcy = ax * cy; + double cxay = cx * ay; + double bxdy = bx * dy; + double dxby = dx * by; + + ab = axby - bxay; + bc = bxcy - cxby; + cd = cxdy - dxcy; + da = dxay - axdy; + ac = axcy - cxay; + bd = bxdy - dxby; + + abc = az * bc - bz * ac + cz * ab; + bcd = bz * cd - cz * bd + dz * bc; + cda = cz * da + dz * ac + az * cd; + dab = dz * ab + az * bd + bz * da; + + a2 = ax * ax + ay * ay + az * az; + b2 = bx * bx + by * by + bz * bz; + c2 = cx * cx + cy * cy + cz * cz; + d2 = dx * dx + dy * dy + dz * dz; + + x = ((c2 * dab - d2 * abc) + (a2 * bcd - b2 * cda)); + + /* calculate absolute maximum size */ + + ab = fabs(axby) + fabs(bxay); + bc = fabs(bxcy) + fabs(cxby); + cd = fabs(cxdy) + fabs(dxcy); + da = fabs(dxay) + fabs(axdy); + ac = fabs(axcy) + fabs(cxay); + bd = fabs(bxdy) + fabs(dxby); + + az = fabs(az); + bz = fabs(bz); + cz = fabs(cz); + dz = fabs(dz); + + abc = az * bc + bz * ac + cz * ab; + bcd = bz * cd + cz * bd + dz * bc; + cda = cz * da + dz * ac + az * cd; + dab = dz * ab + az * bd + bz * da; + + double sizelimit = ((c2 * dab + d2 * abc) + (a2 * bcd + b2 * cda)); + + double errbound = 1.0e-14 * sizelimit; + + if(x < -errbound) + return -1; + else if(x > errbound) + return +1; + + return 0; +} + +/*! \brief Returns orientation of tetrahedron using arbitrary precision + * floating point operations. + * + * \param[in] p0 First point of tetrahedron. + * \param[in] p1 Second point of tetrahedron. + * \param[in] p2 Third point of tetrahedron. + * \param[in] p3 Forth point of tetrahedron. + * + * \return (-1,0,1) -1 if negatively oriented, 0 if degenerate and 1 if + * positively oriented. 
+ */ +int Orient3d_Exact(point *p0, point *p1, point *p2, point *p3) +{ + IntegerMapType ax, bx, cx; + IntegerMapType ay, by, cy; + IntegerMapType az, bz, cz; + +#ifndef OPTIMIZE_MEMORY_USAGE + ax = p0->ix - p3->ix; + ay = p0->iy - p3->iy; + az = p0->iz - p3->iz; + + bx = p1->ix - p3->ix; + by = p1->iy - p3->iy; + bz = p1->iz - p3->iz; + + cx = p2->ix - p3->ix; + cy = p2->iy - p3->iy; + cz = p2->iz - p3->iz; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + double pA_xyz[3], pB_xyz[3]; + IntegerMapType pA_ixyz[3], pB_ixyz[3]; + + get_integers_for_point(p3, pA_ixyz, pA_xyz); + + get_integers_for_point(p0, pB_ixyz, pB_xyz); + ax = pB_ixyz[0] - pA_ixyz[0]; + ay = pB_ixyz[1] - pA_ixyz[1]; + az = pB_ixyz[2] - pA_ixyz[2]; + + get_integers_for_point(p1, pB_ixyz, pB_xyz); + bx = pB_ixyz[0] - pA_ixyz[0]; + by = pB_ixyz[1] - pA_ixyz[1]; + bz = pB_ixyz[2] - pA_ixyz[2]; + + get_integers_for_point(p2, pB_ixyz, pB_xyz); + cx = pB_ixyz[0] - pA_ixyz[0]; + cy = pB_ixyz[1] - pA_ixyz[1]; + cz = pB_ixyz[2] - pA_ixyz[2]; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */ + + mpz_t bz_cy, by_cz, cz_ay, cy_az, az_by, ay_bz; + mpz_t bz2, by2, cz2, cy2, az2, ay2; + + mpz_init(bz_cy); + mpz_init(bz2); + MY_mpz_set_si(bz2, bz); + MY_mpz_mul_si(bz_cy, bz2, cy); + + mpz_init(by_cz); + mpz_init(by2); + MY_mpz_set_si(by2, by); + MY_mpz_mul_si(by_cz, by2, cz); + + mpz_init(cz_ay); + mpz_init(cz2); + MY_mpz_set_si(cz2, cz); + MY_mpz_mul_si(cz_ay, cz2, ay); + + mpz_init(cy_az); + mpz_init(cy2); + MY_mpz_set_si(cy2, cy); + MY_mpz_mul_si(cy_az, cy2, az); + + mpz_init(az_by); + mpz_init(az2); + MY_mpz_set_si(az2, az); + MY_mpz_mul_si(az_by, az2, by); + + mpz_init(ay_bz); + mpz_init(ay2); + MY_mpz_set_si(ay2, ay); + MY_mpz_mul_si(ay_bz, ay2, bz); + + mpz_t bzcy_bycz, czay_cyaz, azby_aybz; + + mpz_init(bzcy_bycz); + mpz_init(czay_cyaz); + mpz_init(azby_aybz); + + mpz_sub(bzcy_bycz, bz_cy, by_cz); + mpz_sub(czay_cyaz, cz_ay, cy_az); + mpz_sub(azby_aybz, az_by, ay_bz); + + mpz_t a, b, c, ab, res; + + mpz_init(a); + 
mpz_init(b); + mpz_init(c); + + MY_mpz_mul_si(a, bzcy_bycz, ax); + MY_mpz_mul_si(b, czay_cyaz, bx); + MY_mpz_mul_si(c, azby_aybz, cx); + + mpz_init(ab); + mpz_init(res); + + mpz_add(ab, a, b); + mpz_add(res, ab, c); + + int sign = mpz_sgn(res); + + mpz_clear(res); + mpz_clear(ab); + mpz_clear(c); + mpz_clear(b); + mpz_clear(a); + mpz_clear(azby_aybz); + mpz_clear(czay_cyaz); + mpz_clear(bzcy_bycz); + mpz_clear(ay2); + mpz_clear(ay_bz); + mpz_clear(az2); + mpz_clear(az_by); + mpz_clear(cy2); + mpz_clear(cy_az); + mpz_clear(cz2); + mpz_clear(cz_ay); + mpz_clear(by2); + mpz_clear(by_cz); + mpz_clear(bz2); + mpz_clear(bz_cy); + + return sign; +} + +/*! \brief Returns orientation of tetrahedron. + * + * \param[in] p0 First point of tetrahedron. + * \param[in] p1 Second point of tetrahedron. + * \param[in] p2 Third point of tetrahedron. + * \param[in] p3 Forth point of tetrahedron. + * + * \return (-1,0,1) -1 if negatively oriented, 0 if degenerate and 1 if + * positively oriented. + */ +int Orient3d_Quick(point *p0, point *p1, point *p2, point *p3) +{ + double ax, bx, cx; + double ay, by, cy; + double az, bz, cz; + +#ifndef OPTIMIZE_MEMORY_USAGE + ax = p0->xx - p3->xx; + ay = p0->yy - p3->yy; + az = p0->zz - p3->zz; + + bx = p1->xx - p3->xx; + by = p1->yy - p3->yy; + bz = p1->zz - p3->zz; + + cx = p2->xx - p3->xx; + cy = p2->yy - p3->yy; + cz = p2->zz - p3->zz; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + double pA_xyz[3], pB_xyz[3]; + IntegerMapType pA_ixyz[3], pB_ixyz[3]; + + get_integers_for_point(p3, pA_ixyz, pA_xyz); + + get_integers_for_point(p0, pB_ixyz, pB_xyz); + ax = pB_xyz[0] - pA_xyz[0]; + ay = pB_xyz[1] - pA_xyz[1]; + az = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p1, pB_ixyz, pB_xyz); + bx = pB_xyz[0] - pA_xyz[0]; + by = pB_xyz[1] - pA_xyz[1]; + bz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p2, pB_ixyz, pB_xyz); + cx = pB_xyz[0] - pA_xyz[0]; + cy = pB_xyz[1] - pA_xyz[1]; + cz = pB_xyz[2] - pA_xyz[2]; +#endif /* #ifndef 
OPTIMIZE_MEMORY_USAGE #else */ + + double x = (ax * (bz * cy - by * cz) + bx * (cz * ay - cy * az) + cx * (az * by - ay * bz)); + + if(x < 0) + return -1; + else if(x > 0) + return +1; + + return 0; +} + +/* \brief Returns orientation of tetrahedron. + * + * \param[in] p0 First point of tetrahedron. + * \param[in] p1 Second point of tetrahedron. + * \param[in] p2 Third point of tetrahedron. + * \param[in] p3 Forth point of tetrahedron. + * + * \return (-1,0,1) the orientation of the 4 points as +/-1. If either of the + * points is an infinity point, return 0. + */ +int Orient3d(point *p0, point *p1, point *p2, point *p3) +{ + if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3)) + return 0; + +#ifndef OPTIMIZE_MEMORY_USAGE + double ax = p0->xx - p3->xx; + double ay = p0->yy - p3->yy; + double az = p0->zz - p3->zz; + + double bx = p1->xx - p3->xx; + double by = p1->yy - p3->yy; + double bz = p1->zz - p3->zz; + + double cx = p2->xx - p3->xx; + double cy = p2->yy - p3->yy; + double cz = p2->zz - p3->zz; +#else /* #ifndef OPTIMIZE_MEMORY_USAGE */ + double ax, ay, az, bx, by, bz, cx, cy, cz; + double pA_xyz[3], pB_xyz[3]; + IntegerMapType pA_ixyz[3], pB_ixyz[3]; + + get_integers_for_point(p3, pA_ixyz, pA_xyz); + + get_integers_for_point(p0, pB_ixyz, pB_xyz); + ax = pB_xyz[0] - pA_xyz[0]; + ay = pB_xyz[1] - pA_xyz[1]; + az = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p1, pB_ixyz, pB_xyz); + bx = pB_xyz[0] - pA_xyz[0]; + by = pB_xyz[1] - pA_xyz[1]; + bz = pB_xyz[2] - pA_xyz[2]; + + get_integers_for_point(p2, pB_ixyz, pB_xyz); + cx = pB_xyz[0] - pA_xyz[0]; + cy = pB_xyz[1] - pA_xyz[1]; + cz = pB_xyz[2] - pA_xyz[2]; +#endif /* #ifndef OPTIMIZE_MEMORY_USAGE #else */ + + double bzcy = bz * cy; + double bycz = by * cz; + double czay = cz * ay; + double cyaz = cy * az; + double azby = az * by; + double aybz = ay * bz; + + double x = ax * (bzcy - bycz) + bx * (czay - cyaz) + cx * (azby - aybz); + + double sizelimit = + fabs(ax) * (fabs(bzcy) + 
fabs(bycz)) + fabs(bx) * (fabs(czay) + fabs(cyaz)) + fabs(cx) * (fabs(azby) + fabs(aybz)); + + double errbound = 1.0e-14 * sizelimit; + + if(x < -errbound) + return -1; + else if(x > errbound) + return +1; + + return Orient3d_Exact(p0, p1, p2, p3); +} + +/*! \brief Data structure for face sort + */ +struct data_face_sort /* for sorting faces */ +{ + MyIDType ID; /* ID of corresponding cell */ + float normal[3]; /* non-normalized normal vector */ + int start; /* start index into vertex list */ + int len; /* number of vertices */ +}; + +static int *VertexEntries; /* face index list */ +static float *VertexCoordinates; /* Voronoi vertex coordinates (circumsphere centers of delaunay tetras) */ +static float *FaceNormals; /* normal vectors */ +static int Nvertices; /* number of Voronoi vertices */ +static int Nnormals; /* number of normals */ +static int Nentries; /* number of entries in Voronoi face vertex list (including IDs and face vertex count) */ +static int Nsort; /* number of ID sorted faces */ +static int MaxEntries, MaxFaces; /* for allocation */ +static struct data_face_sort *FaceSort; + +/*! \brief Face sorting kernel + * + * Compares ID of data_face_sort types. + * + * \param[in] a Fist element. + * \param[in] b Second element. + * + * \return (-1,0,1), -1 if a->ID < b ->ID. + */ +int compare_face_sort(const void *a, const void *b) +{ + if(((struct data_face_sort *)a)->ID < ((struct data_face_sort *)b)->ID) + return -1; + + if(((struct data_face_sort *)a)->ID > ((struct data_face_sort *)b)->ID) + return +1; + + return 0; +} + +/*! \brief Gathers faces in list. + * + * \param[in] T Pointer to tessellation. 
+ * + * \return void + */ +void get_voronoi_face_vertex_indices(tessellation *T) +{ + int i, j, k, l, m, ii, jj, kk, ll, tetra_nr, edge_nr, next_tetra_nr, count, dp_1, dp_2; + tetra *prev, *next; + tetra *DT = T->DT; + point *DP = T->DP; + int bit, nr_next; + + /* loop over tetras */ + for(tetra_nr = 0; tetra_nr < Mesh.Ndt; tetra_nr++) + { + if(Mesh.DT[tetra_nr].t[0] < 0) /* skip deleted tetras */ + continue; + + /* edge flagging */ + bit = 1; + edge_nr = 0; + + /* loop over edges */ + while(Edge_visited[tetra_nr] != EDGE_ALL) + { + if((Edge_visited[tetra_nr] & bit) != 0) + { + bit <<= 1; + edge_nr++; + continue; + } + + tetra *t = &DT[tetra_nr]; + + /* edge-point relation */ + i = edge_start[edge_nr]; + j = edge_end[edge_nr]; + k = edge_opposite[edge_nr]; + l = edge_nexttetra[edge_nr]; + + /* mark edge as visited */ + Edge_visited[tetra_nr] |= (1 << edge_nr); + + /* delaunay points on both side of face */ + dp_1 = t->p[i]; + dp_2 = t->p[j]; + + /* skip large tetra */ + if(dp_1 < 0 || dp_2 < 0) + { + bit <<= 1; + edge_nr++; + continue; + } + + /* skip ghost points (both local and foreign) */ + if((DP[dp_1].task != ThisTask || DP[dp_1].index < 0 || DP[dp_1].index >= NumGas) && + (DP[dp_2].task != ThisTask || DP[dp_2].index < 0 || DP[dp_2].index >= NumGas)) + { + bit <<= 1; + edge_nr++; + continue; + } + + /* count number of face vertices */ + count = 0; + prev = t; + + do + { + count++; + next_tetra_nr = prev->t[l]; + next = &DT[next_tetra_nr]; + + for(m = 0, ll = ii = jj = -1; m < 4; m++) + { + if(next->p[m] == prev->p[k]) + ll = m; + if(next->p[m] == prev->p[i]) + ii = m; + if(next->p[m] == prev->p[j]) + jj = m; + } + + if(ll < 0 || ii < 0 || jj < 0) + terminate("inconsistency"); + + kk = 6 - (ll + ii + jj); + i = ii; + l = ll; + j = jj; + k = kk; + + prev = next; + } + while(next != t); + + count++; + + /* get face normals (from both sides) */ + FaceNormals[Nnormals++] = (DP[dp_2].x - DP[dp_1].x); + FaceNormals[Nnormals++] = (DP[dp_2].y - DP[dp_1].y); + 
FaceNormals[Nnormals++] = (DP[dp_2].z - DP[dp_1].z); + FaceNormals[Nnormals++] = (DP[dp_1].x - DP[dp_2].x); + FaceNormals[Nnormals++] = (DP[dp_1].y - DP[dp_2].y); + FaceNormals[Nnormals++] = (DP[dp_1].z - DP[dp_2].z); + + /* fill vertex entry list, first ID, count then tetra numbers */ + VertexEntries[Nentries++] = (int)DP[dp_1].ID; + VertexEntries[Nentries++] = (int)DP[dp_2].ID; + VertexEntries[Nentries++] = (int)count; + VertexEntries[Nentries++] = (int)tetra_nr; + + /* get tetra indices of face vertices */ + count = 0; + prev = t; + do + { + count++; + next_tetra_nr = prev->t[l]; + next = &DT[next_tetra_nr]; + + VertexEntries[Nentries++] = (int)next_tetra_nr; + + for(m = 0, ll = ii = jj = -1; m < 4; m++) + { + if(next->p[m] == prev->p[k]) + ll = m; + if(next->p[m] == prev->p[i]) + ii = m; + if(next->p[m] == prev->p[j]) + jj = m; + } + + if(ll < 0 || ii < 0 || jj < 0) + terminate("inconsistency"); + + kk = 6 - (ll + ii + jj); + + /* flag edge */ + for(nr_next = 0; nr_next < 6; nr_next++) + if((edge_start[nr_next] == ii && edge_end[nr_next] == jj) || (edge_start[nr_next] == jj && edge_end[nr_next] == ii)) + { + if((Edge_visited[next_tetra_nr] & (1 << nr_next)) && next != t) + terminate("inconsistency"); + + Edge_visited[next_tetra_nr] |= (1 << nr_next); + break; + } + + i = ii; + l = ll; + j = jj; + k = kk; + + prev = next; + + if(Nentries > MaxEntries) + terminate("Nentries > MaxEntries"); + + if(Nnormals > MaxFaces) + terminate("Nentries > MaxEntries"); + } + while(next != t); + + bit <<= 1; + edge_nr++; + } + } +} + +/*! \brief Set Vertex coordinates in the respective array. + * + * Copys the coordinates from the DTC array of the tessellation to a + * designated array VertexCoordinates. + * + * \param[in] T Pointer to tessellation. 
+ * + * \return void + */ +void get_voronoi_face_vertex_coordinates(tessellation *T) +{ + int tetra_nr = 0; + + for(tetra_nr = 0; tetra_nr < T->Ndt; tetra_nr++) + { + VertexCoordinates[3 * Nvertices + 0] = T->DTC[tetra_nr].cx; + VertexCoordinates[3 * Nvertices + 1] = T->DTC[tetra_nr].cy; + VertexCoordinates[3 * Nvertices + 2] = T->DTC[tetra_nr].cz; + Nvertices++; + } +} + +/*! \brief Function calls qsort for sorting faces by ID. + * + * Uses compare_face_sort as comparison function. Requires array FaceSort. + * + * \return void + */ +void sort_faces_by_ID(void) +{ + int i = 0, j = 0, k = 0; + + do + { + FaceSort[j].ID = VertexEntries[i + 0]; + FaceSort[j].start = i + 3; + FaceSort[j].len = VertexEntries[i + 2]; + FaceSort[j].normal[0] = FaceNormals[k++]; + FaceSort[j].normal[1] = FaceNormals[k++]; + FaceSort[j].normal[2] = FaceNormals[k++]; + j++; + + FaceSort[j].ID = VertexEntries[i + 1]; + FaceSort[j].start = i + 3; + FaceSort[j].len = VertexEntries[i + 2]; + FaceSort[j].normal[0] = FaceNormals[k++]; + FaceSort[j].normal[1] = FaceNormals[k++]; + FaceSort[j].normal[2] = FaceNormals[k++]; + j++; + + i += 3 + VertexEntries[i + 2]; + + if(j > MaxFaces) + terminate("j > MaxFaces"); + } + while(i < Nentries); + + Nsort = j; + + /* sort faces by ID */ + qsort(FaceSort, Nsort, sizeof(struct data_face_sort), compare_face_sort); +} + +/*! \brief Outputs Voronoi vertex indices to file. + * + * Outputs the Voronoi vertex indices from task writeTask to lastTask in file + * fname. + * + * \param[in] T Pointer to tessellation. + * \param[in] fname1 File name of file index data is written in. + * \param[in] fname2 File name of file face data is written in. + * \param[in] writeTask Task that gathers information and writes data. + * \param[in] lastTask Last task that is included in this dump. 
+ * + * \return void + */ +void write_voronoi_face_vertex_indices(tessellation *T, char *fname1, char *fname2, int writeTask, int lastTask) +{ + FILE *fd1, *fd2; + MPI_Status status; + int nVertices_tot, nEntries_tot, nNormals_tot; + int nVertices_before, i, task, *tmp; + int *Nvertices_list, *Nentries_list, *Nnormals_list, *Nsort_list; + struct data_face_sort *tmp_sort; + + VertexEntries = mymalloc("VertexEntries", MaxEntries * sizeof(int)); + FaceNormals = mymalloc("VertexEntries", MaxFaces * sizeof(int)); + + /* get faces */ + get_voronoi_face_vertex_indices(T); + + FaceSort = (struct data_face_sort *)mymalloc("face_sort", sizeof(struct data_face_sort) * MaxFaces); + + /* sort faces */ + sort_faces_by_ID(); + + Nentries = 0; + for(i = 0; i < Nsort; i++) + Nentries += FaceSort[i].len + 2; + + /* I/O */ + Nvertices_list = mymalloc("Nvertices_list", sizeof(int) * NTask); + Nentries_list = mymalloc("Nentries_list", sizeof(int) * NTask); + Nsort_list = mymalloc("Nsort_list", sizeof(int) * NTask); + Nnormals_list = mymalloc("Nnormals_list", sizeof(int) * NTask); + + if(ThisTask == writeTask) + { + nVertices_tot = Nvertices; + nEntries_tot = Nentries; + nNormals_tot = Nnormals; + for(task = writeTask + 1; task <= lastTask; task++) + { + MPI_Recv(&Nvertices_list[task], 1, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status); + MPI_Recv(&Nentries_list[task], 1, MPI_INT, task, TAG_LOCALN + 1, MPI_COMM_WORLD, &status); + MPI_Recv(&Nsort_list[task], 1, MPI_INT, task, TAG_LOCALN + 2, MPI_COMM_WORLD, &status); + MPI_Recv(&Nnormals_list[task], 1, MPI_INT, task, TAG_LOCALN + 3, MPI_COMM_WORLD, &status); + MPI_Send(&nVertices_tot, 1, MPI_INT, task, TAG_N, MPI_COMM_WORLD); + nVertices_tot += Nvertices_list[task]; + nEntries_tot += Nentries_list[task]; + nNormals_tot += Nnormals_list[task]; + } + if(!(fd1 = fopen(fname1, "w"))) + terminate("I/O error"); + + if(!(fd2 = fopen(fname2, "w"))) + terminate("I/O error"); + + my_fwrite(&nEntries_tot, sizeof(int), 1, fd1); + 
my_fwrite(&nNormals_tot, sizeof(int), 1, fd2); + for(i = 0; i < Nsort; i++) + { + my_fwrite(&FaceSort[i].ID, sizeof(int), 1, fd1); + my_fwrite(&FaceSort[i].len, sizeof(int), 1, fd1); + my_fwrite(&VertexEntries[FaceSort[i].start], sizeof(int) * FaceSort[i].len, 1, fd1); + my_fwrite(FaceSort[i].normal, 3 * sizeof(float), 1, fd2); + } + + for(task = writeTask + 1; task <= lastTask; task++) + { + tmp_sort = (struct data_face_sort *)mymalloc("tmp_sort", sizeof(struct data_face_sort) * Nsort_list[task]); + tmp = mymalloc("tmp", sizeof(int) * Nentries_list[task]); + MPI_Recv(tmp, Nentries_list[task], MPI_INT, task, TAG_N + 1, MPI_COMM_WORLD, &status); + MPI_Recv(tmp_sort, Nsort_list[task] * sizeof(struct data_face_sort), MPI_BYTE, task, TAG_N + 2, MPI_COMM_WORLD, &status); + + for(i = 0; i < Nsort_list[task]; i++) + { + my_fwrite(&tmp_sort[i].ID, sizeof(int), 1, fd1); + my_fwrite(&tmp_sort[i].len, sizeof(int), 1, fd1); + my_fwrite(&tmp[tmp_sort[i].start], sizeof(int) * tmp_sort[i].len, 1, fd1); + my_fwrite(tmp_sort[i].normal, 3 * sizeof(float), 1, fd2); + } + myfree(tmp); + myfree(tmp_sort); + } + fclose(fd2); + fclose(fd1); + } + else + { + MPI_Send(&Nvertices, 1, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD); + MPI_Send(&Nentries, 1, MPI_INT, writeTask, TAG_LOCALN + 1, MPI_COMM_WORLD); + MPI_Send(&Nsort, 1, MPI_INT, writeTask, TAG_LOCALN + 2, MPI_COMM_WORLD); + MPI_Send(&Nnormals, 1, MPI_INT, writeTask, TAG_LOCALN + 3, MPI_COMM_WORLD); + MPI_Recv(&nVertices_before, 1, MPI_INT, writeTask, TAG_N, MPI_COMM_WORLD, &status); + for(i = 0; i < Nentries; i++) + if(VertexEntries[i] >= 0) + VertexEntries[i] += nVertices_before; + MPI_Send(VertexEntries, Nentries, MPI_INT, writeTask, TAG_N + 1, MPI_COMM_WORLD); + MPI_Send(FaceSort, Nsort * sizeof(struct data_face_sort), MPI_BYTE, writeTask, TAG_N + 2, MPI_COMM_WORLD); + } + + myfree(Nnormals_list); + myfree(Nsort_list); + myfree(Nentries_list); + myfree(Nvertices_list); + myfree(FaceSort); + myfree(FaceNormals); + 
myfree(VertexEntries); +} + +/*! \brief Outputs Voronoi vertex coordinates to file. + * + * Outputs the Voronoi vertex coordinates from task write Task to lastTask in + * file fname. + * + * \param[in] T Pointer to tessellation. + * \param[in] fname File name of file the data is written in. + * \param[in] writeTask Task that gathers information and writes data. + * \param[in] lastTask Last task that is included in this dump. + * + * \return void + */ +void write_voronoi_face_vertex_coordinates(tessellation *T, char *fname, int writeTask, int lastTask) +{ + FILE *fd; + MPI_Status status; + int *Nvertices_list; + int nVertices_tot, task; + float *tmp; + + VertexCoordinates = mymalloc("VertexCoordinates", MaxEntries * 3 * sizeof(float)); + + /* get coordinates */ + get_voronoi_face_vertex_coordinates(T); + + /* I/O */ + Nvertices_list = mymalloc("Nvertices_list", sizeof(int) * NTask); + if(ThisTask == writeTask) + { + nVertices_tot = Nvertices; + for(task = writeTask + 1; task <= lastTask; task++) + { + MPI_Recv(&Nvertices_list[task], 1, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status); + nVertices_tot += Nvertices_list[task]; + } + + if(!(fd = fopen(fname, "w"))) + terminate("I/O error"); + + my_fwrite(&nVertices_tot, sizeof(int), 1, fd); + my_fwrite(VertexCoordinates, sizeof(float), 3 * Nvertices, fd); + for(task = writeTask + 1; task <= lastTask; task++) + { + tmp = mymalloc("tmp", 3 * sizeof(float) * Nvertices_list[task]); + MPI_Recv(tmp, 3 * Nvertices_list[task], MPI_FLOAT, task, TAG_N + 1, MPI_COMM_WORLD, &status); + my_fwrite(tmp, sizeof(float), 3 * Nvertices_list[task], fd); + myfree(tmp); + } + fclose(fd); + } + else + { + MPI_Send(&Nvertices, 1, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD); + MPI_Send(VertexCoordinates, 3 * Nvertices, MPI_FLOAT, writeTask, TAG_N + 1, MPI_COMM_WORLD); + } + myfree(Nvertices_list); + myfree(VertexCoordinates); +} + +/*! \brief Outputs Voronoi mesh to file. 
+ * + * Outputs the Voronoi mesh data from task write Task to lastTask in file + * fname. + * + * \param[in] T Pointer to tessellation. + * \param[in] fname File name of file the data is written in. + * \param[in] writeTask Task that gathers information and writes data. + * \param[in] lastTask Last task that is included in this dump. + * + * \return void + */ +void write_voronoi_mesh(tessellation *T, char *fname, int writeTask, int lastTask) +{ + char buf1[255], buf2[255]; + + MaxEntries = 1000 * NumGas; + MaxFaces = 100 * NumGas; + + /* coordinates */ + Nvertices = 0; + sprintf(buf1, "%s_coordinates.dat", fname); + write_voronoi_face_vertex_coordinates(T, buf1, writeTask, lastTask); + + /* indices */ + Edge_visited = mymalloc_movable(&Edge_visited, "Edge_visited", Mesh.Ndt * sizeof(unsigned char)); + int i; + for(i = 0; i < Mesh.Ndt; i++) + Edge_visited[i] = 0; + + Nentries = 0; + Nnormals = 0; + sprintf(buf1, "%s_indices.dat", fname); + sprintf(buf2, "%s_normals.dat", fname); + write_voronoi_face_vertex_indices(T, buf1, buf2, writeTask, lastTask); + myfree(Edge_visited); +} + +#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_check.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_check.c new file mode 100644 index 0000000000..42c6f06b1f --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_check.c @@ -0,0 +1,407 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi_check.c + * \date 05/2018 + * \brief Algorithms to check Voronoi mesh construction. + * \details contains functions: + * void check_for_min_distance(tessellation * T) + * void check_links(tessellation * T) + * void check_orientations(tessellation * T) + * void check_tetras(tessellation * T, int npoints) + * int points_compare(const void *a, const void *b) + * void check_triangles(tessellation * T, int npoints) + * void check_orientations(tessellation * T) + * void check_links(tessellation * T) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 22.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" +
#if !defined(TWODIMS) && !defined(ONEDIMS) /* three-dimensional test code */

int points_compare(const void *a, const void *b);

/*! \brief Checks minimum distance between Delaunay points making sure it is
 *         nonzero.
 *
 *  Compares all pairs of points (quadratic in T->Ndp), prints the index of
 *  the outer loop as progress and terminates as soon as two points coincide.
 *  On success the minimum distance found is printed.
 *
 *  \param[in] T Pointer to tessellation.
 *
 *  \return void
 */
void check_for_min_distance(tessellation *T)
{
  point *DP = T->DP;
  int i, j;
  double r2, r2min;
  char msg[200];

  for(i = 0, r2min = 1.0e30; i < T->Ndp; i++)
    {
      printf("i=%d\n", i);

      for(j = i + 1; j < T->Ndp; j++)
        {
          r2 = (DP[i].x - DP[j].x) * (DP[i].x - DP[j].x) + (DP[i].y - DP[j].y) * (DP[i].y - DP[j].y) +
               (DP[i].z - DP[j].z) * (DP[i].z - DP[j].z);
          if(r2 < r2min)
            r2min = r2;

          if(r2min == 0)
            {
              sprintf(msg, "i=%d j=%d equal. DP[i].index=%d DP[j].index=%d\n", i, j, DP[i].index, DP[j].index);
              terminate(msg);
            }
        }
    }

  printf("min distance=%g\n", sqrt(r2min));
}

/*! \brief Checks if tessellation links are correct.
 *
 *  For every tetrahedron that is not marked as deleted (t[0] >= 0) and each
 *  of its four faces j:
 *   - the neighbour t[j] must point back to this tetrahedron through its
 *     slot s[j]; a violation is only reported with printf,
 *   - the three points of the shared face must be the same on both sides;
 *     a mismatch terminates the run.
 *
 *  \param[in] T Pointer to tessellation.
 *
 *  \return void
 */
void check_links(tessellation *T)
{
  tetra *DT = T->DT;
  int i, j, s, c, flag = 0;
  int pl[3], pr[3];
  char msg[200];

  for(i = 0; i < T->Ndt; i++)
    {
      if(DT[i].t[0] < 0) /* deleted ? */
        continue;

      for(j = 0; j < 4; j++)
        {
          if(DT[DT[i].t[j]].t[DT[i].s[j]] != i)
            {
              printf("LINK for tetra=%d j=%d DT[i].s[j]=%d incorrect %d\n", i, j, DT[i].s[j], (int)(DT[DT[i].t[j]].t[DT[i].s[j]]));
            }
        }

      for(j = 0; j < 4; j++)
        {
          /* points of face j as seen from this tetrahedron ... */
          for(s = 0, c = 0; s < 4; s++)
            if(s != j)
              pl[c++] = DT[i].p[s];

          /* ... and as seen from the neighbour across that face */
          for(s = 0, c = 0; s < 4; s++)
            if(s != DT[i].s[j])
              pr[c++] = DT[DT[i].t[j]].p[s];

          /* sort the points */

          mysort(&pl[0], 3, sizeof(int), points_compare);
          mysort(&pr[0], 3, sizeof(int), points_compare);

          for(s = 0; s < 3; s++)
            {
              if(pl[s] != pr[s])
                {
                  sprintf(msg, "LINK for i=%d j=%d incorrect. points of triangles don't match up s=%d\n", i, j, s);
                  flag = 1;
                }
            }

          if(flag)
            terminate(msg);
        }
    }

  printf("links ok\n");
}

/*! \brief Checks if orientations of tetrahedra are positive.
 *
 *  Deleted tetrahedra and tetrahedra touching an infinity point are skipped.
 *  The run terminates if Orient3d_Exact() does not return a positive value;
 *  otherwise the smallest volume found is printed.
 *
 *  \param[in] T Pointer to tessellation.
 *
 *  \return void
 */
void check_orientations(tessellation *T)
{
  tetra *DT = T->DT;
  point *DP = T->DP;
  int i, ivol;
  double vol, volmin = 1.0e30;
  char msg[200];

  for(i = 0; i < T->Ndt; i++)
    {
      tetra *t = &DT[i];

      if(t->t[0] < 0) /* deleted ? */
        continue;

      /* the point indices are only looked at for tetrahedra that are still
       * in use */
      point *p0 = &DP[t->p[0]];
      point *p1 = &DP[t->p[1]];
      point *p2 = &DP[t->p[2]];
      point *p3 = &DP[t->p[3]];

      if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
        continue;

      vol  = calculate_tetra_volume(p0, p1, p2, p3);
      ivol = Orient3d_Exact(p0, p1, p2, p3);

      if(ivol <= 0)
        {
          sprintf(msg, "Tetra %d is NEGATIVE (%d %d %d %d) oriented or FLAT: ivol=%d vol=%g\n", i, (int)(t->p[0]), (int)(t->p[1]),
                  (int)(t->p[2]), (int)(t->p[3]), ivol, vol);
          terminate(msg);
        }

      if(vol < volmin)
        volmin = vol;
    }

  printf("orientations ok, volmin=%g\n", volmin);
}

/*! \brief Checks if tetrahedra are valid.
 *
 *  For every tetrahedron in use that has no infinity point, the orientation
 *  must be positive and none of the first npoints points (other than its own
 *  corners) may pass the in-sphere test. The quick test with error bound is
 *  tried first; the exact test is only consulted if the quick one does not
 *  rule the point out.
 *
 *  \param[in] T pointer to tessellation.
 *  \param[in] npoints Number of points.
 *
 *  \return void
 */
void check_tetras(tessellation *T, int npoints)
{
  tetra *DT = T->DT;
  point *DP = T->DP;
  int i, j, res, res_exact;
  char msg[200];

  for(i = 0; i < T->Ndt; i++)
    {
      if((i % 100) == 0)
        printf("check tetra i=%d/%d\n", i, T->Ndt);

      tetra *t = &DT[i];

      if(t->t[0] < 0) /* deleted ? */
        continue;

      point *p0 = &DP[t->p[0]];
      point *p1 = &DP[t->p[1]];
      point *p2 = &DP[t->p[2]];
      point *p3 = &DP[t->p[3]];

      if(isInfinity(p0) || isInfinity(p1) || isInfinity(p2) || isInfinity(p3))
        continue;

      if(!(test_tetra_orientation(p0, p1, p2, p3) > 0))
        {
          sprintf(msg, "Tetra %d is NEGATIVE oriented\n", i);
          terminate(msg);
        }

      for(j = 0; j < npoints; j++)
        {
          /* the corners of the tetrahedron itself are not tested */
          if(t->p[0] == j || t->p[1] == j || t->p[2] == j || t->p[3] == j)
            continue;

          res = InSphere_Errorbound(p0, p1, p2, p3, &DP[j]);

          if(res >= 0)
            {
              res_exact = InSphere_Exact(p0, p1, p2, p3, &DP[j]);

              if(res_exact > 0)
                {
                  sprintf(msg, "ERROR tetra=%d: point=%d in tetra with edges=%d|%d|%d|%d res=%d|%d\n", i, j, (int)(t->p[0]),
                          (int)(t->p[1]), (int)(t->p[2]), (int)(t->p[3]), res, res_exact);
                  terminate(msg);
                }
            }
        }
    }

  printf("Tetrahedra OK\n");
}

/*! \brief Compare integer value of two variables. + * + * \param[in] a Pointer to first value. + * \param[in] b Pointer to second value.
+ * + * \return (-1,0,1) -1 iF a < b. + */ +int points_compare(const void *a, const void *b) +{ + if(*((int *)a) < *((int *)b)) + return -1; + + if(*((int *)a) > *((int *)b)) + return +1; + + return 0; +} + +#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */ + +#ifdef TWODIMS /* two-dimensional test code */ + +/*! \brief Check 2d Voronoi mesh triangles. + * + * \param[in] T Pointer to tessellation. + * \param[in] npoints Number of points. + * + * \return void + */ +void check_triangles(tessellation *T, int npoints) +{ + int i, j, res, res_exact; + char msg[200]; + + tetra *DT = T->DT; + + for(i = 0; i < T->Ndt; i++) + { + if(DT[i].p[0] == DPinfinity) + continue; + if(DT[i].p[1] == DPinfinity) + continue; + if(DT[i].p[2] == DPinfinity) + continue; + + if(Orient2d_Exact(T, DT[i].p[0], DT[i].p[1], DT[i].p[2]) != 1) + { + sprintf(msg, "Triangle %d is NEGATIVE oriented or FLAT\n", i); + terminate(msg); + } + + for(j = 0; j < npoints; j++) + { + if(DT[i].p[0] != j) + if(DT[i].p[1] != j) + if(DT[i].p[2] != j) + { + res = InCircle_Quick(T, DT[i].p[0], DT[i].p[1], DT[i].p[2], j); + + if(res > 0) + { + res_exact = InCircle_Exact(T, DT[i].p[0], DT[i].p[1], DT[i].p[2], j); + + if(res_exact > 0) + { + sprintf(msg, "ERROR: point=%d lies in triangle=%d with edges=%d|%d|%d res=%d|%d\n", j, i, + (int)(DT[i].p[0]), (int)(DT[i].p[1]), (int)(DT[i].p[2]), res, res_exact); + terminate(msg); + } + } + } + } + } + + printf("triangles ok\n"); +} + +/*! \brief Check the orientations of triangles in 2d Voronoi mesh. + * + * \param[in] T Pointer to tessellation. 
+ * + * \return void + */ +void check_orientations(tessellation *T) +{ + int i, ivol; + double vol, volmin = 1.0e30; + char msg[200]; + + tetra *DT = T->DT; + + for(i = 0; i < T->Ndt; i++) + { + if(DT[i].p[0] == DPinfinity) + continue; + if(DT[i].p[1] == DPinfinity) + continue; + if(DT[i].p[2] == DPinfinity) + continue; + + vol = test_triangle_orientation(T, DT[i].p[0], DT[i].p[1], DT[i].p[2]); + ivol = Orient2d_Exact(T, DT[i].p[0], DT[i].p[1], DT[i].p[2]); + + if(ivol <= 0) + { + double vol2 = Orient2d_Quick(T, DT[i].p[0], DT[i].p[1], DT[i].p[2]); + + sprintf(msg, "Triangle %d is NEGATIVE (%d %d %d) oriented or FLAT: ivol=%d vol=%g|%g\n", i, (int)(DT[i].p[0]), + (int)(DT[i].p[1]), (int)(DT[i].p[2]), ivol, vol, vol2); + terminate(msg); + } + + if(vol < volmin) + volmin = vol; + } + + printf("orientations ok, volmin=%g\n", volmin); +} + +/*! \brief Check links in 2d Voronoi mesh. + * + * \param[in] T Pointer to tesselation. + * + * \return void + */ +void check_links(tessellation *T) +{ + int i, j; + char msg[200]; + + tetra *DT = T->DT; + + for(i = 0; i < T->Ndt; i++) + { + for(j = 0; j < 3; j++) + { + if(DT[DT[i].t[j]].t[DT[i].s[j]] != i) + { + sprintf(msg, "LINK for i=%d j=%d incorrect\n", i, j); + terminate(msg); + } + } + } +} + +#endif /* #ifdef TWODIMS */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_derefinement.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_derefinement.c new file mode 100644 index 0000000000..99afd85cc0 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_derefinement.c @@ -0,0 +1,1088 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi_derefinement.c + * \date 05/2018 + * \brief Contains routines for de-refinement. + * \details contains functions: + * static void derefine_add_ngb(int edge, int i, int j, double + * area, int t, int nr) + * int do_derefinements(void) + * static void derefine_apply_probe_list(void) + * static void derefine_apply_flux_list(void) + * static int derefine_flux_list_data_compare(const void *a, + * const void *b) + * static int derefine_probe_list_data_compare_task(const + * void *a, const void *b) + * static int derefine_compare_seq_DP_ID(const void *a, + * const void *b) + * static void derefine_exchange_flag(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 22.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" + +#if defined(REFINEMENT_MERGE_CELLS) && !defined(ONEDIMS) +#define DEREF_SA_FAC 1.0e-4 + +int do_derefinements(void); +static void derefine_add_ngb(int edge, int i, int j, double area, int tt, int nr); +static int derefine_compare_seq_DP_ID(const void *a, const void *b); +static int derefine_flux_list_data_compare(const void *a, const void *b); +static void derefine_apply_flux_list(void); +static void 
derefine_exchange_flag(void); +static void derefine_apply_probe_list(void); +static int derefine_probe_list_data_compare_task(const void *a, const void *b); + +/*! \brief Data for derefinement: flag for de-refinement and index of cell. + * + * Flag values as used by do_derefinements(): 0 = cell is kept, 1 = cell wants to be merged, + * 2 = cell is selected for merging, 3 = cell was a candidate but is blocked by a neighbour. + * dp_index is the index of a Delaunay point of the cell in Mesh.DP, or -1 if not set. + */ +static struct derefine_particle_data +{ + int Flag; + int dp_index; +} * deref_SphP; + +/*! \brief Data structure for communicating de-refinement flags. + * + * One element per imported point (allocated with Mesh_nimport elements); read for Delaunay points owned by another task. + */ +static struct flagexch +{ + int Flag; + MyIDType ID; +} * FlagExch; + +/*! \brief Data structure to flag Delaunay data. + * + * One element per Delaunay point; Flag uses the same values as derefine_particle_data.Flag. + */ +static struct flag_delaunay_data +{ + int Flag; +} * flag_DP; + +/*! \brief Structure defining auxiliary Delaunay data (for sorting). + * + * rank is the position of the point in Mesh.DP, index is a copy of Mesh.DP[].index, + * ID is the particle ID (0 for points without a cell) and rnd is a random number. + */ +static struct seq_delaunay_data +{ + MyFloat rnd; + int rank, index; + MyIDType ID; +} * seq_DP; + +/*! \brief Structure defining probe list element. + * + * A probe asks task 'task' whether its cell 'index' is selected for merging; the answer comes back in 'flag'. + * sendpart is the local cell on whose behalf the question is asked. + */ +static struct probe_list_data +{ + int task, index; + int sendpart; + int flag; +} * ProbeList; + +/*! \brief Structure defining flux list element. + * + * Share of the conserved quantities of a dissolved cell that goes to cell 'index' on task 'task'. + */ +static struct flux_list_data +{ + int task, index; + double dM, dP[3]; /* mass and momentum share */ +#ifdef MHD + double dB[3]; +#endif /* #ifdef MHD */ + +#ifndef ISOTHERM_EQS + double dEnergy; +#endif /* #ifndef ISOTHERM_EQS */ + +#ifdef MAXSCALARS + double dConservedScalars[MAXSCALARS]; +#endif /* #ifdef MAXSCALARS */ +} * FluxList; + +/* number of used / allocated elements; shared by ProbeList and FluxList, which are never in use at the same time */ +static int Nflux, MaxNflux; + +/* per Delaunay point: first and last element of its neighbour list in ngb (-1 if empty); first_free_ngb is the next unused element of ngb */ +static int *first_ngb, *last_ngb, first_free_ngb; + +/*! \brief Structure defining neighbour data. + * + * Element of a singly linked list: index is the neighbouring Delaunay point, edge the index of the + * Voronoi face in Mesh.VF, area the area of that face and next_ngb the next list element (-1 at the end). + */ +static struct ngb_data +{ +#ifdef OPTIMIZE_MEMORY_USAGE + MyFloat area; +#else /* #ifdef OPTIMIZE_MEMORY_USAGE */ + double area; +#endif /* #ifdef OPTIMIZE_MEMORY_USAGE #else */ + int index; + int edge; + int next_ngb; + int t, nr; /* delaunay tetra and edge number that generated this face */ +} * ngb; + +/* triangle list describing the Voronoi cell that is currently being dissolved */ +static int n_tri, max_n_tri; +static triangle *trilist; + +#ifdef REFINEMENT_SPLIT_CELLS +extern char *FlagDoNotRefine; +#endif /* #ifdef REFINEMENT_SPLIT_CELLS */ + +/*! \brief Adds cell in list ngb. + * + * \param[in] edge Element 'edge' in ngb.
+ * \param[in] i Index in first_ngb and last_ngb lists. + * \param[in] j Element 'index' in ngb. + * \param[in] area Element 'area' in ngb. + * \param[in] t Element 't' in ngb. + * \param[in] nr Element 'nr' in ngb. + * + * \return void + */ +static void derefine_add_ngb(int edge, int i, int j, double area, int t, int nr) +{ + /* pairs with a negative point index are silently ignored */ + if(i >= 0 && j >= 0) + { + if(i >= Mesh.Ndp || j >= Mesh.Ndp) + { + terminate("i>= Ndp || j>= Ndp"); + } + + /* append the new element to the linked neighbour list of point i */ + if(first_ngb[i] >= 0) + { + ngb[last_ngb[i]].next_ngb = first_free_ngb; + last_ngb[i] = first_free_ngb; + } + else + { + first_ngb[i] = last_ngb[i] = first_free_ngb; + } + + ngb[first_free_ngb].area = area; + ngb[first_free_ngb].edge = edge; + ngb[first_free_ngb].t = t; + ngb[first_free_ngb].nr = nr; + ngb[first_free_ngb].index = j; + ngb[first_free_ngb].next_ngb = -1; + first_free_ngb++; + } +} + +/*! \brief Loop over all active cells and derefine the ones that need to be + * derefined. + * + * Steps: flag the active cells that want to be merged, select among them (in a globally unique order) + * cells that have no neighbour already up for merging, drop selected cells that still neighbour + * another selected cell (locally and on other tasks), and finally distribute mass, momentum and + * the other conserved quantities of each dissolved cell among its neighbours in proportion to the + * volume they gain. + * + * \return Global number of cells that wanted to be de-refined (countall); this is the number of + * candidates, not the number of cells that were actually dissolved. + */ +int do_derefinements(void) +{ + int idx, i, j, k, count, countall; + + TIMER_START(CPU_DEREFINE); + + deref_SphP = mymalloc_movable(&deref_SphP, "deref_SphP", NumGas * sizeof(struct derefine_particle_data)); + + FlagExch = mymalloc_movable(&FlagExch, "FlagExch", Mesh_nimport * sizeof(struct flagexch)); + + /* first, check whether we have cells to derefine */ + for(idx = 0, count = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; +#ifdef REFINEMENT_SPLIT_CELLS + FlagDoNotRefine[i] = 0; +#endif /* #ifdef REFINEMENT_SPLIT_CELLS */ + + if(i >= NumGas) + terminate("index of gas cell greater than NumGas"); + + deref_SphP[i].Flag = 0; + deref_SphP[i].dp_index = -1; + + if(derefine_should_this_cell_be_merged(i, deref_SphP[i].Flag)) + { + deref_SphP[i].Flag = 1; + count++; + } + } + + MPI_Allreduce(&count, &countall, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + mpi_printf("DEREFINE: Number of cells that want to be de-refined: %d\n", countall);
+ + if(countall) + { + int max_assumed_ntri = 0; + + /* tell the ghost cells whether they want to be de-refined or not */ + derefine_exchange_flag(); + + /* let's create an explicit list of the neighbors of each cell */ + + first_ngb = mymalloc("first_ngb", Mesh.Ndp * sizeof(int)); + ngb = mymalloc("ngb", 2 * Mesh.Nvf * sizeof(struct ngb_data)); + + last_ngb = mymalloc("last_ngb", Mesh.Ndp * sizeof(int)); + + for(i = 0; i < Mesh.Ndp; i++) + first_ngb[i] = last_ngb[i] = -1; + + /* every Voronoi face is entered twice, once for each of its two points */ + for(i = 0, first_free_ngb = 0; i < Mesh.Nvf; i++) + { + derefine_add_ngb(i, Mesh.VF[i].p1, Mesh.VF[i].p2, Mesh.VF[i].area, Mesh.VF[i].t, Mesh.VF[i].nr); + derefine_add_ngb(i, Mesh.VF[i].p2, Mesh.VF[i].p1, Mesh.VF[i].area, Mesh.VF[i].t, Mesh.VF[i].nr); + } + + myfree(last_ngb); + + /* we now make a list of the delaunay points that we can sort in a globally unique way */ + flag_DP = mymalloc_movable(&flag_DP, "flag_DP", Mesh.Ndp * sizeof(struct flag_delaunay_data)); + seq_DP = mymalloc("seq_DP", Mesh.Ndp * sizeof(struct seq_delaunay_data)); + + for(i = 0; i < Mesh.Ndp; i++) + { + seq_DP[i].rank = i; + seq_DP[i].index = Mesh.DP[i].index; + + if(Mesh.DP[i].task == ThisTask) + { + int li = Mesh.DP[i].index; + if(li < 0) + { + flag_DP[i].Flag = 0; + seq_DP[i].ID = 0; + seq_DP[i].rnd = 0; + } + else + { + if(li < NumGas) + if(deref_SphP[li].dp_index < 0) + deref_SphP[li].dp_index = i; /* only guaranteed to be set for active cells */ + + /* indices >= NumGas are mapped back onto the local cell they refer to */ + if(li >= NumGas) + li -= NumGas; + + flag_DP[i].Flag = deref_SphP[li].Flag; + seq_DP[i].ID = P[li].ID; + seq_DP[i].rnd = get_random_number(); + } + } + else + { + flag_DP[i].Flag = FlagExch[Mesh.DP[i].index].Flag; + seq_DP[i].ID = FlagExch[Mesh.DP[i].index].ID; + seq_DP[i].rnd = get_random_number(); + } + } + + /* sort according to ID */ + mysort(seq_DP, Mesh.Ndp, sizeof(struct seq_delaunay_data), derefine_compare_seq_DP_ID); + + /* now let's go through in sorted order.
For each cell that is supposed to be de-refined, check whether any of the + * neighbors is already up for de-refinement. If yes, don't allow it to be de-refined. + * Also, if there is a neighbour with the same ID, don't de-refine it, because this must be a mirrored particle + */ + + for(i = 0; i < Mesh.Ndp; i++) + { + if(seq_DP[i].ID != 0) + { + j = seq_DP[i].rank; + + if(flag_DP[j].Flag == 1) /* this cell is still eligible for derefinement */ + { + /* go through its neighbours and check whether one of them is already up for derefinement */ + + int n = 0; + k = first_ngb[j]; + while(k >= 0) + { + /* we only need to consider neighboring cells if they are active */ + int q = ngb[k].index; + + if(q >= 0) + { + int timebin; + + if(Mesh.DP[q].task == ThisTask) + { + if(Mesh.DP[q].index < NumGas) + timebin = P[Mesh.DP[q].index].TimeBinHydro; + else + timebin = P[Mesh.DP[q].index - NumGas].TimeBinHydro; + } + else + { +#ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT + timebin = PrimExch[Mesh.DP[q].index].TimeBinHydro; +#else /* #ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT */ + timebin = RefExch[Mesh.DP[q].index].TimeBinHydro; +#endif /* #ifndef OPTIMIZE_MESH_MEMORY_FOR_REFINEMENT #else */ + } + + if(TimeBinSynchronized[timebin]) + { + /* the neighbour was a candidate and has already been processed */ + if(flag_DP[q].Flag == 2 || flag_DP[q].Flag == 3) + n++; + + if(Mesh.DP[q].ID == seq_DP[i].ID) /* same ID, so we have a mirrored particle */ + n++; + } + } + + k = ngb[k].next_ngb; + } + + if(n == 0) /* ok, none have been found.
This means this cell is allowed to be de-refined */ + flag_DP[j].Flag = 2; + else + flag_DP[j].Flag = 3; + } + } + } + + myfree(seq_DP); + + /* copy the de-refinement flags back to the cell structure */ + for(i = 0; i < Mesh.Ndp; i++) + if(Mesh.DP[i].task == ThisTask && Mesh.DP[i].index >= 0 && Mesh.DP[i].index < NumGas) + deref_SphP[Mesh.DP[i].index].Flag = flag_DP[i].Flag; + + myfree(flag_DP); + + /* now let's count again how many cells we would like to derefine */ + + for(idx = 0, count = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(deref_SphP[i].Flag == 2) + count++; + } + + int in[2], out[2]; + in[0] = count; + + /* now we carry out an auxiliary check to make sure that we really + avoid de-refining two neighboring cells. If such a pair is + found, both cells will not be derefined. */ + + MaxNflux = Mesh.Indi.AllocFacNflux; + Nflux = 0; + ProbeList = mymalloc_movable(&ProbeList, "ProbeList", MaxNflux * sizeof(struct probe_list_data)); + + count = 0; + + for(idx = 0, count = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(deref_SphP[i].Flag == 2) + { + j = deref_SphP[i].dp_index; /* this is the delaunay point of this cell */ + if(j < 0) + terminate("j < 0"); + + k = first_ngb[j]; + + int flag = 0; + + while(k >= 0) + { + /* neighbours sharing only a tiny face are ignored */ + if(ngb[k].area > DEREF_SA_FAC * SphP[i].SurfaceArea) + { + int q = ngb[k].index; + + if(Mesh.DP[q].task == ThisTask) + { + int p = Mesh.DP[q].index; + + if(p < 0) + terminate("p < 0"); + + if(p >= NumGas) /* this is a local ghost point */ + p -= NumGas; + + if(TimeBinSynchronized[P[p].TimeBinHydro]) + if(deref_SphP[p].Flag == 2) + flag++; + } + else + { + /* here we have a foreign ghost point */ + if(Nflux >= MaxNflux) + { + Mesh.Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR; + MaxNflux = Mesh.Indi.AllocFacNflux; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNflux=%d
Indi.AllocFacNflux=%g\n", ThisTask, MaxNflux, + Mesh.Indi.AllocFacNflux); +#endif /* #ifdef VERBOSE */ + ProbeList = myrealloc_movable(ProbeList, MaxNflux * sizeof(struct probe_list_data)); + + if(Nflux >= MaxNflux) + terminate("Nflux >= MaxNflux"); + } + + /* the owning task will be asked whether this neighbour is selected as well */ + ProbeList[Nflux].task = Mesh.DP[q].task; + ProbeList[Nflux].index = Mesh.DP[q].originalindex; + ProbeList[Nflux].sendpart = i; + ProbeList[Nflux].flag = 0; + + Nflux++; + } + } + k = ngb[k].next_ngb; + } + + if(flag) + { + /* oops. It looks like a neighboring point is also about to be dissolved. We hence do not + dissolve the current point + */ + deref_SphP[i].Flag = 0; + count++; + } + } + } + + /* now let's probe on other tasks */ + + derefine_apply_probe_list(); + + for(i = 0; i < Nflux; i++) + { + if(ProbeList[i].flag) + if(deref_SphP[ProbeList[i].sendpart].Flag == 2) + { + deref_SphP[ProbeList[i].sendpart].Flag = 0; + count++; + } + } + + myfree(ProbeList); + + in[1] = count; + MPI_Reduce(in, out, 2, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + mpi_printf("DEREFINE: Number of cells that we could de-refine: %d, number of cells we exclude from this set: %d\n", out[0], + out[1]); + + /* we now distribute the conserved quantities of the cell among the neighbours */ + + MaxNflux = Mesh.Indi.AllocFacNflux; + Nflux = 0; + FluxList = mymalloc_movable(&FluxList, "FluxList", MaxNflux * sizeof(struct flux_list_data)); + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + if(deref_SphP[i].Flag == 2) + { + j = deref_SphP[i].dp_index; /* this is the delaunay point of this cell */ + if(j < 0) + terminate("j < 0"); + + max_n_tri = 300000; + n_tri = 0; + + trilist = mymalloc("trilist", max_n_tri * sizeof(triangle)); + + /* get a list of all the triangles that make up the Voronoi cell of j */ + k = first_ngb[j]; + while(k >= 0) + { + n_tri = derefine_refine_get_triangles(&Mesh, ngb[k].t, ngb[k].nr, &Mesh.DP[j], trilist, n_tri, max_n_tri); + + k =
ngb[k].next_ngb; + } + + /* assign the first point as owner to all tetras */ + k = first_ngb[j]; + int q = ngb[k].index; + int t; + for(t = 0; t < n_tri; t++) + trilist[t].owner = q; + + /* total volume of the cell that is about to be dissolved */ + double vol = 0; + for(k = 0; k < n_tri; k++) + vol += get_tri_volume(k, trilist); + + /* now consider all the other points and split the triangles if needed */ + k = first_ngb[j]; + k = ngb[k].next_ngb; + while(k >= 0) + { + int q = ngb[k].index; + n_tri = derefine_add_point_and_split_tri(q, trilist, n_tri, max_n_tri, vol); + k = ngb[k].next_ngb; + } + + if(n_tri > max_assumed_ntri) + max_assumed_ntri = n_tri; + + double *volume = mymalloc("volume", Mesh.Ndp * sizeof(double)); + + /* clear the volume entries of the neighbors */ + k = first_ngb[j]; + while(k >= 0) + { + int q = ngb[k].index; + volume[q] = 0; + k = ngb[k].next_ngb; + } + + /* now assign the volume of the triangles to the neighbors */ + for(k = 0; k < n_tri; k++) + { + if(trilist[k].owner < 0 || trilist[k].owner >= Mesh.Ndp) + terminate("strange owner"); + + volume[trilist[k].owner] += get_tri_volume(k, trilist); + } + + /* first, let's sum up the volume gained by all neighbours that share a sufficiently large face with this cell */ + double voltot = 0; + k = first_ngb[j]; + while(k >= 0) + { + if(ngb[k].area > DEREF_SA_FAC * SphP[i].SurfaceArea) + { + int q = ngb[k].index; + voltot += volume[q]; + } + k = ngb[k].next_ngb; + } + + /* now, distribute conserved quantities proportional to the gained volume */ + double facsum = 0; + k = first_ngb[j]; + while(k >= 0) + { + if(ngb[k].area > DEREF_SA_FAC * SphP[i].SurfaceArea) + { + int q = ngb[k].index; + + double fac = volume[q] / voltot; + + if(fac < 0) + { + warn("strange: fac=%g\n", fac); + fac = 0; + // terminate("strange"); + } + facsum += fac; + + if(Mesh.DP[q].task == ThisTask) + { + int p = Mesh.DP[q].index; + + if(p < 0) + terminate("p < 0"); + + if(p >= NumGas) /* this is a local ghost point */ + p -= NumGas; + P[p].Mass += fac * P[i].Mass; + SphP[p].Momentum[0] += fac * SphP[i].Momentum[0]; +
SphP[p].Momentum[1] += fac * SphP[i].Momentum[1]; + SphP[p].Momentum[2] += fac * SphP[i].Momentum[2]; + +#ifdef MHD + SphP[p].BConserved[0] += fac * SphP[i].BConserved[0]; + SphP[p].BConserved[1] += fac * SphP[i].BConserved[1]; + SphP[p].BConserved[2] += fac * SphP[i].BConserved[2]; +#endif /* #ifdef MHD */ + +#ifndef ISOTHERM_EQS + SphP[p].Energy += fac * SphP[i].Energy; +#endif /* #ifndef ISOTHERM_EQS */ + +#ifdef MAXSCALARS + for(int s = 0; s < N_Scalar; s++) + *(MyFloat *)(((char *)(&SphP[p])) + scalar_elements[s].offset_mass) += + fac * (*(MyFloat *)(((char *)(&SphP[i])) + scalar_elements[s].offset_mass)); +#endif /* #ifdef MAXSCALARS */ + +#ifdef REFINEMENT_SPLIT_CELLS + FlagDoNotRefine[p] = 1; +#endif /* #ifdef REFINEMENT_SPLIT_CELLS */ + } + else + { + /* here we have a foreign ghost point */ + if(Mesh.DP[q].originalindex < 0) + { + char buf[1000]; + sprintf(buf, "---> task=%d q=%d j=%d Ndp=%d\n", ThisTask, q, j, Mesh.Ndp); + terminate(buf); + } + + if(Nflux >= MaxNflux) + { + Mesh.Indi.AllocFacNflux *= ALLOC_INCREASE_FACTOR; + MaxNflux = Mesh.Indi.AllocFacNflux; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNflux=%d Indi.AllocFacNflux=%g\n", ThisTask, MaxNflux, + Mesh.Indi.AllocFacNflux); +#endif /* #ifdef VERBOSE */ + FluxList = myrealloc_movable(FluxList, MaxNflux * sizeof(struct flux_list_data)); + + if(Nflux >= MaxNflux) + terminate("Nflux >= MaxNflux"); + } + + /* the share is recorded here and handed to the owning task by derefine_apply_flux_list() */ + FluxList[Nflux].task = Mesh.DP[q].task; + FluxList[Nflux].index = Mesh.DP[q].originalindex; + FluxList[Nflux].dM = fac * P[i].Mass; + FluxList[Nflux].dP[0] = fac * SphP[i].Momentum[0]; + FluxList[Nflux].dP[1] = fac * SphP[i].Momentum[1]; + FluxList[Nflux].dP[2] = fac * SphP[i].Momentum[2]; +#ifdef MHD + FluxList[Nflux].dB[0] = fac * SphP[i].BConserved[0]; + FluxList[Nflux].dB[1] = fac * SphP[i].BConserved[1]; + FluxList[Nflux].dB[2] = fac * SphP[i].BConserved[2]; +#endif /* #ifdef MHD */ + +#ifndef ISOTHERM_EQS + FluxList[Nflux].dEnergy = fac * SphP[i].Energy; +#endif /*
#ifndef ISOTHERM_EQS */ + +#ifdef MAXSCALARS + for(int s = 0; s < N_Scalar; s++) + FluxList[Nflux].dConservedScalars[s] = + fac * (*(MyFloat *)(((char *)(&SphP[i])) + scalar_elements[s].offset_mass)); +#endif /* #ifdef MAXSCALARS */ + Nflux++; + } + } + + k = ngb[k].next_ngb; + } + + /* the shares handed out have to add up to the whole cell */ + if(fabs(facsum - 1) > 1.0e-3) + { + char buf[1000]; + sprintf(buf, "facsum=%g\n", facsum); + terminate(buf); + } + + myfree(volume); + myfree(trilist); + + /* we set the dissolved cell to zero mass and zero ID. It will be eliminated from the list + * of cells in the next domain decomposition + */ + P[i].Mass = 0; + P[i].ID = 0; + P[i].Vel[0] = 0; + P[i].Vel[1] = 0; + P[i].Vel[2] = 0; + + SphP[i].VelVertex[0] = 0; + SphP[i].VelVertex[1] = 0; + SphP[i].VelVertex[2] = 0; + + timebin_remove_particle(&TimeBinsHydro, idx, P[i].TimeBinHydro); + + voronoi_remove_connection(i); + } + } + + /* now let's apply the flux-list */ + derefine_apply_flux_list(); + myfree(FluxList); + + myfree(ngb); + myfree(first_ngb); + +#ifdef VERBOSE + MPI_Reduce(&max_assumed_ntri, &n_tri, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + if(ThisTask == 0) + printf("DEREFINE: maximum assumed n_tri = %d\n", n_tri); +#endif /* #ifdef VERBOSE */ + } + + myfree(FlagExch); + myfree(deref_SphP); + + /* remove the dissolved cells from the list of active gravity cells */ + timebin_cleanup_list_of_active_particles(&TimeBinsGravity); + + TIMER_STOP(CPU_DEREFINE); + + /* note: this is the number of candidates, not the number of cells actually dissolved */ + return countall; +} + +/*! \brief Communicates probe list data if needed.
+ * + * \return void + */ +static void derefine_apply_probe_list(void) +{ + int i, j, p, nimport, ngrp, recvTask; + + /* now exchange the probe-list and apply it where needed */ + + mysort(ProbeList, Nflux, sizeof(struct probe_list_data), derefine_probe_list_data_compare_task); + + for(j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(i = 0; i < Nflux; i++) + Send_count[ProbeList[i].task]++; + + if(Send_count[ThisTask] > 0) + terminate("Send_count[ThisTask]"); + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + struct probe_list_data *ProbeListGet = (struct probe_list_data *)mymalloc("ProbeListGet", nimport * sizeof(struct probe_list_data)); + + /* exchange particle data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the particles */ + MPI_Sendrecv(&ProbeList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct probe_list_data), MPI_BYTE, + recvTask, TAG_DENS_A, &ProbeListGet[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(struct probe_list_data), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + } + + /* apply the probes */ + + for(i = 0; i < nimport; i++) + { + p = ProbeListGet[i].index; + + if(TimeBinSynchronized[P[p].TimeBinHydro]) + if(deref_SphP[p].Flag == 2) + ProbeListGet[i].flag = 1; + } + + /* send results back */ + + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the particles */ + MPI_Sendrecv(&ProbeListGet[Recv_offset[recvTask]], Recv_count[recvTask] * 
sizeof(struct probe_list_data), MPI_BYTE, + recvTask, TAG_DENS_A, &ProbeList[Send_offset[recvTask]], + Send_count[recvTask] * sizeof(struct probe_list_data), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + } + + myfree(ProbeListGet); +} + +/*! \brief Communicate flux list data if needed. + * + * \return void + */ +static void derefine_apply_flux_list(void) +{ + int i, j, p, nimport, ngrp, recvTask; + + /* now exchange the flux-list and apply it when needed */ + + mysort(FluxList, Nflux, sizeof(struct flux_list_data), derefine_flux_list_data_compare); + + for(j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(i = 0; i < Nflux; i++) + Send_count[FluxList[i].task]++; + + if(Send_count[ThisTask] > 0) + terminate("Send_count[ThisTask]"); + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + struct flux_list_data *FluxListGet = (struct flux_list_data *)mymalloc("FluxListGet", nimport * sizeof(struct flux_list_data)); + + /* exchange particle data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the particles */ + MPI_Sendrecv(&FluxList[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct flux_list_data), MPI_BYTE, recvTask, + TAG_DENS_A, &FluxListGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct flux_list_data), + MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + } + + /* apply the fluxes */ + + for(i = 0; i < nimport; i++) + { + p = FluxListGet[i].index; + + if(P[p].ID == 0) + { + char buf[1000]; +#ifndef LONGIDS + printf("On task=%d flux to ID=%d, but this is already 
deleted (index p=%d)\n", ThisTask, P[p].ID, p); +#else /* #ifndef LONGIDS */ + printf("On task=%d flux to ID=%llu, but this is already deleted (index p=%d)\n", ThisTask, P[p].ID, p); +#endif /* #ifndef LONGIDS #else */ + terminate(buf); + } + + P[p].Mass += FluxListGet[i].dM; + SphP[p].Momentum[0] += FluxListGet[i].dP[0]; + SphP[p].Momentum[1] += FluxListGet[i].dP[1]; + SphP[p].Momentum[2] += FluxListGet[i].dP[2]; +#ifdef MHD + SphP[p].BConserved[0] += FluxListGet[i].dB[0]; + SphP[p].BConserved[1] += FluxListGet[i].dB[1]; + SphP[p].BConserved[2] += FluxListGet[i].dB[2]; +#endif /* #ifdef MHD */ + +#ifdef MAXSCALARS + int k; + for(k = 0; k < N_Scalar; k++) + *(MyFloat *)(((char *)(&SphP[p])) + scalar_elements[k].offset_mass) += FluxListGet[i].dConservedScalars[k]; +#endif /* #ifdef MAXSCALARS */ + +#ifndef ISOTHERM_EQS + SphP[p].Energy += FluxListGet[i].dEnergy; +#endif /* #ifndef ISOTHERM_EQS */ + +#ifdef REFINEMENT_SPLIT_CELLS + FlagDoNotRefine[p] = 1; +#endif /* #ifdef REFINEMENT_SPLIT_CELLS */ + } + + myfree(FluxListGet); +} + +/*! \brief Compares flux list data task of two elements. + * + * \param[in] a Pointer to first flux list data object. + * \param[in] b Pointer to second flux list data object. + * + * \return (-1,0,1); -1 if a->task < b->task. + */ +static int derefine_flux_list_data_compare(const void *a, const void *b) +{ + if(((struct flux_list_data *)a)->task < (((struct flux_list_data *)b)->task)) + return -1; + + if(((struct flux_list_data *)a)->task > (((struct flux_list_data *)b)->task)) + return +1; + + return 0; +} + +/*! \brief Compares probe list data task of two elements. + * + * \param[in] a Pointer to first probe list data object. + * \param[in] b Pointer to second probe list data object. + * + * \return (-1,0,1); -1 if a->task < b->task. 
+ */ +static int derefine_probe_list_data_compare_task(const void *a, const void *b) +{ + if(((struct probe_list_data *)a)->task < (((struct probe_list_data *)b)->task)) + return -1; + + if(((struct probe_list_data *)a)->task > (((struct probe_list_data *)b)->task)) + return +1; + + return 0; +} + +/*! \brief Compares seq delaunay data task of two elements. + * + * Comparison criteria (most important first) + * rnd + * ID + * index + * rank + * + * \param[in] a Pointer to first seq delaunay data object. + * \param[in] b Pointer to second seq delaunay data object. + * + * \return (-1,0,1); -1 if a < b. + */ +static int derefine_compare_seq_DP_ID(const void *a, const void *b) +{ + if(((struct seq_delaunay_data *)a)->rnd < (((struct seq_delaunay_data *)b)->rnd)) + return -1; + + if(((struct seq_delaunay_data *)a)->rnd > (((struct seq_delaunay_data *)b)->rnd)) + return +1; + + if(((struct seq_delaunay_data *)a)->ID < (((struct seq_delaunay_data *)b)->ID)) + return -1; + + if(((struct seq_delaunay_data *)a)->ID > (((struct seq_delaunay_data *)b)->ID)) + return +1; + + if(((struct seq_delaunay_data *)a)->index < (((struct seq_delaunay_data *)b)->index)) + return -1; + + if(((struct seq_delaunay_data *)a)->index > (((struct seq_delaunay_data *)b)->index)) + return +1; + + if(((struct seq_delaunay_data *)a)->rank < (((struct seq_delaunay_data *)b)->rank)) + return -1; + + if(((struct seq_delaunay_data *)a)->rank > (((struct seq_delaunay_data *)b)->rank)) + return +1; + + return 0; +} + +/*! \brief Sets exchange flag in de-refinement algorithm. + * + * Loops through gas cells in mesh, sets set export flag and communicates this + * information to the appropriate tasks. 
+ * + * \return void + */ +static void derefine_exchange_flag(void) +{ + int listp; + int i, j, p, task, off; + int ngrp, recvTask, place; + + struct exchange_data + { + int Flag; + MyIDType ID; + } * tmpExch, *tmpRecv; + + tmpExch = (struct exchange_data *)mymalloc("tmpExch", Mesh_nexport * sizeof(struct exchange_data)); + + /* prepare data for export */ + for(j = 0; j < NTask; j++) + Mesh_Send_count[j] = 0; + + for(i = 0; i < NumGasInMesh; i++) + { + p = List_InMesh[i]; + + listp = List_P[p].firstexport; + while(listp >= 0) + { + if((task = ListExports[listp].origin) != ThisTask) + { + place = ListExports[listp].index; + off = Mesh_Send_offset[task] + Mesh_Send_count[task]++; + + tmpExch[off].Flag = 0; + tmpExch[off].ID = P[place].ID; + + if(P[place].Type == 0) + if(TimeBinSynchronized[P[place].TimeBinHydro]) + if(!(P[place].Mass == 0 && P[place].ID == 0)) + tmpExch[off].Flag = deref_SphP[place].Flag; + } + listp = ListExports[listp].nextexport; + } + } + + /* exchange data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Mesh_Send_count[recvTask] > 0 || Mesh_Recv_count[recvTask] > 0) + { + tmpRecv = (struct exchange_data *)mymalloc("tmpRecv", Mesh_Recv_count[recvTask] * sizeof(struct exchange_data)); + + /* get the values */ + MPI_Sendrecv(&tmpExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * sizeof(struct exchange_data), MPI_BYTE, + recvTask, TAG_DENS_A, tmpRecv, Mesh_Recv_count[recvTask] * sizeof(struct exchange_data), MPI_BYTE, recvTask, + TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + for(i = 0; i < Mesh_Recv_count[recvTask]; i++) + { + if(Mesh_Recv_offset[recvTask] + i >= Mesh_nimport) + terminate("number of imported mesh points grater than Mesh_nimport"); + FlagExch[Mesh_Recv_offset[recvTask] + i].Flag = tmpRecv[i].Flag; + FlagExch[Mesh_Recv_offset[recvTask] + i].ID = tmpRecv[i].ID; + } + + myfree(tmpRecv); + } + } + } + + myfree(tmpExch); +} + +#endif /* #if 
defined(REFINEMENT_MERGE_CELLS) && !defined(ONEDIMS) */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_dynamic_update.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_dynamic_update.c new file mode 100644 index 0000000000..7640029045 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_dynamic_update.c @@ -0,0 +1,1037 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi_dynamic_update.c + * \date 05/2018 + * \brief Algorithms for Voronoi dynamic update. 
+ * \details contains functions: + * int voronoi_get_connected_particles(tessellation * T) + * void voronoi_init_connectivity(tessellation * T) + * void voronoi_update_connectivity(tessellation * T) + * void voronoi_remove_connection(int i) + * int compare_foreign_connection(const void *a, const void *b) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 22.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#include "voronoi.h" + +int Nvc; /* number of connections */ +int MaxNvc; /* maximum number of connections */ +int Largest_Nvc; +connection *DC; /* Connections */ + +/*! Data structure for non-local connection. + */ +struct foreign_connection +{ + int task; + int origin; + int index; + int image_flags; +} * ForeignDC, *ImportedDC; + +#define MASK_X_SHIFT_RIGHT 38347922 +#define MASK_X_SHIFT_LEFT 76695844 +#define MASK_Y_SHIFT_RIGHT 14708792 +#define MASK_Y_SHIFT_LEFT 117670336 +#define MASK_Z_SHIFT_RIGHT 261632 +#define MASK_Z_SHIFT_LEFT 133955584 +#define MASK ((1 << 27) - 1) + +int FirstUnusedConnection; + +/*! \brief Gets connected active cells from a mesh. + * + * \param[in] T Pointer to tesselation. + * + * \return Number of cells. 
+ */ +int voronoi_get_connected_particles(tessellation *T) +{ + int idx, i, j, p, q, count = 0, duplicates, image_flags, listp, nexport, nimport, origin; + int ngrp, recvTask; + + CPU_Step[CPU_MISC] += measure_time(); + + /* first, let's add all the primary active points */ + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + p = TimeBinsHydro.ActiveParticleList[idx]; + if(p < 0) + continue; + + if(P[p].Type == 0) + { + Ngb_Marker[p] = Ngb_MarkerValue; + + if(P[p].Mass == 0 && P[p].ID == 0) /* skip cells that have been swallowed or eliminated */ + { + List_P[p].firstexport = -1; + List_P[p].currentexport = -1; + continue; + } + + if(Ninlist >= MaxNinlist) + { + T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR; + MaxNinlist = T->Indi.AllocFacNinlist; +#ifdef VERBOSE + printf("VORONOI: Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist, + T->Indi.AllocFacNinlist); +#endif /* #ifdef VERBOSE */ + ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data)); + + if(Ninlist >= MaxNinlist) + terminate("Ninlist >= MaxNinlist"); + } + + List_InMesh[NumGasInMesh++] = p; + + List_P[p].currentexport = List_P[p].firstexport = Ninlist++; + ListExports[List_P[p].currentexport].image_bits = 1; + ListExports[List_P[p].currentexport].nextexport = -1; + ListExports[List_P[p].currentexport].origin = ThisTask; + ListExports[List_P[p].currentexport].index = p; + + if(T->Ndp >= T->MaxNdp) + { + T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR; + T->MaxNdp = T->Indi.AllocFacNdp; +#ifdef VERBOSE + printf("VORONOI: Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp, + T->Indi.AllocFacNdp); +#endif /* #ifdef VERBOSE */ + T->DP -= 5; + T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point)); + T->DP += 5; + + if(T->Ndp >= T->MaxNdp) + terminate("Ndp >= MaxNdp"); + } + + SphP[p].ActiveArea = 0; + + point *dp = &T->DP[T->Ndp]; + + dp->x = P[p].Pos[0]; + dp->y 
= P[p].Pos[1]; + dp->z = P[p].Pos[2]; + dp->ID = P[p].ID; + dp->task = ThisTask; + dp->index = p; + dp->originalindex = -1; + dp->timebin = P[p].TimeBinHydro; + dp->image_flags = 1; +#ifdef DOUBLE_STENCIL + dp->Hsml = SphP[p].Hsml; + dp->first_connection = -1; + dp->last_connection = -1; +#endif /* #ifdef DOUBLE_STENCIL */ + T->Ndp++; + count++; + } + } + + /* now, we go through the connection list and see whether we have any additional points to add */ + int count_foreign = 0; + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + q = SphP[i].first_connection; + + while(q >= 0) + { + if(q < 0 || q >= MaxNvc) + { + char buf[1000]; + sprintf(buf, "strange connectivity q=%d Nvc=%d", q, MaxNvc); + terminate(buf); + } + + if(DC[q].task >= 0 && DC[q].task < NTask) + { + if(ThisTask == DC[q].task) /* this one is local */ + { + p = DC[q].index; /* particle index */ + + if(P[p].Type == 0) + { + if(!(P[p].Mass == 0 && P[p].ID == 0)) /* skip cells that have been swallowed or dissolved */ + { + if(P[p].Ti_Current != All.Ti_Current) + { + drift_particle(p, All.Ti_Current); + } + + if(p < 0 || p >= NumGas) + { + char buf[1000]; + sprintf(buf, "strange p=%d (Ngas=%d) for q=%d Nvc=%d", p, NumGas, q, Nvc); + terminate(buf); + } + + image_flags = (DC[q].image_flags & MASK); + + if(Ngb_Marker[p] != Ngb_MarkerValue) + { + Ngb_Marker[p] = Ngb_MarkerValue; + List_P[p].firstexport = -1; + List_P[p].currentexport = -1; + } + + listp = List_P[p].firstexport; + + /* now we need to check whether this particle has already been made part of the list */ + if(List_P[p].firstexport >= 0) + { + if(ListExports[List_P[p].currentexport].origin != ThisTask) + terminate("can't be"); + } + else + { + /* this one apparently hasn't been added at all yet */ + if(Ninlist >= MaxNinlist) + { + T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR; + MaxNinlist = T->Indi.AllocFacNinlist; +#ifdef VERBOSE + printf("Task=%d: increase 
memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, + MaxNinlist, T->Indi.AllocFacNinlist); +#endif /* #ifdef VERBOSE */ + ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data)); + + if(Ninlist >= MaxNinlist) + terminate("Ninlist >= MaxNinlist"); + } + + List_InMesh[NumGasInMesh++] = p; + + List_P[p].currentexport = List_P[p].firstexport = Ninlist++; + ListExports[List_P[p].currentexport].image_bits = 0; + ListExports[List_P[p].currentexport].nextexport = -1; + ListExports[List_P[p].currentexport].origin = ThisTask; + ListExports[List_P[p].currentexport].index = p; + } + + if(!(ListExports[List_P[p].currentexport].image_bits & image_flags)) /* already in list */ + { + ListExports[List_P[p].currentexport].image_bits |= image_flags; + + if(T->Ndp >= T->MaxNdp) + { + T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR; + T->MaxNdp = T->Indi.AllocFacNdp; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp, + T->Indi.AllocFacNdp); +#endif /* #ifdef VERBOSE */ + T->DP -= 5; + T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point)); + T->DP += 5; + + if(T->Ndp >= T->MaxNdp) + terminate("Ndp >= MaxNdp"); + } + + SphP[p].ActiveArea = 0; + + MyDouble x = P[p].Pos[0]; + MyDouble y = P[p].Pos[1]; + MyDouble z = P[p].Pos[2]; + + /* for each coordinates there are three possibilities. They are encoded in image_flag to basis three, + * i.e. 
x*3^0 + y*3^1 + z*3^2 */ + +#ifndef REFLECTIVE_X + if((image_flags & MASK_X_SHIFT_RIGHT)) + x += boxSize_X; + else if((image_flags & MASK_X_SHIFT_LEFT)) + x -= boxSize_X; +#else /* #ifndef REFLECTIVE_X */ + if((image_flags & MASK_X_SHIFT_RIGHT)) + x = -x; + else if((image_flags & MASK_X_SHIFT_LEFT)) + x = 2 * boxSize_X - x; +#endif /* #ifndef REFLECTIVE_X #else */ +#ifndef REFLECTIVE_Y + if((image_flags & MASK_Y_SHIFT_RIGHT)) + y += boxSize_Y; + else if((image_flags & MASK_Y_SHIFT_LEFT)) + y -= boxSize_Y; +#else /* #ifndef REFLECTIVE_Y */ + if((image_flags & MASK_Y_SHIFT_RIGHT)) + y = -y; + else if((image_flags & MASK_Y_SHIFT_LEFT)) + y = 2 * boxSize_Y - y; +#endif /* #ifndef REFLECTIVE_Y #else */ +#ifndef REFLECTIVE_Z + if((image_flags & MASK_Z_SHIFT_RIGHT)) + z += boxSize_Z; + else if((image_flags & MASK_Z_SHIFT_LEFT)) + z -= boxSize_Z; +#else /* #ifndef REFLECTIVE_Z */ + if((image_flags & MASK_Z_SHIFT_RIGHT)) + z = -z; + else if((image_flags & MASK_Z_SHIFT_LEFT)) + z = 2 * boxSize_Z - z; +#endif /* #ifndef REFLECTIVE_Z #else */ + + point *dp = &T->DP[T->Ndp]; + + dp->x = x; + dp->y = y; + dp->z = z; + + dp->task = ThisTask; + dp->ID = P[p].ID; + if(image_flags != 1) + dp->index = p + NumGas; /* this is a replicated/mirrored local point */ + else + dp->index = p; /* this is actually a local point that wasn't made part of the mesh yet */ + dp->originalindex = p; + dp->timebin = P[p].TimeBinHydro; + + dp->image_flags = image_flags; +#ifdef DOUBLE_STENCIL + dp->Hsml = SphP[p].Hsml; + dp->first_connection = -1; + dp->last_connection = -1; +#endif /* #ifdef DOUBLE_STENCIL */ + T->Ndp++; + count++; + } + } + } + } + else + { + /* here we have a foreign neighbor that we want */ + count_foreign++; + } + } + + if(q == SphP[i].last_connection) + break; + + q = DC[q].next; + } + } + + /* we now compile a list of the foreign neighbors we want in the mesh */ + + ForeignDC = mymalloc_movable(&ForeignDC, "ForeignDC", count_foreign * sizeof(struct foreign_connection)); + + int 
count_foreign_bak = count_foreign; + + count_foreign = 0; + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) + continue; + + q = SphP[i].first_connection; + + while(q >= 0) + { + if(DC[q].task >= 0 && DC[q].task < NTask) + { + if(ThisTask != DC[q].task) /* this one is not local */ + { + p = DC[q].index; + + ForeignDC[count_foreign].task = DC[q].task; + ForeignDC[count_foreign].origin = ThisTask; + ForeignDC[count_foreign].index = DC[q].index; + ForeignDC[count_foreign].image_flags = (DC[q].image_flags & MASK); + + /* here we have a foreign neighbor that we want */ + count_foreign++; + } + } + + if(q == SphP[i].last_connection) + break; + + q = DC[q].next; + } + } + + if(count_foreign_bak != count_foreign) + terminate("bad"); + + /* we sort this list by tasks, and then eliminate duplicates */ + mysort(ForeignDC, count_foreign, sizeof(struct foreign_connection), compare_foreign_connection); + + for(j = 0; j < NTask; j++) + Send_count[j] = 0; + + for(i = 0, j = -1, duplicates = 0; i < count_foreign; i++) + { + if(j >= 0) + if(memcmp(&ForeignDC[i], &ForeignDC[j], sizeof(struct foreign_connection)) == 0) + { + duplicates++; + continue; + } + + j++; + + ForeignDC[j] = ForeignDC[i]; + Send_count[ForeignDC[j].task]++; + } + + count_foreign -= duplicates; + + MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, nexport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + nexport += Send_count[j]; + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; + } + } + + if(nexport != count_foreign) + { + char buf[1000]; + sprintf(buf, "nexport=%d count_foreign=%d\n", nexport, count_foreign); + terminate(buf); + } + + if(Send_count[ThisTask] != 0) + terminate("bad"); + + ImportedDC = mymalloc_movable(&ImportedDC, "ImportedDC", nimport * 
sizeof(struct foreign_connection)); + + /* get the point requests */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + MPI_Sendrecv(&ForeignDC[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct foreign_connection), MPI_BYTE, + recvTask, TAG_DENS_B, &ImportedDC[Recv_offset[recvTask]], + Recv_count[recvTask] * sizeof(struct foreign_connection), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + } + + point *DP_Buffer = (point *)mymalloc_movable(&DP_Buffer, "DP_Buffer", nimport * sizeof(point)); + + /* now we prepare the points */ + for(j = 0; j < NTask; j++) + Recv_count[j] = 0; + + for(i = 0; i < nimport; i++) + { + p = ImportedDC[i].index; + origin = ImportedDC[i].origin; + image_flags = ImportedDC[i].image_flags; + + /* it could happen that the requested point has been refined or was turned into a star, that's why + * we not necessarily will find all the points requested. 
+ */ + if(P[p].Type != 0) + continue; + + if(P[p].Mass == 0 && P[p].ID == 0) + continue; /* skip cells that have been swallowed or dissolved */ + + if(P[p].Ti_Current != All.Ti_Current) + { + drift_particle(p, All.Ti_Current); + } + + /* mark the points in the export lists */ + + if(Ngb_Marker[p] != Ngb_MarkerValue) + { + Ngb_Marker[p] = Ngb_MarkerValue; + List_P[p].firstexport = -1; + List_P[p].currentexport = -1; + } + + if(List_P[p].firstexport >= 0) + { + if(ListExports[List_P[p].currentexport].origin != origin) + { + listp = List_P[p].firstexport; + while(listp >= 0) + { + if(ListExports[listp].origin == origin) + { + List_P[p].currentexport = listp; + break; + } + + if(ListExports[listp].nextexport < 0) + { + if(Ninlist >= MaxNinlist) + { + T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR; + MaxNinlist = T->Indi.AllocFacNinlist; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist, + T->Indi.AllocFacNinlist); +#endif /* #ifdef VERBOSE */ + ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data)); + + if(Ninlist >= MaxNinlist) + terminate("Ninlist >= MaxNinlist"); + } + + List_P[p].currentexport = Ninlist++; + ListExports[List_P[p].currentexport].image_bits = 0; + ListExports[List_P[p].currentexport].nextexport = -1; + ListExports[List_P[p].currentexport].origin = origin; + ListExports[List_P[p].currentexport].index = p; + ListExports[listp].nextexport = List_P[p].currentexport; + break; + } + listp = ListExports[listp].nextexport; + } + } + } + else + { + /* here we have a local particle that hasn't been made part of the mesh */ + + if(Ninlist >= MaxNinlist) + { + T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR; + MaxNinlist = T->Indi.AllocFacNinlist; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist, + T->Indi.AllocFacNinlist); +#endif /* #ifdef VERBOSE */ + ListExports = 
myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data)); + + if(Ninlist >= MaxNinlist) + terminate("Ninlist >= MaxNinlist"); + } + + List_InMesh[NumGasInMesh++] = p; + + List_P[p].currentexport = List_P[p].firstexport = Ninlist++; + ListExports[List_P[p].currentexport].image_bits = 0; + ListExports[List_P[p].currentexport].nextexport = -1; + ListExports[List_P[p].currentexport].origin = origin; + ListExports[List_P[p].currentexport].index = p; + } + + ListExports[List_P[p].currentexport].image_bits |= image_flags; + + MyDouble x = P[p].Pos[0]; + MyDouble y = P[p].Pos[1]; + MyDouble z = P[p].Pos[2]; + + /* for each coordinates there are three possibilities. They are encoded in image_flag to basis three, i.e. x*3^0 + y*3^1 + z*3^2 + */ +#ifndef REFLECTIVE_X + if((image_flags & MASK_X_SHIFT_RIGHT)) + x += boxSize_X; + else if((image_flags & MASK_X_SHIFT_LEFT)) + x -= boxSize_X; +#else /* #ifndef REFLECTIVE_X */ + if((image_flags & MASK_X_SHIFT_RIGHT)) + x = -x; + else if((image_flags & MASK_X_SHIFT_LEFT)) + x = 2 * boxSize_X - x; +#endif /* #ifndef REFLECTIVE_X #else */ + +#ifndef REFLECTIVE_Y + if((image_flags & MASK_Y_SHIFT_RIGHT)) + y += boxSize_Y; + else if((image_flags & MASK_Y_SHIFT_LEFT)) + y -= boxSize_Y; +#else /* #ifndef REFLECTIVE_Y */ + if((image_flags & MASK_Y_SHIFT_RIGHT)) + y = -y; + else if((image_flags & MASK_Y_SHIFT_LEFT)) + y = 2 * boxSize_Y - y; +#endif /* #ifndef REFLECTIVE_Y #else */ + +#ifndef REFLECTIVE_Z + if((image_flags & MASK_Z_SHIFT_RIGHT)) + z += boxSize_Z; + else if((image_flags & MASK_Z_SHIFT_LEFT)) + z -= boxSize_Z; +#else /* #ifndef REFLECTIVE_Z */ + if((image_flags & MASK_Z_SHIFT_RIGHT)) + z = -z; + else if((image_flags & MASK_Z_SHIFT_LEFT)) + z = 2 * boxSize_Z - z; +#endif /* #ifndef REFLECTIVE_Z #else */ + + int k = Recv_offset[origin] + Recv_count[origin]++; + + SphP[p].ActiveArea = 0; + + DP_Buffer[k].x = x; + DP_Buffer[k].y = y; + DP_Buffer[k].z = z; + DP_Buffer[k].ID = P[p].ID; + DP_Buffer[k].task = 
ThisTask; + DP_Buffer[k].index = p; + DP_Buffer[k].originalindex = p; + DP_Buffer[k].timebin = P[p].TimeBinHydro; + + DP_Buffer[k].image_flags = image_flags; +#ifdef DOUBLE_STENCIL + DP_Buffer[k].Hsml = SphP[p].Hsml; + DP_Buffer[k].first_connection = -1; + DP_Buffer[k].last_connection = -1; +#endif /* #ifdef DOUBLE_STENCIL */ + } + + /* because we may have dropped some of the points because they were turned + * into stars we need to redetermine the send-offsets and counts + */ + + MPI_Alltoall(Recv_count, 1, MPI_INT, Send_count, 1, MPI_INT, MPI_COMM_WORLD); + + for(j = 0, nimport = 0, nexport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) + { + nexport += Send_count[j]; + nimport += Recv_count[j]; + + if(j > 0) + { + Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; + /* note: the Recv_offsets stay at this point */ + } + } + + /* now get the additional Delaunay points from the other processors */ + + while(nexport + T->Ndp > T->MaxNdp) + { + T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR; + T->MaxNdp = T->Indi.AllocFacNdp; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp, T->Indi.AllocFacNdp); +#endif /* #ifdef VERBOSE */ + T->DP -= 5; + T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point)); + T->DP += 5; + } + + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) + { + /* get the Delaunay points */ + + MPI_Sendrecv(&DP_Buffer[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(point), MPI_BYTE, recvTask, TAG_DENS_B, + &T->DP[T->Ndp + Send_offset[recvTask]], Send_count[recvTask] * sizeof(point), MPI_BYTE, recvTask, + TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + } + + T->Ndp += nexport; + count += nexport; + + myfree(DP_Buffer); + myfree(ImportedDC); + myfree(ForeignDC); + + mpi_printf("VORONOI: done with connected particles\n"); + + 
CPU_Step[CPU_MESH_DYNAMIC] += measure_time(); + + /* at this point, it might make sense to sort the Delaunay point again + * according to Peano-Hilbert, in an extended region that allows for the + * ghost regions + */ + + peano_hilbert_order_DP(); + + CPU_Step[CPU_PEANO] += measure_time(); + + return count; +} + +/*! \brief Initialises connectivity. + * + * \param[in] T Pointer to tessellation. + * + * \return void + */ +void voronoi_init_connectivity(tessellation *T) +{ + int i; + + mpi_printf("VORONOI: init connectivity\n"); + + MaxNvc = T->Indi.AllocFacNvc; + DC = mymalloc_movable(&DC, "DC", MaxNvc * sizeof(connection)); + + Nvc = 0; + + /* we use a chaining list to keep track of unused entries in the list of connections */ + /* here we set it up to contain all available spaces */ + FirstUnusedConnection = 0; + for(i = 0; i < MaxNvc - 1; i++) + { + DC[i].next = i + 1; + DC[i].task = -1; /* mark that this is unused */ + } + DC[MaxNvc - 1].next = -1; + DC[MaxNvc - 1].task = -1; + + /* initially, all particle have empty connection lists */ + for(i = 0; i < NumGas; i++) + SphP[i].first_connection = SphP[i].last_connection = -1; + + mpi_printf("VORONOI: done with init of connectivity\n"); +} + +/*! \brief Updates connectivity. + * + * \param[in] T Pointer to tessellation. 
+ *
+ * First returns the connection entries of all active gas cells to the free
+ * list. Then loops over all Voronoi faces and, for both orientations
+ * (p, q) of the two points of a face, appends a connection describing q to
+ * the list of p if p is a local, synchronized gas cell. The array DC is
+ * enlarged by ALLOC_INCREASE_FACTOR whenever the free list is exhausted.
+ * With DOUBLE_STENCIL, connections are additionally recorded for points
+ * that are not local gas cells but are flagged as part of a primary
+ * triangle; their list heads are stored in the DP entry itself.
+ *
+ * \return void
+ */
+void voronoi_update_connectivity(tessellation *T)
+{
+  int idx, i, k, q, p_task, p_index, q_task, q_index, q_dp_index, q_image_flags;
+  MyIDType p_ID;
+
+  CPU_Step[CPU_MISC] += measure_time();
+
+  /* let's clear the connection lists of active particles */
+  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(i >= NumGas)
+        terminate("i >= NumGas");
+
+      q = SphP[i].first_connection;
+
+      if(q >= 0) /* we have connections, let's add them to the free list */
+        {
+          /* walk the cell's chain up to and including last_connection */
+          while(q >= 0)
+            {
+              Nvc--;
+              DC[q].task = -1; /* mark that this is unused */
+
+              if(q == SphP[i].last_connection)
+                break;
+
+              q = DC[q].next;
+            }
+
+          /* we add the new free spots at the beginning of the free list */
+          DC[SphP[i].last_connection].next = FirstUnusedConnection;
+          FirstUnusedConnection = SphP[i].first_connection;
+
+          SphP[i].first_connection = -1;
+          SphP[i].last_connection = -1;
+        }
+    }
+
+  for(i = 0; i < T->Nvf; i++)
+    {
+      /* k selects the orientation: k == 0 treats p1 as owner p and p2 as neighbour q, k == 1 the reverse */
+      for(k = 0; k < 2; k++)
+        {
+          point *DP = T->DP;
+          face *VF = T->VF;
+
+          if(k == 0)
+            {
+              p_task = DP[VF[i].p1].task;
+              p_index = DP[VF[i].p1].index;
+              p_ID = DP[VF[i].p1].ID;
+              q_task = DP[VF[i].p2].task;
+              q_index = DP[VF[i].p2].index;
+              q_dp_index = VF[i].p2;
+              q_image_flags = (DP[VF[i].p2].image_flags & MASK);
+            }
+          else
+            {
+              p_task = DP[VF[i].p2].task;
+              p_index = DP[VF[i].p2].index;
+              p_ID = DP[VF[i].p2].ID;
+              q_task = DP[VF[i].p1].task;
+              q_index = DP[VF[i].p1].index;
+              q_dp_index = VF[i].p1;
+              q_image_flags = (DP[VF[i].p1].image_flags & MASK);
+            }
+
+          if(p_task == ThisTask && p_index >= 0 && p_index < NumGas)
+            {
+              if(TimeBinSynchronized[P[p_index].TimeBinHydro])
+                {
+                  /* note: these 'continue' statements also skip the DOUBLE_STENCIL part below for this k */
+                  if(P[p_index].Type != 0)
+                    continue;
+
+                  if(P[p_index].Mass == 0 && P[p_index].ID == 0)
+                    continue; /* skip cells that have been swallowed or dissolved */
+
+                  /* need to add the connection to the other point to this particle */
+
+                  if(FirstUnusedConnection < 0 || Nvc == MaxNvc)
+                    {
+                      /* both conditions must hold together, otherwise the free list is inconsistent */
+                      if(!(FirstUnusedConnection < 0 && Nvc == MaxNvc))
+                        {
+                          char buf[1000];
+                          sprintf(buf, "strange: FirstUnusedConnection=%d Nvc=%d MaxNvc=%d\n", FirstUnusedConnection, Nvc, MaxNvc);
+                          terminate(buf);
+                        }
+
+                      int n, old_MaxNvc = MaxNvc;
+                      T->Indi.AllocFacNvc *= ALLOC_INCREASE_FACTOR;
+                      MaxNvc = T->Indi.AllocFacNvc;
+#ifdef VERBOSE
+                      printf("Task=%d: increase memory allocation, MaxNvc=%d Indi.AllocFacNvc=%g\n", ThisTask, MaxNvc,
+                             T->Indi.AllocFacNvc);
+#endif /* #ifdef VERBOSE */
+                      DC = myrealloc_movable(DC, MaxNvc * sizeof(connection));
+                      /* NOTE(review): pointers re-read presumably because the movable realloc may relocate T->DP / T->VF - confirm */
+                      DP = T->DP;
+                      VF = T->VF;
+
+                      /* chain the newly added entries into a fresh free list */
+                      FirstUnusedConnection = old_MaxNvc;
+                      for(n = old_MaxNvc; n < MaxNvc - 1; n++)
+                        {
+                          DC[n].next = n + 1;
+                          DC[n].task = -1;
+                        }
+                      DC[MaxNvc - 1].next = -1;
+                      DC[MaxNvc - 1].task = -1;
+                    }
+
+                  /* append the first free entry to the cell's list (or start the list with it) */
+                  if(SphP[p_index].last_connection >= 0)
+                    {
+                      DC[SphP[p_index].last_connection].next = FirstUnusedConnection;
+                      SphP[p_index].last_connection = FirstUnusedConnection;
+                    }
+                  else
+                    {
+                      SphP[p_index].last_connection = FirstUnusedConnection;
+                      SphP[p_index].first_connection = FirstUnusedConnection;
+                    }
+
+                  FirstUnusedConnection = DC[FirstUnusedConnection].next;
+                  Nvc++;
+
+                  DC[SphP[p_index].last_connection].task = q_task;
+                  DC[SphP[p_index].last_connection].image_flags = q_image_flags;
+                  DC[SphP[p_index].last_connection].ID = p_ID; /* note: ID of the owning point p, not of q */
+
+                  /* local points with index >= NumGas are stored relative to NumGas */
+                  if(q_task == ThisTask && q_index >= NumGas)
+                    DC[SphP[p_index].last_connection].index = q_index - NumGas;
+                  else
+                    DC[SphP[p_index].last_connection].index = q_index;
+
+                  DC[SphP[p_index].last_connection].dp_index = q_dp_index;
+#ifdef TETRA_INDEX_IN_FACE
+                  DC[SphP[p_index].last_connection].dt_index = VF[i].dt_index;
+#endif /* #ifdef TETRA_INDEX_IN_FACE */
+                  DC[SphP[p_index].last_connection].vf_index = i; /* index to the corresponding face */
+
+                  if(SphP[p_index].last_connection >= MaxNvc)
+                    {
+                      terminate("this is wrong");
+                    }
+                }
+            }
+
+#ifdef DOUBLE_STENCIL
+          /* index of the owning point p in the DP array */
+          int index;
+          if(k == 0)
+            index = VF[i].p1;
+          else
+            index = VF[i].p2;
+
+          /* NOTE(review): DP[index] is read before 'index >= 0' is tested - confirm negative indices are valid here */
+          if(!(p_task == ThisTask && p_index >= 0 && p_index < NumGas) && DP[index].flag_primary_triangle > 0 && index >= 0)
+            {
+              /* need to add the connection to the other point to this particle */
+
+              if(FirstUnusedConnection < 0 || Nvc == MaxNvc)
+                {
+                  if(!(FirstUnusedConnection < 0 && Nvc == MaxNvc))
+                    {
+                      char buf[1000];
+                      sprintf(buf, "strange: FirstUnusedConnection=%d Nvc=%d MaxNvc=%d\n", FirstUnusedConnection, Nvc, MaxNvc);
+                      terminate(buf);
+                    }
+
+                  int n, old_MaxNvc = MaxNvc;
+                  T->Indi.AllocFacNvc *= ALLOC_INCREASE_FACTOR;
+                  MaxNvc = T->Indi.AllocFacNvc;
+#ifdef VERBOSE
+                  printf("Task=%d: increase memory allocation, MaxNvc=%d Indi.AllocFacNvc=%g\n", ThisTask, MaxNvc,
+                         T->Indi.AllocFacNvc);
+#endif /* #ifdef VERBOSE */
+                  DC = myrealloc_movable(DC, MaxNvc * sizeof(connection));
+                  DP = T->DP;
+                  VF = T->VF;
+
+                  FirstUnusedConnection = old_MaxNvc;
+                  for(n = old_MaxNvc; n < MaxNvc - 1; n++)
+                    {
+                      DC[n].next = n + 1;
+                      DC[n].task = -1;
+                    }
+                  DC[MaxNvc - 1].next = -1;
+                  DC[MaxNvc - 1].task = -1;
+                }
+
+              /* same bookkeeping as above, but the list head/tail live in the DP entry */
+              if(DP[index].last_connection >= 0)
+                {
+                  DC[DP[index].last_connection].next = FirstUnusedConnection;
+                  DP[index].last_connection = FirstUnusedConnection;
+                }
+              else
+                {
+                  DP[index].last_connection = FirstUnusedConnection;
+                  DP[index].first_connection = FirstUnusedConnection;
+                }
+
+              FirstUnusedConnection = DC[FirstUnusedConnection].next;
+              Nvc++;
+
+              DC[DP[index].last_connection].task = q_task;
+              DC[DP[index].last_connection].image_flags = q_image_flags;
+              DC[DP[index].last_connection].ID = p_ID;
+
+              if(q_task == ThisTask && q_index >= NumGas)
+                DC[DP[index].last_connection].index = q_index - NumGas;
+              else
+                DC[DP[index].last_connection].index = q_index;
+
+              DC[DP[index].last_connection].dp_index = q_dp_index;
+
+              DC[DP[index].last_connection].vf_index = i; /* index to the corresponding face */
+
+              if(DP[index].last_connection >= MaxNvc)
+                {
+                  terminate("this is wrong");
+                }
+            }
+#endif /* #ifdef DOUBLE_STENCIL */
+        }
+    }
+
+  mpi_printf("VORONOI: done with updating connectivity.\n");
+
+  CPU_Step[CPU_MESH_DYNAMIC] += measure_time();
+}
+
+/*! \brief Remove connection from cell.
+ *
+ * Marks all connection entries of cell i as unused, puts the whole chain
+ * at the front of the free list and resets the cell's list to empty.
+ * Does nothing if the cell has no connections.
+ *
+ * \param[in] i Index of cell.
+ *
+ * \return void
+ */
+void voronoi_remove_connection(int i)
+{
+  int q;
+  if((q = SphP[i].first_connection) >= 0) /* we have connections, let's add them to the free list */
+    {
+      while(q >= 0)
+        {
+          Nvc--;
+          DC[q].task = -1; /* mark that this is unused */
+
+          if(q == SphP[i].last_connection)
+            break;
+
+          q = DC[q].next;
+        }
+
+      /* we add the new free spots at the beginning of the free list */
+      DC[SphP[i].last_connection].next = FirstUnusedConnection;
+      FirstUnusedConnection = SphP[i].first_connection;
+
+      SphP[i].first_connection = -1;
+      SphP[i].last_connection = -1;
+    }
+}
+
+/*! \brief Compares two foreign connection objects.
+ *
+ * Compares (highest priority first):
+ *   task
+ *   index
+ *   image_flags
+ *
+ * \param[in] a First object (pointer to struct foreign_connection).
+ * \param[in] b Second object (pointer to struct foreign_connection).
+ *
+ * \return (-1,0,1); -1: a < b, +1: a > b, 0: all three fields equal.
+ */
+int compare_foreign_connection(const void *a, const void *b)
+{
+  if(((struct foreign_connection *)a)->task < (((struct foreign_connection *)b)->task))
+    return -1;
+
+  if(((struct foreign_connection *)a)->task > (((struct foreign_connection *)b)->task))
+    return +1;
+
+  if(((struct foreign_connection *)a)->index < (((struct foreign_connection *)b)->index))
+    return -1;
+
+  if(((struct foreign_connection *)a)->index > (((struct foreign_connection *)b)->index))
+    return +1;
+
+  if(((struct foreign_connection *)a)->image_flags < (((struct foreign_connection *)b)->image_flags))
+    return -1;
+
+  if(((struct foreign_connection *)a)->image_flags > (((struct foreign_connection *)b)->image_flags))
+    return +1;
+
+  return 0;
+}
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_exchange.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_exchange.c
new file mode 100644
index 0000000000..9b2f79684e
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_exchange.c
@@ -0,0 +1,531 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              <https://www.gnu.org/licenses/>.
+ *
+ * \file        src/mesh/voronoi/voronoi_exchange.c
+ * \date        05/2018
+ * \brief       Algorithms that handle communication of Voronoi mesh data
+ *              between MPI tasks.
+ * \details     contains functions:
+ *                void mesh_setup_exchange(void)
+ *                void exchange_primitive_variables(void)
+ *                void exchange_primitive_variables_and_gradients(void)
+ *                int compare_primexch(const void *a, const void *b)
+ *                void voronoi_update_ghost_velvertex(void)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 22.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+/* NOTE(review): the six system header names below are missing (the text in
+ * angle brackets appears to have been stripped from this copy of the patch);
+ * restore them from the upstream file before applying. */
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+/*! \brief Auxiliary data structure for communication of primitive variables.
+ *
+ * Sort key used in mesh_setup_exchange: 'task' and 'index' identify a cell
+ * on its home task (compared by compare_primexch), 'rank' remembers the
+ * position of the entry in the unsorted array it was built from.
+ */
+struct data_primexch_compare
+{
+  int rank, task, index;
+} * SortPrimExch, *SortPrimExch2;
+
+/*! \brief Prepares exchange of primitive variables.
+ *
+ * Counts, per destination task, the exports of local mesh cells whose
+ * origin is another task, exchanges these counts and derives the
+ * send/receive offsets. It then sends, for every export, the pair
+ * (ThisTask, local index) and matches the received pairs - after sorting by
+ * (task, index) - against the foreign points in Mesh.DP. The index of each
+ * foreign DP entry is overwritten with the position of the matching entry
+ * in the import buffer. Finally PrimExch and GradExch are allocated with
+ * Mesh_nimport entries. Returns immediately if All.TotNumGas is zero.
+ *
+ * \return void
+ */
+void mesh_setup_exchange(void)
+{
+  if(All.TotNumGas == 0)
+    return;
+
+  TIMER_START(CPU_MESH_EXCHANGE);
+
+  int listp;
+  struct indexexch
+  {
+    int task, index;
+  } * tmpIndexExch, *IndexExch;
+  int i, j, p, task, off, count;
+  int ngrp, recvTask, place;
+
+  for(j = 0; j < NTask; j++)
+    Mesh_Send_count[j] = 0;
+
+  /* first pass: only count how many entries go to each task */
+  for(i = 0; i < NumGasInMesh; i++)
+    {
+      p = List_InMesh[i];
+
+      listp = List_P[p].firstexport;
+      while(listp >= 0)
+        {
+          if(ListExports[listp].origin != ThisTask)
+            {
+              Mesh_Send_count[ListExports[listp].origin]++;
+            }
+          listp = ListExports[listp].nextexport;
+        }
+    }
+
+  MPI_Alltoall(Mesh_Send_count, 1, MPI_INT, Mesh_Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  /* totals and exclusive prefix sums of the counts */
+  for(j = 0, Mesh_nimport = 0, Mesh_nexport = 0, Mesh_Recv_offset[0] = 0, Mesh_Send_offset[0] = 0; j < NTask; j++)
+    {
+      Mesh_nimport += Mesh_Recv_count[j];
+      Mesh_nexport += Mesh_Send_count[j];
+
+      if(j > 0)
+        {
+          Mesh_Send_offset[j] = Mesh_Send_offset[j - 1] + Mesh_Send_count[j - 1];
+          Mesh_Recv_offset[j] = Mesh_Recv_offset[j - 1] + Mesh_Recv_count[j - 1];
+        }
+    }
+
+  IndexExch = (struct indexexch *)mymalloc("IndexExch", Mesh_nimport * sizeof(struct indexexch));
+  tmpIndexExch = (struct indexexch *)mymalloc("tmpIndexExch", Mesh_nexport * sizeof(struct indexexch));
+
+  /* prepare data for export */
+  /* the counts are reset and rebuilt so that they serve as per-task fill positions */
+  for(j = 0; j < NTask; j++)
+    Mesh_Send_count[j] = 0;
+
+  for(i = 0; i < NumGasInMesh; i++)
+    {
+      p = List_InMesh[i];
+
+      listp = List_P[p].firstexport;
+      while(listp >= 0)
+        {
+          if((task = ListExports[listp].origin) != ThisTask)
+            {
+              place = ListExports[listp].index;
+              off = Mesh_Send_offset[task] + Mesh_Send_count[task]++;
+
+              tmpIndexExch[off].task = ThisTask;
+              tmpIndexExch[off].index = place;
+            }
+          listp = ListExports[listp].nextexport;
+        }
+    }
+
+  /* exchange data */
+  /* pairwise exchange: in round ngrp this task talks to task (ThisTask ^ ngrp) */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Mesh_Send_count[recvTask] > 0 || Mesh_Recv_count[recvTask] > 0)
+            {
+              /* get the particles */
+              MPI_Sendrecv(&tmpIndexExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * sizeof(struct indexexch), MPI_BYTE,
+                           recvTask, TAG_DENS_A, &IndexExch[Mesh_Recv_offset[recvTask]],
+                           Mesh_Recv_count[recvTask] * sizeof(struct indexexch), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  myfree(tmpIndexExch);
+
+  /* now we need to associate the imported data with the points stored in the DP[] array */
+
+  SortPrimExch = (struct data_primexch_compare *)mymalloc("SortPrimExch", Mesh_nimport * sizeof(struct data_primexch_compare));
+
+  /* rank = position in the import buffer */
+  for(i = 0; i < Mesh_nimport; i++)
+    {
+      SortPrimExch[i].rank = i;
+      SortPrimExch[i].task = IndexExch[i].task;
+      SortPrimExch[i].index = IndexExch[i].index;
+    }
+
+  /* let's sort the data according to task and index */
+  mysort(SortPrimExch, Mesh_nimport, sizeof(struct data_primexch_compare), compare_primexch);
+
+  SortPrimExch2 = (struct data_primexch_compare *)mymalloc("SortPrimExch2", Mesh.Ndp * sizeof(struct data_primexch_compare));
+
+  /* collect all foreign points of the mesh; rank = position in Mesh.DP */
+  for(i = 0, count = 0; i < Mesh.Ndp; i++)
+    {
+      if(Mesh.DP[i].task != ThisTask)
+        {
+          SortPrimExch2[count].rank = i;
+          SortPrimExch2[count].task = Mesh.DP[i].task;
+          SortPrimExch2[count].index = Mesh.DP[i].index;
+          count++;
+        }
+    }
+
+  /* let's sort according to task and index */
+  mysort(SortPrimExch2, count, sizeof(struct data_primexch_compare), compare_primexch);
+
+  /* count can be larger than nimport because a foreign particle can appear
+     multiple times on the local domain, due to periodicity */
+
+  /* merge-like walk over both sorted lists; j advances when the key changes */
+  /* NOTE(review): SortPrimExch[j] is read before the 'j >= Mesh_nimport' check below */
+  for(i = 0, j = 0; i < count; i++)
+    {
+      if(SortPrimExch2[i].task != SortPrimExch[j].task || SortPrimExch2[i].index != SortPrimExch[j].index)
+        j++;
+
+      if(j >= Mesh_nimport)
+        terminate("j >= Mesh_nimport");
+
+      Mesh.DP[SortPrimExch2[i].rank].index =
+          SortPrimExch[j].rank; /* note: this change is now permanent and available for next exchange */
+    }
+
+  myfree(SortPrimExch2);
+  myfree(SortPrimExch);
+  myfree(IndexExch);
+
+  /* allocate structures needed to exchange the actual information for ghost cells */
+  PrimExch = (struct primexch *)mymalloc_movable(&PrimExch, "PrimExch", Mesh_nimport * sizeof(struct primexch));
+  GradExch = (struct grad_data *)mymalloc_movable(&GradExch, "GradExch", Mesh_nimport * sizeof(struct grad_data));
+
+  TIMER_STOP(CPU_MESH_EXCHANGE);
+}
+
+/*! \brief Communicate primitive variables across MPI tasks.
+ *
+ * This routine is called before gradient calculation, afterwards,
+ * exchange_primitive_variables_and_gradients is called.
+ *
+ * Packs the primitive variables of all exported cells into a temporary
+ * buffer, using the offsets in Mesh_Send_offset, and receives the data of
+ * the imported cells into PrimExch. Returns immediately if All.TotNumGas
+ * is zero.
+ *
+ * \return void
+ */
+void exchange_primitive_variables(void)
+{
+  if(All.TotNumGas == 0)
+    return;
+
+  TIMER_START(CPU_MESH_EXCHANGE);
+
+  int listp;
+  struct primexch *tmpPrimExch;
+  int i, j, p, task, off;
+  int ngrp, recvTask, place;
+
+  tmpPrimExch = (struct primexch *)mymalloc("tmpPrimExch", Mesh_nexport * sizeof(struct primexch));
+
+  /* prepare data for export */
+  for(j = 0; j < NTask; j++)
+    Mesh_Send_count[j] = 0;
+
+  for(i = 0; i < NumGasInMesh; i++)
+    {
+      p = List_InMesh[i];
+
+      listp = List_P[p].firstexport;
+      while(listp >= 0)
+        {
+          if((task = ListExports[listp].origin) != ThisTask)
+            {
+              place = ListExports[listp].index;
+              off = Mesh_Send_offset[task] + Mesh_Send_count[task]++;
+
+              tmpPrimExch[off].Volume = SphP[place].Volume;
+
+              tmpPrimExch[off].Density = SphP[place].Density;
+
+              tmpPrimExch[off].Pressure = SphP[place].Pressure;
+
+#ifdef MHD
+              tmpPrimExch[off].B[0] = SphP[place].B[0];
+              tmpPrimExch[off].B[1] = SphP[place].B[1];
+              tmpPrimExch[off].B[2] = SphP[place].B[2];
+#ifdef MHD_POWELL
+              tmpPrimExch[off].DivB = SphP[place].DivB;
+#endif /* #ifdef MHD_POWELL */
+#endif /* #ifdef MHD */
+
+              tmpPrimExch[off].OldMass = SphP[place].OldMass;
+              tmpPrimExch[off].SurfaceArea = SphP[place].SurfaceArea;
+              tmpPrimExch[off].ActiveArea = SphP[place].ActiveArea;
+              tmpPrimExch[off].TimeBinHydro = P[place].TimeBinHydro;
+
+#ifdef MAXSCALARS
+              /* scalars are read from SphP[place] via the byte offsets stored in scalar_elements */
+              for(j = 0; j < N_Scalar; j++)
+                tmpPrimExch[off].Scalars[j] = *(MyFloat *)(((char *)(&SphP[place])) + scalar_elements[j].offset);
+#endif /* #ifdef MAXSCALARS */
+
+              tmpPrimExch[off].TimeLastPrimUpdate = SphP[place].TimeLastPrimUpdate;
+
+              for(j = 0; j < 3; j++)
+                {
+                  tmpPrimExch[off].VelGas[j] = P[place].Vel[j];
+                  tmpPrimExch[off].Center[j] = SphP[place].Center[j];
+                }
+              tmpPrimExch[off].Csnd = get_sound_speed(place);
+            }
+          listp = ListExports[listp].nextexport;
+        }
+    }
+
+  /* exchange data */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Mesh_Send_count[recvTask] > 0 || Mesh_Recv_count[recvTask] > 0)
+            {
+              /* get the particles */
+              MPI_Sendrecv(&tmpPrimExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * sizeof(struct primexch), MPI_BYTE,
+                           recvTask, TAG_DENS_A, &PrimExch[Mesh_Recv_offset[recvTask]],
+                           Mesh_Recv_count[recvTask] * sizeof(struct primexch), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  myfree(tmpPrimExch);
+
+  TIMER_STOP(CPU_MESH_EXCHANGE);
+}
+
+/*! \brief Communicate primitive variables and gradients across MPI tasks.
+ *
+ * This routine is called after gradient calculation.
+ *
+ * Packs the primitive variables (including VelVertex) and the gradient
+ * data of all exported cells into temporary buffers and receives the data
+ * of the imported cells into PrimExch and GradExch. Before packing,
+ * SphP[].OldMass of every cell in the mesh is lowered to P[].Mass if the
+ * mass has decreased. Returns immediately if All.TotNumGas is zero.
+ *
+ * \return void
+ */
+void exchange_primitive_variables_and_gradients(void)
+{
+  if(All.TotNumGas == 0)
+    return;
+
+  TIMER_START(CPU_MESH_EXCHANGE);
+
+  int listp;
+  struct grad_data *tmpGradExch;
+  struct primexch *tmpPrimExch;
+
+  int i, j, p, task, off;
+  int ngrp, recvTask, place;
+
+  tmpPrimExch = (struct primexch *)mymalloc("tmpPrimExch", Mesh_nexport * sizeof(struct primexch));
+  tmpGradExch = (struct grad_data *)mymalloc("tmpGradExch", Mesh_nexport * sizeof(struct grad_data));
+
+  /* prepare data for export */
+  for(j = 0; j < NTask; j++)
+    Mesh_Send_count[j] = 0;
+
+  for(i = 0; i < NumGasInMesh; i++)
+    {
+      p = List_InMesh[i];
+
+      /* in case previous steps already lowered the Mass, update OldMass to yield together with metallicity vector conservative
+       * estimate of metal mass of each species contained in cell */
+      if(P[p].Mass < SphP[p].OldMass)
+        SphP[p].OldMass = P[p].Mass;
+
+      listp = List_P[p].firstexport;
+      while(listp >= 0)
+        {
+          if((task = ListExports[listp].origin) != ThisTask)
+            {
+              place = ListExports[listp].index;
+              off = Mesh_Send_offset[task] + Mesh_Send_count[task]++;
+
+              tmpPrimExch[off].Volume = SphP[place].Volume;
+              tmpPrimExch[off].Density = SphP[place].Density;
+              tmpPrimExch[off].Pressure = SphP[place].Pressure;
+
+#ifdef MHD
+              tmpPrimExch[off].B[0] = SphP[place].B[0];
+              tmpPrimExch[off].B[1] = SphP[place].B[1];
+              tmpPrimExch[off].B[2] = SphP[place].B[2];
+#ifdef MHD_POWELL
+              tmpPrimExch[off].DivB = SphP[place].DivB;
+#endif /* #ifdef MHD_POWELL */
+#endif /* #ifdef MHD */
+
+              tmpPrimExch[off].OldMass = SphP[place].OldMass;
+              tmpPrimExch[off].SurfaceArea = SphP[place].SurfaceArea;
+              tmpPrimExch[off].ActiveArea = SphP[place].ActiveArea;
+
+              tmpPrimExch[off].TimeBinHydro = P[place].TimeBinHydro;
+
+#ifdef MAXSCALARS
+              /* scalars are read from SphP[place] via the byte offsets stored in scalar_elements */
+              for(j = 0; j < N_Scalar; j++)
+                tmpPrimExch[off].Scalars[j] = *(MyFloat *)(((char *)(&SphP[place])) + scalar_elements[j].offset);
+#endif /* #ifdef MAXSCALARS */
+
+              tmpPrimExch[off].TimeLastPrimUpdate = SphP[place].TimeLastPrimUpdate;
+
+              for(j = 0; j < 3; j++)
+                {
+                  tmpPrimExch[off].VelGas[j] = P[place].Vel[j];
+                  tmpPrimExch[off].Center[j] = SphP[place].Center[j];
+                  tmpPrimExch[off].VelVertex[j] = SphP[place].VelVertex[j];
+                }
+
+              tmpGradExch[off] = SphP[place].Grad;
+
+              tmpPrimExch[off].Csnd = get_sound_speed(place);
+            }
+          listp = ListExports[listp].nextexport;
+        }
+    }
+
+  /* exchange data */
+  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Mesh_Send_count[recvTask] > 0 || Mesh_Recv_count[recvTask] > 0)
+            {
+              /* exchange the data */
+              MPI_Sendrecv(&tmpPrimExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * sizeof(struct primexch), MPI_BYTE,
+                           recvTask, TAG_DENS_A, &PrimExch[Mesh_Recv_offset[recvTask]],
+                           Mesh_Recv_count[recvTask] * sizeof(struct primexch), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+
+              /* gradients use the same offsets and counts, but a separate message tag */
+              MPI_Sendrecv(&tmpGradExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * sizeof(struct grad_data), MPI_BYTE,
+                           recvTask, TAG_HYDRO_A, &GradExch[Mesh_Recv_offset[recvTask]],
+                           Mesh_Recv_count[recvTask] * sizeof(struct grad_data), MPI_BYTE, recvTask, TAG_HYDRO_A, MPI_COMM_WORLD,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  myfree(tmpGradExch);
+  myfree(tmpPrimExch);
+
+  TIMER_STOP(CPU_MESH_EXCHANGE);
+
+  /* note: because the sequence is the same as before, we don't have to do the sorts again */
+}
+
+/*! \brief Compare two data_primexch_compare objects.
+ *
+ * The following variables are compared (most important first):
+ *   task
+ *   index
+ *
+ * \param[in] a Pointer to first data_primexch_compare object.
+ * \param[in] b Pointer to second data_primexch_compare object.
+ *
+ * \return (-1,0,1); -1 if a < b.
+ */ +int compare_primexch(const void *a, const void *b) +{ + if(((struct data_primexch_compare *)a)->task < ((struct data_primexch_compare *)b)->task) + return -1; + + if(((struct data_primexch_compare *)a)->task > ((struct data_primexch_compare *)b)->task) + return +1; + + if(((struct data_primexch_compare *)a)->index < ((struct data_primexch_compare *)b)->index) + return -1; + + if(((struct data_primexch_compare *)a)->index > ((struct data_primexch_compare *)b)->index) + return +1; + + return 0; +} + +/*! \brief Communicates vertex velocity divergence data across MPI tasks. + * + * \return 0 + */ +#ifdef OUTPUT_VERTEX_VELOCITY_DIVERGENCE +void voronoi_update_ghost_velvertex(void) +{ + CPU_Step[CPU_MISC] += measure_time(); + + int listp; + int i, j, p, task, off; + int ngrp, recvTask, place; + struct velvertex_data + { + MyFloat VelVertex[3]; + } * tmpVelVertexExch, *tmpVelVertexRecv; + + tmpVelVertexExch = (struct velvertex_data *)mymalloc("tmpVelVertexExch", Mesh_nexport * sizeof(struct velvertex_data)); + + /* prepare data for export */ + for(j = 0; j < NTask; j++) + Mesh_Send_count[j] = 0; + + for(i = 0; i < NumGasInMesh; i++) + { + p = List_InMesh[i]; + + listp = List_P[p].firstexport; + while(listp >= 0) + { + if((task = ListExports[listp].origin) != ThisTask) + { + place = ListExports[listp].index; + off = Mesh_Send_offset[task] + Mesh_Send_count[task]++; + + for(j = 0; j < 3; j++) + { + tmpVelVertexExch[off].VelVertex[j] = SphP[place].VelVertex[j]; + } + } + listp = ListExports[listp].nextexport; + } + } + + /* exchange data */ + for(ngrp = 1; ngrp < (1 << PTask); ngrp++) + { + recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(Mesh_Send_count[recvTask] > 0 || Mesh_Recv_count[recvTask] > 0) + { + tmpVelVertexRecv = + (struct velvertex_data *)mymalloc("tmpVelVertexRecv", Mesh_Recv_count[recvTask] * sizeof(struct velvertex_data)); + + /* get the values */ + MPI_Sendrecv(&tmpVelVertexExch[Mesh_Send_offset[recvTask]], Mesh_Send_count[recvTask] * 
sizeof(struct velvertex_data), + MPI_BYTE, recvTask, TAG_DENS_A, tmpVelVertexRecv, Mesh_Recv_count[recvTask] * sizeof(struct velvertex_data), + MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + for(i = 0; i < Mesh_Recv_count[recvTask]; i++) + { + for(j = 0; j < 3; j++) + { + PrimExch[Mesh_Recv_offset[recvTask] + i].VelVertex[j] = tmpVelVertexExch[i].VelVertex[j]; + } + } + + myfree(tmpVelVertexRecv); + } + } + } + + myfree(tmpVelVertexExch); + + CPU_Step[CPU_SET_VERTEXVELS] += measure_time(); +} +#endif /* #ifdef OUTPUT_VERTEX_VELOCITY_DIVERGENCE */ diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_ghost_search.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_ghost_search.c new file mode 100644 index 0000000000..6c147b7901 --- /dev/null +++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_ghost_search.c @@ -0,0 +1,1773 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mesh/voronoi/voronoi_ghost_search.c + * \date 05/2018 + * \brief Algorithms to search for (ghost) cells from other domains. 
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                int voronoi_ghost_search(tessellation * TT)
+ *                static void voronoi_pick_up_additional_DP_points(void)
+ *                int voronoi_ghost_search_evaluate(tessellation * T,
+ *                  int target, int mode, int q, int thread_id)
+ *                int ngb_treefind_ghost_search(tessellation * T, MyDouble
+ *                  searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat
+ *                  maxdist, int target, int origin, int *startnode, int
+ *                  bitflags, int mode, int *nexport, int *nsend_local)
+ *                int ngb_treefind_ghost_search(tessellation * T, MyDouble
+ *                  searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat
+ *                  maxdist, int target, int origin, int mode, int thread_id,
+ *                  int numnodes, int *firstnode)
+ *                int count_undecided_tetras(tessellation * T)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+/* NOTE(review): the six system header names below are missing (the text in
+ * angle brackets appears to have been stripped from this copy of the patch);
+ * restore them from the upstream file before applying. */
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#include "voronoi.h"
+
+#if !defined(ONEDIMS)
+
+static void voronoi_pick_up_additional_DP_points(void);
+
+/* tessellation currently being processed; set by voronoi_ghost_search() */
+static tessellation *T;
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3];    /* search center: (cx, cy, cz) of the tetrahedron's DTC entry */
+  MyDouble RefPos[3]; /* position of the reference Delaunay point of the tetrahedron */
+  MyFloat MaxDist;    /* Hsml of the reference point's cell */
+  int Origin;         /* task that issued the search */
+
+  int Firstnode;
+
+#ifdef EXTENDED_GHOST_SEARCH
+  unsigned char BitFlagList[NODELISTLENGTH];
+#endif /* #ifdef EXTENDED_GHOST_SEARCH */
+} data_in;
+
+static data_in *DataGet, *DataIn;
+
+/*! \brief Routine that fills the relevant particle/cell data into the input
+ *         structure defined above.
Needed by generic_comm_helpers2.
+ *
+ * The reference point q is the first point of tetrahedron i that is a
+ * local, synchronized gas cell (without DOUBLE_STENCIL) or that is flagged
+ * as part of a primary triangle (with DOUBLE_STENCIL). The run is
+ * terminated if no such point exists.
+ *
+ * \param[out] in Data structure to fill.
+ * \param[in] i Index of the tetrahedron in the DT and DTC arrays.
+ * \param[in] firstnode First node of communication.
+ *
+ * \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  point *DP = T->DP;
+  tetra *DT = T->DT;
+  tetra_center *DTC = T->DTC;
+
+  int k, q;
+
+  /* q stays -1 until a suitable reference point has been found */
+  for(k = 0, q = -1; k < (NUMDIMS + 1); k++)
+    {
+#ifndef DOUBLE_STENCIL
+      if(DP[DT[i].p[k]].task == ThisTask)
+        if(DP[DT[i].p[k]].index >= 0 && DP[DT[i].p[k]].index < NumGas)
+          {
+            if(TimeBinSynchronized[P[DP[DT[i].p[k]].index].TimeBinHydro])
+              {
+                q = DT[i].p[k];
+                break;
+              }
+          }
+#else  /* #ifndef DOUBLE_STENCIL */
+      if(DP[DT[i].p[k]].flag_primary_triangle && DT[i].p[k] >= 0)
+        {
+          q = DT[i].p[k];
+          break;
+        }
+#endif /* #ifndef DOUBLE_STENCIL #else */
+    }
+
+  if(q == -1)
+    terminate("q=-1");
+
+  in->Pos[0] = DTC[i].cx;
+  in->Pos[1] = DTC[i].cy;
+  in->Pos[2] = DTC[i].cz;
+
+  in->RefPos[0] = DP[q].x;
+  in->RefPos[1] = DP[q].y;
+  in->RefPos[2] = DP[q].z;
+
+  in->Origin = ThisTask;
+
+  /* NOTE(review): voronoi_ghost_search_evaluate uses T->DP[q].Hsml under DOUBLE_STENCIL, this uses SphP[] in both
+   * configurations - confirm this is intended */
+  in->MaxDist = SphP[DP[q].index].Hsml;
+
+  in->Firstnode = firstnode;
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  int Count; /* counts how many have been found */
+} data_out;
+
+static data_out *DataResult, *DataOut;
+
+/*! \brief Routine to store or combine result data. Needed by
+ *         generic_comm_helpers2.
+ *
+ * \param[in] out Result of the search; if its Count is non-zero, bit 2 of
+ *            the tetrahedron's flag is cleared.
+ * \param[in] i Index of the tetrahedron in the DTF array.
+ * \param[in] mode Mode of function: local particles or information that was
+ *            communicated from other tasks and has to be added locally?
+ *
+ * \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  if(mode == MODE_LOCAL_PARTICLES || mode == MODE_IMPORTED_PARTICLES)
+    if(out->Count)
+      T->DTF[i] -= (T->DTF[i] & 2); /* clear bit 2: tetrahedron has to be tested again */
+}
+
+#include "../../utils/generic_comm_helpers2.h"
+
+#ifdef EXTENDED_GHOST_SEARCH
+/*! Data structure for extended ghost search.
+ */
+static struct data_nodelist_special
+{
+  unsigned char BitFlagList[NODELISTLENGTH];
+} * DataNodeListSpecial;
+#endif /* #ifdef EXTENDED_GHOST_SEARCH */
+
+static point *DP_Buffer;                /* send buffer for Delaunay points; allocated in voronoi_ghost_search */
+static int MaxN_DP_Buffer, N_DP_Buffer; /* allocated size of DP_Buffer / counter reset at the start of each iteration */
+static int NadditionalPoints;           /* number of points imported so far in this ghost search */
+static int *send_count_new;             /* per-task number of points to send, reset at the start of each iteration */
+
+/*! \brief Routine that defines what to do with local particles.
+ *
+ * Calls the *_evaluate function in MODE_LOCAL_PARTICLES.
+ *
+ * Processes tetrahedra starting at NextParticle until either all T->Ndt
+ * tetrahedra are done or the export buffer of the thread runs low
+ * (ExportSpace < MinSpace). Tetrahedra whose flag already has bit 2 set,
+ * deleted tetrahedra, tetrahedra touching DPinfinity and tetrahedra
+ * without a suitable reference point are skipped.
+ *
+ * \return void
+ */
+static void kernel_local(void)
+{
+  int i, j, q;
+
+  /* do local particles and prepare export list */
+  {
+    int thread_id = get_thread_num();
+
+    for(j = 0; j < NTask; j++)
+      Thread[thread_id].Exportflag[j] = -1;
+
+    while(1)
+      {
+        if(Thread[thread_id].ExportSpace < MinSpace)
+          break;
+
+        i = NextParticle++;
+
+        if(i >= T->Ndt)
+          break;
+
+        if((T->DTF[i] & 2) == 0) /* DT that is not flagged as tested ok */
+          {
+            T->DTF[i] |= 2; /* if we find a particle, need to clear this flag again! */
+
+            point *DP = T->DP;
+            tetra *DT = T->DT;
+
+            if(DT[i].t[0] < 0) /* deleted ? */
+              continue;
+
+            if(DT[i].p[0] == DPinfinity || DT[i].p[1] == DPinfinity || DT[i].p[2] == DPinfinity)
+              continue;
+
+#ifndef TWODIMS
+            if(DT[i].p[3] == DPinfinity)
+              continue;
+#endif /* #ifndef TWODIMS */
+
+#ifndef DOUBLE_STENCIL
+            /* look for the first point that is a local, synchronized gas cell */
+            for(j = 0, q = -1; j < (NUMDIMS + 1); j++)
+              {
+                if(DP[DT[i].p[j]].task == ThisTask)
+                  if(DP[DT[i].p[j]].index >= 0 && DP[DT[i].p[j]].index < NumGas)
+                    {
+                      if(TimeBinSynchronized[P[DP[DT[i].p[j]].index].TimeBinHydro])
+                        {
+                          q = DT[i].p[j];
+                          break;
+                        }
+                    }
+              }
+
+            if(j == (NUMDIMS + 1)) /* this triangle does not have a local point. No need to test it */
+              continue;
+
+            if(q == -1)
+              terminate("q==-1");
+#else  /* #ifndef DOUBLE_STENCIL */
+            /* here comes the check for a double stencil */
+            for(j = 0, q = -1; j < (NUMDIMS + 1); j++)
+              {
+                if(DP[DT[i].p[j]].flag_primary_triangle && DT[i].p[j] >= 0)
+                  {
+                    q = DT[i].p[j];
+                    break;
+                  }
+              }
+
+            if(j ==
+               (NUMDIMS +
+                1)) /* this triangle does not have a point which is not at least neighbor to a primary point. No need to test it */
+              continue;
+
+            if(q == -1)
+              terminate("q==-1");
+#endif /* #ifndef DOUBLE_STENCIL #else */
+            voronoi_ghost_search_evaluate(T, i, MODE_LOCAL_PARTICLES, q, thread_id);
+          }
+      }
+  }
+}
+
+/*! \brief Routine that defines what to do with imported particles.
+ *
+ * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES.
+ *
+ * Loops over all Nimport imported search requests; the reference point
+ * argument q is not used in this mode and passed as 0.
+ *
+ * \return void
+ */
+static void kernel_imported(void)
+{
+  /* now do the particles that were sent to us */
+  int i, count = 0;
+  {
+    int threadid = get_thread_num();
+
+    while(1)
+      {
+        i = count++;
+
+        if(i >= Nimport)
+          break;
+
+        voronoi_ghost_search_evaluate(T, i, MODE_IMPORTED_PARTICLES, 0, threadid);
+      }
+  }
+}
+
+/*! \brief Main routine to perform ghost search.
+ *
+ * Repeats local search, data exchange and pick-up of the additional
+ * Delaunay points until every task has processed all of its tetrahedra.
+ *
+ * \param[in, out] TT Pointer to tessellation.
+ *
+ * \return Number of additional points.
+ */
+int voronoi_ghost_search(tessellation *TT)
+{
+  T = TT; /* make the tessellation available to the static helper routines of this file */
+  int j, ndone, ndone_flag;
+
+  NadditionalPoints = 0;
+
+  /* allocate buffers to arrange communication */
+
+  send_count_new = (int *)mymalloc_movable(&send_count_new, "send_count_new", NTask * sizeof(int));
+
+  MaxN_DP_Buffer = T->Indi.AllocFacN_DP_Buffer;
+  DP_Buffer = (point *)mymalloc_movable(&DP_Buffer, "DP_Buffer", MaxN_DP_Buffer * sizeof(point));
+
+#ifdef DOUBLE_STENCIL
+  /* flag every point that belongs to a tetrahedron with at least one local, synchronized gas cell */
+  {
+    point *DP = T->DP;
+    tetra *DT = T->DT;
+    int i;
+
+    for(i = 0; i < T->Ndp; i++)
+      DP[i].flag_primary_triangle = 0;
+
+    for(i = 0; i < T->Ndt; i++)
+      {
+        for(j = 0; j < (NUMDIMS + 1); j++)
+          {
+            if(DP[DT[i].p[j]].task == ThisTask)
+              if(DP[DT[i].p[j]].index >= 0 && DP[DT[i].p[j]].index < NumGas)
+                if(TimeBinSynchronized[P[DP[DT[i].p[j]].index].TimeBinHydro])
+                  break;
+          }
+
+        if(j != (NUMDIMS + 1)) /* this triangle does have a local point, so mark all its points */
+          {
+            for(j = 0; j < (NUMDIMS + 1); j++)
+              DP[DT[i].p[j]].flag_primary_triangle = 1;
+          }
+      }
+  }
+#endif /* #ifdef DOUBLE_STENCIL */
+
+  generic_set_MaxNexport();
+
+  NextParticle = 0;
+
+  /* iterate until all tasks have worked through all their tetrahedra */
+  do
+    {
+      for(j = 0; j < NTask; j++)
+        send_count_new[j] = 0;
+
+      N_DP_Buffer = 0;
+
+      /* allocate buffers to arrange communication */
+      generic_alloc_partlist_nodelist_ngblist_threadbufs();
+
+      kernel_local();
+
+      /* do all necessary bookkeeping and the data exchange */
+      generic_exchange(kernel_imported);
+
+      generic_free_partlist_nodelist_ngblist_threadbufs();
+
+      voronoi_pick_up_additional_DP_points();
+
+      if(NextParticle >= T->Ndt)
+        ndone_flag = 1;
+      else
+        ndone_flag = 0;
+
+      /* ndone = number of tasks that have finished */
+      MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+    }
+  while(ndone < NTask);
+
+  myfree(DP_Buffer);
+  myfree(send_count_new);
+
+#ifdef EXTENDED_GHOST_SEARCH
+  /* NOTE(review): the allocation of DataNodeListSpecial is not visible in this part of the file */
+  myfree(DataNodeListSpecial);
+#endif /* #ifdef EXTENDED_GHOST_SEARCH */
+
+  return NadditionalPoints;
+}
+
+/*! \brief Gets additional Delaunay points.
+ *
+ * Exchanges the Delaunay points collected in DP_Buffer between all tasks
+ * and appends the received points to T->DP, enlarging the array if
+ * necessary. Updates T->Ndp, NadditionalPoints and Largest_N_DP_Buffer.
+ *
+ * \return void
+ */
+static void voronoi_pick_up_additional_DP_points(void)
+{
+  int nimport;
+
+  /* The data blocks stored in DP_Buffer are not ordered according to processor rank, but rather in a permuted way.
+   * We need to take this into account in calculating the offsets in the send buffer.
+   */
+
+  /* counts in the order in which the tasks are visited (ThisTask ^ ngrp); Send_count is used as scratch space here */
+  for(int ngrp = 0, ncnt = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp;
+      if(recvTask < NTask)
+        Send_count[ncnt++] = send_count_new[recvTask];
+    }
+
+  /* prefix sum in visiting order; Recv_offset is used as scratch space here */
+  Recv_offset[0] = 0;
+  for(int j = 1; j < NTask; j++)
+    Recv_offset[j] = Recv_offset[j - 1] + Send_count[j - 1];
+
+  /* map the offsets back to task numbers */
+  for(int ngrp = 0, ncnt = 0; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp;
+      if(recvTask < NTask)
+        Send_offset[recvTask] = Recv_offset[ncnt++];
+    }
+
+  memcpy(Send_count, send_count_new, NTask * sizeof(int));
+
+  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
+
+  Recv_offset[0] = 0;
+  nimport = Recv_count[0];
+
+  for(int j = 1; j < NTask; j++)
+    {
+      nimport += Recv_count[j];
+      Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
+    }
+
+  while(nimport + T->Ndp > T->MaxNdp)
+    {
+      T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+      T->MaxNdp = T->Indi.AllocFacNdp;
+#ifdef VERBOSE
+      printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp, T->Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+      /* T->DP points 5 entries past the start of its allocation; shift back for the realloc and forward again afterwards */
+      T->DP -= 5;
+      T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point));
+      T->DP += 5;
+
+      if(nimport + T->Ndp > T->MaxNdp && NumGas == 0)
+        terminate("nimport + Ndp > MaxNdp");
+    }
+
+  /* get the delaunay points */
+  for(int ngrp = 1; ngrp < (1 << PTask); ngrp++)
+    {
+      int recvTask = ThisTask ^ ngrp;
+
+      if(recvTask < NTask)
+        {
+          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
+            {
+              /* get the particles */
+              MPI_Sendrecv(&DP_Buffer[Send_offset[recvTask]], Send_count[recvTask] * sizeof(point), MPI_BYTE, recvTask, TAG_DENS_B,
+                           &T->DP[T->Ndp + Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(point), MPI_BYTE, recvTask,
+                           TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+  T->Ndp += nimport;
+  NadditionalPoints += nimport;
+
+  if(N_DP_Buffer > Largest_N_DP_Buffer)
+    Largest_N_DP_Buffer = N_DP_Buffer;
+}
+
+/*! \brief Evaluate function for voronoi_ghost_search.
+ *
+ * Called in both mode local particles and then in mode imported particles.
+ *
+ * The search radius is 1.0001 times the distance between the search
+ * center and the reference point. In local mode the result is applied
+ * directly through out2particle; in imported mode it is stored in
+ * DataResult[target].
+ *
+ * \param[in, out] T Pointer to tessellation.
+ * \param[in] target Index in DTC and DTF arrays (local mode) or in the
+ *            DataGet/DataResult arrays (imported mode).
+ * \param[in] mode Mode of call (local/imported).
+ * \param[in] q Index in DP array (only used in local mode).
+ * \param[in] thread_id Thread_id, needed for ngb_treefind_ghost_search.
+ *
+ * \return 0
+ */
+int voronoi_ghost_search_evaluate(tessellation *T, int target, int mode, int q, int thread_id)
+{
+  int origin, numnodes, *firstnode;
+  int numngb;
+  double h, dx, dy, dz, maxdist;
+  MyDouble pos[3], refpos[3];
+  data_out out;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      pos[0] = T->DTC[target].cx;
+      pos[1] = T->DTC[target].cy;
+      pos[2] = T->DTC[target].cz;
+      refpos[0] = T->DP[q].x;
+      refpos[1] = T->DP[q].y;
+      refpos[2] = T->DP[q].z;
+#ifndef DOUBLE_STENCIL
+      maxdist = SphP[T->DP[q].index].Hsml;
+#else  /* #ifndef DOUBLE_STENCIL */
+      maxdist = T->DP[q].Hsml;
+#endif /* #ifndef DOUBLE_STENCIL #else */
+      origin = ThisTask;
+
+      numnodes = 1;
+      firstnode = NULL;
+    }
+  else
+    {
+      /* note: we do not use a pointer here to DataGet[target].Pos, because DataGet may be moved in a realloc operation */
+      pos[0] = DataGet[target].Pos[0];
+      pos[1] = DataGet[target].Pos[1];
+      pos[2] = DataGet[target].Pos[2];
+      refpos[0] = DataGet[target].RefPos[0];
+      refpos[1] = DataGet[target].RefPos[1];
+      refpos[2] = DataGet[target].RefPos[2];
+      maxdist = DataGet[target].MaxDist;
+      origin = DataGet[target].Origin;
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  dx = refpos[0] - pos[0];
+  dy = refpos[1] - pos[1];
+  dz = refpos[2] - pos[2];
+
+  /* distance from the search center to the reference point, enlarged by a small safety factor */
+  h = 1.0001 * sqrt(dx * dx + dy * dy + dz * dz);
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    if(maxdist < 2 * h)
+      T->DTF[target] -=
+          (T->DTF[target] &
+           2); /* since we restrict the search radius, we are not guaranteed to search the full circumcircle of the triangle */
+
+  numngb = ngb_treefind_ghost_search(T, pos, refpos, h, maxdist, target, origin, mode, thread_id, numnodes, firstnode);
+
+  out.Count = numngb;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+#ifdef EXTENDED_GHOST_SEARCH /* this allowes for mirrored images in a full 3x3 grid in terms of the principal domain */
+/*! \brief Tree-search algorithm for ghost cells in EXTENDED_GHOST_SEARCH mode.
+ *
+ * \param[in] T Pointer to tessellation.
+ * \param[in] searchcenter[3] Postion of the search center.
+ * \param[in] refpos[3] Reference position.
+ * \param[in] hsml Search radius.
+ * \param[in] maxdist Maximum distance.
+ * \param[in] target Index in DTF array.
+ * \param[in] origin Original task.
+ * \param[in] startnode Startnode.
+ * \param[in] bitflags Bitflags for ghost search.
+ * \param[in] mode Mode.
+ * \param[in, out] nexport Number of exported particles.
+ * \param[out] nsend_local Array with number of particles to be sent.
+ *
+ * \return Number of points found.
+ */ +int ngb_treefind_ghost_search(tessellation *T, MyDouble searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat maxdist, int target, + int origin, int *startnode, int bitflags, int mode, int *nexport, int *nsend_local) +{ + int i, numngb, no, p, task, nexport_save, ndp_save, nadditionalpoints_save; + int image_flag; + struct NgbNODE *current; + MyDouble dx, dy, dz, hsml2, maxdist2; + int listp; + double dx_ref, dy_ref, dz_ref, mindistance, thisdistance; + double min_x = 0, min_y = 0, min_z = 0; + int min_p = 0, min_imageflag = 0; + MyFloat search_min[3], search_max[3], newcenter[3], newrefpos[3]; + MyFloat refsearch_min[3], refsearch_max[3]; + + nadditionalpoints_save = NadditionalPoints; + ndp_save = T->Ndp; + nexport_save = *nexport; + + numngb = 0; + mindistance = 1.0e70; + + int repx, repy, repz = 0; + int repx_A, repy_A, repz_A; + int repx_B, repy_B, repz_B; + int xbits; + int ybits; + int zbits; + int count; + + if(mode == 0) + { + repx_A = -1; + repx_B = 1; + repy_A = -1; + repy_B = 1; + repz_A = -1; + repz_B = 1; + xbits = ybits = zbits = 0; + } + else + { + zbits = (bitflags / 9); + ybits = (bitflags - zbits * 9) / 3; + xbits = bitflags - zbits * 9 - ybits * 3; + + if(xbits == 1) + repx_A = repx_B = -1; + else if(xbits == 2) + repx_A = repx_B = 1; + else + repx_A = repx_B = 0; + + if(ybits == 1) + repy_A = repy_B = -1; + else if(ybits == 2) + repy_A = repy_B = 1; + else + repy_A = repy_B = 0; + + if(zbits == 1) + repz_A = repz_B = -1; + else if(zbits == 2) + repz_A = repz_B = 1; + else + repz_A = repz_B = 0; + } + + hsml2 = hsml * hsml; + maxdist2 = maxdist * maxdist; + + for(repx = repx_A; repx <= repx_B; repx++) + for(repy = repy_A; repy <= repy_B; repy++) +#if !defined(TWODIMS) + for(repz = repz_A; repz <= repz_B; repz++) +#endif /* #if !defined(TWODIMS) */ + { + image_flag = 0; /* for each coordinate there are three possibilities. + We encodee them to basis three, i.e. 
x*3^0 + y*3^1 + z*3^2 + */ + if(repx == 0) + { + newcenter[0] = searchcenter[0]; + newrefpos[0] = refpos[0]; + } + else if(repx == -1) + { +#ifndef REFLECTIVE_X + newcenter[0] = searchcenter[0] - boxSize_X; + newrefpos[0] = refpos[0] - boxSize_X; +#else /* #ifndef REFLECTIVE_X */ + newcenter[0] = -searchcenter[0]; + newrefpos[0] = -refpos[0]; +#endif /* #ifndef REFLECTIVE_X #else */ + image_flag += 1; + } + else /* repx == 1 */ + { +#ifndef REFLECTIVE_X + newcenter[0] = searchcenter[0] + boxSize_X; + newrefpos[0] = refpos[0] + boxSize_X; +#else /* #ifndef REFLECTIVE_X */ + newcenter[0] = -searchcenter[0] + 2 * boxSize_X; + newrefpos[0] = -refpos[0] + 2 * boxSize_X; +#endif /* #ifndef REFLECTIVE_X #else */ + image_flag += 2; + } + + if(repy == 0) + { + newcenter[1] = searchcenter[1]; + newrefpos[1] = refpos[1]; + } + else if(repy == -1) + { +#ifndef REFLECTIVE_Y + newcenter[1] = searchcenter[1] - boxSize_Y; + newrefpos[1] = refpos[1] - boxSize_Y; +#else /* #ifndef REFLECTIVE_Y */ + newcenter[1] = -searchcenter[1]; + newrefpos[1] = -refpos[1]; +#endif /* #ifndef REFLECTIVE_Y #else */ + image_flag += 1 * 3; + } + else /* repy == 1 */ + { +#ifndef REFLECTIVE_Y + newcenter[1] = searchcenter[1] + boxSize_Y; + newrefpos[1] = refpos[1] + boxSize_Y; +#else /* #ifndef REFLECTIVE_Y */ + newcenter[1] = -searchcenter[1] + 2 * boxSize_Y; + newrefpos[1] = -refpos[1] + 2 * boxSize_Y; +#endif /* #ifndef REFLECTIVE_Y #else */ + image_flag += 2 * 3; + } + + if(repz == 0) + { + newcenter[2] = searchcenter[2]; + newrefpos[2] = refpos[2]; + } +#if !defined(TWODIMS) + else if(repz == -1) + { +#ifndef REFLECTIVE_Z + newcenter[2] = searchcenter[2] - boxSize_Z; + newrefpos[2] = refpos[2] - boxSize_Z; +#else /* #ifndef REFLECTIVE_Z */ + newcenter[2] = -searchcenter[2]; + newrefpos[2] = -refpos[2]; +#endif /* #ifndef REFLECTIVE_Z #else */ + image_flag += 1 * 9; + } + else /* repz == 1 */ + { +#ifndef REFLECTIVE_Z + newcenter[2] = searchcenter[1] + boxSize_Z; + newrefpos[2] = refpos[1] + 
boxSize_Z; +#else /* #ifndef REFLECTIVE_Z */ + newcenter[2] = -searchcenter[2] + 2 * boxSize_Z; + newrefpos[2] = -refpos[2] + 2 * boxSize_Z; +#endif /* #ifndef REFLECTIVE_Z #else */ + image_flag += 2 * 9; + } +#endif /* #if !defined(TWODIMS) */ + + for(i = 0; i < 3; i++) + { + search_min[i] = newcenter[i] - hsml; + search_max[i] = newcenter[i] + hsml; + refsearch_min[i] = newrefpos[i] - maxdist; + refsearch_max[i] = newrefpos[i] + maxdist; + } + + if(mode == 1) + if(bitflags != image_flag) + { + printf("bitflags=%d image_flag=%d xbits=%d ybits=%d zbits=%d \n", bitflags, image_flag, xbits, ybits, zbits); + terminate("problem"); + } + + no = *startnode; + count = 0; + + while(no >= 0) + { + count++; + if(no < Ngb_MaxPart) /* single particle */ + { + p = no; + no = Ngb_Nextnode[no]; + + if(P[p].Type > 0) + continue; + + if(P[p].Mass == 0 && P[p].ID == 0) + continue; /* skip cells that have been swallowed or dissolved */ + + dx = P[p].Pos[0] - newcenter[0]; + dy = P[p].Pos[1] - newcenter[1]; + dz = P[p].Pos[2] - newcenter[2]; + + if(dx * dx + dy * dy + dz * dz > hsml2) + continue; + + dx_ref = P[p].Pos[0] - newrefpos[0]; + dy_ref = P[p].Pos[1] - newrefpos[1]; + dz_ref = P[p].Pos[2] - newrefpos[2]; + + if((thisdistance = dx_ref * dx_ref + dy_ref * dy_ref + dz_ref * dz_ref) > maxdist2) + continue; + + /* now we need to check whether this particle has already been sent to + the requesting cpu for this particular image shift */ + + if(thisdistance >= mindistance) + continue; + + if(Ngb_Marker[p] != Ngb_MarkerValue) + { + Ngb_Marker[p] = Ngb_MarkerValue; + List_P[p].firstexport = -1; + List_P[p].currentexport = -1; + } + + if(List_P[p].firstexport >= 0) + { + if(ListExports[List_P[p].currentexport].origin != origin) + { + listp = List_P[p].firstexport; + while(listp >= 0) + { + if(ListExports[listp].origin == origin) + { + List_P[p].currentexport = listp; + break; + } + + listp = ListExports[listp].nextexport; + } + + if(listp >= 0) + if((ListExports[listp].image_bits & (1 
<< image_flag))) /* already in list */ + continue; + } + else + { + if((ListExports[List_P[p].currentexport].image_bits & (1 << image_flag))) /* already in list */ + continue; + } + } + + /* here we have found a new closest particle that has not been inserted yet */ + + numngb = 1; + mindistance = thisdistance; + min_p = p; + min_imageflag = image_flag; + + /* determine the point coordinates in min_x, min_y, min_z */ + if(repx == 0) + min_x = P[p].Pos[0]; + else if(repx == -1) + { +#ifndef REFLECTIVE_X + min_x = P[p].Pos[0] + boxSize_X; +#else /* #ifndef REFLECTIVE_X */ + min_x = -P[p].Pos[0]; +#endif /* #ifndef REFLECTIVE_X #else */ + } + else if(repx == 1) + { +#ifndef REFLECTIVE_X + min_x = P[p].Pos[0] - boxSize_X; +#else /* #ifndef REFLECTIVE_X */ + min_x = -P[p].Pos[0] + 2 * boxSize_X; +#endif /* #ifndef REFLECTIVE_X #else */ + } + + if(repy == 0) + min_y = P[p].Pos[1]; + else if(repy == -1) + { +#ifndef REFLECTIVE_Y + min_y = P[p].Pos[1] + boxSize_Y; +#else /* #ifndef REFLECTIVE_Y */ + min_y = -P[p].Pos[1]; +#endif /* #ifndef REFLECTIVE_Y #else */ + } + else if(repy == 1) + { +#ifndef REFLECTIVE_Y + min_y = P[p].Pos[1] - boxSize_Y; +#else /* #ifndef REFLECTIVE_Y */ + min_y = -P[p].Pos[1] + 2 * boxSize_Y; +#endif /* #ifndef REFLECTIVE_Y #else */ + } + + if(repz == 0) + min_z = P[p].Pos[2]; +#if !defined(TWODIMS) + else if(repz == -1) + { +#ifndef REFLECTIVE_Z + min_z = P[p].Pos[2] + boxSize_Z; +#else /* #ifndef REFLECTIVE_Z */ + min_z = -P[p].Pos[2]; +#endif /* #ifndef REFLECTIVE_Z #else */ + } + else if(repz == 1) + { +#ifndef REFLECTIVE_Z + min_z = P[p].Pos[2] - boxSize_Z; +#else /* #ifndef REFLECTIVE_Z */ + min_z = -P[p].Pos[2] + 2 * boxSize_Z; +#endif /* #ifndef REFLECTIVE_Z #else */ + } +#endif /* #if !defined(TWODIMS) */ + } + else if(no < Ngb_MaxPart + Ngb_MaxNodes) /* internal node */ + { + if(mode == 1) + { + if(no < Ngb_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the + branch */ + { + break; + } + } + 
+ current = &Ngb_Nodes[no]; + no = current->u.d.sibling; /* in case the node can be discarded */ + + if(search_min[0] > current->u.d.range_max[0]) + continue; + if(search_max[0] < current->u.d.range_min[0]) + continue; + if(refsearch_min[0] > current->u.d.range_max[0]) + continue; + if(refsearch_max[0] < current->u.d.range_min[0]) + continue; + + if(search_min[1] > current->u.d.range_max[1]) + continue; + if(search_max[1] < current->u.d.range_min[1]) + continue; + if(refsearch_min[1] > current->u.d.range_max[1]) + continue; + if(refsearch_max[1] < current->u.d.range_min[1]) + continue; + + if(search_min[2] > current->u.d.range_max[2]) + continue; + if(search_max[2] < current->u.d.range_min[2]) + continue; + if(refsearch_min[2] > current->u.d.range_max[2]) + continue; + if(refsearch_max[2] < current->u.d.range_min[2]) + continue; + + no = current->u.d.nextnode; /* ok, we need to open the node */ + } + else /* pseudo particle */ + { + if(mode == 1) + terminate("mode == 1"); + + if(target >= 0) /* if no target is given, export will not occur */ + { + if(Exportflag[task = DomainTask[no - (Ngb_MaxPart + Ngb_MaxNodes)]] != target) + { + Exportflag[task] = target; + Exportnodecount[task] = NODELISTLENGTH; + } + + if(Exportnodecount[task] == NODELISTLENGTH) + { + if(*nexport >= All.BunchSize) + { + T->Ndp = ndp_save; + NadditionalPoints = nadditionalpoints_save; + *nexport = nexport_save; + if(nexport_save == 0) + terminate( + "nexport_save == 0"); /* in this case, the buffer is too small to process even a single particle */ + for(task = 0; task < NTask; task++) + nsend_local[task] = 0; + for(no = 0; no < nexport_save; no++) + nsend_local[DataIndexTable[no].Task]++; + return -1; + } + Exportnodecount[task] = 0; + Exportindex[task] = *nexport; + DataIndexTable[*nexport].Task = task; + DataIndexTable[*nexport].Index = target; + DataIndexTable[*nexport].IndexGet = *nexport; + *nexport = *nexport + 1; + nsend_local[task]++; + } + + 
DataNodeListSpecial[Exportindex[task]].BitFlagList[Exportnodecount[task]] = image_flag; + DataNodeListSpecial[Exportindex[task]].NodeList[Exportnodecount[task]++] = + Ngb_DomainNodeIndex[no - (Ngb_MaxPart + Ngb_MaxNodes)]; + + if(Exportnodecount[task] < NODELISTLENGTH) + DataNodeListSpecial[Exportindex[task]].NodeList[Exportnodecount[task]] = -1; + } + + no = Ngb_Nextnode[no - Ngb_MaxNodes]; + continue; + } + } + } + + *startnode = -1; + + if(numngb) + { + p = min_p; + + image_flag = min_imageflag; + + if(Ngb_Marker[p] != Ngb_MarkerValue) + { + Ngb_Marker[p] = Ngb_MarkerValue; + List_P[p].firstexport = -1; + List_P[p].currentexport = -1; + } + + if(List_P[p].firstexport >= 0) + { + if(ListExports[List_P[p].currentexport].origin != origin) + { + listp = List_P[p].firstexport; + while(listp >= 0) + { + if(ListExports[listp].origin == origin) + { + List_P[p].currentexport = listp; + break; + } + + if(ListExports[listp].nextexport < 0) + { + if(Ninlist >= MaxNinlist) + { + T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR; + MaxNinlist = T->Indi.AllocFacNinlist; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist, + T->Indi.AllocFacNinlist); +#endif /* #ifdef VERBOSE */ + ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data)); + + if(Ninlist >= MaxNinlist) + terminate("Ninlist >= MaxNinlist"); + } + + List_P[p].currentexport = Ninlist++; + ListExports[List_P[p].currentexport].image_bits = 0; + ListExports[List_P[p].currentexport].nextexport = -1; + ListExports[List_P[p].currentexport].origin = origin; + ListExports[List_P[p].currentexport].index = p; + ListExports[listp].nextexport = List_P[p].currentexport; + break; + } + listp = ListExports[listp].nextexport; + } + } + } + else + { + /* here we have a local particle that hasn't been made part of the mesh */ + + if(Ninlist >= MaxNinlist) + { + T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR; + MaxNinlist = 
T->Indi.AllocFacNinlist; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist, + T->Indi.AllocFacNinlist); +#endif /* #ifdef VERBOSE */ + ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data)); + + if(Ninlist >= MaxNinlist) + terminate("Ninlist >= MaxNinlist"); + } + + List_InMesh[NumGasInMesh++] = p; + + List_P[p].currentexport = List_P[p].firstexport = Ninlist++; + ListExports[List_P[p].currentexport].image_bits = 0; + ListExports[List_P[p].currentexport].nextexport = -1; + ListExports[List_P[p].currentexport].origin = origin; + ListExports[List_P[p].currentexport].index = p; + } + + if((ListExports[List_P[p].currentexport].image_bits & (1 << image_flag))) + terminate("this should not happen"); + + ListExports[List_P[p].currentexport].image_bits |= (1 << image_flag); + + /* add the particle to the ones that need to be exported */ + + if(origin == ThisTask) + { + if(mode == 1) + terminate("mode==1: how can this be?"); + + if(T->Ndp >= T->MaxNdp) + { + T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR; + T->MaxNdp = T->Indi.AllocFacNdp; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp, T->Indi.AllocFacNdp); +#endif /* #ifdef VERBOSE */ + T->DP -= 5; + T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point)); + T->DP += 5; + + if(T->Ndp >= T->MaxNdp) + terminate("Ndp >= MaxNdp"); + } + + SphP[p].ActiveArea = 0; + + point *dp = &T->DP[T->Ndp]; + dp->x = min_x; + dp->y = min_y; + dp->z = min_z; + dp->task = ThisTask; + dp->ID = P[p].ID; + if(image_flag) + dp->index = p + NumGas; /* this is a replicated/mirrored local point */ + else + dp->index = p; /* this is actually a local point that wasn't made part of the mesh yet */ + dp->originalindex = p; + dp->timebin = P[p].TimeBinHydro; + dp->image_flags = (1 << image_flag); + +#ifdef DOUBLE_STENCIL + dp->Hsml = SphP[p].Hsml; + 
dp->first_connection = -1; + dp->last_connection = -1; +#endif /* #ifdef DOUBLE_STENCIL */ + T->Ndp++; + NadditionalPoints++; + } + else + { + if(mode == 0) + terminate("mode == 0: how can this be?"); + + if(N_DP_Buffer >= MaxN_DP_Buffer) + { + T->Indi.AllocFacN_DP_Buffer *= ALLOC_INCREASE_FACTOR; + MaxN_DP_Buffer = T->Indi.AllocFacN_DP_Buffer; +#ifdef VERBOSE + printf("Task=%d: increase memory allocation, MaxN_DP_Buffer=%d Indi.AllocFacN_DP_Buffer=%g\n", ThisTask, MaxN_DP_Buffer, + T->Indi.AllocFacN_DP_Buffer); +#endif /* #ifdef VERBOSE */ + DP_Buffer = (point *)myrealloc_movable(DP_Buffer, MaxN_DP_Buffer * sizeof(point)); + + if(N_DP_Buffer >= MaxN_DP_Buffer) + terminate("(N_DP_Buffer >= MaxN_DP_Buffer"); + } + + SphP[p].ActiveArea = 0; + + DP_Buffer[N_DP_Buffer].x = min_x; + DP_Buffer[N_DP_Buffer].y = min_y; + DP_Buffer[N_DP_Buffer].z = min_z; + DP_Buffer[N_DP_Buffer].ID = P[p].ID; + DP_Buffer[N_DP_Buffer].task = ThisTask; + DP_Buffer[N_DP_Buffer].index = p; + DP_Buffer[N_DP_Buffer].originalindex = p; + DP_Buffer[N_DP_Buffer].timebin = P[p].TimeBinHydro; + DP_Buffer[N_DP_Buffer].image_flags = (1 << image_flag); +#ifdef DOUBLE_STENCIL + DP_Buffer[N_DP_Buffer].Hsml = SphP[p].Hsml; + DP_Buffer[N_DP_Buffer].first_connection = -1; + DP_Buffer[N_DP_Buffer].last_connection = -1; +#endif /* #ifdef DOUBLE_STENCIL */ + send_count_new[origin]++; + N_DP_Buffer++; + } + } + + return numngb; +} + +#else /* #ifdef EXTENDED_GHOST_SEARCH */ + +/*! \brief Tree-search algorithm for ghost cells without EXTENDED_GHOST_SEARCH. + * + * \param[in] T Pointer to tessellation. + * \param[in] searchcenter[3] Postion of the search center. + * \param[in] refpos[3] Reference position. + * \param[in] hsml Search radius. + * \param[in] maxdist Maximum distance. + * \param[in] target Index in DTF array. + * \param[in] origin Original task. + * \param[in] mode Mode (local/imported). + * \param[in] thread_id ID of this thread. + * \param[in] numnodes Number of nodes. 
+ *  \param[in] firstnode Index of first node.
+ *
+ *  \return Number of points found.
+ *
+ *  \note The return value is either 0 or 1: the walk keeps only the candidate
+ *        closest to refpos that lies within hsml of searchcenter and within
+ *        maxdist of refpos and whose image bit is not yet set in the export
+ *        list for 'origin'. The accepted point is appended to T->DP when
+ *        origin == ThisTask, otherwise to DP_Buffer (and
+ *        send_count_new[origin] is incremented).
+ *  \note Image positions are encoded base three in image_flag
+ *        (x*3^0 + y*3^1 + z*3^2); for periodic axes the image is chosen from
+ *        the particle's offset relative to refpos, for reflective axes all
+ *        three images are tried.
+ */
+int ngb_treefind_ghost_search(tessellation *T, MyDouble searchcenter[3], MyDouble refpos[3], MyFloat hsml, MyFloat maxdist, int target,
+                              int origin, int mode, int thread_id, int numnodes, int *firstnode)
+{
+  int i, k, numngb, no, p;
+  int image_flag = 0;
+  struct NgbNODE *current;
+  MyDouble x, y, z, dx, dy, dz;
+  int listp;
+  double dx_ref, dy_ref, dz_ref, mindistance, thisdistance, maxdistSquared, hsmlSquared;
+  double min_x = 0, min_y = 0, min_z = 0;
+  int min_p = 0, min_imageflag = 0;
+  double offx, offy, offz;
+  MyFloat search_min[3], search_max[3], search_max_Lsub[3], search_min_Ladd[3];
+  MyFloat refsearch_min[3], refsearch_max[3], refsearch_max_Lsub[3], refsearch_min_Ladd[3];
+
+  for(i = 0; i < 3; i++) /* bounding boxes of the two search spheres */
+    {
+      search_min[i] = searchcenter[i] - hsml;
+      search_max[i] = searchcenter[i] + hsml;
+      refsearch_min[i] = refpos[i] - maxdist;
+      refsearch_max[i] = refpos[i] + maxdist;
+    }
+
+#if !defined(REFLECTIVE_X) /* periodic axis: box limits shifted by one box length */
+  search_max_Lsub[0] = search_max[0] - boxSize_X;
+  search_min_Ladd[0] = search_min[0] + boxSize_X;
+  refsearch_max_Lsub[0] = refsearch_max[0] - boxSize_X;
+  refsearch_min_Ladd[0] = refsearch_min[0] + boxSize_X;
+#else /* #if !defined(REFLECTIVE_X) */
+  search_max_Lsub[0] = 2 * boxSize_X - search_max[0]; /* reflective axis: box limits mirrored at boxSize_X and at 0 */
+  search_min_Ladd[0] = -search_min[0];
+  refsearch_max_Lsub[0] = 2 * boxSize_X - refsearch_max[0];
+  refsearch_min_Ladd[0] = -refsearch_min[0];
+#endif /* #if !defined(REFLECTIVE_X) #else */
+
+#if !defined(REFLECTIVE_Y)
+  search_max_Lsub[1] = search_max[1] - boxSize_Y;
+  search_min_Ladd[1] = search_min[1] + boxSize_Y;
+  refsearch_max_Lsub[1] = refsearch_max[1] - boxSize_Y;
+  refsearch_min_Ladd[1] = refsearch_min[1] + boxSize_Y;
+#else /* #if !defined(REFLECTIVE_Y) */
+  search_max_Lsub[1] = 2 * boxSize_Y - search_max[1];
+  search_min_Ladd[1] = -search_min[1];
+  refsearch_max_Lsub[1] = 2 * boxSize_Y - refsearch_max[1];
+  refsearch_min_Ladd[1] = -refsearch_min[1];
+#endif /* #if !defined(REFLECTIVE_Y) #else */
+
+#if !defined(REFLECTIVE_Z)
+  search_max_Lsub[2] = search_max[2] - boxSize_Z;
+  search_min_Ladd[2] = search_min[2] + boxSize_Z;
+  refsearch_max_Lsub[2] = refsearch_max[2] - boxSize_Z;
+  refsearch_min_Ladd[2] = refsearch_min[2] + boxSize_Z;
+#else /* #if !defined(REFLECTIVE_Z) */
+  search_max_Lsub[2] = 2 * boxSize_Z - search_max[2];
+  search_min_Ladd[2] = -search_min[2];
+  refsearch_max_Lsub[2] = 2 * boxSize_Z - refsearch_max[2];
+  refsearch_min_Ladd[2] = -refsearch_min[2];
+#endif /* #if !defined(REFLECTIVE_Z) #else */
+
+  numngb = 0;
+  mindistance = 1.0e70;
+  int count; /* incremented once per visited tree entry; never read in this function */
+
+  count = 0;
+
+  maxdistSquared = maxdist * maxdist;
+  hsmlSquared = hsml * hsml;
+
+  numngb = 0;
+
+  for(k = 0; k < numnodes; k++)
+    {
+      if(mode == MODE_LOCAL_PARTICLES)
+        {
+          no = Ngb_MaxPart; /* root node */
+
+#ifdef EXTENDED_GHOST_SEARCH /* NOTE(review): this function is only compiled when EXTENDED_GHOST_SEARCH is undefined, so this looks like dead code */
+          bitflags = 0;
+#endif /* #ifdef EXTENDED_GHOST_SEARCH */
+        }
+      else
+        {
+          no = firstnode[k];
+
+#ifdef EXTENDED_GHOST_SEARCH
+          bitflags = first_bitflag[k];
+#endif /* #ifdef EXTENDED_GHOST_SEARCH */
+          no = Ngb_Nodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0)
+        {
+          count++;
+          if(no < Ngb_MaxPart) /* single particle */
+            {
+              p = no;
+              no = Ngb_Nextnode[no];
+
+              if(P[p].Type > 0)
+                continue;
+
+              if(P[p].Mass == 0 && P[p].ID == 0)
+                continue; /* skip cells that have been swallowed or eliminated */
+
+              if(P[p].Ti_Current != All.Ti_Current) /* particle not yet at the current time: drift it first */
+                {
+                  drift_particle(p, All.Ti_Current);
+                }
+
+              offx = offy = offz = 0;
+
+              image_flag = 0; /* for each coordinate there are three possibilities. We
+                                 encode them to basis three, i.e. x*3^0 + y*3^1 + z*3^2 */
+
+#if !defined(REFLECTIVE_X)
+              if(P[p].Pos[0] - refpos[0] < -boxHalf_X) /* more than half a box below refpos: use the image shifted up by one box */
+                {
+                  offx = boxSize_X;
+                  image_flag += 1;
+                }
+              else if(P[p].Pos[0] - refpos[0] > boxHalf_X)
+                {
+                  offx = -boxSize_X;
+                  image_flag += 2;
+                }
+#endif /* #if !defined(REFLECTIVE_X) */
+
+#if !defined(REFLECTIVE_Y)
+              if(P[p].Pos[1] - refpos[1] < -boxHalf_Y)
+                {
+                  offy = boxSize_Y;
+                  image_flag += 1 * 3;
+                }
+              else if(P[p].Pos[1] - refpos[1] > boxHalf_Y)
+                {
+                  offy = -boxSize_Y;
+                  image_flag += 2 * 3;
+                }
+#endif /* #if !defined(REFLECTIVE_Y) */
+
+#if !defined(REFLECTIVE_Z) && !defined(TWODIMS)
+              if(P[p].Pos[2] - refpos[2] < -boxHalf_Z)
+                {
+                  offz = boxSize_Z;
+                  image_flag += 1 * 9;
+                }
+              else if(P[p].Pos[2] - refpos[2] > boxHalf_Z)
+                {
+                  offz = -boxSize_Z;
+                  image_flag += 2 * 9;
+                }
+#endif /* #if !defined(REFLECTIVE_Z) && !defined(TWODIMS) */
+
+              int image_flag_periodic_bnds = image_flag; /* periodic part of the flag; the reflective digits are added per image below */
+
+#if defined(REFLECTIVE_X)
+              int repx;
+              for(repx = -1; repx <= 1; repx++, offx = 0)
+#endif /* #if defined(REFLECTIVE_X) */
+                {
+#if defined(REFLECTIVE_Y)
+                  int repy;
+                  for(repy = -1; repy <= 1; repy++, offy = 0)
+#endif /* #if defined(REFLECTIVE_Y) */
+                    {
+#if defined(REFLECTIVE_Z) && !defined(TWODIMS)
+                      int repz;
+                      for(repz = -1; repz <= 1; repz++, offz = 0)
+#endif /* #if defined(REFLECTIVE_Z) && !defined(TWODIMS) */
+                        {
+                          image_flag = image_flag_periodic_bnds;
+
+                          x = P[p].Pos[0];
+                          y = P[p].Pos[1];
+                          z = P[p].Pos[2];
+
+#if defined(REFLECTIVE_X)
+                          if(repx == 1)
+                            {
+                              offx = 2 * boxSize_X;
+                              image_flag += 2;
+                            }
+                          else if(repx == -1)
+                            {
+                              image_flag += 1;
+                            }
+                          if(repx != 0)
+                            x = -x;
+#endif /* #if defined(REFLECTIVE_X) */
+
+#if defined(REFLECTIVE_Y)
+                          if(repy == 1)
+                            {
+                              offy = 2 * boxSize_Y;
+                              image_flag += 2 * 3;
+                            }
+                          else if(repy == -1)
+                            {
+                              image_flag += 1 * 3;
+                            }
+                          if(repy != 0)
+                            y = -y;
+#endif /* #if defined(REFLECTIVE_Y) */
+
+#if defined(REFLECTIVE_Z) && !defined(TWODIMS)
+                          if(repz == 1)
+                            {
+                              offz = 2 * boxSize_Z;
+                              image_flag += 2 * 9;
+                            }
+                          else if(repz == -1)
+                            {
+                              image_flag += 1 * 9;
+                            }
+                          if(repz != 0)
+                            z = -z;
+#endif /* #if defined(REFLECTIVE_Z) && !defined(TWODIMS) */
+
+                          x += offx;
+                          y += offy;
+                          z += offz;
+
+                          dx_ref = x - refpos[0];
+                          dy_ref = y - refpos[1];
+                          dz_ref = z - refpos[2];
+
+                          if((thisdistance = dx_ref * dx_ref + dy_ref * dy_ref + dz_ref * dz_ref) > maxdistSquared)
+                            continue; /* applies to the innermost enclosing loop: a reflective image loop if one is compiled in, else the tree walk */
+
+                          dx = x - searchcenter[0];
+                          dy = y - searchcenter[1];
+                          dz = z - searchcenter[2];
+
+                          if(dx * dx + dy * dy + dz * dz > hsmlSquared)
+                            continue;
+
+                          /* now we need to check whether this particle has already been sent to
+                             the requesting cpu for this particular image shift */
+
+                          if(thisdistance >= mindistance)
+                            continue;
+
+                          if(Ngb_Marker[p] != Ngb_MarkerValue) /* first visit under the current marker value: reset the export list */
+                            {
+                              Ngb_Marker[p] = Ngb_MarkerValue;
+                              List_P[p].firstexport = -1;
+                              List_P[p].currentexport = -1;
+                            }
+
+                          if(List_P[p].firstexport >= 0)
+                            {
+                              if(ListExports[List_P[p].currentexport].origin != origin)
+                                {
+                                  listp = List_P[p].firstexport;
+                                  while(listp >= 0)
+                                    {
+                                      if(ListExports[listp].origin == origin)
+                                        {
+                                          List_P[p].currentexport = listp;
+                                          break;
+                                        }
+
+                                      listp = ListExports[listp].nextexport;
+                                    }
+
+                                  if(listp >= 0)
+                                    if((ListExports[listp].image_bits & (1 << image_flag))) /* already in list */
+                                      continue;
+                                }
+                              else
+                                {
+                                  if((ListExports[List_P[p].currentexport].image_bits & (1 << image_flag))) /* already in list */
+                                    continue;
+                                }
+                            }
+
+                          /* here we have found a new closest particle that has not been inserted yet */
+
+                          numngb = 1;
+                          mindistance = thisdistance;
+                          min_p = p;
+                          min_imageflag = image_flag;
+                          min_x = x;
+                          min_y = y;
+                          min_z = z;
+
+                          maxdistSquared = thisdistance; /* tightens the distance cut applied to later candidates (the node boxes are not updated) */
+                        }
+                    }
+                }
+            }
+          else if(no < Ngb_MaxPart + Ngb_MaxNodes) /* internal node */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  if(no <
+                     Ngb_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */
+                    break;
+                }
+
+              current = &Ngb_Nodes[no];
+              no = current->u.d.sibling; /* in case the node can be discarded */
+
+              if(current->Ti_Current != All.Ti_Current) /* node not yet at the current time: drift it first */
+                {
+                  drift_node(current, All.Ti_Current);
+                }
+
+#if !defined(REFLECTIVE_X)
+              if(search_min[0] > current->u.d.range_max[0] && search_max_Lsub[0] < current->u.d.range_min[0])
+                continue;
+              if(search_min_Ladd[0] > current->u.d.range_max[0] && search_max[0] < current->u.d.range_min[0])
+                continue;
+#else /* #if !defined(REFLECTIVE_X) */
+              if(search_min[0] > current->u.d.range_max[0] && search_max_Lsub[0] > current->u.d.range_max[0])
+                continue;
+              if(search_min_Ladd[0] < current->u.d.range_min[0] && search_max[0] < current->u.d.range_min[0])
+                continue;
+#endif /* #if !defined(REFLECTIVE_X) #else */
+
+#if !defined(REFLECTIVE_Y)
+              if(search_min[1] > current->u.d.range_max[1] && search_max_Lsub[1] < current->u.d.range_min[1])
+                continue;
+              if(search_min_Ladd[1] > current->u.d.range_max[1] && search_max[1] < current->u.d.range_min[1])
+                continue;
+#else /* #if !defined(REFLECTIVE_Y) */
+              if(search_min[1] > current->u.d.range_max[1] && search_max_Lsub[1] > current->u.d.range_max[1])
+                continue;
+              if(search_min_Ladd[1] < current->u.d.range_min[1] && search_max[1] < current->u.d.range_min[1])
+                continue;
+#endif /* #if !defined(REFLECTIVE_Y) #else */
+
+#if !defined(REFLECTIVE_Z)
+              if(search_min[2] > current->u.d.range_max[2] && search_max_Lsub[2] < current->u.d.range_min[2])
+                continue;
+              if(search_min_Ladd[2] > current->u.d.range_max[2] && search_max[2] < current->u.d.range_min[2])
+                continue;
+#else /* #if !defined(REFLECTIVE_Z) */
+              if(search_min[2] > current->u.d.range_max[2] && search_max_Lsub[2] > current->u.d.range_max[2])
+                continue;
+              if(search_min_Ladd[2] < current->u.d.range_min[2] && search_max[2] < current->u.d.range_min[2])
+                continue;
+#endif /* #if !defined(REFLECTIVE_Z) #else */
+
+              /* now deal with the search region of the reference point */
+
+#if !defined(REFLECTIVE_X)
+              if(refsearch_min[0] > current->u.d.range_max[0] && refsearch_max_Lsub[0] < current->u.d.range_min[0])
+                continue;
+              if(refsearch_min_Ladd[0] > current->u.d.range_max[0] && refsearch_max[0] < current->u.d.range_min[0])
+                continue;
+#else /* #if !defined(REFLECTIVE_X) */
+              if(refsearch_min[0] > current->u.d.range_max[0] && refsearch_max_Lsub[0] > current->u.d.range_max[0])
+                continue;
+              if(refsearch_min_Ladd[0] < current->u.d.range_min[0] && refsearch_max[0] < current->u.d.range_min[0])
+                continue;
+#endif /* #if !defined(REFLECTIVE_X) #else */
+
+#if !defined(REFLECTIVE_Y)
+              if(refsearch_min[1] > current->u.d.range_max[1] && refsearch_max_Lsub[1] < current->u.d.range_min[1])
+                continue;
+              if(refsearch_min_Ladd[1] > current->u.d.range_max[1] && refsearch_max[1] < current->u.d.range_min[1])
+                continue;
+#else /* #if !defined(REFLECTIVE_Y) */
+              if(refsearch_min[1] > current->u.d.range_max[1] && refsearch_max_Lsub[1] > current->u.d.range_max[1])
+                continue;
+              if(refsearch_min_Ladd[1] < current->u.d.range_min[1] && refsearch_max[1] < current->u.d.range_min[1])
+                continue;
+#endif /* #if !defined(REFLECTIVE_Y) #else */
+
+#if !defined(REFLECTIVE_Z)
+              if(refsearch_min[2] > current->u.d.range_max[2] && refsearch_max_Lsub[2] < current->u.d.range_min[2])
+                continue;
+              if(refsearch_min_Ladd[2] > current->u.d.range_max[2] && refsearch_max[2] < current->u.d.range_min[2])
+                continue;
+#else /* #if !defined(REFLECTIVE_Z) */
+              if(refsearch_min[2] > current->u.d.range_max[2] && refsearch_max_Lsub[2] > current->u.d.range_max[2])
+                continue;
+              if(refsearch_min_Ladd[2] < current->u.d.range_min[2] && refsearch_max[2] < current->u.d.range_min[2])
+                continue;
+#endif /* #if !defined(REFLECTIVE_Z) #else */
+
+              no = current->u.d.nextnode; /* ok, we need to open the node */
+            }
+          else /* pseudo particle */
+            {
+              if(mode == 1)
+                terminate("mode == 1");
+
+              if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("mode == MODE_IMPORTED_PARTICLES should not occur here");
+
+              if(target >= 0) /* if no target is given, export will not occur */
+                ngb_treefind_export_node_threads(no, target, thread_id, image_flag);
+
+              no = Ngb_Nextnode[no - Ngb_MaxNodes];
+              continue;
+            }
+        }
+    }
+
+  if(numngb) /* a closest new point was found: register it in the export lists and store it */
+    {
+      p = min_p;
+
+      image_flag = min_imageflag;
+
+      if(Ngb_Marker[p] != Ngb_MarkerValue)
+        {
+          Ngb_Marker[p] = Ngb_MarkerValue;
+          List_P[p].firstexport = -1;
+          List_P[p].currentexport = -1;
+        }
+
+      if(List_P[p].firstexport >= 0)
+        {
+          if(ListExports[List_P[p].currentexport].origin != origin)
+            {
+              listp = List_P[p].firstexport;
+              while(listp >= 0)
+                {
+                  if(ListExports[listp].origin == origin)
+                    {
+                      List_P[p].currentexport = listp;
+                      break;
+                    }
+
+                  if(ListExports[listp].nextexport < 0) /* end of chain reached without a record for 'origin': append one */
+                    {
+                      if(Ninlist >= MaxNinlist)
+                        {
+                          T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+                          MaxNinlist = T->Indi.AllocFacNinlist;
+#ifdef VERBOSE
+                          printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist,
+                                 T->Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+                          ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+                          if(Ninlist >= MaxNinlist)
+                            terminate("Ninlist >= MaxNinlist");
+                        }
+
+                      List_P[p].currentexport = Ninlist++;
+                      ListExports[List_P[p].currentexport].image_bits = 0;
+                      ListExports[List_P[p].currentexport].nextexport = -1;
+                      ListExports[List_P[p].currentexport].origin = origin;
+                      ListExports[List_P[p].currentexport].index = p;
+                      ListExports[listp].nextexport = List_P[p].currentexport;
+                      break;
+                    }
+                  listp = ListExports[listp].nextexport;
+                }
+            }
+        }
+      else
+        {
+          /* here we have a local particle that hasn't been made part of the mesh */
+
+          if(Ninlist >= MaxNinlist)
+            {
+              T->Indi.AllocFacNinlist *= ALLOC_INCREASE_FACTOR;
+              MaxNinlist = T->Indi.AllocFacNinlist;
+#ifdef VERBOSE
+              printf("Task=%d: increase memory allocation, MaxNinlist=%d Indi.AllocFacNinlist=%g\n", ThisTask, MaxNinlist,
+                     T->Indi.AllocFacNinlist);
+#endif /* #ifdef VERBOSE */
+              ListExports = myrealloc_movable(ListExports, MaxNinlist * sizeof(struct list_export_data));
+
+              if(Ninlist >= MaxNinlist)
+                terminate("Ninlist >= MaxNinlist");
+            }
+
+          List_InMesh[NumGasInMesh++] = p;
+
+          List_P[p].currentexport = List_P[p].firstexport = Ninlist++;
+          ListExports[List_P[p].currentexport].image_bits = 0;
+          ListExports[List_P[p].currentexport].nextexport = -1;
+          ListExports[List_P[p].currentexport].origin = origin;
+          ListExports[List_P[p].currentexport].index = p;
+        }
+
+      if((ListExports[List_P[p].currentexport].image_bits & (1 << image_flag)))
+        terminate("this should not happen");
+
+      ListExports[List_P[p].currentexport].image_bits |= (1 << image_flag); /* this image has now been handed to 'origin' */
+
+      /* add the particle to the ones that need to be exported */
+
+      if(P[p].Ti_Current != All.Ti_Current)
+        terminate("surprise! we don't expect this here anymore");
+
+      if(origin == ThisTask)
+        {
+          if(mode == 1)
+            terminate("mode==1: how can this be?");
+
+          if(T->Ndp >= T->MaxNdp)
+            {
+              T->Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+              T->MaxNdp = T->Indi.AllocFacNdp;
+#ifdef VERBOSE
+              printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, T->MaxNdp, T->Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+              T->DP -= 5; /* the allocation starts 5 elements before T->DP */
+              T->DP = myrealloc_movable(T->DP, (T->MaxNdp + 5) * sizeof(point));
+              T->DP += 5;
+
+              if(T->Ndp >= T->MaxNdp)
+                terminate("Ndp >= MaxNdp");
+            }
+
+          SphP[p].ActiveArea = 0;
+
+          point *dp = &T->DP[T->Ndp];
+          dp->x = min_x;
+          dp->y = min_y;
+          dp->z = min_z;
+          dp->task = ThisTask;
+          dp->ID = P[p].ID;
+          if(image_flag)
+            dp->index = p + NumGas; /* this is a replicated/mirrored local point */
+          else
+            dp->index = p; /* this is actually a local point that wasn't made part of the mesh yet */
+          dp->originalindex = p;
+          dp->timebin = P[p].TimeBinHydro;
+          dp->image_flags = (1 << image_flag);
+#ifdef DOUBLE_STENCIL
+          dp->Hsml = SphP[p].Hsml;
+          dp->first_connection = -1;
+          dp->last_connection = -1;
+#endif /* #ifdef DOUBLE_STENCIL */
+          T->Ndp++;
+          NadditionalPoints++;
+        }
+      else
+        {
+          if(mode == 0)
+            terminate("mode == 0: how can this be?");
+
+          if(N_DP_Buffer >= MaxN_DP_Buffer)
+            {
+              T->Indi.AllocFacN_DP_Buffer *= ALLOC_INCREASE_FACTOR;
+              MaxN_DP_Buffer = T->Indi.AllocFacN_DP_Buffer;
+#ifdef VERBOSE
+              printf("Task=%d: increase memory allocation, MaxN_DP_Buffer=%d Indi.AllocFacN_DP_Buffer=%g\n", ThisTask, MaxN_DP_Buffer,
+                     T->Indi.AllocFacN_DP_Buffer);
+#endif /* #ifdef VERBOSE */
+              DP_Buffer = (point *)myrealloc_movable(DP_Buffer, MaxN_DP_Buffer * sizeof(point));
+
+              if(N_DP_Buffer >= MaxN_DP_Buffer)
+                terminate("(N_DP_Buffer >= MaxN_DP_Buffer");
+            }
+
+          SphP[p].ActiveArea = 0;
+
+          DP_Buffer[N_DP_Buffer].x = min_x; /* stage the point for shipment to the requesting task */
+          DP_Buffer[N_DP_Buffer].y = min_y;
+          DP_Buffer[N_DP_Buffer].z = min_z;
+          DP_Buffer[N_DP_Buffer].ID = P[p].ID;
+          DP_Buffer[N_DP_Buffer].task = ThisTask;
+          DP_Buffer[N_DP_Buffer].index = p;
+          DP_Buffer[N_DP_Buffer].originalindex = p;
+          DP_Buffer[N_DP_Buffer].timebin = P[p].TimeBinHydro;
+          DP_Buffer[N_DP_Buffer].image_flags = (1 << image_flag);
+#ifdef DOUBLE_STENCIL
+          DP_Buffer[N_DP_Buffer].Hsml = SphP[p].Hsml;
+          DP_Buffer[N_DP_Buffer].first_connection = -1;
+          DP_Buffer[N_DP_Buffer].last_connection = -1;
+#endif /* #ifdef DOUBLE_STENCIL */
+          send_count_new[origin]++;
+          N_DP_Buffer++;
+        }
+    }
+
+  return numngb;
+}
+
+#endif /* #ifdef EXTENDED_GHOST_SEARCH #else */
+
+/*! \brief Counts up undecided tetrahedra.
+ *
+ *  A tetrahedron is counted when the bit with value 2 is not set in its DTF
+ *  entry.
+ *
+ *  \param[in] T Pointer to tessellation.
+ *
+ *  \return (Local) number of undecided tetrahedra.
+ */
+int count_undecided_tetras(tessellation *T)
+{
+  int i, count;
+
+  for(i = 0, count = 0; i < T->Ndt; i++)
+    if((T->DTF[i] & 2) == 0)
+      count++;
+
+  return count;
+}
+
+#endif /* #if !defined(ONEDIMS) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_lsf.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_lsf.c
new file mode 100644
index 0000000000..4323ab0a6d
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_lsf.c
@@ -0,0 +1,944 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
 * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
 * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
 *            contributing authors.
 * \copyright Arepo is free software: you can redistribute it and/or modify
 *            it under the terms of the GNU General Public License as published by
 *            the Free Software Foundation, either version 3 of the License, or
 *            (at your option) any later version.
 *
 *            Arepo is distributed in the hope that it will be useful,
 *            but WITHOUT ANY WARRANTY; without even the implied warranty of
 *            MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *            GNU General Public License for more details.
 *
 *            A copy of the GNU General Public License is available under
 *            LICENSE as part of this program. See also
 *            <https://www.gnu.org/licenses/>.
 *
 * \file        src/mesh/voronoi/voronoi_gradients_lsf.c
 * \date        05/2018
 * \brief       Least square fit gradient calculation.
 * \details     Described in Pakmor et al (2016).
 *              contains functions:
 *                static void inline add_row(double X[NUMDIMS][NUMDIMS],
 *                  double y[NUMDIMS], int source_row, double fac,
 *                  int target_row)
 *                static void solve_matrix_problem(double X[NUMDIMS][NUMDIMS],
 *                  double y[NUMDIMS], double grad[NUMDIMS])
 *                void calculate_gradients(void)
 *                void compute_divergences()
 *                void correct_for_reflective_boundaries(double *ValueOther,
 *                  double Value, int type, unsigned int *image_flags)
 *                void limit_gradients(void)
 *                void limit_vel_gradient(double *d, MySingle * grad_vx,
 *                  MySingle * grad_vy, MySingle * grad_vz, double csnd)
 *                void limit_gradient(double *d, double phi, double min_phi,
 *                  double max_phi, MySingle * dphi)
 *                double boundaryX(double dx)
 *                double boundaryY(double dy)
 *                double boundaryZ(double dz)
 *
 * \par Major modifications and contributions:
 *
 * - DD.MM.YYYY Description
 * - 23.05.2018 Prepared file for public release -- Rainer Weinberger
 */

#include "../../main/allvars.h"
#include "../../main/proto.h"

+#if !defined(ONEDIMS) + +static double *minvalues, *maxvalues; + +static void limit_gradients(); +static void correct_for_reflective_boundaries(double *ValueOther, double Value, int type, unsigned int *image_flags); + +static double boundaryX(double dx); +static double boundaryY(double dy); +static double boundaryZ(double dz); + +#if defined(OUTPUT_DIVVEL) || defined(MHD) +static void compute_divergences(); +#endif /* #if defined(OUTPUT_DIVVEL) || defined(MHD) */ + +/*! \brief Adds row to another one in matrix equation. + * + * Auxiliary routine to solve_matrix_problem. + * + * \param[in, out] X Matrix. + * \param[in, out] y Vector. + * \param[in] source_row Index of row that should be added. + * \param[in] fac Factor by which row is multiplied before adding. + * \param[in] target_row Index of row to which to add source row. + * + * \return void + */ +static void inline add_row(double X[NUMDIMS][NUMDIMS], double y[NUMDIMS], int source_row, double fac, int target_row) +{ + y[target_row] += fac * y[source_row]; + + for(int i = 0; i < NUMDIMS; i++) + { + X[target_row][i] += fac * X[source_row][i]; + } +} + +/*! \brief Solve a matrix problem X*grad = y. + * + * Note that we know here that X is symmetric, and that we can pivot on the + * diagonal elements. + * + * \param[in, out] x Matrix. + * \param[in, out] y Vector. + * \param[out] grad Gradient. 
+ * + */ +static void solve_matrix_problem(double X[NUMDIMS][NUMDIMS], double y[NUMDIMS], double grad[NUMDIMS]) +{ +#if NUMDIMS == 2 + int perm[NUMDIMS]; + + if(fabs(X[0][0]) > fabs(X[1][1])) + { + perm[0] = 0; + perm[1] = 1; + } + else + { + perm[0] = 1; + perm[1] = 0; + } + + add_row(X, y, perm[0], -X[perm[1]][perm[0]] / X[perm[0]][perm[0]], perm[1]); + + grad[perm[1]] = y[perm[1]] / X[perm[1]][perm[1]]; + grad[perm[0]] = (y[perm[0]] - X[perm[0]][perm[1]] * grad[perm[1]]) / X[perm[0]][perm[0]]; + +#else /* #if NUMDIMS==2 */ + + int perm[NUMDIMS]; + + if(fabs(X[2][2]) > fabs(X[1][1]) && fabs(X[2][2]) > fabs(X[0][0])) + { + perm[0] = 2; + perm[1] = 0; + perm[2] = 1; + } + else if(fabs(X[1][1]) > fabs(X[0][0])) + { + perm[0] = 1; + perm[1] = 0; + perm[2] = 2; + } + else + { + perm[0] = 0; + perm[1] = 1; + perm[2] = 2; + } + + add_row(X, y, perm[0], -X[perm[1]][perm[0]] / X[perm[0]][perm[0]], perm[1]); + add_row(X, y, perm[0], -X[perm[2]][perm[0]] / X[perm[0]][perm[0]], perm[2]); + + if(fabs(X[perm[1]][perm[1]]) < fabs(X[perm[2]][perm[2]])) + { + int p = perm[1]; + perm[1] = perm[2]; + perm[2] = p; + } + + add_row(X, y, perm[1], -X[perm[2]][perm[1]] / X[perm[1]][perm[1]], perm[2]); + + grad[perm[2]] = y[perm[2]] / X[perm[2]][perm[2]]; + grad[perm[1]] = (y[perm[1]] - X[perm[1]][perm[2]] * grad[perm[2]]) / X[perm[1]][perm[1]]; + grad[perm[0]] = (y[perm[0]] - X[perm[0]][perm[1]] * grad[perm[1]] - X[perm[0]][perm[2]] * grad[perm[2]]) / X[perm[0]][perm[0]]; + +#endif /* #if NUMDIMS==2 #else */ +} + +/*! \brief Loop through all active cells and calculate gradients. 
 *
 *  For every active gas cell a weighted least-squares system (one per entry
 *  of grad_elements, weight = face area) is assembled from the cell's
 *  Voronoi neighbours, solved with solve_matrix_problem() and stored in
 *  SphP[i].Grad. The minimum/maximum neighbour values are recorded in
 *  minvalues/maxvalues for the slope limiter, which is called afterwards.
 *
 *  \return void
 */
void calculate_gradients(void)
{
  TIMER_START(CPU_GRADIENTS);

  mpi_printf("VORONOI: Calculating Gradients...\n");

  /* per cell and per gradient element: extrema of the neighbour values, read by limit_gradients() */
  minvalues = mymalloc("gradmin", NumGas * N_Grad * sizeof(double));
  maxvalues = mymalloc("gradmax", NumGas * N_Grad * sizeof(double));

  struct matrix_vec_data
  {
    double X[NUMDIMS][NUMDIMS]; /* input matrix */
    double y[NUMDIMS];          /* input vector */
    double grad[NUMDIMS];       /* output */
  } * mdata;

  /* one least-squares system per gradient element, reused for every cell */
  mdata = mymalloc("mdata", N_Grad * sizeof(struct matrix_vec_data));

  /* values of the current cell, one per gradient element */
  double *Value = mymalloc("Value", N_Grad * sizeof(double));

  for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      int i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;

      for(int k = 0; k < N_Grad; k++)
        {
          minvalues[i * N_Grad + k] = +MAX_REAL_NUMBER;
          maxvalues[i * N_Grad + k] = -MAX_REAL_NUMBER;

          /* velocities are read from P and divided by cf_atime, everything else is read from SphP */
          if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) ||
             (grad_elements[k].type == GRADIENT_TYPE_VELZ))
            {
              Value[k] = *(MyFloat *)(((char *)(&P[i])) + grad_elements[k].offset) / All.cf_atime;
            }
          else
            Value[k] = *(MyFloat *)(((char *)(&SphP[i])) + grad_elements[k].offset);
        }

      MyDouble *Center = SphP[i].Center;

      /* reset matrix and vector to 0 */
      memset(mdata, 0, N_Grad * sizeof(struct matrix_vec_data));

#ifdef REFLECTIVE_X
      int OutFlowX = 0;
#endif /* #ifdef REFLECTIVE_X */
#ifdef REFLECTIVE_Y
      int OutFlowY = 0;
#endif /* #ifdef REFLECTIVE_Y */
#ifdef REFLECTIVE_Z
      int OutFlowZ = 0;
#endif /* #ifdef REFLECTIVE_Z */

      /* walk the linked list of connections (neighbours) of cell i */
      int q = SphP[i].first_connection;

      while(q >= 0)
        {
          int dp       = DC[q].dp_index;
          int vf       = DC[q].vf_index;
          int particle = Mesh.DP[dp].index;

          if(particle < 0)
            {
              /* cell has been removed */
              /* NOTE(review): this path skips the last_connection check below; presumably
               * DC[].next terminates the list in that case -- confirm */
              q = DC[q].next;
              continue;
            }

          /* ignore faces with negligible area compared to the cell surface */
          if(Mesh.VF[vf].area > 1e-10 * SphP[i].SurfaceArea)
            {
              MyDouble *CenterOther, Mirror[3];

              /* local points with index >= NumGas refer to the local cell (index - NumGas) */
              if(particle >= NumGas && Mesh.DP[dp].task == ThisTask)
                particle -= NumGas;

#ifdef REFLECTIVE_X
              if((Mesh.DP[dp].image_flags & REFL_X_FLAGS) && (Mesh.DP[dp].image_flags & OUTFLOW_X))
                OutFlowX = 1;
#endif /* #ifdef REFLECTIVE_X */
#ifdef REFLECTIVE_Y
              if((Mesh.DP[dp].image_flags & REFL_Y_FLAGS) && (Mesh.DP[dp].image_flags & OUTFLOW_Y))
                OutFlowY = 1;
#endif /* #ifdef REFLECTIVE_Y */
#ifdef REFLECTIVE_Z
              if((Mesh.DP[dp].image_flags & REFL_Z_FLAGS) && (Mesh.DP[dp].image_flags & OUTFLOW_Z))
                OutFlowZ = 1;
#endif /* #ifdef REFLECTIVE_Z */

              if(Mesh.DP[dp].task == ThisTask)
                {
#ifndef VORONOI_STATIC_MESH
                  if(P[particle].Ti_Current != All.Ti_Current)
                    terminate("surprise! we don't expect this here anymore");
#endif /* #ifndef VORONOI_STATIC_MESH */

                  if(P[particle].ID == P[i].ID)
                    {
                      /* mirrored cell, we have to mirror the Center */

                      /* calculate normal vector of the interface */
                      double nx = Mesh.DP[dp].x - P[i].Pos[0];
                      double ny = Mesh.DP[dp].y - P[i].Pos[1];
                      double nz = Mesh.DP[dp].z - P[i].Pos[2];

                      /* perpendicular on the surface */
                      double nn = sqrt(nx * nx + ny * ny + nz * nz);
                      nx /= nn;
                      ny /= nn;
                      nz /= nn;
                      double fx = (Center[0] - Mesh.VF[vf].cx);
                      double fy = (Center[1] - Mesh.VF[vf].cy);
                      double fz = (Center[2] - Mesh.VF[vf].cz);
                      double ff = (fx * nx + fy * ny + fz * nz);

                      /* (px,py,pz): Center projected along the normal onto the plane through the face center */
                      double px = Center[0] - ff * nx;
                      double py = Center[1] - ff * ny;
                      double pz = Center[2] - ff * nz;

                      Mirror[0]   = 2. * px - Center[0];
                      Mirror[1]   = 2. * py - Center[1];
                      Mirror[2]   = 2. * pz - Center[2];
                      CenterOther = Mirror;
                    }
                  else
                    CenterOther = SphP[particle].Center;
                }
              else
                CenterOther = PrimExch[particle].Center; /* neighbour on another task: data taken from PrimExch */

              /* unit vector and distance from this cell's center to the neighbour's center */
              double norm[3];
              norm[0] = boundaryX(CenterOther[0] - Center[0]);
              norm[1] = boundaryY(CenterOther[1] - Center[1]);
              norm[2] = boundaryZ(CenterOther[2] - Center[2]);

              double dist    = sqrt(norm[0] * norm[0] + norm[1] * norm[1] + norm[2] * norm[2]);
              double distinv = 1.0 / dist;
              norm[0] *= distinv;
              norm[1] *= distinv;
              norm[2] *= distinv;

              /* least-squares weight of this neighbour is the area of the shared face */
              double weight = Mesh.VF[vf].area;

              for(int k = 0; k < N_Grad; k++)
                {
                  double ValueOther;

                  if(Mesh.DP[dp].task == ThisTask)
                    {
                      if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) ||
                         (grad_elements[k].type == GRADIENT_TYPE_VELZ))
                        {
                          ValueOther = *(MyFloat *)(((char *)(&P[particle])) + grad_elements[k].offset);
                        }
                      else
                        ValueOther = *(MyFloat *)(((char *)(&SphP[particle])) + grad_elements[k].offset);
                    }
                  else
                    {
                      ValueOther = *(MyFloat *)(((char *)(&PrimExch[particle])) + grad_elements[k].offset_exch);
                    }

                  if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) ||
                     (grad_elements[k].type == GRADIENT_TYPE_VELZ))
                    {
                      ValueOther /= All.cf_atime;

#if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z)
                      correct_for_reflective_boundaries(&ValueOther, Value[k], grad_elements[k].type, &Mesh.DP[dp].image_flags);
#endif /* #if defined(REFLECTIVE_X) || defined(REFLECTIVE_Y) || defined(REFLECTIVE_Z) */
                      /* add the term proportional to the separation (norm * dist) times cf_atime * cf_Hrate */
                      if(grad_elements[k].type == GRADIENT_TYPE_VELX)
                        ValueOther += norm[0] * dist * All.cf_atime * All.cf_Hrate;
                      else if(grad_elements[k].type == GRADIENT_TYPE_VELY)
                        ValueOther += norm[1] * dist * All.cf_atime * All.cf_Hrate;
                      else if(grad_elements[k].type == GRADIENT_TYPE_VELZ)
                        ValueOther += norm[2] * dist * All.cf_atime * All.cf_Hrate;
                    }

                  /* accumulate y += w * (dValue/dist) * n and X += w * n n^T */
                  double fac = weight * (ValueOther - Value[k]) / dist;

                  for(int ia = 0; ia < NUMDIMS; ia++)
                    {
                      mdata[k].y[ia] += fac * norm[ia];

                      for(int ib = 0; ib < NUMDIMS; ib++)
                        mdata[k].X[ia][ib] += weight * norm[ia] * norm[ib];
                    }

                  if(ValueOther < minvalues[i * N_Grad + k])
                    minvalues[i * N_Grad + k] = ValueOther;

                  if(ValueOther > maxvalues[i * N_Grad + k])
                    maxvalues[i * N_Grad + k] = ValueOther;
                }
            }

          if(q == SphP[i].last_connection)
            break;

          q = DC[q].next;
        }

      for(int k = 0; k < N_Grad; k++)
        {
          solve_matrix_problem(mdata[k].X, mdata[k].y, mdata[k].grad);

          /* store the solution; components beyond NUMDIMS are zeroed */
          MySingle *data = (MySingle *)(((char *)(&(SphP[i].Grad))) + grad_elements[k].offset_grad);
          for(int j = 0; j < NUMDIMS; j++)
            data[j] = mdata[k].grad[j];
          for(int j = NUMDIMS; j < 3; j++)
            data[j] = 0.;

          /* cells touching an outflow boundary get a zero gradient along that axis */
#ifdef REFLECTIVE_X
          if(OutFlowX)
            data[0] = 0;
#endif /* #ifdef REFLECTIVE_X */
#ifdef REFLECTIVE_Y
          if(OutFlowY)
            data[1] = 0;
#endif /* #ifdef REFLECTIVE_Y */
#ifdef REFLECTIVE_Z
          if(OutFlowZ)
            data[2] = 0;
#endif /* #ifdef REFLECTIVE_Z */
        }
    }

  myfree(Value);
  myfree(mdata);

#ifdef MHD
  /* curl of B from the (still unlimited) gradients */
  for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      int i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;

      SphP[i].CurlB[0] = SphP[i].Grad.dB[2][1] - SphP[i].Grad.dB[1][2];
      SphP[i].CurlB[1] = SphP[i].Grad.dB[0][2] - SphP[i].Grad.dB[2][0];
      SphP[i].CurlB[2] = SphP[i].Grad.dB[1][0] - SphP[i].Grad.dB[0][1];
    }
#endif /* #ifdef MHD */

  limit_gradients();

#ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED
  /* compute magnitude of curl */
  for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      int i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;
      double curlx = SphP[i].Grad.dvel[2][1] - SphP[i].Grad.dvel[1][2];
      double curly = SphP[i].Grad.dvel[0][2] - SphP[i].Grad.dvel[2][0];
      double curlz = SphP[i].Grad.dvel[1][0] - SphP[i].Grad.dvel[0][1];

      SphP[i].CurlVel = sqrt(curlx * curlx + curly * curly + curlz * curlz);
    }
#endif /* #ifdef REGULARIZE_MESH_CM_DRIFT_USE_SOUNDSPEED */

  /* the extrema are only needed by limit_gradients(); free in reverse allocation order */
  myfree(maxvalues);
  myfree(minvalues);

#if defined(OUTPUT_DIVVEL) || defined(MHD)
  compute_divergences();
#endif /* #if defined(OUTPUT_DIVVEL) || defined(MHD) */

  TIMER_STOP(CPU_GRADIENTS);
}

#if defined(OUTPUT_DIVVEL) || defined(MHD)
/*! \brief Computes divergences applying the Gauss' law.
 *
 *  Loops through all active cells and sums the area-weighted normal
 *  component of the face-averaged velocity (OUTPUT_DIVVEL) and/or magnetic
 *  field (MHD) over all interfaces of the cell, then divides by the cell
 *  volume. Results are stored in SphP[i].DivVel and SphP[i].DivB.
 *
 *  \return void
 */
void compute_divergences()
{
  mpi_printf("VORONOI: Computing divergences... \n");

  exchange_primitive_variables_and_gradients();

  for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      int i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;

#if defined(OUTPUT_DIVVEL)
      SphP[i].DivVel = 0;
#endif /* #if defined(OUTPUT_DIVVEL) */
#ifdef MHD
      SphP[i].DivB = 0;
#endif /* #ifdef MHD */

      MyDouble *CenterOther, Mirror[3];
#if defined(OUTPUT_DIVVEL)
      MyFloat *VelOther;
#endif /* #if defined(OUTPUT_DIVVEL) */
#ifdef MHD
      MyFloat *BOther, B[3];
      struct grad_data *GradOther;
#endif /* #ifdef MHD */

      /* walk the linked list of connections (neighbours) of cell i */
      int q = SphP[i].first_connection;
      while(q >= 0)
        {
          int dp       = DC[q].dp_index;
          int vf       = DC[q].vf_index;
          int particle = Mesh.DP[dp].index;

          if(particle < 0)
            {
              /* cell has been removed */
              q = DC[q].next;
              continue;
            }

          /* ignore faces with negligible area compared to the cell surface */
          if(Mesh.VF[vf].area > 1e-10 * SphP[i].SurfaceArea)
            {
#ifdef MHD
              /* B of this cell extrapolated with its gradient to the face center */
              double dx = boundaryX(Mesh.VF[vf].cx - SphP[i].Center[0]);
              double dy = boundaryY(Mesh.VF[vf].cy - SphP[i].Center[1]);
              double dz = boundaryZ(Mesh.VF[vf].cz - SphP[i].Center[2]);

              for(int j = 0; j < 3; j++)
                B[j] = SphP[i].B[j] + SphP[i].Grad.dB[j][0] * dx + SphP[i].Grad.dB[j][1] * dy + SphP[i].Grad.dB[j][2] * dz;
#endif /* #ifdef MHD */

              /* local points with index >= NumGas refer to the local cell (index - NumGas) */
              if(particle >= NumGas && Mesh.DP[dp].task == ThisTask)
                particle -= NumGas;

              if(Mesh.DP[dp].task == ThisTask)
                {
                  if(P[particle].ID == P[i].ID)
                    {
                      /* mirrored cell, we have to mirror the Center */
                      /* calculate normal vector of the interface */
                      double nx = Mesh.DP[dp].x - P[i].Pos[0];
                      double ny = Mesh.DP[dp].y - P[i].Pos[1];
                      double nz = Mesh.DP[dp].z - P[i].Pos[2];
                      /* perpendicular on the surface */
                      double nn = sqrt(nx * nx + ny * ny + nz * nz);
                      nx /= nn;
                      ny /= nn;
                      nz /= nn;
                      double fx   = (SphP[i].Center[0] - Mesh.VF[vf].cx);
                      double fy   = (SphP[i].Center[1] - Mesh.VF[vf].cy);
                      double fz   = (SphP[i].Center[2] - Mesh.VF[vf].cz);
                      double ff   = (fx * nx + fy * ny + fz * nz);
                      double px   = SphP[i].Center[0] - ff * nx;
                      double py   = SphP[i].Center[1] - ff * ny;
                      double pz   = SphP[i].Center[2] - ff * nz;
                      Mirror[0]   = 2. * px - SphP[i].Center[0];
                      Mirror[1]   = 2. * py - SphP[i].Center[1];
                      Mirror[2]   = 2. * pz - SphP[i].Center[2];
                      CenterOther = Mirror;
                    }
                  else
                    CenterOther = SphP[particle].Center;

#if defined(OUTPUT_DIVVEL)
                  VelOther = P[particle].Vel;
#endif /* #if defined(OUTPUT_DIVVEL) */
#ifdef MHD
                  GradOther = &SphP[particle].Grad;
                  BOther    = SphP[particle].B;
#endif /* #ifdef MHD */
                }
              else
                {
                  /* neighbour on another task: data taken from the exchange buffers */
                  CenterOther = PrimExch[particle].Center;
#if defined(OUTPUT_DIVVEL)
                  VelOther = PrimExch[particle].VelGas;
#endif /* #if defined(OUTPUT_DIVVEL) */
#ifdef MHD
                  GradOther = &GradExch[particle];
                  BOther    = PrimExch[particle].B;
#endif /* #ifdef MHD */
                }

#ifdef MHD
              /* face value of B: mean of the two extrapolations from either side */
              dx = boundaryX(Mesh.VF[vf].cx - CenterOther[0]);
              dy = boundaryY(Mesh.VF[vf].cy - CenterOther[1]);
              dz = boundaryZ(Mesh.VF[vf].cz - CenterOther[2]);

              for(int j = 0; j < 3; j++)
                B[j] = 0.5 * (B[j] + BOther[j] + GradOther->dB[j][0] * dx + GradOther->dB[j][1] * dy + GradOther->dB[j][2] * dz);
#endif /* #ifdef MHD */

              /* unit vector from this cell's center to the neighbour's center */
              double norm[3];
              norm[0] = boundaryX(CenterOther[0] - SphP[i].Center[0]);
              norm[1] = boundaryY(CenterOther[1] - SphP[i].Center[1]);
              norm[2] = boundaryZ(CenterOther[2] - SphP[i].Center[2]);

              double dist = sqrt(norm[0] * norm[0] + norm[1] * norm[1] + norm[2] * norm[2]);
              norm[0] /= dist;
              norm[1] /= dist;
              norm[2] /= dist;

#if defined(OUTPUT_DIVVEL)
              /* face velocity: arithmetic mean of the two cell velocities */
              double Vel[3];
              for(int j = 0; j < 3; j++)
                Vel[j] = 0.5 * (P[i].Vel[j] + VelOther[j]);
              double nVel = Vel[0] * norm[0] + Vel[1] * norm[1] + Vel[2] * norm[2];
              SphP[i].DivVel += Mesh.VF[vf].area * nVel;
#endif /* #if defined(OUTPUT_DIVVEL) */
#ifdef MHD
              double nB = B[0] * norm[0] + B[1] * norm[1] + B[2] * norm[2];
              SphP[i].DivB += Mesh.VF[vf].area * nB;
#endif /* #ifdef MHD */
            }

          if(q == SphP[i].last_connection)
            break;

          q = DC[q].next;
        }

      /* surface integral divided by the cell volume gives the divergence */
#if defined(OUTPUT_DIVVEL)
      SphP[i].DivVel /= SphP[i].Volume;
#endif /* #if defined(OUTPUT_DIVVEL) */
#ifdef MHD
      SphP[i].DivB /= SphP[i].Volume;
#endif /* #ifdef MHD */
    }
}
#endif /* #if defined(OUTPUT_DIVVEL) || defined(MHD) */

/*! \brief Correct values for gradient calculation for reflective boundary
 *         conditions.
 *
 *  Only acts on the velocity component normal to a flagged boundary: for a
 *  reflective (non-outflow) boundary the neighbour value changes sign, for
 *  an outflow boundary it is replaced by the value of this cell.
 *
 *  \param[in, out] ValueOther Value of other cell.
 *  \param[in] Value Value of this cell.
 *  \param[in] type Type of gradient (x,y,z direction).
 *  \param[in] image_flags Flag that signals boundary interface.
 *
 *  \return void
 */
void correct_for_reflective_boundaries(double *ValueOther, double Value, int type, unsigned int *image_flags)
{
#if defined(REFLECTIVE_X)
  if(type == GRADIENT_TYPE_VELX)
    {
      if((*image_flags & REFL_X_FLAGS) && !(*image_flags & OUTFLOW_X))
        *ValueOther *= -1;
      if((*image_flags & REFL_X_FLAGS) && (*image_flags & OUTFLOW_X))
        *ValueOther = Value;
    }
#endif /* #if defined(REFLECTIVE_X) */

#if defined(REFLECTIVE_Y)
  if(type == GRADIENT_TYPE_VELY)
    {
      if((*image_flags & REFL_Y_FLAGS) && !(*image_flags & OUTFLOW_Y))
        *ValueOther *= -1;
      if((*image_flags & REFL_Y_FLAGS) && (*image_flags & OUTFLOW_Y))
        *ValueOther = Value;
    }
#endif /* #if defined(REFLECTIVE_Y) */

#if defined(REFLECTIVE_Z)
  if(type == GRADIENT_TYPE_VELZ)
    {
      if((*image_flags & REFL_Z_FLAGS) && !(*image_flags & OUTFLOW_Z))
        *ValueOther *= -1;
      if((*image_flags & REFL_Z_FLAGS) && (*image_flags & OUTFLOW_Z))
        *ValueOther = Value;
    }
#endif /* #if defined(REFLECTIVE_Z) */
}

/*!
\brief Loops through mesh and limits associated gradients. + * + * \return void + */ +void limit_gradients(void) +{ + mpi_printf("VORONOI: Limiting gradients...\n"); + + point *DP = Mesh.DP; + face *VF = Mesh.VF; + + for(int i = 0; i < Mesh.Nvf; i++) + { + if(DP[VF[i].p1].index < 0 || DP[VF[i].p2].index < 0) + continue; + for(int j = 0; j < 2; j++) + { + point *p; + if(j == 0) + { + p = &DP[VF[i].p1]; + } + else + { + p = &DP[VF[i].p2]; + } + + if(p->task == ThisTask && p->index >= 0 && p->index < NumGas) + { + int q = p->index; + if(TimeBinSynchronized[P[q].TimeBinHydro]) + { + double d[3]; + d[0] = VF[i].cx - SphP[q].Center[0]; + d[1] = VF[i].cy - SphP[q].Center[1]; + d[2] = VF[i].cz - SphP[q].Center[2]; +#if !defined(REFLECTIVE_X) + double xtmp; + d[0] = NEAREST_X(d[0]); +#endif /* #if !defined(REFLECTIVE_X) */ +#if !defined(REFLECTIVE_Y) + double ytmp; + d[1] = NEAREST_Y(d[1]); +#endif /* #if !defined(REFLECTIVE_Y) */ +#if !defined(REFLECTIVE_Z) + double ztmp; + d[2] = NEAREST_Z(d[2]); +#endif /* #if !defined(REFLECTIVE_Z) */ + double value; + MySingle *data; + if(VF[i].area > 1.0e-10 * SphP[q].SurfaceArea) + { + for(int k = 0; k < N_Grad; k++) + { + if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) || + (grad_elements[k].type == GRADIENT_TYPE_VELZ)) + { + value = *(MyFloat *)(((char *)(&P[q])) + grad_elements[k].offset); + value /= All.cf_atime; + } + else + value = *(MyFloat *)(((char *)(&SphP[q])) + grad_elements[k].offset); + + data = (MySingle *)(((char *)(&(SphP[q].Grad))) + grad_elements[k].offset_grad); + + if(grad_elements[k].type != GRADIENT_TYPE_RTF) + limit_gradient(d, value, minvalues[q * N_Grad + k], maxvalues[q * N_Grad + k], data); + } + } + } + } + } + } + +#ifndef DISABLE_VELOCITY_CSND_SLOPE_LIMITING + for(int i = 0; i < Mesh.Nvf; i++) + { + if(DP[VF[i].p1].index < 0 || DP[VF[i].p2].index < 0) + continue; + for(int j = 0; j < 2; j++) + { + point *p; + + if(j == 0) + { + p = &DP[VF[i].p1]; + } + 
else + { + p = &DP[VF[i].p2]; + } + + if(p->task == ThisTask && p->index >= 0 && p->index < NumGas) + { + int q = p->index; + if(TimeBinSynchronized[P[q].TimeBinHydro]) + { + double d[3]; + d[0] = VF[i].cx - SphP[q].Center[0]; + d[1] = VF[i].cy - SphP[q].Center[1]; + d[2] = VF[i].cz - SphP[q].Center[2]; +#if !defined(REFLECTIVE_X) + double xtmp; + d[0] = NEAREST_X(d[0]); +#endif +#if !defined(REFLECTIVE_Y) + double ytmp; + d[1] = NEAREST_Y(d[1]); +#endif +#if !defined(REFLECTIVE_Z) + double ztmp; + d[2] = NEAREST_Z(d[2]); +#endif + double value; + MySingle *data; + + if(VF[i].area > 1.0e-10 * SphP[q].SurfaceArea) + { + /* let's now limit the overall size of the velocity gradient */ + MySingle *grad_vx = (MySingle *)(((char *)(&(SphP[q].Grad))) + GVelx->offset_grad); + MySingle *grad_vy = (MySingle *)(((char *)(&(SphP[q].Grad))) + GVely->offset_grad); + MySingle *grad_vz = (MySingle *)(((char *)(&(SphP[q].Grad))) + GVelz->offset_grad); + limit_vel_gradient(d, grad_vx, grad_vy, grad_vz, get_sound_speed(q)); + } + } + } + } + } +#endif /* #ifndef DISABLE_VELOCITY_CSND_SLOPE_LIMITING */ +} + +/*! \brief Limits velocity gradient. + * + * Limit velocity change to the sound speed. + * + * \param[in] d Direction vector. + * \param[in, out] grad_vx X-velocity gradient. + * \param[in, out] grad_vy Y-velocity gradient. + * \param[in, out] grad_vz Z-velocity gradient. + * \param[in] csnd sound speed. 
+ * + * \return void + */ +void limit_vel_gradient(double *d, MySingle *grad_vx, MySingle *grad_vy, MySingle *grad_vz, double csnd) +{ +#define VEL_GRADIENT_LIMIT_FAC 1.0 + if(All.ComovingIntegrationOn) + { + grad_vx[0] -= All.cf_atime * All.cf_Hrate; + grad_vy[1] -= All.cf_atime * All.cf_Hrate; + grad_vz[2] -= All.cf_atime * All.cf_Hrate; + } + + double dvx = fabs(grad_vx[0] * d[0] + grad_vx[1] * d[1] + grad_vx[2] * d[2]); + double dvy = fabs(grad_vy[0] * d[0] + grad_vy[1] * d[1] + grad_vy[2] * d[2]); + double dvz = fabs(grad_vz[0] * d[0] + grad_vz[1] * d[1] + grad_vz[2] * d[2]); + if(dvx > VEL_GRADIENT_LIMIT_FAC * csnd) + { + double fac = VEL_GRADIENT_LIMIT_FAC * csnd / dvx; + for(int i = 0; i < 3; i++) + { + grad_vx[i] *= fac; + } + } + + if(dvy > VEL_GRADIENT_LIMIT_FAC * csnd) + { + double fac = VEL_GRADIENT_LIMIT_FAC * csnd / dvy; + for(int i = 0; i < 3; i++) + { + grad_vy[i] *= fac; + } + } + if(dvz > VEL_GRADIENT_LIMIT_FAC * csnd) + { + double fac = VEL_GRADIENT_LIMIT_FAC * csnd / dvz; + for(int i = 0; i < 3; i++) + { + grad_vz[i] *= fac; + } + } + + if(All.ComovingIntegrationOn) + { + grad_vx[0] += All.cf_atime * All.cf_Hrate; + grad_vy[1] += All.cf_atime * All.cf_Hrate; + grad_vz[2] += All.cf_atime * All.cf_Hrate; + } +} + +/*! \brief Limits gradients. + * + * Slope limiter. + * + * \param[in] d Direction vector. + * \param[in] phi Value. + * \param[in] min_phi Lower bound for value+gradient*dx. + * \param[in] max_phi Upper bound for value+gradient*dx. + * \param[in, out] dphi Gradient. 
+ * + * \return void + */ +void limit_gradient(double *d, double phi, double min_phi, double max_phi, MySingle *dphi) +{ + double dp = dphi[0] * d[0] + dphi[1] * d[1] + dphi[2] * d[2]; + + if(dp > 0) + { + if(phi + dp > max_phi) + { + double fac; + + if(max_phi > phi) + fac = (max_phi - phi) / dp; + else + fac = 0; + if(fac < 0 || fac > 1) + terminate("fac=%g\ndp=%g max_phi=%g phi=%g", fac, dp, max_phi, phi); + dphi[0] *= fac; + dphi[1] *= fac; + dphi[2] *= fac; + } + } + else if(dp < 0) + { + if(phi + dp < min_phi) + { + double fac; + + if(min_phi < phi) + fac = (min_phi - phi) / dp; + else + fac = 0; + if(fac < 0 || fac > 1) + terminate("fac=%g\ndp=%g max_phi=%g phi=%g", fac, dp, max_phi, phi); + dphi[0] *= fac; + dphi[1] *= fac; + dphi[2] *= fac; + } + } +} + +/*! \brief Distance in x direction. + * + * Taking into account periodicity of simulation box, if given. + * + * \param[in] dx Distance in x direction, not taking into account periodic + * boundaries. + * + * \return Distance in x direction. + */ +double boundaryX(double dx) +{ +#if !defined(REFLECTIVE_X) + if(dx < -boxHalf_X) + dx += boxSize_X; + if(dx > boxHalf_X) + dx -= boxSize_X; +#endif /* #if !defined(REFLECTIVE_X) */ + return dx; +} + +/*! \brief Distance in y direction. + * + * Taking into account periodicity of simulation box, if given. + * + * \param[in] dy Distance in y direction, not taking into account periodic + * boundaries. + * + * \return Distance in y direction. + */ +double boundaryY(double dy) +{ +#if !defined(REFLECTIVE_Y) + if(dy < -boxHalf_Y) + dy += boxSize_Y; + if(dy > boxHalf_Y) + dy -= boxSize_Y; +#endif /* #if !defined(REFLECTIVE_Y) */ + return dy; +} + +/*! \brief Distance in z direction. + * + * Taking into account periodicity of simulation box, if given. + * + * \param[in] dz Distance in z direction, not taking into account periodic + * boundaries. + * + * \return Distance in z direction. 
 */
double boundaryZ(double dz)
{
#if !defined(REFLECTIVE_Z)
  /* wrap by one box length if the separation exceeds half the box */
  if(dz < -boxHalf_Z)
    dz += boxSize_Z;
  if(dz > boxHalf_Z)
    dz -= boxSize_Z;
#endif /* #if !defined(REFLECTIVE_Z) */
  return dz;
}

#endif /* #if !defined(ONEDIMS) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_onedims.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_onedims.c
new file mode 100644
index 0000000000..d3e770a0da
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_gradients_onedims.c
@@ -0,0 +1,204 @@
/*!
 * \copyright This file is part of the public version of the AREPO code.
 * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
 * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
 *            contributing authors.
 * \copyright Arepo is free software: you can redistribute it and/or modify
 *            it under the terms of the GNU General Public License as published by
 *            the Free Software Foundation, either version 3 of the License, or
 *            (at your option) any later version.
 *
 *            Arepo is distributed in the hope that it will be useful,
 *            but WITHOUT ANY WARRANTY; without even the implied warranty of
 *            MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *            GNU General Public License for more details.
 *
 *            A copy of the GNU General Public License is available under
 *            LICENSE as part of this program. See also
 *            <https://www.gnu.org/licenses/>.
 *
 * \file        src/mesh/voronoi/voronoi_gradients_onedims.c
 * \date        05/2018
 * \brief       Algorithms to calculate the gradients in 1d simulations.
 * \details     contains functions:
 *                double getValue(int i, int k)
 *                void calculate_gradients(void)
 *                void compute_divvel()
 *
 * \par Major modifications and contributions:
 *
 * - DD.MM.YYYY Description
 * - 23.05.2018 Prepared file for public release -- Rainer Weinberger
 */

#include "../../main/allvars.h"
#include "../../main/proto.h"

#if defined(ONEDIMS)

#ifdef OUTPUT_DIVVEL
static void compute_divvel();
#endif /* #ifdef OUTPUT_DIVVEL */

/*! \brief Gets a value of a quantity.
 *
 *  Velocity-type quantities are read from the P array, all others from
 *  the SphP array, at the byte offset stored in grad_elements[k].
 *
 *  \param[in] i Index of cell in P and SphP array.
 *  \param[in] k Index in grad_elements array (determines which quantity).
 *
 *  \return value
 */
double getValue(int i, int k)
{
  if((grad_elements[k].type == GRADIENT_TYPE_VELX) || (grad_elements[k].type == GRADIENT_TYPE_VELY) ||
     (grad_elements[k].type == GRADIENT_TYPE_VELZ))
    return *(MyFloat *)(((char *)(&P[i])) + grad_elements[k].offset);
  else
    return *(MyFloat *)(((char *)(&SphP[i])) + grad_elements[k].offset);
}

/*! \brief Calculates gradients in a 1d simulation.
 *
 *  For every active cell and every gradient element a finite difference
 *  between the left and right neighbour is taken and then limited so that
 *  the value extrapolated to either neighbour position stays within
 *  [min(ValueL, ValueR), max(ValueL, ValueR)]. Only the x-component of the
 *  stored gradient is non-zero.
 *
 *  \return void
 */
void calculate_gradients(void)
{
  CPU_Step[CPU_MISC] += measure_time();

  printf("Calculating 1D gradients...\n");

  int idx, i, k;
  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;

      for(k = 0; k < N_Grad; k++)
        {
          double Value = getValue(i, k);
          double Pos   = P[i].Pos[0];

#if defined(ONEDIMS_SPHERICAL) || defined(REFLECTIVE_X)
          /* first and last cell sit at the domain boundary: use a zero gradient */
          if(i == 0 || i == NumGas - 1)
            {
              MySingle *data = (MySingle *)(((char *)(&(SphP[i].Grad))) + grad_elements[k].offset_grad);
              memset(data, 0, 3 * sizeof(MySingle));
              continue;
            }
#endif /* #if defined (ONEDIMS_SPHERICAL) || defined (REFLECTIVE_X) */
          /* if we get here, we have periodic boundary conditions or are not at the boundaries */
          double ValueL, ValueR;

          /* neighbour values, wrapping around at the ends */
          if(i == 0)
            ValueL = getValue(NumGas - 1, k);
          else
            ValueL = getValue(i - 1, k);

          if(i == NumGas - 1)
            ValueR = getValue(0, k);
          else
            ValueR = getValue(i + 1, k);

          /* NOTE(review): for i == 0 and i == NumGas - 1 this reads Mesh.DP[-1] and
           * Mesh.DP[NumGas]; presumably the 1D mesh stores periodic ghost points there -- confirm */
          double PosL = Mesh.DP[i - 1].x;
          double PosR = Mesh.DP[i + 1].x;

          double grad = (ValueL - ValueR) / (PosL - PosR);

          MySingle *data = (MySingle *)(((char *)(&(SphP[i].Grad))) + grad_elements[k].offset_grad);
          data[0]        = grad;
          data[1]        = 0;
          data[2]        = 0;

          double ValueMin = dmin(ValueL, ValueR);
          double ValueMax = dmax(ValueL, ValueR);

          /* limit so that the extrapolation to the left neighbour stays within the bounds ... */
          if(Value + grad * (PosL - Pos) < ValueMin)
            {
              if(ValueMin < Value)
                grad = (ValueMin - Value) / (PosL - Pos);
              else
                grad = 0.;
            }

          if(Value + grad * (PosL - Pos) > ValueMax)
            {
              if(ValueMax > Value)
                grad = (ValueMax - Value) / (PosL - Pos);
              else
                grad = 0.;
            }

          /* ... and likewise for the extrapolation to the right neighbour */
          if(Value + grad * (PosR - Pos) < ValueMin)
            {
              if(ValueMin < Value)
                grad = (ValueMin - Value) / (PosR - Pos);
              else
                grad = 0.;
            }

          if(Value + grad * (PosR - Pos) > ValueMax)
            {
              if(ValueMax > Value)
                grad = (ValueMax - Value) / (PosR - Pos);
              else
                grad = 0.;
            }

          /* store the limited gradient */
          data[0] = grad;
        }
    }

#ifdef OUTPUT_DIVVEL
  compute_divvel();
#endif /* #ifdef OUTPUT_DIVVEL */

  CPU_Step[CPU_GRADIENTS] += measure_time();
}

#ifdef OUTPUT_DIVVEL
/*! \brief Calculates velocity divergence in 1d simulation.
 *
 *  Using Gauss' theorem: difference of the area-weighted neighbour
 *  velocities on the right and left, divided by the cell volume.
 *
 *  \return void
 */
void compute_divvel()
{
  face *VF = Mesh.VF;
  double VelxL, VelxR;

  int idx, i;
  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;

      /* left neighbour velocity: own velocity at a non-periodic boundary, wrapped otherwise */
      if(i == 0)
        {
#if defined(ONEDIMS_SPHERICAL) || defined(REFLECTIVE_X)
          VelxL = P[i].Vel[0];
#else  /* #if defined (ONEDIMS_SPHERICAL) || defined (REFLECTIVE_X) */
          VelxL = P[NumGas - 1].Vel[0];
#endif /* #if defined (ONEDIMS_SPHERICAL) || defined (REFLECTIVE_X) #else */
        }
      else
        VelxL = P[i - 1].Vel[0];

      /* right neighbour velocity, same convention */
      if(i == NumGas - 1)
        {
#if defined(ONEDIMS_SPHERICAL) || defined(REFLECTIVE_X)
          VelxR = P[i].Vel[0];
#else  /* #if defined (ONEDIMS_SPHERICAL) || defined (REFLECTIVE_X) */
          VelxR = P[0].Vel[0];
#endif /* #if defined (ONEDIMS_SPHERICAL) || defined (REFLECTIVE_X) #else */
        }
      else
        VelxR = P[i + 1].Vel[0];

      /* NOTE(review): for i == 0 this reads VF[-1]; presumably the 1D face array provides
       * an entry for the left boundary face there -- confirm */
      SphP[i].DivVel = 0.5 * (VF[i].area * VelxR - VF[i - 1].area * VelxL) / SphP[i].Volume;
    }
}
#endif /* #ifdef OUTPUT_DIVVEL */

#endif /* #if defined(ONEDIMS) */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_refinement.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_refinement.c
new file mode 100644
index 0000000000..8077b9b0a0
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_refinement.c
@@ -0,0 +1,425 @@
/*!
 * \copyright This file is part of the public version of the AREPO code.
 * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
 * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
 *            contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              https://www.gnu.org/licenses/.
+ *
+ * \file        src/mesh/voronoi/voronoi_refinement.c
+ * \date        05/2018
+ * \brief       Contains routines for refinement.
+ * \details     contains functions:
+ *                static void refine_add_ngb(int i, int j)
+ *                int do_refinements(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 23.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include "../../main/allvars.h"
+#include "../../main/proto.h"
+
+#ifdef REFINEMENT_SPLIT_CELLS
+
+static int *ref_SphP_dp_index;
+static int *first_ngb, *last_ngb, first_free_ngb;
+
+/*! \brief Node of the per-point singly linked neighbor lists (index = neighbor, next_ngb = next node or -1).
+ *
+ */
+static struct ngb_data
+{
+  int index;
+  int next_ngb;
+} * ngb;
+
+/*! \brief Appends j to the linked neighbor list of i; does nothing if either index is negative.
+ *
+ *  \param[in] i Index of the point whose list is extended; must be smaller than Mesh.Ndp.
+ *  \param[in] j Index stored as the new neighbor entry; must be smaller than Mesh.Ndp.
+ *
+ *  \return void
+ */
+static void refine_add_ngb(int i, int j)
+{
+  if(i >= 0 && j >= 0)
+    {
+      if(i >= Mesh.Ndp || j >= Mesh.Ndp)
+        {
+          terminate("i>= Ndp || j>= Ndp");
+        }
+
+      if(first_ngb[i] >= 0) /* list already exists: chain the new node behind the current tail */
+        {
+          ngb[last_ngb[i]].next_ngb = first_free_ngb;
+          last_ngb[i] = first_free_ngb;
+        }
+      else /* empty list: the new node is both head and tail */
+        {
+          first_ngb[i] = last_ngb[i] = first_free_ngb;
+        }
+
+      ngb[first_free_ngb].index = j;
+      ngb[first_free_ngb].next_ngb = -1; /* -1 terminates the list */
+      first_free_ngb++;
+    }
+}
+
+/*! \brief Loops through active cells and refine cells if needed.
+ *
+ *  Splits the cell in a random direction: a second mesh-generating point is
+ *  inserted 0.025 cell radii away from the existing one (which is not moved), and
+ *  the conserved quantities are shared according to the two resulting volumes.
+ *
+ *  \return Number of cells that were refined (summed over all tasks).
+ */
+int do_refinements(void)
+{
+  char buf[1000];
+  int idx, i, j, k, count, countall;
+  double rad, fac;
+  MyIDType newid = 0;
+
+  TIMER_START(CPU_REFINE);
+
+  ref_SphP_dp_index = mymalloc_movable(&ref_SphP_dp_index, "ref_SphP_dp_index", NumGas * sizeof(int));
+
+  int NActiveParticles = TimeBinsHydro.NActiveParticles; /* save this since refinement is going to change it */
+  for(idx = 0, count = 0; idx < NActiveParticles; idx++)
+    {
+      i = TimeBinsHydro.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+      if(should_this_cell_be_split(i))
+        {
+          ref_SphP_dp_index[i] = -1; /* mark as to-be-split; replaced by the Delaunay point index further down */
+          count++;
+        }
+    }
+
+  MPI_Allreduce(&count, &countall, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+  mpi_printf("REFINE: want to refine %d cells\n", countall);
+
+  if(countall)
+    {
+      domain_resize_storage(count, count, 2);
+
+      if(NumPart + count >= All.MaxPart)
+        {
+          sprintf(buf, "On Task=%d with NumPart=%d we try to produce %d cells. Sorry, no space left...(All.MaxPart=%d)\n", ThisTask,
+                  NumPart, count, All.MaxPart);
+          terminate(buf);
+        }
+
+      if(NumGas + count >= All.MaxPartSph)
+        {
+          sprintf(buf, "On Task=%d with NumGas=%d we try to produce %d cells. Sorry, no space left...(All.MaxPartSph=%d)\n", ThisTask,
+                  NumGas, count, All.MaxPartSph);
+          terminate(buf);
+        }
+
+      if(All.MaxID == 0) /* MaxID not calculated yet */
+        calculate_maxid();
+
+      int *list = mymalloc("list", NTask * sizeof(int));
+
+      MPI_Allgather(&count, 1, MPI_INT, list, 1, MPI_INT, MPI_COMM_WORLD);
+
+      newid = All.MaxID + 1; /* first new ID on this task; shifted below by the counts of all lower-ranked tasks */
+
+      for(i = 0; i < ThisTask; i++)
+        newid += list[i];
+
+      All.MaxID += countall;
+
+      myfree(list);
+
+      Ngb_MarkerValue++;
+      int nchanged = 0;
+      int *nodelist = (int *)mymalloc("nodelist", NTopleaves * sizeof(int));
+
+      /* create explicit list of neighbors */
+
+      first_ngb = mymalloc("first_ngb", Mesh.Ndp * sizeof(int));
+      ngb = mymalloc("ngbs", 2 * Mesh.Nvf * sizeof(struct ngb_data)); /* each face contributes two list nodes in the loop below */
+      last_ngb = mymalloc("last_ngb", Mesh.Ndp * sizeof(int));
+
+      for(i = 0; i < Mesh.Ndp; i++)
+        {
+          first_ngb[i] = last_ngb[i] = -1;
+
+          if(Mesh.DP[i].task == ThisTask)
+            {
+              int li = Mesh.DP[i].index;
+              if(li >= 0 && li < NumGas)
+                if(ref_SphP_dp_index[li] < 0)
+                  ref_SphP_dp_index[li] = i; /* only guaranteed to be set for active cells */
+            }
+        }
+
+      for(i = 0, first_free_ngb = 0; i < Mesh.Nvf; i++)
+        {
+          refine_add_ngb(Mesh.VF[i].p1, Mesh.VF[i].p2);
+          refine_add_ngb(Mesh.VF[i].p2, Mesh.VF[i].p1);
+        }
+
+      myfree(last_ngb); /* the tail pointers are only needed while the lists are being built */
+
+      int NActiveParticles = TimeBinsHydro.NActiveParticles; /* NOTE(review): shadows the outer variable of the same name */
+      for(idx = 0, count = 0; idx < NActiveParticles; idx++)
+        {
+          i = TimeBinsHydro.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          if(should_this_cell_be_split(i))
+            {
+              int addToGravList = TimeBinSynchronized[P[i].TimeBinGrav];
+              if(NumPart > NumGas)
+                {
+                  move_collisionless_particle(NumPart + count, NumGas + count);
+                  if(TimeBinSynchronized[P[NumPart + count].TimeBinGrav] && P[i].Mass > 0)
+                    addToGravList = 0;
+
+                  /* there is already an entry in the list of active particles for
+                     gravity that points to the index that we will use for our new cell */
+                }
+
+              /* now split the gas cell */
+
+              j = NumGas + count;
+
+              P[j] = P[i];
+              SphP[j] = SphP[i];
+
+              P[j].ID = newid++;
+
+              rad = get_cell_radius(i);
+
+              double dir[3];
+#ifdef TWODIMS
+              double phi = 2 * M_PI * get_random_number();
+
+              dir[0] = cos(phi);
+              dir[1] = sin(phi);
+              dir[2] = 0;
+#else  /* #ifdef TWODIMS */
+              double theta = acos(2 * get_random_number() - 1);
+              double phi = 2 * M_PI * get_random_number();
+
+              dir[0] = sin(theta) * cos(phi);
+              dir[1] = sin(theta) * sin(phi);
+              dir[2] = cos(theta);
+#endif /* #ifdef TWODIMS */
+              fac = 0.025 * rad; /* offset of the new generator from the existing one */
+
+              P[j].Pos[0] = P[i].Pos[0] + fac * dir[0];
+              P[j].Pos[1] = P[i].Pos[1] + fac * dir[1];
+              P[j].Pos[2] = P[i].Pos[2] + fac * dir[2];
+
+              SphP[j].SepVector[0] = SphP[i].SepVector[0] = dir[0];
+              SphP[j].SepVector[1] = SphP[i].SepVector[1] = dir[1];
+              SphP[j].SepVector[2] = SphP[i].SepVector[2] = dir[2];
+
+              /**** create the voronoi cell of i as an auxiliary mesh */
+
+              int jj = ref_SphP_dp_index[i]; /* this is the delaunay point of this cell */
+              if(jj < 0)
+                terminate("jj < 0");
+
+              initialize_and_create_first_tetra(&DeRefMesh);
+
+              DeRefMesh.DTC = mymalloc_movable(&DeRefMesh.DTC, "DeRefDTC", DeRefMesh.MaxNdt * sizeof(tetra_center));
+              DeRefMesh.DTF = mymalloc_movable(&DeRefMesh.DTF, "DeRefDTF", DeRefMesh.MaxNdt * sizeof(char));
+              for(k = 0; k < DeRefMesh.Ndt; k++)
+                DeRefMesh.DTF[k] = 0;
+
+              int tlast = 0;
+
+              k = first_ngb[jj]; /* walk the neighbor list of the point that is split */
+              while(k >= 0)
+                {
+                  int q = ngb[k].index;
+
+                  if(DeRefMesh.Ndp + 2 >= DeRefMesh.MaxNdp) /* keeps room for the two points appended after this loop */
+                    {
+                      DeRefMesh.Indi.AllocFacNdp *= ALLOC_INCREASE_FACTOR;
+                      DeRefMesh.MaxNdp = DeRefMesh.Indi.AllocFacNdp;
+#ifdef VERBOSE
+                      printf("Task=%d: increase memory allocation, MaxNdp=%d Indi.AllocFacNdp=%g\n", ThisTask, DeRefMesh.MaxNdp,
+                             DeRefMesh.Indi.AllocFacNdp);
+#endif /* #ifdef VERBOSE */
+                      DeRefMesh.DP -= 5; /* DP is kept 5 entries past the start of its allocation; undo that for the realloc */
+                      DeRefMesh.DP = myrealloc_movable(DeRefMesh.DP, (DeRefMesh.MaxNdp + 5) * sizeof(point));
+                      DeRefMesh.DP += 5;
+                    }
+
+                  DeRefMesh.DP[DeRefMesh.Ndp] = Mesh.DP[q];
+
+                  double r =
+                      sqrt(pow(DeRefMesh.DP[DeRefMesh.Ndp].x - P[i].Pos[0], 2) + pow(DeRefMesh.DP[DeRefMesh.Ndp].y - P[i].Pos[1], 2) +
+                           pow(DeRefMesh.DP[DeRefMesh.Ndp].z - P[i].Pos[2], 2));
+
+                  if(r < 2 * fac) /* a neighbor closer than twice the split offset */
+                    terminate("We are trying to split a heavily distorted cell... We better stop. Check your refinement criterion.");
+
+#ifndef OPTIMIZE_MEMORY_USAGE
+                  set_integers_for_point(&DeRefMesh, DeRefMesh.Ndp);
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */
+                  tlast = insert_point(&DeRefMesh, DeRefMesh.Ndp, tlast);
+
+                  DeRefMesh.Ndp++;
+                  k = ngb[k].next_ngb;
+                }
+
+              /* now add also the point jj itself (the one that is to be split) */
+
+              DeRefMesh.DP[DeRefMesh.Ndp] = Mesh.DP[jj];
+#ifndef OPTIMIZE_MEMORY_USAGE
+              set_integers_for_point(&DeRefMesh, DeRefMesh.Ndp);
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */
+              tlast = insert_point(&DeRefMesh, DeRefMesh.Ndp, tlast);
+              DeRefMesh.Ndp++;
+
+              /* and finally, add the newly generated point */
+
+              DeRefMesh.DP[DeRefMesh.Ndp].x = P[j].Pos[0];
+              DeRefMesh.DP[DeRefMesh.Ndp].y = P[j].Pos[1];
+              DeRefMesh.DP[DeRefMesh.Ndp].z = P[j].Pos[2];
+              DeRefMesh.DP[DeRefMesh.Ndp].ID = P[j].ID;
+#ifndef OPTIMIZE_MEMORY_USAGE
+              set_integers_for_point(&DeRefMesh, DeRefMesh.Ndp);
+#endif /* #ifndef OPTIMIZE_MEMORY_USAGE */
+              tlast = insert_point(&DeRefMesh, DeRefMesh.Ndp, tlast);
+              DeRefMesh.Ndp++;
+
+              /* compute circumcircles */
+              compute_circumcircles(&DeRefMesh);
+
+              double *Volume = mymalloc("Volume", DeRefMesh.Ndp * sizeof(double));
+
+              derefine_refine_compute_volumes(Volume);
+
+              double voli = Volume[DeRefMesh.Ndp - 2]; /* second-to-last inserted point: the original point jj */
+              double volj = Volume[DeRefMesh.Ndp - 1]; /* last inserted point: the newly generated one */
+
+              myfree(Volume);
+
+              myfree(DeRefMesh.DTF);
+              myfree(DeRefMesh.DTC);
+              DeRefMesh.DTC = NULL;
+
+              myfree(DeRefMesh.DT);
+              myfree(DeRefMesh.DP - 5);
+              myfree(DeRefMesh.VF);
+
+              /* now split the conserved variables according to the volume ratio of the split */
+
+              double faci = voli / (voli + volj);
+              double facj = volj / (voli + volj);
+
+              P[i].Mass *= faci;
+              P[j].Mass *= facj;
+              SphP[i].OldMass *= faci;
+              SphP[j].OldMass *= facj;
+
+              SphP[i].Energy *= faci;
+              SphP[j].Energy *= facj;
+
+#ifdef MHD
+              for(k = 0; k < 3; k++)
+                {
+                  SphP[i].B[k] = SphP[i].BConserved[k] / (voli + volj);
+                  SphP[j].B[k] =
+                      SphP[i].B[k] + SphP[i].Grad.dB[k][0] * (P[j].Pos[0] - P[i].Pos[0]) +
+                      SphP[i].Grad.dB[k][1] * (P[j].Pos[1] - P[i].Pos[1]) +
+                      SphP[i].Grad.dB[k][2] * (P[j].Pos[2] - P[i].Pos[2]); /* extrapolate B to the position of the new cell */
+
+                  /* update conserved variables */
+                  SphP[i].BConserved[k] = SphP[i].B[k] * voli;
+                  SphP[j].BConserved[k] = SphP[j].B[k] * volj;
+                }
+#endif /* #ifdef MHD */
+
+              for(k = 0; k < 3; k++)
+                {
+                  SphP[i].Momentum[k] *= faci;
+                  SphP[j].Momentum[k] *= facj;
+                }
+
+#ifdef USE_SFR
+              SphP[i].Sfr *= faci;
+              SphP[j].Sfr *= facj;
+#endif /* #ifdef USE_SFR */
+
+#ifdef MAXSCALARS
+              for(int s = 0; s < N_Scalar;
+                  s++) /* Note, the changes in MATERIALS, HIGHRESGASMASS, etc., are treated as part of the Scalars */
+                {
+                  *(MyFloat *)(((char *)(&SphP[i])) + scalar_elements[s].offset_mass) *= faci;
+                  *(MyFloat *)(((char *)(&SphP[j])) + scalar_elements[s].offset_mass) *= facj;
+                }
+#endif /* #ifdef MAXSCALARS */
+
+#ifdef REFINEMENT_HIGH_RES_GAS
+              /* the change in the SphP[].HighResMass is treated as part of the Scalars loop above */
+              SphP[i].AllowRefinement += 2; /* increment the refinement "generation" of both cells */
+              SphP[j].AllowRefinement += 2;
+#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */
+
+              /* add the new particle into the neighbour tree */
+              int no = Ngb_Nextnode[i];
+              Ngb_Nextnode[i] = j; /* j is linked in directly behind i */
+              Ngb_Nextnode[j] = no;
+              Ngb_Father[j] = Ngb_Father[i];
+
+              ngb_update_rangebounds(j, &nchanged, nodelist);
+
+              /* now add the new particle into the link-lists for the time integration */
+
+              timebin_add_particle(&TimeBinsHydro, j, i, P[i].TimeBinHydro, 1);
+              timebin_add_particle(&TimeBinsGravity, j, i, P[i].TimeBinGrav, addToGravList);
+
+              SphP[j].first_connection = -1;
+              SphP[j].last_connection = -1;
+
+              count++;
+            }
+        }
+
+      NumPart += count;
+      NumGas += count;
+      All.TotNumPart += countall;
+      All.TotNumGas += countall;
+
+      myfree(ngb);
+      myfree(first_ngb);
+
+      ngb_finish_rangebounds_update(nchanged, nodelist);
+
+      myfree(nodelist);
+    }
+
+  myfree(ref_SphP_dp_index);
+
+  TIMER_STOP(CPU_REFINE);
+
+  return countall;
+}
+
+#endif /* REFINEMENT_SPLIT_CELLS */
diff --git a/src/amuse/community/arepo/src/mesh/voronoi/voronoi_utils.c b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_utils.c
new file mode 100644
index 0000000000..94ce562398
--- /dev/null
+++ b/src/amuse/community/arepo/src/mesh/voronoi/voronoi_utils.c
@@ -0,0 +1,501 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program.  See also
+ *              https://www.gnu.org/licenses/.
+ * + * \file src/mesh/voronoi/voronoi_utils.c + * \date 05/2018 + * \brief Utilities for 3d Voronoi mesh + * \details contains functions: + * double cross_section_plane_cell(int sphp_index, int dp_index, double *center, double *n) + * void intersections_plane_cell(int sphp_index, int dp_index, double *center, double *n, double *polygon, unsigned int + * *nof_polygon_elements) void intersection_plane_grid(double *center, double *n, const char *filename) static double + * polygon_area(double *polygon, unsigned int nof_elements) static int qs_partition(double *A, int p, int r, double *B) static void + * qs_sort(double *A, int p, int r, double *B) static double calc_phi(double x, double y) static void rotate_z(double *vec, const + * double alpha) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 23.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../../main/allvars.h" +#include "../../main/proto.h" + +#if !defined(TWODIMS) && !defined(ONEDIMS) + +// helper functions for cross_section_plane_cell and intersections_plane_cell: +static int qs_partition(double *A, int p, int r, double *B); +static void qs_sort(double *A, int p, int r, double *B); +static double calc_phi(double x, double y); +static void rotate_z(double *vec, const double alpha); +static void rotate_y(double *vec, const double alpha); + +#ifdef TETRA_INDEX_IN_FACE +static double polygon_area(double *polygon, unsigned int nof_elements); +static const unsigned int max_poly_elements = 128; + +/*! \brief Calculates the cross section between a plane and a Voronoi cell(3D). + * + * \param[in] sphp_index The hydro index of the cell. + * \param[in] dp_index The delaunay point index of the cell. + * \param[in] center A point in the plane. + * \param[in] n A vector starting at center and normal to the plane. + * + * \return The cross section between the plane and the cell. 
+ */ +double cross_section_plane_cell(int sphp_index, int dp_index, double *center, double *n) +{ + double polygon[max_poly_elements]; + unsigned int nof_elements = 0; + + intersections_plane_cell(sphp_index, dp_index, center, n, polygon, &nof_elements); + + // polygon has to contain at least 3 points + if(nof_elements < 6) + { + return 0; + } + else + { + return polygon_area(polygon, nof_elements); + } +} + +/*! \brief Calculates the intersections between a plane and a cell. + * + * \param[in] sphp_index The hydro index of the cell. + * \param[in] dp_index The Delaunay point index of the cell. + * \param[in] center A point in the plane. + * \param[in] n A vector starting at center and normal to the plane. + * \param[out] polygon Store the intersections (polygon) in this array. + * \param[out] nof_polygon_elements The number of stored elements in the + * polygon array. + * + * \return void + */ +void intersections_plane_cell(int sphp_index, int dp_index, double *center, double *n, double *polygon, + unsigned int *nof_polygon_elements) +{ + // memory for the line segments + unsigned int line_segments_max = 2000; + double *ls = (double *)mymalloc("line_segments", line_segments_max * sizeof(double)); + + // get the line segments + unsigned int nof_elements = 0; + get_line_segments(sphp_index, dp_index, ls, &nof_elements, line_segments_max); + assert(nof_elements % 6 == 0); // 6 doubles represent one line segment + + // start the calculation + unsigned int i; + double phi; + + if(n[0] == 0 && n[1] == 0) + { + phi = 0; + } + else + { + phi = calc_phi(n[0], n[1]); + } + + double r = sqrt(n[0] * n[0] + n[1] * n[1] + n[2] * n[2]); + assert(r > 0); + double theta = acos(n[2] / r); + + double lambda; // z1 + lambda * (z2 - z1) = 0 + + unsigned int max_phi_elms = max_poly_elements / 2; + double phi_values[max_phi_elms]; // phi coordinates of the points of the polygon + unsigned int p = 0; // number of points of the polygon + + // balance point of the polygon + double bal_p_x 
= 0; + double bal_p_y = 0; + + for(i = 0; i < nof_elements; i += 6) + { + // transform line segment to the center frame + ls[i] -= center[0]; // x1 + ls[i + 1] -= center[1]; // y1 + ls[i + 2] -= center[2]; // z1 + ls[i + 3] -= center[0]; // x2 + ls[i + 4] -= center[1]; // y2 + ls[i + 5] -= center[2]; // z2 + + // rotate line segment such that the cross secting plane is in the x-y plane / the normal vector of the plane is on the z-axis + rotate_z(&ls[i], -phi); + rotate_y(&ls[i], -theta); + + rotate_z(&ls[i + 3], -phi); + rotate_y(&ls[i + 3], -theta); + + if(ls[i + 2] == ls[i + 5]) // same z-coords + { + if(ls[i + 2] != 0) // no intersection + { + lambda = -1; + } + else + { + lambda = 0; // take first point as intersection + } + } + else + { + lambda = ls[i + 2] / (ls[i + 2] - ls[i + 5]); + } + + if(lambda >= 0 && lambda <= 1) // line segment intersects plane + { + if(p == max_phi_elms) + { + terminate("termination in voronoi_utils.c: intersections_plane_cell: not enough memory!\n"); + } + + polygon[2 * p] = ls[i] + lambda * (ls[i + 3] - ls[i]); // x coordinate of the intersection + polygon[2 * p + 1] = ls[i + 1] + lambda * (ls[i + 4] - ls[i + 1]); // y coordinate of the intersection + + bal_p_x += polygon[2 * p]; + bal_p_y += polygon[2 * p + 1]; + + p++; + } + } + + // free memory + myfree(ls); + + // polygon has to contain at least 3 points + if(p < 3) + { + return; + } + + // switch frame to balance point of the polygon + bal_p_x /= p; + bal_p_y /= p; + + for(i = 0; i < p; i++) + { + polygon[2 * i] -= bal_p_x; + polygon[2 * i + 1] -= bal_p_y; + + // calculate the phi values + phi_values[i] = calc_phi(polygon[2 * i], polygon[2 * i + 1]); + } + + // sort polygon + qs_sort(phi_values, 0, p - 1, polygon); + + // close polygon + polygon[2 * p] = polygon[0]; + polygon[2 * p + 1] = polygon[1]; + phi_values[p] = phi_values[0]; + p++; + + // transform back + for(i = 0; i < p; i++) + { + polygon[2 * i] += bal_p_x; + polygon[2 * i + 1] += bal_p_y; + } + + 
*nof_polygon_elements = 2 * p; +} + +/*! \brief Write out the intersections between a plane and the grid + * (for plotting). + * + * Binary output: + * int: Number of elements in the first array. + * int: Number of elements in the second array. + * int[]: Array, which stores the number of intersections for each intersected + * cell. + * The j-th entry gives the number of elements in the intersections + * array which correspond to the j-th intersected cell. + * double[]: intersections array, all intersections are stored in the + * order x1,y1,x2,y2,x3,y3,... + * + * The intersections are given in a coordinate system where n is the z-axis + * and which has its origin at center. + * + * \param[in] center A point in the plane. + * \param[in] n A vector starting at center and normal to the plane. + * \param[in] filename Filename. + * + * \return void + */ +void intersection_plane_grid(double *center, double *n, const char *filename) +{ + if(NTask != 1) + { + terminate("termination in voronoi_utils.c: intersection_plane_grid: not yet parallelized!\n"); + } + + double phi; + + if(n[0] == 0 && n[1] == 0) + { + phi = 0; + } + else + { + phi = calc_phi(n[0], n[1]); + } + + double r = sqrt(n[0] * n[0] + n[1] * n[1] + n[2] * n[2]); + assert(r > 0); + double theta = acos(n[2] / r); + + double xaxis[3] = {1, 0, 0}; + double yaxis[3] = {0, 1, 0}; + double zaxis[3] = {0, 0, 1}; + + rotate_y(xaxis, theta); + rotate_z(xaxis, phi); + + rotate_y(yaxis, theta); + rotate_z(yaxis, phi); + + rotate_y(zaxis, theta); + rotate_z(zaxis, phi); + + printf("normal vector: (%f, %f, %f)\n", n[0], n[1], n[2]); + printf("Coordinate system of output data: \n"); + printf("center: (%f, %f, %f)\n", center[0], center[1], center[2]); + printf("x-axis: (%f, %f, %f)\n", xaxis[0], xaxis[1], xaxis[2]); + printf("y-axis: (%f, %f, %f)\n", yaxis[0], yaxis[1], yaxis[2]); + printf("z-axis: (%f, %f, %f)\n", zaxis[0], zaxis[1], zaxis[2]); + + const int cells_max_elms = NumGas; + int *nof_intersections = (int 
*)mymalloc("number of intersections", cells_max_elms * sizeof(int)); + unsigned int l = 0; + + const int polygons_max_elms = NumGas * 5; + double *polygons = (double *)mymalloc("polygons", polygons_max_elms * 5 * sizeof(int)); + unsigned int j = 0; + + unsigned int nof_polygon_elements = 0; + + unsigned int k = 0; + + for(k = 0; k < NumGas; k++) + { + nof_polygon_elements = 0; + intersections_plane_cell(k, k, center, n, &polygons[j], &nof_polygon_elements); + + if(nof_polygon_elements != 0) + { + nof_intersections[l] = (int)nof_polygon_elements; + l++; + + j += nof_polygon_elements; + + if(j > polygons_max_elms - 100) + { + terminate("termination in voronoi_utils.c: intersection_plane_grid: not enough memory for the polygons!\n"); + } + } + } + + // binary output + FILE *pFile; + + pFile = fopen(filename, "wb"); + + fwrite(&l, sizeof(int), 1, pFile); // number of intersected cells + fwrite(&j, sizeof(int), 1, pFile); // number of elements in polygons array + fwrite(nof_intersections, sizeof(int), l, pFile); + fwrite(polygons, sizeof(double), j, pFile); + + fclose(pFile); + + myfree(polygons); + myfree(nof_intersections); +} + +/*! \brief Calculate the area of a 2D polygon. + * + * Formula (wikipedia):A = 0.5 * sum_i=0^{n-1}(x_i * y_{i+1} - x_{i+1} * y_i). + * + * \param[in] polygon Array of points of the polygon: x1, y1, x2, y2, ..., + * has to be sorted counterclockwise and closed + * (x_n == x_0 && y_n == y_0). + * \param[in] nof_elements Number of elements in the array. + * + * \return Area of polygon. + */ +static double polygon_area(double *polygon, unsigned int nof_elements) +{ + assert(nof_elements >= 8); + + double result = 0; + + unsigned int k; + + for(k = 0; k < nof_elements - 2; k += 2) + { + result += polygon[k] * polygon[k + 3] - polygon[k + 2] * polygon[k + 1]; + } + + result *= 0.5; + + assert(result >= 0); + + return result; +} + +#endif /* #ifdef TETRA_INDEX_IN_FACE */ + +/*! \brief Quicksort partitioning function, helper for qs_sort. 
+ *
+ *  \param[in, out] A array to be sorted, usually angle phi; A[r] is used as the pivot.
+ *  \param[in] p Lower index for quicksort (inclusive).
+ *  \param[in] r Upper index for quicksort (inclusive).
+ *  \param[in, out] B Array of pairs (B[2i], B[2i+1]) that is permuted the same way as A.
+ *
+ *  \return Final index of the pivot element.
+ */
+static int qs_partition(double *A, int p, int r, double *B)
+{
+  double x = A[r];
+  double tmp;
+  double tmp2;
+  int i = p - 1;
+  int j;
+
+  for(j = p; j < r; j++)
+    {
+      if(A[j] <= x)
+        {
+          // switch phi values ( i <-> j )
+          i++;
+          tmp = A[i];
+          A[i] = A[j];
+          A[j] = tmp;
+
+          // switch coordinates ( 2i, 2i+1 <-> 2j, 2j+1)
+          tmp = B[2 * i];
+          tmp2 = B[2 * i + 1];
+          B[2 * i] = B[2 * j];
+          B[2 * i + 1] = B[2 * j + 1];
+          B[2 * j] = tmp;
+          B[2 * j + 1] = tmp2;
+        }
+    }
+
+  // switch phi values: move the pivot from r to its final slot i + 1
+  tmp = A[i + 1];
+  A[i + 1] = A[r];
+  A[r] = tmp;
+
+  // switch coordinates of the same two entries
+  tmp = B[(i + 1) * 2];
+  tmp2 = B[(i + 1) * 2 + 1];
+
+  B[(i + 1) * 2] = B[2 * r];
+  B[(i + 1) * 2 + 1] = B[2 * r + 1];
+
+  B[2 * r] = tmp;
+  B[2 * r + 1] = tmp2;
+
+  return i + 1;
+}
+
+/*! \brief Quick-sorts the points of the polygon with respect to phi.
+ *
+ *  \param[in, out] A array to be sorted in ascending order, usually angle phi.
+ *  \param[in] p lower index for quicksort (inclusive).
+ *  \param[in] r upper index for quicksort (inclusive); nothing is done unless p < r.
+ *  \param[in, out] B array that also changes ordering the same way as A;
+ *                  usually polygon.
+ *
+ *  \return void
+ */
+static void qs_sort(double *A, int p, int r, double *B)
+{
+  int q;
+
+  if(p < r)
+    {
+      q = qs_partition(A, p, r, B);
+      qs_sort(A, p, q - 1, B);
+      qs_sort(A, q + 1, r, B);
+    }
+}
+
+/*! \brief Calculates the phi coordinate of a point.
+ *
+ *  Calculates polar angle in a 2d coordinate system from Cartesian coordinate
+ *  system.
+ *
+ *  \param[in] x X coordinate.
+ *  \param[in] y Y coordinate.
+ *
+ *  \return Phi (polar angle).
+ */
+static double calc_phi(double x, double y)
+{
+  // atan2 may report a domain error when both arguments are zero: print a message and return 0 instead
+  if((x == 0) && (y == 0))
+    {
+      fprintf(stderr, "ERROR in calc_phi: both arguments are zero\n");
+      return 0;
+    }
+
+  double p = atan2(y, x);  // in [-pi,pi]
+
+  if(p < 0)
+    {
+      return p + 2 * M_PI;  // shift negative angles so the result is non-negative
+    }
+
+  return p;
+}
+
+/*! \brief Rotate a vector around the z axis.
+ *
+ *  \param[in, out] vec Pointer to the 3 dimensional vector, rotated in place (vec[2] is not touched).
+ *  \param[in] alpha Rotation angle in radians.
+ *
+ *  \return void
+ */
+static void rotate_z(double *vec, const double alpha)
+{
+  double vx_tmp = vec[0];  // original x, still needed after vec[0] is overwritten
+  vec[0] = cos(alpha) * vec[0] - sin(alpha) * vec[1];
+  vec[1] = sin(alpha) * vx_tmp + cos(alpha) * vec[1];
+}
+
+/*! \brief Rotate a vector around the y axis.
+ *
+ *  \param[in, out] vec Pointer to the 3 dimensional vector, rotated in place (vec[1] is not touched).
+ *  \param[in] alpha Rotation angle in radians.
+ *
+ *  \return void
+ */
+static void rotate_y(double *vec, const double alpha)
+{
+  double vx_tmp = vec[0];  // original x, still needed after vec[0] is overwritten
+
+  vec[0] = cos(alpha) * vec[0] + sin(alpha) * vec[2];
+  vec[2] = -sin(alpha) * vx_tmp + cos(alpha) * vec[2];
+}
+
+#endif /* #if !defined(TWODIMS) && !defined(ONEDIMS) */
diff --git a/src/amuse/community/arepo/src/mpi_utils/checksummed_sendrecv.c b/src/amuse/community/arepo/src/mpi_utils/checksummed_sendrecv.c
new file mode 100644
index 0000000000..3fd92c29e6
--- /dev/null
+++ b/src/amuse/community/arepo/src/mpi_utils/checksummed_sendrecv.c
@@ -0,0 +1,321 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mpi_utils/checksummed_sendrecv.c + * \date 05/2018 + * \brief MPI send-receive communication with checksum to verify + * communication. + * \details contains functions: + * int MPI_Check_Sendrecv(void *sendbuf, int sendcount, + * MPI_Datatype sendtype, int dest, int sendtag, + * void *recvbufreal, int recvcount, MPI_Datatype recvtype, + * int source, int recvtag, MPI_Comm comm, + * MPI_Status * status) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef MPISENDRECV_CHECKSUM + +#undef MPI_Sendrecv + +/*! \brief MPI_Sendrecv with built-in check if message arrived properly. + * + * \param[in] sendbuf Initial address of send buffer. + * \param[in] sendcount Number of elements in send buffer. + * \param[in] sendtype Type of elements in send buffer. + * \param[in] dest Rank of destination. + * \param[in] sendtag Send tag. + * \param[out] recvbufreal Initial adress of receive buffer. + * \param[in] recvcount Number of elements in receive buffer. + * \param[in] recvtype Type of elements in receive buffer . + * \param[in] source Rank of source. + * \param[in] recvtag Receive tag. + * \param[in] comm Communicator + * \param[out] status Status object; this refers to receive operation. 
+ * + * \return 0 + */ +int MPI_Check_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbufreal, int recvcount, + MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status) +{ + int checksumtag = 1000, errtag = 2000; + int i, iter = 0, err_flag, err_flag_imported, size_sendtype, size_recvtype; + long long sendCheckSum, recvCheckSum, importedCheckSum; + unsigned char *p, *buf, *recvbuf; + char msg[500]; + + if(dest != source) + terminate("destination task different from source task"); + + MPI_Type_size(sendtype, &size_sendtype); + MPI_Type_size(recvtype, &size_recvtype); + + if(dest == ThisTask) + { + memcpy(recvbufreal, sendbuf, recvcount * size_recvtype); + return 0; + } + + if(!(buf = mymalloc(recvcount * size_recvtype + 1024))) + terminate("not enough memory to allocate the buffer buf"); + + for(i = 0, p = buf; i < recvcount * size_recvtype + 1024; i++) + *p++ = 255; + + recvbuf = buf + 512; + + MPI_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, status); + + for(i = 0, p = buf; i < 512; i++, p++) + { + if(*p != 255) + { + sprintf(msg, "MPI-ERROR: Task=%d/%s: Recv occured before recv buffer. message-size=%d from %d, i=%d c=%d\n", ThisTask, + getenv("HOST"), recvcount, dest, i, *p); + terminate(msg); + } + } + + for(i = 0, p = recvbuf + recvcount * size_recvtype; i < 512; i++, p++) + { + if(*p != 255) + { + sprintf(msg, "MPI-ERROR: Task=%d/%s: Recv occured after recv buffer. 
message-size=%d from %d, i=%d c=%d\n", ThisTask, + getenv("HOST"), recvcount, dest, i, *p); + terminate(msg); + } + } + + for(i = 0, p = sendbuf, sendCheckSum = 0; i < sendcount * size_sendtype; i++, p++) + sendCheckSum += *p; + + importedCheckSum = 0; + + if(dest > ThisTask) + { + if(sendcount > 0) + MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); + if(recvcount > 0) + MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); + } + else + { + if(recvcount > 0) + MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); + if(sendcount > 0) + MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); + } + + checksumtag++; + + for(i = 0, p = recvbuf, recvCheckSum = 0; i < recvcount * size_recvtype; i++, p++) + recvCheckSum += *p; + + err_flag = err_flag_imported = 0; + + if(recvCheckSum != importedCheckSum) + { + printf( + "MPI-ERROR: Receive error on task=%d/%s from task=%d, message size=%d, sendcount=%d checksums= %d %d %d %d. 
Try to fix " + "it...\n", + ThisTask, getenv("HOST"), source, recvcount, sendcount, (int)(recvCheckSum >> 32), (int)recvCheckSum, + (int)(importedCheckSum >> 32), (int)importedCheckSum); + myflush(stdout); + + err_flag = 1; + } + + if(dest > ThisTask) + { + MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); + MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); + } + else + { + MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); + MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); + } + errtag++; + + if(err_flag > 0 || err_flag_imported > 0) + { + printf("Task=%d is on %s, wants to send %d and has checksum=%d %d of send data\n", ThisTask, getenv("HOST"), sendcount, + (int)(sendCheckSum >> 32), (int)sendCheckSum); + myflush(stdout); + + do + { + sendtag++; + recvtag++; + + for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++) + *p = 0; + + if((iter & 1) == 0) + { + if(dest > ThisTask) + { + if(sendcount > 0) + MPI_Ssend(sendbuf, sendcount, sendtype, dest, sendtag, MPI_COMM_WORLD); + if(recvcount > 0) + MPI_Recv(recvbuf, recvcount, recvtype, dest, recvtag, MPI_COMM_WORLD, status); + } + else + { + if(recvcount > 0) + MPI_Recv(recvbuf, recvcount, recvtype, dest, recvtag, MPI_COMM_WORLD, status); + if(sendcount > 0) + MPI_Ssend(sendbuf, sendcount, sendtype, dest, sendtag, MPI_COMM_WORLD); + } + } + else + { + if(iter > 5) + { + printf("we're trying to send each byte now on task=%d (iter=%d)\n", ThisTask, iter); + myflush(stdout); + if(dest > ThisTask) + { + for(i = 0, p = sendbuf; i < sendcount * size_sendtype; i++, p++) + MPI_Ssend(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD); + for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++) + MPI_Recv(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD, status); + } + else + { + for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++) + MPI_Recv(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD, status); + for(i = 0, p = sendbuf; i < sendcount 
* size_sendtype; i++, p++) + MPI_Ssend(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD); + } + } + else + { + MPI_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, + status); + } + } + + importedCheckSum = 0; + + for(i = 0, p = sendbuf, sendCheckSum = 0; i < sendcount * size_sendtype; i++, p++) + sendCheckSum += *p; + + printf("Task=%d gas send_checksum=%d %d\n", ThisTask, (int)(sendCheckSum >> 32), (int)sendCheckSum); + myflush(stdout); + + if(dest > ThisTask) + { + if(sendcount > 0) + MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); + if(recvcount > 0) + MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); + } + else + { + if(recvcount > 0) + MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); + if(sendcount > 0) + MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); + } + + for(i = 0, p = recvbuf, recvCheckSum = 0; i < recvcount; i++, p++) + recvCheckSum += *p; + + err_flag = err_flag_imported = 0; + + if(recvCheckSum != importedCheckSum) + { + printf( + "MPI-ERROR: Again (iter=%d) a receive error on task=%d/%s from task=%d, message size=%d, checksums= %d %d %d %d. 
" + "Try to fix it...\n", + iter, ThisTask, getenv("HOST"), source, recvcount, (int)(recvCheckSum >> 32), (int)recvCheckSum, + (int)(importedCheckSum >> 32), (int)importedCheckSum); + myflush(stdout); + err_flag = 1; + } + + if(dest > ThisTask) + { + MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); + MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); + } + else + { + MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); + MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); + } + + if(err_flag == 0 && err_flag_imported == 0) + break; + + errtag++; + checksumtag++; + iter++; + } + while(iter < 10); + + if(iter >= 10) + { + char buf[1000]; + int length; + FILE *fd; + + sprintf(buf, "send_data_%d.dat", ThisTask); + fd = fopen(buf, "w"); + length = sendcount * size_sendtype; + fwrite(&length, 1, sizeof(int), fd); + fwrite(sendbuf, sendcount, size_sendtype, fd); + fclose(fd); + + sprintf(buf, "recv_data_%d.dat", ThisTask); + fd = fopen(buf, "w"); + length = recvcount * size_recvtype; + fwrite(&length, 1, sizeof(int), fd); + fwrite(recvbuf, recvcount, size_recvtype, fd); + fclose(fd); + + sprintf(msg, "MPI-ERROR: Even 10 trials proved to be insufficient on task=%d/%s. Stopping\n", ThisTask, getenv("HOST")); + terminate(msg); + } + } + + memcpy(recvbufreal, recvbuf, recvcount * size_recvtype); + + myfree(buf); + + return 0; +} + +#endif /* #ifdef MPISENDRECV_CHECKSUM */ diff --git a/src/amuse/community/arepo/src/mpi_utils/hypercube_allgatherv.c b/src/amuse/community/arepo/src/mpi_utils/hypercube_allgatherv.c new file mode 100644 index 0000000000..e421807148 --- /dev/null +++ b/src/amuse/community/arepo/src/mpi_utils/hypercube_allgatherv.c @@ -0,0 +1,94 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mpi_utils/hypercube_allgatherv.c + * \date 05/2018 + * \brief Home-made MPI_Allgatherv routine. + * \details contains functions: + * int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, + * MPI_Datatype sendtype, void *recvbuf, int *recvcount, + * int *displs, MPI_Datatype recvtype, MPI_Comm comm) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef MPI_HYPERCUBE_ALLGATHERV + +#define TAG 100 +/*! \brief Allgatherv routine based on MPI_Sendrecv calls. + * + * \param[in] sendbuf Starting address of send buffer. + * \param[in] sendcount Number of elements in send buffer. + * \param[in] sendtype Data type of send buffer elements. + * \param[out] recvbuf Address of receive buffer. + * \param[in] recvcount Integer array (of length group size) containing the + * number of elements that are to be received from each process. + * \param[in] displs Integer array (of length group size). 
Entry i specifies + * the displacement (relative to recvbuf ) at which to place the + * incoming data from process. + * \param[in] recvtype Data type of receive buffer elements. + * \param[in] comm Communicator. + * + * \return 0 + */ +int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, + MPI_Datatype recvtype, MPI_Comm comm) +{ + int ntask, thistask, ptask, ngrp, size_sendtype, size_recvtype; + MPI_Status status; + + MPI_Comm_rank(comm, &thistask); + MPI_Comm_size(comm, &ntask); + + MPI_Type_size(sendtype, &size_sendtype); + MPI_Type_size(recvtype, &size_recvtype); + + for(ptask = 0; ntask > (1 << ptask); ptask++) + ; + + for(ngrp = 1; ngrp < (1 << ptask); ngrp++) + { + int recvtask = thistask ^ ngrp; + + if(recvtask < ntask) + MPI_Sendrecv(sendbuf, sendcount, sendtype, recvtask, TAG, recvbuf + displs[recvtask] * size_recvtype, recvcount[recvtask], + recvtype, recvtask, TAG, comm, &status); + } + + if(sendbuf != recvbuf + displs[thistask] * size_recvtype) + memcpy(recvbuf + displs[thistask] * size_recvtype, sendbuf, sendcount * size_sendtype); + + return 0; +} + +#endif /* #ifdef MPI_HYPERCUBE_ALLGATHERV */ diff --git a/src/amuse/community/arepo/src/mpi_utils/mpi_util.c b/src/amuse/community/arepo/src/mpi_utils/mpi_util.c new file mode 100644 index 0000000000..e5098c7396 --- /dev/null +++ b/src/amuse/community/arepo/src/mpi_utils/mpi_util.c @@ -0,0 +1,375 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mpi_utils/mpi_util.c + * \date 05/2018 + * \brief Custom made auxiliary MPI functions. + * \details contains functions: + * void mpi_exchange_buffers(void *send_buf, int *send_count, + * int *send_offset, void *recv_buf, int *recv_count, + * int *recv_offset, int item_size, int commtag, + * int include_self) + * int mpi_calculate_offsets(int *send_count, int *send_offset, + * int *recv_count, int *recv_offset, int send_identical) + * int mesh_search_compare_task(const void *a, const void *b) + * int intpointer_compare(const void *a, const void *b) + * void *sort_based_on_mesh_search(mesh_search_data * search, + * void *data, int n_items, int item_size) + * void *sort_based_on_field(void *data, int field_offset, + * int n_items, int item_size) + * void mpi_distribute_items_from_search(mesh_search_data * + * search, void *data, int *n_items, int *max_n, int + * item_size, int commtag, int task_offset, int cell_offset) + * void mpi_distribute_items_to_tasks(void *data, + * int task_offset, int *n_items, int *max_n, int item_size, + * int commtag) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +static char *SaveData2; + +/*! \brief Implements the common idiom of exchanging buffers with every other + * MPI task. + * + * All arrays should be allocated with NTask size. + * + * \param[in] send_buf Pointer to data to be sent. + * \param[in] send_count Number of elements to be sent. 
+ * \param[in] send_offset Array with offsets to communicate to specific task. + * \param[out] recv_buf Pointert to dataspace for incoming data. + * \param[in] recv_count Number of elements to be received. + * \param[in] recv_offset Array with offsets in receive buffer from specific + * task. + * \param[in] item_size Size of one element. + * \param[in] commtag Receive tag. + * \param[in] include_self Communication with own task included? + * + * \return void + */ +void mpi_exchange_buffers(void *send_buf, int *send_count, int *send_offset, void *recv_buf, int *recv_count, int *recv_offset, + int item_size, int commtag, int include_self) +{ + int ngrp; + // this loop goes from 0 in some cases, but that doesn't make sense + // because then recvTask==ThisTask and nothing is done. + for(ngrp = include_self ? 0 : 1; ngrp < (1 << PTask); ngrp++) + { + int recvTask = ThisTask ^ ngrp; + + if(recvTask < NTask) + { + if(send_count[recvTask] > 0 || recv_count[recvTask] > 0) + { + /* exchange data */ + MPI_Sendrecv((char *)send_buf + (size_t)send_offset[recvTask] * item_size, (size_t)send_count[recvTask] * item_size, + MPI_BYTE, recvTask, commtag, (char *)recv_buf + (size_t)recv_offset[recvTask] * item_size, + (size_t)recv_count[recvTask] * item_size, MPI_BYTE, recvTask, commtag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + } +} + +/*! \brief Calculates offsets for MPI communication. + * + * Calculates the recv_count, send_offset, and recv_offset arrays + * based on the send_count. Returns nimport, the total number of + * particles to be received. If an identical set of copies are to be + * sent to all tasks, set send_identical=1 and the send_offset will + * be zero for all tasks. + * + * All arrays should be allocated with NTask size. + * + * \param[in] send_count Number of element to be sent. + * \param[out] send_offset Offset in send-buffer. + * \param[out] recv_count Number of elements in receive. + * \param[out] recv_offset Offest for receive buffer. 
+ * \param[in] send_identical Include self-communication? + * + */ +int mpi_calculate_offsets(int *send_count, int *send_offset, int *recv_count, int *recv_offset, int send_identical) +{ + // Exchange the send/receive counts + MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_COMM_WORLD); + + int nimport = 0; + recv_offset[0] = 0; + send_offset[0] = 0; + int j; + for(j = 0; j < NTask; j++) + { + nimport += recv_count[j]; + + if(j > 0) + { + send_offset[j] = send_offset[j - 1] + (send_identical ? 0 : send_count[j - 1]); + recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1]; + } + } + return nimport; +} + +/*! \brief Comparison function used to sort the mesh_search data by task. + * + * \param[in] a First object. + * \param[in] b Second object. + * + * \return (-1,0,1), -1 if a < b. + */ +int mesh_search_compare_task(const void *a, const void *b) +{ + if((*(mesh_search_data **)a)->Task < (*(mesh_search_data **)b)->Task) + return -1; + + if((*(mesh_search_data **)a)->Task > (*(mesh_search_data **)b)->Task) + return +1; + + return 0; +} + +/*! \brief Comparison function used to sort an array of int pointers into order + * of the pointer targets. + * + * \param[in] a First object. + * \param[in] b Second object. + * + * \return (-1,0,1), -1 if a < b. + */ +int intpointer_compare(const void *a, const void *b) +{ + if((**(int **)a) < (**(int **)b)) + return -1; + + if((**(int **)a) > (**(int **)b)) + return +1; + + return 0; +} + +/*! \brief Sort an opaque array according to the order implied by sorting the + * search array by task. Returns a sorted copy of the data array, + * that needs to be myfreed. + * + * We do this by sorting an array of pointers to the elements in + * search, and then using this array to reorder the data + * array. Unfortunately this means making a copy of the data, but + * this just replaces the copy after the mpi_exchange_buffers + * anyway. + * + * \param[in] search Array with sorting criterion. 
+ * \param[in] data Data to be sorted.
+ * \param[in] n_items Number of elements.
+ * \param[in] item_size Size of single element in bytes.
+ *
+ * \return Pointer to sorted data.
+ */
+void *sort_based_on_mesh_search(mesh_search_data *search, void *data, int n_items, int item_size)
+{
+  int i;
+  char *data2;
+  mesh_search_data **perm;
+
+  /* destination of the reordered copy; the allocation is registered with &SaveData2 */
+  data2 = mymalloc_movable(&SaveData2, "data2", (size_t)n_items * item_size);
+
+  /* keep the file-scope handle pointing at the copy; the mpi_distribute_items_* callers
+   * re-read the buffer through SaveData2 */
+  SaveData2 = data2;
+
+  perm = mymalloc("perm", n_items * sizeof(*perm));
+
+  /* perm[i] initially points at search[i]; sorting perm by Task leaves search itself untouched */
+  for(i = 0; i < n_items; ++i)
+    perm[i] = &search[i];
+
+  mysort(perm, n_items, sizeof(*perm), mesh_search_compare_task);
+
+  // reorder data into data2
+  for(i = 0; i < n_items; ++i)
+    {
+      /* index of the element that perm[i] points to within the search array */
+      size_t orig_pos = perm[i] - search;
+      memcpy(data2 + item_size * (size_t)i, (char *)data + item_size * orig_pos, item_size);
+    }
+
+  myfree(perm);
+
+  return (void *)data2;
+}
+
+/*! \brief Sort an opaque array into increasing order of an int field, given
+ * by the specified offset. (This would typically be the field indicating
+ * the task.) Returns a sorted copy of the data array, that needs to
+ * be myfreed.
+ *
+ * We do this by sorting an array of pointers to the task field, and
+ * then using this array to deduce the reordering of the data
+ * array. Unfortunately this means making a copy of the data, but
+ * this just replaces the copy after the mpi_exchange_buffers
+ * anyway.
+ *
+ * \param[in] data Data to be sorted.
+ * \param[in] field_offset Byte offset of the int sort field within an item.
+ * \param[in] n_items Number of elements.
+ * \param[in] item_size Size of individual item in bytes.
+ *
+ * \return Pointer to sorted array.
+ */
+void *sort_based_on_field(void *data, int field_offset, int n_items, int item_size)
+{
+  int i;
+  char *data2;
+  int **perm;
+
+  /* destination of the reordered copy; the allocation is registered with &SaveData2 */
+  data2 = mymalloc_movable(&SaveData2, "data2", (size_t)n_items * item_size);
+
+  /* keep the file-scope handle pointing at the copy */
+  SaveData2 = data2;
+
+  perm = mymalloc("perm", n_items * sizeof(*perm));
+
+  /* perm[i] points at the int sort field inside item i */
+  for(i = 0; i < n_items; ++i)
+    perm[i] = (int *)((char *)data + (size_t)i * item_size + field_offset);
+
+  mysort(perm, n_items, sizeof(*perm), intpointer_compare);
+
+  // reorder data into data2
+  for(i = 0; i < n_items; ++i)
+    {
+      /* recover the item index from the field address: distance to the first field, in items */
+      size_t orig_pos = ((char *)perm[i] - ((char *)data + field_offset)) / item_size;
+      /* the distance must be a whole number of items */
+      myassert(((char *)perm[i] - ((char *)data + field_offset)) % item_size == 0);
+      memcpy(data2 + item_size * (size_t)i, (char *)data + item_size * orig_pos, item_size);
+    }
+
+  myfree(perm);
+
+  return (void *)data2;
+}
+
+/*! \brief This function takes a mesh_search structure and exchanges the
+ * members in an associated structure based on the index and task in
+ * the search data. n_items is updated to the new size of data. max_n
+ * is the allocated size of the data array.
+ *
+ * Additionally, if the task_offset and cell_offset are nonnegative,
+ * the Task and Index fields in the search results will be copied to
+ * those fields in the data array.
+ *
+ * \param[in] search Mesh search data.
+ * \param[in, out] data Data to be sorted.
+ * \param[in, out] n_items number of elements.
+ * \param[in, out] max_n Allocated size of data array.
+ * \param[in] item_size Size of individual element.
+ * \param[in] commtag Communication tag.
+ * \param[in] task_offset Byte offset of the int field in each item that
+ *            receives the target task; negative to skip.
+ * \param[in] cell_offset Byte offset of the int field in each item that
+ *            receives the search index; negative to skip.
+ *
+ * \return void
+ */
+void mpi_distribute_items_from_search(mesh_search_data *search, void *data, int *n_items, int *max_n, int item_size, int commtag,
+                                      int task_offset, int cell_offset)
+{
+  int i;
+
+  for(i = 0; i < NTask; i++)
+    Send_count[i] = 0;
+
+  /* count how many items go to each task */
+  for(i = 0; i < *n_items; i++)
+    {
+      int task = search[i].Task;
+      myassert(task >= 0 && task < NTask);
+      Send_count[task]++;
+
+      // copy task/index into data array, if applicable
+      if(task_offset >= 0)
+        *(int *)((char *)data + (size_t)i * item_size + task_offset) = task;
+      if(cell_offset >= 0)
+        *(int *)((char *)data + (size_t)i * item_size + cell_offset) = search[i].u.Index;
+    }
+
+  /* copy of the items ordered by target task, so each task's items are contiguous */
+  void *data2 = sort_based_on_mesh_search(search, data, *n_items, item_size);
+
+  int nimport = mpi_calculate_offsets(Send_count, Send_offset, Recv_count, Recv_offset, 0);
+
+  /* grow the receive array if the incoming items do not fit.
+   * NOTE(review): only the local copy of data is reassigned here; presumably the movable
+   * allocator also updates the caller's registered pointer -- confirm. */
+  if(*max_n < nimport)
+    {
+      data = myrealloc_movable(data, (size_t)nimport * item_size);
+      *max_n = nimport;
+    }
+
+  /* re-read the sorted copy through SaveData2 -- presumably because the movable realloc
+   * above may have relocated it; confirm */
+  data2 = SaveData2;
+
+  /* include_self = 1: items that stay on this task are moved by the same exchange */
+  mpi_exchange_buffers(data2, Send_count, Send_offset, data, Recv_count, Recv_offset, item_size, commtag, 1);
+
+  myfree_movable(data2);
+
+  *n_items = nimport;
+}
+
+/*! \brief This function distributes the members in an opaque structure to
+ * the tasks based on a task field given by a specified offset into
+ * the opaque struct. The task field must have int type. n_items is
+ * updated to the new size of data. max_n is the allocated size of
+ * the data array, and is updated if a realloc is necessary.
+ *
+ * \param[in, out] data Data array
+ * \param[in] task_offset Byte offset of the int task field within an item.
+ * \param[in, out] n_items Number of elements in array.
+ * \param[in, out] max_n Allocated size of the data array.
+ * \param[in] item_size Size of single element.
+ * \param[in] commtag Communication tag.
+ *
+ * \return void
+ */
+void mpi_distribute_items_to_tasks(void *data, int task_offset, int *n_items, int *max_n, int item_size, int commtag)
+{
+  int i;
+
+  for(i = 0; i < NTask; i++)
+    Send_count[i] = 0;
+
+  /* count how many items go to each task, reading the target from the int field at task_offset */
+  for(i = 0; i < *n_items; i++)
+    {
+      int task = *(int *)((char *)data + (size_t)i * item_size + task_offset);
+      myassert(task >= 0 && task < NTask);
+      Send_count[task]++;
+    }
+
+  /* copy of the items ordered by target task, so each task's items are contiguous */
+  void *data2 = sort_based_on_field(data, task_offset, *n_items, item_size);
+
+  int nimport = mpi_calculate_offsets(Send_count, Send_offset, Recv_count, Recv_offset, 0);
+
+  /* grow the receive array if the incoming items do not fit.
+   * NOTE(review): only the local copy of data is reassigned here; presumably the movable
+   * allocator also updates the caller's registered pointer -- confirm. */
+  if(*max_n < nimport)
+    {
+      data = myrealloc_movable(data, (size_t)nimport * item_size);
+      *max_n = nimport;
+    }
+
+  /* re-read the sorted copy through SaveData2 -- presumably because the movable realloc
+   * above may have relocated it; confirm */
+  data2 = SaveData2;
+
+  /* include_self = 1: items that stay on this task are moved by the same exchange */
+  mpi_exchange_buffers(data2, Send_count, Send_offset, data, Recv_count, Recv_offset, item_size, commtag, 1);
+
+  myfree_movable(data2);
+
+  *n_items = nimport;
+}
diff --git a/src/amuse/community/arepo/src/mpi_utils/myIBarrier.c b/src/amuse/community/arepo/src/mpi_utils/myIBarrier.c
new file mode 100644
index 0000000000..7133759b85
--- /dev/null
+++ b/src/amuse/community/arepo/src/mpi_utils/myIBarrier.c
@@ -0,0 +1,175 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mpi_utils/myIBarrier.c + * \date 05/2018 + * \brief Home-made MPI_Ibarrier routine. + * \details Non-blocking version of MPI_Barrier; Once reaching this point, + * a process notifies this to other tasks. + * contains functions: + * void myIBarrier(MPI_Comm comm, struct sMyIBarrier *barrier) + * void myIBarrierTest(struct sMyIBarrier *barrier, int *flag, + * MPI_Status * unused) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifdef MYIBARRIER + +#include + +#include "myIBarrier.h" + +/*! \brief Non-blocking MPI barrier; Notifies other tasks once it is called. + * + * \param[in] comm MPI communicator. + * \param[in, out] Object containing information about the barrier. + * + * \return void + */ +void myIBarrier(MPI_Comm comm, struct sMyIBarrier *barrier) +{ + barrier->comm = comm; + MPI_Comm_rank(comm, &barrier->rank); + MPI_Comm_size(comm, &barrier->nTasks); + + barrier->nLevels = fls(barrier->rank - 1); + barrier->LevelDone = mymalloc("myIBarrier", barrier->nLevels); + memset(barrier->LevelDone, 0, barrier->nLevels); + + /* find messages we would expect from nonexisting tasks */ + for(level = 0; level < barrier->nLevels; level++) + if((barrier->rank & (1 << level) == 0) && (barrier->rank + (1 << level) >= barrier->nTasks)) + barrier->LevelDone[level] = 1; + + /* find out if we have to send or wait */ + int level = 0; + while(level < barrier->nLevels) + { + if(barrier->rank & (1 << level)) + { + /* we need to send our result */ + int target = barrier->rank - (1 << level); + int level = barrier->nLevels; + MPI_Isend(&level, 1, MPI_INT, target, MPI_TAG_IBARRIER, barrier->comm); + break; + } + else + { + /* check if there is something to recieve in which case we have to wait, otherwise go down one level */ + 
if(barrier->rank + (1 << level) < barrier->nTasks) + { + barrier->levelDone[level] = 1; + break; + } + else + level++; + } + } +} + +/*! \brief Test function for myIBarrier. + * + * \param[in] barrier Object containing information about the barrier. + * \param[out] flag Was test successful? + * \param[in] unused Unused MPI_Status. + * + * \return void + */ +void myIBarrierTest(struct sMyIBarrier *barrier, int *flag, MPI_Status *unused) +{ + flag = 0; + + int rflag; + MPI_Status status; + + MPI_Iprobe(MPI_ANY_SOURCE, MPI_TAG_IBARRIER, barrier->comm, &rflag, &status); + + if(rflag) + { + int source = status.MPI_SOURCE; + + int level; + MPI_Recv(&level, 1, MPI_INT, source, MPI_TAG_IBARRIER, barrier->comm, MPI_STATUS_IGNORE); + + if(source > barrier->rank) + { + /* we got another result, so lets check if we can send out further */ + while((level < barrier->nLevels) && barrier->LevelDone[level]) + level++; + + if(level == barrier->nLevels) + { + if(barrier->rank != 0) + terminate("fail"); + /* ok, the barrier resolved, tell everyone */ + + for(level = 0; level < barrier->nLevels; level++) + { + if(barrier->rank & (1 << level) == 0) + { + int target = barrier->rank + (1 << level); + if(target < barrier->nTasks) + MPI_Isend(&level, 1, MPI_INT, target, MPI_TAG_IBARRIER, barrier->comm); + } + else + break; + } + + flag = 1; + } + else + { + if(barrier->rank & (1 << level)) + { + /* we need to send our result */ + int target = barrier->rank - (1 << level); + int level = barrier->nLevels; + MPI_Isend(&level, 1, MPI_INT, target, MPI_TAG_IBARRIER, barrier->comm); + } + else + { + barrier->LevelDone[level] = 1; + } + } + } + else + { + for(; level < barrier->nLevels; level++) + { + if(barrier->rank & (1 << level) == 0) + { + int target = barrier->rank + (1 << level); + if(target < barrier->nTasks) + MPI_Isend(&level, 1, MPI_INT, target, MPI_TAG_IBARRIER, barrier->comm); + } + else + break; + } + + flag = 1; + } + } +} + +#endif /* #ifdef MYIBARRIER */ diff --git 
a/src/amuse/community/arepo/src/mpi_utils/myIBarrier.h b/src/amuse/community/arepo/src/mpi_utils/myIBarrier.h new file mode 100644 index 0000000000..461f8626c9 --- /dev/null +++ b/src/amuse/community/arepo/src/mpi_utils/myIBarrier.h @@ -0,0 +1,51 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mpi_utils/myIBarrier.h + * \date 05/2018 + * \brief Header for myIBarrier functions. 
+ * \details Declares the state object and the two entry points of the
+ * home-made non-blocking barrier.
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 27.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#ifndef MYIBARRIER_H
+#define MYIBARRIER_H
+
+#ifdef MYIBARRIER
+/* message tag used for all barrier traffic */
+#define MPI_TAG_IBARRIER 0x666
+
+/* State of one barrier, filled in by myIBarrier() and read by myIBarrierTest(). */
+struct sMyIBarrier
+{
+  MPI_Comm comm;   /* communicator the barrier runs on */
+  int rank;        /* rank of this task in comm */
+  int nTasks;      /* number of tasks in comm */
+  int nLevels;     /* number of levels; also the length of LevelDone */
+  char *LevelDone; /* one flag byte per level, zeroed at start */
+};
+
+void myIBarrier(MPI_Comm comm, struct sMyIBarrier *barrier);
+void myIBarrierTest(struct sMyIBarrier *barrier, int *flag, MPI_Status *unused);
+#endif /* #ifdef MYIBARRIER */
+
+#endif /* #ifndef MYIBARRIER_H */
diff --git a/src/amuse/community/arepo/src/mpi_utils/myalltoall.c b/src/amuse/community/arepo/src/mpi_utils/myalltoall.c
new file mode 100644
index 0000000000..dcbb889c91
--- /dev/null
+++ b/src/amuse/community/arepo/src/mpi_utils/myalltoall.c
@@ -0,0 +1,122 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/mpi_utils/myalltoall.c
+ * \date 05/2018
+ * \brief Specialized all-to-all MPI communication functions.
+ * \details contains functions:
+ * void myMPI_Alltoallv(void *sendb, size_t * sendcounts,
+ * size_t * sdispls, void *recvb, size_t * recvcounts,
+ * size_t * rdispls, int len, int big_flag, MPI_Comm comm)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 24.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/*! \brief A wrapper around MPI_Alltoallv that can deal with data in
+ * individual sends that are very big.
+ *
+ * \param[in] sendb Starting address of send buffer.
+ * \param[in] sendcounts Integer array equal to the group size specifying the
+ * number of elements to send to each processor.
+ * \param[in] sdispls Integer array (of length group size). Entry j specifies
+ * the displacement (relative to sendbuf) from which to take the
+ * outgoing data destined for process j.
+ * \param[out] recvb Starting address of receive buffer.
+ * \param[in] recvcounts Integer array equal to the group size specifying the
+ * maximum number of elements that can be received from each
+ * processor.
+ * \param[in] rdispls Integer array (of length group size). Entry i specifies
+ * the displacement (relative to recvbuf) at which to place the
+ * incoming data from process i.
+ * \param[in] len Size of single element in send array.
+ * \param[in] big_flag Flag for communication of large data. If not set, the
+ * normal MPI_Alltoallv function is used.
+ * \param[in] comm MPI communicator.
+ * + * \return void + */ +void myMPI_Alltoallv(void *sendb, size_t *sendcounts, size_t *sdispls, void *recvb, size_t *recvcounts, size_t *rdispls, int len, + int big_flag, MPI_Comm comm) +{ + char *sendbuf = (char *)sendb; + char *recvbuf = (char *)recvb; + + if(big_flag == 0) + { + int ntask; + MPI_Comm_size(comm, &ntask); + + int *scount = (int *)mymalloc("scount", ntask * sizeof(int)); + int *rcount = (int *)mymalloc("rcount", ntask * sizeof(int)); + int *soff = (int *)mymalloc("soff", ntask * sizeof(int)); + int *roff = (int *)mymalloc("roff", ntask * sizeof(int)); + + for(int i = 0; i < ntask; i++) + { + scount[i] = sendcounts[i] * len; + rcount[i] = recvcounts[i] * len; + soff[i] = sdispls[i] * len; + roff[i] = rdispls[i] * len; + } + + MPI_Alltoallv(sendbuf, scount, soff, MPI_BYTE, recvbuf, rcount, roff, MPI_BYTE, comm); + + myfree(roff); + myfree(soff); + myfree(rcount); + myfree(scount); + } + else + { + /* here we definitely have some large messages. We default to the + * pair-wise protocoll, which should be most robust anyway. + */ + + int ntask, thistask; + MPI_Comm_size(comm, &ntask); + MPI_Comm_rank(comm, &thistask); + + for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) + { + int target = thistask ^ ngrp; + + if(target < ntask) + { + if(sendcounts[target] > 0 || recvcounts[target] > 0) + myMPI_Sendrecv(sendbuf + sdispls[target] * len, sendcounts[target] * len, MPI_BYTE, target, TAG_PDATA + ngrp, + recvbuf + rdispls[target] * len, recvcounts[target] * len, MPI_BYTE, target, TAG_PDATA + ngrp, comm, + MPI_STATUS_IGNORE); + } + } + } +} diff --git a/src/amuse/community/arepo/src/mpi_utils/pinning.c b/src/amuse/community/arepo/src/mpi_utils/pinning.c new file mode 100644 index 0000000000..f7a6dbb04e --- /dev/null +++ b/src/amuse/community/arepo/src/mpi_utils/pinning.c @@ -0,0 +1,292 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * <https://www.gnu.org/licenses/>.
+ *
+ * \file src/mpi_utils/pinning.c
+ * \date 05/2018
+ * \brief Routines to pin MPI threads to cores.
+ * \details contains functions:
+ * void get_core_set(void)
+ * void detect_topology(void)
+ * void pin_to_core_set(void)
+ * void report_pinning(void)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 08.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef IMPOSE_PINNING
+#include
+#include
+
+/* size of the text buffers used to print one character per logical core */
+#define MAX_CORES 4096
+
+/* set to 1 by pin_to_core_set() when no pinning is attempted; report_pinning() then returns early */
+static int flag_pinning_error = 0;
+
+/* cpuset: binding recorded by get_core_set(); cpuset_after_MPI_init: binding queried again in pin_to_core_set() */
+static hwloc_cpuset_t cpuset, cpuset_after_MPI_init;
+/* hwloc topology handle, initialised and loaded in detect_topology() */
+static hwloc_topology_t topology;
+/* result of hwloc_topology_get_depth(), stored in detect_topology() */
+static int topodepth;
+/* number of socket / core / PU objects found by detect_topology(); -1 if the type depth is unknown */
+static int sockets;
+static int cores;
+static int pus;
+/* pus / cores, computed in pin_to_core_set() */
+static int hyperthreads_per_core;
+
+/*! \brief Gets the current physical binding of local process.
+ *
+ * Allocates a fresh bitmap and stores the CPU binding of this process in
+ * the file-scope variable cpuset.
+ *
+ * NOTE(review): this uses the file-scope `topology`, which is only
+ * initialised in detect_topology(); presumably callers run
+ * detect_topology() first -- confirm against the call site.
+ *
+ * \return void
+ */
+void get_core_set(void)
+{
+  cpuset = hwloc_bitmap_alloc();
+  hwloc_get_proc_cpubind(topology, getpid(), cpuset, 0);
+}
+
+/*! \brief Determines the network topology Arepo is running on.
+ * + * \return void + */ +void detect_topology(void) +{ + unsigned depth; + + /* Allocate and initialize topology object. */ + hwloc_topology_init(&topology); + + /* Perform the topology detection. */ + hwloc_topology_load(topology); + + /* Get some additional topology information + in case we need the topology depth later. */ + topodepth = hwloc_topology_get_depth(topology); + + depth = hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET); + + if(depth == HWLOC_TYPE_DEPTH_UNKNOWN) + sockets = -1; + else + sockets = hwloc_get_nbobjs_by_depth(topology, depth); + + depth = hwloc_get_type_depth(topology, HWLOC_OBJ_CORE); + + if(depth == HWLOC_TYPE_DEPTH_UNKNOWN) + cores = -1; + else + cores = hwloc_get_nbobjs_by_depth(topology, depth); + + depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU); + + if(depth == HWLOC_TYPE_DEPTH_UNKNOWN) + pus = -1; + else + pus = hwloc_get_nbobjs_by_depth(topology, depth); +} + +/*! \brief Pins the MPI ranks to the available core set. + * + * \return void + */ +void pin_to_core_set(void) +{ + int i, num_threads, thread; + char buf[MAX_CORES + 1]; + char *p = getenv("OMP_NUM_THREADS"); + if(p) + num_threads = atoi(p); + else + num_threads = 1; + + mpi_printf("\n\n"); + mpi_printf("PINNING: We have %d sockets, %d physical cores and %d logical cores on the first MPI-task's node.\n", sockets, cores, + pus); + if(cores <= 0 || sockets <= 0 || pus <= 0) + { + mpi_printf("PINNING: The topology cannot be recognized. 
We refrain from any pinning attempt.\n"); + flag_pinning_error = 1; + return; + } + + hyperthreads_per_core = pus / cores; + + if(hyperthreads_per_core < 1) + terminate("Need at least one logical thread per physical core\n"); + + if(pus > cores) + mpi_printf("PINNING: Looks like %d hyperthreads per physical core are in principle possible.\n", hyperthreads_per_core); + + cpuset_after_MPI_init = hwloc_bitmap_alloc(); + hwloc_get_proc_cpubind(topology, getpid(), cpuset_after_MPI_init, 0); + + if(!hwloc_bitmap_isequal(cpuset, cpuset_after_MPI_init)) + mpi_printf("PINNING: Apparently, the MPI library set some pinning itself. We'll override this.\n"); + + int id, available_pus = 0; + + for(id = hwloc_bitmap_first(cpuset); id != -1; id = hwloc_bitmap_next(cpuset, id)) + available_pus++; + + mpi_printf("PINNING: Looks like %d logical cores are available\n", available_pus); + + if(available_pus == pus) + mpi_printf("PINNING: Looks like all available logical cores are at our disposal.\n"); + else + { + if(available_pus >= 1) + { + mpi_printf("PINNING: Looks like allready before start of the code, a tight binding was imposed.\n"); +#ifdef IMPOSE_PINNING_OVERRIDE_MODE + for(id = 0; id < pus; id++) + hwloc_bitmap_set(cpuset, id); + available_pus = pus; + mpi_printf("PINNING: We are overridung this and make all %d available to us.\n", available_pus); +#else /* #ifdef IMPOSE_PINNING_OVERRIDE_MODE */ + mpi_printf( + "PINNING: We refrain from any pinning attempt ourselves. 
(This can be changed by setting USE_PINNING_OVERRIDE_MODE.)\n"); + flag_pinning_error = 1; + return; +#endif /* #ifdef IMPOSE_PINNING_OVERRIDE_MODE #else */ + } + } + + for(i = 0; i < pus && i < MAX_CORES; i++) + if(hwloc_bitmap_isset(cpuset, i)) + buf[i] = '1'; + else + buf[i] = '-'; + buf[pus] = 0; + + mpi_printf("PINNING: Available logical cores on first node: %s\n", buf); + + int pus_per_task = available_pus / TasksInThisNode; + + mpi_printf("PINNING: %d logical cores are available per MPI Task.\n", pus_per_task); + + if(pus_per_task <= 0) + terminate("Need at least one logical core per MPI task for pinning to make sense. available_pus=%d TasksInThisNode=%d\n", + available_pus, TasksInThisNode); + + int depth, cid, cores_before, id_this, id_found, count; + hwloc_obj_t obj; + hwloc_cpuset_t cpuset_core; + + /* go through all logical cores in sequence of proximity */ + depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU); + + for(cid = 0, cores_before = 0; cores_before < RankInThisNode * pus_per_task && cid < pus; cid++) + { + obj = hwloc_get_obj_by_depth(topology, depth, cid); + + cpuset_core = hwloc_bitmap_dup(obj->cpuset); + if(hwloc_bitmap_isincluded(cpuset_core, cpuset)) + { + cores_before++; + } + hwloc_bitmap_free(cpuset_core); + } + + int pus_per_thread, skip; + + if(pus_per_task > NUM_THREADS) + pus_per_thread = pus_per_task / NUM_THREADS; + else + pus_per_thread = 1; + + /* cid should now be the logical index of the first PU for this MPI task */ + for(thread = 0, id_this = id_found = cid, count = 0; thread < NUM_THREADS; thread++) + { + obj = hwloc_get_obj_by_depth(topology, depth, id_found); + cpuset_thread[thread] = hwloc_bitmap_dup(obj->cpuset); + + for(skip = 0; skip < pus_per_thread; skip++) + { + id_this++; + count++; + + id_found = -1; + if(count >= pus_per_task) + { + id_this = cid; + count = 0; + } + do + { + obj = hwloc_get_obj_by_depth(topology, depth, id_this); + cpuset_core = hwloc_bitmap_dup(obj->cpuset); + 
if(hwloc_bitmap_isincluded(cpuset_core, cpuset)) + { + id_found = id_this; + } + else + { + id_this++; + if(id_this >= pus) + terminate("id_this >= pus"); + } + hwloc_bitmap_free(cpuset_core); + } + while(id_found < 0); + } + } + + hwloc_set_proc_cpubind(topology, getpid(), cpuset_thread[0], HWLOC_CPUBIND_PROCESS); +} + +/*! \brief Prints pinning information for each task. + * + * \return void + */ +void report_pinning(void) +{ + int i; + char buf[MAX_CORES + 1]; + + if(flag_pinning_error) + return; + + hwloc_get_cpubind(topology, cpuset, 0); + + for(i = 0; i < pus && i < MAX_CORES; i++) + if(hwloc_bitmap_isset(cpuset, i)) + buf[i] = '1'; + else + buf[i] = '-'; + buf[pus] = 0; + + for(i = 0; i < NTask; i++) + { + if(ThisTask == i && ThisNode == 0) + printf("PINNING: Node=%4d: Task=%04d: %s\n", ThisNode, ThisTask, buf); + fflush(stdout); + MPI_Barrier(MPI_COMM_WORLD); + } +} +#endif /* #ifdef IMPOSE_PINNING */ diff --git a/src/amuse/community/arepo/src/mpi_utils/sizelimited_sendrecv.c b/src/amuse/community/arepo/src/mpi_utils/sizelimited_sendrecv.c new file mode 100644 index 0000000000..6614f4ed03 --- /dev/null +++ b/src/amuse/community/arepo/src/mpi_utils/sizelimited_sendrecv.c @@ -0,0 +1,116 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/mpi_utils/sizelimited_sendrecv.c + * \date 05/2018 + * \brief MPI_Sendrecv operations split into chunks of maximum size. + * \details If the number of elements in the MPI_Sendrecv is larger than + * count_limit, the function will split up the communication into + * multiple chunks communicated by the usual MPI_Sendrecv routine. + * contains functions: + * int myMPI_Sendrecv(void *sendb, size_t sendcount, + * MPI_Datatype sendtype, int dest, int sendtag, void *recvb, + * size_t recvcount, MPI_Datatype recvtype, int source, + * int recvtag, MPI_Comm comm, MPI_Status * status) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 24.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Self-made sendrecv function with limiter to the number of elements + * that can be sent in one go. + * + * If the total message is longer, multiple MPI_Sendrecv calls are executed + * until the entire message has been communicated. + * + * \param[in] sendb Initial address of send buffer. + * \param[in] sendcount Number of elements in send buffer. + * \param[in] sendtype Type of elements in send buffer (handle). + * \param[in] dest Rank of destination. + * \param[in] sendtag Send tag. + * \param[out] recvb Initial address of receive buffer. + * \param[in] recvcount Number of elements in receive buffer. + * \param[in] recvtype Type of elements in receive buffer (handle). + * \param[in] source Rank of source. + * \param[in] recvtag Receive tag. + * \param[in] comm MPI communicator. + * \param[out] status Status, referring to receive operation. 
+ * + * \return 0 + */ +int myMPI_Sendrecv(void *sendb, size_t sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvb, size_t recvcount, + MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status) +{ + int iter = 0, size_sendtype, size_recvtype, send_now, recv_now; + char *sendbuf = (char *)sendb; + char *recvbuf = (char *)recvb; + + if(dest != source) + terminate("dest != source"); + + MPI_Type_size(sendtype, &size_sendtype); + MPI_Type_size(recvtype, &size_recvtype); + + if(dest == ThisTask) + { + memcpy(recvbuf, sendbuf, recvcount * size_recvtype); + return 0; + } + + size_t count_limit = MPI_MESSAGE_SIZELIMIT_IN_BYTES / size_sendtype; + + while(sendcount > 0 || recvcount > 0) + { + if(sendcount > count_limit) + { + send_now = count_limit; + iter++; + } + else + send_now = sendcount; + + if(recvcount > count_limit) + recv_now = count_limit; + else + recv_now = recvcount; + + MPI_Sendrecv(sendbuf, send_now, sendtype, dest, sendtag, recvbuf, recv_now, recvtype, source, recvtag, comm, status); + + sendcount -= send_now; + recvcount -= recv_now; + + sendbuf += send_now * size_sendtype; + recvbuf += recv_now * size_recvtype; + } + + return 0; +} diff --git a/src/amuse/community/arepo/src/ngbtree/ngbtree.c b/src/amuse/community/arepo/src/ngbtree/ngbtree.c new file mode 100644 index 0000000000..ea0ec2e8cb --- /dev/null +++ b/src/amuse/community/arepo/src/ngbtree/ngbtree.c @@ -0,0 +1,1394 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/ngbtree/ngbtree.c + * \date 05/2018 + * \brief Construct neighbor tree. + * \details This file contains the neighbor tree construction. This is a + * tree structure that includes all gas cells, but no other + * particle types. + * contains functions: + * int ngb_treebuild(int npart) + * static inline unsigned long long ngb_double_to_int(double d) + * int ngb_treebuild_construct(int npart) + * int ngb_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z) + * void ngb_update_node_recursive(int no, int sib, int father, int *last, int mode) + * void ngb_record_topnode_siblings(int no, int sib) + * void ngb_exchange_topleafdata(void) + * void drift_node(struct NgbNODE *current, integertime time1) + * void ngb_update_velocities(void) + * void ngb_update_vbounds(int i, int *nchanged, int *nodelist) + * void ngb_finish_vounds_update(int nchanged, int *nodelist) + * void ngb_update_rangebounds(int i, int *nchanged, int *nodelist) + * void ngb_finish_rangebounds_update(int nchanged, int *nodelist) + * void ngb_treemodifylength(int delta_NgbMaxPart) + * void ngb_treeallocate(void) + * void ngb_treefree(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../gravity/forcetree.h" + +static void ngb_record_topnode_siblings(int no, int sib); +static int ngb_treebuild_construct(int npart); +static void 
ngb_update_node_recursive(int no, int sib, int father, int *last, int mode); +static void ngb_exchange_topleafdata(void); +static int ngb_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z); +static void ngb_update_vbounds(int i, int *nchanged, int *nodelist); +static void ngb_finish_vounds_update(int nchanged, int *nodelist); + +static int *Ngb_Node_Tmp_Sibling; + +/*! \brief This function is a driver routine for constructing the neighbor + * oct-tree, which is done by calling a small number of other + * functions. + * + * Does not build a tree if All.TotNumGas == 0. + * + * \param[in] npart Number of particles in tree. + * + * \return Number of nodes in the tree. + */ +int ngb_treebuild(int npart) +{ + if(All.TotNumGas == 0) + return 0; + + TIMER_START(CPU_NGBTREEBUILD); + + mpi_printf("NGBTREE: Ngb-tree construction. (presently allocated=%g MB)\n", AllocatedBytes / (1024.0 * 1024.0)); + + double t0 = second(); + + int flag; + do + { + int flag_single = ngb_treebuild_construct(npart); + + MPI_Allreduce(&flag_single, &flag, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); + if(flag == -1) + { + myfree(Ngb_Node_Tmp_Sibling + Ngb_MaxPart); + ngb_treefree(); + + All.NgbTreeAllocFactor *= 1.15; + mpi_printf("Increasing NgbTreeAllocFactor, new value=%g\n", All.NgbTreeAllocFactor); + + ngb_treeallocate(); + } + } + while(flag == -1); + + int ntopleaves = DomainNLocalTopleave[ThisTask]; + int *list = DomainListOfLocalTopleaves + DomainFirstLocTopleave[ThisTask]; + + for(int i = 0; i < ntopleaves; i++) + { + int last = -1; + int no = Ngb_DomainNodeIndex[list[i]]; + + if(no < Ngb_MaxPart || no >= Ngb_MaxPart + Ngb_MaxNodes) + terminate("i=%d no=%d task=%d \n", i, no, DomainTask[list[i]]); + + ngb_update_node_recursive(no, Ngb_Node_Tmp_Sibling[no], no, &last, 0); + + /* if there was no particle in the node, we need to initialize nextnode of the node */ + if(no == last) + Ngb_Nodes[no].u.d.nextnode = -1; + + Ngb_Nodes[no].u.d.sibling = last; /* we temporarily store 
this here and will later restore this sibling pointer, + which is anyway equal to Ngb_Node_Tmp_Sibling[index] */ + } + + ngb_exchange_topleafdata(); + + /* now put in "pseudo" particles as nextnode in non-local topleaves */ + for(int i = 0; i < NTopleaves; i++) + { + if(DomainTask[i] != ThisTask) + { + int index = Ngb_DomainNodeIndex[i]; + Ngb_Nodes[index].u.d.nextnode = Ngb_MaxPart + Ngb_MaxNodes + i; + } + } + + /* now update the top-level tree nodes */ + int last = -1; + ngb_update_node_recursive(Ngb_MaxPart, -1, -1, &last, 1); + + if(last >= Ngb_MaxPart) + { + if(last >= Ngb_MaxPart + Ngb_MaxNodes) /* a pseudo-particle */ + Ngb_Nextnode[last - Ngb_MaxNodes] = -1; + else + Ngb_Nodes[last].u.d.nextnode = -1; + } + else + Ngb_Nextnode[last] = -1; + + TIMER_STOPSTART(CPU_NGBTREEBUILD, CPU_LOGS); + + double numnodes = Ngb_NumNodes, tot_numnodes; + MPI_Reduce(&numnodes, &tot_numnodes, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + + double t1 = second(); + mpi_printf("NGBTREE: Ngb-tree construction done. took %g sec =%g NTopnodes=%d NTopleaves=%d\n", timediff(t0, t1), + tot_numnodes / NTask, NTopnodes, NTopleaves); + + myfree(Ngb_Node_Tmp_Sibling + Ngb_MaxPart); + + Ngb_MarkerValue = 0; + memset(Ngb_Marker, 0, (Ngb_MaxPart + Ngb_NumNodes) * sizeof(int)); + + TIMER_STOP(CPU_LOGS); + + return Ngb_NumNodes; +} + +/*! \brief Converts double precision coordinate to unsigned long long int. + * + * \param[in] d Double precision coordinate that is to be converted. + * + * \return Unsigned long long int represenation of d. + */ +static inline unsigned long long ngb_double_to_int(double d) +{ + union + { + double d; + unsigned long long ull; + } u; + u.d = d; + return (u.ull & 0xFFFFFFFFFFFFFllu); +} + +/*! \brief Constructs the neighbor oct-tree. + * + * The index convention for accessing tree nodes is the following: + * + * 0...NumPart-1 reference single particles. + * Ngb_MaxPart.... Ngb_MaxPart+Numnodes-1 references tree nodes. + * Ngb_MaxPart + All.MaxNgb_Nodes.... 
reference "pseudo + * particles", i.e. the marker that indicates a top-node lying on + * another CPU. + * + * `Ngb_Nodes_base' points to the first tree node, + * `Ngb_Nodes' is shifted such that Ngb_Nodes[Ngb_MaxPart] gives the first + * tree node. + * + * \param[in] npart Number of particles involved. + * + * \return status: 0 (default) -1: too many nodes. + */ +int ngb_treebuild_construct(int npart) +{ + /* create an empty root node */ + Ngb_NextFreeNode = Ngb_MaxPart; /* index of first free node */ + + for(int i = 0; i < 8; i++) + Ngb_Nodes[Ngb_NextFreeNode].u.suns[i] = -1; + + Ngb_NumNodes = 1; + Ngb_NextFreeNode++; + + /* create a set of empty nodes corresponding to the top-level domain + * grid. We need to generate these nodes first to make sure that we have a + * complete top-level tree which allows the easy insertion of the + * pseudo-particles at the right place + */ + if(ngb_create_empty_nodes(Ngb_MaxPart, 0, 1, 0, 0, 0) < 0) + return -1; + + Ngb_FirstNonTopLevelNode = Ngb_NextFreeNode; + + Ngb_Node_Tmp_Sibling = (int *)mymalloc("Ngb_Node_Tmp_Sibling", (Ngb_MaxNodes + 1) * sizeof(int)); + Ngb_Node_Tmp_Sibling -= Ngb_MaxPart; + + ngb_record_topnode_siblings(Ngb_MaxPart, -1); + + unsigned long long *ngbTree_IntPos_list = + (unsigned long long *)mymalloc("ngbTree_IntPos_list", 3 * npart * sizeof(unsigned long long)); + + /* now we insert all particles */ + { + int out_of_space = 0; + + int threadid = get_thread_num(); + int start, end, size; + + int first_empty_slot = Ngb_NextFreeNode + threadid * TAKE_NSLOTS_IN_ONE_GO; + int count_empty_slot = TAKE_NSLOTS_IN_ONE_GO; + + if(threadid == 0) + Ngb_NextFreeNode += NUM_THREADS * TAKE_NSLOTS_IN_ONE_GO; + + size = (npart - 1) / NUM_THREADS + 1; + start = threadid * size; + end = (threadid + 1) * size - 1; + if(end >= npart) + end = npart - 1; + + for(int i = start; i <= end && out_of_space == 0; i++) + { + unsigned long long xxb = ngb_double_to_int(((P[i].Pos[0] - DomainCorner[0]) * DomainInverseLen) + 1.0); + 
unsigned long long yyb = ngb_double_to_int(((P[i].Pos[1] - DomainCorner[1]) * DomainInverseLen) + 1.0); + unsigned long long zzb = ngb_double_to_int(((P[i].Pos[2] - DomainCorner[2]) * DomainInverseLen) + 1.0); + unsigned long long mask = ((unsigned long long)1) << (52 - 1); + unsigned char shiftx = (52 - 1); + unsigned char shifty = (52 - 2); + unsigned char shiftz = (52 - 3); + unsigned char levels = 0; + + ngbTree_IntPos_list[3 * i + 0] = xxb; + ngbTree_IntPos_list[3 * i + 1] = yyb; + ngbTree_IntPos_list[3 * i + 2] = zzb; + + int no = 0; + while(TopNodes[no].Daughter >= 0) /* walk down top tree to find correct leaf */ + { + unsigned char subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) | + ((unsigned char)((zzb & mask) >> (shiftz--)))); + + mask >>= 1; + levels++; + + no = TopNodes[no].Daughter + TopNodes[no].MortonToPeanoSubnode[subnode]; + } + + no = TopNodes[no].Leaf; + + if(DomainTask[no] != ThisTask) + terminate("STOP! ID=%lld of type=%d is inserted into task=%d, but should be on task=%d no=%d\n", (long long)P[i].ID, + P[i].Type, ThisTask, DomainTask[no], no); + + int th = Ngb_DomainNodeIndex[no]; + + signed long long centermask = (0xFFF0000000000000llu) >> levels; + + int parent = -1; /* note: will not be used below before it is changed */ + unsigned char subnode = 0; + + while(1) + { + if(th >= Ngb_MaxPart) /* we are dealing with an internal node */ + { + subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) | + ((unsigned char)((zzb & mask) >> (shiftz--)))); + + centermask >>= 1; + mask >>= 1; + levels++; + + if(levels > MAX_TREE_LEVEL) + { + /* seems like we're dealing with particles at identical (or extremely close) + * locations. Shift subnode index to allow tree construction. 
Note: Multipole moments + * of tree are still correct, but one should MAX_TREE_LEVEL large enough to have + * DomainLen/2^MAX_TREE_LEEL < gravitational softening length + */ + for(int j = 0; j < 8; j++) + { + if(Ngb_Nodes[th].u.suns[subnode] < 0) + break; + + subnode++; + if(subnode >= 8) + subnode = 7; + } + } + + int nn = Ngb_Nodes[th].u.suns[subnode]; + + if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */ + { + parent = th; + th = nn; + } + else + { + /* here we have found an empty slot where we can attach + * the new particle as a leaf. + */ + Ngb_Nodes[th].u.suns[subnode] = i; + break; /* done for this particle */ + } + } + else + { + /* We try to insert into a leaf with a single particle. Need + * to generate a new internal node at this point. + * Then resume trying to insert the new particle at + * the newly created internal node + */ + int thold = th; + + if(count_empty_slot) + { + th = first_empty_slot + (TAKE_NSLOTS_IN_ONE_GO - count_empty_slot); + count_empty_slot--; + } + else + { + { + th = Ngb_NextFreeNode; + Ngb_NextFreeNode += TAKE_NSLOTS_IN_ONE_GO; + } + + first_empty_slot = th; + count_empty_slot = (TAKE_NSLOTS_IN_ONE_GO - 1); + + if(first_empty_slot + TAKE_NSLOTS_IN_ONE_GO - Ngb_MaxPart >= Ngb_MaxNodes) + { + out_of_space = 1; + break; + } + } + + Ngb_Nodes[parent].u.suns[subnode] = th; + struct NgbNODE *nfreep = &Ngb_Nodes[th]; + + for(int j = 0; j < 8; j++) + nfreep->u.suns[j] = -1; + + unsigned long long *intppos = &ngbTree_IntPos_list[3 * thold]; + + subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) | + ((unsigned char)((intppos[2] & mask) >> shiftz))); + + nfreep->u.suns[subnode] = thold; + } + } + } + } + + myfree(ngbTree_IntPos_list); + + if((Ngb_NumNodes = Ngb_NextFreeNode - Ngb_MaxPart) >= Ngb_MaxNodes) + { + if(All.NgbTreeAllocFactor > MAX_TREE_ALLOC_FACTOR) + { + dump_particles(); + terminate("task %d: out of space for neighbor tree, stopping 
with particle dump.\n", ThisTask); + } + else + return -1; + } + + return 0; +} + +/*! \brief Create empty ngb-tree node. + * + * This function recursively creates a set of empty tree nodes which + * corresponds to the top-level tree for the domain grid. This is done to + * ensure that this top-level tree is always "complete" so that we can easily + * associate the pseudo-particles of other CPUs with tree-nodes at a given + * level in the tree, even when the particle population is so sparse that + * some of these nodes are actually empty. + * + * \param[in] no Index of node in Ngb_Nodes array. + * \param[in] topnode Index in TopNodes. + * \param[in] bits Number of bits used. + * \param[in] x Integer coordinate X. + * \param[in] y Integer coordinate Y. + * \param[in] z Integer coordinate Z. + * + * \return Status: 0 success; -1 error. + */ +int ngb_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z) +{ + if(TopNodes[topnode].Daughter >= 0) + { + for(int i = 0; i < 2; i++) + for(int j = 0; j < 2; j++) + for(int k = 0; k < 2; k++) + { + if(Ngb_NumNodes >= Ngb_MaxNodes) + { + if(All.NgbTreeAllocFactor > MAX_TREE_ALLOC_FACTOR) + { + dump_particles(); + terminate("task %d: looks like a serious problem (NTopnodes=%d), stopping with particle dump.\n", ThisTask, + NTopnodes); + } + return -1; + } + + int sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits); + + int count = i + 2 * j + 4 * k; + + Ngb_Nodes[no].u.suns[count] = Ngb_NextFreeNode; + + for(int n = 0; n < 8; n++) + Ngb_Nodes[Ngb_NextFreeNode].u.suns[n] = -1; + + if(TopNodes[TopNodes[topnode].Daughter + sub].Daughter == -1) + Ngb_DomainNodeIndex[TopNodes[TopNodes[topnode].Daughter + sub].Leaf] = Ngb_NextFreeNode; + + Ngb_NextFreeNode++; + Ngb_NumNodes++; + + if(ngb_create_empty_nodes(Ngb_NextFreeNode - 1, TopNodes[topnode].Daughter + sub, bits + 1, 2 * x + i, 2 * y + j, + 2 * z + k) < 0) + return -1; + } + } + + return 0; +} + +/*! \brief Determine node ranges. 
+ *
+ * This routine determines the node ranges of a given internal node
+ * and all its subnodes using a recursive computation. The result is
+ * stored in the Ngb_Nodes[] structure in the sequence of this tree-walk.
+ *
+ * While walking, the routine also links the visited elements into the
+ * nextnode chain: the element visited previously (*last) is made to point
+ * to the current one before *last is advanced.
+ *
+ * \param[in] no Index of node.
+ * \param[in] sib Sibling node of no.
+ * \param[in] father Parent node of no.
+ * \param[in, out] last Pointer to last node for which this function was
+ *                 called.
+ * \param[in] mode 0: process a leaf branch; 1: process top-level nodes.
+ *
+ * \return void
+ */
+void ngb_update_node_recursive(int no, int sib, int father, int *last, int mode)
+{
+  int j, jj, k, p, pp, nextsib, suns[8];
+  MyNgbTreeFloat range_min[3];
+  MyNgbTreeFloat range_max[3];
+  MyNgbTreeFloat vertex_vmin[3];
+  MyNgbTreeFloat vertex_vmax[3];
+#ifdef TREE_BASED_TIMESTEPS
+  MyNgbTreeFloat vmin[3], vmax[3], maxcsnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+  if(no >= Ngb_MaxPart && no < Ngb_MaxPart + Ngb_MaxNodes) /* internal node */
+    {
+      /* append this node to the nextnode chain behind the previously visited element */
+      if(*last >= 0)
+        {
+          if(*last >= Ngb_MaxPart)
+            {
+              if(*last == no)
+                terminate("as");
+
+              if(*last >= Ngb_MaxPart + Ngb_MaxNodes) /* a pseudo-particle */
+                Ngb_Nextnode[*last - Ngb_MaxNodes] = no;
+              else
+                Ngb_Nodes[*last].u.d.nextnode = no;
+            }
+          else
+            Ngb_Nextnode[*last] = no;
+        }
+
+      *last = no;
+
+      /* becomes 1 for a top-level leaf node, i.e. a top-level node that was not marked with -2 */
+      int not_interal_top_level = 0;
+
+      if(mode == 1)
+        {
+          if(!(no >= Ngb_MaxPart && no < Ngb_FirstNonTopLevelNode))
+            terminate("can't be");
+
+          if(Ngb_Node_Tmp_Sibling[no] != -2)
+            not_interal_top_level = 1;
+        }
+
+      if(not_interal_top_level)
+        {
+          p = Ngb_Nodes[no].u.d.nextnode;
+
+          if(p >= Ngb_MaxPart + Ngb_MaxNodes &&
+             p < Ngb_MaxPart + Ngb_MaxNodes + NTopleaves) /* a pseudo-particle, i.e. we are dealing with a non-local top-leaf */
+            ngb_update_node_recursive(p, sib, no, last, mode);
+          else
+            {
+              /* this is local toplevel node */
+              *last = Ngb_Nodes[no].u.d.sibling;
+            }
+
+          if(Ngb_Node_Tmp_Sibling[no] != sib)
+            terminate("Ngb_Node_Tmp_Sibling[no] != sib");
+
+          /* restore the sibling pointer for local toplevel nodes (we had temporarily stored the last element in this branch) */
+          Ngb_Nodes[no].u.d.sibling = sib;
+          Ngb_Nodes[no].father      = father;
+        }
+      else
+        {
+          for(j = 0; j < 8; j++)
+            suns[j] = Ngb_Nodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will
+                                                  overwrite one element (union!) */
+
+#ifdef TREE_BASED_TIMESTEPS
+          maxcsnd = 0;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+          /* start from an inverted range so that the first daughter replaces both bounds */
+          for(k = 0; k < 3; k++)
+            {
+              range_min[k] = MAX_NGBRANGE_NUMBER;
+              range_max[k] = -MAX_NGBRANGE_NUMBER;
+
+              vertex_vmin[k] = MAX_NGBRANGE_NUMBER;
+              vertex_vmax[k] = -MAX_NGBRANGE_NUMBER;
+
+#ifdef TREE_BASED_TIMESTEPS
+              vmin[k] = MAX_NGBRANGE_NUMBER;
+              vmax[k] = -MAX_NGBRANGE_NUMBER;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+            }
+
+          for(j = 0; j < 8; j++)
+            {
+              if((p = suns[j]) >= 0)
+                {
+                  /* check if we have a sibling on the same level */
+                  for(jj = j + 1; jj < 8; jj++)
+                    if((pp = suns[jj]) >= 0)
+                      break;
+
+                  if(jj < 8) /* yes, we do */
+                    nextsib = pp;
+                  else
+                    nextsib = sib;
+
+                  ngb_update_node_recursive(p, nextsib, no, last, mode);
+
+                  /* merge the bounds of daughter p into the bounds of this node */
+                  if(p >= Ngb_MaxPart) /* an internal node or pseudo particle */
+                    {
+                      if(p >= Ngb_MaxPart + Ngb_MaxNodes) /* a pseudo particle */
+                        {
+                          /* nothing to be done here because the mass of the
+                           * pseudo-particle is still zero. This will be changed
+                           * later.
+                           */
+                        }
+                      else
+                        {
+#ifdef TREE_BASED_TIMESTEPS
+                          if(maxcsnd < ExtNgb_Nodes[p].MaxCsnd)
+                            maxcsnd = ExtNgb_Nodes[p].MaxCsnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+                          for(k = 0; k < 3; k++)
+                            {
+                              if(range_min[k] > Ngb_Nodes[p].u.d.range_min[k])
+                                range_min[k] = Ngb_Nodes[p].u.d.range_min[k];
+
+                              if(range_max[k] < Ngb_Nodes[p].u.d.range_max[k])
+                                range_max[k] = Ngb_Nodes[p].u.d.range_max[k];
+
+                              if(vertex_vmin[k] > Ngb_Nodes[p].vertex_vmin[k])
+                                vertex_vmin[k] = Ngb_Nodes[p].vertex_vmin[k];
+
+                              if(vertex_vmax[k] < Ngb_Nodes[p].vertex_vmax[k])
+                                vertex_vmax[k] = Ngb_Nodes[p].vertex_vmax[k];
+
+#ifdef TREE_BASED_TIMESTEPS
+                              if(vmin[k] > ExtNgb_Nodes[p].vmin[k])
+                                vmin[k] = ExtNgb_Nodes[p].vmin[k];
+
+                              if(vmax[k] < ExtNgb_Nodes[p].vmax[k])
+                                vmax[k] = ExtNgb_Nodes[p].vmax[k];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+                            }
+                        }
+                    }
+                  else /* a particle */
+                    {
+#ifdef TREE_BASED_TIMESTEPS
+                      if(maxcsnd < SphP[p].Csnd)
+                        maxcsnd = SphP[p].Csnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+                      for(k = 0; k < 3; k++)
+                        {
+                          if(range_min[k] > P[p].Pos[k])
+                            range_min[k] = P[p].Pos[k];
+
+                          if(range_max[k] < P[p].Pos[k])
+                            range_max[k] = P[p].Pos[k];
+
+                          /* vertex velocities are only read for particles of type 0 */
+                          if(P[p].Type == 0)
+                            {
+                              if(vertex_vmin[k] > SphP[p].VelVertex[k])
+                                vertex_vmin[k] = SphP[p].VelVertex[k];
+
+                              if(vertex_vmax[k] < SphP[p].VelVertex[k])
+                                vertex_vmax[k] = SphP[p].VelVertex[k];
+                            }
+
+#ifdef TREE_BASED_TIMESTEPS
+                          if(vmin[k] > P[p].Vel[k])
+                            vmin[k] = P[p].Vel[k];
+
+                          if(vmax[k] < P[p].Vel[k])
+                            vmax[k] = P[p].Vel[k];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+                        }
+                    }
+                }
+            }
+
+#ifdef TREE_BASED_TIMESTEPS
+          ExtNgb_Nodes[no].MaxCsnd = maxcsnd;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+          /* store the accumulated bounds; this writes the u.d part of the union, hence the suns[] backup above */
+          for(k = 0; k < 3; k++)
+            {
+              Ngb_Nodes[no].u.d.range_min[k] = range_min[k];
+              Ngb_Nodes[no].u.d.range_max[k] = range_max[k];
+              Ngb_Nodes[no].vertex_vmin[k]   = vertex_vmin[k];
+              Ngb_Nodes[no].vertex_vmax[k]   = vertex_vmax[k];
+#ifdef TREE_BASED_TIMESTEPS
+              ExtNgb_Nodes[no].vmin[k] = vmin[k];
+              ExtNgb_Nodes[no].vmax[k] = vmax[k];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+            }
+
+          Ngb_Nodes[no].u.d.sibling = sib;
+          Ngb_Nodes[no].father      = father;
+
+          Ngb_Nodes[no].Ti_Current = All.Ti_Current;
+        }
+    }
+  else /* single particle or pseudo particle */
+    {
+      /* append this element to the nextnode chain behind the previously visited element */
+      if(*last >= 0)
+        {
+          if(*last >= Ngb_MaxPart)
+            {
+              if(*last >= Ngb_MaxPart + Ngb_MaxNodes) /* a pseudo-particle */
+                Ngb_Nextnode[*last - Ngb_MaxNodes] = no;
+              else
+                Ngb_Nodes[*last].u.d.nextnode = no;
+            }
+          else
+            {
+              Ngb_Nextnode[*last] = no;
+            }
+        }
+      if(no < Ngb_MaxPart) /* only set it for single particles... */
+        {
+          if(father < Ngb_MaxPart)
+            terminate("no=%d father=%d\n", no, father);
+
+          Ngb_Father[no] = father;
+        }
+
+      *last = no;
+    }
+}
+
+/*! \brief Records the sibling of every top-level node in Ngb_Node_Tmp_Sibling.
+ *
+ * Internal top-level nodes are marked with -2 and their eight daughters are
+ * processed recursively; top-level leaf nodes store their sibling index.
+ *
+ * \param[in] no Index of node.
+ * \param[in] sib Index of sibling.
+ *
+ * \return void
+ */
+void ngb_record_topnode_siblings(int no, int sib)
+{
+  /* note: when this routine is called, only toplevel tree nodes are present */
+
+  if(Ngb_Nodes[no].u.suns[0] >= 0)
+    {
+      /* marker value to designate internal nodes in the top-level tree */
+      Ngb_Node_Tmp_Sibling[no] = -2;
+
+      /* NOTE(review): this repeats the condition of the enclosing if and is therefore always true here */
+      if(Ngb_Nodes[no].u.suns[0] >= 0)
+        for(int j = 0; j < 8; j++)
+          {
+            int p = Ngb_Nodes[no].u.suns[j];
+            int nextsib;
+
+            /* the sibling of a daughter is the next daughter; the last daughter inherits the sibling of its parent */
+            if(j < 7)
+              nextsib = Ngb_Nodes[no].u.suns[j + 1];
+            else
+              nextsib = sib;
+
+            ngb_record_topnode_siblings(p, nextsib);
+          }
+    }
+  else
+    Ngb_Node_Tmp_Sibling[no] = sib; /* a top-level leaf node */
+}
+
+/*! \brief Communicates top leaf data.
+ * + * \return void + */ +void ngb_exchange_topleafdata(void) +{ + struct DomainNODE + { + MyNgbTreeFloat range_min[3]; + MyNgbTreeFloat range_max[3]; + MyNgbTreeFloat vertex_vmin[3]; + MyNgbTreeFloat vertex_vmax[3]; +#ifdef TREE_BASED_TIMESTEPS + MyNgbTreeFloat MaxCsnd, vmin[3], vmax[3]; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + }; + + struct DomainNODE *DomainMoment = (struct DomainNODE *)mymalloc("DomainMoment", NTopleaves * sizeof(struct DomainNODE)); + + /* share the pseudo-particle data accross CPUs */ + int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask); + int *recvoffset = (int *)mymalloc("recvoffset", sizeof(int) * NTask); + int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask); + int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask); + + for(int task = 0; task < NTask; task++) + recvcounts[task] = 0; + + for(int n = 0; n < NTopleaves; n++) + recvcounts[DomainTask[n]]++; + + for(int task = 0; task < NTask; task++) + bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE); + + recvoffset[0] = 0, byteoffset[0] = 0; + for(int task = 1; task < NTask; task++) + { + recvoffset[task] = recvoffset[task - 1] + recvcounts[task - 1]; + byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1]; + } + + struct DomainNODE *loc_DomainMoment = + (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE)); + + int idx = 0; + for(int n = 0; n < NTopleaves; n++) + { + if(DomainTask[n] == ThisTask) + { + int no = Ngb_DomainNodeIndex[n]; + + /* read out the multipole moments from the local base cells */ +#ifdef TREE_BASED_TIMESTEPS + loc_DomainMoment[idx].MaxCsnd = ExtNgb_Nodes[no].MaxCsnd; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + for(int k = 0; k < 3; k++) + { + loc_DomainMoment[idx].range_min[k] = Ngb_Nodes[no].u.d.range_min[k]; + loc_DomainMoment[idx].range_max[k] = Ngb_Nodes[no].u.d.range_max[k]; + loc_DomainMoment[idx].vertex_vmin[k] = Ngb_Nodes[no].vertex_vmin[k]; + 
loc_DomainMoment[idx].vertex_vmax[k] = Ngb_Nodes[no].vertex_vmax[k]; +#ifdef TREE_BASED_TIMESTEPS + loc_DomainMoment[idx].vmin[k] = ExtNgb_Nodes[no].vmin[k]; + loc_DomainMoment[idx].vmax[k] = ExtNgb_Nodes[no].vmax[k]; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + } + idx++; + } + } + + MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD); + + for(int task = 0; task < NTask; task++) + recvcounts[task] = 0; + + for(int n = 0; n < NTopleaves; n++) + { + int task = DomainTask[n]; + if(task != ThisTask) + { + int no = Ngb_DomainNodeIndex[n]; + int idx = recvoffset[task] + recvcounts[task]++; + +#ifdef TREE_BASED_TIMESTEPS + ExtNgb_Nodes[no].MaxCsnd = DomainMoment[idx].MaxCsnd; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + for(int k = 0; k < 3; k++) + { + Ngb_Nodes[no].u.d.range_min[k] = DomainMoment[idx].range_min[k]; + Ngb_Nodes[no].u.d.range_max[k] = DomainMoment[idx].range_max[k]; + Ngb_Nodes[no].vertex_vmin[k] = DomainMoment[idx].vertex_vmin[k]; + Ngb_Nodes[no].vertex_vmax[k] = DomainMoment[idx].vertex_vmax[k]; +#ifdef TREE_BASED_TIMESTEPS + ExtNgb_Nodes[no].vmin[k] = DomainMoment[idx].vmin[k]; + ExtNgb_Nodes[no].vmax[k] = DomainMoment[idx].vmax[k]; +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + } + Ngb_Nodes[no].Ti_Current = All.Ti_Current; + } + } + + myfree(loc_DomainMoment); + myfree(byteoffset); + myfree(bytecounts); + myfree(recvoffset); + myfree(recvcounts); + myfree(DomainMoment); +} + +/*! \brief Drifts a node to time time1. + * + * \param[in] current Current node. + * \param[in] time1 Time to be drifted to. 
+ * + * \return void + */ +void drift_node(struct NgbNODE *current, integertime time1) +{ + double dt_drift; + + if(All.ComovingIntegrationOn) + dt_drift = get_drift_factor(current->Ti_Current, time1); + else + dt_drift = (time1 - current->Ti_Current) * All.Timebase_interval; + + for(int j = 0; j < 3; j++) + { + current->u.d.range_min[j] += current->vertex_vmin[j] * dt_drift; + current->u.d.range_max[j] += current->vertex_vmax[j] * dt_drift; + } + + current->Ti_Current = time1; +} + +/*! \brief Updates velocity informataion in ngb node data. + * + * \return void + */ +void ngb_update_velocities(void) +{ + TIMER_START(CPU_NGBTREEUPDATEVEL); + + Ngb_MarkerValue++; + + int nchanged = 0; + int *nodelist = (int *)mymalloc("nodelist", NTopleaves * sizeof(int)); + + for(int idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + int target = TimeBinsHydro.ActiveParticleList[idx]; + if(target >= 0) + if(P[target].Type == 0) + ngb_update_vbounds(target, &nchanged, nodelist); + } + + for(int timebin = All.HighestSynchronizedTimeBin; timebin >= 0; timebin--) + { + for(int target = TimeBinsGravity.FirstInTimeBin[timebin]; target >= 0; target = TimeBinsGravity.NextInTimeBin[target]) + if(target >= 0) + if(P[target].Type == 0) + ngb_update_vbounds(target, &nchanged, nodelist); + } + + ngb_finish_vounds_update(nchanged, nodelist); + + myfree(nodelist); + + TIMER_STOP(CPU_NGBTREEUPDATEVEL); +} + +/*! \brief Updates vmin and vmax in ngb nodes. + * + * Inverse tree walk. + * + * \param[in] i Index of particle. + * \param[in, out] nchanged Number of changed top level nodes. + * \param[out] nodelist Top level nodes that were changed. 
+ * + * \return void + */ +void ngb_update_vbounds(int i, int *nchanged, int *nodelist) +{ + int no = Ngb_Father[i]; + + while(no >= 0) + { + if(Ngb_Nodes[no].Ti_Current != All.Ti_Current) + drift_node(&Ngb_Nodes[no], All.Ti_Current); + + int flag_changed = 0; + + for(int j = 0; j < 3; j++) + { + if(Ngb_Nodes[no].vertex_vmin[j] > SphP[i].VelVertex[j]) + { + Ngb_Nodes[no].vertex_vmin[j] = SphP[i].VelVertex[j]; + flag_changed = 1; + } + + if(Ngb_Nodes[no].vertex_vmax[j] < SphP[i].VelVertex[j]) + { + Ngb_Nodes[no].vertex_vmax[j] = SphP[i].VelVertex[j]; + flag_changed = 1; + } + +#ifdef TREE_BASED_TIMESTEPS + if(ExtNgb_Nodes[no].vmin[j] > P[i].Vel[j]) + { + ExtNgb_Nodes[no].vmin[j] = P[i].Vel[j]; + flag_changed = 1; + } + + if(ExtNgb_Nodes[no].vmax[j] < P[i].Vel[j]) + { + ExtNgb_Nodes[no].vmax[j] = P[i].Vel[j]; + flag_changed = 1; + } +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + } + + if(flag_changed == 0) + break; + + if(no < Ngb_FirstNonTopLevelNode) /* top-level tree-node reached */ + { + if(Ngb_Marker[no] != Ngb_MarkerValue) + { + Ngb_Marker[no] = Ngb_MarkerValue; + nodelist[*nchanged] = no; + *nchanged = *nchanged + 1; + } + break; + } + + no = Ngb_Nodes[no].father; + } +} + +/*! \brief Finalizes velocity bounds update. + * + * Exchanges changed information in top level nodes to all tasks. + * + * \param[in] nchanged Number of changed top level nodes. 
+ * \param[in] nodelist List of changed top level nodes.
+ *
+ * Every task contributes one record (node index plus velocity bounds) per
+ * entry of nodelist. The records of all tasks are gathered, written into the
+ * local copy of the named node, and then propagated upwards through the
+ * father links until a node is reached whose bounds need no change.
+ *
+ * \return void
+ */
+void ngb_finish_vounds_update(int nchanged, int *nodelist)
+{
+  /* record exchanged per changed top-level node; transferred as raw bytes (MPI_BYTE) */
+  struct DomainNODE
+  {
+    int node;
+    MyNgbTreeFloat vertex_vmin[3];
+    MyNgbTreeFloat vertex_vmax[3];
+#ifdef TREE_BASED_TIMESTEPS
+    MyNgbTreeFloat vmin[3];
+    MyNgbTreeFloat vmax[3];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+  };
+
+  /* share the pseudo-particle data across CPUs */
+  int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask);
+  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
+  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);
+
+  /* recvcounts[task] = number of records that task will contribute */
+  MPI_Allgather(&nchanged, 1, MPI_INT, recvcounts, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(int task = 0; task < NTask; task++)
+    bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE);
+
+  /* exclusive prefix sum of the byte counts */
+  byteoffset[0] = 0;
+  for(int task = 1; task < NTask; task++)
+    byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+
+  struct DomainNODE *loc_DomainMoment =
+      (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE));
+
+  /* pack the local records */
+  for(int i = 0; i < nchanged; i++)
+    {
+      int no                   = nodelist[i];
+      loc_DomainMoment[i].node = no;
+
+      for(int j = 0; j < 3; j++)
+        {
+          loc_DomainMoment[i].vertex_vmin[j] = Ngb_Nodes[no].vertex_vmin[j];
+          loc_DomainMoment[i].vertex_vmax[j] = Ngb_Nodes[no].vertex_vmax[j];
+#ifdef TREE_BASED_TIMESTEPS
+          loc_DomainMoment[i].vmin[j] = ExtNgb_Nodes[no].vmin[j];
+          loc_DomainMoment[i].vmax[j] = ExtNgb_Nodes[no].vmax[j];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+        }
+    }
+
+  int tot_nchanged = 0;
+  for(int task = 0; task < NTask; task++)
+    tot_nchanged += recvcounts[task];
+
+  struct DomainNODE *tot_DomainMoment = (struct DomainNODE *)mymalloc("tot_DomainMoment", tot_nchanged * sizeof(struct DomainNODE));
+
+  MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, tot_DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);
+
+  /* apply every gathered record, including the ones this task sent itself */
+  for(int i = 0; i < tot_nchanged; i++)
+    {
+      int no = tot_DomainMoment[i].node;
+
+      if(Ngb_Nodes[no].Ti_Current != All.Ti_Current)
+        drift_node(&Ngb_Nodes[no], All.Ti_Current);
+
+      /* the named node takes the received bounds verbatim */
+      for(int j = 0; j < 3; j++)
+        {
+          Ngb_Nodes[no].vertex_vmin[j] = tot_DomainMoment[i].vertex_vmin[j];
+          Ngb_Nodes[no].vertex_vmax[j] = tot_DomainMoment[i].vertex_vmax[j];
+#ifdef TREE_BASED_TIMESTEPS
+          ExtNgb_Nodes[no].vmin[j] = tot_DomainMoment[i].vmin[j];
+          ExtNgb_Nodes[no].vmax[j] = tot_DomainMoment[i].vmax[j];
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+        }
+
+      no = Ngb_Nodes[no].father;
+
+      /* ancestors are only ever enlarged; stop at the first one that already contains the bounds */
+      while(no >= 0)
+        {
+          if(Ngb_Nodes[no].Ti_Current != All.Ti_Current)
+            drift_node(&Ngb_Nodes[no], All.Ti_Current);
+
+          int flag_changed = 0;
+
+          for(int j = 0; j < 3; j++)
+            {
+              if(Ngb_Nodes[no].vertex_vmin[j] > tot_DomainMoment[i].vertex_vmin[j])
+                {
+                  Ngb_Nodes[no].vertex_vmin[j] = tot_DomainMoment[i].vertex_vmin[j];
+                  flag_changed                 = 1;
+                }
+
+              if(Ngb_Nodes[no].vertex_vmax[j] < tot_DomainMoment[i].vertex_vmax[j])
+                {
+                  Ngb_Nodes[no].vertex_vmax[j] = tot_DomainMoment[i].vertex_vmax[j];
+                  flag_changed                 = 1;
+                }
+#ifdef TREE_BASED_TIMESTEPS
+              if(ExtNgb_Nodes[no].vmin[j] > tot_DomainMoment[i].vmin[j])
+                {
+                  ExtNgb_Nodes[no].vmin[j] = tot_DomainMoment[i].vmin[j];
+                  flag_changed             = 1;
+                }
+
+              if(ExtNgb_Nodes[no].vmax[j] < tot_DomainMoment[i].vmax[j])
+                {
+                  ExtNgb_Nodes[no].vmax[j] = tot_DomainMoment[i].vmax[j];
+                  flag_changed             = 1;
+                }
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+            }
+
+          if(flag_changed == 0)
+            break;
+
+          no = Ngb_Nodes[no].father;
+        }
+    }
+
+  /* released in reverse order of allocation */
+  myfree(tot_DomainMoment);
+  myfree(loc_DomainMoment);
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(recvcounts);
+}
+
+/*! \brief Updates min and max position in ngb nodes.
+ *
+ * Inverse tree walk.
+ *
+ * \param[in] i Index of particle.
+ * \param[in, out] nchanged Number of changed top level nodes.
+ * \param[out] nodelist Top level nodes that were changed.
+ *
+ * Starting at the father of particle i, each node on the way up is drifted
+ * to All.Ti_Current if necessary and its range_min/range_max are enlarged
+ * to contain P[i].Pos. The walk stops at the first node that needed no
+ * change, or at a top-level node, which is appended to nodelist unless it
+ * already carries the current Ngb_MarkerValue.
+ *
+ * \return void
+ */
+void ngb_update_rangebounds(int i, int *nchanged, int *nodelist)
+{
+  for(int no = Ngb_Father[i]; no >= 0; no = Ngb_Nodes[no].father)
+    {
+      if(Ngb_Nodes[no].Ti_Current != All.Ti_Current)
+        drift_node(&Ngb_Nodes[no], All.Ti_Current);
+
+      int widened = 0;
+
+      for(int axis = 0; axis < 3; axis++)
+        {
+          if(P[i].Pos[axis] < Ngb_Nodes[no].u.d.range_min[axis])
+            {
+              Ngb_Nodes[no].u.d.range_min[axis] = P[i].Pos[axis];
+              widened                           = 1;
+            }
+
+          if(P[i].Pos[axis] > Ngb_Nodes[no].u.d.range_max[axis])
+            {
+              Ngb_Nodes[no].u.d.range_max[axis] = P[i].Pos[axis];
+              widened                           = 1;
+            }
+        }
+
+      /* box already contained the particle: nodes further up need no change either */
+      if(!widened)
+        break;
+
+      if(no < Ngb_FirstNonTopLevelNode) /* top-level tree-node reached */
+        {
+          if(Ngb_Marker[no] != Ngb_MarkerValue)
+            {
+              Ngb_Marker[no]      = Ngb_MarkerValue;
+              nodelist[*nchanged] = no;
+              ++*nchanged;
+            }
+          break;
+        }
+    }
+}
+
+/*! \brief Finalizes position bounds update.
+ *
+ * Exchanges changed information in top level nodes to all tasks.
+ *
+ * \param[in] nchanged Number of changed top level nodes.
+ * \param[in] nodelist List of changed top level nodes.
+ *
+ * Every task contributes one record (node index plus range_min/range_max)
+ * per entry of nodelist. The records of all tasks are gathered, written into
+ * the local copy of the named node, and then propagated upwards through the
+ * father links until a node is reached whose box needs no change.
+ *
+ * \return void
+ */
+void ngb_finish_rangebounds_update(int nchanged, int *nodelist)
+{
+  /* record exchanged per changed top-level node; transferred as raw bytes (MPI_BYTE) */
+  struct DomainNODE
+  {
+    int node;
+    MyNgbTreeFloat range_min[3];
+    MyNgbTreeFloat range_max[3];
+  };
+
+  /* share the pseudo-particle data across CPUs */
+  int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask);
+  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
+  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);
+
+  /* recvcounts[task] = number of records that task will contribute */
+  MPI_Allgather(&nchanged, 1, MPI_INT, recvcounts, 1, MPI_INT, MPI_COMM_WORLD);
+
+  for(int task = 0; task < NTask; task++)
+    bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE);
+
+  /* exclusive prefix sum of the byte counts */
+  byteoffset[0] = 0;
+  for(int task = 1; task < NTask; task++)
+    byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+
+  struct DomainNODE *loc_DomainMoment =
+      (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE));
+
+  /* pack the local records */
+  for(int i = 0; i < nchanged; i++)
+    {
+      int no                   = nodelist[i];
+      loc_DomainMoment[i].node = no;
+
+      for(int j = 0; j < 3; j++)
+        {
+          loc_DomainMoment[i].range_min[j] = Ngb_Nodes[no].u.d.range_min[j];
+          loc_DomainMoment[i].range_max[j] = Ngb_Nodes[no].u.d.range_max[j];
+        }
+    }
+
+  int tot_nchanged = 0;
+  for(int task = 0; task < NTask; task++)
+    tot_nchanged += recvcounts[task];
+
+  struct DomainNODE *tot_DomainMoment = (struct DomainNODE *)mymalloc("tot_DomainMoment", tot_nchanged * sizeof(struct DomainNODE));
+
+  MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, tot_DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);
+
+  /* apply every gathered record, including the ones this task sent itself */
+  for(int i = 0; i < tot_nchanged; i++)
+    {
+      int no = tot_DomainMoment[i].node;
+
+      if(Ngb_Nodes[no].Ti_Current != All.Ti_Current)
+        drift_node(&Ngb_Nodes[no], All.Ti_Current);
+
+      /* the named node takes the received box verbatim */
+      for(int j = 0; j < 3; j++)
+        {
+          Ngb_Nodes[no].u.d.range_min[j] = tot_DomainMoment[i].range_min[j];
+          Ngb_Nodes[no].u.d.range_max[j] = tot_DomainMoment[i].range_max[j];
+        }
+
+      no = Ngb_Nodes[no].father;
+
+      /* ancestors are only ever enlarged; stop at the first one that already contains the box */
+      while(no >= 0)
+        {
+          if(Ngb_Nodes[no].Ti_Current != All.Ti_Current)
+            drift_node(&Ngb_Nodes[no], All.Ti_Current);
+
+          int flag_changed = 0;
+
+          for(int j = 0; j < 3; j++)
+            {
+              if(Ngb_Nodes[no].u.d.range_min[j] > tot_DomainMoment[i].range_min[j])
+                {
+                  Ngb_Nodes[no].u.d.range_min[j] = tot_DomainMoment[i].range_min[j];
+                  flag_changed                   = 1;
+                }
+
+              if(Ngb_Nodes[no].u.d.range_max[j] < tot_DomainMoment[i].range_max[j])
+                {
+                  Ngb_Nodes[no].u.d.range_max[j] = tot_DomainMoment[i].range_max[j];
+                  flag_changed                   = 1;
+                }
+            }
+
+          if(flag_changed == 0)
+            break;
+
+          no = Ngb_Nodes[no].father;
+        }
+    }
+
+  /* released in reverse order of allocation */
+  myfree(tot_DomainMoment);
+  myfree(loc_DomainMoment);
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(recvcounts);
+}
+
+/*! \brief Adjust ngb-tree structures due to a change in number of gas cells.
+ *
+ * Stored indices >= Ngb_MaxPart refer to internal nodes or pseudo
+ * particles. When Ngb_MaxPart changes by delta_NgbMaxPart, all such stored
+ * indices are shifted by the same amount, the per-particle arrays are
+ * resized, and the Ngb_Nodes/ExtNgb_Nodes base pointers are moved so that
+ * the shifted indices address the same storage as before.
+ *
+ * \param[in] delta_NgbMaxPart Difference in number of cells.
+ *
+ * \return void
+ */
+void ngb_treemodifylength(int delta_NgbMaxPart)
+{
+  mpi_printf("ALLOCATE: Need to adjust NgbTree because Ngb_MaxPart needs to grow by %d\n", delta_NgbMaxPart);
+
+  /* step 1: shift all stored node / pseudo-particle indices, still using the old Ngb_MaxPart */
+  for(int i = 0; i < Ngb_MaxPart + NTopleaves; i++) /* check for particles and pseudo particles */
+    if(Ngb_Nextnode[i] >= Ngb_MaxPart)              /* internal node or pseudo particle */
+      Ngb_Nextnode[i] += delta_NgbMaxPart;
+
+  for(int i = 0; i < Ngb_MaxPart; i++)
+    if(Ngb_Father[i] >= Ngb_MaxPart) /* internal node or pseudo particle */
+      Ngb_Father[i] += delta_NgbMaxPart;
+
+  for(int i = 0; i < Ngb_MaxNodes; i++)
+    {
+      if(Ngb_Nodes[i + Ngb_MaxPart].u.d.nextnode >= Ngb_MaxPart) /* internal node or pseudo particle */
+        Ngb_Nodes[i + Ngb_MaxPart].u.d.nextnode += delta_NgbMaxPart;
+
+      if(Ngb_Nodes[i + Ngb_MaxPart].u.d.sibling >= Ngb_MaxPart) /* internal node or pseudo particle */
+        Ngb_Nodes[i + Ngb_MaxPart].u.d.sibling += delta_NgbMaxPart;
+
+      if(Ngb_Nodes[i + Ngb_MaxPart].father >= Ngb_MaxPart)
+        Ngb_Nodes[i + Ngb_MaxPart].father += delta_NgbMaxPart;
+    }
+
+  for(int i = 0; i < NTopleaves; i++)
+    Ngb_DomainNodeIndex[i] += delta_NgbMaxPart;
+
+  /* step 2: resize Ngb_Nextnode and move its trailing NTopleaves entries to the new end of the particle range */
+  Ngb_Nextnode = (int *)myrealloc_movable(Ngb_Nextnode, (Ngb_MaxPart + delta_NgbMaxPart + NTopleaves) * sizeof(int));
+
+  memmove(&Ngb_Nextnode[Ngb_MaxPart + delta_NgbMaxPart], &Ngb_Nextnode[Ngb_MaxPart], NTopleaves * sizeof(int));
+
+  /* step 3: from here on Ngb_MaxPart holds the new value */
+  Ngb_MaxPart += delta_NgbMaxPart;
+
+  Ngb_FirstNonTopLevelNode += delta_NgbMaxPart;
+
+  /* the node arrays are addressed with an offset of -Ngb_MaxPart (see ngb_treeallocate), so the base moves too */
+  Ngb_Nodes -= delta_NgbMaxPart;
+
+#ifdef TREE_BASED_TIMESTEPS
+  ExtNgb_Nodes -= delta_NgbMaxPart;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+  Ngb_Father = (int *)myrealloc_movable(Ngb_Father, Ngb_MaxPart * sizeof(int));
+
+  /* step 4: resize Ngb_Marker, move its node part up by delta and fill the gap with -1 (all bytes 0xFF) */
+  Ngb_Marker = (int *)myrealloc_movable(Ngb_Marker, (Ngb_MaxNodes + Ngb_MaxPart) * sizeof(int));
+  memmove(Ngb_Marker + Ngb_MaxPart, Ngb_Marker + Ngb_MaxPart - delta_NgbMaxPart, Ngb_MaxNodes * sizeof(int));
+  memset(Ngb_Marker + Ngb_MaxPart - delta_NgbMaxPart, -1, delta_NgbMaxPart * sizeof(int));
+}
+
+/*! \brief Allocates arrays for neighbor tree.
+ *
+ * If Ngb_MaxPart is still 0, Ngb_MaxPart and Ngb_MaxNodes are first derived
+ * from All.MaxPartSph, All.NgbTreeAllocFactor, BASENUMBER and NTopnodes.
+ * Nothing is allocated if All.TotNumGas is 0. Terminates if Ngb_Nodes is
+ * already set.
+ *
+ * \return void
+ */
+void ngb_treeallocate(void)
+{
+  if(Ngb_MaxPart == 0)
+    {
+      Ngb_MaxPart  = All.MaxPartSph;
+      Ngb_MaxNodes = (int)(All.NgbTreeAllocFactor * (All.MaxPartSph + BASENUMBER)) + NTopnodes;
+    }
+
+  if(All.TotNumGas == 0)
+    return;
+
+  if(Ngb_Nodes)
+    terminate("already allocated");
+
+  Ngb_DomainNodeIndex = (int *)mymalloc_movable(&Ngb_DomainNodeIndex, "Ngb_DomainNodeIndex", NTopleaves * sizeof(int));
+
+  /* the pointer is shifted by -Ngb_MaxPart so that the first allocated element is Ngb_Nodes[Ngb_MaxPart] */
+  Ngb_Nodes = (struct NgbNODE *)mymalloc_movable(&Ngb_Nodes, "Ngb_Nodes", (Ngb_MaxNodes + 1) * sizeof(struct NgbNODE));
+  Ngb_Nodes -= Ngb_MaxPart;
+
+#ifdef TREE_BASED_TIMESTEPS
+  ExtNgb_Nodes = (struct ExtNgbNODE *)mymalloc_movable(&ExtNgb_Nodes, "ExtNgb_Nodes", (Ngb_MaxNodes + 1) * sizeof(struct ExtNgbNODE));
+  ExtNgb_Nodes -= Ngb_MaxPart;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+  Ngb_Nextnode = (int *)mymalloc_movable(&Ngb_Nextnode, "Ngb_Nextnode", (Ngb_MaxPart + NTopleaves) * sizeof(int));
+  Ngb_Father   = (int *)mymalloc_movable(&Ngb_Father, "Ngb_Father", Ngb_MaxPart * sizeof(int));
+
+  Ngb_Marker = (int *)mymalloc_movable(&Ngb_Marker, "Ngb_Marker", (Ngb_MaxNodes + Ngb_MaxPart) * sizeof(int));
+}
+
+/*! \brief This function frees the memory allocated for the neighbor tree.
+ *
+ * Returns immediately if All.TotNumGas is 0. Otherwise the arrays are
+ * released in reverse order of their allocation in ngb_treeallocate(), the
+ * pointers are reset to NULL and Ngb_MaxPart/Ngb_MaxNodes to 0. Terminates
+ * if Ngb_Nodes is not set.
+ *
+ * \return void
+ */
+void ngb_treefree(void)
+{
+  if(All.TotNumGas == 0)
+    return;
+
+  if(Ngb_Nodes)
+    {
+      myfree_movable(Ngb_Marker);
+      myfree_movable(Ngb_Father);
+      myfree_movable(Ngb_Nextnode);
+#ifdef TREE_BASED_TIMESTEPS
+      /* adding Ngb_MaxPart undoes the offset applied at allocation time */
+      myfree_movable(ExtNgb_Nodes + Ngb_MaxPart);
+      ExtNgb_Nodes = NULL;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+      myfree_movable(Ngb_Nodes + Ngb_MaxPart);
+      myfree_movable(Ngb_DomainNodeIndex);
+
+      Ngb_Marker          = NULL;
+      Ngb_Father          = NULL;
+      Ngb_Nodes           = NULL;
+      Ngb_DomainNodeIndex = NULL;
+      Ngb_Nextnode        = NULL;
+      Ngb_MaxPart         = 0;
+      Ngb_MaxNodes        = 0;
+    }
+  else
+    terminate("trying to free the tree even though it's not allocated");
+}
diff --git a/src/amuse/community/arepo/src/ngbtree/ngbtree_search.c b/src/amuse/community/arepo/src/ngbtree/ngbtree_search.c
new file mode 100644
index 0000000000..e777a7c29c
--- /dev/null
+++ b/src/amuse/community/arepo/src/ngbtree/ngbtree_search.c
@@ -0,0 +1,376 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              .
+ * + * \file src/ngbtree/ngbtree_search.c + * \date 05/2018 + * \brief This file contains a search routine on the neighbor tree. + * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void find_nearest_meshpoint_global(mesh_search_data * + * searchdata_input, int nn, int hsmlguess, int verbose) + * int ngbsearch_primary_cell_evaluate(int target, int mode, + * int threadid) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/* temporary particle arrays */ +static MyDouble *ngbsearch_nearest_dist; +static MyDouble *ngbsearch_hsml; +static mesh_search_data *searchdata; + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble pos[3]; /* tracer particle position */ + MyDouble hsml; /* current search radius */ + MyDouble distance; /* nearest neighbor distance */ + + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First note of communication. 
+ * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + in->pos[0] = searchdata[i].Pos[0]; + in->pos[1] = searchdata[i].Pos[1]; + in->pos[2] = searchdata[i].Pos[2]; + + in->hsml = ngbsearch_hsml[i]; + in->distance = ngbsearch_nearest_dist[i]; + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Distance; /* distance to closest cell on task */ + int Task; + int Index; +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? + * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + if(out->Index >= 0) + { + ngbsearch_nearest_dist[i] = out->Distance; + searchdata[i].Task = out->Task; + searchdata[i].u.Index = out->Index; + } + } + else /* combine */ + { + /* closer cell on other task? */ + if(out->Distance < ngbsearch_nearest_dist[i]) + { + ngbsearch_nearest_dist[i] = out->Distance; + searchdata[i].Task = out->Task; + searchdata[i].u.Index = out->Index; + } + } +} + +#include "../utils/generic_comm_helpers2.h" + +static int ngbsearch_primary_cell_evaluate(int target, int mode, int threadid); +static int n; + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. 
+ * + * \return void + */ +static void kernel_local(void) +{ + int i; + + /* do local particles */ + { + int j, threadid = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + i = NextParticle++; + + if(i >= n) + break; + + if(searchdata[i].Task == -1) + ngbsearch_primary_cell_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + ngbsearch_primary_cell_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief Searches the cells at the positions in searchdata. + * + * This function searches the cells which are at the positions specified in + * searchdata. The Pos field must be set. After the search is performed the + * Task and Index field contain the task/index of the cell at position Pos. + * If hsmlguess=1 initial search radius is read from Index/Hsml union in + * searchdata. + * + * \param[in] searchdata_input Contains the search positions, after function + * call the fields Task and Index are set. + * \param[in] nn Number of items in searchdata. + * \param[in] hsmlguess Guess for initial search radius; + * 1: from searchdata; else from MeanVolume of cells. + * \param[in] verbose More output. 
+ * + * \return void + */ +void find_nearest_meshpoint_global(mesh_search_data *searchdata_input, int nn, int hsmlguess, int verbose) +{ + int i; + n = nn; + ngbsearch_nearest_dist = mymalloc("ngbsearch_nearest_dist", n * sizeof(MyDouble)); + ngbsearch_hsml = mymalloc("ngbsearch_hsml", n * sizeof(MyDouble)); + searchdata = searchdata_input; + + for(i = 0; i < n; i++) + { + ngbsearch_nearest_dist[i] = MAX_REAL_NUMBER; + + if(hsmlguess) + ngbsearch_hsml[i] = searchdata[i].u.hsmlguess; + else + ngbsearch_hsml[i] = 1e-6 * pow(All.MeanVolume, 1.0 / 3); + + searchdata[i].Task = -1; // None found yet + } + + generic_set_MaxNexport(); + + int ntot, iter = 0; + + /* we will repeat the whole thing for those points where we did not find a nearest neighbor */ + do + { + generic_comm_pattern(n, kernel_local, kernel_imported); + + int npleft = 0; + + /* do final operations on results */ + for(i = 0; i < n; i++) + { + if(searchdata[i].Task == -1) + { + npleft++; + ngbsearch_hsml[i] *= 2.0; + + if(iter >= MAXITER - 10) + { + printf("i=%d task=%d hsml=%g nearest dist=%g pos=(%g|%g|%g)\n", i, ThisTask, ngbsearch_hsml[i], + ngbsearch_nearest_dist[i], searchdata[i].Pos[0], searchdata[i].Pos[1], searchdata[i].Pos[2]); + myflush(stdout); + } + if(iter > MAXITER) + terminate("NGBSEARCH: iter > MAXITER"); + } + } + + /* sum up the left overs */ + MPI_Allreduce(&npleft, &ntot, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + if(ntot > 0) /* ok, we need to repeat for a few particles */ + { + iter++; + if(iter > 0 && ThisTask == 0 && verbose) + { + printf("NGBSEARCH: iteration %d: need to repeat for %d points.\n", iter, ntot); + myflush(stdout); + } + + if(iter > MAXITER) + terminate("NGBSEARCH: failed to converge in tracer particles\n"); + } + } + while(ntot > 0); + + myfree(ngbsearch_hsml); + myfree(ngbsearch_nearest_dist); +} + +/*! \brief Performs the neighbor search. + * + * \param[in] target the index of the particle to process(mode 0: in + * searchdata, mode 1: in NgbSearchDataGet/Result). 
+ * \param[in] mode either 0 (handle local particles) or 1 (handle particles + * sent to us). + * \param[in] treadid Id of thread. + * + * \return 0 + */ +int ngbsearch_primary_cell_evaluate(int target, int mode, int threadid) +{ + int j, n; + int numnodes, *firstnode; + MyDouble h, distmax; + MyDouble dx, dy, dz, r; + MyDouble *pos; + data_in local, *target_data; + data_out out; + + int index = -1; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + target_data = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + target_data = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = target_data->pos; + h = target_data->hsml; + distmax = target_data->distance; + + int numngb = ngb_treefind_variable_threads(pos, h, target, mode, threadid, numnodes, firstnode); + + for(n = 0; n < numngb; n++) + { + j = Thread[threadid].Ngblist[n]; + + dx = pos[0] - P[j].Pos[0]; + dy = pos[1] - P[j].Pos[1]; + dz = pos[2] - P[j].Pos[2]; + + if(dx > boxHalf_X) + dx -= boxSize_X; + if(dx < -boxHalf_X) + dx += boxSize_X; + if(dy > boxHalf_Y) + dy -= boxSize_Y; + if(dy < -boxHalf_Y) + dy += boxSize_Y; + if(dz > boxHalf_Z) + dz -= boxSize_Z; + if(dz < -boxHalf_Z) + dz += boxSize_Z; + + r = sqrt(dx * dx + dy * dy + dz * dz); + if(r < distmax && r < h && P[j].ID != 0 && P[j].Mass > 0) + { + distmax = r; + index = j; + } + } + + out.Distance = distmax; + out.Task = ThisTask; + out.Index = index; + + if(index < 0) + { + out.Distance = MAX_REAL_NUMBER; + out.Task = -1; + out.Index = -1; + } + + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return 0; +} diff --git a/src/amuse/community/arepo/src/ngbtree/ngbtree_walk.c b/src/amuse/community/arepo/src/ngbtree/ngbtree_walk.c new file mode 100644 index 0000000000..c682ce157d --- /dev/null +++ b/src/amuse/community/arepo/src/ngbtree/ngbtree_walk.c @@ -0,0 +1,225 @@ +/*! 
+ * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/ngbtree/ngbtree_walk.c + * \date 05/2018 + * \brief Routines to walk the ngb tree. + * \details contains functions: + * int ngb_treefind_variable_threads(MyDouble searchcenter[3], + * MyFloat hsml, int target, int mode, int thread_id, int + * numnodes, int *firstnode) + * int ngb_treefind_export_node_threads(int no, int target, int + * thread_id, int image_flag) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 16.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Finds all cells around seearchcenter in region with radius hsml. + * + * This function returns the number of neighbors with distance <= hsml, and + * returns the particle indices in the global buffer Ngblist. + * The tree traversal starts at startnode. + * Keep in mind that this is usually called within an *_evaluate function + * within the generic communication pattern. 
/*! \brief Finds all cells around searchcenter in region with radius hsml.
 *
 *  The function collects every type-0 particle whose distance to
 *  searchcenter is <= hsml. Indices are appended to
 *  Thread[thread_id].Ngblist and the squared distances to
 *  Thread[thread_id].R2list (same ordering in both arrays).
 *
 *  In mode MODE_LOCAL_PARTICLES the walk starts at the root node
 *  (index Ngb_MaxPart). Otherwise it starts, for each k < numnodes, at the
 *  first daughter of firstnode[k].
 *
 *  Keep in mind that this is usually called within an *_evaluate function
 *  within the generic communication pattern. This means that first, the local
 *  (bound to this task) search is performed and the local neighbors written
 *  to the array, then communication happens and afterwards, the function is
 *  called again in imported mode, finding particles on other tasks.
 *
 *  \param[in] searchcenter Center of the neighbor search.
 *  \param[in] hsml Radius of the search.
 *  \param[in] target Index of the particle around which the search is
 *             performed; needed for parallel search. If < 0, only local search
 *             is performed (pseudo particles are skipped without export).
 *  \param[in] mode Mode for local or imported particle search.
 *  \param[in] thread_id ID of thread (always 0 in our case).
 *  \param[in] numnodes Number of nodes on this task (1 for mode local;
 *             for mode imported: given by generic_get_numnodes(...) ).
 *  \param[in] firstnode Node to start with (in case of mode imported).
 *
 *  \return The number of neighbors found, or -1 if
 *          ngb_treefind_export_node_threads() returned a non-zero status.
 */
int ngb_treefind_variable_threads(MyDouble searchcenter[3], MyFloat hsml, int target, int mode, int thread_id, int numnodes,
                                  int *firstnode)
{
  MyDouble search_min[3], search_max[3], search_max_Lsub[3], search_min_Ladd[3];

  /* axis-aligned bounding box of the search sphere, enlarged by 0.1 per cent */
  for(int i = 0; i < 3; i++)
    {
      search_min[i] = searchcenter[i] - 1.001 * hsml;
      search_max[i] = searchcenter[i] + 1.001 * hsml;
    }

  /* upper box edge shifted down by one box length in each dimension */
  search_max_Lsub[0] = search_max[0] - boxSize_X;
  search_max_Lsub[1] = search_max[1] - boxSize_Y;
  search_max_Lsub[2] = search_max[2] - boxSize_Z;

  /* lower box edge shifted up by one box length in each dimension */
  search_min_Ladd[0] = search_min[0] + boxSize_X;
  search_min_Ladd[1] = search_min[1] + boxSize_Y;
  search_min_Ladd[2] = search_min[2] + boxSize_Z;

  int numngb = 0;
  double xtmp, ytmp, ztmp; /* NOTE(review): not referenced directly here; presumably scratch for the NGB_PERIODIC_LONG_* macros -- confirm */
  double hsml2 = hsml * hsml;

  for(int k = 0; k < numnodes; k++)
    {
      int no;

      if(mode == MODE_LOCAL_PARTICLES)
        {
          no = Ngb_MaxPart; /* root node */
        }
      else
        {
          no = firstnode[k];
          no = Ngb_Nodes[no].u.d.nextnode; /* open it */
        }

      while(no >= 0)
        {
          if(no < Ngb_MaxPart) /* single particle */
            {
              int p = no;
              no    = Ngb_Nextnode[no]; /* advance first, so that 'continue' below moves on to the next entry */

              if(P[p].Type > 0) /* only type-0 particles are accepted as neighbors */
                continue;

              if(P[p].Ti_Current != All.Ti_Current)
                {
                  drift_particle(p, All.Ti_Current);
                }

              /* cheap per-axis rejection before the squared distance is computed;
               * NOTE(review): assumes the NGB_PERIODIC_LONG_* macros return a non-negative wrapped separation -- confirm */
              double dx = NGB_PERIODIC_LONG_X(P[p].Pos[0] - searchcenter[0]);
              if(dx > hsml)
                continue;
              double dy = NGB_PERIODIC_LONG_Y(P[p].Pos[1] - searchcenter[1]);
              if(dy > hsml)
                continue;
              double dz = NGB_PERIODIC_LONG_Z(P[p].Pos[2] - searchcenter[2]);
              if(dz > hsml)
                continue;

              double r2 = dx * dx + dy * dy + dz * dz;
              if(r2 > hsml2)
                continue;

              Thread[thread_id].R2list[numngb]    = r2;
              Thread[thread_id].Ngblist[numngb++] = p;
            }
          else if(no < Ngb_MaxPart + Ngb_MaxNodes) /* internal node */
            {
              struct NgbNODE *current = &Ngb_Nodes[no];

              if(mode == MODE_IMPORTED_PARTICLES)
                {
                  if(no <
                     Ngb_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */
                    break;
                }

              no = current->u.d.sibling; /* in case the node can be discarded */

              if(current->Ti_Current != All.Ti_Current)
                {
                  drift_node(current, All.Ti_Current);
                }

              /* per dimension: discard the node if it overlaps neither the search box
               * nor the copy of the search box shifted by one box length */
              if(search_min[0] > current->u.d.range_max[0] && search_max_Lsub[0] < current->u.d.range_min[0])
                continue;
              if(search_min_Ladd[0] > current->u.d.range_max[0] && search_max[0] < current->u.d.range_min[0])
                continue;

              if(search_min[1] > current->u.d.range_max[1] && search_max_Lsub[1] < current->u.d.range_min[1])
                continue;
              if(search_min_Ladd[1] > current->u.d.range_max[1] && search_max[1] < current->u.d.range_min[1])
                continue;

              if(search_min[2] > current->u.d.range_max[2] && search_max_Lsub[2] < current->u.d.range_min[2])
                continue;
              if(search_min_Ladd[2] > current->u.d.range_max[2] && search_max[2] < current->u.d.range_min[2])
                continue;

              no = current->u.d.nextnode; /* ok, we need to open the node */
            }
          else /* pseudo particle */
            {
              if(mode == MODE_IMPORTED_PARTICLES)
                terminate("mode == MODE_IMPORTED_PARTICLES should not occur here");

              if(target >= 0) /* if no target is given, export will not occur */
                if(ngb_treefind_export_node_threads(no, target, thread_id, 0))
                  return -1;

              no = Ngb_Nextnode[no - Ngb_MaxNodes];
              continue;
            }
        }
    }
  return numngb;
}

/*! \brief Prepares export of ngb-tree node.
 *
 *  Registers the target for export to the task that owns the pseudo
 *  particle (once per target/task pair) and appends one node entry to the
 *  node list of the thread's export buffer. The thread's ExportSpace
 *  counter is reduced accordingly.
 *
 *  \param[in] no Pseudoparticle node to be exported.
 *  \param[in] target (Local) index to identify what it refers to.
 *  \param[in] thread_id ID of thread (0 in our case).
 *  \param[in] image_flag Bit flag used in EXTENDED_GHOST_SEARCH (ignored
 *             otherwise).
 *
 *  \return 0
 */
int ngb_treefind_export_node_threads(int no, int target, int thread_id, int image_flag)
{
  /* The task indicated by the pseudoparticle node */
  int task = DomainTask[no - (Ngb_MaxPart + Ngb_MaxNodes)];

  /* Exportflag[task] remembers the last target registered for this task,
   * so each target gets only one PartList entry per destination task */
  if(Thread[thread_id].Exportflag[task] != target)
    {
      Thread[thread_id].Exportflag[task]     = target;
      int nexp                               = Thread[thread_id].Nexport++;
      Thread[thread_id].PartList[nexp].Task  = task;
      Thread[thread_id].PartList[nexp].Index = target;
      Thread[thread_id].ExportSpace -= Thread[thread_id].ItemSize;
    }

  /* the node list starts InitialSpace bytes behind PartList and is filled
   * with negative indices (-1, -2, ...), i.e. towards lower addresses */
  int nexp                      = Thread[thread_id].NexportNodes++;
  nexp                          = -1 - nexp;
  struct datanodelist *nodelist = (struct datanodelist *)(((char *)Thread[thread_id].PartList) + Thread[thread_id].InitialSpace);
  nodelist[nexp].Task           = task;
  nodelist[nexp].Index          = target;
  nodelist[nexp].Node           = Ngb_DomainNodeIndex[no - (Ngb_MaxPart + Ngb_MaxNodes)];
#ifdef EXTENDED_GHOST_SEARCH
  nodelist[nexp].BitFlags = image_flag;
#endif /* #ifdef EXTENDED_GHOST_SEARCH */
  Thread[thread_id].ExportSpace -= sizeof(struct datanodelist) + sizeof(int);
  return 0;
}
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/star_formation/sfr_eEOS.c + * \date 05/2018 + * \brief Star formation rate routines for the effective multi-phase + * model. + * \details contains functions: + * void cooling_and_starformation(void) + * double get_starformation_rate(int i) + * void init_clouds(void) + * void integrate_sfr(void) + * void set_units_sfr(void) + * double calc_egyeff(int i, double gasdens, double *ne, + * double *x, double *tsfr, double *factorEVP) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../gravity/forcetree.h" + +#ifdef USE_SFR + +/*! \brief Main driver for star formation and gas cooling. + * + * This function loops over all the active gas cells. If a given cell + * meets the criteria for star formation to be active the multi-phase + * model is activated, the properties of the cell are updated according to + * the latter and the star formation rate computed. 
/*! \brief Main driver for star formation and gas cooling.
 *
 *  This function loops over all the active gas cells. If a given cell
 *  meets the criteria for star formation to be active the multi-phase
 *  model is activated, the properties of the cell are updated according to
 *  the latter and the star formation rate computed. In the other case, the
 *  standard isochoric cooling is applied to the gas cell by calling the
 *  function cool_cell() and the star formation rate is set to 0.
 *
 *  Side effects visible in this function: SphP[i].Utherm, SphP[i].Energy,
 *  SphP[i].Ne, SphP[i].Sfr (and SphP[i].CoolHeat with OUTPUT_COOLHEAT) are
 *  updated per cell, and TimeBinSfr[] is reset for synchronized time bins
 *  and then accumulated.
 *
 *  \return void
 */
void cooling_and_starformation(void)
{
  TIMER_START(CPU_COOLINGSFR);

  int idx, i, bin, flag;
  double dt, dtime, ne = 1;
  double unew, du;
  double cloudmass;
  double factorEVP, dens;
  double tsfr;
  double egyeff, x;

  double eos_dens_threshold = All.PhysDensThresh;

  /* note: assuming FULL ionization */
  double u_to_temp_fac =
      (4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC))) * PROTONMASS / BOLTZMANN * GAMMA_MINUS1 * All.UnitEnergy_in_cgs / All.UnitMass_in_g;

  /* clear the SFR stored in the active timebins */
  for(bin = 0; bin < TIMEBINS; bin++)
    if(TimeBinSynchronized[bin])
      TimeBinSfr[bin] = 0;

  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      i = TimeBinsHydro.ActiveParticleList[idx];
      if(i < 0)
        continue;

      if(P[i].Mass == 0 && P[i].ID == 0)
        continue; /* skip cells that have been swallowed or eliminated */

      dens = SphP[i].Density;

      /* time bin 0 yields dt = 0; otherwise the step is 2^TimeBinHydro base intervals */
      dt    = (P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval;
      dtime = All.cf_atime * dt / All.cf_time_hubble_a;

      /* apply the temperature floor */

      unew = dmax(All.MinEgySpec, SphP[i].Utherm);

      if(unew < 0)
        terminate("Invalid Temperature: Task=%d i=%d unew=%g\n", ThisTask, i, unew);

      /* keep the conserved energy consistent with the change of the specific thermal energy */
      du = unew - SphP[i].Utherm;
      SphP[i].Utherm += du;
      SphP[i].Energy += All.cf_atime * All.cf_atime * du * P[i].Mass;

      egyeff = 0.;
      /* calculate the effective equation of state for gas above the density threshold */
      if(dens * All.cf_a3inv >= eos_dens_threshold)
        {
          ne     = SphP[i].Ne;
          egyeff = calc_egyeff(i, dens * All.cf_a3inv, &ne, &x, &tsfr, &factorEVP);
        }

      /* do cooling, except for gas above the EOS density threshold that is colder than the eEOS */
      if(dens * All.cf_a3inv < eos_dens_threshold || (dens * All.cf_a3inv >= eos_dens_threshold && SphP[i].Utherm > egyeff))
        {
          cool_cell(i);
        }

      /* check whether conditions for star formation are fulfilled.
       * flag=1  normal cooling
       * flag=0  star formation
       */

      flag = 1; /* default is normal cooling */

      /* enable star formation if gas is above SF density threshold */
      if(dens * All.cf_a3inv >= eos_dens_threshold)
        if(SphP[i].Utherm <= egyeff || u_to_temp_fac * SphP[i].Utherm <= All.TemperatureThresh)
          flag = 0;

      /* in comoving runs the (comoving) density must also exceed the overdensity threshold */
      if(All.ComovingIntegrationOn)
        if(dens < All.OverDensThresh)
          flag = 1;

      if(P[i].Mass == 0) /* tracer particles don't form stars */
        flag = 1;

      if(flag == 1)
        SphP[i].Sfr = 0;

      /* active star formation; x and tsfr were set by calc_egyeff() above, because
       * flag == 0 requires the density threshold branch to have been taken */
      if(flag == 0)
        {
          SphP[i].Ne = (HYDROGEN_MASSFRAC + 1) / 2 / HYDROGEN_MASSFRAC; /* note: assuming FULL ionization */

          cloudmass = x * P[i].Mass;

          /* the star formation timescale is not allowed to be shorter than the time step */
          if(tsfr < dtime)
            tsfr = dtime;

          if(dt > 0)
            {
              if(P[i].TimeBinHydro) /* upon start-up, we need to protect against dt==0 */
                {
                  unew = SphP[i].Utherm;

                  /* put (cold) star forming cells on the effective equation of state */
                  if(SphP[i].Utherm < egyeff)
                    {
                      unew = egyeff;
                    }

                  du = unew - SphP[i].Utherm;
                  if(unew < All.MinEgySpec)
                    du = All.MinEgySpec - SphP[i].Utherm;

                  SphP[i].Utherm += du;
                  SphP[i].Energy += All.cf_atime * All.cf_atime * du * P[i].Mass;

#ifdef OUTPUT_COOLHEAT
                  if(dtime > 0)
                    SphP[i].CoolHeat = du * P[i].Mass / dtime;
#endif /* #ifdef OUTPUT_COOLHEAT */

                  set_pressure_of_cell(i);
                }
            }

          /* star formation rate converted to solar masses per year */
          SphP[i].Sfr = (1 - All.FactorSN) * cloudmass / tsfr * (All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR);

          TimeBinSfr[P[i].TimeBinHydro] += SphP[i].Sfr;
        }
    } /* end of main loop over active particles */

  TIMER_STOP(CPU_COOLINGSFR);
}

/*! \brief Return the star formation rate associated with the gas cell i.
 *
 *  If RestartFlag == 3 the stored value SphP[i].Sfr is returned unchanged.
 *  Otherwise the rate is recomputed from the effective equation of state.
 *  Note that the thermal-energy criterion used here is
 *  Utherm <= 1.01 * egyeff, i.e. it carries a 1 per cent tolerance that the
 *  corresponding test in cooling_and_starformation() does not have.
 *
 *  \param[in] i the index of the gas cell.
 *
 *  \return star formation rate in solar masses / yr.
 */
double get_starformation_rate(int i)
{
  if(RestartFlag == 3)
    return SphP[i].Sfr;

  double rateOfSF;
  int flag;
  double tsfr;
  double factorEVP, egyeff, ne, x, cloudmass;
  /* note: assuming FULL ionization */
  double u_to_temp_fac =
      (4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC))) * PROTONMASS / BOLTZMANN * GAMMA_MINUS1 * All.UnitEnergy_in_cgs / All.UnitMass_in_g;

  double eos_dens_threshold = All.PhysDensThresh;

  flag   = 1; /* default is normal cooling */
  egyeff = 0.0;

  if(SphP[i].Density * All.cf_a3inv >= eos_dens_threshold)
    {
      ne     = SphP[i].Ne;
      egyeff = calc_egyeff(i, SphP[i].Density * All.cf_a3inv, &ne, &x, &tsfr, &factorEVP);
    }

  if(SphP[i].Density * All.cf_a3inv >= All.PhysDensThresh)
    if(SphP[i].Utherm <= 1.01 * egyeff || u_to_temp_fac * SphP[i].Utherm <= All.TemperatureThresh)
      flag = 0;

  if(All.ComovingIntegrationOn)
    if(SphP[i].Density < All.OverDensThresh)
      flag = 1;

  if(flag == 1)
    return 0;

  /* x and tsfr are valid here: flag == 0 implies calc_egyeff() was called above */
  cloudmass = x * P[i].Mass;

  rateOfSF = (1 - All.FactorSN) * cloudmass / tsfr;

  /* convert to solar masses per yr */
  rateOfSF *= (All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR);

  return rateOfSF;
}
\brief Initialize the parameters of effective multi-phase model. + * + * In particular this function computes the value of PhysDensThresh, that is + * the physical density threshold above which star formation is active, if + * its value was set to 0 in the parameter file. + * + * \return void + */ +void init_clouds(void) +{ + double A0, dens, tcool, ne, coolrate, egyhot, x, u4, meanweight; + double tsfr, peff, fac, neff, egyeff, factorEVP, sigma, thresholdStarburst; + + if(All.PhysDensThresh == 0) + { + A0 = All.FactorEVP; + + egyhot = All.EgySpecSN / A0; + + meanweight = 4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC)); /* note: assuming FULL ionization */ + u4 = 1 / meanweight * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * 1.0e4; + u4 *= All.UnitMass_in_g / All.UnitEnergy_in_cgs; + + /* choose a high reference density to avoid that we pick up a compton cooling contribution */ + if(All.ComovingIntegrationOn) + dens = 1.0e10 * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G); + else + dens = 1.0e10 * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G); + + if(All.ComovingIntegrationOn) + { + All.Time = 1.0; /* to be guaranteed to get z=0 rate */ + set_cosmo_factors_for_current_time(); + IonizeParams(); + } + + ne = 1.0; + SetZeroIonization(); + + tcool = GetCoolingTime(egyhot, dens, &ne); + + coolrate = egyhot / tcool / dens; + + x = (egyhot - u4) / (egyhot - All.EgySpecCold); + + All.PhysDensThresh = + x / pow(1 - x, 2) * (All.FactorSN * All.EgySpecSN - (1 - All.FactorSN) * All.EgySpecCold) / (All.MaxSfrTimescale * coolrate); + + mpi_printf( + "USE_SFR: A0=%g PhysDensThresh=%g (int units) %g h^2 cm^-3 expected fraction of cold gas at threshold=%g tcool=%g " + "dens=%g egyhot=%g\n", + A0, All.PhysDensThresh, All.PhysDensThresh / (PROTONMASS / HYDROGEN_MASSFRAC / All.UnitDensity_in_cgs), x, tcool, dens, + egyhot); + + dens = All.PhysDensThresh; + + do + { + ne = 0.5; + egyeff = calc_egyeff(-1, dens, &ne, &x, &tsfr, &factorEVP); + peff = GAMMA_MINUS1 * dens * egyeff; + + fac 
= 1 / (log(dens * 1.025) - log(dens)); + dens *= 1.025; + + neff = -log(peff) * fac; + + ne = 0.5; + egyeff = calc_egyeff(-1, dens, &ne, &x, &tsfr, &factorEVP); + peff = GAMMA_MINUS1 * dens * egyeff; + + neff += log(peff) * fac; + } + while(neff > 4.0 / 3); + + thresholdStarburst = dens; + + mpi_printf("USE_SFR: run-away sets in for dens=%g dynamic range for quiescent star formation=%g\n", thresholdStarburst, + thresholdStarburst / All.PhysDensThresh); + + integrate_sfr(); + + if(ThisTask == 0) + { + sigma = 10.0 / All.Hubble * 1.0e-10 / pow(1.0e-3, 2); + + printf("USE_SFR: isotherm sheet central density=%g z0=%g\n", M_PI * All.G * sigma * sigma / (2 * GAMMA_MINUS1) / u4, + GAMMA_MINUS1 * u4 / (2 * M_PI * All.G * sigma)); + myflush(stdout); + } + + mpi_printf("USE_SFR: SNII energy=%g [internal units] = %g [erg/M_sun] = %g [1e51 erg/Msun]\n", All.FactorSN * All.EgySpecSN, + All.FactorSN * All.EgySpecSN / (1 - All.FactorSN) / (All.UnitMass_in_g / All.UnitEnergy_in_cgs) * SOLAR_MASS, + All.FactorSN * All.EgySpecSN / (1 - All.FactorSN) / (All.UnitMass_in_g / All.UnitEnergy_in_cgs) * SOLAR_MASS / 1e51); + + if(All.ComovingIntegrationOn) + { + All.Time = All.TimeBegin; + set_cosmo_factors_for_current_time(); + IonizeParams(); + } + } +} + +/*! \brief Compute the effective equation of state for the gas and + * the integrated SFR per unit area. + * + * This function computes the effective equation of state for the gas and + * the integrated SFR per unit area. It saves the results into two files: + * eos.txt for the equation of state and sfrrate.txt for the integrated SFR. + * In the latter case, the SFR is determined by integrating along the vertical + * direction the gas density of an infinite self-gravitating isothermal sheet. + * The integrated gas density is saved as well, so effectively sfrrate.txt + * contains the Kennicutt-Schmidt law of the star formation model. 
+ * + * \return void + */ +void integrate_sfr(void) +{ + double rho0, rho, rho2, q, dz, gam, sigma = 0, sigma_u4, sigmasfr = 0, ne, P1; + double x = 0, P, P2, x2, tsfr2, factorEVP2, drho, dq; + double meanweight, u4, tsfr, factorEVP, egyeff, egyeff2; + FILE *fd; + + double eos_dens_threshold = All.PhysDensThresh; + + meanweight = 4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC)); /* note: assuming FULL ionization */ + u4 = 1 / meanweight * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * 1.0e4; + u4 *= All.UnitMass_in_g / All.UnitEnergy_in_cgs; + + if(All.ComovingIntegrationOn) + { + All.Time = 1.0; /* to be guaranteed to get z=0 rate */ + set_cosmo_factors_for_current_time(); + IonizeParams(); + } + + if(WriteMiscFiles && (ThisTask == 0)) + fd = fopen("eos.txt", "w"); + else + fd = 0; + + for(rho = eos_dens_threshold; rho <= 1000 * eos_dens_threshold; rho *= 1.1) + { + ne = 1.0; + egyeff = calc_egyeff(-1, rho, &ne, &x, &tsfr, &factorEVP); + + P = GAMMA_MINUS1 * rho * egyeff; + + if(WriteMiscFiles && (ThisTask == 0)) + { + fprintf(fd, "%g %g %g\n", rho, P, x); + } + } + + if(WriteMiscFiles && (ThisTask == 0)) + fclose(fd); + + if(WriteMiscFiles && (ThisTask == 0)) + fd = fopen("sfrrate.txt", "w"); + else + fd = 0; + + for(rho0 = eos_dens_threshold; rho0 <= 10000 * eos_dens_threshold; rho0 *= 1.02) + { + rho = rho0; + q = 0; + dz = 0.001; + + sigma = sigmasfr = sigma_u4 = 0; + + while(rho > 0.0001 * rho0) + { + if(rho > All.PhysDensThresh) + { + ne = 1.0; + egyeff = calc_egyeff(-1, rho, &ne, &x, &tsfr, &factorEVP); + + P = P1 = GAMMA_MINUS1 * rho * egyeff; + + rho2 = 1.1 * rho; + + egyeff2 = calc_egyeff(-1, rho2, &ne, &x2, &tsfr2, &factorEVP2); + + P2 = GAMMA_MINUS1 * rho2 * egyeff2; + + gam = log(P2 / P1) / log(rho2 / rho); + } + else + { + tsfr = 0; + + P = GAMMA_MINUS1 * rho * u4; + gam = 1.0; + + sigma_u4 += rho * dz; + } + + drho = q; + dq = -(gam - 2) / rho * q * q - 4 * M_PI * All.G / (gam * P) * rho * rho * rho; + + sigma += rho * dz; + if(tsfr > 0) + { + sigmasfr += 
(1 - All.FactorSN) * rho * x / tsfr * dz; + } + + rho += drho * dz; + q += dq * dz; + } + + sigma *= 2; /* to include the other side */ + sigmasfr *= 2; + sigma_u4 *= 2; + + sigma *= All.HubbleParam * (All.UnitMass_in_g / SOLAR_MASS) * PARSEC * PARSEC / (All.UnitLength_in_cm * All.UnitLength_in_cm); + sigmasfr *= All.HubbleParam * All.HubbleParam * (All.UnitMass_in_g / SOLAR_MASS) * (SEC_PER_YEAR / All.UnitTime_in_s) * 1.0e6 * + PARSEC * PARSEC / (All.UnitLength_in_cm * All.UnitLength_in_cm); + sigma_u4 *= All.HubbleParam * (All.UnitMass_in_g / SOLAR_MASS) * PARSEC * PARSEC / (All.UnitLength_in_cm * All.UnitLength_in_cm); + + if(WriteMiscFiles && (ThisTask == 0)) + { + fprintf(fd, "%g %g %g %g\n", rho0, sigma, sigmasfr, sigma_u4); + } + } + + if(All.ComovingIntegrationOn) + { + All.Time = All.TimeBegin; + set_cosmo_factors_for_current_time(); + IonizeParams(); + } + + if(WriteMiscFiles && (ThisTask == 0)) + fclose(fd); +} + +/*! \brief Set the appropriate units for the parameters of the multi-phase + * model. + * + * \return void + */ +void set_units_sfr(void) +{ + double meanweight; + + All.OverDensThresh = All.CritOverDensity * All.OmegaBaryon * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G); + + All.PhysDensThresh = All.CritPhysDensity * PROTONMASS / HYDROGEN_MASSFRAC / All.UnitDensity_in_cgs; + + meanweight = 4 / (1 + 3 * HYDROGEN_MASSFRAC); /* note: assuming NEUTRAL GAS */ + + All.EgySpecCold = 1 / meanweight * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * All.TempClouds; + All.EgySpecCold *= All.UnitMass_in_g / All.UnitEnergy_in_cgs; + + meanweight = 4 / (8 - 5 * (1 - HYDROGEN_MASSFRAC)); /* note: assuming FULL ionization */ + + All.EgySpecSN = 1 / meanweight * (1.0 / GAMMA_MINUS1) * (BOLTZMANN / PROTONMASS) * All.TempSupernova; + All.EgySpecSN *= All.UnitMass_in_g / All.UnitEnergy_in_cgs; +} + +/*! \brief Calculate the effective energy of the multi-phase model. + * + * \param[in] i (unused) + * \param[in] gasdens gas density. 
+ * \param[in, out] ne Fractional electron density. + * \param[out] x Fraction cold gas within model. + * \param[out] tsfr Star formation timescale. + * \param[out] factorEVP Supernova evaporation factor for given density. + */ +double calc_egyeff(int i, double gasdens, double *ne, double *x, double *tsfr, double *factorEVP) +{ + double egyhot, egyeff, tcool, y; + double rho = gasdens; + + rho = dmax(rho, All.PhysDensThresh); + + *tsfr = sqrt(All.PhysDensThresh / rho) * All.MaxSfrTimescale; + + *factorEVP = pow(rho / All.PhysDensThresh, -0.8) * All.FactorEVP; + + egyhot = All.EgySpecSN / (1 + *factorEVP) + All.EgySpecCold; + + tcool = GetCoolingTime(egyhot, rho, ne); + + y = *tsfr / tcool * egyhot / (All.FactorSN * All.EgySpecSN - (1 - All.FactorSN) * All.EgySpecCold); + + *x = 1 + 1 / (2 * y) - sqrt(1 / y + 1 / (4 * y * y)); + + egyeff = egyhot * (1 - *x) + All.EgySpecCold * (*x); + + return egyeff; +} + +#endif /* #ifdef USE_SFR */ diff --git a/src/amuse/community/arepo/src/star_formation/starformation.c b/src/amuse/community/arepo/src/star_formation/starformation.c new file mode 100644 index 0000000000..9ce94a96e5 --- /dev/null +++ b/src/amuse/community/arepo/src/star_formation/starformation.c @@ -0,0 +1,437 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
static int stars_spawned;           /*!< local number of star particles spawned in the time step */
static int tot_stars_spawned;       /*!< global number of star particles spawned in the time step */
static int stars_converted;         /*!< local number of gas cells converted into stars in the time step */
static int tot_stars_converted;     /*!< global number of gas cells converted into stars in the time step */
static int altogether_spawned;      /*!< local number of star+wind particles spawned in the time step */
static int tot_altogether_spawned;  /*!< global number of star+wind particles spawned in the time step */
static double cum_mass_stars = 0.0; /*!< cumulative mass of stars created in the time step (global value) */

static int sfr_init_called = 0; /*!< set to 1 once sfr_init() has done its work */

/*! \brief Initialization routine.
 *
 *  Calls init_clouds() on the first invocation only; any later call
 *  returns without doing anything.
 *
 *  \return void
 */
void sfr_init()
{
  if(!sfr_init_called)
    {
      sfr_init_called = 1;

      init_clouds();
    }
}
/*! \brief This routine creates star particles according to their
 *         respective rates.
 *
 *  This function loops over all the active gas cells. If in a given cell the
 *  SFR is greater than zero, the probability of forming a star is computed
 *  and the corresponding particle is created stochastically according to the
 *  model in Springel & Hernquist (2003, MNRAS). It also saves information
 *  about the formed stellar mass and the star formation rate in the file
 *  FdSfr.
 *
 *  After the loop, the numbers of spawned and converted stars are summed
 *  over all tasks, newly spawned particles receive unique IDs above
 *  All.MaxID, and the global/local particle counters are updated.
 *
 *  \return void
 */
void sfr_create_star_particles(void)
{
  TIMER_START(CPU_COOLINGSFR);

  int idx, i, bin;
  double dt, dtime;
  MyDouble mass_of_star;
  double sum_sm, total_sm, rate, sum_mass_stars, total_sum_mass_stars;
  double p = 0, pall = 0, prob, p_decide;
  double rate_in_msunperyear;
  double sfrrate, totsfrrate;

  stars_spawned = stars_converted = 0;
  sum_sm = sum_mass_stars = 0;

  for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++)
    {
      i = TimeBinsHydro.ActiveParticleList[idx];
      if(i >= 0)
        {
          if(P[i].Mass == 0 && P[i].ID == 0)
            continue; /* skip cells that have been swallowed or eliminated */

#ifdef SFR_KEEP_CELLS
          if(P[i].Mass < 0.3 * All.TargetGasMass)
            continue;
#endif /* #ifdef SFR_KEEP_CELLS */

          /* time bin 0 yields dt = 0; otherwise the step is 2^TimeBinHydro base intervals */
          dt = (P[i].TimeBinHydro ? (((integertime)1) << P[i].TimeBinHydro) : 0) * All.Timebase_interval;

          /*  the actual time-step */

          dtime = All.cf_atime * dt / All.cf_time_hubble_a;

          mass_of_star = 0;
          prob         = 0;
          p            = 0;
          pall         = 0;

          if(SphP[i].Sfr > 0)
            {
              /* SphP[i].Sfr is stored in solar masses per year; convert back to internal
               * units to get the mass fraction p expected to turn into stars during dtime */
              p    = SphP[i].Sfr / ((All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR)) * dtime / P[i].Mass;
              pall = p;
              sum_sm += P[i].Mass * (1 - exp(-p));

#if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS)

              if(P[i].Mass < 2.0 * All.TargetGasMass)
#ifdef SFR_KEEP_CELLS
                mass_of_star = 0.9 * P[i].Mass;
#else  /* #ifdef SFR_KEEP_CELLS */
                mass_of_star = P[i].Mass;
#endif /* #ifdef SFR_KEEP_CELLS */
              else
                mass_of_star = All.TargetGasMass;

#ifdef REFINEMENT_HIGH_RES_GAS
              if(SphP[i].HighResMass < HIGHRESMASSFAC * P[i].Mass)
                {
                  /* this cell does not appear to be in the high-res region.
                     If we form a star, then it is given the mass of the cell,
                     and later we give the star the SofteningType=3 particle to give it large softening */
#ifdef SFR_KEEP_CELLS
                  mass_of_star = 0.9 * P[i].Mass;
#else  /* #ifdef SFR_KEEP_CELLS */
                  mass_of_star = P[i].Mass;
#endif /* #ifdef SFR_KEEP_CELLS #else */
                }

#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */

#else  /* #if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS) */
              mass_of_star = P[i].Mass;
#endif /* #if defined(REFINEMENT_SPLIT_CELLS) && defined(REFINEMENT_MERGE_CELLS) #else */

#ifdef SFR_KEEP_CELLS
              if(P[i].Mass < 0.5 * All.TargetGasMass)
                continue; /* do not make stars from cells that should be derefined */
#endif /* #ifdef SFR_KEEP_CELLS */

              /* probability scaled such that the expected stellar mass equals P[i].Mass * (1 - exp(-pall)) */
              prob = P[i].Mass / mass_of_star * (1 - exp(-pall));
            }

          if(prob == 0)
            continue;

          if(prob < 0)
            terminate("prob < 0");

          if(prob > 1)
            {
              printf(
                  "SFR: Warning, need to make a heavier star than desired. Task=%d prob=%g P[i].Mass=%g mass_of_star=%g "
                  "mass_of_star_new=%g p=%g pall=%g\n",
                  ThisTask, prob, P[i].Mass, mass_of_star, P[i].Mass * (1 - exp(-pall)), p, pall);
              mass_of_star = P[i].Mass * (1 - exp(-pall));
              prob         = 1.0;
            }

          /* decide what process to consider (currently available: make a star or kick to wind) */
          p_decide = get_random_number();

          /* p equals pall in this function, so the ratio is 1 whenever this line is reached */
          if(p_decide < p / pall) /* ok, it is decided to consider star formation */
            make_star(idx, i, prob, mass_of_star, &sum_mass_stars);
        }
    } /* end of main loop over active gas particles */

  /* only the first cnt = 2 entries of in/out are used */
  int in[4], out[4], cnt = 2;
  in[0] = stars_spawned;
  in[1] = stars_converted;

  MPI_Allreduce(in, out, cnt, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  tot_stars_spawned   = out[0];
  tot_stars_converted = out[1];

  if(tot_stars_spawned > 0 || tot_stars_converted > 0)
    mpi_printf("SFR: spawned %d stars, converted %d gas particles into stars\n", tot_stars_spawned, tot_stars_converted);

  tot_altogether_spawned = tot_stars_spawned;
  altogether_spawned     = stars_spawned;

  if(tot_altogether_spawned)
    {
      /* need to assign new unique IDs to the spawned stars */

      int *list;

      if(All.MaxID == 0) /* MaxID not calculated yet */
        calculate_maxid();

      list = mymalloc("list", NTask * sizeof(int));

      MPI_Allgather(&altogether_spawned, 1, MPI_INT, list, 1, MPI_INT, MPI_COMM_WORLD);

      MyIDType newid = All.MaxID + 1;

      /* skip the ID range consumed by all lower-ranked tasks */
      for(i = 0; i < ThisTask; i++)
        newid += list[i];

      myfree(list);

      /* the spawned particles occupy the slots NumPart ... NumPart + altogether_spawned - 1 */
      for(i = 0; i < altogether_spawned; i++)
        {
          P[NumPart + i].ID = newid;

          newid++;
        }

      All.MaxID += tot_altogether_spawned;
    }

  /* Note: New tree construction can be avoided because of  `force_add_star_to_tree()' */
  if(tot_stars_spawned > 0 || tot_stars_converted > 0)
    {
      All.TotNumPart += tot_stars_spawned;
      All.TotNumGas -= tot_stars_converted;
      NumPart += stars_spawned;
    }

  for(bin = 0, sfrrate = 0; bin < TIMEBINS; bin++)
    if(TimeBinsHydro.TimeBinCount[bin])
      sfrrate += TimeBinSfr[bin];

  double din[3] = {sfrrate, sum_sm, sum_mass_stars}, dout[3];

  /* sums are only needed on task 0, which writes the log entry */
  MPI_Reduce(din, dout, 3, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

  if(ThisTask == 0)
    {
      totsfrrate           = dout[0];
      total_sm             = dout[1];
      total_sum_mass_stars = dout[2];

      if(All.TimeStep > 0)
        rate = total_sm / (All.TimeStep / All.cf_time_hubble_a);
      else
        rate = 0;

      /* compute the cumulative mass of stars */
      cum_mass_stars += total_sum_mass_stars;

      /* convert to solar masses per yr */
      rate_in_msunperyear = rate * (All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR);

      fprintf(FdSfr, "%14e %14e %14e %14e %14e %14e\n", All.Time, total_sm, totsfrrate, rate_in_msunperyear, total_sum_mass_stars,
              cum_mass_stars);
      myflush(FdSfr);
    }

  TIMER_STOP(CPU_COOLINGSFR);
}

/*! \brief Convert a cell into a star.
 *
 *  This function converts an active star-forming gas cell into a star.
 *  The particle keeps its slot i; its type is set to 4, the softening type
 *  is assigned, the cell's contribution is removed from TimeBinSfr and the
 *  cell is detached via voronoi_remove_connection().
 *
 *  \param[in] i Index of the gas cell to be converted.
 *  \param[in] birthtime Time of birth (in code units) of the stellar particle
 *             (not referenced in this function body).
 *
 *  \return void
 */
void convert_cell_into_star(int i, double birthtime)
{
  P[i].Type          = 4;
  P[i].SofteningType = All.SofteningTypeOfPartType[P[i].Type];

#if defined(REFINEMENT_HIGH_RES_GAS)
  if(SphP[i].HighResMass < HIGHRESMASSFAC * P[i].Mass)
    {
      /* this cell does not appear to be in the high-res region.
         We give the star the SofteningType=3 particle to give it large softening */
      P[i].SofteningType = All.SofteningTypeOfPartType[3];
    }
#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */

#ifdef INDIVIDUAL_GRAVITY_SOFTENING
  if(((1 << P[i].Type) & (INDIVIDUAL_GRAVITY_SOFTENING)))
    P[i].SofteningType = get_softening_type_from_mass(P[i].Mass);
#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */

  /* the cell no longer forms stars, so remove its rate from the time bin sum */
  TimeBinSfr[P[i].TimeBinHydro] -= SphP[i].Sfr;

  voronoi_remove_connection(i);

  return;
}

/*! \brief Spawn a star particle from a gas cell.
 *
 *  This function spawns a star particle from an active star-forming
 *  cell. The particle information of the gas cell is copied to the
 *  location istar and the fields necessary for the creation of the star
 *  particle are initialized. The conserved variables of the gas cell
 *  are then updated according to the mass ratio between the two components
 *  to ensure conservation.
 *
 *  \param[in] igas Index of the gas cell from which the star is spawned.
 *  \param[in] birthtime Time of birth (in code units) of the stellar particle
 *             (not referenced in this function body).
 *  \param[in] istar Index of the spawned stellar particle.
 *  \param[in] mass_of_star The mass of the spawned stellar particle.
 *
 *  \return void
 */
void spawn_star_from_cell(int igas, double birthtime, int istar, MyDouble mass_of_star)
{
  P[istar]               = P[igas];
  P[istar].Type          = 4;
  P[istar].SofteningType = All.SofteningTypeOfPartType[P[istar].Type];
  P[istar].Mass          = mass_of_star;

#if defined(REFINEMENT_HIGH_RES_GAS)
  if(SphP[igas].HighResMass < HIGHRESMASSFAC * P[igas].Mass)
    {
      /* this cell does not appear to be in the high-res region.
         We give the star the SofteningType=3 particle to give it large softening */
      P[istar].SofteningType = All.SofteningTypeOfPartType[3];
    }
#endif /* #if defined(REFINEMENT_HIGH_RES_GAS) */

#ifdef INDIVIDUAL_GRAVITY_SOFTENING
  if(((1 << P[istar].Type) & (INDIVIDUAL_GRAVITY_SOFTENING)))
    P[istar].SofteningType = get_softening_type_from_mass(P[istar].Mass);
#endif /* #ifdef INDIVIDUAL_GRAVITY_SOFTENING */

  timebin_add_particle(&TimeBinsGravity, istar, igas, P[istar].TimeBinGrav, TimeBinSynchronized[P[istar].TimeBinGrav]);

  /* now change the conserved quantities in the cell in proportion */
  double fac = (P[igas].Mass - P[istar].Mass) / P[igas].Mass;

#ifdef MHD
  /* take the magnetic energy out before scaling and add it back afterwards,
   * so that it is not reduced by the mass factor */
  double Emag = 0.5 * (SphP[igas].B[0] * SphP[igas].B[0] + SphP[igas].B[1] * SphP[igas].B[1] + SphP[igas].B[2] * SphP[igas].B[2]) *
                SphP[igas].Volume * All.cf_atime;
  SphP[igas].Energy -= Emag;
#endif /* #ifdef MHD */

  P[igas].Mass *= fac;
  SphP[igas].Energy *= fac;
  SphP[igas].Momentum[0] *= fac;
  SphP[igas].Momentum[1] *= fac;
  SphP[igas].Momentum[2] *= fac;

#ifdef MHD
  SphP[igas].Energy += Emag;
#endif /* #ifdef MHD */

#ifdef MAXSCALARS
  for(int s = 0; s < N_Scalar; s++) /* Note, the changes in MATERIALS, HIGHRESGASMASS, etc., are treated as part of the Scalars */
    *(MyFloat *)(((char *)(&SphP[igas])) + scalar_elements[s].offset_mass) *= fac;
#endif /* #ifdef MAXSCALARS */

  return;
}
+ * \param[in, out] sum_mass_stars Holds the mass of all the stars created at the + * current time-step (for the local task) + * + * \return void + */ +void make_star(int idx, int i, double prob, MyDouble mass_of_star, double *sum_mass_stars) +{ + if(mass_of_star > P[i].Mass) + terminate("mass_of_star > P[i].Mass"); + + if(get_random_number() < prob) + { + if(mass_of_star == P[i].Mass) + { + /* here we turn the gas particle itself into a star particle */ + Stars_converted++; + stars_converted++; + + *sum_mass_stars += P[i].Mass; + + convert_cell_into_star(i, All.Time); + timebin_remove_particle(&TimeBinsHydro, idx, P[i].TimeBinHydro); + } + else + { + /* in this case we spawn a new star particle, only reducing the mass in the cell by mass_of_star */ + altogether_spawned = stars_spawned; + if(NumPart + altogether_spawned >= All.MaxPart) + terminate("NumPart=%d spwawn %d particles no space left (All.MaxPart=%d)\n", NumPart, altogether_spawned, All.MaxPart); + + int j = NumPart + altogether_spawned; /* index of new star */ + + spawn_star_from_cell(i, All.Time, j, mass_of_star); + + *sum_mass_stars += mass_of_star; + stars_spawned++; + } + } +} + +#endif /* #ifdef USE_SFR */ diff --git a/src/amuse/community/arepo/src/subfind/subfind.c b/src/amuse/community/arepo/src/subfind/subfind.c new file mode 100644 index 0000000000..4759ae416a --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind.c @@ -0,0 +1,577 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind.c + * \date 05/2018 + * \brief Main routines of the subfind sub-halo finder. + * \details contains functions: + * double subfind_get_particle_balance(void) + * void subfind(int num) + * void subfind_reorder_according_to_submp(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../fof/fof.h" + +#ifdef SUBFIND +#include "subfind.h" + +/*! \brief Gets a measure of the particle load balance. + * + * Collective call: it reduces NumPart over MPI_COMM_WORLD, so every task + * has to call it. + * + * \return Maximum number of particles on one task divided by the average + * number of particles per task (sum of NumPart divided by NTask). + */ +double subfind_get_particle_balance(void) +{ + int maxpart; + long long sum; + MPI_Allreduce(&NumPart, &maxpart, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + sumup_large_ints(1, &NumPart, &sum); + return maxpart / (((double)sum) / NTask); +} + +/*! \brief Main subfind algorithm. + * + * \param[in] num Index of this snapshot output.
+ * + * \return void + */ +void subfind(int num) +{ + double t0, t1, tstart, tend, cputime; + int i, gr, nlocid, offset; + + TIMER_START(CPU_SUBFIND); + + tstart = second(); + + mpi_printf("\nSUBFIND: We now execute a parallel version of SUBFIND.\n"); + + /* let's determine the local dark matter densities */ + + TIMER_STOP(CPU_SUBFIND); + construct_forcetree(0, 0, 1, All.HighestOccupiedTimeBin); /* build forcetree with all particles */ + TIMER_START(CPU_SUBFIND); + + cputime = subfind_density(FIND_SMOOTHING_LENGTHS); + mpi_printf("SUBFIND: iteration to correct primary neighbor count took %g sec\n", cputime); + + /* free the tree storage again */ + myfree(Father); + myfree(Nextnode); + myfree(Tree_Points); + force_treefree(); + + TIMER_STOP(CPU_SUBFIND); + construct_forcetree(0, 0, 0, All.HighestOccupiedTimeBin); /* build forcetree with all particles */ + TIMER_START(CPU_SUBFIND); + + cputime = subfind_density(FIND_TOTAL_DENSITIES); + mpi_printf("SUBFIND: density() took %g sec\n", cputime); + + /* free the tree storage again */ + myfree(Father); + myfree(Nextnode); + myfree(Tree_Points); + force_treefree(); + + for(i = 0; i < NumPart; i++) + if(P[i].Type == 0) + { +#ifdef CELL_CENTER_GRAVITY + for(int j = 0; j < 3; j++) + PS[i].Center[j] = SphP[i].Center[j]; +#endif /* #ifdef CELL_CENTER_GRAVITY */ + PS[i].Utherm = SphP[i].Utherm; + } + else + PS[i].Utherm = 0; + + SubTreeAllocFactor = All.TreeAllocFactor; + + /* Count, how many groups are above this limit, and how many processors we need for them. The size limit is raised in steps (GroupSize += 0.05) until the collective groups need fewer than NTask - 1 processors, or until no collective group is left. */ + int ncount = 0, nprocs = 0; + int seriallen = 0; + long long sum_seriallen; + + double GroupSize = 0.6; + + do + { + ncount = 0; + nprocs = 0; + seriallen = 0; + + /* Let's set a fiducial size for the maximum group size before we select the collective subfind algorithm */ + MaxSerialGroupLen = (int)(GroupSize * All.TotNumPart / NTask); + + for(i = 0; i < Ngroups; i++) + if(Group[i].Len > MaxSerialGroupLen) + { + ncount++; + nprocs += ((Group[i].Len - 1) /
MaxSerialGroupLen) + 1; + } + else + seriallen += Group[i].Len; + + MPI_Allreduce(&ncount, &Ncollective, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&nprocs, &NprocsCollective, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + sumup_large_ints(1, &seriallen, &sum_seriallen); + + GroupSize += 0.05; + } + while(NprocsCollective > 0 && NprocsCollective >= NTask - 1); + + if(GroupSize > 0.65) + { + mpi_printf("Increased GroupSize to %g.\n", GroupSize); + } + + /* NOTE(review): ncount, nprocs and seriallen are unchanged since the last loop iteration, so the three reductions below repeat the ones done there */ MPI_Allreduce(&ncount, &Ncollective, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&nprocs, &NprocsCollective, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + sumup_large_ints(1, &seriallen, &sum_seriallen); + + mpi_printf("SUBFIND: Number of FOF halos treated with collective SubFind code = %d\n", Ncollective); + mpi_printf("SUBFIND: Number of processors used in different partitions for the collective SubFind code = %d\n", NprocsCollective); + mpi_printf("SUBFIND: (The adopted size-limit for the collective algorithm was %d particles.)\n", MaxSerialGroupLen); + mpi_printf("SUBFIND: The other %d FOF halos are treated in parallel with serial code\n", TotNgroups - Ncollective); + + /* set up a global table that informs about the processor assignment of the groups that are treated collectively */ + ProcAssign = mymalloc_movable(&ProcAssign, "ProcAssign", Ncollective * sizeof(struct proc_assign_data)); + struct proc_assign_data *locProcAssign = mymalloc("locProcAssign", ncount * sizeof(struct proc_assign_data)); + + for(i = 0, ncount = 0; i < Ngroups; i++) + if(Group[i].Len > MaxSerialGroupLen) + { + locProcAssign[ncount].GrNr = Group[i].GrNr; + locProcAssign[ncount].Len = Group[i].Len; + ncount++; + } + + /* gather the information on the collective groups across all CPUs */ + int *recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * NTask); + int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask); + int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask); + + MPI_Allgather(&ncount, 1,
MPI_INT, recvcounts, 1, MPI_INT, MPI_COMM_WORLD); + + int task; + for(task = 0; task < NTask; task++) + bytecounts[task] = recvcounts[task] * sizeof(struct proc_assign_data); + + for(task = 1, byteoffset[0] = 0; task < NTask; task++) + byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1]; + + MPI_Allgatherv(locProcAssign, bytecounts[ThisTask], MPI_BYTE, ProcAssign, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD); + + myfree(byteoffset); + myfree(bytecounts); + myfree(recvcounts); + myfree(locProcAssign); + + /* make sure, the table is sorted in ascending group-number order */ + qsort(ProcAssign, Ncollective, sizeof(struct proc_assign_data), subfind_compare_procassign_GrNr); + + /* assign the processor sets for the collective groups and set disjoint color-flag to later split the processors into different + * communicators */ + for(i = 0, nprocs = 0, CommSplitColor = Ncollective; i < Ncollective; i++) + { + ProcAssign[i].FirstTask = nprocs; + ProcAssign[i].NTask = ((ProcAssign[i].Len - 1) / MaxSerialGroupLen) + 1; + nprocs += ProcAssign[i].NTask; + + if(ThisTask >= ProcAssign[i].FirstTask && ThisTask < (ProcAssign[i].FirstTask + ProcAssign[i].NTask)) + CommSplitColor = i; + } + + /* Now assign a target task for the group.
For collective groups, the target task is the master in the CPU set, whereas + * the serial ones are distributed in a round-robin fashion to the remaining CPUs + */ + for(i = 0; i < Ngroups; i++) + { + if(Group[i].Len > MaxSerialGroupLen) /* we have a collective group */ + { + if(Group[i].GrNr >= Ncollective || Group[i].GrNr < 0) + terminate("odd"); + Group[i].TargetTask = ProcAssign[Group[i].GrNr].FirstTask; + } + else + Group[i].TargetTask = ((Group[i].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective; + } + + /* distribute the groups */ + subfind_distribute_groups(); + qsort(Group, Ngroups, sizeof(struct group_properties), fof_compare_Group_GrNr); + + /* assign target CPUs for the particles in groups */ + /* the particles not in groups will be distributed such that a uniform particle load results */ + t0 = second(); + int *count_loc_task = mymalloc_clear("count_loc_task", NTask * sizeof(int)); + int *count_task = mymalloc("count_task", NTask * sizeof(int)); + int *count_free = mymalloc("count_free", NTask * sizeof(int)); + int count_loc_free = 0; + + for(i = 0; i < NumPart; i++) + { + if(PS[i].GrNr < TotNgroups) /* particle is in a group */ + { + if(PS[i].GrNr < Ncollective) /* we are in a collective group */ + PS[i].TargetTask = ProcAssign[PS[i].GrNr].FirstTask + (i % ProcAssign[PS[i].GrNr].NTask); + else + PS[i].TargetTask = ((PS[i].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective; + + count_loc_task[PS[i].TargetTask]++; + } + else + count_loc_free++; + + PS[i].TargetIndex = 0; /* unimportant here */ + } + + MPI_Allgather(&count_loc_free, 1, MPI_INT, count_free, 1, MPI_INT, MPI_COMM_WORLD); + MPI_Allreduce(count_loc_task, count_task, NTask, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + long long sum = 0; + for(i = 0; i < NTask; i++) + sum += count_task[i] + count_free[i]; + + int maxload = (sum + NTask - 1) / NTask; + for(i = 0; i < NTask; i++) + { + count_task[i] = maxload - count_task[i]; /* this is the amount that can fit on
this task */ + if(count_task[i] < 0) + count_task[i] = 0; + } + + int current_task = 0; + + for(i = 0; i < ThisTask; i++) + { + while(count_free[i] > 0 && current_task < NTask) + { + if(count_free[i] < count_task[current_task]) + { + count_task[current_task] -= count_free[i]; + count_free[i] = 0; + } + else + { + count_free[i] -= count_task[current_task]; + count_task[current_task] = 0; + current_task++; + } + } + } + + for(i = 0; i < NumPart; i++) + { + if(PS[i].GrNr >= + TotNgroups) /* particle not in a group. Can in principle stay but we move it such that a good load balance is obtained. */ + { + while(count_task[current_task] == 0 && current_task < NTask - 1) + current_task++; + + PS[i].TargetTask = current_task; /* particle not in any group, move it here so that uniform load is achieved */ + count_task[current_task]--; + } + } + + myfree(count_free); + myfree(count_task); + myfree(count_loc_task); + +#ifdef SUBFIND_EXTENDED_PROPERTIES + int ngroups_cat = 42; // dummy. not used for any calculation but fct needs to receive a value and we want to keep fct universal. +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + int nsubgroups_cat = 42; // dummy. not used for any calculation but fct needs to receive a value and we want to keep fct universal.
+ + double balance = subfind_get_particle_balance(); + mpi_printf("SUBFIND: particle balance=%g\n", balance); + + /* distribute particles such that groups are completely on the CPU(s) that do the corresponding group(s) */ + fof_subfind_exchange(MPI_COMM_WORLD); + t1 = second(); + mpi_printf("SUBFIND: subfind_exchange() took %g sec\n", timediff(t0, t1)); + + balance = subfind_get_particle_balance(); + mpi_printf("SUBFIND: particle balance for processing=%g\n", balance); + + /* lets estimate the maximum number of substructures we need to store on the local CPU */ + if(ThisTask < NprocsCollective) + { + MaxNsubgroups = (ProcAssign[CommSplitColor].Len / ProcAssign[CommSplitColor].NTask) / All.DesLinkNgb; + } + else + { + for(i = 0, nlocid = 0; i < Ngroups; i++) + nlocid += Group[i].Len; + + MaxNsubgroups = nlocid / All.DesLinkNgb; /* should be a quite conservative upper limit */ + } + + Nsubgroups = 0; + SubGroup = (struct subgroup_properties *)mymalloc_movable(&SubGroup, "SubGroup", MaxNsubgroups * sizeof(struct subgroup_properties)); + + /* we can now split the communicator to give each collectively treated group its own processor set */ + MPI_Comm_split(MPI_COMM_WORLD, CommSplitColor, ThisTask, &SubComm); + MPI_Comm_size(SubComm, &SubNTask); + MPI_Comm_rank(SubComm, &SubThisTask); + SubTagOffset = TagOffset; + + /* here the execution paths for collective groups and serial groups branch. The collective CPUs work in small sets that each + * deal with one large group. The serial CPUs each deal with several halos by themselves + */ + if(CommSplitColor < Ncollective) /* we are one of the CPUs that does a collective group */ + { + /* we now apply a collective version of subfind to the group split across the processors belonging to communicator SubComm + * The relevant group is the one stored in Group[0] on SubThisTask==0. + */ + subfind_process_group_collectively(nsubgroups_cat); + } + else + { + /* now let us sort according to GrNr and Density.
This step will temporarily break the association with SphP[] and other arrays! + */ + submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart); + for(i = 0; i < NumPart; i++) + { + PS[i].SubNr = TotNgroups + 1; /* set a default that is larger than reasonable group number */ + PS[i].OldIndex = i; + submp[i].index = i; + submp[i].GrNr = PS[i].GrNr; + submp[i].DM_Density = PS[i].Density; + } + qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_GrNr_DM_Density); + subfind_reorder_according_to_submp(); + myfree(submp); + + /* now we have the particles in each group consecutively */ + if(SubThisTask == 0) + printf( + "SUBFIND-SERIAL: Start to do %d small groups (cumulative length %lld) with serial subfind algorithm on %d processors " + "(root-node=%d)\n", + TotNgroups - Ncollective, sum_seriallen, SubNTask, ThisTask); + + /* we now apply a serial version of subfind to the local groups */ + t0 = second(); + for(gr = 0, offset = 0; gr < Ngroups; gr++) + { + if(((Group[gr].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective == ThisTask) + offset = subfind_process_group_serial(gr, offset, nsubgroups_cat); + else + terminate("how come that we have this group number?"); + } + + MPI_Barrier(SubComm); + t1 = second(); + if(SubThisTask == 0) + printf("SUBFIND-SERIAL: processing of serial groups took %g sec\n", timediff(t0, t1)); + + /* undo local rearrangement that made groups consecutive.
After that, the association of SphP[] will be correct again */ + submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart); + for(i = 0; i < NumPart; i++) + { + submp[i].index = i; + submp[i].OldIndex = PS[i].OldIndex; + } + qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_OldIndex); + subfind_reorder_according_to_submp(); + myfree(submp); + } + + /* free the communicator */ + MPI_Comm_free(&SubComm); + + /* make common allocation on all tasks */ + int max_load, max_loadsph, load; + + /* for resize */ + load = All.MaxPart; + MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + + load = All.MaxPartSph; + MPI_Allreduce(&load, &max_loadsph, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + + /* do resize */ + All.MaxPart = max_load; + reallocate_memory_maxpart(); + PS = (struct subfind_data *)myrealloc_movable(PS, All.MaxPart * sizeof(struct subfind_data)); + + All.MaxPartSph = max_loadsph; + reallocate_memory_maxpartsph(); + + /* distribute particles back to original CPU */ + t0 = second(); + for(i = 0; i < NumPart; i++) + { + PS[i].TargetTask = PS[i].OriginTask; + PS[i].TargetIndex = PS[i].OriginIndex; + } + + fof_subfind_exchange(MPI_COMM_WORLD); + t1 = second(); + if(ThisTask == 0) + printf("SUBFIND: subfind_exchange() (for return to original CPU) took %g sec\n", timediff(t0, t1)); + + TIMER_STOP(CPU_SUBFIND); + construct_forcetree(0, 0, 0, All.HighestOccupiedTimeBin); /* build forcetree with all particles */ + TIMER_START(CPU_SUBFIND); + + /* compute spherical overdensities for FOF groups */ + cputime = subfind_overdensity(); + mpi_printf("SUBFIND: determining spherical overdensity masses took %g sec\n", cputime); + + myfree(Father); + myfree(Nextnode); + myfree(Tree_Points); + force_treefree(); + +#ifdef SUBFIND_EXTENDED_PROPERTIES + subfind_add_grp_props_calc_fof_angular_momentum(num, ngroups_cat); +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + MPI_Allreduce(&Nsubgroups, &TotNsubgroups, 1, MPI_INT,
MPI_SUM, MPI_COMM_WORLD); + + /* sort the groups according to group/subgroup-number */ + t0 = second(); + parallel_sort(Group, Ngroups, sizeof(struct group_properties), fof_compare_Group_GrNr); + parallel_sort(SubGroup, Nsubgroups, sizeof(struct subgroup_properties), subfind_compare_SubGroup_GrNr_SubNr); + t1 = second(); + mpi_printf("SUBFIND: assembled and ordered groups and subgroups (took %g sec)\n", timediff(t0, t1)); + + /* determine largest subgroup and total particle/cell count in substructures */ + int lenmax, glob_lenmax, totlen; + long long totsublength; + for(i = 0, totlen = 0, lenmax = 0; i < Nsubgroups; i++) + { + totlen += SubGroup[i].Len; + + if(SubGroup[i].Len > lenmax) + lenmax = SubGroup[i].Len; + } + sumup_large_ints(1, &totlen, &totsublength); + MPI_Reduce(&lenmax, &glob_lenmax, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + + /* set binding energy of fuzz to zero, was overwritten with Hsml before; needed for proper snapshot sorting of fuzz */ + for(i = 0; i < NumPart; i++) + if(PS[i].SubNr == TotNgroups + 1) + PS[i].BindingEnergy = 0; + + TIMER_STOP(CPU_SUBFIND); + TIMER_START(CPU_SNAPSHOT); + + /* now final output of catalogue */ + subfind_save_final(num); + + TIMER_STOP(CPU_SNAPSHOT); + TIMER_START(CPU_SUBFIND); + + tend = second(); + + if(ThisTask == 0) + { + printf("SUBFIND: Finished with SUBFIND. (total time=%g sec)\n", timediff(tstart, tend)); + printf("SUBFIND: Total number of subhalos with at least %d particles: %d\n", All.DesLinkNgb, TotNsubgroups); + if(TotNsubgroups > 0) + { + printf("SUBFIND: Largest subhalo has %d particles/cells.\n", glob_lenmax); + printf("SUBFIND: Total number of particles/cells in subhalos: %lld\n", totsublength); + } + } + + myfree_movable(SubGroup); + myfree_movable(ProcAssign); + + TIMER_STOP(CPU_SUBFIND); +} + +/*! \brief Reorders particles in the P and PS arrays. + * + * Reordering given by the submp array: the entries stored at position + * submp[i].index are moved to position i. The permutation is applied in + * place by following its cycles, using a temporary index table Id. + * SphP is not touched by this function.
+ * + * \return void + */ +void subfind_reorder_according_to_submp(void) +{ + int i; + struct particle_data Psave, Psource; + struct subfind_data PSsave, PSsource; + int idsource, idsave, dest; + int *Id; + + Id = (int *)mymalloc("Id", sizeof(int) * (NumPart)); + + for(i = 0; i < NumPart; i++) + Id[submp[i].index] = i; + + for(i = 0; i < NumPart; i++) + { + if(Id[i] != i) + { + Psource = P[i]; + PSsource = PS[i]; + idsource = Id[i]; + + dest = Id[i]; + + do + { + Psave = P[dest]; + PSsave = PS[dest]; + idsave = Id[dest]; + + P[dest] = Psource; + PS[dest] = PSsource; + Id[dest] = idsource; + + if(dest == i) + break; + + Psource = Psave; + PSsource = PSsave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } + + myfree(Id); +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind.h b/src/amuse/community/arepo/src/subfind/subfind.h new file mode 100644 index 0000000000..d229af8490 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind.h @@ -0,0 +1,213 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * .
+ * + * \file src/subfind/subfind.h + * \date 05/2018 + * \brief Header for subfind algorithm. + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 27.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef SUBFIND_H +#define SUBFIND_H + +#include "../domain/domain.h" +#include "../main/allvars.h" + +#define FIND_SMOOTHING_LENGTHS 0 +#define FIND_TOTAL_DENSITIES 1 /* FIND_SMOOTHING_LENGTHS and FIND_TOTAL_DENSITIES are the mode values handed to subfind_density() */ +#define SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER 10000 +#define SUBFIND_GAL_RADIUS_FAC 2.0 /* for subfind metal calculation */ + +#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) +extern int *NodeGrNr; +#endif /* #if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) */ + +extern int GrNr; +extern int NumPartGroup; + +extern struct topnode_data *SubTopNodes; +extern struct local_topnode_data *Sub_LocTopNodes; + +extern int *SubDomainTask; +extern int *SubDomainNodeIndex; +extern int *SubNextnode; +extern int SubNTopleaves; +extern int SubNTopnodes; + +extern int SubTree_MaxPart; +extern int SubTree_NumNodes; +extern int SubTree_MaxNodes; +extern int SubTree_FirstNonTopLevelNode; +extern int SubTree_NumPartImported; +extern int SubTree_NumPartExported; +extern int SubTree_ImportedNodeOffset; +extern int SubTree_NextFreeNode; +extern MyDouble *SubTree_Pos_list; +extern struct NODE *SubNodes; +extern struct ExtNODE *SubExtNodes; + +extern double SubTreeAllocFactor; + +extern int *SubTree_ResultIndexList; +extern int *SubTree_Task_list; +extern unsigned long long *SubTree_IntPos_list; + +extern double SubDomainCorner[3], SubDomainCenter[3], SubDomainLen, SubDomainFac; +extern double SubDomainInverseLen, SubDomainBigFac; + +extern MyDouble GrCM[3]; + +extern int Ncollective; +extern int NprocsCollective; +extern int MaxNsubgroups; +extern int MaxNgbs; +extern int MaxSerialGroupLen; +extern r2type *R2list; + +extern int CommSplitColor; +extern MPI_Comm SubComm; + +extern int SubNTask, SubThisTask; +extern int
SubTagOffset; + +/* table entry for one collectively treated group: its group number and length, plus the first task and the number of tasks of the processor set assigned to it in subfind() */ extern struct proc_assign_data +{ + int GrNr; + int Len; + int FirstTask; + int NTask; +} * ProcAssign; + +extern struct subgroup_properties +{ + int Len; + int LenType[NTYPES]; + int GrNr; + int SubNr; + int SubParent; + MyIDType SubMostBoundID; + MyFloat Mass; + MyFloat MassType[NTYPES]; + MyFloat SubVelDisp; + MyFloat SubVmax; + MyFloat SubVmaxRad; + MyFloat SubHalfMassRad; + MyFloat SubHalfMassRadType[NTYPES]; + MyFloat SubMassInRad; + MyFloat SubMassInRadType[NTYPES]; + MyFloat SubMassInHalfRad; + MyFloat SubMassInHalfRadType[NTYPES]; + MyFloat SubMassInMaxRad; + MyFloat SubMassInMaxRadType[NTYPES]; + MyFloat Pos[3]; + MyFloat CM[3]; + MyFloat Vel[3]; + MyFloat Spin[3]; + +#ifdef MHD + MyFloat Bfld_Halo, Bfld_Disk; +#endif /* #ifdef MHD */ + +#ifdef SUBFIND_EXTENDED_PROPERTIES + MyFloat Ekin, Epot, Ethr; + MyFloat J[3], Jdm[3], Jgas[3], Jstars[3], CMFrac, CMFracType[NTYPES]; + MyFloat J_inRad[3], Jdm_inRad[3], Jgas_inRad[3], Jstars_inRad[3], CMFrac_inRad, CMFracType_inRad[NTYPES]; + MyFloat J_inHalfRad[3], Jdm_inHalfRad[3], Jgas_inHalfRad[3], Jstars_inHalfRad[3], CMFrac_inHalfRad, CMFracType_inHalfRad[NTYPES]; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +#ifdef USE_SFR + MyFloat Sfr, SfrInRad, SfrInHalfRad, SfrInMaxRad, GasMassSfr; +#endif /* #ifdef USE_SFR */ +} * SubGroup; + +extern struct nearest_r2_data +{ + double dist[2]; +} * R2Loc; + +extern struct nearest_ngb_data +{ + long long index[2]; + int count; +} * NgbLoc; + +extern int NumPaux; + +extern struct paux_data +{ + int TaskOfGr; + int LocGrIndex; + unsigned char Type; + unsigned char SofteningType; + MyDouble Pos[3]; + MyDouble Mass; +} * Paux; + +/* helper record used to reorder the local particles: index is the current particle position; GrNr, DM_Density and OldIndex are presumably the sort keys of the subfind_compare_submp_* functions (their bodies are not shown here) */ extern struct submp_data +{ + int index; + int GrNr; + int OldIndex; + MyFloat DM_Density; +} * submp; + +extern struct cand_dat +{ + int head; + int len; + int nsub; + int rank, subnr, parent; + int bound_length; +} * candidates; + +extern struct coll_cand_dat +{ + long long head; + long long rank; + int len; + int nsub; + int
subnr, parent; + int bound_length; +} * coll_candidates; + +typedef struct +{ + double rho; +#ifdef SUBFIND_CALC_MORE + double vx, vy, vz; + double v2; +#endif +} SubDMData; + +void subfind_determine_sub_halo_properties(struct unbind_data *d, int num, struct subgroup_properties *subgroup, int grnr, int subnr, + int parallel_flag, int nsubgroups_cat); +int subfind_ngb_treefind_density(MyDouble searchcenter[3], double hsml, int target, int *startnode, int mode, int *exportflag, + int *exportnodecount, int *exportindex, SubDMData *sub_dm_data); +int subfind_treefind_collective_export_node_threads(int no, int i, int thread_id); +void subfind_domain_do_local_refine(int n, int *list); +void assign_group_numbers_based_on_catalogue(int ngroups_cat, int nsubgroups_cat); +int subfind_compare_rlist_mhd(const void *a, const void *b); + +#endif /* #ifndef SUBFIND_H */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_coll_domain.c b/src/amuse/community/arepo/src/subfind/subfind_coll_domain.c new file mode 100644 index 0000000000..9abd20009d --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_coll_domain.c @@ -0,0 +1,620 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_coll_domain.c + * \date 05/2018 + * \brief Domain decomposition for collective subfind algorithm. + * \details contains functions: + * static int mydata_cmp(struct mydata *lhs, struct mydata *rhs) + * void subfind_coll_domain_decomposition(void) + * void subfind_coll_findExtent(void) + * int subfind_coll_domain_determineTopTree(void) + * void subfind_domain_do_local_refine(int n, int *list) + * void subfind_coll_domain_walktoptree(int no) + * void subfind_coll_domain_combine_topleaves_to_domains(int ncpu, int ndomain) + * void subfind_coll_domain_allocate(void) + * void subfind_coll_domain_free(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 15.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND +#include "../domain/bsd_tree.h" +#include "../domain/domain.h" +#include "subfind.h" + +/*! \brief Define structure of my tree nodes. + */ +struct mydata +{ + double workload; + int topnode_index; + + RB_ENTRY(mydata) linkage; /* this creates the linkage pointers needed by the RB tree, using symbolic name 'linkage' */ +}; + +/*! \brief Comparison function of mydata objects (i.e. tree elements). + * + * Compares the elements (most important first): + * workload, topnode_index. Elements are ordered by decreasing workload; + * equal workloads are ordered by increasing topnode_index. + * + * \param[in] lhs First object to compare. + * \param[in] rhs Second object to compare. + * + * \return -1 if lhs.workload > rhs.workload, or if the workloads are equal + * and lhs.topnode_index < rhs.topnode_index; 1 in the two opposite + * cases; 0 if both fields are equal.
+ */ +static int mydata_cmp(struct mydata *lhs, struct mydata *rhs) +{ + if(lhs->workload > rhs->workload) + return -1; + else if(lhs->workload < rhs->workload) + return 1; + else if(lhs->topnode_index < rhs->topnode_index) + return -1; + else if(lhs->topnode_index > rhs->topnode_index) + return 1; + + return 0; +} + +/* the following macro declares 'struct mytree', which is the header element + * needed as handle for a tree + */ +RB_HEAD(mytree, mydata); + +static struct mydata *nload; +static struct mytree queue_load; + +/* the following macros declare appropriate function prototypes and functions + * needed for this type of tree + */ +RB_PROTOTYPE_STATIC(mytree, mydata, linkage, mydata_cmp); +RB_GENERATE_STATIC(mytree, mydata, linkage, mydata_cmp); + +/*! \brief Performs domain decomposition for subfind collective. + * + * Works on the particles of group GrNr within communicator SubComm: + * builds the top-level tree, assigns each of these particles a target + * task, exchanges the particles and copies the top-node data into + * SubTopNodes. + * + * \return void + */ +void subfind_coll_domain_decomposition(void) +{ + int i; + int col_grouplen, col_partcount; + + subfind_coll_domain_allocate(); + subfind_coll_findExtent(); + + Key = (peanokey *)mymalloc_movable(&Key, "Key", (sizeof(peanokey) * NumPart)); + Sub_LocTopNodes = (struct local_topnode_data *)mymalloc_movable(&Sub_LocTopNodes, "Sub_LocTopNodes", + (MaxTopNodes * sizeof(struct local_topnode_data))); + + MPI_Allreduce(&NumPartGroup, &col_grouplen, 1, MPI_INT, MPI_SUM, SubComm); + MPI_Allreduce(&NumPart, &col_partcount, 1, MPI_INT, MPI_SUM, SubComm); + + fac_work = 0.5 / col_grouplen; + fac_load = 0.5 / col_partcount; + + subfind_coll_domain_determineTopTree(); + + /* find the split of the top-level tree */ + subfind_coll_domain_combine_topleaves_to_domains(SubNTask, SubNTopleaves); + + /* determine the particles that need to be exported, and to which CPU they need to be sent */ + for(i = 0; i < NumPart; i++) + { + if(PS[i].GrNr == GrNr) + { + int no = 0; + while(Sub_LocTopNodes[no].Daughter >= 0) + no = Sub_LocTopNodes[no].Daughter + (Key[i] - Sub_LocTopNodes[no].StartKey) / (Sub_LocTopNodes[no].Size >> 3); + + no =
Sub_LocTopNodes[no].Leaf; + + int task = SubDomainTask[no]; + + PS[i].TargetTask = task; + } + else + PS[i].TargetTask = SubThisTask; + + PS[i].TargetIndex = 0; /* unimportant here */ + } + + fof_subfind_exchange(SubComm); + + /* note that the domain decomposition leads to invalid values of NumPartGroup. This will however be redetermined in the main + * routine of the collective subfind, after the domain decomposition has been done. + */ + + /* copy what we need for the topnodes */ + for(i = 0; i < SubNTopnodes; i++) + { + SubTopNodes[i].StartKey = Sub_LocTopNodes[i].StartKey; + SubTopNodes[i].Size = Sub_LocTopNodes[i].Size; + SubTopNodes[i].Daughter = Sub_LocTopNodes[i].Daughter; + SubTopNodes[i].Leaf = Sub_LocTopNodes[i].Leaf; + + int j; + int bits = my_ffsll(SubTopNodes[i].Size); + int blocks = (bits - 1) / 3 - 1; + + for(j = 0; j < 8; j++) + { + peano1D xb, yb, zb; + peano_hilbert_key_inverse(SubTopNodes[i].StartKey + j * (SubTopNodes[i].Size >> 3), BITS_PER_DIMENSION, &xb, &yb, &zb); + xb >>= blocks; + yb >>= blocks; + zb >>= blocks; + int idx = (xb & 1) | ((yb & 1) << 1) | ((zb & 1) << 2); + if(idx < 0 || idx > 7) + terminate("j=%d idx=%d", j, idx); + + SubTopNodes[i].MortonToPeanoSubnode[idx] = j; + } + } + + myfree(Sub_LocTopNodes); + myfree(Key); + + SubTopNodes = (struct topnode_data *)myrealloc_movable(SubTopNodes, SubNTopnodes * sizeof(struct topnode_data)); + SubDomainTask = (int *)myrealloc_movable(SubDomainTask, SubNTopleaves * sizeof(int)); +} + +/*! \brief Determines extent of local data and writes it to global variables. + * + * Only particles of group GrNr are considered; the minimum and maximum + * coordinates are reduced over SubComm. The results are stored in + * SubDomainLen, SubDomainInverseLen, SubDomainFac, SubDomainBigFac, + * SubDomainCenter and SubDomainCorner.
+ * + * \return void + */ +void subfind_coll_findExtent(void) +{ + int i, j; + double len, xmin[3], xmax[3], xmin_glob[3], xmax_glob[3]; + + /* determine the extent: start from an empty bounding box */ + for(i = 0; i < 3; i++) + { + xmin[i] = MAX_REAL_NUMBER; + xmax[i] = -MAX_REAL_NUMBER; + } + + for(i = 0; i < NumPart; i++) + { + if(PS[i].GrNr == GrNr) + { + for(j = 0; j < 3; j++) + { +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + { + if(xmin[j] > PS[i].Center[j]) + xmin[j] = PS[i].Center[j]; + + if(xmax[j] < PS[i].Center[j]) + xmax[j] = PS[i].Center[j]; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + if(xmin[j] > P[i].Pos[j]) + xmin[j] = P[i].Pos[j]; + + if(xmax[j] < P[i].Pos[j]) + xmax[j] = P[i].Pos[j]; + } + } + } + } + + MPI_Allreduce(xmin, xmin_glob, 3, MPI_DOUBLE, MPI_MIN, SubComm); + MPI_Allreduce(xmax, xmax_glob, 3, MPI_DOUBLE, MPI_MAX, SubComm); + + len = 0; + for(j = 0; j < 3; j++) + if(xmax_glob[j] - xmin_glob[j] > len) + len = xmax_glob[j] - xmin_glob[j]; + + len *= 1.001; + + SubDomainLen = len; + SubDomainInverseLen = 1.0 / SubDomainLen; + SubDomainFac = 1.0 / len * (((peanokey)1) << (BITS_PER_DIMENSION)); + SubDomainBigFac = (SubDomainLen / (((long long)1) << 52)); + + for(j = 0; j < 3; j++) + { + SubDomainCenter[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]); + SubDomainCorner[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]) - 0.5 * len; + } +} + +/*! \brief Determines extent of the subfind top-tree.
+ * + * \return void + */ +int subfind_coll_domain_determineTopTree(void) +{ + int i, count; + + mp = (struct domain_peano_hilbert_data *)mymalloc("mp", sizeof(struct domain_peano_hilbert_data) * NumPartGroup); + + for(i = 0, count = 0; i < NumPart; i++) + { + if(PS[i].GrNr == GrNr) + { + peano1D xb, yb, zb; + +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + { + xb = domain_double_to_int(((PS[i].Center[0] - SubDomainCorner[0]) * SubDomainInverseLen) + 1.0); + yb = domain_double_to_int(((PS[i].Center[1] - SubDomainCorner[1]) * SubDomainInverseLen) + 1.0); + zb = domain_double_to_int(((PS[i].Center[2] - SubDomainCorner[2]) * SubDomainInverseLen) + 1.0); + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + xb = domain_double_to_int(((P[i].Pos[0] - SubDomainCorner[0]) * SubDomainInverseLen) + 1.0); + yb = domain_double_to_int(((P[i].Pos[1] - SubDomainCorner[1]) * SubDomainInverseLen) + 1.0); + zb = domain_double_to_int(((P[i].Pos[2] - SubDomainCorner[2]) * SubDomainInverseLen) + 1.0); + } + + mp[count].key = Key[i] = peano_hilbert_key(xb, yb, zb, BITS_PER_DIMENSION); + mp[count].index = i; + count++; + } + } + + if(count != NumPartGroup) + terminate("cost != NumPartGroup"); + + mysort_domain(mp, count, sizeof(struct domain_peano_hilbert_data)); + + SubNTopnodes = 1; + SubNTopleaves = 1; + Sub_LocTopNodes[0].Daughter = -1; + Sub_LocTopNodes[0].Parent = -1; + Sub_LocTopNodes[0].Size = PEANOCELLS; + Sub_LocTopNodes[0].StartKey = 0; + Sub_LocTopNodes[0].PIndex = 0; + Sub_LocTopNodes[0].Cost = NumPartGroup; + Sub_LocTopNodes[0].Count = NumPartGroup; + + int limitNTopNodes = 2 * imax(1 + (NTask / 7 + 1) * 8, All.TopNodeFactor * SubNTask); + + if(limitNTopNodes > MaxTopNodes) + terminate("limitNTopNodes > MaxTopNodes"); + + RB_INIT(&queue_load); + nload = mymalloc("nload", limitNTopNodes * sizeof(struct mydata)); + int *list = mymalloc("list", limitNTopNodes * sizeof(int)); + + double limit = 1.0 / (All.TopNodeFactor * SubNTask); + + /* insert the root node */ + 
nload[0].workload = 1.0; + nload[0].topnode_index = 0; + RB_INSERT(mytree, &queue_load, &nload[0]); + + int iter = 0; + + do + { + count = 0; + + double first_workload = 0; + + for(struct mydata *nfirst = RB_MIN(mytree, &queue_load); nfirst != NULL; nfirst = RB_NEXT(mytree, &queue_load, nfirst)) + { + if(Sub_LocTopNodes[nfirst->topnode_index].Size >= 8) + { + first_workload = nfirst->workload; + break; + } + } + + for(struct mydata *np = RB_MIN(mytree, &queue_load); np != NULL; np = RB_NEXT(mytree, &queue_load, np)) + { + if(np->workload < 0.125 * first_workload) + break; + + if(SubNTopnodes + 8 * (count + 1) >= limitNTopNodes) + break; + + if(np->workload > limit || (SubNTopleaves < SubNTask && count == 0)) + { + if(Sub_LocTopNodes[np->topnode_index].Size >= 8) + { + list[count] = np->topnode_index; + count++; + } + } + } + + if(count > 0) + { + subfind_domain_do_local_refine(count, list); + iter++; + } + } + while(count > 0); + + myfree(list); + myfree(nload); + myfree(mp); + + /* count toplevel leaves */ + + /* count the number of top leaves */ + SubNTopleaves = 0; + subfind_coll_domain_walktoptree(0); + + if(SubNTopleaves < SubNTask) + terminate("SubNTopleaves = %d < SubNTask = %d", SubNTopleaves, SubNTask); + + return 0; +} + +/*! \brief Refines top-tree locally. + * + * \param[in] n Number of new nodes. + * \param[in] list Array with indices of new nodes. 
+ * + * \return void + */ +void subfind_domain_do_local_refine(int n, int *list) +{ + double *worktotlist = mymalloc("worktotlist", 8 * n * sizeof(double)); + double *worklist = mymalloc("worklist", 8 * n * sizeof(double)); + + /* create the new nodes */ + for(int k = 0; k < n; k++) + { + int i = list[k]; + + Sub_LocTopNodes[i].Daughter = SubNTopnodes; + SubNTopnodes += 8; + SubNTopleaves += 7; + + for(int j = 0; j < 8; j++) + { + int sub = Sub_LocTopNodes[i].Daughter + j; + + Sub_LocTopNodes[sub].Daughter = -1; + Sub_LocTopNodes[sub].Parent = i; + Sub_LocTopNodes[sub].Size = (Sub_LocTopNodes[i].Size >> 3); + Sub_LocTopNodes[sub].StartKey = Sub_LocTopNodes[i].StartKey + j * Sub_LocTopNodes[sub].Size; + Sub_LocTopNodes[sub].PIndex = Sub_LocTopNodes[i].PIndex; + Sub_LocTopNodes[sub].Cost = 0; + Sub_LocTopNodes[sub].Count = 0; + } + + int sub = Sub_LocTopNodes[i].Daughter; + + for(int p = Sub_LocTopNodes[i].PIndex, j = 0; p < Sub_LocTopNodes[i].PIndex + Sub_LocTopNodes[i].Count; p++) + { + if(PS[mp[p].index].GrNr != GrNr) + terminate("Houston, we have a problem."); + + if(j < 7) + while(mp[p].key >= Sub_LocTopNodes[sub + 1].StartKey) + { + j++; + sub++; + Sub_LocTopNodes[sub].PIndex = p; + if(j >= 7) + break; + } + + Sub_LocTopNodes[sub].Count++; + Sub_LocTopNodes[sub].Cost++; + } + + for(int j = 0; j < 8; j++) + { + sub = Sub_LocTopNodes[i].Daughter + j; + worklist[k * 8 + j] = fac_work * Sub_LocTopNodes[sub].Cost + fac_load * Sub_LocTopNodes[sub].Count; + } + } + + MPI_Allreduce(worklist, worktotlist, 8 * n, MPI_DOUBLE, MPI_SUM, SubComm); + + for(int k = 0; k < n; k++) + { + int i = list[k]; + RB_REMOVE(mytree, &queue_load, &nload[i]); + } + + for(int k = 0, l = 0; k < n; k++) + { + int i = list[k]; + + for(int j = 0; j < 8; j++, l++) + { + int sub = Sub_LocTopNodes[i].Daughter + j; + + /* insert the node */ + nload[sub].workload = worktotlist[l]; + nload[sub].topnode_index = sub; + RB_INSERT(mytree, &queue_load, &nload[sub]); + } + } + + myfree(worklist); + 
myfree(worktotlist); +} + +/*! \brief Walk the top tree and set reference to leaf node. + * + * \param[in] no Node index. + * + * \return void + */ +void subfind_coll_domain_walktoptree(int no) +{ + int i; + + if(Sub_LocTopNodes[no].Daughter == -1) + { + Sub_LocTopNodes[no].Leaf = SubNTopleaves; + SubNTopleaves++; + } + else + { + for(i = 0; i < 8; i++) + subfind_coll_domain_walktoptree(Sub_LocTopNodes[no].Daughter + i); + } +} + +/*! \brief Uses the cumulative cost function (which weights work-load and + * memory-load equally) to subdivide the list of top-level leave + * nodes into pieces that are (approximately) equal in size. + * + * \param[in] ncpu Number of tasks. + * \param[in] ndomain Number of domains. + * + * \return void + */ +void subfind_coll_domain_combine_topleaves_to_domains(int ncpu, int ndomain) +{ + int i, j, start, end, n, no; + double work, workavg, work_before, workavg_before, workhalfnode; + float *domainWork, *local_domainWork; + int *domainCount, *local_domainCount; + + /* sum the costs for each top leave */ + + domainWork = (float *)mymalloc("local_domainWork", SubNTopleaves * sizeof(float)); + domainCount = (int *)mymalloc("local_domainCount", SubNTopleaves * sizeof(int)); + + local_domainWork = (float *)mymalloc("local_domainWork", SubNTopleaves * sizeof(float)); + local_domainCount = (int *)mymalloc("local_domainCount", SubNTopleaves * sizeof(int)); + + for(i = 0; i < SubNTopleaves; i++) + { + local_domainWork[i] = 0; + local_domainCount[i] = 0; + } + + /* find for each particle its top-leave, and then add the associated cost with it */ + for(n = 0; n < NumPart; n++) + { + if(PS[n].GrNr == GrNr) + { + no = 0; + while(Sub_LocTopNodes[no].Daughter >= 0) + no = Sub_LocTopNodes[no].Daughter + (Key[n] - Sub_LocTopNodes[no].StartKey) / (Sub_LocTopNodes[no].Size >> 3); + + no = Sub_LocTopNodes[no].Leaf; + + local_domainCount[no] += 1; + local_domainWork[no] += 1; + } + } + + MPI_Allreduce(local_domainWork, domainWork, SubNTopleaves, MPI_FLOAT, 
MPI_SUM, SubComm); + MPI_Allreduce(local_domainCount, domainCount, SubNTopleaves, MPI_INT, MPI_SUM, SubComm); + + myfree(local_domainCount); + myfree(local_domainWork); + + /* now combine the top leaves to form the individual domains */ + + workhalfnode = 0.5 / ndomain; + workavg = 1.0 / ncpu; + work_before = workavg_before = 0; + + start = 0; + + for(i = 0; i < ncpu; i++) + { + work = 0; + end = start; + + work += fac_work * domainWork[end] + fac_load * domainCount[end]; + + while((work + work_before + (end + 1 < ndomain ? fac_work * domainWork[end + 1] + fac_load * domainCount[end + 1] : 0) < + workavg + workavg_before + workhalfnode) || + (i == ncpu - 1 && end < ndomain - 1)) + { + if((ndomain - end) > (ncpu - i)) + end++; + else + break; + + work += fac_work * domainWork[end] + fac_load * domainCount[end]; + } + + for(j = start; j <= end; j++) + SubDomainTask[j] = i; + + work_before += work; + workavg_before += workavg; + start = end + 1; + } + + myfree(domainCount); + myfree(domainWork); +} + +/*! \brief Allocates all the stuff that will be required for the + * tree-construction/walk later on. + * + * \return void + */ +void subfind_coll_domain_allocate(void) +{ + MaxTopNodes = (int)(All.TopNodeAllocFactor * All.MaxPart + 1); + + if(SubDomainTask) + terminate("subfind collective domain storage already allocated"); + + SubTopNodes = (struct topnode_data *)mymalloc_movable(&SubTopNodes, "SubTopNodes", (MaxTopNodes * sizeof(struct topnode_data))); + SubDomainTask = (int *)mymalloc_movable(&SubDomainTask, "SubDomainTask", (MaxTopNodes * sizeof(int))); +} + +/*! \brief Free memory used for subfind collective domain decomposition. 
+ *
+ * \return void
+ */
+void subfind_coll_domain_free(void)
+{
+  if(!SubDomainTask) /* guards against freeing storage that is not allocated (or was already freed) */
+    terminate("subfind collective domain storage not allocated");
+
+  myfree(SubDomainTask); /* released in the reverse order of the allocations in subfind_coll_domain_allocate() */
+  myfree(SubTopNodes);
+
+  SubDomainTask = NULL; /* NULL marks the storage as not allocated for the checks here and in subfind_coll_domain_allocate() */
+  SubTopNodes = NULL;
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_coll_tree.c b/src/amuse/community/arepo/src/subfind/subfind_coll_tree.c
new file mode 100644
index 0000000000..96d7db4b07
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_coll_tree.c
@@ -0,0 +1,992 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/subfind/subfind_coll_tree.c
+ * \date 05/2018
+ * \brief Functions for tree-construction for subfind collective.
+ * \details contains functions: + * int subfind_coll_treebuild(int npart, struct unbind_data *mp) + * int subfind_coll_treebuild_construct(int npart, struct + * unbind_data *mp) + * int subfind_coll_treebuild_insert_single_point(int i, + * unsigned long long *intpos, int th, unsigned char levels) + * int subfind_coll_create_empty_nodes(int no, int topnode, + * int bits, int x, int y, int z, unsigned long long xc, + * unsigned long long yc, unsigned long long zc, + * unsigned long long ilen) + * void subfind_coll_insert_pseudo_particles(void) + * void subfind_coll_update_node_recursive(int no, int sib, + * int father, int *last) + * void subfind_coll_exchange_topleafdata(void) + * void subfind_coll_treeupdate_toplevel(int no, int topnode, + * int bits, int x, int y, int z) + * void subfind_coll_treeallocate(int maxpart, int maxindex) + * void subfind_coll_treefree(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND +#include "../gravity/forcetree.h" +#include "subfind.h" + +/*! \brief Main function to build subfind collective tree. + * + * \param[in] npart Number of particles. + * \param[in] mp Unbind data. + * + * \return Number of nodes in tree. 
+ */
+int subfind_coll_treebuild(int npart, struct unbind_data *mp)
+{
+  /* Construct the tree. If any task of SubComm reports a failure (negative
+   * return value), the tree storage is enlarged on all tasks and the
+   * construction is repeated.
+   */
+  for(;;)
+    {
+      int status_local = subfind_coll_treebuild_construct(npart, mp);
+      int status_global;
+
+      MPI_Allreduce(&status_local, &status_global, 1, MPI_INT, MPI_MIN, SubComm);
+
+      if(status_global >= 0)
+        break;
+
+      subfind_coll_treefree();
+
+      SubTreeAllocFactor *= 1.15;
+
+      printf("SUBFIND-COLLECTIVE, root-task=%d: Increasing TreeAllocFactor, new value=%g\n", ThisTask, SubTreeAllocFactor);
+      fflush(stdout);
+
+      subfind_coll_treeallocate(NumPart, All.MaxPart);
+    }
+
+  /* mark the top-level leaves that belong to other tasks with pseudo particles */
+  subfind_coll_insert_pseudo_particles();
+
+  /* compute the multipole moments, starting at the root node; 'tail' ends up
+   * holding the index of the last element visited by the walk */
+  int tail = -1;
+
+  subfind_coll_update_node_recursive(SubTree_MaxPart, -1, -1, &tail);
+
+  /* terminate the chain of "next" links at the last visited element */
+  if(tail < SubTree_MaxPart)
+    SubNextnode[tail] = -1;
+  else if(tail < SubTree_MaxPart + SubTree_MaxNodes)
+    SubNodes[tail].u.d.nextnode = -1;
+  else
+    SubNextnode[tail - SubTree_MaxNodes] = -1; /* a pseudo-particle or imported particle */
+
+  subfind_coll_exchange_topleafdata();
+
+  SubTree_NextFreeNode = SubTree_MaxPart + 1;
+
+  subfind_coll_treeupdate_toplevel(SubTree_MaxPart, 0, 1, 0, 0, 0);
+
+  return SubTree_NumNodes;
+}
+
+/*! \brief Constructs the collective subfind oct-tree.
+ *
+ * The index convention for accessing tree nodes is the following:
+ * node index
+ * [0...SubTree_MaxPart-1] references single particles, the indices
+ * [SubTree_MaxPart...SubTree_MaxPart+SubTree_MaxNodes-1] references tree
+ * nodes.
+ * [SubTree_MaxPart+SubTree_MaxNodes...
+ * SubTree_MaxPart+SubTree_MaxNodes+NTopleaves-1] references "pseudo
+ * particles", i.e. mark branches on foreign CPUs
+ * [SubTree_MaxPart+SubTree_MaxNodes+NTopleaves...
+ * SubTree_MaxPart+SubTree_MaxNodes+NTopleaves+0-1] references imported points
+ *
+ * `Nodes_base' points to the first tree node, while `Nodes' is shifted such
+ * that SubNodes[SubTree_MaxPart] gives the root tree node.
+ *
+ * \param[in] npart Number of particles.
+ * \param[in] mp Unbind data.
+ *
+ * \return Number of nodes.
+ */
+int subfind_coll_treebuild_construct(int npart, struct unbind_data *mp)
+{ /* Outline:
+   *  1. create the root node at index SubTree_MaxPart and the empty nodes of
+   *     the top-level grid (subfind_coll_create_empty_nodes());
+   *  2. copy the coordinates of the npart particles into SubTree_Pos_list,
+   *     terminating if one lies outside the cube described by
+   *     SubDomainCorner/SubDomainLen;
+   *  3. convert each position to integer coordinates, walk SubTopNodes down
+   *     to the top-level leaf containing it and insert the particle below the
+   *     tree node of that leaf.
+   * Returns -1 instead of the node count when the node storage is exhausted
+   * (in step 1 or in step 3).
+   */
+  int i, j, k, no, flag_full = 0;
+  unsigned long long *intposp;
+  MyDouble *posp;
+  unsigned long long ibaselen = ((unsigned long long)1) << 52; /* side length of the root node in integer coordinates */
+
+  /* create an empty root node */
+  SubTree_NextFreeNode = SubTree_MaxPart; /* index of first free node */
+  struct NODE *nfreep = &SubNodes[SubTree_NextFreeNode]; /* select first node */
+
+  for(j = 0; j < 8; j++)
+    nfreep->u.suns[j] = -1; /* -1 marks an empty daughter slot */
+
+  nfreep->len = SubDomainLen;
+  for(j = 0; j < 3; j++)
+    nfreep->center[j] = SubDomainCenter[j];
+
+  SubTree_NumNodes = 1;
+  SubTree_NextFreeNode++;
+
+  /* create a set of empty nodes corresponding to the top-level domain
+   * grid. We need to generate these nodes first to make sure that we have a
+   * complete top-level tree which allows the easy insertion of the
+   * pseudo-particles at the right place
+   */
+  if(subfind_coll_create_empty_nodes(SubTree_MaxPart, 0, 1, 0, 0, 0, 0, 0, 0, ibaselen) < 0)
+    return -1;
+
+  SubTree_FirstNonTopLevelNode = SubTree_NextFreeNode;
+
+  /* if a high-resolution region in a global tree is used, we need to generate
+   * an additional set empty nodes to make sure that we have a complete
+   * top-level tree for the high-resolution inset
+   * (note: no code for this case follows in this function)
+   */
+
+  SubTree_IntPos_list =
+      (unsigned long long *)mymalloc_movable(&SubTree_IntPos_list, "SubTree_IntPos_list", 3 * NumPart * sizeof(unsigned long long));
+
+  SubTree_ImportedNodeOffset = SubTree_MaxPart + SubTree_MaxNodes + SubNTopleaves;
+
+  /* now we determine for each point the insertion top-level node, and the task on which this lies */
+  for(i = 0; i < npart; i++)
+    {
+      for(j = 0; j < 3; j++)
+        {
+          if(mp) /* with unbind data the i-th entry names the particle, otherwise particles are taken in order */
+            k = mp[i].index;
+          else
+            k = i;
+
+#ifdef CELL_CENTER_GRAVITY
+          if(P[k].Type == 0)
+            posp = &PS[k].Center[j];
+          else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+            posp = &P[k].Pos[j];
+
+          if(*posp < SubDomainCorner[j] || *posp >= SubDomainCorner[j] + SubDomainLen)
+            {
+              terminate("out of box i=%d j=%d coord=%g SubDomainCorner=(%g|%g|%g) SubDomainLen=%g", i, j, *posp, SubDomainCorner[0],
+                        SubDomainCorner[1], SubDomainCorner[2], SubDomainLen);
+            }
+
+          SubTree_Pos_list[3 * k + j] = *posp;
+        }
+    }
+
+  for(i = 0; i < npart; i++)
+    {
+      if(mp)
+        k = mp[i].index;
+      else
+        k = i;
+
+      posp = &SubTree_Pos_list[3 * k];
+
+      unsigned long long xxb = force_double_to_int(((*posp++ - SubDomainCorner[0]) * SubDomainInverseLen) + 1.0);
+      unsigned long long yyb = force_double_to_int(((*posp++ - SubDomainCorner[1]) * SubDomainInverseLen) + 1.0);
+      unsigned long long zzb = force_double_to_int(((*posp++ - SubDomainCorner[2]) * SubDomainInverseLen) + 1.0);
+      unsigned long long mask = ((unsigned long long)1) << (52 - 1); /* selects the coordinate bit of the current tree level */
+      unsigned char shiftx = (52 - 1); /* moves the masked x bit to bit 0 */
+      unsigned char shifty = (52 - 2); /* moves the masked y bit to bit 1 */
+      unsigned char shiftz = (52 - 3); /* moves the masked z bit to bit 2 */
+      unsigned char levels = 0; /* number of top-level tree levels descended so far */
+
+      intposp = &SubTree_IntPos_list[3 * k];
+
+      *intposp++ = xxb;
+      *intposp++ = yyb;
+      *intposp++ = zzb;
+
+      no = 0;
+      while(SubTopNodes[no].Daughter >= 0) /* descend the top-level tree to the leaf containing the point */
+        {
+          unsigned char subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) |
+                                   ((unsigned char)((zzb & mask) >> (shiftz--))));
+
+          mask >>= 1;
+          levels++;
+
+          no = SubTopNodes[no].Daughter + SubTopNodes[no].MortonToPeanoSubnode[subnode];
+        }
+
+      no = SubTopNodes[no].Leaf; /* from here on 'no' is a leaf number */
+
+      if(no >= SubTree_ImportedNodeOffset)
+        terminate("i=%d: no=%d SubTree_ImportedNodeOffset=%d", i, no, SubTree_ImportedNodeOffset);
+
+      if(subfind_coll_treebuild_insert_single_point(k, &SubTree_IntPos_list[3 * k], SubDomainNodeIndex[no], levels) < 0)
+        {
+          flag_full = 1; /* node storage exhausted: give up and let the caller enlarge it */
+          break;
+        }
+    }
+
+  myfree_movable(SubTree_IntPos_list);
+
+  if(flag_full)
+    return -1;
+
+  return SubTree_NumNodes;
+}
+
+/*! \brief Inserts single point in tree.
+ *
+ * \param[in] i Index of particle.
+ * \param[in] intpos Integer position.
+ * \param[in] th Index in SubNodes.
+ * \param[in] levels Level corresponding to subnode.
+ *
+ * \return void
+ */
+int subfind_coll_treebuild_insert_single_point(int i, unsigned long long *intpos, int th, unsigned char levels)
+{ /* NOTE: despite the "\return void" in the header above, this function
+   * returns 0 on success and -1 when SubTree_NumNodes reaches
+   * SubTree_MaxNodes (it terminates the run instead if SubTreeAllocFactor
+   * already exceeds MAX_TREE_ALLOC_FACTOR).
+   *
+   * Starting at node 'th', the loop descends one level per iteration. When it
+   * hits an occupied particle slot, a new internal node is created that takes
+   * over the old particle, and the descent continues in that new node.
+   */
+  int j, parent = -1;
+  unsigned char subnode = 0;
+  unsigned long long xxb = intpos[0];
+  unsigned long long yyb = intpos[1];
+  unsigned long long zzb = intpos[2];
+  unsigned long long mask = ((unsigned long long)1) << ((52 - 1) - levels); /* coordinate bit of the level below 'th' */
+  unsigned char shiftx = (52 - 1) - levels; /* moves the masked x bit to bit 0 */
+  unsigned char shifty = (52 - 2) - levels; /* moves the masked y bit to bit 1 */
+  unsigned char shiftz = (52 - 3) - levels; /* moves the masked z bit to bit 2 */
+  signed long long centermask = (0xFFF0000000000000llu); /* signed: the shifts below rely on sign-propagating right shifts */
+  unsigned long long *intppos;
+  centermask >>= levels;
+
+  while(1)
+    {
+      if(th >= SubTree_MaxPart && th < SubTree_ImportedNodeOffset) /* we are dealing with an internal node */
+        {
+          subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) |
+                     ((unsigned char)((zzb & mask) >> (shiftz--))));
+
+          centermask >>= 1;
+          mask >>= 1;
+          levels++;
+
+          if(levels > MAX_TREE_LEVEL)
+            {
+              /* seems like we're dealing with particles at identical (or extremely close)
+               * locations. Shift subnode index to allow tree construction. Note: Multipole moments
+               * of tree are still correct, but one should choose MAX_TREE_LEVEL large enough to have
+               * DomainLen/2^MAX_TREE_LEVEL < gravitational softening length
+               */
+              for(j = 0; j < 8; j++)
+                {
+                  if(SubNodes[th].u.suns[subnode] < 0)
+                    break;
+
+                  subnode++;
+                  if(subnode >= 8)
+                    subnode = 7;
+                }
+            }
+
+          int nn = SubNodes[th].u.suns[subnode];
+
+          if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */
+            {
+              parent = th;
+              th = nn;
+            }
+          else
+            {
+              /* here we have found an empty slot where we can attach
+               * the new particle as a leaf.
+               */
+              SubNodes[th].u.suns[subnode] = i;
+              break; /* done for this particle */
+            }
+        }
+      else
+        {
+          /* We try to insert into a leaf with a single particle. Need
+           * to generate a new internal node at this point.
+           */
+          SubNodes[parent].u.suns[subnode] = SubTree_NextFreeNode;
+          struct NODE *nfreep = &SubNodes[SubTree_NextFreeNode];
+
+          /* geometry of the new node, derived from the integer coordinates of the particle being inserted */
+          double len = ((double)(mask << 1)) * SubDomainBigFac;
+          double cx = ((double)((xxb & centermask) | mask)) * SubDomainBigFac + SubDomainCorner[0];
+          double cy = ((double)((yyb & centermask) | mask)) * SubDomainBigFac + SubDomainCorner[1];
+          double cz = ((double)((zzb & centermask) | mask)) * SubDomainBigFac + SubDomainCorner[2];
+
+          nfreep->len = len;
+          nfreep->center[0] = cx;
+          nfreep->center[1] = cy;
+          nfreep->center[2] = cz;
+
+          for(j = 0; j < 8; j++)
+            nfreep->u.suns[j] = -1;
+
+          if(th >= SubTree_ImportedNodeOffset)
+            {
+              terminate("unexpected here: th=%d SubTree_ImportedNodeOffset=%d", th, SubTree_ImportedNodeOffset);
+            }
+          else
+            intppos = &SubTree_IntPos_list[3 * th]; /* integer position of the particle that occupied the slot */
+
+          subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) |
+                     ((unsigned char)((intppos[2] & mask) >> shiftz)));
+
+          nfreep->u.suns[subnode] = th; /* the old particle moves into the new node */
+
+          th = SubTree_NextFreeNode; /* resume trying to insert the new particle into the newly created internal node */
+          SubTree_NumNodes++;
+          SubTree_NextFreeNode++;
+
+          if(SubTree_NumNodes >= SubTree_MaxNodes)
+            {
+              if(SubTreeAllocFactor > MAX_TREE_ALLOC_FACTOR)
+                {
+                  char buf[500];
+                  sprintf(buf,
+                          "task %d: looks like a serious problem for particle %d, stopping with particle dump. SubTree_NumNodes=%d "
+                          "SubTree_MaxNodes=%d 0=%d NumPart=%d\n",
+                          SubThisTask, i, SubTree_NumNodes, SubTree_MaxNodes, 0, NumPart);
+                  dump_particles();
+                  terminate(buf);
+                }
+
+              return -1; /* out of node storage */
+            }
+        }
+    }
+
+  return 0;
+}
+
+/*! \brief Recursively creates a set of empty tree nodes which corresponds to
+ *         the top-level tree for the domain grid.
This is done to ensure that
+ *         this top-level tree is always "complete" so that we can easily
+ *         associate the pseudo-particles of other CPUs with tree-nodes at a
+ *         given level in the tree, even when the particle population is so
+ *         sparse that some of these nodes are actually empty.
+ *
+ * \param[in] no Index of node.
+ * \param[in] topnode Index of topnode.
+ * \param[in] bits Number of bits used for Peano key.
+ * \param[in] x Integer x position.
+ * \param[in] y Integer y position.
+ * \param[in] z Integer z position.
+ * \param[in] xc X position of corner.
+ * \param[in] yc Y position of corner.
+ * \param[in] zc Z position of corner.
+ * \param[in] ilen Sidelength.
+ *
+ * \return 0: success; -1 Number of nodes exceeds maximum number of nodes.
+ */
+int subfind_coll_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z, unsigned long long xc, unsigned long long yc,
+                                    unsigned long long zc, unsigned long long ilen)
+{ /* If top-level node 'topnode' has daughters, one tree node is created for
+   * each of the 8 octants of tree node 'no', and the function recurses into
+   * it. The tree node created for a top-level leaf is recorded in
+   * SubDomainNodeIndex[leaf].
+   */
+  int i, j, k, n, sub, count;
+  unsigned long long xxc, yyc, zzc, ilenhalf;
+
+  ilen >>= 1; /* side length of the daughters in integer coordinates */
+
+  if(SubTopNodes[topnode].Daughter >= 0)
+    {
+      for(i = 0; i < 2; i++)
+        for(j = 0; j < 2; j++)
+          for(k = 0; k < 2; k++)
+            {
+              if(SubTree_NumNodes >= SubTree_MaxNodes)
+                {
+                  if(SubTreeAllocFactor > MAX_TREE_ALLOC_FACTOR)
+                    {
+                      char buf[500];
+                      sprintf(buf, "task %d: looks like a serious problem (NTopnodes=%d), stopping with particle dump.\n", SubThisTask,
+                              NTopnodes);
+                      dump_particles();
+                      terminate(buf);
+                    }
+                  return -1; /* out of node storage; the caller enlarges it and retries */
+                }
+
+              sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits); /* position of this octant among the daughters of 'topnode' */
+
+              count = i + 2 * j + 4 * k; /* daughter slot in the tree node: x in bit 0, y in bit 1, z in bit 2 */
+
+              SubNodes[no].u.suns[count] = SubTree_NextFreeNode;
+
+              xxc = xc + i * ilen; /* integer corner of the octant */
+              yyc = yc + j * ilen;
+              zzc = zc + k * ilen;
+              ilenhalf = ilen >> 1;
+
+              double len = ((double)ilen) * SubDomainBigFac;
+              double cx = ((double)(xxc + ilenhalf)) * SubDomainBigFac + SubDomainCorner[0];
+              double cy = ((double)(yyc + ilenhalf)) * SubDomainBigFac + SubDomainCorner[1];
+              double cz = ((double)(zzc + ilenhalf)) * SubDomainBigFac + SubDomainCorner[2];
+
+              SubNodes[SubTree_NextFreeNode].len = len;
+              SubNodes[SubTree_NextFreeNode].center[0] = cx;
+              SubNodes[SubTree_NextFreeNode].center[1] = cy;
+              SubNodes[SubTree_NextFreeNode].center[2] = cz;
+
+              for(n = 0; n < 8; n++)
+                SubNodes[SubTree_NextFreeNode].u.suns[n] = -1;
+
+              if(SubTopNodes[SubTopNodes[topnode].Daughter + sub].Daughter == -1) /* the daughter is a top-level leaf */
+                SubDomainNodeIndex[SubTopNodes[SubTopNodes[topnode].Daughter + sub].Leaf] = SubTree_NextFreeNode;
+
+              SubTree_NextFreeNode++;
+              SubTree_NumNodes++;
+
+              if(subfind_coll_create_empty_nodes(SubTree_NextFreeNode - 1, SubTopNodes[topnode].Daughter + sub, bits + 1, 2 * x + i,
+                                                 2 * y + j, 2 * z + k, xxc, yyc, zzc, ilen) < 0)
+                return -1;
+            }
+    }
+
+  return 0;
+}
+
+/*! \brief Inserts pseudo-particles which will represent the mass
+ *         distribution of the other CPUs. Initially, the mass of the
+ *         pseudo-particles is set to zero, and their coordinate is set to the
+ *         center of the domain-cell they correspond to. These quantities will
+ *         be updated later on.
+ *
+ *         This function itself only stores an index: for every top-level
+ *         leaf i owned by another task, the first daughter slot of the leaf's
+ *         tree node is set to SubTree_MaxPart + SubTree_MaxNodes + i.
+ *
+ * \return void
+ */
+void subfind_coll_insert_pseudo_particles(void)
+{
+  int i, index;
+
+  for(i = 0; i < SubNTopleaves; i++)
+    {
+      index = SubDomainNodeIndex[i]; /* tree node that represents top-level leaf i */
+
+      if(SubDomainTask[i] != SubThisTask)
+        SubNodes[index].u.suns[0] = SubTree_MaxPart + SubTree_MaxNodes + i;
+    }
+}
+
+/*! \brief Determines the multipole moments for a given internal node
+ *         and all its subnodes using a recursive computation. The result is
+ *         stored in the SubNodes structure in the sequence of this tree-walk.
+ *
+ * \param[in] no Index of node.
+ * \param[in] sib Index of sibling.
+ * \param[in] father Index of parent node.
+ * \param[in, out] last Node index of last call.
+ *
+ * \return void
+ */
+void subfind_coll_update_node_recursive(int no, int sib, int father, int *last)
+{ /* Two things happen during the walk:
+   *  - every visited element (node, particle or pseudo particle) is appended
+   *    to a linked list by storing its index as the "next" entry of the
+   *    previously visited element (*last);
+   *  - for internal nodes, mass, center of mass and softening information of
+   *    the daughters are accumulated and written to SubNodes[no].u.d (and to
+   *    SubExtNodes[no] with MULTIPLE_NODE_SOFTENING).
+   */
+  int j, jj, p, pp, nextsib, suns[8];
+  double s[3], mass;
+  unsigned char maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+  double mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  unsigned char maxhydrosofttype;
+  unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+  if(no >= SubTree_MaxPart && no < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */
+    {
+      for(j = 0; j < 8; j++)
+        suns[j] = SubNodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will
+                                             overwrite one element (union!) */
+      if(*last >= 0) /* link the previously visited element to this node */
+        {
+          if(*last >= SubTree_MaxPart)
+            {
+              if(*last >= SubTree_MaxPart + SubTree_MaxNodes)
+                SubNextnode[*last - SubTree_MaxNodes] = no; /* a pseudo-particle or imported point */
+              else
+                SubNodes[*last].u.d.nextnode = no;
+            }
+          else
+            SubNextnode[*last] = no;
+        }
+
+      *last = no;
+
+      mass = 0;
+      s[0] = 0;
+      s[1] = 0;
+      s[2] = 0;
+      maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; /* start value; presumably an extra table entry that no real type falls below -- TODO confirm */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+      for(j = 0; j < NSOFTTYPES; j++)
+        mass_per_type[j] = 0;
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      maxhydrosofttype = NSOFTTYPES;
+      minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      for(j = 0; j < 8; j++)
+        {
+          if((p = suns[j]) >= 0)
+            {
+              /* check if we have a sibling on the same level */
+              for(jj = j + 1; jj < 8; jj++)
+                if((pp = suns[jj]) >= 0)
+                  break;
+
+              if(jj < 8) /* yes, we do */
+                nextsib = pp;
+              else
+                nextsib = sib; /* no: the daughter inherits the sibling of this node */
+
+              subfind_coll_update_node_recursive(p, nextsib, no, last);
+
+              if(p < SubTree_MaxPart) /* a particle */
+                {
+                  MyDouble *pos = &SubTree_Pos_list[3 * p];
+
+                  mass += P[p].Mass;
+                  s[0] += P[p].Mass * pos[0];
+                  s[1] += P[p].Mass * pos[1];
+                  s[2] += P[p].Mass * pos[2];
+
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[P[p].SofteningType])
+                    maxsofttype = P[p].SofteningType;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  mass_per_type[P[p].Type == 0 ? 0 : P[p].SofteningType] += P[p].Mass;
+
+                  if(P[p].Type == 0)
+                    {
+                      if(maxhydrosofttype < P[p].SofteningType)
+                        maxhydrosofttype = P[p].SofteningType;
+                      if(minhydrosofttype > P[p].SofteningType)
+                        minhydrosofttype = P[p].SofteningType;
+                    }
+#else /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                  mass_per_type[P[p].SofteningType] += P[p].Mass;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+              else if(p < SubTree_MaxPart + SubTree_MaxNodes) /* an internal node, already updated by the recursive call above */
+                {
+                  mass += SubNodes[p].u.d.mass;
+                  s[0] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[0];
+                  s[1] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[1];
+                  s[2] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[2];
+
+                  if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[SubNodes[p].u.d.maxsofttype])
+                    maxsofttype = SubNodes[p].u.d.maxsofttype;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+                  int k;
+                  for(k = 0; k < NSOFTTYPES; k++)
+                    mass_per_type[k] += SubExtNodes[p].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  if(maxhydrosofttype < SubNodes[p].u.d.maxhydrosofttype)
+                    maxhydrosofttype = SubNodes[p].u.d.maxhydrosofttype;
+                  if(minhydrosofttype > SubNodes[p].u.d.minhydrosofttype)
+                    minhydrosofttype = SubNodes[p].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+                }
+              else if(p < SubTree_MaxPart + SubTree_MaxNodes + SubNTopleaves) /* a pseudo particle */
+                {
+                  /* nothing to be done here because the mass of the
+                   * pseudo-particle is still zero. This will be changed
+                   * later.
+                   */
+                }
+              else
+                {
+                  /* an imported point */
+                  terminate("should not occur here");
+                }
+            }
+        }
+
+      if(mass) /* turn the mass-weighted coordinate sums into the center of mass */
+        {
+          s[0] /= mass;
+          s[1] /= mass;
+          s[2] /= mass;
+        }
+      else /* massless node: fall back to the geometric center */
+        {
+          s[0] = SubNodes[no].center[0];
+          s[1] = SubNodes[no].center[1];
+          s[2] = SubNodes[no].center[2];
+        }
+
+      SubNodes[no].u.d.mass = mass;
+      SubNodes[no].u.d.s[0] = s[0];
+      SubNodes[no].u.d.s[1] = s[1];
+      SubNodes[no].u.d.s[2] = s[2];
+      SubNodes[no].u.d.maxsofttype = maxsofttype;
+      SubNodes[no].u.d.sibling = sib;
+      SubNodes[no].u.d.father = father;
+
+#ifdef MULTIPLE_NODE_SOFTENING
+      int k;
+      for(k = 0; k < NSOFTTYPES; k++)
+        SubExtNodes[no].mass_per_type[k] = mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      SubNodes[no].u.d.maxhydrosofttype = maxhydrosofttype;
+      SubNodes[no].u.d.minhydrosofttype = minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+    }
+  else /* single particle or pseudo particle: only appended to the linked list */
+    {
+      if(*last >= 0)
+        {
+          if(*last >= SubTree_MaxPart)
+            {
+              if(*last >= SubTree_MaxPart + SubTree_MaxNodes)
+                SubNextnode[*last - SubTree_MaxNodes] = no; /* a pseudo-particle or an imported point */
+              else
+                SubNodes[*last].u.d.nextnode = no;
+            }
+          else
+            SubNextnode[*last] = no;
+        }
+
+      *last = no;
+    }
+}
+
+/*! \brief This function communicates the values of the multipole moments of
+ *         the top-level tree-nodes of the domain grid. This data can then be
+ *         used to update the pseudo-particles on each CPU accordingly.
+ *
+ *         Every top-leaf n is owned by task SubDomainTask[n]. Each task
+ *         packs the moments of the leaves it owns (read from
+ *         SubNodes[SubDomainNodeIndex[n]]), all tasks exchange these
+ *         records with MPI_Allgatherv over SubComm, and each task then
+ *         copies the moments of the leaves owned by other tasks into its
+ *         own SubNodes (and SubExtNodes) entries. Terminates if a leaf is
+ *         assigned to a task outside [0, SubNTask).
+ *
+ *  \return void
+ */
+void subfind_coll_exchange_topleafdata(void)
+{
+  int n, no, idx, task;
+  int *recvcounts, *recvoffset, *bytecounts, *byteoffset;
+  /* one record per top-leaf; transferred below as raw MPI_BYTE data */
+  struct DomainNODE
+  {
+    MyFloat s[3];
+    MyFloat mass;
+#ifdef MULTIPLE_NODE_SOFTENING
+    MyDouble mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+    unsigned char maxhydrosofttype;
+    unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+    unsigned char maxsofttype;
+  } * DomainMoment, *loc_DomainMoment;
+
+  /* receive buffer: one record for every top-leaf of every task */
+  DomainMoment = (struct DomainNODE *)mymalloc("DomainMoment", SubNTopleaves * sizeof(struct DomainNODE));
+
+  /* share the pseudo-particle data across CPUs */
+  recvcounts = (int *)mymalloc("recvcounts", sizeof(int) * SubNTask);
+  recvoffset = (int *)mymalloc("recvoffset", sizeof(int) * SubNTask);
+  bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * SubNTask);
+  byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * SubNTask);
+
+  for(task = 0; task < SubNTask; task++)
+    recvcounts[task] = 0;
+
+  /* count how many top-leaves each task owns */
+  for(n = 0; n < SubNTopleaves; n++)
+    {
+      if(SubDomainTask[n] < 0 || SubDomainTask[n] >= SubNTask)
+        terminate("n=%d|%d: SubDomainTask[n]=%d", n, SubNTopleaves, SubDomainTask[n]);
+
+      recvcounts[SubDomainTask[n]]++;
+    }
+
+  /* the same counts expressed in bytes, as needed for the MPI_BYTE transfer */
+  for(task = 0; task < SubNTask; task++)
+    bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE);
+
+  /* exclusive prefix sums: start of each task's section, in records and in bytes */
+  for(task = 1, recvoffset[0] = 0, byteoffset[0] = 0; task < SubNTask; task++)
+    {
+      recvoffset[task] = recvoffset[task - 1] + recvcounts[task - 1];
+      byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];
+    }
+
+  loc_DomainMoment = (struct DomainNODE *)mymalloc("loc_DomainMoment", recvcounts[SubThisTask] * sizeof(struct DomainNODE));
+
+  /* pack the locally owned leaves in increasing n; the unpack loop below walks n in the same order */
+  for(n = 0, idx = 0; n < SubNTopleaves; n++)
+    {
+      if(SubDomainTask[n] == SubThisTask)
+        {
+          no = SubDomainNodeIndex[n];
+
+          /* read out the multipole moments from the local base cells */
+          loc_DomainMoment[idx].s[0]        = SubNodes[no].u.d.s[0];
+          loc_DomainMoment[idx].s[1]        = SubNodes[no].u.d.s[1];
+          loc_DomainMoment[idx].s[2]        = SubNodes[no].u.d.s[2];
+          loc_DomainMoment[idx].mass        = SubNodes[no].u.d.mass;
+          loc_DomainMoment[idx].maxsofttype = SubNodes[no].u.d.maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+          int k;
+          for(k = 0; k < NSOFTTYPES; k++)
+            loc_DomainMoment[idx].mass_per_type[k] = SubExtNodes[no].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+          loc_DomainMoment[idx].maxhydrosofttype = SubNodes[no].u.d.maxhydrosofttype;
+          loc_DomainMoment[idx].minhydrosofttype = SubNodes[no].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+          idx++;
+        }
+    }
+
+  /* every task receives the records of all tasks, laid out task after task */
+  MPI_Allgatherv(loc_DomainMoment, bytecounts[SubThisTask], MPI_BYTE, DomainMoment, bytecounts, byteoffset, MPI_BYTE, SubComm);
+
+  /* recvcounts[] is reused as a running cursor into each task's section */
+  for(task = 0; task < SubNTask; task++)
+    recvcounts[task] = 0;
+
+  for(n = 0; n < SubNTopleaves; n++)
+    {
+      task = SubDomainTask[n];
+      if(task != SubThisTask)
+        {
+          no  = SubDomainNodeIndex[n];
+          idx = recvoffset[task] + recvcounts[task]++; /* next unread record sent by the owner of leaf n */
+
+          SubNodes[no].u.d.s[0]        = DomainMoment[idx].s[0];
+          SubNodes[no].u.d.s[1]        = DomainMoment[idx].s[1];
+          SubNodes[no].u.d.s[2]        = DomainMoment[idx].s[2];
+          SubNodes[no].u.d.mass        = DomainMoment[idx].mass;
+          SubNodes[no].u.d.maxsofttype = DomainMoment[idx].maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+          int k;
+          for(k = 0; k < NSOFTTYPES; k++)
+            SubExtNodes[no].mass_per_type[k] = DomainMoment[idx].mass_per_type[k];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+          SubNodes[no].u.d.maxhydrosofttype = DomainMoment[idx].maxhydrosofttype;
+          SubNodes[no].u.d.minhydrosofttype = DomainMoment[idx].minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+        }
+    }
+
+  /* buffers are released in the exact reverse order of their allocation above */
+  myfree(loc_DomainMoment);
+  myfree(byteoffset);
+  myfree(bytecounts);
+  myfree(recvoffset);
+  myfree(recvcounts);
+  myfree(DomainMoment);
+}
+
+/*! 
\brief This function updates the top-level tree after the multipole
+ *         moments of the pseudo-particles have been updated.
+ *
+ *         Top nodes without daughters (Daughter < 0) are left untouched.
+ *         For any other top node the eight daughters are first updated
+ *         recursively, each one taking the next node index from
+ *         SubTree_NextFreeNode. Afterwards the mass, centre of mass,
+ *         maximum softening type and (if enabled) the per-type masses and
+ *         hydro softening range of the daughters are accumulated into node
+ *         'no'. A node without mass gets its geometric centre as centre of
+ *         mass.
+ *
+ *  \param[in] no Index of node.
+ *  \param[in] topnode Index of topnode.
+ *  \param[in] bits Number of bits passed to peano_hilbert_key(); grows by
+ *             one per recursion level.
+ *  \param[in] x Integer x position of the node at this level.
+ *  \param[in] y Integer y position of the node at this level.
+ *  \param[in] z Integer z position of the node at this level.
+ *
+ *  \return void
+ */
+void subfind_coll_treeupdate_toplevel(int no, int topnode, int bits, int x, int y, int z)
+{
+  int i, j, k, sub;
+  int p;
+  double s[3], mass;
+  unsigned char maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+  double mass_per_type[NSOFTTYPES];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+  unsigned char maxhydrosofttype;
+  unsigned char minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+  if(SubTopNodes[topnode].Daughter >= 0)
+    {
+      /* descend into all eight octants first, so that the daughters hold final values before they are summed */
+      for(i = 0; i < 2; i++)
+        for(j = 0; j < 2; j++)
+          for(k = 0; k < 2; k++)
+            {
+              /* lowest three key bits select the daughter top node of this octant */
+              sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits);
+
+              /* NOTE(review): node indices are handed out in traversal order; presumably this has to
+                 mirror the order in which the top-level nodes were created - confirm against the tree build */
+              SubTree_NextFreeNode++;
+              subfind_coll_treeupdate_toplevel(SubTree_NextFreeNode - 1, SubTopNodes[topnode].Daughter + sub, bits + 1, 2 * x + i,
+                                               2 * y + j, 2 * z + k);
+            }
+
+      mass        = 0;
+      s[0]        = 0;
+      s[1]        = 0;
+      s[2]        = 0;
+      maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; /* start value, replaced by any daughter with a larger softening length */
+#ifdef MULTIPLE_NODE_SOFTENING
+      for(j = 0; j < NSOFTTYPES; j++)
+        mass_per_type[j] = 0;
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      /* start with an empty (inverted) range so that the first daughter sets both bounds */
+      maxhydrosofttype = NSOFTTYPES;
+      minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+      p = SubNodes[no].u.d.nextnode; /* first daughter; the remaining ones follow via the sibling link */
+
+      for(j = 0; j < 8; j++) /* since we are dealing with top-level nodes, we know that there are 8 consecutive daughter nodes */
+        {
+          if(p >= SubTree_MaxPart && p < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */
+            {
+              mass += SubNodes[p].u.d.mass;
+              s[0] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[0];
+              s[1] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[1];
+              s[2] += SubNodes[p].u.d.mass * SubNodes[p].u.d.s[2];
+              if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[SubNodes[p].u.d.maxsofttype])
+                maxsofttype = SubNodes[p].u.d.maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+              int k;
+              for(k = 0; k < NSOFTTYPES; k++)
+                mass_per_type[k] += SubExtNodes[p].mass_per_type[k];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+              if(maxhydrosofttype < SubNodes[p].u.d.maxhydrosofttype)
+                maxhydrosofttype = SubNodes[p].u.d.maxhydrosofttype;
+              if(minhydrosofttype > SubNodes[p].u.d.minhydrosofttype)
+                minhydrosofttype = SubNodes[p].u.d.minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+            }
+          else
+            terminate("may not happen");
+
+          p = SubNodes[p].u.d.sibling;
+        }
+
+      if(mass)
+        {
+          /* turn the mass-weighted position sum into the centre of mass */
+          s[0] /= mass;
+          s[1] /= mass;
+          s[2] /= mass;
+        }
+      else
+        {
+          /* empty node: fall back to the geometric centre */
+          s[0] = SubNodes[no].center[0];
+          s[1] = SubNodes[no].center[1];
+          s[2] = SubNodes[no].center[2];
+        }
+
+      SubNodes[no].u.d.s[0]        = s[0];
+      SubNodes[no].u.d.s[1]        = s[1];
+      SubNodes[no].u.d.s[2]        = s[2];
+      SubNodes[no].u.d.mass        = mass;
+      SubNodes[no].u.d.maxsofttype = maxsofttype;
+#ifdef MULTIPLE_NODE_SOFTENING
+      int k;
+      for(k = 0; k < NSOFTTYPES; k++)
+        SubExtNodes[no].mass_per_type[k] = mass_per_type[k];
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+      SubNodes[no].u.d.maxhydrosofttype = maxhydrosofttype;
+      SubNodes[no].u.d.minhydrosofttype = minhydrosofttype;
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+    }
+}
+
+/*! \brief Allocates tree arrays.
+ *
+ *  This function allocates the memory used for storage of the tree nodes.
+ *  Usually, the number of required nodes is of order 0.7*maxpart, but if this
+ *  is insufficient, the code will try to allocate more space.
+ *
+ *  \param[in] maxpart Number of particles the tree is sized for; the number
+ *             of nodes is derived from it.
+ *  \param[in] maxindex Index of the first tree node (stored in
+ *             SubTree_MaxPart); smaller indices refer to particles. 
+ *
+ *  SubNodes (and SubExtNodes, if present) are shifted down by
+ *  SubTree_MaxPart right after allocation, so that they are addressed
+ *  directly with node numbers starting at SubTree_MaxPart. Terminates if
+ *  the tree is already allocated.
+ *
+ *  \return void
+ */
+void subfind_coll_treeallocate(int maxpart, int maxindex)
+{
+  if(SubNodes)
+    terminate("already allocated");
+
+  SubTree_MaxPart  = maxindex;
+  SubTree_MaxNodes = (int)(SubTreeAllocFactor * maxpart) + SubNTopnodes;
+
+  /* one tree-node index per top-leaf */
+  SubDomainNodeIndex = (int *)mymalloc_movable(&SubDomainNodeIndex, "SubDomainNodeIndex", SubNTopleaves * sizeof(int));
+
+  /* three coordinates per particle */
+  SubTree_Pos_list = (MyDouble *)mymalloc_movable(&SubTree_Pos_list, "SubTree_Pos_list", 3 * maxpart * sizeof(MyDouble));
+
+  SubNodes = (struct NODE *)mymalloc_movable(&SubNodes, "SubNodes", (SubTree_MaxNodes + 1) * sizeof(struct NODE));
+  SubNodes -= SubTree_MaxPart; /* bias the pointer; subfind_coll_treefree() adds the offset back before freeing */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+  SubExtNodes = (struct ExtNODE *)mymalloc_movable(&SubExtNodes, "SubExtNodes", (SubTree_MaxNodes + 1) * sizeof(struct ExtNODE));
+  SubExtNodes -= SubTree_MaxPart; /* same bias as for SubNodes */
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+  /* next-pointers for all particle slots, followed by one slot per top-leaf */
+  SubNextnode = (int *)mymalloc_movable(&SubNextnode, "SubNextnode", (SubTree_MaxPart + SubNTopleaves) * sizeof(int));
+}
+
+/*! \brief Free tree arrays.
+ *
+ *  This function frees the memory allocated for the tree, i.e. it frees
+ *  the space allocated by the function subfind_coll_treeallocate(). 
+ * + * \return void + */ +void subfind_coll_treefree(void) +{ + if(SubNodes) + { + myfree(SubNextnode); + +#ifdef MULTIPLE_NODE_SOFTENING + myfree(SubExtNodes + SubTree_MaxPart); + SubExtNodes = NULL; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + myfree(SubNodes + SubTree_MaxPart); + myfree(SubTree_Pos_list); + myfree(SubDomainNodeIndex); + + SubNodes = NULL; + SubDomainNodeIndex = NULL; + SubNextnode = NULL; + SubTree_Pos_list = NULL; + } + else + terminate("trying to free the tree even though it's not allocated"); +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_coll_treewalk.c b/src/amuse/community/arepo/src/subfind/subfind_coll_treewalk.c new file mode 100644 index 0000000000..1a7cbd67c7 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_coll_treewalk.c @@ -0,0 +1,460 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_coll_treewalk.c + * \date 05/2018 + * \brief Algorithm for collective tree walk; computes gravitational + * binding energy. 
+ * \details     contains functions:
+ *                static void particle2in(data_in * in, int i, int firstnode)
+ *                static void out2particle(data_out * out, int i, int mode)
+ *                static void kernel_local(void)
+ *                static void kernel_imported(void)
+ *                void subfind_potential_compute(int num, struct unbind_data
+ *                  *darg, int phasearg, double weakly_bound_limit_arg)
+ *                static int subfind_force_treeevaluate_potential(int target,
+ *                  int mode, int threadid)
+ *
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 15.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+/* NOTE(review): the five header names below are missing (presumably system headers whose
+   <...> part was lost in extraction) - restore them before compiling */
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef SUBFIND
+#include "../fof/fof.h"
+#include "subfind.h"
+
+/* forward declaration: the kernels below call it before its definition */
+static int subfind_force_treeevaluate_potential(int target, int mode, int threadid);
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3];             /* position the potential is evaluated at */
+  unsigned char SofteningType; /* index into All.ForceSoftening[] */
+
+  int Firstnode;
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Routine that fills the relevant particle/cell data into the input
+ *         structure defined above. Needed by generic_comm_helpers2.
+ *
+ *  With CELL_CENTER_GRAVITY, particles of type 0 use PS[i].Center as
+ *  position; all other cases use P[i].Pos.
+ *
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index of particle in P and PS arrays.
+ *  \param[in] firstnode First node of communication.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+#ifdef CELL_CENTER_GRAVITY
+  if(P[i].Type == 0)
+    {
+      for(int k = 0; k < 3; k++)
+        in->Pos[k] = PS[i].Center[k];
+    }
+  else
+#endif /* #ifdef CELL_CENTER_GRAVITY */
+    {
+      /* this block is the else-branch when CELL_CENTER_GRAVITY is set, and unconditional otherwise */
+      for(int k = 0; k < 3; k++)
+        in->Pos[k] = P[i].Pos[k];
+    }
+
+  in->SofteningType = P[i].SofteningType;
+
+  in->Firstnode = firstnode;
+}
+
+/*! 
\brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyFloat Potential; /* potential sum collected for one target */
+} data_out;
+
+static data_out *DataResult, *DataOut;
+
+/*! \brief Routine to store or combine result data. Needed by
+ *         generic_comm_helpers2.
+ *
+ *  \param[in] out Result whose Potential is stored in (local mode) or added
+ *             to (any other mode) PS[i].Potential.
+ *  \param[in] i Index of particle in the PS array.
+ *  \param[in] mode Mode of function: local particles or information that was
+ *             communicated from other tasks and has to be added locally?
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  if(mode == MODE_LOCAL_PARTICLES) /* initial store */
+    {
+      PS[i].Potential = out->Potential;
+    }
+  else /* combine */
+    {
+      PS[i].Potential += out->Potential;
+    }
+}
+
+/* NOTE(review): the define is placed directly before the include; presumably it makes the helpers
+   communicate over SubComm - confirm in generic_comm_helpers2.h. The header is included only after
+   data_in, data_out, particle2in and out2particle exist. */
+#define USE_SUBCOMM_COMMUNICATOR
+#include "../utils/generic_comm_helpers2.h"
+
+/* arguments of subfind_potential_compute(), kept at file scope so that the kernels can read them */
+static int Num;
+static struct unbind_data *d;
+static int phase;
+static double weakly_bound_limit;
+
+/*! \brief Routine that defines what to do with local particles.
+ *
+ *  Calls the *_evaluate function in MODE_LOCAL_PARTICLES.
+ *
+ *  Particles are taken one by one through the counter NextParticle until
+ *  either all Num entries of d[] are done or the export space of the
+ *  thread drops below MinSpace. In phase 1, particles whose binding energy
+ *  is not above weakly_bound_limit are skipped.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  int i, idx;
+
+  {
+    int j, threadid = get_thread_num();
+
+    /* reset the per-task export markers of this thread */
+    for(j = 0; j < SubNTask; j++)
+      Thread[threadid].Exportflag[j] = -1;
+
+    while(1)
+      {
+        /* checked before a particle is claimed, so that no claimed particle is left unprocessed */
+        if(Thread[threadid].ExportSpace < MinSpace)
+          break;
+
+        idx = NextParticle++;
+
+        if(idx >= Num)
+          break;
+
+        i = d[idx].index;
+
+        if(phase == 1)
+          if(PS[i].BindingEnergy <= weakly_bound_limit)
+            continue;
+
+        subfind_force_treeevaluate_potential(i, MODE_LOCAL_PARTICLES, threadid);
+      }
+  }
+}
+
+/*! \brief Routine that defines what to do with imported particles.
+ *
+ *  Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. 
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  /* evaluate every entry that other tasks have sent to us, in order */
+  const int threadid = get_thread_num();
+
+  for(int i = 0; i < Nimport; i++)
+    subfind_force_treeevaluate_potential(i, MODE_IMPORTED_PARTICLES, threadid);
+}
+
+/*! \brief Computes the potential of a list of particles.
+ *
+ *  Stores the arguments in the file-scope variables read by the kernels,
+ *  runs the communication pattern with kernel_local() and
+ *  kernel_imported(), and finally multiplies PS[].Potential of the listed
+ *  particles by All.G / atime, where atime is All.Time for comoving
+ *  integration and 1 otherwise. In phase 1 the final scaling is skipped for
+ *  particles whose binding energy is not above the limit.
+ *
+ *  \param[in] num Number of entries in darg.
+ *  \param[in] darg List holding the particle indices (darg[].index).
+ *  \param[in] phasearg Which phase are we in? 1: ignore weakly bound
+ *             particles.
+ *  \param[in] weakly_bound_limit_arg In phase 1, particles with
+ *             PS[].BindingEnergy <= this value are ignored.
+ *
+ *  \return void
+ */
+void subfind_potential_compute(int num, struct unbind_data *darg, int phasearg, double weakly_bound_limit_arg)
+{
+  generic_set_MaxNexport();
+
+  /* make the arguments visible to the kernels */
+  weakly_bound_limit = weakly_bound_limit_arg;
+  phase              = phasearg;
+  d                  = darg;
+  Num                = num;
+
+  generic_comm_pattern(Num, kernel_local, kernel_imported);
+
+  /* divisor of the final scaling: All.Time for comoving integration, 1 otherwise */
+  const double atime = All.ComovingIntegrationOn ? All.Time : 1;
+
+  for(int i = 0; i < num; i++)
+    {
+      const int idx = d[i].index;
+
+      if(phase == 1 && PS[idx].BindingEnergy <= weakly_bound_limit)
+        continue;
+
+      PS[idx].Potential *= All.G / atime;
+    }
+}
+
+/*! \brief Evaluate function of potential calculation.
+ *
+ *  \param[in] target Index of particle/cell/imported data.
+ *  \param[in] mode Flag if it operates on local or imported data.
+ *  \param[in] threadid ID of thread. 
+ *
+ *  Walks the collective subfind tree for one target and sums the
+ *  contributions -m/r (or the softened counterpart for r < hmax) of all
+ *  particles and accepted nodes. The sum is not multiplied by G here.
+ *
+ *  In local mode the walk starts at the root node (index SubTree_MaxPart),
+ *  pseudo-particles are passed to
+ *  subfind_treefind_collective_export_node_threads(), and the result is
+ *  stored through out2particle(). In imported mode the walk starts at the
+ *  daughters of every node of the received node list, a branch ends when a
+ *  top-level node is reached again, and the result goes to
+ *  DataResult[target].
+ *
+ *  \return 0
+ */
+static int subfind_force_treeevaluate_potential(int target, int mode, int threadid)
+{
+  struct NODE *nop = 0;
+  int no, numnodes, *firstnode, k;
+  double r2, dx, dy, dz, mass, r, u, h_i, h_j, hmax, h_inv, wp;
+  double pos_x, pos_y, pos_z;
+#ifdef MULTIPLE_NODE_SOFTENING
+  struct ExtNODE *extnop = 0;
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+#if !defined(GRAVITY_NOT_PERIODIC)
+  /* NOTE(review): not referenced by name in this function; presumably scratch variables of the GRAVITY_NEAREST_* macros */
+  double xtmp, ytmp, ztmp;
+#endif
+
+  data_in local, *in;
+  data_out out;
+
+  if(mode == MODE_LOCAL_PARTICLES)
+    {
+      particle2in(&local, target, 0);
+      in = &local;
+
+      numnodes  = 1;
+      firstnode = NULL;
+    }
+  else
+    {
+      in = &DataGet[target];
+
+      generic_get_numnodes(target, &numnodes, &firstnode);
+    }
+
+  pos_x = in->Pos[0];
+  pos_y = in->Pos[1];
+  pos_z = in->Pos[2];
+  h_i   = All.ForceSoftening[in->SofteningType]; /* softening length of the target */
+
+  double pot = 0;
+
+  for(k = 0; k < numnodes; k++)
+    {
+      if(mode == MODE_LOCAL_PARTICLES)
+        no = SubTree_MaxPart; /* root node */
+      else
+        {
+          no = firstnode[k];
+          no = SubNodes[no].u.d.nextnode; /* open it */
+        }
+
+      while(no >= 0)
+        {
+#ifdef MULTIPLE_NODE_SOFTENING
+          /* range of softening types summed below; the default [-1, 0) means one pass with the mass set in this iteration */
+          int indi_flag1 = -1, indi_flag2 = 0;
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+          if(no < SubTree_MaxPart) /* single particle */
+            {
+              dx = GRAVITY_NEAREST_X(SubTree_Pos_list[3 * no + 0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(SubTree_Pos_list[3 * no + 1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(SubTree_Pos_list[3 * no + 2] - pos_z);
+              r2 = dx * dx + dy * dy + dz * dz;
+
+              mass = P[no].Mass;
+
+              h_j = All.ForceSoftening[P[no].SofteningType];
+
+              /* the larger of the two softening lengths is used */
+              if(h_j > h_i)
+                hmax = h_j;
+              else
+                hmax = h_i;
+
+              no = SubNextnode[no];
+            }
+          else if(no < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */
+            {
+              if(mode == MODE_IMPORTED_PARTICLES)
+                {
+                  if(no < SubTree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the
+                                                           branch */
+                    break;
+                }
+
+              nop  = &SubNodes[no];
+              mass = nop->u.d.mass;
+
+              dx = GRAVITY_NEAREST_X(nop->u.d.s[0] - pos_x);
+              dy = GRAVITY_NEAREST_Y(nop->u.d.s[1] - pos_y);
+              dz = GRAVITY_NEAREST_Z(nop->u.d.s[2] - pos_z);
+
+              r2 = dx * dx + dy * dy + dz * dz;
+
+              /* check Barnes-Hut opening criterion */
+              if(nop->len * nop->len > r2 * All.ErrTolThetaSubfind * All.ErrTolThetaSubfind)
+                {
+                  /* open cell */
+                  if(mass) /* a massless node is not opened; it then contributes nothing below */
+                    {
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+                }
+
+              h_j = All.ForceSoftening[nop->u.d.maxsofttype];
+
+              if(h_j > h_i)
+                {
+#ifdef MULTIPLE_NODE_SOFTENING
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  /* node with several hydro softening types and mass of type 0: open it if the target is
+                     closer than the largest hydro softening length */
+                  if(nop->u.d.maxhydrosofttype != nop->u.d.minhydrosofttype)
+                    if(SubExtNodes[no].mass_per_type[0] > 0)
+                      if(r2 < All.ForceSoftening[nop->u.d.maxhydrosofttype] * All.ForceSoftening[nop->u.d.maxhydrosofttype])
+                        {
+                          /* open cell */
+                          no = nop->u.d.nextnode;
+                          continue;
+                        }
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                  /* sum the node mass separately for every softening type */
+                  indi_flag1 = 0;
+                  indi_flag2 = NSOFTTYPES;
+#else  /* #ifdef MULTIPLE_NODE_SOFTENING */
+                  if(r2 < h_j * h_j)
+                    {
+                      /* open cell */
+                      no = nop->u.d.nextnode;
+                      continue;
+                    }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING #else */
+                  hmax = h_j;
+                }
+              else
+                hmax = h_i;
+
+                /* node can be used */
+#ifdef MULTIPLE_NODE_SOFTENING
+              extnop = &SubExtNodes[no];
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+              no = nop->u.d.sibling;
+            }
+          else if(no >= SubTree_ImportedNodeOffset) /* point from imported nodelist */
+            {
+              terminate("this is not expected here");
+            }
+          else
+            {
+              /* remaining index range: pseudo particle, i.e. a branch that lives on another task */
+              if(mode == MODE_IMPORTED_PARTICLES)
+                terminate("mode == MODE_IMPORTED_PARTICLES");
+
+              subfind_treefind_collective_export_node_threads(no, target, threadid);
+
+              /* next-pointers of pseudo particles are stored behind the particle slots of SubNextnode */
+              no = SubNextnode[no - SubTree_MaxNodes];
+              continue;
+            }
+
+          /* now evaluate the potential contribution */
+          r = sqrt(r2);
+
+#ifdef MULTIPLE_NODE_SOFTENING
+          int type;
+          for(type = indi_flag1; type < indi_flag2; type++)
+            {
+              if(type >= 0)
+                {
+                  /* per-type pass: mass and softening length of this type replace the values set above */
+                  mass = extnop->mass_per_type[type];
+
+#ifdef ADAPTIVE_HYDRO_SOFTENING
+                  if(type == 0)
+                    h_j = All.ForceSoftening[nop->u.d.maxhydrosofttype];
+                  else
+#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */
+                    h_j = All.ForceSoftening[type];
+
+                  if(h_j > h_i)
+                    hmax = h_j;
+                  else
+                    hmax = h_i;
+                }
+
+              if(mass)
+                {
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+
+                  if(r >= hmax)
+                    pot += FLT(-mass / r); /* outside the softening length: point-mass potential */
+                  else
+                    {
+                      h_inv = 1.0 / hmax;
+
+                      /* softened potential: piecewise polynomial in u = r / hmax, split at u = 0.5 */
+                      u = r * h_inv;
+                      if(u < 0.5)
+                        wp = -2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6));
+                      else
+                        wp = -3.2 + 0.066666666667 / u + u * u * (10.666666666667 + u * (-16.0 + u * (9.6 - 2.133333333333 * u)));
+
+                      pot += FLT(mass * h_inv * wp);
+                    }
+#ifdef MULTIPLE_NODE_SOFTENING
+                }
+            }
+#endif /* #ifdef MULTIPLE_NODE_SOFTENING */
+        }
+    }
+
+  out.Potential = pot;
+
+  /* Now collect the result at the right place */
+  if(mode == MODE_LOCAL_PARTICLES)
+    out2particle(&out, target, MODE_LOCAL_PARTICLES);
+  else
+    DataResult[target] = out;
+
+  return 0;
+}
+
+#endif /* #ifdef SUBFIND */
diff --git a/src/amuse/community/arepo/src/subfind/subfind_collective.c b/src/amuse/community/arepo/src/subfind/subfind_collective.c
new file mode 100644
index 0000000000..02c702b6de
--- /dev/null
+++ b/src/amuse/community/arepo/src/subfind/subfind_collective.c
@@ -0,0 +1,2417 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              . 
+ * + * \file src/subfind/subfind_collective.c + * \date 05/2018 + * \brief Subfind algorithm running collectively on all tasks. + * \details contains functions: + * void subfind_process_group_collectively(int nsubgroups_cat) + * void subfind_fof_calc_am_collective(int snapnr, int + * ngroups_cat) + * void subfind_col_find_coll_candidates(int totgrouplen) + * void subfind_unbind_independent_ones(int count_cand) + * int subfind_col_unbind(struct unbind_data *d, int num, int + * *num_non_gas) + * void subfind_poll_for_requests(void) + * long long subfind_distlinklist_setrank_and_get_next( + * long long index, long long *rank) + * void subfind_distlinklist_set_next(long long index, + * long long next) + * void subfind_distlinklist_add_particle(long long index) + * void subfind_distlinklist_mark_particle(long long index, + * int target, int submark) + * void subfind_distlinklist_add_bound_particles( + * long long index, int nsub) + * long long subfind_distlinklist_get_next(long long index) + * long long subfind_distlinklist_get_rank(long long index) + * long long subfind_distlinklist_get_head(long long index) + * void subfind_distlinklist_get_two_heads(long long ngb_index1, + * long long ngb_index2, long long *head, long long + * *head_attach) + * void subfind_distlinklist_set_headandnext(long long index, + * long long head, long long next) + * int subfind_distlinklist_get_tail_set_tail_increaselen( + * long long index, long long *tail, long long newtail) + * void subfind_distlinklist_set_tailandlen(long long index, + * long long tail, int len) + * void subfind_distlinklist_get_tailandlen(long long index, + * long long *tail, int *len) + * void subfind_distlinklist_set_all(long long index, + * long long head, long long tail, int len, long long next) + * int subfind_compare_densities(const void *a, const void *b) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 15.05.2018 Prepared file for public release -- Rainer Weinberger 
+ */
+
+/* NOTE(review): the eight header names below are missing (presumably system headers whose
+   <...> part was lost in extraction) - restore them before compiling */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+#ifdef SUBFIND
+#include "../fof/fof.h"
+#include "subfind.h"
+
+/* message tags; TAG_POLLING_DONE is sent with MPI_Send to end the polling of the other tasks,
+   the remaining tags presumably identify the individual link-list requests and their replies */
+#define TAG_POLLING_DONE 201
+#define TAG_SET_ALL 202
+#define TAG_GET_NGB_INDICES 204
+#define TAG_GET_TAILANDLEN 205
+#define TAG_GET_TAILANDLEN_DATA 206
+#define TAG_SET_TAILANDLEN 207
+#define TAG_SET_HEADANDNEXT 209
+#define TAG_SETHEADGETNEXT_DATA 210
+#define TAG_SET_NEXT 211
+#define TAG_SETHEADGETNEXT 213
+#define TAG_GET_NEXT 215
+#define TAG_GET_NEXT_DATA 216
+#define TAG_GET_HEAD 217
+#define TAG_GET_HEAD_DATA 218
+#define TAG_ADD_PARTICLE 219
+#define TAG_ADDBOUND 220
+#define TAG_NID 222
+#define TAG_NID_DATA 223
+#define TAG_SETRANK 224
+#define TAG_SETRANK_OUT 226
+#define TAG_GET_RANK 227
+#define TAG_GET_RANK_DATA 228
+#define TAG_MARK_PARTICLE 229
+#define TAG_SET_NEWTAIL 230
+#define TAG_GET_OLDTAIL 231
+#define TAG_GET_TWOHEADS 232
+#define TAG_GET_TWOHEADS_DATA 233
+
+/* lower 32 bits of a 64-bit value; global particle indices are built as (task << 32) + local index */
+#define MASK ((((long long)1) << 32) - 1)
+/* bit 30; used as initial value of PS[].submark and OR-ed into PS[].origintask for already bound particles */
+#define HIGHBIT (1 << 30)
+
+/* distributed link list, one entry per local group particle; -1 marks an unset link */
+static long long *Head, *Next, *Tail;
+static int *Len;
+static int LocalLen;
+/* number of candidates stored locally, and capacity of the coll_candidates array */
+static int count_cand, max_coll_candidates;
+
+static struct unbind_data *ud;
+
+/*! \brief Data structure for sorting density data.
+ */
+static struct sort_density_data
+{
+  MyFloat density;
+  int ngbcount;
+  long long index; /* this will store the task in the upper word */
+  long long ngb_index1, ngb_index2; /* the two neighbour indices copied from NgbLoc[].index[0] and [1] */
+} * sd;
+
+/*! \brief Processes a group collectively on all MPI tasks. 
+ * + * \param[in] nsubgroups_cat (unused) + * + * \return void + */ +void subfind_process_group_collectively(int nsubgroups_cat) +{ + int totgrouplen1, totgrouplen2; + + /* make a sanity check: We should have exactly 1 group, stored on the root of the processor subset */ + if(SubThisTask == 0) + { + if(Ngroups != 1) + terminate("Ngroups=%d != 1 SubNTask=%d SubThisTask=%d", Ngroups, SubNTask, SubThisTask); + } + else + { + if(Ngroups != 0) + terminate("Ngroups=%d != 0 SubNTask=%d SubThisTask=%d", Ngroups, SubNTask, SubThisTask); + } + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: Collectively doing halo %d of length %d on %d processors.\n", ThisTask, + Group[0].GrNr, Group[0].Len, SubNTask); + + GrNr = Group[0].GrNr; + totgrouplen2 = Group[0].Len; + for(int j = 0; j < 3; j++) + GrCM[j] = Group[0].CM[j]; + } + + /* tell everybody in the set the group number, the center of mass, and the grouplen */ + MPI_Bcast(&GrNr, 1, MPI_INT, 0, SubComm); + MPI_Bcast(&GrCM[0], 3 * sizeof(MyDouble), MPI_BYTE, 0, SubComm); + MPI_Bcast(&totgrouplen2, 1, MPI_INT, 0, SubComm); + + NumPartGroup = 0; + for(int i = 0; i < NumPart; i++) + if(PS[i].GrNr == GrNr) + NumPartGroup++; + + MPI_Allreduce(&NumPartGroup, &totgrouplen1, 1, MPI_INT, MPI_SUM, SubComm); + + /* sanity check that we actually have all the right particles on the processor subset */ + if(totgrouplen1 != totgrouplen2) + terminate("totgrouplen1=%d != totgrouplen2=%d", totgrouplen1, totgrouplen2); /* inconsistency */ + + /* do a domain decomposition just for this halo */ + subfind_coll_domain_decomposition(); + + /* copy over the domain dimensions to serial tree code, as this may be used in the collective unbinding */ + subfind_loctree_copyExtent(); + + /* now let us sort according to GrNr and Density. This step will temporarily break the association with SphP[] and other arrays! 
*/ + submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart); + for(int i = 0; i < NumPart; i++) + { + PS[i].SubNr = TotNgroups + 1; /* set a default that is larger than reasonable group number */ + PS[i].OldIndex = i; + submp[i].index = i; + submp[i].GrNr = PS[i].GrNr; + submp[i].DM_Density = PS[i].Density; + } + qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_GrNr_DM_Density); + subfind_reorder_according_to_submp(); + myfree(submp); + + /* note: now we have the particles of the group at the beginning, but SPH particles are not aligned. + They can however be accessed via SphP[PS[i].OldIndex] */ + + /* re-determine the number of local group particles, which has changed due to domain decomposition */ + NumPartGroup = 0; + for(int i = 0; i < NumPart; i++) + if(PS[i].GrNr == GrNr) + NumPartGroup++; + + /* allocate some storage for the halo */ + subfind_coll_treeallocate(NumPart, All.MaxPart); + + /* construct a tree for the halo */ + subfind_coll_treebuild(NumPartGroup, NULL); + +#ifdef SUBFIND_EXTENDED_PROPERTIES + // calculate binding energy of full fof group + { + struct unbind_data *ud = (struct unbind_data *)mymalloc_movable(&ud, "ud", NumPartGroup * sizeof(struct unbind_data)); + + NumPartGroup = 0; + for(int i = 0; i < NumPart; i++) + if(PS[i].GrNr == GrNr) + ud[NumPartGroup++].index = i; + + subfind_potential_compute(NumPartGroup, ud, 0, 0); + + double binding_energy_local = 0, binding_energy_global; + + for(int i = 0; i < NumPartGroup; i++) + binding_energy_local += 0.5 * P[ud[i].index].Mass * PS[ud[i].index].Potential; + + MPI_Allreduce(&binding_energy_local, &binding_energy_global, 1, MPI_DOUBLE, MPI_SUM, SubComm); + Group[0].Epot = binding_energy_global; + + myfree(ud); + ud = NULL; + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + long long p; + int len; + int ncand, parent, totcand, nremaining; + int max_loc_length, max_length; + int count, countall, *countlist, *offset; + int i, j, k, nr, grindex 
= 0, nsubs, subnr; + int count_leaves, tot_count_leaves, master; + struct coll_cand_dat *tmp_coll_candidates = 0; + double t0, t1, tt0, tt1; + + /* determine the radius that encloses a certain number of link particles */ + subfind_find_linkngb(); + + sd = (struct sort_density_data *)mymalloc_movable(&sd, "sd", NumPartGroup * sizeof(struct sort_density_data)); + + /* determine the indices of the nearest two denser neighbours within the link region */ + NgbLoc = (struct nearest_ngb_data *)mymalloc("NgbLoc", NumPartGroup * sizeof(struct nearest_ngb_data)); + R2Loc = (struct nearest_r2_data *)mymalloc("R2Loc", NumPartGroup * sizeof(struct nearest_r2_data)); + + subfind_find_nearesttwo(); + + for(i = 0; i < NumPartGroup; i++) + { + sd[i].density = PS[i].Density; + sd[i].ngbcount = NgbLoc[i].count; + sd[i].index = (((long long)SubThisTask) << 32) + i; + sd[i].ngb_index1 = NgbLoc[i].index[0]; + sd[i].ngb_index2 = NgbLoc[i].index[1]; + } + myfree(R2Loc); + myfree(NgbLoc); + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: before parallel sort of 'sd'.\n", ThisTask); + fflush(stdout); + } + + /* sort the densities */ + parallel_sort_comm(sd, NumPartGroup, sizeof(struct sort_density_data), subfind_compare_densities, SubComm); + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: parallel sort of 'sd' done.\n", ThisTask); + fflush(stdout); + } + + /* allocate and initialize distributed link list */ + Head = (long long *)mymalloc_movable(&Head, "Head", NumPartGroup * sizeof(long long)); + Next = (long long *)mymalloc_movable(&Next, "Next", NumPartGroup * sizeof(long long)); + Tail = (long long *)mymalloc_movable(&Tail, "Tail", NumPartGroup * sizeof(long long)); + Len = (int *)mymalloc_movable(&Len, "Len", NumPartGroup * sizeof(int)); + + for(i = 0; i < NumPartGroup; i++) + { + Head[i] = Next[i] = Tail[i] = -1; + Len[i] = 0; + } + + /* allocate a list to store subhalo coll_candidates */ + max_coll_candidates = imax((NumPartGroup / 50), 
200); + coll_candidates = (struct coll_cand_dat *)mymalloc_movable(&coll_candidates, "coll_candidates", + max_coll_candidates * sizeof(struct coll_cand_dat)); + count_cand = 0; + + subfind_col_find_coll_candidates(totgrouplen1); + + /* establish total number of coll_candidates */ + MPI_Allreduce(&count_cand, &totcand, 1, MPI_INT, MPI_SUM, SubComm); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: total number of subhalo coll_candidates=%d\n", ThisTask, totcand); + fflush(stdout); + } + + nremaining = totcand; + + for(i = 0; i < NumPartGroup; i++) + Tail[i] = -1; + + for(i = 0; i < count_cand; i++) + coll_candidates[i].parent = 0; + + do + { + /* Let's see which coll_candidates can be unbound independent from each other. + We identify them with those coll_candidates that have no embedded other candidate */ + t0 = second(); + if(SubThisTask == 0) + tmp_coll_candidates = (struct coll_cand_dat *)mymalloc("tmp_coll_candidates", totcand * sizeof(struct coll_cand_dat)); + + count = count_cand; + count *= sizeof(struct coll_cand_dat); + + countlist = (int *)mymalloc("countlist", SubNTask * sizeof(int)); + offset = (int *)mymalloc("offset", SubNTask * sizeof(int)); + + MPI_Allgather(&count, 1, MPI_INT, countlist, 1, MPI_INT, SubComm); + + for(i = 1, offset[0] = 0; i < SubNTask; i++) + offset[i] = offset[i - 1] + countlist[i - 1]; + + MPI_Gatherv(coll_candidates, countlist[SubThisTask], MPI_BYTE, tmp_coll_candidates, countlist, offset, MPI_BYTE, 0, SubComm); + + if(SubThisTask == 0) + { + for(k = 0; k < totcand; k++) + { + tmp_coll_candidates[k].nsub = k; + tmp_coll_candidates[k].subnr = k; + } + + qsort(tmp_coll_candidates, totcand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_rank); + for(k = 0; k < totcand; k++) + { + if(tmp_coll_candidates[k].parent >= 0) + { + tmp_coll_candidates[k].parent = 0; + + for(j = k + 1; j < totcand; j++) + { + if(tmp_coll_candidates[j].rank > tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len) + 
break; + + if(tmp_coll_candidates[j].parent < 0) /* ignore these */ + continue; + + if(tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len >= + tmp_coll_candidates[j].rank + tmp_coll_candidates[j].len) + { + tmp_coll_candidates[k].parent++; /* we here count the number of subhalos that are enclosed */ + } + else + { + terminate("k=%d|%d has rank=%d and len=%d. j=%d has rank=%d and len=%d\n", k, totcand, + (int)tmp_coll_candidates[k].rank, (int)tmp_coll_candidates[k].len, j, + (int)tmp_coll_candidates[j].rank, (int)tmp_coll_candidates[j].len); + } + } + } + } + + qsort(tmp_coll_candidates, totcand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_subnr); + } + + MPI_Scatterv(tmp_coll_candidates, countlist, offset, MPI_BYTE, coll_candidates, countlist[SubThisTask], MPI_BYTE, 0, SubComm); + + myfree(offset); + myfree(countlist); + + if(SubThisTask == 0) + myfree(tmp_coll_candidates); + + for(i = 0, count_leaves = 0, max_loc_length = 0; i < count_cand; i++) + if(coll_candidates[i].parent == 0) + { + if(coll_candidates[i].len > max_loc_length) + max_loc_length = coll_candidates[i].len; + + if(coll_candidates[i].len > 0.20 * All.TotNumPart / NTask) /* seems large, let's rather do it collectively */ + { + coll_candidates[i].parent++; /* this will ensure that it is not considered in this round */ + } + else + { + count_leaves++; + } + } + + MPI_Allreduce(&count_leaves, &tot_count_leaves, 1, MPI_INT, MPI_SUM, SubComm); + MPI_Allreduce(&max_loc_length, &max_length, 1, MPI_INT, MPI_MAX, SubComm); + + t1 = second(); + if(SubThisTask == 0) + printf( + "SUBFIND-COLLECTIVE, root-task=%d: number of subhalo coll_candidates that can be done independently=%d. 
(Largest size %d, " + "finding took %g sec)\n", + ThisTask, tot_count_leaves, max_length, timediff(t0, t1)); + + if(tot_count_leaves <= 0) /* if there are none left, we break and do the reset collectively */ + { + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE, root-task=%d: too few, I do the rest of %d collectively\n", ThisTask, nremaining); + break; + } + + nremaining -= tot_count_leaves; + + for(i = 0; i < NumPart; i++) + { + PS[i].origintask = PS[i].TargetTask = SubThisTask; + PS[i].originindex = i; + PS[i].submark = HIGHBIT; + if(i < NumPartGroup) + if(Tail[i] >= 0) /* this means this particle is already bound to a substructure */ + PS[i].origintask |= HIGHBIT; + } + + /* we now mark the particles that are in subhalo coll_candidates that can be processed independently in parallel */ + nsubs = 0; + t0 = second(); + for(master = 0; master < SubNTask; master++) + { + ncand = count_cand; + + MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, master, SubComm); + + for(k = 0; k < ncand; k++) + { + if(SubThisTask == master) + { + len = coll_candidates[k].len; + parent = coll_candidates[k].parent; /* this is here actually the daughter count */ + } + + MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm); + MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm); + MPI_Barrier(SubComm); + + if(parent == 0) + { + if(SubThisTask != master) + subfind_poll_for_requests(); + else + { + for(i = 0, p = coll_candidates[k].head; i < coll_candidates[k].len; i++) + { + subfind_distlinklist_mark_particle(p, master, nsubs); + + if(p < 0) + terminate("Bummer i=%d \n", i); + + p = subfind_distlinklist_get_next(p); + } + + /* now tell the others to stop polling */ + for(i = 0; i < SubNTask; i++) + if(i != SubThisTask) + MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm); + } + + MPI_Barrier(SubComm); + } + + nsubs++; + } + } + t1 = second(); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: particles are marked (took %g)\n", ThisTask, timediff(t0, t1)); + 
fflush(stdout); + } + + for(i = 0; i < NumPart; i++) + PS[i].TargetIndex = PS[i].submark; /* this will make sure that the particles are grouped by submark on the target task */ + + t0 = second(); + subfind_distribute_particles(SubComm); /* assemble the particles on individual processors */ + t1 = second(); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: distribution of independent ones took %g sec\n", ThisTask, timediff(t0, t1)); + fflush(stdout); + } + + MPI_Barrier(SubComm); + t0 = second(); + + subfind_unbind_independent_ones(count_cand); + + MPI_Barrier(SubComm); + t1 = second(); + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: unbinding of independent ones took %g sec\n", ThisTask, timediff(t0, t1)); + fflush(stdout); + } + + for(i = 0; i < NumPart; i++) + { + PS[i].origintask &= (HIGHBIT - 1); /* clear high bit if set */ + PS[i].TargetTask = PS[i].origintask; + PS[i].TargetIndex = PS[i].originindex; + } + + t0 = second(); + subfind_distribute_particles(SubComm); /* bring them back to their original processor */ + + t1 = second(); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: bringing the independent ones back took %g sec\n", ThisTask, timediff(t0, t1)); + fflush(stdout); + } + + /* now mark the bound particles */ + for(i = 0; i < NumPartGroup; i++) + if(PS[i].submark >= 0 && PS[i].submark < nsubs) + Tail[i] = PS[i].submark; /* we use this to flag bound parts of substructures */ + + for(i = 0; i < count_cand; i++) + if(coll_candidates[i].parent == 0) + coll_candidates[i].parent = -1; + } + while(tot_count_leaves > 0); + + /**** now we do the collective unbinding of the subhalo coll_candidates that contain other subhalo coll_candidates ****/ + ud = (struct unbind_data *)mymalloc_movable(&ud, "ud", NumPartGroup * sizeof(struct unbind_data)); + + t0 = second(); + for(master = 0, nr = 0; master < SubNTask; master++) + { + ncand = count_cand; + + MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, 
master, SubComm); + + for(k = 0; k < ncand; k++) + { + if(SubThisTask == master) + { + len = coll_candidates[k].len; + nsubs = coll_candidates[k].nsub; + parent = coll_candidates[k].parent; /* this is here actually the daughter count */ + } + + MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm); + MPI_Barrier(SubComm); + + if(parent >= 0) + { + MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm); + MPI_Bcast(&nsubs, sizeof(nsubs), MPI_BYTE, master, SubComm); + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: collective unbinding of nr=%d (%d) of length=%d\n", ThisTask, nr, + nremaining, (int)len); + fflush(stdout); + } + + nr++; + + LocalLen = 0; + + tt0 = second(); + + if(SubThisTask != master) + subfind_poll_for_requests(); + else + { + for(i = 0, p = coll_candidates[k].head; i < coll_candidates[k].len; i++) + { + subfind_distlinklist_add_particle(p); + if(p < 0) + terminate("Bummer i=%d \n", i); + + p = subfind_distlinklist_get_next(p); + } + + /* now tell the others to stop polling */ + for(i = 0; i < SubNTask; i++) + if(i != SubThisTask) + MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm); + } + + int LocalNonGasLen; + + LocalLen = subfind_col_unbind(ud, LocalLen, &LocalNonGasLen); + + tt1 = second(); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: took %g sec\n", ThisTask, timediff(tt0, tt1)); + fflush(stdout); + } + + MPI_Allreduce(&LocalLen, &len, 1, MPI_INT, MPI_SUM, SubComm); + + if(len >= All.DesLinkNgb) + { + /* ok, we found a substructure */ + + for(i = 0; i < LocalLen; i++) + Tail[ud[i].index] = nsubs; /* we use this to flag the substructures */ + + if(SubThisTask == master) + { + coll_candidates[k].bound_length = len; + } + } + else + { + if(SubThisTask == master) + { + coll_candidates[k].bound_length = 0; + } + } + } + } + } + t1 = second(); + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: the collective unbinding of remaining halos took %g sec\n", ThisTask, + 
timediff(t0, t1)); + fflush(stdout); + } + + for(k = 0, count = 0; k < count_cand; k++) + if(coll_candidates[k].bound_length >= All.DesLinkNgb) + { + if(coll_candidates[k].len < All.DesLinkNgb) + terminate("coll_candidates[k=%d].len=%d bound=%d\n", k, coll_candidates[k].len, coll_candidates[k].bound_length); + + count++; + } + + MPI_Allreduce(&count, &countall, 1, MPI_INT, MPI_SUM, SubComm); + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: found %d bound substructures in FoF group of length %d\n", ThisTask, countall, + totgrouplen1); + fflush(stdout); + } + + /* now determine the parent subhalo for each candidate */ + t0 = second(); + parallel_sort_comm(coll_candidates, count_cand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_boundlength, SubComm); + + if(SubThisTask == 0) + tmp_coll_candidates = (struct coll_cand_dat *)mymalloc("tmp_coll_candidates", totcand * sizeof(struct coll_cand_dat)); + + count = count_cand; + count *= sizeof(struct coll_cand_dat); + + countlist = (int *)mymalloc("countlist", SubNTask * sizeof(int)); + offset = (int *)mymalloc("offset", SubNTask * sizeof(int)); + + MPI_Allgather(&count, 1, MPI_INT, countlist, 1, MPI_INT, SubComm); + + for(i = 1, offset[0] = 0; i < SubNTask; i++) + offset[i] = offset[i - 1] + countlist[i - 1]; + + MPI_Gatherv(coll_candidates, countlist[SubThisTask], MPI_BYTE, tmp_coll_candidates, countlist, offset, MPI_BYTE, 0, SubComm); + + if(SubThisTask == 0) + { + for(k = 0; k < totcand; k++) + { + tmp_coll_candidates[k].subnr = k; + tmp_coll_candidates[k].parent = 0; + } + + qsort(tmp_coll_candidates, totcand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_rank); + + for(k = 0; k < totcand; k++) + { + for(j = k + 1; j < totcand; j++) + { + if(tmp_coll_candidates[j].rank > tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len) + break; + + if(tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len >= tmp_coll_candidates[j].rank + tmp_coll_candidates[j].len) + { + 
if(tmp_coll_candidates[k].bound_length >= All.DesLinkNgb) + tmp_coll_candidates[j].parent = tmp_coll_candidates[k].subnr; + } + else + { + terminate("k=%d|%d has rank=%d and len=%d. j=%d has rank=%d and len=%d bound=%d\n", k, countall, + (int)tmp_coll_candidates[k].rank, (int)tmp_coll_candidates[k].len, + (int)tmp_coll_candidates[k].bound_length, (int)tmp_coll_candidates[j].rank, + (int)tmp_coll_candidates[j].len, (int)tmp_coll_candidates[j].bound_length); + } + } + } + + qsort(tmp_coll_candidates, totcand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_subnr); + } + + MPI_Scatterv(tmp_coll_candidates, countlist, offset, MPI_BYTE, coll_candidates, countlist[SubThisTask], MPI_BYTE, 0, SubComm); + + myfree(offset); + myfree(countlist); + + if(SubThisTask == 0) + myfree(tmp_coll_candidates); + + t1 = second(); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: determination of parent subhalo took %g sec (presently allocated %g MB)\n", ThisTask, + timediff(t0, t1), AllocatedBytes / (1024.0 * 1024.0)); + fflush(stdout); + } + + /* Now let's save some properties of the substructures */ + if(SubThisTask == 0) + Group[0].Nsubs = countall; + + t0 = second(); + for(master = 0, subnr = 0; master < SubNTask; master++) + { + ncand = count_cand; + MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, master, SubComm); + + for(k = 0; k < ncand; k++) + { + if(SubThisTask == master) + { + len = coll_candidates[k].bound_length; + nsubs = coll_candidates[k].nsub; + parent = coll_candidates[k].parent; + } + + MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm); + MPI_Barrier(SubComm); + + if(len > 0) + { + MPI_Bcast(&nsubs, sizeof(nsubs), MPI_BYTE, master, SubComm); + MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm); + + LocalLen = 0; + + if(SubThisTask != master) + subfind_poll_for_requests(); + else + { + for(i = 0, p = coll_candidates[k].head; i < coll_candidates[k].len; i++) + { + subfind_distlinklist_add_bound_particles(p, nsubs); + p = 
subfind_distlinklist_get_next(p); + } + + /* now tell the others to stop polling */ + for(i = 0; i < SubNTask; i++) + if(i != SubThisTask) + MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm); + } + + MPI_Barrier(SubComm); + + if(SubThisTask == 0) + { + if(Nsubgroups >= MaxNsubgroups) + terminate("Nsubgroups=%d >= MaxNsubgroups=%d", Nsubgroups, MaxNsubgroups); + } + + tt0 = second(); + subfind_determine_sub_halo_properties(ud, LocalLen, &SubGroup[Nsubgroups], GrNr, subnr, 1, nsubgroups_cat); + tt1 = second(); + + /* we have filled into ud the binding energy and the particle ID return */ + + if(SubThisTask == 0) + { + if(Nsubgroups >= MaxNsubgroups) + terminate("Nsubgroups >= MaxNsubgroups"); + + if(subnr == 0) + { + for(j = 0; j < 3; j++) + Group[grindex].Pos[j] = SubGroup[Nsubgroups].Pos[j]; + } + + SubGroup[Nsubgroups].GrNr = GrNr; + SubGroup[Nsubgroups].SubNr = subnr; + SubGroup[Nsubgroups].SubParent = parent; + + Nsubgroups++; + } + + /* Let's now assign the subgroup number */ + for(i = 0; i < LocalLen; i++) + PS[ud[i].index].SubNr = subnr; + + subnr++; + } + } + } + + t1 = second(); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: determining substructure properties took %g sec (presently allocated %g MB)\n", + ThisTask, timediff(t0, t1), AllocatedBytes / (1024.0 * 1024.0)); + fflush(stdout); + } + + myfree(ud); + ud = NULL; + myfree(coll_candidates); + myfree(Len); + myfree(Tail); + myfree(Next); + myfree(Head); + myfree(sd); + + subfind_coll_treefree(); + subfind_coll_domain_free(); + + /* undo local rearrangement that made group consecutive. 
After that, the association of SphP[] will be correct again */ + submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart); + for(int i = 0; i < NumPart; i++) + { + submp[i].index = i; + submp[i].OldIndex = PS[i].OldIndex; + } + qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_OldIndex); + subfind_reorder_according_to_submp(); + myfree(submp); +} + +#ifdef SUBFIND_EXTENDED_PROPERTIES +/*! \brief Calculates angualar momentum collectively on all MPI tasks. + * + * \param[in] snapnr (unused) + * \param[in] ngroups_cat (unused) + * + * \return void + */ +void subfind_fof_calc_am_collective(int snapnr, int ngroups_cat) +{ + int len, totgrouplen1, totgrouplen2; + long long index; + + int grindex = 0, i, k, ptype; + double Pos_pbc[3], Vel_tot[3], gr_pos[3], gr_vel[3]; + double gr_Jtot[3], gr_Jdm[3], gr_Jgas[3], gr_Jstars[3], jpart[3]; + double gr_CMFrac, gr_CMFracType[NTYPES]; + int gr_len_dm; + double gr_mass, gr_mass_gas, gr_mass_stars; // gr_mass_dm, + double gr_Ekin, gr_Ethr; + + /* make a sanity check: We should have exactly 1 group, stored on the root of the processor subset */ + if(SubThisTask == 0) + { + if(Ngroups != 1) + terminate("Ngroups=%d != 1 SubNTask=%d SubThisTask=%d", Ngroups, SubNTask, SubThisTask); + } + else + { + if(Ngroups != 0) + terminate("Ngroups=%d != 0 SubNTask=%d SubThisTask=%d", Ngroups, SubNTask, SubThisTask); + } + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: Collectively doing AM of halo %d of length %d on %d processors.\n", ThisTask, + Group[0].GrNr, Group[0].Len, SubNTask); + + totgrouplen2 = Group[0].Len; + } + + /* tell everybody in the set the group number and the grouplen */ + MPI_Bcast(&GrNr, 1, MPI_INT, 0, SubComm); + MPI_Bcast(&totgrouplen2, 1, MPI_INT, 0, SubComm); + + for(i = 0, NumPartGroup = 0; i < NumPart; i++) + if(PS[i].GrNr == GrNr) + NumPartGroup++; + + MPI_Allreduce(&NumPartGroup, &totgrouplen1, 1, MPI_INT, MPI_SUM, SubComm); + + /* sanity check 
that we actually have all the right particles on the processor subset */ + if(totgrouplen1 != totgrouplen2) + terminate("totgrouplen1 != totgrouplen2"); /* inconsistency */ + + /* do a domain decomposition just for this halo */ + subfind_coll_domain_decomposition(); + + /* copy over the domain dimensions to serial tree code, as this may be used in the collective unbinding */ + subfind_loctree_copyExtent(); + + /* now let us sort according to GrNr and Density. This step will temporarily break the association with SphP[] and other arrays! */ + submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart); + for(i = 0; i < NumPart; i++) + { + PS[i].OldIndex = i; + submp[i].index = i; + submp[i].GrNr = PS[i].GrNr; + submp[i].DM_Density = PS[i].Density; + } + qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_GrNr_DM_Density); + subfind_reorder_according_to_submp(); + myfree(submp); + + /* note: now we have the particles of the group at the beginning, but SPH particles are not aligned. 
+ They can however be accessed via SphP[PS[i].OldIndex] */ + + /* re-determine the number of local group particles, which has changed due to domain decomposition */ + for(i = 0, NumPartGroup = 0; i < NumPart; i++) + if(PS[i].GrNr == GrNr) + NumPartGroup++; + + ud = (struct unbind_data *)mymalloc("ud", NumPartGroup * sizeof(struct unbind_data)); + len = NumPartGroup; + + // pick my particles + for(i = 0; i < len; i++) + ud[i].index = i; + + // initialize + gr_CMFrac = 0; + gr_Ekin = 0; + gr_Ethr = 0; + for(k = 0; k < 3; k++) + { + gr_Jtot[k] = 0; + gr_Jdm[k] = 0; + gr_Jgas[k] = 0; + gr_Jstars[k] = 0; + } + for(k = 0; k < NTYPES; k++) + { + gr_CMFracType[k] = 0; + } + + if(SubThisTask == 0) + { + for(k = 0; k < 3; k++) + { + gr_pos[k] = Group[grindex].Pos[k]; + gr_vel[k] = Group[grindex].Vel[k]; + } + } + + // send group properties stored only on root task to all participating tasks + MPI_Bcast(gr_pos, 3, MPI_DOUBLE, 0, SubComm); + MPI_Bcast(gr_vel, 3, MPI_DOUBLE, 0, SubComm); + + for(k = 0; k < len; k++) + { + index = ud[k].index; + ptype = P[index].Type; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = P[index].Pos[i] - gr_pos[i]; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = fof_periodic(Pos_pbc[i]); + + for(i = 0; i < 3; i++) + Pos_pbc[i] = Pos_pbc[i] * All.cf_atime; /* convert to physical length */ + + for(i = 0; i < 3; i++) + Vel_tot[i] = P[index].Vel[i] / All.cf_atime - gr_vel[i] / All.cf_atime + All.cf_Hrate * Pos_pbc[i]; + + gr_Ekin += (P[index].Mass / 2) * (Vel_tot[0] * Vel_tot[0] + Vel_tot[1] * Vel_tot[1] + Vel_tot[2] * Vel_tot[2]); + if(P[index].Type == 0) + gr_Ethr += P[index].Mass * SphP[PS[index].OldIndex].Utherm; + + gr_Jtot[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + gr_Jtot[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jtot[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + + if(ptype == 1) // dm illustris + { + gr_Jdm[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - 
Pos_pbc[2] * Vel_tot[1]); + gr_Jdm[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jdm[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + if(ptype == 0) // gas (incl. winds) + { + gr_Jgas[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + gr_Jgas[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jgas[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + if(ptype == 4) // stars + { + gr_Jstars[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + gr_Jstars[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jstars[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + } + + MPI_Allreduce(MPI_IN_PLACE, gr_Jtot, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, gr_Jdm, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, gr_Jgas, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, gr_Jstars, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &gr_Ekin, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &gr_Ethr, 1, MPI_DOUBLE, MPI_SUM, SubComm); + + // save the properties + if(SubThisTask == 0) + { + Group[grindex].Ekin = gr_Ekin; + Group[grindex].Ethr = gr_Ethr; + for(i = 0; i < 3; i++) + { + Group[grindex].J[i] = gr_Jtot[i]; + Group[grindex].JDM[i] = gr_Jdm[i]; + Group[grindex].JGas[i] = gr_Jgas[i]; + Group[grindex].JStars[i] = gr_Jstars[i]; + } + } + + // calculate counter-rotating fractions + gr_len_dm = 0; + gr_mass = gr_mass_gas = gr_mass_stars = 0; + + for(k = 0; k < len; k++) + { + index = ud[k].index; + ptype = P[index].Type; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = P[index].Pos[i] - gr_pos[i]; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = fof_periodic(Pos_pbc[i]); + + for(i = 0; i < 3; i++) + Pos_pbc[i] = Pos_pbc[i] * All.cf_atime; // units: phys kpc/h + + for(i = 0; i < 3; i++) + 
Vel_tot[i] = P[index].Vel[i] / All.cf_atime - gr_vel[i] / All.cf_atime + All.cf_Hrate * Pos_pbc[i]; + + jpart[0] = P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + jpart[1] = P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + jpart[2] = P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + + gr_mass += P[index].Mass; + if((gr_Jtot[0] * jpart[0] + gr_Jtot[1] * jpart[1] + gr_Jtot[2] * jpart[2]) < 0.) + gr_CMFrac += P[index].Mass; // / gr_mass; + + if(ptype == 1) // dm illustris + { + gr_len_dm++; + if((gr_Jdm[0] * jpart[0] + gr_Jdm[1] * jpart[1] + gr_Jdm[2] * jpart[2]) < 0.) + gr_CMFracType[1]++; //= P[index].Mass / gr_mass_dm; + } + if(ptype == 0) // gas (incl. winds) + { + gr_mass_gas += P[index].Mass; + if((gr_Jgas[0] * jpart[0] + gr_Jgas[1] * jpart[1] + gr_Jgas[2] * jpart[2]) < 0.) + gr_CMFracType[0] += P[index].Mass; // / gr_mass_gas; + } + if(ptype == 4) // stars + { + gr_mass_stars += P[index].Mass; + if((gr_Jstars[0] * jpart[0] + gr_Jstars[1] * jpart[1] + gr_Jstars[2] * jpart[2]) < 0.) 
+ gr_CMFracType[4] += P[index].Mass; // / gr_mass_stars; + } + } + + MPI_Allreduce(MPI_IN_PLACE, &gr_mass, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &gr_len_dm, 1, MPI_INT, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &gr_mass_gas, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &gr_mass_stars, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &gr_CMFrac, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, gr_CMFracType, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + + // save the properties + if(SubThisTask == 0) + { + gr_CMFrac /= gr_mass; + gr_CMFracType[1] /= gr_len_dm; + gr_CMFracType[0] /= gr_mass_gas; + gr_CMFracType[4] /= gr_mass_stars; + + Group[grindex].CMFrac = gr_CMFrac; + for(i = 0; i < NTYPES; i++) + Group[grindex].CMFracType[i] = gr_CMFracType[i]; + } + + myfree(ud); + + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE: root-task = %d AM done.\n", ThisTask); + + subfind_coll_domain_free(); + + /* undo local rearrangement that made group consecutive. After that, the association of SphP[] will be correct again */ + submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart); + for(i = 0; i < NumPart; i++) + { + submp[i].index = i; + submp[i].OldIndex = PS[i].OldIndex; + } + qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_OldIndex); + subfind_reorder_according_to_submp(); + myfree(submp); +} +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +/*! \brief Finds candidates for subfind collective. + * + * \param[in] totgrouplen Length of group. + * + * \return void + */ +void subfind_col_find_coll_candidates(int totgrouplen) +{ + int ngbcount, retcode, len_attach; + int i, k, len, master; + long long prev, tail, tail_attach, tmp, next, index; + long long p, ss, head, head_attach, ngb_index1, ngb_index2, rank; + double t0, t1, tt0, tt1; + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: building distributed linked list. 
(presently allocated %g MB)\n", ThisTask, + AllocatedBytes / (1024.0 * 1024.0)); + fflush(stdout); + } + + /* now find the subhalo coll_candidates by building up link lists from high density to low density */ + t0 = second(); + for(master = 0; master < SubNTask; master++) + { + tt0 = second(); + if(SubThisTask != master) + subfind_poll_for_requests(); + else + { + for(k = 0; k < NumPartGroup; k++) + { + ngbcount = sd[k].ngbcount; + ngb_index1 = sd[k].ngb_index1; + ngb_index2 = sd[k].ngb_index2; + + switch(ngbcount) /* treat the different possible cases */ + { + case 0: /* this appears to be a lonely maximum -> new group */ + subfind_distlinklist_set_all(sd[k].index, sd[k].index, sd[k].index, 1, -1); + break; + + case 1: /* the particle is attached to exactly one group */ + head = subfind_distlinklist_get_head(ngb_index1); + + if(head == -1) + terminate("We have a problem! head=%d/%d for k=%d on task=%d\n", (int)(head >> 32), (int)head, k, SubThisTask); + + retcode = subfind_distlinklist_get_tail_set_tail_increaselen(head, &tail, sd[k].index); + + if(!(retcode & 1)) + subfind_distlinklist_set_headandnext(sd[k].index, head, -1); + if(!(retcode & 2)) + subfind_distlinklist_set_next(tail, sd[k].index); + break; + + case 2: /* the particle merges two groups together */ + if((ngb_index1 >> 32) == (ngb_index2 >> 32)) + { + subfind_distlinklist_get_two_heads(ngb_index1, ngb_index2, &head, &head_attach); + } + else + { + head = subfind_distlinklist_get_head(ngb_index1); + head_attach = subfind_distlinklist_get_head(ngb_index2); + } + + if(head == -1 || head_attach == -1) + terminate("We have a problem! 
head=%d/%d head_attach=%d/%d for k=%d on task=%d\n", (int)(head >> 32), (int)head, + (int)(head_attach >> 32), (int)head_attach, k, SubThisTask); + + if(head != head_attach) + { + subfind_distlinklist_get_tailandlen(head, &tail, &len); + subfind_distlinklist_get_tailandlen(head_attach, &tail_attach, &len_attach); + + if(len_attach > len || + (len_attach == len && + head_attach < head)) /* other group is longer, swap them. for equal length, take the larger head value */ + { + tmp = head; + head = head_attach; + head_attach = tmp; + tmp = tail; + tail = tail_attach; + tail_attach = tmp; + tmp = len; + len = len_attach; + len_attach = tmp; + } + + /* only in case the attached group is long enough we bother to register it + as a subhalo candidate */ + + if(len_attach >= All.DesLinkNgb) + { + if(count_cand < max_coll_candidates) + { + coll_candidates[count_cand].len = len_attach; + coll_candidates[count_cand].head = head_attach; + count_cand++; + } + else + terminate("Task %d: count=%d, max=%d, npartgroup=%d\n", SubThisTask, count_cand, max_coll_candidates, + NumPartGroup); + } + + /* now join the two groups */ + subfind_distlinklist_set_tailandlen(head, tail_attach, len + len_attach); + subfind_distlinklist_set_next(tail, head_attach); + + ss = head_attach; + do + { + ss = subfind_distlinklist_set_head_get_next(ss, head); + } + while(ss >= 0); + } + + /* finally, attach the particle to 'head' */ + retcode = subfind_distlinklist_get_tail_set_tail_increaselen(head, &tail, sd[k].index); + + if(!(retcode & 1)) + subfind_distlinklist_set_headandnext(sd[k].index, head, -1); + if(!(retcode & 2)) + subfind_distlinklist_set_next(tail, sd[k].index); + break; + } + } + + fflush(stdout); + + /* now tell the others to stop polling */ + for(k = 0; k < SubNTask; k++) + if(k != SubThisTask) + MPI_Send(&k, 1, MPI_INT, k, TAG_POLLING_DONE, SubComm); + } + + MPI_Barrier(SubComm); + tt1 = second(); + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: ma=%d/%d took %g 
sec\n", ThisTask, master, SubNTask, timediff(tt0, tt1)); + fflush(stdout); + } + } + t1 = second(); + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE, root-task=%d: identification of primary coll_candidates took %g sec\n", ThisTask, timediff(t0, t1)); + + /* add the full thing as a subhalo candidate */ + t0 = second(); + for(master = 0, head = -1, prev = -1; master < SubNTask; master++) + { + if(SubThisTask != master) + subfind_poll_for_requests(); + else + { + for(i = 0; i < NumPartGroup; i++) + { + index = (((long long)SubThisTask) << 32) + i; + + if(Head[i] == index) + { + subfind_distlinklist_get_tailandlen(Head[i], &tail, &len); + next = subfind_distlinklist_get_next(tail); + if(next == -1) + { + if(prev < 0) + head = index; + + if(prev >= 0) + subfind_distlinklist_set_next(prev, index); + + prev = tail; + } + } + } + + /* now tell the others to stop polling */ + for(k = 0; k < SubNTask; k++) + if(k != SubThisTask) + MPI_Send(&k, 1, MPI_INT, k, TAG_POLLING_DONE, SubComm); + } + + MPI_Barrier(SubComm); + MPI_Bcast(&head, sizeof(head), MPI_BYTE, master, SubComm); + MPI_Bcast(&prev, sizeof(prev), MPI_BYTE, master, SubComm); + } + + if(SubThisTask == SubNTask - 1) + { + if(count_cand < max_coll_candidates) + { + coll_candidates[count_cand].len = totgrouplen; + coll_candidates[count_cand].head = head; + count_cand++; + } + else + terminate("count_cand=%d >= max_coll_candidates=%d", count_cand, max_coll_candidates); + } + t1 = second(); + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE, root-task=%d: adding background as candidate took %g sec\n", ThisTask, timediff(t0, t1)); + + /* go through the whole chain once to establish a rank order. 
For the rank we use Len[] */ + t0 = second(); + + master = (head >> 32); + + if(SubThisTask != master) + subfind_poll_for_requests(); + else + { + p = head; + rank = 0; + + while(p >= 0) + { + p = subfind_distlinklist_setrank_and_get_next(p, &rank); + } + + /* now tell the others to stop polling */ + for(i = 0; i < SubNTask; i++) + if(i != master) + MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm); + } + + MPI_Barrier(SubComm); + MPI_Bcast(&rank, sizeof(rank), MPI_BYTE, master, SubComm); /* just for testing */ + + /* for each candidate, we now pull out the rank of its head */ + for(master = 0; master < SubNTask; master++) + { + if(SubThisTask != master) + subfind_poll_for_requests(); + else + { + for(k = 0; k < count_cand; k++) + coll_candidates[k].rank = subfind_distlinklist_get_rank(coll_candidates[k].head); + + /* now tell the others to stop polling */ + for(i = 0; i < SubNTask; i++) + if(i != SubThisTask) + MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm); + } + } + MPI_Barrier(SubComm); + + t1 = second(); + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE, root-task=%d: establishing of rank order took %g sec (p=%d, grouplen=%d) presently allocated %g MB\n", + ThisTask, timediff(t0, t1), (int)rank, totgrouplen, AllocatedBytes / (1024.0 * 1024.0)); + + if(((int)rank) != totgrouplen) + terminate("mismatch\n"); +} + +/*! \brief Unbinding for independent subgroups. + * + * \param[in] cont_cand Number of subgroup candidates. 
+ *
+ * \return void
+ */
+void subfind_unbind_independent_ones(int count_cand)
+{
+  int i, j, k, len, nsubs, len_non_gas; /* i is a cursor into PS[]; it is shared by all candidates and never rewound */
+  /* Runs the serial subfind_unbind() on each local candidate with parent == 0 and stores the outcome in its bound_length. */
+  ud = (struct unbind_data *)mymalloc("ud", NumPart * sizeof(struct unbind_data)); /* index scratch list with room for all NumPart local particles */
+
+  subfind_loctree_treeallocate(All.TreeAllocFactor * NumPart, NumPart); /* released again by subfind_loctree_treefree() at the end */
+  /* NOTE(review): the single forward scan below presumably relies on this sort giving ascending nsub and on PS[] being grouped by submark - confirm */
+  qsort(coll_candidates, count_cand, sizeof(struct coll_cand_dat), subfind_compare_coll_candidates_nsubs);
+
+  for(k = 0, i = 0; k < count_cand; k++)
+    if(coll_candidates[k].parent == 0) /* all other candidates are skipped and keep whatever bound_length they had */
+      {
+        while(PS[i].submark < coll_candidates[k].nsub) /* advance past particles whose mark is smaller than this candidate's nsub */
+          {
+            i++;
+            if(i >= NumPart)
+              terminate("i >= NumPart");
+          }
+
+        if(PS[i].submark >= 0 && PS[i].submark < HIGHBIT) /* HIGHBIT is the value written back below for consumed particles; bound_length is not touched if this test fails */
+          {
+            len = 0;
+            nsubs = PS[i].submark;
+
+            if(nsubs != coll_candidates[k].nsub)
+              {
+                terminate("TASK=%d i=%d k=%d nsubs=%d coll_candidates[k].nsub=%d\n", SubThisTask, i, k, nsubs,
+                          coll_candidates[k].nsub);
+              }
+
+            while(i < NumPart) /* consume the contiguous run of particles carrying this mark */
+              {
+                if(PS[i].submark == nsubs)
+                  {
+                    PS[i].submark = HIGHBIT; /* clear the mark; it is restored below only for particles that remain after unbinding */
+                    if((PS[i].origintask & HIGHBIT) == 0) /* particles with HIGHBIT set in origintask are kept out of the unbinding list */
+                      {
+                        ud[len].index = i;
+                        len++;
+                      }
+                    i++;
+                  }
+                else
+                  break;
+              }
+
+            /* call the serial unbind function */
+            len = subfind_unbind(ud, len, &len_non_gas); /* the return value replaces len; len_non_gas is not read afterwards */
+
+            if(len >= All.DesLinkNgb)
+              {
+                /* ok, we found a substructure */
+                coll_candidates[k].bound_length = len;
+
+                for(j = 0; j < len; j++)
+                  PS[ud[j].index].submark = nsubs; /* we use this to flag the substructures */
+              }
+            else
+              coll_candidates[k].bound_length = 0; /* fewer than All.DesLinkNgb entries left: recorded as length 0 */
+          }
+      }
+
+  subfind_loctree_treefree();
+
+  myfree(ud);
+}
+
+/*! \brief Unbinding for subfind collective.
+ *
+ * \param[in] d Unbind data.
+ * \param[in] num Number of particles in subgroup.
+ * \param[out] num_non_gas Number of particles which are not gas cells.
+ * + * \return + */ +int subfind_col_unbind(struct unbind_data *d, int num, int *num_non_gas) +{ + int iter = 0; + int i, j, p, part_index, minindex, task; + int unbound, totunbound, numleft, mincpu; + int *npart, *offset, *nbu_count, count_bound_unbound, phaseflag; + double s[3], dx[3], ddxx, v[3], dv[3], sloc[3], vloc[3], pos[3]; + double vel_to_phys, atime; + MyFloat minpot, *potlist; + double boxsize, xtmp; + double mass, massloc; + double *bnd_energy, energy_limit, energy_limit_local, weakly_bound_limit_local, weakly_bound_limit = 0; + + if(SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: beginning of subfind_col_unbind()\n", ThisTask); + fflush(stdout); + } + + boxsize = All.BoxSize; + + vel_to_phys = 1.0 / All.cf_atime; + atime = All.cf_atime; + + phaseflag = 0; /* this means we will recompute the potential for all particles */ + + do + { + subfind_coll_treebuild(num, d); + + /* let's compute the potential energy */ + + subfind_potential_compute(num, d, phaseflag, weakly_bound_limit); + + if(phaseflag == 0) + { + potlist = (MyFloat *)mymalloc("potlist", SubNTask * sizeof(MyFloat)); + + for(i = 0, minindex = -1, minpot = 1.0e30; i < num; i++) + { + if(gsl_isnan(PS[d[i].index].Potential)) + terminate("pot is nan"); + + if(PS[d[i].index].Potential < minpot || minindex == -1) + { + minpot = PS[d[i].index].Potential; + minindex = d[i].index; + } + } + + MPI_Allgather(&minpot, sizeof(MyFloat), MPI_BYTE, potlist, sizeof(MyFloat), MPI_BYTE, SubComm); + + for(i = 0, mincpu = -1, minpot = 1.0e30; i < SubNTask; i++) + if(potlist[i] < minpot) + { + mincpu = i; + minpot = potlist[i]; + } + + if(mincpu < 0) + terminate("mincpu < 0"); + + myfree(potlist); + + if(SubThisTask == mincpu) + { +#ifdef CELL_CENTER_GRAVITY + if(P[minindex].Type == 0) + { + for(j = 0; j < 3; j++) + pos[j] = PS[minindex].Center[j]; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + for(j = 0; j < 3; j++) + pos[j] = P[minindex].Pos[j]; + } + } + + MPI_Bcast(&pos[0], 3, 
MPI_DOUBLE, mincpu, SubComm); + /* pos[] now holds the position of minimum potential */ + /* we take that as the center */ + } + + /* let's get bulk velocity and the center-of-mass */ + + for(j = 0; j < 3; j++) + sloc[j] = vloc[j] = 0; + + for(i = 0, massloc = 0; i < num; i++) + { + part_index = d[i].index; + + for(j = 0; j < 3; j++) + { +#ifdef CELL_CENTER_GRAVITY + if(P[part_index].Type == 0) + ddxx = GRAVITY_NEAREST_X(PS[part_index].Center[j] - pos[j]); + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + ddxx = GRAVITY_NEAREST_X(P[part_index].Pos[j] - pos[j]); + + sloc[j] += P[part_index].Mass * ddxx; + vloc[j] += P[part_index].Mass * P[part_index].Vel[j]; + } + massloc += P[part_index].Mass; + } + + MPI_Allreduce(sloc, s, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(vloc, v, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(&massloc, &mass, 1, MPI_DOUBLE, MPI_SUM, SubComm); + + for(j = 0; j < 3; j++) + { + s[j] /= mass; /* center of mass */ + v[j] /= mass; + + s[j] += pos[j]; + + while(s[j] < 0) + s[j] += boxsize; + while(s[j] >= boxsize) + s[j] -= boxsize; + } + + bnd_energy = (double *)mymalloc("bnd_energy", num * sizeof(double)); + + for(i = 0; i < num; i++) + { + part_index = d[i].index; + + for(j = 0; j < 3; j++) + { + dv[j] = vel_to_phys * (P[part_index].Vel[j] - v[j]); + +#ifdef CELL_CENTER_GRAVITY + if(P[part_index].Type == 0) + dx[j] = atime * GRAVITY_NEAREST_X(PS[part_index].Center[j] - s[j]); + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + dx[j] = atime * GRAVITY_NEAREST_X(P[part_index].Pos[j] - s[j]); + + dv[j] += All.cf_Hrate * dx[j]; + } + + PS[part_index].BindingEnergy = PS[part_index].Potential + 0.5 * (dv[0] * dv[0] + dv[1] * dv[1] + dv[2] * dv[2]); + PS[part_index].BindingEnergy += All.G / All.cf_atime * P[part_index].Mass / + (All.ForceSoftening[P[part_index].SofteningType] / 2.8); /* add self-energy */ + + if(P[part_index].Type == 0) + PS[part_index].BindingEnergy += PS[part_index].Utherm; + + bnd_energy[i] = 
PS[part_index].BindingEnergy; + } + + parallel_sort_comm(bnd_energy, num, sizeof(double), subfind_compare_binding_energy, SubComm); + + npart = (int *)mymalloc("npart", SubNTask * sizeof(int)); + nbu_count = (int *)mymalloc("nbu_count", SubNTask * sizeof(int)); + offset = (int *)mymalloc("offset", SubNTask * sizeof(int)); + + MPI_Allgather(&num, 1, MPI_INT, npart, 1, MPI_INT, SubComm); + MPI_Allreduce(&num, &numleft, 1, MPI_INT, MPI_SUM, SubComm); + + for(i = 1, offset[0] = 0; i < SubNTask; i++) + offset[i] = offset[i - 1] + npart[i - 1]; + + j = (int)(0.25 * numleft); /* index of limiting energy value */ + + task = 0; + while(j >= npart[task]) + { + j -= npart[task]; + task++; + } + + if(SubThisTask == task) + energy_limit_local = bnd_energy[j]; + else + energy_limit_local = 1.0e30; + + MPI_Allreduce(&energy_limit_local, &energy_limit, 1, MPI_DOUBLE, MPI_MIN, SubComm); + + for(i = 0, count_bound_unbound = 0; i < num; i++) + { + if(bnd_energy[i] > 0) + count_bound_unbound++; + else + count_bound_unbound--; + } + + MPI_Allgather(&count_bound_unbound, 1, MPI_INT, nbu_count, 1, MPI_INT, SubComm); + + for(i = 0, count_bound_unbound = 0; i < SubThisTask; i++) + count_bound_unbound += nbu_count[i]; + + for(i = 0; i < num - 1; i++) + { + if(bnd_energy[i] > 0) + count_bound_unbound++; + else + count_bound_unbound--; + if(count_bound_unbound <= 0) + break; + } + + if(num > 0 && count_bound_unbound <= 0) + weakly_bound_limit_local = bnd_energy[i]; + else + weakly_bound_limit_local = -1.0e30; + + MPI_Allreduce(&weakly_bound_limit_local, &weakly_bound_limit, 1, MPI_DOUBLE, MPI_MAX, SubComm); + + for(i = 0, unbound = 0; i < num; i++) + { + p = d[i].index; + + if(PS[p].BindingEnergy > 0 && PS[p].BindingEnergy > energy_limit) + { + unbound++; + + d[i] = d[num - 1]; + num--; + i--; + } + else if(P[p].Type != 0) + (*num_non_gas)++; + } + + myfree(offset); + myfree(nbu_count); + myfree(npart); + myfree(bnd_energy); + + MPI_Allreduce(&unbound, &totunbound, 1, MPI_INT, MPI_SUM, 
SubComm); + MPI_Allreduce(&num, &numleft, 1, MPI_INT, MPI_SUM, SubComm); + + if(phaseflag == 0) + { + if(totunbound > 0) + phaseflag = 1; + } + else + { + if(totunbound == 0) + { + phaseflag = 0; /* this will make us repeat everything once more for all particles */ + totunbound = 1; + } + } + + iter++; + } + while(totunbound > 0 && numleft >= All.DesLinkNgb); + + return num; +} + +/*! \brief Gets new request from other task. + * + * \return void + */ +void subfind_poll_for_requests(void) +{ + int index, nsub, source, tag, ibuf[3], target, submark, task; + long long head, next, rank, buf[5]; + long long oldtail, newtail; + int task_newtail, i_newtail, task_oldtail, i_oldtail; + char msg[200]; + MPI_Status status; + + do + { + MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, SubComm, &status); + + source = status.MPI_SOURCE; + tag = status.MPI_TAG; + + /* MPI_Get_count(&status, MPI_BYTE, &count); */ + switch(tag) + { + case TAG_GET_TWOHEADS: + MPI_Recv(ibuf, 2, MPI_INT, source, TAG_GET_TWOHEADS, SubComm, MPI_STATUS_IGNORE); + buf[0] = Head[ibuf[0]]; + buf[1] = Head[ibuf[1]]; + MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_GET_TWOHEADS_DATA, SubComm); + break; + case TAG_SET_NEWTAIL: + MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SET_NEWTAIL, SubComm, MPI_STATUS_IGNORE); + index = buf[0]; + newtail = buf[1]; + oldtail = Tail[index]; /* return old tail */ + Tail[index] = newtail; + Len[index]++; + + task_newtail = (newtail >> 32); + if(task_newtail == SubThisTask) + { + i_newtail = (newtail & MASK); + Head[i_newtail] = (((long long)SubThisTask) << 32) + index; + Next[i_newtail] = -1; + } + task_oldtail = (oldtail >> 32); + if(task_oldtail == SubThisTask) + { + i_oldtail = (oldtail & MASK); + Next[i_oldtail] = newtail; + } + + buf[0] = oldtail; + MPI_Send(buf, 1 * sizeof(long long), MPI_BYTE, source, TAG_GET_OLDTAIL, SubComm); + break; + case TAG_SET_ALL: + MPI_Recv(buf, 5 * sizeof(long long), MPI_BYTE, source, TAG_SET_ALL, SubComm, MPI_STATUS_IGNORE); 
+ index = buf[0]; + Head[index] = buf[1]; + Tail[index] = buf[2]; + Len[index] = buf[3]; + Next[index] = buf[4]; + break; + case TAG_GET_TAILANDLEN: + MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status); + buf[0] = Tail[index]; + buf[1] = Len[index]; + MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_GET_TAILANDLEN_DATA, SubComm); + break; + case TAG_SET_TAILANDLEN: + MPI_Recv(buf, 3 * sizeof(long long), MPI_BYTE, source, TAG_SET_TAILANDLEN, SubComm, MPI_STATUS_IGNORE); + index = buf[0]; + Tail[index] = buf[1]; + Len[index] = buf[2]; + break; + case TAG_SET_HEADANDNEXT: + MPI_Recv(buf, 3 * sizeof(long long), MPI_BYTE, source, TAG_SET_HEADANDNEXT, SubComm, MPI_STATUS_IGNORE); + index = buf[0]; + Head[index] = buf[1]; + Next[index] = buf[2]; + break; + case TAG_SET_NEXT: + MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SET_NEXT, SubComm, MPI_STATUS_IGNORE); + index = buf[0]; + Next[index] = buf[1]; + break; + case TAG_SETHEADGETNEXT: + MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SETHEADGETNEXT, SubComm, MPI_STATUS_IGNORE); + index = buf[0]; + head = buf[1]; + do + { + Head[index] = head; + next = Next[index]; + task = (next >> 32); + index = (next & MASK); + } + while(next >= 0 && task == SubThisTask); + MPI_Send(&next, 1 * sizeof(long long), MPI_BYTE, source, TAG_SETHEADGETNEXT_DATA, SubComm); + break; + case TAG_GET_NEXT: + MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status); + MPI_Send(&Next[index], 1 * sizeof(long long), MPI_BYTE, source, TAG_GET_NEXT_DATA, SubComm); + break; + case TAG_GET_HEAD: + MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status); + MPI_Send(&Head[index], 1 * sizeof(long long), MPI_BYTE, source, TAG_GET_HEAD_DATA, SubComm); + break; + case TAG_ADD_PARTICLE: + MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status); + if(Tail[index] < 0) /* consider only particles not already in substructures */ + { + ud[LocalLen].index = index; + if(index >= NumPartGroup) + { + sprintf(msg, 
"What: index=%d NumPartGroup=%d\n", index, NumPartGroup); + terminate(msg); + } + LocalLen++; + } + break; + case TAG_MARK_PARTICLE: + MPI_Recv(ibuf, 3, MPI_INT, source, TAG_MARK_PARTICLE, SubComm, MPI_STATUS_IGNORE); + index = ibuf[0]; + target = ibuf[1]; + submark = ibuf[2]; + + if(PS[index].submark != HIGHBIT) + terminate("TasK=%d i=%d P[i].submark=%d?\n", SubThisTask, index, PS[index].submark); + + PS[index].TargetTask = target; + PS[index].submark = submark; + break; + case TAG_ADDBOUND: + MPI_Recv(ibuf, 2, MPI_INT, source, TAG_ADDBOUND, SubComm, &status); + index = ibuf[0]; + nsub = ibuf[1]; + if(Tail[index] == nsub) /* consider only particles in this substructure */ + { + ud[LocalLen].index = index; + LocalLen++; + } + break; + case TAG_SETRANK: + MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SETRANK, SubComm, MPI_STATUS_IGNORE); + index = buf[0]; + rank = buf[1]; + do + { + Len[index] = rank++; + next = Next[index]; + if(next < 0) + break; + index = (next & MASK); + } + while((next >> 32) == SubThisTask); + buf[0] = next; + buf[1] = rank; + MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, source, TAG_SETRANK_OUT, SubComm); + break; + case TAG_GET_RANK: + MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status); + rank = Len[index]; + MPI_Send(&rank, 1 * sizeof(long long), MPI_BYTE, source, TAG_GET_RANK_DATA, SubComm); + break; + + case TAG_POLLING_DONE: + MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status); + break; + + default: + terminate("tag not present in the switch"); + break; + } + } + while(tag != TAG_POLLING_DONE); +} + +/*! \brief Sets rank in global linked list and gets next entry. + * + * \param[in] index Index in global linked list. + * \param[in, out] rank Rank to be set in linked list. 
+ * + * \return Next entry + */ +long long subfind_distlinklist_setrank_and_get_next(long long index, long long *rank) +{ + int task, i; + long long next; + long long buf[2]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + Len[i] = *rank; + *rank = *rank + 1; + next = Next[i]; + } + else + { + buf[0] = i; + buf[1] = *rank; + + MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, task, TAG_SETRANK, SubComm); + MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, task, TAG_SETRANK_OUT, SubComm, MPI_STATUS_IGNORE); + next = buf[0]; + *rank = buf[1]; + } + return next; +} + +/*! \brief Sets head in global linked list and gets next + * + * \param[in] index Index in global linked list. + * \param[in] head Head value to be set. + * + * \return Next value. + */ +long long subfind_distlinklist_set_head_get_next(long long index, long long head) +{ + int task, i; + long long buf[2]; + long long next; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + Head[i] = head; + next = Next[i]; + } + else + { + buf[0] = i; + buf[1] = head; + MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, task, TAG_SETHEADGETNEXT, SubComm); + MPI_Recv(&next, 1 * sizeof(long long), MPI_BYTE, task, TAG_SETHEADGETNEXT_DATA, SubComm, MPI_STATUS_IGNORE); + } + + return next; +} + +/*! \brief Sets next value in global linked list. + * + * \param[in] index Index in global linked list. + * \param[in] next Next value to be set. + * + * \return void + */ +void subfind_distlinklist_set_next(long long index, long long next) +{ + int task, i; + long long buf[2]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + Next[i] = next; + } + else + { + buf[0] = i; + buf[1] = next; + MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, task, TAG_SET_NEXT, SubComm); + } +} + +/*! \brief Adds particle to 'ud' list if not already in substructure. + * + * \param[in] index Index in global linked list. 
+ * + * \return void + */ +void subfind_distlinklist_add_particle(long long index) +{ + int task, i; + char msg[200]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + if(Tail[i] < 0) /* consider only particles not already in substructures */ + { + ud[LocalLen].index = i; + if(i >= NumPartGroup) + { + sprintf(msg, "What: index=%d NumPartGroup=%d\n", i, NumPartGroup); + terminate(msg); + } + + LocalLen++; + } + } + else + { + MPI_Send(&i, 1, MPI_INT, task, TAG_ADD_PARTICLE, SubComm); + } +} + +/*! \brief Sets target task and submark field in 'PS' structure. + * + * \param[in] index Index in global linked list + * \param[in] target Value for TargetTask field. + * \param[in] submark Value for submark field. + * + * \return void + */ +void subfind_distlinklist_mark_particle(long long index, int target, int submark) +{ + int task, i, ibuf[3]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + if(PS[i].submark != HIGHBIT) + terminate("Tas=%d i=%d P[i].submark=%d?\n", SubThisTask, i, PS[i].submark); + + PS[i].TargetTask = target; + PS[i].submark = submark; + } + else + { + ibuf[0] = i; + ibuf[1] = target; + ibuf[2] = submark; + MPI_Send(ibuf, 3, MPI_INT, task, TAG_MARK_PARTICLE, SubComm); + } +} + +/*! \brief Add bound particle to 'ud' array. + * + * \param[in] index Index in global linked list. + * \param[in] nsub Number of subgroups (i.e. if Tail index the same, not yet + * in a substructrue). + * + * \return void + */ +void subfind_distlinklist_add_bound_particles(long long index, int nsub) +{ + int task, i, ibuf[2]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + if(Tail[i] == nsub) /* consider only particles not already in substructures */ + { + ud[LocalLen].index = i; + LocalLen++; + } + } + else + { + ibuf[0] = i; + ibuf[1] = nsub; + MPI_Send(ibuf, 2, MPI_INT, task, TAG_ADDBOUND, SubComm); + } +} + +/*! \brief Get Next value from global linked list. 
+ * + * \param[in] index Index in global linked list. + * + * \return + */ +long long subfind_distlinklist_get_next(long long index) +{ + int task, i; + long long next; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + next = Next[i]; + } + else + { + MPI_Send(&i, 1, MPI_INT, task, TAG_GET_NEXT, SubComm); + MPI_Recv(&next, 1 * sizeof(long long), MPI_BYTE, task, TAG_GET_NEXT_DATA, SubComm, MPI_STATUS_IGNORE); + } + + return next; +} + +/*! \brief Get rank value from global linked list. + * + * \param[in] index Index in global linked list. + * + * \return Rank value. + */ +long long subfind_distlinklist_get_rank(long long index) +{ + int task, i; + long long rank; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + rank = Len[i]; + } + else + { + MPI_Send(&i, 1, MPI_INT, task, TAG_GET_RANK, SubComm); + MPI_Recv(&rank, 1 * sizeof(long long), MPI_BYTE, task, TAG_GET_RANK_DATA, SubComm, MPI_STATUS_IGNORE); + } + + return rank; +} + +/*! \brief Get the head value of global linked list. + * + * \param[in] index Index in the global linked list. + * + * \return Head value. + */ +long long subfind_distlinklist_get_head(long long index) +{ + int task, i; + long long head; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + head = Head[i]; + } + else + { + MPI_Send(&i, 1, MPI_INT, task, TAG_GET_HEAD, SubComm); + MPI_Recv(&head, 1 * sizeof(long long), MPI_BYTE, task, TAG_GET_HEAD_DATA, SubComm, MPI_STATUS_IGNORE); + } + + return head; +} + +/*! \brief Gets the head value of two entries in linked list. + * + * \param[in] ngb_index1 Index of first subgroup. + * \param[in] ngb_index2 Index of second subgroup. + * \param[out] head Head value of first subgroup. + * \param[out] head_attach head value of second subgroup. 
+ * + * \return void + */ +void subfind_distlinklist_get_two_heads(long long ngb_index1, long long ngb_index2, long long *head, long long *head_attach) +{ + int task, i1, i2, ibuf[2]; + long long buf[2]; + + task = (ngb_index1 >> 32); + i1 = (ngb_index1 & MASK); + i2 = (ngb_index2 & MASK); + + if(SubThisTask == task) + { + *head = Head[i1]; + *head_attach = Head[i2]; + } + else + { + ibuf[0] = i1; + ibuf[1] = i2; + MPI_Send(ibuf, 2, MPI_INT, task, TAG_GET_TWOHEADS, SubComm); + MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, task, TAG_GET_TWOHEADS_DATA, SubComm, MPI_STATUS_IGNORE); + *head = buf[0]; + *head_attach = buf[1]; + } +} + +/*! \brief Sets Head and Next entries in global linked list. + * + * \param[in] index Index in global linked list. + * \param[in] head Value for Head. + * \param[in] next Value for Next. + * + * \return void + */ +void subfind_distlinklist_set_headandnext(long long index, long long head, long long next) +{ + int task, i; + long long buf[3]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + Head[i] = head; + Next[i] = next; + } + else + { + buf[0] = i; + buf[1] = head; + buf[2] = next; + MPI_Send(buf, 3 * sizeof(long long), MPI_BYTE, task, TAG_SET_HEADANDNEXT, SubComm); + } +} + +/*! \brief Returns old tail, sets a new tail, increases length of linked list. + * + * \param[in] index Index of the subgroup. + * \param[out] tail Old value for tail. + * \param[in] newtail New value for tail. 
+ * + * \return return code + */ +int subfind_distlinklist_get_tail_set_tail_increaselen(long long index, long long *tail, long long newtail) +{ + int task, i, task_newtail, i_newtail, task_oldtail, i_oldtail, retcode; + long long oldtail; + long long buf[2]; + + task = (index >> 32); + i = (index & MASK); + + retcode = 0; + + if(SubThisTask == task) + { + oldtail = Tail[i]; + Tail[i] = newtail; + Len[i]++; + *tail = oldtail; + + task_newtail = (newtail >> 32); + if(task_newtail == SubThisTask) + { + i_newtail = (newtail & MASK); + Head[i_newtail] = index; + Next[i_newtail] = -1; + retcode |= 1; + } + task_oldtail = (oldtail >> 32); + if(task_oldtail == SubThisTask) + { + i_oldtail = (oldtail & MASK); + Next[i_oldtail] = newtail; + retcode |= 2; + } + } + else + { + buf[0] = i; + buf[1] = newtail; + MPI_Send(buf, 2 * sizeof(long long), MPI_BYTE, task, TAG_SET_NEWTAIL, SubComm); + MPI_Recv(&oldtail, 1 * sizeof(long long), MPI_BYTE, task, TAG_GET_OLDTAIL, SubComm, MPI_STATUS_IGNORE); + *tail = oldtail; + + if((newtail >> 32) == task) + retcode |= 1; + if((oldtail >> 32) == task) + retcode |= 2; + } + + return retcode; +} + +/*! \brief Set tail and len in global linked list. + * + * \param[in] index Index in global linked list. + * \param[in] tail Value to be set in 'Tail'. + * \param[in] len Value to be set in 'Len'. + * + * \return void + */ +void subfind_distlinklist_set_tailandlen(long long index, long long tail, int len) +{ + int task, i; + long long buf[3]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + Tail[i] = tail; + Len[i] = len; + } + else + { + buf[0] = i; + buf[1] = tail; + buf[2] = len; + MPI_Send(buf, 3 * sizeof(long long), MPI_BYTE, task, TAG_SET_TAILANDLEN, SubComm); + } +} + +/*! \brief Get tail and len in global linked list. + * + * \param[in] index Index in global linked list. + * \param[out] tail 'Tail' value. + * \param[out] len 'Len' value. 
+ * + * \return void + */ +void subfind_distlinklist_get_tailandlen(long long index, long long *tail, int *len) +{ + int task, i; + long long buf[2]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + *tail = Tail[i]; + *len = Len[i]; + } + else + { + MPI_Send(&i, 1, MPI_INT, task, TAG_GET_TAILANDLEN, SubComm); + MPI_Recv(buf, 2 * sizeof(long long), MPI_BYTE, task, TAG_GET_TAILANDLEN_DATA, SubComm, MPI_STATUS_IGNORE); + *tail = buf[0]; + *len = buf[1]; + } +} + +/*! \brief Sets head, tail, len and next in global linked list + * + * \param[in] index Index in global linked list. + * \param[in] head Value for 'Head'. + * \param[in] tail Value for 'Tail'. + * \param[in] len Value for 'Len'. + * \param[in] next Value for 'Next'. + * + * \return void + */ +void subfind_distlinklist_set_all(long long index, long long head, long long tail, int len, long long next) +{ + int task, i; + long long buf[5]; + + task = (index >> 32); + i = (index & MASK); + + if(SubThisTask == task) + { + Head[i] = head; + Tail[i] = tail; + Len[i] = len; + Next[i] = next; + } + else + { + buf[0] = i; + buf[1] = head; + buf[2] = tail; + buf[3] = len; + buf[4] = next; + MPI_Send(buf, 5 * sizeof(long long), MPI_BYTE, task, TAG_SET_ALL, SubComm); + } +} + +/*! \brief Comparison function of sort_density_data objects. + * + * Compares element density. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. 
+ * + * \return (-1,0,1); -1 if a > b + */ +int subfind_compare_densities(const void *a, const void *b) /* largest density first */ +{ + if(((struct sort_density_data *)a)->density > (((struct sort_density_data *)b)->density)) + return -1; + + if(((struct sort_density_data *)a)->density < (((struct sort_density_data *)b)->density)) + return +1; + + return 0; +} + +#endif diff --git a/src/amuse/community/arepo/src/subfind/subfind_density.c b/src/amuse/community/arepo/src/subfind/subfind_density.c new file mode 100644 index 0000000000..0b61aa9d97 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_density.c @@ -0,0 +1,662 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_density.c + * \date 05/2018 + * \brief Smoothing length and density calculation for particles. 
+ * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * double subfind_density(int mode) + * static int subfind_density_evaluate(int target, int mode, + * int threadid) + * void subfind_density_hsml_guess(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 15.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND + +#include "../fof/fof.h" +#include "subfind.h" + +static char *Todo; +static int *DM_NumNgb; +#ifdef SUBFIND_CALC_MORE +static MyFloat *Vx, *Vy, *Vz; +#endif /* #ifdef SUBFIND_CALC_MORE */ + +static int subfind_density_evaluate(int target, int mode, int threadid); + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Pos[3]; + MyFloat Hsml; + + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First note of communication. 
+ * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + { + in->Pos[0] = SphP[i].Center[0]; + in->Pos[1] = SphP[i].Center[1]; + in->Pos[2] = SphP[i].Center[2]; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + in->Pos[0] = P[i].Pos[0]; + in->Pos[1] = P[i].Pos[1]; + in->Pos[2] = P[i].Pos[2]; + } + in->Hsml = PS[i].Hsml; + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + int Ngb; + MyFloat Rho; +#ifdef SUBFIND_CALC_MORE + MyFloat VelDisp, Vx, Vy, Vz, RhoDM; +#endif /* #ifdef SUBFIND_CALC_MORE */ +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? 
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + DM_NumNgb[i] = out->Ngb; + PS[i].Density = out->Rho; +#ifdef SUBFIND_CALC_MORE + Vx[i] = out->Vx; + Vy[i] = out->Vy; + Vz[i] = out->Vz; + PS[i].SubfindVelDisp = out->VelDisp; + PS[i].SubfindDMDensity = out->RhoDM; +#endif /* #ifdef SUBFIND_CALC_MORE */ + } + else /* combine */ + { + DM_NumNgb[i] += out->Ngb; + PS[i].Density += out->Rho; +#ifdef SUBFIND_CALC_MORE + Vx[i] += out->Vx; + Vy[i] += out->Vy; + Vz[i] += out->Vz; + PS[i].SubfindVelDisp += out->VelDisp; + PS[i].SubfindDMDensity += out->RhoDM; +#endif /* #ifdef SUBFIND_CALC_MORE */ + } +} + +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int i; + + { + int j, threadid = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + i = NextParticle++; + + if(i >= NumPart) + break; + + if(Todo[i]) + subfind_density_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + subfind_density_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief Calculates smoothing length or density via neighbor search. + * + * \param[in] mode Mode if the function: FIND_SMOOTHING_LENGTHS, or to + * calculate densities. + * + * \return Time spent in this routine. 
+ */ +double subfind_density(int mode) +{ + long long ntot; + int i, npleft, iter; + MyFloat *Left, *Right; + double t0, t1, tstart, tend; + + if(mode == FIND_SMOOTHING_LENGTHS) + mpi_printf("SUBFIND: finding smoothing length for all particles\n"); + else + mpi_printf("SUBFIND: finding total densities around all particles\n"); + + tstart = second(); + + int HsmlFlag = 0; + +#ifdef SUBFIND_CALC_MORE + HsmlFlag = 1; /* in this case, calculate densities for all particles, not only those in groups */ +#endif /* #ifdef SUBFIND_CALC_MORE */ + + DM_NumNgb = (int *)mymalloc_movable(&DM_NumNgb, "DM_NumNgb", sizeof(int) * NumPart); + Left = (MyFloat *)mymalloc_movable(&Left, "Left", sizeof(MyFloat) * NumPart); + Right = (MyFloat *)mymalloc_movable(&Right, "Right", sizeof(MyFloat) * NumPart); + Todo = (char *)mymalloc_movable(&Todo, "Todo", sizeof(char) * NumPart); + +#ifdef SUBFIND_CALC_MORE + Vx = (MyFloat *)mymalloc("Vx", sizeof(MyFloat) * NumPart); + Vy = (MyFloat *)mymalloc("Vy", sizeof(MyFloat) * NumPart); + Vz = (MyFloat *)mymalloc("Vz", sizeof(MyFloat) * NumPart); +#endif /* #ifdef SUBFIND_CALC_MORE */ + + generic_set_MaxNexport(); + + for(i = 0; i < NumPart; i++) + { + Left[i] = Right[i] = 0; + DM_NumNgb[i] = 0; + Todo[i] = 1; + if((PS[i].GrNr >= TotNgroups) && (HsmlFlag == 0)) // particle not in groups + Todo[i] = 0; + +#ifdef REFINEMENT_HIGH_RES_GAS + if((PS[i].GrNr >= TotNgroups) && (P[i].Type == 4 || P[i].Type == 5)) // particle of type 4 or 5 but not in group + Todo[i] = 0; + + if(P[i].Type != 0 && P[i].Type != 1 && P[i].Type != 4 && P[i].Type != 5) + Todo[i] = 0; + if(P[i].Type == 0) + if(SphP[i].AllowRefinement == 0) + Todo[i] = 0; +#endif /* #ifdef REFINEMENT_HIGH_RES_GAS */ + + PS[i].Density = 0; +#ifdef SUBFIND_CALC_MORE + PS[i].SubfindHsml = 0; + PS[i].SubfindDensity = 0; + PS[i].SubfindDMDensity = 0; + PS[i].SubfindVelDisp = 0; +#endif /* #ifdef SUBFIND_CALC_MORE */ + } + + iter = 0; + + /* we will repeat the whole thing for those particles where we 
didn't find enough neighbours */ + do + { + t0 = second(); + + generic_comm_pattern(NumPart, kernel_local, kernel_imported); + + /* do final operations on results */ + for(i = 0, npleft = 0; i < NumPart; i++) + { + /* now check whether we had enough neighbours */ + + if(Todo[i] && mode == FIND_SMOOTHING_LENGTHS) + { + if(abs(DM_NumNgb[i] - All.DesNumNgb) > All.MaxNumNgbDeviation && + ((Right[i] - Left[i]) > 1.0e-4 * Left[i] || Left[i] == 0 || Right[i] == 0)) + { + /* need to redo this particle */ + npleft++; + + if(DM_NumNgb[i] < All.DesNumNgb) + Left[i] = (MyFloat)dmax(PS[i].Hsml, Left[i]); + else + { + if(Right[i] != 0) + { + if(PS[i].Hsml < Right[i]) + Right[i] = PS[i].Hsml; + } + else + Right[i] = PS[i].Hsml; + } + + if(iter >= MAXITER - 10) + { + printf("SUBFIND: i=%d task=%d ID=%d Hsml=%g Left=%g Right=%g Ngbs=%g Right-Left=%g\n pos=(%g|%g|%g)\n", i, + ThisTask, (int)P[i].ID, PS[i].Hsml, Left[i], Right[i], (double)DM_NumNgb[i], Right[i] - Left[i], + P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]); + myflush(stdout); + } + + if(Right[i] > 0 && Left[i] > 0) + PS[i].Hsml = (MyFloat)pow(0.5 * (pow(Left[i], 3) + pow(Right[i], 3)), 1.0 / 3); + else + { + if(Right[i] == 0 && Left[i] == 0) + terminate("can't occur"); + + if(Right[i] == 0 && Left[i] > 0) + PS[i].Hsml *= 1.26; + + if(Right[i] > 0 && Left[i] == 0) + PS[i].Hsml /= 1.26; + } + } + else + Todo[i] = 0; + } + } + + sumup_large_ints(1, &npleft, &ntot); + + t1 = second(); + + if(ntot > 0 && mode == FIND_SMOOTHING_LENGTHS) + { + iter++; + + if(iter > 0) + mpi_printf("SUBFIND: ngb iteration %2d: need to repeat for %15lld particles. 
(took %g sec)\n", iter, ntot, + timediff(t0, t1)); + + if(iter > MAXITER) + terminate("failed to converge in neighbour iteration in density()\n"); + } + } + while(ntot > 0); + +#ifdef SUBFIND_CALC_MORE + double vel_to_phys; + + vel_to_phys = 1.0 / All.cf_atime; + + for(i = 0; i < NumPart; i++) + { + Vx[i] /= DM_NumNgb[i]; + Vy[i] /= DM_NumNgb[i]; + Vz[i] /= DM_NumNgb[i]; + PS[i].SubfindVelDisp /= DM_NumNgb[i]; + PS[i].SubfindVelDisp = vel_to_phys * sqrt(PS[i].SubfindVelDisp - Vx[i] * Vx[i] - Vy[i] * Vy[i] - Vz[i] * Vz[i]); + } +#endif /* #ifdef SUBFIND_CALC_MORE */ + +#ifdef SUBFIND_CALC_MORE + myfree_movable(Vz); + myfree_movable(Vy); + myfree_movable(Vx); +#endif /* #ifdef SUBFIND_CALC_MORE */ + myfree_movable(Todo); + myfree_movable(Right); + myfree_movable(Left); + myfree_movable(DM_NumNgb); + +#ifdef SUBFIND_CALC_MORE + for(i = 0; i < NumPart; i++) + { + PS[i].SubfindHsml = PS[i].Hsml; + PS[i].SubfindDensity = PS[i].Density; + } +#endif /* #ifdef SUBFIND_CALC_MORE */ + + tend = second(); + return timediff(tstart, tend); +} + +/*! \brief Evaluate function of subfind density calculation. + * + * \param[in] target Index of particle of interest + * \param[in] mode Local or imported particles? + * \param[in] treadid ID of thread. 
+ * + * \return 0 + */ +static int subfind_density_evaluate(int target, int mode, int threadid) +{ + int k, numnodes, *firstnode, type; + double hsml; + double rhosum = 0; + MyDouble *pos; + int numngb = 0, no, p; + struct NODE *current; + double dx, dy, dz, r2, mass; + double h2, hinv, hinv3, r, u, wk; + MyDouble xtmp, ytmp, ztmp; +#ifdef SUBFIND_CALC_MORE + double vxsum = 0, vysum = 0, vzsum = 0, v2sum = 0, rhodmsum = 0; +#endif /* #ifdef SUBFIND_CALC_MORE */ + + data_in local, *target_data; + data_out out; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + target_data = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + target_data = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = target_data->Pos; + hsml = target_data->Hsml; + + h2 = hsml * hsml; + hinv = 1.0 / hsml; + hinv3 = hinv * hinv * hinv; + + for(k = 0; k < numnodes; k++) + { + if(mode == MODE_LOCAL_PARTICLES) + { + no = Tree_MaxPart; /* root node */ + } + else + { + no = firstnode[k]; + no = Nodes[no].u.d.nextnode; /* open it */ + } + + while(no >= 0) + { + if(no < Tree_MaxPart) /* single particle */ + { + p = no; + no = Nextnode[no]; + + dx = FOF_NEAREST_LONG_X(Tree_Pos_list[3 * p + 0] - pos[0]); + if(dx > hsml) + continue; + dy = FOF_NEAREST_LONG_Y(Tree_Pos_list[3 * p + 1] - pos[1]); + if(dy > hsml) + continue; + dz = FOF_NEAREST_LONG_Z(Tree_Pos_list[3 * p + 2] - pos[2]); + if(dz > hsml) + continue; + + if((r2 = (dx * dx + dy * dy + dz * dz)) > hsml * hsml) + continue; + + mass = P[p].Mass; + type = P[p].Type; + } + else if(no < Tree_MaxPart + Tree_MaxNodes) /* internal node */ + { + if(mode == MODE_IMPORTED_PARTICLES) + { + if(no < + Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */ + break; + } + + current = &Nodes[no]; + + no = current->u.d.sibling; /* in case the node can be discarded */ + + double dist = hsml + 0.5 * current->len; + + dx = 
(MyFloat)FOF_NEAREST_LONG_X(current->center[0] - pos[0]); + if(dx > dist) + continue; + dy = (MyFloat)FOF_NEAREST_LONG_Y(current->center[1] - pos[1]); + if(dy > dist) + continue; + dz = (MyFloat)FOF_NEAREST_LONG_Z(current->center[2] - pos[2]); + if(dz > dist) + continue; + /* now test against the minimal sphere enclosing everything */ + dist += FACT1 * current->len; + if(dx * dx + dy * dy + dz * dz > dist * dist) + continue; + + no = current->u.d.nextnode; /* ok, we need to open the node */ + continue; + } + else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */ + { + int n = no - Tree_ImportedNodeOffset; + no = Nextnode[no - Tree_MaxNodes]; + + dx = FOF_NEAREST_LONG_X(Tree_Points[n].Pos[0] - pos[0]); + if(dx > hsml) + continue; + dy = FOF_NEAREST_LONG_Y(Tree_Points[n].Pos[1] - pos[1]); + if(dy > hsml) + continue; + dz = FOF_NEAREST_LONG_Z(Tree_Points[n].Pos[2] - pos[2]); + if(dz > hsml) + continue; + + if((r2 = (dx * dx + dy * dy + dz * dz)) > hsml * hsml) + continue; + + mass = Tree_Points[n].Mass; + type = Tree_Points[n].Type; + + p = -1; + } + else /* pseudo particle */ + { + if(mode == MODE_IMPORTED_PARTICLES) + terminate("can't be"); + + if(target >= 0) /* if no target is given, export will not occur */ + tree_treefind_export_node_threads(no, target, threadid); + + no = Nextnode[no - Tree_MaxNodes]; + continue; + } + + if((1 << type) & (FOF_PRIMARY_LINK_TYPES)) + { + numngb++; + +#ifdef SUBFIND_CALC_MORE + if(p < 0) + terminate("this should not occur"); + + vxsum += P[p].Vel[0]; + vysum += P[p].Vel[1]; + vzsum += P[p].Vel[2]; + v2sum += P[p].Vel[0] * P[p].Vel[0] + P[p].Vel[1] * P[p].Vel[1] + P[p].Vel[2] * P[p].Vel[2]; +#endif /* #ifdef SUBFIND_CALC_MORE */ + } + + if(((1 << type) & (FOF_PRIMARY_LINK_TYPES)) || ((1 << type) & (FOF_SECONDARY_LINK_TYPES))) + if(r2 < h2) + { + r = sqrt(r2); + + u = r * hinv; + + if(u < 0.5) + wk = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u); + else + wk = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * 
(1.0 - u) * (1.0 - u); + + rhosum += mass * wk; + +#ifdef SUBFIND_CALC_MORE + if((1 << type) & (FOF_PRIMARY_LINK_TYPES)) + rhodmsum += mass * wk; +#endif /* #ifdef SUBFIND_CALC_MORE */ + } + } + } + + out.Ngb = numngb; + out.Rho = rhosum; +#ifdef SUBFIND_CALC_MORE + out.Vx = vxsum; + out.Vy = vysum; + out.Vz = vzsum; + out.VelDisp = v2sum; + out.RhoDM = rhodmsum; +#endif /* #ifdef SUBFIND_CALC_MORE */ + + /* Now collect the result at the right place */ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return 0; +} + +/*! \brief Sets Hsml to an initial guess to reduce number of iterations for + * to get final smoothing length (Hsml). + * + * \return void + */ +void subfind_density_hsml_guess(void) +{ + int i; + double hsml_prev = 0; + + for(i = 0; i < NumPart; i++) + { + int no, p; + + if((1 << P[i].Type) & (FOF_PRIMARY_LINK_TYPES)) + { + no = Father[i]; + + while(8 * All.DesNumNgb * P[i].Mass > Nodes[no].u.d.mass && Nodes[no].len == 0) + { + p = Nodes[no].u.d.father; + + if(p < 0) + break; + + no = p; + } + + PS[i].Hsml = hsml_prev = (pow(3.0 / (4 * M_PI) * All.DesNumNgb * P[i].Mass / Nodes[no].u.d.mass, 1.0 / 3) * Nodes[no].len); + + if(PS[i].Hsml == 0) + { + printf("Hsml=0 task=%d i=%d no=%d Nodes[no].len=%g Nodes[no].u.d.mass=%g P[i].Mass=%g type=%d ID=%llu pos=(%g|%g|%g)\n", + ThisTask, i, no, Nodes[no].len, Nodes[no].u.d.mass, P[i].Mass, P[i].Type, (long long)P[i].ID, P[i].Pos[0], + P[i].Pos[1], P[i].Pos[2]); + terminate("zero hsml guess\n"); + } + } + else + { + if(hsml_prev) + PS[i].Hsml = hsml_prev; + else + PS[i].Hsml = All.SofteningTable[P[i].SofteningType]; + } + } +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_distribute.c b/src/amuse/community/arepo/src/subfind/subfind_distribute.c new file mode 100644 index 0000000000..80b492193c --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_distribute.c @@ -0,0 +1,421 @@ +/*! 
+ * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_distribute.c + * \date 05/2018 + * \brief Moves grops and particles across MPI tasks form their + * simulation ordering to a subfind ordering. + * \details contains functions: + * void subfind_distribute_groups(void) + * void subfind_distribute_particles(MPI_Comm Communicator) + * void subfind_reorder_P(int *Id, int Nstart, int N) + * void subfind_reorder_PS(int *Id, int Nstart, int N) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 15.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../fof/fof.h" +#include "subfind.h" + +#ifdef SUBFIND +static struct group_properties *send_Group; + +/*! \brief Distributes groups equally on MPI tasks. 
/*! \brief Moves every group to the MPI task given by Group[].TargetTask.
 *
 *  Groups whose TargetTask differs from ThisTask are removed from the local
 *  Group array, sent to their target with pairwise MPI_Sendrecv calls, and
 *  the groups received from other tasks are appended to the local array.
 *  Group is enlarged if the received groups do not fit into MaxNgroups.
 *
 *  Uses the global Send_count / Recv_count / Send_offset / Recv_offset
 *  arrays as scratch space and updates Ngroups.
 *
 *  \return void
 */
void subfind_distribute_groups(void)
{
  int i, nexport, nimport, target, ngrp, recvTask;

  /* count how many we have of each task */
  for(i = 0; i < NTask; i++)
    Send_count[i] = 0;

  for(i = 0; i < Ngroups; i++)
    {
      target = Group[i].TargetTask;

      if(target < 0 || target >= NTask)
        terminate("target < 0 || target >= NTask");

      if(target != ThisTask)
        Send_count[target]++;
    }

  /* tell every task how many groups it will receive from us */
  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD);

  /* totals and exclusive prefix sums of the send/receive counts */
  for(i = 0, nexport = 0, nimport = 0, Recv_offset[0] = Send_offset[0] = 0; i < NTask; i++)
    {
      nimport += Recv_count[i];
      nexport += Send_count[i];

      if(i > 0)
        {
          Send_offset[i] = Send_offset[i - 1] + Send_count[i - 1];
          Recv_offset[i] = Recv_offset[i - 1] + Recv_count[i - 1];
        }
    }

  send_Group = (struct group_properties *)mymalloc_movable(&send_Group, "send_Group", nexport * sizeof(struct group_properties));

  /* Send_count is reused as the per-target fill position in send_Group */
  for(i = 0; i < NTask; i++)
    Send_count[i] = 0;

  for(i = 0; i < Ngroups; i++)
    {
      target = Group[i].TargetTask;

      if(target != ThisTask)
        {
          send_Group[Send_offset[target] + Send_count[target]] = Group[i];
          Send_count[target]++;

          /* fill the hole with the last group and revisit index i, since the
           * group moved here has not been examined yet */
          Group[i] = Group[Ngroups - 1];
          Ngroups--;
          i--;
        }
    }

  if(Ngroups + nimport > MaxNgroups)
    {
#ifdef VERBOSE
      printf("SUBFIND: Task=%d: (Ngroups=%d) + (nimport=%d) > (MaxNgroups=%d). Will increase MaxNgroups.\n", ThisTask, Ngroups,
             nimport, MaxNgroups);
#endif /* #ifdef VERBOSE */
      MaxNgroups = Ngroups + nimport;
      Group      = (struct group_properties *)myrealloc_movable(Group, sizeof(struct group_properties) * MaxNgroups);
    }

  /* pairwise exchange: in round ngrp this task talks to task ThisTask ^ ngrp
   * (presumably 1 << PTask is the smallest power of two >= NTask -- confirm) */
  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
    {
      recvTask = ThisTask ^ ngrp;

      if(recvTask < NTask)
        {
          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
            {
              /* get the group info; received groups are appended after the Ngroups retained ones */
              MPI_Sendrecv(&send_Group[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct group_properties), MPI_BYTE,
                           recvTask, TAG_DENS_A, &Group[Ngroups + Recv_offset[recvTask]],
                           Recv_count[recvTask] * sizeof(struct group_properties), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD,
                           MPI_STATUS_IGNORE);
            }
        }
    }

  Ngroups += nimport;

  myfree_movable(send_Group);
}

/* send buffers used by subfind_distribute_particles() */
static struct particle_data *partBuf;
static struct subfind_data *subBuf;
/*! \brief Distributes particles on MPI tasks.
 *
 *  This function redistributes the particles in P[] and PS[] according to what
 *  is stored in PS[].TargetTask, and PS[].TargetIndex. NOTE: The associated
 *  SphP[] is not moved, i.e. the association is broken until the particles are
 *  moved back into the original order!
 *
 *  Steps: count and exchange the number of particles per target task, move
 *  the outgoing particles into partBuf/subBuf while compacting P/PS, enlarge
 *  the particle storage if the largest resulting load requires it, exchange
 *  the buffers, and finally sort the local particles by PS[].TargetIndex.
 *
 *  \param[in] Communicator MPI communicator.
 *
 *  \return void
 */
void subfind_distribute_particles(MPI_Comm Communicator)
{
  int nimport, nexport;
  int i, j, n, ngrp, target;
  int max_load, load;
  int CommThisTask, CommNTask;

  MPI_Comm_size(Communicator, &CommNTask);
  MPI_Comm_rank(Communicator, &CommThisTask);

  for(n = 0; n < CommNTask; n++)
    Send_count[n] = 0;

  /* count the particles that have to leave this task, per target task */
  for(n = 0; n < NumPart; n++)
    {
      target = PS[n].TargetTask;

      if(target != CommThisTask)
        {
          if(target < 0 || target >= CommNTask)
            terminate("n=%d targettask=%d", n, target);

          Send_count[target]++;
        }
    }

  MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

  /* totals and exclusive prefix sums of the send/receive counts */
  for(j = 0, nimport = 0, nexport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < CommNTask; j++)
    {
      nexport += Send_count[j];
      nimport += Recv_count[j];

      if(j > 0)
        {
          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
        }
    }

  /* for resize: largest particle number any task will hold after the exchange */
  load = (NumPart + nimport - nexport);
  MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, Communicator);

  partBuf = (struct particle_data *)mymalloc_movable(&partBuf, "partBuf", nexport * sizeof(struct particle_data));
  subBuf  = (struct subfind_data *)mymalloc_movable(&subBuf, "subBuf", nexport * sizeof(struct subfind_data));

  /* Send_count is reused as the per-target fill position in the send buffers */
  for(i = 0; i < CommNTask; i++)
    Send_count[i] = 0;

  for(n = 0; n < NumPart; n++)
    {
      target = PS[n].TargetTask;

      if(target != CommThisTask)
        {
          partBuf[Send_offset[target] + Send_count[target]] = P[n];
          subBuf[Send_offset[target] + Send_count[target]]  = PS[n];

          /* fill the hole with the last particle and revisit index n */
          P[n]  = P[NumPart - 1];
          PS[n] = PS[NumPart - 1];

          Send_count[target]++;
          NumPart--;
          n--;
        }
    }

  /* do resize */
  if(max_load > (1.0 - ALLOC_TOLERANCE) * All.MaxPart)
    {
      All.MaxPart = max_load / (1.0 - 2 * ALLOC_TOLERANCE);
      reallocate_memory_maxpart();
      PS = (struct subfind_data *)myrealloc_movable(PS, All.MaxPart * sizeof(struct subfind_data));
    }

  /* received particles are stored behind the NumPart particles that stayed */
  for(i = 0; i < CommNTask; i++)
    Recv_offset[i] += NumPart;

#ifndef NO_ISEND_IRECV_IN_DOMAIN

  MPI_Request *requests = (MPI_Request *)mymalloc("requests", 8 * CommNTask * sizeof(MPI_Request));
  int n_requests        = 0;

  /* post all receives first */
  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
    {
      target = CommThisTask ^ ngrp;

      if(target < CommNTask)
        {
          if(Recv_count[target] > 0)
            {
              MPI_Irecv(P + Recv_offset[target], Recv_count[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
                        Communicator, &requests[n_requests++]);
              MPI_Irecv(PS + Recv_offset[target], Recv_count[target] * sizeof(struct subfind_data), MPI_BYTE, target, TAG_KEY,
                        Communicator, &requests[n_requests++]);
            }
        }
    }

  MPI_Barrier(Communicator); /* not really necessary, but this will guarantee that all receives are
                                posted before the sends, which helps the stability of MPI on
                                bluegene, and perhaps some mpich1-clusters */

  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
    {
      target = CommThisTask ^ ngrp;

      if(target < CommNTask)
        {
          if(Send_count[target] > 0)
            {
              MPI_Isend(partBuf + Send_offset[target], Send_count[target] * sizeof(struct particle_data), MPI_BYTE, target, TAG_PDATA,
                        Communicator, &requests[n_requests++]);
              MPI_Isend(subBuf + Send_offset[target], Send_count[target] * sizeof(struct subfind_data), MPI_BYTE, target, TAG_KEY,
                        Communicator, &requests[n_requests++]);
            }
        }
    }

  MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);
  myfree(requests);

#else  /* #ifndef NO_ISEND_IRECV_IN_DOMAIN */
  /* blocking variant: one combined send/receive per partner task */
  for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
    {
      target = CommThisTask ^ ngrp;

      if(target < CommNTask)
        {
          if(Send_count[target] > 0 || Recv_count[target] > 0)
            {
              MPI_Sendrecv(partBuf + Send_offset[target], Send_count[target] * sizeof(struct particle_data), MPI_BYTE, target,
                           TAG_PDATA, P + Recv_offset[target], Recv_count[target] * sizeof(struct particle_data), MPI_BYTE, target,
                           TAG_PDATA, Communicator, MPI_STATUS_IGNORE);

              MPI_Sendrecv(subBuf + Send_offset[target], Send_count[target] * sizeof(struct subfind_data), MPI_BYTE, target, TAG_KEY,
                           PS + Recv_offset[target], Recv_count[target] * sizeof(struct subfind_data), MPI_BYTE, target, TAG_KEY,
                           Communicator, MPI_STATUS_IGNORE);
            }
        }
    }
#endif /* #ifndef NO_ISEND_IRECV_IN_DOMAIN #else */

  NumPart += nimport;
  myfree_movable(subBuf);
  myfree_movable(partBuf);

  /* finally, let's also address the desired local order according to PS[].TargetIndex */

  struct fof_local_sort_data *mp;
  int *Id;

  mp = (struct fof_local_sort_data *)mymalloc("mp", sizeof(struct fof_local_sort_data) * (NumPart));
  Id = (int *)mymalloc("Id", sizeof(int) * (NumPart));

  for(i = 0; i < NumPart; i++)
    {
      mp[i].index       = i;
      mp[i].targetindex = PS[i].TargetIndex;
    }

  qsort(mp, NumPart, sizeof(struct fof_local_sort_data), fof_compare_local_sort_data_targetindex);

  /* Id[old position] = new position */
  for(i = 0; i < NumPart; i++)
    Id[mp[i].index] = i;

  subfind_reorder_P(Id, 0, NumPart);

  /* the reorder routine overwrites Id while it works, so rebuild it before reordering PS */
  for(i = 0; i < NumPart; i++)
    Id[mp[i].index] = i;

  subfind_reorder_PS(Id, 0, NumPart);

  myfree(Id);
  myfree(mp);
}

/*! \brief Reorders elements in the P array.
 *
 *  Applies the permutation in place by following its cycles: the element at
 *  index i is moved to index Id[i]. Id is modified in the process; every
 *  entry that has been handled ends up as Id[k] == k.
 *
 *  \param[in, out] Id Array containing ordering (destination index of each
 *                  element); overwritten.
 *  \param[in] Nstart Start index (in Id and P).
 *  \param[in] N Final element index + 1.
 *
 *  \return void
 */
void subfind_reorder_P(int *Id, int Nstart, int N)
{
  int i;
  struct particle_data Psave, Psource;
  int idsource, idsave, dest;

  for(i = Nstart; i < N; i++)
    {
      if(Id[i] != i)
        {
          Psource  = P[i];
          idsource = Id[i];

          dest = Id[i];

          do
            {
              /* remember what currently sits at the destination before overwriting it */
              Psave  = P[dest];
              idsave = Id[dest];

              P[dest]  = Psource;
              Id[dest] = idsource;

              /* the cycle is closed once we have written back to its starting slot */
              if(dest == i)
                break;

              Psource  = Psave;
              idsource = idsave;

              dest = idsource;
            }
          while(1);
        }
    }
}
+ * + * \return void + */ +void subfind_reorder_PS(int *Id, int Nstart, int N) +{ + int i; + struct subfind_data PSsave, PSsource; + int idsource, idsave, dest; + + for(i = Nstart; i < N; i++) + { + if(Id[i] != i) + { + PSsource = PS[i]; + + idsource = Id[i]; + dest = Id[i]; + + do + { + PSsave = PS[dest]; + idsave = Id[dest]; + + PS[dest] = PSsource; + Id[dest] = idsource; + + if(dest == i) + break; + + PSsource = PSsave; + idsource = idsave; + + dest = idsource; + } + while(1); + } + } +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_findlinkngb.c b/src/amuse/community/arepo/src/subfind/subfind_findlinkngb.c new file mode 100644 index 0000000000..8faaba4542 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_findlinkngb.c @@ -0,0 +1,539 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_findlinkngb.c + * \date 05/2018 + * \brief Algorithm to find smoothing lengths of particles to get a + * desried number of neighbours. 
+ * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void subfind_find_linkngb(void) + * static int subfind_linkngb_evaluate(int target, int mode, + * int threadid) + * int subfind_treefind_collective_export_node_threads(int no, + * int i, int thread_id) + * static int subfind_ngb_compare_dist(const void *a, const + * void *b) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 15.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND +#include "subfind.h" + +static int subfind_ngb_compare_dist(const void *a, const void *b); +static int subfind_linkngb_evaluate(int target, int mode, int threadid); + +static int *DM_NumNgb; +static double *Dist2list; +static int *Ngblist; +static MyFloat *Left, *Right; +static char *Todo; + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Pos[3]; + MyFloat DM_Hsml; + + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First note of communication. 
/*! \brief Routine that fills the relevant particle/cell data into the input
 *         structure defined above. Needed by generic_comm_helpers2.
 *
 *  The position is taken from P[i].Pos, except for type-0 particles when
 *  CELL_CENTER_GRAVITY is set, in which case PS[i].Center is used. The search
 *  radius is the current PS[i].Hsml.
 *
 *  \param[out] in Data structure to fill.
 *  \param[in] i Index of particle in P and PS arrays.
 *  \param[in] firstnode First node of communication.
 *
 *  \return void
 */
static void particle2in(data_in *in, int i, int firstnode)
{
#ifdef CELL_CENTER_GRAVITY
  if(P[i].Type == 0)
    {
      in->Pos[0] = PS[i].Center[0];
      in->Pos[1] = PS[i].Center[1];
      in->Pos[2] = PS[i].Center[2];
    }
  else
#endif /* #ifdef CELL_CENTER_GRAVITY */
    {
      in->Pos[0] = P[i].Pos[0];
      in->Pos[1] = P[i].Pos[1];
      in->Pos[2] = P[i].Pos[2];
    }

  in->DM_Hsml = PS[i].Hsml;

  in->Firstnode = firstnode;
}

/*! \brief Local data structure that holds results acquired on remote
 *         processors. Type called data_out and static pointers DataResult and
 *         DataOut needed by generic_comm_helpers2.
 */
typedef struct
{
  int Ngb; /* number of neighbours found */
} data_out;

static data_out *DataResult, *DataOut;

/*! \brief Routine to store or combine result data. Needed by
 *         generic_comm_helpers2.
 *
 *  \param[in] out Data to be moved to appropriate variables in global
 *             particle and cell data arrays.
 *  \param[in] i Index of particle.
 *  \param[in] mode Mode of function: local particles or information that was
 *             communicated from other tasks and has to be added locally?
 *
 *  \return void
 */
static void out2particle(data_out *out, int i, int mode)
{
  if(mode == MODE_LOCAL_PARTICLES) /* initial store */
    {
      DM_NumNgb[i] = out->Ngb;
    }
  else /* combine */
    {
      DM_NumNgb[i] += out->Ngb;
    }
}

/* The helper header is included only after data_in, data_out, particle2in and
 * out2particle have been defined; USE_SUBCOMM_COMMUNICATOR presumably makes
 * it communicate over the subfind sub-communicator -- confirm in the header. */
#define USE_SUBCOMM_COMMUNICATOR
#include "../utils/generic_comm_helpers2.h"

/*! \brief Routine that defines what to do with local particles.
 *
 *  Calls the *_evaluate function in MODE_LOCAL_PARTICLES for every particle
 *  that is still flagged in Todo. Stops early once the thread's remaining
 *  export space drops below MinSpace.
 *
 *  \return void
 */
static void kernel_local(void)
{
  int i;

  {
    int j, threadid = get_thread_num();

    /* reset the per-task export markers of this thread */
    for(j = 0; j < SubNTask; j++)
      Thread[threadid].Exportflag[j] = -1;

    while(1)
      {
        if(Thread[threadid].ExportSpace < MinSpace)
          break;

        i = NextParticle++;

        if(i >= NumPartGroup)
          break;

        if(Todo[i])
          subfind_linkngb_evaluate(i, MODE_LOCAL_PARTICLES, threadid);
      }
  }
}
\brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + subfind_linkngb_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief Iteratvie search for particle smoothing length to enclose a given + * number of neighbours. + * + * \return void + */ +void subfind_find_linkngb(void) +{ + long long ntot; + int i, npleft, iter = 0; + double t0, t1; + + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE, root-task=%d: Start find_linkngb. (%d particles on root-task)\n", ThisTask, NumPartGroup); + + /* allocate buffers to arrange communication */ + + Ngblist = (int *)mymalloc("Ngblist", NumPartGroup * sizeof(int)); + Dist2list = (double *)mymalloc("Dist2list", NumPartGroup * sizeof(double)); + + generic_set_MaxNexport(); + + Left = (MyFloat *)mymalloc("Left", sizeof(MyFloat) * NumPartGroup); + Right = (MyFloat *)mymalloc("Right", sizeof(MyFloat) * NumPartGroup); + Todo = (char *)mymalloc("Todo", sizeof(char) * NumPartGroup); + DM_NumNgb = (int *)mymalloc_movable(&DM_NumNgb, "DM_NumNgb", sizeof(int) * NumPartGroup); + + for(i = 0; i < NumPartGroup; i++) + { + Left[i] = Right[i] = 0; + Todo[i] = 1; + } + + /* we will repeat the whole thing for those particles where we didn't find enough neighbours */ + do + { + t0 = second(); + + generic_comm_pattern(NumPartGroup, kernel_local, kernel_imported); + + /* do final operations on results */ + for(i = 0, npleft = 0; i < NumPartGroup; i++) + { + /* now check whether we had enough neighbours */ + if(Todo[i]) + { + if(DM_NumNgb[i] != All.DesLinkNgb && ((Right[i] - Left[i]) > 1.0e-6 * Left[i] || Left[i] == 0 || Right[i] == 0)) + { + /* need to redo this particle */ + npleft++; + + if(DM_NumNgb[i] < All.DesLinkNgb) + 
Left[i] = dmax(PS[i].Hsml, Left[i]); + else + { + if(Right[i] != 0) + { + if(PS[i].Hsml < Right[i]) + Right[i] = PS[i].Hsml; + } + else + Right[i] = PS[i].Hsml; + } + + if(iter >= MAXITER - 10) + { + printf("i=%d task=%d ID=%d DM_Hsml=%g Left=%g Right=%g Right-Left=%g\n pos=(%g|%g|%g)\n", i, ThisTask, + (int)P[i].ID, PS[i].Hsml, Left[i], Right[i], (double)(Right[i] - Left[i]), P[i].Pos[0], P[i].Pos[1], + P[i].Pos[2]); + fflush(stdout); + } + + if(Right[i] > 0 && Left[i] > 0) + PS[i].Hsml = pow(0.5 * (pow(Left[i], 3) + pow(Right[i], 3)), 1.0 / 3); + else + { + if(Right[i] == 0 && Left[i] == 0) + terminate("can't occur"); + + if(Right[i] == 0 && Left[i] > 0) + PS[i].Hsml *= 1.26; + + if(Right[i] > 0 && Left[i] == 0) + PS[i].Hsml /= 1.26; + } + } + else + Todo[i] = 0; + } + } + + sumup_large_ints_comm(1, &npleft, &ntot, SubComm); + + t1 = second(); + + if(ntot > 0) + { + iter++; + + if(iter > 0 && SubThisTask == 0) + { + printf("SUBFIND-COLLECTIVE, root-task=%d: find linkngb iteration %d, need to repeat for %lld particles. (took %g sec)\n", + ThisTask, iter, ntot, timediff(t0, t1)); + fflush(stdout); + } + + if(iter > MAXITER) + terminate("failed to converge in neighbour iteration in density()\n"); + } + } + while(ntot > 0); + + myfree(DM_NumNgb); + myfree(Todo); + myfree(Right); + myfree(Left); + + myfree(Dist2list); + myfree(Ngblist); + + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE, root-task=%d: Done with find_linkngb\n", ThisTask); +} + +/*! \brief Evaluate function for the neighbor search algorithm. + * + * \param[in] target Index of particle of interest. + * \param[in] mode Local or imported particles? + * \param[in] treadid ID of thread. 
+ * + * \return 0 + */ +static int subfind_linkngb_evaluate(int target, int mode, int threadid) +{ + int no, numnodes, *firstnode, numngb; + double hsml; + MyDouble *pos; + int i, k, p, exported = 0; + struct NODE *current; + double dx, dy, dz, dist, r2; + MyDouble xtmp, ytmp, ztmp; + + data_in local, *in; + data_out out; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + in = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + in = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = in->Pos; + hsml = in->DM_Hsml; + + numngb = 0; + + for(k = 0; k < numnodes; k++) + { + if(mode == MODE_LOCAL_PARTICLES) + { + no = SubTree_MaxPart; /* root node */ + } + else + { + no = firstnode[k]; + no = SubNodes[no].u.d.nextnode; /* open it */ + } + + while(no >= 0) + { + if(no < SubTree_MaxPart) /* single particle */ + { + p = no; + no = SubNextnode[no]; + + dist = hsml; + dx = FOF_NEAREST_LONG_X(SubTree_Pos_list[3 * p + 0] - pos[0]); + if(dx > dist) + continue; + dy = FOF_NEAREST_LONG_Y(SubTree_Pos_list[3 * p + 1] - pos[1]); + if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(SubTree_Pos_list[3 * p + 2] - pos[2]); + if(dz > dist) + continue; + if((r2 = (dx * dx + dy * dy + dz * dz)) > dist * dist) + continue; + + Dist2list[numngb] = r2; + Ngblist[numngb++] = p; + } + else if(no < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */ + { + if(mode == 1) + { + if(no < SubTree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the + branch */ + break; + } + + current = &SubNodes[no]; + + no = current->u.d.sibling; /* in case the node can be discarded */ + + dist = hsml + 0.5 * current->len; + dx = FOF_NEAREST_LONG_X(current->center[0] - pos[0]); + if(dx > dist) + continue; + dy = FOF_NEAREST_LONG_Y(current->center[1] - pos[1]); + if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(current->center[2] - pos[2]); + if(dz > dist) + continue; + /* now test against the 
minimal sphere enclosing everything */ + dist += FACT1 * current->len; + if(dx * dx + dy * dy + dz * dz > dist * dist) + continue; + + no = current->u.d.nextnode; /* ok, we need to open the node */ + } + else + { /* pseudo particle */ + if(mode == MODE_IMPORTED_PARTICLES) + terminate("mode == MODE_IMPORTED_PARTICLES"); + + if(target >= 0) /* if no target is given, export will not occur */ + { + exported = 1; + + if(mode == MODE_LOCAL_PARTICLES) + subfind_treefind_collective_export_node_threads(no, target, threadid); + } + + no = SubNextnode[no - SubTree_MaxNodes]; + } + } + } + + if(mode == MODE_LOCAL_PARTICLES) /* local particle */ + if(exported == 0) /* completely local */ + if(numngb >= All.DesLinkNgb) + { + R2list = (r2type *)mymalloc("R2list", sizeof(r2type) * numngb); + for(i = 0; i < numngb; i++) + { + R2list[i].index = Ngblist[i]; + R2list[i].r2 = Dist2list[i]; + } + + qsort(R2list, numngb, sizeof(r2type), subfind_ngb_compare_dist); + + PS[target].Hsml = sqrt(R2list[All.DesLinkNgb - 1].r2); + numngb = All.DesLinkNgb; + + for(i = 0; i < numngb; i++) + { + Ngblist[i] = R2list[i].index; + Dist2list[i] = R2list[i].r2; + } + + myfree(R2list); + } + + out.Ngb = numngb; + + /* Now collect the result at the right place */ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return 0; +} + +/*! \brief Prepares node export. + * + * \param[in] no Index of node. + * \param[in] i Index of particle. + * \param[in] thread_id Export thread. 
+ * + * \return 0 + */ +int subfind_treefind_collective_export_node_threads(int no, int i, int thread_id) +{ + /* The task indicated by the pseudoparticle node */ + int task = SubDomainTask[no - (SubTree_MaxPart + SubTree_MaxNodes)]; + + if(Thread[thread_id].Exportflag[task] != i) + { + Thread[thread_id].Exportflag[task] = i; + int nexp = Thread[thread_id].Nexport++; + Thread[thread_id].PartList[nexp].Task = task; + Thread[thread_id].PartList[nexp].Index = i; + Thread[thread_id].ExportSpace -= Thread[thread_id].ItemSize; + } + + int nexp = Thread[thread_id].NexportNodes++; + nexp = -1 - nexp; + struct datanodelist *nodelist = (struct datanodelist *)(((char *)Thread[thread_id].PartList) + Thread[thread_id].InitialSpace); + nodelist[nexp].Task = task; + nodelist[nexp].Index = i; + nodelist[nexp].Node = SubDomainNodeIndex[no - (SubTree_MaxPart + SubTree_MaxNodes)]; + Thread[thread_id].ExportSpace -= sizeof(struct datanodelist) + sizeof(int); + return 0; +} + +/*! \brief Comparison function for r2type objects. + * + * Compares element r2. + * + * \param[in] a First object. + * \param[in] b Second object. + * + * \return (-1,0,1) -1 if a < b. + */ +static int subfind_ngb_compare_dist(const void *a, const void *b) +{ + if(((r2type *)a)->r2 < (((r2type *)b)->r2)) + return -1; + + if(((r2type *)a)->r2 > (((r2type *)b)->r2)) + return +1; + + return 0; +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_io.c b/src/amuse/community/arepo/src/subfind/subfind_io.c new file mode 100644 index 0000000000..be760976b1 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_io.c @@ -0,0 +1,156 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_io.c + * \date 05/2018 + * \brief Main output routine for subfind. + * \details contains functions: + * void subfind_save_final(int num) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 14.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../fof/fof.h" + +#ifdef SUBFIND +#include "subfind.h" + +/*! \brief Saves subfind group catalogue to disk. + * + * Note that this routine calls the FoF I/O routines. + * + * \param[in] num Index of this snapshot output. 
+ * + * \return void + */ +void subfind_save_final(int num) +{ + int i, filenr, gr, ngrps, masterTask, lastTask, totsubs; + char buf[1000]; + double t0, t1; + + /* prepare list of ids with assigned group numbers */ +#ifdef FOF_STOREIDS + fof_subfind_prepare_ID_list(); +#endif /* #ifdef FOF_STOREIDS */ + + t0 = second(); + + /* fill in the FirstSub-values */ + for(i = 0, totsubs = 0; i < Ngroups; i++) + { + if(i > 0) + Group[i].FirstSub = Group[i - 1].FirstSub + Group[i - 1].Nsubs; + else + Group[i].FirstSub = 0; + totsubs += Group[i].Nsubs; + } + + MPI_Allgather(&totsubs, 1, MPI_INT, Send_count, 1, MPI_INT, MPI_COMM_WORLD); + for(i = 1, Send_offset[0] = 0; i < NTask; i++) + Send_offset[i] = Send_offset[i - 1] + Send_count[i - 1]; + + for(i = 0; i < Ngroups; i++) + { + if(Group[i].Nsubs > 0) + Group[i].FirstSub += Send_offset[ThisTask]; + else + Group[i].FirstSub = -1; + } + + CommBuffer = mymalloc("CommBuffer", COMMBUFFERSIZE); + + if(NTask < All.NumFilesPerSnapshot) + { + warn( + "Number of processors must be larger or equal than All.NumFilesPerSnapshot! 
Reducing All.NumFilesPerSnapshot " + "accordingly.\n"); + All.NumFilesPerSnapshot = NTask; + } + + if(All.SnapFormat < 1 || All.SnapFormat > 3) + mpi_printf("Unsupported File-Format All.SnapFormat=%d \n", All.SnapFormat); + +#ifndef HAVE_HDF5 + if(All.SnapFormat == 3) + { + mpi_terminate("Code wasn't compiled with HDF5 support enabled!\n"); + } +#endif /* #ifndef HAVE_HDF5 */ + + /* assign processors to output files */ + distribute_file(All.NumFilesPerSnapshot, 0, 0, NTask - 1, &filenr, &masterTask, &lastTask); + + if(All.NumFilesPerSnapshot > 1) + { + if(ThisTask == 0) + { + sprintf(buf, "%s/groups_%03d", All.OutputDir, num); + mkdir(buf, 02755); + } + MPI_Barrier(MPI_COMM_WORLD); + } + + if(All.NumFilesPerSnapshot > 1) + sprintf(buf, "%s/groups_%03d/%s_%03d.%d", All.OutputDir, num, "fof_subhalo_tab", num, filenr); + else + sprintf(buf, "%s%s_%03d", All.OutputDir, "fof_subhalo_tab", num); + + ngrps = All.NumFilesPerSnapshot / All.NumFilesWrittenInParallel; + if((All.NumFilesPerSnapshot % All.NumFilesWrittenInParallel)) + ngrps++; + + for(gr = 0; gr < ngrps; gr++) + { + if((filenr / All.NumFilesWrittenInParallel) == gr) /* ok, it's this processor's turn */ + fof_subfind_write_file(buf, masterTask, lastTask); + + MPI_Barrier(MPI_COMM_WORLD); + } + + myfree(CommBuffer); + +#ifdef FOF_STOREIDS + myfree(ID_list); +#endif /* #ifdef FOF_STOREIDS */ + + t1 = second(); + + mpi_printf("SUBFIND: Subgroup catalogues saved. took = %g sec\n", timediff(t0, t1)); +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_loctree.c b/src/amuse/community/arepo/src/subfind/subfind_loctree.c new file mode 100644 index 0000000000..9b3f26255c --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_loctree.c @@ -0,0 +1,930 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_loctree.c + * \date 05/2018 + * \brief Algorithms for local tree in subfind. + * \details contains functions: + * void subfind_loctree_findExtent(int npart, struct unbind_data *mp) + * void subfind_loctree_copyExtent(void) + * int subfind_loctree_treebuild(int npart, struct unbind_data **udp) + * void subfind_loctree_update_node_recursive(int no, int sib, int father) + * double subfind_loctree_treeevaluate_potential(int target) + * int subfind_locngb_compare_key(const void *a, const void *b) + * double subfind_locngb_treefind(MyDouble xyz[3], int desngb, double hguess) + * int subfind_locngb_treefind_variable(MyDouble searchcenter[3], double hguess) + * size_t subfind_loctree_treeallocate(int maxnodes, int maxpart) + * void subfind_loctree_treefree(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 14.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../gravity/forcetree.h" +#include "subfind.h" + +#ifdef SUBFIND 
+static double RootLen, RootFac, RootBigFac, RootInverseLen, RootCenter[3], RootCorner[3]; +static int LocMaxPart; /*!< number of particle slots; tree indices >= LocMaxPart refer to nodes */ +static int MaxNodes, last; /*!< node capacity; 'last' is the entry visited most recently while the walk order is threaded */ +static int *LocNextNode; /*!< next entry in the tree walk, indexed by particle */ + +static unsigned long long *LocTree_IntPos_list; + +/*! \brief Node structure for subfind tree. + * + * The union holds the daughter pointers (suns) while the tree is built; they are + * overwritten by the multipole data (d) in subfind_loctree_update_node_recursive(). + */ +static struct LocNODE +{ + union + { + int suns[8]; /*!< temporary pointers to daughter nodes */ + struct + { + MyDouble s[3]; /*!< center of mass of node */ + MyDouble mass; /*!< mass of node */ + unsigned char maxsofttype; /*!< softening type with the largest All.ForceSoftening among the node's content */ +#if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) + unsigned char maxhydrosofttype; + unsigned char minhydrosofttype; +#endif /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) */ + int sibling; /*!< this gives the next node in the walk in case the current node can be used */ + int nextnode; /*!< this gives the next node in case the current node needs to be opened */ + } d; + } u; + + MyDouble center[3]; /*!< geometrical center of node */ + MyFloat len; /*!< sidelength of treenode */ + +#ifdef MULTIPLE_NODE_SOFTENING + MyDouble mass_per_type[NSOFTTYPES]; /*!< mass of the node split by softening type */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ +} * LocNodes_base, /*!< points to the actual memory allocated for the nodes */ + *LocNodes; /*!< this is a pointer used to access the nodes which is shifted such that LocNodes[LocMaxPart] + gives the first allocated node */ + +/*! \brief Calculates min/max coordinate of particles in unbind data. + * + * \param[in] npart Number of local particles (in unbind_data). + * \param[in] mp Pointer to unbind data. 
+ * + * \return void + */ +void subfind_loctree_findExtent(int npart, struct unbind_data *mp) +{ + int i, j, k; + double len, xmin[3], xmax[3]; + + /* determine extension: start from an inverted bounding box */ + for(i = 0; i < 3; i++) + { + xmin[i] = MAX_REAL_NUMBER; + xmax[i] = -MAX_REAL_NUMBER; + } + + for(k = 0; k < npart; k++) + { + if(mp) + i = mp[k].index; + else + terminate("what?"); /* a NULL unbind list is not supported */ + +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + { + for(j = 0; j < 3; j++) + { + if(xmin[j] > PS[i].Center[j]) + xmin[j] = PS[i].Center[j]; + + if(xmax[j] < PS[i].Center[j]) + xmax[j] = PS[i].Center[j]; + } + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + for(j = 0; j < 3; j++) + { + if(xmin[j] > P[i].Pos[j]) + xmin[j] = P[i].Pos[j]; + + if(xmax[j] < P[i].Pos[j]) + xmax[j] = P[i].Pos[j]; + } + } + } + + len = 0; /* the root cube uses the largest extent over the three axes */ + for(j = 0; j < 3; j++) + if(xmax[j] - xmin[j] > len) + len = xmax[j] - xmin[j]; + + len *= 1.001; /* enlarge the cube by 0.1 percent */ + + RootLen = len; + RootInverseLen = 1.0 / RootLen; /* NOTE(review): len is 0 for npart == 0 or coincident particles, giving a division by zero -- confirm callers exclude this */ + RootFac = 1.0 / len * (((peanokey)1) << (BITS_PER_DIMENSION)); + RootBigFac = (RootLen / (((long long)1) << 52)); /* length per unit of a 52-bit integer coordinate */ + + for(j = 0; j < 3; j++) + { + RootCenter[j] = 0.5 * (xmin[j] + xmax[j]); + RootCorner[j] = 0.5 * (xmin[j] + xmax[j]) - 0.5 * len; + } +} + +/*! \brief Copy extent information from SubDomain to Root. + * + * Copies SubDomainCenter, SubDomainCorner, SubDomainLen, SubDomainInverseLen, + * SubDomainFac and SubDomainBigFac into the corresponding file-static Root* + * variables. + * + * This is called from the collective subfind code. + * + * \return void + */ +void subfind_loctree_copyExtent(void) +{ + int j; + for(j = 0; j < 3; j++) + { + RootCenter[j] = SubDomainCenter[j]; + RootCorner[j] = SubDomainCorner[j]; + } + RootLen = SubDomainLen; + RootInverseLen = SubDomainInverseLen; + RootFac = SubDomainFac; + RootBigFac = SubDomainBigFac; +} + +/*! \brief Construct the subfind tree. + * + * \param[in] npart Number of particles involved. + * \param[in] udp Unbind data. + * + * \return Number of nodes. 
+ */ +int subfind_loctree_treebuild(int npart, struct unbind_data **udp) +{ + int i, j, k, subnode = 0, parent = -1, numnodes; + int nfree, th, nn; + struct LocNODE *nfreep; + struct unbind_data *mp; + + /* select first node */ + nfree = LocMaxPart; + nfreep = &LocNodes[nfree]; + + mp = *udp; + + /* create an empty root node */ + nfreep->len = (MyFloat)RootLen; + for(i = 0; i < 3; i++) + nfreep->center[i] = (MyFloat)RootCenter[i]; + + for(i = 0; i < 8; i++) + nfreep->u.suns[i] = -1; + + numnodes = 1; + nfreep++; + nfree++; + + /* insert all particles; each position is first mapped to integer coordinates kept in LocTree_IntPos_list */ + + LocTree_IntPos_list = + (unsigned long long *)mymalloc_movable(&LocTree_IntPos_list, "LocTree_IntPos_list", 3 * NumPart * sizeof(unsigned long long)); + + for(k = 0; k < npart; k++) + { + if(mp) + i = mp[k].index; + else + terminate("what?"); + + MyDouble *posp; + +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) + posp = &PS[i].Center[0]; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + posp = &P[i].Pos[0]; + + unsigned long long xxb = force_double_to_int(((posp[0] - RootCorner[0]) * RootInverseLen) + 1.0); + unsigned long long yyb = force_double_to_int(((posp[1] - RootCorner[1]) * RootInverseLen) + 1.0); + unsigned long long zzb = force_double_to_int(((posp[2] - RootCorner[2]) * RootInverseLen) + 1.0); + unsigned long long mask = ((unsigned long long)1) << (52 - 1); + unsigned char shiftx = (52 - 1); + unsigned char shifty = (52 - 2); + unsigned char shiftz = (52 - 3); + signed long long centermask = (0xFFF0000000000000llu); + unsigned char levels = 0; + + unsigned long long *intposp = &LocTree_IntPos_list[3 * i]; + + *intposp++ = xxb; + *intposp++ = yyb; + *intposp++ = zzb; + + th = LocMaxPart; /* every insertion starts at the root node */ + + while(1) + { + if(th >= LocMaxPart) /* we are dealing with an internal node */ + { + subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) | + ((unsigned char)((zzb & mask) >> (shiftz--)))); + + centermask >>= 1; + mask >>= 1; + levels++; + + if(levels > 
MAX_TREE_LEVEL) + { + /* seems like we're dealing with particles at identical (or extremely close) + * locations. Shift subnode index to allow tree construction. Note: Multipole moments + * of tree are still correct, but one should choose MAX_TREE_LEVEL large enough to have + * DomainLen/2^MAX_TREE_LEVEL < gravitational softening length + */ + for(j = 0; j < 8; j++) + { + if(LocNodes[th].u.suns[subnode] < 0) + break; + + subnode++; + if(subnode >= 8) + subnode = 7; + } + } + + nn = LocNodes[th].u.suns[subnode]; + + if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */ + { + parent = th; /* note: subnode can still be used in the next step of the walk */ + th = nn; + } + else + { + /* here we have found an empty slot where we can + * attach the new particle as a leaf + */ + LocNodes[th].u.suns[subnode] = i; + break; /* done for this particle */ + } + } + else + { + /* we try to insert into a leaf with a single particle + * need to generate a new internal node at this point + */ + LocNodes[parent].u.suns[subnode] = nfree; + + /* the other is: */ + double len = ((double)(mask << 1)) * RootBigFac; + double cx = ((double)((xxb & centermask) | mask)) * RootBigFac + RootCorner[0]; + double cy = ((double)((yyb & centermask) | mask)) * RootBigFac + RootCorner[1]; + double cz = ((double)((zzb & centermask) | mask)) * RootBigFac + RootCorner[2]; + + nfreep->len = len; + nfreep->center[0] = cx; + nfreep->center[1] = cy; + nfreep->center[2] = cz; + + nfreep->u.suns[0] = -1; + nfreep->u.suns[1] = -1; + nfreep->u.suns[2] = -1; + nfreep->u.suns[3] = -1; + nfreep->u.suns[4] = -1; + nfreep->u.suns[5] = -1; + nfreep->u.suns[6] = -1; + nfreep->u.suns[7] = -1; + + unsigned long long *intppos = &LocTree_IntPos_list[3 * th]; /* integer position of the particle that occupied the leaf */ + + subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) | + ((unsigned char)((intppos[2] & mask) >> shiftz))); + + nfreep->u.suns[subnode] = th; + + th = nfree; /* resume trying to insert 
the new particle at + the newly created internal node */ + + numnodes++; + nfree++; + nfreep++; + + if(numnodes >= MaxNodes) + { + MaxNodes *= 1.2; /* enlarge the node capacity by 20 percent (truncated to int) */ + + LocNodes_base = (struct LocNODE *)myrealloc_movable(LocNodes_base, (MaxNodes + 1) * sizeof(struct LocNODE)); + LocNodes = LocNodes_base - LocMaxPart; + nfreep = &LocNodes[nfree]; + mp = *udp; /* re-read the list pointer; presumably the movable reallocation may relocate it -- confirm */ + + if(numnodes > MaxNodes) + { + char buf[1000]; + + sprintf(buf, "maximum number %d of tree-nodes reached., for particle %d %g %g %g", MaxNodes, i, P[i].Pos[0], + P[i].Pos[1], P[i].Pos[2]); + terminate(buf); + } + } + } + } + } + + myfree(LocTree_IntPos_list); + + /* now compute the multipole moments recursively */ + last = -1; + subfind_loctree_update_node_recursive(LocMaxPart, -1, -1); + + if(last >= LocMaxPart) + LocNodes[last].u.d.nextnode = -1; + else + LocNextNode[last] = -1; /* the last visited entry terminates the walk with -1 */ + + return numnodes; +} + +/*! \brief Compute multipole moments. + * + * This routine computes the multipole moments for a given internal node and + * all its subnodes using a recursive computation. While descending it also + * threads the walk order through u.d.nextnode (nodes) and LocNextNode + * (particles) using the file-static variable last. + * + * \param[in] no Node that we are in. + * \param[in] sib Sibling of the node. + * \param[in] father Parent node (passed along, but not read in this function). + * + * \return void + */ +void subfind_loctree_update_node_recursive(int no, int sib, int father) +{ + int j, jj, p, pp = 0, nextsib, suns[8]; + unsigned char maxsofttype; +#ifdef MULTIPLE_NODE_SOFTENING + double mass_per_type[NSOFTTYPES]; +#ifdef ADAPTIVE_HYDRO_SOFTENING + unsigned char maxhydrosofttype; + unsigned char minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + double mass; + double s[3]; + + if(no >= LocMaxPart) + { + for(j = 0; j < 8; j++) + suns[j] = LocNodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will + overwrite one element (union!) 
*/ + if(last >= 0) + { + if(last >= LocMaxPart) + LocNodes[last].u.d.nextnode = no; + else + LocNextNode[last] = no; + } + + last = no; + + mass = 0; + s[0] = 0; + s[1] = 0; + s[2] = 0; + maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; + +#ifdef MULTIPLE_NODE_SOFTENING + for(j = 0; j < NSOFTTYPES; j++) + mass_per_type[j] = 0; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + maxhydrosofttype = NSOFTTYPES; + minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + for(j = 0; j < 8; j++) + { + if((p = suns[j]) >= 0) + { + /* check if we have a sibling on the same level */ + for(jj = j + 1; jj < 8; jj++) + if((pp = suns[jj]) >= 0) + break; + + if(jj < 8) /* yes, we do */ + nextsib = pp; + else + nextsib = sib; + + subfind_loctree_update_node_recursive(p, nextsib, no); + + if(p >= LocMaxPart) /* an internal node */ + { + mass += LocNodes[p].u.d.mass; /* we assume a fixed particle mass */ + s[0] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[0]; + s[1] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[1]; + s[2] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[2]; + + if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[LocNodes[p].u.d.maxsofttype]) + maxsofttype = LocNodes[p].u.d.maxsofttype; + +#ifdef MULTIPLE_NODE_SOFTENING + int k; + for(k = 0; k < NSOFTTYPES; k++) + mass_per_type[k] += LocNodes[p].mass_per_type[k]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + if(maxhydrosofttype < LocNodes[p].u.d.maxhydrosofttype) + maxhydrosofttype = LocNodes[p].u.d.maxhydrosofttype; + if(minhydrosofttype > LocNodes[p].u.d.minhydrosofttype) + minhydrosofttype = LocNodes[p].u.d.minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + else /* a particle */ + { + mass += P[p].Mass; +#ifdef CELL_CENTER_GRAVITY + if(P[p].Type == 0) + { + s[0] += P[p].Mass * PS[p].Center[0]; + s[1] += P[p].Mass * PS[p].Center[1]; + s[2] += P[p].Mass * PS[p].Center[2]; + } + else +#endif /* 
#ifdef CELL_CENTER_GRAVITY */ + { + s[0] += P[p].Mass * P[p].Pos[0]; + s[1] += P[p].Mass * P[p].Pos[1]; + s[2] += P[p].Mass * P[p].Pos[2]; + } + + if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[P[p].SofteningType]) + maxsofttype = P[p].SofteningType; +#ifdef MULTIPLE_NODE_SOFTENING +#ifdef ADAPTIVE_HYDRO_SOFTENING + mass_per_type[P[p].Type == 0 ? 0 : P[p].SofteningType] += P[p].Mass; + + if(P[p].Type == 0) + { + if(maxhydrosofttype < P[p].SofteningType) + maxhydrosofttype = P[p].SofteningType; + if(minhydrosofttype > P[p].SofteningType) + minhydrosofttype = P[p].SofteningType; + } +#else /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + mass_per_type[P[p].SofteningType] += P[p].Mass; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + } + } + + if(mass > 0) + { + s[0] /= mass; + s[1] /= mass; + s[2] /= mass; + } + else /* massless node: fall back to the geometric center */ + { + s[0] = LocNodes[no].center[0]; + s[1] = LocNodes[no].center[1]; + s[2] = LocNodes[no].center[2]; + } + + LocNodes[no].u.d.s[0] = (MyFloat)s[0]; + LocNodes[no].u.d.s[1] = (MyFloat)s[1]; + LocNodes[no].u.d.s[2] = (MyFloat)s[2]; + LocNodes[no].u.d.mass = (MyFloat)mass; + LocNodes[no].u.d.maxsofttype = maxsofttype; +#ifdef MULTIPLE_NODE_SOFTENING + int k; + for(k = 0; k < NSOFTTYPES; k++) + LocNodes[no].mass_per_type[k] = mass_per_type[k]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + LocNodes[no].u.d.maxhydrosofttype = maxhydrosofttype; + LocNodes[no].u.d.minhydrosofttype = minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + LocNodes[no].u.d.sibling = sib; + } + else /* single particle or pseudo particle */ + { + if(last >= 0) + { + if(last >= LocMaxPart) + LocNodes[last].u.d.nextnode = no; + else + LocNextNode[last] = no; + } + + last = no; + } +} + +/*! \brief Evaluates the potential by walking the subfind local tree. + * + * \param[in] target Index of the particle. + * + * \return Gravitational potiential. 
+ */ +double subfind_loctree_treeevaluate_potential(int target) +{ + struct LocNODE *nop = 0; + int no; + double r2, dx, dy, dz, mass, r, u, h_i, h_j, hmax, h_inv, wp; + double pot, pos_x, pos_y, pos_z, xtmp, ytmp, ztmp; + +#ifdef CELL_CENTER_GRAVITY + if(P[target].Type == 0) + { + pos_x = PS[target].Center[0]; + pos_y = PS[target].Center[1]; + pos_z = PS[target].Center[2]; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + pos_x = P[target].Pos[0]; + pos_y = P[target].Pos[1]; + pos_z = P[target].Pos[2]; + } + + h_i = All.ForceSoftening[P[target].SofteningType]; + + pot = 0; + + no = LocMaxPart; /* the walk starts at the root node */ + + while(no >= 0) + { +#ifdef MULTIPLE_NODE_SOFTENING + int indi_flag1 = -1, indi_flag2 = 0; /* default range: a single pass (type == -1) that uses the total mass */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + if(no < LocMaxPart) /* single particle */ + { +#ifdef CELL_CENTER_GRAVITY + if(P[no].Type == 0) + { + dx = GRAVITY_NEAREST_X(PS[no].Center[0] - pos_x); + dy = GRAVITY_NEAREST_Y(PS[no].Center[1] - pos_y); + dz = GRAVITY_NEAREST_Z(PS[no].Center[2] - pos_z); + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + dx = GRAVITY_NEAREST_X(P[no].Pos[0] - pos_x); + dy = GRAVITY_NEAREST_Y(P[no].Pos[1] - pos_y); + dz = GRAVITY_NEAREST_Z(P[no].Pos[2] - pos_z); + } + + r2 = dx * dx + dy * dy + dz * dz; + + mass = P[no].Mass; + + h_j = All.ForceSoftening[P[no].SofteningType]; + + if(h_j > h_i) + hmax = h_j; + else + hmax = h_i; + + no = LocNextNode[no]; + } + else + { + nop = &LocNodes[no]; + mass = nop->u.d.mass; + + dx = GRAVITY_NEAREST_X(nop->u.d.s[0] - pos_x); + dy = GRAVITY_NEAREST_Y(nop->u.d.s[1] - pos_y); + dz = GRAVITY_NEAREST_Z(nop->u.d.s[2] - pos_z); + + r2 = dx * dx + dy * dy + dz * dz; + + /* check Barnes-Hut opening criterion */ + if(nop->len * nop->len > r2 * All.ErrTolThetaSubfind * All.ErrTolThetaSubfind) + { + /* open cell */ + if(mass) /* a massless node is not opened and falls through to the code below */ + { + no = nop->u.d.nextnode; + continue; + } + } + + h_j = All.ForceSoftening[nop->u.d.maxsofttype]; + + if(h_j > h_i) + { +#ifdef MULTIPLE_NODE_SOFTENING +#ifdef ADAPTIVE_HYDRO_SOFTENING 
+ if(nop->u.d.maxhydrosofttype != nop->u.d.minhydrosofttype) + if(LocNodes[no].mass_per_type[0] > 0) + if(r2 < All.ForceSoftening[nop->u.d.maxhydrosofttype] * All.ForceSoftening[nop->u.d.maxhydrosofttype]) + { + /* open cell */ + no = nop->u.d.nextnode; + continue; + } +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + indi_flag1 = 0; /* evaluate the node once per softening type below */ + indi_flag2 = NSOFTTYPES; +#else /* #ifdef MULTIPLE_NODE_SOFTENING */ + + if(r2 < h_j * h_j) + { + /* open cell */ + no = nop->u.d.nextnode; + continue; + } +#endif /* #ifdef MULTIPLE_NODE_SOFTENING #else */ + hmax = h_j; + } + else + hmax = h_i; + + no = nop->u.d.sibling; /* node can be used */ + } + + r = sqrt(r2); +#ifdef MULTIPLE_NODE_SOFTENING + int type; + for(type = indi_flag1; type < indi_flag2; type++) + { + if(type >= 0) + { + mass = nop->mass_per_type[type]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + if(type == 0) + h_j = All.ForceSoftening[nop->u.d.maxhydrosofttype]; + else +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + h_j = All.ForceSoftening[type]; + + if(h_j > h_i) + hmax = h_j; + else + hmax = h_i; + } + + if(mass) + { +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + if(r >= hmax) + pot -= mass / r; /* unsoftened contribution -mass/r */ + else + { + h_inv = 1.0 / hmax; + + u = r * h_inv; + + /* softened regime (r < hmax): the piecewise polynomial wp(u) replaces -1/r */ + if(u < 0.5) + wp = -2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6)); + else + wp = -3.2 + 0.066666666667 / u + u * u * (10.666666666667 + u * (-16.0 + u * (9.6 - 2.133333333333 * u))); + + pot += mass * h_inv * wp; +#ifdef MULTIPLE_NODE_SOFTENING + } + } +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + } + + return pot; +} + +/*! \brief Comparison function for r2type objects. + * + * Compares element r2; intended as a qsort() comparator that orders by + * increasing r2. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a->r2 < b->r2. + */ +int subfind_locngb_compare_key(const void *a, const void *b) +{ + if(((r2type *)a)->r2 < (((r2type *)b)->r2)) + return -1; + + if(((r2type *)a)->r2 > (((r2type *)b)->r2)) + return +1; + + return 0; +} + +/*! 
\brief Iterates on smoothing length of neighbor search to get a desired + * number of neighbors. + * + * \param[in] xyz Search center of neighbor search. + * \param[in] desngb Desired number of neighbors. + * \param[in] hguess Initial guess of smoothing length. + * + * \return Distance of the outermost particle to seearch center. + */ +double subfind_locngb_treefind(MyDouble xyz[3], int desngb, double hguess) +{ + int numngb; + double h2max; + + if(hguess == 0) + terminate("hguess needed"); + + while(1) + { + numngb = subfind_locngb_treefind_variable(xyz, hguess); + + if(numngb < desngb) + { + hguess *= 1.26; + continue; + } + + if(numngb >= desngb) + { + qsort(R2list, numngb, sizeof(r2type), subfind_locngb_compare_key); + h2max = R2list[desngb - 1].r2; + break; + } + + hguess *= 1.26; + } + + return sqrt(h2max); +} + +/*! \brief (Local) tree-search in subfind tree. + * + * Adds these cells to R2list. + * + * \param[in] searchcenter Center around which particles are searched. + * \param[in] hguess Distance up to which particles are searched. + * + * \return Number of neighbors found. 
+ */ +int subfind_locngb_treefind_variable(MyDouble searchcenter[3], double hguess) +{ + int numngb, no, p; + double dx, dy, dz, r2, h2; + struct LocNODE *thisnode; + double xtmp, ytmp, ztmp; + + h2 = hguess * hguess; + + numngb = 0; + no = LocMaxPart; /* the walk starts at the root node */ + + while(no >= 0) + { + if(no < LocMaxPart) /* single particle */ + { + p = no; + no = LocNextNode[no]; /* advance first, so that each 'continue' below moves on to the next entry */ +#ifdef CELL_CENTER_GRAVITY + if(P[p].Type == 0) + { + dx = GRAVITY_NEAREST_X(PS[p].Center[0] - searchcenter[0]); + dy = GRAVITY_NEAREST_Y(PS[p].Center[1] - searchcenter[1]); + dz = GRAVITY_NEAREST_Z(PS[p].Center[2] - searchcenter[2]); + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + dx = GRAVITY_NEAREST_X(P[p].Pos[0] - searchcenter[0]); + dy = GRAVITY_NEAREST_Y(P[p].Pos[1] - searchcenter[1]); + dz = GRAVITY_NEAREST_Z(P[p].Pos[2] - searchcenter[2]); + } + + if(dx < -hguess) + continue; + if(dx > hguess) + continue; + + if(dy < -hguess) + continue; + if(dy > hguess) + continue; + + if(dz < -hguess) + continue; + if(dz > hguess) + continue; + + r2 = dx * dx + dy * dy + dz * dz; + + if(r2 <= h2) + { + R2list[numngb].r2 = r2; + R2list[numngb].index = p; + numngb++; + } + } + else + { + thisnode = &LocNodes[no]; + + no = LocNodes[no].u.d.sibling; /* in case the node can be discarded */ + + /* discard the node if its cube does not overlap the search box of half-width hguess along any axis */ + if((GRAVITY_NEAREST_X(thisnode->center[0] - searchcenter[0]) + 0.5 * thisnode->len) < -hguess) + continue; + if((GRAVITY_NEAREST_X(thisnode->center[0] - searchcenter[0]) - 0.5 * thisnode->len) > hguess) + continue; + if((GRAVITY_NEAREST_Y(thisnode->center[1] - searchcenter[1]) + 0.5 * thisnode->len) < -hguess) + continue; + if((GRAVITY_NEAREST_Y(thisnode->center[1] - searchcenter[1]) - 0.5 * thisnode->len) > hguess) + continue; + if((GRAVITY_NEAREST_Z(thisnode->center[2] - searchcenter[2]) + 0.5 * thisnode->len) < -hguess) + continue; + if((GRAVITY_NEAREST_Z(thisnode->center[2] - searchcenter[2]) - 0.5 * thisnode->len) > hguess) + continue; + + no = thisnode->u.d.nextnode; /* ok, we need to open the node */ + } + } + + 
return numngb; +} + +/*! \brief Allocates memory used for storage of the tree + * and auxiliary arrays for tree-walk and link-lists. + * + * Allocates LocNextNode and R2list with maxpart entries each and + * LocNodes_base with maxnodes + 1 entries; LocNodes is set to + * LocNodes_base - maxpart. Terminates if LocNextNode is already set. + * + * \param[in] maxnodes Maximum number of nodes. + * \param[in] maxpart Maximum number of particles. + * + * \return Size of allocated memory in bytes. + */ +size_t subfind_loctree_treeallocate(int maxnodes, int maxpart) +{ + size_t bytes, allbytes = 0; + + if(LocNextNode) + terminate("loctree already allocated"); + + MaxNodes = maxnodes; + LocMaxPart = maxpart; + + LocNextNode = (int *)mymalloc("LocNextNode", bytes = maxpart * sizeof(int)); + allbytes += bytes; + + R2list = (r2type *)mymalloc("R2list", bytes = maxpart * sizeof(r2type)); + allbytes += bytes; + + LocNodes_base = (struct LocNODE *)mymalloc_movable(&LocNodes_base, "LocNodes_base", bytes = (MaxNodes + 1) * sizeof(struct LocNODE)); + LocNodes = LocNodes_base - LocMaxPart; /* shifted so that LocNodes[LocMaxPart] is the first allocated node */ + allbytes += bytes; + + return allbytes; +} + +/*! \brief Frees the memory allocated for subfind_loctree. + * + * Frees in reverse order of allocation and resets LocNextNode, R2list and + * LocNodes_base to NULL. The shifted pointer LocNodes is not reset. + * + * \return void + */ +void subfind_loctree_treefree(void) +{ + myfree(LocNodes_base); + myfree(R2list); + myfree(LocNextNode); + + LocNextNode = NULL; + R2list = NULL; + LocNodes_base = NULL; +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_nearesttwo.c b/src/amuse/community/arepo/src/subfind/subfind_nearesttwo.c new file mode 100644 index 0000000000..23e8bf95f3 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_nearesttwo.c @@ -0,0 +1,475 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_nearesttwo.c + * \date 05/2018 + * \brief Neighbor finding of particles in group. + * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void subfind_find_nearesttwo(void) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 14.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND +#include "subfind.h" + +static int subfind_nearesttwo_evaluate(int target, int mode, int threadid); + +/*! \brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Pos[3]; /*!< search position of the particle */ + MyIDType ID; /*!< particle ID, used to exclude the particle itself */ + MyFloat Hsml; /*!< search radius */ + MyFloat Density; /*!< density of the particle; only denser neighbours are accepted */ + MyFloat Dist[2]; /*!< squared distances of the candidates known so far */ + int Count; /*!< number of valid entries in Dist and Index */ + long long Index[2]; /*!< candidate indices; the evaluate routine encodes them as (task << 32) + local index */ + + int Firstnode; +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. 
+ * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First note of communication. + * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + int k; + +#ifdef CELL_CENTER_GRAVITY + if(P[i].Type == 0) /* with CELL_CENTER_GRAVITY, particles of type 0 use PS[i].Center instead of P[i].Pos */ + { + in->Pos[0] = PS[i].Center[0]; + in->Pos[1] = PS[i].Center[1]; + in->Pos[2] = PS[i].Center[2]; + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + in->Pos[0] = P[i].Pos[0]; + in->Pos[1] = P[i].Pos[1]; + in->Pos[2] = P[i].Pos[2]; + } + + in->Hsml = PS[i].Hsml; + in->ID = P[i].ID; + in->Density = PS[i].Density; + in->Count = NgbLoc[i].count; + for(k = 0; k < NgbLoc[i].count; k++) /* pass along the candidates already stored for this particle */ + { + in->Dist[k] = R2Loc[i].dist[k]; + in->Index[k] = NgbLoc[i].index[k]; + } + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + MyFloat Dist[2]; /*!< squared distances of the candidates found */ + long long Index[2]; /*!< indices of the candidates found */ + int Count; /*!< number of valid entries in Dist and Index (at most 2) */ +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? 
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + int k; + + NgbLoc[i].count = out->Count; + + for(k = 0; k < out->Count; k++) + { + R2Loc[i].dist[k] = out->Dist[k]; + NgbLoc[i].index[k] = out->Index[k]; + } + } + else /* combine */ + { + int k, l; + + for(k = 0; k < out->Count; k++) + { + /* skip candidates that are already stored */ + if(NgbLoc[i].count >= 1) + if(NgbLoc[i].index[0] == out->Index[k]) + continue; + + if(NgbLoc[i].count == 2) + if(NgbLoc[i].index[1] == out->Index[k]) + continue; + + if(NgbLoc[i].count < 2) /* a free slot is left: append */ + { + l = NgbLoc[i].count; + NgbLoc[i].count++; + } + else /* both slots are taken: the farther one is the replacement candidate */ + { + if(R2Loc[i].dist[0] > R2Loc[i].dist[1]) + l = 0; + else + l = 1; + + if(out->Dist[k] >= R2Loc[i].dist[l]) + continue; + } + + R2Loc[i].dist[l] = out->Dist[k]; + NgbLoc[i].index[l] = out->Index[k]; + + if(NgbLoc[i].count == 2) + if(NgbLoc[i].index[0] == NgbLoc[i].index[1]) + terminate("this is not supposed to happen"); + } + } +} + +#define USE_SUBCOMM_COMMUNICATOR +#include "../utils/generic_comm_helpers2.h" + +static double *Dist2list; +static int *Ngblist; + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES for successive + * particles taken from NextParticle, until either all NumPartGroup particles + * are done or the export space of the thread drops below MinSpace. + * + * \return void + */ +static void kernel_local(void) +{ + int i; + { + int j, threadid = get_thread_num(); + + for(j = 0; j < SubNTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) + break; + + i = NextParticle++; + + if(i >= NumPartGroup) + break; + + subfind_nearesttwo_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. 
+ * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + subfind_nearesttwo_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); + } + } +} + +/*! \brief Neighbour finding for each particle in group. + * + * Allocates the scratch arrays Ngblist and Dist2list (NumPartGroup entries + * each), resets NgbLoc[i].count for all group particles and runs the generic + * communication pattern with kernel_local and kernel_imported. + * + * \return void + */ +void subfind_find_nearesttwo(void) +{ + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE, root-task=%d: Start finding nearest two.\n", ThisTask); + + /* allocate buffers to arrange communication */ + + Ngblist = (int *)mymalloc("Ngblist", NumPartGroup * sizeof(int)); + Dist2list = (double *)mymalloc("Dist2list", NumPartGroup * sizeof(double)); + + generic_set_MaxNexport(); + + for(int i = 0; i < NumPartGroup; i++) + NgbLoc[i].count = 0; /* no candidates are known before the search */ + + generic_comm_pattern(NumPartGroup, kernel_local, kernel_imported); + + myfree(Dist2list); + myfree(Ngblist); + + if(SubThisTask == 0) + printf("SUBFIND-COLLECTIVE, root-task=%d: Done with nearest two.\n", ThisTask); +} + +/*! \brief Neighbor finding routine on local particles. + * + * \param[in] target Index of particle/cell. + * \param[in] mode Flag if it operates on local or imported data. + * \param[in] threadid ID of thread. 
+ * + * \return 0 + */ +static int subfind_nearesttwo_evaluate(int target, int mode, int threadid) +{ + int j, k, n, no, count; + MyIDType ID; + long long index[2]; + double dist[2]; + int numngb, numnodes, *firstnode; + double hsml; + double density; + MyDouble *pos; + struct NODE *current; + double dx, dy, dz, disthsml, r2; + MyDouble xtmp, ytmp, ztmp; + + data_in local, *in; + data_out out; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + in = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + in = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + ID = in->ID; + density = in->Density; + pos = in->Pos; + hsml = in->Hsml; + count = in->Count; + for(k = 0; k < count; k++) + { + dist[k] = in->Dist[k]; + index[k] = in->Index[k]; + } + + if(count == 2) + if(index[0] == index[1]) + { + terminate("task=%d/%d target=%d mode=%d index_0=%lld index_1=%lld\n", SubThisTask, ThisTask, target, mode, index[0], + index[1]); + } + + numngb = 0; + count = 0; /* the incoming candidates were only sanity-checked above; the result list is rebuilt from scratch */ + + hsml *= 1.00001; /* prevents that the most distant neighbour on the edge of the search region may not be found. 
+ * (needed for consistency with serial algorithm) + */ + + for(k = 0; k < numnodes; k++) + { + if(mode == MODE_LOCAL_PARTICLES) + { + no = SubTree_MaxPart; /* root node */ + } + else + { + no = firstnode[k]; + no = SubNodes[no].u.d.nextnode; /* open it */ + } + while(no >= 0) + { + if(no < SubTree_MaxPart) /* single particle */ + { + int p = no; + no = SubNextnode[no]; + + disthsml = hsml; + dx = FOF_NEAREST_LONG_X(SubTree_Pos_list[3 * p + 0] - pos[0]); /* NOTE(review): only the upper bound is tested below, so the macro presumably yields a non-negative separation -- confirm */ + if(dx > disthsml) + continue; + dy = FOF_NEAREST_LONG_Y(SubTree_Pos_list[3 * p + 1] - pos[1]); + if(dy > disthsml) + continue; + dz = FOF_NEAREST_LONG_Z(SubTree_Pos_list[3 * p + 2] - pos[2]); + if(dz > disthsml) + continue; + if((r2 = (dx * dx + dy * dy + dz * dz)) > disthsml * disthsml) + continue; + + Dist2list[numngb] = r2; + Ngblist[numngb++] = p; + } + else if(no < SubTree_MaxPart + SubTree_MaxNodes) /* internal node */ + { + if(mode == 1) /* NOTE(review): literal 1 is presumably MODE_IMPORTED_PARTICLES -- confirm and prefer the macro */ + { + if(no < SubTree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the + branch */ + { + break; + } + } + + current = &SubNodes[no]; + + no = current->u.d.sibling; /* in case the node can be discarded */ + + disthsml = hsml + 0.5 * current->len; + + dx = FOF_NEAREST_LONG_X(current->center[0] - pos[0]); + if(dx > disthsml) + continue; + dy = FOF_NEAREST_LONG_Y(current->center[1] - pos[1]); + if(dy > disthsml) + continue; + dz = FOF_NEAREST_LONG_Z(current->center[2] - pos[2]); + if(dz > disthsml) + continue; + /* now test against the minimal sphere enclosing everything */ + disthsml += FACT1 * current->len; + if(dx * dx + dy * dy + dz * dz > disthsml * disthsml) + continue; + + no = current->u.d.nextnode; /* ok, we need to open the node */ + } + else if(no >= SubTree_ImportedNodeOffset) /* point from imported nodelist */ + { + terminate("do not expect imported points here"); + } + else /* pseudo particle */ + { + if(mode == MODE_IMPORTED_PARTICLES) + terminate("mode == MODE_IMPORTED_PARTICLES"); + + if(target >= 0) /* note: if 
no target is given, export will not occur */ + subfind_treefind_collective_export_node_threads(no, target, threadid); + + no = SubNextnode[no - SubTree_MaxNodes]; + } + } + } + + for(n = 0; n < numngb; n++) + { + j = Ngblist[n]; + r2 = Dist2list[n]; + + if(P[j].ID != ID) /* exclude the self-particle */ + { + if(PS[j].Density > density) /* we only look at neighbours that are denser */ + { + if(count < 2) + { + dist[count] = r2; + index[count] = (((long long)SubThisTask) << 32) + j; /* task rank in the upper 32 bits, local index in the lower ones */ + count++; + } + else + { + if(dist[0] > dist[1]) /* replace the farther of the two stored candidates if the new one is closer */ + k = 0; + else + k = 1; + + if(r2 < dist[k]) + { + dist[k] = r2; + index[k] = (((long long)SubThisTask) << 32) + j; + } + } + } + } + } + + out.Count = count; + for(k = 0; k < count; k++) + { + out.Dist[k] = dist[k]; + out.Index[k] = index[k]; + } + + /* Now collect the result at the right place */ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return 0; +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_properties.c b/src/amuse/community/arepo/src/subfind/subfind_properties.c new file mode 100644 index 0000000000..5d2756cbdf --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_properties.c @@ -0,0 +1,1195 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/subfind/subfind_properties.c + * \date 05/2018 + * \brief Calculation of the subgroup properties. + * \details contains functions: + * void subfind_determine_sub_halo_properties(struct + * unbind_data *d, int num, struct subgroup_properties + * *subgroup, int grnr, int subnr, int parallel_flag, int + * nsubgroups_cat) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 14.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND +#include "../fof/fof.h" +#include "subfind.h" + +/*! \brief Calculates subhalo properties. + * + * + * \param[in] d Unbind data. + * \param[in] num Length of d. + * \param[out] subgroup Data for subgroup properties. + * \param[in] grnr Index in GroupCat. + * \param[in] subnr Index of Subhalo in this group. + * \param[in] parallel_flag If set, the code calculates the properties for a + * subhalo distributed onto several processors.
+ * \param[in] nsubgroups_cat (unused) + * + * \return void + */ +void subfind_determine_sub_halo_properties(struct unbind_data *d, int num, struct subgroup_properties *subgroup, int grnr, int subnr, + int parallel_flag, int nsubgroups_cat) +{ + int i, j, p, len_type[NTYPES], len_type_loc[NTYPES], totlen; + double s[3], v[3], pos[3], vel[3], spin[3], cm[3], veldisp, max, vel_to_phys, H_of_a, minpot; +#ifdef MHD + double bfld_halo, bfld_disk, bfld_vol_halo, bfld_vol_disk; +#endif /* #ifdef MHD */ +#ifdef SUBFIND_EXTENDED_PROPERTIES + double Ekin = 0, Epot = 0, Ethr = 0, Jdm[3], Jgas[3], Jstars[3], CMFrac, CMFracType[NTYPES]; + double Jdm_inHalfRad[3], Jgas_inHalfRad[3], Jstars_inHalfRad[3], CMFrac_inHalfRad, CMFracType_inHalfRad[NTYPES]; + double Jdm_inRad[3], Jgas_inRad[3], Jstars_inRad[3], CMFrac_inRad, CMFracType_inRad[NTYPES]; + double jpart[3], Jtot[3], Jtot_inRad[3], Jtot_inHalfRad[3]; + double sinrad[3], sinhalfrad[3], vinrad[3], vinhalfrad[3]; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + double lx, ly, lz, dv[3], dx[3], disp, rr_tmp, disp_tmp, halfmassrad = 0, halfmassradtype[NTYPES]; + double boxsize, ddxx, vmax, vmaxrad, maxrad; + double mass, massinrad, massinhalfrad, massinmaxrad; + double mass_tab[NTYPES], massinrad_tab[NTYPES], massinhalfrad_tab[NTYPES], massinmaxrad_tab[NTYPES]; + double xtmp; + + sort_r2list *rr_list = 0; + int minindex; + MyIDType mostboundid; + +#ifdef USE_SFR + double sfr = 0, sfrinrad = 0, sfrinhalfrad = 0, sfrinmaxrad = 0, gasMassSfr = 0; +#endif /* #ifdef USE_SFR */ + + boxsize = All.BoxSize; + + vel_to_phys = 1.0 / All.cf_atime; + + if(All.ComovingIntegrationOn) + H_of_a = hubble_function(All.Time); + else + H_of_a = 0; + + mass = massinrad = massinhalfrad = massinmaxrad = 0; + for(j = 0; j < NTYPES; j++) + { + len_type[j] = 0; + mass_tab[j] = halfmassradtype[j] = massinrad_tab[j] = massinhalfrad_tab[j] = massinmaxrad_tab[j] = 0; + } + + for(i = 0, minindex = -1, minpot = 1.0e30; i < num; i++) + { + p = d[i].index; + 
if(PS[p].Potential < minpot || minindex == -1) + { + minpot = PS[p].Potential; + minindex = p; + } + + len_type[P[p].Type]++; + +#ifdef USE_SFR + if(P[p].Type == 0) + sfr += SphP[PS[p].OldIndex].Sfr; /* note: the SphP[] array has not been reordered */ +#endif /* #ifdef USE_SFR */ + } + + for(j = 0; j < NTYPES; j++) + len_type_loc[j] = len_type[j]; + + if(parallel_flag) + { + int len_typetot[NTYPES]; + MPI_Allreduce(len_type, len_typetot, NTYPES, MPI_INT, MPI_SUM, SubComm); + for(j = 0; j < NTYPES; j++) + len_type[j] = len_typetot[j]; + + double *minpotlist = mymalloc("minpotlist", SubNTask * sizeof(double)); + MPI_Allgather(&minpot, 1, MPI_DOUBLE, minpotlist, 1, MPI_DOUBLE, SubComm); + int mincpu; + + for(i = 0, mincpu = -1, minpot = 1.0e30; i < SubNTask; i++) + if(minpotlist[i] < minpot) + { + mincpu = i; + minpot = minpotlist[mincpu]; + } + + myfree(minpotlist); + + if(mincpu < 0) + terminate("mincpu < 0"); + + if(SubThisTask == mincpu) + for(j = 0; j < 3; j++) + { +#ifdef CELL_CENTER_GRAVITY + if(P[minindex].Type == 0) + pos[j] = SphP[PS[minindex].OldIndex].Center[j]; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos[j] = P[minindex].Pos[j]; + } + + MPI_Bcast(pos, 3, MPI_DOUBLE, mincpu, SubComm); + +#ifdef USE_SFR + double sfrtot; + MPI_Allreduce(&sfr, &sfrtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + sfr = sfrtot; +#endif /* #ifdef USE_SFR */ + } + else + { + if(minindex == -1) + terminate("minindex == -1"); + + for(j = 0; j < 3; j++) + { +#ifdef CELL_CENTER_GRAVITY + if(P[minindex].Type == 0) + pos[j] = SphP[PS[minindex].OldIndex].Center[j]; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos[j] = P[minindex].Pos[j]; + } + } + + /* pos[] now holds the position of minimum potential */ + /* we'll take it that as the center */ + + /* determine the particle ID with the smallest binding energy */ + for(i = 0, minindex = -1, minpot = 1.0e30; i < num; i++) + { + p = d[i].index; + if(PS[p].BindingEnergy < minpot || minindex == -1) + { + minpot = PS[p].BindingEnergy; 
+ minindex = p; + } + } + + if(parallel_flag) + { + double *minpotlist = mymalloc("minpotlist", SubNTask * sizeof(double)); + MPI_Allgather(&minpot, 1, MPI_DOUBLE, minpotlist, 1, MPI_DOUBLE, SubComm); + int mincpu; + + for(i = 0, mincpu = -1, minpot = 1.0e30; i < SubNTask; i++) + if(minpotlist[i] < minpot) + { + mincpu = i; + minpot = minpotlist[mincpu]; + } + + myfree(minpotlist); + + if(mincpu < 0) + terminate("mincpu < 0"); + + if(SubThisTask == mincpu) + { + mostboundid = P[minindex].ID; + } + + MPI_Bcast(&mostboundid, sizeof(mostboundid), MPI_BYTE, mincpu, SubComm); + } + else + { + if(minindex == -1) + terminate("minindex == -1"); + + mostboundid = P[minindex].ID; + } + + /* let's get bulk velocity and the center-of-mass */ + /* here we still take all particles */ + + for(j = 0; j < 3; j++) + s[j] = v[j] = 0; + + for(i = 0; i < num; i++) + { + p = d[i].index; + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + s[j] += P[p].Mass * ddxx; + v[j] += P[p].Mass * P[p].Vel[j]; + } + mass += P[p].Mass; + + int ptype = P[p].Type; + mass_tab[ptype] += P[p].Mass; + } + + if(parallel_flag) + { + double stot[3], vtot[3], masstot, mass_tabtot[NTYPES]; + + MPI_Allreduce(s, stot, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(&mass, &masstot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(v, vtot, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(mass_tab, mass_tabtot, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + + mass = masstot; + for(j = 0; j < 3; j++) + { + s[j] = stot[j]; + v[j] = vtot[j]; + } + + for(j = 0; j < NTYPES; j++) + mass_tab[j] = mass_tabtot[j]; + } + + for(j = 0; j < 3; j++) + { + s[j] /= mass; /* center of mass */ + v[j] /= mass; + vel[j] = vel_to_phys * v[j]; + } + + for(j = 0; j < 3; j++) + { + s[j] += pos[j]; + + while(s[j] < 0) + s[j] += boxsize; + while(s[j] >= boxsize) + s[j] -= boxsize; + cm[j] = s[j]; // this is in comoving coordinates + } + + disp = lx = ly = lz = 0; +#ifdef SUBFIND_EXTENDED_PROPERTIES + Jtot[0] = 
Jtot[1] = Jtot[2] = 0; + Jdm[0] = Jdm[1] = Jdm[2] = 0; + Jgas[0] = Jgas[1] = Jgas[2] = 0; + Jstars[0] = Jstars[1] = Jstars[2] = 0; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + rr_list = mymalloc("rr_list", sizeof(sort_r2list) * (num + 1)); + + for(i = 0; i < num; i++) + { + p = d[i].index; + + for(j = 0, rr_tmp = 0, disp_tmp = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - s[j]); + dx[j] = All.cf_atime * ddxx; + dv[j] = vel_to_phys * (P[p].Vel[j] - v[j]); + dv[j] += H_of_a * dx[j]; + + disp_tmp += P[p].Mass * dv[j] * dv[j]; + /* for rotation curve computation, take minimum of potential as center */ + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + ddxx = All.cf_atime * ddxx; + rr_tmp += ddxx * ddxx; + } + + lx += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + ly += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + lz += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + +#ifdef SUBFIND_EXTENDED_PROPERTIES + for(j = 0; j < 3; j++) // hubble drifts in velocity now with respect to pot min which we consider as the centre of rotation + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + dx[j] = All.cf_atime * ddxx; + dv[j] = vel_to_phys * (P[p].Vel[j] - v[j]); + dv[j] += H_of_a * dx[j]; + } + + int ptype = P[p].Type; + + Ekin += (P[p].Mass / 2) * (dv[0] * dv[0] + dv[1] * dv[1] + dv[2] * dv[2]); + Epot += (P[p].Mass / 2) * PS[p].Potential; + if(P[p].Type == 0) + Ethr += P[p].Mass * SphP[PS[p].OldIndex].Utherm; + + Jtot[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jtot[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jtot[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + + if(ptype == 1) // dm illustris + { + Jdm[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jdm[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jdm[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } + if(ptype == 0) // gas (incl. winds!) 
+ { + Jgas[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jgas[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jgas[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } + if(ptype == 4) // stars (previously: StarP[P[p].AuxDataID].BirthTime) + { + Jstars[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jstars[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jstars[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + rr_tmp = sqrt(rr_tmp); + + rr_list[i].mass = P[p].Mass; + rr_list[i].r = rr_tmp; + disp += disp_tmp; + } + + if(parallel_flag) + { + double spintot[3], disptot; + spin[0] = lx; + spin[1] = ly; + spin[2] = lz; + MPI_Allreduce(spin, spintot, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(&disp, &disptot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + disp = disptot; + lx = spintot[0]; + ly = spintot[1]; + lz = spintot[2]; +#ifdef SUBFIND_EXTENDED_PROPERTIES + MPI_Allreduce(MPI_IN_PLACE, &Ekin, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &Epot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &Ethr, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jtot, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jdm, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jgas, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jstars, 3, MPI_DOUBLE, MPI_SUM, SubComm); +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + } + + spin[0] = lx / mass; + spin[1] = ly / mass; + spin[2] = lz / mass; + + veldisp = sqrt(disp / (3 * mass)); /* convert to 1d velocity dispersion */ + +#ifdef SUBFIND_EXTENDED_PROPERTIES + // counter rotating mass fractions + CMFrac = 0; + for(i = 0; i < NTYPES; i++) + CMFracType[i] = 0; + + for(i = 0; i < num; i++) + { + /* identify particle type */ + p = d[i].index; + + /* calculate particle radius */ + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); // 
counter-rotating mass calc with respect to pot min + dx[j] = All.cf_atime * ddxx; + dv[j] = vel_to_phys * (P[p].Vel[j] - v[j]); + dv[j] += H_of_a * dx[j]; + } + + int ptype = P[p].Type; + + jpart[0] = P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + jpart[1] = P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + jpart[2] = P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + + if((Jtot[0] * jpart[0] + Jtot[1] * jpart[1] + Jtot[2] * jpart[2]) < 0.) + CMFrac += P[p].Mass / mass; + + if(ptype == 1) // dm illustris + if((Jdm[0] * jpart[0] + Jdm[1] * jpart[1] + Jdm[2] * jpart[2]) < 0.) + CMFracType[1] += P[p].Mass / mass_tab[1]; + if(ptype == 0) // gas (incl. winds!) + if((Jgas[0] * jpart[0] + Jgas[1] * jpart[1] + Jgas[2] * jpart[2]) < 0.) + CMFracType[0] += P[p].Mass / mass_tab[0]; + if(ptype == 4) // stars + if((Jstars[0] * jpart[0] + Jstars[1] * jpart[1] + Jstars[2] * jpart[2]) < 0.) + CMFracType[4] += P[p].Mass / mass_tab[4]; + } + + if(parallel_flag) + { + MPI_Allreduce(MPI_IN_PLACE, &CMFrac, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, CMFracType, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + } + +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + if(parallel_flag) + parallel_sort_comm(rr_list, num, sizeof(sort_r2list), subfind_compare_dist_rotcurve, SubComm); + else + mysort(rr_list, num, sizeof(sort_r2list), subfind_compare_dist_rotcurve); + + /* calculate cumulative mass */ + for(i = 1; i < num; i++) + rr_list[i].mass += rr_list[i - 1].mass; + + if(parallel_flag) + { + double mass_part = 0; + if(num) + mass_part = rr_list[num - 1].mass; + double *masslist = mymalloc("masslist", SubNTask * sizeof(double)); + MPI_Allgather(&mass_part, 1, MPI_DOUBLE, masslist, 1, MPI_DOUBLE, SubComm); + + double massbefore = 0; + for(i = 0; i < SubThisTask; i++) + massbefore += masslist[i]; + + for(i = 0; i < num; i++) + rr_list[i].mass += massbefore; + + myfree(masslist); + + /* now calculate rotation curve maximum and half mass radius */ + + double halfmassrad_loc = 0; + 
sort_r2list *rr_lowlist = mymalloc("rr_lowlist", SubNTask * sizeof(sort_r2list)); + sort_r2list low_element; + if(num > 0) + low_element = rr_list[0]; + else + { + low_element.mass = 0; + low_element.r = 0; + } + MPI_Allgather(&low_element, sizeof(sort_r2list), MPI_BYTE, rr_lowlist, sizeof(sort_r2list), MPI_BYTE, SubComm); + + rr_list[num].mass = 0; + rr_list[num].r = 0; + + for(j = SubThisTask + 1; j < SubNTask; j++) + if(rr_lowlist[j].mass > 0) + { + rr_list[num] = rr_lowlist[j]; + break; + } + + myfree(rr_lowlist); + + int *numlist = mymalloc("numlist", SubNTask * sizeof(int)); + MPI_Allgather(&num, 1, MPI_INT, numlist, 1, MPI_INT, SubComm); + + int nbefore = 0; + for(i = 0; i < SubThisTask; i++) + nbefore += numlist[i]; + + for(i = num - 1, max = 0, maxrad = 0; i >= 0; i--) + { + if((i + nbefore) > 5 && rr_list[i].mass > max * rr_list[i].r) + { + max = rr_list[i].mass / rr_list[i].r; + maxrad = rr_list[i].r; + } + + if(rr_list[i].mass < 0.5 * mass && rr_list[i + 1].mass >= 0.5 * mass) + halfmassrad_loc = 0.5 * (rr_list[i].r + rr_list[i + 1].r); + } + + myfree(numlist); + + MPI_Allreduce(&halfmassrad_loc, &halfmassrad, 1, MPI_DOUBLE, MPI_MAX, SubComm); + double *maxlist = mymalloc("maxlist", SubNTask * sizeof(double)); + double *maxradlist = mymalloc("maxradlist", SubNTask * sizeof(double)); + MPI_Allgather(&max, 1, MPI_DOUBLE, maxlist, 1, MPI_DOUBLE, SubComm); + MPI_Allgather(&maxrad, 1, MPI_DOUBLE, maxradlist, 1, MPI_DOUBLE, SubComm); + for(i = 0, max = maxrad = 0; i < SubNTask; i++) + { + if(maxlist[i] > max) + { + max = maxlist[i]; + maxrad = maxradlist[i]; + } + } + myfree(maxradlist); + myfree(maxlist); + } + else + { + for(i = num - 1, max = 0, maxrad = 0; i >= 0; i--) + { + if(i > 5 && rr_list[i].mass > max * rr_list[i].r) + { + max = rr_list[i].mass / rr_list[i].r; + maxrad = rr_list[i].r; + } + + if(i < num - 1) + if(rr_list[i].mass < 0.5 * mass && rr_list[i + 1].mass >= 0.5 * mass) + halfmassrad = 0.5 * (rr_list[i].r + rr_list[i + 1].r); + } + } + + 
halfmassrad /= All.cf_atime; + vmax = sqrt(All.G * max); + vmaxrad = maxrad / All.cf_atime; + + myfree(rr_list); + + /* half mass radii for different types */ + /* need to recalculate len_type_loc first, because of special particle treatment in GFM */ + for(j = 0; j < NTYPES; j++) + len_type_loc[j] = 0; + + for(i = 0; i < num; i++) + { + p = d[i].index; + int ptype = P[p].Type; + + len_type_loc[ptype]++; + } + + int itmp, type; + for(type = 0; type < NTYPES; type++) + { + rr_list = mymalloc("rr_list", sizeof(sort_r2list) * (len_type_loc[type] + 1)); + itmp = 0; + for(i = 0; i < num; i++) + { + p = d[i].index; + + int ptype = P[p].Type; + + if(ptype == type) + { + for(j = 0, rr_tmp = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + rr_tmp += ddxx * ddxx; + } + + rr_tmp = sqrt(rr_tmp); + + rr_list[itmp].mass = P[p].Mass; + rr_list[itmp].r = rr_tmp; + itmp++; + } + } + + if(itmp != len_type_loc[type]) + terminate("should not occur: %d %d", itmp, len_type_loc[type]); + + if(parallel_flag) + parallel_sort_comm(rr_list, len_type_loc[type], sizeof(sort_r2list), subfind_compare_dist_rotcurve, SubComm); + else + mysort(rr_list, len_type_loc[type], sizeof(sort_r2list), subfind_compare_dist_rotcurve); + + /* calculate cumulative mass */ + for(i = 1; i < len_type_loc[type]; i++) + rr_list[i].mass = rr_list[i - 1].mass + rr_list[i].mass; + + if(parallel_flag) + { + double mass_part = 0; + if(len_type_loc[type]) + mass_part = rr_list[len_type_loc[type] - 1].mass; + double *masslist = mymalloc("masslist", SubNTask * sizeof(double)); + MPI_Allgather(&mass_part, 1, MPI_DOUBLE, masslist, 1, MPI_DOUBLE, SubComm); + + double massbefore = 0; + for(i = 0; i < SubThisTask; i++) + massbefore += masslist[i]; + + for(i = 0; i < len_type_loc[type]; i++) + rr_list[i].mass += massbefore; + + myfree(masslist); + } + + /* now calculate half mass radii */ + if(parallel_flag) + { + double halfmassrad_loc = 0; + sort_r2list *rr_lowlist = mymalloc("rr_lowlist", SubNTask * 
sizeof(sort_r2list)); + sort_r2list low_element; + if(len_type_loc[type] > 0) + low_element = rr_list[0]; + else + { + low_element.mass = 0; + low_element.r = 0; + } + + MPI_Allgather(&low_element, sizeof(sort_r2list), MPI_BYTE, rr_lowlist, sizeof(sort_r2list), MPI_BYTE, SubComm); + + rr_list[len_type_loc[type]].mass = 0; + rr_list[len_type_loc[type]].r = 0; + for(j = SubThisTask + 1; j < SubNTask; j++) + if(rr_lowlist[j].mass > 0) + { + rr_list[len_type_loc[type]] = rr_lowlist[j]; + break; + } + + myfree(rr_lowlist); + + for(i = len_type_loc[type] - 1; i >= 0; i--) + { + if(rr_list[i].mass < 0.5 * mass_tab[type] && rr_list[i + 1].mass >= 0.5 * mass_tab[type]) + halfmassrad_loc = 0.5 * (rr_list[i].r + rr_list[i + 1].r); + } + + MPI_Allreduce(&halfmassrad_loc, &halfmassradtype[type], 1, MPI_DOUBLE, MPI_MAX, SubComm); + } + else + { + for(i = len_type_loc[type] - 1; i >= 0; i--) + { + if(i < len_type_loc[type] - 1) + if(rr_list[i].mass < 0.5 * mass_tab[type] && rr_list[i + 1].mass >= 0.5 * mass_tab[type]) + halfmassradtype[type] = 0.5 * (rr_list[i].r + rr_list[i + 1].r); + } + } + + myfree(rr_list); + } + + /* properties of 'central galaxies', defined in several ways as particles within some radius: + either (stellar half mass radius) or SUBFIND_GAL_RADIUS_FAC*(stellar half mass radius) or (radius of Vmax) */ +#ifdef SUBFIND_EXTENDED_PROPERTIES + // centre of mass /velocity of particles in half/ stellar mass rad + sinrad[0] = sinrad[1] = sinrad[2] = 0; + sinhalfrad[0] = sinhalfrad[1] = sinhalfrad[2] = 0; + vinrad[0] = vinrad[1] = vinrad[2] = 0; + vinhalfrad[0] = vinhalfrad[1] = vinhalfrad[2] = 0; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + for(i = 0; i < num; i++) + { + /* identify particle type */ + p = d[i].index; + int ptype = P[p].Type; + + /* calculate particle radius */ + for(j = 0, rr_tmp = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + rr_tmp += ddxx * ddxx; + } + rr_tmp = sqrt(rr_tmp); + + /* properties inside 
SUBFIND_GAL_RADIUS_FAC*(stellar half mass radius) */ + if(rr_tmp < SUBFIND_GAL_RADIUS_FAC * halfmassradtype[4]) + { + massinrad += P[p].Mass; + massinrad_tab[ptype] += P[p].Mass; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); // comoving (as it should be.) + sinrad[j] += P[p].Mass * ddxx; + vinrad[j] += P[p].Mass * P[p].Vel[j]; + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + if(ptype == 0) + { + if(P[p].Type == 0) + { +#ifdef USE_SFR + sfrinrad += SphP[PS[p].OldIndex].Sfr; /* note: the SphP[] array has not been reordered */ +#endif /* #ifdef USE_SFR */ + } + } + } + + /* properties inside (stellar half mass radius) */ + if(rr_tmp < 1.0 * halfmassradtype[4]) + { + massinhalfrad += P[p].Mass; + massinhalfrad_tab[ptype] += P[p].Mass; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); // comoving (as it should be.) + sinhalfrad[j] += P[p].Mass * ddxx; + vinhalfrad[j] += P[p].Mass * P[p].Vel[j]; + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + if(ptype == 0) + { + if(P[p].Type == 0) + { +#ifdef USE_SFR + sfrinhalfrad += SphP[PS[p].OldIndex].Sfr; /* note: the SphP[] array has not been reordered */ +#endif /* #ifdef USE_SFR */ + } + } + } + + /* properties inside (radius of Vmax) */ + if(rr_tmp < 1.0 * vmaxrad) + { + massinmaxrad += P[p].Mass; + massinmaxrad_tab[ptype] += P[p].Mass; + + if(ptype == 0) + { + if(P[p].Type == 0) + { +#ifdef USE_SFR + sfrinmaxrad += SphP[PS[p].OldIndex].Sfr; /* note: the SphP[] array has not been reordered */ +#endif /* #ifdef USE_SFR */ + } + } + } + } + + /* properties of star forming gas */ +#ifdef USE_SFR + for(i = 0; i < num; i++) + { + p = d[i].index; + + if(P[p].Type == 0) + { + if(SphP[PS[p].OldIndex].Sfr > 0) + { + gasMassSfr += P[p].Mass; + } + } + } +#endif /* #ifdef USE_SFR */ + +#ifdef MHD + bfld_halo = bfld_disk = bfld_vol_halo = bfld_vol_disk = 0; + + for(i = 0; i < num; i++) + 
{ + p = d[i].index; + + if(P[p].Type == 0) + { + double bfld2 = (SphP[PS[p].OldIndex].B[0] * SphP[PS[p].OldIndex].B[0]) + + (SphP[PS[p].OldIndex].B[1] * SphP[PS[p].OldIndex].B[1]) + + (SphP[PS[p].OldIndex].B[2] * SphP[PS[p].OldIndex].B[2]); + double vol = SphP[PS[p].OldIndex].Volume; + + bfld_halo += bfld2 * vol; + bfld_vol_halo += vol; + + /* calculate particle radius */ + for(j = 0, rr_tmp = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + rr_tmp += ddxx * ddxx; + } + rr_tmp = sqrt(rr_tmp); + + if(rr_tmp < SUBFIND_GAL_RADIUS_FAC * halfmassradtype[4]) + { + bfld_disk += bfld2 * vol; + bfld_vol_disk += vol; + } + } + } +#endif /* #ifdef MHD */ + + if(parallel_flag) + { + double massinradtot, massinrad_tabtot[NTYPES]; + MPI_Allreduce(&massinrad, &massinradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(massinrad_tab, massinrad_tabtot, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + massinrad = massinradtot; + for(j = 0; j < NTYPES; j++) + massinrad_tab[j] = massinrad_tabtot[j]; + + double massinhalfradtot, massinhalfrad_tabtot[NTYPES]; + MPI_Allreduce(&massinhalfrad, &massinhalfradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(massinhalfrad_tab, massinhalfrad_tabtot, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + massinhalfrad = massinhalfradtot; + for(j = 0; j < NTYPES; j++) + massinhalfrad_tab[j] = massinhalfrad_tabtot[j]; + + double massinmaxradtot, massinmaxrad_tabtot[NTYPES]; + MPI_Allreduce(&massinmaxrad, &massinmaxradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(massinmaxrad_tab, massinmaxrad_tabtot, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + massinmaxrad = massinmaxradtot; + for(j = 0; j < NTYPES; j++) + massinmaxrad_tab[j] = massinmaxrad_tabtot[j]; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + MPI_Allreduce(MPI_IN_PLACE, sinrad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, vinrad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, sinhalfrad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + 
MPI_Allreduce(MPI_IN_PLACE, vinhalfrad, 3, MPI_DOUBLE, MPI_SUM, SubComm); +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +#ifdef MHD + double bfld_halo_tot, bfld_disk_tot, bfld_vol_halo_tot, bfld_vol_disk_tot; + MPI_Allreduce(&bfld_halo, &bfld_halo_tot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(&bfld_vol_halo, &bfld_vol_halo_tot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(&bfld_disk, &bfld_disk_tot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(&bfld_vol_disk, &bfld_vol_disk_tot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + + bfld_halo = bfld_halo_tot; + bfld_vol_halo = bfld_vol_halo_tot; + bfld_disk = bfld_disk_tot; + bfld_vol_disk = bfld_vol_disk_tot; +#endif /* #ifdef MHD */ + +#ifdef USE_SFR + double sfrinradtot; + MPI_Allreduce(&sfrinrad, &sfrinradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + sfrinrad = sfrinradtot; + + double sfrinhalfradtot; + MPI_Allreduce(&sfrinhalfrad, &sfrinhalfradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + sfrinhalfrad = sfrinhalfradtot; + + double sfrinmaxradtot; + MPI_Allreduce(&sfrinmaxrad, &sfrinmaxradtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + sfrinmaxrad = sfrinmaxradtot; + + double gasMassSfrtot; + MPI_Allreduce(&gasMassSfr, &gasMassSfrtot, 1, MPI_DOUBLE, MPI_SUM, SubComm); + gasMassSfr = gasMassSfrtot; +#endif /* #ifdef USE_SFR */ + } + + if(parallel_flag) + MPI_Allreduce(&num, &totlen, 1, MPI_INT, MPI_SUM, SubComm); + else + totlen = num; + +#ifdef MHD + if(bfld_vol_halo > 0.) + bfld_halo = sqrt(bfld_halo / bfld_vol_halo); + if(bfld_vol_disk > 0.) + bfld_disk = sqrt(bfld_disk / bfld_vol_disk); +#endif /* #ifdef MHD */ + +#ifdef SUBFIND_EXTENDED_PROPERTIES + // finish centre of mass of spheres + for(j = 0; j < 3; j++) + { + if(massinrad > 0) + { + sinrad[j] /= massinrad; + sinrad[j] += pos[j]; + + while(sinrad[j] < 0) + sinrad[j] += boxsize; + while(sinrad[j] >= boxsize) + sinrad[j] -= boxsize; + + vinrad[j] /= massinrad; // this is comoving (as it should be.) 
+ } + + if(massinhalfrad > 0) + { + sinhalfrad[j] /= massinhalfrad; + sinhalfrad[j] += pos[j]; + + while(sinhalfrad[j] < 0) + sinhalfrad[j] += boxsize; + while(sinhalfrad[j] >= boxsize) + sinhalfrad[j] -= boxsize; + + vinhalfrad[j] /= massinhalfrad; + } + } + + Jtot_inHalfRad[0] = Jtot_inHalfRad[1] = Jtot_inHalfRad[2] = 0; + Jdm_inHalfRad[0] = Jdm_inHalfRad[1] = Jdm_inHalfRad[2] = 0; + Jgas_inHalfRad[0] = Jgas_inHalfRad[1] = Jgas_inHalfRad[2] = 0; + Jstars_inHalfRad[0] = Jstars_inHalfRad[1] = Jstars_inHalfRad[2] = 0; + Jtot_inRad[0] = Jtot_inRad[1] = Jtot_inRad[2] = 0; + Jdm_inRad[0] = Jdm_inRad[1] = Jdm_inRad[2] = 0; + Jgas_inRad[0] = Jgas_inRad[1] = Jgas_inRad[2] = 0; + Jstars_inRad[0] = Jstars_inRad[1] = Jstars_inRad[2] = 0; + + for(i = 0; i < num; i++) + { + /* identify particle type */ + p = d[i].index; + + /* calculate particle radius */ + for(j = 0, rr_tmp = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + rr_tmp += ddxx * ddxx; + } + rr_tmp = sqrt(rr_tmp); + + int ptype = P[p].Type; + + /* properties inside SUBFIND_GAL_RADIUS_FAC*(stellar half mass radius) */ + if((massinrad > 0) && (rr_tmp < SUBFIND_GAL_RADIUS_FAC * halfmassradtype[4])) + { + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + dx[j] = All.cf_atime * ddxx; + dv[j] = vel_to_phys * (P[p].Vel[j] - vinrad[j]); + dv[j] += H_of_a * dx[j]; + } + + Jtot_inRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jtot_inRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jtot_inRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + + if(ptype == 1) // dm illustris + { + Jdm_inRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jdm_inRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jdm_inRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } + if(ptype == 0) // gas + { + Jgas_inRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jgas_inRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jgas_inRad[2] += P[p].Mass * 
(dx[0] * dv[1] - dx[1] * dv[0]); + } + if(ptype == 4) // stars + { + Jstars_inRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jstars_inRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jstars_inRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } + } + + /* properties inside (stellar half mass radius) */ + if((massinhalfrad > 0) && (rr_tmp < 1.0 * halfmassradtype[4])) + { + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + dx[j] = All.cf_atime * ddxx; + dv[j] = vel_to_phys * (P[p].Vel[j] - vinhalfrad[j]); + dv[j] += H_of_a * dx[j]; + } + + Jtot_inHalfRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jtot_inHalfRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jtot_inHalfRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + + if(ptype == 1) // dm illustris + { + Jdm_inHalfRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jdm_inHalfRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jdm_inHalfRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } + if(ptype == 0) // gas + { + Jgas_inHalfRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jgas_inHalfRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jgas_inHalfRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } + if(ptype == 4) // stars + { + Jstars_inHalfRad[0] += P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + Jstars_inHalfRad[1] += P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + Jstars_inHalfRad[2] += P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + } + } + } + + if(parallel_flag) + { + MPI_Allreduce(MPI_IN_PLACE, Jtot_inRad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jdm_inRad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jgas_inRad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jstars_inRad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jtot_inHalfRad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jdm_inHalfRad, 3, 
MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jgas_inHalfRad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, Jstars_inHalfRad, 3, MPI_DOUBLE, MPI_SUM, SubComm); + } + + // counter rotating mass fractions + CMFrac_inHalfRad = CMFrac_inRad = 0; + for(i = 0; i < NTYPES; i++) + CMFracType_inHalfRad[i] = CMFracType_inRad[i] = 0; + + for(i = 0; i < num; i++) + { + /* identify particle type */ + p = d[i].index; + + /* calculate particle radius */ + for(j = 0, rr_tmp = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); // counter-rotating mass calc with respect to pot min + rr_tmp += ddxx * ddxx; + } + rr_tmp = sqrt(rr_tmp); + + int ptype = P[p].Type; + + /* properties inside SUBFIND_GAL_RADIUS_FAC*(stellar half mass radius) */ + if((massinrad > 0) && (rr_tmp < SUBFIND_GAL_RADIUS_FAC * halfmassradtype[4])) + { + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + dx[j] = All.cf_atime * ddxx; + dv[j] = vel_to_phys * (P[p].Vel[j] - vinrad[j]); + dv[j] += H_of_a * dx[j]; + } + + jpart[0] = P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + jpart[1] = P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + jpart[2] = P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + + if((Jtot_inRad[0] * jpart[0] + Jtot_inRad[1] * jpart[1] + Jtot_inRad[2] * jpart[2]) < 0.) + CMFrac_inRad += P[p].Mass / massinrad; + + if(ptype == 1) // dm illustris + if((Jdm_inRad[0] * jpart[0] + Jdm_inRad[1] * jpart[1] + Jdm_inRad[2] * jpart[2]) < 0.) + CMFracType_inRad[1] += P[p].Mass / massinrad_tab[1]; + if(ptype == 0) // gas (incl. winds!) + if((Jgas_inRad[0] * jpart[0] + Jgas_inRad[1] * jpart[1] + Jgas_inRad[2] * jpart[2]) < 0.) + CMFracType_inRad[0] += P[p].Mass / massinrad_tab[0]; + if(ptype == 4) // stars + if((Jstars_inRad[0] * jpart[0] + Jstars_inRad[1] * jpart[1] + Jstars_inRad[2] * jpart[2]) < 0.) 
+ CMFracType_inRad[4] += P[p].Mass / massinrad_tab[4]; + } + + /* properties inside (stellar half mass radius) */ + if((massinhalfrad > 0) && (rr_tmp < 1.0 * halfmassradtype[4])) + { + for(j = 0; j < 3; j++) + { + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + dx[j] = All.cf_atime * ddxx; + dv[j] = vel_to_phys * (P[p].Vel[j] - vinhalfrad[j]); + dv[j] += H_of_a * dx[j]; + } + + jpart[0] = P[p].Mass * (dx[1] * dv[2] - dx[2] * dv[1]); + jpart[1] = P[p].Mass * (dx[2] * dv[0] - dx[0] * dv[2]); + jpart[2] = P[p].Mass * (dx[0] * dv[1] - dx[1] * dv[0]); + + if((Jtot_inHalfRad[0] * jpart[0] + Jtot_inHalfRad[1] * jpart[1] + Jtot_inHalfRad[2] * jpart[2]) < 0.) + CMFrac_inHalfRad += P[p].Mass / massinhalfrad; + + if(ptype == 1) // dm illustris + if((Jdm_inHalfRad[0] * jpart[0] + Jdm_inHalfRad[1] * jpart[1] + Jdm_inHalfRad[2] * jpart[2]) < 0.) + CMFracType_inHalfRad[1] += P[p].Mass / massinhalfrad_tab[1]; + if(ptype == 0) // gas (incl. winds!) + if((Jgas_inHalfRad[0] * jpart[0] + Jgas_inHalfRad[1] * jpart[1] + Jgas_inHalfRad[2] * jpart[2]) < 0.) + CMFracType_inHalfRad[0] += P[p].Mass / massinhalfrad_tab[0]; + if(ptype == 4) // stars + if((Jstars_inHalfRad[0] * jpart[0] + Jstars_inHalfRad[1] * jpart[1] + Jstars_inHalfRad[2] * jpart[2]) < 0.) 
+ CMFracType_inHalfRad[4] += P[p].Mass / massinhalfrad_tab[4]; + } + } + + if(parallel_flag) + { + MPI_Allreduce(MPI_IN_PLACE, &CMFrac_inRad, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, &CMFrac_inHalfRad, 1, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, CMFracType_inRad, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + MPI_Allreduce(MPI_IN_PLACE, CMFracType_inHalfRad, NTYPES, MPI_DOUBLE, MPI_SUM, SubComm); + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + /* now store the calculated properties in the subgroup structure */ + if(parallel_flag == 0 || SubThisTask == 0) + { + subgroup->Len = totlen; + subgroup->Mass = mass; + subgroup->SubMassInRad = massinrad; + subgroup->SubMassInHalfRad = massinhalfrad; + subgroup->SubMassInMaxRad = massinmaxrad; +#ifdef SUBFIND_EXTENDED_PROPERTIES + subgroup->Ekin = Ekin; + subgroup->Epot = Epot; + subgroup->Ethr = Ethr; + subgroup->CMFrac = CMFrac; + subgroup->CMFrac_inHalfRad = CMFrac_inHalfRad; + subgroup->CMFrac_inRad = CMFrac_inRad; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +#ifdef MHD + subgroup->Bfld_Halo = bfld_halo; + subgroup->Bfld_Disk = bfld_disk; +#endif /* #ifdef MHD */ + + for(j = 0; j < 6; j++) + { + subgroup->MassType[j] = mass_tab[j]; + subgroup->LenType[j] = len_type[j]; + subgroup->SubHalfMassRadType[j] = halfmassradtype[j]; + subgroup->SubMassInRadType[j] = massinrad_tab[j]; + subgroup->SubMassInHalfRadType[j] = massinhalfrad_tab[j]; + subgroup->SubMassInMaxRadType[j] = massinmaxrad_tab[j]; +#ifdef SUBFIND_EXTENDED_PROPERTIES + subgroup->CMFracType[j] = CMFracType[j]; + subgroup->CMFracType_inHalfRad[j] = CMFracType_inHalfRad[j]; + subgroup->CMFracType_inRad[j] = CMFracType_inRad[j]; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + } + for(j = 0; j < 3; j++) + { + subgroup->Pos[j] = pos[j]; + subgroup->Vel[j] = vel[j]; + subgroup->CM[j] = cm[j]; + subgroup->Spin[j] = spin[j]; +#ifdef SUBFIND_EXTENDED_PROPERTIES + subgroup->J[j] = Jtot[j]; + subgroup->Jdm[j] = Jdm[j]; 
+ subgroup->Jgas[j] = Jgas[j]; + subgroup->Jstars[j] = Jstars[j]; + subgroup->J_inHalfRad[j] = Jtot_inHalfRad[j]; + subgroup->Jdm_inHalfRad[j] = Jdm_inHalfRad[j]; + subgroup->Jgas_inHalfRad[j] = Jgas_inHalfRad[j]; + subgroup->Jstars_inHalfRad[j] = Jstars_inHalfRad[j]; + subgroup->J_inRad[j] = Jtot_inRad[j]; + subgroup->Jdm_inRad[j] = Jdm_inRad[j]; + subgroup->Jgas_inRad[j] = Jgas_inRad[j]; + subgroup->Jstars_inRad[j] = Jstars_inRad[j]; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + } + + subgroup->SubMostBoundID = mostboundid; + subgroup->SubVelDisp = veldisp; + subgroup->SubVmax = vmax; + subgroup->SubVmaxRad = vmaxrad; + subgroup->SubHalfMassRad = halfmassrad; + +#ifdef USE_SFR + subgroup->Sfr = sfr; + subgroup->SfrInRad = sfrinrad; + subgroup->SfrInHalfRad = sfrinhalfrad; + subgroup->SfrInMaxRad = sfrinmaxrad; + subgroup->GasMassSfr = gasMassSfr; +#endif /* #ifdef USE_SFR */ + } +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_reprocess.c b/src/amuse/community/arepo/src/subfind/subfind_reprocess.c new file mode 100644 index 0000000000..c189d86001 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_reprocess.c @@ -0,0 +1,240 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
#ifdef SUBFIND_EXTENDED_PROPERTIES
/*! \brief Angular momentum calculation for FOF groups.
 *
 *  Works in four stages:
 *   1. Assign a target task to every local particle: particles of a
 *      collective group are spread over the tasks of that group, particles of
 *      the remaining groups go to the single task owning the group, and
 *      particles outside any group are used to even out the particle load.
 *   2. Exchange the particles and split MPI_COMM_WORLD by CommSplitColor.
 *   3. Call subfind_fof_calc_am_collective() on the tasks handling collective
 *      groups, or subfind_fof_calc_am_serial() for every local group on the
 *      other tasks (after sorting the particles by group number).
 *   4. Restore the local particle order and send all particles back to their
 *      origin task (PS[].OriginTask / PS[].OriginIndex).
 *
 *  \param[in] num Index of snapshot (forwarded to the worker routines).
 *  \param[in] ngroups_cat Number of groups in group file (forwarded to the
 *             worker routines).
 *
 *  \return void
 */
void subfind_add_grp_props_calc_fof_angular_momentum(int num, int ngroups_cat)
{
  mpi_printf("FOF: Begin Angular Momentum Calculation for FOF Groups.\n");

  /* assign target CPUs for the particles in groups */
  /* the particles not in groups will be distributed such that a uniform particle load results */
  double t0           = second();
  int *count_loc_task = mymalloc_clear("count_loc_task", NTask * sizeof(int));
  int *count_task     = mymalloc("count_task", NTask * sizeof(int));
  int *count_free     = mymalloc("count_free", NTask * sizeof(int));
  int count_loc_free  = 0;

  for(int i = 0; i < NumPart; i++)
    {
      if(PS[i].GrNr < 0)
        terminate("PS[i].GrNr=%d", PS[i].GrNr);

      if(PS[i].GrNr < TotNgroups) /* particle is in a group */
        {
          if(PS[i].GrNr < Ncollective) /* we are in a collective group */
            PS[i].TargetTask = ProcAssign[PS[i].GrNr].FirstTask + (i % ProcAssign[PS[i].GrNr].NTask);
          else
            PS[i].TargetTask = ((PS[i].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective;

          if(PS[i].TargetTask < 0 || PS[i].TargetTask >= NTask)
            terminate("PS[i].TargetTask=%d PS[i].GrNr=%d", PS[i].TargetTask, PS[i].GrNr);

          count_loc_task[PS[i].TargetTask]++;
        }
      else
        count_loc_free++;

      PS[i].TargetIndex = 0; /* unimportant here */
    }

  MPI_Allgather(&count_loc_free, 1, MPI_INT, count_free, 1, MPI_INT, MPI_COMM_WORLD);
  MPI_Allreduce(count_loc_task, count_task, NTask, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  /* total particle number; widen before adding so that the sum of two ints cannot overflow */
  long long sum = 0;
  for(int i = 0; i < NTask; i++)
    sum += (long long)count_task[i] + count_free[i];

  int maxload = (sum + NTask - 1) / NTask;
  for(int i = 0; i < NTask; i++)
    {
      count_task[i] = maxload - count_task[i]; /* this is the amount that can fit on this task */
      if(count_task[i] < 0)
        count_task[i] = 0;
    }

  int current_task = 0;

  /* replay the assignment of the free particles of all lower-ranked tasks, so that
   * count_task[] / current_task describe the capacity that is left for this task */
  for(int i = 0; i < ThisTask; i++)
    {
      while(count_free[i] > 0 && current_task < NTask)
        {
          if(count_free[i] < count_task[current_task])
            {
              count_task[current_task] -= count_free[i];
              count_free[i] = 0;
            }
          else
            {
              count_free[i] -= count_task[current_task];
              count_task[current_task] = 0;
              current_task++;
            }
        }
    }

  /* the loop above can leave current_task == NTask if the available capacity was used up
   * exactly; clamp it so that the accesses to count_task[] below stay inside the array */
  if(current_task >= NTask)
    current_task = NTask - 1;

  for(int i = 0; i < NumPart; i++)
    {
      /* particle not in a group. Can in principle stay but we move it such that a good load balance is obtained. */
      if(PS[i].GrNr >= TotNgroups)
        {
          /* check the bound first, then look at the remaining capacity */
          while(current_task < NTask - 1 && count_task[current_task] == 0)
            current_task++;

          PS[i].TargetTask = current_task; /* particle not in any group, move it here so that uniform load is achieved */
          count_task[current_task]--;
        }
    }

  myfree(count_free);
  myfree(count_task);
  myfree(count_loc_task);

  double balance = subfind_get_particle_balance();
  mpi_printf("SUBFIND: particle balance=%g\n", balance);

  /* distribute particles such that groups are completely on the CPU(s) that do the corresponding group(s) */
  fof_subfind_exchange(MPI_COMM_WORLD);
  double t1 = second();
  mpi_printf("SUBFIND: subfind_exchange() took %g sec\n", timediff(t0, t1));

  balance = subfind_get_particle_balance();
  mpi_printf("SUBFIND: particle balance for AM processing=%g\n", balance);

  /* we can now split the communicator to give each collectively treated group its own processor set */
  MPI_Comm_split(MPI_COMM_WORLD, CommSplitColor, ThisTask, &SubComm);
  MPI_Comm_size(SubComm, &SubNTask);
  MPI_Comm_rank(SubComm, &SubThisTask);
  SubTagOffset = TagOffset;

  /* here the execution paths for collective groups and serial groups branch. The collective CPUs work in small sets that each
   * deal with one large group. The serial CPUs each deal with several halos by themselves
   */
  if(CommSplitColor < Ncollective) /* we are one of the CPUs that does a collective group */
    {
      /* we now apply a collective version of subfind to the group split across the processors belonging to communicator SubComm
       * The relevant group is the one stored in Group[0] on SubThisTask==0.
       */
      subfind_fof_calc_am_collective(num, ngroups_cat);
    }
  else
    {
      /* now let us sort according to GrNr and Density. This step will temporarily break the association with SphP[] and other
       * arrays!
       */
      submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart);
      for(int i = 0; i < NumPart; i++)
        {
          PS[i].OldIndex      = i;
          submp[i].index      = i;
          submp[i].GrNr       = PS[i].GrNr;
          submp[i].DM_Density = PS[i].Density;
        }
      qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_GrNr_DM_Density);
      subfind_reorder_according_to_submp();
      myfree(submp);

      /* now we have the particles in each group consecutively */
      if(SubThisTask == 0)
        printf("SUBFIND-SERIAL: Start to do AM for %d small groups with serial subfind algorithm on %d processors (root-node=%d)\n",
               TotNgroups - Ncollective, SubNTask, ThisTask);

      /* we now apply a serial version of subfind to the local groups */

      t0 = second();
      for(int gr = 0, offset = 0; gr < Ngroups; gr++)
        {
          if(((Group[gr].GrNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective == ThisTask)
            offset = subfind_fof_calc_am_serial(gr, offset, num, ngroups_cat);
          else
            terminate("how come that we have this group number?");
        }

      MPI_Barrier(SubComm);
      t1 = second();
      if(SubThisTask == 0)
        printf("SUBFIND-SERIAL: processing AM of serial groups took %g sec\n", timediff(t0, t1));

      /* undo local rearrangement that made groups consecutive. After that, the association of SphP[] will be correct again */
      submp = (struct submp_data *)mymalloc("submp", sizeof(struct submp_data) * NumPart);
      for(int i = 0; i < NumPart; i++)
        {
          submp[i].index    = i;
          submp[i].OldIndex = PS[i].OldIndex;
        }
      qsort(submp, NumPart, sizeof(struct submp_data), subfind_compare_submp_OldIndex);
      subfind_reorder_according_to_submp();
      myfree(submp);
    }

  /* free the communicator */
  MPI_Comm_free(&SubComm);

  /* distribute particles back to original CPU */
  t0 = second();
  for(int i = 0; i < NumPart; i++)
    {
      PS[i].TargetTask  = PS[i].OriginTask;
      PS[i].TargetIndex = PS[i].OriginIndex;
    }

  fof_subfind_exchange(MPI_COMM_WORLD);
  t1 = second();
  if(ThisTask == 0)
    printf("SUBFIND: subfind_exchange() (for return to original CPU after AM) took %g sec\n", timediff(t0, t1));

  mpi_printf("FOF: Angular Momentum Calculation for FOF Groups finished successfully.\n");
}
#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_serial.c + * \date 05/2018 + * \brief Processes the local groups in serial mode. + * \details contains functions: + * int subfind_process_group_serial(int gr, int Offs, int + * nsubgroups_cat) + * int subfind_unbind(struct unbind_data *ud, int len, int + * *len_non_gas) + * int subfind_fof_calc_am_serial(int gr, int Offs, int snapnr, + * int ngroups_cat) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 14.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND +#include "../fof/fof.h" +#include "subfind.h" + +static int *Head, *Next, *Tail, *Len; + +/*! \brief Subhalo finding on each core individually. + * + * \param[in] gr Group index. + * \param[in] Offs Offset. + * \param[in] nsubgroups_cat (unused) + * + * \return New offset. 
+ */ +int subfind_process_group_serial(int gr, int Offs, int nsubgroups_cat) +{ + int i, j, k, p, count_cand, count, len, len_non_gas, N, nsubs, part_index, subnr, totlen; + static struct unbind_data *ud; + + while(PS[Offs].GrNr != Group[gr].GrNr) + { + Offs++; + if(Offs >= NumPart) + { + char buf[1000]; + sprintf(buf, "don't find a particle for groupnr=%d\n", Group[gr].GrNr); + + for(int i = 0; i < NumPart; i++) + printf("task=%d i=%d PS[i].GrNr=%d\n", ThisTask, i, PS[i].GrNr); + + terminate(buf); + } + } + + N = Group[gr].Len; + GrNr = Group[gr].GrNr; + + subfind_loctree_treeallocate((int)(All.TreeAllocFactor * N) + NTopnodes, NumPart); + + for(int i = 0; i < N; i++) + if(PS[Offs + i].GrNr != Group[gr].GrNr) + terminate("task=%d, gr=%d: don't have the number of particles for GrNr=%d i=%d group-len:N=%d found=%d before=%d\n", ThisTask, + gr, Group[gr].GrNr, i, N, PS[Offs + i].GrNr, PS[Offs - 1].GrNr); + + candidates = (struct cand_dat *)mymalloc_movable(&candidates, "candidates", N * sizeof(struct cand_dat)); + + Head = (int *)mymalloc_movable(&Head, "Head", N * sizeof(int)); + Next = (int *)mymalloc_movable(&Next, "Next", N * sizeof(int)); + Tail = (int *)mymalloc_movable(&Tail, "Tail", N * sizeof(int)); + Len = (int *)mymalloc_movable(&Len, "Len", N * sizeof(int)); + ud = (struct unbind_data *)mymalloc_movable(&ud, "ud", N * sizeof(struct unbind_data)); + + Head -= Offs; + Next -= Offs; + Tail -= Offs; + Len -= Offs; + + for(int i = 0; i < N; i++) + ud[i].index = Offs + i; + + subfind_loctree_findExtent(N, ud); + + subfind_loctree_treebuild(N, &ud); /* build tree for all particles of this group */ + +#ifdef SUBFIND_EXTENDED_PROPERTIES + // compute the binding energy of FOF group + double Epot = 0; + for(int i = 0; i < N; i++) + { + int p = ud[i].index; + double pot = subfind_loctree_treeevaluate_potential(p); + + // note: add self-energy + pot += P[p].Mass / (All.ForceSoftening[P[p].SofteningType] / 2.8); // (P[p].Soft / 2.8); + + // multiply with G, scale by 
scale factor + pot *= All.G / All.cf_atime; + + Epot += (P[p].Mass / 2) * pot; + } + Group[gr].Epot = Epot; +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + for(int i = Offs; i < Offs + N; i++) + Head[i] = Next[i] = Tail[i] = -1; + + /* note: particles are already ordered in the order of decreasing density */ + + int ss, ngbs, ndiff, head = 0, head_attach; + int listofdifferent[2], prev; + int ngb_index, rank; + int desngb = All.DesLinkNgb; + + for(i = 0, count_cand = 0; i < N; i++) + { + part_index = Offs + i; + + MyDouble *pos; +#ifdef CELL_CENTER_GRAVITY + if(P[part_index].Type == 0) + pos = PS[part_index].Center; + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + pos = P[part_index].Pos; + + subfind_locngb_treefind(pos, desngb, PS[part_index].Hsml); + + /* note: returned neighbours are already sorted by distance */ + + for(k = 0, ndiff = 0, ngbs = 0; k < desngb && ngbs < 2 && ndiff < 2; k++) + { + ngb_index = R2list[k].index; + + if(ngb_index != part_index) /* to exclude the particle itself */ + { + /* we only look at neighbours that are denser */ + if(PS[ngb_index].Density > PS[part_index].Density) + { + ngbs++; + + if(Head[ngb_index] >= 0) /* neighbor is attached to a group */ + { + if(ndiff == 1) + if(listofdifferent[0] == Head[ngb_index]) + continue; + + /* a new group has been found */ + listofdifferent[ndiff++] = Head[ngb_index]; + } + else + terminate( + "this may not occur: ThisTask=%d gr=%d k=%d i=%d part_index=%d ngb_index = %d head[ngb_index]=%d " + "P[part_index].DM_Density=%g %g GrNrs= %d %d \n", + ThisTask, gr, k, i, part_index, ngb_index, Head[ngb_index], PS[part_index].Density, PS[ngb_index].Density, + PS[part_index].GrNr, PS[ngb_index].GrNr); + } + } + } + + switch(ndiff) /* treat the different possible cases */ + { + case 0: /* this appears to be a lonely maximum -> new group */ + head = part_index; + Head[part_index] = Tail[part_index] = part_index; + Len[part_index] = 1; + Next[part_index] = -1; + break; + + case 1: /* the particle is 
attached to exactly one group */ + head = listofdifferent[0]; + Head[part_index] = head; + Next[Tail[head]] = part_index; + Tail[head] = part_index; + Len[head]++; + Next[part_index] = -1; + break; + + case 2: /* the particle merges two groups together */ + head = listofdifferent[0]; + head_attach = listofdifferent[1]; + if(Len[head_attach] > Len[head] || + (Len[head_attach] == Len[head] && + head_attach < head)) /* other group is longer, swap them. for equal length, take the larger head value */ + { + head = listofdifferent[1]; + head_attach = listofdifferent[0]; + } + + /* only in case the attached group is long enough we bother to register is + as a subhalo candidate */ + + if(Len[head_attach] >= All.DesLinkNgb) + { + candidates[count_cand].len = Len[head_attach]; + candidates[count_cand].head = Head[head_attach]; + count_cand++; + } + + /* now join the two groups */ + Next[Tail[head]] = head_attach; + Tail[head] = Tail[head_attach]; + Len[head] += Len[head_attach]; + + ss = head_attach; + do + { + Head[ss] = head; + } + while((ss = Next[ss]) >= 0); + + /* finally, attach the particle */ + Head[part_index] = head; + Next[Tail[head]] = part_index; + Tail[head] = part_index; + Len[head]++; + Next[part_index] = -1; + break; + + default: + terminate("can't be!"); + break; + } + } + + /* add the full thing as a subhalo candidate */ + for(i = 0, prev = -1; i < N; i++) + { + if(Head[Offs + i] == Offs + i) + if(Next[Tail[Offs + i]] == -1) + { + if(prev < 0) + head = Offs + i; + if(prev >= 0) + Next[prev] = Offs + i; + + prev = Tail[Offs + i]; + } + } + + candidates[count_cand].len = N; + candidates[count_cand].head = head; + count_cand++; + + /* go through them once and assign the rank */ + for(i = 0, p = head, rank = 0; i < N; i++) + { + Len[p] = rank++; + p = Next[p]; + } + + /* for each candidate, we now pull out the rank of its head */ + for(k = 0; k < count_cand; k++) + candidates[k].rank = Len[candidates[k].head]; + + for(i = Offs; i < Offs + N; i++) + Tail[i] = 
-1; + + for(k = 0, nsubs = 0; k < count_cand; k++) + { + for(i = 0, p = candidates[k].head, len = 0; i < candidates[k].len; i++, p = Next[p]) + if(Tail[p] < 0) + ud[len++].index = p; + + if(len >= All.DesLinkNgb) + len = subfind_unbind(ud, len, &len_non_gas); + + if(len >= All.DesLinkNgb) + { + /* ok, we found a substructure */ + + for(i = 0; i < len; i++) + Tail[ud[i].index] = nsubs; /* we use this to flag the substructures */ + + candidates[k].nsub = nsubs; + candidates[k].bound_length = len; + nsubs++; + } + else + { + candidates[k].nsub = -1; + candidates[k].bound_length = 0; + } + } + +#ifdef VERBOSE + printf("\nGroupLen=%d (gr=%d)\n", N, gr); + printf("Number of substructures: %d (before unbinding: %d)\n", nsubs, count_cand); +#endif /* #ifdef VERBOSE */ + + mysort(candidates, count_cand, sizeof(struct cand_dat), subfind_compare_serial_candidates_boundlength); + + /* now we determine the parent subhalo for each candidate */ + for(k = 0; k < count_cand; k++) + { + candidates[k].subnr = k; + candidates[k].parent = 0; + } + + mysort(candidates, count_cand, sizeof(struct cand_dat), subfind_compare_serial_candidates_rank); + + for(k = 0; k < count_cand; k++) + { + for(j = k + 1; j < count_cand; j++) + { + if(candidates[j].rank > candidates[k].rank + candidates[k].len) + break; + + if(candidates[k].rank + candidates[k].len >= candidates[j].rank + candidates[j].len) + { + if(candidates[k].bound_length >= All.DesLinkNgb) + candidates[j].parent = candidates[k].subnr; + } + else + { + char buf[1000]; + sprintf(buf, "k=%d|%d has rank=%d and len=%d. 
j=%d has rank=%d and len=%d bound=%d\n", k, count_cand, + (int)candidates[k].rank, candidates[k].len, (int)candidates[k].bound_length, candidates[j].rank, + (int)candidates[j].len, candidates[j].bound_length); + terminate(buf); + } + } + } + + mysort(candidates, count_cand, sizeof(struct cand_dat), subfind_compare_serial_candidates_subnr); + + /* now determine the properties */ + Group[gr].Nsubs = nsubs; + Group[gr].Pos[0] = Group[gr].CM[0]; + Group[gr].Pos[1] = Group[gr].CM[1]; + Group[gr].Pos[2] = Group[gr].CM[2]; + + for(k = 0, subnr = 0, totlen = 0; k < nsubs; k++) + { + len = candidates[k].bound_length; + +#ifdef VERBOSE + printf("subnr=%d SubLen=%d\n", subnr, len); +#endif /* #ifdef VERBOSE */ + + totlen += len; + + for(i = 0, p = candidates[k].head, count = 0; i < candidates[k].len; i++) + { + if(Tail[p] == candidates[k].nsub) + ud[count++].index = p; + + p = Next[p]; + } + + if(count != len) + terminate("count=%d != len=%d k=%d subnr=%d nsubs=%d", count, len, k, subnr, nsubs); + + if(Nsubgroups > MaxNsubgroups) + terminate("Nsubgroups = %d >= MaxNsubgroups = %d", Nsubgroups, MaxNsubgroups); + + subfind_determine_sub_halo_properties(ud, len, &SubGroup[Nsubgroups], GrNr, subnr, 0, nsubgroups_cat); + + SubGroup[Nsubgroups].SubParent = candidates[k].parent; + SubGroup[Nsubgroups].SubNr = subnr; + SubGroup[Nsubgroups].GrNr = Group[gr].GrNr; + + if(subnr == 0) + { + for(j = 0; j < 3; j++) + Group[gr].Pos[j] = SubGroup[Nsubgroups].Pos[j]; + } + + Nsubgroups++; + + /* Let's now assign the subgroup number */ + + for(i = 0; i < len; i++) + PS[ud[i].index].SubNr = subnr; + + subnr++; + } + +#ifdef VERBOSE + printf("Fuzz=%d\n", N - totlen); +#endif /* #ifdef VERBOSE */ + + myfree(ud); + myfree(Len + Offs); + myfree(Tail + Offs); + myfree(Next + Offs); + myfree(Head + Offs); + + myfree(candidates); + + subfind_loctree_treefree(); + + return Offs; +} + +/*! \brief Unbinding algorithm. + * + * \param[in, out] ud Unbind data. + * \param[in] len length of ud array. 
+ * \param[out] len_non_gas Number of particles which are not gas cells. + * + * \return Length of array minus the unbound particles. + */ +int subfind_unbind(struct unbind_data *ud, int len, int *len_non_gas) +{ + double *bnd_energy, energy_limit, weakly_bound_limit = 0; + int i, j, p, minindex, unbound, phaseflag, iter = 0; + double ddxx, s[3], dx[3], v[3], dv[3], pos[3]; + double vel_to_phys, H_of_a, atime, pot, minpot = 0; + double boxsize, xtmp; + double TotMass; + + boxsize = All.BoxSize; + + if(All.ComovingIntegrationOn) + { + vel_to_phys = 1.0 / All.Time; + H_of_a = hubble_function(All.Time); + atime = All.Time; + } + else + { + vel_to_phys = atime = 1; + H_of_a = 0; + } + + bnd_energy = (double *)mymalloc("bnd_energy", len * sizeof(double)); + + phaseflag = 0; /* this means we will recompute the potential for all particles */ + + do + { + subfind_loctree_treebuild(len, &ud); + + /* let's compute the potential */ + + if(phaseflag == 0) /* redo it for all the particles */ + { + for(i = 0, minindex = -1, minpot = 1.0e30; i < len; i++) + { + p = ud[i].index; + + pot = subfind_loctree_treeevaluate_potential(p); + + PS[p].Potential = All.G / All.cf_atime * pot; + + if(PS[p].Potential < minpot || minindex == -1) + { + minpot = PS[p].Potential; + minindex = p; + } + } + +#ifdef CELL_CENTER_GRAVITY + if(P[minindex].Type == 0) + { + for(j = 0; j < 3; j++) + pos[j] = PS[minindex].Center[j]; /* position of minimum potential */ + } + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + { + for(j = 0; j < 3; j++) + pos[j] = P[minindex].Pos[j]; /* position of minimum potential */ + } + } + else + { + /* we only repeat for those close to the unbinding threshold */ + for(i = 0; i < len; i++) + { + p = ud[i].index; + + if(PS[p].BindingEnergy >= weakly_bound_limit) + { + pot = subfind_loctree_treeevaluate_potential(p); + + PS[p].Potential *= All.G / All.cf_atime; + } + } + } + + /* let's get bulk velocity and the center-of-mass */ + + v[0] = v[1] = v[2] = 0; + s[0] = s[1] = s[2] 
= 0; + + for(i = 0, TotMass = 0; i < len; i++) + { + p = ud[i].index; + + for(j = 0; j < 3; j++) + { +#ifdef CELL_CENTER_GRAVITY + if(P[p].Type == 0) + ddxx = GRAVITY_NEAREST_X(PS[p].Center[j] - pos[j]); + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + ddxx = GRAVITY_NEAREST_X(P[p].Pos[j] - pos[j]); + s[j] += P[p].Mass * ddxx; + v[j] += P[p].Mass * P[p].Vel[j]; + } + TotMass += P[p].Mass; + } + + for(j = 0; j < 3; j++) + { + v[j] /= TotMass; + s[j] /= TotMass; /* center-of-mass */ + + s[j] += pos[j]; + + while(s[j] < 0) + s[j] += boxsize; + while(s[j] >= boxsize) + s[j] -= boxsize; + } + + for(i = 0; i < len; i++) + { + p = ud[i].index; + + for(j = 0; j < 3; j++) + { + dv[j] = vel_to_phys * (P[p].Vel[j] - v[j]); +#ifdef CELL_CENTER_GRAVITY + if(P[p].Type == 0) + dx[j] = atime * GRAVITY_NEAREST_X(PS[p].Center[j] - s[j]); + else +#endif /* #ifdef CELL_CENTER_GRAVITY */ + dx[j] = atime * GRAVITY_NEAREST_X(P[p].Pos[j] - s[j]); + + dv[j] += H_of_a * dx[j]; + } + + PS[p].BindingEnergy = PS[p].Potential + 0.5 * (dv[0] * dv[0] + dv[1] * dv[1] + dv[2] * dv[2]); + PS[p].BindingEnergy += + All.G / All.cf_atime * P[p].Mass / (All.ForceSoftening[P[p].SofteningType] / 2.8); /* note: add self-energy */ + + if(P[p].Type == 0) + PS[p].BindingEnergy += PS[p].Utherm; + + bnd_energy[i] = PS[p].BindingEnergy; + } + + mysort(bnd_energy, len, sizeof(double), subfind_compare_binding_energy); /* largest comes first! 
*/ + + energy_limit = bnd_energy[(int)(0.25 * len)]; + + for(i = 0, unbound = 0; i < len - 1; i++) + { + if(bnd_energy[i] > 0) + unbound++; + else + unbound--; + + if(unbound <= 0) + break; + } + weakly_bound_limit = bnd_energy[i]; + + /* now omit unbound particles, but at most 1/4 of the original size */ + + for(i = 0, unbound = 0, *len_non_gas = 0; i < len; i++) + { + p = ud[i].index; + if(PS[p].BindingEnergy > 0 && PS[p].BindingEnergy > energy_limit) + { + unbound++; + ud[i] = ud[len - 1]; + i--; + len--; + } + else if(P[p].Type != 0) + (*len_non_gas)++; + } + + if(len < All.DesLinkNgb) + break; + + if(phaseflag == 0) + { + if(unbound > 0) + phaseflag = 1; + } + else + { + if(unbound == 0) + { + phaseflag = 0; /* this will make us repeat everything once more for all particles */ + unbound = 1; + } + } + + if(iter++ > MAXITER) + terminate("iter > MAXITER = %d", MAXITER); + } + while(unbound > 0); + + myfree(bnd_energy); + + return (len); +} + +#ifdef SUBFIND_EXTENDED_PROPERTIES +/*! \brief Serial version of angular momentum calculation. + * + * \param[in] gr Group index. + * \param[in] Offs Offset of group (first index in PS). 
+ * \param[in] snapnr (unused) + * \param[in] ngroups_cat (unused) + */ +int subfind_fof_calc_am_serial(int gr, int Offs, int snapnr, int ngroups_cat) +{ + long long index; + int len, i, k; + double Pos_pbc[3], Vel_tot[3], gr_Jtot[3], gr_Jdm[3], gr_Jgas[3], gr_Jstars[3], jpart[3]; + double gr_CMFrac, gr_CMFracType[NTYPES], gr_Ekin, gr_Ethr; + int gr_len_dm; + double gr_mass, gr_mass_gas, gr_mass_stars; + int ptype; + + while(PS[Offs].GrNr != Group[gr].GrNr) + { + Offs++; + if(Offs >= NumPart) + { + char buf[1000]; + sprintf(buf, "don't find a particle for groupnr=%d\n", Group[gr].GrNr); + + for(i = 0; i < NumPart; i++) + printf("task=%d i=%d PS[i].GrNr=%d\n", ThisTask, i, PS[i].GrNr); + + terminate(buf); + } + } + + len = Group[gr].Len; + + struct unbind_data *ud = (struct unbind_data *)mymalloc("ud", len * sizeof(struct unbind_data)); + + // get all fof particles + for(i = 0; i < len; i++) + ud[i].index = Offs + i; + + // initialize + gr_CMFrac = 0; + gr_Ekin = 0; + gr_Ethr = 0; + + for(k = 0; k < 3; k++) + { + gr_Jtot[k] = 0; + gr_Jdm[k] = 0; + gr_Jgas[k] = 0; + gr_Jstars[k] = 0; + } + for(k = 0; k < NTYPES; k++) + { + gr_CMFracType[k] = 0; + } + + // calc angular momentum for dm, gas, stars + for(k = 0; k < len; k++) + { + index = ud[k].index; + ptype = P[index].Type; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = P[index].Pos[i] - Group[gr].Pos[i]; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = fof_periodic(Pos_pbc[i]); + + for(i = 0; i < 3; i++) + Pos_pbc[i] = Pos_pbc[i] * All.cf_atime; // units: phys kpc/h + + for(i = 0; i < 3; i++) + Vel_tot[i] = P[index].Vel[i] / All.cf_atime - Group[gr].Vel[i] / All.cf_atime + All.cf_Hrate * Pos_pbc[i]; + + gr_Ekin += (P[index].Mass / 2) * (Vel_tot[0] * Vel_tot[0] + Vel_tot[1] * Vel_tot[1] + Vel_tot[2] * Vel_tot[2]); + if(P[index].Type == 0) + gr_Ethr += P[index].Mass * SphP[PS[index].OldIndex].Utherm; + + gr_Jtot[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + gr_Jtot[1] += P[index].Mass * (Pos_pbc[2] * 
Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jtot[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + + if(ptype == 1) // dm illustris + { + gr_Jdm[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + gr_Jdm[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jdm[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + if(ptype == 0) // gas (incl. winds) + { + gr_Jgas[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + gr_Jgas[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jgas[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + if(ptype == 4) // stars + { + gr_Jstars[0] += P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + gr_Jstars[1] += P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + gr_Jstars[2] += P[index].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + } + + Group[gr].Ekin = gr_Ekin; + Group[gr].Ethr = gr_Ethr; + for(i = 0; i < 3; i++) + { + Group[gr].J[i] = gr_Jtot[i]; + Group[gr].JDM[i] = gr_Jdm[i]; + Group[gr].JGas[i] = gr_Jgas[i]; + Group[gr].JStars[i] = gr_Jstars[i]; + } + + // calc counter-rotating fractions + gr_len_dm = 0; + gr_mass = gr_mass_gas = gr_mass_stars = 0; + + for(k = 0; k < len; k++) + { + index = ud[k].index; + ptype = P[index].Type; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = P[index].Pos[i] - Group[gr].Pos[i]; + + for(i = 0; i < 3; i++) + Pos_pbc[i] = fof_periodic(Pos_pbc[i]); + + for(i = 0; i < 3; i++) + Pos_pbc[i] = Pos_pbc[i] * All.cf_atime; // units: phys kpc/h + + for(i = 0; i < 3; i++) + Vel_tot[i] = P[index].Vel[i] / All.cf_atime - Group[gr].Vel[i] / All.cf_atime + All.cf_Hrate * Pos_pbc[i]; + + jpart[0] = P[index].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + jpart[1] = P[index].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + jpart[2] = P[index].Mass * 
(Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + + gr_mass += P[index].Mass; + if((gr_Jtot[0] * jpart[0] + gr_Jtot[1] * jpart[1] + gr_Jtot[2] * jpart[2]) < 0.) + gr_CMFrac += P[index].Mass; // / Group[gr].Mass; + + if(ptype == 1) // dm illustris + { + gr_len_dm++; + if((gr_Jdm[0] * jpart[0] + gr_Jdm[1] * jpart[1] + gr_Jdm[2] * jpart[2]) < 0.) + gr_CMFracType[1]++; + } + if(ptype == 0) // gas (incl. winds) + { + gr_mass_gas += P[index].Mass; + if((gr_Jgas[0] * jpart[0] + gr_Jgas[1] * jpart[1] + gr_Jgas[2] * jpart[2]) < 0.) + gr_CMFracType[0] += P[index].Mass; // / Group[gr].MassType[0]; + } + if(ptype == 4) // stars + { + gr_mass_stars += P[index].Mass; + if((gr_Jstars[0] * jpart[0] + gr_Jstars[1] * jpart[1] + gr_Jstars[2] * jpart[2]) < 0.) + gr_CMFracType[4] += P[index].Mass; // / Group[gr].MassType[4]; + } + } + + gr_CMFrac /= gr_mass; // Group[gr].Mass; + gr_CMFracType[1] /= gr_len_dm; + gr_CMFracType[0] /= gr_mass_gas; // Group[gr].MassType[0]; + gr_CMFracType[4] /= gr_mass_stars; // Group[gr].MassType[4]; + + Group[gr].CMFrac = gr_CMFrac; + for(i = 0; i < NTYPES; i++) + Group[gr].CMFracType[i] = gr_CMFracType[i]; + + myfree(ud); + return Offs; +} +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_so.c b/src/amuse/community/arepo/src/subfind/subfind_so.c new file mode 100644 index 0000000000..5f3774b6ea --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_so.c @@ -0,0 +1,964 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_so.c + * \date 05/2018 + * \brief Spherical overdensity algorithm for subfind. + * \details contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * double subfind_overdensity(void) + * static int subfind_overdensity_evaluate(int target, int mode, + * int threadid) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 14.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef SUBFIND + +#include "../fof/fof.h" +#include "subfind.h" + +static double *R200, *M200; + +static char *Todo; +static MyFloat *Left, *Right; +static int mainstep; + +static int subfind_overdensity_evaluate(int target, int mode, int threadid); + +#ifdef SUBFIND_EXTENDED_PROPERTIES +/*! \brief Structure for angular momentum properties. 
+ */
+static struct Angular_Momentum
+{
+  double Pmom[3];            /* sum of P.Mass * P.Vel / All.cf_atime inside the sphere (mainstep 0) */
+  double MassType[NTYPES];   /* enclosed mass per particle type (mainstep 0) */
+  double Jtot[3];            /* angular momentum sum over all enclosed particles (mainstep 1) */
+  double Jdm[3];             /* angular momentum sum, particle type 1 only (mainstep 1) */
+  double Jgas[3];            /* angular momentum sum, particle type 0 only (mainstep 1) */
+  double Jstars[3];          /* angular momentum sum, particle type 4 only (mainstep 1) */
+  int LenType[NTYPES];       /* enclosed particle count per type (mainstep 0) */
+  double CMFrac;             /* sum of P.Mass / M200 over particles with Jtot . jpart < 0 (mainstep 2) */
+  double CMFracType[NTYPES]; /* per-type analogue of CMFrac; only types 0, 1 and 4 are accumulated */
+  double Ekin;               /* sum of 0.5 * P.Mass * Vel_tot^2 (mainstep 1) */
+  double Epot;               /* set in subfind_overdensity() from the result of subfind_so_potegy() */
+  double Ethr;               /* sum of P.Mass * PS.Utherm over type-0 particles (mainstep 1) */
+  double N200;               /* enclosed particle count, accumulated as a double (mainstep 0) */
+} * AngMom;
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+/*! \brief Local data structure for collecting particle/cell data that is sent
+ *         to other processors if needed. Type called data_in and static
+ *         pointers DataIn and DataGet needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  MyDouble Pos[3]; /* copy of Group[i].Pos; centre of the search sphere */
+  double R200;     /* copy of R200[i]; current trial radius of the search sphere */
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  double M200;     /* copy of M200[i] */
+  int GrNr;        /* copy of Group[i].GrNr */
+  int TaskOfGr;    /* ThisTask of the task that filled this record */
+  int LocGrIndex;  /* index i of the group on that task */
+  struct Angular_Momentum AngMomIn; /* copy of AngMom[i]; carries earlier mainstep results to later ones */
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  int Firstnode;
+} data_in;
+
+static data_in *DataIn, *DataGet;
+
+/*! \brief Routine that fills the relevant group data into the input
+ *         structure defined above. Needed by generic_comm_helpers2.
+ *
+ *  \param[out] in Data structure to fill.
+ *  \param[in] i Index of the group in the per-group arrays (Group, R200, M200, AngMom).
+ *  \param[in] firstnode First node of communication; stored unchanged in in->Firstnode.
+ *
+ *  \return void
+ */
+static void particle2in(data_in *in, int i, int firstnode)
+{
+  in->Pos[0] = Group[i].Pos[0];
+  in->Pos[1] = Group[i].Pos[1];
+  in->Pos[2] = Group[i].Pos[2];
+  in->R200   = R200[i];
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  in->GrNr       = Group[i].GrNr;
+  in->TaskOfGr   = ThisTask;
+  in->LocGrIndex = i;
+  in->M200       = M200[i];
+  in->AngMomIn   = AngMom[i]; /* whole-struct copy */
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  in->Firstnode = firstnode;
+}
+
+/*! \brief Local data structure that holds results acquired on remote
+ *         processors. Type called data_out and static pointers DataResult and
+ *         DataOut needed by generic_comm_helpers2.
+ */
+typedef struct
+{
+  double Mass; /* mass found inside the search sphere by one evaluate call */
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  struct Angular_Momentum AngMomOut; /* partial sums produced by the current mainstep */
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+} data_out;
+
+static data_out *DataResult, *DataOut;
+
+/*! \brief Routine to store or combine result data. Needed by
+ *         generic_comm_helpers2.
+ *
+ *  \param[in] out Data to be moved to appropriate variables in global
+ *             particle and group data arrays (AngMom,...)
+ *  \param[in] i Index of the group in the per-group arrays (M200, AngMom)
+ *  \param[in] mode MODE_LOCAL_PARTICLES overwrites the stored values; any
+ *             other mode adds the communicated partial result to them.
+ *
+ *  \return void
+ */
+static void out2particle(data_out *out, int i, int mode)
+{
+  if(mode == MODE_LOCAL_PARTICLES) /* initial store */
+    {
+      if(mainstep == 0) /* the enclosed mass is only taken over in mainstep 0 */
+        M200[i] = out->Mass;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      if(mainstep == 0) /* momentum, per-type mass/count, particle number */
+        {
+          for(int k = 0; k < 3; k++)
+            AngMom[i].Pmom[k] = out->AngMomOut.Pmom[k];
+          for(int k = 0; k < NTYPES; k++)
+            {
+              AngMom[i].MassType[k] = out->AngMomOut.MassType[k];
+              AngMom[i].LenType[k]  = out->AngMomOut.LenType[k];
+            }
+          AngMom[i].N200 = out->AngMomOut.N200;
+        }
+      else if(mainstep == 1) /* angular momenta and energies */
+        {
+          for(int k = 0; k < 3; k++)
+            {
+              AngMom[i].Jtot[k]   = out->AngMomOut.Jtot[k];
+              AngMom[i].Jdm[k]    = out->AngMomOut.Jdm[k];
+              AngMom[i].Jgas[k]   = out->AngMomOut.Jgas[k];
+              AngMom[i].Jstars[k] = out->AngMomOut.Jstars[k];
+            }
+          AngMom[i].Ekin = out->AngMomOut.Ekin;
+          AngMom[i].Ethr = out->AngMomOut.Ethr;
+        }
+      else if(mainstep == 2) /* counter-rotating fractions */
+        {
+          AngMom[i].CMFrac = out->AngMomOut.CMFrac;
+          for(int k = 0; k < NTYPES; k++)
+            AngMom[i].CMFracType[k] = out->AngMomOut.CMFracType[k];
+        }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+    }
+  else /* combine */
+    {
+      if(mainstep == 0)
+        M200[i] += out->Mass;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      if(mainstep == 0) /* same fields as above, accumulated instead of assigned */
+        {
+          for(int k = 0; k < 3; k++)
+            AngMom[i].Pmom[k] += out->AngMomOut.Pmom[k];
+          for(int k = 0; k < NTYPES; k++)
+            {
+              AngMom[i].MassType[k] += out->AngMomOut.MassType[k];
+              AngMom[i].LenType[k] += out->AngMomOut.LenType[k];
+            }
+          AngMom[i].N200 += out->AngMomOut.N200;
+        }
+      else if(mainstep == 1)
+        {
+          for(int k = 0; k < 3; k++)
+            {
+              AngMom[i].Jtot[k] += out->AngMomOut.Jtot[k];
+              AngMom[i].Jdm[k] += out->AngMomOut.Jdm[k];
+              AngMom[i].Jgas[k] += out->AngMomOut.Jgas[k];
+              AngMom[i].Jstars[k] += out->AngMomOut.Jstars[k];
+            }
+          AngMom[i].Ekin += out->AngMomOut.Ekin;
+          AngMom[i].Ethr += out->AngMomOut.Ethr;
+        }
+      else if(mainstep == 2)
+        {
+          AngMom[i].CMFrac += out->AngMomOut.CMFrac;
+          for(int k = 0; k < NTYPES; k++)
+            AngMom[i].CMFracType[k] += out->AngMomOut.CMFracType[k];
+        }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+    }
+}
+
+#include "../utils/generic_comm_helpers2.h"
+
+/*! \brief Routine that defines what to do with local particles.
+ *
+ *  Calls the *_evaluate function in MODE_LOCAL_PARTICLES.
+ *
+ *  \return void
+ */
+static void kernel_local(void)
+{
+  int i;
+
+  {
+    int threadid = get_thread_num();
+
+    for(int j = 0; j < NTask; j++)
+      Thread[threadid].Exportflag[j] = -1; /* NOTE(review): presumably "nothing exported to task j yet" - confirm in the helper header */
+
+    while(1)
+      {
+        if(Thread[threadid].ExportSpace < MinSpace) /* stop this pass when the export space runs low */
+          break;
+
+        i = NextParticle++; /* next group index to process */
+
+        if(i >= Ngroups)
+          break;
+
+        if(Todo[i]) /* only groups still flagged by subfind_overdensity() */
+          {
+            R200[i] = 0.5 * (Left[i] + Right[i]); /* trial radius = midpoint of the current bracket */
+            subfind_overdensity_evaluate(i, MODE_LOCAL_PARTICLES, threadid);
+          }
+      }
+  }
+}
+
+/*! \brief Routine that defines what to do with imported particles.
+ *
+ *  Calls the *_evaluate function in MODE_IMPORTED_PARTICLES.
+ *
+ *  \return void
+ */
+static void kernel_imported(void)
+{
+  /* now do the particles that were sent to us */
+  int i, cnt = 0;
+
+  {
+    int threadid = get_thread_num();
+
+    while(1)
+      {
+        i = cnt++; /* index into the imported records (DataGet / DataResult) */
+
+        if(i >= Nimport)
+          break;
+
+        subfind_overdensity_evaluate(i, MODE_IMPORTED_PARTICLES, threadid);
+      }
+  }
+}
+
+/*! \brief Main routine executing the spherical overdensity algorithm.
+ *
+ *  \return Time needed for calculation.
+ */
+double subfind_overdensity(void)
+{
+  long long ntot; /* filled by sumup_large_ints() from npleft; the loop below repeats while it is > 0 */
+  int i, npleft, rep, iter;
+  double t0, t1, overdensity, Deltas[4], rhoback, z, omegaz, x, DeltaMean200, DeltaCrit200, DeltaCrit500, DeltaTopHat;
+  double tstart = second();
+
+  Left  = (MyFloat *)mymalloc("Left", sizeof(MyFloat) * Ngroups);  /* lower end of the radius bracket per group */
+  Right = (MyFloat *)mymalloc("Right", sizeof(MyFloat) * Ngroups); /* upper end of the radius bracket per group */
+  R200  = (double *)mymalloc("R200", sizeof(double) * Ngroups);
+  M200  = (double *)mymalloc("M200", sizeof(double) * Ngroups);
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  AngMom = (struct Angular_Momentum *)mymalloc("AngMom", sizeof(struct Angular_Momentum) * Ngroups);
+  Paux   = (struct paux_data *)mymalloc("Paux", sizeof(struct paux_data) * NumPart); /* capacity NumPart, checked in the evaluate function */
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+  Todo = mymalloc("Todo", sizeof(char) * Ngroups); /* per-group flag: still needs evaluation */
+
+  if(All.ComovingIntegrationOn)
+    z = 1 / All.Time - 1;
+  else
+    z = 0;
+
+  rhoback = 3 * All.Omega0 * All.Hubble * All.Hubble / (8 * M_PI * All.G);
+
+  omegaz =
+      All.Omega0 * pow(1 + z, 3) / (All.Omega0 * pow(1 + z, 3) + (1 - All.Omega0 - All.OmegaLambda) * pow(1 + z, 2) + All.OmegaLambda);
+
+  DeltaMean200 = 200.0;
+  DeltaCrit200 = 200.0 / omegaz;
+  DeltaCrit500 = 500.0 / omegaz;
+
+  x           = omegaz - 1;
+  DeltaTopHat = 18 * M_PI * M_PI + 82 * x - 39 * x * x;
+  DeltaTopHat /= omegaz;
+
+  Deltas[0] = DeltaMean200; /* standard fixed overdensity with respect to background */
+  Deltas[1] = DeltaTopHat;  /* tophat overdensity with respect to background */
+  Deltas[2] = DeltaCrit200; /* overdensity of 200 relative to critical, expressed relative to background density */
+  Deltas[3] = DeltaCrit500; /* overdensity of 500 relative to critical, expressed relative to background density */
+
+  generic_set_MaxNexport();
+
+  for(rep = 0; rep < 4; rep++) /* repeat for all four overdensity values */
+    {
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      int mainstepmax = 3; /* 0: radius/mass/momentum, 1: angular momenta and energies, 2: counter-rotating fractions */
+#else                      /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+      int mainstepmax = 1; /* only the radius/mass search */
+#endif                     /* #ifdef SUBFIND_EXTENDED_PROPERTIES #else */
+      for(mainstep = 0; mainstep < mainstepmax; mainstep++)
+        {
+          for(i = 0; i < Ngroups; i++)
+            {
+              if(Group[i].Nsubs > 0) /* groups without subhalos are never evaluated */
+                {
+                  if(mainstep == 0)
+                    {
+                      double rguess = pow(All.G * Group[i].Mass / (100 * All.Hubble * All.Hubble), 1.0 / 3);
+
+                      Right[i] = 3 * rguess; /* initial bracket is [0, 3 * rguess] */
+                      Left[i]  = 0;
+                    }
+                  Todo[i] = 1;
+                }
+              else
+                {
+                  Todo[i] = 0;
+                }
+            }
+
+          iter = 0;
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+          if(mainstep == 1)
+            NumPaux = 0; /* mainstep 1 refills Paux from scratch */
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+          /* we will repeat the whole thing for those groups where we didn't converge to a SO radius yet */
+          do
+            {
+              t0 = second();
+
+              generic_comm_pattern(Ngroups, kernel_local, kernel_imported);
+
+              if(mainstep == 0)
+                {
+                  /* do final operations on results */
+                  for(i = 0, npleft = 0; i < Ngroups; i++)
+                    {
+                      if(Todo[i])
+                        {
+                          overdensity = M200[i] / (4.0 * M_PI / 3.0 * R200[i] * R200[i] * R200[i]) / rhoback;
+
+                          if((Right[i] - Left[i]) > 1.0e-4 * Left[i]) /* bracket still wider than 1e-4 of Left */
+                            {
+                              /* need to redo this group */
+                              npleft++;
+
+                              if(overdensity > Deltas[rep])
+                                Left[i] = R200[i]; /* too dense: the radius must grow */
+                              else
+                                Right[i] = R200[i]; /* not dense enough: the radius must shrink */
+
+                              if(iter >= MAXITER - 10) /* print diagnostics for the last iterations before giving up */
+                                {
+                                  printf("gr=%d task=%d R200=%g Left=%g Right=%g Menclosed=%g Right-Left=%g\n pos=(%g|%g|%g)\n", i,
+                                         ThisTask, R200[i], Left[i], Right[i], M200[i], Right[i] - Left[i], Group[i].Pos[0],
+                                         Group[i].Pos[1], Group[i].Pos[2]);
+                                  myflush(stdout);
+                                }
+                            }
+                          else
+                            Todo[i] = 0; /* converged */
+                        }
+                    }
+                }
+              else /* mainstep 1 and 2: one pass at the converged radius is enough */
+                for(i = 0, npleft = 0; i < Ngroups; i++)
+                  Todo[i] = 0;
+
+              sumup_large_ints(1, &npleft, &ntot);
+
+              t1 = second();
+
+              if(ntot > 0)
+                {
+                  iter++;
+
+                  if(iter > 0)
+                    mpi_printf("SUBFIND: SO iteration %2d: need to repeat for %12lld halo centers. (took %g sec)\n", iter, ntot,
+                               timediff(t0, t1));
+
+                  if(iter > MAXITER)
+                    terminate("failed to converge in SO iteration");
+                }
+            }
+          while(ntot > 0);
+        } /* end of mainstep loop */
+
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+      double *egypot = mymalloc("egypot", Ngroups * sizeof(double));
+
+      subfind_so_potegy(egypot);
+
+      for(i = 0; i < Ngroups; i++)
+        {
+          double rate;
+
+          /* work out sampling rate */
+          if(AngMom[i].N200 < SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER)
+            rate = 1.0;
+          else
+            rate = (SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER / AngMom[i].N200);
+
+          AngMom[i].Epot = egypot[i] / (rate * rate); /* NOTE(review): presumably rescales the subsampled pair sum - confirm */
+        }
+
+      myfree(egypot);
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+
+      for(i = 0; i < Ngroups; i++)
+        {
+          if(Group[i].Nsubs > 0)
+            {
+              overdensity = M200[i] / (4.0 * M_PI / 3.0 * R200[i] * R200[i] * R200[i]) / rhoback;
+
+              if((overdensity - Deltas[rep]) > 0.1 * Deltas[rep]) /* result is more than 10% above the target: discard */
+                {
+                  R200[i] = M200[i] = 0;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                  memset(&AngMom[i], 0, sizeof(struct Angular_Momentum));
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                }
+              else if(M200[i] < 5 * Group[i].Mass / Group[i].Len) /* less than 5 mean particle masses of the group: discard */
+                {
+                  R200[i] = M200[i] = 0;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                  memset(&AngMom[i], 0, sizeof(struct Angular_Momentum));
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                }
+            }
+          else /* group was never evaluated: report zeros */
+            {
+              R200[i] = M200[i] = 0;
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+              memset(&AngMom[i], 0, sizeof(struct Angular_Momentum));
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+            }
+
+          switch(rep) /* copy the results into the Group fields that belong to Deltas[rep] */
+            {
+              case 0:
+                Group[i].M_Mean200 = M200[i];
+                Group[i].R_Mean200 = R200[i];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                Group[i].Ekin_Mean200   = AngMom[i].Ekin;
+                Group[i].Ethr_Mean200   = AngMom[i].Ethr;
+                Group[i].Epot_Mean200   = AngMom[i].Epot;
+                Group[i].CMFrac_Mean200 = AngMom[i].CMFrac;
+                for(int k = 0; k < NTYPES; k++)
+                  {
+                    Group[i].MassType_Mean200[k]   = AngMom[i].MassType[k];
+                    Group[i].LenType_Mean200[k]    = AngMom[i].LenType[k];
+                    Group[i].CMFracType_Mean200[k] = AngMom[i].CMFracType[k];
+                  }
+                for(int k = 0; k < 3; k++)
+                  {
+                    Group[i].J_Mean200[k]      = AngMom[i].Jtot[k];
+                    Group[i].JDM_Mean200[k]    = AngMom[i].Jdm[k];
+                    Group[i].JGas_Mean200[k]   = AngMom[i].Jgas[k];
+                    Group[i].JStars_Mean200[k] = AngMom[i].Jstars[k];
+                  }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                break;
+              case 1:
+                Group[i].M_TopHat200 = M200[i];
+                Group[i].R_TopHat200 = R200[i];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                Group[i].Ekin_TopHat200   = AngMom[i].Ekin;
+                Group[i].Ethr_TopHat200   = AngMom[i].Ethr;
+                Group[i].Epot_TopHat200   = AngMom[i].Epot;
+                Group[i].CMFrac_TopHat200 = AngMom[i].CMFrac;
+                for(int k = 0; k < NTYPES; k++)
+                  {
+                    Group[i].MassType_TopHat200[k]   = AngMom[i].MassType[k];
+                    Group[i].LenType_TopHat200[k]    = AngMom[i].LenType[k];
+                    Group[i].CMFracType_TopHat200[k] = AngMom[i].CMFracType[k];
+                  }
+                for(int k = 0; k < 3; k++)
+                  {
+                    Group[i].J_TopHat200[k]      = AngMom[i].Jtot[k];
+                    Group[i].JDM_TopHat200[k]    = AngMom[i].Jdm[k];
+                    Group[i].JGas_TopHat200[k]   = AngMom[i].Jgas[k];
+                    Group[i].JStars_TopHat200[k] = AngMom[i].Jstars[k];
+                  }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                break;
+              case 2:
+                Group[i].M_Crit200 = M200[i];
+                Group[i].R_Crit200 = R200[i];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                Group[i].Ekin_Crit200   = AngMom[i].Ekin;
+                Group[i].Ethr_Crit200   = AngMom[i].Ethr;
+                Group[i].Epot_Crit200   = AngMom[i].Epot;
+                Group[i].CMFrac_Crit200 = AngMom[i].CMFrac;
+                for(int k = 0; k < NTYPES; k++)
+                  {
+                    Group[i].MassType_Crit200[k]   = AngMom[i].MassType[k];
+                    Group[i].LenType_Crit200[k]    = AngMom[i].LenType[k];
+                    Group[i].CMFracType_Crit200[k] = AngMom[i].CMFracType[k];
+                  }
+                for(int k = 0; k < 3; k++)
+                  {
+                    Group[i].J_Crit200[k]      = AngMom[i].Jtot[k];
+                    Group[i].JDM_Crit200[k]    = AngMom[i].Jdm[k];
+                    Group[i].JGas_Crit200[k]   = AngMom[i].Jgas[k];
+                    Group[i].JStars_Crit200[k] = AngMom[i].Jstars[k];
+                  }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                break;
+              case 3:
+                Group[i].M_Crit500 = M200[i];
+                Group[i].R_Crit500 = R200[i];
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+                Group[i].Ekin_Crit500   = AngMom[i].Ekin;
+                Group[i].Ethr_Crit500   = AngMom[i].Ethr;
+                Group[i].Epot_Crit500   = AngMom[i].Epot;
+                Group[i].CMFrac_Crit500 = AngMom[i].CMFrac;
+                for(int k = 0; k < NTYPES; k++)
+                  {
+                    Group[i].MassType_Crit500[k]   = AngMom[i].MassType[k];
+                    Group[i].LenType_Crit500[k]    = AngMom[i].LenType[k];
+                    Group[i].CMFracType_Crit500[k] = AngMom[i].CMFracType[k];
+                  }
+                for(int k = 0; k < 3; k++)
+                  {
+                    Group[i].J_Crit500[k]      = AngMom[i].Jtot[k];
+                    Group[i].JDM_Crit500[k]    = AngMom[i].Jdm[k];
+                    Group[i].JGas_Crit500[k]   = AngMom[i].Jgas[k];
+                    Group[i].JStars_Crit500[k] = AngMom[i].Jstars[k];
+                  }
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+                break;
+            }
+        }
+    }
+
+  myfree(Todo); /* buffers are released in reverse order of allocation */
+#ifdef SUBFIND_EXTENDED_PROPERTIES
+  myfree(Paux);
+  myfree(AngMom);
+#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */
+  myfree(M200);
+  myfree(R200);
+  myfree(Right);
+  myfree(Left);
+
+  double tend = second();
+  return timediff(tstart, tend);
+}
+
+/*! \brief Evaluate function of subfind_overdensity.
+ *
+ *  \param[in] target Index of group.
+ *  \param[in] mode Flag if it operates on local or imported data.
+ *  \param[in] threadid ID of thread.
+ * + * \return 0 + */ +static int subfind_overdensity_evaluate(int target, int mode, int threadid) +{ + int k, p, no, numnodes, *firstnode; + double hsml, mass; + MyDouble *pos; + struct NODE *current; + MyDouble dx, dy, dz, dist, r2; +#define FACT2 0.86602540 + MyDouble xtmp, ytmp, ztmp; + + data_in local, *in; + data_out out; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + in = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + in = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = in->Pos; + hsml = in->R200; + mass = 0; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + double Pmom[3], Mtot = 0, Jtot[3], Jdm[3], Jgas[3], Jstars[3], CMFrac = 0, N200 = 0; + double ekin = 0, etherm = 0; + double MassType[NTYPES], CMFracType[NTYPES]; + int LenType[NTYPES]; + + for(int i = 0; i < 3; i++) + { + Pmom[i] = 0; + Jtot[i] = 0; + Jdm[i] = 0; + Jgas[i] = 0; + Jstars[i] = 0; + } + for(int i = 0; i < NTYPES; i++) + { + MassType[i] = 0; + LenType[i] = 0; + CMFracType[i] = 0; + } + + if(mainstep == 1) + { + Mtot = in->M200; + N200 = in->AngMomIn.N200; + for(int i = 0; i < 3; i++) + Pmom[i] = in->AngMomIn.Pmom[i]; + } + else if(mainstep == 2) + { + Mtot = in->M200; + for(int i = 0; i < 3; i++) + { + Pmom[i] = in->AngMomIn.Pmom[i]; + Jtot[i] = in->AngMomIn.Jtot[i]; + Jdm[i] = in->AngMomIn.Jdm[i]; + Jgas[i] = in->AngMomIn.Jgas[i]; + Jstars[i] = in->AngMomIn.Jstars[i]; + } + for(int i = 0; i < NTYPES; i++) + MassType[i] = in->AngMomIn.MassType[i]; + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + for(k = 0; k < numnodes; k++) + { + if(mode == MODE_LOCAL_PARTICLES) + { + no = Tree_MaxPart; /* root node */ + } + else + { + no = firstnode[k]; + no = Nodes[no].u.d.nextnode; /* open it */ + } + + while(no >= 0) + { + if(no < Tree_MaxPart) /* single particle */ + { + p = no; + no = Nextnode[no]; + + dist = hsml; + dx = FOF_NEAREST_LONG_X(Tree_Pos_list[3 * p + 0] - pos[0]); + if(dx > dist) + continue; + dy = 
FOF_NEAREST_LONG_Y(Tree_Pos_list[3 * p + 1] - pos[1]); + if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(Tree_Pos_list[3 * p + 2] - pos[2]); + if(dz > dist) + continue; + if(dx * dx + dy * dy + dz * dz > dist * dist) + continue; + + if(mainstep == 0) + mass += P[p].Mass; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + if(mainstep == 0) + { + for(int i = 0; i < 3; i++) + Pmom[i] += P[p].Mass * P[p].Vel[i] / All.cf_atime; // units: 10^10 M_sol/h km/s + + for(int i = 0; i < NTYPES; i++) + if(P[p].Type == i) + { + MassType[i] += P[p].Mass; + + LenType[i]++; + } + + N200 += 1.0; + } + else if(mainstep == 1) + { + double rate; + /* work out sampling rate */ + if(N200 < SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER) + rate = 1.0; + else + rate = (SUBFIND_SO_POT_CALCULATION_PARTICLE_NUMBER / N200); + + if(get_random_number_aux() < rate) + { + if(NumPaux >= NumPart) + terminate("NumPaux >= NumPart"); + + Paux[NumPaux].Pos[0] = NEAREST_X(P[p].Pos[0] - pos[0]); + Paux[NumPaux].Pos[1] = NEAREST_Y(P[p].Pos[1] - pos[1]); + Paux[NumPaux].Pos[2] = NEAREST_Z(P[p].Pos[2] - pos[2]); + Paux[NumPaux].Mass = P[p].Mass; + Paux[NumPaux].TaskOfGr = in->TaskOfGr; + Paux[NumPaux].LocGrIndex = in->LocGrIndex; + Paux[NumPaux].Type = P[p].Type; + Paux[NumPaux].SofteningType = P[p].SofteningType; + NumPaux++; + } + + int ptype = P[p].Type; + + double Pos_pbc[3], Vel_centre[3], Vel_tot[3]; + Pos_pbc[0] = NEAREST_X(P[p].Pos[0] - pos[0]) * All.cf_atime; + Pos_pbc[1] = NEAREST_Y(P[p].Pos[1] - pos[1]) * All.cf_atime; + Pos_pbc[2] = NEAREST_Z(P[p].Pos[2] - pos[2]) * All.cf_atime; + + for(int i = 0; i < 3; i++) + Vel_centre[i] = (Pmom[i] / Mtot); // units: km/s + + for(int i = 0; i < 3; i++) + Vel_tot[i] = P[p].Vel[i] / All.cf_atime - Vel_centre[i] + All.cf_Hrate * Pos_pbc[i]; + + ekin += 0.5 * P[p].Mass * (Vel_tot[0] * Vel_tot[0] + Vel_tot[1] * Vel_tot[1] + Vel_tot[2] * Vel_tot[2]); + + Jtot[0] += P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + Jtot[1] += P[p].Mass * (Pos_pbc[2] * 
Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + Jtot[2] += P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + + if(ptype == 1) // dm illustris + { + Jdm[0] += P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + Jdm[1] += P[p].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + Jdm[2] += P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + if(ptype == 0) // gas + { + etherm += P[p].Mass * PS[p].Utherm; + + Jgas[0] += P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + Jgas[1] += P[p].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + Jgas[2] += P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + if(ptype == 4) // stars + { + Jstars[0] += P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + Jstars[1] += P[p].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + Jstars[2] += P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + } + } + else if(mainstep == 2) + { + int ptype = P[p].Type; + + double Pos_pbc[3], Vel_centre[3], Vel_tot[3], jpart[3], Jtot[3]; + Pos_pbc[0] = NEAREST_X(P[p].Pos[0] - pos[0]) * All.cf_atime; + Pos_pbc[1] = NEAREST_Y(P[p].Pos[1] - pos[1]) * All.cf_atime; + Pos_pbc[2] = NEAREST_Z(P[p].Pos[2] - pos[2]) * All.cf_atime; + + for(int i = 0; i < 3; i++) + Vel_centre[i] = (Pmom[i] / Mtot); + + for(int i = 0; i < 3; i++) + Vel_tot[i] = P[p].Vel[i] / All.cf_atime - Vel_centre[i] + All.cf_Hrate * Pos_pbc[i]; + + jpart[0] = P[p].Mass * (Pos_pbc[1] * Vel_tot[2] - Pos_pbc[2] * Vel_tot[1]); + jpart[1] = P[p].Mass * (Pos_pbc[2] * Vel_tot[0] - Pos_pbc[0] * Vel_tot[2]); + jpart[2] = P[p].Mass * (Pos_pbc[0] * Vel_tot[1] - Pos_pbc[1] * Vel_tot[0]); + + if((Jtot[0] * jpart[0] + Jtot[1] * jpart[1] + Jtot[2] * jpart[2]) < 0.) + CMFrac += P[p].Mass / Mtot; + + if(ptype == 1) // dm + if((Jdm[0] * jpart[0] + Jdm[1] * jpart[1] + Jdm[2] * jpart[2]) < 0.) 
+ CMFracType[1] += P[p].Mass / MassType[1]; + + if(ptype == 0) // gas + if((Jgas[0] * jpart[0] + Jgas[1] * jpart[1] + Jgas[2] * jpart[2]) < 0.) + CMFracType[0] += P[p].Mass / MassType[0]; + + if(ptype == 4) // stars + if((Jstars[0] * jpart[0] + Jstars[1] * jpart[1] + Jstars[2] * jpart[2]) < 0.) + CMFracType[4] += P[p].Mass / MassType[4]; + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + } + else if(no < Tree_MaxPart + Tree_MaxNodes) /* internal node */ + { + if(mode == MODE_IMPORTED_PARTICLES) + { + if(no < + Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */ + break; + } + + current = &Nodes[no]; + + no = current->u.d.sibling; /* in case the node can be discarded */ + + dist = hsml + 0.5 * current->len; + dx = FOF_NEAREST_LONG_X(current->center[0] - pos[0]); + if(dx > dist) + continue; + dy = FOF_NEAREST_LONG_Y(current->center[1] - pos[1]); + if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(current->center[2] - pos[2]); + if(dz > dist) + continue; + /* now test against the minimal sphere enclosing everything */ + dist += FACT1 * current->len; + if((r2 = (dx * dx + dy * dy + dz * dz)) > dist * dist) + continue; + +#ifndef SUBFIND_EXTENDED_PROPERTIES + if(no >= Tree_FirstNonTopLevelNode) /* only do this for fully local nodes */ + { + /* test whether the node is contained within the sphere, which gives short-cut if we only need the mass */ + dist = hsml - FACT2 * current->len; + if(dist > 0) + if(r2 < dist * dist) + { + mass += current->u.d.mass; + continue; + } + } +#endif /* #ifndef SUBFIND_EXTENDED_PROPERTIES */ + + no = current->u.d.nextnode; /* ok, we need to open the node */ + } + else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */ + { + int n = no - Tree_ImportedNodeOffset; + no = Nextnode[no - Tree_MaxNodes]; + + dist = hsml; + dx = FOF_NEAREST_LONG_X(Tree_Points[n].Pos[0] - pos[0]); + if(dx > dist) + continue; + dy = FOF_NEAREST_LONG_Y(Tree_Points[n].Pos[1] - pos[1]); 
+ if(dy > dist) + continue; + dz = FOF_NEAREST_LONG_Z(Tree_Points[n].Pos[2] - pos[2]); + if(dz > dist) + continue; + if(dx * dx + dy * dy + dz * dz > dist * dist) + continue; + + mass += Tree_Points[n].Mass; + } + else /* pseudo particle */ + { + if(mode == MODE_IMPORTED_PARTICLES) + terminate("mode == MODE_IMPORTED_PARTICLES"); + + if(mode == MODE_LOCAL_PARTICLES) + tree_treefind_export_node_threads(no, target, threadid); + + no = Nextnode[no - Tree_MaxNodes]; + } + } + } + + out.Mass = mass; + +#ifdef SUBFIND_EXTENDED_PROPERTIES + if(mainstep == 0) + { + for(int k = 0; k < 3; k++) + out.AngMomOut.Pmom[k] = Pmom[k]; + for(int k = 0; k < NTYPES; k++) + { + out.AngMomOut.MassType[k] = MassType[k]; + out.AngMomOut.LenType[k] = LenType[k]; + } + + out.AngMomOut.N200 = N200; + } + else if(mainstep == 1) + { + for(int k = 0; k < 3; k++) + { + out.AngMomOut.Jtot[k] = Jtot[k]; + out.AngMomOut.Jdm[k] = Jdm[k]; + out.AngMomOut.Jgas[k] = Jgas[k]; + out.AngMomOut.Jstars[k] = Jstars[k]; + } + + out.AngMomOut.Ekin = ekin; + out.AngMomOut.Ethr = etherm; + } + else if(mainstep == 2) + { + out.AngMomOut.CMFrac = CMFrac; + for(int k = 0; k < NTYPES; k++) + out.AngMomOut.CMFracType[k] = CMFracType[k]; + } +#endif /* #ifdef SUBFIND_EXTENDED_PROPERTIES */ + + /* Now collect the result at the right place */ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return 0; +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_so_potegy.c b/src/amuse/community/arepo/src/subfind/subfind_so_potegy.c new file mode 100644 index 0000000000..823cb62eb2 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_so_potegy.c @@ -0,0 +1,853 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_so_potegy.c + * \date 05/2018 + * \brief Calculates the potential energy. + * \details contains functions: + * static void subfind_so_potegy_loctree_findExtent(int npart, + * int start) + * static int subfind_so_potegy_loctree_treebuild(int npart, + * int start) + * static void subfind_so_potegy_loctree_update_node_recursive( + * int no, int sib, int father) + * double subfind_so_potegy_loctree_treeevaluate_potential(int + * target) + * static size_t subfind_so_potegy_loctree_treeallocate(int + * maxnodes, int maxpart) + * static void subfind_so_potegy_loctree_treefree(void) + * static int subfind_compare_Paux_LocGrIndex(const void *a, + * const void *b) + * double subfind_so_potegy(double *egypot) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 14.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) + +#include "../fof/fof.h" +#include "subfind.h" + +static double
RootLen, RootFac, RootBigFac, RootInverseLen, RootCenter[3], RootCorner[3]; /* root-box geometry, set by subfind_so_potegy_loctree_findExtent() */
+static int LocMaxPart; /* index of the first internal node; smaller indices denote particles */
+static int MaxNodes, last;
+static int *LocNextNode;
+static unsigned long long *LocTree_IntPos_list; /* three integer coordinates per particle, filled during tree build */
+static struct paux_data *LocPaux;               /* particle sample the local tree is built over */
+
+static void subfind_so_potegy_loctree_update_node_recursive(int no, int sib, int father);
+
+/*! \brief Node structure for local tree.
+ */
+static struct LocNODE
+{
+  union
+  {
+    int suns[8]; /*!< temporary pointers to daughter nodes */
+    struct
+    {
+      MyDouble s[3]; /*!< center of mass of node */
+      MyDouble mass; /*!< mass of node */
+      unsigned char maxsofttype;
+#if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING)
+      unsigned char maxhydrosofttype;
+      unsigned char minhydrosofttype;
+#endif            /* #if defined(MULTIPLE_NODE_SOFTENING) && defined(ADAPTIVE_HYDRO_SOFTENING) */
+      int sibling;  /*!< this gives the next node in the walk in case the current node can be used */
+      int nextnode; /*!< this gives the next node in case the current node needs to be opened */
+    } d;
+  } u;
+
+  MyDouble center[3]; /*!< geometrical center of node */
+  MyFloat len;        /*!< sidelength of treenode */
+
+#ifdef MULTIPLE_NODE_SOFTENING
+  MyDouble mass_per_type[NSOFTTYPES];
+#endif
+} * LocNodes_base, /*!< points to the actual memory allocated for the nodes */
+    *LocNodes;     /*!< this is a pointer used to access the nodes which is shifted such that LocNodes[LocMaxPart]
+                      gives the first allocated node */
+
+/*! \brief Finds spatial extent of local particles.
+ *
+ *  Sets global 'Root*' variables that determine root node properties.
+ *
+ *  \param[in] npart Number of particles.
+ *  \param[in] start Start index.
+ *
+ *  \return void
+ */
+static void subfind_so_potegy_loctree_findExtent(int npart, int start)
+{
+  double lo[3], hi[3], side;
+
+  /* start from an inverted (empty) bounding box */
+  for(int d = 0; d < 3; d++)
+    {
+      lo[d] = MAX_REAL_NUMBER;
+      hi[d] = -MAX_REAL_NUMBER;
+    }
+
+  for(int idx = start; idx < start + npart; idx++)
+    {
+      const MyDouble *x = LocPaux[idx].Pos; /* position of one particle of the range */
+
+      for(int d = 0; d < 3; d++)
+        {
+          if(x[d] < lo[d])
+            lo[d] = x[d];
+
+          if(x[d] > hi[d])
+            hi[d] = x[d];
+        }
+    }
+
+  side = 0; /* longest edge of the bounding box */
+  for(int d = 0; d < 3; d++)
+    if(hi[d] - lo[d] > side)
+      side = hi[d] - lo[d];
+
+  side *= 1.001; /* enlarge the cube by 0.1 percent */
+
+  RootLen        = side;
+  RootInverseLen = 1.0 / RootLen;
+  RootFac        = 1.0 / side * (((peanokey)1) << (BITS_PER_DIMENSION));
+  RootBigFac     = (RootLen / (((long long)1) << 52));
+
+  for(int d = 0; d < 3; d++)
+    {
+      RootCenter[d] = 0.5 * (lo[d] + hi[d]);
+      RootCorner[d] = 0.5 * (lo[d] + hi[d]) - 0.5 * side;
+    }
+}
+
+/*! \brief Builds local tree.
+ *
+ *  \param[in] npart Number of particles.
+ *  \param[in] start Start index.
+ *
+ *  \return Number of nodes in tree.
+ */ +static int subfind_so_potegy_loctree_treebuild(int npart, int start) +{ + int subnode = 0, parent = -1, numnodes; + int nfree, th, nn; + struct LocNODE *nfreep; + + /* select first node */ + nfree = LocMaxPart; + nfreep = &LocNodes[nfree]; + + /* create an empty root node */ + nfreep->len = (MyFloat)RootLen; + for(int i = 0; i < 3; i++) + nfreep->center[i] = (MyFloat)RootCenter[i]; + + for(int i = 0; i < 8; i++) + nfreep->u.suns[i] = -1; + + numnodes = 1; + nfreep++; + nfree++; + + /* insert all particles */ + + LocTree_IntPos_list = + (unsigned long long *)mymalloc_movable(&LocTree_IntPos_list, "LocTree_IntPos_list", 3 * LocMaxPart * sizeof(unsigned long long)); + + for(int k = 0; k < npart; k++) + { + int i = start + k; + + MyDouble *posp; + + posp = &LocPaux[i].Pos[0]; + + unsigned long long xxb = force_double_to_int(((posp[0] - RootCorner[0]) * RootInverseLen) + 1.0); + unsigned long long yyb = force_double_to_int(((posp[1] - RootCorner[1]) * RootInverseLen) + 1.0); + unsigned long long zzb = force_double_to_int(((posp[2] - RootCorner[2]) * RootInverseLen) + 1.0); + unsigned long long mask = ((unsigned long long)1) << (52 - 1); + unsigned char shiftx = (52 - 1); + unsigned char shifty = (52 - 2); + unsigned char shiftz = (52 - 3); + signed long long centermask = (0xFFF0000000000000llu); + unsigned char levels = 0; + + unsigned long long *intposp = &LocTree_IntPos_list[3 * i]; + + *intposp++ = xxb; + *intposp++ = yyb; + *intposp++ = zzb; + + th = LocMaxPart; + + while(1) + { + if(th >= LocMaxPart) /* we are dealing with an internal node */ + { + subnode = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) | + ((unsigned char)((zzb & mask) >> (shiftz--)))); + + centermask >>= 1; + mask >>= 1; + levels++; + + if(levels > MAX_TREE_LEVEL) + { + /* seems like we're dealing with particles at identical (or extremely close) + * locations. Shift subnode index to allow tree construction. 
Note: Multipole moments + * of tree are still correct, but one should MAX_TREE_LEVEL large enough to have + * DomainLen/2^MAX_TREE_LEVEL < gravitational softening length + */ + for(int j = 0; j < 8; j++) + { + if(LocNodes[th].u.suns[subnode] < 0) + break; + + subnode++; + if(subnode >= 8) + subnode = 7; + } + } + + nn = LocNodes[th].u.suns[subnode]; + + if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */ + { + parent = th; /* note: subnode can still be used in the next step of the walk */ + th = nn; + } + else + { + /* here we have found an empty slot where we can + * attach the new particle as a leaf + */ + LocNodes[th].u.suns[subnode] = i; + break; /* done for this particle */ + } + } + else + { + /* we try to insert into a leaf with a single particle + * need to generate a new internal node at this point + */ + LocNodes[parent].u.suns[subnode] = nfree; + + /* the other is: */ + double len = ((double)(mask << 1)) * RootBigFac; + double cx = ((double)((xxb & centermask) | mask)) * RootBigFac + RootCorner[0]; + double cy = ((double)((yyb & centermask) | mask)) * RootBigFac + RootCorner[1]; + double cz = ((double)((zzb & centermask) | mask)) * RootBigFac + RootCorner[2]; + + nfreep->len = len; + nfreep->center[0] = cx; + nfreep->center[1] = cy; + nfreep->center[2] = cz; + + nfreep->u.suns[0] = -1; + nfreep->u.suns[1] = -1; + nfreep->u.suns[2] = -1; + nfreep->u.suns[3] = -1; + nfreep->u.suns[4] = -1; + nfreep->u.suns[5] = -1; + nfreep->u.suns[6] = -1; + nfreep->u.suns[7] = -1; + + unsigned long long *intppos = &LocTree_IntPos_list[3 * th]; + + subnode = (((unsigned char)((intppos[0] & mask) >> shiftx)) | ((unsigned char)((intppos[1] & mask) >> shifty)) | + ((unsigned char)((intppos[2] & mask) >> shiftz))); + + nfreep->u.suns[subnode] = th; + + th = nfree; /* resume trying to insert the new particle at + the newly created internal node */ + + numnodes++; + nfree++; + nfreep++; + + if(numnodes >= MaxNodes) + { + MaxNodes *= 1.2; + + 
LocNodes_base = (struct LocNODE *)myrealloc_movable(LocNodes_base, (MaxNodes + 1) * sizeof(struct LocNODE)); + LocNodes = LocNodes_base - LocMaxPart; + nfreep = &LocNodes[nfree]; + + if(numnodes > MaxNodes) + { + char buf[1000]; + + sprintf(buf, "maximum number %d of tree-nodes reached., for particle %d %g %g %g", MaxNodes, i, + LocPaux[i].Pos[0], LocPaux[i].Pos[1], LocPaux[i].Pos[2]); + terminate(buf); + } + } + } + } + } + + myfree(LocTree_IntPos_list); + + /* now compute the multipole moments recursively */ + last = -1; + subfind_so_potegy_loctree_update_node_recursive(LocMaxPart, -1, -1); + + if(last >= LocMaxPart) + LocNodes[last].u.d.nextnode = -1; + else + LocNextNode[last] = -1; + + return numnodes; +} + +/*! \brief Walk the tree and update node data recursively. + * + * This routine computes the multipole moments for a given internal node and + * all its subnodes using a recursive computation. Note that this switches + * the information stored in LocNodes[no].u from suns to d! + * + * + * \param[in] no Node index. + * \param[in] sib Sibling index. + * \param[in] father Parent index. + * + * \return void + */ +static void subfind_so_potegy_loctree_update_node_recursive(int no, int sib, int father) +{ + int j, jj, p, pp = 0, nextsib, suns[8]; + unsigned char maxsofttype; +#ifdef MULTIPLE_NODE_SOFTENING + double mass_per_type[NSOFTTYPES]; +#ifdef ADAPTIVE_HYDRO_SOFTENING + unsigned char maxhydrosofttype; + unsigned char minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + double mass; + double s[3]; + + if(no >= LocMaxPart) + { + for(j = 0; j < 8; j++) + suns[j] = LocNodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will + overwrite one element (union!) 
*/ + if(last >= 0) + { + if(last >= LocMaxPart) + LocNodes[last].u.d.nextnode = no; + else + LocNextNode[last] = no; + } + + last = no; + + mass = 0; + s[0] = 0; + s[1] = 0; + s[2] = 0; + maxsofttype = NSOFTTYPES + NSOFTTYPES_HYDRO; + +#ifdef MULTIPLE_NODE_SOFTENING + for(j = 0; j < NSOFTTYPES; j++) + mass_per_type[j] = 0; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + maxhydrosofttype = NSOFTTYPES; + minhydrosofttype = NSOFTTYPES + NSOFTTYPES_HYDRO - 1; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + for(j = 0; j < 8; j++) + { + if((p = suns[j]) >= 0) + { + /* check if we have a sibling on the same level */ + for(jj = j + 1; jj < 8; jj++) + if((pp = suns[jj]) >= 0) + break; + + if(jj < 8) /* yes, we do */ + nextsib = pp; + else + nextsib = sib; + + subfind_so_potegy_loctree_update_node_recursive(p, nextsib, no); + + if(p >= LocMaxPart) /* an internal node */ + { + mass += LocNodes[p].u.d.mass; /* we assume a fixed particle mass */ + s[0] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[0]; + s[1] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[1]; + s[2] += LocNodes[p].u.d.mass * LocNodes[p].u.d.s[2]; + + if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[LocNodes[p].u.d.maxsofttype]) + maxsofttype = LocNodes[p].u.d.maxsofttype; + +#ifdef MULTIPLE_NODE_SOFTENING + int k; + for(k = 0; k < NSOFTTYPES; k++) + mass_per_type[k] += LocNodes[p].mass_per_type[k]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + if(maxhydrosofttype < LocNodes[p].u.d.maxhydrosofttype) + maxhydrosofttype = LocNodes[p].u.d.maxhydrosofttype; + if(minhydrosofttype > LocNodes[p].u.d.minhydrosofttype) + minhydrosofttype = LocNodes[p].u.d.minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + else /* a particle */ + { + mass += LocPaux[p].Mass; + + s[0] += LocPaux[p].Mass * LocPaux[p].Pos[0]; + s[1] += LocPaux[p].Mass * LocPaux[p].Pos[1]; + s[2] += LocPaux[p].Mass * LocPaux[p].Pos[2]; + + 
if(All.ForceSoftening[maxsofttype] < All.ForceSoftening[LocPaux[p].SofteningType]) + maxsofttype = LocPaux[p].SofteningType; +#ifdef MULTIPLE_NODE_SOFTENING +#ifdef ADAPTIVE_HYDRO_SOFTENING + mass_per_type[LocPaux[p].Type == 0 ? 0 : LocPaux[p].SofteningType] += LocPaux[p].Mass; + + if(LocPaux[p].Type == 0) + { + if(maxhydrosofttype < LocPaux[p].SofteningType) + maxhydrosofttype = LocPaux[p].SofteningType; + if(minhydrosofttype > LocPaux[p].SofteningType) + minhydrosofttype = LocPaux[p].SofteningType; + } +#else /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + mass_per_type[LocPaux[p].SofteningType] += LocPaux[p].Mass; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING #else */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + } + } + + if(mass > 0) + { + s[0] /= mass; + s[1] /= mass; + s[2] /= mass; + } + else + { + s[0] = LocNodes[no].center[0]; + s[1] = LocNodes[no].center[1]; + s[2] = LocNodes[no].center[2]; + } + + LocNodes[no].u.d.s[0] = (MyFloat)s[0]; + LocNodes[no].u.d.s[1] = (MyFloat)s[1]; + LocNodes[no].u.d.s[2] = (MyFloat)s[2]; + LocNodes[no].u.d.mass = (MyFloat)mass; + LocNodes[no].u.d.maxsofttype = maxsofttype; +#ifdef MULTIPLE_NODE_SOFTENING + int k; + for(k = 0; k < NSOFTTYPES; k++) + LocNodes[no].mass_per_type[k] = mass_per_type[k]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + LocNodes[no].u.d.maxhydrosofttype = maxhydrosofttype; + LocNodes[no].u.d.minhydrosofttype = minhydrosofttype; +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + + LocNodes[no].u.d.sibling = sib; + } + else /* single particle or pseudo particle */ + { + if(last >= 0) + { + if(last >= LocMaxPart) + LocNodes[last].u.d.nextnode = no; + else + LocNextNode[last] = no; + } + + last = no; + } +} + +/*! \brief Calculates the gravitational potential energy of single particle. + * + * \pararm[in] target Target particle index (in LocPaux). + * + * \return Gravitational potential. 
+ */ +double subfind_so_potegy_loctree_treeevaluate_potential(int target) +{ + struct LocNODE *nop = 0; + int no; + double r2, dx, dy, dz, mass, r, u, h_i, h_j, hmax, h_inv, wp; + double pot, pos_x, pos_y, pos_z, xtmp, ytmp, ztmp; + + pos_x = LocPaux[target].Pos[0]; + pos_y = LocPaux[target].Pos[1]; + pos_z = LocPaux[target].Pos[2]; + + h_i = All.ForceSoftening[LocPaux[target].SofteningType]; + + pot = 0; + + no = LocMaxPart; + + while(no >= 0) + { +#ifdef MULTIPLE_NODE_SOFTENING + int indi_flag1 = -1, indi_flag2 = 0; +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + if(no < LocMaxPart) /* single particle */ + { + dx = GRAVITY_NEAREST_X(LocPaux[no].Pos[0] - pos_x); + dy = GRAVITY_NEAREST_Y(LocPaux[no].Pos[1] - pos_y); + dz = GRAVITY_NEAREST_Z(LocPaux[no].Pos[2] - pos_z); + + r2 = dx * dx + dy * dy + dz * dz; + + mass = LocPaux[no].Mass; + + h_j = All.ForceSoftening[LocPaux[no].SofteningType]; + + if(h_j > h_i) + hmax = h_j; + else + hmax = h_i; + + no = LocNextNode[no]; + } + else + { + nop = &LocNodes[no]; + mass = nop->u.d.mass; + + dx = GRAVITY_NEAREST_X(nop->u.d.s[0] - pos_x); + dy = GRAVITY_NEAREST_Y(nop->u.d.s[1] - pos_y); + dz = GRAVITY_NEAREST_Z(nop->u.d.s[2] - pos_z); + + r2 = dx * dx + dy * dy + dz * dz; + + /* check Barnes-Hut opening criterion */ + if(nop->len * nop->len > r2 * All.ErrTolThetaSubfind * All.ErrTolThetaSubfind) + { + /* open cell */ + if(mass) + { + no = nop->u.d.nextnode; + continue; + } + } + + h_j = All.ForceSoftening[nop->u.d.maxsofttype]; + + if(h_j > h_i) + { +#ifdef MULTIPLE_NODE_SOFTENING +#ifdef ADAPTIVE_HYDRO_SOFTENING + if(nop->u.d.maxhydrosofttype != nop->u.d.minhydrosofttype) + if(LocNodes[no].mass_per_type[0] > 0) + if(r2 < All.ForceSoftening[nop->u.d.maxhydrosofttype] * All.ForceSoftening[nop->u.d.maxhydrosofttype]) + { + /* open cell */ + no = nop->u.d.nextnode; + continue; + } +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + indi_flag1 = 0; + indi_flag2 = NSOFTTYPES; +#else /* #ifdef MULTIPLE_NODE_SOFTENING */ + + if(r2 < h_j 
* h_j) + { + /* open cell */ + no = nop->u.d.nextnode; + continue; + } +#endif /* #ifdef MULTIPLE_NODE_SOFTENING #else */ + hmax = h_j; + } + else + hmax = h_i; + + no = nop->u.d.sibling; /* node can be used */ + } + + r = sqrt(r2); +#ifdef MULTIPLE_NODE_SOFTENING + int type; + for(type = indi_flag1; type < indi_flag2; type++) + { + if(type >= 0) + { + mass = nop->mass_per_type[type]; + +#ifdef ADAPTIVE_HYDRO_SOFTENING + if(type == 0) + h_j = All.ForceSoftening[nop->u.d.maxhydrosofttype]; + else +#endif /* #ifdef ADAPTIVE_HYDRO_SOFTENING */ + h_j = All.ForceSoftening[type]; + + if(h_j > h_i) + hmax = h_j; + else + hmax = h_i; + } + + if(mass) + { +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + if(r >= hmax) + pot -= mass / r; + else + { + h_inv = 1.0 / hmax; + + u = r * h_inv; + + if(u < 0.5) + wp = -2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6)); + else + wp = -3.2 + 0.066666666667 / u + u * u * (10.666666666667 + u * (-16.0 + u * (9.6 - 2.133333333333 * u))); + + pot += mass * h_inv * wp; +#ifdef MULTIPLE_NODE_SOFTENING + } + } +#endif /* #ifdef MULTIPLE_NODE_SOFTENING */ + } + } + + return pot; +} + +/*! \brief Allocates memory used for storage of the tree and auxiliary arrays + * for tree-walk and link-lists. + * + * \param[in] maxnodes Maximum number of nodes. + * \param[in] maxpart Maximum number of particles. + * + * \return Number of allocated bytes. 
+ */ +static size_t subfind_so_potegy_loctree_treeallocate(int maxnodes, int maxpart) +{ + size_t bytes, allbytes = 0; + + if(LocNextNode) + terminate("loctree already allocated"); + + MaxNodes = maxnodes; + LocMaxPart = maxpart; + + LocNextNode = (int *)mymalloc("LocNextNode", bytes = maxpart * sizeof(int)); + allbytes += bytes; + + R2list = (r2type *)mymalloc("R2list", bytes = maxpart * sizeof(r2type)); + allbytes += bytes; + + LocNodes_base = (struct LocNODE *)mymalloc_movable(&LocNodes_base, "LocNodes_base", bytes = (MaxNodes + 1) * sizeof(struct LocNODE)); + LocNodes = LocNodes_base - LocMaxPart; + allbytes += bytes; + + return allbytes; +} + +/*! \brief Frees the allocated memory. + * + * \return void + */ +static void subfind_so_potegy_loctree_treefree(void) +{ + myfree(LocNodes_base); + myfree(R2list); + myfree(LocNextNode); + + LocNextNode = NULL; + R2list = NULL; + LocNodes_base = NULL; +} + +/*! \brief Comparison function for paux_data objects. + * + * Compares field LocGrIndex. + * + * \param[in] a First object to be compared. + * \param[in] b Second object to be compared. + * + * \return (-1,0,1); -1 if a < b. + */ +static int subfind_compare_Paux_LocGrIndex(const void *a, const void *b) +{ + if(((struct paux_data *)a)->LocGrIndex < ((struct paux_data *)b)->LocGrIndex) + return -1; + + if(((struct paux_data *)a)->LocGrIndex > ((struct paux_data *)b)->LocGrIndex) + return +1; + + return 0; +} + +/*! \brief Calculates potential energy of spherical overdensity groups. + * + * \param[out] egypot Array with potential energies in each group. + * + * \return Time this routine took. 
+ */ +double subfind_so_potegy(double *egypot) +{ + double t0 = second(); + mpi_printf("SUBFIND: Starting SO potential energy computation\n"); + + size_t *count_send = (size_t *)mymalloc_movable(&count_send, "count_send", NTask * sizeof(size_t)); + size_t *offset_send = (size_t *)mymalloc_movable(&offset_send, "offset_send", NTask * sizeof(size_t)); + size_t *count_recv = (size_t *)mymalloc_movable(&count_recv, "count_recv", NTask * sizeof(size_t)); + size_t *offset_recv = (size_t *)mymalloc_movable(&offset_recv, "offset_recv", NTask * sizeof(size_t)); + + for(int i = 0; i < NTask; i++) + count_send[i] = 0; + + for(int i = 0; i < NumPaux; i++) + count_send[Paux[i].TaskOfGr]++; + + MPI_Alltoall(count_send, sizeof(size_t), MPI_BYTE, count_recv, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD); + + offset_send[0] = offset_recv[0] = 0; + + for(int i = 1; i < NTask; i++) + { + offset_send[i] = offset_send[i - 1] + count_send[i - 1]; + offset_recv[i] = offset_recv[i - 1] + count_recv[i - 1]; + } + + struct paux_data *PauxTmp = (struct paux_data *)mymalloc_movable(&PauxTmp, "PauxTmp", NumPaux * sizeof(struct paux_data)); + + for(int i = 0; i < NTask; i++) + count_send[i] = 0; + + for(int i = 0; i < NumPaux; i++) + { + int task = Paux[i].TaskOfGr; + int loc = offset_send[task] + count_send[task]++; + PauxTmp[loc] = Paux[i]; + } + + int NumPauxRecv = 0; + + for(int i = 0; i < NTask; i++) + NumPauxRecv += count_recv[i]; + + LocPaux = (struct paux_data *)mymalloc_movable(&LocPaux, "LocPaux", NumPauxRecv * sizeof(struct paux_data)); + + myMPI_Alltoallv(PauxTmp, count_send, offset_send, LocPaux, count_recv, offset_recv, sizeof(struct paux_data), 1, MPI_COMM_WORLD); + + myfree_movable(PauxTmp); + + qsort(LocPaux, NumPauxRecv, sizeof(struct paux_data), subfind_compare_Paux_LocGrIndex); + + int *group_len = (int *)mymalloc("group_len", Ngroups * sizeof(int)); + int *group_off = (int *)mymalloc("group_off", Ngroups * sizeof(int)); + + for(int i = 0; i < Ngroups; i++) + group_len[i] = 0; + 
+ for(int i = 0; i < NumPauxRecv; i++) + { + int j = LocPaux[i].LocGrIndex; + if(j < 0 || j >= Ngroups) + terminate("j=%d Ngroups=%d", j, Ngroups); + + group_len[j]++; + } + + group_off[0] = 0; + + for(int i = 1; i < Ngroups; i++) + group_off[i] = group_off[i - 1] + group_len[i - 1]; + + int MaxAllocPart = NumPart; + // extend in case a single group holds more particles than NumPart + for(int i = 0; i < Ngroups; i++) + if(group_len[i] > MaxAllocPart) + MaxAllocPart = group_len[i]; + + subfind_so_potegy_loctree_treeallocate((int)(All.TreeAllocFactor * MaxAllocPart) + NTopnodes, MaxAllocPart); + + /* now do the actual potential calculation */ + for(int i = 0; i < Ngroups; i++) + { + subfind_so_potegy_loctree_findExtent(group_len[i], group_off[i]); + subfind_so_potegy_loctree_treebuild(group_len[i], group_off[i]); + + egypot[i] = 0; + + for(int j = 0; j < group_len[i]; j++) + { + int target = group_off[i] + j; + + double pot = subfind_so_potegy_loctree_treeevaluate_potential(target); + + /* remove self-potential */ + pot += LocPaux[target].Mass / (All.ForceSoftening[LocPaux[target].SofteningType] / 2.8); + + pot *= All.G / All.cf_atime; + + egypot[i] += 0.5 * pot * LocPaux[target].Mass; + } + } + + subfind_so_potegy_loctree_treefree(); + + myfree(group_off); + myfree(group_len); + + myfree(LocPaux); + + myfree(offset_recv); + myfree(count_recv); + myfree(offset_send); + myfree(count_send); + + double t1 = second(); + mpi_printf("SUBFIND: SO potential energy computation took %g sec\n", timediff(t0, t1)); + + return timediff(t0, t1); +} + +#endif /* #if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_sort_kernels.c b/src/amuse/community/arepo/src/subfind/subfind_sort_kernels.c new file mode 100644 index 0000000000..5787cb3441 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_sort_kernels.c @@ -0,0 +1,442 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_sort_kernels.c + * \date 05/2018 + * \brief Comparison functions that serve as sorting kernels for various + * different structs used in subfind. + * \details contains functions: + * int subfind_compare_procassign_GrNr(const void *a, + * const void *b) + * int subfind_compare_submp_GrNr_DM_Density(const void *a, + * const void *b) + * int subfind_compare_submp_OldIndex(const void *a, + * const void *b) + * int subfind_compare_ID_list(const void *a, const void *b) + * int subfind_compare_SubGroup_GrNr_SubNr(const void *a, const + * void *b) + * int subfind_compare_dist_rotcurve(const void *a, const void + * *b) + * int subfind_compare_rlist_mhd(const void *a, const void *b) + * int subfind_compare_binding_energy(const void *a, const void + * *b) + * int subfind_compare_serial_candidates_boundlength(const void + * *a, const void *b) + * int subfind_compare_serial_candidates_rank(const void *a, + * const void *b) + * int subfind_compare_serial_candidates_subnr(const void *a, + * const void *b) + * int subfind_compare_coll_candidates_subnr(const void *a, + * const void *b) + * int subfind_compare_coll_candidates_nsubs(const void *a, + * 
const void *b) + * int subfind_compare_coll_candidates_boundlength(const void + * *a, const void *b) + * int subfind_compare_coll_candidates_rank(const void *a, + * const void *b) + * int subfind_fof_compare_ID(const void *a, const void *b) + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../domain/domain.h" +#include "../fof/fof.h" +#include "subfind.h" + +#ifdef SUBFIND + +/*! \brief Comparison function for proc_assign_data objects. + * + * Sorting kernel comparing element GrNr. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_compare_procassign_GrNr(const void *a, const void *b) +{ + if(((struct proc_assign_data *)a)->GrNr < ((struct proc_assign_data *)b)->GrNr) + return -1; + + if(((struct proc_assign_data *)a)->GrNr > ((struct proc_assign_data *)b)->GrNr) + return +1; + + return 0; +} + +/*! \brief Comparison function for submp_data objects. + * + * Sorting kernel comparing element (most important first): + * GrNr, DM_Density. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b, except for DM density, where -1 if a > b + */ +int subfind_compare_submp_GrNr_DM_Density(const void *a, const void *b) +{ + if(((struct submp_data *)a)->GrNr < ((struct submp_data *)b)->GrNr) + return -1; + + if(((struct submp_data *)a)->GrNr > ((struct submp_data *)b)->GrNr) + return +1; + + if(((struct submp_data *)a)->DM_Density > ((struct submp_data *)b)->DM_Density) + return -1; + + if(((struct submp_data *)a)->DM_Density < ((struct submp_data *)b)->DM_Density) + return +1; + + return 0; +} + +/*! 
\brief Comparison function for submp_data objects. + * + * Sorting kernel comparing element OldIndex. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_compare_submp_OldIndex(const void *a, const void *b) +{ + if(((struct submp_data *)a)->OldIndex < ((struct submp_data *)b)->OldIndex) + return -1; + + if(((struct submp_data *)a)->OldIndex > ((struct submp_data *)b)->OldIndex) + return +1; + + return 0; +} + +/*! \brief Comparison function for id_list objects. + * + * Sorting kernel comparing elements (most important first): + * GrNr, SubNr, Type, BindingEgy. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_compare_ID_list(const void *a, const void *b) +{ + if(((struct id_list *)a)->GrNr < ((struct id_list *)b)->GrNr) + return -1; + + if(((struct id_list *)a)->GrNr > ((struct id_list *)b)->GrNr) + return +1; + + if(((struct id_list *)a)->SubNr < ((struct id_list *)b)->SubNr) + return -1; + + if(((struct id_list *)a)->SubNr > ((struct id_list *)b)->SubNr) + return +1; + + if(((struct id_list *)a)->Type < ((struct id_list *)b)->Type) + return -1; + + if(((struct id_list *)a)->Type > ((struct id_list *)b)->Type) + return +1; + + if(((struct id_list *)a)->BindingEgy < ((struct id_list *)b)->BindingEgy) + return -1; + + if(((struct id_list *)a)->BindingEgy > ((struct id_list *)b)->BindingEgy) + return +1; + + return 0; +} + +/*! \brief Comparison function for subgroup_properties objects. + * + * Sorting kernel comparing elements (most important first): + * GrNr and SubNr. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int subfind_compare_SubGroup_GrNr_SubNr(const void *a, const void *b) +{ + if(((struct subgroup_properties *)a)->GrNr < ((struct subgroup_properties *)b)->GrNr) + return -1; + + if(((struct subgroup_properties *)a)->GrNr > ((struct subgroup_properties *)b)->GrNr) + return +1; + + if(((struct subgroup_properties *)a)->SubNr < ((struct subgroup_properties *)b)->SubNr) + return -1; + + if(((struct subgroup_properties *)a)->SubNr > ((struct subgroup_properties *)b)->SubNr) + return +1; + + return 0; +} + +/*! \brief Comparison function for sort_r2list objects. + * + * Sorting kernel comparing element r. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_compare_dist_rotcurve(const void *a, const void *b) +{ + if(((sort_r2list *)a)->r < ((sort_r2list *)b)->r) + return -1; + + if(((sort_r2list *)a)->r > ((sort_r2list *)b)->r) + return +1; + + return 0; +} + +/*! \brief Comparison function for variables of type double. + * + * Sorting kernel. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_compare_binding_energy(const void *a, const void *b) +{ + if(*((double *)a) > *((double *)b)) + return -1; + + if(*((double *)a) < *((double *)b)) + return +1; + + return 0; +} + +/*! \brief Comparison function for cand_dat objects. + * + * Sorting kernel comparing elements (most important first): + * bound_length and rank. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b, excpet bound length, where -1 if a > b. 
+ */ +int subfind_compare_serial_candidates_boundlength(const void *a, const void *b) +{ + if(((struct cand_dat *)a)->bound_length > ((struct cand_dat *)b)->bound_length) + return -1; + + if(((struct cand_dat *)a)->bound_length < ((struct cand_dat *)b)->bound_length) + return +1; + + if(((struct cand_dat *)a)->rank < ((struct cand_dat *)b)->rank) + return -1; + + if(((struct cand_dat *)a)->rank > ((struct cand_dat *)b)->rank) + return +1; + + return 0; +} + +/*! \brief Comparison function for cand_dat objects. + * + * Sorting kernel comparing elements (most important first): + * rank and len. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b, except for len where -1 if a>b. + */ +int subfind_compare_serial_candidates_rank(const void *a, const void *b) +{ + if(((struct cand_dat *)a)->rank < ((struct cand_dat *)b)->rank) + return -1; + + if(((struct cand_dat *)a)->rank > ((struct cand_dat *)b)->rank) + return +1; + + if(((struct cand_dat *)a)->len > ((struct cand_dat *)b)->len) + return -1; + + if(((struct cand_dat *)a)->len < ((struct cand_dat *)b)->len) + return +1; + + return 0; +} + +/*! \brief Comparison function for cand_dat objects. + * + * Sorting kernel comparing element subnr. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_compare_serial_candidates_subnr(const void *a, const void *b) +{ + if(((struct cand_dat *)a)->subnr < ((struct cand_dat *)b)->subnr) + return -1; + + if(((struct cand_dat *)a)->subnr > ((struct cand_dat *)b)->subnr) + return +1; + + return 0; +} + +/*! \brief Comparison function for coll_cand_dat objects. + * + * Sorting kernel comparing element subnr. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. 
+ */ +int subfind_compare_coll_candidates_subnr(const void *a, const void *b) +{ + if(((struct coll_cand_dat *)a)->subnr < ((struct coll_cand_dat *)b)->subnr) + return -1; + + if(((struct coll_cand_dat *)a)->subnr > ((struct coll_cand_dat *)b)->subnr) + return +1; + + return 0; +} + +/*! \brief Comparison function for coll_cand_dat objects. + * + * Sorting kernel comparing element nsub. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_compare_coll_candidates_nsubs(const void *a, const void *b) +{ + if(((struct coll_cand_dat *)a)->nsub < ((struct coll_cand_dat *)b)->nsub) + return -1; + + if(((struct coll_cand_dat *)a)->nsub > ((struct coll_cand_dat *)b)->nsub) + return +1; + + return 0; +} + +/*! \brief Comparison function for coll_cand_dat objects. + * + * Sorting kernel comparing elements (most important first): + * bound_length, rank. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b, except for bound length where -1 if a > b. + */ +int subfind_compare_coll_candidates_boundlength(const void *a, const void *b) +{ + if(((struct coll_cand_dat *)a)->bound_length > ((struct coll_cand_dat *)b)->bound_length) + return -1; + + if(((struct coll_cand_dat *)a)->bound_length < ((struct coll_cand_dat *)b)->bound_length) + return +1; + + if(((struct coll_cand_dat *)a)->rank < ((struct coll_cand_dat *)b)->rank) + return -1; + + if(((struct coll_cand_dat *)a)->rank > ((struct coll_cand_dat *)b)->rank) + return +1; + + return 0; +} + +/*! \brief Comparison function for coll_cand_dat objects. + * + * Sorting kernel comparing elements (most important first): + * rank and len. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. 
+ * + * \return (-1,0,1), -1 if a < b, except for len, where -1 if a > b + */ +int subfind_compare_coll_candidates_rank(const void *a, const void *b) +{ + if(((struct coll_cand_dat *)a)->rank < ((struct coll_cand_dat *)b)->rank) + return -1; + + if(((struct coll_cand_dat *)a)->rank > ((struct coll_cand_dat *)b)->rank) + return +1; + + if(((struct coll_cand_dat *)a)->len > ((struct coll_cand_dat *)b)->len) + return -1; + + if(((struct coll_cand_dat *)a)->len < ((struct coll_cand_dat *)b)->len) + return +1; + + return 0; +} + +/*! \brief Comparison function for variables of MyIDType. + * + * Sorting kernel. + * + * \param[in] a First object to compare. + * \param[in] b Second object to compare. + * + * \return (-1,0,1), -1 if a < b. + */ +int subfind_fof_compare_ID(const void *a, const void *b) +{ + if(*((MyIDType *)a) < *((MyIDType *)b)) + return -1; + + if(*((MyIDType *)a) > *((MyIDType *)b)) + return +1; + + return 0; +} + +#endif /* #ifdef SUBFIND */ diff --git a/src/amuse/community/arepo/src/subfind/subfind_vars.c b/src/amuse/community/arepo/src/subfind/subfind_vars.c new file mode 100644 index 0000000000..37d25a8cd5 --- /dev/null +++ b/src/amuse/community/arepo/src/subfind/subfind_vars.c @@ -0,0 +1,102 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/subfind/subfind_vars.c + * \date 05/2018 + * \brief Variables for the subfind algorithm. + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 14.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include "../main/allvars.h" + +#ifdef SUBFIND + +#include "../domain/domain.h" +#include "../fof/fof.h" +#include "subfind.h" + +double SubDomainCorner[3], SubDomainCenter[3], SubDomainLen, SubDomainFac; +double SubDomainInverseLen, SubDomainBigFac; + +MyDouble GrCM[3]; + +int GrNr; +int NumPartGroup; + +MPI_Comm SubComm; +int CommSplitColor; +int SubNTask, SubThisTask; +int SubTagOffset; + +struct topnode_data *SubTopNodes; +struct local_topnode_data *Sub_LocTopNodes; + +double SubTreeAllocFactor; + +#if defined(SUBFIND) && defined(SUBFIND_EXTENDED_PROPERTIES) +int *NodeGrNr; +#endif + +int *SubDomainTask; +int *SubDomainNodeIndex; +int *SubNextnode; +int SubNTopleaves; +int SubNTopnodes; + +int SubTree_MaxPart; +int SubTree_NumNodes; +int SubTree_MaxNodes; +int SubTree_FirstNonTopLevelNode; +int SubTree_NumPartImported; +int SubTree_NumPartExported; +int SubTree_ImportedNodeOffset; +int SubTree_NextFreeNode; +struct NODE *SubNodes; +struct ExtNODE *SubExtNodes; +int *SubTree_ResultIndexList; +int *SubTree_Task_list; +unsigned long long *SubTree_IntPos_list; +MyDouble *SubTree_Pos_list; + +int Ncollective; +int NprocsCollective; +int MaxNsubgroups = 0; +int MaxNgbs; +int MaxSerialGroupLen; + +r2type *R2list; + +int NumPaux; + +struct paux_data *Paux; +struct proc_assign_data *ProcAssign; +struct subgroup_properties *SubGroup; +struct nearest_r2_data *R2Loc; +struct nearest_ngb_data *NgbLoc; +struct submp_data *submp; +struct cand_dat *candidates; +struct coll_cand_dat *coll_candidates; + +#endif /* #ifdef SUBFIND */ diff --git 
a/src/amuse/community/arepo/src/time_integration/darkenergy.c b/src/amuse/community/arepo/src/time_integration/darkenergy.c new file mode 100644 index 0000000000..c04f181e9f --- /dev/null +++ b/src/amuse/community/arepo/src/time_integration/darkenergy.c @@ -0,0 +1,74 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/time_integration/darkenergy.c + * \date 05/2018 + * \brief Contains the hubble function for a LCDM cosmology. + * \details Using Dark Energy instead of a cosmological constant can be + * achieved by replacing Lambda by Lambda * a^(-3*(1+w)) in the + * Hubble function. w = -1 gives back a standard cosmological + * constant! Also w = -1/3 gives Lambda / a^2 which then cancels + * within the Hubble function and is then equal to the dynamics + * of a universe with Lambda = 0 ! + * + * For a time varying w one has to replace Lambda * a^(-3*(1+w)) + * by Lambda * exp(Integral(a,1,3*(1+w)/a)) + * + * Dark Energy does not alter the power spectrum of initial + * conditions.
To get the same cluster for various values or + * functions of w, once has do assign a new redshift to the + * initial conditions to match the linear growth factors, so + * g(z=0)/g(z_ini) == g_w(z=0)/g_w(z_ini^new). Also the initial + * velocities field has to be scaled by + *(Hubble_w(z_ini^new)*Omega_w(z_ini^new)^0.6)/(Hubble(z_ini)*Omega(z_ini)^0.6) + * where _w means the according functions including the terms for + * Dark Energy. + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Hubble function. + * + * Returns the Hubble function at a given scalefactor for a LCDM cosmology. + * + * \param[in] a Scalefactor. + * + * \return Hubble parameter in internal units. + */ +double INLINE_FUNC hubble_function(double a) +{ + double hubble_a; + + hubble_a = All.Omega0 / (a * a * a) + (1 - All.Omega0 - All.OmegaLambda) / (a * a) + All.OmegaLambda; + hubble_a = All.Hubble * sqrt(hubble_a); + + return (hubble_a); +} diff --git a/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c b/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c new file mode 100644 index 0000000000..88b7f89a34 --- /dev/null +++ b/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c @@ -0,0 +1,484 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/time_integration/do_gravity_hydro.c + * \date 05/2018 + * \brief Contains the two half step kick operators. + * \details This file contains the functions applying the gravitational + * acceleration to the particles (both gas and gravity only). + * The functions + * find_gravity_timesteps_and_do_gravity_step_first_half and + * do_gravity_step_second_half are directly called in the main + * time-evolution loop in run.c. + * contains functions: + * static inline void kick_particle(int i, double dt_gravkick, + * MySingle * Grav) + * void find_gravity_timesteps_and_do_gravity_step_first_half( + * void) + * void do_gravity_step_second_half(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +/*! \brief Applies gravity kick to particles. + * + * Apply change of velocity due to gravitational acceleration. + * For hydrodynamic cells, both velocity and momentum are updated. + * + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] dt_gravkick Timestep of gravity kick operation. + * \param[in] Grav Gravitational acceleration of particle. 
+ *
+ * \return void
+ */
+static inline void kick_particle(int i, double dt_gravkick, MySingle* Grav)
+{
+  int axis;
+
+  if(P[i].Type != 0)
+    {
+      /* collisionless particle: only the velocity is kicked */
+      for(axis = 0; axis < 3; axis++)
+        P[i].Vel[axis] += Grav[axis] * dt_gravkick;
+
+      return;
+    }
+
+  /* gas cell: remove the kinetic energy of the old velocity from the cell energy ... */
+  SphP[i].Energy -= 0.5 * P[i].Mass * (P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]);
+
+  /* ... kick velocity and momentum consistently ... */
+  for(axis = 0; axis < 3; axis++)
+    {
+      const double dv = Grav[axis] * dt_gravkick;
+
+      P[i].Vel[axis] += dv;
+      SphP[i].Momentum[axis] += P[i].Mass * dv;
+    }
+
+  /* ... and add back the kinetic energy of the new velocity */
+  SphP[i].Energy += 0.5 * P[i].Mass * (P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]);
+}
+
+/*! \brief Performs the first half step kick operator.
+ *
+ * This function applies a half step kick similar to
+ * do_gravity_step_second_half(). If we are on a PM step the kick due to
+ * the particle mesh's long range gravity is applied first. Afterwards the
+ * short range kick due to the tree force is added.
+ * In both cases the momentum and energy for gas cells is updated.
+ *
+ * \return void
+ */
+void find_gravity_timesteps_and_do_gravity_step_first_half(void)
+{
+#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
+
+  TIMER_START(CPU_DRIFTS);
+
+  int idx, i;
+  integertime ti_step, tstart, tend;
+  double dt_gravkick;
+
+#ifdef PMGRID
+  if(All.PM_Ti_endstep == All.Ti_Current) /* need to do long-range kick */
+    {
+      /* a new PM step begins here: advance the PM interval by the new PM timestep */
+      ti_step = get_timestep_pm();
+
+      All.PM_Ti_begstep = All.PM_Ti_endstep;
+      All.PM_Ti_endstep = All.PM_Ti_begstep + ti_step;
+
+      tstart = All.PM_Ti_begstep;
+      tend = tstart + ti_step / 2;
+
+      if(All.ComovingIntegrationOn)
+        dt_gravkick = get_gravkick_factor(tstart, tend);
+      else
+        dt_gravkick = (tend - tstart) * All.Timebase_interval;
+
+      /* the long-range kick is applied to all NumPart particles, not only to the active ones */
+      for(i = 0; i < NumPart; i++)
+        kick_particle(i, dt_gravkick, P[i].GravPM);
+    }
+#endif /* #ifdef PMGRID */
+
+#ifdef HIERARCHICAL_GRAVITY
+  /* First, move all active particles to the highest allowed timestep for this synchronization time.
+   * They will then cascade down to smaller timesteps as needed.
+   */
+
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      int i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+      int bin = All.HighestSynchronizedTimeBin;
+      int binold = P[i].TimeBinGrav;
+
+      timebin_move_particle(&TimeBinsGravity, i, binold, bin);
+      P[i].TimeBinGrav = bin;
+    }
+
+  long long Previous_GlobalNActiveGravity = TimeBinsGravity.GlobalNActiveParticles;
+
+  /* NOTE(review): dt_gravsum is accumulated below but never read in this function */
+  double dt_gravsum = 0;
+
+  int bin_highest_occupied = 0;
+  int timebin;
+  /* go over all timebins, from the highest synchronized one down to bin 0 */
+
+  for(timebin = All.HighestSynchronizedTimeBin; timebin >= 0; timebin--)
+    {
+      /* the active list is rebuilt to hold only the particles currently in this timebin */
+      TimeBinsGravity.NActiveParticles = 0;
+      timebin_add_particles_of_timebin_to_list_of_active_particles(&TimeBinsGravity, timebin);
+      sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);
+
+      if(TimeBinsGravity.GlobalNActiveParticles == 0) /* we are done at this point */
+        break;
+
+      /* calculate gravity for all active particles; skipped when the global number of active
+       * particles is unchanged with respect to the previously kicked level */
+      if(TimeBinsGravity.GlobalNActiveParticles != Previous_GlobalNActiveGravity)
+        {
+          TIMER_STOP(CPU_DRIFTS);
+
+          compute_grav_accelerations(timebin, FLAG_PARTIAL_TREE);
+
+          TIMER_START(CPU_DRIFTS);
+        }
+
+      /* count the local particles whose timestep is too large for this bin */
+      int nfine = 0;
+      for(int i = 0; i < TimeBinsGravity.NActiveParticles; i++)
+        {
+          /* NOTE(review): unlike the other loops over ActiveParticleList in this function, this one
+           * does not skip negative entries before indexing P[] -- confirm that the list cannot
+           * contain negative entries at this point */
+          int target = TimeBinsGravity.ActiveParticleList[i];
+          int binold = P[target].TimeBinGrav;
+
+          if(test_if_grav_timestep_is_too_large(target, binold))
+            nfine++;
+        }
+
+      long long nfine_tot;
+      sumup_large_ints(1, &nfine, &nfine_tot);
+
+      /* if more than 33% of the globally active particles need a finer step, all active particles
+       * of this bin are pushed down */
+      int push_down_flag = 0;
+      if(nfine_tot > 0.33 * TimeBinsGravity.GlobalNActiveParticles)
+        push_down_flag = 1;
+
+      for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+        {
+          int i = TimeBinsGravity.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+          int binold = P[i].TimeBinGrav;
+
+          if(push_down_flag || test_if_grav_timestep_is_too_large(i, binold))
+            {
+              int bin = binold - 1;
+              if(bin == 0) /* pushing a particle down into bin 0 is treated as a fatal error */
+                {
+                  print_particle_info(i);
+                  terminate("timestep too small");
+                }
+
+              timebin_move_particle(&TimeBinsGravity, i, binold, bin);
+              P[i].TimeBinGrav = bin;
+            }
+          else if(binold > bin_highest_occupied)
+            bin_highest_occupied = binold; /* highest bin in which a local particle stays */
+        }
+
+      /* All.HighestOccupiedTimeBin is still 0: adopt the global maximum found so far */
+      if(All.HighestOccupiedTimeBin == 0)
+        {
+          MPI_Allreduce(&bin_highest_occupied, &All.HighestOccupiedTimeBin, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+          if(All.HighestOccupiedTimeBin > 0)
+            {
+              mpi_printf("KICKS: Special Start-up Fix: All.HighestOccupiedGravTimeBin=%d\n", All.HighestOccupiedTimeBin);
+
+              for(i = 0; i < GRAVCOSTLEVELS; i++)
+                {
+                  if(All.LevelToTimeBin[i] == 0)
+                    All.LevelToTimeBin[i] = All.HighestOccupiedTimeBin;
+                }
+            }
+        }
+
+      if(TimeBinsGravity.GlobalNActiveParticles)
+        {
+          /* half-step kick of this timebin ... */
+          ti_step = timebin ? (((integertime)1) << timebin) : 0;
+          tstart = All.Ti_begstep[timebin]; /* beginning of step */
+          tend = tstart + ti_step / 2; /* midpoint of step */
+
+          if(All.ComovingIntegrationOn)
+            dt_gravkick = get_gravkick_factor(tstart, tend);
+          else
+            dt_gravkick = (tend - tstart) * All.Timebase_interval;
+
+          /* ... minus the half-step kick of the next higher timebin (not for the top level) */
+          if(timebin < All.HighestSynchronizedTimeBin)
+            {
+              ti_step = (timebin + 1) ? (((integertime)1) << (timebin + 1)) : 0;
+
+              tstart = All.Ti_begstep[timebin + 1]; /* beginning of step */
+              tend = tstart + ti_step / 2; /* midpoint of step */
+
+              if(All.ComovingIntegrationOn)
+                dt_gravkick -= get_gravkick_factor(tstart, tend);
+              else
+                dt_gravkick -= (tend - tstart) * All.Timebase_interval;
+            }
+
+          dt_gravsum += dt_gravkick;
+
+          mpi_printf("KICKS: 1st gravity for hierarchical timebin=%d: %lld particles\n", timebin,
+                     TimeBinsGravity.GlobalNActiveParticles);
+
+          for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+            {
+              int i = TimeBinsGravity.ActiveParticleList[idx];
+              if(i < 0)
+                continue;
+
+              kick_particle(i, dt_gravkick, P[i].GravAccel);
+            }
+          Previous_GlobalNActiveGravity = TimeBinsGravity.GlobalNActiveParticles;
+        }
+    }
+
+  /* reconstruct list of active particles because it is used for other things too (i.e. wind particles) */
+  timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin);
+  sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);
+#else /* #ifdef HIERARCHICAL_GRAVITY */
+
+#ifdef FORCE_EQUAL_TIMESTEPS
+  // gravity timebin is already set, and not anymore 0 as All.HighestActiveTimeBin, but all particles should receive a first half kick
+  // in the 0-th timestep
+  if(All.NumCurrentTiStep == 0)
+    timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, TIMEBINS);
+  else
+#endif /* #ifdef FORCE_EQUAL_TIMESTEPS */
+    timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin);
+  sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);
+
+  mpi_printf("KICKS: 1st gravity for highest active timebin=%d: particles %lld\n", All.HighestActiveTimeBin,
+             TimeBinsGravity.GlobalNActiveParticles);
+
+  for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+    {
+      i = TimeBinsGravity.ActiveParticleList[idx];
+      if(i < 0)
+        continue;
+
+#ifndef FORCE_EQUAL_TIMESTEPS
+      /* determine the new gravity timebin of the particle */
+      int binold = P[i].TimeBinGrav;
+      int bin = -1;
+
+      ti_step = get_timestep_gravity(i);
+      timebins_get_bin_and_do_validity_checks(ti_step, &bin, P[i].TimeBinGrav);
+
+      /* a gas cell never gets a gravity timebin above its hydro timebin */
+      if(P[i].Type == 0)
+        {
+          int bin_hydro = P[i].TimeBinHydro;
+          if(bin_hydro < bin)
+            bin = bin_hydro;
+        }
+
+      ti_step = bin ? (((integertime)1) << bin) : 0;
+
+      timebin_move_particle(&TimeBinsGravity, i, binold, bin);
+      P[i].TimeBinGrav = bin;
+#else /* #ifndef FORCE_EQUAL_TIMESTEPS */
+      int bin = P[i].TimeBinGrav;
+      ti_step = bin ? (((integertime)1) << bin) : 0;
+#endif /* #ifndef FORCE_EQUAL_TIMESTEPS #else */
+
+      tstart = All.Ti_begstep[bin]; /* beginning of step */
+      tend = tstart + ti_step / 2; /* midpoint of step */
+
+      if(All.ComovingIntegrationOn)
+        dt_gravkick = get_gravkick_factor(tstart, tend);
+      else
+        dt_gravkick = (tend - tstart) * All.Timebase_interval;
+
+      kick_particle(i, dt_gravkick, P[i].GravAccel);
+    }
+#endif /* #ifdef HIERARCHICAL_GRAVITY #else */
+
+  TIMER_STOP(CPU_DRIFTS);
+#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) */
+}
+
+/*! \brief Performs the second half step kick operator.
+ *
+ * This function applies a half step kick similar to
+ * find_gravity_timesteps_and_do_gravity_step_first_half(). First the short
+ * range kick due to the tree force is added. If we are on a PM step the
+ * kick due to the particle mesh's long range gravity is applied too. In
+ * both cases the momentum and energy for gas cells is updated.
+ *
+ * On task 0 a line describing the current step is also written to
+ * FdTimings.
+ *
+ * \return void
+ */
+void do_gravity_step_second_half(void)
+{
+#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
+  TIMER_START(CPU_DRIFTS);
+  int idx;
+  char fullmark[8];
+
+  /* "(*)" marks steps in which the highest active timebin equals the highest occupied one */
+  if(All.HighestActiveTimeBin == All.HighestOccupiedTimeBin)
+    sprintf(fullmark, "(*)");
+  else
+    fullmark[0] = 0;
+
+  if(ThisTask == 0)
+    fprintf(FdTimings, "\nStep%s: %d, t: %g, dt: %g, highest active timebin: %d (lowest active: %d, highest occupied: %d)\n",
+            fullmark, All.NumCurrentTiStep, All.Time, All.TimeStep, All.HighestActiveTimeBin, All.LowestActiveTimeBin,
+            All.HighestOccupiedTimeBin);
+
+  double dt_gravkick;
+#ifdef PMGRID
+  if(All.PM_Ti_endstep == All.Ti_Current) /* need to do long-range kick */
+    {
+      /* only the long-range force is computed here; the kick itself is applied at the end of this function */
+      TIMER_STOP(CPU_DRIFTS);
+      long_range_force();
+      TIMER_START(CPU_DRIFTS);
+    }
+#endif /* #ifdef PMGRID */
+#ifdef HIERARCHICAL_GRAVITY
+  /* go over all timebins in ascending order (0 ... All.HighestActiveTimeBin), so that we end up getting the
+   * cumulative force at the end */
+  for(int timebin = 0; timebin <= All.HighestActiveTimeBin; timebin++)
+    {
+      if(TimeBinSynchronized[timebin])
+        {
+          /* need to make all timebins below the current one active */
+          timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, timebin);
+          sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);
+
+          if(TimeBinsGravity.GlobalNActiveParticles)
+            {
+              TIMER_STOP(CPU_DRIFTS);
+
+              /* the full tree flag is only passed for the highest active timebin */
+              compute_grav_accelerations(timebin, (timebin == All.HighestActiveTimeBin) ? FLAG_FULL_TREE : FLAG_PARTIAL_TREE);
+
+              TIMER_START(CPU_DRIFTS);
+
+              mpi_printf("KICKS: 2nd gravity for hierarchical timebin=%d: particles %lld\n", timebin,
+                         TimeBinsGravity.GlobalNActiveParticles);
+
+              integertime ti_step = timebin ? (((integertime)1) << timebin) : 0;
+
+              integertime tend = All.Ti_begstep[timebin]; /* end of step (Note: All.Ti_begstep[] has already been advanced for the next
+                                                              step at this point) */
+              integertime tstart = tend - ti_step / 2; /* midpoint of step */
+
+              if(All.ComovingIntegrationOn)
+                dt_gravkick = get_gravkick_factor(tstart, tend);
+              else
+                dt_gravkick = (tend - tstart) * All.Timebase_interval;
+
+              /* subtract the half-step kick of the next higher timebin (not for the top level) */
+              if(timebin < All.HighestActiveTimeBin)
+                {
+                  ti_step = (timebin + 1) ? (((integertime)1) << (timebin + 1)) : 0;
+
+                  tend = All.Ti_begstep[timebin + 1]; /* end of step (Note: All.Ti_begstep[] has already been advanced for the next
+                                                          step at this point) */
+                  tstart = tend - ti_step / 2; /* midpoint of step */
+
+                  if(All.ComovingIntegrationOn)
+                    dt_gravkick -= get_gravkick_factor(tstart, tend);
+                  else
+                    dt_gravkick -= (tend - tstart) * All.Timebase_interval;
+                }
+
+              for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+                {
+                  int i = TimeBinsGravity.ActiveParticleList[idx];
+                  if(i < 0)
+                    continue;
+
+                  kick_particle(i, dt_gravkick, P[i].GravAccel);
+
+                  /* gas cells keep a copy of the acceleration computed at the highest occupied timebin */
+                  if(P[i].Type == 0)
+                    {
+                      if(All.HighestOccupiedTimeBin == timebin)
+                        for(int j = 0; j < 3; j++)
+                          SphP[i].FullGravAccel[j] = P[i].GravAccel[j];
+                    }
+                }
+            }
+        }
+    }
+
+#else /* #ifdef HIERARCHICAL_GRAVITY */
+  timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin);
+  sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles);
+
+  if(TimeBinsGravity.GlobalNActiveParticles)
+    {
+      TIMER_STOP(CPU_DRIFTS);
+
+      /* calculate gravity for all active particles */
+      compute_grav_accelerations(All.HighestActiveTimeBin, FLAG_FULL_TREE);
+
+      TIMER_START(CPU_DRIFTS);
+
+      mpi_printf("KICKS: 2nd gravity for highest active timebin=%d: particles %lld\n", All.HighestActiveTimeBin,
+                 TimeBinsGravity.GlobalNActiveParticles);
+
+      for(idx = 0; idx < TimeBinsGravity.NActiveParticles; idx++)
+        {
+          int i = TimeBinsGravity.ActiveParticleList[idx];
+          if(i < 0)
+            continue;
+
+          /* each particle is kicked over the second half of the step of its own gravity timebin */
+          integertime ti_step = P[i].TimeBinGrav ? (((integertime)1) << P[i].TimeBinGrav) : 0;
+          integertime tend = All.Ti_begstep[P[i].TimeBinGrav];
+          integertime tstart = tend - ti_step / 2; /* midpoint of step */
+
+          if(All.ComovingIntegrationOn)
+            dt_gravkick = get_gravkick_factor(tstart, tend);
+          else
+            dt_gravkick = (tend - tstart) * All.Timebase_interval;
+
+          kick_particle(i, dt_gravkick, P[i].GravAccel);
+        }
+    }
+#endif /* #ifdef HIERARCHICAL_GRAVITY #else */
+
+#ifdef PMGRID
+  if(All.PM_Ti_endstep == All.Ti_Current) /* need to do long-range kick */
+    {
+      /* second half of the PM interval [PM_Ti_begstep, PM_Ti_endstep], applied to all NumPart particles */
+      integertime ti_step = All.PM_Ti_endstep - All.PM_Ti_begstep;
+      integertime tstart = All.PM_Ti_begstep + ti_step / 2;
+      integertime tend = tstart + ti_step / 2;
+
+      if(All.ComovingIntegrationOn)
+        dt_gravkick = get_gravkick_factor(tstart, tend);
+      else
+        dt_gravkick = (tend - tstart) * All.Timebase_interval;
+
+      for(int i = 0; i < NumPart; i++)
+        kick_particle(i, dt_gravkick, P[i].GravPM);
+    }
+#endif /* #ifdef PMGRID */
+
+  TIMER_STOP(CPU_DRIFTS);
+#endif /* #if (defined(SELFGRAVITY) || defined(EXTERNALGRAVITY)|| defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX) */
+}
diff --git a/src/amuse/community/arepo/src/time_integration/driftfac.c b/src/amuse/community/arepo/src/time_integration/driftfac.c
new file mode 100644
index 0000000000..eae2e438aa
--- /dev/null
+++ b/src/amuse/community/arepo/src/time_integration/driftfac.c
@@ -0,0 +1,307 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * <https://www.gnu.org/licenses/>.
+ *
+ * \file src/time_integration/driftfac.c
+ * \date 05/2018
+ * \brief Methods for drift and kick pre-factors needed for
+ * simulations in a cosmologically expanding box.
+ * \details contains functions:
+ * double drift_integ(double a, void *param)
+ * double gravkick_integ(double a, void *param)
+ * double hydrokick_integ(double a, void *param)
+ * void init_drift_table(void)
+ * double get_drift_factor(integertime time0, integertime time1)
+ * double get_gravkick_factor(integertime time0, integertime
+ * time1)
+ * double get_hydrokick_factor(integertime time0, integertime
+ * time1)
+ *
+ * \par Major modifications and contributions:
+ *
+ * - DD.MM.YYYY Description
+ * - 05.05.2018 Prepared file for public release -- Rainer Weinberger
+ */
+
+/* NOTE(review): the header names of the seven #include lines below are
+ * missing in this copy (the angle-bracketed text appears to have been
+ * stripped); restore them from the upstream AREPO sources. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../main/allvars.h"
+#include "../main/proto.h"
+
+/* Layout shared by the three tables below, as filled by init_drift_table():
+ * entry i holds the integral of the respective integrand from
+ * a = exp(logTimeBegin) to
+ * a = exp(logTimeBegin + (i + 1) * (logTimeMax - logTimeBegin) / DRIFT_TABLE_LENGTH). */
+
+/*! table for the cosmological drift factors */
+static double DriftTable[DRIFT_TABLE_LENGTH];
+
+/*! table for the cosmological kick factor for gravitational forces */
+static double GravKickTable[DRIFT_TABLE_LENGTH];
+
+/*! table for the cosmological kick factor for hydrodynamical forces */
+static double HydroKickTable[DRIFT_TABLE_LENGTH];
+
+/*! log(All.TimeBegin), set by init_drift_table() */
+static double logTimeBegin;
+/*! log(All.TimeMax), set by init_drift_table() */
+static double logTimeMax;
+
+/*! \brief Integrand for drift factor calculation.
+ *
+ * For cosmological simulations.
+ *
+ * \param[in] a Scalefactor.
+ * \param[in] param (unused)
+ *
+ * \return Integrand for drift factor calculation, 1 / (H(a) * a^3).
+ */
+double drift_integ(double a, void *param)
+{
+  /* param is not used; it is only part of the callback signature */
+  const double denominator = hubble_function(a) * a * a * a;
+
+  return 1 / denominator;
+}
+
+/*! \brief Integrand of the gravitational kick factor.
+ *
+ * For cosmological simulations.
+ *
+ * \param[in] a Scalefactor.
+ * \param[in] param (unused)
+ *
+ * \return 1 / (H(a) * a^2), with H(a) taken from hubble_function().
+ */
+double gravkick_integ(double a, void *param)
+{
+  const double denominator = hubble_function(a) * a * a;
+
+  return 1 / denominator;
+}
+
+/*! \brief Integrand of the hydrodynamical kick factor.
+ *
+ * For cosmological simulations.
+ *
+ * \param[in] a Scalefactor.
+ * \param[in] param (unused)
+ *
+ * \return 1 / (H(a) * a^(3 * GAMMA_MINUS1) * a), with H(a) taken from
+ * hubble_function().
+ */
+double hydrokick_integ(double a, void *param)
+{
+  const double denominator = hubble_function(a) * pow(a, 3 * GAMMA_MINUS1) * a;
+
+  return 1 / denominator;
+}
+
+/*! \brief Initializes lookup table for cosmological pre-factors for a drift.
+ *
+ * Numerical integrals using the integrand functions defined above.
+ *
+ * Fills DriftTable, GravKickTable and HydroKickTable. Entry i of each table
+ * holds the integral of the respective integrand from a = All.TimeBegin to
+ * a = exp(logTimeBegin + (i + 1) * (logTimeMax - logTimeBegin) /
+ * DRIFT_TABLE_LENGTH). Also sets logTimeBegin and logTimeMax.
+ *
+ * \return void
+ */
+void init_drift_table(void)
+{
+#define WORKSIZE 100000
+  int i;
+  double result, abserr;
+
+  gsl_function F;
+  gsl_integration_workspace *workspace;
+
+  /* the integrands ignore their parameter pointer, but GSL still passes F.params on to them,
+   * so it must not be left uninitialized */
+  F.params = NULL;
+
+  logTimeBegin = log(All.TimeBegin);
+  logTimeMax = log(All.TimeMax);
+
+  workspace = gsl_integration_workspace_alloc(WORKSIZE);
+
+  /* all integrals share the lower limit; the upper limits are equidistant in log(a) */
+  const double a_begin = exp(logTimeBegin);
+  const double dloga = (logTimeMax - logTimeBegin) / DRIFT_TABLE_LENGTH;
+
+  for(i = 0; i < DRIFT_TABLE_LENGTH; i++)
+    {
+      /* upper limit of table entry i, shared by the three integrals */
+      const double a_end = exp(logTimeBegin + dloga * (i + 1));
+
+      F.function = &drift_integ;
+      gsl_integration_qag(&F, a_begin, a_end, 0, 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS41, workspace, &result, &abserr);
+      DriftTable[i] = result;
+
+      F.function = &gravkick_integ;
+      gsl_integration_qag(&F, a_begin, a_end, 0, 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS41, workspace, &result, &abserr);
+      GravKickTable[i] = result;
+
+      F.function = &hydrokick_integ;
+      gsl_integration_qag(&F, a_begin, a_end, 0, 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS41, workspace, &result, &abserr);
+      HydroKickTable[i] = result;
+    }
+
+  gsl_integration_workspace_free(workspace);
+#undef WORKSIZE /* keep the helper macro local to this function */
+}
+
+/*! \brief This function integrates the cosmological prefactor for a drift
+ * step between time0 and time1. A lookup-table is used for reasons
+ * of speed.
+ *
+ * \param[in] time0 Start time.
+ * \param[in] time1 End time.
+ *
+ * \return \f[ \int_{a_0}^{a_1} \frac{{\rm d}a}{a^3 H(a)} \f].
+ */
+double get_drift_factor(integertime time0, integertime time1)
+{
+  /* one-entry cache: the value of the previous call is returned when both arguments repeat.
+   * NOTE(review): the cache is not reset when init_drift_table() refills the table. */
+  static integertime last_time0 = -1, last_time1 = -1;
+  static double last_value;
+
+  if(time0 == last_time0 && time1 == last_time1)
+    return last_value;
+
+  /* note: will only be called for cosmological integration */
+
+  const integertime times[2] = {time0, time1};
+  double df[2];
+
+  for(int k = 0; k < 2; k++)
+    {
+      /* u is the position in units of the table spacing; DriftTable[i] belongs to u = i + 1,
+       * and the integral vanishes at u = 0 */
+      const double loga = logTimeBegin + times[k] * All.Timebase_interval;
+      const double u = (loga - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH;
+
+      int idx = (int)u;
+      if(idx >= DRIFT_TABLE_LENGTH)
+        idx = DRIFT_TABLE_LENGTH - 1;
+
+      /* only the first interval interpolates between 0 and DriftTable[0]; the former test
+       * 'idx <= 1' also used that formula for 1 <= u < 2 instead of interpolating between
+       * DriftTable[0] and DriftTable[1], which produced a jump at u = 2 */
+      if(idx < 1)
+        df[k] = u * DriftTable[0];
+      else
+        df[k] = DriftTable[idx - 1] + (DriftTable[idx] - DriftTable[idx - 1]) * (u - idx);
+    }
+
+  last_time0 = time0;
+  last_time1 = time1;
+
+  return last_value = (df[1] - df[0]);
+}
+
+/*! \brief This function integrates the cosmological prefactor for a
+ * gravitational kick between time0 and time1. A lookup-table is used
+ * for reasons of speed.
+ *
+ * \param[in] time0 Start time.
+ * \param[in] time1 End time.
+ *
+ * \return Gravkick factor.
+ */
+double get_gravkick_factor(integertime time0, integertime time1)
+{
+  /* one-entry cache: the value of the previous call is returned when both arguments repeat.
+   * NOTE(review): the cache is not reset when init_drift_table() refills the table. */
+  static integertime last_time0 = -1, last_time1 = -1;
+  static double last_value;
+
+  if(time0 == last_time0 && time1 == last_time1)
+    return last_value;
+
+  /* note: will only be called for cosmological integration */
+
+  const integertime times[2] = {time0, time1};
+  double df[2];
+
+  for(int k = 0; k < 2; k++)
+    {
+      /* u is the position in units of the table spacing; GravKickTable[i] belongs to u = i + 1,
+       * and the integral vanishes at u = 0 */
+      const double loga = logTimeBegin + times[k] * All.Timebase_interval;
+      const double u = (loga - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH;
+
+      int idx = (int)u;
+      if(idx >= DRIFT_TABLE_LENGTH)
+        idx = DRIFT_TABLE_LENGTH - 1;
+
+      /* only the first interval interpolates between 0 and GravKickTable[0]; the former test
+       * 'idx <= 1' also used that formula for 1 <= u < 2 instead of interpolating between
+       * GravKickTable[0] and GravKickTable[1], which produced a jump at u = 2 */
+      if(idx < 1)
+        df[k] = u * GravKickTable[0];
+      else
+        df[k] = GravKickTable[idx - 1] + (GravKickTable[idx] - GravKickTable[idx - 1]) * (u - idx);
+    }
+
+  last_time0 = time0;
+  last_time1 = time1;
+
+  return last_value = (df[1] - df[0]);
+}
+
+/*! \brief This function integrates the cosmological prefactor for a
+ * hydrodynamical kick between time0 and time1. A lookup-table is
+ * used for reasons of speed.
+ *
+ * \param[in] time0 Start time
+ * \param[in] time1 End time
+ *
+ * \return Hydro kick factor.
+ */ +double get_hydrokick_factor(integertime time0, integertime time1) +{ + double a1, a2, df1, df2, u1, u2; + int i1, i2; + static integertime last_time0 = -1, last_time1 = -1; + static double last_value; + + if(time0 == last_time0 && time1 == last_time1) + return last_value; + + /* note: will only be called for cosmological integration */ + + a1 = logTimeBegin + time0 * All.Timebase_interval; + a2 = logTimeBegin + time1 * All.Timebase_interval; + + u1 = (a1 - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH; + i1 = (int)u1; + if(i1 >= DRIFT_TABLE_LENGTH) + i1 = DRIFT_TABLE_LENGTH - 1; + + if(i1 <= 1) + df1 = u1 * HydroKickTable[0]; + else + df1 = HydroKickTable[i1 - 1] + (HydroKickTable[i1] - HydroKickTable[i1 - 1]) * (u1 - i1); + + u2 = (a2 - logTimeBegin) / (logTimeMax - logTimeBegin) * DRIFT_TABLE_LENGTH; + i2 = (int)u2; + if(i2 >= DRIFT_TABLE_LENGTH) + i2 = DRIFT_TABLE_LENGTH - 1; + + if(i2 <= 1) + df2 = u2 * HydroKickTable[0]; + else + df2 = HydroKickTable[i2 - 1] + (HydroKickTable[i2] - HydroKickTable[i2 - 1]) * (u2 - i2); + + last_time0 = time0; + last_time1 = time1; + + return last_value = (df2 - df1); +} diff --git a/src/amuse/community/arepo/src/time_integration/predict.c b/src/amuse/community/arepo/src/time_integration/predict.c new file mode 100644 index 0000000000..f377af5b5e --- /dev/null +++ b/src/amuse/community/arepo/src/time_integration/predict.c @@ -0,0 +1,506 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/time_integration/predict.c + * \date 05/2018 + * \brief Routines to find the next sync point, manage the list + * of active timebins/active particles and to drift particles. + * \details contains functions: + * void reconstruct_timebins(void) + * void find_next_sync_point(void) + * void mark_active_timebins(void) + * void drift_all_particles(void) + * void drift_particle(int i, integertime time1) + * static int int_compare(const void *a, const void *b) + * void make_list_of_active_particles(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 08.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief This function (re)builds the time bin lists. + * + * It counts the number of particles in each timebin and updates the + * linked lists containing the particles of each time bin. Afterwards the + * linked list of active particles is updated by + * make_list_of_active_particles(). + * + * The linked lists for each timebin are stored in 'FirstInTimeBin', + * 'LastInTimeBin', 'PrevInTimeBin' and 'NextInTimeBin'. The counters + * of particles per timebin are 'TimeBinCount' and 'TimeBinCountSph'. 
+ * + * \return void + */ +void reconstruct_timebins(void) +{ + TIMER_START(CPU_TIMELINE); + + int i, bin; + + for(bin = 0; bin < TIMEBINS; bin++) + { + TimeBinsHydro.TimeBinCount[bin] = 0; + TimeBinsHydro.FirstInTimeBin[bin] = -1; + TimeBinsHydro.LastInTimeBin[bin] = -1; + + TimeBinsGravity.TimeBinCount[bin] = 0; + TimeBinsGravity.FirstInTimeBin[bin] = -1; + TimeBinsGravity.LastInTimeBin[bin] = -1; + +#ifdef USE_SFR + TimeBinSfr[bin] = 0; +#endif + } + + for(i = 0; i < NumGas; i++) + { + if(P[i].ID == 0 && P[i].Mass == 0) + continue; + + if(P[i].Type != 0) + continue; + + bin = P[i].TimeBinHydro; + + if(TimeBinsHydro.TimeBinCount[bin] > 0) + { + TimeBinsHydro.PrevInTimeBin[i] = TimeBinsHydro.LastInTimeBin[bin]; + TimeBinsHydro.NextInTimeBin[i] = -1; + TimeBinsHydro.NextInTimeBin[TimeBinsHydro.LastInTimeBin[bin]] = i; + TimeBinsHydro.LastInTimeBin[bin] = i; + } + else + { + TimeBinsHydro.FirstInTimeBin[bin] = TimeBinsHydro.LastInTimeBin[bin] = i; + TimeBinsHydro.PrevInTimeBin[i] = TimeBinsHydro.NextInTimeBin[i] = -1; + } + TimeBinsHydro.TimeBinCount[bin]++; + +#ifdef USE_SFR + TimeBinSfr[bin] += SphP[i].Sfr; +#endif + } + + for(i = 0; i < NumPart; i++) + { + if(P[i].ID == 0 && P[i].Mass == 0) + continue; + + bin = P[i].TimeBinGrav; + + if(TimeBinsGravity.TimeBinCount[bin] > 0) + { + TimeBinsGravity.PrevInTimeBin[i] = TimeBinsGravity.LastInTimeBin[bin]; + TimeBinsGravity.NextInTimeBin[i] = -1; + TimeBinsGravity.NextInTimeBin[TimeBinsGravity.LastInTimeBin[bin]] = i; + TimeBinsGravity.LastInTimeBin[bin] = i; + } + else + { + TimeBinsGravity.FirstInTimeBin[bin] = TimeBinsGravity.LastInTimeBin[bin] = i; + TimeBinsGravity.PrevInTimeBin[i] = TimeBinsGravity.NextInTimeBin[i] = -1; + } + TimeBinsGravity.TimeBinCount[bin]++; + } + + make_list_of_active_particles(); + + TIMER_STOP(CPU_TIMELINE); +} + +/*! \brief This function finds the next synchronization point of the system. + * (i.e. the earliest point of time any of the particles needs a force + * computation). 
+ *
+ *  \return void
+ */
+void find_next_sync_point(void)
+{
+  int bin;
+  integertime next_local, next_global;
+  double time_before;
+
+  TIMER_START(CPU_DRIFTS);
+
+  time_before = All.Time;
+
+  All.NumCurrentTiStep++;
+
+  /* earliest upcoming kick time among the occupied bins of this task */
+  next_local = TIMEBASE;
+
+  for(bin = 0; bin < TIMEBINS; bin++)
+    {
+      integertime next_for_bin;
+      int occupancy = TimeBinsHydro.TimeBinCount[bin];
+
+#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
+      occupancy += TimeBinsGravity.TimeBinCount[bin];
+#endif /* gravity enabled and not MESHRELAX */
+
+      if(!occupancy)
+        continue;
+
+      if(bin == 0)
+        {
+          /* bin 0 does not advance the integer time */
+          next_for_bin = All.Ti_Current;
+        }
+      else
+        {
+          /* round the current time down to a multiple of the bin length, then add one bin length */
+          const integertime bin_len = ((integertime)1) << bin;
+
+          next_for_bin = (All.Ti_Current / bin_len) * bin_len + bin_len;
+        }
+
+      if(next_for_bin < next_local)
+        next_local = next_for_bin;
+    }
+
+  /* global minimum over all tasks */
+#ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME
+  minimum_large_ints(1, &next_local, &next_global);
+#else  /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME */
+  MPI_Allreduce(&next_local, &next_global, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+#endif /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME #else */
+
+  All.Previous_Ti_Current = All.Ti_Current;
+  All.Ti_Current          = next_global;
+
+  /* translate the integer time into the time variable of the run */
+  if(All.ComovingIntegrationOn)
+    All.Time = All.TimeBegin * exp(All.Ti_Current * All.Timebase_interval);
+  else
+    All.Time = All.TimeBegin + All.Ti_Current * All.Timebase_interval;
+
+  set_cosmo_factors_for_current_time();
+
+  All.TimeStep = All.Time - time_before;
+
+  mark_active_timebins();
+
+  TIMER_STOP(CPU_DRIFTS);
+}
+
+/*! \brief Sets active timebins for current time-step in global variables.
+ *
+ *  \return void
+ */
+void mark_active_timebins(void)
+{
+  int bin;
+  /* per-task extrema. slot 0: bins occupied in hydro or gravity, slot 1: bins occupied in gravity,
+     slot 2: occupied bins that are synchronized, slot 3 (maxima only): synchronized bins */
+  int lo_local[3] = {TIMEBINS, TIMEBINS, TIMEBINS}, lo_global[3];
+  int hi_local[4] = {0, 0, 0, 0}, hi_global[4];
+  int nsync_grav = 0, nsync_hydro = 0;
+
+  /* mark the bins that will be synchronized/active */
+
+  for(bin = 0; bin < TIMEBINS; bin++)
+    {
+      const int n_grav          = TimeBinsGravity.TimeBinCount[bin];
+      const int n_hydro         = TimeBinsHydro.TimeBinCount[bin];
+      const int occupied        = n_hydro + n_grav;
+      const integertime bin_len = ((integertime)1) << bin;
+
+      if(n_grav)
+        {
+          if(hi_local[1] < bin)
+            hi_local[1] = bin;
+
+          if(lo_local[1] > bin)
+            lo_local[1] = bin;
+        }
+
+      if(occupied)
+        {
+          if(hi_local[0] < bin)
+            hi_local[0] = bin;
+
+          if(lo_local[0] > bin)
+            lo_local[0] = bin;
+        }
+
+      /* a bin is synchronized when the current integer time is a multiple of its length */
+      if((All.Ti_Current % bin_len) != 0)
+        {
+          TimeBinSynchronized[bin] = 0;
+          continue;
+        }
+
+      TimeBinSynchronized[bin] = 1;
+      All.Ti_begstep[bin]      = All.Ti_Current;
+
+      nsync_grav += n_grav;
+      nsync_hydro += n_hydro;
+
+      if(hi_local[3] < bin)
+        hi_local[3] = bin;
+
+      if(occupied)
+        {
+          if(hi_local[2] < bin)
+            hi_local[2] = bin;
+
+          if(lo_local[2] > bin)
+            lo_local[2] = bin;
+        }
+    }
+
+  MPI_Allreduce(lo_local, lo_global, 3, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+  All.LowestOccupiedTimeBin     = lo_global[0];
+  All.LowestOccupiedGravTimeBin = lo_global[1];
+  All.LowestActiveTimeBin       = lo_global[2];
+
+  MPI_Allreduce(hi_local, hi_global, 4, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  All.HighestOccupiedTimeBin     = hi_global[0];
+  All.HighestOccupiedGravTimeBin = hi_global[1];
+  All.HighestActiveTimeBin       = hi_global[2];
+  All.HighestSynchronizedTimeBin = hi_global[3];
+
+  /* note: the lowest synchronized bin is always 1 */
+
+  /* layout of the summed buffer: [0] synchronized hydro, [1] synchronized gravity,
+     then TIMEBINS gravity counts, then TIMEBINS hydro counts */
+  int counts_local[2 + 2 * TIMEBINS];
+  long long counts_global[2 + 2 * TIMEBINS];
+
+  counts_local[0] = nsync_hydro;
+  counts_local[1] = nsync_grav;
+  memcpy(&counts_local[2], TimeBinsGravity.TimeBinCount, TIMEBINS * sizeof(int));
+  memcpy(&counts_local[2 + TIMEBINS], TimeBinsHydro.TimeBinCount, TIMEBINS * sizeof(int));
+
+  sumup_large_ints(2 + 2 * TIMEBINS, counts_local, counts_global);
+
+  All.GlobalNSynchronizedHydro   = counts_global[0];
+  All.GlobalNSynchronizedGravity = counts_global[1];
+
+  long long *cum_grav = &counts_global[2];
+  long long *cum_sph  = &counts_global[2 + TIMEBINS];
+
+  /* turn the per-bin totals into running sums; the last entry then holds the grand total */
+  for(bin = 1; bin < TIMEBINS; bin++)
+    {
+      cum_grav[bin] += cum_grav[bin - 1];
+      cum_sph[bin] += cum_sph[bin - 1];
+    }
+
+  const long long tot_grav = cum_grav[TIMEBINS - 1];
+  const long long tot_sph  = cum_sph[TIMEBINS - 1];
+
+  /* walk down from the highest occupied bin; the lowest bin whose running sum exceeds the
+     configured fraction of the total (gravity or hydro) wins */
+  All.SmallestTimeBinWithDomainDecomposition = All.HighestOccupiedTimeBin;
+
+  for(bin = All.HighestOccupiedTimeBin; bin >= All.LowestOccupiedTimeBin; bin--)
+    {
+      if(cum_grav[bin] > All.ActivePartFracForNewDomainDecomp * tot_grav ||
+         cum_sph[bin] > All.ActivePartFracForNewDomainDecomp * tot_sph)
+        All.SmallestTimeBinWithDomainDecomposition = bin;
+    }
+}
+
+/*! \brief Drifts every local particle to the current integer time.
+ *
+ *  Calls drift_particle() with All.Ti_Current for each of the NumPart entries.
+ *
+ *  \return void
+ */
+void drift_all_particles(void)
+{
+  TIMER_START(CPU_DRIFTS);
+
+  for(int p = 0; p < NumPart; p++)
+    drift_particle(p, All.Ti_Current);
+
+  TIMER_STOP(CPU_DRIFTS);
+}
+
+/*! \brief This function drifts a particle i to time1.
+ *
+ *  \param[in] i Particle/cell index.
+ *  \param[in] time1 Time to which particles get drifted.
+ *
+ *  \return void
+ */
+void drift_particle(int i, integertime time1)
+{
+  int axis;
+
+  if(i < 0)
+    terminate("i=%d NumPart=%d", i, NumPart);
+
+  const integertime time0 = P[i].Ti_Current;
+
+  /* already at the requested time: nothing to do */
+  if(time1 == time0)
+    return;
+
+  if(time1 < time0)
+    terminate("no prediction into past allowed: time0=%lld time1=%lld\n", (long long)time0, (long long)time1);
+
+  /* length of the drift interval, either from the drift factor or linear in the integer time */
+  const double dt_drift = All.ComovingIntegrationOn ? get_drift_factor(time0, time1) : (time1 - time0) * All.Timebase_interval;
+
+  if(P[i].Type != 0)
+    {
+#ifndef MESHRELAX
+      /* all other types move with their own velocity */
+      for(axis = 0; axis < 3; axis++)
+        P[i].Pos[axis] += P[i].Vel[axis] * dt_drift;
+
+      /* with reflective boundaries, mirror the position back into [0, boxSize] and flip the velocity */
+#if defined(REFLECTIVE_X)
+      if(P[i].Pos[0] < 0 || P[i].Pos[0] > boxSize_X)
+        {
+          P[i].Pos[0] = 2 * (P[i].Pos[0] > boxSize_X ? 1 : 0) * boxSize_X - P[i].Pos[0];
+          P[i].Vel[0] *= -1;
+        }
+#endif /* #if defined(REFLECTIVE_X) */
+#if defined(REFLECTIVE_Y)
+      if(P[i].Pos[1] < 0 || P[i].Pos[1] > boxSize_Y)
+        {
+          P[i].Pos[1] = 2 * (P[i].Pos[1] > boxSize_Y ? 1 : 0) * boxSize_Y - P[i].Pos[1];
+          P[i].Vel[1] *= -1;
+        }
+#endif /* #if defined(REFLECTIVE_Y) */
+#if defined(REFLECTIVE_Z)
+      if(P[i].Pos[2] < 0 || P[i].Pos[2] > boxSize_Z)
+        {
+          P[i].Pos[2] = 2 * (P[i].Pos[2] > boxSize_Z ? 1 : 0) * boxSize_Z - P[i].Pos[2];
+          P[i].Vel[2] *= -1;
+        }
+#endif /* #if defined(REFLECTIVE_Z) */
+
+#endif /* #ifndef MESHRELAX */
+    }
+  else
+    {
+      /* type 0 moves with the vertex velocity stored in SphP */
+      for(axis = 0; axis < 3; axis++)
+        P[i].Pos[axis] += SphP[i].VelVertex[axis] * dt_drift;
+    }
+
+  P[i].Ti_Current = time1;
+}
+
+/*! \brief Three-way comparison of two int values, usable as a sort callback.
+ *
+ *  \param[in] a Pointer to the first int.
+ *  \param[in] b Pointer to the second int.
+ *
+ *  \return -1 if *a < *b, +1 if *a > *b, 0 if both are equal.
+ */
+static int int_compare(const void *a, const void *b)
+{
+  const int lhs = *((int *)a);
+  const int rhs = *((int *)b);
+
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+/*! \brief This function builds the linear list of active particles.
+ *
+ *  The list is stored in the array ActiveParticleList of the TimeBinData
+ *  structs.
+ *
+ *  \return void
+ */
+void make_list_of_active_particles(void)
+{
+  TIMER_START(CPU_DRIFTS);
+
+  int p, bin;
+
+  /* hydro: collect the type-0 members of every synchronized bin */
+  TimeBinsHydro.NActiveParticles = 0;
+
+  for(bin = 0; bin < TIMEBINS; bin++)
+    {
+      if(!TimeBinSynchronized[bin])
+        continue;
+
+      for(p = TimeBinsHydro.FirstInTimeBin[bin]; p >= 0; p = TimeBinsHydro.NextInTimeBin[p])
+        {
+          /* skip other types and entries with ID and mass both zero */
+          if(P[p].Type != 0 || (P[p].ID == 0 && P[p].Mass == 0))
+            continue;
+
+          /* an active particle has to sit at the current integer time */
+          if(P[p].Ti_Current != All.Ti_Current)
+            drift_particle(p, All.Ti_Current);
+
+          TimeBinsHydro.ActiveParticleList[TimeBinsHydro.NActiveParticles] = p;
+          TimeBinsHydro.NActiveParticles++;
+        }
+    }
+
+  /* gravity: same procedure, without the type restriction */
+  TimeBinsGravity.NActiveParticles = 0;
+
+  for(bin = 0; bin < TIMEBINS; bin++)
+    {
+      if(!TimeBinSynchronized[bin])
+        continue;
+
+      for(p = TimeBinsGravity.FirstInTimeBin[bin]; p >= 0; p = TimeBinsGravity.NextInTimeBin[p])
+        {
+          if(P[p].ID == 0 && P[p].Mass == 0)
+            continue;
+
+          if(P[p].Ti_Current != All.Ti_Current)
+            drift_particle(p, All.Ti_Current);
+
+          TimeBinsGravity.ActiveParticleList[TimeBinsGravity.NActiveParticles] = p;
+          TimeBinsGravity.NActiveParticles++;
+        }
+    }
+
+  /* sort both lists for better memory efficiency */
+  mysort(TimeBinsHydro.ActiveParticleList, TimeBinsHydro.NActiveParticles, sizeof(int), int_compare);
+  mysort(TimeBinsGravity.ActiveParticleList, TimeBinsGravity.NActiveParticles, sizeof(int), int_compare);
+
+  /* sum the two local list lengths over all tasks; slot 0 is gravity, slot 1 is hydro */
+  int n_local[2];
+  long long n_global[2];
+
+  n_local[0] = TimeBinsGravity.NActiveParticles;
+  n_local[1] = TimeBinsHydro.NActiveParticles;
+
+  sumup_large_ints(2, n_local, n_global);
+
+  TimeBinsGravity.GlobalNActiveParticles = n_global[0];
+  TimeBinsHydro.GlobalNActiveParticles   = n_global[1];
+
+  TIMER_STOP(CPU_DRIFTS);
+}
diff --git a/src/amuse/community/arepo/src/time_integration/timestep.c b/src/amuse/community/arepo/src/time_integration/timestep.c
new file mode 100644
index 0000000000..4224b3cc8f
--- /dev/null
+++ b/src/amuse/community/arepo/src/time_integration/timestep.c
@@ -0,0 +1,980 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/time_integration/timestep.c + * \date 05/2018 + * \brief Routines for 'kicking' particles in + * momentum space and assigning new timesteps. 
+ * \details contains functions: + * void set_cosmo_factors_for_current_time(void) + * void find_timesteps_without_gravity(void) + * void update_timesteps_from_gravity(void) + * integertime get_timestep_pm(void) + * integertime get_timestep_gravity(int p) + * integertime get_timestep_hydro(int p) + * void validate_timestep(double dt, integertime ti_step, int p) + * int test_if_grav_timestep_is_too_large(int p, int bin) + * void find_long_range_step_constraint(void) + * int get_timestep_bin(integertime ti_step) + * double get_time_difference_in_Gyr(double a0, double a1) + * void timebins_init(struct TimeBinData *tbData, const char + * *name, int *MaxPart) + * void timebins_allocate(struct TimeBinData *tbData) + * void timebins_reallocate(struct TimeBinData *tbData) + * void timebins_get_bin_and_do_validity_checks(integertime + * ti_step, int *bin_new, int bin_old) + * void timebin_move_particle(struct TimeBinData *tbData, int p, + * int timeBin_old, int timeBin_new) + * void timebin_remove_particle(struct TimeBinData *tbData, + * int idx, int bin) + * void timebin_add_particle(struct TimeBinData *tbData, int + * i_new, int i_old, int timeBin, int + * addToListOfActiveParticles) + * void timebin_cleanup_list_of_active_particles(struct + * TimeBinData *tbData) + * void timebin_move_sfr(int p, int timeBin_old, int + * timeBin_new) + * void timebin_make_list_of_active_particles_up_to_timebin( + * struct TimeBinData *tbData, int timebin) + * void timebin_add_particles_of_timebin_to_list_of_active_ + * particles(struct TimeBinData *tbData, int timebin) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Sets various cosmological factors for the current simulation time. 
+ *
+ *  \return void
+ */
+void set_cosmo_factors_for_current_time(void)
+{
+  if(!All.ComovingIntegrationOn)
+    {
+      /* non-comoving run: the scale-factor terms are set to unity */
+      All.cf_atime         = 1;
+      All.cf_a2inv         = 1;
+      All.cf_a3inv         = 1;
+      All.cf_afac1         = 1;
+      All.cf_afac2         = 1;
+      All.cf_afac3         = 1;
+      All.cf_hubble_a      = 1;
+      All.cf_H             = All.Hubble;
+      All.cf_time_hubble_a = 1;
+      All.cf_Hrate         = 0;
+      All.cf_redshift      = 0;
+      return;
+    }
+
+  /* comoving run: All.Time is used as the scale factor in all factors below */
+  All.cf_atime    = All.Time;
+  All.cf_a2inv    = 1 / (All.Time * All.Time);
+  All.cf_a3inv    = 1 / (All.Time * All.Time * All.Time);
+  All.cf_afac1    = pow(All.Time, 3 * GAMMA_MINUS1);
+  All.cf_afac2    = 1 / pow(All.Time, 3 * GAMMA - 2);
+  All.cf_afac3    = pow(All.Time, 3 * (1 - GAMMA) / 2.0);
+  All.cf_hubble_a = All.cf_H = All.cf_Hrate = hubble_function(All.Time);
+  All.cf_time_hubble_a                      = All.Time * All.cf_hubble_a;
+  All.cf_redshift                           = 1 / All.Time - 1;
+}
+
+/*! \brief Assigns new hydrodynamic timesteps to the active cells.
+ *
+ *  Without FORCE_EQUAL_TIMESTEPS every active hydro cell gets the bin that
+ *  belongs to get_timestep_hydro(). With FORCE_EQUAL_TIMESTEPS the smallest
+ *  step found on any task (gravity, hydro and, with PMGRID, the PM step) is
+ *  stored in All.GlobalTimeStep and applied to all active gravity and hydro
+ *  particles. In both cases the linked timebin lists are updated through
+ *  timebin_move_particle().
+ *
+ *  \return void
+ */
+void find_timesteps_without_gravity(void)
+{
+#ifdef TREE_BASED_TIMESTEPS
+  tree_based_timesteps();
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+  TIMER_START(CPU_TIMELINE);
+
+  int k, part, bin_new, bin_prev;
+  integertime ti_step;
+
+#ifdef FORCE_EQUAL_TIMESTEPS
+  integertime ti_min_local = TIMEBASE;
+
+#ifdef PMGRID
+  ti_min_local = get_timestep_pm();
+#endif /* #ifdef PMGRID */
+
+#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE)) && !defined(MESHRELAX)
+  /* smallest gravity step among the active gravity particles */
+  for(k = 0; k < TimeBinsGravity.NActiveParticles; k++)
+    {
+      part = TimeBinsGravity.ActiveParticleList[k];
+      if(part >= 0)
+        {
+          ti_step = get_timestep_gravity(part);
+          if(ti_step < ti_min_local)
+            ti_min_local = ti_step;
+        }
+    }
+#endif /* gravity enabled and not MESHRELAX */
+
+  /* smallest hydro step among the active cells */
+  for(k = 0; k < TimeBinsHydro.NActiveParticles; k++)
+    {
+      part = TimeBinsHydro.ActiveParticleList[k];
+      if(part >= 0)
+        {
+          ti_step = get_timestep_hydro(part);
+          if(ti_step < ti_min_local)
+            ti_min_local = ti_step;
+        }
+    }
+
+  /* agree on the minimum over all tasks */
+#ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME
+  minimum_large_ints(1, &ti_min_local, &All.GlobalTimeStep);
+#else  /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME */
+  MPI_Allreduce(&ti_min_local, &All.GlobalTimeStep, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+#endif /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME #else */
+
+  /* put every active gravity particle into the bin of the global step */
+  for(k = 0; k < TimeBinsGravity.NActiveParticles; k++)
+    {
+      part = TimeBinsGravity.ActiveParticleList[k];
+      if(part < 0)
+        continue;
+
+      bin_prev = P[part].TimeBinGrav;
+      timebins_get_bin_and_do_validity_checks(All.GlobalTimeStep, &bin_new, bin_prev);
+      timebin_move_particle(&TimeBinsGravity, part, bin_prev, bin_new);
+      P[part].TimeBinGrav = bin_new;
+    }
+
+  /* and the same for every active hydro cell */
+  for(k = 0; k < TimeBinsHydro.NActiveParticles; k++)
+    {
+      part = TimeBinsHydro.ActiveParticleList[k];
+      if(part < 0)
+        continue;
+
+      bin_prev = P[part].TimeBinHydro;
+      timebins_get_bin_and_do_validity_checks(All.GlobalTimeStep, &bin_new, bin_prev);
+      timebin_move_particle(&TimeBinsHydro, part, bin_prev, bin_new);
+      P[part].TimeBinHydro = bin_new;
+    }
+
+#else /* #ifdef FORCE_EQUAL_TIMESTEPS */
+  /* Calculate and assign hydro timesteps */
+
+  for(k = 0; k < TimeBinsHydro.NActiveParticles; k++)
+    {
+      part = TimeBinsHydro.ActiveParticleList[k];
+      if(part < 0)
+        continue; /* negative entries are skipped */
+
+      ti_step  = get_timestep_hydro(part);
+      bin_prev = P[part].TimeBinHydro;
+
+      timebins_get_bin_and_do_validity_checks(ti_step, &bin_new, bin_prev);
+      timebin_move_particle(&TimeBinsHydro, part, bin_prev, bin_new);
+
+      P[part].TimeBinHydro = bin_new;
+    }
+#endif /* #ifdef FORCE_EQUAL_TIMESTEPS #else */
+
+  TIMER_STOP(CPU_TIMELINE);
+}
+
+/*! \brief Moves particles to lower timestep bin if required by gravity
+ *         timestep criterion.
+ *
+ *  \return void
+ */
+void update_timesteps_from_gravity(void)
+{
+#ifdef FORCE_EQUAL_TIMESTEPS
+  return; /* don't need to do this */
+#endif    /* #ifdef FORCE_EQUAL_TIMESTEPS */
+
+#if !((defined(SELFGRAVITY) || defined(EXTERNALGRAVITY) || defined(EXACT_GRAVITY_FOR_PARTICLE_TYPE))) || defined(MESHRELAX)
+  return;
+#endif /* no gravity, or MESHRELAX */
+
+  TIMER_START(CPU_TIMELINE);
+
+  int k;
+
+  for(k = 0; k < TimeBinsHydro.NActiveParticles; k++)
+    {
+      const int cell = TimeBinsHydro.ActiveParticleList[k];
+
+      /* skip negative entries and cells whose hydro bin is not above their gravity bin */
+      if(cell < 0 || P[cell].TimeBinGrav >= P[cell].TimeBinHydro)
+        continue;
+
+      /* pull the hydro bin down to the gravity bin */
+      timebin_move_particle(&TimeBinsHydro, cell, P[cell].TimeBinHydro, P[cell].TimeBinGrav);
+      P[cell].TimeBinHydro = P[cell].TimeBinGrav;
+    }
+
+  TIMER_STOP(CPU_TIMELINE);
+}
+
+#ifdef PMGRID
+/*! \brief Returns the particle-mesh timestep on the integer timeline.
+ *
+ *  TIMEBASE is halved until it no longer exceeds
+ *  All.DtDisplacement / All.Timebase_interval. If the result is larger than
+ *  the present PM step, it is reduced to a bin that is currently
+ *  synchronized (or to the bin of the present step). At All.Ti_Current ==
+ *  TIMEBASE the step is 0.
+ *
+ *  \return Integer timestep of particle-mesh algorithm.
+ */
+integertime get_timestep_pm(void)
+{
+  integertime ti_step;
+
+  for(ti_step = TIMEBASE; ti_step > (All.DtDisplacement / All.Timebase_interval); ti_step >>= 1)
+    ;
+
+  if(ti_step > (All.PM_Ti_endstep - All.PM_Ti_begstep))
+    {
+      /* PM-timestep wants to increase: make sure the new step is synchronized */
+      int bin            = get_timestep_bin(ti_step);
+      const int bin_prev = get_timestep_bin(All.PM_Ti_endstep - All.PM_Ti_begstep);
+
+      while(bin > bin_prev && TimeBinSynchronized[bin] == 0)
+        bin--;
+
+      if(bin)
+        ti_step = ((integertime)1) << bin;
+      else
+        ti_step = 0;
+    }
+
+  /* we here finish the last timestep. */
+  if(All.Ti_Current == TIMEBASE)
+    ti_step = 0;
+
+  return ti_step;
+}
+#endif /* #ifdef PMGRID */
+
+/*! \brief Returns gravity timestep as an integer-time variable.
+ *
+ *  \param[in] p Index of particle in P array.
+ *
+ *  \return Integer timestep limited due to gravitational acceleration.
+ */
+integertime get_timestep_gravity(int p)
+{
+  int axis;
+  double acc[3], acc_abs, dt;
+  integertime ti_step;
+
+  /* calculate total acceleration */
+  for(axis = 0; axis < 3; axis++)
+    {
+      acc[axis] = All.cf_a2inv * P[p].GravAccel[axis];
+
+#if defined(PMGRID) && !defined(NO_PMFORCE_IN_SHORT_RANGE_TIMESTEP)
+      acc[axis] += All.cf_a2inv * P[p].GravPM[axis];
+#endif /* #if defined(PMGRID) && !defined(NO_PMFORCE_IN_SHORT_RANGE_TIMESTEP) */
+    }
+
+  acc_abs = sqrt(acc[0] * acc[0] + acc[1] * acc[1] + acc[2] * acc[2]); /* this is now the physical acceleration */
+
+  /* avoid a division by zero below */
+  if(acc_abs == 0)
+    acc_abs = 1.0e-30;
+
+  /* only type 0 implemented at the moment */
+  if(All.TypeOfTimestepCriterion == 0)
+    dt = sqrt(2 * All.ErrTolIntAccuracy * All.cf_atime * All.ForceSoftening[P[p].SofteningType] / 2.8 / acc_abs);
+  else
+    terminate("Undefined timestep criterion");
+
+#ifdef EXTERNALGRAVITY
+  /* additional limit derived from P[p].dGravAccel */
+  double dt_ext = sqrt(All.ErrTolIntAccuracy / P[p].dGravAccel);
+  if(dt_ext < dt)
+    dt = dt_ext;
+#endif
+
+  dt *= All.cf_hubble_a;
+
+  /* this particle has been swallowed or eliminated */
+  if(P[p].Mass == 0 && P[p].ID == 0)
+    dt = All.MaxSizeTimestep;
+
+  /* clamp from above */
+  if(dt >= All.MaxSizeTimestep)
+    dt = All.MaxSizeTimestep;
+
+  /* a step below the minimum is either raised to it or fatal, depending on the build */
+  if(dt < All.MinSizeTimestep)
+    {
+#ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP
+      dt = All.MinSizeTimestep;
+#else  /* #ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP */
+      print_particle_info(p);
+      terminate("Timestep dt=%g below All.MinSizeTimestep=%g", dt, All.MinSizeTimestep);
+#endif /* #ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP #else */
+    }
+
+#ifdef PMGRID
+  if(dt >= All.DtDisplacement)
+    dt = All.DtDisplacement;
+#endif /* #ifdef PMGRID */
+
+  /* convert to the integer timeline and check the result */
+  ti_step = (integertime)(dt / All.Timebase_interval);
+
+  validate_timestep(dt, ti_step, p);
+
+  return ti_step;
+}
+
+/*! \brief Returns hydrodynamics timestep as an integer-time variable.
+ *
+ *  \param[in] p Index of particle in P and SphP array.
+ *
+ *  The Courant step is the cell radius divided by the sound speed, scaled by
+ *  All.CourantFac (and by All.Time for comoving integration). Optional limits
+ *  from star formation (USE_SFR) and from the Powell source terms
+ *  (MHD_POWELL_LIMIT_TIMESTEP) can shorten it further. The result is clamped
+ *  to the allowed range, converted to the integer timeline and passed through
+ *  validate_timestep().
+ *
+ *  \return Integer timestep limited due to CFL condition.
+ */
+integertime get_timestep_hydro(int p)
+{
+  double dt = 0, dt_courant = 0;
+  integertime ti_step;
+
+  assert(P[p].Type == 0);
+
+  double csnd = get_sound_speed(p);
+
+#if defined(VORONOI_STATIC_MESH)
+  /* on a static mesh the speed of the fluid itself is added to the signal speed */
+  csnd += sqrt(P[p].Vel[0] * P[p].Vel[0] + P[p].Vel[1] * P[p].Vel[1] + P[p].Vel[2] * P[p].Vel[2]) / All.cf_atime;
+#endif /* #if defined(VORONOI_STATIC_MESH) */
+
+  double rad = get_cell_radius(p);
+
+  /* avoid a division by zero (or a negative step) below */
+  if(csnd <= 0)
+    csnd = 1.0e-30;
+
+  dt_courant = rad / csnd;
+
+#ifdef TREE_BASED_TIMESTEPS
+  if(dt_courant > SphP[p].CurrentMaxTiStep)
+    dt_courant = SphP[p].CurrentMaxTiStep;
+#endif /* #ifdef TREE_BASED_TIMESTEPS */
+
+  dt_courant *= All.CourantFac;
+
+  if(All.ComovingIntegrationOn)
+    dt_courant *= All.Time;
+
+  dt = dt_courant;
+
+#if defined(USE_SFR)
+
+  if(P[p].Type == 0) /* to protect using a particle that has been turned into a star */
+    {
+      double sfr = get_starformation_rate(p);
+
+      double dt_sfr = 0.1 * P[p].Mass / (sfr / ((All.UnitMass_in_g / SOLAR_MASS) / (All.UnitTime_in_s / SEC_PER_YEAR)));
+      if(dt_sfr < dt)
+        dt = dt_sfr;
+    }
+#endif /* #if defined(USE_SFR) */
+
+#ifdef MHD_POWELL_LIMIT_TIMESTEP
+  double b         = sqrt(SphP[p].B[0] * SphP[p].B[0] + SphP[p].B[1] * SphP[p].B[1] + SphP[p].B[2] * SphP[p].B[2]);
+  double bmin      = sqrt(2 * 0.01 * SphP[p].Utherm * SphP[p].Density * All.cf_atime);
+  double v         = sqrt(P[p].Vel[0] * P[p].Vel[0] + P[p].Vel[1] * P[p].Vel[1] + P[p].Vel[2] * P[p].Vel[2]) / All.cf_atime;
+  double dt_powell = 0.5 * (b + bmin) / (fabs(SphP[p].DivB / All.cf_atime * v));
+
+  if(dt_powell < dt)
+    dt = dt_powell;
+#endif /* #ifdef MHD_POWELL_LIMIT_TIMESTEP */
+
+  /* convert the physical timestep to dloga if needed. Note: If comoving integration has not been selected,
+     All.cf_hubble_a=1.
+   */
+
+  dt *= All.cf_hubble_a;
+
+  if(dt >= All.MaxSizeTimestep)
+    dt = All.MaxSizeTimestep;
+
+#ifdef TIMESTEP_OUTPUT_LIMIT
+  if(dt >= All.TimestepOutputLimit)
+    dt = All.TimestepOutputLimit;
+#endif /* #ifdef TIMESTEP_OUTPUT_LIMIT */
+
+  /* a step below the minimum is either raised to it or fatal, depending on the build */
+  if(dt < All.MinSizeTimestep)
+    {
+#ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP
+      dt = All.MinSizeTimestep;
+#else  /* #ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP */
+      print_particle_info(p);
+      terminate("Timestep dt=%g below All.MinSizeTimestep=%g", dt, All.MinSizeTimestep);
+#endif /* #ifdef NOSTOP_WHEN_BELOW_MINTIMESTEP #else */
+    }
+
+#ifdef PMGRID
+  if(dt >= All.DtDisplacement)
+    dt = All.DtDisplacement;
+#endif /* #ifdef PMGRID */
+
+  ti_step = (integertime)(dt / All.Timebase_interval);
+
+  validate_timestep(dt, ti_step, p);
+
+  return ti_step;
+}
+
+/*! \brief Checks if timestep is a valid one.
+ *
+ *  A valid integer step satisfies 0 < ti_step < TIMEBASE. Otherwise the
+ *  particle is reported and the run is terminated. A step of exactly 1 is
+ *  only reported here.
+ *
+ *  \param[in] dt Timestep before conversion to the integer timeline (only
+ *             used for the report).
+ *  \param[in] ti_step Timestep on the integer timeline.
+ *  \param[in] p Index of the particle in the P array.
+ *
+ *  \return void
+ */
+void validate_timestep(double dt, integertime ti_step, int p)
+{
+  if(!(ti_step > 0 && ti_step < TIMEBASE))
+    {
+      /* the line break at the end keeps "type=" and "tibase=" from running together */
+      printf(
+          "\nError: An invalid timestep was assigned on the integer timeline!\n"
+          "We better stop.\n"
+          "Task=%d Part-ID=%lld type=%d\n",
+          ThisTask, (long long)P[p].ID, P[p].Type);
+
+      /* print the offending step as long long: a cast to int would truncate a wider integertime */
+      printf("tibase=%g dt=%g ti_step=%lld, xyz=(%g|%g|%g) vel=(%g|%g|%g) tree=(%g|%g|%g) mass=%g\n\n", All.Timebase_interval, dt,
+             (long long)ti_step, P[p].Pos[0], P[p].Pos[1], P[p].Pos[2], P[p].Vel[0], P[p].Vel[1], P[p].Vel[2], P[p].GravAccel[0],
+             P[p].GravAccel[1], P[p].GravAccel[2], P[p].Mass);
+
+      print_particle_info(p);
+      myflush(stdout);
+      terminate("integer timestep outside of allowed range");
+    }
+
+  if(ti_step == 1)
+    {
+      printf("Time-step of integer size 1 found for particle i=%d, pos=(%g|%g|%g), ID=%lld, dt=%g\n", p, P[p].Pos[0], P[p].Pos[1],
+             P[p].Pos[2], (long long)P[p].ID, dt);
+      print_particle_info(p);
+    }
+}
+
+/*!
\brief Checks if timestep according to its present timebin is too large
+ *         compared to the requirements from gravity and hydrodynamics
+ *
+ *  I.e. does the cell need to be moved to a finer timebin?
+ *
+ *  \param[in] p Index of particle/cell.
+ *  \param[in] bin Timebin to compare to.
+ *
+ *  \return 0: not too large; 1: too large.
+ */
+int test_if_grav_timestep_is_too_large(int p, int bin)
+{
+  /* integer step that belongs to the bin under test (bin 0 means step 0) */
+  const integertime ti_step_bin = bin ? (((integertime)1) << bin) : 0;
+
+  /* step required by the gravity criterion */
+  integertime ti_required = get_timestep_gravity(p);
+
+  /* for type 0 with non-zero ID and non-zero mass, the step of the present hydro bin limits it as well */
+  if(P[p].Type == 0 && (P[p].ID != 0) && (P[p].Mass != 0))
+    {
+      const int bin_hydro             = P[p].TimeBinHydro;
+      const integertime ti_step_hydro = bin_hydro ? (((integertime)1) << bin_hydro) : 0;
+
+      if(ti_step_hydro < ti_required)
+        ti_required = ti_step_hydro;
+    }
+
+  return ti_required < ti_step_bin;
+}
+
+#ifdef PMGRID
+/*! \brief Sets the global timestep for the long-range force calculation.
+ *
+ *  Evaluates timestep constraints due to long range force acceleration of all
+ *  simulation particles and finds its global minimum.
+ *
+ *  \return void
+ */
+void find_long_range_step_constraint(void)
+{
+  int p;
+  double acc[3], acc_abs, dt;
+  double dt_local = MAX_DOUBLE_NUMBER;
+
+  for(p = 0; p < NumPart; p++)
+    {
+      /* type 0 does not enter this constraint */
+      if(P[p].Type == 0)
+        continue;
+
+#ifdef PM_TIMESTEP_BASED_ON_TYPES
+      /* only the types selected in the bit mask are considered */
+      if(!((1 << P[p].Type) & (PM_TIMESTEP_BASED_ON_TYPES)))
+        continue;
+#endif /* #ifdef PM_TIMESTEP_BASED_ON_TYPES */
+
+      /* calculate acceleration */
+      acc[0] = All.cf_a2inv * P[p].GravPM[0];
+      acc[1] = All.cf_a2inv * P[p].GravPM[1];
+      acc[2] = All.cf_a2inv * P[p].GravPM[2];
+
+      acc_abs = sqrt(acc[0] * acc[0] + acc[1] * acc[1] + acc[2] * acc[2]); /* this is now the physical acceleration */
+
+      if(acc_abs < MIN_FLOAT_NUMBER)
+        acc_abs = MIN_FLOAT_NUMBER;
+
+      dt = sqrt(2.0 * All.ErrTolIntAccuracy * All.cf_atime * All.ForceSoftening[P[p].SofteningType] / (2.8 * acc_abs));
+
+      dt *= All.cf_hubble_a;
+
+      if(dt < dt_local)
+        dt_local = dt;
+    }
+
+  dt_local *= 2.0; /* move it one timebin higher to prevent being too conservative */
+
+  MPI_Allreduce(&dt_local, &All.DtDisplacement, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+
+  mpi_printf("TIMESTEPS: displacement time constraint: %g (%g)\n", All.DtDisplacement, All.MaxSizeTimestep);
+
+  /* keep the result inside the allowed timestep range */
+  if(All.DtDisplacement > All.MaxSizeTimestep)
+    All.DtDisplacement = All.MaxSizeTimestep;
+
+  if(All.DtDisplacement < All.MinSizeTimestep)
+    All.DtDisplacement = All.MinSizeTimestep;
+}
+#endif /* #ifdef PMGRID */
+
+/*! \brief Converts an integer timestep into its time bin.
+ *
+ *  The bin is the position of the highest set bit of ti_step. A step of 0
+ *  maps to bin 0; a step of 1 terminates the run.
+ *
+ *  \param[in] ti_step Timestep as integertime variable.
+ *
+ *  \return Associated time-bin.
+ */
+int get_timestep_bin(integertime ti_step)
+{
+  int bin;
+
+  if(ti_step == 0)
+    return 0;
+
+  if(ti_step == 1)
+    terminate("time-step of integer size 1 not allowed\n");
+
+  /* count how often the step can be shifted right before it vanishes */
+  for(bin = -1; ti_step; ti_step >>= 1)
+    bin++;
+
+  return bin;
+}
+
+/*! \brief Calculates time difference in Gyr between two time integration unit
+ *         values.
+ *
+ *  If simulation non-cosmological, a0 and a1 are proper time in code units,
+ *  for cosmological simulation a0 and a1 are scalefactors.
+ *
+ *  \param[in] a0 First time or scalefactor.
+ *  \param[in] a1 Second time or scalefactor.
+ *
+ *  \return Time difference in Gyr.
+ */
+double get_time_difference_in_Gyr(double a0, double a1)
+{
+  double time_diff = 0;
+
+  if(!All.ComovingIntegrationOn)
+    {
+      time_diff = (a1 - a0) * All.UnitTime_in_s / All.HubbleParam; /* now in seconds */
+    }
+  else
+    {
+      const double scalefac[2] = {a0, a1};
+      double age[2];
+      int k;
+
+      if(All.OmegaLambda + All.Omega0 != 1)
+        printf("only implemented for flat cosmology so far.");
+
+      const double prefac = 2.0 / (3.0 * sqrt(All.OmegaLambda));
+
+      /* evaluate the same closed-form expression at both scale factors */
+      for(k = 0; k < 2; k++)
+        {
+          const double term1 = sqrt(All.OmegaLambda / All.Omega0) * pow(scalefac[k], 1.5);
+          const double term2 = sqrt(1 + All.OmegaLambda / All.Omega0 * pow(scalefac[k], 3));
+
+          age[k] = prefac * log(term1 + term2);
+        }
+
+      time_diff = (age[1] - age[0]) / (HUBBLE * All.HubbleParam); /* now in seconds */
+    }
+
+  time_diff /= SEC_PER_MEGAYEAR * 1000; /* now in gigayears */
+
+  return time_diff;
+}
+
+/*! \brief Puts a time bin data structure into its initial, empty state.
+ *
+ *  Nothing is allocated here; the list pointers are only reset.
+ *
+ *  \param[out] tbData Time bin data to be initialized.
+ *  \param[in] name Name stored in time bin data (at most 99 characters are
+ *             kept).
+ *  \param[in] MaxPart Pointer to the maximum number of particles; the pointer
+ *             itself is stored.
+ *
+ *  \return void
+ */
+void timebins_init(struct TimeBinData *tbData, const char *name, int *MaxPart)
+{
+  int bin;
+
+  tbData->MaxPart = MaxPart;
+
+  /* bounded copy with explicit termination */
+  strncpy(tbData->Name, name, 99);
+  tbData->Name[99] = '\0';
+
+  tbData->NActiveParticles   = 0;
+  tbData->ActiveParticleList = NULL;
+  tbData->NextInTimeBin      = NULL;
+  tbData->PrevInTimeBin      = NULL;
+
+  /* every bin starts out empty */
+  for(bin = 0; bin < TIMEBINS; bin++)
+    tbData->FirstInTimeBin[bin] = tbData->LastInTimeBin[bin] = -1;
+}
+
+/*! \brief Allocates linked lists in time bin data.
+ *
+ *  With tbData->MaxPart elements.
+ *
+ *  \param[in, out] tbData Pointer to time bin data to be allocated.
+ *
+ *  \return void
+ */
+void timebins_allocate(struct TimeBinData *tbData)
+{
+  char label[200];
+  const size_t nbytes = *(tbData->MaxPart) * sizeof(int);
+
+  label[199] = 0;
+
+  /* each array gets a label built from a fixed prefix and the name of the structure */
+  snprintf(label, 199, "NextActiveParticle%s", tbData->Name);
+  tbData->ActiveParticleList = (int *)mymalloc_movable(&tbData->ActiveParticleList, label, nbytes);
+
+  snprintf(label, 199, "NextInTimeBin%s", tbData->Name);
+  tbData->NextInTimeBin = (int *)mymalloc_movable(&tbData->NextInTimeBin, label, nbytes);
+
+  snprintf(label, 199, "PrevInTimeBin%s", tbData->Name);
+  tbData->PrevInTimeBin = (int *)mymalloc_movable(&tbData->PrevInTimeBin, label, nbytes);
+}
+
+/*! \brief Resizes the three list arrays of a time bin data structure.
+ *
+ *  Each array is resized to hold *(tbData->MaxPart) ints.
+ *
+ *  \param[in, out] tbData Pointer to time bin data to be re-allocated.
+ *
+ *  \return void
+ */
+void timebins_reallocate(struct TimeBinData *tbData)
+{
+  const size_t nbytes = *(tbData->MaxPart) * sizeof(int);
+
+  tbData->ActiveParticleList = (int *)myrealloc_movable(tbData->ActiveParticleList, nbytes);
+  tbData->NextInTimeBin      = (int *)myrealloc_movable(tbData->NextInTimeBin, nbytes);
+  tbData->PrevInTimeBin      = (int *)myrealloc_movable(tbData->PrevInTimeBin, nbytes);
+}
+
+/*! \brief Gets timebin and checks if bin is valid.
+ *
+ *  Checks for example if old bin is synchronized with the bin it should be
+ *  moved to.
+ *
+ *  \param[in] ti_step Timestep in integertime.
+ *  \param[out] bin_new New time bin.
+ *  \param[in] bin_old Old time bin.
+ *
+ *  The step is first reduced to TIMEBASE halved as often as needed, then
+ *  converted to a bin. A bin larger than bin_old is only accepted if it is
+ *  synchronized at the moment; otherwise it is lowered (at most down to
+ *  bin_old). The step that belongs to the bin finally chosen must still fit
+ *  into the rest of the timeline, otherwise the run is terminated.
+ *
+ *  \return void
+ */
+void timebins_get_bin_and_do_validity_checks(integertime ti_step, int *bin_new, int bin_old)
+{
+  /* make it a power 2 subdivision */
+  integertime ti_min = TIMEBASE;
+  while(ti_min > ti_step)
+    ti_min >>= 1;
+  ti_step = ti_min;
+
+  /* get timestep bin */
+  int bin = -1;
+
+  if(ti_step == 0)
+    bin = 0;
+
+  if(ti_step == 1)
+    terminate("time-step of integer size 1 not allowed\n");
+
+  /* count the bits on a scratch copy, so that ti_step itself is not shifted away */
+  integertime remaining;
+  for(remaining = ti_step; remaining; remaining >>= 1)
+    bin++;
+
+  if(bin > bin_old) /* timestep wants to increase */
+    {
+      while(TimeBinSynchronized[bin] == 0 && bin > bin_old) /* make sure the new step is synchronized */
+        bin--;
+    }
+
+  /* step that belongs to the bin finally chosen; the end-of-timeline check below needs this
+     value in every case, not only when the bin was lowered */
+  ti_step = bin ? (((integertime)1) << bin) : 0;
+
+  if(All.Ti_Current >= TIMEBASE) /* we here finish the last timestep. */
+    {
+      ti_step = 0;
+      bin     = 0;
+    }
+
+  if((TIMEBASE - All.Ti_Current) < ti_step) /* check that we don't run beyond the end */
+    {
+      terminate("we are beyond the end of the timeline"); /* should not happen */
+    }
+
+  *bin_new = bin;
+}
+
+/*! \brief Move particle from one time bin to another.
+ *
+ *  \param[in, out] tbData Time bin data structure to operate on.
+ *  \param[in] p Index of the particle to be moved.
+ *  \param[in] timeBin_old Old time bin of particle to be moved.
+ *  \param[in] timeBin_new New time bin of particle to be moved.
+ *
+ *  \return void
+ */
+void timebin_move_particle(struct TimeBinData *tbData, int p, int timeBin_old, int timeBin_new)
+{
+  /* same bin: the lists stay as they are */
+  if(timeBin_old == timeBin_new)
+    return;
+
+  /* --- unlink p from the list of its old bin --- */
+  tbData->TimeBinCount[timeBin_old]--;
+
+  const int before = tbData->PrevInTimeBin[p];
+  const int after  = tbData->NextInTimeBin[p];
+
+  if(before >= 0)
+    tbData->NextInTimeBin[before] = after;
+  if(after >= 0)
+    tbData->PrevInTimeBin[after] = before;
+
+  if(tbData->FirstInTimeBin[timeBin_old] == p)
+    tbData->FirstInTimeBin[timeBin_old] = after;
+  if(tbData->LastInTimeBin[timeBin_old] == p)
+    tbData->LastInTimeBin[timeBin_old] = before;
+
+  /* --- append p to the list of its new bin --- */
+  if(tbData->TimeBinCount[timeBin_new] <= 0)
+    {
+      /* the new bin has no members yet: p is head and tail at once */
+      tbData->FirstInTimeBin[timeBin_new] = tbData->LastInTimeBin[timeBin_new] = p;
+      tbData->PrevInTimeBin[p] = tbData->NextInTimeBin[p] = -1;
+    }
+  else
+    {
+      const int tail = tbData->LastInTimeBin[timeBin_new];
+
+      tbData->PrevInTimeBin[p]            = tail;
+      tbData->NextInTimeBin[tail]         = p;
+      tbData->NextInTimeBin[p]            = -1;
+      tbData->LastInTimeBin[timeBin_new] = p;
+    }
+
+  tbData->TimeBinCount[timeBin_new]++;
+
+#ifdef USE_SFR
+  /* for type 0 in the hydro structure, the per-bin star formation rate follows the move */
+  if((P[p].Type == 0) && (tbData == &TimeBinsHydro))
+    timebin_move_sfr(p, timeBin_old, timeBin_new);
+#endif /* #ifdef USE_SFR */
+}
+
+/*! \brief Removes a particle from time bin structure.
+ *
+ *  Can only be done with active particles.
+ *
+ *  \param[in, out] tbData Time bin structure to be operated on.
+ *  \param[in] idx Index of particle in ActiveParticleList.
+ *  \param[in] bin Timebin in which particle is currently. If left -1, function
+ *             will determine bin by itself.
+ * + * \return void (only clears the slot if ActiveParticleList[idx] was already empty) + */ +void timebin_remove_particle(struct TimeBinData *tbData, int idx, int bin) +{ + int p = tbData->ActiveParticleList[idx]; + tbData->ActiveParticleList[idx] = -1; /* mark the slot in the active list as empty */ + if(p < 0) return; /* slot was already empty: P[p] must not be indexed and the bin count must not be decremented a second time */ + if(bin == -1) /* caller did not pass the bin: look it up in the particle data */ + { + if(tbData == &TimeBinsGravity) + bin = P[p].TimeBinGrav; + else + bin = P[p].TimeBinHydro; + } + + tbData->TimeBinCount[bin]--; + + if(p >= 0) /* always true after the guard above; kept so the surrounding structure is unchanged */ + { + int prev = tbData->PrevInTimeBin[p]; /* neighbours of p in the bin's doubly linked list (negative = none) */ + int next = tbData->NextInTimeBin[p]; + + if(prev >= 0) + tbData->NextInTimeBin[prev] = next; + if(next >= 0) + tbData->PrevInTimeBin[next] = prev; + + if(tbData->FirstInTimeBin[bin] == p) /* p was head and/or tail of the bin: move the bin's end markers */ + tbData->FirstInTimeBin[bin] = next; + if(tbData->LastInTimeBin[bin] == p) + tbData->LastInTimeBin[bin] = prev; + } +} + +/*! \brief Inserts a particle into the timebin struct behind another already + * existing particle. + * + * \param[in, out] tbData Time bin structure to be operated on. + * \param[in] i_new New index in linked lists of time bin data. + * \param[in] i_old old index in linked lists of time bin data. + * \param[in] timeBin Time bin to which it should be added. + * \param[in] addToListOfActiveParticles Flag if particle should be added as + * an active particle. 
+ * + * \return void + */ +void timebin_add_particle(struct TimeBinData *tbData, int i_new, int i_old, int timeBin, int addToListOfActiveParticles) +{ + tbData->TimeBinCount[timeBin]++; + + if(i_old < 0) + { + /* if we don't have an existing particle to add it after, let's take the last one in this timebin */ + i_old = tbData->LastInTimeBin[timeBin]; + + if(i_old < 0) + { + /* the timebin is empty at the moment, so just add the new particle */ + tbData->FirstInTimeBin[timeBin] = i_new; + tbData->LastInTimeBin[timeBin] = i_new; + tbData->NextInTimeBin[i_new] = -1; + tbData->PrevInTimeBin[i_new] = -1; + } + } + + if(i_old >= 0) + { + /* splice i_new in directly behind i_old; the empty-bin case was fully handled above and leaves i_old negative */ + tbData->PrevInTimeBin[i_new] = i_old; + tbData->NextInTimeBin[i_new] = tbData->NextInTimeBin[i_old]; + if(tbData->NextInTimeBin[i_old] >= 0) + tbData->PrevInTimeBin[tbData->NextInTimeBin[i_old]] = i_new; + tbData->NextInTimeBin[i_old] = i_new; + if(tbData->LastInTimeBin[timeBin] == i_old) /* i_old was the tail, so i_new is the new tail */ + tbData->LastInTimeBin[timeBin] = i_new; + } + + if(addToListOfActiveParticles) + { + tbData->ActiveParticleList[tbData->NActiveParticles] = i_new; /* appended at the end; no capacity check is done here */ + tbData->NActiveParticles++; + } +} + +/*! \brief Removes active particles that have ID and Mass 0, i.e. that were + * flagged as deleted from time bin data structure. + * + * \param[in, out] tbData Time bin data structure to be operated on. + * + * \return void + */ +void timebin_cleanup_list_of_active_particles(struct TimeBinData *tbData) +{ + int idx, i; + for(idx = 0; idx < tbData->NActiveParticles; idx++) + { + i = tbData->ActiveParticleList[idx]; + if(i < 0) /* slot already emptied */ + continue; + + if(P[i].ID == 0 && P[i].Mass == 0) + timebin_remove_particle(tbData, idx, -1); /* -1: let the callee look up the particle's bin */ + } +} + +#ifdef USE_SFR +/*! \brief Updates TimeBinSfr when a gas cell changes timebin. + * + * \param[in] p Index of cell in SphP array. + * \param[in] timeBin_old Old time bin. + * \param[in] timeBin_new New time bin. 
+ * + * \return void + */ +void timebin_move_sfr(int p, int timeBin_old, int timeBin_new) +{ + TimeBinSfr[timeBin_old] -= SphP[p].Sfr; /* shift the cell's Sfr contribution from the old bin's sum to the new one */ + TimeBinSfr[timeBin_new] += SphP[p].Sfr; +} +#endif /* #ifdef USE_SFR */ + +/*! \brief Creates list of active particles up to a specified timebin. + * + * \param[in, out] tbData Time bin data to be operated on. + * \param[in] timebin Up to which timebin (inclusive) should particles be included. + * + * \return void + */ +void timebin_make_list_of_active_particles_up_to_timebin(struct TimeBinData *tbData, int timebin) +{ + int tbin; + tbData->NActiveParticles = 0; /* the list is rebuilt from scratch */ + for(tbin = timebin; tbin >= 0; tbin--) /* highest requested bin first, down to bin 0 */ + timebin_add_particles_of_timebin_to_list_of_active_particles(tbData, tbin); +} + +/*! \brief Add particles of a specific timebin to active particle list. + * + * \param[in, out] tbData Time bin data to be operated on. + * \param[in] timebin Time bin which should be included. + * + * \return void + */ +void timebin_add_particles_of_timebin_to_list_of_active_particles(struct TimeBinData *tbData, int timebin) +{ + int i; + for(i = tbData->FirstInTimeBin[timebin]; i >= 0; i = tbData->NextInTimeBin[i]) /* walk the bin's linked list until the negative end marker */ + if(!(P[i].ID == 0 && P[i].Mass == 0)) /* skip entries with ID and Mass both 0 */ + { + tbData->ActiveParticleList[tbData->NActiveParticles] = i; + tbData->NActiveParticles++; + } +} diff --git a/src/amuse/community/arepo/src/time_integration/timestep.h b/src/amuse/community/arepo/src/time_integration/timestep.h new file mode 100644 index 0000000000..c110178a31 --- /dev/null +++ b/src/amuse/community/arepo/src/time_integration/timestep.h @@ -0,0 +1,88 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/time_integration/timestep.h + * \date 05/2018 + * \brief Header for timestep criteria. + * \details + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 29.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef TIMESTEP_H +#define TIMESTEP_H + +#include "../main/allvars.h" + +#ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME +typedef long long integertime; /* integer coordinate on the timeline; long long so that TIMEBASE = 2^60 is representable */ +#define TIMEBINS 60 +#define TIMEBASE \ + (((long long)1) << TIMEBINS) /* The simulated timespan is mapped onto the integer interval [0,TIMEBASE], \ + * where TIMEBASE needs to be a power of 2. */ +#else /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME */ +typedef int integertime; /* integer coordinate on the timeline; TIMEBASE = 2^29 here */ +#define TIMEBINS 29 +#define TIMEBASE (1 << TIMEBINS) +#endif /* #ifdef ENLARGE_DYNAMIC_RANGE_IN_TIME #else */ + +/*! \brief Linked list for particles in specific timebin. 
+ */ +struct TimeBinData +{ + int NActiveParticles; /* number of used slots in ActiveParticleList */ + long long GlobalNActiveParticles; /* NOTE(review): presumably the count summed over all tasks - not set in the code shown here */ + int *ActiveParticleList; /* particle indices; a negative entry marks a removed particle */ + int TimeBinCount[TIMEBINS]; /* number of particles currently linked into each bin */ + + int FirstInTimeBin[TIMEBINS]; /* head of each bin's doubly linked list (negative if the bin is empty) */ + int LastInTimeBin[TIMEBINS]; /* tail of each bin's doubly linked list (negative if the bin is empty) */ + int *NextInTimeBin; /* per-particle link to the next particle in the same bin, -1 at the end */ + int *PrevInTimeBin; /* per-particle link to the previous particle in the same bin, -1 at the start */ + char Name[100]; /* NOTE(review): presumably filled from the name passed to timebins_init() - confirm */ + int *MaxPart; /* NOTE(review): presumably the MaxPart pointer passed to timebins_init() - confirm */ +}; + +void find_timesteps_without_gravity(void); +void update_timesteps_from_gravity(void); +integertime get_timestep_gravity(int p); +integertime get_timestep_hydro(int p); +integertime get_timestep_pm(void); +int test_if_grav_timestep_is_too_large(int p, int bin); +void validate_timestep(double dt, integertime ti_step, int p); +int get_timestep_bin(integertime ti_step); +double get_time_difference_in_Gyr(double a0, double a1); + +/* TimeBinData stuff */ +void timebins_init(struct TimeBinData *tbData, const char *name, int *MaxPart); +void timebins_allocate(struct TimeBinData *tbData); +void timebins_reallocate(struct TimeBinData *tbData); +void timebins_get_bin_and_do_validity_checks(integertime ti_step, int *bin_new, int bin_old); +void timebin_move_particle(struct TimeBinData *tbData, int p, int timeBin_old, int timeBin_new); +void timebin_add_particle(struct TimeBinData *tbData, int i_new, int i_old, int timeBin, int addToListOfActiveParticles); +void timebin_remove_particle(struct TimeBinData *tbData, int idx, int bin); +void timebin_cleanup_list_of_active_particles(struct TimeBinData *tbData); +void timebin_move_sfr(int p, int timeBin_old, int timeBin_new); /* only defined when USE_SFR is set */ +void timebin_make_list_of_active_particles_up_to_timebin(struct TimeBinData *tbData, int timebin); +void timebin_add_particles_of_timebin_to_list_of_active_particles(struct TimeBinData *tbData, int timebin); + +#endif /* #ifndef TIMESTEP_H */ diff --git a/src/amuse/community/arepo/src/time_integration/timestep_treebased.c b/src/amuse/community/arepo/src/time_integration/timestep_treebased.c new file mode 100644 index 0000000000..1e58a240cb --- /dev/null +++ b/src/amuse/community/arepo/src/time_integration/timestep_treebased.c @@ 
-0,0 +1,494 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/time_integration/timestep_treebased.c + * \date 05/2018 + * \brief Algorithm to compute non-local time-step criterion. + * \details This is necessary for local time-stepping if material that + * would require a short time-step is arriving in cells that + * would formally be integrated at a large time-step. + * contains functions: + * static void particle2in(data_in * in, int i, int firstnode) + * static void out2particle(data_out * out, int i, int mode) + * static void kernel_local(void) + * static void kernel_imported(void) + * void tree_based_timesteps(void) + * int timestep_evaluate(int target, int mode, int threadid) + * void tree_based_timesteps_setsoundspeeds(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#ifdef TREE_BASED_TIMESTEPS +/*! 
\brief Local data structure for collecting particle/cell data that is sent + * to other processors if needed. Type called data_in and static + * pointers DataIn and DataGet needed by generic_comm_helpers2. + */ +typedef struct +{ + MyDouble Pos[3]; + MyFloat Vel[3]; + MyFloat Csnd; /* sound speed stored for the cell (SphP[i].Csnd) */ + MyFloat cellrad; /* cell radius as returned by get_cell_radius() */ + MyFloat CurrentMaxTiStep; /* timestep limit of the cell before the tree walk */ + + int Firstnode; /* this is needed as part of the communication algorithm */ +} data_in; + +static data_in *DataIn, *DataGet; + +/*! \brief Routine that fills the relevant particle/cell data into the input + * structure defined above. Needed by generic_comm_helpers2. + * + * \param[out] in Data structure to fill. + * \param[in] i Index of particle in P and SphP arrays. + * \param[in] firstnode First node of communication. + * + * \return void + */ +static void particle2in(data_in *in, int i, int firstnode) +{ + int k; + + for(k = 0; k < 3; k++) + { + in->Pos[k] = P[i].Pos[k]; + in->Vel[k] = P[i].Vel[k]; + } + + in->Csnd = SphP[i].Csnd; + in->cellrad = get_cell_radius(i); + in->CurrentMaxTiStep = SphP[i].CurrentMaxTiStep; + + in->Firstnode = firstnode; +} + +/*! \brief Local data structure that holds results acquired on remote + * processors. Type called data_out and static pointers DataResult and + * DataOut needed by generic_comm_helpers2. + */ +typedef struct +{ + MyFloat CurrentMaxTiStep; /* timestep limit found by the tree walk */ +} data_out; + +static data_out *DataResult, *DataOut; + +/*! \brief Routine to store or combine result data. Needed by + * generic_comm_helpers2. + * + * \param[in] out Data to be moved to appropriate variables in global + * particle and cell data arrays (P, SphP,...) + * \param[in] i Index of particle in P and SphP arrays + * \param[in] mode Mode of function: local particles or information that was + * communicated from other tasks and has to be added locally? 
+ * + * \return void + */ +static void out2particle(data_out *out, int i, int mode) +{ + if(mode == MODE_LOCAL_PARTICLES) /* initial store */ + { + SphP[i].CurrentMaxTiStep = out->CurrentMaxTiStep; + } + else /* combine */ + { + if(SphP[i].CurrentMaxTiStep > out->CurrentMaxTiStep) /* keep the smaller of the local and the imported limit */ + SphP[i].CurrentMaxTiStep = out->CurrentMaxTiStep; + } +} + +#include "../utils/generic_comm_helpers2.h" + +/*! \brief Routine that defines what to do with local particles. + * + * Calls the *_evaluate function in MODE_LOCAL_PARTICLES. + * + * \return void + */ +static void kernel_local(void) +{ + int idx; + + /* do local particles */ + { + int j, threadid = get_thread_num(); + + for(j = 0; j < NTask; j++) + Thread[threadid].Exportflag[j] = -1; + + while(1) + { + if(Thread[threadid].ExportSpace < MinSpace) /* NOTE(review): presumably the export buffer is too full to go on - confirm in generic_comm_helpers2.h */ + break; + + idx = NextParticle++; + + if(idx >= TimeBinsHydro.NActiveParticles) /* all active hydro entries have been handed out */ + break; + + int i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) /* emptied slot of the active list */ + continue; + + if(P[i].Mass == 0 && P[i].ID == 0) /* skip entries with Mass and ID both 0 */ + continue; + + timestep_evaluate(i, MODE_LOCAL_PARTICLES, threadid); + } + } +} + +/*! \brief Routine that defines what to do with imported particles. + * + * Calls the *_evaluate function in MODE_IMPORTED_PARTICLES. + * + * \return void + */ +static void kernel_imported(void) +{ + /* now do the particles that were sent to us */ + int i, cnt = 0; + { + int threadid = get_thread_num(); + + while(1) + { + i = cnt++; + + if(i >= Nimport) + break; + + timestep_evaluate(i, MODE_IMPORTED_PARTICLES, threadid); /* here i indexes the import buffer, not P */ + } + } +} + +/*! \brief Main function to call tree-based timesteps. + * + * This function is called in find_timesteps_without_gravity() (timestep.c). 
+ * + * \return void + */ +void tree_based_timesteps(void) +{ + CPU_Step[CPU_MISC] += measure_time(); + + tree_based_timesteps_setsoundspeeds(); /* initialise Csnd and CurrentMaxTiStep of all active cells before the walk */ + + generic_set_MaxNexport(); + + double t0 = second(); + + generic_comm_pattern(TimeBinsHydro.NActiveParticles, kernel_local, kernel_imported); + + double t1 = second(); + + mpi_printf("TIMESTEPS: timestep-treewalk: sec=%g\n", timediff(t0, t1)); + + CPU_Step[CPU_TREE_TIMESTEPS] += measure_time(); +} + +/*! \brief The 'core' of the tree-based timestep computation. + * + * A target particle is specified which may either be local, or reside in the + * communication buffer. + * + * \param[in] target Index of particle/cell. + * \param[in] mode Flag if it operates on local or imported data. + * \param[in] threadid ID of thread. + * + * \return cost, i.e. number of tree entries (particles and nodes) visited. + */ +int timestep_evaluate(int target, int mode, int threadid) +{ + int k, cost = 0, numnodes, *firstnode; + MyDouble *pos; + MyFloat *vel; + double dxp, dxm, dyp, dym, dzp, dzm, pos_m[3], pos_p[3]; + int no, p; + struct NgbNODE *current; + double dx, dy, dz, dist, csnd, cellrad, xtmp, ytmp, ztmp; /* NOTE(review): xtmp/ytmp/ztmp are not used directly here - presumably scratch for the NEAREST_* macros */ + + data_out out; + data_in local, *target_data; + + if(mode == MODE_LOCAL_PARTICLES) + { + particle2in(&local, target, 0); + target_data = &local; + + numnodes = 1; + firstnode = NULL; + } + else + { + target_data = &DataGet[target]; + + generic_get_numnodes(target, &numnodes, &firstnode); + } + + pos = target_data->Pos; + vel = target_data->Vel; + csnd = target_data->Csnd; + cellrad = target_data->cellrad; + + out.CurrentMaxTiStep = target_data->CurrentMaxTiStep; /* start from the current limit; the walk can only lower it */ + + pos_m[0] = pos[0] - boxSize_X; /* target position shifted by one box length down (pos_m) and up (pos_p), used in the node range tests below */ + pos_p[0] = pos[0] + boxSize_X; + pos_m[1] = pos[1] - boxSize_Y; + pos_p[1] = pos[1] + boxSize_Y; + pos_m[2] = pos[2] - boxSize_Z; + pos_p[2] = pos[2] + boxSize_Z; + + double atimeinv; + if(All.ComovingIntegrationOn) + atimeinv = 1 / All.Time; + else + atimeinv = 1.0; + + /* Now start the actual tree-walk computation for this particle */ + + for(k =
0; k < numnodes; k++) + { + if(mode == MODE_LOCAL_PARTICLES) + { + no = Ngb_MaxPart; /* root node */ + } + else + { + no = firstnode[k]; + no = Ngb_Nodes[no].u.d.nextnode; /* open it */ + } + + while(no >= 0) + { + cost++; + + if(no < Ngb_MaxPart) /* single particle */ + { + p = no; + no = Ngb_Nextnode[no]; + + if(P[p].Type > 0) /* only type-0 entries are considered */ + continue; + + if(P[p].Mass == 0 && P[p].ID == 0) /* skip eliminated cells */ + continue; + + if(P[p].Ti_Current != All.Ti_Current) + { + drift_particle(p, All.Ti_Current); + } + + dx = NEAREST_X(P[p].Pos[0] - pos[0]); + dy = NEAREST_Y(P[p].Pos[1] - pos[1]); + dz = NEAREST_Z(P[p].Pos[2] - pos[2]); + + dist = sqrt(dx * dx + dy * dy + dz * dz); + + if(dist > 0) /* dist == 0 (the target itself or a coincident point) is skipped; vsig = both sound speeds minus the rate at which the separation grows */ + { + double vsig = csnd + SphP[p].Csnd - + ((P[p].Vel[0] - vel[0]) * dx + (P[p].Vel[1] - vel[1]) * dy + (P[p].Vel[2] - vel[2]) * dz) / dist; + + if(vsig > 0) + { + dist += cellrad; /* take one cell radius as minimum distance in order to protect against unreasonably small steps + if two mesh-generating points are extremely close */ + if(out.CurrentMaxTiStep > dist / vsig) + out.CurrentMaxTiStep = dist / vsig; + } + } + } + else if(no < Ngb_MaxPart + Ngb_MaxNodes) /* internal */ + { + if(mode == MODE_IMPORTED_PARTICLES) + { + if(no < + Ngb_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */ + break; + } + + current = &Ngb_Nodes[no]; + + if(current->Ti_Current != All.Ti_Current) + { + drift_node(current, All.Ti_Current); + } + + if(pos[0] > current->u.d.range_max[0] && pos_m[0] < current->u.d.range_min[0]) /* per axis: dxp/dxm are the offsets of the target (or its shifted copy) from the node's upper/lower edge, both 0 otherwise */ + { + dxp = pos[0] - current->u.d.range_max[0]; + dxm = pos_m[0] - current->u.d.range_min[0]; /* negative */ + } + else if(pos_p[0] > current->u.d.range_max[0] && pos[0] < current->u.d.range_min[0]) + { + dxp = pos_p[0] - current->u.d.range_max[0]; + dxm = pos[0] - current->u.d.range_min[0]; /* negative */ + } + else + dxp = dxm = 0; + + if(pos[1] > current->u.d.range_max[1] && pos_m[1] < current->u.d.range_min[1]) + { + dyp =
pos[1] - current->u.d.range_max[1]; + dym = pos_m[1] - current->u.d.range_min[1]; /* negative */ + } + else if(pos_p[1] > current->u.d.range_max[1] && pos[1] < current->u.d.range_min[1]) + { + dyp = pos_p[1] - current->u.d.range_max[1]; + dym = pos[1] - current->u.d.range_min[1]; /* negative */ + } + else + dyp = dym = 0; + + if(pos[2] > current->u.d.range_max[2] && pos_m[2] < current->u.d.range_min[2]) + { + dzp = pos[2] - current->u.d.range_max[2]; + dzm = pos_m[2] - current->u.d.range_min[2]; /* negative */ + } + else if(pos_p[2] > current->u.d.range_max[2] && pos[2] < current->u.d.range_min[2]) + { + dzp = pos_p[2] - current->u.d.range_max[2]; + dzm = pos[2] - current->u.d.range_min[2]; /* negative */ + } + else + dzp = dzm = 0; + + double vsig = csnd + ExtNgb_Nodes[no].MaxCsnd; + + int flag = 0; /* counts the axes (at most one increment each) along which the node is within reach for the current limit */ + + if(dxp + cellrad < out.CurrentMaxTiStep * (vsig + (ExtNgb_Nodes[no].vmax[0] * atimeinv - vel[0]))) + flag++; + else if(-dxm + cellrad < out.CurrentMaxTiStep * (vsig - (ExtNgb_Nodes[no].vmin[0] * atimeinv - vel[0]))) + flag++; + + if(dyp + cellrad < out.CurrentMaxTiStep * (vsig + (ExtNgb_Nodes[no].vmax[1] * atimeinv - vel[1]))) + flag++; + else if(-dym + cellrad < out.CurrentMaxTiStep * (vsig - (ExtNgb_Nodes[no].vmin[1] * atimeinv - vel[1]))) + flag++; + + if(dzp + cellrad < out.CurrentMaxTiStep * (vsig + (ExtNgb_Nodes[no].vmax[2] * atimeinv - vel[2]))) + flag++; + else if(-dzm + cellrad < out.CurrentMaxTiStep * (vsig - (ExtNgb_Nodes[no].vmin[2] * atimeinv - vel[2]))) + flag++; + + if(flag >= 3) /* within reach along all three axes */ + { + /* need to open */ + no = current->u.d.nextnode; + continue; + } + + /* in this case the node can be discarded */ + no = current->u.d.sibling; + continue; + } + else /* pseudo particle */ + { + if(mode == MODE_IMPORTED_PARTICLES) + terminate("mode == 1"); + + if(target >= 0) /* if no target is given, export will not occur */ + ngb_treefind_export_node_threads(no, target, threadid, 0); + + no = Ngb_Nextnode[no - Ngb_MaxNodes]; + continue; + } + } + } + + /* Now collect
the result at the right place */ + if(mode == MODE_LOCAL_PARTICLES) + out2particle(&out, target, MODE_LOCAL_PARTICLES); + else + DataResult[target] = out; + + return cost; +} + +/*! \brief Sets local sound speed and time-step limits from local conditions. + * + * This is a sort of initialization of the tree-based time-steps algorithm. + * + * \return void + */ +void tree_based_timesteps_setsoundspeeds(void) +{ + int idx, i; + double rad, csnd; + double hubble_a, atime; + + if(All.ComovingIntegrationOn) + { + hubble_a = hubble_function(All.Time); + atime = All.Time; + } + else + hubble_a = atime = 1; + + for(idx = 0; idx < TimeBinsHydro.NActiveParticles; idx++) + { + i = TimeBinsHydro.ActiveParticleList[idx]; + if(i < 0) /* emptied slot of the active list */ + continue; + + csnd = get_sound_speed(i); + + if(csnd <= 1.0e-30) /* floor, so that rad / csnd below stays finite */ + csnd = 1.0e-30; + + SphP[i].Csnd = csnd; /* the stored value is the floored sound speed only; the velocity term below is added to the local copy */ + + rad = get_cell_radius(i); + +#ifdef VORONOI_STATIC_MESH + csnd += sqrt(P[i].Vel[0] * P[i].Vel[0] + P[i].Vel[1] * P[i].Vel[1] + P[i].Vel[2] * P[i].Vel[2]) / All.cf_atime; +#else /* #ifdef VORONOI_STATIC_MESH */ + csnd += sqrt((P[i].Vel[0] - SphP[i].VelVertex[0]) * (P[i].Vel[0] - SphP[i].VelVertex[0]) + + (P[i].Vel[1] - SphP[i].VelVertex[1]) * (P[i].Vel[1] - SphP[i].VelVertex[1]) + + (P[i].Vel[2] - SphP[i].VelVertex[2]) * (P[i].Vel[2] - SphP[i].VelVertex[2])) / + All.cf_atime; +#endif /* #ifdef VORONOI_STATIC_MESH #else */ + + SphP[i].CurrentMaxTiStep = rad / csnd; + + /* note: for cosmological integration, CurrentMaxTiStep stores 1/a times the maximum allowed physical timestep */ + + if(SphP[i].CurrentMaxTiStep >= All.MaxSizeTimestep / (atime * hubble_a) / All.CourantFac) /* cap the limit at the value derived from MaxSizeTimestep */ + SphP[i].CurrentMaxTiStep = All.MaxSizeTimestep / (atime * hubble_a) / All.CourantFac; + } +} + +#endif /* #ifdef TREE_BASED_TIMESTEPS */ diff --git a/src/amuse/community/arepo/src/utils/allocate.c b/src/amuse/community/arepo/src/utils/allocate.c new file mode 100644 index 0000000000..234ec06041 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/allocate.c @@ -0,0 
+1,133 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/allocate.c + * \date 05/2018 + * \brief Functions to allocate and reallocate global arrays. + * \details contains functions + * void allocate_memory(void) + * void reallocate_memory_maxpart(void) + * void reallocate_memory_maxpartsph(void) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 03.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Allocates memory for global arrays. + * + * This routine allocates memory for + * - particle storage, both the collisionless and the cells (SPH particles), + * - the ordered binary tree of the timeline, + * - communication buffers. 
+ * + * \return void + */ +void allocate_memory(void) +{ + int NTaskTimesThreads; + + NTaskTimesThreads = MaxThreads * NTask; /* size of the arrays that hold one entry per thread and task */ + + Exportflag = (int *)mymalloc("Exportflag", NTaskTimesThreads * sizeof(int)); + Exportindex = (int *)mymalloc("Exportindex", NTaskTimesThreads * sizeof(int)); + Exportnodecount = (int *)mymalloc("Exportnodecount", NTaskTimesThreads * sizeof(int)); + + Send = (struct send_recv_counts *)mymalloc("Send", sizeof(struct send_recv_counts) * NTask); + Recv = (struct send_recv_counts *)mymalloc("Recv", sizeof(struct send_recv_counts) * NTask); + + TasksThatSend = (int *)mymalloc("TasksThatSend", sizeof(int) * NTask); + TasksThatRecv = (int *)mymalloc("TasksThatRecv", sizeof(int) * NTask); + + Send_count = (int *)mymalloc("Send_count", sizeof(int) * NTaskTimesThreads); + Send_offset = (int *)mymalloc("Send_offset", sizeof(int) * NTaskTimesThreads); + Recv_count = (int *)mymalloc("Recv_count", sizeof(int) * NTask); + Recv_offset = (int *)mymalloc("Recv_offset", sizeof(int) * NTask); + + Send_count_nodes = (int *)mymalloc("Send_count_nodes", sizeof(int) * NTask); + Send_offset_nodes = (int *)mymalloc("Send_offset_nodes", sizeof(int) * NTask); + Recv_count_nodes = (int *)mymalloc("Recv_count_nodes", sizeof(int) * NTask); + Recv_offset_nodes = (int *)mymalloc("Recv_offset_nodes", sizeof(int) * NTask); + + Mesh_Send_count = (int *)mymalloc("Mesh_Send_count", sizeof(int) * NTask); + Mesh_Send_offset = (int *)mymalloc("Mesh_Send_offset", sizeof(int) * NTask); + Mesh_Recv_count = (int *)mymalloc("Mesh_Recv_count", sizeof(int) * NTask); + Mesh_Recv_offset = (int *)mymalloc("Mesh_Recv_offset", sizeof(int) * NTask); + + Force_Send_count = (int *)mymalloc("Force_Send_count", sizeof(int) * NTask); + Force_Send_offset = (int *)mymalloc("Force_Send_offset", sizeof(int) * NTask); + Force_Recv_count = (int *)mymalloc("Force_Recv_count", sizeof(int) * NTask); + Force_Recv_offset = (int *)mymalloc("Force_Recv_offset", sizeof(int) * NTask); + + mpi_printf("ALLOCATE:
initial allocation for MaxPart = %d\n", All.MaxPart); + P = (struct particle_data *)mymalloc_movable(&P, "P", All.MaxPart * sizeof(struct particle_data)); + + mpi_printf("ALLOCATE: initial allocation for MaxPartSph = %d\n", All.MaxPartSph); + SphP = (struct sph_particle_data *)mymalloc_movable(&SphP, "SphP", All.MaxPartSph * sizeof(struct sph_particle_data)); + +#ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE + PartSpecialListGlobal = (struct special_particle_data *)mymalloc_movable(&PartSpecialListGlobal, "PartSpecialListGlobal", + All.MaxPartSpecial * sizeof(struct special_particle_data)); +#endif /* #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE */ + + timebins_allocate(&TimeBinsHydro); + timebins_allocate(&TimeBinsGravity); + + /* set to zero */ + memset(P, 0, All.MaxPart * sizeof(struct particle_data)); + memset(SphP, 0, All.MaxPartSph * sizeof(struct sph_particle_data)); +} + +/*! \brief Reallocates memory for particle data. + * + * Reallocates memory for P and TimeBinsGravity arrays. + * + * \return void + */ +void reallocate_memory_maxpart(void) +{ + mpi_printf("ALLOCATE: Changing to MaxPart = %d\n", All.MaxPart); + + P = (struct particle_data *)myrealloc_movable(P, All.MaxPart * sizeof(struct particle_data)); /* All.MaxPart must already hold the new size when this is called */ + timebins_reallocate(&TimeBinsGravity); +} + +/*! \brief Reallocate memory for cell data. + * + * Reallocates memory for cells in SphP and TimeBinsHydro arrays. + * + * \return void + */ +void reallocate_memory_maxpartsph(void) +{ + mpi_printf("ALLOCATE: Changing to MaxPartSph = %d\n", All.MaxPartSph); + + SphP = (struct sph_particle_data *)myrealloc_movable(SphP, All.MaxPartSph * sizeof(struct sph_particle_data)); /* All.MaxPartSph must already hold the new size when this is called */ + timebins_reallocate(&TimeBinsHydro); +} diff --git a/src/amuse/community/arepo/src/utils/debug.c b/src/amuse/community/arepo/src/utils/debug.c new file mode 100644 index 0000000000..c425ce7d40 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/debug.c @@ -0,0 +1,148 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. 
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/debug.c + * \date 05/2018 + * \brief Print relevant information about a particle / face for + * debugging. + * \details The functions contained in this file are mostly called when a + * condition, that causes the abort of the run, is met. In that + * case, the information about the state of the particle / face + * which triggered that condition is printed to the standard + * output. + * contains functions: + * void print_particle_info(int i) + * void print_particle_info_from_ID(MyIDType ID) + * void print_state_info(struct state *st) + * void print_state_face_info(struct state_face *st) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 03.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +/*! \brief Prints particle / cell information to standard output. + * + * \param[in] i Index of particle / cell. 
+ * + * \return void + */ +void print_particle_info(int i) +{ + printf("Task=%d, ID=%llu, Type=%d, TimeBinGrav=%d, TimeBinHydro=%d, Mass=%g, pos=%g|%g|%g, vel=%g|%g|%g\n", ThisTask, + (unsigned long long)P[i].ID, P[i].Type, P[i].TimeBinGrav, P[i].TimeBinHydro, P[i].Mass, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2], + P[i].Vel[0], P[i].Vel[1], P[i].Vel[2]); +#ifdef PMGRID + printf("GravAccel=%g|%g|%g, GravPM=%g|%g|%g, Soft=%g, SoftType=%d, OldAcc=%g\n", P[i].GravAccel[0], P[i].GravAccel[1], + P[i].GravAccel[2], P[i].GravPM[0], P[i].GravPM[1], P[i].GravPM[2], All.ForceSoftening[P[i].SofteningType], P[i].SofteningType, + P[i].OldAcc); +#else /* #ifdef PMGRID */ + printf("GravAccel=%g|%g|%g, Soft=%g, SoftType=%d, OldAcc=%g\n", P[i].GravAccel[0], P[i].GravAccel[1], P[i].GravAccel[2], + All.ForceSoftening[P[i].SofteningType], P[i].SofteningType, P[i].OldAcc); +#endif /* #ifdef PMGRID #else */ + + if(P[i].Type == 0) /* only type-0 entries have cell data in SphP */ + { + printf("Vol=%g, rad=%g, rho=%g, p=%g,u=%g, velVertex=%g|%g|%g, csnd=%g\n", SphP[i].Volume, get_cell_radius(i), SphP[i].Density, + SphP[i].Pressure, SphP[i].Utherm, SphP[i].VelVertex[0], SphP[i].VelVertex[1], SphP[i].VelVertex[2], get_sound_speed(i)); + printf("Center-Pos=%g|%g|%g\n", SphP[i].Center[0] - P[i].Pos[0], SphP[i].Center[1] - P[i].Pos[1], + SphP[i].Center[2] - P[i].Pos[2]); +#ifndef MHD + printf("Mom=%g|%g|%g, Energy=%g, EInt=%g, EKin=%g\n", SphP[i].Momentum[0], SphP[i].Momentum[1], SphP[i].Momentum[2], + SphP[i].Energy, SphP[i].Utherm * P[i].Mass, /* EInt = Utherm * Mass; EKin below = 0.5 * Mass * |Momentum / Mass|^2, with no guard against Mass == 0 */ + 0.5 * P[i].Mass * + ((SphP[i].Momentum[0] / P[i].Mass) * (SphP[i].Momentum[0] / P[i].Mass) + + (SphP[i].Momentum[1] / P[i].Mass) * (SphP[i].Momentum[1] / P[i].Mass) + + (SphP[i].Momentum[2] / P[i].Mass) * (SphP[i].Momentum[2] / P[i].Mass))); +#else /* #ifndef MHD */ + printf("Mom=%g|%g|%g, Energy=%g, EInt=%g, EKin=%g, EB=%g\n", SphP[i].Momentum[0], SphP[i].Momentum[1], SphP[i].Momentum[2], + SphP[i].Energy, SphP[i].Utherm * P[i].Mass, /* same as the non-MHD branch, plus EB = 0.5 * Volume * |B|^2 as the last argument */ + 0.5 * P[i].Mass * + ((SphP[i].Momentum[0] / P[i].Mass) *
(SphP[i].Momentum[0] / P[i].Mass) + + (SphP[i].Momentum[1] / P[i].Mass) * (SphP[i].Momentum[1] / P[i].Mass) + + (SphP[i].Momentum[2] / P[i].Mass) * (SphP[i].Momentum[2] / P[i].Mass)), + 0.5 * SphP[i].Volume * (SphP[i].B[0] * SphP[i].B[0] + SphP[i].B[1] * SphP[i].B[1] + SphP[i].B[2] * SphP[i].B[2])); +#endif /* #ifndef MHD #else */ + +#ifdef MHD + double err = pow(SphP[i].Volume, 1. / 3.) * fabs(SphP[i].DivB) / /* Volume^(1/3) * |DivB| / |B|; not guarded against |B| == 0 */ + sqrt(SphP[i].B[0] * SphP[i].B[0] + SphP[i].B[1] * SphP[i].B[1] + SphP[i].B[2] * SphP[i].B[2]); + printf("B=%g|%g|%g, divb=%g, err=%g\n", SphP[i].B[0], SphP[i].B[1], SphP[i].B[2], SphP[i].DivB, err); +#endif /* #ifdef MHD */ + +#ifdef TREE_BASED_TIMESTEPS + printf("ID=%llu SphP[p].CurrentMaxTiStep=%g\n", (unsigned long long)P[i].ID, SphP[i].CurrentMaxTiStep); +#endif /* #ifdef TREE_BASED_TIMESTEPS */ + } +} + +/*! \brief Prints particle / cell information of the cell with a specific ID. + * + * \param[in] ID particle / cell ID. + * + * \return void + */ +void print_particle_info_from_ID(MyIDType ID) +{ + int i; + for(i = 0; i < NumPart; i++) /* linear scan over the local particles; every match is printed, nothing is printed if there is none */ + if(P[i].ID == ID) + print_particle_info(i); +} + +/*! \brief Prints information of the left or right state of a face to standard + * output. + * + * \param[in] st Structure containing the left or right state of a face. + * + * \return void + */ +void print_state_info(struct state *st) +{ + printf("Task=%d, ID=%llu rho=%g, p=%g, vel=%g|%g|%g, velVertex=%g|%g|%g\n", ThisTask, (unsigned long long)st->ID, st->rho, st->press, + st->velx, st->vely, st->velz, st->velVertex[0], st->velVertex[1], st->velVertex[2]); + printf("dx=%g, dy=%g, dz=%g, dt_half=%g\n", st->dx, st->dy, st->dz, st->dt_half); + printf("timeBin=%d, volume=%g, activearea=%g, surfacearea=%g, csnd=%g\n", st->timeBin, st->volume, st->activearea, st->surfacearea, + st->csnd); +#ifdef MHD + printf("B=%g|%g|%g\n", st->Bx, st->By, st->Bz); +#endif /* #ifdef MHD */ +} + +/*! 
\brief Prints information of the state of a face as determined by
+ *         the Riemann solver to standard output.
+ *
+ * \param[in] st Structure containing the state of a face after the solution
+ *            of the Riemann problem.
+ *
+ * \return void
+ */
+void print_state_face_info(struct state_face *st)
+{
+  printf("rho=%g, p=%g, vel=%g|%g|%g\n", st->rho, st->press, st->velx, st->vely, st->velz); /* density, pressure, velocity */
+}
diff --git a/src/amuse/community/arepo/src/utils/dtypes.h b/src/amuse/community/arepo/src/utils/dtypes.h
new file mode 100644
index 0000000000..816412b529
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/dtypes.h
@@ -0,0 +1,195 @@
+/*!
+ * \copyright This file is part of the public version of the AREPO code.
+ * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ * contributing authors.
+ * \copyright Arepo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Arepo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * A copy of the GNU General Public License is available under
+ * LICENSE as part of this program. See also
+ * .
+ *
+ * \file src/utils/dtypes.h
+ * \date 05/2018
+ * \brief Definition of intrinsic datatypes.
+ * \details + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 28.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifndef DTYPES_H +#define DTYPES_H + +#ifndef FFTW +#define CONCAT(prefix, name) prefix##name +#ifdef DOUBLEPRECISION_FFTW +#define FFTW(x) CONCAT(fftw_, x) +#else /* #ifdef DOUBLEPRECISION_FFTW */ +#define FFTW(x) CONCAT(fftwf_, x) +#endif /* #ifdef DOUBLEPRECISION_FFTW #else */ +#endif /* #ifndef FFTW */ + +#ifndef LONGIDS +typedef unsigned int MyIDType; +#define MPI_MYIDTYPE MPI_UNSIGNED +#else /* #ifndef LONGIDS */ +typedef unsigned long long MyIDType; +#define MPI_MYIDTYPE MPI_UNSIGNED_LONG_LONG +#endif /* #ifndef LONGIDS #else */ + +#ifndef DOUBLEPRECISION /* default is single-precision */ +typedef float MySingle; +typedef float MyFloat; +typedef float MyDouble; +#define MPI_MYFLOAT MPI_FLOAT +#define MPI_MYDOUBLE MPI_FLOAT +#else /* #ifndef DOUBLEPRECISION */ +#if(DOUBLEPRECISION == 2) /* mixed precision */ +typedef float MySingle; +typedef float MyFloat; +typedef double MyDouble; +#define MPI_MYFLOAT MPI_FLOAT +#define MPI_MYDOUBLE MPI_DOUBLE +#else /* #if (DOUBLEPRECISION == 2) */ +#if(DOUBLEPRECISION == 3) /* mixed precision, fewer single precision variables */ +typedef float MySingle; +typedef double MyFloat; +typedef double MyDouble; +#define MPI_MYFLOAT MPI_FLOAT +#define MPI_MYDOUBLE MPI_DOUBLE +#else /* #if (DOUBLEPRECISION == 3) */ +/* everything double-precision */ +typedef double MySingle; +typedef double MyFloat; +typedef double MyDouble; +#define MPI_MYFLOAT MPI_DOUBLE +#define MPI_MYDOUBLE MPI_DOUBLE +#endif /* #if (DOUBLEPRECISION == 3) #else */ +#endif /* #if (DOUBLEPRECISION == 2) #else */ +#endif /* #ifndef DOUBLEPRECISION #else */ + +#ifdef OUTPUT_IN_DOUBLEPRECISION +typedef double MyOutputFloat; +#else /* #ifdef OUTPUT_IN_DOUBLEPRECISION */ +typedef float MyOutputFloat; +#endif /* #ifdef OUTPUT_IN_DOUBLEPRECISION #else */ + +#ifdef INPUT_IN_DOUBLEPRECISION +typedef 
double MyInputFloat; +#else /* #ifdef INPUT_IN_DOUBLEPRECISION */ +typedef float MyInputFloat; +#endif /* #ifdef INPUT_IN_DOUBLEPRECISION #else */ + +#ifndef NGB_TREE_DOUBLEPRECISION +typedef float MyNgbTreeFloat; +#define MAX_NGBRANGE_NUMBER MAX_FLOAT_NUMBER +#else /* #ifndef NGB_TREE_DOUBLEPRECISION */ +typedef double MyNgbTreeFloat; +#define MAX_NGBRANGE_NUMBER MAX_DOUBLE_NUMBER +#endif /* #ifndef NGB_TREE_DOUBLEPRECISION #else */ + +#if defined(PMGRID) +#include + +#ifdef DOUBLEPRECISION_FFTW +typedef double fft_real; +typedef fftw_complex fft_complex; +#else /* #ifdef DOUBLEPRECISION_FFTW */ +typedef float fft_real; +typedef fftwf_complex fft_complex; +#endif /* #ifdef DOUBLEPRECISION_FFTW #else */ +typedef ptrdiff_t fft_ptrdiff_t; + +typedef struct +{ + int NgridX, NgridY, NgridZ; + int Ngridz, Ngrid2; + + FFTW(plan) forward_plan_zdir; + FFTW(plan) forward_plan_xdir; + FFTW(plan) forward_plan_ydir; + + FFTW(plan) backward_plan_zdir; + FFTW(plan) backward_plan_ydir; + FFTW(plan) backward_plan_xdir; + +#ifndef FFT_COLUMN_BASED + + int *slab_to_task; /*!< Maps a slab index to the task responsible for the slab */ + int *slabs_x_per_task; + int *first_slab_x_of_task; /*!< Array containing the index of the first slab of each task */ + int *slabs_y_per_task; /*!< Array containing the number of slabs each task is responsible for */ + int *first_slab_y_of_task; /*!< Array containing the index of the first slab of each task */ + + int nslab_x, slabstart_x, nslab_y, slabstart_y; + int largest_x_slab; /*!< size of the largest slab in x direction */ + int largest_y_slab; /*!< size of the largest slab in y direction */ + +#else /* #ifndef FFT_COLUMN_BASED */ + + size_t max_datasize; + size_t fftsize; + + int base_firstcol, base_ncol, base_lastcol; + int transposed_firstcol, transposed_ncol; + int second_transposed_firstcol, second_transposed_ncol; + size_t second_transposed_ncells; + + int firstcol_XZ, ncol_XZ; + int firstcol_YZ, ncol_YZ; + + int pivotcol; /* to go from 
column number to task */ + int avg; + int tasklastsection; + + size_t *offsets_send_A; + size_t *offsets_recv_A; + size_t *offsets_send_B; + size_t *offsets_recv_B; + size_t *offsets_send_C; + size_t *offsets_recv_C; + size_t *offsets_send_D; + size_t *offsets_recv_D; + size_t *offsets_send_13; + size_t *offsets_recv_13; + size_t *offsets_send_23; + size_t *offsets_recv_23; + size_t *offsets_send_13back; + size_t *offsets_recv_13back; + size_t *offsets_send_23back; + size_t *offsets_recv_23back; + + size_t *count_send_A; + size_t *count_recv_A; + size_t *count_send_B; + size_t *count_recv_B; + size_t *count_send_C; + size_t *count_recv_C; + size_t *count_send_D; + size_t *count_recv_D; + size_t *count_send_13; + size_t *count_recv_13; + size_t *count_send_23; + size_t *count_recv_23; + size_t *count_send_13back; + size_t *count_recv_13back; + size_t *count_send_23back; + size_t *count_recv_23back; + +#endif /* #ifndef FFT_COLUMN_BASED */ +} fft_plan; + +#endif /* #if defined(PMGRID) */ + +#endif /* #ifndef DTYPES_H */ diff --git a/src/amuse/community/arepo/src/utils/generic_comm_helpers2.h b/src/amuse/community/arepo/src/utils/generic_comm_helpers2.h new file mode 100644 index 0000000000..a159b17045 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/generic_comm_helpers2.h @@ -0,0 +1,724 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/generic_comm_helpers.h + * \date 05/2018 + * \brief Generic 'template' MPI communication structure used in many + * parts of the code. + * \details Usage: + * see e.g. src/init/density.c + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 04.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#ifdef USE_SUBCOMM_COMMUNICATOR +#define MYCOMMUNICATOR SubComm +#define MyThisTask SubThisTask +#define MyNTask SubNTask +#else /* #ifdef USE_SUBCOMM_COMMUNICATOR */ +#define MYCOMMUNICATOR MPI_COMM_WORLD +#define MyThisTask ThisTask +#define MyNTask NTask +#endif /* #ifdef USE_SUBCOMM_COMMUNICATOR #else */ + +#define EXTRA_SPACE 16384 + +typedef struct datanodelist datanodelist; +typedef struct data_partlist data_partlist; + +static size_t ExportSpace; +static size_t MinSpace; +static int NextParticle; +static int Nexport, Nimport; +static int NexportNodes, NimportNodes; +static long long SumNexport; +static int *NodeDataIn; +static int *NodeDataGet; + +static char callorigin[1000]; + +#ifdef USE_DSDE +static void generic_prepare_import_counts_ibarrier(void); +#endif /* #ifdef USE_DSDE */ + +#ifdef USE_INLINED_IBARRIER +static void generic_prepare_import_counts_inlined_ibarrier(void); +#endif /* #ifdef USE_INLINED_IBARRIER */ + +#define generic_set_MaxNexport(...) \ + { \ + generic_set_info(__FUNCTION__, __FILE__, __LINE__); \ + } + +/*! 
\brief This function determines how much buffer space we may use based on + * the memory that is locally still free, and it computes how much + * memory may at most be needed to process a single particle. We will + * only continue with a particle if this can still be safely processed. + */ +static void generic_set_info(const char *func, const char *file, int line) +{ + ExportSpace = 0.3 * (FreeBytes); /* we just grab at most 30% of the still available memory here */ + ExportSpace /= NUM_THREADS; + ExportSpace -= NumPart * sizeof(int); /* to account for the neighbor list buffer that every thread allocated */ + + /* make the size a multiple both of data_partlist and datanodelist */ + ExportSpace /= (sizeof(data_partlist) * sizeof(datanodelist)); + ExportSpace *= (sizeof(data_partlist) * sizeof(datanodelist)); + + MinSpace = + (MyNTask - 1) * (sizeof(data_partlist) + sizeof(data_in) + sizeof(data_out)) + NTopleaves * (sizeof(datanodelist) + sizeof(int)); + + sprintf(callorigin, "%s|%d|", file, line); + +#ifdef VERBOSE + mpi_printf( + "GENERIC: function %s(), file %s, line %d: MinSpace = %g MB NTopleaves = %d ExportSpace = %g MB sizeof(data_in)=%d " + "sizeof(data_out)=%d\n", + func, file, line, MinSpace / (1024.0 * 1024.0), NTopleaves, ExportSpace / (1024.0 * 1024.0)), + (int)sizeof(data_in), (int)sizeof(data_out); +#endif /* #ifdef VERBOSE */ + + if(ExportSpace < MinSpace) + terminate( + "Bummer. Can't even safely process a single particle for the available memory. FreeBytes=%lld ExportSpace=%lld " + "MinSpace=%lld MyNTask=%d NTopleaves=%d", + (long long)FreeBytes, (long long)ExportSpace, (long long)MinSpace, MyNTask, NTopleaves); +} + +/*! \brief This function does the memory allocation at the beginning of a loop + * over the remaining local particles. The fields PartList[] and + * NodeList[] share the buffer space of size "ExportSpace" (in bytes). + * Here PartList will be filled in from the beginning, while NodeList + * will be filled in from the end. 
Since we do not know a priori the
+ *         relative share of these two fields, we can make optimum use of
+ *         the available space in this way.
+ *
+ *  In addition, every thread gets an Ngblist and an R2list array with
+ *  NumPart entries each, and its own slice of the shared Exportflag array.
+ *
+ *  \return void
+ */
+static void generic_alloc_partlist_nodelist_ngblist_threadbufs(void)
+{
+  for(int i = 0; i < NUM_THREADS; i++)
+    {
+      Thread[i].Nexport = 0;
+      Thread[i].NexportNodes = 0;
+      Thread[i].ExportSpace = ExportSpace;
+      Thread[i].InitialSpace = ExportSpace; /* kept so that the end of the buffer, where the node list starts, can be located */
+      Thread[i].ItemSize = (sizeof(data_partlist) + sizeof(data_in) + sizeof(data_out)); /* one list entry plus its in/out records */
+
+      Thread[i].PartList = (struct data_partlist *)mymalloc_movable_g(&Thread[i].PartList, "PartList", ExportSpace);
+      /* note: the NodeList array will be attached to the end of this buffer, growing backwards */
+      /* Thread[i].NodeList = (struct datanodelist *) (((char *) Thread[i].PartList) + InitialSpace);
+       */
+      Thread[i].Ngblist = (int *)mymalloc_movable_g(&Thread[i].Ngblist, "Ngblist", NumPart * sizeof(int));
+      Thread[i].R2list = (double *)mymalloc_movable_g(&Thread[i].R2list, "R2list", NumPart * sizeof(double));
+      Thread[i].Exportflag = Exportflag + i * ((((MyNTask - 1) / 16) + 1) * 16); /* per-thread stride: MyNTask rounded up to a multiple of 16 */
+    }
+}
+
+/*! \brief The corresponding deallocation routine.
+ */
+static void generic_free_partlist_nodelist_ngblist_threadbufs(void)
+{
+  for(int i = NUM_THREADS - 1; i >= 0; i--) /* threads and their buffers are released in reverse order of allocation */
+    {
+      myfree(Thread[i].R2list);
+      myfree(Thread[i].Ngblist);
+      myfree(Thread[i].PartList);
+      Thread[i].R2list = NULL; /* do not leave stale pointers behind */
+      Thread[i].Ngblist = NULL;
+      Thread[i].PartList = NULL;
+    }
+}
+/*! \brief Tallies what all threads have queued for export.
+ *
+ *  Fills Send[].Count and Send[].CountNodes with the number of particle and
+ *  node entries destined for each task, sets Nexport and NexportNodes to the
+ *  totals over all threads, and adds Nexport to SumNexport.
+ *
+ *  \return void
+ */
+static void generic_prepare_export_counts(void)
+{
+  for(int j = 0; j < MyNTask; j++)
+    {
+      Send[j].Count = 0;
+      Send[j].CountNodes = 0;
+    }
+
+  Nexport = 0;
+  NexportNodes = 0;
+
+  for(int i = 0; i < NUM_THREADS; i++)
+    {
+      for(int j = 0; j < Thread[i].Nexport; j++)
+        Send[Thread[i].PartList[j].Task].Count++;
+
+      struct datanodelist *nodelist = (struct datanodelist *)(((char *)Thread[i].PartList) + Thread[i].InitialSpace); /* end of the
+                                         * thread's buffer; node entries are stored backwards from here, entry j is nodelist[-1 - j] */
+
+      for(int j = 0; j < Thread[i].NexportNodes; j++)
+        Send[nodelist[-1 - j].Task].CountNodes++;
+
+      Nexport += Thread[i].Nexport;
+      NexportNodes += Thread[i].NexportNodes;
+    }
+
+  SumNexport += Nexport;
+}
+
+/*! \brief Establishes the Recv counts from the Send counts (effectively a big
+ *         transpose).
+ *
+ *  Exactly one of three implementations is selected at compile time:
+ *  USE_DSDE, USE_INLINED_IBARRIER, or the MPI_Alltoall default.
+ *
+ *  \return void
+ */
+static void generic_prepare_import_counts(void)
+{
+  /* our standard approach for this is to use an all-to-all communication. For very large processor counts,
+   * this in principle becomes inefficient since mostly zeros need to be communicated.
+   * We also have two optional, experimental communication routines that use a sparse communication pattern instead.
+   */
+#ifdef USE_DSDE
+  generic_prepare_import_counts_ibarrier();
+#else /* #ifdef USE_DSDE */
+#ifdef USE_INLINED_IBARRIER
+  generic_prepare_import_counts_inlined_ibarrier();
+#else /* #ifdef USE_INLINED_IBARRIER */
+  /* the default */
+  MPI_Alltoall(Send, sizeof(struct send_recv_counts), MPI_BYTE, Recv, sizeof(struct send_recv_counts), MPI_BYTE, MYCOMMUNICATOR);
+#endif /* #ifdef USE_INLINED_IBARRIER #else */
+#endif /* #ifdef USE_DSDE #else */
+}
+
+/*! \brief Initializes offset tables that we need for the communication.
+ *
+ *  Send_offset[] and Send_offset_nodes[] become the exclusive prefix sums of
+ *  Send[].Count and Send[].CountNodes, i.e. the start of each task's section
+ *  in the export buffers.
+ *
+ *  \return void
+ */
+static void generic_prepare_export_offsets(void)
+{
+  Send_offset[0] = 0;
+  Send_offset_nodes[0] = 0;
+
+  for(int j = 1; j < MyNTask; j++)
+    {
+      Send_offset[j] = Send_offset[j - 1] + Send[j - 1].Count;
+      Send_offset_nodes[j] = Send_offset_nodes[j - 1] + Send[j - 1].CountNodes;
+    }
+}
+
+/*! \brief Organizes the particle and node data for export in contiguous
+ *         memory regions.
+ *
+ *  The Send[] counters are reset and reused as running fill positions within
+ *  each task's section of DataIn and NodeDataIn.
+ *
+ *  \return void
+ */
+static void generic_prepare_particle_data_for_export(void)
+{
+  int *rel_node_index = (int *)mymalloc_g("rel_node_index", MyNTask * sizeof(int)); /* nodes queued so far, per destination task */
+
+  for(int j = 0; j < MyNTask; j++)
+    {
+      Send[j].Count = 0;
+      Send[j].CountNodes = 0;
+      rel_node_index[j] = 0;
+    }
+
+  for(int i = 0; i < NUM_THREADS; i++)
+    {
+      struct datanodelist *nodelist = (struct datanodelist *)(((char *)Thread[i].PartList) + Thread[i].InitialSpace); /* node entries
+                                         * are read backwards from the end of the thread's buffer */
+
+      for(int j = 0, jj = 0; j < Thread[i].Nexport; j++)
+        {
+          int task = Thread[i].PartList[j].Task;
+          int off = Send_offset[task] + Send[task].Count++;
+
+          int target = Thread[i].PartList[j].Index;
+
+          particle2in(&DataIn[off], target, rel_node_index[task]); /* NOTE(review): particle2in() is supplied by the including file;
+                                         * presumably it stores the third argument as the particle's Firstnode - confirm */
+
+          if(j < Thread[i].Nexport - 1)
+            if(Thread[i].PartList[j].Index == Thread[i].PartList[j + 1].Index)
+              continue; /* same particle again: its nodes are copied after its last PartList entry */
+
+          while(jj < Thread[i].NexportNodes && Thread[i].PartList[j].Index == nodelist[-1 - jj].Index)
+            {
+              int task = nodelist[-1 - jj].Task; /* shadows the outer 'task' and 'off' on purpose: a node may go to another task */
+              int off = Send_offset_nodes[task] + Send[task].CountNodes++;
+
+              NodeDataIn[off] = nodelist[-1 - jj].Node;
+
+              rel_node_index[task]++;
+              jj++;
+            }
+        }
+    }
+
+  myfree(rel_node_index);
+}
+
+/*! \brief Driver routine to process the results that we obtained for a
+ *         particle from a remote processor by working on it with the supplied
+ *         out2particle() routine.
+ */ +static void generic_add_results_to_local(void) +{ + for(int j = 0; j < MyNTask; j++) + Send[j].Count = 0; + + for(int i = 0; i < NUM_THREADS; i++) + for(int j = 0; j < Thread[i].Nexport; j++) + { + int task = Thread[i].PartList[j].Task; + int off = Send_offset[task] + Send[task].Count++; + + int target = Thread[i].PartList[j].Index; + + out2particle(&DataOut[off], target, MODE_IMPORTED_PARTICLES); + } +} + +/*! \brief This function is called in the actual tree walk routine to find out + * how the number and starting index of the section in the node-list + * that needs to be processed for the imported particle. + */ +static void generic_get_numnodes(int target, int *numnodes, int **firstnode) +{ + if(target == Nimport - 1) + *numnodes = NimportNodes - DataGet[target].Firstnode; + else + *numnodes = DataGet[target + 1].Firstnode - DataGet[target].Firstnode; + + *firstnode = &NodeDataGet[DataGet[target].Firstnode]; +} + +/*! \brief Calculates how many space we need to allocate to safely process a + * certain number of nodes and particles that are imported. + */ +static size_t generic_calc_import_storage(int nimport, int nimportnodes) +{ + size_t needed = nimport * sizeof(data_in) + nimportnodes * sizeof(int) + nimport * sizeof(data_out); + + /* add some extra space to not go to the last byte */ + needed += EXTRA_SPACE; + + return needed; +} + +/*! \brief This routine carries out the communication step in several phases + * if needed. 
+ *
+ *  In every phase a group of partner tasks is handled: the export data is
+ *  exchanged, kernel() is run on what was imported, and the results are sent
+ *  back. The group is halved until the import buffers fit into FreeBytes on
+ *  all tasks.
+ *
+ *  \param[in] kernel Routine that processes the imported particles.
+ *
+ *  \return void
+ */
+static void generic_multiple_phases(void (*kernel)(void))
+{
+  int ncycles;
+
+  for(int ngrpstart = 1; ngrpstart < (1 << PTask); ngrpstart += ncycles) /* NOTE(review): PTask is presumably the smallest
+                                         * integer with 2^PTask >= MyNTask - confirm */
+    {
+      /* now decide how many cycles we can process in this iteration */
+      ncycles = (1 << PTask) - ngrpstart;
+
+      do
+        {
+          Nimport = 0;
+          NimportNodes = 0;
+
+          for(int ngrp = ngrpstart; ngrp < ngrpstart + ncycles; ngrp++)
+            {
+              int recvTask = MyThisTask ^ ngrp; /* partner task of this cycle */
+
+              if(recvTask < MyNTask)
+                {
+                  if(Recv[recvTask].Count > 0)
+                    {
+                      Nimport += Recv[recvTask].Count;
+                      NimportNodes += Recv[recvTask].CountNodes;
+                    }
+                }
+            }
+
+          int flag = 0, flagall;
+
+          if(generic_calc_import_storage(Nimport, NimportNodes) > FreeBytes)
+            flag = 1;
+
+          MPI_Allreduce(&flag, &flagall, 1, MPI_INT, MPI_MAX, MYCOMMUNICATOR); /* all tasks must agree on ncycles */
+
+          if(flagall)
+            ncycles /= 2; /* at least one task lacks memory: retry with half as many cycles */
+          else
+            break;
+        }
+      while(ncycles > 0);
+
+      if(ncycles == 0)
+        terminate(
+            "Seems like we can't even do one cycle: ncycles=%d ngrpstart=%d Nimport=%d NimportNodes=%d FreeBytes=%lld needed "
+            "storage=%lld",
+            ncycles, ngrpstart, Nimport, NimportNodes, (long long)FreeBytes,
+            (long long)generic_calc_import_storage(Nimport, NimportNodes));
+
+      if(ngrpstart == 1 && ncycles != ((1 << PTask) - ngrpstart) && MyThisTask == 0)
+        warn("need multiple import/export phases to avoid memory overflow");
+
+      /* now allocate the import and results buffers */
+
+      DataGet = (data_in *)mymalloc_movable_g(&DataGet, "DataGet", Nimport * sizeof(data_in));
+      NodeDataGet = (int *)mymalloc_movable_g(&NodeDataGet, "NodeDataGet", NimportNodes * sizeof(int));
+      DataResult = (data_out *)mymalloc_movable_g(&DataResult, "DataResult", Nimport * sizeof(data_out));
+
+      Nimport = 0; /* from here on used as running fill positions in the import buffers */
+      NimportNodes = 0;
+
+      /* exchange particle data */
+      for(int ngrp = ngrpstart; ngrp < ngrpstart + ncycles; ngrp++)
+        {
+          int recvTask = MyThisTask ^ ngrp;
+
+          if(recvTask < MyNTask)
+            {
+              if(Send[recvTask].Count > 0 || Recv[recvTask].Count > 0)
+                {
+                  size_t len = sizeof(data_in);
+
+                  /* get the particles */
+                  MPI_Sendrecv(&DataIn[Send_offset[recvTask]], Send[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_A,
+                               &DataGet[Nimport], Recv[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_A, MYCOMMUNICATOR,
+                               MPI_STATUS_IGNORE);
+
+                  /* get the nodes */
+                  MPI_Sendrecv(&NodeDataIn[Send_offset_nodes[recvTask]], Send[recvTask].CountNodes, MPI_INT, recvTask, TAG_GRAV_B,
+                               &NodeDataGet[NimportNodes], Recv[recvTask].CountNodes, MPI_INT, recvTask, TAG_GRAV_B, MYCOMMUNICATOR,
+                               MPI_STATUS_IGNORE);
+
+                  for(int k = 0; k < Recv[recvTask].Count; k++)
+                    DataGet[Nimport + k].Firstnode += NimportNodes; /* shift by the position of this task's nodes in NodeDataGet */
+
+                  Nimport += Recv[recvTask].Count;
+                  NimportNodes += Recv[recvTask].CountNodes;
+                }
+            }
+        }
+
+      /* now do the actual work for the imported points */
+      kernel();
+
+      /* send the results */
+      Nimport = 0;
+      NimportNodes = 0;
+
+      for(int ngrp = ngrpstart; ngrp < ngrpstart + ncycles; ngrp++)
+        {
+          int recvTask = MyThisTask ^ ngrp;
+          if(recvTask < MyNTask)
+            {
+              if(Send[recvTask].Count > 0 || Recv[recvTask].Count > 0)
+                {
+                  size_t len = sizeof(data_out);
+
+                  /* exchange the results */
+                  MPI_Sendrecv(&DataResult[Nimport], Recv[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_B,
+                               &DataOut[Send_offset[recvTask]], Send[recvTask].Count * len, MPI_BYTE, recvTask, TAG_HYDRO_B,
+                               MYCOMMUNICATOR, MPI_STATUS_IGNORE);
+
+                  Nimport += Recv[recvTask].Count;
+                  NimportNodes += Recv[recvTask].CountNodes;
+                }
+            }
+        }
+
+      myfree(DataResult); /* freed in reverse order of allocation */
+      myfree(NodeDataGet);
+      myfree(DataGet);
+    }
+}
+
+/*! \brief This function deals with the communication step, and then processes
+ *         the imported particles, and finally computes the results back. If
+ *         there is not enough memory available to hold all the data sent to
+ *         us from other processors, we process the incoming data in multiple
+ *         stages, which will always be possible.
+ *
+ *  \param[in] kernel Routine that works on the imported particles; it is
+ *                    passed on to generic_multiple_phases().
+ *
+ *  \return void
+ */
+static void generic_exchange(void (*kernel)(void))
+{
+  /* set up Sendcount table */
+  generic_prepare_export_counts();
+
+  /* do the all-to-all exchange so that we have the Recvcount table as well */
+  generic_prepare_import_counts();
+
+  /* prepare offsets in export tables */
+  generic_prepare_export_offsets();
+
+  /* allocate particle data buffers */
+  DataIn = (data_in *)mymalloc_movable_g(&DataIn, "DataIn", Nexport * sizeof(data_in));
+  NodeDataIn = (int *)mymalloc_movable_g(&NodeDataIn, "NodeDataIn", NexportNodes * sizeof(int));
+  DataOut = (data_out *)mymalloc_movable_g(&DataOut, "DataOut", Nexport * sizeof(data_out));
+
+  /* prepare particle data for export */
+  generic_prepare_particle_data_for_export();
+
+  /* export particles and process them, if needed in several installments */
+  generic_multiple_phases(kernel);
+
+  /* add the results to the local particles */
+  generic_add_results_to_local();
+
+  myfree(DataOut); /* freed in reverse order of allocation */
+  myfree(NodeDataIn);
+  myfree(DataIn);
+}
+
+/*! \brief Implements a repeated loop over the local particles in the list,
+ *         processing them with the local kernel function, until we're done or
+ *         the export buffer is full. Then we exchange the data, and process
+ *         the imported ones with the provided kernel. We repeat if needed until
+ *         all processors are done.
+ */ +static int generic_comm_pattern(int nactive, void (*kernel_loc)(void), void (*kernel_imp)(void)) +{ + int ndone_flag, ndone, iter = 0; + + SumNexport = 0; /* can be queried as a book-keeping variable */ + + NextParticle = 0; /* first particle index for this task */ + + do + { + iter++; + + /* allocate buffers to arrange communication */ + generic_alloc_partlist_nodelist_ngblist_threadbufs(); + + /* do local particles */ + kernel_loc(); + + /* do all necessary bookkeeping, data exchange, and processing of imported particles */ + generic_exchange(kernel_imp); + + /* free the rest of the buffers */ + generic_free_partlist_nodelist_ngblist_threadbufs(); + + /* check whether we are done */ + if(NextParticle >= nactive) + ndone_flag = 1; + else + ndone_flag = 0; + + MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MYCOMMUNICATOR); + } + while(ndone < MyNTask); + + return iter; +} + +/*! \brief Same as generic_comm_pattern but you can pass the indices of the + * particles to be processed. 
+ */ +static int generic_comm_pattern_for_given_particles(int nactive, int indices[], void (*kernel_loc)(int, int *), + void (*kernel_imp)(void)) +{ + int ndone_flag, ndone, iter = 0; + + SumNexport = 0; /* can be queried as a book-keeping variable */ + + NextParticle = 0; /* first particle index for this task */ + + do + { + iter++; + + /* allocate buffers to arrange communication */ + generic_alloc_partlist_nodelist_ngblist_threadbufs(); + + /* do local particles */ + kernel_loc(nactive, indices); + + /* do all necessary bookkeeping, data exchange, and processing of imported particles */ + generic_exchange(kernel_imp); + + /* free the rest of the buffers */ + generic_free_partlist_nodelist_ngblist_threadbufs(); + + /* check whether we are done */ + if(NextParticle >= nactive) + ndone_flag = 1; + else + ndone_flag = 0; + + MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MYCOMMUNICATOR); + } + while(ndone < MyNTask); + + return iter; +} + +#ifdef USE_INLINED_IBARRIER +/*! \brief Can replace + * MPI_Alltoall(Send, sizeof(struct send_recv_counts), MPI_INT, Recv, + * sizeof(struct send_recv_counts), MPI_INT, MYCOMMUNICATOR); + * with a space communication pattern that effectively involves a + * home-grown non-blocking barrier to establish that we can stop + * listening. 
+ */ +static void generic_prepare_import_counts_inlined_ibarrier(void) +{ + int nLevels = my_fls(MyNTask - 1); + int received_levels = 0, sent_levels = 0; + + int *stagelist = (int *)mymalloc("stagelist", nLevels * sizeof(int)); + for(int j = 0; j < nLevels; j++) + stagelist[j] = j; + + MPI_Request *level_requests = (MPI_Request *)mymalloc("level_requests", nLevels * sizeof(MPI_Request)); + + MPI_Request *requests = (MPI_Request *)mymalloc("requests", MyNTask * sizeof(MPI_Request)); + int n_requests = 0; + + for(int j = 0; j < MyNTask; j++) + { + if(Send[j].Count > 0) + MPI_Issend(&Send[j], sizeof(struct send_recv_counts), MPI_BYTE, j, TAG_N, MYCOMMUNICATOR, &requests[n_requests++]); + + Recv[j].Count = 0; + Recv[j].CountNodes = 0; + } + + int barrier_active = 0; + + while(1) + { + int flag; + MPI_Status status; + + MPI_Iprobe(MPI_ANY_SOURCE, TAG_N, MYCOMMUNICATOR, &flag, &status); + + if(flag) + { + int source = status.MPI_SOURCE; + int tag = status.MPI_TAG; + + MPI_Recv(&Recv[source], sizeof(struct send_recv_counts), MPI_BYTE, source, tag, MYCOMMUNICATOR, MPI_STATUS_IGNORE); + } + + MPI_Iprobe(MPI_ANY_SOURCE, TAG_BARRIER, MYCOMMUNICATOR, &flag, &status); + + if(flag) + { + int source = status.MPI_SOURCE; + int tag = status.MPI_TAG; + + int stage; + MPI_Recv(&stage, 1, MPI_INT, source, tag, MYCOMMUNICATOR, MPI_STATUS_IGNORE); + received_levels |= (1 << stage); + } + + if(barrier_active) + { + for(int stage = 0; stage < nLevels; stage++) + if(!(sent_levels & (1 << stage))) + { + int mask = ((1 << stage) - 1); + + if((mask & received_levels) == mask) + { + sent_levels |= (1 << stage); + + int target = (MyThisTask + (1 << stage)) % MyNTask; + + MPI_Issend(&stagelist[stage], 1, MPI_INT, target, TAG_BARRIER, MYCOMMUNICATOR, &level_requests[stage]); + } + } + + if(received_levels == ((1 << nLevels) - 1) && send_levels == ((1 << nLevels) - 1)) + break; + } + else + { + MPI_Testall(n_requests, requests, &flag, MPI_STATUSES_IGNORE); + + if(flag) + barrier_active = 1; + } + 
} + + MPI_Waitall(nLevels, level_requests, MPI_STATUSES_IGNORE); /* as we are going to free stagelist */ + + myfree(requests); + myfree(level_requests); + myfree(stagelist); +} +#endif /* #ifdef USE_INLINED_IBARRIER */ + +#ifdef USE_DSDE +/*! \brief Can replace + * MPI_Alltoall(Send, sizeof(struct send_recv_counts), MPI_INT, Recv, + * sizeof(struct send_recv_counts), MPI_INT, MYCOMMUNICATOR); + * with a space communication pattern that involves a non-blocking + * barrier (requires MPI-3.0). + */ +static int generic_prepare_import_counts_ibarrier(void) +{ + MPI_Request barrier_request; + MPI_Request *requests = (MPI_Request *)mymalloc_movable(&requests, "requests", MyNTask * sizeof(MPI_Request)); + int n_requests = 0; + + for(int j = 0; j < MyNTask; j++) + { + if(Send[j].Count > 0) + MPI_Issend(&Send[j], sizeof(struct send_recv_counts), MPI_BYTE, j, TAG_N, MYCOMMUNICATOR, &requests[n_requests++]); + + Recv[j].Count = 0; + Recv[j].CountNodes = 0; + } + + int barrier_active = 0; + + while(1) + { + int flag; + MPI_Status status; + + MPI_Iprobe(MPI_ANY_SOURCE, TAG_N, MYCOMMUNICATOR, &flag, &status); + + if(flag) + { + int source = status.MPI_SOURCE; + int tag = status.MPI_TAG; + + int count; + MPI_Get_count(&status, MPI_BYTE, &count); + + if(tag == TAG_N && source != MyThisTask) + { + if(count != 8) + terminate("count=%d\n", count); + + MPI_Recv(&Recv[source], sizeof(struct send_recv_counts), MPI_BYTE, source, tag, MYCOMMUNICATOR, MPI_STATUS_IGNORE); + } + } + + if(barrier_active) + { + int flag2; + + MPI_Test(&barrier_request, &flag2, &status); + + if(flag2 != 0) + break; + } + else + { + MPI_Testall(n_requests, requests, &flag, MPI_STATUSES_IGNORE); + + if(flag) + { + barrier_active = 1; + + MPI_Ibarrier(MYCOMMUNICATOR, &barrier_request); + } + } + } + + myfree(requests); +} +#endif /* #ifdef USE_DSDE */ diff --git a/src/amuse/community/arepo/src/utils/mpz_extension.c b/src/amuse/community/arepo/src/utils/mpz_extension.c new file mode 100644 index 
0000000000..87ba79dc9a --- /dev/null +++ b/src/amuse/community/arepo/src/utils/mpz_extension.c @@ -0,0 +1,119 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/mpz_extension.c + * \date 05/2018 + * \brief Auxiliary functions to facilitate usage of mpz functions. + * \details Integer arithmetic used by Voronoi mesh construction. + * contains functions: + * void MY_mpz_set_si(mpz_t dest, signed long long int val) + * void MY_mpz_mul_si(mpz_t prod, mpz_t mult, signed long long + * int val) + * void MY_mpz_sub_ui(mpz_t prod, mpz_t mult, + * unsigned long long int val) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 20.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#include "../mesh/voronoi/voronoi.h" + +#if USEDBITS > 31 + +/*! \brief Sets mpz variable from signed long long int. + * + * For Arepo-internal use of mpz. + * + * \param[out] dest Variable to be set. + * \param[in] val Value in signed long long int. 
+ * + * \return void + */ +void MY_mpz_set_si(mpz_t dest, signed long long int val) +{ + mpz_t tmp, tmp2; + + unsigned long int lower = (unsigned long int)(val & 0xffffffffL); + signed long int higher = (signed long int)(val >> 32); + + mpz_init(tmp); + mpz_init(tmp2); + + mpz_set_si(tmp, higher); + mpz_mul_2exp(tmp2, tmp, 32); + mpz_add_ui(dest, tmp2, lower); + + mpz_clear(tmp2); + mpz_clear(tmp); +} + +/*! \brief Multiplies an mpz type with a signed long long int. + * + * \param[out] pred Result of multiplication. + * \param[in] mult Multiplicator (mpz_t). + * \param[in] val Multiplicand (signed long long int). + * + * \return void + */ +void MY_mpz_mul_si(mpz_t prod, mpz_t mult, signed long long int val) +{ + mpz_t tmp; + + mpz_init(tmp); + + MY_mpz_set_si(tmp, val); + + mpz_mul(prod, mult, tmp); + + mpz_clear(tmp); +} + +/*! \brief Subtracts 'val' from 'mult'. + * + * \param[out] prod Result of subtraction. + * \param[in] mult Minuend (mpz_t). + * \param[in] val Subtrahend (unsigned long long int). + * + * \return void + */ +void MY_mpz_sub_ui(mpz_t prod, mpz_t mult, unsigned long long int val) +{ + mpz_t tmp; + + mpz_init(tmp); + + MY_mpz_set_si(tmp, val); + + mpz_sub(prod, mult, tmp); + + mpz_clear(tmp); +} + +#endif diff --git a/src/amuse/community/arepo/src/utils/mymalloc.c b/src/amuse/community/arepo/src/utils/mymalloc.c new file mode 100644 index 0000000000..f3173883e6 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/mymalloc.c @@ -0,0 +1,792 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. 
+ * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/utils/mymalloc.c + * \date 05/2018 + * \brief Manager for dynamic memory allocation. + * \details This module handles the dynamic memory allocation for AREPO. + * To avoid memory allocation/deallocation overhead a big chunk of + * memory (which will be the maximum amount of dynamically + * allocatable memory) is allocated upon initialization. This + * chunk is then filled by the memory blocks as in a stack + * structure. The blocks are automatically aligned to a 64 bit + * boundary. Memory blocks come in two flavours: movable and + * non-movable. In non-movable blocks the starting address is + * fixed once the block is allocated and cannot be changed. + * Due to the stack structure of the dynamic memory, this implies + * that the last (non-movable) block allocated must be the first + * block to be deallocated. If this condition is not met, an abort + * condition is triggered. If more flexibility is needed, movable + * memory blocks can be used. In this case, the starting address + * of the block is again fixed upon allocation but the block can + * be shifted (therefore its initial address changes) according + * to needs. For a movable block to be successfully shifted it is + * required that all the subsequent allocated blocks are movable. + * Again, an abort condition is triggered if this condition is + * not met. 
Movable blocks can be deallocated in any order + * provided that the condition just described holds. The gap + * resulting form the deallocation of a block that is not in + * the last position will be automatically filled by shifting all + * the blocks coming after the deallocated block. + * + * contains functions: + * static void *hmalloc(size_t size) (HUGEPAGES) + * static void *hmalloc(size_t size) + * void mymalloc_init(void) + * void report_memory_usage(int rank, char *tabbuf) + * void report_detailed_memory_usage_of_largest_task(void) + * void dump_memory_table(void) + * int dump_memory_table_buffer(char *p) + * void *mymalloc_fullinfo + * void *mymalloc_movable_fullinfo + * size_t roundup_to_multiple_of_cacheline_size(size_t n) + * void myfree_fullinfo(void *p, const char *func, const char + * *file, int line) + * void *myfree_query_last_block(void) + * void myfree_movable_fullinfo(void *p, const char *func, + * const char *file, int line) + * void *myrealloc_fullinfo(void *p, size_t n, const char + * *func, const char *file, int line) + * void *myrealloc_movable_fullinfo(void *p, size_t n, + * const char *func, const char *file, int line) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 07.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#define CACHELINESIZE 64 + +#define MAXBLOCKS 5000 +#define MAXCHARS 40 + +static size_t AllocatedBytesGeneric; + +static size_t HighMarkBytes; +static size_t HighMarkBytesWithoutGeneric; + +static double OldGlobHighMarkMB; +static double OldGlobHighMarkMBWithoutGeneric; + +static size_t TotBytes; /*!< The total dimension (in bytes) of dynamic memory available to the current task. */ +static void *Base; /*!< Base pointer (initial memory address) of the stack. 
*/ + +static unsigned long Nblocks; /*!< The current number of allocated memory blocks. */ + +static void **Table; /*!< Table containing the initial addresses of the allocated memory blocks. */ +static size_t *BlockSize; /*!< Array containing the size (in bytes) of all the allocated memory blocks. */ +static char *MovableFlag; /*!< Identifies whether a block is movable. */ +static char *GenericFlag; /*!< Identifies whether a block has been identified in the generic allocation routines. */ +static void ***BasePointers; /*!< Base pointers containing the initial addresses of movable memory blocks */ +static char *VarName; /*!< The name of the variable with which the block has been allocated. */ +static char *FunctionName; /*!< The function name that has allocated the memory block. */ +static char *ParentFileName; /*!< The location from which the generich routines were called */ +static char *FileName; /*!< The file name where the function that has allocated the block is called. */ +static int *LineNumber; /*!< The line number in FileName where the function that allocated the block has been called. */ +static char *HighMarkTabBuf; /*!< This is a buffer that holds the log-file output corresponding to the largest memory use that has + occurred on this task */ +static char *HighMarkTabBufWithoutGeneric; /*!< This is a buffer that holds the log-file output corresponding to the largest memory use + that has occurred on this task */ + +#ifdef HUGEPAGES +#include +/*! \brief Allocation function wrapper for hugepages usage. + * + * \param[in] size Size of the allocated memory. + * + * \return void pointer to address in memory. 
+ */ +static void *hmalloc(size_t size) +{ + void *p = get_hugepage_region(size, GHR_STRICT); + + if(!p) + { + warn("Failed to get_hugepage_region of size %g\n", size / (1024.0 * 1024)); + + p = malloc(size); + + if(!p) + terminate("Failed to allocate memory of size %g\n", size / (1024.0 * 1024)); + } + + memset(p, 255, size); + memset(p, 0, size); + + return p; +} +#else /* #ifdef HUGEPAGES */ + +/*! \brief Allocation function wrapper without hugepages usage. + * + * \param[in] size Size of the allocated memory. + * + * \return void pointer to address in memory. + */ +static void *hmalloc(size_t size) { return malloc(size); } +#endif /* #ifdef HUGEPAGES #else */ + +/*! \brief Initializes memory manager. + * + * This function initializes the memory manager. In particular, it sets + * the global variables of the module to their initial value and allocates + * the memory for the stack. + * + * \return void + */ +void mymalloc_init(void) +{ + BlockSize = (size_t *)hmalloc(MAXBLOCKS * sizeof(size_t)); + Table = (void **)hmalloc(MAXBLOCKS * sizeof(void *)); + MovableFlag = (char *)hmalloc(MAXBLOCKS * sizeof(char)); + GenericFlag = (char *)hmalloc(MAXBLOCKS * sizeof(char)); + BasePointers = (void ***)hmalloc(MAXBLOCKS * sizeof(void **)); + VarName = (char *)hmalloc(MAXBLOCKS * MAXCHARS * sizeof(char)); + FunctionName = (char *)hmalloc(MAXBLOCKS * MAXCHARS * sizeof(char)); + ParentFileName = (char *)hmalloc(MAXBLOCKS * MAXCHARS * sizeof(char)); + FileName = (char *)hmalloc(MAXBLOCKS * MAXCHARS * sizeof(char)); + LineNumber = (int *)hmalloc(MAXBLOCKS * sizeof(int)); + HighMarkTabBuf = (char *)hmalloc((100 + 4 * MAXCHARS) * (MAXBLOCKS + 10)); + HighMarkTabBufWithoutGeneric = (char *)hmalloc((100 + 4 * MAXCHARS) * (MAXBLOCKS + 10)); + + memset(VarName, 0, MAXBLOCKS * MAXCHARS); + memset(FunctionName, 0, MAXBLOCKS * MAXCHARS); + memset(ParentFileName, 0, MAXBLOCKS * MAXCHARS); + memset(FileName, 0, MAXBLOCKS * MAXCHARS); + + size_t n = All.MaxMemSize * ((size_t)1024 * 1024); 
+ + n = roundup_to_multiple_of_cacheline_size(n); + + if(!(Base = hmalloc(n))) + terminate("Failed to allocate memory for `Base' (%d Mbytes).\n", All.MaxMemSize); + + TotBytes = FreeBytes = n; + + AllocatedBytes = 0; + Nblocks = 0; + HighMarkBytes = 0; + HighMarkBytesWithoutGeneric = 0; + OldGlobHighMarkMB = 0; + OldGlobHighMarkMBWithoutGeneric = 0; +} + +/*! \brief Writes memory usage in FdMemory. + * + * \param[in] rank Number of tasks involved. + * \param[in] tabbuf Header message written in FdMemory. + * + * \return void + */ +void report_memory_usage(int rank, char *tabbuf) +{ + if(ThisTask == rank) + { + char *buf = mymalloc("buf", (100 + 4 * MAXCHARS) * (Nblocks + 10)); + int cc = 0; + cc += sprintf(buf + cc, "\nMEMORY: Largest Allocation = %g Mbyte | Largest Allocation Without Generic = %g Mbyte\n\n", + OldGlobHighMarkMB, OldGlobHighMarkMBWithoutGeneric); + + cc += sprintf(buf + cc, "%s", tabbuf); + if(ThisTask == 0) + { + if(RestartFlag <= 2) + { + fprintf(FdMemory, "%s", buf); + fflush(FdMemory); + } + } + else + { + MPI_Send(&cc, 1, MPI_INT, 0, TAG_N, MPI_COMM_WORLD); + MPI_Send(buf, cc + 1, MPI_BYTE, 0, TAG_PDATA, MPI_COMM_WORLD); + } + myfree(buf); + } + + if(ThisTask == 0 && rank > 0) + { + int cc; + MPI_Recv(&cc, 1, MPI_INT, rank, TAG_N, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + char *buf = mymalloc("buf", cc + 1); + MPI_Recv(buf, cc + 1, MPI_BYTE, rank, TAG_PDATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + if(RestartFlag <= 2) + { + fprintf(FdMemory, "%s", buf); + fflush(FdMemory); + } + myfree(buf); + } +} + +/*! \brief Writes memory useage of largest task in FdMemory. 
+ *
+ *  A report is only written when the global maximum has grown by at least
+ *  5 percent since the last report. Must be called by all tasks because it
+ *  contains collective MPI operations.
+ *
+ *  \return void
+ */
+void report_detailed_memory_usage_of_largest_task(void)
+{
+  int flag = 0;
+
+  /* layout required by MPI_DOUBLE_INT / MPI_MAXLOC: value first, rank second */
+  struct
+  {
+    double mem;
+    int rank;
+  } local, global, global_without_generic;
+
+  local.mem  = HighMarkBytes / (1024.0 * 1024.0);
+  local.rank = ThisTask;
+
+  MPI_Allreduce(&local, &global, 1, MPI_DOUBLE_INT, MPI_MAXLOC, MPI_COMM_WORLD);
+
+  if(global.mem >= 1.05 * OldGlobHighMarkMB)
+    {
+      OldGlobHighMarkMB = global.mem;
+      flag |= 1;
+    }
+
+  local.mem  = HighMarkBytesWithoutGeneric / (1024.0 * 1024.0);
+  local.rank = ThisTask;
+
+  /* use a separate result so that the rank found above is not overwritten;
+   * the two maxima may live on different tasks */
+  MPI_Allreduce(&local, &global_without_generic, 1, MPI_DOUBLE_INT, MPI_MAXLOC, MPI_COMM_WORLD);
+
+  if(global_without_generic.mem >= 1.05 * OldGlobHighMarkMBWithoutGeneric)
+    {
+      OldGlobHighMarkMBWithoutGeneric = global_without_generic.mem;
+      flag |= 2;
+    }
+
+  if(flag & 2)
+    report_memory_usage(global_without_generic.rank, HighMarkTabBufWithoutGeneric);
+
+  if(flag & 1)
+    report_memory_usage(global.rank, HighMarkTabBuf);
+}
+
+/*! \brief Dumps the buffer where the memory information is stored to the
+ *         standard output.
+ *
+ *  Uses the system heap for its scratch buffer, so it also works when the
+ *  managed memory is exhausted.
+ *
+ *  \return void
+ */
+void dump_memory_table(void)
+{
+  /* same per-row estimate as for the high mark buffers in mymalloc_init();
+   * a row holds four names of up to MAXCHARS characters plus the numbers */
+  char *buf = malloc((100 + 4 * MAXCHARS) * (Nblocks + 10));
+
+  if(!buf)
+    {
+      printf("dump_memory_table(): could not allocate the buffer for the memory table\n");
+      return;
+    }
+
+  dump_memory_table_buffer(buf);
+  printf("%s", buf);
+  free(buf);
+}
+
+/*! \brief Fills the output buffer with the memory log.
+ *
+ *  \param[out] p Output buffer.
+ *
+ *  \return The number of characters written to p.
+ */
+int dump_memory_table_buffer(char *p)
+{
+  const double bytes_per_mbyte = 1024.0 * 1024.0;
+  char *out                    = p; /* write position, always on the terminating zero */
+  size_t cumulative            = 0; /* running sum of the block sizes in bytes */
+  int blk;
+
+  /* table header */
+  out += sprintf(out, "-------------------------- Allocated Memory Blocks---- ( Step %8d )------------------\n", All.NumCurrentTiStep);
+  out += sprintf(out, "Task Nr F Variable MBytes Cumulative Function|File|Linenumber\n");
+  out += sprintf(out, "------------------------------------------------------------------------------------------\n");
+
+  /* one row per block, in allocation order */
+  for(blk = 0; blk < Nblocks; blk++)
+    {
+      const int name_offset = blk * MAXCHARS; /* start of this block's entry in the name tables */
+
+      cumulative += BlockSize[blk];
+
+      out += sprintf(out, "%4d %5d %d %40s %10.4f %10.4f %s%s()|%s|%d\n", ThisTask, blk, MovableFlag[blk], VarName + name_offset,
+                     BlockSize[blk] / bytes_per_mbyte, cumulative / bytes_per_mbyte, ParentFileName + name_offset,
+                     FunctionName + name_offset, FileName + name_offset, LineNumber[blk]);
+    }
+
+  /* closing rule */
+  out += sprintf(out, "------------------------------------------------------------------------------------------\n");
+
+  return (int)(out - p);
+}
+
+/*! \brief Allocates a non-movable memory block and store the relative
+ *         information.
+ *
+ *  \param[in] varname Name of the variable to be stored in the allocated
+ *             block.
+ *  \param[in] n Size of the memory block in bytes.
+ *  \param[in] func Name of function that has called the allocation routine
+ *             (usually given by the __FUNCTION__ macro).
+ *  \param[in] file File where the function that has called the allocation
+ *             routine resides (usually given by the __FILE__ macro).
+ *  \param[in] line Line number of file where the allocation routine was
+ *             called (usually given by the __LINE__ macro).
+ *  \param[in] clear_flag If non-zero, the block is filled with zeros.
+ *  \param[in] callorigin If not NULL, the block is accounted as a generic
+ *             allocation and this string is stored as its origin.
+ *
+ *  \return A pointer to the beginning of the allocated memory block.
+ */
+void *mymalloc_fullinfo(const char *varname, size_t n, const char *func, const char *file, int line, int clear_flag, char *callorigin)
+{
+  /* pad the request to a whole number of cache lines, but at least one */
+  const size_t remainder = n % CACHELINESIZE;
+
+  if(remainder != 0)
+    n += CACHELINESIZE - remainder;
+
+  if(n < CACHELINESIZE)
+    n = CACHELINESIZE;
+
+  if(Nblocks >= MAXBLOCKS)
+    terminate("Task=%d: No blocks left in mymalloc_fullinfo() at %s()/%s/line %d. MAXBLOCKS=%d\n", ThisTask, func, file, line,
+              MAXBLOCKS);
+
+  if(FreeBytes < n)
+    {
+      dump_memory_table();
+      terminate(
+          "\nTask=%d: Not enough memory in mymalloc_fullinfo() to allocate %g MB for variable '%s' at %s()/%s/line %d (FreeBytes=%g "
+          "MB).\n",
+          ThisTask, n / (1024.0 * 1024.0), varname, func, file, line, FreeBytes / (1024.0 * 1024.0));
+    }
+
+  /* the new block is placed directly behind the bytes handed out so far */
+  const unsigned long slot = Nblocks;
+  const size_t name_offset = slot * MAXCHARS;
+  void *block              = Base + (TotBytes - FreeBytes);
+
+  Table[slot] = block;
+  FreeBytes -= n;
+
+  /* store where the block comes from; the last byte of every name entry is
+   * never written and stays zero */
+  strncpy(VarName + name_offset, varname, MAXCHARS - 1);
+  strncpy(FunctionName + name_offset, func, MAXCHARS - 1);
+  strncpy(FileName + name_offset, file, MAXCHARS - 1);
+  LineNumber[slot] = line;
+
+  if(!callorigin)
+    {
+      memset(ParentFileName + name_offset, 0, MAXCHARS);
+      GenericFlag[slot] = 0;
+    }
+  else
+    {
+      /* blocks with a call origin are accounted separately as 'generic' */
+      strncpy(ParentFileName + name_offset, callorigin, MAXCHARS - 1);
+      GenericFlag[slot] = 1;
+      AllocatedBytesGeneric += n;
+    }
+
+  BlockSize[slot]   = n;
+  MovableFlag[slot] = 0;
+  AllocatedBytes += n;
+
+  Nblocks = slot + 1;
+
+  /* remember the memory table whenever a new maximum is reached */
+  if(AllocatedBytes - AllocatedBytesGeneric > HighMarkBytesWithoutGeneric)
+    {
+      HighMarkBytesWithoutGeneric = AllocatedBytes - AllocatedBytesGeneric;
+      dump_memory_table_buffer(HighMarkTabBufWithoutGeneric);
+    }
+
+  if(AllocatedBytes > HighMarkBytes)
+    {
+      HighMarkBytes = AllocatedBytes;
+      dump_memory_table_buffer(HighMarkTabBuf);
+    }
+
+  if(clear_flag)
+    memset(block, 0, n);
+
+  return block;
+}
+
+/*! \brief Allocates a movable memory block and store the relative information.
+ *
+ *  \param[in] ptr Pointer to the initial memory address of the block.
+ * \param[in] varname Name of the variable to be stored in the allocated block. + * \param[in] n Size of the memory block in bytes. + * \param[in] func Name of function that has called the allocation routine + * (usually given by the __FUNCTION__ macro). + * \param[in] file File where the function that has called the allocation + * routine resides (usually given by the __FILE__ macro). + * \param[in] line Line number of file where the allocation routine was + * called (usually given by the __LINE__ macro). + * + * \return A pointer to the beginning of the allocated memory block. + */ +void *mymalloc_movable_fullinfo(void *ptr, const char *varname, size_t n, const char *func, const char *file, int line, + char *callorigin) +{ + if((n % CACHELINESIZE) > 0) + n = (n / CACHELINESIZE + 1) * CACHELINESIZE; + + if(n < CACHELINESIZE) + n = CACHELINESIZE; + + if(Nblocks >= MAXBLOCKS) + terminate("Task=%d: No blocks left in mymalloc_fullinfo() at %s()/%s/line %d. MAXBLOCKS=%d\n", ThisTask, func, file, line, + MAXBLOCKS); + + if(n > FreeBytes) + { + dump_memory_table(); + terminate( + "\nTask=%d: Not enough memory in mymalloc_fullinfo() to allocate %g MB for variable '%s' at %s()/%s/line %d (FreeBytes=%g " + "MB).\n", + ThisTask, n / (1024.0 * 1024.0), varname, func, file, line, FreeBytes / (1024.0 * 1024.0)); + } + Table[Nblocks] = Base + (TotBytes - FreeBytes); + FreeBytes -= n; + + strncpy(VarName + Nblocks * MAXCHARS, varname, MAXCHARS - 1); + if(callorigin) + { + strncpy(ParentFileName + Nblocks * MAXCHARS, callorigin, MAXCHARS - 1); + GenericFlag[Nblocks] = 1; + AllocatedBytesGeneric += n; + } + else + { + memset(ParentFileName + Nblocks * MAXCHARS, 0, MAXCHARS); + GenericFlag[Nblocks] = 0; + } + strncpy(FunctionName + Nblocks * MAXCHARS, func, MAXCHARS - 1); + strncpy(FileName + Nblocks * MAXCHARS, file, MAXCHARS - 1); + LineNumber[Nblocks] = line; + + AllocatedBytes += n; + BlockSize[Nblocks] = n; + MovableFlag[Nblocks] = 1; + BasePointers[Nblocks] = ptr; + + Nblocks 
+= 1; + + if(AllocatedBytes - AllocatedBytesGeneric > HighMarkBytesWithoutGeneric) + { + HighMarkBytesWithoutGeneric = AllocatedBytes - AllocatedBytesGeneric; + dump_memory_table_buffer(HighMarkTabBufWithoutGeneric); + } + + if(AllocatedBytes > HighMarkBytes) + { + HighMarkBytes = AllocatedBytes; + dump_memory_table_buffer(HighMarkTabBuf); + } + + return Table[Nblocks - 1]; +} + +/*! \brief Rounds up size to cachline size. + * + * \param[in] n Size. + * + * \return Rounded up size. + */ +size_t roundup_to_multiple_of_cacheline_size(size_t n) +{ + if((n % CACHELINESIZE) > 0) + n = (n / CACHELINESIZE + 1) * CACHELINESIZE; + + return n; +} + +/*! \brief Deallocates a non-movable memory block. + * + * For this operation to be successful the block that has to be deallocated + * must be the last allocated one. + * + * \param[in] p Pointer to the memory block to be deallocated. + * \param[in] func Name of function that has called the deallocation routine + * (usually given by the __FUNCTION__ macro). + * \param[in] file File where the function that has called the deallocation + * routine resides (usually given by the __FILE__ macro). + * \param[in] line Line number of file where the deallocation routine was + * called (usually given by the __LINE__ macro). + */ +void myfree_fullinfo(void *p, const char *func, const char *file, int line) +{ + if(Nblocks == 0) + terminate("no allocated blocks that could be freed"); + + if(p != Table[Nblocks - 1]) + { + dump_memory_table(); + terminate("Task=%d: Wrong call of myfree() at %s()/%s/line %d: not the last allocated block!\n", ThisTask, func, file, line); + } + + Nblocks -= 1; + AllocatedBytes -= BlockSize[Nblocks]; + + if(GenericFlag[Nblocks]) + AllocatedBytesGeneric -= BlockSize[Nblocks]; + + FreeBytes += BlockSize[Nblocks]; +} + +/*! \brief Finds last allocated block. + * + * \return void pointer to last allocated block. 
+ */
+void *myfree_query_last_block(void)
+{
+  if(Nblocks == 0)
+    terminate("no allocated blocks that could be returned");
+
+  /* the table is filled in allocation order, so the last entry is the top of the stack */
+  return Table[Nblocks - 1];
+}
+
+/*! \brief Deallocates a movable memory block.
+ *
+ *  For this operation to be successful all the blocks allocated after the
+ *  block that has to be freed must be of movable type. These blocks are
+ *  shifted down to close the gap, and the pointer variables registered for
+ *  them at allocation time are corrected accordingly.
+ *
+ *  \param[in] p pointer to the memory block to be deallocated.
+ *  \param[in] func name of function that has called the deallocation routine
+ *             (usually given by the __FUNCTION__ macro).
+ *  \param[in] file file where the function that has called the deallocation
+ *             routine resides (usually given by the __FILE__ macro).
+ *  \param[in] line line number of file where the deallocation routine was
+ *             called (usually given by the __LINE__ macro).
+ *
+ *  \return void
+ */
+void myfree_movable_fullinfo(void *p, const char *func, const char *file, int line)
+{
+  int i;
+
+  if(Nblocks == 0)
+    terminate("no allocated blocks that could be freed");
+
+  /* first, let's find the block (searching from the most recent allocation backwards) */
+  int nr;
+
+  for(nr = Nblocks - 1; nr >= 0; nr--)
+    if(p == Table[nr])
+      break;
+
+  /* nr == -1 means that the loop ran through without a match */
+  if(nr < 0)
+    {
+      dump_memory_table();
+      terminate("Task=%d: Wrong call of myfree_movable() from %s()/%s/line %d - this block has not been allocated!\n", ThisTask, func,
+                file, line);
+    }
+
+  if(nr < Nblocks - 1) /* the block is not the last allocated block */
+    {
+      /* check that all subsequent blocks are actually movable */
+      for(i = nr + 1; i < Nblocks; i++)
+        if(MovableFlag[i] == 0)
+          {
+            dump_memory_table();
+            myflush(stdout);
+            terminate(
+                "Task=%d: Wrong call of myfree_movable() from %s()/%s/line %d - behind block=%d there are subsequent non-movable "
+                "allocated blocks\n",
+                ThisTask, func, file, line, nr);
+          }
+    }
+
+  /* give the bytes of the freed block back to the pool */
+  if(GenericFlag[nr])
+    AllocatedBytesGeneric -= BlockSize[nr];
+
+  AllocatedBytes -= BlockSize[nr];
+  FreeBytes += BlockSize[nr];
+
+  /* the following blocks move towards lower addresses by the size of the freed block */
+  ptrdiff_t offset = -BlockSize[nr];
+  size_t length    = 0;
+
+  /* total number of bytes stored behind the freed block */
+  for(i = nr + 1; i < Nblocks; i++)
+    length += BlockSize[i];
+
+  /* source and destination may overlap, hence memmove() */
+  if(nr < Nblocks - 1)
+    memmove(Table[nr + 1] + offset, Table[nr + 1], length);
+
+  /* correct the stored start addresses and the pointer variables that were
+   * registered through the 'ptr' argument of mymalloc_movable_fullinfo() */
+  for(i = nr + 1; i < Nblocks; i++)
+    {
+      Table[i] += offset;
+      *BasePointers[i] = *BasePointers[i] + offset;
+    }
+
+  /* remove entry 'nr' from the bookkeeping tables by moving all later entries one slot down */
+  for(i = nr + 1; i < Nblocks; i++)
+    {
+      Table[i - 1]        = Table[i];
+      BasePointers[i - 1] = BasePointers[i];
+      BlockSize[i - 1]    = BlockSize[i];
+      MovableFlag[i - 1]  = MovableFlag[i];
+      GenericFlag[i - 1]  = GenericFlag[i];
+
+      strncpy(VarName + (i - 1) * MAXCHARS, VarName + i * MAXCHARS, MAXCHARS - 1);
+      strncpy(FunctionName + (i - 1) * MAXCHARS, FunctionName + i * MAXCHARS, MAXCHARS - 1);
+      strncpy(ParentFileName + (i - 1) * MAXCHARS, ParentFileName + i * MAXCHARS, MAXCHARS - 1);
+      strncpy(FileName + (i - 1) * MAXCHARS, FileName + i * MAXCHARS, MAXCHARS - 1);
+      LineNumber[i - 1] = LineNumber[i];
+    }
+
+  Nblocks -= 1;
+}
+
+/*! \brief Reallocates an existing non-movable memory block.
+ *
+ *  For this operation to be successful this must be the last allocated block.
+ *
+ *  \param[in] p Pointer to the existing memory block to be reallocated.
+ *  \param[in] n The new size of the memory block in bytes.
+ *  \param[in] func Name of function that has called the reallocation routine
+ *             (usually given by the __FUNCTION__ macro).
+ *  \param[in] file File where the function that has called the reallocation
+ *             routine resides (usually given by the __FILE__ macro).
+ *  \param[in] line Line number of file where the reallocation routine was
+ *             called (usually given by the __LINE__ macro).
+ *
+ *  \return A pointer to the beginning of the newly allocated memory block.
+ */
+void *myrealloc_fullinfo(void *p, size_t n, const char *func, const char *file, int line)
+{
+  /* round the size up to a multiple of the cache line size, at least one line */
+  if((n % CACHELINESIZE) > 0)
+    n = (n / CACHELINESIZE + 1) * CACHELINESIZE;
+
+  if(n < CACHELINESIZE)
+    n = CACHELINESIZE;
+
+  if(Nblocks == 0)
+    terminate("no allocated blocks that could be reallocated");
+
+  /* stack discipline: only the topmost block can be resized in place */
+  if(p != Table[Nblocks - 1])
+    {
+      dump_memory_table();
+      terminate("Task=%d: Wrong call of myrealloc() at %s()/%s/line %d - not the last allocated block!\n", ThisTask, func, file, line);
+    }
+
+  /* take the old size out of the accounting ... */
+  if(GenericFlag[Nblocks - 1])
+    AllocatedBytesGeneric -= BlockSize[Nblocks - 1];
+
+  AllocatedBytes -= BlockSize[Nblocks - 1];
+  FreeBytes += BlockSize[Nblocks - 1];
+
+  if(n > FreeBytes)
+    {
+      dump_memory_table();
+      terminate("Task=%d: Not enough memory in myremalloc(n=%g MB) at %s()/%s/line %d. previous=%g FreeBytes=%g MB\n", ThisTask,
+                n / (1024.0 * 1024.0), func, file, line, BlockSize[Nblocks - 1] / (1024.0 * 1024.0), FreeBytes / (1024.0 * 1024.0));
+    }
+
+  /* the block keeps its start address, only its extent changes */
+  Table[Nblocks - 1] = Base + (TotBytes - FreeBytes);
+  FreeBytes -= n;
+
+  /* ... and book the new size. The generic counter has to follow as well,
+   * otherwise it drifts and underflows when the block is freed later on. */
+  AllocatedBytes += n;
+  BlockSize[Nblocks - 1] = n;
+
+  if(GenericFlag[Nblocks - 1])
+    AllocatedBytesGeneric += n;
+
+  if(AllocatedBytes > HighMarkBytes)
+    {
+      HighMarkBytes = AllocatedBytes;
+      dump_memory_table_buffer(HighMarkTabBuf);
+    }
+
+  return Table[Nblocks - 1];
+}
+
+/*! \brief Reallocates an existing movable memory block.
+ *
+ *  For this operation to be successful all the blocks allocated after the
+ *  block that has to be reallocated must be of movable type.
+ *
+ *  \param[in] p Pointer to the existing memory block to be reallocated.
+ *  \param[in] n The new size of the memory block in bytes.
+ *  \param[in] func Name of function that has called the reallocation routine
+ *             (usually given by the __FUNCTION__ macro).
+ *  \param[in] file File where the function that has called the reallocation
+ *             routine resides (usually given by the __FILE__ macro).
+ *  \param[in] line Line number of file where the reallocation routine was
+ *             called (usually given by the __LINE__ macro).
+ *
+ *  \return A pointer to the beginning of the newly allocated memory block.
+ */
+void *myrealloc_movable_fullinfo(void *p, size_t n, const char *func, const char *file, int line)
+{
+  int i;
+
+  /* round the size up to a multiple of the cache line size, at least one line */
+  if((n % CACHELINESIZE) > 0)
+    n = (n / CACHELINESIZE + 1) * CACHELINESIZE;
+
+  if(n < CACHELINESIZE)
+    n = CACHELINESIZE;
+
+  if(Nblocks == 0)
+    terminate("no allocated blocks that could be reallocated");
+
+  /* first, let's find the block (searching from the most recent allocation backwards) */
+  int nr;
+
+  for(nr = Nblocks - 1; nr >= 0; nr--)
+    if(p == Table[nr])
+      break;
+
+  /* nr == -1 means that the loop ran through without a match */
+  if(nr < 0)
+    {
+      dump_memory_table();
+      terminate("Task=%d: Wrong call of myrealloc_movable() from %s()/%s/line %d - this block has not been allocated!\n", ThisTask,
+                func, file, line);
+    }
+
+  if(nr < Nblocks - 1) /* the block is not the last allocated block */
+    {
+      /* check that all subsequent blocks are actually movable */
+      for(i = nr + 1; i < Nblocks; i++)
+        if(MovableFlag[i] == 0)
+          {
+            dump_memory_table();
+            terminate(
+                "Task=%d: Wrong call of myrealloc_movable() from %s()/%s/line %d - behind block=%d there are subsequent non-movable "
+                "allocated blocks\n",
+                ThisTask, func, file, line, nr);
+          }
+    }
+
+  /* blocks flagged as generic are not resized by this routine */
+  if(GenericFlag[nr])
+    terminate("unexpected");
+
+  /* take the old size out of the accounting before testing whether the new size fits */
+  AllocatedBytes -= BlockSize[nr];
+  FreeBytes += BlockSize[nr];
+
+  if(n > FreeBytes)
+    {
+      dump_memory_table();
+      terminate("Task=%d: at %s()/%s/line %d: Not enough memory in myremalloc_movable(n=%g MB). previous=%g FreeBytes=%g MB\n",
+                ThisTask, func, file, line, n / (1024.0 * 1024.0), BlockSize[nr] / (1024.0 * 1024.0), FreeBytes / (1024.0 * 1024.0));
+    }
+
+  /* signed change of the block size: positive when growing, negative when shrinking */
+  ptrdiff_t offset = n - BlockSize[nr];
+  size_t length    = 0;
+
+  /* total number of bytes stored behind the resized block */
+  for(i = nr + 1; i < Nblocks; i++)
+    length += BlockSize[i];
+
+  /* shift everything behind the block; source and destination may overlap, hence memmove() */
+  if(nr < Nblocks - 1)
+    memmove(Table[nr + 1] + offset, Table[nr + 1], length);
+
+  /* correct the stored start addresses and the pointer variables that were
+   * registered through the 'ptr' argument of mymalloc_movable_fullinfo() */
+  for(i = nr + 1; i < Nblocks; i++)
+    {
+      Table[i] += offset;
+
+      *BasePointers[i] = *BasePointers[i] + offset;
+    }
+
+  /* book the new size */
+  FreeBytes -= n;
+  AllocatedBytes += n;
+  BlockSize[nr] = n;
+
+  if(AllocatedBytes > HighMarkBytes)
+    {
+      HighMarkBytes = AllocatedBytes;
+      dump_memory_table_buffer(HighMarkTabBuf);
+    }
+
+  /* the resized block itself keeps its start address */
+  return Table[nr];
+}
diff --git a/src/amuse/community/arepo/src/utils/parallel_sort.c b/src/amuse/community/arepo/src/utils/parallel_sort.c
new file mode 100644
index 0000000000..f825a9f220
--- /dev/null
+++ b/src/amuse/community/arepo/src/utils/parallel_sort.c
@@ -0,0 +1,743 @@
+/*!
+ * \copyright   This file is part of the public version of the AREPO code.
+ * \copyright   Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics
+ * \copyright   Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and
+ *              contributing authors.
+ * \copyright   Arepo is free software: you can redistribute it and/or modify
+ *              it under the terms of the GNU General Public License as published by
+ *              the Free Software Foundation, either version 3 of the License, or
+ *              (at your option) any later version.
+ *
+ *              Arepo is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *              GNU General Public License for more details.
+ *
+ *              A copy of the GNU General Public License is available under
+ *              LICENSE as part of this program. See also
+ *              .
+ *
+ * \file        src/utils/parallel_sort.c
+ * \date        05/2018
+ * \brief       MPI parallel sorting routine.
+ * \details contains functions: + * int parallel_sort_indirect_compare(const void *a, + * const void *b) + * double parallel_sort(void *base, size_t nmemb, size_t size, + * int (*compar) (const void *, const void *)) + * double parallel_sort_comm(void *base, size_t nmemb, size_t + * size, int (*compar) (const void *, const void *), + * MPI_Comm comm) + * static void get_local_rank(char *element, size_t + * tie_braking_rank, char *base, size_t nmemb, size_t size, + * size_t noffs_thistask, long long left, long long right, + * size_t * loc, int (*compar) (const void *, const void *)) + * static void check_local_rank(char *element, size_t + * tie_braking_rank, char *base, size_t nmemb, size_t size, + * size_t noffs_thistask, long long left, long long right, + * size_t loc, int (*compar) (const void *, const void *)) + * static void serial_sort(char *base, size_t nmemb, size_t + * size, int (*compar) (const void *, const void *)) + * static void msort_serial_with_tmp(char *base, size_t n, + * size_t s, int (*compar) (const void *, const void *), + * char *t) + * void parallel_sort_test_order(char *base, size_t nmemb, + * size_t size, int (*compar) (const void *, const void *)) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 21.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../main/allvars.h" +#include "../main/proto.h" + +#define TRANSFER_SIZE_LIMIT 1000000000 +#define MAX_ITER_PARALLEL_SORT 500 + +/* Note: For gcc-4.1.2, I found that the compiler produces incorrect code for this routune if optimization level O1 or higher is used. + * In gcc-4.3.4, this problem is absent. 
+ */ + +#define TAG_TRANSFER 100 + +static void serial_sort(char *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *)); +static void msort_serial_with_tmp(char *base, size_t n, size_t s, int (*compar)(const void *, const void *), char *t); +static void get_local_rank(char *element, size_t tie_braking_rank, char *base, size_t nmemb, size_t size, size_t noffs_thistask, + long long left, long long right, size_t *loc, int (*compar)(const void *, const void *)); + +static int (*comparfunc)(const void *, const void *); +static char *median_element_list; +static size_t element_size; + +/*! \brief Wrapper for comparison of two elements. + * + * \param[in] a First element. + * \param[in] b Second element. + * + * \return (-1,0,+1) -1 if a < b. + */ +int parallel_sort_indirect_compare(const void *a, const void *b) +{ + return (*comparfunc)(median_element_list + *((int *)a) * element_size, median_element_list + *((int *)b) * element_size); +} + +/*! \brief Main function to perform a parallel sort. + * + * Using MPI_COMM_WORLD as communicator. + * + * \param[in, out] base Array to be sorted. + * \param nmemb Number of entries in array. + * \param[in] size Size of an element in array to be sorted. + * \param[in] compar Comparison function. + * + * \return Time it took to sort array. + */ +double parallel_sort(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *)) +{ + return parallel_sort_comm(base, nmemb, size, compar, MPI_COMM_WORLD); +} + +/*! \brief Function to perform a parallel sort with specified MPI communicator. + * + * \param[in, out] base Array to be sorted. + * \param[in] nmemb Number of entries in array. + * \param[in] size Size of an element in array to be sorted. + * \param[in] compar Comparison function. + * \param[in] comm MPI communicator. + * + * \return Time it took to sort array. 
+ */ +double parallel_sort_comm(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *), MPI_Comm comm) +{ + int i, j, ranks_not_found, Local_ThisTask, Local_NTask, Local_PTask, Color, new_max_loc; + size_t tie_braking_rank, new_tie_braking_rank, rank; + MPI_Comm MPI_CommLocal; + + double ta = second(); + + /* do a serial sort of the local data up front */ + serial_sort((char *)base, nmemb, size, compar); + + /* we create a communicator that contains just those tasks with nmemb > 0. This makes + * it easier to deal with CPUs that do not hold any data. + */ + if(nmemb) + Color = 1; + else + Color = 0; + + MPI_Comm_split(comm, Color, ThisTask, &MPI_CommLocal); + MPI_Comm_rank(MPI_CommLocal, &Local_ThisTask); + MPI_Comm_size(MPI_CommLocal, &Local_NTask); + + if(Local_NTask > 1 && Color == 1) + { + for(Local_PTask = 0; Local_NTask > (1 << Local_PTask); Local_PTask++) + ; + + size_t *nlist = (size_t *)mymalloc("nlist", Local_NTask * sizeof(size_t)); + size_t *noffs = (size_t *)mymalloc("noffs", Local_NTask * sizeof(size_t)); + + MPI_Allgather(&nmemb, sizeof(size_t), MPI_BYTE, nlist, sizeof(size_t), MPI_BYTE, MPI_CommLocal); + + for(i = 1, noffs[0] = 0; i < Local_NTask; i++) + noffs[i] = noffs[i - 1] + nlist[i - 1]; + + char *element_guess = mymalloc("element_guess", Local_NTask * size); + size_t *element_tie_braking_rank = mymalloc("element_tie_braking_rank", Local_NTask * sizeof(size_t)); + size_t *desired_glob_rank = mymalloc("desired_glob_rank", Local_NTask * sizeof(size_t)); + size_t *current_glob_rank = mymalloc("current_glob_rank", Local_NTask * sizeof(size_t)); + size_t *current_loc_rank = mymalloc("current_loc_rank", Local_NTask * sizeof(size_t)); + long long *range_left = mymalloc("range_left", Local_NTask * sizeof(long long)); + long long *range_right = mymalloc("range_right", Local_NTask * sizeof(long long)); + int *max_loc = mymalloc("max_loc", Local_NTask * sizeof(int)); + + size_t *list = mymalloc("list", Local_NTask * 
sizeof(size_t)); + size_t *range_len_list = mymalloc("range_len_list", Local_NTask * sizeof(long long)); + char *median_element = mymalloc("median_element", size); + median_element_list = mymalloc("median_element_list", Local_NTask * size); + size_t *tie_braking_rank_list = mymalloc("tie_braking_rank_list", Local_NTask * sizeof(size_t)); + int *index_list = mymalloc("index_list", Local_NTask * sizeof(int)); + int *max_loc_list = mymalloc("max_loc_list", Local_NTask * sizeof(int)); + size_t *source_range_len_list = mymalloc("source_range_len_list", Local_NTask * sizeof(long long)); + size_t *source_tie_braking_rank_list = mymalloc("source_tie_braking_rank_list", Local_NTask * sizeof(long long)); + char *source_median_element_list = mymalloc("source_median_element_list", Local_NTask * size); + char *new_element_guess = mymalloc("new_element_guess", size); + + for(i = 0; i < Local_NTask - 1; i++) + { + desired_glob_rank[i] = noffs[i + 1]; + current_glob_rank[i] = 0; + range_left[i] = 0; /* first element that it can be */ + range_right[i] = nmemb; /* first element that it can not be */ + } + + /* now we determine the first split element guess, which is the same for all divisions in the first iteration */ + + /* find the median of each processor, and then take the median among those values. 
+ * This should work reasonably well even for extremely skewed distributions + */ + long long range_len = range_right[0] - range_left[0]; + + if(range_len >= 1) + { + long long mid = (range_left[0] + range_right[0]) / 2; + memcpy(median_element, (char *)base + mid * size, size); + tie_braking_rank = mid + noffs[Local_ThisTask]; + } + + MPI_Gather(&range_len, sizeof(long long), MPI_BYTE, range_len_list, sizeof(long long), MPI_BYTE, 0, MPI_CommLocal); + MPI_Gather(median_element, size, MPI_BYTE, median_element_list, size, MPI_BYTE, 0, MPI_CommLocal); + MPI_Gather(&tie_braking_rank, sizeof(size_t), MPI_BYTE, tie_braking_rank_list, sizeof(size_t), MPI_BYTE, 0, MPI_CommLocal); + + if(Local_ThisTask == 0) + { + for(j = 0; j < Local_NTask; j++) + max_loc_list[j] = j; + + /* eliminate the elements that are undefined because the corresponding CPU has zero range left */ + int nleft = Local_NTask; + + for(j = 0; j < nleft; j++) + { + if(range_len_list[j] < 1) + { + range_len_list[j] = range_len_list[nleft - 1]; + if(range_len_list[nleft - 1] >= 1 && j != (nleft - 1)) + { + memcpy(median_element_list + j * size, median_element_list + (nleft - 1) * size, size); + memcpy(tie_braking_rank_list + j, tie_braking_rank_list + (nleft - 1), sizeof(size_t)); + max_loc_list[j] = max_loc_list[nleft - 1]; + } + + nleft--; + j--; + } + } + + /* do a serial sort of the remaining elements (indirectly, so that we have the order of tie braking list as well) */ + comparfunc = compar; + element_size = size; + for(j = 0; j < nleft; j++) + index_list[j] = j; + qsort(index_list, nleft, sizeof(int), parallel_sort_indirect_compare); + + /* now select the median of the medians */ + int mid = nleft / 2; + memcpy(&element_guess[0], median_element_list + index_list[mid] * size, size); + element_tie_braking_rank[0] = tie_braking_rank_list[index_list[mid]]; + max_loc[0] = max_loc_list[index_list[mid]]; + } + + MPI_Bcast(element_guess, size, MPI_BYTE, 0, MPI_CommLocal); + 
MPI_Bcast(&element_tie_braking_rank[0], sizeof(size_t), MPI_BYTE, 0, MPI_CommLocal); + MPI_Bcast(&max_loc[0], 1, MPI_INT, 0, MPI_CommLocal); + + for(i = 1; i < Local_NTask - 1; i++) + { + memcpy(element_guess + i * size, element_guess, size); + element_tie_braking_rank[i] = element_tie_braking_rank[0]; + max_loc[i] = max_loc[0]; + } + + int iter = 0; + + do + { + for(i = 0; i < Local_NTask - 1; i++) + { + if(current_glob_rank[i] != desired_glob_rank[i]) + { + get_local_rank(element_guess + i * size, element_tie_braking_rank[i], (char *)base, nmemb, size, + noffs[Local_ThisTask], range_left[i], range_right[i], ¤t_loc_rank[i], compar); + } + } + + /* now compute the global ranks by summing the local ranks */ + /* Note: the last element in current_loc_rank is not defined. It will be summed by the last processor, and stored in the last + * element of current_glob_rank */ + MPI_Alltoall(current_loc_rank, sizeof(size_t), MPI_BYTE, list, sizeof(size_t), MPI_BYTE, MPI_CommLocal); + for(j = 0, rank = 0; j < Local_NTask; j++) + rank += list[j]; + MPI_Allgather(&rank, sizeof(size_t), MPI_BYTE, current_glob_rank, sizeof(size_t), MPI_BYTE, MPI_CommLocal); + + for(i = 0, ranks_not_found = 0; i < Local_NTask - 1; i++) + { + if(current_glob_rank[i] != desired_glob_rank[i]) /* here we're not yet done */ + { + ranks_not_found++; + + if(current_glob_rank[i] < desired_glob_rank[i]) + { + range_left[i] = current_loc_rank[i]; + + if(Local_ThisTask == max_loc[i]) + range_left[i]++; + } + + if(current_glob_rank[i] > desired_glob_rank[i]) + range_right[i] = current_loc_rank[i]; + } + } + + /* now we need to determine new element guesses */ + for(i = 0; i < Local_NTask - 1; i++) + { + if(current_glob_rank[i] != desired_glob_rank[i]) /* here we're not yet done */ + { + /* find the median of each processor, and then take the median among those values. 
+ * This should work reasonably well even for extremely skewed distributions + */ + source_range_len_list[i] = range_right[i] - range_left[i]; + + if(source_range_len_list[i] >= 1) + { + long long middle = (range_left[i] + range_right[i]) / 2; + memcpy(source_median_element_list + i * size, (char *)base + middle * size, size); + source_tie_braking_rank_list[i] = middle + noffs[Local_ThisTask]; + } + } + } + + MPI_Alltoall(source_range_len_list, sizeof(long long), MPI_BYTE, range_len_list, sizeof(long long), MPI_BYTE, MPI_CommLocal); + MPI_Alltoall(source_median_element_list, size, MPI_BYTE, median_element_list, size, MPI_BYTE, MPI_CommLocal); + MPI_Alltoall(source_tie_braking_rank_list, sizeof(size_t), MPI_BYTE, tie_braking_rank_list, sizeof(size_t), MPI_BYTE, + MPI_CommLocal); + + if(Local_ThisTask < Local_NTask - 1) + { + if(current_glob_rank[Local_ThisTask] != + desired_glob_rank[Local_ThisTask]) /* in this case we're not yet done for this split point */ + { + for(j = 0; j < Local_NTask; j++) + max_loc_list[j] = j; + + /* eliminate the elements that are undefined because the corresponding CPU has zero range left */ + int nleft = Local_NTask; + + for(j = 0; j < nleft; j++) + { + if(range_len_list[j] < 1) + { + range_len_list[j] = range_len_list[nleft - 1]; + if(range_len_list[nleft - 1] >= 1 && j != (nleft - 1)) + { + memcpy(median_element_list + j * size, median_element_list + (nleft - 1) * size, size); + memcpy(tie_braking_rank_list + j, tie_braking_rank_list + (nleft - 1), sizeof(size_t)); + max_loc_list[j] = max_loc_list[nleft - 1]; + } + + nleft--; + j--; + } + } + + if((iter & 1)) + { + int max_range, maxj; + + for(j = 0, maxj = 0, max_range = 0; j < nleft; j++) + if(range_len_list[j] > max_range) + { + max_range = range_len_list[j]; + maxj = j; + } + + /* now select the median element from the task which has the largest range */ + memcpy(new_element_guess, median_element_list + maxj * size, size); + new_tie_braking_rank = tie_braking_rank_list[maxj]; + 
new_max_loc = max_loc_list[maxj]; + } + else + { + /* do a serial sort of the remaining elements (indirectly, so that we have the order of tie braking list as well) + */ + comparfunc = compar; + element_size = size; + for(j = 0; j < nleft; j++) + index_list[j] = j; + qsort(index_list, nleft, sizeof(int), parallel_sort_indirect_compare); + + /* now select the median of the medians */ + int mid = nleft / 2; + memcpy(new_element_guess, median_element_list + index_list[mid] * size, size); + new_tie_braking_rank = tie_braking_rank_list[index_list[mid]]; + new_max_loc = max_loc_list[index_list[mid]]; + } + } + else + { + /* in order to preserve existing guesses */ + memcpy(new_element_guess, element_guess + Local_ThisTask * size, size); + new_tie_braking_rank = element_tie_braking_rank[Local_ThisTask]; + new_max_loc = max_loc[Local_ThisTask]; + } + } + + MPI_Allgather(new_element_guess, size, MPI_BYTE, element_guess, size, MPI_BYTE, MPI_CommLocal); + MPI_Allgather(&new_tie_braking_rank, sizeof(size_t), MPI_BYTE, element_tie_braking_rank, sizeof(size_t), MPI_BYTE, + MPI_CommLocal); + MPI_Allgather(&new_max_loc, 1, MPI_INT, max_loc, 1, MPI_INT, MPI_CommLocal); + + iter++; + + if(iter > (MAX_ITER_PARALLEL_SORT - 100) && Local_ThisTask == 0) + { + printf("PSORT: iter=%d: ranks_not_found=%d Local_NTask=%d\n", iter, ranks_not_found, Local_NTask); + myflush(stdout); + if(iter > MAX_ITER_PARALLEL_SORT) + terminate("can't find the split points. 
That's odd"); + } + } + while(ranks_not_found); + + myfree(new_element_guess); + myfree(source_median_element_list); + myfree(source_tie_braking_rank_list); + myfree(source_range_len_list); + myfree(max_loc_list); + myfree(index_list); + myfree(tie_braking_rank_list); + myfree(median_element_list); + myfree(median_element); + + /* At this point we have found all the elements corresponding to the desired split points */ + /* we can now go ahead and determine how many elements of the local CPU have to go to each other CPU */ + + if(nmemb * size > (1LL << 31)) + terminate("currently, local data must be smaller than 2 GB"); + /* note: to restrict this limitation, the send/recv count arrays have to made 64-bit, + * and the MPI data exchange though MPI_Alltoall has to be modified such that buffers > 2 GB become possible + */ + + int *send_count = mymalloc("send_count", Local_NTask * sizeof(int)); + int *recv_count = mymalloc("recv_count", Local_NTask * sizeof(int)); + int *send_offset = mymalloc("send_offset", Local_NTask * sizeof(int)); + int *recv_offset = mymalloc("recv_offset", Local_NTask * sizeof(int)); + + for(i = 0; i < Local_NTask; i++) + send_count[i] = 0; + + int target = 0; + + for(i = 0; i < nmemb; i++) + { + while(target < Local_NTask - 1) + { + int cmp = compar((char *)base + i * size, element_guess + target * size); + if(cmp == 0) + { + if(i + noffs[Local_ThisTask] < element_tie_braking_rank[target]) + cmp = -1; + else if(i + noffs[Local_ThisTask] > element_tie_braking_rank[target]) + cmp = +1; + } + if(cmp >= 0) + target++; + else + break; + } + send_count[target]++; + } + + MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_CommLocal); + + size_t nimport; + + for(j = 0, nimport = 0, recv_offset[0] = 0, send_offset[0] = 0; j < Local_NTask; j++) + { + nimport += recv_count[j]; + + if(j > 0) + { + send_offset[j] = send_offset[j - 1] + send_count[j - 1]; + recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1]; + } + } + + if(nimport != 
nmemb) + terminate("nimport != nmemb"); + + for(j = 0; j < Local_NTask; j++) + { + send_count[j] *= size; + recv_count[j] *= size; + + send_offset[j] *= size; + recv_offset[j] *= size; + } + + char *basetmp = mymalloc("basetmp", nmemb * size); + + /* exchange the data */ + MPI_Alltoallv(base, send_count, send_offset, MPI_BYTE, basetmp, recv_count, recv_offset, MPI_BYTE, MPI_CommLocal); + + memcpy(base, basetmp, nmemb * size); + myfree(basetmp); + + serial_sort((char *)base, nmemb, size, compar); + + myfree(recv_offset); + myfree(send_offset); + myfree(recv_count); + myfree(send_count); + + myfree(range_len_list); + myfree(list); + myfree(max_loc); + myfree(range_right); + myfree(range_left); + myfree(current_loc_rank); + myfree(current_glob_rank); + myfree(desired_glob_rank); + myfree(element_tie_braking_rank); + myfree(element_guess); + myfree(noffs); + myfree(nlist); + } + + MPI_Comm_free(&MPI_CommLocal); + + double tb = second(); + return timediff(ta, tb); +} + +/*! \brief Get rank of an element. + * + * \param[in] element Element of which we want the rank. + * \param[in] tie_braking_rank The inital global rank of this element (needed + * for braking ties). + * \param[in] base Base address of local data. + * \param[in] nmemb Number of elements in array. + * \param[in] size Size of local data. + * \param[in] noffs_thistask Cumulative length of data on lower tasks. + * \param[in] left Range of elements on local task that may hold the element. + * \param[in] right Range of elements on local task that may hold the element. + * \param[out] loc Local rank of the element. + * \param[in] compar User-specified comparison function. 
+ *
+ * \return void
+ */
+static void get_local_rank(char *element, size_t tie_braking_rank, char *base, size_t nmemb, size_t size, size_t noffs_thistask,
+                           long long left, long long right, size_t *loc, int (*compar)(const void *, const void *))
+{ /* Searches the index range [left, right) of the local array for `element` and stores
+   * the resulting index (always within [left, right]) in *loc. Elements that compare
+   * equal under compar are ordered by their global index, i.e. local index plus
+   * noffs_thistask, relative to tie_braking_rank. */
+  if(right < left)
+    terminate("right < left");
+
+  if(left == 0 && right == nmemb + 1) /* NOTE(review): parallel_sort_comm starts with right = nmemb and only lowers it, so
+                                       * this shortcut looks unreachable from the caller in this file - confirm */
+    {
+      if(compar(base + (nmemb - 1) * size, element) < 0)
+        {
+          *loc = nmemb;
+          return;
+        }
+      else if(compar(base, element) > 0)
+        {
+          *loc = 0;
+          return;
+        }
+    }
+
+  if(right == left) /* looks like we already converged to the proper rank */
+    {
+      *loc = left;
+    }
+  else
+    {
+      if(compar(base + (right - 1) * size, element) < 0) /* the last element is smaller, hence all elements are on the left */
+        *loc = (right - 1) + 1;
+      else if(compar(base + left * size, element) > 0) /* the first element is already larger, hence no element is on the left */
+        *loc = left;
+      else
+        {
+          while(right > left) /* bisection; every pass either breaks or shrinks [left, right) */
+            {
+              long long mid = ((right - 1) + left) / 2;
+
+              int cmp = compar(base + mid * size, element);
+              if(cmp == 0) /* equal values: decide by global index */
+                {
+                  if(mid + noffs_thistask < tie_braking_rank)
+                    cmp = -1;
+                  else if(mid + noffs_thistask > tie_braking_rank)
+                    cmp = +1;
+                }
+
+              if(cmp == 0) /* element has exactly been found */
+                {
+                  *loc = mid;
+                  break;
+                }
+
+              if((right - 1) == left) /* a single candidate is left and it is not the element: element is not on this CPU */
+                {
+                  if(cmp < 0)
+                    *loc = mid + 1;
+                  else
+                    *loc = mid;
+                  break;
+                }
+
+              if(cmp < 0)
+                {
+                  left = mid + 1;
+                }
+              else
+                {
+                  if((right - 1) == left + 1) /* two candidates left, hence mid == left, and base[left] sorts after the element */
+                    {
+                      if(mid != left)
+                        terminate("Can't be: -->left=%lld right=%lld\n", left, right);
+
+                      *loc = left;
+                      break;
+                    }
+
+                  right = mid;
+                }
+            }
+        }
+    }
+}
+
+/*! \brief Wrapper for serial sorting algorithm.
+ *
+ * Calls a merge sort algorithm.
+ *
+ * \param[in, out] base Array to be sorted.
+ * \param[in] nmemb Number of elements in array.
+ * \param[in] size Size of each element.
+ * \param[in] compar Comparison function.
+ *
+ * \return void
+ */
+static void serial_sort(char *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *))
+{
+  /* the merge step needs scratch space for one full copy of the array */
+  char *scratch = (char *)mymalloc("tmp", nmemb * size);
+
+  msort_serial_with_tmp(base, nmemb, size, compar, scratch);
+
+  myfree(scratch);
+}
+
+/*! \brief Recursive top-down merge sort (serial).
+ *
+ *  Sorts both halves of the array recursively and merges them through the
+ *  scratch buffer t. Of two elements that compare equal, the one from the
+ *  upper half is emitted first, i.e. the sort is not stable.
+ *
+ *  \param[in, out] base Array to be sorted.
+ *  \param[in] n Number of elements.
+ *  \param[in] s Size of each element.
+ *  \param[in] compar Comparison function.
+ *  \param[in, out] t Scratch buffer; the merge writes at most n * s bytes.
+ *
+ *  \return void
+ */
+static void msort_serial_with_tmp(char *base, size_t n, size_t s, int (*compar)(const void *, const void *), char *t)
+{
+  if(n < 2)
+    return;
+
+  size_t nlower = n / 2;
+  size_t nupper = n - nlower;
+  char *lower   = base;
+  char *upper   = base + nlower * s;
+
+  msort_serial_with_tmp(lower, nlower, s, compar, t);
+  msort_serial_with_tmp(upper, nupper, s, compar, t);
+
+  char *out = t;
+
+  for(; nlower > 0 && nupper > 0; out += s)
+    {
+      if(compar(lower, upper) < 0)
+        {
+          memcpy(out, lower, s);
+          lower += s;
+          nlower--;
+        }
+      else
+        {
+          memcpy(out, upper, s);
+          upper += s;
+          nupper--;
+        }
+    }
+
+  /* leftovers of the lower half go to the scratch buffer as well ... */
+  if(nlower > 0)
+    memcpy(out, lower, nlower * s);
+
+  /* ... whereas leftovers of the upper half already sit at their final position
+   * at the end of base, so only the merged front part is copied back */
+  memcpy(base, t, (n - nupper) * s);
+}
+
+/*! \brief Test function for parallel sort.
+ *
+ * \param[in] base Array to be checked.
+ * \param[in] nmemb Number of elements in array.
+ * \param[in] size Size of each element.
+ * \param[in] compar Comparison function.
+ * + * \return void + */ +void parallel_sort_test_order(char *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *)) +{ + int i, recv, send; + size_t *nlist; + + nlist = (size_t *)mymalloc("nlist", NTask * sizeof(size_t)); + + MPI_Allgather(&nmemb, sizeof(size_t), MPI_BYTE, nlist, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD); + + for(i = 0, recv = -1; i < ThisTask && nmemb > 0; i++) + if(nlist[i] > 0) + recv = i; + + for(i = ThisTask + 1, send = -1; nmemb > 0 && i < NTask; i++) + if(nlist[i] > 0) + { + send = i; + break; + } + + char *element = mymalloc("element", size); + + MPI_Request requests[2]; + int nreq = 0; + + if(send >= 0) + MPI_Isend(base + (nmemb - 1) * size, size, MPI_BYTE, send, TAG_TRANSFER, MPI_COMM_WORLD, &requests[nreq++]); + + if(recv >= 0) + MPI_Irecv(element, size, MPI_BYTE, recv, TAG_TRANSFER, MPI_COMM_WORLD, &requests[nreq++]); + + MPI_Waitall(nreq, requests, MPI_STATUSES_IGNORE); + + if(recv >= 0) + { + for(i = 0; i < nmemb; i++) + { + if(compar(element, base + i * size) > 0) + terminate("wrong order"); + } + } + + myfree(element); + myfree(nlist); +} diff --git a/src/amuse/community/arepo/src/utils/predicates.c b/src/amuse/community/arepo/src/utils/predicates.c new file mode 100644 index 0000000000..bd06b00166 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/predicates.c @@ -0,0 +1,4292 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/predicates.c + * \date 05/2018 + * \brief + * \details + * + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + */ + +/*****************************************************************************/ +/* */ +/* Routines for Arbitrary Precision Floating-point Arithmetic */ +/* and Fast Robust Geometric Predicates */ +/* (predicates.c) */ +/* */ +/* May 18, 1996 */ +/* */ +/* Placed in the public domain by */ +/* Jonathan Richard Shewchuk */ +/* School of Computer Science */ +/* Carnegie Mellon University */ +/* 5000 Forbes Avenue */ +/* Pittsburgh, Pennsylvania 15213-3891 */ +/* jrs@cs.cmu.edu */ +/* */ +/* This file contains C implementation of algorithms for exact addition */ +/* and multiplication of floating-point numbers, and predicates for */ +/* robustly performing the orientation and incircle tests used in */ +/* computational geometry. The algorithms and underlying theory are */ +/* described in Jonathan Richard Shewchuk. "Adaptive Precision Floating- */ +/* Point Arithmetic and Fast Robust Geometric Predicates." Technical */ +/* Report CMU-CS-96-140, School of Computer Science, Carnegie Mellon */ +/* University, Pittsburgh, Pennsylvania, May 1996. (Submitted to */ +/* Discrete & Computational Geometry.) */ +/* */ +/* This file, the paper listed above, and other information are available */ +/* from the Web page http://www.cs.cmu.edu/~quake/robust.html . 
*/ +/* */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* */ +/* Using this code: */ +/* */ +/* First, read the short or long version of the paper (from the Web page */ +/* above). */ +/* */ +/* Be sure to call exactinit() once, before calling any of the arithmetic */ +/* functions or geometric predicates. Also be sure to turn on the */ +/* optimizer when compiling this file. */ +/* */ +/* */ +/* Several geometric predicates are defined. Their parameters are all */ +/* points. Each point is an array of two or three floating-point */ +/* numbers. The geometric predicates, described in the papers, are */ +/* */ +/* orient2d(pa, pb, pc) */ +/* orient2dfast(pa, pb, pc) */ +/* orient3d(pa, pb, pc, pd) */ +/* orient3dfast(pa, pb, pc, pd) */ +/* incircle(pa, pb, pc, pd) */ +/* incirclefast(pa, pb, pc, pd) */ +/* insphere(pa, pb, pc, pd, pe) */ +/* inspherefast(pa, pb, pc, pd, pe) */ +/* */ +/* Those with suffix "fast" are approximate, non-robust versions. Those */ +/* without the suffix are adaptive precision, robust versions. There */ +/* are also versions with the suffices "exact" and "slow", which are */ +/* non-adaptive, exact arithmetic versions, which I use only for timings */ +/* in my arithmetic papers. */ +/* */ +/* */ +/* An expansion is represented by an array of floating-point numbers, */ +/* sorted from smallest to largest magnitude (possibly with interspersed */ +/* zeros). The length of each expansion is stored as a separate integer, */ +/* and each arithmetic function returns an integer which is the length */ +/* of the expansion it created. */ +/* */ +/* Several arithmetic functions are defined. 
Their parameters are */ +/* */ +/* e, f Input expansions */ +/* elen, flen Lengths of input expansions (must be >= 1) */ +/* h Output expansion */ +/* b Input scalar */ +/* */ +/* The arithmetic functions are */ +/* */ +/* grow_expansion(elen, e, b, h) */ +/* grow_expansion_zeroelim(elen, e, b, h) */ +/* expansion_sum(elen, e, flen, f, h) */ +/* expansion_sum_zeroelim1(elen, e, flen, f, h) */ +/* expansion_sum_zeroelim2(elen, e, flen, f, h) */ +/* fast_expansion_sum(elen, e, flen, f, h) */ +/* fast_expansion_sum_zeroelim(elen, e, flen, f, h) */ +/* linear_expansion_sum(elen, e, flen, f, h) */ +/* linear_expansion_sum_zeroelim(elen, e, flen, f, h) */ +/* scale_expansion(elen, e, b, h) */ +/* scale_expansion_zeroelim(elen, e, b, h) */ +/* compress(elen, e, h) */ +/* */ +/* All of these are described in the long version of the paper; some are */ +/* described in the short version. All return an integer that is the */ +/* length of h. Those with suffix _zeroelim perform zero elimination, */ +/* and are recommended over their counterparts. The procedure */ +/* fast_expansion_sum_zeroelim() (or linear_expansion_sum_zeroelim() on */ +/* processors that do not use the round-to-even tiebreaking rule) is */ +/* recommended over expansion_sum_zeroelim(). Each procedure has a */ +/* little note next to it (in the code below) that tells you whether or */ +/* not the output expansion may be the same array as one of the input */ +/* expansions. */ +/* */ +/* */ +/* If you look around below, you'll also find macros for a bunch of */ +/* simple unrolled arithmetic operations, and procedures for printing */ +/* expansions (commented out because they don't work with all C */ +/* compilers) and for generating random floating-point numbers whose */ +/* significand bits are all random. 
Most of the macros have undocumented */ +/* requirements that certain of their parameters should not be the same */ +/* variable; for safety, better to make sure all the parameters are */ +/* distinct variables. Feel free to send email to jrs@cs.cmu.edu if you */ +/* have questions. */ +/* */ +/*****************************************************************************/ + +#include +#include +#include +#include + +/* On some machines, the exact arithmetic routines might be defeated by the */ +/* use of internal extended precision floating-point registers. Sometimes */ +/* this problem can be fixed by defining certain values to be volatile, */ +/* thus forcing them to be stored to memory and rounded off. This isn't */ +/* a great solution, though, as it slows the arithmetic down. */ +/* */ +/* To try this out, write "#define INEXACT volatile" below. Normally, */ +/* however, INEXACT should be defined to be nothing. ("#define INEXACT".) */ + +#define INEXACT /* Nothing */ +/* #define INEXACT volatile */ + +#define REAL double /* float or double */ +#define REALPRINT doubleprint +#define REALRAND doublerand +#define NARROWRAND narrowdoublerand +#define UNIFORMRAND uniformdoublerand + +/* Which of the following two methods of finding the absolute values is */ +/* fastest is compiler-dependent. A few compilers can inline and optimize */ +/* the fabs() call; but most will incur the overhead of a function call, */ +/* which is disastrously slow. A faster way on IEEE machines might be to */ +/* mask the appropriate bit, but that's difficult to do in C. */ + +#define Absolute(a) ((a) >= 0.0 ? (a) : -(a)) +/* #define Absolute(a) fabs(a) */ + +/* Many of the operations are broken up into two pieces, a main part that */ +/* performs an approximate operation, and a "tail" that computes the */ +/* roundoff error of that operation. 
*/ +/* */ +/* The operations Fast_Two_Sum(), Fast_Two_Diff(), Two_Sum(), Two_Diff(), */ +/* Split(), and Two_Product() are all implemented as described in the */ +/* reference. Each of these macros requires certain variables to be */ +/* defined in the calling routine. The variables `bvirt', `c', `abig', */ +/* `_i', `_j', `_k', `_l', `_m', and `_n' are declared `INEXACT' because */ +/* they store the result of an operation that may incur roundoff error. */ +/* The input parameter `x' (or the highest numbered `x_' parameter) must */ +/* also be declared `INEXACT'. */ + +#define Fast_Two_Sum_Tail(a, b, x, y) \ + bvirt = x - a; \ + y = b - bvirt + +#define Fast_Two_Sum(a, b, x, y) \ + x = (REAL)(a + b); \ + Fast_Two_Sum_Tail(a, b, x, y) + +#define Fast_Two_Diff_Tail(a, b, x, y) \ + bvirt = a - x; \ + y = bvirt - b + +#define Fast_Two_Diff(a, b, x, y) \ + x = (REAL)(a - b); \ + Fast_Two_Diff_Tail(a, b, x, y) + +#define Two_Sum_Tail(a, b, x, y) \ + bvirt = (REAL)(x - a); \ + avirt = x - bvirt; \ + bround = b - bvirt; \ + around = a - avirt; \ + y = around + bround + +#define Two_Sum(a, b, x, y) \ + x = (REAL)(a + b); \ + Two_Sum_Tail(a, b, x, y) + +#define Two_Diff_Tail(a, b, x, y) \ + bvirt = (REAL)(a - x); \ + avirt = x + bvirt; \ + bround = bvirt - b; \ + around = a - avirt; \ + y = around + bround + +#define Two_Diff(a, b, x, y) \ + x = (REAL)(a - b); \ + Two_Diff_Tail(a, b, x, y) + +#define Split(a, ahi, alo) \ + c = (REAL)(splitter * a); \ + abig = (REAL)(c - a); \ + ahi = c - abig; \ + alo = a - ahi + +#define Two_Product_Tail(a, b, x, y) \ + Split(a, ahi, alo); \ + Split(b, bhi, blo); \ + err1 = x - (ahi * bhi); \ + err2 = err1 - (alo * bhi); \ + err3 = err2 - (ahi * blo); \ + y = (alo * blo) - err3 + +#define Two_Product(a, b, x, y) \ + x = (REAL)(a * b); \ + Two_Product_Tail(a, b, x, y) + +/* Two_Product_Presplit() is Two_Product() where one of the inputs has */ +/* already been split. Avoids redundant splitting. 
*/ + +#define Two_Product_Presplit(a, b, bhi, blo, x, y) \ + x = (REAL)(a * b); \ + Split(a, ahi, alo); \ + err1 = x - (ahi * bhi); \ + err2 = err1 - (alo * bhi); \ + err3 = err2 - (ahi * blo); \ + y = (alo * blo) - err3 + +/* Two_Product_2Presplit() is Two_Product() where both of the inputs have */ +/* already been split. Avoids redundant splitting. */ + +#define Two_Product_2Presplit(a, ahi, alo, b, bhi, blo, x, y) \ + x = (REAL)(a * b); \ + err1 = x - (ahi * bhi); \ + err2 = err1 - (alo * bhi); \ + err3 = err2 - (ahi * blo); \ + y = (alo * blo) - err3 + +/* Square() can be done more quickly than Two_Product(). */ + +#define Square_Tail(a, x, y) \ + Split(a, ahi, alo); \ + err1 = x - (ahi * ahi); \ + err3 = err1 - ((ahi + ahi) * alo); \ + y = (alo * alo) - err3 + +#define Square(a, x, y) \ + x = (REAL)(a * a); \ + Square_Tail(a, x, y) + +/* Macros for summing expansions of various fixed lengths. These are all */ +/* unrolled versions of Expansion_Sum(). */ + +#define Two_One_Sum(a1, a0, b, x2, x1, x0) \ + Two_Sum(a0, b, _i, x0); \ + Two_Sum(a1, _i, x2, x1) + +#define Two_One_Diff(a1, a0, b, x2, x1, x0) \ + Two_Diff(a0, b, _i, x0); \ + Two_Sum(a1, _i, x2, x1) + +#define Two_Two_Sum(a1, a0, b1, b0, x3, x2, x1, x0) \ + Two_One_Sum(a1, a0, b0, _j, _0, x0); \ + Two_One_Sum(_j, _0, b1, x3, x2, x1) + +#define Two_Two_Diff(a1, a0, b1, b0, x3, x2, x1, x0) \ + Two_One_Diff(a1, a0, b0, _j, _0, x0); \ + Two_One_Diff(_j, _0, b1, x3, x2, x1) + +#define Four_One_Sum(a3, a2, a1, a0, b, x4, x3, x2, x1, x0) \ + Two_One_Sum(a1, a0, b, _j, x1, x0); \ + Two_One_Sum(a3, a2, _j, x4, x3, x2) + +#define Four_Two_Sum(a3, a2, a1, a0, b1, b0, x5, x4, x3, x2, x1, x0) \ + Four_One_Sum(a3, a2, a1, a0, b0, _k, _2, _1, _0, x0); \ + Four_One_Sum(_k, _2, _1, _0, b1, x5, x4, x3, x2, x1) + +#define Four_Four_Sum(a3, a2, a1, a0, b4, b3, b1, b0, x7, x6, x5, x4, x3, x2, x1, x0) \ + Four_Two_Sum(a3, a2, a1, a0, b1, b0, _l, _2, _1, _0, x1, x0); \ + Four_Two_Sum(_l, _2, _1, _0, b4, b3, x7, x6, x5, x4, x3, 
x2) + +#define Eight_One_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b, x8, x7, x6, x5, x4, x3, x2, x1, x0) \ + Four_One_Sum(a3, a2, a1, a0, b, _j, x3, x2, x1, x0); \ + Four_One_Sum(a7, a6, a5, a4, _j, x8, x7, x6, x5, x4) + +#define Eight_Two_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b1, b0, x9, x8, x7, x6, x5, x4, x3, x2, x1, x0) \ + Eight_One_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b0, _k, _6, _5, _4, _3, _2, _1, _0, x0); \ + Eight_One_Sum(_k, _6, _5, _4, _3, _2, _1, _0, b1, x9, x8, x7, x6, x5, x4, x3, x2, x1) + +#define Eight_Four_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b4, b3, b1, b0, x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1, x0) \ + Eight_Two_Sum(a7, a6, a5, a4, a3, a2, a1, a0, b1, b0, _l, _6, _5, _4, _3, _2, _1, _0, x1, x0); \ + Eight_Two_Sum(_l, _6, _5, _4, _3, _2, _1, _0, b4, b3, x11, x10, x9, x8, x7, x6, x5, x4, x3, x2) + +/* Macros for multiplying expansions of various fixed lengths. */ + +#define Two_One_Product(a1, a0, b, x3, x2, x1, x0) \ + Split(b, bhi, blo); \ + Two_Product_Presplit(a0, b, bhi, blo, _i, x0); \ + Two_Product_Presplit(a1, b, bhi, blo, _j, _0); \ + Two_Sum(_i, _0, _k, x1); \ + Fast_Two_Sum(_j, _k, x3, x2) + +#define Four_One_Product(a3, a2, a1, a0, b, x7, x6, x5, x4, x3, x2, x1, x0) \ + Split(b, bhi, blo); \ + Two_Product_Presplit(a0, b, bhi, blo, _i, x0); \ + Two_Product_Presplit(a1, b, bhi, blo, _j, _0); \ + Two_Sum(_i, _0, _k, x1); \ + Fast_Two_Sum(_j, _k, _i, x2); \ + Two_Product_Presplit(a2, b, bhi, blo, _j, _0); \ + Two_Sum(_i, _0, _k, x3); \ + Fast_Two_Sum(_j, _k, _i, x4); \ + Two_Product_Presplit(a3, b, bhi, blo, _j, _0); \ + Two_Sum(_i, _0, _k, x5); \ + Fast_Two_Sum(_j, _k, x7, x6) + +#define Two_Two_Product(a1, a0, b1, b0, x7, x6, x5, x4, x3, x2, x1, x0) \ + Split(a0, a0hi, a0lo); \ + Split(b0, bhi, blo); \ + Two_Product_2Presplit(a0, a0hi, a0lo, b0, bhi, blo, _i, x0); \ + Split(a1, a1hi, a1lo); \ + Two_Product_2Presplit(a1, a1hi, a1lo, b0, bhi, blo, _j, _0); \ + Two_Sum(_i, _0, _k, _1); \ + Fast_Two_Sum(_j, _k, _l, _2); \ + Split(b1, bhi, 
blo); \ + Two_Product_2Presplit(a0, a0hi, a0lo, b1, bhi, blo, _i, _0); \ + Two_Sum(_1, _0, _k, x1); \ + Two_Sum(_2, _k, _j, _1); \ + Two_Sum(_l, _j, _m, _2); \ + Two_Product_2Presplit(a1, a1hi, a1lo, b1, bhi, blo, _j, _0); \ + Two_Sum(_i, _0, _n, _0); \ + Two_Sum(_1, _0, _i, x2); \ + Two_Sum(_2, _i, _k, _1); \ + Two_Sum(_m, _k, _l, _2); \ + Two_Sum(_j, _n, _k, _0); \ + Two_Sum(_1, _0, _j, x3); \ + Two_Sum(_2, _j, _i, _1); \ + Two_Sum(_l, _i, _m, _2); \ + Two_Sum(_1, _k, _i, x4); \ + Two_Sum(_2, _i, _k, x5); \ + Two_Sum(_m, _k, x7, x6) + +/* An expansion of length two can be squared more quickly than finding the */ +/* product of two different expansions of length two, and the result is */ +/* guaranteed to have no more than six (rather than eight) components. */ + +#define Two_Square(a1, a0, x5, x4, x3, x2, x1, x0) \ + Square(a0, _j, x0); \ + _0 = a0 + a0; \ + Two_Product(a1, _0, _k, _1); \ + Two_One_Sum(_k, _1, _j, _l, _2, x1); \ + Square(a1, _j, _1); \ + Two_Two_Sum(_j, _1, _l, _2, x5, x4, x3, x2) + +REAL splitter; /* = 2^ceiling(p / 2) + 1. Used to split floats in half. */ +REAL epsilon; /* = 2^(-p). Used to estimate roundoff errors. */ + +/* A set of coefficients used to calculate maximum roundoff errors. */ +REAL resulterrbound; +REAL ccwerrboundA, ccwerrboundB, ccwerrboundC; +REAL o3derrboundA, o3derrboundB, o3derrboundC; +REAL iccerrboundA, iccerrboundB, iccerrboundC; +REAL isperrboundA, isperrboundB, isperrboundC; + +/*****************************************************************************/ +/* */ +/* doubleprint() Print the bit representation of a double. */ +/* */ +/* Useful for debugging exact arithmetic routines. 
*/ +/* */ +/*****************************************************************************/ + +/* +void doubleprint(number) +double number; +{ + unsigned long long no; + unsigned long long sign, expo; + int exponent; + int i, bottomi; + + no = *(unsigned long long *) &number; + sign = no & 0x8000000000000000ll; + expo = (no >> 52) & 0x7ffll; + exponent = (int) expo; + exponent = exponent - 1023; + if (sign) { + printf("-"); + } else { + printf(" "); + } + if (exponent == -1023) { + printf( + "0.0000000000000000000000000000000000000000000000000000_ ( )"); + } else { + printf("1."); + bottomi = -1; + for (i = 0; i < 52; i++) { + if (no & 0x0008000000000000ll) { + printf("1"); + bottomi = i; + } else { + printf("0"); + } + no <<= 1; + } + printf("_%d (%d)", exponent, exponent - 1 - bottomi); + } +} +*/ + +/*****************************************************************************/ +/* */ +/* floatprint() Print the bit representation of a float. */ +/* */ +/* Useful for debugging exact arithmetic routines. */ +/* */ +/*****************************************************************************/ + +/* +void floatprint(number) +float number; +{ + unsigned no; + unsigned sign, expo; + int exponent; + int i, bottomi; + + no = *(unsigned *) &number; + sign = no & 0x80000000; + expo = (no >> 23) & 0xff; + exponent = (int) expo; + exponent = exponent - 127; + if (sign) { + printf("-"); + } else { + printf(" "); + } + if (exponent == -127) { + printf("0.00000000000000000000000_ ( )"); + } else { + printf("1."); + bottomi = -1; + for (i = 0; i < 23; i++) { + if (no & 0x00400000) { + printf("1"); + bottomi = i; + } else { + printf("0"); + } + no <<= 1; + } + printf("_%3d (%3d)", exponent, exponent - 1 - bottomi); + } +} +*/ + +/*****************************************************************************/ +/* */ +/* expansion_print() Print the bit representation of an expansion. */ +/* */ +/* Useful for debugging exact arithmetic routines. 
*/
+/*                                                                           */
+/*****************************************************************************/
+
+/*
+void expansion_print(elen, e)
+int elen;
+REAL *e;
+{
+  int i;
+
+  for (i = elen - 1; i >= 0; i--) {
+    REALPRINT(e[i]);
+    if (i > 0) {
+      printf(" +\n");
+    } else {
+      printf("\n");
+    }
+  }
+}
+*/
+
+/*****************************************************************************/
+/*                                                                           */
+/*  doublerand()   Generate a double with random 53-bit significand and a    */
+/*                 random exponent in [0, 511].                              */
+/*                                                                           */
+/*****************************************************************************/
+
+double doublerand()
+{
+  double result;
+  double expo;
+  long a, b, c;
+  long i;
+
+  /* NOTE(review): the constants below presume random() yields nonnegative   */
+  /* values below 2^31 (as POSIX random() does) -- confirm for the target.   */
+  a = random();
+  b = random();
+  c = random();
+  /* 1073741824 = 2^30 recentres a around zero; 8388608.0 = 2^23 shifts it   */
+  /* above the bits contributed by the upper part of b (b >> 8).             */
+  result = (double)(a - 1073741824) * 8388608.0 + (double)(b >> 8);
+  /* i visits the bits 2^9 .. 2^17 of c while expo is squared each step      */
+  /* (2, 4, 16, ...), so the selected factors multiply to 2^k, 0 <= k <= 511. */
+  for(i = 512, expo = 2; i <= 131072; i *= 2, expo = expo * expo)
+  {
+    if(c & i)
+    {
+      result *= expo;
+    }
+  }
+  return result;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  narrowdoublerand()   Generate a double with random 53-bit significand    */
+/*                       and a random exponent in [0, 7].                    */
+/*                                                                           */
+/*****************************************************************************/
+
+double narrowdoublerand()
+{
+  double result;
+  double expo;
+  long a, b, c;
+  long i;
+
+  a = random();
+  b = random();
+  c = random();
+  /* Same significand construction as doublerand(). */
+  result = (double)(a - 1073741824) * 8388608.0 + (double)(b >> 8);
+  /* Only the three bits 2^9 .. 2^11 of c are used, so the scale factor is   */
+  /* 2^k with 0 <= k <= 7.                                                   */
+  for(i = 512, expo = 2; i <= 2048; i *= 2, expo = expo * expo)
+  {
+    if(c & i)
+    {
+      result *= expo;
+    }
+  }
+  return result;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  uniformdoublerand()   Generate a double with random 53-bit significand.
*/
+/*                                                                           */
+/*****************************************************************************/
+
+double uniformdoublerand()
+{
+  double result;
+  long a, b;
+
+  /* NOTE(review): presumes random() yields nonnegative values below 2^31 -- */
+  /* confirm for the target platform.                                        */
+  a = random();
+  b = random();
+  /* (a - 2^30) * 2^23 + (b >> 8); no exponent scaling is applied here. */
+  result = (double)(a - 1073741824) * 8388608.0 + (double)(b >> 8);
+  return result;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  floatrand()   Generate a float with random 24-bit significand and a      */
+/*                random exponent in [0, 63].                                */
+/*                                                                           */
+/*****************************************************************************/
+
+float floatrand()
+{
+  float result;
+  float expo;
+  long a, c;
+  long i;
+
+  a = random();
+  c = random();
+  /* NOTE(review): (a - 2^30) can be negative; right-shifting a negative     */
+  /* signed value is implementation-defined in C.                            */
+  result = (float)((a - 1073741824) >> 6);
+  /* The six bits 2^9 .. 2^14 of c select factors 2, 4, 16, ... so the scale */
+  /* factor is 2^k with 0 <= k <= 63.                                        */
+  for(i = 512, expo = 2; i <= 16384; i *= 2, expo = expo * expo)
+  {
+    if(c & i)
+    {
+      result *= expo;
+    }
+  }
+  return result;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  narrowfloatrand()   Generate a float with random 24-bit significand and  */
+/*                      a random exponent in [0, 7].                         */
+/*                                                                           */
+/*****************************************************************************/
+
+float narrowfloatrand()
+{
+  float result;
+  float expo;
+  long a, c;
+  long i;
+
+  a = random();
+  c = random();
+  /* Same significand construction as floatrand(). */
+  result = (float)((a - 1073741824) >> 6);
+  /* Only the three bits 2^9 .. 2^11 of c are used: scale 2^k, 0 <= k <= 7. */
+  for(i = 512, expo = 2; i <= 2048; i *= 2, expo = expo * expo)
+  {
+    if(c & i)
+    {
+      result *= expo;
+    }
+  }
+  return result;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  uniformfloatrand()   Generate a float with random 24-bit significand.    */
+/*                                                                           */
+/*****************************************************************************/
+
+float uniformfloatrand()
+{
+  float result;
+  long a;
+
+  a = random();
+  /* Same significand construction as floatrand(), without exponent scaling. */
+  result = (float)((a - 1073741824) >> 6);
+  return result;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  exactinit()   Initialize the variables used for exact arithmetic.
*/
+/*                                                                           */
+/*  `epsilon' is the largest power of two such that 1.0 + epsilon = 1.0 in   */
+/*  floating-point arithmetic.  `epsilon' bounds the relative roundoff       */
+/*  error.  It is used for floating-point error analysis.                    */
+/*                                                                           */
+/*  `splitter' is used to split floating-point numbers into two half-        */
+/*  length significands for exact multiplication.                            */
+/*                                                                           */
+/*  I imagine that a highly optimizing compiler might be too smart for its   */
+/*  own good, and somehow cause this routine to fail, if it pretends that    */
+/*  floating-point arithmetic is too much like real arithmetic.              */
+/*                                                                           */
+/*  Don't change this routine unless you fully understand it.                */
+/*                                                                           */
+/*****************************************************************************/
+
+void exactinit()
+{
+  REAL half;
+  REAL check, lastcheck;
+  int every_other; /* toggles each iteration so splitter doubles on every   */
+                   /* other halving of epsilon                              */
+
+  every_other = 1;
+  half = 0.5;
+  epsilon = 1.0;
+  splitter = 1.0;
+  check = 1.0;
+  /* Repeatedly divide `epsilon' by two until it is too small to add to    */
+  /*   one without causing roundoff.  (Also check if the sum is equal to   */
+  /*   the previous sum, for machines that round up instead of using exact */
+  /*   rounding.  Not that this library will work on such machines anyway.) */
+  do
+  {
+    lastcheck = check;
+    epsilon *= half;
+    if(every_other)
+    {
+      splitter *= 2.0;
+    }
+    every_other = !every_other;
+    check = 1.0 + epsilon;
+  }
+  while((check != 1.0) && (check != lastcheck));
+  splitter += 1.0;
+
+  /* Error bounds for orientation and incircle tests. */
+  /* Each coefficient is a small polynomial in epsilon; the ...C bounds      */
+  /* carry an extra factor of epsilon compared with the ...A/...B bounds.    */
+  resulterrbound = (3.0 + 8.0 * epsilon) * epsilon;
+  ccwerrboundA = (3.0 + 16.0 * epsilon) * epsilon;
+  ccwerrboundB = (2.0 + 12.0 * epsilon) * epsilon;
+  ccwerrboundC = (9.0 + 64.0 * epsilon) * epsilon * epsilon;
+  o3derrboundA = (7.0 + 56.0 * epsilon) * epsilon;
+  o3derrboundB = (3.0 + 28.0 * epsilon) * epsilon;
+  o3derrboundC = (26.0 + 288.0 * epsilon) * epsilon * epsilon;
+  iccerrboundA = (10.0 + 96.0 * epsilon) * epsilon;
+  iccerrboundB = (4.0 + 48.0 * epsilon) * epsilon;
+  iccerrboundC = (44.0 + 576.0 * epsilon) * epsilon * epsilon;
+  isperrboundA = (16.0 + 224.0 * epsilon) * epsilon;
+  isperrboundB = (5.0 + 72.0 * epsilon) * epsilon;
+  isperrboundC = (71.0 + 1408.0 * epsilon) * epsilon * epsilon;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  grow_expansion()   Add a scalar to an expansion.                         */
+/*                                                                           */
+/*  Sets h = e + b.  See the long version of my paper for details.           */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), maintains the strongly nonoverlapping and nonadjacent    */
+/*  properties as well.  (That is, if e has one of these properties, so      */
+/*  will h.)                                                                 */
+/*                                                                           */
+/*****************************************************************************/
+
+int grow_expansion(elen, e, b, h) /* e and h can be the same. */
+  int elen; /* number of components in e */
+REAL *e;    /* input expansion, e[0 .. elen-1] */
+REAL b;     /* scalar to add */
+REAL *h;    /* output; elen + 1 components are written */
+{
+  REAL Q;
+  INEXACT REAL Qnew;
+  int eindex;
+  REAL enow;
+  /* Not used directly below; presumably scratch for the Two_Sum macro,     */
+  /* which is defined outside this part of the file.                        */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+
+  /* Q is the running value carried from one component to the next; each    */
+  /* Two_Sum writes one output component into h[eindex].                    */
+  Q = b;
+  for(eindex = 0; eindex < elen; eindex++)
+  {
+    enow = e[eindex];
+    Two_Sum(Q, enow, Qnew, h[eindex]);
+    Q = Qnew;
+  }
+  h[eindex] = Q;
+  /* Always elen + 1: zero components are not removed here. */
+  return eindex + 1;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  grow_expansion_zeroelim()   Add a scalar to an expansion, eliminating    */
+/*                              zero components from the output expansion.   */
+/*                                                                           */
+/*  Sets h = e + b.  See the long version of my paper for details.
*/
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), maintains the strongly nonoverlapping and nonadjacent    */
+/*  properties as well.  (That is, if e has one of these properties, so      */
+/*  will h.)                                                                 */
+/*                                                                           */
+/*****************************************************************************/
+
+int grow_expansion_zeroelim(elen, e, b, h) /* e and h can be the same. */
+  int elen; /* number of components in e */
+REAL *e;    /* input expansion, e[0 .. elen-1] */
+REAL b;     /* scalar to add */
+REAL *h;    /* output; at most elen + 1 components are written */
+{
+  REAL Q, hh;
+  INEXACT REAL Qnew;
+  int eindex, hindex;
+  REAL enow;
+  /* Not used directly below; presumably scratch for the Two_Sum macro,     */
+  /* which is defined outside this part of the file.                        */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+
+  hindex = 0;
+  Q = b;
+  for(eindex = 0; eindex < elen; eindex++)
+  {
+    enow = e[eindex];
+    Two_Sum(Q, enow, Qnew, hh);
+    Q = Qnew;
+    /* Only nonzero components are stored, so hindex never exceeds eindex. */
+    if(hh != 0.0)
+    {
+      h[hindex++] = hh;
+    }
+  }
+  /* The final value is stored if it is nonzero, or if nothing has been     */
+  /* stored yet -- the result therefore always has at least one component.  */
+  if((Q != 0.0) || (hindex == 0))
+  {
+    h[hindex++] = Q;
+  }
+  /* Number of components actually written to h. */
+  return hindex;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  expansion_sum()   Sum two expansions.                                    */
+/*                                                                           */
+/*  Sets h = e + f.  See the long version of my paper for details.           */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), maintains the nonadjacent property as well.  (That is,   */
+/*  if e has one of these properties, so will h.)  Does NOT maintain the     */
+/*  strongly nonoverlapping property.                                        */
+/*                                                                           */
+/*****************************************************************************/
+
+int expansion_sum(elen, e, flen, f, h)
+  /* e and h can be the same, but f and h cannot.
*/
+  int elen; /* number of components in e */
+REAL *e;    /* first input expansion */
+int flen;   /* number of components in f; f[0] is read unconditionally */
+REAL *f;    /* second input expansion */
+REAL *h;    /* output; elen + flen components are written */
+{
+  REAL Q;
+  INEXACT REAL Qnew;
+  int findex, hindex, hlast;
+  REAL hnow;
+  /* Not used directly below; presumably scratch for the Two_Sum macro,     */
+  /* which is defined outside this part of the file.                        */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+
+  /* First pass: add f[0] to every component of e, writing h[0 .. elen]. */
+  Q = f[0];
+  for(hindex = 0; hindex < elen; hindex++)
+  {
+    hnow = e[hindex];
+    Two_Sum(Q, hnow, Qnew, h[hindex]);
+    Q = Qnew;
+  }
+  h[hindex] = Q;
+  hlast = hindex;
+  /* Each further f[findex] is added into h in place, starting at           */
+  /* h[findex]; the final carry extends h by one component.                 */
+  for(findex = 1; findex < flen; findex++)
+  {
+    Q = f[findex];
+    for(hindex = findex; hindex <= hlast; hindex++)
+    {
+      hnow = h[hindex];
+      Two_Sum(Q, hnow, Qnew, h[hindex]);
+      Q = Qnew;
+    }
+    h[++hlast] = Q;
+  }
+  /* hlast starts at elen and grows by one per extra f component, so this   */
+  /* is elen + flen.                                                        */
+  return hlast + 1;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  expansion_sum_zeroelim1()   Sum two expansions, eliminating zero         */
+/*                              components from the output expansion.        */
+/*                                                                           */
+/*  Sets h = e + f.  See the long version of my paper for details.           */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), maintains the nonadjacent property as well.  (That is,   */
+/*  if e has one of these properties, so will h.)  Does NOT maintain the     */
+/*  strongly nonoverlapping property.                                        */
+/*                                                                           */
+/*****************************************************************************/
+
+int expansion_sum_zeroelim1(elen, e, flen, f, h)
+  /* e and h can be the same, but f and h cannot.
*/
+  int elen; /* number of components in e */
+REAL *e;    /* first input expansion */
+int flen;   /* number of components in f; f[0] is read unconditionally */
+REAL *f;    /* second input expansion */
+REAL *h;    /* output; needs room for elen + flen components while summing */
+{
+  REAL Q;
+  INEXACT REAL Qnew;
+  int index, findex, hindex, hlast;
+  REAL hnow;
+  /* Not used directly below; presumably scratch for the Two_Sum macro,     */
+  /* which is defined outside this part of the file.                        */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+
+  /* First pass: add f[0] to every component of e, writing h[0 .. elen]. */
+  Q = f[0];
+  for(hindex = 0; hindex < elen; hindex++)
+  {
+    hnow = e[hindex];
+    Two_Sum(Q, hnow, Qnew, h[hindex]);
+    Q = Qnew;
+  }
+  h[hindex] = Q;
+  hlast = hindex;
+  /* Remaining components of f are added into h in place. */
+  for(findex = 1; findex < flen; findex++)
+  {
+    Q = f[findex];
+    for(hindex = findex; hindex <= hlast; hindex++)
+    {
+      hnow = h[hindex];
+      Two_Sum(Q, hnow, Qnew, h[hindex]);
+      Q = Qnew;
+    }
+    h[++hlast] = Q;
+  }
+  /* Compaction pass: slide the nonzero components down to the front of h. */
+  hindex = -1;
+  for(index = 0; index <= hlast; index++)
+  {
+    hnow = h[index];
+    if(hnow != 0.0)
+    {
+      h[++hindex] = hnow;
+    }
+  }
+  /* hindex == -1 means every component was zero; h[0] is then itself zero, */
+  /* and a length of 1 is reported so the result is never empty.            */
+  if(hindex == -1)
+  {
+    return 1;
+  }
+  else
+  {
+    return hindex + 1;
+  }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  expansion_sum_zeroelim2()   Sum two expansions, eliminating zero         */
+/*                              components from the output expansion.        */
+/*                                                                           */
+/*  Sets h = e + f.  See the long version of my paper for details.           */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), maintains the nonadjacent property as well.  (That is,   */
+/*  if e has one of these properties, so will h.)  Does NOT maintain the     */
+/*  strongly nonoverlapping property.                                        */
+/*                                                                           */
+/*****************************************************************************/
+
+int expansion_sum_zeroelim2(elen, e, flen, f, h)
+  /* e and h can be the same, but f and h cannot.
*/
+  int elen; /* number of components in e */
+REAL *e;    /* first input expansion */
+int flen;   /* number of components in f; f[0] is read unconditionally */
+REAL *f;    /* second input expansion */
+REAL *h;    /* output expansion */
+{
+  REAL Q, hh;
+  INEXACT REAL Qnew;
+  int eindex, findex, hindex, hlast;
+  REAL enow;
+  /* Not used directly below; presumably scratch for the Two_Sum macro,     */
+  /* which is defined outside this part of the file.                        */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+
+  /* Unlike expansion_sum_zeroelim1(), zeros are dropped during each pass   */
+  /* rather than in a separate compaction pass at the end.                  */
+  hindex = 0;
+  Q = f[0];
+  for(eindex = 0; eindex < elen; eindex++)
+  {
+    enow = e[eindex];
+    Two_Sum(Q, enow, Qnew, hh);
+    Q = Qnew;
+    if(hh != 0.0)
+    {
+      h[hindex++] = hh;
+    }
+  }
+  /* The carry is stored unconditionally, even when it is zero. */
+  h[hindex] = Q;
+  hlast = hindex;
+  for(findex = 1; findex < flen; findex++)
+  {
+    /* Every pass rereads h from index 0 and rewrites it in place; the      */
+    /* write index hindex never runs ahead of the read index eindex.        */
+    hindex = 0;
+    Q = f[findex];
+    for(eindex = 0; eindex <= hlast; eindex++)
+    {
+      enow = h[eindex];
+      Two_Sum(Q, enow, Qnew, hh);
+      Q = Qnew;
+      if(hh != 0)
+      {
+        h[hindex++] = hh;
+      }
+    }
+    h[hindex] = Q;
+    hlast = hindex;
+  }
+  return hlast + 1;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  fast_expansion_sum()   Sum two expansions.                               */
+/*                                                                           */
+/*  Sets h = e + f.  See the long version of my paper for details.           */
+/*                                                                           */
+/*  If round-to-even is used (as with IEEE 754), maintains the strongly      */
+/*  nonoverlapping property.  (That is, if e is strongly nonoverlapping, h   */
+/*  will be also.)  Does NOT maintain the nonoverlapping or nonadjacent      */
+/*  properties.                                                              */
+/*                                                                           */
+/*****************************************************************************/
+
+int fast_expansion_sum(elen, e, flen, f, h) /* h cannot be e or f.
*/ + int elen; +REAL *e; +int flen; +REAL *f; +REAL *h; +{ + REAL Q; + INEXACT REAL Qnew; + INEXACT REAL bvirt; + REAL avirt, bround, around; + int eindex, findex, hindex; + REAL enow, fnow; + + enow = e[0]; + fnow = f[0]; + eindex = findex = 0; + if((fnow > enow) == (fnow > -enow)) + { + Q = enow; + enow = e[++eindex]; + } + else + { + Q = fnow; + fnow = f[++findex]; + } + hindex = 0; + if((eindex < elen) && (findex < flen)) + { + if((fnow > enow) == (fnow > -enow)) + { + Fast_Two_Sum(enow, Q, Qnew, h[0]); + enow = e[++eindex]; + } + else + { + Fast_Two_Sum(fnow, Q, Qnew, h[0]); + fnow = f[++findex]; + } + Q = Qnew; + hindex = 1; + while((eindex < elen) && (findex < flen)) + { + if((fnow > enow) == (fnow > -enow)) + { + Two_Sum(Q, enow, Qnew, h[hindex]); + enow = e[++eindex]; + } + else + { + Two_Sum(Q, fnow, Qnew, h[hindex]); + fnow = f[++findex]; + } + Q = Qnew; + hindex++; + } + } + while(eindex < elen) + { + Two_Sum(Q, enow, Qnew, h[hindex]); + enow = e[++eindex]; + Q = Qnew; + hindex++; + } + while(findex < flen) + { + Two_Sum(Q, fnow, Qnew, h[hindex]); + fnow = f[++findex]; + Q = Qnew; + hindex++; + } + h[hindex] = Q; + return hindex + 1; +} + +/*****************************************************************************/ +/* */ +/* fast_expansion_sum_zeroelim() Sum two expansions, eliminating zero */ +/* components from the output expansion. */ +/* */ +/* Sets h = e + f. See the long version of my paper for details. */ +/* */ +/* If round-to-even is used (as with IEEE 754), maintains the strongly */ +/* nonoverlapping property. (That is, if e is strongly nonoverlapping, h */ +/* will be also.) Does NOT maintain the nonoverlapping or nonadjacent */ +/* properties. */ +/* */ +/*****************************************************************************/ + +int fast_expansion_sum_zeroelim(elen, e, flen, f, h) /* h cannot be e or f. 
*/ + int elen; +REAL *e; +int flen; +REAL *f; +REAL *h; +{ + REAL Q; + INEXACT REAL Qnew; + INEXACT REAL hh; + INEXACT REAL bvirt; + REAL avirt, bround, around; + int eindex, findex, hindex; + REAL enow, fnow; + + enow = e[0]; + fnow = f[0]; + eindex = findex = 0; + if((fnow > enow) == (fnow > -enow)) + { + Q = enow; + enow = e[++eindex]; + } + else + { + Q = fnow; + fnow = f[++findex]; + } + hindex = 0; + if((eindex < elen) && (findex < flen)) + { + if((fnow > enow) == (fnow > -enow)) + { + Fast_Two_Sum(enow, Q, Qnew, hh); + enow = e[++eindex]; + } + else + { + Fast_Two_Sum(fnow, Q, Qnew, hh); + fnow = f[++findex]; + } + Q = Qnew; + if(hh != 0.0) + { + h[hindex++] = hh; + } + while((eindex < elen) && (findex < flen)) + { + if((fnow > enow) == (fnow > -enow)) + { + Two_Sum(Q, enow, Qnew, hh); + enow = e[++eindex]; + } + else + { + Two_Sum(Q, fnow, Qnew, hh); + fnow = f[++findex]; + } + Q = Qnew; + if(hh != 0.0) + { + h[hindex++] = hh; + } + } + } + while(eindex < elen) + { + Two_Sum(Q, enow, Qnew, hh); + enow = e[++eindex]; + Q = Qnew; + if(hh != 0.0) + { + h[hindex++] = hh; + } + } + while(findex < flen) + { + Two_Sum(Q, fnow, Qnew, hh); + fnow = f[++findex]; + Q = Qnew; + if(hh != 0.0) + { + h[hindex++] = hh; + } + } + if((Q != 0.0) || (hindex == 0)) + { + h[hindex++] = Q; + } + return hindex; +} + +/*****************************************************************************/ +/* */ +/* linear_expansion_sum() Sum two expansions. */ +/* */ +/* Sets h = e + f. See either version of my paper for details. */ +/* */ +/* Maintains the nonoverlapping property. (That is, if e is */ +/* nonoverlapping, h will be also.) */ +/* */ +/*****************************************************************************/ + +int linear_expansion_sum(elen, e, flen, f, h) /* h cannot be e or f. 
*/ + int elen; +REAL *e; +int flen; +REAL *f; +REAL *h; +{ + REAL Q, q; + INEXACT REAL Qnew; + INEXACT REAL R; + INEXACT REAL bvirt; + REAL avirt, bround, around; + int eindex, findex, hindex; + REAL enow, fnow; + REAL g0; + + enow = e[0]; + fnow = f[0]; + eindex = findex = 0; + if((fnow > enow) == (fnow > -enow)) + { + g0 = enow; + enow = e[++eindex]; + } + else + { + g0 = fnow; + fnow = f[++findex]; + } + if((eindex < elen) && ((findex >= flen) || ((fnow > enow) == (fnow > -enow)))) + { + Fast_Two_Sum(enow, g0, Qnew, q); + enow = e[++eindex]; + } + else + { + Fast_Two_Sum(fnow, g0, Qnew, q); + fnow = f[++findex]; + } + Q = Qnew; + for(hindex = 0; hindex < elen + flen - 2; hindex++) + { + if((eindex < elen) && ((findex >= flen) || ((fnow > enow) == (fnow > -enow)))) + { + Fast_Two_Sum(enow, q, R, h[hindex]); + enow = e[++eindex]; + } + else + { + Fast_Two_Sum(fnow, q, R, h[hindex]); + fnow = f[++findex]; + } + Two_Sum(Q, R, Qnew, q); + Q = Qnew; + } + h[hindex] = q; + h[hindex + 1] = Q; + return hindex + 2; +} + +/*****************************************************************************/ +/* */ +/* linear_expansion_sum_zeroelim() Sum two expansions, eliminating zero */ +/* components from the output expansion. */ +/* */ +/* Sets h = e + f. See either version of my paper for details. */ +/* */ +/* Maintains the nonoverlapping property. (That is, if e is */ +/* nonoverlapping, h will be also.) */ +/* */ +/*****************************************************************************/ + +int linear_expansion_sum_zeroelim(elen, e, flen, f, h) /* h cannot be e or f. 
*/ + int elen; +REAL *e; +int flen; +REAL *f; +REAL *h; +{ + REAL Q, q, hh; + INEXACT REAL Qnew; + INEXACT REAL R; + INEXACT REAL bvirt; + REAL avirt, bround, around; + int eindex, findex, hindex; + int count; + REAL enow, fnow; + REAL g0; + + enow = e[0]; + fnow = f[0]; + eindex = findex = 0; + hindex = 0; + if((fnow > enow) == (fnow > -enow)) + { + g0 = enow; + enow = e[++eindex]; + } + else + { + g0 = fnow; + fnow = f[++findex]; + } + if((eindex < elen) && ((findex >= flen) || ((fnow > enow) == (fnow > -enow)))) + { + Fast_Two_Sum(enow, g0, Qnew, q); + enow = e[++eindex]; + } + else + { + Fast_Two_Sum(fnow, g0, Qnew, q); + fnow = f[++findex]; + } + Q = Qnew; + for(count = 2; count < elen + flen; count++) + { + if((eindex < elen) && ((findex >= flen) || ((fnow > enow) == (fnow > -enow)))) + { + Fast_Two_Sum(enow, q, R, hh); + enow = e[++eindex]; + } + else + { + Fast_Two_Sum(fnow, q, R, hh); + fnow = f[++findex]; + } + Two_Sum(Q, R, Qnew, q); + Q = Qnew; + if(hh != 0) + { + h[hindex++] = hh; + } + } + if(q != 0) + { + h[hindex++] = q; + } + if((Q != 0.0) || (hindex == 0)) + { + h[hindex++] = Q; + } + return hindex; +} + +/*****************************************************************************/ +/* */ +/* scale_expansion() Multiply an expansion by a scalar. */ +/* */ +/* Sets h = be. See either version of my paper for details. */ +/* */ +/* Maintains the nonoverlapping property. If round-to-even is used (as */ +/* with IEEE 754), maintains the strongly nonoverlapping and nonadjacent */ +/* properties as well. (That is, if e has one of these properties, so */ +/* will h.) */ +/* */ +/*****************************************************************************/ + +int scale_expansion(elen, e, b, h) /* e and h cannot be the same. 
*/
+  int elen; /* number of components in e (>= 1; e[0] is always read) */
+REAL *e;    /* input expansion */
+REAL b;     /* scalar multiplier */
+REAL *h;    /* output; 2 * elen components are written */
+{
+  INEXACT REAL Q;
+  INEXACT REAL sum;
+  INEXACT REAL product1;
+  REAL product0;
+  int eindex, hindex;
+  REAL enow;
+  /* The locals from bvirt to err3 (except bhi/blo) are not used directly   */
+  /* below; presumably they are scratch for the Split, Two_Product_Presplit */
+  /* and Two_Sum macros, which are defined outside this part of the file.   */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo;
+  REAL err1, err2, err3;
+
+  /* b is split once; bhi/blo are reused for every product below. */
+  Split(b, bhi, blo);
+  Two_Product_Presplit(e[0], b, bhi, blo, Q, h[0]);
+  hindex = 1;
+  for(eindex = 1; eindex < elen; eindex++)
+  {
+    enow = e[eindex];
+    Two_Product_Presplit(enow, b, bhi, blo, product1, product0);
+    /* Each further input component contributes two output components. */
+    Two_Sum(Q, product0, sum, h[hindex]);
+    hindex++;
+    Two_Sum(product1, sum, Q, h[hindex]);
+    hindex++;
+  }
+  h[hindex] = Q;
+  /* 1 + 2 * (elen - 1) + 1 components were written, i.e. 2 * elen. */
+  return elen + elen;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  scale_expansion_zeroelim()   Multiply an expansion by a scalar,          */
+/*                               eliminating zero components from the        */
+/*                               output expansion.                           */
+/*                                                                           */
+/*  Sets h = be.  See either version of my paper for details.                */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), maintains the strongly nonoverlapping and nonadjacent    */
+/*  properties as well.  (That is, if e has one of these properties, so      */
+/*  will h.)                                                                 */
+/*                                                                           */
+/*****************************************************************************/
+
+int scale_expansion_zeroelim(elen, e, b, h) /* e and h cannot be the same.
*/
+  int elen; /* number of components in e (>= 1; e[0] is always read) */
+REAL *e;    /* input expansion */
+REAL b;     /* scalar multiplier */
+REAL *h;    /* output; at most 2 * elen components are written */
+{
+  INEXACT REAL Q, sum;
+  REAL hh;
+  INEXACT REAL product1;
+  REAL product0;
+  int eindex, hindex;
+  REAL enow;
+  /* The locals from bvirt to err3 (except bhi/blo) are not used directly   */
+  /* below; presumably they are scratch for the Split, Two_Product_Presplit, */
+  /* Two_Sum and Fast_Two_Sum macros, defined outside this part of the file. */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo;
+  REAL err1, err2, err3;
+
+  /* b is split once; bhi/blo are reused for every product below. */
+  Split(b, bhi, blo);
+  Two_Product_Presplit(e[0], b, bhi, blo, Q, hh);
+  hindex = 0;
+  /* Only nonzero components are stored in h. */
+  if(hh != 0)
+  {
+    h[hindex++] = hh;
+  }
+  for(eindex = 1; eindex < elen; eindex++)
+  {
+    enow = e[eindex];
+    Two_Product_Presplit(enow, b, bhi, blo, product1, product0);
+    Two_Sum(Q, product0, sum, hh);
+    if(hh != 0)
+    {
+      h[hindex++] = hh;
+    }
+    /* NOTE(review): this step uses Fast_Two_Sum where scale_expansion()    */
+    /* uses Two_Sum for the same operands.                                  */
+    Fast_Two_Sum(product1, sum, Q, hh);
+    if(hh != 0)
+    {
+      h[hindex++] = hh;
+    }
+  }
+  /* The final value is stored if it is nonzero, or if nothing has been     */
+  /* stored yet -- the result therefore always has at least one component.  */
+  if((Q != 0.0) || (hindex == 0))
+  {
+    h[hindex++] = Q;
+  }
+  /* Number of components actually written to h. */
+  return hindex;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  compress()   Compress an expansion.                                      */
+/*                                                                           */
+/*  See the long version of my paper for details.                            */
+/*                                                                           */
+/*  Maintains the nonoverlapping property.  If round-to-even is used (as     */
+/*  with IEEE 754), then any nonoverlapping expansion is converted to a      */
+/*  nonadjacent expansion.                                                   */
+/*                                                                           */
+/*****************************************************************************/
+
+int compress(elen, e, h) /* e and h may be the same.
*/
+  int elen; /* number of components in e (>= 1; e[elen - 1] is always read) */
+REAL *e;    /* input expansion */
+REAL *h;    /* output expansion; at most elen components are written */
+{
+  REAL Q, q;
+  INEXACT REAL Qnew;
+  int eindex, hindex;
+  /* Not used directly below; presumably scratch for the Fast_Two_Sum       */
+  /* macro, which is defined outside this part of the file.                 */
+  INEXACT REAL bvirt;
+  REAL enow, hnow;
+  int top, bottom;
+
+  /* Downward pass: walk e from its last component to its first, storing    */
+  /* intermediate values at the top end of h (h[bottom], h[bottom - 1], ...) */
+  /* whenever Fast_Two_Sum leaves a nonzero second output q.                */
+  bottom = elen - 1;
+  Q = e[bottom];
+  for(eindex = elen - 2; eindex >= 0; eindex--)
+  {
+    enow = e[eindex];
+    Fast_Two_Sum(Q, enow, Qnew, q);
+    if(q != 0)
+    {
+      h[bottom--] = Qnew;
+      Q = q;
+    }
+    else
+    {
+      Q = Qnew;
+    }
+  }
+  /* Upward pass: re-process the values stored above (h[bottom + 1] up to   */
+  /* h[elen - 1]), writing the nonzero results from h[0] upward.            */
+  top = 0;
+  for(hindex = bottom + 1; hindex < elen; hindex++)
+  {
+    hnow = h[hindex];
+    Fast_Two_Sum(hnow, Q, Qnew, q);
+    if(q != 0)
+    {
+      h[top++] = q;
+    }
+    Q = Qnew;
+  }
+  h[top] = Q;
+  /* Number of components in the compressed expansion. */
+  return top + 1;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  estimate()   Produce a one-word estimate of an expansion's value.        */
+/*                                                                           */
+/*  See either version of my paper for details.                              */
+/*                                                                           */
+/*****************************************************************************/
+
+REAL estimate(elen, e) int elen; /* number of components in e (>= 1) */
+REAL *e;                         /* expansion to be summed */
+{
+  REAL Q;
+  int eindex;
+
+  /* Plain floating-point sum of the components in index order; the result  */
+  /* is subject to ordinary roundoff.                                       */
+  Q = e[0];
+  for(eindex = 1; eindex < elen; eindex++)
+  {
+    Q += e[eindex];
+  }
+  return Q;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  orient2dfast()   Approximate 2D orientation test.  Nonrobust.            */
+/*  orient2dexact()   Exact 2D orientation test.  Robust.                    */
+/*  orient2dslow()   Another exact 2D orientation test.  Robust.             */
+/*  orient2d()   Adaptive exact 2D orientation test.  Robust.                */
+/*                                                                           */
+/*               Return a positive value if the points pa, pb, and pc occur  */
+/*               in counterclockwise order; a negative value if they occur   */
+/*               in clockwise order; and zero if they are collinear.  The    */
+/*               result is also a rough approximation of twice the signed    */
+/*               area of the triangle defined by the three points.           */
+/*                                                                           */
+/*  Only the first and last routine should be used; the middle two are for   */
+/*  timings.                                                                 */
+/*                                                                           */
+/*  The last three use exact arithmetic to ensure a correct answer.  The     */
+/*  result returned is the determinant of a matrix.
In orient2d() only,  */
+/*  this determinant is computed adaptively, in the sense that exact         */
+/*  arithmetic is used only to the degree it is needed to ensure that the    */
+/*  returned value has the correct sign.  Hence, orient2d() is usually quite */
+/*  fast, but will run more slowly when the input points are collinear or    */
+/*  nearly so.                                                               */
+/*                                                                           */
+/*****************************************************************************/
+
+/* Plain floating-point evaluation of                                        */
+/*   (pa.x - pc.x) * (pb.y - pc.y) - (pa.y - pc.y) * (pb.x - pc.x);          */
+/* each point is an array indexed [0] = x, [1] = y.                          */
+REAL orient2dfast(pa, pb, pc) REAL *pa;
+REAL *pb;
+REAL *pc;
+{
+  REAL acx, bcx, acy, bcy;
+
+  acx = pa[0] - pc[0];
+  bcx = pb[0] - pc[0];
+  acy = pa[1] - pc[1];
+  bcy = pb[1] - pc[1];
+  return acx * bcy - acy * bcx;
+}
+
+/* Builds three four-component expansions (aterms, bterms, cterms) from      */
+/* pairwise coordinate products, sums them into w, and returns the last      */
+/* component of w.                                                           */
+REAL orient2dexact(pa, pb, pc) REAL *pa;
+REAL *pb;
+REAL *pc;
+{
+  INEXACT REAL axby1, axcy1, bxcy1, bxay1, cxay1, cxby1;
+  REAL axby0, axcy0, bxcy0, bxay0, cxay0, cxby0;
+  REAL aterms[4], bterms[4], cterms[4];
+  INEXACT REAL aterms3, bterms3, cterms3;
+  REAL v[8], w[12];
+  int vlength, wlength;
+
+  /* The locals below are not used directly; presumably they are scratch    */
+  /* for the Two_Product / Two_Two_Diff macros defined outside this view.   */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j;
+  REAL _0;
+
+  /* aterms = pa.x * pb.y - pa.x * pc.y */
+  Two_Product(pa[0], pb[1], axby1, axby0);
+  Two_Product(pa[0], pc[1], axcy1, axcy0);
+  Two_Two_Diff(axby1, axby0, axcy1, axcy0, aterms3, aterms[2], aterms[1], aterms[0]);
+  aterms[3] = aterms3;
+
+  /* bterms = pb.x * pc.y - pb.x * pa.y */
+  Two_Product(pb[0], pc[1], bxcy1, bxcy0);
+  Two_Product(pb[0], pa[1], bxay1, bxay0);
+  Two_Two_Diff(bxcy1, bxcy0, bxay1, bxay0, bterms3, bterms[2], bterms[1], bterms[0]);
+  bterms[3] = bterms3;
+
+  /* cterms = pc.x * pa.y - pc.x * pb.y */
+  Two_Product(pc[0], pa[1], cxay1, cxay0);
+  Two_Product(pc[0], pb[1], cxby1, cxby0);
+  Two_Two_Diff(cxay1, cxay0, cxby1, cxby0, cterms3, cterms[2], cterms[1], cterms[0]);
+  cterms[3] = cterms3;
+
+  vlength = fast_expansion_sum_zeroelim(4, aterms, 4, bterms, v);
+  wlength = fast_expansion_sum_zeroelim(vlength, v, 4, cterms, w);
+
+  return w[wlength - 1];
+}
+
+/* Forms the coordinate differences relative to pc as two-component values,  */
+/* multiplies them with Two_Two_Product, and returns the last component of   */
+/* the summed expansion.                                                     */
+REAL orient2dslow(pa, pb, pc) REAL *pa;
+REAL *pb;
+REAL *pc;
+{
+  INEXACT REAL acx, acy, bcx, bcy;
+  REAL acxtail, acytail;
+  REAL bcxtail, bcytail;
+  REAL negate, negatetail;
+  REAL axby[8], bxay[8];
+  INEXACT REAL axby7, bxay7;
+  REAL deter[16];
+  int deterlen;
+
+  /* Temporaries named by the Two_Diff / Two_Two_Product macros. */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL a0hi, a0lo, a1hi, a1lo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j, _k, _l, _m, _n;
+  REAL _0, _1, _2;
+
+  Two_Diff(pa[0], pc[0], acx, acxtail);
+  Two_Diff(pa[1], pc[1], acy, acytail);
+  Two_Diff(pb[0], pc[0], bcx, bcxtail);
+  Two_Diff(pb[1], pc[1], bcy, bcytail);
+
+  Two_Two_Product(acx, acxtail, bcy, bcytail, axby7, axby[6], axby[5], axby[4], axby[3], axby[2], axby[1], axby[0]);
+  axby[7] = axby7;
+  /* acy is negated before the second product so that the two products can  */
+  /* simply be added below.                                                 */
+  negate = -acy;
+  negatetail = -acytail;
+  Two_Two_Product(bcx, bcxtail, negate, negatetail, bxay7, bxay[6], bxay[5], bxay[4], bxay[3], bxay[2], bxay[1], bxay[0]);
+  bxay[7] = bxay7;
+
+  deterlen = fast_expansion_sum_zeroelim(8, axby, 8, bxay, deter);
+
+  return deter[deterlen - 1];
+}
+
+/* Staged evaluation called from orient2d().  detsum is supplied by the      */
+/* caller and scales the error bounds.  Each stage returns as soon as its    */
+/* estimate exceeds the corresponding bound in magnitude.                    */
+REAL orient2dadapt(pa, pb, pc, detsum) REAL *pa;
+REAL *pb;
+REAL *pc;
+REAL detsum;
+{
+  INEXACT REAL acx, acy, bcx, bcy;
+  REAL acxtail, acytail, bcxtail, bcytail;
+  INEXACT REAL detleft, detright;
+  REAL detlefttail, detrighttail;
+  REAL det, errbound;
+  REAL B[4], C1[8], C2[12], D[16];
+  INEXACT REAL B3;
+  int C1length, C2length, Dlength;
+  REAL u[4];
+  INEXACT REAL u3;
+  INEXACT REAL s1, t1;
+  REAL s0, t0;
+
+  /* The locals below are not used directly; presumably they are scratch    */
+  /* for the Two_Product / Two_Two_Diff / Two_Diff_Tail macros.             */
+  INEXACT REAL bvirt;
+  REAL avirt, bround, around;
+  INEXACT REAL c;
+  INEXACT REAL abig;
+  REAL ahi, alo, bhi, blo;
+  REAL err1, err2, err3;
+  INEXACT REAL _i, _j;
+  REAL _0;
+
+  acx = (REAL)(pa[0] - pc[0]);
+  bcx = (REAL)(pb[0] - pc[0]);
+  acy = (REAL)(pa[1] - pc[1]);
+  bcy = (REAL)(pb[1] - pc[1]);
+
+  /* Stage B: expansion of acx * bcy - acy * bcx from the rounded           */
+  /* differences computed above.                                            */
+  Two_Product(acx, bcy, detleft, detlefttail);
+  Two_Product(acy, bcx, detright, detrighttail);
+
+  Two_Two_Diff(detleft, detlefttail, detright, detrighttail, B3, B[2], B[1], B[0]);
+  B[3] = B3;
+
+  det = estimate(4, B);
+  errbound = ccwerrboundB * detsum;
+  if((det >= errbound) || (-det >= errbound))
+  {
+    return det;
+  }
+
+  /* Obtain the tails of the four coordinate differences. */
+  Two_Diff_Tail(pa[0], pc[0], acx, acxtail);
+  Two_Diff_Tail(pb[0], pc[0], bcx, bcxtail);
+  Two_Diff_Tail(pa[1], pc[1], acy, acytail);
+  Two_Diff_Tail(pb[1], pc[1], bcy, bcytail);
+
+  /* With all four tails zero there is nothing further to add to det. */
+  if((acxtail == 0.0) && (acytail == 0.0) && (bcxtail == 0.0) && (bcytail == 0.0))
+  {
+    return det;
+  }
+
+  /* Stage C: add the head-times-tail correction terms to the estimate. */
+  errbound = ccwerrboundC * detsum + resulterrbound * Absolute(det);
+  det += (acx * bcytail + bcy * acxtail) - (acy * bcxtail + bcx * acytail);
+  if((det >= errbound) || (-det >= errbound))
+  {
+    return det;
+  }
+
+  /* Final stage: accumulate the tail*head, head*tail and tail*tail terms   */
+  /* into B one after another (C1, C2, D) and return the last component.    */
+  Two_Product(acxtail, bcy, s1, s0);
+  Two_Product(acytail, bcx, t1, t0);
+  Two_Two_Diff(s1, s0, t1, t0, u3, u[2], u[1], u[0]);
+  u[3] = u3;
+  C1length = fast_expansion_sum_zeroelim(4, B, 4, u, C1);
+
+  Two_Product(acx, bcytail, s1, s0);
+  Two_Product(acy, bcxtail, t1, t0);
+  Two_Two_Diff(s1, s0, t1, t0, u3, u[2], u[1], u[0]);
+  u[3] = u3;
+  C2length = fast_expansion_sum_zeroelim(C1length, C1, 4, u, C2);
+
+  Two_Product(acxtail, bcytail, s1, s0);
+  Two_Product(acytail, bcxtail, t1, t0);
+  Two_Two_Diff(s1, s0, t1, t0, u3, u[2], u[1], u[0]);
+  u[3] = u3;
+  Dlength = fast_expansion_sum_zeroelim(C2length, C2, 4, u, D);
+
+  return (D[Dlength - 1]);
+}
+
+/* Entry point of the adaptive test: evaluates the determinant in plain      */
+/* floating point and falls back to orient2dadapt() only when the result is  */
+/* smaller in magnitude than ccwerrboundA * detsum.                          */
+REAL orient2d(pa, pb, pc) REAL *pa;
+REAL *pb;
+REAL *pc;
+{
+  REAL detleft, detright, det;
+  REAL detsum, errbound;
+
+  detleft = (pa[0] - pc[0]) * (pb[1] - pc[1]);
+  detright = (pa[1] - pc[1]) * (pb[0] - pc[0]);
+  det = detleft - detright;
+
+  /* det is returned at once unless detleft and detright are both strictly  */
+  /* positive or both strictly negative; in those two cases detsum becomes  */
+  /* |detleft| + |detright|.                                                */
+  if(detleft > 0.0)
+  {
+    if(detright <= 0.0)
+    {
+      return det;
+    }
+    else
+    {
+      detsum = detleft + detright;
+    }
+  }
+  else if(detleft < 0.0)
+  {
+    if(detright >= 0.0)
+    {
+      return det;
+    }
+    else
+    {
+      detsum = -detleft - detright;
+    }
+  }
+  else
+  {
+    return det;
+  }
+
+  errbound = ccwerrboundA * detsum;
+  if((det >= errbound) || (-det >= errbound))
+  {
+    return det;
+  }
+
+  return orient2dadapt(pa, pb, pc, detsum);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  orient3dfast()   Approximate 3D orientation test.
Nonrobust. */ +/* orient3dexact() Exact 3D orientation test. Robust. */ +/* orient3dslow() Another exact 3D orientation test. Robust. */ +/* orient3d() Adaptive exact 3D orientation test. Robust. */ +/* */ +/* Return a positive value if the point pd lies below the */ +/* plane passing through pa, pb, and pc; "below" is defined so */ +/* that pa, pb, and pc appear in counterclockwise order when */ +/* viewed from above the plane. Returns a negative value if */ +/* pd lies above the plane. Returns zero if the points are */ +/* coplanar. The result is also a rough approximation of six */ +/* times the signed volume of the tetrahedron defined by the */ +/* four points. */ +/* */ +/* Only the first and last routine should be used; the middle two are for */ +/* timings. */ +/* */ +/* The last three use exact arithmetic to ensure a correct answer. The */ +/* result returned is the determinant of a matrix. In orient3d() only, */ +/* this determinant is computed adaptively, in the sense that exact */ +/* arithmetic is used only to the degree it is needed to ensure that the */ +/* returned value has the correct sign. Hence, orient3d() is usually quite */ +/* fast, but will run more slowly when the input points are coplanar or */ +/* nearly so. 
*/ +/* */ +/*****************************************************************************/ + +REAL orient3dfast(pa, pb, pc, pd) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +{ + REAL adx, bdx, cdx; + REAL ady, bdy, cdy; + REAL adz, bdz, cdz; + + adx = pa[0] - pd[0]; + bdx = pb[0] - pd[0]; + cdx = pc[0] - pd[0]; + ady = pa[1] - pd[1]; + bdy = pb[1] - pd[1]; + cdy = pc[1] - pd[1]; + adz = pa[2] - pd[2]; + bdz = pb[2] - pd[2]; + cdz = pc[2] - pd[2]; + + return adx * (bdy * cdz - bdz * cdy) + bdx * (cdy * adz - cdz * ady) + cdx * (ady * bdz - adz * bdy); +} + +REAL orient3dexact(pa, pb, pc, pd) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +{ + INEXACT REAL axby1, bxcy1, cxdy1, dxay1, axcy1, bxdy1; + INEXACT REAL bxay1, cxby1, dxcy1, axdy1, cxay1, dxby1; + REAL axby0, bxcy0, cxdy0, dxay0, axcy0, bxdy0; + REAL bxay0, cxby0, dxcy0, axdy0, cxay0, dxby0; + REAL ab[4], bc[4], cd[4], da[4], ac[4], bd[4]; + REAL temp8[8]; + int templen; + REAL abc[12], bcd[12], cda[12], dab[12]; + int abclen, bcdlen, cdalen, dablen; + REAL adet[24], bdet[24], cdet[24], ddet[24]; + int alen, blen, clen, dlen; + REAL abdet[48], cddet[48]; + int ablen, cdlen; + REAL deter[96]; + int deterlen; + int i; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j; + REAL _0; + + Two_Product(pa[0], pb[1], axby1, axby0); + Two_Product(pb[0], pa[1], bxay1, bxay0); + Two_Two_Diff(axby1, axby0, bxay1, bxay0, ab[3], ab[2], ab[1], ab[0]); + + Two_Product(pb[0], pc[1], bxcy1, bxcy0); + Two_Product(pc[0], pb[1], cxby1, cxby0); + Two_Two_Diff(bxcy1, bxcy0, cxby1, cxby0, bc[3], bc[2], bc[1], bc[0]); + + Two_Product(pc[0], pd[1], cxdy1, cxdy0); + Two_Product(pd[0], pc[1], dxcy1, dxcy0); + Two_Two_Diff(cxdy1, cxdy0, dxcy1, dxcy0, cd[3], cd[2], cd[1], cd[0]); + + Two_Product(pd[0], pa[1], dxay1, dxay0); + Two_Product(pa[0], pd[1], axdy1, axdy0); + Two_Two_Diff(dxay1, dxay0, axdy1, axdy0, da[3], da[2], da[1], 
da[0]); + + Two_Product(pa[0], pc[1], axcy1, axcy0); + Two_Product(pc[0], pa[1], cxay1, cxay0); + Two_Two_Diff(axcy1, axcy0, cxay1, cxay0, ac[3], ac[2], ac[1], ac[0]); + + Two_Product(pb[0], pd[1], bxdy1, bxdy0); + Two_Product(pd[0], pb[1], dxby1, dxby0); + Two_Two_Diff(bxdy1, bxdy0, dxby1, dxby0, bd[3], bd[2], bd[1], bd[0]); + + templen = fast_expansion_sum_zeroelim(4, cd, 4, da, temp8); + cdalen = fast_expansion_sum_zeroelim(templen, temp8, 4, ac, cda); + templen = fast_expansion_sum_zeroelim(4, da, 4, ab, temp8); + dablen = fast_expansion_sum_zeroelim(templen, temp8, 4, bd, dab); + for(i = 0; i < 4; i++) + { + bd[i] = -bd[i]; + ac[i] = -ac[i]; + } + templen = fast_expansion_sum_zeroelim(4, ab, 4, bc, temp8); + abclen = fast_expansion_sum_zeroelim(templen, temp8, 4, ac, abc); + templen = fast_expansion_sum_zeroelim(4, bc, 4, cd, temp8); + bcdlen = fast_expansion_sum_zeroelim(templen, temp8, 4, bd, bcd); + + alen = scale_expansion_zeroelim(bcdlen, bcd, pa[2], adet); + blen = scale_expansion_zeroelim(cdalen, cda, -pb[2], bdet); + clen = scale_expansion_zeroelim(dablen, dab, pc[2], cdet); + dlen = scale_expansion_zeroelim(abclen, abc, -pd[2], ddet); + + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + cdlen = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet); + deterlen = fast_expansion_sum_zeroelim(ablen, abdet, cdlen, cddet, deter); + + return deter[deterlen - 1]; +} + +REAL orient3dslow(pa, pb, pc, pd) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +{ + INEXACT REAL adx, ady, adz, bdx, bdy, bdz, cdx, cdy, cdz; + REAL adxtail, adytail, adztail; + REAL bdxtail, bdytail, bdztail; + REAL cdxtail, cdytail, cdztail; + REAL negate, negatetail; + INEXACT REAL axby7, bxcy7, axcy7, bxay7, cxby7, cxay7; + REAL axby[8], bxcy[8], axcy[8], bxay[8], cxby[8], cxay[8]; + REAL temp16[16], temp32[32], temp32t[32]; + int temp16len, temp32len, temp32tlen; + REAL adet[64], bdet[64], cdet[64]; + int alen, blen, clen; + REAL abdet[128]; + int ablen; + REAL 
deter[192]; + int deterlen; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL a0hi, a0lo, a1hi, a1lo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j, _k, _l, _m, _n; + REAL _0, _1, _2; + + Two_Diff(pa[0], pd[0], adx, adxtail); + Two_Diff(pa[1], pd[1], ady, adytail); + Two_Diff(pa[2], pd[2], adz, adztail); + Two_Diff(pb[0], pd[0], bdx, bdxtail); + Two_Diff(pb[1], pd[1], bdy, bdytail); + Two_Diff(pb[2], pd[2], bdz, bdztail); + Two_Diff(pc[0], pd[0], cdx, cdxtail); + Two_Diff(pc[1], pd[1], cdy, cdytail); + Two_Diff(pc[2], pd[2], cdz, cdztail); + + Two_Two_Product(adx, adxtail, bdy, bdytail, axby7, axby[6], axby[5], axby[4], axby[3], axby[2], axby[1], axby[0]); + axby[7] = axby7; + negate = -ady; + negatetail = -adytail; + Two_Two_Product(bdx, bdxtail, negate, negatetail, bxay7, bxay[6], bxay[5], bxay[4], bxay[3], bxay[2], bxay[1], bxay[0]); + bxay[7] = bxay7; + Two_Two_Product(bdx, bdxtail, cdy, cdytail, bxcy7, bxcy[6], bxcy[5], bxcy[4], bxcy[3], bxcy[2], bxcy[1], bxcy[0]); + bxcy[7] = bxcy7; + negate = -bdy; + negatetail = -bdytail; + Two_Two_Product(cdx, cdxtail, negate, negatetail, cxby7, cxby[6], cxby[5], cxby[4], cxby[3], cxby[2], cxby[1], cxby[0]); + cxby[7] = cxby7; + Two_Two_Product(cdx, cdxtail, ady, adytail, cxay7, cxay[6], cxay[5], cxay[4], cxay[3], cxay[2], cxay[1], cxay[0]); + cxay[7] = cxay7; + negate = -cdy; + negatetail = -cdytail; + Two_Two_Product(adx, adxtail, negate, negatetail, axcy7, axcy[6], axcy[5], axcy[4], axcy[3], axcy[2], axcy[1], axcy[0]); + axcy[7] = axcy7; + + temp16len = fast_expansion_sum_zeroelim(8, bxcy, 8, cxby, temp16); + temp32len = scale_expansion_zeroelim(temp16len, temp16, adz, temp32); + temp32tlen = scale_expansion_zeroelim(temp16len, temp16, adztail, temp32t); + alen = fast_expansion_sum_zeroelim(temp32len, temp32, temp32tlen, temp32t, adet); + + temp16len = fast_expansion_sum_zeroelim(8, cxay, 8, axcy, temp16); + temp32len = scale_expansion_zeroelim(temp16len, 
temp16, bdz, temp32); + temp32tlen = scale_expansion_zeroelim(temp16len, temp16, bdztail, temp32t); + blen = fast_expansion_sum_zeroelim(temp32len, temp32, temp32tlen, temp32t, bdet); + + temp16len = fast_expansion_sum_zeroelim(8, axby, 8, bxay, temp16); + temp32len = scale_expansion_zeroelim(temp16len, temp16, cdz, temp32); + temp32tlen = scale_expansion_zeroelim(temp16len, temp16, cdztail, temp32t); + clen = fast_expansion_sum_zeroelim(temp32len, temp32, temp32tlen, temp32t, cdet); + + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + deterlen = fast_expansion_sum_zeroelim(ablen, abdet, clen, cdet, deter); + + return deter[deterlen - 1]; +} + +REAL orient3dadapt(pa, pb, pc, pd, permanent) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +REAL permanent; +{ + INEXACT REAL adx, bdx, cdx, ady, bdy, cdy, adz, bdz, cdz; + REAL det, errbound; + + INEXACT REAL bdxcdy1, cdxbdy1, cdxady1, adxcdy1, adxbdy1, bdxady1; + REAL bdxcdy0, cdxbdy0, cdxady0, adxcdy0, adxbdy0, bdxady0; + REAL bc[4], ca[4], ab[4]; + INEXACT REAL bc3, ca3, ab3; + REAL adet[8], bdet[8], cdet[8]; + int alen, blen, clen; + REAL abdet[16]; + int ablen; + REAL *finnow, *finother, *finswap; + REAL fin1[192], fin2[192]; + int finlength; + + REAL adxtail, bdxtail, cdxtail; + REAL adytail, bdytail, cdytail; + REAL adztail, bdztail, cdztail; + INEXACT REAL at_blarge, at_clarge; + INEXACT REAL bt_clarge, bt_alarge; + INEXACT REAL ct_alarge, ct_blarge; + REAL at_b[4], at_c[4], bt_c[4], bt_a[4], ct_a[4], ct_b[4]; + int at_blen, at_clen, bt_clen, bt_alen, ct_alen, ct_blen; + INEXACT REAL bdxt_cdy1, cdxt_bdy1, cdxt_ady1; + INEXACT REAL adxt_cdy1, adxt_bdy1, bdxt_ady1; + REAL bdxt_cdy0, cdxt_bdy0, cdxt_ady0; + REAL adxt_cdy0, adxt_bdy0, bdxt_ady0; + INEXACT REAL bdyt_cdx1, cdyt_bdx1, cdyt_adx1; + INEXACT REAL adyt_cdx1, adyt_bdx1, bdyt_adx1; + REAL bdyt_cdx0, cdyt_bdx0, cdyt_adx0; + REAL adyt_cdx0, adyt_bdx0, bdyt_adx0; + REAL bct[8], cat[8], abt[8]; + int bctlen, catlen, abtlen; + INEXACT REAL 
bdxt_cdyt1, cdxt_bdyt1, cdxt_adyt1; + INEXACT REAL adxt_cdyt1, adxt_bdyt1, bdxt_adyt1; + REAL bdxt_cdyt0, cdxt_bdyt0, cdxt_adyt0; + REAL adxt_cdyt0, adxt_bdyt0, bdxt_adyt0; + REAL u[4], v[12], w[16]; + INEXACT REAL u3; + int vlength, wlength; + REAL negate; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j, _k; + REAL _0; + + adx = (REAL)(pa[0] - pd[0]); + bdx = (REAL)(pb[0] - pd[0]); + cdx = (REAL)(pc[0] - pd[0]); + ady = (REAL)(pa[1] - pd[1]); + bdy = (REAL)(pb[1] - pd[1]); + cdy = (REAL)(pc[1] - pd[1]); + adz = (REAL)(pa[2] - pd[2]); + bdz = (REAL)(pb[2] - pd[2]); + cdz = (REAL)(pc[2] - pd[2]); + + Two_Product(bdx, cdy, bdxcdy1, bdxcdy0); + Two_Product(cdx, bdy, cdxbdy1, cdxbdy0); + Two_Two_Diff(bdxcdy1, bdxcdy0, cdxbdy1, cdxbdy0, bc3, bc[2], bc[1], bc[0]); + bc[3] = bc3; + alen = scale_expansion_zeroelim(4, bc, adz, adet); + + Two_Product(cdx, ady, cdxady1, cdxady0); + Two_Product(adx, cdy, adxcdy1, adxcdy0); + Two_Two_Diff(cdxady1, cdxady0, adxcdy1, adxcdy0, ca3, ca[2], ca[1], ca[0]); + ca[3] = ca3; + blen = scale_expansion_zeroelim(4, ca, bdz, bdet); + + Two_Product(adx, bdy, adxbdy1, adxbdy0); + Two_Product(bdx, ady, bdxady1, bdxady0); + Two_Two_Diff(adxbdy1, adxbdy0, bdxady1, bdxady0, ab3, ab[2], ab[1], ab[0]); + ab[3] = ab3; + clen = scale_expansion_zeroelim(4, ab, cdz, cdet); + + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + finlength = fast_expansion_sum_zeroelim(ablen, abdet, clen, cdet, fin1); + + det = estimate(finlength, fin1); + errbound = o3derrboundB * permanent; + if((det >= errbound) || (-det >= errbound)) + { + return det; + } + + Two_Diff_Tail(pa[0], pd[0], adx, adxtail); + Two_Diff_Tail(pb[0], pd[0], bdx, bdxtail); + Two_Diff_Tail(pc[0], pd[0], cdx, cdxtail); + Two_Diff_Tail(pa[1], pd[1], ady, adytail); + Two_Diff_Tail(pb[1], pd[1], bdy, bdytail); + Two_Diff_Tail(pc[1], pd[1], cdy, cdytail); + 
Two_Diff_Tail(pa[2], pd[2], adz, adztail); + Two_Diff_Tail(pb[2], pd[2], bdz, bdztail); + Two_Diff_Tail(pc[2], pd[2], cdz, cdztail); + + if((adxtail == 0.0) && (bdxtail == 0.0) && (cdxtail == 0.0) && (adytail == 0.0) && (bdytail == 0.0) && (cdytail == 0.0) && + (adztail == 0.0) && (bdztail == 0.0) && (cdztail == 0.0)) + { + return det; + } + + errbound = o3derrboundC * permanent + resulterrbound * Absolute(det); + det += (adz * ((bdx * cdytail + cdy * bdxtail) - (bdy * cdxtail + cdx * bdytail)) + adztail * (bdx * cdy - bdy * cdx)) + + (bdz * ((cdx * adytail + ady * cdxtail) - (cdy * adxtail + adx * cdytail)) + bdztail * (cdx * ady - cdy * adx)) + + (cdz * ((adx * bdytail + bdy * adxtail) - (ady * bdxtail + bdx * adytail)) + cdztail * (adx * bdy - ady * bdx)); + if((det >= errbound) || (-det >= errbound)) + { + return det; + } + + finnow = fin1; + finother = fin2; + + if(adxtail == 0.0) + { + if(adytail == 0.0) + { + at_b[0] = 0.0; + at_blen = 1; + at_c[0] = 0.0; + at_clen = 1; + } + else + { + negate = -adytail; + Two_Product(negate, bdx, at_blarge, at_b[0]); + at_b[1] = at_blarge; + at_blen = 2; + Two_Product(adytail, cdx, at_clarge, at_c[0]); + at_c[1] = at_clarge; + at_clen = 2; + } + } + else + { + if(adytail == 0.0) + { + Two_Product(adxtail, bdy, at_blarge, at_b[0]); + at_b[1] = at_blarge; + at_blen = 2; + negate = -adxtail; + Two_Product(negate, cdy, at_clarge, at_c[0]); + at_c[1] = at_clarge; + at_clen = 2; + } + else + { + Two_Product(adxtail, bdy, adxt_bdy1, adxt_bdy0); + Two_Product(adytail, bdx, adyt_bdx1, adyt_bdx0); + Two_Two_Diff(adxt_bdy1, adxt_bdy0, adyt_bdx1, adyt_bdx0, at_blarge, at_b[2], at_b[1], at_b[0]); + at_b[3] = at_blarge; + at_blen = 4; + Two_Product(adytail, cdx, adyt_cdx1, adyt_cdx0); + Two_Product(adxtail, cdy, adxt_cdy1, adxt_cdy0); + Two_Two_Diff(adyt_cdx1, adyt_cdx0, adxt_cdy1, adxt_cdy0, at_clarge, at_c[2], at_c[1], at_c[0]); + at_c[3] = at_clarge; + at_clen = 4; + } + } + if(bdxtail == 0.0) + { + if(bdytail == 0.0) + { + bt_c[0] = 
0.0; + bt_clen = 1; + bt_a[0] = 0.0; + bt_alen = 1; + } + else + { + negate = -bdytail; + Two_Product(negate, cdx, bt_clarge, bt_c[0]); + bt_c[1] = bt_clarge; + bt_clen = 2; + Two_Product(bdytail, adx, bt_alarge, bt_a[0]); + bt_a[1] = bt_alarge; + bt_alen = 2; + } + } + else + { + if(bdytail == 0.0) + { + Two_Product(bdxtail, cdy, bt_clarge, bt_c[0]); + bt_c[1] = bt_clarge; + bt_clen = 2; + negate = -bdxtail; + Two_Product(negate, ady, bt_alarge, bt_a[0]); + bt_a[1] = bt_alarge; + bt_alen = 2; + } + else + { + Two_Product(bdxtail, cdy, bdxt_cdy1, bdxt_cdy0); + Two_Product(bdytail, cdx, bdyt_cdx1, bdyt_cdx0); + Two_Two_Diff(bdxt_cdy1, bdxt_cdy0, bdyt_cdx1, bdyt_cdx0, bt_clarge, bt_c[2], bt_c[1], bt_c[0]); + bt_c[3] = bt_clarge; + bt_clen = 4; + Two_Product(bdytail, adx, bdyt_adx1, bdyt_adx0); + Two_Product(bdxtail, ady, bdxt_ady1, bdxt_ady0); + Two_Two_Diff(bdyt_adx1, bdyt_adx0, bdxt_ady1, bdxt_ady0, bt_alarge, bt_a[2], bt_a[1], bt_a[0]); + bt_a[3] = bt_alarge; + bt_alen = 4; + } + } + if(cdxtail == 0.0) + { + if(cdytail == 0.0) + { + ct_a[0] = 0.0; + ct_alen = 1; + ct_b[0] = 0.0; + ct_blen = 1; + } + else + { + negate = -cdytail; + Two_Product(negate, adx, ct_alarge, ct_a[0]); + ct_a[1] = ct_alarge; + ct_alen = 2; + Two_Product(cdytail, bdx, ct_blarge, ct_b[0]); + ct_b[1] = ct_blarge; + ct_blen = 2; + } + } + else + { + if(cdytail == 0.0) + { + Two_Product(cdxtail, ady, ct_alarge, ct_a[0]); + ct_a[1] = ct_alarge; + ct_alen = 2; + negate = -cdxtail; + Two_Product(negate, bdy, ct_blarge, ct_b[0]); + ct_b[1] = ct_blarge; + ct_blen = 2; + } + else + { + Two_Product(cdxtail, ady, cdxt_ady1, cdxt_ady0); + Two_Product(cdytail, adx, cdyt_adx1, cdyt_adx0); + Two_Two_Diff(cdxt_ady1, cdxt_ady0, cdyt_adx1, cdyt_adx0, ct_alarge, ct_a[2], ct_a[1], ct_a[0]); + ct_a[3] = ct_alarge; + ct_alen = 4; + Two_Product(cdytail, bdx, cdyt_bdx1, cdyt_bdx0); + Two_Product(cdxtail, bdy, cdxt_bdy1, cdxt_bdy0); + Two_Two_Diff(cdyt_bdx1, cdyt_bdx0, cdxt_bdy1, cdxt_bdy0, ct_blarge, ct_b[2], 
ct_b[1], ct_b[0]); + ct_b[3] = ct_blarge; + ct_blen = 4; + } + } + + bctlen = fast_expansion_sum_zeroelim(bt_clen, bt_c, ct_blen, ct_b, bct); + wlength = scale_expansion_zeroelim(bctlen, bct, adz, w); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + + catlen = fast_expansion_sum_zeroelim(ct_alen, ct_a, at_clen, at_c, cat); + wlength = scale_expansion_zeroelim(catlen, cat, bdz, w); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + + abtlen = fast_expansion_sum_zeroelim(at_blen, at_b, bt_alen, bt_a, abt); + wlength = scale_expansion_zeroelim(abtlen, abt, cdz, w); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + + if(adztail != 0.0) + { + vlength = scale_expansion_zeroelim(4, bc, adztail, v); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, vlength, v, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(bdztail != 0.0) + { + vlength = scale_expansion_zeroelim(4, ca, bdztail, v); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, vlength, v, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(cdztail != 0.0) + { + vlength = scale_expansion_zeroelim(4, ab, cdztail, v); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, vlength, v, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + + if(adxtail != 0.0) + { + if(bdytail != 0.0) + { + Two_Product(adxtail, bdytail, adxt_bdyt1, adxt_bdyt0); + Two_One_Product(adxt_bdyt1, adxt_bdyt0, cdz, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(cdztail != 0.0) + { + 
Two_One_Product(adxt_bdyt1, adxt_bdyt0, cdztail, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + if(cdytail != 0.0) + { + negate = -adxtail; + Two_Product(negate, cdytail, adxt_cdyt1, adxt_cdyt0); + Two_One_Product(adxt_cdyt1, adxt_cdyt0, bdz, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(bdztail != 0.0) + { + Two_One_Product(adxt_cdyt1, adxt_cdyt0, bdztail, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + } + if(bdxtail != 0.0) + { + if(cdytail != 0.0) + { + Two_Product(bdxtail, cdytail, bdxt_cdyt1, bdxt_cdyt0); + Two_One_Product(bdxt_cdyt1, bdxt_cdyt0, adz, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(adztail != 0.0) + { + Two_One_Product(bdxt_cdyt1, bdxt_cdyt0, adztail, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + if(adytail != 0.0) + { + negate = -bdxtail; + Two_Product(negate, adytail, bdxt_adyt1, bdxt_adyt0); + Two_One_Product(bdxt_adyt1, bdxt_adyt0, cdz, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(cdztail != 0.0) + { + Two_One_Product(bdxt_adyt1, bdxt_adyt0, cdztail, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } 
+ } + } + if(cdxtail != 0.0) + { + if(adytail != 0.0) + { + Two_Product(cdxtail, adytail, cdxt_adyt1, cdxt_adyt0); + Two_One_Product(cdxt_adyt1, cdxt_adyt0, bdz, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(bdztail != 0.0) + { + Two_One_Product(cdxt_adyt1, cdxt_adyt0, bdztail, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + if(bdytail != 0.0) + { + negate = -cdxtail; + Two_Product(negate, bdytail, cdxt_bdyt1, cdxt_bdyt0); + Two_One_Product(cdxt_bdyt1, cdxt_bdyt0, adz, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(adztail != 0.0) + { + Two_One_Product(cdxt_bdyt1, cdxt_bdyt0, adztail, u3, u[2], u[1], u[0]); + u[3] = u3; + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 4, u, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + } + + if(adztail != 0.0) + { + wlength = scale_expansion_zeroelim(bctlen, bct, adztail, w); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(bdztail != 0.0) + { + wlength = scale_expansion_zeroelim(catlen, cat, bdztail, w); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(cdztail != 0.0) + { + wlength = scale_expansion_zeroelim(abtlen, abt, cdztail, w); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, wlength, w, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + + return finnow[finlength - 1]; +} + +REAL orient3d(pa, pb, pc, pd) REAL *pa; +REAL 
*pb; +REAL *pc; +REAL *pd; +{ + REAL adx, bdx, cdx, ady, bdy, cdy, adz, bdz, cdz; + REAL bdxcdy, cdxbdy, cdxady, adxcdy, adxbdy, bdxady; + REAL det; + REAL permanent, errbound; + + adx = pa[0] - pd[0]; + bdx = pb[0] - pd[0]; + cdx = pc[0] - pd[0]; + ady = pa[1] - pd[1]; + bdy = pb[1] - pd[1]; + cdy = pc[1] - pd[1]; + adz = pa[2] - pd[2]; + bdz = pb[2] - pd[2]; + cdz = pc[2] - pd[2]; + + bdxcdy = bdx * cdy; + cdxbdy = cdx * bdy; + + cdxady = cdx * ady; + adxcdy = adx * cdy; + + adxbdy = adx * bdy; + bdxady = bdx * ady; + + det = adz * (bdxcdy - cdxbdy) + bdz * (cdxady - adxcdy) + cdz * (adxbdy - bdxady); + + permanent = (Absolute(bdxcdy) + Absolute(cdxbdy)) * Absolute(adz) + (Absolute(cdxady) + Absolute(adxcdy)) * Absolute(bdz) + + (Absolute(adxbdy) + Absolute(bdxady)) * Absolute(cdz); + errbound = o3derrboundA * permanent; + if((det > errbound) || (-det > errbound)) + { + return det; + } + + return orient3dadapt(pa, pb, pc, pd, permanent); +} + +/*****************************************************************************/ +/* */ +/* incirclefast() Approximate 2D incircle test. Nonrobust. */ +/* incircleexact() Exact 2D incircle test. Robust. */ +/* incircleslow() Another exact 2D incircle test. Robust. */ +/* incircle() Adaptive exact 2D incircle test. Robust. */ +/* */ +/* Return a positive value if the point pd lies inside the */ +/* circle passing through pa, pb, and pc; a negative value if */ +/* it lies outside; and zero if the four points are cocircular.*/ +/* The points pa, pb, and pc must be in counterclockwise */ +/* order, or the sign of the result will be reversed. */ +/* */ +/* Only the first and last routine should be used; the middle two are for */ +/* timings. */ +/* */ +/* The last three use exact arithmetic to ensure a correct answer. The */ +/* result returned is the determinant of a matrix. 
In incircle() only, */ +/* this determinant is computed adaptively, in the sense that exact */ +/* arithmetic is used only to the degree it is needed to ensure that the */ +/* returned value has the correct sign. Hence, incircle() is usually quite */ +/* fast, but will run more slowly when the input points are cocircular or */ +/* nearly so. */ +/* */ +/*****************************************************************************/ + +REAL incirclefast(pa, pb, pc, pd) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +{ + REAL adx, ady, bdx, bdy, cdx, cdy; + REAL abdet, bcdet, cadet; + REAL alift, blift, clift; + + adx = pa[0] - pd[0]; + ady = pa[1] - pd[1]; + bdx = pb[0] - pd[0]; + bdy = pb[1] - pd[1]; + cdx = pc[0] - pd[0]; + cdy = pc[1] - pd[1]; + + abdet = adx * bdy - bdx * ady; + bcdet = bdx * cdy - cdx * bdy; + cadet = cdx * ady - adx * cdy; + alift = adx * adx + ady * ady; + blift = bdx * bdx + bdy * bdy; + clift = cdx * cdx + cdy * cdy; + + return alift * bcdet + blift * cadet + clift * abdet; +} + +REAL incircleexact(pa, pb, pc, pd) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +{ + INEXACT REAL axby1, bxcy1, cxdy1, dxay1, axcy1, bxdy1; + INEXACT REAL bxay1, cxby1, dxcy1, axdy1, cxay1, dxby1; + REAL axby0, bxcy0, cxdy0, dxay0, axcy0, bxdy0; + REAL bxay0, cxby0, dxcy0, axdy0, cxay0, dxby0; + REAL ab[4], bc[4], cd[4], da[4], ac[4], bd[4]; + REAL temp8[8]; + int templen; + REAL abc[12], bcd[12], cda[12], dab[12]; + int abclen, bcdlen, cdalen, dablen; + REAL det24x[24], det24y[24], det48x[48], det48y[48]; + int xlen, ylen; + REAL adet[96], bdet[96], cdet[96], ddet[96]; + int alen, blen, clen, dlen; + REAL abdet[192], cddet[192]; + int ablen, cdlen; + REAL deter[384]; + int deterlen; + int i; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j; + REAL _0; + + Two_Product(pa[0], pb[1], axby1, axby0); + Two_Product(pb[0], pa[1], bxay1, bxay0); + 
Two_Two_Diff(axby1, axby0, bxay1, bxay0, ab[3], ab[2], ab[1], ab[0]); + + Two_Product(pb[0], pc[1], bxcy1, bxcy0); + Two_Product(pc[0], pb[1], cxby1, cxby0); + Two_Two_Diff(bxcy1, bxcy0, cxby1, cxby0, bc[3], bc[2], bc[1], bc[0]); + + Two_Product(pc[0], pd[1], cxdy1, cxdy0); + Two_Product(pd[0], pc[1], dxcy1, dxcy0); + Two_Two_Diff(cxdy1, cxdy0, dxcy1, dxcy0, cd[3], cd[2], cd[1], cd[0]); + + Two_Product(pd[0], pa[1], dxay1, dxay0); + Two_Product(pa[0], pd[1], axdy1, axdy0); + Two_Two_Diff(dxay1, dxay0, axdy1, axdy0, da[3], da[2], da[1], da[0]); + + Two_Product(pa[0], pc[1], axcy1, axcy0); + Two_Product(pc[0], pa[1], cxay1, cxay0); + Two_Two_Diff(axcy1, axcy0, cxay1, cxay0, ac[3], ac[2], ac[1], ac[0]); + + Two_Product(pb[0], pd[1], bxdy1, bxdy0); + Two_Product(pd[0], pb[1], dxby1, dxby0); + Two_Two_Diff(bxdy1, bxdy0, dxby1, dxby0, bd[3], bd[2], bd[1], bd[0]); + + templen = fast_expansion_sum_zeroelim(4, cd, 4, da, temp8); + cdalen = fast_expansion_sum_zeroelim(templen, temp8, 4, ac, cda); + templen = fast_expansion_sum_zeroelim(4, da, 4, ab, temp8); + dablen = fast_expansion_sum_zeroelim(templen, temp8, 4, bd, dab); + for(i = 0; i < 4; i++) + { + bd[i] = -bd[i]; + ac[i] = -ac[i]; + } + templen = fast_expansion_sum_zeroelim(4, ab, 4, bc, temp8); + abclen = fast_expansion_sum_zeroelim(templen, temp8, 4, ac, abc); + templen = fast_expansion_sum_zeroelim(4, bc, 4, cd, temp8); + bcdlen = fast_expansion_sum_zeroelim(templen, temp8, 4, bd, bcd); + + xlen = scale_expansion_zeroelim(bcdlen, bcd, pa[0], det24x); + xlen = scale_expansion_zeroelim(xlen, det24x, pa[0], det48x); + ylen = scale_expansion_zeroelim(bcdlen, bcd, pa[1], det24y); + ylen = scale_expansion_zeroelim(ylen, det24y, pa[1], det48y); + alen = fast_expansion_sum_zeroelim(xlen, det48x, ylen, det48y, adet); + + xlen = scale_expansion_zeroelim(cdalen, cda, pb[0], det24x); + xlen = scale_expansion_zeroelim(xlen, det24x, -pb[0], det48x); + ylen = scale_expansion_zeroelim(cdalen, cda, pb[1], det24y); + ylen = 
scale_expansion_zeroelim(ylen, det24y, -pb[1], det48y); + blen = fast_expansion_sum_zeroelim(xlen, det48x, ylen, det48y, bdet); + + xlen = scale_expansion_zeroelim(dablen, dab, pc[0], det24x); + xlen = scale_expansion_zeroelim(xlen, det24x, pc[0], det48x); + ylen = scale_expansion_zeroelim(dablen, dab, pc[1], det24y); + ylen = scale_expansion_zeroelim(ylen, det24y, pc[1], det48y); + clen = fast_expansion_sum_zeroelim(xlen, det48x, ylen, det48y, cdet); + + xlen = scale_expansion_zeroelim(abclen, abc, pd[0], det24x); + xlen = scale_expansion_zeroelim(xlen, det24x, -pd[0], det48x); + ylen = scale_expansion_zeroelim(abclen, abc, pd[1], det24y); + ylen = scale_expansion_zeroelim(ylen, det24y, -pd[1], det48y); + dlen = fast_expansion_sum_zeroelim(xlen, det48x, ylen, det48y, ddet); + + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + cdlen = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet); + deterlen = fast_expansion_sum_zeroelim(ablen, abdet, cdlen, cddet, deter); + + return deter[deterlen - 1]; +} + +REAL incircleslow(pa, pb, pc, pd) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +{ + INEXACT REAL adx, bdx, cdx, ady, bdy, cdy; + REAL adxtail, bdxtail, cdxtail; + REAL adytail, bdytail, cdytail; + REAL negate, negatetail; + INEXACT REAL axby7, bxcy7, axcy7, bxay7, cxby7, cxay7; + REAL axby[8], bxcy[8], axcy[8], bxay[8], cxby[8], cxay[8]; + REAL temp16[16]; + int temp16len; + REAL detx[32], detxx[64], detxt[32], detxxt[64], detxtxt[64]; + int xlen, xxlen, xtlen, xxtlen, xtxtlen; + REAL x1[128], x2[192]; + int x1len, x2len; + REAL dety[32], detyy[64], detyt[32], detyyt[64], detytyt[64]; + int ylen, yylen, ytlen, yytlen, ytytlen; + REAL y1[128], y2[192]; + int y1len, y2len; + REAL adet[384], bdet[384], cdet[384], abdet[768], deter[1152]; + int alen, blen, clen, ablen, deterlen; + int i; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL a0hi, a0lo, a1hi, a1lo, bhi, blo; + REAL err1, err2, err3; + 
INEXACT REAL _i, _j, _k, _l, _m, _n; + REAL _0, _1, _2; + + Two_Diff(pa[0], pd[0], adx, adxtail); + Two_Diff(pa[1], pd[1], ady, adytail); + Two_Diff(pb[0], pd[0], bdx, bdxtail); + Two_Diff(pb[1], pd[1], bdy, bdytail); + Two_Diff(pc[0], pd[0], cdx, cdxtail); + Two_Diff(pc[1], pd[1], cdy, cdytail); + + Two_Two_Product(adx, adxtail, bdy, bdytail, axby7, axby[6], axby[5], axby[4], axby[3], axby[2], axby[1], axby[0]); + axby[7] = axby7; + negate = -ady; + negatetail = -adytail; + Two_Two_Product(bdx, bdxtail, negate, negatetail, bxay7, bxay[6], bxay[5], bxay[4], bxay[3], bxay[2], bxay[1], bxay[0]); + bxay[7] = bxay7; + Two_Two_Product(bdx, bdxtail, cdy, cdytail, bxcy7, bxcy[6], bxcy[5], bxcy[4], bxcy[3], bxcy[2], bxcy[1], bxcy[0]); + bxcy[7] = bxcy7; + negate = -bdy; + negatetail = -bdytail; + Two_Two_Product(cdx, cdxtail, negate, negatetail, cxby7, cxby[6], cxby[5], cxby[4], cxby[3], cxby[2], cxby[1], cxby[0]); + cxby[7] = cxby7; + Two_Two_Product(cdx, cdxtail, ady, adytail, cxay7, cxay[6], cxay[5], cxay[4], cxay[3], cxay[2], cxay[1], cxay[0]); + cxay[7] = cxay7; + negate = -cdy; + negatetail = -cdytail; + Two_Two_Product(adx, adxtail, negate, negatetail, axcy7, axcy[6], axcy[5], axcy[4], axcy[3], axcy[2], axcy[1], axcy[0]); + axcy[7] = axcy7; + + temp16len = fast_expansion_sum_zeroelim(8, bxcy, 8, cxby, temp16); + + xlen = scale_expansion_zeroelim(temp16len, temp16, adx, detx); + xxlen = scale_expansion_zeroelim(xlen, detx, adx, detxx); + xtlen = scale_expansion_zeroelim(temp16len, temp16, adxtail, detxt); + xxtlen = scale_expansion_zeroelim(xtlen, detxt, adx, detxxt); + for(i = 0; i < xxtlen; i++) + { + detxxt[i] *= 2.0; + } + xtxtlen = scale_expansion_zeroelim(xtlen, detxt, adxtail, detxtxt); + x1len = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1); + x2len = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2); + + ylen = scale_expansion_zeroelim(temp16len, temp16, ady, dety); + yylen = scale_expansion_zeroelim(ylen, dety, ady, detyy); + 
ytlen = scale_expansion_zeroelim(temp16len, temp16, adytail, detyt); + yytlen = scale_expansion_zeroelim(ytlen, detyt, ady, detyyt); + for(i = 0; i < yytlen; i++) + { + detyyt[i] *= 2.0; + } + ytytlen = scale_expansion_zeroelim(ytlen, detyt, adytail, detytyt); + y1len = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1); + y2len = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2); + + alen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, adet); + + temp16len = fast_expansion_sum_zeroelim(8, cxay, 8, axcy, temp16); + + xlen = scale_expansion_zeroelim(temp16len, temp16, bdx, detx); + xxlen = scale_expansion_zeroelim(xlen, detx, bdx, detxx); + xtlen = scale_expansion_zeroelim(temp16len, temp16, bdxtail, detxt); + xxtlen = scale_expansion_zeroelim(xtlen, detxt, bdx, detxxt); + for(i = 0; i < xxtlen; i++) + { + detxxt[i] *= 2.0; + } + xtxtlen = scale_expansion_zeroelim(xtlen, detxt, bdxtail, detxtxt); + x1len = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1); + x2len = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2); + + ylen = scale_expansion_zeroelim(temp16len, temp16, bdy, dety); + yylen = scale_expansion_zeroelim(ylen, dety, bdy, detyy); + ytlen = scale_expansion_zeroelim(temp16len, temp16, bdytail, detyt); + yytlen = scale_expansion_zeroelim(ytlen, detyt, bdy, detyyt); + for(i = 0; i < yytlen; i++) + { + detyyt[i] *= 2.0; + } + ytytlen = scale_expansion_zeroelim(ytlen, detyt, bdytail, detytyt); + y1len = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1); + y2len = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2); + + blen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, bdet); + + temp16len = fast_expansion_sum_zeroelim(8, axby, 8, bxay, temp16); + + xlen = scale_expansion_zeroelim(temp16len, temp16, cdx, detx); + xxlen = scale_expansion_zeroelim(xlen, detx, cdx, detxx); + xtlen = scale_expansion_zeroelim(temp16len, temp16, cdxtail, detxt); + xxtlen = scale_expansion_zeroelim(xtlen, 
detxt, cdx, detxxt); + for(i = 0; i < xxtlen; i++) + { + detxxt[i] *= 2.0; + } + xtxtlen = scale_expansion_zeroelim(xtlen, detxt, cdxtail, detxtxt); + x1len = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1); + x2len = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2); + + ylen = scale_expansion_zeroelim(temp16len, temp16, cdy, dety); + yylen = scale_expansion_zeroelim(ylen, dety, cdy, detyy); + ytlen = scale_expansion_zeroelim(temp16len, temp16, cdytail, detyt); + yytlen = scale_expansion_zeroelim(ytlen, detyt, cdy, detyyt); + for(i = 0; i < yytlen; i++) + { + detyyt[i] *= 2.0; + } + ytytlen = scale_expansion_zeroelim(ytlen, detyt, cdytail, detytyt); + y1len = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1); + y2len = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2); + + clen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, cdet); + + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + deterlen = fast_expansion_sum_zeroelim(ablen, abdet, clen, cdet, deter); + + return deter[deterlen - 1]; +} + +/* incircleadapt(): adaptive fallback for the 2D incircle determinant of pa, pb, pc taken relative to pd. 'permanent' is supplied by the caller and scales every error bound used here. Stage 1 forms the determinant of the rounded coordinate differences as an expansion in fin1 and returns estimate() of it when that value clears iccerrboundB * permanent. Stage 2 recovers the rounding tails of the six differences with Two_Diff_Tail; if all six are zero the estimate is returned unchanged, otherwise a correction that is linear in the tails is added and the result is tested against iccerrboundC * permanent + resulterrbound * Absolute(det). Stage 3 folds the remaining tail terms into the expansion, alternating between the fin1 and fin2 buffers, and returns the last component finnow[finlength - 1]. */ REAL incircleadapt(pa, pb, pc, pd, permanent) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +REAL permanent; +{ + INEXACT REAL adx, bdx, cdx, ady, bdy, cdy; + REAL det, errbound; + + INEXACT REAL bdxcdy1, cdxbdy1, cdxady1, adxcdy1, adxbdy1, bdxady1; + REAL bdxcdy0, cdxbdy0, cdxady0, adxcdy0, adxbdy0, bdxady0; + REAL bc[4], ca[4], ab[4]; + INEXACT REAL bc3, ca3, ab3; + REAL axbc[8], axxbc[16], aybc[8], ayybc[16], adet[32]; + int axbclen, axxbclen, aybclen, ayybclen, alen; + REAL bxca[8], bxxca[16], byca[8], byyca[16], bdet[32]; + int bxcalen, bxxcalen, bycalen, byycalen, blen; + REAL cxab[8], cxxab[16], cyab[8], cyyab[16], cdet[32]; + int cxablen, cxxablen, cyablen, cyyablen, clen; + REAL abdet[64]; + int ablen; + REAL fin1[1152], fin2[1152]; + REAL *finnow, *finother, *finswap; + int finlength; + + REAL adxtail, bdxtail, cdxtail, adytail, bdytail, cdytail; + INEXACT REAL adxadx1, adyady1, bdxbdx1, bdybdy1, 
cdxcdx1, cdycdy1; + REAL adxadx0, adyady0, bdxbdx0, bdybdy0, cdxcdx0, cdycdy0; + REAL aa[4], bb[4], cc[4]; + INEXACT REAL aa3, bb3, cc3; + INEXACT REAL ti1, tj1; + REAL ti0, tj0; + REAL u[4], v[4]; + INEXACT REAL u3, v3; + REAL temp8[8], temp16a[16], temp16b[16], temp16c[16]; + REAL temp32a[32], temp32b[32], temp48[48], temp64[64]; + int temp8len, temp16alen, temp16blen, temp16clen; + int temp32alen, temp32blen, temp48len, temp64len; + REAL axtbb[8], axtcc[8], aytbb[8], aytcc[8]; + int axtbblen, axtcclen, aytbblen, aytcclen; + REAL bxtaa[8], bxtcc[8], bytaa[8], bytcc[8]; + int bxtaalen, bxtcclen, bytaalen, bytcclen; + REAL cxtaa[8], cxtbb[8], cytaa[8], cytbb[8]; + int cxtaalen, cxtbblen, cytaalen, cytbblen; + REAL axtbc[8], aytbc[8], bxtca[8], bytca[8], cxtab[8], cytab[8]; + int axtbclen = 0, aytbclen = 0, bxtcalen = 0, bytcalen = 0, cxtablen = 0, cytablen = 0; /* the only lengths given an initial value: each is assigned inside its own "tail != 0.0" branch and read again in the later section guarded by the same tail */ + REAL axtbct[16], aytbct[16], bxtcat[16], bytcat[16], cxtabt[16], cytabt[16]; + int axtbctlen, aytbctlen, bxtcatlen, bytcatlen, cxtabtlen, cytabtlen; + REAL axtbctt[8], aytbctt[8], bxtcatt[8]; + REAL bytcatt[8], cxtabtt[8], cytabtt[8]; + int axtbcttlen, aytbcttlen, bxtcattlen, bytcattlen, cxtabttlen, cytabttlen; + REAL abt[8], bct[8], cat[8]; + int abtlen, bctlen, catlen; + REAL abtt[4], bctt[4], catt[4]; + int abttlen, bcttlen, cattlen; + INEXACT REAL abtt3, bctt3, catt3; + REAL negate; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j; + REAL _0; + + /* Stage 1: rounded differences against pd; their rounding tails are recovered further down */ adx = (REAL)(pa[0] - pd[0]); + bdx = (REAL)(pb[0] - pd[0]); + cdx = (REAL)(pc[0] - pd[0]); + ady = (REAL)(pa[1] - pd[1]); + bdy = (REAL)(pb[1] - pd[1]); + cdy = (REAL)(pc[1] - pd[1]); + + Two_Product(bdx, cdy, bdxcdy1, bdxcdy0); + Two_Product(cdx, bdy, cdxbdy1, cdxbdy0); + Two_Two_Diff(bdxcdy1, bdxcdy0, cdxbdy1, cdxbdy0, bc3, bc[2], bc[1], bc[0]); + bc[3] = bc3; + axbclen = scale_expansion_zeroelim(4, bc, adx, axbc); + axxbclen = 
scale_expansion_zeroelim(axbclen, axbc, adx, axxbc); + aybclen = scale_expansion_zeroelim(4, bc, ady, aybc); + ayybclen = scale_expansion_zeroelim(aybclen, aybc, ady, ayybc); + alen = fast_expansion_sum_zeroelim(axxbclen, axxbc, ayybclen, ayybc, adet); + + Two_Product(cdx, ady, cdxady1, cdxady0); + Two_Product(adx, cdy, adxcdy1, adxcdy0); + Two_Two_Diff(cdxady1, cdxady0, adxcdy1, adxcdy0, ca3, ca[2], ca[1], ca[0]); + ca[3] = ca3; + bxcalen = scale_expansion_zeroelim(4, ca, bdx, bxca); + bxxcalen = scale_expansion_zeroelim(bxcalen, bxca, bdx, bxxca); + bycalen = scale_expansion_zeroelim(4, ca, bdy, byca); + byycalen = scale_expansion_zeroelim(bycalen, byca, bdy, byyca); + blen = fast_expansion_sum_zeroelim(bxxcalen, bxxca, byycalen, byyca, bdet); + + Two_Product(adx, bdy, adxbdy1, adxbdy0); + Two_Product(bdx, ady, bdxady1, bdxady0); + Two_Two_Diff(adxbdy1, adxbdy0, bdxady1, bdxady0, ab3, ab[2], ab[1], ab[0]); + ab[3] = ab3; + cxablen = scale_expansion_zeroelim(4, ab, cdx, cxab); + cxxablen = scale_expansion_zeroelim(cxablen, cxab, cdx, cxxab); + cyablen = scale_expansion_zeroelim(4, ab, cdy, cyab); + cyyablen = scale_expansion_zeroelim(cyablen, cyab, cdy, cyyab); + clen = fast_expansion_sum_zeroelim(cxxablen, cxxab, cyyablen, cyyab, cdet); + + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + finlength = fast_expansion_sum_zeroelim(ablen, abdet, clen, cdet, fin1); + + /* fin1 now holds the determinant of the rounded differences as an expansion; accept its estimate when it clears the B bound */ det = estimate(finlength, fin1); + errbound = iccerrboundB * permanent; + if((det >= errbound) || (-det >= errbound)) + { + return det; + } + + /* Stage 2: recover the rounding tail of each of the six differences */ Two_Diff_Tail(pa[0], pd[0], adx, adxtail); + Two_Diff_Tail(pa[1], pd[1], ady, adytail); + Two_Diff_Tail(pb[0], pd[0], bdx, bdxtail); + Two_Diff_Tail(pb[1], pd[1], bdy, bdytail); + Two_Diff_Tail(pc[0], pd[0], cdx, cdxtail); + Two_Diff_Tail(pc[1], pd[1], cdy, cdytail); + /* every tail is zero: there is nothing further to add, so the estimate is returned as is */ if((adxtail == 0.0) && (bdxtail == 0.0) && (cdxtail == 0.0) && (adytail == 0.0) && (bdytail == 0.0) && (cdytail == 0.0)) + { + return det; + } + + errbound = iccerrboundC * 
permanent + resulterrbound * Absolute(det); + /* correction in ordinary floating point; every product below contains exactly one tail factor */ det += ((adx * adx + ady * ady) * ((bdx * cdytail + cdy * bdxtail) - (bdy * cdxtail + cdx * bdytail)) + + 2.0 * (adx * adxtail + ady * adytail) * (bdx * cdy - bdy * cdx)) + + ((bdx * bdx + bdy * bdy) * ((cdx * adytail + ady * cdxtail) - (cdy * adxtail + adx * cdytail)) + + 2.0 * (bdx * bdxtail + bdy * bdytail) * (cdx * ady - cdy * adx)) + + ((cdx * cdx + cdy * cdy) * ((adx * bdytail + bdy * adxtail) - (ady * bdxtail + bdx * adytail)) + + 2.0 * (cdx * cdxtail + cdy * cdytail) * (adx * bdy - ady * bdx)); + if((det >= errbound) || (-det >= errbound)) + { + return det; + } + + /* Stage 3: each fast_expansion_sum_zeroelim below reads finnow and writes finother, after which the two pointers are swapped */ finnow = fin1; + finother = fin2; + + /* aa, bb and cc (expansions of adx^2 + ady^2 and so on) are built only when a tail that later multiplies them is nonzero */ if((bdxtail != 0.0) || (bdytail != 0.0) || (cdxtail != 0.0) || (cdytail != 0.0)) + { + Square(adx, adxadx1, adxadx0); + Square(ady, adyady1, adyady0); + Two_Two_Sum(adxadx1, adxadx0, adyady1, adyady0, aa3, aa[2], aa[1], aa[0]); + aa[3] = aa3; + } + if((cdxtail != 0.0) || (cdytail != 0.0) || (adxtail != 0.0) || (adytail != 0.0)) + { + Square(bdx, bdxbdx1, bdxbdx0); + Square(bdy, bdybdy1, bdybdy0); + Two_Two_Sum(bdxbdx1, bdxbdx0, bdybdy1, bdybdy0, bb3, bb[2], bb[1], bb[0]); + bb[3] = bb3; + } + if((adxtail != 0.0) || (adytail != 0.0) || (bdxtail != 0.0) || (bdytail != 0.0)) + { + Square(cdx, cdxcdx1, cdxcdx0); + Square(cdy, cdycdy1, cdycdy0); + Two_Two_Sum(cdxcdx1, cdxcdx0, cdycdy1, cdycdy0, cc3, cc[2], cc[1], cc[0]); + cc[3] = cc3; + } + + if(adxtail != 0.0) + { + axtbclen = scale_expansion_zeroelim(4, bc, adxtail, axtbc); + temp16alen = scale_expansion_zeroelim(axtbclen, axtbc, 2.0 * adx, temp16a); + + axtcclen = scale_expansion_zeroelim(4, cc, adxtail, axtcc); + temp16blen = scale_expansion_zeroelim(axtcclen, axtcc, bdy, temp16b); + + axtbblen = scale_expansion_zeroelim(4, bb, adxtail, axtbb); + temp16clen = scale_expansion_zeroelim(axtbblen, axtbb, -cdy, temp16c); + + temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16clen, 
temp16c, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(adytail != 0.0) + { + aytbclen = scale_expansion_zeroelim(4, bc, adytail, aytbc); + temp16alen = scale_expansion_zeroelim(aytbclen, aytbc, 2.0 * ady, temp16a); + + aytbblen = scale_expansion_zeroelim(4, bb, adytail, aytbb); + temp16blen = scale_expansion_zeroelim(aytbblen, aytbb, cdx, temp16b); + + aytcclen = scale_expansion_zeroelim(4, cc, adytail, aytcc); + temp16clen = scale_expansion_zeroelim(aytcclen, aytcc, -bdx, temp16c); + + temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(bdxtail != 0.0) + { + bxtcalen = scale_expansion_zeroelim(4, ca, bdxtail, bxtca); + temp16alen = scale_expansion_zeroelim(bxtcalen, bxtca, 2.0 * bdx, temp16a); + + bxtaalen = scale_expansion_zeroelim(4, aa, bdxtail, bxtaa); + temp16blen = scale_expansion_zeroelim(bxtaalen, bxtaa, cdy, temp16b); + + bxtcclen = scale_expansion_zeroelim(4, cc, bdxtail, bxtcc); + temp16clen = scale_expansion_zeroelim(bxtcclen, bxtcc, -ady, temp16c); + + temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(bdytail != 0.0) + { + bytcalen = scale_expansion_zeroelim(4, ca, bdytail, bytca); + temp16alen = scale_expansion_zeroelim(bytcalen, bytca, 2.0 * bdy, temp16a); + + bytcclen = scale_expansion_zeroelim(4, cc, bdytail, bytcc); + 
temp16blen = scale_expansion_zeroelim(bytcclen, bytcc, adx, temp16b); + + bytaalen = scale_expansion_zeroelim(4, aa, bdytail, bytaa); + temp16clen = scale_expansion_zeroelim(bytaalen, bytaa, -cdx, temp16c); + + temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(cdxtail != 0.0) + { + cxtablen = scale_expansion_zeroelim(4, ab, cdxtail, cxtab); + temp16alen = scale_expansion_zeroelim(cxtablen, cxtab, 2.0 * cdx, temp16a); + + cxtbblen = scale_expansion_zeroelim(4, bb, cdxtail, cxtbb); + temp16blen = scale_expansion_zeroelim(cxtbblen, cxtbb, ady, temp16b); + + cxtaalen = scale_expansion_zeroelim(4, aa, cdxtail, cxtaa); + temp16clen = scale_expansion_zeroelim(cxtaalen, cxtaa, -bdy, temp16c); + + temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(cdytail != 0.0) + { + cytablen = scale_expansion_zeroelim(4, ab, cdytail, cytab); + temp16alen = scale_expansion_zeroelim(cytablen, cytab, 2.0 * cdy, temp16a); + + cytaalen = scale_expansion_zeroelim(4, aa, cdytail, cytaa); + temp16blen = scale_expansion_zeroelim(cytaalen, cytaa, bdx, temp16b); + + cytbblen = scale_expansion_zeroelim(4, bb, cdytail, cytbb); + temp16clen = scale_expansion_zeroelim(cytbblen, cytbb, -adx, temp16c); + + temp32alen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16clen, temp16c, temp32alen, temp32a, temp48); + finlength = 
fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + + /* terms driven by pa's tails: bct gathers the b/c products that contain one tail factor, bctt those that contain two; both collapse to a single zero component when b and c have no tails */ if((adxtail != 0.0) || (adytail != 0.0)) + { + if((bdxtail != 0.0) || (bdytail != 0.0) || (cdxtail != 0.0) || (cdytail != 0.0)) + { + Two_Product(bdxtail, cdy, ti1, ti0); + Two_Product(bdx, cdytail, tj1, tj0); + Two_Two_Sum(ti1, ti0, tj1, tj0, u3, u[2], u[1], u[0]); + u[3] = u3; + negate = -bdy; + Two_Product(cdxtail, negate, ti1, ti0); + negate = -bdytail; + Two_Product(cdx, negate, tj1, tj0); + Two_Two_Sum(ti1, ti0, tj1, tj0, v3, v[2], v[1], v[0]); + v[3] = v3; + bctlen = fast_expansion_sum_zeroelim(4, u, 4, v, bct); + + Two_Product(bdxtail, cdytail, ti1, ti0); + Two_Product(cdxtail, bdytail, tj1, tj0); + Two_Two_Diff(ti1, ti0, tj1, tj0, bctt3, bctt[2], bctt[1], bctt[0]); + bctt[3] = bctt3; + bcttlen = 4; + } + else + { + bct[0] = 0.0; + bctlen = 1; + bctt[0] = 0.0; + bcttlen = 1; + } + + if(adxtail != 0.0) + { + temp16alen = scale_expansion_zeroelim(axtbclen, axtbc, adxtail, temp16a); + axtbctlen = scale_expansion_zeroelim(bctlen, bct, adxtail, axtbct); + temp32alen = scale_expansion_zeroelim(axtbctlen, axtbct, 2.0 * adx, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(bdytail != 0.0) + { + temp8len = scale_expansion_zeroelim(4, cc, adxtail, temp8); + temp16alen = scale_expansion_zeroelim(temp8len, temp8, bdytail, temp16a); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(cdytail != 0.0) + { + temp8len = scale_expansion_zeroelim(4, bb, -adxtail, temp8); + temp16alen = scale_expansion_zeroelim(temp8len, temp8, cdytail, temp16a); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, 
temp16alen, temp16a, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + + temp32alen = scale_expansion_zeroelim(axtbctlen, axtbct, adxtail, temp32a); + axtbcttlen = scale_expansion_zeroelim(bcttlen, bctt, adxtail, axtbctt); + temp16alen = scale_expansion_zeroelim(axtbcttlen, axtbctt, 2.0 * adx, temp16a); + temp16blen = scale_expansion_zeroelim(axtbcttlen, axtbctt, adxtail, temp16b); + temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b); + temp64len = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(adytail != 0.0) + { + temp16alen = scale_expansion_zeroelim(aytbclen, aytbc, adytail, temp16a); + aytbctlen = scale_expansion_zeroelim(bctlen, bct, adytail, aytbct); + temp32alen = scale_expansion_zeroelim(aytbctlen, aytbct, 2.0 * ady, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + + temp32alen = scale_expansion_zeroelim(aytbctlen, aytbct, adytail, temp32a); + aytbcttlen = scale_expansion_zeroelim(bcttlen, bctt, adytail, aytbctt); + temp16alen = scale_expansion_zeroelim(aytbcttlen, aytbctt, 2.0 * ady, temp16a); + temp16blen = scale_expansion_zeroelim(aytbcttlen, aytbctt, adytail, temp16b); + temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b); + temp64len = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + /* same pattern for pb's tails, using cat and catt built from the c and a differences */ if((bdxtail != 0.0) || (bdytail != 0.0)) + { + if((cdxtail != 0.0) || 
(cdytail != 0.0) || (adxtail != 0.0) || (adytail != 0.0)) + { + Two_Product(cdxtail, ady, ti1, ti0); + Two_Product(cdx, adytail, tj1, tj0); + Two_Two_Sum(ti1, ti0, tj1, tj0, u3, u[2], u[1], u[0]); + u[3] = u3; + negate = -cdy; + Two_Product(adxtail, negate, ti1, ti0); + negate = -cdytail; + Two_Product(adx, negate, tj1, tj0); + Two_Two_Sum(ti1, ti0, tj1, tj0, v3, v[2], v[1], v[0]); + v[3] = v3; + catlen = fast_expansion_sum_zeroelim(4, u, 4, v, cat); + + Two_Product(cdxtail, adytail, ti1, ti0); + Two_Product(adxtail, cdytail, tj1, tj0); + Two_Two_Diff(ti1, ti0, tj1, tj0, catt3, catt[2], catt[1], catt[0]); + catt[3] = catt3; + cattlen = 4; + } + else + { + cat[0] = 0.0; + catlen = 1; + catt[0] = 0.0; + cattlen = 1; + } + + if(bdxtail != 0.0) + { + temp16alen = scale_expansion_zeroelim(bxtcalen, bxtca, bdxtail, temp16a); + bxtcatlen = scale_expansion_zeroelim(catlen, cat, bdxtail, bxtcat); + temp32alen = scale_expansion_zeroelim(bxtcatlen, bxtcat, 2.0 * bdx, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(cdytail != 0.0) + { + temp8len = scale_expansion_zeroelim(4, aa, bdxtail, temp8); + temp16alen = scale_expansion_zeroelim(temp8len, temp8, cdytail, temp16a); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(adytail != 0.0) + { + temp8len = scale_expansion_zeroelim(4, cc, -bdxtail, temp8); + temp16alen = scale_expansion_zeroelim(temp8len, temp8, adytail, temp16a); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + + temp32alen = scale_expansion_zeroelim(bxtcatlen, bxtcat, bdxtail, temp32a); + bxtcattlen = 
scale_expansion_zeroelim(cattlen, catt, bdxtail, bxtcatt); + temp16alen = scale_expansion_zeroelim(bxtcattlen, bxtcatt, 2.0 * bdx, temp16a); + temp16blen = scale_expansion_zeroelim(bxtcattlen, bxtcatt, bdxtail, temp16b); + temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b); + temp64len = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(bdytail != 0.0) + { + temp16alen = scale_expansion_zeroelim(bytcalen, bytca, bdytail, temp16a); + bytcatlen = scale_expansion_zeroelim(catlen, cat, bdytail, bytcat); + temp32alen = scale_expansion_zeroelim(bytcatlen, bytcat, 2.0 * bdy, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + + temp32alen = scale_expansion_zeroelim(bytcatlen, bytcat, bdytail, temp32a); + bytcattlen = scale_expansion_zeroelim(cattlen, catt, bdytail, bytcatt); + temp16alen = scale_expansion_zeroelim(bytcattlen, bytcatt, 2.0 * bdy, temp16a); + temp16blen = scale_expansion_zeroelim(bytcattlen, bytcatt, bdytail, temp16b); + temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b); + temp64len = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + /* and for pc's tails, using abt and abtt built from the a and b differences */ if((cdxtail != 0.0) || (cdytail != 0.0)) + { + if((adxtail != 0.0) || (adytail != 0.0) || (bdxtail != 0.0) || (bdytail != 0.0)) + { + Two_Product(adxtail, bdy, ti1, ti0); + Two_Product(adx, bdytail, tj1, tj0); + Two_Two_Sum(ti1, ti0, tj1, tj0, u3, u[2], u[1], u[0]); 
+ u[3] = u3; + negate = -ady; + Two_Product(bdxtail, negate, ti1, ti0); + negate = -adytail; + Two_Product(bdx, negate, tj1, tj0); + Two_Two_Sum(ti1, ti0, tj1, tj0, v3, v[2], v[1], v[0]); + v[3] = v3; + abtlen = fast_expansion_sum_zeroelim(4, u, 4, v, abt); + + Two_Product(adxtail, bdytail, ti1, ti0); + Two_Product(bdxtail, adytail, tj1, tj0); + Two_Two_Diff(ti1, ti0, tj1, tj0, abtt3, abtt[2], abtt[1], abtt[0]); + abtt[3] = abtt3; + abttlen = 4; + } + else + { + abt[0] = 0.0; + abtlen = 1; + abtt[0] = 0.0; + abttlen = 1; + } + + if(cdxtail != 0.0) + { + temp16alen = scale_expansion_zeroelim(cxtablen, cxtab, cdxtail, temp16a); + cxtabtlen = scale_expansion_zeroelim(abtlen, abt, cdxtail, cxtabt); + temp32alen = scale_expansion_zeroelim(cxtabtlen, cxtabt, 2.0 * cdx, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + if(adytail != 0.0) + { + temp8len = scale_expansion_zeroelim(4, bb, cdxtail, temp8); + temp16alen = scale_expansion_zeroelim(temp8len, temp8, adytail, temp16a); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(bdytail != 0.0) + { + temp8len = scale_expansion_zeroelim(4, aa, -cdxtail, temp8); + temp16alen = scale_expansion_zeroelim(temp8len, temp8, bdytail, temp16a); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp16alen, temp16a, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + + temp32alen = scale_expansion_zeroelim(cxtabtlen, cxtabt, cdxtail, temp32a); + cxtabttlen = scale_expansion_zeroelim(abttlen, abtt, cdxtail, cxtabtt); + temp16alen = scale_expansion_zeroelim(cxtabttlen, cxtabtt, 2.0 * cdx, temp16a); + temp16blen = scale_expansion_zeroelim(cxtabttlen, cxtabtt, cdxtail, temp16b); + 
temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b); + temp64len = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + if(cdytail != 0.0) + { + temp16alen = scale_expansion_zeroelim(cytablen, cytab, cdytail, temp16a); + cytabtlen = scale_expansion_zeroelim(abtlen, abt, cdytail, cytabt); + temp32alen = scale_expansion_zeroelim(cytabtlen, cytabt, 2.0 * cdy, temp32a); + temp48len = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp32alen, temp32a, temp48); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp48len, temp48, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + + temp32alen = scale_expansion_zeroelim(cytabtlen, cytabt, cdytail, temp32a); + cytabttlen = scale_expansion_zeroelim(abttlen, abtt, cdytail, cytabtt); + temp16alen = scale_expansion_zeroelim(cytabttlen, cytabtt, 2.0 * cdy, temp16a); + temp16blen = scale_expansion_zeroelim(cytabttlen, cytabtt, cdytail, temp16b); + temp32blen = fast_expansion_sum_zeroelim(temp16alen, temp16a, temp16blen, temp16b, temp32b); + temp64len = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64); + finlength = fast_expansion_sum_zeroelim(finlength, finnow, temp64len, temp64, finother); + finswap = finnow; + finnow = finother; + finother = finswap; + } + } + + return finnow[finlength - 1]; +} + +/* incircle(): 2D incircle determinant of pa, pb, pc taken relative to pd; each point is read as an (x, y) pair through indices 0 and 1. The determinant is first evaluated in ordinary floating point. It is returned directly when its magnitude exceeds iccerrboundA * permanent; otherwise the decision is delegated to incircleadapt(), which receives the same points together with permanent. */ REAL incircle(pa, pb, pc, pd) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +{ + REAL adx, bdx, cdx, ady, bdy, cdy; + REAL bdxcdy, cdxbdy, cdxady, adxcdy, adxbdy, bdxady; + REAL alift, blift, clift; + REAL det; + REAL permanent, errbound; + + /* coordinates of pa, pb, pc relative to pd */ adx = pa[0] - pd[0]; + bdx = pb[0] - pd[0]; + cdx = pc[0] - pd[0]; + ady = pa[1] - pd[1]; + bdy = pb[1] - pd[1]; + cdy = pc[1] - pd[1]; + + /* for each point: the two cross products of the other two points and its own squared distance from pd */ bdxcdy = bdx * cdy; + cdxbdy = cdx * bdy; + alift = adx * adx + ady * 
ady; + + cdxady = cdx * ady; + adxcdy = adx * cdy; + blift = bdx * bdx + bdy * bdy; + + adxbdy = adx * bdy; + bdxady = bdx * ady; + clift = cdx * cdx + cdy * cdy; + + det = alift * (bdxcdy - cdxbdy) + blift * (cdxady - adxcdy) + clift * (adxbdy - bdxady); + + /* permanent: the same products as det with every term taken in absolute value and added instead of subtracted; it is used only to scale the error bounds */ permanent = (Absolute(bdxcdy) + Absolute(cdxbdy)) * alift + (Absolute(cdxady) + Absolute(adxcdy)) * blift + + (Absolute(adxbdy) + Absolute(bdxady)) * clift; + errbound = iccerrboundA * permanent; + /* strict comparison: a det whose magnitude merely equals the bound still goes to the adaptive routine */ if((det > errbound) || (-det > errbound)) + { + return det; + } + + return incircleadapt(pa, pb, pc, pd, permanent); +} + +/*****************************************************************************/ +/* */ +/* inspherefast() Approximate 3D insphere test. Nonrobust. */ +/* insphereexact() Exact 3D insphere test. Robust. */ +/* insphereslow() Another exact 3D insphere test. Robust. */ +/* insphere() Adaptive exact 3D insphere test. Robust. */ +/* */ +/* Return a positive value if the point pe lies inside the */ +/* sphere passing through pa, pb, pc, and pd; a negative value */ +/* if it lies outside; and zero if the five points are */ +/* cospherical. The points pa, pb, pc, and pd must be ordered */ +/* so that they have a positive orientation (as defined by */ +/* orient3d()), or the sign of the result will be reversed. */ +/* */ +/* Only the first and last routine should be used; the middle two are for */ +/* timings. */ +/* */ +/* The last three use exact arithmetic to ensure a correct answer. The */ +/* result returned is the determinant of a matrix. In insphere() only, */ +/* this determinant is computed adaptively, in the sense that exact */ +/* arithmetic is used only to the degree it is needed to ensure that the */ +/* returned value has the correct sign. Hence, insphere() is usually quite */ +/* fast, but will run more slowly when the input points are cospherical or */ +/* nearly so. 
*/ +/* */ +/*****************************************************************************/ + +/* inspherefast(): evaluates the insphere determinant in ordinary floating point only; this routine applies no error bound and has no exact fallback. pa..pe are each read as (x, y, z) through indices 0..2. */ REAL inspherefast(pa, pb, pc, pd, pe) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +REAL *pe; +{ + REAL aex, bex, cex, dex; + REAL aey, bey, cey, dey; + REAL aez, bez, cez, dez; + REAL alift, blift, clift, dlift; + REAL ab, bc, cd, da, ac, bd; + REAL abc, bcd, cda, dab; + + /* coordinates of pa..pd relative to pe */ aex = pa[0] - pe[0]; + bex = pb[0] - pe[0]; + cex = pc[0] - pe[0]; + dex = pd[0] - pe[0]; + aey = pa[1] - pe[1]; + bey = pb[1] - pe[1]; + cey = pc[1] - pe[1]; + dey = pd[1] - pe[1]; + aez = pa[2] - pe[2]; + bez = pb[2] - pe[2]; + cez = pc[2] - pe[2]; + dez = pd[2] - pe[2]; + + /* 2x2 minors formed from the x and y differences of each pair of points */ ab = aex * bey - bex * aey; + bc = bex * cey - cex * bey; + cd = cex * dey - dex * cey; + da = dex * aey - aex * dey; + + ac = aex * cey - cex * aey; + bd = bex * dey - dex * bey; + + /* 3x3 minors: each combines three of the 2x2 minors with the z differences */ abc = aez * bc - bez * ac + cez * ab; + bcd = bez * cd - cez * bd + dez * bc; + cda = cez * da + dez * ac + aez * cd; + dab = dez * ab + aez * bd + bez * da; + + /* squared distance of each of pa..pd from pe */ alift = aex * aex + aey * aey + aez * aez; + blift = bex * bex + bey * bey + bez * bez; + clift = cex * cex + cey * cey + cez * cez; + dlift = dex * dex + dey * dey + dez * dez; + + /* each lift multiplies the 3x3 minor of the other three points, with alternating sign */ return (dlift * abc - clift * dab) + (blift * cda - alift * bcd); +} + +REAL insphereexact(pa, pb, pc, pd, pe) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +REAL *pe; +{ + INEXACT REAL axby1, bxcy1, cxdy1, dxey1, exay1; + INEXACT REAL bxay1, cxby1, dxcy1, exdy1, axey1; + INEXACT REAL axcy1, bxdy1, cxey1, dxay1, exby1; + INEXACT REAL cxay1, dxby1, excy1, axdy1, bxey1; + REAL axby0, bxcy0, cxdy0, dxey0, exay0; + REAL bxay0, cxby0, dxcy0, exdy0, axey0; + REAL axcy0, bxdy0, cxey0, dxay0, exby0; + REAL cxay0, dxby0, excy0, axdy0, bxey0; + REAL ab[4], bc[4], cd[4], de[4], ea[4]; + REAL ac[4], bd[4], ce[4], da[4], eb[4]; + REAL temp8a[8], temp8b[8], temp16[16]; + int temp8alen, temp8blen, temp16len; + REAL abc[24], bcd[24], cde[24], dea[24], eab[24]; + REAL abd[24], bce[24], cda[24], deb[24], eac[24]; + int abclen, bcdlen, 
cdelen, dealen, eablen; + int abdlen, bcelen, cdalen, deblen, eaclen; + REAL temp48a[48], temp48b[48]; + int temp48alen, temp48blen; + REAL abcd[96], bcde[96], cdea[96], deab[96], eabc[96]; + int abcdlen, bcdelen, cdealen, deablen, eabclen; + REAL temp192[192]; + REAL det384x[384], det384y[384], det384z[384]; + int xlen, ylen, zlen; + REAL detxy[768]; + int xylen; + REAL adet[1152], bdet[1152], cdet[1152], ddet[1152], edet[1152]; + int alen, blen, clen, dlen, elen; + REAL abdet[2304], cddet[2304], cdedet[3456]; + int ablen, cdlen; + REAL deter[5760]; + int deterlen; + int i; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j; + REAL _0; + + Two_Product(pa[0], pb[1], axby1, axby0); + Two_Product(pb[0], pa[1], bxay1, bxay0); + Two_Two_Diff(axby1, axby0, bxay1, bxay0, ab[3], ab[2], ab[1], ab[0]); + + Two_Product(pb[0], pc[1], bxcy1, bxcy0); + Two_Product(pc[0], pb[1], cxby1, cxby0); + Two_Two_Diff(bxcy1, bxcy0, cxby1, cxby0, bc[3], bc[2], bc[1], bc[0]); + + Two_Product(pc[0], pd[1], cxdy1, cxdy0); + Two_Product(pd[0], pc[1], dxcy1, dxcy0); + Two_Two_Diff(cxdy1, cxdy0, dxcy1, dxcy0, cd[3], cd[2], cd[1], cd[0]); + + Two_Product(pd[0], pe[1], dxey1, dxey0); + Two_Product(pe[0], pd[1], exdy1, exdy0); + Two_Two_Diff(dxey1, dxey0, exdy1, exdy0, de[3], de[2], de[1], de[0]); + + Two_Product(pe[0], pa[1], exay1, exay0); + Two_Product(pa[0], pe[1], axey1, axey0); + Two_Two_Diff(exay1, exay0, axey1, axey0, ea[3], ea[2], ea[1], ea[0]); + + Two_Product(pa[0], pc[1], axcy1, axcy0); + Two_Product(pc[0], pa[1], cxay1, cxay0); + Two_Two_Diff(axcy1, axcy0, cxay1, cxay0, ac[3], ac[2], ac[1], ac[0]); + + Two_Product(pb[0], pd[1], bxdy1, bxdy0); + Two_Product(pd[0], pb[1], dxby1, dxby0); + Two_Two_Diff(bxdy1, bxdy0, dxby1, dxby0, bd[3], bd[2], bd[1], bd[0]); + + Two_Product(pc[0], pe[1], cxey1, cxey0); + Two_Product(pe[0], pc[1], excy1, excy0); + Two_Two_Diff(cxey1, cxey0, 
excy1, excy0, ce[3], ce[2], ce[1], ce[0]); + + Two_Product(pd[0], pa[1], dxay1, dxay0); + Two_Product(pa[0], pd[1], axdy1, axdy0); + Two_Two_Diff(dxay1, dxay0, axdy1, axdy0, da[3], da[2], da[1], da[0]); + + Two_Product(pe[0], pb[1], exby1, exby0); + Two_Product(pb[0], pe[1], bxey1, bxey0); + Two_Two_Diff(exby1, exby0, bxey1, bxey0, eb[3], eb[2], eb[1], eb[0]); + + temp8alen = scale_expansion_zeroelim(4, bc, pa[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, ac, -pb[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, ab, pc[2], temp8a); + abclen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, abc); + + temp8alen = scale_expansion_zeroelim(4, cd, pb[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, bd, -pc[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, bc, pd[2], temp8a); + bcdlen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, bcd); + + temp8alen = scale_expansion_zeroelim(4, de, pc[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, ce, -pd[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, cd, pe[2], temp8a); + cdelen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, cde); + + temp8alen = scale_expansion_zeroelim(4, ea, pd[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, da, -pe[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, de, pa[2], temp8a); + dealen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, dea); + + temp8alen = scale_expansion_zeroelim(4, ab, pe[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, eb, -pa[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, 
temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, ea, pb[2], temp8a); + eablen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, eab); + + temp8alen = scale_expansion_zeroelim(4, bd, pa[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, da, pb[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, ab, pd[2], temp8a); + abdlen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, abd); + + temp8alen = scale_expansion_zeroelim(4, ce, pb[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, eb, pc[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, bc, pe[2], temp8a); + bcelen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, bce); + + temp8alen = scale_expansion_zeroelim(4, da, pc[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, ac, pd[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, cd, pa[2], temp8a); + cdalen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, cda); + + temp8alen = scale_expansion_zeroelim(4, eb, pd[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, bd, pe[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, de, pb[2], temp8a); + deblen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, deb); + + temp8alen = scale_expansion_zeroelim(4, ac, pe[2], temp8a); + temp8blen = scale_expansion_zeroelim(4, ce, pa[2], temp8b); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp8alen = scale_expansion_zeroelim(4, ea, pc[2], temp8a); + eaclen = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp16len, temp16, eac); + + temp48alen = 
fast_expansion_sum_zeroelim(cdelen, cde, bcelen, bce, temp48a); + temp48blen = fast_expansion_sum_zeroelim(deblen, deb, bcdlen, bcd, temp48b); + for(i = 0; i < temp48blen; i++) + { + temp48b[i] = -temp48b[i]; + } + bcdelen = fast_expansion_sum_zeroelim(temp48alen, temp48a, temp48blen, temp48b, bcde); + xlen = scale_expansion_zeroelim(bcdelen, bcde, pa[0], temp192); + xlen = scale_expansion_zeroelim(xlen, temp192, pa[0], det384x); + ylen = scale_expansion_zeroelim(bcdelen, bcde, pa[1], temp192); + ylen = scale_expansion_zeroelim(ylen, temp192, pa[1], det384y); + zlen = scale_expansion_zeroelim(bcdelen, bcde, pa[2], temp192); + zlen = scale_expansion_zeroelim(zlen, temp192, pa[2], det384z); + xylen = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy); + alen = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, adet); + + temp48alen = fast_expansion_sum_zeroelim(dealen, dea, cdalen, cda, temp48a); + temp48blen = fast_expansion_sum_zeroelim(eaclen, eac, cdelen, cde, temp48b); + for(i = 0; i < temp48blen; i++) + { + temp48b[i] = -temp48b[i]; + } + cdealen = fast_expansion_sum_zeroelim(temp48alen, temp48a, temp48blen, temp48b, cdea); + xlen = scale_expansion_zeroelim(cdealen, cdea, pb[0], temp192); + xlen = scale_expansion_zeroelim(xlen, temp192, pb[0], det384x); + ylen = scale_expansion_zeroelim(cdealen, cdea, pb[1], temp192); + ylen = scale_expansion_zeroelim(ylen, temp192, pb[1], det384y); + zlen = scale_expansion_zeroelim(cdealen, cdea, pb[2], temp192); + zlen = scale_expansion_zeroelim(zlen, temp192, pb[2], det384z); + xylen = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy); + blen = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, bdet); + + temp48alen = fast_expansion_sum_zeroelim(eablen, eab, deblen, deb, temp48a); + temp48blen = fast_expansion_sum_zeroelim(abdlen, abd, dealen, dea, temp48b); + for(i = 0; i < temp48blen; i++) + { + temp48b[i] = -temp48b[i]; + } + deablen = fast_expansion_sum_zeroelim(temp48alen, 
temp48a, temp48blen, temp48b, deab); + xlen = scale_expansion_zeroelim(deablen, deab, pc[0], temp192); + xlen = scale_expansion_zeroelim(xlen, temp192, pc[0], det384x); + ylen = scale_expansion_zeroelim(deablen, deab, pc[1], temp192); + ylen = scale_expansion_zeroelim(ylen, temp192, pc[1], det384y); + zlen = scale_expansion_zeroelim(deablen, deab, pc[2], temp192); + zlen = scale_expansion_zeroelim(zlen, temp192, pc[2], det384z); + xylen = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy); + clen = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, cdet); + + temp48alen = fast_expansion_sum_zeroelim(abclen, abc, eaclen, eac, temp48a); + temp48blen = fast_expansion_sum_zeroelim(bcelen, bce, eablen, eab, temp48b); + for(i = 0; i < temp48blen; i++) + { + temp48b[i] = -temp48b[i]; + } + eabclen = fast_expansion_sum_zeroelim(temp48alen, temp48a, temp48blen, temp48b, eabc); + xlen = scale_expansion_zeroelim(eabclen, eabc, pd[0], temp192); + xlen = scale_expansion_zeroelim(xlen, temp192, pd[0], det384x); + ylen = scale_expansion_zeroelim(eabclen, eabc, pd[1], temp192); + ylen = scale_expansion_zeroelim(ylen, temp192, pd[1], det384y); + zlen = scale_expansion_zeroelim(eabclen, eabc, pd[2], temp192); + zlen = scale_expansion_zeroelim(zlen, temp192, pd[2], det384z); + xylen = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy); + dlen = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, ddet); + + temp48alen = fast_expansion_sum_zeroelim(bcdlen, bcd, abdlen, abd, temp48a); + temp48blen = fast_expansion_sum_zeroelim(cdalen, cda, abclen, abc, temp48b); + for(i = 0; i < temp48blen; i++) + { + temp48b[i] = -temp48b[i]; + } + abcdlen = fast_expansion_sum_zeroelim(temp48alen, temp48a, temp48blen, temp48b, abcd); + xlen = scale_expansion_zeroelim(abcdlen, abcd, pe[0], temp192); + xlen = scale_expansion_zeroelim(xlen, temp192, pe[0], det384x); + ylen = scale_expansion_zeroelim(abcdlen, abcd, pe[1], temp192); + ylen = 
scale_expansion_zeroelim(ylen, temp192, pe[1], det384y); + zlen = scale_expansion_zeroelim(abcdlen, abcd, pe[2], temp192); + zlen = scale_expansion_zeroelim(zlen, temp192, pe[2], det384z); + xylen = fast_expansion_sum_zeroelim(xlen, det384x, ylen, det384y, detxy); + elen = fast_expansion_sum_zeroelim(xylen, detxy, zlen, det384z, edet); + + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + cdlen = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet); + cdelen = fast_expansion_sum_zeroelim(cdlen, cddet, elen, edet, cdedet); + deterlen = fast_expansion_sum_zeroelim(ablen, abdet, cdelen, cdedet, deter); + + return deter[deterlen - 1]; +} +/* insphereslow(): insphere determinant of pa, pb, pc, pd relative to pe, evaluated without any floating-point filter. Every coordinate difference against pe is taken with Two_Diff (value and tail), all products and sums are carried as expansions, and the last component of the final expansion, deter[deterlen - 1], is returned. NOTE(review): presumably that component carries the sign of the exact determinant, with the usual convention positive = pe inside the sphere for positively oriented pa..pd -- confirm against the predicate documentation. */ +REAL insphereslow(pa, pb, pc, pd, pe) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +REAL *pe; +{ /* REAL arrays below are expansion buffers; the ...len ints receive the lengths returned by the ..._zeroelim calls that fill them. */ + INEXACT REAL aex, bex, cex, dex, aey, bey, cey, dey, aez, bez, cez, dez; + REAL aextail, bextail, cextail, dextail; + REAL aeytail, beytail, ceytail, deytail; + REAL aeztail, beztail, ceztail, deztail; + REAL negate, negatetail; + INEXACT REAL axby7, bxcy7, cxdy7, dxay7, axcy7, bxdy7; + INEXACT REAL bxay7, cxby7, dxcy7, axdy7, cxay7, dxby7; + REAL axby[8], bxcy[8], cxdy[8], dxay[8], axcy[8], bxdy[8]; + REAL bxay[8], cxby[8], dxcy[8], axdy[8], cxay[8], dxby[8]; + REAL ab[16], bc[16], cd[16], da[16], ac[16], bd[16]; + int ablen, bclen, cdlen, dalen, aclen, bdlen; + REAL temp32a[32], temp32b[32], temp64a[64], temp64b[64], temp64c[64]; + int temp32alen, temp32blen, temp64alen, temp64blen, temp64clen; + REAL temp128[128], temp192[192]; + int temp128len, temp192len; + REAL detx[384], detxx[768], detxt[384], detxxt[768], detxtxt[768]; + int xlen, xxlen, xtlen, xxtlen, xtxtlen; + REAL x1[1536], x2[2304]; + int x1len, x2len; + REAL dety[384], detyy[768], detyt[384], detyyt[768], detytyt[768]; + int ylen, yylen, ytlen, yytlen, ytytlen; + REAL y1[1536], y2[2304]; + int y1len, y2len; + REAL detz[384], detzz[768], detzt[384], detzzt[768], detztzt[768]; + int zlen, zzlen, ztlen, zztlen, ztztlen; + REAL z1[1536], z2[2304]; +
int z1len, z2len; + REAL detxy[4608]; + int xylen; + REAL adet[6912], bdet[6912], cdet[6912], ddet[6912]; + int alen, blen, clen, dlen; + REAL abdet[13824], cddet[13824], deter[27648]; + int deterlen; + int i; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL a0hi, a0lo, a1hi, a1lo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j, _k, _l, _m, _n; + REAL _0, _1, _2; + + Two_Diff(pa[0], pe[0], aex, aextail); + Two_Diff(pa[1], pe[1], aey, aeytail); + Two_Diff(pa[2], pe[2], aez, aeztail); + Two_Diff(pb[0], pe[0], bex, bextail); + Two_Diff(pb[1], pe[1], bey, beytail); + Two_Diff(pb[2], pe[2], bez, beztail); + Two_Diff(pc[0], pe[0], cex, cextail); + Two_Diff(pc[1], pe[1], cey, ceytail); + Two_Diff(pc[2], pe[2], cez, ceztail); + Two_Diff(pd[0], pe[0], dex, dextail); + Two_Diff(pd[1], pe[1], dey, deytail); + Two_Diff(pd[2], pe[2], dez, deztail); + + Two_Two_Product(aex, aextail, bey, beytail, axby7, axby[6], axby[5], axby[4], axby[3], axby[2], axby[1], axby[0]); + axby[7] = axby7; + negate = -aey; + negatetail = -aeytail; + Two_Two_Product(bex, bextail, negate, negatetail, bxay7, bxay[6], bxay[5], bxay[4], bxay[3], bxay[2], bxay[1], bxay[0]); + bxay[7] = bxay7; + ablen = fast_expansion_sum_zeroelim(8, axby, 8, bxay, ab); + Two_Two_Product(bex, bextail, cey, ceytail, bxcy7, bxcy[6], bxcy[5], bxcy[4], bxcy[3], bxcy[2], bxcy[1], bxcy[0]); + bxcy[7] = bxcy7; + negate = -bey; + negatetail = -beytail; + Two_Two_Product(cex, cextail, negate, negatetail, cxby7, cxby[6], cxby[5], cxby[4], cxby[3], cxby[2], cxby[1], cxby[0]); + cxby[7] = cxby7; + bclen = fast_expansion_sum_zeroelim(8, bxcy, 8, cxby, bc); + Two_Two_Product(cex, cextail, dey, deytail, cxdy7, cxdy[6], cxdy[5], cxdy[4], cxdy[3], cxdy[2], cxdy[1], cxdy[0]); + cxdy[7] = cxdy7; + negate = -cey; + negatetail = -ceytail; + Two_Two_Product(dex, dextail, negate, negatetail, dxcy7, dxcy[6], dxcy[5], dxcy[4], dxcy[3], dxcy[2], dxcy[1], dxcy[0]); + dxcy[7] = dxcy7; + 
cdlen = fast_expansion_sum_zeroelim(8, cxdy, 8, dxcy, cd); + Two_Two_Product(dex, dextail, aey, aeytail, dxay7, dxay[6], dxay[5], dxay[4], dxay[3], dxay[2], dxay[1], dxay[0]); + dxay[7] = dxay7; + negate = -dey; + negatetail = -deytail; + Two_Two_Product(aex, aextail, negate, negatetail, axdy7, axdy[6], axdy[5], axdy[4], axdy[3], axdy[2], axdy[1], axdy[0]); + axdy[7] = axdy7; + dalen = fast_expansion_sum_zeroelim(8, dxay, 8, axdy, da); + Two_Two_Product(aex, aextail, cey, ceytail, axcy7, axcy[6], axcy[5], axcy[4], axcy[3], axcy[2], axcy[1], axcy[0]); + axcy[7] = axcy7; + negate = -aey; + negatetail = -aeytail; + Two_Two_Product(cex, cextail, negate, negatetail, cxay7, cxay[6], cxay[5], cxay[4], cxay[3], cxay[2], cxay[1], cxay[0]); + cxay[7] = cxay7; + aclen = fast_expansion_sum_zeroelim(8, axcy, 8, cxay, ac); + Two_Two_Product(bex, bextail, dey, deytail, bxdy7, bxdy[6], bxdy[5], bxdy[4], bxdy[3], bxdy[2], bxdy[1], bxdy[0]); + bxdy[7] = bxdy7; + negate = -bey; + negatetail = -beytail; + Two_Two_Product(dex, dextail, negate, negatetail, dxby7, dxby[6], dxby[5], dxby[4], dxby[3], dxby[2], dxby[1], dxby[0]); + dxby[7] = dxby7; + bdlen = fast_expansion_sum_zeroelim(8, bxdy, 8, dxby, bd); + + temp32alen = scale_expansion_zeroelim(cdlen, cd, -bez, temp32a); + temp32blen = scale_expansion_zeroelim(cdlen, cd, -beztail, temp32b); + temp64alen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64a); + temp32alen = scale_expansion_zeroelim(bdlen, bd, cez, temp32a); + temp32blen = scale_expansion_zeroelim(bdlen, bd, ceztail, temp32b); + temp64blen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64b); + temp32alen = scale_expansion_zeroelim(bclen, bc, -dez, temp32a); + temp32blen = scale_expansion_zeroelim(bclen, bc, -deztail, temp32b); + temp64clen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64c); + temp128len = fast_expansion_sum_zeroelim(temp64alen, temp64a, temp64blen, temp64b, temp128); + 
temp192len = fast_expansion_sum_zeroelim(temp64clen, temp64c, temp128len, temp128, temp192); + xlen = scale_expansion_zeroelim(temp192len, temp192, aex, detx); + xxlen = scale_expansion_zeroelim(xlen, detx, aex, detxx); + xtlen = scale_expansion_zeroelim(temp192len, temp192, aextail, detxt); + xxtlen = scale_expansion_zeroelim(xtlen, detxt, aex, detxxt); + for(i = 0; i < xxtlen; i++) + { + detxxt[i] *= 2.0; + } + xtxtlen = scale_expansion_zeroelim(xtlen, detxt, aextail, detxtxt); + x1len = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1); + x2len = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2); + ylen = scale_expansion_zeroelim(temp192len, temp192, aey, dety); + yylen = scale_expansion_zeroelim(ylen, dety, aey, detyy); + ytlen = scale_expansion_zeroelim(temp192len, temp192, aeytail, detyt); + yytlen = scale_expansion_zeroelim(ytlen, detyt, aey, detyyt); + for(i = 0; i < yytlen; i++) + { + detyyt[i] *= 2.0; + } + ytytlen = scale_expansion_zeroelim(ytlen, detyt, aeytail, detytyt); + y1len = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1); + y2len = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2); + zlen = scale_expansion_zeroelim(temp192len, temp192, aez, detz); + zzlen = scale_expansion_zeroelim(zlen, detz, aez, detzz); + ztlen = scale_expansion_zeroelim(temp192len, temp192, aeztail, detzt); + zztlen = scale_expansion_zeroelim(ztlen, detzt, aez, detzzt); + for(i = 0; i < zztlen; i++) + { + detzzt[i] *= 2.0; + } + ztztlen = scale_expansion_zeroelim(ztlen, detzt, aeztail, detztzt); + z1len = fast_expansion_sum_zeroelim(zzlen, detzz, zztlen, detzzt, z1); + z2len = fast_expansion_sum_zeroelim(z1len, z1, ztztlen, detztzt, z2); + xylen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, detxy); + alen = fast_expansion_sum_zeroelim(z2len, z2, xylen, detxy, adet); + + temp32alen = scale_expansion_zeroelim(dalen, da, cez, temp32a); + temp32blen = scale_expansion_zeroelim(dalen, da, ceztail, temp32b); + temp64alen = 
fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64a); + temp32alen = scale_expansion_zeroelim(aclen, ac, dez, temp32a); + temp32blen = scale_expansion_zeroelim(aclen, ac, deztail, temp32b); + temp64blen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64b); + temp32alen = scale_expansion_zeroelim(cdlen, cd, aez, temp32a); + temp32blen = scale_expansion_zeroelim(cdlen, cd, aeztail, temp32b); + temp64clen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64c); + temp128len = fast_expansion_sum_zeroelim(temp64alen, temp64a, temp64blen, temp64b, temp128); + temp192len = fast_expansion_sum_zeroelim(temp64clen, temp64c, temp128len, temp128, temp192); + xlen = scale_expansion_zeroelim(temp192len, temp192, bex, detx); + xxlen = scale_expansion_zeroelim(xlen, detx, bex, detxx); + xtlen = scale_expansion_zeroelim(temp192len, temp192, bextail, detxt); + xxtlen = scale_expansion_zeroelim(xtlen, detxt, bex, detxxt); + for(i = 0; i < xxtlen; i++) + { + detxxt[i] *= 2.0; + } + xtxtlen = scale_expansion_zeroelim(xtlen, detxt, bextail, detxtxt); + x1len = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1); + x2len = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2); + ylen = scale_expansion_zeroelim(temp192len, temp192, bey, dety); + yylen = scale_expansion_zeroelim(ylen, dety, bey, detyy); + ytlen = scale_expansion_zeroelim(temp192len, temp192, beytail, detyt); + yytlen = scale_expansion_zeroelim(ytlen, detyt, bey, detyyt); + for(i = 0; i < yytlen; i++) + { + detyyt[i] *= 2.0; + } + ytytlen = scale_expansion_zeroelim(ytlen, detyt, beytail, detytyt); + y1len = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1); + y2len = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2); + zlen = scale_expansion_zeroelim(temp192len, temp192, bez, detz); + zzlen = scale_expansion_zeroelim(zlen, detz, bez, detzz); + ztlen = scale_expansion_zeroelim(temp192len, temp192, beztail, 
detzt); + zztlen = scale_expansion_zeroelim(ztlen, detzt, bez, detzzt); + for(i = 0; i < zztlen; i++) + { + detzzt[i] *= 2.0; + } + ztztlen = scale_expansion_zeroelim(ztlen, detzt, beztail, detztzt); + z1len = fast_expansion_sum_zeroelim(zzlen, detzz, zztlen, detzzt, z1); + z2len = fast_expansion_sum_zeroelim(z1len, z1, ztztlen, detztzt, z2); + xylen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, detxy); + blen = fast_expansion_sum_zeroelim(z2len, z2, xylen, detxy, bdet); + + temp32alen = scale_expansion_zeroelim(ablen, ab, -dez, temp32a); + temp32blen = scale_expansion_zeroelim(ablen, ab, -deztail, temp32b); + temp64alen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64a); + temp32alen = scale_expansion_zeroelim(bdlen, bd, -aez, temp32a); + temp32blen = scale_expansion_zeroelim(bdlen, bd, -aeztail, temp32b); + temp64blen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64b); + temp32alen = scale_expansion_zeroelim(dalen, da, -bez, temp32a); + temp32blen = scale_expansion_zeroelim(dalen, da, -beztail, temp32b); + temp64clen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64c); + temp128len = fast_expansion_sum_zeroelim(temp64alen, temp64a, temp64blen, temp64b, temp128); + temp192len = fast_expansion_sum_zeroelim(temp64clen, temp64c, temp128len, temp128, temp192); + xlen = scale_expansion_zeroelim(temp192len, temp192, cex, detx); + xxlen = scale_expansion_zeroelim(xlen, detx, cex, detxx); + xtlen = scale_expansion_zeroelim(temp192len, temp192, cextail, detxt); + xxtlen = scale_expansion_zeroelim(xtlen, detxt, cex, detxxt); + for(i = 0; i < xxtlen; i++) + { + detxxt[i] *= 2.0; + } + xtxtlen = scale_expansion_zeroelim(xtlen, detxt, cextail, detxtxt); + x1len = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1); + x2len = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2); + ylen = scale_expansion_zeroelim(temp192len, temp192, cey, dety); + yylen = 
scale_expansion_zeroelim(ylen, dety, cey, detyy); + ytlen = scale_expansion_zeroelim(temp192len, temp192, ceytail, detyt); + yytlen = scale_expansion_zeroelim(ytlen, detyt, cey, detyyt); + for(i = 0; i < yytlen; i++) + { + detyyt[i] *= 2.0; + } + ytytlen = scale_expansion_zeroelim(ytlen, detyt, ceytail, detytyt); + y1len = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1); + y2len = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2); + zlen = scale_expansion_zeroelim(temp192len, temp192, cez, detz); + zzlen = scale_expansion_zeroelim(zlen, detz, cez, detzz); + ztlen = scale_expansion_zeroelim(temp192len, temp192, ceztail, detzt); + zztlen = scale_expansion_zeroelim(ztlen, detzt, cez, detzzt); + for(i = 0; i < zztlen; i++) + { + detzzt[i] *= 2.0; + } + ztztlen = scale_expansion_zeroelim(ztlen, detzt, ceztail, detztzt); + z1len = fast_expansion_sum_zeroelim(zzlen, detzz, zztlen, detzzt, z1); + z2len = fast_expansion_sum_zeroelim(z1len, z1, ztztlen, detztzt, z2); + xylen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, detxy); + clen = fast_expansion_sum_zeroelim(z2len, z2, xylen, detxy, cdet); + + temp32alen = scale_expansion_zeroelim(bclen, bc, aez, temp32a); + temp32blen = scale_expansion_zeroelim(bclen, bc, aeztail, temp32b); + temp64alen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64a); + temp32alen = scale_expansion_zeroelim(aclen, ac, -bez, temp32a); + temp32blen = scale_expansion_zeroelim(aclen, ac, -beztail, temp32b); + temp64blen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64b); + temp32alen = scale_expansion_zeroelim(ablen, ab, cez, temp32a); + temp32blen = scale_expansion_zeroelim(ablen, ab, ceztail, temp32b); + temp64clen = fast_expansion_sum_zeroelim(temp32alen, temp32a, temp32blen, temp32b, temp64c); + temp128len = fast_expansion_sum_zeroelim(temp64alen, temp64a, temp64blen, temp64b, temp128); + temp192len = fast_expansion_sum_zeroelim(temp64clen, temp64c, 
temp128len, temp128, temp192); + xlen = scale_expansion_zeroelim(temp192len, temp192, dex, detx); + xxlen = scale_expansion_zeroelim(xlen, detx, dex, detxx); + xtlen = scale_expansion_zeroelim(temp192len, temp192, dextail, detxt); + xxtlen = scale_expansion_zeroelim(xtlen, detxt, dex, detxxt); + for(i = 0; i < xxtlen; i++) + { + detxxt[i] *= 2.0; + } + xtxtlen = scale_expansion_zeroelim(xtlen, detxt, dextail, detxtxt); + x1len = fast_expansion_sum_zeroelim(xxlen, detxx, xxtlen, detxxt, x1); + x2len = fast_expansion_sum_zeroelim(x1len, x1, xtxtlen, detxtxt, x2); + ylen = scale_expansion_zeroelim(temp192len, temp192, dey, dety); + yylen = scale_expansion_zeroelim(ylen, dety, dey, detyy); + ytlen = scale_expansion_zeroelim(temp192len, temp192, deytail, detyt); + yytlen = scale_expansion_zeroelim(ytlen, detyt, dey, detyyt); + for(i = 0; i < yytlen; i++) + { + detyyt[i] *= 2.0; + } + ytytlen = scale_expansion_zeroelim(ytlen, detyt, deytail, detytyt); + y1len = fast_expansion_sum_zeroelim(yylen, detyy, yytlen, detyyt, y1); + y2len = fast_expansion_sum_zeroelim(y1len, y1, ytytlen, detytyt, y2); + zlen = scale_expansion_zeroelim(temp192len, temp192, dez, detz); + zzlen = scale_expansion_zeroelim(zlen, detz, dez, detzz); + ztlen = scale_expansion_zeroelim(temp192len, temp192, deztail, detzt); + zztlen = scale_expansion_zeroelim(ztlen, detzt, dez, detzzt); + for(i = 0; i < zztlen; i++) + { + detzzt[i] *= 2.0; + } + ztztlen = scale_expansion_zeroelim(ztlen, detzt, deztail, detztzt); + z1len = fast_expansion_sum_zeroelim(zzlen, detzz, zztlen, detzzt, z1); + z2len = fast_expansion_sum_zeroelim(z1len, z1, ztztlen, detztzt, z2); + xylen = fast_expansion_sum_zeroelim(x2len, x2, y2len, y2, detxy); + dlen = fast_expansion_sum_zeroelim(z2len, z2, xylen, detxy, ddet); + + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + cdlen = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet); + deterlen = fast_expansion_sum_zeroelim(ablen, abdet, cdlen, cddet, deter); + 
+ return deter[deterlen - 1]; +} +/* insphereadapt(): adaptive insphere evaluation, called by insphere() with its permanent value. Stage 1 uses rounded coordinate differences but evaluates the determinant on them with expansion arithmetic, and returns estimate(finlength, fin1) if its magnitude reaches isperrboundB * permanent. Stage 2 recovers the roundoff tails of the differences with Two_Diff_Tail; if all twelve tails are zero the stage-1 value is returned. Stage 3 adds correction terms built from the tails and returns if the magnitude reaches isperrboundC * permanent + resulterrbound * Absolute(det). Otherwise the result of insphereexact() is returned. */ +REAL insphereadapt(pa, pb, pc, pd, pe, permanent) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +REAL *pe; +REAL permanent; +{ + INEXACT REAL aex, bex, cex, dex, aey, bey, cey, dey, aez, bez, cez, dez; + REAL det, errbound; + + INEXACT REAL aexbey1, bexaey1, bexcey1, cexbey1; + INEXACT REAL cexdey1, dexcey1, dexaey1, aexdey1; + INEXACT REAL aexcey1, cexaey1, bexdey1, dexbey1; + REAL aexbey0, bexaey0, bexcey0, cexbey0; + REAL cexdey0, dexcey0, dexaey0, aexdey0; + REAL aexcey0, cexaey0, bexdey0, dexbey0; + REAL ab[4], bc[4], cd[4], da[4], ac[4], bd[4]; + INEXACT REAL ab3, bc3, cd3, da3, ac3, bd3; + REAL abeps, bceps, cdeps, daeps, aceps, bdeps; + REAL temp8a[8], temp8b[8], temp8c[8], temp16[16], temp24[24], temp48[48]; + int temp8alen, temp8blen, temp8clen, temp16len, temp24len, temp48len; + REAL xdet[96], ydet[96], zdet[96], xydet[192]; + int xlen, ylen, zlen, xylen; + REAL adet[288], bdet[288], cdet[288], ddet[288]; + int alen, blen, clen, dlen; + REAL abdet[576], cddet[576]; + int ablen, cdlen; + REAL fin1[1152]; + int finlength; + + REAL aextail, bextail, cextail, dextail; + REAL aeytail, beytail, ceytail, deytail; + REAL aeztail, beztail, ceztail, deztail; + + INEXACT REAL bvirt; + REAL avirt, bround, around; + INEXACT REAL c; + INEXACT REAL abig; + REAL ahi, alo, bhi, blo; + REAL err1, err2, err3; + INEXACT REAL _i, _j; + REAL _0; + /* Rounded differences of a..d against e; their roundoff is recovered later with Two_Diff_Tail. */ + aex = (REAL)(pa[0] - pe[0]); + bex = (REAL)(pb[0] - pe[0]); + cex = (REAL)(pc[0] - pe[0]); + dex = (REAL)(pd[0] - pe[0]); + aey = (REAL)(pa[1] - pe[1]); + bey = (REAL)(pb[1] - pe[1]); + cey = (REAL)(pc[1] - pe[1]); + dey = (REAL)(pd[1] - pe[1]); + aez = (REAL)(pa[2] - pe[2]); + bez = (REAL)(pb[2] - pe[2]); + cez = (REAL)(pc[2] - pe[2]); + dez = (REAL)(pd[2] - pe[2]); + /* ab = aex*bey - bex*aey held as the four components ab[0..3]; bc, cd, da, ac and bd are built the same way below. */ + Two_Product(aex, bey, aexbey1, aexbey0); + Two_Product(bex, aey, bexaey1, bexaey0); + Two_Two_Diff(aexbey1, aexbey0, bexaey1, bexaey0, ab3, ab[2], ab[1], ab[0]); + ab[3] = ab3; + + Two_Product(bex, cey, bexcey1, bexcey0); +
Two_Product(cex, bey, cexbey1, cexbey0); + Two_Two_Diff(bexcey1, bexcey0, cexbey1, cexbey0, bc3, bc[2], bc[1], bc[0]); + bc[3] = bc3; + + Two_Product(cex, dey, cexdey1, cexdey0); + Two_Product(dex, cey, dexcey1, dexcey0); + Two_Two_Diff(cexdey1, cexdey0, dexcey1, dexcey0, cd3, cd[2], cd[1], cd[0]); + cd[3] = cd3; + + Two_Product(dex, aey, dexaey1, dexaey0); + Two_Product(aex, dey, aexdey1, aexdey0); + Two_Two_Diff(dexaey1, dexaey0, aexdey1, aexdey0, da3, da[2], da[1], da[0]); + da[3] = da3; + + Two_Product(aex, cey, aexcey1, aexcey0); + Two_Product(cex, aey, cexaey1, cexaey0); + Two_Two_Diff(aexcey1, aexcey0, cexaey1, cexaey0, ac3, ac[2], ac[1], ac[0]); + ac[3] = ac3; + + Two_Product(bex, dey, bexdey1, bexdey0); + Two_Product(dex, bey, dexbey1, dexbey0); + Two_Two_Diff(bexdey1, bexdey0, dexbey1, dexbey0, bd3, bd[2], bd[1], bd[0]); + bd[3] = bd3; + + temp8alen = scale_expansion_zeroelim(4, cd, bez, temp8a); + temp8blen = scale_expansion_zeroelim(4, bd, -cez, temp8b); + temp8clen = scale_expansion_zeroelim(4, bc, dez, temp8c); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp24len = fast_expansion_sum_zeroelim(temp8clen, temp8c, temp16len, temp16, temp24); + temp48len = scale_expansion_zeroelim(temp24len, temp24, aex, temp48); + xlen = scale_expansion_zeroelim(temp48len, temp48, -aex, xdet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, aey, temp48); + ylen = scale_expansion_zeroelim(temp48len, temp48, -aey, ydet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, aez, temp48); + zlen = scale_expansion_zeroelim(temp48len, temp48, -aez, zdet); + xylen = fast_expansion_sum_zeroelim(xlen, xdet, ylen, ydet, xydet); + alen = fast_expansion_sum_zeroelim(xylen, xydet, zlen, zdet, adet); + + temp8alen = scale_expansion_zeroelim(4, da, cez, temp8a); + temp8blen = scale_expansion_zeroelim(4, ac, dez, temp8b); + temp8clen = scale_expansion_zeroelim(4, cd, aez, temp8c); + temp16len = 
fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp24len = fast_expansion_sum_zeroelim(temp8clen, temp8c, temp16len, temp16, temp24); + temp48len = scale_expansion_zeroelim(temp24len, temp24, bex, temp48); + xlen = scale_expansion_zeroelim(temp48len, temp48, bex, xdet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, bey, temp48); + ylen = scale_expansion_zeroelim(temp48len, temp48, bey, ydet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, bez, temp48); + zlen = scale_expansion_zeroelim(temp48len, temp48, bez, zdet); + xylen = fast_expansion_sum_zeroelim(xlen, xdet, ylen, ydet, xydet); + blen = fast_expansion_sum_zeroelim(xylen, xydet, zlen, zdet, bdet); + + temp8alen = scale_expansion_zeroelim(4, ab, dez, temp8a); + temp8blen = scale_expansion_zeroelim(4, bd, aez, temp8b); + temp8clen = scale_expansion_zeroelim(4, da, bez, temp8c); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp24len = fast_expansion_sum_zeroelim(temp8clen, temp8c, temp16len, temp16, temp24); + temp48len = scale_expansion_zeroelim(temp24len, temp24, cex, temp48); + xlen = scale_expansion_zeroelim(temp48len, temp48, -cex, xdet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, cey, temp48); + ylen = scale_expansion_zeroelim(temp48len, temp48, -cey, ydet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, cez, temp48); + zlen = scale_expansion_zeroelim(temp48len, temp48, -cez, zdet); + xylen = fast_expansion_sum_zeroelim(xlen, xdet, ylen, ydet, xydet); + clen = fast_expansion_sum_zeroelim(xylen, xydet, zlen, zdet, cdet); + + temp8alen = scale_expansion_zeroelim(4, bc, aez, temp8a); + temp8blen = scale_expansion_zeroelim(4, ac, -bez, temp8b); + temp8clen = scale_expansion_zeroelim(4, ab, cez, temp8c); + temp16len = fast_expansion_sum_zeroelim(temp8alen, temp8a, temp8blen, temp8b, temp16); + temp24len = fast_expansion_sum_zeroelim(temp8clen, temp8c, temp16len, temp16, temp24); + 
temp48len = scale_expansion_zeroelim(temp24len, temp24, dex, temp48); + xlen = scale_expansion_zeroelim(temp48len, temp48, dex, xdet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, dey, temp48); + ylen = scale_expansion_zeroelim(temp48len, temp48, dey, ydet); + temp48len = scale_expansion_zeroelim(temp24len, temp24, dez, temp48); + zlen = scale_expansion_zeroelim(temp48len, temp48, dez, zdet); + xylen = fast_expansion_sum_zeroelim(xlen, xdet, ylen, ydet, xydet); + dlen = fast_expansion_sum_zeroelim(xylen, xydet, zlen, zdet, ddet); + + ablen = fast_expansion_sum_zeroelim(alen, adet, blen, bdet, abdet); + cdlen = fast_expansion_sum_zeroelim(clen, cdet, dlen, ddet, cddet); + finlength = fast_expansion_sum_zeroelim(ablen, abdet, cdlen, cddet, fin1); + + det = estimate(finlength, fin1); + errbound = isperrboundB * permanent; + if((det >= errbound) || (-det >= errbound)) + { + return det; + } + + Two_Diff_Tail(pa[0], pe[0], aex, aextail); + Two_Diff_Tail(pa[1], pe[1], aey, aeytail); + Two_Diff_Tail(pa[2], pe[2], aez, aeztail); + Two_Diff_Tail(pb[0], pe[0], bex, bextail); + Two_Diff_Tail(pb[1], pe[1], bey, beytail); + Two_Diff_Tail(pb[2], pe[2], bez, beztail); + Two_Diff_Tail(pc[0], pe[0], cex, cextail); + Two_Diff_Tail(pc[1], pe[1], cey, ceytail); + Two_Diff_Tail(pc[2], pe[2], cez, ceztail); + Two_Diff_Tail(pd[0], pe[0], dex, dextail); + Two_Diff_Tail(pd[1], pe[1], dey, deytail); + Two_Diff_Tail(pd[2], pe[2], dez, deztail); + if((aextail == 0.0) && (aeytail == 0.0) && (aeztail == 0.0) && (bextail == 0.0) && (beytail == 0.0) && (beztail == 0.0) && + (cextail == 0.0) && (ceytail == 0.0) && (ceztail == 0.0) && (dextail == 0.0) && (deytail == 0.0) && (deztail == 0.0)) + { + return det; + } + + errbound = isperrboundC * permanent + resulterrbound * Absolute(det); + abeps = (aex * beytail + bey * aextail) - (aey * bextail + bex * aeytail); + bceps = (bex * ceytail + cey * bextail) - (bey * cextail + cex * beytail); + cdeps = (cex * deytail + dey * cextail) - (cey * 
dextail + dex * ceytail); + daeps = (dex * aeytail + aey * dextail) - (dey * aextail + aex * deytail); + aceps = (aex * ceytail + cey * aextail) - (aey * cextail + cex * aeytail); + bdeps = (bex * deytail + dey * bextail) - (bey * dextail + dex * beytail); + det += (((bex * bex + bey * bey + bez * bez) * + ((cez * daeps + dez * aceps + aez * cdeps) + (ceztail * da3 + deztail * ac3 + aeztail * cd3)) + + (dex * dex + dey * dey + dez * dez) * + ((aez * bceps - bez * aceps + cez * abeps) + (aeztail * bc3 - beztail * ac3 + ceztail * ab3))) - + ((aex * aex + aey * aey + aez * aez) * + ((bez * cdeps - cez * bdeps + dez * bceps) + (beztail * cd3 - ceztail * bd3 + deztail * bc3)) + + (cex * cex + cey * cey + cez * cez) * + ((dez * abeps + aez * bdeps + bez * daeps) + (deztail * ab3 + aeztail * bd3 + beztail * da3)))) + + 2.0 * (((bex * bextail + bey * beytail + bez * beztail) * (cez * da3 + dez * ac3 + aez * cd3) + + (dex * dextail + dey * deytail + dez * deztail) * (aez * bc3 - bez * ac3 + cez * ab3)) - + ((aex * aextail + aey * aeytail + aez * aeztail) * (bez * cd3 - cez * bd3 + dez * bc3) + + (cex * cextail + cey * ceytail + cez * ceztail) * (dez * ab3 + aez * bd3 + bez * da3))); + if((det >= errbound) || (-det >= errbound)) + { + return det; + } + /* The corrected estimate is still below the error bound: evaluate with insphereexact(). */ + return insphereexact(pa, pb, pc, pd, pe); +} +/* insphere(): entry point of the insphere test for pa, pb, pc, pd against pe. Evaluates the determinant in plain floating point and returns it when its magnitude exceeds isperrboundA * permanent; otherwise hands over to insphereadapt() together with permanent. NOTE(review): the sign convention (positive when pe lies inside the sphere through positively oriented pa..pd) is not visible in this part of the file -- confirm against the predicate documentation. */ +REAL insphere(pa, pb, pc, pd, pe) REAL *pa; +REAL *pb; +REAL *pc; +REAL *pd; +REAL *pe; +{ + REAL aex, bex, cex, dex; + REAL aey, bey, cey, dey; + REAL aez, bez, cez, dez; + REAL aexbey, bexaey, bexcey, cexbey, cexdey, dexcey, dexaey, aexdey; + REAL aexcey, cexaey, bexdey, dexbey; + REAL alift, blift, clift, dlift; + REAL ab, bc, cd, da, ac, bd; + REAL abc, bcd, cda, dab; + REAL aezplus, bezplus, cezplus, dezplus; + REAL aexbeyplus, bexaeyplus, bexceyplus, cexbeyplus; + REAL cexdeyplus, dexceyplus, dexaeyplus, aexdeyplus; + REAL aexceyplus, cexaeyplus, bexdeyplus, dexbeyplus; + REAL det; + REAL permanent, errbound; + /* Coordinates of a, b, c, d relative to e. */ + aex = pa[0] - pe[0]; + bex = pb[0] - pe[0]; + cex = pc[0] -
pe[0]; + dex = pd[0] - pe[0]; + aey = pa[1] - pe[1]; + bey = pb[1] - pe[1]; + cey = pc[1] - pe[1]; + dey = pd[1] - pe[1]; + aez = pa[2] - pe[2]; + bez = pb[2] - pe[2]; + cez = pc[2] - pe[2]; + dez = pd[2] - pe[2]; + /* 2x2 determinants of the x and y differences; the individual products are kept because permanent reuses them. */ + aexbey = aex * bey; + bexaey = bex * aey; + ab = aexbey - bexaey; + bexcey = bex * cey; + cexbey = cex * bey; + bc = bexcey - cexbey; + cexdey = cex * dey; + dexcey = dex * cey; + cd = cexdey - dexcey; + dexaey = dex * aey; + aexdey = aex * dey; + da = dexaey - aexdey; + + aexcey = aex * cey; + cexaey = cex * aey; + ac = aexcey - cexaey; + bexdey = bex * dey; + dexbey = dex * bey; + bd = bexdey - dexbey; + /* Combine the x-y determinants with the z differences. */ + abc = aez * bc - bez * ac + cez * ab; + bcd = bez * cd - cez * bd + dez * bc; + cda = cez * da + dez * ac + aez * cd; + dab = dez * ab + aez * bd + bez * da; + /* Squared distances of a, b, c, d from e. */ + alift = aex * aex + aey * aey + aez * aez; + blift = bex * bex + bey * bey + bez * bez; + clift = cex * cex + cey * cey + cez * cez; + dlift = dex * dex + dey * dey + dez * dez; + /* Floating-point estimate of the determinant. */ + det = (dlift * abc - clift * dab) + (blift * cda - alift * bcd); + /* permanent: built from the absolute values of the same products; it scales the error bound below. */ + aezplus = Absolute(aez); + bezplus = Absolute(bez); + cezplus = Absolute(cez); + dezplus = Absolute(dez); + aexbeyplus = Absolute(aexbey); + bexaeyplus = Absolute(bexaey); + bexceyplus = Absolute(bexcey); + cexbeyplus = Absolute(cexbey); + cexdeyplus = Absolute(cexdey); + dexceyplus = Absolute(dexcey); + dexaeyplus = Absolute(dexaey); + aexdeyplus = Absolute(aexdey); + aexceyplus = Absolute(aexcey); + cexaeyplus = Absolute(cexaey); + bexdeyplus = Absolute(bexdey); + dexbeyplus = Absolute(dexbey); + permanent = + ((cexdeyplus + dexceyplus) * bezplus + (dexbeyplus + bexdeyplus) * cezplus + (bexceyplus + cexbeyplus) * dezplus) * alift + + ((dexaeyplus + aexdeyplus) * cezplus + (aexceyplus + cexaeyplus) * dezplus + (cexdeyplus + dexceyplus) * aezplus) * blift + + ((aexbeyplus + bexaeyplus) * dezplus + (bexdeyplus + dexbeyplus) * aezplus + (dexaeyplus + aexdeyplus) * bezplus) * clift + + ((bexceyplus + cexbeyplus) * aezplus + (cexaeyplus +
aexceyplus) * bezplus + (aexbeyplus + bexaeyplus) * cezplus) * dlift; + errbound = isperrboundA * permanent; + if((det > errbound) || (-det > errbound)) + { + return det; + } + /* The estimate does not exceed the error bound: defer to the adaptive routine. */ + return insphereadapt(pa, pb, pc, pd, pe, permanent); +} diff --git a/src/amuse/community/arepo/src/utils/system.c b/src/amuse/community/arepo/src/utils/system.c new file mode 100644 index 0000000000..027974da55 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/system.c @@ -0,0 +1,1300 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * <https://www.gnu.org/licenses/>. + * + * \file src/utils/system.c + * \date 05/2018 + * \brief Small functions for interaction with operating system and + * libraries and other auxiliary functions.
+ * \details contains functions: + * void subdivide_evenly(int N, int pieces, int index, int *first, int *count) + * void permutate_chunks_in_list(int ncount, int *list) + * int get_thread_num(void) + * int system_compare_hostname(const void *a, const void *b) + * int system_compare_first_task(const void *a, const void *b) + * int system_compare_task(const void *a, const void *b) + * void determine_compute_nodes(void) + * void allreduce_sparse_double_sum(double *loc, double *glob, int N) + * void allreduce_sparse_imin(int *loc, int *glob, int N) + * double mysort(void *base, size_t nel, size_t width, int (*compar) (const void *, const void *)) + * double dabs(double a) + * double dmax(double a, double b) + * size_t smax(size_t a, size_t b) + * double dmin(double a, double b) + * double max_array(double *a, int num_elements) + * int imax(int a, int b) + * int imin(int a, int b) + * int myflush(FILE * fstream) + * int flush_everything(void) + * void enable_core_dumps_and_fpu_exceptions(void) + * void my_gsl_error_handler(const char *reason, const char *file, int line, int gsl_errno) + * double get_random_number(void) + * double get_random_number_aux(void) + * double second(void) + * double measure_time(void) + * double timediff(double t0, double t1) + * void minimum_large_ints(int n, long long *src, long long *res) + * void sumup_large_ints_comm(int n, int *src, long long *res, MPI_Comm comm) + * void sumup_large_ints(int n, int *src, long long *res) + * void sumup_longs(int n, long long *src, long long *res) + * size_t sizemax(size_t a, size_t b) + * void report_VmRSS(void) + * long long report_comittable_memory(long long *MemTotal, long long *Committed_AS, long long *SwapTotal, long long + * *SwapFree) void check_maxmemsize_setting(void) void mpi_report_committable_memory(void) int my_ffsll(peanokey i) int my_fls(int x) + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 11.05.2018 Prepared file for public release -- Rainer 
/*! \brief Splits N elements into 'pieces' nearly equal contiguous chunks and
 *         reports the extent of one of them.
 *
 *  The leading chunks hold ceil(N / pieces) elements each; the trailing
 *  chunks hold one element less, so that all chunks together cover exactly
 *  N elements.
 *
 *  \param[in] N Number of elements.
 *  \param[in] pieces Number of chunks (must be non-zero, it is used as a
 *             divisor).
 *  \param[in] index Index of the chunk that is requested.
 *  \param[out] first Index of first element of chunk number 'index'.
 *  \param[out] count Number of elements of chunk number 'index'.
 *
 *  \return void
 */
void subdivide_evenly(int N, int pieces, int index, int *first, int *count)
{
  const int chunk_len = (N - 1) / pieces + 1;    /* size of a "long" chunk */
  const int n_short   = pieces * chunk_len - N;  /* how many chunks are one element shorter */
  const int n_long    = pieces - n_short;        /* chunks [0, n_long) are long */

  if(index >= n_long)
    {
      /* every short chunk preceding this one shifts the start back by one */
      *first = index * chunk_len - (index - n_long);
      *count = chunk_len - 1;
      return;
    }

  *first = index * chunk_len;
  *count = chunk_len;
}
+ * + * \return void + */ +void permutate_chunks_in_list(int ncount, int *list) +{ +#define WALK_N_PIECES 32 /*!< Number of sets, the chunks are divided into */ +#define WALK_N_SIZE 500 /*!< Number of particles per chunk */ + + int nchunk; /*!< Number of chunk sets used */ + int nchunksize; /*!< Size of each chunk */ + int currentchunk; /*!< Chunk set currently processed */ + int nextparticle; + + if(ncount > WALK_N_PIECES * WALK_N_SIZE) + { + nchunk = WALK_N_PIECES; + nchunksize = WALK_N_SIZE; + } + else + { + nchunk = 1; + nchunksize = ncount; + } + + currentchunk = 0; + + int *chunked_TargetList = (int *)mymalloc("chunked_TargetList", ncount * sizeof(int)); + int n, i; + for(n = 0, nextparticle = 0; n < ncount; n++) + { + i = nextparticle; + + chunked_TargetList[n] = list[i]; + if(i < ncount) + { + nextparticle++; + + if((nextparticle % nchunksize) == 0) + nextparticle += (nchunk - 1) * nchunksize; + + if(nextparticle >= ncount) + { + currentchunk++; + if(currentchunk < nchunk) + nextparticle = currentchunk * nchunksize; + } + } + } + + for(n = 0; n < ncount; n++) + list[n] = chunked_TargetList[n]; + + myfree(chunked_TargetList); +} + +/*! \brief Returns thread number. + * + * Unused. + * + * \return 0 + */ +int get_thread_num(void) { return 0; } + +/*! \brief Structure for a data of compute node. + */ +static struct node_data +{ + int task, this_node, first_task_in_this_node; + int first_index, rank_in_node, tasks_in_node; + char name[MPI_MAX_PROCESSOR_NAME]; +} loc_node, *list_of_nodes; + +/*! \brief Compares first nodename and then task of node_data objects. + * + * Sorting kernel. + * + * \param[in] a First element to compare. + * \param[in] b Second element to compare. + * + * \return (-1,0,1); -1 if aname, ((struct node_data *)b)->name); + + if(cmp == 0) + { + if(((struct node_data *)a)->task < ((struct node_data *)b)->task) + cmp = -1; + else + cmp = +1; + } + + return cmp; +} + +/*! 
\brief Compares node_data objects; first first_task_this_node and then + * task. + * + * Sorting kernel. + * + * \param[in] a First element to compare. + * \param[in] b Second element to compare. + * + * \return (-1,0,1); -1 if afirst_task_in_this_node < ((struct node_data *)b)->first_task_in_this_node) + return -1; + + if(((struct node_data *)a)->first_task_in_this_node > ((struct node_data *)b)->first_task_in_this_node) + return +1; + + if(((struct node_data *)a)->task < ((struct node_data *)b)->task) + return -1; + + if(((struct node_data *)a)->task > ((struct node_data *)b)->task) + return +1; + + return 0; +} + +/*! \brief Compares task of node_data objects + * + * Sorting kernel. + * + * \param[in] a First element to compare. + * \param[in] b Second element to compare. + * + * \return (-1,0,1); -1 if a->task < b->task + */ +int system_compare_task(const void *a, const void *b) +{ + if(((struct node_data *)a)->task < ((struct node_data *)b)->task) + return -1; + + if(((struct node_data *)a)->task > ((struct node_data *)b)->task) + return +1; + + return 0; +} + +/*! \brief Determines the compute nodes the simulation is running on. + * + * Reports this to file uses-machines.txt. 
+ * + * \return void + */ +void determine_compute_nodes(void) +{ + int len, nodes, i, no, rank, first_index; + + MPI_Get_processor_name(loc_node.name, &len); + loc_node.task = ThisTask; + + list_of_nodes = malloc(sizeof(struct node_data) * + NTask); /* Note: Internal memory allocation routines are not yet available when this function is called */ + + MPI_Allgather(&loc_node, sizeof(struct node_data), MPI_BYTE, list_of_nodes, sizeof(struct node_data), MPI_BYTE, MPI_COMM_WORLD); + + if(ThisTask == 0) + { + FILE *fd; + if(!(fd = fopen("uses-machines.txt", "w"))) + terminate("can't write file with used machines"); + for(i = 0; i < NTask; i++) + fprintf(fd, "%5d %s\n", list_of_nodes[i].task, list_of_nodes[i].name); + fclose(fd); + } + + qsort(list_of_nodes, NTask, sizeof(struct node_data), system_compare_hostname); + + list_of_nodes[0].first_task_in_this_node = list_of_nodes[0].task; + + for(i = 1, nodes = 1; i < NTask; i++) + { + if(strcmp(list_of_nodes[i].name, list_of_nodes[i - 1].name) != 0) + { + list_of_nodes[i].first_task_in_this_node = list_of_nodes[i].task; + nodes++; + } + else + list_of_nodes[i].first_task_in_this_node = list_of_nodes[i - 1].first_task_in_this_node; + } + + qsort(list_of_nodes, NTask, sizeof(struct node_data), system_compare_first_task); + + for(i = 0; i < NTask; i++) + list_of_nodes[i].tasks_in_node = 0; + + for(i = 0, no = 0, rank = 0, first_index = 0; i < NTask; i++) + { + if(i ? 
list_of_nodes[i].first_task_in_this_node != list_of_nodes[i - 1].first_task_in_this_node : 0) + { + no++; + rank = 0; + first_index = i; + } + + list_of_nodes[i].first_index = first_index; + list_of_nodes[i].this_node = no; + list_of_nodes[i].rank_in_node = rank++; + list_of_nodes[first_index].tasks_in_node++; + } + + int max_count = 0; + int min_count = (1 << 30); + + for(i = 0; i < NTask; i++) + { + list_of_nodes[i].tasks_in_node = list_of_nodes[list_of_nodes[i].first_index].tasks_in_node; + + if(list_of_nodes[i].tasks_in_node > max_count) + max_count = list_of_nodes[i].tasks_in_node; + if(list_of_nodes[i].tasks_in_node < min_count) + min_count = list_of_nodes[i].tasks_in_node; + } + + qsort(list_of_nodes, NTask, sizeof(struct node_data), system_compare_task); + + TasksInThisNode = list_of_nodes[ThisTask].tasks_in_node; + RankInThisNode = list_of_nodes[ThisTask].rank_in_node; + + ThisNode = list_of_nodes[ThisTask].this_node; + + NumNodes = nodes; + MinTasksPerNode = min_count; + MaxTasksPerNode = max_count; + + free(list_of_nodes); +} + +/*! \brief Home-made Allreduce function for double variables with sum reduction + * operation, optimized for sparse vectors. + * + * Tries to avoid communicating and adding up a lot of zeros, which can be + * faster than a brute-force MPI_Allreduce. + * + * \param[in] loc Local array. + * \param[out] glob Global (result) array. + * \param[in] N number of elements in array. 
/*! \brief Home-made Allreduce function for double variables with sum reduction
 *         operation, optimized for sparse vectors.
 *
 *  Tries to avoid communicating and adding up a lot of zeros, which can be
 *  faster than a brute-force MPI_Allreduce.
 *
 *  The index range [0, N) is split into NTask contiguous blocks. Every task
 *  sends its non-zero entries to the task owning the corresponding block,
 *  the owners add up what they receive, and the summed blocks are finally
 *  gathered on all tasks.
 *
 *  \param[in] loc Local array.
 *  \param[out] glob Global (result) array.
 *  \param[in] N number of elements in array.
 *
 *  \return void
 */
void allreduce_sparse_double_sum(double *loc, double *glob, int N)
{
  int i, j, n, loc_first_n, nimport, nexport, task, ngrp;

  int *send_count  = mymalloc("send_count", sizeof(int) * NTask);
  int *recv_count  = mymalloc("recv_count", sizeof(int) * NTask);
  int *send_offset = mymalloc("send_offset", sizeof(int) * NTask);
  int *recv_offset = mymalloc("recv_offset", sizeof(int) * NTask);
  int *blocksize   = mymalloc("blocksize", sizeof(int) * NTask);

  int blk     = N / NTask;
  int rmd     = N - blk * NTask; /* remainder */
  int pivot_n = rmd * (blk + 1); /* first index that falls into a block of size blk (instead of blk + 1) */

  /* the first rmd tasks own blk + 1 elements, the others blk; loc_first_n is the start of the local block */
  for(task = 0, loc_first_n = 0; task < NTask; task++)
    {
      if(task < rmd)
        blocksize[task] = blk + 1;
      else
        blocksize[task] = blk;

      if(task < ThisTask)
        loc_first_n += blocksize[task];
    }

  double *loc_data = mymalloc("loc_data", blocksize[ThisTask] * sizeof(double));
  memset(loc_data, 0, blocksize[ThisTask] * sizeof(double));

  for(j = 0; j < NTask; j++)
    send_count[j] = 0;

  /* find for each non-zero element the processor where it should go for being summed */
  for(n = 0; n < N; n++)
    {
      if(loc[n] != 0)
        {
          if(n < pivot_n)
            task = n / (blk + 1);
          else
            task = rmd + (n - pivot_n) / blk; /* note: if blk=0, then this case can not occur */

          send_count[task]++;
        }
    }

  MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_COMM_WORLD);

  /* prefix sums of the counts give the offsets into the export/import buffers */
  for(j = 0, nexport = 0, nimport = 0, recv_offset[0] = 0, send_offset[0] = 0; j < NTask; j++)
    {
      nexport += send_count[j];
      nimport += recv_count[j];
      if(j > 0)
        {
          send_offset[j] = send_offset[j - 1] + send_count[j - 1];
          recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1];
        }
    }

  /* (global index, value) pair that is exchanged between tasks */
  struct ind_data
  {
    int n;
    double val;
  } * export_data, *import_data;

  export_data = mymalloc("export_data", nexport * sizeof(struct ind_data));
  import_data = mymalloc("import_data", nimport * sizeof(struct ind_data));

  /* send_count is reused as a fill cursor while the export buffer is populated */
  for(j = 0; j < NTask; j++)
    send_count[j] = 0;

  for(n = 0; n < N; n++)
    {
      if(loc[n] != 0)
        {
          if(n < pivot_n)
            task = n / (blk + 1);
          else
            task = rmd + (n - pivot_n) / blk; /* note: if blk=0, then this case can not occur */

          int index              = send_offset[task] + send_count[task]++;
          export_data[index].n   = n;
          export_data[index].val = loc[n];
        }
    }

  for(ngrp = 0; ngrp < (1 << PTask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */
    {
      int recvTask = ThisTask ^ ngrp;
      if(recvTask < NTask)
        if(send_count[recvTask] > 0 || recv_count[recvTask] > 0)
          MPI_Sendrecv(&export_data[send_offset[recvTask]], send_count[recvTask] * sizeof(struct ind_data), MPI_BYTE, recvTask,
                       TAG_DENS_B, &import_data[recv_offset[recvTask]], recv_count[recvTask] * sizeof(struct ind_data), MPI_BYTE,
                       recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }

  /* accumulate the received contributions into the locally owned block */
  for(i = 0; i < nimport; i++)
    {
      int j = import_data[i].n - loc_first_n; /* position inside the local block (shadows the outer j) */

      if(j < 0 || j >= blocksize[ThisTask])
        terminate("j=%d < 0 || j>= blocksize[ThisTask]=%d", j, blocksize[ThisTask]);

      loc_data[j] += import_data[i].val;
    }

  myfree(import_data);
  myfree(export_data);

  /* now share the summed blocks across all processors */
  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);

  for(task = 0; task < NTask; task++)
    bytecounts[task] = blocksize[task] * sizeof(double);

  for(task = 1, byteoffset[0] = 0; task < NTask; task++)
    byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];

  MPI_Allgatherv(loc_data, bytecounts[ThisTask], MPI_BYTE, glob, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);

  myfree(byteoffset);
  myfree(bytecounts);

  myfree(loc_data);
  myfree(blocksize);
  myfree(recv_offset);
  myfree(send_offset);
  myfree(recv_count);
  myfree(send_count);
}
/*! \brief Home-made Allreduce function for int variables with minimum as a
 *         reduction operation.
 *
 *  Tries to avoid communicating a lot of zeros, which can be faster than a
 *  brute-force MPI_Allreduce.
 *
 *  Only non-zero entries of loc take part in the reduction. An element for
 *  which no task holds a non-zero value is returned as INT_MAX in glob.
 *
 *  \param[in] loc Local array.
 *  \param[out] glob Global (result) array.
 *  \param[in] N number of elements in array.
 *
 *  \return void
 */
void allreduce_sparse_imin(int *loc, int *glob, int N)
{
  int i, j, n, loc_first_n, nimport, nexport, task, ngrp;

  int *send_count  = mymalloc("send_count", sizeof(int) * NTask);
  int *recv_count  = mymalloc("recv_count", sizeof(int) * NTask);
  int *send_offset = mymalloc("send_offset", sizeof(int) * NTask);
  int *recv_offset = mymalloc("recv_offset", sizeof(int) * NTask);
  int *blocksize   = mymalloc("blocksize", sizeof(int) * NTask);

  int blk     = N / NTask;
  int rmd     = N - blk * NTask; /* remainder */
  int pivot_n = rmd * (blk + 1); /* first index that falls into a block of size blk (instead of blk + 1) */

  /* the first rmd tasks own blk + 1 elements, the others blk; loc_first_n is the start of the local block */
  for(task = 0, loc_first_n = 0; task < NTask; task++)
    {
      if(task < rmd)
        blocksize[task] = blk + 1;
      else
        blocksize[task] = blk;

      if(task < ThisTask)
        loc_first_n += blocksize[task];
    }

  /* start from the neutral element of the minimum operation */
  int *loc_data = mymalloc("loc_data", blocksize[ThisTask] * sizeof(int));
  for(i = 0; i < blocksize[ThisTask]; i++)
    {
      loc_data[i] = INT_MAX;
    }

  for(j = 0; j < NTask; j++)
    send_count[j] = 0;

  /* find for each non-zero element the processor where it should go for being reduced */
  for(n = 0; n < N; n++)
    {
      if(loc[n] != 0)
        {
          if(n < pivot_n)
            task = n / (blk + 1);
          else
            task = rmd + (n - pivot_n) / blk; /* note: if blk=0, then this case can not occur */

          send_count[task]++;
        }
    }

  MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_COMM_WORLD);

  /* prefix sums of the counts give the offsets into the export/import buffers */
  for(j = 0, nexport = 0, nimport = 0, recv_offset[0] = 0, send_offset[0] = 0; j < NTask; j++)
    {
      nexport += send_count[j];
      nimport += recv_count[j];
      if(j > 0)
        {
          send_offset[j] = send_offset[j - 1] + send_count[j - 1];
          recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1];
        }
    }

  /* (global index, value) pair that is exchanged between tasks */
  struct ind_data
  {
    int n;
    int val;
  } * export_data, *import_data;

  export_data = mymalloc("export_data", nexport * sizeof(struct ind_data));
  import_data = mymalloc("import_data", nimport * sizeof(struct ind_data));

  /* send_count is reused as a fill cursor while the export buffer is populated */
  for(j = 0; j < NTask; j++)
    send_count[j] = 0;

  for(n = 0; n < N; n++)
    {
      if(loc[n] != 0)
        {
          if(n < pivot_n)
            task = n / (blk + 1);
          else
            task = rmd + (n - pivot_n) / blk; /* note: if blk=0, then this case can not occur */

          int index              = send_offset[task] + send_count[task]++;
          export_data[index].n   = n;
          export_data[index].val = loc[n];
        }
    }

  for(ngrp = 0; ngrp < (1 << PTask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */
    {
      int recvTask = ThisTask ^ ngrp;
      if(recvTask < NTask)
        if(send_count[recvTask] > 0 || recv_count[recvTask] > 0)
          MPI_Sendrecv(&export_data[send_offset[recvTask]], send_count[recvTask] * sizeof(struct ind_data), MPI_BYTE, recvTask,
                       TAG_DENS_B, &import_data[recv_offset[recvTask]], recv_count[recvTask] * sizeof(struct ind_data), MPI_BYTE,
                       recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }

  /* fold the received contributions into the locally owned block */
  for(i = 0; i < nimport; i++)
    {
      int j = import_data[i].n - loc_first_n; /* position inside the local block (shadows the outer j) */

      if(j < 0 || j >= blocksize[ThisTask])
        terminate("j=%d < 0 || j>= blocksize[ThisTask]=%d", j, blocksize[ThisTask]);

      loc_data[j] = imin(loc_data[j], import_data[i].val);
    }

  myfree(import_data);
  myfree(export_data);

  /* now share the reduced blocks across all processors */
  int *bytecounts = (int *)mymalloc("bytecounts", sizeof(int) * NTask);
  int *byteoffset = (int *)mymalloc("byteoffset", sizeof(int) * NTask);

  for(task = 0; task < NTask; task++)
    bytecounts[task] = blocksize[task] * sizeof(int);

  for(task = 1, byteoffset[0] = 0; task < NTask; task++)
    byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];

  MPI_Allgatherv(loc_data, bytecounts[ThisTask], MPI_BYTE, glob, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD);

  myfree(byteoffset);
  myfree(bytecounts);

  myfree(loc_data);
  myfree(blocksize);
  myfree(recv_offset);
  myfree(send_offset);
  myfree(recv_count);
  myfree(send_count);
}

/*! \brief Wrapper function for quicksort.
 *
 *  Calls qsort() and measures the time spent in it with second().
 *
 *  \param[in, out] base Array to be sorted.
 *  \param[in] nel Number of elements to be sorted.
 *  \param[in] width Size of each element in array.
 *  \param[in] compar Compare function (sorting kernel).
 *
 *  \return The elapsed time, as computed by timediff().
 */
double mysort(void *base, size_t nel, size_t width, int (*compar)(const void *, const void *))
{
  double t0, t1;

  t0 = second();

  qsort(base, nel, width, compar);

  t1 = second();

  return timediff(t0, t1);
}

/*! \brief Absolute value of a double variable.
 *
 *  \param[in] a Double variable.
 *
 *  \return Absolute value of a.
 */
double dabs(double a)
{
  if(a < 0)
    return -a;
  else
    return a;
}

/*! \brief Maximum value of two double variables.
 *
 *  \param[in] a First variable.
 *  \param[in] b Second variable.
 *
 *  \return Maximum value of a and b; b if the comparison a > b is false.
 */
double dmax(double a, double b)
{
  if(a > b)
    return a;
  else
    return b;
}

/*! \brief Maximum value of two size_t type variables.
 *
 *  \param[in] a First variable.
 *  \param[in] b Second variable.
 *
 *  \return Maximum value of the two variables.
 */
size_t smax(size_t a, size_t b)
{
  if(a > b)
    return a;
  else
    return b;
}

/*! \brief Minimum value of two double variables.
 *
 *  \param[in] a First variable.
 *  \param[in] b Second variable.
 *
 *  \return Minimum value of a and b; b if the comparison a < b is false.
 */
double dmin(double a, double b)
{
  if(a < b)
    return a;
  else
    return b;
}

/*! \brief Maximum value in an array of double variables.
 *
 *  \param[in] a Array of double variables.
 *  \param[in] num_elements Number of elements in array.
 *
 *  \return Maximum value; -DBL_MAX if no element exceeds it (in particular
 *          for num_elements <= 0).
 */
double max_array(double *a, int num_elements)
{
  int i;
  double max = -DBL_MAX;
  for(i = 0; i < num_elements; i++)
    {
      if(a[i] > max)
        {
          max = a[i];
        }
    }
  return (max);
}
\brief Maximum value of two integers. + * + * \param[in] a First integer variable. + * \param[in] b Second integer variable. + * + * \return Maximum of a and b. + */ +int imax(int a, int b) +{ + if(a > b) + return a; + else + return b; +} + +/*! \brief Minimum value of two integers. + * + * \param[in] a First integer variable. + * \param[in] b Second integer variable. + * + * \return Minimum of a and b. + */ +int imin(int a, int b) +{ + if(a < b) + return a; + else + return b; +} + +/*! \brief Flush (i.e. empty buffer) of a file output stream. + * + * \brief[in] fstream Pointer to file output. + * + * \return Status. + */ +int myflush(FILE *fstream) +{ +#ifdef REDUCE_FLUSH + /* do nothing */ + return 0; +#else /* #ifdef REDUCE_FLUSH */ + return fflush(fstream); +#endif /* #ifdef REDUCE_FLUSH #else */ +} + +/*! \brief Flush for all global log-files. + * + * Only flushes in predefined intervals. + * + * \return status (0: did nothing, 1 did flush) + */ +int flush_everything(void) +{ +#ifndef REDUCE_FLUSH + return 0; +#else /* #ifndef REDUCE_FLUSH */ + if(ThisTask == 0) + { + if((CPUThisRun - All.FlushLast) < All.FlushCpuTimeDiff) + { + return 0; + } + else + { + All.FlushLast = CPUThisRun; + } + } + else + { + return 0; + } +#endif /* #ifndef REDUCE_FLUSH #else */ + + mpi_printf("Flushing...\n"); + + fflush(FdDomain); + fflush(FdMemory); + fflush(FdTimings); + fflush(FdInfo); + fflush(FdTimebin); + fflush(FdBalance); + fflush(FdCPU); + fflush(FdEnergy); + +#ifdef OUTPUT_CPU_CSV + fflush(FdCPUCSV); +#endif /* #ifdef OUTPUT_CPU_CSV */ + +#ifdef USE_SFR + fflush(FdSfr); +#endif + + return 1; +} + +#ifdef DEBUG +#include +/*! \brief Allows core dumps that are readable by debugger. 
+ * + * \return void + */ +void enable_core_dumps_and_fpu_exceptions(void) +{ +#ifdef DEBUG_ENABLE_FPU_EXCEPTIONS + /* enable floating point exceptions */ + + extern int feenableexcept(int __excepts); + feenableexcept(FE_DIVBYZERO | FE_INVALID); + + /* Note: FPU exceptions appear not to work properly + * when the Intel C-Compiler for Linux is used + */ +#endif /* #ifdef DEBUG_ENABLE_FPU_EXCEPTIONS */ + + /* set core-dump size to infinity */ + struct rlimit rlim; + getrlimit(RLIMIT_CORE, &rlim); + rlim.rlim_cur = RLIM_INFINITY; + setrlimit(RLIMIT_CORE, &rlim); + + /* MPICH catches the signales SIGSEGV, SIGBUS, and SIGFPE.... + * The following statements reset things to the default handlers, + * which will generate a core file. + */ + signal(SIGSEGV, SIG_DFL); + signal(SIGBUS, SIG_DFL); + signal(SIGFPE, SIG_DFL); + signal(SIGINT, SIG_DFL); +} +#endif /* #ifdef DEBUG */ + +/*! \brief Wrapper for error handling; terminates code. + * + * \param[in] reason Error message. + * \param[in] file File in which error occured. + * \param[in] line Line in which error occured. + * \param[in] gsl_errno Error code. + * + * \return void + */ +void my_gsl_error_handler(const char *reason, const char *file, int line, int gsl_errno) +{ + terminate("GSL has reported an error: reason='%s', error handler called from file '%s', line %d, with error code %d", reason, file, + line, gsl_errno); +} + +/*! \brief Returns a random number from standard random number generator. + * + * \return Random number [0,1). + */ +double get_random_number(void) { return gsl_rng_uniform(random_generator); } + +/*! \brief Returns a random number from auxiliary random number generator. + * + * \return Random number [0,1). + */ +double get_random_number_aux(void) { return gsl_rng_uniform(random_generator_aux); } + +/*! \brief Wall-clock time in seconds. + * + * \return The current value of time as a floating-point value. + */ +double second(void) { return MPI_Wtime(); } + +/*! \brief Timing routine. 
+ * + * Strategy: call this at end of functions to account for time in this + * function, and before another (nontrivial) function is called. + * + * \return Time passed since last call of this function. + */ +double measure_time(void) +{ + double t, dt; + + t = second(); + dt = t - WallclockTime; + WallclockTime = t; + + return dt; +} + +/*! \brief Time difference. + * + * Returns the time difference between two measurements + * obtained with second(). The routine takes care of the + * possible overflow of the tick counter on 32bit systems. + * + * \param[in] t0 First time. + * \param[in] t1 Second time. + * + * \return Time difference. + */ +double timediff(double t0, double t1) +{ + double dt; + + dt = t1 - t0; + + if(dt < 0) /* overflow has occured (for systems with 32bit tick counter) */ + { +#ifdef WALLCLOCK + dt = 0; +#else /* #ifdef WALLCLOCK */ + dt = t1 + pow(2, 32) / CLOCKS_PER_SEC - t0; +#endif /* #ifdef WALLCLOCK #else */ + } + + return dt; +} + +/*! \brief Global minimum of long long variables. + * + * \param[in] n Length of array. + * \param[in] src Source array. + * \param[in] res Result array. + * + * \return void + */ +void minimum_large_ints(int n, long long *src, long long *res) +{ + if(src == res) + { + /* we need a buffer */ + long long buf[n]; + memcpy(buf, src, n * sizeof(long long)); + MPI_Allreduce(buf, res, n, MPI_LONG_LONG_INT, MPI_MIN, MPI_COMM_WORLD); + } + else + MPI_Allreduce(src, res, n, MPI_LONG_LONG_INT, MPI_MIN, MPI_COMM_WORLD); +} + +/*! \brief Global sum of an array of int variables into a long long. + * + * Can be used with arbitrary MPI communicator. + * + * \param[in] n Length of array. + * \param[in] src Source array. + * \param[in] res Result array. + * \param[in] comm MPI communicator. 
+ * + * \return void + */ +void sumup_large_ints_comm(int n, int *src, long long *res, MPI_Comm comm) +{ + long long lsrc[n]; + + for(int i = 0; i < n; i++) + lsrc[i] = src[i]; + + MPI_Allreduce(lsrc, res, n, MPI_LONG_LONG_INT, MPI_SUM, comm); +} + +/*! \brief Global sum of an array of int variables into a long long. + * + * To prevent overflow when summing up; wrapper funciton for + * sumup_large_ints_comm. + * + * \param[in] n Length of array. + * \param[in] src Source array. + * \param[in] res Result array. + * + * \return void + */ +void sumup_large_ints(int n, int *src, long long *res) { sumup_large_ints_comm(n, src, res, MPI_COMM_WORLD); } + +/*! \brief Global sum of an array of long long variables. + * + * Automatically allocates a buffer, if source and result array are identical. + * + * \param[in] n Length of array. + * \param[in] src Source array. + * \param[in] res Result array. + * + * \return void + */ +void sumup_longs(int n, long long *src, long long *res) +{ + if(src == res) + { + /* we need a buffer */ + long long buf[n]; + memcpy(buf, src, n * sizeof(long long)); + MPI_Allreduce(buf, res, n, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); + } + else + MPI_Allreduce(src, res, n, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); +} + +/*! \brief Compares two elements of type size_t. + * + * \param[in] a First element. + * \param[in] b Second element. + * + * \return The larger of both elements, first one if equal. + */ +size_t sizemax(size_t a, size_t b) +{ + if(a < b) + return b; + else + return a; +} + +/*! \brief Reads from process info file of linux system. 
+ * + * \return void + */ +void report_VmRSS(void) +{ + pid_t my_pid; + FILE *fd; + char buf[1024]; + + my_pid = getpid(); + + sprintf(buf, "/proc/%d/status", my_pid); + + if((fd = fopen(buf, "r"))) + { + while(1) + { + if(fgets(buf, 500, fd) != buf) + break; + + if(strncmp(buf, "VmRSS", 5) == 0) + { + printf("ThisTask=%d: %s", ThisTask, buf); + } + if(strncmp(buf, "VmSize", 6) == 0) + { + printf("ThisTask=%d: %s", ThisTask, buf); + } + } + fclose(fd); + } +} + +/*! \brief Reads from memory info file of Linux system. + * + * \return Comittable memory. + */ +long long report_comittable_memory(long long *MemTotal, long long *Committed_AS, long long *SwapTotal, long long *SwapFree) +{ + FILE *fd; + char buf[1024]; + + if((fd = fopen("/proc/meminfo", "r"))) + { + while(1) + { + if(fgets(buf, 500, fd) != buf) + break; + + if(bcmp(buf, "MemTotal", 8) == 0) + { + *MemTotal = atoll(buf + 10); + } + if(strncmp(buf, "Committed_AS", 12) == 0) + { + *Committed_AS = atoll(buf + 14); + } + if(strncmp(buf, "SwapTotal", 9) == 0) + { + *SwapTotal = atoll(buf + 11); + } + if(strncmp(buf, "SwapFree", 8) == 0) + { + *SwapFree = atoll(buf + 10); + } + } + fclose(fd); + } + + return (*MemTotal - *Committed_AS); +} + +/*! \brief Checks if parameter max memsize is smaller than avialable memory. + * + * \return void + */ +void check_maxmemsize_setting(void) +{ + int errflag = 0, errflag_tot; + + if(All.MaxMemSize > (MemoryOnNode / 1024.0 / TasksInThisNode) && RankInThisNode == 0) + { + printf("On node '%s', we have %d MPI ranks and at most %g MB available. This is not enough space for MaxMemSize = %g MB\n", + loc_node.name, TasksInThisNode, MemoryOnNode / 1024.0, (double)All.MaxMemSize); + errflag = 1; + fflush(stdout); + } + + MPI_Allreduce(&errflag, &errflag_tot, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); +#ifndef __OSX__ + if(errflag_tot) + mpi_terminate("Not enough memory error!"); +#endif /* #ifndef __OSX__ */ +} + +/*! \brief Gathers memory information from tasks and write them stdout. 
+ * + * Part of HOST_MEMORY_REPORTING, printed at startup. + * + * \return void + */ +void mpi_report_committable_memory(void) +{ + long long *sizelist, maxsize[6], minsize[6]; + double avgsize[6]; + int i, imem, mintask[6], maxtask[6]; + long long Mem[6]; + char label[512]; + + Mem[0] = report_comittable_memory(&Mem[1], &Mem[2], &Mem[3], &Mem[4]); + Mem[5] = Mem[1] - Mem[0]; + + MemoryOnNode = Mem[1]; + + for(imem = 0; imem < 6; imem++) + { + sizelist = (long long *)malloc(NTask * sizeof(long long)); + MPI_Allgather(&Mem[imem], sizeof(long long), MPI_BYTE, sizelist, sizeof(long long), MPI_BYTE, MPI_COMM_WORLD); + + for(i = 1, mintask[imem] = 0, maxtask[imem] = 0, maxsize[imem] = minsize[imem] = sizelist[0], avgsize[imem] = sizelist[0]; + i < NTask; i++) + { + if(sizelist[i] > maxsize[imem]) + { + maxsize[imem] = sizelist[i]; + maxtask[imem] = i; + } + if(sizelist[i] < minsize[imem]) + { + minsize[imem] = sizelist[i]; + mintask[imem] = i; + } + avgsize[imem] += sizelist[i]; + } + + free(sizelist); + } + + if(ThisTask == 0) + { + printf( + "\n-------------------------------------------------------------------------------------------------------------------------" + "\n"); + for(imem = 0; imem < 6; imem++) + { + switch(imem) + { + case 0: + sprintf(label, "AvailMem"); + break; + case 1: + sprintf(label, "Total Mem"); + break; + case 2: + sprintf(label, "Committed_AS"); + break; + case 3: + sprintf(label, "SwapTotal"); + break; + case 4: + sprintf(label, "SwapFree"); + break; + case 5: + sprintf(label, "AllocMem"); + break; + } + printf("%s:\t Largest = %10.2f Mb (on task=%4d), Smallest = %10.2f Mb (on task=%4d), Average = %10.2f Mb\n", label, + maxsize[imem] / (1024.0), maxtask[imem], minsize[imem] / (1024.0), mintask[imem], avgsize[imem] / (1024.0 * NTask)); + } + printf( + "-------------------------------------------------------------------------------------------------------------------------" + "\n"); + } + + char name[MPI_MAX_PROCESSOR_NAME]; + + if(ThisTask == 
maxtask[2]) + { + int len; + MPI_Get_processor_name(name, &len); + } + + MPI_Bcast(name, MPI_MAX_PROCESSOR_NAME, MPI_BYTE, maxtask[2], MPI_COMM_WORLD); + + if(ThisTask == 0) + { + printf("Task=%d has the maximum commited memory and is host: %s\n", maxtask[2], name); + printf( + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + "\n"); + } + + fflush(stdout); +} + +/*! \brief Find the first bit set in the argument. + * + * \param[in] i Peankoey variable. + * + * \return First bit set (type int). + */ +int my_ffsll(peanokey i) +{ + int res = 0; + + while(i > 0xffffffff) + { + res += 32; + i >>= 32; + } + + return res + ffs(i); +} + +/*! \brief Finds last bit set in x. + * + * The following function appears in the linux kernel. + * + * \param[in] x Ineger Input. + * + * \return Last bit set in x. + */ +int my_fls(int x) +{ + int r = 32; + + if(!x) + return 0; + if(!(x & 0xffff0000u)) + { + x <<= 16; + r -= 16; + } + if(!(x & 0xff000000u)) + { + x <<= 8; + r -= 8; + } + if(!(x & 0xf0000000u)) + { + x <<= 4; + r -= 4; + } + if(!(x & 0xc0000000u)) + { + x <<= 2; + r -= 2; + } + if(!(x & 0x80000000u)) + { + x <<= 1; + r -= 1; + } + return r; +} diff --git a/src/amuse/community/arepo/src/utils/tags.h b/src/amuse/community/arepo/src/utils/tags.h new file mode 100644 index 0000000000..e26bbaa4a5 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/tags.h @@ -0,0 +1,50 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/tags.h + * \date 05/2018 + * \brief Tag defines. + * \details Choice of numbers for historic reasons. + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 28.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#define TAG_N 10 /*!< Various tags used for labelling MPI messages */ +#define TAG_HEADER 11 +#define TAG_PDATA 12 +#define TAG_SPHDATA 13 +#define TAG_KEY 14 +#define TAG_GRAV_B 19 +#define TAG_HYDRO_A 22 +#define TAG_HYDRO_B 23 +#define TAG_NFORTHISTASK 24 +#define TAG_NONPERIOD_A 29 +#define TAG_NONPERIOD_B 30 +#define TAG_NONPERIOD_C 31 +#define TAG_DENS_A 35 +#define TAG_DENS_B 36 +#define TAG_LOCALN 37 +#define TAG_FOF_A 45 +#define TAG_PDATA_SPH 70 +#define TAG_KEY_SPH 71 +#define TAG_BARRIER 85 +#define TAG_NODE_DATA 105 diff --git a/src/amuse/community/arepo/src/utils/timer.h b/src/amuse/community/arepo/src/utils/timer.h new file mode 100644 index 0000000000..a622d1e8e5 --- /dev/null +++ b/src/amuse/community/arepo/src/utils/timer.h @@ -0,0 +1,251 @@ +/*! + * \copyright This file is part of the public version of the AREPO code. + * \copyright Copyright (C) 2009-2019, Max-Planck Institute for Astrophysics + * \copyright Developed by Volker Springel (vspringel@MPA-Garching.MPG.DE) and + * contributing authors. + * \copyright Arepo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Arepo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A copy of the GNU General Public License is available under + * LICENSE as part of this program. See also + * . + * + * \file src/utils/timer.h + * \date 05/2018 + * \brief Timer macros for Arepo. + * \details + * + * \par Major modifications and contributions: + * + * - DD.MM.YYYY Description + * - 28.05.2018 Prepared file for public release -- Rainer Weinberger + */ + +#if !defined(TIMER_H) || defined(TIMER_STRUCT) +#define TIMER_H + +#define DETAILED_TIMING_GRAVWALK 0 +#define DETAILED_TIMING_STELLARDENSITY 1 + +#define TIMER_INSTRUMENT_START(counter) +#define TIMER_INSTRUMENT_STOP(counter) +#define TIMER_INSTRUMENT_CREATE(name, descr) ; + +#ifdef TIMER_STRUCT +#undef TIMER_CREATE +/*! \def TIMER_CREATE(name, desc, par, symba, symbb) + * \brief creates a new CPU timer + * + * \param name name used in the code to reference this timer + * \param desc description string used in output files + * \param par parent of this timer to build a tree-like hierarchy of timers + * \param symba character used for active time in balance.txt + * \param symbb character used for imbalance in balance.txt + * + */ +#define TIMER_CREATE(name, desc, par, symba, symbb) \ + Timer_data[name].parent = par; \ + strncpy(Timer_data[name].shortname, #name, 40); \ + strncpy(Timer_data[name].longname, (desc), 40); \ + Timer_data[name].symb = (symba); \ + Timer_data[name].symbImbal = (symbb); \ + TIMER_INSTRUMENT_CREATE(name, desc) + +#else /* #ifdef TIMER_STRUCT */ + +#define TIMER_STACK_DEPTH 30 +#define TIMER_CREATE(name, desc, parent, symba, symbb) name, + +/*! \def TIMER_START(counter) + * \brief Starts the timer counter.
+ * + * Use this macro instead of directly accessing the CPU_Step array, + * so manual instrumentation APIs can be attached. + * + * \param[in] counter Name of the timer to start. + */ +#define TIMER_START_INTERNAL(counter) \ + { \ + TIMER_INSTRUMENT_START(counter); \ + CPU_Step[TimerStack[TimerStackPos]] += measure_time(); \ + int itimer; \ + for(itimer = 0; itimer <= TimerStackPos; itimer++) \ + if(counter == TimerStack[itimer]) \ + { \ + printf("Try to start timer %d, but it is already running.\n", counter); \ + terminate("fail") \ + }; \ + if(++TimerStackPos >= TIMER_STACK_DEPTH) \ + { \ + terminate("Run out of timer stack space, increase TIMER_STACK_DEPTH"); \ + } \ + else \ + { \ + TimerStack[TimerStackPos] = (counter); \ + } \ + } + +#define TIMER_START(counter) TIMER_START_INTERNAL(counter) + +/*! \def TIMER_STOP(counter) + * \brief Stops the timer counter + * + * Use this macro instead of directly accessing the CPU_Step array, + * so manual instrumentation APIs can be attached. + * + * \param[in] counter Name of the timer to stop. + */ +#define TIMER_STOP_INTERNAL(counter) \ + { \ + if(TimerStack[TimerStackPos] != (counter)) \ + { \ + terminate("Wrong use of TIMER_STOP, you must stop the timer started last"); \ + } \ + CPU_Step[TimerStack[TimerStackPos--]] += measure_time(); \ + if(TimerStackPos < 0) \ + { \ + terminate("Do not stop the out CPU_MISC timer"); \ + } \ + TIMER_INSTRUMENT_STOP(counter); \ + } + +#define TIMER_STOP(counter) TIMER_STOP_INTERNAL(counter) + +/*! \def TIMER_STOPSTART(stop, start) + * \brief Stops the timer 'stop' and starts the timer 'start' + * + * Use this macro instead of directly accessing the CPU_Step array, + * so manual instrumentation APIs can be attached. + * + * \param[in] stop Name of the timer to stop + * \param[in] start Name of the timer to start + */ +#define TIMER_STOPSTART(stop, start) \ + { \ + TIMER_STOP_INTERNAL(stop); \ + TIMER_START_INTERNAL(start); \ + } + +/*! 
\def TIMER_ADD(counter, amount) + * \brief Adds amount to the timer counter. + + * \param[in] counter Name of the timer to add to. + * \param[in] amount Amount to add to timer counter. + */ +#define TIMER_ADD(counter, amount) CPU_Step[counter] += (amount); + +/*! \def TIMER_DIFF(counter) + * \brief Returns amount elapsed for the timer since last save with + * TIMER_STORE. + * + * \param[in] counter Name of the timer to query. + */ +#define TIMER_DIFF(counter) (CPU_Step[counter] - CPU_Step_Stored[counter]) + +/*! \def TIMER_STORE + * \brief Copies the current value of CPU times to a stored variable, such + * that differences with respect to this reference can be calculated. + */ +#define TIMER_STORE memcpy(CPU_Step_Stored, CPU_Step, sizeof(CPU_Step)); + +enum timers +{ + CPU_NONE = -2, /*!< used for counters without a parent */ + CPU_ROOT = -1, /*!< root node of the tree */ +#endif /* #ifdef TIMER_STRUCT #else */ + +/* possible characters to use for marking the parts: + * + * abdefghijklmnopqrstuvABCDEFGHHIJKLMNOPQRSTUV + * 0123456789 + * -:.*=[]^&;~/_$()?+"<>@#!|\ + */ + +/*add your counter here, they must appear in the right order*/ + +TIMER_CREATE(CPU_ALL, "total", CPU_ROOT, '-', '-') /*!< root timer, everything should be below this timer */ +TIMER_CREATE(CPU_TREE, "treegrav", CPU_ALL, 'a', ')') +TIMER_CREATE(CPU_TREEBUILD, "treebuild", CPU_TREE, 'b', '(') +TIMER_CREATE(CPU_TREEBUILD_INSERT, "insert", CPU_TREEBUILD, 'c', '*') +TIMER_CREATE(CPU_TREEBUILD_BRANCHES, "branches", CPU_TREEBUILD, 'd', '&') +TIMER_CREATE(CPU_TREEBUILD_TOPLEVEL, "toplevel", CPU_TREEBUILD, 'e', '^') +TIMER_CREATE(CPU_TREECOSTMEASURE, "treecostm", CPU_TREE, 'f', '%') +TIMER_CREATE(CPU_TREEWALK, "treewalk", CPU_TREE, 'g', '$') +TIMER_CREATE(CPU_TREEWALK1, "treewalk1", CPU_TREEWALK, 'h', '#') +TIMER_CREATE(CPU_TREEWALK2, "treewalk2", CPU_TREEWALK, 'i', '@') +TIMER_CREATE(CPU_TREEBALSNDRCV, "treebalsndrcv", CPU_TREE, 'j', '!') +TIMER_CREATE(CPU_TREESENDBACK, "treeback", CPU_TREE, 'm', '7')
+TIMER_CREATE(CPU_TREEDIRECT, "treedirect", CPU_TREE, 'r', '2') +#ifdef PMGRID +TIMER_CREATE(CPU_PM_GRAVITY, "pm_grav", CPU_ALL, 's', '1') +#endif /* #ifdef PMGRID */ +TIMER_CREATE(CPU_NGBTREEBUILD, "ngbtreebuild", CPU_ALL, 't', 'Z') +TIMER_CREATE(CPU_NGBTREEUPDATEVEL, "ngbtreevelupdate", CPU_ALL, 'u', 'Y') +TIMER_CREATE(CPU_MESH, "voronoi", CPU_ALL, 'v', 'X') +TIMER_CREATE(CPU_MESH_INSERT, "insert", CPU_MESH, 'w', 'W') +TIMER_CREATE(CPU_MESH_FIND_DP, "findpoints", CPU_MESH, 'x', 'V') +TIMER_CREATE(CPU_MESH_CELLCHECK, "cellcheck", CPU_MESH, 'y', 'U') +TIMER_CREATE(CPU_MESH_GEOMETRY, "geometry", CPU_MESH, 'z', 'T') +TIMER_CREATE(CPU_MESH_EXCHANGE, "exchange", CPU_MESH, 'A', 'S') +TIMER_CREATE(CPU_MESH_DYNAMIC, "dynamic", CPU_MESH, 'B', 'R') +TIMER_CREATE(CPU_HYDRO, "hydro", CPU_ALL, 'C', 'Q') +TIMER_CREATE(CPU_GRADIENTS, "gradients", CPU_HYDRO, 'D', 'P') +TIMER_CREATE(CPU_FLUXES, "fluxes", CPU_HYDRO, 'F', 'N') +TIMER_CREATE(CPU_FLUXES_COMM, "fluxcomm", CPU_HYDRO, 'H', 'L') +TIMER_CREATE(CPU_CELL_UPDATES, "updates", CPU_HYDRO, 'J', 'j') +TIMER_CREATE(CPU_SET_VERTEXVELS, "vertex vel", CPU_HYDRO, 'K', 'I') +TIMER_CREATE(CPU_MHD, "mhd", CPU_HYDRO, '4', 'p') +TIMER_CREATE(CPU_DOMAIN, "domain", CPU_ALL, 'U', 'y') +TIMER_CREATE(CPU_PEANO, "peano", CPU_ALL, 'V', 'x') +TIMER_CREATE(CPU_DRIFTS, "drift/kicks", CPU_ALL, 'W', 'w') +TIMER_CREATE(CPU_TIMELINE, "timeline", CPU_ALL, 'X', 'v') +#ifdef TREE_BASED_TIMESTEPS +TIMER_CREATE(CPU_TREE_TIMESTEPS, "treetimesteps", CPU_ALL, 'Y', 'u') +#endif /* #ifdef TREE_BASED_TIMESTEPS */ +TIMER_CREATE(CPU_SNAPSHOT, "i/o", CPU_ALL, 'Z', 't') +TIMER_CREATE(CPU_LOGS, "logs", CPU_ALL, '1', 's') +TIMER_CREATE(CPU_COOLINGSFR, "sfrcool", CPU_ALL, '2', 'r') +#ifdef FOF +TIMER_CREATE(CPU_FOF, "fof", CPU_ALL, '#', 'h') +#endif /* #ifdef FOF */ +#ifdef SUBFIND +TIMER_CREATE(CPU_SUBFIND, "subfind", CPU_ALL, '$', 'g') +#endif /* #ifdef SUBFIND */ +TIMER_CREATE(CPU_REFINE, "refine", CPU_ALL, '%', 'f') +TIMER_CREATE(CPU_DEREFINE, "mesh_derefine", 
CPU_ALL, '^', 'e') +TIMER_CREATE(CPU_MAKEIMAGES, "images", CPU_ALL, '&', 'd') +TIMER_CREATE(CPU_INIT, "initializ.", CPU_ALL, '*', 'c') +TIMER_CREATE(CPU_RESTART, "restart", CPU_ALL, '(', 'b') +TIMER_CREATE(CPU_MISC, "misc", CPU_ALL, ')', 'a') +TIMER_CREATE(CPU_LAST, "LAST", CPU_NONE, ' ', ' ') /*! Date: Fri, 18 Mar 2022 17:27:57 +0000 Subject: [PATCH 03/51] remove '#include ./arepoconfig.h' from allvars.h --- src/amuse/community/arepo/src/main/allvars.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/amuse/community/arepo/src/main/allvars.h b/src/amuse/community/arepo/src/main/allvars.h index 2dc46e56b3..409165f7b4 100644 --- a/src/amuse/community/arepo/src/main/allvars.h +++ b/src/amuse/community/arepo/src/main/allvars.h @@ -41,8 +41,6 @@ #include #include -#include "./arepoconfig.h" - #ifdef IMPOSE_PINNING #include #endif /* #ifdef IMPOSE_PINNING */ From b221aec6cffe53d3cc7c10acf490434774c5e65c Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 18 Mar 2022 17:32:56 +0000 Subject: [PATCH 04/51] update Makefile to include arepo source code --- src/amuse/community/arepo/src/Makefile | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 66f18374a1..4b7835bd0a 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -1,20 +1,27 @@ -CFLAGS += -Wall -g +GSL_INCL = -I/opt/Homebrew/include # Need to make GSL_INCL generalisable. 
+ +CFLAGS += -Wall -g $(GSL_INCL) CXXFLAGS += $(CFLAGS) LDFLAGS += -lm $(MUSE_LD_FLAGS) CODELIB = libarepo.a -CODEOBJS = test.o +SUBDIR = add_backgroundgrid cooling debug_md5 domain fof gitversion gravity \ + hydro init io main mesh mpi_utils ngbtree star_formation subfind \ + time_integration utils +SRCS = $(foreach fd, $(SUBDIR), $(wildcard $(fd)/*.c)) + +CODEOBJS = test.o $(SRCS:c=o) AR = ar ruv RANLIB = ranlib RM = rm -all: $(CODELIB) - +all: $(CODELIB) clean: $(RM) -f *.o *.a + $(RM) $(SRCS:c=o) distclean: clean From 3b250368dcdb819f97d7bb81e4330a1b4fc98c09 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 18 Mar 2022 17:41:06 +0000 Subject: [PATCH 05/51] add TODO to Makefile --- src/amuse/community/arepo/src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 4b7835bd0a..67b321843b 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -1,4 +1,4 @@ -GSL_INCL = -I/opt/Homebrew/include # Need to make GSL_INCL generalisable. +GSL_INCL = -I/opt/Homebrew/include # TODO: Need to make GSL_INCL generalisable. 
CFLAGS += -Wall -g $(GSL_INCL) CXXFLAGS += $(CFLAGS) From d56c2b3d3fd7961f63d307cd010bb2c2076aafe7 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 13:20:31 +0100 Subject: [PATCH 06/51] rename classes --- src/amuse/community/arepo/interface.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 64130dc384..7be0a7665f 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -1,6 +1,6 @@ from amuse.community import * -class arepoInterface(CodeInterface): +class ArepoInterface(CodeInterface): include_headers = ['worker_code.h'] @@ -17,8 +17,8 @@ def echo_int(): return function -class arepo(InCodeComponentImplementation): +class Arepo(InCodeComponentImplementation): def __init__(self, **options): - InCodeComponentImplementation.__init__(self, arepoInterface(**options), **options) + InCodeComponentImplementation.__init__(self, ArepoInterface(**options), **options) From 469b04d20fe1ba2dccf3e6fcaa70bd9db90f3fc9 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 13:29:16 +0100 Subject: [PATCH 07/51] inherit from GravitationalDynamicsInterface --- src/amuse/community/arepo/interface.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 7be0a7665f..a52600e5cf 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -1,6 +1,7 @@ from amuse.community import * +from amuse.community.interface.gd import GravitationalDynamicsInterface -class ArepoInterface(CodeInterface): +class ArepoInterface(CodeInterface, GravitationalDynamicsInterface): include_headers = ['worker_code.h'] From a0a927cf913fa0ae534ebd0c3f94ee41337690cd Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 13:51:12 +0100 Subject: [PATCH 08/51] inherit from 
LiteratureReferencesMixIn --- src/amuse/community/arepo/interface.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index a52600e5cf..a9248e400c 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -1,12 +1,16 @@ from amuse.community import * from amuse.community.interface.gd import GravitationalDynamicsInterface -class ArepoInterface(CodeInterface, GravitationalDynamicsInterface): +class ArepoInterface( + CodeInterface, + GravitationalDynamicsInterface, + LiteratureReferencesMixIn): include_headers = ['worker_code.h'] def __init__(self, **keyword_arguments): CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments) + LiteratureReferencesMixIn.__init__(self) @legacy_function def echo_int(): From 016c75ec2063a553cd2cc73b8b939f0984eb81ee Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 13:55:52 +0100 Subject: [PATCH 09/51] remove wildcard import --- src/amuse/community/arepo/interface.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index a9248e400c..8f11ab923f 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -1,4 +1,8 @@ -from amuse.community import * +from amuse.community import CodeInterface +from amuse.community import LegacyFunctionSpecification +from amuse.community import legacy_function +from amuse.community import LiteratureReferencesMixIn + from amuse.community.interface.gd import GravitationalDynamicsInterface class ArepoInterface( From fae9ec3d41595dc867d330687388190cd647d0ec Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 14:23:25 +0100 Subject: [PATCH 10/51] inherit from GravitationalDynamics --- src/amuse/community/arepo/interface.py | 5 +++-- 1 file changed, 3 insertions(+), 2 
deletions(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 8f11ab923f..6601714257 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -4,6 +4,7 @@ from amuse.community import LiteratureReferencesMixIn from amuse.community.interface.gd import GravitationalDynamicsInterface +from amuse.community.interface.gd import GravitationalDynamics class ArepoInterface( CodeInterface, @@ -26,8 +27,8 @@ def echo_int(): return function -class Arepo(InCodeComponentImplementation): +class Arepo(GravitationalDynamics): def __init__(self, **options): - InCodeComponentImplementation.__init__(self, ArepoInterface(**options), **options) + GravitationalDynamics.__init__(self, ArepoInterface(**options), **options) From e8a31134927ecb0225583195694b3655607d073f Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 14:29:11 +0100 Subject: [PATCH 11/51] minor reformatting --- src/amuse/community/arepo/interface.py | 29 +++++++++++++------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 6601714257..89e089fc2a 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -6,29 +6,30 @@ from amuse.community.interface.gd import GravitationalDynamicsInterface from amuse.community.interface.gd import GravitationalDynamics + class ArepoInterface( CodeInterface, GravitationalDynamicsInterface, - LiteratureReferencesMixIn): - - include_headers = ['worker_code.h'] - + LiteratureReferencesMixIn +): + + include_headers = ["worker_code.h"] + def __init__(self, **keyword_arguments): CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments) LiteratureReferencesMixIn.__init__(self) - + @legacy_function def echo_int(): - function = LegacyFunctionSpecification() - function.addParameter('int_in', dtype='int32', 
direction=function.IN) - function.addParameter('int_out', dtype='int32', direction=function.OUT) - function.result_type = 'int32' + function = LegacyFunctionSpecification() + function.addParameter("int_in", dtype="int32", direction=function.IN) + function.addParameter("int_out", dtype="int32", direction=function.OUT) + function.result_type = "int32" function.can_handle_array = True return function - - -class Arepo(GravitationalDynamics): - def __init__(self, **options): - GravitationalDynamics.__init__(self, ArepoInterface(**options), **options) + +class Arepo(GravitationalDynamics): + def __init__(self, **options): + GravitationalDynamics.__init__(self, ArepoInterface(**options), **options) From a3a020a27a8d1f4564d54f0ce4cf4b987aaf7b95 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 15:00:09 +0100 Subject: [PATCH 12/51] add ArepoInterface docstring --- src/amuse/community/arepo/interface.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 89e089fc2a..34d46a07a9 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -12,6 +12,16 @@ class ArepoInterface( GravitationalDynamicsInterface, LiteratureReferencesMixIn ): + """ + Arepo is a cosmological magnetohydrodynamical moving-mesh simulation code, + descended from GADGET. + + References: + .. [#] Springel, V., 2010, MNRAS, 401, 791 (Arepo) [2010MNRAS.401..791S] + .. [#] Pakmor, R., Bauer, A., Springel, V., 2011, MNRAS, 418, 1392 (Magnetohydrodynamics Module) [2011MNRAS.418.1392P] + .. [#] Pakmor, R. et al., 2016, MNRAS, 455, 1134 (Gradient Estimation) [2016MNRAS.455.1134P] + .. 
[#] Weinberger, R., Springel, V., Pakmor, R., 2020, ApJS, 248, 32 (Public Code Release) [2020ApJS..248...32W] + """ include_headers = ["worker_code.h"] From 1d668dd6b713caf0a5bd43c44674d42d785e9863 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 15:08:46 +0100 Subject: [PATCH 13/51] add TODO re CodeWithDataDirectories --- src/amuse/community/arepo/interface.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 34d46a07a9..989190ebd8 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -28,6 +28,7 @@ class ArepoInterface( def __init__(self, **keyword_arguments): CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments) LiteratureReferencesMixIn.__init__(self) + # TODO: Determine whether need to inherit from CodeWithDataDirectories. @legacy_function def echo_int(): From aa1c5982c557cee7cac87e4fa5d0541bb6bc7221 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 16:32:49 +0100 Subject: [PATCH 14/51] add set_parameters() --- src/amuse/community/arepo/interface.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 989190ebd8..ac122b9f23 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -31,12 +31,10 @@ def __init__(self, **keyword_arguments): # TODO: Determine whether need to inherit from CodeWithDataDirectories. 
@legacy_function - def echo_int(): + def set_parameters(): function = LegacyFunctionSpecification() - function.addParameter("int_in", dtype="int32", direction=function.IN) - function.addParameter("int_out", dtype="int32", direction=function.OUT) + function.addParameter("param_file", dtype="string", direction=function.IN) function.result_type = "int32" - function.can_handle_array = True return function From e312e0185138fb4e99230588540c7ad513e37b20 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 17:24:54 +0100 Subject: [PATCH 15/51] change GSL_INCL to GSL_FLAGS --- src/amuse/community/arepo/src/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 67b321843b..4f99b34024 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -1,6 +1,6 @@ -GSL_INCL = -I/opt/Homebrew/include # TODO: Need to make GSL_INCL generalisable. +GSL_FLAGS = -I/opt/Homebrew/include # TODO: Need to make GSL_FLAGS generalisable. -CFLAGS += -Wall -g $(GSL_INCL) +CFLAGS += -Wall -g $(GSL_FLAGS) CXXFLAGS += $(CFLAGS) LDFLAGS += -lm $(MUSE_LD_FLAGS) From d1c5ccaeb08ad872d4c39076aa72d4f56babb9df Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Mon, 28 Mar 2022 17:57:32 +0100 Subject: [PATCH 16/51] add AMUSE_DIR conditional and include config.mk --- src/amuse/community/arepo/src/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 4f99b34024..8b3de12905 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -1,3 +1,8 @@ +ifeq ($(origin AMUSE_DIR), undefined) + AMUSE_DIR := $(shell amusifier --get-amuse-dir) +endif +-include $(AMUSE_DIR)/config.mk + GSL_FLAGS = -I/opt/Homebrew/include # TODO: Need to make GSL_FLAGS generalisable. 
CFLAGS += -Wall -g $(GSL_FLAGS) From c1a672f8d1162cfe12a99814b4a8577dafaf43fd Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Thu, 31 Mar 2022 10:02:33 +0100 Subject: [PATCH 17/51] generate interface.cc --- src/amuse/community/arepo/interface.cc | 175 +++++++++++++++++++++++-- 1 file changed, 166 insertions(+), 9 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index a590e82689..160b99ab25 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -1,11 +1,168 @@ -extern int echo(int input); - -/* - * Interface code - */ - -int echo_int(int input, int * output){ - *output = echo(input); - return 0; +#include "worker_code.h" + +int get_mass(int index_of_the_particle, double * mass){ + return 0; +} + +int commit_particles(){ + return 0; +} + +int get_time(double * time){ + return 0; +} + +int set_mass(int index_of_the_particle, double mass){ + return 0; +} + +int get_index_of_first_particle(int * index_of_the_particle){ + return 0; +} + +int get_total_radius(double * radius){ + return 0; +} + +int new_particle(int * index_of_the_particle, double mass, double x, + double y, double z, double vx, double vy, double vz, double radius){ + return 0; +} + +int get_total_mass(double * mass){ + return 0; +} + +int evolve_model(double time){ + return 0; +} + +int set_eps2(double epsilon_squared){ + return 0; +} + +int get_begin_time(double * time){ + return 0; +} + +int get_eps2(double * epsilon_squared){ + return 0; +} + +int get_index_of_next_particle(int index_of_the_particle, + int * index_of_the_next_particle){ + return 0; +} + +int delete_particle(int index_of_the_particle){ + return 0; +} + +int get_potential(int index_of_the_particle, double * potential){ + return 0; +} + +int synchronize_model(){ + return 0; +} + +int set_state(int index_of_the_particle, double mass, double x, double y, + double z, double vx, double vy, double vz, double radius){ + return 0; +} + +int 
get_state(int index_of_the_particle, double * mass, double * x, + double * y, double * z, double * vx, double * vy, double * vz, + double * radius){ + return 0; +} + +int get_time_step(double * time_step){ + return 0; +} + +int recommit_particles(){ + return 0; +} + +int get_kinetic_energy(double * kinetic_energy){ + return 0; +} + +int get_number_of_particles(int * number_of_particles){ + return 0; +} + +int set_acceleration(int index_of_the_particle, double ax, double ay, + double az){ + return 0; +} + +int get_center_of_mass_position(double * x, double * y, double * z){ + return 0; +} + +int get_center_of_mass_velocity(double * vx, double * vy, double * vz){ + return 0; +} + +int get_radius(int index_of_the_particle, double * radius){ + return 0; +} + +int set_begin_time(double time){ + return 0; +} + +int set_radius(int index_of_the_particle, double radius){ + return 0; +} + +int cleanup_code(){ + return 0; +} + +int recommit_parameters(){ + return 0; +} + +int initialize_code(){ + return 0; +} + +int get_potential_energy(double * potential_energy){ + return 0; +} + +int get_velocity(int index_of_the_particle, double * vx, double * vy, + double * vz){ + return 0; +} + +int get_position(int index_of_the_particle, double * x, double * y, + double * z){ + return 0; +} + +int set_position(int index_of_the_particle, double x, double y, double z){ + return 0; +} + +int get_acceleration(int index_of_the_particle, double * ax, double * ay, + double * az){ + return 0; +} + +int commit_parameters(){ + return 0; +} + +int set_parameters(char * param_file){ + return 0; +} + +int set_velocity(int index_of_the_particle, double vx, double vy, + double vz){ + return 0; } From 12a022d97abb63a30468db039011c3b53843e47f Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Thu, 31 Mar 2022 10:08:44 +0100 Subject: [PATCH 18/51] update interface name in Makefile --- src/amuse/community/arepo/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 7c392db261..567a479296 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -30,10 +30,10 @@ $(CODELIB): make -C src all worker_code.cc: interface.py - $(CODE_GENERATOR) --type=c interface.py arepoInterface -o $@ + $(CODE_GENERATOR) --type=c interface.py ArepoInterface -o $@ worker_code.h: interface.py - $(CODE_GENERATOR) --type=H interface.py arepoInterface -o $@ + $(CODE_GENERATOR) --type=H interface.py ArepoInterface -o $@ arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) $(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@ From fb1f673a05290800331631c390c5473eb2b99ab9 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Thu, 31 Mar 2022 10:20:13 +0100 Subject: [PATCH 19/51] update interface name in test_arepo.py --- src/amuse/community/arepo/test_arepo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/arepo/test_arepo.py b/src/amuse/community/arepo/test_arepo.py index 8cdeabb474..d4a7b22b38 100644 --- a/src/amuse/community/arepo/test_arepo.py +++ b/src/amuse/community/arepo/test_arepo.py @@ -1,12 +1,12 @@ from amuse.test.amusetest import TestWithMPI -from .interface import arepoInterface +from .interface import ArepoInterface from .interface import arepo -class arepoInterfaceTests(TestWithMPI): +class ArepoInterfaceTests(TestWithMPI): def test1(self): - instance = arepoInterface() + instance = ArepoInterface() result,error = instance.echo_int(12) self.assertEquals(error, 0) self.assertEquals(result, 12) From 4d050da72c6cd0310ed4293f972cb2d030e183c6 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Thu, 31 Mar 2022 10:29:58 +0100 Subject: [PATCH 20/51] update arepo to Arepo --- src/amuse/community/arepo/test_arepo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/test_arepo.py b/src/amuse/community/arepo/test_arepo.py index d4a7b22b38..a54108fd43 
100644 --- a/src/amuse/community/arepo/test_arepo.py +++ b/src/amuse/community/arepo/test_arepo.py @@ -1,7 +1,7 @@ from amuse.test.amusetest import TestWithMPI from .interface import ArepoInterface -from .interface import arepo +from .interface import Arepo class ArepoInterfaceTests(TestWithMPI): From 4ff74a9cce4c78b08d5669c2562e8c011e182edf Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 1 Apr 2022 15:33:26 +0100 Subject: [PATCH 21/51] add code from arepo main.c to initialize_code() and cleanup_code() --- src/amuse/community/arepo/interface.cc | 79 +++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 8 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 160b99ab25..b6656bc04a 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -1,5 +1,76 @@ #include "worker_code.h" +#include "src/main/allvars.h" +#include "src/main/proto.h" + +int initialize_code(){ + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); + MPI_Comm_size(MPI_COMM_WORLD, &NTask); + + /* output a welcome message */ + hello(); + + /* initialize CPU-time/Wallclock-time measurement */ + init_cpu_log(); + + determine_compute_nodes(); + + for(PTask = 0; NTask > (1 << PTask); PTask++) + ; + + begrun0(); + + strcpy(ParameterFile, "param.txt"); /* Removing command line parsing. argv[1] replaced with "param.txt". */ + RestartFlag = 0; + + begrun1(); /* set-up run */ + + char fname[MAXLEN_PATH]; + strcpy(fname, All.InitCondFile); + + /* now we can load the file */ + +#ifdef READ_DM_AS_GAS + read_ic(fname, (RestartFlag == 14) ? 0x02 : LOAD_TYPES); +#else /* #ifdef READ_DM_AS_GAS */ + read_ic(fname, (RestartFlag == 14) ? 0x01 : LOAD_TYPES); +#endif /* #ifdef READ_DM_AS_GAS #else */ + + /* init returns a status code, where a value of >=0 means that endrun() should be called. 
*/ + int status = init(); + + if(status >= 0) + { + if(status > 0) + mpi_printf("init() returned with %d\n", status); + + cleanup_code(); + } + + begrun2(); + return 0; +} + +int cleanup_code(){ + mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second())); + mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n"); + fflush(stdout); + +#ifdef HAVE_HDF5 + /*The hdf5 library will sometimes register an atexit() handler that calls its + * error handler. In AREPO this is set to my_hdf_error_handler, which calls + * MPI_Abort. Calling MPI_Abort after MPI_Finalize is not allowed. + * Hence unset the HDF error handler here + */ + H5Eset_auto(NULL, NULL); +#endif /* #ifdef HAVE_HDF5 */ + + MPI_Finalize(); + exit(0); + return 0; +} + int get_mass(int index_of_the_particle, double * mass){ return 0; } @@ -118,18 +189,10 @@ int set_radius(int index_of_the_particle, double radius){ return 0; } -int cleanup_code(){ - return 0; -} - int recommit_parameters(){ return 0; } -int initialize_code(){ - return 0; -} - int get_potential_energy(double * potential_energy){ return 0; } From 7efa65153bf353a56a0601d1decca50a881adb6f Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 1 Apr 2022 15:44:25 +0100 Subject: [PATCH 22/51] comment out set_parameters() and add TODO --- src/amuse/community/arepo/interface.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index ac122b9f23..fef740d365 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -30,12 +30,15 @@ def __init__(self, **keyword_arguments): LiteratureReferencesMixIn.__init__(self) # TODO: Determine whether need to inherit from CodeWithDataDirectories. 
- @legacy_function - def set_parameters(): - function = LegacyFunctionSpecification() - function.addParameter("param_file", dtype="string", direction=function.IN) - function.result_type = "int32" - return function + # TODO: Pass parameter file to initialize_code(), and undo hardcoding of parameter file within the function. + # This function has been kept as a template for future functions. + + # @legacy_function + # def set_parameters(): + # function = LegacyFunctionSpecification() + # function.addParameter("param_file", dtype="string", direction=function.IN) + # function.result_type = "int32" + # return function class Arepo(GravitationalDynamics): From 2c1f16add06379e9d8d62c67f9026eed1678f775 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 1 Apr 2022 15:50:26 +0100 Subject: [PATCH 23/51] add TODO to test_arepo.py --- src/amuse/community/arepo/test_arepo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/test_arepo.py b/src/amuse/community/arepo/test_arepo.py index a54108fd43..6aea82105f 100644 --- a/src/amuse/community/arepo/test_arepo.py +++ b/src/amuse/community/arepo/test_arepo.py @@ -7,7 +7,7 @@ class ArepoInterfaceTests(TestWithMPI): def test1(self): instance = ArepoInterface() - result,error = instance.echo_int(12) + result,error = instance.echo_int(12) # TODO: Update test and add more... 
self.assertEquals(error, 0) self.assertEquals(result, 12) instance.stop() From 5daa7e494ca52a8039fbf98dd11edc4594f8eec6 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 1 Apr 2022 16:04:20 +0100 Subject: [PATCH 24/51] add TODO to Makefile --- src/amuse/community/arepo/src/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 8b3de12905..27c1b505b9 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -1,3 +1,4 @@ +# TODO: Determine whether this is needed as included in arepo/Makefile. ifeq ($(origin AMUSE_DIR), undefined) AMUSE_DIR := $(shell amusifier --get-amuse-dir) endif From ed27e203529c185d23c275b8afb141f1a8f14f25 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 1 Apr 2022 17:02:21 +0100 Subject: [PATCH 25/51] remove test.cc as not needed --- src/amuse/community/arepo/src/test.cc | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 src/amuse/community/arepo/src/test.cc diff --git a/src/amuse/community/arepo/src/test.cc b/src/amuse/community/arepo/src/test.cc deleted file mode 100644 index c30eeef8cb..0000000000 --- a/src/amuse/community/arepo/src/test.cc +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Example function for a code - */ -int echo(int input){ - return input; -} From 5b4065610e3b21d8f0b06e3dfccf533c9b947867 Mon Sep 17 00:00:00 2001 From: Felicity Guest Date: Fri, 1 Apr 2022 17:27:11 +0100 Subject: [PATCH 26/51] add initialize_code() and define_methods() to Arepo --- src/amuse/community/arepo/interface.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index fef740d365..0e1ae7e60b 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -30,9 +30,7 @@ def __init__(self, **keyword_arguments): LiteratureReferencesMixIn.__init__(self) # TODO: 
Determine whether need to inherit from CodeWithDataDirectories. - # TODO: Pass parameter file to initialize_code(), and undo hardcoding of parameter file within the function. - # This function has been kept as a template for future functions. - + # This function has been kept as a basic template for future functions. # @legacy_function # def set_parameters(): # function = LegacyFunctionSpecification() @@ -45,3 +43,20 @@ class Arepo(GravitationalDynamics): def __init__(self, **options): GravitationalDynamics.__init__(self, ArepoInterface(**options), **options) + + def initialize_code(self): + result = self.overridden().initialize_code() + + # TODO: Pass parameter file to initialize_code(), and undo hardcoding of parameter file within the function. + # Could be done in the way in which Gadget2 sets the gadget_output_directory. + #self.parameters.gadget_output_directory = self.get_output_directory() + + return result + + def define_methods(self, builder): + # TODO: Determine how to link this to Arepo's run() - the main simulation loop. 
+ builder.add_method( + "run", + (), + (builder.ERROR_CODE) + ) From deb6a8b49ecafd0435f3982f240cdfaf9a11dfff Mon Sep 17 00:00:00 2001 From: Matthew West Date: Thu, 12 May 2022 13:46:02 +0100 Subject: [PATCH 27/51] add default parameters to arepo's interface.cc --- src/amuse/community/arepo/interface.cc | 133 ++++++++++++++++++++++--- 1 file changed, 121 insertions(+), 12 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index b6656bc04a..10fa51a8af 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -3,6 +3,117 @@ #include "src/main/allvars.h" #include "src/main/proto.h" +// general interface functions: + +void set_default_parameters(){ + // Relevant files + strcpy(All.InitCondFile, "./snap_010"); + strcpy(All.OutputDir, "./output"); + strcpy(All.SnapshotFileBase, "snap"); + strcpy(All.OutputListFilename, "./output_list.txt"); + + // File formats + All.ICFormat = 1; + All.SnapFormat = 3; + + // CPU-time LimitUBelowThisDensity + All.TimeLimitCPU = 93000; + All.CpuTimeBetRestartFile = 12000; + All.ResubmitOn = 0; + strcpy(All.ResubmitCommand, "my-scriptfile"); + + // Memory allocation + All.MaxMemSize = 2500; + + // Characteristics of run + All.TimeBegin = 0.0; + All.TimeMax = 1.0; + + // Basic code options that set simulation type + All.ComovingIntegrationOn = 0; + All.PeriodicBoundariesOn = 0; + All.CoolingOn = 0; + All.StarformationOn = 0; + + // Cosmological parameters + All.Omega0 = 0.0; + All.OmegaLambda = 0.0; + All.OmegaBaryon = 0.0; + All.HubbleParam = 1.0; + All.BoxSize = 100000.0; + + // Output frequency and output parameters + All.OutputListOn = 1; + All.TimeBetSnapshot = 0.0; + All.TimeOfFirstSnapshot = 0.0; + All.TimeBetStatistics = 0.01; + All.NumFilesPerSnapshot = 1; + All.NumFilesWrittenInParallel = 1; + + // Integration timing accuracy + All.TypeOfTimestepCriterion = 0; + All.ErrTolIntAccuracy = 0.012; + All.CourantFac = 0.3; + 
All.MaxSizeTimestep = 0.05; + All.MinSizeTimestep = 2.0e-9; + + // Treatment of empty space and temp limits + All.InitGasTemp = 244.8095; + All.MinGasTemp = 5.0; + All.MinimumDensityOnStartUp = 1.0e-20; + All.LimitUBelowThisDensity = 0.0; + All.LimitUBelowCertainDensityToThisValue = 0.0; + All.MinEgySpec = 0.0; + + // Tree algorithm, force accuracy, domain update frequency + All.TypeOfOpeningCriterion = 1; + All.ErrTolTheta = 0.7; + All.ErrTolForceAcc = 0.0025; + All.MultipleDomains = 8; + All.TopNodeFactor = 2.5; + All.ActivePartFracForNewDomainDecomp = 0.01; + + // Initial density estimates + All.DesNumNgb = 64; + All.MaxNumNgbDeviation = 4; + + // System of Units + All.UnitLength_in_cm = 3.085678e21; + All.UnitMass_in_g = 1.989e43; + All.UnitVelocity_in_cm_per_s = 1e5; + + // Gravitational softening lengths + All.SofteningComovingType0 = 1.0; + All.SofteningComovingType1 = 1.0; + + All.SofteningMaxPhysType0 = 1.0; + All.SofteningMaxPhysType1 = 1.0; + + All.GasSoftFactor = 2.5; + + All.SofteningTypeOfPartType0 = 0; + All.SofteningTypeOfPartType1 = 1; + All.SofteningTypeOfPartType2 = 1; + All.SofteningTypeOfPartType3 = 1; + All.SofteningTypeOfPartType4 = 1; + All.SofteningTypeOfPartType5 = 1; + + All.MinimumComovingHydroSoftening = 1.0; + All.AdaptiveHydroSofteningSpacing = 1.2; + + // Mesh regularization options + All.CellShapingSpeed = 0.5; + All.CellShapingFactor = 1.0; + + // parameters that are fixed for AMUSE: + All.PartAllocFactor = 1.5; // Memory allocation parameter + All.TreeAllocFactor = 0.8; // Memory allocation parameter + All.BufferSize = 25; // Memory allocation parameter + All.ResubmitOn = 0; // Keep this turned off! + All.OutputListOn = 0; // Keep this turned off + All.GravityConstantInternal = 0; // Keep this turned off +} + int initialize_code(){ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); @@ -21,7 +132,6 @@ int initialize_code(){ begrun0(); - strcpy(ParameterFile, "param.txt"); /* Removing command line parsing. 
argv[1] replaced with "param.txt". */ RestartFlag = 0; begrun1(); /* set-up run */ @@ -95,7 +205,7 @@ int get_total_radius(double * radius){ return 0; } -int new_particle(int * index_of_the_particle, double mass, double x, +int new_particle(int * index_of_the_particle, double mass, double x, double y, double z, double vx, double vy, double vz, double radius){ return 0; } @@ -120,7 +230,7 @@ int get_eps2(double * epsilon_squared){ return 0; } -int get_index_of_next_particle(int index_of_the_particle, +int get_index_of_next_particle(int index_of_the_particle, int * index_of_the_next_particle){ return 0; } @@ -137,13 +247,13 @@ int synchronize_model(){ return 0; } -int set_state(int index_of_the_particle, double mass, double x, double y, +int set_state(int index_of_the_particle, double mass, double x, double y, double z, double vx, double vy, double vz, double radius){ return 0; } -int get_state(int index_of_the_particle, double * mass, double * x, - double * y, double * z, double * vx, double * vy, double * vz, +int get_state(int index_of_the_particle, double * mass, double * x, + double * y, double * z, double * vx, double * vy, double * vz, double * radius){ return 0; } @@ -164,7 +274,7 @@ int get_number_of_particles(int * number_of_particles){ return 0; } -int set_acceleration(int index_of_the_particle, double ax, double ay, +int set_acceleration(int index_of_the_particle, double ax, double ay, double az){ return 0; } @@ -197,12 +307,12 @@ int get_potential_energy(double * potential_energy){ return 0; } -int get_velocity(int index_of_the_particle, double * vx, double * vy, +int get_velocity(int index_of_the_particle, double * vx, double * vy, double * vz){ return 0; } -int get_position(int index_of_the_particle, double * x, double * y, +int get_position(int index_of_the_particle, double * x, double * y, double * z){ return 0; } @@ -211,7 +321,7 @@ int set_position(int index_of_the_particle, double x, double y, double z){ return 0; } -int get_acceleration(int 
index_of_the_particle, double * ax, double * ay, +int get_acceleration(int index_of_the_particle, double * ax, double * ay, double * az){ return 0; } @@ -224,8 +334,7 @@ int set_parameters(char * param_file){ return 0; } -int set_velocity(int index_of_the_particle, double vx, double vy, +int set_velocity(int index_of_the_particle, double vx, double vy, double vz){ return 0; } - From fc35f9d8bce838975587490108467f4cfce7ff1f Mon Sep 17 00:00:00 2001 From: Matthew West Date: Fri, 13 May 2022 10:57:33 +0100 Subject: [PATCH 28/51] Comment out read_parameter_file in begrun, add run_sim in interface.cc, add cleanup_code to interface.py, add set_default_parameters to initialize_code --- src/amuse/community/arepo/interface.cc | 7 +++++++ src/amuse/community/arepo/interface.py | 14 ++++++++------ src/amuse/community/arepo/src/init/begrun.c | 2 +- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 10fa51a8af..6a1b9641b1 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -134,6 +134,7 @@ int initialize_code(){ RestartFlag = 0; + set_default_parameters(); begrun1(); /* set-up run */ char fname[MAXLEN_PATH]; @@ -162,6 +163,12 @@ int initialize_code(){ return 0; } +int run_sim() { + /* This run command is for the Arepo simulation */ + run(); + return 0; +} + int cleanup_code(){ mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second())); mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n"); diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 0e1ae7e60b..1f228321b9 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -40,23 +40,25 @@ def __init__(self, **keyword_arguments): class Arepo(GravitationalDynamics): - + def __init__(self, **options): GravitationalDynamics.__init__(self, ArepoInterface(**options), **options) def 
initialize_code(self): result = self.overridden().initialize_code() - # TODO: Pass parameter file to initialize_code(), and undo hardcoding of parameter file within the function. - # Could be done in the way in which Gadget2 sets the gadget_output_directory. - #self.parameters.gadget_output_directory = self.get_output_directory() - return result def define_methods(self, builder): # TODO: Determine how to link this to Arepo's run() - the main simulation loop. builder.add_method( - "run", + "run_sim", + (), + (builder.ERROR_CODE) + ) + # When simulation is finished, shutdown HDF5 & MPI, and exit(0) + builder.add_method( + "cleanup_code", (), (builder.ERROR_CODE) ) diff --git a/src/amuse/community/arepo/src/init/begrun.c b/src/amuse/community/arepo/src/init/begrun.c index ad8a5222ca..5db8ba6351 100644 --- a/src/amuse/community/arepo/src/init/begrun.c +++ b/src/amuse/community/arepo/src/init/begrun.c @@ -101,7 +101,7 @@ void begrun0(void) */ void begrun1(void) { - read_parameter_file(ParameterFile); /* ... read in parameters for this run */ + /* read_parameter_file(ParameterFile); ... read in parameters for this run */ check_parameters(); /* consistency check of parameters */ From 201535d042be63baf5a2bcdb01795d9d13d40424 Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Wed, 18 May 2022 12:59:52 +0100 Subject: [PATCH 29/51] fixes --- src/amuse/community/arepo/src/Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 27c1b505b9..197fa00cef 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -1,11 +1,14 @@ +MPICXX ?= mpicxx +MPICC ?= mpicc # TODO: Determine whether this is needed as included in arepo/Makefile. 
ifeq ($(origin AMUSE_DIR), undefined) AMUSE_DIR := $(shell amusifier --get-amuse-dir) endif -include $(AMUSE_DIR)/config.mk -GSL_FLAGS = -I/opt/Homebrew/include # TODO: Need to make GSL_FLAGS generalisable. +CC = $(MPICC) # sets the C-compiler +# GSL_FLAGS = -I/opt/Homebrew/include # TODO: Need to make GSL_FLAGS generalisable. CFLAGS += -Wall -g $(GSL_FLAGS) CXXFLAGS += $(CFLAGS) LDFLAGS += -lm $(MUSE_LD_FLAGS) @@ -17,7 +20,7 @@ SUBDIR = add_backgroundgrid cooling debug_md5 domain fof gitversion gravity \ time_integration utils SRCS = $(foreach fd, $(SUBDIR), $(wildcard $(fd)/*.c)) -CODEOBJS = test.o $(SRCS:c=o) +CODEOBJS = $(SRCS:c=o) AR = ar ruv RANLIB = ranlib From fbcce03f21c342f67be4c9f8da5c855fd16aa549 Mon Sep 17 00:00:00 2001 From: Matthew West Date: Wed, 18 May 2022 13:13:22 +0100 Subject: [PATCH 30/51] define CXX flag in Arepo makefile --- src/amuse/community/arepo/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 567a479296..46f0668422 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -6,7 +6,7 @@ endif -include $(AMUSE_DIR)/config.mk MPICXX ?= mpicxx - +CXX = $(MPICXX) CFLAGS += -Wall -g CXXFLAGS += $(CFLAGS) LDFLAGS += -lm $(MUSE_LD_FLAGS) From 5cf22a118f03c6b8df78c3b454d445edab234f1d Mon Sep 17 00:00:00 2001 From: Matthew West Date: Wed, 18 May 2022 13:50:04 +0100 Subject: [PATCH 31/51] set_default values to match current arepo config data structures --- src/amuse/community/arepo/interface.cc | 33 +++++++++++++------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 6a1b9641b1..44fe3205d7 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -83,38 +83,39 @@ void set_default_parameters(){ All.UnitVelocity_in_cm_per_s = 1e5; // Gravitational softening lengths - 
All.SofteningComovingType0 = 1.0; - All.SofteningComovingType1 = 1.0; - - All.SofteningMaxPhysType0 = 1.0; - All.SofteningMaxPhysType1 = 1.0; - + All.SofteningComoving[0] = 1.0; + All.SofteningComoving[1] = 1.0; + All.SofteningMaxPhys[0] = 1.0; + All.SofteningMaxPhys[1] = 1.0; All.GasSoftFactor = 2.5; - All.SofteningTypeOfPartType0 = 0; - All.SofteningTypeOfPartType1 = 1; - All.SofteningTypeOfPartType2 = 1; - All.SofteningTypeOfPartType3 = 1; - All.SofteningTypeOfPartType4 = 1; - All.SofteningTypeOfPartType5 = 1; - All.MinimumComovingHydroSoftening = 1.0; - All.AdaptiveHydroSofteningSpacing = 1.2; + All.SofteningTypeOfPartType[0] = 0; + All.SofteningTypeOfPartType[1] = 1; + All.SofteningTypeOfPartType[2] = 1; + All.SofteningTypeOfPartType[3] = 1; + All.SofteningTypeOfPartType[4] = 1; + All.SofteningTypeOfPartType[5] = 1; + #ifdef ADAPTIVE_HYDRO_SOFTENING + All.MinimumComovingHydroSoftening = 1.0; + All.AdaptiveHydroSofteningSpacing = 1.2; + #endif // Mesh regularization options All.CellShapingSpeed = 0.5; All.CellShapingFactor = 1.0; // parameters that are fixed for AMUSE: - All.PartAllocFactor = 1.5; // Memory allocation parameter All.TreeAllocFactor = 0.8; // Memory allocation parameter - All.BufferSize = 25; // Memory allocation parameter All.ResubmitOn = 0; // Keep this turned off! 
All.OutputListOn = 0; // Keep this turned off All.GravityConstantInternal = 0; // Keep this turned off } int initialize_code(){ + int argc = 0; + char **argv=NULL; + MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); MPI_Comm_size(MPI_COMM_WORLD, &NTask); From af0f37408b87f53079b6e268a72ac54eb28c6ad3 Mon Sep 17 00:00:00 2001 From: Matthew West Date: Thu, 19 May 2022 12:14:44 +0100 Subject: [PATCH 32/51] add #include mpi.h & run.c change mpi_print to just print --- src/amuse/community/arepo/interface.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 44fe3205d7..a212d90b24 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -1,7 +1,12 @@ +#ifndef NOMPI +#include +#endif + #include "worker_code.h" #include "src/main/allvars.h" #include "src/main/proto.h" +#include "src/main/run.c" // general interface functions: @@ -155,7 +160,7 @@ int initialize_code(){ if(status >= 0) { if(status > 0) - mpi_printf("init() returned with %d\n", status); + printf("init() returned with %d\n", status); cleanup_code(); } @@ -171,8 +176,8 @@ int run_sim() { } int cleanup_code(){ - mpi_printf("Code run for %f seconds!\n", timediff(StartOfRun, second())); - mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n"); + printf("Code run for %f seconds!\n", timediff(StartOfRun, second())); + printf("endrun called, calling MPI_Finalize()\nbye!\n\n"); fflush(stdout); #ifdef HAVE_HDF5 From 3ecbaa3ae7a7db1b990cd479a4d158c037210f51 Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Thu, 19 May 2022 13:17:54 +0100 Subject: [PATCH 33/51] include GSL headers --- src/amuse/community/arepo/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 46f0668422..3e67a00625 100644 --- a/src/amuse/community/arepo/Makefile +++ 
b/src/amuse/community/arepo/Makefile @@ -39,4 +39,4 @@ arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) $(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@ .cc.o: $< - $(CXX) $(CXXFLAGS) -c -o $@ $< + $(CXX) $(CXXFLAGS) $(GSL_FLAGS) -c -o $@ $< From 5ab0a4b3d6845b83631547015f8e0c26defdfb44 Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Thu, 19 May 2022 13:39:28 +0100 Subject: [PATCH 34/51] add GSL_FLAGS to CXXFLAGS --- src/amuse/community/arepo/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 3e67a00625..b055eb3c18 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -8,7 +8,7 @@ endif MPICXX ?= mpicxx CXX = $(MPICXX) CFLAGS += -Wall -g -CXXFLAGS += $(CFLAGS) +CXXFLAGS += $(CFLAGS) $(GSL_FLAGS) LDFLAGS += -lm $(MUSE_LD_FLAGS) OBJS = interface.o @@ -38,5 +38,5 @@ worker_code.h: interface.py arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) $(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@ -.cc.o: $< - $(CXX) $(CXXFLAGS) $(GSL_FLAGS) -c -o $@ $< +# .cc.o: $< +# $(CXX) $(CXXFLAGS) $(GSL_FLAGS) -c -o $@ $< From dfac304c57102167c571f4f9cb4289b95e713b09 Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Thu, 19 May 2022 13:39:50 +0100 Subject: [PATCH 35/51] remove include --- src/amuse/community/arepo/interface.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index a212d90b24..9815df3021 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -6,7 +6,6 @@ #include "src/main/allvars.h" #include "src/main/proto.h" -#include "src/main/run.c" // general interface functions: From d1da5b8a5ff4fa9339b93516ff125a1139aa8237 Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Thu, 19 May 2022 16:03:16 +0100 Subject: [PATCH 36/51] arepo is C not C++ --- 
src/amuse/community/arepo/interface.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 9815df3021..7374928af6 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -7,6 +7,10 @@ #include "src/main/allvars.h" #include "src/main/proto.h" +#ifdef __cplusplus +extern "C" { +#endif + // general interface functions: void set_default_parameters(){ @@ -350,3 +354,6 @@ int set_velocity(int index_of_the_particle, double vx, double vy, double vz){ return 0; } +#ifdef __cplusplus +} +#endif From 2f9296793592efb38bc7f1f810c338f48cc3e518 Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Thu, 19 May 2022 16:09:34 +0100 Subject: [PATCH 37/51] re-add lines --- src/amuse/community/arepo/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index b055eb3c18..934b56a210 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -38,5 +38,5 @@ worker_code.h: interface.py arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) $(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@ -# .cc.o: $< -# $(CXX) $(CXXFLAGS) $(GSL_FLAGS) -c -o $@ $< +.cc.o: $< + $(MPICXX) $(CXXFLAGS) $(CODELIB) -c -o $@ $< From 980aae5ca43042e6d9f00167a33e2965f25311d8 Mon Sep 17 00:00:00 2001 From: Matthew West Date: Thu, 19 May 2022 18:15:59 +0100 Subject: [PATCH 38/51] create arepo interface.h and move #include allvars.h & proto.h along with ifdef Cpp choice there --- src/amuse/community/arepo/interface.cc | 17 ++--------------- src/amuse/community/arepo/interface.h | 10 ++++++++++ src/amuse/community/arepo/interface.py | 2 +- 3 files changed, 13 insertions(+), 16 deletions(-) create mode 100644 src/amuse/community/arepo/interface.h diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 
7374928af6..525745f074 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -4,12 +4,6 @@ #include "worker_code.h" -#include "src/main/allvars.h" -#include "src/main/proto.h" - -#ifdef __cplusplus -extern "C" { -#endif // general interface functions: @@ -136,12 +130,6 @@ int initialize_code(){ determine_compute_nodes(); - for(PTask = 0; NTask > (1 << PTask); PTask++) - ; - - begrun0(); - - RestartFlag = 0; set_default_parameters(); begrun1(); /* set-up run */ @@ -354,6 +342,5 @@ int set_velocity(int index_of_the_particle, double vx, double vy, double vz){ return 0; } -#ifdef __cplusplus -} -#endif + + diff --git a/src/amuse/community/arepo/interface.h b/src/amuse/community/arepo/interface.h new file mode 100644 index 0000000000..2e1d880db7 --- /dev/null +++ b/src/amuse/community/arepo/interface.h @@ -0,0 +1,10 @@ +#ifdef __cplusplus +extern "C" { +#endif + +#include "src/allvars.h" +#include "src/proto.h" + +#ifdef __cplusplus +} +#endif diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index 1f228321b9..c1da97b511 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -23,7 +23,7 @@ class ArepoInterface( .. 
[#] Weinberger, R., Springel, V., Pakmor, R., 2020, ApJS, 248, 32 (Public Code Release) [2020ApJS..248...32W] """ - include_headers = ["worker_code.h"] + include_headers = ["worker_code.h", "inteface.h"] def __init__(self, **keyword_arguments): CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments) From b683887a8d3f0fc8514359762e56431aa90a863f Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Thu, 19 May 2022 21:00:57 +0100 Subject: [PATCH 39/51] fix typo --- src/amuse/community/arepo/interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/interface.py b/src/amuse/community/arepo/interface.py index c1da97b511..a8e770a79d 100644 --- a/src/amuse/community/arepo/interface.py +++ b/src/amuse/community/arepo/interface.py @@ -23,7 +23,7 @@ class ArepoInterface( .. [#] Weinberger, R., Springel, V., Pakmor, R., 2020, ApJS, 248, 32 (Public Code Release) [2020ApJS..248...32W] """ - include_headers = ["worker_code.h", "inteface.h"] + include_headers = ["worker_code.h", "interface.h"] def __init__(self, **keyword_arguments): CodeInterface.__init__(self, name_of_the_worker="arepo_worker", **keyword_arguments) From 3e84f77e24339f50883f4c52bd7d4fa8510377ec Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Thu, 19 May 2022 21:01:47 +0100 Subject: [PATCH 40/51] fix paths --- src/amuse/community/arepo/interface.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/interface.h b/src/amuse/community/arepo/interface.h index 2e1d880db7..890b78b765 100644 --- a/src/amuse/community/arepo/interface.h +++ b/src/amuse/community/arepo/interface.h @@ -2,8 +2,8 @@ extern "C" { #endif -#include "src/allvars.h" -#include "src/proto.h" +#include "src/main/allvars.h" +#include "src/main/proto.h" #ifdef __cplusplus } From 1cee0565eed561b659bee84980248b91751358d4 Mon Sep 17 00:00:00 2001 From: Matthew West Date: Fri, 20 May 2022 09:57:07 +0100 Subject: [PATCH 41/51] add 
#include statement for new interface.h header --- src/amuse/community/arepo/interface.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 525745f074..ad3cfba8c9 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -3,7 +3,7 @@ #endif #include "worker_code.h" - +#include "interface.h" // general interface functions: From 719aaa3e6ee211460959a806fdbcc54cca54d2cc Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Fri, 20 May 2022 11:05:14 +0200 Subject: [PATCH 42/51] some fixes for c<->cpp --- src/amuse/community/arepo/interface.cc | 7 ++++++- src/amuse/community/arepo/interface.h | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index 525745f074..ebaed03a03 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -1,12 +1,17 @@ +#include +#include + #ifndef NOMPI #include #endif +#include "interface.h" #include "worker_code.h" - // general interface functions: +using namespace std; + void set_default_parameters(){ // Relevant files strcpy(All.InitCondFile, "./snap_010"); diff --git a/src/amuse/community/arepo/interface.h b/src/amuse/community/arepo/interface.h index 890b78b765..7bcd4497c6 100644 --- a/src/amuse/community/arepo/interface.h +++ b/src/amuse/community/arepo/interface.h @@ -1,10 +1,13 @@ #ifdef __cplusplus extern "C" { +#define ___cplusplus +#undef __cplusplus #endif #include "src/main/allvars.h" #include "src/main/proto.h" -#ifdef __cplusplus +#ifdef ___cplusplus } +#define __cplusplus #endif From 81223f643360fa8850efab638ab13fbebbbe7647 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Fri, 20 May 2022 12:59:47 +0200 Subject: [PATCH 43/51] fixes to build, note the source changes --- src/amuse/community/arepo/Makefile | 2 +- src/amuse/community/arepo/src/Makefile | 3 
++- src/amuse/community/arepo/src/init/begrun.c | 2 +- src/amuse/community/arepo/src/main/main.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 934b56a210..271a4671ee 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -36,7 +36,7 @@ worker_code.h: interface.py $(CODE_GENERATOR) --type=H interface.py ArepoInterface -o $@ arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) - $(MPICXX) $(CXXFLAGS) $< $(OBJS) $(CODELIB) -o $@ + $(MPICXX) $(CXXFLAGS) $(GSL_FLAGS) $(GMP_LIBS) $(GSL_LIBS) $< $(OBJS) $(CODELIB) -o $@ .cc.o: $< $(MPICXX) $(CXXFLAGS) $(CODELIB) -c -o $@ $< diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 197fa00cef..e852b39c53 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -16,11 +16,12 @@ LDFLAGS += -lm $(MUSE_LD_FLAGS) CODELIB = libarepo.a SUBDIR = add_backgroundgrid cooling debug_md5 domain fof gitversion gravity \ - hydro init io main mesh mpi_utils ngbtree star_formation subfind \ + hydro init io mesh mesh/voronoi mpi_utils ngbtree star_formation subfind \ time_integration utils SRCS = $(foreach fd, $(SUBDIR), $(wildcard $(fd)/*.c)) CODEOBJS = $(SRCS:c=o) +CODEOBJS += main/allvars.o main/run.o main/main.o AR = ar ruv RANLIB = ranlib diff --git a/src/amuse/community/arepo/src/init/begrun.c b/src/amuse/community/arepo/src/init/begrun.c index 5db8ba6351..a70748cfd3 100644 --- a/src/amuse/community/arepo/src/init/begrun.c +++ b/src/amuse/community/arepo/src/init/begrun.c @@ -84,7 +84,7 @@ void begrun0(void) if(ThisTask == 0) { - output_compile_time_options(); +// output_compile_time_options(); } } diff --git a/src/amuse/community/arepo/src/main/main.c b/src/amuse/community/arepo/src/main/main.c index f1ae80be6a..0b0824b5b1 100644 --- a/src/amuse/community/arepo/src/main/main.c +++ 
b/src/amuse/community/arepo/src/main/main.c @@ -58,7 +58,7 @@ * * \return status of exit; 0 for normal exit. */ -int main(int argc, char **argv) +int no_main(int argc, char **argv) { // #ifdef IMPOSE_PINNING // detect_topology(); From 1e0c6b97ab41d2894819a35ff618405553f3bdda Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Fri, 20 May 2022 13:06:37 +0200 Subject: [PATCH 44/51] fix build to detect source changes --- src/amuse/community/arepo/Makefile | 4 +++- src/amuse/community/arepo/src/Makefile | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 271a4671ee..14eef8f6a0 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -26,7 +26,7 @@ clean: distclean: clean make -C src distclean -$(CODELIB): +$(CODELIB): .FORCE make -C src all worker_code.cc: interface.py @@ -40,3 +40,5 @@ arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) .cc.o: $< $(MPICXX) $(CXXFLAGS) $(CODELIB) -c -o $@ $< + +.FORCE: diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index e852b39c53..23d0d99c52 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -41,4 +41,7 @@ $(CODELIB): $(CODEOBJS) $(RANLIB) $@ .cc.o: $< - $(CXX) $(CXXFLAGS) -c -o $@ $< + $(MPICXX) $(CXXFLAGS) -c -o $@ $< + +.c.o: $< + $(MPICC) $(CXXFLAGS) -c -o $@ $< From edc21b73b48e8bdb7a0a0fa93327c2a2495bd600 Mon Sep 17 00:00:00 2001 From: Matthew West Date: Fri, 20 May 2022 13:51:22 +0100 Subject: [PATCH 45/51] move GSL_LIBS and GMP_LIBS to the end of the flags for Arepo make file --- src/amuse/community/arepo/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/arepo/Makefile b/src/amuse/community/arepo/Makefile index 14eef8f6a0..4fa877bb9a 100644 --- a/src/amuse/community/arepo/Makefile +++ b/src/amuse/community/arepo/Makefile @@ -36,7 +36,7 @@ 
worker_code.h: interface.py $(CODE_GENERATOR) --type=H interface.py ArepoInterface -o $@ arepo_worker: worker_code.cc worker_code.h $(CODELIB) $(OBJS) - $(MPICXX) $(CXXFLAGS) $(GSL_FLAGS) $(GMP_LIBS) $(GSL_LIBS) $< $(OBJS) $(CODELIB) -o $@ + $(MPICXX) $(CXXFLAGS) $(GSL_FLAGS) $< $(OBJS) $(CODELIB) -o $@ $(GMP_LIBS) $(GSL_LIBS) .cc.o: $< $(MPICXX) $(CXXFLAGS) $(CODELIB) -c -o $@ $< From 73d53b70e02442a5a01e7e86cdc16075753c6d58 Mon Sep 17 00:00:00 2001 From: Matthew West Date: Mon, 23 May 2022 08:56:56 +0100 Subject: [PATCH 46/51] remove extra mpi_init from initialize_code --- src/amuse/community/arepo/interface.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index a8e949cb4c..db8497b93a 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -121,10 +121,7 @@ void set_default_parameters(){ } int initialize_code(){ - int argc = 0; - char **argv=NULL; - MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); MPI_Comm_size(MPI_COMM_WORLD, &NTask); From ff2ea53cdbf05e7cce479363ff746bedf57e89b0 Mon Sep 17 00:00:00 2001 From: Steven Rieder Date: Wed, 25 May 2022 11:04:23 +0200 Subject: [PATCH 47/51] Default to non-periodic gravity --- src/amuse/community/arepo/src/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/amuse/community/arepo/src/Makefile b/src/amuse/community/arepo/src/Makefile index 23d0d99c52..c0ec73a619 100644 --- a/src/amuse/community/arepo/src/Makefile +++ b/src/amuse/community/arepo/src/Makefile @@ -23,6 +23,10 @@ SRCS = $(foreach fd, $(SUBDIR), $(wildcard $(fd)/*.c)) CODEOBJS = $(SRCS:c=o) CODEOBJS += main/allvars.o main/run.o main/main.o +AREPOFLAGS += -DGRAVITY_NOT_PERIODIC # no periodic boundaries by default + +CXXFLAGS += $(AREPOFLAGS) + AR = ar ruv RANLIB = ranlib RM = rm From 285b495bd60348d5c4a98cc1230433022e767e08 Mon Sep 17 00:00:00 2001 From: "Stephen P. 
Cook" Date: Wed, 28 Sep 2022 11:25:39 +0000 Subject: [PATCH 48/51] Add missing calls to memory management helpers Add basic arepo test. Co-authored-by: Steven Rieder --- src/amuse/community/arepo/__init__.py | 3 ++- src/amuse/community/arepo/interface.cc | 3 ++- src/amuse/community/arepo/src/init/begrun.c | 6 ++++++ src/amuse/community/arepo/test_simple.py | 5 +++++ 4 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 src/amuse/community/arepo/test_simple.py diff --git a/src/amuse/community/arepo/__init__.py b/src/amuse/community/arepo/__init__.py index abe3ba85b6..b08b6187b7 100644 --- a/src/amuse/community/arepo/__init__.py +++ b/src/amuse/community/arepo/__init__.py @@ -1 +1,2 @@ -# generated file \ No newline at end of file +# generated file +from .interface import Arepo diff --git a/src/amuse/community/arepo/interface.cc b/src/amuse/community/arepo/interface.cc index db8497b93a..e05dd6a0dc 100644 --- a/src/amuse/community/arepo/interface.cc +++ b/src/amuse/community/arepo/interface.cc @@ -132,7 +132,8 @@ int initialize_code(){ init_cpu_log(); determine_compute_nodes(); - + // Needed to check available memory + mpi_report_committable_memory(); set_default_parameters(); begrun1(); /* set-up run */ diff --git a/src/amuse/community/arepo/src/init/begrun.c b/src/amuse/community/arepo/src/init/begrun.c index a70748cfd3..f6944d7306 100644 --- a/src/amuse/community/arepo/src/init/begrun.c +++ b/src/amuse/community/arepo/src/init/begrun.c @@ -103,6 +103,12 @@ void begrun1(void) { /* read_parameter_file(ParameterFile); ... 
read in parameters for this run */ +#ifdef HOST_MEMORY_REPORTING + check_maxmemsize_setting(); +#endif /* #ifdef HOST_MEMORY_REPORTING */ + + mymalloc_init(); /* Added from read_parameter_file */ + check_parameters(); /* consistency check of parameters */ #ifdef HAVE_HDF5 diff --git a/src/amuse/community/arepo/test_simple.py b/src/amuse/community/arepo/test_simple.py new file mode 100644 index 0000000000..1951ac6edd --- /dev/null +++ b/src/amuse/community/arepo/test_simple.py @@ -0,0 +1,5 @@ +from amuse.community.arepo import Arepo + +# Check code runs without errors +x = Arepo(redirection="none") +x.initialize_code() From 62cd3459792301bf40106312c6934fbb4938b41b Mon Sep 17 00:00:00 2001 From: Volker Springel Date: Mon, 10 Jan 2022 21:21:32 +0100 Subject: [PATCH 49/51] small bug fix: in case HIERARCHICAL_GRAVITY is not used, and the maximum used timestep sizes increases during a step, it could happen that for particles on the maximum timestep one gravity half-step is not applied (because HighestActiveTimeBin increases) --- .../community/arepo/src/time_integration/do_gravity_hydro.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c b/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c index 88b7f89a34..40a06ac282 100644 --- a/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c +++ b/src/amuse/community/arepo/src/time_integration/do_gravity_hydro.c @@ -265,7 +265,7 @@ void find_gravity_timesteps_and_do_gravity_step_first_half(void) } /* reconstruct list of active particles because it is used for other things too (i.e. 
wind particles) */ - timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin); + timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestSynchronizedTimeBin); sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles); #else /* #ifdef HIERARCHICAL_GRAVITY */ @@ -276,7 +276,7 @@ void find_gravity_timesteps_and_do_gravity_step_first_half(void) timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, TIMEBINS); else #endif /* #ifdef FORCE_EQUAL_TIMESTEPS */ - timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestActiveTimeBin); + timebin_make_list_of_active_particles_up_to_timebin(&TimeBinsGravity, All.HighestSynchronizedTimeBin); sumup_large_ints(1, &TimeBinsGravity.NActiveParticles, &TimeBinsGravity.GlobalNActiveParticles); mpi_printf("KICKS: 1st gravity for highest active timebin=%d: particles %lld\n", All.HighestActiveTimeBin, From 33d3f94feb37d69d4115bf09d8b663dd3abd4708 Mon Sep 17 00:00:00 2001 From: Volker Springel Date: Thu, 2 Jun 2022 13:26:13 +0200 Subject: [PATCH 50/51] removed non-standard uint in favor of 'unsigned int' --- src/amuse/community/arepo/src/io/hdf5_util.c | 2 +- src/amuse/community/arepo/src/main/proto.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/arepo/src/io/hdf5_util.c b/src/amuse/community/arepo/src/io/hdf5_util.c index a613a36bdc..a690bd71f3 100644 --- a/src/amuse/community/arepo/src/io/hdf5_util.c +++ b/src/amuse/community/arepo/src/io/hdf5_util.c @@ -847,7 +847,7 @@ herr_t my_H5Pset_shuffle(hid_t plist_id) * * \return Non-negative value if successful. 
*/ -herr_t my_H5Pset_deflate(hid_t plist_id, uint level) +herr_t my_H5Pset_deflate(hid_t plist_id, unsigned int level) { herr_t status = H5Pset_deflate(plist_id, level); if(status < 0) diff --git a/src/amuse/community/arepo/src/main/proto.h b/src/amuse/community/arepo/src/main/proto.h index 15a346f1bc..61bdaad467 100644 --- a/src/amuse/community/arepo/src/main/proto.h +++ b/src/amuse/community/arepo/src/main/proto.h @@ -598,7 +598,7 @@ hid_t my_H5Pcreate(hid_t class_id); herr_t my_H5Pclose(hid_t plist); herr_t my_H5Pset_chunk(hid_t plist, int ndims, const hsize_t *dim); herr_t my_H5Pset_shuffle(hid_t plist_id); -herr_t my_H5Pset_deflate(hid_t plist_id, uint level); +herr_t my_H5Pset_deflate(hid_t plist_id, unsigned int level); herr_t my_H5Pset_fletcher32(hid_t plist_id); #endif /* #ifdef HDF5_FILTERS */ From 0193040db0bc011ee7552f088720ef19c3818081 Mon Sep 17 00:00:00 2001 From: Volker Springel Date: Thu, 2 Jun 2022 14:03:04 +0200 Subject: [PATCH 51/51] disabled a superfluous call of get_starformtion_rate() --- src/amuse/community/arepo/src/cooling/cooling.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/arepo/src/cooling/cooling.c b/src/amuse/community/arepo/src/cooling/cooling.c index 7e7cebbc98..3baf82d3a3 100644 --- a/src/amuse/community/arepo/src/cooling/cooling.c +++ b/src/amuse/community/arepo/src/cooling/cooling.c @@ -477,9 +477,9 @@ void SetOutputGasState(int i, double *ne_guess, double *nH0, double *coolrate) double u = dmax(All.MinEgySpec, SphP[i].Utherm); /* update GasState as appropriate given compile-time options and cell properties */ -#if defined(USE_SFR) - sfr = get_starformation_rate(i); -#endif /* #if defined(USE_SFR) */ + // #if defined(USE_SFR) + // sfr = get_starformation_rate(i); // call is superfluous at this place + // #endif /* update DoCool */ DoCool.u_old_input = u;